Blender  V2.93
opencl_util.cpp
Go to the documentation of this file.
1 /*
2  * Copyright 2011-2013 Blender Foundation
3  *
4  * Licensed under the Apache License, Version 2.0 (the "License");
5  * you may not use this file except in compliance with the License.
6  * You may obtain a copy of the License at
7  *
8  * http://www.apache.org/licenses/LICENSE-2.0
9  *
10  * Unless required by applicable law or agreed to in writing, software
11  * distributed under the License is distributed on an "AS IS" BASIS,
12  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13  * See the License for the specific language governing permissions and
14  * limitations under the License.
15  */
16 
17 #ifdef WITH_OPENCL
18 
19 # include "device/device_intern.h"
21 
22 # include "util/util_debug.h"
23 # include "util/util_logging.h"
24 # include "util/util_md5.h"
25 # include "util/util_path.h"
26 # include "util/util_semaphore.h"
27 # include "util/util_system.h"
28 # include "util/util_time.h"
29 
30 using std::cerr;
31 using std::endl;
32 
34 
35 OpenCLCache::Slot::ProgramEntry::ProgramEntry() : program(NULL), mutex(NULL)
36 {
37 }
38 
39 OpenCLCache::Slot::ProgramEntry::ProgramEntry(const ProgramEntry &rhs)
40  : program(rhs.program), mutex(NULL)
41 {
42 }
43 
44 OpenCLCache::Slot::ProgramEntry::~ProgramEntry()
45 {
46  delete mutex;
47 }
48 
49 OpenCLCache::Slot::Slot() : context_mutex(NULL), context(NULL)
50 {
51 }
52 
53 OpenCLCache::Slot::Slot(const Slot &rhs)
54  : context_mutex(NULL), context(NULL), programs(rhs.programs)
55 {
56 }
57 
58 OpenCLCache::Slot::~Slot()
59 {
60  delete context_mutex;
61 }
62 
63 OpenCLCache &OpenCLCache::global_instance()
64 {
65  static OpenCLCache instance;
66  return instance;
67 }
68 
69 cl_context OpenCLCache::get_context(cl_platform_id platform,
70  cl_device_id device,
71  thread_scoped_lock &slot_locker)
72 {
73  assert(platform != NULL);
74 
75  OpenCLCache &self = global_instance();
76 
77  thread_scoped_lock cache_lock(self.cache_lock);
78 
79  pair<CacheMap::iterator, bool> ins = self.cache.insert(
80  CacheMap::value_type(PlatformDevicePair(platform, device), Slot()));
81 
82  Slot &slot = ins.first->second;
83 
84  /* create slot lock only while holding cache lock */
85  if (!slot.context_mutex)
86  slot.context_mutex = new thread_mutex;
87 
88  /* need to unlock cache before locking slot, to allow store to complete */
89  cache_lock.unlock();
90 
91  /* lock the slot */
92  slot_locker = thread_scoped_lock(*slot.context_mutex);
93 
94  /* If the thing isn't cached */
95  if (slot.context == NULL) {
96  /* return with the caller's lock holder holding the slot lock */
97  return NULL;
98  }
99 
100  /* the item was already cached, release the slot lock */
101  slot_locker.unlock();
102 
103  cl_int ciErr = clRetainContext(slot.context);
104  assert(ciErr == CL_SUCCESS);
105  (void)ciErr;
106 
107  return slot.context;
108 }
109 
110 cl_program OpenCLCache::get_program(cl_platform_id platform,
111  cl_device_id device,
112  ustring key,
113  thread_scoped_lock &slot_locker)
114 {
115  assert(platform != NULL);
116 
117  OpenCLCache &self = global_instance();
118 
119  thread_scoped_lock cache_lock(self.cache_lock);
120 
121  pair<CacheMap::iterator, bool> ins = self.cache.insert(
122  CacheMap::value_type(PlatformDevicePair(platform, device), Slot()));
123 
124  Slot &slot = ins.first->second;
125 
126  pair<Slot::EntryMap::iterator, bool> ins2 = slot.programs.insert(
127  Slot::EntryMap::value_type(key, Slot::ProgramEntry()));
128 
129  Slot::ProgramEntry &entry = ins2.first->second;
130 
131  /* create slot lock only while holding cache lock */
132  if (!entry.mutex)
133  entry.mutex = new thread_mutex;
134 
135  /* need to unlock cache before locking slot, to allow store to complete */
136  cache_lock.unlock();
137 
138  /* lock the slot */
139  slot_locker = thread_scoped_lock(*entry.mutex);
140 
141  /* If the thing isn't cached */
142  if (entry.program == NULL) {
143  /* return with the caller's lock holder holding the slot lock */
144  return NULL;
145  }
146 
147  /* the item was already cached, release the slot lock */
148  slot_locker.unlock();
149 
150  cl_int ciErr = clRetainProgram(entry.program);
151  assert(ciErr == CL_SUCCESS);
152  (void)ciErr;
153 
154  return entry.program;
155 }
156 
157 void OpenCLCache::store_context(cl_platform_id platform,
158  cl_device_id device,
159  cl_context context,
160  thread_scoped_lock &slot_locker)
161 {
162  assert(platform != NULL);
163  assert(device != NULL);
164  assert(context != NULL);
165 
166  OpenCLCache &self = global_instance();
167 
168  thread_scoped_lock cache_lock(self.cache_lock);
169  CacheMap::iterator i = self.cache.find(PlatformDevicePair(platform, device));
170  cache_lock.unlock();
171 
172  Slot &slot = i->second;
173 
174  /* sanity check */
175  assert(i != self.cache.end());
176  assert(slot.context == NULL);
177 
178  slot.context = context;
179 
180  /* unlock the slot */
181  slot_locker.unlock();
182 
183  /* increment reference count in OpenCL.
184  * The caller is going to release the object when done with it. */
185  cl_int ciErr = clRetainContext(context);
186  assert(ciErr == CL_SUCCESS);
187  (void)ciErr;
188 }
189 
190 void OpenCLCache::store_program(cl_platform_id platform,
191  cl_device_id device,
192  cl_program program,
193  ustring key,
194  thread_scoped_lock &slot_locker)
195 {
196  assert(platform != NULL);
197  assert(device != NULL);
198  assert(program != NULL);
199 
200  OpenCLCache &self = global_instance();
201 
202  thread_scoped_lock cache_lock(self.cache_lock);
203 
204  CacheMap::iterator i = self.cache.find(PlatformDevicePair(platform, device));
205  assert(i != self.cache.end());
206  Slot &slot = i->second;
207 
208  Slot::EntryMap::iterator i2 = slot.programs.find(key);
209  assert(i2 != slot.programs.end());
210  Slot::ProgramEntry &entry = i2->second;
211 
212  assert(entry.program == NULL);
213 
214  cache_lock.unlock();
215 
216  entry.program = program;
217 
218  /* unlock the slot */
219  slot_locker.unlock();
220 
221  /* Increment reference count in OpenCL.
222  * The caller is going to release the object when done with it.
223  */
224  cl_int ciErr = clRetainProgram(program);
225  assert(ciErr == CL_SUCCESS);
226  (void)ciErr;
227 }
228 
229 string OpenCLCache::get_kernel_md5()
230 {
231  OpenCLCache &self = global_instance();
232  thread_scoped_lock lock(self.kernel_md5_lock);
233 
234  if (self.kernel_md5.empty()) {
235  self.kernel_md5 = path_files_md5_hash(path_get("source"));
236  }
237  return self.kernel_md5;
238 }
239 
240 static string get_program_source(const string &kernel_file)
241 {
242  string source = "#include \"kernel/kernels/opencl/" + kernel_file + "\"\n";
243  /* We compile kernels consisting of many files. unfortunately OpenCL
244  * kernel caches do not seem to recognize changes in included files.
245  * so we force recompile on changes by adding the md5 hash of all files.
246  */
247  source = path_source_replace_includes(source, path_get("source"));
248  source += "\n// " + util_md5_string(source) + "\n";
249  return source;
250 }
251 
252 OpenCLDevice::OpenCLProgram::OpenCLProgram(OpenCLDevice *device,
253  const string &program_name,
254  const string &kernel_file,
255  const string &kernel_build_options,
256  bool use_stdout)
257  : device(device),
258  program_name(program_name),
259  kernel_file(kernel_file),
260  kernel_build_options(kernel_build_options),
261  use_stdout(use_stdout)
262 {
263  loaded = false;
264  needs_compiling = true;
265  program = NULL;
266 }
267 
268 OpenCLDevice::OpenCLProgram::~OpenCLProgram()
269 {
270  release();
271 }
272 
273 void OpenCLDevice::OpenCLProgram::release()
274 {
275  for (map<ustring, cl_kernel>::iterator kernel = kernels.begin(); kernel != kernels.end();
276  ++kernel) {
277  if (kernel->second) {
278  clReleaseKernel(kernel->second);
279  kernel->second = NULL;
280  }
281  }
282  if (program) {
283  clReleaseProgram(program);
284  program = NULL;
285  }
286 }
287 
288 void OpenCLDevice::OpenCLProgram::add_log(const string &msg, bool debug)
289 {
290  if (!use_stdout) {
291  log += msg + "\n";
292  }
293  else if (!debug) {
294  printf("%s\n", msg.c_str());
295  fflush(stdout);
296  }
297  else {
298  VLOG(2) << msg;
299  }
300 }
301 
302 void OpenCLDevice::OpenCLProgram::add_error(const string &msg)
303 {
304  if (use_stdout) {
305  fprintf(stderr, "%s\n", msg.c_str());
306  }
307  if (error_msg == "") {
308  error_msg += "\n";
309  }
310  error_msg += msg;
311 }
312 
313 void OpenCLDevice::OpenCLProgram::add_kernel(ustring name)
314 {
315  if (!kernels.count(name)) {
316  kernels[name] = NULL;
317  }
318 }
319 
320 bool OpenCLDevice::OpenCLProgram::build_kernel(const string *debug_src)
321 {
322  string build_options;
323  build_options = device->kernel_build_options(debug_src) + kernel_build_options;
324 
325  VLOG(1) << "Build options passed to clBuildProgram: '" << build_options << "'.";
326  cl_int ciErr = clBuildProgram(program, 0, NULL, build_options.c_str(), NULL, NULL);
327 
328  /* show warnings even if build is successful */
329  size_t ret_val_size = 0;
330 
331  clGetProgramBuildInfo(program, device->cdDevice, CL_PROGRAM_BUILD_LOG, 0, NULL, &ret_val_size);
332 
333  if (ciErr != CL_SUCCESS) {
334  add_error(string("OpenCL build failed with error ") + clewErrorString(ciErr) +
335  ", errors in console.");
336  }
337 
338  if (ret_val_size > 1) {
339  vector<char> build_log(ret_val_size + 1);
340  clGetProgramBuildInfo(
341  program, device->cdDevice, CL_PROGRAM_BUILD_LOG, ret_val_size, &build_log[0], NULL);
342 
343  build_log[ret_val_size] = '\0';
344  /* Skip meaningless empty output from the NVidia compiler. */
345  if (!(ret_val_size == 2 && build_log[0] == '\n')) {
346  add_log(string("OpenCL program ") + program_name + " build output: " + string(&build_log[0]),
347  ciErr == CL_SUCCESS);
348  }
349  }
350 
351  return (ciErr == CL_SUCCESS);
352 }
353 
354 bool OpenCLDevice::OpenCLProgram::compile_kernel(const string *debug_src)
355 {
356  string source = get_program_source(kernel_file);
357 
358  if (debug_src) {
359  path_write_text(*debug_src, source);
360  }
361 
362  size_t source_len = source.size();
363  const char *source_str = source.c_str();
364  cl_int ciErr;
365 
366  program = clCreateProgramWithSource(device->cxContext, 1, &source_str, &source_len, &ciErr);
367 
368  if (ciErr != CL_SUCCESS) {
369  add_error(string("OpenCL program creation failed: ") + clewErrorString(ciErr));
370  return false;
371  }
372 
373  double starttime = time_dt();
374  add_log(string("Cycles: compiling OpenCL program ") + program_name + "...", false);
375  add_log(string("Build flags: ") + kernel_build_options, true);
376 
377  if (!build_kernel(debug_src))
378  return false;
379 
380  double elapsed = time_dt() - starttime;
381  add_log(
382  string_printf("Kernel compilation of %s finished in %.2lfs.", program_name.c_str(), elapsed),
383  false);
384 
385  return true;
386 }
387 
388 static void escape_python_string(string &str)
389 {
390  /* Escape string to be passed as a Python raw string with '' quotes'. */
391  string_replace(str, "'", "\'");
392 }
393 
394 static int opencl_compile_process_limit()
395 {
396  /* Limit number of concurrent processes compiling, with a heuristic based
397  * on total physical RAM and estimate of memory usage needed when compiling
398  * with all Cycles features enabled.
399  *
400  * This is somewhat arbitrary as we don't know the actual available RAM or
401  * how much the kernel compilation will needed depending on the features, but
402  * better than not limiting at all. */
403  static const int64_t GB = 1024LL * 1024LL * 1024LL;
404  static const int64_t process_memory = 2 * GB;
405  static const int64_t base_memory = 2 * GB;
406  static const int64_t system_memory = system_physical_ram();
407  static const int64_t process_limit = (system_memory - base_memory) / process_memory;
408 
409  return max((int)process_limit, 1);
410 }
411 
412 bool OpenCLDevice::OpenCLProgram::compile_separate(const string &clbin)
413 {
414  /* Construct arguments. */
415  vector<string> args;
416  args.push_back("--background");
417  args.push_back("--factory-startup");
418  args.push_back("--python-expr");
419 
420  int device_platform_id = device->device_num;
421  string device_name = device->device_name;
422  string platform_name = device->platform_name;
423  string build_options = device->kernel_build_options(NULL) + kernel_build_options;
424  string kernel_file_escaped = kernel_file;
425  string clbin_escaped = clbin;
426 
427  escape_python_string(device_name);
428  escape_python_string(platform_name);
429  escape_python_string(build_options);
430  escape_python_string(kernel_file_escaped);
431  escape_python_string(clbin_escaped);
432 
433  args.push_back(string_printf(
434  "import _cycles; _cycles.opencl_compile(r'%d', r'%s', r'%s', r'%s', r'%s', r'%s')",
435  device_platform_id,
436  device_name.c_str(),
437  platform_name.c_str(),
438  build_options.c_str(),
439  kernel_file_escaped.c_str(),
440  clbin_escaped.c_str()));
441 
442  /* Limit number of concurrent processes compiling. */
443  static thread_counting_semaphore semaphore(opencl_compile_process_limit());
444  semaphore.acquire();
445 
446  /* Compile. */
447  const double starttime = time_dt();
448  add_log(string("Cycles: compiling OpenCL program ") + program_name + "...", false);
449  add_log(string("Build flags: ") + kernel_build_options, true);
450  const bool success = system_call_self(args);
451  const double elapsed = time_dt() - starttime;
452 
453  semaphore.release();
454 
455  if (!success || !path_exists(clbin)) {
456  return false;
457  }
458 
459  add_log(
460  string_printf("Kernel compilation of %s finished in %.2lfs.", program_name.c_str(), elapsed),
461  false);
462 
463  return load_binary(clbin);
464 }
465 
466 /* Compile opencl kernel. This method is called from the _cycles Python
467  * module compile kernels. Parameters must match function above. */
469 {
470  int device_platform_id = std::stoi(parameters[0]);
471  const string &device_name = parameters[1];
472  const string &platform_name = parameters[2];
473  const string &build_options = parameters[3];
474  const string &kernel_file = parameters[4];
475  const string &binary_path = parameters[5];
476 
477  if (clewInit() != CLEW_SUCCESS) {
478  return false;
479  }
480 
481  vector<OpenCLPlatformDevice> usable_devices;
482  OpenCLInfo::get_usable_devices(&usable_devices);
483  if (device_platform_id >= usable_devices.size()) {
484  return false;
485  }
486 
487  OpenCLPlatformDevice &platform_device = usable_devices[device_platform_id];
488  if (platform_device.platform_name != platform_name ||
489  platform_device.device_name != device_name) {
490  return false;
491  }
492 
493  cl_platform_id platform = platform_device.platform_id;
494  cl_device_id device = platform_device.device_id;
495  const cl_context_properties context_props[] = {
496  CL_CONTEXT_PLATFORM, (cl_context_properties)platform, 0, 0};
497 
498  cl_int err;
499  cl_context context = clCreateContext(context_props, 1, &device, NULL, NULL, &err);
500  if (err != CL_SUCCESS) {
501  return false;
502  }
503 
504  string source = get_program_source(kernel_file);
505  size_t source_len = source.size();
506  const char *source_str = source.c_str();
507  cl_program program = clCreateProgramWithSource(context, 1, &source_str, &source_len, &err);
508  bool result = false;
509 
510  if (err == CL_SUCCESS) {
511  err = clBuildProgram(program, 0, NULL, build_options.c_str(), NULL, NULL);
512 
513  if (err == CL_SUCCESS) {
514  size_t size = 0;
515  clGetProgramInfo(program, CL_PROGRAM_BINARY_SIZES, sizeof(size_t), &size, NULL);
516  if (size > 0) {
517  vector<uint8_t> binary(size);
518  uint8_t *bytes = &binary[0];
519  clGetProgramInfo(program, CL_PROGRAM_BINARIES, sizeof(uint8_t *), &bytes, NULL);
520  result = path_write_binary(binary_path, binary);
521  }
522  }
523  clReleaseProgram(program);
524  }
525 
526  clReleaseContext(context);
527 
528  return result;
529 }
530 
531 bool OpenCLDevice::OpenCLProgram::load_binary(const string &clbin, const string *debug_src)
532 {
533  /* read binary into memory */
534  vector<uint8_t> binary;
535 
536  if (!path_read_binary(clbin, binary)) {
537  add_error(string_printf("OpenCL failed to read cached binary %s.", clbin.c_str()));
538  return false;
539  }
540 
541  /* create program */
542  cl_int status, ciErr;
543  size_t size = binary.size();
544  const uint8_t *bytes = &binary[0];
545 
546  program = clCreateProgramWithBinary(
547  device->cxContext, 1, &device->cdDevice, &size, &bytes, &status, &ciErr);
548 
549  if (status != CL_SUCCESS || ciErr != CL_SUCCESS) {
550  add_error(string("OpenCL failed create program from cached binary ") + clbin + ": " +
551  clewErrorString(status) + " " + clewErrorString(ciErr));
552  return false;
553  }
554 
555  if (!build_kernel(debug_src))
556  return false;
557 
558  return true;
559 }
560 
561 bool OpenCLDevice::OpenCLProgram::save_binary(const string &clbin)
562 {
563  size_t size = 0;
564  clGetProgramInfo(program, CL_PROGRAM_BINARY_SIZES, sizeof(size_t), &size, NULL);
565 
566  if (!size)
567  return false;
568 
569  vector<uint8_t> binary(size);
570  uint8_t *bytes = &binary[0];
571 
572  clGetProgramInfo(program, CL_PROGRAM_BINARIES, sizeof(uint8_t *), &bytes, NULL);
573 
574  return path_write_binary(clbin, binary);
575 }
576 
578 {
579  loaded = false;
580  string device_md5 = device->device_md5_hash(kernel_build_options);
581 
582  /* Try to use cached kernel. */
583  thread_scoped_lock cache_locker;
584  ustring cache_key(program_name + device_md5);
585  program = device->load_cached_kernel(cache_key, cache_locker);
586  if (!program) {
587  add_log(string("OpenCL program ") + program_name + " not found in cache.", true);
588 
589  /* need to create source to get md5 */
590  string source = get_program_source(kernel_file);
591 
592  string basename = "cycles_kernel_" + program_name + "_" + device_md5 + "_" +
593  util_md5_string(source);
594  basename = path_cache_get(path_join("kernels", basename));
595  string clbin = basename + ".clbin";
596 
597  /* If binary kernel exists already, try use it. */
598  if (path_exists(clbin) && load_binary(clbin)) {
599  /* Kernel loaded from binary, nothing to do. */
600  add_log(string("Loaded program from ") + clbin + ".", true);
601 
602  /* Cache the program. */
603  device->store_cached_kernel(program, cache_key, cache_locker);
604  }
605  else {
606  add_log(string("OpenCL program ") + program_name + " not found on disk.", true);
607  cache_locker.unlock();
608  }
609  }
610 
611  if (program) {
612  create_kernels();
613  loaded = true;
614  needs_compiling = false;
615  }
616 
617  return loaded;
618 }
619 
620 void OpenCLDevice::OpenCLProgram::compile()
621 {
622  assert(device);
623 
624  string device_md5 = device->device_md5_hash(kernel_build_options);
625 
626  /* Try to use cached kernel. */
627  thread_scoped_lock cache_locker;
628  ustring cache_key(program_name + device_md5);
629  program = device->load_cached_kernel(cache_key, cache_locker);
630 
631  if (!program) {
632 
633  add_log(string("OpenCL program ") + program_name + " not found in cache.", true);
634 
635  /* need to create source to get md5 */
636  string source = get_program_source(kernel_file);
637 
638  string basename = "cycles_kernel_" + program_name + "_" + device_md5 + "_" +
639  util_md5_string(source);
640  basename = path_cache_get(path_join("kernels", basename));
641  string clbin = basename + ".clbin";
642 
643  /* path to preprocessed source for debugging */
644  string clsrc, *debug_src = NULL;
645 
646  if (OpenCLInfo::use_debug()) {
647  clsrc = basename + ".cl";
648  debug_src = &clsrc;
649  }
650 
651  if (DebugFlags().running_inside_blender && compile_separate(clbin)) {
652  add_log(string("Built and loaded program from ") + clbin + ".", true);
653  loaded = true;
654  }
655  else {
656  if (DebugFlags().running_inside_blender) {
657  add_log(string("Separate-process building of ") + clbin +
658  " failed, will fall back to regular building.",
659  true);
660  }
661 
662  /* If does not exist or loading binary failed, compile kernel. */
663  if (!compile_kernel(debug_src)) {
664  needs_compiling = false;
665  return;
666  }
667 
668  /* Save binary for reuse. */
669  if (!save_binary(clbin)) {
670  add_log(string("Saving compiled OpenCL kernel to ") + clbin + " failed!", true);
671  }
672  }
673 
674  /* Cache the program. */
675  device->store_cached_kernel(program, cache_key, cache_locker);
676  }
677 
678  create_kernels();
679  needs_compiling = false;
680  loaded = true;
681 }
682 
683 void OpenCLDevice::OpenCLProgram::create_kernels()
684 {
685  for (map<ustring, cl_kernel>::iterator kernel = kernels.begin(); kernel != kernels.end();
686  ++kernel) {
687  assert(kernel->second == NULL);
688  cl_int ciErr;
689  string name = "kernel_ocl_" + kernel->first.string();
690  kernel->second = clCreateKernel(program, name.c_str(), &ciErr);
691  if (device->opencl_error(ciErr)) {
692  add_error(string("Error getting kernel ") + name + " from program " + program_name + ": " +
693  clewErrorString(ciErr));
694  return;
695  }
696  }
697 }
698 
699 bool OpenCLDevice::OpenCLProgram::wait_for_availability()
700 {
701  add_log(string("Waiting for availability of ") + program_name + ".", true);
702  while (needs_compiling) {
703  time_sleep(0.1);
704  }
705  return loaded;
706 }
707 
708 void OpenCLDevice::OpenCLProgram::report_error()
709 {
710  /* If loaded is true, there was no error. */
711  if (loaded)
712  return;
713  /* if use_stdout is true, the error was already reported. */
714  if (use_stdout)
715  return;
716 
717  cerr << error_msg << endl;
718  if (!compile_output.empty()) {
719  cerr << "OpenCL kernel build output for " << program_name << ":" << endl;
720  cerr << compile_output << endl;
721  }
722 }
723 
725 {
726  assert(kernels.size() == 1);
727  return kernels.begin()->second;
728 }
729 
730 cl_kernel OpenCLDevice::OpenCLProgram::operator()(ustring name)
731 {
732  assert(kernels.count(name));
733  return kernels[name];
734 }
735 
736 cl_device_type OpenCLInfo::device_type()
737 {
738  switch (DebugFlags().opencl.device_type) {
740  return 0;
742  return CL_DEVICE_TYPE_ALL;
744  return CL_DEVICE_TYPE_DEFAULT;
746  return CL_DEVICE_TYPE_CPU;
748  return CL_DEVICE_TYPE_GPU;
750  return CL_DEVICE_TYPE_ACCELERATOR;
751  default:
752  return CL_DEVICE_TYPE_ALL;
753  }
754 }
755 
756 bool OpenCLInfo::use_debug()
757 {
758  return DebugFlags().opencl.debug;
759 }
760 
761 bool OpenCLInfo::device_supported(const string &platform_name, const cl_device_id device_id)
762 {
763  cl_device_type device_type;
764  if (!get_device_type(device_id, &device_type)) {
765  return false;
766  }
767  string device_name;
768  if (!get_device_name(device_id, &device_name)) {
769  return false;
770  }
771 
772  int driver_major = 0;
773  int driver_minor = 0;
774  if (!get_driver_version(device_id, &driver_major, &driver_minor)) {
775  return false;
776  }
777  VLOG(3) << "OpenCL driver version " << driver_major << "." << driver_minor;
778 
779  if (getenv("CYCLES_OPENCL_TEST")) {
780  return true;
781  }
782 
783  /* Allow Intel GPUs on Intel OpenCL platform. */
784  if (platform_name.find("Intel") != string::npos) {
785  if (device_type != CL_DEVICE_TYPE_GPU) {
786  /* OpenCL on Intel CPU is not an officially supported configuration.
787  * Use hybrid CPU+GPU rendering to utilize both GPU and CPU. */
788  return false;
789  }
790 
791 # ifdef __APPLE__
792  /* Apple uses own framework, which can also put Iris onto AMD frame-work.
793  * This isn't supported configuration. */
794  return false;
795 # else
796  if (device_name.find("Iris") != string::npos || device_name.find("Xe") != string::npos) {
797  return true;
798  }
799 # endif
800  }
801 
802  if (platform_name == "AMD Accelerated Parallel Processing" &&
803  device_type == CL_DEVICE_TYPE_GPU) {
804  if (driver_major < 2236) {
805  VLOG(1) << "AMD driver version " << driver_major << "." << driver_minor << " not supported.";
806  return false;
807  }
808  const char *blacklist[] = {/* GCN 1 */
809  "Tahiti",
810  "Pitcairn",
811  "Capeverde",
812  "Oland",
813  "Hainan",
814  NULL};
815  for (int i = 0; blacklist[i] != NULL; i++) {
816  if (device_name == blacklist[i]) {
817  VLOG(1) << "AMD device " << device_name << " not supported";
818  return false;
819  }
820  }
821  return true;
822  }
823  if (platform_name == "Apple" && device_type == CL_DEVICE_TYPE_GPU) {
824  return false;
825  }
826  return false;
827 }
828 
829 bool OpenCLInfo::platform_version_check(cl_platform_id platform, string *error)
830 {
831  const int req_major = 1, req_minor = 1;
832  int major, minor;
833  char version[256];
834  clGetPlatformInfo(platform, CL_PLATFORM_VERSION, sizeof(version), &version, NULL);
835  if (sscanf(version, "OpenCL %d.%d", &major, &minor) < 2) {
836  if (error != NULL) {
837  *error = string_printf("OpenCL: failed to parse platform version string (%s).", version);
838  }
839  return false;
840  }
841  if (!((major == req_major && minor >= req_minor) || (major > req_major))) {
842  if (error != NULL) {
843  *error = string_printf(
844  "OpenCL: platform version 1.1 or later required, found %d.%d", major, minor);
845  }
846  return false;
847  }
848  if (error != NULL) {
849  *error = "";
850  }
851  return true;
852 }
853 
854 bool OpenCLInfo::get_device_version(cl_device_id device, int *r_major, int *r_minor, string *error)
855 {
856  char version[256];
857  clGetDeviceInfo(device, CL_DEVICE_OPENCL_C_VERSION, sizeof(version), &version, NULL);
858  if (sscanf(version, "OpenCL C %d.%d", r_major, r_minor) < 2) {
859  if (error != NULL) {
860  *error = string_printf("OpenCL: failed to parse OpenCL C version string (%s).", version);
861  }
862  return false;
863  }
864  if (error != NULL) {
865  *error = "";
866  }
867  return true;
868 }
869 
870 bool OpenCLInfo::device_version_check(cl_device_id device, string *error)
871 {
872  const int req_major = 1, req_minor = 1;
873  int major, minor;
874  if (!get_device_version(device, &major, &minor, error)) {
875  return false;
876  }
877 
878  if (!((major == req_major && minor >= req_minor) || (major > req_major))) {
879  if (error != NULL) {
880  *error = string_printf("OpenCL: C version 1.1 or later required, found %d.%d", major, minor);
881  }
882  return false;
883  }
884  if (error != NULL) {
885  *error = "";
886  }
887  return true;
888 }
889 
890 string OpenCLInfo::get_hardware_id(const string &platform_name, cl_device_id device_id)
891 {
892  if (platform_name == "AMD Accelerated Parallel Processing" || platform_name == "Apple") {
893  /* Use cl_amd_device_topology extension. */
894  cl_char topology[24];
895  if (clGetDeviceInfo(device_id, 0x4037, sizeof(topology), topology, NULL) == CL_SUCCESS &&
896  topology[0] == 1) {
897  return string_printf("%02x:%02x.%01x",
898  (unsigned int)topology[21],
899  (unsigned int)topology[22],
900  (unsigned int)topology[23]);
901  }
902  }
903  else if (platform_name == "NVIDIA CUDA") {
904  /* Use two undocumented options of the cl_nv_device_attribute_query extension. */
905  cl_int bus_id, slot_id;
906  if (clGetDeviceInfo(device_id, 0x4008, sizeof(cl_int), &bus_id, NULL) == CL_SUCCESS &&
907  clGetDeviceInfo(device_id, 0x4009, sizeof(cl_int), &slot_id, NULL) == CL_SUCCESS) {
908  return string_printf("%02x:%02x.%01x",
909  (unsigned int)(bus_id),
910  (unsigned int)(slot_id >> 3),
911  (unsigned int)(slot_id & 0x7));
912  }
913  }
914  /* No general way to get a hardware ID from OpenCL => give up. */
915  return "";
916 }
917 
918 void OpenCLInfo::get_usable_devices(vector<OpenCLPlatformDevice> *usable_devices)
919 {
920  const cl_device_type device_type = OpenCLInfo::device_type();
921  static bool first_time = true;
922 # define FIRST_VLOG(severity) \
923  if (first_time) \
924  VLOG(severity)
925 
926  usable_devices->clear();
927 
928  if (device_type == 0) {
929  FIRST_VLOG(2) << "OpenCL devices are forced to be disabled.";
930  first_time = false;
931  return;
932  }
933 
934  cl_int error;
935  vector<cl_device_id> device_ids;
936  vector<cl_platform_id> platform_ids;
937 
938  /* Get platforms. */
939  if (!get_platforms(&platform_ids, &error)) {
940  FIRST_VLOG(2) << "Error fetching platforms:" << string(clewErrorString(error));
941  first_time = false;
942  return;
943  }
944  if (platform_ids.size() == 0) {
945  FIRST_VLOG(2) << "No OpenCL platforms were found.";
946  first_time = false;
947  return;
948  }
949  /* Devices are numbered consecutively across platforms. */
950  for (int platform = 0; platform < platform_ids.size(); platform++) {
951  cl_platform_id platform_id = platform_ids[platform];
952  string platform_name;
953  if (!get_platform_name(platform_id, &platform_name)) {
954  FIRST_VLOG(2) << "Failed to get platform name, ignoring.";
955  continue;
956  }
957  FIRST_VLOG(2) << "Enumerating devices for platform " << platform_name << ".";
958  if (!platform_version_check(platform_id)) {
959  FIRST_VLOG(2) << "Ignoring platform " << platform_name
960  << " due to too old compiler version.";
961  continue;
962  }
963  if (!get_platform_devices(platform_id, device_type, &device_ids, &error)) {
964  FIRST_VLOG(2) << "Ignoring platform " << platform_name
965  << ", failed to fetch of devices: " << string(clewErrorString(error));
966  continue;
967  }
968  if (device_ids.size() == 0) {
969  FIRST_VLOG(2) << "Ignoring platform " << platform_name << ", it has no devices.";
970  continue;
971  }
972  for (int num = 0; num < device_ids.size(); num++) {
973  const cl_device_id device_id = device_ids[num];
974  string device_name;
975  if (!get_device_name(device_id, &device_name, &error)) {
976  FIRST_VLOG(2) << "Failed to fetch device name: " << string(clewErrorString(error))
977  << ", ignoring.";
978  continue;
979  }
980  if (!device_version_check(device_id)) {
981  FIRST_VLOG(2) << "Ignoring device " << device_name << " due to old compiler version.";
982  continue;
983  }
984  if (device_supported(platform_name, device_id)) {
985  cl_device_type device_type;
986  if (!get_device_type(device_id, &device_type, &error)) {
987  FIRST_VLOG(2) << "Ignoring device " << device_name
988  << ", failed to fetch device type:" << string(clewErrorString(error));
989  continue;
990  }
991  string readable_device_name = get_readable_device_name(device_id);
992  if (readable_device_name != device_name) {
993  FIRST_VLOG(2) << "Using more readable device name: " << readable_device_name;
994  }
995  FIRST_VLOG(2) << "Adding new device " << readable_device_name << ".";
996  string hardware_id = get_hardware_id(platform_name, device_id);
997  string device_extensions = get_device_extensions(device_id);
998  usable_devices->push_back(OpenCLPlatformDevice(platform_id,
999  platform_name,
1000  device_id,
1001  device_type,
1002  readable_device_name,
1003  hardware_id,
1004  device_extensions));
1005  }
1006  else {
1007  FIRST_VLOG(2) << "Ignoring device " << device_name << ", not officially supported yet.";
1008  }
1009  }
1010  }
1011  first_time = false;
1012 }
1013 
1014 bool OpenCLInfo::get_platforms(vector<cl_platform_id> *platform_ids, cl_int *error)
1015 {
1016  /* Reset from possible previous state. */
1017  platform_ids->resize(0);
1018  cl_uint num_platforms;
1019  if (!get_num_platforms(&num_platforms, error)) {
1020  return false;
1021  }
1022  /* Get actual platforms. */
1023  cl_int err;
1024  platform_ids->resize(num_platforms);
1025  if ((err = clGetPlatformIDs(num_platforms, &platform_ids->at(0), NULL)) != CL_SUCCESS) {
1026  if (error != NULL) {
1027  *error = err;
1028  }
1029  return false;
1030  }
1031  if (error != NULL) {
1032  *error = CL_SUCCESS;
1033  }
1034  return true;
1035 }
1036 
1037 vector<cl_platform_id> OpenCLInfo::get_platforms()
1038 {
1039  vector<cl_platform_id> platform_ids;
1040  get_platforms(&platform_ids);
1041  return platform_ids;
1042 }
1043 
1044 bool OpenCLInfo::get_num_platforms(cl_uint *num_platforms, cl_int *error)
1045 {
1046  cl_int err;
1047  if ((err = clGetPlatformIDs(0, NULL, num_platforms)) != CL_SUCCESS) {
1048  if (error != NULL) {
1049  *error = err;
1050  }
1051  *num_platforms = 0;
1052  return false;
1053  }
1054  if (error != NULL) {
1055  *error = CL_SUCCESS;
1056  }
1057  return true;
1058 }
1059 
1060 cl_uint OpenCLInfo::get_num_platforms()
1061 {
1062  cl_uint num_platforms;
1063  if (!get_num_platforms(&num_platforms)) {
1064  return 0;
1065  }
1066  return num_platforms;
1067 }
1068 
1069 bool OpenCLInfo::get_platform_name(cl_platform_id platform_id, string *platform_name)
1070 {
1071  char buffer[256];
1072  if (clGetPlatformInfo(platform_id, CL_PLATFORM_NAME, sizeof(buffer), &buffer, NULL) !=
1073  CL_SUCCESS) {
1074  *platform_name = "";
1075  return false;
1076  }
1077  *platform_name = buffer;
1078  return true;
1079 }
1080 
1081 string OpenCLInfo::get_platform_name(cl_platform_id platform_id)
1082 {
1083  string platform_name;
1084  if (!get_platform_name(platform_id, &platform_name)) {
1085  return "";
1086  }
1087  return platform_name;
1088 }
1089 
1090 bool OpenCLInfo::get_num_platform_devices(cl_platform_id platform_id,
1091  cl_device_type device_type,
1092  cl_uint *num_devices,
1093  cl_int *error)
1094 {
1095  cl_int err;
1096  if ((err = clGetDeviceIDs(platform_id, device_type, 0, NULL, num_devices)) != CL_SUCCESS) {
1097  if (error != NULL) {
1098  *error = err;
1099  }
1100  *num_devices = 0;
1101  return false;
1102  }
1103  if (error != NULL) {
1104  *error = CL_SUCCESS;
1105  }
1106  return true;
1107 }
1108 
1109 cl_uint OpenCLInfo::get_num_platform_devices(cl_platform_id platform_id,
1110  cl_device_type device_type)
1111 {
1112  cl_uint num_devices;
1113  if (!get_num_platform_devices(platform_id, device_type, &num_devices)) {
1114  return 0;
1115  }
1116  return num_devices;
1117 }
1118 
1119 bool OpenCLInfo::get_platform_devices(cl_platform_id platform_id,
1120  cl_device_type device_type,
1121  vector<cl_device_id> *device_ids,
1122  cl_int *error)
1123 {
1124  /* Reset from possible previous state. */
1125  device_ids->resize(0);
1126  /* Get number of devices to pre-allocate memory. */
1127  cl_uint num_devices;
1128  if (!get_num_platform_devices(platform_id, device_type, &num_devices, error)) {
1129  return false;
1130  }
1131  /* Get actual device list. */
1132  device_ids->resize(num_devices);
1133  cl_int err;
1134  if ((err = clGetDeviceIDs(platform_id, device_type, num_devices, &device_ids->at(0), NULL)) !=
1135  CL_SUCCESS) {
1136  if (error != NULL) {
1137  *error = err;
1138  }
1139  return false;
1140  }
1141  if (error != NULL) {
1142  *error = CL_SUCCESS;
1143  }
1144  return true;
1145 }
1146 
1147 vector<cl_device_id> OpenCLInfo::get_platform_devices(cl_platform_id platform_id,
1148  cl_device_type device_type)
1149 {
1151  get_platform_devices(platform_id, device_type, &devices);
1152  return devices;
1153 }
1154 
1155 bool OpenCLInfo::get_device_name(cl_device_id device_id, string *device_name, cl_int *error)
1156 {
1157  char buffer[1024];
1158  cl_int err;
1159  if ((err = clGetDeviceInfo(device_id, CL_DEVICE_NAME, sizeof(buffer), &buffer, NULL)) !=
1160  CL_SUCCESS) {
1161  if (error != NULL) {
1162  *error = err;
1163  }
1164  *device_name = "";
1165  return false;
1166  }
1167  if (error != NULL) {
1168  *error = CL_SUCCESS;
1169  }
1170  *device_name = buffer;
1171  return true;
1172 }
1173 
1174 string OpenCLInfo::get_device_name(cl_device_id device_id)
1175 {
1176  string device_name;
1177  if (!get_device_name(device_id, &device_name)) {
1178  return "";
1179  }
1180  return device_name;
1181 }
1182 
1183 bool OpenCLInfo::get_device_extensions(cl_device_id device_id,
1184  string *device_extensions,
1185  cl_int *error)
1186 {
1187  size_t extension_length = 0;
1188  cl_int err;
1189  /* Determine the size of the extension string*/
1190  if ((err = clGetDeviceInfo(device_id, CL_DEVICE_EXTENSIONS, 0, 0, &extension_length)) !=
1191  CL_SUCCESS) {
1192  if (error != NULL) {
1193  *error = err;
1194  }
1195  *device_extensions = "";
1196  return false;
1197  }
1198  vector<char> buffer(extension_length);
1199  if ((err = clGetDeviceInfo(
1200  device_id, CL_DEVICE_EXTENSIONS, extension_length, buffer.data(), NULL)) !=
1201  CL_SUCCESS) {
1202  if (error != NULL) {
1203  *error = err;
1204  }
1205  *device_extensions = "";
1206  return false;
1207  }
1208  if (error != NULL) {
1209  *error = CL_SUCCESS;
1210  }
1211  *device_extensions = string(buffer.data());
1212  return true;
1213 }
1214 
1215 string OpenCLInfo::get_device_extensions(cl_device_id device_id)
1216 {
1217  string device_extensions;
1218  if (!get_device_extensions(device_id, &device_extensions)) {
1219  return "";
1220  }
1221  return device_extensions;
1222 }
1223 
1224 bool OpenCLInfo::get_device_type(cl_device_id device_id,
1225  cl_device_type *device_type,
1226  cl_int *error)
1227 {
1228  cl_int err;
1229  if ((err = clGetDeviceInfo(
1230  device_id, CL_DEVICE_TYPE, sizeof(cl_device_type), device_type, NULL)) != CL_SUCCESS) {
1231  if (error != NULL) {
1232  *error = err;
1233  }
1234  *device_type = 0;
1235  return false;
1236  }
1237  if (error != NULL) {
1238  *error = CL_SUCCESS;
1239  }
1240  return true;
1241 }
1242 
1243 cl_device_type OpenCLInfo::get_device_type(cl_device_id device_id)
1244 {
1245  cl_device_type device_type;
1246  if (!get_device_type(device_id, &device_type)) {
1247  return 0;
1248  }
1249  return device_type;
1250 }
1251 
1252 string OpenCLInfo::get_readable_device_name(cl_device_id device_id)
1253 {
1254  string name = "";
1255  char board_name[1024];
1256  size_t length = 0;
1257  if (clGetDeviceInfo(
1258  device_id, CL_DEVICE_BOARD_NAME_AMD, sizeof(board_name), &board_name, &length) ==
1259  CL_SUCCESS) {
1260  if (length != 0 && board_name[0] != '\0') {
1261  name = board_name;
1262  }
1263  }
1264 
1265  /* Fallback to standard device name API. */
1266  if (name.empty()) {
1267  name = get_device_name(device_id);
1268  }
1269 
1270  /* Special exception for AMD Vega, need to be able to tell
1271  * Vega 56 from 64 apart.
1272  */
1273  if (name == "Radeon RX Vega") {
1274  cl_int max_compute_units = 0;
1275  if (clGetDeviceInfo(device_id,
1276  CL_DEVICE_MAX_COMPUTE_UNITS,
1277  sizeof(max_compute_units),
1278  &max_compute_units,
1279  NULL) == CL_SUCCESS) {
1280  name += " " + to_string(max_compute_units);
1281  }
1282  }
1283 
1284  /* Distinguish from our native CPU device. */
1285  if (get_device_type(device_id) & CL_DEVICE_TYPE_CPU) {
1286  name += " (OpenCL)";
1287  }
1288 
1289  return name;
1290 }
1291 
1292 bool OpenCLInfo::get_driver_version(cl_device_id device_id, int *major, int *minor, cl_int *error)
1293 {
1294  char buffer[1024];
1295  cl_int err;
1296  if ((err = clGetDeviceInfo(device_id, CL_DRIVER_VERSION, sizeof(buffer), &buffer, NULL)) !=
1297  CL_SUCCESS) {
1298  if (error != NULL) {
1299  *error = err;
1300  }
1301  return false;
1302  }
1303  if (error != NULL) {
1304  *error = CL_SUCCESS;
1305  }
1306  if (sscanf(buffer, "%d.%d", major, minor) < 2) {
1307  VLOG(1) << string_printf("OpenCL: failed to parse driver version string (%s).", buffer);
1308  return false;
1309  }
1310  return true;
1311 }
1312 
1313 int OpenCLInfo::mem_sub_ptr_alignment(cl_device_id device_id)
1314 {
1315  int base_align_bits;
1316  if (clGetDeviceInfo(
1317  device_id, CL_DEVICE_MEM_BASE_ADDR_ALIGN, sizeof(int), &base_align_bits, NULL) ==
1318  CL_SUCCESS) {
1319  return base_align_bits / 8;
1320  }
1321  return 1;
1322 }
1323 
1325 
1326 #endif
ThreadMutex mutex
static DBVT_INLINE btScalar size(const btDbvtVolume &a)
Definition: btDbvt.cpp:52
SIMD_FORCE_INLINE btScalar length(const btQuaternion &q)
Return the length of a quaternion.
Definition: btQuaternion.h:895
SIMD_FORCE_INLINE btVector3 operator()(const btVector3 &x) const
Return the transform of the vector.
Definition: btTransform.h:90
OpenCL opencl
Definition: util_debug.h:186
static char * basename(char *string)
Definition: datatoc.c:33
bool device_opencl_compile_kernel(const vector< string > &parameters)
#define str(s)
static FT_Error err
Definition: freetypefont.c:52
#define CCL_NAMESPACE_END
__kernel void ccl_constant KernelData ccl_global void ccl_global char ccl_global int ccl_global char ccl_global unsigned int ccl_global float * buffer
double parameters[NUM_PARAMETERS]
static void error(const char *str)
Definition: meshlaplacian.c:65
int load(istream &in, Vec3r &v)
Definition: ViewMapIO.cpp:61
INLINE Rall1d< T, V, S > log(const Rall1d< T, V, S > &arg)
Definition: rall1d.h:303
Vector< CPUDevice > devices
list of all CPUDevices. for every hardware thread an instance of CPUDevice is created
struct blender::compositor::@172::@175 opencl
std::string to_string(const T &n)
struct SELECTID_Context context
Definition: select_engine.c:47
__int64 int64_t
Definition: stdint.h:92
unsigned char uint8_t
Definition: stdint.h:81
float max
DebugFlags & DebugFlags()
Definition: util_debug.h:205
#define VLOG(severity)
Definition: util_logging.h:50
string util_md5_string(const string &str)
Definition: util_md5.cpp:380
string path_cache_get(const string &sub)
Definition: util_path.cpp:371
string path_get(const string &sub)
Definition: util_path.cpp:351
string path_source_replace_includes(const string &source, const string &path, const string &source_filename)
Definition: util_path.cpp:904
string path_files_md5_hash(const string &dir)
Definition: util_path.cpp:619
string path_join(const string &dir, const string &file)
Definition: util_path.cpp:426
bool path_exists(const string &path)
Definition: util_path.cpp:572
bool path_write_binary(const string &path, const vector< uint8_t > &binary)
Definition: util_path.cpp:661
bool path_write_text(const string &path, string &text)
Definition: util_path.cpp:679
bool path_read_binary(const string &path, vector< uint8_t > &binary)
Definition: util_path.cpp:687
CCL_NAMESPACE_BEGIN string string_printf(const char *format,...)
Definition: util_string.cpp:32
void string_replace(string &haystack, const string &needle, const string &other)
size_t system_physical_ram()
bool system_call_self(const vector< string > &args)
std::unique_lock< std::mutex > thread_scoped_lock
Definition: util_thread.h:41
CCL_NAMESPACE_BEGIN typedef std::mutex thread_mutex
Definition: util_thread.h:40
void time_sleep(double t)
Definition: util_time.cpp:57
CCL_NAMESPACE_BEGIN double time_dt()
Definition: util_time.cpp:48