34 struct texture_slot_t {
35 texture_slot_t(
const string &name,
int slot) : name(name), slot(slot)
42 static const string NON_SPLIT_KERNELS =
48 static const string SPLIT_BUNDLE_KERNELS =
57 "next_iteration_setup "
58 "indirect_subsurface "
63 "adaptive_adjust_samples";
65 const string OpenCLDevice::get_opencl_program_name(
const string &kernel_name)
67 if (NON_SPLIT_KERNELS.find(kernel_name) != std::string::npos) {
70 else if (SPLIT_BUNDLE_KERNELS.find(kernel_name) != std::string::npos) {
71 return "split_bundle";
74 return "split_" + kernel_name;
78 const string OpenCLDevice::get_opencl_program_filename(
const string &kernel_name)
80 if (kernel_name ==
"denoising") {
83 else if (SPLIT_BUNDLE_KERNELS.find(kernel_name) != std::string::npos) {
84 return "kernel_split_bundle.cl";
87 return "kernel_" + kernel_name +
".cl";
110 const string &opencl_program_name)
113 if (opencl_program_name ==
"base" || opencl_program_name ==
"denoising") {
116 else if (opencl_program_name ==
"bake") {
121 enable_default_features(features);
132 else if (opencl_program_name ==
"displace") {
136 enable_default_features(features);
151 else if (opencl_program_name ==
"background") {
155 enable_default_features(features);
174 string build_options =
"-D__SPLIT_KERNEL__ ";
176 cl_device_type device_type;
177 OpenCLInfo::get_device_type(this->cdDevice, &device_type, &this->ciErr);
178 assert(this->ciErr == CL_SUCCESS);
179 if (device_type == CL_DEVICE_TYPE_GPU) {
180 build_options +=
"-D__COMPUTE_DEVICE_GPU__ ";
184 enable_default_features(nofeatures);
187 if (opencl_program_name ==
"split_do_volume" && !requested_features.
use_volume) {
192 enable_default_features(features);
201 if (opencl_program_name ==
"split_bundle") {
211 return build_options;
214 OpenCLDevice::OpenCLSplitPrograms::OpenCLSplitPrograms(OpenCLDevice *device_)
219 OpenCLDevice::OpenCLSplitPrograms::~OpenCLSplitPrograms()
221 program_split.release();
222 program_lamp_emission.release();
223 program_do_volume.release();
224 program_indirect_background.release();
225 program_shader_eval.release();
226 program_holdout_emission_blurring_pathtermination_ao.release();
227 program_subsurface_scatter.release();
228 program_direct_lighting.release();
229 program_shadow_blocked_ao.release();
230 program_shadow_blocked_dl.release();
233 void OpenCLDevice::OpenCLSplitPrograms::load_kernels(
237 # define ADD_SPLIT_KERNEL_BUNDLE_PROGRAM(kernel_name) \
238 program_split.add_kernel(ustring("path_trace_" #kernel_name));
239 # define ADD_SPLIT_KERNEL_PROGRAM(kernel_name) \
240 const string program_name_##kernel_name = "split_" #kernel_name; \
241 program_##kernel_name = OpenCLDevice::OpenCLProgram( \
243 program_name_##kernel_name, \
244 "kernel_" #kernel_name ".cl", \
245 device->get_build_options(requested_features, program_name_##kernel_name)); \
246 program_##kernel_name.add_kernel(ustring("path_trace_" #kernel_name)); \
247 programs.push_back(&program_##kernel_name);
250 ADD_SPLIT_KERNEL_PROGRAM(subsurface_scatter);
251 ADD_SPLIT_KERNEL_PROGRAM(direct_lighting);
254 ADD_SPLIT_KERNEL_PROGRAM(do_volume);
256 ADD_SPLIT_KERNEL_PROGRAM(shader_eval);
257 ADD_SPLIT_KERNEL_PROGRAM(lamp_emission);
258 ADD_SPLIT_KERNEL_PROGRAM(holdout_emission_blurring_pathtermination_ao);
259 ADD_SPLIT_KERNEL_PROGRAM(shadow_blocked_dl);
260 ADD_SPLIT_KERNEL_PROGRAM(shadow_blocked_ao);
264 program_split = OpenCLDevice::OpenCLProgram(
267 "kernel_split_bundle.cl",
268 device->get_build_options(requested_features,
"split_bundle"));
270 ADD_SPLIT_KERNEL_BUNDLE_PROGRAM(
data_init);
271 ADD_SPLIT_KERNEL_BUNDLE_PROGRAM(state_buffer_size);
272 ADD_SPLIT_KERNEL_BUNDLE_PROGRAM(
path_init);
274 ADD_SPLIT_KERNEL_BUNDLE_PROGRAM(queue_enqueue);
275 ADD_SPLIT_KERNEL_BUNDLE_PROGRAM(shader_setup);
276 ADD_SPLIT_KERNEL_BUNDLE_PROGRAM(shader_sort);
277 ADD_SPLIT_KERNEL_BUNDLE_PROGRAM(enqueue_inactive);
278 ADD_SPLIT_KERNEL_BUNDLE_PROGRAM(next_iteration_setup);
279 ADD_SPLIT_KERNEL_BUNDLE_PROGRAM(indirect_subsurface);
280 ADD_SPLIT_KERNEL_BUNDLE_PROGRAM(buffer_update);
281 ADD_SPLIT_KERNEL_BUNDLE_PROGRAM(adaptive_stopping);
282 ADD_SPLIT_KERNEL_BUNDLE_PROGRAM(adaptive_filter_x);
283 ADD_SPLIT_KERNEL_BUNDLE_PROGRAM(adaptive_filter_y);
284 ADD_SPLIT_KERNEL_BUNDLE_PROGRAM(adaptive_adjust_samples);
285 programs.push_back(&program_split);
287 # undef ADD_SPLIT_KERNEL_PROGRAM
288 # undef ADD_SPLIT_KERNEL_BUNDLE_PROGRAM
297 typedef struct KernelGlobalsDummy {
301 # define KERNEL_TEX(type, name) TextureInfo name;
306 } KernelGlobalsDummy;
310 struct CachedSplitMemory {
322 OpenCLDevice *device;
323 OpenCLDevice::OpenCLProgram
program;
324 CachedSplitMemory &cached_memory;
327 OpenCLSplitKernelFunction(OpenCLDevice *device, CachedSplitMemory &cached_memory)
328 : device(device), cached_memory(cached_memory), cached_id(cached_memory.
id - 1)
332 ~OpenCLSplitKernelFunction()
339 if (cached_id != cached_memory.id) {
340 cl_uint start_arg_index = device->kernel_set_args(
341 program(), 0,
kg,
data, *cached_memory.split_data, *cached_memory.ray_state);
343 device->set_kernel_arg_buffers(
program(), &start_arg_index);
345 start_arg_index += device->kernel_set_args(
program(),
347 *cached_memory.queue_index,
348 *cached_memory.use_queues_flag,
349 *cached_memory.work_pools,
350 *cached_memory.buffer);
352 cached_id = cached_memory.id;
355 device->ciErr = clEnqueueNDRangeKernel(device->cqCommandQueue,
365 device->opencl_assert_err(device->ciErr,
"clEnqueueNDRangeKernel");
367 if (device->ciErr != CL_SUCCESS) {
368 string message =
string_printf(
"OpenCL error: %s in clEnqueueNDRangeKernel()",
369 clewErrorString(device->ciErr));
370 device->opencl_error(message);
379 OpenCLDevice *device;
380 CachedSplitMemory cached_memory;
383 explicit OpenCLSplitKernel(OpenCLDevice *device) :
DeviceSplitKernel(device), device(device)
390 OpenCLSplitKernelFunction *kernel =
new OpenCLSplitKernelFunction(device, cached_memory);
392 const string program_name = device->get_opencl_program_name(kernel_name);
393 kernel->program = OpenCLDevice::OpenCLProgram(
396 device->get_opencl_program_filename(kernel_name),
397 device->get_build_options(requested_features, program_name));
399 kernel->program.add_kernel(ustring(
"path_trace_" + kernel_name));
400 kernel->program.load();
402 if (!kernel->program.is_loaded()) {
413 size_buffer.alloc(1);
414 size_buffer.zero_to_device();
417 OpenCLDevice::OpenCLSplitPrograms *programs = device->get_split_programs();
418 cl_kernel kernel_state_buffer_size = programs->program_split(
419 ustring(
"path_trace_state_buffer_size"));
420 device->kernel_set_args(kernel_state_buffer_size, 0,
kg,
data,
threads, size_buffer);
422 size_t global_size = 64;
423 device->ciErr = clEnqueueNDRangeKernel(device->cqCommandQueue,
424 kernel_state_buffer_size,
433 device->opencl_assert_err(device->ciErr,
"clEnqueueNDRangeKernel");
435 size_buffer.copy_from_device(0, 1, 1);
436 size_t size = size_buffer[0];
439 if (device->ciErr != CL_SUCCESS) {
440 string message =
string_printf(
"OpenCL error: %s in clEnqueueNDRangeKernel()",
441 clewErrorString(device->ciErr));
442 device->opencl_error(message);
451 int num_global_elements,
468 OpenCLDevice::OpenCLSplitPrograms *programs = device->get_split_programs();
469 cl_kernel
kernel_data_init = programs->program_split(ustring(
"path_trace_data_init"));
499 device->ciErr = clEnqueueNDRangeKernel(device->cqCommandQueue,
509 device->opencl_assert_err(device->ciErr,
"clEnqueueNDRangeKernel");
511 if (device->ciErr != CL_SUCCESS) {
512 string message =
string_printf(
"OpenCL error: %s in clEnqueueNDRangeKernel()",
513 clewErrorString(device->ciErr));
514 device->opencl_error(message);
518 cached_memory.split_data = &split_data;
522 cached_memory.work_pools = &work_pool_wgs;
523 cached_memory.buffer = &rtile.
buffer;
538 cl_device_type
type = OpenCLInfo::get_device_type(device->cdDevice);
540 if (
type == CL_DEVICE_TYPE_CPU) {
541 VLOG(1) <<
"Global size: (64, 64).";
545 cl_ulong max_buffer_size;
547 device->cdDevice, CL_DEVICE_MAX_MEM_ALLOC_SIZE,
sizeof(cl_ulong), &max_buffer_size,
NULL);
550 max_buffer_size =
min(max_buffer_size,
558 max_buffer_size =
min(max_buffer_size / 2, (cl_ulong)2l * 1024 * 1024 * 1024);
562 (
int)
sqrt(num_elements));
564 if (device->info.description.find(
"Intel") != string::npos) {
568 VLOG(1) <<
"Global size: " << global_size <<
".";
573 bool OpenCLDevice::opencl_error(cl_int
err)
575 if (
err != CL_SUCCESS) {
579 fprintf(stderr,
"%s\n", message.c_str());
586 void OpenCLDevice::opencl_error(
const string &message)
590 fprintf(stderr,
"%s\n", message.c_str());
593 void OpenCLDevice::opencl_assert_err(cl_int
err,
const char *where)
595 if (
err != CL_SUCCESS) {
597 "OpenCL error (%d): %s in %s",
err, clewErrorString(
err), where);
600 fprintf(stderr,
"%s\n", message.c_str());
608 :
Device(info, stats, profiler, background),
609 load_kernel_num_compiling(0),
610 kernel_programs(this),
611 memory_manager(this),
612 texture_info(this,
"__texture_info",
MEM_GLOBAL)
617 cqCommandQueue =
NULL;
618 device_initialized =
false;
619 textures_need_update =
true;
622 OpenCLInfo::get_usable_devices(&usable_devices);
623 if (usable_devices.size() == 0) {
624 opencl_error(
"OpenCL: no devices found.");
627 assert(info.
num < usable_devices.size());
628 OpenCLPlatformDevice &platform_device = usable_devices[info.
num];
629 device_num = info.
num;
630 cpPlatform = platform_device.platform_id;
631 cdDevice = platform_device.device_id;
632 platform_name = platform_device.platform_name;
633 device_name = platform_device.device_name;
634 VLOG(2) <<
"Creating new Cycles device for OpenCL platform " << platform_name <<
", device "
635 << device_name <<
".";
640 cxContext = OpenCLCache::get_context(cpPlatform, cdDevice, cache_locker);
642 if (cxContext ==
NULL) {
644 const cl_context_properties context_props[] = {
645 CL_CONTEXT_PLATFORM, (cl_context_properties)cpPlatform, 0, 0};
648 cxContext = clCreateContext(
649 context_props, 1, &cdDevice, context_notify_callback, cdDevice, &ciErr);
651 if (opencl_error(ciErr)) {
652 opencl_error(
"OpenCL: clCreateContext failed");
657 OpenCLCache::store_context(cpPlatform, cdDevice, cxContext, cache_locker);
661 cqCommandQueue = clCreateCommandQueue(cxContext, cdDevice, 0, &ciErr);
662 if (opencl_error(ciErr)) {
663 opencl_error(
"OpenCL: Error creating command queue");
669 texture_info.resize(1);
670 memory_manager.alloc(
"texture_info", texture_info);
672 device_initialized =
true;
674 split_kernel =
new OpenCLSplitKernel(
this);
677 OpenCLDevice::~OpenCLDevice()
680 load_required_kernel_task_pool.cancel();
681 load_kernel_task_pool.cancel();
683 memory_manager.free();
685 ConstMemMap::iterator mt;
686 for (mt = const_mem_map.begin(); mt != const_mem_map.end(); mt++) {
690 base_program.release();
691 bake_program.release();
692 displace_program.release();
693 background_program.release();
694 denoising_program.release();
697 clReleaseCommandQueue(cqCommandQueue);
699 clReleaseContext(cxContext);
704 void CL_CALLBACK OpenCLDevice::context_notify_callback(
const char *err_info,
709 string device_name = OpenCLInfo::get_device_name((cl_device_id)
user_data);
710 fprintf(stderr,
"OpenCL error (%s): %s\n", device_name.c_str(), err_info);
713 bool OpenCLDevice::opencl_version_check()
716 if (!OpenCLInfo::platform_version_check(cpPlatform, &
error)) {
720 if (!OpenCLInfo::device_version_check(cdDevice, &
error)) {
727 string OpenCLDevice::device_md5_hash(
string kernel_custom_build_options)
730 char version[256], driver[256], name[256], vendor[256];
732 clGetPlatformInfo(cpPlatform, CL_PLATFORM_VENDOR,
sizeof(vendor), &vendor,
NULL);
733 clGetDeviceInfo(cdDevice, CL_DEVICE_VERSION,
sizeof(version), &version,
NULL);
734 clGetDeviceInfo(cdDevice, CL_DEVICE_NAME,
sizeof(name), &name,
NULL);
735 clGetDeviceInfo(cdDevice, CL_DRIVER_VERSION,
sizeof(driver), &driver,
NULL);
742 string options = kernel_build_options();
743 options += kernel_custom_build_options;
751 VLOG(2) <<
"Loading kernels for platform " << platform_name <<
", device " << device_name <<
".";
753 if (!device_initialized) {
754 fprintf(stderr,
"OpenCL: failed to initialize device.\n");
759 if (!opencl_version_check())
762 load_required_kernels(requested_features);
765 kernel_programs.load_kernels(programs, requested_features);
768 denoising_program = OpenCLProgram(
769 this,
"denoising",
"filter.cl", get_build_options(requested_features,
"denoising"));
770 denoising_program.add_kernel(ustring(
"filter_divide_shadow"));
771 denoising_program.add_kernel(ustring(
"filter_get_feature"));
772 denoising_program.add_kernel(ustring(
"filter_write_feature"));
773 denoising_program.add_kernel(ustring(
"filter_detect_outliers"));
774 denoising_program.add_kernel(ustring(
"filter_combine_halves"));
775 denoising_program.add_kernel(ustring(
"filter_construct_transform"));
776 denoising_program.add_kernel(ustring(
"filter_nlm_calc_difference"));
777 denoising_program.add_kernel(ustring(
"filter_nlm_blur"));
778 denoising_program.add_kernel(ustring(
"filter_nlm_calc_weight"));
779 denoising_program.add_kernel(ustring(
"filter_nlm_update_output"));
780 denoising_program.add_kernel(ustring(
"filter_nlm_normalize"));
781 denoising_program.add_kernel(ustring(
"filter_nlm_construct_gramian"));
782 denoising_program.add_kernel(ustring(
"filter_finalize"));
783 programs.push_back(&denoising_program);
786 load_required_kernel_task_pool.wait_work();
791 foreach (OpenCLProgram *
program, programs) {
793 load_kernel_num_compiling++;
794 load_kernel_task_pool.push([=] {
796 load_kernel_num_compiling--;
806 base_program = OpenCLProgram(
807 this,
"base",
"kernel_base.cl", get_build_options(requested_features,
"base"));
808 base_program.add_kernel(ustring(
"convert_to_byte"));
809 base_program.add_kernel(ustring(
"convert_to_half_float"));
810 base_program.add_kernel(ustring(
"zero_buffer"));
811 programs.push_back(&base_program);
814 displace_program = OpenCLProgram(
815 this,
"displace",
"kernel_displace.cl", get_build_options(requested_features,
"displace"));
816 displace_program.add_kernel(ustring(
"displace"));
817 programs.push_back(&displace_program);
821 background_program = OpenCLProgram(
this,
823 "kernel_background.cl",
824 get_build_options(requested_features,
"background"));
825 background_program.add_kernel(ustring(
"background"));
826 programs.push_back(&background_program);
830 bake_program = OpenCLProgram(
831 this,
"bake",
"kernel_bake.cl", get_build_options(requested_features,
"bake"));
832 bake_program.add_kernel(ustring(
"bake"));
833 programs.push_back(&bake_program);
836 foreach (OpenCLProgram *
program, programs) {
850 load_kernel_task_pool.wait_work();
851 return split_kernel->load_kernels(requested_features);
854 OpenCLDevice::OpenCLSplitPrograms *OpenCLDevice::get_split_programs()
856 return &kernel_programs;
867 VLOG(1) <<
"Buffer allocate: " << mem.
name <<
", "
875 cl_ulong max_alloc_size = 0;
876 clGetDeviceInfo(cdDevice, CL_DEVICE_MAX_MEM_ALLOC_SIZE,
sizeof(cl_ulong), &max_alloc_size,
NULL);
882 if (
size > max_alloc_size) {
883 string error =
"Scene too complex to fit in available memory.";
892 cl_mem_flags mem_flag;
893 void *mem_ptr =
NULL;
896 mem_flag = CL_MEM_READ_ONLY;
898 mem_flag = CL_MEM_READ_WRITE;
907 opencl_assert_err(ciErr,
"clCreateBuffer");
935 opencl_assert(clEnqueueWriteBuffer(cqCommandQueue,
948 void OpenCLDevice::mem_copy_from(
device_memory &mem,
int y,
int w,
int h,
int elem)
950 size_t offset = elem *
y *
w;
951 size_t size = elem *
w * h;
953 opencl_assert(clEnqueueReadBuffer(cqCommandQueue,
966 base_program.wait_for_availability();
967 cl_kernel ckZeroBuffer = base_program(ustring(
"zero_buffer"));
969 size_t global_size[] = {1024, 1024};
970 size_t num_threads = global_size[0] * global_size[1];
972 cl_mem d_buffer = CL_MEM_PTR(mem);
973 cl_ulong d_offset = 0;
976 while (d_offset <
size) {
977 d_size = std::min<cl_ulong>(num_threads *
sizeof(float4),
size - d_offset);
979 kernel_set_args(ckZeroBuffer, 0, d_buffer, d_size, d_offset);
981 ciErr = clEnqueueNDRangeKernel(
983 opencl_assert_err(ciErr,
"clEnqueueNDRangeKernel");
996 if (base_program.is_loaded()) {
1004 if (!base_program.is_loaded()) {
1012 opencl_assert(clEnqueueWriteBuffer(cqCommandQueue,
1040 opencl_assert(clReleaseMemObject(CL_MEM_PTR(mem.
device_pointer)));
1050 int OpenCLDevice::mem_sub_ptr_alignment()
1052 return OpenCLInfo::mem_sub_ptr_alignment(cdDevice);
1057 cl_mem_flags mem_flag;
1059 mem_flag = CL_MEM_READ_ONLY;
1061 mem_flag = CL_MEM_READ_WRITE;
1063 cl_buffer_region info;
1068 CL_MEM_PTR(mem.
device_pointer), mem_flag, CL_BUFFER_CREATE_TYPE_REGION, &info, &ciErr);
1069 opencl_assert_err(ciErr,
"clCreateSubBuffer");
1073 void OpenCLDevice::mem_free_sub_ptr(
device_ptr device_pointer)
1075 if (device_pointer != 0) {
1076 opencl_assert(clReleaseMemObject(CL_MEM_PTR(device_pointer)));
1080 void OpenCLDevice::const_copy_to(
const char *name,
void *host,
size_t size)
1082 ConstMemMap::iterator i = const_mem_map.find(name);
1085 if (i == const_mem_map.end()) {
1088 const_mem_map.insert(ConstMemMap::value_type(name,
data));
1095 data->copy_to_device();
1100 VLOG(1) <<
"Global memory allocate: " << mem.
name <<
", "
1104 memory_manager.alloc(mem.
name, mem);
1109 textures_need_update =
true;
1117 if (memory_manager.free(mem)) {
1118 textures_need_update =
true;
1121 foreach (TexturesMap::value_type &value,
textures) {
1122 if (value.second == &mem) {
1132 VLOG(1) <<
"Texture allocate: " << mem.
name <<
", "
1136 memory_manager.alloc(mem.
name, mem);
1141 textures_need_update =
true;
1149 size_t OpenCLDevice::global_size_round_up(
int group_size,
int global_size)
1151 int r = global_size % group_size;
1152 return global_size + ((
r == 0) ? 0 : group_size -
r);
1155 void OpenCLDevice::enqueue_kernel(
1156 cl_kernel kernel,
size_t w,
size_t h,
bool x_workgroups,
size_t max_workgroup_size)
1158 size_t workgroup_size, max_work_items[3];
1160 clGetKernelWorkGroupInfo(
1161 kernel, cdDevice, CL_KERNEL_WORK_GROUP_SIZE,
sizeof(
size_t), &workgroup_size,
NULL);
1163 cdDevice, CL_DEVICE_MAX_WORK_ITEM_SIZES,
sizeof(
size_t) * 3, max_work_items,
NULL);
1165 if (max_workgroup_size > 0 && workgroup_size > max_workgroup_size) {
1166 workgroup_size = max_workgroup_size;
1170 size_t local_size[2];
1172 local_size[0] = workgroup_size;
1176 size_t sqrt_workgroup_size =
max((
size_t)
sqrt((
double)workgroup_size), 1);
1177 local_size[0] = local_size[1] = sqrt_workgroup_size;
1181 if (local_size[1] > max_work_items[1]) {
1182 local_size[0] = workgroup_size / max_work_items[1];
1183 local_size[1] = max_work_items[1];
1186 size_t global_size[2] = {global_size_round_up(local_size[0],
w),
1187 global_size_round_up(local_size[1], h)};
1200 clEnqueueNDRangeKernel(cqCommandQueue, kernel, 2,
NULL, global_size,
NULL, 0,
NULL,
NULL));
1201 opencl_assert(clFlush(cqCommandQueue));
1204 void OpenCLDevice::set_kernel_arg_mem(cl_kernel kernel, cl_uint *narg,
const char *name)
1208 MemMap::iterator i = mem_map.find(name);
1209 if (i != mem_map.end()) {
1210 ptr = CL_MEM_PTR(i->second);
1216 opencl_assert(clSetKernelArg(kernel, (*narg)++,
sizeof(
ptr), (
void *)&
ptr));
1219 void OpenCLDevice::set_kernel_arg_buffers(cl_kernel kernel, cl_uint *narg)
1221 flush_texture_buffers();
1223 memory_manager.set_kernel_arg_buffers(kernel, narg);
1226 void OpenCLDevice::flush_texture_buffers()
1228 if (!textures_need_update) {
1231 textures_need_update =
false;
1238 # define KERNEL_TEX(type, name) \
1239 if (textures.find(#name) != textures.end()) { \
1240 texture_slots.push_back(texture_slot_t(#name, num_slots)); \
1245 int num_data_slots = num_slots;
1247 foreach (TexturesMap::value_type &
tex,
textures) {
1248 string name =
tex.first;
1253 texture_slots.push_back(texture_slot_t(name, num_data_slots +
id));
1254 num_slots =
max(num_slots, num_data_slots +
id + 1);
1259 memory_manager.free(texture_info);
1260 texture_info.resize(num_slots);
1261 memory_manager.alloc(
"texture_info", texture_info);
1264 foreach (texture_slot_t &slot, texture_slots) {
1282 memory_manager.free(texture_info);
1283 memory_manager.alloc(
"texture_info", texture_info);
1288 flush_texture_buffers();
1296 kgbuffer.alloc_to_device(1);
1299 while (
task.acquire_tile(
this, tile,
task.tile_types)) {
1304 split_kernel->path_trace(
task, tile, kgbuffer, *const_mem_map[
"__data"]);
1315 clFinish(cqCommandQueue);
1322 denoise(tile, denoising);
1323 task.update_progress(&tile, tile.
w * tile.
h);
1326 task.release_tile(tile);
1352 denoise(tile, denoising);
1353 task.update_progress(&tile, tile.
w * tile.
h);
1363 cl_mem d_data = CL_MEM_PTR(const_mem_map[
"__data"]->device_pointer);
1364 cl_mem d_rgba = (rgba_byte) ? CL_MEM_PTR(rgba_byte) : CL_MEM_PTR(rgba_half);
1365 cl_mem d_buffer = CL_MEM_PTR(
buffer);
1366 cl_int d_x =
task.x;
1367 cl_int d_y =
task.y;
1368 cl_int d_w =
task.w;
1369 cl_int d_h =
task.h;
1370 cl_float d_sample_scale = 1.0f / (
task.sample + 1);
1371 cl_int d_offset =
task.offset;
1372 cl_int d_stride =
task.stride;
1374 cl_kernel ckFilmConvertKernel = (rgba_byte) ? base_program(ustring(
"convert_to_byte")) :
1375 base_program(ustring(
"convert_to_half_float"));
1377 cl_uint start_arg_index = kernel_set_args(ckFilmConvertKernel, 0, d_data, d_rgba, d_buffer);
1379 set_kernel_arg_buffers(ckFilmConvertKernel, &start_arg_index);
1381 start_arg_index += kernel_set_args(ckFilmConvertKernel,
1391 enqueue_kernel(ckFilmConvertKernel, d_w, d_h);
1394 bool OpenCLDevice::denoising_non_local_means(
device_ptr image_ptr,
1401 int w =
task->buffer.width;
1402 int h =
task->buffer.h;
1403 int r =
task->nlm_state.r;
1404 int f =
task->nlm_state.f;
1405 float a =
task->nlm_state.a;
1406 float k_2 =
task->nlm_state.k_2;
1408 int pass_stride =
task->buffer.pass_stride;
1409 int num_shifts = (2 *
r + 1) * (2 *
r + 1);
1410 int channel_offset =
task->nlm_state.is_color ?
task->buffer.pass_stride : 0;
1414 task->buffer.temporary_mem, pass_stride * num_shifts, pass_stride * num_shifts);
1416 task->buffer.temporary_mem, 2 * pass_stride * num_shifts, pass_stride);
1417 cl_mem weightAccum_mem = CL_MEM_PTR(*weightAccum);
1418 cl_mem difference_mem = CL_MEM_PTR(*difference);
1419 cl_mem blurDifference_mem = CL_MEM_PTR(*blurDifference);
1421 cl_mem image_mem = CL_MEM_PTR(image_ptr);
1422 cl_mem guide_mem = CL_MEM_PTR(guide_ptr);
1423 cl_mem variance_mem = CL_MEM_PTR(variance_ptr);
1424 cl_mem out_mem = CL_MEM_PTR(out_ptr);
1425 cl_mem scale_mem =
NULL;
1427 mem_zero_kernel(*weightAccum,
sizeof(
float) * pass_stride);
1428 mem_zero_kernel(out_ptr,
sizeof(
float) * pass_stride);
1430 cl_kernel ckNLMCalcDifference = denoising_program(ustring(
"filter_nlm_calc_difference"));
1431 cl_kernel ckNLMBlur = denoising_program(ustring(
"filter_nlm_blur"));
1432 cl_kernel ckNLMCalcWeight = denoising_program(ustring(
"filter_nlm_calc_weight"));
1433 cl_kernel ckNLMUpdateOutput = denoising_program(ustring(
"filter_nlm_update_output"));
1434 cl_kernel ckNLMNormalize = denoising_program(ustring(
"filter_nlm_normalize"));
1436 kernel_set_args(ckNLMCalcDifference,
1452 ckNLMBlur, 0, difference_mem, blurDifference_mem,
w, h,
stride, pass_stride,
r, f);
1454 ckNLMCalcWeight, 0, blurDifference_mem, difference_mem,
w, h,
stride, pass_stride,
r, f);
1455 kernel_set_args(ckNLMUpdateOutput,
1469 enqueue_kernel(ckNLMCalcDifference,
w * h, num_shifts,
true);
1470 enqueue_kernel(ckNLMBlur,
w * h, num_shifts,
true);
1471 enqueue_kernel(ckNLMCalcWeight,
w * h, num_shifts,
true);
1472 enqueue_kernel(ckNLMBlur,
w * h, num_shifts,
true);
1473 enqueue_kernel(ckNLMUpdateOutput,
w * h, num_shifts,
true);
1475 kernel_set_args(ckNLMNormalize, 0, out_mem, weightAccum_mem,
w, h,
stride);
1476 enqueue_kernel(ckNLMNormalize,
w, h);
1483 cl_mem buffer_mem = CL_MEM_PTR(
task->buffer.mem.device_pointer);
1484 cl_mem transform_mem = CL_MEM_PTR(
task->storage.transform.device_pointer);
1485 cl_mem rank_mem = CL_MEM_PTR(
task->storage.rank.device_pointer);
1486 cl_mem tile_info_mem = CL_MEM_PTR(
task->tile_info_mem.device_pointer);
1488 char use_time =
task->buffer.use_time ? 1 : 0;
1490 cl_kernel ckFilterConstructTransform = denoising_program(ustring(
"filter_construct_transform"));
1492 int arg_ofs = kernel_set_args(ckFilterConstructTransform, 0, buffer_mem, tile_info_mem);
1494 for (
int i = 0; i < 9; i++) {
1495 buffers[i] = CL_MEM_PTR(
task->tile_info->buffers[i]);
1496 arg_ofs += kernel_set_args(ckFilterConstructTransform, arg_ofs, buffers[i]);
1498 kernel_set_args(ckFilterConstructTransform,
1504 task->buffer.pass_stride,
1505 task->buffer.frame_stride,
1508 task->pca_threshold);
1510 enqueue_kernel(ckFilterConstructTransform,
task->storage.w,
task->storage.h, 256);
1515 bool OpenCLDevice::denoising_accumulate(
device_ptr color_ptr,
1521 cl_mem color_mem = CL_MEM_PTR(color_ptr);
1522 cl_mem color_variance_mem = CL_MEM_PTR(color_variance_ptr);
1523 cl_mem scale_mem = CL_MEM_PTR(scale_ptr);
1525 cl_mem buffer_mem = CL_MEM_PTR(
task->buffer.mem.device_pointer);
1526 cl_mem transform_mem = CL_MEM_PTR(
task->storage.transform.device_pointer);
1527 cl_mem rank_mem = CL_MEM_PTR(
task->storage.rank.device_pointer);
1528 cl_mem XtWX_mem = CL_MEM_PTR(
task->storage.XtWX.device_pointer);
1529 cl_mem XtWY_mem = CL_MEM_PTR(
task->storage.XtWY.device_pointer);
1531 cl_kernel ckNLMCalcDifference = denoising_program(ustring(
"filter_nlm_calc_difference"));
1532 cl_kernel ckNLMBlur = denoising_program(ustring(
"filter_nlm_blur"));
1533 cl_kernel ckNLMCalcWeight = denoising_program(ustring(
"filter_nlm_calc_weight"));
1534 cl_kernel ckNLMConstructGramian = denoising_program(ustring(
"filter_nlm_construct_gramian"));
1536 int w =
task->reconstruction_state.source_w;
1537 int h =
task->reconstruction_state.source_h;
1539 int frame_offset = frame *
task->buffer.frame_stride;
1540 int t =
task->tile_info->frames[frame];
1541 char use_time =
task->buffer.use_time ? 1 : 0;
1543 int r =
task->radius;
1544 int pass_stride =
task->buffer.pass_stride;
1545 int num_shifts = (2 *
r + 1) * (2 *
r + 1);
1549 task->buffer.temporary_mem, pass_stride * num_shifts, pass_stride * num_shifts);
1550 cl_mem difference_mem = CL_MEM_PTR(*difference);
1551 cl_mem blurDifference_mem = CL_MEM_PTR(*blurDifference);
1553 kernel_set_args(ckNLMCalcDifference,
1569 ckNLMBlur, 0, difference_mem, blurDifference_mem,
w, h,
stride, pass_stride,
r, 4);
1571 ckNLMCalcWeight, 0, blurDifference_mem, difference_mem,
w, h,
stride, pass_stride,
r, 4);
1572 kernel_set_args(ckNLMConstructGramian,
1581 task->reconstruction_state.filter_window,
1591 enqueue_kernel(ckNLMCalcDifference,
w * h, num_shifts,
true);
1592 enqueue_kernel(ckNLMBlur,
w * h, num_shifts,
true);
1593 enqueue_kernel(ckNLMCalcWeight,
w * h, num_shifts,
true);
1594 enqueue_kernel(ckNLMBlur,
w * h, num_shifts,
true);
1595 enqueue_kernel(ckNLMConstructGramian,
w * h, num_shifts,
true, 256);
1602 cl_kernel ckFinalize = denoising_program(ustring(
"filter_finalize"));
1604 cl_mem output_mem = CL_MEM_PTR(output_ptr);
1605 cl_mem rank_mem = CL_MEM_PTR(
task->storage.rank.device_pointer);
1606 cl_mem XtWX_mem = CL_MEM_PTR(
task->storage.XtWX.device_pointer);
1607 cl_mem XtWY_mem = CL_MEM_PTR(
task->storage.XtWY.device_pointer);
1609 int w =
task->reconstruction_state.source_w;
1610 int h =
task->reconstruction_state.source_h;
1612 kernel_set_args(ckFinalize,
1619 task->reconstruction_state.buffer_params,
1620 task->render_buffer.samples);
1621 enqueue_kernel(ckFinalize,
w, h);
1626 bool OpenCLDevice::denoising_combine_halves(
device_ptr a_ptr,
1634 cl_mem a_mem = CL_MEM_PTR(a_ptr);
1635 cl_mem b_mem = CL_MEM_PTR(b_ptr);
1636 cl_mem mean_mem = CL_MEM_PTR(mean_ptr);
1637 cl_mem variance_mem = CL_MEM_PTR(variance_ptr);
1639 cl_kernel ckFilterCombineHalves = denoising_program(ustring(
"filter_combine_halves"));
1641 kernel_set_args(ckFilterCombineHalves, 0, mean_mem, variance_mem, a_mem, b_mem, rect,
r);
1642 enqueue_kernel(ckFilterCombineHalves,
task->rect.z -
task->rect.x,
task->rect.w -
task->rect.y);
1647 bool OpenCLDevice::denoising_divide_shadow(
device_ptr a_ptr,
1654 cl_mem a_mem = CL_MEM_PTR(a_ptr);
1655 cl_mem b_mem = CL_MEM_PTR(b_ptr);
1656 cl_mem sample_variance_mem = CL_MEM_PTR(sample_variance_ptr);
1657 cl_mem sv_variance_mem = CL_MEM_PTR(sv_variance_ptr);
1658 cl_mem buffer_variance_mem = CL_MEM_PTR(buffer_variance_ptr);
1660 cl_mem tile_info_mem = CL_MEM_PTR(
task->tile_info_mem.device_pointer);
1662 cl_kernel ckFilterDivideShadow = denoising_program(ustring(
"filter_divide_shadow"));
1664 int arg_ofs = kernel_set_args(
1665 ckFilterDivideShadow, 0,
task->render_buffer.samples, tile_info_mem);
1667 for (
int i = 0; i < 9; i++) {
1668 buffers[i] = CL_MEM_PTR(
task->tile_info->buffers[i]);
1669 arg_ofs += kernel_set_args(ckFilterDivideShadow, arg_ofs, buffers[i]);
1671 kernel_set_args(ckFilterDivideShadow,
1675 sample_variance_mem,
1677 buffer_variance_mem,
1679 task->render_buffer.pass_stride,
1680 task->render_buffer.offset);
1681 enqueue_kernel(ckFilterDivideShadow,
task->rect.z -
task->rect.x,
task->rect.w -
task->rect.y);
1686 bool OpenCLDevice::denoising_get_feature(
int mean_offset,
1687 int variance_offset,
1693 cl_mem mean_mem = CL_MEM_PTR(mean_ptr);
1694 cl_mem variance_mem = CL_MEM_PTR(variance_ptr);
1696 cl_mem tile_info_mem = CL_MEM_PTR(
task->tile_info_mem.device_pointer);
1698 cl_kernel ckFilterGetFeature = denoising_program(ustring(
"filter_get_feature"));
1700 int arg_ofs = kernel_set_args(ckFilterGetFeature, 0,
task->render_buffer.samples, tile_info_mem);
1702 for (
int i = 0; i < 9; i++) {
1703 buffers[i] = CL_MEM_PTR(
task->tile_info->buffers[i]);
1704 arg_ofs += kernel_set_args(ckFilterGetFeature, arg_ofs, buffers[i]);
1706 kernel_set_args(ckFilterGetFeature,
1714 task->render_buffer.pass_stride,
1715 task->render_buffer.offset);
1716 enqueue_kernel(ckFilterGetFeature,
task->rect.z -
task->rect.x,
task->rect.w -
task->rect.y);
1721 bool OpenCLDevice::denoising_write_feature(
int out_offset,
1726 cl_mem from_mem = CL_MEM_PTR(from_ptr);
1727 cl_mem buffer_mem = CL_MEM_PTR(buffer_ptr);
1729 cl_kernel ckFilterWriteFeature = denoising_program(ustring(
"filter_write_feature"));
1731 kernel_set_args(ckFilterWriteFeature,
1733 task->render_buffer.samples,
1734 task->reconstruction_state.buffer_params,
1740 enqueue_kernel(ckFilterWriteFeature,
task->filter_area.z,
task->filter_area.w);
1745 bool OpenCLDevice::denoising_detect_outliers(
device_ptr image_ptr,
1751 cl_mem image_mem = CL_MEM_PTR(image_ptr);
1752 cl_mem variance_mem = CL_MEM_PTR(variance_ptr);
1753 cl_mem depth_mem = CL_MEM_PTR(depth_ptr);
1754 cl_mem output_mem = CL_MEM_PTR(output_ptr);
1756 cl_kernel ckFilterDetectOutliers = denoising_program(ustring(
"filter_detect_outliers"));
1758 kernel_set_args(ckFilterDetectOutliers,
1765 task->buffer.pass_stride);
1766 enqueue_kernel(ckFilterDetectOutliers,
task->rect.z -
task->rect.x,
task->rect.w -
task->rect.y);
1774 &OpenCLDevice::denoising_construct_transform,
this, &denoising);
1776 &OpenCLDevice::denoising_accumulate,
this, _1, _2, _3, _4, &denoising);
1779 &OpenCLDevice::denoising_divide_shadow,
this, _1, _2, _3, _4, _5, &denoising);
1781 &OpenCLDevice::denoising_non_local_means,
this, _1, _2, _3, _4, &denoising);
1783 &OpenCLDevice::denoising_combine_halves,
this, _1, _2, _3, _4, _5, _6, &denoising);
1785 &OpenCLDevice::denoising_get_feature,
this, _1, _2, _3, _4, _5, &denoising);
1787 &OpenCLDevice::denoising_write_feature,
this, _1, _2, _3, &denoising);
1789 &OpenCLDevice::denoising_detect_outliers,
this, _1, _2, _3, _4, &denoising);
1801 cl_mem d_data = CL_MEM_PTR(const_mem_map[
"__data"]->device_pointer);
1802 cl_mem d_input = CL_MEM_PTR(
task.shader_input);
1803 cl_mem d_output = CL_MEM_PTR(
task.shader_output);
1804 cl_int d_shader_eval_type =
task.shader_eval_type;
1805 cl_int d_shader_filter =
task.shader_filter;
1806 cl_int d_shader_x =
task.shader_x;
1807 cl_int d_shader_w =
task.shader_w;
1808 cl_int d_offset =
task.offset;
1810 OpenCLDevice::OpenCLProgram *
program = &background_program;
1814 program->wait_for_availability();
1815 cl_kernel kernel = (*program)();
1817 cl_uint start_arg_index = kernel_set_args(kernel, 0, d_data, d_input, d_output);
1819 set_kernel_arg_buffers(kernel, &start_arg_index);
1821 start_arg_index += kernel_set_args(kernel, start_arg_index, d_shader_eval_type);
1823 start_arg_index += kernel_set_args(kernel, start_arg_index, d_shader_filter);
1825 start_arg_index += kernel_set_args(kernel, start_arg_index, d_shader_x, d_shader_w, d_offset);
1829 if (
task.get_cancel())
1832 kernel_set_args(kernel, start_arg_index,
sample);
1834 enqueue_kernel(kernel,
task.shader_w, 1);
1836 clFinish(cqCommandQueue);
1847 cl_mem d_data = CL_MEM_PTR(const_mem_map[
"__data"]->device_pointer);
1848 cl_mem d_buffer = CL_MEM_PTR(rtile.
buffer);
1849 cl_int d_x = rtile.
x;
1850 cl_int d_y = rtile.
y;
1851 cl_int d_w = rtile.
w;
1852 cl_int d_h = rtile.
h;
1853 cl_int d_offset = rtile.
offset;
1854 cl_int d_stride = rtile.
stride;
1856 bake_program.wait_for_availability();
1857 cl_kernel kernel = bake_program();
1859 cl_uint start_arg_index = kernel_set_args(kernel, 0, d_data, d_buffer);
1861 set_kernel_arg_buffers(kernel, &start_arg_index);
1863 start_arg_index += kernel_set_args(
1864 kernel, start_arg_index, d_x, d_y, d_w, d_h, d_offset, d_stride);
1870 if (
task.get_cancel()) {
1871 if (
task.need_finish_queue ==
false)
1875 kernel_set_args(kernel, start_arg_index,
sample);
1877 enqueue_kernel(kernel, d_w, d_h);
1878 clFinish(cqCommandQueue);
1882 task.update_progress(&rtile, rtile.
w * rtile.
h);
1886 static bool kernel_build_opencl_2(cl_device_id cdDevice)
1892 int version_major, version_minor;
1893 if (OpenCLInfo::get_device_version(cdDevice, &version_major, &version_minor)) {
1894 if (version_major >= 2) {
1897 string device_name = OpenCLInfo::get_readable_device_name(cdDevice);
1902 char version[256] =
"";
1903 int driver_major, driver_minor;
1904 clGetDeviceInfo(cdDevice, CL_DEVICE_VERSION,
sizeof(version), &version,
NULL);
1905 if (sscanf(version,
"OpenCL 2.0 AMD-APP (%d.%d)", &driver_major, &driver_minor) == 2) {
1906 return !(driver_major == 3075 && driver_minor <= 12);
1917 string OpenCLDevice::kernel_build_options(
const string *debug_src)
1919 string build_options =
"-cl-no-signed-zeros -cl-mad-enable ";
1921 if (kernel_build_opencl_2(cdDevice)) {
1922 build_options +=
"-cl-std=CL2.0 ";
1925 if (platform_name ==
"NVIDIA CUDA") {
1927 "-D__KERNEL_OPENCL_NVIDIA__ "
1928 "-cl-nv-maxrregcount=32 "
1931 uint compute_capability_major, compute_capability_minor;
1932 clGetDeviceInfo(cdDevice,
1933 CL_DEVICE_COMPUTE_CAPABILITY_MAJOR_NV,
1935 &compute_capability_major,
1937 clGetDeviceInfo(cdDevice,
1938 CL_DEVICE_COMPUTE_CAPABILITY_MINOR_NV,
1940 &compute_capability_minor,
1943 build_options +=
string_printf(
"-D__COMPUTE_CAPABILITY__=%u ",
1944 compute_capability_major * 100 + compute_capability_minor * 10);
1947 else if (platform_name ==
"Apple")
1948 build_options +=
"-D__KERNEL_OPENCL_APPLE__ ";
1950 else if (platform_name ==
"AMD Accelerated Parallel Processing")
1951 build_options +=
"-D__KERNEL_OPENCL_AMD__ ";
1953 else if (platform_name ==
"Intel(R) OpenCL") {
1954 build_options +=
"-D__KERNEL_OPENCL_INTEL_CPU__ ";
1959 if (OpenCLInfo::use_debug() && debug_src)
1960 build_options +=
"-g -s \"" + *debug_src +
"\" ";
1963 if (info.has_half_images) {
1964 build_options +=
"-D__KERNEL_CL_KHR_FP16__ ";
1967 if (OpenCLInfo::use_debug()) {
1968 build_options +=
"-D__KERNEL_OPENCL_DEBUG__ ";
1971 # ifdef WITH_CYCLES_DEBUG
1972 build_options +=
"-D__KERNEL_DEBUG__ ";
1975 # ifdef WITH_NANOVDB
1976 if (info.has_nanovdb) {
1977 build_options +=
"-DWITH_NANOVDB ";
1981 return build_options;
1987 int OpenCLDevice::kernel_set_args(cl_kernel kernel,
1988 int start_argument_index,
1989 const ArgumentWrapper &arg1,
1990 const ArgumentWrapper &arg2,
1991 const ArgumentWrapper &arg3,
1992 const ArgumentWrapper &arg4,
1993 const ArgumentWrapper &arg5,
1994 const ArgumentWrapper &arg6,
1995 const ArgumentWrapper &arg7,
1996 const ArgumentWrapper &arg8,
1997 const ArgumentWrapper &arg9,
1998 const ArgumentWrapper &arg10,
1999 const ArgumentWrapper &arg11,
2000 const ArgumentWrapper &arg12,
2001 const ArgumentWrapper &arg13,
2002 const ArgumentWrapper &arg14,
2003 const ArgumentWrapper &arg15,
2004 const ArgumentWrapper &arg16,
2005 const ArgumentWrapper &arg17,
2006 const ArgumentWrapper &arg18,
2007 const ArgumentWrapper &arg19,
2008 const ArgumentWrapper &arg20,
2009 const ArgumentWrapper &arg21,
2010 const ArgumentWrapper &arg22,
2011 const ArgumentWrapper &arg23,
2012 const ArgumentWrapper &arg24,
2013 const ArgumentWrapper &arg25,
2014 const ArgumentWrapper &arg26,
2015 const ArgumentWrapper &arg27,
2016 const ArgumentWrapper &arg28,
2017 const ArgumentWrapper &arg29,
2018 const ArgumentWrapper &arg30,
2019 const ArgumentWrapper &arg31,
2020 const ArgumentWrapper &arg32,
2021 const ArgumentWrapper &arg33)
2023 int current_arg_index = 0;
2024 # define FAKE_VARARG_HANDLE_ARG(arg) \
2026 if (arg.pointer != NULL) { \
2027 opencl_assert(clSetKernelArg( \
2028 kernel, start_argument_index + current_arg_index, arg.size, arg.pointer)); \
2029 ++current_arg_index; \
2032 return current_arg_index; \
2035 FAKE_VARARG_HANDLE_ARG(arg1);
2036 FAKE_VARARG_HANDLE_ARG(arg2);
2037 FAKE_VARARG_HANDLE_ARG(arg3);
2038 FAKE_VARARG_HANDLE_ARG(arg4);
2039 FAKE_VARARG_HANDLE_ARG(arg5);
2040 FAKE_VARARG_HANDLE_ARG(arg6);
2041 FAKE_VARARG_HANDLE_ARG(arg7);
2042 FAKE_VARARG_HANDLE_ARG(arg8);
2043 FAKE_VARARG_HANDLE_ARG(arg9);
2044 FAKE_VARARG_HANDLE_ARG(arg10);
2045 FAKE_VARARG_HANDLE_ARG(arg11);
2046 FAKE_VARARG_HANDLE_ARG(arg12);
2047 FAKE_VARARG_HANDLE_ARG(arg13);
2048 FAKE_VARARG_HANDLE_ARG(arg14);
2049 FAKE_VARARG_HANDLE_ARG(arg15);
2050 FAKE_VARARG_HANDLE_ARG(arg16);
2051 FAKE_VARARG_HANDLE_ARG(arg17);
2052 FAKE_VARARG_HANDLE_ARG(arg18);
2053 FAKE_VARARG_HANDLE_ARG(arg19);
2054 FAKE_VARARG_HANDLE_ARG(arg20);
2055 FAKE_VARARG_HANDLE_ARG(arg21);
2056 FAKE_VARARG_HANDLE_ARG(arg22);
2057 FAKE_VARARG_HANDLE_ARG(arg23);
2058 FAKE_VARARG_HANDLE_ARG(arg24);
2059 FAKE_VARARG_HANDLE_ARG(arg25);
2060 FAKE_VARARG_HANDLE_ARG(arg26);
2061 FAKE_VARARG_HANDLE_ARG(arg27);
2062 FAKE_VARARG_HANDLE_ARG(arg28);
2063 FAKE_VARARG_HANDLE_ARG(arg29);
2064 FAKE_VARARG_HANDLE_ARG(arg30);
2065 FAKE_VARARG_HANDLE_ARG(arg31);
2066 FAKE_VARARG_HANDLE_ARG(arg32);
2067 FAKE_VARARG_HANDLE_ARG(arg33);
2068 # undef FAKE_VARARG_HANDLE_ARG
2069 return current_arg_index;
2072 void OpenCLDevice::release_kernel_safe(cl_kernel kernel)
2075 clReleaseKernel(kernel);
2079 void OpenCLDevice::release_mem_object_safe(cl_mem mem)
2082 clReleaseMemObject(mem);
2086 void OpenCLDevice::release_program_safe(cl_program
program)
2095 cl_program OpenCLDevice::load_cached_kernel(ustring key,
thread_scoped_lock &cache_locker)
2097 return OpenCLCache::get_program(cpPlatform, cdDevice, key, cache_locker);
2100 void OpenCLDevice::store_cached_kernel(cl_program
program,
2104 OpenCLCache::store_program(cpPlatform, cdDevice,
program, key, cache_locker);
2112 return new OpenCLDevice(info, stats, profiler, background);
_GL_VOID GLfloat value _GL_VOID_RET _GL_VOID const GLuint GLboolean *residences _GL_BOOL_RET _GL_VOID GLsizei GLfloat GLfloat GLfloat GLfloat const GLubyte *bitmap _GL_VOID_RET _GL_VOID GLenum const void *lists _GL_VOID_RET _GL_VOID const GLdouble *equation _GL_VOID_RET _GL_VOID GLdouble GLdouble blue _GL_VOID_RET _GL_VOID GLfloat GLfloat blue _GL_VOID_RET _GL_VOID GLint GLint blue _GL_VOID_RET _GL_VOID GLshort GLshort blue _GL_VOID_RET _GL_VOID GLubyte GLubyte blue _GL_VOID_RET _GL_VOID GLuint GLuint blue _GL_VOID_RET _GL_VOID GLushort GLushort blue _GL_VOID_RET _GL_VOID GLbyte GLbyte GLbyte alpha _GL_VOID_RET _GL_VOID GLdouble GLdouble GLdouble alpha _GL_VOID_RET _GL_VOID GLfloat GLfloat GLfloat alpha _GL_VOID_RET _GL_VOID GLint GLint GLint alpha _GL_VOID_RET _GL_VOID GLshort GLshort GLshort alpha _GL_VOID_RET _GL_VOID GLubyte GLubyte GLubyte alpha _GL_VOID_RET _GL_VOID GLuint GLuint GLuint alpha _GL_VOID_RET _GL_VOID GLushort GLushort GLushort alpha _GL_VOID_RET _GL_VOID GLenum mode _GL_VOID_RET _GL_VOID GLint GLsizei GLsizei GLenum type _GL_VOID_RET _GL_VOID GLsizei GLenum GLenum const void *pixels _GL_VOID_RET _GL_VOID const void *pointer _GL_VOID_RET _GL_VOID GLdouble v _GL_VOID_RET _GL_VOID GLfloat v _GL_VOID_RET _GL_VOID GLint GLint i2 _GL_VOID_RET _GL_VOID GLint j _GL_VOID_RET _GL_VOID GLfloat param _GL_VOID_RET _GL_VOID GLint param _GL_VOID_RET _GL_VOID GLdouble GLdouble GLdouble GLdouble GLdouble zFar _GL_VOID_RET _GL_UINT GLdouble *equation _GL_VOID_RET _GL_VOID GLenum GLint *params _GL_VOID_RET _GL_VOID GLenum GLfloat *v _GL_VOID_RET _GL_VOID GLenum GLfloat *params _GL_VOID_RET _GL_VOID GLfloat *values _GL_VOID_RET _GL_VOID GLushort *values _GL_VOID_RET _GL_VOID GLenum GLfloat *params _GL_VOID_RET _GL_VOID GLenum GLdouble *params _GL_VOID_RET _GL_VOID GLenum GLint *params _GL_VOID_RET _GL_VOID GLsizei const void *pointer _GL_VOID_RET _GL_VOID GLsizei const void *pointer _GL_VOID_RET _GL_BOOL GLfloat param _GL_VOID_RET _GL_VOID GLint param _GL_VOID_RET _GL_VOID GLenum GLfloat param _GL_VOID_RET _GL_VOID GLenum GLint param _GL_VOID_RET _GL_VOID GLushort pattern _GL_VOID_RET _GL_VOID GLdouble GLdouble GLint GLint const GLdouble *points _GL_VOID_RET _GL_VOID GLdouble GLdouble GLint GLint GLdouble GLdouble GLint GLint const GLdouble *points _GL_VOID_RET _GL_VOID GLdouble GLdouble u2 _GL_VOID_RET _GL_VOID GLdouble GLdouble GLint GLdouble GLdouble v2 _GL_VOID_RET _GL_VOID GLenum GLfloat param _GL_VOID_RET _GL_VOID GLenum GLint param _GL_VOID_RET _GL_VOID GLenum mode _GL_VOID_RET _GL_VOID GLdouble GLdouble nz _GL_VOID_RET _GL_VOID GLfloat GLfloat nz _GL_VOID_RET _GL_VOID GLint GLint nz _GL_VOID_RET _GL_VOID GLshort GLshort nz _GL_VOID_RET _GL_VOID GLsizei const void *pointer _GL_VOID_RET _GL_VOID GLsizei const GLfloat *values _GL_VOID_RET _GL_VOID GLsizei const GLushort *values _GL_VOID_RET _GL_VOID GLint param _GL_VOID_RET _GL_VOID const GLuint const GLclampf *priorities _GL_VOID_RET _GL_VOID GLdouble y _GL_VOID_RET _GL_VOID GLfloat y _GL_VOID_RET _GL_VOID GLint y _GL_VOID_RET _GL_VOID GLshort y _GL_VOID_RET _GL_VOID GLdouble GLdouble z _GL_VOID_RET _GL_VOID GLfloat GLfloat z _GL_VOID_RET _GL_VOID GLint GLint z _GL_VOID_RET _GL_VOID GLshort GLshort z _GL_VOID_RET _GL_VOID GLdouble GLdouble GLdouble w _GL_VOID_RET _GL_VOID GLfloat GLfloat GLfloat w _GL_VOID_RET _GL_VOID GLint GLint GLint w _GL_VOID_RET _GL_VOID GLshort GLshort GLshort w _GL_VOID_RET _GL_VOID GLdouble GLdouble GLdouble y2 _GL_VOID_RET _GL_VOID GLfloat GLfloat GLfloat y2 _GL_VOID_RET _GL_VOID GLint GLint GLint y2 _GL_VOID_RET _GL_VOID GLshort GLshort GLshort y2 _GL_VOID_RET _GL_VOID GLdouble GLdouble GLdouble z _GL_VOID_RET _GL_VOID GLdouble GLdouble z _GL_VOID_RET _GL_VOID GLuint *buffer _GL_VOID_RET _GL_VOID GLdouble t _GL_VOID_RET _GL_VOID GLfloat t _GL_VOID_RET _GL_VOID GLint t _GL_VOID_RET _GL_VOID GLshort t _GL_VOID_RET _GL_VOID GLdouble GLdouble r _GL_VOID_RET _GL_VOID GLfloat GLfloat r _GL_VOID_RET _GL_VOID GLint GLint r _GL_VOID_RET _GL_VOID GLshort GLshort r _GL_VOID_RET _GL_VOID GLdouble GLdouble r
_GL_VOID GLfloat value _GL_VOID_RET _GL_VOID const GLuint GLboolean *residences _GL_BOOL_RET _GL_VOID GLsizei GLfloat GLfloat GLfloat GLfloat const GLubyte *bitmap _GL_VOID_RET _GL_VOID GLenum type
_GL_VOID GLfloat value _GL_VOID_RET _GL_VOID const GLuint * textures
_GL_VOID GLfloat value _GL_VOID_RET _GL_VOID const GLuint GLboolean *residences _GL_BOOL_RET _GL_VOID GLsizei GLfloat GLfloat GLfloat GLfloat const GLubyte *bitmap _GL_VOID_RET _GL_VOID GLenum const void *lists _GL_VOID_RET _GL_VOID const GLdouble *equation _GL_VOID_RET _GL_VOID GLdouble GLdouble blue _GL_VOID_RET _GL_VOID GLfloat GLfloat blue _GL_VOID_RET _GL_VOID GLint GLint blue _GL_VOID_RET _GL_VOID GLshort GLshort blue _GL_VOID_RET _GL_VOID GLubyte GLubyte blue _GL_VOID_RET _GL_VOID GLuint GLuint blue _GL_VOID_RET _GL_VOID GLushort GLushort blue _GL_VOID_RET _GL_VOID GLbyte GLbyte GLbyte alpha _GL_VOID_RET _GL_VOID GLdouble GLdouble GLdouble alpha _GL_VOID_RET _GL_VOID GLfloat GLfloat GLfloat alpha _GL_VOID_RET _GL_VOID GLint GLint GLint alpha _GL_VOID_RET _GL_VOID GLshort GLshort GLshort alpha _GL_VOID_RET _GL_VOID GLubyte GLubyte GLubyte alpha _GL_VOID_RET _GL_VOID GLuint GLuint GLuint alpha _GL_VOID_RET _GL_VOID GLushort GLushort GLushort alpha _GL_VOID_RET _GL_VOID GLenum mode _GL_VOID_RET _GL_VOID GLint y
_GL_VOID GLfloat value _GL_VOID_RET _GL_VOID const GLuint GLboolean *residences _GL_BOOL_RET _GL_VOID GLsizei GLfloat GLfloat GLfloat GLfloat const GLubyte *bitmap _GL_VOID_RET _GL_VOID GLenum const void *lists _GL_VOID_RET _GL_VOID const GLdouble *equation _GL_VOID_RET _GL_VOID GLdouble GLdouble blue _GL_VOID_RET _GL_VOID GLfloat GLfloat blue _GL_VOID_RET _GL_VOID GLint GLint blue _GL_VOID_RET _GL_VOID GLshort GLshort blue _GL_VOID_RET _GL_VOID GLubyte GLubyte blue _GL_VOID_RET _GL_VOID GLuint GLuint blue _GL_VOID_RET _GL_VOID GLushort GLushort blue _GL_VOID_RET _GL_VOID GLbyte GLbyte GLbyte alpha _GL_VOID_RET _GL_VOID GLdouble GLdouble GLdouble alpha _GL_VOID_RET _GL_VOID GLfloat GLfloat GLfloat alpha _GL_VOID_RET _GL_VOID GLint GLint GLint alpha _GL_VOID_RET _GL_VOID GLshort GLshort GLshort alpha _GL_VOID_RET _GL_VOID GLubyte GLubyte GLubyte alpha _GL_VOID_RET _GL_VOID GLuint GLuint GLuint alpha _GL_VOID_RET _GL_VOID GLushort GLushort GLushort alpha _GL_VOID_RET _GL_VOID GLenum mode _GL_VOID_RET _GL_VOID GLint GLsizei GLsizei GLenum type _GL_VOID_RET _GL_VOID GLsizei GLenum GLenum const void *pixels _GL_VOID_RET _GL_VOID const void *pointer _GL_VOID_RET _GL_VOID GLdouble v _GL_VOID_RET _GL_VOID GLfloat v _GL_VOID_RET _GL_VOID GLint GLint i2 _GL_VOID_RET _GL_VOID GLint j _GL_VOID_RET _GL_VOID GLfloat param _GL_VOID_RET _GL_VOID GLint param _GL_VOID_RET _GL_VOID GLdouble GLdouble GLdouble GLdouble GLdouble zFar _GL_VOID_RET _GL_UINT GLdouble *equation _GL_VOID_RET _GL_VOID GLenum GLint *params _GL_VOID_RET _GL_VOID GLenum GLfloat *v _GL_VOID_RET _GL_VOID GLenum GLfloat *params _GL_VOID_RET _GL_VOID GLfloat *values _GL_VOID_RET _GL_VOID GLushort *values _GL_VOID_RET _GL_VOID GLenum GLfloat *params _GL_VOID_RET _GL_VOID GLenum GLdouble *params _GL_VOID_RET _GL_VOID GLenum GLint *params _GL_VOID_RET _GL_VOID GLsizei stride
_GL_VOID GLfloat value _GL_VOID_RET _GL_VOID const GLuint GLboolean *residences _GL_BOOL_RET _GL_VOID GLsizei GLfloat GLfloat GLfloat GLfloat const GLubyte *bitmap _GL_VOID_RET _GL_VOID GLenum const void *lists _GL_VOID_RET _GL_VOID const GLdouble *equation _GL_VOID_RET _GL_VOID GLdouble GLdouble blue _GL_VOID_RET _GL_VOID GLfloat GLfloat blue _GL_VOID_RET _GL_VOID GLint GLint blue _GL_VOID_RET _GL_VOID GLshort GLshort blue _GL_VOID_RET _GL_VOID GLubyte GLubyte blue _GL_VOID_RET _GL_VOID GLuint GLuint blue _GL_VOID_RET _GL_VOID GLushort GLushort blue _GL_VOID_RET _GL_VOID GLbyte GLbyte GLbyte alpha _GL_VOID_RET _GL_VOID GLdouble GLdouble GLdouble alpha _GL_VOID_RET _GL_VOID GLfloat GLfloat GLfloat alpha _GL_VOID_RET _GL_VOID GLint GLint GLint alpha _GL_VOID_RET _GL_VOID GLshort GLshort GLshort alpha _GL_VOID_RET _GL_VOID GLubyte GLubyte GLubyte alpha _GL_VOID_RET _GL_VOID GLuint GLuint GLuint alpha _GL_VOID_RET _GL_VOID GLushort GLushort GLushort alpha _GL_VOID_RET _GL_VOID GLenum mode _GL_VOID_RET _GL_VOID GLint GLsizei GLsizei GLenum type _GL_VOID_RET _GL_VOID GLsizei GLenum GLenum const void *pixels _GL_VOID_RET _GL_VOID const void *pointer _GL_VOID_RET _GL_VOID GLdouble v _GL_VOID_RET _GL_VOID GLfloat v _GL_VOID_RET _GL_VOID GLint GLint i2 _GL_VOID_RET _GL_VOID GLint j _GL_VOID_RET _GL_VOID GLfloat param _GL_VOID_RET _GL_VOID GLint param _GL_VOID_RET _GL_VOID GLdouble GLdouble GLdouble GLdouble GLdouble zFar _GL_VOID_RET _GL_UINT GLdouble *equation _GL_VOID_RET _GL_VOID GLenum GLint *params _GL_VOID_RET _GL_VOID GLenum GLfloat *v _GL_VOID_RET _GL_VOID GLenum GLfloat *params _GL_VOID_RET _GL_VOID GLfloat *values _GL_VOID_RET _GL_VOID GLushort *values _GL_VOID_RET _GL_VOID GLenum GLfloat *params _GL_VOID_RET _GL_VOID GLenum GLdouble *params _GL_VOID_RET _GL_VOID GLenum GLint *params _GL_VOID_RET _GL_VOID GLsizei const void *pointer _GL_VOID_RET _GL_VOID GLsizei const void *pointer _GL_VOID_RET _GL_BOOL GLfloat param _GL_VOID_RET _GL_VOID GLint param _GL_VOID_RET _GL_VOID GLenum GLfloat param _GL_VOID_RET _GL_VOID GLenum GLint param _GL_VOID_RET _GL_VOID GLushort pattern _GL_VOID_RET _GL_VOID GLdouble GLdouble GLint GLint const GLdouble *points _GL_VOID_RET _GL_VOID GLdouble GLdouble GLint GLint GLdouble GLdouble GLint GLint const GLdouble *points _GL_VOID_RET _GL_VOID GLdouble GLdouble u2 _GL_VOID_RET _GL_VOID GLdouble GLdouble GLint GLdouble GLdouble v2 _GL_VOID_RET _GL_VOID GLenum GLfloat param _GL_VOID_RET _GL_VOID GLenum GLint param _GL_VOID_RET _GL_VOID GLenum mode _GL_VOID_RET _GL_VOID GLdouble GLdouble nz _GL_VOID_RET _GL_VOID GLfloat GLfloat nz _GL_VOID_RET _GL_VOID GLint GLint nz _GL_VOID_RET _GL_VOID GLshort GLshort nz _GL_VOID_RET _GL_VOID GLsizei const void *pointer _GL_VOID_RET _GL_VOID GLsizei const GLfloat *values _GL_VOID_RET _GL_VOID GLsizei const GLushort *values _GL_VOID_RET _GL_VOID GLint param _GL_VOID_RET _GL_VOID const GLuint const GLclampf *priorities _GL_VOID_RET _GL_VOID GLdouble y _GL_VOID_RET _GL_VOID GLfloat y _GL_VOID_RET _GL_VOID GLint y _GL_VOID_RET _GL_VOID GLshort y _GL_VOID_RET _GL_VOID GLdouble GLdouble z _GL_VOID_RET _GL_VOID GLfloat GLfloat z _GL_VOID_RET _GL_VOID GLint GLint z _GL_VOID_RET _GL_VOID GLshort GLshort z _GL_VOID_RET _GL_VOID GLdouble GLdouble GLdouble w _GL_VOID_RET _GL_VOID GLfloat GLfloat GLfloat w _GL_VOID_RET _GL_VOID GLint GLint GLint w _GL_VOID_RET _GL_VOID GLshort GLshort GLshort w _GL_VOID_RET _GL_VOID GLdouble GLdouble GLdouble y2 _GL_VOID_RET _GL_VOID GLfloat GLfloat GLfloat y2 _GL_VOID_RET _GL_VOID GLint GLint GLint y2 _GL_VOID_RET _GL_VOID GLshort GLshort GLshort y2 _GL_VOID_RET _GL_VOID GLdouble GLdouble GLdouble z _GL_VOID_RET _GL_VOID GLdouble GLdouble z _GL_VOID_RET _GL_VOID GLuint *buffer _GL_VOID_RET _GL_VOID GLdouble t _GL_VOID_RET _GL_VOID GLfloat t _GL_VOID_RET _GL_VOID GLint t _GL_VOID_RET _GL_VOID GLshort t _GL_VOID_RET _GL_VOID GLdouble t
static DBVT_INLINE btScalar size(const btDbvtVolume &a)
SIMD_FORCE_INLINE const btScalar & w() const
Return the w value.
void run_denoising(RenderTile &tile)
struct DenoisingTask::RenderBuffers render_buffer
struct DenoisingTask::DeviceFunctions functions
struct DenoisingTask::DenoiseBuffers buffer
bool use_integrator_branched
bool use_background_light
bool use_patch_evaluation
bool use_true_displacement
string get_build_options() const
virtual bool enqueue_split_kernel_data_init(const KernelDimensions &dim, RenderTile &rtile, int num_global_elements, device_memory &kernel_globals, device_memory &kernel_data_, device_memory &split_data, device_memory &ray_state, device_memory &queue_index, device_memory &use_queues_flag, device_memory &work_pool_wgs)=0
virtual SplitKernelFunction * get_split_kernel_function(const string &kernel_name, const DeviceRequestedFeatures &)=0
size_t max_elements_for_max_buffer_size(device_memory &kg, device_memory &data, uint64_t max_buffer_size)
virtual int2 split_kernel_local_size()=0
virtual uint64_t state_buffer_size(device_memory &kg, device_memory &data, size_t num_threads)=0
virtual int2 split_kernel_global_size(device_memory &kg, device_memory &data, DeviceTask &task)=0
void append(const uint8_t *data, int size)
virtual bool enqueue(const KernelDimensions &dim, device_memory &kg, device_memory &data)=0
void mem_free(size_t size)
void mem_alloc(size_t size)
size_t memory_elements_size(int elements)
device_ptr device_pointer
CCL_NAMESPACE_BEGIN struct Options options
@ DEVICE_KERNEL_USING_FEATURE_KERNEL
ccl_device_intersect bool scene_intersect(KernelGlobals *kg, const Ray *ray, const uint visibility, Intersection *isect)
#define CCL_NAMESPACE_END
#define make_int4(x, y, z, w)
void KERNEL_FUNCTION_FULL_NAME() data_init(KernelGlobals *kg, ccl_constant KernelData *data, ccl_global void *split_data_buffer, int num_elements, ccl_global char *ray_state, int start_sample, int end_sample, int sx, int sy, int sw, int sh, int offset, int stride, ccl_global int *Queue_index, int queuesize, ccl_global char *use_queues_flag, ccl_global unsigned int *work_pool_wgs, unsigned int num_samples, ccl_global float *buffer)
void KERNEL_FUNCTION_FULL_NAME() shader(KernelGlobals *kg, uint4 *input, float4 *output, int type, int filter, int i, int offset, int sample)
CCL_NAMESPACE_BEGIN ccl_device void kernel_data_init(KernelGlobals *kg, ccl_constant KernelData *data, ccl_global void *split_data_buffer, int num_elements, ccl_global char *ray_state, int start_sample, int end_sample, int sx, int sy, int sw, int sh, int offset, int stride, ccl_global int *Queue_index, int queuesize, ccl_global char *use_queues_flag, ccl_global unsigned int *work_pools, unsigned int num_samples, ccl_global float *buffer)
ccl_device_noinline_cpu float3 indirect_background(KernelGlobals *kg, ShaderData *emission_sd, ccl_addr_space PathState *state, ccl_global float *buffer, ccl_addr_space Ray *ray)
__kernel void ccl_constant KernelData ccl_global void ccl_global char ccl_global int ccl_global char ccl_global unsigned int ccl_global float * buffer
__kernel void ccl_constant KernelData ccl_global void ccl_global char ccl_global int ccl_global char * use_queues_flag
__kernel void ccl_constant KernelData ccl_global void ccl_global char ccl_global int * queue_index
__kernel void ccl_constant KernelData ccl_global void ccl_global char * ray_state
__kernel void ccl_constant KernelData ccl_global void ccl_global char ccl_global int ccl_global char ccl_global unsigned int * work_pools
static void error(const char *str)
static void sample(SocketReader *reader, int x, int y, float color[4])
ListBase threads
list of all thread for every CPUDevice in cpudevices a thread exists.
struct blender::compositor::@172::@174 task
struct blender::compositor::@172::@175 opencl
static int bake(const BakeAPIRender *bkr, Object *ob_low, const ListBase *selected_objects, ReportList *reports)
unsigned __int64 uint64_t
function< bool(device_ptr a_ptr, device_ptr b_ptr, device_ptr mean_ptr, device_ptr variance_ptr, int r, int4 rect)> combine_halves
function< bool(device_ptr image_ptr, device_ptr variance_ptr, device_ptr depth_ptr, device_ptr output_ptr)> detect_outliers
function< bool(int out_offset, device_ptr frop_ptr, device_ptr buffer_ptr)> write_feature
function< bool(device_ptr output_ptr)> solve
function< bool(device_ptr a_ptr, device_ptr b_ptr, device_ptr sample_variance_ptr, device_ptr sv_variance_ptr, device_ptr buffer_variance_ptr)> divide_shadow
function< bool()> construct_transform
function< bool(int mean_offset, int variance_offset, device_ptr mean_ptr, device_ptr variance_ptr, float scale)> get_feature
function< bool(device_ptr color_ptr, device_ptr color_variance_ptr, device_ptr scale_ptr, int frame)> accumulate
function< bool(device_ptr image_ptr, device_ptr guide_ptr, device_ptr variance_ptr, device_ptr out_ptr)> non_local_means
#define NODE_GROUP_LEVEL_MAX
#define NODE_FEATURE_VOLUME
void util_aligned_free(void *ptr)
CCL_NAMESPACE_BEGIN void * util_aligned_malloc(size_t size, int alignment)
DebugFlags & DebugFlags()
void path_init(const string &path, const string &user_path)
string string_human_readable_size(size_t size)
string string_human_readable_number(size_t num)
bool string_startswith(const string &s, const char *start)
CCL_NAMESPACE_BEGIN string string_printf(const char *format,...)
std::unique_lock< std::mutex > thread_scoped_lock
ccl_device_inline size_t round_down(size_t x, size_t multiple)