27 static const double alpha = 0.1;
31 split_data(device,
"split_data"),
35 work_pool_wgs(device,
"work_pool_wgs"),
36 kernel_data_initialized(false)
38 avg_time_per_sample = 0.0;
40 kernel_path_init =
NULL;
41 kernel_scene_intersect =
NULL;
42 kernel_lamp_emission =
NULL;
43 kernel_do_volume =
NULL;
44 kernel_queue_enqueue =
NULL;
45 kernel_indirect_background =
NULL;
46 kernel_shader_setup =
NULL;
47 kernel_shader_sort =
NULL;
48 kernel_shader_eval =
NULL;
49 kernel_holdout_emission_blurring_pathtermination_ao =
NULL;
50 kernel_subsurface_scatter =
NULL;
51 kernel_direct_lighting =
NULL;
52 kernel_shadow_blocked_ao =
NULL;
53 kernel_shadow_blocked_dl =
NULL;
54 kernel_enqueue_inactive =
NULL;
55 kernel_next_iteration_setup =
NULL;
56 kernel_indirect_subsurface =
NULL;
57 kernel_buffer_update =
NULL;
58 kernel_adaptive_stopping =
NULL;
59 kernel_adaptive_filter_x =
NULL;
60 kernel_adaptive_filter_y =
NULL;
61 kernel_adaptive_adjust_samples =
NULL;
68 use_queues_flag.
free();
72 delete kernel_path_init;
73 delete kernel_scene_intersect;
74 delete kernel_lamp_emission;
75 delete kernel_do_volume;
76 delete kernel_queue_enqueue;
77 delete kernel_indirect_background;
78 delete kernel_shader_setup;
79 delete kernel_shader_sort;
80 delete kernel_shader_eval;
81 delete kernel_holdout_emission_blurring_pathtermination_ao;
82 delete kernel_subsurface_scatter;
83 delete kernel_direct_lighting;
84 delete kernel_shadow_blocked_ao;
85 delete kernel_shadow_blocked_dl;
86 delete kernel_enqueue_inactive;
87 delete kernel_next_iteration_setup;
88 delete kernel_indirect_subsurface;
89 delete kernel_buffer_update;
90 delete kernel_adaptive_stopping;
91 delete kernel_adaptive_filter_x;
92 delete kernel_adaptive_filter_y;
93 delete kernel_adaptive_adjust_samples;
98 #define LOAD_KERNEL(name) \
99 kernel_##name = get_split_kernel_function(#name, requested_features); \
100 if (!kernel_##name) { \
101 device->set_error(string("Split kernel error: failed to load kernel_") + #name); \
116 LOAD_KERNEL(holdout_emission_blurring_pathtermination_ao);
133 kernel_data_initialized =
false;
145 return max_buffer_size / size_per_element;
158 if (!kernel_data_initialized) {
159 kernel_data_initialized =
true;
163 local_size[0] = lsize[0];
164 local_size[1] = lsize[1];
172 global_size[0] =
round_up(gsize[0], local_size[0]);
173 global_size[1] =
round_up(gsize[1], local_size[1]);
175 int num_global_elements = global_size[0] * global_size[1];
183 unsigned int max_work_groups = num_global_elements / work_pool_size + 1;
190 ray_state.
alloc(num_global_elements);
194 int num_global_elements = global_size[0] * global_size[1];
196 #define ENQUEUE_SPLIT_KERNEL(name, global_size, local_size) \
197 if (device->have_error()) { \
200 if (!kernel_##name->enqueue( \
201 KernelDimensions(global_size, local_size), kgbuffer, kernel_data)) { \
208 int time_multiplier = 1;
215 const int initial_num_samples = 1;
217 const int samples_per_second = (avg_time_per_sample > 0.0) ?
218 int(
double(time_multiplier) / avg_time_per_sample) + 1 :
225 if (
task.adaptive_sampling.use) {
260 bool activeRaysAvailable =
true;
261 double cancel_time = DBL_MAX;
263 while (activeRaysAvailable) {
265 for (
int PathIter = 0; PathIter < 16; PathIter++) {
268 if (kernel_do_volume) {
277 holdout_emission_blurring_pathtermination_ao, global_size, local_size);
289 if (
task.get_cancel() && cancel_time == DBL_MAX) {
293 cancel_time =
time_dt() + 2.0 * time_multiplier;
304 activeRaysAvailable =
false;
306 for (
int rayStateIter = 0; rayStateIter < global_size[0] * global_size[1]; ++rayStateIter) {
310 device->
set_error(
"Split kernel error: invalid ray state");
315 activeRaysAvailable =
true;
326 if (
task.adaptive_sampling.use &&
task.adaptive_sampling.need_filter(filter_sample)) {
327 size_t buffer_size[2];
328 buffer_size[0] =
round_up(tile.
w, local_size[0]);
329 buffer_size[1] =
round_up(tile.
h, local_size[1]);
330 kernel_adaptive_stopping->
enqueue(
332 buffer_size[0] =
round_up(tile.
h, local_size[0]);
333 buffer_size[1] =
round_up(1, local_size[1]);
334 kernel_adaptive_filter_x->
enqueue(
336 buffer_size[0] =
round_up(tile.
w, local_size[0]);
337 buffer_size[1] =
round_up(1, local_size[1]);
338 kernel_adaptive_filter_y->
enqueue(
344 if (avg_time_per_sample == 0.0) {
346 avg_time_per_sample = time_per_sample;
349 avg_time_per_sample =
alpha * time_per_sample + (1.0 -
alpha) * avg_time_per_sample;
352 #undef ENQUEUE_SPLIT_KERNEL
357 time_multiplier =
min(time_multiplier << 1, 10);
359 if (
task.get_cancel()) {
364 if (
task.adaptive_sampling.use) {
379 size_t buffer_size[2];
380 buffer_size[0] =
round_up(tile.
w, local_size[0]);
381 buffer_size[1] =
round_up(tile.
h, local_size[1]);
382 kernel_adaptive_adjust_samples->
enqueue(
virtual bool enqueue_split_kernel_data_init(const KernelDimensions &dim, RenderTile &rtile, int num_global_elements, device_memory &kernel_globals, device_memory &kernel_data_, device_memory &split_data, device_memory &ray_state, device_memory &queue_index, device_memory &use_queues_flag, device_memory &work_pool_wgs)=0
size_t max_elements_for_max_buffer_size(device_memory &kg, device_memory &data, uint64_t max_buffer_size)
virtual int2 split_kernel_local_size()=0
bool load_kernels(const DeviceRequestedFeatures &requested_features)
bool path_trace(DeviceTask &task, RenderTile &rtile, device_memory &kgbuffer, device_memory &kernel_data)
virtual uint64_t state_buffer_size(device_memory &kg, device_memory &data, size_t num_threads)=0
DeviceSplitKernel(Device *device)
virtual int2 split_kernel_global_size(device_memory &kg, device_memory &data, DeviceTask &task)=0
virtual ~DeviceSplitKernel()
virtual void set_error(const string &error)
virtual bool enqueue(const KernelDimensions &dim, device_memory &kg, device_memory &data)=0
void alloc_to_device(size_t num, bool shrink_to_fit=true)
T * alloc(size_t width, size_t height=0, size_t depth=0)
#define LOAD_KERNEL(name)
static CCL_NAMESPACE_BEGIN const double alpha
#define ENQUEUE_SPLIT_KERNEL(name, global_size, local_size)
ccl_device_intersect bool scene_intersect(KernelGlobals *kg, const Ray *ray, const uint visibility, Intersection *isect)
#define CCL_NAMESPACE_END
ccl_device_noinline_cpu float3 indirect_background(KernelGlobals *kg, ShaderData *emission_sd, ccl_addr_space PathState *state, ccl_global float *buffer, ccl_addr_space Ray *ray)
__kernel void ccl_constant KernelData ccl_global void ccl_global char ccl_global int ccl_global char * use_queues_flag
__kernel void ccl_constant KernelData ccl_global void ccl_global char ccl_global int * queue_index
__kernel void ccl_constant KernelData ccl_global void ccl_global char * ray_state
#define WORK_POOL_SIZE_GPU
#define IS_STATE(ray_state, ray_index, state)
#define WORK_POOL_SIZE_CPU
struct blender::compositor::@172::@174 task
unsigned __int64 uint64_t
void path_init(const string &path, const string &user_path)
string string_human_readable_size(size_t size)
string string_human_readable_number(size_t num)
CCL_NAMESPACE_BEGIN double time_dt()
ccl_device_inline size_t round_up(size_t x, size_t multiple)