14# include <sycl/sycl.hpp>
24static OneAPIErrorCallback s_error_cb =
nullptr;
25static void *s_error_user_ptr =
nullptr;
27# ifdef WITH_EMBREE_GPU
28static const RTCFeatureFlags CYCLES_ONEAPI_EMBREE_BASIC_FEATURES = (
const RTCFeatureFlags)(
29 RTC_FEATURE_FLAG_TRIANGLE | RTC_FEATURE_FLAG_INSTANCE |
30 RTC_FEATURE_FLAG_FILTER_FUNCTION_IN_ARGUMENTS | RTC_FEATURE_FLAG_POINT |
31 RTC_FEATURE_FLAG_MOTION_BLUR);
32static const RTCFeatureFlags CYCLES_ONEAPI_EMBREE_ALL_FEATURES = (
const RTCFeatureFlags)(
33 CYCLES_ONEAPI_EMBREE_BASIC_FEATURES | RTC_FEATURE_FLAG_ROUND_CATMULL_ROM_CURVE |
34 RTC_FEATURE_FLAG_FLAT_CATMULL_ROM_CURVE);
37void oneapi_set_error_cb(OneAPIErrorCallback cb,
void *user_ptr)
40 s_error_user_ptr = user_ptr;
43size_t oneapi_suggested_gpu_kernel_size(
const DeviceKernel kernel)
76bool oneapi_run_test_kernel(SyclQueue *queue_)
79 sycl::queue *queue =
reinterpret_cast<sycl::queue *
>(queue_);
81 const size_t memory_byte_size =
sizeof(int) *
N;
83 bool is_computation_correct =
true;
85 int *A_host = (
int *)sycl::aligned_alloc_host(16, memory_byte_size, *queue);
87 for (
size_t i = (
size_t)0;
i <
N;
i++) {
88 A_host[
i] = rand() % 32;
91 int *A_device = (
int *)sycl::malloc_device(memory_byte_size, *queue);
92 int *B_device = (
int *)sycl::malloc_device(memory_byte_size, *queue);
94 queue->memcpy(A_device, A_host, memory_byte_size);
95 queue->wait_and_throw();
97 queue->submit([&](sycl::handler &cgh) {
98 cgh.parallel_for(
N, [=](sycl::id<1> idx) { B_device[idx] = A_device[idx] + idx.get(0); });
100 queue->wait_and_throw();
102 int *B_host = (
int *)sycl::aligned_alloc_host(16, memory_byte_size, *queue);
104 queue->memcpy(B_host, B_device, memory_byte_size);
105 queue->wait_and_throw();
107 for (
size_t i = (
size_t)0;
i <
N;
i++) {
108 const int expected_result =
i + A_host[
i];
109 if (B_host[
i] != expected_result) {
110 is_computation_correct =
false;
112 s_error_cb((
"Incorrect result in test kernel execution - expected " +
113 std::to_string(expected_result) +
", got " + std::to_string(B_host[
i]))
120 sycl::free(A_host, *queue);
121 sycl::free(B_host, *queue);
122 sycl::free(A_device, *queue);
123 sycl::free(B_device, *queue);
124 queue->wait_and_throw();
126 catch (
const sycl::exception &
e) {
128 s_error_cb(
e.what(), s_error_user_ptr);
133 return is_computation_correct;
136bool oneapi_zero_memory_on_device(SyclQueue *queue_,
void *device_pointer,
const size_t num_bytes)
139 sycl::queue *queue =
reinterpret_cast<sycl::queue *
>(queue_);
141 queue->memset(device_pointer, 0, num_bytes);
142 queue->wait_and_throw();
145 catch (
const sycl::exception &
e) {
147 s_error_cb(
e.what(), s_error_user_ptr);
153bool oneapi_kernel_is_required_for_features(
const std::string &kernel_name,
154 const uint kernel_features)
157 if (kernel_name.find(
"oneapi_kernel_") == std::string::npos) {
184 std::string::npos) ||
186 std::string::npos) ||
188 std::string::npos) ||
198bool oneapi_kernel_is_compatible_with_hardware_raytracing(
const std::string &kernel_name)
202# if defined(RTC_VERSION) && RTC_VERSION < 40100
204 std::string::npos) &&
212bool oneapi_kernel_has_intersections(
const std::string &kernel_name)
225bool oneapi_load_kernels(SyclQueue *queue_,
226 const uint kernel_features,
227 bool use_hardware_raytracing)
230 sycl::queue *queue =
reinterpret_cast<sycl::queue *
>(queue_);
232# ifdef WITH_EMBREE_GPU
234 if (use_hardware_raytracing) {
236 sycl::kernel_bundle<sycl::bundle_state::input> all_kernels_bundle =
237 sycl::get_kernel_bundle<sycl::bundle_state::input>(queue->get_context(),
238 {queue->get_device()});
240 for (
const sycl::kernel_id &kernel_id : all_kernels_bundle.get_kernel_ids()) {
241 const std::string &kernel_name = kernel_id.get_name();
243 if (!oneapi_kernel_is_required_for_features(kernel_name, kernel_features) ||
244 !(oneapi_kernel_has_intersections(kernel_name) &&
245 oneapi_kernel_is_compatible_with_hardware_raytracing(kernel_name)))
250 sycl::kernel_bundle<sycl::bundle_state::input> one_kernel_bundle_input =
251 sycl::get_kernel_bundle<sycl::bundle_state::input>(
252 queue->get_context(), {queue->get_device()}, {kernel_id});
256 one_kernel_bundle_input
257 .set_specialization_constant<ONEAPIKernelContext::oneapi_embree_features>(
258 CYCLES_ONEAPI_EMBREE_ALL_FEATURES);
259 sycl::build(one_kernel_bundle_input);
262 one_kernel_bundle_input
263 .set_specialization_constant<ONEAPIKernelContext::oneapi_embree_features>(
264 CYCLES_ONEAPI_EMBREE_BASIC_FEATURES);
265 sycl::build(one_kernel_bundle_input);
269 catch (
const sycl::exception &
e) {
271 s_error_cb(
e.what(), s_error_user_ptr);
279 sycl::kernel_bundle<sycl::bundle_state::input> all_kernels_bundle =
280 sycl::get_kernel_bundle<sycl::bundle_state::input>(queue->get_context(),
281 {queue->get_device()});
283 for (
const sycl::kernel_id &kernel_id : all_kernels_bundle.get_kernel_ids()) {
284 const std::string &kernel_name = kernel_id.get_name();
288 if (!oneapi_kernel_is_required_for_features(kernel_name, kernel_features) ||
289 (use_hardware_raytracing && oneapi_kernel_has_intersections(kernel_name) &&
290 oneapi_kernel_is_compatible_with_hardware_raytracing(kernel_name)))
295# ifdef WITH_EMBREE_GPU
296 if (oneapi_kernel_has_intersections(kernel_name)) {
297 sycl::kernel_bundle<sycl::bundle_state::input> one_kernel_bundle_input =
298 sycl::get_kernel_bundle<sycl::bundle_state::input>(
299 queue->get_context(), {queue->get_device()}, {kernel_id});
300 one_kernel_bundle_input
301 .set_specialization_constant<ONEAPIKernelContext::oneapi_embree_features>(
302 RTC_FEATURE_FLAG_NONE);
303 sycl::build(one_kernel_bundle_input);
309 (void)sycl::get_kernel_bundle<sycl::bundle_state::executable>(
310 queue->get_context(), {queue->get_device()}, {kernel_id});
313 catch (
const sycl::exception &
e) {
315 s_error_cb(
e.what(), s_error_user_ptr);
322bool oneapi_enqueue_kernel(KernelContext *kernel_context,
324 const size_t global_size,
325 const size_t local_size,
326 const uint kernel_features,
327 bool use_hardware_raytracing,
333 sycl::queue *queue =
reinterpret_cast<sycl::queue *
>(kernel_context->queue);
341# pragma warning(error : 4062)
342# elif defined(__GNUC__)
343# pragma GCC diagnostic push
344# pragma GCC diagnostic error "-Wswitch"
352 max_shaders = (kernel_context->scene_max_shaders);
356 queue->submit([&](sycl::handler &cgh) {
357# ifdef WITH_EMBREE_GPU
361 const RTCFeatureFlags used_embree_features = !use_hardware_raytracing ?
362 RTC_FEATURE_FLAG_NONE :
364 CYCLES_ONEAPI_EMBREE_BASIC_FEATURES :
365 CYCLES_ONEAPI_EMBREE_ALL_FEATURES;
366 cgh.set_specialization_constant<ONEAPIKernelContext::oneapi_embree_features>(
367 used_embree_features);
370 (void)kernel_features;
372 switch (device_kernel) {
374 oneapi_call(kg, cgh, global_size, local_size, args, oneapi_kernel_integrator_reset);
379 kg, cgh, global_size, local_size, args, oneapi_kernel_integrator_init_from_camera);
384 kg, cgh, global_size, local_size, args, oneapi_kernel_integrator_init_from_bake);
389 kg, cgh, global_size, local_size, args, oneapi_kernel_integrator_intersect_closest);
394 kg, cgh, global_size, local_size, args, oneapi_kernel_integrator_intersect_shadow);
403 oneapi_kernel_integrator_intersect_subsurface);
412 oneapi_kernel_integrator_intersect_volume_stack);
421 oneapi_kernel_integrator_intersect_dedicated_light);
426 kg, cgh, global_size, local_size, args, oneapi_kernel_integrator_shade_background);
431 kg, cgh, global_size, local_size, args, oneapi_kernel_integrator_shade_light);
436 kg, cgh, global_size, local_size, args, oneapi_kernel_integrator_shade_shadow);
441 kg, cgh, global_size, local_size, args, oneapi_kernel_integrator_shade_surface);
450 oneapi_kernel_integrator_shade_surface_raytrace);
455 kg, cgh, global_size, local_size, args, oneapi_kernel_integrator_shade_surface_mnee);
460 kg, cgh, global_size, local_size, args, oneapi_kernel_integrator_shade_volume);
469 oneapi_kernel_integrator_shade_dedicated_light);
474 kg, cgh, global_size, local_size, args, oneapi_kernel_integrator_queued_paths_array);
483 oneapi_kernel_integrator_queued_shadow_paths_array);
488 kg, cgh, global_size, local_size, args, oneapi_kernel_integrator_active_paths_array);
497 oneapi_kernel_integrator_terminated_paths_array);
506 oneapi_kernel_integrator_terminated_shadow_paths_array);
511 kg, cgh, global_size, local_size, args, oneapi_kernel_integrator_sorted_paths_array);
515 sycl::local_accessor<int> local_mem(max_shaders, cgh);
516 oneapi_kernel_integrator_sort_bucket_pass(kg,
529 sycl::local_accessor<int> local_mem(max_shaders, cgh);
530 oneapi_kernel_integrator_sort_write_pass(kg,
548 oneapi_kernel_integrator_compact_paths_array);
557 oneapi_kernel_integrator_compact_shadow_paths_array);
566 oneapi_kernel_adaptive_sampling_convergence_check);
571 kg, cgh, global_size, local_size, args, oneapi_kernel_adaptive_sampling_filter_x);
576 kg, cgh, global_size, local_size, args, oneapi_kernel_adaptive_sampling_filter_y);
580 oneapi_call(kg, cgh, global_size, local_size, args, oneapi_kernel_shader_eval_displace);
585 kg, cgh, global_size, local_size, args, oneapi_kernel_shader_eval_background);
594 oneapi_kernel_shader_eval_curve_shadow_transparency);
598 oneapi_call(kg, cgh, global_size, local_size, args, oneapi_kernel_prefix_sum);
603 # define DEVICE_KERNEL_FILM_CONVERT_PARTIAL(VARIANT, variant) \
604 case DEVICE_KERNEL_FILM_CONVERT_##VARIANT: { \
605 oneapi_call(kg, cgh, \
609 oneapi_kernel_film_convert_##variant); \
613# define DEVICE_KERNEL_FILM_CONVERT(variant, VARIANT) \
614 DEVICE_KERNEL_FILM_CONVERT_PARTIAL(VARIANT, variant) \
615 DEVICE_KERNEL_FILM_CONVERT_PARTIAL(VARIANT##_HALF_RGBA, variant##_half_rgba)
617 DEVICE_KERNEL_FILM_CONVERT(depth, DEPTH);
618 DEVICE_KERNEL_FILM_CONVERT(mist, MIST);
619 DEVICE_KERNEL_FILM_CONVERT(sample_count, SAMPLE_COUNT);
620 DEVICE_KERNEL_FILM_CONVERT(
float,
FLOAT);
621 DEVICE_KERNEL_FILM_CONVERT(light_path, LIGHT_PATH);
623 DEVICE_KERNEL_FILM_CONVERT(motion, MOTION);
624 DEVICE_KERNEL_FILM_CONVERT(cryptomatte, CRYPTOMATTE);
625 DEVICE_KERNEL_FILM_CONVERT(shadow_catcher, SHADOW_CATCHER);
626 DEVICE_KERNEL_FILM_CONVERT(shadow_catcher_matte_with_shadow,
627 SHADOW_CATCHER_MATTE_WITH_SHADOW);
628 DEVICE_KERNEL_FILM_CONVERT(combined, COMBINED);
631# undef DEVICE_KERNEL_FILM_CONVERT
632# undef DEVICE_KERNEL_FILM_CONVERT_PARTIAL
637 kg, cgh, global_size, local_size, args, oneapi_kernel_filter_guiding_preprocess);
646 oneapi_kernel_filter_guiding_set_fake_albedo);
651 kg, cgh, global_size, local_size, args, oneapi_kernel_filter_color_preprocess);
656 kg, cgh, global_size, local_size, args, oneapi_kernel_filter_color_postprocess);
661 kg, cgh, global_size, local_size, args, oneapi_kernel_cryptomatte_postprocess);
666 kg, cgh, global_size, local_size, args, oneapi_kernel_integrator_compact_states);
675 oneapi_kernel_integrator_compact_shadow_states);
684 oneapi_kernel_integrator_shadow_catcher_count_possible_splits);
695 catch (
const sycl::exception &
e) {
697 s_error_cb(
e.what(), s_error_user_ptr);
703# pragma warning(default : 4062)
704# elif defined(__GNUC__)
705# pragma GCC diagnostic pop
ATTR_WARN_UNUSED_RESULT const BMVert const BMEdge * e
#define kernel_assert(cond)
#define KERNEL_FEATURE_VOLUME
#define KERNEL_FEATURE_PATH_TRACING
#define KERNEL_FEATURE_HAIR
#define KERNEL_FEATURE_NODE_RAYTRACE
#define KERNEL_FEATURE_BAKING
#define KERNEL_FEATURE_MNEE
bool device_kernel_has_intersection(DeviceKernel kernel)
const char * device_kernel_as_string(DeviceKernel kernel)
VecBase< float, 4 > float4
#define assert(assertion)
@ DEVICE_KERNEL_ADAPTIVE_SAMPLING_CONVERGENCE_CHECK
@ DEVICE_KERNEL_INTEGRATOR_RESET
@ DEVICE_KERNEL_INTEGRATOR_QUEUED_PATHS_ARRAY
@ DEVICE_KERNEL_INTEGRATOR_SHADE_LIGHT
@ DEVICE_KERNEL_INTEGRATOR_SHADE_DEDICATED_LIGHT
@ DEVICE_KERNEL_FILTER_COLOR_PREPROCESS
@ DEVICE_KERNEL_INTEGRATOR_SHADE_SURFACE
@ DEVICE_KERNEL_INTEGRATOR_SORT_WRITE_PASS
@ DEVICE_KERNEL_SHADER_EVAL_DISPLACE
@ DEVICE_KERNEL_INTEGRATOR_SHADOW_CATCHER_COUNT_POSSIBLE_SPLITS
@ DEVICE_KERNEL_INTEGRATOR_INTERSECT_SUBSURFACE
@ DEVICE_KERNEL_INTEGRATOR_QUEUED_SHADOW_PATHS_ARRAY
@ DEVICE_KERNEL_FILTER_GUIDING_SET_FAKE_ALBEDO
@ DEVICE_KERNEL_FILTER_COLOR_POSTPROCESS
@ DEVICE_KERNEL_INTEGRATOR_SHADE_SHADOW
@ DEVICE_KERNEL_INTEGRATOR_TERMINATED_PATHS_ARRAY
@ DEVICE_KERNEL_INTEGRATOR_INTERSECT_VOLUME_STACK
@ DEVICE_KERNEL_SHADER_EVAL_BACKGROUND
@ DEVICE_KERNEL_INTEGRATOR_SORTED_PATHS_ARRAY
@ DEVICE_KERNEL_INTEGRATOR_COMPACT_SHADOW_PATHS_ARRAY
@ DEVICE_KERNEL_INTEGRATOR_COMPACT_SHADOW_STATES
@ DEVICE_KERNEL_INTEGRATOR_SHADE_SURFACE_RAYTRACE
@ DEVICE_KERNEL_INTEGRATOR_SORT_BUCKET_PASS
@ DEVICE_KERNEL_INTEGRATOR_INTERSECT_DEDICATED_LIGHT
@ DEVICE_KERNEL_FILTER_GUIDING_PREPROCESS
@ DEVICE_KERNEL_INTEGRATOR_COMPACT_STATES
@ DEVICE_KERNEL_INTEGRATOR_SHADE_SURFACE_MNEE
@ DEVICE_KERNEL_INTEGRATOR_TERMINATED_SHADOW_PATHS_ARRAY
@ DEVICE_KERNEL_INTEGRATOR_MEGAKERNEL
@ DEVICE_KERNEL_INTEGRATOR_ACTIVE_PATHS_ARRAY
@ DEVICE_KERNEL_INTEGRATOR_INIT_FROM_CAMERA
@ DEVICE_KERNEL_ADAPTIVE_SAMPLING_CONVERGENCE_FILTER_Y
@ DEVICE_KERNEL_ADAPTIVE_SAMPLING_CONVERGENCE_FILTER_X
@ DEVICE_KERNEL_CRYPTOMATTE_POSTPROCESS
@ DEVICE_KERNEL_INTEGRATOR_COMPACT_PATHS_ARRAY
@ DEVICE_KERNEL_INTEGRATOR_INIT_FROM_BAKE
@ DEVICE_KERNEL_SHADER_EVAL_CURVE_SHADOW_TRANSPARENCY
@ DEVICE_KERNEL_INTEGRATOR_SHADE_VOLUME
@ DEVICE_KERNEL_INTEGRATOR_INTERSECT_SHADOW
@ DEVICE_KERNEL_INTEGRATOR_INTERSECT_CLOSEST
@ DEVICE_KERNEL_INTEGRATOR_SHADE_BACKGROUND
@ DEVICE_KERNEL_PREFIX_SUM
#define GPU_PARALLEL_ACTIVE_INDEX_DEFAULT_BLOCK_SIZE
#define GPU_PARALLEL_PREFIX_SUM_DEFAULT_BLOCK_SIZE
#define GPU_PARALLEL_SORTED_INDEX_DEFAULT_BLOCK_SIZE
#define GPU_PARALLEL_SORT_BLOCK_SIZE