26# define __KERNEL_OPTIX__
// Executes one OptiX task with optixTaskExecute and, when that call returns
// OPTIX_SUCCESS, pushes one job per reported follow-up task onto `pool`; each job
// calls execute_optix_task again with the same `pool` and `failure_reason`.
// NOTE(review): the lines after the recursive call are missing from this listing
// (the embedded file line numbers jump from 40 to 50), so how a non-success result
// is reported through `failure_reason` cannot be confirmed from here.
31static void execute_optix_task(
TaskPool &pool, OptixTask task, OptixResult &failure_reason)
// Fixed capacity of 16 follow-up tasks; the same 16 is passed to optixTaskExecute below.
33 OptixTask additional_tasks[16];
34 unsigned int num_additional_tasks = 0;
36 const OptixResult
result = optixTaskExecute(task, additional_tasks, 16, &num_additional_tasks);
37 if (
result == OPTIX_SUCCESS) {
38 for (
unsigned int i = 0;
i < num_additional_tasks; ++
i) {
// The task handle is copied into the closure (init-capture), because the local array
// goes out of scope when this call returns; `pool` and `failure_reason` are captured
// by reference.
39 pool.
push([&pool, additional_task = additional_tasks[
i], &failure_reason] {
40 execute_optix_task(pool, additional_task, failure_reason);
// Constructor fragment (the signature line is missing from this listing).
// Visible work: forwards info/stats/profiler/headless to the CUDADevice base,
// constructs `launch_params` with the name "kernel_params", creates the OptiX device
// context on `cuContext`, and allocates one element of `launch_params` on the device.
50 : CUDADevice(info, stats, profiler, headless),
55 launch_params(
this,
"kernel_params",
false)
// Scope object constructed from `this`; presumably makes the CUDA context current
// for the calls below — TODO confirm against CUDAContextScope.
62 const CUDAContextScope scope(
this);
65 OptixDeviceContextOptions
options = {};
66# ifdef WITH_CYCLES_LOGGING
// Log callback is only installed in builds with WITH_CYCLES_LOGGING; its body is not
// visible in this listing.
68 options.logCallbackFunction = [](
unsigned int level,
const char *,
const char *message,
void *) {
// NOTE(review): any condition guarding this assignment is not visible here
// (file lines 69-88 are missing).
89 options.validationMode = OPTIX_DEVICE_CONTEXT_VALIDATION_MODE_ALL;
91 optix_assert(optixDeviceContextCreate(cuContext, &
options, &context));
92# ifdef WITH_CYCLES_LOGGING
93 optix_assert(optixDeviceContextSetLogCallback(
101 launch_params.alloc_to_device(1);
// Destructor: releases delayed BVH memory and the launch parameters, destroys every
// non-null OptiX module, pipeline and program group (including the OSL ones), frees
// the OSL color system buffer and finally destroys the OptiX device context.
// Return codes of the optix*Destroy calls are not checked in the visible lines.
104OptiXDevice::~OptiXDevice()
107 const CUDAContextScope scope(
this);
109 free_bvh_memory_delayed();
113 launch_params.free();
116 if (optix_module !=
nullptr) {
117 optixModuleDestroy(optix_module);
// Two built-in module slots; load_kernels fills index 0 and index 1.
119 for (
int i = 0;
i < 2; ++
i) {
120 if (builtin_modules[
i] !=
nullptr) {
121 optixModuleDestroy(builtin_modules[
i]);
124 for (
int i = 0;
i < NUM_PIPELINES; ++
i) {
125 if (pipelines[
i] !=
nullptr) {
126 optixPipelineDestroy(pipelines[
i]);
129 for (
int i = 0;
i < NUM_PROGRAM_GROUPS; ++
i) {
130 if (groups[
i] !=
nullptr) {
131 optixProgramGroupDestroy(groups[
i]);
// OSL-specific resources.
136 if (osl_camera_module !=
nullptr) {
137 optixModuleDestroy(osl_camera_module);
// NOTE(review): unlike the group loop below, no null check is visible for the
// per-module destroy (file line 140 is missing from this listing) — confirm.
139 for (
const OptixModule &
module : osl_modules) {
141 optixModuleDestroy(
module);
144 for (
const OptixProgramGroup &group : osl_groups) {
145 if (group !=
nullptr) {
146 optixProgramGroupDestroy(group);
149 osl_colorsystem.free();
// The context is destroyed last, after everything created from it.
152 optixDeviceContextDestroy(context);
// Fragment of a factory method whose signature is not part of this listing:
// returns a newly created OptiXDeviceQueue constructed from this device.
157 return make_unique<OptiXDeviceQueue>(
this);
// Resolves the OptiX SDK "include" directory.
// Lookup order in the visible lines:
//   1. the OPTIX_ROOT_DIR environment variable, when set and non-empty;
//   2. the compile-time CYCLES_RUNTIME_OPTIX_ROOT_DIR, when non-empty.
// NOTE(review): the fallthrough return is missing from this listing; load_kernels
// tests the result with .empty(), so an empty string is presumably returned when
// neither source is configured — confirm.
// Neither candidate is checked for existence on disk here.
166static string get_optix_include_dir()
168 const char *env_dir = getenv(
"OPTIX_ROOT_DIR");
169 const char *default_dir = CYCLES_RUNTIME_OPTIX_ROOT_DIR;
// getenv may return null, hence the pointer check before reading the first character.
171 if (env_dir && env_dir[0]) {
172 const string env_include_dir =
path_join(env_dir,
"include");
173 return env_include_dir;
175 if (default_dir[0]) {
176 const string default_include_dir =
path_join(default_dir,
"include");
177 return default_include_dir;
// Builds the compiler flags for runtime kernel compilation: starts from the
// CUDADevice flags for `kernel_features` and appends OptiX-specific options.
// Returns the combined flag string.
183string OptiXDevice::compile_kernel_get_common_cflags(
const uint kernel_features)
185 string common_cflags = CUDADevice::compile_kernel_get_common_cflags(kernel_features);
// Add the OptiX SDK include directory (quoted, so paths with spaces survive).
188 common_cflags +=
string_printf(
" -I\"%s\"", get_optix_include_dir().c_str());
// NOTE(review): file lines 189-191 are missing from this listing, so whether this
// flag is appended unconditionally or only for certain kernel features cannot be
// confirmed from here.
192 common_cflags +=
" --keep-device-functions";
195 return common_cflags;
// Creates an OptiX module with optixModuleCreateWithTasks and, if creation succeeded,
// runs the returned compile task through execute_optix_task on `pool`.
// NOTE(review): most of the parameter list and the arguments of the create call are
// missing from this listing; callers pass (pool, module_options, ptx data, output
// module, result).
198void OptiXDevice::create_optix_module(
TaskPool &pool,
199 OptixModuleCompileOptions &module_options,
204 OptixTask task =
nullptr;
205 result = optixModuleCreateWithTasks(context,
// `result` doubles as the failure_reason argument of execute_optix_task, so the
// executed task chain receives a reference to the same variable callers inspect.
214 if (
result == OPTIX_SUCCESS) {
215 execute_optix_task(pool, task,
result);
// Loads the kernels for this device: calls CUDADevice::load_kernels and then builds
// the OptiX state from the kernel PTX — main module, built-in curve modules, program
// groups, shader binding table records and the PIP_SHADE / PIP_INTERSECT pipelines
// with their stack sizes.
// Returns !have_error() at the end of the visible path.
// NOTE(review): this listing drops many lines (braces, else-branches, some conditions),
// so the exact branch structure must be confirmed against the full file.
219bool OptiXDevice::load_kernels(
const uint kernel_features)
// NOTE(review): these are the only visible definitions of the two OSL flags; the
// preceding lines are missing, so they may belong to a conditional-compilation branch.
231 const bool use_osl_shading =
false;
232 const bool use_osl_camera =
false;
236 const bool need_optix_kernels = (kernel_features &
242 string suffix = use_osl_shading ?
"_osl" :
// When runtime compilation would be required (adaptive compilation enabled, or the
// prebuilt PTX file is absent: path_file_size == -1), the OptiX SDK include directory
// must be available; the strings below are the error texts for the failure cases.
247 if (need_optix_kernels) {
248 ptx_filename =
path_get(
"lib/kernel_optix" + suffix +
".ptx.zst");
249 if (use_adaptive_compilation() ||
path_file_size(ptx_filename) == -1) {
250 std::string optix_include_dir = get_optix_include_dir();
251 if (optix_include_dir.empty()) {
253 "Unable to compile OptiX kernels at runtime. Set OPTIX_ROOT_DIR environment variable "
254 "to a directory containing the OptiX SDK.");
259 "OptiX headers not found at %s, unable to compile OptiX kernels at runtime. Install "
260 "OptiX SDK in the specified location, or set OPTIX_ROOT_DIR environment variable to a "
261 "directory containing the OptiX SDK.",
262 optix_include_dir.c_str()));
// Base CUDA kernels are loaded first; the bodies of both checks are not visible here.
269 if (!CUDADevice::load_kernels(kernel_features)) {
273 if (!need_optix_kernels) {
277 const CUDAContextScope scope(
this);
// Tear down anything left over from a previous load, resetting handles to nullptr
// where the visible lines do so.
280 if (optix_module !=
nullptr) {
281 optixModuleDestroy(optix_module);
282 optix_module =
nullptr;
284 for (
int i = 0;
i < 2; ++
i) {
285 if (builtin_modules[
i] !=
nullptr) {
286 optixModuleDestroy(builtin_modules[
i]);
287 builtin_modules[
i] =
nullptr;
290 for (
int i = 0;
i < NUM_PIPELINES; ++
i) {
291 if (pipelines[
i] !=
nullptr) {
292 optixPipelineDestroy(pipelines[
i]);
293 pipelines[
i] =
nullptr;
296 for (
int i = 0;
i < NUM_PROGRAM_GROUPS; ++
i) {
297 if (groups[
i] !=
nullptr) {
298 optixProgramGroupDestroy(groups[
i]);
304 if (osl_camera_module !=
nullptr) {
305 optixModuleDestroy(osl_camera_module);
306 osl_camera_module =
nullptr;
310 for (
const OptixModule &
module : osl_modules) {
312 optixModuleDestroy(
module);
317 for (
const OptixProgramGroup &group : osl_groups) {
318 if (group !=
nullptr) {
319 optixProgramGroupDestroy(group);
// Module compile options. Two optLevel/debugLevel pairs are visible (LEVEL_0 + FULL
// debug, and LEVEL_3 + no debug); the condition choosing between them is missing from
// this listing.
325 OptixModuleCompileOptions module_options = {};
326 module_options.maxRegisterCount = 0;
329 module_options.optLevel = OPTIX_COMPILE_OPTIMIZATION_LEVEL_0;
330 module_options.debugLevel = OPTIX_COMPILE_DEBUG_LEVEL_FULL;
333 module_options.optLevel = OPTIX_COMPILE_OPTIMIZATION_LEVEL_3;
334 module_options.debugLevel = OPTIX_COMPILE_DEBUG_LEVEL_NONE;
337 module_options.boundValues =
nullptr;
338 module_options.numBoundValues = 0;
339 module_options.payloadTypes =
nullptr;
340 module_options.numPayloadTypes = 0;
// Pipeline compile options. The launch-parameter variable name matches the name given
// to `launch_params` in the constructor ("kernel_params").
343 pipeline_options.usesMotionBlur =
false;
344 pipeline_options.traversableGraphFlags =
345 OPTIX_TRAVERSABLE_GRAPH_FLAG_ALLOW_SINGLE_LEVEL_INSTANCING;
346 pipeline_options.numPayloadValues = 8;
347 pipeline_options.numAttributeValues = 2;
348 pipeline_options.exceptionFlags = OPTIX_EXCEPTION_FLAG_NONE;
349 pipeline_options.pipelineLaunchParamsVariableName =
"kernel_params";
// Primitive types start with triangles; further flags are OR-ed in below. The
// conditions guarding each addition (and the motion blur / ALLOW_ANY overrides) are
// not visible in this listing.
351 pipeline_options.usesPrimitiveTypeFlags = OPTIX_PRIMITIVE_TYPE_FLAGS_TRIANGLE;
354 pipeline_options.usesPrimitiveTypeFlags |= OPTIX_PRIMITIVE_TYPE_FLAGS_ROUND_CATMULLROM;
357 pipeline_options.usesPrimitiveTypeFlags |= OPTIX_PRIMITIVE_TYPE_FLAGS_CUSTOM;
361 pipeline_options.usesPrimitiveTypeFlags |= OPTIX_PRIMITIVE_TYPE_FLAGS_CUSTOM;
368 pipeline_options.usesMotionBlur =
true;
371 pipeline_options.traversableGraphFlags = OPTIX_TRAVERSABLE_GRAPH_FLAG_ALLOW_ANY;
// Compile the PTX at runtime under the same condition that was checked earlier
// (adaptive compilation or missing prebuilt file); compile_kernel returns the path of
// the compiled file, which replaces ptx_filename.
376 if (use_adaptive_compilation() ||
path_file_size(ptx_filename) == -1) {
377 string cflags = compile_kernel_get_common_cflags(kernel_features);
378 ptx_filename = compile_kernel(cflags, (
"kernel" + suffix).c_str(),
"optix",
true);
381 set_error(
string_printf(
"Failed to load OptiX kernel from '%s'", ptx_filename.c_str()));
// Create the main module from the PTX data; `result` carries the outcome.
387 create_optix_module(pool, module_options, ptx_data, optix_module,
result);
389 if (
result != OPTIX_SUCCESS) {
390 set_error(
string_printf(
"Failed to load OptiX kernel from '%s' (%s)",
391 ptx_filename.c_str(),
392 optixGetErrorName(
result)));
// Program group descriptors, indexed by the PG_* constants. Entries left untouched
// stay zero-initialized.
398 OptixProgramGroupDesc group_descs[NUM_PROGRAM_GROUPS] = {};
399 OptixProgramGroupOptions group_options = {};
// Ray-generation programs for the intersection kernels.
400 group_descs[PG_RGEN_INTERSECT_CLOSEST].kind = OPTIX_PROGRAM_GROUP_KIND_RAYGEN;
401 group_descs[PG_RGEN_INTERSECT_CLOSEST].raygen.module = optix_module;
402 group_descs[PG_RGEN_INTERSECT_CLOSEST].raygen.entryFunctionName =
403 "__raygen__kernel_optix_integrator_intersect_closest";
404 group_descs[PG_RGEN_INTERSECT_SHADOW].kind = OPTIX_PROGRAM_GROUP_KIND_RAYGEN;
405 group_descs[PG_RGEN_INTERSECT_SHADOW].raygen.module = optix_module;
406 group_descs[PG_RGEN_INTERSECT_SHADOW].raygen.entryFunctionName =
407 "__raygen__kernel_optix_integrator_intersect_shadow";
408 group_descs[PG_RGEN_INTERSECT_SUBSURFACE].kind = OPTIX_PROGRAM_GROUP_KIND_RAYGEN;
409 group_descs[PG_RGEN_INTERSECT_SUBSURFACE].raygen.module = optix_module;
410 group_descs[PG_RGEN_INTERSECT_SUBSURFACE].raygen.entryFunctionName =
411 "__raygen__kernel_optix_integrator_intersect_subsurface";
412 group_descs[PG_RGEN_INTERSECT_VOLUME_STACK].kind = OPTIX_PROGRAM_GROUP_KIND_RAYGEN;
413 group_descs[PG_RGEN_INTERSECT_VOLUME_STACK].raygen.module = optix_module;
414 group_descs[PG_RGEN_INTERSECT_VOLUME_STACK].raygen.entryFunctionName =
415 "__raygen__kernel_optix_integrator_intersect_volume_stack";
416 group_descs[PG_RGEN_INTERSECT_DEDICATED_LIGHT].kind = OPTIX_PROGRAM_GROUP_KIND_RAYGEN;
417 group_descs[PG_RGEN_INTERSECT_DEDICATED_LIGHT].raygen.module = optix_module;
418 group_descs[PG_RGEN_INTERSECT_DEDICATED_LIGHT].raygen.entryFunctionName =
419 "__raygen__kernel_optix_integrator_intersect_dedicated_light";
// Miss program and the hit groups. PG_HITS sets only an any-hit program; PG_HITD and
// PG_HITV set both closest-hit and any-hit.
420 group_descs[PG_MISS].kind = OPTIX_PROGRAM_GROUP_KIND_MISS;
421 group_descs[PG_MISS].miss.module = optix_module;
422 group_descs[PG_MISS].miss.entryFunctionName =
"__miss__kernel_optix_miss";
423 group_descs[PG_HITD].kind = OPTIX_PROGRAM_GROUP_KIND_HITGROUP;
424 group_descs[PG_HITD].hitgroup.moduleCH = optix_module;
425 group_descs[PG_HITD].hitgroup.entryFunctionNameCH =
"__closesthit__kernel_optix_hit";
426 group_descs[PG_HITD].hitgroup.moduleAH = optix_module;
427 group_descs[PG_HITD].hitgroup.entryFunctionNameAH =
"__anyhit__kernel_optix_visibility_test";
428 group_descs[PG_HITS].kind = OPTIX_PROGRAM_GROUP_KIND_HITGROUP;
429 group_descs[PG_HITS].hitgroup.moduleAH = optix_module;
430 group_descs[PG_HITS].hitgroup.entryFunctionNameAH =
"__anyhit__kernel_optix_shadow_all_hit";
431 group_descs[PG_HITV].kind = OPTIX_PROGRAM_GROUP_KIND_HITGROUP;
432 group_descs[PG_HITV].hitgroup.moduleCH = optix_module;
433 group_descs[PG_HITV].hitgroup.entryFunctionNameCH =
"__closesthit__kernel_optix_hit";
434 group_descs[PG_HITV].hitgroup.moduleAH = optix_module;
435 group_descs[PG_HITV].hitgroup.entryFunctionNameAH =
"__anyhit__kernel_optix_volume_test";
// Shared descriptor using the "ignore" closest-hit/any-hit programs; it is assigned
// to the *_MOTION and *_POINTCLOUD variants of PG_HITV and PG_HITL below.
437 OptixProgramGroupDesc ignore_desc = {};
438 ignore_desc.kind = OPTIX_PROGRAM_GROUP_KIND_HITGROUP;
439 ignore_desc.hitgroup.moduleCH = optix_module;
440 ignore_desc.hitgroup.entryFunctionNameCH =
"__closesthit__kernel_optix_ignore";
441 ignore_desc.hitgroup.moduleAH = optix_module;
442 ignore_desc.hitgroup.entryFunctionNameAH =
"__anyhit__kernel_optix_ignore";
// Built-in intersection module for round Catmull-Rom curves. The build flags set here
// are the same three flags build_optix_bvh uses for OPTIX_BUILD_INPUT_TYPE_CURVES.
447 OptixBuiltinISOptions builtin_options = {};
448 builtin_options.builtinISModuleType = OPTIX_PRIMITIVE_TYPE_ROUND_CATMULLROM;
449 builtin_options.buildFlags = OPTIX_BUILD_FLAG_PREFER_FAST_TRACE |
450 OPTIX_BUILD_FLAG_ALLOW_COMPACTION |
451 OPTIX_BUILD_FLAG_ALLOW_UPDATE;
452 builtin_options.curveEndcapFlags = OPTIX_CURVE_ENDCAP_DEFAULT;
453 builtin_options.usesMotionBlur =
false;
455 optix_assert(optixBuiltinISModuleGet(
456 context, &module_options, &pipeline_options, &builtin_options, &builtin_modules[0]));
// With a built-in IS module the entry function name is left null.
458 group_descs[PG_HITD].hitgroup.moduleIS = builtin_modules[0];
459 group_descs[PG_HITD].hitgroup.entryFunctionNameIS =
nullptr;
460 group_descs[PG_HITS].hitgroup.moduleIS = builtin_modules[0];
461 group_descs[PG_HITS].hitgroup.entryFunctionNameIS =
nullptr;
// Second built-in module (slot 1) is requested with usesMotionBlur = true and used by
// the *_MOTION copies of the hit groups.
463 if (pipeline_options.usesMotionBlur) {
464 builtin_options.usesMotionBlur =
true;
466 optix_assert(optixBuiltinISModuleGet(
467 context, &module_options, &pipeline_options, &builtin_options, &builtin_modules[1]));
469 group_descs[PG_HITD_MOTION] = group_descs[PG_HITD];
470 group_descs[PG_HITD_MOTION].hitgroup.moduleIS = builtin_modules[1];
471 group_descs[PG_HITS_MOTION] = group_descs[PG_HITS];
472 group_descs[PG_HITS_MOTION].hitgroup.moduleIS = builtin_modules[1];
473 group_descs[PG_HITV_MOTION] = ignore_desc;
474 group_descs[PG_HITL_MOTION] = ignore_desc;
// Custom ribbon-curve intersection from the main module.
// NOTE(review): the branch selecting this over the built-in module is not visible.
479 group_descs[PG_HITD].hitgroup.moduleIS = optix_module;
480 group_descs[PG_HITS].hitgroup.moduleIS = optix_module;
481 group_descs[PG_HITD].hitgroup.entryFunctionNameIS =
"__intersection__curve_ribbon";
482 group_descs[PG_HITS].hitgroup.entryFunctionNameIS =
"__intersection__curve_ribbon";
// Point cloud hit groups: copies of PG_HITD / PG_HITS with the point intersection
// program substituted.
487 group_descs[PG_HITD_POINTCLOUD] = group_descs[PG_HITD];
488 group_descs[PG_HITD_POINTCLOUD].kind = OPTIX_PROGRAM_GROUP_KIND_HITGROUP;
489 group_descs[PG_HITD_POINTCLOUD].hitgroup.moduleIS = optix_module;
490 group_descs[PG_HITD_POINTCLOUD].hitgroup.entryFunctionNameIS =
"__intersection__point";
491 group_descs[PG_HITS_POINTCLOUD] = group_descs[PG_HITS];
492 group_descs[PG_HITS_POINTCLOUD].kind = OPTIX_PROGRAM_GROUP_KIND_HITGROUP;
493 group_descs[PG_HITS_POINTCLOUD].hitgroup.moduleIS = optix_module;
494 group_descs[PG_HITS_POINTCLOUD].hitgroup.entryFunctionNameIS =
"__intersection__point";
495 group_descs[PG_HITV_POINTCLOUD] = ignore_desc;
496 group_descs[PG_HITL_POINTCLOUD] = ignore_desc;
// Local-hit group: any-hit program only.
501 group_descs[PG_HITL].kind = OPTIX_PROGRAM_GROUP_KIND_HITGROUP;
502 group_descs[PG_HITL].hitgroup.moduleAH = optix_module;
503 group_descs[PG_HITL].hitgroup.entryFunctionNameAH =
"__anyhit__kernel_optix_local_hit";
// Shading ray-generation programs and direct callables.
508 group_descs[PG_RGEN_SHADE_SURFACE_RAYTRACE].kind = OPTIX_PROGRAM_GROUP_KIND_RAYGEN;
509 group_descs[PG_RGEN_SHADE_SURFACE_RAYTRACE].raygen.module = optix_module;
510 group_descs[PG_RGEN_SHADE_SURFACE_RAYTRACE].raygen.entryFunctionName =
511 "__raygen__kernel_optix_integrator_shade_surface_raytrace";
// The SVM AO/bevel callables are only set up when OSL shading is off.
515 if (!use_osl_shading) {
516 group_descs[PG_CALL_SVM_AO].kind = OPTIX_PROGRAM_GROUP_KIND_CALLABLES;
517 group_descs[PG_CALL_SVM_AO].callables.moduleDC = optix_module;
518 group_descs[PG_CALL_SVM_AO].callables.entryFunctionNameDC =
"__direct_callable__svm_node_ao";
519 group_descs[PG_CALL_SVM_BEVEL].kind = OPTIX_PROGRAM_GROUP_KIND_CALLABLES;
520 group_descs[PG_CALL_SVM_BEVEL].callables.moduleDC = optix_module;
521 group_descs[PG_CALL_SVM_BEVEL].callables.entryFunctionNameDC =
522 "__direct_callable__svm_node_bevel";
527 group_descs[PG_RGEN_SHADE_SURFACE_MNEE].kind = OPTIX_PROGRAM_GROUP_KIND_RAYGEN;
528 group_descs[PG_RGEN_SHADE_SURFACE_MNEE].raygen.module = optix_module;
529 group_descs[PG_RGEN_SHADE_SURFACE_MNEE].raygen.entryFunctionName =
530 "__raygen__kernel_optix_integrator_shade_surface_mnee";
// Additional raygen entry points registered only when OSL shading is in use.
534 if (use_osl_shading) {
535 group_descs[PG_RGEN_SHADE_BACKGROUND].kind = OPTIX_PROGRAM_GROUP_KIND_RAYGEN;
536 group_descs[PG_RGEN_SHADE_BACKGROUND].raygen.module = optix_module;
537 group_descs[PG_RGEN_SHADE_BACKGROUND].raygen.entryFunctionName =
538 "__raygen__kernel_optix_integrator_shade_background";
539 group_descs[PG_RGEN_SHADE_LIGHT].kind = OPTIX_PROGRAM_GROUP_KIND_RAYGEN;
540 group_descs[PG_RGEN_SHADE_LIGHT].raygen.module = optix_module;
541 group_descs[PG_RGEN_SHADE_LIGHT].raygen.entryFunctionName =
542 "__raygen__kernel_optix_integrator_shade_light";
543 group_descs[PG_RGEN_SHADE_SURFACE].kind = OPTIX_PROGRAM_GROUP_KIND_RAYGEN;
544 group_descs[PG_RGEN_SHADE_SURFACE].raygen.module = optix_module;
545 group_descs[PG_RGEN_SHADE_SURFACE].raygen.entryFunctionName =
546 "__raygen__kernel_optix_integrator_shade_surface";
547 group_descs[PG_RGEN_SHADE_VOLUME].kind = OPTIX_PROGRAM_GROUP_KIND_RAYGEN;
548 group_descs[PG_RGEN_SHADE_VOLUME].raygen.module = optix_module;
549 group_descs[PG_RGEN_SHADE_VOLUME].raygen.entryFunctionName =
550 "__raygen__kernel_optix_integrator_shade_volume";
551 group_descs[PG_RGEN_SHADE_SHADOW].kind = OPTIX_PROGRAM_GROUP_KIND_RAYGEN;
552 group_descs[PG_RGEN_SHADE_SHADOW].raygen.module = optix_module;
553 group_descs[PG_RGEN_SHADE_SHADOW].raygen.entryFunctionName =
554 "__raygen__kernel_optix_integrator_shade_shadow";
555 group_descs[PG_RGEN_SHADE_DEDICATED_LIGHT].kind = OPTIX_PROGRAM_GROUP_KIND_RAYGEN;
556 group_descs[PG_RGEN_SHADE_DEDICATED_LIGHT].raygen.module = optix_module;
557 group_descs[PG_RGEN_SHADE_DEDICATED_LIGHT].raygen.entryFunctionName =
558 "__raygen__kernel_optix_integrator_shade_dedicated_light";
559 group_descs[PG_RGEN_EVAL_DISPLACE].kind = OPTIX_PROGRAM_GROUP_KIND_RAYGEN;
560 group_descs[PG_RGEN_EVAL_DISPLACE].raygen.module = optix_module;
561 group_descs[PG_RGEN_EVAL_DISPLACE].raygen.entryFunctionName =
562 "__raygen__kernel_optix_shader_eval_displace";
563 group_descs[PG_RGEN_EVAL_BACKGROUND].kind = OPTIX_PROGRAM_GROUP_KIND_RAYGEN;
564 group_descs[PG_RGEN_EVAL_BACKGROUND].raygen.module = optix_module;
565 group_descs[PG_RGEN_EVAL_BACKGROUND].raygen.entryFunctionName =
566 "__raygen__kernel_optix_shader_eval_background";
567 group_descs[PG_RGEN_EVAL_CURVE_SHADOW_TRANSPARENCY].kind = OPTIX_PROGRAM_GROUP_KIND_RAYGEN;
568 group_descs[PG_RGEN_EVAL_CURVE_SHADOW_TRANSPARENCY].raygen.module = optix_module;
569 group_descs[PG_RGEN_EVAL_CURVE_SHADOW_TRANSPARENCY].raygen.entryFunctionName =
570 "__raygen__kernel_optix_shader_eval_curve_shadow_transparency";
// OSL camera: a separate module loaded from its own PTX file provides the
// init-from-camera raygen program. These locals shadow the outer ptx_filename.
575 if (use_osl_camera) {
577 string ptx_data, ptx_filename =
path_get(
"lib/kernel_optix_osl_camera.ptx.zst");
580 string_printf(
"Failed to load OptiX OSL camera kernel from '%s'", ptx_filename.c_str()));
586 create_optix_module(pool, module_options, ptx_data, osl_camera_module,
result);
588 if (
result != OPTIX_SUCCESS) {
589 set_error(
string_printf(
"Failed to load OptiX kernel from '%s' (%s)",
590 ptx_filename.c_str(),
591 optixGetErrorName(
result)));
595 group_descs[PG_RGEN_INIT_FROM_CAMERA].kind = OPTIX_PROGRAM_GROUP_KIND_RAYGEN;
596 group_descs[PG_RGEN_INIT_FROM_CAMERA].raygen.module = osl_camera_module;
597 group_descs[PG_RGEN_INIT_FROM_CAMERA].raygen.entryFunctionName =
598 "__raygen__kernel_optix_integrator_init_from_camera";
// Create all program groups in one call; no log buffer is passed (both null).
602 optix_assert(optixProgramGroupCreate(
603 context, group_descs, NUM_PROGRAM_GROUPS, &group_options,
nullptr,
nullptr, groups));
// Fill the shader binding table: one zeroed record per program group, with the packed
// header written per group; stack sizes are queried in the same loop.
606 OptixStackSizes stack_size[NUM_PROGRAM_GROUPS] = {};
608 sbt_data.alloc(NUM_PROGRAM_GROUPS);
609 memset(sbt_data.host_pointer, 0,
sizeof(SbtRecord) * NUM_PROGRAM_GROUPS);
610 for (
int i = 0;
i < NUM_PROGRAM_GROUPS; ++
i) {
611 optix_assert(optixSbtRecordPackHeader(groups[
i], &sbt_data[
i]));
612 optix_assert(optixProgramGroupGetStackSize(groups[
i], &stack_size[
i],
nullptr));
614 sbt_data.copy_to_device();
// Continuation stack needed by a trace call: the closest-hit size of PG_HITD, or the
// largest intersection + any-hit sum over the hit groups listed, whichever is bigger.
617 unsigned int trace_css = stack_size[PG_HITD].cssCH;
619 trace_css = std::max(trace_css, stack_size[PG_HITD].cssIS + stack_size[PG_HITD].cssAH);
620 trace_css = std::max(trace_css, stack_size[PG_HITS].cssIS + stack_size[PG_HITS].cssAH);
621 trace_css = std::max(trace_css, stack_size[PG_HITL].cssIS + stack_size[PG_HITL].cssAH);
622 trace_css = std::max(trace_css, stack_size[PG_HITV].cssIS + stack_size[PG_HITV].cssAH);
623 trace_css = std::max(trace_css,
624 stack_size[PG_HITD_MOTION].cssIS + stack_size[PG_HITD_MOTION].cssAH);
625 trace_css = std::max(trace_css,
626 stack_size[PG_HITS_MOTION].cssIS + stack_size[PG_HITS_MOTION].cssAH);
627 trace_css = std::max(
628 trace_css, stack_size[PG_HITD_POINTCLOUD].cssIS + stack_size[PG_HITD_POINTCLOUD].cssAH);
629 trace_css = std::max(
630 trace_css, stack_size[PG_HITS_POINTCLOUD].cssIS + stack_size[PG_HITS_POINTCLOUD].cssAH);
632 OptixPipelineLinkOptions link_options = {};
633 link_options.maxTraceDepth = 1;
// NOTE(review): the body of this OSL branch and the start of the alternative branch
// are missing from this listing; the PIP_SHADE setup below presumably belongs to the
// non-OSL path (load_osl_kernels builds its own PIP_SHADE) — confirm.
635 if (use_osl_shading || use_osl_camera) {
641 pipeline_groups.reserve(NUM_PROGRAM_GROUPS);
643 pipeline_groups.push_back(groups[PG_RGEN_SHADE_SURFACE_RAYTRACE]);
644 pipeline_groups.push_back(groups[PG_CALL_SVM_AO]);
645 pipeline_groups.push_back(groups[PG_CALL_SVM_BEVEL]);
648 pipeline_groups.push_back(groups[PG_RGEN_SHADE_SURFACE_MNEE]);
650 pipeline_groups.push_back(groups[PG_MISS]);
651 pipeline_groups.push_back(groups[PG_HITD]);
652 pipeline_groups.push_back(groups[PG_HITS]);
653 pipeline_groups.push_back(groups[PG_HITL]);
654 pipeline_groups.push_back(groups[PG_HITV]);
655 if (pipeline_options.usesMotionBlur) {
656 pipeline_groups.push_back(groups[PG_HITD_MOTION]);
657 pipeline_groups.push_back(groups[PG_HITS_MOTION]);
658 pipeline_groups.push_back(groups[PG_HITV_MOTION]);
659 pipeline_groups.push_back(groups[PG_HITL_MOTION]);
662 pipeline_groups.push_back(groups[PG_HITD_POINTCLOUD]);
663 pipeline_groups.push_back(groups[PG_HITS_POINTCLOUD]);
664 pipeline_groups.push_back(groups[PG_HITV_POINTCLOUD]);
665 pipeline_groups.push_back(groups[PG_HITL_POINTCLOUD]);
668 optix_assert(optixPipelineCreate(context,
671 pipeline_groups.data(),
672 pipeline_groups.size(),
675 &pipelines[PIP_SHADE]));
// Shade pipeline stack: largest raygen continuation size plus one trace_css per trace
// level; direct-callable stack is the larger of the two SVM callables.
678 const unsigned int css = std::max(stack_size[PG_RGEN_SHADE_SURFACE_RAYTRACE].cssRG,
679 stack_size[PG_RGEN_SHADE_SURFACE_MNEE].cssRG) +
680 link_options.maxTraceDepth * trace_css;
681 const unsigned int dss = std::max(stack_size[PG_CALL_SVM_AO].dssDC,
682 stack_size[PG_CALL_SVM_BEVEL].dssDC);
// Last argument is the maximum traversable graph depth: 3 with motion blur, else 2.
685 optix_assert(optixPipelineSetStackSize(
686 pipelines[PIP_SHADE], 0, dss, css, pipeline_options.usesMotionBlur ? 3 : 2));
// Intersection pipeline (PIP_INTERSECT).
691 pipeline_groups.reserve(NUM_PROGRAM_GROUPS);
692 pipeline_groups.push_back(groups[PG_RGEN_INTERSECT_CLOSEST]);
693 pipeline_groups.push_back(groups[PG_RGEN_INTERSECT_SHADOW]);
694 pipeline_groups.push_back(groups[PG_RGEN_INTERSECT_SUBSURFACE]);
695 pipeline_groups.push_back(groups[PG_RGEN_INTERSECT_VOLUME_STACK]);
696 pipeline_groups.push_back(groups[PG_RGEN_INTERSECT_DEDICATED_LIGHT]);
697 pipeline_groups.push_back(groups[PG_MISS]);
698 pipeline_groups.push_back(groups[PG_HITD]);
699 pipeline_groups.push_back(groups[PG_HITS]);
700 pipeline_groups.push_back(groups[PG_HITL]);
701 pipeline_groups.push_back(groups[PG_HITV]);
702 if (pipeline_options.usesMotionBlur) {
703 pipeline_groups.push_back(groups[PG_HITD_MOTION]);
704 pipeline_groups.push_back(groups[PG_HITS_MOTION]);
707 pipeline_groups.push_back(groups[PG_HITD_POINTCLOUD]);
708 pipeline_groups.push_back(groups[PG_HITS_POINTCLOUD]);
711 optix_assert(optixPipelineCreate(context,
714 pipeline_groups.data(),
715 pipeline_groups.size(),
718 &pipelines[PIP_INTERSECT]));
// NOTE(review): PG_RGEN_INTERSECT_DEDICATED_LIGHT is linked into this pipeline above
// but its cssRG is not part of the max below; confirm whether that omission is
// intentional, since an undersized continuation stack would affect that kernel.
721 const unsigned int css =
722 std::max(stack_size[PG_RGEN_INTERSECT_CLOSEST].cssRG,
723 std::max(stack_size[PG_RGEN_INTERSECT_SHADOW].cssRG,
724 std::max(stack_size[PG_RGEN_INTERSECT_SUBSURFACE].cssRG,
725 stack_size[PG_RGEN_INTERSECT_VOLUME_STACK].cssRG))) +
726 link_options.maxTraceDepth * trace_css;
// No direct callables in this pipeline, hence a direct-callable stack size of 0.
728 optix_assert(optixPipelineSetStackSize(
729 pipelines[PIP_INTERSECT], 0, 0, css, pipeline_options.usesMotionBlur ? 3 : 2));
732 return !have_error();
// Builds the OSL part of the OptiX state: gathers PTX for every OSL shader group,
// compiles one module and one direct-callable program group per group (plus two extra
// slots for the OSL services and shadeops modules), rebuilds the shader binding table
// and the PIP_SHADE pipeline, and uploads the OSL color system data.
// Returns !have_error() at the end of the visible path.
// NOTE(review): many lines are missing from this listing (braces, else-branches,
// early returns); confirm the branch structure against the full file.
735bool OptiXDevice::load_osl_kernels()
// Helper: extracts the PTX text and fused entry name of one shader group and checks
// its group data size against the 2048 limit quoted in the error message.
// The return values of the getattribute calls are not checked in the visible lines.
747 auto get_osl_kernel = [&](
const OSL::ShaderGroupRef &group) {
751 string osl_ptx, fused_name;
752 osl_globals.ss->getattribute(group.get(),
"group_fused_name", fused_name);
753 osl_globals.ss->getattribute(
754 group.get(),
"ptx_compiled_version", OSL::TypeDesc::PTR, &osl_ptx);
// Fall back to "groupdata_size" when "llvm_groupdata_size" yields 0.
756 int groupdata_size = 0;
757 osl_globals.ss->getattribute(group.get(),
"llvm_groupdata_size", groupdata_size);
758 if (groupdata_size == 0) {
760 osl_globals.ss->getattribute(group.get(),
"groupdata_size", groupdata_size);
762 if (groupdata_size > 2048) {
764 string_printf(
"Requested OSL group data size (%d) is greater than the maximum "
765 "supported with OptiX (2048)",
770 return OSLKernel{std::move(osl_ptx), std::move(fused_name)};
// Kernel order: camera first, then surface, volume, displacement and bump groups.
// Module, group and SBT indices below all follow this order.
776 osl_kernels.emplace_back(get_osl_kernel(osl_globals.camera_state));
777 for (
const OSL::ShaderGroupRef &group : osl_globals.surface_state) {
778 osl_kernels.emplace_back(get_osl_kernel(group));
780 for (
const OSL::ShaderGroupRef &group : osl_globals.volume_state) {
781 osl_kernels.emplace_back(get_osl_kernel(group));
783 for (
const OSL::ShaderGroupRef &group : osl_globals.displacement_state) {
784 osl_kernels.emplace_back(get_osl_kernel(group));
786 for (
const OSL::ShaderGroupRef &group : osl_globals.bump_state) {
787 osl_kernels.emplace_back(get_osl_kernel(group));
794 const CUDAContextScope scope(
this);
// Destroy the previous shade pipeline and OSL modules/groups before rebuilding.
796 if (pipelines[PIP_SHADE]) {
797 optixPipelineDestroy(pipelines[PIP_SHADE]);
800 for (OptixModule &
module : osl_modules) {
802 optixModuleDestroy(
module);
806 for (OptixProgramGroup &group : osl_groups) {
807 if (group !=
nullptr) {
808 optixProgramGroupDestroy(group);
// Single kernel with empty PTX: the body of this branch is missing from this listing
// (presumably an early exit — confirm).
815 if (osl_kernels.size() == 1 && osl_kernels[0].ptx.empty()) {
819 OptixProgramGroupOptions group_options = {};
820 OptixModuleCompileOptions module_options = {};
821 module_options.optLevel = OPTIX_COMPILE_OPTIMIZATION_LEVEL_3;
822 module_options.debugLevel = OPTIX_COMPILE_DEBUG_LEVEL_NONE;
// The services and shadeops modules occupy the two slots after the per-kernel ones.
826 size_t id_osl_services = osl_kernels.size();
827 size_t id_osl_shadeops = osl_kernels.size() + 1;
828 osl_groups.resize(osl_kernels.size() + 2);
829 osl_modules.resize(osl_kernels.size() + 2);
832 string osl_services_ptx, ptx_filename =
path_get(
"lib/kernel_optix_osl_services.ptx.zst");
834 set_error(
string_printf(
"Failed to load OptiX OSL services kernel from '%s'",
835 ptx_filename.c_str()));
// Shadeops PTX is obtained from the shading system as pointer + size.
// NOTE(review): the getattribute results are not checked in the visible lines; if the
// pointer stays null the string below is built from (nullptr, 0) — confirm this is
// acceptable on all supported standard libraries.
839 const char *shadeops_ptx_ptr =
nullptr;
840 osl_globals.ss->getattribute(
"shadeops_cuda_ptx", OSL::TypeDesc::PTR, &shadeops_ptx_ptr);
841 int shadeops_ptx_size = 0;
842 osl_globals.ss->getattribute(
"shadeops_cuda_ptx_size", OSL::TypeDesc::INT, &shadeops_ptx_size);
843 string shadeops_ptx(shadeops_ptx_ptr, shadeops_ptx_size);
// Each module gets its own result variable so failures can be reported separately.
846 OptixResult services_result, shadeops_result;
848 pool, module_options, osl_services_ptx, osl_modules[id_osl_services], services_result);
850 pool, module_options, shadeops_ptx, osl_modules[id_osl_shadeops], shadeops_result);
854 if (services_result != OPTIX_SUCCESS) {
855 set_error(
string_printf(
"Failed to load OptiX OSL services kernel from '%s' (%s)",
856 ptx_filename.c_str(),
857 optixGetErrorName(services_result)));
// Program group for the services module, using its dummy direct-callable entry point.
860 OptixProgramGroupDesc group_desc = {};
861 group_desc.kind = OPTIX_PROGRAM_GROUP_KIND_CALLABLES;
862 group_desc.callables.entryFunctionNameDC =
"__direct_callable__dummy_services";
863 group_desc.callables.moduleDC = osl_modules[id_osl_services];
865 optix_assert(optixProgramGroupCreate(context,
871 &osl_groups[id_osl_services]));
875 if (shadeops_result != OPTIX_SUCCESS) {
876 set_error(
string_printf(
"Failed to load OptiX OSL shadeops kernel (%s)",
877 optixGetErrorName(shadeops_result)));
// Same for the shadeops module.
880 OptixProgramGroupDesc group_desc = {};
881 group_desc.kind = OPTIX_PROGRAM_GROUP_KIND_CALLABLES;
882 group_desc.callables.entryFunctionNameDC =
"__direct_callable__dummy_shadeops";
883 group_desc.callables.moduleDC = osl_modules[id_osl_shadeops];
885 optix_assert(optixProgramGroupCreate(context,
891 &osl_groups[id_osl_shadeops]));
// First pass: start module creation for every kernel with non-empty PTX, storing the
// outcome in results[i]. The handling of empty-PTX kernels is not visible.
898 for (
size_t i = 0;
i < osl_kernels.size(); ++
i) {
899 if (osl_kernels[
i].ptx.empty()) {
903 create_optix_module(pool, module_options, osl_kernels[
i].ptx, osl_modules[
i], results[
i]);
// Second pass: report failures and create one direct-callable program group per
// kernel, named by its fused entry.
908 for (
size_t i = 0;
i < osl_kernels.size(); ++
i) {
909 if (osl_kernels[
i].ptx.empty()) {
913 if (results[
i] != OPTIX_SUCCESS) {
914 set_error(
string_printf(
"Failed to load OptiX OSL kernel for %s (%s)",
915 osl_kernels[
i].fused_entry.c_str(),
916 optixGetErrorName(results[
i])));
920 OptixProgramGroupDesc group_desc = {};
921 group_desc.kind = OPTIX_PROGRAM_GROUP_KIND_CALLABLES;
922 group_desc.callables.entryFunctionNameDC = osl_kernels[
i].fused_entry.c_str();
923 group_desc.callables.moduleDC = osl_modules[
i];
925 optix_assert(optixProgramGroupCreate(
926 context, &group_desc, 1, &group_options,
nullptr,
nullptr, &osl_groups[
i]));
// Rebuild the SBT: the NUM_PROGRAM_GROUPS base records first, followed by one record
// per OSL group slot at offset NUM_PROGRAM_GROUPS + i.
930 sbt_data.alloc(NUM_PROGRAM_GROUPS + osl_groups.size());
931 for (
int i = 0;
i < NUM_PROGRAM_GROUPS; ++
i) {
932 optix_assert(optixSbtRecordPackHeader(groups[
i], &sbt_data[
i]));
934 for (
size_t i = 0;
i < osl_groups.size(); ++
i) {
935 if (osl_groups[
i] !=
nullptr) {
936 optix_assert(optixSbtRecordPackHeader(osl_groups[
i], &sbt_data[NUM_PROGRAM_GROUPS +
i]));
// Presumably the else-branch for null slots (the `else` line is missing from this
// listing): the record is filled with the services group header instead.
941 optix_assert(optixSbtRecordPackHeader(osl_groups[id_osl_services],
942 &sbt_data[NUM_PROGRAM_GROUPS +
i]));
945 sbt_data.copy_to_device();
// Shade pipeline for OSL: linked with trace depth 0.
947 OptixPipelineLinkOptions link_options = {};
948 link_options.maxTraceDepth = 0;
952 pipeline_groups.reserve(NUM_PROGRAM_GROUPS);
953 pipeline_groups.push_back(groups[PG_RGEN_SHADE_BACKGROUND]);
954 pipeline_groups.push_back(groups[PG_RGEN_SHADE_LIGHT]);
955 pipeline_groups.push_back(groups[PG_RGEN_SHADE_SURFACE]);
956 pipeline_groups.push_back(groups[PG_RGEN_SHADE_SURFACE_RAYTRACE]);
957 pipeline_groups.push_back(groups[PG_CALL_SVM_AO]);
958 pipeline_groups.push_back(groups[PG_CALL_SVM_BEVEL]);
959 pipeline_groups.push_back(groups[PG_RGEN_SHADE_SURFACE_MNEE]);
960 pipeline_groups.push_back(groups[PG_RGEN_SHADE_VOLUME]);
961 pipeline_groups.push_back(groups[PG_RGEN_SHADE_SHADOW]);
962 pipeline_groups.push_back(groups[PG_RGEN_SHADE_DEDICATED_LIGHT]);
963 pipeline_groups.push_back(groups[PG_RGEN_EVAL_DISPLACE]);
964 pipeline_groups.push_back(groups[PG_RGEN_EVAL_BACKGROUND]);
965 pipeline_groups.push_back(groups[PG_RGEN_EVAL_CURVE_SHADOW_TRANSPARENCY]);
966 pipeline_groups.push_back(groups[PG_RGEN_INIT_FROM_CAMERA]);
// Only non-null OSL groups are linked.
968 for (
const OptixProgramGroup &group : osl_groups) {
969 if (group !=
nullptr) {
970 pipeline_groups.push_back(group);
974 optix_assert(optixPipelineCreate(context,
977 pipeline_groups.data(),
978 pipeline_groups.size(),
981 &pipelines[PIP_SHADE]));
// Stack sizes: base groups are queried without a pipeline argument, OSL groups are
// queried against the newly created PIP_SHADE pipeline.
984 OptixStackSizes stack_size[NUM_PROGRAM_GROUPS] = {};
987 for (
int i = 0;
i < NUM_PROGRAM_GROUPS; ++
i) {
988 optix_assert(optixProgramGroupGetStackSize(groups[
i], &stack_size[
i],
nullptr));
990 for (
size_t i = 0;
i < osl_groups.size(); ++
i) {
991 if (osl_groups[
i] !=
nullptr) {
992 optix_assert(optixProgramGroupGetStackSize(
993 osl_groups[
i], &osl_stack_size[
i], pipelines[PIP_SHADE]));
// No trace term is added to css here, unlike in load_kernels.
997 const unsigned int css = std::max(stack_size[PG_RGEN_SHADE_SURFACE_RAYTRACE].cssRG,
998 stack_size[PG_RGEN_SHADE_SURFACE_MNEE].cssRG);
// Direct-callable stack: maximum over the SVM callables and every OSL group.
999 unsigned int dss = std::max(stack_size[PG_CALL_SVM_AO].dssDC,
1000 stack_size[PG_CALL_SVM_BEVEL].dssDC);
1001 for (
unsigned int i = 0;
i < osl_stack_size.size(); ++
i) {
1002 dss = std::max(dss, osl_stack_size[
i].dssDC);
1005 optix_assert(optixPipelineSetStackSize(
1006 pipelines[PIP_SHADE], 0, dss, css, pipeline_options.usesMotionBlur ? 3 : 2));
// Color system upload. sizes[0] is used as the total size of the CPU-side blob and
// sizes[1] as the number of trailing ustringhash entries.
// NOTE(review): a size_t array is read through a LONGLONG TypeDesc, which assumes
// size_t is 64 bits wide — confirm for all supported targets.
1018 size_t cpu_data_sizes[2] = {0, 0};
1019 osl_globals.ss->getattribute(
"colorsystem", OSL::TypeDesc::PTR, &cpu_data);
1020 osl_globals.ss->getattribute(
1021 "colorsystem:sizes",
TypeDesc(TypeDesc::LONGLONG, 2), (
void *)cpu_data_sizes);
1023 size_t cpu_full_size = cpu_data_sizes[0];
1024 size_t num_strings = cpu_data_sizes[1];
1025 size_t fixed_data_size = cpu_full_size -
sizeof(ustringhash) * num_strings;
// The device copy stores one size_t per string instead of a ustringhash, so its size
// is recomputed rather than reusing cpu_full_size.
1028 uint8_t *gpu_data = osl_colorsystem.alloc(fixed_data_size +
sizeof(
size_t) * num_strings);
// The fixed part is copied verbatim.
1031 memcpy(gpu_data, cpu_data, fixed_data_size);
// The string part is converted entry by entry to the hash value.
1034 ustringhash *cpu_strings =
reinterpret_cast<ustringhash *
>(cpu_data + fixed_data_size);
1035 size_t *gpu_strings =
reinterpret_cast<size_t *
>(gpu_data + fixed_data_size);
// NOTE(review): `int i` is compared against the size_t `num_strings`
// (signed/unsigned comparison).
1036 for (
int i = 0;
i < num_strings;
i++) {
1037 gpu_strings[
i] = cpu_strings[
i].hash();
1041 osl_colorsystem.copy_to_device();
1044 &osl_colorsystem.device_pointer,
1048 return !have_error();
// Returns a pointer to this device's `osl_globals` member.
// NOTE(review): file lines 1055-1056 are missing from this listing, so any
// conditional compilation around the return is not visible.
1054OSLGlobals *OptiXDevice::get_cpu_osl_memory()
1057 return &osl_globals;
// Builds or updates one OptiX acceleration structure for `bvh` from `build_input`.
//   operation         OPTIX_BUILD_OPERATION_BUILD or an update operation.
//   build_input       single build input passed to OptiX.
//   num_motion_steps  number of motion keys written into the build options.
// Stores the resulting traversable handle in bvh->traversable_handle and returns
// !have_error(). When `use_fast_trace_bvh` is set, the structure is compacted if that
// makes it smaller.
// NOTE(review): several lines (allocation of the output buffer, early returns, some
// call arguments) are missing from this listing.
1063bool OptiXDevice::build_optix_bvh(BVHOptiX *bvh,
1064 OptixBuildOperation operation,
1065 const OptixBuildInput &build_input,
1066 const uint16_t num_motion_steps)
1075 const CUDAContextScope scope(
this);
1080 OptixAccelBufferSizes sizes = {};
1081 OptixAccelBuildOptions
options = {};
1082 options.operation = operation;
// Curves use the same three build flags that load_kernels passes to the built-in
// curve intersection module, and force the fast-trace path on.
1083 if (build_input.type == OPTIX_BUILD_INPUT_TYPE_CURVES) {
1086 options.buildFlags = OPTIX_BUILD_FLAG_PREFER_FAST_TRACE | OPTIX_BUILD_FLAG_ALLOW_COMPACTION |
1087 OPTIX_BUILD_FLAG_ALLOW_UPDATE;
1088 use_fast_trace_bvh =
true;
1090 else if (use_fast_trace_bvh) {
1091 VLOG_INFO <<
"Using fast to trace OptiX BVH";
1092 options.buildFlags = OPTIX_BUILD_FLAG_PREFER_FAST_TRACE | OPTIX_BUILD_FLAG_ALLOW_COMPACTION;
// Remaining case (the `else` line itself is missing from this listing): fast build,
// updatable, no compaction flag.
1095 VLOG_INFO <<
"Using fast to update OptiX BVH";
1096 options.buildFlags = OPTIX_BUILD_FLAG_PREFER_FAST_BUILD | OPTIX_BUILD_FLAG_ALLOW_UPDATE;
// Motion options: keys span the normalized time range [0, 1].
1099 options.motionOptions.numKeys = num_motion_steps;
1100 options.motionOptions.flags = OPTIX_MOTION_FLAG_START_VANISH | OPTIX_MOTION_FLAG_END_VANISH;
1101 options.motionOptions.timeBegin = 0.0f;
1102 options.motionOptions.timeEnd = 1.0f;
1104 optix_assert(optixAccelComputeMemoryUsage(context, &
options, &build_input, 1, &sizes));
// Temporary buffer is over-allocated (size rounded up to 8, plus 8 bytes) so that an
// 8-byte-aligned slot for the compacted-size result fits right after the temp data.
1108 temp_mem.alloc_to_device(
align_up(sizes.tempSizeInBytes, 8) + 8);
1109 if (!temp_mem.device_pointer) {
1116 if (operation == OPTIX_BUILD_OPERATION_BUILD) {
// Emitted property: compacted size, written to the aligned slot after the temp data.
1128 OptixAccelEmitDesc compacted_size_prop = {};
1129 compacted_size_prop.
type = OPTIX_PROPERTY_TYPE_COMPACTED_SIZE;
1132 compacted_size_prop.result =
align_up(temp_mem.device_pointer + sizes.tempSizeInBytes, 8);
// The property is only requested on the fast-trace path.
1134 OptixTraversableHandle out_handle = 0;
1135 optix_assert(optixAccelBuild(context,
1140 temp_mem.device_pointer,
1141 sizes.tempSizeInBytes,
1143 sizes.outputSizeInBytes,
1145 use_fast_trace_bvh ? &compacted_size_prop :
nullptr,
1146 use_fast_trace_bvh ? 1 : 0));
1147 bvh->traversable_handle =
static_cast<uint64_t>(out_handle);
// Synchronizes the stream passed as nullptr before the emitted size is read back.
1150 cuda_assert(cuStreamSynchronize(
nullptr));
// Compaction: read the emitted size and compact only when it is strictly smaller than
// the uncompacted output size.
1154 if (use_fast_trace_bvh) {
1155 uint64_t compacted_size = sizes.outputSizeInBytes;
1156 cuda_assert(cuMemcpyDtoH(&compacted_size, compacted_size_prop.result,
sizeof(compacted_size)));
1162 if (compacted_size < sizes.outputSizeInBytes) {
1164 compacted_data.alloc_to_device(compacted_size);
1165 if (!compacted_data.device_pointer) {
1168 return !have_error();
1171 optix_assert(optixAccelCompact(context,
1174 compacted_data.device_pointer,
1177 bvh->traversable_handle =
static_cast<uint64_t>(out_handle);
1180 cuda_assert(cuStreamSynchronize(
nullptr));
// Swap size and pointer so `out_data` refers to the compacted buffer and
// `compacted_data` to the original one (presumably released when it goes out of
// scope — not visible here).
1182 std::swap(out_data.
device_size, compacted_data.device_size);
1183 std::swap(out_data.
device_pointer, compacted_data.device_pointer);
1189 return !have_error();
/* NOTE(review): the signature line of the enclosing function is missing from this listing;
 * the code below uses `bvh`, `progress` and `refit` from it. */
/* Release BVH memory queued for delayed freeing before starting a new build. */
1196 free_bvh_memory_delayed();
1198 BVHOptiX *
const bvh_optix =
static_cast<BVHOptiX *
>(bvh);
1200 progress.set_substatus(
"Building OptiX acceleration structure");
/* Default to a full build; switch to an update only for a refit when fast-trace mode is off.
 * The assert documents that an update needs an existing traversable handle. */
1207 OptixBuildOperation operation = OPTIX_BUILD_OPERATION_BUILD;
1208 if (
refit && !use_fast_trace_bvh) {
1209 assert(bvh_optix->traversable_handle != 0);
1210 operation = OPTIX_BUILD_OPERATION_UPDATE;
/* Presumably the non-update path (the branch line is not shown): drop the previous
 * acceleration structure data and handle. */
1213 bvh_optix->as_data->free();
1214 bvh_optix->traversable_handle = 0;
/* Hair geometry: fill index/vertex/AABB buffers per motion step and build the BVH.
 * NOTE(review): declarations of the temporary buffers, `motion_keys`, `num_segments`, `curve`,
 * `bounds` and several loop/branch lines are not present in this listing. */
1221 Hair *
const hair =
static_cast<Hair *const
>(geom);
/* A single step unless the pipeline uses motion blur, the hair has motion blur enabled and
 * `motion_keys` is non-null. */
1228 size_t num_motion_steps = 1;
1230 if (pipeline_options.usesMotionBlur && hair->get_use_motion_blur() && motion_keys) {
1231 num_motion_steps = hair->get_motion_steps();
/* The vertex count is four times the segment count. */
1238 size_t num_vertices = num_segments * 4;
1241 index_data.alloc(num_segments);
1242 vertex_data.alloc(num_vertices * num_motion_steps);
1245 aabb_data.alloc(num_segments * num_motion_steps);
1249 for (
size_t step = 0;
step < num_motion_steps; ++
step) {
/* The center step reads the hair's own curve keys. Every other step reads from
 * `motion_keys`, whose index skips the center step (hence `step - 1` past the center). */
1251 const float3 *keys = hair->get_curve_keys().data();
1252 size_t center_step = (num_motion_steps - 1) / 2;
1253 if (
step != center_step) {
1254 size_t attr_offset = (
step > center_step) ?
step - 1 :
step;
1256 keys = motion_keys->
data_float3() + attr_offset * hair->get_curve_keys().size();
/* Vertices of this step start at `step * num_vertices` in `vertex_data`. */
1260 for (
size_t curve_index = 0, segment_index = 0, vertex_index =
step * num_vertices;
1265 const array<float> &curve_radius = hair->get_curve_radius();
1267 const int first_key_index = curve.
first_key;
/* Each vertex is (x, y, z, radius). */
1269 vertex_data[vertex_index++] =
make_float4(keys[first_key_index].
x,
1270 keys[first_key_index].
y,
1271 keys[first_key_index].
z,
1272 curve_radius[first_key_index]);
/* The segment's index entry refers to the vertex written just before this point. */
1277 index_data[segment_index++] = vertex_index - 1;
1279 vertex_data[vertex_index++] =
make_float4(keys[first_key_index + k].
x,
1280 keys[first_key_index + k].
y,
1281 keys[first_key_index + k].
z,
1282 curve_radius[first_key_index + k]);
/* The last key of the curve is written twice. */
1285 const int last_key_index = first_key_index + curve.
num_keys - 1;
1287 vertex_data[vertex_index++] =
make_float4(keys[last_key_index].
x,
1288 keys[last_key_index].
y,
1289 keys[last_key_index].
z,
1290 curve_radius[last_key_index]);
1291 vertex_data[vertex_index++] =
make_float4(keys[last_key_index].
x,
1292 keys[last_key_index].
y,
1293 keys[last_key_index].
z,
1294 curve_radius[last_key_index]);
/* Bounding boxes: one entry per segment and step, stored at `step * num_segments + i`. */
1299 for (
size_t curve_index = 0,
i = 0; curve_index < hair->
num_curves(); ++curve_index) {
1306 const size_t index =
step * num_segments +
i;
1307 aabb_data[index].minX =
bounds.min.x;
1308 aabb_data[index].minY =
bounds.min.y;
1309 aabb_data[index].minZ =
bounds.min.z;
1310 aabb_data[index].maxX =
bounds.max.x;
1311 aabb_data[index].maxY =
bounds.max.y;
1312 aabb_data[index].maxZ =
bounds.max.z;
/* Upload the host-side buffers. */
1319 aabb_data.copy_to_device();
1320 index_data.copy_to_device();
1321 vertex_data.copy_to_device();
/* One device pointer per motion step for each buffer. The width pointer is offset by three
 * floats from the vertex base, i.e. it addresses the radius stored as the fourth component. */
1324 aabb_ptrs.reserve(num_motion_steps);
1327 width_ptrs.reserve(num_motion_steps);
1328 vertex_ptrs.reserve(num_motion_steps);
1329 for (
size_t step = 0;
step < num_motion_steps; ++
step) {
1330 aabb_ptrs.push_back(aabb_data.device_pointer +
step * num_segments *
sizeof(OptixAabb));
1331 const device_ptr base_ptr = vertex_data.device_pointer +
1333 width_ptrs.push_back(base_ptr + 3 *
sizeof(
float));
1334 vertex_ptrs.push_back(base_ptr);
1338 unsigned int build_flags = OPTIX_GEOMETRY_FLAG_REQUIRE_SINGLE_ANYHIT_CALL;
1339 OptixBuildInput build_input = {};
/* Curve build input: round Catmull-Rom primitives; vertices and widths share the same
 * float4-strided buffer. */
1341 build_input.type = OPTIX_BUILD_INPUT_TYPE_CURVES;
1342 build_input.curveArray.curveType = OPTIX_PRIMITIVE_TYPE_ROUND_CATMULLROM;
1343 build_input.curveArray.numPrimitives = num_segments;
1344 build_input.curveArray.vertexBuffers = (CUdeviceptr *)vertex_ptrs.data();
1345 build_input.curveArray.numVertices = num_vertices;
1346 build_input.curveArray.vertexStrideInBytes =
sizeof(
float4);
1347 build_input.curveArray.widthBuffers = (CUdeviceptr *)width_ptrs.data();
1348 build_input.curveArray.widthStrideInBytes =
sizeof(
float4);
1349 build_input.curveArray.indexBuffer = (CUdeviceptr)index_data.device_pointer;
1350 build_input.curveArray.indexStrideInBytes =
sizeof(int);
1351 build_input.curveArray.flag = build_flags;
/* Presumably the alternative branch (its condition is not shown): custom primitives described
 * by the AABB buffers, with any-hit disabled. */
1357 build_flags |= OPTIX_GEOMETRY_FLAG_DISABLE_ANYHIT;
1359 build_input.type = OPTIX_BUILD_INPUT_TYPE_CUSTOM_PRIMITIVES;
1360 build_input.customPrimitiveArray.aabbBuffers = (CUdeviceptr *)aabb_ptrs.data();
1361 build_input.customPrimitiveArray.numPrimitives = num_segments;
1362 build_input.customPrimitiveArray.strideInBytes =
sizeof(OptixAabb);
1363 build_input.customPrimitiveArray.flags = &build_flags;
1364 build_input.customPrimitiveArray.numSbtRecords = 1;
/* Report a build failure through `progress`. */
1368 if (!build_optix_bvh(bvh_optix, operation, build_input, num_motion_steps)) {
1369 progress.set_error(
"Failed to build OptiX acceleration structure");
/* Mesh geometry: upload triangle indices and per-step vertex positions, then build the BVH.
 * NOTE(review): declarations of the temporary buffers, `motion_keys` and `verts` are not
 * present in this listing. */
1374 Mesh *
const mesh =
static_cast<Mesh *const
>(geom);
1379 const size_t num_verts = mesh->get_verts().size();
/* A single step unless the pipeline uses motion blur, the mesh has motion blur enabled and
 * `motion_keys` is non-null. */
1381 size_t num_motion_steps = 1;
1383 if (pipeline_options.usesMotionBlur && mesh->get_use_motion_blur() && motion_keys) {
1384 num_motion_steps = mesh->get_motion_steps();
/* Triangle indices are copied verbatim as ints. */
1388 index_data.alloc(mesh->get_triangles().size());
1389 memcpy(index_data.data(),
1390 mesh->get_triangles().data(),
1391 mesh->get_triangles().size() *
sizeof(
int));
1393 vertex_data.alloc(num_verts * num_motion_steps);
1395 for (
size_t step = 0;
step < num_motion_steps; ++
step) {
/* Steps other than the center one take a different vertex source (assignment not shown). */
1398 size_t center_step = (num_motion_steps - 1) / 2;
1400 if (
step != center_step) {
/* Copy this step's positions into its slice of `vertex_data`. */
1404 memcpy(vertex_data.data() + num_verts *
step,
verts, num_verts *
sizeof(
float3));
1408 index_data.copy_to_device();
1409 vertex_data.copy_to_device();
/* One vertex buffer pointer per motion step. */
1412 vertex_ptrs.reserve(num_motion_steps);
1413 for (
size_t step = 0;
step < num_motion_steps; ++
step) {
1414 vertex_ptrs.push_back(vertex_data.device_pointer + num_verts *
step *
sizeof(
float3));
1418 unsigned int build_flags = OPTIX_GEOMETRY_FLAG_REQUIRE_SINGLE_ANYHIT_CALL;
1419 OptixBuildInput build_input = {};
1420 build_input.type = OPTIX_BUILD_INPUT_TYPE_TRIANGLES;
1421 build_input.triangleArray.vertexBuffers = (CUdeviceptr *)vertex_ptrs.data();
1422 build_input.triangleArray.numVertices = num_verts;
1423 build_input.triangleArray.vertexFormat = OPTIX_VERTEX_FORMAT_FLOAT3;
/* NOTE(review): the stride is sizeof(float4) while the copy and per-step offsets above use
 * sizeof(float3); this is consistent only if both types have the same size - confirm. */
1424 build_input.triangleArray.vertexStrideInBytes =
sizeof(
float4);
1425 build_input.triangleArray.indexBuffer = index_data.device_pointer;
1426 build_input.triangleArray.numIndexTriplets = mesh->
num_triangles();
1427 build_input.triangleArray.indexFormat = OPTIX_INDICES_FORMAT_UNSIGNED_INT3;
1428 build_input.triangleArray.indexStrideInBytes = 3 *
sizeof(int);
1429 build_input.triangleArray.flags = &build_flags;
1433 build_input.triangleArray.numSbtRecords = 1;
1434 build_input.triangleArray.primitiveIndexOffset = mesh->
prim_offset;
/* Report a build failure through `progress`. */
1436 if (!build_optix_bvh(bvh_optix, operation, build_input, num_motion_steps)) {
1437 progress.set_error(
"Failed to build OptiX acceleration structure");
/* Point cloud geometry: one AABB per point and motion step, built as custom primitives.
 * NOTE(review): declarations of `pointcloud`, `motion_points`, `bounds`, the temporary buffers
 * and the body of the empty-cloud check are not present in this listing. */
1443 const size_t num_points = pointcloud->
num_points();
1444 if (num_points == 0) {
/* A single step unless the pipeline uses motion blur, the point cloud has motion blur enabled
 * and `motion_points` is non-null. */
1448 size_t num_motion_steps = 1;
1450 if (pipeline_options.usesMotionBlur && pointcloud->get_use_motion_blur() && motion_points) {
1451 num_motion_steps = pointcloud->get_motion_steps();
1455 aabb_data.alloc(num_points * num_motion_steps);
1458 for (
size_t step = 0;
step < num_motion_steps; ++
step) {
1460 size_t center_step = (num_motion_steps - 1) / 2;
/* Center step: bounds come from the point cloud's own points and radii. */
1462 if (
step == center_step) {
1463 const float3 *points = pointcloud->get_points().data();
1464 const float *radius = pointcloud->get_radius().data();
1466 for (
size_t i = 0;
i < num_points; ++
i) {
1471 const size_t index =
step * num_points +
i;
1472 aabb_data[index].minX =
bounds.min.x;
1473 aabb_data[index].minY =
bounds.min.y;
1474 aabb_data[index].minZ =
bounds.min.z;
1475 aabb_data[index].maxX =
bounds.max.x;
1476 aabb_data[index].maxY =
bounds.max.y;
1477 aabb_data[index].maxZ =
bounds.max.z;
/* Other steps: the attribute index skips the center step (hence `step - 1` past the center). */
1481 size_t attr_offset = (
step > center_step) ?
step - 1 :
step;
1484 for (
size_t i = 0;
i < num_points; ++
i) {
1489 const size_t index =
step * num_points +
i;
1490 aabb_data[index].minX =
bounds.min.x;
1491 aabb_data[index].minY =
bounds.min.y;
1492 aabb_data[index].minZ =
bounds.min.z;
1493 aabb_data[index].maxX =
bounds.max.x;
1494 aabb_data[index].maxY =
bounds.max.y;
1495 aabb_data[index].maxZ =
bounds.max.z;
1501 aabb_data.copy_to_device();
/* One AABB buffer pointer per motion step. */
1504 aabb_ptrs.reserve(num_motion_steps);
1505 for (
size_t step = 0;
step < num_motion_steps; ++
step) {
1506 aabb_ptrs.push_back(aabb_data.device_pointer +
step * num_points *
sizeof(OptixAabb));
/* Any-hit is disabled for these primitives. */
1512 unsigned int build_flags = OPTIX_GEOMETRY_FLAG_DISABLE_ANYHIT |
1513 OPTIX_GEOMETRY_FLAG_REQUIRE_SINGLE_ANYHIT_CALL;
1514 OptixBuildInput build_input = {};
1515 build_input.type = OPTIX_BUILD_INPUT_TYPE_CUSTOM_PRIMITIVES;
1516 build_input.customPrimitiveArray.aabbBuffers = (CUdeviceptr *)aabb_ptrs.data();
1517 build_input.customPrimitiveArray.numPrimitives = num_points;
1518 build_input.customPrimitiveArray.strideInBytes =
sizeof(OptixAabb);
1519 build_input.customPrimitiveArray.flags = &build_flags;
1520 build_input.customPrimitiveArray.numSbtRecords = 1;
1521 build_input.customPrimitiveArray.primitiveIndexOffset = pointcloud->
prim_offset;
/* Report a build failure through `progress`. */
1523 if (!build_optix_bvh(bvh_optix, operation, build_input, num_motion_steps)) {
1524 progress.set_error(
"Failed to build OptiX acceleration structure");
/* Instance-level build: create one OptixInstance per object, optionally wrapped in an SRT
 * motion transform, then build the instance acceleration structure and store its handle.
 * NOTE(review): the loops over objects, the declarations of `instances`, `ob`,
 * `motion_transform_storage` and `decomp`, and several branch lines are not in this listing. */
1529 unsigned int num_instances = 0;
1530 unsigned int max_num_instances = 0xFFFFFFFF;
/* Discard the previous structure, handle and motion transform memory. */
1532 bvh_optix->as_data->free();
1533 bvh_optix->traversable_handle = 0;
1534 bvh_optix->motion_transform_data->free();
/* NOTE(review): the result of this query is not checked; on failure `max_num_instances`
 * keeps its 0xFFFFFFFF initial value. */
1536 optixDeviceContextGetProperty(context,
1537 OPTIX_DEVICE_PROPERTY_LIMIT_MAX_INSTANCE_ID,
1539 sizeof(max_num_instances));
/* Only half of the reported limit is accepted as the object count. */
1541 max_num_instances >>= 1;
1542 if (bvh->
objects.size() > max_num_instances) {
1544 "Failed to build OptiX acceleration structure because there are too many instances");
1550 instances.alloc(bvh->
objects.size());
/* First pass (motion blur only): sum the aligned size of all motion transforms so they can
 * share one device allocation. Each transform needs the base struct plus one OptixSRTData
 * for every motion key beyond the second. */
1553 size_t motion_transform_offset = 0;
1554 if (pipeline_options.usesMotionBlur) {
1555 size_t total_motion_transform_size = 0;
1558 total_motion_transform_size =
align_up(total_motion_transform_size,
1559 OPTIX_TRANSFORM_BYTE_ALIGNMENT);
1560 const size_t motion_keys =
max(ob->get_motion().size(), (
size_t)2) - 2;
1561 total_motion_transform_size = total_motion_transform_size +
1562 sizeof(OptixSRTMotionTransform) +
1563 motion_keys *
sizeof(OptixSRTData);
1567 assert(bvh_optix->motion_transform_data->device ==
this);
1568 bvh_optix->motion_transform_data->alloc_to_device(total_motion_transform_size);
/* Per object: start from the handle of the object's geometry BVH. */
1577 BVHOptiX *
const blas =
static_cast<BVHOptiX *
>(ob->get_geometry()->bvh.get());
1578 OptixTraversableHandle handle = blas->traversable_handle;
1583 OptixInstance &instance = instances[num_instances++];
1584 memset(&instance, 0,
sizeof(instance));
/* After zeroing, set entries 0, 5 and 10 of the transform to one (the diagonal of a
 * row-major 3x4 matrix). */
1587 instance.transform[0] = 1.0f;
1588 instance.transform[5] = 1.0f;
1589 instance.transform[10] = 1.0f;
/* A zero visibility mask is replaced by 0xFF. */
1602 if (0 == instance.visibilityMask) {
1603 instance.visibilityMask = 0xFF;
/* Select the hit-group offset by geometry type (conditions are only partly shown). */
1606 if (ob->get_geometry()->is_hair() &&
1609 if (pipeline_options.usesMotionBlur && ob->get_geometry()->has_motion_blur()) {
1611 instance.sbtOffset = PG_HITD_MOTION - PG_HITD;
1614 else if (ob->get_geometry()->is_pointcloud()) {
1616 instance.sbtOffset = PG_HITD_POINTCLOUD - PG_HITD;
1619 instance.visibilityMask |= 4;
1628 instance.flags = OPTIX_INSTANCE_FLAG_DISABLE_ANYHIT;
/* Object motion: fill an SRT motion transform on the host, copy it into this object's
 * aligned slot of the shared device allocation and instance that transform instead of
 * the geometry handle. */
1632 if (pipeline_options.usesMotionBlur && ob->
use_motion()) {
1633 size_t motion_keys =
max(ob->get_motion().size(), (
size_t)2) - 2;
1634 size_t motion_transform_size =
sizeof(OptixSRTMotionTransform) +
1635 motion_keys *
sizeof(OptixSRTData);
1637 const CUDAContextScope scope(
this);
1639 motion_transform_offset =
align_up(motion_transform_offset,
1640 OPTIX_TRANSFORM_BYTE_ALIGNMENT);
1641 CUdeviceptr motion_transform_gpu = bvh_optix->motion_transform_data->device_pointer +
1642 motion_transform_offset;
1643 motion_transform_offset += motion_transform_size;
1647 OptixSRTMotionTransform *motion_transform =
reinterpret_cast<OptixSRTMotionTransform *
>(
1648 motion_transform_storage.data());
1649 motion_transform->child = handle;
1650 motion_transform->motionOptions.numKeys = ob->get_motion().size();
1651 motion_transform->motionOptions.flags = OPTIX_MOTION_FLAG_NONE;
1652 motion_transform->motionOptions.timeBegin = 0.0f;
1653 motion_transform->motionOptions.timeEnd = 1.0f;
1655 OptixSRTData *
const srt_data = motion_transform->srtData;
1658 decomp.data(), ob->get_motion().
data(), ob->get_motion().size());
/* Map each decomposed key onto the OptiX SRT fields. */
1660 for (
size_t i = 0;
i < ob->get_motion().
size(); ++
i) {
/* sx/sy/sz come from the w components of decomp y, z and w. */
1662 srt_data[
i].sx = decomp[
i].y.w;
1663 srt_data[
i].sy = decomp[
i].z.w;
1664 srt_data[
i].sz = decomp[
i].w.w;
/* a/b/c come from decomp z.x, z.y and w.x. */
1667 srt_data[
i].a = decomp[
i].z.x;
1668 srt_data[
i].b = decomp[
i].z.y;
1669 srt_data[
i].c = decomp[
i].w.x;
/* pvx/pvy/pvz are always zero. */
1675 srt_data[
i].pvx = 0.0f;
1676 srt_data[
i].pvy = 0.0f;
1677 srt_data[
i].pvz = 0.0f;
/* qx..qw come from decomp x. */
1680 srt_data[
i].qx = decomp[
i].x.x;
1681 srt_data[
i].qy = decomp[
i].x.y;
1682 srt_data[
i].qz = decomp[
i].x.z;
1683 srt_data[
i].qw = decomp[
i].x.w;
/* tx/ty/tz come from the xyz components of decomp y. */
1686 srt_data[
i].tx = decomp[
i].y.x;
1687 srt_data[
i].ty = decomp[
i].y.y;
1688 srt_data[
i].tz = decomp[
i].y.z;
/* NOTE(review): the result of this copy is not checked, unlike the cuda_assert-wrapped
 * CUDA calls elsewhere in this file. */
1692 cuMemcpyHtoD(motion_transform_gpu, motion_transform, motion_transform_size);
1693 motion_transform =
nullptr;
1694 motion_transform_storage.clear();
/* NOTE(review): the result of this conversion is not checked either. */
1697 optixConvertPointerToTraversableHandle(context,
1698 motion_transform_gpu,
1699 OPTIX_TRAVERSABLE_TYPE_SRT_MOTION_TRANSFORM,
1700 &instance.traversableHandle);
/* Without object motion the instance refers to the geometry handle directly; instanced
 * geometry additionally gets the object's transform copied in. */
1703 instance.traversableHandle = handle;
1705 if (ob->get_geometry()->is_instanced()) {
1707 memcpy(instance.transform, &ob->get_tfm(),
sizeof(instance.transform));
/* Shrink to the number of instances actually written and upload them. */
1713 instances.resize(num_instances);
1714 instances.copy_to_device();
1717 OptixBuildInput build_input = {};
1718 build_input.type = OPTIX_BUILD_INPUT_TYPE_INSTANCES;
1719 build_input.instanceArray.instances = instances.device_pointer;
1720 build_input.instanceArray.numInstances = num_instances;
/* The instance structure is always built from scratch, with zero motion steps. */
1722 if (!build_optix_bvh(bvh_optix, OPTIX_BUILD_OPERATION_BUILD, build_input, 0)) {
1723 progress.set_error(
"Failed to build OptiX acceleration structure");
/* Remember the resulting handle in `tlas_handle`. */
1725 tlas_handle = bvh_optix->traversable_handle;
/* Hand the memory of `bvh` over for delayed freeing instead of freeing it right away:
 * `as_data` and `motion_transform_data` are moved into `delayed_free_bvh_memory` and the
 * traversable handle is reset to zero.
 * NOTE(review): lines between the signature and the cast are missing from this listing. */
1729void OptiXDevice::release_bvh(
BVH *bvh)
1734 BVHOptiX *
const bvh_optix =
static_cast<BVHOptiX *
>(bvh);
1736 delayed_free_bvh_memory.emplace_back(std::move(bvh_optix->as_data));
1737 delayed_free_bvh_memory.emplace_back(std::move(bvh_optix->motion_transform_data));
1738 bvh_optix->traversable_handle = 0;
/* Free everything currently held in `delayed_free_bvh_memory`.
 * NOTE(review): lines between the signature and this call are missing from this listing. */
1741void OptiXDevice::free_bvh_memory_delayed()
1744 delayed_free_bvh_memory.free_memory();
/* Copy a named constant block to the device and mirror it into the OptiX launch parameters.
 *
 * name: identifier of the constant; "data" and the names listed in kernel/data_arrays.h are
 *       recognized here.
 * host: host pointer to the value.
 * size: size in bytes, forwarded unchanged.
 *
 * NOTE(review): some lines of this function are missing from this listing. */
1747void OptiXDevice::const_copy_to(
const char *name,
void *host,
const size_t size)
/* Let the CUDA base class handle the copy first. */
1750 CUDADevice::const_copy_to(name, host,
size);
/* For the "data" block, overwrite `device_bvh` in the host copy with `tlas_handle`. */
1752 if (strcmp(name,
"data") == 0) {
1756 KernelData *
const data = (KernelData *)host;
1757 *(OptixTraversableHandle *)&
data->device_bvh = tlas_handle;
/* For every array declared in kernel/data_arrays.h, generate a name comparison that updates
 * the matching KernelParamsOptiX field through update_launch_params(). */
1764# define KERNEL_DATA_ARRAY(data_type, data_name) \
1765 if (strcmp(name, #data_name) == 0) { \
1766 update_launch_params(offsetof(KernelParamsOptiX, data_name), host, size); \
1770# include "kernel/data_arrays.h"
1771# undef KERNEL_DATA_ARRAY
/* Copy `data_size` bytes from host memory at `data` into the device-side launch parameters,
 * starting `offset` bytes past `launch_params.device_pointer`. The copy runs with this
 * device's CUDA context scope active and is checked with cuda_assert. */
1774void OptiXDevice::update_launch_params(
const size_t offset,
void *
data,
const size_t data_size)
1776 const CUDAContextScope scope(
this);
1778 cuda_assert(cuMemcpyHtoD(launch_params.device_pointer + offset,
data, data_size));
BMesh const char void * data
unsigned long long int uint64_t
static DBVT_INLINE btScalar size(const btDbvtVolume &a)
static btDbvtVolume bounds(btDbvtNode **leaves, int count)
void refit(btStridingMeshInterface *triangles, const btVector3 &aabbMin, const btVector3 &aabbMax)
SIMD_FORCE_INLINE const btScalar & z() const
Return the z value.
SIMD_FORCE_INLINE const btScalar & w() const
Return the w value.
Attribute * find(ustring name) const
vector< Geometry * > geometry
vector< Object * > objects
bool is_pointcloud() const
Curve get_curve(const size_t i) const
size_t curve_segment_offset
size_t num_curves() const
size_t num_segments() const
CurveShapeType curve_shape
device_ptr device_pointer
void alloc_to_device(const size_t num, bool shrink_to_fit=true)
CCL_NAMESPACE_BEGIN struct Options options
#define KERNEL_DATA_ARRAY(type, name)
DebugFlags & DebugFlags()
#define KERNEL_FEATURE_OBJECT_MOTION
#define KERNEL_FEATURE_OSL_SHADING
#define KERNEL_FEATURE_SUBSURFACE
#define KERNEL_FEATURE_HAIR_THICK
#define KERNEL_FEATURE_PATH_TRACING
#define KERNEL_FEATURE_OSL_CAMERA
#define KERNEL_FEATURE_HAIR
#define KERNEL_FEATURE_NODE_RAYTRACE
#define KERNEL_FEATURE_BAKING
#define KERNEL_FEATURE_MNEE
#define KERNEL_FEATURE_POINTCLOUD
#define CCL_NAMESPACE_END
VecBase< float, 4 > float4
#define assert(assertion)
VecBase< float, D > step(VecOp< float, D >, VecOp< float, D >) RET
@ ATTR_STD_MOTION_VERTEX_POSITION
#define VLOG_IS_ON(severity)
Segment< FEdge *, Vec3r > segment
size_t path_file_size(const string &path)
bool path_is_directory(const string &path)
string path_get(const string &sub)
string path_join(const string &dir, const string &file)
bool path_read_compressed_text(const string &path, string &text)
static struct PyModuleDef module
CCL_NAMESPACE_BEGIN string string_printf(const char *format,...)
void bounds_grow(const int k, const float3 *curve_keys, const float *curve_radius, BoundBox &bounds) const
size_t num_triangles() const
int get_device_index() const
bool is_traceable() const
uint visibility_for_tracing() const
void bounds_grow(const float3 *points, const float *radius, BoundBox &bounds) const
Point get_point(const int i) const
size_t num_points() const
void push(TaskRunFunction &&task)
void wait_work(Summary *stats=nullptr)
std::unique_lock< std::mutex > thread_scoped_lock
ccl_device_inline size_t align_up(const size_t offset, const size_t alignment)