29# define __KERNEL_OPTIX__
34# if OPTIX_ABI_VERSION >= 55
// Executes a single OptiX task with optixTaskExecute() and, when that call
// reports OPTIX_SUCCESS, walks the follow-up tasks it returned, pairing each one
// with &execute_optix_task and the same `pool` / `failure_reason` references.
//
// pool:           task pool, passed on by reference for the follow-up tasks.
// task:           the OptiX task to execute.
// failure_reason: result slot passed on by reference for the follow-up tasks.
//
// NOTE(review): the call that consumes the arguments on the last line, and the
// non-success branch, are not visible in this excerpt -- presumably a push onto
// `pool` and a write to `failure_reason`; confirm against the full file.
35static void execute_optix_task(
TaskPool &pool, OptixTask task, OptixResult &failure_reason)
// Room for at most 16 follow-up tasks per optixTaskExecute() call; the same
// literal is passed as the array capacity below.
37 OptixTask additional_tasks[16];
38 unsigned int num_additional_tasks = 0;
40 const OptixResult
result = optixTaskExecute(task, additional_tasks, 16, &num_additional_tasks);
41 if (
result == OPTIX_SUCCESS) {
// Only the first num_additional_tasks entries were filled in by OptiX.
42 for (
unsigned int i = 0; i < num_additional_tasks; ++i) {
44 &execute_optix_task, std::ref(pool), additional_tasks[i], std::ref(failure_reason)));
// Constructor fragment (the signature line is not visible in this excerpt;
// presumably OptiXDevice::OptiXDevice -- confirm).
// Forwards info/stats/profiler/headless to the CUDADevice base, constructs
// launch_params under the name "kernel_params", creates the OptiX device
// context on cuContext and allocates one launch_params element on the device.
54 : CUDADevice(info, stats, profiler, headless),
56 launch_params(this,
"kernel_params",
false)
// Scope object constructed from `this` before any OptiX call below.
63 const CUDAContextScope scope(
this);
66 OptixDeviceContextOptions
options = {};
// With WITH_CYCLES_LOGGING defined, a capture-less lambda is installed as the
// log callback; its body is not visible in this excerpt.
67# ifdef WITH_CYCLES_LOGGING
69 options.logCallbackFunction = [](
unsigned int level,
const char *,
const char *message,
void *) {
// NOTE(review): the condition guarding full validation mode is not visible here.
88 options.validationMode = OPTIX_DEVICE_CONTEXT_VALIDATION_MODE_ALL;
90 optix_assert(optixDeviceContextCreate(cuContext, &
options, &context));
91# ifdef WITH_CYCLES_LOGGING
92 optix_assert(optixDeviceContextSetLogCallback(
// Device storage for a single launch-parameter element.
100 launch_params.alloc_to_device(1);
// Destructor: releases delayed BVH memory and the launch parameters, then
// destroys every non-NULL OptiX object held by the device (kernel module, the
// two built-in modules, pipelines, program groups, OSL modules and OSL groups)
// and finally the OptiX device context itself.
// Return values of the optix*Destroy() calls are not checked here.
103OptiXDevice::~OptiXDevice()
106 const CUDAContextScope scope(
this);
108 free_bvh_memory_delayed();
112 launch_params.free();
// Main kernel module.
115 if (optix_module !=
NULL) {
116 optixModuleDestroy(optix_module);
// builtin_modules has two slots; both are checked.
118 for (
int i = 0; i < 2; ++i) {
119 if (builtin_modules[i] !=
NULL) {
120 optixModuleDestroy(builtin_modules[i]);
123 for (
int i = 0; i < NUM_PIPELINES; ++i) {
124 if (pipelines[i] !=
NULL) {
125 optixPipelineDestroy(pipelines[i]);
128 for (
int i = 0; i < NUM_PROGRAM_GROUPS; ++i) {
129 if (groups[i] !=
NULL) {
130 optixProgramGroupDestroy(groups[i]);
// OSL modules and groups.
// NOTE(review): any NULL check inside these two loops is not visible in this
// excerpt.
135 for (
const OptixModule &
module : osl_modules) {
137 optixModuleDestroy(
module);
140 for (
const OptixProgramGroup &group : osl_groups) {
142 optixProgramGroupDestroy(group);
// The context goes last, after everything created from it.
147 optixDeviceContextDestroy(context);
// Returns a newly created OptiXDeviceQueue constructed from this device.
// NOTE(review): the enclosing function's signature is not visible in this
// excerpt; presumably the device's queue factory method -- confirm.
152 return make_unique<OptiXDeviceQueue>(
this);
// Picks the OptiX SDK "include" directory used for runtime kernel compilation.
// The OPTIX_ROOT_DIR environment variable wins when it is set and non-empty;
// otherwise the build-time CYCLES_RUNTIME_OPTIX_ROOT_DIR is used when non-empty.
// In both cases "include" is appended with path_join().
// NOTE(review): the return for the case where neither is set is not visible in
// this excerpt; load_kernels() tests the result with .empty(), so presumably an
// empty string -- confirm.
161static string get_optix_include_dir()
163 const char *env_dir = getenv(
"OPTIX_ROOT_DIR");
164 const char *default_dir = CYCLES_RUNTIME_OPTIX_ROOT_DIR;
// getenv() may return a null pointer, hence the pointer test before [0].
166 if (env_dir && env_dir[0]) {
167 const string env_include_dir =
path_join(env_dir,
"include");
168 return env_include_dir;
170 else if (default_dir[0]) {
171 const string default_include_dir =
path_join(default_dir,
"include");
172 return default_include_dir;
// Builds the compiler flag string for OptiX kernels: starts from the flags
// CUDADevice produces for `kernel_features`, appends a quoted -I entry for the
// OptiX include directory and then " --keep-device-functions".
// Returns the combined flag string.
178string OptiXDevice::compile_kernel_get_common_cflags(
const uint kernel_features)
180 string common_cflags = CUDADevice::compile_kernel_get_common_cflags(kernel_features);
// The include path is wrapped in escaped double quotes inside the flag string.
183 common_cflags +=
string_printf(
" -I\"%s\"", get_optix_include_dir().c_str());
187 common_cflags +=
" --keep-device-functions";
190 return common_cflags;
// Loads the kernels for `kernel_features`.
// Visible steps, in order: decide whether OptiX kernels are needed and which
// PTX file to use, load the CUDA kernels through CUDADevice::load_kernels(),
// destroy any previously created OptiX modules/pipelines/program groups, fill in
// module and pipeline compile options, create the OptiX module from PTX, describe
// and create all program groups, pack the shader binding table, and create the
// PIP_SHADE and PIP_INTERSECT pipelines with their stack sizes.
// Returns !have_error() at the end.
// NOTE(review): many lines (braces, early returns, conditions, #else/#endif) are
// missing from this excerpt, so several branches below cannot be told apart.
193bool OptiXDevice::load_kernels(
const uint kernel_features)
203 const bool use_osl =
false;
207 const bool need_optix_kernels = (kernel_features &
213 string suffix = use_osl ?
"_osl" :
// Pre-built PTX location; when adaptive compilation is on or the file size
// query returns -1, the OptiX SDK include directory is validated so the kernel
// can be compiled at runtime.
218 if (need_optix_kernels) {
219 ptx_filename =
path_get(
"lib/kernel_optix" + suffix +
".ptx.zst");
220 if (use_adaptive_compilation() ||
path_file_size(ptx_filename) == -1) {
221 std::string optix_include_dir = get_optix_include_dir();
222 if (optix_include_dir.empty()) {
224 "Unable to compile OptiX kernels at runtime. Set OPTIX_ROOT_DIR environment variable "
225 "to a directory containing the OptiX SDK.");
230 "OptiX headers not found at %s, unable to compile OptiX kernels at runtime. Install "
231 "OptiX SDK in the specified location, or set OPTIX_ROOT_DIR environment variable to a "
232 "directory containing the OptiX SDK.",
233 optix_include_dir.c_str()));
// The CUDA kernels are loaded through the base class first.
240 if (!CUDADevice::load_kernels(kernel_features)) {
244 if (!need_optix_kernels) {
248 const CUDAContextScope scope(
this);
// Tear down whatever an earlier load created before rebuilding.
251 if (optix_module !=
NULL) {
252 optixModuleDestroy(optix_module);
255 for (
int i = 0; i < 2; ++i) {
256 if (builtin_modules[i] !=
NULL) {
257 optixModuleDestroy(builtin_modules[i]);
258 builtin_modules[i] =
NULL;
261 for (
int i = 0; i < NUM_PIPELINES; ++i) {
262 if (pipelines[i] !=
NULL) {
263 optixPipelineDestroy(pipelines[i]);
267 for (
int i = 0; i < NUM_PROGRAM_GROUPS; ++i) {
268 if (groups[i] !=
NULL) {
269 optixProgramGroupDestroy(groups[i]);
276 for (
const OptixModule &
module : osl_modules) {
278 optixModuleDestroy(
module);
283 for (
const OptixProgramGroup &group : osl_groups) {
285 optixProgramGroupDestroy(group);
// Module compile options. Two optLevel/debugLevel pairs are assigned below
// (level 0 + full debug info, level 3 + no debug info); the condition selecting
// between them is not visible in this excerpt.
291 OptixModuleCompileOptions module_options = {};
292 module_options.maxRegisterCount = 0;
295 module_options.optLevel = OPTIX_COMPILE_OPTIMIZATION_LEVEL_0;
296 module_options.debugLevel = OPTIX_COMPILE_DEBUG_LEVEL_FULL;
299 module_options.optLevel = OPTIX_COMPILE_OPTIMIZATION_LEVEL_3;
300 module_options.debugLevel = OPTIX_COMPILE_DEBUG_LEVEL_NONE;
303 module_options.boundValues =
nullptr;
304 module_options.numBoundValues = 0;
305# if OPTIX_ABI_VERSION >= 55
306 module_options.payloadTypes =
nullptr;
307 module_options.numPayloadTypes = 0;
// Pipeline compile options: defaults are no motion blur and single-level
// instancing, 8 payload values, 2 attribute values, and launch parameters bound
// to the variable "kernel_params".
311 pipeline_options.usesMotionBlur =
false;
312 pipeline_options.traversableGraphFlags =
313 OPTIX_TRAVERSABLE_GRAPH_FLAG_ALLOW_SINGLE_LEVEL_INSTANCING;
314 pipeline_options.numPayloadValues = 8;
315 pipeline_options.numAttributeValues = 2;
316 pipeline_options.exceptionFlags = OPTIX_EXCEPTION_FLAG_NONE;
317 pipeline_options.pipelineLaunchParamsVariableName =
"kernel_params";
// Primitive types start with triangles; curve and custom types are OR-ed in
// below (the feature checks guarding each line are not visible here).
319 pipeline_options.usesPrimitiveTypeFlags = OPTIX_PRIMITIVE_TYPE_FLAGS_TRIANGLE;
322# if OPTIX_ABI_VERSION >= 55
323 pipeline_options.usesPrimitiveTypeFlags |= OPTIX_PRIMITIVE_TYPE_FLAGS_ROUND_CATMULLROM;
325 pipeline_options.usesPrimitiveTypeFlags |= OPTIX_PRIMITIVE_TYPE_FLAGS_ROUND_CUBIC_BSPLINE;
329 pipeline_options.usesPrimitiveTypeFlags |= OPTIX_PRIMITIVE_TYPE_FLAGS_CUSTOM;
332 pipeline_options.usesPrimitiveTypeFlags |= OPTIX_PRIMITIVE_TYPE_FLAGS_CUSTOM;
// Motion blur switches the traversable graph flags to ALLOW_ANY (the enabling
// condition is not visible in this excerpt).
339 pipeline_options.usesMotionBlur =
true;
342 pipeline_options.traversableGraphFlags = OPTIX_TRAVERSABLE_GRAPH_FLAG_ALLOW_ANY;
// Same check as above: compile the kernel now and use the compiled file instead
// of the pre-built PTX path.
347 if (use_adaptive_compilation() ||
path_file_size(ptx_filename) == -1) {
348 string cflags = compile_kernel_get_common_cflags(kernel_features);
349 ptx_filename = compile_kernel(cflags, (
"kernel" + suffix).c_str(),
"optix",
true);
352 set_error(
string_printf(
"Failed to load OptiX kernel from '%s'", ptx_filename.c_str()));
// Module creation, selected by OPTIX_ABI_VERSION: >= 84 uses
// optixModuleCreateWithTasks, >= 55 uses optixModuleCreateFromPTXWithTasks (both
// followed by execute_optix_task on success), otherwise optixModuleCreateFromPTX.
356# if OPTIX_ABI_VERSION >= 84
357 OptixTask
task =
nullptr;
358 OptixResult
result = optixModuleCreateWithTasks(context,
367 if (
result == OPTIX_SUCCESS) {
369 execute_optix_task(pool, task,
result);
372# elif OPTIX_ABI_VERSION >= 55
373 OptixTask
task =
nullptr;
374 OptixResult
result = optixModuleCreateFromPTXWithTasks(context,
383 if (
result == OPTIX_SUCCESS) {
385 execute_optix_task(pool, task,
result);
389 const OptixResult
result = optixModuleCreateFromPTX(context,
398 if (
result != OPTIX_SUCCESS) {
399 set_error(
string_printf(
"Failed to load OptiX kernel from '%s' (%s)",
400 ptx_filename.c_str(),
401 optixGetErrorName(
result)));
// Program group descriptions, indexed by the PG_* constants. All entry points
// below live in optix_module unless a built-in module is assigned.
407 OptixProgramGroupDesc group_descs[NUM_PROGRAM_GROUPS] = {};
408 OptixProgramGroupOptions group_options = {};
// Ray generation programs for the intersection kernels.
409 group_descs[PG_RGEN_INTERSECT_CLOSEST].kind = OPTIX_PROGRAM_GROUP_KIND_RAYGEN;
410 group_descs[PG_RGEN_INTERSECT_CLOSEST].raygen.module = optix_module;
411 group_descs[PG_RGEN_INTERSECT_CLOSEST].raygen.entryFunctionName =
412 "__raygen__kernel_optix_integrator_intersect_closest";
413 group_descs[PG_RGEN_INTERSECT_SHADOW].kind = OPTIX_PROGRAM_GROUP_KIND_RAYGEN;
414 group_descs[PG_RGEN_INTERSECT_SHADOW].raygen.module = optix_module;
415 group_descs[PG_RGEN_INTERSECT_SHADOW].raygen.entryFunctionName =
416 "__raygen__kernel_optix_integrator_intersect_shadow";
417 group_descs[PG_RGEN_INTERSECT_SUBSURFACE].kind = OPTIX_PROGRAM_GROUP_KIND_RAYGEN;
418 group_descs[PG_RGEN_INTERSECT_SUBSURFACE].raygen.module = optix_module;
419 group_descs[PG_RGEN_INTERSECT_SUBSURFACE].raygen.entryFunctionName =
420 "__raygen__kernel_optix_integrator_intersect_subsurface";
421 group_descs[PG_RGEN_INTERSECT_VOLUME_STACK].kind = OPTIX_PROGRAM_GROUP_KIND_RAYGEN;
422 group_descs[PG_RGEN_INTERSECT_VOLUME_STACK].raygen.module = optix_module;
423 group_descs[PG_RGEN_INTERSECT_VOLUME_STACK].raygen.entryFunctionName =
424 "__raygen__kernel_optix_integrator_intersect_volume_stack";
425 group_descs[PG_RGEN_INTERSECT_DEDICATED_LIGHT].kind = OPTIX_PROGRAM_GROUP_KIND_RAYGEN;
426 group_descs[PG_RGEN_INTERSECT_DEDICATED_LIGHT].raygen.module = optix_module;
427 group_descs[PG_RGEN_INTERSECT_DEDICATED_LIGHT].raygen.entryFunctionName =
428 "__raygen__kernel_optix_integrator_intersect_dedicated_light";
// Miss program and the hit groups: PG_HITD and PG_HITV have closest-hit plus
// any-hit programs, PG_HITS has any-hit only.
429 group_descs[PG_MISS].kind = OPTIX_PROGRAM_GROUP_KIND_MISS;
430 group_descs[PG_MISS].miss.module = optix_module;
431 group_descs[PG_MISS].miss.entryFunctionName =
"__miss__kernel_optix_miss";
432 group_descs[PG_HITD].kind = OPTIX_PROGRAM_GROUP_KIND_HITGROUP;
433 group_descs[PG_HITD].hitgroup.moduleCH = optix_module;
434 group_descs[PG_HITD].hitgroup.entryFunctionNameCH =
"__closesthit__kernel_optix_hit";
435 group_descs[PG_HITD].hitgroup.moduleAH = optix_module;
436 group_descs[PG_HITD].hitgroup.entryFunctionNameAH =
"__anyhit__kernel_optix_visibility_test";
437 group_descs[PG_HITS].kind = OPTIX_PROGRAM_GROUP_KIND_HITGROUP;
438 group_descs[PG_HITS].hitgroup.moduleAH = optix_module;
439 group_descs[PG_HITS].hitgroup.entryFunctionNameAH =
"__anyhit__kernel_optix_shadow_all_hit";
440 group_descs[PG_HITV].kind = OPTIX_PROGRAM_GROUP_KIND_HITGROUP;
441 group_descs[PG_HITV].hitgroup.moduleCH = optix_module;
442 group_descs[PG_HITV].hitgroup.entryFunctionNameCH =
"__closesthit__kernel_optix_hit";
443 group_descs[PG_HITV].hitgroup.moduleAH = optix_module;
444 group_descs[PG_HITV].hitgroup.entryFunctionNameAH =
"__anyhit__kernel_optix_volume_test";
// Built-in curve intersection module: Catmull-Rom for ABI >= 55, cubic B-spline
// otherwise. Slot 0 is requested without motion blur.
449 OptixBuiltinISOptions builtin_options = {};
450# if OPTIX_ABI_VERSION >= 55
451 builtin_options.builtinISModuleType = OPTIX_PRIMITIVE_TYPE_ROUND_CATMULLROM;
452 builtin_options.buildFlags = OPTIX_BUILD_FLAG_PREFER_FAST_TRACE |
453 OPTIX_BUILD_FLAG_ALLOW_COMPACTION |
454 OPTIX_BUILD_FLAG_ALLOW_UPDATE;
455 builtin_options.curveEndcapFlags = OPTIX_CURVE_ENDCAP_DEFAULT;
457 builtin_options.builtinISModuleType = OPTIX_PRIMITIVE_TYPE_ROUND_CUBIC_BSPLINE;
459 builtin_options.usesMotionBlur =
false;
461 optix_assert(optixBuiltinISModuleGet(
462 context, &module_options, &pipeline_options, &builtin_options, &builtin_modules[0]));
// With a built-in intersection module the IS entry function name is left null.
464 group_descs[PG_HITD].hitgroup.moduleIS = builtin_modules[0];
465 group_descs[PG_HITD].hitgroup.entryFunctionNameIS =
nullptr;
466 group_descs[PG_HITS].hitgroup.moduleIS = builtin_modules[0];
467 group_descs[PG_HITS].hitgroup.entryFunctionNameIS =
nullptr;
// Slot 1 is the motion-blur variant; the *_MOTION groups are copies of the
// non-motion descriptions with only moduleIS replaced.
469 if (pipeline_options.usesMotionBlur) {
470 builtin_options.usesMotionBlur =
true;
472 optix_assert(optixBuiltinISModuleGet(
473 context, &module_options, &pipeline_options, &builtin_options, &builtin_modules[1]));
475 group_descs[PG_HITD_MOTION] = group_descs[PG_HITD];
476 group_descs[PG_HITD_MOTION].hitgroup.moduleIS = builtin_modules[1];
477 group_descs[PG_HITS_MOTION] = group_descs[PG_HITS];
478 group_descs[PG_HITS_MOTION].hitgroup.moduleIS = builtin_modules[1];
// Alternative path using the kernel module's own ribbon curve intersection
// program (the branch condition is not visible in this excerpt).
483 group_descs[PG_HITD].hitgroup.moduleIS = optix_module;
484 group_descs[PG_HITS].hitgroup.moduleIS = optix_module;
485 group_descs[PG_HITD].hitgroup.entryFunctionNameIS =
"__intersection__curve_ribbon";
486 group_descs[PG_HITS].hitgroup.entryFunctionNameIS =
"__intersection__curve_ribbon";
// Point cloud hit groups: copies of PG_HITD / PG_HITS with the point
// intersection program from the kernel module.
491 group_descs[PG_HITD_POINTCLOUD] = group_descs[PG_HITD];
492 group_descs[PG_HITD_POINTCLOUD].kind = OPTIX_PROGRAM_GROUP_KIND_HITGROUP;
493 group_descs[PG_HITD_POINTCLOUD].hitgroup.moduleIS = optix_module;
494 group_descs[PG_HITD_POINTCLOUD].hitgroup.entryFunctionNameIS =
"__intersection__point";
495 group_descs[PG_HITS_POINTCLOUD] = group_descs[PG_HITS];
496 group_descs[PG_HITS_POINTCLOUD].kind = OPTIX_PROGRAM_GROUP_KIND_HITGROUP;
497 group_descs[PG_HITS_POINTCLOUD].hitgroup.moduleIS = optix_module;
498 group_descs[PG_HITS_POINTCLOUD].hitgroup.entryFunctionNameIS =
"__intersection__point";
// Local-hit any-hit group.
503 group_descs[PG_HITL].kind = OPTIX_PROGRAM_GROUP_KIND_HITGROUP;
504 group_descs[PG_HITL].hitgroup.moduleAH = optix_module;
505 group_descs[PG_HITL].hitgroup.entryFunctionNameAH =
"__anyhit__kernel_optix_local_hit";
// Shade-surface raytrace raygen plus the AO and bevel direct callables.
510 group_descs[PG_RGEN_SHADE_SURFACE_RAYTRACE].kind = OPTIX_PROGRAM_GROUP_KIND_RAYGEN;
511 group_descs[PG_RGEN_SHADE_SURFACE_RAYTRACE].raygen.module = optix_module;
512 group_descs[PG_RGEN_SHADE_SURFACE_RAYTRACE].raygen.entryFunctionName =
513 "__raygen__kernel_optix_integrator_shade_surface_raytrace";
517 group_descs[PG_CALL_SVM_AO].kind = OPTIX_PROGRAM_GROUP_KIND_CALLABLES;
518 group_descs[PG_CALL_SVM_AO].callables.moduleDC = optix_module;
519 group_descs[PG_CALL_SVM_AO].callables.entryFunctionNameDC =
"__direct_callable__svm_node_ao";
520 group_descs[PG_CALL_SVM_BEVEL].kind = OPTIX_PROGRAM_GROUP_KIND_CALLABLES;
521 group_descs[PG_CALL_SVM_BEVEL].callables.moduleDC = optix_module;
522 group_descs[PG_CALL_SVM_BEVEL].callables.entryFunctionNameDC =
523 "__direct_callable__svm_node_bevel";
528 group_descs[PG_RGEN_SHADE_SURFACE_MNEE].kind = OPTIX_PROGRAM_GROUP_KIND_RAYGEN;
529 group_descs[PG_RGEN_SHADE_SURFACE_MNEE].raygen.module = optix_module;
530 group_descs[PG_RGEN_SHADE_SURFACE_MNEE].raygen.entryFunctionName =
531 "__raygen__kernel_optix_integrator_shade_surface_mnee";
// Remaining shading and shader-evaluation raygen programs.
536 group_descs[PG_RGEN_SHADE_BACKGROUND].kind = OPTIX_PROGRAM_GROUP_KIND_RAYGEN;
537 group_descs[PG_RGEN_SHADE_BACKGROUND].raygen.module = optix_module;
538 group_descs[PG_RGEN_SHADE_BACKGROUND].raygen.entryFunctionName =
539 "__raygen__kernel_optix_integrator_shade_background";
540 group_descs[PG_RGEN_SHADE_LIGHT].kind = OPTIX_PROGRAM_GROUP_KIND_RAYGEN;
541 group_descs[PG_RGEN_SHADE_LIGHT].raygen.module = optix_module;
542 group_descs[PG_RGEN_SHADE_LIGHT].raygen.entryFunctionName =
543 "__raygen__kernel_optix_integrator_shade_light";
544 group_descs[PG_RGEN_SHADE_SURFACE].kind = OPTIX_PROGRAM_GROUP_KIND_RAYGEN;
545 group_descs[PG_RGEN_SHADE_SURFACE].raygen.module = optix_module;
546 group_descs[PG_RGEN_SHADE_SURFACE].raygen.entryFunctionName =
547 "__raygen__kernel_optix_integrator_shade_surface";
548 group_descs[PG_RGEN_SHADE_VOLUME].kind = OPTIX_PROGRAM_GROUP_KIND_RAYGEN;
549 group_descs[PG_RGEN_SHADE_VOLUME].raygen.module = optix_module;
550 group_descs[PG_RGEN_SHADE_VOLUME].raygen.entryFunctionName =
551 "__raygen__kernel_optix_integrator_shade_volume";
552 group_descs[PG_RGEN_SHADE_SHADOW].kind = OPTIX_PROGRAM_GROUP_KIND_RAYGEN;
553 group_descs[PG_RGEN_SHADE_SHADOW].raygen.module = optix_module;
554 group_descs[PG_RGEN_SHADE_SHADOW].raygen.entryFunctionName =
555 "__raygen__kernel_optix_integrator_shade_shadow";
556 group_descs[PG_RGEN_SHADE_DEDICATED_LIGHT].kind = OPTIX_PROGRAM_GROUP_KIND_RAYGEN;
557 group_descs[PG_RGEN_SHADE_DEDICATED_LIGHT].raygen.module = optix_module;
558 group_descs[PG_RGEN_SHADE_DEDICATED_LIGHT].raygen.entryFunctionName =
559 "__raygen__kernel_optix_integrator_shade_dedicated_light";
560 group_descs[PG_RGEN_EVAL_DISPLACE].kind = OPTIX_PROGRAM_GROUP_KIND_RAYGEN;
561 group_descs[PG_RGEN_EVAL_DISPLACE].raygen.module = optix_module;
562 group_descs[PG_RGEN_EVAL_DISPLACE].raygen.entryFunctionName =
563 "__raygen__kernel_optix_shader_eval_displace";
564 group_descs[PG_RGEN_EVAL_BACKGROUND].kind = OPTIX_PROGRAM_GROUP_KIND_RAYGEN;
565 group_descs[PG_RGEN_EVAL_BACKGROUND].raygen.module = optix_module;
566 group_descs[PG_RGEN_EVAL_BACKGROUND].raygen.entryFunctionName =
567 "__raygen__kernel_optix_shader_eval_background";
568 group_descs[PG_RGEN_EVAL_CURVE_SHADOW_TRANSPARENCY].kind = OPTIX_PROGRAM_GROUP_KIND_RAYGEN;
569 group_descs[PG_RGEN_EVAL_CURVE_SHADOW_TRANSPARENCY].raygen.module = optix_module;
570 group_descs[PG_RGEN_EVAL_CURVE_SHADOW_TRANSPARENCY].raygen.entryFunctionName =
571 "__raygen__kernel_optix_shader_eval_curve_shadow_transparency";
// All NUM_PROGRAM_GROUPS descriptions are created in a single call.
574 optix_assert(optixProgramGroupCreate(
575 context, group_descs, NUM_PROGRAM_GROUPS, &group_options,
nullptr, 0, groups));
// Shader binding table: one zero-initialised record per program group, with the
// header packed from the group; the stack size of each group is queried in the
// same loop.
578 OptixStackSizes stack_size[NUM_PROGRAM_GROUPS] = {};
580 sbt_data.alloc(NUM_PROGRAM_GROUPS);
581 memset(sbt_data.host_pointer, 0,
sizeof(SbtRecord) * NUM_PROGRAM_GROUPS);
582 for (
int i = 0; i < NUM_PROGRAM_GROUPS; ++i) {
583 optix_assert(optixSbtRecordPackHeader(groups[i], &sbt_data[i]));
584# if OPTIX_ABI_VERSION >= 84
585 optix_assert(optixProgramGroupGetStackSize(groups[i], &stack_size[i],
nullptr));
587 optix_assert(optixProgramGroupGetStackSize(groups[i], &stack_size[i]));
590 sbt_data.copy_to_device();
// Continuation stack needed by one trace call: the closest-hit size of PG_HITD,
// raised to the largest (intersection + any-hit) sum over the hit groups below.
593 unsigned int trace_css = stack_size[PG_HITD].cssCH;
595 trace_css = std::max(trace_css, stack_size[PG_HITD].cssIS + stack_size[PG_HITD].cssAH);
596 trace_css = std::max(trace_css, stack_size[PG_HITS].cssIS + stack_size[PG_HITS].cssAH);
597 trace_css = std::max(trace_css, stack_size[PG_HITL].cssIS + stack_size[PG_HITL].cssAH);
598 trace_css = std::max(trace_css, stack_size[PG_HITV].cssIS + stack_size[PG_HITV].cssAH);
599 trace_css = std::max(trace_css,
600 stack_size[PG_HITD_MOTION].cssIS + stack_size[PG_HITD_MOTION].cssAH);
601 trace_css = std::max(trace_css,
602 stack_size[PG_HITS_MOTION].cssIS + stack_size[PG_HITS_MOTION].cssAH);
603 trace_css = std::max(
604 trace_css, stack_size[PG_HITD_POINTCLOUD].cssIS + stack_size[PG_HITD_POINTCLOUD].cssAH);
605 trace_css = std::max(
606 trace_css, stack_size[PG_HITS_POINTCLOUD].cssIS + stack_size[PG_HITS_POINTCLOUD].cssAH);
// Link options shared by both pipelines: trace depth 1.
608 OptixPipelineLinkOptions link_options = {};
609 link_options.maxTraceDepth = 1;
610# if OPTIX_ABI_VERSION < 84
611 link_options.debugLevel = module_options.debugLevel;
// --- PIP_SHADE pipeline: ray-tracing shade kernels, the SVM callables and the
// hit/miss groups (the feature checks around individual pushes are not visible).
621 pipeline_groups.reserve(NUM_PROGRAM_GROUPS);
623 pipeline_groups.push_back(groups[PG_RGEN_SHADE_SURFACE_RAYTRACE]);
624 pipeline_groups.push_back(groups[PG_CALL_SVM_AO]);
625 pipeline_groups.push_back(groups[PG_CALL_SVM_BEVEL]);
628 pipeline_groups.push_back(groups[PG_RGEN_SHADE_SURFACE_MNEE]);
630 pipeline_groups.push_back(groups[PG_MISS]);
631 pipeline_groups.push_back(groups[PG_HITD]);
632 pipeline_groups.push_back(groups[PG_HITS]);
633 pipeline_groups.push_back(groups[PG_HITL]);
634 pipeline_groups.push_back(groups[PG_HITV]);
635 if (pipeline_options.usesMotionBlur) {
636 pipeline_groups.push_back(groups[PG_HITD_MOTION]);
637 pipeline_groups.push_back(groups[PG_HITS_MOTION]);
640 pipeline_groups.push_back(groups[PG_HITD_POINTCLOUD]);
641 pipeline_groups.push_back(groups[PG_HITS_POINTCLOUD]);
644 optix_assert(optixPipelineCreate(context,
647 pipeline_groups.data(),
648 pipeline_groups.size(),
651 &pipelines[PIP_SHADE]));
// Continuation stack = largest raygen requirement + trace depth * trace_css;
// direct-callable stack = larger of the two SVM callables.
654 const unsigned int css = std::max(stack_size[PG_RGEN_SHADE_SURFACE_RAYTRACE].cssRG,
655 stack_size[PG_RGEN_SHADE_SURFACE_MNEE].cssRG) +
656 link_options.maxTraceDepth * trace_css;
657 const unsigned int dss = std::max(stack_size[PG_CALL_SVM_AO].dssDC,
658 stack_size[PG_CALL_SVM_BEVEL].dssDC);
// Last argument is 3 with motion blur and 2 without.
661 optix_assert(optixPipelineSetStackSize(
662 pipelines[PIP_SHADE], 0, dss, css, pipeline_options.usesMotionBlur ? 3 : 2));
// --- PIP_INTERSECT pipeline: the five intersection raygen programs plus the
// hit/miss groups.
667 pipeline_groups.reserve(NUM_PROGRAM_GROUPS);
668 pipeline_groups.push_back(groups[PG_RGEN_INTERSECT_CLOSEST]);
669 pipeline_groups.push_back(groups[PG_RGEN_INTERSECT_SHADOW]);
670 pipeline_groups.push_back(groups[PG_RGEN_INTERSECT_SUBSURFACE]);
671 pipeline_groups.push_back(groups[PG_RGEN_INTERSECT_VOLUME_STACK]);
672 pipeline_groups.push_back(groups[PG_RGEN_INTERSECT_DEDICATED_LIGHT]);
673 pipeline_groups.push_back(groups[PG_MISS]);
674 pipeline_groups.push_back(groups[PG_HITD]);
675 pipeline_groups.push_back(groups[PG_HITS]);
676 pipeline_groups.push_back(groups[PG_HITL]);
677 pipeline_groups.push_back(groups[PG_HITV]);
678 if (pipeline_options.usesMotionBlur) {
679 pipeline_groups.push_back(groups[PG_HITD_MOTION]);
680 pipeline_groups.push_back(groups[PG_HITS_MOTION]);
683 pipeline_groups.push_back(groups[PG_HITD_POINTCLOUD]);
684 pipeline_groups.push_back(groups[PG_HITS_POINTCLOUD]);
687 optix_assert(optixPipelineCreate(context,
690 pipeline_groups.data(),
691 pipeline_groups.size(),
694 &pipelines[PIP_INTERSECT]));
// NOTE(review): PG_RGEN_INTERSECT_DEDICATED_LIGHT is part of this pipeline (see
// the push above) but its cssRG does not take part in the maximum below. If
// that raygen needs more continuation stack than the other four, the value
// would be too small -- confirm whether the omission is intentional.
697 const unsigned int css =
698 std::max(stack_size[PG_RGEN_INTERSECT_CLOSEST].cssRG,
699 std::max(stack_size[PG_RGEN_INTERSECT_SHADOW].cssRG,
700 std::max(stack_size[PG_RGEN_INTERSECT_SUBSURFACE].cssRG,
701 stack_size[PG_RGEN_INTERSECT_VOLUME_STACK].cssRG))) +
702 link_options.maxTraceDepth * trace_css;
// No direct callables in this pipeline, so the callable stack sizes are 0.
704 optix_assert(optixPipelineSetStackSize(
705 pipelines[PIP_INTERSECT], 0, 0, css, pipeline_options.usesMotionBlur ? 3 : 2));
708 return !have_error();
// Builds the OSL variant of the PIP_SHADE pipeline.
// Visible steps, in order: collect PTX plus init/entry names for every OSL
// shader group, destroy the previous PIP_SHADE pipeline and OSL modules/groups,
// load the OSL services module, create one OptiX module and two direct-callable
// program groups per OSL kernel, rebuild the shader binding table, create the
// pipeline and set its stack sizes.
// Returns !have_error() at the end.
// NOTE(review): many lines (braces, early returns, conditions, #else/#endif) are
// missing from this excerpt, so several branches below cannot be told apart.
711bool OptiXDevice::load_osl_kernels()
// The shader groups iterated below are selected from the surface, volume,
// displacement and bump states of osl_globals (the selecting expression is only
// partly visible).
732 osl_globals.surface_state :
734 osl_globals.volume_state :
736 osl_globals.displacement_state :
737 osl_globals.bump_state);
738 for (
const OSL::ShaderGroupRef &group : groups) {
740 string osl_ptx, init_name, entry_name;
741 osl_globals.ss->getattribute(group.get(),
"group_init_name", init_name);
742 osl_globals.ss->getattribute(group.get(),
"group_entry_name", entry_name);
743 osl_globals.ss->getattribute(
744 group.get(),
"ptx_compiled_version", OSL::TypeDesc::PTR, &osl_ptx);
// "llvm_groupdata_size" is queried first; "groupdata_size" is the fallback when
// that leaves the size at 0.
746 int groupdata_size = 0;
747 osl_globals.ss->getattribute(group.get(),
"llvm_groupdata_size", groupdata_size);
748 if (groupdata_size == 0) {
750 osl_globals.ss->getattribute(group.get(),
"groupdata_size", groupdata_size);
// Group data larger than 2048 is reported as unsupported.
752 if (groupdata_size > 2048) {
754 string_printf(
"Requested OSL group data size (%d) is greater than the maximum "
755 "supported with OptiX (2048)",
// One osl_kernels entry per shader group: either the PTX with its two entry
// names, or an empty placeholder (the condition choosing between the two is not
// visible in this excerpt).
760 osl_kernels.push_back({std::move(osl_ptx), std::move(init_name), std::move(entry_name)});
764 osl_kernels.emplace_back();
769 const CUDAContextScope scope(
this);
// Drop the previous shading pipeline and all previous OSL modules and groups.
771 if (pipelines[PIP_SHADE]) {
772 optixPipelineDestroy(pipelines[PIP_SHADE]);
775 for (OptixModule &
module : osl_modules) {
777 optixModuleDestroy(
module);
781 for (OptixProgramGroup &group : osl_groups) {
783 optixProgramGroupDestroy(group);
788 if (osl_kernels.empty()) {
793 OptixProgramGroupOptions group_options = {};
794 OptixModuleCompileOptions module_options = {};
795 module_options.optLevel = OPTIX_COMPILE_OPTIMIZATION_LEVEL_3;
796 module_options.debugLevel = OPTIX_COMPILE_DEBUG_LEVEL_NONE;
// Two program groups per kernel plus one extra group, and one module per kernel
// plus one extra module; the extra (last) slots are filled through .back() with
// the OSL services module and its group below.
798 osl_groups.resize(osl_kernels.size() * 2 + 1);
799 osl_modules.resize(osl_kernels.size() + 1);
802 string ptx_data, ptx_filename =
path_get(
"lib/kernel_optix_osl_services.ptx.zst");
804 set_error(
string_printf(
"Failed to load OptiX OSL services kernel from '%s'",
805 ptx_filename.c_str()));
// Services module: optixModuleCreate for ABI >= 84, optixModuleCreateFromPTX
// otherwise.
809# if OPTIX_ABI_VERSION >= 84
810 const OptixResult
result = optixModuleCreate(context,
817 &osl_modules.back());
819 const OptixResult
result = optixModuleCreateFromPTX(context,
826 &osl_modules.back());
828 if (
result != OPTIX_SUCCESS) {
829 set_error(
string_printf(
"Failed to load OptiX OSL services kernel from '%s' (%s)",
830 ptx_filename.c_str(),
831 optixGetErrorName(
result)));
// Direct-callable group for the services module, stored in the last group slot.
835 OptixProgramGroupDesc group_desc = {};
836 group_desc.kind = OPTIX_PROGRAM_GROUP_KIND_CALLABLES;
837 group_desc.callables.entryFunctionNameDC =
"__direct_callable__dummy_services";
838 group_desc.callables.moduleDC = osl_modules.back();
840 optix_assert(optixProgramGroupCreate(
841 context, &group_desc, 1, &group_options,
nullptr, 0, &osl_groups.back()));
// Create one module per OSL kernel, recording each result in results[i].
// Entries with empty PTX are tested first (what happens to them is not visible
// in this excerpt). The creation API again depends on OPTIX_ABI_VERSION; the
// oldest path pushes the creation call onto `pool` as a lambda.
847 for (
size_t i = 0; i < osl_kernels.size(); ++i) {
848 if (osl_kernels[i].ptx.empty()) {
852# if OPTIX_ABI_VERSION >= 84
853 OptixTask
task =
nullptr;
854 results[i] = optixModuleCreateWithTasks(context,
857 osl_kernels[i].ptx.data(),
858 osl_kernels[i].ptx.size(),
863 if (results[i] == OPTIX_SUCCESS) {
864 execute_optix_task(pool, task, results[i]);
866# elif OPTIX_ABI_VERSION >= 55
867 OptixTask
task =
nullptr;
868 results[i] = optixModuleCreateFromPTXWithTasks(context,
871 osl_kernels[i].ptx.data(),
872 osl_kernels[i].ptx.size(),
877 if (results[i] == OPTIX_SUCCESS) {
878 execute_optix_task(pool, task, results[i]);
881 pool.
push([
this, &results, i, &module_options, &osl_kernels]() {
882 results[i] = optixModuleCreateFromPTX(context,
885 osl_kernels[i].ptx.data(),
886 osl_kernels[i].ptx.size(),
// Second pass: report failed modules, then create the init and exec callable
// groups of kernel i at osl_groups[i * 2] and osl_groups[i * 2 + 1].
896 for (
size_t i = 0; i < osl_kernels.size(); ++i) {
897 if (osl_kernels[i].ptx.empty()) {
901 if (results[i] != OPTIX_SUCCESS) {
902 set_error(
string_printf(
"Failed to load OptiX OSL kernel for %s (%s)",
903 osl_kernels[i].init_entry.c_str(),
904 optixGetErrorName(results[i])));
908 OptixProgramGroupDesc group_descs[2] = {};
909 group_descs[0].kind = OPTIX_PROGRAM_GROUP_KIND_CALLABLES;
910 group_descs[0].callables.entryFunctionNameDC = osl_kernels[i].init_entry.c_str();
911 group_descs[0].callables.moduleDC = osl_modules[i];
912 group_descs[1].kind = OPTIX_PROGRAM_GROUP_KIND_CALLABLES;
913 group_descs[1].callables.entryFunctionNameDC = osl_kernels[i].exec_entry.c_str();
914 group_descs[1].callables.moduleDC = osl_modules[i];
916 optix_assert(optixProgramGroupCreate(
917 context, group_descs, 2, &group_options,
nullptr, 0, &osl_groups[i * 2]));
// Shader binding table: the regular program groups first, followed by one
// record per OSL group slot. A second pack call below uses osl_groups.back()
// for the same record index; presumably it is the branch for NULL slots, which
// is not visible in this excerpt -- confirm.
921 sbt_data.alloc(NUM_PROGRAM_GROUPS + osl_groups.size());
922 for (
int i = 0; i < NUM_PROGRAM_GROUPS; ++i) {
923 optix_assert(optixSbtRecordPackHeader(groups[i], &sbt_data[i]));
925 for (
size_t i = 0; i < osl_groups.size(); ++i) {
926 if (osl_groups[i] !=
NULL) {
927 optix_assert(optixSbtRecordPackHeader(osl_groups[i], &sbt_data[NUM_PROGRAM_GROUPS + i]));
932 optix_assert(optixSbtRecordPackHeader(osl_groups.back(), &sbt_data[NUM_PROGRAM_GROUPS + i]));
935 sbt_data.copy_to_device();
// Trace depth is 0 for this pipeline.
937 OptixPipelineLinkOptions link_options = {};
938 link_options.maxTraceDepth = 0;
939# if OPTIX_ABI_VERSION < 84
940 link_options.debugLevel = OPTIX_COMPILE_DEBUG_LEVEL_NONE;
// NOTE(review): the reserve covers NUM_PROGRAM_GROUPS entries only, while the
// OSL groups are appended as well; it is a capacity hint, so this affects
// reallocation only.
945 pipeline_groups.reserve(NUM_PROGRAM_GROUPS);
946 pipeline_groups.push_back(groups[PG_RGEN_SHADE_BACKGROUND]);
947 pipeline_groups.push_back(groups[PG_RGEN_SHADE_LIGHT]);
948 pipeline_groups.push_back(groups[PG_RGEN_SHADE_SURFACE]);
949 pipeline_groups.push_back(groups[PG_RGEN_SHADE_SURFACE_RAYTRACE]);
950 pipeline_groups.push_back(groups[PG_RGEN_SHADE_SURFACE_MNEE]);
951 pipeline_groups.push_back(groups[PG_RGEN_SHADE_VOLUME]);
952 pipeline_groups.push_back(groups[PG_RGEN_SHADE_SHADOW]);
953 pipeline_groups.push_back(groups[PG_RGEN_SHADE_DEDICATED_LIGHT]);
954 pipeline_groups.push_back(groups[PG_RGEN_EVAL_DISPLACE]);
955 pipeline_groups.push_back(groups[PG_RGEN_EVAL_BACKGROUND]);
956 pipeline_groups.push_back(groups[PG_RGEN_EVAL_CURVE_SHADOW_TRANSPARENCY]);
958 for (
const OptixProgramGroup &group : osl_groups) {
960 pipeline_groups.push_back(group);
964 optix_assert(optixPipelineCreate(context,
967 pipeline_groups.data(),
968 pipeline_groups.size(),
971 &pipelines[PIP_SHADE]));
// Stack sizes of the regular groups and of every non-NULL OSL group. For ABI
// >= 84 the OSL query is made against the just-created pipeline.
974 OptixStackSizes stack_size[NUM_PROGRAM_GROUPS] = {};
977 for (
int i = 0; i < NUM_PROGRAM_GROUPS; ++i) {
978# if OPTIX_ABI_VERSION >= 84
979 optix_assert(optixProgramGroupGetStackSize(groups[i], &stack_size[i],
nullptr));
981 optix_assert(optixProgramGroupGetStackSize(groups[i], &stack_size[i]));
984 for (
size_t i = 0; i < osl_groups.size(); ++i) {
985 if (osl_groups[i] !=
NULL) {
986# if OPTIX_ABI_VERSION >= 84
987 optix_assert(optixProgramGroupGetStackSize(
988 osl_groups[i], &osl_stack_size[i], pipelines[PIP_SHADE]));
990 optix_assert(optixProgramGroupGetStackSize(osl_groups[i], &osl_stack_size[i]));
// NOTE(review): only two of the eleven raygen groups added to this pipeline
// take part in the continuation stack maximum -- confirm that the other raygen
// programs never need more.
995 const unsigned int css = std::max(stack_size[PG_RGEN_SHADE_SURFACE_RAYTRACE].cssRG,
996 stack_size[PG_RGEN_SHADE_SURFACE_MNEE].cssRG);
// Direct-callable stack: the largest requirement over all OSL groups.
997 unsigned int dss = 0;
998 for (
unsigned int i = 0; i < osl_stack_size.size(); ++i) {
999 dss = std::max(dss, osl_stack_size[i].dssDC);
1002 optix_assert(optixPipelineSetStackSize(
1003 pipelines[PIP_SHADE], 0, dss, css, pipeline_options.usesMotionBlur ? 3 : 2));
1006 return !have_error();
// Returns the address of the device's osl_globals member as an untyped pointer.
// NOTE(review): any preprocessor guard or alternative return around this line is
// not visible in this excerpt.
1012void *OptiXDevice::get_cpu_osl_memory()
1015 return &osl_globals;
// Builds or updates one OptiX acceleration structure for `bvh` from a single
// build input, storing the resulting handle in bvh->traversable_handle, and
// compacts the result when the fast-trace path is in use.
//
// bvh:         receives the traversable handle.
// operation:   OPTIX_BUILD_OPERATION_* value forwarded to the build options.
// build_input: the single build input; its type influences the build flags.
// Returns !have_error().
// NOTE(review): the remaining parameters (for example the num_motion_steps used
// below) and several branches are not visible in this excerpt.
1021bool OptiXDevice::build_optix_bvh(BVHOptiX *bvh,
1022 OptixBuildOperation operation,
1023 const OptixBuildInput &build_input,
1033 const CUDAContextScope scope(
this);
1038 OptixAccelBufferSizes sizes = {};
1039 OptixAccelBuildOptions
options = {};
1040 options.operation = operation;
// Build flag selection: curve inputs always get fast-trace + compaction +
// update and force use_fast_trace_bvh on; otherwise fast-trace + compaction
// when use_fast_trace_bvh is set, else fast-build + update.
1041 if (build_input.type == OPTIX_BUILD_INPUT_TYPE_CURVES) {
1044 options.buildFlags = OPTIX_BUILD_FLAG_PREFER_FAST_TRACE | OPTIX_BUILD_FLAG_ALLOW_COMPACTION |
1045 OPTIX_BUILD_FLAG_ALLOW_UPDATE;
1046 use_fast_trace_bvh =
true;
1048 else if (use_fast_trace_bvh) {
1049 VLOG_INFO <<
"Using fast to trace OptiX BVH";
1050 options.buildFlags = OPTIX_BUILD_FLAG_PREFER_FAST_TRACE | OPTIX_BUILD_FLAG_ALLOW_COMPACTION;
1053 VLOG_INFO <<
"Using fast to update OptiX BVH";
1054 options.buildFlags = OPTIX_BUILD_FLAG_PREFER_FAST_BUILD | OPTIX_BUILD_FLAG_ALLOW_UPDATE;
// Motion keys cover the time range [0, 1] with both vanish flags set.
1057 options.motionOptions.numKeys = num_motion_steps;
1058 options.motionOptions.flags = OPTIX_MOTION_FLAG_START_VANISH | OPTIX_MOTION_FLAG_END_VANISH;
1059 options.motionOptions.timeBegin = 0.0f;
1060 options.motionOptions.timeEnd = 1.0f;
1062 optix_assert(optixAccelComputeMemoryUsage(context, &
options, &build_input, 1, &sizes));
// The temporary buffer is over-allocated (size rounded up to 8, plus 8 bytes)
// so the compacted-size result can be written at an 8-byte aligned address just
// past the temp data (see compacted_size_prop.result below).
1066 temp_mem.alloc_to_device(
align_up(sizes.tempSizeInBytes, 8) + 8);
1067 if (!temp_mem.device_pointer) {
1074 if (operation == OPTIX_BUILD_OPERATION_BUILD) {
1075 assert(out_data.
device ==
this);
1086 OptixAccelEmitDesc compacted_size_prop = {};
1087 compacted_size_prop.
type = OPTIX_PROPERTY_TYPE_COMPACTED_SIZE;
1090 compacted_size_prop.result =
align_up(temp_mem.device_pointer + sizes.tempSizeInBytes, 8);
// The compacted-size property is only requested on the fast-trace path.
1092 OptixTraversableHandle out_handle = 0;
1093 optix_assert(optixAccelBuild(context,
1098 temp_mem.device_pointer,
1099 sizes.tempSizeInBytes,
1101 sizes.outputSizeInBytes,
1103 use_fast_trace_bvh ? &compacted_size_prop :
NULL,
1104 use_fast_trace_bvh ? 1 : 0));
1105 bvh->traversable_handle =
static_cast<uint64_t>(out_handle);
// Synchronise the NULL stream before reading results back.
1108 cuda_assert(cuStreamSynchronize(
NULL));
// Compaction: read the emitted compacted size back to the host and, only when
// it is smaller than the uncompacted output, compact into a new allocation.
1112 if (use_fast_trace_bvh) {
1113 uint64_t compacted_size = sizes.outputSizeInBytes;
1114 cuda_assert(cuMemcpyDtoH(&compacted_size, compacted_size_prop.result,
sizeof(compacted_size)));
1120 if (compacted_size < sizes.outputSizeInBytes) {
1122 compacted_data.alloc_to_device(compacted_size);
1123 if (!compacted_data.device_pointer) {
1126 return !have_error();
1129 optix_assert(optixAccelCompact(
1130 context,
NULL, out_handle, compacted_data.device_pointer, compacted_size, &out_handle));
1131 bvh->traversable_handle =
static_cast<uint64_t>(out_handle);
1134 cuda_assert(cuStreamSynchronize(
NULL));
// Swap size and pointer so out_data now refers to the compacted allocation and
// compacted_data to the original one.
1136 std::swap(out_data.
device_size, compacted_data.device_size);
1137 std::swap(out_data.
device_pointer, compacted_data.device_pointer);
1143 return !have_error();
1150 free_bvh_memory_delayed();
1152 BVHOptiX *
const bvh_optix =
static_cast<BVHOptiX *
>(bvh);
1154 progress.
set_substatus(
"Building OptiX acceleration structure");
1161 OptixBuildOperation operation = OPTIX_BUILD_OPERATION_BUILD;
1162 if (
refit && !use_fast_trace_bvh) {
1163 assert(bvh_optix->traversable_handle != 0);
1164 operation = OPTIX_BUILD_OPERATION_UPDATE;
1167 bvh_optix->as_data->free();
1168 bvh_optix->traversable_handle = 0;
1175 Hair *
const hair =
static_cast<Hair *const
>(geom);
1182 size_t num_motion_steps = 1;
1184 if (pipeline_options.usesMotionBlur && hair->get_use_motion_blur() && motion_keys) {
1185 num_motion_steps = hair->get_motion_steps();
1192 size_t num_vertices = num_segments * 4;
1194# if OPTIX_ABI_VERSION >= 55
1197 index_data.alloc(num_segments);
1198 vertex_data.alloc(num_vertices * num_motion_steps);
1201 aabb_data.alloc(num_segments * num_motion_steps);
1205 for (
size_t step = 0;
step < num_motion_steps; ++
step) {
1207 const float3 *keys = hair->get_curve_keys().data();
1208 size_t center_step = (num_motion_steps - 1) / 2;
1209 if (step != center_step) {
1210 size_t attr_offset = (
step > center_step) ? step - 1 :
step;
1212 keys = motion_keys->
data_float3() + attr_offset * hair->get_curve_keys().size();
1215# if OPTIX_ABI_VERSION >= 55
1217 for (
size_t curve_index = 0, segment_index = 0, vertex_index = step * num_vertices;
1222 const array<float> &curve_radius = hair->get_curve_radius();
1224 const int first_key_index = curve.
first_key;
1226 vertex_data[vertex_index++] =
make_float4(keys[first_key_index].
x,
1227 keys[first_key_index].
y,
1228 keys[first_key_index].
z,
1229 curve_radius[first_key_index]);
1234 index_data[segment_index++] = vertex_index - 1;
1236 vertex_data[vertex_index++] =
make_float4(keys[first_key_index + k].
x,
1237 keys[first_key_index + k].
y,
1238 keys[first_key_index + k].
z,
1239 curve_radius[first_key_index + k]);
1242 const int last_key_index = first_key_index + curve.
num_keys - 1;
1244 vertex_data[vertex_index++] =
make_float4(keys[last_key_index].
x,
1245 keys[last_key_index].
y,
1246 keys[last_key_index].
z,
1247 curve_radius[last_key_index]);
1248 vertex_data[vertex_index++] =
make_float4(keys[last_key_index].
x,
1249 keys[last_key_index].
y,
1250 keys[last_key_index].
z,
1251 curve_radius[last_key_index]);
1258 for (
size_t curve_index = 0, i = 0; curve_index < hair->
num_curves(); ++curve_index) {
1262# if OPTIX_ABI_VERSION < 55
1264 const array<float> &curve_radius = hair->get_curve_radius();
1271 index_data[i] = i * 4;
1272 float4 *
const v = vertex_data.data() +
step * num_vertices + index_data[i];
1278 curve_radius[ka], curve_radius[k0], curve_radius[k1], curve_radius[kb]);
1287 dot(cr2bsp0, px),
dot(cr2bsp0, py),
dot(cr2bsp0, pz),
dot(cr2bsp0, pw));
1289 dot(cr2bsp1, px),
dot(cr2bsp1, py),
dot(cr2bsp1, pz),
dot(cr2bsp1, pw));
1291 dot(cr2bsp2, px),
dot(cr2bsp2, py),
dot(cr2bsp2, pz),
dot(cr2bsp2, pw));
1293 dot(cr2bsp3, px),
dot(cr2bsp3, py),
dot(cr2bsp3, pz),
dot(cr2bsp3, pw));
1301 const size_t index =
step * num_segments + i;
1315 aabb_data.copy_to_device();
1316 index_data.copy_to_device();
1317 vertex_data.copy_to_device();
1320 aabb_ptrs.reserve(num_motion_steps);
1323 width_ptrs.reserve(num_motion_steps);
1324 vertex_ptrs.reserve(num_motion_steps);
1325 for (
size_t step = 0;
step < num_motion_steps; ++
step) {
1326 aabb_ptrs.push_back(aabb_data.device_pointer + step * num_segments *
sizeof(OptixAabb));
1327 const device_ptr base_ptr = vertex_data.device_pointer +
1329 width_ptrs.push_back(base_ptr + 3 *
sizeof(
float));
1330 vertex_ptrs.push_back(base_ptr);
1334 unsigned int build_flags = OPTIX_GEOMETRY_FLAG_REQUIRE_SINGLE_ANYHIT_CALL;
1335 OptixBuildInput build_input = {};
1337 build_input.type = OPTIX_BUILD_INPUT_TYPE_CURVES;
1338# if OPTIX_ABI_VERSION >= 55
1339 build_input.curveArray.curveType = OPTIX_PRIMITIVE_TYPE_ROUND_CATMULLROM;
1341 build_input.curveArray.curveType = OPTIX_PRIMITIVE_TYPE_ROUND_CUBIC_BSPLINE;
1343 build_input.curveArray.numPrimitives = num_segments;
1344 build_input.curveArray.vertexBuffers = (CUdeviceptr *)vertex_ptrs.data();
1345 build_input.curveArray.numVertices = num_vertices;
1346 build_input.curveArray.vertexStrideInBytes =
sizeof(
float4);
1347 build_input.curveArray.widthBuffers = (CUdeviceptr *)width_ptrs.data();
1348 build_input.curveArray.widthStrideInBytes =
sizeof(
float4);
1349 build_input.curveArray.indexBuffer = (CUdeviceptr)index_data.device_pointer;
1350 build_input.curveArray.indexStrideInBytes =
sizeof(
int);
1351 build_input.curveArray.flag = build_flags;
1357 build_flags |= OPTIX_GEOMETRY_FLAG_DISABLE_ANYHIT;
1359 build_input.type = OPTIX_BUILD_INPUT_TYPE_CUSTOM_PRIMITIVES;
1360 build_input.customPrimitiveArray.aabbBuffers = (CUdeviceptr *)aabb_ptrs.data();
1361 build_input.customPrimitiveArray.numPrimitives = num_segments;
1362 build_input.customPrimitiveArray.strideInBytes =
sizeof(OptixAabb);
1363 build_input.customPrimitiveArray.flags = &build_flags;
1364 build_input.customPrimitiveArray.numSbtRecords = 1;
1368 if (!build_optix_bvh(bvh_optix, operation, build_input, num_motion_steps)) {
1369 progress.
set_error(
"Failed to build OptiX acceleration structure");
1374 Mesh *
const mesh =
static_cast<Mesh *const
>(geom);
1379 const size_t num_verts = mesh->get_verts().size();
1381 size_t num_motion_steps = 1;
1383 if (pipeline_options.usesMotionBlur && mesh->get_use_motion_blur() && motion_keys) {
1384 num_motion_steps = mesh->get_motion_steps();
1388 index_data.alloc(mesh->get_triangles().size());
1389 memcpy(index_data.data(),
1390 mesh->get_triangles().data(),
1391 mesh->get_triangles().size() *
sizeof(
int));
1393 vertex_data.alloc(num_verts * num_motion_steps);
1395 for (
size_t step = 0;
step < num_motion_steps; ++
step) {
1398 size_t center_step = (num_motion_steps - 1) / 2;
1400 if (step != center_step) {
1404 memcpy(vertex_data.data() + num_verts * step,
verts, num_verts *
sizeof(
float3));
1408 index_data.copy_to_device();
1409 vertex_data.copy_to_device();
1412 vertex_ptrs.reserve(num_motion_steps);
1413 for (
size_t step = 0;
step < num_motion_steps; ++
step) {
1414 vertex_ptrs.push_back(vertex_data.device_pointer + num_verts * step *
sizeof(
float3));
1418 unsigned int build_flags = OPTIX_GEOMETRY_FLAG_REQUIRE_SINGLE_ANYHIT_CALL;
1419 OptixBuildInput build_input = {};
1420 build_input.type = OPTIX_BUILD_INPUT_TYPE_TRIANGLES;
1421 build_input.triangleArray.vertexBuffers = (CUdeviceptr *)vertex_ptrs.data();
1422 build_input.triangleArray.numVertices = num_verts;
1423 build_input.triangleArray.vertexFormat = OPTIX_VERTEX_FORMAT_FLOAT3;
1424 build_input.triangleArray.vertexStrideInBytes =
sizeof(
float4);
1425 build_input.triangleArray.indexBuffer = index_data.device_pointer;
1426 build_input.triangleArray.numIndexTriplets = mesh->
num_triangles();
1427 build_input.triangleArray.indexFormat = OPTIX_INDICES_FORMAT_UNSIGNED_INT3;
1428 build_input.triangleArray.indexStrideInBytes = 3 *
sizeof(
int);
1429 build_input.triangleArray.flags = &build_flags;
1433 build_input.triangleArray.numSbtRecords = 1;
1434 build_input.triangleArray.primitiveIndexOffset = mesh->
prim_offset;
1436 if (!build_optix_bvh(bvh_optix, operation, build_input, num_motion_steps)) {
1437 progress.
set_error(
"Failed to build OptiX acceleration structure");
1443 const size_t num_points = pointcloud->
num_points();
1444 if (num_points == 0) {
1448 size_t num_motion_steps = 1;
1450 if (pipeline_options.usesMotionBlur && pointcloud->get_use_motion_blur() && motion_points) {
1451 num_motion_steps = pointcloud->get_motion_steps();
1455 aabb_data.alloc(num_points * num_motion_steps);
1458 for (
size_t step = 0;
step < num_motion_steps; ++
step) {
1460 size_t center_step = (num_motion_steps - 1) / 2;
1462 if (step == center_step) {
1463 const float3 *points = pointcloud->get_points().data();
1464 const float *radius = pointcloud->get_radius().data();
1466 for (
size_t i = 0; i < num_points; ++i) {
1471 const size_t index =
step * num_points + i;
1481 size_t attr_offset = (
step > center_step) ? step - 1 :
step;
1484 for (
size_t i = 0; i < num_points; ++i) {
1489 const size_t index =
step * num_points + i;
1501 aabb_data.copy_to_device();
1504 aabb_ptrs.reserve(num_motion_steps);
1505 for (
size_t step = 0;
step < num_motion_steps; ++
step) {
1506 aabb_ptrs.push_back(aabb_data.device_pointer + step * num_points *
sizeof(OptixAabb));
1512 unsigned int build_flags = OPTIX_GEOMETRY_FLAG_DISABLE_ANYHIT |
1513 OPTIX_GEOMETRY_FLAG_REQUIRE_SINGLE_ANYHIT_CALL;
1514 OptixBuildInput build_input = {};
1515 build_input.type = OPTIX_BUILD_INPUT_TYPE_CUSTOM_PRIMITIVES;
1516# if OPTIX_ABI_VERSION < 23
1517 build_input.aabbArray.aabbBuffers = (CUdeviceptr *)aabb_ptrs.data();
1518 build_input.aabbArray.numPrimitives = num_points;
1519 build_input.aabbArray.strideInBytes =
sizeof(OptixAabb);
1520 build_input.aabbArray.flags = &build_flags;
1521 build_input.aabbArray.numSbtRecords = 1;
1522 build_input.aabbArray.primitiveIndexOffset = pointcloud->
prim_offset;
1524 build_input.customPrimitiveArray.aabbBuffers = (CUdeviceptr *)aabb_ptrs.data();
1525 build_input.customPrimitiveArray.numPrimitives = num_points;
1526 build_input.customPrimitiveArray.strideInBytes =
sizeof(OptixAabb);
1527 build_input.customPrimitiveArray.flags = &build_flags;
1528 build_input.customPrimitiveArray.numSbtRecords = 1;
1529 build_input.customPrimitiveArray.primitiveIndexOffset = pointcloud->
prim_offset;
1532 if (!build_optix_bvh(bvh_optix, operation, build_input, num_motion_steps)) {
1533 progress.
set_error(
"Failed to build OptiX acceleration structure");
1538 unsigned int num_instances = 0;
1539 unsigned int max_num_instances = 0xFFFFFFFF;
1541 bvh_optix->as_data->free();
1542 bvh_optix->traversable_handle = 0;
1543 bvh_optix->motion_transform_data->free();
1545 optixDeviceContextGetProperty(context,
1546 OPTIX_DEVICE_PROPERTY_LIMIT_MAX_INSTANCE_ID,
1548 sizeof(max_num_instances));
1550 max_num_instances >>= 1;
1551 if (bvh->
objects.size() > max_num_instances) {
1553 "Failed to build OptiX acceleration structure because there are too many instances");
1559 instances.alloc(bvh->
objects.size());
1562 size_t motion_transform_offset = 0;
1563 if (pipeline_options.usesMotionBlur) {
1564 size_t total_motion_transform_size = 0;
1567 total_motion_transform_size =
align_up(total_motion_transform_size,
1568 OPTIX_TRANSFORM_BYTE_ALIGNMENT);
1569 const size_t motion_keys =
max(ob->get_motion().size(), (
size_t)2) - 2;
1570 total_motion_transform_size = total_motion_transform_size +
1571 sizeof(OptixSRTMotionTransform) +
1572 motion_keys *
sizeof(OptixSRTData);
1576 assert(bvh_optix->motion_transform_data->device ==
this);
1577 bvh_optix->motion_transform_data->alloc_to_device(total_motion_transform_size);
1586 BVHOptiX *
const blas =
static_cast<BVHOptiX *
>(ob->get_geometry()->bvh);
1587 OptixTraversableHandle handle = blas->traversable_handle;
1592 OptixInstance &
instance = instances[num_instances++];
1611 if (0 ==
instance.visibilityMask) {
1618 if (pipeline_options.usesMotionBlur && ob->get_geometry()->has_motion_blur()) {
1620 instance.sbtOffset = PG_HITD_MOTION - PG_HITD;
1625 instance.sbtOffset = PG_HITD_POINTCLOUD - PG_HITD;
1631# if OPTIX_ABI_VERSION < 55
1642 instance.flags = OPTIX_INSTANCE_FLAG_DISABLE_ANYHIT;
1646 if (pipeline_options.usesMotionBlur && ob->
use_motion()) {
1647 size_t motion_keys =
max(ob->get_motion().size(), (
size_t)2) - 2;
1648 size_t motion_transform_size =
sizeof(OptixSRTMotionTransform) +
1649 motion_keys *
sizeof(OptixSRTData);
1651 const CUDAContextScope scope(
this);
1653 motion_transform_offset =
align_up(motion_transform_offset,
1654 OPTIX_TRANSFORM_BYTE_ALIGNMENT);
1655 CUdeviceptr motion_transform_gpu = bvh_optix->motion_transform_data->device_pointer +
1656 motion_transform_offset;
1657 motion_transform_offset += motion_transform_size;
1660 OptixSRTMotionTransform &motion_transform = *
reinterpret_cast<OptixSRTMotionTransform *
>(
1661 new uint8_t[motion_transform_size]);
1662 motion_transform.child = handle;
1663 motion_transform.motionOptions.numKeys = ob->get_motion().size();
1664 motion_transform.motionOptions.flags = OPTIX_MOTION_FLAG_NONE;
1665 motion_transform.motionOptions.timeBegin = 0.0f;
1666 motion_transform.motionOptions.timeEnd = 1.0f;
1668 OptixSRTData *
const srt_data = motion_transform.srtData;
1671 decomp.data(), ob->get_motion().
data(), ob->get_motion().size());
1673 for (
size_t i = 0; i < ob->get_motion().
size(); ++i) {
1675 srt_data[i].sx = decomp[i].y.w;
1676 srt_data[i].sy = decomp[i].z.w;
1677 srt_data[i].sz = decomp[i].w.w;
1680 srt_data[i].a = decomp[i].z.x;
1681 srt_data[i].b = decomp[i].z.y;
1682 srt_data[i].c = decomp[i].w.x;
1683 assert(decomp[i].
z.z == 0.0f);
1684 assert(decomp[i].
w.y == 0.0f);
1685 assert(decomp[i].
w.z == 0.0f);
1688 srt_data[i].pvx = 0.0f;
1689 srt_data[i].pvy = 0.0f;
1690 srt_data[i].pvz = 0.0f;
1693 srt_data[i].qx = decomp[i].x.x;
1694 srt_data[i].qy = decomp[i].x.y;
1695 srt_data[i].qz = decomp[i].x.z;
1696 srt_data[i].qw = decomp[i].x.w;
1699 srt_data[i].tx = decomp[i].y.x;
1700 srt_data[i].ty = decomp[i].y.y;
1701 srt_data[i].tz = decomp[i].y.z;
1705 cuMemcpyHtoD(motion_transform_gpu, &motion_transform, motion_transform_size);
1706 delete[]
reinterpret_cast<uint8_t *
>(&motion_transform);
1709 optixConvertPointerToTraversableHandle(context,
1710 motion_transform_gpu,
1711 OPTIX_TRAVERSABLE_TYPE_SRT_MOTION_TRANSFORM,
1715 instance.traversableHandle = handle;
1717 if (ob->get_geometry()->is_instanced()) {
1725 instances.resize(num_instances);
1726 instances.copy_to_device();
1729 OptixBuildInput build_input = {};
1730 build_input.type = OPTIX_BUILD_INPUT_TYPE_INSTANCES;
1731 build_input.instanceArray.instances = instances.device_pointer;
1732 build_input.instanceArray.numInstances = num_instances;
1734 if (!build_optix_bvh(bvh_optix, OPTIX_BUILD_OPERATION_BUILD, build_input, 0)) {
1735 progress.
set_error(
"Failed to build OptiX acceleration structure");
1737 tlas_handle = bvh_optix->traversable_handle;
/* Retire the device buffers held by an OptiX BVH.
 *
 * On the visible lines nothing is freed here: ownership of the two buffers is
 * moved onto `delayed_free_bvh_memory`, which free_bvh_memory_delayed() empties
 * at some later point.
 *
 * NOTE(review): original lines 1742-1745 and 1747 are missing from this listing,
 * so any locking or early-out performed there is not documented - confirm
 * against the full file. */
1741void OptiXDevice::release_bvh(
BVH *bvh)
/* static_cast performs no runtime type check; this assumes every BVH passed in
 * is a BVHOptiX - TODO confirm with callers. */
1746 BVHOptiX *
const bvh_optix =
static_cast<BVHOptiX *
>(bvh);
/* Transfer ownership of the acceleration structure buffer and of the motion
 * transform buffer to the delayed-free list. */
1748 delayed_free_bvh_memory.emplace_back(std::move(bvh_optix->as_data));
1749 delayed_free_bvh_memory.emplace_back(std::move(bvh_optix->motion_transform_data));
/* Reset the handle so the BVH no longer refers to the buffers handed over above. */
1750 bvh_optix->traversable_handle = 0;
/* Release every buffer queued on `delayed_free_bvh_memory`.
 *
 * NOTE(review): the lines between the signature and the call below (original
 * 1754-1755) are missing from this listing; presumably they hold the opening
 * brace and possibly a lock - confirm against the full file. */
1753void OptiXDevice::free_bvh_memory_delayed()
1756 delayed_free_bvh_memory.free_memory();
/* Upload a named block of constant kernel data, then mirror it into the OptiX
 * launch parameters.
 *
 * name: identifier of the data block; compared with strcmp() below.
 * host: host-side pointer to the data to upload.
 * size: size argument forwarded unchanged to the base class and to
 *       update_launch_params().
 *
 * NOTE(review): several original lines (1760-1761, 1763, 1765-1768, 1770-1775,
 * 1779-1781 and everything after 1783) are missing from this listing, including
 * the declaration of `data` and all closing braces - confirm details against
 * the full file. */
1759void OptiXDevice::const_copy_to(
const char *name,
void *host,
size_t size)
/* The CUDA base implementation runs first, before any OptiX-specific handling. */
1762 CUDADevice::const_copy_to(name, host,
size);
/* Special case for the block named "data". */
1764 if (strcmp(name,
"data") == 0) {
/* Store the current top-level traversable handle into the `device_bvh` field,
 * writing through a pointer cast to OptixTraversableHandle.
 * `data` is declared on a line not shown here; presumably it is `host` viewed
 * as the kernel data struct - TODO confirm. */
1769 *(OptixTraversableHandle *)&
data->device_bvh = tlas_handle;
/* Each expansion of KERNEL_DATA_ARRAY compares `name` with the stringified
 * array name and, on a match, forwards host/size to update_launch_params() at
 * that member's offset inside KernelParamsOptiX. The macro is defined right
 * before the include of kernel/data_arrays.h and undefined right after it;
 * presumably that header invokes it once per kernel data array - TODO confirm. */
1776# define KERNEL_DATA_ARRAY(data_type, data_name) \
1777 if (strcmp(name, #data_name) == 0) { \
1778 update_launch_params(offsetof(KernelParamsOptiX, data_name), host, size); \
1782# include "kernel/data_arrays.h"
1783# undef KERNEL_DATA_ARRAY
/* Overwrite part of the device-side launch parameter buffer.
 *
 * offset:    byte offset added to `launch_params.device_pointer`.
 * data:      host pointer to the bytes to copy.
 * data_size: number of bytes to copy.
 *
 * No range check of offset + data_size against the buffer size appears on the
 * visible lines; callers are presumably expected to stay inside the buffer -
 * TODO confirm. */
1786void OptiXDevice::update_launch_params(
size_t offset,
void *
data,
size_t data_size)
/* Scope object constructed from this device and held for the copy below. */
1788 const CUDAContextScope scope(
this);
/* Host-to-device copy; the CUresult is passed to cuda_assert. */
1790 cuda_assert(cuMemcpyHtoD(launch_params.device_pointer + offset,
data, data_size));
in reality light always falls off quadratically Particle Retrieve the data of the particle that spawned the object instance
in reality light always falls off quadratically Particle Retrieve the data of the particle that spawned the object for example to give variation to multiple instances of an object Point Retrieve information about points in a point cloud Retrieve the edges of an object as it appears to Cycles topology will always appear triangulated Convert a blackbody temperature to an RGB value Normal Generate a perturbed normal from an RGB normal map image Typically used for faking highly detailed surfaces Generate an OSL shader from a file or text data block Image Sample an image file as a texture Gabor Generate Gabor noise Gradient Generate interpolated color and intensity values based on the input vector Magic Generate a psychedelic color texture Voronoi Generate Worley noise based on the distance to random points Typically used to generate textures such as or biological cells Brick Generate a procedural texture producing bricks Texture Retrieve multiple types of texture coordinates nTypically used as inputs for texture nodes Vector Convert a point
ATTR_WARN_UNUSED_RESULT const BMVert * v
static DBVT_INLINE btScalar size(const btDbvtVolume &a)
static btDbvtVolume bounds(btDbvtNode **leaves, int count)
void refit(btStridingMeshInterface *triangles, const btVector3 &aabbMin, const btVector3 &aabbMax)
SIMD_FORCE_INLINE const btScalar & z() const
Return the z value.
SIMD_FORCE_INLINE const btScalar & w() const
Return the w value.
Attribute * find(ustring name) const
vector< Geometry * > geometry
vector< Object * > objects
Curve get_curve(size_t i) const
size_t curve_segment_offset
size_t num_curves() const
size_t num_segments() const
CurveShapeType curve_shape
void set_substatus(const string &substatus_)
void set_error(const string &error_message_)
device_ptr device_pointer
void alloc_to_device(size_t num, bool shrink_to_fit=true)
additional_info("compositor_sum_squared_difference_float_shared") .push_constant(Type output_img float dot(value.rgb, luminance_coefficients)") .define("LOAD(value)"
CCL_NAMESPACE_BEGIN struct Options options
#define KERNEL_DATA_ARRAY(type, name)
DebugFlags & DebugFlags()
#define CCL_NAMESPACE_END
draw_view push_constant(Type::INT, "radiance_src") .push_constant(Type capture_info_buf storage_buf(1, Qualifier::READ, "ObjectBounds", "bounds_buf[]") .push_constant(Type draw_view int
@ SHADER_TYPE_DISPLACEMENT
#define KERNEL_FEATURE_OBJECT_MOTION
@ ATTR_STD_MOTION_VERTEX_POSITION
#define KERNEL_FEATURE_OSL
#define KERNEL_FEATURE_SUBSURFACE
#define KERNEL_FEATURE_HAIR_THICK
#define KERNEL_FEATURE_PATH_TRACING
#define KERNEL_FEATURE_HAIR
#define KERNEL_FEATURE_NODE_RAYTRACE
#define KERNEL_FEATURE_BAKING
#define KERNEL_FEATURE_MNEE
#define KERNEL_FEATURE_POINTCLOUD
#define VLOG_IS_ON(severity)
Segment< FEdge *, Vec3r > segment
struct blender::compositor::@345301070213251227305337367154215234324277345027::@113305264211110136200164070253045215160301331207 task
T step(const T &edge, const T &value)
size_t path_file_size(const string &path)
bool path_is_directory(const string &path)
string path_get(const string &sub)
string path_join(const string &dir, const string &file)
bool path_read_compressed_text(const string &path, string &text)
static struct PyModuleDef module
unsigned __int64 uint64_t
CCL_NAMESPACE_BEGIN string string_printf(const char *format,...)
void bounds_grow(const int k, const float3 *curve_keys, const float *curve_radius, BoundBox &bounds) const
size_t num_triangles() const
int get_device_index() const
bool is_traceable() const
uint visibility_for_tracing() const
Point get_point(int i) const
size_t num_points() const
void push(TaskRunFunction &&task)
void wait_work(Summary *stats=NULL)
VecBase< float, 4 > float4
std::unique_lock< std::mutex > thread_scoped_lock
CCL_NAMESPACE_BEGIN typedef std::mutex thread_mutex
ccl_device_inline size_t align_up(size_t offset, size_t alignment)