37 # ifdef WITH_CUDA_DYNLOAD
40 # define OPTIX_DONT_INCLUDE_CUDA
42 # include <optix_function_table_definition.h>
43 # include <optix_stubs.h>
46 # define OPTIX_DENOISER_NO_PIXEL_STRIDE 1
64 # define KERNEL_TEX(type, name) const type *name;
// NOTE(review): this chunk is a corrupted extraction — each line carries its
// original line number and several original lines are missing (the do/while
// wrappers, local `name` declarations and closing braces of these macros are
// absent here). Comments describe intent only; confirm against the pristine
// device_optix.cpp. No comments are inserted between the `\`-continued lines
// below, since that would alter macro continuation.
//
// check_result_cuda(stmt): evaluates a CUDA driver API call; on failure,
// resolves the error code to a name via cuGetErrorName() and reports
// "<name> in <stmt> (device_optix.cpp:<line>)" through set_error().
// check_result_cuda_ret(stmt): same, but (presumably — the missing lines
// would show it) also returns from the enclosing function on failure.
// check_result_optix(stmt) / check_result_optix_ret(stmt): the analogous
// pair for OptixResult, using optixGetErrorName() for the message.
// launch_filter_kernel(func_name, w, h, args): looks up `func_name` in
// cuFilterModule, prefers L1 cache, derives a square block size from the
// kernel's max-threads-per-block attribute (threads = floor(sqrt(max))),
// computes a ceil-div grid over (w, h) and launches on the default stream.
// Uses the *_ret checking variant, so it bails out of the caller on error.
69 # define check_result_cuda(stmt) \
71 CUresult res = stmt; \
72 if (res != CUDA_SUCCESS) { \
74 cuGetErrorName(res, &name); \
75 set_error(string_printf("%s in %s (device_optix.cpp:%d)", name, #stmt, __LINE__)); \
80 # define check_result_cuda_ret(stmt) \
82 CUresult res = stmt; \
83 if (res != CUDA_SUCCESS) { \
85 cuGetErrorName(res, &name); \
86 set_error(string_printf("%s in %s (device_optix.cpp:%d)", name, #stmt, __LINE__)); \
92 # define check_result_optix(stmt) \
94 enum OptixResult res = stmt; \
95 if (res != OPTIX_SUCCESS) { \
96 const char *name = optixGetErrorName(res); \
97 set_error(string_printf("%s in %s (device_optix.cpp:%d)", name, #stmt, __LINE__)); \
102 # define check_result_optix_ret(stmt) \
104 enum OptixResult res = stmt; \
105 if (res != OPTIX_SUCCESS) { \
106 const char *name = optixGetErrorName(res); \
107 set_error(string_printf("%s in %s (device_optix.cpp:%d)", name, #stmt, __LINE__)); \
113 # define launch_filter_kernel(func_name, w, h, args) \
116 check_result_cuda_ret(cuModuleGetFunction(&func, cuFilterModule, func_name)); \
117 check_result_cuda_ret(cuFuncSetCacheConfig(func, CU_FUNC_CACHE_PREFER_L1)); \
119 check_result_cuda_ret( \
120 cuFuncGetAttribute(&threads, CU_FUNC_ATTRIBUTE_MAX_THREADS_PER_BLOCK, func)); \
121 threads = (int)sqrt((float)threads); \
122 int xblocks = ((w) + threads - 1) / threads; \
123 int yblocks = ((h) + threads - 1) / threads; \
124 check_result_cuda_ret( \
125 cuLaunchKernel(func, xblocks, yblocks, 1, threads, threads, 1, 0, 0, args, 0)); \
129 class OptiXDevice :
public CUDADevice {
// NOTE(review): fragment of the OptiXDevice class interior (enums, the SBT
// record type, the CUDAContextScope RAII helper and data members). Original
// lines are missing between the numbered lines, so declarations are partial.
138 # if OPTIX_ABI_VERSION >= 36
// Tail of the program-group enum: three direct-callable groups follow PG_CALL,
// hence NUM_PROGRAM_GROUPS covers PG_CALL+0..PG_CALL+2 as the last entries.
146 NUM_PROGRAM_GROUPS = PG_CALL + 3
// Two pipelines are built: path tracing, and shader evaluation
// (bake / displace / background raygen — see load_kernels further down).
150 enum { PIP_PATH_TRACE, PIP_SHADER_EVAL, NUM_PIPELINES };
// Header storage for one SBT record, filled by optixSbtRecordPackHeader()
// (presumably inside an SbtRecord struct — the struct opening is missing here).
154 char header[OPTIX_SBT_RECORD_HEADER_SIZE];
155 bool free_map_host =
false;
162 bool use_mapped_host =
false;
// RAII push/pop of the CUDA context: constructor pushes `ctx` current,
// destructor pops it (the destructor's signature line is missing here).
166 struct CUDAContextScope {
167 CUDAContextScope(CUcontext ctx)
169 cuCtxPushCurrent(ctx);
173 cuCtxPopCurrent(
NULL);
// Compiled OptiX module for the Cycles kernels; NULL until load_kernels().
183 OptixModule optix_module =
NULL;
// Built-in intersection modules: [0] = static curves, [1] = motion-blur
// curves (see the OPTIX_ABI_VERSION >= 36 path in load_kernels).
184 OptixModule builtin_modules[2] = {};
185 OptixPipeline pipelines[NUM_PIPELINES] = {};
// Whether scene motion blur is active; chosen at kernel-load/BVH-build time.
187 bool motion_blur =
false;
// Top-level acceleration-structure handle for the scene.
190 OptixTraversableHandle tlas_handle = 0;
// Lazily (re)created denoiser; see launch_denoise() for the recreate logic.
192 OptixDenoiser denoiser =
NULL;
// Number of input passes the current `denoiser` was created for; a mismatch
// with task.denoising.input_passes triggers recreation.
194 int denoiser_input_passes = 0;
// NOTE(review): OptiXDevice constructor fragment — the signature line and
// several body lines are missing from this extraction; comments are hedged.
// Delegates to CUDADevice, then creates the OptiX device context and one
// non-blocking CUDA stream per CPU render thread.
201 : CUDADevice(info_, stats_, profiler_, background_),
203 launch_params(this,
"__params", false),
204 denoiser_state(this,
"__denoiser_state", true)
// Make this device's CUDA context current for all driver/OptiX calls below.
213 const CUDAContextScope scope(cuContext);
216 OptixDeviceContextOptions
options = {};
217 # ifdef WITH_CYCLES_LOGGING
// Log callback: maps OptiX log levels to glog severities (the level-dispatch
// branches below; the surrounding switch/if lines are missing here).
220 [](
unsigned int level,
const char *,
const char *message,
void *) {
223 LOG_IF(FATAL, VLOG_IS_ON(1)) << message;
226 LOG_IF(ERROR, VLOG_IS_ON(1)) << message;
229 LOG_IF(WARNING, VLOG_IS_ON(1)) << message;
232 LOG_IF(INFO, VLOG_IS_ON(1)) << message;
// Full validation only in debug builds, and only where the ABI supports it.
237 # if OPTIX_ABI_VERSION >= 41 && defined(WITH_CYCLES_DEBUG)
238 options.validationMode = OPTIX_DEVICE_CONTEXT_VALIDATION_MODE_ALL;
240 check_result_optix(optixDeviceContextCreate(cuContext, &
options, &
context));
241 # ifdef WITH_CYCLES_LOGGING
242 check_result_optix(optixDeviceContextSetLogCallback(
// One CUDA stream per CPU thread so render threads can launch independently.
247 cuda_stream.resize(info.cpu_threads);
248 for (
int i = 0; i < info.cpu_threads; ++i)
249 check_result_cuda(cuStreamCreate(&cuda_stream[i], CU_STREAM_NON_BLOCKING));
// NOTE(review): OptiXDevice destructor fragment (signature line missing).
// Tears down everything the constructor and load_kernels created, in order:
// delayed BVH memory, device buffers, OptiX modules/pipelines, per-thread
// CUDA streams, the denoiser, and finally the OptiX device context.
262 const CUDAContextScope scope(cuContext);
264 free_bvh_memory_delayed();
268 launch_params.
free();
269 denoiser_state.
free();
// Destroy the kernel module and both built-in intersection modules if loaded.
272 if (optix_module !=
NULL)
273 optixModuleDestroy(optix_module);
274 for (
unsigned int i = 0; i < 2; ++i)
275 if (builtin_modules[i] !=
NULL)
276 optixModuleDestroy(builtin_modules[i]);
277 for (
unsigned int i = 0; i < NUM_PIPELINES; ++i)
278 if (pipelines[i] !=
NULL)
279 optixPipelineDestroy(pipelines[i]);
// Destroy the per-CPU-thread streams created in the constructor.
282 for (CUstream stream : cuda_stream)
283 cuStreamDestroy(stream);
285 if (denoiser !=
NULL)
286 optixDenoiserDestroy(denoiser);
// The device context goes last, after every object that lives within it.
288 optixDeviceContextDestroy(
context);
// Report per-sample progress only when a single CPU thread drives this device;
// with multiple threads rendering tiles concurrently a single sample count
// would be misleading. (Braces of this method are missing in this extraction.)
292 bool show_samples()
const override
295 return info.cpu_threads == 1;
// NOTE(review): fragment of get_bvh_layout_mask() — the signature and the
// OptiX-path return are missing here. Visible behavior: if the OptiX kernel
// module was never loaded, defer to the CUDA device's BVH layout mask.
301 if (optix_module ==
NULL)
302 return CUDADevice::get_bvh_layout_mask();
// NOTE(review): fragment of compile_kernel_get_common_cflags() — the
// signature and the conditionals guarding the last two flag additions are
// missing from this extraction; the additions below are presumably
// conditional in the pristine file. Builds on the CUDA device's common NVCC
// flags, then appends OptiX-specific ones.
313 string common_cflags = CUDADevice::compile_kernel_get_common_cflags(
314 requested_features,
filter,
false);
// Add the OptiX SDK headers if the user pointed OPTIX_ROOT_DIR at the SDK.
317 const char *optix_sdk_path = getenv(
"OPTIX_ROOT_DIR");
318 if (optix_sdk_path) {
319 common_cflags +=
string_printf(
" -I\"%s/include\"", optix_sdk_path);
// Keep device functions so OptiX can resolve the callable/raygen entry points.
324 common_cflags +=
" --keep-device-functions";
// Disable the shader-raytrace kernel feature in this compile variant.
327 common_cflags +=
" -D __NO_SHADER_RAYTRACE__";
330 return common_cflags;
341 if (!CUDADevice::load_kernels(requested_features)) {
350 const CUDAContextScope scope(cuContext);
353 if (optix_module !=
NULL) {
354 optixModuleDestroy(optix_module);
357 for (
unsigned int i = 0; i < 2; ++i) {
358 if (builtin_modules[i] !=
NULL) {
359 optixModuleDestroy(builtin_modules[i]);
360 builtin_modules[i] =
NULL;
363 for (
unsigned int i = 0; i < NUM_PIPELINES; ++i) {
364 if (pipelines[i] !=
NULL) {
365 optixPipelineDestroy(pipelines[i]);
370 OptixModuleCompileOptions module_options = {};
371 module_options.maxRegisterCount = 0;
372 # ifdef WITH_CYCLES_DEBUG
373 module_options.optLevel = OPTIX_COMPILE_OPTIMIZATION_LEVEL_0;
374 module_options.debugLevel = OPTIX_COMPILE_DEBUG_LEVEL_FULL;
376 module_options.optLevel = OPTIX_COMPILE_OPTIMIZATION_LEVEL_3;
377 module_options.debugLevel = OPTIX_COMPILE_DEBUG_LEVEL_LINEINFO;
380 # if OPTIX_ABI_VERSION >= 41
381 module_options.boundValues =
nullptr;
382 module_options.numBoundValues = 0;
385 OptixPipelineCompileOptions pipeline_options = {};
387 pipeline_options.usesMotionBlur =
false;
388 pipeline_options.traversableGraphFlags =
389 OPTIX_TRAVERSABLE_GRAPH_FLAG_ALLOW_SINGLE_LEVEL_INSTANCING;
390 pipeline_options.numPayloadValues = 6;
391 pipeline_options.numAttributeValues = 2;
392 pipeline_options.exceptionFlags = OPTIX_EXCEPTION_FLAG_NONE;
393 pipeline_options.pipelineLaunchParamsVariableName =
"__params";
395 # if OPTIX_ABI_VERSION >= 36
396 pipeline_options.usesPrimitiveTypeFlags = OPTIX_PRIMITIVE_TYPE_FLAGS_TRIANGLE;
399 pipeline_options.usesPrimitiveTypeFlags |= OPTIX_PRIMITIVE_TYPE_FLAGS_ROUND_CUBIC_BSPLINE;
402 pipeline_options.usesPrimitiveTypeFlags |= OPTIX_PRIMITIVE_TYPE_FLAGS_CUSTOM;
413 pipeline_options.usesMotionBlur =
true;
416 pipeline_options.traversableGraphFlags = OPTIX_TRAVERSABLE_GRAPH_FLAG_ALLOW_ANY;
421 "lib/kernel_optix_shader_raytrace.ptx" :
422 "lib/kernel_optix.ptx");
423 if (use_adaptive_compilation() ||
path_file_size(ptx_filename) == -1) {
424 if (!getenv(
"OPTIX_ROOT_DIR")) {
426 "Missing OPTIX_ROOT_DIR environment variable (which must be set with the path to "
427 "the Optix SDK to be able to compile Optix kernels on demand).");
430 ptx_filename = compile_kernel(requested_features,
"kernel_optix",
"optix",
true);
432 if (ptx_filename.empty() || !
path_read_text(ptx_filename, ptx_data)) {
433 set_error(
"Failed to load OptiX kernel from '" + ptx_filename +
"'");
437 check_result_optix_ret(optixModuleCreateFromPTX(
context,
448 OptixProgramGroup groups[NUM_PROGRAM_GROUPS] = {};
449 OptixProgramGroupDesc group_descs[NUM_PROGRAM_GROUPS] = {};
450 OptixProgramGroupOptions group_options = {};
451 group_descs[PG_RGEN].kind = OPTIX_PROGRAM_GROUP_KIND_RAYGEN;
452 group_descs[PG_RGEN].raygen.module = optix_module;
454 group_descs[PG_RGEN].raygen.entryFunctionName =
"__raygen__kernel_optix_path_trace";
455 group_descs[PG_MISS].kind = OPTIX_PROGRAM_GROUP_KIND_MISS;
456 group_descs[PG_MISS].miss.module = optix_module;
457 group_descs[PG_MISS].miss.entryFunctionName =
"__miss__kernel_optix_miss";
458 group_descs[PG_HITD].kind = OPTIX_PROGRAM_GROUP_KIND_HITGROUP;
459 group_descs[PG_HITD].hitgroup.moduleCH = optix_module;
460 group_descs[PG_HITD].hitgroup.entryFunctionNameCH =
"__closesthit__kernel_optix_hit";
461 group_descs[PG_HITD].hitgroup.moduleAH = optix_module;
462 group_descs[PG_HITD].hitgroup.entryFunctionNameAH =
"__anyhit__kernel_optix_visibility_test";
463 group_descs[PG_HITS].kind = OPTIX_PROGRAM_GROUP_KIND_HITGROUP;
464 group_descs[PG_HITS].hitgroup.moduleAH = optix_module;
465 group_descs[PG_HITS].hitgroup.entryFunctionNameAH =
"__anyhit__kernel_optix_shadow_all_hit";
468 group_descs[PG_HITD].hitgroup.moduleIS = optix_module;
469 group_descs[PG_HITS].hitgroup.moduleIS = optix_module;
475 group_descs[PG_HITD].hitgroup.entryFunctionNameIS =
"__intersection__curve_all";
476 group_descs[PG_HITS].hitgroup.entryFunctionNameIS =
"__intersection__curve_all";
479 group_descs[PG_HITD].hitgroup.entryFunctionNameIS =
"__intersection__curve_ribbon";
480 group_descs[PG_HITS].hitgroup.entryFunctionNameIS =
"__intersection__curve_ribbon";
483 # if OPTIX_ABI_VERSION >= 36
485 OptixBuiltinISOptions builtin_options = {};
486 builtin_options.builtinISModuleType = OPTIX_PRIMITIVE_TYPE_ROUND_CUBIC_BSPLINE;
487 builtin_options.usesMotionBlur =
false;
489 check_result_optix_ret(optixBuiltinISModuleGet(
490 context, &module_options, &pipeline_options, &builtin_options, &builtin_modules[0]));
492 group_descs[PG_HITD].hitgroup.moduleIS = builtin_modules[0];
493 group_descs[PG_HITD].hitgroup.entryFunctionNameIS =
nullptr;
494 group_descs[PG_HITS].hitgroup.moduleIS = builtin_modules[0];
495 group_descs[PG_HITS].hitgroup.entryFunctionNameIS =
nullptr;
498 builtin_options.usesMotionBlur =
true;
500 check_result_optix_ret(optixBuiltinISModuleGet(
501 context, &module_options, &pipeline_options, &builtin_options, &builtin_modules[1]));
503 group_descs[PG_HITD_MOTION] = group_descs[PG_HITD];
504 group_descs[PG_HITD_MOTION].hitgroup.moduleIS = builtin_modules[1];
505 group_descs[PG_HITS_MOTION] = group_descs[PG_HITS];
506 group_descs[PG_HITS_MOTION].hitgroup.moduleIS = builtin_modules[1];
514 group_descs[PG_HITL].kind = OPTIX_PROGRAM_GROUP_KIND_HITGROUP;
515 group_descs[PG_HITL].hitgroup.moduleAH = optix_module;
516 group_descs[PG_HITL].hitgroup.entryFunctionNameAH =
"__anyhit__kernel_optix_local_hit";
520 group_descs[PG_BAKE].kind = OPTIX_PROGRAM_GROUP_KIND_RAYGEN;
521 group_descs[PG_BAKE].raygen.module = optix_module;
522 group_descs[PG_BAKE].raygen.entryFunctionName =
"__raygen__kernel_optix_bake";
526 group_descs[PG_DISP].kind = OPTIX_PROGRAM_GROUP_KIND_RAYGEN;
527 group_descs[PG_DISP].raygen.module = optix_module;
528 group_descs[PG_DISP].raygen.entryFunctionName =
"__raygen__kernel_optix_displace";
532 group_descs[PG_BACK].kind = OPTIX_PROGRAM_GROUP_KIND_RAYGEN;
533 group_descs[PG_BACK].raygen.module = optix_module;
534 group_descs[PG_BACK].raygen.entryFunctionName =
"__raygen__kernel_optix_background";
539 group_descs[PG_CALL + 0].kind = OPTIX_PROGRAM_GROUP_KIND_CALLABLES;
540 group_descs[PG_CALL + 0].callables.moduleDC = optix_module;
541 group_descs[PG_CALL + 0].callables.entryFunctionNameDC =
"__direct_callable__svm_eval_nodes";
542 group_descs[PG_CALL + 1].kind = OPTIX_PROGRAM_GROUP_KIND_CALLABLES;
543 group_descs[PG_CALL + 1].callables.moduleDC = optix_module;
544 group_descs[PG_CALL + 1].callables.entryFunctionNameDC =
545 "__direct_callable__kernel_volume_shadow";
546 group_descs[PG_CALL + 2].kind = OPTIX_PROGRAM_GROUP_KIND_CALLABLES;
547 group_descs[PG_CALL + 2].callables.moduleDC = optix_module;
548 group_descs[PG_CALL + 2].callables.entryFunctionNameDC =
549 "__direct_callable__subsurface_scatter_multi_setup";
552 check_result_optix_ret(optixProgramGroupCreate(
553 context, group_descs, NUM_PROGRAM_GROUPS, &group_options,
nullptr, 0, groups));
556 OptixStackSizes stack_size[NUM_PROGRAM_GROUPS] = {};
558 sbt_data.
alloc(NUM_PROGRAM_GROUPS);
559 memset(sbt_data.
host_pointer, 0,
sizeof(SbtRecord) * NUM_PROGRAM_GROUPS);
560 for (
unsigned int i = 0; i < NUM_PROGRAM_GROUPS; ++i) {
561 check_result_optix_ret(optixSbtRecordPackHeader(groups[i], &sbt_data[i]));
562 check_result_optix_ret(optixProgramGroupGetStackSize(groups[i], &stack_size[i]));
567 unsigned int trace_css = stack_size[PG_HITD].cssCH;
569 trace_css =
std::max(trace_css, stack_size[PG_HITD].cssIS + stack_size[PG_HITD].cssAH);
570 trace_css =
std::max(trace_css, stack_size[PG_HITS].cssIS + stack_size[PG_HITS].cssAH);
571 trace_css =
std::max(trace_css, stack_size[PG_HITL].cssIS + stack_size[PG_HITL].cssAH);
572 # if OPTIX_ABI_VERSION >= 36
574 stack_size[PG_HITD_MOTION].cssIS + stack_size[PG_HITD_MOTION].cssAH);
576 stack_size[PG_HITS_MOTION].cssIS + stack_size[PG_HITS_MOTION].cssAH);
579 OptixPipelineLinkOptions link_options = {};
580 link_options.maxTraceDepth = 1;
581 # ifdef WITH_CYCLES_DEBUG
582 link_options.debugLevel = OPTIX_COMPILE_DEBUG_LEVEL_FULL;
584 link_options.debugLevel = OPTIX_COMPILE_DEBUG_LEVEL_LINEINFO;
586 # if OPTIX_ABI_VERSION < 24
587 link_options.overrideUsesMotionBlur = motion_blur;
592 pipeline_groups.reserve(NUM_PROGRAM_GROUPS);
593 pipeline_groups.push_back(groups[PG_RGEN]);
594 pipeline_groups.push_back(groups[PG_MISS]);
595 pipeline_groups.push_back(groups[PG_HITD]);
596 pipeline_groups.push_back(groups[PG_HITS]);
597 pipeline_groups.push_back(groups[PG_HITL]);
598 # if OPTIX_ABI_VERSION >= 36
600 pipeline_groups.push_back(groups[PG_HITD_MOTION]);
601 pipeline_groups.push_back(groups[PG_HITS_MOTION]);
605 pipeline_groups.push_back(groups[PG_CALL + 0]);
606 pipeline_groups.push_back(groups[PG_CALL + 1]);
607 pipeline_groups.push_back(groups[PG_CALL + 2]);
610 check_result_optix_ret(optixPipelineCreate(
context,
613 pipeline_groups.data(),
614 pipeline_groups.size(),
617 &pipelines[PIP_PATH_TRACE]));
620 const unsigned int css = stack_size[PG_RGEN].cssRG + link_options.maxTraceDepth * trace_css;
625 const unsigned int dss = stack_size[PG_CALL + 0].dssDC +
626 std::max(stack_size[PG_CALL + 1].dssDC,
627 stack_size[PG_CALL + 2].dssDC);
630 check_result_optix_ret(
631 optixPipelineSetStackSize(pipelines[PIP_PATH_TRACE],
635 motion_blur ? 3 : 2));
639 const bool use_shader_eval_pipeline = requested_features.
use_baking ||
643 if (use_shader_eval_pipeline) {
645 pipeline_groups.reserve(NUM_PROGRAM_GROUPS);
646 pipeline_groups.push_back(groups[PG_BAKE]);
647 pipeline_groups.push_back(groups[PG_DISP]);
648 pipeline_groups.push_back(groups[PG_BACK]);
649 pipeline_groups.push_back(groups[PG_MISS]);
650 pipeline_groups.push_back(groups[PG_HITD]);
651 pipeline_groups.push_back(groups[PG_HITS]);
652 pipeline_groups.push_back(groups[PG_HITL]);
653 # if OPTIX_ABI_VERSION >= 36
655 pipeline_groups.push_back(groups[PG_HITD_MOTION]);
656 pipeline_groups.push_back(groups[PG_HITS_MOTION]);
660 pipeline_groups.push_back(groups[PG_CALL + 0]);
661 pipeline_groups.push_back(groups[PG_CALL + 1]);
662 pipeline_groups.push_back(groups[PG_CALL + 2]);
665 check_result_optix_ret(optixPipelineCreate(
context,
668 pipeline_groups.data(),
669 pipeline_groups.size(),
672 &pipelines[PIP_SHADER_EVAL]));
675 const unsigned int css =
std::max(stack_size[PG_BAKE].cssRG,
677 stack_size[PG_BACK].cssRG)) +
678 link_options.maxTraceDepth * trace_css;
679 const unsigned int dss = stack_size[PG_CALL + 0].dssDC +
680 std::max(stack_size[PG_CALL + 1].dssDC,
681 stack_size[PG_CALL + 2].dssDC);
683 check_result_optix_ret(
684 optixPipelineSetStackSize(pipelines[PIP_SHADER_EVAL],
688 motion_blur ? 3 : 2));
692 for (
unsigned int i = 0; i < NUM_PROGRAM_GROUPS; ++i) {
693 optixProgramGroupDestroy(groups[i]);
705 if (thread_index != 0) {
711 while (
task.acquire_tile(
this, tile,
task.tile_types)) {
713 launch_render(
task, tile, thread_index);
717 CUDADevice::render(
task, tile, work_tiles);
720 launch_denoise(
task, tile);
721 task.release_tile(tile);
722 if (
task.get_cancel() && !
task.need_finish_queue)
724 else if (have_error())
729 launch_shader_eval(
task, thread_index);
745 launch_denoise(
task, tile);
751 assert(thread_index < launch_params.
data_size);
767 int step_samples = (info.display_device ? 8 : 32);
773 const CUDAContextScope scope(cuContext);
779 if (
task.adaptive_sampling.use) {
783 device_ptr d_wtile_ptr = launch_params_ptr + offsetof(KernelParams, tile);
785 cuMemcpyHtoDAsync(d_wtile_ptr, &wtile,
sizeof(wtile), cuda_stream[thread_index]));
787 OptixShaderBindingTable sbt_params = {};
788 sbt_params.raygenRecord = sbt_data.
device_pointer + PG_RGEN *
sizeof(SbtRecord);
789 sbt_params.missRecordBase = sbt_data.
device_pointer + PG_MISS *
sizeof(SbtRecord);
790 sbt_params.missRecordStrideInBytes =
sizeof(SbtRecord);
791 sbt_params.missRecordCount = 1;
792 sbt_params.hitgroupRecordBase = sbt_data.
device_pointer + PG_HITD *
sizeof(SbtRecord);
793 sbt_params.hitgroupRecordStrideInBytes =
sizeof(SbtRecord);
794 # if OPTIX_ABI_VERSION >= 36
795 sbt_params.hitgroupRecordCount = 5;
797 sbt_params.hitgroupRecordCount = 3;
799 sbt_params.callablesRecordBase = sbt_data.
device_pointer + PG_CALL *
sizeof(SbtRecord);
800 sbt_params.callablesRecordCount = 3;
801 sbt_params.callablesRecordStrideInBytes =
sizeof(SbtRecord);
804 check_result_optix(optixLaunch(pipelines[PIP_PATH_TRACE],
805 cuda_stream[thread_index],
816 if (
task.adaptive_sampling.use &&
task.adaptive_sampling.need_filter(filter_sample)) {
817 adaptive_sampling_filter(filter_sample, &wtile, d_wtile_ptr, cuda_stream[thread_index]);
821 check_result_cuda(cuStreamSynchronize(cuda_stream[thread_index]));
829 if (
task.get_cancel() && !
task.need_finish_queue)
834 if (
task.adaptive_sampling.use) {
835 device_ptr d_wtile_ptr = launch_params_ptr + offsetof(KernelParams, tile);
836 adaptive_sampling_post(rtile, &wtile, d_wtile_ptr, cuda_stream[thread_index]);
837 check_result_cuda(cuStreamSynchronize(cuda_stream[thread_index]));
848 const CUDAContextScope scope(cuContext);
858 task.map_neighbor_tiles(neighbors,
this);
868 int4 clip_rect = neighbors.bounds();
874 int pixel_stride =
task.pass_stride * (int)
sizeof(
float);
875 int pixel_offset = (rtile.
offset + rtile.
x + rtile.
y * rtile.
stride) * pixel_stride;
876 const int pass_offset[3] = {
882 int input_stride = pixel_stride;
889 bool contiguous_memory =
true;
891 if (neighbors.tiles[i].buffer && neighbors.tiles[i].buffer != rtile.
buffer) {
892 contiguous_memory =
false;
896 if (contiguous_memory) {
898 input_ptr -= (overlap_offset.
x + overlap_offset.
y * rtile.
stride) * pixel_stride;
900 input_stride *= rtile.
stride;
906 input_ptr = input.device_pointer;
910 TileInfo *tile_info = tile_info_mem.alloc(1);
912 tile_info->
offsets[i] = neighbors.tiles[i].offset;
913 tile_info->
strides[i] = neighbors.tiles[i].stride;
914 tile_info->
buffers[i] = neighbors.tiles[i].buffer;
916 tile_info->
x[0] = neighbors.tiles[3].x;
917 tile_info->
x[1] = neighbors.tiles[4].x;
918 tile_info->
x[2] = neighbors.tiles[5].x;
919 tile_info->
x[3] = neighbors.tiles[5].x + neighbors.tiles[5].w;
920 tile_info->
y[0] = neighbors.tiles[1].y;
921 tile_info->
y[1] = neighbors.tiles[4].y;
922 tile_info->
y[2] = neighbors.tiles[7].y;
923 tile_info->
y[3] = neighbors.tiles[7].y + neighbors.tiles[7].h;
924 tile_info_mem.copy_to_device();
927 &input.device_pointer, &tile_info_mem.device_pointer, &rect.
x, &
task.pass_stride};
928 launch_filter_kernel(
"kernel_cuda_filter_copy_input",
rect_size.x,
rect_size.y, args);
931 # if OPTIX_DENOISER_NO_PIXEL_STRIDE
935 void *input_args[] = {&input_rgb.device_pointer,
941 const_cast<int *
>(pass_offset),
942 &
task.denoising.input_passes,
944 launch_filter_kernel(
947 input_ptr = input_rgb.device_pointer;
948 pixel_stride = 3 *
sizeof(
float);
949 input_stride =
rect_size.x * pixel_stride;
952 const bool recreate_denoiser = (denoiser ==
NULL) ||
953 (
task.denoising.input_passes != denoiser_input_passes);
954 if (recreate_denoiser) {
956 if (denoiser !=
NULL) {
957 optixDenoiserDestroy(denoiser);
961 OptixDenoiserOptions denoiser_options = {};
962 assert(
task.denoising.input_passes >= 1 &&
task.denoising.input_passes <= 3);
963 denoiser_options.inputKind =
static_cast<OptixDenoiserInputKind
>(
964 OPTIX_DENOISER_INPUT_RGB + (
task.denoising.input_passes - 1));
965 # if OPTIX_ABI_VERSION < 28
966 denoiser_options.pixelFormat = OPTIX_PIXEL_FORMAT_FLOAT3;
968 check_result_optix_ret(optixDenoiserCreate(
context, &denoiser_options, &denoiser));
969 check_result_optix_ret(
970 optixDenoiserSetModel(denoiser, OPTIX_DENOISER_MODEL_KIND_HDR,
NULL, 0));
973 denoiser_input_passes =
task.denoising.input_passes;
976 OptixDenoiserSizes sizes = {};
977 check_result_optix_ret(
980 # if OPTIX_ABI_VERSION < 28
981 const size_t scratch_size = sizes.recommendedScratchSizeInBytes;
983 const size_t scratch_size = sizes.withOverlapScratchSizeInBytes;
985 const size_t scratch_offset = sizes.stateSizeInBytes;
993 check_result_optix_ret(optixDenoiserSetup(denoiser,
1007 OptixImage2D input_layers[3] = {};
1008 OptixImage2D output_layers[1] = {};
1010 for (
int i = 0; i < 3; ++i) {
1011 # if OPTIX_DENOISER_NO_PIXEL_STRIDE
1014 input_layers[i].data = input_ptr + pass_offset[i];
1018 input_layers[i].rowStrideInBytes = input_stride;
1019 input_layers[i].pixelStrideInBytes = pixel_stride;
1020 input_layers[i].format = OPTIX_PIXEL_FORMAT_FLOAT3;
1023 # if OPTIX_DENOISER_NO_PIXEL_STRIDE
1024 output_layers[0].data = input_ptr;
1027 output_layers[0].rowStrideInBytes = input_stride;
1028 output_layers[0].pixelStrideInBytes = pixel_stride;
1029 int2 output_offset = overlap_offset;
1032 output_layers[0].data = target_tile.
buffer + pixel_offset;
1033 output_layers[0].width = target_tile.
w;
1034 output_layers[0].height = target_tile.
h;
1035 output_layers[0].rowStrideInBytes = target_tile.
stride * pixel_stride;
1036 output_layers[0].pixelStrideInBytes = pixel_stride;
1038 output_layers[0].format = OPTIX_PIXEL_FORMAT_FLOAT3;
1041 OptixDenoiserParams
params = {};
1042 check_result_optix_ret(optixDenoiserInvoke(denoiser,
1048 task.denoising.input_passes,
1055 # if OPTIX_DENOISER_NO_PIXEL_STRIDE
1056 void *output_args[] = {&input_ptr,
1070 launch_filter_kernel(
1071 "kernel_cuda_filter_convert_from_rgb", target_tile.
w, target_tile.
h, output_args);
1074 check_result_cuda_ret(cuStreamSynchronize(0));
1076 task.unmap_neighbor_tiles(neighbors,
this);
1081 CUDADevice::denoise(rtile, denoising);
1085 task.update_progress(&rtile, rtile.
w * rtile.
h);
1092 unsigned int rgen_index = PG_BACK;
1094 rgen_index = PG_BAKE;
1096 rgen_index = PG_DISP;
1098 const CUDAContextScope scope(cuContext);
1106 params.output = (float4 *)
task.shader_output;
1113 check_result_cuda(cuMemcpyHtoDAsync(launch_params_ptr + offsetof(KernelParams,
shader),
1116 cuda_stream[thread_index]));
1118 OptixShaderBindingTable sbt_params = {};
1119 sbt_params.raygenRecord = sbt_data.
device_pointer + rgen_index *
sizeof(SbtRecord);
1120 sbt_params.missRecordBase = sbt_data.
device_pointer + PG_MISS *
sizeof(SbtRecord);
1121 sbt_params.missRecordStrideInBytes =
sizeof(SbtRecord);
1122 sbt_params.missRecordCount = 1;
1123 sbt_params.hitgroupRecordBase = sbt_data.
device_pointer + PG_HITD *
sizeof(SbtRecord);
1124 sbt_params.hitgroupRecordStrideInBytes =
sizeof(SbtRecord);
1125 # if OPTIX_ABI_VERSION >= 36
1126 sbt_params.hitgroupRecordCount = 5;
1128 sbt_params.hitgroupRecordCount = 3;
1130 sbt_params.callablesRecordBase = sbt_data.
device_pointer + PG_CALL *
sizeof(SbtRecord);
1131 sbt_params.callablesRecordCount = 3;
1132 sbt_params.callablesRecordStrideInBytes =
sizeof(SbtRecord);
1134 check_result_optix(optixLaunch(pipelines[PIP_SHADER_EVAL],
1135 cuda_stream[thread_index],
1143 check_result_cuda(cuStreamSynchronize(cuda_stream[thread_index]));
1149 bool build_optix_bvh(BVHOptiX *bvh,
1150 OptixBuildOperation operation,
1151 const OptixBuildInput &build_input,
1161 const CUDAContextScope scope(cuContext);
1164 OptixAccelBufferSizes sizes = {};
1165 OptixAccelBuildOptions
options = {};
1166 options.operation = operation;
1169 options.buildFlags = OPTIX_BUILD_FLAG_PREFER_FAST_TRACE | OPTIX_BUILD_FLAG_ALLOW_COMPACTION;
1173 options.buildFlags = OPTIX_BUILD_FLAG_PREFER_FAST_BUILD | OPTIX_BUILD_FLAG_ALLOW_UPDATE;
1176 options.motionOptions.numKeys = num_motion_steps;
1177 options.motionOptions.flags = OPTIX_MOTION_FLAG_START_VANISH | OPTIX_MOTION_FLAG_END_VANISH;
1178 options.motionOptions.timeBegin = 0.0f;
1179 options.motionOptions.timeEnd = 1.0f;
1181 check_result_optix_ret(
1182 optixAccelComputeMemoryUsage(
context, &
options, &build_input, 1, &sizes));
1186 temp_mem.alloc_to_device(
align_up(sizes.tempSizeInBytes, 8) + 8);
1187 if (!temp_mem.device_pointer)
1192 if (operation == OPTIX_BUILD_OPERATION_BUILD) {
1193 assert(out_data.
device ==
this);
1203 OptixAccelEmitDesc compacted_size_prop = {};
1204 compacted_size_prop.type = OPTIX_PROPERTY_TYPE_COMPACTED_SIZE;
1207 compacted_size_prop.result =
align_up(temp_mem.device_pointer + sizes.tempSizeInBytes, 8);
1209 OptixTraversableHandle out_handle = 0;
1210 check_result_optix_ret(optixAccelBuild(
context,
1215 temp_mem.device_pointer,
1216 sizes.tempSizeInBytes,
1218 sizes.outputSizeInBytes,
1220 background ? &compacted_size_prop :
NULL,
1221 background ? 1 : 0));
1222 bvh->traversable_handle =
static_cast<uint64_t>(out_handle);
1225 check_result_cuda_ret(cuStreamSynchronize(
NULL));
1229 uint64_t compacted_size = sizes.outputSizeInBytes;
1230 check_result_cuda_ret(
1231 cuMemcpyDtoH(&compacted_size, compacted_size_prop.result,
sizeof(compacted_size)));
1237 if (compacted_size < sizes.outputSizeInBytes) {
1239 compacted_data.alloc_to_device(compacted_size);
1240 if (!compacted_data.device_pointer)
1245 check_result_optix_ret(optixAccelCompact(
context,
1248 compacted_data.device_pointer,
1251 bvh->traversable_handle =
static_cast<uint64_t>(out_handle);
1254 check_result_cuda_ret(cuStreamSynchronize(
NULL));
1273 free_bvh_memory_delayed();
1275 BVHOptiX *
const bvh_optix =
static_cast<BVHOptiX *
>(bvh);
1277 progress.
set_substatus(
"Building OptiX acceleration structure");
1284 OptixBuildOperation operation = OPTIX_BUILD_OPERATION_BUILD;
1285 if (
refit && !background) {
1286 assert(bvh_optix->traversable_handle != 0);
1287 operation = OPTIX_BUILD_OPERATION_UPDATE;
1290 bvh_optix->as_data.free();
1291 bvh_optix->traversable_handle = 0;
1298 Hair *
const hair =
static_cast<Hair *const
>(geom);
1305 size_t num_motion_steps = 1;
1307 if (motion_blur && hair->get_use_motion_blur() && motion_keys) {
1308 num_motion_steps = hair->get_motion_steps();
1312 # if OPTIX_ABI_VERSION >= 36
1316 const size_t num_vertices = num_segments * 4;
1318 index_data.alloc(num_segments);
1319 vertex_data.alloc(num_vertices * num_motion_steps);
1323 aabb_data.alloc(num_segments * num_motion_steps);
1326 for (
size_t step = 0; step < num_motion_steps; ++step) {
1328 const float3 *keys = hair->get_curve_keys().data();
1329 size_t center_step = (num_motion_steps - 1) / 2;
1330 if (step != center_step) {
1331 size_t attr_offset = (step > center_step) ? step - 1 : step;
1333 keys = motion_keys->
data_float3() + attr_offset * hair->get_curve_keys().size();
1336 for (
size_t j = 0, i = 0; j < hair->
num_curves(); ++j) {
1338 # if OPTIX_ABI_VERSION >= 36
1339 const array<float> &curve_radius = hair->get_curve_radius();
1343 # if OPTIX_ABI_VERSION >= 36
1347 int ka =
max(k0 - 1,
curve.first_key);
1350 const float4 px =
make_float4(keys[ka].
x, keys[k0].
x, keys[k1].
x, keys[kb].
x);
1351 const float4 py =
make_float4(keys[ka].
y, keys[k0].
y, keys[k1].
y, keys[kb].
y);
1352 const float4 pz =
make_float4(keys[ka].
z, keys[k0].
z, keys[k1].
z, keys[kb].
z);
1354 curve_radius[ka], curve_radius[k0], curve_radius[k1], curve_radius[kb]);
1357 static const float4 cr2bsp0 =
make_float4(+7, -4, +5, -2) / 6.f;
1358 static const float4 cr2bsp1 =
make_float4(-2, 11, -4, +1) / 6.f;
1359 static const float4 cr2bsp2 =
make_float4(+1, -4, 11, -2) / 6.f;
1360 static const float4 cr2bsp3 =
make_float4(-2, +5, -4, +7) / 6.f;
1362 index_data[i] = i * 4;
1363 float4 *
const v = vertex_data.data() + step * num_vertices + index_data[i];
1365 dot(cr2bsp0, px),
dot(cr2bsp0, py),
dot(cr2bsp0, pz),
dot(cr2bsp0, pw));
1367 dot(cr2bsp1, px),
dot(cr2bsp1, py),
dot(cr2bsp1, pz),
dot(cr2bsp1, pw));
1369 dot(cr2bsp2, px),
dot(cr2bsp2, py),
dot(cr2bsp2, pz),
dot(cr2bsp2, pw));
1371 dot(cr2bsp3, px),
dot(cr2bsp3, py),
dot(cr2bsp3, pz),
dot(cr2bsp3, pw));
1379 const size_t index = step * num_segments + i;
1380 aabb_data[index].minX =
bounds.min.x;
1381 aabb_data[index].minY =
bounds.min.y;
1382 aabb_data[index].minZ =
bounds.min.z;
1383 aabb_data[index].maxX =
bounds.max.x;
1384 aabb_data[index].maxY =
bounds.max.y;
1385 aabb_data[index].maxZ =
bounds.max.z;
1392 aabb_data.copy_to_device();
1393 # if OPTIX_ABI_VERSION >= 36
1394 index_data.copy_to_device();
1395 vertex_data.copy_to_device();
1399 aabb_ptrs.reserve(num_motion_steps);
1400 # if OPTIX_ABI_VERSION >= 36
1403 width_ptrs.reserve(num_motion_steps);
1404 vertex_ptrs.reserve(num_motion_steps);
1406 for (
size_t step = 0; step < num_motion_steps; ++step) {
1407 aabb_ptrs.push_back(aabb_data.device_pointer + step * num_segments *
sizeof(OptixAabb));
1408 # if OPTIX_ABI_VERSION >= 36
1409 const device_ptr base_ptr = vertex_data.device_pointer +
1410 step * num_vertices *
sizeof(float4);
1411 width_ptrs.push_back(base_ptr + 3 *
sizeof(
float));
1412 vertex_ptrs.push_back(base_ptr);
1417 unsigned int build_flags = OPTIX_GEOMETRY_FLAG_REQUIRE_SINGLE_ANYHIT_CALL;
1418 OptixBuildInput build_input = {};
1419 # if OPTIX_ABI_VERSION >= 36
1421 build_input.type = OPTIX_BUILD_INPUT_TYPE_CURVES;
1422 build_input.curveArray.curveType = OPTIX_PRIMITIVE_TYPE_ROUND_CUBIC_BSPLINE;
1423 build_input.curveArray.numPrimitives = num_segments;
1424 build_input.curveArray.vertexBuffers = (CUdeviceptr *)vertex_ptrs.data();
1425 build_input.curveArray.numVertices = num_vertices;
1426 build_input.curveArray.vertexStrideInBytes =
sizeof(float4);
1427 build_input.curveArray.widthBuffers = (CUdeviceptr *)width_ptrs.data();
1428 build_input.curveArray.widthStrideInBytes =
sizeof(float4);
1429 build_input.curveArray.indexBuffer = (CUdeviceptr)index_data.device_pointer;
1430 build_input.curveArray.indexStrideInBytes =
sizeof(
int);
1431 build_input.curveArray.flag = build_flags;
1439 build_flags |= OPTIX_GEOMETRY_FLAG_DISABLE_ANYHIT;
1441 build_input.type = OPTIX_BUILD_INPUT_TYPE_CUSTOM_PRIMITIVES;
1442 # if OPTIX_ABI_VERSION < 23
1443 build_input.aabbArray.aabbBuffers = (CUdeviceptr *)aabb_ptrs.data();
1444 build_input.aabbArray.numPrimitives = num_segments;
1445 build_input.aabbArray.strideInBytes =
sizeof(OptixAabb);
1446 build_input.aabbArray.flags = &build_flags;
1447 build_input.aabbArray.numSbtRecords = 1;
1450 build_input.customPrimitiveArray.aabbBuffers = (CUdeviceptr *)aabb_ptrs.data();
1451 build_input.customPrimitiveArray.numPrimitives = num_segments;
1452 build_input.customPrimitiveArray.strideInBytes =
sizeof(OptixAabb);
1453 build_input.customPrimitiveArray.flags = &build_flags;
1454 build_input.customPrimitiveArray.numSbtRecords = 1;
1455 build_input.customPrimitiveArray.primitiveIndexOffset = hair->
optix_prim_offset;
1459 if (!build_optix_bvh(bvh_optix, operation, build_input, num_motion_steps)) {
1460 progress.
set_error(
"Failed to build OptiX acceleration structure");
1470 const size_t num_verts =
mesh->get_verts().
size();
1472 size_t num_motion_steps = 1;
1474 if (motion_blur &&
mesh->get_use_motion_blur() && motion_keys) {
1475 num_motion_steps =
mesh->get_motion_steps();
1479 index_data.alloc(
mesh->get_triangles().
size());
1480 memcpy(index_data.data(),
1481 mesh->get_triangles().data(),
1482 mesh->get_triangles().
size() *
sizeof(
int));
1484 vertex_data.alloc(num_verts * num_motion_steps);
1486 for (
size_t step = 0; step < num_motion_steps; ++step) {
1489 size_t center_step = (num_motion_steps - 1) / 2;
1491 if (step != center_step) {
1493 (step > center_step ? step - 1 : step) * num_verts;
1496 memcpy(vertex_data.data() + num_verts * step,
verts, num_verts *
sizeof(
float3));
1500 index_data.copy_to_device();
1501 vertex_data.copy_to_device();
1504 vertex_ptrs.reserve(num_motion_steps);
1505 for (
size_t step = 0; step < num_motion_steps; ++step) {
1506 vertex_ptrs.push_back(vertex_data.device_pointer + num_verts * step *
sizeof(
float3));
1510 unsigned int build_flags = OPTIX_GEOMETRY_FLAG_REQUIRE_SINGLE_ANYHIT_CALL;
1511 OptixBuildInput build_input = {};
1512 build_input.type = OPTIX_BUILD_INPUT_TYPE_TRIANGLES;
1513 build_input.triangleArray.vertexBuffers = (CUdeviceptr *)vertex_ptrs.data();
1514 build_input.triangleArray.numVertices = num_verts;
1515 build_input.triangleArray.vertexFormat = OPTIX_VERTEX_FORMAT_FLOAT3;
1516 build_input.triangleArray.vertexStrideInBytes =
sizeof(
float3);
1517 build_input.triangleArray.indexBuffer = index_data.device_pointer;
1519 build_input.triangleArray.indexFormat = OPTIX_INDICES_FORMAT_UNSIGNED_INT3;
1520 build_input.triangleArray.indexStrideInBytes = 3 *
sizeof(int);
1521 build_input.triangleArray.flags = &build_flags;
1525 build_input.triangleArray.numSbtRecords = 1;
1528 if (!build_optix_bvh(bvh_optix, operation, build_input, num_motion_steps)) {
1529 progress.
set_error(
"Failed to build OptiX acceleration structure");
1534 unsigned int num_instances = 0;
1535 unsigned int max_num_instances = 0xFFFFFFFF;
1537 bvh_optix->as_data.free();
1538 bvh_optix->traversable_handle = 0;
1539 bvh_optix->motion_transform_data.free();
1541 optixDeviceContextGetProperty(
context,
1542 OPTIX_DEVICE_PROPERTY_LIMIT_MAX_INSTANCE_ID,
1544 sizeof(max_num_instances));
1546 max_num_instances >>= 1;
1547 if (bvh->
objects.size() > max_num_instances) {
1549 "Failed to build OptiX acceleration structure because there are too many instances");
1554 # if OPTIX_ABI_VERSION < 41
1556 aabbs.alloc(bvh->
objects.size());
1559 instances.alloc(bvh->
objects.size());
1562 size_t motion_transform_offset = 0;
1564 size_t total_motion_transform_size = 0;
1567 total_motion_transform_size =
align_up(total_motion_transform_size,
1568 OPTIX_TRANSFORM_BYTE_ALIGNMENT);
1569 const size_t motion_keys =
max(ob->get_motion().size(), 2) - 2;
1570 total_motion_transform_size = total_motion_transform_size +
1571 sizeof(OptixSRTMotionTransform) +
1572 motion_keys *
sizeof(OptixSRTData);
1576 assert(bvh_optix->motion_transform_data.device ==
this);
1577 bvh_optix->motion_transform_data.alloc_to_device(total_motion_transform_size);
1585 BVHOptiX *
const blas =
static_cast<BVHOptiX *
>(ob->get_geometry()->bvh);
1586 OptixTraversableHandle handle = blas->traversable_handle;
1588 # if OPTIX_ABI_VERSION < 41
1589 OptixAabb &aabb = aabbs[num_instances];
1598 OptixInstance &instance = instances[num_instances++];
1599 memset(&instance, 0,
sizeof(instance));
1602 instance.transform[0] = 1.0f;
1603 instance.transform[5] = 1.0f;
1604 instance.transform[10] = 1.0f;
1610 instance.visibilityMask = 1;
1612 if (ob->get_geometry()->has_volume) {
1614 instance.visibilityMask |= 2;
1619 instance.visibilityMask |= 4;
1621 # if OPTIX_ABI_VERSION >= 36
1622 if (motion_blur && ob->get_geometry()->has_motion_blur() &&
1626 instance.sbtOffset = PG_HITD_MOTION - PG_HITD;
1633 size_t motion_keys =
max(ob->get_motion().size(), 2) - 2;
1634 size_t motion_transform_size =
sizeof(OptixSRTMotionTransform) +
1635 motion_keys *
sizeof(OptixSRTData);
1637 const CUDAContextScope scope(cuContext);
1639 motion_transform_offset =
align_up(motion_transform_offset,
1640 OPTIX_TRANSFORM_BYTE_ALIGNMENT);
1641 CUdeviceptr motion_transform_gpu = bvh_optix->motion_transform_data.device_pointer +
1642 motion_transform_offset;
1643 motion_transform_offset += motion_transform_size;
1646 OptixSRTMotionTransform &motion_transform = *
reinterpret_cast<OptixSRTMotionTransform *
>(
1647 new uint8_t[motion_transform_size]);
1648 motion_transform.child = handle;
1649 motion_transform.motionOptions.numKeys = ob->get_motion().size();
1650 motion_transform.motionOptions.flags = OPTIX_MOTION_FLAG_NONE;
1651 motion_transform.motionOptions.timeBegin = 0.0f;
1652 motion_transform.motionOptions.timeEnd = 1.0f;
1654 OptixSRTData *
const srt_data = motion_transform.srtData;
1657 decomp.data(), ob->get_motion().
data(), ob->get_motion().size());
1659 for (
size_t i = 0; i < ob->get_motion().
size(); ++i) {
1661 srt_data[i].sx = decomp[i].y.w;
1662 srt_data[i].sy = decomp[i].z.w;
1663 srt_data[i].sz = decomp[i].w.w;
1666 srt_data[i].a = decomp[i].z.x;
1667 srt_data[i].b = decomp[i].z.y;
1668 srt_data[i].c = decomp[i].w.x;
1669 assert(decomp[i].
z.z == 0.0f);
1670 assert(decomp[i].
w.y == 0.0f);
1671 assert(decomp[i].
w.z == 0.0f);
1674 srt_data[i].pvx = 0.0f;
1675 srt_data[i].pvy = 0.0f;
1676 srt_data[i].pvz = 0.0f;
1679 srt_data[i].qx = decomp[i].x.x;
1680 srt_data[i].qy = decomp[i].x.y;
1681 srt_data[i].qz = decomp[i].x.z;
1682 srt_data[i].qw = decomp[i].x.w;
1685 srt_data[i].tx = decomp[i].y.x;
1686 srt_data[i].ty = decomp[i].y.y;
1687 srt_data[i].tz = decomp[i].y.z;
1691 cuMemcpyHtoD(motion_transform_gpu, &motion_transform, motion_transform_size);
1692 delete[]
reinterpret_cast<uint8_t *
>(&motion_transform);
1695 instance.flags = OPTIX_INSTANCE_FLAG_DISABLE_TRANSFORM;
1698 optixConvertPointerToTraversableHandle(
context,
1699 motion_transform_gpu,
1700 OPTIX_TRAVERSABLE_TYPE_SRT_MOTION_TRANSFORM,
1701 &instance.traversableHandle);
1704 instance.traversableHandle = handle;
1706 if (ob->get_geometry()->is_instanced()) {
1708 memcpy(instance.transform, &ob->get_tfm(),
sizeof(instance.transform));
1712 instance.flags = OPTIX_INSTANCE_FLAG_DISABLE_TRANSFORM;
1715 instance.instanceId |= 1;
1721 # if OPTIX_ABI_VERSION < 41
1722 aabbs.resize(num_instances);
1723 aabbs.copy_to_device();
1725 instances.resize(num_instances);
1726 instances.copy_to_device();
1729 OptixBuildInput build_input = {};
1730 build_input.type = OPTIX_BUILD_INPUT_TYPE_INSTANCES;
1731 # if OPTIX_ABI_VERSION < 41
1732 build_input.instanceArray.aabbs = aabbs.device_pointer;
1733 build_input.instanceArray.numAabbs = num_instances;
1735 build_input.instanceArray.instances = instances.device_pointer;
1736 build_input.instanceArray.numInstances = num_instances;
1738 if (!build_optix_bvh(bvh_optix, OPTIX_BUILD_OPERATION_BUILD, build_input, 0)) {
1739 progress.
set_error(
"Failed to build OptiX acceleration structure");
1741 tlas_handle = bvh_optix->traversable_handle;
1745 void release_optix_bvh(
BVH *bvh)
override
1750 BVHOptiX *
const bvh_optix =
static_cast<BVHOptiX *
>(bvh);
1752 delayed_free_bvh_memory.emplace_back(std::move(bvh_optix->as_data));
1753 delayed_free_bvh_memory.emplace_back(std::move(bvh_optix->motion_transform_data));
1754 bvh_optix->traversable_handle = 0;
1757 void free_bvh_memory_delayed()
1763 void const_copy_to(
const char *name,
void *host,
size_t size)
override
1768 CUDADevice::const_copy_to(name, host,
size);
1770 if (strcmp(name,
"__data") == 0) {
1775 *(OptixTraversableHandle *)&
data->bvh.scene = tlas_handle;
1777 update_launch_params(offsetof(KernelParams,
data), host,
size);
1782 # define KERNEL_TEX(data_type, tex_name) \
1783 if (strcmp(name, #tex_name) == 0) { \
1784 update_launch_params(offsetof(KernelParams, tex_name), host, size); \
1791 void update_launch_params(
size_t offset,
void *
data,
size_t data_size)
1793 const CUDAContextScope scope(cuContext);
1795 for (
int i = 0; i < info.cpu_threads; ++i)
1805 load_texture_info();
1817 thread_run(task_copy, 0);
1823 list<DeviceTask> tasks;
1824 task.split(tasks, info.cpu_threads);
1833 thread_run(task_copy, task_index);
1839 void task_wait()
override
1845 void task_cancel()
override
1854 if (g_optixFunctionTable.optixDeviceContextCreate !=
NULL)
1861 const OptixResult
result = optixInit();
1863 if (
result == OPTIX_ERROR_UNSUPPORTED_ABI_VERSION) {
1864 VLOG(1) <<
"OptiX initialization failed because the installed NVIDIA driver is too old. "
1865 "Please update to the latest driver first!";
1868 else if (
result != OPTIX_SUCCESS) {
1869 VLOG(1) <<
"OptiX initialization failed with error code " << (
unsigned int)
result;
1879 devices.reserve(cuda_devices.size());
1886 cuDeviceGetAttribute(&major, CU_DEVICE_ATTRIBUTE_COMPUTE_CAPABILITY_MAJOR, info.num);
1892 info.id +=
"_OptiX";
1894 info.has_branched_path =
false;
1902 return new OptiXDevice(info, stats, profiler, background);
typedef float(TangentPoint)[2]
_GL_VOID GLfloat value _GL_VOID_RET _GL_VOID const GLuint GLboolean *residences _GL_BOOL_RET _GL_VOID GLsizei GLfloat GLfloat GLfloat GLfloat const GLubyte *bitmap _GL_VOID_RET _GL_VOID GLenum const void *lists _GL_VOID_RET _GL_VOID const GLdouble *equation _GL_VOID_RET _GL_VOID GLdouble GLdouble blue _GL_VOID_RET _GL_VOID GLfloat GLfloat blue _GL_VOID_RET _GL_VOID GLint GLint blue _GL_VOID_RET _GL_VOID GLshort GLshort blue _GL_VOID_RET _GL_VOID GLubyte GLubyte blue _GL_VOID_RET _GL_VOID GLuint GLuint blue _GL_VOID_RET _GL_VOID GLushort GLushort blue _GL_VOID_RET _GL_VOID GLbyte GLbyte GLbyte alpha _GL_VOID_RET _GL_VOID GLdouble GLdouble GLdouble alpha _GL_VOID_RET _GL_VOID GLfloat GLfloat GLfloat alpha _GL_VOID_RET _GL_VOID GLint GLint GLint alpha _GL_VOID_RET _GL_VOID GLshort GLshort GLshort alpha _GL_VOID_RET _GL_VOID GLubyte GLubyte GLubyte alpha _GL_VOID_RET _GL_VOID GLuint GLuint GLuint alpha _GL_VOID_RET _GL_VOID GLushort GLushort GLushort alpha _GL_VOID_RET _GL_VOID GLenum mode _GL_VOID_RET _GL_VOID GLint GLsizei GLsizei GLenum type _GL_VOID_RET _GL_VOID GLsizei GLenum GLenum const void *pixels _GL_VOID_RET _GL_VOID const void *pointer _GL_VOID_RET _GL_VOID GLdouble v _GL_VOID_RET _GL_VOID GLfloat v _GL_VOID_RET _GL_VOID GLint GLint i2 _GL_VOID_RET _GL_VOID GLint j _GL_VOID_RET _GL_VOID GLfloat param _GL_VOID_RET _GL_VOID GLint param _GL_VOID_RET _GL_VOID GLdouble GLdouble GLdouble GLdouble GLdouble zFar _GL_VOID_RET _GL_UINT GLdouble *equation _GL_VOID_RET _GL_VOID GLenum GLint *params _GL_VOID_RET _GL_VOID GLenum GLfloat *v _GL_VOID_RET _GL_VOID GLenum GLfloat *params _GL_VOID_RET _GL_VOID GLfloat *values _GL_VOID_RET _GL_VOID GLushort *values _GL_VOID_RET _GL_VOID GLenum GLfloat *params _GL_VOID_RET _GL_VOID GLenum GLdouble *params _GL_VOID_RET _GL_VOID GLenum GLint *params _GL_VOID_RET _GL_VOID GLsizei const void *pointer _GL_VOID_RET _GL_VOID GLsizei const void *pointer _GL_VOID_RET _GL_BOOL GLfloat param _GL_VOID_RET _GL_VOID GLint param _GL_VOID_RET 
_GL_VOID GLenum GLfloat param _GL_VOID_RET _GL_VOID GLenum GLint param _GL_VOID_RET _GL_VOID GLushort pattern _GL_VOID_RET _GL_VOID GLdouble GLdouble GLint GLint const GLdouble *points _GL_VOID_RET _GL_VOID GLdouble GLdouble GLint GLint GLdouble GLdouble GLint GLint const GLdouble *points _GL_VOID_RET _GL_VOID GLdouble GLdouble u2 _GL_VOID_RET _GL_VOID GLdouble GLdouble GLint GLdouble GLdouble v2 _GL_VOID_RET _GL_VOID GLenum GLfloat param _GL_VOID_RET _GL_VOID GLenum GLint param _GL_VOID_RET _GL_VOID GLenum mode _GL_VOID_RET _GL_VOID GLdouble GLdouble nz _GL_VOID_RET _GL_VOID GLfloat GLfloat nz _GL_VOID_RET _GL_VOID GLint GLint nz _GL_VOID_RET _GL_VOID GLshort GLshort nz _GL_VOID_RET _GL_VOID GLsizei const void *pointer _GL_VOID_RET _GL_VOID GLsizei const GLfloat *values _GL_VOID_RET _GL_VOID GLsizei const GLushort *values _GL_VOID_RET _GL_VOID GLint param _GL_VOID_RET _GL_VOID const GLuint const GLclampf *priorities _GL_VOID_RET _GL_VOID GLdouble y _GL_VOID_RET _GL_VOID GLfloat y _GL_VOID_RET _GL_VOID GLint y _GL_VOID_RET _GL_VOID GLshort y _GL_VOID_RET _GL_VOID GLdouble GLdouble z _GL_VOID_RET _GL_VOID GLfloat GLfloat z _GL_VOID_RET _GL_VOID GLint GLint z _GL_VOID_RET _GL_VOID GLshort GLshort z _GL_VOID_RET _GL_VOID GLdouble GLdouble z
_GL_VOID GLfloat value _GL_VOID_RET _GL_VOID const GLuint GLboolean *residences _GL_BOOL_RET _GL_VOID GLsizei GLfloat GLfloat GLfloat GLfloat const GLubyte *bitmap _GL_VOID_RET _GL_VOID GLenum type
_GL_VOID GLfloat value _GL_VOID_RET _GL_VOID const GLuint GLboolean *residences _GL_BOOL_RET _GL_VOID GLsizei GLfloat GLfloat GLfloat GLfloat const GLubyte *bitmap _GL_VOID_RET _GL_VOID GLenum const void *lists _GL_VOID_RET _GL_VOID const GLdouble *equation _GL_VOID_RET _GL_VOID GLdouble GLdouble blue _GL_VOID_RET _GL_VOID GLfloat GLfloat blue _GL_VOID_RET _GL_VOID GLint GLint blue _GL_VOID_RET _GL_VOID GLshort GLshort blue _GL_VOID_RET _GL_VOID GLubyte GLubyte blue _GL_VOID_RET _GL_VOID GLuint GLuint blue _GL_VOID_RET _GL_VOID GLushort GLushort blue _GL_VOID_RET _GL_VOID GLbyte GLbyte GLbyte alpha _GL_VOID_RET _GL_VOID GLdouble GLdouble GLdouble alpha _GL_VOID_RET _GL_VOID GLfloat GLfloat GLfloat alpha _GL_VOID_RET _GL_VOID GLint GLint GLint alpha _GL_VOID_RET _GL_VOID GLshort GLshort GLshort alpha _GL_VOID_RET _GL_VOID GLubyte GLubyte GLubyte alpha _GL_VOID_RET _GL_VOID GLuint GLuint GLuint alpha _GL_VOID_RET _GL_VOID GLushort GLushort GLushort alpha _GL_VOID_RET _GL_VOID GLenum mode _GL_VOID_RET _GL_VOID GLint y
Group RGB to Bright Vector Camera Vector Combine Material Light Line Style Layer Add Ambient Diffuse Glossy Refraction Transparent Toon Principled Hair Volume Principled Light Particle Volume Image Sky Noise Wave Voronoi Brick Texture Vector Combine Vertex Separate Vector White RGB Map Separate Set Z Dilate Combine Combine Color Channel Split ID Combine Luminance Directional Alpha Distance Hue Movie Ellipse Bokeh View Corner DENOISE
ATTR_WARN_UNUSED_RESULT const BMVert * v
static DBVT_INLINE btScalar size(const btDbvtVolume &a)
static btDbvtVolume bounds(btDbvtNode **leaves, int count)
void refit(btStridingMeshInterface *triangles, const btVector3 &aabbMin, const btVector3 &aabbMax)
SIMD_FORCE_INLINE const btScalar & w() const
Return the w value.
Attribute * find(ustring name) const
vector< Geometry * > geometry
vector< Object * > objects
bool use_background_light
bool use_true_displacement
virtual void build_bvh(BVH *bvh, Progress &progress, bool refit)
void set_substatus(const string &substatus_)
void set_error(const string &error_message_)
device_ptr device_pointer
void alloc_to_device(size_t num, bool shrink_to_fit=true)
T * alloc(size_t width, size_t height=0, size_t depth=0)
CCL_NAMESPACE_BEGIN struct Options options
Device * device_optix_create(DeviceInfo &info, Stats &stats, Profiler &profiler, bool background)
void device_optix_info(const vector< DeviceInfo > &cuda_devices, vector< DeviceInfo > &devices)
DO_INLINE void filter(lfVector *V, fmatrix3x3 *S)
unsigned long long CUtexObject
#define CCL_NAMESPACE_END
#define make_float4(x, y, z, w)
void KERNEL_FUNCTION_FULL_NAME() shader(KernelGlobals *kg, uint4 *input, float4 *output, int type, int filter, int i, int offset, int sample)
@ ATTR_STD_MOTION_VERTEX_POSITION
Segment< FEdge *, Vec3r > segment
static void sample(SocketReader *reader, int x, int y, float color[4])
Vector< CPUDevice > devices
list of all CPUDevices. for every hardware thread an instance of CPUDevice is created
struct blender::compositor::@172::@174 task
struct SELECTID_Context context
unsigned __int64 uint64_t
Curve get_curve(size_t i) const
size_t num_curves() const
size_t num_segments() const
CurveShapeType curve_shape
size_t num_triangles() const
NODE_DECLARE BoundBox bounds
int get_device_index() const
bool is_traceable() const
void push(TaskRunFunction &&task)
void wait_work(Summary *stats=NULL)
ccl_global float * buffer
DebugFlags & DebugFlags()
ccl_device_inline float dot(const float2 &a, const float2 &b)
size_t path_file_size(const string &path)
string path_get(const string &sub)
bool path_read_text(const string &path, string &text)
ccl_device_inline int4 rect_clip(int4 a, int4 b)
ccl_device_inline int4 rect_expand(int4 rect, int d)
ccl_device_inline int rect_size(int4 rect)
CCL_NAMESPACE_BEGIN string string_printf(const char *format,...)
std::unique_lock< std::mutex > thread_scoped_lock
CCL_NAMESPACE_BEGIN typedef std::mutex thread_mutex
ccl_device_inline size_t align_up(size_t offset, size_t alignment)