Blender V4.5
optix/queue.cpp
Go to the documentation of this file.
1/* SPDX-FileCopyrightText: 2011-2022 Blender Foundation
2 *
3 * SPDX-License-Identifier: Apache-2.0 */
4
5#ifdef WITH_OPTIX
6
7# include "device/optix/queue.h"
9
10# define __KERNEL_OPTIX__
12
14
15/* OptiXDeviceQueue */
16
17OptiXDeviceQueue::OptiXDeviceQueue(OptiXDevice *device) : CUDADeviceQueue(device) {}
18
19void OptiXDeviceQueue::init_execution()
20{
21 CUDADeviceQueue::init_execution();
22}
23
24static bool is_optix_specific_kernel(DeviceKernel kernel, bool osl_shading, bool osl_camera)
25{
26# ifdef WITH_OSL
27 /* OSL uses direct callables to execute, so shading needs to be done in OptiX if OSL is used. */
28 if (osl_shading && device_kernel_has_shading(kernel)) {
29 return true;
30 }
31 if (osl_camera && kernel == DEVICE_KERNEL_INTEGRATOR_INIT_FROM_CAMERA) {
32 return true;
33 }
34# else
35 (void)osl_shading;
36 (void)osl_camera;
37# endif
38
39 return device_kernel_has_intersection(kernel);
40}
41
/* Enqueue `kernel` with `work_size` work items.
 *
 * Kernels for which is_optix_specific_kernel() returns false are forwarded unchanged to
 * CUDADeviceQueue::enqueue(). All other kernels are launched through optixLaunch() using a
 * ray generation record selected by the switch below.
 *
 * Returns false if the device already reports an error, or if the kernel falls into the
 * `default` branch of the switch (an error is logged in that case). Otherwise returns whether
 * the device is still error-free after the launch was issued.
 *
 * NOTE(review): in this listing the conditions guarding the launch-parameter groups and the
 * `case` labels of the switch appear to be missing; verify against the original file before
 * relying on the control flow shown here. */
42bool OptiXDeviceQueue::enqueue(DeviceKernel kernel,
43 const int work_size,
44 const DeviceKernelArguments &args)
45{
46 OptiXDevice *const optix_device = static_cast<OptiXDevice *>(cuda_device_);
47
48# ifdef WITH_OSL
 /* Read the OSL flags from the device's OSL globals.
  * NOTE(review): `og` is dereferenced without a null check; presumably
  * get_cpu_osl_memory() never returns null here, confirm. */
49 const OSLGlobals *og = static_cast<const OSLGlobals *>(optix_device->get_cpu_osl_memory());
50 const bool osl_shading = og->use_shading;
51 const bool osl_camera = og->use_camera;
52# else
53 const bool osl_shading = false;
54 const bool osl_camera = false;
55# endif
56
 /* Everything that is not OptiX-specific takes the regular CUDA path. */
57 if (!is_optix_specific_kernel(kernel, osl_shading, osl_camera)) {
58 return CUDADeviceQueue::enqueue(kernel, work_size, args);
59 }
60
 /* Do not issue new work once the device is in an error state. */
61 if (cuda_device_->have_error()) {
62 return false;
63 }
64
65 debug_enqueue_begin(kernel, work_size);
66
67 const CUDAContextScope scope(cuda_device_);
68
69 const device_ptr sbt_data_ptr = optix_device->sbt_data.device_pointer;
70 const device_ptr launch_params_ptr = optix_device->launch_params.device_pointer;
71
 /* Helper: asynchronously copy `size` bytes from kernel argument number `arg` to the
  * launch parameters at byte `offset`, on this queue's stream. */
72 auto set_launch_param = [&](size_t offset, size_t size, int arg) {
73 cuda_device_assert(
74 cuda_device_,
75 cuMemcpyHtoDAsync(launch_params_ptr + offset, args.values[arg], size, cuda_stream_));
76 };
77
 /* Argument 0 is copied into `path_index_array` for every kernel reaching this point. */
78 set_launch_param(offsetof(KernelParamsOptiX, path_index_array), sizeof(device_ptr), 0);
79
 /* The groups below copy further arguments into the launch parameters.
  * NOTE(review): the conditions selecting each group are not visible in this listing. */
81 set_launch_param(offsetof(KernelParamsOptiX, render_buffer), sizeof(device_ptr), 1);
82 }
86 {
87 set_launch_param(offsetof(KernelParamsOptiX, offset), sizeof(int32_t), 2);
88 }
89
91 set_launch_param(offsetof(KernelParamsOptiX, num_tiles), sizeof(int32_t), 1);
92 set_launch_param(offsetof(KernelParamsOptiX, render_buffer), sizeof(device_ptr), 2);
93 set_launch_param(offsetof(KernelParamsOptiX, max_tile_work_size), sizeof(int32_t), 3);
94 }
95
 /* Block until the stream is idle. Presumably this guarantees the asynchronous parameter
  * copies above have finished reading from `args` before continuing, TODO confirm. */
96 cuda_device_assert(cuda_device_, cuStreamSynchronize(cuda_stream_));
97
98 OptixPipeline pipeline = nullptr;
99 OptixShaderBindingTable sbt_params = {};
100
 /* Select the pipeline (PIP_SHADE or PIP_INTERSECT) and the ray generation record for the
  * kernel. Records are addressed as an index times sizeof(SbtRecord) from the start of the
  * SBT buffer.
  * NOTE(review): the `case` labels are not visible in this listing; each group presumably
  * belongs to the kernel matching its PG_RGEN_* name, confirm. */
101 switch (kernel) {
103 pipeline = optix_device->pipelines[PIP_SHADE];
104 sbt_params.raygenRecord = sbt_data_ptr + PG_RGEN_SHADE_BACKGROUND * sizeof(SbtRecord);
105 break;
107 pipeline = optix_device->pipelines[PIP_SHADE];
108 sbt_params.raygenRecord = sbt_data_ptr + PG_RGEN_SHADE_LIGHT * sizeof(SbtRecord);
109 break;
111 pipeline = optix_device->pipelines[PIP_SHADE];
112 sbt_params.raygenRecord = sbt_data_ptr + PG_RGEN_SHADE_SURFACE * sizeof(SbtRecord);
113 break;
115 pipeline = optix_device->pipelines[PIP_SHADE];
116 sbt_params.raygenRecord = sbt_data_ptr + PG_RGEN_SHADE_SURFACE_RAYTRACE * sizeof(SbtRecord);
117 break;
119 pipeline = optix_device->pipelines[PIP_SHADE];
120 sbt_params.raygenRecord = sbt_data_ptr + PG_RGEN_SHADE_SURFACE_MNEE * sizeof(SbtRecord);
121 break;
123 pipeline = optix_device->pipelines[PIP_SHADE];
124 sbt_params.raygenRecord = sbt_data_ptr + PG_RGEN_SHADE_VOLUME * sizeof(SbtRecord);
125 break;
127 pipeline = optix_device->pipelines[PIP_SHADE];
128 sbt_params.raygenRecord = sbt_data_ptr + PG_RGEN_SHADE_SHADOW * sizeof(SbtRecord);
129 break;
131 pipeline = optix_device->pipelines[PIP_SHADE];
132 sbt_params.raygenRecord = sbt_data_ptr + PG_RGEN_SHADE_DEDICATED_LIGHT * sizeof(SbtRecord);
133 break;
134
136 pipeline = optix_device->pipelines[PIP_INTERSECT];
137 sbt_params.raygenRecord = sbt_data_ptr + PG_RGEN_INTERSECT_CLOSEST * sizeof(SbtRecord);
138 break;
140 pipeline = optix_device->pipelines[PIP_INTERSECT];
141 sbt_params.raygenRecord = sbt_data_ptr + PG_RGEN_INTERSECT_SHADOW * sizeof(SbtRecord);
142 break;
144 pipeline = optix_device->pipelines[PIP_INTERSECT];
145 sbt_params.raygenRecord = sbt_data_ptr + PG_RGEN_INTERSECT_SUBSURFACE * sizeof(SbtRecord);
146 break;
148 pipeline = optix_device->pipelines[PIP_INTERSECT];
149 sbt_params.raygenRecord = sbt_data_ptr + PG_RGEN_INTERSECT_VOLUME_STACK * sizeof(SbtRecord);
150 break;
152 pipeline = optix_device->pipelines[PIP_INTERSECT];
153 sbt_params.raygenRecord = sbt_data_ptr +
154 PG_RGEN_INTERSECT_DEDICATED_LIGHT * sizeof(SbtRecord);
155 break;
156
158 pipeline = optix_device->pipelines[PIP_SHADE];
159 sbt_params.raygenRecord = sbt_data_ptr + PG_RGEN_EVAL_DISPLACE * sizeof(SbtRecord);
160 break;
162 pipeline = optix_device->pipelines[PIP_SHADE];
163 sbt_params.raygenRecord = sbt_data_ptr + PG_RGEN_EVAL_BACKGROUND * sizeof(SbtRecord);
164 break;
166 pipeline = optix_device->pipelines[PIP_SHADE];
167 sbt_params.raygenRecord = sbt_data_ptr +
168 PG_RGEN_EVAL_CURVE_SHADOW_TRANSPARENCY * sizeof(SbtRecord);
169 break;
170
172 pipeline = optix_device->pipelines[PIP_SHADE];
173 sbt_params.raygenRecord = sbt_data_ptr + PG_RGEN_INIT_FROM_CAMERA * sizeof(SbtRecord);
174 break;
175
 /* Any kernel without a mapping above cannot be launched here: log and fail.
  * Note that debug_enqueue_end() is not called on this path. */
176 default:
177 LOG(ERROR) << "Invalid kernel " << device_kernel_as_string(kernel)
178 << " is attempted to be enqueued.";
179 return false;
180 }
181
 /* Miss, hit-group and callable records are addressed in the same SBT buffer as the raygen
  * record, each section starting at its own record offset with a stride of one SbtRecord. */
182 sbt_params.missRecordBase = sbt_data_ptr + MISS_PROGRAM_GROUP_OFFSET * sizeof(SbtRecord);
183 sbt_params.missRecordStrideInBytes = sizeof(SbtRecord);
184 sbt_params.missRecordCount = NUM_MISS_PROGRAM_GROUPS;
185 sbt_params.hitgroupRecordBase = sbt_data_ptr + HIT_PROGAM_GROUP_OFFSET * sizeof(SbtRecord);
186 sbt_params.hitgroupRecordStrideInBytes = sizeof(SbtRecord);
187 sbt_params.hitgroupRecordCount = NUM_HIT_PROGRAM_GROUPS;
188 sbt_params.callablesRecordBase = sbt_data_ptr + CALLABLE_PROGRAM_GROUPS_BASE * sizeof(SbtRecord);
189 sbt_params.callablesRecordCount = NUM_CALLABLE_PROGRAM_GROUPS;
190 sbt_params.callablesRecordStrideInBytes = sizeof(SbtRecord);
191
192# ifdef WITH_OSL
 /* When OSL is in use, extend the callable count by the number of entries in osl_groups. */
193 if (osl_shading || osl_camera) {
194 sbt_params.callablesRecordCount += static_cast<unsigned int>(optix_device->osl_groups.size());
195 }
196# endif
197
198 /* Launch the ray generation program. */
 /* The launch dimensions passed are work_size x 1 x 1, and the launch parameter size is
  * taken from launch_params.data_elements. */
199 optix_device_assert(optix_device,
200 optixLaunch(pipeline,
201 cuda_stream_,
202 launch_params_ptr,
203 optix_device->launch_params.data_elements,
204 &sbt_params,
205 work_size,
206 1,
207 1));
208
209 debug_enqueue_end();
210
 /* Report success only if no error was recorded on the device. */
211 return !(optix_device->have_error());
212}
213
215
216#endif /* WITH_OPTIX */
static DBVT_INLINE btScalar size(const btDbvtVolume &a)
Definition btDbvt.cpp:52
#define CCL_NAMESPACE_END
CCL_NAMESPACE_BEGIN bool device_kernel_has_shading(DeviceKernel kernel)
bool device_kernel_has_intersection(DeviceKernel kernel)
const char * device_kernel_as_string(DeviceKernel kernel)
#define offsetof(t, d)
ccl_gpu_kernel_postfix ccl_global KernelWorkTile const int num_tiles
ccl_gpu_kernel_postfix const ccl_global int ccl_global float const int work_size
ccl_gpu_kernel_postfix ccl_global KernelWorkTile const int ccl_global float const int max_tile_work_size
ccl_gpu_kernel_postfix const ccl_global int * path_index_array
ccl_gpu_kernel_postfix ccl_global KernelWorkTile const int ccl_global float * render_buffer
DeviceKernel
@ DEVICE_KERNEL_INTEGRATOR_SHADE_LIGHT
@ DEVICE_KERNEL_INTEGRATOR_SHADE_DEDICATED_LIGHT
@ DEVICE_KERNEL_INTEGRATOR_SHADE_SURFACE
@ DEVICE_KERNEL_SHADER_EVAL_DISPLACE
@ DEVICE_KERNEL_INTEGRATOR_INTERSECT_SUBSURFACE
@ DEVICE_KERNEL_INTEGRATOR_SHADE_SHADOW
@ DEVICE_KERNEL_INTEGRATOR_INTERSECT_VOLUME_STACK
@ DEVICE_KERNEL_SHADER_EVAL_BACKGROUND
@ DEVICE_KERNEL_INTEGRATOR_SHADE_SURFACE_RAYTRACE
@ DEVICE_KERNEL_INTEGRATOR_INTERSECT_DEDICATED_LIGHT
@ DEVICE_KERNEL_INTEGRATOR_SHADE_SURFACE_MNEE
@ DEVICE_KERNEL_INTEGRATOR_INIT_FROM_CAMERA
@ DEVICE_KERNEL_SHADER_EVAL_CURVE_SHADOW_TRANSPARENCY
@ DEVICE_KERNEL_INTEGRATOR_SHADE_VOLUME
@ DEVICE_KERNEL_INTEGRATOR_INTERSECT_SHADOW
@ DEVICE_KERNEL_INTEGRATOR_INTERSECT_CLOSEST
@ DEVICE_KERNEL_INTEGRATOR_SHADE_BACKGROUND
#define LOG(severity)
Definition log.h:32
void * values[MAX_ARGS]
uint64_t device_ptr
Definition types_base.h:44