Blender V4.5
denoiser_gpu.cpp
Go to the documentation of this file.
1/* SPDX-FileCopyrightText: 2011-2022 Blender Foundation
2 *
3 * SPDX-License-Identifier: Apache-2.0 */
4
6
7#include "device/denoise.h"
8#include "device/device.h"
9#include "device/memory.h"
10#include "device/queue.h"
11
13
14#include "session/buffers.h"
15
16#include "util/log.h"
17
19
21 : Denoiser(denoiser_device, params)
22{
23 denoiser_queue_ = denoiser_device->gpu_queue_create();
25}
26
28{
29 /* Explicit implementation, to allow forward declaration of Device in the header. */
30}
31
32bool DenoiserGPU::denoise_buffer(const BufferParams &buffer_params,
33 RenderBuffers *render_buffers,
34 const int num_samples,
35 bool allow_inplace_modification)
36{
37 Device *denoiser_device = get_denoiser_device();
38 if (!denoiser_device) {
39 return false;
40 }
41
42 DenoiseTask task;
43 task.params = params_;
44 task.num_samples = num_samples;
45 task.buffer_params = buffer_params;
46 task.allow_inplace_modification = allow_inplace_modification;
47
48 RenderBuffers local_render_buffers(denoiser_device);
49 bool local_buffer_used = false;
50
51 if (denoiser_device == render_buffers->buffer.device) {
52 /* The device can access an existing buffer pointer. */
53 local_buffer_used = false;
54 task.render_buffers = render_buffers;
55 }
56 else {
57 VLOG_WORK << "Creating temporary buffer on denoiser device.";
58
59 /* Create buffer which is available by the device used by denoiser. */
60
61 /* TODO(sergey): Optimize data transfers. For example, only copy denoising related passes,
62 * ignoring other light ad data passes. */
63
64 local_buffer_used = true;
65
66 render_buffers->copy_from_device();
67
68 local_render_buffers.reset(buffer_params);
69
70 /* NOTE: The local buffer is allocated for an exact size of the effective render size, while
71 * the input render buffer is allocated for the lowest resolution divider possible. So it is
72 * important to only copy actually needed part of the input buffer. */
73 memcpy(local_render_buffers.buffer.data(),
74 render_buffers->buffer.data(),
75 sizeof(float) * local_render_buffers.buffer.size());
76
77 denoiser_queue_->copy_to_device(local_render_buffers.buffer);
78
79 task.render_buffers = &local_render_buffers;
81 }
82
83 const bool denoise_result = denoise_buffer(task);
84
85 if (local_buffer_used) {
86 local_render_buffers.copy_from_device();
87
89 render_buffers, buffer_params, &local_render_buffers, local_render_buffers.params);
90
91 render_buffers->copy_to_device();
92 }
93
94 return denoise_result;
95}
96
98{
99 DenoiseContext context(denoiser_device_, task);
100
101 if (!denoise_ensure(context)) {
102 return false;
103 }
104
105 if (!denoise_filter_guiding_preprocess(context)) {
106 LOG(ERROR) << "Error preprocessing guiding passes.";
107 return false;
108 }
109
110 /* Passes which will use real albedo when it is available. */
111 denoise_pass(context, PASS_COMBINED);
113
114 /* Passes which do not need albedo and hence if real is present it needs to become fake. */
116
117 return true;
118}
119
121{
122 if (!denoise_create_if_needed(context)) {
123 LOG(ERROR) << "GPU denoiser creation has failed.";
124 return false;
125 }
126
127 if (!denoise_configure_if_needed(context)) {
128 LOG(ERROR) << "GPU denoiser configuration has failed.";
129 return false;
130 }
131
132 return true;
133}
134
136{
137 const BufferParams &buffer_params = context.buffer_params;
138
139 const int work_size = buffer_params.width * buffer_params.height;
140
141 const DeviceKernelArguments args(&context.guiding_params.device_pointer,
142 &context.guiding_params.pass_stride,
143 &context.guiding_params.pass_albedo,
144 &context.guiding_params.pass_normal,
145 &context.guiding_params.pass_flow,
146 &context.render_buffers->buffer.device_pointer,
147 &buffer_params.offset,
148 &buffer_params.stride,
149 &buffer_params.pass_stride,
150 &context.pass_sample_count,
151 &context.pass_denoising_albedo,
152 &context.pass_denoising_normal,
153 &context.pass_motion,
154 &buffer_params.full_x,
155 &buffer_params.full_y,
156 &buffer_params.width,
157 &buffer_params.height,
158 &context.num_samples);
159
161}
162
164 : denoise_params(task.params),
167 guiding_buffer(device, "denoiser guiding passes buffer", true),
169{
171 if (denoise_params.use_pass_albedo) {
172 num_input_passes += 1;
173 use_pass_albedo = true;
174 pass_denoising_albedo = buffer_params.get_pass_offset(PASS_DENOISING_ALBEDO);
175 if (denoise_params.use_pass_normal) {
176 num_input_passes += 1;
177 use_pass_normal = true;
178 pass_denoising_normal = buffer_params.get_pass_offset(PASS_DENOISING_NORMAL);
179 }
180 }
181
182 if (denoise_params.temporally_stable) {
183 prev_output.device_pointer = render_buffers->buffer.device_pointer;
184
185 prev_output.offset = buffer_params.get_pass_offset(PASS_DENOISING_PREVIOUS);
186
187 prev_output.stride = buffer_params.stride;
188 prev_output.pass_stride = buffer_params.pass_stride;
189
190 num_input_passes += 1;
191 use_pass_motion = true;
192 pass_motion = buffer_params.get_pass_offset(PASS_MOTION);
193 }
194
195 use_guiding_passes = (num_input_passes - 1) > 0;
196
197 if (use_guiding_passes) {
198 if (task.allow_inplace_modification) {
199 guiding_params.device_pointer = render_buffers->buffer.device_pointer;
200
201 guiding_params.pass_albedo = pass_denoising_albedo;
202 guiding_params.pass_normal = pass_denoising_normal;
203 guiding_params.pass_flow = pass_motion;
204
205 guiding_params.stride = buffer_params.stride;
206 guiding_params.pass_stride = buffer_params.pass_stride;
207 }
208 else {
209 guiding_params.pass_stride = 0;
210 if (use_pass_albedo) {
211 guiding_params.pass_albedo = guiding_params.pass_stride;
212 guiding_params.pass_stride += 3;
213 }
214 if (use_pass_normal) {
215 guiding_params.pass_normal = guiding_params.pass_stride;
216 guiding_params.pass_stride += 3;
217 }
218 if (use_pass_motion) {
219 guiding_params.pass_flow = guiding_params.pass_stride;
220 guiding_params.pass_stride += 2;
221 }
222
223 guiding_params.stride = buffer_params.width;
224
225 guiding_buffer.alloc_to_device(buffer_params.width * buffer_params.height *
226 guiding_params.pass_stride);
227 guiding_params.device_pointer = guiding_buffer.device_pointer;
228 }
229 }
230
231 pass_sample_count = buffer_params.get_pass_offset(PASS_SAMPLE_COUNT);
232}
233
235 const DenoisePass &pass)
236{
237 const BufferParams &buffer_params = context.buffer_params;
238
239 const int work_size = buffer_params.width * buffer_params.height;
240
241 const DeviceKernelArguments args(&context.render_buffers->buffer.device_pointer,
242 &buffer_params.full_x,
243 &buffer_params.full_y,
244 &buffer_params.width,
245 &buffer_params.height,
246 &buffer_params.offset,
247 &buffer_params.stride,
248 &buffer_params.pass_stride,
249 &context.num_samples,
250 &pass.noisy_offset,
251 &pass.denoised_offset,
252 &context.pass_sample_count,
253 &pass.num_components,
254 &pass.use_compositing);
255
257}
258
260 const DenoisePass &pass)
261{
262 if (context.denoise_params.type != DENOISER_OPTIX) {
263 /* Pass preprocessing is used to clamp values for the OptiX denoiser.
264 * Clamping is not necessary for other denoisers, so just skip this preprocess step. */
265 return true;
266 }
267
268 const BufferParams &buffer_params = context.buffer_params;
269
270 const int work_size = buffer_params.width * buffer_params.height;
271
272 const DeviceKernelArguments args(&context.render_buffers->buffer.device_pointer,
273 &buffer_params.full_x,
274 &buffer_params.full_y,
275 &buffer_params.width,
276 &buffer_params.height,
277 &buffer_params.offset,
278 &buffer_params.stride,
279 &buffer_params.pass_stride,
280 &pass.denoised_offset);
281
283}
284
286{
287 const BufferParams &buffer_params = context.buffer_params;
288
289 const int work_size = buffer_params.width * buffer_params.height;
290
291 const DeviceKernelArguments args(&context.guiding_params.device_pointer,
292 &context.guiding_params.pass_stride,
293 &context.guiding_params.pass_albedo,
294 &buffer_params.width,
295 &buffer_params.height);
296
298}
299
301{
302 PassAccessor::PassAccessInfo pass_access_info;
303 pass_access_info.type = pass.type;
304 pass_access_info.mode = PassMode::NOISY;
305 pass_access_info.offset = pass.noisy_offset;
306
307 /* Denoiser operates on passes which are used to calculate the approximation, and is never used
308 * on the approximation. The latter is not even possible because OptiX does not support
309 * denoising of semi-transparent pixels. */
310 pass_access_info.use_approximate_shadow_catcher = false;
311 pass_access_info.use_approximate_shadow_catcher_background = false;
312 pass_access_info.show_active_pixels = false;
313
314 /* TODO(sergey): Consider adding support of actual exposure, to avoid clamping in extreme cases.
315 */
316 const PassAccessorGPU pass_accessor(
317 denoiser_queue_.get(), pass_access_info, 1.0f, context.num_samples);
318
319 PassAccessor::Destination destination(pass_access_info.type);
320 destination.d_pixels = context.render_buffers->buffer.device_pointer;
321 destination.num_components = 3;
322 destination.pixel_offset = pass.denoised_offset;
323 destination.pixel_stride = context.buffer_params.pass_stride;
324
325 BufferParams buffer_params = context.buffer_params;
326 buffer_params.window_x = 0;
327 buffer_params.window_y = 0;
328 buffer_params.window_width = buffer_params.width;
329 buffer_params.window_height = buffer_params.height;
330
331 pass_accessor.get_render_tile_pixels(context.render_buffers, buffer_params, destination);
332}
333
335{
336 const BufferParams &buffer_params = context.buffer_params;
337
338 const DenoisePass pass(pass_type, buffer_params);
339
340 if (pass.noisy_offset == PASS_UNUSED) {
341 return;
342 }
343 if (pass.denoised_offset == PASS_UNUSED) {
344 LOG(DFATAL) << "Missing denoised pass " << pass_type_as_string(pass_type);
345 return;
346 }
347
348 if (pass.use_denoising_albedo) {
349 if (context.albedo_replaced_with_fake) {
350 LOG(ERROR) << "Pass which requires albedo is denoised after fake albedo has been set.";
351 return;
352 }
353 }
354 else if (context.use_guiding_passes && !context.albedo_replaced_with_fake) {
355 context.albedo_replaced_with_fake = true;
357 LOG(ERROR) << "Error replacing real albedo with the fake one.";
358 return;
359 }
360 }
361
362 /* Read and preprocess noisy color input pass. */
363 denoise_color_read(context, pass);
364 if (!denoise_filter_color_preprocess(context, pass)) {
365 LOG(ERROR) << "Error converting denoising passes to RGB buffer.";
366 return;
367 }
368
369 if (!denoise_run(context, pass)) {
370 LOG(ERROR) << "Error running denoiser.";
371 return;
372 }
373
374 /* Store result in the combined pass of the render buffer.
375 *
376 * This will scale the denoiser result up to match the number of, possibly per-pixel, samples. */
377 if (!denoise_filter_color_postprocess(context, pass)) {
378 LOG(ERROR) << "Error copying denoiser result to the denoised pass.";
379 return;
380 }
381
382 denoiser_queue_->synchronize();
383}
384
return true
void render_buffers_host_copy_denoised(RenderBuffers *dst, const BufferParams &dst_params, const RenderBuffers *src, const BufferParams &src_params, const size_t src_offset)
Definition buffers.cpp:301
int pass_stride
Definition buffers.h:92
int window_y
Definition buffers.h:78
int window_height
Definition buffers.h:80
int window_width
Definition buffers.h:79
NODE_DECLARE int width
Definition buffers.h:70
int window_x
Definition buffers.h:77
DenoiseContext(Device *device, const DenoiseTask &task)
const BufferParams & buffer_params
const DenoiseParams & denoise_params
device_only_memory< float > guiding_buffer
RenderBuffers * render_buffers
RenderBuffers * render_buffers
bool denoise_filter_guiding_set_fake_albedo(const DenoiseContext &context)
virtual bool denoise_run(const DenoiseContext &context, const DenoisePass &pass)=0
DenoiserGPU(Device *denoiser_device, const DenoiseParams &params)
virtual bool denoise_ensure(DenoiseContext &context)
bool denoise_filter_color_postprocess(const DenoiseContext &context, const DenoisePass &pass)
~DenoiserGPU() override
bool denoise_filter_color_preprocess(const DenoiseContext &context, const DenoisePass &pass)
bool denoise_filter_guiding_preprocess(const DenoiseContext &context)
virtual bool denoise_create_if_needed(DenoiseContext &context)=0
bool denoise_buffer(const BufferParams &buffer_params, RenderBuffers *render_buffers, const int num_samples, bool allow_inplace_modification) override
unique_ptr< DeviceQueue > denoiser_queue_
void denoise_pass(DenoiseContext &context, PassType pass_type)
void denoise_color_read(const DenoiseContext &context, const DenoisePass &pass)
virtual bool denoise_configure_if_needed(DenoiseContext &context)=0
DenoiseParams params_
Definition denoiser.h:133
Denoiser(Device *denoiser_device, const DenoiseParams &params)
Definition denoiser.cpp:211
Device * get_denoiser_device() const
Definition denoiser.cpp:268
Device * denoiser_device_
Definition denoiser.h:131
virtual unique_ptr< DeviceQueue > gpu_queue_create()
bool get_render_tile_pixels(const RenderBuffers *render_buffers, const Destination &destination) const
device_vector< float > buffer
Definition buffers.h:158
BufferParams params
Definition buffers.h:155
bool copy_from_device()
Definition buffers.cpp:283
void copy_to_device()
Definition buffers.cpp:296
void reset(const BufferParams &params)
Definition buffers.cpp:268
size_t size() const
@ DENOISER_OPTIX
Definition denoise.h:12
#define PASS_UNUSED
#define CCL_NAMESPACE_END
uiWidgetBaseParameters params[MAX_WIDGET_BASE_BATCH]
ccl_gpu_kernel_postfix const ccl_global int ccl_global float const int work_size
PassType
@ PASS_SHADOW_CATCHER_MATTE
@ PASS_SHADOW_CATCHER
@ PASS_COMBINED
@ PASS_SAMPLE_COUNT
@ DEVICE_KERNEL_FILTER_COLOR_PREPROCESS
@ DEVICE_KERNEL_FILTER_GUIDING_SET_FAKE_ALBEDO
@ DEVICE_KERNEL_FILTER_COLOR_POSTPROCESS
@ DEVICE_KERNEL_FILTER_GUIDING_PREPROCESS
#define DCHECK(expression)
Definition log.h:50
#define VLOG_WORK
Definition log.h:74
#define LOG(severity)
Definition log.h:32
CCL_NAMESPACE_BEGIN const char * pass_type_as_string(const PassType type)
Definition pass.cpp:11
@ NOISY
Definition pass.h:21