Blender V4.5 — state_flow.h (Cycles kernel: control flow between integrator kernels).
/* SPDX-FileCopyrightText: 2011-2022 Blender Foundation
 *
 * SPDX-License-Identifier: Apache-2.0 */

#pragma once

#include "kernel/globals.h"
#include "kernel/types.h"

#include "util/atomic.h"
/* Control Flow
 *
 * Utilities for control flow between kernels. The implementation is different between CPU and
 * GPU devices. For the latter, part of the logic is handled on the host side with wavefronts.
 *
 * There is a main path for regular path tracing of camera rays. Shadows for next
 * event estimation branch off from this into their own path, that may be computed in
 * parallel while the main path continues. Additionally, shading kernels are sorted using
 * a key for coherence.
 *
 * Each kernel on the main path must call one of these functions. These may not be called
 * multiple times from the same kernel.
 *
 * integrator_path_init(kg, state, next_kernel)
 * integrator_path_next(kg, state, current_kernel, next_kernel)
 * integrator_path_terminate(kg, state, current_kernel)
 *
 * For the shadow path similar functions are used, and again each shadow kernel must call
 * one of them, and only once.
 */
36
41
46
47#ifdef __KERNEL_GPU__
48
51 const DeviceKernel next_kernel)
52{
53 atomic_fetch_and_add_uint32(&kernel_integrator_state.queue_counter->num_queued[next_kernel], 1);
54 INTEGRATOR_STATE_WRITE(state, path, queued_kernel) = next_kernel;
55}
56
59 const DeviceKernel current_kernel,
60 const DeviceKernel next_kernel)
61{
62 atomic_fetch_and_sub_uint32(&kernel_integrator_state.queue_counter->num_queued[current_kernel],
63 1);
64 atomic_fetch_and_add_uint32(&kernel_integrator_state.queue_counter->num_queued[next_kernel], 1);
65 INTEGRATOR_STATE_WRITE(state, path, queued_kernel) = next_kernel;
66}
67
70 const DeviceKernel current_kernel)
71{
72 atomic_fetch_and_sub_uint32(&kernel_integrator_state.queue_counter->num_queued[current_kernel],
73 1);
74 INTEGRATOR_STATE_WRITE(state, path, queued_kernel) = 0;
75}
76
78 KernelGlobals kg, IntegratorState state, const DeviceKernel next_kernel, const bool is_ao)
79{
81 &kernel_integrator_state.next_shadow_path_index[0], 1);
82 atomic_fetch_and_add_uint32(&kernel_integrator_state.queue_counter->num_queued[next_kernel], 1);
83 INTEGRATOR_STATE_WRITE(shadow_state, shadow_path, queued_kernel) = next_kernel;
84# ifdef __PATH_GUIDING__
85 INTEGRATOR_STATE_WRITE(shadow_state, shadow_path, path_segment) = nullptr;
86# endif
87 return shadow_state;
88}
89
92 const DeviceKernel current_kernel,
93 const DeviceKernel next_kernel)
94{
95 atomic_fetch_and_sub_uint32(&kernel_integrator_state.queue_counter->num_queued[current_kernel],
96 1);
97 atomic_fetch_and_add_uint32(&kernel_integrator_state.queue_counter->num_queued[next_kernel], 1);
98 INTEGRATOR_STATE_WRITE(state, shadow_path, queued_kernel) = next_kernel;
99}
100
103 const DeviceKernel current_kernel)
104{
105 atomic_fetch_and_sub_uint32(&kernel_integrator_state.queue_counter->num_queued[current_kernel],
106 1);
107 INTEGRATOR_STATE_WRITE(state, shadow_path, queued_kernel) = 0;
108}
109
110/* Sort first by truncated state index (for good locality), then by key (for good coherence). */
111# define INTEGRATOR_SORT_KEY(key, state) \
112 (key + kernel_data.max_shaders * (state / kernel_integrator_state.sort_partition_divisor))
113
116 const DeviceKernel next_kernel,
117 const uint32_t key)
118{
119 const int key_ = INTEGRATOR_SORT_KEY(key, state);
120 atomic_fetch_and_add_uint32(&kernel_integrator_state.queue_counter->num_queued[next_kernel], 1);
121 INTEGRATOR_STATE_WRITE(state, path, queued_kernel) = next_kernel;
122 INTEGRATOR_STATE_WRITE(state, path, shader_sort_key) = key_;
123
124# if defined(__KERNEL_LOCAL_ATOMIC_SORT__)
125 if (!kernel_integrator_state.sort_key_counter[next_kernel]) {
126 return;
127 }
128# endif
129
130 atomic_fetch_and_add_uint32(&kernel_integrator_state.sort_key_counter[next_kernel][key_], 1);
131}
132
135 const DeviceKernel current_kernel,
136 const DeviceKernel next_kernel,
137 const uint32_t key)
138{
139 const int key_ = INTEGRATOR_SORT_KEY(key, state);
140 atomic_fetch_and_sub_uint32(&kernel_integrator_state.queue_counter->num_queued[current_kernel],
141 1);
142 atomic_fetch_and_add_uint32(&kernel_integrator_state.queue_counter->num_queued[next_kernel], 1);
143 INTEGRATOR_STATE_WRITE(state, path, queued_kernel) = next_kernel;
144 INTEGRATOR_STATE_WRITE(state, path, shader_sort_key) = key_;
145
146# if defined(__KERNEL_LOCAL_ATOMIC_SORT__)
147 if (!kernel_integrator_state.sort_key_counter[next_kernel]) {
148 return;
149 }
150# endif
151
152 atomic_fetch_and_add_uint32(&kernel_integrator_state.sort_key_counter[next_kernel][key_], 1);
153}
154
155#else
156
159 const DeviceKernel next_kernel)
160{
161 INTEGRATOR_STATE_WRITE(state, path, queued_kernel) = next_kernel;
162}
163
166 const DeviceKernel next_kernel,
167 const uint32_t key)
168{
169 INTEGRATOR_STATE_WRITE(state, path, queued_kernel) = next_kernel;
170 (void)key;
171}
172
175 const DeviceKernel current_kernel,
176 const DeviceKernel next_kernel)
177{
178 INTEGRATOR_STATE_WRITE(state, path, queued_kernel) = next_kernel;
179 (void)current_kernel;
180}
181
184 const DeviceKernel current_kernel)
185{
186 INTEGRATOR_STATE_WRITE(state, path, queued_kernel) = 0;
187 (void)current_kernel;
188}
189
192 const DeviceKernel current_kernel,
193 const DeviceKernel next_kernel,
194 const uint32_t key)
195{
196 INTEGRATOR_STATE_WRITE(state, path, queued_kernel) = next_kernel;
197 (void)key;
198 (void)current_kernel;
199}
200
202 KernelGlobals kg, IntegratorState state, const DeviceKernel next_kernel, const bool is_ao)
203{
204 IntegratorShadowState shadow_state = (is_ao) ? &state->ao : &state->shadow;
205 INTEGRATOR_STATE_WRITE(shadow_state, shadow_path, queued_kernel) = next_kernel;
206# ifdef __PATH_GUIDING__
207 INTEGRATOR_STATE_WRITE(shadow_state, shadow_path, path_segment) = nullptr;
208# endif
209 return shadow_state;
210}
211
214 const DeviceKernel current_kernel,
215 const DeviceKernel next_kernel)
216{
217 INTEGRATOR_STATE_WRITE(state, shadow_path, queued_kernel) = next_kernel;
218 (void)current_kernel;
219}
220
223 const DeviceKernel current_kernel)
224{
225 INTEGRATOR_STATE_WRITE(state, shadow_path, queued_kernel) = 0;
226 (void)current_kernel;
227}
228
229#endif
230
ATOMIC_INLINE uint32_t atomic_fetch_and_add_uint32(uint32_t *p, uint32_t x)
#define ccl_device_forceinline
const ThreadKernelGlobalsCPU * KernelGlobals
#define CCL_NAMESPACE_END
#define kernel_integrator_state
DeviceKernel
static ulong state[N]
IntegratorShadowStateCPU * IntegratorShadowState
Definition state.h:230
#define INTEGRATOR_STATE_WRITE(state, nested_struct, member)
Definition state.h:236
#define INTEGRATOR_STATE(state, nested_struct, member)
Definition state.h:235
const IntegratorShadowStateCPU * ConstIntegratorShadowState
Definition state.h:231
IntegratorStateCPU * IntegratorState
Definition state.h:228
const IntegratorStateCPU * ConstIntegratorState
Definition state.h:229
ccl_device_forceinline void integrator_shadow_path_terminate(KernelGlobals kg, IntegratorShadowState state, const DeviceKernel current_kernel)
Definition state_flow.h:221
CCL_NAMESPACE_BEGIN ccl_device_forceinline bool integrator_path_is_terminated(ConstIntegratorState state)
Definition state_flow.h:37
ccl_device_forceinline void integrator_path_next_sorted(KernelGlobals kg, IntegratorState state, const DeviceKernel current_kernel, const DeviceKernel next_kernel, const uint32_t key)
Definition state_flow.h:190
ccl_device_forceinline void integrator_path_terminate(KernelGlobals kg, IntegratorState state, const DeviceKernel current_kernel)
Definition state_flow.h:182
ccl_device_forceinline bool integrator_shadow_path_is_terminated(ConstIntegratorShadowState state)
Definition state_flow.h:42
ccl_device_forceinline void integrator_path_next(KernelGlobals kg, IntegratorState state, const DeviceKernel current_kernel, const DeviceKernel next_kernel)
Definition state_flow.h:173
ccl_device_forceinline void integrator_path_init_sorted(KernelGlobals kg, IntegratorState state, const DeviceKernel next_kernel, const uint32_t key)
Definition state_flow.h:164
ccl_device_forceinline IntegratorShadowState integrator_shadow_path_init(KernelGlobals kg, IntegratorState state, const DeviceKernel next_kernel, const bool is_ao)
Definition state_flow.h:201
ccl_device_forceinline void integrator_shadow_path_next(KernelGlobals kg, IntegratorShadowState state, const DeviceKernel current_kernel, const DeviceKernel next_kernel)
Definition state_flow.h:212
ccl_device_forceinline void integrator_path_init(KernelGlobals kg, IntegratorState state, const DeviceKernel next_kernel)
Definition state_flow.h:157