Blender  V2.93
kernel_shadow.h
Go to the documentation of this file.
1 /*
2  * Copyright 2011-2013 Blender Foundation
3  *
4  * Licensed under the Apache License, Version 2.0 (the "License");
5  * you may not use this file except in compliance with the License.
6  * You may obtain a copy of the License at
7  *
8  * http://www.apache.org/licenses/LICENSE-2.0
9  *
10  * Unless required by applicable law or agreed to in writing, software
11  * distributed under the License is distributed on an "AS IS" BASIS,
12  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13  * See the License for the specific language governing permissions and
14  * limitations under the License.
15  */
16 
18 
#ifdef __VOLUME__
/* Get PathState ready for use for volume stack evaluation.
 *
 * Copies the current path state into dedicated shadow-path storage and, when
 * the shadow ray starts on the "other" side of the surface, updates the
 * copy's volume stack so attenuation is computed for the correct media.
 * Returns a pointer to the state copy to use for the shadow traversal.
 */
#  ifdef __SPLIT_KERNEL__
ccl_addr_space
#  endif
    ccl_device_inline PathState *
    shadow_blocked_volume_path_state(KernelGlobals *kg,
                                     VolumeState *volume_state,
                                     ccl_addr_space PathState *state,
                                     ShaderData *sd,
                                     Ray *ray)
{
#  ifdef __SPLIT_KERNEL__
  /* Split kernel keeps the shadow path state in global memory. */
  ccl_addr_space PathState *ps =
      &kernel_split_state.state_shadow[ccl_global_id(1) * ccl_global_size(0) + ccl_global_id(0)];
#  else
  PathState *ps = &volume_state->ps;
#  endif
  *ps = *state;
  /* We are checking for shadow on the "other" side of the surface, so need
   * to discard volume we are currently at.
   */
  if (dot(sd->Ng, ray->D) < 0.0f) {
    kernel_volume_stack_enter_exit(kg, sd, ps->volume_stack);
  }
  return ps;
}
#endif /* __VOLUME__ */
47 
48 /* Attenuate throughput accordingly to the given intersection event.
49  * Returns true if the throughput is zero and traversal can be aborted.
50  */
52  ShaderData *shadow_sd,
54 #ifdef __VOLUME__
55  ccl_addr_space PathState *volume_state,
56 #endif
57  Intersection *isect,
58  Ray *ray,
59  float3 *throughput)
60 {
61 #ifdef __VOLUME__
62  /* Attenuation between last surface and next surface. */
63  if (volume_state->volume_stack[0].shader != SHADER_NONE) {
64  Ray segment_ray = *ray;
65  segment_ray.t = isect->t;
66  kernel_volume_shadow(kg, shadow_sd, volume_state, &segment_ray, throughput);
67  }
68 #endif
69  /* Setup shader data at surface. */
70  shader_setup_from_ray(kg, shadow_sd, isect, ray);
71  /* Attenuation from transparent surface. */
72  if (!(shadow_sd->flag & SD_HAS_ONLY_VOLUME)) {
76  *throughput *= shader_bsdf_transparency(kg, shadow_sd);
77  }
78  /* Stop if all light is blocked. */
79  if (is_zero(*throughput)) {
80  return true;
81  }
82 #ifdef __VOLUME__
83  /* Exit/enter volume. */
84  kernel_volume_stack_enter_exit(kg, shadow_sd, volume_state->volume_stack);
85 #endif
86  return false;
87 }
88 
89 /* Special version which only handles opaque shadows. */
90 ccl_device bool shadow_blocked_opaque(KernelGlobals *kg,
91  ShaderData *shadow_sd,
93  const uint visibility,
94  Ray *ray,
95  Intersection *isect,
96  float3 *shadow)
97 {
98  const bool blocked = scene_intersect(kg, ray, visibility & PATH_RAY_SHADOW_OPAQUE, isect);
99 #ifdef __VOLUME__
100  if (!blocked && state->volume_stack[0].shader != SHADER_NONE) {
101  /* Apply attenuation from current volume shader. */
102  kernel_volume_shadow(kg, shadow_sd, state, ray, shadow);
103  }
104 #endif
105  return blocked;
106 }
107 
108 #ifdef __TRANSPARENT_SHADOWS__
109 # ifdef __SHADOW_RECORD_ALL__
110 /* Shadow function to compute how much light is blocked,
111  *
112  * We trace a single ray. If it hits any opaque surface, or more than a given
113  * number of transparent surfaces is hit, then we consider the geometry to be
114  * entirely blocked. If not, all transparent surfaces will be recorded and we
115  * will shade them one by one to determine how much light is blocked. This all
116  * happens in one scene intersection function.
117  *
118  * Recording all hits works well in some cases but may be slower in others. If
119  * we have many semi-transparent hairs, one intersection may be faster because
 120  * you'd be reintersecting the same hairs a lot with each step otherwise. If
121  * however there is mostly binary transparency then we may be recording many
122  * unnecessary intersections when one of the first surfaces blocks all light.
123  *
124  * From tests in real scenes it seems the performance loss is either minimal,
125  * or there is a performance increase anyway due to avoiding the need to send
126  * two rays with transparent shadows.
127  *
128  * On CPU it'll handle all transparent bounces (by allocating storage for
129  * intersections when they don't fit into the stack storage).
130  *
131  * On GPU it'll only handle SHADOW_STACK_MAX_HITS-1 intersections, so this
132  * is something to be kept an eye on.
133  */
134 
135 # define SHADOW_STACK_MAX_HITS 64
136 
137 /* Actual logic with traversal loop implementation which is free from device
138  * specific tweaks.
139  *
140  * Note that hits array should be as big as max_hits+1.
141  */
142 ccl_device bool shadow_blocked_transparent_all_loop(KernelGlobals *kg,
143  ShaderData *sd,
144  ShaderData *shadow_sd,
146  const uint visibility,
147  Ray *ray,
148  Intersection *hits,
149  uint max_hits,
150  float3 *shadow)
151 {
152  /* Intersect to find an opaque surface, or record all transparent
153  * surface hits.
154  */
155  uint num_hits;
156  const bool blocked = scene_intersect_shadow_all(kg, ray, hits, visibility, max_hits, &num_hits);
157 # ifdef __VOLUME__
158 # ifdef __KERNEL_OPTIX__
159  VolumeState &volume_state = kg->volume_state;
160 # else
161  VolumeState volume_state;
162 # endif
163 # endif
164  /* If no opaque surface found but we did find transparent hits,
165  * shade them.
166  */
167  if (!blocked && num_hits > 0) {
168  float3 throughput = one_float3();
169  float3 Pend = ray->P + ray->D * ray->t;
170  float last_t = 0.0f;
171  int bounce = state->transparent_bounce;
172  Intersection *isect = hits;
173 # ifdef __VOLUME__
174 # ifdef __SPLIT_KERNEL__
176 # endif
177  PathState *ps = shadow_blocked_volume_path_state(kg, &volume_state, state, sd, ray);
178 # endif
179  sort_intersections(hits, num_hits);
180  for (int hit = 0; hit < num_hits; hit++, isect++) {
181  /* Adjust intersection distance for moving ray forward. */
182  float new_t = isect->t;
183  isect->t -= last_t;
184  /* Skip hit if we did not move forward, step by step raytracing
185  * would have skipped it as well then.
186  */
187  if (last_t == new_t) {
188  continue;
189  }
190  last_t = new_t;
191  /* Attenuate the throughput. */
193  shadow_sd,
194  state,
195 # ifdef __VOLUME__
196  ps,
197 # endif
198  isect,
199  ray,
200  &throughput)) {
201  return true;
202  }
203  /* Move ray forward. */
204  ray->P = shadow_sd->P;
205  if (ray->t != FLT_MAX) {
206  ray->D = normalize_len(Pend - ray->P, &ray->t);
207  }
208  bounce++;
209  }
210 # ifdef __VOLUME__
211  /* Attenuation for last line segment towards light. */
212  if (ps->volume_stack[0].shader != SHADER_NONE) {
213  kernel_volume_shadow(kg, shadow_sd, ps, ray, &throughput);
214  }
215 # endif
216  *shadow = throughput;
217  return is_zero(throughput);
218  }
219 # ifdef __VOLUME__
220  if (!blocked && state->volume_stack[0].shader != SHADER_NONE) {
221  /* Apply attenuation from current volume shader. */
222 # ifdef __SPLIT_KERNEL__
224 # endif
225  PathState *ps = shadow_blocked_volume_path_state(kg, &volume_state, state, sd, ray);
226  kernel_volume_shadow(kg, shadow_sd, ps, ray, shadow);
227  }
228 # endif
229  return blocked;
230 }
231 
232 /* Here we do all device specific trickery before invoking actual traversal
233  * loop to help readability of the actual logic.
234  */
235 ccl_device bool shadow_blocked_transparent_all(KernelGlobals *kg,
236  ShaderData *sd,
237  ShaderData *shadow_sd,
239  const uint visibility,
240  Ray *ray,
241  uint max_hits,
242  float3 *shadow)
243 {
244 # ifdef __SPLIT_KERNEL__
245  Intersection hits_[SHADOW_STACK_MAX_HITS];
246  Intersection *hits = &hits_[0];
247 # elif defined(__KERNEL_CUDA__)
248  Intersection *hits = kg->hits_stack;
249 # else
250  Intersection hits_stack[SHADOW_STACK_MAX_HITS];
251  Intersection *hits = hits_stack;
252 # endif
253 # ifndef __KERNEL_GPU__
254  /* Prefer to use stack but use dynamic allocation if too deep max hits
255  * we need max_hits + 1 storage space due to the logic in
256  * scene_intersect_shadow_all which will first store and then check if
257  * the limit is exceeded.
258  *
259  * Ignore this on GPU because of slow/unavailable malloc().
260  */
261  if (max_hits + 1 > SHADOW_STACK_MAX_HITS) {
262  if (kg->transparent_shadow_intersections == NULL) {
263  const int transparent_max_bounce = kernel_data.integrator.transparent_max_bounce;
264  kg->transparent_shadow_intersections = (Intersection *)malloc(sizeof(Intersection) *
265  (transparent_max_bounce + 1));
266  }
267  hits = kg->transparent_shadow_intersections;
268  }
269 # endif /* __KERNEL_GPU__ */
270  /* Invoke actual traversal. */
271  return shadow_blocked_transparent_all_loop(
272  kg, sd, shadow_sd, state, visibility, ray, hits, max_hits, shadow);
273 }
274 # endif /* __SHADOW_RECORD_ALL__ */
275 
276 # if defined(__KERNEL_GPU__) || !defined(__SHADOW_RECORD_ALL__)
277 /* Shadow function to compute how much light is blocked,
278  *
279  * Here we raytrace from one transparent surface to the next step by step.
280  * To minimize overhead in cases where we don't need transparent shadows, we
281  * first trace a regular shadow ray. We check if the hit primitive was
282  * potentially transparent, and only in that case start marching. this gives
283  * one extra ray cast for the cases were we do want transparency.
284  */
285 
286 /* This function is only implementing device-independent traversal logic
287  * which requires some precalculation done.
288  */
289 ccl_device bool shadow_blocked_transparent_stepped_loop(KernelGlobals *kg,
290  ShaderData *sd,
291  ShaderData *shadow_sd,
293  const uint visibility,
294  Ray *ray,
295  Intersection *isect,
296  const bool blocked,
297  const bool is_transparent_isect,
298  float3 *shadow)
299 {
300 # ifdef __VOLUME__
301 # ifdef __KERNEL_OPTIX__
302  VolumeState &volume_state = kg->volume_state;
303 # else
304  VolumeState volume_state;
305 # endif
306 # endif
307  if (blocked && is_transparent_isect) {
308  float3 throughput = one_float3();
309  float3 Pend = ray->P + ray->D * ray->t;
310  int bounce = state->transparent_bounce;
311 # ifdef __VOLUME__
312 # ifdef __SPLIT_KERNEL__
314 # endif
315  PathState *ps = shadow_blocked_volume_path_state(kg, &volume_state, state, sd, ray);
316 # endif
317  for (;;) {
318  if (bounce >= kernel_data.integrator.transparent_max_bounce) {
319  return true;
320  }
321  if (!scene_intersect(kg, ray, visibility & PATH_RAY_SHADOW_TRANSPARENT, isect)) {
322  break;
323  }
324  if (!shader_transparent_shadow(kg, isect)) {
325  return true;
326  }
327  /* Attenuate the throughput. */
329  shadow_sd,
330  state,
331 # ifdef __VOLUME__
332  ps,
333 # endif
334  isect,
335  ray,
336  &throughput)) {
337  return true;
338  }
339  /* Move ray forward. */
340  ray->P = ray_offset(shadow_sd->P, -shadow_sd->Ng);
341  if (ray->t != FLT_MAX) {
342  ray->D = normalize_len(Pend - ray->P, &ray->t);
343  }
344  bounce++;
345  }
346 # ifdef __VOLUME__
347  /* Attenuation for last line segment towards light. */
348  if (ps->volume_stack[0].shader != SHADER_NONE) {
349  kernel_volume_shadow(kg, shadow_sd, ps, ray, &throughput);
350  }
351 # endif
352  *shadow *= throughput;
353  return is_zero(throughput);
354  }
355 # ifdef __VOLUME__
356  if (!blocked && state->volume_stack[0].shader != SHADER_NONE) {
357  /* Apply attenuation from current volume shader. */
358 # ifdef __SPLIT_KERNEL__
360 # endif
361  PathState *ps = shadow_blocked_volume_path_state(kg, &volume_state, state, sd, ray);
362  kernel_volume_shadow(kg, shadow_sd, ps, ray, shadow);
363  }
364 # endif
365  return blocked;
366 }
367 
368 ccl_device bool shadow_blocked_transparent_stepped(KernelGlobals *kg,
369  ShaderData *sd,
370  ShaderData *shadow_sd,
372  const uint visibility,
373  Ray *ray,
374  Intersection *isect,
375  float3 *shadow)
376 {
377  bool blocked = scene_intersect(kg, ray, visibility & PATH_RAY_SHADOW_OPAQUE, isect);
378  bool is_transparent_isect = blocked ? shader_transparent_shadow(kg, isect) : false;
379  return shadow_blocked_transparent_stepped_loop(
380  kg, sd, shadow_sd, state, visibility, ray, isect, blocked, is_transparent_isect, shadow);
381 }
382 
383 # endif /* __KERNEL_GPU__ || !__SHADOW_RECORD_ALL__ */
384 #endif /* __TRANSPARENT_SHADOWS__ */
385 
386 ccl_device_inline bool shadow_blocked(KernelGlobals *kg,
387  ShaderData *sd,
388  ShaderData *shadow_sd,
390  Ray *ray,
391  float3 *shadow)
392 {
393  *shadow = one_float3();
394 #if !defined(__KERNEL_OPTIX__)
395  /* Some common early checks.
396  * Avoid conditional trace call in OptiX though, since those hurt performance there.
397  */
398  if (ray->t == 0.0f) {
399  return false;
400  }
401 #endif
402 #ifdef __SHADOW_TRICKS__
403  const uint visibility = (state->flag & PATH_RAY_SHADOW_CATCHER) ? PATH_RAY_SHADOW_NON_CATCHER :
405 #else
406  const uint visibility = PATH_RAY_SHADOW;
407 #endif
408  /* Do actual shadow shading.
409  * First of all, we check if integrator requires transparent shadows.
410  * if not, we use simplest and fastest ever way to calculate occlusion.
411  * Do not do this in OptiX to avoid the additional trace call.
412  */
413 #if !defined(__KERNEL_OPTIX__) || !defined(__TRANSPARENT_SHADOWS__)
414  Intersection isect;
415 # ifdef __TRANSPARENT_SHADOWS__
416  if (!kernel_data.integrator.transparent_shadows)
417 # endif
418  {
419  return shadow_blocked_opaque(kg, shadow_sd, state, visibility, ray, &isect, shadow);
420  }
421 #endif
422 #ifdef __TRANSPARENT_SHADOWS__
423 # ifdef __SHADOW_RECORD_ALL__
424  /* For the transparent shadows we try to use record-all logic on the
425  * devices which supports this.
426  */
427  const int transparent_max_bounce = kernel_data.integrator.transparent_max_bounce;
428  /* Check transparent bounces here, for volume scatter which can do
429  * lighting before surface path termination is checked.
430  */
431  if (state->transparent_bounce >= transparent_max_bounce) {
432  return true;
433  }
434  uint max_hits = transparent_max_bounce - state->transparent_bounce - 1;
435 # if defined(__KERNEL_OPTIX__)
436  /* Always use record-all behavior in OptiX, but ensure there are no out of bounds
437  * accesses to the hit stack.
438  */
439  max_hits = min(max_hits, SHADOW_STACK_MAX_HITS - 1);
440 # elif defined(__KERNEL_GPU__)
441  /* On GPU we do tricky with tracing opaque ray first, this avoids speed
442  * regressions in some files.
443  *
444  * TODO(sergey): Check why using record-all behavior causes slowdown in such
445  * cases. Could that be caused by a higher spill pressure?
446  */
447  const bool blocked = scene_intersect(kg, ray, visibility & PATH_RAY_SHADOW_OPAQUE, &isect);
448  const bool is_transparent_isect = blocked ? shader_transparent_shadow(kg, &isect) : false;
449  if (!blocked || !is_transparent_isect || max_hits + 1 >= SHADOW_STACK_MAX_HITS) {
450  return shadow_blocked_transparent_stepped_loop(
451  kg, sd, shadow_sd, state, visibility, ray, &isect, blocked, is_transparent_isect, shadow);
452  }
453 # endif /* __KERNEL_GPU__ */
454  return shadow_blocked_transparent_all(
455  kg, sd, shadow_sd, state, visibility, ray, max_hits, shadow);
456 # else /* __SHADOW_RECORD_ALL__ */
457  /* Fallback to a slowest version which works on all devices. */
458  return shadow_blocked_transparent_stepped(
459  kg, sd, shadow_sd, state, visibility, ray, &isect, shadow);
460 # endif /* __SHADOW_RECORD_ALL__ */
461 #endif /* __TRANSPARENT_SHADOWS__ */
462 }
463 
464 #undef SHADOW_STACK_MAX_HITS
465 
unsigned int uint
Definition: BLI_sys_types.h:83
ccl_device_inline float3 ray_offset(float3 P, float3 Ng)
ccl_device_intersect bool scene_intersect(KernelGlobals *kg, const Ray *ray, const uint visibility, Intersection *isect)
CCL_NAMESPACE_BEGIN ccl_device float3 shader_bsdf_transparency(KernelGlobals *kg, const ShaderData *sd)
#define kernel_data
#define ccl_global_id(d)
#define ccl_addr_space
#define ccl_global_size(d)
#define ccl_device_forceinline
#define ccl_device
#define ccl_device_inline
#define CCL_NAMESPACE_END
ccl_device_inline void path_state_modify_bounce(ccl_addr_space PathState *state, bool increase)
CCL_NAMESPACE_BEGIN ccl_device_noinline void shader_setup_from_ray(KernelGlobals *kg, ShaderData *sd, const Intersection *isect, const Ray *ray)
Definition: kernel_shader.h:59
ccl_device void shader_eval_surface(KernelGlobals *kg, ShaderData *sd, ccl_addr_space PathState *state, ccl_global float *buffer, int path_flag)
CCL_NAMESPACE_BEGIN ccl_device_forceinline bool shadow_handle_transparent_isect(KernelGlobals *kg, ShaderData *shadow_sd, ccl_addr_space PathState *state, Intersection *isect, Ray *ray, float3 *throughput)
Definition: kernel_shadow.h:51
ccl_device_inline bool shadow_blocked(KernelGlobals *kg, ShaderData *sd, ShaderData *shadow_sd, ccl_addr_space PathState *state, Ray *ray, float3 *shadow)
ccl_device bool shadow_blocked_opaque(KernelGlobals *kg, ShaderData *shadow_sd, ccl_addr_space PathState *state, const uint visibility, Ray *ray, Intersection *isect, float3 *shadow)
Definition: kernel_shadow.h:90
#define kernel_split_state
@ SD_HAS_ONLY_VOLUME
Definition: kernel_types.h:875
#define __VOLUME__
Definition: kernel_types.h:114
#define SHADER_NONE
Definition: kernel_types.h:58
@ PATH_RAY_SHADOW
Definition: kernel_types.h:284
@ PATH_RAY_SHADOW_NON_CATCHER
Definition: kernel_types.h:282
@ PATH_RAY_SHADOW_CATCHER
Definition: kernel_types.h:306
@ PATH_RAY_SHADOW_OPAQUE
Definition: kernel_types.h:277
@ PATH_RAY_SHADOW_TRANSPARENT
Definition: kernel_types.h:280
ShaderData
static ulong state[N]
#define min(a, b)
Definition: sort.c:51
float t
Definition: kernel_types.h:649
float3 P
Definition: kernel_types.h:647
float3 D
Definition: kernel_types.h:648
PathState ps
ccl_device_inline float dot(const float2 &a, const float2 &b)
ccl_device_inline float2 normalize_len(const float2 &a, float *t)
ccl_device_inline bool is_zero(const float2 &a)
ccl_device_inline float3 one_float3()