Blender  V2.93
bvh_binning.cpp
Go to the documentation of this file.
1 /*
2  * Adapted from code copyright 2009-2011 Intel Corporation
3  * Modifications Copyright 2012, Blender Foundation.
4  *
5  * Licensed under the Apache License, Version 2.0 (the "License");
6  * you may not use this file except in compliance with the License.
7  * You may obtain a copy of the License at
8  *
9  * http://www.apache.org/licenses/LICENSE-2.0
10  *
11  * Unless required by applicable law or agreed to in writing, software
12  * distributed under the License is distributed on an "AS IS" BASIS,
13  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
14  * See the License for the specific language governing permissions and
15  * limitations under the License.
16  */
17 
18 //#define __KERNEL_SSE__
19 
20 #include "bvh/bvh_binning.h"
21 
22 #include <stdlib.h>
23 
24 #include "util/util_algorithm.h"
25 #include "util/util_boundbox.h"
26 #include "util/util_types.h"
27 
29 
30 /* SSE replacements */
31 
32 __forceinline void prefetch_L1(const void * /*ptr*/)
33 {
34 }
35 __forceinline void prefetch_L2(const void * /*ptr*/)
36 {
37 }
38 __forceinline void prefetch_L3(const void * /*ptr*/)
39 {
40 }
41 __forceinline void prefetch_NTA(const void * /*ptr*/)
42 {
43 }
44 
45 template<size_t src> __forceinline float extract(const int4 &b)
46 {
47  return b[src];
48 }
49 template<size_t dst> __forceinline const float4 insert(const float4 &a, const float b)
50 {
51  float4 r = a;
52  r[dst] = b;
53  return r;
54 }
55 
56 __forceinline int get_best_dimension(const float4 &bestSAH)
57 {
58  // return (int)__bsf(movemask(reduce_min(bestSAH) == bestSAH));
59 
60  float minSAH = min(bestSAH.x, min(bestSAH.y, bestSAH.z));
61 
62  if (bestSAH.x == minSAH)
63  return 0;
64  else if (bestSAH.y == minSAH)
65  return 1;
66  else
67  return 2;
68 }
69 
70 /* BVH Object Binning */
71 
73  BVHReference *prims,
74  const BVHUnaligned *unaligned_heuristic,
75  const Transform *aligned_space)
76  : BVHRange(job),
77  splitSAH(FLT_MAX),
78  dim(0),
79  pos(0),
80  unaligned_heuristic_(unaligned_heuristic),
81  aligned_space_(aligned_space)
82 {
83  if (aligned_space_ == NULL) {
84  bounds_ = bounds();
86  }
87  else {
88  /* TODO(sergey): With some additional storage we can avoid
89  * need in re-calculating this.
90  */
91  bounds_ = unaligned_heuristic->compute_aligned_boundbox(
92  *this, prims, *aligned_space, &cent_bounds_);
93  }
94 
95  /* compute number of bins to use and precompute scaling factor for binning */
96  num_bins = min(size_t(MAX_BINS), size_t(4.0f + 0.05f * size()));
98 
99  /* initialize binning counter and bounds */
100  BoundBox bin_bounds[MAX_BINS][4]; /* bounds for every bin in every dimension */
101  int4 bin_count[MAX_BINS]; /* number of primitives mapped to bin */
102 
103  for (size_t i = 0; i < num_bins; i++) {
104  bin_count[i] = make_int4(0);
105  bin_bounds[i][0] = bin_bounds[i][1] = bin_bounds[i][2] = BoundBox::empty;
106  }
107 
108  /* map geometry to bins, unrolled once */
109  {
110  int64_t i;
111 
112  for (i = 0; i < int64_t(size()) - 1; i += 2) {
113  prefetch_L2(&prims[start() + i + 8]);
114 
115  /* map even and odd primitive to bin */
116  const BVHReference &prim0 = prims[start() + i + 0];
117  const BVHReference &prim1 = prims[start() + i + 1];
118 
119  BoundBox bounds0 = get_prim_bounds(prim0);
120  BoundBox bounds1 = get_prim_bounds(prim1);
121 
122  int4 bin0 = get_bin(bounds0);
123  int4 bin1 = get_bin(bounds1);
124 
125  /* increase bounds for bins for even primitive */
126  int b00 = (int)extract<0>(bin0);
127  bin_count[b00][0]++;
128  bin_bounds[b00][0].grow(bounds0);
129  int b01 = (int)extract<1>(bin0);
130  bin_count[b01][1]++;
131  bin_bounds[b01][1].grow(bounds0);
132  int b02 = (int)extract<2>(bin0);
133  bin_count[b02][2]++;
134  bin_bounds[b02][2].grow(bounds0);
135 
136  /* increase bounds of bins for odd primitive */
137  int b10 = (int)extract<0>(bin1);
138  bin_count[b10][0]++;
139  bin_bounds[b10][0].grow(bounds1);
140  int b11 = (int)extract<1>(bin1);
141  bin_count[b11][1]++;
142  bin_bounds[b11][1].grow(bounds1);
143  int b12 = (int)extract<2>(bin1);
144  bin_count[b12][2]++;
145  bin_bounds[b12][2].grow(bounds1);
146  }
147 
148  /* for uneven number of primitives */
149  if (i < int64_t(size())) {
150  /* map primitive to bin */
151  const BVHReference &prim0 = prims[start() + i];
152  BoundBox bounds0 = get_prim_bounds(prim0);
153  int4 bin0 = get_bin(bounds0);
154 
155  /* increase bounds of bins */
156  int b00 = (int)extract<0>(bin0);
157  bin_count[b00][0]++;
158  bin_bounds[b00][0].grow(bounds0);
159  int b01 = (int)extract<1>(bin0);
160  bin_count[b01][1]++;
161  bin_bounds[b01][1].grow(bounds0);
162  int b02 = (int)extract<2>(bin0);
163  bin_count[b02][2]++;
164  bin_bounds[b02][2].grow(bounds0);
165  }
166  }
167 
168  /* sweep from right to left and compute parallel prefix of merged bounds */
169  float4 r_area[MAX_BINS]; /* area of bounds of primitives on the right */
170  float4 r_count[MAX_BINS]; /* number of primitives on the right */
171  int4 count = make_int4(0);
172 
176 
177  for (size_t i = num_bins - 1; i > 0; i--) {
178  count = count + bin_count[i];
179  r_count[i] = blocks(count);
180 
181  bx = merge(bx, bin_bounds[i][0]);
182  r_area[i][0] = bx.half_area();
183  by = merge(by, bin_bounds[i][1]);
184  r_area[i][1] = by.half_area();
185  bz = merge(bz, bin_bounds[i][2]);
186  r_area[i][2] = bz.half_area();
187  r_area[i][3] = r_area[i][2];
188  }
189 
190  /* sweep from left to right and compute SAH */
191  int4 ii = make_int4(1);
192  float4 bestSAH = make_float4(FLT_MAX);
193  int4 bestSplit = make_int4(-1);
194 
195  count = make_int4(0);
196 
197  bx = BoundBox::empty;
198  by = BoundBox::empty;
199  bz = BoundBox::empty;
200 
201  for (size_t i = 1; i < num_bins; i++, ii += make_int4(1)) {
202  count = count + bin_count[i - 1];
203 
204  bx = merge(bx, bin_bounds[i - 1][0]);
205  float Ax = bx.half_area();
206  by = merge(by, bin_bounds[i - 1][1]);
207  float Ay = by.half_area();
208  bz = merge(bz, bin_bounds[i - 1][2]);
209  float Az = bz.half_area();
210 
211  float4 lCount = blocks(count);
212  float4 lArea = make_float4(Ax, Ay, Az, Az);
213  float4 sah = lArea * lCount + r_area[i] * r_count[i];
214 
215  bestSplit = select(sah < bestSAH, ii, bestSplit);
216  bestSAH = min(sah, bestSAH);
217  }
218 
220  bestSAH = insert<3>(select(mask, make_float4(FLT_MAX), bestSAH), FLT_MAX);
221 
222  /* find best dimension */
223  dim = get_best_dimension(bestSAH);
224  splitSAH = bestSAH[dim];
225  pos = bestSplit[dim];
227 }
228 
230  BVHObjectBinning &left_o,
231  BVHObjectBinning &right_o) const
232 {
233  size_t N = size();
234 
235  BoundBox lgeom_bounds = BoundBox::empty;
236  BoundBox rgeom_bounds = BoundBox::empty;
237  BoundBox lcent_bounds = BoundBox::empty;
238  BoundBox rcent_bounds = BoundBox::empty;
239 
240  int64_t l = 0, r = N - 1;
241 
242  while (l <= r) {
243  prefetch_L2(&prims[start() + l + 8]);
244  prefetch_L2(&prims[start() + r - 8]);
245 
246  BVHReference prim = prims[start() + l];
248  float3 unaligned_center = unaligned_bounds.center2();
249  float3 center = prim.bounds().center2();
250 
251  if (get_bin(unaligned_center)[dim] < pos) {
252  lgeom_bounds.grow(prim.bounds());
253  lcent_bounds.grow(center);
254  l++;
255  }
256  else {
257  rgeom_bounds.grow(prim.bounds());
258  rcent_bounds.grow(center);
259  swap(prims[start() + l], prims[start() + r]);
260  r--;
261  }
262  }
263  /* finish */
264  if (l != 0 && N - 1 - r != 0) {
265  right_o = BVHObjectBinning(BVHRange(rgeom_bounds, rcent_bounds, start() + l, N - 1 - r),
266  prims);
267  left_o = BVHObjectBinning(BVHRange(lgeom_bounds, lcent_bounds, start(), l), prims);
268  return;
269  }
270 
271  /* object medium split if we did not make progress, can happen when all
272  * primitives have same centroid */
273  lgeom_bounds = BoundBox::empty;
274  rgeom_bounds = BoundBox::empty;
275  lcent_bounds = BoundBox::empty;
276  rcent_bounds = BoundBox::empty;
277 
278  for (size_t i = 0; i < N / 2; i++) {
279  lgeom_bounds.grow(prims[start() + i].bounds());
280  lcent_bounds.grow(prims[start() + i].bounds().center2());
281  }
282 
283  for (size_t i = N / 2; i < N; i++) {
284  rgeom_bounds.grow(prims[start() + i].bounds());
285  rcent_bounds.grow(prims[start() + i].bounds().center2());
286  }
287 
288  right_o = BVHObjectBinning(BVHRange(rgeom_bounds, rcent_bounds, start() + N / 2, N / 2 + N % 2),
289  prims);
290  left_o = BVHObjectBinning(BVHRange(lgeom_bounds, lcent_bounds, start(), N / 2), prims);
291 }
292 
void swap(T &a, T &b)
Definition: Common.h:33
NSNotificationCenter * center
_GL_VOID GLfloat value _GL_VOID_RET _GL_VOID const GLuint GLboolean *residences _GL_BOOL_RET _GL_VOID GLsizei GLfloat GLfloat GLfloat GLfloat const GLubyte *bitmap _GL_VOID_RET _GL_VOID GLenum const void *lists _GL_VOID_RET _GL_VOID const GLdouble *equation _GL_VOID_RET _GL_VOID GLdouble GLdouble blue _GL_VOID_RET _GL_VOID GLfloat GLfloat blue _GL_VOID_RET _GL_VOID GLint GLint blue _GL_VOID_RET _GL_VOID GLshort GLshort blue _GL_VOID_RET _GL_VOID GLubyte GLubyte blue _GL_VOID_RET _GL_VOID GLuint GLuint blue _GL_VOID_RET _GL_VOID GLushort GLushort blue _GL_VOID_RET _GL_VOID GLbyte GLbyte GLbyte alpha _GL_VOID_RET _GL_VOID GLdouble GLdouble GLdouble alpha _GL_VOID_RET _GL_VOID GLfloat GLfloat GLfloat alpha _GL_VOID_RET _GL_VOID GLint GLint GLint alpha _GL_VOID_RET _GL_VOID GLshort GLshort GLshort alpha _GL_VOID_RET _GL_VOID GLubyte GLubyte GLubyte alpha _GL_VOID_RET _GL_VOID GLuint GLuint GLuint alpha _GL_VOID_RET _GL_VOID GLushort GLushort GLushort alpha _GL_VOID_RET _GL_VOID GLenum mode _GL_VOID_RET _GL_VOID GLint GLsizei GLsizei GLenum type _GL_VOID_RET _GL_VOID GLsizei GLenum GLenum const void *pixels _GL_VOID_RET _GL_VOID const void *pointer _GL_VOID_RET _GL_VOID GLdouble v _GL_VOID_RET _GL_VOID GLfloat v _GL_VOID_RET _GL_VOID GLint GLint i2 _GL_VOID_RET _GL_VOID GLint j _GL_VOID_RET _GL_VOID GLfloat param _GL_VOID_RET _GL_VOID GLint param _GL_VOID_RET _GL_VOID GLdouble GLdouble GLdouble GLdouble GLdouble zFar _GL_VOID_RET _GL_UINT GLdouble *equation _GL_VOID_RET _GL_VOID GLenum GLint *params _GL_VOID_RET _GL_VOID GLenum GLfloat *v _GL_VOID_RET _GL_VOID GLenum GLfloat *params _GL_VOID_RET _GL_VOID GLfloat *values _GL_VOID_RET _GL_VOID GLushort *values _GL_VOID_RET _GL_VOID GLenum GLfloat *params _GL_VOID_RET _GL_VOID GLenum GLdouble *params _GL_VOID_RET _GL_VOID GLenum GLint *params _GL_VOID_RET _GL_VOID GLsizei const void *pointer _GL_VOID_RET _GL_VOID GLsizei const void *pointer _GL_VOID_RET _GL_BOOL GLfloat param _GL_VOID_RET _GL_VOID GLint param _GL_VOID_RET _GL_VOID GLenum GLfloat param _GL_VOID_RET _GL_VOID GLenum GLint param _GL_VOID_RET _GL_VOID GLushort pattern _GL_VOID_RET _GL_VOID GLdouble GLdouble GLint GLint const GLdouble *points _GL_VOID_RET _GL_VOID GLdouble GLdouble GLint GLint GLdouble GLdouble GLint GLint const GLdouble *points _GL_VOID_RET _GL_VOID GLdouble GLdouble u2 _GL_VOID_RET _GL_VOID GLdouble GLdouble GLint GLdouble GLdouble v2 _GL_VOID_RET _GL_VOID GLenum GLfloat param _GL_VOID_RET _GL_VOID GLenum GLint param _GL_VOID_RET _GL_VOID GLenum mode _GL_VOID_RET _GL_VOID GLdouble GLdouble nz _GL_VOID_RET _GL_VOID GLfloat GLfloat nz _GL_VOID_RET _GL_VOID GLint GLint nz _GL_VOID_RET _GL_VOID GLshort GLshort nz _GL_VOID_RET _GL_VOID GLsizei const void *pointer _GL_VOID_RET _GL_VOID GLsizei const GLfloat *values _GL_VOID_RET _GL_VOID GLsizei const GLushort *values _GL_VOID_RET _GL_VOID GLint param _GL_VOID_RET _GL_VOID const GLuint const GLclampf *priorities _GL_VOID_RET _GL_VOID GLdouble y _GL_VOID_RET _GL_VOID GLfloat y _GL_VOID_RET _GL_VOID GLint y _GL_VOID_RET _GL_VOID GLshort y _GL_VOID_RET _GL_VOID GLdouble GLdouble z _GL_VOID_RET _GL_VOID GLfloat GLfloat z _GL_VOID_RET _GL_VOID GLint GLint z _GL_VOID_RET _GL_VOID GLshort GLshort z _GL_VOID_RET _GL_VOID GLdouble GLdouble GLdouble w _GL_VOID_RET _GL_VOID GLfloat GLfloat GLfloat w _GL_VOID_RET _GL_VOID GLint GLint GLint w _GL_VOID_RET _GL_VOID GLshort GLshort GLshort w _GL_VOID_RET _GL_VOID GLdouble GLdouble GLdouble y2 _GL_VOID_RET _GL_VOID GLfloat GLfloat GLfloat y2 _GL_VOID_RET _GL_VOID GLint GLint GLint y2 _GL_VOID_RET _GL_VOID GLshort GLshort GLshort y2 _GL_VOID_RET _GL_VOID GLdouble GLdouble GLdouble z _GL_VOID_RET _GL_VOID GLdouble GLdouble z _GL_VOID_RET _GL_VOID GLuint *buffer _GL_VOID_RET _GL_VOID GLdouble t _GL_VOID_RET _GL_VOID GLfloat t _GL_VOID_RET _GL_VOID GLint t _GL_VOID_RET _GL_VOID GLshort t _GL_VOID_RET _GL_VOID GLdouble GLdouble r _GL_VOID_RET _GL_VOID GLfloat GLfloat r _GL_VOID_RET _GL_VOID GLint GLint r _GL_VOID_RET _GL_VOID GLshort GLshort r _GL_VOID_RET _GL_VOID GLdouble GLdouble r
ATTR_WARN_UNUSED_RESULT const BMLoop * l
CCL_NAMESPACE_BEGIN __forceinline void prefetch_L1(const void *)
Definition: bvh_binning.cpp:32
__forceinline float extract(const int4 &b)
Definition: bvh_binning.cpp:45
__forceinline void prefetch_L2(const void *)
Definition: bvh_binning.cpp:35
__forceinline void prefetch_NTA(const void *)
Definition: bvh_binning.cpp:41
__forceinline void prefetch_L3(const void *)
Definition: bvh_binning.cpp:38
__forceinline const float4 insert(const float4 &a, const float b)
Definition: bvh_binning.cpp:49
__forceinline int get_best_dimension(const float4 &bestSAH)
Definition: bvh_binning.cpp:56
void split(BVHReference *prims, BVHObjectBinning &left_o, BVHObjectBinning &right_o) const
__forceinline const BoundBox & unaligned_bounds()
Definition: bvh_binning.h:50
__forceinline int4 get_bin(const BoundBox &box) const
Definition: bvh_binning.h:75
__forceinline BVHObjectBinning()
Definition: bvh_binning.h:39
BoundBox bounds_
Definition: bvh_binning.h:65
__forceinline BoundBox get_prim_bounds(const BVHReference &prim) const
Definition: bvh_binning.h:102
__forceinline float4 blocks(const int4 &a) const
Definition: bvh_binning.h:91
const Transform * aligned_space_
Definition: bvh_binning.h:69
BoundBox cent_bounds_
Definition: bvh_binning.h:66
__forceinline int size() const
Definition: bvh_params.h:266
__forceinline int start() const
Definition: bvh_params.h:262
__forceinline const BoundBox & cent_bounds() const
Definition: bvh_params.h:258
__forceinline BVHRange()
Definition: bvh_params.h:230
__forceinline const BoundBox & bounds() const
Definition: bvh_params.h:254
__forceinline const BoundBox & bounds() const
Definition: bvh_params.h:180
BoundBox compute_aligned_boundbox(const BVHObjectBinning &range, const BVHReference *references, const Transform &aligned_space, BoundBox *cent_bounds=NULL) const
uint pos
int count
#define CCL_NAMESPACE_END
#define make_int4(x, y, z, w)
#define make_float4(x, y, z, w)
#define make_float3(x, y, z)
#define rcp(x)
static unsigned a[3]
Definition: RandGen.cpp:92
params N
#define min(a, b)
Definition: sort.c:51
__int64 int64_t
Definition: stdint.h:92
__forceinline float half_area() const
__forceinline float3 size() const
__forceinline void grow(const float3 &pt)
Definition: util_boundbox.h:55
__forceinline float3 center2() const
__forceinline const avxb select(const avxb &m, const avxb &t, const avxb &f)
Definition: util_avxb.h:167
__forceinline float extract< 0 >(const avxf &a)
Definition: util_avxf.h:272
__forceinline BoundBox merge(const BoundBox &bbox, const float3 &pt)
#define __forceinline
Definition: util_defines.h:71
ccl_device_inline float4 float3_to_float4(const float3 a)
Definition: util_math.h:420
ccl_device_inline float4 mask(const int4 &mask, const float4 &a)