Blender  V2.93
util_sseb.h
Go to the documentation of this file.
1 /*
2  * Copyright 2011-2013 Intel Corporation
3  * Modifications Copyright 2014, Blender Foundation.
4  *
5  * Licensed under the Apache License, Version 2.0 (the "License");
6  * you may not use this file except in compliance with the License.
7  * You may obtain a copy of the License at
8  *
9  * http://www.apache.org/licenses/LICENSE-2.0
10  *
11  * Unless required by applicable law or agreed to in writing, software
12  * distributed under the License is distributed on an "AS IS" BASIS,
13  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
14  * See the License for the specific language governing permissions and
15  * limitations under the License.
16  */
17 
18 #ifndef __UTIL_SSEB_H__
19 #define __UTIL_SSEB_H__
20 
22 
23 #ifdef __KERNEL_SSE2__
24 
25 struct ssei;
26 struct ssef;
27 
/* 4-lane SIMD boolean mask.
 * Each lane holds either all-zero bits (false) or all-one bits (true),
 * stored in a __m128 so float-comparison results can be used directly. */
struct sseb {
  typedef sseb Mask;    // mask type
  typedef ssei Int;     // int type
  typedef ssef Float;   // float type

  enum { size = 4 };  // number of SIMD elements
  union {
    __m128 m128;   /* raw SSE register view */
    int32_t v[4];  /* per-lane integer view: a true lane reads as -1 */
  };  // data

  /* Default constructor: lanes are deliberately left uninitialized. */
  __forceinline sseb()
  {
  }
  __forceinline sseb(const sseb &other)
  {
    m128 = other.m128;
  }
  __forceinline sseb &operator=(const sseb &other)
  {
    m128 = other.m128;
    return *this;
  }

  /* Wrap a raw register; implicit so intrinsic results convert freely. */
  __forceinline sseb(const __m128 input) : m128(input)
  {
  }
  /* Implicit conversions back to raw registers in float/int/double view.
   * These are pure bit casts; no value conversion occurs. */
  __forceinline operator const __m128 &(void)const
  {
    return m128;
  }
  __forceinline operator const __m128i(void) const
  {
    return _mm_castps_si128(m128);
  }
  __forceinline operator const __m128d(void) const
  {
    return _mm_castps_pd(m128);
  }

  /* Bool constructors: build a 4-bit index (bit i selects lane i) and fetch
   * the corresponding lane mask from the _mm_lookupmask_ps table (defined
   * elsewhere in the project). The two-bool overload replicates the pair,
   * producing lanes (a, b, a, b). */
  __forceinline sseb(bool a)
      : m128(_mm_lookupmask_ps[(size_t(a) << 3) | (size_t(a) << 2) | (size_t(a) << 1) | size_t(a)])
  {
  }
  __forceinline sseb(bool a, bool b)
      : m128(_mm_lookupmask_ps[(size_t(b) << 3) | (size_t(a) << 2) | (size_t(b) << 1) | size_t(a)])
  {
  }
  __forceinline sseb(bool a, bool b, bool c, bool d)
      : m128(_mm_lookupmask_ps[(size_t(d) << 3) | (size_t(c) << 2) | (size_t(b) << 1) | size_t(a)])
  {
  }
  /* Construct from a 4-bit integer mask; bit i controls lane i. */
  __forceinline sseb(int mask)
  {
    assert(mask >= 0 && mask < 16);
    m128 = _mm_lookupmask_ps[mask];
  }

  /* Constants: all-false is all-zero bits; all-true is all-one bits,
   * produced by comparing zero with itself for equality. */
  __forceinline sseb(FalseTy) : m128(_mm_setzero_ps())
  {
  }
  __forceinline sseb(TrueTy)
      : m128(_mm_castsi128_ps(_mm_cmpeq_epi32(_mm_setzero_si128(), _mm_setzero_si128())))
  {
  }

  /* Read lane i as bool by extracting its sign bit via movemask. */
  __forceinline bool operator[](const size_t i) const
  {
    assert(i < 4);
    return (_mm_movemask_ps(m128) >> i) & 1;
  }
  /* Mutable access to lane i as a 32-bit integer (-1 = true, 0 = false). */
  __forceinline int32_t &operator[](const size_t i)
  {
    assert(i < 4);
    return v[i];
  }
};
118 
122 
123 __forceinline const sseb operator!(const sseb &a)
124 {
125  return _mm_xor_ps(a, sseb(True));
126 }
127 
131 
132 __forceinline const sseb operator&(const sseb &a, const sseb &b)
133 {
134  return _mm_and_ps(a, b);
135 }
136 __forceinline const sseb operator|(const sseb &a, const sseb &b)
137 {
138  return _mm_or_ps(a, b);
139 }
140 __forceinline const sseb operator^(const sseb &a, const sseb &b)
141 {
142  return _mm_xor_ps(a, b);
143 }
144 
148 
149 __forceinline const sseb operator&=(sseb &a, const sseb &b)
150 {
151  return a = a & b;
152 }
153 __forceinline const sseb operator|=(sseb &a, const sseb &b)
154 {
155  return a = a | b;
156 }
157 __forceinline const sseb operator^=(sseb &a, const sseb &b)
158 {
159  return a = a ^ b;
160 }
161 
165 
166 __forceinline const sseb operator!=(const sseb &a, const sseb &b)
167 {
168  return _mm_xor_ps(a, b);
169 }
170 __forceinline const sseb operator==(const sseb &a, const sseb &b)
171 {
172  return _mm_castsi128_ps(_mm_cmpeq_epi32(a, b));
173 }
174 
175 __forceinline const sseb select(const sseb &m, const sseb &t, const sseb &f)
176 {
177 # if defined(__KERNEL_SSE41__)
178  return _mm_blendv_ps(f, t, m);
179 # else
180  return _mm_or_ps(_mm_and_ps(m, t), _mm_andnot_ps(m, f));
181 # endif
182 }
183 
187 
188 __forceinline const sseb unpacklo(const sseb &a, const sseb &b)
189 {
190  return _mm_unpacklo_ps(a, b);
191 }
192 __forceinline const sseb unpackhi(const sseb &a, const sseb &b)
193 {
194  return _mm_unpackhi_ps(a, b);
195 }
196 
/* Reorder lanes of `a`: result lane k takes source lane i<k>.
 * Indices are compile-time constants so this compiles to one shuffle. */
template<size_t i0, size_t i1, size_t i2, size_t i3>
__forceinline const sseb shuffle(const sseb &a)
{
# ifdef __KERNEL_NEON__
  return shuffle_neon<int32x4_t, i0, i1, i2, i3>(a);
# else
  /* _MM_SHUFFLE takes indices high-to-low, hence the reversed order. */
  return _mm_castsi128_ps(_mm_shuffle_epi32(a, _MM_SHUFFLE(i3, i2, i1, i0)));
# endif
}
206 
# ifndef __KERNEL_NEON__
/* Specialization <0,1,0,1>: duplicate the low half; movelh is cheaper than
 * a generic shuffle. */
template<> __forceinline const sseb shuffle<0, 1, 0, 1>(const sseb &a)
{
  return _mm_movelh_ps(a, a);
}

/* Specialization <2,3,2,3>: duplicate the high half. */
template<> __forceinline const sseb shuffle<2, 3, 2, 3>(const sseb &a)
{
  return _mm_movehl_ps(a, a);
}
# endif
218 
/* Combine lanes from two vectors: result = (a[i0], a[i1], b[i2], b[i3]). */
template<size_t i0, size_t i1, size_t i2, size_t i3>
__forceinline const sseb shuffle(const sseb &a, const sseb &b)
{
# ifdef __KERNEL_NEON__
  return shuffle_neon<int32x4_t, i0, i1, i2, i3>(a, b);
# else
  /* _MM_SHUFFLE takes indices high-to-low, hence the reversed order. */
  return _mm_shuffle_ps(a, b, _MM_SHUFFLE(i3, i2, i1, i0));
# endif
}
228 
# ifndef __KERNEL_NEON__
/* Specialization <0,1,0,1>: (a0, a1, b0, b1) in one movelh. */
template<> __forceinline const sseb shuffle<0, 1, 0, 1>(const sseb &a, const sseb &b)
{
  return _mm_movelh_ps(a, b);
}

/* Specialization <2,3,2,3>: (a2, a3, b2, b3). Note the swapped argument
 * order: movehl(x, y) yields (y2, y3, x2, x3). */
template<> __forceinline const sseb shuffle<2, 3, 2, 3>(const sseb &a, const sseb &b)
{
  return _mm_movehl_ps(b, a);
}
# endif
240 
# if defined(__KERNEL_SSE3__) && !defined(__KERNEL_NEON__)
/* Specialization <0,0,2,2>: duplicate the even lanes in one SSE3 moveldup. */
template<> __forceinline const sseb shuffle<0, 0, 2, 2>(const sseb &a)
{
  return _mm_moveldup_ps(a);
}
/* Specialization <1,1,3,3>: duplicate the odd lanes. */
template<> __forceinline const sseb shuffle<1, 1, 3, 3>(const sseb &a)
{
  return _mm_movehdup_ps(a);
}
# endif
251 
# if defined(__KERNEL_SSE41__)
/* Copy lane `src` of b into lane `dst` of a; additionally, lanes selected by
 * the 4-bit `clr` mask are zeroed (insertps semantics). */
template<size_t dst, size_t src, size_t clr>
__forceinline const sseb insert(const sseb &a, const sseb &b)
{
#  ifdef __KERNEL_NEON__
  /* NOTE(review): on this path `clr` is treated as a plain boolean that zeroes
   * lane `dst`, not as a per-lane mask like the insertps path below — confirm
   * callers only pass 0 or a mask covering `dst`. */
  sseb res = a;
  if (clr)
    res[dst] = 0;
  else
    res[dst] = b[src];
  return res;
#  else
  /* insertps immediate: bits 7:6 = source lane, bits 5:4 = destination lane,
   * bits 3:0 = zero mask. */
  return _mm_insert_ps(a, b, (dst << 4) | (src << 6) | clr);
#  endif
}
/* Convenience overload: insert without zeroing any lanes. */
template<size_t dst, size_t src> __forceinline const sseb insert(const sseb &a, const sseb &b)
{
  return insert<dst, src, 0>(a, b);
}
/* Convenience overload: insert a scalar bool (broadcast, then lane 0). */
template<size_t dst> __forceinline const sseb insert(const sseb &a, const bool b)
{
  return insert<dst, 0>(a, sseb(b));
}
# endif
276 
280 
# if defined(__KERNEL_SSE41__)
/* Count the number of true lanes (0..4). */
__forceinline uint32_t popcnt(const sseb &a)
{
#  if defined(__KERNEL_NEON__)
  /* AND each lane with 1, then horizontal add: true lanes (-1) contribute 1. */
  const int32x4_t mask = {1, 1, 1, 1};
  int32x4_t t = vandq_s32(a.m128, mask);
  return vaddvq_s32(t);
#  else
  return _mm_popcnt_u32(_mm_movemask_ps(a));
#  endif
}
# else
/* Fallback without POPCNT: sum the four lanes as bools. */
__forceinline uint32_t popcnt(const sseb &a)
{
  return bool(a[0]) + bool(a[1]) + bool(a[2]) + bool(a[3]);
}
# endif
298 
/* True when all 4 lanes are true. */
__forceinline bool reduce_and(const sseb &a)
{
# if defined(__KERNEL_NEON__)
  /* Each true lane reads as -1; four of them sum to -4. */
  return vaddvq_s32(a.m128) == -4;
# else
  return _mm_movemask_ps(a) == 0xf;
# endif
}
/* True when at least one lane is true. */
__forceinline bool reduce_or(const sseb &a)
{
# if defined(__KERNEL_NEON__)
  /* Any true lane (-1) makes the horizontal sum non-zero. */
  return vaddvq_s32(a.m128) != 0x0;
# else
  return _mm_movemask_ps(a) != 0x0;
# endif
}
/* True when all 4 lanes are true (same computation as reduce_and). */
__forceinline bool all(const sseb &b)
{
# if defined(__KERNEL_NEON__)
  /* Each true lane reads as -1; four of them sum to -4. */
  return vaddvq_s32(b.m128) == -4;
# else
  return _mm_movemask_ps(b) == 0xf;
# endif
}
/* True when at least one lane is true (same computation as reduce_or). */
__forceinline bool any(const sseb &b)
{
# if defined(__KERNEL_NEON__)
  return vaddvq_s32(b.m128) != 0x0;
# else
  return _mm_movemask_ps(b) != 0x0;
# endif
}
/* True when no lane is true. */
__forceinline bool none(const sseb &b)
{
# if defined(__KERNEL_NEON__)
  return vaddvq_s32(b.m128) == 0x0;
# else
  return _mm_movemask_ps(b) == 0x0;
# endif
}
339 
340 __forceinline uint32_t movemask(const sseb &a)
341 {
342  return _mm_movemask_ps(a);
343 }
344 
348 
349 ccl_device_inline void print_sseb(const char *label, const sseb &a)
350 {
351  printf("%s: %d %d %d %d\n", label, a[0], a[1], a[2], a[3]);
352 }
353 
354 #endif
355 
357 
358 #endif
void BLI_kdtree_nd_() insert(KDTree *tree, int index, const float co[KD_DIMS]) ATTR_NONNULL(1
struct Mask Mask
_GL_VOID GLfloat value _GL_VOID_RET _GL_VOID const GLuint GLboolean *residences _GL_BOOL_RET _GL_VOID GLsizei GLfloat GLfloat GLfloat GLfloat const GLubyte *bitmap _GL_VOID_RET _GL_VOID GLenum const void *lists _GL_VOID_RET _GL_VOID const GLdouble *equation _GL_VOID_RET _GL_VOID GLdouble GLdouble blue _GL_VOID_RET _GL_VOID GLfloat GLfloat blue _GL_VOID_RET _GL_VOID GLint GLint blue _GL_VOID_RET _GL_VOID GLshort GLshort blue _GL_VOID_RET _GL_VOID GLubyte GLubyte blue _GL_VOID_RET _GL_VOID GLuint GLuint blue _GL_VOID_RET _GL_VOID GLushort GLushort blue _GL_VOID_RET _GL_VOID GLbyte GLbyte GLbyte alpha _GL_VOID_RET _GL_VOID GLdouble GLdouble GLdouble alpha _GL_VOID_RET _GL_VOID GLfloat GLfloat GLfloat alpha _GL_VOID_RET _GL_VOID GLint GLint GLint alpha _GL_VOID_RET _GL_VOID GLshort GLshort GLshort alpha _GL_VOID_RET _GL_VOID GLubyte GLubyte GLubyte alpha _GL_VOID_RET _GL_VOID GLuint GLuint GLuint alpha _GL_VOID_RET _GL_VOID GLushort GLushort GLushort alpha _GL_VOID_RET _GL_VOID GLenum mode _GL_VOID_RET _GL_VOID GLint GLsizei GLsizei GLenum type _GL_VOID_RET _GL_VOID GLsizei GLenum GLenum const void *pixels _GL_VOID_RET _GL_VOID const void *pointer _GL_VOID_RET _GL_VOID GLdouble v _GL_VOID_RET _GL_VOID GLfloat v _GL_VOID_RET _GL_VOID GLint i1
_GL_VOID GLfloat value _GL_VOID_RET _GL_VOID const GLuint GLboolean *residences _GL_BOOL_RET _GL_VOID GLsizei GLfloat GLfloat GLfloat GLfloat const GLubyte *bitmap _GL_VOID_RET _GL_VOID GLenum const void *lists _GL_VOID_RET _GL_VOID const GLdouble *equation _GL_VOID_RET _GL_VOID GLdouble GLdouble blue _GL_VOID_RET _GL_VOID GLfloat GLfloat blue _GL_VOID_RET _GL_VOID GLint GLint blue _GL_VOID_RET _GL_VOID GLshort GLshort blue _GL_VOID_RET _GL_VOID GLubyte GLubyte blue _GL_VOID_RET _GL_VOID GLuint GLuint blue _GL_VOID_RET _GL_VOID GLushort GLushort blue _GL_VOID_RET _GL_VOID GLbyte GLbyte GLbyte alpha _GL_VOID_RET _GL_VOID GLdouble GLdouble GLdouble alpha _GL_VOID_RET _GL_VOID GLfloat GLfloat GLfloat alpha _GL_VOID_RET _GL_VOID GLint GLint GLint alpha _GL_VOID_RET _GL_VOID GLshort GLshort GLshort alpha _GL_VOID_RET _GL_VOID GLubyte GLubyte GLubyte alpha _GL_VOID_RET _GL_VOID GLuint GLuint GLuint alpha _GL_VOID_RET _GL_VOID GLushort GLushort GLushort alpha _GL_VOID_RET _GL_VOID GLenum mode _GL_VOID_RET _GL_VOID GLint GLsizei GLsizei GLenum type _GL_VOID_RET _GL_VOID GLsizei GLenum GLenum const void *pixels _GL_VOID_RET _GL_VOID const void *pointer _GL_VOID_RET _GL_VOID GLdouble v _GL_VOID_RET _GL_VOID GLfloat v _GL_VOID_RET _GL_VOID GLint GLint i2 _GL_VOID_RET _GL_VOID GLint j _GL_VOID_RET _GL_VOID GLfloat param _GL_VOID_RET _GL_VOID GLint param _GL_VOID_RET _GL_VOID GLdouble GLdouble GLdouble GLdouble GLdouble zFar _GL_VOID_RET _GL_UINT GLdouble *equation _GL_VOID_RET _GL_VOID GLenum GLint *params _GL_VOID_RET _GL_VOID GLenum GLfloat *v _GL_VOID_RET _GL_VOID GLenum GLfloat *params _GL_VOID_RET _GL_VOID GLfloat *values _GL_VOID_RET _GL_VOID GLushort *values _GL_VOID_RET _GL_VOID GLenum GLfloat *params _GL_VOID_RET _GL_VOID GLenum GLdouble *params _GL_VOID_RET _GL_VOID GLenum GLint *params _GL_VOID_RET _GL_VOID GLsizei const void *pointer _GL_VOID_RET _GL_VOID GLsizei const void *pointer _GL_VOID_RET _GL_BOOL GLfloat param _GL_VOID_RET _GL_VOID GLint param _GL_VOID_RET 
_GL_VOID GLenum GLfloat param _GL_VOID_RET _GL_VOID GLenum GLint param _GL_VOID_RET _GL_VOID GLushort pattern _GL_VOID_RET _GL_VOID GLdouble GLdouble GLint GLint const GLdouble *points _GL_VOID_RET _GL_VOID GLdouble GLdouble GLint GLint GLdouble GLdouble GLint GLint const GLdouble *points _GL_VOID_RET _GL_VOID GLdouble GLdouble u2 _GL_VOID_RET _GL_VOID GLdouble GLdouble GLint GLdouble GLdouble v2 _GL_VOID_RET _GL_VOID GLenum GLfloat param _GL_VOID_RET _GL_VOID GLenum GLint param _GL_VOID_RET _GL_VOID GLenum mode _GL_VOID_RET _GL_VOID GLdouble GLdouble nz _GL_VOID_RET _GL_VOID GLfloat GLfloat nz _GL_VOID_RET _GL_VOID GLint GLint nz _GL_VOID_RET _GL_VOID GLshort GLshort nz _GL_VOID_RET _GL_VOID GLsizei const void *pointer _GL_VOID_RET _GL_VOID GLsizei const GLfloat *values _GL_VOID_RET _GL_VOID GLsizei const GLushort *values _GL_VOID_RET _GL_VOID GLint param _GL_VOID_RET _GL_VOID const GLuint const GLclampf *priorities _GL_VOID_RET _GL_VOID GLdouble y _GL_VOID_RET _GL_VOID GLfloat y _GL_VOID_RET _GL_VOID GLint y _GL_VOID_RET _GL_VOID GLshort y _GL_VOID_RET _GL_VOID GLdouble GLdouble z _GL_VOID_RET _GL_VOID GLfloat GLfloat z _GL_VOID_RET _GL_VOID GLint GLint z _GL_VOID_RET _GL_VOID GLshort GLshort z _GL_VOID_RET _GL_VOID GLdouble GLdouble GLdouble w _GL_VOID_RET _GL_VOID GLfloat GLfloat GLfloat w _GL_VOID_RET _GL_VOID GLint GLint GLint w _GL_VOID_RET _GL_VOID GLshort GLshort GLshort w _GL_VOID_RET _GL_VOID GLdouble GLdouble GLdouble y2 _GL_VOID_RET _GL_VOID GLfloat GLfloat GLfloat y2 _GL_VOID_RET _GL_VOID GLint GLint GLint y2 _GL_VOID_RET _GL_VOID GLshort GLshort GLshort y2 _GL_VOID_RET _GL_VOID GLdouble GLdouble GLdouble z _GL_VOID_RET _GL_VOID GLdouble GLdouble z _GL_VOID_RET _GL_VOID GLuint *buffer _GL_VOID_RET _GL_VOID GLdouble t _GL_VOID_RET _GL_VOID GLfloat t _GL_VOID_RET _GL_VOID GLint t _GL_VOID_RET _GL_VOID GLshort t _GL_VOID_RET _GL_VOID GLdouble t
Group RGB to Bright Vector Camera Vector Combine Material Light Line Style Layer Add Ambient Diffuse Glossy Refraction Transparent Toon Principled Hair Volume Principled Light Particle Volume Image Sky Noise Wave Voronoi Brick Texture Vector Combine Vertex Separate Vector White RGB Map Separate Set Z Dilate Combine Combine Color Channel Split ID Combine Luminance Directional Alpha Distance Hue Movie Ellipse Bokeh View Corner Anti Mix RGB Hue Separate TEX_NODE_PROC TEX_NODE_PROC TEX_NODE_PROC TEX_NODE_PROC TEX_NODE_PROC Boolean Random Float
ATTR_WARN_UNUSED_RESULT const BMVert * v
static DBVT_INLINE btScalar size(const btDbvtVolume &a)
Definition: btDbvt.cpp:52
btGeneric6DofConstraint & operator=(btGeneric6DofConstraint &other)
SIMD_FORCE_INLINE btVector3 & operator[](int i)
Get a mutable reference to a row of the matrix as a vector.
Definition: btMatrix3x3.h:157
const char * label
CCL_NAMESPACE_BEGIN PackFlags operator|=(PackFlags &pack_flags, uint32_t value)
Definition: geometry.cpp:49
bool operator==(const GeometrySet &UNUSED(a), const GeometrySet &UNUSED(b))
static void shuffle(float2 points[], int size, int rng_seed)
Definition: jitter.cpp:243
#define ccl_device_inline
#define CCL_NAMESPACE_END
static unsigned c
Definition: RandGen.cpp:97
static unsigned a[3]
Definition: RandGen.cpp:92
GPUState operator^(const GPUState &a, const GPUState &b)
constexpr bool operator!=(StringRef a, StringRef b)
unsigned int uint32_t
Definition: stdint.h:83
signed int int32_t
Definition: stdint.h:80
__forceinline uint32_t movemask(const avxb &a)
Definition: util_avxb.h:227
__forceinline bool any(const avxb &b)
Definition: util_avxb.h:218
__forceinline bool all(const avxb &b)
Definition: util_avxb.h:214
__forceinline bool reduce_and(const avxb &a)
Definition: util_avxb.h:206
__forceinline const avxb operator&(const avxb &a, const avxb &b)
Binary Operators.
Definition: util_avxb.h:113
__forceinline const avxb operator^=(avxb &a, const avxb &b)
Definition: util_avxb.h:138
__forceinline const avxb operator&=(avxb &a, const avxb &b)
Assignment Operators.
Definition: util_avxb.h:130
__forceinline const avxb unpacklo(const avxb &a, const avxb &b)
Movement/Shifting/Shuffling Functions.
Definition: util_avxb.h:180
__forceinline const avxb operator|(const avxb &a, const avxb &b)
Definition: util_avxb.h:117
__forceinline const avxb select(const avxb &m, const avxb &t, const avxb &f)
Definition: util_avxb.h:167
__forceinline const avxb operator!(const avxb &a)
Unary Operators.
Definition: util_avxb.h:104
__forceinline const avxb unpackhi(const avxb &a, const avxb &b)
Definition: util_avxb.h:184
__forceinline bool reduce_or(const avxb &a)
Definition: util_avxb.h:210
__forceinline uint32_t popcnt(const avxb &a)
Reduction Operations.
Definition: util_avxb.h:199
__forceinline bool none(const avxb &b)
Definition: util_avxb.h:222
__forceinline const avxi shuffle< 0, 0, 2, 2 >(const avxi &b)
Definition: util_avxi.h:625
__forceinline const avxi shuffle< 0, 1, 0, 1 >(const avxi &b)
Definition: util_avxi.h:633
__forceinline const avxi shuffle< 1, 1, 3, 3 >(const avxi &b)
Definition: util_avxi.h:629
#define __forceinline
Definition: util_defines.h:71
ccl_device_inline float4 mask(const int4 &mask, const float4 &a)