Blender  V2.93
util_avxb.h
Go to the documentation of this file.
1 /*
2  * Copyright 2011-2013 Intel Corporation
3  * Modifications Copyright 2014, Blender Foundation.
4  *
5  * Licensed under the Apache License, Version 2.0(the "License");
6  * you may not use this file except in compliance with the License.
7  * You may obtain a copy of the License at
8  *
9  * http://www.apache.org/licenses/LICENSE-2.0
10  *
11  * Unless required by applicable law or agreed to in writing, software
12  * distributed under the License is distributed on an "AS IS" BASIS,
13  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
14  * See the License for the specific language governing permissions and
15  * limitations under the License.
16  */
17 
18 #ifndef __UTIL_AVXB_H__
19 #define __UTIL_AVXB_H__
20 
22 
23 struct avxf;
24 
26 struct avxb {
27  typedef avxb Mask; // mask type
28  typedef avxf Float; // float type
29 
30  enum { size = 8 }; // number of SIMD elements
31  union {
32  __m256 m256;
33  int32_t v[8];
34  }; // data
35 
39 
41  {
42  }
43  __forceinline avxb(const avxb &other)
44  {
45  m256 = other.m256;
46  }
48  {
49  m256 = other.m256;
50  return *this;
51  }
52 
53  __forceinline avxb(const __m256 input) : m256(input)
54  {
55  }
56  __forceinline avxb(const __m128 &a, const __m128 &b)
57  : m256(_mm256_insertf128_ps(_mm256_castps128_ps256(a), b, 1))
58  {
59  }
60  __forceinline operator const __m256 &(void)const
61  {
62  return m256;
63  }
64  __forceinline operator const __m256i(void) const
65  {
66  return _mm256_castps_si256(m256);
67  }
68  __forceinline operator const __m256d(void) const
69  {
70  return _mm256_castps_pd(m256);
71  }
72 
76 
77  __forceinline avxb(FalseTy) : m256(_mm256_setzero_ps())
78  {
79  }
80  __forceinline avxb(TrueTy) : m256(_mm256_castsi256_ps(_mm256_set1_epi32(-1)))
81  {
82  }
83 
87 
88  __forceinline bool operator[](const size_t i) const
89  {
90  assert(i < 8);
91  return (_mm256_movemask_ps(m256) >> i) & 1;
92  }
93  __forceinline int32_t &operator[](const size_t i)
94  {
95  assert(i < 8);
96  return v[i];
97  }
98 };
99 
103 
105 {
106  return _mm256_xor_ps(a, avxb(True));
107 }
108 
112 
113 __forceinline const avxb operator&(const avxb &a, const avxb &b)
114 {
115  return _mm256_and_ps(a, b);
116 }
117 __forceinline const avxb operator|(const avxb &a, const avxb &b)
118 {
119  return _mm256_or_ps(a, b);
120 }
121 __forceinline const avxb operator^(const avxb &a, const avxb &b)
122 {
123  return _mm256_xor_ps(a, b);
124 }
125 
129 
131 {
132  return a = a & b;
133 }
135 {
136  return a = a | b;
137 }
139 {
140  return a = a ^ b;
141 }
142 
146 
147 __forceinline const avxb operator!=(const avxb &a, const avxb &b)
148 {
149  return _mm256_xor_ps(a, b);
150 }
151 __forceinline const avxb operator==(const avxb &a, const avxb &b)
152 {
153 #ifdef __KERNEL_AVX2__
154  return _mm256_castsi256_ps(_mm256_cmpeq_epi32(a, b));
155 #else
156  __m128i a_lo = _mm_castps_si128(_mm256_extractf128_ps(a, 0));
157  __m128i a_hi = _mm_castps_si128(_mm256_extractf128_ps(a, 1));
158  __m128i b_lo = _mm_castps_si128(_mm256_extractf128_ps(b, 0));
159  __m128i b_hi = _mm_castps_si128(_mm256_extractf128_ps(b, 1));
160  __m128i c_lo = _mm_cmpeq_epi32(a_lo, b_lo);
161  __m128i c_hi = _mm_cmpeq_epi32(a_hi, b_hi);
162  __m256i result = _mm256_insertf128_si256(_mm256_castsi128_si256(c_lo), c_hi, 1);
163  return _mm256_castsi256_ps(result);
164 #endif
165 }
166 
167 __forceinline const avxb select(const avxb &m, const avxb &t, const avxb &f)
168 {
169 #if defined(__KERNEL_SSE41__)
170  return _mm256_blendv_ps(f, t, m);
171 #else
172  return _mm256_or_ps(_mm256_and_ps(m, t), _mm256_andnot_ps(m, f));
173 #endif
174 }
175 
179 
180 __forceinline const avxb unpacklo(const avxb &a, const avxb &b)
181 {
182  return _mm256_unpacklo_ps(a, b);
183 }
184 __forceinline const avxb unpackhi(const avxb &a, const avxb &b)
185 {
186  return _mm256_unpackhi_ps(a, b);
187 }
188 
192 
193 #if defined(__KERNEL_SSE41__)
195 {
196  return _mm_popcnt_u32(_mm256_movemask_ps(a));
197 }
198 #else
200 {
201  return bool(a[0]) + bool(a[1]) + bool(a[2]) + bool(a[3]) + bool(a[4]) + bool(a[5]) + bool(a[6]) +
202  bool(a[7]);
203 }
204 #endif
205 
207 {
208  return _mm256_movemask_ps(a) == 0xf;
209 }
211 {
212  return _mm256_movemask_ps(a) != 0x0;
213 }
214 __forceinline bool all(const avxb &b)
215 {
216  return _mm256_movemask_ps(b) == 0xf;
217 }
218 __forceinline bool any(const avxb &b)
219 {
220  return _mm256_movemask_ps(b) != 0x0;
221 }
222 __forceinline bool none(const avxb &b)
223 {
224  return _mm256_movemask_ps(b) == 0x0;
225 }
226 
228 {
229  return _mm256_movemask_ps(a);
230 }
231 
235 
236 ccl_device_inline void print_avxb(const char *label, const avxb &a)
237 {
238  printf("%s: %d %d %d %d %d %d %d %d\n", label, a[0], a[1], a[2], a[3], a[4], a[5], a[6], a[7]);
239 }
240 
242 
243 #endif
_GL_VOID GLfloat value _GL_VOID_RET _GL_VOID const GLuint GLboolean *residences _GL_BOOL_RET _GL_VOID GLsizei GLfloat GLfloat GLfloat GLfloat const GLubyte *bitmap _GL_VOID_RET _GL_VOID GLenum const void *lists _GL_VOID_RET _GL_VOID const GLdouble *equation _GL_VOID_RET _GL_VOID GLdouble GLdouble blue _GL_VOID_RET _GL_VOID GLfloat GLfloat blue _GL_VOID_RET _GL_VOID GLint GLint blue _GL_VOID_RET _GL_VOID GLshort GLshort blue _GL_VOID_RET _GL_VOID GLubyte GLubyte blue _GL_VOID_RET _GL_VOID GLuint GLuint blue _GL_VOID_RET _GL_VOID GLushort GLushort blue _GL_VOID_RET _GL_VOID GLbyte GLbyte GLbyte alpha _GL_VOID_RET _GL_VOID GLdouble GLdouble GLdouble alpha _GL_VOID_RET _GL_VOID GLfloat GLfloat GLfloat alpha _GL_VOID_RET _GL_VOID GLint GLint GLint alpha _GL_VOID_RET _GL_VOID GLshort GLshort GLshort alpha _GL_VOID_RET _GL_VOID GLubyte GLubyte GLubyte alpha _GL_VOID_RET _GL_VOID GLuint GLuint GLuint alpha _GL_VOID_RET _GL_VOID GLushort GLushort GLushort alpha _GL_VOID_RET _GL_VOID GLenum mode _GL_VOID_RET _GL_VOID GLint GLsizei GLsizei GLenum type _GL_VOID_RET _GL_VOID GLsizei GLenum GLenum const void *pixels _GL_VOID_RET _GL_VOID const void *pointer _GL_VOID_RET _GL_VOID GLdouble v _GL_VOID_RET _GL_VOID GLfloat v _GL_VOID_RET _GL_VOID GLint GLint i2 _GL_VOID_RET _GL_VOID GLint j _GL_VOID_RET _GL_VOID GLfloat param _GL_VOID_RET _GL_VOID GLint param _GL_VOID_RET _GL_VOID GLdouble GLdouble GLdouble GLdouble GLdouble zFar _GL_VOID_RET _GL_UINT GLdouble *equation _GL_VOID_RET _GL_VOID GLenum GLint *params _GL_VOID_RET _GL_VOID GLenum GLfloat *v _GL_VOID_RET _GL_VOID GLenum GLfloat *params _GL_VOID_RET _GL_VOID GLfloat *values _GL_VOID_RET _GL_VOID GLushort *values _GL_VOID_RET _GL_VOID GLenum GLfloat *params _GL_VOID_RET _GL_VOID GLenum GLdouble *params _GL_VOID_RET _GL_VOID GLenum GLint *params _GL_VOID_RET _GL_VOID GLsizei const void *pointer _GL_VOID_RET _GL_VOID GLsizei const void *pointer _GL_VOID_RET _GL_BOOL GLfloat param _GL_VOID_RET _GL_VOID GLint param _GL_VOID_RET _GL_VOID GLenum GLfloat param _GL_VOID_RET _GL_VOID GLenum GLint param _GL_VOID_RET _GL_VOID GLushort pattern _GL_VOID_RET _GL_VOID GLdouble GLdouble GLint GLint const GLdouble *points _GL_VOID_RET _GL_VOID GLdouble GLdouble GLint GLint GLdouble GLdouble GLint GLint const GLdouble *points _GL_VOID_RET _GL_VOID GLdouble GLdouble u2 _GL_VOID_RET _GL_VOID GLdouble GLdouble GLint GLdouble GLdouble v2 _GL_VOID_RET _GL_VOID GLenum GLfloat param _GL_VOID_RET _GL_VOID GLenum GLint param _GL_VOID_RET _GL_VOID GLenum mode _GL_VOID_RET _GL_VOID GLdouble GLdouble nz _GL_VOID_RET _GL_VOID GLfloat GLfloat nz _GL_VOID_RET _GL_VOID GLint GLint nz _GL_VOID_RET _GL_VOID GLshort GLshort nz _GL_VOID_RET _GL_VOID GLsizei const void *pointer _GL_VOID_RET _GL_VOID GLsizei const GLfloat *values _GL_VOID_RET _GL_VOID GLsizei const GLushort *values _GL_VOID_RET _GL_VOID GLint param _GL_VOID_RET _GL_VOID const GLuint const GLclampf *priorities _GL_VOID_RET _GL_VOID GLdouble y _GL_VOID_RET _GL_VOID GLfloat y _GL_VOID_RET _GL_VOID GLint y _GL_VOID_RET _GL_VOID GLshort y _GL_VOID_RET _GL_VOID GLdouble GLdouble z _GL_VOID_RET _GL_VOID GLfloat GLfloat z _GL_VOID_RET _GL_VOID GLint GLint z _GL_VOID_RET _GL_VOID GLshort GLshort z _GL_VOID_RET _GL_VOID GLdouble GLdouble GLdouble w _GL_VOID_RET _GL_VOID GLfloat GLfloat GLfloat w _GL_VOID_RET _GL_VOID GLint GLint GLint w _GL_VOID_RET _GL_VOID GLshort GLshort GLshort w _GL_VOID_RET _GL_VOID GLdouble GLdouble GLdouble y2 _GL_VOID_RET _GL_VOID GLfloat GLfloat GLfloat y2 _GL_VOID_RET _GL_VOID GLint GLint GLint y2 _GL_VOID_RET _GL_VOID GLshort GLshort GLshort y2 _GL_VOID_RET _GL_VOID GLdouble GLdouble GLdouble z _GL_VOID_RET _GL_VOID GLdouble GLdouble z _GL_VOID_RET _GL_VOID GLuint *buffer _GL_VOID_RET _GL_VOID GLdouble t _GL_VOID_RET _GL_VOID GLfloat t _GL_VOID_RET _GL_VOID GLint t _GL_VOID_RET _GL_VOID GLshort t _GL_VOID_RET _GL_VOID GLdouble t
const char * label
#define ccl_device_inline
#define CCL_NAMESPACE_END
static unsigned a[3]
Definition: RandGen.cpp:92
unsigned int uint32_t
Definition: stdint.h:83
signed int int32_t
Definition: stdint.h:80
Definition: util_avxb.h:26
__forceinline avxb(const __m256 input)
Definition: util_avxb.h:53
__forceinline avxb(TrueTy)
Definition: util_avxb.h:80
@ size
Definition: util_avxb.h:30
__forceinline avxb(const __m128 &a, const __m128 &b)
Definition: util_avxb.h:56
int32_t v[8]
Definition: util_avxb.h:33
__forceinline avxb & operator=(const avxb &other)
Definition: util_avxb.h:47
__forceinline avxb(FalseTy)
Constants.
Definition: util_avxb.h:77
__forceinline avxb()
Constructors, Assignment & Cast Operators.
Definition: util_avxb.h:40
__forceinline bool operator[](const size_t i) const
Array Access.
Definition: util_avxb.h:88
avxf Float
Definition: util_avxb.h:28
__forceinline int32_t & operator[](const size_t i)
Definition: util_avxb.h:93
__m256 m256
Definition: util_avxb.h:32
avxb Mask
Definition: util_avxb.h:27
__forceinline avxb(const avxb &other)
Definition: util_avxb.h:43
Definition: util_avxf.h:24
__forceinline const avxb operator|=(avxb &a, const avxb &b)
Definition: util_avxb.h:134
__forceinline uint32_t movemask(const avxb &a)
Definition: util_avxb.h:227
__forceinline bool any(const avxb &b)
Definition: util_avxb.h:218
__forceinline bool all(const avxb &b)
Definition: util_avxb.h:214
__forceinline const avxb operator==(const avxb &a, const avxb &b)
Definition: util_avxb.h:151
__forceinline bool reduce_and(const avxb &a)
Definition: util_avxb.h:206
__forceinline const avxb operator&(const avxb &a, const avxb &b)
Binary Operators.
Definition: util_avxb.h:113
__forceinline const avxb operator^=(avxb &a, const avxb &b)
Definition: util_avxb.h:138
__forceinline const avxb operator&=(avxb &a, const avxb &b)
Assignment Operators.
Definition: util_avxb.h:130
__forceinline const avxb unpacklo(const avxb &a, const avxb &b)
Movement/Shifting/Shuffling Functions.
Definition: util_avxb.h:180
__forceinline const avxb operator|(const avxb &a, const avxb &b)
Definition: util_avxb.h:117
__forceinline const avxb select(const avxb &m, const avxb &t, const avxb &f)
Definition: util_avxb.h:167
ccl_device_inline void print_avxb(const char *label, const avxb &a)
Debug Functions.
Definition: util_avxb.h:236
__forceinline const avxb operator^(const avxb &a, const avxb &b)
Definition: util_avxb.h:121
__forceinline const avxb operator!(const avxb &a)
Unary Operators.
Definition: util_avxb.h:104
__forceinline const avxb unpackhi(const avxb &a, const avxb &b)
Definition: util_avxb.h:184
__forceinline bool reduce_or(const avxb &a)
Definition: util_avxb.h:210
__forceinline const avxb operator!=(const avxb &a, const avxb &b)
Comparison Operators + Select.
Definition: util_avxb.h:147
__forceinline uint32_t popcnt(const avxb &a)
Reduction Operations.
Definition: util_avxb.h:199
__forceinline bool none(const avxb &b)
Definition: util_avxb.h:222
#define __forceinline
Definition: util_defines.h:71