18 #ifndef __UTIL_AVXB_H__
19 #define __UTIL_AVXB_H__
57 :
m256(_mm256_insertf128_ps(_mm256_castps128_ps256(
a), b, 1))
66 return _mm256_castps_si256(
m256);
70 return _mm256_castps_pd(
m256);
91 return (_mm256_movemask_ps(
m256) >> i) & 1;
106 return _mm256_xor_ps(
a,
avxb(True));
115 return _mm256_and_ps(
a, b);
119 return _mm256_or_ps(
a, b);
123 return _mm256_xor_ps(
a, b);
149 return _mm256_xor_ps(
a, b);
153 #ifdef __KERNEL_AVX2__
154 return _mm256_castsi256_ps(_mm256_cmpeq_epi32(
a, b));
156 __m128i a_lo = _mm_castps_si128(_mm256_extractf128_ps(
a, 0));
157 __m128i a_hi = _mm_castps_si128(_mm256_extractf128_ps(
a, 1));
158 __m128i b_lo = _mm_castps_si128(_mm256_extractf128_ps(b, 0));
159 __m128i b_hi = _mm_castps_si128(_mm256_extractf128_ps(b, 1));
160 __m128i c_lo = _mm_cmpeq_epi32(a_lo, b_lo);
161 __m128i c_hi = _mm_cmpeq_epi32(a_hi, b_hi);
162 __m256i
result = _mm256_insertf128_si256(_mm256_castsi128_si256(c_lo), c_hi, 1);
163 return _mm256_castsi256_ps(
result);
169 #if defined(__KERNEL_SSE41__)
170 return _mm256_blendv_ps(f,
t, m);
172 return _mm256_or_ps(_mm256_and_ps(m,
t), _mm256_andnot_ps(m, f));
182 return _mm256_unpacklo_ps(
a, b);
186 return _mm256_unpackhi_ps(
a, b);
193 #if defined(__KERNEL_SSE41__)
196 return _mm_popcnt_u32(_mm256_movemask_ps(
a));
201 return bool(
a[0]) + bool(
a[1]) + bool(
a[2]) + bool(
a[3]) + bool(
a[4]) + bool(
a[5]) + bool(
a[6]) +
208 return _mm256_movemask_ps(
a) == 0xf;
212 return _mm256_movemask_ps(
a) != 0x0;
216 return _mm256_movemask_ps(b) == 0xf;
220 return _mm256_movemask_ps(b) != 0x0;
224 return _mm256_movemask_ps(b) == 0x0;
229 return _mm256_movemask_ps(
a);
238 printf(
"%s: %d %d %d %d %d %d %d %d\n",
label,
a[0],
a[1],
a[2],
a[3],
a[4],
a[5],
a[6],
a[7]);
_GL_VOID GLfloat value _GL_VOID_RET _GL_VOID const GLuint GLboolean *residences _GL_BOOL_RET _GL_VOID GLsizei GLfloat GLfloat GLfloat GLfloat const GLubyte *bitmap _GL_VOID_RET _GL_VOID GLenum const void *lists _GL_VOID_RET _GL_VOID const GLdouble *equation _GL_VOID_RET _GL_VOID GLdouble GLdouble blue _GL_VOID_RET _GL_VOID GLfloat GLfloat blue _GL_VOID_RET _GL_VOID GLint GLint blue _GL_VOID_RET _GL_VOID GLshort GLshort blue _GL_VOID_RET _GL_VOID GLubyte GLubyte blue _GL_VOID_RET _GL_VOID GLuint GLuint blue _GL_VOID_RET _GL_VOID GLushort GLushort blue _GL_VOID_RET _GL_VOID GLbyte GLbyte GLbyte alpha _GL_VOID_RET _GL_VOID GLdouble GLdouble GLdouble alpha _GL_VOID_RET _GL_VOID GLfloat GLfloat GLfloat alpha _GL_VOID_RET _GL_VOID GLint GLint GLint alpha _GL_VOID_RET _GL_VOID GLshort GLshort GLshort alpha _GL_VOID_RET _GL_VOID GLubyte GLubyte GLubyte alpha _GL_VOID_RET _GL_VOID GLuint GLuint GLuint alpha _GL_VOID_RET _GL_VOID GLushort GLushort GLushort alpha _GL_VOID_RET _GL_VOID GLenum mode _GL_VOID_RET _GL_VOID GLint GLsizei GLsizei GLenum type _GL_VOID_RET _GL_VOID GLsizei GLenum GLenum const void *pixels _GL_VOID_RET _GL_VOID const void *pointer _GL_VOID_RET _GL_VOID GLdouble v _GL_VOID_RET _GL_VOID GLfloat v _GL_VOID_RET _GL_VOID GLint GLint i2 _GL_VOID_RET _GL_VOID GLint j _GL_VOID_RET _GL_VOID GLfloat param _GL_VOID_RET _GL_VOID GLint param _GL_VOID_RET _GL_VOID GLdouble GLdouble GLdouble GLdouble GLdouble zFar _GL_VOID_RET _GL_UINT GLdouble *equation _GL_VOID_RET _GL_VOID GLenum GLint *params _GL_VOID_RET _GL_VOID GLenum GLfloat *v _GL_VOID_RET _GL_VOID GLenum GLfloat *params _GL_VOID_RET _GL_VOID GLfloat *values _GL_VOID_RET _GL_VOID GLushort *values _GL_VOID_RET _GL_VOID GLenum GLfloat *params _GL_VOID_RET _GL_VOID GLenum GLdouble *params _GL_VOID_RET _GL_VOID GLenum GLint *params _GL_VOID_RET _GL_VOID GLsizei const void *pointer _GL_VOID_RET _GL_VOID GLsizei const void *pointer _GL_VOID_RET _GL_BOOL GLfloat param _GL_VOID_RET _GL_VOID GLint param _GL_VOID_RET _GL_VOID GLenum GLfloat param _GL_VOID_RET _GL_VOID GLenum GLint param _GL_VOID_RET _GL_VOID GLushort pattern _GL_VOID_RET _GL_VOID GLdouble GLdouble GLint GLint const GLdouble *points _GL_VOID_RET _GL_VOID GLdouble GLdouble GLint GLint GLdouble GLdouble GLint GLint const GLdouble *points _GL_VOID_RET _GL_VOID GLdouble GLdouble u2 _GL_VOID_RET _GL_VOID GLdouble GLdouble GLint GLdouble GLdouble v2 _GL_VOID_RET _GL_VOID GLenum GLfloat param _GL_VOID_RET _GL_VOID GLenum GLint param _GL_VOID_RET _GL_VOID GLenum mode _GL_VOID_RET _GL_VOID GLdouble GLdouble nz _GL_VOID_RET _GL_VOID GLfloat GLfloat nz _GL_VOID_RET _GL_VOID GLint GLint nz _GL_VOID_RET _GL_VOID GLshort GLshort nz _GL_VOID_RET _GL_VOID GLsizei const void *pointer _GL_VOID_RET _GL_VOID GLsizei const GLfloat *values _GL_VOID_RET _GL_VOID GLsizei const GLushort *values _GL_VOID_RET _GL_VOID GLint param _GL_VOID_RET _GL_VOID const GLuint const GLclampf *priorities _GL_VOID_RET _GL_VOID GLdouble y _GL_VOID_RET _GL_VOID GLfloat y _GL_VOID_RET _GL_VOID GLint y _GL_VOID_RET _GL_VOID GLshort y _GL_VOID_RET _GL_VOID GLdouble GLdouble z _GL_VOID_RET _GL_VOID GLfloat GLfloat z _GL_VOID_RET _GL_VOID GLint GLint z _GL_VOID_RET _GL_VOID GLshort GLshort z _GL_VOID_RET _GL_VOID GLdouble GLdouble GLdouble w _GL_VOID_RET _GL_VOID GLfloat GLfloat GLfloat w _GL_VOID_RET _GL_VOID GLint GLint GLint w _GL_VOID_RET _GL_VOID GLshort GLshort GLshort w _GL_VOID_RET _GL_VOID GLdouble GLdouble GLdouble y2 _GL_VOID_RET _GL_VOID GLfloat GLfloat GLfloat y2 _GL_VOID_RET _GL_VOID GLint GLint GLint y2 _GL_VOID_RET _GL_VOID GLshort GLshort GLshort y2 _GL_VOID_RET _GL_VOID GLdouble GLdouble GLdouble z _GL_VOID_RET _GL_VOID GLdouble GLdouble z _GL_VOID_RET _GL_VOID GLuint *buffer _GL_VOID_RET _GL_VOID GLdouble t _GL_VOID_RET _GL_VOID GLfloat t _GL_VOID_RET _GL_VOID GLint t _GL_VOID_RET _GL_VOID GLshort t _GL_VOID_RET _GL_VOID GLdouble t
#define ccl_device_inline
#define CCL_NAMESPACE_END
__forceinline avxb(const __m256 input)
__forceinline avxb(TrueTy)
__forceinline avxb(const __m128 &a, const __m128 &b)
__forceinline avxb & operator=(const avxb &other)
__forceinline avxb(FalseTy)
Constants.
__forceinline avxb()
Constructors, Assignment & Cast Operators.
__forceinline bool operator[](const size_t i) const
Array Access.
__forceinline int32_t & operator[](const size_t i)
__forceinline avxb(const avxb &other)
__forceinline const avxb operator|=(avxb &a, const avxb &b)
__forceinline uint32_t movemask(const avxb &a)
__forceinline bool any(const avxb &b)
__forceinline bool all(const avxb &b)
__forceinline const avxb operator==(const avxb &a, const avxb &b)
__forceinline bool reduce_and(const avxb &a)
__forceinline const avxb operator&(const avxb &a, const avxb &b)
Binary Operators.
__forceinline const avxb operator^=(avxb &a, const avxb &b)
__forceinline const avxb operator&=(avxb &a, const avxb &b)
Assignment Operators.
__forceinline const avxb unpacklo(const avxb &a, const avxb &b)
Movement/Shifting/Shuffling Functions.
__forceinline const avxb operator|(const avxb &a, const avxb &b)
__forceinline const avxb select(const avxb &m, const avxb &t, const avxb &f)
ccl_device_inline void print_avxb(const char *label, const avxb &a)
Debug Functions.
__forceinline const avxb operator^(const avxb &a, const avxb &b)
__forceinline const avxb operator!(const avxb &a)
Unary Operators.
__forceinline const avxb unpackhi(const avxb &a, const avxb &b)
__forceinline bool reduce_or(const avxb &a)
__forceinline const avxb operator!=(const avxb &a, const avxb &b)
Comparison Operators + Select.
__forceinline uint32_t popcnt(const avxb &a)
Reduction Operations.
__forceinline bool none(const avxb &b)