17 #ifndef __UTIL_AVXI_H__
18 #define __UTIL_AVXI_H__
29 #if !defined(__KERNEL_AVX2__)
67 :
m256(_mm256_insertf128_si256(_mm256_castsi128_si256(
a),
a, 1))
71 :
m256(_mm256_insertf128_si256(_mm256_castsi128_si256(
a), b, 1))
74 #if defined(__KERNEL_AVX2__)
76 :
m256(_mm256_insertf128_si256(_mm256_castsi128_si256(
a), b, 1))
85 :
m256(_mm256_castps_si256(_mm256_loadu_ps((const
float *)
a)))
95 :
m256(_mm256_set_epi32(d,
c, b,
a, d,
c, b,
a))
100 :
m256(_mm256_set_epi32(
h, g, f,
e, d,
c, b,
a))
115 #if defined(__KERNEL_AVX2__)
130 :
m256(_mm256_set_epi32(
131 pos_inf, pos_inf, pos_inf, pos_inf, pos_inf, pos_inf, pos_inf, pos_inf))
135 :
m256(_mm256_set_epi32(
136 neg_inf, neg_inf, neg_inf, neg_inf, neg_inf, neg_inf, neg_inf, neg_inf))
166 return _mm256_castps_si256(
a);
172 #if defined(__KERNEL_AVX2__)
175 return _mm256_sub_epi32(_mm256_setzero_si256(),
a.m256);
179 return _mm256_abs_epi32(
a.m256);
184 return avxi(_mm_sub_epi32(_mm_setzero_si128(),
a.l), _mm_sub_epi32(_mm_setzero_si128(),
a.h));
188 return avxi(_mm_abs_epi32(
a.l), _mm_abs_epi32(
a.h));
196 #if defined(__KERNEL_AVX2__)
199 return _mm256_add_epi32(
a.m256, b.
m256);
204 return avxi(_mm_add_epi32(
a.l, b.
l), _mm_add_epi32(
a.h, b.
h));
216 #if defined(__KERNEL_AVX2__)
219 return _mm256_sub_epi32(
a.m256, b.
m256);
224 return avxi(_mm_sub_epi32(
a.l, b.
l), _mm_sub_epi32(
a.h, b.
h));
236 #if defined(__KERNEL_AVX2__)
239 return _mm256_mullo_epi32(
a.m256, b.
m256);
244 return avxi(_mm_mullo_epi32(
a.l, b.
l), _mm_mullo_epi32(
a.h, b.
h));
256 #if defined(__KERNEL_AVX2__)
259 return _mm256_and_si256(
a.m256, b.
m256);
264 return _mm256_castps_si256(_mm256_and_ps(_mm256_castsi256_ps(
a), _mm256_castsi256_ps(b)));
276 #if defined(__KERNEL_AVX2__)
279 return _mm256_or_si256(
a.m256, b.
m256);
284 return _mm256_castps_si256(_mm256_or_ps(_mm256_castsi256_ps(
a), _mm256_castsi256_ps(b)));
296 #if defined(__KERNEL_AVX2__)
299 return _mm256_xor_si256(
a.m256, b.
m256);
304 return _mm256_castps_si256(_mm256_xor_ps(_mm256_castsi256_ps(
a), _mm256_castsi256_ps(b)));
316 #if defined(__KERNEL_AVX2__)
319 return _mm256_slli_epi32(
a.m256, n);
323 return _mm256_srai_epi32(
a.m256, n);
328 return _mm256_srai_epi32(
a.m256, b);
332 return _mm256_srli_epi32(
a.m256, b);
337 return avxi(_mm_slli_epi32(
a.l, n), _mm_slli_epi32(
a.h, n));
341 return avxi(_mm_srai_epi32(
a.l, n), _mm_srai_epi32(
a.h, n));
346 return avxi(_mm_srai_epi32(
a.l, b), _mm_srai_epi32(
a.h, b));
350 return avxi(_mm_srli_epi32(
a.l, b), _mm_srli_epi32(
a.h, b));
354 #if defined(__KERNEL_AVX2__)
357 return _mm256_min_epi32(
a.m256, b.
m256);
362 return avxi(_mm_min_epi32(
a.l, b.
l), _mm_min_epi32(
a.h, b.
h));
374 #if defined(__KERNEL_AVX2__)
377 return _mm256_max_epi32(
a.m256, b.
m256);
382 return avxi(_mm_max_epi32(
a.l, b.
l), _mm_max_epi32(
a.h, b.
h));
465 #if defined(__KERNEL_AVX2__)
468 return _mm256_castsi256_ps(_mm256_cmpeq_epi32(
a.m256, b.
m256));
473 return avxb(_mm_castsi128_ps(_mm_cmpeq_epi32(
a.l, b.
l)),
474 _mm_castsi128_ps(_mm_cmpeq_epi32(
a.h, b.
h)));
499 #if defined(__KERNEL_AVX2__)
502 return _mm256_castsi256_ps(_mm256_cmpgt_epi32(b.
m256,
a.m256));
507 return avxb(_mm_castsi128_ps(_mm_cmplt_epi32(
a.l, b.
l)),
508 _mm_castsi128_ps(_mm_cmplt_epi32(
a.h, b.
h)));
533 #if defined(__KERNEL_AVX2__)
536 return _mm256_castsi256_ps(_mm256_cmpgt_epi32(
a.m256, b.
m256));
541 return avxb(_mm_castsi128_ps(_mm_cmpgt_epi32(
a.l, b.
l)),
542 _mm_castsi128_ps(_mm_cmpgt_epi32(
a.h, b.
h)));
569 return _mm256_castps_si256(_mm256_blendv_ps(_mm256_castsi256_ps(f), _mm256_castsi256_ps(
t), m));
576 #if defined(__KERNEL_AVX2__)
579 return _mm256_unpacklo_epi32(
a.m256, b.
m256);
583 return _mm256_unpackhi_epi32(
a.m256, b.
m256);
588 return _mm256_castps_si256(_mm256_unpacklo_ps(_mm256_castsi256_ps(
a), _mm256_castsi256_ps(b)));
592 return _mm256_castps_si256(_mm256_unpackhi_ps(_mm256_castsi256_ps(
a), _mm256_castsi256_ps(b)));
598 return _mm256_castps_si256(_mm256_permute_ps(_mm256_castsi256_ps(
a), _MM_SHUFFLE(i, i, i, i)));
603 return _mm256_permute2f128_si256(
a,
a, (
i1 << 4) | (i0 << 0));
608 return _mm256_permute2f128_si256(
a, b, (
i1 << 4) | (i0 << 0));
611 template<
size_t i0,
size_t i1,
size_t i2,
size_t i3>
614 return _mm256_castps_si256(
615 _mm256_permute_ps(_mm256_castsi256_ps(
a), _MM_SHUFFLE(i3, i2,
i1, i0)));
618 template<
size_t i0,
size_t i1,
size_t i2,
size_t i3>
621 return _mm256_castps_si256(_mm256_shuffle_ps(
622 _mm256_castsi256_ps(
a), _mm256_castsi256_ps(b), _MM_SHUFFLE(i3, i2,
i1, i0)));
627 return _mm256_castps_si256(_mm256_moveldup_ps(_mm256_castsi256_ps(b)));
631 return _mm256_castps_si256(_mm256_movehdup_ps(_mm256_castsi256_ps(b)));
635 return _mm256_castps_si256(
636 _mm256_castpd_ps(_mm256_movedup_pd(_mm256_castps_pd(_mm256_castsi256_ps(b)))));
641 return _mm256_castps_si256(_mm256_broadcast_ss((
const float *)
ptr));
645 return _mm256_insertf128_si256(
a, b, i);
649 return _mm256_extractf128_si256(
a, i);
658 return min(
v, shuffle<1, 0, 3, 2>(
v));
663 return min(
v1, shuffle<2, 3, 0, 1>(
v1));
668 return min(
v1, shuffle<1, 0>(
v1));
673 return max(
v, shuffle<1, 0, 3, 2>(
v));
678 return max(
v1, shuffle<2, 3, 0, 1>(
v1));
683 return max(
v1, shuffle<1, 0>(
v1));
688 return v + shuffle<1, 0, 3, 2>(
v);
693 return v1 + shuffle<2, 3, 0, 1>(
v1);
698 return v1 + shuffle<1, 0>(
v1);
740 printf(
"%s: %d %d %d %d %d %d %d %d\n",
label,
a[0],
a[1],
a[2],
a[3],
a[4],
a[5],
a[6],
a[7]);
typedef float(TangentPoint)[2]
_GL_VOID GLfloat value _GL_VOID_RET _GL_VOID const GLuint GLboolean *residences _GL_BOOL_RET _GL_VOID GLsizei GLfloat GLfloat GLfloat GLfloat const GLubyte *bitmap _GL_VOID_RET _GL_VOID GLenum const void *lists _GL_VOID_RET _GL_VOID const GLdouble *equation _GL_VOID_RET _GL_VOID GLdouble GLdouble blue _GL_VOID_RET _GL_VOID GLfloat GLfloat blue _GL_VOID_RET _GL_VOID GLint GLint blue _GL_VOID_RET _GL_VOID GLshort GLshort blue _GL_VOID_RET _GL_VOID GLubyte GLubyte blue _GL_VOID_RET _GL_VOID GLuint GLuint blue _GL_VOID_RET _GL_VOID GLushort GLushort blue _GL_VOID_RET _GL_VOID GLbyte GLbyte GLbyte alpha _GL_VOID_RET _GL_VOID GLdouble GLdouble GLdouble alpha _GL_VOID_RET _GL_VOID GLfloat GLfloat GLfloat alpha _GL_VOID_RET _GL_VOID GLint GLint GLint alpha _GL_VOID_RET _GL_VOID GLshort GLshort GLshort alpha _GL_VOID_RET _GL_VOID GLubyte GLubyte GLubyte alpha _GL_VOID_RET _GL_VOID GLuint GLuint GLuint alpha _GL_VOID_RET _GL_VOID GLushort GLushort GLushort alpha _GL_VOID_RET _GL_VOID GLenum mode _GL_VOID_RET _GL_VOID GLint GLsizei GLsizei GLenum type _GL_VOID_RET _GL_VOID GLsizei GLenum GLenum const void *pixels _GL_VOID_RET _GL_VOID const void *pointer _GL_VOID_RET _GL_VOID GLdouble v _GL_VOID_RET _GL_VOID GLfloat v _GL_VOID_RET _GL_VOID GLint i1
_GL_VOID GLfloat value _GL_VOID_RET _GL_VOID const GLuint GLboolean *residences _GL_BOOL_RET _GL_VOID GLsizei GLfloat GLfloat GLfloat GLfloat const GLubyte *bitmap _GL_VOID_RET _GL_VOID GLenum const void *lists _GL_VOID_RET _GL_VOID const GLdouble *equation _GL_VOID_RET _GL_VOID GLdouble GLdouble blue _GL_VOID_RET _GL_VOID GLfloat GLfloat blue _GL_VOID_RET _GL_VOID GLint GLint blue _GL_VOID_RET _GL_VOID GLshort GLshort blue _GL_VOID_RET _GL_VOID GLubyte GLubyte blue _GL_VOID_RET _GL_VOID GLuint GLuint blue _GL_VOID_RET _GL_VOID GLushort GLushort blue _GL_VOID_RET _GL_VOID GLbyte GLbyte GLbyte alpha _GL_VOID_RET _GL_VOID GLdouble GLdouble GLdouble alpha _GL_VOID_RET _GL_VOID GLfloat GLfloat GLfloat alpha _GL_VOID_RET _GL_VOID GLint GLint GLint alpha _GL_VOID_RET _GL_VOID GLshort GLshort GLshort alpha _GL_VOID_RET _GL_VOID GLubyte GLubyte GLubyte alpha _GL_VOID_RET _GL_VOID GLuint GLuint GLuint alpha _GL_VOID_RET _GL_VOID GLushort GLushort GLushort alpha _GL_VOID_RET _GL_VOID GLenum mode _GL_VOID_RET _GL_VOID GLint GLsizei GLsizei GLenum type _GL_VOID_RET _GL_VOID GLsizei GLenum GLenum const void *pixels _GL_VOID_RET _GL_VOID const void *pointer _GL_VOID_RET _GL_VOID GLdouble v _GL_VOID_RET _GL_VOID GLfloat v _GL_VOID_RET _GL_VOID GLint GLint i2 _GL_VOID_RET _GL_VOID GLint j _GL_VOID_RET _GL_VOID GLfloat param _GL_VOID_RET _GL_VOID GLint param _GL_VOID_RET _GL_VOID GLdouble GLdouble GLdouble GLdouble GLdouble zFar _GL_VOID_RET _GL_UINT GLdouble *equation _GL_VOID_RET _GL_VOID GLenum GLint *params _GL_VOID_RET _GL_VOID GLenum GLfloat *v _GL_VOID_RET _GL_VOID GLenum GLfloat *params _GL_VOID_RET _GL_VOID GLfloat *values _GL_VOID_RET _GL_VOID GLushort *values _GL_VOID_RET _GL_VOID GLenum GLfloat *params _GL_VOID_RET _GL_VOID GLenum GLdouble *params _GL_VOID_RET _GL_VOID GLenum GLint *params _GL_VOID_RET _GL_VOID GLsizei const void *pointer _GL_VOID_RET _GL_VOID GLsizei const void *pointer _GL_VOID_RET _GL_BOOL GLfloat param _GL_VOID_RET _GL_VOID GLint param _GL_VOID_RET _GL_VOID GLenum GLfloat param _GL_VOID_RET _GL_VOID GLenum GLint param _GL_VOID_RET _GL_VOID GLushort pattern _GL_VOID_RET _GL_VOID GLdouble GLdouble GLint GLint const GLdouble *points _GL_VOID_RET _GL_VOID GLdouble GLdouble GLint GLint GLdouble GLdouble GLint GLint const GLdouble *points _GL_VOID_RET _GL_VOID GLdouble GLdouble u2 _GL_VOID_RET _GL_VOID GLdouble GLdouble GLint GLdouble GLdouble v2 _GL_VOID_RET _GL_VOID GLenum GLfloat param _GL_VOID_RET _GL_VOID GLenum GLint param _GL_VOID_RET _GL_VOID GLenum mode _GL_VOID_RET _GL_VOID GLdouble GLdouble nz _GL_VOID_RET _GL_VOID GLfloat GLfloat nz _GL_VOID_RET _GL_VOID GLint GLint nz _GL_VOID_RET _GL_VOID GLshort GLshort nz _GL_VOID_RET _GL_VOID GLsizei const void *pointer _GL_VOID_RET _GL_VOID GLsizei const GLfloat *values _GL_VOID_RET _GL_VOID GLsizei const GLushort *values _GL_VOID_RET _GL_VOID GLint param _GL_VOID_RET _GL_VOID const GLuint const GLclampf *priorities _GL_VOID_RET _GL_VOID GLdouble y _GL_VOID_RET _GL_VOID GLfloat y _GL_VOID_RET _GL_VOID GLint y _GL_VOID_RET _GL_VOID GLshort y _GL_VOID_RET _GL_VOID GLdouble GLdouble z _GL_VOID_RET _GL_VOID GLfloat GLfloat z _GL_VOID_RET _GL_VOID GLint GLint z _GL_VOID_RET _GL_VOID GLshort GLshort z _GL_VOID_RET _GL_VOID GLdouble GLdouble GLdouble w _GL_VOID_RET _GL_VOID GLfloat GLfloat GLfloat w _GL_VOID_RET _GL_VOID GLint GLint GLint w _GL_VOID_RET _GL_VOID GLshort GLshort GLshort w _GL_VOID_RET _GL_VOID GLdouble GLdouble GLdouble y2 _GL_VOID_RET _GL_VOID GLfloat GLfloat GLfloat y2 _GL_VOID_RET _GL_VOID GLint GLint GLint y2 _GL_VOID_RET _GL_VOID GLshort GLshort GLshort y2 _GL_VOID_RET _GL_VOID GLdouble GLdouble GLdouble z _GL_VOID_RET _GL_VOID GLdouble GLdouble z _GL_VOID_RET _GL_VOID GLuint *buffer _GL_VOID_RET _GL_VOID GLdouble t _GL_VOID_RET _GL_VOID GLfloat t _GL_VOID_RET _GL_VOID GLint t _GL_VOID_RET _GL_VOID GLshort t _GL_VOID_RET _GL_VOID GLdouble t
_GL_VOID GLfloat value _GL_VOID_RET _GL_VOID const GLuint GLboolean *residences _GL_BOOL_RET _GL_VOID GLsizei GLfloat GLfloat GLfloat GLfloat const GLubyte *bitmap _GL_VOID_RET _GL_VOID GLenum const void *lists _GL_VOID_RET _GL_VOID const GLdouble *equation _GL_VOID_RET _GL_VOID GLdouble GLdouble blue _GL_VOID_RET _GL_VOID GLfloat GLfloat blue _GL_VOID_RET _GL_VOID GLint GLint blue _GL_VOID_RET _GL_VOID GLshort GLshort blue _GL_VOID_RET _GL_VOID GLubyte GLubyte blue _GL_VOID_RET _GL_VOID GLuint GLuint blue _GL_VOID_RET _GL_VOID GLushort GLushort blue _GL_VOID_RET _GL_VOID GLbyte GLbyte GLbyte alpha _GL_VOID_RET _GL_VOID GLdouble GLdouble GLdouble alpha _GL_VOID_RET _GL_VOID GLfloat GLfloat GLfloat alpha _GL_VOID_RET _GL_VOID GLint GLint GLint alpha _GL_VOID_RET _GL_VOID GLshort GLshort GLshort alpha _GL_VOID_RET _GL_VOID GLubyte GLubyte GLubyte alpha _GL_VOID_RET _GL_VOID GLuint GLuint GLuint alpha _GL_VOID_RET _GL_VOID GLushort GLushort GLushort alpha _GL_VOID_RET _GL_VOID GLenum mode _GL_VOID_RET _GL_VOID GLint GLsizei GLsizei GLenum type _GL_VOID_RET _GL_VOID GLsizei GLenum GLenum const void *pixels _GL_VOID_RET _GL_VOID const void *pointer _GL_VOID_RET _GL_VOID GLdouble v _GL_VOID_RET _GL_VOID GLfloat v _GL_VOID_RET _GL_VOID GLint GLint i2 _GL_VOID_RET _GL_VOID GLint j _GL_VOID_RET _GL_VOID GLfloat param _GL_VOID_RET _GL_VOID GLint param _GL_VOID_RET _GL_VOID GLdouble GLdouble GLdouble GLdouble GLdouble zFar _GL_VOID_RET _GL_UINT GLdouble *equation _GL_VOID_RET _GL_VOID GLenum GLint *params _GL_VOID_RET _GL_VOID GLenum GLfloat *v _GL_VOID_RET _GL_VOID GLenum GLfloat *params _GL_VOID_RET _GL_VOID GLfloat *values _GL_VOID_RET _GL_VOID GLushort *values _GL_VOID_RET _GL_VOID GLenum GLfloat *params _GL_VOID_RET _GL_VOID GLenum GLdouble *params _GL_VOID_RET _GL_VOID GLenum GLint *params _GL_VOID_RET _GL_VOID GLsizei const void *pointer _GL_VOID_RET _GL_VOID GLsizei const void *pointer _GL_VOID_RET _GL_BOOL GLfloat param _GL_VOID_RET _GL_VOID GLint param _GL_VOID_RET _GL_VOID GLenum GLfloat param _GL_VOID_RET _GL_VOID GLenum GLint param _GL_VOID_RET _GL_VOID GLushort pattern _GL_VOID_RET _GL_VOID GLdouble GLdouble GLint GLint const GLdouble *points _GL_VOID_RET _GL_VOID GLdouble GLdouble GLint GLint GLdouble v1
ATTR_WARN_UNUSED_RESULT const BMVert const BMEdge * e
ATTR_WARN_UNUSED_RESULT const BMVert * v
#define ccl_device_inline
#define CCL_NAMESPACE_END
__forceinline avxi(const __m128i &a, const __m128i &b)
__forceinline avxi(ZeroTy)
Constants.
__forceinline avxi(NegInfTy)
__forceinline avxi()
Constructors, Assignment & Cast Operators.
__forceinline avxi(const __m256 a)
__forceinline avxi(int32_t a, int32_t b, int32_t c, int32_t d)
__forceinline avxi(const __m256i a)
__forceinline avxi(StepTy)
__forceinline avxi(const ssei &a)
__forceinline avxi(OneTy)
__forceinline avxi(const ssei &a, const ssei &b)
__forceinline avxi(PosInfTy)
__forceinline const int32_t & operator[](const size_t i) const
Array Access.
__forceinline avxi(const int32_t *const a)
__forceinline avxi(int32_t a)
__forceinline avxi(const avxi &a)
__forceinline avxi(int32_t a, int32_t b)
__forceinline avxi(int32_t a, int32_t b, int32_t c, int32_t d, int32_t e, int32_t f, int32_t g, int32_t h)
__forceinline int32_t & operator[](const size_t i)
__forceinline avxi & operator=(const avxi &a)
__forceinline uint32_t movemask(const avxb &a)
__forceinline float extract< 0 >(const avxf &a)
__forceinline const avxi vreduce_min2(const avxi &v)
Reductions.
__forceinline const avxi max(const avxi &a, const avxi &b)
__forceinline const avxb operator>(const avxi &a, const avxi &b)
ccl_device_inline void print_avxi(const char *label, const avxi &a)
Output Operators.
__forceinline const avxi operator&(const avxi &a, const avxi &b)
__forceinline const avxb operator!=(const avxi &a, const avxi &b)
__forceinline const avxi vreduce_min4(const avxi &v)
__forceinline const avxi operator<<(const avxi &a, const int32_t n)
__forceinline const ssei extract(const avxi &a)
__forceinline avxi unpackhi(const avxi &a, const avxi &b)
__forceinline const avxi shuffle< 0, 0, 2, 2 >(const avxi &b)
__forceinline avxi & operator-=(avxi &a, const avxi &b)
__forceinline const avxb operator>=(const avxi &a, const avxi &b)
__forceinline const avxi shuffle< 0, 1, 0, 1 >(const avxi &b)
__forceinline int reduce_max(const avxi &v)
__forceinline const avxi vreduce_add2(const avxi &v)
__forceinline const avxi vreduce_add4(const avxi &v)
__forceinline avxi & operator<<=(avxi &a, const int32_t b)
__forceinline uint32_t select_max(const avxi &v)
__forceinline const avxi min(const avxi &a, const avxi &b)
__forceinline const avxi vreduce_add(const avxi &v)
__forceinline const avxi shuffle(const avxi &a)
__forceinline avxi & operator|=(avxi &a, const avxi &b)
__forceinline const avxi abs(const avxi &a)
__forceinline const avxb operator<=(const avxi &a, const avxi &b)
__forceinline int reduce_min(const avxi &v)
__forceinline const avxi vreduce_max4(const avxi &v)
__forceinline const avxi shuffle< 1, 1, 3, 3 >(const avxi &b)
__forceinline const avxi operator>>(const avxi &a, const int32_t n)
__forceinline const avxi vreduce_min(const avxi &v)
__forceinline const avxi broadcast(const int *ptr)
__forceinline avxi & operator>>=(avxi &a, const int32_t b)
__forceinline const avxb operator==(const avxi &a, const avxi &b)
Comparison Operators + Select.
__forceinline const avxi operator|(const avxi &a, const avxi &b)
__forceinline const avxb operator<(const avxi &a, const avxi &b)
__forceinline avxi & operator+=(avxi &a, const avxi &b)
Assignment Operators.
__forceinline const avxi operator^(const avxi &a, const avxi &b)
__forceinline const avxi vreduce_max(const avxi &v)
__forceinline const avxi srl(const avxi &a, const int32_t b)
__forceinline const avxi vreduce_max2(const avxi &v)
__forceinline const avxi insert(const avxi &a, const ssei &b)
__forceinline avxi unpacklo(const avxi &a, const avxi &b)
Movement/Shifting/Shuffling Functions.
__forceinline const avxi select(const avxb &m, const avxi &t, const avxi &f)
__forceinline avxi & operator^=(avxi &a, const avxi &b)
__forceinline const avxi operator+(const avxi &a)
__forceinline int reduce_add(const avxi &v)
__forceinline uint32_t select_min(const avxi &v)
__forceinline const avxi sra(const avxi &a, const int32_t b)
__forceinline const avxi operator-(const avxi &a)
__forceinline avxi & operator*=(avxi &a, const avxi &b)
__forceinline avxi & operator&=(avxi &a, const avxi &b)
__forceinline const avxi cast(const __m256 &a)
Unary Operators.
__forceinline const avxi operator*(const avxi &a, const avxi &b)
CCL_NAMESPACE_BEGIN __forceinline uint32_t __bsf(const uint32_t x)