18 #ifndef __UTIL_SSEI_H__
19 #define __UTIL_SSEI_H__
23 #ifdef __KERNEL_SSE2__
102 return _mm_castps_si128(
a);
110 return _mm_sub_epi32(_mm_setzero_si128(),
a.m128);
112 # if defined(__KERNEL_SSSE3__)
115 return _mm_abs_epi32(
a.m128);
125 return _mm_add_epi32(
a.m128, b.m128);
138 return _mm_sub_epi32(
a.m128, b.m128);
149 # if defined(__KERNEL_SSE41__)
152 return _mm_mullo_epi32(
a.m128, b.m128);
166 return _mm_and_si128(
a.m128, b.m128);
179 return _mm_or_si128(
a.m128, b.m128);
192 return _mm_xor_si128(
a.m128, b.m128);
205 return _mm_slli_epi32(
a.m128, n);
209 return _mm_srai_epi32(
a.m128, n);
214 return _mm_andnot_si128(
a.m128, b.m128);
218 return _mm_andnot_si128(
cast(
a.m128), b.m128);
222 return _mm_andnot_si128(
a.m128,
cast(b.m128));
227 return _mm_srai_epi32(
a.m128, b);
231 return _mm_srli_epi32(
a.m128, b);
234 # if defined(__KERNEL_SSE41__)
237 return _mm_min_epi32(
a.m128, b.m128);
241 return min(
a, ssei(b));
245 return min(ssei(
a), b);
250 return _mm_max_epi32(
a.m128, b.m128);
254 return max(
a, ssei(b));
258 return max(ssei(
a), b);
284 # if defined(__KERNEL_SSE41__)
337 return _mm_castsi128_ps(_mm_cmpeq_epi32(
a.m128, b.m128));
363 return _mm_castsi128_ps(_mm_cmplt_epi32(
a.m128, b.m128));
389 return _mm_castsi128_ps(_mm_cmpgt_epi32(
a.m128, b.m128));
415 # ifdef __KERNEL_SSE41__
416 return _mm_castps_si128(_mm_blendv_ps(_mm_castsi128_ps(f), _mm_castsi128_ps(
t), m));
418 return _mm_or_si128(_mm_and_si128(m,
t), _mm_andnot_si128(m, f));
424 # if defined(__KERNEL_SSE41__) && \
425 ((!defined(__clang__) && !defined(_MSC_VER)) || defined(__INTEL_COMPILER))
426 return _mm_castps_si128(_mm_blend_ps(_mm_castsi128_ps(f), _mm_castsi128_ps(
t),
mask));
438 return _mm_unpacklo_epi32(
a, b);
442 return _mm_unpackhi_epi32(
a, b);
445 template<
size_t i0,
size_t i1,
size_t i2,
size_t i3>
448 # ifdef __KERNEL_NEON__
449 return shuffle_neon<ssei, i0, i1, i2, i3>(
a);
451 return _mm_shuffle_epi32(
a, _MM_SHUFFLE(i3, i2,
i1, i0));
455 template<
size_t i0,
size_t i1,
size_t i2,
size_t i3>
458 # ifdef __KERNEL_NEON__
459 return shuffle_neon<ssei, i0, i1, i2, i3>(
a, b);
461 return _mm_castps_si128(
462 _mm_shuffle_ps(_mm_castsi128_ps(
a), _mm_castsi128_ps(b), _MM_SHUFFLE(i3, i2,
i1, i0)));
468 return shuffle<i0, i0, i0, i0>(b);
471 # if defined(__KERNEL_SSE41__)
474 return _mm_extract_epi32(b, src);
478 return _mm_insert_epi32(
a, b, dst);
497 # if defined(__KERNEL_SSE41__)
500 ssei h =
min(shuffle<1, 0, 3, 2>(
v),
v);
501 return min(shuffle<2, 3, 0, 1>(h), h);
505 ssei h =
max(shuffle<1, 0, 3, 2>(
v),
v);
506 return max(shuffle<2, 3, 0, 1>(h), h);
510 ssei h = shuffle<1, 0, 3, 2>(
v) +
v;
511 return shuffle<2, 3, 0, 1>(h) + h;
516 # ifdef __KERNEL_NEON__
517 return vminvq_s32(
v);
524 # ifdef __KERNEL_NEON__
525 return vmaxvq_s32(
v);
532 # ifdef __KERNEL_NEON__
533 return vaddvq_s32(
v);
550 const ssei
a =
select(valid,
v, ssei((
int)pos_inf));
555 const ssei
a =
select(valid,
v, ssei((
int)neg_inf));
563 return (
a < b) ?
a : b;
567 return (
a > b) ?
a : b;
571 return ssei_min(ssei_min(
v[0],
v[1]), ssei_min(
v[2],
v[3]));
575 return ssei_max(ssei_max(
v[0],
v[1]), ssei_max(
v[2],
v[3]));
579 return v[0] +
v[1] +
v[2] +
v[3];
590 return _mm_load_si128((__m128i *)
a);
595 _mm_store_si128((__m128i *)
ptr,
v);
600 _mm_storeu_si128((__m128i *)
ptr,
v);
605 # if defined(__KERNEL_AVX__)
606 _mm_maskstore_ps((
float *)
ptr, (__m128i)
mask, _mm_castsi128_ps(i));
614 # if defined(__KERNEL_SSE41__)
615 return _mm_stream_load_si128((__m128i *)
ptr);
617 return _mm_load_si128((__m128i *)
ptr);
623 # if defined(__KERNEL_SSE41__)
624 _mm_stream_ps((
float *)
ptr, _mm_castsi128_ps(
v));
626 _mm_store_si128((__m128i *)
ptr,
v);
636 printf(
"%s: %df %df %df %d\n",
label,
a[0],
a[1],
a[2],
a[3]);
void BLI_kdtree_nd_() insert(KDTree *tree, int index, const float co[KD_DIMS]) ATTR_NONNULL(1
_GL_VOID GLfloat value _GL_VOID_RET _GL_VOID const GLuint GLboolean *residences _GL_BOOL_RET _GL_VOID GLsizei GLfloat GLfloat GLfloat GLfloat const GLubyte *bitmap _GL_VOID_RET _GL_VOID GLenum const void *lists _GL_VOID_RET _GL_VOID const GLdouble *equation _GL_VOID_RET _GL_VOID GLdouble GLdouble blue _GL_VOID_RET _GL_VOID GLfloat GLfloat blue _GL_VOID_RET _GL_VOID GLint GLint blue _GL_VOID_RET _GL_VOID GLshort GLshort blue _GL_VOID_RET _GL_VOID GLubyte GLubyte blue _GL_VOID_RET _GL_VOID GLuint GLuint blue _GL_VOID_RET _GL_VOID GLushort GLushort blue _GL_VOID_RET _GL_VOID GLbyte GLbyte GLbyte alpha _GL_VOID_RET _GL_VOID GLdouble GLdouble GLdouble alpha _GL_VOID_RET _GL_VOID GLfloat GLfloat GLfloat alpha _GL_VOID_RET _GL_VOID GLint GLint GLint alpha _GL_VOID_RET _GL_VOID GLshort GLshort GLshort alpha _GL_VOID_RET _GL_VOID GLubyte GLubyte GLubyte alpha _GL_VOID_RET _GL_VOID GLuint GLuint GLuint alpha _GL_VOID_RET _GL_VOID GLushort GLushort GLushort alpha _GL_VOID_RET _GL_VOID GLenum mode _GL_VOID_RET _GL_VOID GLint GLsizei GLsizei GLenum type _GL_VOID_RET _GL_VOID GLsizei GLenum GLenum const void *pixels _GL_VOID_RET _GL_VOID const void *pointer _GL_VOID_RET _GL_VOID GLdouble v _GL_VOID_RET _GL_VOID GLfloat v _GL_VOID_RET _GL_VOID GLint i1
_GL_VOID GLfloat value _GL_VOID_RET _GL_VOID const GLuint GLboolean *residences _GL_BOOL_RET _GL_VOID GLsizei GLfloat GLfloat GLfloat GLfloat const GLubyte *bitmap _GL_VOID_RET _GL_VOID GLenum const void *lists _GL_VOID_RET _GL_VOID const GLdouble *equation _GL_VOID_RET _GL_VOID GLdouble GLdouble blue _GL_VOID_RET _GL_VOID GLfloat GLfloat blue _GL_VOID_RET _GL_VOID GLint GLint blue _GL_VOID_RET _GL_VOID GLshort GLshort blue _GL_VOID_RET _GL_VOID GLubyte GLubyte blue _GL_VOID_RET _GL_VOID GLuint GLuint blue _GL_VOID_RET _GL_VOID GLushort GLushort blue _GL_VOID_RET _GL_VOID GLbyte GLbyte GLbyte alpha _GL_VOID_RET _GL_VOID GLdouble GLdouble GLdouble alpha _GL_VOID_RET _GL_VOID GLfloat GLfloat GLfloat alpha _GL_VOID_RET _GL_VOID GLint GLint GLint alpha _GL_VOID_RET _GL_VOID GLshort GLshort GLshort alpha _GL_VOID_RET _GL_VOID GLubyte GLubyte GLubyte alpha _GL_VOID_RET _GL_VOID GLuint GLuint GLuint alpha _GL_VOID_RET _GL_VOID GLushort GLushort GLushort alpha _GL_VOID_RET _GL_VOID GLenum mode _GL_VOID_RET _GL_VOID GLint GLsizei GLsizei GLenum type _GL_VOID_RET _GL_VOID GLsizei GLenum GLenum const void *pixels _GL_VOID_RET _GL_VOID const void *pointer _GL_VOID_RET _GL_VOID GLdouble v _GL_VOID_RET _GL_VOID GLfloat v _GL_VOID_RET _GL_VOID GLint GLint i2 _GL_VOID_RET _GL_VOID GLint j _GL_VOID_RET _GL_VOID GLfloat param _GL_VOID_RET _GL_VOID GLint param _GL_VOID_RET _GL_VOID GLdouble GLdouble GLdouble GLdouble GLdouble zFar _GL_VOID_RET _GL_UINT GLdouble *equation _GL_VOID_RET _GL_VOID GLenum GLint *params _GL_VOID_RET _GL_VOID GLenum GLfloat *v _GL_VOID_RET _GL_VOID GLenum GLfloat *params _GL_VOID_RET _GL_VOID GLfloat *values _GL_VOID_RET _GL_VOID GLushort *values _GL_VOID_RET _GL_VOID GLenum GLfloat *params _GL_VOID_RET _GL_VOID GLenum GLdouble *params _GL_VOID_RET _GL_VOID GLenum GLint *params _GL_VOID_RET _GL_VOID GLsizei const void *pointer _GL_VOID_RET _GL_VOID GLsizei const void *pointer _GL_VOID_RET _GL_BOOL GLfloat param _GL_VOID_RET _GL_VOID GLint param _GL_VOID_RET _GL_VOID GLenum GLfloat param _GL_VOID_RET _GL_VOID GLenum GLint param _GL_VOID_RET _GL_VOID GLushort pattern _GL_VOID_RET _GL_VOID GLdouble GLdouble GLint GLint const GLdouble *points _GL_VOID_RET _GL_VOID GLdouble GLdouble GLint GLint GLdouble GLdouble GLint GLint const GLdouble *points _GL_VOID_RET _GL_VOID GLdouble GLdouble u2 _GL_VOID_RET _GL_VOID GLdouble GLdouble GLint GLdouble GLdouble v2 _GL_VOID_RET _GL_VOID GLenum GLfloat param _GL_VOID_RET _GL_VOID GLenum GLint param _GL_VOID_RET _GL_VOID GLenum mode _GL_VOID_RET _GL_VOID GLdouble GLdouble nz _GL_VOID_RET _GL_VOID GLfloat GLfloat nz _GL_VOID_RET _GL_VOID GLint GLint nz _GL_VOID_RET _GL_VOID GLshort GLshort nz _GL_VOID_RET _GL_VOID GLsizei const void *pointer _GL_VOID_RET _GL_VOID GLsizei const GLfloat *values _GL_VOID_RET _GL_VOID GLsizei const GLushort *values _GL_VOID_RET _GL_VOID GLint param _GL_VOID_RET _GL_VOID const GLuint const GLclampf *priorities _GL_VOID_RET _GL_VOID GLdouble y _GL_VOID_RET _GL_VOID GLfloat y _GL_VOID_RET _GL_VOID GLint y _GL_VOID_RET _GL_VOID GLshort y _GL_VOID_RET _GL_VOID GLdouble GLdouble z _GL_VOID_RET _GL_VOID GLfloat GLfloat z _GL_VOID_RET _GL_VOID GLint GLint z _GL_VOID_RET _GL_VOID GLshort GLshort z _GL_VOID_RET _GL_VOID GLdouble GLdouble GLdouble w _GL_VOID_RET _GL_VOID GLfloat GLfloat GLfloat w _GL_VOID_RET _GL_VOID GLint GLint GLint w _GL_VOID_RET _GL_VOID GLshort GLshort GLshort w _GL_VOID_RET _GL_VOID GLdouble GLdouble GLdouble y2 _GL_VOID_RET _GL_VOID GLfloat GLfloat GLfloat y2 _GL_VOID_RET _GL_VOID GLint GLint GLint y2 _GL_VOID_RET _GL_VOID GLshort GLshort GLshort y2 _GL_VOID_RET _GL_VOID GLdouble GLdouble GLdouble z _GL_VOID_RET _GL_VOID GLdouble GLdouble z _GL_VOID_RET _GL_VOID GLuint *buffer _GL_VOID_RET _GL_VOID GLdouble t _GL_VOID_RET _GL_VOID GLfloat t _GL_VOID_RET _GL_VOID GLint t _GL_VOID_RET _GL_VOID GLshort t _GL_VOID_RET _GL_VOID GLdouble t
Group RGB to Bright Vector Camera Vector Combine Material Light Line Style Layer Add Ambient Diffuse Glossy Refraction Transparent Toon Principled Hair Volume Principled Light Particle Volume Image Sky Noise Wave Voronoi Brick Texture Vector Combine Vertex Separate Vector White RGB Map Separate Set Z Dilate Combine Combine Color Channel Split ID Combine Luminance Directional Alpha Distance Hue Movie Ellipse Bokeh View Corner Anti Mix RGB Hue Separate TEX_NODE_PROC TEX_NODE_PROC TEX_NODE_PROC TEX_NODE_PROC TEX_NODE_PROC Boolean Random Float
ATTR_WARN_UNUSED_RESULT const BMVert * v
static DBVT_INLINE btScalar size(const btDbvtVolume &a)
btGeneric6DofConstraint & operator=(btGeneric6DofConstraint &other)
SIMD_FORCE_INLINE btVector3 & operator[](int i)
Get a mutable reference to a row of the matrix as a vector.
__forceinline float extract(const int4 &b)
CCL_NAMESPACE_BEGIN PackFlags operator|=(PackFlags &pack_flags, uint32_t value)
std::ostream & operator<<(std::ostream &stream, const GeometrySet &geometry_set)
bool operator==(const GeometrySet &UNUSED(a), const GeometrySet &UNUSED(b))
static void shuffle(float2 points[], int size, int rng_seed)
#define ccl_device_inline
#define CCL_NAMESPACE_END
Matrix< T, M, N > operator-(const Matrix< T, M, N > &m1, const Matrix< T, M, N > &m2)
Vec< T, N > operator*(const typename Vec< T, N >::value_type r, const Vec< T, N > &v)
GPUState operator^(const GPUState &a, const GPUState &b)
constexpr bool operator!=(StringRef a, StringRef b)
constexpr bool operator>=(StringRef a, StringRef b)
constexpr bool operator<(StringRef a, StringRef b)
constexpr bool operator<=(StringRef a, StringRef b)
constexpr bool operator>(StringRef a, StringRef b)
std::string operator+(StringRef a, StringRef b)
__forceinline uint32_t movemask(const avxb &a)
__forceinline const avxb operator&(const avxb &a, const avxb &b)
Binary Operators.
__forceinline const avxb operator^=(avxb &a, const avxb &b)
__forceinline const avxb operator&=(avxb &a, const avxb &b)
Assignment Operators.
__forceinline const avxb unpacklo(const avxb &a, const avxb &b)
Movement/Shifting/Shuffling Functions.
__forceinline const avxb operator|(const avxb &a, const avxb &b)
__forceinline const avxb select(const avxb &m, const avxb &t, const avxb &f)
__forceinline const avxb unpackhi(const avxb &a, const avxb &b)
__forceinline float extract< 0 >(const avxf &a)
__forceinline avxi & operator-=(avxi &a, const avxi &b)
__forceinline int reduce_max(const avxi &v)
__forceinline avxi & operator<<=(avxi &a, const int32_t b)
__forceinline uint32_t select_max(const avxi &v)
__forceinline const avxi vreduce_add(const avxi &v)
__forceinline const avxi abs(const avxi &a)
__forceinline int reduce_min(const avxi &v)
__forceinline const avxi operator>>(const avxi &a, const int32_t n)
__forceinline const avxi vreduce_min(const avxi &v)
__forceinline avxi & operator>>=(avxi &a, const int32_t b)
__forceinline avxi & operator+=(avxi &a, const avxi &b)
Assignment Operators.
__forceinline const avxi vreduce_max(const avxi &v)
__forceinline const avxi srl(const avxi &a, const int32_t b)
__forceinline int reduce_add(const avxi &v)
__forceinline uint32_t select_min(const avxi &v)
__forceinline const avxi sra(const avxi &a, const int32_t b)
__forceinline avxi & operator*=(avxi &a, const avxi &b)
ccl_device_inline float4 mask(const int4 &mask, const float4 &a)
CCL_NAMESPACE_BEGIN __forceinline uint32_t __bsf(const uint32_t x)