17 return int4(_mm_add_epi32(a.m128,
b.m128));
31 return int4(_mm_sub_epi32(a.m128,
b.m128));
45 return int4(_mm_srai_epi32(a.m128,
i));
54 return int4(_mm_slli_epi32(a.m128,
i));
63 return int4(_mm_cmplt_epi32(a.m128,
b.m128));
77 return int4(_mm_cmpeq_epi32(a.m128,
b.m128));
91 return int4(_mm_xor_si128(_mm_set1_epi32(0xffffffff), _mm_cmplt_epi32(a.m128,
b.m128)));
104# ifdef __KERNEL_SSE__
105 return int4(_mm_and_si128(a.m128,
b.m128));
113# ifdef __KERNEL_SSE__
114 return int4(_mm_or_si128(a.m128,
b.m128));
122# ifdef __KERNEL_SSE__
123 return int4(_mm_xor_si128(a.m128,
b.m128));
195# ifdef __KERNEL_SSE__
198 return int4(_mm_srli_epi32(a.m128,
b));
204# if defined(__KERNEL_SSE__) && defined(__KERNEL_SSE42__)
205 return int4(_mm_min_epi32(a.m128,
b.m128));
213# if defined(__KERNEL_SSE__) && defined(__KERNEL_SSE42__)
214 return int4(_mm_max_epi32(a.m128,
b.m128));
222 return min(
max(a, mn), mx);
227# ifdef __KERNEL_SSE__
228 return int4(_mm_or_si128(_mm_and_si128(
mask, a), _mm_andnot_si128(
mask,
b)));
237# ifdef __KERNEL_SSE__
238 return int4(_mm_loadu_si128((__m128i *)
v));
248 return float4(_mm_castsi128_ps(a));
258 return int4(_mm_andnot_si128(a.m128,
b.m128));
261template<
size_t i0, const
size_t i1, const
size_t i2, const
size_t i3>
264# ifdef __KERNEL_NEON__
265 int32x4_t
result = shuffle_neon<int32x4_t, i0, i1, i2, i3>(vreinterpretq_s32_m128i(a));
268 return int4(_mm_shuffle_epi32(a, _MM_SHUFFLE(i3, i2, i1, i0)));
272template<
size_t i0, const
size_t i1, const
size_t i2, const
size_t i3>
275# ifdef __KERNEL_NEON__
276 int32x4_t
result = shuffle_neon<int32x4_t, i0, i1, i2, i3>(vreinterpretq_s32_m128i(a),
277 vreinterpretq_s32_m128i(
b));
280 return int4(_mm_castps_si128(
281 _mm_shuffle_ps(_mm_castsi128_ps(a), _mm_castsi128_ps(
b), _MM_SHUFFLE(i3, i2, i1, i0))));
287 return shuffle<i0, i0, i0, i0>(
b);
ATTR_WARN_UNUSED_RESULT const BMVert * v
#define ccl_device_forceinline
#define ccl_device_inline
#define CCL_NAMESPACE_END
VecBase< float, 4 > float4
ccl_device_inline float2 mask(const MaskType mask, const float2 a)
ccl_device_inline int4 operator>>(const int4 a, const int i)
ccl_device_inline int4 load_int4(const int *v)
ccl_device_inline int4 operator^(const int4 a, const int4 b)
ccl_device_inline int4 operator<(const int4 a, const int4 b)
ccl_device_inline int4 & operator|=(int4 &a, const int4 b)
ccl_device_inline int4 operator-(const int4 a, const int4 b)
ccl_device_inline int4 operator==(const int4 a, const int4 b)
ccl_device_inline int4 & operator&=(int4 &a, const int4 b)
ccl_device_inline int4 operator|(const int4 a, const int4 b)
ccl_device_inline int4 operator+=(int4 &a, const int4 b)
ccl_device_inline int4 & operator>>=(int4 &a, const int32_t b)
ccl_device_inline int4 & operator<<=(int4 &a, const int32_t b)
CCL_NAMESPACE_BEGIN ccl_device_inline int4 operator+(const int4 a, const int4 b)
ccl_device_inline int4 operator>=(const int4 a, const int4 b)
ccl_device_inline int4 operator<<(const int4 a, const int i)
ccl_device_inline int4 & operator^=(int4 &a, const int4 b)
ccl_device_inline int4 operator&(const int4 a, const int4 b)
ccl_device_inline int4 clamp(const int4 a, const int4 mn, const int4 mx)
ccl_device_inline int4 operator-=(int4 &a, const int4 b)