6#ifndef __UTIL_MATH_INT4_H__
7#define __UTIL_MATH_INT4_H__
10# error "Do not include this file directly, include util/types.h instead."
19 return int4(_mm_add_epi32(a.m128,
b.m128));
33 return int4(_mm_sub_epi32(a.m128,
b.m128));
47 return int4(_mm_srai_epi32(a.m128, i));
56 return int4(_mm_slli_epi32(a.m128, i));
65 return int4(_mm_cmplt_epi32(a.m128,
b.m128));
79 return int4(_mm_cmpeq_epi32(a.m128,
b.m128));
93 return int4(_mm_xor_si128(_mm_set1_epi32(0xffffffff), _mm_cmplt_epi32(a.m128,
b.m128)));
106# ifdef __KERNEL_SSE__
107 return int4(_mm_and_si128(a.m128,
b.m128));
115# ifdef __KERNEL_SSE__
116 return int4(_mm_or_si128(a.m128,
b.m128));
124# ifdef __KERNEL_SSE__
125 return int4(_mm_xor_si128(a.m128,
b.m128));
197# ifdef __KERNEL_SSE__
200 return int4(_mm_srli_epi32(a.m128,
b));
206# if defined(__KERNEL_SSE__) && defined(__KERNEL_SSE42__)
207 return int4(_mm_min_epi32(a.m128,
b.m128));
215# if defined(__KERNEL_SSE__) && defined(__KERNEL_SSE42__)
216 return int4(_mm_max_epi32(a.m128,
b.m128));
224 return min(
max(a, mn), mx);
229# ifdef __KERNEL_SSE__
230 return int4(_mm_or_si128(_mm_and_si128(
mask, a), _mm_andnot_si128(
mask,
b)));
239# ifdef __KERNEL_SSE__
240 return int4(_mm_loadu_si128((__m128i *)
v));
250 return float4(_mm_castsi128_ps(a));
260 return int4(_mm_andnot_si128(a.m128,
b.m128));
263template<
size_t i0,
size_t i1,
size_t i2,
size_t i3>
266# ifdef __KERNEL_NEON__
267 int32x4_t
result = shuffle_neon<int32x4_t, i0, i1, i2, i3>(vreinterpretq_s32_m128i(a));
270 return int4(_mm_shuffle_epi32(a, _MM_SHUFFLE(i3, i2, i1, i0)));
274template<
size_t i0,
size_t i1,
size_t i2,
size_t i3>
277# ifdef __KERNEL_NEON__
278 int32x4_t
result = shuffle_neon<int32x4_t, i0, i1, i2, i3>(vreinterpretq_s32_m128i(a),
279 vreinterpretq_s32_m128i(
b));
282 return int4(_mm_castps_si128(
283 _mm_shuffle_ps(_mm_castsi128_ps(a), _mm_castsi128_ps(
b), _MM_SHUFFLE(i3, i2, i1, i0))));
289 return shuffle<i0, i0, i0, i0>(
b);
VecBase< int32_t, 4 > int4
ATTR_WARN_UNUSED_RESULT const BMVert * v
local_group_size(16, 16) .push_constant(Type b
#define ccl_device_forceinline
#define ccl_device_inline
#define CCL_NAMESPACE_END
ccl_device_inline float4 mask(const int4 mask, const float4 a)
ccl_device_inline int4 load_int4(const int *v)
ccl_device_inline int4 operator>>(const int4 a, int i)
ccl_device_inline int4 operator^(const int4 a, const int4 b)
ccl_device_inline int4 operator<(const int4 a, const int4 b)
ccl_device_inline int4 & operator|=(int4 &a, const int4 b)
ccl_device_inline int4 operator-(const int4 a, const int4 b)
ccl_device_inline int4 operator<<(const int4 a, int i)
ccl_device_inline int4 operator==(const int4 a, const int4 b)
ccl_device_inline int4 & operator&=(int4 &a, const int4 b)
ccl_device_inline int4 operator|(const int4 a, const int4 b)
ccl_device_inline int4 operator+=(int4 &a, const int4 b)
ccl_device_inline int4 & operator>>=(int4 &a, const int32_t b)
ccl_device_inline int4 select(const int4 mask, const int4 a, const int4 b)
ccl_device_inline int4 & operator<<=(int4 &a, const int32_t b)
CCL_NAMESPACE_BEGIN ccl_device_inline int4 operator+(const int4 a, const int4 b)
ccl_device_inline int4 operator>=(const int4 a, const int4 b)
ccl_device_inline int4 & operator^=(int4 &a, const int4 b)
ccl_device_inline int4 operator&(const int4 a, const int4 b)
ccl_device_inline int4 clamp(const int4 a, const int4 mn, const int4 mx)
ccl_device_inline float4 cast(const int4 a)
ccl_device_inline int4 operator-=(int4 &a, const int4 b)
VecBase< float, 4 > float4