17 #include "testing/testing.h"
26 #ifdef __KERNEL_AVX2__
29 # ifdef __KERNEL_AVX__
35 #define INIT_AVX_TEST \
36 if (!validate_cpu_capabilities()) \
39 const avxf avxf_a(0.1f, 0.2f, 0.3f, 0.4f, 0.5f, 0.6f, 0.7f, 0.8f); \
40 const avxf avxf_b(1.0f, 2.0f, 3.0f, 4.0f, 5.0f, 6.0f, 7.0f, 8.0f); \
41 const avxf avxf_c(1.1f, 2.2f, 3.3f, 4.4f, 5.5f, 6.6f, 7.7f, 8.8f);
/* Expect every lane of vector `a` to equal the scalar expression `b`.
 *
 * `a` must provide a `size` member and `operator[]`. This is a textual macro,
 * so `a` and `b` are re-evaluated on every iteration; the loop counter is
 * deliberately named `index` so that `b` may refer to the current lane
 * (e.g. `static_cast<float>(index)`).
 *
 * Arguments are parenthesized so compound expressions expand as a unit, and
 * the loop body is braced so the assertion cannot detach from the loop. */
#define compare_vector_scalar(a, b) \
  for (size_t index = 0; index < (a).size; index++) { \
    EXPECT_FLOAT_EQ((a)[index], (b)); \
  }
/* Expect vectors `a` and `b` to match lane by lane.
 *
 * `a` must provide a `size` member; both operands must provide `operator[]`.
 * The lane count is taken from `a` only. This is a textual macro, so both
 * operands are re-evaluated on every iteration.
 *
 * Arguments are parenthesized so compound expressions expand as a unit, and
 * the loop body is braced so the assertion cannot detach from the loop. */
#define compare_vector_vector(a, b) \
  for (size_t index = 0; index < (a).size; index++) { \
    EXPECT_FLOAT_EQ((a)[index], (b)[index]); \
  }
/* Expect vectors `a` and `b` to match lane by lane within `abserror`.
 *
 * `a` must provide a `size` member; both operands must provide `operator[]`.
 * The lane count is taken from `a` only, and `abserror` is forwarded to
 * EXPECT_NEAR as the tolerance. This is a textual macro, so all three
 * arguments are re-evaluated on every iteration.
 *
 * Arguments are parenthesized so compound expressions expand as a unit, and
 * the loop body is braced so the assertion cannot detach from the loop. */
#define compare_vector_vector_near(a, b, abserror) \
  for (size_t index = 0; index < (a).size; index++) { \
    EXPECT_NEAR((a)[index], (b)[index], (abserror)); \
  }
55 #define basic_test_vv(a, b, op) \
58 for (size_t i = 0; i < a.size; i++) \
59 EXPECT_FLOAT_EQ(c[i], a[i] op b[i]);
62 #define basic_test_vf(a, b, op) \
65 for (size_t i = 0; i < a.size; i++) \
66 EXPECT_FLOAT_EQ(c[i], a[i] op b);
84 static_cast<float>(index));
88 avxf(1.0f, 2.0f, 3.0f, 4.0f, 1.0f, 2.0f, 3.0f, 4.0f));
90 avxf(0.0f, 3.0f, 2.0f, 1.0f, 0.0f, 3.0f, 2.0f, 1.0f));
97 avxf(1.0f, 2.0f, 3.0f, 4.0f, 5.0f, 6.0f, 7.0f, 8.0f));
110 avxf res = set_sign_bit<1, 0, 0, 0, 0, 0, 0, 0>(avxf_a);
117 avxf res =
msub(avxf_a, avxf_b, avxf_c);
118 avxf exp =
avxf((avxf_a[7] * avxf_b[7]) - avxf_c[7],
119 (avxf_a[6] * avxf_b[6]) - avxf_c[6],
120 (avxf_a[5] * avxf_b[5]) - avxf_c[5],
121 (avxf_a[4] * avxf_b[4]) - avxf_c[4],
122 (avxf_a[3] * avxf_b[3]) - avxf_c[3],
123 (avxf_a[2] * avxf_b[2]) - avxf_c[2],
124 (avxf_a[1] * avxf_b[1]) - avxf_c[1],
125 (avxf_a[0] * avxf_b[0]) - avxf_c[0]);
132 avxf res =
madd(avxf_a, avxf_b, avxf_c);
133 avxf exp =
avxf((avxf_a[7] * avxf_b[7]) + avxf_c[7],
134 (avxf_a[6] * avxf_b[6]) + avxf_c[6],
135 (avxf_a[5] * avxf_b[5]) + avxf_c[5],
136 (avxf_a[4] * avxf_b[4]) + avxf_c[4],
137 (avxf_a[3] * avxf_b[3]) + avxf_c[3],
138 (avxf_a[2] * avxf_b[2]) + avxf_c[2],
139 (avxf_a[1] * avxf_b[1]) + avxf_c[1],
140 (avxf_a[0] * avxf_b[0]) + avxf_c[0]);
147 avxf res =
nmadd(avxf_a, avxf_b, avxf_c);
148 avxf exp =
avxf(avxf_c[7] - (avxf_a[7] * avxf_b[7]),
149 avxf_c[6] - (avxf_a[6] * avxf_b[6]),
150 avxf_c[5] - (avxf_a[5] * avxf_b[5]),
151 avxf_c[4] - (avxf_a[4] * avxf_b[4]),
152 avxf_c[3] - (avxf_a[3] * avxf_b[3]),
153 avxf_c[2] - (avxf_a[2] * avxf_b[2]),
154 avxf_c[1] - (avxf_a[1] * avxf_b[1]),
155 avxf_c[0] - (avxf_a[0] * avxf_b[0]));
162 avxf a(0.0f, 1.0f, 2.0f, 3.0f, 4.0f, 5.0f, 6.0f, 7.0f);
163 avxf b(7.0f, 6.0f, 5.0f, 4.0f, 3.0f, 2.0f, 1.0f, 0.0f);
166 a[0] <= b[0] ? -1 : 0,
167 a[1] <= b[1] ? -1 : 0,
168 a[2] <= b[2] ? -1 : 0,
169 a[3] <= b[3] ? -1 : 0,
170 a[4] <= b[4] ? -1 : 0,
171 a[5] <= b[5] ? -1 : 0,
172 a[6] <= b[6] ? -1 : 0,
173 a[7] <= b[7] ? -1 : 0,
181 avxf res = permute<3, 0, 1, 7, 6, 5, 2, 4>(avxf_b);
188 avxf res = blend<0, 0, 1, 0, 1, 0, 1, 0>(avxf_a, avxf_b);
195 avxf res = shuffle<0, 1, 2, 3, 1, 3, 2, 0>(avxf_a);
219 dot3(avxf_a, avxf_b, den, den2);
220 EXPECT_FLOAT_EQ(den, 14.9f);
221 EXPECT_FLOAT_EQ(den2, 2.9f);
#define CCL_NAMESPACE_END
#define make_float3(x, y, z)
INLINE Rall1d< T, V, S > exp(const Rall1d< T, V, S > &arg)
__forceinline avxf cross(const avxf &a, const avxf &b)
__forceinline const avxf madd(const avxf &a, const avxf &b, const avxf &c)
Ternary Operators.
__forceinline const avxf msub(const avxf &a, const avxf &b, const avxf &c)
__forceinline const avxf nmadd(const avxf &a, const avxf &b, const avxf &c)
__forceinline const avxf mm256_sqrt(const avxf &a)
#define TEST_CATEGORY_NAME
#define basic_test_vv(a, b, op)
static CCL_NAMESPACE_BEGIN bool validate_cpu_capabilities()
#define compare_vector_vector(a, b)
static const float float_b
TEST(TEST_CATEGORY_NAME, avxf_add_vv)
#define basic_test_vf(a, b, op)
#define compare_vector_vector_near(a, b, abserror)
#define compare_vector_scalar(a, b)
bool system_cpu_support_avx2()
bool system_cpu_support_avx()