Blender 2.93
util_ssef.h
1 /*
2  * Copyright 2011-2013 Intel Corporation
3  * Modifications Copyright 2014, Blender Foundation.
4  *
5  * Licensed under the Apache License, Version 2.0 (the "License");
6  * you may not use this file except in compliance with the License.
7  * You may obtain a copy of the License at
8  *
9  * http://www.apache.org/licenses/LICENSE-2.0
10  *
11  * Unless required by applicable law or agreed to in writing, software
12  * distributed under the License is distributed on an "AS IS" BASIS,
13  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
14  * See the License for the specific language governing permissions and
15  * limitations under the License.
16  */
17 
18 #ifndef __UTIL_SSEF_H__
19 #define __UTIL_SSEF_H__
20 
21 #include "util_ssei.h"
22 
23 CCL_NAMESPACE_BEGIN
24 
25 #ifdef __KERNEL_SSE2__
26 
27 struct sseb;
28 struct ssef;
29 
31 struct ssef {
32  typedef sseb Mask; // mask type
33  typedef ssei Int; // int type
34  typedef ssef Float; // float type
35 
36  enum { size = 4 }; // number of SIMD elements
37  union {
38  __m128 m128;
39  float f[4];
40  int i[4];
41  }; // data
42 
46 
47  __forceinline ssef()
48  {
49  }
50  __forceinline ssef(const ssef &other)
51  {
52  m128 = other.m128;
53  }
54  __forceinline ssef &operator=(const ssef &other)
55  {
56  m128 = other.m128;
57  return *this;
58  }
59 
60  __forceinline ssef(const __m128 a) : m128(a)
61  {
62  }
63  __forceinline operator const __m128 &() const
64  {
65  return m128;
66  }
67  __forceinline operator __m128 &()
68  {
69  return m128;
70  }
71 
72  __forceinline ssef(float a) : m128(_mm_set1_ps(a))
73  {
74  }
75  __forceinline ssef(float a, float b, float c, float d) : m128(_mm_setr_ps(a, b, c, d))
76  {
77  }
78 
79  __forceinline explicit ssef(const __m128i a) : m128(_mm_cvtepi32_ps(a))
80  {
81  }
82 
86 
87 # if defined(__KERNEL_AVX__)
88  static __forceinline ssef broadcast(const void *const a)
89  {
90  return _mm_broadcast_ss((float *)a);
91  }
92 # else
93  static __forceinline ssef broadcast(const void *const a)
94  {
95  return _mm_set1_ps(*(float *)a);
96  }
97 # endif
98 
102 
103  __forceinline const float &operator[](const size_t i) const
104  {
105  assert(i < 4);
106  return f[i];
107  }
108  __forceinline float &operator[](const size_t i)
109  {
110  assert(i < 4);
111  return f[i];
112  }
113 };
114 
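The union exposes the same 16 bytes as a raw __m128, as four floats, and as four ints, so values can be built from scalars or an existing register and read back per lane. A minimal usage sketch (assuming a translation unit that includes this header and is compiled with __KERNEL_SSE2__ defined):

  ssef a(1.0f, 2.0f, 3.0f, 4.0f); /* one value per lane */
  ssef b(0.5f);                   /* broadcast the same value to all four lanes */
  __m128 raw = a;                 /* implicit conversion to the underlying register */
  float third = a[2];             /* element access through the float view; here 3.0f */
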
118 
119 __forceinline const ssef cast(const __m128i &a)
120 {
121  return _mm_castsi128_ps(a);
122 }
123 __forceinline const ssef operator+(const ssef &a)
124 {
125  return a;
126 }
127 __forceinline const ssef operator-(const ssef &a)
128 {
129  return _mm_xor_ps(a.m128, _mm_castsi128_ps(_mm_set1_epi32(0x80000000)));
130 }
131 __forceinline const ssef abs(const ssef &a)
132 {
133  return _mm_and_ps(a.m128, _mm_castsi128_ps(_mm_set1_epi32(0x7fffffff)));
134 }
135 # if defined(__KERNEL_SSE41__)
136 __forceinline const ssef sign(const ssef &a)
137 {
138  return _mm_blendv_ps(ssef(1.0f), -ssef(1.0f), _mm_cmplt_ps(a, ssef(0.0f)));
139 }
140 # endif
141 __forceinline const ssef signmsk(const ssef &a)
142 {
143  return _mm_and_ps(a.m128, _mm_castsi128_ps(_mm_set1_epi32(0x80000000)));
144 }
145 
146 __forceinline const ssef rcp(const ssef &a)
147 {
148  const ssef r = _mm_rcp_ps(a.m128);
149  return _mm_sub_ps(_mm_add_ps(r, r), _mm_mul_ps(_mm_mul_ps(r, r), a));
150 }
151 __forceinline const ssef sqr(const ssef &a)
152 {
153  return _mm_mul_ps(a, a);
154 }
155 __forceinline const ssef mm_sqrt(const ssef &a)
156 {
157  return _mm_sqrt_ps(a.m128);
158 }
159 __forceinline const ssef rsqrt(const ssef &a)
160 {
161  const ssef r = _mm_rsqrt_ps(a.m128);
162  return _mm_add_ps(
163  _mm_mul_ps(_mm_set_ps(1.5f, 1.5f, 1.5f, 1.5f), r),
164  _mm_mul_ps(_mm_mul_ps(_mm_mul_ps(a, _mm_set_ps(-0.5f, -0.5f, -0.5f, -0.5f)), r),
165  _mm_mul_ps(r, r)));
166 }
167 
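rcp() and rsqrt() refine the approximate hardware results (about 12 bits of precision) with one Newton-Raphson step: r' = r * (2 - a * r) for the reciprocal and r' = r * (1.5 - 0.5 * a * r * r) for the reciprocal square root, which is what the expressions above compute lane-wise. A scalar sketch of the same refinement, with hypothetical helper names used only for illustration:

float refine_rcp(float a, float r)    /* r is an approximation of 1/a */
{
  return r * (2.0f - a * r);          /* one Newton-Raphson step, as in rcp() */
}

float refine_rsqrt(float a, float r)  /* r is an approximation of 1/sqrt(a) */
{
  return r * (1.5f - 0.5f * a * r * r); /* one Newton-Raphson step, as in rsqrt() */
}
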
171 
172 __forceinline const ssef operator+(const ssef &a, const ssef &b)
173 {
174  return _mm_add_ps(a.m128, b.m128);
175 }
176 __forceinline const ssef operator+(const ssef &a, const float &b)
177 {
178  return a + ssef(b);
179 }
180 __forceinline const ssef operator+(const float &a, const ssef &b)
181 {
182  return ssef(a) + b;
183 }
184 
185 __forceinline const ssef operator-(const ssef &a, const ssef &b)
186 {
187  return _mm_sub_ps(a.m128, b.m128);
188 }
189 __forceinline const ssef operator-(const ssef &a, const float &b)
190 {
191  return a - ssef(b);
192 }
193 __forceinline const ssef operator-(const float &a, const ssef &b)
194 {
195  return ssef(a) - b;
196 }
197 
198 __forceinline const ssef operator*(const ssef &a, const ssef &b)
199 {
200  return _mm_mul_ps(a.m128, b.m128);
201 }
202 __forceinline const ssef operator*(const ssef &a, const float &b)
203 {
204  return a * ssef(b);
205 }
206 __forceinline const ssef operator*(const float &a, const ssef &b)
207 {
208  return ssef(a) * b;
209 }
210 
211 __forceinline const ssef operator/(const ssef &a, const ssef &b)
212 {
213  return _mm_div_ps(a.m128, b.m128);
214 }
215 __forceinline const ssef operator/(const ssef &a, const float &b)
216 {
217  return a / ssef(b);
218 }
219 __forceinline const ssef operator/(const float &a, const ssef &b)
220 {
221  return ssef(a) / b;
222 }
223 
224 __forceinline const ssef operator^(const ssef &a, const ssef &b)
225 {
226  return _mm_xor_ps(a.m128, b.m128);
227 }
228 __forceinline const ssef operator^(const ssef &a, const ssei &b)
229 {
230  return _mm_xor_ps(a.m128, _mm_castsi128_ps(b.m128));
231 }
232 
233 __forceinline const ssef operator&(const ssef &a, const ssef &b)
234 {
235  return _mm_and_ps(a.m128, b.m128);
236 }
237 __forceinline const ssef operator&(const ssef &a, const ssei &b)
238 {
239  return _mm_and_ps(a.m128, _mm_castsi128_ps(b.m128));
240 }
241 
242 __forceinline const ssef operator|(const ssef &a, const ssef &b)
243 {
244  return _mm_or_ps(a.m128, b.m128);
245 }
246 __forceinline const ssef operator|(const ssef &a, const ssei &b)
247 {
248  return _mm_or_ps(a.m128, _mm_castsi128_ps(b.m128));
249 }
250 
251 __forceinline const ssef andnot(const ssef &a, const ssef &b)
252 {
253  return _mm_andnot_ps(a.m128, b.m128);
254 }
255 
256 __forceinline const ssef min(const ssef &a, const ssef &b)
257 {
258  return _mm_min_ps(a.m128, b.m128);
259 }
260 __forceinline const ssef min(const ssef &a, const float &b)
261 {
262  return _mm_min_ps(a.m128, ssef(b));
263 }
264 __forceinline const ssef min(const float &a, const ssef &b)
265 {
266  return _mm_min_ps(ssef(a), b.m128);
267 }
268 
269 __forceinline const ssef max(const ssef &a, const ssef &b)
270 {
271  return _mm_max_ps(a.m128, b.m128);
272 }
273 __forceinline const ssef max(const ssef &a, const float &b)
274 {
275  return _mm_max_ps(a.m128, ssef(b));
276 }
277 __forceinline const ssef max(const float &a, const ssef &b)
278 {
279  return _mm_max_ps(ssef(a), b.m128);
280 }
281 
282 # if defined(__KERNEL_SSE41__)
283 __forceinline ssef mini(const ssef &a, const ssef &b)
284 {
285  const ssei ai = _mm_castps_si128(a);
286  const ssei bi = _mm_castps_si128(b);
287  const ssei ci = _mm_min_epi32(ai, bi);
288  return _mm_castsi128_ps(ci);
289 }
290 # endif
291 
292 # if defined(__KERNEL_SSE41__)
293 __forceinline ssef maxi(const ssef &a, const ssef &b)
294 {
295  const ssei ai = _mm_castps_si128(a);
296  const ssei bi = _mm_castps_si128(b);
297  const ssei ci = _mm_max_epi32(ai, bi);
298  return _mm_castsi128_ps(ci);
299 }
300 # endif
301 
305 
306 __forceinline const ssef madd(const ssef &a, const ssef &b, const ssef &c)
307 {
308 # if defined(__KERNEL_NEON__)
309  return vfmaq_f32(c, a, b);
310 # elif defined(__KERNEL_AVX2__)
311  return _mm_fmadd_ps(a, b, c);
312 # else
313  return a * b + c;
314 # endif
315 }
316 __forceinline const ssef msub(const ssef &a, const ssef &b, const ssef &c)
317 {
318 # if defined(__KERNEL_NEON__)
319  return vfmaq_f32(vnegq_f32(c), a, b);
320 # elif defined(__KERNEL_AVX2__)
321  return _mm_fmsub_ps(a, b, c);
322 # else
323  return a * b - c;
324 # endif
325 }
326 __forceinline const ssef nmadd(const ssef &a, const ssef &b, const ssef &c)
327 {
328 # if defined(__KERNEL_NEON__)
329  return vfmsq_f32(c, a, b);
330 # elif defined(__KERNEL_AVX2__)
331  return _mm_fnmadd_ps(a, b, c);
332 # else
333  return c - a * b;
334 # endif
335 }
336 __forceinline const ssef nmsub(const ssef &a, const ssef &b, const ssef &c)
337 {
338 # if defined(__KERNEL_NEON__)
339  return vfmsq_f32(vnegq_f32(c), a, b);
340 # elif defined(__KERNEL_AVX2__)
341  return _mm_fnmsub_ps(a, b, c);
342 # else
343  return -a * b - c;
344 # endif
345 }
346 
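The four variants differ only in sign: madd(a, b, c) = a*b + c, msub = a*b - c, nmadd = c - a*b, and nmsub = -a*b - c, each mapping to a single FMA instruction where available. A small check using print_ssef() from the end of this file (sketch, assuming __KERNEL_SSE2__ is defined):

void fma_sign_conventions()
{
  const ssef a(2.0f), b(3.0f), c(1.0f);
  print_ssef("madd ", madd(a, b, c));   /* a*b + c  ->  7 in every lane */
  print_ssef("msub ", msub(a, b, c));   /* a*b - c  ->  5 */
  print_ssef("nmadd", nmadd(a, b, c));  /* c - a*b  -> -5 */
  print_ssef("nmsub", nmsub(a, b, c));  /* -a*b - c -> -7 */
}
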
350 
351 __forceinline ssef &operator+=(ssef &a, const ssef &b)
352 {
353  return a = a + b;
354 }
355 __forceinline ssef &operator+=(ssef &a, const float &b)
356 {
357  return a = a + b;
358 }
359 
360 __forceinline ssef &operator-=(ssef &a, const ssef &b)
361 {
362  return a = a - b;
363 }
364 __forceinline ssef &operator-=(ssef &a, const float &b)
365 {
366  return a = a - b;
367 }
368 
369 __forceinline ssef &operator*=(ssef &a, const ssef &b)
370 {
371  return a = a * b;
372 }
373 __forceinline ssef &operator*=(ssef &a, const float &b)
374 {
375  return a = a * b;
376 }
377 
378 __forceinline ssef &operator/=(ssef &a, const ssef &b)
379 {
380  return a = a / b;
381 }
382 __forceinline ssef &operator/=(ssef &a, const float &b)
383 {
384  return a = a / b;
385 }
386 
390 
391 __forceinline const sseb operator==(const ssef &a, const ssef &b)
392 {
393  return _mm_cmpeq_ps(a.m128, b.m128);
394 }
395 __forceinline const sseb operator==(const ssef &a, const float &b)
396 {
397  return a == ssef(b);
398 }
399 __forceinline const sseb operator==(const float &a, const ssef &b)
400 {
401  return ssef(a) == b;
402 }
403 
404 __forceinline const sseb operator!=(const ssef &a, const ssef &b)
405 {
406  return _mm_cmpneq_ps(a.m128, b.m128);
407 }
408 __forceinline const sseb operator!=(const ssef &a, const float &b)
409 {
410  return a != ssef(b);
411 }
412 __forceinline const sseb operator!=(const float &a, const ssef &b)
413 {
414  return ssef(a) != b;
415 }
416 
417 __forceinline const sseb operator<(const ssef &a, const ssef &b)
418 {
419  return _mm_cmplt_ps(a.m128, b.m128);
420 }
421 __forceinline const sseb operator<(const ssef &a, const float &b)
422 {
423  return a < ssef(b);
424 }
425 __forceinline const sseb operator<(const float &a, const ssef &b)
426 {
427  return ssef(a) < b;
428 }
429 
430 __forceinline const sseb operator>=(const ssef &a, const ssef &b)
431 {
432  return _mm_cmpnlt_ps(a.m128, b.m128);
433 }
434 __forceinline const sseb operator>=(const ssef &a, const float &b)
435 {
436  return a >= ssef(b);
437 }
438 __forceinline const sseb operator>=(const float &a, const ssef &b)
439 {
440  return ssef(a) >= b;
441 }
442 
443 __forceinline const sseb operator>(const ssef &a, const ssef &b)
444 {
445  return _mm_cmpnle_ps(a.m128, b.m128);
446 }
447 __forceinline const sseb operator>(const ssef &a, const float &b)
448 {
449  return a > ssef(b);
450 }
451 __forceinline const sseb operator>(const float &a, const ssef &b)
452 {
453  return ssef(a) > b;
454 }
455 
456 __forceinline const sseb operator<=(const ssef &a, const ssef &b)
457 {
458  return _mm_cmple_ps(a.m128, b.m128);
459 }
460 __forceinline const sseb operator<=(const ssef &a, const float &b)
461 {
462  return a <= ssef(b);
463 }
464 __forceinline const sseb operator<=(const float &a, const ssef &b)
465 {
466  return ssef(a) <= b;
467 }
468 
469 __forceinline const ssef select(const sseb &m, const ssef &t, const ssef &f)
470 {
471 # ifdef __KERNEL_SSE41__
472  return _mm_blendv_ps(f, t, m);
473 # else
474  return _mm_or_ps(_mm_and_ps(m, t), _mm_andnot_ps(m, f));
475 # endif
476 }
477 
478 __forceinline const ssef select(const ssef &m, const ssef &t, const ssef &f)
479 {
480 # ifdef __KERNEL_SSE41__
481  return _mm_blendv_ps(f, t, m);
482 # else
483  return _mm_or_ps(_mm_and_ps(m, t), _mm_andnot_ps(m, f));
484 # endif
485 }
486 
487 __forceinline const ssef select(const int mask, const ssef &t, const ssef &f)
488 {
489 # if defined(__KERNEL_SSE41__) && \
490  ((!defined(__clang__) && !defined(_MSC_VER)) || defined(__INTEL_COMPILER))
491  return _mm_blend_ps(f, t, mask);
492 # else
493  return select(sseb(mask), t, f);
494 # endif
495 }
496 
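select() is the branch-free per-lane conditional: lanes where the mask is set take t, the rest take f. For example, clamping negative lanes to zero (sketch, assuming this header is included):

ssef clamp_to_zero(const ssef &x)
{
  /* (x < ssef(0.0f)) yields an sseb mask; negative lanes become 0.0f, others keep x. */
  return select(x < ssef(0.0f), ssef(0.0f), x);
}
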
500 
501 # if defined(__KERNEL_SSE41__)
502 __forceinline const ssef round_even(const ssef &a)
503 {
504 # ifdef __KERNEL_NEON__
505  return vrndnq_f32(a);
506 # else
507  return _mm_round_ps(a, _MM_FROUND_TO_NEAREST_INT);
508 # endif
509 }
510 __forceinline const ssef round_down(const ssef &a)
511 {
512 # ifdef __KERNEL_NEON__
513  return vrndmq_f32(a);
514 # else
515  return _mm_round_ps(a, _MM_FROUND_TO_NEG_INF);
516 # endif
517 }
518 __forceinline const ssef round_up(const ssef &a)
519 {
520 # ifdef __KERNEL_NEON__
521  return vrndpq_f32(a);
522 # else
523  return _mm_round_ps(a, _MM_FROUND_TO_POS_INF);
524 # endif
525 }
526 __forceinline const ssef round_zero(const ssef &a)
527 {
528 # ifdef __KERNEL_NEON__
529  return vrndq_f32(a);
530 # else
531  return _mm_round_ps(a, _MM_FROUND_TO_ZERO);
532 # endif
533 }
534 __forceinline const ssef floor(const ssef &a)
535 {
536 # ifdef __KERNEL_NEON__
537  return vrndmq_f32(a);
538 # else
539  return _mm_round_ps(a, _MM_FROUND_TO_NEG_INF);
540 # endif
541 }
542 __forceinline const ssef ceil(const ssef &a)
543 {
544 # ifdef __KERNEL_NEON__
545  return vrndpq_f32(a);
546 # else
547  return _mm_round_ps(a, _MM_FROUND_TO_POS_INF);
548 # endif
549 }
550 # endif
551 
552 __forceinline ssei truncatei(const ssef &a)
553 {
554  return _mm_cvttps_epi32(a.m128);
555 }
556 
557 /* This is about 25% faster than straightforward floor to integer conversion
558  * due to better pipelining.
559  *
560  * Adding the unsaturated 0xffffffff mask (where a < 0) is the same as subtracting 1.
561  */
562 __forceinline ssei floori(const ssef &a)
563 {
564  return truncatei(a) + cast((a < 0.0f).m128);
565 }
566 
567 __forceinline ssef floorfrac(const ssef &x, ssei *i)
568 {
569  *i = floori(x);
570  return x - ssef(*i);
571 }
572 
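floori() and floorfrac() split each lane into its integer cell and fractional offset, the usual first step of value/gradient noise. A short sketch (assuming this header is included; print_ssef() is defined at the end of this file):

void cell_and_offset(const ssef &p)
{
  ssei cell;                           /* per-lane floor(p), as 32-bit integers */
  const ssef t = floorfrac(p, &cell);  /* fractional part p - floor(p), in [0, 1) */
  print_ssef("frac", t);
}
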
576 
577 __forceinline ssef mix(const ssef &a, const ssef &b, const ssef &t)
578 {
579  return madd(t, b, (ssef(1.0f) - t) * a);
580 }
581 
585 
586 __forceinline ssef unpacklo(const ssef &a, const ssef &b)
587 {
588  return _mm_unpacklo_ps(a.m128, b.m128);
589 }
590 __forceinline ssef unpackhi(const ssef &a, const ssef &b)
591 {
592  return _mm_unpackhi_ps(a.m128, b.m128);
593 }
594 
595 template<size_t i0, size_t i1, size_t i2, size_t i3>
596 __forceinline const ssef shuffle(const ssef &b)
597 {
598 # ifdef __KERNEL_NEON__
599  return shuffle_neon<ssef, i0, i1, i2, i3>(b.m128);
600 # else
601  return _mm_castsi128_ps(_mm_shuffle_epi32(_mm_castps_si128(b), _MM_SHUFFLE(i3, i2, i1, i0)));
602 # endif
603 }
604 
605 template<> __forceinline const ssef shuffle<0, 1, 0, 1>(const ssef &a)
606 {
607  return _mm_movelh_ps(a, a);
608 }
609 
610 template<> __forceinline const ssef shuffle<2, 3, 2, 3>(const ssef &a)
611 {
612  return _mm_movehl_ps(a, a);
613 }
614 
615 template<size_t i0, size_t i1, size_t i2, size_t i3>
616 __forceinline const ssef shuffle(const ssef &a, const ssef &b)
617 {
618 # ifdef __KERNEL_NEON__
619  return shuffle_neon<float32x4_t, i0, i1, i2, i3>(a, b);
620 # else
621  return _mm_shuffle_ps(a, b, _MM_SHUFFLE(i3, i2, i1, i0));
622 # endif
623 }
624 
625 template<size_t i0> __forceinline const ssef shuffle(const ssef &a, const ssef &b)
626 {
627 # ifdef __KERNEL_NEON__
628  return shuffle_neon<float32x4_t, i0, i0, i0, i0>(a, b);
629 # else
630  return _mm_shuffle_ps(a, b, _MM_SHUFFLE(i0, i0, i0, i0));
631 # endif
632 }
633 
634 # ifndef __KERNEL_NEON__
635 template<> __forceinline const ssef shuffle<0, 1, 0, 1>(const ssef &a, const ssef &b)
636 {
637  return _mm_movelh_ps(a, b);
638 }
639 
640 template<> __forceinline const ssef shuffle<2, 3, 2, 3>(const ssef &a, const ssef &b)
641 {
642  return _mm_movehl_ps(b, a);
643 }
644 # endif
645 
646 # if defined(__KERNEL_SSSE3__)
647 __forceinline const ssef shuffle8(const ssef &a, const ssei &shuf)
648 {
649  return _mm_castsi128_ps(_mm_shuffle_epi8(_mm_castps_si128(a), shuf));
650 }
651 # endif
652 
653 # if defined(__KERNEL_SSE3__)
654 template<> __forceinline const ssef shuffle<0, 0, 2, 2>(const ssef &b)
655 {
656  return _mm_moveldup_ps(b);
657 }
658 template<> __forceinline const ssef shuffle<1, 1, 3, 3>(const ssef &b)
659 {
660  return _mm_movehdup_ps(b);
661 }
662 # endif
663 
664 template<size_t i0> __forceinline const ssef shuffle(const ssef &b)
665 {
666  return shuffle<i0, i0, i0, i0>(b);
667 }
668 
669 # if defined(__KERNEL_AVX__)
670 __forceinline const ssef shuffle(const ssef &a, const ssei &shuf)
671 {
672  return _mm_permutevar_ps(a, shuf);
673 }
674 # endif
675 
676 template<size_t i> __forceinline float extract(const ssef &a)
677 {
678  return _mm_cvtss_f32(shuffle<i, i, i, i>(a));
679 }
680 template<> __forceinline float extract<0>(const ssef &a)
681 {
682  return _mm_cvtss_f32(a);
683 }
684 
685 # if defined(__KERNEL_SSE41__)
686 template<size_t dst, size_t src, size_t clr>
687 __forceinline const ssef insert(const ssef &a, const ssef &b)
688 {
689 # ifdef __KERNEL_NEON__
690  ssef res = a;
691  if (clr)
692  res[dst] = 0;
693  else
694  res[dst] = b[src];
695  return res;
696 # else
697  return _mm_insert_ps(a, b, (dst << 4) | (src << 6) | clr);
698 # endif
699 }
700 template<size_t dst, size_t src> __forceinline const ssef insert(const ssef &a, const ssef &b)
701 {
702  return insert<dst, src, 0>(a, b);
703 }
704 template<size_t dst> __forceinline const ssef insert(const ssef &a, const float b)
705 {
706  return insert<dst, 0>(a, _mm_set_ss(b));
707 }
708 # else
709 template<size_t dst> __forceinline const ssef insert(const ssef &a, const float b)
710 {
711  ssef c = a;
712  c[dst] = b;
713  return c;
714 }
715 # endif
716 
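The shuffle<> templates take the new lane order as template arguments (lane k of the result reads lane ik of the source), while extract<>/insert<> move single lanes to and from scalars. A small sketch (assuming this header is included):

ssef shuffle_demo(const ssef &v) /* v = (x, y, z, w) */
{
  const ssef yzxw = shuffle<1, 2, 0, 3>(v); /* rotate the first three lanes: (y, z, x, w) */
  const float y = extract<1>(v);            /* read lane 1 as a scalar */
  return insert<3>(yzxw, y);                /* overwrite lane 3: (y, z, x, y) */
}
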
720 
721 __forceinline void transpose(const ssef &r0,
722  const ssef &r1,
723  const ssef &r2,
724  const ssef &r3,
725  ssef &c0,
726  ssef &c1,
727  ssef &c2,
728  ssef &c3)
729 {
730  ssef l02 = unpacklo(r0, r2);
731  ssef h02 = unpackhi(r0, r2);
732  ssef l13 = unpacklo(r1, r3);
733  ssef h13 = unpackhi(r1, r3);
734  c0 = unpacklo(l02, l13);
735  c1 = unpackhi(l02, l13);
736  c2 = unpacklo(h02, h13);
737  c3 = unpackhi(h02, h13);
738 }
739 
740 __forceinline void transpose(
741  const ssef &r0, const ssef &r1, const ssef &r2, const ssef &r3, ssef &c0, ssef &c1, ssef &c2)
742 {
743  ssef l02 = unpacklo(r0, r2);
744  ssef h02 = unpackhi(r0, r2);
745  ssef l13 = unpacklo(r1, r3);
746  ssef h13 = unpackhi(r1, r3);
747  c0 = unpacklo(l02, l13);
748  c1 = unpackhi(l02, l13);
749  c2 = unpacklo(h02, h13);
750 }
751 
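transpose() turns four registers that each hold one (x, y, z, w) point into one register per component, the usual AoS-to-SoA step before operating on four points at once. Sketch (assuming this header is included):

void points_to_soa(const ssef p[4], ssef &x, ssef &y, ssef &z, ssef &w)
{
  /* p[i] is point i as (x, y, z, w); afterwards x holds the four x values, and so on. */
  transpose(p[0], p[1], p[2], p[3], x, y, z, w);
}
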
755 
756 __forceinline const ssef vreduce_min(const ssef &v)
757 {
758 # ifdef __KERNEL_NEON__
759  return vdupq_n_f32(vminvq_f32(v));
760 # else
761  ssef h = min(shuffle<1, 0, 3, 2>(v), v);
762  return min(shuffle<2, 3, 0, 1>(h), h);
763 # endif
764 }
765 __forceinline const ssef vreduce_max(const ssef &v)
766 {
767 # ifdef __KERNEL_NEON__
768  return vdupq_n_f32(vmaxvq_f32(v));
769 # else
770  ssef h = max(shuffle<1, 0, 3, 2>(v), v);
771  return max(shuffle<2, 3, 0, 1>(h), h);
772 # endif
773 }
774 __forceinline const ssef vreduce_add(const ssef &v)
775 {
776 # ifdef __KERNEL_NEON__
777  return vdupq_n_f32(vaddvq_f32(v));
778 # else
779  ssef h = shuffle<1, 0, 3, 2>(v) + v;
780  return shuffle<2, 3, 0, 1>(h) + h;
781 # endif
782 }
783 
784 __forceinline float reduce_min(const ssef &v)
785 {
786 # ifdef __KERNEL_NEON__
787  return vminvq_f32(v);
788 # else
789  return _mm_cvtss_f32(vreduce_min(v));
790 # endif
791 }
792 __forceinline float reduce_max(const ssef &v)
793 {
794 # ifdef __KERNEL_NEON__
795  return vmaxvq_f32(v);
796 # else
797  return _mm_cvtss_f32(vreduce_max(v));
798 # endif
799 }
800 __forceinline float reduce_add(const ssef &v)
801 {
802 # ifdef __KERNEL_NEON__
803  return vaddvq_f32(v);
804 # else
805  return _mm_cvtss_f32(vreduce_add(v));
806 # endif
807 }
808 
809 __forceinline uint32_t select_min(const ssef &v)
810 {
811  return __bsf(movemask(v == vreduce_min(v)));
812 }
813 __forceinline uint32_t select_max(const ssef &v)
814 {
815  return __bsf(movemask(v == vreduce_max(v)));
816 }
817 
818 __forceinline uint32_t select_min(const sseb &valid, const ssef &v)
819 {
820  const ssef a = select(valid, v, ssef(pos_inf));
821  return __bsf(movemask(valid & (a == vreduce_min(a))));
822 }
823 __forceinline uint32_t select_max(const sseb &valid, const ssef &v)
824 {
825  const ssef a = select(valid, v, ssef(neg_inf));
826  return __bsf(movemask(valid & (a == vreduce_max(a))));
827 }
828 
829 __forceinline uint32_t movemask(const ssef &a)
830 {
831  return _mm_movemask_ps(a);
832 }
833 
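reduce_min()/reduce_max()/reduce_add() fold the four lanes into one scalar, and select_min()/select_max() return the index of the winning lane (via movemask() and __bsf()), e.g. to pick the nearest of four candidate hit distances. Sketch (assuming this header is included):

uint32_t nearest_lane(const ssef &dist, float *t)
{
  *t = reduce_min(dist);    /* smallest of the four distances */
  return select_min(dist);  /* index (0..3) of a lane attaining that minimum */
}
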
837 
838 __forceinline ssef load4f(const float4 &a)
839 {
840 # ifdef __KERNEL_WITH_SSE_ALIGN__
841  return _mm_load_ps(&a.x);
842 # else
843  return _mm_loadu_ps(&a.x);
844 # endif
845 }
846 
847 __forceinline ssef load4f(const float3 &a)
848 {
849 # ifdef __KERNEL_WITH_SSE_ALIGN__
850  return _mm_load_ps(&a.x);
851 # else
852  return _mm_loadu_ps(&a.x);
853 # endif
854 }
855 
856 __forceinline ssef load4f(const void *const a)
857 {
858  return _mm_load_ps((float *)a);
859 }
860 
861 __forceinline ssef load1f_first(const float a)
862 {
863  return _mm_set_ss(a);
864 }
865 
866 __forceinline void store4f(void *ptr, const ssef &v)
867 {
868  _mm_store_ps((float *)ptr, v);
869 }
870 
871 __forceinline ssef loadu4f(const void *const a)
872 {
873  return _mm_loadu_ps((float *)a);
874 }
875 
876 __forceinline void storeu4f(void *ptr, const ssef &v)
877 {
878  _mm_storeu_ps((float *)ptr, v);
879 }
880 
881 __forceinline void store4f(const sseb &mask, void *ptr, const ssef &f)
882 {
883 # if defined(__KERNEL_AVX__)
884  _mm_maskstore_ps((float *)ptr, (__m128i)mask, f);
885 # else
886  *(ssef *)ptr = select(mask, f, *(ssef *)ptr);
887 # endif
888 }
889 
890 __forceinline ssef load4f_nt(void *ptr)
891 {
892 # if defined(__KERNEL_SSE41__)
893  return _mm_castsi128_ps(_mm_stream_load_si128((__m128i *)ptr));
894 # else
895  return _mm_load_ps((float *)ptr);
896 # endif
897 }
898 
899 __forceinline void store4f_nt(void *ptr, const ssef &v)
900 {
901 # if defined(__KERNEL_SSE41__)
902  _mm_stream_ps((float *)ptr, v);
903 # else
904  _mm_store_ps((float *)ptr, v);
905 # endif
906 }
907 
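load4f(const void *) and store4f() use aligned load/store instructions, loadu4f()/storeu4f() accept unaligned addresses, and the masked store4f() overload writes only the lanes selected by the mask. A short sketch (assuming this header is included and both pointers are 16-byte aligned):

void copy_positive_lanes(const float *src, float *dst)
{
  const ssef v = load4f(src);       /* aligned load of four floats */
  store4f(v > ssef(0.0f), dst, v);  /* write back only the lanes that are > 0 */
}
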
911 
912 __forceinline float dot(const ssef &a, const ssef &b)
913 {
914  return reduce_add(a * b);
915 }
916 
917 /* calculate shuffled cross product, useful when order of components does not matter */
918 __forceinline ssef cross_zxy(const ssef &a, const ssef &b)
919 {
920  const ssef a0 = a;
921  const ssef b0 = shuffle<1, 2, 0, 3>(b);
922  const ssef a1 = shuffle<1, 2, 0, 3>(a);
923  const ssef b1 = b;
924  return msub(a0, b0, a1 * b1);
925 }
926 
927 __forceinline ssef cross(const ssef &a, const ssef &b)
928 {
929  return shuffle<1, 2, 0, 3>(cross_zxy(a, b));
930 }
931 
932 ccl_device_inline const ssef dot3_splat(const ssef &a, const ssef &b)
933 {
934 # ifdef __KERNEL_SSE41__
935  return _mm_dp_ps(a.m128, b.m128, 0x7f);
936 # else
937  ssef t = a * b;
938  return ssef(((float *)&t)[0] + ((float *)&t)[1] + ((float *)&t)[2]);
939 # endif
940 }
941 
942 /* squared length taking only specified axes into account */
943 template<size_t X, size_t Y, size_t Z, size_t W> ccl_device_inline float len_squared(const ssef &a)
944 {
945 # ifndef __KERNEL_SSE41__
946  float4 &t = (float4 &)a;
947  return (X ? t.x * t.x : 0.0f) + (Y ? t.y * t.y : 0.0f) + (Z ? t.z * t.z : 0.0f) +
948  (W ? t.w * t.w : 0.0f);
949 # else
950  return extract<0>(
951  ssef(_mm_dp_ps(a.m128, a.m128, (X << 4) | (Y << 5) | (Z << 6) | (W << 7) | 0xf)));
952 # endif
953 }
954 
955 ccl_device_inline float dot3(const ssef &a, const ssef &b)
956 {
957 # ifdef __KERNEL_SSE41__
958  return extract<0>(ssef(_mm_dp_ps(a.m128, b.m128, 0x7f)));
959 # else
960  ssef t = a * b;
961  return ((float *)&t)[0] + ((float *)&t)[1] + ((float *)&t)[2];
962 # endif
963 }
964 
965 ccl_device_inline const ssef len3_squared_splat(const ssef &a)
966 {
967  return dot3_splat(a, a);
968 }
969 
970 ccl_device_inline float len3_squared(const ssef &a)
971 {
972  return dot3(a, a);
973 }
974 
975 ccl_device_inline float len3(const ssef &a)
976 {
977  return extract<0>(mm_sqrt(dot3_splat(a, a)));
978 }
979 
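With a position kept in the x, y, z lanes (w unused), these helpers give 3D dot products, cross products, and lengths directly on ssef values, e.g. an unnormalized triangle normal and its area (sketch, assuming this header is included):

ssef triangle_normal(const ssef &v0, const ssef &v1, const ssef &v2)
{
  /* cross() restores component order; cross_zxy() would skip the final shuffle. */
  return cross(v1 - v0, v2 - v0);
}

float triangle_area(const ssef &v0, const ssef &v1, const ssef &v2)
{
  return 0.5f * len3(triangle_normal(v0, v1, v2));
}
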
980 /* SSE shuffle utility functions */
981 
982 # ifdef __KERNEL_SSSE3__
983 
984 /* faster version for SSSE3 */
985 typedef ssei shuffle_swap_t;
986 
987 ccl_device_inline shuffle_swap_t shuffle_swap_identity()
988 {
989  return _mm_set_epi8(15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0);
990 }
991 
992 ccl_device_inline shuffle_swap_t shuffle_swap_swap()
993 {
994  return _mm_set_epi8(7, 6, 5, 4, 3, 2, 1, 0, 15, 14, 13, 12, 11, 10, 9, 8);
995 }
996 
997 ccl_device_inline const ssef shuffle_swap(const ssef &a, const shuffle_swap_t &shuf)
998 {
999  return cast(_mm_shuffle_epi8(cast(a), shuf));
1000 }
1001 
1002 # else
1003 
1004 /* somewhat slower version for SSE2 */
1005 typedef int shuffle_swap_t;
1006 
1007 ccl_device_inline shuffle_swap_t shuffle_swap_identity()
1008 {
1009  return 0;
1010 }
1011 
1012 ccl_device_inline shuffle_swap_t shuffle_swap_swap()
1013 {
1014  return 1;
1015 }
1016 
1017 ccl_device_inline const ssef shuffle_swap(const ssef &a, shuffle_swap_t shuf)
1018 {
1019  /* shuffle value must be a constant, so we need to branch */
1020  if (shuf)
1021  return shuffle<2, 3, 0, 1>(a); /* swap the two 64-bit halves, matching the SSSE3 variant */
1022  else
1023  return shuffle<0, 1, 2, 3>(a); /* identity */
1024 }
1025 
1026 # endif
1027 
1028 # if defined(__KERNEL_SSE41__) && !defined(__KERNEL_NEON__)
1029 
1030 ccl_device_inline void gen_idirsplat_swap(const ssef &pn,
1031  const shuffle_swap_t &shuf_identity,
1032  const shuffle_swap_t &shuf_swap,
1033  const float3 &idir,
1034  ssef idirsplat[3],
1035  shuffle_swap_t shufflexyz[3])
1036 {
1037  const __m128 idirsplat_raw[] = {_mm_set_ps1(idir.x), _mm_set_ps1(idir.y), _mm_set_ps1(idir.z)};
1038  idirsplat[0] = _mm_xor_ps(idirsplat_raw[0], pn);
1039  idirsplat[1] = _mm_xor_ps(idirsplat_raw[1], pn);
1040  idirsplat[2] = _mm_xor_ps(idirsplat_raw[2], pn);
1041 
1042  const ssef signmask = cast(ssei(0x80000000));
1043  const ssef shuf_identity_f = cast(shuf_identity);
1044  const ssef shuf_swap_f = cast(shuf_swap);
1045 
1046  shufflexyz[0] = _mm_castps_si128(
1047  _mm_blendv_ps(shuf_identity_f, shuf_swap_f, _mm_and_ps(idirsplat_raw[0], signmask)));
1048  shufflexyz[1] = _mm_castps_si128(
1049  _mm_blendv_ps(shuf_identity_f, shuf_swap_f, _mm_and_ps(idirsplat_raw[1], signmask)));
1050  shufflexyz[2] = _mm_castps_si128(
1051  _mm_blendv_ps(shuf_identity_f, shuf_swap_f, _mm_and_ps(idirsplat_raw[2], signmask)));
1052 }
1053 
1054 # else
1055 
1056 ccl_device_inline void gen_idirsplat_swap(const ssef &pn,
1057  const shuffle_swap_t &shuf_identity,
1058  const shuffle_swap_t &shuf_swap,
1059  const float3 &idir,
1060  ssef idirsplat[3],
1061  shuffle_swap_t shufflexyz[3])
1062 {
1063  idirsplat[0] = ssef(idir.x) ^ pn;
1064  idirsplat[1] = ssef(idir.y) ^ pn;
1065  idirsplat[2] = ssef(idir.z) ^ pn;
1066 
1067  shufflexyz[0] = (idir.x >= 0) ? shuf_identity : shuf_swap;
1068  shufflexyz[1] = (idir.y >= 0) ? shuf_identity : shuf_swap;
1069  shufflexyz[2] = (idir.z >= 0) ? shuf_identity : shuf_swap;
1070 }
1071 
1072 # endif
1073 
1074 ccl_device_inline const ssef uint32_to_float(const ssei &in)
1075 {
1076  ssei a = _mm_srli_epi32(in, 16);
1077  ssei b = _mm_and_si128(in, _mm_set1_epi32(0x0000ffff));
1078  ssei c = _mm_or_si128(a, _mm_set1_epi32(0x53000000));
1079  ssef d = _mm_cvtepi32_ps(b);
1080  ssef e = _mm_sub_ps(_mm_castsi128_ps(c), _mm_castsi128_ps(_mm_set1_epi32(0x53000000)));
1081  return _mm_add_ps(e, d);
1082 }
1083 
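uint32_to_float() converts unsigned 32-bit lanes exactly where _mm_cvtepi32_ps alone would treat values with the top bit set as negative: the low 16 bits are converted directly, while the high 16 bits are packed into the mantissa of the constant 2^39 (0x53000000) and recovered by subtracting that constant, then the two parts are added. A scalar sketch of the same idea (illustration only):

float uint32_to_float_scalar(uint32_t u)
{
  const uint32_t lo = u & 0xffffu;
  const uint32_t hi = u >> 16;
  union {
    uint32_t i;
    float f;
  } magic;
  magic.i = 0x53000000u | hi;                         /* bit pattern of 2^39 + hi * 2^16 */
  const float high_part = magic.f - 549755813888.0f; /* subtract 2^39 -> hi * 65536.0f */
  return high_part + (float)lo;                       /* lo < 2^16, so this conversion is exact */
}
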
1084 template<size_t S1, size_t S2, size_t S3, size_t S4>
1085 ccl_device_inline const ssef set_sign_bit(const ssef &a)
1086 {
1087  return cast(cast(a) ^ ssei(S1 << 31, S2 << 31, S3 << 31, S4 << 31));
1088 }
1089 
1093 
1094 ccl_device_inline void print_ssef(const char *label, const ssef &a)
1095 {
1096  printf(
1097  "%s: %.8f %.8f %.8f %.8f\n", label, (double)a[0], (double)a[1], (double)a[2], (double)a[3]);
1098 }
1099 
1100 #endif
1101 
1102 CCL_NAMESPACE_END
1103 
1104 #endif