17 #ifndef __UTIL_HALF_H__
18 #define __UTIL_HALF_H__
23 #if !defined(__KERNEL_GPU__) && defined(__KERNEL_SSE2__)
31 #ifdef __KERNEL_OPENCL__
33 # define float4_store_half(h, f, scale) vstore_half4(f *(scale), 0, h);
38 # ifndef __KERNEL_CUDA__
46 half(
const unsigned short &i) : v(i)
49 operator unsigned short()
68 # ifdef __KERNEL_CUDA__
82 # ifndef __KERNEL_SSE2__
83 for (
int i = 0; i < 4; i++) {
90 float fscale = f[i] * scale;
91 in.f = (fscale > 0.0f) ? ((fscale < 65504.0f) ? fscale : 65504.0f) : 0.0f;
97 int rshift = (
result >> 13);
99 h[i] = (rshift & 0x7FFF);
103 ssef fscale = load4f(f) * scale;
104 ssef
x =
min(
max(fscale, 0.0f), 65504.0f);
106 # ifdef __KERNEL_AVX2__
107 ssei rpack = _mm_cvtps_ph(
x, 0);
112 ssei rshift = (
result >> 13) & 0x7FFF;
113 ssei rpack = _mm_packs_epi32(rshift, rshift);
116 _mm_storel_pi((__m64 *)h, _mm_castsi128_ps(rpack));
124 *((
int *)&f) = ((h & 0x8000) << 16) | (((h & 0x7c00) + 0x1C000) << 13) | ((h & 0x03FF) << 13);
145 uint sign_bit = u & 0x80000000;
148 uint exponent_bits = u & 0x7f800000;
150 uint value_bits = u & 0x7fffffff;
152 value_bits -= 0x1c000;
154 value_bits = (exponent_bits < 0x38800000) ? 0 : value_bits;
156 value_bits = (exponent_bits > 0x47000000) ? 0x7bff : value_bits;
158 value_bits = (exponent_bits == 0 ? 0 : value_bits);
160 return (value_bits | sign_bit);
btMatrix3x3 absolute() const
Return the matrix with all values non negative.
half(const unsigned short &i)
half & operator=(const unsigned short &i)
__device__ half __float2half(const float f)
#define ccl_device_inline
#define CCL_NAMESPACE_END
ccl_device_inline float4 half4_to_float4(half4 h)
ccl_device_inline void float4_store_half(half *h, float4 f, float scale)
ccl_device_inline half float_to_half(float f)
ccl_device_inline float half_to_float(half h)
ccl_device_inline uint __float_as_uint(float f)