/* Include guard: __KERNEL_COMPAT_CUDA_H__ is defined the first time this header
 * is processed, so the guarded contents are skipped on any later inclusion.
 * NOTE(review): the matching #endif is not visible in this view. */
17 #ifndef __KERNEL_COMPAT_CUDA_H__
18 #define __KERNEL_COMPAT_CUDA_H__
/* Marker macros, defined with no value. Presumably tested elsewhere to select
 * GPU/CUDA-specific code paths -- confirm against the users of these names. */
20 #define __KERNEL_GPU__
21 #define __KERNEL_CUDA__
/* Both namespace macros expand to nothing here, so code wrapped between them is
 * not placed in any namespace by these macros. */
22 #define CCL_NAMESPACE_BEGIN
23 #define CCL_NAMESPACE_END
/* Default for __NODES_MAX_GROUP__: only applied when the macro has not already
 * been defined (e.g. by the build), in which case it becomes
 * NODE_GROUP_LEVEL_MAX. NODE_GROUP_LEVEL_MAX is not defined in this view.
 * NOTE(review): the closing #endif is not visible in this view. */
26 #ifndef __NODES_MAX_GROUP__
27 # define __NODES_MAX_GROUP__ NODE_GROUP_LEVEL_MAX
/* Default for __NODES_FEATURES__: same pattern, falling back to
 * NODE_FEATURE_ALL (also defined elsewhere).
 * NOTE(review): the closing #endif is not visible in this view. */
29 #ifndef __NODES_FEATURES__
30 # define __NODES_FEATURES__ NODE_FEATURE_ALL
/* `half` is a plain alias for unsigned short, not an arithmetic half-float type.
 * Presumably it carries the raw 16-bit pattern of a half-precision value
 * (a __float2half returning `half` is declared in this file) -- confirm. */
41 typedef unsigned short half;
/* Single-precision limit constants, defined only when CYCLES_CUBIN_CC is set.
 * Presumably this build mode does not pull in the standard header that would
 * normally provide them -- confirm.
 * NOTE(review): the closing #endif is not visible in this view. */
44 #ifdef CYCLES_CUBIN_CC
/* Smallest positive normalized float. */
45 # define FLT_MIN 1.175494350822287507969e-38f
/* Largest finite float, written out as a full decimal integer literal. */
46 # define FLT_MAX 340282346638528859811704183484516925440.0f
/* Difference between 1.0f and the next representable float. */
47 # define FLT_EPSILON 1.192092896e-07F
53 asm(
"{ cvt.rn.f16.f32 %0, %1;}\n" :
"=h"(val) :
"f"(f));
/* Function qualifier macros: each expands to a CUDA `__device__` qualifier plus
 * an inlining hint. */

/* Plain device function with the regular (non-forced) inline hint. */
59 #define ccl_device __device__ __inline__
/* Inlining policy selected on __CUDA_ARCH__. For values below 500 both macros
 * force inlining. In the second pair ccl_device_inline drops to `__inline__`
 * while ccl_device_forceinline still forces it.
 * NOTE(review): the directive separating the two pairs (presumably #else) and
 * the closing #endif are not visible in this view -- confirm the structure.
 * Also note an undefined __CUDA_ARCH__ evaluates as 0 in #if, which selects the
 * first pair. */
60 #if __CUDA_ARCH__ < 500
61 # define ccl_device_inline __device__ __forceinline__
62 # define ccl_device_forceinline __device__ __forceinline__
64 # define ccl_device_inline __device__ __inline__
65 # define ccl_device_forceinline __device__ __forceinline__
/* Device function that must not be inlined. */
67 #define ccl_device_noinline __device__ __noinline__
/* Expands to ccl_device, i.e. despite the name no `__noinline__` is applied
 * here. Presumably "noinline" is only meant to apply on the CPU backend --
 * confirm. */
68 #define ccl_device_noinline_cpu ccl_device
/* Memory-space, pointer and attribute macros. Several expand to nothing in this
 * header. Presumably they exist so shared code can target other backends that
 * need a real keyword in the same position -- confirm. */

/* Variable placed in CUDA `__constant__` memory. */
70 #define ccl_static_constant __constant__
/* Ordinary C/C++ `const`. */
71 #define ccl_constant const
/* Variable placed in CUDA `__shared__` (per-block) memory. */
72 #define ccl_local __shared__
/* Empty: no qualifier is emitted. */
73 #define ccl_local_param
/* Empty: no address-space qualifier is emitted. */
76 #define ccl_addr_space
/* No-alias pointer hint. */
77 #define ccl_restrict __restrict__
/* Empty: no loop-unrolling directive is emitted. */
78 #define ccl_loop_no_unroll
/* Alignment specifier. `n` is forwarded unchanged to `__align__`. */
83 #define ccl_align(n) __align__(n)
/* Empty: no initializer is emitted. */
84 #define ccl_optional_struct_init
/* Empty: no fallthrough annotation is emitted. */
86 #define ATTR_FALLTHROUGH
/* CUDA_THREADS_BLOCK_WIDTH squared. CUDA_THREADS_BLOCK_WIDTH is not defined in
 * this view and must be provided before this macro is expanded. Presumably the
 * thread count of a square 2D block -- confirm. */
88 #define CCL_MAX_LOCAL_SIZE (CUDA_THREADS_BLOCK_WIDTH * CUDA_THREADS_BLOCK_WIDTH)
/* Assertions are compiled out: the macro expands to nothing, so `cond` is never
 * evaluated and any side effects in it do not happen. */
92 #define kernel_assert(cond)
/* Global index along dimension `d`: group index times group size, plus the
 * index within the group. Built from ccl_group_id / ccl_local_size /
 * ccl_local_id, which must already be declared where the macro is used.
 * `d` is expanded three times, so pass a side-effect-free expression. */
115 #define ccl_global_id(d) (ccl_group_id(d) * ccl_local_size(d) + ccl_local_id(d))
/* Total extent along dimension `d`: number of groups times group size.
 * `d` is expanded twice, so pass a side-effect-free expression. */
131 #define ccl_global_size(d) (ccl_num_groups(d) * ccl_local_size(d))
/* Data access macros. */

/* Element read: expands to a plain subscript `t[(index)]`. Only `index` is
 * parenthesized. `t` and the overall expansion are not, so `t` should be a
 * simple identifier rather than a compound expression. */
164 #define kernel_tex_fetch(t, index) t[(index)]
/* Yields `t` itself, parenthesized. */
165 #define kernel_tex_array(t) (t)
/* Alias for the symbol `__data`, which is declared outside this view. */
167 #define kernel_data __data
/* Redirect the standard single-precision math names to the double-underscore
 * CUDA intrinsics. Intrinsics such as __expf / __sinf trade accuracy for speed.
 * Presumably the same holds for the rest of this set -- confirm against the
 * CUDA Math API.
 * From this point on every use of these names in the translation unit is
 * rewritten by the preprocessor. Each argument is cast to float first, so a
 * double or integer argument is converted to float before the call. */
171 #define cosf(x) __cosf(((float)(x)))
172 #define sinf(x) __sinf(((float)(x)))
173 #define powf(x, y) __powf(((float)(x)), ((float)(y)))
174 #define tanf(x) __tanf(((float)(x)))
175 #define logf(x) __logf(((float)(x)))
176 #define expf(x) __expf(((float)(x)))
__device__ half __float2half(const float f)
ccl_device_inline uint ccl_local_id(uint d)
#define ccl_device_inline
ccl_device_inline uint ccl_local_size(uint d)
ccl_device_inline uint ccl_group_id(uint d)
ccl_device_inline uint ccl_num_groups(uint d)
unsigned long long CUtexObject
unsigned __int64 uint64_t