Blender  V2.93
kernel_compat_cuda.h
Go to the documentation of this file.
1 /*
2  * Copyright 2011-2013 Blender Foundation
3  *
4  * Licensed under the Apache License, Version 2.0 (the "License");
5  * you may not use this file except in compliance with the License.
6  * You may obtain a copy of the License at
7  *
8  * http://www.apache.org/licenses/LICENSE-2.0
9  *
10  * Unless required by applicable law or agreed to in writing, software
11  * distributed under the License is distributed on an "AS IS" BASIS,
12  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13  * See the License for the specific language governing permissions and
14  * limitations under the License.
15  */
16 
17 #ifndef __KERNEL_COMPAT_CUDA_H__
18 #define __KERNEL_COMPAT_CUDA_H__
19 
/* Build-target markers: defining these lets code that includes this header
 * test for a GPU / CUDA build with #ifdef. */
20 #define __KERNEL_GPU__
21 #define __KERNEL_CUDA__
/* The namespace open/close macros expand to nothing in this header. */
22 #define CCL_NAMESPACE_BEGIN
23 #define CCL_NAMESPACE_END
24 
25 /* Selective nodes compilation. */
/* Both settings can be pre-defined before this header is included; the values
 * below are only fallbacks. NODE_GROUP_LEVEL_MAX and NODE_FEATURE_ALL are not
 * defined in this header -- presumably they come from the node headers
 * (TODO confirm). */
26 #ifndef __NODES_MAX_GROUP__
27 # define __NODES_MAX_GROUP__ NODE_GROUP_LEVEL_MAX
28 #endif
29 #ifndef __NODES_FEATURES__
30 # define __NODES_FEATURES__ NODE_FEATURE_ALL
31 #endif
32 
33 /* Manual definitions so we can compile without CUDA toolkit. */
34 
/* When __CUDACC_RTC__ is defined, <stdint.h> is not included and the two
 * fixed-width unsigned types used here are declared by hand instead. */
35 #ifdef __CUDACC_RTC__
36 typedef unsigned int uint32_t;
37 typedef unsigned long long uint64_t;
38 #else
39 # include <stdint.h>
40 #endif
/* `half` is a plain 16-bit unsigned integer typedef, not an arithmetic type. */
41 typedef unsigned short half;
/* Hand-written stand-in for the CUDA texture object handle type. */
42 typedef unsigned long long CUtexObject;
43 
/* Single-precision float limits, defined manually only when CYCLES_CUBIN_CC is
 * set -- presumably because <float.h> is unavailable in that build
 * (TODO confirm). */
44 #ifdef CYCLES_CUBIN_CC
45 # define FLT_MIN 1.175494350822287507969e-38f
46 # define FLT_MAX 340282346638528859811704183484516925440.0f
47 # define FLT_EPSILON 1.192092896e-07F
48 #endif
49 
/* Convert a single-precision float to a 16-bit half value on the device.
 *
 * The conversion is done by one inline PTX instruction, `cvt.rn.f16.f32`
 * (`.rn` rounding modifier). The result is written to a 16-bit register
 * operand ("=h" constraint) and returned as `half`.
 *
 * f: value to convert.
 * Returns the converted 16-bit value. */
__device__ half __float2half(const float f)
{
  half converted;
  asm("{ cvt.rn.f16.f32 %0, %1;}\n" : "=h"(converted) : "f"(f));
  return converted;
}
56 
57 /* Qualifier wrappers for different names on different devices */
58 
/* Default device-function qualifier: device code with an inline hint. */
59 #define ccl_device __device__ __inline__
/* ccl_device_inline is forced inline when __CUDA_ARCH__ is below 500 and is
 * only an inline hint otherwise; ccl_device_forceinline is forced inline in
 * both branches. An undefined __CUDA_ARCH__ evaluates as 0 inside #if, so the
 * first branch is then taken. */
60 #if __CUDA_ARCH__ < 500
61 # define ccl_device_inline __device__ __forceinline__
62 # define ccl_device_forceinline __device__ __forceinline__
63 #else
64 # define ccl_device_inline __device__ __inline__
65 # define ccl_device_forceinline __device__ __forceinline__
66 #endif
67 #define ccl_device_noinline __device__ __noinline__
/* The "noinline on CPU" variant is just the regular ccl_device here. */
68 #define ccl_device_noinline_cpu ccl_device
/* Address-space and aliasing qualifiers. The ones defined with no replacement
 * text expand to nothing in this header; ccl_static_constant, ccl_constant,
 * ccl_local and ccl_restrict map to __constant__, const, __shared__ and
 * __restrict__ respectively. */
69 #define ccl_global
70 #define ccl_static_constant __constant__
71 #define ccl_constant const
72 #define ccl_local __shared__
73 #define ccl_local_param
74 #define ccl_private
75 #define ccl_may_alias
76 #define ccl_addr_space
77 #define ccl_restrict __restrict__
78 #define ccl_loop_no_unroll
79 /* TODO(sergey): In theory we might use references with CUDA, however
80  * performance impact yet to be investigated.
81  */
82 #define ccl_ref
/* Alignment request, forwarded to the __align__(n) specifier. */
83 #define ccl_align(n) __align__(n)
84 #define ccl_optional_struct_init
85 
/* Fallthrough annotation: expands to nothing in this header. */
86 #define ATTR_FALLTHROUGH
87 
/* Largest work-group size: the square of CUDA_THREADS_BLOCK_WIDTH. That
 * constant is not defined in this header -- presumably it is supplied by the
 * build or by the including file (TODO confirm). */
88 #define CCL_MAX_LOCAL_SIZE (CUDA_THREADS_BLOCK_WIDTH * CUDA_THREADS_BLOCK_WIDTH)
89 
90 /* No assert supported for CUDA */
91 
/* Expands to nothing, so `cond` is never evaluated: any side effects inside
 * the argument are dropped as well. */
92 #define kernel_assert(cond)
93 
94 /* Types */
95 
96 #include "util/util_half.h"
97 #include "util/util_types.h"
98 
99 /* Work item functions */
100 
/* Index of the calling thread inside its thread block along one dimension.
 *
 * d: dimension selector -- 0 reads `threadIdx.x`, 1 reads `threadIdx.y`,
 *    2 reads `threadIdx.z`.
 * Returns the selected component of the CUDA built-in `threadIdx`, or 0 for
 * any other value of `d`. */
ccl_device_inline uint ccl_local_id(uint d)
{
  switch (d) {
    case 0:
      return threadIdx.x;
    case 1:
      return threadIdx.y;
    case 2:
      return threadIdx.z;
    default:
      /* Unknown dimension: report index 0 rather than failing. */
      return 0;
  }
}
114 
/* Global index along dimension `d`: group index times group size plus the
 * index inside the group. This is a macro and `d` is expanded three times, so
 * pass an argument without side effects. */
115 #define ccl_global_id(d) (ccl_group_id(d) * ccl_local_size(d) + ccl_local_id(d))
116 
/* Number of threads in a thread block along one dimension.
 *
 * d: dimension selector -- 0 reads `blockDim.x`, 1 reads `blockDim.y`,
 *    2 reads `blockDim.z`.
 * Returns the selected component of the CUDA built-in `blockDim`, or 0 for
 * any other value of `d`. */
ccl_device_inline uint ccl_local_size(uint d)
{
  switch (d) {
    case 0:
      return blockDim.x;
    case 1:
      return blockDim.y;
    case 2:
      return blockDim.z;
    default:
      /* Unknown dimension: a size of 0 is reported (not 1). */
      return 0;
  }
}
130 
/* Total size along dimension `d`: number of groups times group size. This is
 * a macro and `d` is expanded twice, so pass an argument without side
 * effects. */
131 #define ccl_global_size(d) (ccl_num_groups(d) * ccl_local_size(d))
132 
/* Index of the calling thread's block inside the grid along one dimension.
 *
 * d: dimension selector -- 0 reads `blockIdx.x`, 1 reads `blockIdx.y`,
 *    2 reads `blockIdx.z`.
 * Returns the selected component of the CUDA built-in `blockIdx`, or 0 for
 * any other value of `d`. */
ccl_device_inline uint ccl_group_id(uint d)
{
  switch (d) {
    case 0:
      return blockIdx.x;
    case 1:
      return blockIdx.y;
    case 2:
      return blockIdx.z;
    default:
      /* Unknown dimension: report index 0 rather than failing. */
      return 0;
  }
}
146 
/* Number of thread blocks in the grid along one dimension.
 *
 * d: dimension selector -- 0 reads `gridDim.x`, 1 reads `gridDim.y`,
 *    2 reads `gridDim.z`.
 * Returns the selected component of the CUDA built-in `gridDim`, or 0 for
 * any other value of `d`. */
ccl_device_inline uint ccl_num_groups(uint d)
{
  switch (d) {
    case 0:
      return gridDim.x;
    case 1:
      return gridDim.y;
    case 2:
      return gridDim.z;
    default:
      /* Unknown dimension: a count of 0 is reported (not 1). */
      return 0;
  }
}
160 
161 /* Textures */
162 
163 /* Use arrays for regular data. */
/* Fetch element `index` of `t` by plain subscripting. `index` is
 * parenthesized in the expansion but `t` is not, so `t` should be a simple
 * name or an already-parenthesized expression. */
164 #define kernel_tex_fetch(t, index) t[(index)]
/* Access to the whole array: expands to `t` itself. */
165 #define kernel_tex_array(t) (t)
166 
/* `kernel_data` is an alias for the identifier `__data`, which is not declared
 * in this header -- presumably defined by the kernel globals
 * (TODO confirm). */
167 #define kernel_data __data
168 
169 /* Use fast math functions */
170 
/* Every later use of these standard single-precision math names is redirected
 * to the corresponding CUDA fast intrinsic, trading accuracy for speed.
 * Arguments are cast to float first, so double or integer arguments are
 * narrowed before the call. */
171 #define cosf(x) __cosf(((float)(x)))
172 #define sinf(x) __sinf(((float)(x)))
173 #define powf(x, y) __powf(((float)(x)), ((float)(y)))
174 #define tanf(x) __tanf(((float)(x)))
175 #define logf(x) __logf(((float)(x)))
176 #define expf(x) __expf(((float)(x)))
177 
178 #endif /* __KERNEL_COMPAT_CUDA_H__ */
unsigned int uint
Definition: BLI_sys_types.h:83
Definition: util_half.h:41
__device__ half __float2half(const float f)
unsigned short half
ccl_device_inline uint ccl_local_id(uint d)
#define ccl_device_inline
ccl_device_inline uint ccl_local_size(uint d)
ccl_device_inline uint ccl_group_id(uint d)
ccl_device_inline uint ccl_num_groups(uint d)
unsigned long long CUtexObject
unsigned int uint32_t
Definition: stdint.h:83
unsigned __int64 uint64_t
Definition: stdint.h:93