Blender  V2.93
kernel_compat_cpu.h
Go to the documentation of this file.
1 /*
2  * Copyright 2011-2013 Blender Foundation
3  *
4  * Licensed under the Apache License, Version 2.0 (the "License");
5  * you may not use this file except in compliance with the License.
6  * You may obtain a copy of the License at
7  *
8  * http://www.apache.org/licenses/LICENSE-2.0
9  *
10  * Unless required by applicable law or agreed to in writing, software
11  * distributed under the License is distributed on an "AS IS" BASIS,
12  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13  * See the License for the specific language governing permissions and
14  * limitations under the License.
15  */
16 
17 #ifndef __KERNEL_COMPAT_CPU_H__
18 #define __KERNEL_COMPAT_CPU_H__
19 
20 #define __KERNEL_CPU__
21 
22 /* Release kernel has too many false-positive maybe-uninitialized warnings,
23  * which makes it possible to miss actual warnings, so both warnings are
24  * silenced here for GCC (not Clang) builds that define NDEBUG. */
25 #if (defined(__GNUC__) && !defined(__clang__)) && defined(NDEBUG)
26 # pragma GCC diagnostic ignored "-Wmaybe-uninitialized"
27 # pragma GCC diagnostic ignored "-Wuninitialized"
28 #endif
29 
30 /* Selective nodes compilation: unless defined before this header, __NODES_MAX_GROUP__ defaults to NODE_GROUP_LEVEL_MAX and __NODES_FEATURES__ to NODE_FEATURE_ALL. */
31 #ifndef __NODES_MAX_GROUP__
32 # define __NODES_MAX_GROUP__ NODE_GROUP_LEVEL_MAX
33 #endif
34 #ifndef __NODES_FEATURES__
35 # define __NODES_FEATURES__ NODE_FEATURE_ALL
36 #endif
37 
38 #include "util/util_half.h"
39 #include "util/util_math.h"
40 #include "util/util_simd.h"
41 #include "util/util_texture.h"
42 #include "util/util_types.h"
43 
/* CPU definitions of the ccl_* address-space and id/size macros:
 * - ccl_addr_space expands to nothing;
 * - the local id and local size are the constants 0 and 1;
 * - the global id and global size are read from `kg`, which must therefore be
 *   in scope wherever these macros are used;
 * - the group id and number of groups expand to the global id and size. */
44 #define ccl_addr_space
45 
46 #define ccl_local_id(d) 0
47 #define ccl_global_id(d) (kg->global_id[d])
48 
49 #define ccl_local_size(d) 1
50 #define ccl_global_size(d) (kg->global_size[d])
51 
52 #define ccl_group_id(d) ccl_global_id(d)
53 #define ccl_num_groups(d) ccl_global_size(d)
54 
55 /* On x86_64, versions of glibc < 2.16 have an issue where expf is
56  * much slower than the double version. This was fixed in glibc 2.16.
 * When the condition below holds, expf(x) is redefined to call the double
 * precision exp() and cast the result back to float.
 * NOTE(review): `defined(__x86_64__)` is tested twice in the condition; the
 * second test is redundant.
57  */
58 #if !defined(__KERNEL_GPU__) && defined(__x86_64__) && defined(__x86_64__) && \
59  defined(__GNU_LIBRARY__) && defined(__GLIBC__) && defined(__GLIBC_MINOR__) && \
60  (__GLIBC__ <= 2 && __GLIBC_MINOR__ < 16)
61 # define expf(x) ((float)exp((double)(x)))
62 #endif
63 
65 
66 /* Assertions inside the kernel only work for the CPU device, so we wrap them
67  * in a macro which is empty for other devices. Here it forwards to assert(). */
68 
69 #define kernel_assert(cond) assert(cond)
70 
71 /* Texture types to be compatible with CUDA textures. These are really just
72  * simple arrays and after inlining fetch hopefully revert to being a simple
73  * pointer lookup. */
74 template<typename T> struct texture {
 /* Returns a reference to data[index]. The range check against `width` is
  * performed through kernel_assert only. */
75  ccl_always_inline const T &fetch(int index)
76  {
77  kernel_assert(index >= 0 && index < width);
78  return data[index];
79  }
80 #if defined(__KERNEL_AVX__) || defined(__KERNEL_AVX2__)
81  /* Reads 256 bits but indexes in blocks of 128 bits (`data` is addressed as
82  * an array of ssef) to maintain compatibility with existing indices and data
83  * structures. The assert also requires index + 1 to be below `width`. */
84  ccl_always_inline avxf fetch_avxf(const int index)
85  {
86  kernel_assert(index >= 0 && (index + 1) < width);
87  ssef *ssef_data = (ssef *)data;
88  ssef *ssef_node_data = &ssef_data[index];
 /* Unaligned load starting at the selected ssef entry. */
89  return _mm256_loadu_ps((float *)ssef_node_data);
90  }
91 #endif
92 
93 #ifdef __KERNEL_SSE2__
 /* Returns entry `index` of `data` viewed as an array of ssef. */
94  ccl_always_inline ssef fetch_ssef(int index)
95  {
96  kernel_assert(index >= 0 && index < width);
97  return ((ssef *)data)[index];
98  }
99 
 /* Returns entry `index` of `data` viewed as an array of ssei. */
100  ccl_always_inline ssei fetch_ssei(int index)
101  {
102  kernel_assert(index >= 0 && index < width);
103  return ((ssei *)data)[index];
104  }
105 #endif
106 
 /* Backing array, and the bound used by the assertions in the fetch methods. */
107  T *data;
108  int width;
109 };
110 
111 /* Macros to handle different memory storage on different devices */
/* Here each macro forwards to the member of `kg` named by `tex`, so a `kg`
 * must be in scope at the point of use; kernel_data expands to `kg->__data`.
 * NOTE(review): kernel_tex_lookup calls a `lookup` method, which the `texture`
 * template declared in this header does not provide -- confirm whether this
 * macro is still used. */
112 
113 #define kernel_tex_fetch(tex, index) (kg->tex.fetch(index))
114 #define kernel_tex_fetch_avxf(tex, index) (kg->tex.fetch_avxf(index))
115 #define kernel_tex_fetch_ssef(tex, index) (kg->tex.fetch_ssef(index))
116 #define kernel_tex_fetch_ssei(tex, index) (kg->tex.fetch_ssei(index))
117 #define kernel_tex_lookup(tex, t, offset, size) (kg->tex.lookup(t, offset, size))
118 #define kernel_tex_array(tex) (kg->tex.data)
119 
120 #define kernel_data (kg->__data)
121 
122 #ifdef __KERNEL_SSE2__
123 typedef vector3<sseb> sse3b;
124 typedef vector3<ssef> sse3f;
125 typedef vector3<ssei> sse3i;
126 
/* Prints the x, y and z components of `a`, in that order, by calling
 * print_sseb on each one with the same `label`. */
127 ccl_device_inline void print_sse3b(const char *label, sse3b &a)
128 {
129  print_sseb(label, a.x);
130  print_sseb(label, a.y);
131  print_sseb(label, a.z);
132 }
133 
/* Prints the x, y and z components of `a`, in that order, by calling
 * print_ssef on each one with the same `label`. */
134 ccl_device_inline void print_sse3f(const char *label, sse3f &a)
135 {
136  print_ssef(label, a.x);
137  print_ssef(label, a.y);
138  print_ssef(label, a.z);
139 }
140 
/* Prints the x, y and z components of `a`, in that order, by calling
 * print_ssei on each one with the same `label`. */
141 ccl_device_inline void print_sse3i(const char *label, sse3i &a)
142 {
143  print_ssei(label, a.x);
144  print_ssei(label, a.y);
145  print_ssei(label, a.z);
146 }
147 
148 # if defined(__KERNEL_AVX__) || defined(__KERNEL_AVX2__)
149 typedef vector3<avxf> avx3f;
150 # endif
151 
152 #endif
153 
155 
156 #endif /* __KERNEL_COMPAT_CPU_H__ */
const char * label
#define kernel_assert(cond)
#define ccl_device_inline
#define CCL_NAMESPACE_END
#define T
static unsigned a[3]
Definition: RandGen.cpp:92
Definition: util_avxf.h:24
ccl_always_inline const T & fetch(int index)
#define ccl_always_inline
Definition: util_defines.h:75