Blender
V2.93
intern
cycles
kernel
kernel_compat_cpu.h
Go to the documentation of this file.
1
/*
2
* Copyright 2011-2013 Blender Foundation
3
*
4
* Licensed under the Apache License, Version 2.0 (the "License");
5
* you may not use this file except in compliance with the License.
6
* You may obtain a copy of the License at
7
*
8
* http://www.apache.org/licenses/LICENSE-2.0
9
*
10
* Unless required by applicable law or agreed to in writing, software
11
* distributed under the License is distributed on an "AS IS" BASIS,
12
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13
* See the License for the specific language governing permissions and
14
* limitations under the License.
15
*/
16
17
#ifndef __KERNEL_COMPAT_CPU_H__
18
#define __KERNEL_COMPAT_CPU_H__
19
20
#define __KERNEL_CPU__
21
22
/* Release kernel has too many false-positive maybe-uninitialized warnings,
23
* which makes it possible to miss actual warnings.
24
*/
25
#if (defined(__GNUC__) && !defined(__clang__)) && defined(NDEBUG)
26
# pragma GCC diagnostic ignored "-Wmaybe-uninitialized"
27
# pragma GCC diagnostic ignored "-Wuninitialized"
28
#endif
29
30
/* Selective nodes compilation.
 *
 * Each macro below is only given a default when it has not been defined
 * before this point: __NODES_MAX_GROUP__ falls back to NODE_GROUP_LEVEL_MAX
 * and __NODES_FEATURES__ falls back to NODE_FEATURE_ALL. Neither of those two
 * constants is defined in this header. */
31
#ifndef __NODES_MAX_GROUP__
32
# define __NODES_MAX_GROUP__ NODE_GROUP_LEVEL_MAX
33
#endif
34
#ifndef __NODES_FEATURES__
35
# define __NODES_FEATURES__ NODE_FEATURE_ALL
36
#endif
37
38
#include "
util/util_half.h
"
39
#include "
util/util_math.h
"
40
#include "
util/util_simd.h
"
41
#include "
util/util_texture.h
"
42
#include "
util/util_types.h
"
43
44
/* Address space qualifier; defined empty here, so it expands to nothing. */
#define ccl_addr_space
45
46
/* Work-item ids. The local id is the constant 0 (the argument is ignored),
 * while the global id is read from `kg->global_id`, so a variable named `kg`
 * must be in scope wherever ccl_global_id() is expanded. */
#define ccl_local_id(d) 0
47
#define ccl_global_id(d) (kg->global_id[d])
48
49
/* Work-group sizes. The local size is the constant 1 (the argument is
 * ignored); the global size is read from `kg->global_size`. */
#define ccl_local_size(d) 1
50
#define ccl_global_size(d) (kg->global_size[d])
51
52
/* Group id and group count are plain aliases of the global id and the
 * global size. */
#define ccl_group_id(d) ccl_global_id(d)
53
#define ccl_num_groups(d) ccl_global_size(d)
54
55
/* On x86_64, versions of glibc < 2.16 have an issue where expf is
 * much slower than the double version. This was fixed in glibc 2.16.
 *
 * For the affected versions expf() is redirected to the double precision
 * exp() and the result is narrowed back to float. The version test compares
 * major and minor in order, so it selects exactly the releases older than
 * 2.16.
 */
#if !defined(__KERNEL_GPU__) && defined(__x86_64__) && defined(__GNU_LIBRARY__) && \
    defined(__GLIBC__) && defined(__GLIBC_MINOR__) && \
    (__GLIBC__ < 2 || (__GLIBC__ == 2 && __GLIBC_MINOR__ < 16))
#  define expf(x) ((float)exp((double)(x)))
#endif
63
64
CCL_NAMESPACE_BEGIN
65
66
/* Assertions inside the kernel only work for the CPU device, so we wrap it in
67
* a macro which is empty for other devices */
68
69
/* Forwards the condition unchanged to assert(). No <cassert>/<assert.h>
 * include is visible in this header, so assert is presumably provided by one
 * of the util headers included above -- NOTE(review): confirm. */
#define kernel_assert(cond) assert(cond)
70
71
/* Texture types to be compatible with CUDA textures. These are really just
72
* simple arrays and after inlining fetch hopefully revert to being a simple
73
* pointer lookup. */
74
template
<
typename
T>
struct
texture
{
75
ccl_always_inline
const
T
&
fetch
(
int
index)
76
{
77
kernel_assert
(index >= 0 && index <
width
);
78
return
data
[index];
79
}
80
#if defined(__KERNEL_AVX__) || defined(__KERNEL_AVX2__)
81
/* Reads 256 bytes but indexes in blocks of 128 bytes to maintain
82
* compatibility with existing indices and data structures.
83
*/
84
ccl_always_inline
avxf
fetch_avxf(
const
int
index)
85
{
86
kernel_assert
(index >= 0 && (index + 1) <
width
);
87
ssef *ssef_data = (ssef *)
data
;
88
ssef *ssef_node_data = &ssef_data[index];
89
return
_mm256_loadu_ps((
float
*)ssef_node_data);
90
}
91
#endif
92
93
#ifdef __KERNEL_SSE2__
94
ccl_always_inline
ssef fetch_ssef(
int
index)
95
{
96
kernel_assert
(index >= 0 && index <
width
);
97
return
((ssef *)
data
)[index];
98
}
99
100
ccl_always_inline
ssei fetch_ssei(
int
index)
101
{
102
kernel_assert
(index >= 0 && index <
width
);
103
return
((ssei *)
data
)[index];
104
}
105
#endif
106
107
T
*
data
;
108
int
width
;
109
};
110
111
/* Macros to handle different memory storage on different devices */
112
113
/* Texture access helpers. `tex` is pasted in as the name of a member of `kg`,
 * so a variable named `kg` must be in scope at every use. Each fetch variant
 * forwards `index` to the member function of the same name. */
#define kernel_tex_fetch(tex, index) (kg->tex.fetch(index))
114
#define kernel_tex_fetch_avxf(tex, index) (kg->tex.fetch_avxf(index))
115
#define kernel_tex_fetch_ssef(tex, index) (kg->tex.fetch_ssef(index))
116
#define kernel_tex_fetch_ssei(tex, index) (kg->tex.fetch_ssei(index))
117
/* NOTE(review): the texture struct defined in this header has no lookup()
 * member, so this macro only compiles for members whose type provides one --
 * confirm whether it is still used. */
#define kernel_tex_lookup(tex, t, offset, size) (kg->tex.lookup(t, offset, size))
118
/* Expands to the `data` member of the named texture. */
#define kernel_tex_array(tex) (kg->tex.data)
119
120
/* Expands to the `__data` member of `kg`. */
#define kernel_data (kg->__data)
121
122
#ifdef __KERNEL_SSE2__
123
/* Short names for the vector3 instantiations over the sseb, ssef and ssei
 * types. */
using sse3b = vector3<sseb>;
using sse3f = vector3<ssef>;
using sse3i = vector3<ssei>;
126
127
/* Pass each component of `a` to print_sseb(), in x, y, z order.
 *
 * label: forwarded unchanged to every print_sseb() call.
 * a: vector whose three components are printed; it is not modified here.
 *
 * NOTE(review): `a` could probably be taken by const reference if
 * print_sseb() accepts a const argument -- confirm before changing. */
ccl_device_inline void print_sse3b(const char *label, sse3b &a)
{
  print_sseb(label, a.x);
  print_sseb(label, a.y);
  print_sseb(label, a.z);
}
133
134
/* Pass each component of `a` to print_ssef(), in x, y, z order.
 *
 * label: forwarded unchanged to every print_ssef() call.
 * a: vector whose three components are printed; it is not modified here.
 *
 * NOTE(review): `a` could probably be taken by const reference if
 * print_ssef() accepts a const argument -- confirm before changing. */
ccl_device_inline void print_sse3f(const char *label, sse3f &a)
{
  print_ssef(label, a.x);
  print_ssef(label, a.y);
  print_ssef(label, a.z);
}
140
141
/* Pass each component of `a` to print_ssei(), in x, y, z order.
 *
 * label: forwarded unchanged to every print_ssei() call.
 * a: vector whose three components are printed; it is not modified here.
 *
 * NOTE(review): `a` could probably be taken by const reference if
 * print_ssei() accepts a const argument -- confirm before changing. */
ccl_device_inline void print_sse3i(const char *label, sse3i &a)
{
  print_ssei(label, a.x);
  print_ssei(label, a.y);
  print_ssei(label, a.z);
}
147
148
# if defined(__KERNEL_AVX__) || defined(__KERNEL_AVX2__)
/* Short name for the vector3 instantiation over avxf; only declared when one
 * of the AVX kernel macros is defined. */
using avx3f = vector3<avxf>;
# endif
151
152
#endif
153
154
CCL_NAMESPACE_END
155
156
#endif
/* __KERNEL_COMPAT_CPU_H__ */
vector3
Definition:
util_types_vector3.h:27
label
const char * label
Definition:
deg_debug_stats_gnuplot.cc:47
kernel_assert
#define kernel_assert(cond)
Definition:
kernel_compat_cpu.h:69
ccl_device_inline
#define ccl_device_inline
Definition:
kernel_compat_cuda.h:61
CCL_NAMESPACE_END
#define CCL_NAMESPACE_END
Definition:
kernel_compat_cuda.h:23
T
#define T
Definition:
mball_tessellate.c:278
CCL_NAMESPACE_BEGIN
Definition:
blender_python.cpp:54
Freestyle::a
static unsigned a[3]
Definition:
RandGen.cpp:92
avxf
Definition:
util_avxf.h:24
texture
Definition:
kernel_compat_cpu.h:74
texture::data
T * data
Definition:
kernel_compat_cpu.h:107
texture::width
int width
Definition:
kernel_compat_cpu.h:108
texture::fetch
ccl_always_inline const T & fetch(int index)
Definition:
kernel_compat_cpu.h:75
ccl_always_inline
#define ccl_always_inline
Definition:
util_defines.h:75
util_half.h
util_math.h
util_simd.h
util_texture.h
util_types.h
Generated on Tue Jan 31 2023 14:37:24 for Blender by
doxygen
1.9.1