Blender V4.5
cycles/kernel/device/gpu/image.h
Go to the documentation of this file.
1/* SPDX-FileCopyrightText: 2017-2022 Blender Foundation
2 *
3 * SPDX-License-Identifier: Apache-2.0 */
4
5#pragma once
6
8
9#if !defined __KERNEL_METAL__
10# ifdef WITH_NANOVDB
11# include "kernel/util/nanovdb.h"
12# endif
13#endif
14
15ccl_device_inline float frac(const float x, ccl_private int *ix)
16{
17 int i = float_to_int(x) - ((x < 0.0f) ? 1 : 0);
18 *ix = i;
19 return x - (float)i;
20}
21
22/* w0, w1, w2, and w3 are the four cubic B-spline basis functions. */
23ccl_device float cubic_w0(const float a)
24{
25 return (1.0f / 6.0f) * (a * (a * (-a + 3.0f) - 3.0f) + 1.0f);
26}
27ccl_device float cubic_w1(const float a)
28{
29 return (1.0f / 6.0f) * (a * a * (3.0f * a - 6.0f) + 4.0f);
30}
31ccl_device float cubic_w2(const float a)
32{
33 return (1.0f / 6.0f) * (a * (a * (-3.0f * a + 3.0f) + 3.0f) + 1.0f);
34}
35ccl_device float cubic_w3(const float a)
36{
37 return (1.0f / 6.0f) * (a * a * a);
38}
39
40/* g0 and g1 are the two amplitude functions. */
41ccl_device float cubic_g0(const float a)
42{
43 return cubic_w0(a) + cubic_w1(a);
44}
45ccl_device float cubic_g1(const float a)
46{
47 return cubic_w2(a) + cubic_w3(a);
48}
49
50/* h0 and h1 are the two offset functions */
51ccl_device float cubic_h0(const float a)
52{
53 return (cubic_w1(a) / cubic_g0(a)) - 1.0f;
54}
55ccl_device float cubic_h1(const float a)
56{
57 return (cubic_w3(a) / cubic_g1(a)) + 1.0f;
58}
59
60/* Fast bicubic texture lookup using 4 bilinear lookups, adapted from CUDA samples. */
61template<typename T>
63 float x,
64 float y)
65{
67
68 x = (x * info.width) - 0.5f;
69 y = (y * info.height) - 0.5f;
70
71 float px = floorf(x);
72 float py = floorf(y);
73 float fx = x - px;
74 float fy = y - py;
75
76 float g0x = cubic_g0(fx);
77 float g1x = cubic_g1(fx);
78 /* Note +0.5 offset to compensate for CUDA linear filtering convention. */
79 float x0 = (px + cubic_h0(fx) + 0.5f) / info.width;
80 float x1 = (px + cubic_h1(fx) + 0.5f) / info.width;
81 float y0 = (py + cubic_h0(fy) + 0.5f) / info.height;
82 float y1 = (py + cubic_h1(fy) + 0.5f) / info.height;
83
84 return cubic_g0(fy) * (g0x * ccl_gpu_tex_object_read_2D<T>(tex, x0, y0) +
85 g1x * ccl_gpu_tex_object_read_2D<T>(tex, x1, y0)) +
86 cubic_g1(fy) * (g0x * ccl_gpu_tex_object_read_2D<T>(tex, x0, y1) +
87 g1x * ccl_gpu_tex_object_read_2D<T>(tex, x1, y1));
88}
89
90/* Fast tricubic texture lookup using 8 trilinear lookups. */
91template<typename T>
94{
96
97 x = (x * info.width) - 0.5f;
98 y = (y * info.height) - 0.5f;
99 z = (z * info.depth) - 0.5f;
100
101 float px = floorf(x);
102 float py = floorf(y);
103 float pz = floorf(z);
104 float fx = x - px;
105 float fy = y - py;
106 float fz = z - pz;
107
108 float g0x = cubic_g0(fx);
109 float g1x = cubic_g1(fx);
110 float g0y = cubic_g0(fy);
111 float g1y = cubic_g1(fy);
112 float g0z = cubic_g0(fz);
113 float g1z = cubic_g1(fz);
114
115 /* Note +0.5 offset to compensate for CUDA linear filtering convention. */
116 float x0 = (px + cubic_h0(fx) + 0.5f) / info.width;
117 float x1 = (px + cubic_h1(fx) + 0.5f) / info.width;
118 float y0 = (py + cubic_h0(fy) + 0.5f) / info.height;
119 float y1 = (py + cubic_h1(fy) + 0.5f) / info.height;
120 float z0 = (pz + cubic_h0(fz) + 0.5f) / info.depth;
121 float z1 = (pz + cubic_h1(fz) + 0.5f) / info.depth;
122
123 return g0z * (g0y * (g0x * ccl_gpu_tex_object_read_3D<T>(tex, x0, y0, z0) +
124 g1x * ccl_gpu_tex_object_read_3D<T>(tex, x1, y0, z0)) +
125 g1y * (g0x * ccl_gpu_tex_object_read_3D<T>(tex, x0, y1, z0) +
126 g1x * ccl_gpu_tex_object_read_3D<T>(tex, x1, y1, z0))) +
127 g1z * (g0y * (g0x * ccl_gpu_tex_object_read_3D<T>(tex, x0, y0, z1) +
128 g1x * ccl_gpu_tex_object_read_3D<T>(tex, x1, y0, z1)) +
129 g1y * (g0x * ccl_gpu_tex_object_read_3D<T>(tex, x0, y1, z1) +
130 g1x * ccl_gpu_tex_object_read_3D<T>(tex, x1, y1, z1)));
131}
132
133#ifdef WITH_NANOVDB
134template<typename OutT, typename Acc>
135ccl_device OutT kernel_tex_image_interp_trilinear_nanovdb(ccl_private Acc &acc,
136 const float x,
137 float y,
138 const float z)
139{
140 int ix, iy, iz;
141 const float tx = frac(x - 0.5f, &ix);
142 const float ty = frac(y - 0.5f, &iy);
143 const float tz = frac(z - 0.5f, &iz);
144
145 return mix(mix(mix(OutT(acc.getValue(nanovdb::Coord(ix, iy, iz))),
146 OutT(acc.getValue(nanovdb::Coord(ix, iy, iz + 1))),
147 tz),
148 mix(OutT(acc.getValue(nanovdb::Coord(ix, iy + 1, iz + 1))),
149 OutT(acc.getValue(nanovdb::Coord(ix, iy + 1, iz))),
150 1.0f - tz),
151 ty),
152 mix(mix(OutT(acc.getValue(nanovdb::Coord(ix + 1, iy + 1, iz))),
153 OutT(acc.getValue(nanovdb::Coord(ix + 1, iy + 1, iz + 1))),
154 tz),
155 mix(OutT(acc.getValue(nanovdb::Coord(ix + 1, iy, iz + 1))),
156 OutT(acc.getValue(nanovdb::Coord(ix + 1, iy, iz))),
157 1.0f - tz),
158 1.0f - ty),
159 tx);
160}
161
162template<typename OutT, typename Acc>
163ccl_device OutT kernel_tex_image_interp_tricubic_nanovdb(ccl_private Acc &acc,
164 const float x,
165 const float y,
166 const float z)
167{
168 int ix, iy, iz;
169 int nix, niy, niz;
170 int pix, piy, piz;
171 int nnix, nniy, nniz;
172
173 /* A -0.5 offset is used to center the cubic samples around the sample point. */
174 const float tx = frac(x - 0.5f, &ix);
175 const float ty = frac(y - 0.5f, &iy);
176 const float tz = frac(z - 0.5f, &iz);
177
178 pix = ix - 1;
179 piy = iy - 1;
180 piz = iz - 1;
181 nix = ix + 1;
182 niy = iy + 1;
183 niz = iz + 1;
184 nnix = ix + 2;
185 nniy = iy + 2;
186 nniz = iz + 2;
187
188 const int xc[4] = {pix, ix, nix, nnix};
189 const int yc[4] = {piy, iy, niy, nniy};
190 const int zc[4] = {piz, iz, niz, nniz};
191 float u[4], v[4], w[4];
192
193 /* Some helper macros to keep code size reasonable.
194 * Lets the compiler inline all the matrix multiplications.
195 */
196# define SET_CUBIC_SPLINE_WEIGHTS(u, t) \
197 { \
198 u[0] = (((-1.0f / 6.0f) * t + 0.5f) * t - 0.5f) * t + (1.0f / 6.0f); \
199 u[1] = ((0.5f * t - 1.0f) * t) * t + (2.0f / 3.0f); \
200 u[2] = ((-0.5f * t + 0.5f) * t + 0.5f) * t + (1.0f / 6.0f); \
201 u[3] = (1.0f / 6.0f) * t * t * t; \
202 } \
203 (void)0
204
205# define DATA(x, y, z) (OutT(acc.getValue(nanovdb::Coord(xc[x], yc[y], zc[z]))))
206# define COL_TERM(col, row) \
207 (v[col] * (u[0] * DATA(0, col, row) + u[1] * DATA(1, col, row) + u[2] * DATA(2, col, row) + \
208 u[3] * DATA(3, col, row)))
209# define ROW_TERM(row) \
210 (w[row] * (COL_TERM(0, row) + COL_TERM(1, row) + COL_TERM(2, row) + COL_TERM(3, row)))
211
215
216 /* Actual interpolation. */
217 return ROW_TERM(0) + ROW_TERM(1) + ROW_TERM(2) + ROW_TERM(3);
218
219# undef COL_TERM
220# undef ROW_TERM
221# undef DATA
222# undef SET_CUBIC_SPLINE_WEIGHTS
223}
224
225# if defined(__KERNEL_METAL__)
226template<typename OutT, typename T>
227__attribute__((noinline)) OutT kernel_tex_image_interp_nanovdb(const ccl_global TextureInfo &info,
228 const float x,
229 const float y,
230 const float z,
231 const uint interpolation)
232# else
233template<typename OutT, typename T>
234ccl_device_noinline OutT kernel_tex_image_interp_nanovdb(const ccl_global TextureInfo &info,
235 const float x,
236 const float y,
237 const float z,
238 const uint interpolation)
239# endif
240{
241 using namespace nanovdb;
242
243 ccl_global NanoGrid<T> *const grid = (ccl_global NanoGrid<T> *)info.data;
244
245 switch (interpolation) {
247 ReadAccessor<T> acc(grid->tree().root());
249 return OutT(acc.getValue(coord));
250 }
252 CachedReadAccessor<T> acc(grid->tree().root());
253 return kernel_tex_image_interp_trilinear_nanovdb<OutT>(acc, x, y, z);
254 }
255 default: {
256 CachedReadAccessor<T> acc(grid->tree().root());
257 return kernel_tex_image_interp_tricubic_nanovdb<OutT>(acc, x, y, z);
258 }
259 }
260}
261#endif
262
263ccl_device float4 kernel_tex_image_interp(KernelGlobals kg, const int id, const float x, float y)
264{
265 const ccl_global TextureInfo &info = kernel_data_fetch(texture_info, id);
266
267 /* float4, byte4, ushort4 and half4 */
268 const int texture_type = info.data_type;
269 if (texture_type == IMAGE_DATA_TYPE_FLOAT4 || texture_type == IMAGE_DATA_TYPE_BYTE4 ||
270 texture_type == IMAGE_DATA_TYPE_HALF4 || texture_type == IMAGE_DATA_TYPE_USHORT4)
271 {
272 if (info.interpolation == INTERPOLATION_CUBIC || info.interpolation == INTERPOLATION_SMART) {
274 }
275 else {
278 }
279 }
280 /* float, byte and half */
281 else {
282 float f;
283
284 if (info.interpolation == INTERPOLATION_CUBIC || info.interpolation == INTERPOLATION_SMART) {
286 }
287 else {
290 }
291
292 return make_float4(f, f, f, 1.0f);
293 }
294}
295
297 const int id,
298 float3 P,
300{
301 const ccl_global TextureInfo &info = kernel_data_fetch(texture_info, id);
302
303 if (info.use_transform_3d) {
304 P = transform_point(&info.transform_3d, P);
305 }
306
307 const float x = P.x;
308 const float y = P.y;
309 const float z = P.z;
310
311 uint interpolation = (interp == INTERPOLATION_NONE) ? info.interpolation : interp;
312 const int texture_type = info.data_type;
313
314#ifdef WITH_NANOVDB
315 if (texture_type == IMAGE_DATA_TYPE_NANOVDB_FLOAT) {
316 float f = kernel_tex_image_interp_nanovdb<float, float>(info, x, y, z, interpolation);
317 return make_float4(f, f, f, 1.0f);
318 }
319 if (texture_type == IMAGE_DATA_TYPE_NANOVDB_FLOAT3) {
320 float3 f = kernel_tex_image_interp_nanovdb<float3, packed_float3>(
321 info, x, y, z, interpolation);
322 return make_float4(f, 1.0f);
323 }
324 if (texture_type == IMAGE_DATA_TYPE_NANOVDB_FPN) {
325 float f = kernel_tex_image_interp_nanovdb<float, nanovdb::FpN>(info, x, y, z, interpolation);
326 return make_float4(f, f, f, 1.0f);
327 }
328 if (texture_type == IMAGE_DATA_TYPE_NANOVDB_FP16) {
329 float f = kernel_tex_image_interp_nanovdb<float, nanovdb::Fp16>(info, x, y, z, interpolation);
330 return make_float4(f, f, f, 1.0f);
331 }
332#endif
333 if (texture_type == IMAGE_DATA_TYPE_FLOAT4 || texture_type == IMAGE_DATA_TYPE_BYTE4 ||
334 texture_type == IMAGE_DATA_TYPE_HALF4 || texture_type == IMAGE_DATA_TYPE_USHORT4)
335 {
336 if (interpolation == INTERPOLATION_CUBIC || interpolation == INTERPOLATION_SMART) {
338 }
339 else {
342 }
343 }
344 else {
345 float f;
346
347 if (interpolation == INTERPOLATION_CUBIC || interpolation == INTERPOLATION_SMART) {
349 }
350 else {
353 }
354
355 return make_float4(f, f, f, 1.0f);
356 }
357}
358
unsigned int uint
ATTR_WARN_UNUSED_RESULT const BMVert * v
SIMD_FORCE_INLINE const btScalar & z() const
Return the z value.
Definition btQuadWord.h:117
SIMD_FORCE_INLINE const btScalar & w() const
Return the w value.
Definition btQuadWord.h:119
static __attribute__((constructor)) void cpu_check()
Definition cpu_check.cc:94
#define SET_CUBIC_SPLINE_WEIGHTS(u, t)
#define ROW_TERM(row)
ccl_device float cubic_g0(const float a)
ccl_device_noinline T kernel_tex_image_interp_bicubic(const ccl_global TextureInfo &info, float x, float y)
ccl_device float cubic_h1(const float a)
ccl_device float cubic_w3(const float a)
CCL_NAMESPACE_BEGIN ccl_device_inline float frac(const float x, ccl_private int *ix)
ccl_device float4 kernel_tex_image_interp_3d(KernelGlobals kg, const int id, float3 P, InterpolationType interp)
ccl_device float4 kernel_tex_image_interp(KernelGlobals kg, const int id, const float x, float y)
ccl_device float cubic_g1(const float a)
ccl_device_noinline T kernel_tex_image_interp_tricubic(const ccl_global TextureInfo &info, float x, float y, float z)
ccl_device float cubic_w2(const float a)
ccl_device float cubic_h0(const float a)
ccl_device float cubic_w0(const float a)
ccl_device float cubic_w1(const float a)
#define kernel_data_fetch(name, index)
#define ccl_device
#define ccl_private
const ThreadKernelGlobalsCPU * KernelGlobals
#define ccl_device_inline
#define ccl_global
#define ccl_device_noinline
ccl_device_forceinline T ccl_gpu_tex_object_read_2D(const ccl_gpu_tex_object_2D texobj, const float x, const float y)
CUtexObject ccl_gpu_tex_object_3D
ccl_device_forceinline T ccl_gpu_tex_object_read_3D(const ccl_gpu_tex_object_3D texobj, const float x, const float y, const float z)
#define CCL_NAMESPACE_END
CUtexObject ccl_gpu_tex_object_2D
ccl_device_forceinline float4 make_float4(const float x, const float y, const float z, const float w)
#define floorf(x)
ccl_device_forceinline float4 ccl_gpu_tex_object_read_2D< float4 >(const ccl_gpu_tex_object_2D texobj, const float x, const float y)
ccl_device_forceinline float ccl_gpu_tex_object_read_3D< float >(const ccl_gpu_tex_object_3D texobj, const float x, const float y, const float z)
ccl_device_forceinline float4 ccl_gpu_tex_object_read_3D< float4 >(const ccl_gpu_tex_object_3D texobj, const float x, const float y, const float z)
ccl_device_forceinline float ccl_gpu_tex_object_read_2D< float >(const ccl_gpu_tex_object_2D texobj, const float x, const float y)
VecBase< float, 4 > float4
#define noinline
#define mix(a, b, c)
Definition hash.h:35
ccl_device_inline int float_to_int(const float f)
Definition math_base.h:407
ccl_device_inline float interp(const float a, const float b, const float t)
Definition math_base.h:502
#define T
Grid< NanoTree< BuildT > > NanoGrid
Definition nanovdb.h:322
i
Definition text_draw.cc:230
ccl_device_inline float3 transform_point(const ccl_private Transform *t, const float3 a)
Definition transform.h:56
@ IMAGE_DATA_TYPE_NANOVDB_FP16
@ IMAGE_DATA_TYPE_FLOAT4
@ IMAGE_DATA_TYPE_USHORT4
@ IMAGE_DATA_TYPE_NANOVDB_FLOAT
@ IMAGE_DATA_TYPE_NANOVDB_FLOAT3
@ IMAGE_DATA_TYPE_BYTE4
@ IMAGE_DATA_TYPE_HALF4
@ IMAGE_DATA_TYPE_NANOVDB_FPN
InterpolationType
@ INTERPOLATION_LINEAR
@ INTERPOLATION_SMART
@ INTERPOLATION_NONE
@ INTERPOLATION_CLOSEST
@ INTERPOLATION_CUBIC