Blender  V2.93
kernel_jitter.h
Go to the documentation of this file.
1 /*
2  * Copyright 2011-2013 Blender Foundation
3  *
4  * Licensed under the Apache License, Version 2.0 (the "License");
5  * you may not use this file except in compliance with the License.
6  * You may obtain a copy of the License at
7  *
8  * http://www.apache.org/licenses/LICENSE-2.0
9  *
10  * Unless required by applicable law or agreed to in writing, software
11  * distributed under the License is distributed on an "AS IS" BASIS,
12  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13  * See the License for the specific language governing permissions and
14  * limitations under the License.
15  */
16 
17 /* TODO(sergey): Consider moving portable ctz/clz stuff to util. */
18 
20 
21 /* "Correlated Multi-Jittered Sampling"
22  * Andrew Kensler, Pixar Technical Memo 13-01, 2013 */
23 
24 /* TODO: find good value, suggested 64 gives pattern on cornell box ceiling. */
25 #define CMJ_RANDOM_OFFSET_LIMIT 4096
26 
28 {
29  return (i > 1) && ((i & (i - 1)) == 0);
30 }
31 
33 {
34  return (a & (b - 1));
35 }
36 
37 /* b must be > 1 */
39 {
40  kernel_assert(b > 1);
41  return a >> count_trailing_zeros(b);
42 }
43 
45 {
46  kernel_assert(w > 1);
47  return ((1 << (32 - count_leading_zeros(w))) - 1);
48 }
49 
51 {
52  uint w = l - 1;
53 
54  if ((l & w) == 0) {
55  /* l is a power of two (fast) */
56  i ^= p;
57  i *= 0xe170893d;
58  i ^= p >> 16;
59  i ^= (i & w) >> 4;
60  i ^= p >> 8;
61  i *= 0x0929eb3f;
62  i ^= p >> 23;
63  i ^= (i & w) >> 1;
64  i *= 1 | p >> 27;
65  i *= 0x6935fa69;
66  i ^= (i & w) >> 11;
67  i *= 0x74dcb303;
68  i ^= (i & w) >> 2;
69  i *= 0x9e501cc3;
70  i ^= (i & w) >> 2;
71  i *= 0xc860a3df;
72  i &= w;
73  i ^= i >> 5;
74 
75  return (i + p) & w;
76  }
77  else {
78  /* l is not a power of two (slow) */
79  w = cmj_w_mask(w);
80 
81  do {
82  i ^= p;
83  i *= 0xe170893d;
84  i ^= p >> 16;
85  i ^= (i & w) >> 4;
86  i ^= p >> 8;
87  i *= 0x0929eb3f;
88  i ^= p >> 23;
89  i ^= (i & w) >> 1;
90  i *= 1 | p >> 27;
91  i *= 0x6935fa69;
92  i ^= (i & w) >> 11;
93  i *= 0x74dcb303;
94  i ^= (i & w) >> 2;
95  i *= 0x9e501cc3;
96  i ^= (i & w) >> 2;
97  i *= 0xc860a3df;
98  i &= w;
99  i ^= i >> 5;
100  } while (i >= l);
101 
102  return (i + p) % l;
103  }
104 }
105 
107 {
108  i ^= p;
109  i ^= i >> 17;
110  i ^= i >> 10;
111  i *= 0xb36534e5;
112  i ^= i >> 12;
113  i ^= i >> 21;
114  i *= 0x93fc4795;
115  i ^= 0xdf6e307f;
116  i ^= i >> 17;
117  i *= 1 | p >> 18;
118 
119  return i;
120 }
121 
123 {
124  i = (i ^ 61) ^ p;
125  i += i << 3;
126  i ^= i >> 4;
127  i *= 0x27d4eb2d;
128  return i;
129 }
130 
132 {
133  return cmj_hash(i, p) * (1.0f / 4294967808.0f);
134 }
135 
136 #ifdef __CMJ__
137 ccl_device float cmj_sample_1D(int s, int N, int p)
138 {
139  kernel_assert(s < N);
140 
141  uint x = cmj_permute(s, N, p * 0x68bc21eb);
142  float jx = cmj_randfloat(s, p * 0x967a889b);
143 
144  float invN = 1.0f / N;
145  return (x + jx) * invN;
146 }
147 
148 /* TODO(sergey): Do some extra tests and consider moving to util_math.h. */
149 ccl_device_inline int cmj_isqrt(int value)
150 {
151 # if defined(__KERNEL_CUDA__)
152  return float_to_int(__fsqrt_ru(value));
153 # elif defined(__KERNEL_GPU__)
154  return float_to_int(sqrtf(value));
155 # else
156  /* This is a work around for fast-math on CPU which might replace sqrtf()
157  * with am approximated version.
158  */
159  return float_to_int(sqrtf(value) + 1e-6f);
160 # endif
161 }
162 
163 ccl_device void cmj_sample_2D(int s, int N, int p, float *fx, float *fy)
164 {
165  kernel_assert(s < N);
166 
167  int m = cmj_isqrt(N);
168  int n = (N - 1) / m + 1;
169  float invN = 1.0f / N;
170  float invm = 1.0f / m;
171  float invn = 1.0f / n;
172 
173  s = cmj_permute(s, N, p * 0x51633e2d);
174 
175  int sdivm, smodm;
176 
177  if (cmj_is_pow2(m)) {
178  sdivm = cmj_fast_div_pow2(s, m);
179  smodm = cmj_fast_mod_pow2(s, m);
180  }
181  else {
182  /* Doing `s * inmv` gives precision issues here. */
183  sdivm = s / m;
184  smodm = s - sdivm * m;
185  }
186 
187  uint sx = cmj_permute(smodm, m, p * 0x68bc21eb);
188  uint sy = cmj_permute(sdivm, n, p * 0x02e5be93);
189 
190  float jx = cmj_randfloat(s, p * 0x967a889b);
191  float jy = cmj_randfloat(s, p * 0x368cc8b7);
192 
193  *fx = (sx + (sy + jx) * invn) * invm;
194  *fy = (s + jy) * invN;
195 }
196 #endif
197 
198 ccl_device float pmj_sample_1D(KernelGlobals *kg, int sample, int rng_hash, int dimension)
199 {
200  /* Fallback to random */
201  if (sample >= NUM_PMJ_SAMPLES) {
202  const int p = rng_hash + dimension;
203  return cmj_randfloat(sample, p);
204  }
205  else {
206  const uint mask = cmj_hash_simple(dimension, rng_hash) & 0x007fffff;
207  const int index = ((dimension % NUM_PMJ_PATTERNS) * NUM_PMJ_SAMPLES + sample) * 2;
208  return __uint_as_float(kernel_tex_fetch(__sample_pattern_lut, index) ^ mask) - 1.0f;
209  }
210 }
211 
212 ccl_device float2 pmj_sample_2D(KernelGlobals *kg, int sample, int rng_hash, int dimension)
213 {
214  if (sample >= NUM_PMJ_SAMPLES) {
215  const int p = rng_hash + dimension;
216  const float fx = cmj_randfloat(sample, p);
217  const float fy = cmj_randfloat(sample, p + 1);
218  return make_float2(fx, fy);
219  }
220  else {
221  const int index = ((dimension % NUM_PMJ_PATTERNS) * NUM_PMJ_SAMPLES + sample) * 2;
222  const uint maskx = cmj_hash_simple(dimension, rng_hash) & 0x007fffff;
223  const uint masky = cmj_hash_simple(dimension + 1, rng_hash) & 0x007fffff;
224  const float fx = __uint_as_float(kernel_tex_fetch(__sample_pattern_lut, index) ^ maskx) - 1.0f;
225  const float fy = __uint_as_float(kernel_tex_fetch(__sample_pattern_lut, index + 1) ^ masky) -
226  1.0f;
227  return make_float2(fx, fy);
228  }
229 }
230 
unsigned int uint
Definition: BLI_sys_types.h:83
ATTR_WARN_UNUSED_RESULT const BMLoop * l
SIMD_FORCE_INLINE const btScalar & w() const
Return the w value.
Definition: btQuadWord.h:119
#define kernel_assert(cond)
#define kernel_tex_fetch(tex, index)
#define ccl_device
#define ccl_device_inline
#define CCL_NAMESPACE_END
#define make_float2(x, y)
#define sqrtf(x)
ccl_device_inline int cmj_fast_mod_pow2(int a, int b)
Definition: kernel_jitter.h:32
ccl_device_inline float cmj_randfloat(uint i, uint p)
ccl_device float2 pmj_sample_2D(KernelGlobals *kg, int sample, int rng_hash, int dimension)
ccl_device_inline bool cmj_is_pow2(int i)
Definition: kernel_jitter.h:27
ccl_device_inline uint cmj_permute(uint i, uint l, uint p)
Definition: kernel_jitter.h:50
ccl_device_inline uint cmj_hash(uint i, uint p)
ccl_device_inline int cmj_fast_div_pow2(int a, int b)
Definition: kernel_jitter.h:38
ccl_device_inline uint cmj_hash_simple(uint i, uint p)
ccl_device float pmj_sample_1D(KernelGlobals *kg, int sample, int rng_hash, int dimension)
ccl_device_inline uint cmj_w_mask(uint w)
Definition: kernel_jitter.h:44
#define NUM_PMJ_SAMPLES
#define NUM_PMJ_PATTERNS
static unsigned a[3]
Definition: RandGen.cpp:92
static void sample(SocketReader *reader, int x, int y, float color[4])
params N
ccl_device_inline float __uint_as_float(uint i)
Definition: util_math.h:232
ccl_device_inline int float_to_int(float f)
Definition: util_math.h:321
ccl_device_inline uint count_leading_zeros(uint x)
Definition: util_math.h:685
ccl_device_inline uint count_trailing_zeros(uint x)
Definition: util_math.h:703
ccl_device_inline float4 mask(const int4 &mask, const float4 &a)