Blender  V2.93
filter_nlm_cpu.h
Go to the documentation of this file.
1 /*
2  * Copyright 2011-2017 Blender Foundation
3  *
4  * Licensed under the Apache License, Version 2.0 (the "License");
5  * you may not use this file except in compliance with the License.
6  * You may obtain a copy of the License at
7  *
8  * http://www.apache.org/licenses/LICENSE-2.0
9  *
10  * Unless required by applicable law or agreed to in writing, software
11  * distributed under the License is distributed on an "AS IS" BASIS,
12  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13  * See the License for the specific language governing permissions and
14  * limitations under the License.
15  */
16 
18 
/* Helpers for accessing four consecutive floats starting at buf + ofs.
 * load4_a reinterprets the address as a float4 pointer and dereferences it, so it can also be
 * used as an assignment target; the address is expected to be 16-byte aligned.
 * load4_u goes through load_float4() and is used where alignment is not guaranteed. */
19 #define load4_a(buf, ofs) (*((float4 *)((buf) + (ofs))))
20 #define load4_u(buf, ofs) load_float4((buf) + (ofs))
21 
23  int dy,
24  const float *ccl_restrict weight_image,
25  const float *ccl_restrict variance_image,
26  const float *ccl_restrict scale_image,
27  float *difference_image,
28  int4 rect,
29  int stride,
30  int channel_offset,
31  int frame_offset,
32  float a,
33  float k_2)
34 {
    /* For every pixel p inside rect (rows rect.y..rect.w, columns up to rect.z, four columns per
     * iteration) compare it with the shifted pixel q = p + (dx, dy) + frame_offset.
     * Per channel the term
     *   ((p - s*q)^2 - a * (var_p + min(var_p, s^2*var_q))) / (1e-8 + k_2 * (var_p + s^2*var_q))
     * is accumulated, where s is the clamped scale ratio (1 when scale_image is null).
     * The sum divided by the channel count is written to difference_image at p.
     * NOTE(review): the line holding the function name and the `dx` parameter is absent from
     * this listing; `dx` is used below as the horizontal shift. */
35  /* Strides need to be aligned to 16 bytes. */
36  kernel_assert((stride % 4) == 0 && (channel_offset % 4) == 0);
37 
    /* Clear the two low bits so the first column processed is a multiple of four. */
38  int aligned_lowx = rect.x & (~3);
    /* Three channels when a channel offset is given, a single channel otherwise. */
39  const int numChannels = (channel_offset > 0) ? 3 : 1;
40  const float4 channel_fac = make_float4(1.0f / numChannels);
41 
42  for (int y = rect.y; y < rect.w; y++) {
43  int idx_p = y * stride + aligned_lowx;
44  int idx_q = (y + dy) * stride + aligned_lowx + dx + frame_offset;
    /* Whole groups of four columns are processed, so the first group may begin left of rect.x
     * and the last group may extend past rect.z. */
45  for (int x = aligned_lowx; x < rect.z; x += 4, idx_p += 4, idx_q += 4) {
46  float4 diff = make_float4(0.0f);
47  float4 scale_fac;
48  if (scale_image) {
    /* Ratio of the scale at p to the scale at q, limited to the range [0.25, 4]. */
49  scale_fac = clamp(load4_a(scale_image, idx_p) / load4_u(scale_image, idx_q),
50  make_float4(0.25f),
51  make_float4(4.0f));
52  }
53  else {
54  scale_fac = make_float4(1.0f);
55  }
56  for (int c = 0, chan_ofs = 0; c < numChannels; c++, chan_ofs += channel_offset) {
57  /* idx_p is guaranteed to be aligned, but idx_q isn't. */
58  float4 color_p = load4_a(weight_image, idx_p + chan_ofs);
59  float4 color_q = scale_fac * load4_u(weight_image, idx_q + chan_ofs);
60  float4 cdiff = color_p - color_q;
61  float4 var_p = load4_a(variance_image, idx_p + chan_ofs);
    /* The variance of q is scaled by the square of the factor applied to its colour. */
62  float4 var_q = sqr(scale_fac) * load4_u(variance_image, idx_q + chan_ofs);
    /* The 1e-8 term keeps the denominator non-zero when both variances are zero. */
63  diff += (cdiff * cdiff - a * (var_p + min(var_p, var_q))) /
64  (make_float4(1e-8f) + k_2 * (var_p + var_q));
65  }
    /* Store the average over the channels. */
66  load4_a(difference_image, idx_p) = diff * channel_fac;
67  }
68  }
69 }
70 
72  const float *ccl_restrict difference_image, float *out_image, int4 rect, int stride, int f)
73 {
    /* Vertical box blur: for each row y of rect, out_image receives the mean of the rows
     * max(rect.y, y - f) .. min(rect.w, y + f + 1) (exclusive) of difference_image,
     * processed four columns at a time.
     * NOTE(review): the line holding the function name is absent from this listing;
     * presumably this is kernel_filter_nlm_blur. */
74  int aligned_lowx = round_down(rect.x, 4);
75  for (int y = rect.y; y < rect.w; y++) {
    /* Row window clipped to the vertical extent of rect. */
76  const int low = max(rect.y, y - f);
77  const int high = min(rect.w, y + f + 1);
    /* Clear the output row before accumulating. */
78  for (int x = aligned_lowx; x < rect.z; x += 4) {
79  load4_a(out_image, y * stride + x) = make_float4(0.0f);
80  }
    /* Sum the contributing rows. */
81  for (int y1 = low; y1 < high; y1++) {
82  for (int x = aligned_lowx; x < rect.z; x += 4) {
83  load4_a(out_image, y * stride + x) += load4_a(difference_image, y1 * stride + x);
84  }
85  }
    /* Divide by the number of rows that were summed. */
86  float fac = 1.0f / (high - low);
87  for (int x = aligned_lowx; x < rect.z; x += 4) {
88  load4_a(out_image, y * stride + x) *= fac;
89  }
90  }
91 }
92 
94  const float *ccl_restrict difference_image, float *out_image, int4 rect, int stride, int f)
95 {
    /* Horizontal box blur of difference_image into out_image over rect, four columns at a time:
     * clear the output, add one shifted copy of the input per offset dx in [-f, f], then scale
     * each column by the reciprocal of its clipped window width.
     * NOTE(review): the line holding the function name is absent from this listing; the calls
     * further down in this file that pass the same argument list use nlm_blur_horizontal. */
96  int aligned_lowx = round_down(rect.x, 4);
97  for (int y = rect.y; y < rect.w; y++) {
98  for (int x = aligned_lowx; x < rect.z; x += 4) {
99  load4_a(out_image, y * stride + x) = make_float4(0.0f);
100  }
101  }
102 
103  for (int dx = -f; dx <= f; dx++) {
    /* Column range for which both x and x + dx lie inside [rect.x, rect.z): a negative dx
     * raises the lower bound, a positive dx lowers the upper bound. */
104  aligned_lowx = round_down(rect.x - min(0, dx), 4);
105  int highx = rect.z - max(0, dx);
106  int4 lowx4 = make_int4(rect.x - min(0, dx));
107  int4 highx4 = make_int4(rect.z - max(0, dx));
108  for (int y = rect.y; y < rect.w; y++) {
109  for (int x = aligned_lowx; x < highx; x += 4) {
    /* Per-lane column indices and the lanes that fall inside the valid range. */
110  int4 x4 = make_int4(x) + make_int4(0, 1, 2, 3);
111  int4 active = (x4 >= lowx4) & (x4 < highx4);
112 
    /* The shifted read is generally not 16-byte aligned, hence load4_u.
     * presumably mask() zeroes the lanes that are not active - TODO confirm. */
113  float4 diff = load4_u(difference_image, y * stride + x + dx);
114  load4_a(out_image, y * stride + x) += mask(active, diff);
115  }
116  }
117  }
118 
119  aligned_lowx = round_down(rect.x, 4);
120  for (int y = rect.y; y < rect.w; y++) {
121  for (int x = aligned_lowx; x < rect.z; x += 4) {
    /* Window [low, high) of column x clipped to [rect.x, rect.z); its width is the divisor.
     * NOTE(review): for lanes outside [rect.x, rect.z) the width high - low can be zero or
     * negative; confirm that callers never consume those lanes. */
122  float4 x4 = make_float4(x) + make_float4(0.0f, 1.0f, 2.0f, 3.0f);
123  float4 low = max(make_float4(rect.x), x4 - make_float4(f));
124  float4 high = min(make_float4(rect.z), x4 + make_float4(f + 1));
125  load4_a(out_image, y * stride + x) *= rcp(high - low);
126  }
127  }
128 }
129 
131  const float *ccl_restrict difference_image, float *out_image, int4 rect, int stride, int f)
132 {
    /* Blur difference_image horizontally into out_image, then replace every value v of
     * out_image inside rect by fast_expf4(-max(v, 0)).
     * NOTE(review): the line holding the function name is absent from this listing;
     * presumably this is kernel_filter_nlm_calc_weight. */
133  nlm_blur_horizontal(difference_image, out_image, rect, stride, f);
134 
135  int aligned_lowx = round_down(rect.x, 4);
136  for (int y = rect.y; y < rect.w; y++) {
137  for (int x = aligned_lowx; x < rect.z; x += 4) {
    /* Negative blurred values are clamped to zero before the exponential is taken. */
138  load4_a(out_image, y * stride + x) = fast_expf4(
139  -max(load4_a(out_image, y * stride + x), make_float4(0.0f)));
140  }
141  }
142 }
143 
145  int dy,
146  const float *ccl_restrict difference_image,
147  const float *ccl_restrict image,
148  float *temp_image,
149  float *out_image,
150  float *accum_image,
151  int4 rect,
152  int channel_offset,
153  int stride,
154  int f)
155 {
    /* Blur difference_image horizontally into temp_image and use the result as per-pixel
     * weight: for each pixel p in rect the weight is added to accum_image, and the weight times
     * the image value at the shifted pixel q = p + (dx, dy) is added to out_image.
     * NOTE(review): the line holding the function name and the `dx` parameter is absent from
     * this listing; presumably this is kernel_filter_nlm_update_output. */
156  nlm_blur_horizontal(difference_image, temp_image, rect, stride, f);
157 
158  int aligned_lowx = round_down(rect.x, 4);
159  for (int y = rect.y; y < rect.w; y++) {
160  for (int x = aligned_lowx; x < rect.z; x += 4) {
    /* Only lanes whose column lies inside [rect.x, rect.z) contribute. */
161  int4 x4 = make_int4(x) + make_int4(0, 1, 2, 3);
162  int4 active = (x4 >= make_int4(rect.x)) & (x4 < make_int4(rect.z));
163 
164  int idx_p = y * stride + x, idx_q = (y + dy) * stride + (x + dx);
165 
166  float4 weight = load4_a(temp_image, idx_p);
167  load4_a(accum_image, idx_p) += mask(active, weight);
168 
    /* With a non-zero channel offset the value is the mean of three channels. */
169  float4 val = load4_u(image, idx_q);
170  if (channel_offset) {
171  val += load4_u(image, idx_q + channel_offset);
172  val += load4_u(image, idx_q + 2 * channel_offset);
173  val *= 1.0f / 3.0f;
174  }
175 
176  load4_a(out_image, idx_p) += mask(active, weight * val);
177  }
178  }
179 }
180 
182  int dy,
183  int t,
184  const float *ccl_restrict
185  difference_image,
186  const float *ccl_restrict buffer,
187  float *transform,
188  int *rank,
189  float *XtWX,
190  float3 *XtWY,
191  int4 rect,
192  int4 filter_window,
193  int stride,
194  int f,
195  int pass_stride,
196  int frame_offset,
197  bool use_time)
198 {
    /* For every pixel in the intersection of rect and filter_window, compute a weight as the
     * horizontal mean of difference_image around the pixel and pass it, together with the
     * per-pixel transform / rank / XtWX / XtWY storage, to the call at the end of the loop body.
     * NOTE(review): the line holding the function name and the `dx` parameter is absent from
     * this listing; presumably this is kernel_filter_nlm_construct_gramian. */
199  int4 clip_area = rect_clip(rect, filter_window);
200  /* fx and fy are in filter-window-relative coordinates,
201  * while x and y are in feature-window-relative coordinates.
    * NOTE(review): no fx / fy variables appear in this function. */
202  for (int y = clip_area.y; y < clip_area.w; y++) {
203  for (int x = clip_area.x; x < clip_area.z; x++) {
    /* Mean of difference_image over the columns [low, high) of row y, with the window clipped
     * to the horizontal extent of rect. */
204  const int low = max(rect.x, x - f);
205  const int high = min(rect.z, x + f + 1);
206  float sum = 0.0f;
207  for (int x1 = low; x1 < high; x1++) {
208  sum += difference_image[y * stride + x1];
209  }
210  float weight = sum * (1.0f / (high - low));
211 
    /* Per-pixel storage slots, indexed by the pixel position within filter_window. */
212  int storage_ofs = coord_to_local_index(filter_window, x, y);
213  float *l_transform = transform + storage_ofs * TRANSFORM_SIZE;
214  float *l_XtWX = XtWX + storage_ofs * XTWX_SIZE;
215  float3 *l_XtWY = XtWY + storage_ofs * XTWY_SIZE;
216  int *l_rank = rank + storage_ofs;
217 
    /* NOTE(review): the line naming the callee and its first argument is absent from this
     * listing; presumably these are the remaining arguments of
     * kernel_filter_construct_gramian(x, ...) - confirm against the original file. */
219  y,
220  1,
221  dx,
222  dy,
223  t,
224  stride,
225  pass_stride,
226  frame_offset,
227  use_time,
228  buffer,
229  l_transform,
230  l_rank,
231  weight,
232  l_XtWX,
233  l_XtWY,
234  0);
235  }
236  }
237 }
238 
240  const float *ccl_restrict accum_image,
241  int4 rect,
242  int w)
243 {
    /* Divide every pixel of out_image inside rect by the matching entry of accum_image.
     * Both images are indexed as y * w + x.
     * NOTE(review): the line holding the function name and the `out_image` parameter is absent
     * from this listing; presumably this is kernel_filter_nlm_normalize.
     * NOTE(review): there is no guard against a zero entry in accum_image. */
244  for (int y = rect.y; y < rect.w; y++) {
245  for (int x = rect.x; x < rect.z; x++) {
246  out_image[y * w + x] /= accum_image[y * w + x];
247  }
248  }
249 }
250 
/* The load helpers are only meant for this file; remove them again. */
251 #undef load4_a
252 #undef load4_u
253 
_GL_VOID GLfloat value _GL_VOID_RET _GL_VOID const GLuint GLboolean *residences _GL_BOOL_RET _GL_VOID GLsizei GLfloat GLfloat GLfloat GLfloat const GLubyte *bitmap _GL_VOID_RET _GL_VOID GLenum const void *lists _GL_VOID_RET _GL_VOID const GLdouble *equation _GL_VOID_RET _GL_VOID GLdouble GLdouble blue _GL_VOID_RET _GL_VOID GLfloat GLfloat blue _GL_VOID_RET _GL_VOID GLint GLint blue _GL_VOID_RET _GL_VOID GLshort GLshort blue _GL_VOID_RET _GL_VOID GLubyte GLubyte blue _GL_VOID_RET _GL_VOID GLuint GLuint blue _GL_VOID_RET _GL_VOID GLushort GLushort blue _GL_VOID_RET _GL_VOID GLbyte GLbyte GLbyte alpha _GL_VOID_RET _GL_VOID GLdouble GLdouble GLdouble alpha _GL_VOID_RET _GL_VOID GLfloat GLfloat GLfloat alpha _GL_VOID_RET _GL_VOID GLint GLint GLint alpha _GL_VOID_RET _GL_VOID GLshort GLshort GLshort alpha _GL_VOID_RET _GL_VOID GLubyte GLubyte GLubyte alpha _GL_VOID_RET _GL_VOID GLuint GLuint GLuint alpha _GL_VOID_RET _GL_VOID GLushort GLushort GLushort alpha _GL_VOID_RET _GL_VOID GLenum mode _GL_VOID_RET _GL_VOID GLint GLsizei GLsizei GLenum type _GL_VOID_RET _GL_VOID GLsizei GLenum GLenum const void *pixels _GL_VOID_RET _GL_VOID const void *pointer _GL_VOID_RET _GL_VOID GLdouble v _GL_VOID_RET _GL_VOID GLfloat v _GL_VOID_RET _GL_VOID GLint GLint i2 _GL_VOID_RET _GL_VOID GLint j _GL_VOID_RET _GL_VOID GLfloat param _GL_VOID_RET _GL_VOID GLint param _GL_VOID_RET _GL_VOID GLdouble GLdouble GLdouble GLdouble GLdouble zFar _GL_VOID_RET _GL_UINT GLdouble *equation _GL_VOID_RET _GL_VOID GLenum GLint *params _GL_VOID_RET _GL_VOID GLenum GLfloat *v _GL_VOID_RET _GL_VOID GLenum GLfloat *params _GL_VOID_RET _GL_VOID GLfloat *values _GL_VOID_RET _GL_VOID GLushort *values _GL_VOID_RET _GL_VOID GLenum GLfloat *params _GL_VOID_RET _GL_VOID GLenum GLdouble *params _GL_VOID_RET _GL_VOID GLenum GLint *params _GL_VOID_RET _GL_VOID GLsizei const void *pointer _GL_VOID_RET _GL_VOID GLsizei const void *pointer _GL_VOID_RET _GL_BOOL GLfloat param _GL_VOID_RET _GL_VOID GLint param _GL_VOID_RET 
_GL_VOID GLenum GLfloat param _GL_VOID_RET _GL_VOID GLenum GLint param _GL_VOID_RET _GL_VOID GLushort pattern _GL_VOID_RET _GL_VOID GLdouble GLdouble GLint GLint const GLdouble *points _GL_VOID_RET _GL_VOID GLdouble GLdouble GLint GLint GLdouble GLdouble GLint GLint const GLdouble *points _GL_VOID_RET _GL_VOID GLdouble GLdouble u2 _GL_VOID_RET _GL_VOID GLdouble GLdouble GLint GLdouble GLdouble v2 _GL_VOID_RET _GL_VOID GLenum GLfloat param _GL_VOID_RET _GL_VOID GLenum GLint param _GL_VOID_RET _GL_VOID GLenum mode _GL_VOID_RET _GL_VOID GLdouble GLdouble nz _GL_VOID_RET _GL_VOID GLfloat GLfloat nz _GL_VOID_RET _GL_VOID GLint GLint nz _GL_VOID_RET _GL_VOID GLshort GLshort nz _GL_VOID_RET _GL_VOID GLsizei const void *pointer _GL_VOID_RET _GL_VOID GLsizei const GLfloat *values _GL_VOID_RET _GL_VOID GLsizei const GLushort *values _GL_VOID_RET _GL_VOID GLint param _GL_VOID_RET _GL_VOID const GLuint const GLclampf *priorities _GL_VOID_RET _GL_VOID GLdouble y _GL_VOID_RET _GL_VOID GLfloat y _GL_VOID_RET _GL_VOID GLint y _GL_VOID_RET _GL_VOID GLshort y _GL_VOID_RET _GL_VOID GLdouble GLdouble z _GL_VOID_RET _GL_VOID GLfloat GLfloat z _GL_VOID_RET _GL_VOID GLint GLint z _GL_VOID_RET _GL_VOID GLshort GLshort z _GL_VOID_RET _GL_VOID GLdouble GLdouble GLdouble w _GL_VOID_RET _GL_VOID GLfloat GLfloat GLfloat w _GL_VOID_RET _GL_VOID GLint GLint GLint w _GL_VOID_RET _GL_VOID GLshort GLshort GLshort w _GL_VOID_RET _GL_VOID GLdouble y1
_GL_VOID GLfloat value _GL_VOID_RET _GL_VOID const GLuint GLboolean *residences _GL_BOOL_RET _GL_VOID GLsizei GLfloat GLfloat GLfloat GLfloat const GLubyte *bitmap _GL_VOID_RET _GL_VOID GLenum const void *lists _GL_VOID_RET _GL_VOID const GLdouble *equation _GL_VOID_RET _GL_VOID GLdouble GLdouble blue _GL_VOID_RET _GL_VOID GLfloat GLfloat blue _GL_VOID_RET _GL_VOID GLint GLint blue _GL_VOID_RET _GL_VOID GLshort GLshort blue _GL_VOID_RET _GL_VOID GLubyte GLubyte blue _GL_VOID_RET _GL_VOID GLuint GLuint blue _GL_VOID_RET _GL_VOID GLushort GLushort blue _GL_VOID_RET _GL_VOID GLbyte GLbyte GLbyte alpha _GL_VOID_RET _GL_VOID GLdouble GLdouble GLdouble alpha _GL_VOID_RET _GL_VOID GLfloat GLfloat GLfloat alpha _GL_VOID_RET _GL_VOID GLint GLint GLint alpha _GL_VOID_RET _GL_VOID GLshort GLshort GLshort alpha _GL_VOID_RET _GL_VOID GLubyte GLubyte GLubyte alpha _GL_VOID_RET _GL_VOID GLuint GLuint GLuint alpha _GL_VOID_RET _GL_VOID GLushort GLushort GLushort alpha _GL_VOID_RET _GL_VOID GLenum mode _GL_VOID_RET _GL_VOID GLint y
_GL_VOID GLfloat value _GL_VOID_RET _GL_VOID const GLuint GLboolean *residences _GL_BOOL_RET _GL_VOID GLsizei GLfloat GLfloat GLfloat GLfloat const GLubyte *bitmap _GL_VOID_RET _GL_VOID GLenum const void *lists _GL_VOID_RET _GL_VOID const GLdouble *equation _GL_VOID_RET _GL_VOID GLdouble GLdouble blue _GL_VOID_RET _GL_VOID GLfloat GLfloat blue _GL_VOID_RET _GL_VOID GLint GLint blue _GL_VOID_RET _GL_VOID GLshort GLshort blue _GL_VOID_RET _GL_VOID GLubyte GLubyte blue _GL_VOID_RET _GL_VOID GLuint GLuint blue _GL_VOID_RET _GL_VOID GLushort GLushort blue _GL_VOID_RET _GL_VOID GLbyte GLbyte GLbyte alpha _GL_VOID_RET _GL_VOID GLdouble GLdouble GLdouble alpha _GL_VOID_RET _GL_VOID GLfloat GLfloat GLfloat alpha _GL_VOID_RET _GL_VOID GLint GLint GLint alpha _GL_VOID_RET _GL_VOID GLshort GLshort GLshort alpha _GL_VOID_RET _GL_VOID GLubyte GLubyte GLubyte alpha _GL_VOID_RET _GL_VOID GLuint GLuint GLuint alpha _GL_VOID_RET _GL_VOID GLushort GLushort GLushort alpha _GL_VOID_RET _GL_VOID GLenum mode _GL_VOID_RET _GL_VOID GLint GLsizei GLsizei GLenum type _GL_VOID_RET _GL_VOID GLsizei GLenum GLenum const void *pixels _GL_VOID_RET _GL_VOID const void *pointer _GL_VOID_RET _GL_VOID GLdouble v _GL_VOID_RET _GL_VOID GLfloat v _GL_VOID_RET _GL_VOID GLint GLint i2 _GL_VOID_RET _GL_VOID GLint j _GL_VOID_RET _GL_VOID GLfloat param _GL_VOID_RET _GL_VOID GLint param _GL_VOID_RET _GL_VOID GLdouble GLdouble GLdouble GLdouble GLdouble zFar _GL_VOID_RET _GL_UINT GLdouble *equation _GL_VOID_RET _GL_VOID GLenum GLint *params _GL_VOID_RET _GL_VOID GLenum GLfloat *v _GL_VOID_RET _GL_VOID GLenum GLfloat *params _GL_VOID_RET _GL_VOID GLfloat *values _GL_VOID_RET _GL_VOID GLushort *values _GL_VOID_RET _GL_VOID GLenum GLfloat *params _GL_VOID_RET _GL_VOID GLenum GLdouble *params _GL_VOID_RET _GL_VOID GLenum GLint *params _GL_VOID_RET _GL_VOID GLsizei stride
_GL_VOID GLfloat value _GL_VOID_RET _GL_VOID const GLuint GLboolean *residences _GL_BOOL_RET _GL_VOID GLsizei GLfloat GLfloat GLfloat GLfloat const GLubyte *bitmap _GL_VOID_RET _GL_VOID GLenum const void *lists _GL_VOID_RET _GL_VOID const GLdouble *equation _GL_VOID_RET _GL_VOID GLdouble GLdouble blue _GL_VOID_RET _GL_VOID GLfloat GLfloat blue _GL_VOID_RET _GL_VOID GLint GLint blue _GL_VOID_RET _GL_VOID GLshort GLshort blue _GL_VOID_RET _GL_VOID GLubyte GLubyte blue _GL_VOID_RET _GL_VOID GLuint GLuint blue _GL_VOID_RET _GL_VOID GLushort GLushort blue _GL_VOID_RET _GL_VOID GLbyte GLbyte GLbyte alpha _GL_VOID_RET _GL_VOID GLdouble GLdouble GLdouble alpha _GL_VOID_RET _GL_VOID GLfloat GLfloat GLfloat alpha _GL_VOID_RET _GL_VOID GLint GLint GLint alpha _GL_VOID_RET _GL_VOID GLshort GLshort GLshort alpha _GL_VOID_RET _GL_VOID GLubyte GLubyte GLubyte alpha _GL_VOID_RET _GL_VOID GLuint GLuint GLuint alpha _GL_VOID_RET _GL_VOID GLushort GLushort GLushort alpha _GL_VOID_RET _GL_VOID GLenum mode _GL_VOID_RET _GL_VOID GLint GLsizei GLsizei GLenum type _GL_VOID_RET _GL_VOID GLsizei GLenum GLenum const void *pixels _GL_VOID_RET _GL_VOID const void *pointer _GL_VOID_RET _GL_VOID GLdouble v _GL_VOID_RET _GL_VOID GLfloat v _GL_VOID_RET _GL_VOID GLint GLint i2 _GL_VOID_RET _GL_VOID GLint j _GL_VOID_RET _GL_VOID GLfloat param _GL_VOID_RET _GL_VOID GLint param _GL_VOID_RET _GL_VOID GLdouble GLdouble GLdouble GLdouble GLdouble zFar _GL_VOID_RET _GL_UINT GLdouble *equation _GL_VOID_RET _GL_VOID GLenum GLint *params _GL_VOID_RET _GL_VOID GLenum GLfloat *v _GL_VOID_RET _GL_VOID GLenum GLfloat *params _GL_VOID_RET _GL_VOID GLfloat *values _GL_VOID_RET _GL_VOID GLushort *values _GL_VOID_RET _GL_VOID GLenum GLfloat *params _GL_VOID_RET _GL_VOID GLenum GLdouble *params _GL_VOID_RET _GL_VOID GLenum GLint *params _GL_VOID_RET _GL_VOID GLsizei const void *pointer _GL_VOID_RET _GL_VOID GLsizei const void *pointer _GL_VOID_RET _GL_BOOL GLfloat param _GL_VOID_RET _GL_VOID GLint param _GL_VOID_RET 
_GL_VOID GLenum GLfloat param _GL_VOID_RET _GL_VOID GLenum GLint param _GL_VOID_RET _GL_VOID GLushort pattern _GL_VOID_RET _GL_VOID GLdouble GLdouble GLint GLint const GLdouble *points _GL_VOID_RET _GL_VOID GLdouble GLdouble GLint GLint GLdouble GLdouble GLint GLint const GLdouble *points _GL_VOID_RET _GL_VOID GLdouble GLdouble u2 _GL_VOID_RET _GL_VOID GLdouble GLdouble GLint GLdouble GLdouble v2 _GL_VOID_RET _GL_VOID GLenum GLfloat param _GL_VOID_RET _GL_VOID GLenum GLint param _GL_VOID_RET _GL_VOID GLenum mode _GL_VOID_RET _GL_VOID GLdouble GLdouble nz _GL_VOID_RET _GL_VOID GLfloat GLfloat nz _GL_VOID_RET _GL_VOID GLint GLint nz _GL_VOID_RET _GL_VOID GLshort GLshort nz _GL_VOID_RET _GL_VOID GLsizei const void *pointer _GL_VOID_RET _GL_VOID GLsizei const GLfloat *values _GL_VOID_RET _GL_VOID GLsizei const GLushort *values _GL_VOID_RET _GL_VOID GLint param _GL_VOID_RET _GL_VOID const GLuint const GLclampf *priorities _GL_VOID_RET _GL_VOID GLdouble y _GL_VOID_RET _GL_VOID GLfloat y _GL_VOID_RET _GL_VOID GLint y _GL_VOID_RET _GL_VOID GLshort y _GL_VOID_RET _GL_VOID GLdouble GLdouble z _GL_VOID_RET _GL_VOID GLfloat GLfloat z _GL_VOID_RET _GL_VOID GLint GLint z _GL_VOID_RET _GL_VOID GLshort GLshort z _GL_VOID_RET _GL_VOID GLdouble GLdouble GLdouble w _GL_VOID_RET _GL_VOID GLfloat GLfloat GLfloat w _GL_VOID_RET _GL_VOID GLint GLint GLint w _GL_VOID_RET _GL_VOID GLshort GLshort GLshort w _GL_VOID_RET _GL_VOID GLdouble GLdouble GLdouble y2 _GL_VOID_RET _GL_VOID GLfloat GLfloat GLfloat y2 _GL_VOID_RET _GL_VOID GLint GLint GLint y2 _GL_VOID_RET _GL_VOID GLshort GLshort GLshort y2 _GL_VOID_RET _GL_VOID GLdouble GLdouble GLdouble z _GL_VOID_RET _GL_VOID GLdouble GLdouble z _GL_VOID_RET _GL_VOID GLuint *buffer _GL_VOID_RET _GL_VOID GLdouble t _GL_VOID_RET _GL_VOID GLfloat t _GL_VOID_RET _GL_VOID GLint t _GL_VOID_RET _GL_VOID GLshort t _GL_VOID_RET _GL_VOID GLdouble t
SIMD_FORCE_INLINE btVector3 transform(const btVector3 &point) const
SIMD_FORCE_INLINE const btScalar & w() const
Return the w value.
Definition: btQuadWord.h:119
static T sum(const btAlignedObjectArray< T > &items)
int x
Definition: btConvexHull.h:149
int w
Definition: btConvexHull.h:149
int y
Definition: btConvexHull.h:149
int z
Definition: btConvexHull.h:149
#define XTWY_SIZE
#define TRANSFORM_SIZE
#define XTWX_SIZE
ccl_device_inline void kernel_filter_nlm_construct_gramian(int dx, int dy, int t, const float *ccl_restrict difference_image, const float *ccl_restrict buffer, float *transform, int *rank, float *XtWX, float3 *XtWY, int4 rect, int4 filter_window, int stride, int f, int pass_stride, int frame_offset, bool use_time)
ccl_device_inline void kernel_filter_nlm_blur(const float *ccl_restrict difference_image, float *out_image, int4 rect, int stride, int f)
ccl_device_inline void kernel_filter_nlm_calc_weight(const float *ccl_restrict difference_image, float *out_image, int4 rect, int stride, int f)
ccl_device_inline void kernel_filter_nlm_update_output(int dx, int dy, const float *ccl_restrict difference_image, const float *ccl_restrict image, float *temp_image, float *out_image, float *accum_image, int4 rect, int channel_offset, int stride, int f)
#define load4_a(buf, ofs)
ccl_device_inline void kernel_filter_nlm_calc_difference(int dx, int dy, const float *ccl_restrict weight_image, const float *ccl_restrict variance_image, const float *ccl_restrict scale_image, float *difference_image, int4 rect, int stride, int channel_offset, int frame_offset, float a, float k_2)
ccl_device_inline void nlm_blur_horizontal(const float *ccl_restrict difference_image, float *out_image, int4 rect, int stride, int f)
ccl_device_inline void kernel_filter_nlm_normalize(float *out_image, const float *ccl_restrict accum_image, int4 rect, int w)
#define load4_u(buf, ofs)
CCL_NAMESPACE_BEGIN ccl_device_inline void kernel_filter_construct_gramian(int x, int y, int storage_stride, int dx, int dy, int t, int buffer_stride, int pass_stride, int frame_offset, bool use_time, const ccl_global float *ccl_restrict buffer, const ccl_global float *ccl_restrict transform, ccl_global int *rank, float weight, ccl_global float *XtWX, ccl_global float3 *XtWY, int localIdx)
#define kernel_assert(cond)
#define ccl_restrict
#define ccl_device_inline
#define CCL_NAMESPACE_END
#define make_int4(x, y, z, w)
#define make_float4(x, y, z, w)
#define rcp(x)
__kernel void ccl_constant KernelData ccl_global void ccl_global char ccl_global int ccl_global char ccl_global unsigned int ccl_global float * buffer
static unsigned c
Definition: RandGen.cpp:97
static unsigned a[3]
Definition: RandGen.cpp:92
IMETHOD Vector diff(const Vector &a, const Vector &b, double dt=1)
bool active
all scheduled work for the GPU.
#define min(a, b)
Definition: sort.c:51
float max
__forceinline ssef low(const avxf &a)
Definition: util_avxf.h:277
__forceinline ssef high(const avxf &a)
Definition: util_avxf.h:281
ccl_device_inline float sqr(float a)
Definition: util_math.h:651
ccl_device_inline int clamp(int a, int mn, int mx)
Definition: util_math.h:283
ccl_device_inline float4 fast_expf4(float4 x)
ccl_device_inline float4 mask(const int4 &mask, const float4 &a)
ccl_device_inline int4 rect_clip(int4 a, int4 b)
Definition: util_rect.h:38
ccl_device_inline int coord_to_local_index(int4 rect, int x, int y)
Definition: util_rect.h:49
ccl_device_inline size_t round_down(size_t x, size_t multiple)
Definition: util_types.h:80