Blender  V2.93
filter_transform_sse.h
Go to the documentation of this file.
1 /*
2  * Copyright 2011-2017 Blender Foundation
3  *
4  * Licensed under the Apache License, Version 2.0 (the "License");
5  * you may not use this file except in compliance with the License.
6  * You may obtain a copy of the License at
7  *
8  * http://www.apache.org/licenses/LICENSE-2.0
9  *
10  * Unless required by applicable law or agreed to in writing, software
11  * distributed under the License is distributed on an "AS IS" BASIS,
12  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13  * See the License for the specific language governing permissions and
14  * limitations under the License.
15  */
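16 
17 CCL_NAMESPACE_BEGIN
18 
19 ccl_device void kernel_filter_construct_transform(const float *ccl_restrict buffer,
20  CCL_FILTER_TILE_INFO,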
21  int x,
22  int y,
23  int4 rect,
24  int pass_stride,
25  int frame_stride,
26  bool use_time,
27  float *transform,
28  int *rank,
29  int radius,
30  float pca_threshold)
31 {
32  int buffer_w = align_up(rect.z - rect.x, 4);
33 
34  float4 features[DENOISE_FEATURES];
35  const float *ccl_restrict pixel_buffer;
36  int3 pixel;
37 
38  int num_features = use_time ? 11 : 10;
39 
40  /* === Calculate denoising window. === */
41  int2 low = make_int2(max(rect.x, x - radius), max(rect.y, y - radius));
42  int2 high = make_int2(min(rect.z, x + radius + 1), min(rect.w, y + radius + 1));
43  int num_pixels = (high.y - low.y) * (high.x - low.x) * tile_info->num_frames;
44 
45  /* === Shift feature passes to have mean 0. === */
46  float4 feature_means[DENOISE_FEATURES];
47  math_vector_zero_sse(feature_means, num_features);
48  FOR_PIXEL_WINDOW_SSE
49  {
50  filter_get_features_sse(
51  x4, y4, t4, active_pixels, pixel_buffer, features, use_time, NULL, pass_stride);
52  math_vector_add_sse(feature_means, num_features, features);
53  }
54  END_FOR_PIXEL_WINDOW_SSE
55 
56  float4 pixel_scale = make_float4(1.0f / num_pixels);
57  for (int i = 0; i < num_features; i++) {
58  feature_means[i] = reduce_add(feature_means[i]) * pixel_scale;
59  }
60 
61  /* === Scale the shifted feature passes to a range of [-1; 1] ===
62  * Will be baked into the transform later. */
63  float4 feature_scale[DENOISE_FEATURES];
64  math_vector_zero_sse(feature_scale, num_features);
65  FOR_PIXEL_WINDOW_SSE
66  {
67  filter_get_feature_scales_sse(
68  x4, y4, t4, active_pixels, pixel_buffer, features, use_time, feature_means, pass_stride);
69  math_vector_max_sse(feature_scale, features, num_features);
70  }
71  END_FOR_PIXEL_WINDOW_SSE
72 
73  filter_calculate_scale_sse(feature_scale, use_time);
74 
75  /* === Generate the feature transformation. ===
76  * This transformation maps the num_features-dimensional feature space to a reduced feature
77  * (r-feature) space which generally has fewer dimensions.
78  * This mainly helps to prevent over-fitting. */
79  float4 feature_matrix_sse[DENOISE_FEATURES * DENOISE_FEATURES];
80  math_matrix_zero_sse(feature_matrix_sse, num_features);
81  FOR_PIXEL_WINDOW_SSE
82  {
83  filter_get_features_sse(
84  x4, y4, t4, active_pixels, pixel_buffer, features, use_time, feature_means, pass_stride);
85  math_vector_mul_sse(features, num_features, feature_scale);
86  math_matrix_add_gramian_sse(feature_matrix_sse, num_features, features, make_float4(1.0f));
87  }
88  END_FOR_PIXEL_WINDOW_SSE
89 
90  float feature_matrix[DENOISE_FEATURES * DENOISE_FEATURES];
91  math_matrix_hsum(feature_matrix, num_features, feature_matrix_sse);
92 
93  math_matrix_jacobi_eigendecomposition(feature_matrix, transform, num_features, 1);
94 
95  *rank = 0;
96  /* Prevent over-fitting when a small window is used. */
97  int max_rank = min(num_features, num_pixels / 3);
98  if (pca_threshold < 0.0f) {
99  float threshold_energy = 0.0f;
100  for (int i = 0; i < num_features; i++) {
101  threshold_energy += feature_matrix[i * num_features + i];
102  }
103  threshold_energy *= 1.0f - (-pca_threshold);
104 
105  float reduced_energy = 0.0f;
106  for (int i = 0; i < max_rank; i++, (*rank)++) {
107  if (i >= 2 && reduced_energy >= threshold_energy)
108  break;
109  float s = feature_matrix[i * num_features + i];
110  reduced_energy += s;
111  }
112  }
113  else {
114  for (int i = 0; i < max_rank; i++, (*rank)++) {
115  float s = feature_matrix[i * num_features + i];
116  if (i >= 2 && sqrtf(s) < pca_threshold)
117  break;
118  }
119  }
120 
121  math_matrix_transpose(transform, num_features, 1);
122 
123  /* Bake the feature scaling into the transformation matrix. */
124  for (int i = 0; i < num_features; i++) {
125  math_vector_scale(transform + i * num_features, feature_scale[i][0], *rank);
126  }
127 }
128 
129 CCL_NAMESPACE_END
_GL_VOID GLfloat value _GL_VOID_RET _GL_VOID const GLuint GLboolean *residences _GL_BOOL_RET _GL_VOID GLsizei GLfloat GLfloat GLfloat GLfloat const GLubyte *bitmap _GL_VOID_RET _GL_VOID GLenum const void *lists _GL_VOID_RET _GL_VOID const GLdouble *equation _GL_VOID_RET _GL_VOID GLdouble GLdouble blue _GL_VOID_RET _GL_VOID GLfloat GLfloat blue _GL_VOID_RET _GL_VOID GLint GLint blue _GL_VOID_RET _GL_VOID GLshort GLshort blue _GL_VOID_RET _GL_VOID GLubyte GLubyte blue _GL_VOID_RET _GL_VOID GLuint GLuint blue _GL_VOID_RET _GL_VOID GLushort GLushort blue _GL_VOID_RET _GL_VOID GLbyte GLbyte GLbyte alpha _GL_VOID_RET _GL_VOID GLdouble GLdouble GLdouble alpha _GL_VOID_RET _GL_VOID GLfloat GLfloat GLfloat alpha _GL_VOID_RET _GL_VOID GLint GLint GLint alpha _GL_VOID_RET _GL_VOID GLshort GLshort GLshort alpha _GL_VOID_RET _GL_VOID GLubyte GLubyte GLubyte alpha _GL_VOID_RET _GL_VOID GLuint GLuint GLuint alpha _GL_VOID_RET _GL_VOID GLushort GLushort GLushort alpha _GL_VOID_RET _GL_VOID GLenum mode _GL_VOID_RET _GL_VOID GLint y
SIMD_FORCE_INLINE btVector3 transform(const btVector3 &point) const
int x
Definition: btConvexHull.h:149
int w
Definition: btConvexHull.h:149
int y
Definition: btConvexHull.h:149
int z
Definition: btConvexHull.h:149
#define CCL_FILTER_TILE_INFO
#define DENOISE_FEATURES
ccl_device_inline void filter_get_features_sse(float4 x, float4 y, float4 t, int4 active_pixels, const float *ccl_restrict buffer, float4 *features, bool use_time, const float4 *ccl_restrict mean, int pass_stride)
ccl_device_inline void filter_calculate_scale_sse(float4 *scale, bool use_time)
#define FOR_PIXEL_WINDOW_SSE
#define END_FOR_PIXEL_WINDOW_SSE
ccl_device_inline void filter_get_feature_scales_sse(float4 x, float4 y, float4 t, int4 active_pixels, const float *ccl_restrict buffer, float4 *scales, bool use_time, const float4 *ccl_restrict mean, int pass_stride)
CCL_NAMESPACE_BEGIN ccl_device void kernel_filter_construct_transform(const float *ccl_restrict buffer, CCL_FILTER_TILE_INFO, int x, int y, int4 rect, int pass_stride, int frame_stride, bool use_time, float *transform, int *rank, int radius, float pca_threshold)
#define ccl_restrict
#define ccl_device
#define CCL_NAMESPACE_END
#define make_float4(x, y, z, w)
#define make_int2(x, y)
#define sqrtf(x)
__kernel void ccl_constant KernelData ccl_global void ccl_global char ccl_global int ccl_global char ccl_global unsigned int ccl_global float * buffer
#define min(a, b)
Definition: sort.c:51
float max
__forceinline ssef low(const avxf &a)
Definition: util_avxf.h:277
__forceinline ssef high(const avxf &a)
Definition: util_avxf.h:281
__forceinline int reduce_add(const avxi &v)
Definition: util_avxi.h:709
ccl_device_inline void math_matrix_transpose(ccl_global float *A, int n, int stride)
ccl_device void math_matrix_jacobi_eigendecomposition(float *A, ccl_global float *V, int n, int v_stride)
ccl_device_inline void math_vector_scale(float *a, float b, int n)
ccl_device_inline size_t align_up(size_t offset, size_t alignment)
Definition: util_types.h:65