Blender  V2.93
filter_transform_gpu.h
Go to the documentation of this file.
1 /*
2  * Copyright 2011-2017 Blender Foundation
3  *
4  * Licensed under the Apache License, Version 2.0 (the "License");
5  * you may not use this file except in compliance with the License.
6  * You may obtain a copy of the License at
7  *
8  * http://www.apache.org/licenses/LICENSE-2.0
9  *
10  * Unless required by applicable law or agreed to in writing, software
11  * distributed under the License is distributed on an "AS IS" BASIS,
12  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13  * See the License for the specific language governing permissions and
14  * limitations under the License.
15  */
16 
18 
21  int x,
22  int y,
23  int4 rect,
24  int pass_stride,
25  int frame_stride,
26  bool use_time,
27  ccl_global float *transform,
28  ccl_global int *rank,
29  int radius,
30  float pca_threshold,
31  int transform_stride,
32  int localIdx)
33 {
/* Builds the feature transform for the pixel at (x, y):
 *  1. accumulates the features over the window and divides by num_pixels to get their mean,
 *  2. takes the element-wise maximum of the per-pixel feature scales and post-processes it,
 *  3. accumulates the Gramian of the mean-shifted, scaled features,
 *  4. eigendecomposes that matrix into `transform` and picks `*rank`,
 *  5. transposes `transform` and multiplies the scales into its first `*rank` columns.
 *
 * Outputs: `transform` (indexed with `transform_stride`) and `*rank`.
 *
 * NOTE(review): this listing does not show the start of the function header, nor the lines
 * that open/close the three braced blocks below. Presumably those are the
 * FOR_PIXEL_WINDOW / END_FOR_PIXEL_WINDOW macros, which would also be what assigns `pixel`
 * and `pixel_buffer` and what uses `buffer_w` and `frame_stride` - confirm against the
 * original file. */
/* Row width rounded up to a multiple of 4. Not referenced again in the visible lines. */
34  int buffer_w = align_up(rect.z - rect.x, 4);
35 
/* CUDA build: `features` points at a DENOISE_FEATURES-sized slice of `shared_features`,
 * selected by `localIdx`. Other builds: a plain local array. */
36 #ifdef __KERNEL_CUDA__
37  ccl_local float shared_features[DENOISE_FEATURES * CCL_MAX_LOCAL_SIZE];
38  ccl_local_param float *features = shared_features + localIdx * DENOISE_FEATURES;
39 #else
40  float features[DENOISE_FEATURES];
41 #endif
42 
/* One more feature is used when `use_time` is set. */
43  int num_features = use_time ? 11 : 10;
44 
45  /* === Calculate denoising window. === */
/* The window is (x, y) +/- radius clamped to `rect`; `high` is one past the last pixel,
 * as the (high - low) products below show. */
46  int2 low = make_int2(max(rect.x, x - radius), max(rect.y, y - radius));
47  int2 high = make_int2(min(rect.z, x + radius + 1), min(rect.w, y + radius + 1));
/* NOTE(review): `tile_info` is not declared in the visible lines; presumably it comes from
 * the CCL_FILTER_TILE_INFO parameter - confirm. */
48  int num_pixels = (high.y - low.y) * (high.x - low.x) * tile_info->num_frames;
/* Declared without initializers; nothing in the visible lines assigns them. */
49  const ccl_global float *ccl_restrict pixel_buffer;
50  int3 pixel;
51 
52  /* === Shift feature passes to have mean 0. === */
53  float feature_means[DENOISE_FEATURES];
54  math_vector_zero(feature_means, num_features);
/* Sum the raw features (NULL is passed as the mean argument) into feature_means. */
56  {
57  filter_get_features(pixel, pixel_buffer, features, use_time, NULL, pass_stride);
58  math_vector_add(feature_means, features, num_features);
59  }
61 
/* Turn the accumulated sum into an average over num_pixels. */
62  math_vector_scale(feature_means, 1.0f / num_pixels, num_features);
63 
64  /* === Scale the shifted feature passes to a range of [-1; 1] ===
65  * Will be baked into the transform later. */
66  float feature_scale[DENOISE_FEATURES];
67  math_vector_zero(feature_scale, num_features);
68 
/* Keep the element-wise maximum of the per-pixel scales, computed relative to the means. */
70  {
71  filter_get_feature_scales(pixel, pixel_buffer, features, use_time, feature_means, pass_stride);
72  math_vector_max(feature_scale, features, num_features);
73  }
75 
/* NOTE(review): presumably converts the maxima into multiplicative factors; the helper's
 * body is not visible here - confirm. */
76  filter_calculate_scale(feature_scale, use_time);
77 
78  /* === Generate the feature transformation. ===
79  * This transformation maps the num_features-dimensional feature space to a reduced feature
80  * (r-feature) space which generally has fewer dimensions.
81  * This mainly helps to prevent over-fitting. */
82  float feature_matrix[DENOISE_FEATURES * DENOISE_FEATURES];
83  math_matrix_zero(feature_matrix, num_features);
/* Accumulate the Gramian of the mean-shifted, scaled feature vectors with weight 1. */
85  {
86  filter_get_features(pixel, pixel_buffer, features, use_time, feature_means, pass_stride);
87  math_vector_mul(features, feature_scale, num_features);
88  math_matrix_add_gramian(feature_matrix, num_features, features, 1.0f);
89  }
91 
/* NOTE(review): the code below reads the diagonal of feature_matrix after this call,
 * presumably as eigenvalues in descending order - confirm against the helper. */
92  math_matrix_jacobi_eigendecomposition(feature_matrix, transform, num_features, transform_stride);
93  *rank = 0;
94  /* Prevent over-fitting when a small window is used. */
95  int max_rank = min(num_features, num_pixels / 3);
/* A negative pca_threshold selects the energy-based criterion; otherwise each diagonal
 * entry's square root is compared against pca_threshold directly. */
96  if (pca_threshold < 0.0f) {
97  float threshold_energy = 0.0f;
98  for (int i = 0; i < num_features; i++) {
99  threshold_energy += feature_matrix[i * num_features + i];
100  }
/* Target is (1 - |pca_threshold|) times the sum of the diagonal. */
101  threshold_energy *= 1.0f - (-pca_threshold);
102 
103  float reduced_energy = 0.0f;
/* *rank is incremented once per completed iteration. The `i >= 2` guard means the
 * first two components are never rejected by the threshold (max_rank still caps them). */
104  for (int i = 0; i < max_rank; i++, (*rank)++) {
105  if (i >= 2 && reduced_energy >= threshold_energy)
106  break;
107  float s = feature_matrix[i * num_features + i];
108  reduced_energy += s;
109  }
110  }
111  else {
/* Same counting scheme: stop at the first component (index 2 or later) whose
 * sqrt(diagonal entry) falls below pca_threshold. */
112  for (int i = 0; i < max_rank; i++, (*rank)++) {
113  float s = feature_matrix[i * num_features + i];
114  if (i >= 2 && sqrtf(s) < pca_threshold)
115  break;
116  }
117  }
118 
119  math_matrix_transpose(transform, num_features, transform_stride);
120 
121  /* Bake the feature scaling into the transformation matrix. */
/* Only the first *rank columns of each row i are multiplied by feature_scale[i]. */
122  for (int i = 0; i < num_features; i++) {
123  for (int j = 0; j < (*rank); j++) {
124  transform[(i * num_features + j) * transform_stride] *= feature_scale[i];
125  }
126  }
127 }
128 
_GL_VOID GLfloat value _GL_VOID_RET _GL_VOID const GLuint GLboolean *residences _GL_BOOL_RET _GL_VOID GLsizei GLfloat GLfloat GLfloat GLfloat const GLubyte *bitmap _GL_VOID_RET _GL_VOID GLenum const void *lists _GL_VOID_RET _GL_VOID const GLdouble *equation _GL_VOID_RET _GL_VOID GLdouble GLdouble blue _GL_VOID_RET _GL_VOID GLfloat GLfloat blue _GL_VOID_RET _GL_VOID GLint GLint blue _GL_VOID_RET _GL_VOID GLshort GLshort blue _GL_VOID_RET _GL_VOID GLubyte GLubyte blue _GL_VOID_RET _GL_VOID GLuint GLuint blue _GL_VOID_RET _GL_VOID GLushort GLushort blue _GL_VOID_RET _GL_VOID GLbyte GLbyte GLbyte alpha _GL_VOID_RET _GL_VOID GLdouble GLdouble GLdouble alpha _GL_VOID_RET _GL_VOID GLfloat GLfloat GLfloat alpha _GL_VOID_RET _GL_VOID GLint GLint GLint alpha _GL_VOID_RET _GL_VOID GLshort GLshort GLshort alpha _GL_VOID_RET _GL_VOID GLubyte GLubyte GLubyte alpha _GL_VOID_RET _GL_VOID GLuint GLuint GLuint alpha _GL_VOID_RET _GL_VOID GLushort GLushort GLushort alpha _GL_VOID_RET _GL_VOID GLenum mode _GL_VOID_RET _GL_VOID GLint y
SIMD_FORCE_INLINE btVector3 transform(const btVector3 &point) const
int x
Definition: btConvexHull.h:149
int w
Definition: btConvexHull.h:149
int y
Definition: btConvexHull.h:149
int z
Definition: btConvexHull.h:149
#define CCL_FILTER_TILE_INFO
#define DENOISE_FEATURES
ccl_device_inline void filter_get_features(int3 pixel, const ccl_global float *ccl_restrict buffer, float *features, bool use_time, const float *ccl_restrict mean, int pass_stride)
#define FOR_PIXEL_WINDOW
#define END_FOR_PIXEL_WINDOW
ccl_device_inline void filter_get_feature_scales(int3 pixel, const ccl_global float *ccl_restrict buffer, float *scales, bool use_time, const float *ccl_restrict mean, int pass_stride)
ccl_device_inline void filter_calculate_scale(float *scale, bool use_time)
CCL_NAMESPACE_BEGIN ccl_device void kernel_filter_construct_transform(const ccl_global float *ccl_restrict buffer, CCL_FILTER_TILE_INFO, int x, int y, int4 rect, int pass_stride, int frame_stride, bool use_time, ccl_global float *transform, ccl_global int *rank, int radius, float pca_threshold, int transform_stride, int localIdx)
#define ccl_restrict
#define ccl_local_param
#define ccl_device
#define CCL_MAX_LOCAL_SIZE
#define ccl_local
#define ccl_global
#define CCL_NAMESPACE_END
#define make_int2(x, y)
#define sqrtf(x)
__kernel void ccl_constant KernelData ccl_global void ccl_global char ccl_global int ccl_global char ccl_global unsigned int ccl_global float * buffer
#define min(a, b)
Definition: sort.c:51
float max
__forceinline ssef low(const avxf &a)
Definition: util_avxf.h:277
__forceinline ssef high(const avxf &a)
Definition: util_avxf.h:281
ccl_device_inline void math_matrix_transpose(ccl_global float *A, int n, int stride)
ccl_device_inline void math_vector_add(float *a, const float *ccl_restrict b, int n)
ccl_device_inline void math_matrix_zero(float *A, int n)
ccl_device_inline void math_vector_zero(float *v, int n)
ccl_device void math_matrix_jacobi_eigendecomposition(float *A, ccl_global float *V, int n, int v_stride)
ccl_device_inline void math_vector_max(float *a, const float *ccl_restrict b, int n)
ccl_device_inline void math_vector_scale(float *a, float b, int n)
ccl_device_inline void math_matrix_add_gramian(float *A, int n, const float *ccl_restrict v, float weight)
ccl_device_inline void math_vector_mul(float *a, const float *ccl_restrict b, int n)
ccl_device_inline size_t align_up(size_t offset, size_t alignment)
Definition: util_types.h:65