Blender  V2.93
filter_features_sse.h
Go to the documentation of this file.
1 /*
2  * Copyright 2011-2017 Blender Foundation
3  *
4  * Licensed under the Apache License, Version 2.0 (the "License");
5  * you may not use this file except in compliance with the License.
6  * You may obtain a copy of the License at
7  *
8  * http://www.apache.org/licenses/LICENSE-2.0
9  *
10  * Unless required by applicable law or agreed to in writing, software
11  * distributed under the License is distributed on an "AS IS" BASIS,
12  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13  * See the License for the specific language governing permissions and
14  * limitations under the License.
15  */
16 
18 
/* Fetch feature pass number `pass` for the current group of pixels.
 *
 * The macro expands in the caller's scope and needs two names to be visible there:
 *  - buffer:      float pointer used as the base address (pass 0).
 *  - pass_stride: offset, in floats, from one pass to the next.
 * It evaluates to whatever load_float4() returns for the address buffer + pass * pass_stride.
 * The argument is parenthesized, so expressions such as `i + 1` are safe to pass. */
#define ccl_get_feature_sse(pass) load_float4(buffer + (pass) * pass_stride)
20 
/* Loop over the pixels in the range [low.x, high.x) x [low.y, high.y), 4 at a time.
 * pixel_buffer always points to the first of the 4 current pixels in the first pass.
 * x4 and y4 contain the coordinates of the four pixels, t4 contains the value of the current
 * frame (tile_info->frames[frame]) in all four lanes, and active_pixels contains a mask that's set
 * for all pixels within the window. Repeat the loop for every secondary frame if there are any.
 *
 * The macro expands in the caller's scope and uses these names from it: tile_info, pixel,
 * pixel_buffer, buffer, rect, low, high, buffer_w and frame_stride.
 * It opens three nested loops that must be closed with END_FOR_PIXEL_WINDOW_SSE. */
#define FOR_PIXEL_WINDOW_SSE \
  for (int frame = 0; frame < tile_info->num_frames; frame++) { \
    pixel.z = tile_info->frames[frame]; \
    pixel_buffer = buffer + (low.y - rect.y) * buffer_w + (low.x - rect.x) + \
                   frame * frame_stride; \
    float4 t4 = make_float4(pixel.z); \
    for (pixel.y = low.y; pixel.y < high.y; pixel.y++) { \
      float4 y4 = make_float4(pixel.y); \
      for (pixel.x = low.x; pixel.x < high.x; pixel.x += 4, pixel_buffer += 4) { \
        float4 x4 = make_float4(pixel.x) + make_float4(0.0f, 1.0f, 2.0f, 3.0f); \
        int4 active_pixels = x4 < make_float4(high.x);

/* Close the three loops opened by FOR_PIXEL_WINDOW_SSE.
 *
 * The inner loop moves pixel_buffer in steps of 4, so whenever the window width is not a multiple
 * of 4 it ends up past high.x. After the inner loop pixel.x holds the position that was actually
 * reached, so the distance travelled in this row is (pixel.x - low.x). Rewinding by that amount
 * (instead of by high.x - low.x) keeps pixel_buffer on the first pixel of the next row. */
#define END_FOR_PIXEL_WINDOW_SSE \
      } \
      pixel_buffer += buffer_w - (pixel.x - low.x); \
    } \
  }
42 
44  float4 y,
45  float4 t,
46  int4 active_pixels,
47  const float *ccl_restrict buffer,
48  float4 *features,
49  bool use_time,
50  const float4 *ccl_restrict mean,
51  int pass_stride)
52 {
53  int num_features = use_time ? 11 : 10;
54 
55  features[0] = x;
56  features[1] = y;
57  features[2] = fabs(ccl_get_feature_sse(0));
58  features[3] = ccl_get_feature_sse(1);
59  features[4] = ccl_get_feature_sse(2);
60  features[5] = ccl_get_feature_sse(3);
61  features[6] = ccl_get_feature_sse(4);
62  features[7] = ccl_get_feature_sse(5);
63  features[8] = ccl_get_feature_sse(6);
64  features[9] = ccl_get_feature_sse(7);
65  if (use_time) {
66  features[10] = t;
67  }
68 
69  if (mean) {
70  for (int i = 0; i < num_features; i++) {
71  features[i] = features[i] - mean[i];
72  }
73  }
74  for (int i = 0; i < num_features; i++) {
75  features[i] = mask(active_pixels, features[i]);
76  }
77 }
78 
80  float4 y,
81  float4 t,
82  int4 active_pixels,
83  const float *ccl_restrict buffer,
84  float4 *scales,
85  bool use_time,
86  const float4 *ccl_restrict mean,
87  int pass_stride)
88 {
89  scales[0] = fabs(x - mean[0]);
90  scales[1] = fabs(y - mean[1]);
91  scales[2] = fabs(fabs(ccl_get_feature_sse(0)) - mean[2]);
92  scales[3] = sqr(ccl_get_feature_sse(1) - mean[3]) + sqr(ccl_get_feature_sse(2) - mean[4]) +
93  sqr(ccl_get_feature_sse(3) - mean[5]);
94  scales[4] = fabs(ccl_get_feature_sse(4) - mean[6]);
95  scales[5] = sqr(ccl_get_feature_sse(5) - mean[7]) + sqr(ccl_get_feature_sse(6) - mean[8]) +
96  sqr(ccl_get_feature_sse(7) - mean[9]);
97  if (use_time) {
98  scales[6] = fabs(t - mean[10]);
99  }
100 
101  for (int i = 0; i < (use_time ? 7 : 6); i++)
102  scales[i] = mask(active_pixels, scales[i]);
103 }
104 
105 ccl_device_inline void filter_calculate_scale_sse(float4 *scale, bool use_time)
106 {
107  scale[0] = rcp(max(reduce_max(scale[0]), make_float4(0.01f)));
108  scale[1] = rcp(max(reduce_max(scale[1]), make_float4(0.01f)));
109  scale[2] = rcp(max(reduce_max(scale[2]), make_float4(0.01f)));
110  if (use_time) {
111  scale[10] = rcp(max(reduce_max(scale[6]), make_float4(0.01f)));
112  }
113  scale[6] = rcp(max(reduce_max(scale[4]), make_float4(0.01f)));
114  scale[7] = scale[8] = scale[9] = rcp(max(reduce_max(sqrt(scale[5])), make_float4(0.01f)));
115  scale[3] = scale[4] = scale[5] = rcp(max(reduce_max(sqrt(scale[3])), make_float4(0.01f)));
116 }
117 
sqrt(x)+1/max(0
_GL_VOID GLfloat value _GL_VOID_RET _GL_VOID const GLuint GLboolean *residences _GL_BOOL_RET _GL_VOID GLsizei GLfloat GLfloat GLfloat GLfloat const GLubyte *bitmap _GL_VOID_RET _GL_VOID GLenum const void *lists _GL_VOID_RET _GL_VOID const GLdouble *equation _GL_VOID_RET _GL_VOID GLdouble GLdouble blue _GL_VOID_RET _GL_VOID GLfloat GLfloat blue _GL_VOID_RET _GL_VOID GLint GLint blue _GL_VOID_RET _GL_VOID GLshort GLshort blue _GL_VOID_RET _GL_VOID GLubyte GLubyte blue _GL_VOID_RET _GL_VOID GLuint GLuint blue _GL_VOID_RET _GL_VOID GLushort GLushort blue _GL_VOID_RET _GL_VOID GLbyte GLbyte GLbyte alpha _GL_VOID_RET _GL_VOID GLdouble GLdouble GLdouble alpha _GL_VOID_RET _GL_VOID GLfloat GLfloat GLfloat alpha _GL_VOID_RET _GL_VOID GLint GLint GLint alpha _GL_VOID_RET _GL_VOID GLshort GLshort GLshort alpha _GL_VOID_RET _GL_VOID GLubyte GLubyte GLubyte alpha _GL_VOID_RET _GL_VOID GLuint GLuint GLuint alpha _GL_VOID_RET _GL_VOID GLushort GLushort GLushort alpha _GL_VOID_RET _GL_VOID GLenum mode _GL_VOID_RET _GL_VOID GLint y
_GL_VOID GLfloat value _GL_VOID_RET _GL_VOID const GLuint GLboolean *residences _GL_BOOL_RET _GL_VOID GLsizei GLfloat GLfloat GLfloat GLfloat const GLubyte *bitmap _GL_VOID_RET _GL_VOID GLenum const void *lists _GL_VOID_RET _GL_VOID const GLdouble *equation _GL_VOID_RET _GL_VOID GLdouble GLdouble blue _GL_VOID_RET _GL_VOID GLfloat GLfloat blue _GL_VOID_RET _GL_VOID GLint GLint blue _GL_VOID_RET _GL_VOID GLshort GLshort blue _GL_VOID_RET _GL_VOID GLubyte GLubyte blue _GL_VOID_RET _GL_VOID GLuint GLuint blue _GL_VOID_RET _GL_VOID GLushort GLushort blue _GL_VOID_RET _GL_VOID GLbyte GLbyte GLbyte alpha _GL_VOID_RET _GL_VOID GLdouble GLdouble GLdouble alpha _GL_VOID_RET _GL_VOID GLfloat GLfloat GLfloat alpha _GL_VOID_RET _GL_VOID GLint GLint GLint alpha _GL_VOID_RET _GL_VOID GLshort GLshort GLshort alpha _GL_VOID_RET _GL_VOID GLubyte GLubyte GLubyte alpha _GL_VOID_RET _GL_VOID GLuint GLuint GLuint alpha _GL_VOID_RET _GL_VOID GLushort GLushort GLushort alpha _GL_VOID_RET _GL_VOID GLenum mode _GL_VOID_RET _GL_VOID GLint GLsizei GLsizei GLenum type _GL_VOID_RET _GL_VOID GLsizei GLenum GLenum const void *pixels _GL_VOID_RET _GL_VOID const void *pointer _GL_VOID_RET _GL_VOID GLdouble v _GL_VOID_RET _GL_VOID GLfloat v _GL_VOID_RET _GL_VOID GLint GLint i2 _GL_VOID_RET _GL_VOID GLint j _GL_VOID_RET _GL_VOID GLfloat param _GL_VOID_RET _GL_VOID GLint param _GL_VOID_RET _GL_VOID GLdouble GLdouble GLdouble GLdouble GLdouble zFar _GL_VOID_RET _GL_UINT GLdouble *equation _GL_VOID_RET _GL_VOID GLenum GLint *params _GL_VOID_RET _GL_VOID GLenum GLfloat *v _GL_VOID_RET _GL_VOID GLenum GLfloat *params _GL_VOID_RET _GL_VOID GLfloat *values _GL_VOID_RET _GL_VOID GLushort *values _GL_VOID_RET _GL_VOID GLenum GLfloat *params _GL_VOID_RET _GL_VOID GLenum GLdouble *params _GL_VOID_RET _GL_VOID GLenum GLint *params _GL_VOID_RET _GL_VOID GLsizei const void *pointer _GL_VOID_RET _GL_VOID GLsizei const void *pointer _GL_VOID_RET _GL_BOOL GLfloat param _GL_VOID_RET _GL_VOID GLint param _GL_VOID_RET 
_GL_VOID GLenum GLfloat param _GL_VOID_RET _GL_VOID GLenum GLint param _GL_VOID_RET _GL_VOID GLushort pattern _GL_VOID_RET _GL_VOID GLdouble GLdouble GLint GLint const GLdouble *points _GL_VOID_RET _GL_VOID GLdouble GLdouble GLint GLint GLdouble GLdouble GLint GLint const GLdouble *points _GL_VOID_RET _GL_VOID GLdouble GLdouble u2 _GL_VOID_RET _GL_VOID GLdouble GLdouble GLint GLdouble GLdouble v2 _GL_VOID_RET _GL_VOID GLenum GLfloat param _GL_VOID_RET _GL_VOID GLenum GLint param _GL_VOID_RET _GL_VOID GLenum mode _GL_VOID_RET _GL_VOID GLdouble GLdouble nz _GL_VOID_RET _GL_VOID GLfloat GLfloat nz _GL_VOID_RET _GL_VOID GLint GLint nz _GL_VOID_RET _GL_VOID GLshort GLshort nz _GL_VOID_RET _GL_VOID GLsizei const void *pointer _GL_VOID_RET _GL_VOID GLsizei const GLfloat *values _GL_VOID_RET _GL_VOID GLsizei const GLushort *values _GL_VOID_RET _GL_VOID GLint param _GL_VOID_RET _GL_VOID const GLuint const GLclampf *priorities _GL_VOID_RET _GL_VOID GLdouble y _GL_VOID_RET _GL_VOID GLfloat y _GL_VOID_RET _GL_VOID GLint y _GL_VOID_RET _GL_VOID GLshort y _GL_VOID_RET _GL_VOID GLdouble GLdouble z _GL_VOID_RET _GL_VOID GLfloat GLfloat z _GL_VOID_RET _GL_VOID GLint GLint z _GL_VOID_RET _GL_VOID GLshort GLshort z _GL_VOID_RET _GL_VOID GLdouble GLdouble GLdouble w _GL_VOID_RET _GL_VOID GLfloat GLfloat GLfloat w _GL_VOID_RET _GL_VOID GLint GLint GLint w _GL_VOID_RET _GL_VOID GLshort GLshort GLshort w _GL_VOID_RET _GL_VOID GLdouble GLdouble GLdouble y2 _GL_VOID_RET _GL_VOID GLfloat GLfloat GLfloat y2 _GL_VOID_RET _GL_VOID GLint GLint GLint y2 _GL_VOID_RET _GL_VOID GLshort GLshort GLshort y2 _GL_VOID_RET _GL_VOID GLdouble GLdouble GLdouble z _GL_VOID_RET _GL_VOID GLdouble GLdouble z _GL_VOID_RET _GL_VOID GLuint *buffer _GL_VOID_RET _GL_VOID GLdouble t _GL_VOID_RET _GL_VOID GLfloat t _GL_VOID_RET _GL_VOID GLint t _GL_VOID_RET _GL_VOID GLshort t _GL_VOID_RET _GL_VOID GLdouble t
ccl_device_inline void filter_get_features_sse(float4 x, float4 y, float4 t, int4 active_pixels, const float *ccl_restrict buffer, float4 *features, bool use_time, const float4 *ccl_restrict mean, int pass_stride)
ccl_device_inline void filter_calculate_scale_sse(float4 *scale, bool use_time)
#define ccl_get_feature_sse(pass)
ccl_device_inline void filter_get_feature_scales_sse(float4 x, float4 y, float4 t, int4 active_pixels, const float *ccl_restrict buffer, float4 *scales, bool use_time, const float4 *ccl_restrict mean, int pass_stride)
#define ccl_restrict
#define ccl_device_inline
#define CCL_NAMESPACE_END
#define make_float4(x, y, z, w)
#define rcp(x)
__kernel void ccl_constant KernelData ccl_global void ccl_global char ccl_global int ccl_global char ccl_global unsigned int ccl_global float * buffer
float max
__forceinline int reduce_max(const avxi &v)
Definition: util_avxi.h:705
ccl_device_inline float sqr(float a)
Definition: util_math.h:651
ccl_device_inline float2 fabs(const float2 &a)
ccl_device_inline float4 mask(const int4 &mask, const float4 &a)