Blender  V2.93
COM_GaussianYBlurOperation.cc
Go to the documentation of this file.
1 /*
2  * This program is free software; you can redistribute it and/or
3  * modify it under the terms of the GNU General Public License
4  * as published by the Free Software Foundation; either version 2
5  * of the License, or (at your option) any later version.
6  *
7  * This program is distributed in the hope that it will be useful,
8  * but WITHOUT ANY WARRANTY; without even the implied warranty of
9  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
10  * GNU General Public License for more details.
11  *
12  * You should have received a copy of the GNU General Public License
13  * along with this program; if not, write to the Free Software Foundation,
14  * Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.
15  *
16  * Copyright 2011, Blender Foundation.
17  */
18 
20 #include "BLI_math.h"
21 #include "COM_OpenCLDevice.h"
22 #include "MEM_guardedalloc.h"
23 
24 #include "RE_pipeline.h"
25 
26 namespace blender::compositor {
27 
29 {
30  this->m_gausstab = nullptr;
31 #ifdef BLI_HAVE_SSE2
32  this->m_gausstab_sse = nullptr;
33 #endif
34  this->m_filtersize = 0;
35 }
36 
38 {
39  lockMutex();
40  if (!this->m_sizeavailable) {
41  updateGauss();
42  }
43  void *buffer = getInputOperation(0)->initializeTileData(nullptr);
44  unlockMutex();
45  return buffer;
46 }
47 
49 {
51 
52  initMutex();
53 
54  if (this->m_sizeavailable) {
55  float rad = max_ff(m_size * m_data.sizey, 0.0f);
56  m_filtersize = min_ii(ceil(rad), MAX_GAUSSTAB_RADIUS);
57 
58  this->m_gausstab = BlurBaseOperation::make_gausstab(rad, m_filtersize);
59 #ifdef BLI_HAVE_SSE2
60  this->m_gausstab_sse = BlurBaseOperation::convert_gausstab_sse(this->m_gausstab, m_filtersize);
61 #endif
62  }
63 }
64 
65 void GaussianYBlurOperation::updateGauss()
66 {
67  if (this->m_gausstab == nullptr) {
68  updateSize();
69  float rad = max_ff(m_size * m_data.sizey, 0.0f);
70  rad = min_ff(rad, MAX_GAUSSTAB_RADIUS);
71  m_filtersize = min_ii(ceil(rad), MAX_GAUSSTAB_RADIUS);
72 
73  this->m_gausstab = BlurBaseOperation::make_gausstab(rad, m_filtersize);
74 #ifdef BLI_HAVE_SSE2
75  this->m_gausstab_sse = BlurBaseOperation::convert_gausstab_sse(this->m_gausstab, m_filtersize);
76 #endif
77  }
78 }
79 
80 void GaussianYBlurOperation::executePixel(float output[4], int x, int y, void *data)
81 {
82  float ATTR_ALIGN(16) color_accum[4] = {0.0f, 0.0f, 0.0f, 0.0f};
83  float multiplier_accum = 0.0f;
84  MemoryBuffer *inputBuffer = (MemoryBuffer *)data;
85  const rcti &input_rect = inputBuffer->get_rect();
86  float *buffer = inputBuffer->getBuffer();
87  int bufferwidth = inputBuffer->getWidth();
88  int bufferstartx = input_rect.xmin;
89  int bufferstarty = input_rect.ymin;
90 
91  int xmin = max_ii(x, input_rect.xmin);
92  int ymin = max_ii(y - m_filtersize, input_rect.ymin);
93  int ymax = min_ii(y + m_filtersize + 1, input_rect.ymax);
94 
95  int index;
96  int step = getStep();
97  const int bufferIndexx = ((xmin - bufferstartx) * 4);
98 
99 #ifdef BLI_HAVE_SSE2
100  __m128 accum_r = _mm_load_ps(color_accum);
101  for (int ny = ymin; ny < ymax; ny += step) {
102  index = (ny - y) + this->m_filtersize;
103  int bufferindex = bufferIndexx + ((ny - bufferstarty) * 4 * bufferwidth);
104  const float multiplier = this->m_gausstab[index];
105  __m128 reg_a = _mm_load_ps(&buffer[bufferindex]);
106  reg_a = _mm_mul_ps(reg_a, this->m_gausstab_sse[index]);
107  accum_r = _mm_add_ps(accum_r, reg_a);
108  multiplier_accum += multiplier;
109  }
110  _mm_store_ps(color_accum, accum_r);
111 #else
112  for (int ny = ymin; ny < ymax; ny += step) {
113  index = (ny - y) + this->m_filtersize;
114  int bufferindex = bufferIndexx + ((ny - bufferstarty) * 4 * bufferwidth);
115  const float multiplier = this->m_gausstab[index];
116  madd_v4_v4fl(color_accum, &buffer[bufferindex], multiplier);
117  multiplier_accum += multiplier;
118  }
119 #endif
120  mul_v4_v4fl(output, color_accum, 1.0f / multiplier_accum);
121 }
122 
124  MemoryBuffer *outputMemoryBuffer,
125  cl_mem clOutputBuffer,
126  MemoryBuffer **inputMemoryBuffers,
127  std::list<cl_mem> *clMemToCleanUp,
128  std::list<cl_kernel> * /*clKernelsToCleanUp*/)
129 {
130  cl_kernel gaussianYBlurOperationKernel = device->COM_clCreateKernel(
131  "gaussianYBlurOperationKernel", nullptr);
132  cl_int filter_size = this->m_filtersize;
133 
134  cl_mem gausstab = clCreateBuffer(device->getContext(),
135  CL_MEM_READ_ONLY | CL_MEM_USE_HOST_PTR,
136  sizeof(float) * (this->m_filtersize * 2 + 1),
137  this->m_gausstab,
138  nullptr);
139 
140  device->COM_clAttachMemoryBufferToKernelParameter(gaussianYBlurOperationKernel,
141  0,
142  1,
143  clMemToCleanUp,
144  inputMemoryBuffers,
145  this->m_inputProgram);
147  gaussianYBlurOperationKernel, 2, clOutputBuffer);
149  gaussianYBlurOperationKernel, 3, outputMemoryBuffer);
150  clSetKernelArg(gaussianYBlurOperationKernel, 4, sizeof(cl_int), &filter_size);
151  device->COM_clAttachSizeToKernelParameter(gaussianYBlurOperationKernel, 5, this);
152  clSetKernelArg(gaussianYBlurOperationKernel, 6, sizeof(cl_mem), &gausstab);
153 
154  device->COM_clEnqueueRange(gaussianYBlurOperationKernel, outputMemoryBuffer, 7, this);
155 
156  clReleaseMemObject(gausstab);
157 }
158 
160 {
162 
163  if (this->m_gausstab) {
164  MEM_freeN(this->m_gausstab);
165  this->m_gausstab = nullptr;
166  }
167 #ifdef BLI_HAVE_SSE2
168  if (this->m_gausstab_sse) {
169  MEM_freeN(this->m_gausstab_sse);
170  this->m_gausstab_sse = nullptr;
171  }
172 #endif
173 
174  deinitMutex();
175 }
176 
178  ReadBufferOperation *readOperation,
179  rcti *output)
180 {
181  rcti newInput;
182 
183  if (!m_sizeavailable) {
184  rcti sizeInput;
185  sizeInput.xmin = 0;
186  sizeInput.ymin = 0;
187  sizeInput.xmax = 5;
188  sizeInput.ymax = 5;
189  NodeOperation *operation = this->getInputOperation(1);
190  if (operation->determineDependingAreaOfInterest(&sizeInput, readOperation, output)) {
191  return true;
192  }
193  }
194  {
195  if (this->m_sizeavailable && this->m_gausstab != nullptr) {
196  newInput.xmax = input->xmax;
197  newInput.xmin = input->xmin;
198  newInput.ymax = input->ymax + this->m_filtersize + 1;
199  newInput.ymin = input->ymin - this->m_filtersize - 1;
200  }
201  else {
202  newInput.xmax = this->getWidth();
203  newInput.xmin = 0;
204  newInput.ymax = this->getHeight();
205  newInput.ymin = 0;
206  }
207  return NodeOperation::determineDependingAreaOfInterest(&newInput, readOperation, output);
208  }
209 }
210 
211 } // namespace blender::compositor
#define ATTR_ALIGN(x)
MINLINE float max_ff(float a, float b)
MINLINE int min_ii(int a, int b)
MINLINE float min_ff(float a, float b)
MINLINE int max_ii(int a, int b)
MINLINE void mul_v4_v4fl(float r[3], const float a[4], float f)
MINLINE void madd_v4_v4fl(float r[4], const float a[4], float f)
#define MAX_GAUSSTAB_RADIUS
_GL_VOID GLfloat value _GL_VOID_RET _GL_VOID const GLuint GLboolean *residences _GL_BOOL_RET _GL_VOID GLsizei GLfloat GLfloat GLfloat GLfloat const GLubyte *bitmap _GL_VOID_RET _GL_VOID GLenum const void *lists _GL_VOID_RET _GL_VOID const GLdouble *equation _GL_VOID_RET _GL_VOID GLdouble GLdouble blue _GL_VOID_RET _GL_VOID GLfloat GLfloat blue _GL_VOID_RET _GL_VOID GLint GLint blue _GL_VOID_RET _GL_VOID GLshort GLshort blue _GL_VOID_RET _GL_VOID GLubyte GLubyte blue _GL_VOID_RET _GL_VOID GLuint GLuint blue _GL_VOID_RET _GL_VOID GLushort GLushort blue _GL_VOID_RET _GL_VOID GLbyte GLbyte GLbyte alpha _GL_VOID_RET _GL_VOID GLdouble GLdouble GLdouble alpha _GL_VOID_RET _GL_VOID GLfloat GLfloat GLfloat alpha _GL_VOID_RET _GL_VOID GLint GLint GLint alpha _GL_VOID_RET _GL_VOID GLshort GLshort GLshort alpha _GL_VOID_RET _GL_VOID GLubyte GLubyte GLubyte alpha _GL_VOID_RET _GL_VOID GLuint GLuint GLuint alpha _GL_VOID_RET _GL_VOID GLushort GLushort GLushort alpha _GL_VOID_RET _GL_VOID GLenum mode _GL_VOID_RET _GL_VOID GLint GLsizei GLsizei GLenum type _GL_VOID_RET _GL_VOID GLsizei GLenum GLenum const void *pixels _GL_VOID_RET _GL_VOID const void *pointer _GL_VOID_RET _GL_VOID GLdouble v _GL_VOID_RET _GL_VOID GLfloat v _GL_VOID_RET _GL_VOID GLint GLint i2 _GL_VOID_RET _GL_VOID GLint j _GL_VOID_RET _GL_VOID GLfloat param _GL_VOID_RET _GL_VOID GLint param _GL_VOID_RET _GL_VOID GLdouble GLdouble GLdouble GLdouble GLdouble zFar _GL_VOID_RET _GL_UINT GLdouble *equation _GL_VOID_RET _GL_VOID GLenum GLint *params _GL_VOID_RET _GL_VOID GLenum GLfloat *v _GL_VOID_RET _GL_VOID GLenum GLfloat *params _GL_VOID_RET _GL_VOID GLfloat *values _GL_VOID_RET _GL_VOID GLushort *values _GL_VOID_RET _GL_VOID GLenum GLfloat *params _GL_VOID_RET _GL_VOID GLenum GLdouble *params _GL_VOID_RET _GL_VOID GLenum GLint *params _GL_VOID_RET _GL_VOID GLsizei const void *pointer _GL_VOID_RET _GL_VOID GLsizei const void *pointer _GL_VOID_RET _GL_BOOL GLfloat param _GL_VOID_RET _GL_VOID GLint param _GL_VOID_RET 
_GL_VOID GLenum GLfloat param _GL_VOID_RET _GL_VOID GLenum GLint param _GL_VOID_RET _GL_VOID GLushort pattern _GL_VOID_RET _GL_VOID GLdouble GLdouble GLint GLint const GLdouble *points _GL_VOID_RET _GL_VOID GLdouble GLdouble GLint GLint GLdouble GLdouble GLint GLint const GLdouble *points _GL_VOID_RET _GL_VOID GLdouble GLdouble u2 _GL_VOID_RET _GL_VOID GLdouble GLdouble GLint GLdouble GLdouble v2 _GL_VOID_RET _GL_VOID GLenum GLfloat param _GL_VOID_RET _GL_VOID GLenum GLint param _GL_VOID_RET _GL_VOID GLenum mode _GL_VOID_RET _GL_VOID GLdouble ny
_GL_VOID GLfloat value _GL_VOID_RET _GL_VOID const GLuint GLboolean *residences _GL_BOOL_RET _GL_VOID GLsizei GLfloat GLfloat GLfloat GLfloat const GLubyte *bitmap _GL_VOID_RET _GL_VOID GLenum const void *lists _GL_VOID_RET _GL_VOID const GLdouble *equation _GL_VOID_RET _GL_VOID GLdouble GLdouble blue _GL_VOID_RET _GL_VOID GLfloat GLfloat blue _GL_VOID_RET _GL_VOID GLint GLint blue _GL_VOID_RET _GL_VOID GLshort GLshort blue _GL_VOID_RET _GL_VOID GLubyte GLubyte blue _GL_VOID_RET _GL_VOID GLuint GLuint blue _GL_VOID_RET _GL_VOID GLushort GLushort blue _GL_VOID_RET _GL_VOID GLbyte GLbyte GLbyte alpha _GL_VOID_RET _GL_VOID GLdouble GLdouble GLdouble alpha _GL_VOID_RET _GL_VOID GLfloat GLfloat GLfloat alpha _GL_VOID_RET _GL_VOID GLint GLint GLint alpha _GL_VOID_RET _GL_VOID GLshort GLshort GLshort alpha _GL_VOID_RET _GL_VOID GLubyte GLubyte GLubyte alpha _GL_VOID_RET _GL_VOID GLuint GLuint GLuint alpha _GL_VOID_RET _GL_VOID GLushort GLushort GLushort alpha _GL_VOID_RET _GL_VOID GLenum mode _GL_VOID_RET _GL_VOID GLint y
Read Guarded memory(de)allocation.
Group RGB to Bright Vector Camera Vector Combine Material Light Line Style Layer Add Ambient Diffuse Glossy Refraction Transparent Toon Principled Hair Volume Principled Light Particle Volume Image Sky Noise Wave Voronoi Brick Texture Vector Combine Vertex Color
#define output
float * make_gausstab(float rad, int size)
void executePixel(float output[4], int x, int y, void *data) override
void initExecution() override
initialize the execution
bool determineDependingAreaOfInterest(rcti *input, ReadBufferOperation *readOperation, rcti *output) override
void executeOpenCL(OpenCLDevice *device, MemoryBuffer *outputMemoryBuffer, cl_mem clOutputBuffer, MemoryBuffer **inputMemoryBuffers, std::list< cl_mem > *clMemToCleanUp, std::list< cl_kernel > *clKernelsToCleanUp) override
custom handle to add new tasks to the OpenCL command queue in order to execute a chunk on an GPUDevic...
a MemoryBuffer contains access to the data of a chunk
const rcti & get_rect() const
get the rect of this MemoryBuffer
const int getWidth() const
get the width of this MemoryBuffer
float * getBuffer()
get the data of this MemoryBuffer
NodeOperation contains calculation logic.
virtual void * initializeTileData(rcti *)
NodeOperation * getInputOperation(unsigned int inputSocketindex)
virtual bool determineDependingAreaOfInterest(rcti *input, ReadBufferOperation *readOperation, rcti *output)
Device representing a GPU OpenCL device. An instance of this class represents a single cl_device.
void COM_clAttachMemoryBufferOffsetToKernelParameter(cl_kernel kernel, int offsetIndex, MemoryBuffer *memoryBuffers)
cl_mem COM_clAttachMemoryBufferToKernelParameter(cl_kernel kernel, int parameterIndex, int offsetIndex, std::list< cl_mem > *cleanup, MemoryBuffer **inputMemoryBuffers, SocketReader *reader)
void COM_clEnqueueRange(cl_kernel kernel, MemoryBuffer *outputMemoryBuffer)
void COM_clAttachOutputMemoryBufferToKernelParameter(cl_kernel kernel, int parameterIndex, cl_mem clOutputMemoryBuffer)
void COM_clAttachSizeToKernelParameter(cl_kernel kernel, int offsetIndex, NodeOperation *operation)
cl_kernel COM_clCreateKernel(const char *kernelname, std::list< cl_kernel > *clKernelsToCleanUp)
DataType
possible data types for sockets
Definition: COM_defines.h:27
__kernel void ccl_constant KernelData ccl_global void ccl_global char ccl_global int ccl_global char ccl_global unsigned int ccl_global float * buffer
void(* MEM_freeN)(void *vmemh)
Definition: mallocn.c:41
int ymin
Definition: DNA_vec_types.h:80
int ymax
Definition: DNA_vec_types.h:80
int xmin
Definition: DNA_vec_types.h:79
int xmax
Definition: DNA_vec_types.h:79
ccl_device_inline float3 ceil(const float3 &a)