Blender  V2.93
COM_GaussianXBlurOperation.cc
Go to the documentation of this file.
1 /*
2  * This program is free software; you can redistribute it and/or
3  * modify it under the terms of the GNU General Public License
4  * as published by the Free Software Foundation; either version 2
5  * of the License, or (at your option) any later version.
6  *
7  * This program is distributed in the hope that it will be useful,
8  * but WITHOUT ANY WARRANTY; without even the implied warranty of
9  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
10  * GNU General Public License for more details.
11  *
12  * You should have received a copy of the GNU General Public License
13  * along with this program; if not, write to the Free Software Foundation,
14  * Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.
15  *
16  * Copyright 2011, Blender Foundation.
17  */
18 
20 #include "BLI_math.h"
21 #include "COM_OpenCLDevice.h"
22 #include "MEM_guardedalloc.h"
23 
24 #include "RE_pipeline.h"
25 
26 namespace blender::compositor {
27 
29 {
30  this->m_gausstab = nullptr;
31 #ifdef BLI_HAVE_SSE2
32  this->m_gausstab_sse = nullptr;
33 #endif
34  this->m_filtersize = 0;
35 }
36 
38 {
39  lockMutex();
40  if (!this->m_sizeavailable) {
41  updateGauss();
42  }
43  void *buffer = getInputOperation(0)->initializeTileData(nullptr);
44  unlockMutex();
45  return buffer;
46 }
47 
49 {
51 
52  initMutex();
53 
54  if (this->m_sizeavailable) {
55  float rad = max_ff(m_size * m_data.sizex, 0.0f);
56  m_filtersize = min_ii(ceil(rad), MAX_GAUSSTAB_RADIUS);
57 
58  /* TODO(sergey): De-duplicate with the case below and Y blur. */
59  this->m_gausstab = BlurBaseOperation::make_gausstab(rad, m_filtersize);
60 #ifdef BLI_HAVE_SSE2
61  this->m_gausstab_sse = BlurBaseOperation::convert_gausstab_sse(this->m_gausstab, m_filtersize);
62 #endif
63  }
64 }
65 
66 void GaussianXBlurOperation::updateGauss()
67 {
68  if (this->m_gausstab == nullptr) {
69  updateSize();
70  float rad = max_ff(m_size * m_data.sizex, 0.0f);
71  rad = min_ff(rad, MAX_GAUSSTAB_RADIUS);
72  m_filtersize = min_ii(ceil(rad), MAX_GAUSSTAB_RADIUS);
73 
74  this->m_gausstab = BlurBaseOperation::make_gausstab(rad, m_filtersize);
75 #ifdef BLI_HAVE_SSE2
76  this->m_gausstab_sse = BlurBaseOperation::convert_gausstab_sse(this->m_gausstab, m_filtersize);
77 #endif
78  }
79 }
80 
81 void GaussianXBlurOperation::executePixel(float output[4], int x, int y, void *data)
82 {
83  float ATTR_ALIGN(16) color_accum[4] = {0.0f, 0.0f, 0.0f, 0.0f};
84  float multiplier_accum = 0.0f;
85  MemoryBuffer *inputBuffer = (MemoryBuffer *)data;
86  const rcti &input_rect = inputBuffer->get_rect();
87  float *buffer = inputBuffer->getBuffer();
88  int bufferwidth = inputBuffer->getWidth();
89  int bufferstartx = input_rect.xmin;
90  int bufferstarty = input_rect.ymin;
91 
92  int xmin = max_ii(x - m_filtersize, input_rect.xmin);
93  int xmax = min_ii(x + m_filtersize + 1, input_rect.xmax);
94  int ymin = max_ii(y, input_rect.ymin);
95 
96  int step = getStep();
97  int offsetadd = getOffsetAdd();
98  int bufferindex = ((xmin - bufferstartx) * 4) + ((ymin - bufferstarty) * 4 * bufferwidth);
99 
100 #ifdef BLI_HAVE_SSE2
101  __m128 accum_r = _mm_load_ps(color_accum);
102  for (int nx = xmin, index = (xmin - x) + this->m_filtersize; nx < xmax;
103  nx += step, index += step) {
104  __m128 reg_a = _mm_load_ps(&buffer[bufferindex]);
105  reg_a = _mm_mul_ps(reg_a, this->m_gausstab_sse[index]);
106  accum_r = _mm_add_ps(accum_r, reg_a);
107  multiplier_accum += this->m_gausstab[index];
108  bufferindex += offsetadd;
109  }
110  _mm_store_ps(color_accum, accum_r);
111 #else
112  for (int nx = xmin, index = (xmin - x) + this->m_filtersize; nx < xmax;
113  nx += step, index += step) {
114  const float multiplier = this->m_gausstab[index];
115  madd_v4_v4fl(color_accum, &buffer[bufferindex], multiplier);
116  multiplier_accum += multiplier;
117  bufferindex += offsetadd;
118  }
119 #endif
120  mul_v4_v4fl(output, color_accum, 1.0f / multiplier_accum);
121 }
122 
124  MemoryBuffer *outputMemoryBuffer,
125  cl_mem clOutputBuffer,
126  MemoryBuffer **inputMemoryBuffers,
127  std::list<cl_mem> *clMemToCleanUp,
128  std::list<cl_kernel> * /*clKernelsToCleanUp*/)
129 {
130  cl_kernel gaussianXBlurOperationKernel = device->COM_clCreateKernel(
131  "gaussianXBlurOperationKernel", nullptr);
132  cl_int filter_size = this->m_filtersize;
133 
134  cl_mem gausstab = clCreateBuffer(device->getContext(),
135  CL_MEM_READ_ONLY | CL_MEM_USE_HOST_PTR,
136  sizeof(float) * (this->m_filtersize * 2 + 1),
137  this->m_gausstab,
138  nullptr);
139 
140  device->COM_clAttachMemoryBufferToKernelParameter(gaussianXBlurOperationKernel,
141  0,
142  1,
143  clMemToCleanUp,
144  inputMemoryBuffers,
145  this->m_inputProgram);
147  gaussianXBlurOperationKernel, 2, clOutputBuffer);
149  gaussianXBlurOperationKernel, 3, outputMemoryBuffer);
150  clSetKernelArg(gaussianXBlurOperationKernel, 4, sizeof(cl_int), &filter_size);
151  device->COM_clAttachSizeToKernelParameter(gaussianXBlurOperationKernel, 5, this);
152  clSetKernelArg(gaussianXBlurOperationKernel, 6, sizeof(cl_mem), &gausstab);
153 
154  device->COM_clEnqueueRange(gaussianXBlurOperationKernel, outputMemoryBuffer, 7, this);
155 
156  clReleaseMemObject(gausstab);
157 }
158 
160 {
162 
163  if (this->m_gausstab) {
164  MEM_freeN(this->m_gausstab);
165  this->m_gausstab = nullptr;
166  }
167 #ifdef BLI_HAVE_SSE2
168  if (this->m_gausstab_sse) {
169  MEM_freeN(this->m_gausstab_sse);
170  this->m_gausstab_sse = nullptr;
171  }
172 #endif
173 
174  deinitMutex();
175 }
176 
178  ReadBufferOperation *readOperation,
179  rcti *output)
180 {
181  rcti newInput;
182 
183  if (!this->m_sizeavailable) {
184  rcti sizeInput;
185  sizeInput.xmin = 0;
186  sizeInput.ymin = 0;
187  sizeInput.xmax = 5;
188  sizeInput.ymax = 5;
189  NodeOperation *operation = this->getInputOperation(1);
190  if (operation->determineDependingAreaOfInterest(&sizeInput, readOperation, output)) {
191  return true;
192  }
193  }
194  {
195  if (this->m_sizeavailable && this->m_gausstab != nullptr) {
196  newInput.xmax = input->xmax + this->m_filtersize + 1;
197  newInput.xmin = input->xmin - this->m_filtersize - 1;
198  newInput.ymax = input->ymax;
199  newInput.ymin = input->ymin;
200  }
201  else {
202  newInput.xmax = this->getWidth();
203  newInput.xmin = 0;
204  newInput.ymax = this->getHeight();
205  newInput.ymin = 0;
206  }
207  return NodeOperation::determineDependingAreaOfInterest(&newInput, readOperation, output);
208  }
209 }
210 
211 } // namespace blender::compositor
#define ATTR_ALIGN(x)
MINLINE float max_ff(float a, float b)
MINLINE int min_ii(int a, int b)
MINLINE float min_ff(float a, float b)
MINLINE int max_ii(int a, int b)
MINLINE void mul_v4_v4fl(float r[3], const float a[4], float f)
MINLINE void madd_v4_v4fl(float r[4], const float a[4], float f)
#define MAX_GAUSSTAB_RADIUS
_GL_VOID GLfloat value _GL_VOID_RET _GL_VOID const GLuint GLboolean *residences _GL_BOOL_RET _GL_VOID GLsizei GLfloat GLfloat GLfloat GLfloat const GLubyte *bitmap _GL_VOID_RET _GL_VOID GLenum const void *lists _GL_VOID_RET _GL_VOID const GLdouble *equation _GL_VOID_RET _GL_VOID GLdouble GLdouble blue _GL_VOID_RET _GL_VOID GLfloat GLfloat blue _GL_VOID_RET _GL_VOID GLint GLint blue _GL_VOID_RET _GL_VOID GLshort GLshort blue _GL_VOID_RET _GL_VOID GLubyte GLubyte blue _GL_VOID_RET _GL_VOID GLuint GLuint blue _GL_VOID_RET _GL_VOID GLushort GLushort blue _GL_VOID_RET _GL_VOID GLbyte GLbyte GLbyte alpha _GL_VOID_RET _GL_VOID GLdouble GLdouble GLdouble alpha _GL_VOID_RET _GL_VOID GLfloat GLfloat GLfloat alpha _GL_VOID_RET _GL_VOID GLint GLint GLint alpha _GL_VOID_RET _GL_VOID GLshort GLshort GLshort alpha _GL_VOID_RET _GL_VOID GLubyte GLubyte GLubyte alpha _GL_VOID_RET _GL_VOID GLuint GLuint GLuint alpha _GL_VOID_RET _GL_VOID GLushort GLushort GLushort alpha _GL_VOID_RET _GL_VOID GLenum mode _GL_VOID_RET _GL_VOID GLint y
Read Guarded memory(de)allocation.
Group RGB to Bright Vector Camera Vector Combine Material Light Line Style Layer Add Ambient Diffuse Glossy Refraction Transparent Toon Principled Hair Volume Principled Light Particle Volume Image Sky Noise Wave Voronoi Brick Texture Vector Combine Vertex Color
#define output
float * make_gausstab(float rad, int size)
void initExecution() override
initialize the execution
void executePixel(float output[4], int x, int y, void *data) override
The inner loop of this operation.
void deinitExecution() override
Deinitialize the execution.
void executeOpenCL(OpenCLDevice *device, MemoryBuffer *outputMemoryBuffer, cl_mem clOutputBuffer, MemoryBuffer **inputMemoryBuffers, std::list< cl_mem > *clMemToCleanUp, std::list< cl_kernel > *clKernelsToCleanUp) override
custom handle to add new tasks to the OpenCL command queue in order to execute a chunk on an GPUDevic...
bool determineDependingAreaOfInterest(rcti *input, ReadBufferOperation *readOperation, rcti *output) override
a MemoryBuffer contains access to the data of a chunk
const rcti & get_rect() const
get the rect of this MemoryBuffer
const int getWidth() const
get the width of this MemoryBuffer
float * getBuffer()
get the data of this MemoryBuffer
NodeOperation contains calculation logic.
virtual void * initializeTileData(rcti *)
NodeOperation * getInputOperation(unsigned int inputSocketindex)
virtual bool determineDependingAreaOfInterest(rcti *input, ReadBufferOperation *readOperation, rcti *output)
device representing an GPU OpenCL device. an instance of this class represents a single cl_device
void COM_clAttachMemoryBufferOffsetToKernelParameter(cl_kernel kernel, int offsetIndex, MemoryBuffer *memoryBuffers)
cl_mem COM_clAttachMemoryBufferToKernelParameter(cl_kernel kernel, int parameterIndex, int offsetIndex, std::list< cl_mem > *cleanup, MemoryBuffer **inputMemoryBuffers, SocketReader *reader)
void COM_clEnqueueRange(cl_kernel kernel, MemoryBuffer *outputMemoryBuffer)
void COM_clAttachOutputMemoryBufferToKernelParameter(cl_kernel kernel, int parameterIndex, cl_mem clOutputMemoryBuffer)
void COM_clAttachSizeToKernelParameter(cl_kernel kernel, int offsetIndex, NodeOperation *operation)
cl_kernel COM_clCreateKernel(const char *kernelname, std::list< cl_kernel > *clKernelsToCleanUp)
DataType
possible data types for sockets
Definition: COM_defines.h:27
__kernel void ccl_constant KernelData ccl_global void ccl_global char ccl_global int ccl_global char ccl_global unsigned int ccl_global float * buffer
void(* MEM_freeN)(void *vmemh)
Definition: mallocn.c:41
int ymin
Definition: DNA_vec_types.h:80
int ymax
Definition: DNA_vec_types.h:80
int xmin
Definition: DNA_vec_types.h:79
int xmax
Definition: DNA_vec_types.h:79
ccl_device_inline float3 ceil(const float3 &a)