30 this->m_gausstab =
nullptr;
32 this->m_gausstab_sse =
nullptr;
34 this->m_filtersize = 0;
61 this->m_gausstab_sse = BlurBaseOperation::convert_gausstab_sse(this->m_gausstab, m_filtersize);
66 void GaussianXBlurOperation::updateGauss()
68 if (this->m_gausstab ==
nullptr) {
76 this->m_gausstab_sse = BlurBaseOperation::convert_gausstab_sse(this->m_gausstab, m_filtersize);
83 float ATTR_ALIGN(16) color_accum[4] = {0.0f, 0.0f, 0.0f, 0.0f};
84 float multiplier_accum = 0.0f;
88 int bufferwidth = inputBuffer->
getWidth();
89 int bufferstartx = input_rect.
xmin;
90 int bufferstarty = input_rect.
ymin;
92 int xmin =
max_ii(
x - m_filtersize, input_rect.
xmin);
93 int xmax =
min_ii(
x + m_filtersize + 1, input_rect.
xmax);
98 int bufferindex = ((xmin - bufferstartx) * 4) + ((ymin - bufferstarty) * 4 * bufferwidth);
101 __m128 accum_r = _mm_load_ps(color_accum);
102 for (
int nx = xmin, index = (xmin -
x) + this->m_filtersize; nx < xmax;
103 nx += step, index += step) {
104 __m128 reg_a = _mm_load_ps(&
buffer[bufferindex]);
105 reg_a = _mm_mul_ps(reg_a, this->m_gausstab_sse[index]);
106 accum_r = _mm_add_ps(accum_r, reg_a);
107 multiplier_accum += this->m_gausstab[index];
108 bufferindex += offsetadd;
110 _mm_store_ps(color_accum, accum_r);
112 for (
int nx = xmin, index = (xmin -
x) + this->m_filtersize; nx < xmax;
113 nx += step, index += step) {
114 const float multiplier = this->m_gausstab[index];
116 multiplier_accum += multiplier;
117 bufferindex += offsetadd;
125 cl_mem clOutputBuffer,
127 std::list<cl_mem> *clMemToCleanUp,
128 std::list<cl_kernel> * )
131 "gaussianXBlurOperationKernel",
nullptr);
132 cl_int filter_size = this->m_filtersize;
134 cl_mem gausstab = clCreateBuffer(device->
getContext(),
135 CL_MEM_READ_ONLY | CL_MEM_USE_HOST_PTR,
136 sizeof(
float) * (this->m_filtersize * 2 + 1),
147 gaussianXBlurOperationKernel, 2, clOutputBuffer);
149 gaussianXBlurOperationKernel, 3, outputMemoryBuffer);
150 clSetKernelArg(gaussianXBlurOperationKernel, 4,
sizeof(cl_int), &filter_size);
152 clSetKernelArg(gaussianXBlurOperationKernel, 6,
sizeof(cl_mem), &gausstab);
154 device->
COM_clEnqueueRange(gaussianXBlurOperationKernel, outputMemoryBuffer, 7,
this);
156 clReleaseMemObject(gausstab);
163 if (this->m_gausstab) {
165 this->m_gausstab =
nullptr;
168 if (this->m_gausstab_sse) {
170 this->m_gausstab_sse =
nullptr;
196 newInput.
xmax = input->
xmax + this->m_filtersize + 1;
197 newInput.
xmin = input->
xmin - this->m_filtersize - 1;
MINLINE float max_ff(float a, float b)
MINLINE int min_ii(int a, int b)
MINLINE float min_ff(float a, float b)
MINLINE int max_ii(int a, int b)
MINLINE void mul_v4_v4fl(float r[3], const float a[4], float f)
MINLINE void madd_v4_v4fl(float r[4], const float a[4], float f)
#define MAX_GAUSSTAB_RADIUS
_GL_VOID GLfloat value _GL_VOID_RET _GL_VOID const GLuint GLboolean *residences _GL_BOOL_RET _GL_VOID GLsizei GLfloat GLfloat GLfloat GLfloat const GLubyte *bitmap _GL_VOID_RET _GL_VOID GLenum const void *lists _GL_VOID_RET _GL_VOID const GLdouble *equation _GL_VOID_RET _GL_VOID GLdouble GLdouble blue _GL_VOID_RET _GL_VOID GLfloat GLfloat blue _GL_VOID_RET _GL_VOID GLint GLint blue _GL_VOID_RET _GL_VOID GLshort GLshort blue _GL_VOID_RET _GL_VOID GLubyte GLubyte blue _GL_VOID_RET _GL_VOID GLuint GLuint blue _GL_VOID_RET _GL_VOID GLushort GLushort blue _GL_VOID_RET _GL_VOID GLbyte GLbyte GLbyte alpha _GL_VOID_RET _GL_VOID GLdouble GLdouble GLdouble alpha _GL_VOID_RET _GL_VOID GLfloat GLfloat GLfloat alpha _GL_VOID_RET _GL_VOID GLint GLint GLint alpha _GL_VOID_RET _GL_VOID GLshort GLshort GLshort alpha _GL_VOID_RET _GL_VOID GLubyte GLubyte GLubyte alpha _GL_VOID_RET _GL_VOID GLuint GLuint GLuint alpha _GL_VOID_RET _GL_VOID GLushort GLushort GLushort alpha _GL_VOID_RET _GL_VOID GLenum mode _GL_VOID_RET _GL_VOID GLint y
Read Guarded memory (de)allocation.
Group RGB to Bright Vector Camera Vector Combine Material Light Line Style Layer Add Ambient Diffuse Glossy Refraction Transparent Toon Principled Hair Volume Principled Light Particle Volume Image Sky Noise Wave Voronoi Brick Texture Vector Combine Vertex Color
void deinitExecution() override
float * make_gausstab(float rad, int size)
void initExecution() override
SocketReader * m_inputProgram
void initExecution() override
Initialize the execution.
void executePixel(float output[4], int x, int y, void *data) override
The inner loop of this operation.
void deinitExecution() override
Deinitialize the execution.
void executeOpenCL(OpenCLDevice *device, MemoryBuffer *outputMemoryBuffer, cl_mem clOutputBuffer, MemoryBuffer **inputMemoryBuffers, std::list< cl_mem > *clMemToCleanUp, std::list< cl_kernel > *clKernelsToCleanUp) override
custom handle to add new tasks to the OpenCL command queue in order to execute a chunk on a GPUDevice...
void * initializeTileData(rcti *rect) override
bool determineDependingAreaOfInterest(rcti *input, ReadBufferOperation *readOperation, rcti *output) override
a MemoryBuffer contains access to the data of a chunk
const rcti & get_rect() const
get the rect of this MemoryBuffer
const int getWidth() const
get the width of this MemoryBuffer
float * getBuffer()
get the data of this MemoryBuffer
NodeOperation contains calculation logic.
virtual void * initializeTileData(rcti *)
unsigned int getHeight() const
NodeOperation * getInputOperation(unsigned int inputSocketindex)
unsigned int getWidth() const
virtual bool determineDependingAreaOfInterest(rcti *input, ReadBufferOperation *readOperation, rcti *output)
Device representing a GPU OpenCL device. An instance of this class represents a single cl_device.
void COM_clAttachMemoryBufferOffsetToKernelParameter(cl_kernel kernel, int offsetIndex, MemoryBuffer *memoryBuffers)
cl_mem COM_clAttachMemoryBufferToKernelParameter(cl_kernel kernel, int parameterIndex, int offsetIndex, std::list< cl_mem > *cleanup, MemoryBuffer **inputMemoryBuffers, SocketReader *reader)
void COM_clEnqueueRange(cl_kernel kernel, MemoryBuffer *outputMemoryBuffer)
void COM_clAttachOutputMemoryBufferToKernelParameter(cl_kernel kernel, int parameterIndex, cl_mem clOutputMemoryBuffer)
void COM_clAttachSizeToKernelParameter(cl_kernel kernel, int offsetIndex, NodeOperation *operation)
cl_kernel COM_clCreateKernel(const char *kernelname, std::list< cl_kernel > *clKernelsToCleanUp)
DataType
possible data types for sockets
__kernel void ccl_constant KernelData ccl_global void ccl_global char ccl_global int ccl_global char ccl_global unsigned int ccl_global float * buffer
void(* MEM_freeN)(void *vmemh)
ccl_device_inline float3 ceil(const float3 &a)