Blender  V2.93
COM_OpenCLDevice.cc
Go to the documentation of this file.
1 /*
2  * This program is free software; you can redistribute it and/or
3  * modify it under the terms of the GNU General Public License
4  * as published by the Free Software Foundation; either version 2
5  * of the License, or (at your option) any later version.
6  *
7  * This program is distributed in the hope that it will be useful,
8  * but WITHOUT ANY WARRANTY; without even the implied warranty of
9  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
10  * GNU General Public License for more details.
11  *
12  * You should have received a copy of the GNU General Public License
13  * along with this program; if not, write to the Free Software Foundation,
14  * Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.
15  *
16  * Copyright 2011, Blender Foundation.
17  */
18 
19 #include "COM_OpenCLDevice.h"
20 #include "COM_WorkScheduler.h"
21 
22 namespace blender::compositor {
23 
24 enum COM_VendorID { NVIDIA = 0x10DE, AMD = 0x1002 };
25 const cl_image_format IMAGE_FORMAT_COLOR = {
26  CL_RGBA,
27  CL_FLOAT,
28 };
29 const cl_image_format IMAGE_FORMAT_VECTOR = {
30  CL_RGB,
31  CL_FLOAT,
32 };
33 const cl_image_format IMAGE_FORMAT_VALUE = {
34  CL_R,
35  CL_FLOAT,
36 };
37 
39  cl_device_id device,
40  cl_program program,
41  cl_int vendorId)
42 {
43  this->m_device = device;
44  this->m_context = context;
45  this->m_program = program;
46  this->m_queue = nullptr;
47  this->m_vendorID = vendorId;
48 
49  cl_int error;
50  this->m_queue = clCreateCommandQueue(this->m_context, this->m_device, 0, &error);
51 }
52 
54  : m_context(other.m_context),
55  m_device(other.m_device),
56  m_program(other.m_program),
57  m_queue(other.m_queue),
58  m_vendorID(other.m_vendorID)
59 {
60  other.m_queue = nullptr;
61 }
62 
64 {
65  if (this->m_queue) {
66  clReleaseCommandQueue(this->m_queue);
67  }
68 }
69 
71 {
72  const unsigned int chunkNumber = work_package->chunk_number;
73  ExecutionGroup *executionGroup = work_package->execution_group;
74 
75  MemoryBuffer **inputBuffers = executionGroup->getInputBuffersOpenCL(chunkNumber);
76  MemoryBuffer *outputBuffer = executionGroup->allocateOutputBuffer(work_package->rect);
77 
78  executionGroup->getOutputOperation()->executeOpenCLRegion(
79  this, &work_package->rect, chunkNumber, inputBuffers, outputBuffer);
80 
81  delete outputBuffer;
82 
83  executionGroup->finalizeChunkExecution(chunkNumber, inputBuffers);
84 }
86  int parameterIndex,
87  int offsetIndex,
88  std::list<cl_mem> *cleanup,
89  MemoryBuffer **inputMemoryBuffers,
90  SocketReader *reader)
91 {
93  parameterIndex,
94  offsetIndex,
95  cleanup,
96  inputMemoryBuffers,
97  (ReadBufferOperation *)reader);
98 }
99 
100 const cl_image_format *OpenCLDevice::determineImageFormat(MemoryBuffer *memoryBuffer)
101 {
102  switch (memoryBuffer->get_num_channels()) {
103  case 1:
104  return &IMAGE_FORMAT_VALUE;
105  break;
106  case 3:
107  return &IMAGE_FORMAT_VECTOR;
108  break;
109  case 4:
110  return &IMAGE_FORMAT_COLOR;
111  break;
112  default:
113  BLI_assert(!"Unsupported num_channels.");
114  }
115 
116  return &IMAGE_FORMAT_COLOR;
117 }
118 
120  int parameterIndex,
121  int offsetIndex,
122  std::list<cl_mem> *cleanup,
123  MemoryBuffer **inputMemoryBuffers,
124  ReadBufferOperation *reader)
125 {
126  cl_int error;
127 
128  MemoryBuffer *result = reader->getInputMemoryBuffer(inputMemoryBuffers);
129 
130  const cl_image_format *imageFormat = determineImageFormat(result);
131 
132  cl_mem clBuffer = clCreateImage2D(this->m_context,
133  CL_MEM_READ_ONLY | CL_MEM_COPY_HOST_PTR,
134  imageFormat,
135  result->getWidth(),
136  result->getHeight(),
137  0,
138  result->getBuffer(),
139  &error);
140 
141  if (error != CL_SUCCESS) {
142  printf("CLERROR[%d]: %s\n", error, clewErrorString(error));
143  }
144  if (error == CL_SUCCESS) {
145  cleanup->push_back(clBuffer);
146  }
147 
148  error = clSetKernelArg(kernel, parameterIndex, sizeof(cl_mem), &clBuffer);
149  if (error != CL_SUCCESS) {
150  printf("CLERROR[%d]: %s\n", error, clewErrorString(error));
151  }
152 
154  return clBuffer;
155 }
156 
158  int offsetIndex,
159  MemoryBuffer *memoryBuffer)
160 {
161  if (offsetIndex != -1) {
162  cl_int error;
163  const rcti &rect = memoryBuffer->get_rect();
164  cl_int2 offset = {{rect.xmin, rect.ymin}};
165 
166  error = clSetKernelArg(kernel, offsetIndex, sizeof(cl_int2), &offset);
167  if (error != CL_SUCCESS) {
168  printf("CLERROR[%d]: %s\n", error, clewErrorString(error));
169  }
170  }
171 }
172 
174  int offsetIndex,
175  NodeOperation *operation)
176 {
177  if (offsetIndex != -1) {
178  cl_int error;
179  cl_int2 offset = {{(cl_int)operation->getWidth(), (cl_int)operation->getHeight()}};
180 
181  error = clSetKernelArg(kernel, offsetIndex, sizeof(cl_int2), &offset);
182  if (error != CL_SUCCESS) {
183  printf("CLERROR[%d]: %s\n", error, clewErrorString(error));
184  }
185  }
186 }
187 
189  int parameterIndex,
190  cl_mem clOutputMemoryBuffer)
191 {
192  cl_int error;
193  error = clSetKernelArg(kernel, parameterIndex, sizeof(cl_mem), &clOutputMemoryBuffer);
194  if (error != CL_SUCCESS) {
195  printf("CLERROR[%d]: %s\n", error, clewErrorString(error));
196  }
197 }
198 
199 void OpenCLDevice::COM_clEnqueueRange(cl_kernel kernel, MemoryBuffer *outputMemoryBuffer)
200 {
201  cl_int error;
202  const size_t size[] = {
203  (size_t)outputMemoryBuffer->getWidth(),
204  (size_t)outputMemoryBuffer->getHeight(),
205  };
206 
207  error = clEnqueueNDRangeKernel(
208  this->m_queue, kernel, 2, nullptr, size, nullptr, 0, nullptr, nullptr);
209  if (error != CL_SUCCESS) {
210  printf("CLERROR[%d]: %s\n", error, clewErrorString(error));
211  }
212 }
213 
214 void OpenCLDevice::COM_clEnqueueRange(cl_kernel kernel,
215  MemoryBuffer *outputMemoryBuffer,
216  int offsetIndex,
217  NodeOperation *operation)
218 {
219  cl_int error;
220  const int width = outputMemoryBuffer->getWidth();
221  const int height = outputMemoryBuffer->getHeight();
222  int offsetx;
223  int offsety;
224  int localSize = 1024;
225  size_t size[2];
226  cl_int2 offset;
227 
228  if (this->m_vendorID == NVIDIA) {
229  localSize = 32;
230  }
231 
232  bool breaked = false;
233  for (offsety = 0; offsety < height && (!breaked); offsety += localSize) {
234  offset.s[1] = offsety;
235  if (offsety + localSize < height) {
236  size[1] = localSize;
237  }
238  else {
239  size[1] = height - offsety;
240  }
241 
242  for (offsetx = 0; offsetx < width && (!breaked); offsetx += localSize) {
243  if (offsetx + localSize < width) {
244  size[0] = localSize;
245  }
246  else {
247  size[0] = width - offsetx;
248  }
249  offset.s[0] = offsetx;
250 
251  error = clSetKernelArg(kernel, offsetIndex, sizeof(cl_int2), &offset);
252  if (error != CL_SUCCESS) {
253  printf("CLERROR[%d]: %s\n", error, clewErrorString(error));
254  }
255  error = clEnqueueNDRangeKernel(
256  this->m_queue, kernel, 2, nullptr, size, nullptr, 0, nullptr, nullptr);
257  if (error != CL_SUCCESS) {
258  printf("CLERROR[%d]: %s\n", error, clewErrorString(error));
259  }
260  clFlush(this->m_queue);
261  if (operation->isBraked()) {
262  breaked = false;
263  }
264  }
265  }
266 }
267 
268 cl_kernel OpenCLDevice::COM_clCreateKernel(const char *kernelname,
269  std::list<cl_kernel> *clKernelsToCleanUp)
270 {
271  cl_int error;
272  cl_kernel kernel = clCreateKernel(this->m_program, kernelname, &error);
273  if (error != CL_SUCCESS) {
274  printf("CLERROR[%d]: %s\n", error, clewErrorString(error));
275  }
276  else {
277  if (clKernelsToCleanUp) {
278  clKernelsToCleanUp->push_back(kernel);
279  }
280  }
281  return kernel;
282 }
283 
284 } // namespace blender::compositor
#define BLI_assert(a)
Definition: BLI_assert.h:58
_GL_VOID GLfloat value _GL_VOID_RET _GL_VOID const GLuint GLboolean *residences _GL_BOOL_RET _GL_VOID GLsizei GLfloat GLfloat GLfloat GLfloat const GLubyte *bitmap _GL_VOID_RET _GL_VOID GLenum const void *lists _GL_VOID_RET _GL_VOID const GLdouble *equation _GL_VOID_RET _GL_VOID GLdouble GLdouble blue _GL_VOID_RET _GL_VOID GLfloat GLfloat blue _GL_VOID_RET _GL_VOID GLint GLint blue _GL_VOID_RET _GL_VOID GLshort GLshort blue _GL_VOID_RET _GL_VOID GLubyte GLubyte blue _GL_VOID_RET _GL_VOID GLuint GLuint blue _GL_VOID_RET _GL_VOID GLushort GLushort blue _GL_VOID_RET _GL_VOID GLbyte GLbyte GLbyte alpha _GL_VOID_RET _GL_VOID GLdouble GLdouble GLdouble alpha _GL_VOID_RET _GL_VOID GLfloat GLfloat GLfloat alpha _GL_VOID_RET _GL_VOID GLint GLint GLint alpha _GL_VOID_RET _GL_VOID GLshort GLshort GLshort alpha _GL_VOID_RET _GL_VOID GLubyte GLubyte GLubyte alpha _GL_VOID_RET _GL_VOID GLuint GLuint GLuint alpha _GL_VOID_RET _GL_VOID GLushort GLushort GLushort alpha _GL_VOID_RET _GL_VOID GLenum mode _GL_VOID_RET _GL_VOID GLint GLsizei width
_GL_VOID GLfloat value _GL_VOID_RET _GL_VOID const GLuint GLboolean *residences _GL_BOOL_RET _GL_VOID GLsizei height
static DBVT_INLINE btScalar size(const btDbvtVolume &a)
Definition: btDbvt.cpp:52
Class ExecutionGroup is a group of Operations that are executed as one. This grouping is used to comb...
MemoryBuffer * allocateOutputBuffer(rcti &rect)
allocate the outputbuffer of a chunk
MemoryBuffer ** getInputBuffersOpenCL(int chunkNumber)
get all inputbuffers needed to calculate an chunk
NodeOperation * getOutputOperation() const
get the output operation of this ExecutionGroup
void finalizeChunkExecution(int chunkNumber, MemoryBuffer **memoryBuffers)
after a chunk is executed the needed resources can be freed or unlocked.
a MemoryBuffer contains access to the data of a chunk
const rcti & get_rect() const
get the rect of this MemoryBuffer
const int getHeight() const
get the height of this MemoryBuffer
const int getWidth() const
get the width of this MemoryBuffer
NodeOperation contains calculation logic.
virtual void executeOpenCLRegion(OpenCLDevice *, rcti *, unsigned int, MemoryBuffer **, MemoryBuffer *)
when a chunk is executed by an OpenCLDevice, this method is called
device representing an GPU OpenCL device. an instance of this class represents a single cl_device
static const cl_image_format * determineImageFormat(MemoryBuffer *memoryBuffer)
determine an image format
void COM_clAttachMemoryBufferOffsetToKernelParameter(cl_kernel kernel, int offsetIndex, MemoryBuffer *memoryBuffers)
cl_mem COM_clAttachMemoryBufferToKernelParameter(cl_kernel kernel, int parameterIndex, int offsetIndex, std::list< cl_mem > *cleanup, MemoryBuffer **inputMemoryBuffers, SocketReader *reader)
void COM_clEnqueueRange(cl_kernel kernel, MemoryBuffer *outputMemoryBuffer)
void COM_clAttachOutputMemoryBufferToKernelParameter(cl_kernel kernel, int parameterIndex, cl_mem clOutputMemoryBuffer)
void COM_clAttachSizeToKernelParameter(cl_kernel kernel, int offsetIndex, NodeOperation *operation)
cl_kernel COM_clCreateKernel(const char *kernelname, std::list< cl_kernel > *clKernelsToCleanUp)
OpenCLDevice(cl_context context, cl_device_id device, cl_program program, cl_int vendorId)
constructor with opencl device
void execute(WorkPackage *work) override
execute a WorkPackage
MemoryBuffer * getInputMemoryBuffer(MemoryBuffer **memoryBuffers) override
static void error(const char *str)
Definition: meshlaplacian.c:65
const cl_image_format IMAGE_FORMAT_COLOR
const cl_image_format IMAGE_FORMAT_VECTOR
const cl_image_format IMAGE_FORMAT_VALUE
contains data about work that can be scheduled
unsigned int chunk_number
number of the chunk to be executed
ExecutionGroup * execution_group
executionGroup with the operations-setup to be evaluated
int ymin
Definition: DNA_vec_types.h:80
int xmin
Definition: DNA_vec_types.h:79