Blender  V2.93
COM_WorkScheduler.cc
Go to the documentation of this file.
1 /*
2  * This program is free software; you can redistribute it and/or
3  * modify it under the terms of the GNU General Public License
4  * as published by the Free Software Foundation; either version 2
5  * of the License, or (at your option) any later version.
6  *
7  * This program is distributed in the hope that it will be useful,
8  * but WITHOUT ANY WARRANTY; without even the implied warranty of
9  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
10  * GNU General Public License for more details.
11  *
12  * You should have received a copy of the GNU General Public License
13  * along with this program; if not, write to the Free Software Foundation,
14  * Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.
15  *
16  * Copyright 2011, Blender Foundation.
17  */
18 
19 #include <cstdio>
20 #include <list>
21 
22 #include "COM_CPUDevice.h"
23 #include "COM_OpenCLDevice.h"
24 #include "COM_OpenCLKernels.cl.h"
25 #include "COM_WorkScheduler.h"
27 #include "COM_compositor.h"
28 
29 #include "clew.h"
30 
31 #include "MEM_guardedalloc.h"
32 
33 #include "BLI_task.h"
34 #include "BLI_threads.h"
35 #include "BLI_vector.hh"
36 #include "PIL_time.h"
37 
38 #include "BKE_global.h"
39 
40 namespace blender::compositor {
41 
enum class ThreadingModel {
  /* NOTE(review): the extract elided the first enumerator; `SingleThreaded` is
   * restored based on `threading_model_single_thread_execute` and the
   * "Nothing to do" SingleThreaded cases in the WorkScheduler switches —
   * confirm against the upstream file. */
  /** Everything is executed in the caller thread. Easy for debugging. */
  SingleThreaded,
  /** Multi-threaded model which uses the BLI_thread_queue pattern. */
  Queue,
  /** Uses BLI_task as threading backend. */
  Task
};
50 
57 {
58  return ThreadingModel::Queue;
59 }
60 
/**
 * Whether OpenCL scheduling support is compiled in.
 * NOTE(review): the `return` statement was elided in this extract; `true` is
 * restored because the OpenCL code paths below are clearly live — confirm
 * against the upstream file.
 */
constexpr bool COM_is_opencl_enabled()
{
  return true;
}
68 
69 static ThreadLocal(CPUDevice *) g_thread_device;
70 static struct {
71  struct {
76 
79  bool initialized = false;
82  } queue;
83 
84  struct {
86  } task;
87 
88  struct {
90  cl_context context;
91  cl_program program;
98  bool active = false;
99  bool initialized = false;
102 
103 /* -------------------------------------------------------------------- */
107 static void CL_CALLBACK clContextError(const char *errinfo,
108  const void * /*private_info*/,
109  size_t /*cb*/,
110  void * /*user_data*/)
111 {
112  printf("OPENCL error: %s\n", errinfo);
113 }
114 
115 static void *thread_execute_gpu(void *data)
116 {
117  Device *device = (Device *)data;
118  WorkPackage *work;
119 
120  while ((work = (WorkPackage *)BLI_thread_queue_pop(g_work_scheduler.opencl.queue))) {
121  device->execute(work);
122  }
123 
124  return nullptr;
125 }
126 
128 {
129  if (context.getHasActiveOpenCLDevices()) {
130  g_work_scheduler.opencl.queue = BLI_thread_queue_init();
131  BLI_threadpool_init(&g_work_scheduler.opencl.threads,
133  g_work_scheduler.opencl.devices.size());
134  for (Device &device : g_work_scheduler.opencl.devices) {
135  BLI_threadpool_insert(&g_work_scheduler.opencl.threads, &device);
136  }
137  g_work_scheduler.opencl.active = true;
138  }
139  else {
140  g_work_scheduler.opencl.active = false;
141  }
142 }
143 
144 static bool opencl_schedule(WorkPackage *package)
145 {
146  if (package->execution_group->get_flags().open_cl && g_work_scheduler.opencl.active) {
147  BLI_thread_queue_push(g_work_scheduler.opencl.queue, package);
148  return true;
149  }
150  return false;
151 }
152 
153 static void opencl_finish()
154 {
155  if (g_work_scheduler.opencl.active) {
157  }
158 }
159 
160 static void opencl_stop()
161 {
162  if (g_work_scheduler.opencl.active) {
164  BLI_threadpool_end(&g_work_scheduler.opencl.threads);
166  g_work_scheduler.opencl.queue = nullptr;
167  }
168 }
169 
171 {
172  return !g_work_scheduler.opencl.devices.is_empty();
173 }
174 
/**
 * One-time OpenCL bring-up: loads the OpenCL library via clew, enumerates GPU
 * platforms/devices, builds the compositor kernel program and creates an
 * OpenCLDevice wrapper per GPU. No-op when `use_opencl` is false or when
 * already initialized. Sets `initialized` even when discovery fails so the
 * (failed) probe is not repeated.
 */
static void opencl_initialize(const bool use_opencl)
{
  /* deinitialize OpenCL GPU's */
  if (use_opencl && !g_work_scheduler.opencl.initialized) {
    g_work_scheduler.opencl.context = nullptr;
    g_work_scheduler.opencl.program = nullptr;

    /* This will check for errors and skip if already initialized. */
    if (clewInit() != CLEW_SUCCESS) {
      return;
    }

    /* clew leaves the function pointer null when no OpenCL runtime loaded. */
    if (clCreateContextFromType) {
      cl_uint numberOfPlatforms = 0;
      cl_int error;
      error = clGetPlatformIDs(0, nullptr, &numberOfPlatforms);
      /* -1001 is CL_PLATFORM_NOT_FOUND_KHR: silently treat as "no GPU". */
      if (error == -1001) {
      } /* GPU not supported */
      else if (error != CL_SUCCESS) {
        printf("CLERROR[%d]: %s\n", error, clewErrorString(error));
      }
      if (G.f & G_DEBUG) {
        printf("%u number of platforms\n", numberOfPlatforms);
      }
      cl_platform_id *platforms = (cl_platform_id *)MEM_mallocN(
          sizeof(cl_platform_id) * numberOfPlatforms, __func__);
      error = clGetPlatformIDs(numberOfPlatforms, platforms, nullptr);
      unsigned int indexPlatform;
      for (indexPlatform = 0; indexPlatform < numberOfPlatforms; indexPlatform++) {
        cl_platform_id platform = platforms[indexPlatform];
        cl_uint numberOfDevices = 0;
        /* First call only counts GPU devices on this platform. */
        clGetDeviceIDs(platform, CL_DEVICE_TYPE_GPU, 0, nullptr, &numberOfDevices);
        if (numberOfDevices <= 0) {
          continue;
        }

        cl_device_id *cldevices = (cl_device_id *)MEM_mallocN(
            sizeof(cl_device_id) * numberOfDevices, __func__);
        clGetDeviceIDs(platform, CL_DEVICE_TYPE_GPU, numberOfDevices, cldevices, nullptr);

        /* Errors below are reported but not fatal; a broken platform simply
         * yields no usable devices. */
        g_work_scheduler.opencl.context = clCreateContext(
            nullptr, numberOfDevices, cldevices, clContextError, nullptr, &error);
        if (error != CL_SUCCESS) {
          printf("CLERROR[%d]: %s\n", error, clewErrorString(error));
        }
        const char *cl_str[2] = {datatoc_COM_OpenCLKernels_cl, nullptr};
        g_work_scheduler.opencl.program = clCreateProgramWithSource(
            g_work_scheduler.opencl.context, 1, cl_str, nullptr, &error);
        error = clBuildProgram(g_work_scheduler.opencl.program,
                               numberOfDevices,
                               cldevices,
                               nullptr,
                               nullptr,
                               nullptr);
        if (error != CL_SUCCESS) {
          /* Build failed: fetch and print the build log for the first device. */
          cl_int error2;
          size_t ret_val_size = 0;
          printf("CLERROR[%d]: %s\n", error, clewErrorString(error));
          error2 = clGetProgramBuildInfo(g_work_scheduler.opencl.program,
                                         cldevices[0],
                                         CL_PROGRAM_BUILD_LOG,
                                         0,
                                         nullptr,
                                         &ret_val_size);
          if (error2 != CL_SUCCESS) {
            printf("CLERROR[%d]: %s\n", error, clewErrorString(error));
          }
          /* +1 for the explicit NUL terminator written below. */
          char *build_log = (char *)MEM_mallocN(sizeof(char) * ret_val_size + 1, __func__);
          error2 = clGetProgramBuildInfo(g_work_scheduler.opencl.program,
                                         cldevices[0],
                                         CL_PROGRAM_BUILD_LOG,
                                         ret_val_size,
                                         build_log,
                                         nullptr);
          if (error2 != CL_SUCCESS) {
            printf("CLERROR[%d]: %s\n", error, clewErrorString(error));
          }
          build_log[ret_val_size] = '\0';
          printf("%s", build_log);
          MEM_freeN(build_log);
        }
        else {
          /* Build succeeded: wrap each GPU in an OpenCLDevice. */
          unsigned int indexDevices;
          for (indexDevices = 0; indexDevices < numberOfDevices; indexDevices++) {
            cl_device_id device = cldevices[indexDevices];
            cl_int vendorID = 0;
            cl_int error2 = clGetDeviceInfo(
                device, CL_DEVICE_VENDOR_ID, sizeof(cl_int), &vendorID, nullptr);
            if (error2 != CL_SUCCESS) {
              printf("CLERROR[%d]: %s\n", error2, clewErrorString(error2));
            }
            g_work_scheduler.opencl.devices.append(OpenCLDevice(g_work_scheduler.opencl.context,
                                                                device,
                                                                g_work_scheduler.opencl.program,
                                                                vendorID));
          }
        }
        MEM_freeN(cldevices);
      }
      MEM_freeN(platforms);
    }

    g_work_scheduler.opencl.initialized = true;
  }
}
280 
281 static void opencl_deinitialize()
282 {
283  g_work_scheduler.opencl.devices.clear_and_make_inline();
284 
285  if (g_work_scheduler.opencl.program) {
286  clReleaseProgram(g_work_scheduler.opencl.program);
287  g_work_scheduler.opencl.program = nullptr;
288  }
289 
290  if (g_work_scheduler.opencl.context) {
291  clReleaseContext(g_work_scheduler.opencl.context);
292  g_work_scheduler.opencl.context = nullptr;
293  }
294 
295  g_work_scheduler.opencl.initialized = false;
296 }
297 
298 /* \} */
299 
300 /* -------------------------------------------------------------------- */
305 {
306  CPUDevice device(0);
307  device.execute(package);
308 }
309 
310 /* \} */
311 
312 /* -------------------------------------------------------------------- */
317 {
318  CPUDevice *device = (CPUDevice *)data;
319  WorkPackage *work;
320  BLI_thread_local_set(g_thread_device, device);
321  while ((work = (WorkPackage *)BLI_thread_queue_pop(g_work_scheduler.queue.queue))) {
322  device->execute(work);
323  }
324 
325  return nullptr;
326 }
327 
329 {
330  BLI_thread_queue_push(g_work_scheduler.queue.queue, package);
331 }
332 
334 {
335  g_work_scheduler.queue.queue = BLI_thread_queue_init();
336  BLI_threadpool_init(&g_work_scheduler.queue.threads,
338  g_work_scheduler.queue.devices.size());
339  for (Device &device : g_work_scheduler.queue.devices) {
340  BLI_threadpool_insert(&g_work_scheduler.queue.threads, &device);
341  }
342 }
343 
345 {
347 }
348 
350 {
352  BLI_threadpool_end(&g_work_scheduler.queue.threads);
354  g_work_scheduler.queue.queue = nullptr;
355 }
356 
357 static void threading_model_queue_initialize(const int num_cpu_threads)
358 {
359  /* Reinitialize if number of threads doesn't match. */
360  if (g_work_scheduler.queue.devices.size() != num_cpu_threads) {
361  g_work_scheduler.queue.devices.clear();
362  if (g_work_scheduler.queue.initialized) {
363  BLI_thread_local_delete(g_thread_device);
364  g_work_scheduler.queue.initialized = false;
365  }
366  }
367 
368  /* Initialize CPU threads. */
369  if (!g_work_scheduler.queue.initialized) {
370  for (int index = 0; index < num_cpu_threads; index++) {
371  g_work_scheduler.queue.devices.append(CPUDevice(index));
372  }
373  BLI_thread_local_create(g_thread_device);
374  g_work_scheduler.queue.initialized = true;
375  }
376 }
378 {
379  /* deinitialize CPU threads */
380  if (g_work_scheduler.queue.initialized) {
381  g_work_scheduler.queue.devices.clear_and_make_inline();
382 
383  BLI_thread_local_delete(g_thread_device);
384  g_work_scheduler.queue.initialized = false;
385  }
386 }
387 
388 /* \} */
389 
390 /* -------------------------------------------------------------------- */
394 static void threading_model_task_execute(TaskPool *__restrict UNUSED(pool), void *task_data)
395 {
396  WorkPackage *package = static_cast<WorkPackage *>(task_data);
397  CPUDevice device(BLI_task_parallel_thread_id(nullptr));
398  BLI_thread_local_set(g_thread_device, &device);
399  device.execute(package);
400 }
401 
403 {
405  g_work_scheduler.task.pool, threading_model_task_execute, package, false, nullptr);
406 }
407 
409 {
410  BLI_thread_local_create(g_thread_device);
412 }
413 
415 {
417 }
418 
420 {
422  g_work_scheduler.task.pool = nullptr;
423  BLI_thread_local_delete(g_thread_device);
424 }
425 
426 /* \} */
427 
428 /* -------------------------------------------------------------------- */
433 {
434  if (COM_is_opencl_enabled()) {
435  if (opencl_schedule(package)) {
436  return;
437  }
438  }
439 
440  switch (COM_threading_model()) {
443  break;
444  }
445 
446  case ThreadingModel::Queue: {
448  break;
449  }
450 
451  case ThreadingModel::Task: {
453  break;
454  }
455  }
456 }
457 
459 {
460  if (COM_is_opencl_enabled()) {
462  }
463 
464  switch (COM_threading_model()) {
466  /* Nothing to do. */
467  break;
468 
471  break;
472 
475  break;
476  }
477 }
478 
480 {
481  if (COM_is_opencl_enabled()) {
482  opencl_finish();
483  }
484 
485  switch (COM_threading_model()) {
487  /* Nothing to do. */
488  break;
489 
492  break;
493 
496  break;
497  }
498 }
499 
501 {
502  if (COM_is_opencl_enabled()) {
503  opencl_stop();
504  }
505 
506  switch (COM_threading_model()) {
508  /* Nothing to do. */
509  break;
510 
513  break;
514 
517  break;
518  }
519 }
520 
522 {
523  if (COM_is_opencl_enabled()) {
524  return opencl_has_gpu_devices();
525  }
526  return false;
527 }
528 
529 void WorkScheduler::initialize(bool use_opencl, int num_cpu_threads)
530 {
531  if (COM_is_opencl_enabled()) {
532  opencl_initialize(use_opencl);
533  }
534 
535  switch (COM_threading_model()) {
537  /* Nothing to do. */
538  break;
539 
541  threading_model_queue_initialize(num_cpu_threads);
542  break;
543 
545  /* Nothing to do. */
546  break;
547  }
548 }
549 
551 {
552  if (COM_is_opencl_enabled()) {
554  }
555 
556  switch (COM_threading_model()) {
558  /* Nothing to do. */
559  break;
560 
563  break;
564 
566  /* Nothing to do. */
567  break;
568  }
569 }
570 
572 {
574  return 0;
575  }
576 
577  CPUDevice *device = (CPUDevice *)BLI_thread_local_get(g_thread_device);
578  return device->thread_id();
579 }
580 
581 /* \} */
582 
583 } // namespace blender::compositor
@ G_DEBUG
Definition: BKE_global.h:133
void BLI_task_pool_work_and_wait(TaskPool *pool)
Definition: task_pool.cc:496
int BLI_task_parallel_thread_id(const TaskParallelTLS *tls)
TaskPool * BLI_task_pool_create(void *userdata, TaskPriority priority)
Definition: task_pool.cc:406
void BLI_task_pool_free(TaskPool *pool)
Definition: task_pool.cc:456
@ TASK_PRIORITY_HIGH
Definition: BLI_task.h:67
void BLI_task_pool_push(TaskPool *pool, TaskRunFunction run, void *taskdata, bool free_taskdata, TaskFreeFunction freedata)
Definition: task_pool.cc:475
void BLI_thread_queue_push(ThreadQueue *queue, void *work)
Definition: threads.cc:669
#define BLI_thread_local_create(name)
Definition: BLI_threads.h:192
#define BLI_thread_local_set(name, value)
Definition: BLI_threads.h:195
#define BLI_thread_local_delete(name)
Definition: BLI_threads.h:193
void BLI_threadpool_init(struct ListBase *threadbase, void *(*do_thread)(void *), int tot)
Definition: threads.cc:159
void BLI_thread_queue_free(ThreadQueue *queue)
Definition: threads.cc:657
#define BLI_thread_local_get(name)
Definition: BLI_threads.h:194
void BLI_threadpool_end(struct ListBase *threadbase)
Definition: threads.cc:289
void BLI_thread_queue_nowait(ThreadQueue *queue)
Definition: threads.cc:795
void BLI_thread_queue_wait_finish(ThreadQueue *queue)
Definition: threads.cc:806
void * BLI_thread_queue_pop(ThreadQueue *queue)
Definition: threads.cc:680
void BLI_threadpool_insert(struct ListBase *threadbase, void *callerdata)
Definition: threads.cc:239
ThreadQueue * BLI_thread_queue_init(void)
Definition: threads.cc:643
#define UNUSED(x)
Read Guarded memory(de)allocation.
Platform independent time functions.
class representing a CPU device.
Definition: COM_CPUDevice.h:30
void execute(WorkPackage *work) override
execute a WorkPackage
Overall context of the compositor.
Abstract class for device implementations to be used by the Compositor. devices are queried,...
Definition: COM_Device.h:30
virtual void execute(struct WorkPackage *work)=0
execute a WorkPackage
const ExecutionGroupFlags get_flags() const
device representing an GPU OpenCL device. an instance of this class represents a single cl_device
void(* MEM_freeN)(void *vmemh)
Definition: mallocn.c:41
void *(* MEM_mallocN)(size_t len, const char *str)
Definition: mallocn.c:47
static void error(const char *str)
Definition: meshlaplacian.c:65
static void opencl_deinitialize()
static void opencl_initialize(const bool use_opencl)
static bool opencl_has_gpu_devices()
static void threading_model_queue_deinitialize()
static void * thread_execute_gpu(void *data)
ThreadQueue * queue
all scheduled work for the cpu
static void CL_CALLBACK clContextError(const char *errinfo, const void *, size_t, void *)
constexpr ThreadingModel COM_threading_model()
static void threading_model_task_execute(TaskPool *__restrict UNUSED(pool), void *task_data)
static void threading_model_single_thread_execute(WorkPackage *package)
static void threading_model_queue_stop()
static void opencl_start(CompositorContext &context)
static ThreadLocal(CPUDevice *) g_thread_device
bool active
all scheduled work for the GPU.
Vector< CPUDevice > devices
list of all CPUDevices. for every hardware thread an instance of CPUDevice is created
ListBase threads
list of all thread for every CPUDevice in cpudevices a thread exists.
static void threading_model_queue_initialize(const int num_cpu_threads)
static bool opencl_schedule(WorkPackage *package)
static void threading_model_queue_schedule(WorkPackage *package)
static void opencl_finish()
static void threading_model_task_stop()
static struct blender::compositor::@172 g_work_scheduler
static void threading_model_queue_start()
struct blender::compositor::@172::@174 task
constexpr bool COM_is_opencl_enabled()
static void threading_model_task_start()
static void * threading_model_queue_execute(void *data)
static void threading_model_task_finish()
static void threading_model_queue_finish()
static void threading_model_task_schedule(WorkPackage *package)
struct blender::compositor::@172::@175 opencl
contains data about work that can be scheduled
ExecutionGroup * execution_group
executionGroup with the operations-setup to be evaluated
static void schedule(WorkPackage *package)
schedule a chunk of a group to be calculated. An execution group schedules a chunk in the WorkSchedul...
static void deinitialize()
deinitialize the WorkScheduler free all allocated resources
static bool has_gpu_devices()
Are there OpenCL capable GPU devices initialized? the result of this method is stored in the Composit...
static void finish()
wait for all work to be completed.
static void stop()
stop the execution All created thread by the start method are destroyed.
static void start(CompositorContext &context)
Start the execution this methods will start the WorkScheduler. Inside this method all threads are ini...
static void initialize(bool use_opencl, int num_cpu_threads)
initialize the WorkScheduler
#define G(x, y, z)