/* Cycles kernel header: work-stealing utilities (Blender 2.93). */
1 /*
2  * Copyright 2011-2015 Blender Foundation
3  *
4  * Licensed under the Apache License, Version 2.0 (the "License");
5  * you may not use this file except in compliance with the License.
6  * You may obtain a copy of the License at
7  *
8  * http://www.apache.org/licenses/LICENSE-2.0
9  *
10  * Unless required by applicable law or agreed to in writing, software
11  * distributed under the License is distributed on an "AS IS" BASIS,
12  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13  * See the License for the specific language governing permissions and
14  * limitations under the License.
15  */
16 
17 #ifndef __KERNEL_WORK_STEALING_H__
18 #define __KERNEL_WORK_STEALING_H__
19 
21 
22 /*
23  * Utility functions for work stealing
24  */
25 
26 /* Map global work index to tile, pixel X/Y and sample. */
28  uint global_work_index,
32 {
33 #ifdef __KERNEL_CUDA__
34  /* Keeping threads for the same pixel together improves performance on CUDA. */
35  uint sample_offset = global_work_index % tile->num_samples;
36  uint pixel_offset = global_work_index / tile->num_samples;
37 #else /* __KERNEL_CUDA__ */
38  uint tile_pixels = tile->w * tile->h;
39  uint sample_offset = global_work_index / tile_pixels;
40  uint pixel_offset = global_work_index - sample_offset * tile_pixels;
41 #endif /* __KERNEL_CUDA__ */
42  uint y_offset = pixel_offset / tile->w;
43  uint x_offset = pixel_offset - y_offset * tile->w;
44 
45  *x = tile->x + x_offset;
46  *y = tile->y + y_offset;
47  *sample = tile->start_sample + sample_offset;
48 }
49 
50 #ifdef __KERNEL_OPENCL__
51 # pragma OPENCL EXTENSION cl_khr_global_int32_base_atomics : enable
52 #endif
53 
54 #ifdef __SPLIT_KERNEL__
55 /* Returns true if there is work */
56 ccl_device bool get_next_work_item(KernelGlobals *kg,
58  uint total_work_size,
59  uint ray_index,
60  ccl_private uint *global_work_index)
61 {
62  /* With a small amount of work there may be more threads than work due to
63  * rounding up of global size, stop such threads immediately. */
64  if (ray_index >= total_work_size) {
65  return false;
66  }
67 
68  /* Increase atomic work index counter in pool. */
69  uint pool = ray_index / WORK_POOL_SIZE;
71 
72  /* Map per-pool work index to a global work index. */
73  uint global_size = ccl_global_size(0) * ccl_global_size(1);
74  kernel_assert(global_size % WORK_POOL_SIZE == 0);
75  kernel_assert(ray_index < global_size);
76 
77  *global_work_index = (work_index / WORK_POOL_SIZE) * global_size + (pool * WORK_POOL_SIZE) +
78  (work_index % WORK_POOL_SIZE);
79 
80  /* Test if all work for this pool is done. */
81  return (*global_work_index < total_work_size);
82 }
83 
84 ccl_device bool get_next_work(KernelGlobals *kg,
86  uint total_work_size,
87  uint ray_index,
88  ccl_private uint *global_work_index)
89 {
90  bool got_work = false;
91  if (kernel_data.film.pass_adaptive_aux_buffer) {
92  do {
93  got_work = get_next_work_item(kg, work_pools, total_work_size, ray_index, global_work_index);
94  if (got_work) {
96  uint x, y, sample;
97  get_work_pixel(tile, *global_work_index, &x, &y, &sample);
98  uint buffer_offset = (tile->offset + x + y * tile->stride) * kernel_data.film.pass_stride;
99  ccl_global float *buffer = kernel_split_params.tile.buffer + buffer_offset;
100  ccl_global float4 *aux = (ccl_global float4 *)(buffer +
101  kernel_data.film.pass_adaptive_aux_buffer);
102  if ((*aux).w == 0.0f) {
103  break;
104  }
105  }
106  } while (got_work);
107  }
108  else {
109  got_work = get_next_work_item(kg, work_pools, total_work_size, ray_index, global_work_index);
110  }
111  return got_work;
112 }
113 #endif
114 
CCL_NAMESPACE_END
117 #endif /* __KERNEL_WORK_STEALING_H__ */
unsigned int uint
Definition: BLI_sys_types.h:83
_GL_VOID GLfloat value _GL_VOID_RET _GL_VOID const GLuint GLboolean *residences _GL_BOOL_RET _GL_VOID GLsizei GLfloat GLfloat GLfloat GLfloat const GLubyte *bitmap _GL_VOID_RET _GL_VOID GLenum const void *lists _GL_VOID_RET _GL_VOID const GLdouble *equation _GL_VOID_RET _GL_VOID GLdouble GLdouble blue _GL_VOID_RET _GL_VOID GLfloat GLfloat blue _GL_VOID_RET _GL_VOID GLint GLint blue _GL_VOID_RET _GL_VOID GLshort GLshort blue _GL_VOID_RET _GL_VOID GLubyte GLubyte blue _GL_VOID_RET _GL_VOID GLuint GLuint blue _GL_VOID_RET _GL_VOID GLushort GLushort blue _GL_VOID_RET _GL_VOID GLbyte GLbyte GLbyte alpha _GL_VOID_RET _GL_VOID GLdouble GLdouble GLdouble alpha _GL_VOID_RET _GL_VOID GLfloat GLfloat GLfloat alpha _GL_VOID_RET _GL_VOID GLint GLint GLint alpha _GL_VOID_RET _GL_VOID GLshort GLshort GLshort alpha _GL_VOID_RET _GL_VOID GLubyte GLubyte GLubyte alpha _GL_VOID_RET _GL_VOID GLuint GLuint GLuint alpha _GL_VOID_RET _GL_VOID GLushort GLushort GLushort alpha _GL_VOID_RET _GL_VOID GLenum mode _GL_VOID_RET _GL_VOID GLint y
#define kernel_data
#define kernel_assert(cond)
#define ccl_global_size(d)
#define ccl_device
#define ccl_private
#define ccl_device_inline
#define ccl_global
#define CCL_NAMESPACE_END
#define kernel_split_params
__kernel void ccl_constant KernelData ccl_global void ccl_global char ccl_global int ccl_global char ccl_global unsigned int ccl_global float * buffer
__kernel void ccl_constant KernelData ccl_global void ccl_global char ccl_global int ccl_global char ccl_global unsigned int * work_pools
#define WORK_POOL_SIZE
Definition: kernel_types.h:72
CCL_NAMESPACE_BEGIN ccl_device_inline void get_work_pixel(ccl_global const WorkTile *tile, uint global_work_index, ccl_private uint *x, ccl_private uint *y, ccl_private uint *sample)
static void sample(SocketReader *reader, int x, int y, float color[4])
#define atomic_fetch_and_inc_uint32(p)
Definition: util_atomic.h:29