Blender  V2.93
kernel_queues.h
Go to the documentation of this file.
1 /*
2  * Copyright 2011-2015 Blender Foundation
3  *
4  * Licensed under the Apache License, Version 2.0 (the "License");
5  * you may not use this file except in compliance with the License.
6  * You may obtain a copy of the License at
7  *
8  * http://www.apache.org/licenses/LICENSE-2.0
9  *
10  * Unless required by applicable law or agreed to in writing, software
11  * distributed under the License is distributed on an "AS IS" BASIS,
12  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13  * See the License for the specific language governing permissions and
14  * limitations under the License.
15  */
16 
17 #ifndef __KERNEL_QUEUE_H__
18 #define __KERNEL_QUEUE_H__
19 
21 
22 /*
23  * Queue utility functions for split kernel
24  */
25 #ifdef __KERNEL_OPENCL__
26 # pragma OPENCL EXTENSION cl_khr_global_int32_base_atomics : enable
27 # pragma OPENCL EXTENSION cl_khr_local_int32_base_atomics : enable
28 #endif
29 
30 /*
31  * Enqueue ray index into the queue
32  */
34  int ray_index, /* Ray index to be enqueued. */
35  int queue_number, /* Queue in which the ray index should be enqueued. */
36  ccl_global int *queues, /* Buffer of all queues. */
37  int queue_size, /* Size of each queue. */
38  ccl_global int *queue_index) /* Array of size num_queues; Used for atomic increment. */
39 {
40  /* This thread's queue index. */
41  int my_queue_index = atomic_fetch_and_inc_uint32((ccl_global uint *)&queue_index[queue_number]) +
42  (queue_number * queue_size);
43  queues[my_queue_index] = ray_index;
44 }
45 
46 /*
47  * Get the ray index for this thread
48  * Returns a positive ray_index for threads that have to do some work;
49  * Returns 'QUEUE_EMPTY_SLOT' for threads that don't have any work
50  * i.e. all rays in the queue have been successfully allocated and there
51  * are no more rays to allocate to other threads.
52  */
54  KernelGlobals *kg,
55  int thread_index, /* Global thread index. */
56  int queue_number, /* Queue to operate on. */
57  ccl_global int *queues, /* Buffer of all queues. */
58  int queuesize, /* Size of a queue. */
59  int empty_queue) /* Empty the queue slot as soon as we fetch the ray index. */
60 {
61  int ray_index = queues[queue_number * queuesize + thread_index];
62  if (empty_queue && ray_index != QUEUE_EMPTY_SLOT) {
63  queues[queue_number * queuesize + thread_index] = QUEUE_EMPTY_SLOT;
64  }
65  return ray_index;
66 }
67 
68 /* The following functions implement the local-memory variant of the enqueue ray index function. */
69 
70 /* All threads should call this function. */
72  int ray_index, /* Ray index to enqueue. */
73  int queue_number, /* Queue in which to enqueue ray index. */
74  char enqueue_flag, /* True for threads whose ray index has to be enqueued. */
75  int queuesize, /* queue size. */
76  ccl_local_param unsigned int *local_queue_atomics, /* To do local queue atomics. */
77  ccl_global int *Queue_data, /* Queues. */
78  ccl_global int *Queue_index) /* To do global queue atomics. */
79 {
80  int lidx = ccl_local_id(1) * ccl_local_size(0) + ccl_local_id(0);
81 
82  /* Get local queue id. */
83  unsigned int lqidx;
84  if (enqueue_flag) {
85  lqidx = atomic_fetch_and_inc_uint32(local_queue_atomics);
86  }
88 
89  /* Get global queue offset. */
90  if (lidx == 0) {
91  *local_queue_atomics = atomic_fetch_and_add_uint32(
92  (ccl_global uint *)&Queue_index[queue_number], *local_queue_atomics);
93  }
95 
96  /* Get global queue index and enqueue ray. */
97  if (enqueue_flag) {
98  unsigned int my_gqidx = queue_number * queuesize + (*local_queue_atomics) + lqidx;
99  Queue_data[my_gqidx] = ray_index;
100  }
101 }
102 
104  int queue_number, /* Queue in which to enqueue the ray; -1 if no queue */
105  ccl_local_param unsigned int *local_queue_atomics)
106 {
107  int my_lqidx = atomic_fetch_and_inc_uint32(&local_queue_atomics[queue_number]);
108  return my_lqidx;
109 }
110 
112  int queue_number,
113  ccl_local_param unsigned int *local_queue_atomics,
114  ccl_global int *global_queue_atomics)
115 {
116  unsigned int queue_offset = atomic_fetch_and_add_uint32(
117  (ccl_global uint *)&global_queue_atomics[queue_number], local_queue_atomics[queue_number]);
118  return queue_offset;
119 }
120 
122  int queue_number,
123  int queuesize,
124  unsigned int lqidx,
125  ccl_local_param unsigned int *global_per_queue_offset)
126 {
127  int my_gqidx = queuesize * queue_number + lqidx + global_per_queue_offset[queue_number];
128  return my_gqidx;
129 }
130 
/*
 * Take one ray index out of the given queue.
 * Atomically decrements queue_index[queue_number], derives a slot position
 * from the value the atomic returns, and reads that slot from the queue's
 * region of the queues buffer.
 * Returns the stored ray index, or 'QUEUE_EMPTY_SLOT' when the derived
 * position is negative.
 * NOTE(review): the counter is decremented even when the position turns out
 * negative and is not restored in this function - confirm callers account for it.
 */
131 ccl_device int dequeue_ray_index(int queue_number, /* Queue to take a ray index from. */
132  ccl_global int *queues, /* Buffer of all queues. */
133  int queue_size, /* Size of each queue. */
134  ccl_global int *queue_index) /* Per-queue counters; used for atomic decrement. */
135 {
 /* Presumably the atomic yields the counter value before the decrement, so
  * subtracting one gives the position of the last filled slot - TODO confirm
  * against util_atomic.h. */
136  int index = atomic_fetch_and_dec_uint32((ccl_global uint *)&queue_index[queue_number]) - 1;
137 
 /* A negative position means there was nothing to take from this queue. */
138  if (index < 0) {
139  return QUEUE_EMPTY_SLOT;
140  }
141 
 /* Each queue occupies queue_size consecutive entries, starting at queue_number * queue_size. */
142  return queues[index + queue_number * queue_size];
143 }
144 
146 
147 #endif // __KERNEL_QUEUE_H__
unsigned int uint
Definition: BLI_sys_types.h:83
ATOMIC_INLINE uint32_t atomic_fetch_and_add_uint32(uint32_t *p, uint32_t x)
#define ccl_local_param
#define ccl_device
ccl_device_inline uint ccl_local_id(uint d)
ccl_device_inline uint ccl_local_size(uint d)
#define ccl_global
#define CCL_NAMESPACE_END
ccl_device unsigned int get_global_queue_index(int queue_number, int queuesize, unsigned int lqidx, ccl_local_param unsigned int *global_per_queue_offset)
ccl_device unsigned int get_local_queue_index(int queue_number, ccl_local_param unsigned int *local_queue_atomics)
CCL_NAMESPACE_BEGIN ccl_device void enqueue_ray_index(int ray_index, int queue_number, ccl_global int *queues, int queue_size, ccl_global int *queue_index)
Definition: kernel_queues.h:33
ccl_device int dequeue_ray_index(int queue_number, ccl_global int *queues, int queue_size, ccl_global int *queue_index)
ccl_device int get_ray_index(KernelGlobals *kg, int thread_index, int queue_number, ccl_global int *queues, int queuesize, int empty_queue)
Definition: kernel_queues.h:53
ccl_device unsigned int get_global_per_queue_offset(int queue_number, ccl_local_param unsigned int *local_queue_atomics, ccl_global int *global_queue_atomics)
ccl_device void enqueue_ray_index_local(int ray_index, int queue_number, char enqueue_flag, int queuesize, ccl_local_param unsigned int *local_queue_atomics, ccl_global int *Queue_data, ccl_global int *Queue_index)
Definition: kernel_queues.h:71
__kernel void ccl_constant KernelData ccl_global void ccl_global char ccl_global int * queue_index
#define QUEUE_EMPTY_SLOT
#define CCL_LOCAL_MEM_FENCE
Definition: util_atomic.h:32
#define atomic_fetch_and_dec_uint32(p)
Definition: util_atomic.h:30
#define atomic_fetch_and_inc_uint32(p)
Definition: util_atomic.h:29
#define ccl_barrier(flags)
Definition: util_atomic.h:33