Blender V4.5
oneapi/queue.cpp
Go to the documentation of this file.
/* SPDX-FileCopyrightText: 2021-2022 Intel Corporation
 *
 * SPDX-License-Identifier: Apache-2.0 */
4
5#ifdef WITH_ONEAPI
6
7# include "device/oneapi/queue.h"
9# include "util/log.h"
10
12
14
/* Per-kernel execution bookkeeping. Both members start at zero.
 * NOTE(review): nothing in the visible part of this file reads or writes this struct; judging by
 * the member names it accumulates elapsed time and counts enqueues - confirm against its users. */
struct KernelExecutionInfo {
  /* Presumably the summed elapsed time over all recorded executions (unit not visible here). */
  double elapsed_summary{0.0};
  /* Presumably the number of times the kernel was enqueued. */
  int enqueue_count{0};
};
19
/* OneapiDeviceQueue */
21
22OneapiDeviceQueue::OneapiDeviceQueue(OneapiDevice *device)
23 : DeviceQueue(device), oneapi_device_(device)
24{
25}
26
27int OneapiDeviceQueue::num_concurrent_states(const size_t state_size) const
28{
29 int num_states = 4 * num_concurrent_busy_states(state_size);
30
31 VLOG_DEVICE_STATS << "GPU queue concurrent states: " << num_states << ", using up to "
33
34 return num_states;
35}
36
37int OneapiDeviceQueue::num_concurrent_busy_states(const size_t /*state_size*/) const
38{
39 const int max_num_threads = oneapi_device_->get_num_multiprocessors() *
40 oneapi_device_->get_max_num_threads_per_multiprocessor();
41
42 return 4 * max(8 * max_num_threads, 65536);
43}
44
45int OneapiDeviceQueue::num_sort_partitions(int max_num_paths, uint /*max_scene_shaders*/) const
46{
47 int sort_partition_elements = (oneapi_device_->get_max_num_threads_per_multiprocessor() >= 128) ?
48 65536 :
49 8192;
50 /* Sort partitioning with local sorting on Intel GPUs is currently the most effective solution no
51 * matter the number of shaders. */
52 return max(max_num_paths / sort_partition_elements, 1);
53}
54
55void OneapiDeviceQueue::init_execution()
56{
57 oneapi_device_->load_texture_info();
58
59 SyclQueue *device_queue = oneapi_device_->sycl_queue();
60 void *kg_dptr = oneapi_device_->kernel_globals_device_pointer();
61 assert(device_queue);
62 assert(kg_dptr);
63 kernel_context_ = make_unique<KernelContext>();
64 kernel_context_->queue = device_queue;
65 kernel_context_->kernel_globals = kg_dptr;
66
67 debug_init_execution();
68}
69
70bool OneapiDeviceQueue::enqueue(DeviceKernel kernel,
71 const int signed_kernel_work_size,
72 const DeviceKernelArguments &_args)
73{
74 if (oneapi_device_->have_error()) {
75 return false;
76 }
77
78 /* Update texture info in case memory moved to host. */
79 if (oneapi_device_->load_texture_info()) {
80 if (!synchronize()) {
81 return false;
82 }
83 }
84
85 void **args = const_cast<void **>(_args.values);
86
87 debug_enqueue_begin(kernel, signed_kernel_work_size);
88 assert(signed_kernel_work_size >= 0);
89 size_t kernel_global_size = (size_t)signed_kernel_work_size;
90 size_t kernel_local_size;
91
92 assert(kernel_context_);
93 kernel_context_->scene_max_shaders = oneapi_device_->scene_max_shaders();
94
95 oneapi_device_->get_adjusted_global_and_local_sizes(
96 kernel_context_->queue, kernel, kernel_global_size, kernel_local_size);
97
98 /* Call the oneAPI kernel DLL to launch the requested kernel. */
99 bool is_finished_ok = oneapi_device_->enqueue_kernel(
100 kernel_context_.get(), kernel, kernel_global_size, kernel_local_size, args);
101
102 if (is_finished_ok == false) {
103 oneapi_device_->set_error("oneAPI kernel \"" + std::string(device_kernel_as_string(kernel)) +
104 "\" execution error: got runtime exception \"" +
105 oneapi_device_->oneapi_error_message() + "\"");
106 }
107
108 debug_enqueue_end();
109
110 return is_finished_ok;
111}
112
113bool OneapiDeviceQueue::synchronize()
114{
115 if (oneapi_device_->have_error()) {
116 return false;
117 }
118
119 bool is_finished_ok = oneapi_device_->queue_synchronize(oneapi_device_->sycl_queue());
120 if (is_finished_ok == false) {
121 oneapi_device_->set_error("oneAPI unknown kernel execution error: got runtime exception \"" +
122 oneapi_device_->oneapi_error_message() + "\"");
123 }
124
125 debug_synchronize();
126
127 return !(oneapi_device_->have_error());
128}
129
130void OneapiDeviceQueue::zero_to_device(device_memory &mem)
131{
132 oneapi_device_->mem_zero(mem);
133}
134
135void OneapiDeviceQueue::copy_to_device(device_memory &mem)
136{
137 oneapi_device_->mem_copy_to(mem);
138}
139
140void OneapiDeviceQueue::copy_from_device(device_memory &mem)
141{
142 oneapi_device_->mem_copy_from(mem);
143}
144
146
147#endif /* WITH_ONEAPI */
unsigned int uint
#define CCL_NAMESPACE_END
const char * device_kernel_as_string(DeviceKernel kernel)
#define assert(assertion)
const int num_states
DeviceKernel
#define VLOG_DEVICE_STATS
Definition log.h:77
string string_human_readable_size(size_t size)
Definition string.cpp:257
void * values[MAX_ARGS]
max
Definition text_draw.cc:251