Blender V4.5
vk_device_submission.cc
Go to the documentation of this file.
1/* SPDX-FileCopyrightText: 2025 Blender Authors
2 *
3 * SPDX-License-Identifier: GPL-2.0-or-later */
4
8
9#include <chrono>
10#include <condition_variable>
11#include <thread>
12
13#include "BLI_mutex.hh"
14#include "BLI_task.h"
15
16#include "vk_device.hh"
17
18#include "CLG_log.h"
19
/* Log reference used by the CLOG_INFO calls in this file; identified as "gpu.vulkan". */
20static CLG_LogRef LOG = {"gpu.vulkan"};
21
22namespace blender::gpu {
23
24/* -------------------------------------------------------------------- */
27
33
44
46 VKDiscardPool &context_discard_pool,
47 bool submit_to_device,
48 bool wait_for_completion,
49 VkPipelineStageFlags wait_dst_stage_mask,
50 VkSemaphore wait_semaphore,
51 VkSemaphore signal_semaphore,
52 VkFence signal_fence)
53{
/* Queues `render_graph` for the submission runner and returns the timeline value assigned to
 * this submission (0 when the graph is empty and nothing is queued).
 *
 * Depending on the arguments the call blocks until the runner has submitted the work
 * (a signal semaphore is given and `wait_for_completion` is false), or until the timeline
 * semaphore has reached the returned value (`wait_for_completion` is true).
 *
 * NOTE(review): the first line of this declaration (original line 45) is not present in this
 * listing. */

/* Nothing to submit: reset the graph and hand it back to the queue of unused render graphs. */
54 if (render_graph->is_empty()) {
55 render_graph->reset();
56 BLI_thread_queue_push(unused_render_graphs_, render_graph);
57 return 0;
58 }
59
/* Bundle all submission parameters in a heap allocated task. The task is freed by its consumer
 * (MEM_delete in #submission_runner / #deinit_submission_pool). */
60 VKRenderGraphSubmitTask *submit_task = MEM_new<VKRenderGraphSubmitTask>(__func__);
61 submit_task->render_graph = render_graph;
62 submit_task->submit_to_device = submit_to_device;
63 submit_task->wait_dst_stage_mask = wait_dst_stage_mask;
64 submit_task->wait_semaphore = wait_semaphore;
65 submit_task->signal_semaphore = signal_semaphore;
66 submit_task->signal_fence = signal_fence;
67 submit_task->wait_for_submission = nullptr;
68
69 /* We need to wait for submission as otherwise the signal semaphore can still not be in an
70 * initial state. */
71 const bool wait_for_submission = signal_semaphore != VK_NULL_HANDLE && !wait_for_completion;
/* `wait_condition` lives on this stack frame and the task only stores a pointer to it. The wait
 * further below keeps this frame alive until the runner has flagged the condition. */
72 VKRenderGraphWait wait_condition{};
73 if (wait_for_submission) {
74 submit_task->wait_for_submission = &wait_condition;
75 }
76 TimelineValue timeline = 0;
/* With the orphaned data mutex held: pick the timeline value, pass `context_discard_pool` to
 * `orphaned_data.move_data()` together with that value and enqueue the task.
 * Only a device submission increments `timeline_value_`; otherwise the task is tagged with
 * `timeline_value_ + 1` and the counter is left unchanged. */
77 {
78 std::scoped_lock lock(orphaned_data.mutex_get());
79 timeline = submit_task->timeline = submit_to_device ? ++timeline_value_ : timeline_value_ + 1;
80 orphaned_data.timeline_ = timeline;
81 orphaned_data.move_data(context_discard_pool, timeline);
82 BLI_thread_queue_push(submitted_render_graphs_, submit_task);
83 }
/* The task now belongs to the queue/runner; drop the local pointer so it isn't used anymore. */
84 submit_task = nullptr;
85
/* Block until the submission runner has set `is_submitted` and notified the condition. */
86 if (wait_for_submission) {
87 std::unique_lock<blender::Mutex> lock(wait_condition.is_submitted_mutex);
88 wait_condition.is_submitted_condition.wait(lock, [&] { return wait_condition.is_submitted; });
89 }
90
/* Optionally block until the timeline value of this submission has been reached. */
91 if (wait_for_completion) {
92 wait_for_timeline(timeline);
93 }
94 return timeline;
95}
96
98{
/* Blocks the calling thread until `vk_timeline_semaphore_` has reached `timeline`.
 *
 * NOTE(review): the signature line of this function (original line 97) is not present in this
 * listing. */

/* A timeline of 0 is what #render_graph_submit returns when nothing was queued; there is
 * nothing to wait for in that case. */
99 if (timeline == 0) {
100 return;
101 }
/* Wait info: no flags, a single semaphore/value pair. */
102 VkSemaphoreWaitInfo vk_semaphore_wait_info = {
103 VK_STRUCTURE_TYPE_SEMAPHORE_WAIT_INFO, nullptr, 0, 1, &vk_timeline_semaphore_, &timeline};
/* Wait with the maximum timeout (UINT64_MAX). The result of the call is not checked. */
104 vkWaitSemaphores(vk_device_, &vk_semaphore_wait_info, UINT64_MAX);
105}
106
108{
/* Waits until `vk_queue_` is idle.
 *
 * The queue mutex is held for the duration of the call; #submission_runner takes the same
 * `queue_mutex_` around its vkQueueSubmit, so the two calls cannot overlap.
 *
 * NOTE(review): the signature line of this function (original line 107) is not present in this
 * listing. */
109 std::scoped_lock lock(*queue_mutex_);
110 vkQueueWaitIdle(vk_queue_);
111}
112
114{
/* Returns a render graph to record into: one popped from `unused_render_graphs_` when
 * available, otherwise a newly allocated graph that is also appended to `render_graphs_`.
 *
 * NOTE(review): the signature line and the line that declares `render_graph` (original lines
 * 113 and 115) are not present in this listing. */

/* Pop with a timeout of 0 ms. */
116 BLI_thread_queue_pop_timeout(unused_render_graphs_, 0));
117 if (render_graph) {
118 return render_graph;
119 }
120
/* Nothing to reuse: allocate a new graph while holding the resources mutex. */
121 std::scoped_lock lock(resources.mutex);
122 render_graph = MEM_new<render_graph::VKRenderGraph>(__func__, resources);
123 render_graphs_.append(render_graph);
124 return render_graph;
125}
126
/**
 * Task function of the submission pool.
 *
 * Pops #VKRenderGraphSubmitTask items from `submitted_render_graphs_`, records the commands of
 * their render graph into a command buffer and, when the task has `submit_to_device` set,
 * submits the recorded command buffers to the device queue. Runs until the task pool is
 * canceled, after which the command buffers and the command pool are released.
 *
 * \param pool: Task pool executing this function; its user data is the #VKDevice.
 * \param task_data: Unused.
 */
127void VKDevice::submission_runner(TaskPool *__restrict pool, void *task_data)
128{
129 CLOG_INFO(&LOG, 3, "submission runner has started");
130 UNUSED_VARS(task_data);
131
132 VKDevice *device = static_cast<VKDevice *>(BLI_task_pool_user_data(pool));
/* Command pool local to this runner; it is destroyed at the end of this function.
 * The result of vkCreateCommandPool is not checked. */
133 VkCommandPool vk_command_pool = VK_NULL_HANDLE;
134 VkCommandPoolCreateInfo vk_command_pool_create_info = {
135 VK_STRUCTURE_TYPE_COMMAND_POOL_CREATE_INFO,
136 nullptr,
137 VK_COMMAND_POOL_CREATE_TRANSIENT_BIT | VK_COMMAND_POOL_CREATE_RESET_COMMAND_BUFFER_BIT,
138 device->vk_queue_family_};
139 vkCreateCommandPool(device->vk_device_, &vk_command_pool_create_info, nullptr, &vk_command_pool);
140
/* NOTE(review): original line 141 is not present in this listing. */
142 render_graph::VKCommandBuilder command_builder;
/* Command buffers that are free to be (re)used. */
143 Vector<VkCommandBuffer> command_buffers_unused;
/* Submitted command buffers, tracked by the timeline value they were submitted with. */
144 TimelineResources<VkCommandBuffer> command_buffers_in_use;
/* Handle of the command buffer that is currently being recorded (wrapped by `command_buffer`). */
145 VkCommandBuffer vk_command_buffer = VK_NULL_HANDLE;
/* Command buffers that have finished recording, but have not been submitted yet. */
146 Vector<VkCommandBuffer> unsubmitted_command_buffers;
/* A submission uses at most two submit infos: one for the previously ended command buffers and
 * one for the command buffer of the current task. */
147 Vector<VkSubmitInfo> submit_infos;
148 submit_infos.reserve(2);
149 std::optional<render_graph::VKCommandBufferWrapper> command_buffer;
150 uint64_t previous_gc_timeline = 0;
151
152 CLOG_INFO(&LOG, 3, "submission runner initialized");
/* Poll the queue with a 1 ms timeout; the cancel state of the pool is re-checked every
 * iteration. */
153 while (!BLI_task_pool_current_canceled(pool)) {
154 VKRenderGraphSubmitTask *submit_task = static_cast<VKRenderGraphSubmitTask *>(
155 BLI_thread_queue_pop_timeout(device->submitted_render_graphs_, 1));
156 if (submit_task == nullptr) {
157 continue;
158 }
/* The branch below is only taken when the finished timeline differs from the one seen during
 * the previous task. NOTE(review): its body (original line 161) is not present in this
 * listing. */
159 uint64_t current_timeline = device->submission_finished_timeline_get();
160 if (assign_if_different(previous_gc_timeline, current_timeline)) {
162 }
163
164 /* End current command buffer when we need to wait for a semaphore. In this case all previous
165 * recorded commands can run before the wait semaphores. The commands that must be guarded by
166 * the semaphores are part of the new submitted render graph. */
167 if (submit_task->wait_semaphore != VK_NULL_HANDLE && command_buffer.has_value()) {
168 command_buffer->end_recording();
169 unsubmitted_command_buffers.append(vk_command_buffer);
170 command_buffer.reset();
171 }
172
/* Start a new command buffer when none is being recorded. A command buffer stays open across
 * tasks that don't submit to the device, so their commands end up in the same buffer. */
173 if (!command_buffer.has_value()) {
174 /* Check for completed command buffers that can be reused. */
175 if (command_buffers_unused.is_empty()) {
176 command_buffers_in_use.remove_old(current_timeline,
177 [&](VkCommandBuffer vk_command_buffer) {
178 command_buffers_unused.append(vk_command_buffer);
179 });
180 }
181
182 /* Create new command buffers when there are none left to be reused. */
183 if (command_buffers_unused.is_empty()) {
/* Allocate a batch of 10 primary command buffers directly into the vector's storage.
 * The result of vkAllocateCommandBuffers is not checked. */
184 command_buffers_unused.resize(10, VK_NULL_HANDLE);
185 VkCommandBufferAllocateInfo vk_command_buffer_allocate_info = {
186 VK_STRUCTURE_TYPE_COMMAND_BUFFER_ALLOCATE_INFO,
187 nullptr,
188 vk_command_pool,
189 VK_COMMAND_BUFFER_LEVEL_PRIMARY,
190 10};
191 vkAllocateCommandBuffers(
192 device->vk_device_, &vk_command_buffer_allocate_info, command_buffers_unused.data());
193 };
194
195 vk_command_buffer = command_buffers_unused.pop_last();
196 command_buffer = std::make_optional<render_graph::VKCommandBufferWrapper>(
197 vk_command_buffer, device->extensions_);
198 command_buffer->begin_recording();
199 }
200
201 BLI_assert(vk_command_buffer != VK_NULL_HANDLE);
202
/* NOTE(review): `render_graph` and `node_handles` used below are declared on original lines
 * 203-204, which are not present in this listing.
 * Building the nodes is done with the resources mutex held; recording the commands is done
 * after the mutex has been released. */
205 {
206 std::scoped_lock lock_resources(device->resources.mutex);
207 command_builder.build_nodes(render_graph, *command_buffer, node_handles);
208 }
209 command_builder.record_commands(render_graph, *command_buffer, node_handles);
210
211 if (submit_task->submit_to_device) {
212 /* Create submit infos for previous command buffers. */
/* These are submitted without any wait or signal semaphores. */
213 submit_infos.clear();
214 if (!unsubmitted_command_buffers.is_empty()) {
215 VkSubmitInfo vk_submit_info = {VK_STRUCTURE_TYPE_SUBMIT_INFO,
216 nullptr,
217 0,
218 nullptr,
219 nullptr,
220 uint32_t(unsubmitted_command_buffers.size()),
221 unsubmitted_command_buffers.data(),
222 0,
223 nullptr};
224 submit_infos.append(vk_submit_info);
225 }
226
227 /* Finalize current command buffer. */
228 command_buffer->end_recording();
229 unsubmitted_command_buffers.append(vk_command_buffer);
230
/* The timeline semaphore is always signaled (with the timeline value of the task). The
 * optional signal semaphore of the task is the second entry and gets value 0. */
231 uint32_t wait_semaphore_len = submit_task->wait_semaphore == VK_NULL_HANDLE ? 0 : 1;
232 uint32_t signal_semaphore_len = submit_task->signal_semaphore == VK_NULL_HANDLE ? 1 : 2;
233 VkSemaphore signal_semaphores[2] = {device->vk_timeline_semaphore_,
234 submit_task->signal_semaphore};
235 uint64_t signal_semaphore_values[2] = {submit_task->timeline, 0};
236
/* No wait values are passed (count 0); only signal values. */
237 VkTimelineSemaphoreSubmitInfo vk_timeline_semaphore_submit_info = {
238 VK_STRUCTURE_TYPE_TIMELINE_SEMAPHORE_SUBMIT_INFO,
239 nullptr,
240 0,
241 nullptr,
242 signal_semaphore_len,
243 signal_semaphore_values};
/* This submit info contains only the command buffer that was just ended (the last element of
 * `unsubmitted_command_buffers`); the wait and signal semaphores are attached to it. */
244 VkSubmitInfo vk_submit_info = {VK_STRUCTURE_TYPE_SUBMIT_INFO,
245 &vk_timeline_semaphore_submit_info,
246 wait_semaphore_len,
247 &submit_task->wait_semaphore,
248 &submit_task->wait_dst_stage_mask,
249 1,
250 &unsubmitted_command_buffers.last(),
251 signal_semaphore_len,
252 signal_semaphores};
253 submit_infos.append(vk_submit_info);
254
/* Submit with the queue mutex held. The result of vkQueueSubmit is not checked. */
255 {
256 std::scoped_lock lock_queue(*device->queue_mutex_);
257 vkQueueSubmit(device->vk_queue_,
258 submit_infos.size(),
259 submit_infos.data(),
260 submit_task->signal_fence);
261 }
/* Wake up the thread that is blocked in #render_graph_submit waiting for this submission.
 * NOTE(review): the continuation of the lock statement (original line 264) is not present in
 * this listing. */
262 if (submit_task->wait_for_submission != nullptr) {
263 std::unique_lock<blender::Mutex> lock(
265 submit_task->wait_for_submission->is_submitted = true;
266 submit_task->wait_for_submission->is_submitted_condition.notify_one();
267 }
/* Register all submitted command buffers under the timeline of this task, so `remove_old` can
 * hand them back for reuse later on. The loop variable shadows the outer `vk_command_buffer`. */
268 vk_command_buffer = VK_NULL_HANDLE;
269 for (VkCommandBuffer vk_command_buffer : unsubmitted_command_buffers) {
270 command_buffers_in_use.append_timeline(submit_task->timeline, vk_command_buffer);
271 }
272 unsubmitted_command_buffers.clear();
273 command_buffer.reset();
274 }
275
/* The graph has been recorded: reset it, return it to the queue of unused render graphs and
 * free the task. */
276 render_graph.reset();
277 BLI_thread_queue_push(device->unused_render_graphs_, std::move(submit_task->render_graph));
278 MEM_delete<VKRenderGraphSubmitTask>(submit_task);
279 }
280 CLOG_INFO(&LOG, 3, "submission runner is being canceled");
281
282 /* Clear command buffers and pool */
/* After the device is idle every in-use command buffer is moved to the unused list
 * (UINT64_MAX as timeline), so a single vkFreeCommandBuffers call covers them. */
283 vkDeviceWaitIdle(device->vk_device_);
284 command_buffers_in_use.remove_old(UINT64_MAX, [&](VkCommandBuffer vk_command_buffer) {
285 command_buffers_unused.append(vk_command_buffer);
286 });
287 vkFreeCommandBuffers(device->vk_device_,
288 vk_command_pool,
289 command_buffers_unused.size(),
290 command_buffers_unused.data());
291 vkDestroyCommandPool(device->vk_device_, vk_command_pool, nullptr);
292 CLOG_INFO(&LOG, 3, "submission runner finished");
293}
294
/**
 * Creates the queues for submitted and unused render graphs and the timeline semaphore, and
 * pushes #submission_runner as a task onto the submission pool.
 */
295void VKDevice::init_submission_pool()
296{
297 CLOG_INFO(&LOG, 3, "create submission pool");
/* NOTE(review): original line 298 is not present in this listing; presumably it creates
 * `submission_pool_`, which is used below. */
299 submitted_render_graphs_ = BLI_thread_queue_init();
300 unused_render_graphs_ = BLI_thread_queue_init();
301
/* Timeline semaphore with an initial value of 0. The result of vkCreateSemaphore is not
 * checked. */
302 VkSemaphoreTypeCreateInfo vk_semaphore_type_create_info = {
303 VK_STRUCTURE_TYPE_SEMAPHORE_TYPE_CREATE_INFO, nullptr, VK_SEMAPHORE_TYPE_TIMELINE, 0};
304 VkSemaphoreCreateInfo vk_semaphore_create_info = {
305 VK_STRUCTURE_TYPE_SEMAPHORE_CREATE_INFO, &vk_semaphore_type_create_info, 0};
306 vkCreateSemaphore(vk_device_, &vk_semaphore_create_info, nullptr, &vk_timeline_semaphore_);
307
/* The runner gets no task data (nullptr) and nothing has to be freed for it. */
308 BLI_task_pool_push(submission_pool_, VKDevice::submission_runner, nullptr, false, nullptr);
309}
310
/**
 * Cancels the submission pool, waits for it and frees it. After that the tasks that are still
 * queued are freed, both queues are freed and the timeline semaphore is destroyed.
 */
311void VKDevice::deinit_submission_pool()
312{
313 CLOG_INFO(&LOG, 3, "cancelling submission pool");
314 BLI_task_pool_cancel(submission_pool_);
315 CLOG_INFO(&LOG, 3, "waiting for completion");
316 BLI_task_pool_work_and_wait(submission_pool_);
317 CLOG_INFO(&LOG, 3, "freeing submission pool");
318 BLI_task_pool_free(submission_pool_);
319 submission_pool_ = nullptr;
320
/* Free the tasks that were still queued; they are not recorded or submitted anymore. Only the
 * task itself is freed here, not the render graph it points to. */
321 while (!BLI_thread_queue_is_empty(submitted_render_graphs_)) {
322 VKRenderGraphSubmitTask *submit_task = static_cast<VKRenderGraphSubmitTask *>(
323 BLI_thread_queue_pop(submitted_render_graphs_));
324 MEM_delete<VKRenderGraphSubmitTask>(submit_task);
325 }
326 BLI_thread_queue_free(submitted_render_graphs_);
327 submitted_render_graphs_ = nullptr;
328 BLI_thread_queue_free(unused_render_graphs_);
329 unused_render_graphs_ = nullptr;
330
/* Destroy the timeline semaphore and reset the handle. */
331 vkDestroySemaphore(vk_device_, vk_timeline_semaphore_, nullptr);
332 vk_timeline_semaphore_ = VK_NULL_HANDLE;
333}
334
336
337} // namespace blender::gpu
#define BLI_assert(a)
Definition BLI_assert.h:46
@ TASK_PRIORITY_HIGH
Definition BLI_task.h:53
void * BLI_task_pool_user_data(TaskPool *pool)
Definition task_pool.cc:546
bool BLI_task_pool_current_canceled(TaskPool *pool)
Definition task_pool.cc:541
void BLI_task_pool_work_and_wait(TaskPool *pool)
Definition task_pool.cc:531
void BLI_task_pool_cancel(TaskPool *pool)
Definition task_pool.cc:536
TaskPool * BLI_task_pool_create_background_serial(void *userdata, eTaskPriority priority)
Definition task_pool.cc:512
void BLI_task_pool_free(TaskPool *pool)
Definition task_pool.cc:517
void BLI_task_pool_push(TaskPool *pool, TaskRunFunction run, void *taskdata, bool free_taskdata, TaskFreeFunction freedata)
Definition task_pool.cc:522
void BLI_thread_queue_push(ThreadQueue *queue, void *work)
Definition threads.cc:642
void * BLI_thread_queue_pop(ThreadQueue *queue)
Definition threads.cc:653
ThreadQueue * BLI_thread_queue_init(void)
Definition threads.cc:616
void BLI_thread_queue_free(ThreadQueue *queue)
Definition threads.cc:630
bool BLI_thread_queue_is_empty(ThreadQueue *queue)
Definition threads.cc:757
void * BLI_thread_queue_pop_timeout(ThreadQueue *queue, int ms)
Definition threads.cc:712
#define UNUSED_VARS(...)
#define CLOG_INFO(clg_ref, level,...)
Definition CLG_log.h:179
volatile int lock
unsigned long long int uint64_t
int64_t size() const
void append(const T &value)
const T & last(const int64_t n=0) const
bool is_empty() const
void resize(const int64_t new_size)
void reserve(const int64_t min_capacity)
void remove_old(TimelineValue current_timeline, Deleter deleter)
void append_timeline(TimelineValue timeline, Item item)
render_graph::VKResourceStateTracker resources
Definition vk_device.hh:242
TimelineValue submission_finished_timeline_get() const
Definition vk_device.hh:427
render_graph::VKRenderGraph * render_graph_new()
VKDiscardPool orphaned_data
Definition vk_device.hh:243
static void submission_runner(TaskPool *__restrict pool, void *task_data)
TimelineValue render_graph_submit(render_graph::VKRenderGraph *render_graph, VKDiscardPool &context_discard_pool, bool submit_to_device, bool wait_for_completion, VkPipelineStageFlags wait_dst_stage_mask, VkSemaphore wait_semaphore, VkSemaphore signal_semaphore, VkFence signal_fence)
void wait_for_timeline(TimelineValue timeline)
void destroy_discarded_resources(VKDevice &device, bool force=false)
void record_commands(VKRenderGraph &render_graph, VKCommandBufferInterface &command_buffer, Span< NodeHandle > node_handles)
void build_nodes(VKRenderGraph &render_graph, VKCommandBufferInterface &command_buffer, Span< NodeHandle > node_handles)
Span< NodeHandle > select_nodes(const VKRenderGraph &render_graph)
#define UINT64_MAX
#define LOG(severity)
Definition log.h:32
uint64_t TimelineValue
Definition vk_common.hh:36
static CLG_LogRef LOG
bool assign_if_different(T &old_value, T new_value)
std::mutex Mutex
Definition BLI_mutex.hh:47
render_graph::VKRenderGraph * render_graph
std::condition_variable_any is_submitted_condition