Blender V4.5
gpu_pass.cc
Go to the documentation of this file.
1/* SPDX-FileCopyrightText: 2025 Blender Authors
2 *
3 * SPDX-License-Identifier: GPL-2.0-or-later */
4
10
11#include "MEM_guardedalloc.h"
12
13#include "BLI_map.hh"
14#include "BLI_span.hh"
15#include "BLI_time.h"
16#include "BLI_vector.hh"
17
18#include "GPU_capabilities.hh"
19#include "GPU_context.hh"
20#include "GPU_pass.hh"
21#include "GPU_vertex_format.hh"
22#include "gpu_codegen.hh"
23
24#include <mutex>
25#include <string>
26
27using namespace blender;
28using namespace blender::gpu::shader;
29
30static bool gpu_pass_validate(GPUCodegenCreateInfo *create_info);
31
32/* -------------------------------------------------------------------- */
35
struct GPUPass {
  /* Global tally of compilations; atomic since compilation can happen off-thread. */
  static inline std::atomic<uint64_t> compilation_counts = 0;

  /* Resulting shader once compilation succeeds; null until then. */
  std::atomic<GPUShader *> shader = nullptr;
  std::atomic<eGPUPassStatus> status = GPU_PASS_QUEUED;
  /* Orphaned GPUPasses get freed by the garbage collector. */
  std::atomic<int> refcount = 1;
  /* The last time the refcount was greater than 0. */
  double gc_timestamp = 0.0f; /* NOTE(review): float literal for a double — harmless but inconsistent. */

  /* Heuristic prepared during codegen: whether an optimized variant is worth compiling. */
  bool should_optimize = false;

  /* NOTE(review): constructor head and part of the member-init list are elided in this
   * listing — full signature per the index:
   * GPUPass(GPUCodegenCreateInfo *info, bool deferred_compilation,
   *         bool is_optimization_pass, bool should_optimize). */
  bool deferred_compilation,
  bool should_optimize)
      : create_info(info),
  {
    if (is_optimization_pass && deferred_compilation) {
      /* Defer until all non-optimization passes are compiled. */
      return;
    }

    GPUShaderCreateInfo *base_info = reinterpret_cast<GPUShaderCreateInfo *>(create_info);

    if (deferred_compilation) {
      /* (batched/deferred compile path elided in this listing) */
    }
    else {
      /* (immediate compile path elided in this listing) */
    }
  }

  /* finalize_compilation(): consume any pending batched compilation and release the
   * create-info. Must only run once — create_info is nulled at the end and the
   * assert below enforces it. */
  {
    BLI_assert_msg(create_info, "GPUPass::finalize_compilation() called more than once.");

    if (compilation_handle) {
      /* (batch finalize elided in this listing) */
    }

    /* (validation condition elided in this listing — presumably !gpu_pass_validate()) */
    fprintf(stderr, "GPUShader: error: too many samplers in shader.\n");
    }

    /* Create-info is no longer needed once the shader exists (or failed). */
    MEM_delete(create_info);
    create_info = nullptr;
  }

  /* Per-tick refresh; the compilation-state update line is elided in this listing. */
  void update(double timestamp)
  {
    update_gc_timestamp(timestamp);
  }

  /* update_compilation(): poll a pending batch handle, or (re)queue a still-referenced
   * queued pass. Interior lines elided in this listing. */
  {
    if (compilation_handle) {
      }
    }
    else if (status == GPU_PASS_QUEUED && refcount > 0) {
      GPUShaderCreateInfo *base_info = reinterpret_cast<GPUShaderCreateInfo *>(create_info);
    }
  }

  void update_gc_timestamp(double timestamp)
  {
    /* Keep the timestamp fresh while the pass has users; also seed it the first
     * time (gc_timestamp starts at 0.0). */
    if (refcount != 0 || gc_timestamp == 0.0f) {
      gc_timestamp = timestamp;
    }
  }

  /* True when the pass has been unused long enough to be collected.
   * NOTE(review): the start of the return expression is elided in this listing;
   * also `gc_collect_rate` is an int while callers pass a float constant — narrowing. */
  bool should_gc(int gc_collect_rate, double timestamp)
  {
    BLI_assert(gc_timestamp != 0.0f);
        (timestamp - gc_timestamp) >= gc_collect_rate;
  }
};
153
/* eGPUPassStatus GPU_pass_status(GPUPass *pass) — signature elided in this listing
 * (see index). Returns the current compilation status (atomic load). */
{
  return pass->status;
}
158
/* bool GPU_pass_should_optimize(GPUPass *pass) — signature and the live return line
 * are elided in this listing; per the comment below the heuristic is gated to the
 * Metal backend. */
{
  /* Returns optimization heuristic prepared during
   * initial codegen.
   * NOTE: Only enabled on Metal, since it doesn't seem to yield any performance improvements for
   * other backends. */

#if 0
  /* Returns optimization heuristic prepared during initial codegen. */
  return pass->should_optimize;
#endif
}
172
/* GPUShader *GPU_pass_shader_get(GPUPass *pass) — signature elided in this listing
 * (see index). Returns the compiled shader, or null if not (yet) compiled. */
{
  return pass->shader;
}
177
/* void GPU_pass_acquire(GPUPass *pass) — signature elided in this listing (see index).
 * Takes an additional reference. The post-increment returns the previous count; the
 * assert checks the pass was still alive (not already orphaned) when acquired. */
{
  int previous_refcount = pass->refcount++;
  UNUSED_VARS_NDEBUG(previous_refcount);
  BLI_assert(previous_refcount > 0);
}
184
/* void GPU_pass_release(GPUPass *pass) — signature elided in this listing (see index).
 * Drops a reference. Nothing is freed here: orphaned passes are reclaimed later by
 * the cache's garbage collector (see GPUPass::refcount comment). */
{
  int previous_refcount = pass->refcount--;
  UNUSED_VARS_NDEBUG(previous_refcount);
  BLI_assert(previous_refcount > 0);
}
191
196
201
203
204/* -------------------------------------------------------------------- */
211
/* NOTE(review): the `struct GPUPassCache {` line is elided in this listing. */

  /* Number of seconds with 0 users required before garbage collecting a pass. */
  static constexpr float gc_collect_rate_ = 60.0f;
  /* Number of seconds without base compilations required before starting to compile optimization
   * passes. */
  static constexpr float optimization_delay_ = 10.0f;

  /* Timestamp of the last tick on which some base pass was still queued; -1.0 before any tick. */
  double last_base_compilation_timestamp_ = -1.0;

  /* Passes keyed by codegen hash, bucketed per engine and per base/optimization variant. */
  Map<uint32_t, std::unique_ptr<GPUPass>> passes_[GPU_MAT_ENGINE_MAX][2 /*is_optimization_pass*/];
  std::mutex mutex_;

 public:
  /* void add(eGPUMaterialEngine engine, GPUCodegen &codegen, bool deferred_compilation,
   *          bool is_optimization_pass) — first signature line elided in this listing.
   * Inserts a new pass built from the codegen's create-info (ownership moves to the pass). */
              GPUCodegen &codegen,
              bool deferred_compilation,
              bool is_optimization_pass)
  {
    std::lock_guard lock(mutex_);

    passes_[engine][is_optimization_pass].add(
        codegen.hash_get(),
        std::make_unique<GPUPass>(codegen.create_info,
                                  deferred_compilation,
                                  is_optimization_pass,
                                  codegen.should_optimize_heuristic()));
  };

  /* GPUPass *get(eGPUMaterialEngine engine, size_t hash, bool allow_deferred,
   *              bool is_optimization_pass) — first signature line elided in this listing.
   * Looks up a cached pass; when deferral is not allowed and the pass is still queued,
   * its compilation is finalized synchronously before returning. Returns null on miss. */
               size_t hash,
               bool allow_deferred,
               bool is_optimization_pass)
  {
    std::lock_guard lock(mutex_);
    std::unique_ptr<GPUPass> *pass = passes_[engine][is_optimization_pass].lookup_ptr(hash);
    if (!allow_deferred && pass && pass->get()->status == GPU_PASS_QUEUED) {
      pass->get()->finalize_compilation();
    }
    return pass ? pass->get() : nullptr;
  }
254 void update()
255 {
256 std::lock_guard lock(mutex_);
257
258 double timestamp = BLI_time_now_seconds();
259
260 bool base_passes_ready = true;
261
262 /* Base Passes. */
263 for (auto &engine_passes : passes_) {
264 for (std::unique_ptr<GPUPass> &pass : engine_passes[false].values()) {
265 pass->update(timestamp);
266 if (pass->status == GPU_PASS_QUEUED) {
267 base_passes_ready = false;
268 }
269 }
270
271 engine_passes[false].remove_if(
272 [&](auto item) { return item.value->should_gc(gc_collect_rate_, timestamp); });
273 }
274
275 /* Optimization Passes GC. */
276 for (auto &engine_passes : passes_) {
277 for (std::unique_ptr<GPUPass> &pass : engine_passes[true].values()) {
278 pass->update_gc_timestamp(timestamp);
279 }
280
281 engine_passes[true].remove_if(
282 /* TODO: Use lower rate for optimization passes? */
283 [&](auto item) { return item.value->should_gc(gc_collect_rate_, timestamp); });
284 }
285
286 if (!base_passes_ready) {
287 last_base_compilation_timestamp_ = timestamp;
288 return;
289 }
290
291 if ((timestamp - last_base_compilation_timestamp_) < optimization_delay_) {
292 return;
293 }
294
295 /* Optimization Passes Compilation. */
296 for (auto &engine_passes : passes_) {
297 for (std::unique_ptr<GPUPass> &pass : engine_passes[true].values()) {
298 pass->update_compilation();
299 }
300 }
301 }
302
  /* Expose the cache lock so callers can serialize external status checks with
   * finalization (used by GPU_pass_ensure_its_ready()). */
  std::mutex &get_mutex()
  {
    return mutex_;
  }
};
308
/* Process-wide pass cache: created by GPU_pass_cache_init() and destroyed by
 * GPU_pass_cache_free(). */
static GPUPassCache *g_cache = nullptr;
310
/* void GPU_pass_ensure_its_ready(GPUPass *pass) — signature elided in this listing
 * (see index). Double-checked pattern: the atomic status is read once cheaply,
 * then re-checked under the cache mutex so only one thread finalizes. */
{
  if (pass->status == GPU_PASS_QUEUED) {
    std::lock_guard lock(g_cache->get_mutex());
    if (pass->status == GPU_PASS_QUEUED) {
      pass->finalize_compilation();
    }
  }
}
320
/* void GPU_pass_cache_init() — signature elided in this listing (see index).
 * Allocates the global pass cache. */
{
  g_cache = MEM_new<GPUPassCache>(__func__);
}
325
/* void GPU_pass_cache_update() — signature elided in this listing (see index).
 * Ticks the global cache (state refresh, GC, deferred optimization compiles). */
{
  g_cache->update();
}
330
336
/* void GPU_pass_cache_free() — signature elided in this listing (see index).
 * Destroys the global cache; MEM_SAFE_DELETE also nulls the pointer. */
{
  MEM_SAFE_DELETE(g_cache);
}
341
343
344/* -------------------------------------------------------------------- */
347
/* static bool gpu_pass_validate(GPUCodegenCreateInfo *create_info) — signature elided
 * in this listing (matches the forward declaration above). Counts sampler resources
 * and checks them against the backend's texture limits. */
{
  int samplers_len = 0;
  for (const ShaderCreateInfo::Resource &res : create_info->resources_get_all_()) {
    /* NOTE(review): the condition selecting sampler resources is elided in this listing. */
      samplers_len++;
    }
  }

  /* Validate against GPU limit. */
  if ((samplers_len > GPU_max_textures_frag()) || (samplers_len > GPU_max_textures_vert())) {
    return false;
  }

  /* NOTE(review): the factor of 2 presumably reserves combined vertex+fragment
   * bindings against the total texture limit — confirm against the backend. */
  return (samplers_len * 2 <= GPU_max_textures());
}
364
/* Entry point for material pass generation — first signature line elided in this
 * listing; full signature per the index:
 * GPUPass *GPU_generate_pass(GPUMaterial *material, GPUNodeGraph *graph,
 *     const char *debug_name, eGPUMaterialEngine engine, bool deferred_compilation,
 *     GPUCodegenCallbackFn finalize_source_cb, void *thunk, bool optimize_graph).
 * Runs codegen over the node graph, reuses a cached pass when the codegen hash
 * matches, otherwise generates the full shader sources and registers a new pass. */
                          GPUNodeGraph *graph,
                          const char *debug_name,
                          eGPUMaterialEngine engine,
                          bool deferred_compilation,
                          GPUCodegenCallbackFn finalize_source_cb,
                          void *thunk,
                          bool optimize_graph)
{

  /* If Optimize flag is passed in, we are generating an optimized
   * variant of the GPUMaterial's GPUPass. */
  if (optimize_graph) {
    /* (graph optimization call elided in this listing) */
  }

  /* Extract attributes before compiling so the generated VBOs are ready to accept the future
   * shader. */

  GPUCodegen codegen(material, graph, debug_name);
  codegen.generate_graphs();
  codegen.generate_cryptomatte();

  GPUPass *pass = nullptr;

  if (!optimize_graph) {
    /* The optimized version of the shader should not re-generate a UBO.
     * The UBO will not be used for this variant. */
    codegen.generate_uniform_buffer();
  }

  /* Cache lookup: Reuse shaders already compiled. */
  pass = g_cache->get(engine, codegen.hash_get(), deferred_compilation, optimize_graph);

  if (pass) {
    /* Cache hit: hand out an extra reference to the caller. */
    pass->refcount++;
    return pass;
  }

  /* The shader is not compiled, continue generating the shader strings. */
  codegen.generate_attribs();
  codegen.generate_resources();
  codegen.generate_library();

  /* Make engine add its own code and implement the generated functions. */
  finalize_source_cb(thunk, material, &codegen.output);

  codegen.create_info->finalize();
  /* Ownership of create_info transfers to the cached GPUPass; null our pointer so
   * the codegen destructor does not touch it. */
  g_cache->add(engine, codegen, deferred_compilation, optimize_graph);
  codegen.create_info = nullptr;

  /* Fetch the freshly inserted pass (already holds its initial reference). */
  return g_cache->get(engine, codegen.hash_get(), deferred_compilation, optimize_graph);
}
420
#define BLI_assert(a)
Definition BLI_assert.h:46
#define BLI_assert_msg(a, msg)
Definition BLI_assert.h:53
Platform independent time functions.
double BLI_time_now_seconds(void)
Definition time.cc:65
#define UNUSED_VARS_NDEBUG(...)
int GPU_max_textures()
int GPU_max_textures_frag()
int GPU_max_textures_vert()
eGPUBackendType GPU_backend_get_type()
void(*)(void *thunk, GPUMaterial *mat, struct GPUCodegenOutput *codegen) GPUCodegenCallbackFn
eGPUMaterialEngine
@ GPU_MAT_ENGINE_MAX
eGPUPassStatus
Definition GPU_pass.hh:20
@ GPU_PASS_FAILED
Definition GPU_pass.hh:21
@ GPU_PASS_QUEUED
Definition GPU_pass.hh:22
@ GPU_PASS_SUCCESS
Definition GPU_pass.hh:23
#define GPU_SHADER_FREE_SAFE(shader)
blender::Vector< GPUShader * > GPU_shader_batch_finalize(BatchHandle &handle)
void GPU_shader_batch_wait_for_all()
int64_t BatchHandle
Definition GPU_shader.hh:83
bool GPU_shader_batch_is_ready(BatchHandle handle)
CompilationPriority
Definition GPU_shader.hh:81
GPUShader * GPU_shader_create_from_info(const GPUShaderCreateInfo *_info)
void GPU_shader_batch_cancel(BatchHandle &handle)
BatchHandle GPU_shader_batch_create_from_infos(blender::Span< const GPUShaderCreateInfo * > infos, CompilationPriority priority=CompilationPriority::High)
Read Guarded memory(de)allocation.
volatile int lock
unsigned long long int uint64_t
void update()
Definition gpu_pass.cc:254
std::mutex & get_mutex()
Definition gpu_pass.cc:303
GPUPass * get(eGPUMaterialEngine engine, size_t hash, bool allow_deferred, bool is_optimization_pass)
Definition gpu_pass.cc:241
void add(eGPUMaterialEngine engine, GPUCodegen &codegen, bool deferred_compilation, bool is_optimization_pass)
Definition gpu_pass.cc:226
GPUCodegenCreateInfo * create_info
void gpu_node_graph_finalize_uniform_attrs(GPUNodeGraph *graph)
void gpu_node_graph_optimize(GPUNodeGraph *graph)
void gpu_node_graph_prune_unused(GPUNodeGraph *graph)
uint64_t GPU_pass_global_compilation_count()
Definition gpu_pass.cc:192
static bool gpu_pass_validate(GPUCodegenCreateInfo *create_info)
Definition gpu_pass.cc:348
eGPUPassStatus GPU_pass_status(GPUPass *pass)
Definition gpu_pass.cc:154
void GPU_pass_ensure_its_ready(GPUPass *pass)
Definition gpu_pass.cc:311
static GPUPassCache * g_cache
Definition gpu_pass.cc:309
void GPU_pass_cache_wait_for_all()
Definition gpu_pass.cc:331
void GPU_pass_cache_init()
Definition gpu_pass.cc:321
void GPU_pass_release(GPUPass *pass)
Definition gpu_pass.cc:185
void GPU_pass_acquire(GPUPass *pass)
Definition gpu_pass.cc:178
void GPU_pass_cache_update()
Definition gpu_pass.cc:326
GPUPass * GPU_generate_pass(GPUMaterial *material, GPUNodeGraph *graph, const char *debug_name, eGPUMaterialEngine engine, bool deferred_compilation, GPUCodegenCallbackFn finalize_source_cb, void *thunk, bool optimize_graph)
Definition gpu_pass.cc:365
uint64_t GPU_pass_compilation_timestamp(GPUPass *pass)
Definition gpu_pass.cc:197
bool GPU_pass_should_optimize(GPUPass *pass)
Definition gpu_pass.cc:159
void GPU_pass_cache_free()
Definition gpu_pass.cc:337
GPUShader * GPU_pass_shader_get(GPUPass *pass)
Definition gpu_pass.cc:173
#define hash
Definition noise_c.cc:154
std::atomic< GPUShader * > shader
Definition gpu_pass.cc:41
GPUPass(GPUCodegenCreateInfo *info, bool deferred_compilation, bool is_optimization_pass, bool should_optimize)
Definition gpu_pass.cc:55
void finalize_compilation()
Definition gpu_pass.cc:98
bool is_optimization_pass
Definition gpu_pass.cc:53
void update_gc_timestamp(double timestamp)
Definition gpu_pass.cc:139
~GPUPass()
Definition gpu_pass.cc:81
static std::atomic< uint64_t > compilation_counts
Definition gpu_pass.cc:37
BatchHandle compilation_handle
Definition gpu_pass.cc:40
void update(double timestamp)
Definition gpu_pass.cc:118
double gc_timestamp
Definition gpu_pass.cc:46
std::atomic< eGPUPassStatus > status
Definition gpu_pass.cc:42
bool should_optimize
Definition gpu_pass.cc:52
bool should_gc(int gc_collect_rate, double timestamp)
Definition gpu_pass.cc:146
void update_compilation()
Definition gpu_pass.cc:124
std::atomic< int > refcount
Definition gpu_pass.cc:44
GPUCodegenCreateInfo * create_info
Definition gpu_pass.cc:39
uint64_t compilation_timestamp
Definition gpu_pass.cc:48
CompilationPriority compilation_priority()
Definition gpu_pass.cc:93
void finalize(const bool recursive=false)