Blender V4.5
gpu_codegen.cc
Go to the documentation of this file.
1/* SPDX-FileCopyrightText: 2005 Blender Authors
2 *
3 * SPDX-License-Identifier: GPL-2.0-or-later */
4
10
11#include "MEM_guardedalloc.h"
12
13#include "DNA_material_types.h"
14
15#include "BLI_span.hh"
16#include "BLI_string.h"
17#include "BLI_vector.hh"
18
19#include "BKE_cryptomatte.hh"
20
22
23#include "GPU_capabilities.hh"
24#include "GPU_shader.hh"
25#include "GPU_uniform_buffer.hh"
26#include "GPU_vertex_format.hh"
27
28#include "gpu_codegen.hh"
30
31#include <cstdarg>
32#include <cstring>
33
34using namespace blender;
35using namespace blender::gpu::shader;
36
37/* -------------------------------------------------------------------- */
40
#if 0
#  define SRC_NAME(io, link, list, type) \
    link->node->name << "_" << io << BLI_findindex(&link->node->list, (const void *)link) << "_" \
                     << type
#else
/* NOTE(review): the middle parameter names are swapped compared to the debug variant above.
 * Only `type` is expanded here, so call sites (which follow the debug order) are unaffected. */
#  define SRC_NAME(io, list, link, type) type
#endif
48
49static std::ostream &operator<<(std::ostream &stream, const GPUInput *input)
50{
51 switch (input->source) {
54 return stream << SRC_NAME("in", input, inputs, "tmp") << input->id;
56 return stream << SRC_NAME("in", input, inputs, "cons") << input->id;
58 return stream << "node_tree.u" << input->id;
59 case GPU_SOURCE_ATTR:
60 return stream << "var_attrs.v" << input->attr->id;
62 return stream << "UNI_ATTR(unf_attrs[resource_id].attr" << input->uniform_attr->id << ")";
64 return stream << "attr_load_layer(" << input->layer_attr->hash_code << ")";
66 return stream << "strct" << input->id;
67 case GPU_SOURCE_TEX:
68 return stream << input->texture->sampler_name;
70 return stream << input->texture->tiled_mapping_name;
71 default:
72 BLI_assert(0);
73 return stream;
74 }
75}
76
77static std::ostream &operator<<(std::ostream &stream, const GPUOutput *output)
78{
79 return stream << SRC_NAME("out", output, outputs, "tmp") << output->id;
80}
81
82/* Print data constructor (i.e: vec2(1.0f, 1.0f)). */
83static std::ostream &operator<<(std::ostream &stream, const Span<float> &span)
84{
85 stream << (eGPUType)span.size() << "(";
86 /* Use uint representation to allow exact same bit pattern even if NaN. This is
87 * because we can pass UINTs as floats for constants. */
88 const Span<uint32_t> uint_span = span.cast<uint32_t>();
89 for (const uint32_t &element : uint_span) {
90 char formatted_float[32];
91 SNPRINTF(formatted_float, "uintBitsToFloat(%uu)", element);
92 stream << formatted_float;
93 if (&element != &uint_span.last()) {
94 stream << ", ";
95 }
96 }
97 stream << ")";
98 return stream;
99}
100
101/* Trick type to change overload and keep a somewhat nice syntax. */
102struct GPUConstant : public GPUInput {};
103
104static std::ostream &operator<<(std::ostream &stream, const GPUConstant *input)
105{
106 stream << Span<float>(input->vec, input->type);
107 return stream;
108}
109
110namespace blender::gpu::shader {
111/* Needed to use the << operators from nested namespaces. :(
112 * https://stackoverflow.com/questions/5195512/namespaces-and-operator-resolution */
113using ::operator<<;
114} // namespace blender::gpu::shader
115
117
118/* -------------------------------------------------------------------- */
121
123{
124 auto index = sampler_names.size();
125 sampler_names.append(std::make_unique<NameEntry>());
126 char *name_buffer = sampler_names[index]->data();
127 memcpy(name_buffer, name, 32);
128 return name_buffer;
129}
130
131GPUCodegen::GPUCodegen(GPUMaterial *mat_, GPUNodeGraph *graph_, const char *debug_name)
132 : mat(*mat_), graph(*graph_)
133{
136 create_info = MEM_new<GPUCodegenCreateInfo>(__func__, debug_name);
137 output.create_info = reinterpret_cast<GPUShaderCreateInfo *>(
138 static_cast<ShaderCreateInfo *>(create_info));
139}
140
142{
143 MEM_SAFE_FREE(cryptomatte_input_);
144 MEM_delete(create_info);
145 BLI_freelistN(&ubo_inputs_);
146};
147
149{
150 /* If each of the maximal attributes are exceeded, we can optimize, but we should also ensure
151 * the baseline is met. */
152 bool do_optimize = (nodes_total_ >= 60 || textures_total_ >= 4 || uniforms_total_ >= 64) &&
153 (textures_total_ >= 1 && uniforms_total_ >= 8 && nodes_total_ >= 4);
154 return do_optimize;
155}
156
158{
159 if (BLI_listbase_is_empty(&graph.attributes)) {
160 output.attr_load.clear();
161 return;
162 }
163
165
166 info.interface_generated = MEM_new<StageInterfaceInfo>(__func__, "codegen_iface", "var_attrs");
168 info.vertex_out(iface);
169
170 /* Input declaration, loading / assignment to interface and geometry shader passthrough. */
171 std::stringstream load_ss;
172
173 int slot = GPU_shader_draw_parameters_support() ? 15 : 14;
174 LISTBASE_FOREACH (GPUMaterialAttribute *, attr, &graph.attributes) {
175 if (slot == -1) {
176 BLI_assert_msg(0, "Too many attributes");
177 break;
178 }
179 STRNCPY(info.name_buffer.attr_names[slot], attr->input_name);
180 SNPRINTF(info.name_buffer.var_names[slot], "v%d", attr->id);
181
182 StringRefNull attr_name = info.name_buffer.attr_names[slot];
183 StringRefNull var_name = info.name_buffer.var_names[slot];
184
185 eGPUType input_type, iface_type;
186
187 load_ss << "var_attrs." << var_name;
188 if (attr->is_hair_length) {
189 iface_type = input_type = GPU_FLOAT;
190 load_ss << " = attr_load_" << input_type << "(" << attr_name << ");\n";
191 }
192 else {
193 switch (attr->type) {
194 case CD_ORCO:
195 /* Need vec4 to detect usage of default attribute. */
196 input_type = GPU_VEC4;
197 iface_type = GPU_VEC3;
198 load_ss << " = attr_load_orco(" << attr_name << ");\n";
199 break;
200 case CD_TANGENT:
201 iface_type = input_type = GPU_VEC4;
202 load_ss << " = attr_load_tangent(" << attr_name << ");\n";
203 break;
204 default:
205 iface_type = input_type = GPU_VEC4;
206 load_ss << " = attr_load_" << input_type << "(" << attr_name << ");\n";
207 break;
208 }
209 }
210
211 info.vertex_in(slot--, to_type(input_type), attr_name);
212 iface.smooth(to_type(iface_type), var_name);
213 }
214
215 output.attr_load = load_ss.str();
216}
217
219{
221
222 std::stringstream ss;
223
224 /* Textures. */
225 int slot = 0;
226 LISTBASE_FOREACH (GPUMaterialTexture *, tex, &graph.textures) {
227 if (tex->colorband) {
228 const char *name = info.name_buffer.append_sampler_name(tex->sampler_name);
229 info.sampler(slot++, ImageType::Float1DArray, name, Frequency::BATCH);
230 }
231 else if (tex->sky) {
232 const char *name = info.name_buffer.append_sampler_name(tex->sampler_name);
233 info.sampler(0, ImageType::Float2DArray, name, Frequency::BATCH);
234 }
235 else if (tex->tiled_mapping_name[0] != '\0') {
236 const char *name = info.name_buffer.append_sampler_name(tex->sampler_name);
237 info.sampler(slot++, ImageType::Float2DArray, name, Frequency::BATCH);
238
239 const char *name_mapping = info.name_buffer.append_sampler_name(tex->tiled_mapping_name);
240 info.sampler(slot++, ImageType::Float1DArray, name_mapping, Frequency::BATCH);
241 }
242 else {
243 const char *name = info.name_buffer.append_sampler_name(tex->sampler_name);
244 info.sampler(slot++, ImageType::Float2D, name, Frequency::BATCH);
245 }
246 }
247
248 /* Increment heuristic. */
249 textures_total_ = slot;
250
251 if (!BLI_listbase_is_empty(&ubo_inputs_)) {
252 /* NOTE: generate_uniform_buffer() should have sorted the inputs before this. */
253 ss << "struct NodeTree {\n";
254 LISTBASE_FOREACH (LinkData *, link, &ubo_inputs_) {
255 GPUInput *input = (GPUInput *)(link->data);
256 if (input->source == GPU_SOURCE_CRYPTOMATTE) {
257 ss << input->type << " crypto_hash;\n";
258 }
259 else {
260 ss << input->type << " u" << input->id << ";\n";
261 }
262 }
263 ss << "};\n\n";
264
266 }
267
268 if (!BLI_listbase_is_empty(&graph.uniform_attrs.list)) {
269 ss << "struct UniformAttrs {\n";
270 LISTBASE_FOREACH (GPUUniformAttr *, attr, &graph.uniform_attrs.list) {
271 ss << "vec4 attr" << attr->id << ";\n";
272 }
273 ss << "};\n\n";
274
275 /* TODO(fclem): Use the macro for length. Currently not working for EEVEE. */
276 /* DRW_RESOURCE_CHUNK_LEN = 512 */
277 info.uniform_buf(2, "UniformAttrs", GPU_ATTRIBUTE_UBO_BLOCK_NAME "[512]", Frequency::BATCH);
278 }
279
280 if (!BLI_listbase_is_empty(&graph.layer_attrs)) {
281 info.additional_info("draw_layer_attributes");
282 }
283
284 info.typedef_source_generated = ss.str();
285}
286
288{
290
291 void *value;
292 Vector<std::string> source_files;
293
294 /* Iterate over libraries. We need to keep this struct intact in case it is required for the
295 * optimization pass. The first pass just collects the keys from the GSET, given items in a GSET
296 * are unordered this can cause order differences between invocations, so we collect the keys
297 * first, and sort them before doing actual work, to guarantee stable behavior while still
298 * having cheap insertions into the GSET */
299 GHashIterator *ihash = BLI_ghashIterator_new((GHash *)graph.used_libraries);
300 while (!BLI_ghashIterator_done(ihash)) {
301 value = BLI_ghashIterator_getKey(ihash);
302 source_files.append((const char *)value);
304 }
306
307 std::sort(source_files.begin(), source_files.end());
308 for (auto &key : source_files) {
309 auto deps = gpu_shader_dependency_get_resolved_source(key.c_str());
310 info.dependencies_generated.extend_non_duplicates(deps);
311 }
312}
313
314void GPUCodegen::node_serialize(std::stringstream &eval_ss, const GPUNode *node)
315{
316 /* Declare constants. */
318 switch (input->source) {
320 eval_ss << input->type << " " << input << "; " << input->function_call << input << ");\n";
321 break;
323 eval_ss << input->type << " " << input << " = CLOSURE_DEFAULT;\n";
324 break;
326 eval_ss << input->type << " " << input << " = " << (GPUConstant *)input << ";\n";
327 break;
328 default:
329 break;
330 }
331 }
332 /* Declare temporary variables for node output storage. */
334 eval_ss << output->type << " " << output << ";\n";
335 }
336
337 /* Function call. */
338 eval_ss << node->name << "(";
339 /* Input arguments. */
341 switch (input->source) {
343 case GPU_SOURCE_ATTR: {
344 /* These inputs can have non matching types. Do conversion. */
345 eGPUType to = input->type;
346 eGPUType from = (input->source == GPU_SOURCE_ATTR) ? input->attr->gputype :
347 input->link->output->type;
348 if (from != to) {
349 /* Use defines declared inside codegen_lib (i.e: vec4_from_float). */
350 eval_ss << to << "_from_" << from << "(";
351 }
352
353 if (input->source == GPU_SOURCE_ATTR) {
354 eval_ss << input;
355 }
356 else {
357 eval_ss << input->link->output;
358 }
359
360 if (from != to) {
361 /* Special case that needs luminance coefficients as argument. */
362 if (from == GPU_VEC4 && to == GPU_FLOAT) {
363 float coefficients[3];
365 eval_ss << ", " << Span<float>(coefficients, 3);
366 }
367
368 eval_ss << ")";
369 }
370 break;
371 }
372 default:
373 eval_ss << input;
374 break;
375 }
376 eval_ss << ", ";
377 }
378 /* Output arguments. */
379 LISTBASE_FOREACH (GPUOutput *, output, &node->outputs) {
380 eval_ss << output;
381 if (output->next) {
382 eval_ss << ", ";
383 }
384 }
385 eval_ss << ");\n\n";
386
387 /* Increment heuristic. */
388 nodes_total_++;
389}
390
391std::string GPUCodegen::graph_serialize(eGPUNodeTag tree_tag,
392 GPUNodeLink *output_link,
393 const char *output_default)
394{
395 if (output_link == nullptr && output_default == nullptr) {
396 return "";
397 }
398
399 std::stringstream eval_ss;
400 bool has_nodes = false;
401 /* NOTE: The node order is already top to bottom (or left to right in node editor)
402 * because of the evaluation order inside ntreeExecGPUNodes(). */
403 LISTBASE_FOREACH (GPUNode *, node, &graph.nodes) {
404 if ((node->tag & tree_tag) == 0) {
405 continue;
406 }
407 node_serialize(eval_ss, node);
408 has_nodes = true;
409 }
410
411 if (!has_nodes) {
412 return "";
413 }
414
415 if (output_link) {
416 eval_ss << "return " << output_link->output << ";\n";
417 }
418 else {
419 /* Default output in case there are only AOVs. */
420 eval_ss << "return " << output_default << ";\n";
421 }
422
423 std::string str = eval_ss.str();
424 BLI_hash_mm2a_add(&hm2a_, reinterpret_cast<const uchar *>(str.c_str()), str.size());
425 return str;
426}
427
428std::string GPUCodegen::graph_serialize(eGPUNodeTag tree_tag)
429{
430 std::stringstream eval_ss;
431 LISTBASE_FOREACH (GPUNode *, node, &graph.nodes) {
432 if (node->tag & tree_tag) {
433 node_serialize(eval_ss, node);
434 }
435 }
436 std::string str = eval_ss.str();
437 BLI_hash_mm2a_add(&hm2a_, reinterpret_cast<const uchar *>(str.c_str()), str.size());
438 return str;
439}
440
442{
443 cryptomatte_input_ = MEM_callocN<GPUInput>(__func__);
444 cryptomatte_input_->type = GPU_FLOAT;
445 cryptomatte_input_->source = GPU_SOURCE_CRYPTOMATTE;
446
447 float material_hash = 0.0f;
449 if (material) {
451 BLI_strnlen(material->id.name + 2, MAX_NAME - 2));
452 material_hash = hash.float_encoded();
453 }
454 cryptomatte_input_->vec[0] = material_hash;
455
456 BLI_addtail(&ubo_inputs_, BLI_genericNodeN(cryptomatte_input_));
457}
458
460{
461 /* Extract uniform inputs. */
462 LISTBASE_FOREACH (GPUNode *, node, &graph.nodes) {
464 if (input->source == GPU_SOURCE_UNIFORM && !input->link) {
465 /* We handle the UBO uniforms separately. */
466 BLI_addtail(&ubo_inputs_, BLI_genericNodeN(input));
467 uniforms_total_++;
468 }
469 }
470 }
471 if (!BLI_listbase_is_empty(&ubo_inputs_)) {
472 /* This sorts the inputs based on size. */
474 }
475}
476
477/* Sets id for unique names for all inputs, resources and temp variables. */
478void GPUCodegen::set_unique_ids()
479{
480 int id = 1;
481 LISTBASE_FOREACH (GPUNode *, node, &graph.nodes) {
483 input->id = id++;
484 }
486 output->id = id++;
487 }
488 }
489}
490
492{
493 set_unique_ids();
494
495 output.surface = graph_serialize(
496 GPU_NODE_TAG_SURFACE | GPU_NODE_TAG_AOV, graph.outlink_surface, "CLOSURE_DEFAULT");
497 output.volume = graph_serialize(GPU_NODE_TAG_VOLUME, graph.outlink_volume, "CLOSURE_DEFAULT");
498 output.displacement = graph_serialize(
499 GPU_NODE_TAG_DISPLACEMENT, graph.outlink_displacement, nullptr);
500 output.thickness = graph_serialize(GPU_NODE_TAG_THICKNESS, graph.outlink_thickness, nullptr);
501 if (!BLI_listbase_is_empty(&graph.outlink_compositor)) {
502 output.composite = graph_serialize(GPU_NODE_TAG_COMPOSITOR);
503 }
504
505 if (!BLI_listbase_is_empty(&graph.material_functions)) {
506 std::stringstream eval_ss;
507 eval_ss << "\n/* Generated Functions */\n\n";
508 LISTBASE_FOREACH (GPUNodeGraphFunctionLink *, func_link, &graph.material_functions) {
509 /* Untag every node in the graph to avoid serializing nodes from other functions */
510 LISTBASE_FOREACH (GPUNode *, node, &graph.nodes) {
512 }
513 /* Tag only the nodes needed for the current function */
514 gpu_nodes_tag(func_link->outlink, GPU_NODE_TAG_FUNCTION);
515 const std::string fn = graph_serialize(GPU_NODE_TAG_FUNCTION, func_link->outlink);
516 eval_ss << "float " << func_link->name << "() {\n" << fn << "}\n\n";
517 }
518 output.material_functions = eval_ss.str();
519 /* Leave the function tags as they were before serialization */
520 LISTBASE_FOREACH (GPUNodeGraphFunctionLink *, funclink, &graph.material_functions) {
521 gpu_nodes_tag(funclink->outlink, GPU_NODE_TAG_FUNCTION);
522 }
523 }
524
525 LISTBASE_FOREACH (GPUMaterialAttribute *, attr, &graph.attributes) {
526 BLI_hash_mm2a_add(&hm2a_, (uchar *)attr->name, strlen(attr->name));
527 }
528
529 hash_ = BLI_hash_mm2a_end(&hm2a_);
530}
531
#define BLI_assert(a)
Definition BLI_assert.h:46
#define BLI_assert_msg(a, msg)
Definition BLI_assert.h:53
BLI_INLINE void * BLI_ghashIterator_getKey(GHashIterator *ghi) ATTR_WARN_UNUSED_RESULT
Definition BLI_ghash.h:295
void BLI_ghashIterator_step(GHashIterator *ghi)
Definition BLI_ghash.cc:911
void BLI_ghashIterator_free(GHashIterator *ghi)
Definition BLI_ghash.cc:925
GHashIterator * BLI_ghashIterator_new(GHash *gh) ATTR_MALLOC ATTR_WARN_UNUSED_RESULT
Definition BLI_ghash.cc:888
BLI_INLINE bool BLI_ghashIterator_done(const GHashIterator *ghi) ATTR_WARN_UNUSED_RESULT
Definition BLI_ghash.h:307
void BLI_hash_mm2a_init(BLI_HashMurmur2A *mm2, uint32_t seed)
Definition hash_mm2a.cc:62
void BLI_hash_mm2a_add(BLI_HashMurmur2A *mm2, const unsigned char *data, size_t len)
Definition hash_mm2a.cc:70
void BLI_hash_mm2a_add_int(BLI_HashMurmur2A *mm2, int data)
Definition hash_mm2a.cc:85
uint32_t BLI_hash_mm2a_end(BLI_HashMurmur2A *mm2)
Definition hash_mm2a.cc:90
LinkData * BLI_genericNodeN(void *data)
Definition listbase.cc:922
#define LISTBASE_FOREACH(type, var, list)
BLI_INLINE bool BLI_listbase_is_empty(const ListBase *lb)
void void BLI_freelistN(ListBase *listbase) ATTR_NONNULL(1)
Definition listbase.cc:497
void BLI_addtail(ListBase *listbase, void *vlink) ATTR_NONNULL(1)
Definition listbase.cc:111
#define SNPRINTF(dst, format,...)
Definition BLI_string.h:599
int char char int int int int size_t BLI_strnlen(const char *str, size_t maxlen) ATTR_WARN_UNUSED_RESULT ATTR_NONNULL(1)
Definition string.cc:923
char * STRNCPY(char(&dst)[N], const char *src)
Definition BLI_string.h:688
unsigned char uchar
bool GPU_shader_draw_parameters_support()
uint64_t GPU_material_uuid_get(GPUMaterial *mat)
Material * GPU_material_get_material(GPUMaterial *material)
eGPUMaterialFlag GPU_material_flag(const GPUMaterial *mat)
eGPUType
@ GPU_VEC4
@ GPU_VEC3
@ GPU_FLOAT
void GPU_material_uniform_buffer_create(GPUMaterial *material, ListBase *inputs)
#define GPU_UBO_BLOCK_NAME
static constexpr int GPU_NODE_TREE_UBO_SLOT
#define GPU_ATTRIBUTE_UBO_BLOCK_NAME
BLI_INLINE void IMB_colormanagement_get_luminance_coefficients(float r_rgb[3])
Read Guarded memory(de)allocation.
ATTR_WARN_UNUSED_RESULT const void * element
Span< NewT > constexpr cast() const
Definition BLI_span.hh:418
constexpr int64_t size() const
Definition BLI_span.hh:252
constexpr const T & last(const int64_t n=0) const
Definition BLI_span.hh:325
void append(const T &value)
GPUCodegenCreateInfo * create_info
GPUCodegen(GPUMaterial *mat_, GPUNodeGraph *graph_, const char *debug_name)
#define str(s)
#define SRC_NAME(io, list, link, type)
#define input
#define output
void gpu_nodes_tag(GPUNodeLink *link, eGPUNodeTag tag)
eGPUNodeTag
@ GPU_NODE_TAG_SURFACE
@ GPU_NODE_TAG_DISPLACEMENT
@ GPU_NODE_TAG_VOLUME
@ GPU_NODE_TAG_FUNCTION
@ GPU_NODE_TAG_COMPOSITOR
@ GPU_NODE_TAG_THICKNESS
@ GPU_NODE_TAG_AOV
@ GPU_SOURCE_CONSTANT
@ GPU_SOURCE_FUNCTION_CALL
@ GPU_SOURCE_ATTR
@ GPU_SOURCE_CRYPTOMATTE
@ GPU_SOURCE_UNIFORM
@ GPU_SOURCE_OUTPUT
@ GPU_SOURCE_TEX_TILED_MAPPING
@ GPU_SOURCE_UNIFORM_ATTR
@ GPU_SOURCE_LAYER_ATTR
@ GPU_SOURCE_STRUCT
@ GPU_SOURCE_TEX
#define MEM_SAFE_FREE(v)
#define MAX_NAME
void * MEM_callocN(size_t len, const char *str)
Definition mallocn.cc:118
static Type to_type(const eGPUType type)
Vector< StringRefNull > gpu_shader_dependency_get_resolved_source(const StringRefNull shader_source_name)
std::ostream & operator<<(std::ostream &stream, const eAlpha &space)
Definition BLI_color.cc:15
static blender::bke::bNodeSocketTemplate outputs[]
static blender::bke::bNodeSocketTemplate inputs[]
#define hash
Definition noise_c.cc:154
eGPUNodeTag tag
ListBase outputs
ListBase inputs
const char * name
char name[66]
Definition DNA_ID.h:415
char attr_names[16][GPU_MAX_SAFE_ATTR_NAME+1]
const char * append_sampler_name(const char name[32])
Vector< std::unique_ptr< NameEntry >, 16 > sampler_names
Describe inputs & outputs, stage interfaces, resources and sources of a shader. If all data is correc...
Self & vertex_in(int slot, Type type, StringRefNull name)
Self & additional_info(StringRefNull info_name)
Self & vertex_out(StageInterfaceInfo &interface)
Self & sampler(int slot, ImageType type, StringRefNull name, Frequency freq=Frequency::PASS, GPUSamplerState sampler=GPUSamplerState::internal_sampler())
Self & uniform_buf(int slot, StringRefNull type_name, StringRefNull name, Frequency freq=Frequency::PASS)
Self & smooth(Type type, StringRefNull _name)