Blender V4.5
gl_shader.cc
Go to the documentation of this file.
1/* SPDX-FileCopyrightText: 2020 Blender Authors
2 *
3 * SPDX-License-Identifier: GPL-2.0-or-later */
4
8
9#include <iomanip>
10
11#include "BKE_appdir.hh"
12#include "BKE_global.hh"
13
14#include "BLI_fileops.h"
15#include "BLI_path_utils.hh"
16#include "BLI_string.h"
17#include "BLI_time.h"
18#include "BLI_vector.hh"
19
20#include "BLI_system.h"
21#include BLI_SYSTEM_PID_H
22
23#include "GPU_capabilities.hh"
24#include "GPU_debug.hh"
25#include "GPU_platform.hh"
28
29#include "gl_debug.hh"
30#include "gl_vertex_buffer.hh"
31
33#include "gl_shader.hh"
35
36#include <sstream>
37#include <stdio.h>
38#ifdef WIN32
39# define popen _popen
40# define pclose _pclose
41#endif
42
43using namespace blender;
44using namespace blender::gpu;
45using namespace blender::gpu::shader;
46
48
49/* -------------------------------------------------------------------- */
52
54{
55#if 0 /* Would be nice to have, but for now the Deferred compilation \
56 * does not have a GPUContext. */
57 BLI_assert(GLContext::get() != nullptr);
58#endif
59}
60
62{
63#if 0 /* Would be nice to have, but for now the Deferred compilation \
64 * does not have a GPUContext. */
65 BLI_assert(GLContext::get() != nullptr);
66#endif
67}
68
69void GLShader::init(const shader::ShaderCreateInfo &info, bool is_batch_compilation)
70{
71 async_compilation_ = is_batch_compilation;
72
73 /* Extract the constants names from info and store them locally. */
74 for (const SpecializationConstant &constant : info.specialization_constants_) {
75 specialization_constant_names_.append(constant.name.c_str());
76 }
77
78 /* NOTE: This is not threadsafe with regards to the specialization constants state access.
79 * The shader creation must be externally synchronized. */
80 main_program_ = program_cache_
81 .lookup_or_add_cb(constants->values,
82 []() { return std::make_unique<GLProgram>(); })
83 .get();
84 if (!main_program_->program_id) {
85 main_program_->program_id = glCreateProgram();
86 debug::object_label(GL_PROGRAM, main_program_->program_id, name);
87 }
88}
89
91{
92 main_program_ = program_cache_
93 .lookup_or_add_cb(constants->values,
94 []() { return std::make_unique<GLProgram>(); })
95 .get();
96 if (!main_program_->program_id) {
97 main_program_->program_id = glCreateProgram();
98 debug::object_label(GL_PROGRAM, main_program_->program_id, name);
99 }
100}
101
103
104/* -------------------------------------------------------------------- */
107
108static const char *to_string(const Interpolation &interp)
109{
110 switch (interp) {
112 return "smooth";
114 return "flat";
116 return "noperspective";
117 default:
118 return "unknown";
119 }
120}
121
122static const char *to_string(const Type &type)
123{
124 switch (type) {
125 case Type::float_t:
126 return "float";
127 case Type::float2_t:
128 return "vec2";
129 case Type::float3_t:
130 return "vec3";
131 case Type::float4_t:
132 return "vec4";
133 case Type::float3x3_t:
134 return "mat3";
135 case Type::float4x4_t:
136 return "mat4";
137 case Type::uint_t:
138 return "uint";
139 case Type::uint2_t:
140 return "uvec2";
141 case Type::uint3_t:
142 return "uvec3";
143 case Type::uint4_t:
144 return "uvec4";
145 case Type::int_t:
146 return "int";
147 case Type::int2_t:
148 return "ivec2";
149 case Type::int3_t:
150 return "ivec3";
151 case Type::int4_t:
152 return "ivec4";
153 case Type::bool_t:
154 return "bool";
155 /* Alias special types. */
156 case Type::uchar_t:
157 case Type::ushort_t:
158 return "uint";
159 case Type::uchar2_t:
160 case Type::ushort2_t:
161 return "uvec2";
162 case Type::uchar3_t:
163 case Type::ushort3_t:
164 return "uvec3";
165 case Type::uchar4_t:
166 case Type::ushort4_t:
167 return "uvec4";
168 case Type::char_t:
169 case Type::short_t:
170 return "int";
171 case Type::char2_t:
172 case Type::short2_t:
173 return "ivec2";
174 case Type::char3_t:
175 case Type::short3_t:
176 return "ivec3";
177 case Type::char4_t:
178 case Type::short4_t:
179 return "ivec4";
181 return "vec3";
182 }
184 return "unknown";
185}
186
188{
189 switch (type) {
190 case Type::float_t:
191 case Type::float2_t:
192 case Type::float3_t:
193 case Type::float4_t:
194 case Type::float3x3_t:
195 case Type::float4x4_t:
196 return Type::float_t;
197 case Type::uint_t:
198 case Type::uint2_t:
199 case Type::uint3_t:
200 case Type::uint4_t:
201 return Type::uint_t;
202 case Type::int_t:
203 case Type::int2_t:
204 case Type::int3_t:
205 case Type::int4_t:
206 case Type::bool_t:
207 return Type::int_t;
208 /* Alias special types. */
209 case Type::uchar_t:
210 case Type::uchar2_t:
211 case Type::uchar3_t:
212 case Type::uchar4_t:
213 case Type::ushort_t:
214 case Type::ushort2_t:
215 case Type::ushort3_t:
216 case Type::ushort4_t:
217 return Type::uint_t;
218 case Type::char_t:
219 case Type::char2_t:
220 case Type::char3_t:
221 case Type::char4_t:
222 case Type::short_t:
223 case Type::short2_t:
224 case Type::short3_t:
225 case Type::short4_t:
226 return Type::int_t;
228 return Type::float_t;
229 }
231 return Type::float_t;
232}
233
234static const char *to_string(const eGPUTextureFormat &type)
235{
236 switch (type) {
237 case GPU_RGBA8UI:
238 return "rgba8ui";
239 case GPU_RGBA8I:
240 return "rgba8i";
241 case GPU_RGBA8:
242 return "rgba8";
243 case GPU_RGBA32UI:
244 return "rgba32ui";
245 case GPU_RGBA32I:
246 return "rgba32i";
247 case GPU_RGBA32F:
248 return "rgba32f";
249 case GPU_RGBA16UI:
250 return "rgba16ui";
251 case GPU_RGBA16I:
252 return "rgba16i";
253 case GPU_RGBA16F:
254 return "rgba16f";
255 case GPU_RGBA16:
256 return "rgba16";
257 case GPU_RG8UI:
258 return "rg8ui";
259 case GPU_RG8I:
260 return "rg8i";
261 case GPU_RG8:
262 return "rg8";
263 case GPU_RG32UI:
264 return "rg32ui";
265 case GPU_RG32I:
266 return "rg32i";
267 case GPU_RG32F:
268 return "rg32f";
269 case GPU_RG16UI:
270 return "rg16ui";
271 case GPU_RG16I:
272 return "rg16i";
273 case GPU_RG16F:
274 return "rg16f";
275 case GPU_RG16:
276 return "rg16";
277 case GPU_R8UI:
278 return "r8ui";
279 case GPU_R8I:
280 return "r8i";
281 case GPU_R8:
282 return "r8";
283 case GPU_R32UI:
284 return "r32ui";
285 case GPU_R32I:
286 return "r32i";
287 case GPU_R32F:
288 return "r32f";
289 case GPU_R16UI:
290 return "r16ui";
291 case GPU_R16I:
292 return "r16i";
293 case GPU_R16F:
294 return "r16f";
295 case GPU_R16:
296 return "r16";
298 return "r11f_g11f_b10f";
299 case GPU_RGB10_A2:
300 return "rgb10_a2";
301 default:
302 return "unknown";
303 }
304}
305
306static const char *to_string(const PrimitiveIn &layout)
307{
308 switch (layout) {
310 return "points";
312 return "lines";
314 return "lines_adjacency";
316 return "triangles";
318 return "triangles_adjacency";
319 default:
320 return "unknown";
321 }
322}
323
324static const char *to_string(const PrimitiveOut &layout)
325{
326 switch (layout) {
328 return "points";
330 return "line_strip";
332 return "triangle_strip";
333 default:
334 return "unknown";
335 }
336}
337
338static const char *to_string(const DepthWrite &value)
339{
340 switch (value) {
341 case DepthWrite::ANY:
342 return "depth_any";
344 return "depth_greater";
345 case DepthWrite::LESS:
346 return "depth_less";
347 default:
348 return "depth_unchanged";
349 }
350}
351
352static void print_image_type(std::ostream &os,
353 const ImageType &type,
355{
356 switch (type) {
357 case ImageType::IntBuffer:
358 case ImageType::Int1D:
359 case ImageType::Int1DArray:
360 case ImageType::Int2D:
361 case ImageType::Int2DArray:
362 case ImageType::Int3D:
363 case ImageType::IntCube:
364 case ImageType::IntCubeArray:
365 case ImageType::AtomicInt2D:
366 case ImageType::AtomicInt2DArray:
367 case ImageType::AtomicInt3D:
368 os << "i";
369 break;
370 case ImageType::UintBuffer:
371 case ImageType::Uint1D:
372 case ImageType::Uint1DArray:
373 case ImageType::Uint2D:
374 case ImageType::Uint2DArray:
375 case ImageType::Uint3D:
376 case ImageType::UintCube:
377 case ImageType::UintCubeArray:
378 case ImageType::AtomicUint2D:
379 case ImageType::AtomicUint2DArray:
380 case ImageType::AtomicUint3D:
381 os << "u";
382 break;
383 default:
384 break;
385 }
386
388 os << "image";
389 }
390 else {
391 os << "sampler";
392 }
393
394 switch (type) {
395 case ImageType::FloatBuffer:
396 case ImageType::IntBuffer:
397 case ImageType::UintBuffer:
398 os << "Buffer";
399 break;
400 case ImageType::Float1D:
401 case ImageType::Float1DArray:
402 case ImageType::Int1D:
403 case ImageType::Int1DArray:
404 case ImageType::Uint1D:
405 case ImageType::Uint1DArray:
406 os << "1D";
407 break;
408 case ImageType::Float2D:
409 case ImageType::Float2DArray:
410 case ImageType::Int2D:
411 case ImageType::Int2DArray:
412 case ImageType::AtomicInt2D:
413 case ImageType::AtomicInt2DArray:
414 case ImageType::Uint2D:
415 case ImageType::Uint2DArray:
416 case ImageType::AtomicUint2D:
417 case ImageType::AtomicUint2DArray:
418 case ImageType::Shadow2D:
419 case ImageType::Shadow2DArray:
420 case ImageType::Depth2D:
421 case ImageType::Depth2DArray:
422 os << "2D";
423 break;
424 case ImageType::Float3D:
425 case ImageType::Int3D:
426 case ImageType::Uint3D:
427 case ImageType::AtomicInt3D:
428 case ImageType::AtomicUint3D:
429 os << "3D";
430 break;
431 case ImageType::FloatCube:
432 case ImageType::FloatCubeArray:
433 case ImageType::IntCube:
434 case ImageType::IntCubeArray:
435 case ImageType::UintCube:
436 case ImageType::UintCubeArray:
437 case ImageType::ShadowCube:
438 case ImageType::ShadowCubeArray:
439 case ImageType::DepthCube:
440 case ImageType::DepthCubeArray:
441 os << "Cube";
442 break;
443 default:
444 break;
445 }
446
447 switch (type) {
448 case ImageType::Float1DArray:
449 case ImageType::Float2DArray:
450 case ImageType::FloatCubeArray:
451 case ImageType::Int1DArray:
452 case ImageType::Int2DArray:
453 case ImageType::IntCubeArray:
454 case ImageType::Uint1DArray:
455 case ImageType::Uint2DArray:
456 case ImageType::AtomicUint2DArray:
457 case ImageType::UintCubeArray:
458 case ImageType::Shadow2DArray:
459 case ImageType::ShadowCubeArray:
460 case ImageType::Depth2DArray:
461 case ImageType::DepthCubeArray:
462 os << "Array";
463 break;
464 default:
465 break;
466 }
467
468 switch (type) {
469 case ImageType::Shadow2D:
470 case ImageType::Shadow2DArray:
471 case ImageType::ShadowCube:
472 case ImageType::ShadowCubeArray:
473 os << "Shadow";
474 break;
475 default:
476 break;
477 }
478 os << " ";
479}
480
481static std::ostream &print_qualifier(std::ostream &os, const Qualifier &qualifiers)
482{
483 if (bool(qualifiers & Qualifier::no_restrict) == false) {
484 os << "restrict ";
485 }
486 if (bool(qualifiers & Qualifier::read) == false) {
487 os << "writeonly ";
488 }
489 if (bool(qualifiers & Qualifier::write) == false) {
490 os << "readonly ";
491 }
492 return os;
493}
494
495static void print_resource(std::ostream &os,
497 bool auto_resource_location)
498{
499 if (auto_resource_location && res.bind_type == ShaderCreateInfo::Resource::BindType::SAMPLER) {
500 /* Skip explicit binding location for samplers when not needed, since drivers can usually
501 * handle more sampler declarations this way (as long as they're not actually used by the
502 * shader). See #105661. */
503 }
505 os << "layout(binding = " << res.slot;
507 os << ", " << to_string(res.image.format);
508 }
510 os << ", std140";
511 }
513 os << ", std430";
514 }
515 os << ") ";
516 }
518 os << "layout(std140) ";
519 }
520
521 int64_t array_offset;
522 StringRef name_no_array;
523
524 switch (res.bind_type) {
526 os << "uniform ";
528 os << res.sampler.name << ";\n";
529 break;
531 os << "uniform ";
533 print_image_type(os, res.image.type, res.bind_type);
534 os << res.image.name << ";\n";
535 break;
537 array_offset = res.uniformbuf.name.find_first_of("[");
538 name_no_array = (array_offset == -1) ? res.uniformbuf.name :
539 StringRef(res.uniformbuf.name.c_str(), array_offset);
540 os << "uniform " << name_no_array << " { " << res.uniformbuf.type_name << " _"
541 << res.uniformbuf.name << "; };\n";
542 break;
544 array_offset = res.storagebuf.name.find_first_of("[");
545 name_no_array = (array_offset == -1) ? res.storagebuf.name :
546 StringRef(res.storagebuf.name.c_str(), array_offset);
547 print_qualifier(os, res.storagebuf.qualifiers);
548 os << "buffer ";
549 os << name_no_array << " { " << res.storagebuf.type_name << " _" << res.storagebuf.name
550 << "; };\n";
551 break;
552 }
553}
554
555static void print_resource_alias(std::ostream &os, const ShaderCreateInfo::Resource &res)
556{
557 int64_t array_offset;
558 StringRef name_no_array;
559
560 switch (res.bind_type) {
562 array_offset = res.uniformbuf.name.find_first_of("[");
563 name_no_array = (array_offset == -1) ? res.uniformbuf.name :
564 StringRef(res.uniformbuf.name.c_str(), array_offset);
565 os << "#define " << name_no_array << " (_" << name_no_array << ")\n";
566 break;
568 array_offset = res.storagebuf.name.find_first_of("[");
569 name_no_array = (array_offset == -1) ? res.storagebuf.name :
570 StringRef(res.storagebuf.name.c_str(), array_offset);
571 os << "#define " << name_no_array << " (_" << name_no_array << ")\n";
572 break;
573 default:
574 break;
575 }
576}
577
578static void print_interface(std::ostream &os,
579 const StringRefNull &prefix,
580 const StageInterfaceInfo &iface,
581 const StringRefNull &suffix = "")
582{
583 /* TODO(@fclem): Move that to interface check. */
584 // if (iface.instance_name.is_empty()) {
585 // BLI_assert_msg(0, "Interfaces require an instance name for geometry shader.");
586 // std::cout << iface.name << ": Interfaces require an instance name for geometry shader.\n";
587 // continue;
588 // }
589 os << prefix << " " << iface.name << "{" << std::endl;
590 for (const StageInterfaceInfo::InOut &inout : iface.inouts) {
591 os << " " << to_string(inout.interp) << " " << to_string(inout.type) << " " << inout.name
592 << ";\n";
593 }
594 os << "}";
595 os << (iface.instance_name.is_empty() ? "" : "\n") << iface.instance_name << suffix << ";\n";
596}
597
598std::string GLShader::resources_declare(const ShaderCreateInfo &info) const
599{
600 std::stringstream ss;
601
602 ss << "\n/* Compilation Constants (pass-through). */\n";
603 for (const CompilationConstant &sc : info.compilation_constants_) {
604 ss << "const ";
605 switch (sc.type) {
606 case Type::int_t:
607 ss << "int " << sc.name << "=" << std::to_string(sc.value.i) << ";\n";
608 break;
609 case Type::uint_t:
610 ss << "uint " << sc.name << "=" << std::to_string(sc.value.u) << "u;\n";
611 break;
612 case Type::bool_t:
613 ss << "bool " << sc.name << "=" << (sc.value.u ? "true" : "false") << ";\n";
614 break;
615 default:
617 break;
618 }
619 }
620 /* NOTE: We define macros in GLSL to trigger compilation error if the resource names
621 * are reused for local variables. This is to match other backend behavior which needs accessors
622 * macros. */
623 ss << "\n/* Pass Resources. */\n";
624 for (const ShaderCreateInfo::Resource &res : info.pass_resources_) {
626 }
627 for (const ShaderCreateInfo::Resource &res : info.pass_resources_) {
628 print_resource_alias(ss, res);
629 }
630 ss << "\n/* Batch Resources. */\n";
631 for (const ShaderCreateInfo::Resource &res : info.batch_resources_) {
633 }
634 for (const ShaderCreateInfo::Resource &res : info.batch_resources_) {
635 print_resource_alias(ss, res);
636 }
637 ss << "\n/* Geometry Resources. */\n";
638 for (const ShaderCreateInfo::Resource &res : info.geometry_resources_) {
640 }
641 for (const ShaderCreateInfo::Resource &res : info.geometry_resources_) {
642 print_resource_alias(ss, res);
643 }
644 ss << "\n/* Push Constants. */\n";
645 int location = 0;
646 for (const ShaderCreateInfo::PushConst &uniform : info.push_constants_) {
647 /* See #131227: Work around legacy Intel bug when using layout locations. */
648 if (!info.specialization_constants_.is_empty()) {
649 ss << "layout(location = " << location << ") ";
650 location += std::max(1, uniform.array_size);
651 }
652 ss << "uniform " << to_string(uniform.type) << " " << uniform.name;
653 if (uniform.array_size > 0) {
654 ss << "[" << uniform.array_size << "]";
655 }
656 ss << ";\n";
657 }
658#if 0 /* #95278: This is not be enough to prevent some compilers think it is recursive. */
659 for (const ShaderCreateInfo::PushConst &uniform : info.push_constants_) {
660 /* #95278: Double macro to avoid some compilers think it is recursive. */
661 ss << "#define " << uniform.name << "_ " << uniform.name << "\n";
662 ss << "#define " << uniform.name << " (" << uniform.name << "_)\n";
663 }
664#endif
665 ss << "\n";
666 return ss.str();
667}
668
670 const shader::SpecializationConstants &constants_state) const
671{
672 std::stringstream ss;
673
674 ss << "/* Specialization Constants. */\n";
675 for (int constant_index : IndexRange(constants_state.types.size())) {
676 const StringRefNull name = specialization_constant_names_[constant_index];
677 gpu::shader::Type constant_type = constants_state.types[constant_index];
678 const SpecializationConstant::Value &value = constants_state.values[constant_index];
679
680 switch (constant_type) {
681 case Type::int_t:
682 ss << "const int " << name << "=" << std::to_string(value.i) << ";\n";
683 break;
684 case Type::uint_t:
685 ss << "const uint " << name << "=" << std::to_string(value.u) << "u;\n";
686 break;
687 case Type::bool_t:
688 ss << "const bool " << name << "=" << (value.u ? "true" : "false") << ";\n";
689 break;
690 case Type::float_t:
691 /* Use uint representation to allow exact same bit pattern even if NaN. */
692 ss << "const float " << name << "= uintBitsToFloat(" << std::to_string(value.u) << "u);\n";
693 break;
694 default:
696 break;
697 }
698 }
699 return ss.str();
700}
701
/**
 * Build a GLSL snippet that wraps the shader's `main` so code can be injected
 * before and after it (used to populate/flush shader globals).
 *
 * The returned snippet declares a prototype for the renamed original main,
 * defines a new `main` that runs \a pre_main, calls the original, then runs
 * \a post_main, and finally `#define`s `main` so the original source compiles
 * as `main_function_`.
 *
 * \param pre_main: GLSL statements executed before the original main.
 * \param post_main: GLSL statements executed after the original main.
 * \return The wrapper GLSL source to prepend to the stage source.
 *
 * NOTE: Parameters are now `const &` — they are only read, and this also
 * allows passing temporaries. Existing callers passing l-values are unaffected.
 */
static std::string main_function_wrapper(const std::string &pre_main,
                                         const std::string &post_main)
{
  std::stringstream ss;
  /* Prototype for the original main. */
  ss << "\n";
  ss << "void main_function_();\n";
  /* Wrapper to the main function in order to inject code processing on globals. */
  ss << "void main() {\n";
  ss << pre_main;
  ss << " main_function_();\n";
  ss << post_main;
  ss << "}\n";
  /* Rename the original main. */
  ss << "#define main main_function_\n";
  ss << "\n";
  return ss.str();
}
719
721{
722 std::stringstream ss;
723 std::string post_main;
724
725 ss << "\n/* Inputs. */\n";
726 for (const ShaderCreateInfo::VertIn &attr : info.vertex_inputs_) {
728 /* Fix issue with AMDGPU-PRO + workbench_prepass_mesh_vert.glsl being quantized. */
730 {
731 ss << "layout(location = " << attr.index << ") ";
732 }
733 ss << "in " << to_string(attr.type) << " " << attr.name << ";\n";
734 }
735 ss << "\n/* Interfaces. */\n";
736 for (const StageInterfaceInfo *iface : info.vertex_out_interfaces_) {
737 print_interface(ss, "out", *iface);
738 }
739 const bool has_geometry_stage = do_geometry_shader_injection(&info) ||
741 const bool do_layer_output = bool(info.builtins_ & BuiltinBits::LAYER);
742 const bool do_viewport_output = bool(info.builtins_ & BuiltinBits::VIEWPORT_INDEX);
743 if (has_geometry_stage) {
744 if (do_layer_output) {
745 ss << "out int gpu_Layer;\n";
746 }
747 if (do_viewport_output) {
748 ss << "out int gpu_ViewportIndex;\n";
749 }
750 }
751 else {
752 if (do_layer_output) {
753 ss << "#define gpu_Layer gl_Layer\n";
754 }
755 if (do_viewport_output) {
756 ss << "#define gpu_ViewportIndex gl_ViewportIndex\n";
757 }
758 }
759 if (bool(info.builtins_ & BuiltinBits::CLIP_CONTROL)) {
760 if (GLContext::clip_control_support && !has_geometry_stage) {
761 /* Assume clip range is set to 0..1 and remap the range just like Vulkan and Metal.
762 * If geometry stage is needed, do that remapping inside the geometry shader stage. */
763 post_main += "gl_Position.z = (gl_Position.z + gl_Position.w) * 0.5;\n";
764 }
765 }
766 if (bool(info.builtins_ & BuiltinBits::BARYCENTRIC_COORD)) {
768 /* Disabled or unsupported. */
769 }
770 else if (epoxy_has_gl_extension("GL_AMD_shader_explicit_vertex_parameter")) {
771 /* Need this for stable barycentric. */
772 ss << "flat out vec4 gpu_pos_flat;\n";
773 ss << "out vec4 gpu_pos;\n";
774
775 post_main += " gpu_pos = gpu_pos_flat = gl_Position;\n";
776 }
777 }
778 ss << "\n";
779
780 if (post_main.empty() == false) {
781 std::string pre_main;
782 ss << main_function_wrapper(pre_main, post_main);
783 }
784 return ss.str();
785}
786
788{
789 std::stringstream ss;
790 std::string pre_main, post_main;
791
792 ss << "\n/* Interfaces. */\n";
793 const Span<StageInterfaceInfo *> in_interfaces = info.geometry_source_.is_empty() ?
796 for (const StageInterfaceInfo *iface : in_interfaces) {
797 print_interface(ss, "in", *iface);
798 }
799 if (bool(info.builtins_ & BuiltinBits::LAYER)) {
800 ss << "#define gpu_Layer gl_Layer\n";
801 }
802 if (bool(info.builtins_ & BuiltinBits::VIEWPORT_INDEX)) {
803 ss << "#define gpu_ViewportIndex gl_ViewportIndex\n";
804 }
805 if (bool(info.builtins_ & BuiltinBits::BARYCENTRIC_COORD)) {
807 ss << "flat in vec4 gpu_pos[3];\n";
808 ss << "smooth in vec3 gpu_BaryCoord;\n";
809 ss << "noperspective in vec3 gpu_BaryCoordNoPersp;\n";
810 }
811 else if (epoxy_has_gl_extension("GL_AMD_shader_explicit_vertex_parameter")) {
812 /* NOTE(fclem): This won't work with geometry shader. Hopefully, we don't need geometry
813 * shader workaround if this extension/feature is detected. */
814 ss << "\n/* Stable Barycentric Coordinates. */\n";
815 ss << "flat in vec4 gpu_pos_flat;\n";
816 ss << "__explicitInterpAMD in vec4 gpu_pos;\n";
817 /* Globals. */
818 ss << "vec3 gpu_BaryCoord;\n";
819 ss << "vec3 gpu_BaryCoordNoPersp;\n";
820 ss << "\n";
821 ss << "vec2 stable_bary_(vec2 in_bary) {\n";
822 ss << " vec3 bary = vec3(in_bary, 1.0 - in_bary.x - in_bary.y);\n";
823 ss << " if (interpolateAtVertexAMD(gpu_pos, 0) == gpu_pos_flat) { return bary.zxy; }\n";
824 ss << " if (interpolateAtVertexAMD(gpu_pos, 2) == gpu_pos_flat) { return bary.yzx; }\n";
825 ss << " return bary.xyz;\n";
826 ss << "}\n";
827 ss << "\n";
828
829 pre_main += " gpu_BaryCoord = stable_bary_(gl_BaryCoordSmoothAMD);\n";
830 pre_main += " gpu_BaryCoordNoPersp = stable_bary_(gl_BaryCoordNoPerspAMD);\n";
831 }
832 }
833 if (info.early_fragment_test_) {
834 ss << "layout(early_fragment_tests) in;\n";
835 }
836 ss << "layout(" << to_string(info.depth_write_) << ") out float gl_FragDepth;\n";
837
838 ss << "\n/* Sub-pass Inputs. */\n";
841 /* Declare as inout but do not write to it. */
842 ss << "layout(location = " << std::to_string(input.index) << ") inout "
843 << to_string(input.type) << " " << input.name << ";\n";
844 }
845 else {
846 std::string image_name = "gpu_subpass_img_";
847 image_name += std::to_string(input.index);
848
849 /* Declare global for input. */
850 ss << to_string(input.type) << " " << input.name << ";\n";
851
852 /* IMPORTANT: We assume that the frame-buffer will be layered or not based on the layer
853 * built-in flag. */
854 bool is_layered_fb = bool(info.builtins_ & BuiltinBits::LAYER);
855 bool is_layered_input = ELEM(
856 input.img_type, ImageType::Uint2DArray, ImageType::Int2DArray, ImageType::Float2DArray);
857
858 /* Declare image. */
859 using Resource = ShaderCreateInfo::Resource;
860 /* NOTE(fclem): Using the attachment index as resource index might be problematic as it might
861 * collide with other resources. */
862 Resource res(Resource::BindType::SAMPLER, input.index);
863 res.sampler.type = input.img_type;
864 res.sampler.sampler = GPUSamplerState::default_sampler();
865 res.sampler.name = image_name;
866 print_resource(ss, res, false);
867
868 char swizzle[] = "xyzw";
869 swizzle[to_component_count(input.type)] = '\0';
870
871 std::string texel_co = (is_layered_input) ?
872 ((is_layered_fb) ? "ivec3(gl_FragCoord.xy, gpu_Layer)" :
873 /* This should fetch the attached layer.
874 * But this is not simple to set. For now
875 * assume it is always the first layer. */
876 "ivec3(gl_FragCoord.xy, 0)") :
877 "ivec2(gl_FragCoord.xy)";
878
879 std::stringstream ss_pre;
880 /* Populate the global before main using imageLoad. */
881 ss_pre << " " << input.name << " = texelFetch(" << image_name << ", " << texel_co << ", 0)."
882 << swizzle << ";\n";
883
884 pre_main += ss_pre.str();
885 }
886 }
887 ss << "\n/* Outputs. */\n";
889 ss << "layout(location = " << output.index;
890 switch (output.blend) {
891 case DualBlend::SRC_0:
892 ss << ", index = 0";
893 break;
894 case DualBlend::SRC_1:
895 ss << ", index = 1";
896 break;
897 default:
898 break;
899 }
900 ss << ") ";
901 ss << "out " << to_string(output.type) << " " << output.name << ";\n";
902 }
903 ss << "\n";
904
905 if (!pre_main.empty() || !post_main.empty()) {
906 ss << main_function_wrapper(pre_main, post_main);
907 }
908 return ss.str();
909}
910
912{
913 int max_verts = info.geometry_layout_.max_vertices;
914 int invocations = info.geometry_layout_.invocations;
915
916 std::stringstream ss;
917 ss << "\n/* Geometry Layout. */\n";
918 ss << "layout(" << to_string(info.geometry_layout_.primitive_in);
919 if (invocations != -1) {
920 ss << ", invocations = " << invocations;
921 }
922 ss << ") in;\n";
923
924 ss << "layout(" << to_string(info.geometry_layout_.primitive_out)
925 << ", max_vertices = " << max_verts << ") out;\n";
926 ss << "\n";
927 return ss.str();
928}
929
931 const StringRefNull &name)
932{
933 for (auto *iface : ifaces) {
934 if (iface->instance_name == name) {
935 return iface;
936 }
937 }
938 return nullptr;
939}
940
942{
943 std::stringstream ss;
944
945 ss << "\n/* Interfaces. */\n";
946 for (const StageInterfaceInfo *iface : info.vertex_out_interfaces_) {
947 bool has_matching_output_iface = find_interface_by_name(info.geometry_out_interfaces_,
948 iface->instance_name) != nullptr;
949 const char *suffix = (has_matching_output_iface) ? "_in[]" : "[]";
950 print_interface(ss, "in", *iface, suffix);
951 }
952 ss << "\n";
953 for (const StageInterfaceInfo *iface : info.geometry_out_interfaces_) {
954 bool has_matching_input_iface = find_interface_by_name(info.vertex_out_interfaces_,
955 iface->instance_name) != nullptr;
956 const char *suffix = (has_matching_input_iface) ? "_out" : "";
957 print_interface(ss, "out", *iface, suffix);
958 }
959 ss << "\n";
960 return ss.str();
961}
962
964{
965 std::stringstream ss;
966 ss << "\n/* Compute Layout. */\n";
967 ss << "layout(";
968 ss << " local_size_x = " << info.compute_layout_.local_size_x;
969 ss << ", local_size_y = " << info.compute_layout_.local_size_y;
970 ss << ", local_size_z = " << info.compute_layout_.local_size_z;
971 ss << ") in;\n";
972 ss << "\n";
973 return ss.str();
974}
975
976
977/* -------------------------------------------------------------------- */
981
982std::string GLShader::workaround_geometry_shader_source_create(
983 const shader::ShaderCreateInfo &info)
984{
985 std::stringstream ss;
986
987 const bool do_layer_output = bool(info.builtins_ & BuiltinBits::LAYER);
988 const bool do_viewport_output = bool(info.builtins_ & BuiltinBits::VIEWPORT_INDEX);
989 const bool do_barycentric_workaround = !GLContext::native_barycentric_support &&
991
992 shader::ShaderCreateInfo info_modified = info;
993 info_modified.geometry_out_interfaces_ = info_modified.vertex_out_interfaces_;
999
1000 ss << geometry_layout_declare(info_modified);
1001 ss << geometry_interface_declare(info_modified);
1002 if (do_layer_output) {
1003 ss << "in int gpu_Layer[];\n";
1004 }
1005 if (do_viewport_output) {
1006 ss << "in int gpu_ViewportIndex[];\n";
1007 }
1008
1009 if (do_barycentric_workaround) {
1010 ss << "flat out vec4 gpu_pos[3];\n";
1011 ss << "smooth out vec3 gpu_BaryCoord;\n";
1012 ss << "noperspective out vec3 gpu_BaryCoordNoPersp;\n";
1013 }
1014 ss << "\n";
1015
1016 ss << "void main()\n";
1017 ss << "{\n";
1018 if (do_barycentric_workaround) {
1019 ss << " gpu_pos[0] = gl_in[0].gl_Position;\n";
1020 ss << " gpu_pos[1] = gl_in[1].gl_Position;\n";
1021 ss << " gpu_pos[2] = gl_in[2].gl_Position;\n";
1022 }
1023 for (auto i : IndexRange(3)) {
1024 for (const StageInterfaceInfo *iface : info_modified.vertex_out_interfaces_) {
1025 for (auto &inout : iface->inouts) {
1026 ss << " " << iface->instance_name << "_out." << inout.name;
1027 ss << " = " << iface->instance_name << "_in[" << i << "]." << inout.name << ";\n";
1028 }
1029 }
1030 if (do_barycentric_workaround) {
1031 ss << " gpu_BaryCoordNoPersp = gpu_BaryCoord =";
1032 ss << " vec3(" << int(i == 0) << ", " << int(i == 1) << ", " << int(i == 2) << ");\n";
1033 }
1034 ss << " gl_Position = gl_in[" << i << "].gl_Position;\n";
1035 if (bool(info.builtins_ & BuiltinBits::CLIP_CONTROL)) {
1037 /* Assume clip range is set to 0..1 and remap the range just like Vulkan and Metal. */
1038 ss << "gl_Position.z = (gl_Position.z + gl_Position.w) * 0.5;\n";
1039 }
1040 }
1041 if (do_layer_output) {
1042 ss << " gl_Layer = gpu_Layer[" << i << "];\n";
1043 }
1044 if (do_viewport_output) {
1045 ss << " gl_ViewportIndex = gpu_ViewportIndex[" << i << "];\n";
1046 }
1047 ss << " EmitVertex();\n";
1048 }
1049 ss << "}\n";
1050 return ss.str();
1051}
1052
1053bool GLShader::do_geometry_shader_injection(const shader::ShaderCreateInfo *info) const
1054{
1055 BuiltinBits builtins = info->builtins_;
1056 if (!GLContext::native_barycentric_support && bool(builtins & BuiltinBits::BARYCENTRIC_COORD)) {
1057 return true;
1058 }
1059 if (!GLContext::layered_rendering_support && bool(builtins & BuiltinBits::LAYER)) {
1060 return true;
1061 }
1062 if (!GLContext::layered_rendering_support && bool(builtins & BuiltinBits::VIEWPORT_INDEX)) {
1063 return true;
1064 }
1065 return false;
1066}
1067
1069
1070/* -------------------------------------------------------------------- */
1073
1075{
1077 static std::string patch = []() {
1078 std::stringstream ss;
1079 /* Version need to go first. */
1080 ss << "#version 430\n";
1081
1082 /* Enable extensions for features that are not part of our base GLSL version
1083 * don't use an extension for something already available! */
1085 ss << "#extension GL_ARB_shader_draw_parameters : enable\n";
1086 ss << "#define GPU_ARB_shader_draw_parameters\n";
1087 ss << "#define gpu_BaseInstance gl_BaseInstanceARB\n";
1088 }
1090 ss << "#extension GL_ARB_shader_viewport_layer_array: enable\n";
1091 }
1093 ss << "#extension GL_AMD_shader_explicit_vertex_parameter: enable\n";
1094 }
1096 ss << "#define GPU_ARB_clip_control\n";
1097 }
1098
1099 /* Fallbacks. */
1101 ss << "uniform int gpu_BaseInstance;\n";
1102 }
1103
1104 /* Vulkan GLSL compatibility. */
1105 ss << "#define gpu_InstanceIndex (gl_InstanceID + gpu_BaseInstance)\n";
1106
1107 /* Array compatibility. */
1108 ss << "#define gpu_Array(_type) _type[]\n";
1109
1110 /* Needs to have this defined upfront for configuring shader defines. */
1111 ss << "#define GPU_VERTEX_SHADER\n";
1112 /* GLSL Backend Lib. */
1114
1115 return ss.str();
1116 }();
1117 return patch;
1118}
1119
1121{
1123 static std::string patch = []() {
1124 std::stringstream ss;
1125 /* Version need to go first. */
1126 ss << "#version 430\n";
1127
1129 ss << "#extension GL_ARB_shader_viewport_layer_array: enable\n";
1130 }
1132 ss << "#extension GL_AMD_shader_explicit_vertex_parameter: enable\n";
1133 }
1135 ss << "#define GPU_ARB_clip_control\n";
1136 }
1137
1138 /* Array compatibility. */
1139 ss << "#define gpu_Array(_type) _type[]\n";
1140
1141 /* Needs to have this defined upfront for configuring shader defines. */
1142 ss << "#define GPU_GEOMETRY_SHADER\n";
1143 /* GLSL Backend Lib. */
1145
1146 return ss.str();
1147 }();
1148 return patch;
1149}
1150
1152{
1154 static std::string patch = []() {
1155 std::stringstream ss;
1156 /* Version need to go first. */
1157 ss << "#version 430\n";
1158
1160 ss << "#extension GL_ARB_shader_viewport_layer_array: enable\n";
1161 }
1163 ss << "#extension GL_AMD_shader_explicit_vertex_parameter: enable\n";
1164 }
1166 ss << "#extension GL_EXT_shader_framebuffer_fetch: enable\n";
1167 }
1169 ss << "#extension GL_ARB_shader_stencil_export: enable\n";
1170 ss << "#define GPU_ARB_shader_stencil_export\n";
1171 }
1173 ss << "#define GPU_ARB_clip_control\n";
1174 }
1175
1176 /* Array compatibility. */
1177 ss << "#define gpu_Array(_type) _type[]\n";
1178
1179 /* Needs to have this defined upfront for configuring shader defines. */
1180 ss << "#define GPU_FRAGMENT_SHADER\n";
1181 /* GLSL Backend Lib. */
1183
1184 return ss.str();
1185 }();
1186 return patch;
1187}
1188
1190{
1192 static std::string patch = []() {
1193 std::stringstream ss;
1194 /* Version need to go first. */
1195 ss << "#version 430\n";
1196
1197 /* Array compatibility. */
1198 ss << "#define gpu_Array(_type) _type[]\n";
1199
1200 /* Needs to have this defined upfront for configuring shader defines. */
1201 ss << "#define GPU_COMPUTE_SHADER\n";
1202
1204 ss << "#define GPU_ARB_clip_control\n";
1205 }
1206
1208
1209 return ss.str();
1210 }();
1211 return patch;
1212}
1213
/* Return the GLSL patch preamble (the `#version` line, extension enables and
 * the `GPU_*_SHADER` stage define) to prepend to sources of the given stage.
 * Returns an empty string for an unrecognized stage enum.
 * NOTE(review): original line 1228 is missing from this extraction (numbering
 * jumps 1227 -> 1229); it presumably held a `BLI_assert_unreachable()` before
 * the fallback return — confirm against the real source file. */
1214 StringRefNull GLShader::glsl_patch_get(GLenum gl_stage)
1215 {
1216  if (gl_stage == GL_VERTEX_SHADER) {
1217    return glsl_patch_vertex_get();
1218  }
1219  if (gl_stage == GL_GEOMETRY_SHADER) {
1220    return glsl_patch_geometry_get();
1221  }
1222  if (gl_stage == GL_FRAGMENT_SHADER) {
1223    return glsl_patch_fragment_get();
1224  }
1225  if (gl_stage == GL_COMPUTE_SHADER) {
1226    return glsl_patch_compute_get();
1227  }
1229  return "";
1230}
1231
/* Compile one shader stage object.
 *
 * - `sources`: the stage's source snippets. Slot SOURCES_INDEX_VERSION and
 *   slot SOURCES_INDEX_SPECIALIZATION_CONSTANTS are overwritten below with the
 *   per-stage patch preamble and the specialization-constant declarations.
 * - `gl_sources`: the persistently stored copy of the sources. When `sources`
 *   is empty (compiling a specialization variation) it is re-created from
 *   `gl_sources`.
 * - `constants_state`: specialization constant values to bake into the source.
 *
 * Returns the GL shader handle, or 0 when compilation failed (also sets
 * `compilation_failed_`) or when `async_compilation_` is set — in that case
 * only the stored sources are prepared for later out-of-process compilation. */
1232 GLuint GLShader::create_shader_stage(GLenum gl_stage,
1233                                     MutableSpan<StringRefNull> sources,
1234                                     GLSources &gl_sources,
1235                                     const shader::SpecializationConstants &constants_state)
1236 {
1237  /* Patch the shader sources to include specialization constants. */
1238  std::string constants_source;
1239  Vector<StringRefNull> recreated_sources;
1240  if (has_specialization_constants()) {
1241    constants_source = constants_declare(constants_state);
1242    if (sources.is_empty()) {
      /* Specialization variation: rebuild the source span from the stored copy. */
1243      recreated_sources = gl_sources.sources_get();
1244      sources = recreated_sources;
1245    }
1246  }
1247
1248  /* Patch the shader code using the first source slot. */
1249  sources[SOURCES_INDEX_VERSION] = glsl_patch_get(gl_stage);
1250  sources[SOURCES_INDEX_SPECIALIZATION_CONSTANTS] = constants_source;
1251
1252  if (async_compilation_) {
    /* Persist the patched slots so the subprocess sees the exact same text. */
1253    gl_sources[SOURCES_INDEX_VERSION].source = std::string(sources[SOURCES_INDEX_VERSION]);
1254    gl_sources[SOURCES_INDEX_SPECIALIZATION_CONSTANTS].source = std::string(
    /* NOTE(review): original line 1255 (the argument/closer of this call) was
     * dropped by the doc extraction — confirm against the real source. */
1256  }
1257
  /* NOTE(review): original line 1258 is missing; given the extra closing brace
   * at line 1280 it presumably opened a debug-only `if (...) {` guarding this
   * debug-source capture block — confirm against the real source. */
1259    /* Store the generated source for printing in case the link fails. */
1260    StringRefNull source_type;
1261    switch (gl_stage) {
1262      case GL_VERTEX_SHADER:
1263        source_type = "VertShader";
1264        break;
1265      case GL_GEOMETRY_SHADER:
1266        source_type = "GeomShader";
1267        break;
1268      case GL_FRAGMENT_SHADER:
1269        source_type = "FragShader";
1270        break;
1271      case GL_COMPUTE_SHADER:
1272        source_type = "ComputeShader";
1273        break;
1274    }
1275
1276    debug_source += "\n\n----------" + source_type + "----------\n\n";
1277    for (StringRefNull source : sources) {
1278      debug_source.append(source);
1279    }
1280  }
1281
1282  if (async_compilation_) {
1283    /* Only build the sources. */
1284    return 0;
1285  }
1286
  /* Synchronous in-process compilation path. */
1287  GLuint shader = glCreateShader(gl_stage);
1288  if (shader == 0) {
1289    fprintf(stderr, "GLShader: Error: Could not create shader object.\n");
1290    return 0;
1291  }
1292
  /* glShaderSource needs raw C string pointers; lengths are implied by the
   * null terminators (last argument nullptr). */
1293  Array<const char *, 16> c_str_sources(sources.size());
1294  for (const int i : sources.index_range()) {
1295    c_str_sources[i] = sources[i].c_str();
1296  }
1297  glShaderSource(shader, c_str_sources.size(), c_str_sources.data(), nullptr);
1298  glCompileShader(shader);
1299
1300  GLint status;
1301  glGetShaderiv(shader, GL_COMPILE_STATUS, &status);
  /* Print the info log on failure, or always when GPU debugging is enabled. */
1302  if (!status || (G.debug & G_DEBUG_GPU)) {
1303    char log[5000] = "";
1304    glGetShaderInfoLog(shader, sizeof(log), nullptr, log);
1305    if (log[0] != '\0') {
1306      GLLogParser parser;
1307      switch (gl_stage) {
1308        case GL_VERTEX_SHADER:
1309          this->print_log(sources, log, "VertShader", !status, &parser);
1310          break;
1311        case GL_GEOMETRY_SHADER:
1312          this->print_log(sources, log, "GeomShader", !status, &parser);
1313          break;
1314        case GL_FRAGMENT_SHADER:
1315          this->print_log(sources, log, "FragShader", !status, &parser);
1316          break;
1317        case GL_COMPUTE_SHADER:
1318          this->print_log(sources, log, "ComputeShader", !status, &parser);
1319          break;
1320      }
1321    }
1322  }
1323  if (!status) {
1324    glDeleteShader(shader);
1325    compilation_failed_ = true;
1326    return 0;
1327  }
1328
1329  debug::object_label(gl_stage, shader, name);
1330  return shader;
1331}
1332
1333void GLShader::update_program_and_sources(GLSources &stage_sources,
1334 MutableSpan<StringRefNull> sources)
1335{
1336 const bool store_sources = has_specialization_constants() || async_compilation_;
1337 if (store_sources && stage_sources.is_empty()) {
1338 stage_sources = sources;
1339 }
1340}
1341
1343{
1344 update_program_and_sources(vertex_sources_, sources);
1345 main_program_->vert_shader = create_shader_stage(
1346 GL_VERTEX_SHADER, sources, vertex_sources_, *constants);
1347}
1348
1350{
1351 update_program_and_sources(geometry_sources_, sources);
1352 main_program_->geom_shader = create_shader_stage(
1353 GL_GEOMETRY_SHADER, sources, geometry_sources_, *constants);
1354}
1355
1357{
1358 update_program_and_sources(fragment_sources_, sources);
1359 main_program_->frag_shader = create_shader_stage(
1360 GL_FRAGMENT_SHADER, sources, fragment_sources_, *constants);
1361}
1362
1364{
1365 update_program_and_sources(compute_sources_, sources);
1366 main_program_->compute_shader = create_shader_stage(
1367 GL_COMPUTE_SHADER, sources, compute_sources_, *constants);
1368}
1369
1371{
1372 if (compilation_failed_) {
1373 return false;
1374 }
1375
1376 if (info && do_geometry_shader_injection(info)) {
1377 std::string source = workaround_geometry_shader_source_create(*info);
1378 Vector<StringRefNull> sources;
1379 sources.append("version");
1380 sources.append("/* Specialization Constants. */\n");
1381 sources.append(source);
1383 }
1384
1385 if (async_compilation_) {
1386 return true;
1387 }
1388
1389 main_program_->program_link(name);
1390 return post_finalize(info);
1391}
1392
1394{
1395 GLuint program_id = main_program_->program_id;
1396 GLint status;
1397 glGetProgramiv(program_id, GL_LINK_STATUS, &status);
1398 if (!status) {
1399 char log[5000];
1400 glGetProgramInfoLog(program_id, sizeof(log), nullptr, log);
1401 GLLogParser parser;
1402 print_log({debug_source}, log, "Linking", true, &parser);
1403 return false;
1404 }
1405
1406 /* Reset for specialization constants variations. */
1407 async_compilation_ = false;
1408
1409 if (info != nullptr) {
1410 interface = new GLShaderInterface(main_program_->program_id, *info);
1411 }
1412 else {
1413 interface = new GLShaderInterface(main_program_->program_id);
1414 }
1415
1416 return true;
1417}
1418
1420
1421/* -------------------------------------------------------------------- */
1424
1426{
1427 GLProgram &program = program_get(constants_state);
1428 glUseProgram(program.program_id);
1429}
1430
1432{
1433#ifndef NDEBUG
1434 glUseProgram(0);
1435#endif
1436}
1437
1439
1440/* -------------------------------------------------------------------- */
1443
1444void GLShader::uniform_float(int location, int comp_len, int array_size, const float *data)
1445{
1446 switch (comp_len) {
1447 case 1:
1448 glUniform1fv(location, array_size, data);
1449 break;
1450 case 2:
1451 glUniform2fv(location, array_size, data);
1452 break;
1453 case 3:
1454 glUniform3fv(location, array_size, data);
1455 break;
1456 case 4:
1457 glUniform4fv(location, array_size, data);
1458 break;
1459 case 9:
1460 glUniformMatrix3fv(location, array_size, 0, data);
1461 break;
1462 case 16:
1463 glUniformMatrix4fv(location, array_size, 0, data);
1464 break;
1465 default:
1466 BLI_assert(0);
1467 break;
1468 }
1469}
1470
1471void GLShader::uniform_int(int location, int comp_len, int array_size, const int *data)
1472{
1473 switch (comp_len) {
1474 case 1:
1475 glUniform1iv(location, array_size, data);
1476 break;
1477 case 2:
1478 glUniform2iv(location, array_size, data);
1479 break;
1480 case 3:
1481 glUniform3iv(location, array_size, data);
1482 break;
1483 case 4:
1484 glUniform4iv(location, array_size, data);
1485 break;
1486 default:
1487 BLI_assert(0);
1488 break;
1489 }
1490}
1491
1493
1494/* -------------------------------------------------------------------- */
1498{
1500 source = "";
1501 source_ref = other;
1502 }
1503 else {
1504 source = other;
1505 source_ref = std::nullopt;
1506 }
1507}
1508
1510{
1511 clear();
1512 reserve(other.size());
1513
1514 for (StringRefNull other_source : other) {
1515 /* Don't store empty string as compilers can optimize these away and result in pointing to a
1516 * string that isn't c-str compliant anymore. */
1517 if (other_source.is_empty()) {
1518 continue;
1519 }
1520 append(GLSource(other_source));
1521 }
1522
1523 return *this;
1524}
1525
1527{
1529 result.reserve(size());
1530
1531 for (const GLSource &source : *this) {
1532 if (source.source_ref) {
1533 result.append(*source.source_ref);
1534 }
1535 else {
1536 result.append(source.source);
1537 }
1538 }
1539 return result;
1540}
1541
1542std::string GLSources::to_string() const
1543{
1544 std::string result;
1545 for (const GLSource &source : *this) {
1546 if (source.source_ref) {
1547 result.append(*source.source_ref);
1548 }
1549 else {
1550 result.append(source.source);
1551 }
1552 }
1553 return result;
1554}
1555
1557{
1558 size_t result = 0;
1559 result += comp.empty() ? 0 : comp.size() + sizeof('\0');
1560 result += vert.empty() ? 0 : vert.size() + sizeof('\0');
1561 result += geom.empty() ? 0 : geom.size() + sizeof('\0');
1562 result += frag.empty() ? 0 : frag.size() + sizeof('\0');
1563 return result;
1564}
1565
1567
1568/* -------------------------------------------------------------------- */
1571
1572GLShader::GLProgram::~GLProgram()
1573{
1574 /* Invalid handles are silently ignored. */
1575 glDeleteShader(vert_shader);
1576 glDeleteShader(geom_shader);
1577 glDeleteShader(frag_shader);
1578 glDeleteShader(compute_shader);
1579 glDeleteProgram(program_id);
1580}
1581
1582void GLShader::GLProgram::program_link(StringRefNull shader_name)
1583{
1584 if (this->program_id == 0) {
1585 this->program_id = glCreateProgram();
1586 debug::object_label(GL_PROGRAM, this->program_id, shader_name.c_str());
1587 }
1588
1589 GLuint program_id = this->program_id;
1590
1591 if (this->vert_shader) {
1592 glAttachShader(program_id, this->vert_shader);
1593 }
1594 if (this->geom_shader) {
1595 glAttachShader(program_id, this->geom_shader);
1596 }
1597 if (this->frag_shader) {
1598 glAttachShader(program_id, this->frag_shader);
1599 }
1600 if (this->compute_shader) {
1601 glAttachShader(program_id, this->compute_shader);
1602 }
1603 glLinkProgram(program_id);
1604}
1605
/* Return the program variation matching the given specialization constants,
 * compiling it on demand. Pass nullptr to get the default (main) program.
 * Thread-safe: the cache lookup is guarded by `program_cache_mutex_` and each
 * variation has its own compilation mutex.
 * NOTE(review): the manual lock()/unlock() pair around the cache lookup is not
 * exception-safe — consider a scoped `std::lock_guard` here. */
1606 GLShader::GLProgram &GLShader::program_get(const shader::SpecializationConstants *constants_state)
1607 {
1608  BLI_assert(constants_state == nullptr || this->has_specialization_constants() == true);
1609
1610  if (constants_state == nullptr) {
1611    /* Early exit for shaders that doesn't use specialization constants. */
1612    BLI_assert(main_program_);
1613    return *main_program_;
1614  }
1615
1616  program_cache_mutex_.lock();
1617
  /* Find or create the variation entry keyed by the constant values. */
1618  GLProgram &program = *program_cache_.lookup_or_add_cb(
1619      constants_state->values, []() { return std::make_unique<GLProgram>(); });
1620
1621  program_cache_mutex_.unlock();
1622
1623  /* Avoid two threads trying to specialize the same shader at the same time. */
1624  std::scoped_lock lock(program.compilation_mutex);
1625
1626  if (program.program_id != 0) {
1627    /* Specialization is already compiled. */
1628    return program;
1629  }
1630
  /* Recompile every stored stage with the requested constant values. Passing
   * an empty span makes create_shader_stage() rebuild from the stored sources. */
1631  if (!vertex_sources_.is_empty()) {
1632    program.vert_shader = create_shader_stage(
1633        GL_VERTEX_SHADER, {}, vertex_sources_, *constants_state);
1634  }
1635  if (!geometry_sources_.is_empty()) {
1636    program.geom_shader = create_shader_stage(
1637        GL_GEOMETRY_SHADER, {}, geometry_sources_, *constants_state);
1638  }
1639  if (!fragment_sources_.is_empty()) {
1640    program.frag_shader = create_shader_stage(
1641        GL_FRAGMENT_SHADER, {}, fragment_sources_, *constants_state);
1642  }
1643  if (!compute_sources_.is_empty()) {
1644    program.compute_shader = create_shader_stage(
1645        GL_COMPUTE_SHADER, {}, compute_sources_, *constants_state);
1646  }
1647
1648  if (async_compilation_) {
    /* Deferred path: only create the empty program object; the binary is
     * loaded later by the subprocess compiler. */
1649    program.program_id = glCreateProgram();
1650    debug::object_label(GL_PROGRAM, program.program_id, name);
1651    return program;
1652  }
1653
  /* NOTE(review): original lines 1654-1655 and 1666-1667 are missing from this
   * extraction — presumably a GPU debug group begin/end pair (and possibly an
   * UNUSED_VARS for `status`); confirm against the real source. */
1656
1657  program.program_link(name);
1658
1659  /* Ensure the specialization compiled correctly.
1660   * Specialization compilation should never fail, but adding this check seems to bypass an
1661   * internal Nvidia driver issue (See #142046). */
1662  GLint status;
1663  glGetProgramiv(program.program_id, GL_LINK_STATUS, &status);
1664  BLI_assert(status);
1665
1668
1669  return program;
1670}
1671
1673{
1675 result.comp = compute_sources_.to_string();
1676 result.vert = vertex_sources_.to_string();
1677 result.geom = geometry_sources_.to_string();
1678 result.frag = fragment_sources_.to_string();
1679 return result;
1680}
1681
1683
1684/* -------------------------------------------------------------------- */
1687
1689{
1690 dynamic_cast<GLShader *>(unwrap(specialization.shader))->program_get(&specialization.constants);
1691}
1692
1694
1695#if BLI_SUBPROCESS_SUPPORT
1696
1697/* -------------------------------------------------------------------- */
1700
1701GLCompilerWorker::GLCompilerWorker()
1702{
1703 static size_t pipe_id = 0;
1704 pipe_id++;
1705
1706 std::string name = "BLENDER_SHADER_COMPILER_" + std::to_string(getpid()) + "_" +
1707 std::to_string(pipe_id);
1708
1709 shared_mem_ = std::make_unique<SharedMemory>(
1710 name, compilation_subprocess_shared_memory_size, true);
1711 start_semaphore_ = std::make_unique<SharedSemaphore>(name + "_START", false);
1712 end_semaphore_ = std::make_unique<SharedSemaphore>(name + "_END", false);
1713 close_semaphore_ = std::make_unique<SharedSemaphore>(name + "_CLOSE", false);
1714
1715 subprocess_.create({"--compilation-subprocess", name.c_str()});
1716}
1717
/* Request subprocess shutdown. The increment order matters: the close flag is
 * raised first, then the start semaphore is flagged so the (blocked) worker
 * loop wakes up and observes the close request. */
1718 GLCompilerWorker::~GLCompilerWorker()
1719 {
1720  close_semaphore_->increment();
1721  /* Flag start so the subprocess can reach the close semaphore. */
1722  start_semaphore_->increment();
1723}
1724
1725void GLCompilerWorker::compile(const GLSourcesBaked &sources)
1726{
1727 BLI_assert(state_ == AVAILABLE);
1728
1729 ShaderSourceHeader *shared_src = reinterpret_cast<ShaderSourceHeader *>(shared_mem_->get_data());
1730 char *next_src = shared_src->sources;
1731
1732 auto add_src = [&](const std::string &src) {
1733 if (!src.empty()) {
1734 const size_t src_size = src.size() + 1;
1735 memcpy(next_src, src.c_str(), src_size);
1736 next_src += src_size;
1737 }
1738 };
1739
1740 add_src(sources.comp);
1741 add_src(sources.vert);
1742 add_src(sources.geom);
1743 add_src(sources.frag);
1744
1745 BLI_assert(size_t(next_src) <= size_t(shared_src) + compilation_subprocess_shared_memory_size);
1746
1747 if (!sources.comp.empty()) {
1748 BLI_assert(sources.vert.empty() && sources.geom.empty() && sources.frag.empty());
1749 shared_src->type = ShaderSourceHeader::Type::COMPUTE;
1750 }
1751 else {
1752 BLI_assert(sources.comp.empty() && !sources.vert.empty() && !sources.frag.empty());
1753 shared_src->type = sources.geom.empty() ?
1754 ShaderSourceHeader::Type::GRAPHICS :
1755 ShaderSourceHeader::Type::GRAPHICS_WITH_GEOMETRY_STAGE;
1756 }
1757
1758 start_semaphore_->increment();
1759
1760 state_ = COMPILATION_REQUESTED;
1761 compilation_start = BLI_time_now_seconds();
1762}
1763
1764bool GLCompilerWorker::block_until_ready()
1765{
1766 BLI_assert(ELEM(state_, COMPILATION_REQUESTED, COMPILATION_READY));
1767 if (state_ == COMPILATION_READY) {
1768 return true;
1769 }
1770
1771 auto delete_cached_binary = [&]() {
1772 /* If the subprocess crashed when loading the binary,
1773 * its name should be stored in shared memory.
1774 * Delete it to prevent more crashes in the future. */
1775 char str_start[] = "SOURCE_HASH:";
1776 char *shared_mem = reinterpret_cast<char *>(shared_mem_->get_data());
1777 if (BLI_str_startswith(shared_mem, str_start)) {
1778 std::string path = GL_shader_cache_dir_get() + SEP_STR +
1779 std::string(shared_mem + sizeof(str_start) - 1);
1780 if (BLI_exists(path.c_str())) {
1781 BLI_delete(path.c_str(), false, false);
1782 }
1783 }
1784 };
1785
1786 while (!end_semaphore_->try_decrement(1000)) {
1787 if (is_lost()) {
1788 delete_cached_binary();
1789 return false;
1790 }
1791 }
1792
1793 state_ = COMPILATION_READY;
1794 return true;
1795}
1796
1797bool GLCompilerWorker::is_lost()
1798{
1799 /* Use a timeout for hanged processes. */
1800 float max_timeout_seconds = 30.0f;
1801 return !subprocess_.is_running() ||
1802 (state_ == COMPILATION_REQUESTED &&
1803 (BLI_time_now_seconds() - compilation_start) > max_timeout_seconds);
1804}
1805
/* Block until the subprocess finished, then upload the produced program binary
 * into the given GL program object. Returns false when the subprocess was lost
 * or produced no binary (size 0).
 * NOTE(review): original line 1819 is missing from this extraction —
 * presumably the matching `GPU_debug_group_end()`; confirm against source. */
1806 bool GLCompilerWorker::load_program_binary(GLint program)
1807 {
1808  if (!block_until_ready()) {
1809    return false;
1810  }
1811
  /* The shared memory block is reused by the subprocess for the result. */
1812  ShaderBinaryHeader *binary = (ShaderBinaryHeader *)shared_mem_->get_data();
1813
1814  state_ = COMPILATION_FINISHED;
1815
1816  if (binary->size > 0) {
1817    GPU_debug_group_begin("Load Binary");
1818    glProgramBinary(program, binary->format, binary->data, binary->size);
1820    return true;
1821  }
1822
1823  return false;
1824}
1825
/* Mark the worker as free so the owning thread can submit another request. */
1826 void GLCompilerWorker::release()
1827 {
1828  state_ = AVAILABLE;
1829}
1830
1832
1833/* -------------------------------------------------------------------- */
1836
1837GLSubprocessShaderCompiler::~GLSubprocessShaderCompiler()
1838{
1839 /* Must be called before we destruct the GLCompilerWorkers. */
1840 destruct_compilation_worker();
1841
1842 for (GLCompilerWorker *worker : workers_) {
1843 delete worker;
1844 }
1845}
1846
1847GLCompilerWorker *GLSubprocessShaderCompiler::get_compiler_worker()
1848{
1849 auto new_worker = [&]() {
1850 GLCompilerWorker *result = new GLCompilerWorker();
1851 std::lock_guard lock(workers_mutex_);
1852 workers_.append(result);
1853 return result;
1854 };
1855
1856 static thread_local GLCompilerWorker *worker = new_worker();
1857
1858 if (worker->is_lost()) {
1859 std::cerr << "ERROR: Compilation subprocess lost\n";
1860 {
1861 std::lock_guard lock(workers_mutex_);
1862 workers_.remove_first_occurrence_and_reorder(worker);
1863 }
1864 delete worker;
1865 worker = new_worker();
1866 }
1867
1868 return worker;
1869}
1870
/* Compile `info` through a subprocess worker, falling back to regular local
 * compilation when the sources do not fit in the shared memory block or the
 * subprocess path fails.
 * NOTE(review): original line 1903 is missing from this extraction —
 * presumably the matching `GPU_debug_group_end()`; confirm against source. */
1871 Shader *GLSubprocessShaderCompiler::compile_shader(const shader::ShaderCreateInfo &info)
1872 {
1873  const_cast<ShaderCreateInfo *>(&info)->finalize();
  /* Prepare the shader in async mode: sources are baked but not compiled. */
1874  GLShader *shader = static_cast<GLShader *>(compile(info, true));
1875  GLSourcesBaked sources = shader->get_sources();
1876
1877  size_t required_size = sources.size();
1878  bool do_async_compilation = required_size <= sizeof(ShaderSourceHeader::sources);
1879  if (!do_async_compilation) {
1880    /* TODO: Can't reuse? */
1881    delete shader;
1882    return compile(info, false);
1883  }
1884
1885  GLCompilerWorker *worker = get_compiler_worker();
1886  worker->compile(sources);
1887
1888  GPU_debug_group_begin("Subprocess Compilation");
1889
1890  /* This path is always called for the default shader compilation. Not for specialization.
1891   * Use the default constant template.*/
1892  const shader::SpecializationConstants &constants = GPU_shader_get_default_constant_state(
1893      wrap(shader));
1894
  /* Load the subprocess-produced binary into the default program variation. */
1895  if (!worker->load_program_binary(shader->program_cache_.lookup(constants.values)->program_id) ||
1896      !shader->post_finalize(&info))
1897  {
1898    /* Compilation failed, try to compile it locally. */
1899    delete shader;
1900    shader = nullptr;
1901  }
1902
1904
1905  worker->release();
1906
1907  if (!shader) {
1908    return compile(info, false);
1909  }
1910
1911  return shader;
1912}
1913
/* Compile a specialization-constant variation of an already compiled shader
 * through a subprocess worker. On failure the empty program object is released
 * so a local compilation is attempted later on shader bind.
 * NOTE(review): original lines 1943 and 1969 are missing from this extraction
 * (1969 is presumably the matching `GPU_debug_group_end()`); confirm against
 * the real source. */
1914 void GLSubprocessShaderCompiler::specialize_shader(ShaderSpecialization &specialization)
1915 {
  /* Serializes cache queries/updates across all specializing threads. */
1916  static std::mutex mutex;
1917
1918  GLShader *shader = static_cast<GLShader *>(unwrap(specialization.shader));
1919
  /* Fetch the cached program variation for these constants, if any. */
1920  auto program_get = [&]() -> GLShader::GLProgram * {
1921    if (shader->program_cache_.contains(specialization.constants.values)) {
1922      return shader->program_cache_.lookup(specialization.constants.values).get();
1923    }
1924    return nullptr;
1925  };
1926
1927  auto program_release = [&]() {
1928    /* Compilation failed, local compilation will be tried later on shader bind. */
1929    GLShader::GLProgram *program = program_get();
1930    glDeleteProgram(program->program_id);
1931    program->program_id = 0;
1932  };
1933
1934  GLSourcesBaked sources;
1935  {
1936    std::lock_guard lock(mutex);
1937
1938    if (program_get()) {
1939      /*Already compiled*/
1940      return;
1941    }
1942
    /* Bake the sources in async mode: the program object is created empty and
     * the actual compilation is delegated to the subprocess below. */
1944    shader->async_compilation_ = true;
1945    shader->program_get(&specialization.constants);
1946    shader->async_compilation_ = false;
1947    sources = shader->get_sources();
1948
1949    size_t required_size = sources.size();
1950    bool do_async_compilation = required_size <= sizeof(ShaderSourceHeader::sources);
1951    if (!do_async_compilation) {
      /* Sources too large for the shared memory block; bail out. */
1952      program_release();
1953      return;
1954    }
1955  }
1956
1957  GPU_debug_group_begin("Subprocess Specialization");
1958
1959  GLCompilerWorker *worker = get_compiler_worker();
1960  worker->compile(sources);
1961  worker->block_until_ready();
1962
1963  std::lock_guard lock(mutex);
1964
1965  if (!worker->load_program_binary(program_get()->program_id)) {
1966    program_release();
1967  }
1968
1970
1971  worker->release();
1972}
1973
1975
1976#endif
@ G_DEBUG_GPU
#define BLI_assert_unreachable()
Definition BLI_assert.h:93
#define BLI_assert(a)
Definition BLI_assert.h:46
File and directory operations.
int BLI_exists(const char *path) ATTR_WARN_UNUSED_RESULT ATTR_NONNULL()
Definition storage.cc:373
int BLI_delete(const char *path, bool dir, bool recursive) ATTR_NONNULL()
KDTree *BLI_kdtree_nd_ new(unsigned int nodes_len_capacity)
Definition kdtree_impl.h:97
int bool BLI_str_startswith(const char *__restrict str, const char *__restrict start) ATTR_WARN_UNUSED_RESULT ATTR_NONNULL(1
Platform independent time functions.
double BLI_time_now_seconds(void)
Definition time.cc:65
#define UNUSED_FUNCTION(x)
#define ELEM(...)
float[3] Vector
bool GPU_stencil_export_support()
void GPU_debug_group_end()
Definition gpu_debug.cc:33
void GPU_debug_group_begin(const char *name)
Definition gpu_debug.cc:22
#define GPU_DEBUG_SHADER_SPECIALIZATION_GROUP
Definition GPU_debug.hh:65
@ GPU_DRIVER_OFFICIAL
@ GPU_OS_ANY
@ GPU_DEVICE_ATI
bool GPU_type_matches(eGPUDeviceType device, eGPUOSType os, eGPUDriverType driver)
const blender::gpu::shader::SpecializationConstants & GPU_shader_get_default_constant_state(GPUShader *sh)
eGPUTextureFormat
@ GPU_R16UI
@ GPU_RG16F
@ GPU_R32F
@ GPU_R16I
@ GPU_RGB10_A2
@ GPU_R32I
@ GPU_RG8UI
@ GPU_R16F
@ GPU_RG8I
@ GPU_RG16I
@ GPU_RG32UI
@ GPU_RGBA32F
@ GPU_RGBA16F
@ GPU_RG8
@ GPU_RG32I
@ GPU_RG16
@ GPU_RGBA32UI
@ GPU_R8I
@ GPU_R16
@ GPU_RG16UI
@ GPU_RGBA8I
@ GPU_RGBA8UI
@ GPU_RGBA16UI
@ GPU_RGBA16I
@ GPU_R8UI
@ GPU_RGBA16
@ GPU_RG32F
@ GPU_R8
@ GPU_R32UI
@ GPU_RGBA32I
@ GPU_R11F_G11F_B10F
@ GPU_RGBA8
volatile int lock
BMesh const char void * data
long long int int64_t
int64_t size() const
void append(const GLSource &value)
bool is_empty() const
void reserve(const int64_t min_capacity)
constexpr bool is_empty() const
Definition BLI_span.hh:509
constexpr int64_t size() const
Definition BLI_span.hh:252
constexpr bool is_empty() const
constexpr const char * c_str() const
void append(const T &value)
static bool layered_rendering_support
Definition gl_context.hh:56
static bool framebuffer_fetch_support
Definition gl_context.hh:55
static bool shader_draw_parameters_support
Definition gl_context.hh:61
static bool explicit_location_support
Definition gl_context.hh:54
static GLContext * get()
static bool clip_control_support
Definition gl_context.hh:51
static bool native_barycentric_support
Definition gl_context.hh:57
virtual void specialize_shader(ShaderSpecialization &specialization) override
std::string geometry_layout_declare(const shader::ShaderCreateInfo &info) const override
Definition gl_shader.cc:911
std::string vertex_interface_declare(const shader::ShaderCreateInfo &info) const override
Definition gl_shader.cc:720
void fragment_shader_from_glsl(MutableSpan< StringRefNull > sources) override
std::string geometry_interface_declare(const shader::ShaderCreateInfo &info) const override
Definition gl_shader.cc:941
void geometry_shader_from_glsl(MutableSpan< StringRefNull > sources) override
void compute_shader_from_glsl(MutableSpan< StringRefNull > sources) override
void unbind() override
void uniform_float(int location, int comp_len, int array_size, const float *data) override
bool post_finalize(const shader::ShaderCreateInfo *info=nullptr)
GLShader(const char *name)
Definition gl_shader.cc:53
void init() override
Definition gl_shader.cc:90
GLSourcesBaked get_sources()
std::string compute_layout_declare(const shader::ShaderCreateInfo &info) const override
Definition gl_shader.cc:963
std::string constants_declare(const shader::SpecializationConstants &constants_state) const
Definition gl_shader.cc:669
std::string resources_declare(const shader::ShaderCreateInfo &info) const override
Definition gl_shader.cc:598
void uniform_int(int location, int comp_len, int array_size, const int *data) override
std::string fragment_interface_declare(const shader::ShaderCreateInfo &info) const override
Definition gl_shader.cc:787
bool finalize(const shader::ShaderCreateInfo *info=nullptr) override
void bind(const shader::SpecializationConstants *constants_state) override
void vertex_shader_from_glsl(MutableSpan< StringRefNull > sources) override
GLSources & operator=(Span< StringRefNull > other)
std::string to_string() const
Vector< StringRefNull > sources_get() const
virtual void specialize_shader(ShaderSpecialization &)
virtual Shader * compile_shader(const shader::ShaderCreateInfo &info)
std::unique_ptr< const shader::SpecializationConstants > constants
void print_log(Span< StringRefNull > sources, const char *log, const char *stage, bool error, GPULogParser *parser)
Shader(const char *name)
Definition gpu_shader.cc:56
static StringRefNull glsl_patch_geometry_get()
static void print_resource_alias(std::ostream &os, const ShaderCreateInfo::Resource &res)
Definition gl_shader.cc:555
static StringRefNull glsl_patch_compute_get()
static StringRefNull glsl_patch_vertex_get()
static StringRefNull glsl_patch_fragment_get()
char datatoc_glsl_shader_defines_glsl[]
Definition gl_shader.cc:47
static Type UNUSED_FUNCTION to_component_type(const Type &type)
Definition gl_shader.cc:187
ThreadMutex mutex
#define inout
#define log
#define input
#define output
#define DEBUG_LOG_SHADER_SRC_ON_ERROR
#define SOURCES_INDEX_SPECIALIZATION_CONSTANTS
#define SOURCES_INDEX_VERSION
ccl_device_inline float interp(const float a, const float b, const float t)
Definition math_base.h:502
#define G(x, y, z)
void object_label(GLenum type, GLuint object, const char *name)
Definition gl_debug.cc:329
BLI_INLINE int to_component_count(const Type &type)
StringRefNull gpu_shader_dependency_get_filename_from_source_string(const StringRef source_string)
Find the name of the file from which the given string was generated.
static void print_image_type(std::ostream &os, const ImageType &type, const ShaderCreateInfo::Resource::BindType bind_type)
Definition vk_shader.cc:207
const char * to_string(ShaderStage stage)
Definition mtl_shader.mm:52
static Context * unwrap(GPUContext *ctx)
static StageInterfaceInfo * find_interface_by_name(const Span< StageInterfaceInfo * > ifaces, const StringRefNull name)
static void print_interface(std::ostream &os, const std::string &prefix, const StageInterfaceInfo &iface, int &location, const StringRefNull &suffix="")
Definition vk_shader.cc:453
static GPUContext * wrap(Context *ctx)
static std::ostream & print_qualifier(std::ostream &os, const Qualifier &qualifiers)
Definition vk_shader.cc:336
static void print_resource(std::ostream &os, const ShaderCreateInfo::Resource &res)
static std::string main_function_wrapper(std::string &pre_main, std::string &post_main)
Definition vk_shader.cc:468
static constexpr GPUSamplerState default_sampler()
blender::gpu::shader::SpecializationConstants constants
std::optional< StringRefNull > source_ref
Definition gl_shader.hh:40
Describe inputs & outputs, stage interfaces, resources and sources of a shader. If all data is correc...
Vector< StageInterfaceInfo * > vertex_out_interfaces_
Self & geometry_layout(PrimitiveIn prim_in, PrimitiveOut prim_out, int max_vertices, int invocations=-1)
Vector< CompilationConstant, 0 > compilation_constants_
Vector< StageInterfaceInfo * > geometry_out_interfaces_
Vector< SpecializationConstant > specialization_constants_
Vector< SpecializationConstant::Value, 8 > values
i
Definition text_draw.cc:230
#define SEP_STR
Definition unit.cc:39