Blender V4.5
mtl_shader.mm
Go to the documentation of this file.
1/* SPDX-FileCopyrightText: 2022-2023 Blender Authors
2 *
3 * SPDX-License-Identifier: GPL-2.0-or-later */
4
8
9#include "BKE_global.hh"
10
11#include "DNA_userdef_types.h"
12
13#include "BLI_string.h"
14#include "BLI_time.h"
15
16#include <algorithm>
17#include <fstream>
18#include <iostream>
19#include <map>
20#include <mutex>
21#include <regex>
22#include <sstream>
23#include <string>
24
25#include <cstring>
26
27#include "GPU_platform.hh"
28#include "GPU_vertex_format.hh"
29
31#include "mtl_common.hh"
32#include "mtl_context.hh"
33#include "mtl_debug.hh"
35#include "mtl_shader.hh"
38#include "mtl_shader_log.hh"
39#include "mtl_texture.hh"
40#include "mtl_vertex_buffer.hh"
41
42#include "GHOST_C-api.h"
43
44extern const char datatoc_mtl_shader_common_msl[];
45
46using namespace blender;
47using namespace blender::gpu;
48using namespace blender::gpu::shader;
49
50namespace blender::gpu {
51
/* Human-readable name of a shader stage, used when printing compilation logs. */
const char *to_string(ShaderStage stage)
{
  switch (stage) {
    case ShaderStage::VERTEX:
      return "Vertex Shader";
    case ShaderStage::FRAGMENT:
      return "Fragment Shader";
    case ShaderStage::COMPUTE:
      return "Compute Shader";
    case ShaderStage::ANY:
      break;
  }
  return "Unknown Shader Stage";
}
66
67/* -------------------------------------------------------------------- */
70
/* Create empty shader to be populated later. */
MTLShader::MTLShader(MTLContext *ctx, const char *name) : Shader(name)
{
  context_ = ctx;

  /* Create SHD builder to hold temporary resources until compilation is complete. */
  shd_builder_ = new MTLShaderBuilder();

#ifndef NDEBUG
  /* Remove invalid symbols from shader name to ensure debug entry-point function name is valid.
   * Only [A-Za-z0-9] characters are kept; everything else becomes '_'. */
  for (uint i : IndexRange(strlen(this->name))) {
    char c = this->name[i];
    if ((c >= 'A' && c <= 'Z') || (c >= 'a' && c <= 'z') || (c >= '0' && c <= '9')) {
      /* Valid identifier character -- keep as-is. */
    }
    else {
      this->name[i] = '_';
    }
  }
#endif
}
91
/* Create shader from pre-written MSL source, bypassing the GLSL -> MSL translation path. */
MTLShader::MTLShader(MTLContext *ctx,
                     MTLShaderInterface *interface,
                     const char *name,
                     NSString *input_vertex_source,
                     NSString *input_fragment_source,
                     NSString *vert_function_name,
                     NSString *frag_function_name)
    : MTLShader(ctx, name)
{
  BLI_assert([vert_function_name length]);
  BLI_assert([frag_function_name length]);

  this->set_vertex_function_name(vert_function_name);
  this->set_fragment_function_name(frag_function_name);
  this->shader_source_from_msl(input_vertex_source, input_fragment_source);
  this->set_interface(interface);
  /* No CreateInfo is available for hand-written MSL shaders. */
  this->finalize(nullptr);
}
111
MTLShader::~MTLShader()
{
  if (this->is_valid()) {

    /* Free uniform data block. */
    if (push_constant_data_ != nullptr) {
      MEM_freeN(push_constant_data_);
      push_constant_data_ = nullptr;
    }

    /* Free Metal resources.
     * This is done in the order of:
     * 1. PipelineState objects
     * 2. MTLFunctions
     * 3. MTLLibraries
     * So that each object releases its references to the one following it. */
    if (pso_descriptor_ != nil) {
      [pso_descriptor_ release];
      pso_descriptor_ = nil;
    }

    /* Free Pipeline Cache. */
    pso_cache_lock_.lock();
    for (const MTLRenderPipelineStateInstance *pso_inst : pso_cache_.values()) {
      /* Free pipeline state object. */
      if (pso_inst->pso) {
        [pso_inst->pso release];
      }
      /* Free vertex function. */
      if (pso_inst->vert) {
        [pso_inst->vert release];
      }
      /* Free fragment function. */
      if (pso_inst->frag) {
        [pso_inst->frag release];
      }
      delete pso_inst;
    }
    pso_cache_.clear();

    /* Free Compute pipeline cache. */
    for (const MTLComputePipelineStateInstance *pso_inst : compute_pso_cache_.values()) {
      /* Free pipeline state object. */
      if (pso_inst->pso) {
        [pso_inst->pso release];
      }
      /* Free compute function. */
      if (pso_inst->compute) {
        [pso_inst->compute release];
      }
      delete pso_inst;
    }
    compute_pso_cache_.clear();
    pso_cache_lock_.unlock();

    /* Free shader libraries. */
    if (shader_library_vert_ != nil) {
      [shader_library_vert_ release];
      shader_library_vert_ = nil;
    }
    if (shader_library_frag_ != nil) {
      [shader_library_frag_ release];
      shader_library_frag_ = nil;
    }
    if (shader_library_compute_ != nil) {
      [shader_library_compute_ release];
      shader_library_compute_ = nil;
    }

    /* NOTE(Metal): #ShaderInterface deletion is handled in the super destructor `~Shader()`. */
  }
  valid_ = false;

  /* Release temporary compilation resources if finalize() was never reached. */
  if (shd_builder_ != nullptr) {
    delete shd_builder_;
    shd_builder_ = nullptr;
  }
}
189
/* Record whether this shader is part of a batched (asynchronous) compilation. */
void MTLShader::init(const shader::ShaderCreateInfo & /*info*/, bool is_batch_compilation)
{
  async_compilation_ = is_batch_compilation;
}
194
196
197/* -------------------------------------------------------------------- */
200
void MTLShader::vertex_shader_from_glsl(MutableSpan<StringRefNull> sources)
{
  /* Flag source as not being compiled from native MSL. */
  BLI_assert(shd_builder_ != nullptr);
  shd_builder_->source_from_msl_ = false;

  /* Remove #version tag entry. */
  sources[SOURCES_INDEX_VERSION] = "";

  /* Consolidate GLSL vertex sources. */
  std::stringstream ss;
  for (int i = 0; i < sources.size(); i++) {
    ss << sources[i] << std::endl;
  }
  shd_builder_->glsl_vertex_source_ = ss.str();
}
217
/* Geometry shaders are not supported by the Metal back-end. */
void MTLShader::geometry_shader_from_glsl(MutableSpan<StringRefNull> /*sources*/)
{
  MTL_LOG_ERROR("MTLShader::geometry_shader_from_glsl - Geometry shaders unsupported!");
}
222
void MTLShader::fragment_shader_from_glsl(MutableSpan<StringRefNull> sources)
{
  /* Flag source as not being compiled from native MSL. */
  BLI_assert(shd_builder_ != nullptr);
  shd_builder_->source_from_msl_ = false;

  /* Remove #version tag entry. */
  sources[SOURCES_INDEX_VERSION] = "";

  /* Consolidate GLSL fragment sources.
   * Loop-scoped index for consistency with vertex_shader_from_glsl. */
  std::stringstream ss;
  for (int i = 0; i < sources.size(); i++) {
    ss << sources[i] << '\n';
  }
  shd_builder_->glsl_fragment_source_ = ss.str();
}
240
void MTLShader::compute_shader_from_glsl(MutableSpan<StringRefNull> sources)
{
  /* Flag source as not being compiled from native MSL. */
  BLI_assert(shd_builder_ != nullptr);
  shd_builder_->source_from_msl_ = false;

  /* Remove #version tag entry. */
  sources[SOURCES_INDEX_VERSION] = "";

  /* Consolidate GLSL compute sources. */
  std::stringstream ss;
  for (int i = 0; i < sources.size(); i++) {
    ss << sources[i] << std::endl;
  }
  shd_builder_->glsl_compute_source_ = ss.str();
}
257
/* Compile the translated (or hand-written) MSL into Metal libraries and prepare descriptors.
 * `info` may be null for shaders constructed directly from MSL source.
 * Returns false on translation/compilation failure; temporary builder resources are always
 * released before returning. */
bool MTLShader::finalize(const shader::ShaderCreateInfo *info)
{
  /* Check if Shader has already been finalized. */
  if (this->is_valid()) {
    MTL_LOG_ERROR("Shader (%p) '%s' has already been finalized!", this, this->name_get().c_str());
  }

  /* Compute shaders. */
  bool is_compute = false;
  if (shd_builder_->glsl_compute_source_.empty() == false) {
    BLI_assert_msg(info != nullptr, "Compute shaders must use CreateInfo.\n");
    BLI_assert_msg(!shd_builder_->source_from_msl_, "Compute shaders must compile from GLSL.");
    is_compute = true;
  }

  /* Perform GLSL to MSL source translation. */
  BLI_assert(shd_builder_ != nullptr);
  if (!shd_builder_->source_from_msl_) {
    bool success = generate_msl_from_glsl(info);
    if (!success) {
      /* GLSL to MSL translation has failed, or is unsupported for this shader. */
      valid_ = false;
      BLI_assert_msg(false, "Shader translation from GLSL to MSL has failed. \n");

      /* Create empty interface to allow shader to be silently used. */
      MTLShaderInterface *mtl_interface = new MTLShaderInterface(this->name_get().c_str());
      this->set_interface(mtl_interface);

      /* Release temporary compilation resources. */
      delete shd_builder_;
      shd_builder_ = nullptr;
      return false;
    }
  }

  /* Tuning parameters for compute kernels. */
  if (is_compute) {
    int threadgroup_tuning_param = info->mtl_max_threads_per_threadgroup_;
    if (threadgroup_tuning_param > 0) {
      maxTotalThreadsPerThreadgroup_Tuning_ = threadgroup_tuning_param;
    }
  }

  /* Ensure we have a valid shader interface. */
  MTLShaderInterface *mtl_interface = this->get_interface();
  BLI_assert(mtl_interface != nullptr);

  /* Verify Context handle, fetch device and compile shader. */
  BLI_assert(context_);
  id<MTLDevice> device = context_->device;
  BLI_assert(device != nil);

  /* Ensure source and stage entry-point names are set. */
  BLI_assert(shd_builder_ != nullptr);
  if (is_compute) {
    /* Compute path. */
    BLI_assert([compute_function_name_ length] > 0);
    BLI_assert([shd_builder_->msl_source_compute_ length] > 0);
  }
  else {
    /* Vertex/Fragment path. */
    BLI_assert([vertex_function_name_ length] > 0);
    BLI_assert([fragment_function_name_ length] > 0);
    BLI_assert([shd_builder_->msl_source_vert_ length] > 0);
  }

  @autoreleasepool {
    MTLCompileOptions *options = [[[MTLCompileOptions alloc] init] autorelease];
    options.languageVersion = MTLLanguageVersion2_2;
    options.fastMathEnabled = YES;
    options.preserveInvariance = YES;

    /* Raster order groups for tile data in struct require Metal 2.3.
     * Retaining Metal 2.2. for old shaders to maintain backwards
     * compatibility for existing features.
     * NOTE: `info` is null for shaders built directly from MSL source, so guard access. */
    if (info != nullptr && info->subpass_inputs_.is_empty() == false) {
      options.languageVersion = MTLLanguageVersion2_3;
    }
#if defined(MAC_OS_VERSION_14_0)
    if (@available(macOS 14.00, *)) {
      /* Texture atomics require Metal 3.1. */
      if (info != nullptr && bool(info->builtins_ & BuiltinBits::TEXTURE_ATOMIC)) {
        options.languageVersion = MTLLanguageVersion3_1;
      }
    }
#endif

    NSString *source_to_compile = shd_builder_->msl_source_vert_;

    /* Vertex/Fragment compile stages 0 and/or 1.
     * Compute shaders compile as stage 2. */
    ShaderStage initial_stage = (is_compute) ? ShaderStage::COMPUTE : ShaderStage::VERTEX;
    ShaderStage src_stage = initial_stage;
    uint8_t total_stages = (is_compute) ? 1 : 2;

    for (int stage_count = 0; stage_count < total_stages; stage_count++) {

      source_to_compile = (src_stage == ShaderStage::VERTEX) ?
                              shd_builder_->msl_source_vert_ :
                              ((src_stage == ShaderStage::COMPUTE) ?
                                   shd_builder_->msl_source_compute_ :
                                   shd_builder_->msl_source_frag_);

      /* Concatenate common source. */
      NSString *str = [NSString stringWithUTF8String:datatoc_mtl_shader_common_msl];
      NSString *source_with_header_a = [str stringByAppendingString:source_to_compile];

      /* Inject unique context ID to avoid cross-context shader cache collisions.
       * Required on macOS 11.0. */
      NSString *source_with_header = source_with_header_a;
      [source_with_header retain];

      /* Prepare Shader Library. */
      NSError *error = nullptr;
      id<MTLLibrary> library = [device newLibraryWithSource:source_with_header
                                                    options:options
                                                      error:&error];
      if (error) {
        /* Only exit out if genuine error and not warning. */
        if ([[error localizedDescription] rangeOfString:@"Compilation succeeded"].location ==
            NSNotFound)
        {
          const char *errors_c_str = [[error localizedDescription] UTF8String];
          const StringRefNull source = (is_compute) ? shd_builder_->glsl_compute_source_ :
                                                      shd_builder_->glsl_fragment_source_;

          MTLLogParser parser;
          print_log({source}, errors_c_str, to_string(src_stage), true, &parser);

          /* Release temporary compilation resources. */
          delete shd_builder_;
          shd_builder_ = nullptr;
          return false;
        }
      }

      BLI_assert(library != nil);

      switch (src_stage) {
        case ShaderStage::VERTEX: {
          /* Store generated library and assign debug name. */
          shader_library_vert_ = library;
          shader_library_vert_.label = [NSString stringWithUTF8String:this->name];
        } break;
        case ShaderStage::FRAGMENT: {
          /* Store generated library for fragment shader and assign debug name. */
          shader_library_frag_ = library;
          shader_library_frag_.label = [NSString stringWithUTF8String:this->name];
        } break;
        case ShaderStage::COMPUTE: {
          /* Store generated library for compute shader and assign debug name. */
          shader_library_compute_ = library;
          shader_library_compute_.label = [NSString stringWithUTF8String:this->name];
        } break;
        case ShaderStage::ANY: {
          /* Suppress warnings. */
        } break;
      }

      [source_with_header autorelease];

      /* Move onto next compilation stage. */
      if (!is_compute) {
        src_stage = ShaderStage::FRAGMENT;
      }
      else {
        break;
      }
    }

    /* Create descriptors.
     * Each shader type requires a differing descriptor. */
    if (!is_compute) {
      /* Prepare Render pipeline descriptor. */
      pso_descriptor_ = [[MTLRenderPipelineDescriptor alloc] init];
      pso_descriptor_.label = [NSString stringWithUTF8String:this->name];
    }

    /* Shader has successfully been created. */
    valid_ = true;

    /* Prepare backing data storage for local uniforms. */
    const MTLShaderBufferBlock &push_constant_block = mtl_interface->get_push_constant_block();
    if (push_constant_block.size > 0) {
      push_constant_data_ = MEM_callocN(push_constant_block.size, __func__);
      this->push_constant_bindstate_mark_dirty(true);
    }
    else {
      push_constant_data_ = nullptr;
    }

    /* If this is a compute shader, bake base PSO for compute straight-away.
     * NOTE: This will compile the base unspecialized variant. */
    if (is_compute) {
      /* Set descriptor to default shader constants */
      MTLComputePipelineStateDescriptor compute_pipeline_descriptor(this->constants->values);

      this->bake_compute_pipeline_state(context_, compute_pipeline_descriptor);
    }
  }

  /* Release temporary compilation resources. */
  delete shd_builder_;
  shd_builder_ = nullptr;
  return true;
}
466
468
469/* -------------------------------------------------------------------- */
472
/* Make this shader the active shader for subsequent draw calls on the active context. */
void MTLShader::bind(const shader::SpecializationConstants *constants_state)
{
  MTLContext *ctx = MTLContext::get();

  /* Copy constants state. */
  ctx->specialization_constants_set(constants_state);

  if (interface == nullptr || !this->is_valid()) {
    MTL_LOG_WARNING(
        "MTLShader::bind - Shader '%s' has no valid implementation in Metal, draw calls will be "
        "skipped.",
        this->name_get().c_str());
  }
  ctx->pipeline_state.active_shader = this;
}
487
/* Clear the active shader on the active context. */
void MTLShader::unbind()
{
  MTLContext *ctx = MTLContext::get();
  ctx->pipeline_state.active_shader = nullptr;
}
493
/* Copy float uniform data into the local push-constant block, flagging it dirty only when the
 * incoming data differs from what is already stored. Handles padded types (vec3/mat3) whose
 * destination stride is larger than the tightly-packed source stride. */
void MTLShader::uniform_float(int location, int comp_len, int array_size, const float *data)
{
  BLI_assert(this);
  if (!this->is_valid()) {
    return;
  }
  MTLShaderInterface *mtl_interface = get_interface();
  if (location < 0 || location >= mtl_interface->get_total_uniforms()) {
    MTL_LOG_WARNING(
        "Uniform location %d is not valid in Shader %s", location, this->name_get().c_str());
    return;
  }

  /* Fetch more information about uniform from interface. */
  const MTLShaderUniform &uniform = mtl_interface->get_uniform(location);

  /* Prepare to copy data into local shader push constant memory block. */
  BLI_assert(push_constant_data_ != nullptr);
  uint8_t *dest_ptr = (uint8_t *)push_constant_data_;
  dest_ptr += uniform.byte_offset;
  uint32_t copy_size = sizeof(float) * comp_len * array_size;

  /* Test per-element size. It is valid to copy less array elements than the total, but each
   * array element needs to match. */
  uint32_t source_per_element_size = sizeof(float) * comp_len;
  uint32_t dest_per_element_size = uniform.size_in_bytes / uniform.array_len;
  BLI_assert_msg(
      source_per_element_size <= dest_per_element_size,
      "source Per-array-element size must be smaller than destination storage capacity for "
      "that data");

  if (source_per_element_size < dest_per_element_size) {
    switch (uniform.type) {

      /* Special case for handling 'vec3' array upload: source is packed float3, destination is
       * padded to float4 stride. */
      case MTL_DATATYPE_FLOAT3: {
        int numvecs = uniform.array_len;
        uint8_t *data_c = (uint8_t *)data;

        /* It is more efficient on the host to only modify data if it has changed.
         * Data modifications are small, so memory comparison is cheap.
         * If uniforms have remained unchanged, then we avoid both copying
         * data into the local uniform struct, and upload of the modified uniform
         * contents in the command stream. */
        bool changed = false;
        for (int i = 0; i < numvecs; i++) {
          changed = changed || (memcmp((void *)dest_ptr, (void *)data_c, sizeof(float) * 3) != 0);
          if (changed) {
            memcpy((void *)dest_ptr, (void *)data_c, sizeof(float) * 3);
          }
          data_c += sizeof(float) * 3;
          dest_ptr += sizeof(float) * 4;
        }
        if (changed) {
          this->push_constant_bindstate_mark_dirty(true);
        }
        return;
      }

      /* Special case for handling 'mat3' upload: three padded float3 columns. */
      case MTL_DATATYPE_FLOAT3x3: {
        int numvecs = 3 * uniform.array_len;
        uint8_t *data_c = (uint8_t *)data;

        /* It is more efficient on the host to only modify data if it has changed.
         * Data modifications are small, so memory comparison is cheap.
         * If uniforms have remained unchanged, then we avoid both copying
         * data into the local uniform struct, and upload of the modified uniform
         * contents in the command stream. */
        bool changed = false;
        for (int i = 0; i < numvecs; i++) {
          changed = changed || (memcmp((void *)dest_ptr, (void *)data_c, sizeof(float) * 3) != 0);
          if (changed) {
            memcpy((void *)dest_ptr, (void *)data_c, sizeof(float) * 3);
          }
          data_c += sizeof(float) * 3;
          dest_ptr += sizeof(float) * 4;
        }
        if (changed) {
          this->push_constant_bindstate_mark_dirty(true);
        }
        return;
      }
      default:
        shader_debug_printf("INCOMPATIBLE UNIFORM TYPE: %d\n", uniform.type);
        break;
    }
  }

  /* Debug checks. */
  BLI_assert_msg(
      copy_size <= uniform.size_in_bytes,
      "Size of provided uniform data is greater than size specified in Shader interface\n");

  /* Only flag UBO as modified if data is different -- This can avoid re-binding of unmodified
   * local uniform data. */
  bool data_changed = (memcmp((void *)dest_ptr, (void *)data, copy_size) != 0);
  if (data_changed) {
    this->push_constant_bindstate_mark_dirty(true);
    memcpy((void *)dest_ptr, (void *)data, copy_size);
  }
}
596
/* Copy integer uniform data into the local push-constant block, flagging it dirty only when the
 * data actually changed. Narrows the incoming int down to uchar/ushort when the destination
 * storage type is smaller. */
void MTLShader::uniform_int(int location, int comp_len, int array_size, const int *data)
{
  BLI_assert(this);
  if (!this->is_valid()) {
    return;
  }

  /* NOTE(Metal): Invalidation warning for uniform re-mapping of texture slots, unsupported in
   * Metal, as we cannot point a texture binding at a different slot. */
  MTLShaderInterface *mtl_interface = this->get_interface();
  if (location >= mtl_interface->get_total_uniforms() &&
      location < (mtl_interface->get_total_uniforms() + mtl_interface->get_total_textures()))
  {
    MTL_LOG_WARNING(
        "Texture uniform location re-mapping unsupported in Metal. (Possibly also bad uniform "
        "location %d)",
        location);
    return;
  }

  if (location < 0 || location >= mtl_interface->get_total_uniforms()) {
    MTL_LOG_WARNING(
        "Uniform is not valid at location %d - Shader %s", location, this->name_get().c_str());
    return;
  }

  /* Fetch more information about uniform from interface. */
  const MTLShaderUniform &uniform = mtl_interface->get_uniform(location);

  /* Determine data location in uniform block. */
  BLI_assert(push_constant_data_ != nullptr);
  uint8_t *ptr = (uint8_t *)push_constant_data_;
  ptr += uniform.byte_offset;

  /* Source data and size to copy (may be narrowed for small storage types below). */
  const char *data_to_copy = (char *)data;
  uint data_size_to_copy = sizeof(int) * comp_len * array_size;

  /* Special cases for small types support where storage in the shader push constant buffer is
   * smaller than the incoming data. */
  ushort us;
  uchar uc;
  if (uniform.size_in_bytes == 1) {
    /* Convert integer storage value down to uchar. */
    data_size_to_copy = uniform.size_in_bytes;
    uc = *data;
    data_to_copy = (char *)&uc;
  }
  else if (uniform.size_in_bytes == 2) {
    /* Convert integer storage value down to ushort. */
    data_size_to_copy = uniform.size_in_bytes;
    us = *data;
    data_to_copy = (char *)&us;
  }
  else {
    BLI_assert_msg(
        (mtl_get_data_type_alignment(uniform.type) % sizeof(int)) == 0,
        "When uniform inputs are provided as integers, the underlying type must adhere "
        "to alignment per-component. If this test fails, the input data cannot be directly copied "
        "to the buffer. e.g. Array of small types uchar/bool/ushort etc; are currently not "
        "handled.");
  }

  /* Copy data into local block. Only flag UBO as modified if data is different
   * This can avoid re-binding of unmodified local uniform data, reducing
   * the total number of copy operations needed and data transfers between
   * CPU and GPU. */
  bool data_changed = (memcmp((void *)ptr, (void *)data_to_copy, data_size_to_copy) != 0);
  if (data_changed) {
    this->push_constant_bindstate_mark_dirty(true);
    memcpy((void *)ptr, (void *)data_to_copy, data_size_to_copy);
  }
}
670
/* Whether the local push-constant block has pending modifications to upload. */
bool MTLShader::get_push_constant_is_dirty()
{
  return push_constant_modified_;
}
675
/* Mark the push-constant block as (not) needing re-upload at next bind. */
void MTLShader::push_constant_bindstate_mark_dirty(bool is_dirty)
{
  push_constant_modified_ = is_dirty;
}
680
/* Attempts to pre-generate a PSO based on the parent shader's PSO
 * (Render shaders only). `limit <= 0` compiles all cached permutations. */
void MTLShader::warm_cache(int limit)
{
  if (parent_shader_ != nullptr) {
    MTLContext *ctx = MTLContext::get();
    MTLShader *parent_mtl = static_cast<MTLShader *>(parent_shader_);

    /* Extract PSO descriptors from parent shader. */
    Vector<MTLRenderPipelineStateDescriptor> descriptors;
    Vector<MTLPrimitiveTopologyClass> prim_classes;

    parent_mtl->pso_cache_lock_.lock();
    for (const auto &pso_entry : parent_mtl->pso_cache_.items()) {
      const MTLRenderPipelineStateDescriptor &pso_descriptor = pso_entry.key;
      const MTLRenderPipelineStateInstance *pso_inst = pso_entry.value;
      descriptors.append(pso_descriptor);
      prim_classes.append(pso_inst->prim_type);
    }
    parent_mtl->pso_cache_lock_.unlock();

    /* Warm shader cache with applied limit.
     * If limit is <= 0, compile all PSO permutations. */
    limit = (limit > 0) ? limit : descriptors.size();
    for (int i : IndexRange(min_ii(descriptors.size(), limit))) {
      const MTLRenderPipelineStateDescriptor &pso_descriptor = descriptors[i];
      const MTLPrimitiveTopologyClass &prim_class = prim_classes[i];
      bake_pipeline_state(ctx, prim_class, pso_descriptor);
    }
  }
}
712
714
715/* -------------------------------------------------------------------- */
718
/* Assign the entry-point name of the vertex function within the compiled MSL library. */
void MTLShader::set_vertex_function_name(NSString *vert_function_name)
{
  vertex_function_name_ = vert_function_name;
}
723
/* Assign the entry-point name of the fragment function within the compiled MSL library. */
void MTLShader::set_fragment_function_name(NSString *frag_function_name)
{
  fragment_function_name_ = frag_function_name;
}
728
/* Assign the entry-point name of the compute kernel within the compiled MSL library. */
void MTLShader::set_compute_function_name(NSString *compute_function_name)
{
  compute_function_name_ = compute_function_name;
}
733
/* Provide hand-written MSL vertex/fragment sources, bypassing GLSL translation. */
void MTLShader::shader_source_from_msl(NSString *input_vertex_source,
                                       NSString *input_fragment_source)
{
  BLI_assert(shd_builder_ != nullptr);
  shd_builder_->msl_source_vert_ = input_vertex_source;
  shd_builder_->msl_source_frag_ = input_fragment_source;
  shd_builder_->source_from_msl_ = true;
}
742
/* Provide hand-written MSL compute source, bypassing GLSL translation. */
void MTLShader::shader_compute_source_from_msl(NSString *input_compute_source)
{
  BLI_assert(shd_builder_ != nullptr);
  shd_builder_->msl_source_compute_ = input_compute_source;
  shd_builder_->source_from_msl_ = true;
}
749
void MTLShader::set_interface(MTLShaderInterface *interface)
{
  /* Assign gpu::Shader super-class interface. Must only be set once. */
  BLI_assert(Shader::interface == nullptr);
  Shader::interface = interface;
}
756
758
759/* -------------------------------------------------------------------- */
763
/* Populate `values` with the shader's custom specialization-constant values, mapped onto Metal
 * function-constant indices.
 * NOTE(review): the signature head and the `index` computation were reconstructed from context;
 * confirm the base-index constant name against `mtl_shader.hh`. */
static void populate_specialization_constant_values(
    MTLFunctionConstantValues *values,
    const shader::SpecializationConstants &shader_constants,
    const SpecializationStateDescriptor &specialization_descriptor)
{
  for (auto i : shader_constants.types.index_range()) {
    const shader::SpecializationConstant::Value &value = specialization_descriptor.values[i];

    /* Specialization constants follow the reserved built-in function-constant slots. */
    uint index = i + MTL_SHADER_SPECIALIZATION_CONSTANT_BASE_ID;
    switch (shader_constants.types[i]) {
      case Type::int_t:
        [values setConstantValue:&value.i type:MTLDataTypeInt atIndex:index];
        break;
      case Type::uint_t:
        [values setConstantValue:&value.u type:MTLDataTypeUInt atIndex:index];
        break;
      case Type::bool_t:
        [values setConstantValue:&value.u type:MTLDataTypeBool atIndex:index];
        break;
      case Type::float_t:
        [values setConstantValue:&value.f type:MTLDataTypeFloat atIndex:index];
        break;
      default:
        BLI_assert_msg(false, "Unsupported custom constant type.");
        break;
    }
  }
}
795
796
797/* -------------------------------------------------------------------- */
800
/* Bakes (or fetches from cache) a render PSO matching the context's current frame-buffer,
 * blend and vertex-input state, then delegates to the descriptor-based variant. */
MTLRenderPipelineStateInstance *MTLShader::bake_current_pipeline_state(
    MTLContext *ctx, MTLPrimitiveTopologyClass prim_type)
{
  /* NOTE(Metal): PSO cache can be accessed from multiple threads, though these operations should
   * be thread-safe due to organization of high-level renderer. If there are any issues, then
   * access can be guarded as appropriate. */
  BLI_assert(this->is_valid());

  /* NOTE(Metal): Vertex input assembly description will have been populated externally
   * via #MTLBatch or #MTLImmediate during binding or draw. */

  /* Resolve Context Frame-buffer state. */
  MTLFrameBuffer *framebuffer = ctx->get_current_framebuffer();

  /* Update global pipeline descriptor. */
  MTLStateManager *state_manager = static_cast<MTLStateManager *>(
      ctx->state_manager);
  MTLRenderPipelineStateDescriptor &pipeline_descriptor = state_manager->get_pipeline_descriptor();

  pipeline_descriptor.num_color_attachments = 0;
  for (int attachment = 0; attachment < GPU_FB_MAX_COLOR_ATTACHMENT; attachment++) {
    MTLAttachment color_attachment = framebuffer->get_color_attachment(attachment);

    if (color_attachment.used) {
      /* If SRGB is disabled and format is SRGB, use color data directly with no conversions
       * between linear and SRGB. */
      MTLPixelFormat mtl_format = gpu_texture_format_to_metal(
          color_attachment.texture->format_get());
      if (framebuffer->get_is_srgb() && !framebuffer->get_srgb_enabled()) {
        mtl_format = MTLPixelFormatRGBA8Unorm;
      }
      pipeline_descriptor.color_attachment_format[attachment] = mtl_format;
    }
    else {
      pipeline_descriptor.color_attachment_format[attachment] = MTLPixelFormatInvalid;
    }

    pipeline_descriptor.num_color_attachments += (color_attachment.used) ? 1 : 0;
  }
  MTLAttachment depth_attachment = framebuffer->get_depth_attachment();
  MTLAttachment stencil_attachment = framebuffer->get_stencil_attachment();
  pipeline_descriptor.depth_attachment_format = (depth_attachment.used) ?
                                                    gpu_texture_format_to_metal(
                                                        depth_attachment.texture->format_get()) :
                                                    MTLPixelFormatInvalid;
  pipeline_descriptor.stencil_attachment_format =
      (stencil_attachment.used) ?
          gpu_texture_format_to_metal(stencil_attachment.texture->format_get()) :
          MTLPixelFormatInvalid;

  /* Resolve Context Pipeline State (required by PSO). */
  pipeline_descriptor.color_write_mask = ctx->pipeline_state.color_write_mask;
  pipeline_descriptor.blending_enabled = ctx->pipeline_state.blending_enabled;
  pipeline_descriptor.alpha_blend_op = ctx->pipeline_state.alpha_blend_op;
  pipeline_descriptor.rgb_blend_op = ctx->pipeline_state.rgb_blend_op;
  pipeline_descriptor.dest_alpha_blend_factor = ctx->pipeline_state.dest_alpha_blend_factor;
  pipeline_descriptor.dest_rgb_blend_factor = ctx->pipeline_state.dest_rgb_blend_factor;
  pipeline_descriptor.src_alpha_blend_factor = ctx->pipeline_state.src_alpha_blend_factor;
  pipeline_descriptor.src_rgb_blend_factor = ctx->pipeline_state.src_rgb_blend_factor;
  pipeline_descriptor.point_size = ctx->pipeline_state.point_size;

  /* Resolve clipping plane enablement. */
  pipeline_descriptor.clipping_plane_enable_mask = 0;
  for (const int plane : IndexRange(6)) {
    pipeline_descriptor.clipping_plane_enable_mask =
        pipeline_descriptor.clipping_plane_enable_mask |
        ((ctx->pipeline_state.clip_distance_enabled[plane]) ? (1 << plane) : 0);
  }

  /* Primitive Type -- Primitive topology class needs to be specified for layered rendering. */
  bool requires_specific_topology_class = uses_gpu_layer || uses_gpu_viewport_index ||
                                          prim_type == MTLPrimitiveTopologyClassPoint;
  pipeline_descriptor.vertex_descriptor.prim_topology_class =
      (requires_specific_topology_class) ? prim_type : MTLPrimitiveTopologyClassUnspecified;

  /* Specialization configuration. */
  pipeline_descriptor.specialization_state = {ctx->constants_state.values};

  /* Bake pipeline state using global descriptor. */
  return bake_pipeline_state(ctx, prim_type, pipeline_descriptor);
}
895
896/* Variant which bakes a pipeline state based on an existing MTLRenderPipelineStateDescriptor.
897 * This function should be callable from a secondary compilation thread. */
899 MTLContext *ctx,
900 MTLPrimitiveTopologyClass prim_type,
901 const MTLRenderPipelineStateDescriptor &pipeline_descriptor)
902{
903 /* Fetch shader interface. */
904 MTLShaderInterface *mtl_interface = this->get_interface();
905 BLI_assert(mtl_interface);
906 BLI_assert(this->is_valid());
907
908 /* Check if current PSO exists in the cache. */
909 pso_cache_lock_.lock();
910 MTLRenderPipelineStateInstance **pso_lookup = pso_cache_.lookup_ptr(pipeline_descriptor);
911 MTLRenderPipelineStateInstance *pipeline_state = (pso_lookup) ? *pso_lookup : nullptr;
912 pso_cache_lock_.unlock();
913
914 if (pipeline_state != nullptr) {
915 return pipeline_state;
916 }
917
918 /* TODO: When fetching a specialized variant of a shader, if this does not yet exist, verify
919 * whether the base unspecialized variant exists:
920 * - If unspecialized version exists: Compile specialized PSO asynchronously, returning base PSO
921 * and flagging state of specialization in cache as being built.
922 * - If unspecialized does NOT exist, build specialized version straight away, as we pay the
923 * cost of compilation in both cases regardless. */
924
925 /* Generate new Render Pipeline State Object (PSO). */
926 @autoreleasepool {
927 /* Prepare Render Pipeline Descriptor. */
928
929 /* Setup function specialization constants, used to modify and optimize
930 * generated code based on current render pipeline configuration. */
931 MTLFunctionConstantValues *values = [[MTLFunctionConstantValues new] autorelease];
932
933 /* Custom function constant values: */
935 values, *this->constants, pipeline_descriptor.specialization_state);
936
937 /* Prepare Vertex descriptor based on current pipeline vertex binding state. */
938 MTLRenderPipelineDescriptor *desc = pso_descriptor_;
939 [desc reset];
940 pso_descriptor_.label = [NSString stringWithUTF8String:this->name];
941
942 /* Offset the bind index for Uniform buffers such that they begin after the VBO
943 * buffer bind slots. `MTL_uniform_buffer_base_index` is passed as a function
944 * specialization constant, customized per unique pipeline state permutation.
945 *
946 * NOTE: For binding point compaction, we could use the number of VBOs present
947 * in the current PSO configuration `pipeline_descriptors.vertex_descriptor.num_vert_buffers`).
948 * However, it is more efficient to simply offset the uniform buffer base index to the
949 * maximal number of VBO bind-points, as then UBO bind-points for similar draw calls
950 * will align and avoid the requirement for additional binding. */
951 int MTL_uniform_buffer_base_index = pipeline_descriptor.vertex_descriptor.num_vert_buffers + 1;
952
953 /* Null buffer index is used if an attribute is not found in the
954 * bound VBOs #VertexFormat. */
955 int null_buffer_index = pipeline_descriptor.vertex_descriptor.num_vert_buffers;
956 bool using_null_buffer = false;
957
958 {
959 for (const uint i :
960 IndexRange(pipeline_descriptor.vertex_descriptor.max_attribute_value + 1))
961 {
962
963 /* Metal back-end attribute descriptor state. */
964 const MTLVertexAttributeDescriptorPSO &attribute_desc =
965 pipeline_descriptor.vertex_descriptor.attributes[i];
966
967 /* Flag format conversion */
968 /* In some cases, Metal cannot implicitly convert between data types.
969 * In these instances, the fetch mode #GPUVertFetchMode as provided in the vertex format
970 * is passed in, and used to populate function constants named: MTL_AttributeConvert0..15.
971 *
972 * It is then the responsibility of the vertex shader to perform any necessary type
973 * casting.
974 *
975 * See `mtl_shader.hh` for more information. Relevant Metal API documentation:
976 * https://developer.apple.com/documentation/metal/mtlvertexattributedescriptor/1516081-format?language=objc
977 */
978 if (attribute_desc.format == MTLVertexFormatInvalid) {
979#if 0 /* Disable warning as it is too verbose and is supported. */
981 "MTLShader: baking pipeline state for '%s'- skipping input attribute at "
982 "index '%d' but none was specified in the current vertex state",
983 mtl_interface->get_name(),
984 i);
985#endif
986 /* Write out null conversion constant if attribute unused. */
987 int MTL_attribute_conversion_mode = 0;
988 [values setConstantValue:&MTL_attribute_conversion_mode
989 type:MTLDataTypeInt
990 withName:[NSString stringWithFormat:@"MTL_AttributeConvert%d", i]];
991 continue;
992 }
993
994 int MTL_attribute_conversion_mode = (int)attribute_desc.format_conversion_mode;
995 [values setConstantValue:&MTL_attribute_conversion_mode
996 type:MTLDataTypeInt
997 withName:[NSString stringWithFormat:@"MTL_AttributeConvert%d", i]];
998 if (MTL_attribute_conversion_mode == GPU_FETCH_INT_TO_FLOAT_UNIT) {
1000 "TODO(Metal): Shader %s needs to support internal format conversion\n",
1001 mtl_interface->get_name());
1002 }
1003
1004 /* Copy metal back-end attribute descriptor state into PSO descriptor.
1005 * NOTE: need to copy each element due to direct assignment restrictions.
1006 * Also note */
1007 MTLVertexAttributeDescriptor *mtl_attribute = desc.vertexDescriptor.attributes[i];
1008
1009 mtl_attribute.format = attribute_desc.format;
1010 mtl_attribute.offset = attribute_desc.offset;
1011 mtl_attribute.bufferIndex = attribute_desc.buffer_index;
1012 }
1013
1014 for (const uint i : IndexRange(pipeline_descriptor.vertex_descriptor.num_vert_buffers)) {
1015 /* Metal back-end state buffer layout. */
1016 const MTLVertexBufferLayoutDescriptorPSO &buf_layout =
1017 pipeline_descriptor.vertex_descriptor.buffer_layouts[i];
1018 /* Copy metal back-end buffer layout state into PSO descriptor.
1019 * NOTE: need to copy each element due to copying from internal
1020 * back-end descriptor to Metal API descriptor. */
1021 MTLVertexBufferLayoutDescriptor *mtl_buf_layout = desc.vertexDescriptor.layouts[i];
1022
1023 mtl_buf_layout.stepFunction = buf_layout.step_function;
1024 mtl_buf_layout.stepRate = buf_layout.step_rate;
1025 mtl_buf_layout.stride = buf_layout.stride;
1026 }
1027
1028 /* Mark empty attribute conversion. */
1029 for (int i = pipeline_descriptor.vertex_descriptor.max_attribute_value + 1;
1031 i++)
1032 {
1033 int MTL_attribute_conversion_mode = 0;
1034 [values setConstantValue:&MTL_attribute_conversion_mode
1035 type:MTLDataTypeInt
1036 withName:[NSString stringWithFormat:@"MTL_AttributeConvert%d", i]];
1037 }
1038
1039 /* DEBUG: Missing/empty attributes. */
1040 /* Attributes are normally mapped as part of the state setting based on the used
1041 * #GPUVertFormat, however, if attributes have not been set, we can sort them out here. */
1042 for (const uint i : IndexRange(mtl_interface->get_total_attributes())) {
1043 const MTLShaderInputAttribute &attribute = mtl_interface->get_attribute(i);
1044 MTLVertexAttributeDescriptor *current_attribute =
1045 desc.vertexDescriptor.attributes[attribute.location];
1046
1047 if (current_attribute.format == MTLVertexFormatInvalid) {
1048#if MTL_DEBUG_SHADER_ATTRIBUTES == 1
1049 printf("-> Filling in unbound attribute '%s' for shader PSO '%s' with location: %u\n",
1050 mtl_interface->get_name_at_offset(attribute.name_offset),
1051 mtl_interface->get_name(),
1052 attribute.location);
1053#endif
1054 current_attribute.format = attribute.format;
1055 current_attribute.offset = 0;
1056 current_attribute.bufferIndex = null_buffer_index;
1057
1058 /* Add Null vert buffer binding for invalid attributes. */
1059 if (!using_null_buffer) {
1060 MTLVertexBufferLayoutDescriptor *null_buf_layout =
1061 desc.vertexDescriptor.layouts[null_buffer_index];
1062
1063 /* Use constant step function such that null buffer can
1064 * contain just a singular dummy attribute. */
1065 null_buf_layout.stepFunction = MTLVertexStepFunctionConstant;
1066 null_buf_layout.stepRate = 0;
1067 null_buf_layout.stride = max_ii(null_buf_layout.stride, attribute.size);
1068
1069 /* If we are using the maximum number of vertex buffers, or tight binding indices,
1070 * MTL_uniform_buffer_base_index needs shifting to the bind slot after the null buffer
1071 * index. */
1072 if (null_buffer_index >= MTL_uniform_buffer_base_index) {
1073 MTL_uniform_buffer_base_index = null_buffer_index + 1;
1074 }
1075 using_null_buffer = true;
1076#if MTL_DEBUG_SHADER_ATTRIBUTES == 1
1077 MTL_LOG_INFO("Setting up buffer binding for null attribute with buffer index %d",
1078 null_buffer_index);
1079#endif
1080 }
1081 }
1082 }
1083
1084 /* Primitive Topology. */
1085 desc.inputPrimitiveTopology = pipeline_descriptor.vertex_descriptor.prim_topology_class;
1086 }
1087
1088 /* Update constant value for 'MTL_uniform_buffer_base_index'. */
1089 [values setConstantValue:&MTL_uniform_buffer_base_index
1090 type:MTLDataTypeInt
1091 withName:@"MTL_uniform_buffer_base_index"];
1092
1093 /* Storage buffer bind index.
1094 * This is always relative to MTL_uniform_buffer_base_index, plus the number of active buffers,
1095 * and an additional space for the push constant block.
1096 * If the shader does not have any uniform blocks, then we can place directly after the push
1097 * constant block. As we do not need an extra spot for the UBO at index '0'. */
1098 int MTL_storage_buffer_base_index = MTL_uniform_buffer_base_index + 1 +
1099 ((mtl_interface->get_total_uniform_blocks() > 0) ?
1100 mtl_interface->get_total_uniform_blocks() :
1101 0);
1102 [values setConstantValue:&MTL_storage_buffer_base_index
1103 type:MTLDataTypeInt
1104 withName:@"MTL_storage_buffer_base_index"];
1105
1106 /* Clipping planes. */
1107 int MTL_clip_distances_enabled = (pipeline_descriptor.clipping_plane_enable_mask > 0) ? 1 : 0;
1108
1109 /* Only define specialization constant if planes are required.
1110 * We guard clip_planes usage on this flag. */
1111 [values setConstantValue:&MTL_clip_distances_enabled
1112 type:MTLDataTypeInt
1113 withName:@"MTL_clip_distances_enabled"];
1114
1115 if (MTL_clip_distances_enabled > 0) {
1116 /* Assign individual enablement flags. Only define a flag function constant
1117 * if it is used. */
1118 for (const int plane : IndexRange(6)) {
1119 int plane_enabled = ctx->pipeline_state.clip_distance_enabled[plane] ? 1 : 0;
1120 if (plane_enabled) {
1121 [values
1122 setConstantValue:&plane_enabled
1123 type:MTLDataTypeInt
1124 withName:[NSString stringWithFormat:@"MTL_clip_distance_enabled%d", plane]];
1125 }
1126 }
1127 }
1128
1129 /* gl_PointSize constant. */
1130 bool null_pointsize = true;
1131 float MTL_pointsize = pipeline_descriptor.point_size;
1132 if (pipeline_descriptor.vertex_descriptor.prim_topology_class ==
1133 MTLPrimitiveTopologyClassPoint)
1134 {
1135 /* `if pointsize is > 0.0`, PROGRAM_POINT_SIZE is enabled, and `gl_PointSize` shader keyword
1136 * overrides the value. Otherwise, if < 0.0, use global constant point size. */
1137 if (MTL_pointsize < 0.0) {
1138 MTL_pointsize = fabsf(MTL_pointsize);
1139 [values setConstantValue:&MTL_pointsize
1140 type:MTLDataTypeFloat
1141 withName:@"MTL_global_pointsize"];
1142 null_pointsize = false;
1143 }
1144 }
1145
1146 if (null_pointsize) {
1147 MTL_pointsize = 0.0f;
1148 [values setConstantValue:&MTL_pointsize
1149 type:MTLDataTypeFloat
1150 withName:@"MTL_global_pointsize"];
1151 }
1152
1153 /* Compile functions */
1154 NSError *error = nullptr;
1155 desc.vertexFunction = [shader_library_vert_ newFunctionWithName:vertex_function_name_
1156 constantValues:values
1157 error:&error];
1158 if (error) {
1159 bool has_error = (
1160 [[error localizedDescription] rangeOfString:@"Compilation succeeded"].location ==
1161 NSNotFound);
1162
1163 const char *errors_c_str = [[error localizedDescription] UTF8String];
1164 const StringRefNull source = shd_builder_->glsl_fragment_source_.c_str();
1165
1166 MTLLogParser parser;
1167 print_log({source}, errors_c_str, "VertShader", has_error, &parser);
1168
1169 /* Only exit out if genuine error and not warning */
1170 if (has_error) {
1171 return nullptr;
1172 }
1173 }
1174
1175 desc.fragmentFunction = [shader_library_frag_ newFunctionWithName:fragment_function_name_
1176 constantValues:values
1177 error:&error];
1178 if (error) {
1179 bool has_error = (
1180 [[error localizedDescription] rangeOfString:@"Compilation succeeded"].location ==
1181 NSNotFound);
1182
1183 const char *errors_c_str = [[error localizedDescription] UTF8String];
1184 const StringRefNull source = shd_builder_->glsl_fragment_source_;
1185
1186 MTLLogParser parser;
1187 print_log({source}, errors_c_str, "FragShader", has_error, &parser);
1188
1189 /* Only exit out if genuine error and not warning */
1190 if (has_error) {
1191 return nullptr;
1192 }
1193 }
1194
1195 /* Setup pixel format state */
1196 for (int color_attachment = 0; color_attachment < GPU_FB_MAX_COLOR_ATTACHMENT;
1197 color_attachment++)
1198 {
1199 /* Fetch color attachment pixel format in back-end pipeline state. */
1200 MTLPixelFormat pixel_format = pipeline_descriptor.color_attachment_format[color_attachment];
1201 /* Populate MTL API PSO attachment descriptor. */
1202 MTLRenderPipelineColorAttachmentDescriptor *col_attachment =
1203 desc.colorAttachments[color_attachment];
1204
1205 col_attachment.pixelFormat = pixel_format;
1206 if (pixel_format != MTLPixelFormatInvalid) {
1207 bool format_supports_blending = mtl_format_supports_blending(pixel_format);
1208
1209 col_attachment.writeMask = pipeline_descriptor.color_write_mask;
1210 col_attachment.blendingEnabled = pipeline_descriptor.blending_enabled &&
1211 format_supports_blending;
1212 if (format_supports_blending && pipeline_descriptor.blending_enabled) {
1213 col_attachment.alphaBlendOperation = pipeline_descriptor.alpha_blend_op;
1214 col_attachment.rgbBlendOperation = pipeline_descriptor.rgb_blend_op;
1215 col_attachment.destinationAlphaBlendFactor = pipeline_descriptor.dest_alpha_blend_factor;
1216 col_attachment.destinationRGBBlendFactor = pipeline_descriptor.dest_rgb_blend_factor;
1217 col_attachment.sourceAlphaBlendFactor = pipeline_descriptor.src_alpha_blend_factor;
1218 col_attachment.sourceRGBBlendFactor = pipeline_descriptor.src_rgb_blend_factor;
1219 }
1220 else {
1221 if (pipeline_descriptor.blending_enabled && !format_supports_blending) {
1223 "[Warning] Attempting to Bake PSO, but MTLPixelFormat %d does not support "
1224 "blending\n",
1225 *((int *)&pixel_format));
1226 }
1227 }
1228 }
1229 }
1230 desc.depthAttachmentPixelFormat = pipeline_descriptor.depth_attachment_format;
1231 desc.stencilAttachmentPixelFormat = pipeline_descriptor.stencil_attachment_format;
1232
1233 /* Bind-point range validation.
1234 * We need to ensure that the PSO will have valid bind-point ranges, or is using the
1235 * appropriate bindless fallback path if any bind limits are exceeded. */
1236#ifdef NDEBUG
1237 /* Ensure Buffer bindings are within range. */
1238 BLI_assert_msg((MTL_uniform_buffer_base_index + get_max_ubo_index() + 2) <
1240 "UBO and SSBO bindings exceed the fragment bind table limit.");
1241
1242 /* Argument buffer. */
1243 if (mtl_interface->uses_argument_buffer_for_samplers()) {
1245 "Argument buffer binding exceeds the fragment bind table limit.");
1246 }
1247#endif
1248
1249 /* Compile PSO */
1250 MTLAutoreleasedRenderPipelineReflection reflection_data;
1251 id<MTLRenderPipelineState> pso = [ctx->device
1252 newRenderPipelineStateWithDescriptor:desc
1253 options:MTLPipelineOptionBufferTypeInfo
1254 reflection:&reflection_data
1255 error:&error];
1256 if (error) {
1257 NSLog(@"Failed to create PSO for shader: %s error %@\n", this->name, error);
1258 BLI_assert(false);
1259 return nullptr;
1260 }
1261 else if (!pso) {
1262 NSLog(@"Failed to create PSO for shader: %s, but no error was provided!\n", this->name);
1263 BLI_assert(false);
1264 return nullptr;
1265 }
1266 else {
1267#if 0
1268 NSLog(@"Successfully compiled PSO for shader: %s (Metal Context: %p)\n", this->name, ctx);
1269#endif
1270 }
1271
1272 /* Prepare pipeline state instance. */
1274 pso_inst->vert = desc.vertexFunction;
1275 pso_inst->frag = desc.fragmentFunction;
1276 pso_inst->pso = pso;
1277 pso_inst->base_uniform_buffer_index = MTL_uniform_buffer_base_index;
1278 pso_inst->base_storage_buffer_index = MTL_storage_buffer_base_index;
1279 pso_inst->null_attribute_buffer_index = (using_null_buffer) ? null_buffer_index : -1;
1280 pso_inst->prim_type = prim_type;
1281
1282 pso_inst->reflection_data_available = (reflection_data != nil);
1283 if (reflection_data != nil) {
1284
1285 /* Extract shader reflection data for buffer bindings.
1286 * This reflection data is used to contrast the binding information
1287 * we know about in the interface against the bindings in the finalized
1288 * PSO. This accounts for bindings which have been stripped out during
1289 * optimization, and allows us to both avoid over-binding and also
1290 * allows us to verify size-correctness for bindings, to ensure
1291 * that buffers bound are not smaller than the size of expected data. */
1292 NSArray<MTLArgument *> *vert_args = [reflection_data vertexArguments];
1293
1294 pso_inst->buffer_bindings_reflection_data_vert.clear();
1295 int buffer_binding_max_ind = 0;
1296
1297 for (int i = 0; i < [vert_args count]; i++) {
1298 MTLArgument *arg = [vert_args objectAtIndex:i];
1299 if ([arg type] == MTLArgumentTypeBuffer) {
1300 int buf_index = [arg index] - MTL_uniform_buffer_base_index;
1301 if (buf_index >= 0) {
1302 buffer_binding_max_ind = max_ii(buffer_binding_max_ind, buf_index);
1303 }
1304 }
1305 }
1306 pso_inst->buffer_bindings_reflection_data_vert.resize(buffer_binding_max_ind + 1);
1307 for (int i = 0; i < buffer_binding_max_ind + 1; i++) {
1308 pso_inst->buffer_bindings_reflection_data_vert[i] = {0, 0, 0, false};
1309 }
1310
1311 for (int i = 0; i < [vert_args count]; i++) {
1312 MTLArgument *arg = [vert_args objectAtIndex:i];
1313 if ([arg type] == MTLArgumentTypeBuffer) {
1314 int buf_index = [arg index] - MTL_uniform_buffer_base_index;
1315
1316 if (buf_index >= 0) {
1317 pso_inst->buffer_bindings_reflection_data_vert[buf_index] = {
1318 (uint32_t)([arg index]),
1319 (uint32_t)([arg bufferDataSize]),
1320 (uint32_t)([arg bufferAlignment]),
1321 ([arg isActive] == YES) ? true : false};
1322 }
1323 }
1324 }
1325
1326 NSArray<MTLArgument *> *frag_args = [reflection_data fragmentArguments];
1327
1328 pso_inst->buffer_bindings_reflection_data_frag.clear();
1329 buffer_binding_max_ind = 0;
1330
1331 for (int i = 0; i < [frag_args count]; i++) {
1332 MTLArgument *arg = [frag_args objectAtIndex:i];
1333 if ([arg type] == MTLArgumentTypeBuffer) {
1334 int buf_index = [arg index] - MTL_uniform_buffer_base_index;
1335 if (buf_index >= 0) {
1336 buffer_binding_max_ind = max_ii(buffer_binding_max_ind, buf_index);
1337 }
1338 }
1339 }
1340 pso_inst->buffer_bindings_reflection_data_frag.resize(buffer_binding_max_ind + 1);
1341 for (int i = 0; i < buffer_binding_max_ind + 1; i++) {
1342 pso_inst->buffer_bindings_reflection_data_frag[i] = {0, 0, 0, false};
1343 }
1344
1345 for (int i = 0; i < [frag_args count]; i++) {
1346 MTLArgument *arg = [frag_args objectAtIndex:i];
1347 if ([arg type] == MTLArgumentTypeBuffer) {
1348 int buf_index = [arg index] - MTL_uniform_buffer_base_index;
1349 shader_debug_printf(" BUF IND: %d (arg name: %s)\n", buf_index, [[arg name] UTF8String]);
1350 if (buf_index >= 0) {
1351 pso_inst->buffer_bindings_reflection_data_frag[buf_index] = {
1352 (uint32_t)([arg index]),
1353 (uint32_t)([arg bufferDataSize]),
1354 (uint32_t)([arg bufferAlignment]),
1355 ([arg isActive] == YES) ? true : false};
1356 }
1357 }
1358 }
1359 }
1360
1361 /* Insert into pso cache. */
1362 pso_cache_lock_.lock();
1363 pso_inst->shader_pso_index = pso_cache_.size();
1364 pso_cache_.add(pipeline_descriptor, pso_inst);
1365 pso_cache_lock_.unlock();
1367 "PSO CACHE: Stored new variant in PSO cache for shader '%s' Hash: '%llu'\n",
1368 this->name,
1369 pipeline_descriptor.hash());
1370 return pso_inst;
1371 }
1372}
1373
/* Bake and cache a compute pipeline state object (PSO) for this shader.
 * NOTE(review): the signature line is elided in this listing; per the class
 * declaration this is:
 *   MTLComputePipelineStateInstance *MTLShader::bake_compute_pipeline_state(
 *       MTLContext *ctx, MTLComputePipelineStateDescriptor &compute_pipeline_descriptor)
 * Returns the cached instance when one matching the descriptor already exists.
 * Otherwise compiles the compute function with the current function-constant
 * values, creates the PSO, and stores the new instance in `compute_pso_cache_`.
 * Returns nullptr on a genuine compile/creation error. */
1375 MTLContext *ctx, MTLComputePipelineStateDescriptor &compute_pipeline_descriptor)
1376{
1377 /* NOTE(Metal): Bakes and caches a PSO for compute. */
1378 BLI_assert(this);
1379 MTLShaderInterface *mtl_interface = this->get_interface();
1380 BLI_assert(mtl_interface);
1381 BLI_assert(this->is_valid());
1382 BLI_assert(shader_library_compute_ != nil);
1383
1384 /* Check if current PSO exists in the cache. */
 /* Lookup is guarded by `pso_cache_lock_`. The raw pointer is used after
  * unlock — presumably cached instances are stable once inserted; TODO confirm. */
1385 pso_cache_lock_.lock();
1386 MTLComputePipelineStateInstance *const *pso_lookup = compute_pso_cache_.lookup_ptr(
1387 compute_pipeline_descriptor);
1388 MTLComputePipelineStateInstance *pipeline_state = (pso_lookup) ? *pso_lookup : nullptr;
1389 pso_cache_lock_.unlock();
1390
1391 if (pipeline_state != nullptr) {
1392 /* Return cached PSO state. */
1393 BLI_assert(pipeline_state->pso != nil);
1394 return pipeline_state;
1395 }
1396 else {
1397 /* Prepare Compute Pipeline Descriptor. */
1398
1399 /* Setup function specialization constants, used to modify and optimize
1400 * generated code based on current render pipeline configuration. */
1401 MTLFunctionConstantValues *values = [[MTLFunctionConstantValues new] autorelease];
1402
1403 /* TODO: Compile specialized shader variants asynchronously. */
1404
1405 /* Custom function constant values: */
 /* NOTE(review): the call to populate_specialization_constant_values(...) on
  * the preceding source line (1406) is elided in this listing. */
1407 values, *this->constants, compute_pipeline_descriptor.specialization_state);
1408
1409 /* Offset the bind index for Uniform buffers such that they begin after the VBO
1410 * buffer bind slots. `MTL_uniform_buffer_base_index` is passed as a function
1411 * specialization constant, customized per unique pipeline state permutation.
1412 *
1413 * For Compute shaders, this offset is always zero, but this needs setting as
1414 * it is expected as part of the common Metal shader header. */
1415 int MTL_uniform_buffer_base_index = 0;
1416 [values setConstantValue:&MTL_uniform_buffer_base_index
1417 type:MTLDataTypeInt
1418 withName:@"MTL_uniform_buffer_base_index"];
1419
1420 /* Storage buffer bind index.
1421 * This is always relative to MTL_uniform_buffer_base_index, plus the number of active buffers,
1422 * and an additional space for the push constant block.
1423 * If the shader does not have any uniform blocks, then we can place directly after the push
1424 * constant block. As we do not need an extra spot for the UBO at index '0'. */
1425 int MTL_storage_buffer_base_index = MTL_uniform_buffer_base_index + 1 +
1426 ((mtl_interface->get_total_uniform_blocks() > 0) ?
1427 mtl_interface->get_total_uniform_blocks() :
1428 0);
1429
1430 [values setConstantValue:&MTL_storage_buffer_base_index
1431 type:MTLDataTypeInt
1432 withName:@"MTL_storage_buffer_base_index"];
1433
1434 /* Compile compute function. */
1435 NSError *error = nullptr;
1436 id<MTLFunction> compute_function = [shader_library_compute_
1437 newFunctionWithName:compute_function_name_
1438 constantValues:values
1439 error:&error];
1440 compute_function.label = [NSString stringWithUTF8String:this->name];
1441
1442 if (error) {
1443 NSLog(@"Compile Error - Metal Shader compute function, error %@", error);
1444
1445 /* Only exit out if genuine error and not warning */
 /* A "Compilation succeeded" substring in the error description marks a
  * warning-only result; only bail when that substring is absent. */
1446 if ([[error localizedDescription] rangeOfString:@"Compilation succeeded"].location ==
1447 NSNotFound)
1448 {
1449 BLI_assert(false);
1450 return nullptr;
1451 }
1452 }
1453
1454 /* Compile PSO. */
1455 MTLComputePipelineDescriptor *desc = [[MTLComputePipelineDescriptor alloc] init];
1456 desc.label = [NSString stringWithUTF8String:this->name];
1457 desc.computeFunction = compute_function;
1458
 /* NOTE(review): source lines 1459-1466 (a comment block) are elided in this
  * listing. The custom threads-per-threadgroup tuning below is applied only
  * on Apple M1/M2 GPUs, and only when a tuning value was provided. */
1467 const MTLCapabilities &capabilities = MTLBackend::get_capabilities();
1468 if (ELEM(capabilities.gpu, APPLE_GPU_M1, APPLE_GPU_M2)) {
1469 if (maxTotalThreadsPerThreadgroup_Tuning_ > 0) {
1470 desc.maxTotalThreadsPerThreadgroup = this->maxTotalThreadsPerThreadgroup_Tuning_;
1471 MTL_LOG_INFO("Using custom parameter for shader %s value %u\n",
1472 this->name,
1473 maxTotalThreadsPerThreadgroup_Tuning_);
1474 }
1475 }
1476
1477 id<MTLComputePipelineState> pso = [ctx->device
1478 newComputePipelineStateWithDescriptor:desc
1479 options:MTLPipelineOptionNone
1480 reflection:nullptr
1481 error:&error];
1482
1483 /* If PSO has compiled but max theoretical threads-per-threadgroup is lower than required
1484 * dispatch size, recompile with increased limit. NOTE: This will result in a performance drop,
1485 * ideally the source shader should be modified to reduce local register pressure, or, local
1486 * work-group size should be reduced.
1487 * Similarly, the custom tuning parameter "mtl_max_total_threads_per_threadgroup" can be
1488 * specified to a sufficiently large value to avoid this. */
1489 if (pso) {
1490 uint num_required_threads_per_threadgroup = compute_pso_common_state_.threadgroup_x_len *
1491 compute_pso_common_state_.threadgroup_y_len *
1492 compute_pso_common_state_.threadgroup_z_len;
1493 if (pso.maxTotalThreadsPerThreadgroup < num_required_threads_per_threadgroup) {
 /* NOTE(review): the `MTL_LOG_WARNING(` opener on source line 1494 is elided
  * in this listing; the strings below are its arguments. */
1495 "Shader '%s' requires %u threads per threadgroup, but PSO limit is: %lu. Recompiling "
1496 "with increased limit on descriptor.\n",
1497 this->name,
1498 num_required_threads_per_threadgroup,
1499 (unsigned long)pso.maxTotalThreadsPerThreadgroup);
1500 [pso release];
1501 pso = nil;
1502 desc.maxTotalThreadsPerThreadgroup = 1024;
1503 pso = [ctx->device newComputePipelineStateWithDescriptor:desc
1504 options:MTLPipelineOptionNone
1505 reflection:nullptr
1506 error:&error];
1507 }
1508 }
1509
1510 if (error) {
1511 NSLog(@"Failed to create PSO for compute shader: %s error %@\n", this->name, error);
1512 BLI_assert(false);
1513 return nullptr;
1514 }
1515 else if (!pso) {
1516 NSLog(@"Failed to create PSO for compute shader: %s, but no error was provided!\n",
1517 this->name);
1518 BLI_assert(false);
1519 return nullptr;
1520 }
1521 else {
1522#if 0
1523 NSLog(@"Successfully compiled compute PSO for shader: %s (Metal Context: %p)\n",
1524 this->name,
1525 ctx);
1526#endif
1527 }
1528
1529 [desc release];
1530
1531 /* Gather reflection data and create MTLComputePipelineStateInstance to store results. */
 /* NOTE(review): the allocation of `compute_pso_instance` on source line 1532
  * is elided in this listing. */
1533 compute_pso_instance->compute = compute_function;
1534 compute_pso_instance->pso = pso;
1535 compute_pso_instance->base_uniform_buffer_index = MTL_uniform_buffer_base_index;
1536 compute_pso_instance->base_storage_buffer_index = MTL_storage_buffer_base_index;
 /* Insert into the compute PSO cache under lock; `shader_pso_index` records
  * the insertion order within this shader's cache. */
1537 pso_cache_lock_.lock();
1538 compute_pso_instance->shader_pso_index = compute_pso_cache_.size();
1539 compute_pso_cache_.add(compute_pipeline_descriptor, compute_pso_instance);
1540 pso_cache_lock_.unlock();
1541
1542 return compute_pso_instance;
1543 }
1544}
1545
1546
1547/* -------------------------------------------------------------------- */
1550
1555
/* MTLShaderCompiler::compile_shader.
 * NOTE(review): the signature line is elided in this listing; per the class
 * declaration this is:
 *   Shader *compile_shader(const shader::ShaderCreateInfo &info) override
 * Compiles the shader synchronously (batch-compilation mode), then pre-warms
 * the render PSO cache for the resulting shader. Returns nullptr when
 * compilation fails (the nullptr falls through the `if`). */
1557{
1558 MTLShader *shader = static_cast<MTLShader *>(compile(info, true));
1559
1560 if (shader) {
1561 /* Generate and cache any render PSOs if possible (typically materials only)
1562 * (Finalize() will already bake a Compute PSO if possible) */
 /* -1 means no limit on the number of cached pipeline-state variants warmed. */
1563 shader->warm_cache(-1);
1564 }
1565
1566 return shader;
1567}
1568
/* MTLShaderCompiler::specialize_shader.
 * NOTE(review): the signature line is elided in this listing; per the class
 * declaration this is:
 *   void specialize_shader(ShaderSpecialization &specialization) override
 * Pre-bakes a compute PSO for the given specialization-constant values so the
 * first real dispatch does not pay the compile cost. Render (vertex/fragment)
 * specializations are not handled here. */
1570{
1571 MTLShader *shader = static_cast<MTLShader *>(unwrap(specialization.shader));
1572
 /* Precompiling requires the finalized MSL libraries to already exist. */
1573 BLI_assert_msg(shader->is_valid(),
1574 "Shader must be finalized before precompiling specializations");
1575
1576 if (!shader->has_compute_shader_lib()) {
1577 /* Currently only support Compute */
1578 return;
1579 }
1580
1581 /* Create descriptor using these specialization constants. */
1582 MTLComputePipelineStateDescriptor compute_pipeline_descriptor(specialization.constants.values);
1583
 /* Bake (and cache) the PSO on the active Metal context. */
1584 MTLContext *metal_context = static_cast<MTLContext *>(Context::get());
1585 shader->bake_compute_pipeline_state(metal_context, compute_pipeline_descriptor);
1586}
1587
1589
1590} // namespace blender::gpu
#define BLI_assert_unreachable()
Definition BLI_assert.h:93
#define BLI_assert(a)
Definition BLI_assert.h:46
#define BLI_assert_msg(a, msg)
Definition BLI_assert.h:53
MINLINE int min_ii(int a, int b)
MINLINE int max_ii(int a, int b)
unsigned char uchar
unsigned int uint
unsigned short ushort
Platform independent time functions.
#define ELEM(...)
GHOST C-API function and type declarations.
int GPU_max_parallel_compilations()
static constexpr int GPU_VERT_ATTR_MAX_LEN
@ GPU_FETCH_INT_TO_FLOAT_UNIT
BMesh const char void * data
return true
SIMD_FORCE_INLINE bool isActive() const
void reset()
clear internal cached data and reset random seed
IndexRange index_range() const
constexpr int64_t size() const
Definition BLI_span.hh:493
constexpr const char * c_str() const
int64_t size() const
void append(const T &value)
static Context * get()
static MTLCapabilities & get_capabilities()
MTLFrameBuffer * get_current_framebuffer()
shader::SpecializationConstants constants_state
static MTLContext * get()
MTLContextGlobalShaderPipelineState pipeline_state
void specialization_constants_set(const shader::SpecializationConstants *constants_state)
MTLAttachment get_color_attachment(uint slot)
Shader * compile_shader(const shader::ShaderCreateInfo &info) override
void specialize_shader(ShaderSpecialization &specialization) override
const MTLShaderBufferBlock & get_push_constant_block() const
const char * get_name_at_offset(uint32_t offset) const
int get_argument_buffer_bind_index(ShaderStage stage) const
const MTLShaderInputAttribute & get_attribute(uint index) const
const MTLShaderUniform & get_uniform(uint index) const
void set_fragment_function_name(NSString *fragment_function_name)
void warm_cache(int limit) override
void geometry_shader_from_glsl(MutableSpan< StringRefNull > sources) override
MTLRenderPipelineStateInstance * bake_pipeline_state(MTLContext *ctx, MTLPrimitiveTopologyClass prim_type, const MTLRenderPipelineStateDescriptor &pipeline_descriptor)
MTLComputePipelineStateInstance * bake_compute_pipeline_state(MTLContext *ctx, MTLComputePipelineStateDescriptor &compute_pipeline_descriptor)
void shader_compute_source_from_msl(NSString *input_compute_source)
void uniform_int(int location, int comp_len, int array_size, const int *data) override
void bind(const shader::SpecializationConstants *constants_state) override
bool finalize(const shader::ShaderCreateInfo *info=nullptr) override
void fragment_shader_from_glsl(MutableSpan< StringRefNull > sources) override
void compute_shader_from_glsl(MutableSpan< StringRefNull > sources) override
void set_vertex_function_name(NSString *vetex_function_name)
void shader_source_from_msl(NSString *input_vertex_source, NSString *input_fragment_source)
void unbind() override
MTLShaderInterface * get_interface()
void uniform_float(int location, int comp_len, int array_size, const float *data) override
void vertex_shader_from_glsl(MutableSpan< StringRefNull > sources) override
void init() override
MTLRenderPipelineStateInstance * bake_current_pipeline_state(MTLContext *ctx, MTLPrimitiveTopologyClass prim_type)
MTLShader(MTLContext *ctx, const char *name)
Definition mtl_shader.mm:72
void init(const shader::ShaderCreateInfo &, bool is_batch_compilation) override
void set_interface(MTLShaderInterface *interface)
void push_constant_bindstate_mark_dirty(bool is_dirty)
void set_compute_function_name(NSString *compute_function_name)
MTLRenderPipelineStateDescriptor & get_pipeline_descriptor()
Definition mtl_state.hh:59
Shader * compile(const shader::ShaderCreateInfo &info, bool is_batch_compilation)
ShaderCompiler(uint32_t threads_count=1, GPUWorker::ContextType context_type=GPUWorker::ContextType::PerThread, bool support_specializations=false)
std::unique_ptr< const shader::SpecializationConstants > constants
ShaderInterface * interface
StringRefNull name_get() const
void print_log(Span< StringRefNull > sources, const char *log, const char *stage, bool error, GPULogParser *parser)
Shader(const char *name)
Definition gpu_shader.cc:56
eGPUTextureFormat format_get() const
CCL_NAMESPACE_BEGIN struct Options options
#define fabsf(x)
#define str(s)
#define GPU_FB_MAX_COLOR_ATTACHMENT
#define printf(...)
float length(VecOp< float, D >) RET
#define SOURCES_INDEX_VERSION
int count
void * MEM_callocN(size_t len, const char *str)
Definition mallocn.cc:118
void MEM_freeN(void *vmemh)
Definition mallocn.cc:113
static void error(const char *str)
#define MTL_MAX_BUFFER_BINDINGS
#define MTL_LOG_INFO(info,...)
Definition mtl_debug.hh:49
#define MTL_LOG_WARNING(info,...)
Definition mtl_debug.hh:42
#define MTL_LOG_ERROR(info,...)
Definition mtl_debug.hh:34
#define shader_debug_printf(...)
Definition mtl_shader.hh:49
#define MTL_SHADER_SPECIALIZATION_CONSTANT_BASE_ID
Definition mtl_shader.hh:53
const char datatoc_mtl_shader_common_msl[]
uint mtl_get_data_type_alignment(eMTLDataType type)
const char * to_string(ShaderStage stage)
Definition mtl_shader.mm:52
static Context * unwrap(GPUContext *ctx)
MTLPixelFormat gpu_texture_format_to_metal(eGPUTextureFormat tex_format)
static void populate_specialization_constant_values(MTLFunctionConstantValues *values, const shader::SpecializationConstants &shader_constants, const SpecializationStateDescriptor &specialization_descriptor)
bool mtl_format_supports_blending(MTLPixelFormat format)
blender::gpu::shader::SpecializationConstants constants
MTLPixelFormat color_attachment_format[GPU_FB_MAX_COLOR_ATTACHMENT]
blender::Vector< MTLBufferArgumentData > buffer_bindings_reflection_data_frag
Definition mtl_shader.hh:98
blender::Vector< MTLBufferArgumentData > buffer_bindings_reflection_data_vert
Definition mtl_shader.hh:97
MTLVertexBufferLayoutDescriptorPSO buffer_layouts[GPU_BATCH_VBO_MAX_LEN+GPU_BATCH_INST_VBO_MAX_LEN]
MTLVertexAttributeDescriptorPSO attributes[GPU_VERT_ATTR_MAX_LEN]
Vector< shader::SpecializationConstant::Value > values
Describe inputs & outputs, stage interfaces, resources and sources of a shader. If all data is correc...
Vector< SpecializationConstant::Value, 8 > values
i
Definition text_draw.cc:230
PointerRNA * ptr
Definition wm_files.cc:4226