/* SPDX-FileCopyrightText: 2022-2023 Blender Authors
 *
 * SPDX-License-Identifier: GPL-2.0-or-later */

/** \file
 * \ingroup gpu
 */

9#include "BKE_global.hh"
10
11#include "DNA_userdef_types.h"
12
13#include "BLI_string.h"
14#include "BLI_time.h"
15
16#include <algorithm>
17#include <fstream>
18#include <iostream>
19#include <map>
20#include <mutex>
21#include <regex>
22#include <sstream>
23#include <string>
24
25#include <cstring>
26
27#include "GPU_platform.hh"
28#include "GPU_vertex_format.hh"
29
31#include "mtl_common.hh"
32#include "mtl_context.hh"
33#include "mtl_debug.hh"
35#include "mtl_shader.hh"
38#include "mtl_shader_log.hh"
39#include "mtl_texture.hh"
40#include "mtl_vertex_buffer.hh"
41
42#include "GHOST_C-api.h"
43
44extern const char datatoc_mtl_shader_common_msl[];
45
46using namespace blender;
47using namespace blender::gpu;
48using namespace blender::gpu::shader;
49
50namespace blender::gpu {
51
const char *to_string(ShaderStage stage)
{
  switch (stage) {
    case ShaderStage::VERTEX:
      return "Vertex Shader";
    case ShaderStage::FRAGMENT:
      return "Fragment Shader";
    case ShaderStage::COMPUTE:
      return "Compute Shader";
    case ShaderStage::ANY:
      break;
  }
  return "Unknown Shader Stage";
}
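
/* Example (illustrative only): `to_string()` is used to tag per-stage entries when
 * printing compilation logs, mirroring the `print_log()` calls later in this file:
 *
 *   print_log(sources, errors_c_str, to_string(ShaderStage::VERTEX), true, &parser);
 */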

/* -------------------------------------------------------------------- */
/** \name Creation / Destruction
 * \{ */

/* Create empty shader to be populated later. */
MTLShader::MTLShader(MTLContext *ctx, const char *name) : Shader(name)
{
  context_ = ctx;

  /* Create SHD builder to hold temporary resources until compilation is complete. */
  shd_builder_ = new MTLShaderBuilder();

#ifndef NDEBUG
  /* Remove invalid symbols from shader name to ensure debug entry-point function name is valid. */
  for (uint i : IndexRange(strlen(this->name))) {
    char c = this->name[i];
    if (!((c >= 'A' && c <= 'Z') || (c >= 'a' && c <= 'z') || (c >= '0' && c <= '9'))) {
      this->name[i] = '_';
    }
  }
#endif
}

/* Create shader from MSL source. */
MTLShader::MTLShader(MTLContext *ctx,
                     MTLShaderInterface *interface,
                     const char *name,
                     NSString *input_vertex_source,
                     NSString *input_fragment_source,
                     NSString *vert_function_name,
                     NSString *frag_function_name)
    : MTLShader(ctx, name)
{
  BLI_assert([vert_function_name length]);
  BLI_assert([frag_function_name length]);

  this->set_vertex_function_name(vert_function_name);
  this->set_fragment_function_name(frag_function_name);
  this->shader_source_from_msl(input_vertex_source, input_fragment_source);
  this->set_interface(interface);
  this->finalize(nullptr);
}
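
/* Usage sketch (illustrative; `ctx`, `iface` and the MSL source strings are assumed to
 * already exist -- they are not defined in this file):
 *
 *   MTLShader *shader = new MTLShader(ctx,
 *                                     iface,
 *                                     "my_shader",
 *                                     vertex_msl,    // NSString * with vertex MSL source.
 *                                     fragment_msl,  // NSString * with fragment MSL source.
 *                                     @"vertex_function_entry",
 *                                     @"fragment_function_entry");
 *
 * This constructor finalizes immediately, so `shader->is_valid()` reflects whether the
 * Metal library compilation succeeded. */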

MTLShader::~MTLShader()
{
  if (this->is_valid()) {

    /* Free uniform data block. */
    if (push_constant_data_ != nullptr) {
      MEM_freeN(push_constant_data_);
      push_constant_data_ = nullptr;
    }

    /* Free Metal resources.
     * This is done in the order of:
     * 1. PipelineState objects
     * 2. MTLFunctions
     * 3. MTLLibraries
     * So that each object releases its references to the one following it. */
    if (pso_descriptor_ != nil) {
      [pso_descriptor_ release];
      pso_descriptor_ = nil;
    }

    /* Free Pipeline Cache. */
    pso_cache_lock_.lock();
    for (const MTLRenderPipelineStateInstance *pso_inst : pso_cache_.values()) {
      /* Free pipeline state object. */
      if (pso_inst->pso) {
        [pso_inst->pso release];
      }
      /* Free vertex function. */
      if (pso_inst->vert) {
        [pso_inst->vert release];
      }
      /* Free fragment function. */
      if (pso_inst->frag) {
        [pso_inst->frag release];
      }
      delete pso_inst;
    }
    pso_cache_.clear();

    /* Free Compute pipeline cache. */
    for (const MTLComputePipelineStateInstance *pso_inst : compute_pso_cache_.values()) {
      /* Free pipeline state object. */
      if (pso_inst->pso) {
        [pso_inst->pso release];
      }
      /* Free compute function. */
      if (pso_inst->compute) {
        [pso_inst->compute release];
      }
    }
    compute_pso_cache_.clear();
    pso_cache_lock_.unlock();

    /* Free shader libraries. */
    if (shader_library_vert_ != nil) {
      [shader_library_vert_ release];
      shader_library_vert_ = nil;
    }
    if (shader_library_frag_ != nil) {
      [shader_library_frag_ release];
      shader_library_frag_ = nil;
    }
    if (shader_library_compute_ != nil) {
      [shader_library_compute_ release];
      shader_library_compute_ = nil;
    }

    /* NOTE(Metal): #ShaderInterface deletion is handled in the super destructor `~Shader()`. */
  }
  valid_ = false;

  if (shd_builder_ != nullptr) {
    delete shd_builder_;
    shd_builder_ = nullptr;
  }
}

void MTLShader::init(const shader::ShaderCreateInfo & /*info*/, bool is_batch_compilation)
{
  async_compilation_ = is_batch_compilation;
}

/** \} */

/* -------------------------------------------------------------------- */
/** \name Shader Stage Creation
 * \{ */

void MTLShader::vertex_shader_from_glsl(MutableSpan<StringRefNull> sources)
{
  /* Flag source as not being compiled from native MSL. */
  BLI_assert(shd_builder_ != nullptr);
  shd_builder_->source_from_msl_ = false;

  /* Remove #version tag entry. */
  sources[SOURCES_INDEX_VERSION] = "";

  /* Consolidate GLSL vertex sources. */
  std::stringstream ss;
  for (int i = 0; i < sources.size(); i++) {
    ss << sources[i] << std::endl;
  }
  shd_builder_->glsl_vertex_source_ = ss.str();
}

void MTLShader::geometry_shader_from_glsl(MutableSpan<StringRefNull> /*sources*/)
{
  MTL_LOG_ERROR("MTLShader::geometry_shader_from_glsl - Geometry shaders unsupported!");
}

void MTLShader::fragment_shader_from_glsl(MutableSpan<StringRefNull> sources)
{
  /* Flag source as not being compiled from native MSL. */
  BLI_assert(shd_builder_ != nullptr);
  shd_builder_->source_from_msl_ = false;

  /* Remove #version tag entry. */
  sources[SOURCES_INDEX_VERSION] = "";

  /* Consolidate GLSL fragment sources. */
  std::stringstream ss;
  int i;
  for (i = 0; i < sources.size(); i++) {
    /* Output preprocessor directive to improve shader log. */
    StringRefNull name = gpu_shader_dependency_get_filename_from_source_string(sources[i]);
    if (name.is_empty()) {
      ss << "#line 1 \"generated_code_" << i << "\"\n";
    }
    else {
      ss << "#line 1 \"" << name << "\"\n";
    }

    ss << sources[i] << '\n';
  }
  ss << "#line 1 \"msl_wrapper_code\"\n";
  shd_builder_->glsl_fragment_source_ = ss.str();
}

void MTLShader::compute_shader_from_glsl(MutableSpan<StringRefNull> sources)
{
  /* Flag source as not being compiled from native MSL. */
  BLI_assert(shd_builder_ != nullptr);
  shd_builder_->source_from_msl_ = false;

  /* Remove #version tag entry. */
  sources[SOURCES_INDEX_VERSION] = "";

  /* Consolidate GLSL compute sources. */
  std::stringstream ss;
  for (int i = 0; i < sources.size(); i++) {
    /* Output preprocessor directive to improve shader log. */
    StringRefNull name = gpu_shader_dependency_get_filename_from_source_string(sources[i]);
    if (name.is_empty()) {
      ss << "#line 1 \"generated_code_" << i << "\"\n";
    }
    else {
      ss << "#line 1 \"" << name << "\"\n";
    }
    ss << sources[i] << std::endl;
  }
  shd_builder_->glsl_compute_source_ = ss.str();
}

bool MTLShader::finalize(const shader::ShaderCreateInfo *info)
{
  /* Check if Shader has already been finalized. */
  if (this->is_valid()) {
    MTL_LOG_ERROR("Shader (%p) '%s' has already been finalized!", this, this->name_get());
  }

  /* Compute shaders. */
  bool is_compute = false;
  if (shd_builder_->glsl_compute_source_.size() > 0) {
    BLI_assert_msg(info != nullptr, "Compute shaders must use CreateInfo.\n");
    BLI_assert_msg(!shd_builder_->source_from_msl_, "Compute shaders must compile from GLSL.");
    is_compute = true;
  }

  /* Perform GLSL to MSL source translation. */
  BLI_assert(shd_builder_ != nullptr);
  if (!shd_builder_->source_from_msl_) {
    bool success = generate_msl_from_glsl(info);
    if (!success) {
      /* GLSL to MSL translation has failed, or is unsupported for this shader. */
      valid_ = false;
      BLI_assert_msg(false, "Shader translation from GLSL to MSL has failed.\n");

      /* Create empty interface to allow shader to be silently used. */
      MTLShaderInterface *mtl_interface = new MTLShaderInterface(this->name_get());
      this->set_interface(mtl_interface);

      /* Release temporary compilation resources. */
      delete shd_builder_;
      shd_builder_ = nullptr;
      return false;
    }
  }

  /* Tuning parameters for compute kernels. */
  if (is_compute) {
    int threadgroup_tuning_param = info->mtl_max_threads_per_threadgroup_;
    if (threadgroup_tuning_param > 0) {
      maxTotalThreadsPerThreadgroup_Tuning_ = threadgroup_tuning_param;
    }
  }

  /* Ensure we have a valid shader interface. */
  MTLShaderInterface *mtl_interface = this->get_interface();
  BLI_assert(mtl_interface != nullptr);

  /* Verify Context handle, fetch device and compile shader. */
  BLI_assert(context_);
  id<MTLDevice> device = context_->device;
  BLI_assert(device != nil);

  /* Ensure source and stage entry-point names are set. */
  BLI_assert(shd_builder_ != nullptr);
  if (is_compute) {
    /* Compute path. */
    BLI_assert([compute_function_name_ length] > 0);
    BLI_assert([shd_builder_->msl_source_compute_ length] > 0);
  }
  else {
    /* Vertex/Fragment path. */
    BLI_assert([vertex_function_name_ length] > 0);
    if (transform_feedback_type_ == GPU_SHADER_TFB_NONE) {
      BLI_assert([fragment_function_name_ length] > 0);
    }
    BLI_assert([shd_builder_->msl_source_vert_ length] > 0);
  }

  @autoreleasepool {
    MTLCompileOptions *options = [[[MTLCompileOptions alloc] init] autorelease];
    options.languageVersion = MTLLanguageVersion2_2;
    options.fastMathEnabled = YES;
    options.preserveInvariance = YES;

    /* Raster order groups for tile data in struct require Metal 2.3.
     * Retaining Metal 2.2 for old shaders to maintain backwards
     * compatibility for existing features. */
    if (info->subpass_inputs_.size() > 0) {
      options.languageVersion = MTLLanguageVersion2_3;
    }
#if defined(MAC_OS_VERSION_14_0)
    if (@available(macOS 14.00, *)) {
      /* Texture atomics require Metal 3.1. */
      if (bool(info->builtins_ & BuiltinBits::TEXTURE_ATOMIC)) {
        options.languageVersion = MTLLanguageVersion3_1;
      }
    }
#endif

    NSString *source_to_compile = shd_builder_->msl_source_vert_;

    /* Vertex/Fragment compile stages 0 and/or 1.
     * Compute shaders compile as stage 2. */
    ShaderStage initial_stage = (is_compute) ? ShaderStage::COMPUTE : ShaderStage::VERTEX;
    ShaderStage src_stage = initial_stage;
    uint8_t total_stages = (is_compute) ? 1 : 2;

    for (int stage_count = 0; stage_count < total_stages; stage_count++) {

      source_to_compile = (src_stage == ShaderStage::VERTEX) ?
                              shd_builder_->msl_source_vert_ :
                              ((src_stage == ShaderStage::COMPUTE) ?
                                   shd_builder_->msl_source_compute_ :
                                   shd_builder_->msl_source_frag_);

      /* Transform feedback, skip compilation. */
      if (src_stage == ShaderStage::FRAGMENT && (transform_feedback_type_ != GPU_SHADER_TFB_NONE))
      {
        shader_library_frag_ = nil;
        break;
      }

      /* Concatenate common source. */
      NSString *str = [NSString stringWithUTF8String:datatoc_mtl_shader_common_msl];
      NSString *source_with_header_a = [str stringByAppendingString:source_to_compile];

      /* Inject unique context ID to avoid cross-context shader cache collisions.
       * Required on macOS 11.0. */
      NSString *source_with_header = source_with_header_a;
      [source_with_header retain];

      /* Prepare Shader Library. */
      NSError *error = nullptr;
      id<MTLLibrary> library = [device newLibraryWithSource:source_with_header
                                                    options:options
                                                      error:&error];
      if (error) {
        /* Only exit out if genuine error and not warning. */
        if ([[error localizedDescription] rangeOfString:@"Compilation succeeded"].location ==
            NSNotFound)
        {
          const char *errors_c_str = [[error localizedDescription] UTF8String];
          const char *sources_c_str = (is_compute) ? shd_builder_->glsl_compute_source_.c_str() :
                                      (src_stage == ShaderStage::VERTEX) ?
                                                     shd_builder_->glsl_vertex_source_.c_str() :
                                                     shd_builder_->glsl_fragment_source_.c_str();

          MTLLogParser parser;
          print_log(Span<const char *>(&sources_c_str, 1),
                    errors_c_str,
                    to_string(src_stage),
                    true,
                    &parser);

          /* Release temporary compilation resources. */
          delete shd_builder_;
          shd_builder_ = nullptr;
          return false;
        }
      }

      BLI_assert(library != nil);

      switch (src_stage) {
        case ShaderStage::VERTEX: {
          /* Store generated library and assign debug name. */
          shader_library_vert_ = library;
          shader_library_vert_.label = [NSString stringWithUTF8String:this->name];
        } break;
        case ShaderStage::FRAGMENT: {
          /* Store generated library for fragment shader and assign debug name. */
          shader_library_frag_ = library;
          shader_library_frag_.label = [NSString stringWithUTF8String:this->name];
        } break;
        case ShaderStage::COMPUTE: {
          /* Store generated library for compute shader and assign debug name. */
          shader_library_compute_ = library;
          shader_library_compute_.label = [NSString stringWithUTF8String:this->name];
        } break;
        case ShaderStage::ANY: {
          /* Suppress warnings. */
          BLI_assert_unreachable();
        } break;
      }

      [source_with_header autorelease];

      /* Move onto next compilation stage. */
      if (!is_compute) {
        src_stage = ShaderStage::FRAGMENT;
      }
      else {
        break;
      }
    }

    /* Create descriptors.
     * Each shader type requires a differing descriptor. */
    if (!is_compute) {
      /* Prepare Render pipeline descriptor. */
      pso_descriptor_ = [[MTLRenderPipelineDescriptor alloc] init];
      pso_descriptor_.label = [NSString stringWithUTF8String:this->name];
    }

    /* Shader has successfully been created. */
    valid_ = true;

    /* Prepare backing data storage for local uniforms. */
    const MTLShaderBufferBlock &push_constant_block = mtl_interface->get_push_constant_block();
    if (push_constant_block.size > 0) {
      push_constant_data_ = MEM_callocN(push_constant_block.size, __func__);
      this->push_constant_bindstate_mark_dirty(true);
    }
    else {
      push_constant_data_ = nullptr;
    }

    /* If this is a compute shader, bake base PSO for compute straight-away.
     * NOTE: This will compile the base unspecialized variant. */
    if (is_compute) {
      /* Set descriptor to default shader constants. */
      MTLComputePipelineStateDescriptor compute_pipeline_descriptor(this->constants.values);

      this->bake_compute_pipeline_state(context_, compute_pipeline_descriptor);
    }
  }

  /* Release temporary compilation resources. */
  delete shd_builder_;
  shd_builder_ = nullptr;
  return true;
}
497
499 const eGPUShaderTFBType geom_type)
500{
501 tf_output_name_list_.clear();
502 for (int i = 0; i < name_list.size(); i++) {
503 tf_output_name_list_.append(std::string(name_list[i]));
504 }
505 transform_feedback_type_ = geom_type;
506}
507
509{
510 BLI_assert(transform_feedback_type_ != GPU_SHADER_TFB_NONE);
511 BLI_assert(buf);
512 transform_feedback_active_ = true;
513 transform_feedback_vertbuf_ = buf;
514 BLI_assert(static_cast<MTLVertBuf *>(transform_feedback_vertbuf_)->get_usage_type() ==
516 return true;
517}
518
520{
521 transform_feedback_active_ = false;
522 transform_feedback_vertbuf_ = nullptr;
523}
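
/* Usage sketch (illustrative): transform-feedback capture expects a device-only vertex
 * buffer, matching the assert in `transform_feedback_enable()` above:
 *
 *   GPUVertBuf *tf_buf = ...;  // Buffer created with GPU_USAGE_DEVICE_ONLY.
 *   shader->transform_feedback_enable(tf_buf);
 *   // ... issue draw calls; vertex outputs are captured into tf_buf ...
 *   shader->transform_feedback_disable();
 */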

/** \} */

/* -------------------------------------------------------------------- */
/** \name Shader Binding
 * \{ */

void MTLShader::bind()
{
  MTLContext *ctx = MTLContext::get();
  if (interface == nullptr || !this->is_valid()) {
    MTL_LOG_WARNING(
        "MTLShader::bind - Shader '%s' has no valid implementation in Metal, draw calls will be "
        "skipped.",
        this->name_get());
  }
  ctx->pipeline_state.active_shader = this;
}

void MTLShader::unbind()
{
  MTLContext *ctx = MTLContext::get();
  ctx->pipeline_state.active_shader = nullptr;
}

void MTLShader::uniform_float(int location, int comp_len, int array_size, const float *data)
{
  BLI_assert(this);
  if (!this->is_valid()) {
    return;
  }
  MTLShaderInterface *mtl_interface = get_interface();
  if (location < 0 || location >= mtl_interface->get_total_uniforms()) {
    MTL_LOG_WARNING("Uniform location %d is not valid in Shader %s", location, this->name_get());
    return;
  }

  /* Fetch more information about uniform from interface. */
  const MTLShaderUniform &uniform = mtl_interface->get_uniform(location);

  /* Prepare to copy data into local shader push constant memory block. */
  BLI_assert(push_constant_data_ != nullptr);
  uint8_t *dest_ptr = (uint8_t *)push_constant_data_;
  dest_ptr += uniform.byte_offset;
  uint32_t copy_size = sizeof(float) * comp_len * array_size;

  /* Test per-element size. It is valid to copy fewer array elements than the total, but each
   * array element needs to match. */
  uint32_t source_per_element_size = sizeof(float) * comp_len;
  uint32_t dest_per_element_size = uniform.size_in_bytes / uniform.array_len;
  BLI_assert_msg(
      source_per_element_size <= dest_per_element_size,
      "Source per-array-element size must be smaller than destination storage capacity for "
      "that data");

  if (source_per_element_size < dest_per_element_size) {
    switch (uniform.type) {

      /* Special case for handling 'vec3' array upload. */
      case MTL_DATATYPE_FLOAT3: {
        int numvecs = uniform.array_len;
        uint8_t *data_c = (uint8_t *)data;

        /* It is more efficient on the host to only modify data if it has changed.
         * Data modifications are small, so memory comparison is cheap.
         * If uniforms have remained unchanged, then we avoid both copying
         * data into the local uniform struct, and upload of the modified uniform
         * contents in the command stream. */
        bool changed = false;
        for (int i = 0; i < numvecs; i++) {
          changed = changed || (memcmp((void *)dest_ptr, (void *)data_c, sizeof(float) * 3) != 0);
          if (changed) {
            memcpy((void *)dest_ptr, (void *)data_c, sizeof(float) * 3);
          }
          data_c += sizeof(float) * 3;
          dest_ptr += sizeof(float) * 4;
        }
        if (changed) {
          this->push_constant_bindstate_mark_dirty(true);
        }
        return;
      }

      /* Special case for handling 'mat3' upload.
       * (Same change-detection rationale as the 'vec3' case above.) */
      case MTL_DATATYPE_FLOAT3x3: {
        int numvecs = 3 * uniform.array_len;
        uint8_t *data_c = (uint8_t *)data;

        bool changed = false;
        for (int i = 0; i < numvecs; i++) {
          changed = changed || (memcmp((void *)dest_ptr, (void *)data_c, sizeof(float) * 3) != 0);
          if (changed) {
            memcpy((void *)dest_ptr, (void *)data_c, sizeof(float) * 3);
          }
          data_c += sizeof(float) * 3;
          dest_ptr += sizeof(float) * 4;
        }
        if (changed) {
          this->push_constant_bindstate_mark_dirty(true);
        }
        return;
      }
      default:
        shader_debug_printf("INCOMPATIBLE UNIFORM TYPE: %d\n", uniform.type);
        break;
    }
  }

  /* Debug checks. */
  BLI_assert_msg(
      copy_size <= uniform.size_in_bytes,
      "Size of provided uniform data is greater than size specified in Shader interface\n");

  /* Only flag UBO as modified if data is different -- This can avoid re-binding of unmodified
   * local uniform data. */
  bool data_changed = (memcmp((void *)dest_ptr, (void *)data, copy_size) != 0);
  if (data_changed) {
    this->push_constant_bindstate_mark_dirty(true);
    memcpy((void *)dest_ptr, (void *)data, copy_size);
  }
}
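
/* Worked example of the 'vec3' special case above (illustrative): a `vec3[2]` uniform
 * occupies two 16-byte slots in the Metal push-constant block, while the source data is
 * tightly packed. Each element is therefore copied individually:
 *
 *   src (packed):  [x0 y0 z0][x1 y1 z1]        -> 24 bytes
 *   dst (aligned): [x0 y0 z0 _][x1 y1 z1 _]    -> 32 bytes
 *
 * Hence `data_c` advances by `sizeof(float) * 3` while `dest_ptr` advances by
 * `sizeof(float) * 4` per element. */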

void MTLShader::uniform_int(int location, int comp_len, int array_size, const int *data)
{
  BLI_assert(this);
  if (!this->is_valid()) {
    return;
  }

  /* NOTE(Metal): Invalidation warning for uniform re-mapping of texture slots, unsupported in
   * Metal, as we cannot point a texture binding at a different slot. */
  MTLShaderInterface *mtl_interface = this->get_interface();
  if (location >= mtl_interface->get_total_uniforms() &&
      location < (mtl_interface->get_total_uniforms() + mtl_interface->get_total_textures()))
  {
    MTL_LOG_WARNING(
        "Texture uniform location re-mapping unsupported in Metal. (Possibly also bad uniform "
        "location %d)",
        location);
    return;
  }

  if (location < 0 || location >= mtl_interface->get_total_uniforms()) {
    MTL_LOG_WARNING("Uniform is not valid at location %d - Shader %s", location, this->name_get());
    return;
  }

  /* Fetch more information about uniform from interface. */
  const MTLShaderUniform &uniform = mtl_interface->get_uniform(location);

  /* Determine data location in uniform block. */
  BLI_assert(push_constant_data_ != nullptr);
  uint8_t *ptr = (uint8_t *)push_constant_data_;
  ptr += uniform.byte_offset;

  const char *data_to_copy = (char *)data;
  uint data_size_to_copy = sizeof(int) * comp_len * array_size;

  /* Special cases for small-type support, where storage in the shader push constant buffer is
   * smaller than the incoming data. */
  ushort us;
  uchar uc;
  if (uniform.size_in_bytes == 1) {
    /* Convert integer storage value down to uchar. */
    data_size_to_copy = uniform.size_in_bytes;
    uc = *data;
    data_to_copy = (char *)&uc;
  }
  else if (uniform.size_in_bytes == 2) {
    /* Convert integer storage value down to ushort. */
    data_size_to_copy = uniform.size_in_bytes;
    us = *data;
    data_to_copy = (char *)&us;
  }
  else {
    BLI_assert_msg(
        (mtl_get_data_type_alignment(uniform.type) % sizeof(int)) == 0,
        "When uniform inputs are provided as integers, the underlying type must adhere "
        "to alignment per-component. If this test fails, the input data cannot be directly copied "
        "to the buffer. e.g. Array of small types uchar/bool/ushort etc; are currently not "
        "handled.");
  }

  /* Copy data into local block. Only flag UBO as modified if data is different.
   * This can avoid re-binding of unmodified local uniform data, reducing
   * the total number of copy operations needed and data transfers between
   * CPU and GPU. */
  bool data_changed = (memcmp((void *)ptr, (void *)data_to_copy, data_size_to_copy) != 0);
  if (data_changed) {
    this->push_constant_bindstate_mark_dirty(true);
    memcpy((void *)ptr, (void *)data_to_copy, data_size_to_copy);
  }
}

bool MTLShader::get_push_constant_is_dirty()
{
  return push_constant_modified_;
}

void MTLShader::push_constant_bindstate_mark_dirty(bool is_dirty)
{
  push_constant_modified_ = is_dirty;
}

/* Attempts to pre-generate a PSO based on the parent shader's PSO
 * (Render shaders only). */
void MTLShader::warm_cache(int limit)
{
  if (parent_shader_ != nullptr) {
    MTLContext *ctx = MTLContext::get();
    MTLShader *parent_mtl = static_cast<MTLShader *>(parent_shader_);

    /* Extract PSO descriptors from parent shader. */
    blender::Vector<MTLRenderPipelineStateDescriptor> descriptors;
    blender::Vector<MTLPrimitiveTopologyClass> prim_classes;

    parent_mtl->pso_cache_lock_.lock();
    for (const auto &pso_entry : parent_mtl->pso_cache_.items()) {
      const MTLRenderPipelineStateDescriptor &pso_descriptor = pso_entry.key;
      const MTLRenderPipelineStateInstance *pso_inst = pso_entry.value;
      descriptors.append(pso_descriptor);
      prim_classes.append(pso_inst->prim_type);
    }
    parent_mtl->pso_cache_lock_.unlock();

    /* Warm shader cache with applied limit.
     * If limit is <= 0, compile all PSO permutations. */
    limit = (limit > 0) ? limit : descriptors.size();
    for (int i : IndexRange(min_ii(descriptors.size(), limit))) {
      const MTLRenderPipelineStateDescriptor &pso_descriptor = descriptors[i];
      const MTLPrimitiveTopologyClass &prim_class = prim_classes[i];
      bake_pipeline_state(ctx, prim_class, pso_descriptor);
    }
  }
}
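
/* Usage sketch (illustrative, assuming the high-level `GPU_shader_warm_cache()` entry
 * point; the exact caller lives outside this file):
 *
 *   GPU_shader_warm_cache(sh, 16);  // Pre-compile up to 16 PSO variants from the parent.
 *
 * which routes to `MTLShader::warm_cache()` on the Metal back-end. */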

/** \} */

/* -------------------------------------------------------------------- */
/** \name METAL Custom Behavior
 * \{ */

void MTLShader::set_vertex_function_name(NSString *vert_function_name)
{
  vertex_function_name_ = vert_function_name;
}

void MTLShader::set_fragment_function_name(NSString *frag_function_name)
{
  fragment_function_name_ = frag_function_name;
}

void MTLShader::set_compute_function_name(NSString *compute_function_name)
{
  compute_function_name_ = compute_function_name;
}

void MTLShader::shader_source_from_msl(NSString *input_vertex_source,
                                       NSString *input_fragment_source)
{
  BLI_assert(shd_builder_ != nullptr);
  shd_builder_->msl_source_vert_ = input_vertex_source;
  shd_builder_->msl_source_frag_ = input_fragment_source;
  shd_builder_->source_from_msl_ = true;
}

void MTLShader::shader_compute_source_from_msl(NSString *input_compute_source)
{
  BLI_assert(shd_builder_ != nullptr);
  shd_builder_->msl_source_compute_ = input_compute_source;
  shd_builder_->source_from_msl_ = true;
}

void MTLShader::set_interface(MTLShaderInterface *interface)
{
  /* Assign gpu::Shader super-class interface. */
  BLI_assert(Shader::interface == nullptr);
  Shader::interface = interface;
}

/** \} */

/* -------------------------------------------------------------------- */
/** \name Shader specialization common utilities
 * \{ */

/* Populates `values` with the function constants from the given
 * #SpecializationStateDescriptor. */
static void populate_specialization_constant_values(
    MTLFunctionConstantValues *values,
    const Shader::Constants &shader_constants,
    const SpecializationStateDescriptor &specialization_descriptor)
{
  for (auto i : shader_constants.types.index_range()) {
    const Shader::Constants::Value &value = specialization_descriptor.values[i];

    uint index = i + MTL_SHADER_SPECIALIZATION_CONSTANT_BASE_ID;
    switch (shader_constants.types[i]) {
      case Type::INT:
        [values setConstantValue:&value.i type:MTLDataTypeInt atIndex:index];
        break;
      case Type::UINT:
        [values setConstantValue:&value.u type:MTLDataTypeUInt atIndex:index];
        break;
      case Type::BOOL:
        [values setConstantValue:&value.u type:MTLDataTypeBool atIndex:index];
        break;
      case Type::FLOAT:
        [values setConstantValue:&value.f type:MTLDataTypeFloat atIndex:index];
        break;
      default:
        BLI_assert_msg(false, "Unsupported custom constant type.");
        break;
    }
  }
}
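
/* Usage sketch (illustrative): both PSO bake paths below call this helper with the
 * specialization state captured in their descriptor:
 *
 *   MTLFunctionConstantValues *values = [[MTLFunctionConstantValues new] autorelease];
 *   populate_specialization_constant_values(
 *       values, this->constants, pipeline_descriptor.specialization_state);
 */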

/** \} */

/* -------------------------------------------------------------------- */
/** \name Bake Pipeline State Objects
 * \{ */

/* Bakes or fetches a render pipeline state using the current
 * #MTLRenderPipelineStateDescriptor state, resolved from the active frame-buffer
 * and context pipeline state. */
MTLRenderPipelineStateInstance *MTLShader::bake_current_pipeline_state(
    MTLContext *ctx, MTLPrimitiveTopologyClass prim_type)
{
  /* NOTE(Metal): PSO cache can be accessed from multiple threads, though these operations should
   * be thread-safe due to organization of high-level renderer. If there are any issues, then
   * access can be guarded as appropriate. */
  BLI_assert(this->is_valid());

  /* NOTE(Metal): Vertex input assembly description will have been populated externally
   * via #MTLBatch or #MTLImmediate during binding or draw. */

  /* Resolve Context Frame-buffer state. */
  MTLFrameBuffer *framebuffer = ctx->get_current_framebuffer();

  /* Update global pipeline descriptor. */
  MTLStateManager *state_manager = static_cast<MTLStateManager *>(
      MTLContext::get()->state_manager);
  MTLRenderPipelineStateDescriptor &pipeline_descriptor = state_manager->get_pipeline_descriptor();

  pipeline_descriptor.num_color_attachments = 0;
  for (int attachment = 0; attachment < GPU_FB_MAX_COLOR_ATTACHMENT; attachment++) {
    MTLAttachment color_attachment = framebuffer->get_color_attachment(attachment);

    if (color_attachment.used) {
      /* If SRGB is disabled and format is SRGB, use color data directly with no conversions
       * between linear and SRGB. */
      MTLPixelFormat mtl_format = gpu_texture_format_to_metal(
          color_attachment.texture->format_get());
      if (framebuffer->get_is_srgb() && !framebuffer->get_srgb_enabled()) {
        mtl_format = MTLPixelFormatRGBA8Unorm;
      }
      pipeline_descriptor.color_attachment_format[attachment] = mtl_format;
    }
    else {
      pipeline_descriptor.color_attachment_format[attachment] = MTLPixelFormatInvalid;
    }

    pipeline_descriptor.num_color_attachments += (color_attachment.used) ? 1 : 0;
  }
  MTLAttachment depth_attachment = framebuffer->get_depth_attachment();
  MTLAttachment stencil_attachment = framebuffer->get_stencil_attachment();
  pipeline_descriptor.depth_attachment_format = (depth_attachment.used) ?
                                                    gpu_texture_format_to_metal(
                                                        depth_attachment.texture->format_get()) :
                                                    MTLPixelFormatInvalid;
  pipeline_descriptor.stencil_attachment_format =
      (stencil_attachment.used) ?
          gpu_texture_format_to_metal(stencil_attachment.texture->format_get()) :
          MTLPixelFormatInvalid;

  /* Resolve Context Pipeline State (required by PSO). */
  pipeline_descriptor.color_write_mask = ctx->pipeline_state.color_write_mask;
  pipeline_descriptor.blending_enabled = ctx->pipeline_state.blending_enabled;
  pipeline_descriptor.alpha_blend_op = ctx->pipeline_state.alpha_blend_op;
  pipeline_descriptor.rgb_blend_op = ctx->pipeline_state.rgb_blend_op;
  pipeline_descriptor.dest_alpha_blend_factor = ctx->pipeline_state.dest_alpha_blend_factor;
  pipeline_descriptor.dest_rgb_blend_factor = ctx->pipeline_state.dest_rgb_blend_factor;
  pipeline_descriptor.src_alpha_blend_factor = ctx->pipeline_state.src_alpha_blend_factor;
  pipeline_descriptor.src_rgb_blend_factor = ctx->pipeline_state.src_rgb_blend_factor;
  pipeline_descriptor.point_size = ctx->pipeline_state.point_size;

  /* Resolve clipping plane enablement. */
  pipeline_descriptor.clipping_plane_enable_mask = 0;
  for (const int plane : IndexRange(6)) {
    pipeline_descriptor.clipping_plane_enable_mask =
        pipeline_descriptor.clipping_plane_enable_mask |
        ((ctx->pipeline_state.clip_distance_enabled[plane]) ? (1 << plane) : 0);
  }

  /* Primitive Type -- Primitive topology class needs to be specified for layered rendering. */
  bool requires_specific_topology_class = uses_gpu_layer || uses_gpu_viewport_index ||
                                          prim_type == MTLPrimitiveTopologyClassPoint;
  pipeline_descriptor.vertex_descriptor.prim_topology_class =
      (requires_specific_topology_class) ? prim_type : MTLPrimitiveTopologyClassUnspecified;

  /* Specialization configuration. */
  pipeline_descriptor.specialization_state = {this->constants.values};

  /* Bake pipeline state using global descriptor. */
  return bake_pipeline_state(ctx, prim_type, pipeline_descriptor);
}

/* Variant which bakes a pipeline state based on an existing MTLRenderPipelineStateDescriptor.
 * This function should be callable from a secondary compilation thread. */
MTLRenderPipelineStateInstance *MTLShader::bake_pipeline_state(
    MTLContext *ctx,
    MTLPrimitiveTopologyClass prim_type,
    const MTLRenderPipelineStateDescriptor &pipeline_descriptor)
{
  /* Fetch shader interface. */
  MTLShaderInterface *mtl_interface = this->get_interface();
  BLI_assert(mtl_interface);
  BLI_assert(this->is_valid());

  /* Check if current PSO exists in the cache. */
  pso_cache_lock_.lock();
  MTLRenderPipelineStateInstance **pso_lookup = pso_cache_.lookup_ptr(pipeline_descriptor);
  MTLRenderPipelineStateInstance *pipeline_state = (pso_lookup) ? *pso_lookup : nullptr;
  pso_cache_lock_.unlock();

  if (pipeline_state != nullptr) {
    return pipeline_state;
  }

  /* TODO: When fetching a specialized variant of a shader, if this does not yet exist, verify
   * whether the base unspecialized variant exists:
   * - If unspecialized version exists: Compile specialized PSO asynchronously, returning base PSO
   *   and flagging state of specialization in cache as being built.
   * - If unspecialized does NOT exist, build specialized version straight away, as we pay the
   *   cost of compilation in both cases regardless. */

  /* Generate new Render Pipeline State Object (PSO). */
  @autoreleasepool {
    /* Prepare Render Pipeline Descriptor. */

    /* Setup function specialization constants, used to modify and optimize
     * generated code based on current render pipeline configuration. */
    MTLFunctionConstantValues *values = [[MTLFunctionConstantValues new] autorelease];

    /* Custom function constant values: */
    populate_specialization_constant_values(
        values, this->constants, pipeline_descriptor.specialization_state);

    /* Prepare Vertex descriptor based on current pipeline vertex binding state. */
    MTLRenderPipelineDescriptor *desc = pso_descriptor_;
    [desc reset];
    pso_descriptor_.label = [NSString stringWithUTF8String:this->name];

    /* Offset the bind index for Uniform buffers such that they begin after the VBO
     * buffer bind slots. `MTL_uniform_buffer_base_index` is passed as a function
     * specialization constant, customized per unique pipeline state permutation.
     *
     * NOTE: For binding point compaction, we could use the number of VBOs present
     * in the current PSO configuration `pipeline_descriptor.vertex_descriptor.num_vert_buffers`.
     * However, it is more efficient to simply offset the uniform buffer base index to the
     * maximal number of VBO bind-points, as then UBO bind-points for similar draw calls
     * will align and avoid the requirement for additional binding. */
    int MTL_uniform_buffer_base_index = pipeline_descriptor.vertex_descriptor.num_vert_buffers + 1;

    /* Null buffer index is used if an attribute is not found in the
     * bound VBOs #VertexFormat. */
    int null_buffer_index = pipeline_descriptor.vertex_descriptor.num_vert_buffers;
    bool using_null_buffer = false;

    if (this->get_uses_ssbo_vertex_fetch()) {
      /* If using SSBO Vertex fetch mode, no vertex descriptor is required
       * as we won't be using stage-in. */
      desc.vertexDescriptor = nil;
      desc.inputPrimitiveTopology = MTLPrimitiveTopologyClassUnspecified;

      /* We want to offset the uniform buffer base to allow for sufficient VBO binding slots - We
       * also require +1 slot for the Index buffer. */
      MTL_uniform_buffer_base_index = MTL_SSBO_VERTEX_FETCH_IBO_INDEX + 1;
    }
    else {
      for (const uint i :
           IndexRange(pipeline_descriptor.vertex_descriptor.max_attribute_value + 1))
      {

        /* Metal back-end attribute descriptor state. */
        const MTLVertexAttributeDescriptorPSO &attribute_desc =
            pipeline_descriptor.vertex_descriptor.attributes[i];

        /* Flag format conversion.
         * In some cases, Metal cannot implicitly convert between data types.
         * In these instances, the fetch mode #GPUVertFetchMode as provided in the vertex format
         * is passed in, and used to populate function constants named: MTL_AttributeConvert0..15.
         *
         * It is then the responsibility of the vertex shader to perform any necessary type
         * casting.
         *
         * See `mtl_shader.hh` for more information. Relevant Metal API documentation:
         * https://developer.apple.com/documentation/metal/mtlvertexattributedescriptor/1516081-format?language=objc
         */
        if (attribute_desc.format == MTLVertexFormatInvalid) {
          /* If attributes are non-contiguous, we can skip over gaps. */
          shader_debug_printf(
              "MTLShader: baking pipeline state for '%s'- skipping input attribute at "
              "index '%d' but none was specified in the current vertex state",
              mtl_interface->get_name(),
              i);

          /* Write out null conversion constant if attribute unused. */
          int MTL_attribute_conversion_mode = 0;
          [values setConstantValue:&MTL_attribute_conversion_mode
                              type:MTLDataTypeInt
                          withName:[NSString stringWithFormat:@"MTL_AttributeConvert%d", i]];
          continue;
        }

        int MTL_attribute_conversion_mode = (int)attribute_desc.format_conversion_mode;
        [values setConstantValue:&MTL_attribute_conversion_mode
                            type:MTLDataTypeInt
                        withName:[NSString stringWithFormat:@"MTL_AttributeConvert%d", i]];
        if (MTL_attribute_conversion_mode == GPU_FETCH_INT_TO_FLOAT_UNIT ||
            MTL_attribute_conversion_mode == GPU_FETCH_INT_TO_FLOAT)
        {
          shader_debug_printf(
              "TODO(Metal): Shader %s needs to support internal format conversion\n",
              mtl_interface->get_name());
        }

        /* Copy metal back-end attribute descriptor state into PSO descriptor.
         * NOTE: need to copy each element due to direct assignment restrictions. */
        MTLVertexAttributeDescriptor *mtl_attribute = desc.vertexDescriptor.attributes[i];

        mtl_attribute.format = attribute_desc.format;
        mtl_attribute.offset = attribute_desc.offset;
        mtl_attribute.bufferIndex = attribute_desc.buffer_index;
      }

      for (const uint i : IndexRange(pipeline_descriptor.vertex_descriptor.num_vert_buffers)) {
        /* Metal back-end state buffer layout. */
        const MTLVertexBufferLayoutDescriptorPSO &buf_layout =
            pipeline_descriptor.vertex_descriptor.buffer_layouts[i];
        /* Copy metal back-end buffer layout state into PSO descriptor.
         * NOTE: need to copy each element due to copying from internal
         * back-end descriptor to Metal API descriptor. */
        MTLVertexBufferLayoutDescriptor *mtl_buf_layout = desc.vertexDescriptor.layouts[i];

        mtl_buf_layout.stepFunction = buf_layout.step_function;
        mtl_buf_layout.stepRate = buf_layout.step_rate;
        mtl_buf_layout.stride = buf_layout.stride;
      }

      /* Mark empty attribute conversion. */
      for (int i = pipeline_descriptor.vertex_descriptor.max_attribute_value + 1;
           i < GPU_VERT_ATTR_MAX_LEN;
           i++)
      {
        int MTL_attribute_conversion_mode = 0;
        [values setConstantValue:&MTL_attribute_conversion_mode
                            type:MTLDataTypeInt
                        withName:[NSString stringWithFormat:@"MTL_AttributeConvert%d", i]];
      }

      /* DEBUG: Missing/empty attributes. */
      /* Attributes are normally mapped as part of the state setting based on the used
       * #GPUVertFormat, however, if attributes have not been set, we can sort them out here. */
      for (const uint i : IndexRange(mtl_interface->get_total_attributes())) {
        const MTLShaderInputAttribute &attribute = mtl_interface->get_attribute(i);
        MTLVertexAttributeDescriptor *current_attribute =
            desc.vertexDescriptor.attributes[attribute.location];

        if (current_attribute.format == MTLVertexFormatInvalid) {
#if MTL_DEBUG_SHADER_ATTRIBUTES == 1
          printf("-> Filling in unbound attribute '%s' for shader PSO '%s' with location: %u\n",
                 mtl_interface->get_name_at_offset(attribute.name_offset),
                 mtl_interface->get_name(),
                 attribute.location);
#endif
          current_attribute.format = attribute.format;
          current_attribute.offset = 0;
          current_attribute.bufferIndex = null_buffer_index;

          /* Add Null vert buffer binding for invalid attributes. */
          if (!using_null_buffer) {
            MTLVertexBufferLayoutDescriptor *null_buf_layout =
                desc.vertexDescriptor.layouts[null_buffer_index];

            /* Use constant step function such that null buffer can
             * contain just a singular dummy attribute. */
            null_buf_layout.stepFunction = MTLVertexStepFunctionConstant;
            null_buf_layout.stepRate = 0;
            null_buf_layout.stride = max_ii(null_buf_layout.stride, attribute.size);

            /* If we are using the maximum number of vertex buffers, or tight binding indices,
             * MTL_uniform_buffer_base_index needs shifting to the bind slot after the null buffer
             * index. */
            if (null_buffer_index >= MTL_uniform_buffer_base_index) {
              MTL_uniform_buffer_base_index = null_buffer_index + 1;
            }
            using_null_buffer = true;
#if MTL_DEBUG_SHADER_ATTRIBUTES == 1
            MTL_LOG_INFO("Setting up buffer binding for null attribute with buffer index %d",
                         null_buffer_index);
#endif
          }
        }
      }

      /* Primitive Topology. */
      desc.inputPrimitiveTopology = pipeline_descriptor.vertex_descriptor.prim_topology_class;
    }

    /* Update constant value for 'MTL_uniform_buffer_base_index'. */
    [values setConstantValue:&MTL_uniform_buffer_base_index
                        type:MTLDataTypeInt
                    withName:@"MTL_uniform_buffer_base_index"];
1157
1158 /* Storage buffer bind index.
1159 * This is always relative to MTL_uniform_buffer_base_index, plus the number of active buffers,
1160 * and an additional space for the push constant block.
1161 * If the shader does not have any uniform blocks, then we can place directly after the push
1162 * constant block. As we do not need an extra spot for the UBO at index '0'. */
1163 int MTL_storage_buffer_base_index = MTL_uniform_buffer_base_index + 1 +
1164 ((mtl_interface->get_total_uniform_blocks() > 0) ?
1165 mtl_interface->get_total_uniform_blocks() :
1166 0);
1167 [values setConstantValue:&MTL_storage_buffer_base_index
1168 type:MTLDataTypeInt
1169 withName:@"MTL_storage_buffer_base_index"];
1170
1171 /* Transform feedback constant.
1172 * Ensure buffer is placed after existing buffers, including default buffers. */
1173 int MTL_transform_feedback_buffer_index = -1;
1174 if (this->transform_feedback_type_ != GPU_SHADER_TFB_NONE) {
1175 /* If using argument buffers, insert index after argument buffer index. Otherwise, insert
1176 * after uniform buffer bindings. */
1177 MTL_transform_feedback_buffer_index =
1178 MTL_uniform_buffer_base_index +
1179 ((mtl_interface->uses_argument_buffer_for_samplers()) ?
1181 (mtl_interface->get_max_buffer_index() + 2));
1182 }
1183
1184 if (this->transform_feedback_type_ != GPU_SHADER_TFB_NONE) {
1185 [values setConstantValue:&MTL_transform_feedback_buffer_index
1186 type:MTLDataTypeInt
1187 withName:@"MTL_transform_feedback_buffer_index"];
1188 }
1189
1190 /* Clipping planes. */
1191 int MTL_clip_distances_enabled = (pipeline_descriptor.clipping_plane_enable_mask > 0) ? 1 : 0;
1192
1193 /* Only define specialization constant if planes are required.
1194 * We guard clip_planes usage on this flag. */
1195 [values setConstantValue:&MTL_clip_distances_enabled
1196 type:MTLDataTypeInt
1197 withName:@"MTL_clip_distances_enabled"];
1198
1199 if (MTL_clip_distances_enabled > 0) {
1200 /* Assign individual enablement flags. Only define a flag function constant
1201 * if it is used. */
1202 for (const int plane : IndexRange(6)) {
1203 int plane_enabled = ctx->pipeline_state.clip_distance_enabled[plane] ? 1 : 0;
1204 if (plane_enabled) {
1205 [values
1206 setConstantValue:&plane_enabled
1207 type:MTLDataTypeInt
1208 withName:[NSString stringWithFormat:@"MTL_clip_distance_enabled%d", plane]];
1209 }
1210 }
1211 }
1212
1213 /* gl_PointSize constant. */
1214 bool null_pointsize = true;
1215 float MTL_pointsize = pipeline_descriptor.point_size;
1216 if (pipeline_descriptor.vertex_descriptor.prim_topology_class ==
1217 MTLPrimitiveTopologyClassPoint)
1218 {
1219 /* `if pointsize is > 0.0`, PROGRAM_POINT_SIZE is enabled, and `gl_PointSize` shader keyword
1220 * overrides the value. Otherwise, if < 0.0, use global constant point size. */
1221 if (MTL_pointsize < 0.0) {
1222 MTL_pointsize = fabsf(MTL_pointsize);
1223 [values setConstantValue:&MTL_pointsize
1224 type:MTLDataTypeFloat
1225 withName:@"MTL_global_pointsize"];
1226 null_pointsize = false;
1227 }
1228 }
1229
1230 if (null_pointsize) {
1231 MTL_pointsize = 0.0f;
1232 [values setConstantValue:&MTL_pointsize
1233 type:MTLDataTypeFloat
1234 withName:@"MTL_global_pointsize"];
1235 }

    /* Compile functions. */
    NSError *error = nullptr;
    desc.vertexFunction = [shader_library_vert_ newFunctionWithName:vertex_function_name_
                                                     constantValues:values
                                                              error:&error];
    if (error) {
      bool has_error = (
          [[error localizedDescription] rangeOfString:@"Compilation succeeded"].location ==
          NSNotFound);

      const char *errors_c_str = [[error localizedDescription] UTF8String];
      const char *sources_c_str = shd_builder_->glsl_vertex_source_.c_str();

      MTLLogParser parser;
      print_log(
          Span<const char *>(&sources_c_str, 1), errors_c_str, "VertShader", has_error, &parser);

      /* Only exit out if genuine error and not warning. */
      if (has_error) {
        return nullptr;
      }
    }

    /* If transform feedback is used, Vertex-only stage. */
    if (transform_feedback_type_ == GPU_SHADER_TFB_NONE) {
      desc.fragmentFunction = [shader_library_frag_ newFunctionWithName:fragment_function_name_
                                                         constantValues:values
                                                                  error:&error];
      if (error) {
        bool has_error = (
            [[error localizedDescription] rangeOfString:@"Compilation succeeded"].location ==
            NSNotFound);

        const char *errors_c_str = [[error localizedDescription] UTF8String];
        const char *sources_c_str = shd_builder_->glsl_fragment_source_.c_str();

        MTLLogParser parser;
        print_log(
            Span<const char *>(&sources_c_str, 1), errors_c_str, "FragShader", has_error, &parser);

        /* Only exit out if genuine error and not warning. */
        if (has_error) {
          return nullptr;
        }
      }
    }
    else {
      desc.fragmentFunction = nil;
      desc.rasterizationEnabled = false;
    }
1287
1288 /* Setup pixel format state */
1289 for (int color_attachment = 0; color_attachment < GPU_FB_MAX_COLOR_ATTACHMENT;
1290 color_attachment++)
1291 {
1292 /* Fetch color attachment pixel format in back-end pipeline state. */
1293 MTLPixelFormat pixel_format = pipeline_descriptor.color_attachment_format[color_attachment];
1294 /* Populate MTL API PSO attachment descriptor. */
1295 MTLRenderPipelineColorAttachmentDescriptor *col_attachment =
1296 desc.colorAttachments[color_attachment];
1297
1298 col_attachment.pixelFormat = pixel_format;
1299 if (pixel_format != MTLPixelFormatInvalid) {
1300 bool format_supports_blending = mtl_format_supports_blending(pixel_format);
1301
1302 col_attachment.writeMask = pipeline_descriptor.color_write_mask;
1303 col_attachment.blendingEnabled = pipeline_descriptor.blending_enabled &&
1304 format_supports_blending;
1305 if (format_supports_blending && pipeline_descriptor.blending_enabled) {
1306 col_attachment.alphaBlendOperation = pipeline_descriptor.alpha_blend_op;
1307 col_attachment.rgbBlendOperation = pipeline_descriptor.rgb_blend_op;
1308 col_attachment.destinationAlphaBlendFactor = pipeline_descriptor.dest_alpha_blend_factor;
1309 col_attachment.destinationRGBBlendFactor = pipeline_descriptor.dest_rgb_blend_factor;
1310 col_attachment.sourceAlphaBlendFactor = pipeline_descriptor.src_alpha_blend_factor;
1311 col_attachment.sourceRGBBlendFactor = pipeline_descriptor.src_rgb_blend_factor;
1312 }
1313 else {
1314 if (pipeline_descriptor.blending_enabled && !format_supports_blending) {
1316 "[Warning] Attempting to Bake PSO, but MTLPixelFormat %d does not support "
1317 "blending\n",
1318 *((int *)&pixel_format));
1319 }
1320 }
1321 }
1322 }
1323 desc.depthAttachmentPixelFormat = pipeline_descriptor.depth_attachment_format;
1324 desc.stencilAttachmentPixelFormat = pipeline_descriptor.stencil_attachment_format;
1325
1326 /* Bind-point range validation.
1327 * We need to ensure that the PSO will have valid bind-point ranges, or is using the
1328 * appropriate bindless fallback path if any bind limits are exceeded. */
1329#ifdef NDEBUG
1330 /* Ensure Buffer bindings are within range. */
1331 BLI_assert_msg((MTL_uniform_buffer_base_index + get_max_ubo_index() + 2) <
1333 "UBO and SSBO bindings exceed the fragment bind table limit.");
1334
1335 /* Transform feedback buffer. */
1336 if (transform_feedback_type_ != GPU_SHADER_TFB_NONE) {
1337 BLI_assert_msg(MTL_transform_feedback_buffer_index < MTL_MAX_BUFFER_BINDINGS,
1338 "Transform feedback buffer binding exceeds the fragment bind table limit.");
1339 }
1340
1341 /* Argument buffer. */
1342 if (mtl_interface->uses_argument_buffer_for_samplers()) {
1344 "Argument buffer binding exceeds the fragment bind table limit.");
1345 }
1346#endif

    /* Compile PSO. */
    MTLAutoreleasedRenderPipelineReflection reflection_data;
    id<MTLRenderPipelineState> pso = [ctx->device
        newRenderPipelineStateWithDescriptor:desc
                                     options:MTLPipelineOptionBufferTypeInfo
                                  reflection:&reflection_data
                                       error:&error];
    if (error) {
      NSLog(@"Failed to create PSO for shader: %s error %@\n", this->name, error);
      BLI_assert(false);
      return nullptr;
    }
    else if (!pso) {
      NSLog(@"Failed to create PSO for shader: %s, but no error was provided!\n", this->name);
      BLI_assert(false);
      return nullptr;
    }
    else {
#if 0
      NSLog(@"Successfully compiled PSO for shader: %s (Metal Context: %p)\n", this->name, ctx);
#endif
    }

    /* Prepare pipeline state instance. */
    MTLRenderPipelineStateInstance *pso_inst = new MTLRenderPipelineStateInstance();
    pso_inst->vert = desc.vertexFunction;
    pso_inst->frag = desc.fragmentFunction;
    pso_inst->pso = pso;
    pso_inst->base_uniform_buffer_index = MTL_uniform_buffer_base_index;
    pso_inst->base_storage_buffer_index = MTL_storage_buffer_base_index;
    pso_inst->null_attribute_buffer_index = (using_null_buffer) ? null_buffer_index : -1;
    pso_inst->transform_feedback_buffer_index = MTL_transform_feedback_buffer_index;
    pso_inst->prim_type = prim_type;

    pso_inst->reflection_data_available = (reflection_data != nil);
    if (reflection_data != nil) {

      /* Extract shader reflection data for buffer bindings.
       * This reflection data is used to contrast the binding information
       * we know about in the interface against the bindings in the finalized
       * PSO. This accounts for bindings which have been stripped out during
       * optimization, and allows us to both avoid over-binding and also
       * allows us to verify size-correctness for bindings, to ensure
       * that buffers bound are not smaller than the size of expected data. */
      NSArray<MTLArgument *> *vert_args = [reflection_data vertexArguments];

      pso_inst->buffer_bindings_reflection_data_vert.clear();
      int buffer_binding_max_ind = 0;

      for (int i = 0; i < [vert_args count]; i++) {
        MTLArgument *arg = [vert_args objectAtIndex:i];
        if ([arg type] == MTLArgumentTypeBuffer) {
          int buf_index = [arg index] - MTL_uniform_buffer_base_index;
          if (buf_index >= 0) {
            buffer_binding_max_ind = max_ii(buffer_binding_max_ind, buf_index);
          }
        }
      }
      pso_inst->buffer_bindings_reflection_data_vert.resize(buffer_binding_max_ind + 1);
      for (int i = 0; i < buffer_binding_max_ind + 1; i++) {
        pso_inst->buffer_bindings_reflection_data_vert[i] = {0, 0, 0, false};
      }

      for (int i = 0; i < [vert_args count]; i++) {
        MTLArgument *arg = [vert_args objectAtIndex:i];
        if ([arg type] == MTLArgumentTypeBuffer) {
          int buf_index = [arg index] - MTL_uniform_buffer_base_index;

          if (buf_index >= 0) {
            pso_inst->buffer_bindings_reflection_data_vert[buf_index] = {
                (uint32_t)([arg index]),
                (uint32_t)([arg bufferDataSize]),
                (uint32_t)([arg bufferAlignment]),
                ([arg isActive] == YES) ? true : false};
          }
        }
      }

      NSArray<MTLArgument *> *frag_args = [reflection_data fragmentArguments];

      pso_inst->buffer_bindings_reflection_data_frag.clear();
      buffer_binding_max_ind = 0;

      for (int i = 0; i < [frag_args count]; i++) {
        MTLArgument *arg = [frag_args objectAtIndex:i];
        if ([arg type] == MTLArgumentTypeBuffer) {
          int buf_index = [arg index] - MTL_uniform_buffer_base_index;
          if (buf_index >= 0) {
            buffer_binding_max_ind = max_ii(buffer_binding_max_ind, buf_index);
          }
        }
      }
      pso_inst->buffer_bindings_reflection_data_frag.resize(buffer_binding_max_ind + 1);
      for (int i = 0; i < buffer_binding_max_ind + 1; i++) {
        pso_inst->buffer_bindings_reflection_data_frag[i] = {0, 0, 0, false};
      }

      for (int i = 0; i < [frag_args count]; i++) {
        MTLArgument *arg = [frag_args objectAtIndex:i];
        if ([arg type] == MTLArgumentTypeBuffer) {
          int buf_index = [arg index] - MTL_uniform_buffer_base_index;
          shader_debug_printf(" BUF IND: %d (arg name: %s)\n", buf_index, [[arg name] UTF8String]);
          if (buf_index >= 0) {
            pso_inst->buffer_bindings_reflection_data_frag[buf_index] = {
                (uint32_t)([arg index]),
                (uint32_t)([arg bufferDataSize]),
                (uint32_t)([arg bufferAlignment]),
                ([arg isActive] == YES) ? true : false};
          }
        }
      }
    }

    /* Insert into PSO cache. */
    pso_cache_lock_.lock();
    pso_inst->shader_pso_index = pso_cache_.size();
    pso_cache_.add(pipeline_descriptor, pso_inst);
    pso_cache_lock_.unlock();
    shader_debug_printf(
        "PSO CACHE: Stored new variant in PSO cache for shader '%s' Hash: '%llu'\n",
        this->name,
        pipeline_descriptor.hash());
    return pso_inst;
  }
}

MTLComputePipelineStateInstance *MTLShader::bake_compute_pipeline_state(
    MTLContext *ctx, MTLComputePipelineStateDescriptor &compute_pipeline_descriptor)
{
  /* NOTE(Metal): Bakes and caches a PSO for compute. */
  BLI_assert(this);
  MTLShaderInterface *mtl_interface = this->get_interface();
  BLI_assert(mtl_interface);
  BLI_assert(this->is_valid());
  BLI_assert(shader_library_compute_ != nil);

  /* Check if current PSO exists in the cache. */
  pso_cache_lock_.lock();
  MTLComputePipelineStateInstance **pso_lookup = compute_pso_cache_.lookup_ptr(
      compute_pipeline_descriptor);
  MTLComputePipelineStateInstance *pipeline_state = (pso_lookup) ? *pso_lookup : nullptr;
  pso_cache_lock_.unlock();

  if (pipeline_state != nullptr) {
    /* Return cached PSO state. */
    BLI_assert(pipeline_state->pso != nil);
    return pipeline_state;
  }
  else {
    /* Prepare Compute Pipeline Descriptor. */

    /* Setup function specialization constants, used to modify and optimize
     * generated code based on current render pipeline configuration. */
    MTLFunctionConstantValues *values = [[MTLFunctionConstantValues new] autorelease];

    /* TODO: Compile specialized shader variants asynchronously. */

    /* Custom function constant values: */
    populate_specialization_constant_values(
        values, this->constants, compute_pipeline_descriptor.specialization_state);

    /* Offset the bind index for Uniform buffers such that they begin after the VBO
     * buffer bind slots. `MTL_uniform_buffer_base_index` is passed as a function
     * specialization constant, customized per unique pipeline state permutation.
     *
     * For Compute shaders, this offset is always zero, but this needs setting as
     * it is expected as part of the common Metal shader header. */
    int MTL_uniform_buffer_base_index = 0;
    [values setConstantValue:&MTL_uniform_buffer_base_index
                        type:MTLDataTypeInt
                    withName:@"MTL_uniform_buffer_base_index"];

    /* Storage buffer bind index.
     * This is always relative to MTL_uniform_buffer_base_index, plus the number of active buffers,
     * and an additional space for the push constant block.
     * If the shader does not have any uniform blocks, then we can place directly after the push
     * constant block. As we do not need an extra spot for the UBO at index '0'. */
    int MTL_storage_buffer_base_index = MTL_uniform_buffer_base_index + 1 +
                                        ((mtl_interface->get_total_uniform_blocks() > 0) ?
                                             mtl_interface->get_total_uniform_blocks() :
                                             0);

    [values setConstantValue:&MTL_storage_buffer_base_index
                        type:MTLDataTypeInt
                    withName:@"MTL_storage_buffer_base_index"];

    /* Compile compute function. */
    NSError *error = nullptr;
    id<MTLFunction> compute_function = [shader_library_compute_
        newFunctionWithName:compute_function_name_
             constantValues:values
                      error:&error];
    compute_function.label = [NSString stringWithUTF8String:this->name];

    if (error) {
      NSLog(@"Compile Error - Metal Shader compute function, error %@", error);

      /* Only exit out if genuine error and not warning. */
      if ([[error localizedDescription] rangeOfString:@"Compilation succeeded"].location ==
          NSNotFound)
      {
        BLI_assert(false);
        return nullptr;
      }
    }

    /* Compile PSO. */
    MTLComputePipelineDescriptor *desc = [[MTLComputePipelineDescriptor alloc] init];
    desc.label = [NSString stringWithUTF8String:this->name];
    desc.computeFunction = compute_function;

    /* Apply the custom 'mtl_max_total_threads_per_threadgroup' tuning parameter, if one was
     * provided via the shader create-info (see `finalize()` above). */
    const MTLCapabilities &capabilities = MTLBackend::get_capabilities();
    if (ELEM(capabilities.gpu, APPLE_GPU_M1, APPLE_GPU_M2)) {
      if (maxTotalThreadsPerThreadgroup_Tuning_ > 0) {
        desc.maxTotalThreadsPerThreadgroup = this->maxTotalThreadsPerThreadgroup_Tuning_;
        MTL_LOG_INFO("Using custom parameter for shader %s value %u\n",
                     this->name,
                     maxTotalThreadsPerThreadgroup_Tuning_);
      }
    }

    id<MTLComputePipelineState> pso = [ctx->device
        newComputePipelineStateWithDescriptor:desc
                                      options:MTLPipelineOptionNone
                                   reflection:nullptr
                                        error:&error];

    /* If the PSO has compiled, but the max theoretical threads-per-threadgroup is lower than the
     * required dispatch size, recompile with an increased limit. NOTE: This will result in a
     * performance drop; ideally the source shader should be modified to reduce local register
     * pressure, or the local work-group size should be reduced.
     * Similarly, the custom tuning parameter "mtl_max_total_threads_per_threadgroup" can be
     * set to a sufficiently large value to avoid this. */
    if (pso) {
      uint num_required_threads_per_threadgroup = compute_pso_common_state_.threadgroup_x_len *
                                                  compute_pso_common_state_.threadgroup_y_len *
                                                  compute_pso_common_state_.threadgroup_z_len;
      if (pso.maxTotalThreadsPerThreadgroup < num_required_threads_per_threadgroup) {
        MTL_LOG_WARNING(
            "Shader '%s' requires %u threads per threadgroup, but PSO limit is: %lu. Recompiling "
            "with increased limit on descriptor.\n",
            this->name,
            num_required_threads_per_threadgroup,
            (unsigned long)pso.maxTotalThreadsPerThreadgroup);
        [pso release];
        pso = nil;
        desc.maxTotalThreadsPerThreadgroup = 1024;
        pso = [ctx->device newComputePipelineStateWithDescriptor:desc
                                                         options:MTLPipelineOptionNone
                                                      reflection:nullptr
                                                           error:&error];
      }
    }

    if (error) {
      NSLog(@"Failed to create PSO for compute shader: %s error %@\n", this->name, error);
      BLI_assert(false);
      return nullptr;
    }
    else if (!pso) {
      NSLog(@"Failed to create PSO for compute shader: %s, but no error was provided!\n",
            this->name);
      BLI_assert(false);
      return nullptr;
    }
    else {
#if 0
      NSLog(@"Successfully compiled compute PSO for shader: %s (Metal Context: %p)\n",
            this->name,
            ctx);
#endif
    }

    [desc release];

    /* Gather reflection data and create MTLComputePipelineStateInstance to store results. */
    MTLComputePipelineStateInstance *compute_pso_instance = new MTLComputePipelineStateInstance();
    compute_pso_instance->compute = compute_function;
    compute_pso_instance->pso = pso;
    compute_pso_instance->base_uniform_buffer_index = MTL_uniform_buffer_base_index;
    compute_pso_instance->base_storage_buffer_index = MTL_storage_buffer_base_index;
    pso_cache_lock_.lock();
    compute_pso_instance->shader_pso_index = compute_pso_cache_.size();
    compute_pso_cache_.add(compute_pipeline_descriptor, compute_pso_instance);
    pso_cache_lock_.unlock();

    return compute_pso_instance;
  }
}
1645
1646
1647/* -------------------------------------------------------------------- */
1648/** \name SSBO vertex-fetch-mode attribute control
1649 * \{ */
1650
1651int MTLShader::ssbo_vertex_type_to_attr_type(MTLVertexFormat attribute_type)
1652{
1653 switch (attribute_type) {
1654 case MTLVertexFormatFloat:
1655 return GPU_SHADER_ATTR_TYPE_FLOAT;
1656 case MTLVertexFormatInt:
1657 return GPU_SHADER_ATTR_TYPE_INT;
1658 case MTLVertexFormatUInt:
1659 return GPU_SHADER_ATTR_TYPE_UINT;
1660 case MTLVertexFormatShort:
1661 return GPU_SHADER_ATTR_TYPE_SHORT;
1662 case MTLVertexFormatUChar:
1663 return GPU_SHADER_ATTR_TYPE_CHAR;
1664 case MTLVertexFormatUChar2:
1665 return GPU_SHADER_ATTR_TYPE_CHAR2;
1666 case MTLVertexFormatUChar3:
1667 return GPU_SHADER_ATTR_TYPE_CHAR3;
1668 case MTLVertexFormatUChar4:
1669 return GPU_SHADER_ATTR_TYPE_CHAR4;
1670 case MTLVertexFormatFloat2:
1671 return GPU_SHADER_ATTR_TYPE_VEC2;
1672 case MTLVertexFormatFloat3:
1673 return GPU_SHADER_ATTR_TYPE_VEC3;
1674 case MTLVertexFormatFloat4:
1675 return GPU_SHADER_ATTR_TYPE_VEC4;
1676 case MTLVertexFormatUInt2:
1677 return GPU_SHADER_ATTR_TYPE_UVEC2;
1678 case MTLVertexFormatUInt3:
1679 return GPU_SHADER_ATTR_TYPE_UVEC3;
1680 case MTLVertexFormatUInt4:
1681 return GPU_SHADER_ATTR_TYPE_UVEC4;
1682 case MTLVertexFormatInt2:
1683 return GPU_SHADER_ATTR_TYPE_IVEC2;
1684 case MTLVertexFormatInt3:
1685 return GPU_SHADER_ATTR_TYPE_IVEC3;
1686 case MTLVertexFormatInt4:
1687 return GPU_SHADER_ATTR_TYPE_IVEC4;
1688 case MTLVertexFormatUCharNormalized:
1689 return GPU_SHADER_ATTR_TYPE_UCHAR_NORM;
1690 case MTLVertexFormatUChar2Normalized:
1691 return GPU_SHADER_ATTR_TYPE_UCHAR2_NORM;
1692 case MTLVertexFormatUChar3Normalized:
1693 return GPU_SHADER_ATTR_TYPE_UCHAR3_NORM;
1694 case MTLVertexFormatUChar4Normalized:
1695 return GPU_SHADER_ATTR_TYPE_UCHAR4_NORM;
1696 case MTLVertexFormatInt1010102Normalized:
1697 return GPU_SHADER_ATTR_TYPE_INT1010102_NORM;
1698 case MTLVertexFormatShort3Normalized:
1699 return GPU_SHADER_ATTR_TYPE_SHORT3_NORM;
1700 default:
1701 BLI_assert_msg(false,
1702 "Not yet supported attribute type for SSBO vertex fetch -- Add entry "
1703 "GPU_SHADER_ATTR_TYPE_** to shader defines, and in this table");
1704 return -1;
1705 }
1706 return -1;
1707}
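#if 0
 /* Illustrative sketch (hypothetical call site): mapping a Metal vertex format to the integer
  * type constant that is written to the SSBO-vertex-fetch attribute-type uniform. */
 int attr_type = MTLShader::ssbo_vertex_type_to_attr_type(MTLVertexFormatFloat3);
 BLI_assert(attr_type == GPU_SHADER_ATTR_TYPE_VEC3);
#endif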
1708
1709void MTLShader::ssbo_vertex_fetch_bind_attributes_begin()
1710{
1711 MTLShaderInterface *mtl_interface = this->get_interface();
1712 ssbo_vertex_attribute_bind_active_ = true;
1713 ssbo_vertex_attribute_bind_mask_ = (1 << mtl_interface->get_total_attributes()) - 1;
1714
1715 /* Reset tracking of actively used VBO bind slots for SSBO vertex fetch mode. */
1716 for (int i = 0; i < MTL_SSBO_VERTEX_FETCH_MAX_VBOS; i++) {
1717 ssbo_vbo_slot_used_[i] = false;
1718 }
1719}
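 /* Example (illustrative): with 3 total attributes the initial bind-mask is
  * (1 << 3) - 1 = 0b111. Each ssbo_vertex_fetch_bind_attribute() call clears one bit, so any
  * bit still set in ssbo_vertex_fetch_bind_attributes_end() denotes an unbound attribute. */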
1720
1721void MTLShader::ssbo_vertex_fetch_bind_attribute(const MTLSSBOAttribute &ssbo_attr)
1722{
1723 /* Fetch attribute. */
1724 MTLShaderInterface *mtl_interface = this->get_interface();
1725 BLI_assert(ssbo_attr.mtl_attribute_index >= 0 &&
1726 ssbo_attr.mtl_attribute_index < mtl_interface->get_total_attributes());
1727 UNUSED_VARS_NDEBUG(mtl_interface);
1728
1729 /* Update bind-mask to mark this attribute as bound; bits left set identify unused attributes. */
1730 BLI_assert((ssbo_vertex_attribute_bind_mask_ & (1 << ssbo_attr.mtl_attribute_index)) ==
1731 (1 << ssbo_attr.mtl_attribute_index) &&
1732 "Attribute has already been bound");
1733 ssbo_vertex_attribute_bind_mask_ &= ~(1 << ssbo_attr.mtl_attribute_index);
1734
1735 /* Fetch attribute uniform addresses from cache. */
1736 ShaderSSBOAttributeBinding &cached_ssbo_attribute =
1737 cached_ssbo_attribute_bindings_[ssbo_attr.mtl_attribute_index];
1738 BLI_assert(cached_ssbo_attribute.attribute_index >= 0);
1739
1740 /* Write attribute descriptor properties to shader uniforms. */
1741 this->uniform_int(cached_ssbo_attribute.uniform_offset, 1, 1, &ssbo_attr.attribute_offset);
1742 this->uniform_int(cached_ssbo_attribute.uniform_stride, 1, 1, &ssbo_attr.per_vertex_stride);
1743 int inst_val = (ssbo_attr.is_instance ? 1 : 0);
1744 this->uniform_int(cached_ssbo_attribute.uniform_fetchmode, 1, 1, &inst_val);
1745 this->uniform_int(cached_ssbo_attribute.uniform_vbo_id, 1, 1, &ssbo_attr.vbo_id);
1746 BLI_assert(ssbo_attr.attribute_format >= 0);
1747 this->uniform_int(cached_ssbo_attribute.uniform_attr_type, 1, 1, &ssbo_attr.attribute_format);
1748 ssbo_vbo_slot_used_[ssbo_attr.vbo_id] = true;
1749}
1750
1751void MTLShader::ssbo_vertex_fetch_bind_attributes_end(
1752 id<MTLRenderCommandEncoder> /*active_encoder*/)
1753{
1754 ssbo_vertex_attribute_bind_active_ = false;
1755
1756 /* If our mask is non-zero, we have unassigned attributes. */
1757 if (ssbo_vertex_attribute_bind_mask_ != 0) {
1758 MTLShaderInterface *mtl_interface = this->get_interface();
1759
1760 /* Determine if there is a free slot we can bind the null buffer to -- we should always
1761 * have at least one free slot in this case. */
1762 int null_attr_buffer_slot = -1;
1763 for (int i = 0; i < MTL_SSBO_VERTEX_FETCH_MAX_VBOS; i++) {
1764 if (!ssbo_vbo_slot_used_[i]) {
1765 null_attr_buffer_slot = i;
1766 break;
1767 }
1768 }
1769 BLI_assert_msg(null_attr_buffer_slot >= 0,
1770 "No suitable bind location for a NULL buffer was found");
1771
1772 for (int i = 0; i < mtl_interface->get_total_attributes(); i++) {
1773 if (ssbo_vertex_attribute_bind_mask_ & (1 << i)) {
1774 const MTLShaderInputAttribute *mtl_shader_attribute = &mtl_interface->get_attribute(i);
1775#if MTL_DEBUG_SHADER_ATTRIBUTES == 1
1776 MTL_LOG_WARNING(
1777 "SSBO Vertex Fetch missing attribute with index: %d. Shader: %s, Attr "
1778 "Name: "
1779 "%s - Null buffer bound",
1780 i,
1781 this->name_get(),
1782 mtl_shader_attribute->name);
1783#endif
1784 /* Bind Attribute with NULL buffer index and stride zero (for constant access). */
1785 MTLSSBOAttribute ssbo_attr(
1786 i, null_attr_buffer_slot, 0, 0, GPU_SHADER_ATTR_TYPE_FLOAT, false);
1787 this->ssbo_vertex_fetch_bind_attribute(ssbo_attr);
1788 MTL_LOG_WARNING(
1789 "Unassigned Shader attribute: %s, Attr Name: %s -- Binding NULL BUFFER to "
1790 "slot %d",
1791 this->name_get(),
1792 mtl_interface->get_name_at_offset(mtl_shader_attribute->name_offset),
1793 null_attr_buffer_slot);
1794 }
1795 }
1796
1797 /* Bind NULL buffer to given VBO slot. */
1798 MTLContext *ctx = MTLContext::get();
1799 id<MTLBuffer> null_buf = ctx->get_null_attribute_buffer();
1800 BLI_assert(null_buf);
1801
1802 MTLRenderPassState &rps = ctx->main_command_buffer.get_render_pass_state();
1803 rps.bind_vertex_buffer(null_buf, 0, null_attr_buffer_slot);
1804 }
1805}
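#if 0
 /* Illustrative sketch of the expected call sequence (hypothetical call site; `shader`,
  * `ssbo_attr` and `active_encoder` are assumed locals). Any attribute not bound between
  * begin()/end() is redirected to the null buffer by ssbo_vertex_fetch_bind_attributes_end(). */
 shader->ssbo_vertex_fetch_bind_attributes_begin();
 shader->ssbo_vertex_fetch_bind_attribute(ssbo_attr); /* Once per active attribute. */
 shader->ssbo_vertex_fetch_bind_attributes_end(active_encoder);
#endif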
1806
1807VertBuf *MTLShader::get_transform_feedback_active_buffer()
1808{
1809 if (transform_feedback_type_ == GPU_SHADER_TFB_NONE || !transform_feedback_active_) {
1810 return nullptr;
1811 }
1812 return transform_feedback_vertbuf_;
1813}
1814
1815bool MTLShader::has_transform_feedback_varying(std::string str)
1816{
1817 if (this->transform_feedback_type_ == GPU_SHADER_TFB_NONE) {
1818 return false;
1819 }
1820
1821 return (std::find(tf_output_name_list_.begin(), tf_output_name_list_.end(), str) !=
1822 tf_output_name_list_.end());
1823}
1824
1825/** \} */
1826
1827/* Since shader compilation runs across multiple threads, we don't want to
1828 * create an instance per context: we want to restrict the number of
1829 * simultaneous compilation threads to ensure system responsiveness.
1830 * Hence the global shared instance. */
1831static MTLParallelShaderCompiler *g_shared_parallel_shader_compiler = nullptr;
1832static std::mutex g_shared_parallel_shader_compiler_mutex;
1833
1834MTLParallelShaderCompiler *get_shared_parallel_shader_compiler()
1835{
1836 std::scoped_lock lock(g_shared_parallel_shader_compiler_mutex);
1837 if (!g_shared_parallel_shader_compiler) {
1838 g_shared_parallel_shader_compiler = new MTLParallelShaderCompiler();
1839 }
1840
1841 g_shared_parallel_shader_compiler->increment_ref_count();
1842 return g_shared_parallel_shader_compiler;
1843}
1846
1847void release_shared_parallel_shader_compiler()
1848{
1849 std::scoped_lock lock(g_shared_parallel_shader_compiler_mutex);
1850
1851 if (!g_shared_parallel_shader_compiler) {
1852 return;
1853 }
1854
1855 g_shared_parallel_shader_compiler->decrement_ref_count();
1856 if (g_shared_parallel_shader_compiler->get_ref_count() == 0) {
1857 delete g_shared_parallel_shader_compiler;
1858 g_shared_parallel_shader_compiler = nullptr;
1859 }
1860}
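#if 0
 /* Illustrative usage (hypothetical call site): each MTLShaderCompiler takes a reference on
  * the shared instance at construction and releases it on destruction; the final release
  * deletes the shared compiler. */
 MTLParallelShaderCompiler *compiler = get_shared_parallel_shader_compiler();
 /* ... submit compilation batches ... */
 release_shared_parallel_shader_compiler();
#endif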
1861
1862/* -------------------------------------------------------------------- */
1863/** \name MTLParallelShaderCompiler
1864 * \{ */
1865
1866MTLParallelShaderCompiler::MTLParallelShaderCompiler()
1867{
1868 BLI_assert(GPU_use_parallel_compilation());
1869
1870 terminate_compile_threads = false;
1871}
1872
1873MTLParallelShaderCompiler::~MTLParallelShaderCompiler()
1874{
1875 /* Shutdown the compiler threads. */
1876 terminate_compile_threads = true;
1877 cond_var.notify_all();
1878
1879 for (auto &thread : compile_threads) {
1880 thread.join();
1881 }
1882
1883 /* Mark any unprocessed work items as ready so we can move
1884 * them into a batch for cleanup. */
1885 if (!parallel_work_queue.empty()) {
1886 std::unique_lock<std::mutex> lock(queue_mutex);
1887 while (!parallel_work_queue.empty()) {
1888 ParallelWork *work_item = parallel_work_queue.front();
1889 work_item->is_ready = true;
1890 parallel_work_queue.pop_front();
1891 }
1892 }
1893
1894 /* Clean up any outstanding batches. */
1895 for (BatchHandle handle : batches.keys()) {
1896 Vector<Shader *> shaders = batch_finalize(handle);
1897 /* Delete any shaders in the batch. */
1898 for (Shader *shader : shaders) {
1899 if (shader) {
1900 delete shader;
1901 }
1902 }
1903 }
1904 BLI_assert(batches.is_empty());
1905}
1906
1907void MTLParallelShaderCompiler::create_compile_threads()
1908{
1909 std::unique_lock<std::mutex> lock(queue_mutex);
1910
1911 /* Return if the compilation threads already exist */
1912 if (!compile_threads.empty()) {
1913 return;
1914 }
1915
1916 /* Limit the number of compiler threads to (performance cores - 1) to
1917 * leave one thread free for main-thread/UI responsiveness. */
1918 const MTLCapabilities &capabilities = MTLBackend::get_capabilities();
1919 int max_mtlcompiler_threads = capabilities.num_performance_cores - 1;
1920
1921 /* Save the main thread context */
1922 GPUContext *main_thread_context = GPU_context_active_get();
1923 MTLContext *metal_context = static_cast<MTLContext *>(unwrap(main_thread_context));
1924 id<MTLDevice> metal_device = metal_context->device;
1925
1926#if defined(MAC_OS_VERSION_13_3)
1927 /* Clamp the number of threads if necessary. */
1928 if (@available(macOS 13.3, *)) {
1929 /* Check we've set the flag to allow more than 2 compile threads. */
1930 BLI_assert(metal_device.shouldMaximizeConcurrentCompilation);
1931 max_mtlcompiler_threads = MIN(int([metal_device maximumConcurrentCompilationTaskCount]),
1932 max_mtlcompiler_threads);
1933 }
1934#endif
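 /* Example (illustrative numbers): with 8 performance cores the pool starts at 8 - 1 = 7
  * threads; if the OS reports a maximumConcurrentCompilationTaskCount of 4, the thread
  * count is clamped down to 4. */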
1935
1936 /* GPU settings for context creation. */
1937 GHOST_GPUSettings gpuSettings = {0};
1938 gpuSettings.context_type = GHOST_kDrawingContextTypeMetal;
1939 if (G.debug & G_DEBUG_GPU) {
1940 gpuSettings.flags |= GHOST_gpuDebugContext;
1941 }
1942 gpuSettings.preferred_device.index = U.gpu_preferred_index;
1943 gpuSettings.preferred_device.vendor_id = U.gpu_preferred_vendor_id;
1944 gpuSettings.preferred_device.device_id = U.gpu_preferred_device_id;
1945
1946 /* Spawn the compiler threads. */
1947 for (int i = 0; i < max_mtlcompiler_threads; i++) {
1948
1949 /* Grab the system handle. */
1950 GHOST_SystemHandle ghost_system = reinterpret_cast<GHOST_SystemHandle>(
1951 GPU_backend_ghost_system_get());
1953
1954 /* Create a Ghost GPU Context using the system handle. */
1955 GHOST_ContextHandle ghost_gpu_context = GHOST_CreateGPUContext(ghost_system, gpuSettings);
1956
1957 /* Create a GPU context for the compile thread to use. */
1958 GPUContext *per_thread_context = GPU_context_create(nullptr, ghost_gpu_context);
1959
1960 /* Restore the main thread context.
1961 * (required as the above context creation also makes it active). */
1962 GPU_context_active_set(main_thread_context);
1963
1964 /* Create a new thread */
1965 compile_threads.push_back(std::thread([this, per_thread_context] {
1966 this->parallel_compilation_thread_func(per_thread_context);
1967 }));
1968 }
1969}
1970
1971void MTLParallelShaderCompiler::parallel_compilation_thread_func(GPUContext *blender_gpu_context)
1972{
1973 /* Contexts can only be created on the main thread, so we have to
1974 * pass one in and make it active here. */
1975 GPU_context_active_set(blender_gpu_context);
1976
1977 MTLContext *metal_context = static_cast<MTLContext *>(unwrap(blender_gpu_context));
1978 MTLShaderCompiler *shader_compiler = static_cast<MTLShaderCompiler *>(metal_context->compiler);
1979
1980 /* This context is only for compilation; it does not need its own instance of the compiler. */
1981 shader_compiler->release_parallel_shader_compiler();
1982
1983 /* Loop until we get the terminate signal */
1984 while (!terminate_compile_threads) {
1985 /* Grab the next shader off of the queue or wait... */
1986 ParallelWork *work_item = nullptr;
1987 {
1988 std::unique_lock<std::mutex> lock(queue_mutex);
1989 cond_var.wait(lock,
1990 [&] { return terminate_compile_threads || !parallel_work_queue.empty(); });
1991 if (terminate_compile_threads || parallel_work_queue.empty()) {
1992 continue;
1993 }
1994 work_item = parallel_work_queue.front();
1995 parallel_work_queue.pop_front();
1996 }
1997
1998 /* Compile a shader */
1999 if (work_item->work_type == PARALLELWORKTYPE_COMPILE_SHADER) {
2000 BLI_assert(work_item->info);
2001
2002 const shader::ShaderCreateInfo *shader_info = work_item->info;
2003 work_item->shader = static_cast<MTLShader *>(
2004 work_item->shader_compiler->compile(*shader_info, true));
2005
2006 if (work_item->shader) {
2007 /* Generate and cache any render PSOs if possible (typically materials only)
2008 * (Finalize() will already bake a Compute PSO if possible) */
2009 work_item->shader->warm_cache(-1);
2010 }
2011 }
2012 /* Bake PSO */
2013 else if (work_item->work_type == PARALLELWORKTYPE_BAKE_PSO) {
2014 MTLShader *shader = work_item->shader;
2015 /* Currently only support Compute */
2016 BLI_assert(shader && shader->has_compute_shader_lib());
2017
2018 /* Create descriptor using these specialization constants. */
2019 MTLComputePipelineStateDescriptor compute_pipeline_descriptor(
2020 work_item->specialization_values);
2021
2022 shader->bake_compute_pipeline_state(metal_context, compute_pipeline_descriptor);
2023 }
2024 else {
2025 BLI_assert(false);
2026 }
2027 work_item->is_ready = true;
2028 }
2029
2030 GPU_context_discard(blender_gpu_context);
2031}
2032
2033BatchHandle MTLParallelShaderCompiler::create_batch(size_t batch_size)
2034{
2035 std::scoped_lock lock(batch_mutex);
2036 BatchHandle batch_handle = next_batch_handle++;
2037 batches.add(batch_handle, {});
2038 Batch &batch = batches.lookup(batch_handle);
2039 if (batch_size) {
2040 batch.items.reserve(batch_size);
2041 }
2042 batch.is_ready = false;
2043 shader_debug_printf("Created batch %llu\n", batch_handle);
2044 return batch_handle;
2045}
2046
2047void MTLParallelShaderCompiler::add_item_to_batch(ParallelWork *work_item,
2048 BatchHandle batch_handle)
2049{
2050 std::scoped_lock lock(batch_mutex);
2051 Batch &batch = batches.lookup(batch_handle);
2052 batch.items.append(work_item);
2053}
2054
2055void MTLParallelShaderCompiler::add_parallel_item_to_queue(ParallelWork *work_item,
2056 BatchHandle batch_handle)
2057{
2058 shader_debug_printf("Request add shader work\n");
2059 if (!terminate_compile_threads) {
2060
2061 /* Defer creation of compilation threads until required */
2062 if (compile_threads.empty()) {
2063 create_compile_threads();
2064 }
2065
2066 add_item_to_batch(work_item, batch_handle);
2067 std::lock_guard<std::mutex> lock(queue_mutex);
2068 parallel_work_queue.push_back(work_item);
2069 cond_var.notify_one();
2070 }
2071}
2072
2073BatchHandle MTLParallelShaderCompiler::batch_compile(MTLShaderCompiler *shader_compiler,
2074 Span<const shader::ShaderCreateInfo *> &infos)
2075{
2077
2078 BatchHandle batch_handle = create_batch(infos.size());
2079
2080 shader_debug_printf("Batch compile %llu shaders (Batch = %llu)\n", infos.size(), batch_handle);
2081
2082 /* We have to finalize all ShaderCreateInfos *before* any parallel compilation,
2083 * as ShaderCreateInfo::finalize() is not thread-safe. */
2084 for (const shader::ShaderCreateInfo *info : infos) {
2085 const_cast<ShaderCreateInfo *>(info)->finalize();
2086 }
2087
2088 for (const shader::ShaderCreateInfo *info : infos) {
2089 ParallelWork *work_item = new ParallelWork;
2090 work_item->info = info;
2091 work_item->shader_compiler = shader_compiler;
2092 work_item->is_ready = false;
2093 work_item->shader = nullptr;
2094 work_item->work_type = PARALLELWORKTYPE_COMPILE_SHADER;
2095 add_parallel_item_to_queue(work_item, batch_handle);
2096 }
2097
2098 return batch_handle;
2099}
2100
2101bool MTLParallelShaderCompiler::batch_is_ready(BatchHandle handle)
2102{
2103 std::scoped_lock lock(batch_mutex);
2104 Batch &batch = batches.lookup(handle);
2105 if (batch.is_ready) {
2106 return true;
2107 }
2108
2109 for (ParallelWork *item : batch.items) {
2110 if (item->is_ready) {
2111 continue;
2112 }
2113 else {
2114 return false;
2115 }
2116 }
2117
2118 batch.is_ready = true;
2119 shader_debug_printf("Batch %llu is now ready\n", handle);
2120 return batch.is_ready;
2121}
2122
2123Vector<Shader *> MTLParallelShaderCompiler::batch_finalize(BatchHandle &handle)
2124{
2125 while (!batch_is_ready(handle)) {
2126 BLI_time_sleep_ms(1);
2127 }
2128 std::scoped_lock lock(batch_mutex);
2129
2130 Batch batch = batches.pop(handle);
2131 Vector<Shader *> result;
2132 for (ParallelWork *item : batch.items) {
2133 result.append(item->shader);
2134 delete item;
2135 }
2136 handle = 0;
2137 return result;
2138}
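#if 0
 /* Illustrative batch-compilation usage (hypothetical call site; `compiler`,
  * `shader_compiler` and `infos` are assumed locals). */
 BatchHandle handle = compiler->batch_compile(shader_compiler, infos);
 while (!compiler->batch_is_ready(handle)) {
 /* Do other work while the compile threads drain the queue. */
 }
 /* Collects the compiled shaders and zeroes `handle`. */
 Vector<Shader *> shaders = compiler->batch_finalize(handle);
#endif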
2139
2140SpecializationBatchHandle MTLParallelShaderCompiler::precompile_specializations(
2141 Span<ShaderSpecialization> specializations)
2142{
2144 /* Zero indicates no batch was created */
2145 SpecializationBatchHandle batch_handle = 0;
2146
2147 for (auto &specialization : specializations) {
2148 MTLShader *sh = static_cast<MTLShader *>(unwrap(specialization.shader));
2149
2150 /* Specialization constants only take effect when we create the PSO.
2151 * We don't have the relevant info to create a render PSO descriptor unless the
2152 * shader has a parent shader (has_parent_shader()), but in that case it would
2153 * (currently) be invalid to apply specialization constants. For these reasons we
2154 * currently only support pre-compilation of compute shaders.
2155 * (Technically we could call makeFunction, but the benefit would likely be minimal.) */
2156 if (!sh->has_compute_shader_lib()) {
2157 continue;
2158 }
2159
2160 BLI_assert_msg(sh->is_valid(), "Shader must be finalized before precompiling specializations");
2161
2162 /* Defer batch creation until we have some work to do */
2163 if (!batch_handle) {
2164 batch_handle = create_batch(1);
2165 }
2166
2167 ParallelWork *work_item = new ParallelWork;
2168 work_item->info = nullptr;
2169 work_item->is_ready = false;
2170 work_item->shader = sh;
2171 work_item->work_type = PARALLELWORKTYPE_BAKE_PSO;
2172
2173 /* Add the specialization constants to the work-item */
2174 for (const SpecializationConstant &constant : specialization.constants) {
2175 const ShaderInput *input = sh->interface->constant_get(constant.name.c_str());
2176 BLI_assert_msg(input != nullptr, "The specialization constant doesn't exist");
2177 work_item->specialization_values[input->location].u = constant.value.u;
2178 }
2179 sh->constants.is_dirty = true;
2180
2181 add_parallel_item_to_queue(work_item, batch_handle);
2182 }
2183 return batch_handle;
2184}
2185
2186bool MTLParallelShaderCompiler::specialization_batch_is_ready(SpecializationBatchHandle &handle)
2187{
2188 /* Check empty batch case where we have no handle */
2189 if (!handle) {
2190 return true;
2191 }
2192
2193 std::scoped_lock lock(batch_mutex);
2194 Batch &batch = batches.lookup(handle);
2195 if (batch.is_ready) {
2196 return true;
2197 }
2198
2199 for (ParallelWork *item : batch.items) {
2200 if (item->is_ready) {
2201 continue;
2202 }
2203 else {
2204 return false;
2205 }
2206 }
2207
2208 batch.is_ready = true;
2209 shader_debug_printf("Specialization Batch %llu is now ready\n", handle);
2210 /* Handle is zeroed once the batch is ready; log before clearing it. */
2211 handle = 0;
2212 return batch.is_ready;
2213}
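#if 0
 /* Illustrative polling loop (hypothetical call site; `compiler` and `specializations` are
  * assumed locals). The query itself zeroes the handle once every PSO in the batch is baked. */
 SpecializationBatchHandle h = compiler->precompile_specializations(specializations);
 while (!compiler->specialization_batch_is_ready(h)) {
 /* Do other work; `h` becomes 0 once the batch is ready. */
 }
#endif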
2214
2215/** \} */
2216
2217/* -------------------------------------------------------------------- */
2218/** \name MTLShaderCompiler
2219 * \{ */
2220
2221MTLShaderCompiler::MTLShaderCompiler()
2222{
2223 parallel_shader_compiler = get_shared_parallel_shader_compiler();
2224}
2225
2226MTLShaderCompiler::~MTLShaderCompiler()
2227{
2228 release_parallel_shader_compiler();
2229}
2230
2231void MTLShaderCompiler::release_parallel_shader_compiler()
2232{
2233 if (parallel_shader_compiler) {
2234 release_shared_parallel_shader_compiler();
2235 parallel_shader_compiler = nullptr;
2236 }
2237}
2238
2239BatchHandle MTLShaderCompiler::batch_compile(Span<const shader::ShaderCreateInfo *> &infos)
2240{
2241 BLI_assert(parallel_shader_compiler);
2242 return parallel_shader_compiler->batch_compile(this, infos);
2243}
2244bool MTLShaderCompiler::batch_is_ready(BatchHandle handle)
2245{
2246 return parallel_shader_compiler->batch_is_ready(handle);
2247}
2248Vector<Shader *> MTLShaderCompiler::batch_finalize(BatchHandle &handle)
2249{
2250 return parallel_shader_compiler->batch_finalize(handle);
2251}
2252SpecializationBatchHandle MTLShaderCompiler::precompile_specializations(
2253 Span<ShaderSpecialization> specializations)
2254{
2255 return parallel_shader_compiler->precompile_specializations(specializations);
2256}
2257
2258bool MTLShaderCompiler::specialization_batch_is_ready(SpecializationBatchHandle &handle)
2259{
2260 return parallel_shader_compiler->specialization_batch_is_ready(handle);
2261}
2262
2263/** \} */
2264
2265} // namespace blender::gpu