Blender V4.5
mtl_batch.mm
Go to the documentation of this file.
1/* SPDX-FileCopyrightText: 2022-2023 Blender Authors
2 *
3 * SPDX-License-Identifier: GPL-2.0-or-later */
4
10
11#include "BLI_assert.h"
12#include "BLI_span.hh"
13
14#include "BKE_global.hh"
15
16#include "GPU_batch.hh"
17#include "GPU_common.hh"
18#include "gpu_shader_private.hh"
19
20#include "mtl_batch.hh"
21#include "mtl_context.hh"
22#include "mtl_debug.hh"
23#include "mtl_index_buffer.hh"
24#include "mtl_shader.hh"
25#include "mtl_storage_buffer.hh"
26#include "mtl_vertex_buffer.hh"
27
28#include <string>
29
30namespace blender::gpu {
31
32/* -------------------------------------------------------------------- */
35void MTLBatch::draw(int v_first, int v_count, int i_first, int i_count)
36{
37 this->draw_advanced(v_first, v_count, i_first, i_count);
38}
39
40void MTLBatch::draw_indirect(GPUStorageBuf *indirect_buf, intptr_t offset)
41{
42 this->draw_advanced_indirect(indirect_buf, offset);
43}
44
45void MTLBatch::MTLVertexDescriptorCache::vertex_descriptor_cache_init(MTLContext *ctx)
46{
47 BLI_assert(ctx != nullptr);
48 this->vertex_descriptor_cache_clear();
49 cache_context_ = ctx;
50}
51
52void MTLBatch::MTLVertexDescriptorCache::vertex_descriptor_cache_clear()
53{
54 cache_life_index_++;
55 cache_context_ = nullptr;
56}
57
58void MTLBatch::MTLVertexDescriptorCache::vertex_descriptor_cache_ensure()
59{
60 if (this->cache_context_ != nullptr) {
61
62 /* Invalidate vertex descriptor bindings cache if batch has changed. */
63 if (batch_->flag & GPU_BATCH_DIRTY) {
64 batch_->flag &= ~GPU_BATCH_DIRTY;
65 this->vertex_descriptor_cache_clear();
66 }
67 }
68
69 /* Initialize cache if not ready. */
70 if (cache_context_ == nullptr) {
71 this->vertex_descriptor_cache_init(MTLContext::get());
72 }
73}
74
75MTLBatch::VertexDescriptorShaderInterfacePair *MTLBatch::MTLVertexDescriptorCache::find(
76 const ShaderInterface *interface)
77{
78 this->vertex_descriptor_cache_ensure();
79 for (int i = 0; i < GPU_VAO_STATIC_LEN; ++i) {
80 if (cache_[i].interface == interface && cache_[i].cache_life_index == cache_life_index_) {
81 return &cache_[i];
82 }
83 }
84 return nullptr;
85}
86
87bool MTLBatch::MTLVertexDescriptorCache::insert(
88 MTLBatch::VertexDescriptorShaderInterfacePair &data)
89{
90 vertex_descriptor_cache_ensure();
91 for (int i = 0; i < GPU_VAO_STATIC_LEN; ++i) {
92 if (cache_[i].interface == nullptr || cache_[i].cache_life_index != cache_life_index_) {
93 cache_[i] = data;
94 cache_[i].cache_life_index = cache_life_index_;
95 return true;
96 }
97 }
98 return false;
99}
100
101int MTLBatch::prepare_vertex_binding(MTLVertBuf *verts,
102 MTLRenderPipelineStateDescriptor &desc,
103 const MTLShaderInterface *interface,
104 uint16_t &attr_mask,
105 bool instanced)
106{
107
108 const GPUVertFormat *format = &verts->format;
109 /* Whether the current vertex buffer has been added to the buffer layout descriptor. */
110 bool buffer_added = false;
111 /* Per-vertex stride of current vertex buffer. */
112 int buffer_stride = format->stride;
113 /* Buffer binding index of the vertex buffer once added to the buffer layout descriptor. */
114 int buffer_index = -1;
115 int attribute_offset = 0;
116
117 /* Iterate over VertBuf vertex format and find attributes matching those in the active
118 * shader's interface. */
119 for (uint32_t a_idx = 0; a_idx < format->attr_len; a_idx++) {
120 const GPUVertAttr *a = &format->attrs[a_idx];
121
122 if (format->deinterleaved) {
123 attribute_offset += ((a_idx == 0) ? 0 : format->attrs[a_idx - 1].size) * verts->vertex_len;
124 buffer_stride = a->size;
125 }
126 else {
127 attribute_offset = a->offset;
128 }
129
130 /* Find attribute with the matching name. Attributes may have multiple compatible
131 * name aliases. */
132 for (uint32_t n_idx = 0; n_idx < a->name_len; n_idx++) {
133 const char *name = GPU_vertformat_attr_name_get(format, a, n_idx);
134 const ShaderInput *input = interface->attr_get(name);
135
136 if (input == nullptr || input->location == -1) {
137 continue;
138 }
139
140 /* Fetch metal attribute information (ShaderInput->binding is used to fetch the corresponding
141 * slot. */
142 const MTLShaderInputAttribute &mtl_attr = interface->get_attribute(input->binding);
143 BLI_assert(mtl_attr.location >= 0);
144 /* Verify that the attribute location from the shader interface
145 * matches the attribute location returned in the input table. These should always be the
146 * same. */
147 BLI_assert(mtl_attr.location == input->location);
148
149 /* Check if attribute is already present in the given slot. */
150 if ((~attr_mask) & (1 << mtl_attr.location)) {
152 " -- [Batch] Skipping attribute with input location %d (As one is already bound)",
153 mtl_attr.location);
154 }
155 else {
156
157 /* Update attribute used-slot mask. */
158 attr_mask &= ~(1 << mtl_attr.location);
159
160 /* Add buffer layout entry in descriptor if it has not yet been added
161 * for current vertex buffer. */
162 if (!buffer_added) {
163 buffer_index = desc.vertex_descriptor.num_vert_buffers;
164 desc.vertex_descriptor.buffer_layouts[buffer_index].step_function =
165 (instanced) ? MTLVertexStepFunctionPerInstance : MTLVertexStepFunctionPerVertex;
166 desc.vertex_descriptor.buffer_layouts[buffer_index].step_rate = 1;
167 desc.vertex_descriptor.buffer_layouts[buffer_index].stride = buffer_stride;
168 desc.vertex_descriptor.num_vert_buffers++;
169 buffer_added = true;
170
171 MTL_LOG_INFO(" -- [Batch] Adding source %s buffer (Index: %d, Stride: %d)",
172 (instanced) ? "instance" : "vertex",
173 buffer_index,
174 buffer_stride);
175 }
176 else {
177 /* Ensure stride is correct for de-interleaved attributes. */
178 desc.vertex_descriptor.buffer_layouts[buffer_index].stride = buffer_stride;
179 }
180
181 /* Handle Matrix/Array vertex attribute types.
182 * Metal does not natively support these as attribute types, so we handle these cases
183 * by stacking together compatible types (e.g. 4xVec4 for Mat4) and combining
184 * the data in the shader.
185 * The generated Metal shader will contain a generated input binding, which reads
186 * in individual attributes and merges them into the desired type after vertex
187 * assembly. e.g. a Mat4 (Float4x4) will generate 4 Float4 attributes. */
188 if (a->comp_len == 16 || a->comp_len == 12 || a->comp_len == 8) {
190 a->comp_len == 16,
191 "only mat4 attributes currently supported -- Not ready to handle other long "
192 "component length attributes yet");
193
194 {
195 /* Handle Mat4 attributes. */
196 if (a->comp_len == 16) {
197 /* Debug safety checks. */
198 BLI_assert_msg(mtl_attr.matrix_element_count == 4,
199 "mat4 type expected but there are fewer components");
200 BLI_assert_msg(mtl_attr.size == 16, "Expecting subtype 'vec4' with 16 bytes");
202 mtl_attr.format == MTLVertexFormatFloat4,
203 "Per-attribute vertex format MUST be float4 for an input type of 'mat4'");
204
205 /* We have found the 'ROOT' attribute. A mat4 contains 4 consecutive float4 attribute
206 * locations we must map to. */
207 for (int i = 0; i < a->comp_len / 4; i++) {
208 desc.vertex_descriptor.attributes[mtl_attr.location + i].format =
209 MTLVertexFormatFloat4;
210 /* Data is consecutive in the buffer for the whole matrix, each float4 will shift
211 * the offset by 16 bytes. */
212 desc.vertex_descriptor.attributes[mtl_attr.location + i].offset =
213 attribute_offset + i * 16;
214 /* All source data for a matrix is in the same singular buffer. */
215 desc.vertex_descriptor.attributes[mtl_attr.location + i].buffer_index =
216 buffer_index;
217
218 /* Update total attribute account. */
219 desc.vertex_descriptor.total_attributes++;
220 desc.vertex_descriptor.max_attribute_value = max_ii(
221 mtl_attr.location + i, desc.vertex_descriptor.max_attribute_value);
222 MTL_LOG_INFO("-- Sub-Attrib Location: %d, offset: %d, buffer index: %d",
223 mtl_attr.location + i,
224 attribute_offset + i * 16,
225 buffer_index);
226
227 /* Update attribute used-slot mask for array elements. */
228 attr_mask &= ~(1 << (mtl_attr.location + i));
229 }
231 "Float4x4 attribute type added for '%s' at attribute locations: %d to %d",
232 name,
233 mtl_attr.location,
234 mtl_attr.location + 3);
235 }
236
237 /* Ensure we are not exceeding the attribute limit. */
238 BLI_assert(desc.vertex_descriptor.max_attribute_value <
240 }
241 }
242 else {
243
244 /* Handle Any required format conversions.
245 * NOTE(Metal): If there is a mis-match between the format of an attribute
246 * in the shader interface, and the specified format in the VertexBuffer VertexFormat,
247 * we need to perform a format conversion.
248 *
249 * The Metal API can perform certain conversions internally during vertex assembly:
250 * - Type Normalization e.g short2 to float2 between 0.0 to 1.0.
251 * - Type Truncation e.g. Float4 to Float2.
252 * - Type expansion e,g, Float3 to Float4 (Following 0,0,0,1 for assignment to empty
253 * elements).
254 *
255 * Certain conversion cannot be performed however, and in these cases, we need to
256 * instruct the shader to generate a specialized version with a conversion routine upon
257 * attribute read.
258 * - This handles cases such as conversion between types e.g. Integer to float without
259 * normalization.
260 *
261 * For more information on the supported and unsupported conversions, see:
262 * https://developer.apple.com/documentation/metal/mtlvertexattributedescriptor/1516081-format?language=objc
263 */
264 MTLVertexFormat converted_format;
265 bool can_use_internal_conversion = mtl_convert_vertex_format(
266 mtl_attr.format,
268 a->comp_len,
270 &converted_format);
271 bool is_floating_point_format = (a->comp_type == GPU_COMP_F32);
272
273 if (can_use_internal_conversion) {
274 desc.vertex_descriptor.attributes[mtl_attr.location].format = converted_format;
275 desc.vertex_descriptor.attributes[mtl_attr.location].format_conversion_mode =
276 is_floating_point_format ? (GPUVertFetchMode)GPU_FETCH_FLOAT :
278 BLI_assert(converted_format != MTLVertexFormatInvalid);
279 }
280 else {
281 /* The internal implicit conversion is not supported.
282 * In this case, we need to handle conversion inside the shader.
283 * This is handled using `format_conversion_mode`.
284 * `format_conversion_mode` is assigned the blender-specified fetch mode (GPU_FETCH_*).
285 * This then controls how a given attribute is interpreted. The data will be read
286 * as specified and then converted appropriately to the correct form.
287 *
288 * e.g. if `GPU_FETCH_INT_TO_FLOAT` is specified, the specialized read-routine
289 * in the shader will read the data as an int, and cast this to floating point
290 * representation. (Rather than reading the source data as float).
291 *
292 * NOTE: Even if full conversion is not supported, we may still partially perform an
293 * implicit conversion where possible, such as vector truncation or expansion. */
294 MTLVertexFormat converted_format = format_resize_comp(mtl_attr.format, a->comp_len);
295 desc.vertex_descriptor.attributes[mtl_attr.location].format = converted_format;
296 desc.vertex_descriptor.attributes[mtl_attr.location].format_conversion_mode =
298 BLI_assert(desc.vertex_descriptor.attributes[mtl_attr.location].format !=
299 MTLVertexFormatInvalid);
300 }
301 desc.vertex_descriptor.attributes[mtl_attr.location].offset = attribute_offset;
302 desc.vertex_descriptor.attributes[mtl_attr.location].buffer_index = buffer_index;
303 desc.vertex_descriptor.max_attribute_value =
304 ((mtl_attr.location) > desc.vertex_descriptor.max_attribute_value) ?
305 (mtl_attr.location) :
306 desc.vertex_descriptor.max_attribute_value;
307 desc.vertex_descriptor.total_attributes++;
308
309 /* NOTE: We are setting max_attribute_value to be up to the maximum found index, because
310 * of this, it is possible that we may skip over certain attributes if they were not in
311 * the source GPUVertFormat. */
313 " -- Batch Attribute(%d): ORIG Shader Format: %d, ORIG Vert format: %d, Vert "
314 "components: %d, Fetch Mode %d --> FINAL FORMAT: %d",
315 mtl_attr.location,
316 (int)mtl_attr.format,
317 (int)a->comp_type,
318 (int)a->comp_len,
319 (int)a->fetch_mode,
320 (int)desc.vertex_descriptor.attributes[mtl_attr.location].format);
321
323 " -- [Batch] matching %s attribute '%s' (Attribute Index: %d, Buffer index: %d, "
324 "offset: %d)",
325 (instanced) ? "instance" : "vertex",
326 name,
327 mtl_attr.location,
328 buffer_index,
329 attribute_offset);
330 }
331 }
332 }
333 }
334 if (buffer_added) {
335 return buffer_index;
336 }
337 return -1;
338}
339
340id<MTLRenderCommandEncoder> MTLBatch::bind()
341{
342 /* Setup draw call and render pipeline state here. Called by every draw, but setup here so that
343 * MTLDrawList only needs to perform setup a single time. */
344 BLI_assert(this);
345
346 /* Fetch Metal device. */
348 if (!ctx) {
349 BLI_assert_msg(false, "No context available for rendering.");
350 return nil;
351 }
352
353 /* Fetch bound shader from context. */
354 active_shader_ = static_cast<MTLShader *>(ctx->shader);
355
356 if (active_shader_ == nullptr || !active_shader_->is_valid()) {
357 /* Skip drawing if there is no valid Metal shader.
358 * This will occur if the path through which the shader is prepared
359 * is invalid (e.g. Python without create-info), or, the source shader uses a geometry pass. */
360 BLI_assert_msg(false, "No valid Metal shader!");
361 return nil;
362 }
363
364 /* Prepare Vertex Descriptor and extract VertexBuffers to bind. */
366 int num_buffers = 0;
367
368 /* Ensure Index Buffer is ready. */
369 MTLIndexBuf *mtl_elem = static_cast<MTLIndexBuf *>(reinterpret_cast<IndexBuf *>(this->elem));
370 if (mtl_elem != nullptr) {
371 mtl_elem->upload_data();
372 }
373
374 /* Populate vertex descriptor with attribute binding information.
375 * The vertex descriptor and buffer layout descriptors describe
376 * how vertex data from bound vertex buffers maps to the
377 * shader's input.
378 * A unique vertex descriptor will result in a new PipelineStateObject
379 * being generated for the currently bound shader. */
380 prepare_vertex_descriptor_and_bindings(buffers, num_buffers);
381
382 /* Prepare Vertex Buffers - Run before RenderCommandEncoder in case BlitCommandEncoder buffer
383 * data operations are required. */
384 for (int i = 0; i < num_buffers; i++) {
385 MTLVertBuf *buf_at_index = buffers[i];
386 if (buf_at_index == nullptr) {
388 false,
389 "Total buffer count does not match highest buffer index, could be gaps in bindings");
390 continue;
391 }
392
393 MTLVertBuf *mtlvbo = static_cast<MTLVertBuf *>(reinterpret_cast<VertBuf *>(buf_at_index));
394 mtlvbo->bind();
395 }
396
397 /* Ensure render pass is active and fetch active RenderCommandEncoder. */
398 id<MTLRenderCommandEncoder> rec = ctx->ensure_begin_render_pass();
399
400 /* Fetch RenderPassState to enable resource binding for active pass. */
402
403 /* Debug Check: Ensure Frame-buffer instance is not dirty. */
405
406 /* GPU debug markers. */
407 if (G.debug & G_DEBUG_GPU) {
408 [rec pushDebugGroup:[NSString stringWithFormat:@"Draw Commands%@ (GPUShader: %s)",
409 this->elem ? @"(indexed)" : @"",
410 active_shader_->get_interface()->get_name()]];
411 [rec insertDebugSignpost:[NSString
412 stringWithFormat:@"Draw Commands %@ (GPUShader: %s)",
413 this->elem ? @"(indexed)" : @"",
414 active_shader_->get_interface()->get_name()]];
415 }
416
417 /*** Bind Vertex Buffers and Index Buffers **/
418
419 /* Ensure Context Render Pipeline State is fully setup and ready to execute the draw.
420 * This should happen after all other final rendering setup is complete. */
421 MTLPrimitiveType mtl_prim_type = gpu_prim_type_to_metal(this->prim_type);
422 if (!ctx->ensure_render_pipeline_state(mtl_prim_type)) {
423 MTL_LOG_ERROR("Failed to prepare and apply render pipeline state.");
424 BLI_assert(false);
425 return nil;
426 }
427
428 /* Bind Vertex Buffers. */
429 for (int i = 0; i < num_buffers; i++) {
430 MTLVertBuf *buf_at_index = buffers[i];
431 if (buf_at_index == nullptr) {
433 false,
434 "Total buffer count does not match highest buffer index, could be gaps in bindings");
435 continue;
436 }
437 /* Buffer handle. */
438 MTLVertBuf *mtlvbo = static_cast<MTLVertBuf *>(reinterpret_cast<VertBuf *>(buf_at_index));
439 mtlvbo->flag_used();
440
441 /* Fetch buffer from MTLVertexBuffer and bind. */
442 id<MTLBuffer> mtl_buffer = mtlvbo->get_metal_buffer();
443
444 BLI_assert(mtl_buffer != nil);
445 rps.bind_vertex_buffer(mtl_buffer, 0, i);
446 }
447
448 /* Return Render Command Encoder used with setup. */
449 return rec;
450}
451
452void MTLBatch::unbind(id<MTLRenderCommandEncoder> rec)
453{
454 /* Pop bind debug group. */
455 if (G.debug & G_DEBUG_GPU) {
456 [rec popDebugGroup];
457 }
458}
459
460void MTLBatch::prepare_vertex_descriptor_and_bindings(MTLVertBuf **buffers, int &num_buffers)
461{
462
463 /* Here we populate the MTLContext vertex descriptor and resolve which buffers need to be bound.
464 */
465 MTLStateManager *state_manager = static_cast<MTLStateManager *>(
468 const MTLShaderInterface *interface = active_shader_->get_interface();
469 uint16_t attr_mask = interface->get_enabled_attribute_mask();
470
471 /* Reset vertex descriptor to default state. */
473
474 /* Fetch Vertex and Instance Buffers. */
475 Span<MTLVertBuf *> mtl_verts(reinterpret_cast<MTLVertBuf **>(this->verts),
477 Span<MTLVertBuf *> mtl_inst(reinterpret_cast<MTLVertBuf **>(this->inst),
479
480 /* Resolve Metal vertex buffer bindings. */
481 /* Vertex Descriptors
482 * ------------------
483 * Vertex Descriptors are required to generate a pipeline state, based on the current Batch's
484 * buffer bindings. These bindings are a unique matching, depending on what input attributes a
485 * batch has in its buffers, and those which are supported by the shader interface.
486 *
487 * We iterate through the buffers and resolve which attributes satisfy the requirements of the
488 * currently bound shader. We cache this data, for a given Batch<->ShderInterface pairing in a
489 * VAO cache to avoid the need to recalculate this data. */
490 bool buffer_is_instanced[GPU_BATCH_VBO_MAX_LEN] = {false};
491
492 VertexDescriptorShaderInterfacePair *descriptor = this->vao_cache.find(interface);
493 if (descriptor) {
494 desc.vertex_descriptor = descriptor->vertex_descriptor;
495 attr_mask = descriptor->attr_mask;
496 num_buffers = descriptor->num_buffers;
497
498 for (int bid = 0; bid < GPU_BATCH_VBO_MAX_LEN; ++bid) {
499 if (descriptor->bufferIds[bid].used) {
500 if (descriptor->bufferIds[bid].is_instance) {
501 buffers[bid] = mtl_inst[descriptor->bufferIds[bid].id];
502 buffer_is_instanced[bid] = true;
503 }
504 else {
505 buffers[bid] = mtl_verts[descriptor->bufferIds[bid].id];
506 buffer_is_instanced[bid] = false;
507 }
508 }
509 }
510 }
511 else {
512 VertexDescriptorShaderInterfacePair pair{};
513 pair.interface = interface;
514
515 for (int i = 0; i < GPU_BATCH_VBO_MAX_LEN; ++i) {
516 pair.bufferIds[i].id = -1;
517 pair.bufferIds[i].is_instance = 0;
518 pair.bufferIds[i].used = 0;
519 }
520 /* NOTE: Attribute extraction order from buffer is the reverse of the OpenGL as we flag once an
521 * attribute is found, rather than pre-setting the mask. */
522 /* Extract Instance attributes (These take highest priority). */
523 for (int v = 0; v < GPU_BATCH_INST_VBO_MAX_LEN; v++) {
524 if (mtl_inst[v]) {
525 MTL_LOG_INFO(" -- [Batch] Checking bindings for bound instance buffer %p", mtl_inst[v]);
526 int buffer_ind = this->prepare_vertex_binding(
527 mtl_inst[v], desc, interface, attr_mask, true);
528 if (buffer_ind >= 0) {
529 buffers[buffer_ind] = mtl_inst[v];
530 buffer_is_instanced[buffer_ind] = true;
531
532 pair.bufferIds[buffer_ind].id = v;
533 pair.bufferIds[buffer_ind].used = 1;
534 pair.bufferIds[buffer_ind].is_instance = 1;
535 num_buffers = ((buffer_ind + 1) > num_buffers) ? (buffer_ind + 1) : num_buffers;
536 }
537 }
538 }
539
540 /* Extract Vertex attributes (First-bound vertex buffer takes priority). */
541 for (int v = 0; v < GPU_BATCH_VBO_MAX_LEN; v++) {
542 if (mtl_verts[v] != nullptr) {
543 MTL_LOG_INFO(" -- [Batch] Checking bindings for bound vertex buffer %p", mtl_verts[v]);
544 int buffer_ind = this->prepare_vertex_binding(
545 mtl_verts[v], desc, interface, attr_mask, false);
546 if (buffer_ind >= 0) {
547 buffers[buffer_ind] = mtl_verts[v];
548 buffer_is_instanced[buffer_ind] = false;
549
550 pair.bufferIds[buffer_ind].id = v;
551 pair.bufferIds[buffer_ind].used = 1;
552 pair.bufferIds[buffer_ind].is_instance = 0;
553 num_buffers = ((buffer_ind + 1) > num_buffers) ? (buffer_ind + 1) : num_buffers;
554 }
555 }
556 }
557
558 /* Add to VertexDescriptor cache */
559 pair.attr_mask = attr_mask;
560 pair.vertex_descriptor = desc.vertex_descriptor;
561 pair.num_buffers = num_buffers;
562 if (!this->vao_cache.insert(pair)) {
563 printf(
564 "[Performance Warning] cache is full (Size: %d), vertex descriptor will not be cached\n",
566 }
567 }
568
569/* DEBUG: verify if our attribute bindings have been fully provided as expected. */
570#if MTL_DEBUG_SHADER_ATTRIBUTES == 1
571 if (attr_mask != 0) {
572 /* Attributes are not necessarily contiguous. */
573 for (int i = 0; i < active_shader_->get_interface()->get_total_attributes(); i++) {
574 const MTLShaderInputAttribute &attr = active_shader_->get_interface()->get_attribute(i);
575 if (attr_mask & (1 << attr.location)) {
577 "Warning: Missing expected attribute '%s' with location: %u in shader %s (attr "
578 "number: %u)",
579 active_shader_->get_interface()->get_name_at_offset(attr.name_offset),
580 attr.location,
581 active_shader_->name_get(),
582 i);
583
584 /* If an attribute is not included, then format in vertex descriptor should be invalid due
585 * to nil assignment. */
586 BLI_assert(desc.vertex_descriptor.attributes[attr.location].format ==
587 MTLVertexFormatInvalid);
588 }
589 }
590 }
591#endif
592}
593
/**
 * Submit a direct (non-indirect) draw for this batch.
 *
 * Three submission paths exist:
 * - Topology-emulated draw: primitive types Metal cannot render natively are drawn indexed
 *   through a generated index buffer (see #get_emulated_toplogy_buffer).
 * - Plain vertex draw: no element buffer bound.
 * - Indexed draw: element buffer bound; the index buffer may be swapped for an optimized
 *   variant which also updates the primitive type and index count.
 *
 * \param v_first: First vertex (or index, for indexed draws) to draw.
 * \param v_count: Number of vertices/indices to draw (must be > 0).
 * \param i_first: First instance index.
 * \param i_count: Number of instances (must be > 0).
 */
void MTLBatch::draw_advanced(int v_first, int v_count, int i_first, int i_count)
{
  BLI_assert(v_count > 0 && i_count > 0);

  /* Setup RenderPipelineState for batch. bind() performs all per-draw pipeline setup and
   * returns nil on failure, in which case drawing is skipped. */
  MTLContext *ctx = MTLContext::get();
  id<MTLRenderCommandEncoder> rec = this->bind();
  if (rec == nil) {
    /* End of draw. */
    this->unbind(rec);
    return;
  }

  /* Fetch IndexBuffer and resolve primitive type. */
  MTLIndexBuf *mtl_elem = static_cast<MTLIndexBuf *>(reinterpret_cast<IndexBuf *>(this->elem));
  MTLPrimitiveType mtl_prim_type = gpu_prim_type_to_metal(this->prim_type);

  /* Perform regular draw. */
  if (mtl_elem == nullptr) {
    /* Primitive Type topology emulation. */
    if (mtl_needs_topology_emulation(this->prim_type)) {
      /* Generate index buffer for primitive types requiring emulation.
       * NOTE: both the primitive type and vertex count may be remapped by the generator. */
      GPUPrimType emulated_prim_type = this->prim_type;
      uint32_t emulated_v_count = v_count;
      id<MTLBuffer> generated_index_buffer = this->get_emulated_toplogy_buffer(emulated_prim_type,
                                                                               emulated_v_count);
      BLI_assert(generated_index_buffer != nil);

      MTLPrimitiveType emulated_mtl_prim_type = gpu_prim_type_to_metal(emulated_prim_type);

      /* Temp: Disable culling for emulated primitive types.
       * TODO(Metal): Support face winding in topology buffer. */
      [rec setCullMode:MTLCullModeNone];

      if (generated_index_buffer != nil) {
        /* Emulation only ever remaps to plain triangle/line lists; sanity-check counts. */
        BLI_assert(emulated_mtl_prim_type == MTLPrimitiveTypeTriangle ||
                   emulated_mtl_prim_type == MTLPrimitiveTypeLine);
        if (emulated_mtl_prim_type == MTLPrimitiveTypeTriangle) {
          BLI_assert(emulated_v_count % 3 == 0);
        }
        if (emulated_mtl_prim_type == MTLPrimitiveTypeLine) {
          BLI_assert(emulated_v_count % 2 == 0);
        }

        /* Set depth stencil state (requires knowledge of primitive type). */
        ctx->ensure_depth_stencil_state(emulated_mtl_prim_type);

        [rec drawIndexedPrimitives:emulated_mtl_prim_type
                        indexCount:emulated_v_count
                         indexType:MTLIndexTypeUInt32
                       indexBuffer:generated_index_buffer
                 indexBufferOffset:0
                     instanceCount:i_count
                        baseVertex:v_first
                      baseInstance:i_first];
      }
      else {
        printf("[Note] Cannot draw batch -- Emulated Topology mode: %u not yet supported\n",
               this->prim_type);
      }
    }
    else {
      /* Set depth stencil state (requires knowledge of primitive type). */
      ctx->ensure_depth_stencil_state(mtl_prim_type);

      /* Issue draw call. */
      [rec drawPrimitives:mtl_prim_type
              vertexStart:v_first
              vertexCount:v_count
            instanceCount:i_count
             baseInstance:i_first];
    }
    ctx->main_command_buffer.register_draw_counters(v_count * i_count);
  }
  /* Perform indexed draw. */
  else {

    MTLIndexType index_type = MTLIndexBuf::gpu_index_type_to_metal(mtl_elem->index_type_);
    uint32_t base_index = mtl_elem->index_base_;
    uint32_t index_size = (mtl_elem->index_type_ == GPU_INDEX_U16) ? 2 : 4;
    /* v_first is expressed in indices; convert to a byte offset into the index buffer,
     * accounting for the element buffer's own start offset. */
    uint32_t v_first_ofs = ((v_first + mtl_elem->index_start_) * index_size);
    BLI_assert_msg((v_first_ofs % index_size) == 0,
                   "Index offset is not 2/4-byte aligned as per METAL spec");

    /* Fetch index buffer. May return an index buffer of a differing format,
     * if index buffer optimization is used. In these cases, final_prim_type and
     * index_count get updated with the new properties. */
    GPUPrimType final_prim_type = this->prim_type;
    uint index_count = v_count;

    id<MTLBuffer> index_buffer = mtl_elem->get_index_buffer(final_prim_type, index_count);
    mtl_prim_type = gpu_prim_type_to_metal(final_prim_type);
    BLI_assert(index_buffer != nil);

    if (index_buffer != nil) {

      /* Set depth stencil state (requires knowledge of primitive type). */
      ctx->ensure_depth_stencil_state(mtl_prim_type);

      /* Issue draw call. */
      [rec drawIndexedPrimitives:mtl_prim_type
                      indexCount:index_count
                       indexType:index_type
                     indexBuffer:index_buffer
               indexBufferOffset:v_first_ofs
                   instanceCount:i_count
                      baseVertex:base_index
                    baseInstance:i_first];
      ctx->main_command_buffer.register_draw_counters(index_count * i_count);
    }
    else {
      BLI_assert_msg(false, "Index buffer does not have backing Metal buffer");
    }
  }

  /* End of draw. */
  this->unbind(rec);
}
712
/**
 * Submit an indirect draw for this batch, with draw arguments sourced from a GPU storage
 * buffer.
 *
 * \param indirect_buf: Storage buffer containing the Metal indirect draw arguments.
 * \param offset: Byte offset into `indirect_buf` at which the arguments start.
 *
 * NOTE: Triangle-fan primitives and topology-emulated primitive types are unsupported for
 * indirect draws (asserted below). Index buffer optimization is explicitly disabled, as the
 * index count is not known CPU-side.
 */
void MTLBatch::draw_advanced_indirect(GPUStorageBuf *indirect_buf, intptr_t offset)
{
  /* Setup RenderPipelineState for batch. bind() returns nil on failure, skipping the draw. */
  MTLContext *ctx = MTLContext::get();
  id<MTLRenderCommandEncoder> rec = this->bind();
  if (rec == nil) {
    printf("Failed to open Render Command encoder for DRAW INDIRECT\n");

    /* End of draw. */
    this->unbind(rec);
    return;
  }

  /* Fetch indirect buffer Metal handle. */
  MTLStorageBuf *mtlssbo = static_cast<MTLStorageBuf *>(unwrap(indirect_buf));
  id<MTLBuffer> mtl_indirect_buf = mtlssbo->get_metal_buffer();
  BLI_assert(mtl_indirect_buf != nil);
  if (mtl_indirect_buf == nil) {
    MTL_LOG_WARNING("Metal Indirect Draw Storage Buffer is nil.");

    /* End of draw. */
    this->unbind(rec);
    return;
  }

  /* Unsupported primitive type check. */
  BLI_assert_msg(this->prim_type != GPU_PRIM_TRI_FAN,
                 "TriangleFan is not supported in Metal for Indirect draws.");

  /* Fetch IndexBuffer and resolve primitive type. */
  MTLIndexBuf *mtl_elem = static_cast<MTLIndexBuf *>(reinterpret_cast<IndexBuf *>(this->elem));
  MTLPrimitiveType mtl_prim_type = gpu_prim_type_to_metal(this->prim_type);

  if (mtl_needs_topology_emulation(this->prim_type)) {
    BLI_assert_msg(false, "Metal Topology emulation unsupported for draw indirect.\n");

    /* End of draw. */
    this->unbind(rec);
    return;
  }

  if (mtl_elem == nullptr) {
    /* Non-indexed indirect draw. */
    /* Set depth stencil state (requires knowledge of primitive type). */
    ctx->ensure_depth_stencil_state(mtl_prim_type);

    /* Issue draw call. */
    [rec drawPrimitives:mtl_prim_type indirectBuffer:mtl_indirect_buf indirectBufferOffset:offset];
    ctx->main_command_buffer.register_draw_counters(1);
  }
  else {
    /* Indexed indirect draw. */
    /* Fetch index buffer. May return an index buffer of a differing format,
     * if index buffer optimization is used. In these cases, final_prim_type and
     * index_count get updated with the new properties. */
    MTLIndexType index_type = MTLIndexBuf::gpu_index_type_to_metal(mtl_elem->index_type_);
    GPUPrimType final_prim_type = this->prim_type;
    uint index_count = 0;

    /* Disable index optimization for indirect draws. */
    mtl_elem->flag_can_optimize(false);

    id<MTLBuffer> index_buffer = mtl_elem->get_index_buffer(final_prim_type, index_count);
    mtl_prim_type = gpu_prim_type_to_metal(final_prim_type);
    BLI_assert(index_buffer != nil);

    if (index_buffer != nil) {

      /* Set depth stencil state (requires knowledge of primitive type). */
      ctx->ensure_depth_stencil_state(mtl_prim_type);

      /* Issue draw call. */
      [rec drawIndexedPrimitives:mtl_prim_type
                       indexType:index_type
                     indexBuffer:index_buffer
               indexBufferOffset:0
                  indirectBuffer:mtl_indirect_buf
            indirectBufferOffset:offset];
      ctx->main_command_buffer.register_draw_counters(1);
    }
    else {
      BLI_assert_msg(false, "Index buffer does not have backing Metal buffer");
    }
  }

  /* End of draw. */
  this->unbind(rec);
}
799
801
802/* -------------------------------------------------------------------- */
805
806id<MTLBuffer> MTLBatch::get_emulated_toplogy_buffer(GPUPrimType &in_out_prim_type,
807 uint32_t &in_out_v_count)
808{
809
810 BLI_assert(in_out_v_count > 0);
811 /* Determine emulated primitive types. */
812 GPUPrimType input_prim_type = in_out_prim_type;
813 uint32_t v_count = in_out_v_count;
814 GPUPrimType output_prim_type;
815 switch (input_prim_type) {
816 case GPU_PRIM_POINTS:
817 case GPU_PRIM_LINES:
818 case GPU_PRIM_TRIS:
819 BLI_assert_msg(false, "Optimal primitive types should not reach here.");
820 return nil;
821 break;
824 BLI_assert_msg(false, "Adjacency primitive types should not reach here.");
825 return nil;
826 break;
830 output_prim_type = GPU_PRIM_LINES;
831 break;
833 case GPU_PRIM_TRI_FAN:
834 output_prim_type = GPU_PRIM_TRIS;
835 break;
836 default:
837 BLI_assert_msg(false, "Invalid primitive type.");
838 return nil;
839 }
840
841 /* Check if topology buffer exists and is valid. */
842 if (this->emulated_topology_buffer_ != nullptr &&
843 (emulated_topology_type_ != input_prim_type || topology_buffer_input_v_count_ != v_count))
844 {
845
846 /* Release existing topology buffer. */
847 emulated_topology_buffer_->free();
848 emulated_topology_buffer_ = nullptr;
849 }
850
851 /* Generate new topology index buffer. */
852 if (this->emulated_topology_buffer_ == nullptr) {
853 /* Calculate IB len. */
854 uint32_t output_prim_count = 0;
855 switch (input_prim_type) {
858 output_prim_count = v_count - 1;
859 break;
861 output_prim_count = v_count;
862 break;
864 case GPU_PRIM_TRI_FAN:
865 output_prim_count = v_count - 2;
866 break;
867 default:
868 BLI_assert_msg(false, "Cannot generate optimized topology buffer for other types.");
869 break;
870 }
871 uint32_t output_IB_elems = output_prim_count * ((output_prim_type == GPU_PRIM_TRIS) ? 3 : 2);
872
873 /* Allocate buffer. */
874 uint32_t buffer_bytes = output_IB_elems * 4;
875 BLI_assert(buffer_bytes > 0);
876 this->emulated_topology_buffer_ = MTLContext::get_global_memory_manager()->allocate(
877 buffer_bytes, true);
878
879 /* Populate. */
880 uint32_t *data = (uint32_t *)this->emulated_topology_buffer_->get_host_ptr();
881 BLI_assert(data != nullptr);
882
883 /* TODO(Metal): Support inverse winding modes. */
884 bool winding_clockwise = false;
885 UNUSED_VARS(winding_clockwise);
886
887 switch (input_prim_type) {
888 /* Line Loop. */
889 case GPU_PRIM_LINE_LOOP: {
890 int line = 0;
891 for (line = 0; line < output_prim_count - 1; line++) {
892 data[line * 2 + 0] = line + 0;
893 data[line * 2 + 1] = line + 1;
894 }
895 /* Closing line. */
896 data[line * 2 + 0] = line + 0;
897 data[line * 2 + 1] = 0;
898 } break;
899
900 /* Triangle Fan. */
901 case GPU_PRIM_TRI_FAN: {
902 for (int triangle = 0; triangle < output_prim_count; triangle++) {
903 data[triangle * 3 + 0] = 0; /* Always 0 */
904 data[triangle * 3 + 1] = triangle + 1;
905 data[triangle * 3 + 2] = triangle + 2;
906 }
907 } break;
908
909 default:
910 BLI_assert_msg(false, "Other primitive types do not require emulation.");
911 return nil;
912 }
913
914 /* Flush. */
915 this->emulated_topology_buffer_->flush();
916 /* Assign members relating to current cached IB. */
917 topology_buffer_input_v_count_ = v_count;
918 topology_buffer_output_v_count_ = output_IB_elems;
919 emulated_topology_type_ = input_prim_type;
920 }
921
922 /* Return. */
923 in_out_v_count = topology_buffer_output_v_count_;
924 in_out_prim_type = output_prim_type;
925 return (emulated_topology_buffer_) ? emulated_topology_buffer_->get_metal_buffer() : nil;
926}
927
929
930} // namespace blender::gpu
@ G_DEBUG_GPU
#define BLI_assert(a)
Definition BLI_assert.h:46
#define BLI_assert_msg(a, msg)
Definition BLI_assert.h:53
MINLINE int max_ii(int a, int b)
unsigned int uint
#define UNUSED_VARS(...)
static constexpr int GPU_BATCH_VBO_MAX_LEN
Definition GPU_batch.hh:31
static constexpr int GPU_BATCH_INST_VBO_MAX_LEN
Definition GPU_batch.hh:32
@ GPU_BATCH_DIRTY
Definition GPU_batch.hh:57
GPUPrimType
@ GPU_PRIM_TRI_FAN
@ GPU_PRIM_LINE_LOOP
@ GPU_PRIM_LINE_STRIP_ADJ
@ GPU_PRIM_TRIS_ADJ
@ GPU_PRIM_LINES
@ GPU_PRIM_POINTS
@ GPU_PRIM_LINES_ADJ
@ GPU_PRIM_LINE_STRIP
@ GPU_PRIM_TRI_STRIP
@ GPU_PRIM_TRIS
BLI_INLINE const char * GPU_vertformat_attr_name_get(const GPUVertFormat *format, const GPUVertAttr *attr, uint n_idx)
GPUVertFetchMode
@ GPU_FETCH_FLOAT
@ GPU_FETCH_INT
GPUVertCompType
@ GPU_COMP_F32
BMesh const char void * data
ATTR_WARN_UNUSED_RESULT const BMVert * v
id< MTLRenderCommandEncoder > bind()
Definition mtl_batch.mm:340
void draw(int v_first, int v_count, int i_first, int i_count) override
Definition mtl_batch.mm:35
void draw_indirect(GPUStorageBuf *indirect_buf, intptr_t offset) override
Definition mtl_batch.mm:40
void unbind(id< MTLRenderCommandEncoder > rec)
Definition mtl_batch.mm:452
gpu::MTLBuffer * allocate(uint64_t size, bool cpu_visible)
MTLRenderPassState & get_render_pass_state()
bool ensure_render_pipeline_state(MTLPrimitiveType prim_type)
id< MTLRenderCommandEncoder > ensure_begin_render_pass()
static MTLContext * get()
MTLCommandBufferManager main_command_buffer
static MTLBufferPool * get_global_memory_manager()
static MTLIndexType gpu_index_type_to_metal(GPUIndexBufType type)
void bind_vertex_buffer(id< MTLBuffer > buffer, uint64_t buffer_offset, uint index)
MTLRenderPipelineStateDescriptor & get_pipeline_descriptor()
Definition mtl_state.hh:59
static float verts[][3]
#define input
#define interface
#define printf(...)
format
descriptor
#define G(x, y, z)
#define GPU_VAO_STATIC_LEN
Definition mtl_batch.hh:26
#define MTL_MAX_VERTEX_INPUT_ATTRIBUTES
#define MTL_LOG_INFO(info,...)
Definition mtl_debug.hh:49
#define MTL_LOG_WARNING(info,...)
Definition mtl_debug.hh:42
#define MTL_LOG_ERROR(info,...)
Definition mtl_debug.hh:34
MTLVertexFormat format_resize_comp(MTLVertexFormat mtl_format, uint32_t components)
static Context * unwrap(GPUContext *ctx)
static MTLPrimitiveType gpu_prim_type_to_metal(GPUPrimType prim_type)
bool mtl_convert_vertex_format(MTLVertexFormat shader_attr_format, GPUVertCompType component_type, uint32_t component_len, GPUVertFetchMode fetch_mode, MTLVertexFormat *r_convertedFormat)
static bool mtl_needs_topology_emulation(GPUPrimType prim_type)
MTLVertexAttributeDescriptorPSO attributes[GPU_VERT_ATTR_MAX_LEN]
i
Definition text_draw.cc:230
char * buffers[2]