/* Blender V4.3 — bvh.mm (listing extracted from the generated source documentation). */
1/* SPDX-FileCopyrightText: 2021-2022 Blender Foundation
2 *
3 * SPDX-License-Identifier: Apache-2.0 */
4
5#ifdef WITH_METAL
6
7# include "scene/hair.h"
8# include "scene/mesh.h"
9# include "scene/object.h"
10# include "scene/pointcloud.h"
11
12# include "util/progress.h"
13
14# include "device/metal/bvh.h"
15# include "device/metal/util.h"
16
18
/* Report a BVH build sub-status: updates `progress` (expected in the calling scope)
 * and mirrors the message to the Metal debug log. */
# define BVH_status(...) \
  { \
    string str = string_printf(__VA_ARGS__); \
    progress.set_substatus(str); \
    metal_printf("%s\n", str.c_str()); \
  }

/* Uncomment to enable verbose logging from BVHMetalBuildThrottler. */
// # define BVH_THROTTLE_DIAGNOSTICS
# ifdef BVH_THROTTLE_DIAGNOSTICS
# define bvh_throttle_printf(...) printf("BVHMetalBuildThrottler::" __VA_ARGS__)
# else
# define bvh_throttle_printf(...)
# endif
32
/* Limit the number of concurrent BVH builds so that we don't approach unsafe GPU working set
 * sizes. */
struct BVHMetalBuildThrottler {
  size_t wired_memory = 0;     /* Bytes currently wired by in-flight builds. */
  size_t safe_wired_limit = 0; /* Budget above which further builds are throttled. */
  int requests_in_flight = 0;  /* Builds between acquire() and release(). */

  /* NOTE(review): these fields are touched from multiple threads (release() runs from
   * Metal command-buffer completion handlers), yet no lock is visible in this listing.
   * The extraction appears to have dropped lines — confirm a mutex guards this state. */

  BVHMetalBuildThrottler()
  {
    /* The default device will always be the one that supports MetalRT if the machine supports it.
     */
    id<MTLDevice> mtlDevice = MTLCreateSystemDefaultDevice();

    /* Set a conservative limit, but which will still only throttle in extreme cases. */
    safe_wired_limit = [mtlDevice recommendedMaxWorkingSetSize] / 4;
    bvh_throttle_printf("safe_wired_limit = %zu\n", safe_wired_limit);
  }

  /* Block until we're safely able to wire the requested resources. */
  void acquire(size_t bytes_to_be_wired)
  {
    bool throttled = false;
    while (true) {
      {
        /* Always allow a BVH build to proceed if no other is in flight, otherwise
         * only proceed if we're within safe limits. */
        if (wired_memory == 0 || wired_memory + bytes_to_be_wired <= safe_wired_limit) {
          wired_memory += bytes_to_be_wired;
          requests_in_flight += 1;
          bvh_throttle_printf("acquire -- success (requests_in_flight = %d, wired_memory = %zu)\n",
                              requests_in_flight,
                              wired_memory);
          return;
        }

        /* Log the throttle transition only once per acquire() call. */
        if (!throttled) {
          bvh_throttle_printf(
              "acquire -- throttling (requests_in_flight = %d, wired_memory = %zu, "
              "bytes_to_be_wired = %zu)\n",
              requests_in_flight,
              wired_memory,
              bytes_to_be_wired);
        }
        throttled = true;
      }

      /* Back off briefly, then re-check for freed capacity. */
      std::this_thread::sleep_for(std::chrono::milliseconds(10));
    }
  }

  /* Notify of resources that have stopped being wired. */
  void release(size_t bytes_just_unwired)
  {
    wired_memory -= bytes_just_unwired;
    requests_in_flight -= 1;
    bvh_throttle_printf("release (requests_in_flight = %d, wired_memory = %zu)\n",
                        requests_in_flight,
                        wired_memory);
  }

  /* Wait for all outstanding work to finish. */
  void wait_for_all()
  {
    /* Poll until every acquire() has been balanced by a release(). */
    while (true) {
      {
        if (wired_memory == 0) {
          return;
        }
      }
      std::this_thread::sleep_for(std::chrono::milliseconds(10));
    }
  }
} g_bvh_build_throttler;
111
/* Thin constructor: records the owning device; the acceleration structure itself
 * is created later by the build_BLAS_* / build entry points. */
BVHMetal::BVHMetal(const BVHParams &params_,
                   const vector<Geometry *> &geometry_,
                   const vector<Object *> &objects_,
                   Device *device)
    : BVH(params_, geometry_, objects_), device(device)
{
}
119
BVHMetal::~BVHMetal()
{
  /* Metal acceleration structures require macOS 12+; nothing was allocated otherwise. */
  if (@available(macos 12.0, *)) {
    /* Passing nil releases the held accel struct and updates device memory stats. */
    set_accel_struct(nil);
    if (null_BLAS) {
      [null_BLAS release];
    }
  }
}
129
API_AVAILABLE(macos(11.0))
void BVHMetal::set_accel_struct(id<MTLAccelerationStructure> new_accel_struct)
{
  /* Adopt `new_accel_struct` (may be nil), releasing any previously held
   * acceleration structure and keeping device memory statistics in sync. */
  if (@available(macos 12.0, *)) {
    id<MTLAccelerationStructure> previous = accel_struct;
    if (previous) {
      /* Retire the old structure before adopting the new one. */
      device->stats.mem_free(previous.allocatedSize);
      [previous release];
      accel_struct = nil;
    }

    if (new_accel_struct) {
      device->stats.mem_alloc(new_accel_struct.allocatedSize);
      accel_struct = new_accel_struct;
    }
  }
}
146
/* Build (or refit) a bottom-level acceleration structure for a triangle mesh.
 * Returns true if a GPU build was committed, false if unsupported or the mesh is empty.
 * The build is asynchronous: the result is installed via set_accel_struct() from a
 * command-buffer completion handler, and throttled by g_bvh_build_throttler. */
bool BVHMetal::build_BLAS_mesh(Progress &progress,
                               id<MTLDevice> mtl_device,
                               id<MTLCommandQueue> queue,
                               Geometry *const geom,
                               bool refit)
{
  if (@available(macos 12.0, *)) {
    /* Build BLAS for triangle primitives */
    Mesh *const mesh = static_cast<Mesh *const>(geom);
    if (mesh->num_triangles() == 0) {
      return false;
    }

    /*------------------------------------------------*/
    BVH_status(
        "Building mesh BLAS | %7d tris | %s", (int)mesh->num_triangles(), geom->name.c_str());
    /*------------------------------------------------*/

    /* Static BVHs are compacted after the build; dynamic ones are built for fast refit. */
    const bool use_fast_trace_bvh = (params.bvh_type == BVH_TYPE_STATIC);

    const array<float3> &verts = mesh->get_verts();
    const array<int> &tris = mesh->get_triangles();
    const size_t num_verts = verts.size();
    const size_t num_indices = tris.size();

    size_t num_motion_steps = 1;
    /* NOTE(review): `motion_keys` is used below but its declaration is missing from this
     * listing (presumably an attributes.find(ATTR_STD_MOTION_VERTEX_POSITION) lookup
     * dropped by extraction) — confirm against the full source. */
    if (motion_blur && mesh->get_use_motion_blur() && motion_keys) {
      num_motion_steps = mesh->get_motion_steps();
    }

    MTLResourceOptions storage_mode;
    if (mtl_device.hasUnifiedMemory) {
      storage_mode = MTLResourceStorageModeShared;
    }
    else {
      storage_mode = MTLResourceStorageModeManaged;
    }

    /* Upload the mesh data to the GPU */
    id<MTLBuffer> posBuf = nil;
    id<MTLBuffer> indexBuf = [mtl_device newBufferWithBytes:tris.data()
                                                     length:num_indices * sizeof(tris.data()[0])
                                                    options:storage_mode];

    if (num_motion_steps == 1) {
      posBuf = [mtl_device newBufferWithBytes:verts.data()
                                       length:num_verts * sizeof(verts.data()[0])
                                      options:storage_mode];
    }
    else {
      /* Pack one vertex array per motion step contiguously into a single buffer. */
      posBuf = [mtl_device
          newBufferWithLength:num_verts * num_motion_steps * sizeof(verts.data()[0])
                      options:storage_mode];
      float3 *dest_data = (float3 *)[posBuf contents];
      size_t center_step = (num_motion_steps - 1) / 2;
      for (size_t step = 0; step < num_motion_steps; ++step) {
        const float3 *verts = mesh->get_verts().data();

        /* The center step for motion vertices is not stored in the attribute. */
        if (step != center_step) {
          verts = motion_keys->data_float3() + (step > center_step ? step - 1 : step) * num_verts;
        }
        memcpy(dest_data + num_verts * step, verts, num_verts * sizeof(float3));
      }
      if (storage_mode == MTLResourceStorageModeManaged) {
        /* CPU-written contents must be flushed for managed (discrete-GPU) storage. */
        [posBuf didModifyRange:NSMakeRange(0, posBuf.length)];
      }
    }

    /* Create an acceleration structure. */
    MTLAccelerationStructureGeometryDescriptor *geomDesc;
    if (num_motion_steps > 1) {
      /* One keyframe descriptor per motion step, all views into posBuf. */
      std::vector<MTLMotionKeyframeData *> vertex_ptrs;
      vertex_ptrs.reserve(num_motion_steps);
      for (size_t step = 0; step < num_motion_steps; ++step) {
        MTLMotionKeyframeData *k = [MTLMotionKeyframeData data];
        k.buffer = posBuf;
        k.offset = num_verts * step * sizeof(float3);
        vertex_ptrs.push_back(k);
      }

      MTLAccelerationStructureMotionTriangleGeometryDescriptor *geomDescMotion =
          [MTLAccelerationStructureMotionTriangleGeometryDescriptor descriptor];
      geomDescMotion.vertexBuffers = [NSArray arrayWithObjects:vertex_ptrs.data()
                                                         count:vertex_ptrs.size()];
      geomDescMotion.vertexStride = sizeof(verts.data()[0]);
      geomDescMotion.indexBuffer = indexBuf;
      geomDescMotion.indexBufferOffset = 0;
      geomDescMotion.indexType = MTLIndexTypeUInt32;
      geomDescMotion.triangleCount = num_indices / 3;
      geomDescMotion.intersectionFunctionTableOffset = 0;
      geomDescMotion.opaque = true;

      geomDesc = geomDescMotion;
    }
    else {
      MTLAccelerationStructureTriangleGeometryDescriptor *geomDescNoMotion =
          [MTLAccelerationStructureTriangleGeometryDescriptor descriptor];
      geomDescNoMotion.vertexBuffer = posBuf;
      geomDescNoMotion.vertexBufferOffset = 0;
      geomDescNoMotion.vertexStride = sizeof(verts.data()[0]);
      geomDescNoMotion.indexBuffer = indexBuf;
      geomDescNoMotion.indexBufferOffset = 0;
      geomDescNoMotion.indexType = MTLIndexTypeUInt32;
      geomDescNoMotion.triangleCount = num_indices / 3;
      geomDescNoMotion.intersectionFunctionTableOffset = 0;
      geomDescNoMotion.opaque = true;

      geomDesc = geomDescNoMotion;
    }

    /* Force a single any-hit call, so shadow record-all behavior works correctly */
    /* (Match optix behavior: unsigned int build_flags =
     * OPTIX_GEOMETRY_FLAG_REQUIRE_SINGLE_ANYHIT_CALL;) */
    geomDesc.allowDuplicateIntersectionFunctionInvocation = false;

    MTLPrimitiveAccelerationStructureDescriptor *accelDesc =
        [MTLPrimitiveAccelerationStructureDescriptor descriptor];
    accelDesc.geometryDescriptors = @[ geomDesc ];
    if (num_motion_steps > 1) {
      accelDesc.motionStartTime = 0.0f;
      accelDesc.motionEndTime = 1.0f;
      accelDesc.motionStartBorderMode = MTLMotionBorderModeClamp;
      accelDesc.motionEndBorderMode = MTLMotionBorderModeClamp;
      accelDesc.motionKeyframeCount = num_motion_steps;
    }
    accelDesc.usage |= MTLAccelerationStructureUsageExtendedLimits;

    if (!use_fast_trace_bvh) {
      accelDesc.usage |= (MTLAccelerationStructureUsageRefit |
                          MTLAccelerationStructureUsagePreferFastBuild);
    }

    /* Encode the (re)build, plus a compacted-size query when compaction will follow. */
    MTLAccelerationStructureSizes accelSizes = [mtl_device
        accelerationStructureSizesWithDescriptor:accelDesc];
    id<MTLAccelerationStructure> accel_uncompressed = [mtl_device
        newAccelerationStructureWithSize:accelSizes.accelerationStructureSize];
    id<MTLBuffer> scratchBuf = [mtl_device newBufferWithLength:accelSizes.buildScratchBufferSize
                                                       options:MTLResourceStorageModePrivate];
    id<MTLBuffer> sizeBuf = [mtl_device newBufferWithLength:8
                                                    options:MTLResourceStorageModeShared];
    id<MTLCommandBuffer> accelCommands = [queue commandBuffer];
    id<MTLAccelerationStructureCommandEncoder> accelEnc =
        [accelCommands accelerationStructureCommandEncoder];
    if (refit) {
      [accelEnc refitAccelerationStructure:accel_struct
                                descriptor:accelDesc
                               destination:accel_uncompressed
                             scratchBuffer:scratchBuf
                       scratchBufferOffset:0];
    }
    else {
      [accelEnc buildAccelerationStructure:accel_uncompressed
                                descriptor:accelDesc
                             scratchBuffer:scratchBuf
                       scratchBufferOffset:0];
    }
    if (use_fast_trace_bvh) {
      [accelEnc writeCompactedAccelerationStructureSize:accel_uncompressed
                                               toBuffer:sizeBuf
                                                 offset:0
                                           sizeDataType:MTLDataTypeULong];
    }
    [accelEnc endEncoding];

    /* Estimated size of resources that will be wired for the GPU accelerated build.
     * Acceleration-struct size is doubled to account for possible compaction step. */
    size_t wired_size = posBuf.allocatedSize + indexBuf.allocatedSize + scratchBuf.allocatedSize +
                        accel_uncompressed.allocatedSize * 2;

    /* Runs on a Metal completion thread once the GPU build has finished. */
    [accelCommands addCompletedHandler:^(id<MTLCommandBuffer> /*command_buffer*/) {
      /* free temp resources */
      [scratchBuf release];
      [indexBuf release];
      [posBuf release];

      if (use_fast_trace_bvh) {
        /* Compact the accel structure */
        uint64_t compressed_size = *(uint64_t *)sizeBuf.contents;

        /* Encode the compaction pass off the completion thread. */
        dispatch_async(dispatch_get_global_queue(DISPATCH_QUEUE_PRIORITY_DEFAULT, 0), ^{
          id<MTLCommandBuffer> accelCommands = [queue commandBuffer];
          id<MTLAccelerationStructureCommandEncoder> accelEnc =
              [accelCommands accelerationStructureCommandEncoder];
          id<MTLAccelerationStructure> accel = [mtl_device
              newAccelerationStructureWithSize:compressed_size];
          [accelEnc copyAndCompactAccelerationStructure:accel_uncompressed
                                toAccelerationStructure:accel];
          [accelEnc endEncoding];
          [accelCommands addCompletedHandler:^(id<MTLCommandBuffer> /*command_buffer*/) {
            set_accel_struct(accel);
            [accel_uncompressed release];

            /* Signal that we've finished doing GPU acceleration struct build. */
            g_bvh_build_throttler.release(wired_size);
          }];
          [accelCommands commit];
        });
      }
      else {
        /* set our acceleration structure to the uncompressed structure */
        set_accel_struct(accel_uncompressed);

        /* Signal that we've finished doing GPU acceleration struct build. */
        g_bvh_build_throttler.release(wired_size);
      }

      [sizeBuf release];
    }];

    /* Wait until it's safe to proceed with GPU acceleration struct build. */
    g_bvh_build_throttler.acquire(wired_size);
    [accelCommands commit];

    return true;
  }
  return false;
}
366
/* Build (or refit) a bottom-level acceleration structure for hair curves using Metal's
 * native curve primitives (macOS 14+ only). Returns true if a GPU build was committed.
 * Like the mesh path, the result is installed asynchronously from a completion handler
 * and the build is throttled by g_bvh_build_throttler. */
bool BVHMetal::build_BLAS_hair(Progress &progress,
                               id<MTLDevice> mtl_device,
                               id<MTLCommandQueue> queue,
                               Geometry *const geom,
                               bool refit)
{
# if defined(MAC_OS_VERSION_14_0)
  if (@available(macos 14.0, *)) {
    /* Build BLAS for hair curves */
    Hair *hair = static_cast<Hair *>(geom);
    if (hair->num_curves() == 0) {
      return false;
    }

    /*------------------------------------------------*/
    BVH_status(
        "Building hair BLAS | %7d curves | %s", (int)hair->num_curves(), geom->name.c_str());
    /*------------------------------------------------*/

    const bool use_fast_trace_bvh = (params.bvh_type == BVH_TYPE_STATIC);

    size_t num_motion_steps = 1;
    /* NOTE(review): `motion_keys` is used here but its declaration is missing from this
     * listing (presumably an attributes.find(ATTR_STD_MOTION_VERTEX_POSITION) lookup
     * dropped by extraction) — confirm against the full source. */
    if (motion_blur && hair->get_use_motion_blur() && motion_keys) {
      num_motion_steps = hair->get_motion_steps();
    }

    MTLResourceOptions storage_mode;
    if (mtl_device.hasUnifiedMemory) {
      storage_mode = MTLResourceStorageModeShared;
    }
    else {
      storage_mode = MTLResourceStorageModeManaged;
    }

    id<MTLBuffer> cpBuffer = nil;
    id<MTLBuffer> radiusBuffer = nil;
    id<MTLBuffer> idxBuffer = nil;

    MTLAccelerationStructureGeometryDescriptor *geomDesc;
    if (motion_blur) {
      MTLAccelerationStructureMotionCurveGeometryDescriptor *geomDescCrv =
          [MTLAccelerationStructureMotionCurveGeometryDescriptor descriptor];

      uint64_t numKeys = hair->num_keys();
      uint64_t numCurves = hair->num_curves();
      const array<float> &radiuses = hair->get_curve_radius();

      /* Gather the curve geometry. */
      std::vector<float3> cpData;
      std::vector<int> idxData;
      std::vector<float> radiusData;
      cpData.reserve(numKeys);
      radiusData.reserve(numKeys);

      /* Offset into cpData/radiusData where each motion step's control points begin. */
      std::vector<int> step_offsets;
      for (size_t step = 0; step < num_motion_steps; ++step) {

        /* The center step for motion vertices is not stored in the attribute. */
        const float3 *keys = hair->get_curve_keys().data();
        size_t center_step = (num_motion_steps - 1) / 2;
        if (step != center_step) {
          size_t attr_offset = (step > center_step) ? step - 1 : step;
          /* Technically this is a float4 array, but sizeof(float3) == sizeof(float4). */
          keys = motion_keys->data_float3() + attr_offset * numKeys;
        }

        step_offsets.push_back(cpData.size());

        for (int c = 0; c < numCurves; ++c) {
          const Hair::Curve curve = hair->get_curve(c);
          int segCount = curve.num_segments();
          int firstKey = curve.first_key;
          uint64_t idxBase = cpData.size();
          /* Duplicate the first/last keys for Catmull-Rom end padding; segment
           * indices only need to be emitted once (step 0). */
          cpData.push_back(keys[firstKey]);
          radiusData.push_back(radiuses[firstKey]);
          for (int s = 0; s < segCount; ++s) {
            if (step == 0) {
              idxData.push_back(idxBase + s);
            }
            cpData.push_back(keys[firstKey + s]);
            radiusData.push_back(radiuses[firstKey + s]);
          }
          cpData.push_back(keys[firstKey + curve.num_keys - 1]);
          cpData.push_back(keys[firstKey + curve.num_keys - 1]);
          radiusData.push_back(radiuses[firstKey + curve.num_keys - 1]);
          radiusData.push_back(radiuses[firstKey + curve.num_keys - 1]);
        }
      }

      /* Allocate and populate MTLBuffers for geometry. */
      idxBuffer = [mtl_device newBufferWithBytes:idxData.data()
                                          length:idxData.size() * sizeof(int)
                                         options:storage_mode];

      cpBuffer = [mtl_device newBufferWithBytes:cpData.data()
                                         length:cpData.size() * sizeof(float3)
                                        options:storage_mode];

      radiusBuffer = [mtl_device newBufferWithBytes:radiusData.data()
                                             length:radiusData.size() * sizeof(float)
                                            options:storage_mode];

      /* One keyframe view per motion step into the shared control-point/radius buffers. */
      std::vector<MTLMotionKeyframeData *> cp_ptrs;
      std::vector<MTLMotionKeyframeData *> radius_ptrs;
      cp_ptrs.reserve(num_motion_steps);
      radius_ptrs.reserve(num_motion_steps);

      for (size_t step = 0; step < num_motion_steps; ++step) {
        MTLMotionKeyframeData *k = [MTLMotionKeyframeData data];
        k.buffer = cpBuffer;
        k.offset = step_offsets[step] * sizeof(float3);
        cp_ptrs.push_back(k);

        k = [MTLMotionKeyframeData data];
        k.buffer = radiusBuffer;
        k.offset = step_offsets[step] * sizeof(float);
        radius_ptrs.push_back(k);
      }

      if (storage_mode == MTLResourceStorageModeManaged) {
        /* Flush CPU-written contents for managed (discrete-GPU) storage. */
        [cpBuffer didModifyRange:NSMakeRange(0, cpBuffer.length)];
        [idxBuffer didModifyRange:NSMakeRange(0, idxBuffer.length)];
        [radiusBuffer didModifyRange:NSMakeRange(0, radiusBuffer.length)];
      }

      geomDescCrv.controlPointBuffers = [NSArray arrayWithObjects:cp_ptrs.data()
                                                            count:cp_ptrs.size()];
      geomDescCrv.radiusBuffers = [NSArray arrayWithObjects:radius_ptrs.data()
                                                      count:radius_ptrs.size()];

      geomDescCrv.controlPointCount = cpData.size();
      geomDescCrv.controlPointStride = sizeof(float3);
      geomDescCrv.controlPointFormat = MTLAttributeFormatFloat3;
      geomDescCrv.radiusStride = sizeof(float);
      geomDescCrv.radiusFormat = MTLAttributeFormatFloat;
      geomDescCrv.segmentCount = idxData.size();
      geomDescCrv.segmentControlPointCount = 4;
      geomDescCrv.curveType = (hair->curve_shape == CURVE_RIBBON) ? MTLCurveTypeFlat :
                                                                    MTLCurveTypeRound;
      geomDescCrv.curveBasis = MTLCurveBasisCatmullRom;
      geomDescCrv.curveEndCaps = MTLCurveEndCapsDisk;
      geomDescCrv.indexType = MTLIndexTypeUInt32;
      geomDescCrv.indexBuffer = idxBuffer;
      geomDescCrv.intersectionFunctionTableOffset = 1;

      /* Force a single any-hit call, so shadow record-all behavior works correctly */
      /* (Match optix behavior: unsigned int build_flags =
       * OPTIX_GEOMETRY_FLAG_REQUIRE_SINGLE_ANYHIT_CALL;) */
      geomDescCrv.allowDuplicateIntersectionFunctionInvocation = false;
      geomDescCrv.opaque = true;
      geomDesc = geomDescCrv;
    }
    else {
      MTLAccelerationStructureCurveGeometryDescriptor *geomDescCrv =
          [MTLAccelerationStructureCurveGeometryDescriptor descriptor];

      uint64_t numKeys = hair->num_keys();
      uint64_t numCurves = hair->num_curves();
      const array<float> &radiuses = hair->get_curve_radius();

      /* Gather the curve geometry. */
      std::vector<float3> cpData;
      std::vector<int> idxData;
      std::vector<float> radiusData;
      cpData.reserve(numKeys);
      radiusData.reserve(numKeys);
      /* NOTE(review): `auto` here binds by value — if get_curve_keys() returns a
       * reference this takes a copy of the whole key array; confirm intent. */
      auto keys = hair->get_curve_keys();
      for (int c = 0; c < numCurves; ++c) {
        const Hair::Curve curve = hair->get_curve(c);
        int segCount = curve.num_segments();
        int firstKey = curve.first_key;
        /* Duplicate the first/last keys for Catmull-Rom end padding. */
        radiusData.push_back(radiuses[firstKey]);
        uint64_t idxBase = cpData.size();
        cpData.push_back(keys[firstKey]);
        for (int s = 0; s < segCount; ++s) {
          idxData.push_back(idxBase + s);
          cpData.push_back(keys[firstKey + s]);
          radiusData.push_back(radiuses[firstKey + s]);
        }
        cpData.push_back(keys[firstKey + curve.num_keys - 1]);
        cpData.push_back(keys[firstKey + curve.num_keys - 1]);
        radiusData.push_back(radiuses[firstKey + curve.num_keys - 1]);
        radiusData.push_back(radiuses[firstKey + curve.num_keys - 1]);
      }

      /* Allocate and populate MTLBuffers for geometry. */
      idxBuffer = [mtl_device newBufferWithBytes:idxData.data()
                                          length:idxData.size() * sizeof(int)
                                         options:storage_mode];

      cpBuffer = [mtl_device newBufferWithBytes:cpData.data()
                                         length:cpData.size() * sizeof(float3)
                                        options:storage_mode];

      radiusBuffer = [mtl_device newBufferWithBytes:radiusData.data()
                                             length:radiusData.size() * sizeof(float)
                                            options:storage_mode];

      if (storage_mode == MTLResourceStorageModeManaged) {
        /* Flush CPU-written contents for managed (discrete-GPU) storage. */
        [cpBuffer didModifyRange:NSMakeRange(0, cpBuffer.length)];
        [idxBuffer didModifyRange:NSMakeRange(0, idxBuffer.length)];
        [radiusBuffer didModifyRange:NSMakeRange(0, radiusBuffer.length)];
      }
      geomDescCrv.controlPointBuffer = cpBuffer;
      geomDescCrv.radiusBuffer = radiusBuffer;
      geomDescCrv.controlPointCount = cpData.size();
      geomDescCrv.controlPointStride = sizeof(float3);
      geomDescCrv.controlPointFormat = MTLAttributeFormatFloat3;
      geomDescCrv.controlPointBufferOffset = 0;
      geomDescCrv.segmentCount = idxData.size();
      geomDescCrv.segmentControlPointCount = 4;
      geomDescCrv.curveType = (hair->curve_shape == CURVE_RIBBON) ? MTLCurveTypeFlat :
                                                                    MTLCurveTypeRound;
      geomDescCrv.curveBasis = MTLCurveBasisCatmullRom;
      geomDescCrv.curveEndCaps = MTLCurveEndCapsDisk;
      geomDescCrv.indexType = MTLIndexTypeUInt32;
      geomDescCrv.indexBuffer = idxBuffer;
      geomDescCrv.intersectionFunctionTableOffset = 1;

      /* Force a single any-hit call, so shadow record-all behavior works correctly */
      /* (Match optix behavior: unsigned int build_flags =
       * OPTIX_GEOMETRY_FLAG_REQUIRE_SINGLE_ANYHIT_CALL;) */
      geomDescCrv.allowDuplicateIntersectionFunctionInvocation = false;
      geomDescCrv.opaque = true;
      geomDesc = geomDescCrv;
    }

    MTLPrimitiveAccelerationStructureDescriptor *accelDesc =
        [MTLPrimitiveAccelerationStructureDescriptor descriptor];
    accelDesc.geometryDescriptors = @[ geomDesc ];

    if (motion_blur) {
      accelDesc.motionStartTime = 0.0f;
      accelDesc.motionEndTime = 1.0f;
      accelDesc.motionStartBorderMode = MTLMotionBorderModeVanish;
      accelDesc.motionEndBorderMode = MTLMotionBorderModeVanish;
      accelDesc.motionKeyframeCount = num_motion_steps;
    }

    if (!use_fast_trace_bvh) {
      accelDesc.usage |= (MTLAccelerationStructureUsageRefit |
                          MTLAccelerationStructureUsagePreferFastBuild);
    }
    accelDesc.usage |= MTLAccelerationStructureUsageExtendedLimits;

    /* Encode the (re)build, plus a compacted-size query when compaction will follow. */
    MTLAccelerationStructureSizes accelSizes = [mtl_device
        accelerationStructureSizesWithDescriptor:accelDesc];
    id<MTLAccelerationStructure> accel_uncompressed = [mtl_device
        newAccelerationStructureWithSize:accelSizes.accelerationStructureSize];
    id<MTLBuffer> scratchBuf = [mtl_device newBufferWithLength:accelSizes.buildScratchBufferSize
                                                       options:MTLResourceStorageModePrivate];
    id<MTLBuffer> sizeBuf = [mtl_device newBufferWithLength:8
                                                    options:MTLResourceStorageModeShared];
    id<MTLCommandBuffer> accelCommands = [queue commandBuffer];
    id<MTLAccelerationStructureCommandEncoder> accelEnc =
        [accelCommands accelerationStructureCommandEncoder];
    if (refit) {
      [accelEnc refitAccelerationStructure:accel_struct
                                descriptor:accelDesc
                               destination:accel_uncompressed
                             scratchBuffer:scratchBuf
                       scratchBufferOffset:0];
    }
    else {
      [accelEnc buildAccelerationStructure:accel_uncompressed
                                descriptor:accelDesc
                             scratchBuffer:scratchBuf
                       scratchBufferOffset:0];
    }
    if (use_fast_trace_bvh) {
      [accelEnc writeCompactedAccelerationStructureSize:accel_uncompressed
                                               toBuffer:sizeBuf
                                                 offset:0
                                           sizeDataType:MTLDataTypeULong];
    }
    [accelEnc endEncoding];

    /* Estimated size of resources that will be wired for the GPU accelerated build.
     * Acceleration-struct size is doubled to account for possible compaction step. */
    size_t wired_size = cpBuffer.allocatedSize + radiusBuffer.allocatedSize +
                        idxBuffer.allocatedSize + scratchBuf.allocatedSize +
                        accel_uncompressed.allocatedSize * 2;

    /* Runs on a Metal completion thread once the GPU build has finished. */
    [accelCommands addCompletedHandler:^(id<MTLCommandBuffer> /*command_buffer*/) {
      /* free temp resources */
      [scratchBuf release];
      [cpBuffer release];
      [radiusBuffer release];
      [idxBuffer release];

      if (use_fast_trace_bvh) {
        /* Compact the accel structure using the size the GPU wrote back. */
        uint64_t compressed_size = *(uint64_t *)sizeBuf.contents;

        dispatch_async(dispatch_get_global_queue(DISPATCH_QUEUE_PRIORITY_DEFAULT, 0), ^{
          id<MTLCommandBuffer> accelCommands = [queue commandBuffer];
          id<MTLAccelerationStructureCommandEncoder> accelEnc =
              [accelCommands accelerationStructureCommandEncoder];
          id<MTLAccelerationStructure> accel = [mtl_device
              newAccelerationStructureWithSize:compressed_size];
          [accelEnc copyAndCompactAccelerationStructure:accel_uncompressed
                                toAccelerationStructure:accel];
          [accelEnc endEncoding];
          [accelCommands addCompletedHandler:^(id<MTLCommandBuffer> /*command_buffer*/) {
            set_accel_struct(accel);
            [accel_uncompressed release];

            /* Signal that we've finished doing GPU acceleration struct build. */
            g_bvh_build_throttler.release(wired_size);
          }];
          [accelCommands commit];
        });
      }
      else {
        /* set our acceleration structure to the uncompressed structure */
        set_accel_struct(accel_uncompressed);

        /* Signal that we've finished doing GPU acceleration struct build. */
        g_bvh_build_throttler.release(wired_size);
      }

      [sizeBuf release];
    }];

    /* Wait until it's safe to proceed with GPU acceleration struct build. */
    g_bvh_build_throttler.acquire(wired_size);
    [accelCommands commit];

    return true;
  }
# else  /* MAC_OS_VERSION_14_0 */
  /* Curve primitives require the macOS 14 SDK; silence unused-parameter warnings. */
  (void)progress;
  (void)mtl_device;
  (void)queue;
  (void)geom;
  (void)(refit);
# endif /* MAC_OS_VERSION_14_0 */
  return false;
}
706
707bool BVHMetal::build_BLAS_pointcloud(Progress &progress,
708 id<MTLDevice> mtl_device,
709 id<MTLCommandQueue> queue,
710 Geometry *const geom,
711 bool refit)
712{
713 if (@available(macos 12.0, *)) {
714 /* Build BLAS for point cloud */
715 PointCloud *pointcloud = static_cast<PointCloud *>(geom);
716 if (pointcloud->num_points() == 0) {
717 return false;
718 }
719
720 /*------------------------------------------------*/
721 BVH_status("Building pointcloud BLAS | %7d points | %s",
722 (int)pointcloud->num_points(),
723 geom->name.c_str());
724 /*------------------------------------------------*/
725
726 const size_t num_points = pointcloud->get_points().size();
727 const float3 *points = pointcloud->get_points().data();
728 const float *radius = pointcloud->get_radius().data();
729
730 const bool use_fast_trace_bvh = (params.bvh_type == BVH_TYPE_STATIC);
731
732 size_t num_motion_steps = 1;
733 Attribute *motion_keys = pointcloud->attributes.find(ATTR_STD_MOTION_VERTEX_POSITION);
734 if (motion_blur && pointcloud->get_use_motion_blur() && motion_keys) {
735 num_motion_steps = pointcloud->get_motion_steps();
736 }
737
738 const size_t num_aabbs = num_motion_steps * num_points;
739
740 MTLResourceOptions storage_mode;
741 if (mtl_device.hasUnifiedMemory) {
742 storage_mode = MTLResourceStorageModeShared;
743 }
744 else {
745 storage_mode = MTLResourceStorageModeManaged;
746 }
747
748 /* Allocate a GPU buffer for the AABB data and populate it */
749 id<MTLBuffer> aabbBuf = [mtl_device
750 newBufferWithLength:num_aabbs * sizeof(MTLAxisAlignedBoundingBox)
751 options:storage_mode];
752 MTLAxisAlignedBoundingBox *aabb_data = (MTLAxisAlignedBoundingBox *)[aabbBuf contents];
753
754 /* Get AABBs for each motion step */
755 size_t center_step = (num_motion_steps - 1) / 2;
756 for (size_t step = 0; step < num_motion_steps; ++step) {
757 if (step == center_step) {
758 /* The center step for motion vertices is not stored in the attribute */
759 for (size_t j = 0; j < num_points; ++j) {
760 const PointCloud::Point point = pointcloud->get_point(j);
762 point.bounds_grow(points, radius, bounds);
763
764 const size_t index = step * num_points + j;
765 aabb_data[index].min = (MTLPackedFloat3 &)bounds.min;
766 aabb_data[index].max = (MTLPackedFloat3 &)bounds.max;
767 }
768 }
769 else {
770 size_t attr_offset = (step > center_step) ? step - 1 : step;
771 float4 *motion_points = motion_keys->data_float4() + attr_offset * num_points;
772
773 for (size_t j = 0; j < num_points; ++j) {
774 const PointCloud::Point point = pointcloud->get_point(j);
776 point.bounds_grow(motion_points[j], bounds);
777
778 const size_t index = step * num_points + j;
779 aabb_data[index].min = (MTLPackedFloat3 &)bounds.min;
780 aabb_data[index].max = (MTLPackedFloat3 &)bounds.max;
781 }
782 }
783 }
784
785 if (storage_mode == MTLResourceStorageModeManaged) {
786 [aabbBuf didModifyRange:NSMakeRange(0, aabbBuf.length)];
787 }
788
789# if 0
790 for (size_t i=0; i<num_aabbs && i < 400; i++) {
791 MTLAxisAlignedBoundingBox& bb = aabb_data[i];
792 printf(" %d: %.1f,%.1f,%.1f -- %.1f,%.1f,%.1f\n", int(i), bb.min.x, bb.min.y, bb.min.z, bb.max.x, bb.max.y, bb.max.z);
793 }
794# endif
795
796 MTLAccelerationStructureGeometryDescriptor *geomDesc;
797 if (motion_blur) {
798 std::vector<MTLMotionKeyframeData *> aabb_ptrs;
799 aabb_ptrs.reserve(num_motion_steps);
800 for (size_t step = 0; step < num_motion_steps; ++step) {
801 MTLMotionKeyframeData *k = [MTLMotionKeyframeData data];
802 k.buffer = aabbBuf;
803 k.offset = step * num_points * sizeof(MTLAxisAlignedBoundingBox);
804 aabb_ptrs.push_back(k);
805 }
806
807 MTLAccelerationStructureMotionBoundingBoxGeometryDescriptor *geomDescMotion =
808 [MTLAccelerationStructureMotionBoundingBoxGeometryDescriptor descriptor];
809 geomDescMotion.boundingBoxBuffers = [NSArray arrayWithObjects:aabb_ptrs.data()
810 count:aabb_ptrs.size()];
811 geomDescMotion.boundingBoxCount = num_points;
812 geomDescMotion.boundingBoxStride = sizeof(aabb_data[0]);
813 geomDescMotion.intersectionFunctionTableOffset = 2;
814
815 /* Force a single any-hit call, so shadow record-all behavior works correctly */
816 /* (Match optix behavior: unsigned int build_flags =
817 * OPTIX_GEOMETRY_FLAG_REQUIRE_SINGLE_ANYHIT_CALL;) */
818 geomDescMotion.allowDuplicateIntersectionFunctionInvocation = false;
819 geomDescMotion.opaque = true;
820 geomDesc = geomDescMotion;
821 }
822 else {
823 MTLAccelerationStructureBoundingBoxGeometryDescriptor *geomDescNoMotion =
824 [MTLAccelerationStructureBoundingBoxGeometryDescriptor descriptor];
825 geomDescNoMotion.boundingBoxBuffer = aabbBuf;
826 geomDescNoMotion.boundingBoxBufferOffset = 0;
827 geomDescNoMotion.boundingBoxCount = int(num_aabbs);
828 geomDescNoMotion.boundingBoxStride = sizeof(aabb_data[0]);
829 geomDescNoMotion.intersectionFunctionTableOffset = 2;
830
831 /* Force a single any-hit call, so shadow record-all behavior works correctly */
832 /* (Match optix behavior: unsigned int build_flags =
833 * OPTIX_GEOMETRY_FLAG_REQUIRE_SINGLE_ANYHIT_CALL;) */
834 geomDescNoMotion.allowDuplicateIntersectionFunctionInvocation = false;
835 geomDescNoMotion.opaque = true;
836 geomDesc = geomDescNoMotion;
837 }
838
839 MTLPrimitiveAccelerationStructureDescriptor *accelDesc =
840 [MTLPrimitiveAccelerationStructureDescriptor descriptor];
841 accelDesc.geometryDescriptors = @[ geomDesc ];
842
843 if (motion_blur) {
844 accelDesc.motionStartTime = 0.0f;
845 accelDesc.motionEndTime = 1.0f;
846 // accelDesc.motionStartBorderMode = MTLMotionBorderModeVanish;
847 // accelDesc.motionEndBorderMode = MTLMotionBorderModeVanish;
848 accelDesc.motionKeyframeCount = num_motion_steps;
849 }
850 accelDesc.usage |= MTLAccelerationStructureUsageExtendedLimits;
851
852 if (!use_fast_trace_bvh) {
853 accelDesc.usage |= (MTLAccelerationStructureUsageRefit |
854 MTLAccelerationStructureUsagePreferFastBuild);
855 }
856
857 MTLAccelerationStructureSizes accelSizes = [mtl_device
858 accelerationStructureSizesWithDescriptor:accelDesc];
859 id<MTLAccelerationStructure> accel_uncompressed = [mtl_device
860 newAccelerationStructureWithSize:accelSizes.accelerationStructureSize];
861 id<MTLBuffer> scratchBuf = [mtl_device newBufferWithLength:accelSizes.buildScratchBufferSize
862 options:MTLResourceStorageModePrivate];
863 id<MTLBuffer> sizeBuf = [mtl_device newBufferWithLength:8
864 options:MTLResourceStorageModeShared];
865 id<MTLCommandBuffer> accelCommands = [queue commandBuffer];
866 id<MTLAccelerationStructureCommandEncoder> accelEnc =
867 [accelCommands accelerationStructureCommandEncoder];
868 if (refit) {
869 [accelEnc refitAccelerationStructure:accel_struct
870 descriptor:accelDesc
871 destination:accel_uncompressed
872 scratchBuffer:scratchBuf
873 scratchBufferOffset:0];
874 }
875 else {
876 [accelEnc buildAccelerationStructure:accel_uncompressed
877 descriptor:accelDesc
878 scratchBuffer:scratchBuf
879 scratchBufferOffset:0];
880 }
881 if (use_fast_trace_bvh) {
882 [accelEnc writeCompactedAccelerationStructureSize:accel_uncompressed
883 toBuffer:sizeBuf
884 offset:0
885 sizeDataType:MTLDataTypeULong];
886 }
887 [accelEnc endEncoding];
888
889 /* Estimated size of resources that will be wired for the GPU accelerated build.
890 * Acceleration-struct size is doubled to account for possible compaction step. */
891 size_t wired_size = aabbBuf.allocatedSize + scratchBuf.allocatedSize +
892 accel_uncompressed.allocatedSize * 2;
893
894 [accelCommands addCompletedHandler:^(id<MTLCommandBuffer> /*command_buffer*/) {
895 /* free temp resources */
896 [scratchBuf release];
897 [aabbBuf release];
898
899 if (use_fast_trace_bvh) {
900 /* Compact the accel structure */
901 uint64_t compressed_size = *(uint64_t *)sizeBuf.contents;
902
903 dispatch_async(dispatch_get_global_queue(DISPATCH_QUEUE_PRIORITY_DEFAULT, 0), ^{
904 id<MTLCommandBuffer> accelCommands = [queue commandBuffer];
905 id<MTLAccelerationStructureCommandEncoder> accelEnc =
906 [accelCommands accelerationStructureCommandEncoder];
907 id<MTLAccelerationStructure> accel = [mtl_device
908 newAccelerationStructureWithSize:compressed_size];
909 [accelEnc copyAndCompactAccelerationStructure:accel_uncompressed
910 toAccelerationStructure:accel];
911 [accelEnc endEncoding];
912 [accelCommands addCompletedHandler:^(id<MTLCommandBuffer> /*command_buffer*/) {
913 set_accel_struct(accel);
914 [accel_uncompressed release];
915
916 /* Signal that we've finished doing GPU acceleration struct build. */
917 g_bvh_build_throttler.release(wired_size);
918 }];
919 [accelCommands commit];
920 });
921 }
922 else {
923 /* set our acceleration structure to the uncompressed structure */
924 set_accel_struct(accel_uncompressed);
925
926 /* Signal that we've finished doing GPU acceleration struct build. */
927 g_bvh_build_throttler.release(wired_size);
928 }
929
930 [sizeBuf release];
931 }];
932
933 /* Wait until it's safe to proceed with GPU acceleration struct build. */
934 g_bvh_build_throttler.acquire(wired_size);
935 [accelCommands commit];
936 return true;
937 }
938 return false;
939}
940
/* Build the bottom-level acceleration structure (BLAS) for the single
 * geometry owned by this BVH, dispatching to the primitive-specific builder.
 *
 * \param progress: used by the builders to report sub-status.
 * \param mtl_device: Metal device to allocate acceleration structures on.
 * \param queue: command queue the build commands are encoded into.
 * \param refit: when true, refit the existing structure instead of rebuilding.
 * \return true if a build was dispatched, false for unsupported geometry. */
bool BVHMetal::build_BLAS(Progress &progress,
                          id<MTLDevice> mtl_device,
                          id<MTLCommandQueue> queue,
                          bool refit)
{
  /* A BLAS wraps exactly one geometry of one object. */
  assert(objects.size() == 1 && geometry.size() == 1);

  /* Build bottom level acceleration structures (BLAS) */
  Geometry *const geom = geometry[0];
  switch (geom->geometry_type) {
    /* Volumes are voxelized into triangle meshes, so share the mesh path. */
    case Geometry::VOLUME:
    case Geometry::MESH:
      return build_BLAS_mesh(progress, mtl_device, queue, geom, refit);
    case Geometry::HAIR:
      return build_BLAS_hair(progress, mtl_device, queue, geom, refit);
    case Geometry::POINTCLOUD:
      return build_BLAS_pointcloud(progress, mtl_device, queue, geom, refit);
    default:
      return false;
  }
  return false;
}
963
/* Build the top-level acceleration structure (TLAS) over all object instances.
 *
 * Bakes one instance descriptor per object (motion or user-ID flavor depending
 * on `motion_blur`), maps each object's BLAS into a deduplicated array, then
 * encodes a blocking GPU build (or refit) of the instance acceleration
 * structure. Returns true on success; false when there are no instances or
 * the macOS 12 Metal ray-tracing API is unavailable. */
bool BVHMetal::build_TLAS(Progress &progress,
                          id<MTLDevice> mtl_device,
                          id<MTLCommandQueue> queue,
                          bool refit)
{
  /* Wait for all BLAS builds to finish. */
  g_bvh_build_throttler.wait_for_all();

  if (@available(macos 12.0, *)) {
    /* Defined inside available check, for return type to be available. */
    /* Builds a degenerate (zero-triangle) BLAS, used as a stand-in for
     * non-traceable objects so instance indices stay dense. Blocks until the
     * GPU build completes and returns an owned (+1) acceleration structure. */
    auto make_null_BLAS = [](id<MTLDevice> mtl_device,
                             id<MTLCommandQueue> queue) -> id<MTLAccelerationStructure> {
      MTLResourceOptions storage_mode = MTLResourceStorageModeManaged;
      if (mtl_device.hasUnifiedMemory) {
        storage_mode = MTLResourceStorageModeShared;
      }

      /* A single tiny buffer backs both vertex and index streams; triangleCount
       * is 0 so its contents are never read. */
      id<MTLBuffer> nullBuf = [mtl_device newBufferWithLength:sizeof(float3) options:storage_mode];

      /* Create an acceleration structure. */
      MTLAccelerationStructureTriangleGeometryDescriptor *geomDesc =
          [MTLAccelerationStructureTriangleGeometryDescriptor descriptor];
      geomDesc.vertexBuffer = nullBuf;
      geomDesc.vertexBufferOffset = 0;
      geomDesc.vertexStride = sizeof(float3);
      geomDesc.indexBuffer = nullBuf;
      geomDesc.indexBufferOffset = 0;
      geomDesc.indexType = MTLIndexTypeUInt32;
      geomDesc.triangleCount = 0;
      geomDesc.intersectionFunctionTableOffset = 0;
      geomDesc.opaque = true;
      geomDesc.allowDuplicateIntersectionFunctionInvocation = false;

      MTLPrimitiveAccelerationStructureDescriptor *accelDesc =
          [MTLPrimitiveAccelerationStructureDescriptor descriptor];
      accelDesc.geometryDescriptors = @[ geomDesc ];
      accelDesc.usage |= MTLAccelerationStructureUsageExtendedLimits;

      MTLAccelerationStructureSizes accelSizes = [mtl_device
          accelerationStructureSizesWithDescriptor:accelDesc];
      id<MTLAccelerationStructure> accel_struct = [mtl_device
          newAccelerationStructureWithSize:accelSizes.accelerationStructureSize];
      id<MTLBuffer> scratchBuf = [mtl_device newBufferWithLength:accelSizes.buildScratchBufferSize
                                                         options:MTLResourceStorageModePrivate];
      id<MTLBuffer> sizeBuf = [mtl_device newBufferWithLength:8
                                                      options:MTLResourceStorageModeShared];
      id<MTLCommandBuffer> accelCommands = [queue commandBuffer];
      id<MTLAccelerationStructureCommandEncoder> accelEnc =
          [accelCommands accelerationStructureCommandEncoder];
      [accelEnc buildAccelerationStructure:accel_struct
                                descriptor:accelDesc
                             scratchBuffer:scratchBuf
                       scratchBufferOffset:0];
      [accelEnc endEncoding];
      [accelCommands commit];
      /* Synchronous build: the null BLAS is tiny, so blocking here is cheap. */
      [accelCommands waitUntilCompleted];

      /* free temp resources */
      [scratchBuf release];
      [nullBuf release];
      [sizeBuf release];

      return accel_struct;
    };

    /* Count instances and the motion transforms they will need. A motion
     * object contributes one transform per motion key (at least 1); a static
     * object contributes exactly one. */
    uint32_t num_instances = 0;
    uint32_t num_motion_transforms = 0;
    for (Object *ob : objects) {
      num_instances++;

      if (ob->use_motion()) {
        num_motion_transforms += max((size_t)1, ob->get_motion().size());
      }
      else {
        num_motion_transforms++;
      }
    }

    if (num_instances == 0) {
      return false;
    }

    /*------------------------------------------------*/
    BVH_status("Building TLAS | %7d instances", (int)num_instances);
    /*------------------------------------------------*/

    /* Static BVHs are compacted and never refit. */
    const bool use_fast_trace_bvh = (params.bvh_type == BVH_TYPE_STATIC);

    NSMutableArray *all_blas = [NSMutableArray array];
    unordered_map<BVHMetal const *, int> instance_mapping;

    /* Lambda function to build/retrieve the BLAS index mapping */
    /* Deduplicates BLAS pointers: instanced geometry maps to one shared slot
     * in `all_blas`. A nullptr BLAS maps to the shared null_BLAS. */
    auto get_blas_index = [&](BVHMetal const *blas) {
      auto it = instance_mapping.find(blas);
      if (it != instance_mapping.end()) {
        return it->second;
      }
      else {
        int blas_index = (int)[all_blas count];
        instance_mapping[blas] = blas_index;
        if (@available(macos 12.0, *)) {
          [all_blas addObject:(blas ? blas->accel_struct : null_BLAS)];
        }
        return blas_index;
      }
    };

    MTLResourceOptions storage_mode;
    if (mtl_device.hasUnifiedMemory) {
      storage_mode = MTLResourceStorageModeShared;
    }
    else {
      storage_mode = MTLResourceStorageModeManaged;
    }

    /* Descriptor stride depends on whether motion instances are in use. */
    size_t instance_size;
    if (motion_blur) {
      instance_size = sizeof(MTLAccelerationStructureMotionInstanceDescriptor);
    }
    else {
      instance_size = sizeof(MTLAccelerationStructureUserIDInstanceDescriptor);
    }

    /* Allocate a GPU buffer for the instance data and populate it */
    id<MTLBuffer> instanceBuf = [mtl_device newBufferWithLength:num_instances * instance_size
                                                        options:storage_mode];
    id<MTLBuffer> motion_transforms_buf = nil;
    MTLPackedFloat4x3 *motion_transforms = nullptr;
    if (motion_blur && num_motion_transforms) {
      motion_transforms_buf = [mtl_device
          newBufferWithLength:num_motion_transforms * sizeof(MTLPackedFloat4x3)
                      options:storage_mode];
      motion_transforms = (MTLPackedFloat4x3 *)motion_transforms_buf.contents;
    }

    uint32_t instance_index = 0;
    uint32_t motion_transform_index = 0;

    blas_array.clear();
    blas_array.reserve(num_instances);

    for (Object *ob : objects) {
      /* Skip non-traceable objects */
      Geometry const *geom = ob->get_geometry();
      BVHMetal const *blas = static_cast<BVHMetal const *>(geom->bvh);
      if (!blas || !blas->accel_struct || !ob->is_traceable()) {
        /* Place a degenerate instance, to ensure [[instance_id]] equals ob->get_device_index()
         * in our intersection functions */
        blas = nullptr;

        /* Workaround for issue in macOS <= 14.1: Insert degenerate BLAS instead of zero-filling
         * the descriptor. */
        if (!null_BLAS) {
          null_BLAS = make_null_BLAS(mtl_device, queue);
        }
        blas_array.push_back(null_BLAS);
      }
      else {
        blas_array.push_back(blas->accel_struct);
      }

      uint32_t accel_struct_index = get_blas_index(blas);

      /* Add some of the object visibility bits to the mask.
       * __prim_visibility contains the combined visibility bits of all instances, so is not
       * reliable if they differ between instances.
       */
      uint32_t mask = ob->visibility_for_tracing();

      /* Have to have at least one bit in the mask, or else instance would always be culled. */
      if (0 == mask) {
        mask = 0xFF;
      }

      /* Set user instance ID to object index */
      /* userID carries the geometry's primitive offset so intersection
       * functions can recover global primitive indices. */
      uint32_t primitive_offset = 0;
      int currIndex = instance_index++;

      if (geom->geometry_type == Geometry::HAIR) {
        /* Build BLAS for curve primitives. */
        Hair *const hair = static_cast<Hair *const>(const_cast<Geometry *>(geom));
        primitive_offset = uint32_t(hair->curve_segment_offset);
      }
      else if (geom->geometry_type == Geometry::MESH || geom->geometry_type == Geometry::VOLUME) {
        /* Build BLAS for triangle primitives. */
        Mesh *const mesh = static_cast<Mesh *const>(const_cast<Geometry *>(geom));
        primitive_offset = uint32_t(mesh->prim_offset);
      }
      else if (geom->geometry_type == Geometry::POINTCLOUD) {
        /* Build BLAS for points primitives. */
        PointCloud *const pointcloud = static_cast<PointCloud *const>(
            const_cast<Geometry *>(geom));
        primitive_offset = uint32_t(pointcloud->prim_offset);
      }

      /* Bake into the appropriate descriptor */
      if (motion_blur) {
        MTLAccelerationStructureMotionInstanceDescriptor *instances =
            (MTLAccelerationStructureMotionInstanceDescriptor *)[instanceBuf contents];
        MTLAccelerationStructureMotionInstanceDescriptor &desc = instances[currIndex];

        desc.accelerationStructureIndex = accel_struct_index;
        desc.userID = primitive_offset;
        desc.mask = mask;
        desc.motionStartTime = 0.0f;
        desc.motionEndTime = 1.0f;
        desc.motionTransformsStartIndex = motion_transform_index;
        desc.motionStartBorderMode = MTLMotionBorderModeVanish;
        desc.motionEndBorderMode = MTLMotionBorderModeVanish;
        desc.intersectionFunctionTableOffset = 0;

        int key_count = ob->get_motion().size();
        if (key_count) {
          desc.motionTransformsCount = key_count;

          /* Pack each motion key, transposed from Cycles' row-major Transform
           * into Metal's column-major MTLPackedFloat4x3 layout. Note the inner
           * loop deliberately shadows the outer `i`. */
          Transform *keys = ob->get_motion().data();
          for (int i = 0; i < key_count; i++) {
            float *t = (float *)&motion_transforms[motion_transform_index++];
            /* Transpose transform */
            auto src = (float const *)&keys[i];
            for (int i = 0; i < 12; i++) {
              t[i] = src[(i / 3) + 4 * (i % 3)];
            }
          }
        }
        else {
          desc.motionTransformsCount = 1;

          float *t = (float *)&motion_transforms[motion_transform_index++];
          if (ob->get_geometry()->is_instanced()) {
            /* Transpose transform */
            auto src = (float const *)&ob->get_tfm();
            for (int i = 0; i < 12; i++) {
              t[i] = src[(i / 3) + 4 * (i % 3)];
            }
          }
          else {
            /* Clear transform to identity matrix */
            /* Only the diagonal is written; off-diagonal entries rely on the
             * freshly-created buffer being zero-filled — NOTE(review): verify
             * this holds for managed-storage buffers. */
            t[0] = t[4] = t[8] = 1.0f;
          }
        }
      }
      else {
        MTLAccelerationStructureUserIDInstanceDescriptor *instances =
            (MTLAccelerationStructureUserIDInstanceDescriptor *)[instanceBuf contents];
        MTLAccelerationStructureUserIDInstanceDescriptor &desc = instances[currIndex];

        desc.accelerationStructureIndex = accel_struct_index;
        desc.userID = primitive_offset;
        desc.mask = mask;
        desc.intersectionFunctionTableOffset = 0;
        desc.options = MTLAccelerationStructureInstanceOptionOpaque;

        float *t = (float *)&desc.transformationMatrix;
        if (ob->get_geometry()->is_instanced()) {
          /* Transpose transform */
          auto src = (float const *)&ob->get_tfm();
          for (int i = 0; i < 12; i++) {
            t[i] = src[(i / 3) + 4 * (i % 3)];
          }
        }
        else {
          /* Clear transform to identity matrix */
          /* Diagonal only; assumes the rest of the descriptor is zero-filled
           * — TODO confirm newBufferWithLength zero-initializes. */
          t[0] = t[4] = t[8] = 1.0f;
        }
      }
    }

    /* Managed storage requires an explicit flush of CPU writes to the GPU. */
    if (storage_mode == MTLResourceStorageModeManaged) {
      [instanceBuf didModifyRange:NSMakeRange(0, instanceBuf.length)];
      if (motion_transforms_buf) {
        [motion_transforms_buf didModifyRange:NSMakeRange(0, motion_transforms_buf.length)];
        assert(num_motion_transforms == motion_transform_index);
      }
    }

    MTLInstanceAccelerationStructureDescriptor *accelDesc =
        [MTLInstanceAccelerationStructureDescriptor descriptor];
    accelDesc.instanceCount = num_instances;
    accelDesc.instanceDescriptorType = MTLAccelerationStructureInstanceDescriptorTypeUserID;
    accelDesc.instanceDescriptorBuffer = instanceBuf;
    accelDesc.instanceDescriptorBufferOffset = 0;
    accelDesc.instanceDescriptorStride = instance_size;
    accelDesc.instancedAccelerationStructures = all_blas;

    if (motion_blur) {
      /* Override the descriptor type set above; the motion variant also needs
       * the packed transform buffer. */
      accelDesc.instanceDescriptorType = MTLAccelerationStructureInstanceDescriptorTypeMotion;
      accelDesc.motionTransformBuffer = motion_transforms_buf;
      accelDesc.motionTransformCount = num_motion_transforms;
    }

    accelDesc.usage |= MTLAccelerationStructureUsageExtendedLimits;
    if (!use_fast_trace_bvh) {
      /* Dynamic BVHs favor fast (re)builds and must support refitting. */
      accelDesc.usage |= (MTLAccelerationStructureUsageRefit |
                          MTLAccelerationStructureUsagePreferFastBuild);
    }

    MTLAccelerationStructureSizes accelSizes = [mtl_device
        accelerationStructureSizesWithDescriptor:accelDesc];
    id<MTLAccelerationStructure> accel = [mtl_device
        newAccelerationStructureWithSize:accelSizes.accelerationStructureSize];
    id<MTLBuffer> scratchBuf = [mtl_device newBufferWithLength:accelSizes.buildScratchBufferSize
                                                       options:MTLResourceStorageModePrivate];
    id<MTLCommandBuffer> accelCommands = [queue commandBuffer];
    id<MTLAccelerationStructureCommandEncoder> accelEnc =
        [accelCommands accelerationStructureCommandEncoder];
    if (refit) {
      /* Refit updates the previous TLAS (`accel_struct`) into `accel`. */
      [accelEnc refitAccelerationStructure:accel_struct
                                descriptor:accelDesc
                               destination:accel
                             scratchBuffer:scratchBuf
                       scratchBufferOffset:0];
    }
    else {
      [accelEnc buildAccelerationStructure:accel
                                descriptor:accelDesc
                             scratchBuffer:scratchBuf
                       scratchBufferOffset:0];
    }
    [accelEnc endEncoding];
    [accelCommands commit];
    /* TLAS build is synchronous, unlike the throttled async BLAS builds. */
    [accelCommands waitUntilCompleted];

    if (motion_transforms_buf) {
      [motion_transforms_buf release];
    }
    [instanceBuf release];
    [scratchBuf release];

    /* Cache top and bottom-level acceleration structs */
    set_accel_struct(accel);

    unique_blas_array.clear();
    unique_blas_array.reserve(all_blas.count);
    [all_blas enumerateObjectsUsingBlock:^(id<MTLAccelerationStructure> blas, NSUInteger, BOOL *) {
      unique_blas_array.push_back(blas);
    }];

    return true;
  }
  return false;
}
1306
/* Entry point for building (or refitting) this BVH on the Metal device.
 * Validates the refit request, then dispatches to the TLAS or BLAS builder
 * depending on whether this BVH is the top level. */
bool BVHMetal::build(Progress &progress,
                     id<MTLDevice> mtl_device,
                     id<MTLCommandQueue> queue,
                     bool refit)
{
  if (@available(macos 12.0, *)) {
    /* It isn't valid to refit a non-existent BVH, or one which wasn't constructed as dynamic.
     * In such cases, assert in development but try to recover in the wild. */
    if (refit && (params.bvh_type != BVH_TYPE_DYNAMIC || !accel_struct)) {
      assert(false);
      refit = false;
    }

    /* A full rebuild starts from a clean slate. */
    if (!refit) {
      set_accel_struct(nil);
    }
  }

  @autoreleasepool {
    return params.top_level ? build_TLAS(progress, mtl_device, queue, refit) :
                              build_BLAS(progress, mtl_device, queue, refit);
  }
}
1336
1338
1339#endif /* WITH_METAL */
ThreadMutex mutex
in reality light always falls off quadratically Particle Retrieve the data of the particle that spawned the object for example to give variation to multiple instances of an object Point Retrieve information about points in a point cloud Retrieve the edges of an object as it appears to Cycles topology will always appear triangulated Convert a blackbody temperature to an RGB value Normal Generate a perturbed normal from an RGB normal map image Typically used for faking highly detailed surfaces Generate an OSL shader from a file or text data block Image Sample an image file as a texture Gabor Generate Gabor noise Gradient Generate interpolated color and intensity values based on the input vector Magic Generate a psychedelic color texture Voronoi Generate Worley noise based on the distance to random points Typically used to generate textures such as or biological cells Brick Generate a procedural texture producing bricks Texture Retrieve multiple types of texture coordinates nTypically used as inputs for texture nodes Vector Convert a point
volatile int lock
static btDbvtVolume bounds(btDbvtNode **leaves, int count)
Definition btDbvt.cpp:299
void refit(btStridingMeshInterface *triangles, const btVector3 &aabbMin, const btVector3 &aabbMax)
SIMD_FORCE_INLINE btScalar length(const btQuaternion &q)
Return the length of a quaternion.
Attribute * find(ustring name) const
float3 * data_float3()
float4 * data_float4()
Definition bvh/bvh.h:66
Type geometry_type
size_t prim_offset
AttributeSet attributes
Definition hair.h:14
Curve get_curve(size_t i) const
Definition hair.h:112
size_t curve_segment_offset
Definition hair.h:91
size_t num_curves() const
Definition hair.h:126
CurveShapeType curve_shape
Definition hair.h:92
size_t num_keys() const
Definition hair.h:121
size_t size() const
#define printf
CCL_NAMESPACE_BEGIN struct Options options
#define CCL_NAMESPACE_END
draw_view in_light_buf[] float
draw_view push_constant(Type::INT, "radiance_src") .push_constant(Type capture_info_buf storage_buf(1, Qualifier::READ, "ObjectBounds", "bounds_buf[]") .push_constant(Type draw_view int
static float verts[][3]
uiWidgetBaseParameters params[MAX_WIDGET_BASE_BATCH]
int count
@ ATTR_STD_MOTION_VERTEX_POSITION
@ CURVE_RIBBON
descriptor
ccl_device_inline float4 mask(const int4 mask, const float4 a)
ThreadQueue * queue
all scheduled work for the cpu
T step(const T &edge, const T &value)
void index(const bNode &, void *r_value)
@ BVH_TYPE_DYNAMIC
Definition params.h:34
@ BVH_TYPE_STATIC
Definition params.h:41
unsigned int uint32_t
Definition stdint.h:80
unsigned __int64 uint64_t
Definition stdint.h:90
int first_key
Definition hair.h:20
int num_segments() const
Definition hair.h:23
int num_keys
Definition hair.h:21
size_t num_triangles() const
Definition scene/mesh.h:80
ustring name
Definition graph/node.h:177
Point get_point(int i) const
size_t num_points() const
VecBase< float, 4 > float4
std::unique_lock< std::mutex > thread_scoped_lock
Definition thread.h:30
CCL_NAMESPACE_BEGIN typedef std::mutex thread_mutex
Definition thread.h:29
float max