19# include "device/metal/bvh.h"
/* Format a status message with `string_printf`, publish it as the progress sub-status and
 * echo it through `metal_printf`. Expects a variable named `progress` to be in scope at
 * the point where the macro is expanded. */
24# define BVH_status(...) \
26 string str = string_printf(__VA_ARGS__); \
27 progress.set_substatus(str); \
28 metal_printf("%s\n", str.c_str()); \
/* Diagnostic logging for the BLAS build throttler. With BVH_THROTTLE_DIAGNOSTICS defined,
 * messages are sent to `printf` prefixed with "BVHMetalBuildThrottler::"; the second
 * definition below expands to nothing. */
32# ifdef BVH_THROTTLE_DIAGNOSTICS
33# define bvh_throttle_printf(...) printf("BVHMetalBuildThrottler::" __VA_ARGS__)
35# define bvh_throttle_printf(...)
/* Bookkeeping that compares the memory accounted to in-flight BLAS builds against a
 * budget. A single global instance, `g_bvh_build_throttler`, is declared together with
 * the struct. */
40struct BVHMetalBuildThrottler {
/* Bytes currently accounted to acquired, not yet released, requests. */
42 size_t wired_memory = 0;
/* Budget that `wired_memory` is checked against in `acquire()`. */
43 size_t safe_wired_limit = 0;
/* Count of requests that were acquired and have not been released. */
44 int requests_in_flight = 0;
/* Derive the budget from the system default Metal device. */
46 BVHMetalBuildThrottler()
50 id<MTLDevice> mtlDevice = MTLCreateSystemDefaultDevice();
/* The budget is one quarter of the device's recommended maximum working set size. */
53 safe_wired_limit = [mtlDevice recommendedMaxWorkingSetSize] / 4;
54 bvh_throttle_printf(
"safe_wired_limit = %zu\n", safe_wired_limit);
/* Account `bytes_to_be_wired` for a build that is about to be submitted.
 * NOTE(review): the "throttling" log message and the 10 ms sleep suggest this waits and
 * retries until the request fits -- confirm against the complete function. */
58 void acquire(
const size_t bytes_to_be_wired)
60 bool throttled =
false;
/* A request is admitted when nothing is wired yet (even if it alone exceeds the limit),
 * or when it still fits within `safe_wired_limit`. */
67 if (wired_memory == 0 || wired_memory + bytes_to_be_wired <= safe_wired_limit) {
68 wired_memory += bytes_to_be_wired;
69 requests_in_flight += 1;
70 bvh_throttle_printf(
"acquire -- success (requests_in_flight = %d, wired_memory = %zu)\n",
78 "acquire -- throttling (requests_in_flight = %d, wired_memory = %zu, "
79 "bytes_to_be_wired = %zu)\n",
87 std::this_thread::sleep_for(std::chrono::milliseconds(10));
/* Hand `bytes_just_unwired` back to the budget and drop one in-flight request. */
92 void release(
const size_t bytes_just_unwired)
95 wired_memory -= bytes_just_unwired;
96 requests_in_flight -= 1;
97 bvh_throttle_printf(
"release (requests_in_flight = %d, wired_memory = %zu)\n",
/* NOTE(review): the check and sleep below presumably belong to a wait-until-idle helper
 * (callers invoke `wait_for_all()` on the global instance); its declaration is not
 * visible here -- confirm. */
108 if (wired_memory == 0) {
112 std::this_thread::sleep_for(std::chrono::milliseconds(10));
115} g_bvh_build_throttler;
/* OS-version gate for BLAS refit: tests for macOS 15.4 first, then for macOS 15.2.
 * NOTE(review): the value returned from each branch is not visible here -- confirm which
 * OS range ends up with refit disabled. */
122static bool support_refit_blas()
124 if (@available(macos 15.4, *)) {
127 if (@available(macos 15.2, *)) {
/* Construct the Metal BVH: forwards `params_`, `geometry_` and `objects_` to the `BVH`
 * base class and stores `device`. On macOS 12.0 and later it calls
 * `set_accel_struct(nil)`.
 * NOTE(review): `¶ms_` in the parameter list looks like a mis-decoded `&params_`
 * (the HTML entity `&para`) -- confirm against the original source. */
133BVHMetal::BVHMetal(
const BVHParams ¶ms_,
137 :
BVH(params_, geometry_, objects_), device(device)
143 if (@available(macos 12.0, *)) {
144 set_accel_struct(nil);
/* Replace the acceleration structure held by this BVH. The `allocatedSize` of the current
 * structure is returned to the device memory statistics and the structure is released;
 * when `new_accel_struct` is non-nil it is stored and its `allocatedSize` is added to the
 * statistics.
 * NOTE(review): no retain of the incoming object is visible, so the caller's reference
 * appears to be adopted -- confirm. */
151API_AVAILABLE(macos(11.0))
152void BVHMetal::set_accel_struct(
id<MTLAccelerationStructure> new_accel_struct)
154 if (@available(macos 12.0, *)) {
/* Un-account and release the structure that is being replaced. */
156 device->stats.mem_free(accel_struct.allocatedSize);
157 [accel_struct release];
/* Store the replacement and account for its size. */
161 if (new_accel_struct) {
162 accel_struct = new_accel_struct;
163 device->stats.mem_alloc(accel_struct.allocatedSize);
169 id<MTLDevice> mtl_device,
170 id<MTLCommandQueue> queue,
174 if (@available(macos 12.0, *)) {
176 Mesh *
const mesh =
static_cast<Mesh *const
>(geom);
184 const array<int> &tris = mesh->get_triangles();
185 const size_t num_verts =
verts.size();
186 const size_t num_indices = tris.
size();
188 size_t num_motion_steps = 1;
190 if (motion_blur && mesh->get_use_motion_blur() && motion_keys) {
191 num_motion_steps = mesh->get_motion_steps();
195 id<MTLBuffer> posBuf = nil;
196 id<MTLBuffer> indexBuf = [mtl_device newBufferWithBytes:tris.
data()
198 options:MTLResourceStorageModeShared];
200 if (num_motion_steps == 1) {
201 posBuf = [mtl_device newBufferWithBytes:
verts.data()
203 options:MTLResourceStorageModeShared];
207 newBufferWithLength:num_verts * num_motion_steps *
sizeof(
verts.data()[0])
208 options:MTLResourceStorageModeShared];
210 size_t center_step = (num_motion_steps - 1) / 2;
215 if (
step != center_step) {
218 std::copy_n(
verts, num_verts, dest_data + num_verts *
step);
223 MTLAccelerationStructureGeometryDescriptor *geomDesc;
224 if (num_motion_steps > 1) {
225 std::vector<MTLMotionKeyframeData *> vertex_ptrs;
226 vertex_ptrs.reserve(num_motion_steps);
227 for (
size_t step = 0;
step < num_motion_steps; ++
step) {
228 MTLMotionKeyframeData *k = [MTLMotionKeyframeData
data];
231 vertex_ptrs.push_back(k);
234 MTLAccelerationStructureMotionTriangleGeometryDescriptor *geomDescMotion =
235 [MTLAccelerationStructureMotionTriangleGeometryDescriptor
descriptor];
236 geomDescMotion.vertexBuffers = [NSArray arrayWithObjects:vertex_ptrs.data()
237 count:vertex_ptrs.size()];
238 geomDescMotion.vertexStride =
sizeof(
verts.data()[0]);
239 geomDescMotion.indexBuffer = indexBuf;
240 geomDescMotion.indexBufferOffset = 0;
241 geomDescMotion.indexType = MTLIndexTypeUInt32;
242 geomDescMotion.triangleCount = num_indices / 3;
243 geomDescMotion.intersectionFunctionTableOffset = 0;
244 geomDescMotion.opaque =
true;
246 geomDesc = geomDescMotion;
248 BVH_status(
"Building motion mesh BLAS | %7d tris | %s | %7d motion keyframes",
251 (
int)num_motion_steps);
254 MTLAccelerationStructureTriangleGeometryDescriptor *geomDescNoMotion =
255 [MTLAccelerationStructureTriangleGeometryDescriptor
descriptor];
256 geomDescNoMotion.vertexBuffer = posBuf;
257 geomDescNoMotion.vertexBufferOffset = 0;
258 geomDescNoMotion.vertexStride =
sizeof(
verts.data()[0]);
259 geomDescNoMotion.indexBuffer = indexBuf;
260 geomDescNoMotion.indexBufferOffset = 0;
261 geomDescNoMotion.indexType = MTLIndexTypeUInt32;
262 geomDescNoMotion.triangleCount = num_indices / 3;
263 geomDescNoMotion.intersectionFunctionTableOffset = 0;
264 geomDescNoMotion.opaque =
true;
266 geomDesc = geomDescNoMotion;
269 "Building mesh BLAS | %7d tris | %s", (
int)mesh->
num_triangles(), geom->
name.c_str());
275 geomDesc.allowDuplicateIntersectionFunctionInvocation =
false;
277 MTLPrimitiveAccelerationStructureDescriptor *accelDesc =
278 [MTLPrimitiveAccelerationStructureDescriptor
descriptor];
279 accelDesc.geometryDescriptors = @[ geomDesc ];
280 if (num_motion_steps > 1) {
281 accelDesc.motionStartTime = 0.0f;
282 accelDesc.motionEndTime = 1.0f;
283 accelDesc.motionStartBorderMode = MTLMotionBorderModeClamp;
284 accelDesc.motionEndBorderMode = MTLMotionBorderModeClamp;
285 accelDesc.motionKeyframeCount = num_motion_steps;
287 accelDesc.usage |= MTLAccelerationStructureUsageExtendedLimits;
289 if (!use_fast_trace_bvh) {
290 accelDesc.usage |= (MTLAccelerationStructureUsageRefit |
291 MTLAccelerationStructureUsagePreferFastBuild);
294 MTLAccelerationStructureSizes accelSizes = [mtl_device
295 accelerationStructureSizesWithDescriptor:accelDesc];
296 id<MTLAccelerationStructure> accel_uncompressed = [mtl_device
297 newAccelerationStructureWithSize:accelSizes.accelerationStructureSize];
298 id<MTLBuffer> scratchBuf = [mtl_device newBufferWithLength:accelSizes.buildScratchBufferSize
299 options:MTLResourceStorageModePrivate];
300 id<MTLBuffer> sizeBuf = [mtl_device newBufferWithLength:8
301 options:MTLResourceStorageModeShared];
302 id<MTLCommandBuffer> accelCommands = [queue commandBuffer];
303 id<MTLAccelerationStructureCommandEncoder> accelEnc =
304 [accelCommands accelerationStructureCommandEncoder];
306 [accelEnc refitAccelerationStructure:accel_struct
308 destination:accel_uncompressed
309 scratchBuffer:scratchBuf
310 scratchBufferOffset:0];
313 [accelEnc buildAccelerationStructure:accel_uncompressed
315 scratchBuffer:scratchBuf
316 scratchBufferOffset:0];
318 if (use_fast_trace_bvh) {
319 [accelEnc writeCompactedAccelerationStructureSize:accel_uncompressed
322 sizeDataType:MTLDataTypeULong];
324 [accelEnc endEncoding];
328 size_t wired_size = posBuf.allocatedSize + indexBuf.allocatedSize + scratchBuf.allocatedSize +
329 accel_uncompressed.allocatedSize * 2;
331 [accelCommands addCompletedHandler:^(id<MTLCommandBuffer> ) {
333 [scratchBuf release];
337 if (use_fast_trace_bvh) {
341 dispatch_async(dispatch_get_global_queue(DISPATCH_QUEUE_PRIORITY_DEFAULT, 0), ^{
342 id<MTLCommandBuffer> accelCommands = [queue commandBuffer];
343 id<MTLAccelerationStructureCommandEncoder> accelEnc =
344 [accelCommands accelerationStructureCommandEncoder];
345 id<MTLAccelerationStructure> accel = [mtl_device
346 newAccelerationStructureWithSize:compressed_size];
347 [accelEnc copyAndCompactAccelerationStructure:accel_uncompressed
348 toAccelerationStructure:accel];
349 [accelEnc endEncoding];
350 [accelCommands addCompletedHandler:^(id<MTLCommandBuffer> ) {
351 set_accel_struct(accel);
352 [accel_uncompressed release];
355 g_bvh_build_throttler.release(wired_size);
357 [accelCommands commit];
362 set_accel_struct(accel_uncompressed);
365 g_bvh_build_throttler.release(wired_size);
372 g_bvh_build_throttler.acquire(wired_size);
373 [accelCommands commit];
381 id<MTLDevice> mtl_device,
382 id<MTLCommandQueue> queue,
386# if defined(MAC_OS_VERSION_14_0)
387 if (@available(macos 14.0, *)) {
389 Hair *hair =
static_cast<Hair *
>(geom);
396 size_t num_motion_steps = 1;
398 if (motion_blur && hair->get_use_motion_blur() && motion_keys) {
399 num_motion_steps = hair->get_motion_steps();
402 id<MTLBuffer> cpBuffer = nil;
403 id<MTLBuffer> radiusBuffer = nil;
404 id<MTLBuffer> idxBuffer = nil;
406 MTLAccelerationStructureGeometryDescriptor *geomDesc;
407 if (num_motion_steps > 1) {
408 MTLAccelerationStructureMotionCurveGeometryDescriptor *geomDescCrv =
409 [MTLAccelerationStructureMotionCurveGeometryDescriptor
descriptor];
413 const array<float> &radiuses = hair->get_curve_radius();
416 std::vector<float3> cpData;
417 std::vector<int> idxData;
418 std::vector<float> radiusData;
419 cpData.reserve(numKeys);
420 radiusData.reserve(numKeys);
422 std::vector<int> step_offsets;
423 for (
size_t step = 0;
step < num_motion_steps; ++
step) {
426 const float3 *keys = hair->get_curve_keys().data();
427 size_t center_step = (num_motion_steps - 1) / 2;
428 if (
step != center_step) {
429 size_t attr_offset = (
step > center_step) ?
step - 1 :
step;
431 keys = motion_keys->
data_float3() + attr_offset * numKeys;
434 step_offsets.push_back(cpData.size());
436 for (
int c = 0; c < numCurves; ++c) {
441 cpData.push_back(keys[firstKey]);
442 radiusData.push_back(radiuses[firstKey]);
443 for (
int s = 0; s < segCount; ++s) {
445 idxData.push_back(idxBase + s);
447 cpData.push_back(keys[firstKey + s]);
448 radiusData.push_back(radiuses[firstKey + s]);
450 cpData.push_back(keys[firstKey + curve.
num_keys - 1]);
451 cpData.push_back(keys[firstKey + curve.
num_keys - 1]);
452 radiusData.push_back(radiuses[firstKey + curve.
num_keys - 1]);
453 radiusData.push_back(radiuses[firstKey + curve.
num_keys - 1]);
458 idxBuffer = [mtl_device newBufferWithBytes:idxData.data()
459 length:idxData.size() *
sizeof(int)
460 options:MTLResourceStorageModeShared];
462 cpBuffer = [mtl_device newBufferWithBytes:cpData.data()
464 options:MTLResourceStorageModeShared];
466 radiusBuffer = [mtl_device newBufferWithBytes:radiusData.data()
467 length:radiusData.size() *
sizeof(float)
468 options:MTLResourceStorageModeShared];
470 std::vector<MTLMotionKeyframeData *> cp_ptrs;
471 std::vector<MTLMotionKeyframeData *> radius_ptrs;
472 cp_ptrs.reserve(num_motion_steps);
473 radius_ptrs.reserve(num_motion_steps);
475 for (
size_t step = 0;
step < num_motion_steps; ++
step) {
476 MTLMotionKeyframeData *k = [MTLMotionKeyframeData
data];
478 k.offset = step_offsets[
step] *
sizeof(
float3);
479 cp_ptrs.push_back(k);
481 k = [MTLMotionKeyframeData
data];
482 k.buffer = radiusBuffer;
483 k.offset = step_offsets[
step] *
sizeof(float);
484 radius_ptrs.push_back(k);
487 geomDescCrv.controlPointBuffers = [NSArray arrayWithObjects:cp_ptrs.data()
488 count:cp_ptrs.size()];
489 geomDescCrv.radiusBuffers = [NSArray arrayWithObjects:radius_ptrs.data()
490 count:radius_ptrs.size()];
493 geomDescCrv.controlPointCount = cpData.size() / num_motion_steps;
494 geomDescCrv.controlPointStride =
sizeof(
float3);
495 geomDescCrv.controlPointFormat = MTLAttributeFormatFloat3;
496 geomDescCrv.radiusStride =
sizeof(float);
497 geomDescCrv.radiusFormat = MTLAttributeFormatFloat;
498 geomDescCrv.segmentCount = idxData.size();
499 geomDescCrv.segmentControlPointCount = 4;
502 geomDescCrv.curveBasis = MTLCurveBasisCatmullRom;
503 geomDescCrv.curveEndCaps = MTLCurveEndCapsDisk;
504 geomDescCrv.indexType = MTLIndexTypeUInt32;
505 geomDescCrv.indexBuffer = idxBuffer;
506 geomDescCrv.intersectionFunctionTableOffset = 1;
511 geomDescCrv.allowDuplicateIntersectionFunctionInvocation =
false;
512 geomDescCrv.opaque =
true;
513 geomDesc = geomDescCrv;
516 MTLAccelerationStructureCurveGeometryDescriptor *geomDescCrv =
517 [MTLAccelerationStructureCurveGeometryDescriptor
descriptor];
521 const array<float> &radiuses = hair->get_curve_radius();
524 std::vector<float3> cpData;
525 std::vector<int> idxData;
526 std::vector<float> radiusData;
527 cpData.reserve(numKeys);
528 radiusData.reserve(numKeys);
529 auto keys = hair->get_curve_keys();
530 for (
int c = 0; c < numCurves; ++c) {
534 radiusData.push_back(radiuses[firstKey]);
536 cpData.push_back(keys[firstKey]);
537 for (
int s = 0; s < segCount; ++s) {
538 idxData.push_back(idxBase + s);
539 cpData.push_back(keys[firstKey + s]);
540 radiusData.push_back(radiuses[firstKey + s]);
542 cpData.push_back(keys[firstKey + curve.
num_keys - 1]);
543 cpData.push_back(keys[firstKey + curve.
num_keys - 1]);
544 radiusData.push_back(radiuses[firstKey + curve.
num_keys - 1]);
545 radiusData.push_back(radiuses[firstKey + curve.
num_keys - 1]);
549 idxBuffer = [mtl_device newBufferWithBytes:idxData.data()
550 length:idxData.size() *
sizeof(int)
551 options:MTLResourceStorageModeShared];
553 cpBuffer = [mtl_device newBufferWithBytes:cpData.data()
555 options:MTLResourceStorageModeShared];
557 radiusBuffer = [mtl_device newBufferWithBytes:radiusData.data()
558 length:radiusData.size() *
sizeof(float)
559 options:MTLResourceStorageModeShared];
561 geomDescCrv.controlPointBuffer = cpBuffer;
562 geomDescCrv.radiusBuffer = radiusBuffer;
563 geomDescCrv.controlPointCount = cpData.size();
564 geomDescCrv.controlPointStride =
sizeof(
float3);
565 geomDescCrv.controlPointFormat = MTLAttributeFormatFloat3;
566 geomDescCrv.controlPointBufferOffset = 0;
567 geomDescCrv.segmentCount = idxData.size();
568 geomDescCrv.segmentControlPointCount = 4;
571 geomDescCrv.curveBasis = MTLCurveBasisCatmullRom;
572 geomDescCrv.curveEndCaps = MTLCurveEndCapsDisk;
573 geomDescCrv.indexType = MTLIndexTypeUInt32;
574 geomDescCrv.indexBuffer = idxBuffer;
575 geomDescCrv.intersectionFunctionTableOffset = 1;
580 geomDescCrv.allowDuplicateIntersectionFunctionInvocation =
false;
581 geomDescCrv.opaque =
true;
582 geomDesc = geomDescCrv;
585 MTLPrimitiveAccelerationStructureDescriptor *accelDesc =
586 [MTLPrimitiveAccelerationStructureDescriptor
descriptor];
587 accelDesc.geometryDescriptors = @[ geomDesc ];
589 if (num_motion_steps > 1) {
590 accelDesc.motionStartTime = 0.0f;
591 accelDesc.motionEndTime = 1.0f;
592 accelDesc.motionStartBorderMode = MTLMotionBorderModeVanish;
593 accelDesc.motionEndBorderMode = MTLMotionBorderModeVanish;
594 accelDesc.motionKeyframeCount = num_motion_steps;
596 BVH_status(
"Building motion hair BLAS | %7d curves | %s | %7d motion keyframes",
599 (
int)num_motion_steps);
603 "Building hair BLAS | %7d curves | %s", (
int)hair->
num_curves(), geom->
name.c_str());
606 if (!use_fast_trace_bvh) {
607 accelDesc.usage |= (MTLAccelerationStructureUsageRefit |
608 MTLAccelerationStructureUsagePreferFastBuild);
610 accelDesc.usage |= MTLAccelerationStructureUsageExtendedLimits;
612 MTLAccelerationStructureSizes accelSizes = [mtl_device
613 accelerationStructureSizesWithDescriptor:accelDesc];
614 id<MTLAccelerationStructure> accel_uncompressed = [mtl_device
615 newAccelerationStructureWithSize:accelSizes.accelerationStructureSize];
616 id<MTLBuffer> scratchBuf = [mtl_device newBufferWithLength:accelSizes.buildScratchBufferSize
617 options:MTLResourceStorageModePrivate];
618 id<MTLBuffer> sizeBuf = [mtl_device newBufferWithLength:8
619 options:MTLResourceStorageModeShared];
620 id<MTLCommandBuffer> accelCommands = [queue commandBuffer];
621 id<MTLAccelerationStructureCommandEncoder> accelEnc =
622 [accelCommands accelerationStructureCommandEncoder];
624 [accelEnc refitAccelerationStructure:accel_struct
626 destination:accel_uncompressed
627 scratchBuffer:scratchBuf
628 scratchBufferOffset:0];
631 [accelEnc buildAccelerationStructure:accel_uncompressed
633 scratchBuffer:scratchBuf
634 scratchBufferOffset:0];
636 if (use_fast_trace_bvh) {
637 [accelEnc writeCompactedAccelerationStructureSize:accel_uncompressed
640 sizeDataType:MTLDataTypeULong];
642 [accelEnc endEncoding];
646 size_t wired_size = cpBuffer.allocatedSize + radiusBuffer.allocatedSize +
647 idxBuffer.allocatedSize + scratchBuf.allocatedSize +
648 accel_uncompressed.allocatedSize * 2;
650 [accelCommands addCompletedHandler:^(id<MTLCommandBuffer> ) {
652 [scratchBuf release];
654 [radiusBuffer release];
657 if (use_fast_trace_bvh) {
660 dispatch_async(dispatch_get_global_queue(DISPATCH_QUEUE_PRIORITY_DEFAULT, 0), ^{
661 id<MTLCommandBuffer> accelCommands = [queue commandBuffer];
662 id<MTLAccelerationStructureCommandEncoder> accelEnc =
663 [accelCommands accelerationStructureCommandEncoder];
664 id<MTLAccelerationStructure> accel = [mtl_device
665 newAccelerationStructureWithSize:compressed_size];
666 [accelEnc copyAndCompactAccelerationStructure:accel_uncompressed
667 toAccelerationStructure:accel];
668 [accelEnc endEncoding];
669 [accelCommands addCompletedHandler:^(id<MTLCommandBuffer> ) {
670 set_accel_struct(accel);
671 [accel_uncompressed release];
674 g_bvh_build_throttler.release(wired_size);
676 [accelCommands commit];
681 set_accel_struct(accel_uncompressed);
684 g_bvh_build_throttler.release(wired_size);
691 g_bvh_build_throttler.acquire(wired_size);
692 [accelCommands commit];
707 id<MTLDevice> mtl_device,
708 id<MTLCommandQueue> queue,
712 if (@available(macos 12.0, *)) {
719 const size_t num_points = pointcloud->get_points().size();
720 const float3 *points = pointcloud->get_points().data();
721 const float *radius = pointcloud->get_radius().data();
725 size_t num_motion_steps = 1;
727 if (motion_blur && pointcloud->get_use_motion_blur() && motion_keys) {
728 num_motion_steps = pointcloud->get_motion_steps();
731 const size_t num_aabbs = num_motion_steps * num_points;
734 id<MTLBuffer> aabbBuf = [mtl_device
735 newBufferWithLength:num_aabbs *
sizeof(MTLAxisAlignedBoundingBox)
736 options:MTLResourceStorageModeShared];
737 MTLAxisAlignedBoundingBox *aabb_data = (MTLAxisAlignedBoundingBox *)[aabbBuf contents];
740 size_t center_step = (num_motion_steps - 1) / 2;
741 for (
size_t step = 0;
step < num_motion_steps; ++
step) {
742 if (
step == center_step) {
744 for (
size_t j = 0; j < num_points; ++j) {
749 const size_t index =
step * num_points + j;
750 aabb_data[index].min = (MTLPackedFloat3 &)
bounds.min;
751 aabb_data[index].max = (MTLPackedFloat3 &)
bounds.max;
755 size_t attr_offset = (
step > center_step) ?
step - 1 :
step;
758 for (
size_t j = 0; j < num_points; ++j) {
763 const size_t index =
step * num_points + j;
764 aabb_data[index].min = (MTLPackedFloat3 &)
bounds.min;
765 aabb_data[index].max = (MTLPackedFloat3 &)
bounds.max;
770 MTLAccelerationStructureGeometryDescriptor *geomDesc;
771 if (num_motion_steps > 1) {
772 std::vector<MTLMotionKeyframeData *> aabb_ptrs;
773 aabb_ptrs.reserve(num_motion_steps);
774 for (
size_t step = 0;
step < num_motion_steps; ++
step) {
775 MTLMotionKeyframeData *k = [MTLMotionKeyframeData
data];
777 k.offset =
step * num_points *
sizeof(MTLAxisAlignedBoundingBox);
778 aabb_ptrs.push_back(k);
781 MTLAccelerationStructureMotionBoundingBoxGeometryDescriptor *geomDescMotion =
782 [MTLAccelerationStructureMotionBoundingBoxGeometryDescriptor
descriptor];
783 geomDescMotion.boundingBoxBuffers = [NSArray arrayWithObjects:aabb_ptrs.data()
784 count:aabb_ptrs.size()];
785 geomDescMotion.boundingBoxCount = num_points;
786 geomDescMotion.boundingBoxStride =
sizeof(aabb_data[0]);
787 geomDescMotion.intersectionFunctionTableOffset = 2;
792 geomDescMotion.allowDuplicateIntersectionFunctionInvocation =
false;
793 geomDescMotion.opaque =
true;
794 geomDesc = geomDescMotion;
797 MTLAccelerationStructureBoundingBoxGeometryDescriptor *geomDescNoMotion =
798 [MTLAccelerationStructureBoundingBoxGeometryDescriptor
descriptor];
799 geomDescNoMotion.boundingBoxBuffer = aabbBuf;
800 geomDescNoMotion.boundingBoxBufferOffset = 0;
801 geomDescNoMotion.boundingBoxCount = int(num_aabbs);
802 geomDescNoMotion.boundingBoxStride =
sizeof(aabb_data[0]);
803 geomDescNoMotion.intersectionFunctionTableOffset = 2;
808 geomDescNoMotion.allowDuplicateIntersectionFunctionInvocation =
false;
809 geomDescNoMotion.opaque =
true;
810 geomDesc = geomDescNoMotion;
813 MTLPrimitiveAccelerationStructureDescriptor *accelDesc =
814 [MTLPrimitiveAccelerationStructureDescriptor
descriptor];
815 accelDesc.geometryDescriptors = @[ geomDesc ];
817 if (num_motion_steps > 1) {
818 accelDesc.motionStartTime = 0.0f;
819 accelDesc.motionEndTime = 1.0f;
822 accelDesc.motionKeyframeCount = num_motion_steps;
824 BVH_status(
"Building motion pointcloud BLAS | %7d points | %s | %7d motion keyframes",
827 (
int)num_motion_steps);
830 BVH_status(
"Building pointcloud BLAS | %7d points | %s",
834 accelDesc.usage |= MTLAccelerationStructureUsageExtendedLimits;
836 if (!use_fast_trace_bvh) {
837 accelDesc.usage |= (MTLAccelerationStructureUsageRefit |
838 MTLAccelerationStructureUsagePreferFastBuild);
841 MTLAccelerationStructureSizes accelSizes = [mtl_device
842 accelerationStructureSizesWithDescriptor:accelDesc];
843 id<MTLAccelerationStructure> accel_uncompressed = [mtl_device
844 newAccelerationStructureWithSize:accelSizes.accelerationStructureSize];
845 id<MTLBuffer> scratchBuf = [mtl_device newBufferWithLength:accelSizes.buildScratchBufferSize
846 options:MTLResourceStorageModePrivate];
847 id<MTLBuffer> sizeBuf = [mtl_device newBufferWithLength:8
848 options:MTLResourceStorageModeShared];
849 id<MTLCommandBuffer> accelCommands = [queue commandBuffer];
850 id<MTLAccelerationStructureCommandEncoder> accelEnc =
851 [accelCommands accelerationStructureCommandEncoder];
853 [accelEnc refitAccelerationStructure:accel_struct
855 destination:accel_uncompressed
856 scratchBuffer:scratchBuf
857 scratchBufferOffset:0];
860 [accelEnc buildAccelerationStructure:accel_uncompressed
862 scratchBuffer:scratchBuf
863 scratchBufferOffset:0];
865 if (use_fast_trace_bvh) {
866 [accelEnc writeCompactedAccelerationStructureSize:accel_uncompressed
869 sizeDataType:MTLDataTypeULong];
871 [accelEnc endEncoding];
875 size_t wired_size = aabbBuf.allocatedSize + scratchBuf.allocatedSize +
876 accel_uncompressed.allocatedSize * 2;
878 [accelCommands addCompletedHandler:^(id<MTLCommandBuffer> ) {
880 [scratchBuf release];
883 if (use_fast_trace_bvh) {
887 dispatch_async(dispatch_get_global_queue(DISPATCH_QUEUE_PRIORITY_DEFAULT, 0), ^{
888 id<MTLCommandBuffer> accelCommands = [queue commandBuffer];
889 id<MTLAccelerationStructureCommandEncoder> accelEnc =
890 [accelCommands accelerationStructureCommandEncoder];
891 id<MTLAccelerationStructure> accel = [mtl_device
892 newAccelerationStructureWithSize:compressed_size];
893 [accelEnc copyAndCompactAccelerationStructure:accel_uncompressed
894 toAccelerationStructure:accel];
895 [accelEnc endEncoding];
896 [accelCommands addCompletedHandler:^(id<MTLCommandBuffer> ) {
897 set_accel_struct(accel);
898 [accel_uncompressed release];
901 g_bvh_build_throttler.release(wired_size);
903 [accelCommands commit];
908 set_accel_struct(accel_uncompressed);
911 g_bvh_build_throttler.release(wired_size);
918 g_bvh_build_throttler.acquire(wired_size);
919 [accelCommands commit];
926 id<MTLDevice> mtl_device,
927 id<MTLCommandQueue> queue,
930 assert(objects.size() == 1 && geometry.size() == 1);
937 return build_BLAS_mesh(
progress, mtl_device, queue, geom,
refit);
939 return build_BLAS_hair(
progress, mtl_device, queue, geom,
refit);
941 return build_BLAS_pointcloud(
progress, mtl_device, queue, geom,
refit);
948# if defined(MAC_OS_VERSION_15_0)
/* Repack a `DecomposedTransform` into a `MTLComponentTransform`:
 *  - rotation quaternion from `src.x` (x, y, z, w),
 *  - translation from `src.y` (x, y, z),
 *  - scale from (`src.y.w`, `src.z.w`, `src.w.w`),
 *  - shear from (`src.z.x`, `src.z.y`, `src.w.x`),
 *  - pivot fixed at the origin. */
951static MTLComponentTransform decomposed_to_component_transform(
const DecomposedTransform &src)
953 MTLComponentTransform tfm;
/* Scale components are stored in the w slots of the y, z and w rows. */
954 tfm.scale = MTLPackedFloat3Make(src.
y.w, src.
z.w, src.
w.w);
955 tfm.shear = MTLPackedFloat3Make(src.
z.x, src.
z.y, src.
w.x);
956 tfm.pivot = MTLPackedFloat3Make(0.0f, 0.0f, 0.0f);
957 tfm.rotation = MTLPackedFloatQuaternionMake(src.
x.x, src.
x.y, src.
x.z, src.
x.w);
958 tfm.translation = MTLPackedFloat3Make(src.
y.x, src.
y.y, src.
y.z);
/* Build a `MTLComponentTransform` with unit scale, zero shear, zero pivot, zero
 * translation and the rotation quaternion (0, 0, 0, 1). */
963static MTLComponentTransform component_transform_make_unit()
965 MTLComponentTransform tfm;
966 tfm.scale = MTLPackedFloat3Make(1.0f, 1.0f, 1.0f);
967 tfm.shear = MTLPackedFloat3Make(0.0f, 0.0f, 0.0f);
968 tfm.pivot = MTLPackedFloat3Make(0.0f, 0.0f, 0.0f);
/* The w component is passed last. */
969 tfm.rotation = MTLPackedFloatQuaternionMake(0.0f, 0.0f, 0.0f, 1.0f);
970 tfm.translation = MTLPackedFloat3Make(0.0f, 0.0f, 0.0f);
977 id<MTLDevice> mtl_device,
978 id<MTLCommandQueue> queue,
982 g_bvh_build_throttler.wait_for_all();
984 if (@available(macos 12.0, *)) {
986 auto make_null_BLAS = [](id<MTLDevice> mtl_device,
987 id<MTLCommandQueue> queue) -> id<MTLAccelerationStructure> {
988 id<MTLBuffer> nullBuf = [mtl_device newBufferWithLength:
sizeof(
float3)
989 options:MTLResourceStorageModeShared];
992 MTLAccelerationStructureTriangleGeometryDescriptor *geomDesc =
993 [MTLAccelerationStructureTriangleGeometryDescriptor
descriptor];
994 geomDesc.vertexBuffer = nullBuf;
995 geomDesc.vertexBufferOffset = 0;
996 geomDesc.vertexStride =
sizeof(
float3);
997 geomDesc.indexBuffer = nullBuf;
998 geomDesc.indexBufferOffset = 0;
999 geomDesc.indexType = MTLIndexTypeUInt32;
1000 geomDesc.triangleCount = 0;
1001 geomDesc.intersectionFunctionTableOffset = 0;
1002 geomDesc.opaque =
true;
1003 geomDesc.allowDuplicateIntersectionFunctionInvocation =
false;
1005 MTLPrimitiveAccelerationStructureDescriptor *accelDesc =
1006 [MTLPrimitiveAccelerationStructureDescriptor
descriptor];
1007 accelDesc.geometryDescriptors = @[ geomDesc ];
1008 accelDesc.usage |= MTLAccelerationStructureUsageExtendedLimits;
1010 MTLAccelerationStructureSizes accelSizes = [mtl_device
1011 accelerationStructureSizesWithDescriptor:accelDesc];
1012 id<MTLAccelerationStructure> accel_struct = [mtl_device
1013 newAccelerationStructureWithSize:accelSizes.accelerationStructureSize];
1014 id<MTLBuffer> scratchBuf = [mtl_device newBufferWithLength:accelSizes.buildScratchBufferSize
1015 options:MTLResourceStorageModePrivate];
1016 id<MTLBuffer> sizeBuf = [mtl_device newBufferWithLength:8
1017 options:MTLResourceStorageModeShared];
1018 id<MTLCommandBuffer> accelCommands = [queue commandBuffer];
1019 id<MTLAccelerationStructureCommandEncoder> accelEnc =
1020 [accelCommands accelerationStructureCommandEncoder];
1021 [accelEnc buildAccelerationStructure:accel_struct
1023 scratchBuffer:scratchBuf
1024 scratchBufferOffset:0];
1025 [accelEnc endEncoding];
1026 [accelCommands commit];
1027 [accelCommands waitUntilCompleted];
1030 [scratchBuf release];
1034 return accel_struct;
1038 uint32_t num_motion_transforms = 0;
1040 for (
Object *ob : objects) {
1043 if (ob->use_motion()) {
1044 num_motion_transforms +=
max((
size_t)1, ob->get_motion().size());
1045 num_motion_instances++;
1048 num_motion_transforms++;
1052 const bool use_instance_motion = motion_blur && num_motion_instances;
1055 NSMutableArray *all_blas = [NSMutableArray
array];
1056 unordered_map<const BVHMetal *, int> instance_mapping;
1059 auto get_blas_index = [&](
const BVHMetal *blas) {
1060 auto it = instance_mapping.find(blas);
1061 if (it != instance_mapping.end()) {
1064 int blas_index = (int)[all_blas
count];
1065 instance_mapping[blas] = blas_index;
1066 if (@available(macos 12.0, *)) {
1067 [all_blas addObject:(blas ? blas->accel_struct : null_BLAS)];
1072 size_t instance_size;
1073 if (use_instance_motion) {
1074 instance_size =
sizeof(MTLAccelerationStructureMotionInstanceDescriptor);
1077 instance_size =
sizeof(MTLAccelerationStructureUserIDInstanceDescriptor);
1081 id<MTLBuffer> instanceBuf = [mtl_device newBufferWithLength:num_instances * instance_size
1082 options:MTLResourceStorageModeShared];
1083 id<MTLBuffer> motion_transforms_buf = nil;
1084 MTLPackedFloat4x3 *matrix_motion_transforms =
nullptr;
1085# if defined(MAC_OS_VERSION_15_0)
1086 MTLComponentTransform *decomposed_motion_transforms =
nullptr;
1088 if (use_instance_motion && num_motion_transforms) {
1089# if defined(MAC_OS_VERSION_15_0)
1091 if (@available(macos 15.0, *)) {
1092 motion_transforms_buf = [mtl_device
1093 newBufferWithLength:num_motion_transforms *
sizeof(MTLComponentTransform)
1094 options:MTLResourceStorageModeShared];
1095 decomposed_motion_transforms = (MTLComponentTransform *)motion_transforms_buf.contents;
1101 motion_transforms_buf = [mtl_device
1102 newBufferWithLength:num_motion_transforms *
sizeof(MTLPackedFloat4x3)
1103 options:MTLResourceStorageModeShared];
1104 matrix_motion_transforms = (MTLPackedFloat4x3 *)motion_transforms_buf.contents;
1109 uint32_t motion_transform_index = 0;
1112 blas_array.reserve(num_instances);
1114 for (
Object *ob : objects) {
1116 const Geometry *geom = ob->get_geometry();
1117 const BVHMetal *blas =
static_cast<const BVHMetal *
>(geom->
bvh.get());
1118 if (!blas || !blas->accel_struct || !ob->is_traceable()) {
1126 null_BLAS = make_null_BLAS(mtl_device, queue);
1128 blas_array.push_back(null_BLAS);
1131 blas_array.push_back(blas->accel_struct);
1134 uint32_t accel_struct_index = get_blas_index(blas);
1149 int currIndex = instance_index++;
1169 if (use_instance_motion) {
1170 MTLAccelerationStructureMotionInstanceDescriptor *instances =
1171 (MTLAccelerationStructureMotionInstanceDescriptor *)[instanceBuf contents];
1172 MTLAccelerationStructureMotionInstanceDescriptor &desc = instances[currIndex];
1174 desc.accelerationStructureIndex = accel_struct_index;
1175 desc.userID = primitive_offset;
1177 desc.motionStartTime = 0.0f;
1178 desc.motionEndTime = 1.0f;
1179 desc.motionTransformsStartIndex = motion_transform_index;
1180 desc.motionStartBorderMode = MTLMotionBorderModeVanish;
1181 desc.motionEndBorderMode = MTLMotionBorderModeVanish;
1182 desc.intersectionFunctionTableOffset = 0;
1186 decomp.data(), ob->get_motion().data(), ob->get_motion().size());
1188 int key_count = ob->get_motion().size();
1190 desc.motionTransformsCount = key_count;
1192# if defined(MAC_OS_VERSION_15_0)
1194 for (
int i = 0;
i < key_count;
i++) {
1195 decomposed_motion_transforms[motion_transform_index++] =
1196 decomposed_to_component_transform(decomp[
i]);
1202 Transform *keys = ob->get_motion().data();
1203 for (
int i = 0;
i < key_count;
i++) {
1204 float *t = (
float *)&matrix_motion_transforms[motion_transform_index++];
1206 const auto *src = (
const float *)&keys[
i];
1207 for (
int i = 0;
i < 12;
i++) {
1208 t[
i] = src[(
i / 3) + 4 * (
i % 3)];
1214 desc.motionTransformsCount = 1;
1216# if defined(MAC_OS_VERSION_15_0)
1218 if (ob->get_geometry()->is_instanced()) {
1221 decomposed_motion_transforms[motion_transform_index++] =
1222 decomposed_to_component_transform(decomp);
1225 decomposed_motion_transforms[motion_transform_index++] =
1226 component_transform_make_unit();
1232 float *t = (
float *)&matrix_motion_transforms[motion_transform_index++];
1233 if (ob->get_geometry()->is_instanced()) {
1235 const auto *src = (
const float *)&ob->get_tfm();
1236 for (
int i = 0;
i < 12;
i++) {
1237 t[
i] = src[(
i / 3) + 4 * (
i % 3)];
1242 t[0] = t[4] = t[8] = 1.0f;
1248 MTLAccelerationStructureUserIDInstanceDescriptor *instances =
1249 (MTLAccelerationStructureUserIDInstanceDescriptor *)[instanceBuf contents];
1250 MTLAccelerationStructureUserIDInstanceDescriptor &desc = instances[currIndex];
1252 desc.accelerationStructureIndex = accel_struct_index;
1253 desc.userID = primitive_offset;
1255 desc.intersectionFunctionTableOffset = 0;
1256 desc.options = MTLAccelerationStructureInstanceOptionOpaque;
1258 float *t = (
float *)&desc.transformationMatrix;
1259 if (ob->get_geometry()->is_instanced()) {
1261 const auto *src = (
const float *)&ob->get_tfm();
1262 for (
int i = 0;
i < 12;
i++) {
1263 t[
i] = src[(
i / 3) + 4 * (
i % 3)];
1268 t[0] = t[4] = t[8] = 1.0f;
1273 if (use_instance_motion) {
1275 "Building motion TLAS | %7d instances | %7d motion instances | %7d motion "
1278 (
int)num_motion_instances,
1279 (
int)num_motion_transforms);
1282 BVH_status(
"Building TLAS | %7d instances", (
int)num_instances);
1285 MTLInstanceAccelerationStructureDescriptor *accelDesc =
1286 [MTLInstanceAccelerationStructureDescriptor
descriptor];
1287 accelDesc.instanceCount = num_instances;
1288 accelDesc.instanceDescriptorType = MTLAccelerationStructureInstanceDescriptorTypeUserID;
1289 accelDesc.instanceDescriptorBuffer = instanceBuf;
1290 accelDesc.instanceDescriptorBufferOffset = 0;
1291 accelDesc.instanceDescriptorStride = instance_size;
1292 accelDesc.instancedAccelerationStructures = all_blas;
1294 if (use_instance_motion) {
1295 accelDesc.instanceDescriptorType = MTLAccelerationStructureInstanceDescriptorTypeMotion;
1296 accelDesc.motionTransformBuffer = motion_transforms_buf;
1297 accelDesc.motionTransformCount = num_motion_transforms;
1298# if defined(MAC_OS_VERSION_15_0)
1299 if (@available(macos 15.0, *)) {
1300 accelDesc.motionTransformStride = 0;
1301 accelDesc.motionTransformType = use_pcmi ? MTLTransformTypeComponent :
1302 MTLTransformTypePackedFloat4x3;
1307 accelDesc.usage |= MTLAccelerationStructureUsageExtendedLimits;
1308 if (!use_fast_trace_bvh) {
1309 accelDesc.usage |= (MTLAccelerationStructureUsageRefit |
1310 MTLAccelerationStructureUsagePreferFastBuild);
1313 MTLAccelerationStructureSizes accelSizes = [mtl_device
1314 accelerationStructureSizesWithDescriptor:accelDesc];
1315 id<MTLAccelerationStructure> accel = [mtl_device
1316 newAccelerationStructureWithSize:accelSizes.accelerationStructureSize];
1317 id<MTLBuffer> scratchBuf = [mtl_device newBufferWithLength:accelSizes.buildScratchBufferSize
1318 options:MTLResourceStorageModePrivate];
1319 id<MTLCommandBuffer> accelCommands = [queue commandBuffer];
1320 id<MTLAccelerationStructureCommandEncoder> accelEnc =
1321 [accelCommands accelerationStructureCommandEncoder];
1323 [accelEnc refitAccelerationStructure:accel_struct
1326 scratchBuffer:scratchBuf
1327 scratchBufferOffset:0];
1330 [accelEnc buildAccelerationStructure:accel
1332 scratchBuffer:scratchBuf
1333 scratchBufferOffset:0];
1335 [accelEnc endEncoding];
1336 [accelCommands commit];
1337 [accelCommands waitUntilCompleted];
1339 if (motion_transforms_buf) {
1340 [motion_transforms_buf release];
1342 [instanceBuf release];
1343 [scratchBuf release];
1346 set_accel_struct(accel);
1348 unique_blas_array.clear();
1349 unique_blas_array.reserve(all_blas.count);
1350 [all_blas enumerateObjectsUsingBlock:^(id<MTLAccelerationStructure> blas, NSUInteger, BOOL *) {
1351 unique_blas_array.push_back(blas);
1360 id<MTLDevice> mtl_device,
1361 id<MTLCommandQueue> queue,
1364 if (@available(macos 12.0, *)) {
1369 assert(!
"Can't refit static Metal BVH");
1372 else if (!accel_struct) {
1373 assert(!
"Can't refit non-existing Metal BVH");
1379 set_accel_struct(nil);
1383 if (!support_refit_blas()) {
for(;discarded_id_iter !=nullptr;discarded_id_iter=static_cast< ID * >(discarded_id_iter->next))
BMesh const char void * data
unsigned long long int uint64_t
static btDbvtVolume bounds(btDbvtNode **leaves, int count)
void refit(btStridingMeshInterface *triangles, const btVector3 &aabbMin, const btVector3 &aabbMax)
Attribute * find(ustring name) const
bool is_pointcloud() const
Curve get_curve(const size_t i) const
size_t curve_segment_offset
size_t num_curves() const
CurveShapeType curve_shape
CCL_NAMESPACE_BEGIN struct Options options
#define CCL_NAMESPACE_END
VecBase< float, 4 > float4
#define assert(assertion)
VecBase< float, D > step(VecOp< float, D >, VecOp< float, D >) RET
float length(VecOp< float, D >) RET
@ ATTR_STD_MOTION_VERTEX_POSITION
ccl_device_inline float2 mask(const MaskType mask, const float2 a)
size_t num_triangles() const
void bounds_grow(const float3 *points, const float *radius, BoundBox &bounds) const
Point get_point(const int i) const
size_t num_points() const
std::unique_lock< std::mutex > thread_scoped_lock