14# include "device/metal/bvh.h"
/* Report build status: format the arguments with string_printf, publish the
 * text as the progress sub-status and echo it through metal_printf. The macro
 * uses a `progress` object taken from the scope where it is expanded.
 * NOTE(review): the braces that normally wrap a multi-statement macro are not
 * visible in this listing. */
19# define BVH_status(...) \
21 string str = string_printf(__VA_ARGS__); \
22 progress.set_substatus(str); \
23 metal_printf("%s\n", str.c_str()); \
27# ifdef BVH_THROTTLE_DIAGNOSTICS
/* With BVH_THROTTLE_DIAGNOSTICS defined, throttler messages are sent to printf
 * with the BVHMetalBuildThrottler:: prefix prepended to the format string. */
28# define bvh_throttle_printf(...) printf("BVHMetalBuildThrottler::" __VA_ARGS__)
/* This definition expands to nothing.
 * NOTE(review): presumably the #else branch; the directive separating the two
 * definitions is missing from this listing. */
30# define bvh_throttle_printf(...)
/* Budget keeper for acceleration structure builds. It tracks how many bytes
 * are currently accounted as wired and how many build requests are in flight,
 * and admits new requests against a limit derived from the Metal device.
 * A single global instance, g_bvh_build_throttler, is declared with the struct.
 * NOTE(review): this listing omits lines (see the gaps in the embedded line
 * numbers), so any locking around these members is not visible here. */
35struct BVHMetalBuildThrottler {
/* Bytes added by acquire() and not yet subtracted by release(). */
37 size_t wired_memory = 0;
/* Admission limit for wired_memory; assigned in the constructor. */
38 size_t safe_wired_limit = 0;
/* Incremented on each admitted acquire(), decremented on each release(). */
39 int requests_in_flight = 0;
/* Set the limit to a quarter of the recommended maximum working set size
 * reported by the system default Metal device. */
41 BVHMetalBuildThrottler()
45 id<MTLDevice> mtlDevice = MTLCreateSystemDefaultDevice();
48 safe_wired_limit = [mtlDevice recommendedMaxWorkingSetSize] / 4;
49 bvh_throttle_printf(
"safe_wired_limit = %zu\n", safe_wired_limit);
/* Account for bytes_to_be_wired before a build is submitted.
 * NOTE(review): the 10 ms sleep further down presumably belongs to a retry
 * loop; the loop control lines are missing from this listing. */
53 void acquire(
size_t bytes_to_be_wired)
55 bool throttled =
false;
/* Admit the request when nothing is wired yet (so a single request larger
 * than the limit is still admitted) or when the new total stays within
 * safe_wired_limit. */
62 if (wired_memory == 0 || wired_memory + bytes_to_be_wired <= safe_wired_limit) {
63 wired_memory += bytes_to_be_wired;
64 requests_in_flight += 1;
65 bvh_throttle_printf(
"acquire -- success (requests_in_flight = %d, wired_memory = %zu)\n",
73 "acquire -- throttling (requests_in_flight = %d, wired_memory = %zu, "
74 "bytes_to_be_wired = %zu)\n",
82 std::this_thread::sleep_for(std::chrono::milliseconds(10));
/* Give bytes_just_unwired back to the budget and drop one in-flight request. */
87 void release(
size_t bytes_just_unwired)
90 wired_memory -= bytes_just_unwired;
91 requests_in_flight -= 1;
92 bvh_throttle_printf(
"release (requests_in_flight = %d, wired_memory = %zu)\n",
/* NOTE(review): the declaration owning the next two lines is missing from this
 * listing. build_TLAS calls g_bvh_build_throttler.wait_for_all(), so this is
 * presumably its body: check for wired_memory == 0 and otherwise sleep 10 ms
 * before checking again. Confirm against the original source. */
103 if (wired_memory == 0) {
107 std::this_thread::sleep_for(std::chrono::milliseconds(10));
110} g_bvh_build_throttler;
/* Construct a Metal BVH. The initializer list forwards params_, geometry_ and
 * objects_ to the BVH base class and stores `device`; on macOS 12.0 or later
 * the body calls set_accel_struct(nil).
 * NOTE(review): the first parameter reads as BVHParams followed by a pilcrow
 * and ms_, which looks like an HTML-entity-mangled reference to params_ (the
 * name used in the initializer list). The remaining parameter lines are
 * missing from this listing; confirm against the original source. */
112BVHMetal::BVHMetal(
const BVHParams ¶ms_,
116 :
BVH(params_, geometry_, objects_), device(device)
122 if (@available(macos 12.0, *)) {
123 set_accel_struct(nil);
/* Swap the acceleration structure held by this BVH while keeping the device
 * memory statistics in step. All visible work is inside a macOS 12.0
 * availability check.
 *
 * new_accel_struct: structure to store; when it is nil nothing is stored and
 * nothing is added to the statistics. */
130API_AVAILABLE(macos(11.0))
131void BVHMetal::set_accel_struct(
id<MTLAccelerationStructure> new_accel_struct)
133 if (@available(macos 12.0, *)) {
/* Report the current structure as freed and release it.
 * NOTE(review): embedded line 134 is missing; presumably it guards these two
 * statements with a check that accel_struct is set. */
135 device->stats.mem_free(accel_struct.allocatedSize);
136 [accel_struct release];
/* Store the replacement and report its allocated size. */
140 if (new_accel_struct) {
141 accel_struct = new_accel_struct;
142 device->stats.mem_alloc(accel_struct.allocatedSize);
/* Build or refit the bottom-level acceleration structure (BLAS) for a triangle
 * mesh. The visible steps are: copy vertex positions and triangle indices into
 * Metal buffers, describe them as triangle geometry (per-keyframe vertex data
 * when more than one motion step is used), encode a build or refit, optionally
 * compact the result, and install it through set_accel_struct() from a command
 * buffer completion handler. Memory use is registered with
 * g_bvh_build_throttler before the commit and returned from the handlers.
 *
 * progress:   receives the status text.
 * mtl_device: device used for every buffer and structure allocation.
 * queue:      command queue the build and compaction are submitted to.
 *
 * NOTE(review): this listing omits lines. The remaining parameters (build_BLAS
 * passes geom and refit), the definitions of verts, motion_keys, motion_blur,
 * use_fast_trace_bvh and compressed_size, the else branches and the return
 * statements are not visible here. */
147bool BVHMetal::build_BLAS_mesh(
Progress &progress,
148 id<MTLDevice> mtl_device,
149 id<MTLCommandQueue> queue,
153 if (@available(macos 12.0, *)) {
155 Mesh *
const mesh =
static_cast<Mesh *const
>(geom);
162 "Building mesh BLAS | %7d tris | %s", (
int)mesh->
num_triangles(), geom->
name.c_str());
168 const array<int> &tris = mesh->get_triangles();
169 const size_t num_verts =
verts.size();
170 const size_t num_indices = tris.
size();
/* A single keyframe unless motion blur is enabled for both the BVH and the
 * mesh and motion_keys is set. */
172 size_t num_motion_steps = 1;
174 if (motion_blur && mesh->get_use_motion_blur() && motion_keys) {
175 num_motion_steps = mesh->get_motion_steps();
/* Shared storage on unified-memory devices, managed storage otherwise. */
178 MTLResourceOptions storage_mode;
179 if (mtl_device.hasUnifiedMemory) {
180 storage_mode = MTLResourceStorageModeShared;
183 storage_mode = MTLResourceStorageModeManaged;
187 id<MTLBuffer> posBuf = nil;
188 id<MTLBuffer> indexBuf = [mtl_device newBufferWithBytes:tris.
data()
/* Static mesh: positions are copied directly from verts. */
192 if (num_motion_steps == 1) {
193 posBuf = [mtl_device newBufferWithBytes:
verts.data()
/* Motion: one buffer sized for num_verts positions per step, filled step by
 * step with memcpy.
 * NOTE(review): for steps other than center_step the source pointer is
 * presumably redirected to the motion attribute on a line missing here. */
199 newBufferWithLength:num_verts * num_motion_steps *
sizeof(
verts.data()[0])
202 size_t center_step = (num_motion_steps - 1) / 2;
203 for (
size_t step = 0; step < num_motion_steps; ++step) {
207 if (step != center_step) {
210 memcpy(dest_data + num_verts * step,
verts, num_verts *
sizeof(
float3));
/* Managed buffers need an explicit notification after CPU writes. */
212 if (storage_mode == MTLResourceStorageModeManaged) {
213 [posBuf didModifyRange:NSMakeRange(0, posBuf.length)];
218 MTLAccelerationStructureGeometryDescriptor *geomDesc;
/* Motion geometry: one MTLMotionKeyframeData entry per step is collected and
 * handed to the descriptor as its vertexBuffers array. */
219 if (num_motion_steps > 1) {
220 std::vector<MTLMotionKeyframeData *> vertex_ptrs;
221 vertex_ptrs.reserve(num_motion_steps);
222 for (
size_t step = 0;
step < num_motion_steps; ++
step) {
223 MTLMotionKeyframeData *k = [MTLMotionKeyframeData
data];
226 vertex_ptrs.push_back(k);
229 MTLAccelerationStructureMotionTriangleGeometryDescriptor *geomDescMotion =
230 [MTLAccelerationStructureMotionTriangleGeometryDescriptor
descriptor];
231 geomDescMotion.vertexBuffers = [NSArray arrayWithObjects:vertex_ptrs.data()
232 count:vertex_ptrs.size()];
233 geomDescMotion.vertexStride =
sizeof(
verts.data()[0]);
234 geomDescMotion.indexBuffer = indexBuf;
235 geomDescMotion.indexBufferOffset = 0;
236 geomDescMotion.indexType = MTLIndexTypeUInt32;
237 geomDescMotion.triangleCount = num_indices / 3;
238 geomDescMotion.intersectionFunctionTableOffset = 0;
239 geomDescMotion.opaque =
true;
241 geomDesc = geomDescMotion;
/* Static geometry: a single vertex buffer at offset 0. */
244 MTLAccelerationStructureTriangleGeometryDescriptor *geomDescNoMotion =
245 [MTLAccelerationStructureTriangleGeometryDescriptor
descriptor];
246 geomDescNoMotion.vertexBuffer = posBuf;
247 geomDescNoMotion.vertexBufferOffset = 0;
248 geomDescNoMotion.vertexStride =
sizeof(
verts.data()[0]);
249 geomDescNoMotion.indexBuffer = indexBuf;
250 geomDescNoMotion.indexBufferOffset = 0;
251 geomDescNoMotion.indexType = MTLIndexTypeUInt32;
252 geomDescNoMotion.triangleCount = num_indices / 3;
253 geomDescNoMotion.intersectionFunctionTableOffset = 0;
254 geomDescNoMotion.opaque =
true;
256 geomDesc = geomDescNoMotion;
262 geomDesc.allowDuplicateIntersectionFunctionInvocation =
false;
/* Primitive structure descriptor; with motion the keyframes span time 0 to 1
 * with clamped borders. */
264 MTLPrimitiveAccelerationStructureDescriptor *accelDesc =
265 [MTLPrimitiveAccelerationStructureDescriptor
descriptor];
266 accelDesc.geometryDescriptors = @[ geomDesc ];
267 if (num_motion_steps > 1) {
268 accelDesc.motionStartTime = 0.0f;
269 accelDesc.motionEndTime = 1.0f;
270 accelDesc.motionStartBorderMode = MTLMotionBorderModeClamp;
271 accelDesc.motionEndBorderMode = MTLMotionBorderModeClamp;
272 accelDesc.motionKeyframeCount = num_motion_steps;
274 accelDesc.usage |= MTLAccelerationStructureUsageExtendedLimits;
/* Without the fast-trace option the structure is flagged as refittable and
 * built with the fast-build preference. */
276 if (!use_fast_trace_bvh) {
277 accelDesc.usage |= (MTLAccelerationStructureUsageRefit |
278 MTLAccelerationStructureUsagePreferFastBuild);
/* Query the required sizes, then allocate the destination structure, a private
 * scratch buffer and an 8-byte shared buffer. */
281 MTLAccelerationStructureSizes accelSizes = [mtl_device
282 accelerationStructureSizesWithDescriptor:accelDesc];
283 id<MTLAccelerationStructure> accel_uncompressed = [mtl_device
284 newAccelerationStructureWithSize:accelSizes.accelerationStructureSize];
285 id<MTLBuffer> scratchBuf = [mtl_device newBufferWithLength:accelSizes.buildScratchBufferSize
286 options:MTLResourceStorageModePrivate];
287 id<MTLBuffer> sizeBuf = [mtl_device newBufferWithLength:8
288 options:MTLResourceStorageModeShared];
289 id<MTLCommandBuffer> accelCommands = [
queue commandBuffer];
290 id<MTLAccelerationStructureCommandEncoder> accelEnc =
291 [accelCommands accelerationStructureCommandEncoder];
/* Either refit the existing accel_struct into the new allocation or build from
 * scratch.
 * NOTE(review): the condition selecting between the two calls is missing from
 * this listing; presumably it tests the refit argument. */
293 [accelEnc refitAccelerationStructure:accel_struct
295 destination:accel_uncompressed
296 scratchBuffer:scratchBuf
297 scratchBufferOffset:0];
300 [accelEnc buildAccelerationStructure:accel_uncompressed
302 scratchBuffer:scratchBuf
303 scratchBufferOffset:0];
/* Fast-trace builds also request the compacted size of the result. */
305 if (use_fast_trace_bvh) {
306 [accelEnc writeCompactedAccelerationStructureSize:accel_uncompressed
309 sizeDataType:MTLDataTypeULong];
311 [accelEnc endEncoding];
/* Memory estimate handed to the throttler. The uncompressed structure is
 * counted twice.
 * NOTE(review): presumably the second copy leaves room for the compacted
 * structure; confirm. */
315 size_t wired_size = posBuf.allocatedSize + indexBuf.allocatedSize + scratchBuf.allocatedSize +
316 accel_uncompressed.allocatedSize * 2;
/* Runs when the build command buffer completes. */
318 [accelCommands addCompletedHandler:^(id<MTLCommandBuffer> ) {
320 [scratchBuf release];
324 if (use_fast_trace_bvh) {
/* Compaction is encoded from a global dispatch queue on a fresh command
 * buffer; its own completion handler installs the compacted structure,
 * releases the uncompressed one and returns the budget. */
328 dispatch_async(dispatch_get_global_queue(DISPATCH_QUEUE_PRIORITY_DEFAULT, 0), ^{
329 id<MTLCommandBuffer> accelCommands = [
queue commandBuffer];
330 id<MTLAccelerationStructureCommandEncoder> accelEnc =
331 [accelCommands accelerationStructureCommandEncoder];
332 id<MTLAccelerationStructure> accel = [mtl_device
333 newAccelerationStructureWithSize:compressed_size];
334 [accelEnc copyAndCompactAccelerationStructure:accel_uncompressed
335 toAccelerationStructure:accel];
336 [accelEnc endEncoding];
337 [accelCommands addCompletedHandler:^(id<MTLCommandBuffer> ) {
338 set_accel_struct(accel);
339 [accel_uncompressed release];
342 g_bvh_build_throttler.release(wired_size);
344 [accelCommands commit];
/* Without compaction the uncompressed structure is installed as is. */
349 set_accel_struct(accel_uncompressed);
352 g_bvh_build_throttler.release(wired_size);
/* Register the estimate with the throttler before submitting the build. */
359 g_bvh_build_throttler.acquire(wired_size);
360 [accelCommands commit];
/* Build or refit the bottom-level acceleration structure (BLAS) for hair
 * curves using Metal curve geometry. The code is compiled only when
 * MAC_OS_VERSION_14_0 is defined and runs inside a macOS 14.0 availability
 * check. Curve keys and radii are expanded into control point, radius and
 * segment index arrays, uploaded, described as Catmull-Rom curves with four
 * control points per segment, and then built, optionally compacted and
 * installed in the same way as the other BLAS builders in this file.
 *
 * progress:   receives the status text.
 * mtl_device: device used for every buffer and structure allocation.
 * queue:      command queue the build and compaction are submitted to.
 *
 * NOTE(review): this listing omits lines. The remaining parameters (build_BLAS
 * passes geom and refit), the definitions of numKeys, numCurves, curve,
 * firstKey, segCount, idxBase, motion_keys and compressed_size, the else
 * branches and the return statements are not visible here. */
367bool BVHMetal::build_BLAS_hair(
Progress &progress,
368 id<MTLDevice> mtl_device,
369 id<MTLCommandQueue> queue,
373# if defined(MAC_OS_VERSION_14_0)
374 if (@available(macos 14.0, *)) {
376 Hair *hair =
static_cast<Hair *
>(geom);
383 "Building hair BLAS | %7d curves | %s", (
int)hair->
num_curves(), geom->
name.c_str());
/* A single keyframe unless motion blur is enabled for both the BVH and the
 * hair and motion_keys is set. */
388 size_t num_motion_steps = 1;
390 if (motion_blur && hair->get_use_motion_blur() && motion_keys) {
391 num_motion_steps = hair->get_motion_steps();
/* Shared storage on unified-memory devices, managed storage otherwise. */
394 MTLResourceOptions storage_mode;
395 if (mtl_device.hasUnifiedMemory) {
396 storage_mode = MTLResourceStorageModeShared;
399 storage_mode = MTLResourceStorageModeManaged;
402 id<MTLBuffer> cpBuffer = nil;
403 id<MTLBuffer> radiusBuffer = nil;
404 id<MTLBuffer> idxBuffer = nil;
406 MTLAccelerationStructureGeometryDescriptor *geomDesc;
/* Motion variant of the curve geometry.
 * NOTE(review): the condition selecting this variant is missing from this
 * listing; presumably it tests the motion blur state. */
408 MTLAccelerationStructureMotionCurveGeometryDescriptor *geomDescCrv =
409 [MTLAccelerationStructureMotionCurveGeometryDescriptor
descriptor];
413 const array<float> &radiuses = hair->get_curve_radius();
416 std::vector<float3> cpData;
417 std::vector<int> idxData;
418 std::vector<float> radiusData;
419 cpData.reserve(numKeys);
420 radiusData.reserve(numKeys);
/* step_offsets[step] records how many control points precede the data of that
 * step; it is used below to compute per-keyframe byte offsets. */
422 std::vector<int> step_offsets;
423 for (
size_t step = 0;
step < num_motion_steps; ++
step) {
/* The center step reads the curve keys; other steps read the motion attribute,
 * which stores no entry for the center step (hence the index shift). */
426 const float3 *keys = hair->get_curve_keys().data();
427 size_t center_step = (num_motion_steps - 1) / 2;
428 if (step != center_step) {
429 size_t attr_offset = (
step > center_step) ? step - 1 :
step;
431 keys = motion_keys->
data_float3() + attr_offset * numKeys;
434 step_offsets.push_back(cpData.size());
/* Per curve: the first key is pushed once before the segment loop (which
 * starts again at the first key) and the last key is pushed twice after it.
 * NOTE(review): presumably this pads both ends so each Catmull-Rom segment has
 * four control points; confirm. */
436 for (
int c = 0; c < numCurves; ++c) {
441 cpData.push_back(keys[firstKey]);
442 radiusData.push_back(radiuses[firstKey]);
443 for (
int s = 0; s < segCount; ++s) {
445 idxData.push_back(idxBase + s);
447 cpData.push_back(keys[firstKey + s]);
448 radiusData.push_back(radiuses[firstKey + s]);
450 cpData.push_back(keys[firstKey + curve.
num_keys - 1]);
451 cpData.push_back(keys[firstKey + curve.
num_keys - 1]);
452 radiusData.push_back(radiuses[firstKey + curve.
num_keys - 1]);
453 radiusData.push_back(radiuses[firstKey + curve.
num_keys - 1]);
/* Upload the index, control point and radius arrays. */
458 idxBuffer = [mtl_device newBufferWithBytes:idxData.data()
462 cpBuffer = [mtl_device newBufferWithBytes:cpData.data()
466 radiusBuffer = [mtl_device newBufferWithBytes:radiusData.data()
/* One keyframe entry per step for control points and one for radii, each
 * offset by the recorded control point count times the element size. */
470 std::vector<MTLMotionKeyframeData *> cp_ptrs;
471 std::vector<MTLMotionKeyframeData *> radius_ptrs;
472 cp_ptrs.reserve(num_motion_steps);
473 radius_ptrs.reserve(num_motion_steps);
475 for (
size_t step = 0;
step < num_motion_steps; ++
step) {
476 MTLMotionKeyframeData *k = [MTLMotionKeyframeData
data];
478 k.offset = step_offsets[
step] *
sizeof(
float3);
479 cp_ptrs.push_back(k);
481 k = [MTLMotionKeyframeData
data];
482 k.buffer = radiusBuffer;
483 k.offset = step_offsets[
step] *
sizeof(
float);
484 radius_ptrs.push_back(k);
/* Managed buffers need an explicit notification after CPU writes. */
487 if (storage_mode == MTLResourceStorageModeManaged) {
488 [cpBuffer didModifyRange:NSMakeRange(0, cpBuffer.length)];
489 [idxBuffer didModifyRange:NSMakeRange(0, idxBuffer.length)];
490 [radiusBuffer didModifyRange:NSMakeRange(0, radiusBuffer.length)];
493 geomDescCrv.controlPointBuffers = [NSArray arrayWithObjects:cp_ptrs.data()
494 count:cp_ptrs.size()];
495 geomDescCrv.radiusBuffers = [NSArray arrayWithObjects:radius_ptrs.data()
496 count:radius_ptrs.size()];
498 geomDescCrv.controlPointCount = cpData.size();
499 geomDescCrv.controlPointStride =
sizeof(
float3);
500 geomDescCrv.controlPointFormat = MTLAttributeFormatFloat3;
501 geomDescCrv.radiusStride =
sizeof(
float);
502 geomDescCrv.radiusFormat = MTLAttributeFormatFloat;
503 geomDescCrv.segmentCount = idxData.size();
504 geomDescCrv.segmentControlPointCount = 4;
507 geomDescCrv.curveBasis = MTLCurveBasisCatmullRom;
508 geomDescCrv.curveEndCaps = MTLCurveEndCapsDisk;
509 geomDescCrv.indexType = MTLIndexTypeUInt32;
510 geomDescCrv.indexBuffer = idxBuffer;
511 geomDescCrv.intersectionFunctionTableOffset = 1;
516 geomDescCrv.allowDuplicateIntersectionFunctionInvocation =
false;
517 geomDescCrv.opaque =
true;
518 geomDesc = geomDescCrv;
/* Static variant of the curve geometry: the same control point padding scheme
 * as above, applied once to the curve keys. */
521 MTLAccelerationStructureCurveGeometryDescriptor *geomDescCrv =
522 [MTLAccelerationStructureCurveGeometryDescriptor
descriptor];
526 const array<float> &radiuses = hair->get_curve_radius();
529 std::vector<float3> cpData;
530 std::vector<int> idxData;
531 std::vector<float> radiusData;
532 cpData.reserve(numKeys);
533 radiusData.reserve(numKeys);
534 auto keys = hair->get_curve_keys();
535 for (
int c = 0; c < numCurves; ++c) {
539 radiusData.push_back(radiuses[firstKey]);
541 cpData.push_back(keys[firstKey]);
542 for (
int s = 0; s < segCount; ++s) {
543 idxData.push_back(idxBase + s);
544 cpData.push_back(keys[firstKey + s]);
545 radiusData.push_back(radiuses[firstKey + s]);
547 cpData.push_back(keys[firstKey + curve.
num_keys - 1]);
548 cpData.push_back(keys[firstKey + curve.
num_keys - 1]);
549 radiusData.push_back(radiuses[firstKey + curve.
num_keys - 1]);
550 radiusData.push_back(radiuses[firstKey + curve.
num_keys - 1]);
554 idxBuffer = [mtl_device newBufferWithBytes:idxData.data()
558 cpBuffer = [mtl_device newBufferWithBytes:cpData.data()
562 radiusBuffer = [mtl_device newBufferWithBytes:radiusData.data()
566 if (storage_mode == MTLResourceStorageModeManaged) {
567 [cpBuffer didModifyRange:NSMakeRange(0, cpBuffer.length)];
568 [idxBuffer didModifyRange:NSMakeRange(0, idxBuffer.length)];
569 [radiusBuffer didModifyRange:NSMakeRange(0, radiusBuffer.length)];
571 geomDescCrv.controlPointBuffer = cpBuffer;
572 geomDescCrv.radiusBuffer = radiusBuffer;
573 geomDescCrv.controlPointCount = cpData.size();
574 geomDescCrv.controlPointStride =
sizeof(
float3);
575 geomDescCrv.controlPointFormat = MTLAttributeFormatFloat3;
576 geomDescCrv.controlPointBufferOffset = 0;
577 geomDescCrv.segmentCount = idxData.size();
578 geomDescCrv.segmentControlPointCount = 4;
581 geomDescCrv.curveBasis = MTLCurveBasisCatmullRom;
582 geomDescCrv.curveEndCaps = MTLCurveEndCapsDisk;
583 geomDescCrv.indexType = MTLIndexTypeUInt32;
584 geomDescCrv.indexBuffer = idxBuffer;
585 geomDescCrv.intersectionFunctionTableOffset = 1;
590 geomDescCrv.allowDuplicateIntersectionFunctionInvocation =
false;
591 geomDescCrv.opaque =
true;
592 geomDesc = geomDescCrv;
/* Primitive structure descriptor; the motion settings below use time 0 to 1
 * with the vanish border mode.
 * NOTE(review): the condition guarding the motion settings is missing from
 * this listing. */
595 MTLPrimitiveAccelerationStructureDescriptor *accelDesc =
596 [MTLPrimitiveAccelerationStructureDescriptor
descriptor];
597 accelDesc.geometryDescriptors = @[ geomDesc ];
600 accelDesc.motionStartTime = 0.0f;
601 accelDesc.motionEndTime = 1.0f;
602 accelDesc.motionStartBorderMode = MTLMotionBorderModeVanish;
603 accelDesc.motionEndBorderMode = MTLMotionBorderModeVanish;
604 accelDesc.motionKeyframeCount = num_motion_steps;
/* Without the fast-trace option the structure is flagged as refittable and
 * built with the fast-build preference. */
607 if (!use_fast_trace_bvh) {
608 accelDesc.usage |= (MTLAccelerationStructureUsageRefit |
609 MTLAccelerationStructureUsagePreferFastBuild);
611 accelDesc.usage |= MTLAccelerationStructureUsageExtendedLimits;
/* Query the required sizes, then allocate the destination structure, a private
 * scratch buffer and an 8-byte shared buffer. */
613 MTLAccelerationStructureSizes accelSizes = [mtl_device
614 accelerationStructureSizesWithDescriptor:accelDesc];
615 id<MTLAccelerationStructure> accel_uncompressed = [mtl_device
616 newAccelerationStructureWithSize:accelSizes.accelerationStructureSize];
617 id<MTLBuffer> scratchBuf = [mtl_device newBufferWithLength:accelSizes.buildScratchBufferSize
618 options:MTLResourceStorageModePrivate];
619 id<MTLBuffer> sizeBuf = [mtl_device newBufferWithLength:8
620 options:MTLResourceStorageModeShared];
621 id<MTLCommandBuffer> accelCommands = [
queue commandBuffer];
622 id<MTLAccelerationStructureCommandEncoder> accelEnc =
623 [accelCommands accelerationStructureCommandEncoder];
/* Either refit the existing accel_struct into the new allocation or build from
 * scratch.
 * NOTE(review): the condition selecting between the two calls is missing from
 * this listing; presumably it tests the refit argument. */
625 [accelEnc refitAccelerationStructure:accel_struct
627 destination:accel_uncompressed
628 scratchBuffer:scratchBuf
629 scratchBufferOffset:0];
632 [accelEnc buildAccelerationStructure:accel_uncompressed
634 scratchBuffer:scratchBuf
635 scratchBufferOffset:0];
/* Fast-trace builds also request the compacted size of the result. */
637 if (use_fast_trace_bvh) {
638 [accelEnc writeCompactedAccelerationStructureSize:accel_uncompressed
641 sizeDataType:MTLDataTypeULong];
643 [accelEnc endEncoding];
/* Memory estimate handed to the throttler. The uncompressed structure is
 * counted twice.
 * NOTE(review): presumably the second copy leaves room for the compacted
 * structure; confirm. */
647 size_t wired_size = cpBuffer.allocatedSize + radiusBuffer.allocatedSize +
648 idxBuffer.allocatedSize + scratchBuf.allocatedSize +
649 accel_uncompressed.allocatedSize * 2;
/* Runs when the build command buffer completes. */
651 [accelCommands addCompletedHandler:^(id<MTLCommandBuffer> ) {
653 [scratchBuf release];
655 [radiusBuffer release];
658 if (use_fast_trace_bvh) {
/* Compaction is encoded from a global dispatch queue on a fresh command
 * buffer; its own completion handler installs the compacted structure,
 * releases the uncompressed one and returns the budget. */
661 dispatch_async(dispatch_get_global_queue(DISPATCH_QUEUE_PRIORITY_DEFAULT, 0), ^{
662 id<MTLCommandBuffer> accelCommands = [
queue commandBuffer];
663 id<MTLAccelerationStructureCommandEncoder> accelEnc =
664 [accelCommands accelerationStructureCommandEncoder];
665 id<MTLAccelerationStructure> accel = [mtl_device
666 newAccelerationStructureWithSize:compressed_size];
667 [accelEnc copyAndCompactAccelerationStructure:accel_uncompressed
668 toAccelerationStructure:accel];
669 [accelEnc endEncoding];
670 [accelCommands addCompletedHandler:^(id<MTLCommandBuffer> ) {
671 set_accel_struct(accel);
672 [accel_uncompressed release];
675 g_bvh_build_throttler.release(wired_size);
677 [accelCommands commit];
/* Without compaction the uncompressed structure is installed as is. */
682 set_accel_struct(accel_uncompressed);
685 g_bvh_build_throttler.release(wired_size);
/* Register the estimate with the throttler before submitting the build. */
692 g_bvh_build_throttler.acquire(wired_size);
693 [accelCommands commit];
/* Build or refit the bottom-level acceleration structure (BLAS) for a point
 * cloud. Each point contributes one axis-aligned bounding box per motion step;
 * the boxes are written into a single Metal buffer, described as bounding box
 * geometry, and then built, optionally compacted and installed in the same way
 * as the other BLAS builders in this file.
 *
 * progress:   receives the status text.
 * mtl_device: device used for every buffer and structure allocation.
 * queue:      command queue the build and compaction are submitted to.
 *
 * NOTE(review): this listing omits lines. The remaining parameters (build_BLAS
 * passes geom and refit), the definitions of pointcloud, bounds, motion_keys
 * and compressed_size, the else branches and the return statements are not
 * visible here. */
707bool BVHMetal::build_BLAS_pointcloud(
Progress &progress,
708 id<MTLDevice> mtl_device,
709 id<MTLCommandQueue> queue,
713 if (@available(macos 12.0, *)) {
721 BVH_status(
"Building pointcloud BLAS | %7d points | %s",
726 const size_t num_points = pointcloud->get_points().size();
727 const float3 *points = pointcloud->get_points().data();
728 const float *radius = pointcloud->get_radius().data();
/* A single keyframe unless motion blur is enabled for both the BVH and the
 * point cloud and motion_keys is set. */
732 size_t num_motion_steps = 1;
734 if (motion_blur && pointcloud->get_use_motion_blur() && motion_keys) {
735 num_motion_steps = pointcloud->get_motion_steps();
738 const size_t num_aabbs = num_motion_steps * num_points;
/* Shared storage on unified-memory devices, managed storage otherwise. */
740 MTLResourceOptions storage_mode;
741 if (mtl_device.hasUnifiedMemory) {
742 storage_mode = MTLResourceStorageModeShared;
745 storage_mode = MTLResourceStorageModeManaged;
/* One buffer holding num_points boxes for every step, laid out step by step. */
749 id<MTLBuffer> aabbBuf = [mtl_device
750 newBufferWithLength:num_aabbs *
sizeof(MTLAxisAlignedBoundingBox)
752 MTLAxisAlignedBoundingBox *aabb_data = (MTLAxisAlignedBoundingBox *)[aabbBuf contents];
/* The center step and the other steps are filled by separate loops; both write
 * bounds.min and bounds.max to slot step * num_points + j.
 * NOTE(review): how bounds is computed for each case is on lines missing from
 * this listing. */
755 size_t center_step = (num_motion_steps - 1) / 2;
756 for (
size_t step = 0;
step < num_motion_steps; ++
step) {
757 if (step == center_step) {
759 for (
size_t j = 0; j < num_points; ++j) {
764 const size_t index =
step * num_points + j;
765 aabb_data[
index].min = (MTLPackedFloat3 &)
bounds.min;
766 aabb_data[
index].max = (MTLPackedFloat3 &)
bounds.max;
/* Index into the motion attribute, shifted by one for steps after the center
 * step. */
770 size_t attr_offset = (
step > center_step) ? step - 1 :
step;
773 for (
size_t j = 0; j < num_points; ++j) {
778 const size_t index =
step * num_points + j;
779 aabb_data[
index].min = (MTLPackedFloat3 &)
bounds.min;
780 aabb_data[
index].max = (MTLPackedFloat3 &)
bounds.max;
/* Managed buffers need an explicit notification after CPU writes. */
785 if (storage_mode == MTLResourceStorageModeManaged) {
786 [aabbBuf didModifyRange:NSMakeRange(0, aabbBuf.length)];
/* Prints at most the first 400 boxes.
 * NOTE(review): presumably this sits inside a disabled debug section whose
 * preprocessor lines are missing from this listing; confirm. */
790 for (
size_t i=0; i<num_aabbs && i < 400; i++) {
791 MTLAxisAlignedBoundingBox& bb = aabb_data[i];
792 printf(
" %d: %.1f,%.1f,%.1f -- %.1f,%.1f,%.1f\n",
int(i), bb.min.x, bb.min.y, bb.min.z, bb.max.x, bb.max.y, bb.max.z);
796 MTLAccelerationStructureGeometryDescriptor *geomDesc;
/* Motion variant: one keyframe entry per step, each offset by a full step of
 * boxes; the descriptor box count is num_points. */
798 std::vector<MTLMotionKeyframeData *> aabb_ptrs;
799 aabb_ptrs.reserve(num_motion_steps);
800 for (
size_t step = 0;
step < num_motion_steps; ++
step) {
801 MTLMotionKeyframeData *k = [MTLMotionKeyframeData
data];
803 k.offset =
step * num_points *
sizeof(MTLAxisAlignedBoundingBox);
804 aabb_ptrs.push_back(k);
807 MTLAccelerationStructureMotionBoundingBoxGeometryDescriptor *geomDescMotion =
808 [MTLAccelerationStructureMotionBoundingBoxGeometryDescriptor
descriptor];
809 geomDescMotion.boundingBoxBuffers = [NSArray arrayWithObjects:aabb_ptrs.data()
810 count:aabb_ptrs.size()];
811 geomDescMotion.boundingBoxCount = num_points;
812 geomDescMotion.boundingBoxStride =
sizeof(aabb_data[0]);
813 geomDescMotion.intersectionFunctionTableOffset = 2;
818 geomDescMotion.allowDuplicateIntersectionFunctionInvocation =
false;
819 geomDescMotion.opaque =
true;
820 geomDesc = geomDescMotion;
/* Static variant: the whole buffer from offset 0 with num_aabbs boxes. */
823 MTLAccelerationStructureBoundingBoxGeometryDescriptor *geomDescNoMotion =
824 [MTLAccelerationStructureBoundingBoxGeometryDescriptor
descriptor];
825 geomDescNoMotion.boundingBoxBuffer = aabbBuf;
826 geomDescNoMotion.boundingBoxBufferOffset = 0;
827 geomDescNoMotion.boundingBoxCount =
int(num_aabbs);
828 geomDescNoMotion.boundingBoxStride =
sizeof(aabb_data[0]);
829 geomDescNoMotion.intersectionFunctionTableOffset = 2;
834 geomDescNoMotion.allowDuplicateIntersectionFunctionInvocation =
false;
835 geomDescNoMotion.opaque =
true;
836 geomDesc = geomDescNoMotion;
/* Primitive structure descriptor; the motion settings below use time 0 to 1.
 * NOTE(review): the condition guarding the motion settings and the border
 * mode lines are missing from this listing. */
839 MTLPrimitiveAccelerationStructureDescriptor *accelDesc =
840 [MTLPrimitiveAccelerationStructureDescriptor
descriptor];
841 accelDesc.geometryDescriptors = @[ geomDesc ];
844 accelDesc.motionStartTime = 0.0f;
845 accelDesc.motionEndTime = 1.0f;
848 accelDesc.motionKeyframeCount = num_motion_steps;
850 accelDesc.usage |= MTLAccelerationStructureUsageExtendedLimits;
/* Without the fast-trace option the structure is flagged as refittable and
 * built with the fast-build preference. */
852 if (!use_fast_trace_bvh) {
853 accelDesc.usage |= (MTLAccelerationStructureUsageRefit |
854 MTLAccelerationStructureUsagePreferFastBuild);
/* Query the required sizes, then allocate the destination structure, a private
 * scratch buffer and an 8-byte shared buffer. */
857 MTLAccelerationStructureSizes accelSizes = [mtl_device
858 accelerationStructureSizesWithDescriptor:accelDesc];
859 id<MTLAccelerationStructure> accel_uncompressed = [mtl_device
860 newAccelerationStructureWithSize:accelSizes.accelerationStructureSize];
861 id<MTLBuffer> scratchBuf = [mtl_device newBufferWithLength:accelSizes.buildScratchBufferSize
862 options:MTLResourceStorageModePrivate];
863 id<MTLBuffer> sizeBuf = [mtl_device newBufferWithLength:8
864 options:MTLResourceStorageModeShared];
865 id<MTLCommandBuffer> accelCommands = [
queue commandBuffer];
866 id<MTLAccelerationStructureCommandEncoder> accelEnc =
867 [accelCommands accelerationStructureCommandEncoder];
/* Either refit the existing accel_struct into the new allocation or build from
 * scratch.
 * NOTE(review): the condition selecting between the two calls is missing from
 * this listing; presumably it tests the refit argument. */
869 [accelEnc refitAccelerationStructure:accel_struct
871 destination:accel_uncompressed
872 scratchBuffer:scratchBuf
873 scratchBufferOffset:0];
876 [accelEnc buildAccelerationStructure:accel_uncompressed
878 scratchBuffer:scratchBuf
879 scratchBufferOffset:0];
/* Fast-trace builds also request the compacted size of the result. */
881 if (use_fast_trace_bvh) {
882 [accelEnc writeCompactedAccelerationStructureSize:accel_uncompressed
885 sizeDataType:MTLDataTypeULong];
887 [accelEnc endEncoding];
/* Memory estimate handed to the throttler. The uncompressed structure is
 * counted twice.
 * NOTE(review): presumably the second copy leaves room for the compacted
 * structure; confirm. */
891 size_t wired_size = aabbBuf.allocatedSize + scratchBuf.allocatedSize +
892 accel_uncompressed.allocatedSize * 2;
/* Runs when the build command buffer completes. */
894 [accelCommands addCompletedHandler:^(id<MTLCommandBuffer> ) {
896 [scratchBuf release];
899 if (use_fast_trace_bvh) {
/* Compaction is encoded from a global dispatch queue on a fresh command
 * buffer; its own completion handler installs the compacted structure,
 * releases the uncompressed one and returns the budget. */
903 dispatch_async(dispatch_get_global_queue(DISPATCH_QUEUE_PRIORITY_DEFAULT, 0), ^{
904 id<MTLCommandBuffer> accelCommands = [
queue commandBuffer];
905 id<MTLAccelerationStructureCommandEncoder> accelEnc =
906 [accelCommands accelerationStructureCommandEncoder];
907 id<MTLAccelerationStructure> accel = [mtl_device
908 newAccelerationStructureWithSize:compressed_size];
909 [accelEnc copyAndCompactAccelerationStructure:accel_uncompressed
910 toAccelerationStructure:accel];
911 [accelEnc endEncoding];
912 [accelCommands addCompletedHandler:^(id<MTLCommandBuffer> ) {
913 set_accel_struct(accel);
914 [accel_uncompressed release];
917 g_bvh_build_throttler.release(wired_size);
919 [accelCommands commit];
/* Without compaction the uncompressed structure is installed as is. */
924 set_accel_struct(accel_uncompressed);
927 g_bvh_build_throttler.release(wired_size);
/* Register the estimate with the throttler before submitting the build. */
934 g_bvh_build_throttler.acquire(wired_size);
935 [accelCommands commit];
/* Build the bottom-level structure for this BVH by forwarding to the mesh,
 * hair or point cloud builder and returning its result. The assert documents
 * that a bottom-level BVH holds exactly one object and one geometry.
 *
 * progress, mtl_device, queue: passed through unchanged to the chosen builder.
 *
 * NOTE(review): the lines that obtain geom, the remaining parameter (refit is
 * forwarded below) and the condition choosing between the three builders are
 * missing from this listing; presumably the choice depends on the geometry
 * type. */
941bool BVHMetal::build_BLAS(
Progress &progress,
942 id<MTLDevice> mtl_device,
943 id<MTLCommandQueue> queue,
946 assert(objects.size() == 1 && geometry.size() == 1);
953 return build_BLAS_mesh(progress, mtl_device, queue, geom,
refit);
955 return build_BLAS_hair(progress, mtl_device, queue, geom,
refit);
957 return build_BLAS_pointcloud(progress, mtl_device, queue, geom,
refit);
/* Build or refit the top-level acceleration structure (TLAS) that instances
 * the per-geometry bottom-level structures. The visible steps are: call
 * wait_for_all() on the build throttler, count motion transforms, collect the
 * distinct BLAS objects, fill one instance descriptor per object (plus motion
 * transforms when motion blur is used), encode the build or refit, wait for it
 * to complete, release the temporary buffers and install the result through
 * set_accel_struct(). Unlike the BLAS builders, this function waits for the
 * command buffer before returning.
 *
 * progress:   receives the status text.
 * mtl_device: device used for every buffer and structure allocation.
 * queue:      command queue the build is submitted to.
 *
 * NOTE(review): this listing omits lines. The remaining parameter, the
 * definitions of num_instances, instance_index, primitive_offset, null_BLAS
 * and blas_array, several branch conditions and the return statements are not
 * visible here. */
964bool BVHMetal::build_TLAS(
Progress &progress,
965 id<MTLDevice> mtl_device,
966 id<MTLCommandQueue> queue,
/* NOTE(review): presumably this blocks until in-flight BLAS builds tracked by
 * the throttler have finished; the definition of wait_for_all is not fully
 * visible in this listing. */
970 g_bvh_build_throttler.wait_for_all();
972 if (@available(macos 12.0, *)) {
/* Helper that builds a BLAS with zero triangles. A buffer the size of one
 * float3 serves as both vertex and index buffer. The build is committed and
 * waited on before the structure is returned. */
974 auto make_null_BLAS = [](id<MTLDevice> mtl_device,
975 id<MTLCommandQueue>
queue) -> id<MTLAccelerationStructure> {
976 MTLResourceOptions storage_mode = MTLResourceStorageModeManaged;
977 if (mtl_device.hasUnifiedMemory) {
978 storage_mode = MTLResourceStorageModeShared;
981 id<MTLBuffer> nullBuf = [mtl_device newBufferWithLength:
sizeof(
float3)
options:storage_mode];
984 MTLAccelerationStructureTriangleGeometryDescriptor *geomDesc =
985 [MTLAccelerationStructureTriangleGeometryDescriptor
descriptor];
986 geomDesc.vertexBuffer = nullBuf;
987 geomDesc.vertexBufferOffset = 0;
988 geomDesc.vertexStride =
sizeof(
float3);
989 geomDesc.indexBuffer = nullBuf;
990 geomDesc.indexBufferOffset = 0;
991 geomDesc.indexType = MTLIndexTypeUInt32;
992 geomDesc.triangleCount = 0;
993 geomDesc.intersectionFunctionTableOffset = 0;
994 geomDesc.opaque =
true;
995 geomDesc.allowDuplicateIntersectionFunctionInvocation =
false;
997 MTLPrimitiveAccelerationStructureDescriptor *accelDesc =
998 [MTLPrimitiveAccelerationStructureDescriptor
descriptor];
999 accelDesc.geometryDescriptors = @[ geomDesc ];
1000 accelDesc.usage |= MTLAccelerationStructureUsageExtendedLimits;
1002 MTLAccelerationStructureSizes accelSizes = [mtl_device
1003 accelerationStructureSizesWithDescriptor:accelDesc];
1004 id<MTLAccelerationStructure> accel_struct = [mtl_device
1005 newAccelerationStructureWithSize:accelSizes.accelerationStructureSize];
1006 id<MTLBuffer> scratchBuf = [mtl_device newBufferWithLength:accelSizes.buildScratchBufferSize
1007 options:MTLResourceStorageModePrivate];
1008 id<MTLBuffer> sizeBuf = [mtl_device newBufferWithLength:8
1009 options:MTLResourceStorageModeShared];
1010 id<MTLCommandBuffer> accelCommands = [
queue commandBuffer];
1011 id<MTLAccelerationStructureCommandEncoder> accelEnc =
1012 [accelCommands accelerationStructureCommandEncoder];
1013 [accelEnc buildAccelerationStructure:accel_struct
1015 scratchBuffer:scratchBuf
1016 scratchBufferOffset:0];
1017 [accelEnc endEncoding];
1018 [accelCommands commit];
1019 [accelCommands waitUntilCompleted];
1022 [scratchBuf release];
1026 return accel_struct;
/* Count the motion transforms needed: an object that uses motion contributes
 * its motion key count (at least one), the other visible branch adds one. */
1030 uint32_t num_motion_transforms = 0;
1031 for (
Object *ob : objects) {
1034 if (ob->use_motion()) {
1035 num_motion_transforms +=
max((
size_t)1, ob->get_motion().size());
1038 num_motion_transforms++;
/* NOTE(review): the body of this zero-instance check is missing from this
 * listing; presumably it returns early. */
1042 if (num_instances == 0) {
1047 BVH_status(
"Building TLAS | %7d instances", (
int)num_instances);
/* all_blas holds each distinct BLAS once; instance_mapping maps a BVH pointer
 * to its position in all_blas so repeated geometry reuses the same index. A
 * null pointer key stands for null_BLAS. */
1052 NSMutableArray *all_blas = [NSMutableArray
array];
1053 unordered_map<BVHMetal const *, int> instance_mapping;
1056 auto get_blas_index = [&](BVHMetal
const *blas) {
1057 auto it = instance_mapping.find(blas);
1058 if (it != instance_mapping.end()) {
1062 int blas_index = (
int)[all_blas
count];
1063 instance_mapping[blas] = blas_index;
1064 if (@available(macos 12.0, *)) {
1065 [all_blas addObject:(blas ? blas->accel_struct : null_BLAS)];
/* Shared storage on unified-memory devices, managed storage otherwise. */
1071 MTLResourceOptions storage_mode;
1072 if (mtl_device.hasUnifiedMemory) {
1073 storage_mode = MTLResourceStorageModeShared;
1076 storage_mode = MTLResourceStorageModeManaged;
/* The per-instance record is either the motion descriptor or the user ID
 * descriptor.
 * NOTE(review): the selecting condition is missing from this listing;
 * presumably it tests motion_blur. */
1079 size_t instance_size;
1081 instance_size =
sizeof(MTLAccelerationStructureMotionInstanceDescriptor);
1084 instance_size =
sizeof(MTLAccelerationStructureUserIDInstanceDescriptor);
1088 id<MTLBuffer> instanceBuf = [mtl_device newBufferWithLength:num_instances * instance_size
/* The motion transform buffer exists only with motion blur and a non-zero
 * transform count. */
1090 id<MTLBuffer> motion_transforms_buf = nil;
1091 MTLPackedFloat4x3 *motion_transforms =
nullptr;
1092 if (motion_blur && num_motion_transforms) {
1093 motion_transforms_buf = [mtl_device
1094 newBufferWithLength:num_motion_transforms *
sizeof(MTLPackedFloat4x3)
1096 motion_transforms = (MTLPackedFloat4x3 *)motion_transforms_buf.contents;
1100 uint32_t motion_transform_index = 0;
1103 blas_array.reserve(num_instances);
/* One instance per object. */
1105 for (
Object *ob : objects) {
1107 Geometry const *geom = ob->get_geometry();
1108 BVHMetal
const *blas =
static_cast<BVHMetal
const *
>(geom->
bvh);
/* Objects with no BLAS, no built structure, or that are not traceable are
 * pointed at null_BLAS instead.
 * NOTE(review): presumably null_BLAS is created only on first use; the guard
 * around make_null_BLAS is missing from this listing. */
1109 if (!blas || !blas->accel_struct || !ob->is_traceable()) {
1117 null_BLAS = make_null_BLAS(mtl_device, queue);
1119 blas_array.push_back(null_BLAS);
1122 blas_array.push_back(blas->accel_struct);
1125 uint32_t accel_struct_index = get_blas_index(blas);
1140 int currIndex = instance_index++;
/* Motion instance descriptor: time range 0 to 1 with the vanish border mode,
 * transforms start at the current motion_transform_index. */
1161 MTLAccelerationStructureMotionInstanceDescriptor *instances =
1162 (MTLAccelerationStructureMotionInstanceDescriptor *)[instanceBuf contents];
1163 MTLAccelerationStructureMotionInstanceDescriptor &desc = instances[currIndex];
1165 desc.accelerationStructureIndex = accel_struct_index;
1166 desc.userID = primitive_offset;
1168 desc.motionStartTime = 0.0f;
1169 desc.motionEndTime = 1.0f;
1170 desc.motionTransformsStartIndex = motion_transform_index;
1171 desc.motionStartBorderMode = MTLMotionBorderModeVanish;
1172 desc.motionEndBorderMode = MTLMotionBorderModeVanish;
1173 desc.intersectionFunctionTableOffset = 0;
1175 int key_count = ob->get_motion().size();
1177 desc.motionTransformsCount = key_count;
/* Copy every motion key. The index expression reads element (row i % 3,
 * column i / 3) from a source with four floats per row and writes the twelve
 * values consecutively, i.e. it transposes the matrix. The inner loop variable
 * i shadows the outer one. */
1179 Transform *keys = ob->get_motion().data();
1180 for (
int i = 0; i < key_count; i++) {
1181 float *t = (
float *)&motion_transforms[motion_transform_index++];
1183 auto src = (
float const *)&keys[i];
1184 for (
int i = 0; i < 12; i++) {
1185 t[i] = src[(i / 3) + 4 * (i % 3)];
/* Single transform case: the object transform is transposed in for instanced
 * geometry, otherwise the three diagonal entries are set to 1.
 * NOTE(review): presumably the destination is zeroed first on a line missing
 * from this listing, making the result an identity transform. */
1190 desc.motionTransformsCount = 1;
1192 float *t = (
float *)&motion_transforms[motion_transform_index++];
1193 if (ob->get_geometry()->is_instanced()) {
1195 auto src = (
float const *)&ob->get_tfm();
1196 for (
int i = 0; i < 12; i++) {
1197 t[i] = src[(i / 3) + 4 * (i % 3)];
1202 t[0] = t[4] = t[8] = 1.0f;
/* User ID instance descriptor: the transform is stored inside the descriptor
 * using the same transpose and diagonal handling as above. */
1207 MTLAccelerationStructureUserIDInstanceDescriptor *instances =
1208 (MTLAccelerationStructureUserIDInstanceDescriptor *)[instanceBuf contents];
1209 MTLAccelerationStructureUserIDInstanceDescriptor &desc = instances[currIndex];
1211 desc.accelerationStructureIndex = accel_struct_index;
1212 desc.userID = primitive_offset;
1214 desc.intersectionFunctionTableOffset = 0;
1215 desc.options = MTLAccelerationStructureInstanceOptionOpaque;
1217 float *t = (
float *)&desc.transformationMatrix;
1218 if (ob->get_geometry()->is_instanced()) {
1220 auto src = (
float const *)&ob->get_tfm();
1221 for (
int i = 0; i < 12; i++) {
1222 t[i] = src[(i / 3) + 4 * (i % 3)];
1227 t[0] = t[4] = t[8] = 1.0f;
/* Managed buffers need an explicit notification after CPU writes. The assert
 * checks that exactly the counted number of motion transforms was written. */
1232 if (storage_mode == MTLResourceStorageModeManaged) {
1233 [instanceBuf didModifyRange:NSMakeRange(0, instanceBuf.length)];
1234 if (motion_transforms_buf) {
1235 [motion_transforms_buf didModifyRange:NSMakeRange(0, motion_transforms_buf.length)];
1236 assert(num_motion_transforms == motion_transform_index);
/* Instance structure descriptor. The descriptor type is assigned twice below.
 * NOTE(review): presumably the second assignment and the motion transform
 * settings sit inside a motion blur branch whose condition is missing from
 * this listing. */
1240 MTLInstanceAccelerationStructureDescriptor *accelDesc =
1241 [MTLInstanceAccelerationStructureDescriptor
descriptor];
1242 accelDesc.instanceCount = num_instances;
1243 accelDesc.instanceDescriptorType = MTLAccelerationStructureInstanceDescriptorTypeUserID;
1244 accelDesc.instanceDescriptorBuffer = instanceBuf;
1245 accelDesc.instanceDescriptorBufferOffset = 0;
1246 accelDesc.instanceDescriptorStride = instance_size;
1247 accelDesc.instancedAccelerationStructures = all_blas;
1250 accelDesc.instanceDescriptorType = MTLAccelerationStructureInstanceDescriptorTypeMotion;
1251 accelDesc.motionTransformBuffer = motion_transforms_buf;
1252 accelDesc.motionTransformCount = num_motion_transforms;
1255 accelDesc.usage |= MTLAccelerationStructureUsageExtendedLimits;
/* Without the fast-trace option the structure is flagged as refittable and
 * built with the fast-build preference. */
1256 if (!use_fast_trace_bvh) {
1257 accelDesc.usage |= (MTLAccelerationStructureUsageRefit |
1258 MTLAccelerationStructureUsagePreferFastBuild);
1261 MTLAccelerationStructureSizes accelSizes = [mtl_device
1262 accelerationStructureSizesWithDescriptor:accelDesc];
1263 id<MTLAccelerationStructure> accel = [mtl_device
1264 newAccelerationStructureWithSize:accelSizes.accelerationStructureSize];
1265 id<MTLBuffer> scratchBuf = [mtl_device newBufferWithLength:accelSizes.buildScratchBufferSize
1266 options:MTLResourceStorageModePrivate];
1267 id<MTLCommandBuffer> accelCommands = [
queue commandBuffer];
1268 id<MTLAccelerationStructureCommandEncoder> accelEnc =
1269 [accelCommands accelerationStructureCommandEncoder];
/* Either refit the existing accel_struct or build from scratch.
 * NOTE(review): the condition selecting between the two calls is missing from
 * this listing; presumably it tests the refit argument. */
1271 [accelEnc refitAccelerationStructure:accel_struct
1274 scratchBuffer:scratchBuf
1275 scratchBufferOffset:0];
1278 [accelEnc buildAccelerationStructure:accel
1280 scratchBuffer:scratchBuf
1281 scratchBufferOffset:0];
/* Submit and wait, so the temporary buffers can be released right after. */
1283 [accelEnc endEncoding];
1284 [accelCommands commit];
1285 [accelCommands waitUntilCompleted];
1287 if (motion_transforms_buf) {
1288 [motion_transforms_buf release];
1290 [instanceBuf release];
1291 [scratchBuf release];
1294 set_accel_struct(accel);
/* Rebuild unique_blas_array from the distinct structures gathered above. */
1296 unique_blas_array.clear();
1297 unique_blas_array.reserve(all_blas.count);
1298 [all_blas enumerateObjectsUsingBlock:^(id<MTLAccelerationStructure> blas, NSUInteger,
BOOL *) {
1299 unique_blas_array.push_back(blas);
/* Entry point for building this BVH. Inside a macOS 12.0 availability check
 * it calls set_accel_struct(nil) and then returns the result of build_BLAS()
 * or build_TLAS().
 *
 * progress, mtl_device, queue: passed through unchanged to the chosen builder.
 *
 * NOTE(review): the remaining parameter (refit is forwarded below), the
 * condition around set_accel_struct(nil) and the condition choosing between
 * the bottom-level and top-level build are missing from this listing. */
1307bool BVHMetal::build(
Progress &progress,
1308 id<MTLDevice> mtl_device,
1309 id<MTLCommandQueue> queue,
1312 if (@available(macos 12.0, *)) {
1323 set_accel_struct(nil);
1329 return build_BLAS(progress, mtl_device, queue,
refit);
1332 return build_TLAS(progress, mtl_device, queue,
refit);
in reality light always falls off quadratically. Particle: Retrieve the data of the particle that spawned the object, for example to give variation to multiple instances of an object. Point: Retrieve information about points in a point cloud. Retrieve the edges of an object as it appears to Cycles; topology will always appear triangulated. Convert a blackbody temperature to an RGB value. Normal: Generate a perturbed normal from an RGB normal map image. Typically used for faking highly detailed surfaces. Generate an OSL shader from a file or text data block. Image: Sample an image file as a texture. Gabor: Generate Gabor noise. Gradient: Generate interpolated color and intensity values based on the input vector. Magic: Generate a psychedelic color texture. Voronoi: Generate Worley noise based on the distance to random points. Typically used to generate textures such as or biological cells. Brick: Generate a procedural texture producing bricks. Texture: Retrieve multiple types of texture coordinates. Typically used as inputs for texture nodes. Vector: Convert a point
static btDbvtVolume bounds(btDbvtNode **leaves, int count)
void refit(btStridingMeshInterface *triangles, const btVector3 &aabbMin, const btVector3 &aabbMax)
SIMD_FORCE_INLINE btScalar length(const btQuaternion &q)
Return the length of a quaternion.
Attribute * find(ustring name) const
Curve get_curve(size_t i) const
size_t curve_segment_offset
size_t num_curves() const
CurveShapeType curve_shape
CCL_NAMESPACE_BEGIN struct Options options
#define CCL_NAMESPACE_END
draw_view in_light_buf[] float
draw_view push_constant(Type::INT, "radiance_src") .push_constant(Type capture_info_buf storage_buf(1, Qualifier::READ, "ObjectBounds", "bounds_buf[]") .push_constant(Type draw_view int
@ ATTR_STD_MOTION_VERTEX_POSITION
ccl_device_inline float4 mask(const int4 mask, const float4 a)
ThreadQueue * queue
all scheduled work for the cpu
T step(const T &edge, const T &value)
unsigned __int64 uint64_t
size_t num_triangles() const
Point get_point(int i) const
size_t num_points() const
VecBase< float, 4 > float4
std::unique_lock< std::mutex > thread_scoped_lock
CCL_NAMESPACE_BEGIN typedef std::mutex thread_mutex