/* Construct the device queue for `device`: configure command-buffer error
 * reporting, borrow the Metal device/queue owned by MetalDevice, and create
 * the shared-event machinery used by synchronize().  Optional behavior is
 * driven by the CYCLES_METAL_PROFILING and CYCLES_METAL_DEBUG env vars.
 * NOTE(review): some original lines (braces/blanks) are elided in this view,
 * so the brace structure here is incomplete. */
24MetalDeviceQueue::MetalDeviceQueue(MetalDevice *device)
25 :
DeviceQueue(device), metal_device_(device), stats_(device->stats)
/* Request per-encoder execution status on command-buffer failure. */
28 command_buffer_desc_ = [[MTLCommandBufferDescriptor alloc]
init];
29 command_buffer_desc_.errorOptions = MTLCommandBufferErrorOptionEncoderExecutionStatus;
/* Device and command queue are owned by MetalDevice; not retained here. */
31 mtlDevice_ = device->mtlDevice;
32 mtlCommandQueue_ = device->mtlComputeCommandQueue;
/* Shared event + listener + semaphore implement CPU-side waits in
 * synchronize(). */
34 shared_event_ = [mtlDevice_ newSharedEvent];
38 event_queue_ = dispatch_queue_create(
"com.cycles.metal.event_queue",
nullptr);
39 shared_event_listener_ = [[MTLSharedEventListener alloc] initWithDispatchQueue:event_queue_];
41 wait_semaphore_ = dispatch_semaphore_create(0);
/* Opt-in per-kernel GPU timing, gated on stage-boundary counter sampling
 * support. */
43 if (
auto *
str = getenv(
"CYCLES_METAL_PROFILING")) {
44 if (atoi(
str) && [mtlDevice_ supportsCounterSampling:MTLCounterSamplingPointAtStageBoundary])
47 profiling_enabled_ =
true;
48 label_command_encoders_ =
true;
/* Debug mode: very verbose tracing plus labeled command encoders. */
51 if (getenv(
"CYCLES_METAL_DEBUG")) {
53 verbose_tracing_ =
true;
54 label_command_encoders_ =
true;
/* Configure optional Metal GPU capture from environment variables:
 * CYCLES_DEBUG_METAL_CAPTURE_KERNEL selects the kernel to capture,
 * CYCLES_DEBUG_METAL_CAPTURE_DISPATCH picks a single dispatch number,
 * CYCLES_DEBUG_METAL_CAPTURE_SAMPLES/_LIMIT capture a sample block, and
 * CYCLES_DEBUG_METAL_CAPTURE_URL writes a GPU trace document to disk.
 * NOTE(review): several original lines are elided from this view. */
61void MetalDeviceQueue::setup_capture()
65 if (
auto *capture_kernel_str = getenv(
"CYCLES_DEBUG_METAL_CAPTURE_KERNEL")) {
67 capture_kernel_ =
DeviceKernel(atoi(capture_kernel_str));
70 capture_dispatch_counter_ = 0;
/* Either capture one specific dispatch of the chosen kernel... */
71 if (
auto *capture_dispatch_str = getenv(
"CYCLES_DEBUG_METAL_CAPTURE_DISPATCH")) {
72 capture_dispatch_counter_ = atoi(capture_dispatch_str);
74 printf(
"Capture dispatch number %d\n", capture_dispatch_counter_);
/* ...or capture a whole sample block, optionally dispatch-limited. */
77 else if (
auto *capture_samples_str = getenv(
"CYCLES_DEBUG_METAL_CAPTURE_SAMPLES")) {
80 capture_samples_ =
true;
81 capture_reset_counter_ = atoi(capture_samples_str);
83 capture_dispatch_counter_ = INT_MAX;
84 if (
auto *capture_limit_str = getenv(
"CYCLES_DEBUG_METAL_CAPTURE_LIMIT")) {
86 capture_dispatch_counter_ = atoi(capture_limit_str);
89 printf(
"Capturing sample block %d (dispatch limit: %d)\n",
90 capture_reset_counter_,
91 capture_dispatch_counter_);
/* Create and register a labeled capture scope for Xcode's GPU debugger. */
99 MTLCaptureManager *captureManager = [MTLCaptureManager sharedCaptureManager];
100 mtlCaptureScope_ = [captureManager newCaptureScopeWithDevice:mtlDevice_];
101 mtlCaptureScope_.label = [NSString stringWithFormat:
@"Cycles kernel dispatch"];
102 [captureManager setDefaultCaptureScope:mtlCaptureScope_];
104 label_command_encoders_ =
true;
/* Capture straight to a .gputrace file when a destination URL is given. */
106 if (
auto *capture_url = getenv(
"CYCLES_DEBUG_METAL_CAPTURE_URL")) {
107 if ([captureManager supportsDestination:MTLCaptureDestinationGPUTraceDocument]) {
109 MTLCaptureDescriptor *captureDescriptor = [[MTLCaptureDescriptor alloc]
init];
110 captureDescriptor.captureObject = mtlCaptureScope_;
111 captureDescriptor.destination = MTLCaptureDestinationGPUTraceDocument;
112 captureDescriptor.outputURL = [NSURL fileURLWithPath:@(capture_url)];
/* Failure to start the capture is reported but non-fatal. */
115 if (![captureManager startCaptureWithDescriptor:captureDescriptor
error:&
error]) {
116 NSString *err = [
error localizedDescription];
117 printf(
"Start capture failed: %s\n", [err UTF8String]);
120 printf(
"Capture started (URL: %s)\n", capture_url);
121 is_capturing_to_disk_ =
true;
125 printf(
"Capture to file is not supported\n");
/* Per-dispatch capture bookkeeping: counts down the dispatch/sample budgets
 * configured in setup_capture().  NOTE(review): the lines that actually
 * begin/end the capture scope at these boundaries are elided from this view
 * -- confirm against the full source. */
130void MetalDeviceQueue::update_capture(
DeviceKernel kernel)
/* Count down remaining dispatches while a capture window is open. */
134 capture_dispatch_counter_ -= 1;
/* Dispatch budget exhausted. */
142 if (capture_dispatch_counter_ < 0) {
/* Reached the kernel selected for capture. */
148 if (kernel == capture_kernel_) {
150 if (capture_dispatch_counter_ == 0) {
153 capture_dispatch_counter_ -= 1;
/* Sample-block mode: count whole sample iterations instead. */
158 if (capture_samples_) {
160 if (capture_reset_counter_ == 0) {
165 capture_reset_counter_ -= 1;
/* Start a GPU capture scope.  NOTE(review): the body of the `if` below is
 * elided in this view; it presumably flushes in-flight work (synchronize())
 * before the scope begins -- confirm against the full source. */
171void MetalDeviceQueue::begin_capture()
174 if (mtlCommandBuffer_) {
177 [mtlCaptureScope_ beginScope];
178 printf(
"[mtlCaptureScope_ beginScope]\n");
179 is_capturing_ =
true;
void MetalDeviceQueue::end_capture()
{
  /* Close the GPU capture scope; when capturing to a trace file, also stop
   * the capture manager so the document is finalized on disk. */
  [mtlCaptureScope_ endScope];
  is_capturing_ = false;
  printf("[mtlCaptureScope_ endScope]\n");

  /* Nothing more to do for in-Xcode captures. */
  if (!is_capturing_to_disk_) {
    return;
  }

  [[MTLCaptureManager sharedCaptureManager] stopCapture];
  has_captured_to_disk_ = true;
  is_capturing_to_disk_ = false;
  is_capturing_ = false;
  printf("Capture stopped\n");
}
/* Destructor: release queue-owned Metal objects and, when timing stats were
 * gathered, print a per-kernel dispatch summary table.
 * NOTE(review): loop headers and the table-header string are elided from
 * this view. */
197MetalDeviceQueue::~MetalDeviceQueue()
/* All submitted work must have completed before destruction. */
201 assert(mtlCommandBuffer_ == nil);
202 assert(command_buffers_submitted_ == command_buffers_completed_);
204 close_compute_encoder();
205 close_blit_encoder();
/* File uses manual retain/release (no ARC): balance the ctor's creates. */
207 [shared_event_listener_ release];
208 [shared_event_ release];
209 [command_buffer_desc_ release];
211 if (mtlCaptureScope_) {
212 [mtlCaptureScope_ release];
/* Tally dispatch counts; path-tracing kernels are totaled separately. */
219 int64_t num_pathtracing_dispatches = 0;
221 auto &stat = timing_stats_[
i];
225 num_dispatches += stat.num_dispatches;
226 num_pathtracing_dispatches += pathtracing_kernel ? stat.num_dispatches : 0;
228 bool has_extra = (num_pathtracing_dispatches && num_dispatches > num_pathtracing_dispatches);
/* Emit the formatted stats table only when anything was dispatched. */
230 if (num_dispatches) {
231 printf(
"\nMetal %sdispatch stats:\n", num_pathtracing_dispatches ?
"path-tracing " :
"");
239 auto divider = string(header.length(),
'-');
240 printf(
"%s\n%s\n%s\n", divider.c_str(), header.c_str(), divider.c_str());
/* One row per kernel with any dispatches. */
243 auto &stat = timing_stats_[
i];
247 if ((pathtracing_kernel && num_pathtracing_dispatches) || stat.num_dispatches > 0) {
248 printf(
"%-40s %16llu %12llu %12llu %9.4f %9.2f\n",
250 stat.total_work_size,
252 stat.total_work_size / stat.num_dispatches,
257 printf(
"%s\n", divider.c_str());
/* Totals row at 100%. */
260 printf(
"%s\n", divider.c_str());
261 printf(
"%-40s %16s %12llu %12s %9.4f %9.2f\n",
"",
"", num_dispatches,
"",
total_time, 100.0);
262 printf(
"%s\n\n", divider.c_str());
/* Decide how many integrator states can be in flight at once.  On non-M1
 * GPUs the count is grown when the recommended working set leaves enough
 * headroom.  NOTE(review): the initialization of `result` and the doubling
 * statement are elided from this view -- confirm against the full source. */
266int MetalDeviceQueue::num_concurrent_states(
const size_t state_size)
const
276 if (MetalInfo::get_apple_gpu_architecture(metal_device_->mtlDevice) != APPLE_M1) {
/* Compare current allocation plus doubled state memory against the
 * recommended working set, keeping max(RAM/8, 1 GiB) of headroom. */
278 size_t allocated_so_far = [metal_device_->mtlDevice currentAllocatedSize];
279 size_t max_recommended_working_set = [metal_device_->mtlDevice recommendedMaxWorkingSetSize];
284 size_t min_headroom = std::max(system_ram / 8,
size_t(1024 * 1024 * 1024));
285 size_t total_state_size =
result * state_size;
286 if (max_recommended_working_set - allocated_so_far - total_state_size * 2 >= min_headroom) {
288 metal_printf(
"Doubling state count to exploit available RAM (new size = %d)\n",
result);
int MetalDeviceQueue::num_concurrent_busy_states(const size_t state_size) const
{
  /* Permit one quarter of the total concurrent states to be busy at once. */
  const int total_states = num_concurrent_states(state_size);
  return total_states / 4;
}
/* Choose the number of sort partitions for shader sorting.  Partitioning is
 * only applied below a scene-shader-count threshold, since its benefit drops
 * with many shaders.  NOTE(review): the fallback return for the
 * high-shader-count / disabled case is elided from this view -- confirm
 * against the full source. */
300int MetalDeviceQueue::num_sort_partitions(
int max_num_paths,
uint max_scene_shaders)
const
302 int sort_partition_elements = MetalInfo::optimal_sort_partition_elements();
307 if (max_scene_shaders < 300 && sort_partition_elements > 0) {
/* At least one partition, each covering `sort_partition_elements` paths. */
308 return max(max_num_paths / sort_partition_elements, 1);
bool MetalDeviceQueue::supports_local_atomic_sort() const
{
  /* Delegates entirely to the owning device's capability query. */
  const bool supported = metal_device_->use_local_atomic_sort();
  return supported;
}
/* Prepare for kernel execution: push texture info to the device.
 * NOTE(review): the remainder of this body is elided from this view;
 * it presumably synchronizes afterwards -- confirm against the full source. */
320void MetalDeviceQueue::init_execution()
323 metal_device_->load_texture_info();
/* NOTE(review): this span is the interior of the kernel-enqueue method; its
 * signature and many interleaved lines are elided from this view.  The
 * comments below describe only what the visible code demonstrates. */
/* GPU-capture bookkeeping for this dispatch. */
333 update_capture(kernel);
/* Bail out early when the device is already in an error state. */
335 if (metal_device_->have_error()) {
/* Obtain (or open) a compute encoder suited to this kernel. */
342 id<MTLComputeCommandEncoder> mtlComputeCommandEncoder = get_compute_encoder(kernel);
/* Record kernel/work-size/encoder index so flush_timing_stats() can
 * attribute GPU timestamps to this dispatch later. */
344 if (profiling_enabled_) {
345 command_encoder_labels_.push_back({kernel,
work_size, current_encoder_idx_});
/* Compute the argument-buffer layout: per-arg data (each aligned to its own
 * size), then 256-byte-aligned regions for the globals and the ancillary
 * argument encoder. */
349 size_t arg_buffer_length = 0;
350 for (
size_t i = 0;
i < args.
count;
i++) {
351 size_t size_in_bytes = args.
sizes[
i];
352 arg_buffer_length =
round_up(arg_buffer_length, size_in_bytes) + size_in_bytes;
355 arg_buffer_length =
round_up(arg_buffer_length, 256);
358 size_t globals_offsets = arg_buffer_length;
360 arg_buffer_length =
round_up(arg_buffer_length, 256);
363 size_t metal_offsets = arg_buffer_length;
364 arg_buffer_length += metal_device_->mtlAncillaryArgEncoder.encodedLength;
365 arg_buffer_length =
round_up(arg_buffer_length,
366 metal_device_->mtlAncillaryArgEncoder.alignment);
/* Stage the initial argument-buffer contents on the CPU. */
370 memset(init_arg_buffer, 0, arg_buffer_length);
/* Copy plain kernel arguments, each aligned to its own size. */
373 size_t bytes_written = 0;
374 for (
size_t i = 0;
i < args.
count;
i++) {
375 size_t size_in_bytes = args.
sizes[
i];
376 bytes_written =
round_up(bytes_written, size_in_bytes);
378 memcpy(init_arg_buffer + bytes_written, args.
values[
i], size_in_bytes);
380 bytes_written += size_in_bytes;
/* Copy the plain-old-data tail of KernelParamsMetal into the globals
 * region. */
387 size_t plain_old_launch_data_size =
sizeof(
KernelParamsMetal) - plain_old_launch_data_offset;
388 memcpy(init_arg_buffer + globals_offsets + plain_old_launch_data_offset,
389 (uint8_t *)&metal_device_->launch_params + plain_old_launch_data_offset,
390 plain_old_launch_data_size);
/* Upload to a pooled GPU buffer tied to the current command buffer. */
393 id<MTLBuffer> arg_buffer = temp_buffer_pool_.get_buffer(
394 mtlDevice_, mtlCommandBuffer_, arg_buffer_length, init_arg_buffer, stats_);
/* Encode pointer arguments: buffer-backed args are bound and marked used;
 * others are encoded as nil (macOS 12+ path). */
398 for (
size_t i = 0;
i < args.
count;
i++) {
399 size_t size_in_bytes = args.
sizes[
i];
400 bytes_written =
round_up(bytes_written, size_in_bytes);
402 [metal_device_->mtlBufferKernelParamsEncoder setArgumentBuffer:arg_buffer
403 offset:bytes_written];
404 if (MetalDevice::MetalMem *mmem = *(MetalDevice::MetalMem **)args.
values[
i]) {
405 [mtlComputeCommandEncoder useResource:mmem->mtlBuffer
406 usage:MTLResourceUsageRead | MTLResourceUsageWrite];
407 [metal_device_->mtlBufferKernelParamsEncoder setBuffer:mmem->mtlBuffer
412 if (@available(macos 12.0, *)) {
413 [metal_device_->mtlBufferKernelParamsEncoder setBuffer:nil offset:0 atIndex:0];
417 bytes_written += size_in_bytes;
/* Switch the params encoder to the globals region. */
421 [metal_device_->mtlBufferKernelParamsEncoder setArgumentBuffer:arg_buffer
422 offset:globals_offsets];
/* Label encoders for easier debugging / GPU-capture navigation. */
424 if (label_command_encoders_) {
426 mtlComputeCommandEncoder.label = [NSString
427 stringWithFormat:
@"Metal queue launch %s, work_size %d",
/* Bind the device-pointer members of launch_params by index. */
435 for (
size_t offset = 0; offset < pointer_block_end; offset +=
sizeof(
device_ptr)) {
436 int pointer_index = int(offset /
sizeof(
device_ptr));
437 MetalDevice::MetalMem *mmem = *(
438 MetalDevice::MetalMem **)((uint8_t *)&metal_device_->launch_params + offset);
439 if (mmem && mmem->mem && (mmem->mtlBuffer || mmem->mtlTexture)) {
440 [metal_device_->mtlBufferKernelParamsEncoder setBuffer:mmem->mtlBuffer
442 atIndex:pointer_index];
445 if (@available(macos 12.0, *)) {
446 [metal_device_->mtlBufferKernelParamsEncoder setBuffer:nil
448 atIndex:pointer_index];
/* Ensure a pipeline state object exists for this kernel; error out
 * otherwise. */
454 if (!active_pipelines_[kernel].
update(metal_device_, kernel)) {
455 metal_device_->set_error(
459 MetalDispatchPipeline &active_pipeline = active_pipelines_[kernel];
/* Encode ancillary resources: texture/buffer binding tables and, when
 * available, the acceleration structure and BLAS table. */
462 [metal_device_->mtlAncillaryArgEncoder setArgumentBuffer:arg_buffer offset:metal_offsets];
463 [metal_device_->mtlAncillaryArgEncoder setBuffer:metal_device_->texture_bindings_2d
466 [metal_device_->mtlAncillaryArgEncoder setBuffer:metal_device_->texture_bindings_3d
469 [metal_device_->mtlAncillaryArgEncoder setBuffer:metal_device_->buffer_bindings_1d
473 if (@available(macos 12.0, *)) {
475 if (id<MTLAccelerationStructure> accel_struct = metal_device_->accel_struct) {
476 [metal_device_->mtlAncillaryArgEncoder setAccelerationStructure:accel_struct atIndex:3];
477 [metal_device_->mtlAncillaryArgEncoder setBuffer:metal_device_->blas_buffer
479 atIndex:(METALRT_TABLE_NUM + 4)];
/* Bind the intersection function tables (or nil when absent). */
482 for (
int table = 0; table < METALRT_TABLE_NUM; table++) {
483 if (active_pipeline.intersection_func_table[table]) {
484 [active_pipeline.intersection_func_table[table] setBuffer:arg_buffer
485 offset:globals_offsets
487 [metal_device_->mtlAncillaryArgEncoder
488 setIntersectionFunctionTable:active_pipeline.intersection_func_table[table]
490 [mtlComputeCommandEncoder useResource:active_pipeline.intersection_func_table[table]
491 usage:MTLResourceUsageRead];
494 [metal_device_->mtlAncillaryArgEncoder setIntersectionFunctionTable:nil
/* Bind the three argument-buffer regions to kernel buffer slots 0..2. */
499 bytes_written = metal_offsets + metal_device_->mtlAncillaryArgEncoder.encodedLength;
502 [mtlComputeCommandEncoder setBuffer:arg_buffer offset:0 atIndex:0];
503 [mtlComputeCommandEncoder setBuffer:arg_buffer offset:globals_offsets atIndex:1];
504 [mtlComputeCommandEncoder setBuffer:arg_buffer offset:metal_offsets atIndex:2];
/* Declare ray-tracing resource usage so Metal keeps them resident. */
507 if (@available(macos 12.0, *)) {
509 if (id<MTLAccelerationStructure> accel_struct = metal_device_->accel_struct) {
511 [mtlComputeCommandEncoder useResource:accel_struct usage:MTLResourceUsageRead];
512 if (metal_device_->blas_buffer) {
513 [mtlComputeCommandEncoder useResource:metal_device_->blas_buffer
514 usage:MTLResourceUsageRead];
516 [mtlComputeCommandEncoder useResources:metal_device_->unique_blas_array.data()
517 count:metal_device_->unique_blas_array.size()
518 usage:MTLResourceUsageRead];
/* Select the pipeline and compute threadgroup shared-memory needs
 * (kernel-dependent; e.g. sized by shader count for the sort kernel). */
523 [mtlComputeCommandEncoder setComputePipelineState:active_pipeline.pipeline];
526 const int num_threads_per_block = active_pipeline.num_threads_per_block;
528 int shared_mem_bytes = 0;
541 shared_mem_bytes = (int)
round_up((num_threads_per_block + 1) *
sizeof(
int), 16);
546 int key_count = metal_device_->launch_params.data.max_shaders;
547 shared_mem_bytes = (int)
round_up(key_count *
sizeof(
int), 16);
555 if (shared_mem_bytes) {
/* Metal caps threadgroup memory at 32 KiB. */
556 assert(shared_mem_bytes <= 32 * 1024);
557 [mtlComputeCommandEncoder setThreadgroupMemoryLength:shared_mem_bytes atIndex:0];
/* Dispatch `work_size` threads in 1D. */
560 MTLSize size_threads_per_dispatch = MTLSizeMake(
work_size, 1, 1);
561 MTLSize size_threads_per_threadgroup = MTLSizeMake(num_threads_per_block, 1, 1);
562 [mtlComputeCommandEncoder dispatchThreads:size_threads_per_dispatch
563 threadsPerThreadgroup:size_threads_per_threadgroup];
/* Report command-buffer failures (with error detail) on GPU completion. */
565 [mtlCommandBuffer_ addCompletedHandler:^(id<MTLCommandBuffer> command_buffer) {
568 if (command_buffer.status != MTLCommandBufferStatusCompleted) {
570 int(command_buffer.status));
572 if (command_buffer.error) {
574 const char *errCStr = [[NSString stringWithFormat:
@"%@", command_buffer.error]
577 kernel_type_as_string(active_pipeline.pso_type),
583 metal_device_->set_error(
str);
/* Verbose tracing: dump per-dispatch timing and the integrator queue
 * counter buckets. */
587 if (verbose_tracing_ || is_capturing_) {
592 if (verbose_tracing_) {
595 "_____________________________________.____________________.______________._________"
597 "______________________________________\n");
600 printf(
"%-40s| %7d threads |%5.2fms | buckets [",
603 last_completion_time_ * 1000.0);
/* metal_mem_map is shared with other threads; guard the scan. */
604 std::lock_guard<std::recursive_mutex>
lock(metal_device_->metal_mem_map_mutex);
605 for (
auto &it : metal_device_->metal_mem_map) {
606 const string c_integrator_queue_counter =
"integrator_queue_counter";
607 if (it.first->name == c_integrator_queue_counter) {
609 it.first->host_pointer)
612 printf(
"%s%d",
i == 0 ?
"" :
",", queue_counter->num_queued[
i]);
/* Success unless an error was raised along the way. */
622 return !(metal_device_->have_error());
void MetalDeviceQueue::flush_timing_stats()
{
  /* Resolve the GPU timestamps recorded for every compute encoder dispatched
   * since the last flush, and fold them into the per-kernel timing stats.
   * Fix: iterate labels by const reference instead of copying each label
   * struct per iteration. */
  for (const auto &label : command_encoder_labels_) {
    TimingStats &stat = timing_stats_[label.kernel];

    /* Each encoder wrote a begin/end timestamp pair starting at
     * label.timing_id in the counter sample buffer. */
    NSData *computeTimeStamps = [metal_device_->mtlCounterSampleBuffer
        resolveCounterRange:NSMakeRange(label.timing_id, 2)];
    MTLCounterResultTimestamp *timestamps = (MTLCounterResultTimestamp *)(computeTimeStamps.bytes);

    uint64_t begTime = timestamps[0].timestamp;
    uint64_t endTime = timestamps[1].timestamp;
    /* Timestamps are in nanoseconds; convert the delta to seconds. */
    const double completion_time_gpu = (endTime - begTime) / (double)NSEC_PER_SEC;

    stat.num_dispatches++;
    stat.total_time += completion_time_gpu;
    stat.total_work_size += label.work_size;
    last_completion_time_ = completion_time_gpu;
  }
  command_encoder_labels_.clear();
}
/* Block until all submitted GPU work has completed.  The command buffer
 * signals `shared_event_` at a fresh monotonically increasing value; a
 * listener block posts `wait_semaphore_`, on which this thread waits.
 * Returns false when the device is in an error state.
 * NOTE(review): early-return bodies and the listener-block terminator are
 * elided from this view. */
648bool MetalDeviceQueue::synchronize()
/* Nothing to wait for after a disk capture or when already errored. */
651 if (has_captured_to_disk_ || metal_device_->have_error()) {
655 close_compute_encoder();
656 close_blit_encoder();
658 if (mtlCommandBuffer_) {
/* Unique event value for this synchronize call. */
661 uint64_t shared_event_id_ = this->shared_event_id_++;
/* Capture the semaphore so the listener block can signal it. */
663 __block dispatch_semaphore_t block_sema = wait_semaphore_;
664 [shared_event_ notifyListener:shared_event_listener_
665 atValue:shared_event_id_
666 block:^(id<MTLSharedEvent> ,
uint64_t ) {
667 dispatch_semaphore_signal(block_sema);
/* Commit the buffer with a trailing event signal, then wait for it. */
670 [mtlCommandBuffer_ encodeSignalEvent:shared_event_ value:shared_event_id_];
671 [mtlCommandBuffer_ commit];
672 dispatch_semaphore_wait(wait_semaphore_, DISPATCH_TIME_FOREVER);
/* Drop our reference; below the pointer value is only used as a lookup key
 * for the temp-buffer pool, not dereferenced -- TODO confirm pool API. */
674 [mtlCommandBuffer_ release];
676 temp_buffer_pool_.process_command_buffer_completion(mtlCommandBuffer_);
677 metal_device_->flush_delayed_free_list();
679 mtlCommandBuffer_ = nil;
680 flush_timing_stats();
683 return !(metal_device_->have_error());
/* NOTE(review): interior of a memory-zeroing method (signature elided from
 * this view).  Bails on device error, lazily allocates device memory, then
 * clears via a GPU blit fill when a Metal buffer backs the allocation,
 * otherwise falls back to the device-side zero. */
690 if (metal_device_->have_error()) {
702 metal_device_->mem_alloc(mem);
/* metal_mem_map is shared with other threads; guard the lookup. */
708 std::lock_guard<std::recursive_mutex>
lock(metal_device_->metal_mem_map_mutex);
709 MetalDevice::MetalMem &mmem = *metal_device_->metal_mem_map.at(&mem);
710 if (mmem.mtlBuffer) {
/* Zero on the GPU with a blit fill over the allocation's range. */
711 id<MTLBlitCommandEncoder> blitEncoder = get_blit_encoder();
712 [blitEncoder fillBuffer:mmem.mtlBuffer range:NSMakeRange(mmem.offset, mmem.size) value:0];
715 metal_device_->mem_zero(mem);
/* NOTE(review): fragment of a host<->device copy method (most lines elided
 * from this view): bail on device error; allocate device memory on first
 * use. */
723 if (metal_device_->have_error()) {
733 metal_device_->mem_alloc(mem);
/* NOTE(review): interior of the resource-preparation routine (signature
 * elided from this view).  Declares useResource usage for every mapped
 * allocation plus the binding tables so Metal makes them resident for the
 * upcoming dispatch. */
749 std::lock_guard<std::recursive_mutex>
lock(metal_device_->metal_mem_map_mutex);
752 for (
auto &it : metal_device_->metal_mem_map) {
/* Default read usage; an (elided) condition upgrades to read/write. */
755 MTLResourceUsage usage = MTLResourceUsageRead;
757 usage |= MTLResourceUsageWrite;
760 if (it.second->mtlBuffer) {
762 [mtlComputeEncoder_ useResource:it.second->mtlBuffer usage:usage];
764 else if (it.second->mtlTexture) {
/* Textures additionally need sample usage. */
766 [mtlComputeEncoder_ useResource:it.second->mtlTexture usage:usage | MTLResourceUsageSample];
/* Binding tables are read-only. */
771 [mtlComputeEncoder_ useResource:metal_device_->texture_bindings_2d usage:MTLResourceUsageRead];
772 [mtlComputeEncoder_ useResource:metal_device_->texture_bindings_3d usage:MTLResourceUsageRead];
773 [mtlComputeEncoder_ useResource:metal_device_->buffer_bindings_1d usage:MTLResourceUsageRead];
/* Return an open compute encoder for `kernel`, reusing the current one when
 * its dispatch type matches, otherwise closing open encoders and creating a
 * new one (with counter-sampling attachments when profiling is enabled).
 * NOTE(review): several lines are elided, including the declaration of
 * `concurrent`.  Also, as written the comparison below parses as
 * `(dispatchType == concurrent) ? ... : ...` since `==` binds tighter than
 * `?:`; the intent is almost certainly
 * `dispatchType == (concurrent ? MTLDispatchTypeConcurrent : MTLDispatchTypeSerial)`
 * -- verify the parentheses against the full source. */
776id<MTLComputeCommandEncoder> MetalDeviceQueue::get_compute_encoder(
DeviceKernel kernel)
/* When profiling, every dispatch gets its own encoder (own timestamps). */
780 if (profiling_enabled_) {
782 close_compute_encoder();
/* Reuse the open encoder when the dispatch type is compatible. */
785 if (mtlComputeEncoder_) {
786 if (mtlComputeEncoder_.dispatchType == concurrent ? MTLDispatchTypeConcurrent :
787 MTLDispatchTypeSerial)
790 prepare_resources(kernel);
792 return mtlComputeEncoder_;
794 close_compute_encoder();
/* Compute and blit encoders are mutually exclusive on a command buffer. */
797 close_blit_encoder();
/* Lazily begin (and manually retain) a command buffer. */
799 if (!mtlCommandBuffer_) {
800 mtlCommandBuffer_ = [mtlCommandQueue_ commandBuffer];
801 [mtlCommandBuffer_ retain];
/* Profiling path: attach begin/end timestamp samples to the new encoder. */
804 if (profiling_enabled_) {
805 MTLComputePassDescriptor *desc = [[MTLComputePassDescriptor alloc]
init];
807 current_encoder_idx_ = (counter_sample_buffer_curr_idx_.fetch_add(2) %
808 MAX_SAMPLE_BUFFER_LENGTH);
809 [desc.sampleBufferAttachments[0] setSampleBuffer:metal_device_->mtlCounterSampleBuffer];
810 [desc.sampleBufferAttachments[0] setStartOfEncoderSampleIndex:current_encoder_idx_];
811 [desc.sampleBufferAttachments[0] setEndOfEncoderSampleIndex:current_encoder_idx_ + 1];
813 [desc setDispatchType:concurrent ? MTLDispatchTypeConcurrent : MTLDispatchTypeSerial];
815 mtlComputeEncoder_ = [mtlCommandBuffer_ computeCommandEncoderWithDescriptor:desc];
818 mtlComputeEncoder_ = [mtlCommandBuffer_
819 computeCommandEncoderWithDispatchType:concurrent ? MTLDispatchTypeConcurrent :
820 MTLDispatchTypeSerial];
823 [mtlComputeEncoder_ retain];
827 prepare_resources(kernel);
829 return mtlComputeEncoder_;
id<MTLBlitCommandEncoder> MetalDeviceQueue::get_blit_encoder()
{
  /* Reuse an already-open blit encoder whenever possible. */
  if (mtlBlitEncoder_ != nil) {
    return mtlBlitEncoder_;
  }

  /* Blit and compute encoders cannot be open at the same time. */
  close_compute_encoder();

  /* Lazily begin (and manually retain) a command buffer for this batch. */
  if (mtlCommandBuffer_ == nil) {
    mtlCommandBuffer_ = [mtlCommandQueue_ commandBuffer];
    [mtlCommandBuffer_ retain];
  }

  mtlBlitEncoder_ = [mtlCommandBuffer_ blitCommandEncoder];
  [mtlBlitEncoder_ retain];
  return mtlBlitEncoder_;
}
void MetalDeviceQueue::close_compute_encoder()
{
  /* End and drop the active compute encoder, if one is open. */
  if (mtlComputeEncoder_ == nil) {
    return;
  }
  [mtlComputeEncoder_ endEncoding];
  [mtlComputeEncoder_ release];
  mtlComputeEncoder_ = nil;
}
void MetalDeviceQueue::close_blit_encoder()
{
  /* End and drop the active blit encoder, if one is open. */
  if (mtlBlitEncoder_ == nil) {
    return;
  }
  [mtlBlitEncoder_ endEncoding];
  [mtlBlitEncoder_ release];
  mtlBlitEncoder_ = nil;
}
void *MetalDeviceQueue::native_queue()
{
  /* Expose the underlying MTLCommandQueue for interop with external code. */
  id<MTLCommandQueue> queue = mtlCommandQueue_;
  return queue;
}
/* NOTE(review): tail of a graphics-interop factory method (signature elided
 * from this view); returns a new MetalDeviceGraphicsInterop bound to this
 * queue. */
875 return make_unique<MetalDeviceGraphicsInterop>(
this);
unsigned long long int uint64_t
device_ptr device_pointer
#define CCL_NAMESPACE_END
bool device_kernel_has_intersection(DeviceKernel kernel)
const char * device_kernel_as_string(DeviceKernel kernel)
#define assert(assertion)
ccl_gpu_kernel_postfix const ccl_global int ccl_global float const int work_size
@ DEVICE_KERNEL_INTEGRATOR_NUM
@ DEVICE_KERNEL_INTEGRATOR_RESET
@ DEVICE_KERNEL_INTEGRATOR_QUEUED_PATHS_ARRAY
@ DEVICE_KERNEL_INTEGRATOR_SORT_WRITE_PASS
@ DEVICE_KERNEL_INTEGRATOR_QUEUED_SHADOW_PATHS_ARRAY
@ DEVICE_KERNEL_INTEGRATOR_TERMINATED_PATHS_ARRAY
@ DEVICE_KERNEL_INTEGRATOR_SORTED_PATHS_ARRAY
@ DEVICE_KERNEL_INTEGRATOR_COMPACT_SHADOW_PATHS_ARRAY
@ DEVICE_KERNEL_INTEGRATOR_SORT_BUCKET_PASS
@ DEVICE_KERNEL_INTEGRATOR_TERMINATED_SHADOW_PATHS_ARRAY
@ DEVICE_KERNEL_INTEGRATOR_MEGAKERNEL
@ DEVICE_KERNEL_INTEGRATOR_ACTIVE_PATHS_ARRAY
@ DEVICE_KERNEL_INTEGRATOR_COMPACT_PATHS_ARRAY
#define VLOG_DEVICE_STATS
static void error(const char *str)
static void update(bNodeTree *ntree)
static void init(bNodeTree *, bNode *node)
CCL_NAMESPACE_BEGIN string string_printf(const char *format,...)
size_t system_physical_ram()
ccl_device_inline size_t round_up(const size_t x, const size_t multiple)