/* Compile-time constant with the value 4096.
 * NOTE(review): the name suggests an upper bound on the number of entries in a
 * sample buffer (cf. `counter_sample_buffer_curr_idx_`), but no use of it is
 * visible in this part of the file -- confirm against the implementation. */
16# define MAX_SAMPLE_BUFFER_LENGTH 4096
/* Construct a queue from a pointer to a MetalDevice.
 * NOTE(review): presumably the pointer is stored in `metal_device_` and is
 * non-owning; the constructor body is not visible here -- confirm. */
25 MetalDeviceQueue(MetalDevice *device);
/* Destructor. Marked `override`, so it replaces a virtual destructor declared
 * in the base class. */
26 ~MetalDeviceQueue()
override;
37 const DeviceKernelArguments &args)
override;
/* Return a Metal compute command encoder for `kernel`.
 * NOTE(review): presumably creates or reuses `mtlComputeEncoder_`; the
 * implementation is not visible here -- confirm. */
56 id<MTLComputeCommandEncoder> get_compute_encoder(
DeviceKernel kernel);
/* Return a Metal blit command encoder.
 * NOTE(review): presumably creates or reuses `mtlBlitEncoder_` -- confirm. */
57 id<MTLBlitCommandEncoder> get_blit_encoder();
/* Raw pointer to the MetalDevice this queue is associated with. It has no
 * in-class initializer.
 * NOTE(review): presumably set from the constructor argument and not owned by
 * the queue -- confirm. */
59 MetalDevice *metal_device_;
/* Buffer pool held by value.
 * NOTE(review): the name suggests it serves short-lived temporary buffers;
 * MetalBufferPool is declared elsewhere -- confirm its recycling policy there. */
60 MetalBufferPool temp_buffer_pool_;
/* Metal object handles held by the queue. Every one of them defaults to a
 * null value (`nullptr` / `nil`) through its in-class initializer. */

/* Command buffer descriptor. The availability annotation restricts it to
 * macOS 11.0+ and iOS 14.0+. */
62 API_AVAILABLE(macos(11.0), ios(14.0))
63 MTLCommandBufferDescriptor *command_buffer_desc_ =
nullptr;
/* Metal device, command queue and the current command buffer.
 * NOTE(review): "current" is inferred from the names; where these are created
 * and released is not visible here. */
64 id<MTLDevice> mtlDevice_ = nil;
65 id<MTLCommandQueue> mtlCommandQueue_ = nil;
66 id<MTLCommandBuffer> mtlCommandBuffer_ = nil;
/* Compute and blit encoders.
 * NOTE(review): presumably the encoders handed out by get_compute_encoder()
 * and get_blit_encoder() and ended by the close_*_encoder() members -- confirm. */
67 id<MTLComputeCommandEncoder> mtlComputeEncoder_ = nil;
68 id<MTLBlitCommandEncoder> mtlBlitEncoder_ = nil;
/* Shared event and its listener. Both are annotated as available on
 * macOS 10.14+ and iOS 14.0+ only. */
69 API_AVAILABLE(macos(10.14), ios(14.0))
70 id<MTLSharedEvent> shared_event_ = nil;
71 API_AVAILABLE(macos(10.14), ios(14.0))
72 MTLSharedEventListener *shared_event_listener_ = nil;
/* Dispatch queue and semaphore. Neither has an in-class initializer, so they
 * must be set up elsewhere.
 * NOTE(review): presumably used together with `shared_event_listener_` to
 * block until GPU work has completed -- confirm in the implementation. */
75 dispatch_queue_t event_queue_;
76 dispatch_semaphore_t wait_semaphore_;
/* 64-bit counters, both starting at 0.
 * NOTE(review): the names suggest running totals of command buffers submitted
 * and completed; where they are incremented is not visible here. */
79 uint64_t command_buffers_submitted_ = 0;
80 uint64_t command_buffers_completed_ = 0;
/* Close the compute encoder / the blit encoder. Both take no arguments and
 * return nothing.
 * NOTE(review): presumably they end encoding on `mtlComputeEncoder_` /
 * `mtlBlitEncoder_` and reset the member; the bodies are not visible here. */
83 void close_compute_encoder();
84 void close_blit_encoder();
/* Diagnostic switches, all off (`false`) by default.
 * NOTE(review): what turns them on (environment variables, debug flags, ...)
 * is not visible in this part of the file. */
86 bool verbose_tracing_ =
false;
87 bool label_command_encoders_ =
false;
/* Vector of TimingData entries; TimingData is declared outside this view.
 * NOTE(review): presumably one entry per labelled command encoder -- confirm. */
96 std::vector<TimingData> command_encoder_labels_;
/* Profiling switch, off by default. */
97 bool profiling_enabled_ =
false;
/* Atomic 64-bit index, initialised to 0.
 * NOTE(review): presumably the next free slot in a counter sample buffer, made
 * atomic because it is touched from more than one thread -- confirm. */
100 std::atomic<uint64_t> counter_sample_buffer_curr_idx_ = 0;
/* Flush collected timing statistics. Takes no arguments and returns nothing.
 * NOTE(review): the destination of the flush (log, stdout, ...) is not visible
 * here. */
102 void flush_timing_stats();
/* Floating-point timestamp, initialised to 0.0.
 * NOTE(review): units and clock source are not visible here -- confirm. */
110 double last_completion_time_ = 0.0;
/* Capture-related state. */

/* Metal capture scope handle, `nil` by default. */
114 id<MTLCaptureScope> mtlCaptureScope_ = nil;
/* Integer counters starting at 0 and boolean flags starting at `false`.
 * NOTE(review): the names suggest bookkeeping for when a GPU capture starts,
 * resets and whether it targets a file on disk; the logic that reads and
 * writes them is not visible here -- confirm in the implementation. */
116 int capture_dispatch_counter_ = 0;
117 bool capture_samples_ =
false;
118 int capture_reset_counter_ = 0;
119 bool is_capturing_ =
false;
120 bool is_capturing_to_disk_ =
false;
121 bool has_captured_to_disk_ =
false;
unsigned long long int uint64_t
virtual int num_concurrent_busy_states(const size_t state_size) const =0
virtual void copy_from_device(device_memory &mem)=0
virtual bool supports_local_atomic_sort() const
virtual int num_concurrent_states(const size_t state_size) const =0
virtual bool enqueue(DeviceKernel kernel, const int work_size, const DeviceKernelArguments &args)=0
virtual void init_execution()=0
virtual void copy_to_device(device_memory &mem)=0
virtual unique_ptr< DeviceGraphicsInterop > graphics_interop_create()
virtual int num_sort_partitions(int max_num_paths, uint max_scene_shaders) const
virtual bool synchronize()=0
virtual void * native_queue()
virtual void zero_to_device(device_memory &mem)=0
#define CCL_NAMESPACE_END
ccl_gpu_kernel_postfix const ccl_global int ccl_global float const int work_size