14# include <IOKit/IOKitLib.h>
/* Returns a human-readable name for `device`, e.g. "Apple M2 Max (GPU - 38 cores)".
 * The GPU core count is appended so benchmarks can distinguish GPU variants that
 * share a marketing name. A core count of 0 (could not be queried) omits the count. */
string MetalInfo::get_device_name(id<MTLDevice> device)
{
  string device_name = [device.name UTF8String];

  /* Append the GPU core count when it is known. */
  int gpu_core_count = get_apple_gpu_core_count(device);
  device_name += string_printf(gpu_core_count ? " (GPU - %d cores)" : " (GPU)", gpu_core_count);

  return device_name;
}
/* Returns the number of GPU cores of an Apple-silicon `device` by reading the
 * "gpu-core-count" property of its IO registry entry, or 0 if unavailable
 * (non-Apple GPU, pre-macOS 12, or property missing). */
int MetalInfo::get_apple_gpu_core_count(id<MTLDevice> device)
{
  int core_count = 0;
  /* MTLDevice.registryID lookup requires macOS 12+. */
  if (@available(macos 12.0, *)) {
    io_service_t gpu_service = IOServiceGetMatchingService(
        kIOMainPortDefault, IORegistryEntryIDMatching(device.registryID));
    if (CFNumberRef numberRef = (CFNumberRef)IORegistryEntryCreateCFProperty(
            gpu_service, CFSTR("gpu-core-count"), nullptr, 0))
    {
      if (CFGetTypeID(numberRef) == CFNumberGetTypeID()) {
        CFNumberGetValue(numberRef, kCFNumberSInt32Type, &core_count);
      }
      /* IORegistryEntryCreateCFProperty follows the Create Rule: we own the
       * returned object and must release it to avoid leaking one CFNumber
       * per call. */
      CFRelease(numberRef);
    }
    /* Balance the reference returned by IOServiceGetMatchingService(). */
    if (gpu_service != IO_OBJECT_NULL) {
      IOObjectRelease(gpu_service);
    }
  }
  return core_count;
}
/* Classifies an Apple GPU by substring match on the device name
 * (e.g. "Apple M2 Max" matches the "M2" branch).
 * NOTE(review): the M1/M3 return values and the fall-through default were
 * truncated in the reviewed chunk and are reconstructed here — verify the
 * enum member names against the AppleGPUArchitecture declaration. */
AppleGPUArchitecture MetalInfo::get_apple_gpu_architecture(id<MTLDevice> device)
{
  const char *device_name = [device.name UTF8String];
  if (strstr(device_name, "M1")) {
    return APPLE_M1;
  }
  if (strstr(device_name, "M2")) {
    /* Base M2 has at most 10 GPU cores; Pro/Max/Ultra variants have more. */
    return get_apple_gpu_core_count(device) <= 10 ? APPLE_M2 : APPLE_M2_BIG;
  }
  if (strstr(device_name, "M3")) {
    return APPLE_M3;
  }
  return APPLE_UNKNOWN;
}
/* Returns the number of elements per sort partition used by the Metal shader
 * sorting path. Can be overridden for tuning/benchmarking via the
 * CYCLES_METAL_SORT_PARTITION_ELEMENTS environment variable.
 * NOTE(review): the default return value (when the override is unset) is not
 * visible in this chunk — confirm against the full source before editing. */
int MetalInfo::optimal_sort_partition_elements()
/* Environment override takes precedence when set. */
 if (
auto *
str = getenv(
"CYCLES_METAL_SORT_PARTITION_ELEMENTS")) {
/* NOTE(review): the enclosing function signature is not visible in this chunk;
 * from the cached `usable_devices` result this appears to be the body of
 * MetalInfo::get_usable_devices(). Enumeration runs once per process and the
 * result is cached behind `already_enumerated`. */
 static bool already_enumerated =
false;
 /* Return the cached list on every call after the first enumeration. */
 if (already_enumerated) {
 return usable_devices;
 metal_printf(
"Usable Metal devices:\n");
 /* Walk every Metal device in the system and filter to usable ones. */
 for (id<MTLDevice> device
in MTLCopyAllDevices()) {
 string device_name = get_device_name(device);
 /* Only Apple-silicon GPUs on macOS 12.2+ are considered. */
 if (@available(macos 12.2, *)) {
 const char *device_name_char = [device.name UTF8String];
 /* Exclude Intel/AMD GPUs; require an "Apple" GPU name. */
 if (!(strstr(device_name_char,
"Intel") || strstr(device_name_char,
"AMD")) &&
 strstr(device_name_char,
"Apple"))
 /* Additionally require unified memory (Apple-silicon characteristic). */
 usable = [device hasUnifiedMemory];
 metal_printf(
"- %s\n", device_name.c_str());
 usable_devices.push_back(device);
 /* Devices that fail the checks are logged and skipped. */
 metal_printf(
" (skipping \"%s\")\n", device_name.c_str());
 if (usable_devices.empty()) {
 metal_printf(
" No usable Metal devices found\n");
 /* Mark enumeration done so subsequent calls return the cached list. */
 already_enumerated =
true;
 return usable_devices;
/* Returns a shared-storage MTLBuffer for use with `command_buffer`, preferring to
 * reuse an idle pooled buffer whose length matches exactly; otherwise allocates a
 * new buffer, accounts for its size, and registers it in the pool. The supplied
 * host `pointer` contents are copied into the buffer before returning.
 * NOTE(review): the remaining parameters (at least `length` and `pointer`), any
 * locking, and the return statement are not visible in this chunk — confirm the
 * full signature against the header before editing. */
id<MTLBuffer> MetalBufferPool::get_buffer(id<MTLDevice> device,
 id<MTLCommandBuffer> command_buffer,
 id<MTLBuffer> buffer = nil;
 /* First try to reuse an idle pooled buffer (command_buffer == nil means idle)
  * of exactly the requested length. */
 for (MetalBufferListEntry &bufferEntry : temp_buffers) {
 if (bufferEntry.buffer.length ==
length && bufferEntry.command_buffer == nil) {
 buffer = bufferEntry.buffer;
 /* Claim the buffer for this command buffer until it completes. */
 bufferEntry.command_buffer = command_buffer;
 /* No reusable buffer: allocate a new shared-storage one and pool it. */
 buffer = [device newBufferWithLength:
length options:MTLResourceStorageModeShared];
 total_temp_mem_size += buffer.allocatedSize;
 temp_buffers.push_back(MetalBufferListEntry{buffer, command_buffer});
 /* Copy the caller's data into the (host-visible) buffer. */
 memcpy(buffer.contents, pointer,
length);
/* Marks every pooled buffer that was claimed by `command_buffer` as idle again
 * (command_buffer == nil), making it available for reuse by get_buffer().
 * Intended to be called once the command buffer has completed execution. */
void MetalBufferPool::process_command_buffer_completion(id<MTLCommandBuffer> command_buffer)
 for (MetalBufferListEntry &buffer_entry : temp_buffers) {
 if (buffer_entry.command_buffer == command_buffer) {
 buffer_entry.command_buffer = nil;
/* Releases all pooled buffers and resets the pool's memory accounting.
 * NOTE(review): buffers are released via a manual `release` message, which
 * implies this translation unit builds without ARC — confirm before changing
 * the memory-management style here. */
MetalBufferPool::~MetalBufferPool()
 for (MetalBufferListEntry &buffer_entry : temp_buffers) {
 /* Keep the running total in sync as each buffer is destroyed. */
 total_temp_mem_size -= buffer_entry.buffer.allocatedSize;
 [buffer_entry.buffer release];
 buffer_entry.buffer = nil;
 temp_buffers.clear();
void mem_alloc(const size_t size)
CCL_NAMESPACE_BEGIN struct Options options
#define CCL_NAMESPACE_END
#define assert(assertion)
float length(VecOp< float, D >) RET
CCL_NAMESPACE_BEGIN string string_printf(const char *format,...)
std::unique_lock< std::mutex > thread_scoped_lock