util.mm
/* SPDX-FileCopyrightText: 2021-2022 Blender Foundation
 *
 * SPDX-License-Identifier: Apache-2.0 */

#ifdef WITH_METAL

#  include "device/metal/util.h"
#  include "util/md5.h"
#  include "util/path.h"
#  include "util/string.h"
#  include "util/time.h"

#  include <IOKit/IOKitLib.h>
#  include <ctime>
#  include <pwd.h>
#  include <sys/shm.h>

CCL_NAMESPACE_BEGIN

string MetalInfo::get_device_name(id<MTLDevice> device)
{
  string device_name = [device.name UTF8String];

  /* Append the GPU core count so we can distinguish between GPU variants in benchmarks. */
  int gpu_core_count = get_apple_gpu_core_count(device);
  device_name += string_printf(gpu_core_count ? " (GPU - %d cores)" : " (GPU)", gpu_core_count);

  return device_name;
}

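/* Illustrative example (not part of the original file): for a device whose `name` is
 * "Apple M1 Max" and whose IORegistry entry reports 32 GPU cores, the string returned
 * above would be "Apple M1 Max (GPU - 32 cores)"; if the core count cannot be queried,
 * it falls back to "Apple M1 Max (GPU)". The device name and core count here are
 * assumptions used purely for illustration. */
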
int MetalInfo::get_apple_gpu_core_count(id<MTLDevice> device)
{
  int core_count = 0;
  if (@available(macos 12.0, *)) {
    io_service_t gpu_service = IOServiceGetMatchingService(
        kIOMainPortDefault, IORegistryEntryIDMatching(device.registryID));
    if (CFNumberRef numberRef = (CFNumberRef)IORegistryEntryCreateCFProperty(
            gpu_service, CFSTR("gpu-core-count"), nullptr, 0))
    {
      if (CFGetTypeID(numberRef) == CFNumberGetTypeID()) {
        CFNumberGetValue(numberRef, kCFNumberSInt32Type, &core_count);
      }
      CFRelease(numberRef);
    }
  }
  return core_count;
}

AppleGPUArchitecture MetalInfo::get_apple_gpu_architecture(id<MTLDevice> device)
{
  const char *device_name = [device.name UTF8String];
  if (strstr(device_name, "M1")) {
    return APPLE_M1;
  }
  if (strstr(device_name, "M2")) {
    return get_apple_gpu_core_count(device) <= 10 ? APPLE_M2 : APPLE_M2_BIG;
  }
  if (strstr(device_name, "M3")) {
    return APPLE_M3;
  }
  return APPLE_UNKNOWN;
}

int MetalInfo::optimal_sort_partition_elements()
{
  if (auto *str = getenv("CYCLES_METAL_SORT_PARTITION_ELEMENTS")) {
    return atoi(str);
  }

  /* On M1 and M2 GPUs, we see better cache utilization if we partition the active indices before
   * sorting each partition by material. Partitioning into chunks of 65536 elements results in an
   * overall render time speedup of up to 15%. */

  return 65536;
}

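/* Illustrative sketch (not part of the original file): the value chosen above can be
 * overridden for experiments through the environment variable that
 * optimal_sort_partition_elements() reads first. The function name
 * example_force_small_partitions is hypothetical, and the POSIX setenv() call assumes
 * <cstdlib> is reachable through the util headers included above. */
static void example_force_small_partitions()
{
  /* Must be set before the partition size is queried. */
  setenv("CYCLES_METAL_SORT_PARTITION_ELEMENTS", "32768", 1);
  assert(MetalInfo::optimal_sort_partition_elements() == 32768);
}
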
const vector<id<MTLDevice>> &MetalInfo::get_usable_devices()
{
  static vector<id<MTLDevice>> usable_devices;
  static bool already_enumerated = false;

  if (already_enumerated) {
    return usable_devices;
  }

  metal_printf("Usable Metal devices:\n");
  for (id<MTLDevice> device in MTLCopyAllDevices()) {
    string device_name = get_device_name(device);
    bool usable = false;

    if (@available(macos 12.2, *)) {
      const char *device_name_char = [device.name UTF8String];
      if (!(strstr(device_name_char, "Intel") || strstr(device_name_char, "AMD")) &&
          strstr(device_name_char, "Apple"))
      {
        /* TODO: Implement a better way to identify device vendor instead of relying on name. */
        /* We only support Apple Silicon GPUs which all have unified memory, but explicitly check
         * just in case it ever changes. */
        usable = [device hasUnifiedMemory];
      }
    }

    if (usable) {
      metal_printf("- %s\n", device_name.c_str());
      [device retain];
      usable_devices.push_back(device);
    }
    else {
      metal_printf(" (skipping \"%s\")\n", device_name.c_str());
    }
  }
  if (usable_devices.empty()) {
    metal_printf(" No usable Metal devices found\n");
  }
  already_enumerated = true;

  return usable_devices;
}

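/* Illustrative sketch (not part of the original file): a caller could enumerate the
 * usable devices and log their augmented names like this. The function name
 * example_print_usable_devices is hypothetical. */
static void example_print_usable_devices()
{
  for (id<MTLDevice> device : MetalInfo::get_usable_devices()) {
    metal_printf("%s\n", MetalInfo::get_device_name(device).c_str());
  }
}
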
id<MTLBuffer> MetalBufferPool::get_buffer(id<MTLDevice> device,
                                          id<MTLCommandBuffer> command_buffer,
                                          NSUInteger length,
                                          const void *pointer,
                                          Stats &stats)
{
  id<MTLBuffer> buffer = nil;
  {
    thread_scoped_lock lock(buffer_mutex);
    /* Find an unused buffer with matching size and storage mode. */
    for (MetalBufferListEntry &bufferEntry : temp_buffers) {
      if (bufferEntry.buffer.length == length && bufferEntry.command_buffer == nil) {
        buffer = bufferEntry.buffer;
        bufferEntry.command_buffer = command_buffer;
        break;
      }
    }
    if (!buffer) {
      /* Create a new buffer and add it to the pool. Typically this pool will only grow to a
       * handful of entries. */
      buffer = [device newBufferWithLength:length options:MTLResourceStorageModeShared];
      stats.mem_alloc(buffer.allocatedSize);
      total_temp_mem_size += buffer.allocatedSize;
      temp_buffers.push_back(MetalBufferListEntry{buffer, command_buffer});
    }
  }

  /* Copy over data */
  if (pointer) {
    memcpy(buffer.contents, pointer, length);
  }

  return buffer;
}

void MetalBufferPool::process_command_buffer_completion(id<MTLCommandBuffer> command_buffer)
{
  assert(command_buffer);
  thread_scoped_lock lock(buffer_mutex);
  /* Mark any temp buffers associated with command_buffer as unused. */
  for (MetalBufferListEntry &buffer_entry : temp_buffers) {
    if (buffer_entry.command_buffer == command_buffer) {
      buffer_entry.command_buffer = nil;
    }
  }
}

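/* Illustrative sketch (not part of the original file): a caller might stage per-dispatch
 * data through the pool and release the buffer back to it from the command buffer's
 * completion handler. The function name example_stage_data and its parameters are
 * hypothetical stand-ins for the surrounding Cycles objects. */
static void example_stage_data(MetalBufferPool &pool,
                               id<MTLDevice> device,
                               id<MTLCommandBuffer> command_buffer,
                               Stats &stats,
                               const float *data,
                               size_t count)
{
  id<MTLBuffer> buffer = pool.get_buffer(
      device, command_buffer, count * sizeof(float), data, stats);
  (void)buffer; /* ... bind the buffer to a compute encoder and dispatch here ... */

  MetalBufferPool *pool_ptr = &pool;
  [command_buffer addCompletedHandler:^(id<MTLCommandBuffer> cb) {
    /* Mark pooled buffers used by this command buffer as reusable. */
    pool_ptr->process_command_buffer_completion(cb);
  }];
}
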
MetalBufferPool::~MetalBufferPool()
{
  thread_scoped_lock lock(buffer_mutex);
  /* Release all buffers that have not been recently reused */
  for (MetalBufferListEntry &buffer_entry : temp_buffers) {
    total_temp_mem_size -= buffer_entry.buffer.allocatedSize;
    [buffer_entry.buffer release];
    buffer_entry.buffer = nil;
  }
  temp_buffers.clear();
}

CCL_NAMESPACE_END

#endif /* WITH_METAL */