Blender  V2.93
device_cuda.cpp
Go to the documentation of this file.
1 /*
2  * Copyright 2011-2013 Blender Foundation
3  *
4  * Licensed under the Apache License, Version 2.0 (the "License");
5  * you may not use this file except in compliance with the License.
6  * You may obtain a copy of the License at
7  *
8  * http://www.apache.org/licenses/LICENSE-2.0
9  *
10  * Unless required by applicable law or agreed to in writing, software
11  * distributed under the License is distributed on an "AS IS" BASIS,
12  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13  * See the License for the specific language governing permissions and
14  * limitations under the License.
15  */
16 
17 #ifdef WITH_CUDA
18 
19 # include "device/cuda/device_cuda.h"
20 # include "device/device.h"
21 # include "device/device_intern.h"
22 
23 # include "util/util_logging.h"
24 # include "util/util_string.h"
25 # include "util/util_windows.h"
26 
28 
/* Report whether CUDA can be used by this process.
 *
 * When built with WITH_CUDA_DYNLOAD, the first call initializes CUEW and then
 * requires either precompiled kernels or a CUDA compiler to be available; the
 * outcome is cached in function-local statics and returned unchanged by every
 * later call. Without WITH_CUDA_DYNLOAD the function always returns true. */
bool device_cuda_init()
{
#  ifdef WITH_CUDA_DYNLOAD
  /* `attempted` is set before the probe runs, so the probe happens at most once. */
  static bool attempted = false;
  static bool usable = false;

  if (!attempted) {
    attempted = true;

    const int status = cuewInit(CUEW_INIT_CUDA);
    if (status != CUEW_SUCCESS) {
      VLOG(1) << "CUEW initialization failed: "
              << ((status == CUEW_ERROR_ATEXIT_FAILED) ? "Error setting up atexit() handler" :
                                                         "Error opening the library");
    }
    else {
      VLOG(1) << "CUEW initialization succeeded";

      /* Either source of kernels is sufficient; precompiled ones are checked first. */
      if (CUDADevice::have_precompiled_kernels()) {
        VLOG(1) << "Found precompiled kernels";
        usable = true;
      }
      else if (cuewCompilerPath() != NULL) {
        VLOG(1) << "Found CUDA compiler " << cuewCompilerPath();
        usable = true;
      }
      else {
        VLOG(1) << "Neither precompiled kernels nor CUDA compiler was found,"
                << " unable to use CUDA";
      }
    }
  }

  return usable;
#  else  /* WITH_CUDA_DYNLOAD */
  return true;
#  endif /* WITH_CUDA_DYNLOAD */
}
66 
67 Device *device_cuda_create(DeviceInfo &info, Stats &stats, Profiler &profiler, bool background)
68 {
69  return new CUDADevice(info, stats, profiler, background);
70 }
71 
72 static CUresult device_cuda_safe_init()
73 {
74 # ifdef _WIN32
75  __try {
76  return cuInit(0);
77  }
78  __except (EXCEPTION_EXECUTE_HANDLER) {
79  /* Ignore crashes inside the CUDA driver and hope we can
80  * survive even with corrupted CUDA installs. */
81  fprintf(stderr, "Cycles CUDA: driver crashed, continuing without CUDA.\n");
82  }
83 
84  return CUDA_ERROR_NO_DEVICE;
85 # else
86  return cuInit(0);
87 # endif
88 }
89 
91 {
92  CUresult result = device_cuda_safe_init();
93  if (result != CUDA_SUCCESS) {
94  if (result != CUDA_ERROR_NO_DEVICE)
95  fprintf(stderr, "CUDA cuInit: %s\n", cuewErrorString(result));
96  return;
97  }
98 
99  int count = 0;
100  result = cuDeviceGetCount(&count);
101  if (result != CUDA_SUCCESS) {
102  fprintf(stderr, "CUDA cuDeviceGetCount: %s\n", cuewErrorString(result));
103  return;
104  }
105 
106  vector<DeviceInfo> display_devices;
107 
108  for (int num = 0; num < count; num++) {
109  char name[256];
110 
111  result = cuDeviceGetName(name, 256, num);
112  if (result != CUDA_SUCCESS) {
113  fprintf(stderr, "CUDA cuDeviceGetName: %s\n", cuewErrorString(result));
114  continue;
115  }
116 
117  int major;
118  cuDeviceGetAttribute(&major, CU_DEVICE_ATTRIBUTE_COMPUTE_CAPABILITY_MAJOR, num);
119  if (major < 3) {
120  VLOG(1) << "Ignoring device \"" << name << "\", this graphics card is no longer supported.";
121  continue;
122  }
123 
124  DeviceInfo info;
125 
126  info.type = DEVICE_CUDA;
127  info.description = string(name);
128  info.num = num;
129 
130  info.has_half_images = (major >= 3);
131  info.has_nanovdb = true;
132  info.has_volume_decoupled = false;
133  info.has_adaptive_stop_per_sample = false;
134  info.denoisers = DENOISER_NLM;
135 
136  /* Check if the device has P2P access to any other device in the system. */
137  for (int peer_num = 0; peer_num < count && !info.has_peer_memory; peer_num++) {
138  if (num != peer_num) {
139  int can_access = 0;
140  cuDeviceCanAccessPeer(&can_access, num, peer_num);
141  info.has_peer_memory = (can_access != 0);
142  }
143  }
144 
145  int pci_location[3] = {0, 0, 0};
146  cuDeviceGetAttribute(&pci_location[0], CU_DEVICE_ATTRIBUTE_PCI_DOMAIN_ID, num);
147  cuDeviceGetAttribute(&pci_location[1], CU_DEVICE_ATTRIBUTE_PCI_BUS_ID, num);
148  cuDeviceGetAttribute(&pci_location[2], CU_DEVICE_ATTRIBUTE_PCI_DEVICE_ID, num);
149  info.id = string_printf("CUDA_%s_%04x:%02x:%02x",
150  name,
151  (unsigned int)pci_location[0],
152  (unsigned int)pci_location[1],
153  (unsigned int)pci_location[2]);
154 
155  /* If device has a kernel timeout and no compute preemption, we assume
156  * it is connected to a display and will freeze the display while doing
157  * computations. */
158  int timeout_attr = 0, preempt_attr = 0;
159  cuDeviceGetAttribute(&timeout_attr, CU_DEVICE_ATTRIBUTE_KERNEL_EXEC_TIMEOUT, num);
160  cuDeviceGetAttribute(&preempt_attr, CU_DEVICE_ATTRIBUTE_COMPUTE_PREEMPTION_SUPPORTED, num);
161 
162  /* The CUDA driver reports compute preemption as not being available on
163  * Windows 10 even when it is, due to an issue in application profiles.
164  * Detect case where we expect it to be available and override. */
165  if (preempt_attr == 0 && (major >= 6) && system_windows_version_at_least(10, 17134)) {
166  VLOG(1) << "Assuming device has compute preemption on Windows 10.";
167  preempt_attr = 1;
168  }
169 
170  if (timeout_attr && !preempt_attr) {
171  VLOG(1) << "Device is recognized as display.";
172  info.description += " (Display)";
173  info.display_device = true;
174  display_devices.push_back(info);
175  }
176  else {
177  VLOG(1) << "Device has compute preemption or is not used for display.";
178  devices.push_back(info);
179  }
180  VLOG(1) << "Added device \"" << name << "\" with id \"" << info.id << "\".";
181  }
182 
183  if (!display_devices.empty())
184  devices.insert(devices.end(), display_devices.begin(), display_devices.end());
185 }
186 
188 {
189  CUresult result = device_cuda_safe_init();
190  if (result != CUDA_SUCCESS) {
191  if (result != CUDA_ERROR_NO_DEVICE) {
192  return string("Error initializing CUDA: ") + cuewErrorString(result);
193  }
194  return "No CUDA device found\n";
195  }
196 
197  int count;
198  result = cuDeviceGetCount(&count);
199  if (result != CUDA_SUCCESS) {
200  return string("Error getting devices: ") + cuewErrorString(result);
201  }
202 
203  string capabilities = "";
204  for (int num = 0; num < count; num++) {
205  char name[256];
206  if (cuDeviceGetName(name, 256, num) != CUDA_SUCCESS) {
207  continue;
208  }
209  capabilities += string("\t") + name + "\n";
210  int value;
211 # define GET_ATTR(attr) \
212  { \
213  if (cuDeviceGetAttribute(&value, CU_DEVICE_ATTRIBUTE_##attr, num) == CUDA_SUCCESS) { \
214  capabilities += string_printf("\t\tCU_DEVICE_ATTRIBUTE_" #attr "\t\t\t%d\n", value); \
215  } \
216  } \
217  (void)0
218  /* TODO(sergey): Strip all attributes which are not useful for us
219  * or does not depend on the driver.
220  */
221  GET_ATTR(MAX_THREADS_PER_BLOCK);
222  GET_ATTR(MAX_BLOCK_DIM_X);
223  GET_ATTR(MAX_BLOCK_DIM_Y);
224  GET_ATTR(MAX_BLOCK_DIM_Z);
225  GET_ATTR(MAX_GRID_DIM_X);
226  GET_ATTR(MAX_GRID_DIM_Y);
227  GET_ATTR(MAX_GRID_DIM_Z);
228  GET_ATTR(MAX_SHARED_MEMORY_PER_BLOCK);
229  GET_ATTR(SHARED_MEMORY_PER_BLOCK);
230  GET_ATTR(TOTAL_CONSTANT_MEMORY);
231  GET_ATTR(WARP_SIZE);
232  GET_ATTR(MAX_PITCH);
233  GET_ATTR(MAX_REGISTERS_PER_BLOCK);
234  GET_ATTR(REGISTERS_PER_BLOCK);
235  GET_ATTR(CLOCK_RATE);
236  GET_ATTR(TEXTURE_ALIGNMENT);
237  GET_ATTR(GPU_OVERLAP);
238  GET_ATTR(MULTIPROCESSOR_COUNT);
239  GET_ATTR(KERNEL_EXEC_TIMEOUT);
240  GET_ATTR(INTEGRATED);
241  GET_ATTR(CAN_MAP_HOST_MEMORY);
242  GET_ATTR(COMPUTE_MODE);
243  GET_ATTR(MAXIMUM_TEXTURE1D_WIDTH);
244  GET_ATTR(MAXIMUM_TEXTURE2D_WIDTH);
245  GET_ATTR(MAXIMUM_TEXTURE2D_HEIGHT);
246  GET_ATTR(MAXIMUM_TEXTURE3D_WIDTH);
247  GET_ATTR(MAXIMUM_TEXTURE3D_HEIGHT);
248  GET_ATTR(MAXIMUM_TEXTURE3D_DEPTH);
249  GET_ATTR(MAXIMUM_TEXTURE2D_LAYERED_WIDTH);
250  GET_ATTR(MAXIMUM_TEXTURE2D_LAYERED_HEIGHT);
251  GET_ATTR(MAXIMUM_TEXTURE2D_LAYERED_LAYERS);
252  GET_ATTR(MAXIMUM_TEXTURE2D_ARRAY_WIDTH);
253  GET_ATTR(MAXIMUM_TEXTURE2D_ARRAY_HEIGHT);
254  GET_ATTR(MAXIMUM_TEXTURE2D_ARRAY_NUMSLICES);
255  GET_ATTR(SURFACE_ALIGNMENT);
256  GET_ATTR(CONCURRENT_KERNELS);
257  GET_ATTR(ECC_ENABLED);
258  GET_ATTR(TCC_DRIVER);
259  GET_ATTR(MEMORY_CLOCK_RATE);
260  GET_ATTR(GLOBAL_MEMORY_BUS_WIDTH);
261  GET_ATTR(L2_CACHE_SIZE);
262  GET_ATTR(MAX_THREADS_PER_MULTIPROCESSOR);
263  GET_ATTR(ASYNC_ENGINE_COUNT);
264  GET_ATTR(UNIFIED_ADDRESSING);
265  GET_ATTR(MAXIMUM_TEXTURE1D_LAYERED_WIDTH);
266  GET_ATTR(MAXIMUM_TEXTURE1D_LAYERED_LAYERS);
267  GET_ATTR(CAN_TEX2D_GATHER);
268  GET_ATTR(MAXIMUM_TEXTURE2D_GATHER_WIDTH);
269  GET_ATTR(MAXIMUM_TEXTURE2D_GATHER_HEIGHT);
270  GET_ATTR(MAXIMUM_TEXTURE3D_WIDTH_ALTERNATE);
271  GET_ATTR(MAXIMUM_TEXTURE3D_HEIGHT_ALTERNATE);
272  GET_ATTR(MAXIMUM_TEXTURE3D_DEPTH_ALTERNATE);
273  GET_ATTR(TEXTURE_PITCH_ALIGNMENT);
274  GET_ATTR(MAXIMUM_TEXTURECUBEMAP_WIDTH);
275  GET_ATTR(MAXIMUM_TEXTURECUBEMAP_LAYERED_WIDTH);
276  GET_ATTR(MAXIMUM_TEXTURECUBEMAP_LAYERED_LAYERS);
277  GET_ATTR(MAXIMUM_SURFACE1D_WIDTH);
278  GET_ATTR(MAXIMUM_SURFACE2D_WIDTH);
279  GET_ATTR(MAXIMUM_SURFACE2D_HEIGHT);
280  GET_ATTR(MAXIMUM_SURFACE3D_WIDTH);
281  GET_ATTR(MAXIMUM_SURFACE3D_HEIGHT);
282  GET_ATTR(MAXIMUM_SURFACE3D_DEPTH);
283  GET_ATTR(MAXIMUM_SURFACE1D_LAYERED_WIDTH);
284  GET_ATTR(MAXIMUM_SURFACE1D_LAYERED_LAYERS);
285  GET_ATTR(MAXIMUM_SURFACE2D_LAYERED_WIDTH);
286  GET_ATTR(MAXIMUM_SURFACE2D_LAYERED_HEIGHT);
287  GET_ATTR(MAXIMUM_SURFACE2D_LAYERED_LAYERS);
288  GET_ATTR(MAXIMUM_SURFACECUBEMAP_WIDTH);
289  GET_ATTR(MAXIMUM_SURFACECUBEMAP_LAYERED_WIDTH);
290  GET_ATTR(MAXIMUM_SURFACECUBEMAP_LAYERED_LAYERS);
291  GET_ATTR(MAXIMUM_TEXTURE1D_LINEAR_WIDTH);
292  GET_ATTR(MAXIMUM_TEXTURE2D_LINEAR_WIDTH);
293  GET_ATTR(MAXIMUM_TEXTURE2D_LINEAR_HEIGHT);
294  GET_ATTR(MAXIMUM_TEXTURE2D_LINEAR_PITCH);
295  GET_ATTR(MAXIMUM_TEXTURE2D_MIPMAPPED_WIDTH);
296  GET_ATTR(MAXIMUM_TEXTURE2D_MIPMAPPED_HEIGHT);
297  GET_ATTR(COMPUTE_CAPABILITY_MAJOR);
298  GET_ATTR(COMPUTE_CAPABILITY_MINOR);
299  GET_ATTR(MAXIMUM_TEXTURE1D_MIPMAPPED_WIDTH);
300  GET_ATTR(STREAM_PRIORITIES_SUPPORTED);
301  GET_ATTR(GLOBAL_L1_CACHE_SUPPORTED);
302  GET_ATTR(LOCAL_L1_CACHE_SUPPORTED);
303  GET_ATTR(MAX_SHARED_MEMORY_PER_MULTIPROCESSOR);
304  GET_ATTR(MAX_REGISTERS_PER_MULTIPROCESSOR);
305  GET_ATTR(MANAGED_MEMORY);
306  GET_ATTR(MULTI_GPU_BOARD);
307  GET_ATTR(MULTI_GPU_BOARD_GROUP_ID);
308 # undef GET_ATTR
309  capabilities += "\n";
310  }
311 
312  return capabilities;
313 }
314 
316 
317 #endif
bool has_half_images
Definition: device.h:79
int num
Definition: device.h:77
string id
Definition: device.h:76
DenoiserTypeMask denoisers
Definition: device.h:88
bool display_device
Definition: device.h:78
bool has_peer_memory
Definition: device.h:87
bool has_nanovdb
Definition: device.h:80
bool has_adaptive_stop_per_sample
Definition: device.h:83
DeviceType type
Definition: device.h:74
string description
Definition: device.h:75
bool has_volume_decoupled
Definition: device.h:81
Definition: device.h:293
@ DEVICE_CUDA
Definition: device.h:47
void device_cuda_info(vector< DeviceInfo > &devices)
string device_cuda_capabilities()
Device * device_cuda_create(DeviceInfo &info, Stats &stats, Profiler &profiler, bool background)
bool device_cuda_init()
@ DENOISER_NLM
Definition: device_task.h:36
static bool initialized
Definition: gpu_init_exit.c:41
int count
#define CCL_NAMESPACE_END
Vector< CPUDevice > devices
List of all CPUDevices. For every hardware thread, an instance of CPUDevice is created.
#define VLOG(severity)
Definition: util_logging.h:50
CCL_NAMESPACE_BEGIN string string_printf(const char *format,...)
Definition: util_string.cpp:32
CCL_NAMESPACE_BEGIN bool system_windows_version_at_least(int major, int build)