Blender V4.5
device/device.cpp
1/* SPDX-FileCopyrightText: 2011-2022 Blender Foundation
2 *
3 * SPDX-License-Identifier: Apache-2.0 */
4
5#include <cstdlib>
6#include <cstring>
7
8#include "bvh/bvh2.h"
9
10#include "device/device.h"
11#include "device/queue.h"
12
13#include "device/cpu/device.h"
14#include "device/cpu/kernel.h"
15#include "device/cuda/device.h"
16#include "device/dummy/device.h"
17#include "device/hip/device.h"
18#include "device/metal/device.h"
19#include "device/multi/device.h"
21#include "device/optix/device.h"
22
23#ifdef WITH_HIPRT
24# include <hiprtew.h>
25#endif
26
27#include "util/log.h"
28#include "util/math.h"
29#include "util/string.h"
30#include "util/system.h"
31#include "util/task.h"
32#include "util/types.h"
33#include "util/vector.h"
34
35CCL_NAMESPACE_BEGIN
36
37bool Device::need_types_update = true;
38bool Device::need_devices_update = true;
39thread_mutex Device::device_mutex;
40vector<DeviceInfo> Device::cuda_devices;
41vector<DeviceInfo> Device::optix_devices;
42vector<DeviceInfo> Device::cpu_devices;
43vector<DeviceInfo> Device::hip_devices;
44vector<DeviceInfo> Device::metal_devices;
45vector<DeviceInfo> Device::oneapi_devices;
46uint Device::devices_initialized_mask = 0;
47
48/* Device */
49
50Device::~Device() noexcept(false) = default;
51
52void Device::build_bvh(BVH *bvh, Progress &progress, bool refit)
53{
54 assert(bvh->params.bvh_layout == BVH_LAYOUT_BVH2);
55
56 BVH2 *const bvh2 = static_cast<BVH2 *>(bvh);
57 if (refit) {
58 bvh2->refit(progress);
59 }
60 else {
61 bvh2->build(progress, &stats);
62 }
63}
64
66 Stats &stats,
68 bool headless)
69{
70 if (!info.multi_devices.empty()) {
71 /* Always create a multi device when info contains multiple devices.
72 * This is done so that the type can still be e.g. DEVICE_CPU to indicate
73 * that it is a homogeneous collection of devices, which simplifies checks. */
74 return device_multi_create(info, stats, profiler, headless);
75 }
76
77 unique_ptr<Device> device;
78
79 switch (info.type) {
80 case DEVICE_CPU:
81 device = device_cpu_create(info, stats, profiler, headless);
82 break;
83#ifdef WITH_CUDA
84 case DEVICE_CUDA:
85 if (device_cuda_init()) {
86 device = device_cuda_create(info, stats, profiler, headless);
87 }
88 break;
89#endif
90#ifdef WITH_OPTIX
91 case DEVICE_OPTIX:
92 if (device_optix_init()) {
93 device = device_optix_create(info, stats, profiler, headless);
94 }
95 break;
96#endif
97
98#ifdef WITH_HIP
99 case DEVICE_HIP:
100 if (device_hip_init()) {
101 device = device_hip_create(info, stats, profiler, headless);
102 }
103 break;
104#endif
105
106#ifdef WITH_METAL
107 case DEVICE_METAL:
108 if (device_metal_init()) {
109 device = device_metal_create(info, stats, profiler, headless);
110 }
111 break;
112#endif
113
114#ifdef WITH_ONEAPI
115 case DEVICE_ONEAPI:
116 device = device_oneapi_create(info, stats, profiler, headless);
117 break;
118#endif
119
120 default:
121 break;
122 }
123
124 if (device == nullptr) {
125 device = device_dummy_create(info, stats, profiler, headless);
126 }
127
128 return device;
129}
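
A minimal usage sketch (illustrative, not part of device.cpp): creating a CPU render device through the factory above. It assumes the caller already owns a Stats and Profiler instance, and that device enumeration reports at least one CPU entry.

static unique_ptr<Device> create_cpu_device_example(Stats &stats, Profiler &profiler)
{
  /* Enumerate CPU devices only; available_devices() initializes backends lazily. */
  const vector<DeviceInfo> cpu_infos = Device::available_devices(DEVICE_MASK_CPU);
  if (cpu_infos.empty()) {
    return nullptr;
  }

  /* headless = true: no GUI or display interop is required. */
  return Device::create(cpu_infos.front(), stats, profiler, /*headless=*/true);
}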
130
131DeviceType Device::type_from_string(const char *name)
132{
133 if (strcmp(name, "CPU") == 0) {
134 return DEVICE_CPU;
135 }
136 if (strcmp(name, "CUDA") == 0) {
137 return DEVICE_CUDA;
138 }
139 if (strcmp(name, "OPTIX") == 0) {
140 return DEVICE_OPTIX;
141 }
142 if (strcmp(name, "MULTI") == 0) {
143 return DEVICE_MULTI;
144 }
145 if (strcmp(name, "HIP") == 0) {
146 return DEVICE_HIP;
147 }
148 if (strcmp(name, "METAL") == 0) {
149 return DEVICE_METAL;
150 }
151 if (strcmp(name, "ONEAPI") == 0) {
152 return DEVICE_ONEAPI;
153 }
154 if (strcmp(name, "HIPRT") == 0) {
155 return DEVICE_HIPRT;
156 }
157
158 return DEVICE_NONE;
159}
160
161string Device::string_from_type(DeviceType type)
162{
163 if (type == DEVICE_CPU) {
164 return "CPU";
165 }
166 if (type == DEVICE_CUDA) {
167 return "CUDA";
168 }
169 if (type == DEVICE_OPTIX) {
170 return "OPTIX";
171 }
172 if (type == DEVICE_MULTI) {
173 return "MULTI";
174 }
175 if (type == DEVICE_HIP) {
176 return "HIP";
177 }
178 if (type == DEVICE_METAL) {
179 return "METAL";
180 }
181 if (type == DEVICE_ONEAPI) {
182 return "ONEAPI";
183 }
184 if (type == DEVICE_HIPRT) {
185 return "HIPRT";
186 }
187
188 return "";
189}
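
A small illustrative check (not part of device.cpp): for the names handled above, type_from_string() and string_from_type() round-trip; unknown names map to DEVICE_NONE and unknown types to an empty string.

static bool device_name_round_trips_example(const char *name)
{
  const DeviceType type = Device::type_from_string(name);
  /* A known name converts to a non-NONE type whose canonical name matches the input. */
  return (type != DEVICE_NONE) && (Device::string_from_type(type) == name);
}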
190
191vector<DeviceType> Device::available_types()
192{
193 vector<DeviceType> types;
194 types.push_back(DEVICE_CPU);
195#ifdef WITH_CUDA
196 types.push_back(DEVICE_CUDA);
197#endif
198#ifdef WITH_OPTIX
199 types.push_back(DEVICE_OPTIX);
200#endif
201#ifdef WITH_HIP
202 types.push_back(DEVICE_HIP);
203#endif
204#ifdef WITH_METAL
205 types.push_back(DEVICE_METAL);
206#endif
207#ifdef WITH_ONEAPI
208 types.push_back(DEVICE_ONEAPI);
209#endif
210#ifdef WITH_HIPRT
211 if (hiprtewInit()) {
212 types.push_back(DEVICE_HIPRT);
213 }
214#endif
215 return types;
216}
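
An illustrative sketch (not part of device.cpp): logging which device types were compiled into this build, using available_types() and string_from_type() above.

static void log_available_types_example()
{
  for (const DeviceType type : Device::available_types()) {
    VLOG_INFO << "Compiled-in device type: " << Device::string_from_type(type);
  }
}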
217
218vector<DeviceInfo> Device::available_devices(const uint mask)
219{
220 /* Lazy initialize devices. On some platforms OpenCL or CUDA drivers can
221 * be broken and cause crashes when only trying to get device info, so
222 * we don't want to do any initialization until the user chooses to. */
223 const thread_scoped_lock lock(device_mutex);
224 vector<DeviceInfo> devices;
225
226#if defined(WITH_CUDA) || defined(WITH_OPTIX)
227 if (mask & (DEVICE_MASK_CUDA | DEVICE_MASK_OPTIX)) {
228 if (!(devices_initialized_mask & DEVICE_MASK_CUDA)) {
229 if (device_cuda_init()) {
230 device_cuda_info(cuda_devices);
231 }
232 devices_initialized_mask |= DEVICE_MASK_CUDA;
233 }
234 if (mask & DEVICE_MASK_CUDA) {
235 for (DeviceInfo &info : cuda_devices) {
236 devices.push_back(info);
237 }
238 }
239 }
240#endif
241
242#ifdef WITH_OPTIX
243 if (mask & DEVICE_MASK_OPTIX) {
244 if (!(devices_initialized_mask & DEVICE_MASK_OPTIX)) {
245 if (device_optix_init()) {
246 device_optix_info(cuda_devices, optix_devices);
247 }
248 devices_initialized_mask |= DEVICE_MASK_OPTIX;
249 }
250 for (DeviceInfo &info : optix_devices) {
251 devices.push_back(info);
252 }
253 }
254#endif
255
256#ifdef WITH_HIP
257 if (mask & DEVICE_MASK_HIP) {
258 if (!(devices_initialized_mask & DEVICE_MASK_HIP)) {
259 if (device_hip_init()) {
260 device_hip_info(hip_devices);
261 }
262 devices_initialized_mask |= DEVICE_MASK_HIP;
263 }
264 for (DeviceInfo &info : hip_devices) {
265 devices.push_back(info);
266 }
267 }
268#endif
269
270#ifdef WITH_ONEAPI
271 if (mask & DEVICE_MASK_ONEAPI) {
272 if (!(devices_initialized_mask & DEVICE_MASK_ONEAPI)) {
273 if (device_oneapi_init()) {
274 device_oneapi_info(oneapi_devices);
275 }
276 devices_initialized_mask |= DEVICE_MASK_ONEAPI;
277 }
278 for (DeviceInfo &info : oneapi_devices) {
279 devices.push_back(info);
280 }
281 }
282#endif
283
284 if (mask & DEVICE_MASK_CPU) {
285 if (!(devices_initialized_mask & DEVICE_MASK_CPU)) {
286 device_cpu_info(cpu_devices);
287 devices_initialized_mask |= DEVICE_MASK_CPU;
288 }
289 for (const DeviceInfo &info : cpu_devices) {
290 devices.push_back(info);
291 }
292 }
293
294#ifdef WITH_METAL
295 if (mask & DEVICE_MASK_METAL) {
296 if (!(devices_initialized_mask & DEVICE_MASK_METAL)) {
297 if (device_metal_init()) {
298 device_metal_info(metal_devices);
299 }
300 devices_initialized_mask |= DEVICE_MASK_METAL;
301 }
302 for (const DeviceInfo &info : metal_devices) {
303 devices.push_back(info);
304 }
305 }
306#endif
307
308 return devices;
309}
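
An illustrative sketch (not part of device.cpp): the mask argument above is a bitmask, so DEVICE_MASK_* flags can be combined to enumerate, for example, all CUDA and OptiX devices in one call.

static void log_gpu_devices_example()
{
  const uint gpu_mask = DEVICE_MASK_CUDA | DEVICE_MASK_OPTIX;
  for (const DeviceInfo &info : Device::available_devices(gpu_mask)) {
    VLOG_INFO << "Found GPU device: " << info.description << " (id: " << info.id << ")";
  }
}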
310
311DeviceInfo Device::dummy_device(const string &error_msg)
312{
313 DeviceInfo info;
314 info.type = DEVICE_DUMMY;
315 info.error_msg = error_msg;
316 return info;
317}
318
319string Device::device_capabilities(const uint mask)
320{
321 const thread_scoped_lock lock(device_mutex);
322 string capabilities;
323
324 if (mask & DEVICE_MASK_CPU) {
325 capabilities += "\nCPU device capabilities: ";
326 capabilities += device_cpu_capabilities() + "\n";
327 }
328
329#ifdef WITH_CUDA
330 if (mask & DEVICE_MASK_CUDA) {
331 if (device_cuda_init()) {
332 const string device_capabilities = device_cuda_capabilities();
333 if (!device_capabilities.empty()) {
334 capabilities += "\nCUDA device capabilities:\n";
335 capabilities += device_capabilities;
336 }
337 }
338 }
339#endif
340
341#ifdef WITH_HIP
342 if (mask & DEVICE_MASK_HIP) {
343 if (device_hip_init()) {
344 const string device_capabilities = device_hip_capabilities();
345 if (!device_capabilities.empty()) {
346 capabilities += "\nHIP device capabilities:\n";
347 capabilities += device_capabilities;
348 }
349 }
350 }
351#endif
352
353#ifdef WITH_ONEAPI
354 if (mask & DEVICE_MASK_ONEAPI) {
355 if (device_oneapi_init()) {
356 const string device_capabilities = device_oneapi_capabilities();
357 if (!device_capabilities.empty()) {
358 capabilities += "\noneAPI device capabilities:\n";
359 capabilities += device_capabilities;
360 }
361 }
362 }
363#endif
364
365#ifdef WITH_METAL
366 if (mask & DEVICE_MASK_METAL) {
367 if (device_metal_init()) {
368 const string device_capabilities = device_metal_capabilities();
369 if (!device_capabilities.empty()) {
370 capabilities += "\nMetal device capabilities:\n";
371 capabilities += device_capabilities;
372 }
373 }
374 }
375#endif
376
377 return capabilities;
378}
379
380DeviceInfo Device::get_multi_device(const vector<DeviceInfo> &subdevices,
381 const int threads,
382 bool background)
383{
384 assert(!subdevices.empty());
385
386 if (subdevices.size() == 1) {
387 /* No multi device needed. */
388 return subdevices.front();
389 }
390
391 DeviceInfo info;
392 info.type = DEVICE_NONE;
393 info.id = "MULTI";
394 info.description = "Multi Device";
395 info.num = 0;
396
397 info.has_nanovdb = true;
398 info.has_mnee = true;
399 info.has_osl = true;
400 info.has_guiding = true;
401 info.has_profiling = true;
402 info.has_peer_memory = false;
403 info.use_hardware_raytracing = false;
404 info.denoisers = DENOISER_ALL;
405
406 for (const DeviceInfo &device : subdevices) {
407 /* Ensure CPU device does not slow down GPU. */
408 if (device.type == DEVICE_CPU && subdevices.size() > 1) {
409 if (background) {
410 const int orig_cpu_threads = (threads) ? threads : TaskScheduler::max_concurrency();
411 const int cpu_threads = max(orig_cpu_threads - (subdevices.size() - 1), size_t(0));
412
413 VLOG_INFO << "CPU render threads reduced from " << orig_cpu_threads << " to "
414 << cpu_threads << ", to dedicate to GPU.";
415
416 if (cpu_threads >= 1) {
417 DeviceInfo cpu_device = device;
418 cpu_device.cpu_threads = cpu_threads;
419 info.multi_devices.push_back(cpu_device);
420 }
421 else {
422 continue;
423 }
424 }
425 else {
426 VLOG_INFO << "CPU render threads disabled for interactive render.";
427 continue;
428 }
429 }
430 else {
431 info.multi_devices.push_back(device);
432 }
433
434 /* Create unique ID for this combination of devices. */
435 info.id += device.id;
436
437 /* Set device type to MULTI if subdevices are not of a common type. */
438 if (info.type == DEVICE_NONE) {
439 info.type = device.type;
440 }
441 else if (device.type != info.type) {
442 info.type = DEVICE_MULTI;
443 }
444
445 /* Accumulate device info. */
446 info.has_nanovdb &= device.has_nanovdb;
447 info.has_mnee &= device.has_mnee;
448 info.has_osl &= device.has_osl;
449 info.has_guiding &= device.has_guiding;
450 info.has_profiling &= device.has_profiling;
451 info.has_peer_memory |= device.has_peer_memory;
452 info.use_hardware_raytracing |= device.use_hardware_raytracing;
453 info.denoisers &= device.denoisers;
454 }
455
456 return info;
457}
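
An illustrative sketch (not part of device.cpp): combining all OptiX devices and the CPU into a single DeviceInfo with get_multi_device() above. Passing threads = 0 lets the task scheduler pick the CPU thread count, and background = true keeps CPU render threads enabled.

static DeviceInfo build_gpu_plus_cpu_info_example()
{
  vector<DeviceInfo> subdevices = Device::available_devices(DEVICE_MASK_OPTIX);
  const vector<DeviceInfo> cpus = Device::available_devices(DEVICE_MASK_CPU);
  subdevices.insert(subdevices.end(), cpus.begin(), cpus.end());

  if (subdevices.empty()) {
    return Device::dummy_device("No render devices found");
  }
  return Device::get_multi_device(subdevices, /*threads=*/0, /*background=*/true);
}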
458
459void Device::tag_update()
460{
461 free_memory();
462}
463
464void Device::free_memory()
465{
466 devices_initialized_mask = 0;
467 cuda_devices.free_memory();
468 optix_devices.free_memory();
469 hip_devices.free_memory();
470 oneapi_devices.free_memory();
471 cpu_devices.free_memory();
472 metal_devices.free_memory();
473}
474
475unique_ptr<DeviceQueue> Device::gpu_queue_create()
476{
477 LOG(FATAL) << "Device does not support queues.";
478 return nullptr;
479}
480
481const CPUKernels &Device::get_cpu_kernels()
482{
483 /* Initialize CPU kernels once and reuse. */
484 static const CPUKernels kernels;
485 return kernels;
486}
487
488void Device::get_cpu_kernel_thread_globals(
489 vector<ThreadKernelGlobalsCPU> & /*kernel_thread_globals*/)
490{
491 LOG(FATAL) << "Device does not support CPU kernels.";
492}
493
494OSLGlobals *Device::get_cpu_osl_memory()
495{
496 return nullptr;
497}
498
499void *Device::host_alloc(const MemoryType /*type*/, const size_t size)
500{
501 return util_aligned_malloc(size, MIN_ALIGNMENT_CPU_DATA_TYPES);
502}
503
504void Device::host_free(const MemoryType /*type*/, void *host_pointer, const size_t size)
505{
506 util_aligned_free(host_pointer, size);
507}
508
509GPUDevice::~GPUDevice() noexcept(false) = default;
510
511bool GPUDevice::load_texture_info()
512{
513 /* Note texture_info is never host mapped, and load_texture_info() should only
514 * be called right before kernel enqueue when all memory operations have completed. */
515 if (need_texture_info) {
516 texture_info.copy_to_device();
517 need_texture_info = false;
518 return true;
519 }
520 return false;
521}
522
523void GPUDevice::init_host_memory(const size_t preferred_texture_headroom,
524 const size_t preferred_working_headroom)
525{
526 /* Limit amount of host mapped memory, because allocating too much can
527 * cause system instability. Leave at least half or 4 GB of system
528 * memory free, whichever is smaller. */
529 const size_t default_limit = 4 * 1024 * 1024 * 1024LL;
530 const size_t system_ram = system_physical_ram();
531
532 if (system_ram > 0) {
533 if (system_ram / 2 > default_limit) {
534 map_host_limit = system_ram - default_limit;
535 }
536 else {
537 map_host_limit = system_ram / 2;
538 }
539 }
540 else {
541 VLOG_WARNING << "Mapped host memory disabled, failed to get system RAM";
542 map_host_limit = 0;
543 }
544
545 /* Amount of device memory to keep free after texture memory
546 * and working memory allocations respectively. We set the working
547 * memory limit headroom lower than the texture one so there
548 * is space left for it. */
549 device_working_headroom = preferred_working_headroom > 0 ? preferred_working_headroom :
550 32 * 1024 * 1024LL; // 32MB
551 device_texture_headroom = preferred_texture_headroom > 0 ? preferred_texture_headroom :
552 128 * 1024 * 1024LL; // 128MB
553
554 VLOG_INFO << "Mapped host memory limit set to " << string_human_readable_number(map_host_limit)
555 << " bytes. (" << string_human_readable_size(map_host_limit) << ")";
556}
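
An illustrative sketch (not part of device.cpp): the mapped-host limit rule above, recomputed standalone for a hypothetical machine with 32 GB of RAM. Half of 32 GB is larger than 4 GB, so 4 GB is kept free and the resulting limit is 28 GB.

static size_t map_host_limit_example()
{
  const size_t default_limit = 4 * 1024 * 1024 * 1024LL; /* Keep at most 4 GB free. */
  const size_t system_ram = 32 * 1024 * 1024 * 1024LL;   /* Hypothetical 32 GB of RAM. */

  return (system_ram / 2 > default_limit) ? system_ram - default_limit : system_ram / 2;
}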
557
558void GPUDevice::move_textures_to_host(size_t size, const size_t headroom, const bool for_texture)
559{
560 static thread_mutex move_mutex;
561 const thread_scoped_lock lock(move_mutex);
562
563 /* Check if there is enough space. Done within the mutex lock so that multiple
564 * calling threads take into account memory freed by another thread. */
565 size_t total = 0;
566 size_t free = 0;
567 get_device_memory_info(total, free);
568 if (size + headroom < free) {
569 return;
570 }
571
572 while (size > 0) {
573 /* Find suitable memory allocation to move. */
574 device_memory *max_mem = nullptr;
575 size_t max_size = 0;
576 bool max_is_image = false;
577
578 thread_scoped_lock lock(device_mem_map_mutex);
579 for (MemMap::value_type &pair : device_mem_map) {
580 device_memory &mem = *pair.first;
581 Mem *cmem = &pair.second;
582
583 /* Can only move textures allocated on this device (and not those from peer devices).
584 * And need to ignore memory that is already on the host. */
585 if (!mem.is_resident(this) || mem.is_shared(this)) {
586 continue;
587 }
588
589 const bool is_texture = (mem.type == MEM_TEXTURE || mem.type == MEM_GLOBAL) &&
590 (&mem != &texture_info);
591 const bool is_image = is_texture && (mem.data_height > 1);
592
593 /* Can't move this type of memory. */
594 if (!is_texture || cmem->array) {
595 continue;
596 }
597
598 /* For other textures, only move image textures. */
599 if (for_texture && !is_image) {
600 continue;
601 }
602
603 /* Try to move largest allocation, prefer moving images. */
604 if (is_image > max_is_image || (is_image == max_is_image && mem.device_size > max_size)) {
605 max_is_image = is_image;
606 max_size = mem.device_size;
607 max_mem = &mem;
608 }
609 }
610 lock.unlock();
611
612 /* Move to host memory. This part is mutex protected since
613 * multiple backend devices could be moving the memory. The
614 * first one will do it, and the rest will adopt the pointer. */
615 if (max_mem) {
616 VLOG_WORK << "Move memory from device to host: " << max_mem->name;
617
618 /* Potentially need to call back into multi device, so pointer mapping
619 * and peer devices are updated. This is also necessary since the device
620 * pointer may just be a key here, so cannot be accessed and freed directly.
621 * Unfortunately it does mean that memory is reallocated on all other
622 * devices as well, which is potentially dangerous when still in use (since
623 * a thread rendering on another devices would only be caught in this mutex
624 * if it so happens to do an allocation at the same time as well. */
625 max_mem->move_to_host = true;
626 max_mem->device_move_to_host();
627 max_mem->move_to_host = false;
628 size = (max_size >= size) ? 0 : size - max_size;
629
630 /* Tag texture info update for new pointers. */
631 need_texture_info = true;
632 }
633 else {
634 break;
635 }
636 }
637}
638
639GPUDevice::Mem *GPUDevice::generic_alloc(device_memory &mem, const size_t pitch_padding)
640{
641 void *device_pointer = nullptr;
642 const size_t size = mem.memory_size() + pitch_padding;
643
644 bool mem_alloc_result = false;
645 const char *status = "";
646
647 /* First try allocating in device memory, respecting headroom. We make
648 * an exception for texture info. It is small and frequently accessed,
649 * so treat it as working memory.
650 *
651 * If there is not enough room for working memory, we will try to move
652 * textures to host memory, assuming the performance impact would have
653 * been worse for working memory. */
654 const bool is_texture = (mem.type == MEM_TEXTURE || mem.type == MEM_GLOBAL) &&
655 (&mem != &texture_info);
656 const bool is_image = is_texture && (mem.data_height > 1);
657
658 const size_t headroom = (is_texture) ? device_texture_headroom : device_working_headroom;
659
660 /* Move textures to host memory if needed. */
661 if (!mem.move_to_host && !is_image && can_map_host) {
662 move_textures_to_host(size, headroom, is_texture);
663 }
664
665 size_t total = 0;
666 size_t free = 0;
667 get_device_memory_info(total, free);
668
669 /* Allocate in device memory. */
670 if ((!mem.move_to_host && (size + headroom) < free) || (mem.type == MEM_DEVICE_ONLY)) {
671 mem_alloc_result = alloc_device(device_pointer, size);
672 if (mem_alloc_result) {
673 device_mem_in_use += size;
674 status = " in device memory";
675 }
676 }
677
678 /* Fall back to mapped host memory if needed and possible. */
679
680 void *shared_pointer = nullptr;
681
682 if (!mem_alloc_result && can_map_host && mem.type != MEM_DEVICE_ONLY) {
683 if (mem.shared_pointer) {
684 /* Another device already allocated host memory. */
685 mem_alloc_result = true;
686 shared_pointer = mem.shared_pointer;
687 }
688 else if (map_host_used + size < map_host_limit) {
689 /* Allocate host memory ourselves. */
690 mem_alloc_result = shared_alloc(shared_pointer, size);
691
692 assert((mem_alloc_result && shared_pointer != nullptr) ||
693 (!mem_alloc_result && shared_pointer == nullptr));
694 }
695
696 if (mem_alloc_result) {
697 device_pointer = shared_to_device_pointer(shared_pointer);
698 map_host_used += size;
699 status = " in host memory";
700 }
701 }
702
703 if (!mem_alloc_result) {
704 if (mem.type == MEM_DEVICE_ONLY) {
705 status = " failed, out of device memory";
706 set_error("System is out of GPU memory");
707 }
708 else {
709 status = " failed, out of device and host memory";
710 set_error("System is out of GPU and shared host memory");
711 }
712 }
713
714 if (mem.name) {
715 VLOG_WORK << "Buffer allocate: " << mem.name << ", "
716 << string_human_readable_number(mem.memory_size()) << " bytes. ("
717 << string_human_readable_size(mem.memory_size()) << ")" << status;
718 }
719
720 mem.device_pointer = (device_ptr)device_pointer;
721 mem.device_size = size;
722 stats.mem_alloc(size);
723
724 if (!mem.device_pointer) {
725 return nullptr;
726 }
727
728 /* Insert into map of allocations. */
729 const thread_scoped_lock lock(device_mem_map_mutex);
730 Mem *cmem = &device_mem_map[&mem];
731 if (shared_pointer != nullptr) {
732 /* Replace host pointer with our host allocation. Only works if
733 * memory layout is the same and has no pitch padding. Also
734 * does not work if we move textures to host during a render,
735 * since other devices might be using the memory. */
736
737 if (!mem.move_to_host && pitch_padding == 0 && mem.host_pointer &&
738 mem.host_pointer != shared_pointer)
739 {
740 memcpy(shared_pointer, mem.host_pointer, size);
741 host_free(mem.type, mem.host_pointer, mem.memory_size());
742 mem.host_pointer = shared_pointer;
743 }
744 mem.shared_pointer = shared_pointer;
745 mem.shared_counter++;
746 }
747
748 return cmem;
749}
750
751void GPUDevice::generic_free(device_memory &mem)
752{
753 if (!(mem.device_pointer && mem.is_resident(this))) {
754 return;
755 }
756
757 /* Host pointer should already have been freed at this point. If not we might
758 * end up freeing shared memory and can't recover original host memory. */
759 assert(mem.host_pointer == nullptr || mem.move_to_host);
760
761 const thread_scoped_lock lock(device_mem_map_mutex);
762 DCHECK(device_mem_map.find(&mem) != device_mem_map.end());
763
764 /* For host mapped memory, reference counting is used to safely free it. */
765 if (mem.is_shared(this)) {
766 assert(mem.shared_counter > 0);
767 if (--mem.shared_counter == 0) {
768 if (mem.host_pointer == mem.shared_pointer) {
769 /* Safely move the device-side data back to the host before it is freed.
770 * We should actually never reach this code as it is inefficient, but
771 * better than to crash if there is a bug. */
772 assert(!"GPU device should not copy memory back to host");
773 const size_t size = mem.memory_size();
774 mem.host_pointer = mem.host_alloc(size);
775 memcpy(mem.host_pointer, mem.shared_pointer, size);
776 }
777 shared_free(mem.shared_pointer);
778 mem.shared_pointer = nullptr;
779 }
780 map_host_used -= mem.device_size;
781 }
782 else {
783 /* Free device memory. */
784 free_device((void *)mem.device_pointer);
785 device_mem_in_use -= mem.device_size;
786 }
787
788 stats.mem_free(mem.device_size);
789 mem.device_pointer = 0;
790 mem.device_size = 0;
791
792 device_mem_map.erase(device_mem_map.find(&mem));
793}
794
795void GPUDevice::generic_copy_to(device_memory &mem)
796{
797 if (!mem.host_pointer || !mem.device_pointer) {
798 return;
799 }
800
801 /* If not host mapped, the current device only uses device memory allocated by backend
802 * device allocation regardless of mem.host_pointer and mem.shared_pointer, and should
803 * copy data from mem.host_pointer. */
804 if (!(mem.is_shared(this) && mem.host_pointer == mem.shared_pointer)) {
805 copy_host_to_device((void *)mem.device_pointer, mem.host_pointer, mem.memory_size());
806 }
807}
808
809bool GPUDevice::is_shared(const void *shared_pointer,
810 const device_ptr device_pointer,
811 Device * /*sub_device*/)
812{
813 return (shared_pointer && device_pointer &&
814 (device_ptr)shared_to_device_pointer(shared_pointer) == device_pointer);
815}
816
817/* DeviceInfo */
818
819bool DeviceInfo::contains_device_type(const DeviceType type) const
820{
821 if (this->type == type) {
822 return true;
823 }
824 for (const DeviceInfo &info : multi_devices) {
825 if (info.contains_device_type(type)) {
826 return true;
827 }
828 }
829 return false;
830}
831
832CCL_NAMESPACE_END