14#include "device/cpu/kernel.h"
37bool Device::need_types_update =
true;
38bool Device::need_devices_update =
true;
46uint Device::devices_initialized_mask = 0;
56 BVH2 *
const bvh2 =
static_cast<BVH2 *
>(bvh);
70 if (!
info.multi_devices.empty()) {
124 if (device ==
nullptr) {
133 if (strcmp(name,
"CPU") == 0) {
136 if (strcmp(name,
"CUDA") == 0) {
139 if (strcmp(name,
"OPTIX") == 0) {
142 if (strcmp(name,
"MULTI") == 0) {
145 if (strcmp(name,
"HIP") == 0) {
148 if (strcmp(name,
"METAL") == 0) {
151 if (strcmp(name,
"ONEAPI") == 0) {
154 if (strcmp(name,
"HIPRT") == 0) {
226#if defined(WITH_CUDA) || defined(WITH_OPTIX)
236 devices.push_back(
info);
251 devices.push_back(
info);
265 devices.push_back(
info);
279 devices.push_back(
info);
290 devices.push_back(
info);
303 devices.push_back(
info);
325 capabilities +=
"\nCPU device capabilities: ";
334 capabilities +=
"\nCUDA device capabilities:\n";
346 capabilities +=
"\nHIP device capabilities:\n";
358 capabilities +=
"\noneAPI device capabilities:\n";
370 capabilities +=
"\nMetal device capabilities:\n";
384 assert(!subdevices.empty());
386 if (subdevices.size() == 1) {
388 return subdevices.front();
394 info.description =
"Multi Device";
397 info.has_nanovdb =
true;
398 info.has_mnee =
true;
400 info.has_guiding =
true;
401 info.has_profiling =
true;
402 info.has_peer_memory =
false;
403 info.use_hardware_raytracing =
false;
408 if (device.type ==
DEVICE_CPU && subdevices.size() > 1) {
411 const int cpu_threads =
max(orig_cpu_threads - (subdevices.size() - 1),
size_t(0));
413 VLOG_INFO <<
"CPU render threads reduced from " << orig_cpu_threads <<
" to "
414 << cpu_threads <<
", to dedicate to GPU.";
416 if (cpu_threads >= 1) {
419 info.multi_devices.push_back(cpu_device);
426 VLOG_INFO <<
"CPU render threads disabled for interactive render.";
431 info.multi_devices.push_back(device);
435 info.id += device.id;
439 info.type = device.type;
441 else if (device.type !=
info.type) {
446 info.has_nanovdb &= device.has_nanovdb;
447 info.has_mnee &= device.has_mnee;
448 info.has_osl &= device.has_osl;
449 info.has_guiding &= device.has_guiding;
450 info.has_profiling &= device.has_profiling;
451 info.has_peer_memory |= device.has_peer_memory;
452 info.use_hardware_raytracing |= device.use_hardware_raytracing;
453 info.denoisers &= device.denoisers;
466 devices_initialized_mask = 0;
467 cuda_devices.free_memory();
468 optix_devices.free_memory();
469 hip_devices.free_memory();
470 oneapi_devices.free_memory();
471 cpu_devices.free_memory();
472 metal_devices.free_memory();
477 LOG(FATAL) <<
"Device does not support queues.";
491 LOG(FATAL) <<
"Device does not support CPU kernels.";
524 const size_t preferred_working_headroom)
529 const size_t default_limit = 4 * 1024 * 1024 * 1024LL;
532 if (system_ram > 0) {
533 if (system_ram / 2 > default_limit) {
541 VLOG_WARNING <<
"Mapped host memory disabled, failed to get system RAM";
576 bool max_is_image =
false;
581 Mem *cmem = &pair.second;
591 const bool is_image = is_texture && (mem.
data_height > 1);
594 if (!is_texture || cmem->
array) {
599 if (for_texture && !is_image) {
604 if (is_image > max_is_image || (is_image == max_is_image && mem.
device_size > max_size)) {
605 max_is_image = is_image;
616 VLOG_WORK <<
"Move memory from device to host: " << max_mem->
name;
641 void *device_pointer =
nullptr;
644 bool mem_alloc_result =
false;
645 const char *status =
"";
656 const bool is_image = is_texture && (mem.
data_height > 1);
672 if (mem_alloc_result) {
674 status =
" in device memory";
680 void *shared_pointer =
nullptr;
685 mem_alloc_result =
true;
692 assert((mem_alloc_result && shared_pointer !=
nullptr) ||
693 (!mem_alloc_result && shared_pointer ==
nullptr));
696 if (mem_alloc_result) {
699 status =
" in host memory";
703 if (!mem_alloc_result) {
705 status =
" failed, out of device memory";
706 set_error(
"System is out of GPU memory");
709 status =
" failed, out of device and host memory";
710 set_error(
"System is out of GPU and shared host memory");
731 if (shared_pointer !=
nullptr) {
772 assert(!
"GPU device should not copy memory back to host");
813 return (shared_pointer && device_pointer &&
821 if (this->type ==
type) {
825 if (info.contains_device_type(
type)) {
void BLI_kdtree_nd_ free(KDTree *tree)
CCL_NAMESPACE_BEGIN void * util_aligned_malloc(const size_t size, const int alignment)
void util_aligned_free(void *ptr, const size_t size)
static DBVT_INLINE btScalar size(const btDbvtVolume &a)
void refit(btStridingMeshInterface *triangles, const btVector3 &aabbMin, const btVector3 &aabbMax)
void refit(Progress &progress)
void build(Progress &progress, Stats *stats)
vector< DeviceInfo > multi_devices
bool contains_device_type(const DeviceType type) const
virtual void host_free(const MemoryType type, void *host_pointer, const size_t size)
static void free_memory()
static DeviceInfo dummy_device(const string &error_msg="")
Device(const DeviceInfo &info_, Stats &stats_, Profiler &profiler_, bool headless_)
static DeviceInfo get_multi_device(const vector< DeviceInfo > &subdevices, const int threads, bool background)
static const CPUKernels & get_cpu_kernels()
virtual ~Device() noexcept(false)
virtual unique_ptr< DeviceQueue > gpu_queue_create()
virtual void build_bvh(BVH *bvh, Progress &progress, bool refit)
static DeviceType type_from_string(const char *name)
virtual void get_cpu_kernel_thread_globals(vector< ThreadKernelGlobalsCPU > &)
virtual void set_error(const string &error)
static string device_capabilities(const uint device_type_mask=DEVICE_MASK_ALL)
static vector< DeviceType > available_types()
static string string_from_type(DeviceType type)
virtual OSLGlobals * get_cpu_osl_memory()
static vector< DeviceInfo > available_devices(const uint device_type_mask=DEVICE_MASK_ALL)
static unique_ptr< Device > create(const DeviceInfo &info, Stats &stats, Profiler &profiler, bool headless)
virtual void * host_alloc(const MemoryType type, const size_t size)
static int max_concurrency()
bool is_resident(Device *sub_device) const
void * host_alloc(const size_t size)
bool is_shared(Device *sub_device) const
device_ptr device_pointer
void device_move_to_host()
#define MIN_ALIGNMENT_CPU_DATA_TYPES
void device_cpu_info(vector< DeviceInfo > &devices)
string device_cpu_capabilities()
CCL_NAMESPACE_BEGIN unique_ptr< Device > device_cpu_create(const DeviceInfo &info, Stats &stats, Profiler &profiler, bool headless)
#define CCL_NAMESPACE_END
void device_cuda_info(vector< DeviceInfo > &devices)
string device_cuda_capabilities()
CCL_NAMESPACE_BEGIN bool device_cuda_init()
unique_ptr< Device > device_cuda_create(const DeviceInfo &info, Stats &stats, Profiler &profiler, bool headless)
unique_ptr< Device > device_dummy_create(const DeviceInfo &info, Stats &stats, Profiler &profiler, bool headless)
void device_hip_info(vector< DeviceInfo > &devices)
string device_hip_capabilities()
unique_ptr< Device > device_hip_create(const DeviceInfo &info, Stats &stats, Profiler &profiler, const bool headless)
CCL_NAMESPACE_BEGIN bool device_hip_init()
unique_ptr< Device > device_multi_create(const DeviceInfo &info, Stats &stats, Profiler &profiler, bool headless)
unique_ptr< Device > device_oneapi_create(const DeviceInfo &info, Stats &stats, Profiler &profiler, bool headless)
void device_oneapi_info(vector< DeviceInfo > &devices)
CCL_NAMESPACE_BEGIN bool device_oneapi_init()
string device_oneapi_capabilities()
CCL_NAMESPACE_BEGIN bool device_optix_init()
void device_optix_info(const vector< DeviceInfo > &cuda_devices, vector< DeviceInfo > &devices)
unique_ptr< Device > device_optix_create(const DeviceInfo &info, Stats &stats, Profiler &profiler, bool headless)
#define assert(assertion)
#define DCHECK(expression)
ccl_device_inline float2 mask(const MaskType mask, const float2 a)
string string_human_readable_size(size_t size)
string string_human_readable_number(size_t num)
bool is_shared(const void *shared_pointer, const device_ptr device_pointer, Device *sub_device) override
virtual bool shared_alloc(void *&shared_pointer, const size_t size)=0
virtual void shared_free(void *shared_pointer)=0
GPUDevice(const DeviceInfo &info_, Stats &stats_, Profiler &profiler_, bool headless_)
size_t device_texture_headroom
virtual void get_device_memory_info(size_t &total, size_t &free)=0
virtual bool alloc_device(void *&device_pointer, const size_t size)=0
size_t device_working_headroom
friend class device_memory
virtual void * shared_to_device_pointer(const void *shared_pointer)=0
virtual void generic_copy_to(device_memory &mem)
virtual void move_textures_to_host(const size_t size, const size_t headroom, const bool for_texture)
virtual void copy_host_to_device(void *device_pointer, void *host_pointer, const size_t size)=0
virtual bool load_texture_info()
virtual void free_device(void *device_pointer)=0
thread_mutex device_mem_map_mutex
virtual void generic_free(device_memory &mem)
virtual void init_host_memory(const size_t preferred_texture_headroom=0, const size_t preferred_working_headroom=0)
virtual GPUDevice::Mem * generic_alloc(device_memory &mem, const size_t pitch_padding=0)
device_vector< TextureInfo > texture_info
~GPUDevice() noexcept(false) override
size_t system_physical_ram()
std::unique_lock< std::mutex > thread_scoped_lock