34bool HIPDevice::have_precompiled_kernels()
36 string fatbins_path =
path_get(
"lib");
45void HIPDevice::set_error(
const string &
error)
50 fprintf(stderr,
"\nRefer to the Cycles GPU rendering documentation for possible solutions:\n");
52 "https://docs.blender.org/manual/en/latest/render/cycles/gpu_rendering.html\n\n");
58 :
GPUDevice(info, stats, profiler, headless)
61 static_assert(
sizeof(texMemObject) ==
sizeof(hipTextureObject_t));
62 static_assert(
sizeof(arrayMemObject) ==
sizeof(hArray));
72 need_texture_info =
false;
77 hipError_t
result = hipInit(0);
78 if (
result != hipSuccess) {
79 set_error(
string_printf(
"Failed to initialize HIP runtime (%s)", hipewErrorString(
result)));
84 result = hipDeviceGet(&hipDevice, hipDevId);
85 if (
result != hipSuccess) {
86 set_error(
string_printf(
"Failed to get HIP device handle from ordinal (%s)",
87 hipewErrorString(
result)));
95 hip_assert(hipDeviceGetAttribute(&value, hipDeviceAttributeCanMapHostMemory, hipDevice));
96 can_map_host = value != 0;
99 hipDeviceGetAttribute(&pitch_alignment, hipDeviceAttributeTexturePitchAlignment, hipDevice));
101 unsigned int ctx_flags = hipDeviceLmemResizeToMax;
103 ctx_flags |= hipDeviceMapHost;
108 result = hipCtxCreate(&hipContext, ctx_flags, hipDevice);
110 if (
result != hipSuccess) {
116 hipDeviceGetAttribute(&major, hipDeviceAttributeComputeCapabilityMajor, hipDevId);
117 hipDeviceGetAttribute(&minor, hipDeviceAttributeComputeCapabilityMinor, hipDevId);
118 hipDevArchitecture = major * 100 + minor * 10;
121 hip_assert(hipRuntimeGetVersion(&hipRuntimeVersion));
124 hipCtxPopCurrent(
nullptr);
127HIPDevice::~HIPDevice()
131 hip_assert(hipModuleUnload(hipModule));
133 hip_assert(hipCtxDestroy(hipContext));
136bool HIPDevice::support_device(
const uint )
138 if (hipSupportsDevice(hipDevId)) {
142 hipDeviceProp_t props;
143 hipGetDeviceProperties(&props, hipDevId);
145 set_error(
string_printf(
"HIP backend requires AMD RDNA graphics card or up, but found %s.",
150bool HIPDevice::check_peer_access(
Device *peer_device)
152 if (peer_device ==
this) {
159 HIPDevice *
const peer_device_hip =
static_cast<HIPDevice *
>(peer_device);
162 hip_assert(hipDeviceCanAccessPeer(&can_access, hipDevice, peer_device_hip->hipDevice));
163 if (can_access == 0) {
168 hip_assert(hipDeviceGetP2PAttribute(
169 &can_access, hipDevP2PAttrHipArrayAccessSupported, hipDevice, peer_device_hip->hipDevice));
170 if (can_access == 0) {
176 const HIPContextScope scope(
this);
177 hipError_t
result = hipCtxEnablePeerAccess(peer_device_hip->hipContext, 0);
178 if (
result != hipSuccess) {
179 set_error(
string_printf(
"Failed to enable peer access on HIP context (%s)",
180 hipewErrorString(
result)));
185 const HIPContextScope scope(peer_device_hip);
186 hipError_t
result = hipCtxEnablePeerAccess(hipContext, 0);
187 if (
result != hipSuccess) {
188 set_error(
string_printf(
"Failed to enable peer access on HIP context (%s)",
189 hipewErrorString(
result)));
197bool HIPDevice::use_adaptive_compilation()
205string HIPDevice::compile_kernel_get_common_cflags(
const uint kernel_features)
208 const string source_path =
path_get(
"source");
209 const string include_path = source_path;
215 include_path.c_str());
216 if (use_adaptive_compilation()) {
217 cflags +=
" -D__KERNEL_FEATURES__=" +
to_string(kernel_features);
220 const char *extra_cflags = getenv(
"CYCLES_HIP_EXTRA_CFLAGS");
222 cflags += string(
" ") + string(extra_cflags);
226 cflags +=
" -DWITH_NANOVDB";
229# ifdef WITH_CYCLES_DEBUG
230 cflags +=
" -DWITH_CYCLES_DEBUG";
236string HIPDevice::compile_kernel(
const uint kernel_features,
const char *name,
const char *base)
240 hipDeviceGetAttribute(&major, hipDeviceAttributeComputeCapabilityMajor, hipDevId);
241 hipDeviceGetAttribute(&minor, hipDeviceAttributeComputeCapabilityMinor, hipDevId);
242 const std::string arch = hipDeviceArch(hipDevId);
245 if (!use_adaptive_compilation()) {
247 VLOG_INFO <<
"Testing for pre-compiled kernel " << fatbin <<
".";
249 VLOG_INFO <<
"Using precompiled kernel.";
255 string source_path =
path_get(
"source");
261 string common_cflags = compile_kernel_get_common_cflags(kernel_features);
264 const char *
const kernel_ext =
"genco";
265 std::string
options =
"-Wno-parentheses-equality -Wno-unused-value -ffast-math";
266 if (hipNeedPreciseMath(arch)) {
268 " -fhip-fp32-correctly-rounded-divide-sqrt -fno-gpu-approx-transcendentals "
269 "-fgpu-flush-denormals-to-zero -ffp-contract=off");
273 options.append(
" -save-temps");
275 if (major == 9 && minor == 0) {
279 options.append(
" --offload-arch=").append(arch);
281 const string include_path = source_path;
283 "cycles_%s_%s_%s", name, arch.c_str(), kernel_md5.c_str());
285 VLOG_INFO <<
"Testing for locally compiled kernel " << fatbin <<
".";
287 VLOG_INFO <<
"Using locally compiled kernel.";
292 if (!use_adaptive_compilation() && have_precompiled_kernels()) {
293 if (!hipSupportsDevice(hipDevId)) {
295 string_printf(
"HIP backend requires compute capability 10.1 or up, but found %d.%d. "
296 "Your GPU is not supported.",
302 string_printf(
"HIP binary kernel for this graphics card compute "
303 "capability (%d.%d) not found.",
312 const char *
const hipcc = hipewCompilerPath();
313 if (hipcc ==
nullptr) {
315 "HIP hipcc compiler not found. "
316 "Install HIP toolkit in default location.");
320# ifdef WITH_HIP_SDK_5
321 int hip_major_ver = hipRuntimeVersion / 10000000;
322 if (hip_major_ver > 5) {
324 "HIP Runtime version %d does not work with kernels compiled with HIP SDK 5\n",
329 const int hipcc_hip_version = hipewCompilerVersion();
330 VLOG_INFO <<
"Found hipcc " << hipcc <<
", HIP version " << hipcc_hip_version <<
".";
339 string command =
string_printf(
"%s %s -I \"%s\" --%s \"%s\" -o \"%s\" %s",
342 include_path.c_str(),
346 common_cflags.c_str());
348 printf(
"Compiling %sHIP kernel ...\n%s\n",
349 (use_adaptive_compilation()) ?
"adaptive " :
"",
353 command =
"call " + command;
355 if (system(command.c_str()) != 0) {
357 "Failed to execute compilation command, "
358 "see console for details.");
365 "HIP kernel compilation failed, "
366 "see console for details.");
370 printf(
"Kernel compilation finished in %.2lfs.\n",
time_dt() - starttime);
375bool HIPDevice::load_kernels(
const uint kernel_features)
382 if (use_adaptive_compilation()) {
383 VLOG_INFO <<
"Skipping HIP kernel reload for adaptive compilation, not currently supported.";
389 if (hipContext ==
nullptr) {
394 if (!support_device(kernel_features)) {
399 const char *kernel_name =
"kernel";
400 string fatbin = compile_kernel(kernel_features, kernel_name);
401 if (fatbin.empty()) {
406 HIPContextScope scope(
this);
412 result = hipModuleLoadData(&hipModule, fatbin_data.c_str());
415 result = hipErrorFileNotFound;
418 if (
result != hipSuccess) {
420 "Failed to load HIP kernel from '%s' (%s)", fatbin.c_str(), hipewErrorString(
result)));
423 if (
result == hipSuccess) {
425 reserve_local_memory(kernel_features);
428 return (
result == hipSuccess);
431void HIPDevice::reserve_local_memory(
const uint kernel_features)
436 size_t total = 0, free_before = 0, free_after = 0;
439 HIPContextScope scope(
this);
440 hipMemGetInfo(&free_before, &total);
454 HIPDeviceQueue queue(
this);
461 queue.init_execution();
462 queue.enqueue(test_kernel, 1, args);
467 HIPContextScope scope(
this);
468 hipMemGetInfo(&free_after, &total);
476 const size_t keep_mb = 1024;
478 while (free_after > keep_mb * 1024 * 1024LL) {
480 hip_assert(hipMalloc(&tmp, 10 * 1024 * 1024LL));
481 hipMemGetInfo(&free_after, &total);
486void HIPDevice::get_device_memory_info(
size_t &total,
size_t &
free)
488 HIPContextScope scope(
this);
490 hipMemGetInfo(&
free, &total);
493bool HIPDevice::alloc_device(
void *&device_pointer,
const size_t size)
495 HIPContextScope scope(
this);
497 hipError_t mem_alloc_result = hipMalloc((hipDeviceptr_t *)&device_pointer,
size);
498 return mem_alloc_result == hipSuccess;
501void HIPDevice::free_device(
void *device_pointer)
503 HIPContextScope scope(
this);
505 hip_assert(hipFree((hipDeviceptr_t)device_pointer));
508bool HIPDevice::shared_alloc(
void *&shared_pointer,
const size_t size)
510 HIPContextScope scope(
this);
512 hipError_t mem_alloc_result = hipHostMalloc(
513 &shared_pointer,
size, hipHostMallocMapped | hipHostMallocWriteCombined);
515 return mem_alloc_result == hipSuccess;
518void HIPDevice::shared_free(
void *shared_pointer)
520 HIPContextScope scope(
this);
522 hipHostFree(shared_pointer);
525void *HIPDevice::shared_to_device_pointer(
const void *shared_pointer)
527 HIPContextScope scope(
this);
528 void *device_pointer =
nullptr;
530 hipHostGetDevicePointer((hipDeviceptr_t *)&device_pointer, (
void *)shared_pointer, 0));
531 return device_pointer;
534void HIPDevice::copy_host_to_device(
void *device_pointer,
void *host_pointer,
const size_t size)
536 const HIPContextScope scope(
this);
538 hip_assert(hipMemcpyHtoD((hipDeviceptr_t)device_pointer, host_pointer,
size));
544 assert(!
"mem_alloc not supported for textures.");
547 assert(!
"mem_alloc not supported for global memory.");
565 generic_copy_to(mem);
568 generic_copy_to(mem);
584 assert(!
"mem_move_to_host only supported for texture and global memory");
588void HIPDevice::mem_copy_from(
589 device_memory &mem,
const size_t y,
size_t w,
const size_t h,
size_t elem)
592 assert(!
"mem_copy_from not supported for textures.");
595 const size_t size = elem *
w * h;
596 const size_t offset = elem *
y *
w;
599 const HIPContextScope scope(
this);
600 hip_assert(hipMemcpyDtoH(
619 const HIPContextScope scope(
this);
645void HIPDevice::const_copy_to(
const char *name,
void *host,
const size_t size)
647 HIPContextScope scope(
this);
651 hip_assert(hipModuleGetGlobal(&mem, &bytes, hipModule,
"kernel_params"));
655# define KERNEL_DATA_ARRAY(data_type, data_name) \
656 if (strcmp(name, #data_name) == 0) { \
657 hip_assert(hipMemcpyHtoD(mem + offsetof(KernelParamsHIP, data_name), host, size)); \
662# include "kernel/data_arrays.h"
663# undef KERNEL_DATA_ARRAY
670 generic_copy_to(mem);
680 generic_copy_to(mem);
683 generic_copy_to(mem);
701static hip_Memcpy2D tex_2d_copy_param(
const device_texture &mem,
const int pitch_alignment)
704 const size_t src_pitch = tex_src_pitch(mem);
705 const size_t dst_pitch =
align_up(src_pitch, pitch_alignment);
708 memset(¶m, 0,
sizeof(param));
709 param.dstMemoryType = hipMemoryTypeDevice;
711 param.dstPitch = dst_pitch;
712 param.srcMemoryType = hipMemoryTypeHost;
714 param.srcPitch = src_pitch;
715 param.WidthInBytes = param.srcPitch;
723 const size_t src_pitch = tex_src_pitch(mem);
726 memset(¶m, 0,
sizeof(HIP_MEMCPY3D));
727 param.dstMemoryType = hipMemoryTypeArray;
729 param.srcMemoryType = hipMemoryTypeHost;
731 param.srcPitch = src_pitch;
732 param.WidthInBytes = param.srcPitch;
740 HIPContextScope scope(
this);
742 hipTextureAddressMode address_mode = hipAddressModeWrap;
745 address_mode = hipAddressModeWrap;
748 address_mode = hipAddressModeClamp;
751 address_mode = hipAddressModeBorder;
754 address_mode = hipAddressModeMirror;
761 hipTextureFilterMode filter_mode;
763 filter_mode = hipFilterModePoint;
766 filter_mode = hipFilterModeLinear;
773 format = HIP_AD_FORMAT_UNSIGNED_INT8;
776 format = HIP_AD_FORMAT_UNSIGNED_INT16;
779 format = HIP_AD_FORMAT_UNSIGNED_INT32;
782 format = HIP_AD_FORMAT_SIGNED_INT32;
785 format = HIP_AD_FORMAT_FLOAT;
788 format = HIP_AD_FORMAT_HALF;
796 hArray array_3d =
nullptr;
800 cmem = &device_mem_map[&mem];
805 cmem->array =
reinterpret_cast<arrayMemObject
>(array_3d);
810 HIP_ARRAY3D_DESCRIPTOR desc;
823 hip_assert(hipArray3DCreate((hArray *)&array_3d, &desc));
833 const HIP_MEMCPY3D param = tex_3d_copy_param(mem);
834 hip_assert(hipDrvMemcpy3D(¶m));
837 cmem = &device_mem_map[&mem];
839 cmem->array =
reinterpret_cast<arrayMemObject
>(array_3d);
843 const size_t dst_pitch =
align_up(tex_src_pitch(mem), pitch_alignment);
844 const size_t dst_size = dst_pitch * mem.
data_height;
846 cmem = generic_alloc(mem, dst_size - mem.
memory_size());
851 const hip_Memcpy2D param = tex_2d_copy_param(mem, pitch_alignment);
852 hip_assert(hipDrvMemcpy2DUnaligned(¶m));
856 cmem = generic_alloc(mem);
873 hipResourceDesc resDesc;
874 memset(&resDesc, 0,
sizeof(resDesc));
877 resDesc.resType = hipResourceTypeArray;
878 resDesc.res.array.h_Array = array_3d;
882 const size_t dst_pitch =
align_up(tex_src_pitch(mem), pitch_alignment);
884 resDesc.resType = hipResourceTypePitch2D;
886 resDesc.res.pitch2D.format =
format;
890 resDesc.res.pitch2D.pitchInBytes = dst_pitch;
893 resDesc.resType = hipResourceTypeLinear;
895 resDesc.res.linear.format =
format;
900 hipTextureDesc texDesc;
901 memset(&texDesc, 0,
sizeof(texDesc));
902 texDesc.addressMode[0] = address_mode;
903 texDesc.addressMode[1] = address_mode;
904 texDesc.addressMode[2] = address_mode;
905 texDesc.filterMode = filter_mode;
906 texDesc.flags = HIP_TRSF_NORMALIZED_COORDINATES;
909 cmem = &device_mem_map[&mem];
911 if (hipTexObjectCreate(&cmem->texobject, &resDesc, &texDesc,
nullptr) != hipSuccess) {
913 "Failed to create texture. Maximum GPU texture size or available GPU memory was likely "
927 if (slot >= texture_info.size()) {
929 texture_info.resize(slot + 128);
931 texture_info[slot] = tex_info;
932 need_texture_info =
true;
944 bool texture_allocated =
false;
947 texture_allocated = mem.
slot < texture_info.size() && texture_info[mem.
slot].data != 0;
949 if (!texture_allocated) {
956 HIPContextScope scope(
this);
957 const HIP_MEMCPY3D param = tex_3d_copy_param(mem);
958 hip_assert(hipDrvMemcpy3D(¶m));
961 HIPContextScope scope(
this);
962 const hip_Memcpy2D param = tex_2d_copy_param(mem, pitch_alignment);
963 hip_assert(hipDrvMemcpy2DUnaligned(¶m));
966 generic_copy_to(mem);
973 HIPContextScope scope(
this);
977 auto it = device_mem_map.find(&mem);
978 if (it == device_mem_map.end()) {
982 const Mem &cmem = it->second;
990 if (cmem.texobject) {
992 hipTexObjectDestroy(cmem.texobject);
997 device_mem_map.erase(device_mem_map.find(&mem));
999 else if (cmem.array) {
1001 hipArrayDestroy(
reinterpret_cast<hArray
>(cmem.array));
1006 device_mem_map.erase(device_mem_map.find(&mem));
1016 return make_unique<HIPDeviceQueue>(
this);
1028 HIPContextScope scope(
this);
1030 switch (interop_device.
type) {
1041 int num_all_devices = 0;
1042 hip_assert(hipGetDeviceCount(&num_all_devices));
1044 if (num_all_devices == 0) {
1049 uint num_gl_devices = 0;
1050 hipGLGetDevices(&num_gl_devices, gl_devices.data(), num_all_devices, hipGLDeviceListAll);
1053 for (hipDevice_t gl_device : gl_devices) {
1054 if (gl_device == hipDevice) {
1062 VLOG_INFO <<
"Graphics interop: found matching OpenGL device for HIP";
1065 VLOG_INFO <<
"Graphics interop: no matching OpenGL device for HIP";
1081int HIPDevice::get_num_multiprocessors()
1083 return get_device_default_attribute(hipDeviceAttributeMultiprocessorCount, 0);
1086int HIPDevice::get_max_num_threads_per_multiprocessor()
1088 return get_device_default_attribute(hipDeviceAttributeMaxThreadsPerMultiProcessor, 0);
1091bool HIPDevice::get_device_attribute(hipDeviceAttribute_t attribute,
int *value)
1093 HIPContextScope scope(
this);
1095 return hipDeviceGetAttribute(value, attribute, hipDevice) == hipSuccess;
1098int HIPDevice::get_device_default_attribute(hipDeviceAttribute_t attribute,
1099 const int default_value)
1102 if (!get_device_attribute(attribute, &value)) {
1103 return default_value;
void BLI_kdtree_nd_ free(KDTree *tree)
BMesh const char void * data
unsigned long long int uint64_t
static DBVT_INLINE btScalar size(const btDbvtVolume &a)
SIMD_FORCE_INLINE const btScalar & w() const
Return the w value.
virtual void set_error(const string &error)
void mem_alloc(const size_t size)
void mem_free(const size_t size)
bool is_resident(Device *sub_device) const
size_t memory_elements_size(const int elements)
bool is_shared(Device *sub_device) const
device_ptr device_pointer
static constexpr size_t datatype_size(DataType datatype)
CCL_NAMESPACE_BEGIN struct Options options
#define KERNEL_DATA_ARRAY(type, name)
DebugFlags & DebugFlags()
#define KERNEL_FEATURE_NODE_RAYTRACE
#define KERNEL_FEATURE_MNEE
#define CCL_NAMESPACE_END
static const char * to_string(const Interpolation &interp)
#define assert(assertion)
@ DEVICE_KERNEL_INTEGRATOR_SHADE_SURFACE
@ DEVICE_KERNEL_INTEGRATOR_SHADE_SURFACE_RAYTRACE
@ DEVICE_KERNEL_INTEGRATOR_SHADE_SURFACE_MNEE
string util_md5_string(const string &str)
static void error(const char *str)
string path_cache_get(const string &sub)
string path_get(const string &sub)
string path_files_md5_hash(const string &dir)
string path_join(const string &dir, const string &file)
bool path_exists(const string &path)
void path_create_directories(const string &filepath)
bool path_read_compressed_text(const string &path, string &text)
string string_human_readable_size(size_t size)
string string_human_readable_number(size_t num)
CCL_NAMESPACE_BEGIN string string_printf(const char *format,...)
std::unique_lock< std::mutex > thread_scoped_lock
CCL_NAMESPACE_BEGIN double time_dt()
ccl_device_inline size_t align_up(const size_t offset, const size_t alignment)
@ IMAGE_DATA_TYPE_NANOVDB_FP16
@ IMAGE_DATA_TYPE_NANOVDB_FLOAT
@ IMAGE_DATA_TYPE_NANOVDB_FLOAT3
@ IMAGE_DATA_TYPE_NANOVDB_FPN