/* Returns whether precompiled HIP fatbin kernels ship with this build,
 * looking under the "lib" resource path via path_get().
 * NOTE(review): fragmentary extraction -- original source lines are missing
 * between the visible statements (fused line numbers jump); code untouched. */
33bool HIPDevice::have_precompiled_kernels()
 35 string fatbins_path =
      path_get(
          "lib");
/* Records a device error message and prints a pointer to the Cycles GPU
 * rendering documentation on stderr so users can find known workarounds.
 * NOTE(review): fragment -- the base-class call and fprintf continuation
 * lines are missing from this extraction; code untouched. */
44void HIPDevice::set_error(
    const string &
        error)
 49 fprintf(stderr,
      "\nRefer to the Cycles GPU rendering documentation for possible solutions:\n");
 51 "https://docs.blender.org/manual/en/latest/render/cycles/gpu_rendering.html\n\n");
/* HIPDevice constructor: initializes the HIP runtime (hipInit), acquires the
 * device handle (hipDeviceGet), queries device attributes (host-memory
 * mapping support, texture pitch alignment), creates the driver context with
 * hipDeviceLmemResizeToMax (plus hipDeviceMapHost when host mapping is
 * available), derives hipDevArchitecture from the compute capability, and
 * pops the context so HIPContextScope can push it on demand later.
 * The static_asserts pin the project-local texMemObject/arrayMemObject
 * handle sizes to the HIP API types they alias.
 * NOTE(review): fragmentary extraction -- member declarations, early returns
 * after set_error(), and several statements are missing; code untouched. */
57 :
      GPUDevice(info, stats, profiler, headless)
 60 static_assert(
      sizeof(texMemObject) ==
      sizeof(hipTextureObject_t));
 61 static_assert(
      sizeof(arrayMemObject) ==
      sizeof(hArray));
 71 need_texture_info =
      false;
 76 hipError_t
      result = hipInit(0);
 77 if (
      result != hipSuccess) {
 78 set_error(
      string_printf(
          "Failed to initialize HIP runtime (%s)", hipewErrorString(
              result)));
 83 result = hipDeviceGet(&hipDevice, hipDevId);
 84 if (
      result != hipSuccess) {
 85 set_error(
      string_printf(
          "Failed to get HIP device handle from ordinal (%s)",
 86 hipewErrorString(
          result)));
 94 hip_assert(hipDeviceGetAttribute(&value, hipDeviceAttributeCanMapHostMemory, hipDevice));
 95 can_map_host = value != 0;
 98 hipDeviceGetAttribute(&pitch_alignment, hipDeviceAttributeTexturePitchAlignment, hipDevice));
 100 unsigned int ctx_flags = hipDeviceLmemResizeToMax;
 102 ctx_flags |= hipDeviceMapHost;
 107 result = hipCtxCreate(&hipContext, ctx_flags, hipDevice);
 109 if (
      result != hipSuccess) {
 115 hipDeviceGetAttribute(&major, hipDeviceAttributeComputeCapabilityMajor, hipDevId);
 116 hipDeviceGetAttribute(&minor, hipDeviceAttributeComputeCapabilityMinor, hipDevId);
 117 hipDevArchitecture = major * 100 + minor * 10;
 120 hip_assert(hipRuntimeGetVersion(&hipRuntimeVersion));
 123 hipCtxPopCurrent(
      NULL);
/* Destructor: unloads the kernel module and destroys the HIP context created
 * in the constructor. NOTE(review): fragment -- guard conditions between the
 * visible lines are missing from this extraction; code untouched. */
126HIPDevice::~HIPDevice()
 130 hip_assert(hipModuleUnload(hipModule));
 132 hip_assert(hipCtxDestroy(hipContext));
/* Checks whether the selected HIP device is supported by this backend
 * (hipSupportsDevice); on failure, queries the device properties so the
 * error message can name the unsupported card.
 * NOTE(review): fragment -- return statements and the tail of the error
 * string_printf call are missing from this extraction; code untouched. */
135bool HIPDevice::support_device(
    const uint )
 137 if (hipSupportsDevice(hipDevId)) {
 142 hipDeviceProp_t props;
 143 hipGetDeviceProperties(&props, hipDevId);
 145 set_error(
      string_printf(
          "HIP backend requires AMD RDNA graphics card or up, but found %s.",
/* Determines whether this device and `peer_device` can access each other's
 * memory: checks hipDeviceCanAccessPeer and the HIP-array P2P attribute,
 * then enables peer access symmetrically on both contexts via
 * hipCtxEnablePeerAccess (each direction inside its own HIPContextScope).
 * A device trivially "peers" with itself (early `peer_device == this` test).
 * NOTE(review): fragment -- return statements and closing braces are missing
 * from this extraction; code untouched. */
151bool HIPDevice::check_peer_access(
    Device *peer_device)
 153 if (peer_device ==
      this) {
 160 HIPDevice *
      const peer_device_hip =
          static_cast<HIPDevice *
              >(peer_device);
 163 hip_assert(hipDeviceCanAccessPeer(&can_access, hipDevice, peer_device_hip->hipDevice));
 164 if (can_access == 0) {
 169 hip_assert(hipDeviceGetP2PAttribute(
 170 &can_access, hipDevP2PAttrHipArrayAccessSupported, hipDevice, peer_device_hip->hipDevice));
 171 if (can_access == 0) {
 177 const HIPContextScope scope(
      this);
 178 hipError_t
      result = hipCtxEnablePeerAccess(peer_device_hip->hipContext, 0);
 179 if (
      result != hipSuccess) {
 180 set_error(
      string_printf(
          "Failed to enable peer access on HIP context (%s)",
 181 hipewErrorString(
              result)));
 186 const HIPContextScope scope(peer_device_hip);
 187 hipError_t
      result = hipCtxEnablePeerAccess(hipContext, 0);
 188 if (
      result != hipSuccess) {
 189 set_error(
      string_printf(
          "Failed to enable peer access on HIP context (%s)",
 190 hipewErrorString(
              result)));
/* Whether kernels should be recompiled with only the features in use.
 * NOTE(review): body not visible in this fragment (presumably checks a debug
 * flag -- confirm against the full source); signature left untouched. */
198bool HIPDevice::use_adaptive_compilation()
/* Builds the compiler flags shared by all HIP kernel compilations: adds the
 * kernel source include path and, under adaptive compilation, a
 * __KERNEL_FEATURES__ define carrying the requested feature mask.
 * NOTE(review): fragment -- the cflags string_printf and return statement are
 * missing from this extraction; code untouched. */
206string HIPDevice::compile_kernel_get_common_cflags(
    const uint kernel_features)
 209 const string source_path =
      path_get(
          "source");
 210 const string include_path = source_path;
 217 include_path.c_str());
 218 if (use_adaptive_compilation()) {
 219 cflags +=
      " -D__KERNEL_FEATURES__=" +
      to_string(kernel_features);
/* Locates or builds the HIP kernel fatbin for this device. Order of attempts:
 * 1) a precompiled kernel matching the device architecture (skipped under
 *    adaptive compilation), 2) a previously locally-compiled kernel cached by
 *    an MD5-based name, 3) a fresh hipcc compile. Emits user-facing errors
 *    when no precompiled binary exists for an unsupported card, when hipcc is
 *    missing, when the HIP version is older than 4.0, or when the compile
 *    command fails. `--offload-arch` pins the target GPU architecture and
 *    `genco` produces the device code object.
 * NOTE(review): fragment -- variable declarations (fatbin, kernel_md5,
 * starttime), returns, and several statement tails are missing from this
 * extraction; code untouched. */
224string HIPDevice::compile_kernel(
    const uint kernel_features,
    const char *name,
    const char *base)
 228 hipDeviceGetAttribute(&major, hipDeviceAttributeComputeCapabilityMajor, hipDevId);
 229 hipDeviceGetAttribute(&minor, hipDeviceAttributeComputeCapabilityMinor, hipDevId);
 230 const std::string arch = hipDeviceArch(hipDevId);
 233 if (!use_adaptive_compilation()) {
 235 VLOG_INFO <<
      "Testing for pre-compiled kernel " << fatbin <<
      ".";
 237 VLOG_INFO <<
      "Using precompiled kernel.";
 243 string source_path =
      path_get(
          "source");
 249 string common_cflags = compile_kernel_get_common_cflags(kernel_features);
 252 const char *
      const kernel_ext =
          "genco";
 255 options.append(
      "Wno-parentheses-equality -Wno-unused-value -ffast-math");
 257 options.append(
      "Wno-parentheses-equality -Wno-unused-value -O3 -ffast-math");
 260 options.append(
      " -save-temps");
 262 options.append(
      " --offload-arch=").append(arch.c_str());
 264 const string include_path = source_path;
 266 "cycles_%s_%s_%s", name, arch.c_str(), kernel_md5.c_str());
 268 VLOG_INFO <<
      "Testing for locally compiled kernel " << fatbin <<
      ".";
 270 VLOG_INFO <<
      "Using locally compiled kernel.";
 275 if (!use_adaptive_compilation() && have_precompiled_kernels()) {
 276 if (!hipSupportsDevice(hipDevId)) {
 278 string_printf(
      "HIP backend requires compute capability 10.1 or up, but found %d.%d. "
 279 "Your GPU is not supported.",
 285 string_printf(
      "HIP binary kernel for this graphics card compute "
 286 "capability (%d.%d) not found.",
 295 const char *
      const hipcc = hipewCompilerPath();
 298 "HIP hipcc compiler not found. "
 299 "Install HIP toolkit in default location.");
 303 const int hipcc_hip_version = hipewCompilerVersion();
 304 VLOG_INFO <<
      "Found hipcc " << hipcc <<
      ", HIP version " << hipcc_hip_version <<
      ".";
 305 if (hipcc_hip_version < 40) {
 307 "Unsupported HIP version %d.%d detected, "
 308 "you need HIP 4.0 or newer.\n",
 309 hipcc_hip_version / 10,
 310 hipcc_hip_version % 10);
 321 string command =
      string_printf(
          "%s -%s -I %s --%s %s -o \"%s\"",
 324 include_path.c_str(),
 329 printf(
      "Compiling %sHIP kernel ...\n%s\n",
 330 (use_adaptive_compilation()) ?
      "adaptive " :
      "",
 334 command =
      "call " + command;
 336 if (system(command.c_str()) != 0) {
 338 "Failed to execute compilation command, "
 339 "see console for details.");
 346 "HIP kernel compilation failed, "
 347 "see console for details.");
 351 printf(
      "Kernel compilation finished in %.2lfs.\n",
      time_dt() - starttime);
/* Compiles (or reuses) the kernel fatbin and loads it into hipModule with
 * hipModuleLoadData; on success reserves local memory for rendering. Skips
 * reload under adaptive compilation (not currently supported) and bails out
 * for unsupported devices. Returns true iff the module loaded successfully.
 * NOTE(review): fragment -- early returns, fatbin_data read, and the
 * surrounding conditionals are missing from this extraction; code untouched. */
356bool HIPDevice::load_kernels(
    const uint kernel_features)
 363 if (use_adaptive_compilation()) {
 364 VLOG_INFO <<
      "Skipping HIP kernel reload for adaptive compilation, not currently supported.";
 374 if (!support_device(kernel_features)) {
 379 const char *kernel_name =
      "kernel";
 380 string fatbin = compile_kernel(kernel_features, kernel_name);
 385 HIPContextScope scope(
      this);
 391 result = hipModuleLoadData(&hipModule, fatbin_data.c_str());
 393 result = hipErrorFileNotFound;
 397 "Failed to load HIP kernel from '%s' (%s)", fatbin.c_str(), hipewErrorString(
      result)));
 399 if (
      result == hipSuccess) {
 401 reserve_local_memory(kernel_features);
 404 return (
      result == hipSuccess);
/* Forces allocation of the device's local (scratch) memory by launching a
 * test kernel through HIPDeviceQueue, measuring free memory before and after.
 * The trailing loop then grabs 10 MB chunks via hipMalloc until only
 * `keep_mb` (1 GB) remains free -- NOTE(review): presumably a workaround to
 * cap local-memory growth; confirm the surrounding (missing) comments in the
 * full source. Fragment: kernel selection and logging lines are missing from
 * this extraction; code untouched. */
407void HIPDevice::reserve_local_memory(
    const uint kernel_features)
 412 size_t total = 0, free_before = 0, free_after = 0;
 415 HIPContextScope scope(
      this);
 416 hipMemGetInfo(&free_before, &total);
 430 HIPDeviceQueue
      queue(
          this);
 437 queue.init_execution();
 438 queue.enqueue(test_kernel, 1, args);
 443 HIPContextScope scope(
      this);
 444 hipMemGetInfo(&free_after, &total);
 452 const size_t keep_mb = 1024;
 454 while (free_after > keep_mb * 1024 * 1024LL) {
 456 hip_assert(hipMalloc(&tmp, 10 * 1024 * 1024LL));
 457 hipMemGetInfo(&free_after, &total);
/* Reports total and free device memory via hipMemGetInfo, under the device's
 * context scope. NOTE(review): fragment -- braces missing from extraction;
 * code untouched. */
462void HIPDevice::get_device_memory_info(
    size_t &total,
    size_t &
        free)
 464 HIPContextScope scope(
      this);
 466 hipMemGetInfo(&
      free, &total);
/* Allocates `size` bytes of device memory into `device_pointer` with
 * hipMalloc; returns true on success. The cast treats the generic void*&
 * out-parameter as a hipDeviceptr_t slot. */
471bool HIPDevice::alloc_device(
    void *&device_pointer,
    size_t size)
 471 HIPContextScope scope(
      this);
 473 hipError_t mem_alloc_result = hipMalloc((hipDeviceptr_t *)&device_pointer,
      size);
 474 return mem_alloc_result == hipSuccess;
/* Frees device memory previously allocated by alloc_device, under the
 * device's context scope. */
477void HIPDevice::free_device(
    void *device_pointer)
 479 HIPContextScope scope(
      this);
 481 hip_assert(hipFree((hipDeviceptr_t)device_pointer));
/* Allocates pinned host memory that the device can map, using
 * hipHostMallocMapped | hipHostMallocWriteCombined (write-combined favors
 * host-to-device transfer of write-once data). Returns true on success. */
484bool HIPDevice::alloc_host(
    void *&shared_pointer,
    size_t size)
 486 HIPContextScope scope(
      this);
 488 hipError_t mem_alloc_result = hipHostMalloc(
 489 &shared_pointer,
      size, hipHostMallocMapped | hipHostMallocWriteCombined);
 491 return mem_alloc_result == hipSuccess;
/* Releases pinned host memory allocated by alloc_host. */
494void HIPDevice::free_host(
    void *shared_pointer)
 496 HIPContextScope scope(
      this);
 498 hipHostFree(shared_pointer);
/* Converts a mapped pinned-host pointer into the corresponding device-visible
 * pointer via hipHostGetDevicePointer (flags must be 0 per the HIP API). */
501void HIPDevice::transform_host_pointer(
    void *&device_pointer,
    void *&shared_pointer)
 503 HIPContextScope scope(
      this);
 505 hip_assert(hipHostGetDevicePointer((hipDeviceptr_t *)&device_pointer, shared_pointer, 0));
/* Synchronously copies `size` bytes from host memory to device memory with
 * hipMemcpyHtoD, under the device's context scope. */
508void HIPDevice::copy_host_to_device(
    void *device_pointer,
    void *host_pointer,
    size_t size)
 510 const HIPContextScope scope(
      this);
 512 hip_assert(hipMemcpyHtoD((hipDeviceptr_t)device_pointer, host_pointer,
      size));
/* NOTE(review): fragments of mem_alloc / mem_copy_to -- only the guard
 * asserts (textures and global memory are handled by dedicated paths, not
 * generic alloc) and a generic_copy_to call survive this extraction; the
 * enclosing function signatures are missing. Code untouched. */
518 assert(!
      "mem_alloc not supported for textures.");
 521 assert(!
      "mem_alloc not supported for global memory.");
 542 generic_copy_to(mem);
/* Copies a sub-range of device memory back to the host: `elem` bytes per
 * element, rows [y, y+h) of width w, i.e. size = elem*w*h starting at byte
 * offset elem*y*w, via hipMemcpyDtoH. Textures are rejected by assert.
 * NOTE(review): fragment -- the hipMemcpyDtoH argument list and enclosing
 * braces are missing from this extraction; code untouched. */
546void HIPDevice::mem_copy_from(
    device_memory &mem,
    size_t y,
    size_t w,
    size_t h,
    size_t elem)
 549 assert(!
      "mem_copy_from not supported for textures.");
 552 const size_t size = elem *
      w * h;
 553 const size_t offset = elem *
      y *
      w;
 556 const HIPContextScope scope(
      this);
 557 hip_assert(hipMemcpyDtoH(
 579 const HIPContextScope scope(
      this);
/* Uploads constant data to the device. For "kernel_params" the global symbol
 * is resolved with hipModuleGetGlobal; the KERNEL_DATA_ARRAY expansion
 * (via kernel/data_arrays.h) copies each named data array into its field of
 * KernelParamsHIP at the right offset.
 * NOTE(review): fragment -- mem/bytes declarations, macro tail, and the
 * enclosing function of the trailing generic_copy_to(mem) call are missing
 * from this extraction; code untouched. */
605void HIPDevice::const_copy_to(
    const char *name,
    void *host,
    size_t size)
 607 HIPContextScope scope(
      this);
 611 hip_assert(hipModuleGetGlobal(&mem, &bytes, hipModule,
      "kernel_params"));
 615# define KERNEL_DATA_ARRAY(data_type, data_name) \
 616 if (strcmp(name, #data_name) == 0) { \
 617 hip_assert(hipMemcpyHtoD(mem + offsetof(KernelParamsHIP, data_name), host, size)); \
 622# include "kernel/data_arrays.h"
 623# undef KERNEL_DATA_ARRAY
 630 generic_copy_to(mem);
/* tex_alloc body (signature missing from this extraction): allocates a
 * texture on the device. Maps the Cycles extension/interpolation/data-type
 * enums to HIP address modes, filter modes and HIP_AD_FORMAT_* values, then
 * chooses one of three storage paths: a 3D hArray (hipArray3DCreate +
 * hipDrvMemcpy3D) for volumes, a pitch-aligned 2D allocation
 * (hipDrvMemcpy2DUnaligned, dst pitch rounded up to pitch_alignment), or a
 * linear 1D allocation via generic_alloc. The resulting resource is recorded
 * in device_mem_map, a hipTexObjectCreate texture object is built with
 * normalized coordinates, and the slot in texture_info is updated (resized in
 * 128-slot steps) with the texture handle; need_texture_info is raised so the
 * kernel-side table gets refreshed.
 * NOTE(review): heavily fragmentary -- switch statements, descriptor field
 * assignments, and error-path braces are missing; code untouched. */
645 HIPContextScope scope(
      this);
 650 hipTextureAddressMode address_mode = hipAddressModeWrap;
 653 address_mode = hipAddressModeWrap;
 656 address_mode = hipAddressModeClamp;
 659 address_mode = hipAddressModeBorder;
 662 address_mode = hipAddressModeMirror;
 669 hipTextureFilterMode filter_mode;
 671 filter_mode = hipFilterModePoint;
 674 filter_mode = hipFilterModeLinear;
 681 format = HIP_AD_FORMAT_UNSIGNED_INT8;
 684 format = HIP_AD_FORMAT_UNSIGNED_INT16;
 687 format = HIP_AD_FORMAT_UNSIGNED_INT32;
 690 format = HIP_AD_FORMAT_SIGNED_INT32;
 693 format = HIP_AD_FORMAT_FLOAT;
 696 format = HIP_AD_FORMAT_HALF;
 704 hArray array_3d =
      NULL;
 706 size_t dst_pitch = src_pitch;
 710 cmem = &device_mem_map[&mem];
 715 cmem->array =
      reinterpret_cast<arrayMemObject
          >(array_3d);
 718 dst_pitch =
      align_up(src_pitch, pitch_alignment);
 723 HIP_ARRAY3D_DESCRIPTOR desc;
 736 hip_assert(hipArray3DCreate((hArray *)&array_3d, &desc));
 743 memset(&param, 0,
      sizeof(HIP_MEMCPY3D));
 744 param.dstMemoryType = get_memory_type(hipMemoryTypeArray);
 745 param.dstArray = array_3d;
 746 param.srcMemoryType = get_memory_type(hipMemoryTypeHost);
 748 param.srcPitch = src_pitch;
 749 param.WidthInBytes = param.srcPitch;
 753 hip_assert(hipDrvMemcpy3D(&param));
 760 cmem = &device_mem_map[&mem];
 762 cmem->array =
      reinterpret_cast<arrayMemObject
          >(array_3d);
 766 dst_pitch =
      align_up(src_pitch, pitch_alignment);
 769 cmem = generic_alloc(mem, dst_size - mem.
      memory_size());
 775 memset(&param, 0,
      sizeof(param));
 776 param.dstMemoryType = get_memory_type(hipMemoryTypeDevice);
 778 param.dstPitch = dst_pitch;
 779 param.srcMemoryType = get_memory_type(hipMemoryTypeHost);
 781 param.srcPitch = src_pitch;
 782 param.WidthInBytes = param.srcPitch;
 785 hip_assert(hipDrvMemcpy2DUnaligned(&param));
 789 cmem = generic_alloc(mem);
 799 if (slot >= texture_info.size()) {
 802 texture_info.resize(slot + 128);
 806 texture_info[slot] = mem.
      info;
 807 need_texture_info =
      true;
 815 hipResourceDesc resDesc;
 816 memset(&resDesc, 0,
      sizeof(resDesc));
 819 resDesc.resType = hipResourceTypeArray;
 820 resDesc.res.array.h_Array = array_3d;
 824 resDesc.resType = hipResourceTypePitch2D;
 826 resDesc.res.pitch2D.format =
      format;
 830 resDesc.res.pitch2D.pitchInBytes = dst_pitch;
 833 resDesc.resType = hipResourceTypeLinear;
 835 resDesc.res.linear.format =
      format;
 840 hipTextureDesc texDesc;
 841 memset(&texDesc, 0,
      sizeof(texDesc));
 842 texDesc.addressMode[0] = address_mode;
 843 texDesc.addressMode[1] = address_mode;
 844 texDesc.addressMode[2] = address_mode;
 845 texDesc.filterMode = filter_mode;
 846 texDesc.flags = HIP_TRSF_NORMALIZED_COORDINATES;
 849 cmem = &device_mem_map[&mem];
 851 if (hipTexObjectCreate(&cmem->texobject, &resDesc, &texDesc,
      NULL) != hipSuccess) {
 853 "Failed to create texture. Maximum GPU texture size or available GPU memory was likely "
 857 texture_info[slot].data = (
      uint64_t)cmem->texobject;
/* tex_free body (signature missing from this extraction): destroys the
 * texture object and/or the backing hArray recorded in device_mem_map for
 * `mem`, then erases the map entry. The DCHECK guards against freeing a
 * texture that was never allocated. Code untouched. */
867 HIPContextScope scope(
      this);
 869 DCHECK(device_mem_map.find(&mem) != device_mem_map.end());
 870 const Mem &cmem = device_mem_map[&mem];
 872 if (cmem.texobject) {
 874 hipTexObjectDestroy(cmem.texobject);
 879 device_mem_map.erase(device_mem_map.find(&mem));
 881 else if (cmem.array) {
 883 hipArrayDestroy(
      reinterpret_cast<hArray
          >(cmem.array));
 888 device_mem_map.erase(device_mem_map.find(&mem));
/* Fragment of the queue factory (signature missing): returns a new
 * HIPDeviceQueue bound to this device. Code untouched. */
899 return make_unique<HIPDeviceQueue>(
    this);
/* Decides whether OpenGL interop can be used for display: enumerates the HIP
 * devices visible to the current GL context (hipGLGetDevices) and checks
 * whether this device is among them.
 * NOTE(review): fragment -- early returns, the gl_devices declaration, and
 * the final return are missing from this extraction; code untouched. */
902bool HIPDevice::should_use_graphics_interop()
 918 HIPContextScope scope(
      this);
 920 int num_all_devices = 0;
 921 hip_assert(hipGetDeviceCount(&num_all_devices));
 923 if (num_all_devices == 0) {
 928 uint num_gl_devices = 0;
 929 hipGLGetDevices(&num_gl_devices, gl_devices.data(), num_all_devices, hipGLDeviceListAll);
 931 for (hipDevice_t gl_device : gl_devices) {
 932 if (gl_device == hipDevice) {
/* Convenience getters over get_device_default_attribute: multiprocessor count
 * and max threads per multiprocessor, defaulting to 0 if the query fails. */
941int HIPDevice::get_num_multiprocessors()
 943 return get_device_default_attribute(hipDeviceAttributeMultiprocessorCount, 0);
 946int HIPDevice::get_max_num_threads_per_multiprocessor()
 948 return get_device_default_attribute(hipDeviceAttributeMaxThreadsPerMultiProcessor, 0);
/* Queries a single device attribute into *value under the device's context
 * scope; returns true iff hipDeviceGetAttribute succeeded. */
951bool HIPDevice::get_device_attribute(hipDeviceAttribute_t
    attribute,
    int *value)
 953 HIPContextScope scope(
      this);
 955 return hipDeviceGetAttribute(value,
      attribute, hipDevice) == hipSuccess;
/* Wraps get_device_attribute, substituting `default_value` when the query
 * fails. NOTE(review): fragment -- the `value` declaration and the success
 * return are missing from this extraction; code untouched. */
958int HIPDevice::get_device_default_attribute(hipDeviceAttribute_t
    attribute,
    int default_value)
 961 if (!get_device_attribute(
      attribute, &value)) {
 962 return default_value;
/* Translates a hipMemoryType through get_hip_memory_type, which accounts for
 * the runtime version detected at construction (enum values differ across
 * HIP runtime versions). */
967hipMemoryType HIPDevice::get_memory_type(hipMemoryType mem_type)
 969 return get_hip_memory_type(mem_type, hipRuntimeVersion);
void BLI_kdtree_nd_ free(KDTree *tree)
in reality light always falls off quadratically Particle Retrieve the data of the particle that spawned the object for example to give variation to multiple instances of an object Point Retrieve information about points in a point cloud Retrieve the edges of an object as it appears to Cycles topology will always appear triangulated Convert a blackbody temperature to an RGB value Normal Generate a perturbed normal from an RGB normal map image Typically used for faking highly detailed surfaces Generate an OSL shader from a file or text data block Image Sample an image file as a texture Gabor Generate Gabor noise Gradient Generate interpolated color and intensity values based on the input vector Magic Generate a psychedelic color texture Voronoi Generate Worley noise based on the distance to random points Typically used to generate textures such as or biological cells Brick Generate a procedural texture producing bricks Texture Retrieve multiple types of texture coordinates nTypically used as inputs for texture nodes Vector Convert a or normal between and object coordinate space Combine Create a color from its and value channels Color Retrieve a color attribute
static DBVT_INLINE btScalar size(const btDbvtVolume &a)
SIMD_FORCE_INLINE const btScalar & w() const
Return the w value.
virtual void set_error(const string &error)
void mem_free(size_t size)
void mem_alloc(size_t size)
bool is_resident(Device *sub_device) const
size_t memory_elements_size(int elements)
device_ptr device_pointer
static constexpr size_t datatype_size(DataType datatype)
CCL_NAMESPACE_BEGIN struct Options options
#define KERNEL_DATA_ARRAY(type, name)
DebugFlags & DebugFlags()
#define CCL_NAMESPACE_END
static const char * to_string(const Interpolation &interp)
#define KERNEL_FEATURE_NODE_RAYTRACE
#define KERNEL_FEATURE_MNEE
@ DEVICE_KERNEL_INTEGRATOR_SHADE_SURFACE
@ DEVICE_KERNEL_INTEGRATOR_SHADE_SURFACE_RAYTRACE
@ DEVICE_KERNEL_INTEGRATOR_SHADE_SURFACE_MNEE
#define DCHECK(expression)
string util_md5_string(const string &str)
static void error(const char *str)
ThreadQueue * queue
all scheduled work for the cpu
string path_cache_get(const string &sub)
string path_get(const string &sub)
string path_files_md5_hash(const string &dir)
string path_join(const string &dir, const string &file)
bool path_exists(const string &path)
void path_create_directories(const string &filepath)
bool path_read_compressed_text(const string &path, string &text)
unsigned __int64 uint64_t
string string_human_readable_size(size_t size)
string string_human_readable_number(size_t num)
CCL_NAMESPACE_BEGIN string string_printf(const char *format,...)
std::unique_lock< std::mutex > thread_scoped_lock
CCL_NAMESPACE_BEGIN double time_dt()
@ IMAGE_DATA_TYPE_NANOVDB_FP16
@ IMAGE_DATA_TYPE_NANOVDB_FLOAT
@ IMAGE_DATA_TYPE_NANOVDB_FLOAT3
@ IMAGE_DATA_TYPE_NANOVDB_FPN
ccl_device_inline size_t align_up(size_t offset, size_t alignment)