Blender V4.3
hip/device_impl.cpp
Go to the documentation of this file.
1/* SPDX-FileCopyrightText: 2011-2022 Blender Foundation
2 *
3 * SPDX-License-Identifier: Apache-2.0 */
4
5#ifdef WITH_HIP
6
7# include <climits>
8# include <limits.h>
9# include <stdio.h>
10# include <stdlib.h>
11# include <string.h>
12
14
15# include "util/debug.h"
16# include "util/foreach.h"
17# include "util/log.h"
18# include "util/map.h"
19# include "util/md5.h"
20# include "util/path.h"
21# include "util/string.h"
22# include "util/system.h"
23# include "util/time.h"
24# include "util/types.h"
25# include "util/windows.h"
26
28
30
31class HIPDevice;
32
33bool HIPDevice::have_precompiled_kernels()
34{
35 string fatbins_path = path_get("lib");
36 return path_exists(fatbins_path);
37}
38
39BVHLayoutMask HIPDevice::get_bvh_layout_mask(uint /*kernel_features*/) const
40{
41 return BVH_LAYOUT_BVH2;
42}
43
44void HIPDevice::set_error(const string &error)
45{
47
48 if (first_error) {
49 fprintf(stderr, "\nRefer to the Cycles GPU rendering documentation for possible solutions:\n");
50 fprintf(stderr,
51 "https://docs.blender.org/manual/en/latest/render/cycles/gpu_rendering.html\n\n");
52 first_error = false;
53 }
54}
55
/* Construct the device wrapper: initialize the HIP driver, resolve the device
 * handle, query attributes and create the driver context. On any failure the
 * error is recorded via set_error() and construction stops early, leaving
 * hipContext == 0 (load_kernels() checks for that). */
HIPDevice::HIPDevice(const DeviceInfo &info, Stats &stats, Profiler &profiler, bool headless)
    : GPUDevice(info, stats, profiler, headless)
{
  /* Verify that base class types can be used with specific backend types */
  static_assert(sizeof(texMemObject) == sizeof(hipTextureObject_t));
  static_assert(sizeof(arrayMemObject) == sizeof(hArray));

  first_error = true;

  hipDevId = info.num;
  hipDevice = 0;
  hipContext = 0;

  hipModule = 0;

  need_texture_info = false;

  pitch_alignment = 0;

  /* Initialize HIP. */
  hipError_t result = hipInit(0);
  if (result != hipSuccess) {
    set_error(string_printf("Failed to initialize HIP runtime (%s)", hipewErrorString(result)));
    return;
  }

  /* Setup device and context. */
  result = hipDeviceGet(&hipDevice, hipDevId);
  if (result != hipSuccess) {
    set_error(string_printf("Failed to get HIP device handle from ordinal (%s)",
                            hipewErrorString(result)));
    return;
  }

  /* hipDeviceMapHost for mapping host memory when out of device memory.
   * hipDeviceLmemResizeToMax for reserving local memory ahead of render,
   * so we can predict which memory to map to host. */
  int value;
  hip_assert(hipDeviceGetAttribute(&value, hipDeviceAttributeCanMapHostMemory, hipDevice));
  can_map_host = value != 0;

  /* Pitch alignment is needed later by tex_alloc() for 2D textures. */
  hip_assert(
      hipDeviceGetAttribute(&pitch_alignment, hipDeviceAttributeTexturePitchAlignment, hipDevice));

  unsigned int ctx_flags = hipDeviceLmemResizeToMax;
  if (can_map_host) {
    ctx_flags |= hipDeviceMapHost;
    init_host_memory();
  }

  /* Create context. */
  result = hipCtxCreate(&hipContext, ctx_flags, hipDevice);

  if (result != hipSuccess) {
    set_error(string_printf("Failed to create HIP context (%s)", hipewErrorString(result)));
    return;
  }

  int major, minor;
  hipDeviceGetAttribute(&major, hipDeviceAttributeComputeCapabilityMajor, hipDevId);
  hipDeviceGetAttribute(&minor, hipDeviceAttributeComputeCapabilityMinor, hipDevId);
  hipDevArchitecture = major * 100 + minor * 10;

  /* Get hip runtime Version needed for memory types. */
  hip_assert(hipRuntimeGetVersion(&hipRuntimeVersion));

  /* Pop context set by hipCtxCreate. */
  hipCtxPopCurrent(NULL);
}
125
HIPDevice::~HIPDevice()
{
  /* Release the texture-info buffer before tearing down the context. */
  texture_info.free();
  /* Only unload the module if load_kernels() actually created one. */
  if (hipModule) {
    hip_assert(hipModuleUnload(hipModule));
  }
  hip_assert(hipCtxDestroy(hipContext));
}
134
135bool HIPDevice::support_device(const uint /*kernel_features*/)
136{
137 if (hipSupportsDevice(hipDevId)) {
138 return true;
139 }
140 else {
141 /* We only support Navi and above. */
142 hipDeviceProp_t props;
143 hipGetDeviceProperties(&props, hipDevId);
144
145 set_error(string_printf("HIP backend requires AMD RDNA graphics card or up, but found %s.",
146 props.name));
147 return false;
148 }
149}
150
151bool HIPDevice::check_peer_access(Device *peer_device)
152{
153 if (peer_device == this) {
154 return false;
155 }
156 if (peer_device->info.type != DEVICE_HIP && peer_device->info.type != DEVICE_OPTIX) {
157 return false;
158 }
159
160 HIPDevice *const peer_device_hip = static_cast<HIPDevice *>(peer_device);
161
162 int can_access = 0;
163 hip_assert(hipDeviceCanAccessPeer(&can_access, hipDevice, peer_device_hip->hipDevice));
164 if (can_access == 0) {
165 return false;
166 }
167
168 // Ensure array access over the link is possible as well (for 3D textures)
169 hip_assert(hipDeviceGetP2PAttribute(
170 &can_access, hipDevP2PAttrHipArrayAccessSupported, hipDevice, peer_device_hip->hipDevice));
171 if (can_access == 0) {
172 return false;
173 }
174
175 // Enable peer access in both directions
176 {
177 const HIPContextScope scope(this);
178 hipError_t result = hipCtxEnablePeerAccess(peer_device_hip->hipContext, 0);
179 if (result != hipSuccess) {
180 set_error(string_printf("Failed to enable peer access on HIP context (%s)",
181 hipewErrorString(result)));
182 return false;
183 }
184 }
185 {
186 const HIPContextScope scope(peer_device_hip);
187 hipError_t result = hipCtxEnablePeerAccess(hipContext, 0);
188 if (result != hipSuccess) {
189 set_error(string_printf("Failed to enable peer access on HIP context (%s)",
190 hipewErrorString(result)));
191 return false;
192 }
193 }
194
195 return true;
196}
197
198bool HIPDevice::use_adaptive_compilation()
199{
201}
202
203/* Common HIPCC flags which stays the same regardless of shading model,
204 * kernel sources md5 and only depends on compiler or compilation settings.
205 */
206string HIPDevice::compile_kernel_get_common_cflags(const uint kernel_features)
207{
208 const int machine = system_cpu_bits();
209 const string source_path = path_get("source");
210 const string include_path = source_path;
211 string cflags = string_printf(
212 "-m%d "
213 "--use_fast_math "
214 "-DHIPCC "
215 "-I\"%s\"",
216 machine,
217 include_path.c_str());
218 if (use_adaptive_compilation()) {
219 cflags += " -D__KERNEL_FEATURES__=" + to_string(kernel_features);
220 }
221 return cflags;
222}
223
224string HIPDevice::compile_kernel(const uint kernel_features, const char *name, const char *base)
225{
226 /* Compute kernel name. */
227 int major, minor;
228 hipDeviceGetAttribute(&major, hipDeviceAttributeComputeCapabilityMajor, hipDevId);
229 hipDeviceGetAttribute(&minor, hipDeviceAttributeComputeCapabilityMinor, hipDevId);
230 const std::string arch = hipDeviceArch(hipDevId);
231
232 /* Attempt to use kernel provided with Blender. */
233 if (!use_adaptive_compilation()) {
234 const string fatbin = path_get(string_printf("lib/%s_%s.fatbin.zst", name, arch.c_str()));
235 VLOG_INFO << "Testing for pre-compiled kernel " << fatbin << ".";
236 if (path_exists(fatbin)) {
237 VLOG_INFO << "Using precompiled kernel.";
238 return fatbin;
239 }
240 }
241
242 /* Try to use locally compiled kernel. */
243 string source_path = path_get("source");
244 const string source_md5 = path_files_md5_hash(source_path);
245
246 /* We include cflags into md5 so changing hip toolkit or changing other
247 * compiler command line arguments makes sure fatbin gets re-built.
248 */
249 string common_cflags = compile_kernel_get_common_cflags(kernel_features);
250 const string kernel_md5 = util_md5_string(source_md5 + common_cflags);
251
252 const char *const kernel_ext = "genco";
253 std::string options;
254# ifdef _WIN32
255 options.append("Wno-parentheses-equality -Wno-unused-value -ffast-math");
256# else
257 options.append("Wno-parentheses-equality -Wno-unused-value -O3 -ffast-math");
258# endif
259# ifndef NDEBUG
260 options.append(" -save-temps");
261# endif
262 options.append(" --offload-arch=").append(arch.c_str());
263
264 const string include_path = source_path;
265 const string fatbin_file = string_printf(
266 "cycles_%s_%s_%s", name, arch.c_str(), kernel_md5.c_str());
267 const string fatbin = path_cache_get(path_join("kernels", fatbin_file));
268 VLOG_INFO << "Testing for locally compiled kernel " << fatbin << ".";
269 if (path_exists(fatbin)) {
270 VLOG_INFO << "Using locally compiled kernel.";
271 return fatbin;
272 }
273
274# ifdef _WIN32
275 if (!use_adaptive_compilation() && have_precompiled_kernels()) {
276 if (!hipSupportsDevice(hipDevId)) {
277 set_error(
278 string_printf("HIP backend requires compute capability 10.1 or up, but found %d.%d. "
279 "Your GPU is not supported.",
280 major,
281 minor));
282 }
283 else {
284 set_error(
285 string_printf("HIP binary kernel for this graphics card compute "
286 "capability (%d.%d) not found.",
287 major,
288 minor));
289 }
290 return string();
291 }
292# endif
293
294 /* Compile. */
295 const char *const hipcc = hipewCompilerPath();
296 if (hipcc == NULL) {
297 set_error(
298 "HIP hipcc compiler not found. "
299 "Install HIP toolkit in default location.");
300 return string();
301 }
302
303 const int hipcc_hip_version = hipewCompilerVersion();
304 VLOG_INFO << "Found hipcc " << hipcc << ", HIP version " << hipcc_hip_version << ".";
305 if (hipcc_hip_version < 40) {
306 printf(
307 "Unsupported HIP version %d.%d detected, "
308 "you need HIP 4.0 or newer.\n",
309 hipcc_hip_version / 10,
310 hipcc_hip_version % 10);
311 return string();
312 }
313
314 double starttime = time_dt();
315
317
318 source_path = path_join(path_join(source_path, "kernel"),
319 path_join("device", path_join(base, string_printf("%s.cpp", name))));
320
321 string command = string_printf("%s -%s -I %s --%s %s -o \"%s\"",
322 hipcc,
323 options.c_str(),
324 include_path.c_str(),
325 kernel_ext,
326 source_path.c_str(),
327 fatbin.c_str());
328
329 printf("Compiling %sHIP kernel ...\n%s\n",
330 (use_adaptive_compilation()) ? "adaptive " : "",
331 command.c_str());
332
333# ifdef _WIN32
334 command = "call " + command;
335# endif
336 if (system(command.c_str()) != 0) {
337 set_error(
338 "Failed to execute compilation command, "
339 "see console for details.");
340 return string();
341 }
342
343 /* Verify if compilation succeeded */
344 if (!path_exists(fatbin)) {
345 set_error(
346 "HIP kernel compilation failed, "
347 "see console for details.");
348 return string();
349 }
350
351 printf("Kernel compilation finished in %.2lfs.\n", time_dt() - starttime);
352
353 return fatbin;
354}
355
356bool HIPDevice::load_kernels(const uint kernel_features)
357{
358 /* TODO(sergey): Support kernels re-load for HIP devices adaptive compile.
359 *
360 * Currently re-loading kernels will invalidate memory pointers.
361 */
362 if (hipModule) {
363 if (use_adaptive_compilation()) {
364 VLOG_INFO << "Skipping HIP kernel reload for adaptive compilation, not currently supported.";
365 }
366 return true;
367 }
368
369 /* check if hip init succeeded */
370 if (hipContext == 0)
371 return false;
372
373 /* check if GPU is supported */
374 if (!support_device(kernel_features)) {
375 return false;
376 }
377
378 /* get kernel */
379 const char *kernel_name = "kernel";
380 string fatbin = compile_kernel(kernel_features, kernel_name);
381 if (fatbin.empty())
382 return false;
383
384 /* open module */
385 HIPContextScope scope(this);
386
387 string fatbin_data;
388 hipError_t result;
389
390 if (path_read_compressed_text(fatbin, fatbin_data))
391 result = hipModuleLoadData(&hipModule, fatbin_data.c_str());
392 else
393 result = hipErrorFileNotFound;
394
395 if (result != hipSuccess)
396 set_error(string_printf(
397 "Failed to load HIP kernel from '%s' (%s)", fatbin.c_str(), hipewErrorString(result)));
398
399 if (result == hipSuccess) {
400 kernels.load(this);
401 reserve_local_memory(kernel_features);
402 }
403
404 return (result == hipSuccess);
405}
406
407void HIPDevice::reserve_local_memory(const uint kernel_features)
408{
409 /* Together with hipDeviceLmemResizeToMax, this reserves local memory
410 * needed for kernel launches, so that we can reliably figure out when
411 * to allocate scene data in mapped host memory. */
412 size_t total = 0, free_before = 0, free_after = 0;
413
414 {
415 HIPContextScope scope(this);
416 hipMemGetInfo(&free_before, &total);
417 }
418
419 {
420 /* Use the biggest kernel for estimation. */
421 const DeviceKernel test_kernel = (kernel_features & KERNEL_FEATURE_NODE_RAYTRACE) ?
423 (kernel_features & KERNEL_FEATURE_MNEE) ?
426
427 /* Launch kernel, using just 1 block appears sufficient to reserve memory for all
428 * multiprocessors. It would be good to do this in parallel for the multi GPU case
429 * still to make it faster. */
430 HIPDeviceQueue queue(this);
431
432 device_ptr d_path_index = 0;
433 device_ptr d_render_buffer = 0;
434 int d_work_size = 0;
435 DeviceKernelArguments args(&d_path_index, &d_render_buffer, &d_work_size);
436
437 queue.init_execution();
438 queue.enqueue(test_kernel, 1, args);
439 queue.synchronize();
440 }
441
442 {
443 HIPContextScope scope(this);
444 hipMemGetInfo(&free_after, &total);
445 }
446
447 VLOG_INFO << "Local memory reserved " << string_human_readable_number(free_before - free_after)
448 << " bytes. (" << string_human_readable_size(free_before - free_after) << ")";
449
450# if 0
451 /* For testing mapped host memory, fill up device memory. */
452 const size_t keep_mb = 1024;
453
454 while (free_after > keep_mb * 1024 * 1024LL) {
455 hipDeviceptr_t tmp;
456 hip_assert(hipMalloc(&tmp, 10 * 1024 * 1024LL));
457 hipMemGetInfo(&free_after, &total);
458 }
459# endif
460}
461
462void HIPDevice::get_device_memory_info(size_t &total, size_t &free)
463{
464 HIPContextScope scope(this);
465
466 hipMemGetInfo(&free, &total);
467}
468
469bool HIPDevice::alloc_device(void *&device_pointer, size_t size)
470{
471 HIPContextScope scope(this);
472
473 hipError_t mem_alloc_result = hipMalloc((hipDeviceptr_t *)&device_pointer, size);
474 return mem_alloc_result == hipSuccess;
475}
476
477void HIPDevice::free_device(void *device_pointer)
478{
479 HIPContextScope scope(this);
480
481 hip_assert(hipFree((hipDeviceptr_t)device_pointer));
482}
483
484bool HIPDevice::alloc_host(void *&shared_pointer, size_t size)
485{
486 HIPContextScope scope(this);
487
488 hipError_t mem_alloc_result = hipHostMalloc(
489 &shared_pointer, size, hipHostMallocMapped | hipHostMallocWriteCombined);
490
491 return mem_alloc_result == hipSuccess;
492}
493
494void HIPDevice::free_host(void *shared_pointer)
495{
496 HIPContextScope scope(this);
497
498 hipHostFree(shared_pointer);
499}
500
501void HIPDevice::transform_host_pointer(void *&device_pointer, void *&shared_pointer)
502{
503 HIPContextScope scope(this);
504
505 hip_assert(hipHostGetDevicePointer((hipDeviceptr_t *)&device_pointer, shared_pointer, 0));
506}
507
508void HIPDevice::copy_host_to_device(void *device_pointer, void *host_pointer, size_t size)
509{
510 const HIPContextScope scope(this);
511
512 hip_assert(hipMemcpyHtoD((hipDeviceptr_t)device_pointer, host_pointer, size));
513}
514
515void HIPDevice::mem_alloc(device_memory &mem)
516{
517 if (mem.type == MEM_TEXTURE) {
518 assert(!"mem_alloc not supported for textures.");
519 }
520 else if (mem.type == MEM_GLOBAL) {
521 assert(!"mem_alloc not supported for global memory.");
522 }
523 else {
524 generic_alloc(mem);
525 }
526}
527
528void HIPDevice::mem_copy_to(device_memory &mem)
529{
530 if (mem.type == MEM_GLOBAL) {
531 global_free(mem);
532 global_alloc(mem);
533 }
534 else if (mem.type == MEM_TEXTURE) {
535 tex_free((device_texture &)mem);
536 tex_alloc((device_texture &)mem);
537 }
538 else {
539 if (!mem.device_pointer) {
540 generic_alloc(mem);
541 }
542 generic_copy_to(mem);
543 }
544}
545
546void HIPDevice::mem_copy_from(device_memory &mem, size_t y, size_t w, size_t h, size_t elem)
547{
548 if (mem.type == MEM_TEXTURE || mem.type == MEM_GLOBAL) {
549 assert(!"mem_copy_from not supported for textures.");
550 }
551 else if (mem.host_pointer) {
552 const size_t size = elem * w * h;
553 const size_t offset = elem * y * w;
554
555 if (mem.device_pointer) {
556 const HIPContextScope scope(this);
557 hip_assert(hipMemcpyDtoH(
558 (char *)mem.host_pointer + offset, (hipDeviceptr_t)mem.device_pointer + offset, size));
559 }
560 else {
561 memset((char *)mem.host_pointer + offset, 0, size);
562 }
563 }
564}
565
566void HIPDevice::mem_zero(device_memory &mem)
567{
568 if (!mem.device_pointer) {
569 mem_alloc(mem);
570 }
571 if (!mem.device_pointer) {
572 return;
573 }
574
575 /* If use_mapped_host of mem is false, mem.device_pointer currently refers to device memory
576 * regardless of mem.host_pointer and mem.shared_pointer. */
577 thread_scoped_lock lock(device_mem_map_mutex);
578 if (!device_mem_map[&mem].use_mapped_host || mem.host_pointer != mem.shared_pointer) {
579 const HIPContextScope scope(this);
580 hip_assert(hipMemsetD8((hipDeviceptr_t)mem.device_pointer, 0, mem.memory_size()));
581 }
582 else if (mem.host_pointer) {
583 memset(mem.host_pointer, 0, mem.memory_size());
584 }
585}
586
587void HIPDevice::mem_free(device_memory &mem)
588{
589 if (mem.type == MEM_GLOBAL) {
590 global_free(mem);
591 }
592 else if (mem.type == MEM_TEXTURE) {
593 tex_free((device_texture &)mem);
594 }
595 else {
596 generic_free(mem);
597 }
598}
599
600device_ptr HIPDevice::mem_alloc_sub_ptr(device_memory &mem, size_t offset, size_t /*size*/)
601{
602 return (device_ptr)(((char *)mem.device_pointer) + mem.memory_elements_size(offset));
603}
604
605void HIPDevice::const_copy_to(const char *name, void *host, size_t size)
606{
607 HIPContextScope scope(this);
608 hipDeviceptr_t mem;
609 size_t bytes;
610
611 hip_assert(hipModuleGetGlobal(&mem, &bytes, hipModule, "kernel_params"));
612 assert(bytes == sizeof(KernelParamsHIP));
613
614 /* Update data storage pointers in launch parameters. */
615# define KERNEL_DATA_ARRAY(data_type, data_name) \
616 if (strcmp(name, #data_name) == 0) { \
617 hip_assert(hipMemcpyHtoD(mem + offsetof(KernelParamsHIP, data_name), host, size)); \
618 return; \
619 }
621 KERNEL_DATA_ARRAY(IntegratorStateGPU, integrator_state)
622# include "kernel/data_arrays.h"
623# undef KERNEL_DATA_ARRAY
624}
625
void HIPDevice::global_alloc(device_memory &mem)
{
  /* Resident memory is allocated and filled here; for non-resident memory
   * only the (externally owned) device pointer is published to the kernel. */
  if (mem.is_resident(this)) {
    generic_alloc(mem);
    generic_copy_to(mem);
  }

  /* Publish the pointer under the memory's name in kernel_params. */
  const_copy_to(mem.name, &mem.device_pointer, sizeof(mem.device_pointer));
}
635
void HIPDevice::global_free(device_memory &mem)
{
  /* Only free memory this device actually owns; non-resident allocations
   * belong to another device. */
  if (mem.is_resident(this) && mem.device_pointer) {
    generic_free(mem);
  }
}
642
643void HIPDevice::tex_alloc(device_texture &mem)
644{
645 HIPContextScope scope(this);
646
647 size_t dsize = datatype_size(mem.data_type);
648 size_t size = mem.memory_size();
649
650 hipTextureAddressMode address_mode = hipAddressModeWrap;
651 switch (mem.info.extension) {
652 case EXTENSION_REPEAT:
653 address_mode = hipAddressModeWrap;
654 break;
655 case EXTENSION_EXTEND:
656 address_mode = hipAddressModeClamp;
657 break;
658 case EXTENSION_CLIP:
659 address_mode = hipAddressModeBorder;
660 break;
661 case EXTENSION_MIRROR:
662 address_mode = hipAddressModeMirror;
663 break;
664 default:
665 assert(0);
666 break;
667 }
668
669 hipTextureFilterMode filter_mode;
671 filter_mode = hipFilterModePoint;
672 }
673 else {
674 filter_mode = hipFilterModeLinear;
675 }
676
677 /* Image Texture Storage */
678 hipArray_Format format;
679 switch (mem.data_type) {
680 case TYPE_UCHAR:
681 format = HIP_AD_FORMAT_UNSIGNED_INT8;
682 break;
683 case TYPE_UINT16:
684 format = HIP_AD_FORMAT_UNSIGNED_INT16;
685 break;
686 case TYPE_UINT:
687 format = HIP_AD_FORMAT_UNSIGNED_INT32;
688 break;
689 case TYPE_INT:
690 format = HIP_AD_FORMAT_SIGNED_INT32;
691 break;
692 case TYPE_FLOAT:
693 format = HIP_AD_FORMAT_FLOAT;
694 break;
695 case TYPE_HALF:
696 format = HIP_AD_FORMAT_HALF;
697 break;
698 default:
699 assert(0);
700 return;
701 }
702
703 Mem *cmem = NULL;
704 hArray array_3d = NULL;
705 size_t src_pitch = mem.data_width * dsize * mem.data_elements;
706 size_t dst_pitch = src_pitch;
707
708 if (!mem.is_resident(this)) {
709 thread_scoped_lock lock(device_mem_map_mutex);
710 cmem = &device_mem_map[&mem];
711 cmem->texobject = 0;
712
713 if (mem.data_depth > 1) {
714 array_3d = (hArray)mem.device_pointer;
715 cmem->array = reinterpret_cast<arrayMemObject>(array_3d);
716 }
717 else if (mem.data_height > 0) {
718 dst_pitch = align_up(src_pitch, pitch_alignment);
719 }
720 }
721 else if (mem.data_depth > 1) {
722 /* 3D texture using array, there is no API for linear memory. */
723 HIP_ARRAY3D_DESCRIPTOR desc;
724
725 desc.Width = mem.data_width;
726 desc.Height = mem.data_height;
727 desc.Depth = mem.data_depth;
728 desc.Format = format;
729 desc.NumChannels = mem.data_elements;
730 desc.Flags = 0;
731
732 VLOG_WORK << "Array 3D allocate: " << mem.name << ", "
733 << string_human_readable_number(mem.memory_size()) << " bytes. ("
735
736 hip_assert(hipArray3DCreate((hArray *)&array_3d, &desc));
737
738 if (!array_3d) {
739 return;
740 }
741
742 HIP_MEMCPY3D param;
743 memset(&param, 0, sizeof(HIP_MEMCPY3D));
744 param.dstMemoryType = get_memory_type(hipMemoryTypeArray);
745 param.dstArray = array_3d;
746 param.srcMemoryType = get_memory_type(hipMemoryTypeHost);
747 param.srcHost = mem.host_pointer;
748 param.srcPitch = src_pitch;
749 param.WidthInBytes = param.srcPitch;
750 param.Height = mem.data_height;
751 param.Depth = mem.data_depth;
752
753 hip_assert(hipDrvMemcpy3D(&param));
754
755 mem.device_pointer = (device_ptr)array_3d;
756 mem.device_size = size;
757 stats.mem_alloc(size);
758
759 thread_scoped_lock lock(device_mem_map_mutex);
760 cmem = &device_mem_map[&mem];
761 cmem->texobject = 0;
762 cmem->array = reinterpret_cast<arrayMemObject>(array_3d);
763 }
764 else if (mem.data_height > 0) {
765 /* 2D texture, using pitch aligned linear memory. */
766 dst_pitch = align_up(src_pitch, pitch_alignment);
767 size_t dst_size = dst_pitch * mem.data_height;
768
769 cmem = generic_alloc(mem, dst_size - mem.memory_size());
770 if (!cmem) {
771 return;
772 }
773
774 hip_Memcpy2D param;
775 memset(&param, 0, sizeof(param));
776 param.dstMemoryType = get_memory_type(hipMemoryTypeDevice);
777 param.dstDevice = mem.device_pointer;
778 param.dstPitch = dst_pitch;
779 param.srcMemoryType = get_memory_type(hipMemoryTypeHost);
780 param.srcHost = mem.host_pointer;
781 param.srcPitch = src_pitch;
782 param.WidthInBytes = param.srcPitch;
783 param.Height = mem.data_height;
784
785 hip_assert(hipDrvMemcpy2DUnaligned(&param));
786 }
787 else {
788 /* 1D texture, using linear memory. */
789 cmem = generic_alloc(mem);
790 if (!cmem) {
791 return;
792 }
793
794 hip_assert(hipMemcpyHtoD(mem.device_pointer, mem.host_pointer, size));
795 }
796
797 /* Resize once */
798 const uint slot = mem.slot;
799 if (slot >= texture_info.size()) {
800 /* Allocate some slots in advance, to reduce amount
801 * of re-allocations. */
802 texture_info.resize(slot + 128);
803 }
804
805 /* Set Mapping and tag that we need to (re-)upload to device */
806 texture_info[slot] = mem.info;
807 need_texture_info = true;
808
813 {
814 /* Bindless textures. */
815 hipResourceDesc resDesc;
816 memset(&resDesc, 0, sizeof(resDesc));
817
818 if (array_3d) {
819 resDesc.resType = hipResourceTypeArray;
820 resDesc.res.array.h_Array = array_3d;
821 resDesc.flags = 0;
822 }
823 else if (mem.data_height > 0) {
824 resDesc.resType = hipResourceTypePitch2D;
825 resDesc.res.pitch2D.devPtr = mem.device_pointer;
826 resDesc.res.pitch2D.format = format;
827 resDesc.res.pitch2D.numChannels = mem.data_elements;
828 resDesc.res.pitch2D.height = mem.data_height;
829 resDesc.res.pitch2D.width = mem.data_width;
830 resDesc.res.pitch2D.pitchInBytes = dst_pitch;
831 }
832 else {
833 resDesc.resType = hipResourceTypeLinear;
834 resDesc.res.linear.devPtr = mem.device_pointer;
835 resDesc.res.linear.format = format;
836 resDesc.res.linear.numChannels = mem.data_elements;
837 resDesc.res.linear.sizeInBytes = mem.device_size;
838 }
839
840 hipTextureDesc texDesc;
841 memset(&texDesc, 0, sizeof(texDesc));
842 texDesc.addressMode[0] = address_mode;
843 texDesc.addressMode[1] = address_mode;
844 texDesc.addressMode[2] = address_mode;
845 texDesc.filterMode = filter_mode;
846 texDesc.flags = HIP_TRSF_NORMALIZED_COORDINATES;
847
848 thread_scoped_lock lock(device_mem_map_mutex);
849 cmem = &device_mem_map[&mem];
850
851 if (hipTexObjectCreate(&cmem->texobject, &resDesc, &texDesc, NULL) != hipSuccess) {
852 set_error(
853 "Failed to create texture. Maximum GPU texture size or available GPU memory was likely "
854 "exceeded.");
855 }
856
857 texture_info[slot].data = (uint64_t)cmem->texobject;
858 }
859 else {
860 texture_info[slot].data = (uint64_t)mem.device_pointer;
861 }
862}
863
void HIPDevice::tex_free(device_texture &mem)
{
  if (mem.device_pointer) {
    HIPContextScope scope(this);
    thread_scoped_lock lock(device_mem_map_mutex);
    /* The texture must have been registered by tex_alloc(). */
    DCHECK(device_mem_map.find(&mem) != device_mem_map.end());
    const Mem &cmem = device_mem_map[&mem];

    if (cmem.texobject) {
      /* Free bindless texture. */
      hipTexObjectDestroy(cmem.texobject);
    }

    if (!mem.is_resident(this)) {
      /* Do not free memory here, since it was allocated on a different device. */
      device_mem_map.erase(device_mem_map.find(&mem));
    }
    else if (cmem.array) {
      /* Free array. */
      hipArrayDestroy(reinterpret_cast<hArray>(cmem.array));
      stats.mem_free(mem.device_size);
      mem.device_pointer = 0;
      mem.device_size = 0;

      device_mem_map.erase(device_mem_map.find(&mem));
    }
    else {
      /* NOTE(review): the lock is released before generic_free(), presumably
       * because generic_free() acquires device_mem_map_mutex itself — confirm. */
      lock.unlock();
      generic_free(mem);
    }
  }
}
896
/* Create a new work queue bound to this device. */
unique_ptr<DeviceQueue> HIPDevice::gpu_queue_create()
{
  return make_unique<HIPDeviceQueue>(this);
}
901
bool HIPDevice::should_use_graphics_interop()
{
  /* Check whether this device is part of OpenGL context.
   *
   * Using HIP device for graphics interoperability which is not part of the OpenGL context is
   * possible, but from the empiric measurements it can be considerably slower than using naive
   * pixels copy. */

  if (headless) {
    /* Avoid any call which might involve interaction with a graphics backend when we know that
     * we don't have active graphics context. This avoids potential crash in the driver. */
    return false;
  }

  /* Disable graphics interop for now, because of driver bug in 21.40. See #92972 */
# if 0
  HIPContextScope scope(this);

  int num_all_devices = 0;
  hip_assert(hipGetDeviceCount(&num_all_devices));

  if (num_all_devices == 0) {
    return false;
  }

  vector<hipDevice_t> gl_devices(num_all_devices);
  uint num_gl_devices = 0;
  hipGLGetDevices(&num_gl_devices, gl_devices.data(), num_all_devices, hipGLDeviceListAll);

  for (hipDevice_t gl_device : gl_devices) {
    if (gl_device == hipDevice) {
      return true;
    }
  }
# endif

  /* Interop disabled: always fall back to the pixel-copy path. */
  return false;
}
940
/* Number of compute units on the device; 0 when the query fails. */
int HIPDevice::get_num_multiprocessors()
{
  return get_device_default_attribute(hipDeviceAttributeMultiprocessorCount, 0);
}
945
/* Maximum resident threads per compute unit; 0 when the query fails. */
int HIPDevice::get_max_num_threads_per_multiprocessor()
{
  return get_device_default_attribute(hipDeviceAttributeMaxThreadsPerMultiProcessor, 0);
}
950
951bool HIPDevice::get_device_attribute(hipDeviceAttribute_t attribute, int *value)
952{
953 HIPContextScope scope(this);
954
955 return hipDeviceGetAttribute(value, attribute, hipDevice) == hipSuccess;
956}
957
958int HIPDevice::get_device_default_attribute(hipDeviceAttribute_t attribute, int default_value)
959{
960 int value = 0;
961 if (!get_device_attribute(attribute, &value)) {
962 return default_value;
963 }
964 return value;
965}
966
/* Translate a memory type through the runtime-version-aware helper, using the
 * HIP runtime version queried once in the constructor. */
hipMemoryType HIPDevice::get_memory_type(hipMemoryType mem_type)
{
  return get_hip_memory_type(mem_type, hipRuntimeVersion);
}
971
973
974#endif
void BLI_kdtree_nd_ free(KDTree *tree)
unsigned int uint
in reality light always falls off quadratically Particle Retrieve the data of the particle that spawned the object for example to give variation to multiple instances of an object Point Retrieve information about points in a point cloud Retrieve the edges of an object as it appears to Cycles topology will always appear triangulated Convert a blackbody temperature to an RGB value Normal Generate a perturbed normal from an RGB normal map image Typically used for faking highly detailed surfaces Generate an OSL shader from a file or text data block Image Sample an image file as a texture Gabor Generate Gabor noise Gradient Generate interpolated color and intensity values based on the input vector Magic Generate a psychedelic color texture Voronoi Generate Worley noise based on the distance to random points Typically used to generate textures such as or biological cells Brick Generate a procedural texture producing bricks Texture Retrieve multiple types of texture coordinates nTypically used as inputs for texture nodes Vector Convert a or normal between and object coordinate space Combine Create a color from its and value channels Color Retrieve a color attribute
volatile int lock
static DBVT_INLINE btScalar size(const btDbvtVolume &a)
Definition btDbvt.cpp:52
SIMD_FORCE_INLINE const btScalar & w() const
Return the w value.
Definition btQuadWord.h:119
HIP hip
Definition debug.h:126
DeviceType type
virtual void set_error(const string &error)
DeviceInfo info
void mem_free(size_t size)
Definition util/stats.h:26
void mem_alloc(size_t size)
Definition util/stats.h:20
bool is_resident(Device *sub_device) const
Definition memory.cpp:127
size_t memory_elements_size(int elements)
#define printf
static constexpr size_t datatype_size(DataType datatype)
@ MEM_TEXTURE
@ TYPE_UINT16
CCL_NAMESPACE_BEGIN struct Options options
#define KERNEL_DATA_ARRAY(type, name)
Definition data_arrays.h:6
DebugFlags & DebugFlags()
Definition debug.h:142
#define CCL_NAMESPACE_END
@ DEVICE_OPTIX
@ DEVICE_HIP
#define NULL
static const char * to_string(const Interpolation &interp)
Definition gl_shader.cc:82
KernelData
@ BVH_LAYOUT_BVH2
#define KERNEL_FEATURE_NODE_RAYTRACE
#define KERNEL_FEATURE_MNEE
DeviceKernel
@ DEVICE_KERNEL_INTEGRATOR_SHADE_SURFACE
@ DEVICE_KERNEL_INTEGRATOR_SHADE_SURFACE_RAYTRACE
@ DEVICE_KERNEL_INTEGRATOR_SHADE_SURFACE_MNEE
format
#define VLOG_INFO
Definition log.h:72
#define DCHECK(expression)
Definition log.h:51
#define VLOG_WORK
Definition log.h:75
string util_md5_string(const string &str)
Definition md5.cpp:373
static void error(const char *str)
ThreadQueue * queue
all scheduled work for the cpu
int BVHLayoutMask
Definition params.h:51
string path_cache_get(const string &sub)
Definition path.cpp:362
string path_get(const string &sub)
Definition path.cpp:339
string path_files_md5_hash(const string &dir)
Definition path.cpp:612
string path_join(const string &dir, const string &file)
Definition path.cpp:417
bool path_exists(const string &path)
Definition path.cpp:565
void path_create_directories(const string &filepath)
Definition path.cpp:648
bool path_read_compressed_text(const string &path, string &text)
Definition path.cpp:754
unsigned __int64 uint64_t
Definition stdint.h:90
string string_human_readable_size(size_t size)
Definition string.cpp:245
string string_human_readable_number(size_t num)
Definition string.cpp:266
CCL_NAMESPACE_BEGIN string string_printf(const char *format,...)
Definition string.cpp:23
bool adaptive_compile
Definition debug.h:71
uint interpolation
int system_cpu_bits()
Definition system.cpp:137
std::unique_lock< std::mutex > thread_scoped_lock
Definition thread.h:30
CCL_NAMESPACE_BEGIN double time_dt()
Definition time.cpp:36
@ IMAGE_DATA_TYPE_NANOVDB_FP16
@ IMAGE_DATA_TYPE_NANOVDB_FLOAT
@ IMAGE_DATA_TYPE_NANOVDB_FLOAT3
@ IMAGE_DATA_TYPE_NANOVDB_FPN
@ INTERPOLATION_CLOSEST
@ EXTENSION_REPEAT
@ EXTENSION_CLIP
@ EXTENSION_EXTEND
@ EXTENSION_MIRROR
ccl_device_inline size_t align_up(size_t offset, size_t alignment)
Definition util/types.h:48
uint64_t device_ptr
Definition util/types.h:45