Blender V4.5
cycles/device/memory.h
Go to the documentation of this file.
1/* SPDX-FileCopyrightText: 2011-2022 Blender Foundation
2 *
3 * SPDX-License-Identifier: Apache-2.0 */
4
5#pragma once
6
7/* Device Memory
8 *
9 * Data types for allocating, copying and freeing device memory. */
10
11#include "util/array.h"
12#include "util/half.h"
13#include "util/string.h"
14#include "util/texture.h"
15#include "util/types.h"
16
18
19class Device;
20class GPUDevice;
21class CUDADevice;
22class OptiXDevice;
23class HIPDevice;
24class HIPRTDevice;
25class MetalDevice;
26class OneapiDevice;
27
35
36/* Supported Data Types */
37
48
49static constexpr size_t datatype_size(DataType datatype)
50{
51 switch (datatype) {
52 case TYPE_UNKNOWN:
53 return 1;
54 case TYPE_UCHAR:
55 return sizeof(uchar);
56 case TYPE_FLOAT:
57 return sizeof(float);
58 case TYPE_UINT:
59 return sizeof(uint);
60 case TYPE_UINT16:
61 return sizeof(uint16_t);
62 case TYPE_INT:
63 return sizeof(int);
64 case TYPE_HALF:
65 return sizeof(half);
66 case TYPE_UINT64:
67 return sizeof(uint64_t);
68 default:
69 return 0;
70 }
71}
72
73/* Traits for data types */
74
75template<typename T> struct device_type_traits {
77 static const size_t num_elements = sizeof(T);
78};
79
80template<> struct device_type_traits<uchar> {
82 static const size_t num_elements = 1;
83 static_assert(sizeof(uchar) == num_elements * datatype_size(data_type));
84};
85
86template<> struct device_type_traits<uchar2> {
88 static const size_t num_elements = 2;
89 static_assert(sizeof(uchar2) == num_elements * datatype_size(data_type));
90};
91
92template<> struct device_type_traits<uchar3> {
94 static const size_t num_elements = 3;
95 static_assert(sizeof(uchar3) == num_elements * datatype_size(data_type));
96};
97
98template<> struct device_type_traits<uchar4> {
100 static const size_t num_elements = 4;
101 static_assert(sizeof(uchar4) == num_elements * datatype_size(data_type));
102};
103
104template<> struct device_type_traits<uint> {
106 static const size_t num_elements = 1;
107 static_assert(sizeof(uint) == num_elements * datatype_size(data_type));
108};
109
110template<> struct device_type_traits<uint2> {
112 static const size_t num_elements = 2;
113 static_assert(sizeof(uint2) == num_elements * datatype_size(data_type));
114};
115
116template<> struct device_type_traits<uint3> {
117 /* uint3 has a different size depending on the device, so it can't be used for
 * interchanging memory between CPU and GPU.
 *
 * The body is intentionally left empty: without a `num_elements` member, any code that
 * reads the traits of uint3 fails to compile instead of exchanging mismatched data. */
121};
122
123template<> struct device_type_traits<uint4> {
125 static const size_t num_elements = 4;
126 static_assert(sizeof(uint4) == num_elements * datatype_size(data_type));
127};
128
129template<> struct device_type_traits<int> {
131 static const size_t num_elements = 1;
132 static_assert(sizeof(int) == num_elements * datatype_size(data_type));
133};
134
135template<> struct device_type_traits<int2> {
137 static const size_t num_elements = 2;
138 static_assert(sizeof(int2) == num_elements * datatype_size(data_type));
139};
140
141template<> struct device_type_traits<int3> {
142 /* int3 has a different size depending on the device, so it can't be used for
 * interchanging memory between CPU and GPU.
 *
 * The body is intentionally left empty: without a `num_elements` member, any code that
 * reads the traits of int3 fails to compile instead of exchanging mismatched data. */
146};
147
148template<> struct device_type_traits<int4> {
150 static const size_t num_elements = 4;
151 static_assert(sizeof(int4) == num_elements * datatype_size(data_type));
152};
153
154template<> struct device_type_traits<float> {
156 static const size_t num_elements = 1;
157 static_assert(sizeof(float) == num_elements * datatype_size(data_type));
158};
159
160template<> struct device_type_traits<float2> {
162 static const size_t num_elements = 2;
163 static_assert(sizeof(float2) == num_elements * datatype_size(data_type));
164};
165
166template<> struct device_type_traits<float3> {
167 /* float3 has a different size depending on the device, so it can't be used for
 * interchanging memory between CPU and GPU.
 *
 * The body is intentionally left empty: without a `num_elements` member, any code that
 * reads the traits of float3 fails to compile instead of exchanging mismatched data.
 * packed_float3, whose traits assert a size of exactly three elements, is handled
 * separately below. */
171};
172
175 static const size_t num_elements = 3;
176 static_assert(sizeof(packed_float3) == num_elements * datatype_size(data_type));
177};
178
179template<> struct device_type_traits<float4> {
181 static const size_t num_elements = 4;
182 static_assert(sizeof(float4) == num_elements * datatype_size(data_type));
183};
184
185template<> struct device_type_traits<half> {
187 static const size_t num_elements = 1;
188 static_assert(sizeof(half) == num_elements * datatype_size(data_type));
189};
190
191template<> struct device_type_traits<ushort4> {
193 static const size_t num_elements = 4;
194 static_assert(sizeof(ushort4) == num_elements * datatype_size(data_type));
195};
196
197template<> struct device_type_traits<uint16_t> {
199 static const size_t num_elements = 1;
200 static_assert(sizeof(uint16_t) == num_elements * datatype_size(data_type));
201};
202
203template<> struct device_type_traits<half4> {
205 static const size_t num_elements = 4;
206 static_assert(sizeof(half4) == num_elements * datatype_size(data_type));
207};
208
209template<> struct device_type_traits<uint64_t> {
211 static const size_t num_elements = 1;
212 static_assert(sizeof(uint64_t) == num_elements * datatype_size(data_type));
213};
214
215/* Device Memory
216 *
217 * Base class for all device memory. This should not be allocated directly,
218 * instead the appropriate subclass can be used. */
219
221 public:
222 size_t memory_size()
223 {
225 }
226 size_t memory_elements_size(const int elements)
227 {
228 return elements * data_elements * datatype_size(data_type);
229 }
230
231 /* Data information. */
234 size_t data_size;
240 const char *name;
242
243 /* Pointers. */
248 /* reference counter for shared_pointer */
250 bool move_to_host = false;
251
252 virtual ~device_memory();
253
254 void swap_device(Device *new_device, const size_t new_device_size, device_ptr new_device_ptr);
255 void restore_device();
256
257 bool is_resident(Device *sub_device) const;
258 bool is_shared(Device *sub_device) const;
259
260 /* No copying or moving allowed.
261 *
262 * This is because device implementation might need to register device memory in an allocation
263 * map of some sort and use pointer as a key to identify blocks. Moving data from one place to
264 * another bypassing device allocation routines will make those maps hard to maintain. */
265 device_memory(const device_memory &) = delete;
266 device_memory(device_memory &&other) noexcept = delete;
269
270 protected:
271 friend class Device;
272 friend class GPUDevice;
273 friend class CUDADevice;
274 friend class OptiXDevice;
275 friend class HIPDevice;
276 friend class HIPRTDevice;
277 friend class MetalDevice;
278 friend class OneapiDevice;
279
280 /* Only create through subclasses. */
282
283 /* Host allocation on the device. All host_pointer memory should be
284 * allocated with these functions, for devices that support using
285 * the same pointer for host and device. */
286 void *host_alloc(const size_t size);
287
288 /* Device memory allocation and copying. */
289 void device_alloc();
290 void device_copy_to();
291 void device_move_to_host();
292 void device_copy_from(const size_t y, const size_t w, size_t h, const size_t elem);
293 void device_zero();
294
295 /* Memory can only be freed on host and device together. */
297
298 bool device_is_cpu();
299
305};
306
307/* Device Only Memory
308 *
309 * Working memory only needed by the device, with no corresponding allocation
310 * on the host. Only used internally in the device implementations. */
311
312template<typename T> class device_only_memory : public device_memory {
313 public:
314 device_only_memory(Device *device, const char *name, bool allow_host_memory_fallback = false)
315 : device_memory(device, name, allow_host_memory_fallback ? MEM_READ_WRITE : MEM_DEVICE_ONLY)
316 {
319 }
320
321 device_only_memory(device_only_memory &&other) noexcept : device_memory(std::move(other)) {}
322
324 {
325 free();
326 }
327
/* Make the device allocation hold `num` elements.
 *
 * With `shrink_to_fit` any difference between the current and the requested element count
 * triggers a new device allocation. Without it only growth does, so a buffer that is already
 * large enough is left untouched.
 *
 * NOTE(review): the embedded listing numbers jump from 340 to 342, so a statement inside the
 * reallocation branch is probably missing from this extract - confirm against upstream. */
328 void alloc_to_device(const size_t num, bool shrink_to_fit = true)
329 {
330 size_t new_size = num;
331 bool reallocate;
332
333 if (shrink_to_fit) {
/* Exact fit requested: any size change counts. */
334 reallocate = (data_size != new_size);
335 }
336 else {
/* Only grow, never shrink. */
337 reallocate = (data_size < new_size);
338 }
339
340 if (reallocate) {
342 data_size = new_size;
343 device_alloc();
344 }
345 }
346
347 void free()
348 {
350 data_size = 0;
351 }
352
354 {
355 device_zero();
356 }
357};
358
359/* Device Vector
360 *
361 * Data vector to exchange data between host and device. Memory will be
362 * allocated on the host first with alloc() and resize, and then filled
363 * in and copied to the device with copy_to_device(). Or alternatively
364 * allocated and set to zero on the device with zero_to_device().
365 *
366 * When using memory type MEM_GLOBAL, a pointer to this memory will be
367 * automatically attached to kernel globals, using the provided name
368 * matching an entry in kernel/data_arrays.h. */
369
370template<typename T> class device_vector : public device_memory {
371 public:
382
383 ~device_vector() override
384 {
385 free();
386 }
387
388 /* Host memory allocation.
 *
 * Sizes the vector to width * height * depth elements, where a height or depth of zero
 * counts as one (see size()). A new host buffer is requested through host_alloc() only when
 * the total element count changes, in which case the vector is also flagged as modified;
 * nothing is copied from the previous buffer. The stored dimensions are updated in every
 * case.
 *
 * Returns the host pointer as T*, which is whatever host_alloc() returned when a new buffer
 * was requested.
 *
 * NOTE(review): the embedded listing numbers skip 394 and 397 inside the branch below, so
 * statements are probably missing from this extract - confirm against upstream. */
389 T *alloc(const size_t width, const size_t height = 0, const size_t depth = 0)
390 {
391 size_t new_size = size(width, height, depth);
392
393 if (new_size != data_size) {
395 host_pointer = host_alloc(sizeof(T) * new_size);
396 modified = true;
398 }
399
400 data_size = new_size;
401 data_width = width;
402 data_height = height;
403 data_depth = depth;
404
405 return data();
406 }
407
408 /* Host memory resize. Only use this if the original data needs to be
 * preserved or memory needs to be initialized, it is faster to call
 * alloc() if it can be discarded.
 *
 * When the total element count changes, a new buffer is requested through host_alloc(). If
 * that succeeds, the elements common to the old and new size are copied over and any
 * additional elements are set to T(). The host pointer is then replaced by the new buffer.
 * If host_alloc() returns null, the host pointer becomes null while the size fields are
 * still updated. The stored dimensions are updated in every case.
 *
 * Returns the host pointer as T*.
 *
 * NOTE(review): the embedded listing numbers skip 428 and 430 around the pointer swap, so
 * statements are probably missing from this extract - confirm against upstream. */
411 T *resize(const size_t width, const size_t height = 0, const size_t depth = 0)
412 {
413 size_t new_size = size(width, height, depth);
414
415 if (new_size != data_size) {
416 void *new_ptr = host_alloc(sizeof(T) * new_size);
417
418 if (new_ptr) {
/* Preserve the overlapping prefix of the old contents. */
419 size_t min_size = (new_size < data_size) ? new_size : data_size;
420 for (size_t i = 0; i < min_size; i++) {
421 ((T *)new_ptr)[i] = ((T *)host_pointer)[i];
422 }
/* Value-initialize the newly added tail; this loop does nothing when shrinking. */
423 for (size_t i = data_size; i < new_size; i++) {
424 ((T *)new_ptr)[i] = T();
425 }
426 }
427
429 host_pointer = new_ptr;
431 }
432
433 data_size = new_size;
434 data_width = width;
435 data_height = height;
436 data_depth = depth;
437
438 return data();
439 }
440
441 /* Take over data from an existing array. */
443 {
445
446 data_size = from.size();
447 data_width = 0;
448 data_height = 0;
449 data_depth = 0;
452 }
453
454 /* Free device and host memory. */
455 void free()
456 {
458
459 data_size = 0;
460 data_width = 0;
461 data_height = 0;
462 data_depth = 0;
463 host_pointer = 0;
464 modified = true;
465 need_realloc_ = true;
467 }
468
469 void free_if_need_realloc(bool force_free)
470 {
471 if (need_realloc_ || force_free) {
472 free();
473 }
474 }
475
476 bool is_modified() const
477 {
478 return modified;
479 }
480
482 {
483 return need_realloc_;
484 }
485
487 {
488 modified = true;
489 }
490
492 {
493 need_realloc_ = true;
494 tag_modified();
495 }
496
497 size_t size() const
498 {
499 return data_size;
500 }
501
503 {
504 return (T *)host_pointer;
505 }
506
507 const T *data() const
508 {
509 return (T *)host_pointer;
510 }
511
512 T &operator[](size_t i)
513 {
514 assert(i < data_size);
515 return data()[i];
516 }
517
519 {
520 if (data_size != 0) {
522 }
523 }
524
526 {
527 if (!modified) {
528 return;
529 }
530
532 }
533
535 {
536 modified = false;
537 need_realloc_ = false;
538 }
539
541 {
542 device_copy_from(0, data_width, (data_height == 0) ? 1 : data_height, sizeof(T));
543 }
544
545 void copy_from_device(const size_t y, const size_t w, size_t h)
546 {
547 device_copy_from(y, w, h, sizeof(T));
548 }
549
551 {
552 device_zero();
553 }
554
555 protected:
/* Number of elements in a width * height * depth block, where a height or depth of zero
 * stands for a single row or slice. */
size_t size(const size_t width, const size_t height, const size_t depth)
{
  const size_t rows = (height == 0) ? 1 : height;
  const size_t slices = (depth == 0) ? 1 : depth;
  return width * rows * slices;
}
560};
561
562/* Device Sub Memory
563 *
564 * Pointer into existing memory. It is not allocated separately, but created
565 * from an already allocated base memory. It is freed automatically when it
566 * goes out of scope, which should happen before base memory is freed.
567 *
568 * NOTE: some devices require offset and size of the sub_ptr to be properly
569 * aligned to device->mem_address_alignment(). */
570
572 public:
573 device_sub_ptr(device_memory &mem, const size_t offset, const size_t size);
575
577 {
578 return ptr;
579 }
580
581 protected:
582 /* No copying. */
584
587};
588
589/* Device Texture
590 *
591 * 2D or 3D image texture memory. */
592
594 public:
596 const char *name,
597 const uint slot,
598 ImageDataType image_data_type,
599 InterpolationType interpolation,
600 ExtensionType extension);
601 ~device_texture() override;
602
603 void *alloc(const size_t width, const size_t height, const size_t depth = 0);
604 void copy_to_device();
605
608
609 protected:
/* Number of texels in a width * height * depth texture, where a height or depth of zero
 * stands for a single row or slice. */
size_t size(const size_t width, const size_t height, const size_t depth)
{
  const size_t rows = (height == 0) ? 1 : height;
  const size_t slices = (depth == 0) ? 1 : depth;
  return width * rows * slices;
}
614};
615
ATTR_WARN_UNUSED_RESULT const size_t num
unsigned char uchar
unsigned int uint
unsigned long long int uint64_t
static DBVT_INLINE btScalar size(const btDbvtVolume &a)
Definition btDbvt.cpp:52
SIMD_FORCE_INLINE const btScalar & w() const
Return the w value.
Definition btQuadWord.h:119
T * steal_pointer()
size_t size() const
friend class CUDADevice
bool is_resident(Device *sub_device) const
Definition memory.cpp:134
void * host_alloc(const size_t size)
Definition memory.cpp:42
device_memory(const device_memory &)=delete
void device_zero()
Definition memory.cpp:102
void device_alloc()
Definition memory.cpp:76
void device_copy_to()
Definition memory.cpp:82
void swap_device(Device *new_device, const size_t new_device_size, device_ptr new_device_ptr)
Definition memory.cpp:114
friend class HIPRTDevice
size_t memory_elements_size(const int elements)
device_ptr original_device_ptr
void host_and_device_free()
Definition memory.cpp:57
bool is_shared(Device *sub_device) const
Definition memory.cpp:139
device_memory(device_memory &&other) noexcept=delete
bool device_is_cpu()
Definition memory.cpp:109
void device_move_to_host()
Definition memory.cpp:89
void restore_device()
Definition memory.cpp:127
device_memory & operator=(const device_memory &)=delete
device_memory & operator=(device_memory &&)=delete
virtual ~device_memory()
Definition memory.cpp:36
friend class OneapiDevice
void device_copy_from(const size_t y, const size_t w, size_t h, const size_t elem)
Definition memory.cpp:96
friend class MetalDevice
friend class OptiXDevice
void alloc_to_device(const size_t num, bool shrink_to_fit=true)
device_only_memory(device_only_memory &&other) noexcept
device_only_memory(Device *device, const char *name, bool allow_host_memory_fallback=false)
device_sub_ptr(device_memory &mem, const size_t offset, const size_t size)
Definition memory.cpp:146
device_sub_ptr & operator=(const device_sub_ptr &)
device_ptr operator*() const
size_t size(const size_t width, const size_t height, const size_t depth)
void copy_to_device()
Definition memory.cpp:242
~device_texture() override
Definition memory.cpp:214
void * alloc(const size_t width, const size_t height, const size_t depth=0)
Definition memory.cpp:220
device_texture(Device *device, const char *name, const uint slot, ImageDataType image_data_type, InterpolationType interpolation, ExtensionType extension)
Definition memory.cpp:159
size_t size() const
device_vector(Device *device, const char *name, MemoryType type)
bool is_modified() const
void steal_data(array< T > &from)
size_t size(const size_t width, const size_t height, const size_t depth)
T * resize(const size_t width, const size_t height=0, const size_t depth=0)
void free_if_need_realloc(bool force_free)
T * alloc(const size_t width, const size_t height=0, const size_t depth=0)
const T * data() const
T & operator[](size_t i)
void copy_from_device(const size_t y, const size_t w, size_t h)
Definition half.h:41
static constexpr size_t datatype_size(DataType datatype)
@ MEM_TEXTURE
@ MEM_READ_WRITE
@ MEM_DEVICE_ONLY
@ MEM_READ_ONLY
@ TYPE_UNKNOWN
@ TYPE_UINT16
@ TYPE_UINT64
#define CCL_NAMESPACE_END
VecBase< float, 4 > float4
#define assert(assertion)
#define T
static const DataType data_type
static const DataType data_type
static const DataType data_type
static const DataType data_type
static const DataType data_type
static const DataType data_type
static const size_t num_elements
static const DataType data_type
Definition half.h:60
i
Definition text_draw.cc:230
max
Definition text_draw.cc:251
uint64_t device_ptr
Definition types_base.h:44
ImageDataType
InterpolationType
ExtensionType