vk_data_conversion.cc (Blender 4.3 source listing)
/* SPDX-FileCopyrightText: 2023 Blender Authors
 *
 * SPDX-License-Identifier: GPL-2.0-or-later */

/** \file
 * \ingroup gpu
 */

#include "vk_data_conversion.hh"
#include "vk_device.hh"

#include "gpu_vertex_format_private.hh"

#include "BLI_color.hh"
#include "BLI_math_half.hh"

namespace blender::gpu {

/* -------------------------------------------------------------------- */
/** \name Conversion types
 * \{ */

enum class ConversionType {
  /** No conversion needed, data can be copied as-is. */
  PASS_THROUGH,

  FLOAT_TO_UNORM8,
  UNORM8_TO_FLOAT,

  FLOAT_TO_SNORM8,
  SNORM8_TO_FLOAT,

  FLOAT_TO_UNORM16,
  UNORM16_TO_FLOAT,

  FLOAT_TO_SNORM16,
  SNORM16_TO_FLOAT,

  FLOAT_TO_UNORM32,
  UNORM32_TO_FLOAT,

  UI32_TO_UI16,
  UI16_TO_UI32,

  UI32_TO_UI8,
  UI8_TO_UI32,

  I32_TO_I16,
  I16_TO_I32,

  I32_TO_I8,
  I8_TO_I32,

  FLOAT_TO_HALF,
  HALF_TO_FLOAT,

  FLOAT_TO_SRGBA8,
  SRGBA8_TO_FLOAT,

  FLOAT_TO_DEPTH_COMPONENT24,
  DEPTH_COMPONENT24_TO_FLOAT,

  UINT_TO_DEPTH_COMPONENT24,
  DEPTH_COMPONENT24_TO_UINT,

  FLOAT_TO_B10F_G11F_R11F,
  B10F_G11F_R11F_TO_FLOAT,

  FLOAT3_TO_HALF4,
  HALF4_TO_FLOAT3,

  FLOAT3_TO_FLOAT4,
  FLOAT4_TO_FLOAT3,

  /** The requested conversion isn't supported. */
  UNSUPPORTED,
};

static ConversionType type_of_conversion_float(const eGPUTextureFormat host_format,
                                               const eGPUTextureFormat device_format)
{
  if (host_format != device_format) {
    if (host_format == GPU_RGB16F && device_format == GPU_RGBA16F) {
      return ConversionType::FLOAT3_TO_HALF4;
    }
    if (host_format == GPU_RGB32F && device_format == GPU_RGBA32F) {
      return ConversionType::FLOAT3_TO_FLOAT4;
    }
    if (host_format == GPU_DEPTH_COMPONENT24 && device_format == GPU_DEPTH_COMPONENT32F) {
      return ConversionType::PASS_THROUGH;
    }

    return ConversionType::UNSUPPORTED;
  }

  switch (device_format) {
    case GPU_RGBA32F:
    case GPU_RG32F:
    case GPU_R32F:
    case GPU_DEPTH_COMPONENT32F:
      return ConversionType::PASS_THROUGH;

    case GPU_RGBA16F:
    case GPU_RG16F:
    case GPU_R16F:
    case GPU_RGB16F:
      return ConversionType::FLOAT_TO_HALF;

    case GPU_RGBA8:
    case GPU_RG8:
    case GPU_R8:
      return ConversionType::FLOAT_TO_UNORM8;

    case GPU_RGBA8_SNORM:
    case GPU_RGB8_SNORM:
    case GPU_RG8_SNORM:
    case GPU_R8_SNORM:
      return ConversionType::FLOAT_TO_SNORM8;

    case GPU_RGBA16:
    case GPU_RG16:
    case GPU_R16:
      return ConversionType::FLOAT_TO_UNORM16;

    case GPU_RGBA16_SNORM:
    case GPU_RGB16_SNORM:
    case GPU_RG16_SNORM:
    case GPU_R16_SNORM:
      return ConversionType::FLOAT_TO_SNORM16;

    case GPU_SRGB8_A8:
      return ConversionType::FLOAT_TO_SRGBA8;

    case GPU_DEPTH_COMPONENT24:
      return ConversionType::FLOAT_TO_DEPTH_COMPONENT24;

    case GPU_R11F_G11F_B10F:
      return ConversionType::FLOAT_TO_B10F_G11F_R11F;

    case GPU_SRGB8_A8_DXT1:
    case GPU_SRGB8_A8_DXT3:
    case GPU_SRGB8_A8_DXT5:
    case GPU_RGBA8_DXT1:
    case GPU_RGBA8_DXT3:
    case GPU_RGBA8_DXT5:
      /* Not an actual "conversion", but compressed texture upload code
       * pretends that host data is a float. It is actually raw BCn bits. */
      return ConversionType::PASS_THROUGH;

    case GPU_RGB32F: /* GPU_RGB32F is not supported by vendors. */
    case GPU_RGBA8UI:
    case GPU_RGBA8I:
    case GPU_RGBA16UI:
    case GPU_RGBA16I:
    case GPU_RGBA32UI:
    case GPU_RGBA32I:
    case GPU_RG8UI:
    case GPU_RG8I:
    case GPU_RG16UI:
    case GPU_RG16I:
    case GPU_RG32UI:
    case GPU_RG32I:
    case GPU_R8UI:
    case GPU_R8I:
    case GPU_R16UI:
    case GPU_R16I:
    case GPU_R32UI:
    case GPU_R32I:
    case GPU_RGB10_A2:
    case GPU_RGB10_A2UI:
    case GPU_DEPTH32F_STENCIL8:
    case GPU_DEPTH24_STENCIL8:
    case GPU_RGB8UI:
    case GPU_RGB8I:
    case GPU_RGB8:
    case GPU_RGB16UI:
    case GPU_RGB16I:
    case GPU_RGB16:
    case GPU_RGB32UI:
    case GPU_RGB32I:
    case GPU_SRGB8:
    case GPU_RGB9_E5:
    case GPU_DEPTH_COMPONENT16:
      return ConversionType::UNSUPPORTED;
  }

  return ConversionType::UNSUPPORTED;
}

static ConversionType type_of_conversion_int(eGPUTextureFormat device_format)
{
  switch (device_format) {
    case GPU_RGBA32I:
    case GPU_RG32I:
    case GPU_R32I:
      return ConversionType::PASS_THROUGH;

    case GPU_RGBA16I:
    case GPU_RG16I:
    case GPU_R16I:
      return ConversionType::I32_TO_I16;

    case GPU_RGBA8I:
    case GPU_RG8I:
    case GPU_R8I:
      return ConversionType::I32_TO_I8;

    case GPU_RGBA8UI:
    case GPU_RGBA8:
    case GPU_RGBA16UI:
    case GPU_RGBA16F:
    case GPU_RGBA16:
    case GPU_RGBA32UI:
    case GPU_RGBA32F:
    case GPU_RG8UI:
    case GPU_RG8:
    case GPU_RG16UI:
    case GPU_RG16F:
    case GPU_RG32UI:
    case GPU_RG32F:
    case GPU_RG16:
    case GPU_R8UI:
    case GPU_R8:
    case GPU_R16UI:
    case GPU_R16F:
    case GPU_R16:
    case GPU_R32UI:
    case GPU_R32F:
    case GPU_RGB10_A2:
    case GPU_RGB10_A2UI:
    case GPU_R11F_G11F_B10F:
    case GPU_DEPTH32F_STENCIL8:
    case GPU_DEPTH24_STENCIL8:
    case GPU_SRGB8_A8:
    case GPU_RGBA8_SNORM:
    case GPU_RGBA16_SNORM:
    case GPU_RGB8UI:
    case GPU_RGB8I:
    case GPU_RGB8:
    case GPU_RGB8_SNORM:
    case GPU_RGB16UI:
    case GPU_RGB16I:
    case GPU_RGB16F:
    case GPU_RGB16:
    case GPU_RGB16_SNORM:
    case GPU_RGB32UI:
    case GPU_RGB32I:
    case GPU_RGB32F:
    case GPU_RG8_SNORM:
    case GPU_RG16_SNORM:
    case GPU_R8_SNORM:
    case GPU_R16_SNORM:
    case GPU_SRGB8_A8_DXT1:
    case GPU_SRGB8_A8_DXT3:
    case GPU_SRGB8_A8_DXT5:
    case GPU_RGBA8_DXT1:
    case GPU_RGBA8_DXT3:
    case GPU_RGBA8_DXT5:
    case GPU_SRGB8:
    case GPU_RGB9_E5:
    case GPU_DEPTH_COMPONENT32F:
    case GPU_DEPTH_COMPONENT24:
    case GPU_DEPTH_COMPONENT16:
      return ConversionType::UNSUPPORTED;
  }

  return ConversionType::UNSUPPORTED;
}

static ConversionType type_of_conversion_uint(eGPUTextureFormat device_format)
{
  switch (device_format) {
    case GPU_RGBA32UI:
    case GPU_RG32UI:
    case GPU_R32UI:
    case GPU_DEPTH32F_STENCIL8:
      return ConversionType::PASS_THROUGH;

    case GPU_RGBA16UI:
    case GPU_RG16UI:
    case GPU_R16UI:
    case GPU_RGB16UI:
      return ConversionType::UI32_TO_UI16;

    case GPU_RGBA8UI:
    case GPU_RG8UI:
    case GPU_R8UI:
      return ConversionType::UI32_TO_UI8;

    case GPU_DEPTH_COMPONENT24:
    case GPU_DEPTH24_STENCIL8:
      return ConversionType::UINT_TO_DEPTH_COMPONENT24;

    case GPU_RGBA8I:
    case GPU_RGBA8:
    case GPU_RGBA16I:
    case GPU_RGBA16F:
    case GPU_RGBA16:
    case GPU_RGBA32I:
    case GPU_RGBA32F:
    case GPU_RG8I:
    case GPU_RG8:
    case GPU_RG16I:
    case GPU_RG16F:
    case GPU_RG16:
    case GPU_RG32I:
    case GPU_RG32F:
    case GPU_R8I:
    case GPU_R8:
    case GPU_R16I:
    case GPU_R16F:
    case GPU_R16:
    case GPU_R32I:
    case GPU_R32F:
    case GPU_RGB10_A2:
    case GPU_RGB10_A2UI:
    case GPU_R11F_G11F_B10F:
    case GPU_SRGB8_A8:
    case GPU_RGBA8_SNORM:
    case GPU_RGBA16_SNORM:
    case GPU_RGB8UI:
    case GPU_RGB8I:
    case GPU_RGB8:
    case GPU_RGB8_SNORM:
    case GPU_RGB16I:
    case GPU_RGB16F:
    case GPU_RGB16:
    case GPU_RGB16_SNORM:
    case GPU_RGB32UI:
    case GPU_RGB32I:
    case GPU_RGB32F:
    case GPU_RG8_SNORM:
    case GPU_RG16_SNORM:
    case GPU_R8_SNORM:
    case GPU_R16_SNORM:
    case GPU_SRGB8_A8_DXT1:
    case GPU_SRGB8_A8_DXT3:
    case GPU_SRGB8_A8_DXT5:
    case GPU_RGBA8_DXT1:
    case GPU_RGBA8_DXT3:
    case GPU_RGBA8_DXT5:
    case GPU_SRGB8:
    case GPU_RGB9_E5:
    case GPU_DEPTH_COMPONENT32F:
    case GPU_DEPTH_COMPONENT16:
      return ConversionType::UNSUPPORTED;
  }

  return ConversionType::UNSUPPORTED;
}

static ConversionType type_of_conversion_half(eGPUTextureFormat device_format)
{
  switch (device_format) {
    case GPU_RGBA16F:
    case GPU_RG16F:
    case GPU_R16F:
      return ConversionType::PASS_THROUGH;

    case GPU_RGBA8UI:
    case GPU_RGBA8I:
    case GPU_RGBA8:
    case GPU_RGBA16UI:
    case GPU_RGBA16I:
    case GPU_RGBA16:
    case GPU_RGBA32UI:
    case GPU_RGBA32I:
    case GPU_RGBA32F:
    case GPU_RG8UI:
    case GPU_RG8I:
    case GPU_RG8:
    case GPU_RG16UI:
    case GPU_RG16I:
    case GPU_RG16:
    case GPU_RG32UI:
    case GPU_RG32I:
    case GPU_RG32F:
    case GPU_R8UI:
    case GPU_R8I:
    case GPU_R8:
    case GPU_R16UI:
    case GPU_R16I:
    case GPU_R16:
    case GPU_R32UI:
    case GPU_R32I:
    case GPU_R32F:
    case GPU_RGB10_A2:
    case GPU_RGB10_A2UI:
    case GPU_R11F_G11F_B10F:
    case GPU_DEPTH32F_STENCIL8:
    case GPU_DEPTH24_STENCIL8:
    case GPU_SRGB8_A8:
    case GPU_RGBA8_SNORM:
    case GPU_RGBA16_SNORM:
    case GPU_RGB8UI:
    case GPU_RGB8I:
    case GPU_RGB8:
    case GPU_RGB8_SNORM:
    case GPU_RGB16UI:
    case GPU_RGB16I:
    case GPU_RGB16F:
    case GPU_RGB16:
    case GPU_RGB16_SNORM:
    case GPU_RGB32UI:
    case GPU_RGB32I:
    case GPU_RGB32F:
    case GPU_RG8_SNORM:
    case GPU_RG16_SNORM:
    case GPU_R8_SNORM:
    case GPU_R16_SNORM:
    case GPU_SRGB8_A8_DXT1:
    case GPU_SRGB8_A8_DXT3:
    case GPU_SRGB8_A8_DXT5:
    case GPU_RGBA8_DXT1:
    case GPU_RGBA8_DXT3:
    case GPU_RGBA8_DXT5:
    case GPU_SRGB8:
    case GPU_RGB9_E5:
    case GPU_DEPTH_COMPONENT32F:
    case GPU_DEPTH_COMPONENT24:
    case GPU_DEPTH_COMPONENT16:
      return ConversionType::UNSUPPORTED;
  }

  return ConversionType::UNSUPPORTED;
}

static ConversionType type_of_conversion_ubyte(eGPUTextureFormat device_format)
{
  switch (device_format) {
    case GPU_RGBA8UI:
    case GPU_RGBA8:
    case GPU_RG8UI:
    case GPU_RG8:
    case GPU_R8UI:
    case GPU_R8:
    case GPU_SRGB8_A8:
      return ConversionType::PASS_THROUGH;

    case GPU_RGBA8I:
    case GPU_RGBA16UI:
    case GPU_RGBA16I:
    case GPU_RGBA16F:
    case GPU_RGBA16:
    case GPU_RGBA32UI:
    case GPU_RGBA32I:
    case GPU_RGBA32F:
    case GPU_RG8I:
    case GPU_RG16UI:
    case GPU_RG16I:
    case GPU_RG16F:
    case GPU_RG16:
    case GPU_RG32UI:
    case GPU_RG32I:
    case GPU_RG32F:
    case GPU_R8I:
    case GPU_R16UI:
    case GPU_R16I:
    case GPU_R16F:
    case GPU_R16:
    case GPU_R32UI:
    case GPU_R32I:
    case GPU_R32F:
    case GPU_RGB10_A2:
    case GPU_RGB10_A2UI:
    case GPU_R11F_G11F_B10F:
    case GPU_DEPTH32F_STENCIL8:
    case GPU_DEPTH24_STENCIL8:
    case GPU_RGBA8_SNORM:
    case GPU_RGBA16_SNORM:
    case GPU_RGB8UI:
    case GPU_RGB8I:
    case GPU_RGB8:
    case GPU_RGB8_SNORM:
    case GPU_RGB16UI:
    case GPU_RGB16I:
    case GPU_RGB16F:
    case GPU_RGB16:
    case GPU_RGB16_SNORM:
    case GPU_RGB32UI:
    case GPU_RGB32I:
    case GPU_RGB32F:
    case GPU_RG8_SNORM:
    case GPU_RG16_SNORM:
    case GPU_R8_SNORM:
    case GPU_R16_SNORM:
    case GPU_SRGB8_A8_DXT1:
    case GPU_SRGB8_A8_DXT3:
    case GPU_SRGB8_A8_DXT5:
    case GPU_RGBA8_DXT1:
    case GPU_RGBA8_DXT3:
    case GPU_RGBA8_DXT5:
    case GPU_SRGB8:
    case GPU_RGB9_E5:
    case GPU_DEPTH_COMPONENT32F:
    case GPU_DEPTH_COMPONENT24:
    case GPU_DEPTH_COMPONENT16:
      return ConversionType::UNSUPPORTED;
  }

  return ConversionType::UNSUPPORTED;
}

static ConversionType type_of_conversion_r11g11b10(eGPUTextureFormat device_format)
{
  if (device_format == GPU_R11F_G11F_B10F) {
    return ConversionType::PASS_THROUGH;
  }
  return ConversionType::UNSUPPORTED;
}

static ConversionType type_of_conversion_r10g10b10a2(eGPUTextureFormat device_format)
{
  if (ELEM(device_format, GPU_RGB10_A2, GPU_RGB10_A2UI)) {
    return ConversionType::PASS_THROUGH;
  }
  return ConversionType::UNSUPPORTED;
}

static ConversionType host_to_device(const eGPUDataFormat host_format,
                                     const eGPUTextureFormat host_texture_format,
                                     const eGPUTextureFormat device_format)
{
  BLI_assert(validate_data_format(device_format, host_format));

  switch (host_format) {
    case GPU_DATA_FLOAT:
      return type_of_conversion_float(host_texture_format, device_format);
    case GPU_DATA_UINT:
      return type_of_conversion_uint(device_format);
    case GPU_DATA_INT:
      return type_of_conversion_int(device_format);
    case GPU_DATA_HALF_FLOAT:
      return type_of_conversion_half(device_format);
    case GPU_DATA_UBYTE:
      return type_of_conversion_ubyte(device_format);
    case GPU_DATA_10_11_11_REV:
      return type_of_conversion_r11g11b10(device_format);
    case GPU_DATA_2_10_10_10_REV:
      return type_of_conversion_r10g10b10a2(device_format);

    case GPU_DATA_UINT_24_8:
      return ConversionType::UNSUPPORTED;
  }

  return ConversionType::UNSUPPORTED;
}

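/* Example lookup (a sketch, not part of the original file): a host-side GPU_RGB16F
 * buffer is backed by an RGBA16F texture on the device, so float host data resolves
 * to the alpha-padding conversion. */
static void example_host_to_device_lookup()
{
  const ConversionType type = host_to_device(GPU_DATA_FLOAT, GPU_RGB16F, GPU_RGBA16F);
  BLI_assert(type == ConversionType::FLOAT3_TO_HALF4);
  UNUSED_VARS(type);
}
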
static ConversionType reversed(ConversionType type)
{
#define CASE_SINGLE(a, b) \
  case ConversionType::a##_TO_##b: \
    return ConversionType::b##_TO_##a;

#define CASE_PAIR(a, b) \
  CASE_SINGLE(a, b) \
  CASE_SINGLE(b, a)

  switch (type) {
    case ConversionType::PASS_THROUGH:
      return ConversionType::PASS_THROUGH;

    CASE_PAIR(FLOAT, UNORM8)
    CASE_PAIR(FLOAT, SNORM8)
    CASE_PAIR(FLOAT, UNORM16)
    CASE_PAIR(FLOAT, SNORM16)
    CASE_PAIR(FLOAT, UNORM32)
    CASE_PAIR(UI32, UI16)
    CASE_PAIR(UI32, UI8)
    CASE_PAIR(I32, I16)
    CASE_PAIR(I32, I8)
    CASE_PAIR(FLOAT, HALF)
    CASE_PAIR(FLOAT, SRGBA8)
    CASE_PAIR(FLOAT, DEPTH_COMPONENT24)
    CASE_PAIR(UINT, DEPTH_COMPONENT24)
    CASE_PAIR(FLOAT, B10F_G11F_R11F)
    CASE_PAIR(FLOAT3, HALF4)
    CASE_PAIR(FLOAT3, FLOAT4)

    case ConversionType::UNSUPPORTED:
      return ConversionType::UNSUPPORTED;
  }

#undef CASE_PAIR
#undef CASE_SINGLE

  return ConversionType::UNSUPPORTED;
}

/* \} */

/* -------------------------------------------------------------------- */
/** \name Data conversion
 * \{ */

static uint32_t float_to_uint32_t(float value)
{
  union {
    float fl;
    uint32_t u;
  } float_to_bits;
  float_to_bits.fl = value;
  return float_to_bits.u;
}

static float uint32_t_to_float(uint32_t value)
{
  union {
    float fl;
    uint32_t u;
  } float_to_bits;
  float_to_bits.u = value;
  return float_to_bits.fl;
}
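/* Side note (not part of the original file): on a C++20 toolchain these two
 * helpers can be expressed with std::bit_cast from <bit>, which performs the
 * same bit-level reinterpretation without union type punning:
 *
 *   uint32_t bits = std::bit_cast<uint32_t>(value);
 *   float back = std::bit_cast<float>(bits);
 */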
template<typename InnerType> struct ComponentValue {
  InnerType value;
};
template<typename InnerType> struct PixelValue {
  InnerType value;
};

using F32 = ComponentValue<float>;
using I8 = ComponentValue<int8_t>;
using I16 = ComponentValue<int16_t>;
using I32 = ComponentValue<int32_t>;
using UI8 = ComponentValue<uint8_t>;
using UI16 = ComponentValue<uint16_t>;
using UI32 = ComponentValue<uint32_t>;
using FLOAT3 = PixelValue<float3>;
using FLOAT4 = PixelValue<ColorSceneLinear4f<eAlpha::Premultiplied>>;
using SRGBA8 = PixelValue<ColorSceneLinearByteEncoded4b<eAlpha::Premultiplied>>;

/* NOTE: Vulkan stores R11_G11_B10 in reverse component order. */
class B10F_G11G_R11F : public PixelValue<uint32_t> {};

class HALF4 : public PixelValue<uint64_t> {
 public:
  uint64_t get_r() const
  {
    return value & 0xffff;
  }

  void set_r(uint64_t new_value)
  {
    value = (value & 0xffffffffffff0000) | (new_value & 0xffff);
  }

  uint64_t get_g() const
  {
    return (value >> 16) & 0xffff;
  }

  void set_g(uint64_t new_value)
  {
    value = (value & 0xffffffff0000ffff) | ((new_value & 0xffff) << 16);
  }

  uint64_t get_b() const
  {
    return (value >> 32) & 0xffff;
  }

  void set_b(uint64_t new_value)
  {
    value = (value & 0xffff0000ffffffff) | ((new_value & 0xffff) << 32);
  }

  void set_a(uint64_t new_value)
  {
    value = (value & 0xffffffffffff) | ((new_value & 0xffff) << 48);
  }
};
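/* Layout example (a sketch, not part of the original file): the four half floats
 * pack into the uint64_t with r in bits 0-15, g in 16-31, b in 32-47, a in 48-63. */
static void example_half4_packing()
{
  HALF4 pixel{};
  pixel.set_r(0x3c00); /* 1.0 encoded as a half float. */
  pixel.set_a(0x3c00);
  BLI_assert(pixel.value == 0x3c00000000003c00);
  BLI_assert(pixel.get_r() == 0x3c00);
  UNUSED_VARS(pixel);
}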

class DepthComponent24 : public ComponentValue<uint32_t> {
 public:
  operator uint32_t() const
  {
    return value;
  }

  DepthComponent24 &operator=(uint32_t new_value)
  {
    value = new_value;
    return *this;
  }

  /* Depth component 24 values occupy 4 bytes, but only 3 of them are used. */
  static constexpr size_t used_byte_size()
  {
    return 3;
  }
};

template<typename InnerType> struct SignedNormalized {
  static_assert(std::is_same<InnerType, uint8_t>() || std::is_same<InnerType, uint16_t>());
  InnerType value;

  static constexpr int32_t scalar()
  {
    return (1 << (sizeof(InnerType) * 8 - 1));
  }

  static constexpr int32_t delta()
  {
    return (1 << (sizeof(InnerType) * 8 - 1)) - 1;
  }

  static constexpr int32_t max()
  {
    return ((1 << (sizeof(InnerType) * 8)) - 1);
  }
};

template<typename InnerType> struct UnsignedNormalized {
  static_assert(std::is_same<InnerType, uint8_t>() || std::is_same<InnerType, uint16_t>() ||
                std::is_same<InnerType, uint32_t>() ||
                std::is_same<InnerType, DepthComponent24>());
  InnerType value;

  static constexpr size_t used_byte_size()
  {
    if constexpr (std::is_same<InnerType, DepthComponent24>()) {
      return InnerType::used_byte_size();
    }
    else {
      return sizeof(InnerType);
    }
  }

  static constexpr uint32_t scalar()
  {
    if constexpr (std::is_same<InnerType, DepthComponent24>()) {
      return (1 << (used_byte_size() * 8)) - 1;
    }
    else {
      return std::numeric_limits<InnerType>::max();
    }
  }

  static constexpr uint32_t max()
  {
    if constexpr (std::is_same<InnerType, DepthComponent24>()) {
      return (1 << (used_byte_size() * 8)) - 1;
    }
    else {
      return std::numeric_limits<InnerType>::max();
    }
  }
};

template<typename StorageType> void convert(SignedNormalized<StorageType> &dst, const F32 &src)
{
  static constexpr int32_t scalar = SignedNormalized<StorageType>::scalar();
  static constexpr int32_t delta = SignedNormalized<StorageType>::delta();
  static constexpr int32_t max = SignedNormalized<StorageType>::max();
  dst.value = clamp_i((src.value * scalar + delta), 0, max);
}

template<typename StorageType> void convert(F32 &dst, const SignedNormalized<StorageType> &src)
{
  static constexpr int32_t scalar = SignedNormalized<StorageType>::scalar();
  static constexpr int32_t delta = SignedNormalized<StorageType>::delta();
  dst.value = float(int32_t(src.value) - delta) / scalar;
}
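/* Worked example (not part of the original file): for uint8_t storage,
 * scalar() == 128, delta() == 127 and max() == 255, so values are stored biased:
 *    1.0f -> clamp_i(128 + 127, 0, 255) == 255
 *    0.0f -> clamp_i(0 + 127, 0, 255) == 127
 *   -1.0f -> clamp_i(-128 + 127, 0, 255) == 0
 * Decoding inverts the bias: (255 - 127) / 128 == 1.0f. */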

template<typename StorageType> void convert(UnsignedNormalized<StorageType> &dst, const F32 &src)
{
  static constexpr uint32_t scalar = UnsignedNormalized<StorageType>::scalar();
  static constexpr uint32_t max = scalar;
  /* When converting DEPTH32F to DEPTH24 the scalar gets too large, and 1.0 would wrap around
   * and become 0. Make sure that depth 1.0 does not wrap around. Without this, gpu_select_pick
   * would fail, as a depth of 1.0 would occlude all previous depths. */
  dst.value = src.value >= 1.0f ? max : max_ff(src.value * float(scalar), 0.0f);
}

template<typename StorageType> void convert(F32 &dst, const UnsignedNormalized<StorageType> &src)
{
  static constexpr uint32_t scalar = UnsignedNormalized<StorageType>::scalar();
  dst.value = float(uint32_t(src.value)) / float(scalar);
}

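/* Worked example (not part of the original file): for DepthComponent24,
 * scalar() == (1 << 24) - 1 == 16777215, so 0.5f encodes to 8388607, and the
 * `src.value >= 1.0f` guard pins depth 1.0 to exactly 16777215 rather than
 * letting the float multiply wrap it around to 0. */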
template<typename StorageType>
void convert(UnsignedNormalized<StorageType> & /*dst*/, const UI32 & /*src*/)
{
  BLI_assert_unreachable();
}

template<typename StorageType> void convert(UI32 &dst, const UnsignedNormalized<StorageType> &src)
{
  static constexpr uint32_t scalar = UnsignedNormalized<StorageType>::scalar();
  dst.value = uint32_t(src.value) & scalar;
}

/* Copy the contents of src to dst without performing any actual conversion. */
template<typename DestinationType, typename SourceType>
void convert(DestinationType &dst, const SourceType &src)
{
  static_assert(std::is_same<DestinationType, UI8>() || std::is_same<DestinationType, UI16>() ||
                std::is_same<DestinationType, UI32>() || std::is_same<DestinationType, I8>() ||
                std::is_same<DestinationType, I16>() || std::is_same<DestinationType, I32>());
  static_assert(std::is_same<SourceType, UI8>() || std::is_same<SourceType, UI16>() ||
                std::is_same<SourceType, UI32>() || std::is_same<SourceType, I8>() ||
                std::is_same<SourceType, I16>() || std::is_same<SourceType, I32>());
  static_assert(!std::is_same<DestinationType, SourceType>());
  dst.value = src.value;
}

static void convert(SRGBA8 &dst, const FLOAT4 &src)
{
  dst.value = src.value.encode();
}

static void convert(FLOAT4 &dst, const SRGBA8 &src)
{
  dst.value = src.value.decode();
}

static void convert(FLOAT3 &dst, const HALF4 &src)
{
  dst.value.x = math::half_to_float(src.get_r());
  dst.value.y = math::half_to_float(src.get_g());
  dst.value.z = math::half_to_float(src.get_b());
}

static void convert(HALF4 &dst, const FLOAT3 &src)
{
  dst.set_r(math::float_to_half(src.value.x));
  dst.set_g(math::float_to_half(src.value.y));
  dst.set_b(math::float_to_half(src.value.z));
  dst.set_a(0x3c00); /* FP16 1.0 */
}

static void convert(FLOAT3 &dst, const FLOAT4 &src)
{
  dst.value.x = src.value.r;
  dst.value.y = src.value.g;
  dst.value.z = src.value.b;
}

static void convert(FLOAT4 &dst, const FLOAT3 &src)
{
  dst.value.r = src.value.x;
  dst.value.g = src.value.y;
  dst.value.b = src.value.z;
  dst.value.a = 1.0f;
}

constexpr uint32_t MASK_10_BITS = 0b1111111111;
constexpr uint32_t MASK_11_BITS = 0b11111111111;
constexpr uint8_t SHIFT_B = 22;
constexpr uint8_t SHIFT_G = 11;
constexpr uint8_t SHIFT_R = 0;

/* The R11F_G11F_B10F packing helpers (`convert_float_formats()` and the
 * FLOAT3 <-> B10F_G11G_R11F `convert()` overloads) are elided in this listing. */
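/* A rough sketch of the elided packing (not the original implementation): an
 * unsigned 11-bit float keeps a 5-bit exponent and a 6-bit mantissa, so a positive
 * float32 can be approximated by re-biasing the exponent from 127 to 15 and
 * truncating the mantissa. NaN/Inf and denormal handling are omitted here;
 * `float32_to_float11_sketch` is a hypothetical helper, not a Blender function. */
static uint32_t float32_to_float11_sketch(float value)
{
  if (value <= 0.0f) {
    return 0; /* Negative values cannot be represented; flush to zero. */
  }
  const uint32_t bits = float_to_uint32_t(value);
  const int32_t exponent = int32_t((bits >> 23) & 0xff) - 127 + 15;
  const uint32_t mantissa = (bits >> (23 - 6)) & 0x3f; /* Keep the 6 top mantissa bits. */
  if (exponent <= 0) {
    return 0; /* Underflow. */
  }
  if (exponent >= 31) {
    return (30 << 6) | 0x3f; /* Clamp to the largest finite value. */
  }
  return (uint32_t(exponent) << 6) | mantissa;
}
/* A full pixel would then pack as:
 *   (r11 << SHIFT_R) | (g11 << SHIFT_G) | (b10 << SHIFT_B). */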
/* \} */

template<typename DestinationType, typename SourceType>
void convert(MutableSpan<DestinationType> dst, const Span<SourceType> src)
{
  BLI_assert(src.size() == dst.size());
  for (int64_t index : IndexRange(src.size())) {
    convert(dst[index], src[index]);
  }
}

template<typename DestinationType, typename SourceType>
void convert_per_component(void *dst_memory,
                           const void *src_memory,
                           size_t buffer_size,
                           eGPUTextureFormat device_format)
{
  size_t total_components = to_component_len(device_format) * buffer_size;
  Span<SourceType> src = Span<SourceType>(static_cast<const SourceType *>(src_memory),
                                          total_components);
  MutableSpan<DestinationType> dst = MutableSpan<DestinationType>(
      static_cast<DestinationType *>(dst_memory), total_components);
  convert(dst, src);
}

template<typename DestinationType, typename SourceType>
void convert_per_pixel(void *dst_memory, const void *src_memory, size_t buffer_size)
{
  Span<SourceType> src = Span<SourceType>(static_cast<const SourceType *>(src_memory),
                                          buffer_size);
  MutableSpan<DestinationType> dst = MutableSpan<DestinationType>(
      static_cast<DestinationType *>(dst_memory), buffer_size);
  convert(dst, src);
}

static void convert_buffer(void *dst_memory,
                           const void *src_memory,
                           size_t buffer_size,
                           eGPUTextureFormat device_format,
                           ConversionType type)
{
  switch (type) {
    case ConversionType::UNSUPPORTED:
      return;

    case ConversionType::PASS_THROUGH:
    case ConversionType::UINT_TO_DEPTH_COMPONENT24:
      memcpy(dst_memory, src_memory, buffer_size * to_bytesize(device_format));
      return;

    case ConversionType::UI32_TO_UI16:
      convert_per_component<UI16, UI32>(dst_memory, src_memory, buffer_size, device_format);
      break;

    case ConversionType::UI16_TO_UI32:
      convert_per_component<UI32, UI16>(dst_memory, src_memory, buffer_size, device_format);
      break;

    case ConversionType::UI32_TO_UI8:
      convert_per_component<UI8, UI32>(dst_memory, src_memory, buffer_size, device_format);
      break;

    case ConversionType::UI8_TO_UI32:
      convert_per_component<UI32, UI8>(dst_memory, src_memory, buffer_size, device_format);
      break;

    case ConversionType::I32_TO_I16:
      convert_per_component<I16, I32>(dst_memory, src_memory, buffer_size, device_format);
      break;

    case ConversionType::I16_TO_I32:
      convert_per_component<I32, I16>(dst_memory, src_memory, buffer_size, device_format);
      break;

    case ConversionType::I32_TO_I8:
      convert_per_component<I8, I32>(dst_memory, src_memory, buffer_size, device_format);
      break;

    case ConversionType::I8_TO_I32:
      convert_per_component<I32, I8>(dst_memory, src_memory, buffer_size, device_format);
      break;

    case ConversionType::FLOAT_TO_UNORM8:
      convert_per_component<UnsignedNormalized<uint8_t>, F32>(
          dst_memory, src_memory, buffer_size, device_format);
      break;
    case ConversionType::UNORM8_TO_FLOAT:
      convert_per_component<F32, UnsignedNormalized<uint8_t>>(
          dst_memory, src_memory, buffer_size, device_format);
      break;

    case ConversionType::FLOAT_TO_SNORM8:
      convert_per_component<SignedNormalized<uint8_t>, F32>(
          dst_memory, src_memory, buffer_size, device_format);
      break;
    case ConversionType::SNORM8_TO_FLOAT:
      convert_per_component<F32, SignedNormalized<uint8_t>>(
          dst_memory, src_memory, buffer_size, device_format);
      break;

    case ConversionType::FLOAT_TO_UNORM16:
      convert_per_component<UnsignedNormalized<uint16_t>, F32>(
          dst_memory, src_memory, buffer_size, device_format);
      break;
    case ConversionType::UNORM16_TO_FLOAT:
      convert_per_component<F32, UnsignedNormalized<uint16_t>>(
          dst_memory, src_memory, buffer_size, device_format);
      break;

    case ConversionType::FLOAT_TO_SNORM16:
      convert_per_component<SignedNormalized<uint16_t>, F32>(
          dst_memory, src_memory, buffer_size, device_format);
      break;
    case ConversionType::SNORM16_TO_FLOAT:
      convert_per_component<F32, SignedNormalized<uint16_t>>(
          dst_memory, src_memory, buffer_size, device_format);
      break;

    case ConversionType::FLOAT_TO_UNORM32:
      convert_per_component<UnsignedNormalized<uint32_t>, F32>(
          dst_memory, src_memory, buffer_size, device_format);
      break;
    case ConversionType::UNORM32_TO_FLOAT:
      convert_per_component<F32, UnsignedNormalized<uint32_t>>(
          dst_memory, src_memory, buffer_size, device_format);
      break;

    case ConversionType::FLOAT_TO_HALF:
      blender::math::float_to_half_array(static_cast<const float *>(src_memory),
                                         static_cast<uint16_t *>(dst_memory),
                                         to_component_len(device_format) * buffer_size);
      break;
    case ConversionType::HALF_TO_FLOAT:
      blender::math::half_to_float_array(static_cast<const uint16_t *>(src_memory),
                                         static_cast<float *>(dst_memory),
                                         to_component_len(device_format) * buffer_size);
      break;

    case ConversionType::FLOAT_TO_SRGBA8:
      convert_per_pixel<SRGBA8, FLOAT4>(dst_memory, src_memory, buffer_size);
      break;
    case ConversionType::SRGBA8_TO_FLOAT:
      convert_per_pixel<FLOAT4, SRGBA8>(dst_memory, src_memory, buffer_size);
      break;

    case ConversionType::FLOAT_TO_DEPTH_COMPONENT24:
      convert_per_component<UnsignedNormalized<DepthComponent24>, F32>(
          dst_memory, src_memory, buffer_size, device_format);
      break;
    case ConversionType::DEPTH_COMPONENT24_TO_FLOAT:
      convert_per_component<F32, UnsignedNormalized<DepthComponent24>>(
          dst_memory, src_memory, buffer_size, device_format);
      break;
    case ConversionType::DEPTH_COMPONENT24_TO_UINT:
      convert_per_component<UI32, UnsignedNormalized<DepthComponent24>>(
          dst_memory, src_memory, buffer_size, device_format);
      break;

    case ConversionType::FLOAT_TO_B10F_G11F_R11F:
      convert_per_pixel<B10F_G11G_R11F, FLOAT3>(dst_memory, src_memory, buffer_size);
      break;

    case ConversionType::B10F_G11F_R11F_TO_FLOAT:
      convert_per_pixel<FLOAT3, B10F_G11G_R11F>(dst_memory, src_memory, buffer_size);
      break;

    case ConversionType::FLOAT3_TO_HALF4:
      convert_per_pixel<HALF4, FLOAT3>(dst_memory, src_memory, buffer_size);
      break;
    case ConversionType::HALF4_TO_FLOAT3:
      convert_per_pixel<FLOAT3, HALF4>(dst_memory, src_memory, buffer_size);
      break;

    case ConversionType::FLOAT3_TO_FLOAT4:
      convert_per_pixel<FLOAT4, FLOAT3>(dst_memory, src_memory, buffer_size);
      break;
    case ConversionType::FLOAT4_TO_FLOAT3:
      convert_per_pixel<FLOAT3, FLOAT4>(dst_memory, src_memory, buffer_size);
      break;
  }
}

/* -------------------------------------------------------------------- */
/** \name API
 * \{ */

void convert_host_to_device(void *dst_buffer,
                            const void *src_buffer,
                            size_t buffer_size,
                            eGPUDataFormat host_format,
                            eGPUTextureFormat host_texture_format,
                            eGPUTextureFormat device_format)
{
  ConversionType conversion_type = host_to_device(host_format, host_texture_format, device_format);
  BLI_assert(conversion_type != ConversionType::UNSUPPORTED);
  convert_buffer(dst_buffer, src_buffer, buffer_size, device_format, conversion_type);
}
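/* Hypothetical usage (not part of the original file): convert four float pixels
 * (GPU_DATA_FLOAT) into half floats for an RGBA16F device texture; note that
 * `buffer_size` counts pixels, not bytes. */
static void example_upload_conversion()
{
  const size_t pixel_len = 4;
  float host_pixels[pixel_len * 4] = {0.0f};
  uint16_t device_pixels[pixel_len * 4];
  convert_host_to_device(
      device_pixels, host_pixels, pixel_len, GPU_DATA_FLOAT, GPU_RGBA16F, GPU_RGBA16F);
}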

void convert_device_to_host(void *dst_buffer,
                            const void *src_buffer,
                            size_t buffer_size,
                            eGPUDataFormat host_format,
                            eGPUTextureFormat host_texture_format,
                            eGPUTextureFormat device_format)
{
  ConversionType conversion_type = reversed(
      host_to_device(host_format, host_texture_format, device_format));
  BLI_assert_msg(conversion_type != ConversionType::UNSUPPORTED,
                 "Data conversion between host_format and device_format isn't supported (yet).");
  convert_buffer(dst_buffer, src_buffer, buffer_size, device_format, conversion_type);
}

/* \} */

/* -------------------------------------------------------------------- */
/** \name Vertex formats
 * \{ */

static bool attribute_check(const GPUVertAttr attribute,
                            GPUVertCompType comp_type,
                            GPUVertFetchMode fetch_mode)
{
  return attribute.comp_type == comp_type && attribute.fetch_mode == fetch_mode;
}

static bool attribute_check(const GPUVertAttr attribute, GPUVertCompType comp_type, uint comp_len)
{
  return attribute.comp_type == comp_type && attribute.comp_len == comp_len;
}

void VertexFormatConverter::reset()
{
  source_format_ = nullptr;
  device_format_ = nullptr;
  GPU_vertformat_clear(&converted_format_);

  needs_conversion_ = false;
}

bool VertexFormatConverter::is_initialized() const
{
  return device_format_ != nullptr;
}

void VertexFormatConverter::init(const GPUVertFormat *vertex_format,
                                 const VKWorkarounds &workarounds)
{
  source_format_ = vertex_format;
  device_format_ = vertex_format;

  update_conversion_flags(*source_format_, workarounds);
  if (needs_conversion_) {
    init_device_format(workarounds);
  }
}

const GPUVertFormat &VertexFormatConverter::device_format_get() const
{
  BLI_assert(is_initialized());
  return *device_format_;
}

bool VertexFormatConverter::needs_conversion() const
{
  BLI_assert(is_initialized());
  return needs_conversion_;
}

void VertexFormatConverter::update_conversion_flags(const GPUVertFormat &vertex_format,
                                                    const VKWorkarounds &workarounds)
{
  needs_conversion_ = false;

  for (int attr_index : IndexRange(vertex_format.attr_len)) {
    const GPUVertAttr &vert_attr = vertex_format.attrs[attr_index];
    update_conversion_flags(vert_attr, workarounds);
  }
}

void VertexFormatConverter::update_conversion_flags(const GPUVertAttr &vertex_attribute,
                                                    const VKWorkarounds &workarounds)
{
  /* I32/U32 to F32 conversion doesn't exist in Vulkan. */
  if (vertex_attribute.fetch_mode == GPU_FETCH_INT_TO_FLOAT &&
      ELEM(vertex_attribute.comp_type, GPU_COMP_I32, GPU_COMP_U32))
  {
    needs_conversion_ = true;
  }
  /* r8g8b8 formats will be stored as r8g8b8a8. */
  else if (workarounds.vertex_formats.r8g8b8 && attribute_check(vertex_attribute, GPU_COMP_U8, 3))
  {
    needs_conversion_ = true;
  }
}

void VertexFormatConverter::init_device_format(const VKWorkarounds &workarounds)
{
  BLI_assert(needs_conversion_);
  GPU_vertformat_copy(&converted_format_, *source_format_);
  bool needs_repack = false;

  for (int attr_index : IndexRange(converted_format_.attr_len)) {
    GPUVertAttr &vert_attr = converted_format_.attrs[attr_index];
    make_device_compatible(vert_attr, workarounds, needs_repack);
  }

  if (needs_repack) {
    VertexFormat_pack(&converted_format_);
  }
  device_format_ = &converted_format_;
}

void VertexFormatConverter::make_device_compatible(GPUVertAttr &vertex_attribute,
                                                   const VKWorkarounds &workarounds,
                                                   bool &r_needs_repack) const
{
  if (vertex_attribute.fetch_mode == GPU_FETCH_INT_TO_FLOAT &&
      ELEM(vertex_attribute.comp_type, GPU_COMP_I32, GPU_COMP_U32))
  {
    vertex_attribute.fetch_mode = GPU_FETCH_FLOAT;
    vertex_attribute.comp_type = GPU_COMP_F32;
  }
  else if (workarounds.vertex_formats.r8g8b8 && attribute_check(vertex_attribute, GPU_COMP_U8, 3))
  {
    vertex_attribute.comp_len = 4;
    vertex_attribute.size = 4;
    r_needs_repack = true;
  }
}

void VertexFormatConverter::convert(void *device_data,
                                    const void *source_data,
                                    const uint vertex_len) const
{
  BLI_assert(needs_conversion_);
  if (source_data != device_data) {
    memcpy(device_data, source_data, device_format_->stride * vertex_len);
  }

  const void *source_row_data = static_cast<const uint8_t *>(source_data);
  void *device_row_data = static_cast<uint8_t *>(device_data);
  for (int vertex_index : IndexRange(vertex_len)) {
    UNUSED_VARS(vertex_index);
    convert_row(device_row_data, source_row_data);
    source_row_data = static_cast<const uint8_t *>(source_row_data) + source_format_->stride;
    device_row_data = static_cast<uint8_t *>(device_row_data) + device_format_->stride;
  }
}

void VertexFormatConverter::convert_row(void *device_row_data, const void *source_row_data) const
{
  for (int attr_index : IndexRange(source_format_->attr_len)) {
    const GPUVertAttr &device_attribute = device_format_->attrs[attr_index];
    const GPUVertAttr &source_attribute = source_format_->attrs[attr_index];
    convert_attribute(device_row_data, source_row_data, device_attribute, source_attribute);
  }
}

void VertexFormatConverter::convert_attribute(void *device_row_data,
                                              const void *source_row_data,
                                              const GPUVertAttr &device_attribute,
                                              const GPUVertAttr &source_attribute) const
{
  const void *source_attr_data = static_cast<const uint8_t *>(source_row_data) +
                                 source_attribute.offset;
  void *device_attr_data = static_cast<uint8_t *>(device_row_data) + device_attribute.offset;
  if (source_attribute.comp_len == device_attribute.comp_len &&
      source_attribute.comp_type == device_attribute.comp_type &&
      source_attribute.fetch_mode == device_attribute.fetch_mode)
  {
    /* This check is done first to improve possible branch prediction. */
  }
  else if (attribute_check(source_attribute, GPU_COMP_I32, GPU_FETCH_INT_TO_FLOAT) &&
           attribute_check(device_attribute, GPU_COMP_F32, GPU_FETCH_FLOAT))
  {
    for (int component : IndexRange(source_attribute.comp_len)) {
      const int32_t *component_in = static_cast<const int32_t *>(source_attr_data) + component;
      float *component_out = static_cast<float *>(device_attr_data) + component;
      *component_out = float(*component_in);
    }
  }
  else if (attribute_check(source_attribute, GPU_COMP_U32, GPU_FETCH_INT_TO_FLOAT) &&
           attribute_check(device_attribute, GPU_COMP_F32, GPU_FETCH_FLOAT))
  {
    for (int component : IndexRange(source_attribute.comp_len)) {
      const uint32_t *component_in = static_cast<const uint32_t *>(source_attr_data) + component;
      float *component_out = static_cast<float *>(device_attr_data) + component;
      *component_out = float(*component_in);
    }
  }
  else if (attribute_check(source_attribute, GPU_COMP_U8, 3) &&
           attribute_check(device_attribute, GPU_COMP_U8, 4))
  {
    const uchar3 *attr_in = static_cast<const uchar3 *>(source_attr_data);
    uchar4 *attr_out = static_cast<uchar4 *>(device_attr_data);
    *attr_out = uchar4(attr_in->x, attr_in->y, attr_in->z, 255);
  }
  else {
    BLI_assert_unreachable();
  }
}
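/* Hypothetical usage (not part of the original file): an I32 attribute fetched as
 * float has no Vulkan equivalent, so the converter rewrites it to F32 and converts
 * the vertex data during upload. `workarounds` would come from the active VKDevice,
 * and VertexFormatConverter is assumed to be default-constructible here. */
static void example_vertex_format_conversion(const VKWorkarounds &workarounds,
                                             void *device_buffer,
                                             const void *host_buffer,
                                             const uint vertex_len)
{
  GPUVertFormat format = {};
  GPU_vertformat_attr_add(&format, "counter", GPU_COMP_I32, 1, GPU_FETCH_INT_TO_FLOAT);

  VertexFormatConverter converter;
  converter.init(&format, workarounds);
  if (converter.needs_conversion()) {
    converter.convert(device_buffer, host_buffer, vertex_len);
  }
}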

/* \} */

}  // namespace blender::gpu