31static void get_hiprt_transform(
float matrix[][4],
Transform &tfm)
35 matrix[row][
col++] = tfm.
x.x;
36 matrix[row][
col++] = tfm.
x.y;
37 matrix[row][
col++] = tfm.
x.z;
38 matrix[row][
col++] = tfm.
x.w;
41 matrix[row][
col++] = tfm.
y.x;
42 matrix[row][
col++] = tfm.
y.y;
43 matrix[row][
col++] = tfm.
y.z;
44 matrix[row][
col++] = tfm.
y.w;
47 matrix[row][
col++] = tfm.
z.x;
48 matrix[row][
col++] = tfm.
z.y;
49 matrix[row][
col++] = tfm.
z.z;
50 matrix[row][
col++] = tfm.
z.w;
61 : HIPDevice(info, stats, profiler, headless),
64 functions_table(
NULL),
65 scratch_buffer_size(0),
67 prim_visibility(this,
"prim_visibility",
MEM_GLOBAL),
68 instance_transform_matrix(this,
"instance_transform_matrix",
MEM_READ_ONLY),
70 user_instance_id(this,
"user_instance_id",
MEM_GLOBAL),
73 custom_prim_info(this,
"custom_prim_info",
MEM_GLOBAL),
74 custom_prim_info_offset(this,
"custom_prim_info_offset",
MEM_GLOBAL),
76 prim_time_offset(this,
"prim_time_offset",
MEM_GLOBAL)
78 HIPContextScope scope(
this);
79 global_stack_buffer = {0};
80 hiprtContextCreationInput hiprt_context_input = {0};
81 hiprt_context_input.ctxt = hipContext;
82 hiprt_context_input.device = hipDevice;
83 hiprt_context_input.deviceType = hiprtDeviceAMD;
84 hiprtError rt_result = hiprtCreateContext(
85 HIPRT_API_VERSION, hiprt_context_input, &hiprt_context);
87 if (rt_result != hiprtSuccess) {
92 rt_result = hiprtCreateFuncTable(
93 hiprt_context, Max_Primitive_Type, Max_Intersect_Filter_Function, functions_table);
95 if (rt_result != hiprtSuccess) {
96 set_error(
string_printf(
"Failed to create HIPRT Function Table"));
100 hiprtSetLogLevel(hiprtLogLevelNone);
103HIPRTDevice::~HIPRTDevice()
105 HIPContextScope scope(
this);
106 user_instance_id.free();
107 prim_visibility.free();
108 hiprt_blas_ptr.free();
110 instance_transform_matrix.free();
111 transform_headers.free();
112 custom_prim_info_offset.free();
113 custom_prim_info.free();
114 prim_time_offset.free();
117 hiprtDestroyGlobalStackBuffer(hiprt_context, global_stack_buffer);
118 hiprtDestroyFuncTable(hiprt_context, functions_table);
119 hiprtDestroyScene(hiprt_context, scene);
120 hiprtDestroyContext(hiprt_context);
125 return make_unique<HIPRTDeviceQueue>(
this);
128string HIPRTDevice::compile_kernel_get_common_cflags(
const uint kernel_features)
130 string cflags = HIPDevice::compile_kernel_get_common_cflags(kernel_features);
132 cflags +=
" -D __HIPRT__ ";
137string HIPRTDevice::compile_kernel(
const uint kernel_features,
const char *name,
const char *base)
140 hipDeviceGetAttribute(&major, hipDeviceAttributeComputeCapabilityMajor, hipDevId);
141 hipDeviceGetAttribute(&minor, hipDeviceAttributeComputeCapabilityMinor, hipDevId);
142 const std::string arch = hipDeviceArch(hipDevId);
144 if (!use_adaptive_compilation()) {
146 VLOG(1) <<
"Testing for pre-compiled kernel " << fatbin <<
".";
148 VLOG(1) <<
"Using precompiled kernel.";
153 string source_path =
path_get(
"source");
156 string common_cflags = compile_kernel_get_common_cflags(kernel_features);
159 const string include_path = source_path;
161 "cycles_%s_%s_%s.bc", name, arch.c_str(), kernel_md5.c_str());
164 "cycles_%s_%s_%s.hipfb", name, arch.c_str(), kernel_md5.c_str());
167 "hiprt_%s_%s_%s.bc", name, arch.c_str(), kernel_md5.c_str());
170 const string hiprt_include_path =
path_join(source_path,
"kernel/device/hiprt");
172 VLOG(1) <<
"Testing for locally compiled kernel " << fatbin <<
".";
174 VLOG(1) <<
"Using locally compiled kernel.";
179 if (!use_adaptive_compilation() && have_precompiled_kernels()) {
180 if (!hipSupportsDevice(hipDevId)) {
182 string_printf(
"HIP backend requires compute capability 10.1 or up, but found %d.%d. "
183 "Your GPU is not supported.",
189 string_printf(
"HIP binary kernel for this graphics card compute "
190 "capability (%d.%d) not found.",
198 const char *
const hipcc = hipewCompilerPath();
201 "HIP hipcc compiler not found. "
202 "Install HIP toolkit in default location.");
206 const int hipcc_hip_version = hipewCompilerVersion();
207 VLOG_INFO <<
"Found hipcc " << hipcc <<
", HIP version " << hipcc_hip_version <<
".";
208 if (hipcc_hip_version < 40) {
210 "Unsupported HIP version %d.%d detected, "
211 "you need HIP 4.0 or newer.\n",
212 hipcc_hip_version / 10,
213 hipcc_hip_version % 10);
220 rtc_options.append(
" --offload-arch=").append(arch.c_str());
221 rtc_options.append(
" -D __HIPRT__");
222 rtc_options.append(
" -ffast-math -O3 -std=c++17");
223 rtc_options.append(
" -fgpu-rdc -c --gpu-bundle-output -c -emit-llvm");
228 printf(
"Compiling %s and caching to %s", source_path.c_str(), fatbin.c_str());
234 string command =
string_printf(
"%s %s -I %s -I %s %s -o \"%s\"",
237 include_path.c_str(),
238 hiprt_include_path.c_str(),
240 cycles_bitcode.c_str());
242 printf(
"Compiling %sHIP kernel ...\n%s\n",
243 (use_adaptive_compilation()) ?
"adaptive " :
"",
247 command =
"call " + command;
249 if (system(command.c_str()) != 0) {
251 "Failed to execute compilation command, "
252 "see console for details.");
259 rtc_options.append(
" -x hip");
260 rtc_options.append(
" -D HIPRT_BITCODE_LINKING ");
262 string source_path =
path_join(hiprt_include_path,
"/hiprt/impl/hiprt_kernels_bitcode.h");
267 hiprt_include_path.c_str(),
269 hiprt_bitcode.c_str());
271 printf(
"Compiling %sHIP kernel ...\n%s\n",
272 (use_adaptive_compilation()) ?
"adaptive " :
"",
276 command =
"call " + command;
278 if (system(command.c_str()) != 0) {
280 "Failed to execute compilation command, "
281 "see console for details.");
288 string linker_options;
289 linker_options.append(
" --offload-arch=").append(arch.c_str());
290 linker_options.append(
" -fgpu-rdc --hip-link --cuda-device-only ");
292 string linker_command =
string_printf(
"clang++ %s \"%s\" \"%s\" -o \"%s\"",
293 linker_options.c_str(),
294 cycles_bitcode.c_str(),
295 hiprt_bitcode.c_str(),
299 linker_command =
"call " + linker_command;
301 if (system(linker_command.c_str()) != 0) {
303 "Failed to execute linking command, "
304 "see console for details.");
308 printf(
"Kernel compilation finished in %.2lfs.\n",
time_dt() - starttime);
313bool HIPRTDevice::load_kernels(
const uint kernel_features)
316 if (use_adaptive_compilation()) {
317 VLOG(1) <<
"Skipping HIP kernel reload for adaptive compilation, not currently supported.";
325 if (!support_device(kernel_features)) {
330 const char *kernel_name =
"kernel";
331 string fatbin = compile_kernel(kernel_features, kernel_name);
336 HIPContextScope scope(
this);
342 result = hipModuleLoadData(&hipModule, fatbin_data.c_str());
345 result = hipErrorFileNotFound;
349 "Failed to load HIP kernel from '%s' (%s)", fatbin.c_str(), hipewErrorString(
result)));
351 if (
result == hipSuccess) {
360 HIPRTDeviceQueue
queue(
this);
367 queue.init_execution();
368 queue.enqueue(test_kernel, 1, args);
373 return (
result == hipSuccess);
376void HIPRTDevice::const_copy_to(
const char *name,
void *host,
size_t size)
378 HIPContextScope scope(
this);
382 if (strcmp(name,
"data") == 0) {
385 *(hiprtScene *)&
data->device_bvh = scene;
388 hip_assert(hipModuleGetGlobal(&mem, &bytes, hipModule,
"kernel_params"));
391# define KERNEL_DATA_ARRAY(data_type, data_name) \
392 if (strcmp(name, #data_name) == 0) { \
393 hip_assert(hipMemcpyHtoD(mem + offsetof(KernelParamsHIPRT, data_name), host, size)); \
405# include "kernel/data_arrays.h"
406# undef KERNEL_DATA_ARRAY
409hiprtGeometryBuildInput HIPRTDevice::prepare_triangle_blas(BVHHIPRT *bvh,
Mesh *mesh)
411 hiprtGeometryBuildInput geom_input;
418 const size_t num_verts = mesh->get_verts().size();
419 const size_t num_steps = mesh->get_motion_steps();
424 if (bvh->params.num_motion_triangle_steps == 0 || bvh->params.use_spatial_split) {
425 bvh->custom_primitive_bound.alloc(num_triangles);
426 bvh->custom_prim_info.resize(num_triangles);
427 for (
uint j = 0; j < num_triangles; j++) {
431 for (
size_t step = 0;
step < num_steps - 1;
step++) {
436 bvh->custom_primitive_bound[num_bounds] =
bounds;
437 bvh->custom_prim_info[num_bounds].x = j;
444 const int num_bvh_steps = bvh->params.num_motion_triangle_steps * 2 + 1;
445 const float num_bvh_steps_inv_1 = 1.0f / (num_bvh_steps - 1);
447 bvh->custom_primitive_bound.alloc(num_triangles * num_bvh_steps);
448 bvh->custom_prim_info.resize(num_triangles * num_bvh_steps);
449 bvh->prims_time.resize(num_triangles * num_bvh_steps);
451 for (
uint j = 0; j < num_triangles; j++) {
456 prev_bounds.
grow(prev_verts[0]);
457 prev_bounds.
grow(prev_verts[1]);
458 prev_bounds.
grow(prev_verts[2]);
460 for (
int bvh_step = 1; bvh_step < num_bvh_steps; ++bvh_step) {
461 const float curr_time = (
float)(bvh_step)*num_bvh_steps_inv_1;
465 curr_bounds.
grow(curr_verts[0]);
466 curr_bounds.
grow(curr_verts[1]);
467 curr_bounds.
grow(curr_verts[2]);
471 const float prev_time = (
float)(bvh_step - 1) * num_bvh_steps_inv_1;
472 bvh->custom_primitive_bound[num_bounds] =
bounds;
473 bvh->custom_prim_info[num_bounds].x = j;
475 bvh->prims_time[num_bounds].x = curr_time;
476 bvh->prims_time[num_bounds].y = prev_time;
479 prev_bounds = curr_bounds;
484 bvh->custom_prim_aabb.aabbCount = num_bounds;
485 bvh->custom_prim_aabb.aabbStride =
sizeof(
BoundBox);
486 bvh->custom_primitive_bound.copy_to_device();
487 bvh->custom_prim_aabb.aabbs = (
void *)bvh->custom_primitive_bound.device_pointer;
489 geom_input.type = hiprtPrimitiveTypeAABBList;
490 geom_input.primitive.aabbList = bvh->custom_prim_aabb;
491 geom_input.geomType = Motion_Triangle;
494 size_t triangle_size = mesh->get_triangles().size();
495 void *triangle_data = mesh->get_triangles().data();
497 size_t vertex_size = mesh->get_verts().size();
498 void *vertex_data = mesh->get_verts().data();
501 bvh->triangle_mesh.triangleStride = 3 *
sizeof(
int);
502 bvh->triangle_mesh.vertexCount = vertex_size;
503 bvh->triangle_mesh.vertexStride =
sizeof(
float3);
505 bvh->triangle_index.host_pointer = triangle_data;
506 bvh->triangle_index.data_elements = 1;
507 bvh->triangle_index.data_type =
TYPE_INT;
508 bvh->triangle_index.data_size = triangle_size;
509 bvh->triangle_index.copy_to_device();
510 bvh->triangle_mesh.triangleIndices = (
void *)(bvh->triangle_index.device_pointer);
512 bvh->triangle_index.host_pointer = 0;
513 bvh->vertex_data.host_pointer = vertex_data;
514 bvh->vertex_data.data_elements = 4;
516 bvh->vertex_data.data_size = vertex_size;
517 bvh->vertex_data.copy_to_device();
518 bvh->triangle_mesh.vertices = (
void *)(bvh->vertex_data.device_pointer);
519 bvh->vertex_data.host_pointer = 0;
521 geom_input.type = hiprtPrimitiveTypeTriangleMesh;
522 geom_input.primitive.triangleMesh = bvh->triangle_mesh;
528hiprtGeometryBuildInput HIPRTDevice::prepare_curve_blas(BVHHIPRT *bvh,
Hair *hair)
530 hiprtGeometryBuildInput geom_input;
537 if (curve_attr_mP ==
NULL || bvh->params.num_motion_curve_steps == 0) {
539 bvh->custom_prim_info.resize(num_segments);
540 bvh->custom_primitive_bound.alloc(num_segments);
543 size_t num_boxes = bvh->params.num_motion_curve_steps * 2 * num_segments;
544 bvh->custom_prim_info.resize(num_boxes);
545 bvh->prims_time.resize(num_boxes);
546 bvh->custom_primitive_bound.alloc(num_boxes);
551 float3 *curve_keys = hair->get_curve_keys().data();
553 for (
uint j = 0; j < num_curves; j++) {
555 const float *curve_radius = &hair->get_curve_radius()[0];
557 for (
int k = 0; k < curve.
num_keys - 1; k++) {
558 if (curve_attr_mP ==
NULL) {
560 current_keys[0] = curve_keys[
max(first_key + k - 1, first_key)];
561 current_keys[1] = curve_keys[first_key + k];
562 current_keys[2] = curve_keys[first_key + k + 1];
563 current_keys[3] = curve_keys[
min(first_key + k + 2, first_key + curve.
num_keys - 1)];
565 if (current_keys[0].
x == current_keys[1].
x && current_keys[1].
x == current_keys[2].
x &&
566 current_keys[2].
x == current_keys[3].
x && current_keys[0].
y == current_keys[1].
y &&
567 current_keys[1].
y == current_keys[2].
y && current_keys[2].
y == current_keys[3].
y &&
568 current_keys[0].
z == current_keys[1].
z && current_keys[1].
z == current_keys[2].
z &&
569 current_keys[2].
z == current_keys[3].
z)
576 bvh->custom_prim_info[num_bounds].x = j;
577 bvh->custom_prim_info[num_bounds].y = type;
578 bvh->custom_primitive_bound[num_bounds] =
bounds;
583 const size_t num_steps = hair->get_motion_steps();
585 const size_t num_keys = hair->get_curve_keys().size();
587 if (bvh->params.num_motion_curve_steps == 0 || bvh->params.use_spatial_split) {
590 for (
size_t step = 0;
step < num_steps - 1;
step++) {
595 bvh->custom_prim_info[num_bounds].x = j;
596 bvh->custom_prim_info[num_bounds].y = type;
597 bvh->custom_primitive_bound[num_bounds] =
bounds;
602 const int num_bvh_steps = bvh->params.num_motion_curve_steps * 2 + 1;
603 const float num_bvh_steps_inv_1 = 1.0f / (num_bvh_steps - 1);
620 for (
int bvh_step = 1; bvh_step < num_bvh_steps; ++bvh_step) {
621 const float curr_time = (
float)(bvh_step)*num_bvh_steps_inv_1;
639 const float prev_time = (
float)(bvh_step - 1) * num_bvh_steps_inv_1;
641 bvh->custom_prim_info[num_bounds].x = j;
642 bvh->custom_prim_info[num_bounds].y = packed_type;
643 bvh->custom_primitive_bound[num_bounds] =
bounds;
644 bvh->prims_time[num_bounds].x = curr_time;
645 bvh->prims_time[num_bounds].y = prev_time;
648 prev_bounds = curr_bounds;
655 bvh->custom_prim_aabb.aabbCount = num_bounds;
656 bvh->custom_prim_aabb.aabbStride =
sizeof(
BoundBox);
657 bvh->custom_primitive_bound.copy_to_device();
658 bvh->custom_prim_aabb.aabbs = (
void *)bvh->custom_primitive_bound.device_pointer;
660 geom_input.type = hiprtPrimitiveTypeAABBList;
661 geom_input.primitive.aabbList = bvh->custom_prim_aabb;
662 geom_input.geomType =
Curve;
667hiprtGeometryBuildInput HIPRTDevice::prepare_point_blas(BVHHIPRT *bvh,
PointCloud *pointcloud)
669 hiprtGeometryBuildInput geom_input;
676 const float3 *points_data = pointcloud->get_points().data();
677 const float *radius_data = pointcloud->get_radius().data();
678 const size_t num_points = pointcloud->
num_points();
680 const size_t num_steps = pointcloud->get_motion_steps();
684 if (point_attr_mP ==
NULL) {
685 bvh->custom_prim_info.resize(num_points);
686 bvh->custom_primitive_bound.alloc(num_points);
687 for (
uint j = 0; j < num_points; j++) {
690 point.bounds_grow(points_data, radius_data,
bounds);
692 bvh->custom_primitive_bound[num_bounds] =
bounds;
693 bvh->custom_prim_info[num_bounds].x = j;
699 else if (bvh->params.num_motion_point_steps == 0 || bvh->params.use_spatial_split) {
700 bvh->custom_prim_info.resize(num_points);
701 bvh->custom_primitive_bound.alloc(num_points);
703 for (
uint j = 0; j < num_points; j++) {
706 point.bounds_grow(points_data, radius_data,
bounds);
707 for (
size_t step = 0;
step < num_steps - 1;
step++) {
708 point.bounds_grow(motion_data[step * num_points + j],
bounds);
711 bvh->custom_primitive_bound[num_bounds] =
bounds;
712 bvh->custom_prim_info[num_bounds].x = j;
719 const int num_bvh_steps = bvh->params.num_motion_point_steps * 2 + 1;
720 const float num_bvh_steps_inv_1 = 1.0f / (num_bvh_steps - 1);
722 bvh->custom_prim_info.resize(num_points * num_bvh_steps);
723 bvh->custom_primitive_bound.alloc(num_points * num_bvh_steps);
724 bvh->prims_time.resize(num_points * num_bvh_steps);
726 for (
uint j = 0; j < num_points; j++) {
728 const size_t num_steps = pointcloud->get_motion_steps();
732 points_data, radius_data, point_steps, num_points, num_steps, 0.0f, j);
734 point.bounds_grow(prev_key, prev_bounds);
736 for (
int bvh_step = 1; bvh_step < num_bvh_steps; ++bvh_step) {
737 const float curr_time = (
float)(bvh_step)*num_bvh_steps_inv_1;
739 points_data, radius_data, point_steps, num_points, num_steps, curr_time, j);
741 point.bounds_grow(curr_key, curr_bounds);
745 const float prev_time = (
float)(bvh_step - 1) * num_bvh_steps_inv_1;
746 bvh->custom_primitive_bound[num_bounds] =
bounds;
747 bvh->custom_prim_info[num_bounds].x = j;
749 bvh->prims_time[num_bounds].x = prev_time;
750 bvh->prims_time[num_bounds].y = curr_time;
753 prev_bounds = curr_bounds;
758 bvh->custom_prim_aabb.aabbCount = num_bounds;
759 bvh->custom_prim_aabb.aabbStride =
sizeof(
BoundBox);
760 bvh->custom_primitive_bound.copy_to_device();
761 bvh->custom_prim_aabb.aabbs = (
void *)bvh->custom_primitive_bound.device_pointer;
763 geom_input.type = hiprtPrimitiveTypeAABBList;
764 geom_input.primitive.aabbList = bvh->custom_prim_aabb;
765 geom_input.geomType =
Point;
770void HIPRTDevice::build_blas(BVHHIPRT *bvh,
Geometry *geom, hiprtBuildOptions
options)
772 hiprtGeometryBuildInput geom_input = {};
777 Mesh *mesh =
static_cast<Mesh *
>(geom);
782 geom_input = prepare_triangle_blas(bvh, mesh);
787 Hair *
const hair =
static_cast<Hair *const
>(geom);
792 geom_input = prepare_curve_blas(bvh, hair);
801 geom_input = prepare_point_blas(bvh, pointcloud);
806 assert(geom_input.geomType != hiprtInvalidValue);
809 size_t blas_scratch_buffer_size = 0;
810 hiprtError rt_err = hiprtGetGeometryBuildTemporaryBufferSize(
811 hiprt_context, geom_input,
options, blas_scratch_buffer_size);
813 if (rt_err != hiprtSuccess) {
814 set_error(
string_printf(
"Failed to get scratch buffer size for BLAS!"));
817 rt_err = hiprtCreateGeometry(hiprt_context, geom_input,
options, bvh->hiprt_geom);
819 if (rt_err != hiprtSuccess) {
822 bvh->geom_input = geom_input;
825 if (blas_scratch_buffer_size > scratch_buffer_size) {
826 scratch_buffer.alloc(blas_scratch_buffer_size);
827 scratch_buffer_size = blas_scratch_buffer_size;
828 scratch_buffer.zero_to_device();
830 rt_err = hiprtBuildGeometry(hiprt_context,
831 hiprtBuildOperationBuild,
834 (
void *)(scratch_buffer.device_pointer),
838 if (rt_err != hiprtSuccess) {
843hiprtScene HIPRTDevice::build_tlas(BVHHIPRT *bvh,
849 size_t num_object = objects.size();
850 if (num_object == 0) {
854 hiprtBuildOperation build_operation =
refit ? hiprtBuildOperationUpdate :
855 hiprtBuildOperationBuild;
859 unordered_map<Geometry *, int2> prim_info_map;
860 size_t custom_prim_offset = 0;
862 unordered_map<Geometry *, int> prim_time_map;
864 size_t num_instances = 0;
865 int blender_instance_id = 0;
867 user_instance_id.alloc(num_object);
868 prim_visibility.alloc(num_object);
869 hiprt_blas_ptr.alloc(num_object);
870 blas_ptr.alloc(num_object);
871 transform_headers.alloc(num_object);
872 custom_prim_info_offset.alloc(num_object);
873 prim_time_offset.alloc(num_object);
875 foreach (
Object *ob, objects) {
881 Transform current_transform = ob->get_tfm();
882 Geometry *geom = ob->get_geometry();
885 BVHHIPRT *current_bvh =
static_cast<BVHHIPRT *
>(geom->
bvh);
886 bool is_valid_geometry = current_bvh->geom_input.geomType != hiprtInvalidValue;
887 hiprtGeometry hiprt_geom_current = current_bvh->hiprt_geom;
889 hiprtFrameMatrix hiprt_transform_matrix = {{{0}}};
891 get_hiprt_transform(hiprt_transform_matrix.matrix, identity_matrix);
893 if (is_valid_geometry) {
894 bool is_custom_prim = current_bvh->custom_prim_info.size() > 0;
896 if (is_custom_prim) {
898 bool has_motion_blur = current_bvh->prims_time.size() > 0;
900 unordered_map<Geometry *, int2>::iterator it = prim_info_map.find(geom);
902 if (prim_info_map.find(geom) != prim_info_map.end()) {
904 custom_prim_info_offset[blender_instance_id] = it->second;
906 if (has_motion_blur) {
908 prim_time_offset[blender_instance_id] = prim_time_map[geom];
912 int offset = bvh->custom_prim_info.size();
914 prim_info_map[geom].x = offset;
915 prim_info_map[geom].y = custom_prim_offset;
917 bvh->custom_prim_info.resize(offset + current_bvh->custom_prim_info.size());
918 memcpy(bvh->custom_prim_info.data() + offset,
919 current_bvh->custom_prim_info.data(),
920 current_bvh->custom_prim_info.size() *
sizeof(
int2));
922 custom_prim_info_offset[blender_instance_id].x = offset;
923 custom_prim_info_offset[blender_instance_id].y = custom_prim_offset;
926 custom_prim_offset += ((
Hair *)geom)->num_curves();
929 custom_prim_offset += ((
PointCloud *)geom)->num_points();
932 custom_prim_offset += ((
Mesh *)geom)->num_triangles();
935 if (has_motion_blur) {
936 int time_offset = bvh->prims_time.size();
937 prim_time_map[geom] = time_offset;
939 bvh->prims_time.resize(time_offset + current_bvh->prims_time.size());
940 memcpy(bvh->prims_time.data() + time_offset,
941 current_bvh->prims_time.data(),
942 current_bvh->prims_time.size() *
sizeof(
float2));
944 prim_time_offset[blender_instance_id] = time_offset;
947 prim_time_offset[blender_instance_id] = -1;
951 custom_prim_info_offset[blender_instance_id] = {-1, -1};
953 hiprtTransformHeader current_header = {0};
954 current_header.frameCount = 1;
955 current_header.frameIndex = transform_matrix.
size();
956 if (ob->get_motion().size()) {
957 int motion_size = ob->get_motion().size();
958 assert(motion_size != 1);
961 float time_iternval = 1 / (
float)(motion_size - 1);
962 current_header.frameCount = motion_size;
965 tfm_hiprt_mb.resize(motion_size);
966 for (
int i = 0; i < motion_size; i++) {
967 get_hiprt_transform(tfm_hiprt_mb[i].matrix, tfm_array[i]);
968 tfm_hiprt_mb[i].time = (
float)i * time_iternval;
973 if (transform_applied)
974 current_transform = identity_matrix;
975 get_hiprt_transform(hiprt_transform_matrix.matrix, current_transform);
979 transform_headers[num_instances] = current_header;
981 user_instance_id[num_instances] = blender_instance_id;
982 prim_visibility[num_instances] =
mask;
983 hiprt_blas_ptr[num_instances].geometry = hiprt_geom_current;
984 hiprt_blas_ptr[num_instances].type = hiprtInstanceTypeGeometry;
987 blas_ptr[blender_instance_id] = (
uint64_t)hiprt_geom_current;
988 blender_instance_id++;
991 int frame_count = transform_matrix.
size();
992 hiprtSceneBuildInput scene_input_ptr = {0};
993 scene_input_ptr.instanceCount = num_instances;
994 scene_input_ptr.frameCount = frame_count;
995 scene_input_ptr.frameType = hiprtFrameTypeMatrix;
997 user_instance_id.copy_to_device();
998 prim_visibility.copy_to_device();
999 hiprt_blas_ptr.copy_to_device();
1000 blas_ptr.copy_to_device();
1001 transform_headers.copy_to_device();
1003 instance_transform_matrix.alloc(frame_count);
1004 instance_transform_matrix.host_free();
1005 instance_transform_matrix.host_pointer = transform_matrix.
data();
1006 instance_transform_matrix.data_elements =
sizeof(hiprtFrameMatrix);
1007 instance_transform_matrix.data_type =
TYPE_UCHAR;
1008 instance_transform_matrix.data_size = frame_count;
1009 instance_transform_matrix.copy_to_device();
1010 instance_transform_matrix.host_pointer = 0;
1013 scene_input_ptr.instanceMasks = (
void *)prim_visibility.device_pointer;
1014 scene_input_ptr.instances = (
void *)hiprt_blas_ptr.device_pointer;
1015 scene_input_ptr.instanceTransformHeaders = (
void *)transform_headers.device_pointer;
1016 scene_input_ptr.instanceFrames = (
void *)instance_transform_matrix.device_pointer;
1018 hiprtScene scene = 0;
1020 hiprtError rt_err = hiprtCreateScene(hiprt_context, scene_input_ptr,
options, scene);
1022 if (rt_err != hiprtSuccess) {
1026 size_t tlas_scratch_buffer_size;
1027 rt_err = hiprtGetSceneBuildTemporaryBufferSize(
1028 hiprt_context, scene_input_ptr,
options, tlas_scratch_buffer_size);
1030 if (rt_err != hiprtSuccess) {
1031 set_error(
string_printf(
"Failed to get scratch buffer size for TLAS"));
1034 if (tlas_scratch_buffer_size > scratch_buffer_size) {
1035 scratch_buffer.alloc(tlas_scratch_buffer_size);
1036 scratch_buffer.zero_to_device();
1039 rt_err = hiprtBuildScene(hiprt_context,
1043 (
void *)scratch_buffer.device_pointer,
1047 if (rt_err != hiprtSuccess) {
1051 scratch_buffer.free();
1052 scratch_buffer_size = 0;
1054 if (bvh->custom_prim_info.size()) {
1055 size_t data_size = bvh->custom_prim_info.size();
1056 custom_prim_info.alloc(data_size);
1057 custom_prim_info.host_free();
1058 custom_prim_info.host_pointer = bvh->custom_prim_info.data();
1059 custom_prim_info.data_elements = 2;
1060 custom_prim_info.data_type =
TYPE_INT;
1061 custom_prim_info.data_size = data_size;
1062 custom_prim_info.copy_to_device();
1063 custom_prim_info.host_pointer = 0;
1065 custom_prim_info_offset.copy_to_device();
1068 if (bvh->prims_time.size()) {
1069 size_t data_size = bvh->prims_time.size();
1070 prims_time.alloc(data_size);
1071 prims_time.host_free();
1072 prims_time.host_pointer = bvh->prims_time.data();
1073 prims_time.data_elements = 2;
1075 prims_time.data_size = data_size;
1076 prims_time.copy_to_device();
1077 prims_time.host_pointer = 0;
1079 prim_time_offset.copy_to_device();
1082 size_t table_ptr_size = 0;
1083 hipDeviceptr_t table_device_ptr;
1085 hip_assert(hipModuleGetGlobal(&table_device_ptr, &table_ptr_size, hipModule,
"kernel_params"));
1087 size_t kernel_param_offset[4];
1088 int table_index = 0;
1094 for (
int index = 0;
index < table_index;
index++) {
1096 hip_assert(hipMemcpyHtoD(
1097 table_device_ptr + kernel_param_offset[index], &functions_table,
sizeof(
device_ptr)));
1105 progress.
set_substatus(
"Building HIPRT acceleration structure");
1108 options.buildFlags = hiprtBuildFlagBitPreferHighQualityBuild;
1110 BVHHIPRT *bvh_rt =
static_cast<BVHHIPRT *
>(bvh);
1111 HIPContextScope scope(
this);
1113 if (!bvh_rt->is_tlas()) {
1115 assert(geometry.size() == 1);
1117 build_blas(bvh_rt, geom,
options);
1123 hiprtDestroyScene(hiprt_context, scene);
in reality light always falls off quadratically Particle Retrieve the data of the particle that spawned the object for example to give variation to multiple instances of an object Point Retrieve information about points in a point cloud Retrieve the edges of an object as it appears to Cycles topology will always appear triangulated Convert a blackbody temperature to an RGB value Normal Generate a perturbed normal from an RGB normal map image Typically used for faking highly detailed surfaces Generate an OSL shader from a file or text data block Image Sample an image file as a texture Gabor Generate Gabor noise Gradient Generate interpolated color and intensity values based on the input vector Magic Generate a psychedelic color texture Voronoi Generate Worley noise based on the distance to random points Typically used to generate textures such as or biological cells Brick Generate a procedural texture producing bricks Texture Retrieve multiple types of texture coordinates nTypically used as inputs for texture nodes Vector Convert a point
static DBVT_INLINE btScalar size(const btDbvtVolume &a)
static btDbvtVolume bounds(btDbvtNode **leaves, int count)
void refit(btStridingMeshInterface *triangles, const btVector3 &aabbMin, const btVector3 &aabbMax)
SIMD_FORCE_INLINE const btScalar & z() const
Return the z value.
Attribute * find(ustring name) const
bool has_motion_blur() const
Curve get_curve(size_t i) const
size_t num_curves() const
size_t num_segments() const
PrimitiveType primitive_type() const override
void set_substatus(const string &substatus_)
void push_back_slow(const T &t)
CCL_NAMESPACE_BEGIN struct Options options
#define KERNEL_DATA_ARRAY(type, name)
#define CCL_NAMESPACE_END
draw_view in_light_buf[] float
draw_view push_constant(Type::INT, "radiance_src") .push_constant(Type capture_info_buf storage_buf(1, Qualifier::READ, "ObjectBounds", "bounds_buf[]") .push_constant(Type draw_view int
#define PRIMITIVE_PACK_SEGMENT(type, segment)
@ ATTR_STD_MOTION_VERTEX_POSITION
#define KERNEL_FEATURE_NODE_RAYTRACE
#define KERNEL_FEATURE_MNEE
@ DEVICE_KERNEL_INTEGRATOR_SHADE_SURFACE
@ DEVICE_KERNEL_INTEGRATOR_SHADE_SURFACE_RAYTRACE
@ DEVICE_KERNEL_INTEGRATOR_SHADE_SURFACE_MNEE
ccl_device_inline float4 mask(const int4 mask, const float4 a)
string util_md5_string(const string &str)
ThreadQueue * queue
all scheduled work for the cpu
T step(const T &edge, const T &value)
string path_cache_get(const string &sub)
string path_get(const string &sub)
string path_files_md5_hash(const string &dir)
string path_join(const string &dir, const string &file)
bool path_exists(const string &path)
void path_create_directories(const string &filepath)
bool path_read_compressed_text(const string &path, string &text)
unsigned __int64 uint64_t
CCL_NAMESPACE_BEGIN string string_printf(const char *format,...)
__forceinline void grow(const float3 &pt)
void cardinal_motion_keys(const float3 *curve_keys, const float *curve_radius, const float4 *key_steps, size_t num_curve_keys, size_t num_steps, float time, size_t k0, size_t k1, size_t k2, size_t k3, float4 r_keys[4]) const
void bounds_grow(const int k, const float3 *curve_keys, const float *curve_radius, BoundBox &bounds) const
void motion_verts(const float3 *verts, const float3 *vert_steps, size_t num_verts, size_t num_steps, float time, float3 r_verts[3]) const
void bounds_grow(const float3 *verts, BoundBox &bounds) const
Triangle get_triangle(size_t i) const
size_t num_triangles() const
PrimitiveType primitive_type() const override
bool is_traceable() const
uint visibility_for_tracing() const
Point get_point(int i) const
size_t num_points() const
VecBase< float, 4 > float4
std::unique_lock< std::mutex > thread_scoped_lock
CCL_NAMESPACE_BEGIN double time_dt()