21# include <embree4/rtcore_geometry.h>
37 "Object and Embree max motion steps inconsistent");
39 "Object and Geometry max motion steps inconsistent");
41static size_t unaccounted_mem = 0;
43static bool rtc_memory_monitor_func(
void *userPtr,
const ssize_t bytes,
const bool )
66static void rtc_error_func(
void * ,
enum RTCError ,
const char *
str)
71static double progress_start_time = 0.0;
73static bool rtc_progress_func(
void *user_ptr,
const double n)
77 if (
time_dt() - progress_start_time < 0.25) {
81 const string msg =
string_printf(
"Building BVH %.0f%%", n * 100.0);
83 progress_start_time =
time_dt();
88BVHEmbree::BVHEmbree(
const BVHParams ¶ms_,
91 :
BVH(params_, geometry_, objects_),
94 build_quality(RTC_BUILD_QUALITY_REFIT)
99BVHEmbree::~BVHEmbree()
102 rtcReleaseScene(scene);
108 RTCDevice rtc_device_,
109 const bool rtc_device_is_sycl_)
111 rtc_device = rtc_device_;
112 rtc_device_is_sycl = rtc_device_is_sycl_;
115 rtcSetDeviceErrorFunction(rtc_device, rtc_error_func,
nullptr);
116 rtcSetDeviceMemoryMonitorFunction(rtc_device, rtc_memory_monitor_func, stats);
118 progress.set_substatus(
"Building BVH");
121 rtcReleaseScene(scene);
126 const bool compact =
params.use_compact_structure;
128 scene = rtcNewScene(rtc_device);
129 const RTCSceneFlags scene_flags = (dynamic ? RTC_SCENE_FLAG_DYNAMIC : RTC_SCENE_FLAG_NONE) |
130 (compact ? RTC_SCENE_FLAG_COMPACT : RTC_SCENE_FLAG_NONE) |
131 RTC_SCENE_FLAG_ROBUST |
132 RTC_SCENE_FLAG_FILTER_FUNCTION_IN_ARGUMENTS;
133 rtcSetSceneFlags(scene, scene_flags);
134 build_quality = dynamic ? RTC_BUILD_QUALITY_LOW :
135 (
params.use_spatial_split ? RTC_BUILD_QUALITY_HIGH :
136 RTC_BUILD_QUALITY_MEDIUM);
137 rtcSetSceneBuildQuality(scene, build_quality);
140 for (
Object *ob : objects) {
142 if (!ob->is_traceable()) {
146 if (!ob->get_geometry()->is_instanced()) {
166 rtcSetSceneProgressMonitorFunction(scene, rtc_progress_func, &
progress);
167 rtcCommitScene(scene);
170const char *BVHEmbree::get_error_string(RTCError error_code)
172# if RTC_VERSION >= 40303
173 return rtcGetErrorString(error_code);
175 switch (error_code) {
178 case RTC_ERROR_UNKNOWN:
179 return "unknown error";
180 case RTC_ERROR_INVALID_ARGUMENT:
181 return "invalid argument error";
182 case RTC_ERROR_INVALID_OPERATION:
183 return "invalid operation error";
184 case RTC_ERROR_OUT_OF_MEMORY:
185 return "out of memory error";
186 case RTC_ERROR_UNSUPPORTED_CPU:
187 return "unsupported cpu error";
188 case RTC_ERROR_CANCELLED:
192 return "unknown error";
197# if defined(WITH_EMBREE_GPU) && RTC_VERSION >= 40302
206 for (
const RTCScene &embree_scene : scenes) {
207 RTCSceneFlags scene_flags = rtcGetSceneFlags(embree_scene);
208 scene_flags = scene_flags | RTC_SCENE_FLAG_PREFETCH_USM_SHARED_ON_GPU;
209 rtcSetSceneFlags(embree_scene, scene_flags);
210 rtcCommitScene(embree_scene);
213 RTCError error_code = rtcGetDeviceError(rtc_device);
214 if (error_code != RTC_ERROR_NONE) {
218 return RTC_ERROR_NONE;
222void BVHEmbree::add_object(
Object *ob,
const int i)
224 Geometry *geom = ob->get_geometry();
227 Mesh *mesh =
static_cast<Mesh *
>(geom);
229 add_triangles(ob, mesh,
i);
233 Hair *hair =
static_cast<Hair *
>(geom);
241 add_points(ob, pointcloud,
i);
246void BVHEmbree::add_instance(
Object *ob,
const int i)
248 BVHEmbree *instance_bvh =
static_cast<BVHEmbree *
>(ob->get_geometry()->bvh.get());
249 assert(instance_bvh !=
nullptr);
251 const size_t num_object_motion_steps = ob->
use_motion() ? ob->get_motion().size() : 1;
252 const size_t num_motion_steps =
min(num_object_motion_steps, (
size_t)RTC_MAX_TIME_STEP_COUNT);
253 assert(num_object_motion_steps <= RTC_MAX_TIME_STEP_COUNT);
255 RTCGeometry geom_id = rtcNewGeometry(rtc_device, RTC_GEOMETRY_TYPE_INSTANCE);
256 rtcSetGeometryInstancedScene(geom_id, instance_bvh->scene);
257 rtcSetGeometryTimeStepCount(geom_id, num_motion_steps);
262 for (
size_t step = 0;
step < num_motion_steps; ++
step) {
263 RTCQuaternionDecomposition rtc_decomp;
264 rtcInitQuaternionDecomposition(&rtc_decomp);
265 rtcQuaternionDecompositionSetQuaternion(
266 &rtc_decomp, decomp[
step].
x.w, decomp[
step].x.x, decomp[
step].x.y, decomp[
step].x.z);
267 rtcQuaternionDecompositionSetScale(
268 &rtc_decomp, decomp[
step].
y.w, decomp[
step].z.w, decomp[
step].w.w);
269 rtcQuaternionDecompositionSetTranslation(
270 &rtc_decomp, decomp[
step].
y.x, decomp[
step].y.y, decomp[
step].y.z);
271 rtcQuaternionDecompositionSetSkew(
272 &rtc_decomp, decomp[
step].
z.x, decomp[
step].z.y, decomp[
step].w.x);
273 rtcSetGeometryTransformQuaternion(geom_id,
step, &rtc_decomp);
277 rtcSetGeometryTransform(
278 geom_id, 0, RTC_FORMAT_FLOAT3X4_ROW_MAJOR, (
const float *)&ob->get_tfm());
281 rtcSetGeometryUserData(geom_id,
282#
if RTC_VERSION >= 40400
283 (
void *)rtcGetSceneTraversable(instance_bvh->scene)
285 (
void *)instance_bvh->scene
290 rtcSetGeometryEnableFilterFunctionFromArguments(geom_id,
true);
292 rtcCommitGeometry(geom_id);
293 rtcAttachGeometryByID(scene, geom_id,
i * 2);
294 rtcReleaseGeometry(geom_id);
297void BVHEmbree::add_triangles(
const Object *ob,
const Mesh *mesh,
const int i)
302 size_t num_motion_steps = 1;
306 num_motion_steps = mesh->get_motion_steps();
310 assert(num_motion_steps <= RTC_MAX_TIME_STEP_COUNT);
311 num_motion_steps =
min(num_motion_steps, (
size_t)RTC_MAX_TIME_STEP_COUNT);
315 RTCGeometry geom_id = rtcNewGeometry(rtc_device, RTC_GEOMETRY_TYPE_TRIANGLE);
316 rtcSetGeometryBuildQuality(geom_id, build_quality);
317 rtcSetGeometryTimeStepCount(geom_id, num_motion_steps);
319 const int *triangles = mesh->get_triangles().data();
320 if (!rtc_device_is_sycl) {
321 rtcSetSharedGeometryBuffer(geom_id,
322 RTC_BUFFER_TYPE_INDEX,
335 int *triangles_buffer =
nullptr;
336# if RTC_VERSION >= 40400
337 rtcSetNewGeometryBufferHostDevice(
339 triangles_buffer = (
int *)rtcSetNewGeometryBuffer(
342 RTC_BUFFER_TYPE_INDEX,
347#
if RTC_VERSION >= 40400
349 (
void **)(&triangles_buffer),
354 if (triangles_buffer) {
355 static_assert(
sizeof(int) ==
sizeof(
uint));
356 std::memcpy(triangles_buffer, triangles,
sizeof(
int) * 3 * (num_triangles));
359 set_tri_vertex_buffer(geom_id, mesh,
false);
361 rtcSetGeometryUserData(geom_id, (
void *)prim_offset);
363 rtcSetGeometryEnableFilterFunctionFromArguments(geom_id,
true);
365 rtcCommitGeometry(geom_id);
366 rtcAttachGeometryByID(scene, geom_id,
i * 2);
367 rtcReleaseGeometry(geom_id);
370void BVHEmbree::set_tri_vertex_buffer(RTCGeometry geom_id,
const Mesh *mesh,
const bool update)
373 size_t num_motion_steps = 1;
378 num_motion_steps = mesh->get_motion_steps();
379 t_mid = (num_motion_steps - 1) / 2;
380 if (num_motion_steps > RTC_MAX_TIME_STEP_COUNT) {
382 num_motion_steps = RTC_MAX_TIME_STEP_COUNT;
386 const size_t num_verts = mesh->get_verts().size();
388 for (
int t = 0; t < num_motion_steps; ++t) {
391 verts = mesh->get_verts().data();
394 const int t_ = (t > t_mid) ? (t - 1) : t;
399 rtcUpdateGeometryBuffer(geom_id, RTC_BUFFER_TYPE_VERTEX, t);
402 if (!rtc_device_is_sycl) {
403 static_assert(
sizeof(
float3) == 16,
404 "Embree requires that each buffer element be readable with 16-byte SSE load "
406 rtcSetSharedGeometryBuffer(geom_id,
407 RTC_BUFFER_TYPE_VERTEX,
424# if RTC_VERSION >= 40400
425 rtcSetNewGeometryBufferHostDevice(
430 RTC_BUFFER_TYPE_VERTEX,
435#
if RTC_VERSION >= 40400
437 (
void **)(&verts_buffer),
443 for (
size_t i = (
size_t)0;
i < num_verts; ++
i) {
458void pack_motion_verts(
const size_t num_curves,
461 const float *curve_radius,
464 for (
size_t j = 0; j < num_curves; ++j) {
468 for (; k < c.
num_keys + 1; ++k, ++fk) {
469 rtc_verts[k].x =
verts[fk].x;
470 rtc_verts[k].y =
verts[fk].y;
471 rtc_verts[k].z =
verts[fk].z;
472 rtc_verts[k].w = curve_radius[fk];
475 rtc_verts[0] = rtc_verts[1];
476 rtc_verts[k] = rtc_verts[k - 1];
481void BVHEmbree::set_curve_vertex_buffer(RTCGeometry geom_id,
const Hair *hair,
const bool update)
484 size_t num_motion_steps = 1;
488 num_motion_steps = hair->get_motion_steps();
494 for (
size_t j = 0; j < num_curves; ++j) {
500 size_t num_keys_embree = num_keys;
501 num_keys_embree += num_curves * 2;
504 const int t_mid = (num_motion_steps - 1) / 2;
505 const float *curve_radius = hair->get_curve_radius().data();
506 for (
int t = 0; t < num_motion_steps; ++t) {
511 float4 *rtc_verts =
nullptr;
513 rtc_verts = (
float4 *)rtcGetGeometryBufferData(geom_id, RTC_BUFFER_TYPE_VERTEX, t);
516# if RTC_VERSION >= 40400
517 rtcSetNewGeometryBufferHostDevice(
519 rtc_verts = (
float4 *)rtcSetNewGeometryBuffer(
522 RTC_BUFFER_TYPE_VERTEX,
527#
if RTC_VERSION >= 40400
529 (
void **)(&rtc_verts),
538 if (t == t_mid || attr_mP ==
nullptr) {
539 const float3 *
verts = hair->get_curve_keys().data();
540 pack_motion_verts<float3>(num_curves, hair,
verts, curve_radius, rtc_verts);
543 const int t_ = (t > t_mid) ? (t - 1) : t;
545 pack_motion_verts<float4>(num_curves, hair,
verts, curve_radius, rtc_verts);
550 rtcUpdateGeometryBuffer(geom_id, RTC_BUFFER_TYPE_VERTEX, t);
555void BVHEmbree::set_point_vertex_buffer(RTCGeometry geom_id,
560 size_t num_motion_steps = 1;
564 num_motion_steps = pointcloud->get_motion_steps();
568 const size_t num_points = pointcloud->
num_points();
571 const int t_mid = (num_motion_steps - 1) / 2;
572 const float *radius = pointcloud->get_radius().data();
573 for (
int t = 0; t < num_motion_steps; ++t) {
579 float4 *rtc_verts =
nullptr;
581 rtc_verts = (
float4 *)rtcGetGeometryBufferData(geom_id, RTC_BUFFER_TYPE_VERTEX, t);
584# if RTC_VERSION >= 40400
585 rtcSetNewGeometryBufferHostDevice(
587 rtc_verts = (
float4 *)rtcSetNewGeometryBuffer(
590 RTC_BUFFER_TYPE_VERTEX,
595#
if RTC_VERSION >= 40400
597 (
void **)(&rtc_verts),
605 if (t == t_mid || attr_mP ==
nullptr) {
607 const float3 *
verts = pointcloud->get_points().data();
608 for (
size_t j = 0; j < num_points; ++j) {
609 rtc_verts[j].x =
verts[j].x;
610 rtc_verts[j].y =
verts[j].y;
611 rtc_verts[j].z =
verts[j].z;
612 rtc_verts[j].w = radius[j];
617 const int t_ = (t > t_mid) ? (t - 1) : t;
619 std::copy_n(
verts, num_points, rtc_verts);
624 rtcUpdateGeometryBuffer(geom_id, RTC_BUFFER_TYPE_VERTEX, t);
629void BVHEmbree::add_points(
const Object *ob,
const PointCloud *pointcloud,
const int i)
631 const size_t prim_offset = pointcloud->
prim_offset;
634 size_t num_motion_steps = 1;
638 num_motion_steps = pointcloud->get_motion_steps();
642 const enum RTCGeometryType type = RTC_GEOMETRY_TYPE_SPHERE_POINT;
644 RTCGeometry geom_id = rtcNewGeometry(rtc_device, type);
646 rtcSetGeometryBuildQuality(geom_id, build_quality);
647 rtcSetGeometryTimeStepCount(geom_id, num_motion_steps);
649 set_point_vertex_buffer(geom_id, pointcloud,
false);
651 rtcSetGeometryUserData(geom_id, (
void *)prim_offset);
653 rtcSetGeometryEnableFilterFunctionFromArguments(geom_id,
true);
655 rtcCommitGeometry(geom_id);
656 rtcAttachGeometryByID(scene, geom_id,
i * 2);
657 rtcReleaseGeometry(geom_id);
660void BVHEmbree::add_curves(
const Object *ob,
const Hair *hair,
const int i)
665 size_t num_motion_steps = 1;
669 num_motion_steps = hair->get_motion_steps();
673 assert(num_motion_steps <= RTC_MAX_TIME_STEP_COUNT);
674 num_motion_steps =
min(num_motion_steps, (
size_t)RTC_MAX_TIME_STEP_COUNT);
677 size_t num_segments = 0;
678 for (
size_t j = 0; j < num_curves; ++j) {
685 RTC_GEOMETRY_TYPE_FLAT_CATMULL_ROM_CURVE :
686 RTC_GEOMETRY_TYPE_ROUND_CATMULL_ROM_CURVE);
688 RTCGeometry geom_id = rtcNewGeometry(rtc_device, type);
689 rtcSetGeometryTessellationRate(geom_id,
params.curve_subdivisions + 1);
690 unsigned *rtc_indices =
nullptr;
691# if RTC_VERSION >= 40400
692 rtcSetNewGeometryBufferHostDevice(
694 rtc_indices = (
unsigned *)rtcSetNewGeometryBuffer(
697 RTC_BUFFER_TYPE_INDEX,
702#
if RTC_VERSION >= 40400
704 (
void **)(&rtc_indices),
709 size_t rtc_index = 0;
710 for (
size_t j = 0; j < num_curves; ++j) {
713 rtc_indices[rtc_index] = c.
first_key + k;
715 rtc_indices[rtc_index] += j * 2;
721 rtcSetGeometryBuildQuality(geom_id, build_quality);
722 rtcSetGeometryTimeStepCount(geom_id, num_motion_steps);
724 set_curve_vertex_buffer(geom_id, hair,
false);
726 rtcSetGeometryUserData(geom_id, (
void *)prim_offset);
728 rtcSetGeometryEnableFilterFunctionFromArguments(geom_id,
true);
730 rtcCommitGeometry(geom_id);
731 rtcAttachGeometryByID(scene, geom_id,
i * 2 + 1);
732 rtcReleaseGeometry(geom_id);
737 progress.set_substatus(
"Refitting BVH nodes");
740 unsigned geom_id = 0;
741 for (
Object *ob : objects) {
742 if (!
params.top_level || (ob->
is_traceable() && !ob->get_geometry()->is_instanced())) {
743 Geometry *geom = ob->get_geometry();
746 Mesh *mesh =
static_cast<Mesh *
>(geom);
748 RTCGeometry geom = rtcGetGeometry(scene, geom_id);
749 set_tri_vertex_buffer(geom, mesh,
true);
750 rtcSetGeometryUserData(geom, (
void *)mesh->
prim_offset);
751 rtcCommitGeometry(geom);
755 Hair *hair =
static_cast<Hair *
>(geom);
757 RTCGeometry geom = rtcGetGeometry(scene, geom_id + 1);
758 set_curve_vertex_buffer(geom, hair,
true);
760 rtcCommitGeometry(geom);
766 RTCGeometry geom = rtcGetGeometry(scene, geom_id);
767 set_point_vertex_buffer(geom, pointcloud,
true);
768 rtcCommitGeometry(geom);
775 rtcCommitScene(scene);
ATOMIC_INLINE size_t atomic_add_and_fetch_z(size_t *p, size_t x)
ATOMIC_INLINE size_t atomic_sub_and_fetch_z(size_t *p, size_t x)
SIMD_FORCE_INLINE const btScalar & z() const
Return the z value.
Attribute * find(ustring name) const
static const uint MAX_MOTION_STEPS
bool is_pointcloud() const
virtual bool has_motion_blur() const
Curve get_curve(const size_t i) const
size_t curve_segment_offset
size_t num_curves() const
CurveShapeType curve_shape
void mem_alloc(const size_t size)
void mem_free(const size_t size)
#define SIMD_SET_FLUSH_TO_ZERO
#define CCL_NAMESPACE_END
VecBase< float, 4 > float4
#define assert(assertion)
VecBase< float, D > step(VecOp< float, D >, VecOp< float, D >) RET
@ ATTR_STD_MOTION_VERTEX_POSITION
void add_curves(bke::CurvesGeometry &curves, const Span< int > new_sizes)
static void update(bNodeTree *ntree)
CCL_NAMESPACE_BEGIN string string_printf(const char *format,...)
bool has_motion_blur() const override
size_t num_triangles() const
static const uint MAX_MOTION_STEPS
bool is_traceable() const
uint visibility_for_tracing() const
size_t num_points() const
CCL_NAMESPACE_BEGIN double time_dt()