24 # include <OSL/oslexec.h>
28 # include <embree3/rtcore.h>
50 #include "bvh/bvh_embree.h"
83 F kernel_default,
F kernel_sse2,
F kernel_sse3,
F kernel_sse41,
F kernel_avx,
F kernel_avx2)
85 const char *architecture_name =
"default";
95 #ifdef WITH_CYCLES_OPTIMIZED_KERNEL_AVX2
97 architecture_name =
"AVX2";
102 #ifdef WITH_CYCLES_OPTIMIZED_KERNEL_AVX
104 architecture_name =
"AVX";
109 #ifdef WITH_CYCLES_OPTIMIZED_KERNEL_SSE41
111 architecture_name =
"SSE4.1";
116 #ifdef WITH_CYCLES_OPTIMIZED_KERNEL_SSE3
118 architecture_name =
"SSE3";
123 #ifdef WITH_CYCLES_OPTIMIZED_KERNEL_SSE2
125 architecture_name =
"SSE2";
137 VLOG(1) <<
"Will be using " << architecture_name <<
" kernels.";
160 int num_global_elements,
185 OSLGlobals osl_globals;
187 #ifdef WITH_OPENIMAGEDENOISE
188 oidn::DeviceRef oidn_device;
189 oidn::FilterRef oidn_filter;
193 RTCScene embree_scene =
NULL;
194 RTCDevice embree_device;
211 int,
TileInfo *, int, int,
float *,
float *,
float *,
float *,
float *,
int *, int, int)>
214 int,
TileInfo *, int, int, int, int,
float *,
float *,
float,
int *, int, int)>
216 KernelFunctions<void (*)(int, int, int,
int *,
float *,
float *, int,
int *)>
218 KernelFunctions<void (*)(int, int,
float *,
float *,
float *,
float *,
int *, int)>
220 KernelFunctions<void (*)(int, int,
float *,
float *,
float *,
float *,
int *, int)>
224 int, int,
float *,
float *,
float *,
float *,
int *, int, int, int,
float,
float)>
229 int, int,
float *,
float *,
float *,
float *,
float *,
int *, int, int, int)>
234 float *,
TileInfo *, int, int, int,
float *,
int *,
int *, int, int, bool, int,
float)>
278 #define KERNEL_FUNCTIONS(name) \
279 KERNEL_NAME_EVAL(cpu, name), KERNEL_NAME_EVAL(cpu_sse2, name), \
280 KERNEL_NAME_EVAL(cpu_sse3, name), KERNEL_NAME_EVAL(cpu_sse41, name), \
281 KERNEL_NAME_EVAL(cpu_avx, name), KERNEL_NAME_EVAL(cpu_avx2, name)
284 :
Device(info_, stats_, profiler_, background_),
316 embree_device = rtcNewDevice(
"verbose=0");
320 VLOG(1) <<
"Will be using split kernel.";
324 #define REGISTER_SPLIT_KERNEL(name) \
325 split_kernels[#name] = KernelFunctions<void (*)(KernelGlobals *, KernelData *)>( \
326 KERNEL_FUNCTIONS(name))
349 #undef REGISTER_SPLIT_KERNEL
350 #undef KERNEL_FUNCTIONS
356 rtcReleaseDevice(embree_device);
373 return bvh_layout_mask;
387 assert(!
"mem_alloc not supported for textures.");
390 assert(!
"mem_alloc not supported for global memory.");
394 VLOG(1) <<
"Buffer allocate: " << mem.
name <<
", "
424 assert(!
"mem_copy_to not supported for pixels.");
478 if (strcmp(name,
"__data") == 0) {
483 data->bvh.scene = embree_scene;
491 VLOG(1) <<
"Global memory allocate: " << mem.
name <<
", "
513 VLOG(1) <<
"Texture allocate: " << mem.
name <<
", "
556 BVHEmbree *
const bvh_embree =
static_cast<BVHEmbree *
>(bvh);
558 bvh_embree->refit(progress);
561 bvh_embree->build(progress, &
stats, embree_device);
565 embree_scene = bvh_embree->scene;
594 int r =
task->nlm_state.r;
595 int f =
task->nlm_state.f;
596 float a =
task->nlm_state.a;
597 float k_2 =
task->nlm_state.k_2;
600 int h = rect.
w - rect.
y;
602 int channel_offset =
task->nlm_state.is_color ?
task->buffer.pass_stride : 0;
604 float *temporary_mem = (
float *)
task->buffer.temporary_mem.device_pointer;
605 float *blurDifference = temporary_mem;
606 float *difference = temporary_mem +
task->buffer.pass_stride;
607 float *weightAccum = temporary_mem + 2 *
task->buffer.pass_stride;
609 memset(weightAccum, 0,
sizeof(
float) *
w * h);
610 memset((
float *)out_ptr, 0,
sizeof(
float) *
w * h);
612 for (
int i = 0; i < (2 *
r + 1) * (2 *
r + 1); i++) {
613 int dy = i / (2 *
r + 1) -
r;
614 int dx = i % (2 *
r + 1) -
r;
616 int local_rect[4] = {
617 max(0, -dx),
max(0, -dy), rect.
z - rect.
x -
max(0, dx), rect.
w - rect.
y -
max(0, dy)};
621 (
float *)variance_ptr,
648 int local_rect[4] = {0, 0, rect.
z - rect.
x, rect.
w - rect.
y};
658 for (
int y = 0;
y <
task->filter_area.w;
y++) {
659 for (
int x = 0;
x <
task->filter_area.z;
x++) {
662 x +
task->filter_area.x,
663 y +
task->filter_area.y,
664 y *
task->filter_area.z +
x,
665 (
float *)
task->storage.transform.device_pointer,
666 (
int *)
task->storage.rank.device_pointer,
668 task->buffer.pass_stride,
669 task->buffer.frame_stride,
670 task->buffer.use_time,
672 task->pca_threshold);
686 float *temporary_mem = (
float *)
task->buffer.temporary_mem.device_pointer;
687 float *difference = temporary_mem;
688 float *blurDifference = temporary_mem +
task->buffer.pass_stride;
690 int r =
task->radius;
691 int frame_offset = frame *
task->buffer.frame_stride;
692 for (
int i = 0; i < (2 *
r + 1) * (2 *
r + 1); i++) {
693 int dy = i / (2 *
r + 1) -
r;
694 int dx = i % (2 *
r + 1) -
r;
696 int local_rect[4] = {
max(0, -dx),
698 task->reconstruction_state.source_w -
max(0, dx),
699 task->reconstruction_state.source_h -
max(0, dy)};
703 (
float *)color_variance_ptr,
708 task->buffer.pass_stride,
714 blurDifference, difference, local_rect,
task->buffer.stride, 4);
718 task->tile_info->frames[frame],
720 (
float *)
task->buffer.mem.device_pointer,
721 (
float *)
task->storage.transform.device_pointer,
722 (
int *)
task->storage.rank.device_pointer,
723 (
float *)
task->storage.XtWX.device_pointer,
726 &
task->reconstruction_state.filter_window.x,
729 task->buffer.pass_stride,
731 task->buffer.use_time);
739 for (
int y = 0;
y <
task->filter_area.w;
y++) {
740 for (
int x = 0;
x <
task->filter_area.z;
x++) {
743 y *
task->filter_area.z +
x,
745 (
int *)
task->storage.rank.device_pointer,
746 (
float *)
task->storage.XtWX.device_pointer,
748 &
task->reconstruction_state.buffer_params.x,
749 task->render_buffer.samples);
765 for (
int y = rect.
y;
y < rect.
w;
y++) {
766 for (
int x = rect.
x;
x < rect.
z;
x++) {
770 (
float *)variance_ptr,
789 for (
int y =
task->rect.y; y < task->rect.w;
y++) {
790 for (
int x =
task->rect.x; x < task->rect.z;
x++) {
797 (
float *)sample_variance_ptr,
798 (
float *)sv_variance_ptr,
799 (
float *)buffer_variance_ptr,
801 task->render_buffer.pass_stride,
802 task->render_buffer.offset);
817 for (
int y =
task->rect.y; y < task->rect.w;
y++) {
818 for (
int x =
task->rect.x; x < task->rect.z;
x++) {
826 (
float *)variance_ptr,
829 task->render_buffer.pass_stride,
830 task->render_buffer.offset);
841 for (
int y = 0;
y <
task->filter_area.w;
y++) {
842 for (
int x = 0;
x <
task->filter_area.z;
x++) {
844 x +
task->filter_area.x,
845 y +
task->filter_area.y,
846 &
task->reconstruction_state.buffer_params.x,
864 for (
int y =
task->rect.y; y < task->rect.w;
y++) {
865 for (
int x =
task->rect.x; x < task->rect.z;
x++) {
869 (
float *)variance_ptr,
873 task->buffer.pass_stride);
894 for (
int y = wtile.
y;
y < wtile.
y + wtile.
h; ++
y) {
895 for (
int x = wtile.
x;
x < wtile.
x + wtile.
w; ++
x) {
904 for (
int y = wtile.
y;
y < wtile.
y + wtile.
h; ++
y) {
907 for (
int x = wtile.
x;
x < wtile.
x + wtile.
w; ++
x) {
915 float *render_buffer = (
float *)tile.
buffer;
916 for (
int y = tile.
y;
y < tile.
y + tile.
h;
y++) {
917 for (
int x = tile.
x;
x < tile.
x + tile.
w;
x++) {
923 if (sample_multiplier != 1.0f) {
945 float *render_buffer = (
float *)tile.
buffer;
954 if (
task.need_finish_queue ==
false)
964 for (
int y = tile.
y;
y < tile.
y + tile.
h;
y++) {
965 for (
int x = tile.
x;
x < tile.
x + tile.
w;
x++) {
974 for (
int y = tile.
y;
y < tile.
y + tile.
h;
y++) {
975 for (
int x = tile.
x;
x < tile.
x + tile.
w;
x++) {
982 if (
task.adaptive_sampling.use &&
task.adaptive_sampling.need_filter(
sample)) {
985 const int num_progress_samples = end_sample -
sample;
987 task.update_progress(&tile, tile.
w * tile.
h * num_progress_samples);
992 task.update_progress(&tile, tile.
w * tile.
h);
1005 const size_t offset,
1013 #ifdef WITH_OPENIMAGEDENOISE
1027 oidn_device = oidn::newDevice();
1028 oidn_device.commit();
1031 oidn_filter = oidn_device.newFilter(
"RT");
1032 oidn_filter.set(
"hdr",
true);
1033 oidn_filter.set(
"srgb",
false);
1052 {
"output", 0,
false,
true},
1056 for (
int i = 0; passes[i].name; i++) {
1057 if (!passes[i].use) {
1062 const int64_t buffer_offset = (pixel_offset *
task.pass_stride + passes[i].offset);
1066 if (passes[i].scale && scale != 1.0f) {
1070 scaled_buffer.
resize(
w * h * 3);
1072 for (
int y = 0;
y < h;
y++) {
1073 const float *pass_row =
buffer + buffer_offset +
y * row_stride;
1074 float *scaled_row = scaled_buffer.
data() +
y *
w * 3;
1076 for (
int x = 0;
x <
w;
x++) {
1077 scaled_row[
x * 3 + 0] = pass_row[
x * pixel_stride + 0] * scale;
1078 scaled_row[
x * 3 + 1] = pass_row[
x * pixel_stride + 1] * scale;
1079 scaled_row[
x * 3 + 2] = pass_row[
x * pixel_stride + 2] * scale;
1083 oidn_filter.setImage(
1084 passes[i].name, scaled_buffer.
data(), oidn::Format::Float3,
w, h, 0, 0, 0);
1087 oidn_filter.setImage(passes[i].name,
1089 oidn::Format::Float3,
1093 pixel_stride *
sizeof(
float),
1094 row_stride *
sizeof(
float));
1099 oidn_filter.commit();
1100 oidn_filter.execute();
1138 const float scale = 1.0f / rtile.
sample;
1139 const float invscale = rtile.
sample;
1140 const size_t pass_stride =
task.pass_stride;
1144 task.map_neighbor_tiles(neighbors,
this);
1146 rtile = center_tile;
1163 const int xmin =
max(ntile.
x, rect.
x);
1164 const int ymin =
max(ntile.
y, rect.
y);
1165 const int xmax =
min(ntile.
x + ntile.
w, rect.
z);
1166 const int ymax =
min(ntile.
y + ntile.
h, rect.
w);
1168 const size_t tile_offset = ntile.
offset + xmin + ymin * ntile.
stride;
1169 const float *tile_buffer = (
float *)ntile.
buffer + tile_offset * pass_stride;
1171 const size_t merged_stride =
rect_size.x;
1172 const size_t merged_offset = (xmin - rect.
x) + (ymin - rect.
y) * merged_stride;
1173 float *merged_buffer = merged.
data() + merged_offset * pass_stride;
1175 for (
int y = ymin;
y < ymax;
y++) {
1176 for (
int x = 0;
x < pass_stride * (xmax - xmin);
x++) {
1177 merged_buffer[
x] = tile_buffer[
x] * scale;
1179 tile_buffer += ntile.
stride * pass_stride;
1180 merged_buffer += merged_stride * pass_stride;
1191 const int xmin =
max(ntile.
x, rect.
x);
1192 const int ymin =
max(ntile.
y, rect.
y);
1193 const int xmax =
min(ntile.
x + ntile.
w, rect.
z);
1194 const int ymax =
min(ntile.
y + ntile.
h, rect.
w);
1196 const size_t tile_offset = ntile.
offset + xmin + ymin * ntile.
stride;
1197 float *tile_buffer = (
float *)ntile.
buffer + tile_offset * pass_stride;
1199 const size_t merged_stride =
rect_size.x;
1200 const size_t merged_offset = (xmin - rect.
x) + (ymin - rect.
y) * merged_stride;
1201 const float *merged_buffer = merged.
data() + merged_offset * pass_stride;
1203 for (
int y = ymin;
y < ymax;
y++) {
1204 for (
int x = 0;
x < pass_stride * (xmax - xmin);
x += pass_stride) {
1205 tile_buffer[
x + 0] = merged_buffer[
x + 0] * invscale;
1206 tile_buffer[
x + 1] = merged_buffer[
x + 1] * invscale;
1207 tile_buffer[
x + 2] = merged_buffer[
x + 2] * invscale;
1209 tile_buffer += ntile.
stride * pass_stride;
1210 merged_buffer += merged_stride * pass_stride;
1214 task.unmap_neighbor_tiles(neighbors,
this);
1252 if (
task.need_finish_queue ==
false)
1271 delete split_kernel;
1283 bool hold_denoise_lock =
false;
1287 hold_denoise_lock =
true;
1292 while (
task.acquire_tile(
this, tile, tile_types)) {
1310 if (denoising ==
NULL) {
1316 task.update_progress(&tile, tile.
w * tile.
h);
1319 task.release_tile(tile);
1322 if (
task.need_finish_queue ==
false)
1327 if (hold_denoise_lock) {
1334 kg->~KernelGlobals();
1336 delete split_kernel;
1363 denoising.
profiler = &denoising_profiler_state;
1370 task.update_progress(&tile, tile.
w * tile.
h);
1375 float sample_scale = 1.0f / (
task.sample + 1);
1377 if (
task.rgba_half) {
1382 (
float *)
task.buffer,
1394 (
float *)
task.buffer,
1411 (float4 *)
task.shader_output,
1412 task.shader_eval_type,
1442 list<DeviceTask> tasks;
1447 tasks.push_back(
task);
1478 kg.transparent_shadow_intersections =
NULL;
1479 const int decoupled_count =
sizeof(
kg.decoupled_volume_steps) /
1480 sizeof(*
kg.decoupled_volume_steps);
1481 for (
int i = 0; i < decoupled_count; ++i) {
1482 kg.decoupled_volume_steps[i] =
NULL;
1484 kg.decoupled_volume_steps_index = 0;
1485 kg.coverage_asset =
kg.coverage_object =
kg.coverage_material =
NULL;
1498 if (
kg->transparent_shadow_intersections !=
NULL) {
1499 free(
kg->transparent_shadow_intersections);
1501 const int decoupled_count =
sizeof(
kg->decoupled_volume_steps) /
1502 sizeof(*
kg->decoupled_volume_steps);
1503 for (
int i = 0; i < decoupled_count; ++i) {
1504 if (
kg->decoupled_volume_steps[i] !=
NULL) {
1505 free(
kg->decoupled_volume_steps[i]);
1509 OSLShader::thread_free(
kg);
1564 int num_global_elements,
1583 num_global_elements,
1611 if (!kernel->
func) {
1642 return new CPUDevice(info, stats, profiler, background);
1669 string capabilities =
"";
1675 if (capabilities[capabilities.size() - 1] ==
' ')
1676 capabilities.resize(capabilities.size() - 1);
1677 return capabilities;
typedef float(TangentPoint)[2]
void BLI_kdtree_nd_() free(KDTree *tree)
_GL_VOID GLfloat value _GL_VOID_RET _GL_VOID const GLuint GLboolean *residences _GL_BOOL_RET _GL_VOID GLsizei GLfloat GLfloat GLfloat GLfloat const GLubyte *bitmap _GL_VOID_RET _GL_VOID GLenum const void *lists _GL_VOID_RET _GL_VOID const GLdouble *equation _GL_VOID_RET _GL_VOID GLdouble GLdouble blue _GL_VOID_RET _GL_VOID GLfloat GLfloat blue _GL_VOID_RET _GL_VOID GLint GLint blue _GL_VOID_RET _GL_VOID GLshort GLshort blue _GL_VOID_RET _GL_VOID GLubyte GLubyte blue _GL_VOID_RET _GL_VOID GLuint GLuint blue _GL_VOID_RET _GL_VOID GLushort GLushort blue _GL_VOID_RET _GL_VOID GLbyte GLbyte GLbyte alpha _GL_VOID_RET _GL_VOID GLdouble GLdouble GLdouble alpha _GL_VOID_RET _GL_VOID GLfloat GLfloat GLfloat alpha _GL_VOID_RET _GL_VOID GLint GLint GLint alpha _GL_VOID_RET _GL_VOID GLshort GLshort GLshort alpha _GL_VOID_RET _GL_VOID GLubyte GLubyte GLubyte alpha _GL_VOID_RET _GL_VOID GLuint GLuint GLuint alpha _GL_VOID_RET _GL_VOID GLushort GLushort GLushort alpha _GL_VOID_RET _GL_VOID GLenum mode _GL_VOID_RET _GL_VOID GLint GLsizei GLsizei GLenum type _GL_VOID_RET _GL_VOID GLsizei GLenum GLenum const void *pixels _GL_VOID_RET _GL_VOID const void *pointer _GL_VOID_RET _GL_VOID GLdouble v _GL_VOID_RET _GL_VOID GLfloat v _GL_VOID_RET _GL_VOID GLint GLint i2 _GL_VOID_RET _GL_VOID GLint j _GL_VOID_RET _GL_VOID GLfloat param _GL_VOID_RET _GL_VOID GLint param _GL_VOID_RET _GL_VOID GLdouble GLdouble GLdouble GLdouble GLdouble zFar _GL_VOID_RET _GL_UINT GLdouble *equation _GL_VOID_RET _GL_VOID GLenum GLint *params _GL_VOID_RET _GL_VOID GLenum GLfloat *v _GL_VOID_RET _GL_VOID GLenum GLfloat *params _GL_VOID_RET _GL_VOID GLfloat *values _GL_VOID_RET _GL_VOID GLushort *values _GL_VOID_RET _GL_VOID GLenum GLfloat *params _GL_VOID_RET _GL_VOID GLenum GLdouble *params _GL_VOID_RET _GL_VOID GLenum GLint *params _GL_VOID_RET _GL_VOID GLsizei const void *pointer _GL_VOID_RET _GL_VOID GLsizei const void *pointer _GL_VOID_RET _GL_BOOL GLfloat param _GL_VOID_RET _GL_VOID GLint param _GL_VOID_RET _GL_VOID GLenum GLfloat param _GL_VOID_RET _GL_VOID GLenum GLint param _GL_VOID_RET _GL_VOID GLushort pattern _GL_VOID_RET _GL_VOID GLdouble GLdouble GLint GLint const GLdouble *points _GL_VOID_RET _GL_VOID GLdouble GLdouble GLint GLint GLdouble GLdouble GLint GLint const GLdouble *points _GL_VOID_RET _GL_VOID GLdouble GLdouble u2 _GL_VOID_RET _GL_VOID GLdouble GLdouble GLint GLdouble GLdouble v2 _GL_VOID_RET _GL_VOID GLenum GLfloat param _GL_VOID_RET _GL_VOID GLenum GLint param _GL_VOID_RET _GL_VOID GLenum mode _GL_VOID_RET _GL_VOID GLdouble GLdouble nz _GL_VOID_RET _GL_VOID GLfloat GLfloat nz _GL_VOID_RET _GL_VOID GLint GLint nz _GL_VOID_RET _GL_VOID GLshort GLshort nz _GL_VOID_RET _GL_VOID GLsizei const void *pointer _GL_VOID_RET _GL_VOID GLsizei const GLfloat *values _GL_VOID_RET _GL_VOID GLsizei const GLushort *values _GL_VOID_RET _GL_VOID GLint param _GL_VOID_RET _GL_VOID const GLuint const GLclampf *priorities _GL_VOID_RET _GL_VOID GLdouble y _GL_VOID_RET _GL_VOID GLfloat y _GL_VOID_RET _GL_VOID GLint y _GL_VOID_RET _GL_VOID GLshort y _GL_VOID_RET _GL_VOID GLdouble GLdouble z _GL_VOID_RET _GL_VOID GLfloat GLfloat z _GL_VOID_RET _GL_VOID GLint GLint z _GL_VOID_RET _GL_VOID GLshort GLshort z _GL_VOID_RET _GL_VOID GLdouble GLdouble GLdouble w _GL_VOID_RET _GL_VOID GLfloat GLfloat GLfloat w _GL_VOID_RET _GL_VOID GLint GLint GLint w _GL_VOID_RET _GL_VOID GLshort GLshort GLshort w _GL_VOID_RET _GL_VOID GLdouble GLdouble GLdouble y2 _GL_VOID_RET _GL_VOID GLfloat GLfloat GLfloat y2 _GL_VOID_RET _GL_VOID GLint GLint GLint y2 _GL_VOID_RET _GL_VOID GLshort GLshort GLshort y2 _GL_VOID_RET _GL_VOID GLdouble GLdouble GLdouble z _GL_VOID_RET _GL_VOID GLdouble GLdouble z _GL_VOID_RET _GL_VOID GLuint *buffer _GL_VOID_RET _GL_VOID GLdouble t _GL_VOID_RET _GL_VOID GLfloat t _GL_VOID_RET _GL_VOID GLint t _GL_VOID_RET _GL_VOID GLshort t _GL_VOID_RET _GL_VOID GLdouble GLdouble r _GL_VOID_RET _GL_VOID GLfloat GLfloat r _GL_VOID_RET _GL_VOID GLint GLint r _GL_VOID_RET _GL_VOID GLshort GLshort r _GL_VOID_RET _GL_VOID GLdouble GLdouble r
_GL_VOID GLfloat value _GL_VOID_RET _GL_VOID const GLuint GLboolean *residences _GL_BOOL_RET _GL_VOID GLsizei GLfloat GLfloat GLfloat GLfloat const GLubyte *bitmap _GL_VOID_RET _GL_VOID GLenum const void *lists _GL_VOID_RET _GL_VOID const GLdouble *equation _GL_VOID_RET _GL_VOID GLdouble GLdouble blue _GL_VOID_RET _GL_VOID GLfloat GLfloat blue _GL_VOID_RET _GL_VOID GLint GLint blue _GL_VOID_RET _GL_VOID GLshort GLshort blue _GL_VOID_RET _GL_VOID GLubyte GLubyte blue _GL_VOID_RET _GL_VOID GLuint GLuint blue _GL_VOID_RET _GL_VOID GLushort GLushort blue _GL_VOID_RET _GL_VOID GLbyte GLbyte GLbyte alpha _GL_VOID_RET _GL_VOID GLdouble GLdouble GLdouble alpha _GL_VOID_RET _GL_VOID GLfloat GLfloat GLfloat alpha _GL_VOID_RET _GL_VOID GLint GLint GLint alpha _GL_VOID_RET _GL_VOID GLshort GLshort GLshort alpha _GL_VOID_RET _GL_VOID GLubyte GLubyte GLubyte alpha _GL_VOID_RET _GL_VOID GLuint GLuint GLuint alpha _GL_VOID_RET _GL_VOID GLushort GLushort GLushort alpha _GL_VOID_RET _GL_VOID GLenum mode _GL_VOID_RET _GL_VOID GLint y
_GL_VOID GLfloat value _GL_VOID_RET _GL_VOID const GLuint GLboolean *residences _GL_BOOL_RET _GL_VOID GLsizei GLfloat GLfloat GLfloat GLfloat const GLubyte *bitmap _GL_VOID_RET _GL_VOID GLenum const void *lists _GL_VOID_RET _GL_VOID const GLdouble *equation _GL_VOID_RET _GL_VOID GLdouble GLdouble blue _GL_VOID_RET _GL_VOID GLfloat GLfloat blue _GL_VOID_RET _GL_VOID GLint GLint blue _GL_VOID_RET _GL_VOID GLshort GLshort blue _GL_VOID_RET _GL_VOID GLubyte GLubyte blue _GL_VOID_RET _GL_VOID GLuint GLuint blue _GL_VOID_RET _GL_VOID GLushort GLushort blue _GL_VOID_RET _GL_VOID GLbyte GLbyte GLbyte alpha _GL_VOID_RET _GL_VOID GLdouble GLdouble GLdouble alpha _GL_VOID_RET _GL_VOID GLfloat GLfloat GLfloat alpha _GL_VOID_RET _GL_VOID GLint GLint GLint alpha _GL_VOID_RET _GL_VOID GLshort GLshort GLshort alpha _GL_VOID_RET _GL_VOID GLubyte GLubyte GLubyte alpha _GL_VOID_RET _GL_VOID GLuint GLuint GLuint alpha _GL_VOID_RET _GL_VOID GLushort GLushort GLushort alpha _GL_VOID_RET _GL_VOID GLenum mode _GL_VOID_RET _GL_VOID GLint GLsizei GLsizei GLenum type _GL_VOID_RET _GL_VOID GLsizei GLenum GLenum const void *pixels _GL_VOID_RET _GL_VOID const void *pointer _GL_VOID_RET _GL_VOID GLdouble v _GL_VOID_RET _GL_VOID GLfloat v _GL_VOID_RET _GL_VOID GLint GLint i2 _GL_VOID_RET _GL_VOID GLint j _GL_VOID_RET _GL_VOID GLfloat param _GL_VOID_RET _GL_VOID GLint param _GL_VOID_RET _GL_VOID GLdouble GLdouble GLdouble GLdouble GLdouble zFar _GL_VOID_RET _GL_UINT GLdouble *equation _GL_VOID_RET _GL_VOID GLenum GLint *params _GL_VOID_RET _GL_VOID GLenum GLfloat *v _GL_VOID_RET _GL_VOID GLenum GLfloat *params _GL_VOID_RET _GL_VOID GLfloat *values _GL_VOID_RET _GL_VOID GLushort *values _GL_VOID_RET _GL_VOID GLenum GLfloat *params _GL_VOID_RET _GL_VOID GLenum GLdouble *params _GL_VOID_RET _GL_VOID GLenum GLint *params _GL_VOID_RET _GL_VOID GLsizei stride
Group RGB to Bright Vector Camera Vector Combine Material Light Line Style Layer Add Ambient Diffuse Glossy Refraction Transparent Toon Principled Hair Volume Principled Light Particle Volume Image Sky Noise Wave Voronoi Brick Texture Vector Combine Vertex Separate Vector White RGB Map Separate Set Z Dilate Combine Combine Color Channel Split ID Combine Luminance Directional Alpha Distance Hue Movie Ellipse Bokeh View Corner DENOISE
static DBVT_INLINE btScalar size(const btDbvtVolume &a)
void refit(btStridingMeshInterface *triangles, const btVector3 &aabbMin, const btVector3 &aabbMax)
SIMD_FORCE_INLINE const btScalar & w() const
Return the w value.
virtual void mem_zero(device_memory &mem) override
bool denoising_non_local_means(device_ptr image_ptr, device_ptr guide_ptr, device_ptr variance_ptr, device_ptr out_ptr, DenoisingTask *task)
bool denoising_divide_shadow(device_ptr a_ptr, device_ptr b_ptr, device_ptr sample_variance_ptr, device_ptr sv_variance_ptr, device_ptr buffer_variance_ptr, DenoisingTask *task)
KernelFunctions< void(*)(float *, float *, int *, int)> filter_nlm_normalize_kernel
bool denoising_combine_halves(device_ptr a_ptr, device_ptr b_ptr, device_ptr mean_ptr, device_ptr variance_ptr, int r, int4 rect, DenoisingTask *task)
virtual device_ptr mem_alloc_sub_ptr(device_memory &mem, int offset, int) override
KernelFunctions< void(*)(KernelGlobals *, uint4 *, float4 *, int, int, int, int, int)> shader_kernel
virtual void mem_copy_from(device_memory &, int, int, int, int) override
virtual bool show_samples() const override
virtual void task_cancel() override
bool adaptive_sampling_filter(KernelGlobals *kg, RenderTile &tile, int sample)
KernelFunctions< void(*)(KernelGlobals *, float *, int, int, int, int, int)> bake_kernel
virtual BVHLayoutMask get_bvh_layout_mask() const override
virtual void mem_free(device_memory &mem) override
thread_spin_lock oidn_task_lock
virtual bool load_kernels(const DeviceRequestedFeatures &requested_features_) override
KernelFunctions< void(*)(KernelGlobals *, uchar4 *, float *, float, int, int, int, int)> convert_to_half_float_kernel
KernelGlobals thread_kernel_globals_init()
void denoise_openimagedenoise(DeviceTask &task, RenderTile &rtile)
KernelFunctions< void(*)(KernelGlobals *, uchar4 *, float *, float, int, int, int, int)> convert_to_byte_kernel
KernelFunctions< void(*)(float *, float *, int *, int, int)> filter_nlm_calc_weight_kernel
virtual int get_split_task_count(DeviceTask &task) override
KernelFunctions< void(*)(int, int, float *, float *, float *, float *, int *, int)> filter_detect_outliers_kernel
KernelFunctions< void(*)(int, int, int, float *, float *, float *, int *, float *, float3 *, int *, int *, int, int, int, int, bool)> filter_nlm_construct_gramian_kernel
void tex_alloc(device_texture &mem)
void render(DeviceTask &task, RenderTile &tile, KernelGlobals *kg)
KernelFunctions< void(*)(int, int, int, int *, float *, float *, int, int *)> filter_write_feature_kernel
KernelGlobals kernel_globals
void denoise_nlm(DenoisingTask &denoising, RenderTile &tile)
bool denoising_construct_transform(DenoisingTask *task)
virtual void mem_alloc(device_memory &mem) override
KernelFunctions< void(*)(int, TileInfo *, int, int, float *, float *, float *, float *, float *, int *, int, int)> filter_divide_shadow_kernel
void tex_free(device_texture &mem)
void global_alloc(device_memory &mem)
KernelFunctions< void(*)(int, int, float *, float *, float *, float *, float *, int *, int, int, int)> filter_nlm_update_output_kernel
bool denoising_write_feature(int out_offset, device_ptr from_ptr, device_ptr buffer_ptr, DenoisingTask *task)
CPUDevice(DeviceInfo &info_, Stats &stats_, Profiler &profiler_, bool background_)
void thread_film_convert(DeviceTask &task)
virtual void * osl_memory() override
void thread_denoise(DeviceTask &task)
KernelFunctions< void(*)(int, int, float *, float *, float *, float *, int *, int)> filter_combine_halves_kernel
virtual void const_copy_to(const char *name, void *host, size_t size) override
KernelFunctions< void(*)(int, int, float *, float *, float *, float *, int *, int, int, int, float, float)> filter_nlm_calc_difference_kernel
void thread_kernel_globals_free(KernelGlobals *kg)
KernelFunctions< void(*)(int, int, int, float *, int *, float *, float3 *, int *, int)> filter_finalize_kernel
bool denoising_get_feature(int mean_offset, int variance_offset, device_ptr mean_ptr, device_ptr variance_ptr, float scale, DenoisingTask *task)
virtual void task_wait() override
KernelFunctions< void(*)(KernelGlobals *, float *, int, int, int, int, int)> path_trace_kernel
bool denoising_solve(device_ptr output_ptr, DenoisingTask *task)
device_vector< TextureInfo > texture_info
KernelFunctions< void(*)(int, TileInfo *, int, int, int, int, float *, float *, float, int *, int, int)> filter_get_feature_kernel
virtual void mem_copy_to(device_memory &mem) override
KernelFunctions< void(*)(KernelGlobals *, ccl_constant KernelData *, ccl_global void *, int, ccl_global char *, int, int, int, int, int, int, int, int, ccl_global int *, int, ccl_global char *, ccl_global unsigned int *, unsigned int, ccl_global float *)> data_init_kernel
bool denoising_accumulate(device_ptr color_ptr, device_ptr color_variance_ptr, device_ptr scale_ptr, int frame, DenoisingTask *task)
void thread_shader(DeviceTask &task)
bool denoising_detect_outliers(device_ptr image_ptr, device_ptr variance_ptr, device_ptr depth_ptr, device_ptr output_ptr, DenoisingTask *task)
KernelFunctions< void(*)(float *, float *, int *, int, int)> filter_nlm_blur_kernel
void adaptive_sampling_post(const RenderTile &tile, KernelGlobals *kg)
void build_bvh(BVH *bvh, Progress &progress, bool refit) override
DeviceRequestedFeatures requested_features
KernelFunctions< void(*)(float *, TileInfo *, int, int, int, float *, int *, int *, int, int, bool, int, float)> filter_construct_transform_kernel
void thread_run(DeviceTask &task)
virtual void task_add(DeviceTask &task) override
unordered_map< string, KernelFunctions< void(*)(KernelGlobals *, KernelData *)> > split_kernels
void denoise_openimagedenoise_buffer(DeviceTask &task, float *buffer, const size_t offset, const size_t stride, const size_t x, const size_t y, const size_t w, const size_t h, const float scale)
void global_free(device_memory &mem)
void thread_render(DeviceTask &task)
~CPUSplitKernelFunction()
void(* func)(KernelGlobals *kg, KernelData *data)
virtual bool enqueue(const KernelDimensions &dim, device_memory &kernel_globals, device_memory &data)
CPUSplitKernelFunction(CPUDevice *device)
virtual uint64_t state_buffer_size(device_memory &kg, device_memory &data, size_t num_threads)
virtual int2 split_kernel_local_size()
virtual SplitKernelFunction * get_split_kernel_function(const string &kernel_name, const DeviceRequestedFeatures &)
CPUSplitKernel(CPUDevice *device)
virtual bool enqueue_split_kernel_data_init(const KernelDimensions &dim, RenderTile &rtile, int num_global_elements, device_memory &kernel_globals, device_memory &kernel_data_, device_memory &split_data, device_memory &ray_state, device_memory &queue_index, device_memory &use_queues_flag, device_memory &work_pool_wgs)
virtual int2 split_kernel_global_size(device_memory &kg, device_memory &data, DeviceTask &task)
void init_pixel(int x, int y)
void run_denoising(RenderTile &tile)
struct DenoisingTask::RenderBuffers render_buffer
struct DenoisingTask::DeviceFunctions functions
struct DenoisingTask::DenoiseBuffers buffer
ProfilingState * profiler
DenoiserTypeMask denoisers
bool has_adaptive_stop_per_sample
bool has_volume_decoupled
bool load_kernels(const DeviceRequestedFeatures &requested_features)
bool path_trace(DeviceTask &task, RenderTile &rtile, device_memory &kgbuffer, device_memory &kernel_data)
virtual void build_bvh(BVH *bvh, Progress &progress, bool refit)
KernelFunctions(F kernel_default, F kernel_sse2, F kernel_sse3, F kernel_sse41, F kernel_avx, F kernel_avx2)
void add_state(ProfilingState *state)
void remove_state(ProfilingState *state)
device_vector< float > buffer
StealingState stealing_state
void mem_free(size_t size)
void mem_alloc(size_t size)
T * resize(size_t newsize)
size_t memory_elements_size(int elements)
device_ptr device_pointer
void alloc_to_device(size_t num, bool shrink_to_fit=true)
T * resize(size_t width, size_t height=0, size_t depth=0)
Device * device_cpu_create(DeviceInfo &info, Stats &stats, Profiler &profiler, bool background)
static const char * logged_architecture
void device_cpu_info(vector< DeviceInfo > &devices)
#define REGISTER_KERNEL(name)
#define REGISTER_SPLIT_KERNEL(name)
#define KERNEL_FUNCTIONS(name)
string device_cpu_capabilities()
@ DENOISER_OPENIMAGEDENOISE
@ DENOISER_INPUT_RGB_ALBEDO
@ DENOISER_INPUT_RGB_ALBEDO_NORMAL
void KERNEL_FUNCTION_FULL_NAME() filter_nlm_calc_difference(int dx, int dy, float *weight_image, float *variance_image, float *scale_image, float *difference_image, int *rect, int stride, int channel_offset, int frame_offset, float a, float k_2)
void KERNEL_FUNCTION_FULL_NAME() filter_nlm_update_output(int dx, int dy, float *difference_image, float *image, float *temp_image, float *out_image, float *accum_image, int *rect, int channel_offset, int stride, int f)
void KERNEL_FUNCTION_FULL_NAME() filter_write_feature(int sample, int x, int y, int *buffer_params, float *from, float *buffer, int out_offset, int *prefilter_rect)
void KERNEL_FUNCTION_FULL_NAME() filter_get_feature(int sample, TileInfo *tile_info, int m_offset, int v_offset, int x, int y, float *mean, float *variance, float scale, int *prefilter_rect, int buffer_pass_stride, int buffer_denoising_offset)
void KERNEL_FUNCTION_FULL_NAME() filter_detect_outliers(int x, int y, ccl_global float *image, ccl_global float *variance, ccl_global float *depth, ccl_global float *output, int *rect, int pass_stride)
void KERNEL_FUNCTION_FULL_NAME() filter_nlm_calc_weight(float *difference_image, float *out_image, int *rect, int stride, int f)
void KERNEL_FUNCTION_FULL_NAME() filter_finalize(int x, int y, int storage_ofs, float *buffer, int *rank, float *XtWX, float3 *XtWY, int *buffer_params, int sample)
void KERNEL_FUNCTION_FULL_NAME() filter_nlm_construct_gramian(int dx, int dy, int t, float *difference_image, float *buffer, float *transform, int *rank, float *XtWX, float3 *XtWY, int *rect, int *filter_window, int stride, int f, int pass_stride, int frame_offset, bool use_time)
void KERNEL_FUNCTION_FULL_NAME() filter_construct_transform(float *buffer, TileInfo *tiles, int x, int y, int storage_ofs, float *transform, int *rank, int *rect, int pass_stride, int frame_stride, bool use_time, int radius, float pca_threshold)
void KERNEL_FUNCTION_FULL_NAME() filter_combine_halves(int x, int y, float *mean, float *variance, float *a, float *b, int *prefilter_rect, int r)
void KERNEL_FUNCTION_FULL_NAME() filter_nlm_blur(float *difference_image, float *out_image, int *rect, int stride, int f)
void KERNEL_FUNCTION_FULL_NAME() filter_nlm_normalize(float *out_image, float *accum_image, int *rect, int stride)
void KERNEL_FUNCTION_FULL_NAME() filter_divide_shadow(int sample, TileInfo *tile_info, int x, int y, float *unfilteredA, float *unfilteredB, float *sampleV, float *sampleVV, float *bufferV, int *prefilter_rect, int buffer_pass_stride, int buffer_denoising_offset)
ccl_device_intersect bool scene_intersect(KernelGlobals *kg, const Ray *ray, const uint visibility, Intersection *isect)
void kernel_const_copy(KernelGlobals *kg, const char *name, void *host, size_t size)
void kernel_global_memory_copy(KernelGlobals *kg, const char *name, void *mem, size_t size)
CCL_NAMESPACE_BEGIN ccl_device void kernel_do_adaptive_stopping(KernelGlobals *kg, ccl_global float *buffer, int sample)
ccl_device bool kernel_do_adaptive_filter_x(KernelGlobals *kg, int y, ccl_global WorkTile *tile)
ccl_device void kernel_adaptive_post_adjust(KernelGlobals *kg, ccl_global float *buffer, float sample_multiplier)
ccl_device bool kernel_do_adaptive_filter_y(KernelGlobals *kg, int x, ccl_global WorkTile *tile)
#define CCL_NAMESPACE_END
#define make_int4(x, y, z, w)
void KERNEL_FUNCTION_FULL_NAME() convert_to_half_float(KernelGlobals *kg, uchar4 *rgba, float *buffer, float sample_scale, int x, int y, int offset, int stride)
void KERNEL_FUNCTION_FULL_NAME() path_trace(KernelGlobals *kg, float *buffer, int sample, int x, int y, int offset, int stride)
void KERNEL_FUNCTION_FULL_NAME() convert_to_byte(KernelGlobals *kg, uchar4 *rgba, float *buffer, float sample_scale, int x, int y, int offset, int stride)
void KERNEL_FUNCTION_FULL_NAME() data_init(KernelGlobals *kg, ccl_constant KernelData *data, ccl_global void *split_data_buffer, int num_elements, ccl_global char *ray_state, int start_sample, int end_sample, int sx, int sy, int sw, int sh, int offset, int stride, ccl_global int *Queue_index, int queuesize, ccl_global char *use_queues_flag, ccl_global unsigned int *work_pool_wgs, unsigned int num_samples, ccl_global float *buffer)
void KERNEL_FUNCTION_FULL_NAME() shader(KernelGlobals *kg, uint4 *input, float4 *output, int type, int filter, int i, int offset, int sample)
ccl_device_noinline_cpu float3 indirect_background(KernelGlobals *kg, ShaderData *emission_sd, ccl_addr_space PathState *state, ccl_global float *buffer, ccl_addr_space Ray *ray)
CCL_NAMESPACE_BEGIN ccl_device_inline uint64_t split_data_buffer_size(KernelGlobals *kg, size_t num_elements)
__kernel void ccl_constant KernelData ccl_global void ccl_global char ccl_global int ccl_global char ccl_global unsigned int ccl_global float * buffer
__kernel void ccl_constant KernelData ccl_global void ccl_global char ccl_global int ccl_global char * use_queues_flag
__kernel void ccl_constant KernelData ccl_global void ccl_global char ccl_global int * queue_index
__kernel void ccl_constant KernelData ccl_global void ccl_global char * ray_state
@ BVH_LAYOUT_MULTI_OPTIX_EMBREE
static void sample(SocketReader *reader, int x, int y, float color[4])
Vector< CPUDevice > devices
list of all CPUDevices. for every hardware thread an instance of CPUDevice is created
struct blender::compositor::@172::@174 task
static int bake(const BakeAPIRender *bkr, Object *ob_low, const ListBase *selected_objects, ReportList *reports)
unsigned __int64 uint64_t
function< bool(device_ptr a_ptr, device_ptr b_ptr, device_ptr mean_ptr, device_ptr variance_ptr, int r, int4 rect)> combine_halves
function< bool(device_ptr image_ptr, device_ptr variance_ptr, device_ptr depth_ptr, device_ptr output_ptr)> detect_outliers
function< bool(int out_offset, device_ptr frop_ptr, device_ptr buffer_ptr)> write_feature
function< bool(device_ptr output_ptr)> solve
function< bool(device_ptr a_ptr, device_ptr b_ptr, device_ptr sample_variance_ptr, device_ptr sv_variance_ptr, device_ptr buffer_variance_ptr)> divide_shadow
function< bool()> construct_transform
function< bool(int mean_offset, int variance_offset, device_ptr mean_ptr, device_ptr variance_ptr, float scale)> get_feature
function< bool(device_ptr color_ptr, device_ptr color_variance_ptr, device_ptr scale_ptr, int frame)> accumulate
function< bool(device_ptr image_ptr, device_ptr guide_ptr, device_ptr variance_ptr, device_ptr out_ptr)> non_local_means
void push(TaskRunFunction &&task)
void wait_work(Summary *stats=NULL)
ccl_global float * buffer
void util_aligned_free(void *ptr)
CCL_NAMESPACE_BEGIN void * util_aligned_malloc(size_t size, int alignment)
#define MIN_ALIGNMENT_CPU_DATA_TYPES
__forceinline bool any(const avxb &b)
DebugFlags & DebugFlags()
static CCL_NAMESPACE_BEGIN bool openimagedenoise_supported()
void path_init(const string &path, const string &user_path)
@ PROFILING_DENOISING_COMBINE_HALVES
@ PROFILING_DENOISING_RECONSTRUCT
@ PROFILING_DENOISING_GET_FEATURE
@ PROFILING_DENOISING_DIVIDE_SHADOW
@ PROFILING_DENOISING_DETECT_OUTLIERS
@ PROFILING_DENOISING_CONSTRUCT_TRANSFORM
@ PROFILING_DENOISING_NON_LOCAL_MEANS
ccl_device_inline int4 rect_clip(int4 a, int4 b)
ccl_device_inline int4 rect_expand(int4 rect, int d)
ccl_device_inline int rect_size(int4 rect)
#define SIMD_SET_FLUSH_TO_ZERO
string string_human_readable_size(size_t size)
string string_human_readable_number(size_t num)
bool system_cpu_support_avx2()
string system_cpu_brand_string()
bool system_cpu_support_avx()
bool system_cpu_support_sse3()
bool system_cpu_support_sse41()
bool system_cpu_support_sse2()
std::unique_lock< std::mutex > thread_scoped_lock
CCL_NAMESPACE_BEGIN typedef std::mutex thread_mutex
tbb::spin_mutex thread_spin_lock
ccl_device_inline size_t align_up(size_t offset, size_t alignment)