Blender V4.5
realize_on_domain_operation.cc
Go to the documentation of this file.
1/* SPDX-FileCopyrightText: 2023 Blender Authors
2 *
3 * SPDX-License-Identifier: GPL-2.0-or-later */
4
5#include <limits>
6
7#include "BLI_math_matrix.hh"
10#include "BLI_utildefines.h"
11
12#include "GPU_capabilities.hh"
13#include "GPU_shader.hh"
14#include "GPU_texture.hh"
15
16#include "COM_context.hh"
17#include "COM_domain.hh"
19#include "COM_result.hh"
20#include "COM_utilities.hh"
21
23
24namespace blender::compositor {
25
26/* ------------------------------------------------------------------------------------------------
27 * Realize On Domain Operation
28 */
29
31 Domain target_domain,
32 ResultType type)
33 : SimpleOperation(context), target_domain_(target_domain)
34{
35 InputDescriptor input_descriptor;
36 input_descriptor.type = type;
37 this->declare_input_descriptor(input_descriptor);
38 this->populate_result(context.create_result(type));
39}
40
42{
43 /* Translate the input such that it is centered in the virtual compositing space. Adding any
44 * corrective translation if necessary. */
45 const float2 input_center_translation = float2(-float2(this->get_input().domain().size) / 2.0f);
46 const float3x3 input_transformation = math::translate(
47 this->get_input().domain().transformation,
48 input_center_translation + this->compute_corrective_translation());
49
50 /* Translate the output such that it is centered in the virtual compositing space. */
51 const float2 output_center_translation = -float2(this->compute_domain().size) / 2.0f;
52 const float3x3 output_transformation = math::translate(this->compute_domain().transformation,
53 output_center_translation);
54
55 /* Get the transformation from the output space to the input space */
56 const float3x3 inverse_transformation = math::invert(input_transformation) *
57 output_transformation;
58
59 if (this->context().use_gpu()) {
60 this->realize_on_domain_gpu(inverse_transformation);
61 }
62 else {
63 this->realize_on_domain_cpu(inverse_transformation);
64 }
65}
66
67float2 RealizeOnDomainOperation::compute_corrective_translation()
68{
69 if (this->get_input().get_realization_options().interpolation == Interpolation::Nearest) {
70 /* Bias translations in case of nearest interpolation to avoids the round-to-even behavior of
71 * some GPUs at pixel boundaries. */
72 return float2(std::numeric_limits<float>::epsilon() * 10e3f);
73 }
74
75 /* Assuming no transformations, if the input size is odd and output size is even or vice versa,
76 * the centers of pixels of the input and output will be half a pixel away from each other due
77 * to the centering translation. Which introduce fuzzy result due to interpolation. So if one
78 * is odd and the other is even, detected by testing the low bit of the xor of the sizes, shift
79 * the input by 1/2 pixel so the pixels align. */
80 const int2 output_size = this->compute_domain().size;
81 const int2 input_size = this->get_input().domain().size;
82 return float2(((input_size[0] ^ output_size[0]) & 1) ? -0.5f : 0.0f,
83 ((input_size[1] ^ output_size[1]) & 1) ? -0.5f : 0.0f);
84}
85
/* GPU implementation of the realization. Binds the shader named by get_realization_shader_name(),
 * passes it the given inverse transformation, configures the sampler of the input, then
 * allocates the output on the target domain and dispatches the shader over the domain size.
 *
 * NOTE(review): some statements of this function are not visible in this view, notably the
 * calls that the extend mode comment below refers to, so the notes here only cover the visible
 * statements. */
86void RealizeOnDomainOperation::realize_on_domain_gpu(const float3x3 &inverse_transformation)
87{
88 GPUShader *shader = this->context().get_shader(this->get_realization_shader_name());
89 GPU_shader_bind(shader);
90
 /* The 3x3 matrix is uploaded through the mat3-as-mat4 uniform helper under the name
  * "inverse_transformation". */
91 GPU_shader_uniform_mat3_as_mat4(shader, "inverse_transformation", inverse_transformation.ptr());
92
93 /* The texture sampler should use bilinear interpolation for both the bilinear and bicubic
94 * cases, as the logic used by the bicubic realization shader expects textures to use bilinear
95 * interpolation. */
96 Result &input = this->get_input();
97 const RealizationOptions realization_options = input.get_realization_options();
98 const bool use_bilinear = ELEM(
99 realization_options.interpolation, Interpolation::Bilinear, Interpolation::Bicubic);
100 GPU_texture_filter_mode(input, use_bilinear);
101
102 /* If the input repeats, set a repeating extend mode for out-of-bound texture access. Otherwise,
103 * make out-of-bound texture access return zero by setting a clamp to border extend mode. */
105 realization_options.repeat_x ?
109 realization_options.repeat_y ?
112
113 input.bind_as_texture(shader, "input_tx");
114
 /* The output is allocated with the target domain and bound as the image "domain_img". */
115 const Domain domain = this->compute_domain();
116 Result &output = this->get_result();
117 output.allocate_texture(domain);
118 output.bind_as_image(shader, "domain_img");
119
 /* Presumably dispatches at least one thread per pixel of the domain, going by the name of the
  * helper. TODO confirm against its definition. */
120 compute_dispatch_threads_at_least(shader, domain.size);
121
 /* Release the bindings made above now that the dispatch has been issued. */
122 input.unbind_as_texture();
123 output.unbind_as_image();
125}
126
/* Returns the name of the shader that realizes the input, chosen from the interpolation of the
 * input's realization options and from the type of the input. Bicubic interpolation selects the
 * "compositor_realize_on_domain_bicubic_*" shaders, every other interpolation selects the
 * "compositor_realize_on_domain_*" shaders. The Int, Int2 and Bool types visible below break out
 * of the switch, after which nullptr is returned.
 *
 * NOTE(review): the case labels that precede the return statements are not visible in this view,
 * so the exact set of types mapped to each shader can't be stated from here. */
127const char *RealizeOnDomainOperation::get_realization_shader_name()
128{
129 if (this->get_input().get_realization_options().interpolation == Interpolation::Bicubic) {
130 switch (this->get_input().type()) {
132 return "compositor_realize_on_domain_bicubic_float";
136 return "compositor_realize_on_domain_bicubic_float4";
137 case ResultType::Int:
138 case ResultType::Int2:
140 case ResultType::Bool:
141 /* Realization does not support internal image types. */
142 break;
143 }
144 }
145 else {
 /* Every interpolation other than bicubic shares the same set of shaders. */
146 switch (this->get_input().type()) {
148 return "compositor_realize_on_domain_float";
152 return "compositor_realize_on_domain_float4";
153 case ResultType::Int:
154 case ResultType::Int2:
156 case ResultType::Bool:
157 /* Realization does not support internal image types. */
158 break;
159 }
160 }
161
 /* Only reached by the types that break out of the switches above. */
163 return nullptr;
164}
165
166void RealizeOnDomainOperation::realize_on_domain_cpu(const float3x3 &inverse_transformation)
167{
168 Result &input = this->get_input();
169 Result &output = this->get_result();
170
171 const Domain domain = this->compute_domain();
172 output.allocate_texture(domain);
173
174 const RealizationOptions realization_options = input.get_realization_options();
175 parallel_for(domain.size, [&](const int2 texel) {
176 /* Add 0.5 to evaluate the input sampler at the center of the pixel. */
177 float2 coordinates = float2(texel) + float2(0.5f);
178
179 /* Transform the input image by transforming the domain coordinates with the inverse of input
180 * image's transformation. The inverse transformation is an affine matrix and thus the
181 * coordinates should be in homogeneous coordinates. */
182 coordinates = (inverse_transformation * float3(coordinates, 1.0f)).xy();
183
184 /* Subtract the offset and divide by the input image size to get the relevant coordinates into
185 * the sampler's expected [0, 1] range. */
186 const int2 input_size = input.domain().size;
187 float2 normalized_coordinates = coordinates / float2(input_size);
188
189 float4 sample;
190 switch (realization_options.interpolation) {
191 case Interpolation::Nearest:
192 sample = input.sample_nearest_wrap(
193 normalized_coordinates, realization_options.repeat_x, realization_options.repeat_y);
194 break;
195 case Interpolation::Bilinear:
196 sample = input.sample_bilinear_wrap(
197 normalized_coordinates, realization_options.repeat_x, realization_options.repeat_y);
198 break;
199 case Interpolation::Bicubic:
200 sample = input.sample_cubic_wrap(
201 normalized_coordinates, realization_options.repeat_x, realization_options.repeat_y);
202 break;
203 }
204 output.store_pixel_generic_type(texel, sample);
205 });
206}
207
209{
210 return target_domain_;
211}
212
213/* If the transformations of the input and output domains are within this tolerance value, then
214 * realization shouldn't be needed. */
215static constexpr float transformation_tolerance = 10e-6f;
216
218 const Domain &domain)
219{
220 const int2 size = domain.size;
221
222 /* If the domain is only infinitesimally rotated or scaled, return a domain with just the
223 * translation component. */
224 if (math::is_equal(
226 {
228 }
229
230 /* Compute the 4 corners of the domain. */
231 const float2 lower_left_corner = float2(0.0f);
232 const float2 lower_right_corner = float2(size.x, 0.0f);
233 const float2 upper_left_corner = float2(0.0f, size.y);
234 const float2 upper_right_corner = float2(size);
235
236 /* Eliminate the translation component of the transformation and create a centered
237 * transformation with the image center as the origin. Translation is ignored since it has no
238 * effect on the size of the domain and will be restored later. */
239 const float2 center = float2(float2(size) / 2.0f);
240 const float3x3 transformation = float3x3(float2x2(domain.transformation));
241 const float3x3 centered_transformation = math::from_origin_transform(transformation, center);
242
243 /* Transform each of the 4 corners of the image by the centered transformation. */
244 const float2 transformed_lower_left_corner = math::transform_point(centered_transformation,
245 lower_left_corner);
246 const float2 transformed_lower_right_corner = math::transform_point(centered_transformation,
247 lower_right_corner);
248 const float2 transformed_upper_left_corner = math::transform_point(centered_transformation,
249 upper_left_corner);
250 const float2 transformed_upper_right_corner = math::transform_point(centered_transformation,
251 upper_right_corner);
252
253 /* Compute the lower and upper bounds of the bounding box of the transformed corners. */
254 const float2 lower_bound = math::min(
255 math::min(transformed_lower_left_corner, transformed_lower_right_corner),
256 math::min(transformed_upper_left_corner, transformed_upper_right_corner));
257 const float2 upper_bound = math::max(
258 math::max(transformed_lower_left_corner, transformed_lower_right_corner),
259 math::max(transformed_upper_left_corner, transformed_upper_right_corner));
260
261 /* Round the bounds such that they cover the entire transformed domain, which means flooring for
262 * the lower bound and ceiling for the upper bound. */
263 const int2 integer_lower_bound = int2(math::floor(lower_bound));
264 const int2 integer_upper_bound = int2(math::ceil(upper_bound));
265
266 const int2 new_size = integer_upper_bound - integer_lower_bound;
267
268 /* Make sure the new size is safe by clamping to the hardware limits and an upper bound. */
269 const int max_size = context.use_gpu() ? GPU_max_texture_size() : 65536;
270 const int2 safe_size = math::clamp(new_size, int2(1), int2(max_size));
271
272 /* Create a domain from the new safe size and just the translation component of the
273 * transformation. */
274 return Domain(safe_size, math::from_location<float3x3>(domain.transformation.location()));
275}
276
279 const Result &input_result,
280 const InputDescriptor &input_descriptor,
281 const Domain &operation_domain)
282{
283 /* This input doesn't need realization, the operation is not needed. */
284 if (input_descriptor.realization_mode == InputRealizationMode::None) {
285 return nullptr;
286 }
287
288 /* The input expects a single value and if no single value is provided, it will be ignored and a
289 * default value will be used, so no need to realize it and the operation is not needed. */
290 if (input_descriptor.expects_single_value) {
291 return nullptr;
292 }
293
294 /* Input result is a single value and does not need realization, the operation is not needed. */
295 if (input_result.is_single_value()) {
296 return nullptr;
297 }
298
299 /* If we are realizing on the operation domain, then our target domain is the operation domain,
300 * otherwise, we are only realizing the transforms, then our target domain is the input's one. */
301 const bool use_operation_domain = input_descriptor.realization_mode ==
303 const Domain target_domain = use_operation_domain ? operation_domain : input_result.domain();
304
305 const Domain realized_target_domain =
307
308 /* The input has an almost identical domain to the realized target domain, so no need to realize
309 * it and the operation is not needed. */
310 if (Domain::is_equal(input_result.domain(), realized_target_domain, transformation_tolerance)) {
311 return nullptr;
312 }
313
314 /* Otherwise, realization is needed. */
315 return new RealizeOnDomainOperation(context, realized_target_domain, input_descriptor.type);
316}
317
318} // namespace blender::compositor
#define BLI_assert_unreachable()
Definition BLI_assert.h:93
#define ELEM(...)
int GPU_max_texture_size()
void GPU_shader_bind(GPUShader *shader, const blender::gpu::shader::SpecializationConstants *constants_state=nullptr)
void GPU_shader_uniform_mat3_as_mat4(GPUShader *sh, const char *name, const float data[3][3])
void GPU_shader_unbind()
void GPU_texture_extend_mode_y(GPUTexture *texture, GPUSamplerExtendMode extend_mode)
void GPU_texture_extend_mode_x(GPUTexture *texture, GPUSamplerExtendMode extend_mode)
@ GPU_SAMPLER_EXTEND_MODE_REPEAT
@ GPU_SAMPLER_EXTEND_MODE_CLAMP_TO_BORDER
void GPU_texture_filter_mode(GPUTexture *texture, bool use_filter)
static DBVT_INLINE btScalar size(const btDbvtVolume &a)
Definition btDbvt.cpp:52
Result create_result(ResultType type, ResultPrecision precision)
GPUShader * get_shader(const char *info_name, ResultPrecision precision)
static bool is_equal(const Domain &a, const Domain &b, const float epsilon=0.0f)
Definition domain.cc:30
RealizeOnDomainOperation(Context &context, Domain target_domain, ResultType type)
static SimpleOperation * construct_if_needed(Context &context, const Result &input_result, const InputDescriptor &input_descriptor, const Domain &operation_domain)
static Domain compute_realized_transformation_domain(Context &context, const Domain &domain)
const Domain & domain() const
bool is_single_value() const
Definition result.cc:622
void declare_input_descriptor(InputDescriptor descriptor)
#define input
MatBase< 3, 3 > float3x3
#define output
void compute_dispatch_threads_at_least(GPUShader *shader, int2 threads_range, int2 local_size=int2(16))
Definition utilities.cc:170
static constexpr float transformation_tolerance
void parallel_for(const int2 range, const Function &function)
T clamp(const T &a, const T &min, const T &max)
T floor(const T &a)
T min(const T &a, const T &b)
CartesianBasis invert(const CartesianBasis &basis)
MatBase< T, NumCol, NumRow > translate(const MatBase< T, NumCol, NumRow > &mat, const VectorT &translation)
MatT from_origin_transform(const MatT &transform, const VectorT origin)
T ceil(const T &a)
bool is_equal(const MatBase< T, NumCol, NumRow > &a, const MatBase< T, NumCol, NumRow > &b, const T epsilon=T(0))
T max(const T &a, const T &b)
MatT from_location(const typename MatT::loc_type &location)
VecBase< T, 3 > transform_point(const CartesianBasis &basis, const VecBase< T, 3 > &v)
MatBase< float, 2, 2 > float2x2
VecBase< int32_t, 2 > int2
VecBase< float, 2 > float2
MatBase< float, 3, 3 > float3x3