Blender  V2.93
util_math_matrix.h
Go to the documentation of this file.
1 /*
2  * Copyright 2011-2017 Blender Foundation
3  *
4  * Licensed under the Apache License, Version 2.0 (the "License");
5  * you may not use this file except in compliance with the License.
6  * You may obtain a copy of the License at
7  *
8  * http://www.apache.org/licenses/LICENSE-2.0
9  *
10  * Unless required by applicable law or agreed to in writing, software
11  * distributed under the License is distributed on an "AS IS" BASIS,
12  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13  * See the License for the specific language governing permissions and
14  * limitations under the License.
15  */
16 
17 #ifndef __UTIL_MATH_MATRIX_H__
18 #define __UTIL_MATH_MATRIX_H__
19 
21 
22 #define MAT(A, size, row, col) A[(row) * (size) + (col)]
23 
24 /* Variants that use a constant stride on GPUS. */
25 #ifdef __KERNEL_GPU__
26 # define MATS(A, n, r, c, s) A[((r) * (n) + (c)) * (s)]
27 /* Element access when only the lower-triangular elements are stored. */
28 # define MATHS(A, r, c, s) A[((r) * ((r) + 1) / 2 + (c)) * (s)]
29 # define VECS(V, i, s) V[(i) * (s)]
30 #else
31 # define MATS(A, n, r, c, s) MAT(A, n, r, c)
32 # define MATHS(A, r, c, s) A[(r) * ((r) + 1) / 2 + (c)]
33 # define VECS(V, i, s) V[i]
34 #endif
35 
36 /* Zeroing helpers. */
37 
39 {
40  for (int i = 0; i < n; i++) {
41  v[i] = 0.0f;
42  }
43 }
44 
46 {
47  for (int row = 0; row < n; row++) {
48  for (int col = 0; col <= row; col++) {
49  MAT(A, n, row, col) = 0.0f;
50  }
51  }
52 }
53 
54 /* Elementary vector operations. */
55 
56 ccl_device_inline void math_vector_add(float *a, const float *ccl_restrict b, int n)
57 {
58  for (int i = 0; i < n; i++) {
59  a[i] += b[i];
60  }
61 }
62 
63 ccl_device_inline void math_vector_mul(float *a, const float *ccl_restrict b, int n)
64 {
65  for (int i = 0; i < n; i++) {
66  a[i] *= b[i];
67  }
68 }
69 
71  const float *ccl_restrict b,
72  int astride,
73  int n)
74 {
75  for (int i = 0; i < n; i++) {
76  a[i * astride] *= b[i];
77  }
78 }
79 
80 ccl_device_inline void math_vector_scale(float *a, float b, int n)
81 {
82  for (int i = 0; i < n; i++) {
83  a[i] *= b;
84  }
85 }
86 
87 ccl_device_inline void math_vector_max(float *a, const float *ccl_restrict b, int n)
88 {
89  for (int i = 0; i < n; i++) {
90  a[i] = max(a[i], b[i]);
91  }
92 }
93 
95 {
96  for (int i = 0; i < n; i++) {
97  v[i] += w * x[i];
98  }
99 }
100 
102  ccl_global float3 *v, int n, float *x, float3 w, int stride)
103 {
104  for (int i = 0; i < n; i++) {
105  ccl_global float *elem = (ccl_global float *)(v + i * stride);
106  atomic_add_and_fetch_float(elem + 0, w.x * x[i]);
107  atomic_add_and_fetch_float(elem + 1, w.y * x[i]);
108  atomic_add_and_fetch_float(elem + 2, w.z * x[i]);
109  }
110 }
111 
112 /* Elementary matrix operations.
113  * Note: TriMatrix refers to a square matrix that is symmetric,
114  * and therefore its upper-triangular part isn't stored. */
115 
117  int n,
118  float val,
119  int stride)
120 {
121  for (int row = 0; row < n; row++) {
122  MATHS(A, row, row, stride) += val;
123  }
124 }
125 
126 /* Add Gramian matrix of v to A.
127  * The Gramian matrix of v is vt*v, so element (i,j) is v[i]*v[j]. */
129  int n,
130  const float *ccl_restrict v,
131  float weight)
132 {
133  for (int row = 0; row < n; row++) {
134  for (int col = 0; col <= row; col++) {
135  MAT(A, n, row, col) += v[row] * v[col] * weight;
136  }
137  }
138 }
139 
140 /* Add Gramian matrix of v to A.
141  * The Gramian matrix of v is vt*v, so element (i,j) is v[i]*v[j]. */
143  ccl_global float *A, int n, const float *ccl_restrict v, float weight, int stride)
144 {
145  for (int row = 0; row < n; row++) {
146  for (int col = 0; col <= row; col++) {
147  atomic_add_and_fetch_float(&MATHS(A, row, col, stride), v[row] * v[col] * weight);
148  }
149  }
150 }
151 
153  int n,
154  const float *ccl_restrict v,
155  float weight)
156 {
157  for (int row = 0; row < n; row++) {
158  for (int col = 0; col <= row; col++) {
159  MATHS(A, row, col, 1) += v[row] * v[col] * weight;
160  }
161  }
162 }
163 
164 /* Transpose matrix A in place. */
166 {
167  for (int i = 0; i < n; i++) {
168  for (int j = 0; j < i; j++) {
169  float temp = MATS(A, n, i, j, stride);
170  MATS(A, n, i, j, stride) = MATS(A, n, j, i, stride);
171  MATS(A, n, j, i, stride) = temp;
172  }
173  }
174 }
175 
176 /* Solvers for matrix problems */
177 
178 /* In-place Cholesky-Banachiewicz decomposition of the square, positive-definite matrix A
179  * into a lower triangular matrix L so that A = L*L^T. A is being overwritten by L.
180  * Also, only the lower triangular part of A is ever accessed. */
182 {
183  for (int row = 0; row < n; row++) {
184  for (int col = 0; col <= row; col++) {
185  float sum_col = MATHS(A, row, col, stride);
186  for (int k = 0; k < col; k++) {
187  sum_col -= MATHS(A, row, k, stride) * MATHS(A, col, k, stride);
188  }
189  if (row == col) {
190  sum_col = sqrtf(max(sum_col, 0.0f));
191  }
192  else {
193  sum_col /= MATHS(A, col, col, stride);
194  }
195  MATHS(A, row, col, stride) = sum_col;
196  }
197  }
198 }
199 
200 /* Solve A*S=y for S given A and y,
201  * where A is symmetrical positive-semi-definite and both inputs are destroyed in the process.
202  *
203  * We can apply Cholesky decomposition to find a lower triangular L so that L*Lt = A.
204  * With that we get (L*Lt)*S = L*(Lt*S) = L*b = y, defining b as Lt*S.
205  * Since L is lower triangular, finding b is relatively easy since y is known.
206  * Then, the remaining problem is Lt*S = b, which again can be solved easily.
207  *
208  * This is useful for solving the normal equation S=inv(Xt*W*X)*Xt*W*y, since Xt*W*X is
209  * symmetrical positive-semidefinite by construction,
210  * so we can just use this function with A=Xt*W*X and y=Xt*W*y. */
213  int n,
214  int stride)
215 {
216  /* Since the first entry of the design row is always 1, the upper-left element of XtWX is a good
217  * heuristic for the amount of pixels considered (with weighting),
218  * therefore the amount of correction is scaled based on it. */
219  math_trimatrix_add_diagonal(A, n, 3e-7f * A[0], stride); /* Improve the numerical stability. */
220  math_trimatrix_cholesky(A, n, stride); /* Replace A with L so that L*Lt = A. */
221 
222  /* Use forward substitution to solve L*b = y, replacing y by b. */
223  for (int row = 0; row < n; row++) {
224  float3 sum = VECS(y, row, stride);
225  for (int col = 0; col < row; col++)
226  sum -= MATHS(A, row, col, stride) * VECS(y, col, stride);
227  VECS(y, row, stride) = sum / MATHS(A, row, row, stride);
228  }
229 
230  /* Use backward substitution to solve Lt*S = b, replacing b by S. */
231  for (int row = n - 1; row >= 0; row--) {
232  float3 sum = VECS(y, row, stride);
233  for (int col = row + 1; col < n; col++)
234  sum -= MATHS(A, col, row, stride) * VECS(y, col, stride);
235  VECS(y, row, stride) = sum / MATHS(A, row, row, stride);
236  }
237 }
238 
239 /* Perform the Jacobi Eigenvalue Method on matrix A.
240  * A is assumed to be a symmetrical matrix, therefore only the lower-triangular part is ever
241  * accessed. The algorithm overwrites the contents of A.
242  *
243  * After returning, A will be overwritten with D, which is (almost) diagonal,
244  * and V will contain the eigenvectors of the original A in its rows (!),
245  * so that A = V^T*D*V. Therefore, the diagonal elements of D are the (sorted) eigenvalues of A.
246  */
248  ccl_global float *V,
249  int n,
250  int v_stride)
251 {
252  const float singular_epsilon = 1e-9f;
253 
254  for (int row = 0; row < n; row++) {
255  for (int col = 0; col < n; col++) {
256  MATS(V, n, row, col, v_stride) = (col == row) ? 1.0f : 0.0f;
257  }
258  }
259 
260  for (int sweep = 0; sweep < 8; sweep++) {
261  float off_diagonal = 0.0f;
262  for (int row = 1; row < n; row++) {
263  for (int col = 0; col < row; col++) {
264  off_diagonal += fabsf(MAT(A, n, row, col));
265  }
266  }
267  if (off_diagonal < 1e-7f) {
268  /* The matrix has nearly reached diagonal form.
269  * Since the eigenvalues are only used to determine truncation, their exact values aren't
270  * required - a relative error of a few ULPs won't matter at all. */
271  break;
272  }
273 
274  /* Set the threshold for the small element rotation skip in the first sweep:
275  * Skip all elements that are less than a tenth of the average off-diagonal element. */
276  float threshold = 0.2f * off_diagonal / (n * n);
277 
278  for (int row = 1; row < n; row++) {
279  for (int col = 0; col < row; col++) {
280  /* Perform a Jacobi rotation on this element that reduces it to zero. */
281  float element = MAT(A, n, row, col);
282  float abs_element = fabsf(element);
283 
284  /* If we're in a later sweep and the element already is very small,
285  * just set it to zero and skip the rotation. */
286  if (sweep > 3 && abs_element <= singular_epsilon * fabsf(MAT(A, n, row, row)) &&
287  abs_element <= singular_epsilon * fabsf(MAT(A, n, col, col))) {
288  MAT(A, n, row, col) = 0.0f;
289  continue;
290  }
291 
292  if (element == 0.0f) {
293  continue;
294  }
295 
296  /* If we're in one of the first sweeps and the element is smaller than the threshold,
297  * skip it. */
298  if (sweep < 3 && (abs_element < threshold)) {
299  continue;
300  }
301 
302  /* Determine rotation: The rotation is characterized by its angle phi - or,
303  * in the actual implementation, sin(phi) and cos(phi).
304  * To find those, we first compute their ratio - that might be unstable if the angle
305  * approaches 90°, so there's a fallback for that case.
306  * Then, we compute sin(phi) and cos(phi) themselves. */
307  float singular_diff = MAT(A, n, row, row) - MAT(A, n, col, col);
308  float ratio;
309  if (abs_element > singular_epsilon * fabsf(singular_diff)) {
310  float cot_2phi = 0.5f * singular_diff / element;
311  ratio = 1.0f / (fabsf(cot_2phi) + sqrtf(1.0f + cot_2phi * cot_2phi));
312  if (cot_2phi < 0.0f)
313  ratio = -ratio; /* Copy sign. */
314  }
315  else {
316  ratio = element / singular_diff;
317  }
318 
319  float c = 1.0f / sqrtf(1.0f + ratio * ratio);
320  float s = ratio * c;
321  /* To improve numerical stability by avoiding cancellation, the update equations are
322  * reformulized to use sin(phi) and tan(phi/2) instead. */
323  float tan_phi_2 = s / (1.0f + c);
324 
325  /* Update the singular values in the diagonal. */
326  float singular_delta = ratio * element;
327  MAT(A, n, row, row) += singular_delta;
328  MAT(A, n, col, col) -= singular_delta;
329 
330  /* Set the element itself to zero. */
331  MAT(A, n, row, col) = 0.0f;
332 
333  /* Perform the actual rotations on the matrices. */
334 #define ROT(M, r1, c1, r2, c2, stride) \
335  { \
336  float M1 = MATS(M, n, r1, c1, stride); \
337  float M2 = MATS(M, n, r2, c2, stride); \
338  MATS(M, n, r1, c1, stride) -= s * (M2 + tan_phi_2 * M1); \
339  MATS(M, n, r2, c2, stride) += s * (M1 - tan_phi_2 * M2); \
340  }
341 
342  /* Split into three parts to ensure correct accesses since we only store the
343  * lower-triangular part of A. */
344  for (int i = 0; i < col; i++)
345  ROT(A, col, i, row, i, 1);
346  for (int i = col + 1; i < row; i++)
347  ROT(A, i, col, row, i, 1);
348  for (int i = row + 1; i < n; i++)
349  ROT(A, i, col, i, row, 1);
350 
351  for (int i = 0; i < n; i++)
352  ROT(V, col, i, row, i, v_stride);
353 #undef ROT
354  }
355  }
356  }
357 
358  /* Sort eigenvalues and the associated eigenvectors. */
359  for (int i = 0; i < n - 1; i++) {
360  float v = MAT(A, n, i, i);
361  int k = i;
362  for (int j = i; j < n; j++) {
363  if (MAT(A, n, j, j) >= v) {
364  v = MAT(A, n, j, j);
365  k = j;
366  }
367  }
368  if (k != i) {
369  /* Swap eigenvalues. */
370  MAT(A, n, k, k) = MAT(A, n, i, i);
371  MAT(A, n, i, i) = v;
372  /* Swap eigenvectors. */
373  for (int j = 0; j < n; j++) {
374  float v = MATS(V, n, i, j, v_stride);
375  MATS(V, n, i, j, v_stride) = MATS(V, n, k, j, v_stride);
376  MATS(V, n, k, j, v_stride) = v;
377  }
378  }
379  }
380 }
381 
382 #ifdef __KERNEL_SSE3__
383 ccl_device_inline void math_vector_zero_sse(float4 *A, int n)
384 {
385  for (int i = 0; i < n; i++) {
386  A[i] = make_float4(0.0f);
387  }
388 }
389 
390 ccl_device_inline void math_matrix_zero_sse(float4 *A, int n)
391 {
392  for (int row = 0; row < n; row++) {
393  for (int col = 0; col <= row; col++) {
394  MAT(A, n, row, col) = make_float4(0.0f);
395  }
396  }
397 }
398 
399 /* Add Gramian matrix of v to A.
400  * The Gramian matrix of v is v^T*v, so element (i,j) is v[i]*v[j]. */
401 ccl_device_inline void math_matrix_add_gramian_sse(float4 *A,
402  int n,
403  const float4 *ccl_restrict v,
404  float4 weight)
405 {
406  for (int row = 0; row < n; row++) {
407  for (int col = 0; col <= row; col++) {
408  MAT(A, n, row, col) = MAT(A, n, row, col) + v[row] * v[col] * weight;
409  }
410  }
411 }
412 
413 ccl_device_inline void math_vector_add_sse(float4 *V, int n, const float4 *ccl_restrict a)
414 {
415  for (int i = 0; i < n; i++) {
416  V[i] += a[i];
417  }
418 }
419 
420 ccl_device_inline void math_vector_mul_sse(float4 *V, int n, const float4 *ccl_restrict a)
421 {
422  for (int i = 0; i < n; i++) {
423  V[i] *= a[i];
424  }
425 }
426 
427 ccl_device_inline void math_vector_max_sse(float4 *a, const float4 *ccl_restrict b, int n)
428 {
429  for (int i = 0; i < n; i++) {
430  a[i] = max(a[i], b[i]);
431  }
432 }
433 
434 ccl_device_inline void math_matrix_hsum(float *A, int n, const float4 *ccl_restrict B)
435 {
436  for (int row = 0; row < n; row++) {
437  for (int col = 0; col <= row; col++) {
438  MAT(A, n, row, col) = reduce_add(MAT(B, n, row, col))[0];
439  }
440  }
441 }
442 #endif
443 
444 #undef MAT
445 
447 
448 #endif /* __UTIL_MATH_MATRIX_H__ */
_GL_VOID GLfloat value _GL_VOID_RET _GL_VOID const GLuint GLboolean *residences _GL_BOOL_RET _GL_VOID GLsizei GLfloat GLfloat GLfloat GLfloat const GLubyte *bitmap _GL_VOID_RET _GL_VOID GLenum const void *lists _GL_VOID_RET _GL_VOID const GLdouble *equation _GL_VOID_RET _GL_VOID GLdouble GLdouble blue _GL_VOID_RET _GL_VOID GLfloat GLfloat blue _GL_VOID_RET _GL_VOID GLint GLint blue _GL_VOID_RET _GL_VOID GLshort GLshort blue _GL_VOID_RET _GL_VOID GLubyte GLubyte blue _GL_VOID_RET _GL_VOID GLuint GLuint blue _GL_VOID_RET _GL_VOID GLushort GLushort blue _GL_VOID_RET _GL_VOID GLbyte GLbyte GLbyte alpha _GL_VOID_RET _GL_VOID GLdouble GLdouble GLdouble alpha _GL_VOID_RET _GL_VOID GLfloat GLfloat GLfloat alpha _GL_VOID_RET _GL_VOID GLint GLint GLint alpha _GL_VOID_RET _GL_VOID GLshort GLshort GLshort alpha _GL_VOID_RET _GL_VOID GLubyte GLubyte GLubyte alpha _GL_VOID_RET _GL_VOID GLuint GLuint GLuint alpha _GL_VOID_RET _GL_VOID GLushort GLushort GLushort alpha _GL_VOID_RET _GL_VOID GLenum mode _GL_VOID_RET _GL_VOID GLint y
_GL_VOID GLfloat value _GL_VOID_RET _GL_VOID const GLuint GLboolean *residences _GL_BOOL_RET _GL_VOID GLsizei GLfloat GLfloat GLfloat GLfloat const GLubyte *bitmap _GL_VOID_RET _GL_VOID GLenum const void *lists _GL_VOID_RET _GL_VOID const GLdouble *equation _GL_VOID_RET _GL_VOID GLdouble GLdouble blue _GL_VOID_RET _GL_VOID GLfloat GLfloat blue _GL_VOID_RET _GL_VOID GLint GLint blue _GL_VOID_RET _GL_VOID GLshort GLshort blue _GL_VOID_RET _GL_VOID GLubyte GLubyte blue _GL_VOID_RET _GL_VOID GLuint GLuint blue _GL_VOID_RET _GL_VOID GLushort GLushort blue _GL_VOID_RET _GL_VOID GLbyte GLbyte GLbyte alpha _GL_VOID_RET _GL_VOID GLdouble GLdouble GLdouble alpha _GL_VOID_RET _GL_VOID GLfloat GLfloat GLfloat alpha _GL_VOID_RET _GL_VOID GLint GLint GLint alpha _GL_VOID_RET _GL_VOID GLshort GLshort GLshort alpha _GL_VOID_RET _GL_VOID GLubyte GLubyte GLubyte alpha _GL_VOID_RET _GL_VOID GLuint GLuint GLuint alpha _GL_VOID_RET _GL_VOID GLushort GLushort GLushort alpha _GL_VOID_RET _GL_VOID GLenum mode _GL_VOID_RET _GL_VOID GLint GLsizei GLsizei GLenum type _GL_VOID_RET _GL_VOID GLsizei GLenum GLenum const void *pixels _GL_VOID_RET _GL_VOID const void *pointer _GL_VOID_RET _GL_VOID GLdouble v _GL_VOID_RET _GL_VOID GLfloat v _GL_VOID_RET _GL_VOID GLint GLint i2 _GL_VOID_RET _GL_VOID GLint j _GL_VOID_RET _GL_VOID GLfloat param _GL_VOID_RET _GL_VOID GLint param _GL_VOID_RET _GL_VOID GLdouble GLdouble GLdouble GLdouble GLdouble zFar _GL_VOID_RET _GL_UINT GLdouble *equation _GL_VOID_RET _GL_VOID GLenum GLint *params _GL_VOID_RET _GL_VOID GLenum GLfloat *v _GL_VOID_RET _GL_VOID GLenum GLfloat *params _GL_VOID_RET _GL_VOID GLfloat *values _GL_VOID_RET _GL_VOID GLushort *values _GL_VOID_RET _GL_VOID GLenum GLfloat *params _GL_VOID_RET _GL_VOID GLenum GLdouble *params _GL_VOID_RET _GL_VOID GLenum GLint *params _GL_VOID_RET _GL_VOID GLsizei stride
ATTR_WARN_UNUSED_RESULT const void * element
ATTR_WARN_UNUSED_RESULT const BMVert * v
#define A
SIMD_FORCE_INLINE const btScalar & w() const
Return the w value.
Definition: btQuadWord.h:119
static T sum(const btAlignedObjectArray< T > &items)
uint col
#define ccl_restrict
#define ccl_device
#define ccl_device_inline
#define ccl_global
#define CCL_NAMESPACE_END
#define make_float4(x, y, z, w)
#define fabsf(x)
#define sqrtf(x)
#define B
static unsigned c
Definition: RandGen.cpp:97
static unsigned a[3]
Definition: RandGen.cpp:92
float max
#define atomic_add_and_fetch_float(p, x)
Definition: util_atomic.h:25
__forceinline int reduce_add(const avxi &v)
Definition: util_avxi.h:709
ccl_device_inline void math_vec3_add(float3 *v, int n, float *x, float3 w)
#define MAT(A, size, row, col)
ccl_device void math_trimatrix_cholesky(ccl_global float *A, int n, int stride)
ccl_device_inline void math_matrix_transpose(ccl_global float *A, int n, int stride)
ccl_device_inline void math_trimatrix_add_diagonal(ccl_global float *A, int n, float val, int stride)
ccl_device_inline void math_vector_add(float *a, const float *ccl_restrict b, int n)
ccl_device_inline void math_matrix_zero(float *A, int n)
#define MATHS(A, r, c, s)
ccl_device_inline void math_vec3_add_strided(ccl_global float3 *v, int n, float *x, float3 w, int stride)
ccl_device_inline void math_vector_zero(float *v, int n)
ccl_device_inline void math_vector_mul_strided(ccl_global float *a, const float *ccl_restrict b, int astride, int n)
#define ROT(M, r1, c1, r2, c2, stride)
ccl_device void math_matrix_jacobi_eigendecomposition(float *A, ccl_global float *V, int n, int v_stride)
ccl_device_inline void math_vector_max(float *a, const float *ccl_restrict b, int n)
ccl_device_inline void math_vector_scale(float *a, float b, int n)
ccl_device_inline void math_matrix_add_gramian(float *A, int n, const float *ccl_restrict v, float weight)
ccl_device_inline void math_trimatrix_add_gramian_strided(ccl_global float *A, int n, const float *ccl_restrict v, float weight, int stride)
ccl_device_inline void math_trimatrix_vec3_solve(ccl_global float *A, ccl_global float3 *y, int n, int stride)
#define MATS(A, n, r, c, s)
ccl_device_inline void math_vector_mul(float *a, const float *ccl_restrict b, int n)
ccl_device_inline void math_trimatrix_add_gramian(ccl_global float *A, int n, const float *ccl_restrict v, float weight)
#define VECS(V, i, s)
CCL_NAMESPACE_BEGIN struct View V