|
BeBOP Optimized Sparse Kernel Interface Library
1.0.1h
|
Conversion between CSR and VBR format. More...
#include <assert.h>#include <stdio.h>#include <stdlib.h>#include <stdarg.h>#include <oski/config.h>#include <oski/common.h>#include <oski/modloader.h>#include <oski/matrix.h>#include <oski/CSR/format.h>#include <oski/VBR/format.h>#include <oski/VBR/module.h>#include <oski/xforms_internal.h>Defines | |
| #define | MAX(a, b) ((a) > (b) ? (a) : (b)) |
| Return max of two side-effect free values. | |
Typedefs | |
| typedef unsigned char | flag_t |
| Flag type (takes the values 0 or 1) | |
Functions | |
| static int | ExpandSymm (const oski_matCSR_t *mat, const oski_matcommon_t *props, oski_matCSR_t **p_mat_full) |
| static int | TransposeSymm (const oski_matCSR_t *mat, const oski_matcommon_t *props, oski_matCSR_t **p_mat_trans) |
| static void | DestroyCSR (oski_matCSR_t *mat) |
| static void | SetFlags (oski_index_t n_x, const oski_index_t *restrict x, flag_t value, oski_index_t n, flag_t *restrict f) |
Scatters a specified value to all elements of a dense flag vector corresponding to the structurally non-zero elements of a sparse vector . | |
| static oski_index_t | CountUnique (oski_index_t n_x, const oski_index_t *restrict x, oski_index_t n, flag_t *restrict workspace) |
Counts the number of unique structurally non-zero elements in a sparse vector . | |
| static oski_index_t | CountNumCommonSpVec (oski_index_t i1, oski_index_t n1, const oski_index_t *restrict ind1, oski_index_t i2, oski_index_t n2, const oski_index_t *restrict ind2, oski_index_t b, int has_unit_diag, oski_index_t n, flag_t *restrict workspace) |
Returns the number of unique elements that two sparse vectors, and , have in common. | |
| static oski_index_t | FindBlockPart (oski_index_t m, const oski_index_t *restrict ptr, const oski_index_t *restrict ind, oski_index_t b, int has_unit_diag, double threshold, oski_index_t *b_start, oski_index_t n_max, flag_t *workspace) |
| Determine a partitioning of a CSR structure into groups of consecutive rows that have similar non-zero patterns. | |
| static void | MakeBlockMap (const oski_index_t *restrict I_starts, oski_index_t M, oski_index_t *restrict map) |
| Computes a row-to-block-row lookup table. | |
| static int | GetPart (oski_index_t m, oski_index_t n, const oski_index_t *ptr, const oski_index_t *ind, oski_index_t b, int has_unit_diag, int is_symm, const oski_index_t *Tptr, const oski_index_t *Tind, oski_index_t b_T, int has_unit_diag_T, double threshold_r, double threshold_c, oski_index_t *p_M, oski_index_t *p_N, oski_index_t **p_brow, oski_index_t **p_bcol, flag_t *work_f) |
| static void | CountVBRSize (oski_index_t m, oski_index_t n, const oski_index_t *restrict ptr, const oski_index_t *restrict ind, oski_index_t b, int has_unit_diag, oski_index_t M, oski_index_t N, oski_index_t *restrict brow, oski_index_t *restrict bcol, oski_index_t *p_nb, oski_index_t *p_nnz, flag_t *restrict f_workspace, oski_index_t *restrict i_workspace) |
| Given a CSR matrix and a candidate block row/column partitioning, counts the number of non-zero blocks and stored non-zero values required to represent a VBR version of the matrix. | |
| static void | CopyToVBR (oski_index_t m, oski_index_t n, const oski_index_t *restrict ptr, const oski_index_t *restrict ind, const oski_value_t *restrict val, oski_index_t b, int has_unit_diag, oski_index_t M, oski_index_t N, oski_index_t *restrict brow, oski_index_t *restrict bcol, oski_index_t nb, oski_index_t nnz, oski_index_t *V_ptr, oski_index_t *V_ind, oski_index_t *V_valptr, oski_value_t *V_val, flag_t *restrict f_workspace, oski_index_t *restrict i_workspace) |
| Given a CSR matrix and a candidate block row/column partitioning, counts the number of non-zero blocks and stored non-zero values required to represent a VBR version of the matrix. | |
| static int | ConvertToVBR (oski_index_t m, oski_index_t n, const oski_index_t *ptr, const oski_index_t *ind, const oski_value_t *val, oski_index_t b, int has_unit_diag, int is_symm, const oski_index_t *Tptr, const oski_index_t *Tind, oski_index_t b_T, int has_unit_diag_T, double threshold_r, double threshold_c, oski_index_t *p_M, oski_index_t *p_N, oski_index_t **p_brow, oski_index_t **p_bcol, oski_index_t **p_V_ptr, oski_index_t **p_V_ind, oski_index_t **p_V_valptr, oski_value_t **p_V_val) |
| void * | oski_CreateMatReprFromCSR (const oski_matCSR_t *mat, const oski_matcommon_t *props,...) |
| The variable argument list must contain the following parameters in the order listed: | |
| oski_matCSR_t * | oski_ConvertMatReprToCSR (const void *mat, const oski_matcommon_t *props) |
| Method: Convert to CSR format. | |
| void | oski_DestroyMatRepr (void *mat) |
| Method: Destroy matrix type-specific representation. | |
| void * | oski_CopyMatRepr (const void *mat, const oski_matcommon_t *props) |
| Method: Duplicate a matrix representation. | |
| int | oski_CreateLuaMatReprFromCSR (lua_State *L) |
| The VBR implementation expects the following arguments on the stack: | |
Conversion between CSR and VBR format.
| static void CopyToVBR | ( | oski_index_t | m, |
| oski_index_t | n, | ||
| const oski_index_t *restrict | ptr, | ||
| const oski_index_t *restrict | ind, | ||
| const oski_value_t *restrict | val, | ||
| oski_index_t | b, | ||
| int | has_unit_diag, | ||
| oski_index_t | M, | ||
| oski_index_t | N, | ||
| oski_index_t *restrict | brow, | ||
| oski_index_t *restrict | bcol, | ||
| oski_index_t | nb, | ||
| oski_index_t | nnz, | ||
| oski_index_t * | V_ptr, | ||
| oski_index_t * | V_ind, | ||
| oski_index_t * | V_valptr, | ||
| oski_value_t * | V_val, | ||
| flag_t *restrict | f_workspace, | ||
| oski_index_t *restrict | i_workspace | ||
| ) | [static] |
Given a CSR matrix and a candidate block row/column partitioning, counts the number of non-zero blocks and stored non-zero values required to represent a VBR version of the matrix.
| [in] | m | No. of rows. |
| [in] | n | No. of columns. |
| [in] | ptr | Row pointers, of size m+1. |
| [in] | ind | Column indices, of size equal to the number of stored structural non-zero entries. |
| [in] | b | Index base of CSR representation (0- or 1-based). |
| [in] | has_unit_diag | Non-zero if the CSR representation assumes an implicit unit diagonal. |
| [in] | M | No. of block rows in the VBR representation. |
| [in] | N | No. of block columns in the VBR representation. |
| [in] | brow | Starting row of each of the M block rows in the VBR representation. brow is of size M+1, and 0 = brow[0] < brow[1] < ... < brow[M] = m. |
| [in] | bcol | Starting column of each of the M block columns in the VBR representation. bcol is of size M+1, and 0 = bcol[0] < bcol[1] < ... < bcol[N] = n. |
| [out] | nb | Number of blocks needed to store the CSR matrix in VBR format. |
| [out] | nnz | Number of non-zero elements needed to store the CSR matrix in VBR format. |
| [in,out] | f_workspace | Temporary workspace, of size at least N. This workspace must be initialized to zero on entry, and will be returned as all zero on exit. |
| [in,out] | i_workspace | Additional temporary workspace, of size at least 2*n. The values on entry are not used. On exit, the values are undefined. |
References MakeBlockMap(), oski_ZeroMem(), VAL_INC, and VAL_SET_ONE.
| static oski_index_t CountNumCommonSpVec | ( | oski_index_t | i1, |
| oski_index_t | n1, | ||
| const oski_index_t *restrict | ind1, | ||
| oski_index_t | i2, | ||
| oski_index_t | n2, | ||
| const oski_index_t *restrict | ind2, | ||
| oski_index_t | b, | ||
| int | has_unit_diag, | ||
| oski_index_t | n, | ||
| flag_t *restrict | workspace | ||
| ) | [static] |
Returns the number of unique elements that two sparse vectors,
and
, have in common.
These vectors are assumed to be row (or column) vectors of a larger matrix, located at positions row (column)
and
, respectively.
| [in] | i1 | Index of vector . |
| [in] | n1 | No. of elements stored with . |
| [in] | ind1 | Non-zero indices associated with . |
| [in] | i2 | Index of vector . |
| [in] | n2 | No. of elements stored with . |
| [in] | ind2 | Non-zero indices associated with . |
| [in] | b | Index base (0- or 1-based) for i1, i2, and all values in ind1 and ind2. |
| [in] | n | A value greater than or equal to the maximum value in either ind1 or ind2. That is, for all 0 <= k1 < n1, n >= ind1[k1] and for all 0 <= k2 < n2, n >= ind2[k2]. |
| [in,out] | workspace | A buffer of size 2*(n+1), all of whose elements must be zero. |
References SetFlags().
Referenced by FindBlockPart().
| static oski_index_t CountUnique | ( | oski_index_t | n_x, |
| const oski_index_t *restrict | x, | ||
| oski_index_t | n, | ||
| flag_t *restrict | workspace | ||
| ) | [static] |
Counts the number of unique structurally non-zero elements in a sparse vector
.
| [in] | n_x | Number of structurally non-zero elements in . |
| [in] | x | Sparse vector indices of . |
| [in] | n | Value >= maximum index in . |
| [out] | workspace | Buffer of size at least n+1, initialized to all zeros on entry. |
. Resets any modified elements of the workspace back to 0 on return. References SetFlags().
Referenced by FindBlockPart().
| static void CountVBRSize | ( | oski_index_t | m, |
| oski_index_t | n, | ||
| const oski_index_t *restrict | ptr, | ||
| const oski_index_t *restrict | ind, | ||
| oski_index_t | b, | ||
| int | has_unit_diag, | ||
| oski_index_t | M, | ||
| oski_index_t | N, | ||
| oski_index_t *restrict | brow, | ||
| oski_index_t *restrict | bcol, | ||
| oski_index_t * | p_nb, | ||
| oski_index_t * | p_nnz, | ||
| flag_t *restrict | f_workspace, | ||
| oski_index_t *restrict | i_workspace | ||
| ) | [static] |
Given a CSR matrix and a candidate block row/column partitioning, counts the number of non-zero blocks and stored non-zero values required to represent a VBR version of the matrix.
| [in] | m | No. of rows. |
| [in] | n | No. of columns. |
| [in] | ptr | Row pointers, of size m+1. |
| [in] | ind | Column indices, of size equal to the number of stored structural non-zero entries. |
| [in] | b | Index base of CSR representation (0- or 1-based). |
| [in] | has_unit_diag | Non-zero if the CSR representation assumes an implicit unit diagonal. |
| [in] | M | No. of block rows in the VBR representation. |
| [in] | N | No. of block columns in the VBR representation. |
| [in] | brow | Starting row of each of the M block rows in the VBR representation. brow is of size M+1, and 0 = brow[0] < brow[1] < ... < brow[M] = m. |
| [in] | bcol | Starting column of each of the M block columns in the VBR representation. bcol is of size M+1, and 0 = bcol[0] < bcol[1] < ... < bcol[N] = n. |
| [out] | p_nb | Pointer to the number of non-zero blocks needed to store the CSR matrix in VBR format. If p_nb == NULL, then this value is not returned. |
| [out] | p_nnz | Pointer to the number of non-zero elements needed to store the CSR matrix in VBR format. If p_nnz == NULL, then this value is not returned. |
| [in,out] | f_workspace | Temporary workspace, of size at least N. This workspace must be initialized to zero on entry, and will be returned as all zero on exit. |
| [in,out] | i_workspace | Additional temporary workspace, of size at least n. The values on entry are not used. On exit, the values are undefined. |
References MakeBlockMap().
| static oski_index_t FindBlockPart | ( | oski_index_t | m, |
| const oski_index_t *restrict | ptr, | ||
| const oski_index_t *restrict | ind, | ||
| oski_index_t | b, | ||
| int | has_unit_diag, | ||
| double | threshold, | ||
| oski_index_t * | b_start, | ||
| oski_index_t | n_max, | ||
| flag_t * | workspace | ||
| ) | [static] |
Determine a partitioning of a CSR structure into groups of consecutive rows that have similar non-zero patterns.
This routine finds a block row partitioning, defined as a sequence of
block row starting positions
, where the
block row consists of all rows starting at
and ending at
. The block rows satisfy the following property.
be a pattern row vector corresponding to the non-zero pattern of row
of the matrix, and let
be the number of unique structural non-zeros in that row.
be a user-defined partitioning threshold.
satisfies
.| [in] | m | Number of rows, . |
| [in] | ptr | Row pointers, of size . |
| [in] | ind | Column indices, of size ptr[m]. |
| [in] | b | Index base (0- or 1-based) |
| [in] | has_unit_diag | Non-zero if matrix has an implicit unit diagonal. |
| [in] | threshold | The similarity threshold, . |
| [out] | b_start | Buffer in which to store the block row starts, of size at least . If b_start is NULL, then these values are not returned. |
| [in] | n_max | Maximum value of any column index in ind. |
| [in,out] | workspace | A buffer of size at least 2*(n_max+1) to be used as temporary storage. The workspace must be all zero initially, and if so, will be returned in an all-zero state. |
. If b_start is not NULL, returns the block row partitioning as well.The algorithm is greedy, starting at row 0 and growing each block row until the similarity property no longer holds.
. Thus, this routine can be called once to determine
, allocate a buffer b_start of size
, and then called again to determine b_start. References CountNumCommonSpVec(), CountUnique(), and MAX.
| static void MakeBlockMap | ( | const oski_index_t *restrict | I_starts, |
| oski_index_t | M, | ||
| oski_index_t *restrict | map | ||
| ) | [static] |
Computes a row-to-block-row lookup table.
| [in] | I_starts | Start of each row of a consecutive block row partitioning. |
| [in] | M | Number of block rows. |
| [in,out] | map | An array of size at least I_starts[M] to store the lookup table. |
Referenced by CopyToVBR(), and CountVBRSize().
| int oski_CreateLuaMatReprFromCSR | ( | lua_State * | L | ) |
The VBR implementation expects the following arguments on the stack:
Matrix-type specific method to convert from a CSR matrix, with arguments passed on the Lua stack.
| static void SetFlags | ( | oski_index_t | n_x, |
| const oski_index_t *restrict | x, | ||
| flag_t | value, | ||
| oski_index_t | n, | ||
| flag_t *restrict | f | ||
| ) | [static] |
Scatters a specified value to all elements of a dense flag vector
corresponding to the structurally non-zero elements of a sparse vector
.
More specifically, sets
for all
such that
.
| [in] | n_x | Number of structurally non-zero elements in . |
| [in] | x | Sparse vector indices of . |
| [in] | value | Value to assign. |
| [in] | n | Maximum length of . |
| [out] | f | Flag vector . |
corresponding to structurally non-zero elements of
are modified. Referenced by CountNumCommonSpVec(), and CountUnique().
1.7.6.1