|
libflame
revision_anchor
|
Functions | |
| void | bli_ssymm (side_t side, uplo_t uplo, int m, int n, float *alpha, float *a, int a_rs, int a_cs, float *b, int b_rs, int b_cs, float *beta, float *c, int c_rs, int c_cs) |
| void | bli_dsymm (side_t side, uplo_t uplo, int m, int n, double *alpha, double *a, int a_rs, int a_cs, double *b, int b_rs, int b_cs, double *beta, double *c, int c_rs, int c_cs) |
| void | bli_csymm (side_t side, uplo_t uplo, int m, int n, scomplex *alpha, scomplex *a, int a_rs, int a_cs, scomplex *b, int b_rs, int b_cs, scomplex *beta, scomplex *c, int c_rs, int c_cs) |
| void | bli_zsymm (side_t side, uplo_t uplo, int m, int n, dcomplex *alpha, dcomplex *a, int a_rs, int a_cs, dcomplex *b, int b_rs, int b_cs, dcomplex *beta, dcomplex *c, int c_rs, int c_cs) |
| void | bli_ssymm_blas (side_t side, uplo_t uplo, int m, int n, float *alpha, float *a, int lda, float *b, int ldb, float *beta, float *c, int ldc) |
| void | bli_dsymm_blas (side_t side, uplo_t uplo, int m, int n, double *alpha, double *a, int lda, double *b, int ldb, double *beta, double *c, int ldc) |
| void | bli_csymm_blas (side_t side, uplo_t uplo, int m, int n, scomplex *alpha, scomplex *a, int lda, scomplex *b, int ldb, scomplex *beta, scomplex *c, int ldc) |
| void | bli_zsymm_blas (side_t side, uplo_t uplo, int m, int n, dcomplex *alpha, dcomplex *a, int lda, dcomplex *b, int ldb, dcomplex *beta, dcomplex *c, int ldc) |
| void bli_csymm | ( | side_t | side, |
| uplo_t | uplo, | ||
| int | m, | ||
| int | n, | ||
| scomplex * | alpha, | ||
| scomplex * | a, | ||
| int | a_rs, | ||
| int | a_cs, | ||
| scomplex * | b, | ||
| int | b_rs, | ||
| int | b_cs, | ||
| scomplex * | beta, | ||
| scomplex * | c, | ||
| int | c_rs, | ||
| int | c_cs | ||
| ) |
References bli_c0(), bli_c1(), bli_callocm(), bli_caxpymt(), bli_ccopymt(), bli_ccreate_contigm(), bli_ccreate_contigmr(), bli_cfree(), bli_cfree_contigm(), bli_cfree_saved_contigm(), bli_cscalm(), bli_csymm_blas(), bli_is_col_storage(), bli_set_dim_with_side(), bli_zero_dim2(), BLIS_NO_CONJUGATE, BLIS_NO_TRANSPOSE, and BLIS_TRANSPOSE.
Referenced by FLA_Symm_external().
{
// General-stride driver for the scomplex symmetric matrix multiply.
// Each operand is described by a row stride (*_rs) and a column stride
// (*_cs). This routine rewrites the request as an equivalent column-major
// problem, building temporaries where needed, and hands it to
// bli_csymm_blas().
//
//   side, uplo  - side on which A is applied and triangle of A referenced;
//                 both may be toggled locally by the case analysis below.
//   m, n        - dimensions of B and C; may be swapped locally below.
//   alpha, beta - scalars, passed through by pointer.
//   a, b, c     - matrix buffers, each with its row and column stride.
//
// The caller's arguments are saved first: the contiguous-copy helpers
// overwrite a/b/c and their strides, and the case analysis may swap m and n.
int m_save = m;
int n_save = n;
scomplex* a_save = a;
scomplex* b_save = b;
scomplex* c_save = c;
int a_rs_save = a_rs;
int a_cs_save = a_cs;
int b_rs_save = b_rs;
int b_cs_save = b_cs;
int c_rs_save = c_rs;
int c_cs_save = c_cs;
// Constants used by the transposed-accumulate path: zero as the beta of the
// temporary product, one as the alpha of the final axpy.
scomplex zero = bli_c0();
scomplex one = bli_c1();
scomplex* b_copy;
scomplex* c_trans;
int dim_a;
// ld* is the leading dimension handed to the BLAS-style call and inc* the
// unit-direction stride. inca is kept in step with lda by the swaps below
// but is never read afterwards.
int lda, inca;
int ldb, incb;
int ldc, incc;
int ldb_copy, incb_copy;
int ldc_trans, incc_trans;
// Flags set by the storage-case analysis to request extra work.
int symm_needs_copyb = FALSE;
int symm_needs_transb = FALSE;
int symm_needs_axpyt = FALSE;
// Return early if possible.
if ( bli_zero_dim2( m, n ) ) return;
// If necessary, allocate, initialize, and use a temporary contiguous
// copy of each matrix rather than the original matrices.
// dim_a is the order of the square matrix A -- presumably m for a left-side
// multiply and n for a right-side one; confirm in bli_set_dim_with_side().
bli_set_dim_with_side( side, m, n, &dim_a );
bli_ccreate_contigmr( uplo,
dim_a,
dim_a,
a_save, a_rs_save, a_cs_save,
&a, &a_rs, &a_cs );
bli_ccreate_contigm( m,
n,
b_save, b_rs_save, b_cs_save,
&b, &b_rs, &b_cs );
bli_ccreate_contigm( m,
n,
c_save, c_rs_save, c_cs_save,
&c, &c_rs, &c_cs );
// Initialize with values assuming column-major storage.
lda = a_cs;
inca = a_rs;
ldb = b_cs;
incb = b_rs;
ldc = c_cs;
incc = c_rs;
// Adjust the parameters based on the storage of each matrix.
// In the notes below X_c / X_r mean "X stored by columns / by rows", and
// ~uplo means the opposite triangle. On the "effective operation" lines,
// X_c denotes the same memory reinterpreted as a column-major matrix.
// No branch applies a conjugation.
if ( bli_is_col_storage( c_rs, c_cs ) )
{
if ( bli_is_col_storage( a_rs, a_cs ) )
{
if ( bli_is_col_storage( b_rs, b_cs ) )
{
// requested operation: C_c += uplo( A_c ) * B_c
// effective operation: C_c += uplo( A_c ) * B_c
// Nothing to adjust: the request is already column-major.
}
else // if ( bli_is_row_storage( b_rs, b_cs ) )
{
// requested operation: C_c += uplo( A_c ) * B_r
// effective operation: C_c += uplo( A_c ) * B_c
// B is copied into a column-major temporary further down.
symm_needs_copyb = TRUE;
}
}
else // if ( bli_is_row_storage( a_rs, a_cs ) )
{
if ( bli_is_col_storage( b_rs, b_cs ) )
{
// requested operation: C_c += uplo( A_r ) * B_c
// effective operation: C_c += ~uplo( A_c ) * B_c
// Reading row-stored A as column-major exposes the opposite triangle.
bli_swap_ints( lda, inca );
bli_toggle_uplo( uplo );
}
else // if ( bli_is_row_storage( b_rs, b_cs ) )
{
// requested operation: C_c += uplo( A_r ) * B_r
// effective operation: C_c += ( B_c * ~uplo( A_c ) )^T
// The product is formed transposed in a temporary and then
// accumulated into C with a transpose (axpyt path below).
bli_swap_ints( lda, inca );
bli_swap_ints( ldb, incb );
bli_toggle_side( side );
bli_toggle_uplo( uplo );
symm_needs_axpyt = TRUE;
}
}
}
else // if ( bli_is_row_storage( c_rs, c_cs ) )
{
if ( bli_is_col_storage( a_rs, a_cs ) )
{
if ( bli_is_col_storage( b_rs, b_cs ) )
{
// requested operation: C_r += uplo( A_c ) * B_c
// effective operation: C_c += ( uplo( A_c ) * B_c )^T
// m and n are swapped so that they describe C as column-major.
bli_swap_ints( ldc, incc );
bli_swap_ints( m, n );
symm_needs_axpyt = TRUE;
}
else // if ( bli_is_row_storage( b_rs, b_cs ) )
{
// requested operation: C_r += uplo( A_c ) * B_r
// effective operation: C_c += B_c * uplo( A_c )
// uplo is not toggled here: A is column-stored, only the side flips.
bli_swap_ints( ldc, incc );
bli_swap_ints( ldb, incb );
bli_swap_ints( m, n );
bli_toggle_side( side );
}
}
else // if ( bli_is_row_storage( a_rs, a_cs ) )
{
if ( bli_is_col_storage( b_rs, b_cs ) )
{
// requested operation: C_r += uplo( A_r ) * B_c
// effective operation: C_c += B_c^T * ~uplo( A_c )
// B^T is materialized by the transposing copy further down.
bli_swap_ints( ldc, incc );
bli_swap_ints( lda, inca );
bli_swap_ints( m, n );
bli_toggle_side( side );
bli_toggle_uplo( uplo );
symm_needs_copyb = TRUE;
symm_needs_transb = TRUE;
}
else // if ( bli_is_row_storage( b_rs, b_cs ) )
{
// requested operation: C_r += uplo( A_r ) * B_r
// effective operation: C_c += B_c * ~uplo( A_c )
bli_swap_ints( ldc, incc );
bli_swap_ints( lda, inca );
bli_swap_ints( ldb, incb );
bli_swap_ints( m, n );
bli_toggle_uplo( uplo );
bli_toggle_side( side );
}
}
}
// We need a temporary matrix for the cases where B needs to be copied.
// By default the "copy" simply aliases B, so the non-copy cases can use
// b_copy / ldb_copy unconditionally.
b_copy = b;
ldb_copy = ldb;
incb_copy = incb;
// There are two cases where we need to make a copy of B: one where the
// copy's dimensions are transposed from the original B, and one where
// the dimensions are not swapped.
if ( symm_needs_copyb )
{
trans_t transb;
// Set transb, which determines whether or not we need to copy from B
// as if it needs a transposition. If a transposition is needed, then
// m and n have already been swapped. So in either case m
// represents the leading dimension of the copy.
if ( symm_needs_transb ) transb = BLIS_TRANSPOSE;
else transb = BLIS_NO_TRANSPOSE;
// The copy is an m-by-n column-major matrix with unit row stride.
b_copy = bli_callocm( m, n );
ldb_copy = m;
incb_copy = 1;
bli_ccopymt( transb,
m,
n,
b, incb, ldb,
b_copy, incb_copy, ldb_copy );
}
// There are two cases where we need to perform the symm and then axpy
// the result into C with a transposition. We handle those cases here.
if ( symm_needs_axpyt )
{
// We need a temporary matrix for holding C^T. Notice that m and n
// represent the dimensions of C, and thus C_trans is n-by-m
// (interpreting both as column-major matrices). So the leading
// dimension of the temporary matrix holding C^T is n.
c_trans = bli_callocm( n, m );
ldc_trans = n;
incc_trans = 1;
// Compute A * B (or B * A) and store the result in C_trans.
// Note that there is no overlap between the axpyt cases and
// the conja/copyb cases, hence the use of a, b, lda, and ldb.
// The dimensions are passed as (n, m) because the product is n-by-m, and
// beta is zero so the freshly allocated temporary is overwritten.
bli_csymm_blas( side,
uplo,
n,
m,
alpha,
a, lda,
b, ldb,
&zero,
c_trans, ldc_trans );
// Scale C by beta.
bli_cscalm( BLIS_NO_CONJUGATE,
m,
n,
beta,
c, incc, ldc );
// And finally, accumulate the matrix product in C_trans into C
// with a transpose.
bli_caxpymt( BLIS_TRANSPOSE,
m,
n,
&one,
c_trans, incc_trans, ldc_trans,
c, incc, ldc );
// Free the temporary matrix for C.
bli_cfree( c_trans );
}
else // no extra axpyt step needed
{
// Direct call: b_copy is either B itself or its column-major copy.
bli_csymm_blas( side,
uplo,
m,
n,
alpha,
a, lda,
b_copy, ldb_copy,
beta,
c, ldc );
}
// Release the copy of B only if one was actually allocated above.
if ( symm_needs_copyb )
bli_cfree( b_copy );
// Free any temporary contiguous matrices, copying the result back to
// the original matrix.
// The saved m and n are used for C because the working m and n may have
// been swapped by the case analysis.
bli_cfree_contigm( a_save, a_rs_save, a_cs_save,
&a, &a_rs, &a_cs );
bli_cfree_contigm( b_save, b_rs_save, b_cs_save,
&b, &b_rs, &b_cs );
bli_cfree_saved_contigm( m_save,
n_save,
c_save, c_rs_save, c_cs_save,
&c, &c_rs, &c_cs );
}
| void bli_csymm_blas | ( | side_t | side, |
| uplo_t | uplo, | ||
| int | m, | ||
| int | n, | ||
| scomplex * | alpha, | ||
| scomplex * | a, | ||
| int | lda, | ||
| scomplex * | b, | ||
| int | ldb, | ||
| scomplex * | beta, | ||
| scomplex * | c, | ||
| int | ldc | ||
| ) |
References bli_param_map_to_netlib_side(), bli_param_map_to_netlib_uplo(), cblas_csymm(), CblasColMajor, and F77_csymm().
Referenced by bli_csymm().
{
    // Column-major scomplex symm: translate the BLIS side/uplo parameters to
    // their netlib equivalents and forward everything to whichever BLAS
    // binding this build was configured with.
#ifdef BLIS_ENABLE_CBLAS_INTERFACES
    // CBLAS binding: integers go by value, complex scalars by pointer.
    enum CBLAS_SIDE netlib_side;
    enum CBLAS_UPLO netlib_uplo;

    bli_param_map_to_netlib_side( side, &netlib_side );
    bli_param_map_to_netlib_uplo( uplo, &netlib_uplo );

    cblas_csymm( CblasColMajor, netlib_side, netlib_uplo,
                 m, n,
                 alpha,
                 a, lda,
                 b, ldb,
                 beta,
                 c, ldc );
#else
    // Fortran-77 binding: every argument is passed by address.
    char netlib_side;
    char netlib_uplo;

    bli_param_map_to_netlib_side( side, &netlib_side );
    bli_param_map_to_netlib_uplo( uplo, &netlib_uplo );

    F77_csymm( &netlib_side, &netlib_uplo,
               &m, &n,
               alpha,
               a, &lda,
               b, &ldb,
               beta,
               c, &ldc );
#endif
}
| void bli_dsymm | ( | side_t | side, |
| uplo_t | uplo, | ||
| int | m, | ||
| int | n, | ||
| double * | alpha, | ||
| double * | a, | ||
| int | a_rs, | ||
| int | a_cs, | ||
| double * | b, | ||
| int | b_rs, | ||
| int | b_cs, | ||
| double * | beta, | ||
| double * | c, | ||
| int | c_rs, | ||
| int | c_cs | ||
| ) |
References bli_d0(), bli_d1(), bli_dallocm(), bli_daxpymt(), bli_dcopymt(), bli_dcreate_contigm(), bli_dcreate_contigmr(), bli_dfree(), bli_dfree_contigm(), bli_dfree_saved_contigm(), bli_dscalm(), bli_dsymm_blas(), bli_is_col_storage(), bli_set_dim_with_side(), bli_zero_dim2(), BLIS_NO_CONJUGATE, BLIS_NO_TRANSPOSE, and BLIS_TRANSPOSE.
Referenced by bli_dhemm(), FLA_Hemm_external(), and FLA_Symm_external().
{
// General-stride driver for the double-precision symmetric matrix multiply.
// Each operand is described by a row stride (*_rs) and a column stride
// (*_cs). This routine rewrites the request as an equivalent column-major
// problem, building temporaries where needed, and hands it to
// bli_dsymm_blas().
//
//   side, uplo  - side on which A is applied and triangle of A referenced;
//                 both may be toggled locally by the case analysis below.
//   m, n        - dimensions of B and C; may be swapped locally below.
//   alpha, beta - scalars, passed through by pointer.
//   a, b, c     - matrix buffers, each with its row and column stride.
//
// The caller's arguments are saved first: the contiguous-copy helpers
// overwrite a/b/c and their strides, and the case analysis may swap m and n.
int m_save = m;
int n_save = n;
double* a_save = a;
double* b_save = b;
double* c_save = c;
int a_rs_save = a_rs;
int a_cs_save = a_cs;
int b_rs_save = b_rs;
int b_cs_save = b_cs;
int c_rs_save = c_rs;
int c_cs_save = c_cs;
// Constants used by the transposed-accumulate path: zero as the beta of the
// temporary product, one as the alpha of the final axpy.
double zero = bli_d0();
double one = bli_d1();
double* b_copy;
double* c_trans;
int dim_a;
// ld* is the leading dimension handed to the BLAS-style call and inc* the
// unit-direction stride. inca is kept in step with lda by the swaps below
// but is never read afterwards.
int lda, inca;
int ldb, incb;
int ldc, incc;
int ldb_copy, incb_copy;
int ldc_trans, incc_trans;
// Flags set by the storage-case analysis to request extra work.
int symm_needs_copyb = FALSE;
int symm_needs_transb = FALSE;
int symm_needs_axpyt = FALSE;
// Return early if possible.
if ( bli_zero_dim2( m, n ) ) return;
// If necessary, allocate, initialize, and use a temporary contiguous
// copy of each matrix rather than the original matrices.
// dim_a is the order of the square matrix A -- presumably m for a left-side
// multiply and n for a right-side one; confirm in bli_set_dim_with_side().
bli_set_dim_with_side( side, m, n, &dim_a );
bli_dcreate_contigmr( uplo,
dim_a,
dim_a,
a_save, a_rs_save, a_cs_save,
&a, &a_rs, &a_cs );
bli_dcreate_contigm( m,
n,
b_save, b_rs_save, b_cs_save,
&b, &b_rs, &b_cs );
bli_dcreate_contigm( m,
n,
c_save, c_rs_save, c_cs_save,
&c, &c_rs, &c_cs );
// Initialize with values assuming column-major storage.
lda = a_cs;
inca = a_rs;
ldb = b_cs;
incb = b_rs;
ldc = c_cs;
incc = c_rs;
// Adjust the parameters based on the storage of each matrix.
// In the notes below X_c / X_r mean "X stored by columns / by rows", and
// ~uplo means the opposite triangle. On the "effective operation" lines,
// X_c denotes the same memory reinterpreted as a column-major matrix.
// No branch applies a conjugation.
if ( bli_is_col_storage( c_rs, c_cs ) )
{
if ( bli_is_col_storage( a_rs, a_cs ) )
{
if ( bli_is_col_storage( b_rs, b_cs ) )
{
// requested operation: C_c += uplo( A_c ) * B_c
// effective operation: C_c += uplo( A_c ) * B_c
// Nothing to adjust: the request is already column-major.
}
else // if ( bli_is_row_storage( b_rs, b_cs ) )
{
// requested operation: C_c += uplo( A_c ) * B_r
// effective operation: C_c += uplo( A_c ) * B_c
// B is copied into a column-major temporary further down.
symm_needs_copyb = TRUE;
}
}
else // if ( bli_is_row_storage( a_rs, a_cs ) )
{
if ( bli_is_col_storage( b_rs, b_cs ) )
{
// requested operation: C_c += uplo( A_r ) * B_c
// effective operation: C_c += ~uplo( A_c ) * B_c
// Reading row-stored A as column-major exposes the opposite triangle.
bli_swap_ints( lda, inca );
bli_toggle_uplo( uplo );
}
else // if ( bli_is_row_storage( b_rs, b_cs ) )
{
// requested operation: C_c += uplo( A_r ) * B_r
// effective operation: C_c += ( B_c * ~uplo( A_c ) )^T
// The product is formed transposed in a temporary and then
// accumulated into C with a transpose (axpyt path below).
bli_swap_ints( lda, inca );
bli_swap_ints( ldb, incb );
bli_toggle_side( side );
bli_toggle_uplo( uplo );
symm_needs_axpyt = TRUE;
}
}
}
else // if ( bli_is_row_storage( c_rs, c_cs ) )
{
if ( bli_is_col_storage( a_rs, a_cs ) )
{
if ( bli_is_col_storage( b_rs, b_cs ) )
{
// requested operation: C_r += uplo( A_c ) * B_c
// effective operation: C_c += ( uplo( A_c ) * B_c )^T
// m and n are swapped so that they describe C as column-major.
bli_swap_ints( ldc, incc );
bli_swap_ints( m, n );
symm_needs_axpyt = TRUE;
}
else // if ( bli_is_row_storage( b_rs, b_cs ) )
{
// requested operation: C_r += uplo( A_c ) * B_r
// effective operation: C_c += B_c * uplo( A_c )
// uplo is not toggled here: A is column-stored, only the side flips.
bli_swap_ints( ldc, incc );
bli_swap_ints( ldb, incb );
bli_swap_ints( m, n );
bli_toggle_side( side );
}
}
else // if ( bli_is_row_storage( a_rs, a_cs ) )
{
if ( bli_is_col_storage( b_rs, b_cs ) )
{
// requested operation: C_r += uplo( A_r ) * B_c
// effective operation: C_c += B_c^T * ~uplo( A_c )
// B^T is materialized by the transposing copy further down.
bli_swap_ints( ldc, incc );
bli_swap_ints( lda, inca );
bli_swap_ints( m, n );
bli_toggle_side( side );
bli_toggle_uplo( uplo );
symm_needs_copyb = TRUE;
symm_needs_transb = TRUE;
}
else // if ( bli_is_row_storage( b_rs, b_cs ) )
{
// requested operation: C_r += uplo( A_r ) * B_r
// effective operation: C_c += B_c * ~uplo( A_c )
bli_swap_ints( ldc, incc );
bli_swap_ints( lda, inca );
bli_swap_ints( ldb, incb );
bli_swap_ints( m, n );
bli_toggle_uplo( uplo );
bli_toggle_side( side );
}
}
}
// We need a temporary matrix for the cases where B needs to be copied.
// By default the "copy" simply aliases B, so the non-copy cases can use
// b_copy / ldb_copy unconditionally.
b_copy = b;
ldb_copy = ldb;
incb_copy = incb;
// There are two cases where we need to make a copy of B: one where the
// copy's dimensions are transposed from the original B, and one where
// the dimensions are not swapped.
if ( symm_needs_copyb )
{
trans_t transb;
// Set transb, which determines whether or not we need to copy from B
// as if it needs a transposition. If a transposition is needed, then
// m and n have already been swapped. So in either case m
// represents the leading dimension of the copy.
if ( symm_needs_transb ) transb = BLIS_TRANSPOSE;
else transb = BLIS_NO_TRANSPOSE;
// The copy is an m-by-n column-major matrix with unit row stride.
b_copy = bli_dallocm( m, n );
ldb_copy = m;
incb_copy = 1;
bli_dcopymt( transb,
m,
n,
b, incb, ldb,
b_copy, incb_copy, ldb_copy );
}
// There are two cases where we need to perform the symm and then axpy
// the result into C with a transposition. We handle those cases here.
if ( symm_needs_axpyt )
{
// We need a temporary matrix for holding C^T. Notice that m and n
// represent the dimensions of C, and thus C_trans is n-by-m
// (interpreting both as column-major matrices). So the leading
// dimension of the temporary matrix holding C^T is n.
c_trans = bli_dallocm( n, m );
ldc_trans = n;
incc_trans = 1;
// Compute A * B (or B * A) and store the result in C_trans.
// Note that there is no overlap between the axpyt cases and
// the conja/copyb cases, hence the use of a, b, lda, and ldb.
// The dimensions are passed as (n, m) because the product is n-by-m, and
// beta is zero so the freshly allocated temporary is overwritten.
bli_dsymm_blas( side,
uplo,
n,
m,
alpha,
a, lda,
b, ldb,
&zero,
c_trans, ldc_trans );
// Scale C by beta.
bli_dscalm( BLIS_NO_CONJUGATE,
m,
n,
beta,
c, incc, ldc );
// And finally, accumulate the matrix product in C_trans into C
// with a transpose.
bli_daxpymt( BLIS_TRANSPOSE,
m,
n,
&one,
c_trans, incc_trans, ldc_trans,
c, incc, ldc );
// Free the temporary matrix for C.
bli_dfree( c_trans );
}
else // no extra axpyt step needed
{
// Direct call: b_copy is either B itself or its column-major copy.
bli_dsymm_blas( side,
uplo,
m,
n,
alpha,
a, lda,
b_copy, ldb_copy,
beta,
c, ldc );
}
// Release the copy of B only if one was actually allocated above.
if ( symm_needs_copyb )
bli_dfree( b_copy );
// Free any temporary contiguous matrices, copying the result back to
// the original matrix.
// The saved m and n are used for C because the working m and n may have
// been swapped by the case analysis.
bli_dfree_contigm( a_save, a_rs_save, a_cs_save,
&a, &a_rs, &a_cs );
bli_dfree_contigm( b_save, b_rs_save, b_cs_save,
&b, &b_rs, &b_cs );
bli_dfree_saved_contigm( m_save,
n_save,
c_save, c_rs_save, c_cs_save,
&c, &c_rs, &c_cs );
}
| void bli_dsymm_blas | ( | side_t | side, |
| uplo_t | uplo, | ||
| int | m, | ||
| int | n, | ||
| double * | alpha, | ||
| double * | a, | ||
| int | lda, | ||
| double * | b, | ||
| int | ldb, | ||
| double * | beta, | ||
| double * | c, | ||
| int | ldc | ||
| ) |
References bli_param_map_to_netlib_side(), bli_param_map_to_netlib_uplo(), cblas_dsymm(), CblasColMajor, and F77_dsymm().
Referenced by bli_dsymm().
{
    // Column-major double-precision symm: translate the BLIS side/uplo
    // parameters to their netlib equivalents and forward everything to
    // whichever BLAS binding this build was configured with.
#ifdef BLIS_ENABLE_CBLAS_INTERFACES
    // CBLAS binding: integers and the real scalars go by value.
    enum CBLAS_SIDE netlib_side;
    enum CBLAS_UPLO netlib_uplo;

    bli_param_map_to_netlib_side( side, &netlib_side );
    bli_param_map_to_netlib_uplo( uplo, &netlib_uplo );

    cblas_dsymm( CblasColMajor, netlib_side, netlib_uplo,
                 m, n,
                 *alpha,
                 a, lda,
                 b, ldb,
                 *beta,
                 c, ldc );
#else
    // Fortran-77 binding: every argument is passed by address.
    char netlib_side;
    char netlib_uplo;

    bli_param_map_to_netlib_side( side, &netlib_side );
    bli_param_map_to_netlib_uplo( uplo, &netlib_uplo );

    F77_dsymm( &netlib_side, &netlib_uplo,
               &m, &n,
               alpha,
               a, &lda,
               b, &ldb,
               beta,
               c, &ldc );
#endif
}
| void bli_ssymm | ( | side_t | side, |
| uplo_t | uplo, | ||
| int | m, | ||
| int | n, | ||
| float * | alpha, | ||
| float * | a, | ||
| int | a_rs, | ||
| int | a_cs, | ||
| float * | b, | ||
| int | b_rs, | ||
| int | b_cs, | ||
| float * | beta, | ||
| float * | c, | ||
| int | c_rs, | ||
| int | c_cs | ||
| ) |
References bli_is_col_storage(), bli_s0(), bli_s1(), bli_sallocm(), bli_saxpymt(), bli_scopymt(), bli_screate_contigm(), bli_screate_contigmr(), bli_set_dim_with_side(), bli_sfree(), bli_sfree_contigm(), bli_sfree_saved_contigm(), bli_sscalm(), bli_ssymm_blas(), bli_zero_dim2(), BLIS_NO_CONJUGATE, BLIS_NO_TRANSPOSE, and BLIS_TRANSPOSE.
Referenced by bli_shemm(), FLA_Hemm_external(), and FLA_Symm_external().
{
// General-stride driver for the single-precision symmetric matrix multiply.
// Each operand is described by a row stride (*_rs) and a column stride
// (*_cs). This routine rewrites the request as an equivalent column-major
// problem, building temporaries where needed, and hands it to
// bli_ssymm_blas().
//
//   side, uplo  - side on which A is applied and triangle of A referenced;
//                 both may be toggled locally by the case analysis below.
//   m, n        - dimensions of B and C; may be swapped locally below.
//   alpha, beta - scalars, passed through by pointer.
//   a, b, c     - matrix buffers, each with its row and column stride.
//
// The caller's arguments are saved first: the contiguous-copy helpers
// overwrite a/b/c and their strides, and the case analysis may swap m and n.
int m_save = m;
int n_save = n;
float* a_save = a;
float* b_save = b;
float* c_save = c;
int a_rs_save = a_rs;
int a_cs_save = a_cs;
int b_rs_save = b_rs;
int b_cs_save = b_cs;
int c_rs_save = c_rs;
int c_cs_save = c_cs;
// Constants used by the transposed-accumulate path: zero as the beta of the
// temporary product, one as the alpha of the final axpy.
float zero = bli_s0();
float one = bli_s1();
float* b_copy;
float* c_trans;
int dim_a;
// ld* is the leading dimension handed to the BLAS-style call and inc* the
// unit-direction stride. inca is kept in step with lda by the swaps below
// but is never read afterwards.
int lda, inca;
int ldb, incb;
int ldc, incc;
int ldb_copy, incb_copy;
int ldc_trans, incc_trans;
// Flags set by the storage-case analysis to request extra work.
int symm_needs_copyb = FALSE;
int symm_needs_transb = FALSE;
int symm_needs_axpyt = FALSE;
// Return early if possible.
if ( bli_zero_dim2( m, n ) ) return;
// If necessary, allocate, initialize, and use a temporary contiguous
// copy of each matrix rather than the original matrices.
// dim_a is the order of the square matrix A -- presumably m for a left-side
// multiply and n for a right-side one; confirm in bli_set_dim_with_side().
bli_set_dim_with_side( side, m, n, &dim_a );
bli_screate_contigmr( uplo,
dim_a,
dim_a,
a_save, a_rs_save, a_cs_save,
&a, &a_rs, &a_cs );
bli_screate_contigm( m,
n,
b_save, b_rs_save, b_cs_save,
&b, &b_rs, &b_cs );
bli_screate_contigm( m,
n,
c_save, c_rs_save, c_cs_save,
&c, &c_rs, &c_cs );
// Initialize with values assuming column-major storage.
lda = a_cs;
inca = a_rs;
ldb = b_cs;
incb = b_rs;
ldc = c_cs;
incc = c_rs;
// Adjust the parameters based on the storage of each matrix.
// In the notes below X_c / X_r mean "X stored by columns / by rows", and
// ~uplo means the opposite triangle. On the "effective operation" lines,
// X_c denotes the same memory reinterpreted as a column-major matrix.
// No branch applies a conjugation.
if ( bli_is_col_storage( c_rs, c_cs ) )
{
if ( bli_is_col_storage( a_rs, a_cs ) )
{
if ( bli_is_col_storage( b_rs, b_cs ) )
{
// requested operation: C_c += uplo( A_c ) * B_c
// effective operation: C_c += uplo( A_c ) * B_c
// Nothing to adjust: the request is already column-major.
}
else // if ( bli_is_row_storage( b_rs, b_cs ) )
{
// requested operation: C_c += uplo( A_c ) * B_r
// effective operation: C_c += uplo( A_c ) * B_c
// B is copied into a column-major temporary further down.
symm_needs_copyb = TRUE;
}
}
else // if ( bli_is_row_storage( a_rs, a_cs ) )
{
if ( bli_is_col_storage( b_rs, b_cs ) )
{
// requested operation: C_c += uplo( A_r ) * B_c
// effective operation: C_c += ~uplo( A_c ) * B_c
// Reading row-stored A as column-major exposes the opposite triangle.
bli_swap_ints( lda, inca );
bli_toggle_uplo( uplo );
}
else // if ( bli_is_row_storage( b_rs, b_cs ) )
{
// requested operation: C_c += uplo( A_r ) * B_r
// effective operation: C_c += ( B_c * ~uplo( A_c ) )^T
// The product is formed transposed in a temporary and then
// accumulated into C with a transpose (axpyt path below).
bli_swap_ints( lda, inca );
bli_swap_ints( ldb, incb );
bli_toggle_side( side );
bli_toggle_uplo( uplo );
symm_needs_axpyt = TRUE;
}
}
}
else // if ( bli_is_row_storage( c_rs, c_cs ) )
{
if ( bli_is_col_storage( a_rs, a_cs ) )
{
if ( bli_is_col_storage( b_rs, b_cs ) )
{
// requested operation: C_r += uplo( A_c ) * B_c
// effective operation: C_c += ( uplo( A_c ) * B_c )^T
// m and n are swapped so that they describe C as column-major.
bli_swap_ints( ldc, incc );
bli_swap_ints( m, n );
symm_needs_axpyt = TRUE;
}
else // if ( bli_is_row_storage( b_rs, b_cs ) )
{
// requested operation: C_r += uplo( A_c ) * B_r
// effective operation: C_c += B_c * uplo( A_c )
// uplo is not toggled here: A is column-stored, only the side flips.
bli_swap_ints( ldc, incc );
bli_swap_ints( ldb, incb );
bli_swap_ints( m, n );
bli_toggle_side( side );
}
}
else // if ( bli_is_row_storage( a_rs, a_cs ) )
{
if ( bli_is_col_storage( b_rs, b_cs ) )
{
// requested operation: C_r += uplo( A_r ) * B_c
// effective operation: C_c += B_c^T * ~uplo( A_c )
// B^T is materialized by the transposing copy further down.
bli_swap_ints( ldc, incc );
bli_swap_ints( lda, inca );
bli_swap_ints( m, n );
bli_toggle_side( side );
bli_toggle_uplo( uplo );
symm_needs_copyb = TRUE;
symm_needs_transb = TRUE;
}
else // if ( bli_is_row_storage( b_rs, b_cs ) )
{
// requested operation: C_r += uplo( A_r ) * B_r
// effective operation: C_c += B_c * ~uplo( A_c )
bli_swap_ints( ldc, incc );
bli_swap_ints( lda, inca );
bli_swap_ints( ldb, incb );
bli_swap_ints( m, n );
bli_toggle_uplo( uplo );
bli_toggle_side( side );
}
}
}
// We need a temporary matrix for the cases where B needs to be copied.
// By default the "copy" simply aliases B, so the non-copy cases can use
// b_copy / ldb_copy unconditionally.
b_copy = b;
ldb_copy = ldb;
incb_copy = incb;
// There are two cases where we need to make a copy of B: one where the
// copy's dimensions are transposed from the original B, and one where
// the dimensions are not swapped.
if ( symm_needs_copyb )
{
trans_t transb;
// Set transb, which determines whether or not we need to copy from B
// as if it needs a transposition. If a transposition is needed, then
// m and n have already been swapped. So in either case m
// represents the leading dimension of the copy.
if ( symm_needs_transb ) transb = BLIS_TRANSPOSE;
else transb = BLIS_NO_TRANSPOSE;
// The copy is an m-by-n column-major matrix with unit row stride.
b_copy = bli_sallocm( m, n );
ldb_copy = m;
incb_copy = 1;
bli_scopymt( transb,
m,
n,
b, incb, ldb,
b_copy, incb_copy, ldb_copy );
}
// There are two cases where we need to perform the symm and then axpy
// the result into C with a transposition. We handle those cases here.
if ( symm_needs_axpyt )
{
// We need a temporary matrix for holding C^T. Notice that m and n
// represent the dimensions of C, and thus C_trans is n-by-m
// (interpreting both as column-major matrices). So the leading
// dimension of the temporary matrix holding C^T is n.
c_trans = bli_sallocm( n, m );
ldc_trans = n;
incc_trans = 1;
// Compute A * B (or B * A) and store the result in C_trans.
// Note that there is no overlap between the axpyt cases and
// the conja/copyb cases, hence the use of a, b, lda, and ldb.
// The dimensions are passed as (n, m) because the product is n-by-m, and
// beta is zero so the freshly allocated temporary is overwritten.
bli_ssymm_blas( side,
uplo,
n,
m,
alpha,
a, lda,
b, ldb,
&zero,
c_trans, ldc_trans );
// Scale C by beta.
bli_sscalm( BLIS_NO_CONJUGATE,
m,
n,
beta,
c, incc, ldc );
// And finally, accumulate the matrix product in C_trans into C
// with a transpose.
bli_saxpymt( BLIS_TRANSPOSE,
m,
n,
&one,
c_trans, incc_trans, ldc_trans,
c, incc, ldc );
// Free the temporary matrix for C.
bli_sfree( c_trans );
}
else // no extra axpyt step needed
{
// Direct call: b_copy is either B itself or its column-major copy.
bli_ssymm_blas( side,
uplo,
m,
n,
alpha,
a, lda,
b_copy, ldb_copy,
beta,
c, ldc );
}
// Release the copy of B only if one was actually allocated above.
if ( symm_needs_copyb )
bli_sfree( b_copy );
// Free any temporary contiguous matrices, copying the result back to
// the original matrix.
// The saved m and n are used for C because the working m and n may have
// been swapped by the case analysis.
bli_sfree_contigm( a_save, a_rs_save, a_cs_save,
&a, &a_rs, &a_cs );
bli_sfree_contigm( b_save, b_rs_save, b_cs_save,
&b, &b_rs, &b_cs );
bli_sfree_saved_contigm( m_save,
n_save,
c_save, c_rs_save, c_cs_save,
&c, &c_rs, &c_cs );
}
| void bli_ssymm_blas | ( | side_t | side, |
| uplo_t | uplo, | ||
| int | m, | ||
| int | n, | ||
| float * | alpha, | ||
| float * | a, | ||
| int | lda, | ||
| float * | b, | ||
| int | ldb, | ||
| float * | beta, | ||
| float * | c, | ||
| int | ldc | ||
| ) |
References bli_param_map_to_netlib_side(), bli_param_map_to_netlib_uplo(), cblas_ssymm(), CblasColMajor, and F77_ssymm().
Referenced by bli_ssymm().
{
    // Column-major single-precision symm: translate the BLIS side/uplo
    // parameters to their netlib equivalents and forward everything to
    // whichever BLAS binding this build was configured with.
#ifdef BLIS_ENABLE_CBLAS_INTERFACES
    // CBLAS binding: integers and the real scalars go by value.
    enum CBLAS_SIDE netlib_side;
    enum CBLAS_UPLO netlib_uplo;

    bli_param_map_to_netlib_side( side, &netlib_side );
    bli_param_map_to_netlib_uplo( uplo, &netlib_uplo );

    cblas_ssymm( CblasColMajor, netlib_side, netlib_uplo,
                 m, n,
                 *alpha,
                 a, lda,
                 b, ldb,
                 *beta,
                 c, ldc );
#else
    // Fortran-77 binding: every argument is passed by address.
    char netlib_side;
    char netlib_uplo;

    bli_param_map_to_netlib_side( side, &netlib_side );
    bli_param_map_to_netlib_uplo( uplo, &netlib_uplo );

    F77_ssymm( &netlib_side, &netlib_uplo,
               &m, &n,
               alpha,
               a, &lda,
               b, &ldb,
               beta,
               c, &ldc );
#endif
}
| void bli_zsymm | ( | side_t | side, |
| uplo_t | uplo, | ||
| int | m, | ||
| int | n, | ||
| dcomplex * | alpha, | ||
| dcomplex * | a, | ||
| int | a_rs, | ||
| int | a_cs, | ||
| dcomplex * | b, | ||
| int | b_rs, | ||
| int | b_cs, | ||
| dcomplex * | beta, | ||
| dcomplex * | c, | ||
| int | c_rs, | ||
| int | c_cs | ||
| ) |
References bli_is_col_storage(), bli_set_dim_with_side(), bli_z0(), bli_z1(), bli_zallocm(), bli_zaxpymt(), bli_zcopymt(), bli_zcreate_contigm(), bli_zcreate_contigmr(), bli_zero_dim2(), bli_zfree(), bli_zfree_contigm(), bli_zfree_saved_contigm(), bli_zscalm(), bli_zsymm_blas(), BLIS_NO_CONJUGATE, BLIS_NO_TRANSPOSE, and BLIS_TRANSPOSE.
Referenced by FLA_Symm_external().
{
// General-stride driver for the dcomplex symmetric matrix multiply.
// Each operand is described by a row stride (*_rs) and a column stride
// (*_cs). This routine rewrites the request as an equivalent column-major
// problem, building temporaries where needed, and hands it to
// bli_zsymm_blas().
//
//   side, uplo  - side on which A is applied and triangle of A referenced;
//                 both may be toggled locally by the case analysis below.
//   m, n        - dimensions of B and C; may be swapped locally below.
//   alpha, beta - scalars, passed through by pointer.
//   a, b, c     - matrix buffers, each with its row and column stride.
//
// The caller's arguments are saved first: the contiguous-copy helpers
// overwrite a/b/c and their strides, and the case analysis may swap m and n.
int m_save = m;
int n_save = n;
dcomplex* a_save = a;
dcomplex* b_save = b;
dcomplex* c_save = c;
int a_rs_save = a_rs;
int a_cs_save = a_cs;
int b_rs_save = b_rs;
int b_cs_save = b_cs;
int c_rs_save = c_rs;
int c_cs_save = c_cs;
// Constants used by the transposed-accumulate path: zero as the beta of the
// temporary product, one as the alpha of the final axpy.
dcomplex zero = bli_z0();
dcomplex one = bli_z1();
dcomplex* b_copy;
dcomplex* c_trans;
int dim_a;
// ld* is the leading dimension handed to the BLAS-style call and inc* the
// unit-direction stride. inca is kept in step with lda by the swaps below
// but is never read afterwards.
int lda, inca;
int ldb, incb;
int ldc, incc;
int ldb_copy, incb_copy;
int ldc_trans, incc_trans;
// Flags set by the storage-case analysis to request extra work.
int symm_needs_copyb = FALSE;
int symm_needs_transb = FALSE;
int symm_needs_axpyt = FALSE;
// Return early if possible.
if ( bli_zero_dim2( m, n ) ) return;
// If necessary, allocate, initialize, and use a temporary contiguous
// copy of each matrix rather than the original matrices.
// dim_a is the order of the square matrix A -- presumably m for a left-side
// multiply and n for a right-side one; confirm in bli_set_dim_with_side().
bli_set_dim_with_side( side, m, n, &dim_a );
bli_zcreate_contigmr( uplo,
dim_a,
dim_a,
a_save, a_rs_save, a_cs_save,
&a, &a_rs, &a_cs );
bli_zcreate_contigm( m,
n,
b_save, b_rs_save, b_cs_save,
&b, &b_rs, &b_cs );
bli_zcreate_contigm( m,
n,
c_save, c_rs_save, c_cs_save,
&c, &c_rs, &c_cs );
// Initialize with values assuming column-major storage.
lda = a_cs;
inca = a_rs;
ldb = b_cs;
incb = b_rs;
ldc = c_cs;
incc = c_rs;
// Adjust the parameters based on the storage of each matrix.
// In the notes below X_c / X_r mean "X stored by columns / by rows", and
// ~uplo means the opposite triangle. On the "effective operation" lines,
// X_c denotes the same memory reinterpreted as a column-major matrix.
// No branch applies a conjugation.
if ( bli_is_col_storage( c_rs, c_cs ) )
{
if ( bli_is_col_storage( a_rs, a_cs ) )
{
if ( bli_is_col_storage( b_rs, b_cs ) )
{
// requested operation: C_c += uplo( A_c ) * B_c
// effective operation: C_c += uplo( A_c ) * B_c
// Nothing to adjust: the request is already column-major.
}
else // if ( bli_is_row_storage( b_rs, b_cs ) )
{
// requested operation: C_c += uplo( A_c ) * B_r
// effective operation: C_c += uplo( A_c ) * B_c
// B is copied into a column-major temporary further down.
symm_needs_copyb = TRUE;
}
}
else // if ( bli_is_row_storage( a_rs, a_cs ) )
{
if ( bli_is_col_storage( b_rs, b_cs ) )
{
// requested operation: C_c += uplo( A_r ) * B_c
// effective operation: C_c += ~uplo( A_c ) * B_c
// Reading row-stored A as column-major exposes the opposite triangle.
bli_swap_ints( lda, inca );
bli_toggle_uplo( uplo );
}
else // if ( bli_is_row_storage( b_rs, b_cs ) )
{
// requested operation: C_c += uplo( A_r ) * B_r
// effective operation: C_c += ( B_c * ~uplo( A_c ) )^T
// The product is formed transposed in a temporary and then
// accumulated into C with a transpose (axpyt path below).
bli_swap_ints( lda, inca );
bli_swap_ints( ldb, incb );
bli_toggle_side( side );
bli_toggle_uplo( uplo );
symm_needs_axpyt = TRUE;
}
}
}
else // if ( bli_is_row_storage( c_rs, c_cs ) )
{
if ( bli_is_col_storage( a_rs, a_cs ) )
{
if ( bli_is_col_storage( b_rs, b_cs ) )
{
// requested operation: C_r += uplo( A_c ) * B_c
// effective operation: C_c += ( uplo( A_c ) * B_c )^T
// m and n are swapped so that they describe C as column-major.
bli_swap_ints( ldc, incc );
bli_swap_ints( m, n );
symm_needs_axpyt = TRUE;
}
else // if ( bli_is_row_storage( b_rs, b_cs ) )
{
// requested operation: C_r += uplo( A_c ) * B_r
// effective operation: C_c += B_c * uplo( A_c )
// uplo is not toggled here: A is column-stored, only the side flips.
bli_swap_ints( ldc, incc );
bli_swap_ints( ldb, incb );
bli_swap_ints( m, n );
bli_toggle_side( side );
}
}
else // if ( bli_is_row_storage( a_rs, a_cs ) )
{
if ( bli_is_col_storage( b_rs, b_cs ) )
{
// requested operation: C_r += uplo( A_r ) * B_c
// effective operation: C_c += B_c^T * ~uplo( A_c )
// B^T is materialized by the transposing copy further down.
bli_swap_ints( ldc, incc );
bli_swap_ints( lda, inca );
bli_swap_ints( m, n );
bli_toggle_side( side );
bli_toggle_uplo( uplo );
symm_needs_copyb = TRUE;
symm_needs_transb = TRUE;
}
else // if ( bli_is_row_storage( b_rs, b_cs ) )
{
// requested operation: C_r += uplo( A_r ) * B_r
// effective operation: C_c += B_c * ~uplo( A_c )
bli_swap_ints( ldc, incc );
bli_swap_ints( lda, inca );
bli_swap_ints( ldb, incb );
bli_swap_ints( m, n );
bli_toggle_uplo( uplo );
bli_toggle_side( side );
}
}
}
// We need a temporary matrix for the cases where B needs to be copied.
// By default the "copy" simply aliases B, so the non-copy cases can use
// b_copy / ldb_copy unconditionally.
b_copy = b;
ldb_copy = ldb;
incb_copy = incb;
// There are two cases where we need to make a copy of B: one where the
// copy's dimensions are transposed from the original B, and one where
// the dimensions are not swapped.
if ( symm_needs_copyb )
{
trans_t transb;
// Set transb, which determines whether or not we need to copy from B
// as if it needs a transposition. If a transposition is needed, then
// m and n have already been swapped. So in either case m
// represents the leading dimension of the copy.
if ( symm_needs_transb ) transb = BLIS_TRANSPOSE;
else transb = BLIS_NO_TRANSPOSE;
// The copy is an m-by-n column-major matrix with unit row stride.
b_copy = bli_zallocm( m, n );
ldb_copy = m;
incb_copy = 1;
bli_zcopymt( transb,
m,
n,
b, incb, ldb,
b_copy, incb_copy, ldb_copy );
}
// There are two cases where we need to perform the symm and then axpy
// the result into C with a transposition. We handle those cases here.
if ( symm_needs_axpyt )
{
// We need a temporary matrix for holding C^T. Notice that m and n
// represent the dimensions of C, and thus C_trans is n-by-m
// (interpreting both as column-major matrices). So the leading
// dimension of the temporary matrix holding C^T is n.
c_trans = bli_zallocm( n, m );
ldc_trans = n;
incc_trans = 1;
// Compute A * B (or B * A) and store the result in C_trans.
// Note that there is no overlap between the axpyt cases and
// the conja/copyb cases, hence the use of a, b, lda, and ldb.
// The dimensions are passed as (n, m) because the product is n-by-m, and
// beta is zero so the freshly allocated temporary is overwritten.
bli_zsymm_blas( side,
uplo,
n,
m,
alpha,
a, lda,
b, ldb,
&zero,
c_trans, ldc_trans );
// Scale C by beta.
bli_zscalm( BLIS_NO_CONJUGATE,
m,
n,
beta,
c, incc, ldc );
// And finally, accumulate the matrix product in C_trans into C
// with a transpose.
bli_zaxpymt( BLIS_TRANSPOSE,
m,
n,
&one,
c_trans, incc_trans, ldc_trans,
c, incc, ldc );
// Free the temporary matrix for C.
bli_zfree( c_trans );
}
else // no extra axpyt step needed
{
// Direct call: b_copy is either B itself or its column-major copy.
bli_zsymm_blas( side,
uplo,
m,
n,
alpha,
a, lda,
b_copy, ldb_copy,
beta,
c, ldc );
}
// Release the copy of B only if one was actually allocated above.
if ( symm_needs_copyb )
bli_zfree( b_copy );
// Free any temporary contiguous matrices, copying the result back to
// the original matrix.
// The saved m and n are used for C because the working m and n may have
// been swapped by the case analysis.
bli_zfree_contigm( a_save, a_rs_save, a_cs_save,
&a, &a_rs, &a_cs );
bli_zfree_contigm( b_save, b_rs_save, b_cs_save,
&b, &b_rs, &b_cs );
bli_zfree_saved_contigm( m_save,
n_save,
c_save, c_rs_save, c_cs_save,
&c, &c_rs, &c_cs );
}
| void bli_zsymm_blas | ( | side_t | side, |
| uplo_t | uplo, | ||
| int | m, | ||
| int | n, | ||
| dcomplex * | alpha, | ||
| dcomplex * | a, | ||
| int | lda, | ||
| dcomplex * | b, | ||
| int | ldb, | ||
| dcomplex * | beta, | ||
| dcomplex * | c, | ||
| int | ldc | ||
| ) |
References bli_param_map_to_netlib_side(), bli_param_map_to_netlib_uplo(), cblas_zsymm(), CblasColMajor, and F77_zsymm().
Referenced by bli_zsymm().
{
    // Column-major dcomplex symm: translate the BLIS side/uplo parameters to
    // their netlib equivalents and forward everything to whichever BLAS
    // binding this build was configured with.
#ifdef BLIS_ENABLE_CBLAS_INTERFACES
    // CBLAS binding: integers go by value, complex scalars by pointer.
    enum CBLAS_SIDE netlib_side;
    enum CBLAS_UPLO netlib_uplo;

    bli_param_map_to_netlib_side( side, &netlib_side );
    bli_param_map_to_netlib_uplo( uplo, &netlib_uplo );

    cblas_zsymm( CblasColMajor, netlib_side, netlib_uplo,
                 m, n,
                 alpha,
                 a, lda,
                 b, ldb,
                 beta,
                 c, ldc );
#else
    // Fortran-77 binding: every argument is passed by address.
    char netlib_side;
    char netlib_uplo;

    bli_param_map_to_netlib_side( side, &netlib_side );
    bli_param_map_to_netlib_uplo( uplo, &netlib_uplo );

    F77_zsymm( &netlib_side, &netlib_uplo,
               &m, &n,
               alpha,
               a, &lda,
               b, &ldb,
               beta,
               c, &ldc );
#endif
}
1.7.6.1