|
libflame
revision_anchor
|
Go to the source code of this file.
Functions | |
| void | bli_sgemm (trans_t transa, trans_t transb, int m, int k, int n, float *alpha, float *a, int a_rs, int a_cs, float *b, int b_rs, int b_cs, float *beta, float *c, int c_rs, int c_cs) |
| void | bli_dgemm (trans_t transa, trans_t transb, int m, int k, int n, double *alpha, double *a, int a_rs, int a_cs, double *b, int b_rs, int b_cs, double *beta, double *c, int c_rs, int c_cs) |
| void | bli_cgemm (trans_t transa, trans_t transb, int m, int k, int n, scomplex *alpha, scomplex *a, int a_rs, int a_cs, scomplex *b, int b_rs, int b_cs, scomplex *beta, scomplex *c, int c_rs, int c_cs) |
| void | bli_zgemm (trans_t transa, trans_t transb, int m, int k, int n, dcomplex *alpha, dcomplex *a, int a_rs, int a_cs, dcomplex *b, int b_rs, int b_cs, dcomplex *beta, dcomplex *c, int c_rs, int c_cs) |
| void | bli_sgemm_blas (trans_t transa, trans_t transb, int m, int n, int k, float *alpha, float *a, int lda, float *b, int ldb, float *beta, float *c, int ldc) |
| void | bli_dgemm_blas (trans_t transa, trans_t transb, int m, int n, int k, double *alpha, double *a, int lda, double *b, int ldb, double *beta, double *c, int ldc) |
| void | bli_cgemm_blas (trans_t transa, trans_t transb, int m, int n, int k, scomplex *alpha, scomplex *a, int lda, scomplex *b, int ldb, scomplex *beta, scomplex *c, int ldc) |
| void | bli_zgemm_blas (trans_t transa, trans_t transb, int m, int n, int k, dcomplex *alpha, dcomplex *a, int lda, dcomplex *b, int ldb, dcomplex *beta, dcomplex *c, int ldc) |
| void | bli_shemm (side_t side, uplo_t uplo, int m, int n, float *alpha, float *a, int a_rs, int a_cs, float *b, int b_rs, int b_cs, float *beta, float *c, int c_rs, int c_cs) |
| void | bli_dhemm (side_t side, uplo_t uplo, int m, int n, double *alpha, double *a, int a_rs, int a_cs, double *b, int b_rs, int b_cs, double *beta, double *c, int c_rs, int c_cs) |
| void | bli_chemm (side_t side, uplo_t uplo, int m, int n, scomplex *alpha, scomplex *a, int a_rs, int a_cs, scomplex *b, int b_rs, int b_cs, scomplex *beta, scomplex *c, int c_rs, int c_cs) |
| void | bli_zhemm (side_t side, uplo_t uplo, int m, int n, dcomplex *alpha, dcomplex *a, int a_rs, int a_cs, dcomplex *b, int b_rs, int b_cs, dcomplex *beta, dcomplex *c, int c_rs, int c_cs) |
| void | bli_chemm_blas (side_t side, uplo_t uplo, int m, int n, scomplex *alpha, scomplex *a, int lda, scomplex *b, int ldb, scomplex *beta, scomplex *c, int ldc) |
| void | bli_zhemm_blas (side_t side, uplo_t uplo, int m, int n, dcomplex *alpha, dcomplex *a, int lda, dcomplex *b, int ldb, dcomplex *beta, dcomplex *c, int ldc) |
| void | bli_sherk (uplo_t uplo, trans_t trans, int m, int k, float *alpha, float *a, int a_rs, int a_cs, float *beta, float *c, int c_rs, int c_cs) |
| void | bli_dherk (uplo_t uplo, trans_t trans, int m, int k, double *alpha, double *a, int a_rs, int a_cs, double *beta, double *c, int c_rs, int c_cs) |
| void | bli_cherk (uplo_t uplo, trans_t trans, int m, int k, float *alpha, scomplex *a, int a_rs, int a_cs, float *beta, scomplex *c, int c_rs, int c_cs) |
| void | bli_zherk (uplo_t uplo, trans_t trans, int m, int k, double *alpha, dcomplex *a, int a_rs, int a_cs, double *beta, dcomplex *c, int c_rs, int c_cs) |
| void | bli_cherk_blas (uplo_t uplo, trans_t trans, int m, int k, float *alpha, scomplex *a, int lda, float *beta, scomplex *c, int ldc) |
| void | bli_zherk_blas (uplo_t uplo, trans_t trans, int m, int k, double *alpha, dcomplex *a, int lda, double *beta, dcomplex *c, int ldc) |
| void | bli_sher2k (uplo_t uplo, trans_t trans, int m, int k, float *alpha, float *a, int a_rs, int a_cs, float *b, int b_rs, int b_cs, float *beta, float *c, int c_rs, int c_cs) |
| void | bli_dher2k (uplo_t uplo, trans_t trans, int m, int k, double *alpha, double *a, int a_rs, int a_cs, double *b, int b_rs, int b_cs, double *beta, double *c, int c_rs, int c_cs) |
| void | bli_cher2k (uplo_t uplo, trans_t trans, int m, int k, scomplex *alpha, scomplex *a, int a_rs, int a_cs, scomplex *b, int b_rs, int b_cs, float *beta, scomplex *c, int c_rs, int c_cs) |
| void | bli_zher2k (uplo_t uplo, trans_t trans, int m, int k, dcomplex *alpha, dcomplex *a, int a_rs, int a_cs, dcomplex *b, int b_rs, int b_cs, double *beta, dcomplex *c, int c_rs, int c_cs) |
| void | bli_cher2k_blas (uplo_t uplo, trans_t trans, int m, int k, scomplex *alpha, scomplex *a, int lda, scomplex *b, int ldb, float *beta, scomplex *c, int ldc) |
| void | bli_zher2k_blas (uplo_t uplo, trans_t trans, int m, int k, dcomplex *alpha, dcomplex *a, int lda, dcomplex *b, int ldb, double *beta, dcomplex *c, int ldc) |
| void | bli_ssymm (side_t side, uplo_t uplo, int m, int n, float *alpha, float *a, int a_rs, int a_cs, float *b, int b_rs, int b_cs, float *beta, float *c, int c_rs, int c_cs) |
| void | bli_dsymm (side_t side, uplo_t uplo, int m, int n, double *alpha, double *a, int a_rs, int a_cs, double *b, int b_rs, int b_cs, double *beta, double *c, int c_rs, int c_cs) |
| void | bli_csymm (side_t side, uplo_t uplo, int m, int n, scomplex *alpha, scomplex *a, int a_rs, int a_cs, scomplex *b, int b_rs, int b_cs, scomplex *beta, scomplex *c, int c_rs, int c_cs) |
| void | bli_zsymm (side_t side, uplo_t uplo, int m, int n, dcomplex *alpha, dcomplex *a, int a_rs, int a_cs, dcomplex *b, int b_rs, int b_cs, dcomplex *beta, dcomplex *c, int c_rs, int c_cs) |
| void | bli_ssymm_blas (side_t side, uplo_t uplo, int m, int n, float *alpha, float *a, int lda, float *b, int ldb, float *beta, float *c, int ldc) |
| void | bli_dsymm_blas (side_t side, uplo_t uplo, int m, int n, double *alpha, double *a, int lda, double *b, int ldb, double *beta, double *c, int ldc) |
| void | bli_csymm_blas (side_t side, uplo_t uplo, int m, int n, scomplex *alpha, scomplex *a, int lda, scomplex *b, int ldb, scomplex *beta, scomplex *c, int ldc) |
| void | bli_zsymm_blas (side_t side, uplo_t uplo, int m, int n, dcomplex *alpha, dcomplex *a, int lda, dcomplex *b, int ldb, dcomplex *beta, dcomplex *c, int ldc) |
| void | bli_ssyrk (uplo_t uplo, trans_t trans, int m, int k, float *alpha, float *a, int a_rs, int a_cs, float *beta, float *c, int c_rs, int c_cs) |
| void | bli_dsyrk (uplo_t uplo, trans_t trans, int m, int k, double *alpha, double *a, int a_rs, int a_cs, double *beta, double *c, int c_rs, int c_cs) |
| void | bli_csyrk (uplo_t uplo, trans_t trans, int m, int k, scomplex *alpha, scomplex *a, int a_rs, int a_cs, scomplex *beta, scomplex *c, int c_rs, int c_cs) |
| void | bli_zsyrk (uplo_t uplo, trans_t trans, int m, int k, dcomplex *alpha, dcomplex *a, int a_rs, int a_cs, dcomplex *beta, dcomplex *c, int c_rs, int c_cs) |
| void | bli_ssyrk_blas (uplo_t uplo, trans_t trans, int m, int k, float *alpha, float *a, int lda, float *beta, float *c, int ldc) |
| void | bli_dsyrk_blas (uplo_t uplo, trans_t trans, int m, int k, double *alpha, double *a, int lda, double *beta, double *c, int ldc) |
| void | bli_csyrk_blas (uplo_t uplo, trans_t trans, int m, int k, scomplex *alpha, scomplex *a, int lda, scomplex *beta, scomplex *c, int ldc) |
| void | bli_zsyrk_blas (uplo_t uplo, trans_t trans, int m, int k, dcomplex *alpha, dcomplex *a, int lda, dcomplex *beta, dcomplex *c, int ldc) |
| void | bli_ssyr2k (uplo_t uplo, trans_t trans, int m, int k, float *alpha, float *a, int a_rs, int a_cs, float *b, int b_rs, int b_cs, float *beta, float *c, int c_rs, int c_cs) |
| void | bli_dsyr2k (uplo_t uplo, trans_t trans, int m, int k, double *alpha, double *a, int a_rs, int a_cs, double *b, int b_rs, int b_cs, double *beta, double *c, int c_rs, int c_cs) |
| void | bli_csyr2k (uplo_t uplo, trans_t trans, int m, int k, scomplex *alpha, scomplex *a, int a_rs, int a_cs, scomplex *b, int b_rs, int b_cs, scomplex *beta, scomplex *c, int c_rs, int c_cs) |
| void | bli_zsyr2k (uplo_t uplo, trans_t trans, int m, int k, dcomplex *alpha, dcomplex *a, int a_rs, int a_cs, dcomplex *b, int b_rs, int b_cs, dcomplex *beta, dcomplex *c, int c_rs, int c_cs) |
| void | bli_ssyr2k_blas (uplo_t uplo, trans_t trans, int m, int k, float *alpha, float *a, int lda, float *b, int ldb, float *beta, float *c, int ldc) |
| void | bli_dsyr2k_blas (uplo_t uplo, trans_t trans, int m, int k, double *alpha, double *a, int lda, double *b, int ldb, double *beta, double *c, int ldc) |
| void | bli_csyr2k_blas (uplo_t uplo, trans_t trans, int m, int k, scomplex *alpha, scomplex *a, int lda, scomplex *b, int ldb, scomplex *beta, scomplex *c, int ldc) |
| void | bli_zsyr2k_blas (uplo_t uplo, trans_t trans, int m, int k, dcomplex *alpha, dcomplex *a, int lda, dcomplex *b, int ldb, dcomplex *beta, dcomplex *c, int ldc) |
| void | bli_strmm (side_t side, uplo_t uplo, trans_t trans, diag_t diag, int m, int n, float *alpha, float *a, int a_rs, int a_cs, float *b, int b_rs, int b_cs) |
| void | bli_dtrmm (side_t side, uplo_t uplo, trans_t trans, diag_t diag, int m, int n, double *alpha, double *a, int a_rs, int a_cs, double *b, int b_rs, int b_cs) |
| void | bli_ctrmm (side_t side, uplo_t uplo, trans_t trans, diag_t diag, int m, int n, scomplex *alpha, scomplex *a, int a_rs, int a_cs, scomplex *b, int b_rs, int b_cs) |
| void | bli_ztrmm (side_t side, uplo_t uplo, trans_t trans, diag_t diag, int m, int n, dcomplex *alpha, dcomplex *a, int a_rs, int a_cs, dcomplex *b, int b_rs, int b_cs) |
| void | bli_strmm_blas (side_t side, uplo_t uplo, trans_t trans, diag_t diag, int m, int n, float *alpha, float *a, int lda, float *b, int ldb) |
| void | bli_dtrmm_blas (side_t side, uplo_t uplo, trans_t trans, diag_t diag, int m, int n, double *alpha, double *a, int lda, double *b, int ldb) |
| void | bli_ctrmm_blas (side_t side, uplo_t uplo, trans_t trans, diag_t diag, int m, int n, scomplex *alpha, scomplex *a, int lda, scomplex *b, int ldb) |
| void | bli_ztrmm_blas (side_t side, uplo_t uplo, trans_t trans, diag_t diag, int m, int n, dcomplex *alpha, dcomplex *a, int lda, dcomplex *b, int ldb) |
| void | bli_strsm (side_t side, uplo_t uplo, trans_t trans, diag_t diag, int m, int n, float *alpha, float *a, int a_rs, int a_cs, float *b, int b_rs, int b_cs) |
| void | bli_dtrsm (side_t side, uplo_t uplo, trans_t trans, diag_t diag, int m, int n, double *alpha, double *a, int a_rs, int a_cs, double *b, int b_rs, int b_cs) |
| void | bli_ctrsm (side_t side, uplo_t uplo, trans_t trans, diag_t diag, int m, int n, scomplex *alpha, scomplex *a, int a_rs, int a_cs, scomplex *b, int b_rs, int b_cs) |
| void | bli_ztrsm (side_t side, uplo_t uplo, trans_t trans, diag_t diag, int m, int n, dcomplex *alpha, dcomplex *a, int a_rs, int a_cs, dcomplex *b, int b_rs, int b_cs) |
| void | bli_strsm_blas (side_t side, uplo_t uplo, trans_t trans, diag_t diag, int m, int n, float *alpha, float *a, int lda, float *b, int ldb) |
| void | bli_dtrsm_blas (side_t side, uplo_t uplo, trans_t trans, diag_t diag, int m, int n, double *alpha, double *a, int lda, double *b, int ldb) |
| void | bli_ctrsm_blas (side_t side, uplo_t uplo, trans_t trans, diag_t diag, int m, int n, scomplex *alpha, scomplex *a, int lda, scomplex *b, int ldb) |
| void | bli_ztrsm_blas (side_t side, uplo_t uplo, trans_t trans, diag_t diag, int m, int n, dcomplex *alpha, dcomplex *a, int lda, dcomplex *b, int ldb) |
| void | bli_strmmsx (side_t side, uplo_t uplo, trans_t trans, diag_t diag, int m, int n, float *alpha, float *a, int a_rs, int a_cs, float *b, int b_rs, int b_cs, float *beta, float *c, int c_rs, int c_cs) |
| void | bli_dtrmmsx (side_t side, uplo_t uplo, trans_t trans, diag_t diag, int m, int n, double *alpha, double *a, int a_rs, int a_cs, double *b, int b_rs, int b_cs, double *beta, double *c, int c_rs, int c_cs) |
| void | bli_ctrmmsx (side_t side, uplo_t uplo, trans_t trans, diag_t diag, int m, int n, scomplex *alpha, scomplex *a, int a_rs, int a_cs, scomplex *b, int b_rs, int b_cs, scomplex *beta, scomplex *c, int c_rs, int c_cs) |
| void | bli_ztrmmsx (side_t side, uplo_t uplo, trans_t trans, diag_t diag, int m, int n, dcomplex *alpha, dcomplex *a, int a_rs, int a_cs, dcomplex *b, int b_rs, int b_cs, dcomplex *beta, dcomplex *c, int c_rs, int c_cs) |
| void | bli_strsmsx (side_t side, uplo_t uplo, trans_t trans, diag_t diag, int m, int n, float *alpha, float *a, int a_rs, int a_cs, float *b, int b_rs, int b_cs, float *beta, float *c, int c_rs, int c_cs) |
| void | bli_dtrsmsx (side_t side, uplo_t uplo, trans_t trans, diag_t diag, int m, int n, double *alpha, double *a, int a_rs, int a_cs, double *b, int b_rs, int b_cs, double *beta, double *c, int c_rs, int c_cs) |
| void | bli_ctrsmsx (side_t side, uplo_t uplo, trans_t trans, diag_t diag, int m, int n, scomplex *alpha, scomplex *a, int a_rs, int a_cs, scomplex *b, int b_rs, int b_cs, scomplex *beta, scomplex *c, int c_rs, int c_cs) |
| void | bli_ztrsmsx (side_t side, uplo_t uplo, trans_t trans, diag_t diag, int m, int n, dcomplex *alpha, dcomplex *a, int a_rs, int a_cs, dcomplex *b, int b_rs, int b_cs, dcomplex *beta, dcomplex *c, int c_rs, int c_cs) |
| void bli_cgemm | ( | trans_t | transa, |
| trans_t | transb, | ||
| int | m, | ||
| int | k, | ||
| int | n, | ||
| scomplex * | alpha, | ||
| scomplex * | a, | ||
| int | a_rs, | ||
| int | a_cs, | ||
| scomplex * | b, | ||
| int | b_rs, | ||
| int | b_cs, | ||
| scomplex * | beta, | ||
| scomplex * | c, | ||
| int | c_rs, | ||
| int | c_cs | ||
| ) |
References bli_c0(), bli_c1(), bli_callocm(), bli_caxpymt(), bli_cconjm(), bli_ccopymt(), bli_ccreate_contigm(), bli_ccreate_contigmt(), bli_cfree(), bli_cfree_contigm(), bli_cfree_saved_contigm(), bli_cgemm_blas(), bli_cscalm(), bli_is_col_storage(), bli_is_conjnotrans(), bli_zero_dim3(), BLIS_CONJ_NO_TRANSPOSE, BLIS_NO_CONJUGATE, and BLIS_TRANSPOSE.
Referenced by FLA_Gemm_external().
{
// Single-precision complex matrix-matrix multiply front end that accepts
// A, B, and C with arbitrary row/column strides (a_rs/a_cs, b_rs/b_cs,
// c_rs/c_cs) and forwards the work to bli_cgemm_blas(). The body proceeds
// in stages:
//   1. obtain contiguous working versions of A, B, and C if necessary;
//   2. translate each combination of row/column storage into a
//      column-major problem by swapping strides, toggling transposition
//      flags, and/or exchanging the A and B operands;
//   3. explicitly conjugate any operand flagged conjugate-no-transpose;
//   4. call bli_cgemm_blas(), either straight into C or into a temporary
//      that is afterwards accumulated into C with a transposition;
//   5. release all temporaries.
// Keep the caller's original dimensions, pointers, and strides. The
// working variables (m, n, a, b, c and their strides) may be redirected to
// temporaries or swapped below.
int m_save = m;
int n_save = n;
scomplex* a_save = a;
scomplex* b_save = b;
scomplex* c_save = c;
int a_rs_save = a_rs;
int a_cs_save = a_cs;
int b_rs_save = b_rs;
int b_cs_save = b_cs;
int c_rs_save = c_rs;
int c_cs_save = c_cs;
// Scalar constants passed by address further down: zero serves as beta
// when the product is written to a fresh temporary, one as the scaling
// factor when that temporary is accumulated into C.
scomplex zero = bli_c0();
scomplex one = bli_c1();
scomplex* a_unswap;
scomplex* b_unswap;
scomplex* a_conj;
scomplex* b_conj;
scomplex* c_trans;
// Leading dimension (ld*) and unit increment (inc*) of each operand as
// seen by the column-major BLAS call.
int lda, inca;
int ldb, incb;
int ldc, incc;
int lda_conj, inca_conj;
int ldb_conj, incb_conj;
int ldc_trans, incc_trans;
int m_gemm, n_gemm;
// Set to TRUE for the storage combinations that must compute the product
// into a temporary and then add its transpose into C.
int gemm_needs_axpyt = FALSE;
int a_was_copied;
int b_was_copied;
// Return early if possible.
// NOTE(review): presumably bli_zero_dim3() is true when any of m, k, n is
// zero -- confirm. In that case C is only scaled by beta.
if ( bli_zero_dim3( m, k, n ) )
{
bli_cscalm( BLIS_NO_CONJUGATE,
m,
n,
beta,
c, c_rs, c_cs );
return;
}
// If necessary, allocate, initialize, and use a temporary contiguous
// copy of each matrix rather than the original matrices.
// On return a/b/c and their strides describe the matrices actually used
// from here on; the *_save variables still describe the caller's data.
bli_ccreate_contigmt( transa,
m,
k,
a_save, a_rs_save, a_cs_save,
&a, &a_rs, &a_cs );
bli_ccreate_contigmt( transb,
k,
n,
b_save, b_rs_save, b_cs_save,
&b, &b_rs, &b_cs );
bli_ccreate_contigm( m,
n,
c_save, c_rs_save, c_cs_save,
&c, &c_rs, &c_cs );
// Figure out whether A and/or B was copied to contiguous memory. This
// is used later to prevent redundant copying.
a_was_copied = ( a != a_save );
b_was_copied = ( b != b_save );
// These are used to track the original values of a and b prior to any
// operand swapping that might take place. This is necessary for proper
// freeing of memory when one is a temporary contiguous matrix.
a_unswap = a;
b_unswap = b;
// These are used to track the dimensions of the product of the
// A and B operands to the BLAS invocation of gemm. These differ
// from m and n when the operands need to be swapped.
m_gemm = m;
n_gemm = n;
// Initialize with values assuming column-major storage.
lda = a_cs;
inca = a_rs;
ldb = b_cs;
incb = b_rs;
ldc = c_cs;
incc = c_rs;
// Adjust the parameters based on the storage of each matrix.
// In the case comments below, the subscripts _c and _r denote
// column-stored and row-stored matrices respectively.
if ( bli_is_col_storage( c_rs, c_cs ) )
{
if ( bli_is_col_storage( a_rs, a_cs ) )
{
if ( bli_is_col_storage( b_rs, b_cs ) )
{
// requested operation: C_c += tr( A_c ) * tr( B_c )
// effective operation: C_c += tr( A_c ) * tr( B_c )
}
else // if ( bli_is_row_storage( b_rs, b_cs ) )
{
// requested operation: C_c += tr( A_c ) * tr( B_r )
// effective operation: C_c += tr( A_c ) * tr( B_c )^T
bli_swap_ints( ldb, incb );
bli_toggle_trans( transb );
}
}
else // if ( bli_is_row_storage( a_rs, a_cs ) )
{
if ( bli_is_col_storage( b_rs, b_cs ) )
{
// requested operation: C_c += tr( A_r ) * tr( B_c )
// effective operation: C_c += tr( A_r )^T * tr( B_c )
bli_swap_ints( lda, inca );
bli_toggle_trans( transa );
}
else // if ( bli_is_row_storage( b_rs, b_cs ) )
{
// requested operation: C_c += tr( A_r ) * tr( B_r )
// effective operation: C_c += ( tr( B_c ) * tr( A_c ) )^T
bli_swap_ints( lda, inca );
bli_swap_ints( ldb, incb );
// The operands trade places, so all of their bookkeeping (copied
// flags, strides, transposition flags) is exchanged with them.
bli_cswap_pointers( a, b );
bli_swap_ints( a_was_copied, b_was_copied );
bli_swap_ints( lda, ldb );
bli_swap_ints( inca, incb );
bli_swap_trans( transa, transb );
gemm_needs_axpyt = TRUE;
bli_swap_ints( m_gemm, n_gemm );
}
}
}
else // if ( bli_is_row_storage( c_rs, c_cs ) )
{
if ( bli_is_col_storage( a_rs, a_cs ) )
{
if ( bli_is_col_storage( b_rs, b_cs ) )
{
// requested operation: C_r += tr( A_c ) * tr( B_c )
// effective operation: C_c += ( tr( A_c ) * tr( B_c ) )^T
// Only m and n are swapped here; m_gemm and n_gemm keep the
// dimensions of the untransposed product held in the temporary.
bli_swap_ints( ldc, incc );
bli_swap_ints( m, n );
gemm_needs_axpyt = TRUE;
}
else // if ( bli_is_row_storage( b_rs, b_cs ) )
{
// requested operation: C_r += tr( A_c ) * tr( B_r )
// effective operation: C_c += tr( B_c ) * tr( A_c )^T
bli_swap_ints( ldc, incc );
bli_swap_ints( ldb, incb );
bli_toggle_trans( transa );
bli_swap_ints( m, n );
bli_swap_ints( m_gemm, n_gemm );
bli_cswap_pointers( a, b );
bli_swap_ints( a_was_copied, b_was_copied );
bli_swap_ints( lda, ldb );
bli_swap_ints( inca, incb );
bli_swap_trans( transa, transb );
}
}
else // if ( bli_is_row_storage( a_rs, a_cs ) )
{
if ( bli_is_col_storage( b_rs, b_cs ) )
{
// requested operation: C_r += tr( A_r ) * tr( B_c )
// effective operation: C_c += tr( B_c )^T * tr( A_c )
bli_swap_ints( ldc, incc );
bli_swap_ints( lda, inca );
bli_toggle_trans( transb );
bli_swap_ints( m, n );
bli_swap_ints( m_gemm, n_gemm );
bli_cswap_pointers( a, b );
bli_swap_ints( a_was_copied, b_was_copied );
bli_swap_ints( lda, ldb );
bli_swap_ints( inca, incb );
bli_swap_trans( transa, transb );
}
else // if ( bli_is_row_storage( b_rs, b_cs ) )
{
// requested operation: C_r += tr( A_r ) * tr( B_r )
// effective operation: C_c += tr( B_c ) * tr( A_c )
bli_swap_ints( lda, inca );
bli_swap_ints( ldb, incb );
bli_swap_ints( ldc, incc );
bli_swap_ints( m, n );
bli_swap_ints( m_gemm, n_gemm );
bli_cswap_pointers( a, b );
bli_swap_ints( a_was_copied, b_was_copied );
bli_swap_ints( lda, ldb );
bli_swap_ints( inca, incb );
bli_swap_trans( transa, transb );
}
}
}
// We need a temporary matrix for the case where A is conjugated.
// By default a_conj simply aliases a, so the gemm calls below can use
// a_conj unconditionally.
a_conj = a;
lda_conj = lda;
inca_conj = inca;
// If transa indicates conjugate-no-transpose and A was not already
// copied, then copy and conjugate it to a temporary matrix. Otherwise,
// if transa indicates conjugate-no-transpose and A was already copied,
// just conjugate it.
// (Conjugating in place is acceptable in the second case because a then
// refers to this function's own contiguous copy, not the caller's data.)
if ( bli_is_conjnotrans( transa ) && !a_was_copied )
{
a_conj = bli_callocm( m_gemm, k );
lda_conj = m_gemm;
inca_conj = 1;
bli_ccopymt( BLIS_CONJ_NO_TRANSPOSE,
m_gemm,
k,
a, inca, lda,
a_conj, inca_conj, lda_conj );
}
else if ( bli_is_conjnotrans( transa ) && a_was_copied )
{
bli_cconjm( m_gemm,
k,
a_conj, inca_conj, lda_conj );
}
// We need a temporary matrix for the case where B is conjugated.
// As with A, b_conj aliases b unless a conjugated copy is made.
b_conj = b;
ldb_conj = ldb;
incb_conj = incb;
// If transb indicates conjugate-no-transpose and B was not already
// copied, then copy and conjugate it to a temporary matrix. Otherwise,
// if transb indicates conjugate-no-transpose and B was already copied,
// just conjugate it.
if ( bli_is_conjnotrans( transb ) && !b_was_copied )
{
b_conj = bli_callocm( k, n_gemm );
ldb_conj = k;
incb_conj = 1;
bli_ccopymt( BLIS_CONJ_NO_TRANSPOSE,
k,
n_gemm,
b, incb, ldb,
b_conj, incb_conj, ldb_conj );
}
else if ( bli_is_conjnotrans( transb ) && b_was_copied )
{
bli_cconjm( k,
n_gemm,
b_conj, incb_conj, ldb_conj );
}
// There are two cases where we need to perform the gemm and then axpy
// the result into C with a transposition. We handle those cases here.
if ( gemm_needs_axpyt )
{
// We need a temporary matrix for holding C^T. Notice that m and n
// represent the dimensions of C, while m_gemm and n_gemm are the
// dimensions of the actual product op(A)*op(B), which may be n-by-m
// since the operands may have been swapped.
c_trans = bli_callocm( m_gemm, n_gemm );
ldc_trans = m_gemm;
incc_trans = 1;
// Compute tr( A ) * tr( B ), where A and B may have been swapped
// to reference the other, and store the result in C_trans.
// beta is passed as zero here; the caller's beta is applied to C
// separately below.
bli_cgemm_blas( transa,
transb,
m_gemm,
n_gemm,
k,
alpha,
a_conj, lda_conj,
b_conj, ldb_conj,
&zero,
c_trans, ldc_trans );
// Scale C by beta.
bli_cscalm( BLIS_NO_CONJUGATE,
m,
n,
beta,
c, incc, ldc );
// And finally, accumulate the matrix product in C_trans into C
// with a transpose.
bli_caxpymt( BLIS_TRANSPOSE,
m,
n,
&one,
c_trans, incc_trans, ldc_trans,
c, incc, ldc );
// Free the temporary matrix for C.
bli_cfree( c_trans );
}
else // no extra axpyt step needed
{
// The product can be written directly into C, so alpha and beta are
// both handed to the BLAS call.
bli_cgemm_blas( transa,
transb,
m_gemm,
n_gemm,
k,
alpha,
a_conj, lda_conj,
b_conj, ldb_conj,
beta,
c, ldc );
}
// a_conj and b_conj own separate storage only when the operand was
// conjugated without having been copied; these conditions mirror the ones
// under which bli_callocm() was called above.
if ( bli_is_conjnotrans( transa ) && !a_was_copied )
bli_cfree( a_conj );
if ( bli_is_conjnotrans( transb ) && !b_was_copied )
bli_cfree( b_conj );
// Free any temporary contiguous matrices, copying the result back to
// the original matrix.
// a_unswap and b_unswap are used instead of a and b because the latter
// may have been exchanged during the storage-case analysis.
bli_cfree_contigm( a_save, a_rs_save, a_cs_save,
&a_unswap, &a_rs, &a_cs );
bli_cfree_contigm( b_save, b_rs_save, b_cs_save,
&b_unswap, &b_rs, &b_cs );
bli_cfree_saved_contigm( m_save,
n_save,
c_save, c_rs_save, c_cs_save,
&c, &c_rs, &c_cs );
}
| void bli_cgemm_blas | ( | trans_t | transa, |
| trans_t | transb, | ||
| int | m, | ||
| int | n, | ||
| int | k, | ||
| scomplex * | alpha, | ||
| scomplex * | a, | ||
| int | lda, | ||
| scomplex * | b, | ||
| int | ldb, | ||
| scomplex * | beta, | ||
| scomplex * | c, | ||
| int | ldc | ||
| ) |
References bli_param_map_to_netlib_trans(), cblas_cgemm(), CblasColMajor, and F77_cgemm().
Referenced by bli_cgemm().
{
	// Hands a column-major single-precision complex gemm to whichever BLAS
	// binding the build selected: the Fortran-77 symbol by default, or the
	// CBLAS routine when BLIS_ENABLE_CBLAS_INTERFACES is defined.
#ifndef BLIS_ENABLE_CBLAS_INTERFACES
	// Fortran-77 binding: the transposition flags become characters and
	// every integer argument is passed by address.
	char f77_ta;
	char f77_tb;

	bli_param_map_to_netlib_trans( transa, &f77_ta );
	bli_param_map_to_netlib_trans( transb, &f77_tb );

	F77_cgemm( &f77_ta, &f77_tb,
	           &m, &n, &k,
	           alpha,
	           a, &lda,
	           b, &ldb,
	           beta,
	           c, &ldc );
#else
	// CBLAS binding: the transposition flags become CBLAS enumerators and
	// the integer arguments are passed by value.
	enum CBLAS_TRANSPOSE cb_ta;
	enum CBLAS_TRANSPOSE cb_tb;

	bli_param_map_to_netlib_trans( transa, &cb_ta );
	bli_param_map_to_netlib_trans( transb, &cb_tb );

	cblas_cgemm( CblasColMajor,
	             cb_ta, cb_tb,
	             m, n, k,
	             alpha,
	             a, lda,
	             b, ldb,
	             beta,
	             c, ldc );
#endif
}
| void bli_chemm | ( | side_t | side, |
| uplo_t | uplo, | ||
| int | m, | ||
| int | n, | ||
| scomplex * | alpha, | ||
| scomplex * | a, | ||
| int | a_rs, | ||
| int | a_cs, | ||
| scomplex * | b, | ||
| int | b_rs, | ||
| int | b_cs, | ||
| scomplex * | beta, | ||
| scomplex * | c, | ||
| int | c_rs, | ||
| int | c_cs | ||
| ) |
References bli_c0(), bli_c1(), bli_callocm(), bli_caxpymt(), bli_cconjmr(), bli_ccopymrt(), bli_ccopymt(), bli_ccreate_contigm(), bli_ccreate_contigmr(), bli_cfree(), bli_cfree_contigm(), bli_cfree_saved_contigm(), bli_chemm_blas(), bli_cscalm(), bli_is_col_storage(), bli_set_dim_with_side(), bli_zero_dim2(), BLIS_CONJ_NO_TRANSPOSE, BLIS_NO_CONJUGATE, BLIS_NO_TRANSPOSE, and BLIS_TRANSPOSE.
Referenced by FLA_Hemm_external().
{
// Single-precision complex Hermitian matrix multiply front end that
// accepts A, B, and C with arbitrary row/column strides and forwards the
// work to bli_chemm_blas(). The body proceeds in stages:
//   1. obtain contiguous working versions of A, B, and C if necessary;
//   2. translate each combination of row/column storage into a
//      column-major problem by swapping strides and toggling side/uplo,
//      recording whether A must be conjugated, B must be copied, or the
//      result must be accumulated into C with a transposition;
//   3. build the conjugated A and/or copied B that were requested;
//   4. call bli_chemm_blas(), either straight into C or into a temporary
//      that is afterwards added to C transposed;
//   5. release all temporaries.
// Keep the caller's original dimensions, pointers, and strides. The
// working variables may be redirected to temporaries or swapped below.
int m_save = m;
int n_save = n;
scomplex* a_save = a;
scomplex* b_save = b;
scomplex* c_save = c;
int a_rs_save = a_rs;
int a_cs_save = a_cs;
int b_rs_save = b_rs;
int b_cs_save = b_cs;
int c_rs_save = c_rs;
int c_cs_save = c_cs;
// Scalar constants passed by address further down: zero serves as beta
// when the product is written to a fresh temporary, one as the scaling
// factor when that temporary is accumulated into C.
scomplex zero = bli_c0();
scomplex one = bli_c1();
scomplex* a_conj;
scomplex* b_copy;
scomplex* c_trans;
int dim_a;
// Leading dimension (ld*) and unit increment (inc*) of each operand as
// seen by the column-major BLAS call.
int lda, inca;
int ldb, incb;
int ldc, incc;
int lda_conj, inca_conj;
int ldb_copy, incb_copy;
int ldc_trans, incc_trans;
// Flags set by the storage-case analysis and acted upon afterwards.
int hemm_needs_conja = FALSE;
int hemm_needs_copyb = FALSE;
int hemm_needs_transb = FALSE;
int hemm_needs_axpyt = FALSE;
int a_was_copied;
// Return early if possible.
if ( bli_zero_dim2( m, n ) ) return;
// If necessary, allocate, initialize, and use a temporary contiguous
// copy of each matrix rather than the original matrices.
// A is square with order dim_a, which bli_set_dim_with_side() derives
// from side, m, and n.
bli_set_dim_with_side( side, m, n, &dim_a );
bli_ccreate_contigmr( uplo,
dim_a,
dim_a,
a_save, a_rs_save, a_cs_save,
&a, &a_rs, &a_cs );
bli_ccreate_contigm( m,
n,
b_save, b_rs_save, b_cs_save,
&b, &b_rs, &b_cs );
bli_ccreate_contigm( m,
n,
c_save, c_rs_save, c_cs_save,
&c, &c_rs, &c_cs );
// Figure out whether A was copied to contiguous memory. This is used to
// prevent redundant copying.
a_was_copied = ( a != a_save );
// Initialize with values assuming column-major storage.
lda = a_cs;
inca = a_rs;
ldb = b_cs;
incb = b_rs;
ldc = c_cs;
incc = c_rs;
// Adjust the parameters based on the storage of each matrix.
// In the case comments below, the subscripts _c and _r denote
// column-stored and row-stored matrices, and ~uplo denotes the toggled
// triangle.
if ( bli_is_col_storage( c_rs, c_cs ) )
{
if ( bli_is_col_storage( a_rs, a_cs ) )
{
if ( bli_is_col_storage( b_rs, b_cs ) )
{
// requested operation: C_c += uplo( A_c ) * B_c
// effective operation: C_c += uplo( A_c ) * B_c
}
else // if ( bli_is_row_storage( b_rs, b_cs ) )
{
// requested operation: C_c += uplo( A_c ) * B_r
// effective operation: C_c += uplo( A_c ) * B_c
hemm_needs_copyb = TRUE;
}
}
else // if ( bli_is_row_storage( a_rs, a_cs ) )
{
if ( bli_is_col_storage( b_rs, b_cs ) )
{
// requested operation: C_c += uplo( A_r ) * B_c
// effective operation: C_c += ~uplo( conj( A_c ) ) * B_c
bli_swap_ints( lda, inca );
bli_toggle_uplo( uplo );
hemm_needs_conja = TRUE;
}
else // if ( bli_is_row_storage( b_rs, b_cs ) )
{
// requested operation: C_c += uplo( A_r ) * B_r
// effective operation: C_c += ( B_c * ~uplo( conj( A_c ) ) )^T
bli_swap_ints( lda, inca );
bli_swap_ints( ldb, incb );
bli_toggle_side( side );
bli_toggle_uplo( uplo );
hemm_needs_axpyt = TRUE;
}
}
}
else // if ( bli_is_row_storage( c_rs, c_cs ) )
{
if ( bli_is_col_storage( a_rs, a_cs ) )
{
if ( bli_is_col_storage( b_rs, b_cs ) )
{
// requested operation: C_r += uplo( A_c ) * B_c
// effective operation: C_c += ( uplo( A_c ) * B_c )^T
bli_swap_ints( ldc, incc );
bli_swap_ints( m, n );
hemm_needs_axpyt = TRUE;
}
else // if ( bli_is_row_storage( b_rs, b_cs ) )
{
// requested operation: C_r += uplo( A_c ) * B_r
// effective operation: C_c += B_c * ~uplo( conj( A_c ) )
bli_swap_ints( ldc, incc );
bli_swap_ints( ldb, incb );
bli_swap_ints( m, n );
bli_toggle_side( side );
hemm_needs_conja = TRUE;
}
}
else // if ( bli_is_row_storage( a_rs, a_cs ) )
{
if ( bli_is_col_storage( b_rs, b_cs ) )
{
// requested operation: C_r += uplo( A_r ) * B_c
// effective operation: C_c += B_c^T * ~uplo( A_c )
bli_swap_ints( ldc, incc );
bli_swap_ints( lda, inca );
bli_swap_ints( m, n );
bli_toggle_side( side );
bli_toggle_uplo( uplo );
hemm_needs_copyb = TRUE;
hemm_needs_transb = TRUE;
}
else // if ( bli_is_row_storage( b_rs, b_cs ) )
{
// requested operation: C_r += uplo( A_r ) * B_r
// effective operation: C_c += B_c * conj( ~uplo( A_c ) )
bli_swap_ints( ldc, incc );
bli_swap_ints( lda, inca );
bli_swap_ints( ldb, incb );
bli_swap_ints( m, n );
bli_toggle_uplo( uplo );
bli_toggle_side( side );
}
}
}
// We need a temporary matrix for the cases where A is conjugated.
// By default a_conj simply aliases a.
a_conj = a;
lda_conj = lda;
inca_conj = inca;
// If A was not already copied, conjugate it into a fresh temporary;
// otherwise conjugate the existing contiguous copy in place.
if ( hemm_needs_conja && !a_was_copied )
{
// NOTE(review): this declaration shadows the function-scope dim_a.
// It is recomputed from the current (possibly toggled/swapped) side,
// m, and n.
int dim_a;
bli_set_dim_with_side( side, m, n, &dim_a );
a_conj = bli_callocm( dim_a, dim_a );
lda_conj = dim_a;
inca_conj = 1;
bli_ccopymrt( uplo,
BLIS_CONJ_NO_TRANSPOSE,
dim_a,
dim_a,
a, inca, lda,
a_conj, inca_conj, lda_conj );
}
else if ( hemm_needs_conja && a_was_copied )
{
// NOTE(review): this declaration also shadows the function-scope
// dim_a.
int dim_a;
bli_set_dim_with_side( side, m, n, &dim_a );
bli_cconjmr( uplo,
dim_a,
dim_a,
a_conj, inca_conj, lda_conj );
}
// We need a temporary matrix for the cases where B needs to be copied.
// By default b_copy simply aliases b.
b_copy = b;
ldb_copy = ldb;
incb_copy = incb;
// There are two cases where we need to make a copy of B: one where the
// copy's dimensions are transposed from the original B, and one where
// the dimensions are not swapped.
if ( hemm_needs_copyb )
{
trans_t transb;
// Set transb, which determines whether or not we need to copy from B
// as if it needs a transposition. If a transposition is needed, then
// m and n have already been swapped. So in either case m
// represents the leading dimension of the copy.
if ( hemm_needs_transb ) transb = BLIS_TRANSPOSE;
else transb = BLIS_NO_TRANSPOSE;
b_copy = bli_callocm( m, n );
ldb_copy = m;
incb_copy = 1;
bli_ccopymt( transb,
m,
n,
b, incb, ldb,
b_copy, incb_copy, ldb_copy );
}
// There are two cases where we need to perform the hemm and then axpy
// the result into C with a transposition. We handle those cases here.
if ( hemm_needs_axpyt )
{
// We need a temporary matrix for holding C^T. Notice that m and n
// represent the dimensions of C, and thus C_trans is n-by-m
// (interpreting both as column-major matrices). So the leading
// dimension of the temporary matrix holding C^T is n.
c_trans = bli_callocm( n, m );
ldc_trans = n;
incc_trans = 1;
// Compute A * B (or B * A) and store the result in C_trans.
// Note that there is no overlap between the axpyt cases and
// the conja/copyb cases, hence the use of a, b, lda, and ldb.
// beta is passed as zero here; the caller's beta is applied to C
// separately below.
bli_chemm_blas( side,
uplo,
n,
m,
alpha,
a, lda,
b, ldb,
&zero,
c_trans, ldc_trans );
// Scale C by beta.
bli_cscalm( BLIS_NO_CONJUGATE,
m,
n,
beta,
c, incc, ldc );
// And finally, accumulate the matrix product in C_trans into C
// with a transpose.
bli_caxpymt( BLIS_TRANSPOSE,
m,
n,
&one,
c_trans, incc_trans, ldc_trans,
c, incc, ldc );
// Free the temporary matrix for C.
bli_cfree( c_trans );
}
else // no extra axpyt step needed
{
// The product can be written directly into C, so alpha and beta are
// both handed to the BLAS call, along with the (possibly conjugated)
// A and (possibly copied) B.
bli_chemm_blas( side,
uplo,
m,
n,
alpha,
a_conj, lda_conj,
b_copy, ldb_copy,
beta,
c, ldc );
}
// a_conj and b_copy own separate storage only under the conditions in
// which bli_callocm() was called for them above.
if ( hemm_needs_conja && !a_was_copied )
bli_cfree( a_conj );
if ( hemm_needs_copyb )
bli_cfree( b_copy );
// Free any temporary contiguous matrices, copying the result back to
// the original matrix.
bli_cfree_contigm( a_save, a_rs_save, a_cs_save,
&a, &a_rs, &a_cs );
bli_cfree_contigm( b_save, b_rs_save, b_cs_save,
&b, &b_rs, &b_cs );
bli_cfree_saved_contigm( m_save,
n_save,
c_save, c_rs_save, c_cs_save,
&c, &c_rs, &c_cs );
}
| void bli_chemm_blas | ( | side_t | side, |
| uplo_t | uplo, | ||
| int | m, | ||
| int | n, | ||
| scomplex * | alpha, | ||
| scomplex * | a, | ||
| int | lda, | ||
| scomplex * | b, | ||
| int | ldb, | ||
| scomplex * | beta, | ||
| scomplex * | c, | ||
| int | ldc | ||
| ) |
References bli_param_map_to_netlib_side(), bli_param_map_to_netlib_uplo(), cblas_chemm(), CblasColMajor, and F77_chemm().
Referenced by bli_chemm().
{
	// Hands a column-major single-precision complex hemm to whichever BLAS
	// binding the build selected: the Fortran-77 symbol by default, or the
	// CBLAS routine when BLIS_ENABLE_CBLAS_INTERFACES is defined.
#ifndef BLIS_ENABLE_CBLAS_INTERFACES
	// Fortran-77 binding: side and uplo become characters and every
	// integer argument is passed by address.
	char f77_side;
	char f77_uplo;

	bli_param_map_to_netlib_side( side, &f77_side );
	bli_param_map_to_netlib_uplo( uplo, &f77_uplo );

	F77_chemm( &f77_side, &f77_uplo,
	           &m, &n,
	           alpha,
	           a, &lda,
	           b, &ldb,
	           beta,
	           c, &ldc );
#else
	// CBLAS binding: side and uplo become CBLAS enumerators and the
	// integer arguments are passed by value.
	enum CBLAS_SIDE cb_side;
	enum CBLAS_UPLO cb_uplo;

	bli_param_map_to_netlib_side( side, &cb_side );
	bli_param_map_to_netlib_uplo( uplo, &cb_uplo );

	cblas_chemm( CblasColMajor,
	             cb_side, cb_uplo,
	             m, n,
	             alpha,
	             a, lda,
	             b, ldb,
	             beta,
	             c, ldc );
#endif
}
| void bli_cher2k | ( | uplo_t | uplo, |
| trans_t | trans, | ||
| int | m, | ||
| int | k, | ||
| scomplex * | alpha, | ||
| scomplex * | a, | ||
| int | a_rs, | ||
| int | a_cs, | ||
| scomplex * | b, | ||
| int | b_rs, | ||
| int | b_cs, | ||
| float * | beta, | ||
| scomplex * | c, | ||
| int | c_rs, | ||
| int | c_cs | ||
| ) |
References bli_c1(), bli_callocm(), bli_caxpymrt(), bli_ccopymt(), bli_ccreate_contigmr(), bli_ccreate_contigmt(), bli_cfree(), bli_cfree_contigm(), bli_cfree_saved_contigmr(), bli_cher2k_blas(), bli_csscalmr(), bli_is_col_storage(), bli_s0(), bli_set_dims_with_trans(), bli_zero_dim2(), BLIS_CONJ_NO_TRANSPOSE, and BLIS_NO_TRANSPOSE.
Referenced by FLA_Her2k_external().
{
	// Storage-agnostic front end for the single-precision complex rank-2k
	// update carried out by bli_cher2k_blas(). A, B and C may be passed with
	// arbitrary row/column strides; this routine rewrites the request into
	// an equivalent column-major problem, invokes the BLAS wrapper, and
	// finally copies the result back into the caller's C.
	//
	//   uplo   - triangle of C that is referenced and updated.
	//   trans  - transposition parameter applied to A and B.
	//   m, k   - dimensions forwarded to the BLAS wrapper; C is m-by-m.
	//   alpha  - complex scalar; never modified (a local copy is conjugated
	//            when the storage case requires it).
	//   a, b   - input matrices with strides (a_rs, a_cs) and (b_rs, b_cs).
	//   beta   - real scalar applied to C.
	//   c      - input/output matrix with strides (c_rs, c_cs).
	uplo_t    uplo_save = uplo;
	int       m_save    = m;
	scomplex* a_save    = a;
	scomplex* b_save    = b;
	scomplex* c_save    = c;
	int       a_rs_save = a_rs;
	int       a_cs_save = a_cs;
	int       b_rs_save = b_rs;
	int       b_cs_save = b_cs;
	int       c_rs_save = c_rs;
	int       c_cs_save = c_cs;
	float     zero_r    = bli_s0();
	scomplex  one       = bli_c1();
	scomplex  alpha_copy;
	scomplex* a_copy;
	scomplex* b_copy;
	scomplex* c_conj;
	int       lda, inca;
	int       ldb, incb;
	int       ldc, incc;
	int       lda_copy, inca_copy;
	int       ldb_copy, incb_copy;
	int       ldc_conj, incc_conj;
	int       her2k_needs_copya      = FALSE;
	int       her2k_needs_copyb      = FALSE;
	int       her2k_needs_conj       = FALSE;
	int       her2k_needs_alpha_conj = FALSE;

	// Return early if possible.
	if ( bli_zero_dim2( m, k ) ) return;

	// If necessary, allocate, initialize, and use a temporary contiguous
	// copy of each matrix rather than the original matrices.
	bli_ccreate_contigmt( trans,
	                      m,
	                      k,
	                      a_save, a_rs_save, a_cs_save,
	                      &a,     &a_rs,     &a_cs );

	bli_ccreate_contigmt( trans,
	                      m,
	                      k,
	                      b_save, b_rs_save, b_cs_save,
	                      &b,     &b_rs,     &b_cs );

	bli_ccreate_contigmr( uplo,
	                      m,
	                      m,
	                      c_save, c_rs_save, c_cs_save,
	                      &c,     &c_rs,     &c_cs );

	// Initialize with values assuming column-major storage.
	lda  = a_cs;
	inca = a_rs;
	ldb  = b_cs;
	incb = b_rs;
	ldc  = c_cs;
	incc = c_rs;

	// Adjust the parameters based on the storage of each matrix. In the
	// notation below X_c / X_r denote column- / row-stored X, ' denotes
	// conjugate transposition and ~uplo the opposite triangle.
	if ( bli_is_col_storage( c_rs, c_cs ) )
	{
		if ( bli_is_col_storage( a_rs, a_cs ) )
		{
			if ( bli_is_col_storage( b_rs, b_cs ) )
			{
				// requested operation: uplo( C_c ) += A_c * B_c' + B_c * A_c'
				// effective operation: uplo( C_c ) += A_c * B_c' + B_c * A_c'
			}
			else // if ( bli_is_row_storage( b_rs, b_cs ) )
			{
				// requested operation: uplo( C_c ) += A_c * B_r' + B_r * A_c'
				// effective operation: uplo( C_c ) += A_c * B_c' + B_c * A_c'
				her2k_needs_copyb = TRUE;
			}
		}
		else // if ( bli_is_row_storage( a_rs, a_cs ) )
		{
			if ( bli_is_col_storage( b_rs, b_cs ) )
			{
				// requested operation: uplo( C_c ) += A_r * B_c' + B_c * A_r'
				// effective operation: uplo( C_c ) += A_c * B_c' + B_c * A_c'
				her2k_needs_copya = TRUE;
			}
			else // if ( bli_is_row_storage( b_rs, b_cs ) )
			{
				// requested operation: uplo( C_c ) += A_r * B_r' + B_r * A_r'
				// effective operation: uplo( C_c ) += conj( A_c' * B_c + B_c' * A_c )
				bli_swap_ints( lda, inca );
				bli_swap_ints( ldb, incb );
				bli_toggle_conjtrans( trans );
				her2k_needs_conj       = TRUE;
				her2k_needs_alpha_conj = TRUE;
			}
		}
	}
	else // if ( bli_is_row_storage( c_rs, c_cs ) )
	{
		if ( bli_is_col_storage( a_rs, a_cs ) )
		{
			if ( bli_is_col_storage( b_rs, b_cs ) )
			{
				// requested operation:  uplo( C_r ) += A_c * B_c' + B_c * A_c'
				// effective operation: ~uplo( C_c ) += conj( A_c * B_c' + B_c * A_c' )
				bli_swap_ints( ldc, incc );
				bli_toggle_uplo( uplo );
				her2k_needs_conj = TRUE;
			}
			else // if ( bli_is_row_storage( b_rs, b_cs ) )
			{
				// requested operation:  uplo( C_r ) += A_c * B_r' + B_r * A_c'
				// effective operation: ~uplo( C_c ) += conj( A_c * B_c' + B_c * A_c' )
				her2k_needs_copyb = TRUE;
				bli_swap_ints( ldc, incc );
				bli_toggle_uplo( uplo );
				her2k_needs_conj = TRUE;
			}
		}
		else // if ( bli_is_row_storage( a_rs, a_cs ) )
		{
			if ( bli_is_col_storage( b_rs, b_cs ) )
			{
				// requested operation:  uplo( C_r ) += A_r * B_c' + B_c * A_r'
				// effective operation: ~uplo( C_c ) += conj( A_c * B_c' + B_c * A_c' )
				her2k_needs_copya = TRUE;
				bli_swap_ints( ldc, incc );
				bli_toggle_uplo( uplo );
				her2k_needs_conj = TRUE;
			}
			else // if ( bli_is_row_storage( b_rs, b_cs ) )
			{
				// requested operation:  uplo( C_r ) += A_r * B_r' + B_r * A_r'
				// effective operation: ~uplo( C_c ) += A_c' * B_c + B_c' * A_c
				bli_swap_ints( ldc, incc );
				bli_swap_ints( lda, inca );
				bli_swap_ints( ldb, incb );
				bli_toggle_uplo( uplo );
				bli_toggle_conjtrans( trans );
				her2k_needs_alpha_conj = TRUE;
			}
		}
	}

	// Make a copy of alpha and conjugate if necessary. alpha_copy is an
	// scomplex, so the single-precision complex conjugation macro is the
	// matching variant here.
	alpha_copy = *alpha;
	if ( her2k_needs_alpha_conj )
	{
		bli_cconjs( &alpha_copy );
	}

	a_copy    = a;
	lda_copy  = lda;
	inca_copy = inca;

	// There are two cases where we need to copy A to column-major storage.
	// We handle those two cases here.
	if ( her2k_needs_copya )
	{
		int m_a;
		int n_a;

		// Determine the dimensions of A according to the value of trans. We
		// need this in order to set the leading dimension of the copy of A.
		bli_set_dims_with_trans( trans, m, k, &m_a, &n_a );

		// We need a temporary matrix to hold a column-major copy of A.
		a_copy    = bli_callocm( m, k );
		lda_copy  = m_a;
		inca_copy = 1;

		// Copy the contents of A into A_copy.
		bli_ccopymt( BLIS_NO_TRANSPOSE,
		             m_a,
		             n_a,
		             a,      inca,      lda,
		             a_copy, inca_copy, lda_copy );
	}

	b_copy    = b;
	ldb_copy  = ldb;
	incb_copy = incb;

	// There are two cases where we need to copy B to column-major storage.
	// We handle those two cases here.
	if ( her2k_needs_copyb )
	{
		int m_b;
		int n_b;

		// Determine the dimensions of B according to the value of trans. We
		// need this in order to set the leading dimension of the copy of B.
		bli_set_dims_with_trans( trans, m, k, &m_b, &n_b );

		// We need a temporary matrix to hold a column-major copy of B.
		b_copy    = bli_callocm( m, k );
		ldb_copy  = m_b;
		incb_copy = 1;

		// Copy the contents of B into B_copy.
		bli_ccopymt( BLIS_NO_TRANSPOSE,
		             m_b,
		             n_b,
		             b,      incb,      ldb,
		             b_copy, incb_copy, ldb_copy );
	}

	// In the cases flagged above we must perform the rank-2k product into a
	// temporary and then axpy the result into C with a conjugation.
	if ( her2k_needs_conj )
	{
		// We need a temporary matrix for holding the rank-2k product.
		c_conj    = bli_callocm( m, m );
		ldc_conj  = m;
		incc_conj = 1;

		// Compute the rank-2k product, with a zero beta so that the
		// temporary's prior contents do not contribute.
		bli_cher2k_blas( uplo,
		                 trans,
		                 m,
		                 k,
		                 &alpha_copy,
		                 a_copy, lda_copy,
		                 b_copy, ldb_copy,
		                 &zero_r,
		                 c_conj, ldc_conj );

		// Scale C by beta.
		bli_csscalmr( uplo,
		              m,
		              m,
		              beta,
		              c, incc, ldc );

		// And finally, accumulate the rank-2k product in C_conj into C
		// with a conjugation.
		bli_caxpymrt( uplo,
		              BLIS_CONJ_NO_TRANSPOSE,
		              m,
		              m,
		              &one,
		              c_conj, incc_conj, ldc_conj,
		              c,      incc,      ldc );

		// Free the temporary matrix for C.
		bli_cfree( c_conj );
	}
	else
	{
		bli_cher2k_blas( uplo,
		                 trans,
		                 m,
		                 k,
		                 &alpha_copy,
		                 a_copy, lda_copy,
		                 b_copy, ldb_copy,
		                 beta,
		                 c, ldc );
	}

	// Release the column-major copies of A and B, if any were made.
	if ( her2k_needs_copya )
		bli_cfree( a_copy );

	if ( her2k_needs_copyb )
		bli_cfree( b_copy );

	// Free any temporary contiguous matrices, copying the result back to
	// the original matrix. The saved (pre-toggle) uplo and m are used so
	// that the caller's view of C is the one restored.
	bli_cfree_contigm( a_save, a_rs_save, a_cs_save,
	                   &a,     &a_rs,     &a_cs );

	bli_cfree_contigm( b_save, b_rs_save, b_cs_save,
	                   &b,     &b_rs,     &b_cs );

	bli_cfree_saved_contigmr( uplo_save,
	                          m_save,
	                          m_save,
	                          c_save, c_rs_save, c_cs_save,
	                          &c,     &c_rs,     &c_cs );
}
| void bli_cher2k_blas | ( | uplo_t | uplo, |
| trans_t | trans, | ||
| int | m, | ||
| int | k, | ||
| scomplex * | alpha, | ||
| scomplex * | a, | ||
| int | lda, | ||
| scomplex * | b, | ||
| int | ldb, | ||
| float * | beta, | ||
| scomplex * | c, | ||
| int | ldc | ||
| ) |
References bli_param_map_to_netlib_trans(), bli_param_map_to_netlib_uplo(), cblas_cher2k(), CblasColMajor, and F77_cher2k().
Referenced by bli_cher2k().
{
	// Forwards a single-precision complex her2k request to the underlying
	// BLAS. uplo and trans are translated to netlib form first. The CBLAS
	// path passes the real scalar beta by value and fixes the ordering to
	// column-major; the Fortran-77 path passes beta and all integers by
	// address.
#ifdef BLIS_ENABLE_CBLAS_INTERFACES
	enum CBLAS_ORDER     netlib_order = CblasColMajor;
	enum CBLAS_UPLO      netlib_uplo;
	enum CBLAS_TRANSPOSE netlib_trans;

	bli_param_map_to_netlib_uplo( uplo, &netlib_uplo );
	bli_param_map_to_netlib_trans( trans, &netlib_trans );

	cblas_cher2k( netlib_order, netlib_uplo, netlib_trans,
	              m, k,
	              alpha,
	              a, lda,
	              b, ldb,
	              *beta,
	              c, ldc );
#else
	char netlib_uplo;
	char netlib_trans;

	bli_param_map_to_netlib_uplo( uplo, &netlib_uplo );
	bli_param_map_to_netlib_trans( trans, &netlib_trans );

	F77_cher2k( &netlib_uplo, &netlib_trans,
	            &m, &k,
	            alpha,
	            a, &lda,
	            b, &ldb,
	            beta,
	            c, &ldc );
#endif
}
| void bli_cherk | ( | uplo_t | uplo, |
| trans_t | trans, | ||
| int | m, | ||
| int | k, | ||
| float * | alpha, | ||
| scomplex * | a, | ||
| int | a_rs, | ||
| int | a_cs, | ||
| float * | beta, | ||
| scomplex * | c, | ||
| int | c_rs, | ||
| int | c_cs | ||
| ) |
References bli_c1(), bli_callocm(), bli_caxpymrt(), bli_ccreate_contigmr(), bli_ccreate_contigmt(), bli_cfree(), bli_cfree_contigm(), bli_cfree_saved_contigmr(), bli_cherk_blas(), bli_csscalmr(), bli_is_col_storage(), bli_s0(), bli_zero_dim2(), and BLIS_CONJ_NO_TRANSPOSE.
Referenced by FLA_Herk_external(), and FLA_UDdate_UT_opc_var1().
{
	// Storage-agnostic front end for the single-precision complex rank-k
	// update done by bli_cherk_blas(). A and C may arrive with row or column
	// storage; the request is rewritten as a column-major problem, handed to
	// the BLAS wrapper, and the result is copied back into the caller's C.
	uplo_t    uplo_orig = uplo;
	int       m_orig    = m;
	scomplex* a_orig    = a;
	scomplex* c_orig    = c;
	int       a_rs_orig = a_rs;
	int       a_cs_orig = a_cs;
	int       c_rs_orig = c_rs;
	int       c_cs_orig = c_cs;
	float     real_zero = bli_s0();
	scomplex  cplx_one  = bli_c1();
	scomplex* c_tmp;
	int       lda, inca;
	int       ldc, incc;
	int       ldc_tmp, incc_tmp;
	int       c_is_col;
	int       a_is_col;
	int       conj_result = FALSE;

	// Nothing to do for an empty problem.
	if ( bli_zero_dim2( m, k ) ) return;

	// Work on contiguous temporaries when the helpers decide the originals
	// cannot be used directly.
	bli_ccreate_contigmt( trans,
	                      m, k,
	                      a_orig, a_rs_orig, a_cs_orig,
	                      &a,     &a_rs,     &a_cs );
	bli_ccreate_contigmr( uplo,
	                      m, m,
	                      c_orig, c_rs_orig, c_cs_orig,
	                      &c,     &c_rs,     &c_cs );

	// Start from the column-major interpretation of the strides.
	lda  = a_cs;
	inca = a_rs;
	ldc  = c_cs;
	incc = c_rs;

	c_is_col = bli_is_col_storage( c_rs, c_cs );
	a_is_col = bli_is_col_storage( a_rs, a_cs );

	// A row-stored A is viewed as a column-stored matrix by swapping its
	// strides and toggling the conjugate-transposition of trans.
	if ( !a_is_col )
	{
		bli_swap_ints( lda, inca );
		bli_toggle_conjtrans( trans );
		conj_result = !conj_result;
	}

	// A row-stored C is viewed as a column-stored matrix by swapping its
	// strides and addressing the opposite triangle.
	if ( !c_is_col )
	{
		bli_swap_ints( ldc, incc );
		bli_toggle_uplo( uplo );
		conj_result = !conj_result;
	}

	// When exactly one of A and C was re-interpreted, the product computed
	// by BLAS is the conjugate of what was requested (the two
	// re-interpretations cancel when both apply).
	if ( !conj_result )
	{
		// The BLAS call can update C directly.
		bli_cherk_blas( uplo,
		                trans,
		                m, k,
		                alpha,
		                a, lda,
		                beta,
		                c, ldc );
	}
	else
	{
		// Form the rank-k product in a column-major m-by-m scratch matrix,
		// using a zero beta so its prior contents are ignored.
		c_tmp    = bli_callocm( m, m );
		ldc_tmp  = m;
		incc_tmp = 1;

		bli_cherk_blas( uplo,
		                trans,
		                m, k,
		                alpha,
		                a, lda,
		                &real_zero,
		                c_tmp, ldc_tmp );

		// C := beta * C on the referenced triangle ...
		bli_csscalmr( uplo,
		              m, m,
		              beta,
		              c, incc, ldc );

		// ... then C += conj( scratch ).
		bli_caxpymrt( uplo,
		              BLIS_CONJ_NO_TRANSPOSE,
		              m, m,
		              &cplx_one,
		              c_tmp, incc_tmp, ldc_tmp,
		              c,     incc,     ldc );

		bli_cfree( c_tmp );
	}

	// Release contiguous temporaries; for C the result is first copied back
	// to the caller's matrix using the original uplo and dimension.
	bli_cfree_contigm( a_orig, a_rs_orig, a_cs_orig,
	                   &a,     &a_rs,     &a_cs );
	bli_cfree_saved_contigmr( uplo_orig,
	                          m_orig, m_orig,
	                          c_orig, c_rs_orig, c_cs_orig,
	                          &c,     &c_rs,     &c_cs );
}
| void bli_cherk_blas | ( | uplo_t | uplo, |
| trans_t | trans, | ||
| int | m, | ||
| int | k, | ||
| float * | alpha, | ||
| scomplex * | a, | ||
| int | lda, | ||
| float * | beta, | ||
| scomplex * | c, | ||
| int | ldc | ||
| ) |
References bli_param_map_to_netlib_trans(), bli_param_map_to_netlib_uplo(), cblas_cherk(), CblasColMajor, and F77_cherk().
Referenced by bli_cherk().
{
	// Forwards a single-precision complex herk request to the underlying
	// BLAS after translating uplo and trans to netlib form. The CBLAS path
	// passes the real scalars alpha and beta by value and uses column-major
	// ordering; the Fortran-77 path passes scalars and integers by address.
#ifdef BLIS_ENABLE_CBLAS_INTERFACES
	enum CBLAS_ORDER     netlib_order = CblasColMajor;
	enum CBLAS_UPLO      netlib_uplo;
	enum CBLAS_TRANSPOSE netlib_trans;

	bli_param_map_to_netlib_uplo( uplo, &netlib_uplo );
	bli_param_map_to_netlib_trans( trans, &netlib_trans );

	cblas_cherk( netlib_order, netlib_uplo, netlib_trans,
	             m, k,
	             *alpha,
	             a, lda,
	             *beta,
	             c, ldc );
#else
	char netlib_uplo;
	char netlib_trans;

	bli_param_map_to_netlib_uplo( uplo, &netlib_uplo );
	bli_param_map_to_netlib_trans( trans, &netlib_trans );

	F77_cherk( &netlib_uplo, &netlib_trans,
	           &m, &k,
	           alpha,
	           a, &lda,
	           beta,
	           c, &ldc );
#endif
}
| void bli_csymm | ( | side_t | side, |
| uplo_t | uplo, | ||
| int | m, | ||
| int | n, | ||
| scomplex * | alpha, | ||
| scomplex * | a, | ||
| int | a_rs, | ||
| int | a_cs, | ||
| scomplex * | b, | ||
| int | b_rs, | ||
| int | b_cs, | ||
| scomplex * | beta, | ||
| scomplex * | c, | ||
| int | c_rs, | ||
| int | c_cs | ||
| ) |
References bli_c0(), bli_c1(), bli_callocm(), bli_caxpymt(), bli_ccopymt(), bli_ccreate_contigm(), bli_ccreate_contigmr(), bli_cfree(), bli_cfree_contigm(), bli_cfree_saved_contigm(), bli_cscalm(), bli_csymm_blas(), bli_is_col_storage(), bli_set_dim_with_side(), bli_zero_dim2(), BLIS_NO_CONJUGATE, BLIS_NO_TRANSPOSE, and BLIS_TRANSPOSE.
Referenced by FLA_Symm_external().
{
// Storage-agnostic front end for the single-precision complex symm
// operation performed by bli_csymm_blas(). A, B and C may each be row- or
// column-stored; the request is rewritten into a column-major problem
// (possibly with the side/uplo toggled, m and n exchanged, B copied, or
// the product accumulated through a transposed temporary) and the result
// is copied back into the caller's C.
//
// In the case notes below X_c / X_r denote column- / row-stored X and
// ~uplo the opposite triangle. Note that this routine performs no
// conjugation anywhere: re-reading a row-stored symmetric A as
// column-stored only exchanges which triangle holds the data.
int m_save = m;
int n_save = n;
scomplex* a_save = a;
scomplex* b_save = b;
scomplex* c_save = c;
int a_rs_save = a_rs;
int a_cs_save = a_cs;
int b_rs_save = b_rs;
int b_cs_save = b_cs;
int c_rs_save = c_rs;
int c_cs_save = c_cs;
scomplex zero = bli_c0();
scomplex one = bli_c1();
scomplex* b_copy;
scomplex* c_trans;
int dim_a;
int lda, inca;
int ldb, incb;
int ldc, incc;
int ldb_copy, incb_copy;
int ldc_trans, incc_trans;
int symm_needs_copyb = FALSE;
int symm_needs_transb = FALSE;
int symm_needs_axpyt = FALSE;
// Return early if possible.
if ( bli_zero_dim2( m, n ) ) return;
// If necessary, allocate, initialize, and use a temporary contiguous
// copy of each matrix rather than the original matrices. A is square with
// its order (dim_a) selected from m and n according to side.
bli_set_dim_with_side( side, m, n, &dim_a );
bli_ccreate_contigmr( uplo,
dim_a,
dim_a,
a_save, a_rs_save, a_cs_save,
&a, &a_rs, &a_cs );
bli_ccreate_contigm( m,
n,
b_save, b_rs_save, b_cs_save,
&b, &b_rs, &b_cs );
bli_ccreate_contigm( m,
n,
c_save, c_rs_save, c_cs_save,
&c, &c_rs, &c_cs );
// Initialize with values assuming column-major storage.
lda = a_cs;
inca = a_rs;
ldb = b_cs;
incb = b_rs;
ldc = c_cs;
incc = c_rs;
// Adjust the parameters based on the storage of each matrix.
if ( bli_is_col_storage( c_rs, c_cs ) )
{
if ( bli_is_col_storage( a_rs, a_cs ) )
{
if ( bli_is_col_storage( b_rs, b_cs ) )
{
// requested operation: C_c += uplo( A_c ) * B_c
// effective operation: C_c += uplo( A_c ) * B_c
}
else // if ( bli_is_row_storage( b_rs, b_cs ) )
{
// requested operation: C_c += uplo( A_c ) * B_r
// effective operation: C_c += uplo( A_c ) * B_c
// (B is copied into a column-major temporary further below.)
symm_needs_copyb = TRUE;
}
}
else // if ( bli_is_row_storage( a_rs, a_cs ) )
{
if ( bli_is_col_storage( b_rs, b_cs ) )
{
// requested operation: C_c += uplo( A_r ) * B_c
// effective operation: C_c += ~uplo( A_c ) * B_c
bli_swap_ints( lda, inca );
bli_toggle_uplo( uplo );
}
else // if ( bli_is_row_storage( b_rs, b_cs ) )
{
// requested operation: C_c += uplo( A_r ) * B_r
// effective operation: C_c += ( B_c * ~uplo( A_c ) )^T
bli_swap_ints( lda, inca );
bli_swap_ints( ldb, incb );
bli_toggle_side( side );
bli_toggle_uplo( uplo );
symm_needs_axpyt = TRUE;
}
}
}
else // if ( bli_is_row_storage( c_rs, c_cs ) )
{
// In every row-stored-C case below, C's strides and the dimensions m and
// n are exchanged, so from here on m and n describe the column-major view
// of C rather than the caller's C.
if ( bli_is_col_storage( a_rs, a_cs ) )
{
if ( bli_is_col_storage( b_rs, b_cs ) )
{
// requested operation: C_r += uplo( A_c ) * B_c
// effective operation: C_c += ( uplo( A_c ) * B_c )^T
bli_swap_ints( ldc, incc );
bli_swap_ints( m, n );
symm_needs_axpyt = TRUE;
}
else // if ( bli_is_row_storage( b_rs, b_cs ) )
{
// requested operation: C_r += uplo( A_c ) * B_r
// effective operation: C_c += B_c * uplo( A_c )
// (uplo is not toggled here because A itself is not re-interpreted.)
bli_swap_ints( ldc, incc );
bli_swap_ints( ldb, incb );
bli_swap_ints( m, n );
bli_toggle_side( side );
}
}
else // if ( bli_is_row_storage( a_rs, a_cs ) )
{
if ( bli_is_col_storage( b_rs, b_cs ) )
{
// requested operation: C_r += uplo( A_r ) * B_c
// effective operation: C_c += B_c^T * ~uplo( A_c )
// (B^T is materialized by the transposing copy further below.)
bli_swap_ints( ldc, incc );
bli_swap_ints( lda, inca );
bli_swap_ints( m, n );
bli_toggle_side( side );
bli_toggle_uplo( uplo );
symm_needs_copyb = TRUE;
symm_needs_transb = TRUE;
}
else // if ( bli_is_row_storage( b_rs, b_cs ) )
{
// requested operation: C_r += uplo( A_r ) * B_r
// effective operation: C_c += B_c * ~uplo( A_c )
bli_swap_ints( ldc, incc );
bli_swap_ints( lda, inca );
bli_swap_ints( ldb, incb );
bli_swap_ints( m, n );
bli_toggle_uplo( uplo );
bli_toggle_side( side );
}
}
}
// We need a temporary matrix for the cases where B needs to be copied.
// Default to using B itself.
b_copy = b;
ldb_copy = ldb;
incb_copy = incb;
// There are two cases where we need to make a copy of B: one where the
// copy's dimensions are transposed from the original B, and one where
// the dimensions are not swapped.
if ( symm_needs_copyb )
{
trans_t transb;
// Set transb, which determines whether or not we need to copy from B
// as if it needs a transposition. If a transposition is needed, then
// m and n have already been swapped. So in either case m
// represents the leading dimension of the copy.
if ( symm_needs_transb ) transb = BLIS_TRANSPOSE;
else transb = BLIS_NO_TRANSPOSE;
b_copy = bli_callocm( m, n );
ldb_copy = m;
incb_copy = 1;
bli_ccopymt( transb,
m,
n,
b, incb, ldb,
b_copy, incb_copy, ldb_copy );
}
// There are two cases where we need to perform the symm and then axpy
// the result into C with a transposition. We handle those cases here.
if ( symm_needs_axpyt )
{
// We need a temporary matrix for holding C^T. Notice that m and n
// represent the dimensions of C, and thus C_trans is n-by-m
// (interpreting both as column-major matrices). So the leading
// dimension of the temporary matrix holding C^T is n.
c_trans = bli_callocm( n, m );
ldc_trans = n;
incc_trans = 1;
// Compute A * B (or B * A) and store the result in C_trans.
// Note that there is no overlap between the axpyt cases and
// the copyb cases, hence the use of a, b, lda, and ldb. A zero beta
// is passed so the temporary's prior contents do not contribute.
bli_csymm_blas( side,
uplo,
n,
m,
alpha,
a, lda,
b, ldb,
&zero,
c_trans, ldc_trans );
// Scale C by beta.
bli_cscalm( BLIS_NO_CONJUGATE,
m,
n,
beta,
c, incc, ldc );
// And finally, accumulate the matrix product in C_trans into C
// with a transpose.
bli_caxpymt( BLIS_TRANSPOSE,
m,
n,
&one,
c_trans, incc_trans, ldc_trans,
c, incc, ldc );
// Free the temporary matrix for C.
bli_cfree( c_trans );
}
else // no extra axpyt step needed
{
bli_csymm_blas( side,
uplo,
m,
n,
alpha,
a, lda,
b_copy, ldb_copy,
beta,
c, ldc );
}
// Release the column-major copy of B, if one was made.
if ( symm_needs_copyb )
bli_cfree( b_copy );
// Free any temporary contiguous matrices, copying the result back to
// the original matrix. The saved (pre-swap) m and n are used for C.
bli_cfree_contigm( a_save, a_rs_save, a_cs_save,
&a, &a_rs, &a_cs );
bli_cfree_contigm( b_save, b_rs_save, b_cs_save,
&b, &b_rs, &b_cs );
bli_cfree_saved_contigm( m_save,
n_save,
c_save, c_rs_save, c_cs_save,
&c, &c_rs, &c_cs );
}
| void bli_csymm_blas | ( | side_t | side, |
| uplo_t | uplo, | ||
| int | m, | ||
| int | n, | ||
| scomplex * | alpha, | ||
| scomplex * | a, | ||
| int | lda, | ||
| scomplex * | b, | ||
| int | ldb, | ||
| scomplex * | beta, | ||
| scomplex * | c, | ||
| int | ldc | ||
| ) |
References bli_param_map_to_netlib_side(), bli_param_map_to_netlib_uplo(), cblas_csymm(), CblasColMajor, and F77_csymm().
Referenced by bli_csymm().
{
	// Thin dispatch layer for the single-precision complex symm kernel.
	// side and uplo are translated to netlib form and the call goes either
	// to CBLAS (column-major ordering) or to the Fortran-77 interface, which
	// takes its integer arguments by address.
#ifdef BLIS_ENABLE_CBLAS_INTERFACES
	enum CBLAS_ORDER netlib_order = CblasColMajor;
	enum CBLAS_SIDE  netlib_side;
	enum CBLAS_UPLO  netlib_uplo;

	bli_param_map_to_netlib_side( side, &netlib_side );
	bli_param_map_to_netlib_uplo( uplo, &netlib_uplo );

	cblas_csymm( netlib_order, netlib_side, netlib_uplo,
	             m, n,
	             alpha,
	             a, lda,
	             b, ldb,
	             beta,
	             c, ldc );
#else
	char netlib_side;
	char netlib_uplo;

	bli_param_map_to_netlib_side( side, &netlib_side );
	bli_param_map_to_netlib_uplo( uplo, &netlib_uplo );

	F77_csymm( &netlib_side, &netlib_uplo,
	           &m, &n,
	           alpha,
	           a, &lda,
	           b, &ldb,
	           beta,
	           c, &ldc );
#endif
}
| void bli_csyr2k | ( | uplo_t | uplo, |
| trans_t | trans, | ||
| int | m, | ||
| int | k, | ||
| scomplex * | alpha, | ||
| scomplex * | a, | ||
| int | a_rs, | ||
| int | a_cs, | ||
| scomplex * | b, | ||
| int | b_rs, | ||
| int | b_cs, | ||
| scomplex * | beta, | ||
| scomplex * | c, | ||
| int | c_rs, | ||
| int | c_cs | ||
| ) |
References bli_callocm(), bli_ccopymt(), bli_ccreate_contigmr(), bli_ccreate_contigmt(), bli_cfree(), bli_cfree_contigm(), bli_cfree_saved_contigmr(), bli_csyr2k_blas(), bli_is_col_storage(), bli_set_dims_with_trans(), bli_zero_dim2(), and BLIS_NO_TRANSPOSE.
Referenced by FLA_Syr2k_external().
{
	// Storage-agnostic front end for the single-precision complex syr2k
	// operation performed by bli_csyr2k_blas(). A, B and C may each be row-
	// or column-stored; the request is rewritten as a column-major problem
	// (re-interpreting strides, toggling uplo/trans, or copying A or B as
	// needed) and the result is copied back into the caller's C.
	uplo_t    uplo_orig = uplo;
	int       m_orig    = m;
	scomplex* a_orig    = a;
	scomplex* b_orig    = b;
	scomplex* c_orig    = c;
	int       a_rs_orig = a_rs;
	int       a_cs_orig = a_cs;
	int       b_rs_orig = b_rs;
	int       b_cs_orig = b_cs;
	int       c_rs_orig = c_rs;
	int       c_cs_orig = c_cs;
	scomplex* a_use;
	scomplex* b_use;
	int       lda, inca;
	int       ldb, incb;
	int       ldc, incc;
	int       lda_use, inca_use;
	int       ldb_use, incb_use;
	int       a_is_col, b_is_col, c_is_col;
	int       copy_a = FALSE;
	int       copy_b = FALSE;

	// Nothing to do for an empty problem.
	if ( bli_zero_dim2( m, k ) ) return;

	// Work on contiguous temporaries when the helpers decide the originals
	// cannot be used directly.
	bli_ccreate_contigmt( trans,
	                      m, k,
	                      a_orig, a_rs_orig, a_cs_orig,
	                      &a,     &a_rs,     &a_cs );
	bli_ccreate_contigmt( trans,
	                      m, k,
	                      b_orig, b_rs_orig, b_cs_orig,
	                      &b,     &b_rs,     &b_cs );
	bli_ccreate_contigmr( uplo,
	                      m, m,
	                      c_orig, c_rs_orig, c_cs_orig,
	                      &c,     &c_rs,     &c_cs );

	// Start from the column-major interpretation of the strides.
	lda  = a_cs;
	inca = a_rs;
	ldb  = b_cs;
	incb = b_rs;
	ldc  = c_cs;
	incc = c_rs;

	c_is_col = bli_is_col_storage( c_rs, c_cs );
	a_is_col = bli_is_col_storage( a_rs, a_cs );
	b_is_col = bli_is_col_storage( b_rs, b_cs );

	// A row-stored C is handled the same way regardless of A and B: swap
	// its strides and address the opposite triangle.
	if ( !c_is_col )
	{
		bli_swap_ints( ldc, incc );
		bli_toggle_uplo( uplo );
	}

	// A and B must end up with matching storage. If exactly one of them is
	// row-stored it is copied to column-major below; if both are row-stored
	// they are re-interpreted together by swapping strides and toggling
	// trans; if both are column-stored nothing needs to change.
	if ( a_is_col && !b_is_col )
	{
		copy_b = TRUE;
	}
	else if ( !a_is_col && b_is_col )
	{
		copy_a = TRUE;
	}
	else if ( !a_is_col && !b_is_col )
	{
		bli_swap_ints( lda, inca );
		bli_swap_ints( ldb, incb );
		bli_toggle_trans( trans );
	}

	// Select the A operand: either a fresh column-major copy or A itself.
	if ( copy_a )
	{
		int rows_a;
		int cols_a;

		// trans decides the shape of A, and therefore the leading
		// dimension of its column-major copy.
		bli_set_dims_with_trans( trans, m, k, &rows_a, &cols_a );

		a_use    = bli_callocm( m, k );
		lda_use  = rows_a;
		inca_use = 1;

		bli_ccopymt( BLIS_NO_TRANSPOSE,
		             rows_a, cols_a,
		             a,     inca,     lda,
		             a_use, inca_use, lda_use );
	}
	else
	{
		a_use    = a;
		lda_use  = lda;
		inca_use = inca;
	}

	// Select the B operand in the same fashion.
	if ( copy_b )
	{
		int rows_b;
		int cols_b;

		bli_set_dims_with_trans( trans, m, k, &rows_b, &cols_b );

		b_use    = bli_callocm( m, k );
		ldb_use  = rows_b;
		incb_use = 1;

		bli_ccopymt( BLIS_NO_TRANSPOSE,
		             rows_b, cols_b,
		             b,     incb,     ldb,
		             b_use, incb_use, ldb_use );
	}
	else
	{
		b_use    = b;
		ldb_use  = ldb;
		incb_use = incb;
	}

	// Perform the update directly into C.
	bli_csyr2k_blas( uplo,
	                 trans,
	                 m, k,
	                 alpha,
	                 a_use, lda_use,
	                 b_use, ldb_use,
	                 beta,
	                 c, ldc );

	// Release the column-major copies, if any were made.
	if ( copy_a )
	{
		bli_cfree( a_use );
	}
	if ( copy_b )
	{
		bli_cfree( b_use );
	}

	// Release contiguous temporaries; for C the result is first copied back
	// to the caller's matrix using the original uplo and dimension.
	bli_cfree_contigm( a_orig, a_rs_orig, a_cs_orig,
	                   &a,     &a_rs,     &a_cs );
	bli_cfree_contigm( b_orig, b_rs_orig, b_cs_orig,
	                   &b,     &b_rs,     &b_cs );
	bli_cfree_saved_contigmr( uplo_orig,
	                          m_orig, m_orig,
	                          c_orig, c_rs_orig, c_cs_orig,
	                          &c,     &c_rs,     &c_cs );
}
| void bli_csyr2k_blas | ( | uplo_t | uplo, |
| trans_t | trans, | ||
| int | m, | ||
| int | k, | ||
| scomplex * | alpha, | ||
| scomplex * | a, | ||
| int | lda, | ||
| scomplex * | b, | ||
| int | ldb, | ||
| scomplex * | beta, | ||
| scomplex * | c, | ||
| int | ldc | ||
| ) |
References bli_is_conjtrans(), bli_param_map_to_netlib_trans(), bli_param_map_to_netlib_uplo(), BLIS_TRANSPOSE, cblas_csyr2k(), CblasColMajor, and F77_csyr2k().
Referenced by bli_csyr2k().
{
	// Forwards a single-precision complex syr2k request to the underlying
	// BLAS. Because BLAS syr2k has no conjugate-transpose option, a
	// conjugate-transpose trans is first demoted to a plain transpose; uplo
	// and trans are then translated to netlib form. The CBLAS path uses
	// column-major ordering; the Fortran-77 path passes integers by address.
#ifdef BLIS_ENABLE_CBLAS_INTERFACES
	enum CBLAS_ORDER     netlib_order = CblasColMajor;
	enum CBLAS_UPLO      netlib_uplo;
	enum CBLAS_TRANSPOSE netlib_trans;

	if ( bli_is_conjtrans( trans ) )
	{
		trans = BLIS_TRANSPOSE;
	}

	bli_param_map_to_netlib_uplo( uplo, &netlib_uplo );
	bli_param_map_to_netlib_trans( trans, &netlib_trans );

	cblas_csyr2k( netlib_order, netlib_uplo, netlib_trans,
	              m, k,
	              alpha,
	              a, lda,
	              b, ldb,
	              beta,
	              c, ldc );
#else
	char netlib_uplo;
	char netlib_trans;

	if ( bli_is_conjtrans( trans ) )
	{
		trans = BLIS_TRANSPOSE;
	}

	bli_param_map_to_netlib_uplo( uplo, &netlib_uplo );
	bli_param_map_to_netlib_trans( trans, &netlib_trans );

	F77_csyr2k( &netlib_uplo, &netlib_trans,
	            &m, &k,
	            alpha,
	            a, &lda,
	            b, &ldb,
	            beta,
	            c, &ldc );
#endif
}
| void bli_csyrk | ( | uplo_t | uplo, |
| trans_t | trans, | ||
| int | m, | ||
| int | k, | ||
| scomplex * | alpha, | ||
| scomplex * | a, | ||
| int | a_rs, | ||
| int | a_cs, | ||
| scomplex * | beta, | ||
| scomplex * | c, | ||
| int | c_rs, | ||
| int | c_cs | ||
| ) |
References bli_ccreate_contigmr(), bli_ccreate_contigmt(), bli_cfree_contigm(), bli_cfree_saved_contigmr(), bli_csyrk_blas(), bli_is_col_storage(), and bli_zero_dim2().
Referenced by FLA_Syrk_external().
{
	// Storage-agnostic front end for the single-precision complex syrk
	// operation performed by bli_csyrk_blas(). A and C may each be row- or
	// column-stored; the request is rewritten as a column-major problem and
	// the result is copied back into the caller's C.
	uplo_t    uplo_orig = uplo;
	int       m_orig    = m;
	scomplex* a_orig    = a;
	scomplex* c_orig    = c;
	int       a_rs_orig = a_rs;
	int       a_cs_orig = a_cs;
	int       c_rs_orig = c_rs;
	int       c_cs_orig = c_cs;
	int       lda, inca;
	int       ldc, incc;
	int       c_is_col;
	int       a_is_col;

	// Nothing to do for an empty problem.
	if ( bli_zero_dim2( m, k ) ) return;

	// Work on contiguous temporaries when the helpers decide the originals
	// cannot be used directly.
	bli_ccreate_contigmt( trans,
	                      m, k,
	                      a_orig, a_rs_orig, a_cs_orig,
	                      &a,     &a_rs,     &a_cs );
	bli_ccreate_contigmr( uplo,
	                      m, m,
	                      c_orig, c_rs_orig, c_cs_orig,
	                      &c,     &c_rs,     &c_cs );

	// Start from the column-major interpretation of the strides.
	lda  = a_cs;
	inca = a_rs;
	ldc  = c_cs;
	incc = c_rs;

	c_is_col = bli_is_col_storage( c_rs, c_cs );
	a_is_col = bli_is_col_storage( a_rs, a_cs );

	// Row-stored A: read it as the column-stored transpose, i.e. swap its
	// strides and toggle trans  ( A_r * A_r^T  ->  A_c^T * A_c ).
	if ( !a_is_col )
	{
		bli_swap_ints( lda, inca );
		bli_toggle_trans( trans );
	}

	// Row-stored C: swap its strides and address the opposite triangle
	// ( uplo( C_r )  ->  ~uplo( C_c ) ).
	if ( !c_is_col )
	{
		bli_swap_ints( ldc, incc );
		bli_toggle_uplo( uplo );
	}

	// Perform the update directly into C.
	bli_csyrk_blas( uplo,
	                trans,
	                m, k,
	                alpha,
	                a, lda,
	                beta,
	                c, ldc );

	// Release contiguous temporaries; for C the result is first copied back
	// to the caller's matrix using the original uplo and dimension.
	bli_cfree_contigm( a_orig, a_rs_orig, a_cs_orig,
	                   &a,     &a_rs,     &a_cs );
	bli_cfree_saved_contigmr( uplo_orig,
	                          m_orig, m_orig,
	                          c_orig, c_rs_orig, c_cs_orig,
	                          &c,     &c_rs,     &c_cs );
}
| void bli_csyrk_blas | ( | uplo_t | uplo, |
| trans_t | trans, | ||
| int | m, | ||
| int | k, | ||
| scomplex * | alpha, | ||
| scomplex * | a, | ||
| int | lda, | ||
| scomplex * | beta, | ||
| scomplex * | c, | ||
| int | ldc | ||
| ) |
References bli_param_map_to_netlib_trans(), bli_param_map_to_netlib_uplo(), cblas_csyrk(), CblasColMajor, and F77_csyrk().
Referenced by bli_csyrk().
{
	// Thin dispatch layer for the single-precision complex syrk kernel.
	// uplo and trans are translated to netlib form and the call goes either
	// to CBLAS (column-major ordering) or to the Fortran-77 interface, which
	// takes its integer arguments by address.
#ifdef BLIS_ENABLE_CBLAS_INTERFACES
	enum CBLAS_ORDER     netlib_order = CblasColMajor;
	enum CBLAS_UPLO      netlib_uplo;
	enum CBLAS_TRANSPOSE netlib_trans;

	bli_param_map_to_netlib_uplo( uplo, &netlib_uplo );
	bli_param_map_to_netlib_trans( trans, &netlib_trans );

	cblas_csyrk( netlib_order, netlib_uplo, netlib_trans,
	             m, k,
	             alpha,
	             a, lda,
	             beta,
	             c, ldc );
#else
	char netlib_uplo;
	char netlib_trans;

	bli_param_map_to_netlib_uplo( uplo, &netlib_uplo );
	bli_param_map_to_netlib_trans( trans, &netlib_trans );

	F77_csyrk( &netlib_uplo, &netlib_trans,
	           &m, &k,
	           alpha,
	           a, &lda,
	           beta,
	           c, &ldc );
#endif
}
| void bli_ctrmm | ( | side_t | side, |
| uplo_t | uplo, | ||
| trans_t | trans, | ||
| diag_t | diag, | ||
| int | m, | ||
| int | n, | ||
| scomplex * | alpha, | ||
| scomplex * | a, | ||
| int | a_rs, | ||
| int | a_cs, | ||
| scomplex * | b, | ||
| int | b_rs, | ||
| int | b_cs | ||
| ) |
References bli_callocm(), bli_cconjmr(), bli_ccopymrt(), bli_ccreate_contigm(), bli_ccreate_contigmr(), bli_cfree(), bli_cfree_contigm(), bli_cfree_saved_contigm(), bli_ctrmm_blas(), bli_is_col_storage(), bli_is_conjnotrans(), bli_set_dim_with_side(), bli_zero_dim2(), and BLIS_CONJ_NO_TRANSPOSE.
Referenced by bli_ctrmmsx(), and FLA_Trmm_external().
{
	// Storage-agnostic front end for the single-precision complex trmm
	// operation performed by bli_ctrmm_blas(). A and B may each be row- or
	// column-stored; the request is rewritten as a column-major problem, a
	// conjugated A is supplied when trans is conj-no-transpose, and the
	// result is copied back into the caller's B.
	int       m_orig    = m;
	int       n_orig    = n;
	scomplex* a_orig    = a;
	scomplex* b_orig    = b;
	int       a_rs_orig = a_rs;
	int       a_cs_orig = a_cs;
	int       b_rs_orig = b_rs;
	int       b_cs_orig = b_cs;
	scomplex* a_eff;
	int       dim_a;
	int       lda, inca;
	int       ldb, incb;
	int       lda_eff, inca_eff;
	int       a_is_temp;
	int       a_is_col;
	int       b_is_col;
	int       conj_a;
	int       owns_a_eff;

	// Nothing to do for an empty problem.
	if ( bli_zero_dim2( m, n ) ) return;

	// Work on contiguous temporaries when the helpers decide the originals
	// cannot be used directly. The order of the square matrix A is chosen
	// from m and n according to side.
	bli_set_dim_with_side( side, m, n, &dim_a );
	bli_ccreate_contigmr( uplo,
	                      dim_a, dim_a,
	                      a_orig, a_rs_orig, a_cs_orig,
	                      &a,     &a_rs,     &a_cs );
	bli_ccreate_contigm( m, n,
	                     b_orig, b_rs_orig, b_cs_orig,
	                     &b,     &b_rs,     &b_cs );

	// Remember whether a now points at a temporary; if so it may be
	// conjugated in place later instead of being copied a second time.
	a_is_temp = ( a != a_orig );

	// Start from the column-major interpretation of the strides.
	lda  = a_cs;
	inca = a_rs;
	ldb  = b_cs;
	incb = b_rs;

	b_is_col = bli_is_col_storage( b_rs, b_cs );
	a_is_col = bli_is_col_storage( a_rs, a_cs );

	// Row-stored A: swap its strides and address the opposite triangle.
	if ( !a_is_col )
	{
		bli_swap_ints( lda, inca );
		bli_toggle_uplo( uplo );
	}

	// Row-stored B: swap its strides, exchange m and n, and apply A from
	// the other side.
	if ( !b_is_col )
	{
		bli_swap_ints( ldb, incb );
		bli_swap_ints( m, n );
		bli_toggle_side( side );
	}

	// Each re-interpretation implies a transposition of A; they cancel when
	// both apply, so trans is toggled only if exactly one matrix was
	// row-stored.
	if ( !a_is_col != !b_is_col )
	{
		bli_toggle_trans( trans );
	}

	// By default the BLAS call reads A directly.
	a_eff    = a;
	lda_eff  = lda;
	inca_eff = inca;

	// The conj-no-transpose case is handled by giving BLAS a conjugated A.
	conj_a     = bli_is_conjnotrans( trans );
	owns_a_eff = ( conj_a && !a_is_temp );

	if ( conj_a )
	{
		bli_set_dim_with_side( side, m, n, &dim_a );

		if ( a_is_temp )
		{
			// a is already a private temporary: conjugate it in place.
			bli_cconjmr( uplo,
			             dim_a, dim_a,
			             a_eff, inca_eff, lda_eff );
		}
		else
		{
			// a is the caller's matrix: conjugate into a fresh column-major
			// dim_a-by-dim_a copy instead of modifying it.
			a_eff    = bli_callocm( dim_a, dim_a );
			lda_eff  = dim_a;
			inca_eff = 1;

			bli_ccopymrt( uplo,
			              BLIS_CONJ_NO_TRANSPOSE,
			              dim_a, dim_a,
			              a,     inca,     lda,
			              a_eff, inca_eff, lda_eff );
		}
	}

	bli_ctrmm_blas( side,
	                uplo,
	                trans,
	                diag,
	                m, n,
	                alpha,
	                a_eff, lda_eff,
	                b, ldb );

	// Only the separately allocated conjugated copy is released here; a
	// contiguous temporary is released by bli_cfree_contigm() below.
	if ( owns_a_eff )
	{
		bli_cfree( a_eff );
	}

	// Release contiguous temporaries; for B the result is first copied back
	// to the caller's matrix using the original m and n.
	bli_cfree_contigm( a_orig, a_rs_orig, a_cs_orig,
	                   &a,     &a_rs,     &a_cs );
	bli_cfree_saved_contigm( m_orig, n_orig,
	                         b_orig, b_rs_orig, b_cs_orig,
	                         &b,     &b_rs,     &b_cs );
}
| void bli_ctrmm_blas | ( | side_t | side, |
| uplo_t | uplo, | ||
| trans_t | trans, | ||
| diag_t | diag, | ||
| int | m, | ||
| int | n, | ||
| scomplex * | alpha, | ||
| scomplex * | a, | ||
| int | lda, | ||
| scomplex * | b, | ||
| int | ldb | ||
| ) |
References bli_param_map_to_netlib_diag(), bli_param_map_to_netlib_side(), bli_param_map_to_netlib_trans(), bli_param_map_to_netlib_uplo(), cblas_ctrmm(), CblasColMajor, and F77_ctrmm().
Referenced by bli_ctrmm().
{
    // Dispatch layer for single-precision complex trmm: converts the
    // side/uplo/trans/diag parameters with the bli_param_map_to_netlib_*()
    // helpers and forwards everything else untouched to the configured BLAS
    // binding. A and B are described by a leading dimension only.
#ifdef BLIS_ENABLE_CBLAS_INTERFACES
    enum CBLAS_SIDE      side_cblas;
    enum CBLAS_UPLO      uplo_cblas;
    enum CBLAS_TRANSPOSE trans_cblas;
    enum CBLAS_DIAG      diag_cblas;

    bli_param_map_to_netlib_side( side, &side_cblas );
    bli_param_map_to_netlib_uplo( uplo, &uplo_cblas );
    bli_param_map_to_netlib_trans( trans, &trans_cblas );
    bli_param_map_to_netlib_diag( diag, &diag_cblas );

    // The CBLAS binding is always invoked with column-major ordering.
    cblas_ctrmm( CblasColMajor,
                 side_cblas, uplo_cblas, trans_cblas, diag_cblas,
                 m, n,
                 alpha,
                 a, lda,
                 b, ldb );
#else
    char side_ch;
    char uplo_ch;
    char trans_ch;
    char diag_ch;

    bli_param_map_to_netlib_side( side, &side_ch );
    bli_param_map_to_netlib_uplo( uplo, &uplo_ch );
    bli_param_map_to_netlib_trans( trans, &trans_ch );
    bli_param_map_to_netlib_diag( diag, &diag_ch );

    // The Fortran binding takes every scalar argument by address.
    F77_ctrmm( &side_ch, &uplo_ch, &trans_ch, &diag_ch,
               &m, &n,
               alpha,
               a, &lda,
               b, &ldb );
#endif
}
| void bli_ctrmmsx | ( | side_t | side, |
| uplo_t | uplo, | ||
| trans_t | trans, | ||
| diag_t | diag, | ||
| int | m, | ||
| int | n, | ||
| scomplex * | alpha, | ||
| scomplex * | a, | ||
| int | a_rs, | ||
| int | a_cs, | ||
| scomplex * | b, | ||
| int | b_rs, | ||
| int | b_cs, | ||
| scomplex * | beta, | ||
| scomplex * | c, | ||
| int | c_rs, | ||
| int | c_cs | ||
| ) |
References bli_c1(), bli_callocm(), bli_caxpymt(), bli_ccopymt(), bli_ccreate_contigm(), bli_ccreate_contigmr(), bli_cfree(), bli_cfree_contigm(), bli_cfree_saved_contigm(), bli_cscalm(), bli_ctrmm(), bli_is_col_storage(), bli_set_dim_with_side(), bli_zero_dim2(), BLIS_NO_CONJUGATE, and BLIS_NO_TRANSPOSE.
Referenced by FLA_Trmmsx_external().
{
    // Forms the triangular product in a scratch copy of B via bli_ctrmm(),
    // scales C by beta, and then adds the scratch result into C. B itself is
    // only read.
    //
    // The *_orig values remember the caller's operands, because a/b/c and
    // their strides may be redirected to contiguous temporaries below.
    scomplex* const a_orig    = a;
    scomplex* const b_orig    = b;
    scomplex* const c_orig    = c;
    const int       a_rs_orig = a_rs;
    const int       a_cs_orig = a_cs;
    const int       b_rs_orig = b_rs;
    const int       b_cs_orig = b_cs;
    const int       c_rs_orig = c_rs;
    const int       c_cs_orig = c_cs;
    scomplex        unit      = bli_c1();
    scomplex*       b_work;
    int             b_work_rs, b_work_cs;
    int             a_dim;
    int             b_is_col;

    // Nothing to do for an empty problem.
    if ( bli_zero_dim2( m, n ) ) return;

    // Switch to contiguous temporaries wherever the helpers decide that is
    // necessary. A is a_dim-by-a_dim, with a_dim chosen from m or n by side.
    bli_set_dim_with_side( side, m, n, &a_dim );
    bli_ccreate_contigmr( uplo, a_dim, a_dim,
                          a_orig, a_rs_orig, a_cs_orig,
                          &a, &a_rs, &a_cs );
    bli_ccreate_contigm( m, n,
                         b_orig, b_rs_orig, b_cs_orig,
                         &b, &b_rs, &b_cs );
    bli_ccreate_contigm( m, n,
                         c_orig, c_rs_orig, c_cs_orig,
                         &c, &c_rs, &c_cs );

    // Scratch m-by-n matrix laid out with the same orientation as B.
    b_work    = bli_callocm( m, n );
    b_is_col  = bli_is_col_storage( b_rs, b_cs );
    b_work_rs = b_is_col ? 1 : n;
    b_work_cs = b_is_col ? m : 1;

    // b_work := B
    bli_ccopymt( BLIS_NO_TRANSPOSE, m, n,
                 b,      b_rs,      b_cs,
                 b_work, b_work_rs, b_work_cs );

    // Apply the triangular multiply in place on the scratch copy.
    bli_ctrmm( side, uplo, trans, diag,
               m, n,
               alpha,
               a,      a_rs,      a_cs,
               b_work, b_work_rs, b_work_cs );

    // C := beta * C
    bli_cscalm( BLIS_NO_CONJUGATE, m, n,
                beta,
                c, c_rs, c_cs );

    // C := C + b_work
    bli_caxpymt( BLIS_NO_TRANSPOSE, m, n,
                 &unit,
                 b_work, b_work_rs, b_work_cs,
                 c,      c_rs,      c_cs );

    bli_cfree( b_work );

    // Release the temporaries. Only C goes through the "saved" variant,
    // which takes the dimensions so the result can be copied back. m and n
    // are never modified in this routine, so they are passed directly.
    bli_cfree_contigm( a_orig, a_rs_orig, a_cs_orig,
                       &a, &a_rs, &a_cs );
    bli_cfree_contigm( b_orig, b_rs_orig, b_cs_orig,
                       &b, &b_rs, &b_cs );
    bli_cfree_saved_contigm( m, n,
                             c_orig, c_rs_orig, c_cs_orig,
                             &c, &c_rs, &c_cs );
}
| void bli_ctrsm | ( | side_t | side, |
| uplo_t | uplo, | ||
| trans_t | trans, | ||
| diag_t | diag, | ||
| int | m, | ||
| int | n, | ||
| scomplex * | alpha, | ||
| scomplex * | a, | ||
| int | a_rs, | ||
| int | a_cs, | ||
| scomplex * | b, | ||
| int | b_rs, | ||
| int | b_cs | ||
| ) |
References bli_callocm(), bli_cconjmr(), bli_ccopymrt(), bli_ccreate_contigm(), bli_ccreate_contigmr(), bli_cfree(), bli_cfree_contigm(), bli_cfree_saved_contigm(), bli_ctrsm_blas(), bli_is_col_storage(), bli_is_conjnotrans(), bli_set_dim_with_side(), bli_zero_dim2(), and BLIS_CONJ_NO_TRANSPOSE.
Referenced by bli_ctrsmsx(), FLA_LU_nopiv_opc_var1(), FLA_LU_nopiv_opc_var2(), FLA_LU_nopiv_opc_var3(), FLA_LU_piv_opc_var3(), and FLA_Trsm_external().
{
    // Strided front end for single-precision complex trsm. The operands are
    // made contiguous if needed, the storage orientation of A and B is
    // folded into side/uplo/trans so that a column-major BLAS call can be
    // used, and the conjugate-no-transpose case is handled by conjugating A
    // explicitly before calling bli_ctrsm_blas().
    //
    // The *_orig values remember the caller's operands; a/b and their
    // strides may be redirected to temporaries, and m/n may be swapped.
    const int m_orig    = m;
    const int n_orig    = n;
    scomplex* a_orig    = a;
    scomplex* b_orig    = b;
    const int a_rs_orig = a_rs;
    const int a_cs_orig = a_cs;
    const int b_rs_orig = b_rs;
    const int b_cs_orig = b_cs;
    scomplex* a_eff;                  // matrix actually handed to BLAS
    int       a_eff_ld, a_eff_inc;
    int       a_ld, a_inc;
    int       b_ld, b_inc;
    int       a_dim;
    int       a_is_temp;
    int       a_is_row, b_is_row;
    int       owns_conj_copy = 0;     // nonzero when a_eff was allocated here

    // Nothing to do for an empty problem.
    if ( bli_zero_dim2( m, n ) ) return;

    // Switch to contiguous temporaries wherever the helpers decide that is
    // necessary. A is a_dim-by-a_dim, with a_dim chosen from m or n by side.
    bli_set_dim_with_side( side, m, n, &a_dim );
    bli_ccreate_contigmr( uplo, a_dim, a_dim,
                          a_orig, a_rs_orig, a_cs_orig,
                          &a, &a_rs, &a_cs );
    bli_ccreate_contigm( m, n,
                         b_orig, b_rs_orig, b_cs_orig,
                         &b, &b_rs, &b_cs );

    // If A now points at a temporary it can be conjugated in place later
    // instead of being copied a second time.
    a_is_temp = ( a != a_orig );

    // Start from a column-major reading of both operands.
    a_ld  = a_cs;
    a_inc = a_rs;
    b_ld  = b_cs;
    b_inc = b_rs;

    a_is_row = !bli_is_col_storage( a_rs, a_cs );
    b_is_row = !bli_is_col_storage( b_rs, b_cs );

    // A row-stored A is read as a column-major matrix with the opposite
    // triangle stored.
    if ( a_is_row )
    {
        bli_swap_ints( a_ld, a_inc );
        bli_toggle_uplo( uplo );
    }

    // A row-stored B is read as its column-major transpose, which moves A
    // to the other side and exchanges the dimensions.
    if ( b_is_row )
    {
        bli_swap_ints( b_ld, b_inc );
        bli_swap_ints( m, n );
        bli_toggle_side( side );
    }

    // Exactly one reinterpretation leaves a transpose on A; two cancel.
    if ( a_is_row != b_is_row )
    {
        bli_toggle_trans( trans );
    }

    // By default BLAS sees A as-is.
    a_eff     = a;
    a_eff_ld  = a_ld;
    a_eff_inc = a_inc;

    // Conjugate-no-transpose is realised by conjugating the stored triangle
    // of A up front.
    if ( bli_is_conjnotrans( trans ) )
    {
        bli_set_dim_with_side( side, m, n, &a_dim );

        if ( a_is_temp )
        {
            // A is already a private temporary: conjugate it in place.
            bli_cconjmr( uplo, a_dim, a_dim,
                         a_eff, a_eff_inc, a_eff_ld );
        }
        else
        {
            // A is still the caller's matrix: build a conjugated copy.
            a_eff     = bli_callocm( a_dim, a_dim );
            a_eff_ld  = a_dim;
            a_eff_inc = 1;
            bli_ccopymrt( uplo, BLIS_CONJ_NO_TRANSPOSE, a_dim, a_dim,
                          a,     a_inc,     a_ld,
                          a_eff, a_eff_inc, a_eff_ld );
            owns_conj_copy = 1;
        }
    }

    bli_ctrsm_blas( side, uplo, trans, diag,
                    m, n,
                    alpha,
                    a_eff, a_eff_ld,
                    b, b_ld );

    if ( owns_conj_copy )
    {
        bli_cfree( a_eff );
    }

    // Release the temporaries. B goes through the "saved" variant, which
    // receives the original (unswapped) dimensions.
    bli_cfree_contigm( a_orig, a_rs_orig, a_cs_orig,
                       &a, &a_rs, &a_cs );
    bli_cfree_saved_contigm( m_orig, n_orig,
                             b_orig, b_rs_orig, b_cs_orig,
                             &b, &b_rs, &b_cs );
}
| void bli_ctrsm_blas | ( | side_t | side, |
| uplo_t | uplo, | ||
| trans_t | trans, | ||
| diag_t | diag, | ||
| int | m, | ||
| int | n, | ||
| scomplex * | alpha, | ||
| scomplex * | a, | ||
| int | lda, | ||
| scomplex * | b, | ||
| int | ldb | ||
| ) |
References bli_param_map_to_netlib_diag(), bli_param_map_to_netlib_side(), bli_param_map_to_netlib_trans(), bli_param_map_to_netlib_uplo(), cblas_ctrsm(), CblasColMajor, and F77_ctrsm().
Referenced by bli_ctrsm().
{
    // Dispatch layer for single-precision complex trsm: converts the
    // side/uplo/trans/diag parameters with the bli_param_map_to_netlib_*()
    // helpers and forwards everything else untouched to the configured BLAS
    // binding. A and B are described by a leading dimension only.
#ifdef BLIS_ENABLE_CBLAS_INTERFACES
    enum CBLAS_SIDE      side_cblas;
    enum CBLAS_UPLO      uplo_cblas;
    enum CBLAS_TRANSPOSE trans_cblas;
    enum CBLAS_DIAG      diag_cblas;

    bli_param_map_to_netlib_side( side, &side_cblas );
    bli_param_map_to_netlib_uplo( uplo, &uplo_cblas );
    bli_param_map_to_netlib_trans( trans, &trans_cblas );
    bli_param_map_to_netlib_diag( diag, &diag_cblas );

    // The CBLAS binding is always invoked with column-major ordering.
    cblas_ctrsm( CblasColMajor,
                 side_cblas, uplo_cblas, trans_cblas, diag_cblas,
                 m, n,
                 alpha,
                 a, lda,
                 b, ldb );
#else
    char side_ch;
    char uplo_ch;
    char trans_ch;
    char diag_ch;

    bli_param_map_to_netlib_side( side, &side_ch );
    bli_param_map_to_netlib_uplo( uplo, &uplo_ch );
    bli_param_map_to_netlib_trans( trans, &trans_ch );
    bli_param_map_to_netlib_diag( diag, &diag_ch );

    // The Fortran binding takes every scalar argument by address.
    F77_ctrsm( &side_ch, &uplo_ch, &trans_ch, &diag_ch,
               &m, &n,
               alpha,
               a, &lda,
               b, &ldb );
#endif
}
| void bli_ctrsmsx | ( | side_t | side, |
| uplo_t | uplo, | ||
| trans_t | trans, | ||
| diag_t | diag, | ||
| int | m, | ||
| int | n, | ||
| scomplex * | alpha, | ||
| scomplex * | a, | ||
| int | a_rs, | ||
| int | a_cs, | ||
| scomplex * | b, | ||
| int | b_rs, | ||
| int | b_cs, | ||
| scomplex * | beta, | ||
| scomplex * | c, | ||
| int | c_rs, | ||
| int | c_cs | ||
| ) |
References bli_c1(), bli_callocm(), bli_caxpymt(), bli_ccopymt(), bli_ccreate_contigm(), bli_ccreate_contigmr(), bli_cfree(), bli_cfree_contigm(), bli_cfree_saved_contigm(), bli_cscalm(), bli_ctrsm(), bli_is_col_storage(), bli_set_dim_with_side(), bli_zero_dim2(), BLIS_NO_CONJUGATE, and BLIS_NO_TRANSPOSE.
Referenced by FLA_Trsmsx_external().
{
    // Runs bli_ctrsm() on a scratch copy of B, scales C by beta, and then
    // adds the scratch result into C. B itself is only read.
    //
    // The *_orig values remember the caller's operands, because a/b/c and
    // their strides may be redirected to contiguous temporaries below.
    scomplex* const a_orig    = a;
    scomplex* const b_orig    = b;
    scomplex* const c_orig    = c;
    const int       a_rs_orig = a_rs;
    const int       a_cs_orig = a_cs;
    const int       b_rs_orig = b_rs;
    const int       b_cs_orig = b_cs;
    const int       c_rs_orig = c_rs;
    const int       c_cs_orig = c_cs;
    scomplex        unit      = bli_c1();
    scomplex*       b_work;
    int             b_work_rs, b_work_cs;
    int             a_dim;
    int             b_is_col;

    // Nothing to do for an empty problem.
    if ( bli_zero_dim2( m, n ) ) return;

    // Switch to contiguous temporaries wherever the helpers decide that is
    // necessary. A is a_dim-by-a_dim, with a_dim chosen from m or n by side.
    bli_set_dim_with_side( side, m, n, &a_dim );
    bli_ccreate_contigmr( uplo, a_dim, a_dim,
                          a_orig, a_rs_orig, a_cs_orig,
                          &a, &a_rs, &a_cs );
    bli_ccreate_contigm( m, n,
                         b_orig, b_rs_orig, b_cs_orig,
                         &b, &b_rs, &b_cs );
    bli_ccreate_contigm( m, n,
                         c_orig, c_rs_orig, c_cs_orig,
                         &c, &c_rs, &c_cs );

    // Scratch m-by-n matrix laid out with the same orientation as B.
    b_work    = bli_callocm( m, n );
    b_is_col  = bli_is_col_storage( b_rs, b_cs );
    b_work_rs = b_is_col ? 1 : n;
    b_work_cs = b_is_col ? m : 1;

    // b_work := B
    bli_ccopymt( BLIS_NO_TRANSPOSE, m, n,
                 b,      b_rs,      b_cs,
                 b_work, b_work_rs, b_work_cs );

    // Apply the triangular solve in place on the scratch copy.
    bli_ctrsm( side, uplo, trans, diag,
               m, n,
               alpha,
               a,      a_rs,      a_cs,
               b_work, b_work_rs, b_work_cs );

    // C := beta * C
    bli_cscalm( BLIS_NO_CONJUGATE, m, n,
                beta,
                c, c_rs, c_cs );

    // C := C + b_work
    bli_caxpymt( BLIS_NO_TRANSPOSE, m, n,
                 &unit,
                 b_work, b_work_rs, b_work_cs,
                 c,      c_rs,      c_cs );

    bli_cfree( b_work );

    // Release the temporaries. Only C goes through the "saved" variant,
    // which takes the dimensions so the result can be copied back. m and n
    // are never modified in this routine, so they are passed directly.
    bli_cfree_contigm( a_orig, a_rs_orig, a_cs_orig,
                       &a, &a_rs, &a_cs );
    bli_cfree_contigm( b_orig, b_rs_orig, b_cs_orig,
                       &b, &b_rs, &b_cs );
    bli_cfree_saved_contigm( m, n,
                             c_orig, c_rs_orig, c_cs_orig,
                             &c, &c_rs, &c_cs );
}
| void bli_dgemm | ( | trans_t | transa, |
| trans_t | transb, | ||
| int | m, | ||
| int | k, | ||
| int | n, | ||
| double * | alpha, | ||
| double * | a, | ||
| int | a_rs, | ||
| int | a_cs, | ||
| double * | b, | ||
| int | b_rs, | ||
| int | b_cs, | ||
| double * | beta, | ||
| double * | c, | ||
| int | c_rs, | ||
| int | c_cs | ||
| ) |
References bli_d0(), bli_d1(), bli_dallocm(), bli_daxpymt(), bli_dcreate_contigm(), bli_dcreate_contigmt(), bli_dfree(), bli_dfree_contigm(), bli_dfree_saved_contigm(), bli_dgemm_blas(), bli_dscalm(), bli_is_col_storage(), bli_zero_dim3(), BLIS_NO_CONJUGATE, and BLIS_TRANSPOSE.
Referenced by FLA_Bsvd_v_opd_var2(), FLA_Bsvd_v_opz_var2(), FLA_Gemm_external(), FLA_Tevd_v_opd_var2(), FLA_Tevd_v_opd_var4(), FLA_Tevd_v_opz_var2(), and FLA_Tevd_v_opz_var4().
{
// Strided front end for double-precision gemm: updates the m-by-n matrix C
// with alpha * tr( A ) * tr( B ) plus beta * C, where every operand is given
// by a pointer and a (row stride, column stride) pair. The routine rewrites
// the problem so that it can be handed to the column-major bli_dgemm_blas().
//
// Outline:
//   1. Early exit (C is only scaled by beta) when any dimension is zero.
//   2. Replace operands by contiguous temporaries where the helpers require.
//   3. For each of the eight row/column storage combinations of C, A and B,
//      adjust leading dimensions, transposition flags, dimensions and
//      operand order.
//   4. Either call gemm directly into C, or (two cases) call gemm into a
//      temporary and accumulate its transpose into C.
//   5. Release temporaries, copying C back if it was replaced.
//
// The *_save variables hold the caller's original arguments; m, n, a, b and
// the stride variables may all be modified below.
int m_save = m;
int n_save = n;
double* a_save = a;
double* b_save = b;
double* c_save = c;
int a_rs_save = a_rs;
int a_cs_save = a_cs;
int b_rs_save = b_rs;
int b_cs_save = b_cs;
int c_rs_save = c_rs;
int c_cs_save = c_cs;
double zero = bli_d0();
double one = bli_d1();
double* a_unswap;
double* b_unswap;
double* c_trans;
int lda, inca;
int ldb, incb;
int ldc, incc;
int ldc_trans, incc_trans;
int m_gemm, n_gemm;
int gemm_needs_axpyt = FALSE;
// Return early if possible. With an empty product the only remaining work
// is scaling C by beta.
if ( bli_zero_dim3( m, k, n ) )
{
bli_dscalm( BLIS_NO_CONJUGATE,
m,
n,
beta,
c, c_rs, c_cs );
return;
}
// If necessary, allocate, initialize, and use a temporary contiguous
// copy of each matrix rather than the original matrices.
bli_dcreate_contigmt( transa,
m,
k,
a_save, a_rs_save, a_cs_save,
&a, &a_rs, &a_cs );
bli_dcreate_contigmt( transb,
k,
n,
b_save, b_rs_save, b_cs_save,
&b, &b_rs, &b_cs );
bli_dcreate_contigm( m,
n,
c_save, c_rs_save, c_cs_save,
&c, &c_rs, &c_cs );
// These are used to track the original values of a and b prior to any
// operand swapping that might take place. This is necessary for proper
// freeing of memory when one is a temporary contiguous matrix.
a_unswap = a;
b_unswap = b;
// These are used to track the dimensions of the product of the
// A and B operands to the BLAS invocation of gemm. These differ
// from m and n when the operands need to be swapped.
m_gemm = m;
n_gemm = n;
// Initialize with values assuming column-major storage.
lda = a_cs;
inca = a_rs;
ldb = b_cs;
incb = b_rs;
ldc = c_cs;
incc = c_rs;
// Adjust the parameters based on the storage of each matrix.
// In the comments below a subscript _c / _r denotes column / row storage;
// every "effective operation" is expressed purely in column-major terms.
if ( bli_is_col_storage( c_rs, c_cs ) )
{
if ( bli_is_col_storage( a_rs, a_cs ) )
{
if ( bli_is_col_storage( b_rs, b_cs ) )
{
// requested operation: C_c += tr( A_c ) * tr( B_c )
// effective operation: C_c += tr( A_c ) * tr( B_c )
}
else // if ( bli_is_row_storage( b_rs, b_cs ) )
{
// requested operation: C_c += tr( A_c ) * tr( B_r )
// effective operation: C_c += tr( A_c ) * tr( B_c )^T
bli_swap_ints( ldb, incb );
bli_toggle_trans( transb );
}
}
else // if ( bli_is_row_storage( a_rs, a_cs ) )
{
if ( bli_is_col_storage( b_rs, b_cs ) )
{
// requested operation: C_c += tr( A_r ) * tr( B_c )
// effective operation: C_c += tr( A_c )^T * tr( B_c )
bli_swap_ints( lda, inca );
bli_toggle_trans( transa );
}
else // if ( bli_is_row_storage( b_rs, b_cs ) )
{
// requested operation: C_c += tr( A_r ) * tr( B_r )
// effective operation: C_c += ( tr( B_c ) * tr( A_c ) )^T
// The operands trade places, so the product is n-by-m and must be
// transposed into C afterwards (gemm_needs_axpyt).
bli_swap_ints( lda, inca );
bli_swap_ints( ldb, incb );
bli_dswap_pointers( a, b );
bli_swap_ints( lda, ldb );
bli_swap_ints( inca, incb );
bli_swap_trans( transa, transb );
gemm_needs_axpyt = TRUE;
bli_swap_ints( m_gemm, n_gemm );
}
}
}
else // if ( bli_is_row_storage( c_rs, c_cs ) )
{
if ( bli_is_col_storage( a_rs, a_cs ) )
{
if ( bli_is_col_storage( b_rs, b_cs ) )
{
// requested operation: C_r += tr( A_c ) * tr( B_c )
// effective operation: C_c += ( tr( A_c ) * tr( B_c ) )^T
// m and n now describe C viewed as column-major; m_gemm and n_gemm
// keep the dimensions of the untransposed product.
bli_swap_ints( ldc, incc );
bli_swap_ints( m, n );
gemm_needs_axpyt = TRUE;
}
else // if ( bli_is_row_storage( b_rs, b_cs ) )
{
// requested operation: C_r += tr( A_c ) * tr( B_r )
// effective operation: C_c += tr( B_c ) * tr( A_c )^T
bli_swap_ints( ldc, incc );
bli_swap_ints( ldb, incb );
bli_toggle_trans( transa );
bli_swap_ints( m, n );
bli_swap_ints( m_gemm, n_gemm );
bli_dswap_pointers( a, b );
bli_swap_ints( lda, ldb );
bli_swap_ints( inca, incb );
bli_swap_trans( transa, transb );
}
}
else // if ( bli_is_row_storage( a_rs, a_cs ) )
{
if ( bli_is_col_storage( b_rs, b_cs ) )
{
// requested operation: C_r += tr( A_r ) * tr( B_c )
// effective operation: C_c += tr( B_c )^T * tr( A_c )
bli_swap_ints( ldc, incc );
bli_swap_ints( lda, inca );
bli_toggle_trans( transb );
bli_swap_ints( m, n );
bli_swap_ints( m_gemm, n_gemm );
bli_dswap_pointers( a, b );
bli_swap_ints( lda, ldb );
bli_swap_ints( inca, incb );
bli_swap_trans( transa, transb );
}
else // if ( bli_is_row_storage( b_rs, b_cs ) )
{
// requested operation: C_r += tr( A_r ) * tr( B_r )
// effective operation: C_c += tr( B_c ) * tr( A_c )
bli_swap_ints( lda, inca );
bli_swap_ints( ldb, incb );
bli_swap_ints( ldc, incc );
bli_swap_ints( m, n );
bli_swap_ints( m_gemm, n_gemm );
bli_dswap_pointers( a, b );
bli_swap_ints( lda, ldb );
bli_swap_ints( inca, incb );
bli_swap_trans( transa, transb );
}
}
}
// There are two cases where we need to perform the gemm and then axpy
// the result into C with a transposition. We handle those cases here.
if ( gemm_needs_axpyt )
{
// We need a temporary matrix for holding C^T. Notice that m and n
// represent the dimensions of C, while m_gemm and n_gemm are the
// dimensions of the actual product op(A)*op(B), which may be n-by-m
// since the operands may have been swapped.
c_trans = bli_dallocm( m_gemm, n_gemm );
ldc_trans = m_gemm;
incc_trans = 1;
// Compute tr( A ) * tr( B ), where A and B may have been swapped
// to reference the other, and store the result in C_trans.
// beta is passed as zero here so the contents of the freshly allocated
// C_trans do not contribute to the result.
bli_dgemm_blas( transa,
transb,
m_gemm,
n_gemm,
k,
alpha,
a, lda,
b, ldb,
&zero,
c_trans, ldc_trans );
// Scale C by beta.
bli_dscalm( BLIS_NO_CONJUGATE,
m,
n,
beta,
c, incc, ldc );
// And finally, accumulate the matrix product in C_trans into C
// with a transpose.
bli_daxpymt( BLIS_TRANSPOSE,
m,
n,
&one,
c_trans, incc_trans, ldc_trans,
c, incc, ldc );
// Free the temporary matrix for C.
bli_dfree( c_trans );
}
else // no extra axpyt step needed
{
// gemm applies beta to C and accumulates the product in a single call.
bli_dgemm_blas( transa,
transb,
m_gemm,
n_gemm,
k,
alpha,
a, lda,
b, ldb,
beta,
c, ldc );
}
// Free any temporary contiguous matrices, copying the result back to
// the original matrix.
// a_unswap / b_unswap are used instead of a / b because the latter may
// have been exchanged above.
bli_dfree_contigm( a_save, a_rs_save, a_cs_save,
&a_unswap, &a_rs, &a_cs );
bli_dfree_contigm( b_save, b_rs_save, b_cs_save,
&b_unswap, &b_rs, &b_cs );
bli_dfree_saved_contigm( m_save,
n_save,
c_save, c_rs_save, c_cs_save,
&c, &c_rs, &c_cs );
}
| void bli_dgemm_blas | ( | trans_t | transa, |
| trans_t | transb, | ||
| int | m, | ||
| int | n, | ||
| int | k, | ||
| double * | alpha, | ||
| double * | a, | ||
| int | lda, | ||
| double * | b, | ||
| int | ldb, | ||
| double * | beta, | ||
| double * | c, | ||
| int | ldc | ||
| ) |
References bli_param_map_to_netlib_trans(), cblas_dgemm(), CblasColMajor, and F77_dgemm().
Referenced by bli_dgemm().
{
    // Dispatch layer for double-precision gemm: converts the two
    // transposition parameters with bli_param_map_to_netlib_trans() and
    // forwards everything else untouched to the configured BLAS binding.
    // A, B and C are described by a leading dimension only.
#ifdef BLIS_ENABLE_CBLAS_INTERFACES
    enum CBLAS_TRANSPOSE transa_cblas;
    enum CBLAS_TRANSPOSE transb_cblas;

    bli_param_map_to_netlib_trans( transa, &transa_cblas );
    bli_param_map_to_netlib_trans( transb, &transb_cblas );

    // The CBLAS binding is always invoked with column-major ordering and
    // receives the real scalars alpha and beta by value.
    cblas_dgemm( CblasColMajor,
                 transa_cblas, transb_cblas,
                 m, n, k,
                 *alpha,
                 a, lda,
                 b, ldb,
                 *beta,
                 c, ldc );
#else
    char transa_ch;
    char transb_ch;

    bli_param_map_to_netlib_trans( transa, &transa_ch );
    bli_param_map_to_netlib_trans( transb, &transb_ch );

    // The Fortran binding takes every scalar argument by address.
    F77_dgemm( &transa_ch, &transb_ch,
               &m, &n, &k,
               alpha,
               a, &lda,
               b, &ldb,
               beta,
               c, &ldc );
#endif
}
| void bli_dhemm | ( | side_t | side, |
| uplo_t | uplo, | ||
| int | m, | ||
| int | n, | ||
| double * | alpha, | ||
| double * | a, | ||
| int | a_rs, | ||
| int | a_cs, | ||
| double * | b, | ||
| int | b_rs, | ||
| int | b_cs, | ||
| double * | beta, | ||
| double * | c, | ||
| int | c_rs, | ||
| int | c_cs | ||
| ) |
References bli_dsymm().
{
    // For real double-precision data the Hermitian product is the symmetric
    // product, so every argument is forwarded unchanged to bli_dsymm().
    bli_dsymm( side, uplo,
               m, n,
               alpha,
               a, a_rs, a_cs,
               b, b_rs, b_cs,
               beta,
               c, c_rs, c_cs );
}
| void bli_dher2k | ( | uplo_t | uplo, |
| trans_t | trans, | ||
| int | m, | ||
| int | k, | ||
| double * | alpha, | ||
| double * | a, | ||
| int | a_rs, | ||
| int | a_cs, | ||
| double * | b, | ||
| int | b_rs, | ||
| int | b_cs, | ||
| double * | beta, | ||
| double * | c, | ||
| int | c_rs, | ||
| int | c_cs | ||
| ) |
References bli_dsyr2k().
{
    // For real double-precision data the Hermitian rank-2k update is the
    // symmetric one, so every argument is forwarded unchanged to
    // bli_dsyr2k().
    bli_dsyr2k( uplo, trans,
                m, k,
                alpha,
                a, a_rs, a_cs,
                b, b_rs, b_cs,
                beta,
                c, c_rs, c_cs );
}
| void bli_dherk | ( | uplo_t | uplo, |
| trans_t | trans, | ||
| int | m, | ||
| int | k, | ||
| double * | alpha, | ||
| double * | a, | ||
| int | a_rs, | ||
| int | a_cs, | ||
| double * | beta, | ||
| double * | c, | ||
| int | c_rs, | ||
| int | c_cs | ||
| ) |
References bli_dsyrk().
{
    // For real double-precision data the Hermitian rank-k update is the
    // symmetric one, so every argument is forwarded unchanged to
    // bli_dsyrk().
    bli_dsyrk( uplo, trans,
               m, k,
               alpha,
               a, a_rs, a_cs,
               beta,
               c, c_rs, c_cs );
}
| void bli_dsymm | ( | side_t | side, |
| uplo_t | uplo, | ||
| int | m, | ||
| int | n, | ||
| double * | alpha, | ||
| double * | a, | ||
| int | a_rs, | ||
| int | a_cs, | ||
| double * | b, | ||
| int | b_rs, | ||
| int | b_cs, | ||
| double * | beta, | ||
| double * | c, | ||
| int | c_rs, | ||
| int | c_cs | ||
| ) |
References bli_d0(), bli_d1(), bli_dallocm(), bli_daxpymt(), bli_dcopymt(), bli_dcreate_contigm(), bli_dcreate_contigmr(), bli_dfree(), bli_dfree_contigm(), bli_dfree_saved_contigm(), bli_dscalm(), bli_dsymm_blas(), bli_is_col_storage(), bli_set_dim_with_side(), bli_zero_dim2(), BLIS_NO_CONJUGATE, BLIS_NO_TRANSPOSE, and BLIS_TRANSPOSE.
Referenced by bli_dhemm(), FLA_Hemm_external(), and FLA_Symm_external().
{
// Strided front end for double-precision symm: updates the m-by-n matrix C
// with alpha times the product of the symmetric matrix A (stored in the
// triangle given by uplo, applied from the side given by side) and B, plus
// beta * C. Every operand is given by a pointer and a (row stride, column
// stride) pair; the routine rewrites the problem so that it can be handed to
// the column-major bli_dsymm_blas().
//
// Outline:
//   1. Early exit when m or n is zero.
//   2. Replace operands by contiguous temporaries where the helpers require.
//   3. For each of the eight row/column storage combinations of C, A and B,
//      adjust leading dimensions, side, uplo and dimensions, and record
//      whether B must be copied and/or the result must be transposed.
//   4. Call symm either directly into C or into a temporary that is then
//      accumulated into C with a transpose.
//   5. Release temporaries, copying C back if it was replaced.
//
// The *_save variables hold the caller's original arguments; m, n and the
// stride variables may be modified below.
int m_save = m;
int n_save = n;
double* a_save = a;
double* b_save = b;
double* c_save = c;
int a_rs_save = a_rs;
int a_cs_save = a_cs;
int b_rs_save = b_rs;
int b_cs_save = b_cs;
int c_rs_save = c_rs;
int c_cs_save = c_cs;
double zero = bli_d0();
double one = bli_d1();
double* b_copy;
double* c_trans;
int dim_a;
int lda, inca;
int ldb, incb;
int ldc, incc;
int ldb_copy, incb_copy;
int ldc_trans, incc_trans;
int symm_needs_copyb = FALSE;
int symm_needs_transb = FALSE;
int symm_needs_axpyt = FALSE;
// Return early if possible.
if ( bli_zero_dim2( m, n ) ) return;
// If necessary, allocate, initialize, and use a temporary contiguous
// copy of each matrix rather than the original matrices.
// A is dim_a-by-dim_a, with dim_a chosen from m or n according to side.
bli_set_dim_with_side( side, m, n, &dim_a );
bli_dcreate_contigmr( uplo,
dim_a,
dim_a,
a_save, a_rs_save, a_cs_save,
&a, &a_rs, &a_cs );
bli_dcreate_contigm( m,
n,
b_save, b_rs_save, b_cs_save,
&b, &b_rs, &b_cs );
bli_dcreate_contigm( m,
n,
c_save, c_rs_save, c_cs_save,
&c, &c_rs, &c_cs );
// Initialize with values assuming column-major storage.
lda = a_cs;
inca = a_rs;
ldb = b_cs;
incb = b_rs;
ldc = c_cs;
incc = c_rs;
// Adjust the parameters based on the storage of each matrix.
// In the comments below a subscript _c / _r denotes column / row storage.
// The conj( ) annotations have no effect for real data.
if ( bli_is_col_storage( c_rs, c_cs ) )
{
if ( bli_is_col_storage( a_rs, a_cs ) )
{
if ( bli_is_col_storage( b_rs, b_cs ) )
{
// requested operation: C_c += uplo( A_c ) * B_c
// effective operation: C_c += uplo( A_c ) * B_c
}
else // if ( bli_is_row_storage( b_rs, b_cs ) )
{
// requested operation: C_c += uplo( A_c ) * B_r
// effective operation: C_c += uplo( A_c ) * B_c
// B is copied into a column-major temporary further below.
symm_needs_copyb = TRUE;
}
}
else // if ( bli_is_row_storage( a_rs, a_cs ) )
{
if ( bli_is_col_storage( b_rs, b_cs ) )
{
// requested operation: C_c += uplo( A_r ) * B_c
// effective operation: C_c += ~uplo( conj( A_c ) ) * B_c
bli_swap_ints( lda, inca );
bli_toggle_uplo( uplo );
}
else // if ( bli_is_row_storage( b_rs, b_cs ) )
{
// requested operation: C_c += uplo( A_r ) * B_r
// effective operation: C_c += ( B_c * ~uplo( conj( A_c ) ) )^T
bli_swap_ints( lda, inca );
bli_swap_ints( ldb, incb );
bli_toggle_side( side );
bli_toggle_uplo( uplo );
symm_needs_axpyt = TRUE;
}
}
}
else // if ( bli_is_row_storage( c_rs, c_cs ) )
{
if ( bli_is_col_storage( a_rs, a_cs ) )
{
if ( bli_is_col_storage( b_rs, b_cs ) )
{
// requested operation: C_r += uplo( A_c ) * B_c
// effective operation: C_c += ( uplo( A_c ) * B_c )^T
bli_swap_ints( ldc, incc );
bli_swap_ints( m, n );
symm_needs_axpyt = TRUE;
}
else // if ( bli_is_row_storage( b_rs, b_cs ) )
{
// requested operation: C_r += uplo( A_c ) * B_r
// effective operation: C_c += B_c * uplo( A_c )
// Only the side changes; uplo is left untouched in this branch because
// A stays column-stored.
bli_swap_ints( ldc, incc );
bli_swap_ints( ldb, incb );
bli_swap_ints( m, n );
bli_toggle_side( side );
}
}
else // if ( bli_is_row_storage( a_rs, a_cs ) )
{
if ( bli_is_col_storage( b_rs, b_cs ) )
{
// requested operation: C_r += uplo( A_r ) * B_c
// effective operation: C_c += B_c^T * ~uplo( A_c )
// B is copied with a transposition further below.
bli_swap_ints( ldc, incc );
bli_swap_ints( lda, inca );
bli_swap_ints( m, n );
bli_toggle_side( side );
bli_toggle_uplo( uplo );
symm_needs_copyb = TRUE;
symm_needs_transb = TRUE;
}
else // if ( bli_is_row_storage( b_rs, b_cs ) )
{
// requested operation: C_r += uplo( A_r ) * B_r
// effective operation: C_c += B_c * conj( ~uplo( A_c ) )
bli_swap_ints( ldc, incc );
bli_swap_ints( lda, inca );
bli_swap_ints( ldb, incb );
bli_swap_ints( m, n );
bli_toggle_uplo( uplo );
bli_toggle_side( side );
}
}
}
// We need a temporary matrix for the cases where B needs to be copied.
// Until then b_copy simply aliases B.
b_copy = b;
ldb_copy = ldb;
incb_copy = incb;
// There are two cases where we need to make a copy of B: one where the
// copy's dimensions are transposed from the original B, and one where
// the dimensions are not swapped.
if ( symm_needs_copyb )
{
trans_t transb;
// Set transb, which determines whether or not we need to copy from B
// as if it needs a transposition. If a transposition is needed, then
// m and n have already been swapped. So in either case m
// represents the leading dimension of the copy.
if ( symm_needs_transb ) transb = BLIS_TRANSPOSE;
else transb = BLIS_NO_TRANSPOSE;
b_copy = bli_dallocm( m, n );
ldb_copy = m;
incb_copy = 1;
bli_dcopymt( transb,
m,
n,
b, incb, ldb,
b_copy, incb_copy, ldb_copy );
}
// There are two cases where we need to perform the symm and then axpy
// the result into C with a transposition. We handle those cases here.
if ( symm_needs_axpyt )
{
// We need a temporary matrix for holding C^T. Notice that m and n
// represent the dimensions of C, and thus C_trans is n-by-m
// (interpreting both as column-major matrices). So the leading
// dimension of the temporary matrix holding C^T is n.
c_trans = bli_dallocm( n, m );
ldc_trans = n;
incc_trans = 1;
// Compute A * B (or B * A) and store the result in C_trans.
// Note that there is no overlap between the axpyt cases and
// the conja/copyb cases, hence the use of a, b, lda, and ldb.
// beta is passed as zero here so the contents of the freshly allocated
// C_trans do not contribute to the result.
bli_dsymm_blas( side,
uplo,
n,
m,
alpha,
a, lda,
b, ldb,
&zero,
c_trans, ldc_trans );
// Scale C by beta.
bli_dscalm( BLIS_NO_CONJUGATE,
m,
n,
beta,
c, incc, ldc );
// And finally, accumulate the matrix product in C_trans into C
// with a transpose.
bli_daxpymt( BLIS_TRANSPOSE,
m,
n,
&one,
c_trans, incc_trans, ldc_trans,
c, incc, ldc );
// Free the temporary matrix for C.
bli_dfree( c_trans );
}
else // no extra axpyt step needed
{
// symm applies beta to C and accumulates the product in a single call;
// b_copy is either B itself or its column-major copy.
bli_dsymm_blas( side,
uplo,
m,
n,
alpha,
a, lda,
b_copy, ldb_copy,
beta,
c, ldc );
}
// b_copy owns memory only when a copy was actually made.
if ( symm_needs_copyb )
bli_dfree( b_copy );
// Free any temporary contiguous matrices, copying the result back to
// the original matrix.
bli_dfree_contigm( a_save, a_rs_save, a_cs_save,
&a, &a_rs, &a_cs );
bli_dfree_contigm( b_save, b_rs_save, b_cs_save,
&b, &b_rs, &b_cs );
bli_dfree_saved_contigm( m_save,
n_save,
c_save, c_rs_save, c_cs_save,
&c, &c_rs, &c_cs );
}
| void bli_dsymm_blas | ( | side_t | side, |
| uplo_t | uplo, | ||
| int | m, | ||
| int | n, | ||
| double * | alpha, | ||
| double * | a, | ||
| int | lda, | ||
| double * | b, | ||
| int | ldb, | ||
| double * | beta, | ||
| double * | c, | ||
| int | ldc | ||
| ) |
References bli_param_map_to_netlib_side(), bli_param_map_to_netlib_uplo(), cblas_dsymm(), CblasColMajor, and F77_dsymm().
Referenced by bli_dsymm().
{
    // Dispatch layer for double-precision symm: converts the side and uplo
    // parameters with the bli_param_map_to_netlib_*() helpers and forwards
    // everything else untouched to the configured BLAS binding. A, B and C
    // are described by a leading dimension only.
#ifdef BLIS_ENABLE_CBLAS_INTERFACES
    enum CBLAS_SIDE side_cblas;
    enum CBLAS_UPLO uplo_cblas;

    bli_param_map_to_netlib_side( side, &side_cblas );
    bli_param_map_to_netlib_uplo( uplo, &uplo_cblas );

    // The CBLAS binding is always invoked with column-major ordering and
    // receives the real scalars alpha and beta by value.
    cblas_dsymm( CblasColMajor,
                 side_cblas, uplo_cblas,
                 m, n,
                 *alpha,
                 a, lda,
                 b, ldb,
                 *beta,
                 c, ldc );
#else
    char side_ch;
    char uplo_ch;

    bli_param_map_to_netlib_side( side, &side_ch );
    bli_param_map_to_netlib_uplo( uplo, &uplo_ch );

    // The Fortran binding takes every scalar argument by address.
    F77_dsymm( &side_ch, &uplo_ch,
               &m, &n,
               alpha,
               a, &lda,
               b, &ldb,
               beta,
               c, &ldc );
#endif
}
| void bli_dsyr2k | ( | uplo_t | uplo, |
| trans_t | trans, | ||
| int | m, | ||
| int | k, | ||
| double * | alpha, | ||
| double * | a, | ||
| int | a_rs, | ||
| int | a_cs, | ||
| double * | b, | ||
| int | b_rs, | ||
| int | b_cs, | ||
| double * | beta, | ||
| double * | c, | ||
| int | c_rs, | ||
| int | c_cs | ||
| ) |
References bli_dallocm(), bli_dcopymt(), bli_dcreate_contigmr(), bli_dcreate_contigmt(), bli_dfree(), bli_dfree_contigm(), bli_dfree_saved_contigmr(), bli_dsyr2k_blas(), bli_is_col_storage(), bli_set_dims_with_trans(), bli_zero_dim2(), and BLIS_NO_TRANSPOSE.
Referenced by bli_dher2k(), FLA_Her2k_external(), and FLA_Syr2k_external().
{
// Double-precision symmetric rank-2k update on operands described by
// arbitrary row/column strides. The strided operands are normalized so
// that the column-major wrapper bli_dsyr2k_blas() can be applied; any
// temporaries created along the way are released before returning.
// NOTE(review): the first parameters (uplo, trans, m) are declared outside
// this excerpt; their roles are taken from how they are used below.
//
// Save the caller's view of every operand; these values are handed back to
// the free/copy-back helpers at the end of the function.
uplo_t uplo_save = uplo;
int m_save = m;
double* a_save = a;
double* b_save = b;
double* c_save = c;
int a_rs_save = a_rs;
int a_cs_save = a_cs;
int b_rs_save = b_rs;
int b_cs_save = b_cs;
int c_rs_save = c_rs;
int c_cs_save = c_cs;
// Column-major copies of A and/or B; only allocated when the
// corresponding syr2k_needs_copy* flag is set below.
double* a_copy;
double* b_copy;
// Leading dimension / unit increment pairs passed on to BLAS.
int lda, inca;
int ldb, incb;
int ldc, incc;
int lda_copy, inca_copy;
int ldb_copy, incb_copy;
int syr2k_needs_copya = FALSE;
int syr2k_needs_copyb = FALSE;
// Return early if possible.
if ( bli_zero_dim2( m, k ) ) return;
// If necessary, allocate, initialize, and use a temporary contiguous
// copy of each matrix rather than the original matrices. On return a, b,
// c and their strides describe the matrices actually used below.
bli_dcreate_contigmt( trans,
m,
k,
a_save, a_rs_save, a_cs_save,
&a, &a_rs, &a_cs );
bli_dcreate_contigmt( trans,
m,
k,
b_save, b_rs_save, b_cs_save,
&b, &b_rs, &b_cs );
bli_dcreate_contigmr( uplo,
m,
m,
c_save, c_rs_save, c_cs_save,
&c, &c_rs, &c_cs );
// Initialize with values assuming column-major storage.
lda = a_cs;
inca = a_rs;
ldb = b_cs;
incb = b_rs;
ldc = c_cs;
incc = c_rs;
// Adjust the parameters based on the storage of each matrix. In the
// comments below, the _c/_r suffix denotes column/row storage and ~uplo
// denotes the opposite triangle. conj() appears in some of the effective
// operations; it has no effect on real (double) data.
if ( bli_is_col_storage( c_rs, c_cs ) )
{
if ( bli_is_col_storage( a_rs, a_cs ) )
{
if ( bli_is_col_storage( b_rs, b_cs ) )
{
// requested operation: uplo( C_c ) += A_c * B_c' + B_c * A_c'
// effective operation: uplo( C_c ) += A_c * B_c' + B_c * A_c'
}
else // if ( bli_is_row_storage( b_rs, b_cs ) )
{
// requested operation: uplo( C_c ) += A_c * B_r' + B_r * A_c'
// effective operation: uplo( C_c ) += A_c * B_c' + B_c * A_c'
// A and B disagree in storage, so B is copied to column storage.
syr2k_needs_copyb = TRUE;
}
}
else // if ( bli_is_row_storage( a_rs, a_cs ) )
{
if ( bli_is_col_storage( b_rs, b_cs ) )
{
// requested operation: uplo( C_c ) += A_r * B_c' + B_c * A_r'
// effective operation: uplo( C_c ) += A_c * B_c' + B_c * A_c'
// A and B disagree in storage, so A is copied to column storage.
syr2k_needs_copya = TRUE;
}
else // if ( bli_is_row_storage( b_rs, b_cs ) )
{
// requested operation: uplo( C_c ) += A_r * B_r' + B_r * A_r'
// effective operation: uplo( C_c ) += conj( A_c' * B_c + B_c' * A_c )
bli_swap_ints( lda, inca );
bli_swap_ints( ldb, incb );
bli_toggle_trans( trans );
}
}
}
else // if ( bli_is_row_storage( c_rs, c_cs ) )
{
if ( bli_is_col_storage( a_rs, a_cs ) )
{
if ( bli_is_col_storage( b_rs, b_cs ) )
{
// requested operation: uplo( C_r ) += A_c * B_c' + B_c * A_c'
// effective operation: ~uplo( C_c ) += conj( A_c * B_c' + B_c * A_c' )
bli_swap_ints( ldc, incc );
bli_toggle_uplo( uplo );
}
else // if ( bli_is_row_storage( b_rs, b_cs ) )
{
// requested operation: uplo( C_r ) += A_c * B_r' + B_r * A_c'
// effective operation: ~uplo( C_c ) += conj( A_c * B_c' + B_c * A_c' )
syr2k_needs_copyb = TRUE;
bli_swap_ints( ldc, incc );
bli_toggle_uplo( uplo );
}
}
else // if ( bli_is_row_storage( a_rs, a_cs ) )
{
if ( bli_is_col_storage( b_rs, b_cs ) )
{
// requested operation: uplo( C_r ) += A_r * B_c' + B_c * A_r'
// effective operation: ~uplo( C_c ) += conj( A_c * B_c' + B_c * A_c' )
syr2k_needs_copya = TRUE;
bli_swap_ints( ldc, incc );
bli_toggle_uplo( uplo );
}
else // if ( bli_is_row_storage( b_rs, b_cs ) )
{
// requested operation: uplo( C_r ) += A_r * B_r' + B_r * A_r'
// effective operation: ~uplo( C_c ) += A_c' * B_c + B_c' * A_c
bli_swap_ints( ldc, incc );
bli_swap_ints( lda, inca );
bli_swap_ints( ldb, incb );
bli_toggle_uplo( uplo );
bli_toggle_trans( trans );
}
}
}
// By default the BLAS call reads A directly.
a_copy = a;
lda_copy = lda;
inca_copy = inca;
// There are two cases where we need to copy A into column-major storage
// (A row-stored while B is column-stored). We handle those cases here.
if ( syr2k_needs_copya )
{
int m_a;
int n_a;
// Determine the dimensions of A according to the value of trans. We
// need this in order to set the leading dimension of the copy of A.
bli_set_dims_with_trans( trans, m, k, &m_a, &n_a );
// We need a temporary matrix to hold a column-major copy of A.
a_copy = bli_dallocm( m, k );
lda_copy = m_a;
inca_copy = 1;
// Copy the contents of A into A_copy.
bli_dcopymt( BLIS_NO_TRANSPOSE,
m_a,
n_a,
a, inca, lda,
a_copy, inca_copy, lda_copy );
}
// By default the BLAS call reads B directly.
b_copy = b;
ldb_copy = ldb;
incb_copy = incb;
// There are two cases where we need to copy B into column-major storage
// (B row-stored while A is column-stored). We handle those cases here.
if ( syr2k_needs_copyb )
{
int m_b;
int n_b;
// Determine the dimensions of B according to the value of trans. We
// need this in order to set the leading dimension of the copy of B.
bli_set_dims_with_trans( trans, m, k, &m_b, &n_b );
// We need a temporary matrix to hold a column-major copy of B.
b_copy = bli_dallocm( m, k );
ldb_copy = m_b;
incb_copy = 1;
// Copy the contents of B into B_copy.
bli_dcopymt( BLIS_NO_TRANSPOSE,
m_b,
n_b,
b, incb, ldb,
b_copy, incb_copy, ldb_copy );
}
// Invoke the column-major BLAS wrapper with the adjusted parameters.
bli_dsyr2k_blas( uplo,
trans,
m,
k,
alpha,
a_copy, lda_copy,
b_copy, ldb_copy,
beta,
c, ldc );
// Release the column-major copies, but only if they were allocated above.
if ( syr2k_needs_copya )
bli_dfree( a_copy );
if ( syr2k_needs_copyb )
bli_dfree( b_copy );
// Free any temporary contiguous matrices, copying the result back to
// the original matrix. The saved (pre-toggle) uplo and m are used for C.
bli_dfree_contigm( a_save, a_rs_save, a_cs_save,
&a, &a_rs, &a_cs );
bli_dfree_contigm( b_save, b_rs_save, b_cs_save,
&b, &b_rs, &b_cs );
bli_dfree_saved_contigmr( uplo_save,
m_save,
m_save,
c_save, c_rs_save, c_cs_save,
&c, &c_rs, &c_cs );
}
| void bli_dsyr2k_blas | ( | uplo_t | uplo, |
| trans_t | trans, | ||
| int | m, | ||
| int | k, | ||
| double * | alpha, | ||
| double * | a, | ||
| int | lda, | ||
| double * | b, | ||
| int | ldb, | ||
| double * | beta, | ||
| double * | c, | ||
| int | ldc | ||
| ) |
References bli_is_conjtrans(), bli_param_map_to_netlib_trans(), bli_param_map_to_netlib_uplo(), BLIS_TRANSPOSE, cblas_dsyr2k(), CblasColMajor, and F77_dsyr2k().
Referenced by bli_dsyr2k().
{
    // Column-major entry point for the double-precision syr2k kernel. The
    // BLIS uplo/trans parameters are translated to their netlib form and the
    // call is forwarded to either the CBLAS or the Fortran-77 interface,
    // depending on BLIS_ENABLE_CBLAS_INTERFACES.
#ifdef BLIS_ENABLE_CBLAS_INTERFACES
    enum CBLAS_ORDER     layout = CblasColMajor;
    enum CBLAS_UPLO      uplo_cblas;
    enum CBLAS_TRANSPOSE trans_cblas;

    // syr2k has no conjugate-transpose variant in BLAS, so a conjugate
    // transposition request is demoted to a plain transposition.
    if ( bli_is_conjtrans( trans ) )
    {
        trans = BLIS_TRANSPOSE;
    }

    bli_param_map_to_netlib_uplo( uplo, &uplo_cblas );
    bli_param_map_to_netlib_trans( trans, &trans_cblas );

    // CBLAS takes the scalars by value.
    cblas_dsyr2k( layout, uplo_cblas, trans_cblas,
                  m, k,
                  *alpha,
                  a, lda,
                  b, ldb,
                  *beta,
                  c, ldc );
#else
    char uplo_f77;
    char trans_f77;

    // syr2k has no conjugate-transpose variant in BLAS, so a conjugate
    // transposition request is demoted to a plain transposition.
    if ( bli_is_conjtrans( trans ) )
    {
        trans = BLIS_TRANSPOSE;
    }

    bli_param_map_to_netlib_uplo( uplo, &uplo_f77 );
    bli_param_map_to_netlib_trans( trans, &trans_f77 );

    // The Fortran interface takes every argument by reference.
    F77_dsyr2k( &uplo_f77, &trans_f77,
                &m, &k,
                alpha,
                a, &lda,
                b, &ldb,
                beta,
                c, &ldc );
#endif
}
| void bli_dsyrk | ( | uplo_t | uplo, |
| trans_t | trans, | ||
| int | m, | ||
| int | k, | ||
| double * | alpha, | ||
| double * | a, | ||
| int | a_rs, | ||
| int | a_cs, | ||
| double * | beta, | ||
| double * | c, | ||
| int | c_rs, | ||
| int | c_cs | ||
| ) |
References bli_dcreate_contigmr(), bli_dcreate_contigmt(), bli_dfree_contigm(), bli_dfree_saved_contigmr(), bli_dsyrk_blas(), bli_is_col_storage(), and bli_zero_dim2().
Referenced by bli_dherk(), FLA_Herk_external(), FLA_Syrk_external(), and FLA_UDdate_UT_opd_var1().
{
    // Double-precision symmetric rank-k update on operands described by
    // arbitrary row/column strides. The strided description of A and C is
    // translated into the column-major form expected by bli_dsyrk_blas().
    //
    // The *_orig values keep the caller's view of each operand so that the
    // free/copy-back helpers at the end can restore it.
    uplo_t  uplo_orig = uplo;
    int     m_orig    = m;
    double* a_orig    = a;
    double* c_orig    = c;
    int     a_rs_orig = a_rs;
    int     a_cs_orig = a_cs;
    int     c_rs_orig = c_rs;
    int     c_cs_orig = c_cs;
    int     lda, inca;
    int     ldc, incc;

    // Bail out early when bli_zero_dim2() flags the m/k pair.
    if ( bli_zero_dim2( m, k ) )
    {
        return;
    }

    // Let the helpers substitute temporary contiguous matrices where
    // needed; a, c and their strides are updated through the pointers.
    bli_dcreate_contigmt( trans, m, k,
                          a_orig, a_rs_orig, a_cs_orig,
                          &a, &a_rs, &a_cs );
    bli_dcreate_contigmr( uplo, m, m,
                          c_orig, c_rs_orig, c_cs_orig,
                          &c, &c_rs, &c_cs );

    // Begin with the column-major reading of the strides.
    lda  = a_cs;
    inca = a_rs;
    ldc  = c_cs;
    incc = c_rs;

    // The fix-ups for A and C do not interact, so each is applied on its
    // own (_c/_r = column/row storage, ~uplo = opposite triangle):
    //
    //   C   A   requested                    effective
    //   c   c   uplo( C_c ) += A_c * A_c^T    uplo( C_c ) += A_c * A_c^T
    //   c   r   uplo( C_c ) += A_r * A_r^T    uplo( C_c ) += A_c^T * A_c
    //   r   c   uplo( C_r ) += A_c * A_c^T   ~uplo( C_c ) += A_c * A_c^T
    //   r   r   uplo( C_r ) += A_r * A_r^T   ~uplo( C_c ) += A_c^T * A_c

    // Row-stored A: read it as its column-stored transpose.
    if ( !bli_is_col_storage( a_rs, a_cs ) )
    {
        bli_swap_ints( lda, inca );
        bli_toggle_trans( trans );
    }

    // Row-stored C: address the opposite triangle of the column-stored view.
    if ( !bli_is_col_storage( c_rs, c_cs ) )
    {
        bli_swap_ints( ldc, incc );
        bli_toggle_uplo( uplo );
    }

    bli_dsyrk_blas( uplo, trans, m, k, alpha, a, lda, beta, c, ldc );

    // Release any temporaries. For C the result is copied back into the
    // caller's matrix, using the original (untoggled) uplo.
    bli_dfree_contigm( a_orig, a_rs_orig, a_cs_orig,
                       &a, &a_rs, &a_cs );
    bli_dfree_saved_contigmr( uplo_orig, m_orig, m_orig,
                              c_orig, c_rs_orig, c_cs_orig,
                              &c, &c_rs, &c_cs );
}
| void bli_dsyrk_blas | ( | uplo_t | uplo, |
| trans_t | trans, | ||
| int | m, | ||
| int | k, | ||
| double * | alpha, | ||
| double * | a, | ||
| int | lda, | ||
| double * | beta, | ||
| double * | c, | ||
| int | ldc | ||
| ) |
References bli_param_map_to_netlib_trans(), bli_param_map_to_netlib_uplo(), cblas_dsyrk(), CblasColMajor, and F77_dsyrk().
Referenced by bli_dsyrk().
{
    // Column-major entry point for the double-precision syrk kernel. The
    // BLIS uplo/trans parameters are translated to their netlib form and the
    // call is forwarded to either the CBLAS or the Fortran-77 interface,
    // depending on BLIS_ENABLE_CBLAS_INTERFACES.
#ifdef BLIS_ENABLE_CBLAS_INTERFACES
    enum CBLAS_ORDER     layout = CblasColMajor;
    enum CBLAS_UPLO      uplo_cblas;
    enum CBLAS_TRANSPOSE trans_cblas;

    bli_param_map_to_netlib_uplo( uplo, &uplo_cblas );
    bli_param_map_to_netlib_trans( trans, &trans_cblas );

    // CBLAS takes the scalars by value.
    cblas_dsyrk( layout, uplo_cblas, trans_cblas,
                 m, k,
                 *alpha,
                 a, lda,
                 *beta,
                 c, ldc );
#else
    char uplo_f77;
    char trans_f77;

    bli_param_map_to_netlib_uplo( uplo, &uplo_f77 );
    bli_param_map_to_netlib_trans( trans, &trans_f77 );

    // The Fortran interface takes every argument by reference.
    F77_dsyrk( &uplo_f77, &trans_f77,
               &m, &k,
               alpha,
               a, &lda,
               beta,
               c, &ldc );
#endif
}
| void bli_dtrmm | ( | side_t | side, |
| uplo_t | uplo, | ||
| trans_t | trans, | ||
| diag_t | diag, | ||
| int | m, | ||
| int | n, | ||
| double * | alpha, | ||
| double * | a, | ||
| int | a_rs, | ||
| int | a_cs, | ||
| double * | b, | ||
| int | b_rs, | ||
| int | b_cs | ||
| ) |
References bli_dcreate_contigm(), bli_dcreate_contigmr(), bli_dfree_contigm(), bli_dfree_saved_contigm(), bli_dtrmm_blas(), bli_is_col_storage(), bli_set_dim_with_side(), and bli_zero_dim2().
Referenced by bli_dtrmmsx(), and FLA_Trmm_external().
{
    // Double-precision triangular matrix multiply on operands described by
    // arbitrary row/column strides. The strided description of A and B is
    // translated into the column-major form expected by bli_dtrmm_blas().
    //
    // The *_orig values keep the caller's view of each operand so that the
    // free/copy-back helpers at the end can restore it.
    int     m_orig    = m;
    int     n_orig    = n;
    double* a_orig    = a;
    double* b_orig    = b;
    int     a_rs_orig = a_rs;
    int     a_cs_orig = a_cs;
    int     b_rs_orig = b_rs;
    int     b_cs_orig = b_cs;
    int     dim_a;
    int     lda, inca;
    int     ldb, incb;
    int     a_is_row, b_is_row;

    // Bail out early when bli_zero_dim2() flags the m/n pair.
    if ( bli_zero_dim2( m, n ) )
    {
        return;
    }

    // A is square; its order is selected from m and n according to side.
    bli_set_dim_with_side( side, m, n, &dim_a );

    // Let the helpers substitute temporary contiguous matrices where
    // needed; a, b and their strides are updated through the pointers.
    bli_dcreate_contigmr( uplo, dim_a, dim_a,
                          a_orig, a_rs_orig, a_cs_orig,
                          &a, &a_rs, &a_cs );
    bli_dcreate_contigm( m, n,
                         b_orig, b_rs_orig, b_cs_orig,
                         &b, &b_rs, &b_cs );

    // Begin with the column-major reading of the strides.
    lda  = a_cs;
    inca = a_rs;
    ldb  = b_cs;
    incb = b_rs;

    // Storage fix-ups (_c/_r = column/row storage, ~uplo = opposite
    // triangle):
    //
    //   B   A   requested                       effective
    //   c   c   B_c := tr( uplo( A_c ) ) * B_c  B_c := tr( uplo( A_c ) ) * B_c
    //   c   r   B_c := tr( uplo( A_r ) ) * B_c  B_c := tr( ~uplo( A_c ) )^T * B_c
    //   r   c   B_r := tr( uplo( A_c ) ) * B_r  B_c := B_c * tr( uplo( A_c ) )^T
    //   r   r   B_r := tr( uplo( A_r ) ) * B_r  B_c := B_c * tr( ~uplo( A_c ) )
    a_is_row = !bli_is_col_storage( a_rs, a_cs );
    b_is_row = !bli_is_col_storage( b_rs, b_cs );

    // Row-stored A: read it as the column-stored opposite triangle.
    if ( a_is_row )
    {
        bli_swap_ints( lda, inca );
        bli_toggle_uplo( uplo );
    }

    // Row-stored B: operate on the column-stored transpose, which swaps
    // the dimensions and the side A is applied from.
    if ( b_is_row )
    {
        bli_swap_ints( ldb, incb );
        bli_swap_ints( m, n );
        bli_toggle_side( side );
    }

    // A transposition is only left over when exactly one operand is
    // row-stored; with both row-stored the two transposes cancel.
    if ( a_is_row != b_is_row )
    {
        bli_toggle_trans( trans );
    }

    bli_dtrmm_blas( side, uplo, trans, diag, m, n, alpha, a, lda, b, ldb );

    // Release any temporaries. For B the result is copied back into the
    // caller's matrix, using the original (unswapped) dimensions.
    bli_dfree_contigm( a_orig, a_rs_orig, a_cs_orig,
                       &a, &a_rs, &a_cs );
    bli_dfree_saved_contigm( m_orig, n_orig,
                             b_orig, b_rs_orig, b_cs_orig,
                             &b, &b_rs, &b_cs );
}
| void bli_dtrmm_blas | ( | side_t | side, |
| uplo_t | uplo, | ||
| trans_t | trans, | ||
| diag_t | diag, | ||
| int | m, | ||
| int | n, | ||
| double * | alpha, | ||
| double * | a, | ||
| int | lda, | ||
| double * | b, | ||
| int | ldb | ||
| ) |
References bli_param_map_to_netlib_diag(), bli_param_map_to_netlib_side(), bli_param_map_to_netlib_trans(), bli_param_map_to_netlib_uplo(), cblas_dtrmm(), CblasColMajor, and F77_dtrmm().
Referenced by bli_dtrmm().
{
    // Column-major entry point for the double-precision trmm kernel. The
    // BLIS side/uplo/trans/diag parameters are translated to their netlib
    // form and the call is forwarded to either the CBLAS or the Fortran-77
    // interface, depending on BLIS_ENABLE_CBLAS_INTERFACES.
#ifdef BLIS_ENABLE_CBLAS_INTERFACES
    enum CBLAS_ORDER     layout = CblasColMajor;
    enum CBLAS_SIDE      side_cblas;
    enum CBLAS_UPLO      uplo_cblas;
    enum CBLAS_TRANSPOSE trans_cblas;
    enum CBLAS_DIAG      diag_cblas;

    bli_param_map_to_netlib_side( side, &side_cblas );
    bli_param_map_to_netlib_uplo( uplo, &uplo_cblas );
    bli_param_map_to_netlib_trans( trans, &trans_cblas );
    bli_param_map_to_netlib_diag( diag, &diag_cblas );

    // CBLAS takes alpha by value.
    cblas_dtrmm( layout, side_cblas, uplo_cblas, trans_cblas, diag_cblas,
                 m, n,
                 *alpha,
                 a, lda,
                 b, ldb );
#else
    char side_f77;
    char uplo_f77;
    char trans_f77;
    char diag_f77;

    bli_param_map_to_netlib_side( side, &side_f77 );
    bli_param_map_to_netlib_uplo( uplo, &uplo_f77 );
    bli_param_map_to_netlib_trans( trans, &trans_f77 );
    bli_param_map_to_netlib_diag( diag, &diag_f77 );

    // The Fortran interface takes every argument by reference.
    F77_dtrmm( &side_f77, &uplo_f77, &trans_f77, &diag_f77,
               &m, &n,
               alpha,
               a, &lda,
               b, &ldb );
#endif
}
| void bli_dtrmmsx | ( | side_t | side, |
| uplo_t | uplo, | ||
| trans_t | trans, | ||
| diag_t | diag, | ||
| int | m, | ||
| int | n, | ||
| double * | alpha, | ||
| double * | a, | ||
| int | a_rs, | ||
| int | a_cs, | ||
| double * | b, | ||
| int | b_rs, | ||
| int | b_cs, | ||
| double * | beta, | ||
| double * | c, | ||
| int | c_rs, | ||
| int | c_cs | ||
| ) |
References bli_d1(), bli_dallocm(), bli_daxpymt(), bli_dcopymt(), bli_dcreate_contigm(), bli_dcreate_contigmr(), bli_dfree(), bli_dfree_contigm(), bli_dfree_saved_contigm(), bli_dscalm(), bli_dtrmm(), bli_is_col_storage(), bli_set_dim_with_side(), bli_zero_dim2(), BLIS_NO_CONJUGATE, and BLIS_NO_TRANSPOSE.
Referenced by FLA_Trmmsx_external().
{
    // Triangular matrix multiply that accumulates into a separate matrix:
    // the product is formed by bli_dtrmm() in a scratch copy of B, C is
    // scaled by beta, and the scratch result is then added to C. B itself
    // is never written.
    //
    // The *_orig values keep the caller's view of each operand so that the
    // free/copy-back helpers at the end can restore it.
    int     m_orig    = m;
    int     n_orig    = n;
    double* a_orig    = a;
    double* b_orig    = b;
    double* c_orig    = c;
    int     a_rs_orig = a_rs;
    int     a_cs_orig = a_cs;
    int     b_rs_orig = b_rs;
    int     b_cs_orig = b_cs;
    int     c_rs_orig = c_rs;
    int     c_cs_orig = c_cs;
    double  unit      = bli_d1();
    double* scratch;
    int     dim_a;
    int     scratch_rs, scratch_cs;

    // Bail out early when bli_zero_dim2() flags the m/n pair.
    if ( bli_zero_dim2( m, n ) )
    {
        return;
    }

    // A is square; its order is selected from m and n according to side.
    bli_set_dim_with_side( side, m, n, &dim_a );

    // Let the helpers substitute temporary contiguous matrices where
    // needed; a, b, c and their strides are updated through the pointers.
    bli_dcreate_contigmr( uplo, dim_a, dim_a,
                          a_orig, a_rs_orig, a_cs_orig,
                          &a, &a_rs, &a_cs );
    bli_dcreate_contigm( m, n,
                         b_orig, b_rs_orig, b_cs_orig,
                         &b, &b_rs, &b_cs );
    bli_dcreate_contigm( m, n,
                         c_orig, c_rs_orig, c_cs_orig,
                         &c, &c_rs, &c_cs );

    // Scratch m-by-n matrix that receives the product in place of B.
    scratch = bli_dallocm( m, n );

    // Give the scratch matrix the same orientation as B: column-major by
    // default, row-major when B is not column-stored.
    scratch_rs = 1;
    scratch_cs = m;
    if ( !bli_is_col_storage( b_rs, b_cs ) )
    {
        scratch_rs = n;
        scratch_cs = 1;
    }

    // scratch := B
    bli_dcopymt( BLIS_NO_TRANSPOSE, m, n,
                 b, b_rs, b_cs,
                 scratch, scratch_rs, scratch_cs );

    // Apply the triangular multiply to the scratch copy.
    bli_dtrmm( side, uplo, trans, diag, m, n, alpha,
               a, a_rs, a_cs,
               scratch, scratch_rs, scratch_cs );

    // C := beta * C
    bli_dscalm( BLIS_NO_CONJUGATE, m, n, beta, c, c_rs, c_cs );

    // C := C + scratch
    bli_daxpymt( BLIS_NO_TRANSPOSE, m, n, &unit,
                 scratch, scratch_rs, scratch_cs,
                 c, c_rs, c_cs );

    bli_dfree( scratch );

    // Release any temporaries. For C the result is copied back into the
    // caller's matrix.
    bli_dfree_contigm( a_orig, a_rs_orig, a_cs_orig,
                       &a, &a_rs, &a_cs );
    bli_dfree_contigm( b_orig, b_rs_orig, b_cs_orig,
                       &b, &b_rs, &b_cs );
    bli_dfree_saved_contigm( m_orig, n_orig,
                             c_orig, c_rs_orig, c_cs_orig,
                             &c, &c_rs, &c_cs );
}
| void bli_dtrsm | ( | side_t | side, |
| uplo_t | uplo, | ||
| trans_t | trans, | ||
| diag_t | diag, | ||
| int | m, | ||
| int | n, | ||
| double * | alpha, | ||
| double * | a, | ||
| int | a_rs, | ||
| int | a_cs, | ||
| double * | b, | ||
| int | b_rs, | ||
| int | b_cs | ||
| ) |
References bli_dcreate_contigm(), bli_dcreate_contigmr(), bli_dfree_contigm(), bli_dfree_saved_contigm(), bli_dtrsm_blas(), bli_is_col_storage(), bli_set_dim_with_side(), and bli_zero_dim2().
Referenced by bli_dtrsmsx(), FLA_LU_nopiv_opd_var1(), FLA_LU_nopiv_opd_var2(), FLA_LU_nopiv_opd_var3(), FLA_LU_piv_opd_var3(), and FLA_Trsm_external().
{
    // Double-precision triangular solve with multiple right-hand sides on
    // operands described by arbitrary row/column strides. The strided
    // description of A and B is translated into the column-major form
    // expected by bli_dtrsm_blas().
    //
    // The *_orig values keep the caller's view of each operand so that the
    // free/copy-back helpers at the end can restore it.
    int     m_orig    = m;
    int     n_orig    = n;
    double* a_orig    = a;
    double* b_orig    = b;
    int     a_rs_orig = a_rs;
    int     a_cs_orig = a_cs;
    int     b_rs_orig = b_rs;
    int     b_cs_orig = b_cs;
    int     dim_a;
    int     lda, inca;
    int     ldb, incb;
    int     a_is_row, b_is_row;

    // Bail out early when bli_zero_dim2() flags the m/n pair.
    if ( bli_zero_dim2( m, n ) )
    {
        return;
    }

    // A is square; its order is selected from m and n according to side.
    bli_set_dim_with_side( side, m, n, &dim_a );

    // Let the helpers substitute temporary contiguous matrices where
    // needed; a, b and their strides are updated through the pointers.
    bli_dcreate_contigmr( uplo, dim_a, dim_a,
                          a_orig, a_rs_orig, a_cs_orig,
                          &a, &a_rs, &a_cs );
    bli_dcreate_contigm( m, n,
                         b_orig, b_rs_orig, b_cs_orig,
                         &b, &b_rs, &b_cs );

    // Begin with the column-major reading of the strides.
    lda  = a_cs;
    inca = a_rs;
    ldb  = b_cs;
    incb = b_rs;

    // Storage fix-ups, written in the same shorthand as the multiply
    // routines (_c/_r = column/row storage, ~uplo = opposite triangle):
    //
    //   B   A   requested                       effective
    //   c   c   B_c := tr( uplo( A_c ) ) * B_c  B_c := tr( uplo( A_c ) ) * B_c
    //   c   r   B_c := tr( uplo( A_r ) ) * B_c  B_c := tr( ~uplo( A_c ) )^T * B_c
    //   r   c   B_r := tr( uplo( A_c ) ) * B_r  B_c := B_c * tr( uplo( A_c ) )^T
    //   r   r   B_r := tr( uplo( A_r ) ) * B_r  B_c := B_c * tr( ~uplo( A_c ) )
    a_is_row = !bli_is_col_storage( a_rs, a_cs );
    b_is_row = !bli_is_col_storage( b_rs, b_cs );

    // Row-stored A: read it as the column-stored opposite triangle.
    if ( a_is_row )
    {
        bli_swap_ints( lda, inca );
        bli_toggle_uplo( uplo );
    }

    // Row-stored B: operate on the column-stored transpose, which swaps
    // the dimensions and the side A is applied from.
    if ( b_is_row )
    {
        bli_swap_ints( ldb, incb );
        bli_swap_ints( m, n );
        bli_toggle_side( side );
    }

    // A transposition is only left over when exactly one operand is
    // row-stored; with both row-stored the two transposes cancel.
    if ( a_is_row != b_is_row )
    {
        bli_toggle_trans( trans );
    }

    bli_dtrsm_blas( side, uplo, trans, diag, m, n, alpha, a, lda, b, ldb );

    // Release any temporaries. For B the result is copied back into the
    // caller's matrix, using the original (unswapped) dimensions.
    bli_dfree_contigm( a_orig, a_rs_orig, a_cs_orig,
                       &a, &a_rs, &a_cs );
    bli_dfree_saved_contigm( m_orig, n_orig,
                             b_orig, b_rs_orig, b_cs_orig,
                             &b, &b_rs, &b_cs );
}
| void bli_dtrsm_blas | ( | side_t | side, |
| uplo_t | uplo, | ||
| trans_t | trans, | ||
| diag_t | diag, | ||
| int | m, | ||
| int | n, | ||
| double * | alpha, | ||
| double * | a, | ||
| int | lda, | ||
| double * | b, | ||
| int | ldb | ||
| ) |
References bli_param_map_to_netlib_diag(), bli_param_map_to_netlib_side(), bli_param_map_to_netlib_trans(), bli_param_map_to_netlib_uplo(), cblas_dtrsm(), CblasColMajor, and F77_dtrsm().
Referenced by bli_dtrsm().
{
    // Column-major entry point for the double-precision trsm kernel. The
    // BLIS side/uplo/trans/diag parameters are translated to their netlib
    // form and the call is forwarded to either the CBLAS or the Fortran-77
    // interface, depending on BLIS_ENABLE_CBLAS_INTERFACES.
#ifdef BLIS_ENABLE_CBLAS_INTERFACES
    enum CBLAS_ORDER     layout = CblasColMajor;
    enum CBLAS_SIDE      side_cblas;
    enum CBLAS_UPLO      uplo_cblas;
    enum CBLAS_TRANSPOSE trans_cblas;
    enum CBLAS_DIAG      diag_cblas;

    bli_param_map_to_netlib_side( side, &side_cblas );
    bli_param_map_to_netlib_uplo( uplo, &uplo_cblas );
    bli_param_map_to_netlib_trans( trans, &trans_cblas );
    bli_param_map_to_netlib_diag( diag, &diag_cblas );

    // CBLAS takes alpha by value.
    cblas_dtrsm( layout, side_cblas, uplo_cblas, trans_cblas, diag_cblas,
                 m, n,
                 *alpha,
                 a, lda,
                 b, ldb );
#else
    char side_f77;
    char uplo_f77;
    char trans_f77;
    char diag_f77;

    bli_param_map_to_netlib_side( side, &side_f77 );
    bli_param_map_to_netlib_uplo( uplo, &uplo_f77 );
    bli_param_map_to_netlib_trans( trans, &trans_f77 );
    bli_param_map_to_netlib_diag( diag, &diag_f77 );

    // The Fortran interface takes every argument by reference.
    F77_dtrsm( &side_f77, &uplo_f77, &trans_f77, &diag_f77,
               &m, &n,
               alpha,
               a, &lda,
               b, &ldb );
#endif
}
| void bli_dtrsmsx | ( | side_t | side, |
| uplo_t | uplo, | ||
| trans_t | trans, | ||
| diag_t | diag, | ||
| int | m, | ||
| int | n, | ||
| double * | alpha, | ||
| double * | a, | ||
| int | a_rs, | ||
| int | a_cs, | ||
| double * | b, | ||
| int | b_rs, | ||
| int | b_cs, | ||
| double * | beta, | ||
| double * | c, | ||
| int | c_rs, | ||
| int | c_cs | ||
| ) |
References bli_d1(), bli_dallocm(), bli_daxpymt(), bli_dcopymt(), bli_dcreate_contigm(), bli_dcreate_contigmr(), bli_dfree(), bli_dfree_contigm(), bli_dfree_saved_contigm(), bli_dscalm(), bli_dtrsm(), bli_is_col_storage(), bli_set_dim_with_side(), bli_zero_dim2(), BLIS_NO_CONJUGATE, and BLIS_NO_TRANSPOSE.
Referenced by FLA_Trsmsx_external().
{
    // Triangular solve that accumulates into a separate matrix: the solve
    // is carried out by bli_dtrsm() in a scratch copy of B, C is scaled by
    // beta, and the scratch result is then added to C. B itself is never
    // written.
    //
    // The *_orig values keep the caller's view of each operand so that the
    // free/copy-back helpers at the end can restore it.
    int     m_orig    = m;
    int     n_orig    = n;
    double* a_orig    = a;
    double* b_orig    = b;
    double* c_orig    = c;
    int     a_rs_orig = a_rs;
    int     a_cs_orig = a_cs;
    int     b_rs_orig = b_rs;
    int     b_cs_orig = b_cs;
    int     c_rs_orig = c_rs;
    int     c_cs_orig = c_cs;
    double  unit      = bli_d1();
    double* scratch;
    int     dim_a;
    int     scratch_rs, scratch_cs;

    // Bail out early when bli_zero_dim2() flags the m/n pair.
    if ( bli_zero_dim2( m, n ) )
    {
        return;
    }

    // A is square; its order is selected from m and n according to side.
    bli_set_dim_with_side( side, m, n, &dim_a );

    // Let the helpers substitute temporary contiguous matrices where
    // needed; a, b, c and their strides are updated through the pointers.
    bli_dcreate_contigmr( uplo, dim_a, dim_a,
                          a_orig, a_rs_orig, a_cs_orig,
                          &a, &a_rs, &a_cs );
    bli_dcreate_contigm( m, n,
                         b_orig, b_rs_orig, b_cs_orig,
                         &b, &b_rs, &b_cs );
    bli_dcreate_contigm( m, n,
                         c_orig, c_rs_orig, c_cs_orig,
                         &c, &c_rs, &c_cs );

    // Scratch m-by-n matrix that receives the solution in place of B.
    scratch = bli_dallocm( m, n );

    // Give the scratch matrix the same orientation as B: column-major by
    // default, row-major when B is not column-stored.
    scratch_rs = 1;
    scratch_cs = m;
    if ( !bli_is_col_storage( b_rs, b_cs ) )
    {
        scratch_rs = n;
        scratch_cs = 1;
    }

    // scratch := B
    bli_dcopymt( BLIS_NO_TRANSPOSE, m, n,
                 b, b_rs, b_cs,
                 scratch, scratch_rs, scratch_cs );

    // Apply the triangular solve to the scratch copy.
    bli_dtrsm( side, uplo, trans, diag, m, n, alpha,
               a, a_rs, a_cs,
               scratch, scratch_rs, scratch_cs );

    // C := beta * C
    bli_dscalm( BLIS_NO_CONJUGATE, m, n, beta, c, c_rs, c_cs );

    // C := C + scratch
    bli_daxpymt( BLIS_NO_TRANSPOSE, m, n, &unit,
                 scratch, scratch_rs, scratch_cs,
                 c, c_rs, c_cs );

    bli_dfree( scratch );

    // Release any temporaries. For C the result is copied back into the
    // caller's matrix.
    bli_dfree_contigm( a_orig, a_rs_orig, a_cs_orig,
                       &a, &a_rs, &a_cs );
    bli_dfree_contigm( b_orig, b_rs_orig, b_cs_orig,
                       &b, &b_rs, &b_cs );
    bli_dfree_saved_contigm( m_orig, n_orig,
                             c_orig, c_rs_orig, c_cs_orig,
                             &c, &c_rs, &c_cs );
}
| void bli_sgemm | ( | trans_t | transa, |
| trans_t | transb, | ||
| int | m, | ||
| int | k, | ||
| int | n, | ||
| float * | alpha, | ||
| float * | a, | ||
| int | a_rs, | ||
| int | a_cs, | ||
| float * | b, | ||
| int | b_rs, | ||
| int | b_cs, | ||
| float * | beta, | ||
| float * | c, | ||
| int | c_rs, | ||
| int | c_cs | ||
| ) |
References bli_is_col_storage(), bli_s0(), bli_s1(), bli_sallocm(), bli_saxpymt(), bli_screate_contigm(), bli_screate_contigmt(), bli_sfree(), bli_sfree_contigm(), bli_sfree_saved_contigm(), bli_sgemm_blas(), bli_sscalm(), bli_zero_dim3(), BLIS_NO_CONJUGATE, and BLIS_TRANSPOSE.
Referenced by FLA_Gemm_external().
{
// Single-precision general matrix multiply on operands described by
// arbitrary row/column strides. The strided operands are normalized so
// that the column-major wrapper bli_sgemm_blas() can be applied; in two of
// the storage combinations the product is first formed in a temporary and
// then accumulated into C with a transposition.
//
// Save the caller's view of every operand; these values are handed back to
// the free/copy-back helpers at the end of the function.
int m_save = m;
int n_save = n;
float* a_save = a;
float* b_save = b;
float* c_save = c;
int a_rs_save = a_rs;
int a_cs_save = a_cs;
int b_rs_save = b_rs;
int b_cs_save = b_cs;
int c_rs_save = c_rs;
int c_cs_save = c_cs;
float zero = bli_s0();
float one = bli_s1();
float* a_unswap;
float* b_unswap;
// Temporary that holds the product when gemm_needs_axpyt is set.
float* c_trans;
// Leading dimension / unit increment pairs passed on to BLAS.
int lda, inca;
int ldb, incb;
int ldc, incc;
int ldc_trans, incc_trans;
int m_gemm, n_gemm;
int gemm_needs_axpyt = FALSE;
// Return early if possible. C is still scaled by beta before returning,
// since that part of the operation does not depend on the product.
if ( bli_zero_dim3( m, k, n ) )
{
bli_sscalm( BLIS_NO_CONJUGATE,
m,
n,
beta,
c, c_rs, c_cs );
return;
}
// If necessary, allocate, initialize, and use a temporary contiguous
// copy of each matrix rather than the original matrices. On return a, b,
// c and their strides describe the matrices actually used below.
bli_screate_contigmt( transa,
m,
k,
a_save, a_rs_save, a_cs_save,
&a, &a_rs, &a_cs );
bli_screate_contigmt( transb,
k,
n,
b_save, b_rs_save, b_cs_save,
&b, &b_rs, &b_cs );
bli_screate_contigm( m,
n,
c_save, c_rs_save, c_cs_save,
&c, &c_rs, &c_cs );
// These are used to track the original values of a and b prior to any
// operand swapping that might take place. This is necessary for proper
// freeing of memory when one is a temporary contiguous matrix.
a_unswap = a;
b_unswap = b;
// These are used to track the dimensions of the product of the
// A and B operands to the BLAS invocation of gemm. These differ
// from m and n when the operands need to be swapped.
m_gemm = m;
n_gemm = n;
// Initialize with values assuming column-major storage.
lda = a_cs;
inca = a_rs;
ldb = b_cs;
incb = b_rs;
ldc = c_cs;
incc = c_rs;
// Adjust the parameters based on the storage of each matrix. In the
// comments below, the _c/_r suffix denotes column/row storage.
if ( bli_is_col_storage( c_rs, c_cs ) )
{
if ( bli_is_col_storage( a_rs, a_cs ) )
{
if ( bli_is_col_storage( b_rs, b_cs ) )
{
// requested operation: C_c += tr( A_c ) * tr( B_c )
// effective operation: C_c += tr( A_c ) * tr( B_c )
}
else // if ( bli_is_row_storage( b_rs, b_cs ) )
{
// requested operation: C_c += tr( A_c ) * tr( B_r )
// effective operation: C_c += tr( A_c ) * tr( B_c )^T
bli_swap_ints( ldb, incb );
bli_toggle_trans( transb );
}
}
else // if ( bli_is_row_storage( a_rs, a_cs ) )
{
if ( bli_is_col_storage( b_rs, b_cs ) )
{
// requested operation: C_c += tr( A_r ) * tr( B_c )
// effective operation: C_c += tr( A_c )^T * tr( B_c )
bli_swap_ints( lda, inca );
bli_toggle_trans( transa );
}
else // if ( bli_is_row_storage( b_rs, b_cs ) )
{
// requested operation: C_c += tr( A_r ) * tr( B_r )
// effective operation: C_c += ( tr( B_c ) * tr( A_c ) )^T
// The operands trade places (pointers, leading dimensions,
// increments and trans values), and the swapped product is
// accumulated into C through the axpyt path below.
bli_swap_ints( lda, inca );
bli_swap_ints( ldb, incb );
bli_sswap_pointers( a, b );
bli_swap_ints( lda, ldb );
bli_swap_ints( inca, incb );
bli_swap_trans( transa, transb );
gemm_needs_axpyt = TRUE;
bli_swap_ints( m_gemm, n_gemm );
}
}
}
else // if ( bli_is_row_storage( c_rs, c_cs ) )
{
if ( bli_is_col_storage( a_rs, a_cs ) )
{
if ( bli_is_col_storage( b_rs, b_cs ) )
{
// requested operation: C_r += tr( A_c ) * tr( B_c )
// effective operation: C_c += ( tr( A_c ) * tr( B_c ) )^T
// Only C is re-interpreted here (m and n are swapped but m_gemm
// and n_gemm are not); the product is accumulated into C
// through the axpyt path below.
bli_swap_ints( ldc, incc );
bli_swap_ints( m, n );
gemm_needs_axpyt = TRUE;
}
else // if ( bli_is_row_storage( b_rs, b_cs ) )
{
// requested operation: C_r += tr( A_c ) * tr( B_r )
// effective operation: C_c += tr( B_c ) * tr( A_c )^T
bli_swap_ints( ldc, incc );
bli_swap_ints( ldb, incb );
bli_toggle_trans( transa );
bli_swap_ints( m, n );
bli_swap_ints( m_gemm, n_gemm );
bli_sswap_pointers( a, b );
bli_swap_ints( lda, ldb );
bli_swap_ints( inca, incb );
bli_swap_trans( transa, transb );
}
}
else // if ( bli_is_row_storage( a_rs, a_cs ) )
{
if ( bli_is_col_storage( b_rs, b_cs ) )
{
// requested operation: C_r += tr( A_r ) * tr( B_c )
// effective operation: C_c += tr( B_c )^T * tr( A_c )
bli_swap_ints( ldc, incc );
bli_swap_ints( lda, inca );
bli_toggle_trans( transb );
bli_swap_ints( m, n );
bli_swap_ints( m_gemm, n_gemm );
bli_sswap_pointers( a, b );
bli_swap_ints( lda, ldb );
bli_swap_ints( inca, incb );
bli_swap_trans( transa, transb );
}
else // if ( bli_is_row_storage( b_rs, b_cs ) )
{
// requested operation: C_r += tr( A_r ) * tr( B_r )
// effective operation: C_c += tr( B_c ) * tr( A_c )
bli_swap_ints( lda, inca );
bli_swap_ints( ldb, incb );
bli_swap_ints( ldc, incc );
bli_swap_ints( m, n );
bli_swap_ints( m_gemm, n_gemm );
bli_sswap_pointers( a, b );
bli_swap_ints( lda, ldb );
bli_swap_ints( inca, incb );
bli_swap_trans( transa, transb );
}
}
}
// There are two cases where we need to perform the gemm and then axpy
// the result into C with a transposition. We handle those cases here.
if ( gemm_needs_axpyt )
{
// We need a temporary matrix for holding C^T. Notice that m and n
// represent the dimensions of C, while m_gemm and n_gemm are the
// dimensions of the actual product op(A)*op(B), which may be n-by-m
// since the operands may have been swapped.
c_trans = bli_sallocm( m_gemm, n_gemm );
ldc_trans = m_gemm;
incc_trans = 1;
// Compute tr( A ) * tr( B ), where A and B may have been swapped
// to reference the other, and store the result in C_trans. A beta of
// zero is passed so the temporary's prior contents do not contribute.
bli_sgemm_blas( transa,
transb,
m_gemm,
n_gemm,
k,
alpha,
a, lda,
b, ldb,
&zero,
c_trans, ldc_trans );
// Scale C by beta.
bli_sscalm( BLIS_NO_CONJUGATE,
m,
n,
beta,
c, incc, ldc );
// And finally, accumulate the matrix product in C_trans into C
// with a transpose.
bli_saxpymt( BLIS_TRANSPOSE,
m,
n,
&one,
c_trans, incc_trans, ldc_trans,
c, incc, ldc );
// Free the temporary matrix for C.
bli_sfree( c_trans );
}
else // no extra axpyt step needed
{
// BLAS accumulates directly into C, applying beta itself.
bli_sgemm_blas( transa,
transb,
m_gemm,
n_gemm,
k,
alpha,
a, lda,
b, ldb,
beta,
c, ldc );
}
// Free any temporary contiguous matrices, copying the result back to
// the original matrix. The unswapped a/b pointers are used so that each
// temporary is paired with the matrix it was created from.
bli_sfree_contigm( a_save, a_rs_save, a_cs_save,
&a_unswap, &a_rs, &a_cs );
bli_sfree_contigm( b_save, b_rs_save, b_cs_save,
&b_unswap, &b_rs, &b_cs );
bli_sfree_saved_contigm( m_save,
n_save,
c_save, c_rs_save, c_cs_save,
&c, &c_rs, &c_cs );
}
| void bli_sgemm_blas | ( | trans_t | transa, |
| trans_t | transb, | ||
| int | m, | ||
| int | n, | ||
| int | k, | ||
| float * | alpha, | ||
| float * | a, | ||
| int | lda, | ||
| float * | b, | ||
| int | ldb, | ||
| float * | beta, | ||
| float * | c, | ||
| int | ldc | ||
| ) |
References bli_param_map_to_netlib_trans(), cblas_sgemm(), CblasColMajor, and F77_sgemm().
Referenced by bli_sgemm().
{
    // Column-major entry point for the single-precision gemm kernel. The
    // two BLIS trans parameters are translated to their netlib form and the
    // call is forwarded to either the CBLAS or the Fortran-77 interface,
    // depending on BLIS_ENABLE_CBLAS_INTERFACES.
#ifdef BLIS_ENABLE_CBLAS_INTERFACES
    enum CBLAS_ORDER     layout = CblasColMajor;
    enum CBLAS_TRANSPOSE transa_cblas;
    enum CBLAS_TRANSPOSE transb_cblas;

    bli_param_map_to_netlib_trans( transa, &transa_cblas );
    bli_param_map_to_netlib_trans( transb, &transb_cblas );

    // CBLAS takes the scalars by value.
    cblas_sgemm( layout, transa_cblas, transb_cblas,
                 m, n, k,
                 *alpha,
                 a, lda,
                 b, ldb,
                 *beta,
                 c, ldc );
#else
    char transa_f77;
    char transb_f77;

    bli_param_map_to_netlib_trans( transa, &transa_f77 );
    bli_param_map_to_netlib_trans( transb, &transb_f77 );

    // The Fortran interface takes every argument by reference.
    F77_sgemm( &transa_f77, &transb_f77,
               &m, &n, &k,
               alpha,
               a, &lda,
               b, &ldb,
               beta,
               c, &ldc );
#endif
}
| void bli_shemm | ( | side_t | side, |
| uplo_t | uplo, | ||
| int | m, | ||
| int | n, | ||
| float * | alpha, | ||
| float * | a, | ||
| int | a_rs, | ||
| int | a_cs, | ||
| float * | b, | ||
| int | b_rs, | ||
| int | b_cs, | ||
| float * | beta, | ||
| float * | c, | ||
| int | c_rs, | ||
| int | c_cs | ||
| ) |
References bli_ssymm().
{
    // The single-precision real hemm is implemented by forwarding every
    // argument, unchanged, to the symmetric routine bli_ssymm().
    bli_ssymm( side, uplo, m, n,
               alpha,
               a, a_rs, a_cs,
               b, b_rs, b_cs,
               beta,
               c, c_rs, c_cs );
}
| void bli_sher2k | ( | uplo_t | uplo, |
| trans_t | trans, | ||
| int | m, | ||
| int | k, | ||
| float * | alpha, | ||
| float * | a, | ||
| int | a_rs, | ||
| int | a_cs, | ||
| float * | b, | ||
| int | b_rs, | ||
| int | b_cs, | ||
| float * | beta, | ||
| float * | c, | ||
| int | c_rs, | ||
| int | c_cs | ||
| ) |
References bli_ssyr2k().
{
    // The single-precision real her2k is implemented by forwarding every
    // argument, unchanged, to the symmetric routine bli_ssyr2k().
    bli_ssyr2k( uplo, trans, m, k,
                alpha,
                a, a_rs, a_cs,
                b, b_rs, b_cs,
                beta,
                c, c_rs, c_cs );
}
| void bli_sherk | ( | uplo_t | uplo, |
| trans_t | trans, | ||
| int | m, | ||
| int | k, | ||
| float * | alpha, | ||
| float * | a, | ||
| int | a_rs, | ||
| int | a_cs, | ||
| float * | beta, | ||
| float * | c, | ||
| int | c_rs, | ||
| int | c_cs | ||
| ) |
References bli_ssyrk().
{
    // The single-precision real herk is implemented by forwarding every
    // argument, unchanged, to the symmetric routine bli_ssyrk().
    bli_ssyrk( uplo, trans, m, k,
               alpha,
               a, a_rs, a_cs,
               beta,
               c, c_rs, c_cs );
}
| void bli_ssymm | ( | side_t | side, |
| uplo_t | uplo, | ||
| int | m, | ||
| int | n, | ||
| float * | alpha, | ||
| float * | a, | ||
| int | a_rs, | ||
| int | a_cs, | ||
| float * | b, | ||
| int | b_rs, | ||
| int | b_cs, | ||
| float * | beta, | ||
| float * | c, | ||
| int | c_rs, | ||
| int | c_cs | ||
| ) |
References bli_is_col_storage(), bli_s0(), bli_s1(), bli_sallocm(), bli_saxpymt(), bli_scopymt(), bli_screate_contigm(), bli_screate_contigmr(), bli_set_dim_with_side(), bli_sfree(), bli_sfree_contigm(), bli_sfree_saved_contigm(), bli_sscalm(), bli_ssymm_blas(), bli_zero_dim2(), BLIS_NO_CONJUGATE, BLIS_NO_TRANSPOSE, and BLIS_TRANSPOSE.
Referenced by bli_shemm(), FLA_Hemm_external(), and FLA_Symm_external().
{
// Symmetric matrix-matrix multiply wrapper (single-precision real).
//
// Accepts A, B and C described by arbitrary row/column strides and maps the
// request onto one column-major bli_ssymm_blas() call, adjusting side, uplo,
// the dimensions and the leading dimensions according to how each operand
// is stored.
//
// Parameters:
//   side, uplo       - forwarded to bli_ssymm_blas(); either may be toggled
//                      below depending on the storage of A, B and C.
//   m, n             - dimensions of B and of C.
//   alpha, beta      - pointers to the scalars forwarded to the BLAS call.
//   a, a_rs, a_cs    - A and its row/column strides. A is treated as
//                      dim_a-by-dim_a, where dim_a is chosen from m and n by
//                      bli_set_dim_with_side().
//   b, b_rs, b_cs    - B and its row/column strides.
//   c, c_rs, c_cs    - C and its row/column strides; C receives the result.
//
// Returns immediately, without touching C, when bli_zero_dim2( m, n ) holds.

// The *_save variables remember the caller's original arguments. a, b, c and
// their strides may be redirected to temporary contiguous copies, and m and n
// may be swapped, so the originals are needed for the final free/copy-back.
int m_save = m;
int n_save = n;
float* a_save = a;
float* b_save = b;
float* c_save = c;
int a_rs_save = a_rs;
int a_cs_save = a_cs;
int b_rs_save = b_rs;
int b_cs_save = b_cs;
int c_rs_save = c_rs;
int c_cs_save = c_cs;
// Scalars used by the axpyt path below (presumably 0.0f and 1.0f).
float zero = bli_s0();
float one = bli_s1();
float* b_copy;
float* c_trans;
int dim_a;
int lda, inca;
int ldb, incb;
int ldc, incc;
int ldb_copy, incb_copy;
int ldc_trans, incc_trans;
// Flags set by the storage-case analysis below:
//   symm_needs_copyb  - B must first be copied into a column-major temporary.
//   symm_needs_transb - that copy must also transpose B.
//   symm_needs_axpyt  - the product must be computed into a temporary and
//                       then added into C with a transposition.
int symm_needs_copyb = FALSE;
int symm_needs_transb = FALSE;
int symm_needs_axpyt = FALSE;
// Return early if possible.
if ( bli_zero_dim2( m, n ) ) return;
// If necessary, allocate, initialize, and use a temporary contiguous
// copy of each matrix rather than the original matrices.
bli_set_dim_with_side( side, m, n, &dim_a );
bli_screate_contigmr( uplo,
dim_a,
dim_a,
a_save, a_rs_save, a_cs_save,
&a, &a_rs, &a_cs );
bli_screate_contigm( m,
n,
b_save, b_rs_save, b_cs_save,
&b, &b_rs, &b_cs );
bli_screate_contigm( m,
n,
c_save, c_rs_save, c_cs_save,
&c, &c_rs, &c_cs );
// Initialize with values assuming column-major storage.
lda = a_cs;
inca = a_rs;
ldb = b_cs;
incb = b_rs;
ldc = c_cs;
incc = c_rs;
// Adjust the parameters based on the storage of each matrix. In the case
// comments below, a _c suffix means column storage and _r means row storage;
// ~uplo denotes the toggled uplo value.
if ( bli_is_col_storage( c_rs, c_cs ) )
{
if ( bli_is_col_storage( a_rs, a_cs ) )
{
if ( bli_is_col_storage( b_rs, b_cs ) )
{
// requested operation: C_c += uplo( A_c ) * B_c
// effective operation: C_c += uplo( A_c ) * B_c
}
else // if ( bli_is_row_storage( b_rs, b_cs ) )
{
// requested operation: C_c += uplo( A_c ) * B_r
// effective operation: C_c += uplo( A_c ) * B_c
symm_needs_copyb = TRUE;
}
}
else // if ( bli_is_row_storage( a_rs, a_cs ) )
{
if ( bli_is_col_storage( b_rs, b_cs ) )
{
// requested operation: C_c += uplo( A_r ) * B_c
// effective operation: C_c += ~uplo( conj( A_c ) ) * B_c
bli_swap_ints( lda, inca );
bli_toggle_uplo( uplo );
}
else // if ( bli_is_row_storage( b_rs, b_cs ) )
{
// requested operation: C_c += uplo( A_r ) * B_r
// effective operation: C_c += ( B_c * ~uplo( conj( A_c ) ) )^T
bli_swap_ints( lda, inca );
bli_swap_ints( ldb, incb );
bli_toggle_side( side );
bli_toggle_uplo( uplo );
symm_needs_axpyt = TRUE;
}
}
}
else // if ( bli_is_row_storage( c_rs, c_cs ) )
{
if ( bli_is_col_storage( a_rs, a_cs ) )
{
if ( bli_is_col_storage( b_rs, b_cs ) )
{
// requested operation: C_r += uplo( A_c ) * B_c
// effective operation: C_c += ( uplo( A_c ) * B_c )^T
bli_swap_ints( ldc, incc );
bli_swap_ints( m, n );
symm_needs_axpyt = TRUE;
}
else // if ( bli_is_row_storage( b_rs, b_cs ) )
{
// requested operation: C_r += uplo( A_c ) * B_r
// effective operation: C_c += B_c * ~uplo( conj( A_c ) )
bli_swap_ints( ldc, incc );
bli_swap_ints( ldb, incb );
bli_swap_ints( m, n );
bli_toggle_side( side );
}
}
else // if ( bli_is_row_storage( a_rs, a_cs ) )
{
if ( bli_is_col_storage( b_rs, b_cs ) )
{
// requested operation: C_r += uplo( A_r ) * B_c
// effective operation: C_c += B_c^T * ~uplo( A_c )
bli_swap_ints( ldc, incc );
bli_swap_ints( lda, inca );
bli_swap_ints( m, n );
bli_toggle_side( side );
bli_toggle_uplo( uplo );
symm_needs_copyb = TRUE;
symm_needs_transb = TRUE;
}
else // if ( bli_is_row_storage( b_rs, b_cs ) )
{
// requested operation: C_r += uplo( A_r ) * B_r
// effective operation: C_c += B_c * conj( ~uplo( A_c ) )
bli_swap_ints( ldc, incc );
bli_swap_ints( lda, inca );
bli_swap_ints( ldb, incb );
bli_swap_ints( m, n );
bli_toggle_uplo( uplo );
bli_toggle_side( side );
}
}
}
// We need a temporary matrix for the cases where B needs to be copied.
// By default b_copy simply aliases B so the BLAS call below can always
// use b_copy/ldb_copy.
b_copy = b;
ldb_copy = ldb;
incb_copy = incb;
// There are two cases where we need to make a copy of B: one where the
// copy's dimensions are transposed from the original B, and one where
// the dimensions are not swapped.
if ( symm_needs_copyb )
{
trans_t transb;
// Set transb, which determines whether or not we need to copy from B
// as if it needs a transposition. If a transposition is needed, then
// m and n have already been swapped. So in either case m
// represents the leading dimension of the copy.
if ( symm_needs_transb ) transb = BLIS_TRANSPOSE;
else transb = BLIS_NO_TRANSPOSE;
b_copy = bli_sallocm( m, n );
ldb_copy = m;
incb_copy = 1;
bli_scopymt( transb,
m,
n,
b, incb, ldb,
b_copy, incb_copy, ldb_copy );
}
// There are two cases where we need to perform the symm and then axpy
// the result into C with a transposition. We handle those cases here.
if ( symm_needs_axpyt )
{
// We need a temporary matrix for holding C^T. Notice that m and n
// represent the dimensions of C, and thus C_trans is n-by-m
// (interpreting both as column-major matrices). So the leading
// dimension of the temporary matrix holding C^T is n.
c_trans = bli_sallocm( n, m );
ldc_trans = n;
incc_trans = 1;
// Compute A * B (or B * A) and store the result in C_trans.
// Note that there is no overlap between the axpyt cases and
// the conja/copyb cases, hence the use of a, b, lda, and ldb.
// &zero is passed as beta so that the product is written into C_trans
// rather than accumulated into it.
bli_ssymm_blas( side,
uplo,
n,
m,
alpha,
a, lda,
b, ldb,
&zero,
c_trans, ldc_trans );
// Scale C by beta.
bli_sscalm( BLIS_NO_CONJUGATE,
m,
n,
beta,
c, incc, ldc );
// And finally, accumulate the matrix product in C_trans into C
// with a transpose.
bli_saxpymt( BLIS_TRANSPOSE,
m,
n,
&one,
c_trans, incc_trans, ldc_trans,
c, incc, ldc );
// Free the temporary matrix for C.
bli_sfree( c_trans );
}
else // no extra axpyt step needed
{
// Single BLAS call; beta is applied to C by the BLAS routine itself.
bli_ssymm_blas( side,
uplo,
m,
n,
alpha,
a, lda,
b_copy, ldb_copy,
beta,
c, ldc );
}
// b_copy owns memory only when a copy was actually made; otherwise it
// aliases b and must not be freed here.
if ( symm_needs_copyb )
bli_sfree( b_copy );
// Free any temporary contiguous matrices, copying the result back to
// the original matrix. The saved (unswapped) m and n are used for C.
bli_sfree_contigm( a_save, a_rs_save, a_cs_save,
&a, &a_rs, &a_cs );
bli_sfree_contigm( b_save, b_rs_save, b_cs_save,
&b, &b_rs, &b_cs );
bli_sfree_saved_contigm( m_save,
n_save,
c_save, c_rs_save, c_cs_save,
&c, &c_rs, &c_cs );
}
| void bli_ssymm_blas | ( | side_t | side, |
| uplo_t | uplo, | ||
| int | m, | ||
| int | n, | ||
| float * | alpha, | ||
| float * | a, | ||
| int | lda, | ||
| float * | b, | ||
| int | ldb, | ||
| float * | beta, | ||
| float * | c, | ||
| int | ldc | ||
| ) |
References bli_param_map_to_netlib_side(), bli_param_map_to_netlib_uplo(), cblas_ssymm(), CblasColMajor, and F77_ssymm().
Referenced by bli_ssymm().
{
    // Thin dispatch layer: translate the BLIS side/uplo parameters into the
    // representation expected by whichever BLAS binding was selected at
    // compile time, then invoke symm on column-major operands.
#ifdef BLIS_ENABLE_CBLAS_INTERFACES
    enum CBLAS_SIDE side_cblas;
    enum CBLAS_UPLO uplo_cblas;

    bli_param_map_to_netlib_side( side, &side_cblas );
    bli_param_map_to_netlib_uplo( uplo, &uplo_cblas );

    // The CBLAS binding takes the scalars and dimensions by value.
    cblas_ssymm( CblasColMajor, side_cblas, uplo_cblas,
                 m, n,
                 *alpha,
                 a, lda,
                 b, ldb,
                 *beta,
                 c, ldc );
#else
    char side_f77;
    char uplo_f77;

    bli_param_map_to_netlib_side( side, &side_f77 );
    bli_param_map_to_netlib_uplo( uplo, &uplo_f77 );

    // The F77 binding receives every argument by address.
    F77_ssymm( &side_f77, &uplo_f77,
               &m, &n,
               alpha,
               a, &lda,
               b, &ldb,
               beta,
               c, &ldc );
#endif
}
| void bli_ssyr2k | ( | uplo_t | uplo, |
| trans_t | trans, | ||
| int | m, | ||
| int | k, | ||
| float * | alpha, | ||
| float * | a, | ||
| int | a_rs, | ||
| int | a_cs, | ||
| float * | b, | ||
| int | b_rs, | ||
| int | b_cs, | ||
| float * | beta, | ||
| float * | c, | ||
| int | c_rs, | ||
| int | c_cs | ||
| ) |
References bli_is_col_storage(), bli_sallocm(), bli_scopymt(), bli_screate_contigmr(), bli_screate_contigmt(), bli_set_dims_with_trans(), bli_sfree(), bli_sfree_contigm(), bli_sfree_saved_contigmr(), bli_ssyr2k_blas(), bli_zero_dim2(), and BLIS_NO_TRANSPOSE.
Referenced by bli_sher2k(), FLA_Her2k_external(), and FLA_Syr2k_external().
{
// Symmetric rank-2k update wrapper (single-precision real).
//
// Accepts A, B and C described by arbitrary row/column strides and maps the
// request onto one column-major bli_ssyr2k_blas() call, toggling uplo and
// trans and/or copying A or B into column-major temporaries as required by
// the storage of each operand.
//
// Parameters:
//   uplo, trans      - forwarded to bli_ssyr2k_blas(); either may be toggled
//                      below depending on storage.
//   m, k             - C is m-by-m; the dimensions of A and B are derived
//                      from m, k and trans.
//   alpha, beta      - pointers to the scalars forwarded to the BLAS call.
//   a, a_rs, a_cs    - A and its row/column strides.
//   b, b_rs, b_cs    - B and its row/column strides.
//   c, c_rs, c_cs    - C and its row/column strides; C receives the result.

// The *_save variables remember the caller's original arguments, which are
// needed for the final free/copy-back after a, b, c, their strides and uplo
// have possibly been changed.
uplo_t uplo_save = uplo;
int m_save = m;
float* a_save = a;
float* b_save = b;
float* c_save = c;
int a_rs_save = a_rs;
int a_cs_save = a_cs;
int b_rs_save = b_rs;
int b_cs_save = b_cs;
int c_rs_save = c_rs;
int c_cs_save = c_cs;
float* a_copy;
float* b_copy;
int lda, inca;
int ldb, incb;
int ldc, incc;
int lda_copy, inca_copy;
int ldb_copy, incb_copy;
// Set when A (resp. B) must be copied into a column-major temporary because
// its storage differs from that of the other operand.
int syr2k_needs_copya = FALSE;
int syr2k_needs_copyb = FALSE;
// Return early if possible.
// NOTE(review): assuming bli_zero_dim2() is true when either argument is
// zero, this also returns when only k is zero, leaving C unscaled by beta.
// Reference BLAS syr2k still scales C by beta in that case -- confirm that
// skipping the scaling is intended.
if ( bli_zero_dim2( m, k ) ) return;
// If necessary, allocate, initialize, and use a temporary contiguous
// copy of each matrix rather than the original matrices.
bli_screate_contigmt( trans,
m,
k,
a_save, a_rs_save, a_cs_save,
&a, &a_rs, &a_cs );
bli_screate_contigmt( trans,
m,
k,
b_save, b_rs_save, b_cs_save,
&b, &b_rs, &b_cs );
bli_screate_contigmr( uplo,
m,
m,
c_save, c_rs_save, c_cs_save,
&c, &c_rs, &c_cs );
// Initialize with values assuming column-major storage.
lda = a_cs;
inca = a_rs;
ldb = b_cs;
incb = b_rs;
ldc = c_cs;
incc = c_rs;
// Adjust the parameters based on the storage of each matrix. In the case
// comments below, a _c suffix means column storage and _r means row storage;
// ~uplo denotes the toggled uplo value.
if ( bli_is_col_storage( c_rs, c_cs ) )
{
if ( bli_is_col_storage( a_rs, a_cs ) )
{
if ( bli_is_col_storage( b_rs, b_cs ) )
{
// requested operation: uplo( C_c ) += A_c * B_c' + B_c * A_c'
// effective operation: uplo( C_c ) += A_c * B_c' + B_c * A_c'
}
else // if ( bli_is_row_storage( b_rs, b_cs ) )
{
// requested operation: uplo( C_c ) += A_c * B_r' + B_r * A_c'
// effective operation: uplo( C_c ) += A_c * B_c' + B_c * A_c'
syr2k_needs_copyb = TRUE;
}
}
else // if ( bli_is_row_storage( a_rs, a_cs ) )
{
if ( bli_is_col_storage( b_rs, b_cs ) )
{
// requested operation: uplo( C_c ) += A_r * B_c' + B_c * A_r'
// effective operation: uplo( C_c ) += A_c * B_c' + B_c * A_c'
syr2k_needs_copya = TRUE;
}
else // if ( bli_is_row_storage( b_rs, b_cs ) )
{
// requested operation: uplo( C_c ) += A_r * B_r' + B_r * A_r'
// effective operation: uplo( C_c ) += conj( A_c' * B_c + B_c' * A_c )
bli_swap_ints( lda, inca );
bli_swap_ints( ldb, incb );
bli_toggle_trans( trans );
}
}
}
else // if ( bli_is_row_storage( c_rs, c_cs ) )
{
if ( bli_is_col_storage( a_rs, a_cs ) )
{
if ( bli_is_col_storage( b_rs, b_cs ) )
{
// requested operation: uplo( C_r ) += A_c * B_c' + B_c * A_c'
// effective operation: ~uplo( C_c ) += conj( A_c * B_c' + B_c * A_c' )
bli_swap_ints( ldc, incc );
bli_toggle_uplo( uplo );
}
else // if ( bli_is_row_storage( b_rs, b_cs ) )
{
// requested operation: uplo( C_r ) += A_c * B_r' + B_r * A_c'
// effective operation: ~uplo( C_c ) += conj( A_c * B_c' + B_c * A_c' )
syr2k_needs_copyb = TRUE;
bli_swap_ints( ldc, incc );
bli_toggle_uplo( uplo );
}
}
else // if ( bli_is_row_storage( a_rs, a_cs ) )
{
if ( bli_is_col_storage( b_rs, b_cs ) )
{
// requested operation: uplo( C_r ) += A_r * B_c' + B_c * A_r'
// effective operation: ~uplo( C_c ) += conj( A_c * B_c' + B_c * A_c' )
syr2k_needs_copya = TRUE;
bli_swap_ints( ldc, incc );
bli_toggle_uplo( uplo );
}
else // if ( bli_is_row_storage( b_rs, b_cs ) )
{
// requested operation: uplo( C_r ) += A_r * B_r' + B_r * A_r'
// effective operation: ~uplo( C_c ) += A_c' * B_c + B_c' * A_c
bli_swap_ints( ldc, incc );
bli_swap_ints( lda, inca );
bli_swap_ints( ldb, incb );
bli_toggle_uplo( uplo );
bli_toggle_trans( trans );
}
}
}
// By default a_copy aliases A so the BLAS call below can always use
// a_copy/lda_copy.
a_copy = a;
lda_copy = lda;
inca_copy = inca;
// There are two cases where we need to copy A to column-major storage.
// We handle those two cases here.
if ( syr2k_needs_copya )
{
int m_a;
int n_a;
// Determine the dimensions of A according to the value of trans. We
// need this in order to set the leading dimension of the copy of A.
bli_set_dims_with_trans( trans, m, k, &m_a, &n_a );
// We need a temporary matrix to hold a column-major copy of A.
a_copy = bli_sallocm( m, k );
lda_copy = m_a;
inca_copy = 1;
// Copy the contents of A into A_copy.
bli_scopymt( BLIS_NO_TRANSPOSE,
m_a,
n_a,
a, inca, lda,
a_copy, inca_copy, lda_copy );
}
// By default b_copy aliases B so the BLAS call below can always use
// b_copy/ldb_copy.
b_copy = b;
ldb_copy = ldb;
incb_copy = incb;
// There are two cases where we need to copy B to column-major storage.
// We handle those two cases here.
if ( syr2k_needs_copyb )
{
int m_b;
int n_b;
// Determine the dimensions of B according to the value of trans. We
// need this in order to set the leading dimension of the copy of B.
bli_set_dims_with_trans( trans, m, k, &m_b, &n_b );
// We need a temporary matrix to hold a column-major copy of B.
b_copy = bli_sallocm( m, k );
ldb_copy = m_b;
incb_copy = 1;
// Copy the contents of B into B_copy.
bli_scopymt( BLIS_NO_TRANSPOSE,
m_b,
n_b,
b, incb, ldb,
b_copy, incb_copy, ldb_copy );
}
// Single column-major BLAS call using the (possibly toggled) uplo/trans and
// the (possibly copied) A and B operands.
bli_ssyr2k_blas( uplo,
trans,
m,
k,
alpha,
a_copy, lda_copy,
b_copy, ldb_copy,
beta,
c, ldc );
// a_copy/b_copy own memory only when a copy was actually made; otherwise
// they alias a/b and must not be freed here.
if ( syr2k_needs_copya )
bli_sfree( a_copy );
if ( syr2k_needs_copyb )
bli_sfree( b_copy );
// Free any temporary contiguous matrices, copying the result back to
// the original matrix. The saved (untoggled) uplo is used for C.
bli_sfree_contigm( a_save, a_rs_save, a_cs_save,
&a, &a_rs, &a_cs );
bli_sfree_contigm( b_save, b_rs_save, b_cs_save,
&b, &b_rs, &b_cs );
bli_sfree_saved_contigmr( uplo_save,
m_save,
m_save,
c_save, c_rs_save, c_cs_save,
&c, &c_rs, &c_cs );
}
| void bli_ssyr2k_blas | ( | uplo_t | uplo, |
| trans_t | trans, | ||
| int | m, | ||
| int | k, | ||
| float * | alpha, | ||
| float * | a, | ||
| int | lda, | ||
| float * | b, | ||
| int | ldb, | ||
| float * | beta, | ||
| float * | c, | ||
| int | ldc | ||
| ) |
References bli_is_conjtrans(), bli_param_map_to_netlib_trans(), bli_param_map_to_netlib_uplo(), BLIS_TRANSPOSE, cblas_ssyr2k(), CblasColMajor, and F77_ssyr2k().
Referenced by bli_ssyr2k().
{
    // Thin dispatch layer: translate the BLIS uplo/trans parameters into the
    // representation expected by whichever BLAS binding was selected at
    // compile time, then invoke syr2k on column-major operands.
#ifdef BLIS_ENABLE_CBLAS_INTERFACES
    enum CBLAS_UPLO      uplo_cblas;
    enum CBLAS_TRANSPOSE trans_cblas;

    // syr2k in BLAS has no conjugate-transpose option, so a conjugate
    // transposition request is demoted to a plain transposition.
    if ( bli_is_conjtrans( trans ) )
    {
        trans = BLIS_TRANSPOSE;
    }

    bli_param_map_to_netlib_uplo( uplo, &uplo_cblas );
    bli_param_map_to_netlib_trans( trans, &trans_cblas );

    // The CBLAS binding takes the scalars and dimensions by value.
    cblas_ssyr2k( CblasColMajor, uplo_cblas, trans_cblas,
                  m, k,
                  *alpha,
                  a, lda,
                  b, ldb,
                  *beta,
                  c, ldc );
#else
    char uplo_f77;
    char trans_f77;

    // syr2k in BLAS has no conjugate-transpose option, so a conjugate
    // transposition request is demoted to a plain transposition.
    if ( bli_is_conjtrans( trans ) )
    {
        trans = BLIS_TRANSPOSE;
    }

    bli_param_map_to_netlib_uplo( uplo, &uplo_f77 );
    bli_param_map_to_netlib_trans( trans, &trans_f77 );

    // The F77 binding receives every argument by address.
    F77_ssyr2k( &uplo_f77, &trans_f77,
                &m, &k,
                alpha,
                a, &lda,
                b, &ldb,
                beta,
                c, &ldc );
#endif
}
| void bli_ssyrk | ( | uplo_t | uplo, |
| trans_t | trans, | ||
| int | m, | ||
| int | k, | ||
| float * | alpha, | ||
| float * | a, | ||
| int | a_rs, | ||
| int | a_cs, | ||
| float * | beta, | ||
| float * | c, | ||
| int | c_rs, | ||
| int | c_cs | ||
| ) |
References bli_is_col_storage(), bli_screate_contigmr(), bli_screate_contigmt(), bli_sfree_contigm(), bli_sfree_saved_contigmr(), bli_ssyrk_blas(), and bli_zero_dim2().
Referenced by bli_sherk(), FLA_Herk_external(), FLA_Syrk_external(), and FLA_UDdate_UT_ops_var1().
{
// Symmetric rank-k update wrapper (single-precision real).
//
// Accepts A and C described by arbitrary row/column strides and maps the
// request onto one column-major bli_ssyrk_blas() call, toggling uplo and
// trans as required by the storage of each operand.
//
// Parameters:
//   uplo, trans      - forwarded to bli_ssyrk_blas(); either may be toggled
//                      below depending on storage.
//   m, k             - C is m-by-m; the dimensions of A are derived from
//                      m, k and trans.
//   alpha, beta      - pointers to the scalars forwarded to the BLAS call.
//   a, a_rs, a_cs    - A and its row/column strides.
//   c, c_rs, c_cs    - C and its row/column strides; C receives the result.

// The *_save variables remember the caller's original arguments, which are
// needed for the final free/copy-back after a, c, their strides and uplo
// have possibly been changed.
uplo_t uplo_save = uplo;
int m_save = m;
float* a_save = a;
float* c_save = c;
int a_rs_save = a_rs;
int a_cs_save = a_cs;
int c_rs_save = c_rs;
int c_cs_save = c_cs;
int lda, inca;
int ldc, incc;
// Return early if possible.
// NOTE(review): assuming bli_zero_dim2() is true when either argument is
// zero, this also returns when only k is zero, leaving C unscaled by beta.
// Reference BLAS syrk still scales C by beta in that case -- confirm that
// skipping the scaling is intended.
if ( bli_zero_dim2( m, k ) ) return;
// If necessary, allocate, initialize, and use a temporary contiguous
// copy of each matrix rather than the original matrices.
bli_screate_contigmt( trans,
m,
k,
a_save, a_rs_save, a_cs_save,
&a, &a_rs, &a_cs );
bli_screate_contigmr( uplo,
m,
m,
c_save, c_rs_save, c_cs_save,
&c, &c_rs, &c_cs );
// Initialize with values assuming column-major storage.
lda = a_cs;
inca = a_rs;
ldc = c_cs;
incc = c_rs;
// Adjust the parameters based on the storage of each matrix. In the case
// comments below, a _c suffix means column storage and _r means row storage;
// ~uplo denotes the toggled uplo value.
if ( bli_is_col_storage( c_rs, c_cs ) )
{
if ( bli_is_col_storage( a_rs, a_cs ) )
{
// requested operation: uplo( C_c ) += A_c * A_c^T
// effective operation: uplo( C_c ) += A_c * A_c^T
}
else // if ( bli_is_row_storage( a_rs, a_cs ) )
{
// requested operation: uplo( C_c ) += A_r * A_r^T
// effective operation: uplo( C_c ) += A_c^T * A_c
bli_swap_ints( lda, inca );
bli_toggle_trans( trans );
}
}
else // if ( bli_is_row_storage( c_rs, c_cs ) )
{
if ( bli_is_col_storage( a_rs, a_cs ) )
{
// requested operation: uplo( C_r ) += A_c * A_c^T
// effective operation: ~uplo( C_c ) += A_c * A_c^T
bli_swap_ints( ldc, incc );
bli_toggle_uplo( uplo );
}
else // if ( bli_is_row_storage( a_rs, a_cs ) )
{
// requested operation: uplo( C_r ) += A_r * A_r^T
// effective operation: ~uplo( C_c ) += A_c^T * A_c
bli_swap_ints( ldc, incc );
bli_swap_ints( lda, inca );
bli_toggle_uplo( uplo );
bli_toggle_trans( trans );
}
}
// Single column-major BLAS call using the (possibly toggled) uplo/trans.
bli_ssyrk_blas( uplo,
trans,
m,
k,
alpha,
a, lda,
beta,
c, ldc );
// Free any temporary contiguous matrices, copying the result back to
// the original matrix. The saved (untoggled) uplo is used for C.
bli_sfree_contigm( a_save, a_rs_save, a_cs_save,
&a, &a_rs, &a_cs );
bli_sfree_saved_contigmr( uplo_save,
m_save,
m_save,
c_save, c_rs_save, c_cs_save,
&c, &c_rs, &c_cs );
}
| void bli_ssyrk_blas | ( | uplo_t | uplo, |
| trans_t | trans, | ||
| int | m, | ||
| int | k, | ||
| float * | alpha, | ||
| float * | a, | ||
| int | lda, | ||
| float * | beta, | ||
| float * | c, | ||
| int | ldc | ||
| ) |
References bli_param_map_to_netlib_trans(), bli_param_map_to_netlib_uplo(), cblas_ssyrk(), CblasColMajor, and F77_ssyrk().
Referenced by bli_ssyrk().
{
    // Thin dispatch layer: translate the BLIS uplo/trans parameters into the
    // representation expected by whichever BLAS binding was selected at
    // compile time, then invoke syrk on column-major operands.
#ifdef BLIS_ENABLE_CBLAS_INTERFACES
    enum CBLAS_UPLO      uplo_cblas;
    enum CBLAS_TRANSPOSE trans_cblas;

    bli_param_map_to_netlib_uplo( uplo, &uplo_cblas );
    bli_param_map_to_netlib_trans( trans, &trans_cblas );

    // The CBLAS binding takes the scalars and dimensions by value.
    cblas_ssyrk( CblasColMajor, uplo_cblas, trans_cblas,
                 m, k,
                 *alpha,
                 a, lda,
                 *beta,
                 c, ldc );
#else
    char uplo_f77;
    char trans_f77;

    bli_param_map_to_netlib_uplo( uplo, &uplo_f77 );
    bli_param_map_to_netlib_trans( trans, &trans_f77 );

    // The F77 binding receives every argument by address.
    F77_ssyrk( &uplo_f77, &trans_f77,
               &m, &k,
               alpha,
               a, &lda,
               beta,
               c, &ldc );
#endif
}
| void bli_strmm | ( | side_t | side, |
| uplo_t | uplo, | ||
| trans_t | trans, | ||
| diag_t | diag, | ||
| int | m, | ||
| int | n, | ||
| float * | alpha, | ||
| float * | a, | ||
| int | a_rs, | ||
| int | a_cs, | ||
| float * | b, | ||
| int | b_rs, | ||
| int | b_cs | ||
| ) |
References bli_is_col_storage(), bli_screate_contigm(), bli_screate_contigmr(), bli_set_dim_with_side(), bli_sfree_contigm(), bli_sfree_saved_contigm(), bli_strmm_blas(), and bli_zero_dim2().
Referenced by bli_strmmsx(), and FLA_Trmm_external().
{
// Triangular matrix-matrix multiply wrapper (single-precision real).
//
// Accepts A and B described by arbitrary row/column strides and maps the
// request onto one column-major bli_strmm_blas() call, toggling side, uplo
// and trans and swapping m and n as required by the storage of each operand.
// B is both an input and the destination of the result.
//
// Parameters:
//   side, uplo, trans - forwarded to bli_strmm_blas(); each may be toggled
//                       below depending on storage.
//   diag              - forwarded to bli_strmm_blas() unchanged.
//   m, n              - dimensions of B.
//   alpha             - pointer to the scalar forwarded to the BLAS call.
//   a, a_rs, a_cs     - A and its row/column strides. A is treated as
//                       dim_a-by-dim_a, where dim_a is chosen from m and n
//                       by bli_set_dim_with_side().
//   b, b_rs, b_cs     - B and its row/column strides.
//
// Returns immediately, without touching B, when bli_zero_dim2( m, n ) holds.

// The *_save variables remember the caller's original arguments, which are
// needed for the final free/copy-back after a, b, their strides, m and n
// have possibly been changed.
int m_save = m;
int n_save = n;
float* a_save = a;
float* b_save = b;
int a_rs_save = a_rs;
int a_cs_save = a_cs;
int b_rs_save = b_rs;
int b_cs_save = b_cs;
int dim_a;
int lda, inca;
int ldb, incb;
// Return early if possible.
if ( bli_zero_dim2( m, n ) ) return;
// If necessary, allocate, initialize, and use a temporary contiguous
// copy of each matrix rather than the original matrices.
bli_set_dim_with_side( side, m, n, &dim_a );
bli_screate_contigmr( uplo,
dim_a,
dim_a,
a_save, a_rs_save, a_cs_save,
&a, &a_rs, &a_cs );
bli_screate_contigm( m,
n,
b_save, b_rs_save, b_cs_save,
&b, &b_rs, &b_cs );
// Initialize with values assuming column-major storage.
lda = a_cs;
inca = a_rs;
ldb = b_cs;
incb = b_rs;
// Adjust the parameters based on the storage of each matrix. In the case
// comments below, a _c suffix means column storage and _r means row storage;
// tr() stands for the requested trans, and ~uplo for the toggled uplo value.
if ( bli_is_col_storage( b_rs, b_cs ) )
{
if ( bli_is_col_storage( a_rs, a_cs ) )
{
// requested operation: B_c := tr( uplo( A_c ) ) * B_c
// effective operation: B_c := tr( uplo( A_c ) ) * B_c
}
else // if ( bli_is_row_storage( a_rs, a_cs ) )
{
// requested operation: B_c := tr( uplo( A_r ) ) * B_c
// effective operation: B_c := tr( ~uplo( A_c ) )^T * B_c
bli_swap_ints( lda, inca );
bli_toggle_uplo( uplo );
bli_toggle_trans( trans );
}
}
else // if ( bli_is_row_storage( b_rs, b_cs ) )
{
if ( bli_is_col_storage( a_rs, a_cs ) )
{
// requested operation: B_r := tr( uplo( A_c ) ) * B_r
// effective operation: B_c := B_c * tr( uplo( A_c ) )^T
bli_swap_ints( ldb, incb );
bli_swap_ints( m, n );
bli_toggle_side( side );
bli_toggle_trans( trans );
}
else // if ( bli_is_row_storage( a_rs, a_cs ) )
{
// requested operation: B_r := tr( uplo( A_r ) ) * B_r
// effective operation: B_c := B_c * tr( ~uplo( A_c ) )
bli_swap_ints( ldb, incb );
bli_swap_ints( lda, inca );
bli_swap_ints( m, n );
bli_toggle_uplo( uplo );
bli_toggle_side( side );
}
}
// Single column-major BLAS call using the adjusted parameters.
bli_strmm_blas( side,
uplo,
trans,
diag,
m,
n,
alpha,
a, lda,
b, ldb );
// Free any temporary contiguous matrices, copying the result back to
// the original matrix. The saved (unswapped) m and n are used for B.
bli_sfree_contigm( a_save, a_rs_save, a_cs_save,
&a, &a_rs, &a_cs );
bli_sfree_saved_contigm( m_save,
n_save,
b_save, b_rs_save, b_cs_save,
&b, &b_rs, &b_cs );
}
| void bli_strmm_blas | ( | side_t | side, |
| uplo_t | uplo, | ||
| trans_t | trans, | ||
| diag_t | diag, | ||
| int | m, | ||
| int | n, | ||
| float * | alpha, | ||
| float * | a, | ||
| int | lda, | ||
| float * | b, | ||
| int | ldb | ||
| ) |
References bli_param_map_to_netlib_diag(), bli_param_map_to_netlib_side(), bli_param_map_to_netlib_trans(), bli_param_map_to_netlib_uplo(), cblas_strmm(), CblasColMajor, and F77_strmm().
Referenced by bli_strmm().
{
    // Thin dispatch layer: translate the BLIS side/uplo/trans/diag parameters
    // into the representation expected by whichever BLAS binding was selected
    // at compile time, then invoke trmm on column-major operands.
#ifdef BLIS_ENABLE_CBLAS_INTERFACES
    enum CBLAS_SIDE      side_cblas;
    enum CBLAS_UPLO      uplo_cblas;
    enum CBLAS_TRANSPOSE trans_cblas;
    enum CBLAS_DIAG      diag_cblas;

    bli_param_map_to_netlib_side( side, &side_cblas );
    bli_param_map_to_netlib_uplo( uplo, &uplo_cblas );
    bli_param_map_to_netlib_trans( trans, &trans_cblas );
    bli_param_map_to_netlib_diag( diag, &diag_cblas );

    // The CBLAS binding takes the scalar and dimensions by value.
    cblas_strmm( CblasColMajor,
                 side_cblas, uplo_cblas, trans_cblas, diag_cblas,
                 m, n,
                 *alpha,
                 a, lda,
                 b, ldb );
#else
    char side_f77;
    char uplo_f77;
    char trans_f77;
    char diag_f77;

    bli_param_map_to_netlib_side( side, &side_f77 );
    bli_param_map_to_netlib_uplo( uplo, &uplo_f77 );
    bli_param_map_to_netlib_trans( trans, &trans_f77 );
    bli_param_map_to_netlib_diag( diag, &diag_f77 );

    // The F77 binding receives every argument by address.
    F77_strmm( &side_f77, &uplo_f77, &trans_f77, &diag_f77,
               &m, &n,
               alpha,
               a, &lda,
               b, &ldb );
#endif
}
| void bli_strmmsx | ( | side_t | side, |
| uplo_t | uplo, | ||
| trans_t | trans, | ||
| diag_t | diag, | ||
| int | m, | ||
| int | n, | ||
| float * | alpha, | ||
| float * | a, | ||
| int | a_rs, | ||
| int | a_cs, | ||
| float * | b, | ||
| int | b_rs, | ||
| int | b_cs, | ||
| float * | beta, | ||
| float * | c, | ||
| int | c_rs, | ||
| int | c_cs | ||
| ) |
References bli_is_col_storage(), bli_s1(), bli_sallocm(), bli_saxpymt(), bli_scopymt(), bli_screate_contigm(), bli_screate_contigmr(), bli_set_dim_with_side(), bli_sfree(), bli_sfree_contigm(), bli_sfree_saved_contigm(), bli_sscalm(), bli_strmm(), bli_zero_dim2(), BLIS_NO_CONJUGATE, and BLIS_NO_TRANSPOSE.
Referenced by FLA_Trmmsx_external().
{
    // Triangular multiply whose result is accumulated into a separate matrix
    // C instead of overwriting B:
    //   1. T := copy of B (same storage orientation as B)
    //   2. bli_strmm() is applied to T using alpha and A
    //   3. C := beta * C + T
    // B itself is left untouched. Nothing is done when bli_zero_dim2( m, n )
    // holds.

    // Caller-supplied arguments, remembered so that the temporary contiguous
    // copies can be released (and C copied back) at the end.
    float* a_orig    = a;
    float* b_orig    = b;
    float* c_orig    = c;
    int    a_rs_orig = a_rs;
    int    a_cs_orig = a_cs;
    int    b_rs_orig = b_rs;
    int    b_cs_orig = b_cs;
    int    c_rs_orig = c_rs;
    int    c_cs_orig = c_cs;
    int    m_orig    = m;
    int    n_orig    = n;

    float  unit = bli_s1();
    float* work;
    int    work_rs, work_cs;
    int    dim_a;

    if ( bli_zero_dim2( m, n ) )
    {
        return;
    }

    // Switch to temporary contiguous copies of A, B and C where necessary.
    // A is dim_a-by-dim_a, with dim_a selected from m and n by side.
    bli_set_dim_with_side( side, m, n, &dim_a );

    bli_screate_contigmr( uplo, dim_a, dim_a,
                          a_orig, a_rs_orig, a_cs_orig,
                          &a, &a_rs, &a_cs );
    bli_screate_contigm( m, n,
                         b_orig, b_rs_orig, b_cs_orig,
                         &b, &b_rs, &b_cs );
    bli_screate_contigm( m, n,
                         c_orig, c_rs_orig, c_cs_orig,
                         &c, &c_rs, &c_cs );

    // Workspace that bli_strmm() may overwrite in place of B.
    work = bli_sallocm( m, n );

    // Give the workspace the same storage orientation as B.
    if ( bli_is_col_storage( b_rs, b_cs ) )
    {
        work_rs = 1;
        work_cs = m;
    }
    else
    {
        work_rs = n;
        work_cs = 1;
    }

    // work := B
    bli_scopymt( BLIS_NO_TRANSPOSE, m, n,
                 b, b_rs, b_cs,
                 work, work_rs, work_cs );

    // Apply the triangular multiply to the workspace.
    bli_strmm( side, uplo, trans, diag,
               m, n,
               alpha,
               a, a_rs, a_cs,
               work, work_rs, work_cs );

    // C := beta * C
    bli_sscalm( BLIS_NO_CONJUGATE, m, n,
                beta,
                c, c_rs, c_cs );

    // C := C + work
    bli_saxpymt( BLIS_NO_TRANSPOSE, m, n,
                 &unit,
                 work, work_rs, work_cs,
                 c, c_rs, c_cs );

    bli_sfree( work );

    // Release the temporary contiguous matrices; the C variant also copies
    // the result back to the caller's matrix.
    bli_sfree_contigm( a_orig, a_rs_orig, a_cs_orig,
                       &a, &a_rs, &a_cs );
    bli_sfree_contigm( b_orig, b_rs_orig, b_cs_orig,
                       &b, &b_rs, &b_cs );
    bli_sfree_saved_contigm( m_orig, n_orig,
                             c_orig, c_rs_orig, c_cs_orig,
                             &c, &c_rs, &c_cs );
}
| void bli_strsm | ( | side_t | side, |
| uplo_t | uplo, | ||
| trans_t | trans, | ||
| diag_t | diag, | ||
| int | m, | ||
| int | n, | ||
| float * | alpha, | ||
| float * | a, | ||
| int | a_rs, | ||
| int | a_cs, | ||
| float * | b, | ||
| int | b_rs, | ||
| int | b_cs | ||
| ) |
References bli_is_col_storage(), bli_screate_contigm(), bli_screate_contigmr(), bli_set_dim_with_side(), bli_sfree_contigm(), bli_sfree_saved_contigm(), bli_strsm_blas(), and bli_zero_dim2().
Referenced by bli_strsmsx(), FLA_LU_nopiv_ops_var1(), FLA_LU_nopiv_ops_var2(), FLA_LU_nopiv_ops_var3(), FLA_LU_piv_ops_var3(), and FLA_Trsm_external().
{
// Triangular solve wrapper with multiple right-hand sides (single-precision
// real).
//
// Accepts A and B described by arbitrary row/column strides and maps the
// request onto one column-major bli_strsm_blas() call, toggling side, uplo
// and trans and swapping m and n as required by the storage of each operand.
// B is both an input and the destination of the result.
//
// Parameters:
//   side, uplo, trans - forwarded to bli_strsm_blas(); each may be toggled
//                       below depending on storage.
//   diag              - forwarded to bli_strsm_blas() unchanged.
//   m, n              - dimensions of B.
//   alpha             - pointer to the scalar forwarded to the BLAS call.
//   a, a_rs, a_cs     - A and its row/column strides. A is treated as
//                       dim_a-by-dim_a, where dim_a is chosen from m and n
//                       by bli_set_dim_with_side().
//   b, b_rs, b_cs     - B and its row/column strides.
//
// Returns immediately, without touching B, when bli_zero_dim2( m, n ) holds.

// The *_save variables remember the caller's original arguments, which are
// needed for the final free/copy-back after a, b, their strides, m and n
// have possibly been changed.
int m_save = m;
int n_save = n;
float* a_save = a;
float* b_save = b;
int a_rs_save = a_rs;
int a_cs_save = a_cs;
int b_rs_save = b_rs;
int b_cs_save = b_cs;
int dim_a;
int lda, inca;
int ldb, incb;
// Return early if possible.
if ( bli_zero_dim2( m, n ) ) return;
// If necessary, allocate, initialize, and use a temporary contiguous
// copy of each matrix rather than the original matrices.
bli_set_dim_with_side( side, m, n, &dim_a );
bli_screate_contigmr( uplo,
dim_a,
dim_a,
a_save, a_rs_save, a_cs_save,
&a, &a_rs, &a_cs );
bli_screate_contigm( m,
n,
b_save, b_rs_save, b_cs_save,
&b, &b_rs, &b_cs );
// Initialize with values assuming column-major storage.
lda = a_cs;
inca = a_rs;
ldb = b_cs;
incb = b_rs;
// Adjust the parameters based on the storage of each matrix. In the case
// comments below, a _c suffix means column storage and _r means row storage;
// tr() stands for the requested trans, and ~uplo for the toggled uplo value.
// NOTE(review): the comments are written with inv() on the assumption that
// the underlying BLAS trsm performs a triangular solve -- confirm.
if ( bli_is_col_storage( b_rs, b_cs ) )
{
if ( bli_is_col_storage( a_rs, a_cs ) )
{
// requested operation: B_c := inv( tr( uplo( A_c ) ) ) * B_c
// effective operation: B_c := inv( tr( uplo( A_c ) ) ) * B_c
}
else // if ( bli_is_row_storage( a_rs, a_cs ) )
{
// requested operation: B_c := inv( tr( uplo( A_r ) ) ) * B_c
// effective operation: B_c := inv( tr( ~uplo( A_c ) )^T ) * B_c
bli_swap_ints( lda, inca );
bli_toggle_uplo( uplo );
bli_toggle_trans( trans );
}
}
else // if ( bli_is_row_storage( b_rs, b_cs ) )
{
if ( bli_is_col_storage( a_rs, a_cs ) )
{
// requested operation: B_r := inv( tr( uplo( A_c ) ) ) * B_r
// effective operation: B_c := B_c * inv( tr( uplo( A_c ) )^T )
bli_swap_ints( ldb, incb );
bli_swap_ints( m, n );
bli_toggle_side( side );
bli_toggle_trans( trans );
}
else // if ( bli_is_row_storage( a_rs, a_cs ) )
{
// requested operation: B_r := inv( tr( uplo( A_r ) ) ) * B_r
// effective operation: B_c := B_c * inv( tr( ~uplo( A_c ) ) )
bli_swap_ints( ldb, incb );
bli_swap_ints( lda, inca );
bli_swap_ints( m, n );
bli_toggle_uplo( uplo );
bli_toggle_side( side );
}
}
// Single column-major BLAS call using the adjusted parameters.
bli_strsm_blas( side,
uplo,
trans,
diag,
m,
n,
alpha,
a, lda,
b, ldb );
// Free any temporary contiguous matrices, copying the result back to
// the original matrix. The saved (unswapped) m and n are used for B.
bli_sfree_contigm( a_save, a_rs_save, a_cs_save,
&a, &a_rs, &a_cs );
bli_sfree_saved_contigm( m_save,
n_save,
b_save, b_rs_save, b_cs_save,
&b, &b_rs, &b_cs );
}
| void bli_strsm_blas | ( | side_t | side, |
| uplo_t | uplo, | ||
| trans_t | trans, | ||
| diag_t | diag, | ||
| int | m, | ||
| int | n, | ||
| float * | alpha, | ||
| float * | a, | ||
| int | lda, | ||
| float * | b, | ||
| int | ldb | ||
| ) |
References bli_param_map_to_netlib_diag(), bli_param_map_to_netlib_side(), bli_param_map_to_netlib_trans(), bli_param_map_to_netlib_uplo(), cblas_strsm(), CblasColMajor, and F77_strsm().
Referenced by bli_strsm().
{
    // Thin dispatch layer: translate the BLIS side/uplo/trans/diag parameters
    // into the representation expected by whichever BLAS binding was selected
    // at compile time, then invoke trsm on column-major operands.
#ifdef BLIS_ENABLE_CBLAS_INTERFACES
    enum CBLAS_SIDE      side_cblas;
    enum CBLAS_UPLO      uplo_cblas;
    enum CBLAS_TRANSPOSE trans_cblas;
    enum CBLAS_DIAG      diag_cblas;

    bli_param_map_to_netlib_side( side, &side_cblas );
    bli_param_map_to_netlib_uplo( uplo, &uplo_cblas );
    bli_param_map_to_netlib_trans( trans, &trans_cblas );
    bli_param_map_to_netlib_diag( diag, &diag_cblas );

    // The CBLAS binding takes the scalar and dimensions by value.
    cblas_strsm( CblasColMajor,
                 side_cblas, uplo_cblas, trans_cblas, diag_cblas,
                 m, n,
                 *alpha,
                 a, lda,
                 b, ldb );
#else
    char side_f77;
    char uplo_f77;
    char trans_f77;
    char diag_f77;

    bli_param_map_to_netlib_side( side, &side_f77 );
    bli_param_map_to_netlib_uplo( uplo, &uplo_f77 );
    bli_param_map_to_netlib_trans( trans, &trans_f77 );
    bli_param_map_to_netlib_diag( diag, &diag_f77 );

    // The F77 binding receives every argument by address.
    F77_strsm( &side_f77, &uplo_f77, &trans_f77, &diag_f77,
               &m, &n,
               alpha,
               a, &lda,
               b, &ldb );
#endif
}
| void bli_strsmsx | ( | side_t | side, |
| uplo_t | uplo, | ||
| trans_t | trans, | ||
| diag_t | diag, | ||
| int | m, | ||
| int | n, | ||
| float * | alpha, | ||
| float * | a, | ||
| int | a_rs, | ||
| int | a_cs, | ||
| float * | b, | ||
| int | b_rs, | ||
| int | b_cs, | ||
| float * | beta, | ||
| float * | c, | ||
| int | c_rs, | ||
| int | c_cs | ||
| ) |
References bli_is_col_storage(), bli_s1(), bli_sallocm(), bli_saxpymt(), bli_scopymt(), bli_screate_contigm(), bli_screate_contigmr(), bli_set_dim_with_side(), bli_sfree(), bli_sfree_contigm(), bli_sfree_saved_contigm(), bli_sscalm(), bli_strsm(), bli_zero_dim2(), BLIS_NO_CONJUGATE, and BLIS_NO_TRANSPOSE.
Referenced by FLA_Trsmsx_external().
{
    // Triangular solve whose result is accumulated into a separate matrix C
    // instead of overwriting B:
    //   1. T := copy of B (same storage orientation as B)
    //   2. bli_strsm() is applied to T using alpha and A
    //   3. C := beta * C + T
    // B itself is left untouched. Nothing is done when bli_zero_dim2( m, n )
    // holds.

    // Caller-supplied arguments, remembered so that the temporary contiguous
    // copies can be released (and C copied back) at the end.
    float* a_orig    = a;
    float* b_orig    = b;
    float* c_orig    = c;
    int    a_rs_orig = a_rs;
    int    a_cs_orig = a_cs;
    int    b_rs_orig = b_rs;
    int    b_cs_orig = b_cs;
    int    c_rs_orig = c_rs;
    int    c_cs_orig = c_cs;
    int    m_orig    = m;
    int    n_orig    = n;

    float  unit = bli_s1();
    float* work;
    int    work_rs, work_cs;
    int    dim_a;

    if ( bli_zero_dim2( m, n ) )
    {
        return;
    }

    // Switch to temporary contiguous copies of A, B and C where necessary.
    // A is dim_a-by-dim_a, with dim_a selected from m and n by side.
    bli_set_dim_with_side( side, m, n, &dim_a );

    bli_screate_contigmr( uplo, dim_a, dim_a,
                          a_orig, a_rs_orig, a_cs_orig,
                          &a, &a_rs, &a_cs );
    bli_screate_contigm( m, n,
                         b_orig, b_rs_orig, b_cs_orig,
                         &b, &b_rs, &b_cs );
    bli_screate_contigm( m, n,
                         c_orig, c_rs_orig, c_cs_orig,
                         &c, &c_rs, &c_cs );

    // Workspace that bli_strsm() may overwrite in place of B.
    work = bli_sallocm( m, n );

    // Give the workspace the same storage orientation as B.
    if ( bli_is_col_storage( b_rs, b_cs ) )
    {
        work_rs = 1;
        work_cs = m;
    }
    else
    {
        work_rs = n;
        work_cs = 1;
    }

    // work := B
    bli_scopymt( BLIS_NO_TRANSPOSE, m, n,
                 b, b_rs, b_cs,
                 work, work_rs, work_cs );

    // Apply the triangular solve to the workspace.
    bli_strsm( side, uplo, trans, diag,
               m, n,
               alpha,
               a, a_rs, a_cs,
               work, work_rs, work_cs );

    // C := beta * C
    bli_sscalm( BLIS_NO_CONJUGATE, m, n,
                beta,
                c, c_rs, c_cs );

    // C := C + work
    bli_saxpymt( BLIS_NO_TRANSPOSE, m, n,
                 &unit,
                 work, work_rs, work_cs,
                 c, c_rs, c_cs );

    bli_sfree( work );

    // Release the temporary contiguous matrices; the C variant also copies
    // the result back to the caller's matrix.
    bli_sfree_contigm( a_orig, a_rs_orig, a_cs_orig,
                       &a, &a_rs, &a_cs );
    bli_sfree_contigm( b_orig, b_rs_orig, b_cs_orig,
                       &b, &b_rs, &b_cs );
    bli_sfree_saved_contigm( m_orig, n_orig,
                             c_orig, c_rs_orig, c_cs_orig,
                             &c, &c_rs, &c_cs );
}
| void bli_zgemm | ( | trans_t | transa, |
| trans_t | transb, | ||
| int | m, | ||
| int | k, | ||
| int | n, | ||
| dcomplex * | alpha, | ||
| dcomplex * | a, | ||
| int | a_rs, | ||
| int | a_cs, | ||
| dcomplex * | b, | ||
| int | b_rs, | ||
| int | b_cs, | ||
| dcomplex * | beta, | ||
| dcomplex * | c, | ||
| int | c_rs, | ||
| int | c_cs | ||
| ) |
References bli_is_col_storage(), bli_is_conjnotrans(), bli_z0(), bli_z1(), bli_zallocm(), bli_zaxpymt(), bli_zconjm(), bli_zcopymt(), bli_zcreate_contigm(), bli_zcreate_contigmt(), bli_zero_dim3(), bli_zfree(), bli_zfree_contigm(), bli_zfree_saved_contigm(), bli_zgemm_blas(), bli_zscalm(), BLIS_CONJ_NO_TRANSPOSE, BLIS_NO_CONJUGATE, and BLIS_TRANSPOSE.
Referenced by FLA_Gemm_external().
{
// General matrix-matrix multiply for dcomplex operands given with arbitrary
// row/column strides. The work is mapped onto a single column-major
// bli_zgemm_blas() call; depending on how A, B and C are stored, the operands
// may be swapped, their trans values toggled, and/or the product computed
// into a temporary and accumulated into C with a transposition.
int m_save = m;
int n_save = n;
dcomplex* a_save = a;
dcomplex* b_save = b;
dcomplex* c_save = c;
int a_rs_save = a_rs;
int a_cs_save = a_cs;
int b_rs_save = b_rs;
int b_cs_save = b_cs;
int c_rs_save = c_rs;
int c_cs_save = c_cs;
dcomplex zero = bli_z0();
dcomplex one = bli_z1();
dcomplex* a_unswap;
dcomplex* b_unswap;
dcomplex* a_conj;
dcomplex* b_conj;
dcomplex* c_trans;
int lda, inca;
int ldb, incb;
int ldc, incc;
int lda_conj, inca_conj;
int ldb_conj, incb_conj;
int ldc_trans, incc_trans;
int m_gemm, n_gemm;
int gemm_needs_axpyt = FALSE;
int a_was_copied;
int b_was_copied;
// Return early if possible.
// Even when the product is empty, C is still scaled by beta before
// returning.
if ( bli_zero_dim3( m, k, n ) )
{
bli_zscalm( BLIS_NO_CONJUGATE,
m,
n,
beta,
c, c_rs, c_cs );
return;
}
// If necessary, allocate, initialize, and use a temporary contiguous
// copy of each matrix rather than the original matrices.
bli_zcreate_contigmt( transa,
m,
k,
a_save, a_rs_save, a_cs_save,
&a, &a_rs, &a_cs );
bli_zcreate_contigmt( transb,
k,
n,
b_save, b_rs_save, b_cs_save,
&b, &b_rs, &b_cs );
bli_zcreate_contigm( m,
n,
c_save, c_rs_save, c_cs_save,
&c, &c_rs, &c_cs );
// Figure out whether A and/or B was copied to contiguous memory. This
// is used later to prevent redundant copying.
a_was_copied = ( a != a_save );
b_was_copied = ( b != b_save );
// These are used to track the original values of a and b prior to any
// operand swapping that might take place. This is necessary for proper
// freeing of memory when one is a temporary contiguous matrix.
a_unswap = a;
b_unswap = b;
// These are used to track the dimensions of the product of the
// A and B operands to the BLAS invocation of gemm. These differ
// from m and n when the operands need to be swapped.
m_gemm = m;
n_gemm = n;
// Initialize with values assuming column-major storage.
lda = a_cs;
inca = a_rs;
ldb = b_cs;
incb = b_rs;
ldc = c_cs;
incc = c_rs;
// Adjust the parameters based on the storage of each matrix.
// In the case comments below, a _c / _r suffix denotes column / row storage.
if ( bli_is_col_storage( c_rs, c_cs ) )
{
if ( bli_is_col_storage( a_rs, a_cs ) )
{
if ( bli_is_col_storage( b_rs, b_cs ) )
{
// requested operation: C_c += tr( A_c ) * tr( B_c )
// effective operation: C_c += tr( A_c ) * tr( B_c )
}
else // if ( bli_is_row_storage( b_rs, b_cs ) )
{
// requested operation: C_c += tr( A_c ) * tr( B_r )
// effective operation: C_c += tr( A_c ) * tr( B_c )^T
bli_swap_ints( ldb, incb );
bli_toggle_trans( transb );
}
}
else // if ( bli_is_row_storage( a_rs, a_cs ) )
{
if ( bli_is_col_storage( b_rs, b_cs ) )
{
// requested operation: C_c += tr( A_r ) * tr( B_c )
// effective operation: C_c += tr( A_r )^T * tr( B_c )
bli_swap_ints( lda, inca );
bli_toggle_trans( transa );
}
else // if ( bli_is_row_storage( b_rs, b_cs ) )
{
// requested operation: C_c += tr( A_r ) * tr( B_r )
// effective operation: C_c += ( tr( B_c ) * tr( A_c ) )^T
bli_swap_ints( lda, inca );
bli_swap_ints( ldb, incb );
// Exchange the roles of A and B; the copied flags, strides and trans
// values are exchanged along with the pointers so they stay paired.
bli_zswap_pointers( a, b );
bli_swap_ints( a_was_copied, b_was_copied );
bli_swap_ints( lda, ldb );
bli_swap_ints( inca, incb );
bli_swap_trans( transa, transb );
gemm_needs_axpyt = TRUE;
bli_swap_ints( m_gemm, n_gemm );
}
}
}
else // if ( bli_is_row_storage( c_rs, c_cs ) )
{
if ( bli_is_col_storage( a_rs, a_cs ) )
{
if ( bli_is_col_storage( b_rs, b_cs ) )
{
// requested operation: C_r += tr( A_c ) * tr( B_c )
// effective operation: C_c += ( tr( A_c ) * tr( B_c ) )^T
// Here m and n are swapped but m_gemm and n_gemm are not: the product
// keeps its original shape and is transposed into C by the axpyt step.
bli_swap_ints( ldc, incc );
bli_swap_ints( m, n );
gemm_needs_axpyt = TRUE;
}
else // if ( bli_is_row_storage( b_rs, b_cs ) )
{
// requested operation: C_r += tr( A_c ) * tr( B_r )
// effective operation: C_c += tr( B_c ) * tr( A_c )^T
bli_swap_ints( ldc, incc );
bli_swap_ints( ldb, incb );
bli_toggle_trans( transa );
bli_swap_ints( m, n );
bli_swap_ints( m_gemm, n_gemm );
bli_zswap_pointers( a, b );
bli_swap_ints( a_was_copied, b_was_copied );
bli_swap_ints( lda, ldb );
bli_swap_ints( inca, incb );
bli_swap_trans( transa, transb );
}
}
else // if ( bli_is_row_storage( a_rs, a_cs ) )
{
if ( bli_is_col_storage( b_rs, b_cs ) )
{
// requested operation: C_r += tr( A_r ) * tr( B_c )
// effective operation: C_c += tr( B_c )^T * tr( A_c )
bli_swap_ints( ldc, incc );
bli_swap_ints( lda, inca );
bli_toggle_trans( transb );
bli_swap_ints( m, n );
bli_swap_ints( m_gemm, n_gemm );
bli_zswap_pointers( a, b );
bli_swap_ints( a_was_copied, b_was_copied );
bli_swap_ints( lda, ldb );
bli_swap_ints( inca, incb );
bli_swap_trans( transa, transb );
}
else // if ( bli_is_row_storage( b_rs, b_cs ) )
{
// requested operation: C_r += tr( A_r ) * tr( B_r )
// effective operation: C_c += tr( B_c ) * tr( A_c )
bli_swap_ints( lda, inca );
bli_swap_ints( ldb, incb );
bli_swap_ints( ldc, incc );
bli_swap_ints( m, n );
bli_swap_ints( m_gemm, n_gemm );
bli_zswap_pointers( a, b );
bli_swap_ints( a_was_copied, b_was_copied );
bli_swap_ints( lda, ldb );
bli_swap_ints( inca, incb );
bli_swap_trans( transa, transb );
}
}
}
// We need a temporary matrix for the case where A is conjugated.
a_conj = a;
lda_conj = lda;
inca_conj = inca;
// If transa indicates conjugate-no-transpose and A was not already
// copied, then copy and conjugate it to a temporary matrix. Otherwise,
// if transa indicates conjugate-no-transpose and A was already copied,
// just conjugate it.
if ( bli_is_conjnotrans( transa ) && !a_was_copied )
{
a_conj = bli_zallocm( m_gemm, k );
lda_conj = m_gemm;
inca_conj = 1;
bli_zcopymt( BLIS_CONJ_NO_TRANSPOSE,
m_gemm,
k,
a, inca, lda,
a_conj, inca_conj, lda_conj );
}
else if ( bli_is_conjnotrans( transa ) && a_was_copied )
{
// a_conj aliases the contiguous temporary here, so conjugating in place
// does not touch the caller's original A.
bli_zconjm( m_gemm,
k,
a_conj, inca_conj, lda_conj );
}
// We need a temporary matrix for the case where B is conjugated.
b_conj = b;
ldb_conj = ldb;
incb_conj = incb;
// If transb indicates conjugate-no-transpose and B was not already
// copied, then copy and conjugate it to a temporary matrix. Otherwise,
// if transb indicates conjugate-no-transpose and B was already copied,
// just conjugate it.
if ( bli_is_conjnotrans( transb ) && !b_was_copied )
{
b_conj = bli_zallocm( k, n_gemm );
ldb_conj = k;
incb_conj = 1;
bli_zcopymt( BLIS_CONJ_NO_TRANSPOSE,
k,
n_gemm,
b, incb, ldb,
b_conj, incb_conj, ldb_conj );
}
else if ( bli_is_conjnotrans( transb ) && b_was_copied )
{
bli_zconjm( k,
n_gemm,
b_conj, incb_conj, ldb_conj );
}
// There are two cases where we need to perform the gemm and then axpy
// the result into C with a transposition. We handle those cases here.
if ( gemm_needs_axpyt )
{
// We need a temporary matrix for holding C^T. Notice that m and n
// represent the dimensions of C, while m_gemm and n_gemm are the
// dimensions of the actual product op(A)*op(B), which may be n-by-m
// since the operands may have been swapped.
c_trans = bli_zallocm( m_gemm, n_gemm );
ldc_trans = m_gemm;
incc_trans = 1;
// Compute tr( A ) * tr( B ), where A and B may have been swapped
// to reference the other, and store the result in C_trans.
// beta is passed as zero here; the real beta is applied to C separately
// below.
bli_zgemm_blas( transa,
transb,
m_gemm,
n_gemm,
k,
alpha,
a_conj, lda_conj,
b_conj, ldb_conj,
&zero,
c_trans, ldc_trans );
// Scale C by beta.
bli_zscalm( BLIS_NO_CONJUGATE,
m,
n,
beta,
c, incc, ldc );
// And finally, accumulate the matrix product in C_trans into C
// with a transpose.
bli_zaxpymt( BLIS_TRANSPOSE,
m,
n,
&one,
c_trans, incc_trans, ldc_trans,
c, incc, ldc );
// Free the temporary matrix for C.
bli_zfree( c_trans );
}
else // no extra axpyt step needed
{
bli_zgemm_blas( transa,
transb,
m_gemm,
n_gemm,
k,
alpha,
a_conj, lda_conj,
b_conj, ldb_conj,
beta,
c, ldc );
}
// a_conj / b_conj own separate storage only when the operand had not
// already been copied to a contiguous temporary; the conditions mirror the
// allocation conditions above.
if ( bli_is_conjnotrans( transa ) && !a_was_copied )
bli_zfree( a_conj );
if ( bli_is_conjnotrans( transb ) && !b_was_copied )
bli_zfree( b_conj );
// Free any temporary contiguous matrices, copying the result back to
// the original matrix.
// The pre-swap pointers are used so that each temporary is paired with the
// saved original it was created from.
bli_zfree_contigm( a_save, a_rs_save, a_cs_save,
&a_unswap, &a_rs, &a_cs );
bli_zfree_contigm( b_save, b_rs_save, b_cs_save,
&b_unswap, &b_rs, &b_cs );
bli_zfree_saved_contigm( m_save,
n_save,
c_save, c_rs_save, c_cs_save,
&c, &c_rs, &c_cs );
}
| void bli_zgemm_blas | ( | trans_t | transa, |
| trans_t | transb, | ||
| int | m, | ||
| int | n, | ||
| int | k, | ||
| dcomplex * | alpha, | ||
| dcomplex * | a, | ||
| int | lda, | ||
| dcomplex * | b, | ||
| int | ldb, | ||
| dcomplex * | beta, | ||
| dcomplex * | c, | ||
| int | ldc | ||
| ) |
References bli_param_map_to_netlib_trans(), cblas_zgemm(), CblasColMajor, and F77_zgemm().
Referenced by bli_zgemm().
{
    // Forwards a column-major dcomplex gemm to the underlying BLAS, using
    // the CBLAS binding when BLIS_ENABLE_CBLAS_INTERFACES is defined and the
    // Fortran-77 binding otherwise.
#ifdef BLIS_ENABLE_CBLAS_INTERFACES
    enum CBLAS_TRANSPOSE op_a;
    enum CBLAS_TRANSPOSE op_b;

    // Translate the trans_t values into their CBLAS counterparts.
    bli_param_map_to_netlib_trans( transa, &op_a );
    bli_param_map_to_netlib_trans( transb, &op_b );

    // The layout is always column-major; scalar dimensions go by value.
    cblas_zgemm( CblasColMajor, op_a, op_b,
                 m, n, k,
                 alpha,
                 a, lda,
                 b, ldb,
                 beta,
                 c, ldc );
#else
    char op_a;
    char op_b;

    // Translate the trans_t values into their Fortran character codes.
    bli_param_map_to_netlib_trans( transa, &op_a );
    bli_param_map_to_netlib_trans( transb, &op_b );

    // The Fortran interface takes every argument by reference.
    F77_zgemm( &op_a, &op_b,
               &m, &n, &k,
               alpha,
               a, &lda,
               b, &ldb,
               beta,
               c, &ldc );
#endif
}
| void bli_zhemm | ( | side_t | side, |
| uplo_t | uplo, | ||
| int | m, | ||
| int | n, | ||
| dcomplex * | alpha, | ||
| dcomplex * | a, | ||
| int | a_rs, | ||
| int | a_cs, | ||
| dcomplex * | b, | ||
| int | b_rs, | ||
| int | b_cs, | ||
| dcomplex * | beta, | ||
| dcomplex * | c, | ||
| int | c_rs, | ||
| int | c_cs | ||
| ) |
References bli_is_col_storage(), bli_set_dim_with_side(), bli_z0(), bli_z1(), bli_zallocm(), bli_zaxpymt(), bli_zconjmr(), bli_zcopymrt(), bli_zcopymt(), bli_zcreate_contigm(), bli_zcreate_contigmr(), bli_zero_dim2(), bli_zfree(), bli_zfree_contigm(), bli_zfree_saved_contigm(), bli_zhemm_blas(), bli_zscalm(), BLIS_CONJ_NO_TRANSPOSE, BLIS_NO_CONJUGATE, BLIS_NO_TRANSPOSE, and BLIS_TRANSPOSE.
Referenced by FLA_Hemm_external().
{
    // Hermitian matrix-matrix multiply for dcomplex operands given with
    // arbitrary row/column strides. The work is mapped onto a single
    // column-major bli_zhemm_blas() call; depending on how A, B and C are
    // stored, side/uplo may be toggled, A may be conjugated, B may be copied
    // (optionally transposed), and/or the product may be computed into a
    // temporary and accumulated into C with a transposition.
    int       m_save    = m;
    int       n_save    = n;
    dcomplex* a_save    = a;
    dcomplex* b_save    = b;
    dcomplex* c_save    = c;
    int       a_rs_save = a_rs;
    int       a_cs_save = a_cs;
    int       b_rs_save = b_rs;
    int       b_cs_save = b_cs;
    int       c_rs_save = c_rs;
    int       c_cs_save = c_cs;
    dcomplex  zero = bli_z0();
    dcomplex  one  = bli_z1();
    dcomplex* a_conj;
    dcomplex* b_copy;
    dcomplex* c_trans;
    int       dim_a;
    int       lda, inca;
    int       ldb, incb;
    int       ldc, incc;
    int       lda_conj, inca_conj;
    int       ldb_copy, incb_copy;
    int       ldc_trans, incc_trans;
    int       hemm_needs_conja  = FALSE;
    int       hemm_needs_copyb  = FALSE;
    int       hemm_needs_transb = FALSE;
    int       hemm_needs_axpyt  = FALSE;
    int       a_was_copied;

    // Return early if possible.
    if ( bli_zero_dim2( m, n ) ) return;

    // If necessary, allocate, initialize, and use a temporary contiguous
    // copy of each matrix rather than the original matrices.
    bli_set_dim_with_side( side, m, n, &dim_a );
    bli_zcreate_contigmr( uplo,
                          dim_a,
                          dim_a,
                          a_save, a_rs_save, a_cs_save,
                          &a,     &a_rs,     &a_cs );
    bli_zcreate_contigm( m,
                         n,
                         b_save, b_rs_save, b_cs_save,
                         &b,     &b_rs,     &b_cs );
    bli_zcreate_contigm( m,
                         n,
                         c_save, c_rs_save, c_cs_save,
                         &c,     &c_rs,     &c_cs );

    // Figure out whether A was copied to contiguous memory. This is used to
    // prevent redundant copying.
    a_was_copied = ( a != a_save );

    // Initialize with values assuming column-major storage.
    lda  = a_cs;
    inca = a_rs;
    ldb  = b_cs;
    incb = b_rs;
    ldc  = c_cs;
    incc = c_rs;

    // Adjust the parameters based on the storage of each matrix.
    // In the case comments below, a _c / _r suffix denotes column / row
    // storage.
    if ( bli_is_col_storage( c_rs, c_cs ) )
    {
        if ( bli_is_col_storage( a_rs, a_cs ) )
        {
            if ( bli_is_col_storage( b_rs, b_cs ) )
            {
                // requested operation: C_c += uplo( A_c ) * B_c
                // effective operation: C_c += uplo( A_c ) * B_c
            }
            else // if ( bli_is_row_storage( b_rs, b_cs ) )
            {
                // requested operation: C_c += uplo( A_c ) * B_r
                // effective operation: C_c += uplo( A_c ) * B_c
                hemm_needs_copyb = TRUE;
            }
        }
        else // if ( bli_is_row_storage( a_rs, a_cs ) )
        {
            if ( bli_is_col_storage( b_rs, b_cs ) )
            {
                // requested operation: C_c += uplo( A_r ) * B_c
                // effective operation: C_c += ~uplo( conj( A_c ) ) * B_c
                bli_swap_ints( lda, inca );
                bli_toggle_uplo( uplo );
                hemm_needs_conja = TRUE;
            }
            else // if ( bli_is_row_storage( b_rs, b_cs ) )
            {
                // requested operation: C_c += uplo( A_r ) * B_r
                // effective operation: C_c += ( B_c * ~uplo( conj( A_c ) ) )^T
                bli_swap_ints( lda, inca );
                bli_swap_ints( ldb, incb );
                bli_toggle_side( side );
                bli_toggle_uplo( uplo );
                hemm_needs_axpyt = TRUE;
            }
        }
    }
    else // if ( bli_is_row_storage( c_rs, c_cs ) )
    {
        if ( bli_is_col_storage( a_rs, a_cs ) )
        {
            if ( bli_is_col_storage( b_rs, b_cs ) )
            {
                // requested operation: C_r += uplo( A_c ) * B_c
                // effective operation: C_c += ( uplo( A_c ) * B_c )^T
                bli_swap_ints( ldc, incc );
                bli_swap_ints( m, n );
                hemm_needs_axpyt = TRUE;
            }
            else // if ( bli_is_row_storage( b_rs, b_cs ) )
            {
                // requested operation: C_r += uplo( A_c ) * B_r
                // effective operation: C_c += B_c * ~uplo( conj( A_c ) )
                bli_swap_ints( ldc, incc );
                bli_swap_ints( ldb, incb );
                bli_swap_ints( m, n );
                bli_toggle_side( side );
                hemm_needs_conja = TRUE;
            }
        }
        else // if ( bli_is_row_storage( a_rs, a_cs ) )
        {
            if ( bli_is_col_storage( b_rs, b_cs ) )
            {
                // requested operation: C_r += uplo( A_r ) * B_c
                // effective operation: C_c += B_c^T * ~uplo( A_c )
                bli_swap_ints( ldc, incc );
                bli_swap_ints( lda, inca );
                bli_swap_ints( m, n );
                bli_toggle_side( side );
                bli_toggle_uplo( uplo );
                hemm_needs_copyb  = TRUE;
                hemm_needs_transb = TRUE;
            }
            else // if ( bli_is_row_storage( b_rs, b_cs ) )
            {
                // requested operation: C_r += uplo( A_r ) * B_r
                // effective operation: C_c += B_c * conj( ~uplo( A_c ) )
                bli_swap_ints( ldc, incc );
                bli_swap_ints( lda, inca );
                bli_swap_ints( ldb, incb );
                bli_swap_ints( m, n );
                bli_toggle_uplo( uplo );
                bli_toggle_side( side );
            }
        }
    }

    // We need a temporary matrix for the cases where A is conjugated.
    a_conj    = a;
    lda_conj  = lda;
    inca_conj = inca;

    if ( hemm_needs_conja && !a_was_copied )
    {
        // Re-derive the order of A from the (possibly toggled) side and
        // (possibly swapped) m and n. The function-scope dim_a is reused
        // rather than shadowed by a new declaration.
        bli_set_dim_with_side( side, m, n, &dim_a );

        a_conj    = bli_zallocm( dim_a, dim_a );
        lda_conj  = dim_a;
        inca_conj = 1;

        bli_zcopymrt( uplo,
                      BLIS_CONJ_NO_TRANSPOSE,
                      dim_a,
                      dim_a,
                      a,      inca,      lda,
                      a_conj, inca_conj, lda_conj );
    }
    else if ( hemm_needs_conja && a_was_copied )
    {
        bli_set_dim_with_side( side, m, n, &dim_a );

        // a_conj aliases the contiguous temporary here, so conjugating in
        // place does not touch the caller's original A.
        bli_zconjmr( uplo,
                     dim_a,
                     dim_a,
                     a_conj, inca_conj, lda_conj );
    }

    // We need a temporary matrix for the cases where B needs to be copied.
    b_copy    = b;
    ldb_copy  = ldb;
    incb_copy = incb;

    // There are two cases where we need to make a copy of B: one where the
    // copy's dimensions are transposed from the original B, and one where
    // the dimensions are not swapped.
    if ( hemm_needs_copyb )
    {
        trans_t transb;

        // Set transb, which determines whether or not we need to copy from B
        // as if it needs a transposition. If a transposition is needed, then
        // m and n have already been swapped. So in either case m
        // represents the leading dimension of the copy.
        if ( hemm_needs_transb ) transb = BLIS_TRANSPOSE;
        else                     transb = BLIS_NO_TRANSPOSE;

        b_copy    = bli_zallocm( m, n );
        ldb_copy  = m;
        incb_copy = 1;

        bli_zcopymt( transb,
                     m,
                     n,
                     b,      incb,      ldb,
                     b_copy, incb_copy, ldb_copy );
    }

    // There are two cases where we need to perform the hemm and then axpy
    // the result into C with a transposition. We handle those cases here.
    if ( hemm_needs_axpyt )
    {
        // We need a temporary matrix for holding C^T. Notice that m and n
        // represent the dimensions of C, and thus C_trans is n-by-m
        // (interpreting both as column-major matrices). So the leading
        // dimension of the temporary matrix holding C^T is n.
        c_trans    = bli_zallocm( n, m );
        ldc_trans  = n;
        incc_trans = 1;

        // Compute A * B (or B * A) and store the result in C_trans.
        // Note that there is no overlap between the axpyt cases and
        // the conja/copyb cases, hence the use of a, b, lda, and ldb.
        // beta is passed as zero here; the real beta is applied to C
        // separately below.
        bli_zhemm_blas( side,
                        uplo,
                        n,
                        m,
                        alpha,
                        a, lda,
                        b, ldb,
                        &zero,
                        c_trans, ldc_trans );

        // Scale C by beta.
        bli_zscalm( BLIS_NO_CONJUGATE,
                    m,
                    n,
                    beta,
                    c, incc, ldc );

        // And finally, accumulate the matrix product in C_trans into C
        // with a transpose.
        bli_zaxpymt( BLIS_TRANSPOSE,
                     m,
                     n,
                     &one,
                     c_trans, incc_trans, ldc_trans,
                     c,       incc,       ldc );

        // Free the temporary matrix for C.
        bli_zfree( c_trans );
    }
    else // no extra axpyt step needed
    {
        bli_zhemm_blas( side,
                        uplo,
                        m,
                        n,
                        alpha,
                        a_conj, lda_conj,
                        b_copy, ldb_copy,
                        beta,
                        c, ldc );
    }

    // The free conditions mirror the allocation conditions above.
    if ( hemm_needs_conja && !a_was_copied )
        bli_zfree( a_conj );

    if ( hemm_needs_copyb )
        bli_zfree( b_copy );

    // Free any temporary contiguous matrices, copying the result back to
    // the original matrix.
    bli_zfree_contigm( a_save, a_rs_save, a_cs_save,
                       &a,     &a_rs,     &a_cs );
    bli_zfree_contigm( b_save, b_rs_save, b_cs_save,
                       &b,     &b_rs,     &b_cs );
    bli_zfree_saved_contigm( m_save,
                             n_save,
                             c_save, c_rs_save, c_cs_save,
                             &c,     &c_rs,     &c_cs );
}
| void bli_zhemm_blas | ( | side_t | side, |
| uplo_t | uplo, | ||
| int | m, | ||
| int | n, | ||
| dcomplex * | alpha, | ||
| dcomplex * | a, | ||
| int | lda, | ||
| dcomplex * | b, | ||
| int | ldb, | ||
| dcomplex * | beta, | ||
| dcomplex * | c, | ||
| int | ldc | ||
| ) |
References bli_param_map_to_netlib_side(), bli_param_map_to_netlib_uplo(), cblas_zhemm(), CblasColMajor, and F77_zhemm().
Referenced by bli_zhemm().
{
    // Forwards a column-major dcomplex hemm to the underlying BLAS, using
    // the CBLAS binding when BLIS_ENABLE_CBLAS_INTERFACES is defined and the
    // Fortran-77 binding otherwise.
#ifdef BLIS_ENABLE_CBLAS_INTERFACES
    enum CBLAS_SIDE netlib_side;
    enum CBLAS_UPLO netlib_uplo;

    // Translate the side_t / uplo_t values into their CBLAS counterparts.
    bli_param_map_to_netlib_side( side, &netlib_side );
    bli_param_map_to_netlib_uplo( uplo, &netlib_uplo );

    // The layout is always column-major; scalar dimensions go by value.
    cblas_zhemm( CblasColMajor, netlib_side, netlib_uplo,
                 m, n,
                 alpha,
                 a, lda,
                 b, ldb,
                 beta,
                 c, ldc );
#else
    char netlib_side;
    char netlib_uplo;

    // Translate the side_t / uplo_t values into Fortran character codes.
    bli_param_map_to_netlib_side( side, &netlib_side );
    bli_param_map_to_netlib_uplo( uplo, &netlib_uplo );

    // The Fortran interface takes every argument by reference.
    F77_zhemm( &netlib_side, &netlib_uplo,
               &m, &n,
               alpha,
               a, &lda,
               b, &ldb,
               beta,
               c, &ldc );
#endif
}
| void bli_zher2k | ( | uplo_t | uplo, |
| trans_t | trans, | ||
| int | m, | ||
| int | k, | ||
| dcomplex * | alpha, | ||
| dcomplex * | a, | ||
| int | a_rs, | ||
| int | a_cs, | ||
| dcomplex * | b, | ||
| int | b_rs, | ||
| int | b_cs, | ||
| double * | beta, | ||
| dcomplex * | c, | ||
| int | c_rs, | ||
| int | c_cs | ||
| ) |
References bli_d0(), bli_is_col_storage(), bli_set_dims_with_trans(), bli_z1(), bli_zallocm(), bli_zaxpymrt(), bli_zcopymt(), bli_zcreate_contigmr(), bli_zcreate_contigmt(), bli_zdscalmr(), bli_zero_dim2(), bli_zfree(), bli_zfree_contigm(), bli_zfree_saved_contigmr(), bli_zher2k_blas(), BLIS_CONJ_NO_TRANSPOSE, and BLIS_NO_TRANSPOSE.
Referenced by FLA_Her2k_external().
{
    // Hermitian rank-2k update for dcomplex operands given with arbitrary
    // row/column strides. The work is mapped onto a single column-major
    // bli_zher2k_blas() call; depending on how A, B and C are stored, uplo
    // and trans may be toggled, alpha may be conjugated, A and/or B may be
    // copied to column-major temporaries, and/or the product may be computed
    // into a temporary and accumulated into C with a conjugation.
    uplo_t    uplo_save = uplo;
    int       m_save    = m;
    dcomplex* a_save    = a;
    dcomplex* b_save    = b;
    dcomplex* c_save    = c;
    int       a_rs_save = a_rs;
    int       a_cs_save = a_cs;
    int       b_rs_save = b_rs;
    int       b_cs_save = b_cs;
    int       c_rs_save = c_rs;
    int       c_cs_save = c_cs;
    double    zero_r = bli_d0();
    dcomplex  one    = bli_z1();
    dcomplex  alpha_copy;
    dcomplex* a_copy;
    dcomplex* b_copy;
    dcomplex* c_conj;
    int       lda, inca;
    int       ldb, incb;
    int       ldc, incc;
    int       lda_copy, inca_copy;
    int       ldb_copy, incb_copy;
    int       ldc_conj, incc_conj;
    int       her2k_needs_copya      = FALSE;
    int       her2k_needs_copyb      = FALSE;
    int       her2k_needs_conj       = FALSE;
    int       her2k_needs_alpha_conj = FALSE;

    // Return early if possible. When k is zero the rank-2k product is
    // empty, but the stored triangle of C must still be scaled by beta,
    // matching the early-return handling in bli_zgemm().
    if ( bli_zero_dim2( m, k ) )
    {
        bli_zdscalmr( uplo,
                      m,
                      m,
                      beta,
                      c, c_rs, c_cs );
        return;
    }

    // If necessary, allocate, initialize, and use a temporary contiguous
    // copy of each matrix rather than the original matrices.
    bli_zcreate_contigmt( trans,
                          m,
                          k,
                          a_save, a_rs_save, a_cs_save,
                          &a,     &a_rs,     &a_cs );
    bli_zcreate_contigmt( trans,
                          m,
                          k,
                          b_save, b_rs_save, b_cs_save,
                          &b,     &b_rs,     &b_cs );
    bli_zcreate_contigmr( uplo,
                          m,
                          m,
                          c_save, c_rs_save, c_cs_save,
                          &c,     &c_rs,     &c_cs );

    // Initialize with values assuming column-major storage.
    lda  = a_cs;
    inca = a_rs;
    ldb  = b_cs;
    incb = b_rs;
    ldc  = c_cs;
    incc = c_rs;

    // Adjust the parameters based on the storage of each matrix.
    // In the case comments below, a _c / _r suffix denotes column / row
    // storage.
    if ( bli_is_col_storage( c_rs, c_cs ) )
    {
        if ( bli_is_col_storage( a_rs, a_cs ) )
        {
            if ( bli_is_col_storage( b_rs, b_cs ) )
            {
                // requested operation: uplo( C_c ) += A_c * B_c' + B_c * A_c'
                // effective operation: uplo( C_c ) += A_c * B_c' + B_c * A_c'
            }
            else // if ( bli_is_row_storage( b_rs, b_cs ) )
            {
                // requested operation: uplo( C_c ) += A_c * B_r' + B_r * A_c'
                // effective operation: uplo( C_c ) += A_c * B_c' + B_c * A_c'
                her2k_needs_copyb = TRUE;
            }
        }
        else // if ( bli_is_row_storage( a_rs, a_cs ) )
        {
            if ( bli_is_col_storage( b_rs, b_cs ) )
            {
                // requested operation: uplo( C_c ) += A_r * B_c' + B_c * A_r'
                // effective operation: uplo( C_c ) += A_c * B_c' + B_c * A_c'
                her2k_needs_copya = TRUE;
            }
            else // if ( bli_is_row_storage( b_rs, b_cs ) )
            {
                // requested operation: uplo( C_c ) += A_r * B_r' + B_r * A_r'
                // effective operation: uplo( C_c ) += conj( A_c' * B_c + B_c' * A_c )
                bli_swap_ints( lda, inca );
                bli_swap_ints( ldb, incb );
                bli_toggle_conjtrans( trans );
                her2k_needs_conj       = TRUE;
                her2k_needs_alpha_conj = TRUE;
            }
        }
    }
    else // if ( bli_is_row_storage( c_rs, c_cs ) )
    {
        if ( bli_is_col_storage( a_rs, a_cs ) )
        {
            if ( bli_is_col_storage( b_rs, b_cs ) )
            {
                // requested operation: uplo( C_r ) += A_c * B_c' + B_c * A_c'
                // effective operation: ~uplo( C_c ) += conj( A_c * B_c' + B_c * A_c' )
                bli_swap_ints( ldc, incc );
                bli_toggle_uplo( uplo );
                her2k_needs_conj = TRUE;
            }
            else // if ( bli_is_row_storage( b_rs, b_cs ) )
            {
                // requested operation: uplo( C_r ) += A_c * B_r' + B_r * A_c'
                // effective operation: ~uplo( C_c ) += conj( A_c * B_c' + B_c * A_c' )
                her2k_needs_copyb = TRUE;
                bli_swap_ints( ldc, incc );
                bli_toggle_uplo( uplo );
                her2k_needs_conj = TRUE;
            }
        }
        else // if ( bli_is_row_storage( a_rs, a_cs ) )
        {
            if ( bli_is_col_storage( b_rs, b_cs ) )
            {
                // requested operation: uplo( C_r ) += A_r * B_c' + B_c * A_r'
                // effective operation: ~uplo( C_c ) += conj( A_c * B_c' + B_c * A_c' )
                her2k_needs_copya = TRUE;
                bli_swap_ints( ldc, incc );
                bli_toggle_uplo( uplo );
                her2k_needs_conj = TRUE;
            }
            else // if ( bli_is_row_storage( b_rs, b_cs ) )
            {
                // requested operation: uplo( C_r ) += A_r * B_r' + B_r * A_r'
                // effective operation: ~uplo( C_c ) += A_c' * B_c + B_c' * A_c
                bli_swap_ints( ldc, incc );
                bli_swap_ints( lda, inca );
                bli_swap_ints( ldb, incb );
                bli_toggle_uplo( uplo );
                bli_toggle_conjtrans( trans );
                her2k_needs_alpha_conj = TRUE;
            }
        }
    }

    // Make a copy of alpha and conjugate if necessary. The caller's alpha
    // is never modified.
    alpha_copy = *alpha;
    if ( her2k_needs_alpha_conj )
    {
        bli_zconjs( &alpha_copy );
    }

    a_copy    = a;
    lda_copy  = lda;
    inca_copy = inca;

    // There are two cases where we need to copy A to column-major storage.
    // We handle those two cases here.
    if ( her2k_needs_copya )
    {
        int m_a;
        int n_a;

        // Determine the dimensions of A according to the value of trans. We
        // need this in order to set the leading dimension of the copy of A.
        bli_set_dims_with_trans( trans, m, k, &m_a, &n_a );

        // We need a temporary matrix to hold a column-major copy of A.
        a_copy    = bli_zallocm( m, k );
        lda_copy  = m_a;
        inca_copy = 1;

        // Copy the contents of A into A_copy.
        bli_zcopymt( BLIS_NO_TRANSPOSE,
                     m_a,
                     n_a,
                     a,      inca,      lda,
                     a_copy, inca_copy, lda_copy );
    }

    b_copy    = b;
    ldb_copy  = ldb;
    incb_copy = incb;

    // There are two cases where we need to copy B to column-major storage.
    // We handle those two cases here.
    if ( her2k_needs_copyb )
    {
        int m_b;
        int n_b;

        // Determine the dimensions of B according to the value of trans. We
        // need this in order to set the leading dimension of the copy of B.
        bli_set_dims_with_trans( trans, m, k, &m_b, &n_b );

        // We need a temporary matrix to hold a column-major copy of B.
        b_copy    = bli_zallocm( m, k );
        ldb_copy  = m_b;
        incb_copy = 1;

        // Copy the contents of B into B_copy.
        bli_zcopymt( BLIS_NO_TRANSPOSE,
                     m_b,
                     n_b,
                     b,      incb,      ldb,
                     b_copy, incb_copy, ldb_copy );
    }

    // There are cases where we need to perform the rank-2k product and
    // then axpy the result into C with a conjugation. We handle those
    // cases here.
    if ( her2k_needs_conj )
    {
        // We need a temporary matrix for holding the rank-2k product.
        c_conj    = bli_zallocm( m, m );
        ldc_conj  = m;
        incc_conj = 1;

        // Compute the rank-2k product. beta is passed as zero here; the
        // real beta is applied to C separately below.
        bli_zher2k_blas( uplo,
                         trans,
                         m,
                         k,
                         &alpha_copy,
                         a_copy, lda_copy,
                         b_copy, ldb_copy,
                         &zero_r,
                         c_conj, ldc_conj );

        // Scale C by beta.
        bli_zdscalmr( uplo,
                      m,
                      m,
                      beta,
                      c, incc, ldc );

        // And finally, accumulate the rank-2k product in C_conj into C
        // with a conjugation.
        bli_zaxpymrt( uplo,
                      BLIS_CONJ_NO_TRANSPOSE,
                      m,
                      m,
                      &one,
                      c_conj, incc_conj, ldc_conj,
                      c,      incc,      ldc );

        // Free the temporary matrix for C.
        bli_zfree( c_conj );
    }
    else
    {
        bli_zher2k_blas( uplo,
                         trans,
                         m,
                         k,
                         &alpha_copy,
                         a_copy, lda_copy,
                         b_copy, ldb_copy,
                         beta,
                         c, ldc );
    }

    if ( her2k_needs_copya )
        bli_zfree( a_copy );

    if ( her2k_needs_copyb )
        bli_zfree( b_copy );

    // Free any temporary contiguous matrices, copying the result back to
    // the original matrix. The saved uplo is used for C because the working
    // uplo may have been toggled above.
    bli_zfree_contigm( a_save, a_rs_save, a_cs_save,
                       &a,     &a_rs,     &a_cs );
    bli_zfree_contigm( b_save, b_rs_save, b_cs_save,
                       &b,     &b_rs,     &b_cs );
    bli_zfree_saved_contigmr( uplo_save,
                              m_save,
                              m_save,
                              c_save, c_rs_save, c_cs_save,
                              &c,     &c_rs,     &c_cs );
}
| void bli_zher2k_blas | ( | uplo_t | uplo, |
| trans_t | trans, | ||
| int | m, | ||
| int | k, | ||
| dcomplex * | alpha, | ||
| dcomplex * | a, | ||
| int | lda, | ||
| dcomplex * | b, | ||
| int | ldb, | ||
| double * | beta, | ||
| dcomplex * | c, | ||
| int | ldc | ||
| ) |
References bli_param_map_to_netlib_trans(), bli_param_map_to_netlib_uplo(), cblas_zher2k(), CblasColMajor, and F77_zher2k().
Referenced by bli_zher2k().
{
    // Forwards a column-major dcomplex her2k to the underlying BLAS, using
    // the CBLAS binding when BLIS_ENABLE_CBLAS_INTERFACES is defined and the
    // Fortran-77 binding otherwise.
#ifdef BLIS_ENABLE_CBLAS_INTERFACES
    enum CBLAS_UPLO      netlib_uplo;
    enum CBLAS_TRANSPOSE netlib_trans;

    // Translate the uplo_t / trans_t values into their CBLAS counterparts.
    bli_param_map_to_netlib_uplo( uplo, &netlib_uplo );
    bli_param_map_to_netlib_trans( trans, &netlib_trans );

    // The layout is always column-major. The real scalar beta is passed by
    // value to CBLAS, hence the dereference.
    cblas_zher2k( CblasColMajor, netlib_uplo, netlib_trans,
                  m, k,
                  alpha,
                  a, lda,
                  b, ldb,
                  *beta,
                  c, ldc );
#else
    char netlib_uplo;
    char netlib_trans;

    // Translate the uplo_t / trans_t values into Fortran character codes.
    bli_param_map_to_netlib_uplo( uplo, &netlib_uplo );
    bli_param_map_to_netlib_trans( trans, &netlib_trans );

    // The Fortran interface takes every argument by reference.
    F77_zher2k( &netlib_uplo, &netlib_trans,
                &m, &k,
                alpha,
                a, &lda,
                b, &ldb,
                beta,
                c, &ldc );
#endif
}
| void bli_zherk | ( | uplo_t | uplo, |
| trans_t | trans, | ||
| int | m, | ||
| int | k, | ||
| double * | alpha, | ||
| dcomplex * | a, | ||
| int | a_rs, | ||
| int | a_cs, | ||
| double * | beta, | ||
| dcomplex * | c, | ||
| int | c_rs, | ||
| int | c_cs | ||
| ) |
References bli_d0(), bli_is_col_storage(), bli_z1(), bli_zallocm(), bli_zaxpymrt(), bli_zcreate_contigmr(), bli_zcreate_contigmt(), bli_zdscalmr(), bli_zero_dim2(), bli_zfree(), bli_zfree_contigm(), bli_zfree_saved_contigmr(), bli_zherk_blas(), and BLIS_CONJ_NO_TRANSPOSE.
Referenced by FLA_Herk_external(), and FLA_UDdate_UT_opz_var1().
{
    // Hermitian rank-k update for dcomplex operands given with arbitrary
    // row/column strides. The work is mapped onto a single column-major
    // bli_zherk_blas() call; depending on how A and C are stored, uplo and
    // trans may be toggled and/or the product may be computed into a
    // temporary and accumulated into C with a conjugation.
    uplo_t    uplo_save = uplo;
    int       m_save    = m;
    dcomplex* a_save    = a;
    dcomplex* c_save    = c;
    int       a_rs_save = a_rs;
    int       a_cs_save = a_cs;
    int       c_rs_save = c_rs;
    int       c_cs_save = c_cs;
    double    zero_r = bli_d0();
    dcomplex  one    = bli_z1();
    dcomplex* c_conj;
    int       lda, inca;
    int       ldc, incc;
    int       ldc_conj, incc_conj;
    int       herk_needs_conj = FALSE;

    // Return early if possible. When k is zero the rank-k product is empty,
    // but the stored triangle of C must still be scaled by beta, matching
    // the early-return handling in bli_zgemm().
    if ( bli_zero_dim2( m, k ) )
    {
        bli_zdscalmr( uplo,
                      m,
                      m,
                      beta,
                      c, c_rs, c_cs );
        return;
    }

    // If necessary, allocate, initialize, and use a temporary contiguous
    // copy of each matrix rather than the original matrices.
    bli_zcreate_contigmt( trans,
                          m,
                          k,
                          a_save, a_rs_save, a_cs_save,
                          &a,     &a_rs,     &a_cs );
    bli_zcreate_contigmr( uplo,
                          m,
                          m,
                          c_save, c_rs_save, c_cs_save,
                          &c,     &c_rs,     &c_cs );

    // Initialize with values assuming column-major storage.
    lda  = a_cs;
    inca = a_rs;
    ldc  = c_cs;
    incc = c_rs;

    // Adjust the parameters based on the storage of each matrix.
    // In the case comments below, a _c / _r suffix denotes column / row
    // storage.
    if ( bli_is_col_storage( c_rs, c_cs ) )
    {
        if ( bli_is_col_storage( a_rs, a_cs ) )
        {
            // requested operation: uplo( C_c ) += A_c * A_c'
            // effective operation: uplo( C_c ) += A_c * A_c'
        }
        else // if ( bli_is_row_storage( a_rs, a_cs ) )
        {
            // requested operation: uplo( C_c ) += A_r * A_r'
            // effective operation: uplo( C_c ) += conj( A_c' * A_c )
            bli_swap_ints( lda, inca );
            bli_toggle_conjtrans( trans );
            herk_needs_conj = TRUE;
        }
    }
    else // if ( bli_is_row_storage( c_rs, c_cs ) )
    {
        if ( bli_is_col_storage( a_rs, a_cs ) )
        {
            // requested operation: uplo( C_r ) += A_c * A_c'
            // effective operation: ~uplo( C_c ) += conj( A_c * A_c' )
            bli_swap_ints( ldc, incc );
            bli_toggle_uplo( uplo );
            herk_needs_conj = TRUE;
        }
        else // if ( bli_is_row_storage( a_rs, a_cs ) )
        {
            // requested operation: uplo( C_r ) += A_r * A_r'
            // effective operation: ~uplo( C_c ) += A_c' * A_c
            bli_swap_ints( ldc, incc );
            bli_swap_ints( lda, inca );
            bli_toggle_uplo( uplo );
            bli_toggle_conjtrans( trans );
        }
    }

    // There are two cases where we need to perform the rank-k product and
    // then axpy the result into C with a conjugation. We handle those two
    // cases here.
    if ( herk_needs_conj )
    {
        // We need a temporary matrix for holding the rank-k product.
        c_conj    = bli_zallocm( m, m );
        ldc_conj  = m;
        incc_conj = 1;

        // Compute the rank-k product. beta is passed as zero here; the real
        // beta is applied to C separately below.
        bli_zherk_blas( uplo,
                        trans,
                        m,
                        k,
                        alpha,
                        a, lda,
                        &zero_r,
                        c_conj, ldc_conj );

        // Scale C by beta.
        bli_zdscalmr( uplo,
                      m,
                      m,
                      beta,
                      c, incc, ldc );

        // And finally, accumulate the rank-k product in C_conj into C
        // with a conjugation.
        bli_zaxpymrt( uplo,
                      BLIS_CONJ_NO_TRANSPOSE,
                      m,
                      m,
                      &one,
                      c_conj, incc_conj, ldc_conj,
                      c,      incc,      ldc );

        // Free the temporary matrix for C.
        bli_zfree( c_conj );
    }
    else
    {
        bli_zherk_blas( uplo,
                        trans,
                        m,
                        k,
                        alpha,
                        a, lda,
                        beta,
                        c, ldc );
    }

    // Free any temporary contiguous matrices, copying the result back to
    // the original matrix. The saved uplo is used for C because the working
    // uplo may have been toggled above.
    bli_zfree_contigm( a_save, a_rs_save, a_cs_save,
                       &a,     &a_rs,     &a_cs );
    bli_zfree_saved_contigmr( uplo_save,
                              m_save,
                              m_save,
                              c_save, c_rs_save, c_cs_save,
                              &c,     &c_rs,     &c_cs );
}
| void bli_zherk_blas | ( | uplo_t | uplo, |
| trans_t | trans, | ||
| int | m, | ||
| int | k, | ||
| double * | alpha, | ||
| dcomplex * | a, | ||
| int | lda, | ||
| double * | beta, | ||
| dcomplex * | c, | ||
| int | ldc | ||
| ) |
References bli_param_map_to_netlib_trans(), bli_param_map_to_netlib_uplo(), cblas_zherk(), CblasColMajor, and F77_zherk().
Referenced by bli_zherk().
{
    // Forwards a column-major dcomplex herk to the underlying BLAS, using
    // the CBLAS binding when BLIS_ENABLE_CBLAS_INTERFACES is defined and the
    // Fortran-77 binding otherwise.
#ifdef BLIS_ENABLE_CBLAS_INTERFACES
    enum CBLAS_UPLO      netlib_uplo;
    enum CBLAS_TRANSPOSE netlib_trans;

    // Translate the uplo_t / trans_t values into their CBLAS counterparts.
    bli_param_map_to_netlib_uplo( uplo, &netlib_uplo );
    bli_param_map_to_netlib_trans( trans, &netlib_trans );

    // The layout is always column-major. The real scalars alpha and beta
    // are passed by value to CBLAS, hence the dereferences.
    cblas_zherk( CblasColMajor, netlib_uplo, netlib_trans,
                 m, k,
                 *alpha,
                 a, lda,
                 *beta,
                 c, ldc );
#else
    char netlib_uplo;
    char netlib_trans;

    // Translate the uplo_t / trans_t values into Fortran character codes.
    bli_param_map_to_netlib_uplo( uplo, &netlib_uplo );
    bli_param_map_to_netlib_trans( trans, &netlib_trans );

    // The Fortran interface takes every argument by reference.
    F77_zherk( &netlib_uplo, &netlib_trans,
               &m, &k,
               alpha,
               a, &lda,
               beta,
               c, &ldc );
#endif
}
| void bli_zsymm | ( | side_t | side, |
| uplo_t | uplo, | ||
| int | m, | ||
| int | n, | ||
| dcomplex * | alpha, | ||
| dcomplex * | a, | ||
| int | a_rs, | ||
| int | a_cs, | ||
| dcomplex * | b, | ||
| int | b_rs, | ||
| int | b_cs, | ||
| dcomplex * | beta, | ||
| dcomplex * | c, | ||
| int | c_rs, | ||
| int | c_cs | ||
| ) |
References bli_is_col_storage(), bli_set_dim_with_side(), bli_z0(), bli_z1(), bli_zallocm(), bli_zaxpymt(), bli_zcopymt(), bli_zcreate_contigm(), bli_zcreate_contigmr(), bli_zero_dim2(), bli_zfree(), bli_zfree_contigm(), bli_zfree_saved_contigm(), bli_zscalm(), bli_zsymm_blas(), BLIS_NO_CONJUGATE, BLIS_NO_TRANSPOSE, and BLIS_TRANSPOSE.
Referenced by FLA_Symm_external().
{
int m_save = m;
int n_save = n;
dcomplex* a_save = a;
dcomplex* b_save = b;
dcomplex* c_save = c;
int a_rs_save = a_rs;
int a_cs_save = a_cs;
int b_rs_save = b_rs;
int b_cs_save = b_cs;
int c_rs_save = c_rs;
int c_cs_save = c_cs;
dcomplex zero = bli_z0();
dcomplex one = bli_z1();
dcomplex* b_copy;
dcomplex* c_trans;
int dim_a;
int lda, inca;
int ldb, incb;
int ldc, incc;
int ldb_copy, incb_copy;
int ldc_trans, incc_trans;
int symm_needs_copyb = FALSE;
int symm_needs_transb = FALSE;
int symm_needs_axpyt = FALSE;
// Return early if possible.
if ( bli_zero_dim2( m, n ) ) return;
// If necessary, allocate, initialize, and use a temporary contiguous
// copy of each matrix rather than the original matrices.
bli_set_dim_with_side( side, m, n, &dim_a );
bli_zcreate_contigmr( uplo,
dim_a,
dim_a,
a_save, a_rs_save, a_cs_save,
&a, &a_rs, &a_cs );
bli_zcreate_contigm( m,
n,
b_save, b_rs_save, b_cs_save,
&b, &b_rs, &b_cs );
bli_zcreate_contigm( m,
n,
c_save, c_rs_save, c_cs_save,
&c, &c_rs, &c_cs );
// Initialize with values assuming column-major storage.
lda = a_cs;
inca = a_rs;
ldb = b_cs;
incb = b_rs;
ldc = c_cs;
incc = c_rs;
// Adjust the parameters based on the storage of each matrix.
if ( bli_is_col_storage( c_rs, c_cs ) )
{
if ( bli_is_col_storage( a_rs, a_cs ) )
{
if ( bli_is_col_storage( b_rs, b_cs ) )
{
// requested operation: C_c += uplo( A_c ) * B_c
// effective operation: C_c += uplo( A_c ) * B_c
}
else // if ( bli_is_row_storage( b_rs, b_cs ) )
{
// requested operation: C_c += uplo( A_c ) * B_r
// effective operation: C_c += uplo( A_c ) * B_c
symm_needs_copyb = TRUE;
}
}
else // if ( bli_is_row_storage( a_rs, a_cs ) )
{
if ( bli_is_col_storage( b_rs, b_cs ) )
{
// requested operation: C_c += uplo( A_r ) * B_c
// effective operation: C_c += ~uplo( conj( A_c ) ) * B_c
bli_swap_ints( lda, inca );
bli_toggle_uplo( uplo );
}
else // if ( bli_is_row_storage( b_rs, b_cs ) )
{
// requested operation: C_c += uplo( A_r ) * B_r
// effective operation: C_c += ( B_c * ~uplo( conj( A_c ) ) )^T
bli_swap_ints( lda, inca );
bli_swap_ints( ldb, incb );
bli_toggle_side( side );
bli_toggle_uplo( uplo );
symm_needs_axpyt = TRUE;
}
}
}
else // if ( bli_is_row_storage( c_rs, c_cs ) )
{
if ( bli_is_col_storage( a_rs, a_cs ) )
{
if ( bli_is_col_storage( b_rs, b_cs ) )
{
// requested operation: C_r += uplo( A_c ) * B_c
// effective operation: C_c += ( uplo( A_c ) * B_c )^T
bli_swap_ints( ldc, incc );
bli_swap_ints( m, n );
symm_needs_axpyt = TRUE;
}
else // if ( bli_is_row_storage( b_rs, b_cs ) )
{
// requested operation: C_r += uplo( A_c ) * B_r
// effective operation: C_c += B_c * ~uplo( conj( A_c ) )
bli_swap_ints( ldc, incc );
bli_swap_ints( ldb, incb );
bli_swap_ints( m, n );
bli_toggle_side( side );
}
}
else // if ( bli_is_row_storage( a_rs, a_cs ) )
{
if ( bli_is_col_storage( b_rs, b_cs ) )
{
// requested operation: C_r += uplo( A_r ) * B_c
// effective operation: C_c += B_c^T * ~uplo( A_c )
bli_swap_ints( ldc, incc );
bli_swap_ints( lda, inca );
bli_swap_ints( m, n );
bli_toggle_side( side );
bli_toggle_uplo( uplo );
symm_needs_copyb = TRUE;
symm_needs_transb = TRUE;
}
else // if ( bli_is_row_storage( b_rs, b_cs ) )
{
// requested operation: C_r += uplo( A_r ) * B_r
// effective operation: C_c += B_c * conj( ~uplo( A_c ) )
bli_swap_ints( ldc, incc );
bli_swap_ints( lda, inca );
bli_swap_ints( ldb, incb );
bli_swap_ints( m, n );
bli_toggle_uplo( uplo );
bli_toggle_side( side );
}
}
}
// We need a temporary matrix for the cases where B needs to be copied.
b_copy = b;
ldb_copy = ldb;
incb_copy = incb;
// There are two cases where we need to make a copy of B: one where the
// copy's dimensions are transposed from the original B, and one where
// the dimensions are not swapped.
if ( symm_needs_copyb )
{
trans_t transb;
// Set transb, which determines whether or not we need to copy from B
// as if it needs a transposition. If a transposition is needed, then
// m and n and have already been swapped. So in either case m
// represents the leading dimension of the copy.
if ( symm_needs_transb ) transb = BLIS_TRANSPOSE;
else transb = BLIS_NO_TRANSPOSE;
b_copy = bli_zallocm( m, n );
ldb_copy = m;
incb_copy = 1;
bli_zcopymt( transb,
m,
n,
b, incb, ldb,
b_copy, incb_copy, ldb_copy );
}
// There are two cases where we need to perform the symm and then axpy
// the result into C with a transposition. We handle those cases here.
if ( symm_needs_axpyt )
{
// We need a temporary matrix for holding C^T. Notice that m and n
// represent the dimensions of C, and thus C_trans is n-by-m
// (interpreting both as column-major matrices). So the leading
// dimension of the temporary matrix holding C^T is n.
c_trans = bli_zallocm( n, m );
ldc_trans = n;
incc_trans = 1;
// Compute A * B (or B * A) and store the result in C_trans.
// Note that there is no overlap between the axpyt cases and
// the conja/copyb cases, hence the use of a, b, lda, and ldb.
bli_zsymm_blas( side,
uplo,
n,
m,
alpha,
a, lda,
b, ldb,
&zero,
c_trans, ldc_trans );
// Scale C by beta.
bli_zscalm( BLIS_NO_CONJUGATE,
m,
n,
beta,
c, incc, ldc );
// And finally, accumulate the matrix product in C_trans into C
// with a transpose.
bli_zaxpymt( BLIS_TRANSPOSE,
m,
n,
&one,
c_trans, incc_trans, ldc_trans,
c, incc, ldc );
// Free the temporary matrix for C.
bli_zfree( c_trans );
}
else // no extra axpyt step needed
{
bli_zsymm_blas( side,
uplo,
m,
n,
alpha,
a, lda,
b_copy, ldb_copy,
beta,
c, ldc );
}
if ( symm_needs_copyb )
bli_zfree( b_copy );
// Free any temporary contiguous matrices, copying the result back to
// the original matrix.
bli_zfree_contigm( a_save, a_rs_save, a_cs_save,
&a, &a_rs, &a_cs );
bli_zfree_contigm( b_save, b_rs_save, b_cs_save,
&b, &b_rs, &b_cs );
bli_zfree_saved_contigm( m_save,
n_save,
c_save, c_rs_save, c_cs_save,
&c, &c_rs, &c_cs );
}
| void bli_zsymm_blas | ( | side_t | side, |
| uplo_t | uplo, | ||
| int | m, | ||
| int | n, | ||
| dcomplex * | alpha, | ||
| dcomplex * | a, | ||
| int | lda, | ||
| dcomplex * | b, | ||
| int | ldb, | ||
| dcomplex * | beta, | ||
| dcomplex * | c, | ||
| int | ldc | ||
| ) |
References bli_param_map_to_netlib_side(), bli_param_map_to_netlib_uplo(), cblas_zsymm(), CblasColMajor, and F77_zsymm().
Referenced by bli_zsymm().
{
    // Hand a double-complex symmetric matrix multiply to the linked BLAS.
    // side and uplo are first translated into the constants expected by
    // whichever interface (CBLAS or Fortran-77) the build selected.
#ifdef BLIS_ENABLE_CBLAS_INTERFACES
    enum CBLAS_SIDE side_cblas;
    enum CBLAS_UPLO uplo_cblas;

    bli_param_map_to_netlib_side( side, &side_cblas );
    bli_param_map_to_netlib_uplo( uplo, &uplo_cblas );

    // CBLAS is told explicitly that the operands are column-major and
    // receives dimensions and leading dimensions by value.
    cblas_zsymm( CblasColMajor,
                 side_cblas,
                 uplo_cblas,
                 m,
                 n,
                 alpha,
                 a, lda,
                 b, ldb,
                 beta,
                 c, ldc );
#else
    char side_f77;
    char uplo_f77;

    bli_param_map_to_netlib_side( side, &side_f77 );
    bli_param_map_to_netlib_uplo( uplo, &uplo_f77 );

    // The Fortran-77 entry point takes every integer and character
    // argument by address.
    F77_zsymm( &side_f77,
               &uplo_f77,
               &m,
               &n,
               alpha,
               a, &lda,
               b, &ldb,
               beta,
               c, &ldc );
#endif
}
| void bli_zsyr2k | ( | uplo_t | uplo, |
| trans_t | trans, | ||
| int | m, | ||
| int | k, | ||
| dcomplex * | alpha, | ||
| dcomplex * | a, | ||
| int | a_rs, | ||
| int | a_cs, | ||
| dcomplex * | b, | ||
| int | b_rs, | ||
| int | b_cs, | ||
| dcomplex * | beta, | ||
| dcomplex * | c, | ||
| int | c_rs, | ||
| int | c_cs | ||
| ) |
References bli_is_col_storage(), bli_set_dims_with_trans(), bli_zallocm(), bli_zcopymt(), bli_zcreate_contigmr(), bli_zcreate_contigmt(), bli_zero_dim2(), bli_zfree(), bli_zfree_contigm(), bli_zfree_saved_contigmr(), bli_zsyr2k_blas(), and BLIS_NO_TRANSPOSE.
Referenced by FLA_Syr2k_external().
{
    // Symmetric rank-2k update of C for double-complex operands that may
    // have arbitrary row/column strides. The operands are first brought
    // into a form the column-major bli_zsyr2k_blas() wrapper can consume,
    // the update is performed, and all temporaries are released again.
    uplo_t    uplo_orig = uplo;
    int       m_orig    = m;
    dcomplex* a_orig    = a;
    dcomplex* b_orig    = b;
    dcomplex* c_orig    = c;
    int       a_rs_orig = a_rs;
    int       a_cs_orig = a_cs;
    int       b_rs_orig = b_rs;
    int       b_cs_orig = b_cs;
    int       c_rs_orig = c_rs;
    int       c_cs_orig = c_cs;
    dcomplex* a_col;
    dcomplex* b_col;
    int       lda, inca;
    int       ldb, incb;
    int       ldc, incc;
    int       lda_col, inca_col;
    int       ldb_col, incb_col;
    int       copy_a = FALSE;
    int       copy_b = FALSE;
    int       a_is_col, b_is_col, c_is_col;

    // Nothing to do when bli_zero_dim2() reports an empty problem.
    // NOTE(review): if this is also true when only k is zero, C is left
    // unscaled by beta in that case -- confirm that this is intended.
    if ( bli_zero_dim2( m, k ) ) return;

    // Where necessary, replace each operand by a temporary contiguous copy.
    // The *_orig values remember what the caller passed so the temporaries
    // can be released (and C written back) at the end.
    bli_zcreate_contigmt( trans,
                          m,
                          k,
                          a_orig, a_rs_orig, a_cs_orig,
                          &a, &a_rs, &a_cs );
    bli_zcreate_contigmt( trans,
                          m,
                          k,
                          b_orig, b_rs_orig, b_cs_orig,
                          &b, &b_rs, &b_cs );
    bli_zcreate_contigmr( uplo,
                          m,
                          m,
                          c_orig, c_rs_orig, c_cs_orig,
                          &c, &c_rs, &c_cs );

    // Start from the column-major interpretation of every operand.
    lda  = a_cs;
    inca = a_rs;
    ldb  = b_cs;
    incb = b_rs;
    ldc  = c_cs;
    incc = c_rs;

    a_is_col = bli_is_col_storage( a_rs, a_cs );
    b_is_col = bli_is_col_storage( b_rs, b_cs );
    c_is_col = bli_is_col_storage( c_rs, c_cs );

    // Storage of C.
    //   requested operation:  uplo( C_r ) += ...
    //   effective operation: ~uplo( C_c ) += ...
    // A row-stored C is addressed through its column-major view, so the
    // stride roles are exchanged and the opposite triangle is referenced.
    if ( !c_is_col )
    {
        bli_swap_ints( ldc, incc );
        bli_toggle_uplo( uplo );
    }

    // Storage of A and B (independent of how C is stored).
    //   A_c, B_c : both operands are used as they are.
    //   A_c, B_r : B is copied into column-major storage below.
    //   A_r, B_c : A is copied into column-major storage below.
    //   A_r, B_r : both are addressed through their column-major views and
    //              trans is toggled to compensate:
    //                requested operation: A_r * B_r^T + B_r * A_r^T
    //                effective operation: A_c^T * B_c + B_c^T * A_c
    if ( a_is_col && !b_is_col )
    {
        copy_b = TRUE;
    }
    else if ( !a_is_col && b_is_col )
    {
        copy_a = TRUE;
    }
    else if ( !a_is_col && !b_is_col )
    {
        bli_swap_ints( lda, inca );
        bli_swap_ints( ldb, incb );
        bli_toggle_trans( trans );
    }

    // By default the BLAS call reads A directly.
    a_col    = a;
    lda_col  = lda;
    inca_col = inca;

    if ( copy_a )
    {
        int rows_a;
        int cols_a;

        // The shape of A depends on trans; the row count becomes the
        // leading dimension of the column-major copy.
        bli_set_dims_with_trans( trans, m, k, &rows_a, &cols_a );

        a_col    = bli_zallocm( m, k );
        lda_col  = rows_a;
        inca_col = 1;

        bli_zcopymt( BLIS_NO_TRANSPOSE,
                     rows_a,
                     cols_a,
                     a,     inca,     lda,
                     a_col, inca_col, lda_col );
    }

    // By default the BLAS call reads B directly.
    b_col    = b;
    ldb_col  = ldb;
    incb_col = incb;

    if ( copy_b )
    {
        int rows_b;
        int cols_b;

        // Same reasoning as for A above.
        bli_set_dims_with_trans( trans, m, k, &rows_b, &cols_b );

        b_col    = bli_zallocm( m, k );
        ldb_col  = rows_b;
        incb_col = 1;

        bli_zcopymt( BLIS_NO_TRANSPOSE,
                     rows_b,
                     cols_b,
                     b,     incb,     ldb,
                     b_col, incb_col, ldb_col );
    }

    bli_zsyr2k_blas( uplo,
                     trans,
                     m,
                     k,
                     alpha,
                     a_col, lda_col,
                     b_col, ldb_col,
                     beta,
                     c, ldc );

    // Release the column-major copies, if any were made.
    if ( copy_a )
    {
        bli_zfree( a_col );
    }
    if ( copy_b )
    {
        bli_zfree( b_col );
    }

    // Release the contiguous temporaries. For C the original uplo and
    // dimension are passed so the result can be stored back into the
    // caller's matrix.
    bli_zfree_contigm( a_orig, a_rs_orig, a_cs_orig,
                       &a, &a_rs, &a_cs );
    bli_zfree_contigm( b_orig, b_rs_orig, b_cs_orig,
                       &b, &b_rs, &b_cs );
    bli_zfree_saved_contigmr( uplo_orig,
                              m_orig,
                              m_orig,
                              c_orig, c_rs_orig, c_cs_orig,
                              &c, &c_rs, &c_cs );
}
| void bli_zsyr2k_blas | ( | uplo_t | uplo, |
| trans_t | trans, | ||
| int | m, | ||
| int | k, | ||
| dcomplex * | alpha, | ||
| dcomplex * | a, | ||
| int | lda, | ||
| dcomplex * | b, | ||
| int | ldb, | ||
| dcomplex * | beta, | ||
| dcomplex * | c, | ||
| int | ldc | ||
| ) |
References bli_is_conjtrans(), bli_param_map_to_netlib_trans(), bli_param_map_to_netlib_uplo(), BLIS_TRANSPOSE, cblas_zsyr2k(), CblasColMajor, and F77_zsyr2k().
Referenced by bli_zsyr2k().
{
    // Hand a double-complex symmetric rank-2k update to the linked BLAS.
    //
    // BLAS doesn't recognize the conjugate-transposition constant for
    // syr2k, so it is mapped down to regular transposition before the
    // parameter is translated. This applies to both interfaces.
    trans_t trans_blas = bli_is_conjtrans( trans ) ? BLIS_TRANSPOSE : trans;
#ifdef BLIS_ENABLE_CBLAS_INTERFACES
    enum CBLAS_UPLO      uplo_cblas;
    enum CBLAS_TRANSPOSE trans_cblas;

    bli_param_map_to_netlib_uplo( uplo, &uplo_cblas );
    bli_param_map_to_netlib_trans( trans_blas, &trans_cblas );

    // CBLAS is told explicitly that the operands are column-major and
    // receives dimensions and leading dimensions by value.
    cblas_zsyr2k( CblasColMajor,
                  uplo_cblas,
                  trans_cblas,
                  m,
                  k,
                  alpha,
                  a, lda,
                  b, ldb,
                  beta,
                  c, ldc );
#else
    char uplo_f77;
    char trans_f77;

    bli_param_map_to_netlib_uplo( uplo, &uplo_f77 );
    bli_param_map_to_netlib_trans( trans_blas, &trans_f77 );

    // The Fortran-77 entry point takes every integer and character
    // argument by address.
    F77_zsyr2k( &uplo_f77,
                &trans_f77,
                &m,
                &k,
                alpha,
                a, &lda,
                b, &ldb,
                beta,
                c, &ldc );
#endif
}
| void bli_zsyrk | ( | uplo_t | uplo, |
| trans_t | trans, | ||
| int | m, | ||
| int | k, | ||
| dcomplex * | alpha, | ||
| dcomplex * | a, | ||
| int | a_rs, | ||
| int | a_cs, | ||
| dcomplex * | beta, | ||
| dcomplex * | c, | ||
| int | c_rs, | ||
| int | c_cs | ||
| ) |
References bli_is_col_storage(), bli_zcreate_contigmr(), bli_zcreate_contigmt(), bli_zero_dim2(), bli_zfree_contigm(), bli_zfree_saved_contigmr(), and bli_zsyrk_blas().
Referenced by FLA_Syrk_external().
{
    // Symmetric rank-k update of C for double-complex operands that may
    // have arbitrary row/column strides. A and C are brought into a form
    // the column-major bli_zsyrk_blas() wrapper can consume, the update is
    // performed, and the temporaries are released again.
    uplo_t    uplo_orig = uplo;
    int       m_orig    = m;
    dcomplex* a_orig    = a;
    dcomplex* c_orig    = c;
    int       a_rs_orig = a_rs;
    int       a_cs_orig = a_cs;
    int       c_rs_orig = c_rs;
    int       c_cs_orig = c_cs;
    int       lda, inca;
    int       ldc, incc;
    int       a_is_col, c_is_col;

    // Nothing to do when bli_zero_dim2() reports an empty problem.
    // NOTE(review): if this is also true when only k is zero, C is left
    // unscaled by beta in that case -- confirm that this is intended.
    if ( bli_zero_dim2( m, k ) ) return;

    // Where necessary, replace each operand by a temporary contiguous copy.
    bli_zcreate_contigmt( trans,
                          m,
                          k,
                          a_orig, a_rs_orig, a_cs_orig,
                          &a, &a_rs, &a_cs );
    bli_zcreate_contigmr( uplo,
                          m,
                          m,
                          c_orig, c_rs_orig, c_cs_orig,
                          &c, &c_rs, &c_cs );

    // Start from the column-major interpretation of both operands.
    lda  = a_cs;
    inca = a_rs;
    ldc  = c_cs;
    incc = c_rs;

    a_is_col = bli_is_col_storage( a_rs, a_cs );
    c_is_col = bli_is_col_storage( c_rs, c_cs );

    // Storage of A.
    //   requested operation: ... += A_r * A_r^T
    //   effective operation: ... += A_c^T * A_c
    if ( !a_is_col )
    {
        bli_swap_ints( lda, inca );
        bli_toggle_trans( trans );
    }

    // Storage of C.
    //   requested operation:  uplo( C_r ) += ...
    //   effective operation: ~uplo( C_c ) += ...
    if ( !c_is_col )
    {
        bli_swap_ints( ldc, incc );
        bli_toggle_uplo( uplo );
    }

    bli_zsyrk_blas( uplo,
                    trans,
                    m,
                    k,
                    alpha,
                    a, lda,
                    beta,
                    c, ldc );

    // Release the contiguous temporaries. For C the original uplo and
    // dimension are passed so the result can be stored back into the
    // caller's matrix.
    bli_zfree_contigm( a_orig, a_rs_orig, a_cs_orig,
                       &a, &a_rs, &a_cs );
    bli_zfree_saved_contigmr( uplo_orig,
                              m_orig,
                              m_orig,
                              c_orig, c_rs_orig, c_cs_orig,
                              &c, &c_rs, &c_cs );
}
| void bli_zsyrk_blas | ( | uplo_t | uplo, |
| trans_t | trans, | ||
| int | m, | ||
| int | k, | ||
| dcomplex * | alpha, | ||
| dcomplex * | a, | ||
| int | lda, | ||
| dcomplex * | beta, | ||
| dcomplex * | c, | ||
| int | ldc | ||
| ) |
References bli_param_map_to_netlib_trans(), bli_param_map_to_netlib_uplo(), cblas_zsyrk(), CblasColMajor, and F77_zsyrk().
Referenced by bli_zsyrk().
{
    // Hand a double-complex symmetric rank-k update to the linked BLAS.
    // uplo and trans are first translated into the constants expected by
    // whichever interface (CBLAS or Fortran-77) the build selected.
#ifdef BLIS_ENABLE_CBLAS_INTERFACES
    enum CBLAS_UPLO      uplo_cblas;
    enum CBLAS_TRANSPOSE trans_cblas;

    bli_param_map_to_netlib_uplo( uplo, &uplo_cblas );
    bli_param_map_to_netlib_trans( trans, &trans_cblas );

    // CBLAS is told explicitly that the operands are column-major and
    // receives dimensions and leading dimensions by value.
    cblas_zsyrk( CblasColMajor,
                 uplo_cblas,
                 trans_cblas,
                 m,
                 k,
                 alpha,
                 a, lda,
                 beta,
                 c, ldc );
#else
    char uplo_f77;
    char trans_f77;

    bli_param_map_to_netlib_uplo( uplo, &uplo_f77 );
    bli_param_map_to_netlib_trans( trans, &trans_f77 );

    // The Fortran-77 entry point takes every integer and character
    // argument by address.
    F77_zsyrk( &uplo_f77,
               &trans_f77,
               &m,
               &k,
               alpha,
               a, &lda,
               beta,
               c, &ldc );
#endif
}
| void bli_ztrmm | ( | side_t | side, |
| uplo_t | uplo, | ||
| trans_t | trans, | ||
| diag_t | diag, | ||
| int | m, | ||
| int | n, | ||
| dcomplex * | alpha, | ||
| dcomplex * | a, | ||
| int | a_rs, | ||
| int | a_cs, | ||
| dcomplex * | b, | ||
| int | b_rs, | ||
| int | b_cs | ||
| ) |
References bli_is_col_storage(), bli_is_conjnotrans(), bli_set_dim_with_side(), bli_zallocm(), bli_zconjmr(), bli_zcopymrt(), bli_zcreate_contigm(), bli_zcreate_contigmr(), bli_zero_dim2(), bli_zfree(), bli_zfree_contigm(), bli_zfree_saved_contigm(), bli_ztrmm_blas(), and BLIS_CONJ_NO_TRANSPOSE.
Referenced by bli_ztrmmsx(), and FLA_Trmm_external().
{
    // Triangular matrix multiply that overwrites B, for double-complex
    // operands that may have arbitrary row/column strides. The operands
    // are mapped onto column-major views, a conjugated A is prepared when
    // trans asks for conjugation without transposition, and the work is
    // done by bli_ztrmm_blas().
    int       m_orig    = m;
    int       n_orig    = n;
    dcomplex* a_orig    = a;
    dcomplex* b_orig    = b;
    int       a_rs_orig = a_rs;
    int       a_cs_orig = a_cs;
    int       b_rs_orig = b_rs;
    int       b_cs_orig = b_cs;
    dcomplex* a_use;
    int       dim_a;
    int       lda, inca;
    int       ldb, incb;
    int       lda_use, inca_use;
    int       a_was_copied;
    int       a_is_col, b_is_col;
    int       conj_needed;

    // Nothing to do for an empty problem.
    if ( bli_zero_dim2( m, n ) ) return;

    // Where necessary, replace each operand by a temporary contiguous copy.
    // The order of A follows from side and the dimensions of B.
    bli_set_dim_with_side( side, m, n, &dim_a );
    bli_zcreate_contigmr( uplo,
                          dim_a,
                          dim_a,
                          a_orig, a_rs_orig, a_cs_orig,
                          &a, &a_rs, &a_cs );
    bli_zcreate_contigm( m,
                         n,
                         b_orig, b_rs_orig, b_cs_orig,
                         &b, &b_rs, &b_cs );

    // Remember whether A now points at a private contiguous copy; if so it
    // can be conjugated in place later instead of being copied once more.
    a_was_copied = ( a != a_orig );

    // Start from the column-major interpretation of both operands.
    lda  = a_cs;
    inca = a_rs;
    ldb  = b_cs;
    incb = b_rs;

    a_is_col = bli_is_col_storage( a_rs, a_cs );
    b_is_col = bli_is_col_storage( b_rs, b_cs );

    // Adjust the parameters according to the storage of each matrix.
    if ( b_is_col && a_is_col )
    {
        // requested operation: B_c := tr( uplo( A_c ) ) * B_c
        // effective operation: B_c := tr( uplo( A_c ) ) * B_c
    }
    else if ( b_is_col )
    {
        // requested operation: B_c := tr( uplo( A_r ) ) * B_c
        // effective operation: B_c := tr( ~uplo( A_c ) )^T * B_c
        bli_swap_ints( lda, inca );
        bli_toggle_uplo( uplo );
        bli_toggle_trans( trans );
    }
    else if ( a_is_col )
    {
        // requested operation: B_r := tr( uplo( A_c ) ) * B_r
        // effective operation: B_c := B_c * tr( uplo( A_c ) )^T
        bli_swap_ints( ldb, incb );
        bli_swap_ints( m, n );
        bli_toggle_side( side );
        bli_toggle_trans( trans );
    }
    else
    {
        // requested operation: B_r := tr( uplo( A_r ) ) * B_r
        // effective operation: B_c := B_c * tr( ~uplo( A_c ) )
        bli_swap_ints( ldb, incb );
        bli_swap_ints( lda, inca );
        bli_swap_ints( m, n );
        bli_toggle_side( side );
        bli_toggle_uplo( uplo );
    }

    // Unless conjugation is required, the BLAS call reads A directly.
    a_use    = a;
    lda_use  = lda;
    inca_use = inca;

    // The conjugate-no-transpose case is handled by giving the BLAS call a
    // conjugated A. trans is final at this point, so the predicate is
    // evaluated once and reused for the clean-up below.
    conj_needed = bli_is_conjnotrans( trans );

    if ( conj_needed )
    {
        int dim_tri;

        // Recompute the order of A from the (possibly swapped) side, m, n.
        bli_set_dim_with_side( side, m, n, &dim_tri );

        if ( a_was_copied )
        {
            // A is already a private copy: conjugate it in place.
            bli_zconjmr( uplo,
                         dim_tri,
                         dim_tri,
                         a_use, inca_use, lda_use );
        }
        else
        {
            // A still refers to the caller's data: build a conjugated,
            // column-major copy and leave the original untouched.
            a_use    = bli_zallocm( dim_tri, dim_tri );
            lda_use  = dim_tri;
            inca_use = 1;

            bli_zcopymrt( uplo,
                          BLIS_CONJ_NO_TRANSPOSE,
                          dim_tri,
                          dim_tri,
                          a,     inca,     lda,
                          a_use, inca_use, lda_use );
        }
    }

    bli_ztrmm_blas( side,
                    uplo,
                    trans,
                    diag,
                    m,
                    n,
                    alpha,
                    a_use, lda_use,
                    b, ldb );

    // Only a separately allocated conjugated copy needs freeing here.
    if ( conj_needed && !a_was_copied )
    {
        bli_zfree( a_use );
    }

    // Release the contiguous temporaries. For B the original dimensions
    // are passed so the result can be stored back into the caller's matrix.
    bli_zfree_contigm( a_orig, a_rs_orig, a_cs_orig,
                       &a, &a_rs, &a_cs );
    bli_zfree_saved_contigm( m_orig,
                             n_orig,
                             b_orig, b_rs_orig, b_cs_orig,
                             &b, &b_rs, &b_cs );
}
| void bli_ztrmm_blas | ( | side_t | side, |
| uplo_t | uplo, | ||
| trans_t | trans, | ||
| diag_t | diag, | ||
| int | m, | ||
| int | n, | ||
| dcomplex * | alpha, | ||
| dcomplex * | a, | ||
| int | lda, | ||
| dcomplex * | b, | ||
| int | ldb | ||
| ) |
References bli_param_map_to_netlib_diag(), bli_param_map_to_netlib_side(), bli_param_map_to_netlib_trans(), bli_param_map_to_netlib_uplo(), cblas_ztrmm(), CblasColMajor, and F77_ztrmm().
Referenced by bli_ztrmm().
{
    // Hand a double-complex triangular matrix multiply to the linked BLAS.
    // side, uplo, trans and diag are first translated into the constants
    // expected by whichever interface (CBLAS or Fortran-77) the build
    // selected.
#ifdef BLIS_ENABLE_CBLAS_INTERFACES
    enum CBLAS_SIDE      side_cblas;
    enum CBLAS_UPLO      uplo_cblas;
    enum CBLAS_TRANSPOSE trans_cblas;
    enum CBLAS_DIAG      diag_cblas;

    bli_param_map_to_netlib_side( side, &side_cblas );
    bli_param_map_to_netlib_uplo( uplo, &uplo_cblas );
    bli_param_map_to_netlib_trans( trans, &trans_cblas );
    bli_param_map_to_netlib_diag( diag, &diag_cblas );

    // CBLAS is told explicitly that the operands are column-major and
    // receives dimensions and leading dimensions by value.
    cblas_ztrmm( CblasColMajor,
                 side_cblas,
                 uplo_cblas,
                 trans_cblas,
                 diag_cblas,
                 m,
                 n,
                 alpha,
                 a, lda,
                 b, ldb );
#else
    char side_f77;
    char uplo_f77;
    char trans_f77;
    char diag_f77;

    bli_param_map_to_netlib_side( side, &side_f77 );
    bli_param_map_to_netlib_uplo( uplo, &uplo_f77 );
    bli_param_map_to_netlib_trans( trans, &trans_f77 );
    bli_param_map_to_netlib_diag( diag, &diag_f77 );

    // The Fortran-77 entry point takes every integer and character
    // argument by address.
    F77_ztrmm( &side_f77,
               &uplo_f77,
               &trans_f77,
               &diag_f77,
               &m,
               &n,
               alpha,
               a, &lda,
               b, &ldb );
#endif
}
| void bli_ztrmmsx | ( | side_t | side, |
| uplo_t | uplo, | ||
| trans_t | trans, | ||
| diag_t | diag, | ||
| int | m, | ||
| int | n, | ||
| dcomplex * | alpha, | ||
| dcomplex * | a, | ||
| int | a_rs, | ||
| int | a_cs, | ||
| dcomplex * | b, | ||
| int | b_rs, | ||
| int | b_cs, | ||
| dcomplex * | beta, | ||
| dcomplex * | c, | ||
| int | c_rs, | ||
| int | c_cs | ||
| ) |
References bli_is_col_storage(), bli_set_dim_with_side(), bli_z1(), bli_zallocm(), bli_zaxpymt(), bli_zcopymt(), bli_zcreate_contigm(), bli_zcreate_contigmr(), bli_zero_dim2(), bli_zfree(), bli_zfree_contigm(), bli_zfree_saved_contigm(), bli_zscalm(), bli_ztrmm(), BLIS_NO_CONJUGATE, and BLIS_NO_TRANSPOSE.
Referenced by FLA_Trmmsx_external().
{
    // Triangular matrix multiply whose result is accumulated into C rather
    // than overwriting B: bli_ztrmm() is run on a scratch copy of B, C is
    // scaled by beta, and the scratch result is then added into C.
    int       m_orig    = m;
    int       n_orig    = n;
    dcomplex* a_orig    = a;
    dcomplex* b_orig    = b;
    dcomplex* c_orig    = c;
    int       a_rs_orig = a_rs;
    int       a_cs_orig = a_cs;
    int       b_rs_orig = b_rs;
    int       b_cs_orig = b_cs;
    int       c_rs_orig = c_rs;
    int       c_cs_orig = c_cs;
    dcomplex  one       = bli_z1();
    dcomplex* b_work;
    int       dim_a;
    int       b_work_rs, b_work_cs;
    int       b_is_col;

    // Nothing to do for an empty problem.
    if ( bli_zero_dim2( m, n ) ) return;

    // Where necessary, replace each operand by a temporary contiguous copy.
    // The order of A follows from side and the dimensions of B.
    bli_set_dim_with_side( side, m, n, &dim_a );
    bli_zcreate_contigmr( uplo,
                          dim_a,
                          dim_a,
                          a_orig, a_rs_orig, a_cs_orig,
                          &a, &a_rs, &a_cs );
    bli_zcreate_contigm( m,
                         n,
                         b_orig, b_rs_orig, b_cs_orig,
                         &b, &b_rs, &b_cs );
    bli_zcreate_contigm( m,
                         n,
                         c_orig, c_rs_orig, c_cs_orig,
                         &c, &c_rs, &c_cs );

    // The multiply overwrites its B operand, so it is given a scratch
    // matrix and the caller's B is left untouched.
    b_work = bli_zallocm( m, n );

    // Give the scratch matrix the same storage orientation as B.
    b_is_col  = bli_is_col_storage( b_rs, b_cs );
    b_work_rs = b_is_col ? 1 : n;
    b_work_cs = b_is_col ? m : 1;

    bli_zcopymt( BLIS_NO_TRANSPOSE,
                 m,
                 n,
                 b,      b_rs,      b_cs,
                 b_work, b_work_rs, b_work_cs );

    // Perform the triangular multiply on the scratch matrix.
    bli_ztrmm( side,
               uplo,
               trans,
               diag,
               m,
               n,
               alpha,
               a,      a_rs,      a_cs,
               b_work, b_work_rs, b_work_cs );

    // Scale C by beta ...
    bli_zscalm( BLIS_NO_CONJUGATE,
                m,
                n,
                beta,
                c, c_rs, c_cs );

    // ... and add the scratch result into it.
    bli_zaxpymt( BLIS_NO_TRANSPOSE,
                 m,
                 n,
                 &one,
                 b_work, b_work_rs, b_work_cs,
                 c,      c_rs,      c_cs );

    bli_zfree( b_work );

    // Release the contiguous temporaries. For C the original dimensions
    // are passed so the result can be stored back into the caller's matrix.
    bli_zfree_contigm( a_orig, a_rs_orig, a_cs_orig,
                       &a, &a_rs, &a_cs );
    bli_zfree_contigm( b_orig, b_rs_orig, b_cs_orig,
                       &b, &b_rs, &b_cs );
    bli_zfree_saved_contigm( m_orig,
                             n_orig,
                             c_orig, c_rs_orig, c_cs_orig,
                             &c, &c_rs, &c_cs );
}
| void bli_ztrsm | ( | side_t | side, |
| uplo_t | uplo, | ||
| trans_t | trans, | ||
| diag_t | diag, | ||
| int | m, | ||
| int | n, | ||
| dcomplex * | alpha, | ||
| dcomplex * | a, | ||
| int | a_rs, | ||
| int | a_cs, | ||
| dcomplex * | b, | ||
| int | b_rs, | ||
| int | b_cs | ||
| ) |
References bli_is_col_storage(), bli_is_conjnotrans(), bli_set_dim_with_side(), bli_zallocm(), bli_zconjmr(), bli_zcopymrt(), bli_zcreate_contigm(), bli_zcreate_contigmr(), bli_zero_dim2(), bli_zfree(), bli_zfree_contigm(), bli_zfree_saved_contigm(), bli_ztrsm_blas(), and BLIS_CONJ_NO_TRANSPOSE.
Referenced by bli_ztrsmsx(), FLA_LU_nopiv_opz_var1(), FLA_LU_nopiv_opz_var2(), FLA_LU_nopiv_opz_var3(), FLA_LU_piv_opz_var3(), and FLA_Trsm_external().
{
    // Front end for the BLAS trsm routine that overwrites B, for
    // double-complex operands that may have arbitrary row/column strides.
    // The operands are mapped onto column-major views, a conjugated A is
    // prepared when trans asks for conjugation without transposition, and
    // the work is done by bli_ztrsm_blas().
    int       m_orig    = m;
    int       n_orig    = n;
    dcomplex* a_orig    = a;
    dcomplex* b_orig    = b;
    int       a_rs_orig = a_rs;
    int       a_cs_orig = a_cs;
    int       b_rs_orig = b_rs;
    int       b_cs_orig = b_cs;
    dcomplex* a_use;
    int       dim_a;
    int       lda, inca;
    int       ldb, incb;
    int       lda_use, inca_use;
    int       a_was_copied;
    int       a_is_col, b_is_col;
    int       conj_needed;

    // Nothing to do for an empty problem.
    if ( bli_zero_dim2( m, n ) ) return;

    // Where necessary, replace each operand by a temporary contiguous copy.
    // The order of A follows from side and the dimensions of B.
    bli_set_dim_with_side( side, m, n, &dim_a );
    bli_zcreate_contigmr( uplo,
                          dim_a,
                          dim_a,
                          a_orig, a_rs_orig, a_cs_orig,
                          &a, &a_rs, &a_cs );
    bli_zcreate_contigm( m,
                         n,
                         b_orig, b_rs_orig, b_cs_orig,
                         &b, &b_rs, &b_cs );

    // Remember whether A now points at a private contiguous copy; if so it
    // can be conjugated in place later instead of being copied once more.
    a_was_copied = ( a != a_orig );

    // Start from the column-major interpretation of both operands.
    lda  = a_cs;
    inca = a_rs;
    ldb  = b_cs;
    incb = b_rs;

    a_is_col = bli_is_col_storage( a_rs, a_cs );
    b_is_col = bli_is_col_storage( b_rs, b_cs );

    // Adjust the parameters according to the storage of each matrix.
    if ( b_is_col && a_is_col )
    {
        // A_c, B_c: both operands are already column-major; nothing to
        // adjust.
    }
    else if ( b_is_col )
    {
        // A_r, B_c: address A through its column-major view, which is the
        // transpose of the requested A -- exchange its strides and toggle
        // both uplo and trans.
        bli_swap_ints( lda, inca );
        bli_toggle_uplo( uplo );
        bli_toggle_trans( trans );
    }
    else if ( a_is_col )
    {
        // A_c, B_r: address B through its column-major view, which is the
        // transpose of the requested B -- exchange its strides and
        // dimensions, apply A from the other side, and toggle trans.
        bli_swap_ints( ldb, incb );
        bli_swap_ints( m, n );
        bli_toggle_side( side );
        bli_toggle_trans( trans );
    }
    else
    {
        // A_r, B_r: address both through their column-major views --
        // exchange the strides of both, swap the dimensions, and toggle
        // side and uplo. trans is left as requested.
        bli_swap_ints( ldb, incb );
        bli_swap_ints( lda, inca );
        bli_swap_ints( m, n );
        bli_toggle_side( side );
        bli_toggle_uplo( uplo );
    }

    // Unless conjugation is required, the BLAS call reads A directly.
    a_use    = a;
    lda_use  = lda;
    inca_use = inca;

    // The conjugate-no-transpose case is handled by giving the BLAS call a
    // conjugated A. trans is final at this point, so the predicate is
    // evaluated once and reused for the clean-up below.
    conj_needed = bli_is_conjnotrans( trans );

    if ( conj_needed )
    {
        int dim_tri;

        // Recompute the order of A from the (possibly swapped) side, m, n.
        bli_set_dim_with_side( side, m, n, &dim_tri );

        if ( a_was_copied )
        {
            // A is already a private copy: conjugate it in place.
            bli_zconjmr( uplo,
                         dim_tri,
                         dim_tri,
                         a_use, inca_use, lda_use );
        }
        else
        {
            // A still refers to the caller's data: build a conjugated,
            // column-major copy and leave the original untouched.
            a_use    = bli_zallocm( dim_tri, dim_tri );
            lda_use  = dim_tri;
            inca_use = 1;

            bli_zcopymrt( uplo,
                          BLIS_CONJ_NO_TRANSPOSE,
                          dim_tri,
                          dim_tri,
                          a,     inca,     lda,
                          a_use, inca_use, lda_use );
        }
    }

    bli_ztrsm_blas( side,
                    uplo,
                    trans,
                    diag,
                    m,
                    n,
                    alpha,
                    a_use, lda_use,
                    b, ldb );

    // Only a separately allocated conjugated copy needs freeing here.
    if ( conj_needed && !a_was_copied )
    {
        bli_zfree( a_use );
    }

    // Release the contiguous temporaries. For B the original dimensions
    // are passed so the result can be stored back into the caller's matrix.
    bli_zfree_contigm( a_orig, a_rs_orig, a_cs_orig,
                       &a, &a_rs, &a_cs );
    bli_zfree_saved_contigm( m_orig,
                             n_orig,
                             b_orig, b_rs_orig, b_cs_orig,
                             &b, &b_rs, &b_cs );
}
| void bli_ztrsm_blas | ( | side_t | side, |
| uplo_t | uplo, | ||
| trans_t | trans, | ||
| diag_t | diag, | ||
| int | m, | ||
| int | n, | ||
| dcomplex * | alpha, | ||
| dcomplex * | a, | ||
| int | lda, | ||
| dcomplex * | b, | ||
| int | ldb | ||
| ) |
References bli_param_map_to_netlib_diag(), bli_param_map_to_netlib_side(), bli_param_map_to_netlib_trans(), bli_param_map_to_netlib_uplo(), cblas_ztrsm(), CblasColMajor, and F77_ztrsm().
Referenced by bli_ztrsm().
{
    // Hand a double-complex trsm call to the linked BLAS. side, uplo,
    // trans and diag are first translated into the constants expected by
    // whichever interface (CBLAS or Fortran-77) the build selected.
#ifdef BLIS_ENABLE_CBLAS_INTERFACES
    enum CBLAS_SIDE      side_cblas;
    enum CBLAS_UPLO      uplo_cblas;
    enum CBLAS_TRANSPOSE trans_cblas;
    enum CBLAS_DIAG      diag_cblas;

    bli_param_map_to_netlib_side( side, &side_cblas );
    bli_param_map_to_netlib_uplo( uplo, &uplo_cblas );
    bli_param_map_to_netlib_trans( trans, &trans_cblas );
    bli_param_map_to_netlib_diag( diag, &diag_cblas );

    // CBLAS is told explicitly that the operands are column-major and
    // receives dimensions and leading dimensions by value.
    cblas_ztrsm( CblasColMajor,
                 side_cblas,
                 uplo_cblas,
                 trans_cblas,
                 diag_cblas,
                 m,
                 n,
                 alpha,
                 a, lda,
                 b, ldb );
#else
    char side_f77;
    char uplo_f77;
    char trans_f77;
    char diag_f77;

    bli_param_map_to_netlib_side( side, &side_f77 );
    bli_param_map_to_netlib_uplo( uplo, &uplo_f77 );
    bli_param_map_to_netlib_trans( trans, &trans_f77 );
    bli_param_map_to_netlib_diag( diag, &diag_f77 );

    // The Fortran-77 entry point takes every integer and character
    // argument by address.
    F77_ztrsm( &side_f77,
               &uplo_f77,
               &trans_f77,
               &diag_f77,
               &m,
               &n,
               alpha,
               a, &lda,
               b, &ldb );
#endif
}
| void bli_ztrsmsx | ( | side_t | side, |
| uplo_t | uplo, | ||
| trans_t | trans, | ||
| diag_t | diag, | ||
| int | m, | ||
| int | n, | ||
| dcomplex * | alpha, | ||
| dcomplex * | a, | ||
| int | a_rs, | ||
| int | a_cs, | ||
| dcomplex * | b, | ||
| int | b_rs, | ||
| int | b_cs, | ||
| dcomplex * | beta, | ||
| dcomplex * | c, | ||
| int | c_rs, | ||
| int | c_cs | ||
| ) |
References bli_is_col_storage(), bli_set_dim_with_side(), bli_z1(), bli_zallocm(), bli_zaxpymt(), bli_zcopymt(), bli_zcreate_contigm(), bli_zcreate_contigmr(), bli_zero_dim2(), bli_zfree(), bli_zfree_contigm(), bli_zfree_saved_contigm(), bli_zscalm(), bli_ztrsm(), BLIS_NO_CONJUGATE, and BLIS_NO_TRANSPOSE.
Referenced by FLA_Trsmsx_external().
{
    // trsm variant whose result is accumulated into C rather than
    // overwriting B: bli_ztrsm() is run on a scratch copy of B, C is scaled
    // by beta, and the scratch result is then added into C.
    int       m_orig    = m;
    int       n_orig    = n;
    dcomplex* a_orig    = a;
    dcomplex* b_orig    = b;
    dcomplex* c_orig    = c;
    int       a_rs_orig = a_rs;
    int       a_cs_orig = a_cs;
    int       b_rs_orig = b_rs;
    int       b_cs_orig = b_cs;
    int       c_rs_orig = c_rs;
    int       c_cs_orig = c_cs;
    dcomplex  one       = bli_z1();
    dcomplex* b_work;
    int       dim_a;
    int       b_work_rs, b_work_cs;
    int       b_is_col;

    // Nothing to do for an empty problem.
    if ( bli_zero_dim2( m, n ) ) return;

    // Where necessary, replace each operand by a temporary contiguous copy.
    // The order of A follows from side and the dimensions of B.
    bli_set_dim_with_side( side, m, n, &dim_a );
    bli_zcreate_contigmr( uplo,
                          dim_a,
                          dim_a,
                          a_orig, a_rs_orig, a_cs_orig,
                          &a, &a_rs, &a_cs );
    bli_zcreate_contigm( m,
                         n,
                         b_orig, b_rs_orig, b_cs_orig,
                         &b, &b_rs, &b_cs );
    bli_zcreate_contigm( m,
                         n,
                         c_orig, c_rs_orig, c_cs_orig,
                         &c, &c_rs, &c_cs );

    // bli_ztrsm() overwrites its B operand, so it is given a scratch
    // matrix and the caller's B is left untouched.
    b_work = bli_zallocm( m, n );

    // Give the scratch matrix the same storage orientation as B.
    b_is_col  = bli_is_col_storage( b_rs, b_cs );
    b_work_rs = b_is_col ? 1 : n;
    b_work_cs = b_is_col ? m : 1;

    bli_zcopymt( BLIS_NO_TRANSPOSE,
                 m,
                 n,
                 b,      b_rs,      b_cs,
                 b_work, b_work_rs, b_work_cs );

    // Perform the operation on the scratch matrix.
    bli_ztrsm( side,
               uplo,
               trans,
               diag,
               m,
               n,
               alpha,
               a,      a_rs,      a_cs,
               b_work, b_work_rs, b_work_cs );

    // Scale C by beta ...
    bli_zscalm( BLIS_NO_CONJUGATE,
                m,
                n,
                beta,
                c, c_rs, c_cs );

    // ... and add the scratch result into it.
    bli_zaxpymt( BLIS_NO_TRANSPOSE,
                 m,
                 n,
                 &one,
                 b_work, b_work_rs, b_work_cs,
                 c,      c_rs,      c_cs );

    bli_zfree( b_work );

    // Release the contiguous temporaries. For C the original dimensions
    // are passed so the result can be stored back into the caller's matrix.
    bli_zfree_contigm( a_orig, a_rs_orig, a_cs_orig,
                       &a, &a_rs, &a_cs );
    bli_zfree_contigm( b_orig, b_rs_orig, b_cs_orig,
                       &b, &b_rs, &b_cs );
    bli_zfree_saved_contigm( m_orig,
                             n_orig,
                             c_orig, c_rs_orig, c_cs_orig,
                             &c, &c_rs, &c_cs );
}
1.7.6.1