|
libflame
revision_anchor
|
Go to the source code of this file.
Functions | |
| void | bli_samax (int n, float *x, int incx, int *index) |
| void | bli_damax (int n, double *x, int incx, int *index) |
| void | bli_camax (int n, scomplex *x, int incx, int *index) |
| void | bli_zamax (int n, dcomplex *x, int incx, int *index) |
| void | bli_sasum (int n, float *x, int incx, float *norm) |
| void | bli_dasum (int n, double *x, int incx, double *norm) |
| void | bli_casum (int n, scomplex *x, int incx, float *norm) |
| void | bli_zasum (int n, dcomplex *x, int incx, double *norm) |
| void | bli_saxpy (int n, float *alpha, float *x, int incx, float *y, int incy) |
| void | bli_daxpy (int n, double *alpha, double *x, int incx, double *y, int incy) |
| void | bli_caxpy (int n, scomplex *alpha, scomplex *x, int incx, scomplex *y, int incy) |
| void | bli_zaxpy (int n, dcomplex *alpha, dcomplex *x, int incx, dcomplex *y, int incy) |
| void | bli_saxpyv (conj_t conj, int n, float *alpha, float *x, int incx, float *y, int incy) |
| void | bli_daxpyv (conj_t conj, int n, double *alpha, double *x, int incx, double *y, int incy) |
| void | bli_caxpyv (conj_t conj, int n, scomplex *alpha, scomplex *x, int incx, scomplex *y, int incy) |
| void | bli_zaxpyv (conj_t conj, int n, dcomplex *alpha, dcomplex *x, int incx, dcomplex *y, int incy) |
| void | bli_saxpymt (trans_t trans, int m, int n, float *alpha, float *a, int a_rs, int a_cs, float *b, int b_rs, int b_cs) |
| void | bli_daxpymt (trans_t trans, int m, int n, double *alpha, double *a, int a_rs, int a_cs, double *b, int b_rs, int b_cs) |
| void | bli_caxpymt (trans_t trans, int m, int n, scomplex *alpha, scomplex *a, int a_rs, int a_cs, scomplex *b, int b_rs, int b_cs) |
| void | bli_zaxpymt (trans_t trans, int m, int n, dcomplex *alpha, dcomplex *a, int a_rs, int a_cs, dcomplex *b, int b_rs, int b_cs) |
| void | bli_saxpymrt (uplo_t uplo, trans_t trans, int m, int n, float *alpha, float *a, int a_rs, int a_cs, float *b, int b_rs, int b_cs) |
| void | bli_daxpymrt (uplo_t uplo, trans_t trans, int m, int n, double *alpha, double *a, int a_rs, int a_cs, double *b, int b_rs, int b_cs) |
| void | bli_caxpymrt (uplo_t uplo, trans_t trans, int m, int n, scomplex *alpha, scomplex *a, int a_rs, int a_cs, scomplex *b, int b_rs, int b_cs) |
| void | bli_zaxpymrt (uplo_t uplo, trans_t trans, int m, int n, dcomplex *alpha, dcomplex *a, int a_rs, int a_cs, dcomplex *b, int b_rs, int b_cs) |
| void | bli_saxpysv (int n, float *alpha0, float *alpha1, float *x, int incx, float *beta, float *y, int incy) |
| void | bli_daxpysv (int n, double *alpha0, double *alpha1, double *x, int incx, double *beta, double *y, int incy) |
| void | bli_caxpysv (int n, scomplex *alpha0, scomplex *alpha1, scomplex *x, int incx, scomplex *beta, scomplex *y, int incy) |
| void | bli_zaxpysv (int n, dcomplex *alpha0, dcomplex *alpha1, dcomplex *x, int incx, dcomplex *beta, dcomplex *y, int incy) |
| void | bli_saxpysmt (trans_t trans, int m, int n, float *alpha0, float *alpha1, float *a, int a_rs, int a_cs, float *beta, float *b, int b_rs, int b_cs) |
| void | bli_daxpysmt (trans_t trans, int m, int n, double *alpha0, double *alpha1, double *a, int a_rs, int a_cs, double *beta, double *b, int b_rs, int b_cs) |
| void | bli_caxpysmt (trans_t trans, int m, int n, scomplex *alpha0, scomplex *alpha1, scomplex *a, int a_rs, int a_cs, scomplex *beta, scomplex *b, int b_rs, int b_cs) |
| void | bli_zaxpysmt (trans_t trans, int m, int n, dcomplex *alpha0, dcomplex *alpha1, dcomplex *a, int a_rs, int a_cs, dcomplex *beta, dcomplex *b, int b_rs, int b_cs) |
| void | bli_sconjv (int m, float *x, int incx) |
| void | bli_dconjv (int m, double *x, int incx) |
| void | bli_cconjv (int m, scomplex *x, int incx) |
| void | bli_zconjv (int m, dcomplex *x, int incx) |
| void | bli_sconjm (int m, int n, float *a, int a_rs, int a_cs) |
| void | bli_dconjm (int m, int n, double *a, int a_rs, int a_cs) |
| void | bli_cconjm (int m, int n, scomplex *a, int a_rs, int a_cs) |
| void | bli_zconjm (int m, int n, dcomplex *a, int a_rs, int a_cs) |
| void | bli_sconjmr (uplo_t uplo, int m, int n, float *a, int a_rs, int a_cs) |
| void | bli_dconjmr (uplo_t uplo, int m, int n, double *a, int a_rs, int a_cs) |
| void | bli_cconjmr (uplo_t uplo, int m, int n, scomplex *a, int a_rs, int a_cs) |
| void | bli_zconjmr (uplo_t uplo, int m, int n, dcomplex *a, int a_rs, int a_cs) |
| void | bli_scopy (int m, float *x, int incx, float *y, int incy) |
| void | bli_dcopy (int m, double *x, int incx, double *y, int incy) |
| void | bli_ccopy (int m, scomplex *x, int incx, scomplex *y, int incy) |
| void | bli_zcopy (int m, dcomplex *x, int incx, dcomplex *y, int incy) |
| void | bli_icopyv (conj_t conj, int m, int *x, int incx, int *y, int incy) |
| void | bli_scopyv (conj_t conj, int m, float *x, int incx, float *y, int incy) |
| void | bli_dcopyv (conj_t conj, int m, double *x, int incx, double *y, int incy) |
| void | bli_ccopyv (conj_t conj, int m, scomplex *x, int incx, scomplex *y, int incy) |
| void | bli_zcopyv (conj_t conj, int m, dcomplex *x, int incx, dcomplex *y, int incy) |
| void | bli_sdcopyv (conj_t conj, int m, float *x, int incx, double *y, int incy) |
| void | bli_dscopyv (conj_t conj, int m, double *x, int incx, float *y, int incy) |
| void | bli_sccopyv (conj_t conj, int m, float *x, int incx, scomplex *y, int incy) |
| void | bli_cscopyv (conj_t conj, int m, scomplex *x, int incx, float *y, int incy) |
| void | bli_szcopyv (conj_t conj, int m, float *x, int incx, dcomplex *y, int incy) |
| void | bli_zscopyv (conj_t conj, int m, dcomplex *x, int incx, float *y, int incy) |
| void | bli_dccopyv (conj_t conj, int m, double *x, int incx, scomplex *y, int incy) |
| void | bli_cdcopyv (conj_t conj, int m, scomplex *x, int incx, double *y, int incy) |
| void | bli_dzcopyv (conj_t conj, int m, double *x, int incx, dcomplex *y, int incy) |
| void | bli_zdcopyv (conj_t conj, int m, dcomplex *x, int incx, double *y, int incy) |
| void | bli_czcopyv (conj_t conj, int m, scomplex *x, int incx, dcomplex *y, int incy) |
| void | bli_zccopyv (conj_t conj, int m, dcomplex *x, int incx, scomplex *y, int incy) |
| void | bli_scopymr (uplo_t uplo, int m, int n, float *a, int a_rs, int a_cs, float *b, int b_rs, int b_cs) |
| void | bli_dcopymr (uplo_t uplo, int m, int n, double *a, int a_rs, int a_cs, double *b, int b_rs, int b_cs) |
| void | bli_ccopymr (uplo_t uplo, int m, int n, scomplex *a, int a_rs, int a_cs, scomplex *b, int b_rs, int b_cs) |
| void | bli_zcopymr (uplo_t uplo, int m, int n, dcomplex *a, int a_rs, int a_cs, dcomplex *b, int b_rs, int b_cs) |
| void | bli_sscopymr (uplo_t uplo, int m, int n, float *a, int a_rs, int a_cs, float *b, int b_rs, int b_cs) |
| void | bli_sdcopymr (uplo_t uplo, int m, int n, float *a, int a_rs, int a_cs, double *b, int b_rs, int b_cs) |
| void | bli_dscopymr (uplo_t uplo, int m, int n, double *a, int a_rs, int a_cs, float *b, int b_rs, int b_cs) |
| void | bli_sccopymr (uplo_t uplo, int m, int n, float *a, int a_rs, int a_cs, scomplex *b, int b_rs, int b_cs) |
| void | bli_cscopymr (uplo_t uplo, int m, int n, scomplex *a, int a_rs, int a_cs, float *b, int b_rs, int b_cs) |
| void | bli_szcopymr (uplo_t uplo, int m, int n, float *a, int a_rs, int a_cs, dcomplex *b, int b_rs, int b_cs) |
| void | bli_zscopymr (uplo_t uplo, int m, int n, dcomplex *a, int a_rs, int a_cs, float *b, int b_rs, int b_cs) |
| void | bli_ddcopymr (uplo_t uplo, int m, int n, double *a, int a_rs, int a_cs, double *b, int b_rs, int b_cs) |
| void | bli_dccopymr (uplo_t uplo, int m, int n, double *a, int a_rs, int a_cs, scomplex *b, int b_rs, int b_cs) |
| void | bli_cdcopymr (uplo_t uplo, int m, int n, scomplex *a, int a_rs, int a_cs, double *b, int b_rs, int b_cs) |
| void | bli_dzcopymr (uplo_t uplo, int m, int n, double *a, int a_rs, int a_cs, dcomplex *b, int b_rs, int b_cs) |
| void | bli_zdcopymr (uplo_t uplo, int m, int n, dcomplex *a, int a_rs, int a_cs, double *b, int b_rs, int b_cs) |
| void | bli_cccopymr (uplo_t uplo, int m, int n, scomplex *a, int a_rs, int a_cs, scomplex *b, int b_rs, int b_cs) |
| void | bli_czcopymr (uplo_t uplo, int m, int n, scomplex *a, int a_rs, int a_cs, dcomplex *b, int b_rs, int b_cs) |
| void | bli_zccopymr (uplo_t uplo, int m, int n, dcomplex *a, int a_rs, int a_cs, scomplex *b, int b_rs, int b_cs) |
| void | bli_zzcopymr (uplo_t uplo, int m, int n, dcomplex *a, int a_rs, int a_cs, dcomplex *b, int b_rs, int b_cs) |
| void | bli_scopymrt (uplo_t uplo, trans_t trans, int m, int n, float *a, int a_rs, int a_cs, float *b, int b_rs, int b_cs) |
| void | bli_dcopymrt (uplo_t uplo, trans_t trans, int m, int n, double *a, int a_rs, int a_cs, double *b, int b_rs, int b_cs) |
| void | bli_ccopymrt (uplo_t uplo, trans_t trans, int m, int n, scomplex *a, int a_rs, int a_cs, scomplex *b, int b_rs, int b_cs) |
| void | bli_zcopymrt (uplo_t uplo, trans_t trans, int m, int n, dcomplex *a, int a_rs, int a_cs, dcomplex *b, int b_rs, int b_cs) |
| void | bli_sscopymrt (uplo_t uplo, trans_t trans, int m, int n, float *a, int a_rs, int a_cs, float *b, int b_rs, int b_cs) |
| void | bli_sdcopymrt (uplo_t uplo, trans_t trans, int m, int n, float *a, int a_rs, int a_cs, double *b, int b_rs, int b_cs) |
| void | bli_sccopymrt (uplo_t uplo, trans_t trans, int m, int n, float *a, int a_rs, int a_cs, scomplex *b, int b_rs, int b_cs) |
| void | bli_szcopymrt (uplo_t uplo, trans_t trans, int m, int n, float *a, int a_rs, int a_cs, dcomplex *b, int b_rs, int b_cs) |
| void | bli_dscopymrt (uplo_t uplo, trans_t trans, int m, int n, double *a, int a_rs, int a_cs, float *b, int b_rs, int b_cs) |
| void | bli_ddcopymrt (uplo_t uplo, trans_t trans, int m, int n, double *a, int a_rs, int a_cs, double *b, int b_rs, int b_cs) |
| void | bli_dccopymrt (uplo_t uplo, trans_t trans, int m, int n, double *a, int a_rs, int a_cs, scomplex *b, int b_rs, int b_cs) |
| void | bli_dzcopymrt (uplo_t uplo, trans_t trans, int m, int n, double *a, int a_rs, int a_cs, dcomplex *b, int b_rs, int b_cs) |
| void | bli_cscopymrt (uplo_t uplo, trans_t trans, int m, int n, scomplex *a, int a_rs, int a_cs, float *b, int b_rs, int b_cs) |
| void | bli_cdcopymrt (uplo_t uplo, trans_t trans, int m, int n, scomplex *a, int a_rs, int a_cs, double *b, int b_rs, int b_cs) |
| void | bli_cccopymrt (uplo_t uplo, trans_t trans, int m, int n, scomplex *a, int a_rs, int a_cs, scomplex *b, int b_rs, int b_cs) |
| void | bli_czcopymrt (uplo_t uplo, trans_t trans, int m, int n, scomplex *a, int a_rs, int a_cs, dcomplex *b, int b_rs, int b_cs) |
| void | bli_zscopymrt (uplo_t uplo, trans_t trans, int m, int n, dcomplex *a, int a_rs, int a_cs, float *b, int b_rs, int b_cs) |
| void | bli_zdcopymrt (uplo_t uplo, trans_t trans, int m, int n, dcomplex *a, int a_rs, int a_cs, double *b, int b_rs, int b_cs) |
| void | bli_zccopymrt (uplo_t uplo, trans_t trans, int m, int n, dcomplex *a, int a_rs, int a_cs, scomplex *b, int b_rs, int b_cs) |
| void | bli_zzcopymrt (uplo_t uplo, trans_t trans, int m, int n, dcomplex *a, int a_rs, int a_cs, dcomplex *b, int b_rs, int b_cs) |
| void | bli_icopymt (trans_t trans, int m, int n, int *a, int a_rs, int a_cs, int *b, int b_rs, int b_cs) |
| void | bli_scopymt (trans_t trans, int m, int n, float *a, int a_rs, int a_cs, float *b, int b_rs, int b_cs) |
| void | bli_dcopymt (trans_t trans, int m, int n, double *a, int a_rs, int a_cs, double *b, int b_rs, int b_cs) |
| void | bli_ccopymt (trans_t trans, int m, int n, scomplex *a, int a_rs, int a_cs, scomplex *b, int b_rs, int b_cs) |
| void | bli_zcopymt (trans_t trans, int m, int n, dcomplex *a, int a_rs, int a_cs, dcomplex *b, int b_rs, int b_cs) |
| void | bli_sscopymt (trans_t trans, int m, int n, float *a, int a_rs, int a_cs, float *b, int b_rs, int b_cs) |
| void | bli_sdcopymt (trans_t trans, int m, int n, float *a, int a_rs, int a_cs, double *b, int b_rs, int b_cs) |
| void | bli_dscopymt (trans_t trans, int m, int n, double *a, int a_rs, int a_cs, float *b, int b_rs, int b_cs) |
| void | bli_sccopymt (trans_t trans, int m, int n, float *a, int a_rs, int a_cs, scomplex *b, int b_rs, int b_cs) |
| void | bli_cscopymt (trans_t trans, int m, int n, scomplex *a, int a_rs, int a_cs, float *b, int b_rs, int b_cs) |
| void | bli_szcopymt (trans_t trans, int m, int n, float *a, int a_rs, int a_cs, dcomplex *b, int b_rs, int b_cs) |
| void | bli_zscopymt (trans_t trans, int m, int n, dcomplex *a, int a_rs, int a_cs, float *b, int b_rs, int b_cs) |
| void | bli_ddcopymt (trans_t trans, int m, int n, double *a, int a_rs, int a_cs, double *b, int b_rs, int b_cs) |
| void | bli_dccopymt (trans_t trans, int m, int n, double *a, int a_rs, int a_cs, scomplex *b, int b_rs, int b_cs) |
| void | bli_cdcopymt (trans_t trans, int m, int n, scomplex *a, int a_rs, int a_cs, double *b, int b_rs, int b_cs) |
| void | bli_dzcopymt (trans_t trans, int m, int n, double *a, int a_rs, int a_cs, dcomplex *b, int b_rs, int b_cs) |
| void | bli_zdcopymt (trans_t trans, int m, int n, dcomplex *a, int a_rs, int a_cs, double *b, int b_rs, int b_cs) |
| void | bli_cccopymt (trans_t trans, int m, int n, scomplex *a, int a_rs, int a_cs, scomplex *b, int b_rs, int b_cs) |
| void | bli_czcopymt (trans_t trans, int m, int n, scomplex *a, int a_rs, int a_cs, dcomplex *b, int b_rs, int b_cs) |
| void | bli_zccopymt (trans_t trans, int m, int n, dcomplex *a, int a_rs, int a_cs, scomplex *b, int b_rs, int b_cs) |
| void | bli_zzcopymt (trans_t trans, int m, int n, dcomplex *a, int a_rs, int a_cs, dcomplex *b, int b_rs, int b_cs) |
| void | bli_cdot_in (conj_t conj, int n, scomplex *x, int incx, scomplex *y, int incy, scomplex *rho) |
| void | bli_zdot_in (conj_t conj, int n, dcomplex *x, int incx, dcomplex *y, int incy, dcomplex *rho) |
| void | bli_sdot (conj_t conj, int n, float *x, int incx, float *y, int incy, float *rho) |
| void | bli_ddot (conj_t conj, int n, double *x, int incx, double *y, int incy, double *rho) |
| void | bli_cdot (conj_t conj, int n, scomplex *x, int incx, scomplex *y, int incy, scomplex *rho) |
| void | bli_zdot (conj_t conj, int n, dcomplex *x, int incx, dcomplex *y, int incy, dcomplex *rho) |
| void | bli_sdots (conj_t conj, int n, float *alpha, float *x, int incx, float *y, int incy, float *beta, float *rho) |
| void | bli_ddots (conj_t conj, int n, double *alpha, double *x, int incx, double *y, int incy, double *beta, double *rho) |
| void | bli_cdots (conj_t conj, int n, scomplex *alpha, scomplex *x, int incx, scomplex *y, int incy, scomplex *beta, scomplex *rho) |
| void | bli_zdots (conj_t conj, int n, dcomplex *alpha, dcomplex *x, int incx, dcomplex *y, int incy, dcomplex *beta, dcomplex *rho) |
| void | bli_sdot2s (conj_t conj, int n, float *alpha, float *x, int incx, float *y, int incy, float *beta, float *rho) |
| void | bli_ddot2s (conj_t conj, int n, double *alpha, double *x, int incx, double *y, int incy, double *beta, double *rho) |
| void | bli_cdot2s (conj_t conj, int n, scomplex *alpha, scomplex *x, int incx, scomplex *y, int incy, scomplex *beta, scomplex *rho) |
| void | bli_zdot2s (conj_t conj, int n, dcomplex *alpha, dcomplex *x, int incx, dcomplex *y, int incy, dcomplex *beta, dcomplex *rho) |
| void | bli_sfnorm (int m, int n, float *a, int a_rs, int a_cs, float *norm) |
| void | bli_dfnorm (int m, int n, double *a, int a_rs, int a_cs, double *norm) |
| void | bli_cfnorm (int m, int n, scomplex *a, int a_rs, int a_cs, float *norm) |
| void | bli_zfnorm (int m, int n, dcomplex *a, int a_rs, int a_cs, double *norm) |
| void | bli_sinvscalv (conj_t conj, int n, float *alpha, float *x, int incx) |
| void | bli_dinvscalv (conj_t conj, int n, double *alpha, double *x, int incx) |
| void | bli_csinvscalv (conj_t conj, int n, float *alpha, scomplex *x, int incx) |
| void | bli_cinvscalv (conj_t conj, int n, scomplex *alpha, scomplex *x, int incx) |
| void | bli_zdinvscalv (conj_t conj, int n, double *alpha, dcomplex *x, int incx) |
| void | bli_zinvscalv (conj_t conj, int n, dcomplex *alpha, dcomplex *x, int incx) |
| void | bli_sinvscalm (conj_t conj, int m, int n, float *alpha, float *a, int a_rs, int a_cs) |
| void | bli_dinvscalm (conj_t conj, int m, int n, double *alpha, double *a, int a_rs, int a_cs) |
| void | bli_csinvscalm (conj_t conj, int m, int n, float *alpha, scomplex *a, int a_rs, int a_cs) |
| void | bli_cinvscalm (conj_t conj, int m, int n, scomplex *alpha, scomplex *a, int a_rs, int a_cs) |
| void | bli_zdinvscalm (conj_t conj, int m, int n, double *alpha, dcomplex *a, int a_rs, int a_cs) |
| void | bli_zinvscalm (conj_t conj, int m, int n, dcomplex *alpha, dcomplex *a, int a_rs, int a_cs) |
| void | bli_snrm2 (int n, float *x, int incx, float *norm) |
| void | bli_dnrm2 (int n, double *x, int incx, double *norm) |
| void | bli_cnrm2 (int n, scomplex *x, int incx, float *norm) |
| void | bli_znrm2 (int n, dcomplex *x, int incx, double *norm) |
| void | bli_sscal (int n, float *alpha, float *x, int incx) |
| void | bli_dscal (int n, double *alpha, double *x, int incx) |
| void | bli_csscal (int n, float *alpha, scomplex *x, int incx) |
| void | bli_cscal (int n, scomplex *alpha, scomplex *x, int incx) |
| void | bli_zdscal (int n, double *alpha, dcomplex *x, int incx) |
| void | bli_zscal (int n, dcomplex *alpha, dcomplex *x, int incx) |
| void | bli_sscalv (conj_t conj, int n, float *alpha, float *x, int incx) |
| void | bli_dscalv (conj_t conj, int n, double *alpha, double *x, int incx) |
| void | bli_csscalv (conj_t conj, int n, float *alpha, scomplex *x, int incx) |
| void | bli_cscalv (conj_t conj, int n, scomplex *alpha, scomplex *x, int incx) |
| void | bli_zdscalv (conj_t conj, int n, double *alpha, dcomplex *x, int incx) |
| void | bli_zscalv (conj_t conj, int n, dcomplex *alpha, dcomplex *x, int incx) |
| void | bli_sscalm (conj_t conj, int m, int n, float *alpha, float *a, int a_rs, int a_cs) |
| void | bli_dscalm (conj_t conj, int m, int n, double *alpha, double *a, int a_rs, int a_cs) |
| void | bli_csscalm (conj_t conj, int m, int n, float *alpha, scomplex *a, int a_rs, int a_cs) |
| void | bli_cscalm (conj_t conj, int m, int n, scomplex *alpha, scomplex *a, int a_rs, int a_cs) |
| void | bli_zdscalm (conj_t conj, int m, int n, double *alpha, dcomplex *a, int a_rs, int a_cs) |
| void | bli_zscalm (conj_t conj, int m, int n, dcomplex *alpha, dcomplex *a, int a_rs, int a_cs) |
| void | bli_sscalmr (uplo_t uplo, int m, int n, float *alpha, float *a, int a_rs, int a_cs) |
| void | bli_dscalmr (uplo_t uplo, int m, int n, double *alpha, double *a, int a_rs, int a_cs) |
| void | bli_csscalmr (uplo_t uplo, int m, int n, float *alpha, scomplex *a, int a_rs, int a_cs) |
| void | bli_cscalmr (uplo_t uplo, int m, int n, scomplex *alpha, scomplex *a, int a_rs, int a_cs) |
| void | bli_zdscalmr (uplo_t uplo, int m, int n, double *alpha, dcomplex *a, int a_rs, int a_cs) |
| void | bli_zscalmr (uplo_t uplo, int m, int n, dcomplex *alpha, dcomplex *a, int a_rs, int a_cs) |
| void | bli_sswap (int n, float *x, int incx, float *y, int incy) |
| void | bli_dswap (int n, double *x, int incx, double *y, int incy) |
| void | bli_cswap (int n, scomplex *x, int incx, scomplex *y, int incy) |
| void | bli_zswap (int n, dcomplex *x, int incx, dcomplex *y, int incy) |
| void | bli_sswapv (int n, float *x, int incx, float *y, int incy) |
| void | bli_dswapv (int n, double *x, int incx, double *y, int incy) |
| void | bli_cswapv (int n, scomplex *x, int incx, scomplex *y, int incy) |
| void | bli_zswapv (int n, dcomplex *x, int incx, dcomplex *y, int incy) |
| void | bli_sswapmt (trans_t trans, int m, int n, float *a, int a_rs, int a_cs, float *b, int b_rs, int b_cs) |
| void | bli_dswapmt (trans_t trans, int m, int n, double *a, int a_rs, int a_cs, double *b, int b_rs, int b_cs) |
| void | bli_cswapmt (trans_t trans, int m, int n, scomplex *a, int a_rs, int a_cs, scomplex *b, int b_rs, int b_cs) |
| void | bli_zswapmt (trans_t trans, int m, int n, dcomplex *a, int a_rs, int a_cs, dcomplex *b, int b_rs, int b_cs) |
References cblas_icamax() and F77_icamax().
Referenced by FLA_Amax_external(), FLA_LU_piv_opc_var3(), FLA_LU_piv_opc_var4(), FLA_LU_piv_opc_var5(), and FLA_SA_LU_unb().
{
    // Body of a thin "amax" wrapper: the icamax routine of the BLAS
    // interface selected at build time is called and its result is stored
    // through index.
    // NOTE(review): the signature is not shown beside this body; the icamax
    // calls suggest it belongs to bli_camax() -- confirm.
    int max_pos;

#ifdef BLIS_ENABLE_CBLAS_INTERFACES
    // CBLAS interface: arguments go by value, result is used unchanged.
    max_pos = cblas_icamax( n, x, incx );
#else
    // Fortran-77 interface: n and incx go by address, and one is subtracted
    // from the result (presumably turning a 1-based position into a 0-based
    // one -- confirm against the BLAS in use).
    max_pos = F77_icamax( &n, x, &incx ) - 1;
#endif

    *index = max_pos;
}
References cblas_scasum() and F77_scasum().
Referenced by FLA_Asum_external().
{
    // Body of a thin "asum" wrapper: the scasum routine of the BLAS
    // interface selected at build time is called and its result is stored
    // through norm.
    // NOTE(review): the signature is not shown beside this body; the scasum
    // calls suggest it belongs to bli_casum() (float *norm) -- confirm.
    float abs_sum;

#ifdef BLIS_ENABLE_CBLAS_INTERFACES
    // CBLAS interface: n and incx are passed by value.
    abs_sum = cblas_scasum( n, x, incx );
#else
    // Fortran-77 interface: n and incx are passed by address.
    abs_sum = F77_scasum( &n, x, &incx );
#endif

    *norm = abs_sum;
}
References cblas_caxpy() and F77_caxpy().
Referenced by bli_caxpymt(), bli_caxpysmt(), bli_caxpysv(), and bli_caxpyv().
{
    // Body of a thin caxpy wrapper: the operands are handed unchanged to the
    // caxpy routine of the BLAS interface selected at build time.
    // NOTE(review): the signature is not shown beside this body; the caxpy
    // calls suggest it belongs to bli_caxpy() -- confirm.
#ifndef BLIS_ENABLE_CBLAS_INTERFACES
    // Fortran-77 interface: n and both increments are passed by address.
    F77_caxpy( &n, alpha, x, &incx, y, &incy );
#else
    // CBLAS interface: n and both increments are passed by value.
    cblas_caxpy( n, alpha, x, incx, y, incy );
#endif
}
| void bli_caxpymrt | ( | uplo_t | uplo, |
| trans_t | trans, | ||
| int | m, | ||
| int | n, | ||
| scomplex * | alpha, | ||
| scomplex * | a, | ||
| int | a_rs, | ||
| int | a_cs, | ||
| scomplex * | b, | ||
| int | b_rs, | ||
| int | b_cs | ||
| ) |
References bli_caxpyv(), bli_does_trans(), bli_is_col_storage(), bli_is_lower(), bli_proj_trans_to_conj(), and bli_zero_dim2().
Referenced by bli_cher2k(), bli_cherk(), and FLA_Axpyrt_external().
{
    // Applies bli_caxpyv() vector by vector over the triangle of B selected
    // by uplo, reading the matching (optionally transposed/conjugated)
    // vectors of A. The walk follows B's storage: columns when B is
    // column-stored, rows otherwise.
    scomplex* a_vec;
    scomplex* b_vec;
    int       a_ld, a_inc;
    int       b_ld, b_inc;
    int       num_vecs;
    int       len;
    int       len_max;
    int       len_shrinks;
    int       k;
    conj_t    conj;

    // Nothing to do when a dimension is zero.
    if ( bli_zero_dim2( m, n ) ) return;

    // The strides depend only on how B is stored; the vector count, the
    // maximum vector length and the direction in which the length changes
    // additionally depend on uplo.
    if ( bli_is_col_storage( b_rs, b_cs ) )
    {
        a_ld = a_cs;  a_inc = a_rs;
        b_ld = b_cs;  b_inc = b_rs;

        if ( bli_is_lower( uplo ) )
        {
            num_vecs    = bli_min( m, n );
            len_max     = m;
            len_shrinks = TRUE;
        }
        else // upper
        {
            num_vecs    = n;
            len_max     = bli_min( m, n );
            len_shrinks = FALSE;
        }
    }
    else // row storage
    {
        a_ld = a_rs;  a_inc = a_cs;
        b_ld = b_rs;  b_inc = b_cs;

        if ( bli_is_lower( uplo ) )
        {
            num_vecs    = m;
            len_max     = bli_min( m, n );
            len_shrinks = FALSE;
        }
        else // upper
        {
            num_vecs    = bli_min( m, n );
            len_max     = n;
            len_shrinks = TRUE;
        }
    }

    // A transposed A is walked with its two strides exchanged.
    if ( bli_does_trans( trans ) )
    {
        bli_swap_ints( a_ld, a_inc );
    }

    // Only the conjugation part of trans is forwarded to bli_caxpyv().
    conj = bli_proj_trans_to_conj( trans );

    for ( k = 0; k < num_vecs; k++ )
    {
        if ( len_shrinks )
        {
            // Vectors start on the diagonal and get one element shorter
            // with every step.
            len   = len_max - k;
            a_vec = a + k*a_ld + k*a_inc;
            b_vec = b + k*b_ld + k*b_inc;
        }
        else
        {
            // Vectors start at the leading edge and grow by one element
            // per step until len_max is reached.
            len   = bli_min( k + 1, len_max );
            a_vec = a + k*a_ld;
            b_vec = b + k*b_ld;
        }

        bli_caxpyv( conj,
                    len,
                    alpha,
                    a_vec, a_inc,
                    b_vec, b_inc );
    }
}
| void bli_caxpymt | ( | trans_t | trans, |
| int | m, | ||
| int | n, | ||
| scomplex * | alpha, | ||
| scomplex * | a, | ||
| int | a_rs, | ||
| int | a_cs, | ||
| scomplex * | b, | ||
| int | b_rs, | ||
| int | b_cs | ||
| ) |
References bli_callocv(), bli_caxpy(), bli_ccopyv(), bli_cfree(), bli_does_conj(), bli_does_notrans(), bli_does_trans(), bli_is_col_storage(), bli_is_row_storage(), bli_is_vector(), bli_proj_trans_to_conj(), bli_vector_dim(), bli_vector_inc(), bli_zero_dim2(), and BLIS_NO_TRANSPOSE.
Referenced by bli_cgemm(), bli_chemm(), bli_csymm(), bli_ctrmmsx(), bli_ctrsmsx(), FLA_Axpy_external(), and FLA_Axpyt_external().
{
    // Applies bli_caxpy() to each vector (column or row) of A and B, with A
    // optionally transposed and/or conjugated according to trans. When
    // conjugation is requested, every vector of A is first copied through
    // bli_ccopyv() into a unit-stride scratch buffer.
    scomplex* a_vec;
    scomplex* b_vec;
    scomplex* a_conj;
    int       a_conj_inc;
    int       a_ld, a_inc;
    int       b_ld, b_inc;
    int       num_vecs;
    int       len;
    int       k;

    // Nothing to do when a dimension is zero.
    if ( bli_zero_dim2( m, n ) ) return;

    if ( !( bli_is_vector( m, n ) ) )
    {
        // Matrix operands: start from a column-by-column walk.
        num_vecs = n;
        len      = m;
        a_ld     = a_cs;
        a_inc    = a_rs;
        b_ld     = b_cs;
        b_inc    = b_rs;

        // A transposed A is walked with its two strides exchanged.
        if ( bli_does_trans( trans ) )
        {
            bli_swap_ints( a_ld, a_inc );
        }

        // If B is row-stored and A is effectively row-stored once trans is
        // taken into account, walk by rows instead for better locality.
        if ( bli_is_row_storage( b_rs, b_cs ) &&
             ( ( bli_is_col_storage( a_rs, a_cs ) && bli_does_trans( trans ) ) ||
               ( bli_is_row_storage( a_rs, a_cs ) && bli_does_notrans( trans ) ) ) )
        {
            bli_swap_ints( num_vecs, len );
            bli_swap_ints( a_ld, a_inc );
            bli_swap_ints( b_ld, b_inc );
        }
    }
    else
    {
        // Vector operands: a single pass, so the underlying axpy runs once.
        // The leading dimensions only ever get multiplied by k == 0.
        num_vecs = 1;
        len      = bli_vector_dim( m, n );
        a_ld     = 1;
        a_inc    = bli_vector_inc( trans, m, n, a_rs, a_cs );
        b_ld     = 1;
        b_inc    = bli_vector_inc( BLIS_NO_TRANSPOSE, m, n, b_rs, b_cs );
    }

    if ( !( bli_does_conj( trans ) ) )
    {
        // No conjugation: A's vectors are consumed in place.
        for ( k = 0; k < num_vecs; k++ )
        {
            a_vec = a + k*a_ld;
            b_vec = b + k*b_ld;

            bli_caxpy( len,
                       alpha,
                       a_vec, a_inc,
                       b_vec, b_inc );
        }
    }
    else
    {
        conj_t conj = bli_proj_trans_to_conj( trans );

        // One scratch vector is allocated up front and reused for every
        // vector of A, then released after the loop.
        a_conj     = bli_callocv( len );
        a_conj_inc = 1;

        for ( k = 0; k < num_vecs; k++ )
        {
            a_vec = a + k*a_ld;
            b_vec = b + k*b_ld;

            bli_ccopyv( conj,
                        len,
                        a_vec,  a_inc,
                        a_conj, a_conj_inc );

            bli_caxpy( len,
                       alpha,
                       a_conj, a_conj_inc,
                       b_vec,  b_inc );
        }

        bli_cfree( a_conj );
    }
}
| void bli_caxpysmt | ( | trans_t | trans, |
| int | m, | ||
| int | n, | ||
| scomplex * | alpha0, | ||
| scomplex * | alpha1, | ||
| scomplex * | a, | ||
| int | a_rs, | ||
| int | a_cs, | ||
| scomplex * | beta, | ||
| scomplex * | b, | ||
| int | b_rs, | ||
| int | b_cs | ||
| ) |
References bli_callocv(), bli_caxpy(), bli_ccopyv(), bli_cfree(), bli_cscal(), bli_does_conj(), bli_does_notrans(), bli_does_trans(), bli_is_col_storage(), bli_is_row_storage(), bli_is_vector(), bli_proj_trans_to_conj(), bli_vector_dim(), bli_vector_inc(), bli_zero_dim2(), BLIS_NO_TRANSPOSE, scomplex::imag, and scomplex::real.
Referenced by FLA_Axpys_external().
{
    // For each vector (column or row) of B: calls bli_cscal() with beta on
    // it, then bli_caxpy() with the complex product alpha0 * alpha1 and the
    // matching vector of A. A is optionally transposed and/or conjugated
    // according to trans; conjugation goes through a unit-stride scratch
    // copy made by bli_ccopyv().
    scomplex* a_vec;
    scomplex* b_vec;
    scomplex* a_conj;
    scomplex  alpha01;
    int       a_conj_inc;
    int       a_ld, a_inc;
    int       b_ld, b_inc;
    int       num_vecs;
    int       len;
    int       k;

    // Nothing to do when a dimension is zero.
    if ( bli_zero_dim2( m, n ) ) return;

    // alpha01 = alpha0 * alpha1 (complex multiplication).
    alpha01.real = alpha0->real * alpha1->real - alpha0->imag * alpha1->imag;
    alpha01.imag = alpha0->real * alpha1->imag + alpha0->imag * alpha1->real;

    if ( !( bli_is_vector( m, n ) ) )
    {
        // Matrix operands: start from a column-by-column walk.
        num_vecs = n;
        len      = m;
        a_ld     = a_cs;
        a_inc    = a_rs;
        b_ld     = b_cs;
        b_inc    = b_rs;

        // A transposed A is walked with its two strides exchanged.
        if ( bli_does_trans( trans ) )
        {
            bli_swap_ints( a_ld, a_inc );
        }

        // If B is row-stored and A is effectively row-stored once trans is
        // taken into account, walk by rows instead for better locality.
        if ( bli_is_row_storage( b_rs, b_cs ) &&
             ( ( bli_is_col_storage( a_rs, a_cs ) && bli_does_trans( trans ) ) ||
               ( bli_is_row_storage( a_rs, a_cs ) && bli_does_notrans( trans ) ) ) )
        {
            bli_swap_ints( num_vecs, len );
            bli_swap_ints( a_ld, a_inc );
            bli_swap_ints( b_ld, b_inc );
        }
    }
    else
    {
        // Vector operands: a single pass, so the underlying axpy runs once.
        // The leading dimensions only ever get multiplied by k == 0.
        num_vecs = 1;
        len      = bli_vector_dim( m, n );
        a_ld     = 1;
        a_inc    = bli_vector_inc( trans, m, n, a_rs, a_cs );
        b_ld     = 1;
        b_inc    = bli_vector_inc( BLIS_NO_TRANSPOSE, m, n, b_rs, b_cs );
    }

    if ( !( bli_does_conj( trans ) ) )
    {
        // No conjugation: A's vectors are consumed in place.
        for ( k = 0; k < num_vecs; k++ )
        {
            a_vec = a + k*a_ld;
            b_vec = b + k*b_ld;

            bli_cscal( len,
                       beta,
                       b_vec, b_inc );

            bli_caxpy( len,
                       &alpha01,
                       a_vec, a_inc,
                       b_vec, b_inc );
        }
    }
    else
    {
        conj_t conj = bli_proj_trans_to_conj( trans );

        // One scratch vector is allocated up front and reused for every
        // vector of A, then released after the loop.
        a_conj     = bli_callocv( len );
        a_conj_inc = 1;

        for ( k = 0; k < num_vecs; k++ )
        {
            a_vec = a + k*a_ld;
            b_vec = b + k*b_ld;

            bli_ccopyv( conj,
                        len,
                        a_vec,  a_inc,
                        a_conj, a_conj_inc );

            bli_cscal( len,
                       beta,
                       b_vec, b_inc );

            bli_caxpy( len,
                       &alpha01,
                       a_conj, a_conj_inc,
                       b_vec,  b_inc );
        }

        bli_cfree( a_conj );
    }
}
| void bli_caxpysv | ( | int | n, |
| scomplex * | alpha0, | ||
| scomplex * | alpha1, | ||
| scomplex * | x, | ||
| int | incx, | ||
| scomplex * | beta, | ||
| scomplex * | y, | ||
| int | incy | ||
| ) |
References bli_caxpy(), bli_cscal(), bli_zero_dim1(), scomplex::imag, and scomplex::real.
Referenced by FLA_Lyap_h_opc_var2(), FLA_Lyap_h_opc_var3(), FLA_Lyap_h_opc_var4(), FLA_Lyap_n_opc_var2(), FLA_Lyap_n_opc_var3(), and FLA_Lyap_n_opc_var4().
{
    // Calls bli_cscal() with beta on y, then bli_caxpy() with the complex
    // product alpha0 * alpha1 and x. Does nothing when bli_zero_dim1( n )
    // holds.
    scomplex alpha01;

    if ( !( bli_zero_dim1( n ) ) )
    {
        // alpha01 = alpha0 * alpha1 (complex multiplication).
        alpha01.real = alpha0->real * alpha1->real - alpha0->imag * alpha1->imag;
        alpha01.imag = alpha0->real * alpha1->imag + alpha0->imag * alpha1->real;

        bli_cscal( n, beta, y, incy );

        bli_caxpy( n, &alpha01, x, incx, y, incy );
    }
}
| void bli_caxpyv | ( | conj_t | conj, |
| int | n, | ||
| scomplex * | alpha, | ||
| scomplex * | x, | ||
| int | incx, | ||
| scomplex * | y, | ||
| int | incy | ||
| ) |
References bli_callocv(), bli_caxpy(), bli_ccopyv(), bli_cfree(), bli_is_conj(), and bli_zero_dim1().
Referenced by bli_caxpymrt(), bli_cgemv(), bli_chemv(), bli_ctrmvsx(), bli_ctrsvsx(), FLA_Apply_H2_UT_l_opc_var1(), FLA_Apply_H2_UT_r_opc_var1(), FLA_Apply_HUD_UT_l_opc_var1(), FLA_Bidiag_UT_u_step_ofc_var2(), FLA_Bidiag_UT_u_step_ofc_var3(), FLA_Bidiag_UT_u_step_ofc_var4(), FLA_Bidiag_UT_u_step_opc_var2(), FLA_Bidiag_UT_u_step_opc_var3(), FLA_Bidiag_UT_u_step_opc_var4(), FLA_Bidiag_UT_u_step_opc_var5(), FLA_Eig_gest_il_opc_var1(), FLA_Eig_gest_il_opc_var2(), FLA_Eig_gest_il_opc_var3(), FLA_Eig_gest_il_opc_var4(), FLA_Eig_gest_il_opc_var5(), FLA_Eig_gest_iu_opc_var1(), FLA_Eig_gest_iu_opc_var2(), FLA_Eig_gest_iu_opc_var3(), FLA_Eig_gest_iu_opc_var4(), FLA_Eig_gest_iu_opc_var5(), FLA_Eig_gest_nl_opc_var1(), FLA_Eig_gest_nl_opc_var2(), FLA_Eig_gest_nl_opc_var4(), FLA_Eig_gest_nl_opc_var5(), FLA_Eig_gest_nu_opc_var1(), FLA_Eig_gest_nu_opc_var2(), FLA_Eig_gest_nu_opc_var4(), FLA_Eig_gest_nu_opc_var5(), FLA_Hess_UT_step_ofc_var2(), FLA_Hess_UT_step_ofc_var3(), FLA_Hess_UT_step_ofc_var4(), FLA_Hess_UT_step_opc_var2(), FLA_Hess_UT_step_opc_var3(), FLA_Hess_UT_step_opc_var4(), FLA_Hess_UT_step_opc_var5(), FLA_Tridiag_UT_l_step_ofc_var2(), FLA_Tridiag_UT_l_step_ofc_var3(), FLA_Tridiag_UT_l_step_opc_var1(), FLA_Tridiag_UT_l_step_opc_var2(), and FLA_Tridiag_UT_l_step_opc_var3().
{
    // Calls bli_caxpy() on x and y. When conj requests conjugation, x is
    // first copied by bli_ccopyv() (which receives conj) into a temporary
    // unit-stride vector, and that copy is used in place of x.
    scomplex* x_conj;
    int       x_conj_inc;

    // Nothing to do for a zero-length vector.
    if ( bli_zero_dim1( n ) ) return;

    if ( !( bli_is_conj( conj ) ) )
    {
        // No conjugation: x is used directly with its own stride.
        bli_caxpy( n,
                   alpha,
                   x, incx,
                   y, incy );
        return;
    }

    // Conjugation requested: build the scratch copy, use it, release it.
    x_conj     = bli_callocv( n );
    x_conj_inc = 1;

    bli_ccopyv( conj,
                n,
                x,      incx,
                x_conj, x_conj_inc );

    bli_caxpy( n,
               alpha,
               x_conj, x_conj_inc,
               y, incy );

    bli_cfree( x_conj );
}
| void bli_cccopymr | ( | uplo_t | uplo, |
| int | m, | ||
| int | n, | ||
| scomplex * | a, | ||
| int | a_rs, | ||
| int | a_cs, | ||
| scomplex * | b, | ||
| int | b_rs, | ||
| int | b_cs | ||
| ) |
References bli_ccopyv(), bli_is_row_storage(), bli_is_upper(), bli_zero_dim2(), and BLIS_NO_TRANSPOSE.
{
    // Copies the triangle of A selected by uplo into B, one vector at a
    // time, through bli_ccopyv(). The walk is by columns unless B is
    // row-stored, in which case it is by rows with uplo toggled.
    //
    // NOTE(review): bli_ccopyv() is prototyped with a conj_t first
    // parameter, yet the trans_t-named constant BLIS_NO_TRANSPOSE is passed
    // here. Confirm that it has the same value as the "no conjugate" conj_t
    // constant, or switch to that constant.
    scomplex* a_vec;
    scomplex* b_vec;
    int       a_ld, a_inc;
    int       b_ld, b_inc;
    int       num_vecs;
    int       len_max;
    int       len;
    int       walk_upper;
    int       k;

    // Nothing to do when a dimension is zero.
    if ( bli_zero_dim2( m, n ) ) return;

    // Default: column-by-column walk.
    num_vecs = n;
    len_max  = m;
    a_ld     = a_cs;
    a_inc    = a_rs;
    b_ld     = b_cs;
    b_inc    = b_rs;

    // Row-stored B: walk by rows for better locality. Rows of one triangle
    // have the shape of columns of the other, hence the uplo toggle
    // (bli_toggle_uplo() presumably flips uplo in place -- confirm).
    if ( bli_is_row_storage( b_rs, b_cs ) )
    {
        bli_swap_ints( num_vecs, len_max );
        bli_swap_ints( a_ld, a_inc );
        bli_swap_ints( b_ld, b_inc );
        bli_toggle_uplo( uplo );
    }

    walk_upper = bli_is_upper( uplo );

    for ( k = 0; k < num_vecs; k++ )
    {
        if ( walk_upper )
        {
            // Vectors start at the leading edge and grow by one element
            // per step until len_max is reached.
            len   = bli_min( k + 1, len_max );
            a_vec = a + k*a_ld;
            b_vec = b + k*b_ld;
        }
        else // lower
        {
            // Vectors start on the diagonal and shrink by one element per
            // step; stop as soon as nothing is left to copy.
            len   = bli_max( 0, len_max - k );
            a_vec = a + k*a_ld + k*a_inc;
            b_vec = b + k*b_ld + k*b_inc;

            if ( len <= 0 ) break;
        }

        bli_ccopyv( BLIS_NO_TRANSPOSE,
                    len,
                    a_vec, a_inc,
                    b_vec, b_inc );
    }
}
| void bli_cccopymrt | ( | uplo_t | uplo, |
| trans_t | trans, | ||
| int | m, | ||
| int | n, | ||
| scomplex * | a, | ||
| int | a_rs, | ||
| int | a_cs, | ||
| scomplex * | b, | ||
| int | b_rs, | ||
| int | b_cs | ||
| ) |
References bli_ccopyv(), bli_does_trans(), bli_is_col_storage(), bli_is_lower(), bli_proj_trans_to_conj(), and bli_zero_dim2().
{
    // bli_cccopymrt: copies the uplo-selected triangle/trapezoid of B's index
    // space from A, where A may be read transposed and/or conjugated as
    // encoded in trans. Each segment is copied with one bli_ccopyv() call.
    scomplex* a_j;
    scomplex* b_j;
    int       lda, inca;
    int       ldb, incb;
    int       n_iter;
    int       n_elem;
    int       n_elem_max;
    int       n_elem_is_descending;
    int       j;
    conj_t    conj;

    // Nothing to copy when either dimension is zero.
    if ( bli_zero_dim2( m, n ) ) return;

    // The conjugation flag depends only on trans.
    conj = bli_proj_trans_to_conj( trans );

    // Pick the traversal from B's storage format; the strides are the same
    // for both triangles, only the iteration counts and the direction in
    // which the segment length changes depend on uplo.
    if ( bli_is_col_storage( b_rs, b_cs ) )
    {
        lda  = a_cs;
        inca = a_rs;
        ldb  = b_cs;
        incb = b_rs;

        if ( bli_is_lower( uplo ) )
        {
            n_iter               = bli_min( m, n );
            n_elem_max           = m;
            n_elem_is_descending = TRUE;
        }
        else // upper
        {
            n_iter               = n;
            n_elem_max           = bli_min( m, n );
            n_elem_is_descending = FALSE;
        }
    }
    else // B is row-stored
    {
        lda  = a_rs;
        inca = a_cs;
        ldb  = b_rs;
        incb = b_cs;

        if ( bli_is_lower( uplo ) )
        {
            n_iter               = m;
            n_elem_max           = bli_min( m, n );
            n_elem_is_descending = FALSE;
        }
        else // upper
        {
            n_iter               = bli_min( m, n );
            n_elem_max           = n;
            n_elem_is_descending = TRUE;
        }
    }

    // Reading A transposed exchanges the roles of its two strides.
    if ( bli_does_trans( trans ) )
    {
        bli_swap_ints( lda, inca );
    }

    for ( j = 0; j < n_iter; j++ )
    {
        if ( n_elem_is_descending )
        {
            // Segment starts on the diagonal and shrinks by one per step.
            n_elem = n_elem_max - j;
            a_j    = a + j*lda + j*inca;
            b_j    = b + j*ldb + j*incb;
        }
        else
        {
            // Segment starts at the leading edge and grows up to n_elem_max.
            n_elem = bli_min( j + 1, n_elem_max );
            a_j    = a + j*lda;
            b_j    = b + j*ldb;
        }

        bli_ccopyv( conj,
                    n_elem,
                    a_j, inca,
                    b_j, incb );
    }
}
| void bli_cccopymt | ( | trans_t | trans, |
| int | m, | ||
| int | n, | ||
| scomplex * | a, | ||
| int | a_rs, | ||
| int | a_cs, | ||
| scomplex * | b, | ||
| int | b_rs, | ||
| int | b_cs | ||
| ) |
References bli_ccopyv(), bli_does_trans(), bli_is_row_storage(), bli_is_vector(), bli_proj_trans_to_conj(), bli_vector_dim(), bli_vector_inc(), bli_zero_dim2(), and BLIS_NO_TRANSPOSE.
{
    // bli_cccopymt: copies A into the m-by-n matrix B, reading A transposed
    // and/or conjugated as encoded in trans, with one bli_ccopyv() call per
    // column (or per row when B is row-stored).
    scomplex* a_j;
    scomplex* b_j;
    int       lda, inca;
    int       ldb, incb;
    int       n_iter;
    int       n_elem;
    int       j;
    conj_t    conj;

    // Nothing to copy when either dimension is zero.
    if ( bli_zero_dim2( m, n ) ) return;

    // The conjugation flag depends only on trans.
    conj = bli_proj_trans_to_conj( trans );

    if ( !bli_is_vector( m, n ) )
    {
        // Matrix case: by default iterate over the n columns of B.
        n_iter = n;
        n_elem = m;
        ldb    = b_cs;
        incb   = b_rs;

        // Reading A transposed exchanges the roles of its two strides.
        if ( bli_does_trans( trans ) )
        {
            lda  = a_rs;
            inca = a_cs;
        }
        else
        {
            lda  = a_cs;
            inca = a_rs;
        }

        // If B is row-stored, iterate over rows instead for better
        // spatial locality.
        if ( bli_is_row_storage( b_rs, b_cs ) )
        {
            bli_swap_ints( n_iter, n_elem );
            bli_swap_ints( lda, inca );
            bli_swap_ints( ldb, incb );
        }
    }
    else
    {
        // Vector case: a single copy covers everything. The leading
        // dimensions are only ever multiplied by j == 0, so their value
        // does not matter.
        n_iter = 1;
        n_elem = bli_vector_dim( m, n );
        lda    = 1;
        ldb    = 1;
        inca   = bli_vector_inc( trans, m, n, a_rs, a_cs );
        incb   = bli_vector_inc( BLIS_NO_TRANSPOSE, m, n, b_rs, b_cs );
    }

    for ( j = 0; j < n_iter; ++j )
    {
        a_j = a + j*lda;
        b_j = b + j*ldb;

        bli_ccopyv( conj,
                    n_elem,
                    a_j, inca,
                    b_j, incb );
    }
}
| void bli_cconjm | ( | int | m, |
| int | n, | ||
| scomplex * | a, | ||
| int | a_rs, | ||
| int | a_cs | ||
| ) |
References bli_is_row_storage(), bli_is_vector(), bli_sm1(), bli_sscal(), bli_vector_dim(), bli_vector_inc(), bli_zero_dim2(), and BLIS_NO_TRANSPOSE.
Referenced by bli_cgemm(), and FLA_Conjugate().
{
    // bli_cconjm: scales, in place, the second float of every element of the
    // m-by-n matrix A by the value of bli_sm1() using bli_sscal().
    // NOTE(review): this conjugates A provided bli_sm1() yields -1.0F and
    // scomplex is laid out as { real, imag } -- confirm against the headers.
    float  m1 = bli_sm1();
    float* a_imag;
    int    lda, inca;
    int    n_iter;
    int    n_elem;
    int    j;

    // Nothing to do when either dimension is zero.
    if ( bli_zero_dim2( m, n ) ) return;

    if ( !bli_is_vector( m, n ) )
    {
        // Matrix case: by default iterate over columns.
        n_iter = n;
        n_elem = m;
        lda    = a_cs;
        inca   = a_rs;

        // If A is row-stored, iterate over rows instead for better
        // spatial locality.
        if ( bli_is_row_storage( a_rs, a_cs ) )
        {
            bli_swap_ints( n_iter, n_elem );
            bli_swap_ints( lda, inca );
        }
    }
    else
    {
        // Vector case: make sure the underlying scal runs exactly once.
        // lda is only ever multiplied by j == 0.
        n_iter = 1;
        n_elem = bli_vector_dim( m, n );
        lda    = 1;
        inca   = bli_vector_inc( BLIS_NO_TRANSPOSE, m, n, a_rs, a_cs );
    }

    for ( j = 0; j < n_iter; ++j )
    {
        // View the column/row as floats, skip the first float of the first
        // element, and step two floats per complex stride.
        a_imag = ( float* )( a + j*lda ) + 1;

        bli_sscal( n_elem,
                   &m1,
                   a_imag, 2*inca );
    }
}
| void bli_cconjmr | ( | uplo_t | uplo, |
| int | m, | ||
| int | n, | ||
| scomplex * | a, | ||
| int | a_rs, | ||
| int | a_cs | ||
| ) |
References bli_is_row_storage(), bli_is_upper(), bli_sm1(), bli_sscal(), and bli_zero_dim2().
Referenced by bli_chemm(), bli_ctrmm(), bli_ctrsm(), and FLA_Conjugate_r().
{
    // bli_cconjmr: like a full-matrix conjugation but restricted to the
    // triangle (or trapezoid) of A selected by uplo, diagonal included. The
    // second float of each referenced element is scaled by bli_sm1().
    // NOTE(review): assumes bli_sm1() yields -1.0F and scomplex is laid out
    // as { real, imag } -- confirm against the headers.
    float  m1 = bli_sm1();
    float* a_imag;
    int    lda, inca;
    int    n_iter;
    int    n_elem_max;
    int    n_elem;
    int    j;

    // Nothing to do when either dimension is zero.
    if ( bli_zero_dim2( m, n ) ) return;

    // Default traversal: one iteration per column.
    n_iter     = n;
    n_elem_max = m;
    lda        = a_cs;
    inca       = a_rs;

    // For row-stored A iterate over rows; exchanging rows and columns also
    // exchanges which triangle is referenced, hence the uplo toggle.
    if ( bli_is_row_storage( a_rs, a_cs ) )
    {
        bli_swap_ints( n_iter, n_elem_max );
        bli_swap_ints( lda, inca );
        bli_toggle_uplo( uplo );
    }

    if ( !bli_is_upper( uplo ) )
    {
        // Lower case: segment j starts on the diagonal and shrinks by one
        // per iteration; stop once nothing is left.
        for ( j = 0; j < n_iter; ++j )
        {
            n_elem = n_elem_max - j;
            if ( n_elem <= 0 ) break;

            a_imag = ( float* )( a + j*lda + j*inca ) + 1;

            bli_sscal( n_elem,
                       &m1,
                       a_imag, 2*inca );
        }
        return;
    }

    // Upper case: segment j starts at the leading edge and holds j + 1
    // elements, capped at n_elem_max.
    for ( j = 0; j < n_iter; ++j )
    {
        n_elem = bli_min( j + 1, n_elem_max );
        a_imag = ( float* )( a + j*lda ) + 1;

        bli_sscal( n_elem,
                   &m1,
                   a_imag, 2*inca );
    }
}
| void bli_cconjv | ( | int | m, |
| scomplex * | x, | ||
| int | incx | ||
| ) |
References bli_sm1(), and bli_sscal().
Referenced by bli_ccopymt(), bli_ccopyv(), bli_cgemv(), bli_cswapmt(), bli_zccopyv(), FLA_Bidiag_UT_u_step_ofc_var3(), FLA_Bidiag_UT_u_step_ofc_var4(), FLA_Bidiag_UT_u_step_opc_var3(), FLA_Bidiag_UT_u_step_opc_var4(), FLA_Househ2_UT_r_opc(), and FLA_LQ_UT_form_Q_opc_var1().
References cblas_ccopy(), and F77_ccopy().
Referenced by bli_ccopymr(), bli_ccopymt(), bli_ccopyv(), and FLA_SA_LU_unb().
{
    // NOTE(review): the signature for this body is missing from the page; the
    // cross-references indicate it is bli_ccopy(). It forwards m, x, incx, y
    // and incy to the BLAS ccopy routine through whichever interface the
    // build selects.
#ifndef BLIS_ENABLE_CBLAS_INTERFACES
    // Fortran-77 interface: scalar arguments are passed by address.
    F77_ccopy( &m,
               x, &incx,
               y, &incy );
#else
    // CBLAS interface: scalar arguments are passed by value.
    cblas_ccopy( m,
                 x, incx,
                 y, incy );
#endif
}
| void bli_ccopymr | ( | uplo_t | uplo, |
| int | m, | ||
| int | n, | ||
| scomplex * | a, | ||
| int | a_rs, | ||
| int | a_cs, | ||
| scomplex * | b, | ||
| int | b_rs, | ||
| int | b_cs | ||
| ) |
References bli_ccopy(), bli_is_row_storage(), bli_is_upper(), and bli_zero_dim2().
Referenced by bli_ccreate_contigmr(), bli_cfree_saved_contigmr(), and FLA_Copyr_external().
{
    // bli_ccopymr: copies the triangle (or trapezoid) of A selected by uplo,
    // diagonal included, into the same positions of B, with one bli_ccopy()
    // call per column (or row) segment.
    scomplex* a_j;
    scomplex* b_j;
    int       lda, inca;
    int       ldb, incb;
    int       n_iter;
    int       n_elem_max;
    int       n_elem;
    int       j;

    // Nothing to copy when either dimension is zero.
    if ( bli_zero_dim2( m, n ) ) return;

    // Default traversal: one iteration per column.
    n_iter     = n;
    n_elem_max = m;
    lda        = a_cs;
    inca       = a_rs;
    ldb        = b_cs;
    incb       = b_rs;

    // Only when both A and B are row-stored, iterate over rows instead.
    // Exchanging rows and columns also exchanges which triangle is
    // referenced, hence the uplo toggle.
    if ( bli_is_row_storage( b_rs, b_cs ) && bli_is_row_storage( a_rs, a_cs ) )
    {
        bli_swap_ints( n_iter, n_elem_max );
        bli_swap_ints( lda, inca );
        bli_swap_ints( ldb, incb );
        bli_toggle_uplo( uplo );
    }

    if ( !bli_is_upper( uplo ) )
    {
        // Lower case: segment j starts on the diagonal and shrinks by one
        // per iteration; stop once nothing is left.
        for ( j = 0; j < n_iter; j++ )
        {
            n_elem = n_elem_max - j;
            if ( n_elem <= 0 ) break;

            a_j = a + j*lda + j*inca;
            b_j = b + j*ldb + j*incb;

            bli_ccopy( n_elem,
                       a_j, inca,
                       b_j, incb );
        }
        return;
    }

    // Upper case: segment j starts at the leading edge and holds j + 1
    // elements, capped at n_elem_max.
    for ( j = 0; j < n_iter; j++ )
    {
        n_elem = bli_min( j + 1, n_elem_max );
        a_j    = a + j*lda;
        b_j    = b + j*ldb;

        bli_ccopy( n_elem,
                   a_j, inca,
                   b_j, incb );
    }
}
| void bli_ccopymrt | ( | uplo_t | uplo, |
| trans_t | trans, | ||
| int | m, | ||
| int | n, | ||
| scomplex * | a, | ||
| int | a_rs, | ||
| int | a_cs, | ||
| scomplex * | b, | ||
| int | b_rs, | ||
| int | b_cs | ||
| ) |
References bli_ccopyv(), bli_does_trans(), bli_is_col_storage(), bli_is_lower(), bli_proj_trans_to_conj(), and bli_zero_dim2().
Referenced by bli_chemm(), bli_ctrmm(), bli_ctrsm(), FLA_Copyrt_external(), FLA_Lyap_h_opc_var1(), FLA_Lyap_h_opc_var2(), FLA_Lyap_h_opc_var3(), FLA_Lyap_h_opc_var4(), FLA_Lyap_n_opc_var1(), FLA_Lyap_n_opc_var2(), FLA_Lyap_n_opc_var3(), and FLA_Lyap_n_opc_var4().
{
    // bli_ccopymrt: copies the uplo-selected triangle/trapezoid of B's index
    // space from A, where A may be read transposed and/or conjugated as
    // encoded in trans. Each segment is copied with one bli_ccopyv() call.
    scomplex* a_j;
    scomplex* b_j;
    int       lda, inca;
    int       ldb, incb;
    int       n_iter;
    int       n_elem;
    int       n_elem_max;
    int       n_elem_is_descending;
    int       j;
    conj_t    conj;

    // Nothing to copy when either dimension is zero.
    if ( bli_zero_dim2( m, n ) ) return;

    // The conjugation flag depends only on trans.
    conj = bli_proj_trans_to_conj( trans );

    // Pick the traversal from B's storage format; the strides are shared by
    // both triangles, only the counts and the growth direction depend on
    // uplo.
    if ( bli_is_col_storage( b_rs, b_cs ) )
    {
        lda  = a_cs;
        inca = a_rs;
        ldb  = b_cs;
        incb = b_rs;

        if ( bli_is_lower( uplo ) )
        {
            n_iter               = bli_min( m, n );
            n_elem_max           = m;
            n_elem_is_descending = TRUE;
        }
        else // upper
        {
            n_iter               = n;
            n_elem_max           = bli_min( m, n );
            n_elem_is_descending = FALSE;
        }
    }
    else // B is row-stored
    {
        lda  = a_rs;
        inca = a_cs;
        ldb  = b_rs;
        incb = b_cs;

        if ( bli_is_lower( uplo ) )
        {
            n_iter               = m;
            n_elem_max           = bli_min( m, n );
            n_elem_is_descending = FALSE;
        }
        else // upper
        {
            n_iter               = bli_min( m, n );
            n_elem_max           = n;
            n_elem_is_descending = TRUE;
        }
    }

    // Reading A transposed exchanges the roles of its two strides.
    if ( bli_does_trans( trans ) )
    {
        bli_swap_ints( lda, inca );
    }

    for ( j = 0; j < n_iter; j++ )
    {
        if ( n_elem_is_descending )
        {
            // Segment starts on the diagonal and shrinks by one per step.
            n_elem = n_elem_max - j;
            a_j    = a + j*lda + j*inca;
            b_j    = b + j*ldb + j*incb;
        }
        else
        {
            // Segment starts at the leading edge and grows up to n_elem_max.
            n_elem = bli_min( j + 1, n_elem_max );
            a_j    = a + j*lda;
            b_j    = b + j*ldb;
        }

        bli_ccopyv( conj,
                    n_elem,
                    a_j, inca,
                    b_j, incb );
    }
}
| void bli_ccopymt | ( | trans_t | trans, |
| int | m, | ||
| int | n, | ||
| scomplex * | a, | ||
| int | a_rs, | ||
| int | a_cs, | ||
| scomplex * | b, | ||
| int | b_rs, | ||
| int | b_cs | ||
| ) |
References bli_cconjv(), bli_ccopy(), bli_does_conj(), bli_does_notrans(), bli_does_trans(), bli_is_col_storage(), bli_is_row_storage(), bli_is_vector(), bli_vector_dim(), bli_vector_inc(), bli_zero_dim2(), and BLIS_NO_TRANSPOSE.
Referenced by bli_ccreate_contigm(), bli_ccreate_contigmt(), bli_cfree_saved_contigm(), bli_cfree_saved_contigmsr(), bli_cgemm(), bli_chemm(), bli_cher2k(), bli_csymm(), bli_csyr2k(), bli_ctrmmsx(), bli_ctrsmsx(), FLA_Copy_external(), and FLA_Copyt_external().
{
    // bli_ccopymt: copies A into the m-by-n matrix B, reading A transposed
    // if trans says so. Each column/row is copied with bli_ccopy() and, when
    // trans carries a conjugation, the copy in B is then passed through
    // bli_cconjv().
    scomplex* a_j;
    scomplex* b_j;
    int       lda, inca;
    int       ldb, incb;
    int       n_iter;
    int       n_elem;
    int       j;

    // Nothing to copy when either dimension is zero.
    if ( bli_zero_dim2( m, n ) ) return;

    if ( !bli_is_vector( m, n ) )
    {
        // Matrix case: by default iterate over the n columns of B.
        n_iter = n;
        n_elem = m;
        ldb    = b_cs;
        incb   = b_rs;

        // Reading A transposed exchanges the roles of its two strides.
        if ( bli_does_trans( trans ) )
        {
            lda  = a_rs;
            inca = a_cs;
        }
        else
        {
            lda  = a_cs;
            inca = a_rs;
        }

        // Iterate over rows instead only if B is row-stored and A, after the
        // possible transposition, is effectively row-stored as well.
        if ( bli_is_row_storage( b_rs, b_cs ) &&
             ( ( bli_is_col_storage( a_rs, a_cs ) && bli_does_trans( trans ) ) ||
               ( bli_is_row_storage( a_rs, a_cs ) && bli_does_notrans( trans ) ) ) )
        {
            bli_swap_ints( n_iter, n_elem );
            bli_swap_ints( lda, inca );
            bli_swap_ints( ldb, incb );
        }
    }
    else
    {
        // Vector case: a single copy covers everything. The leading
        // dimensions are only ever multiplied by j == 0.
        n_iter = 1;
        n_elem = bli_vector_dim( m, n );
        lda    = 1;
        ldb    = 1;
        inca   = bli_vector_inc( trans, m, n, a_rs, a_cs );
        incb   = bli_vector_inc( BLIS_NO_TRANSPOSE, m, n, b_rs, b_cs );
    }

    for ( j = 0; j < n_iter; j++ )
    {
        a_j = a + j*lda;
        b_j = b + j*ldb;

        bli_ccopy( n_elem,
                   a_j, inca,
                   b_j, incb );

        // Conjugation, if requested, is applied to the destination only.
        if ( bli_does_conj( trans ) )
        {
            bli_cconjv( n_elem,
                        b_j, incb );
        }
    }
}
| void bli_ccopyv | ( | conj_t | conj, |
| int | m, | ||
| scomplex * | x, | ||
| int | incx, | ||
| scomplex * | y, | ||
| int | incy | ||
| ) |
References bli_cconjv(), bli_ccopy(), bli_is_conj(), and bli_zero_dim1().
Referenced by bli_caxpymt(), bli_caxpysmt(), bli_caxpyv(), bli_cccopymr(), bli_cccopymrt(), bli_cccopymt(), bli_ccopymrt(), bli_cgemv(), bli_cger(), bli_chemv(), bli_cher(), bli_cher2(), bli_csymmize(), bli_csymv_blas(), bli_csyr2_blas(), bli_csyr_blas(), bli_ctrmv(), bli_ctrmvsx(), bli_ctrsv(), bli_ctrsvsx(), FLA_Accum_T_UT_fc_opc_var1(), FLA_Accum_T_UT_fr_opc_var1(), FLA_Apply_H2_UT_l_opc_var1(), FLA_Apply_H2_UT_r_opc_var1(), FLA_Apply_HUD_UT_l_opc_var1(), FLA_Bidiag_UT_u_step_ofc_var2(), FLA_Bidiag_UT_u_step_ofc_var3(), FLA_Bidiag_UT_u_step_ofc_var4(), FLA_Bidiag_UT_u_step_opc_var1(), FLA_Bidiag_UT_u_step_opc_var2(), FLA_Bidiag_UT_u_step_opc_var3(), FLA_Bidiag_UT_u_step_opc_var4(), FLA_Bidiag_UT_u_step_opc_var5(), FLA_CAQR2_UT_opc_var1(), FLA_Eig_gest_il_opc_var3(), FLA_Eig_gest_iu_opc_var3(), FLA_Fused_UYx_ZVx_opc_var1(), FLA_Hess_UT_step_ofc_var3(), FLA_Hess_UT_step_opc_var3(), FLA_Hess_UT_step_opc_var4(), FLA_Hess_UT_step_opc_var5(), FLA_LQ_UT_opc_var2(), FLA_QR_UT_opc_var2(), FLA_Tridiag_UT_l_step_ofc_var2(), FLA_Tridiag_UT_l_step_opc_var2(), FLA_Tridiag_UT_l_step_opc_var3(), and FLA_Tridiag_UT_shift_U_l_opc().
{
    // bli_ccopyv: copies the m-element vector x into y with bli_ccopy() and,
    // if conj requests it, passes y through bli_cconjv() afterwards.

    // Nothing to copy for a zero-length vector.
    if ( bli_zero_dim1( m ) ) return;

    bli_ccopy( m,
               x, incx,
               y, incy );

    // A plain copy is all that was asked for.
    if ( !bli_is_conj( conj ) ) return;

    // Conjugation is applied to the destination, never to x.
    bli_cconjv( m,
                y, incy );
}
| void bli_cdcopymr | ( | uplo_t | uplo, |
| int | m, | ||
| int | n, | ||
| scomplex * | a, | ||
| int | a_rs, | ||
| int | a_cs, | ||
| double * | b, | ||
| int | b_rs, | ||
| int | b_cs | ||
| ) |
References bli_cdcopyv(), bli_is_row_storage(), bli_is_upper(), bli_zero_dim2(), and BLIS_NO_TRANSPOSE.
Referenced by FLA_Copyr_external().
{
    // bli_cdcopymr: copies the triangle (or trapezoid) of the single-complex
    // matrix A selected by uplo, diagonal included, into the double-real
    // matrix B, with one bli_cdcopyv() call per column (or row) segment.
    scomplex* a_j;
    double*   b_j;
    int       lda, inca;
    int       ldb, incb;
    int       n_iter;
    int       n_elem_max;
    int       n_elem;
    int       j;

    // Nothing to copy when either dimension is zero.
    if ( bli_zero_dim2( m, n ) ) return;

    // Default traversal: one iteration per column.
    n_iter     = n;
    n_elem_max = m;
    lda        = a_cs;
    inca       = a_rs;
    ldb        = b_cs;
    incb       = b_rs;

    // When B is stored by rows, iterate over rows instead. Exchanging rows
    // and columns also exchanges which triangle is referenced, hence the
    // uplo toggle.
    if ( bli_is_row_storage( b_rs, b_cs ) )
    {
        bli_swap_ints( n_iter, n_elem_max );
        bli_swap_ints( lda, inca );
        bli_swap_ints( ldb, incb );
        bli_toggle_uplo( uplo );
    }

    if ( !bli_is_upper( uplo ) )
    {
        // Lower case: segment j starts on the diagonal and shrinks by one
        // per iteration; stop once nothing is left.
        for ( j = 0; j < n_iter; j++ )
        {
            n_elem = n_elem_max - j;
            if ( n_elem <= 0 ) break;

            a_j = a + j*lda + j*inca;
            b_j = b + j*ldb + j*incb;

            bli_cdcopyv( BLIS_NO_TRANSPOSE,
                         n_elem,
                         a_j, inca,
                         b_j, incb );
        }
        return;
    }

    // Upper case: segment j starts at the leading edge and holds j + 1
    // elements, capped at n_elem_max.
    for ( j = 0; j < n_iter; j++ )
    {
        n_elem = bli_min( j + 1, n_elem_max );
        a_j    = a + j*lda;
        b_j    = b + j*ldb;

        bli_cdcopyv( BLIS_NO_TRANSPOSE,
                     n_elem,
                     a_j, inca,
                     b_j, incb );
    }
}
| void bli_cdcopymrt | ( | uplo_t | uplo, |
| trans_t | trans, | ||
| int | m, | ||
| int | n, | ||
| scomplex * | a, | ||
| int | a_rs, | ||
| int | a_cs, | ||
| double * | b, | ||
| int | b_rs, | ||
| int | b_cs | ||
| ) |
References bli_cdcopyv(), bli_does_trans(), bli_is_col_storage(), bli_is_lower(), bli_proj_trans_to_conj(), and bli_zero_dim2().
Referenced by FLA_Copyrt_external().
{
    // bli_cdcopymrt: copies the uplo-selected triangle/trapezoid of the
    // double-real matrix B from the single-complex matrix A, where A may be
    // read transposed as encoded in trans. Each segment is copied with one
    // bli_cdcopyv() call, which receives the conj component of trans.
    scomplex* a_j;
    double*   b_j;
    int       lda, inca;
    int       ldb, incb;
    int       n_iter;
    int       n_elem;
    int       n_elem_max;
    int       n_elem_is_descending;
    int       j;
    conj_t    conj;

    // Nothing to copy when either dimension is zero.
    if ( bli_zero_dim2( m, n ) ) return;

    // The conjugation flag depends only on trans.
    conj = bli_proj_trans_to_conj( trans );

    // Pick the traversal from B's storage format; the strides are shared by
    // both triangles, only the counts and the growth direction depend on
    // uplo.
    if ( bli_is_col_storage( b_rs, b_cs ) )
    {
        lda  = a_cs;
        inca = a_rs;
        ldb  = b_cs;
        incb = b_rs;

        if ( bli_is_lower( uplo ) )
        {
            n_iter               = bli_min( m, n );
            n_elem_max           = m;
            n_elem_is_descending = TRUE;
        }
        else // upper
        {
            n_iter               = n;
            n_elem_max           = bli_min( m, n );
            n_elem_is_descending = FALSE;
        }
    }
    else // B is row-stored
    {
        lda  = a_rs;
        inca = a_cs;
        ldb  = b_rs;
        incb = b_cs;

        if ( bli_is_lower( uplo ) )
        {
            n_iter               = m;
            n_elem_max           = bli_min( m, n );
            n_elem_is_descending = FALSE;
        }
        else // upper
        {
            n_iter               = bli_min( m, n );
            n_elem_max           = n;
            n_elem_is_descending = TRUE;
        }
    }

    // Reading A transposed exchanges the roles of its two strides.
    if ( bli_does_trans( trans ) )
    {
        bli_swap_ints( lda, inca );
    }

    for ( j = 0; j < n_iter; j++ )
    {
        if ( n_elem_is_descending )
        {
            // Segment starts on the diagonal and shrinks by one per step.
            n_elem = n_elem_max - j;
            a_j    = a + j*lda + j*inca;
            b_j    = b + j*ldb + j*incb;
        }
        else
        {
            // Segment starts at the leading edge and grows up to n_elem_max.
            n_elem = bli_min( j + 1, n_elem_max );
            a_j    = a + j*lda;
            b_j    = b + j*ldb;
        }

        bli_cdcopyv( conj,
                     n_elem,
                     a_j, inca,
                     b_j, incb );
    }
}
| void bli_cdcopymt | ( | trans_t | trans, |
| int | m, | ||
| int | n, | ||
| scomplex * | a, | ||
| int | a_rs, | ||
| int | a_cs, | ||
| double * | b, | ||
| int | b_rs, | ||
| int | b_cs | ||
| ) |
References bli_cdcopyv(), bli_does_trans(), bli_is_row_storage(), bli_is_vector(), bli_proj_trans_to_conj(), bli_vector_dim(), bli_vector_inc(), bli_zero_dim2(), and BLIS_NO_TRANSPOSE.
Referenced by FLA_Copy_external(), and FLA_Copyt_external().
{
    // bli_cdcopymt: copies the single-complex matrix A into the m-by-n
    // double-real matrix B, reading A transposed if trans says so, with one
    // bli_cdcopyv() call per column (or per row when B is row-stored).
    scomplex* a_j;
    double*   b_j;
    int       lda, inca;
    int       ldb, incb;
    int       n_iter;
    int       n_elem;
    int       j;
    conj_t    conj;

    // Nothing to copy when either dimension is zero.
    if ( bli_zero_dim2( m, n ) ) return;

    // The conjugation flag depends only on trans.
    conj = bli_proj_trans_to_conj( trans );

    if ( !bli_is_vector( m, n ) )
    {
        // Matrix case: by default iterate over the n columns of B.
        n_iter = n;
        n_elem = m;
        ldb    = b_cs;
        incb   = b_rs;

        // Reading A transposed exchanges the roles of its two strides.
        if ( bli_does_trans( trans ) )
        {
            lda  = a_rs;
            inca = a_cs;
        }
        else
        {
            lda  = a_cs;
            inca = a_rs;
        }

        // If B is row-stored, iterate over rows instead for better
        // spatial locality.
        if ( bli_is_row_storage( b_rs, b_cs ) )
        {
            bli_swap_ints( n_iter, n_elem );
            bli_swap_ints( lda, inca );
            bli_swap_ints( ldb, incb );
        }
    }
    else
    {
        // Vector case: a single copy covers everything. The leading
        // dimensions are only ever multiplied by j == 0.
        n_iter = 1;
        n_elem = bli_vector_dim( m, n );
        lda    = 1;
        ldb    = 1;
        inca   = bli_vector_inc( trans, m, n, a_rs, a_cs );
        incb   = bli_vector_inc( BLIS_NO_TRANSPOSE, m, n, b_rs, b_cs );
    }

    for ( j = 0; j < n_iter; ++j )
    {
        a_j = a + j*lda;
        b_j = b + j*ldb;

        bli_cdcopyv( conj,
                     n_elem,
                     a_j, inca,
                     b_j, incb );
    }
}
| void bli_cdcopyv | ( | conj_t | conj, |
| int | m, | ||
| scomplex * | x, | ||
| int | incx, | ||
| double * | y, | ||
| int | incy | ||
| ) |
References bli_zero_dim1(), and scomplex::real.
Referenced by bli_cdcopymr(), bli_cdcopymrt(), and bli_cdcopymt().
{
    // bli_cdcopyv: stores the real part of each of the m elements of the
    // single-complex vector x into the double-real vector y. The imaginary
    // parts are dropped and the conj argument is not consulted.
    int i;

    // Nothing to copy for a zero-length vector.
    if ( bli_zero_dim1( m ) ) return;

    // Element i of x lives incx complex strides in; likewise for y.
    for ( i = 0; i < m; ++i )
    {
        y[ i*incy ] = x[ i*incx ].real;
    }
}
| void bli_cdot | ( | conj_t | conj, |
| int | n, | ||
| scomplex * | x, | ||
| int | incx, | ||
| scomplex * | y, | ||
| int | incy, | ||
| scomplex * | rho | ||
| ) |
References bli_cdot_in(), bli_is_conj(), cblas_cdotc_sub(), and cblas_cdotu_sub().
Referenced by bli_cdot2s(), bli_cdots(), FLA_Bidiag_UT_u_step_ofc_var2(), FLA_Bidiag_UT_u_step_ofc_var3(), FLA_Bidiag_UT_u_step_ofc_var4(), FLA_Bidiag_UT_u_step_opc_var2(), FLA_Bidiag_UT_u_step_opc_var3(), FLA_Bidiag_UT_u_step_opc_var4(), FLA_Bidiag_UT_u_step_opc_var5(), FLA_Dot_external(), FLA_Dotc_external(), FLA_Fused_Ahx_Ax_opc_var1(), FLA_Fused_Gerc2_Ahx_Ax_opc_var1(), FLA_Fused_Gerc2_Ahx_Axpy_Ax_opc_var1(), FLA_Fused_Her2_Ax_l_opc_var1(), FLA_Fused_Uhu_Yhu_Zhu_opc_var1(), FLA_Fused_UYx_ZVx_opc_var1(), FLA_Fused_UZhu_ZUhu_opc_var1(), FLA_Hess_UT_step_ofc_var2(), FLA_Hess_UT_step_ofc_var3(), FLA_Hess_UT_step_ofc_var4(), FLA_Hess_UT_step_opc_var2(), FLA_Hess_UT_step_opc_var3(), FLA_Hess_UT_step_opc_var4(), FLA_Hess_UT_step_opc_var5(), FLA_Sylv_hh_opc_var1(), FLA_Sylv_hn_opc_var1(), FLA_Sylv_nh_opc_var1(), FLA_Sylv_nn_opc_var1(), FLA_Tridiag_UT_l_step_ofc_var2(), FLA_Tridiag_UT_l_step_ofc_var3(), FLA_Tridiag_UT_l_step_opc_var1(), FLA_Tridiag_UT_l_step_opc_var2(), and FLA_Tridiag_UT_l_step_opc_var3().
{
    // bli_cdot: computes a dot product of x and y into *rho. With the CBLAS
    // interface enabled it dispatches to cblas_cdotc_sub() when conj asks
    // for conjugation and to cblas_cdotu_sub() otherwise; without it, the
    // in-house bli_cdot_in() is used.
#ifndef BLIS_ENABLE_CBLAS_INTERFACES
    bli_cdot_in( conj,
                 n,
                 x, incx,
                 y, incy,
                 rho );
#else
    if ( !bli_is_conj( conj ) )
    {
        // Unconjugated variant.
        cblas_cdotu_sub( n,
                         x, incx,
                         y, incy,
                         rho );
    }
    else
    {
        // Conjugated variant.
        cblas_cdotc_sub( n,
                         x, incx,
                         y, incy,
                         rho );
    }
#endif
}
| void bli_cdot2s | ( | conj_t | conj, |
| int | n, | ||
| scomplex * | alpha, | ||
| scomplex * | x, | ||
| int | incx, | ||
| scomplex * | y, | ||
| int | incy, | ||
| scomplex * | beta, | ||
| scomplex * | rho | ||
| ) |
References bli_cdot(), scomplex::imag, and scomplex::real.
Referenced by FLA_Dot2cs_external(), FLA_Dot2s_external(), FLA_Eig_gest_il_opc_var1(), FLA_Eig_gest_il_opc_var2(), FLA_Eig_gest_il_opc_var3(), FLA_Eig_gest_iu_opc_var1(), FLA_Eig_gest_iu_opc_var2(), FLA_Eig_gest_iu_opc_var3(), FLA_Eig_gest_nl_opc_var1(), FLA_Eig_gest_nl_opc_var2(), FLA_Eig_gest_nu_opc_var1(), FLA_Eig_gest_nu_opc_var2(), FLA_Lyap_h_opc_var1(), FLA_Lyap_h_opc_var2(), FLA_Lyap_h_opc_var3(), FLA_Lyap_n_opc_var1(), FLA_Lyap_n_opc_var2(), and FLA_Lyap_n_opc_var3().
{
    // bli_cdot2s: updates *rho to
    //   beta * rho + alpha * dot( x, y ) + conj(alpha) * dot( y, x )
    // where both dot products are computed by bli_cdot() with the given conj.
    scomplex xy;
    scomplex yx;
    scomplex a_val;
    scomplex a_cnj;
    scomplex b_val;
    scomplex r_old;

    // Snapshot all scalar inputs before *rho is written.
    a_val = *alpha;
    a_cnj = *alpha;
    b_val = *beta;
    r_old = *rho;

    // a_cnj is alpha with the sign of its imaginary part flipped.
    a_cnj.imag = a_cnj.imag * -1.0F;

    bli_cdot( conj,
              n,
              x, incx,
              y, incy,
              &xy );

    bli_cdot( conj,
              n,
              y, incy,
              x, incx,
              &yx );

    // Complex multiply-accumulate, written out component by component.
    rho->real = b_val.real * r_old.real - b_val.imag * r_old.imag +
                a_val.real * xy.real    - a_val.imag * xy.imag +
                a_cnj.real * yx.real    - a_cnj.imag * yx.imag;
    rho->imag = b_val.real * r_old.imag + b_val.imag * r_old.real +
                a_val.real * xy.imag    + a_val.imag * xy.real +
                a_cnj.real * yx.imag    + a_cnj.imag * yx.real;
}
| void bli_cdot_in | ( | conj_t | conj, |
| int | n, | ||
| scomplex * | x, | ||
| int | incx, | ||
| scomplex * | y, | ||
| int | incy, | ||
| scomplex * | rho | ||
| ) |
References bli_is_conj(), scomplex::imag, and scomplex::real.
Referenced by bli_cdot().
{
    // bli_cdot_in: in-house complex dot product. Accumulates the products
    // x[i] * y[i] over n elements, using the conjugate of x[i] when conj
    // requests it, and stores the sum in *rho. For n <= 0 the loops do not
    // run and *rho is set to zero.
    float xr, xi;
    float yr, yi;
    float acc_r = 0.0F;
    float acc_i = 0.0F;
    int   i;

    if ( bli_is_conj( conj ) )
    {
        for ( i = 0; i < n; ++i )
        {
            xr = x[ i*incx ].real;
            xi = x[ i*incx ].imag;
            yr = y[ i*incy ].real;
            yi = y[ i*incy ].imag;

            // conj(x) * y: the imaginary part of x enters negated.
            acc_r += xr * yr - -xi * yi;
            acc_i += xr * yi + -xi * yr;
        }
    }
    else
    {
        for ( i = 0; i < n; ++i )
        {
            xr = x[ i*incx ].real;
            xi = x[ i*incx ].imag;
            yr = y[ i*incy ].real;
            yi = y[ i*incy ].imag;

            // Plain complex product x * y.
            acc_r += xr * yr - xi * yi;
            acc_i += xr * yi + xi * yr;
        }
    }

    // *rho is written only once, after the whole sum is known.
    rho->real = acc_r;
    rho->imag = acc_i;
}
| void bli_cdots | ( | conj_t | conj, |
| int | n, | ||
| scomplex * | alpha, | ||
| scomplex * | x, | ||
| int | incx, | ||
| scomplex * | y, | ||
| int | incy, | ||
| scomplex * | beta, | ||
| scomplex * | rho | ||
| ) |
References bli_cdot(), scomplex::imag, and scomplex::real.
Referenced by FLA_Chol_l_opc_var1(), FLA_Chol_l_opc_var2(), FLA_Chol_u_opc_var1(), FLA_Chol_u_opc_var2(), FLA_Dotcs_external(), FLA_Dots_external(), FLA_Fused_Ahx_Axpy_Ax_opc_var1(), FLA_Hess_UT_step_opc_var5(), FLA_LU_nopiv_opc_var1(), FLA_LU_nopiv_opc_var2(), FLA_LU_nopiv_opc_var3(), FLA_LU_nopiv_opc_var4(), FLA_LU_piv_opc_var3(), FLA_LU_piv_opc_var4(), FLA_Ttmm_l_opc_var2(), FLA_Ttmm_l_opc_var3(), FLA_Ttmm_u_opc_var2(), and FLA_Ttmm_u_opc_var3().
{
    // bli_cdots: updates *rho to beta * rho + alpha * dot( x, y ), where the
    // dot product is computed by bli_cdot() with the given conj.
    //
    // All three scalars are snapshotted before *rho is written. Reading
    // alpha or beta through their pointers after rho->real has been stored
    // would pick up the new value if either of them aliases rho, corrupting
    // the imaginary part. bli_cdot2s() takes the same precaution.
    scomplex alpha_val = *alpha;
    scomplex beta_val  = *beta;
    scomplex rho_orig  = *rho;
    scomplex dot_prod;

    bli_cdot( conj,
              n,
              x, incx,
              y, incy,
              &dot_prod );

    // Complex multiply-accumulate, written out component by component.
    rho->real = beta_val.real  * rho_orig.real - beta_val.imag  * rho_orig.imag +
                alpha_val.real * dot_prod.real - alpha_val.imag * dot_prod.imag;
    rho->imag = beta_val.real  * rho_orig.imag + beta_val.imag  * rho_orig.real +
                alpha_val.real * dot_prod.imag + alpha_val.imag * dot_prod.real;
}
| void bli_cfnorm | ( | int | m, |
| int | n, | ||
| scomplex * | a, | ||
| int | a_rs, | ||
| int | a_cs, | ||
| float * | norm | ||
| ) |
References bli_is_row_storage(), bli_is_vector(), bli_vector_dim(), bli_vector_inc(), bli_zero_dim2(), BLIS_NO_TRANSPOSE, scomplex::imag, and scomplex::real.
Referenced by FLA_Norm_frob().
{
    // bli_cfnorm: computes the Frobenius norm of the m-by-n single-complex
    // matrix A, i.e. the square root of the sum of real^2 + imag^2 over all
    // elements, and stores it in *norm.
    scomplex* a_ij;
    float     sum;
    int       lda, inca;
    int       n_iter;
    int       n_elem;
    int       i, j;

    // An empty matrix has norm zero. Store that explicitly so the caller
    // never reads an unset result.
    if ( bli_zero_dim2( m, n ) )
    {
        *norm = 0.0F;
        return;
    }

    if ( bli_is_vector( m, n ) )
    {
        // Vector case: one pass over the elements. lda is only ever
        // multiplied by j == 0, so its value does not matter.
        n_iter = 1;
        n_elem = bli_vector_dim( m, n );
        lda    = 1;
        inca   = bli_vector_inc( BLIS_NO_TRANSPOSE, m, n, a_rs, a_cs );
    }
    else
    {
        // Matrix case: by default iterate over columns.
        n_iter = n;
        n_elem = m;
        lda    = a_cs;
        inca   = a_rs;

        // If A is row-stored, iterate over rows instead for better
        // spatial locality.
        if ( bli_is_row_storage( a_rs, a_cs ) )
        {
            bli_swap_ints( n_iter, n_elem );
            bli_swap_ints( lda, inca );
        }
    }

    // Accumulate the squared magnitudes in single precision.
    sum = 0.0F;

    for ( j = 0; j < n_iter; j++ )
    {
        for ( i = 0; i < n_elem; i++ )
        {
            a_ij = a + i*inca + j*lda;
            sum += a_ij->real * a_ij->real + a_ij->imag * a_ij->imag;
        }
    }

    // Compute the norm and store the result.
    *norm = ( float ) sqrt( sum );
}
| void bli_cinvscalm | ( | conj_t | conj, |
| int | m, | ||
| int | n, | ||
| scomplex * | alpha, | ||
| scomplex * | a, | ||
| int | a_rs, | ||
| int | a_cs | ||
| ) |
References bli_cinvert2s(), bli_cscal(), bli_is_row_storage(), bli_is_vector(), bli_vector_dim(), bli_vector_inc(), bli_zero_dim2(), and BLIS_NO_TRANSPOSE.
Referenced by FLA_Inv_scal_external(), and FLA_Inv_scalc_external().
{
    // bli_cinvscalm: scales the m-by-n matrix A in place by the scalar that
    // bli_cinvert2s() derives from alpha and conj, using one bli_cscal()
    // call per column (or row).
    // NOTE(review): bli_cinvert2s() presumably yields 1/alpha, conjugated
    // when conj asks for it -- confirm against its definition.
    scomplex  alpha_inv;
    scomplex* a_j;
    int       lda, inca;
    int       n_iter;
    int       n_elem;
    int       j;

    // Nothing to do for an empty matrix, or when bli_ceq1() reports that
    // alpha equals one.
    if ( bli_zero_dim2( m, n ) ) return;
    if ( bli_ceq1( alpha ) ) return;

    if ( !bli_is_vector( m, n ) )
    {
        // Matrix case: by default iterate over columns.
        n_iter = n;
        n_elem = m;
        lda    = a_cs;
        inca   = a_rs;

        // If A is row-stored, iterate over rows instead for better
        // spatial locality.
        if ( bli_is_row_storage( a_rs, a_cs ) )
        {
            bli_swap_ints( n_iter, n_elem );
            bli_swap_ints( lda, inca );
        }
    }
    else
    {
        // Vector case: make sure the underlying scal runs exactly once.
        // lda is only ever multiplied by j == 0.
        n_iter = 1;
        n_elem = bli_vector_dim( m, n );
        lda    = 1;
        inca   = bli_vector_inc( BLIS_NO_TRANSPOSE, m, n, a_rs, a_cs );
    }

    // The scaling factor is computed once and reused for every column/row.
    bli_cinvert2s( conj, alpha, &alpha_inv );

    for ( j = 0; j < n_iter; j++ )
    {
        a_j = a + j*lda;

        bli_cscal( n_elem,
                   &alpha_inv,
                   a_j, inca );
    }
}
| void bli_cinvscalv | ( | conj_t | conj, |
| int | n, | ||
| scomplex * | alpha, | ||
| scomplex * | x, | ||
| int | incx | ||
| ) |
References bli_cinvert2s(), and bli_cscal().
Referenced by bli_crandmr(), FLA_Apply_H2_UT_l_opc_var1(), FLA_Apply_H2_UT_r_opc_var1(), FLA_Apply_HUD_UT_l_opc_var1(), FLA_Bidiag_UT_u_step_ofc_var2(), FLA_Bidiag_UT_u_step_ofc_var3(), FLA_Bidiag_UT_u_step_ofc_var4(), FLA_Bidiag_UT_u_step_opc_var2(), FLA_Bidiag_UT_u_step_opc_var3(), FLA_Bidiag_UT_u_step_opc_var4(), FLA_Bidiag_UT_u_step_opc_var5(), FLA_Chol_l_opc_var2(), FLA_Chol_l_opc_var3(), FLA_Chol_u_opc_var2(), FLA_Chol_u_opc_var3(), FLA_Eig_gest_il_opc_var1(), FLA_Eig_gest_il_opc_var2(), FLA_Eig_gest_il_opc_var3(), FLA_Eig_gest_il_opc_var4(), FLA_Eig_gest_il_opc_var5(), FLA_Eig_gest_iu_opc_var1(), FLA_Eig_gest_iu_opc_var2(), FLA_Eig_gest_iu_opc_var3(), FLA_Eig_gest_iu_opc_var4(), FLA_Eig_gest_iu_opc_var5(), FLA_Househ2_UT_l_opc(), FLA_Househ3UD_UT_opc(), FLA_LU_nopiv_opc_var3(), FLA_LU_nopiv_opc_var4(), FLA_LU_nopiv_opc_var5(), FLA_LU_piv_opc_var3(), FLA_LU_piv_opc_var4(), FLA_LU_piv_opc_var5(), FLA_Trinv_ln_opc_var1(), FLA_Trinv_ln_opc_var2(), FLA_Trinv_ln_opc_var3(), FLA_Trinv_un_opc_var1(), FLA_Trinv_un_opc_var2(), and FLA_Trinv_un_opc_var3().
{
    // bli_cinvscalv: scales the n-element vector x in place by the scalar
    // that bli_cinvert2s() derives from alpha and conj.
    // NOTE(review): bli_cinvert2s() presumably yields 1/alpha, conjugated
    // when conj asks for it -- confirm against its definition.
    scomplex alpha_inv;

    // Skip all work when bli_ceq1() reports that alpha equals one.
    if ( !bli_ceq1( alpha ) )
    {
        bli_cinvert2s( conj, alpha, &alpha_inv );

        bli_cscal( n,
                   &alpha_inv,
                   x, incx );
    }
}
References cblas_scnrm2(), and F77_scnrm2().
Referenced by FLA_Househ2_UT_l_opc(), FLA_Househ2s_UT_l_opc(), FLA_Househ3UD_UT_opc(), and FLA_Nrm2_external().
{
    // NOTE(review): the signature for this body is missing from the page;
    // judging by the routines it wraps it is presumably bli_cnrm2(). It
    // stores in *norm the value returned by the BLAS scnrm2 routine for
    // (n, x, incx), through whichever interface the build selects.
#ifndef BLIS_ENABLE_CBLAS_INTERFACES
    // Fortran-77 interface: scalar arguments are passed by address.
    *norm = F77_scnrm2( &n,
                        x, &incx );
#else
    // CBLAS interface: scalar arguments are passed by value.
    *norm = cblas_scnrm2( n,
                          x, incx );
#endif
}
References cblas_cscal(), and F77_cscal().
Referenced by bli_caxpysmt(), bli_caxpysv(), bli_cinvscalm(), bli_cinvscalv(), bli_cscalm(), bli_cscalmr(), bli_cscalv(), and FLA_SA_LU_unb().
{
    // NOTE(review): the signature for this body is missing from the page; the
    // cross-references indicate it is bli_cscal(). It forwards n, alpha, x
    // and incx to the BLAS cscal routine through whichever interface the
    // build selects.
#ifndef BLIS_ENABLE_CBLAS_INTERFACES
    // Fortran-77 interface: integer arguments are passed by address.
    F77_cscal( &n,
               alpha,
               x, &incx );
#else
    // CBLAS interface: integer arguments are passed by value.
    cblas_cscal( n,
                 alpha,
                 x, incx );
#endif
}
| void bli_cscalm | ( | conj_t | conj, |
| int | m, | ||
| int | n, | ||
| scomplex * | alpha, | ||
| scomplex * | a, | ||
| int | a_rs, | ||
| int | a_cs | ||
| ) |
References bli_cscal(), bli_is_row_storage(), bli_is_vector(), bli_vector_dim(), bli_vector_inc(), bli_zero_dim2(), and BLIS_NO_TRANSPOSE.
Referenced by bli_cgemm(), bli_chemm(), bli_csymm(), bli_ctrmmsx(), bli_ctrsmsx(), FLA_Lyap_h_opc_var1(), FLA_Lyap_h_opc_var2(), FLA_Lyap_h_opc_var3(), FLA_Lyap_h_opc_var4(), FLA_Lyap_n_opc_var1(), FLA_Lyap_n_opc_var2(), FLA_Lyap_n_opc_var3(), FLA_Lyap_n_opc_var4(), FLA_Scal_external(), and FLA_Scalc_external().
{
    // bli_cscalm: scales the m-by-n matrix A in place by the scalar that
    // bli_ccopys() derives from alpha and conj, using one bli_cscal() call
    // per column (or row).
    // NOTE(review): bli_ccopys() presumably copies alpha, conjugating it
    // when conj asks for it -- confirm against its definition.
    scomplex  alpha_conj;
    scomplex* a_j;
    int       lda, inca;
    int       n_iter;
    int       n_elem;
    int       j;

    // Nothing to do for an empty matrix, or when bli_ceq1() reports that
    // alpha equals one.
    if ( bli_zero_dim2( m, n ) ) return;
    if ( bli_ceq1( alpha ) ) return;

    if ( !bli_is_vector( m, n ) )
    {
        // Matrix case: by default iterate over columns.
        n_iter = n;
        n_elem = m;
        lda    = a_cs;
        inca   = a_rs;

        // If A is row-stored, iterate over rows instead for better
        // spatial locality.
        if ( bli_is_row_storage( a_rs, a_cs ) )
        {
            bli_swap_ints( n_iter, n_elem );
            bli_swap_ints( lda, inca );
        }
    }
    else
    {
        // Vector case: make sure the underlying scal runs exactly once.
        // lda is only ever multiplied by j == 0.
        n_iter = 1;
        n_elem = bli_vector_dim( m, n );
        lda    = 1;
        inca   = bli_vector_inc( BLIS_NO_TRANSPOSE, m, n, a_rs, a_cs );
    }

    // The scaling factor is prepared once and reused for every column/row.
    bli_ccopys( conj, alpha, &alpha_conj );

    for ( j = 0; j < n_iter; j++ )
    {
        a_j = a + j*lda;

        bli_cscal( n_elem,
                   &alpha_conj,
                   a_j, inca );
    }
}
| void bli_cscalmr | ( | uplo_t | uplo, |
| int | m, | ||
| int | n, | ||
| scomplex * | alpha, | ||
| scomplex * | a, | ||
| int | a_rs, | ||
| int | a_cs | ||
| ) |
References bli_cscal(), bli_is_row_storage(), bli_is_upper(), and bli_zero_dim2().
Referenced by FLA_Scalr_external().
{
    // Scale only the triangle of A selected by uplo, one bli_cscal() call per
    // column (or per row when A is row-major).
    scomplex* a_line;      // first element scaled in the current pass
    int       ld_a, inc_a; // stride between passes / within a pass
    int       n_pass;      // number of rows/columns visited
    int       len_cap;     // full length of one row/column
    int       len_pass;    // elements actually scaled in the current pass
    int       k;

    // An empty matrix or a unit alpha leaves A untouched.
    if ( bli_zero_dim2( m, n ) ) return;
    if ( bli_ceq1( alpha ) ) return;

    // Column-major defaults.
    n_pass  = n;
    len_cap = m;
    ld_a    = a_cs;
    inc_a   = a_rs;

    // Row-major A: traverse by rows for locality; the triangle flag is
    // toggled to go with the swapped traversal.
    if ( bli_is_row_storage( a_rs, a_cs ) )
    {
        bli_swap_ints( n_pass, len_cap );
        bli_swap_ints( ld_a, inc_a );
        bli_toggle_uplo( uplo );
    }

    if ( bli_is_upper( uplo ) )
    {
        // Pass k covers its first k+1 entries, capped at the full length.
        for ( k = 0; k < n_pass; k++ )
        {
            len_pass = bli_min( k + 1, len_cap );

            bli_cscal( len_pass, alpha, a + k*ld_a, inc_a );
        }
    }
    else // lower triangle
    {
        // Pass k starts on the diagonal; stop once no elements remain.
        for ( k = 0; k < n_pass && k < len_cap; k++ )
        {
            len_pass = len_cap - k;
            a_line   = a + k*ld_a + k*inc_a;

            bli_cscal( len_pass, alpha, a_line, inc_a );
        }
    }
}
| void bli_cscalv | ( | conj_t | conj, |
| int | n, | ||
| scomplex * | alpha, | ||
| scomplex * | x, | ||
| int | incx | ||
| ) |
References bli_cscal(), and bli_zero_dim1().
Referenced by bli_capdiagmv(), bli_cgemv(), bli_chemv(), bli_ctrmvsx(), bli_ctrsvsx(), FLA_Eig_gest_il_opc_var3(), FLA_Eig_gest_iu_opc_var3(), FLA_Eig_gest_nl_opc_var1(), FLA_Eig_gest_nl_opc_var2(), FLA_Eig_gest_nl_opc_var4(), FLA_Eig_gest_nl_opc_var5(), FLA_Eig_gest_nu_opc_var1(), FLA_Eig_gest_nu_opc_var2(), FLA_Eig_gest_nu_opc_var4(), FLA_Eig_gest_nu_opc_var5(), FLA_Hess_UT_step_ofc_var2(), FLA_Hess_UT_step_ofc_var3(), FLA_Hess_UT_step_ofc_var4(), FLA_Hess_UT_step_opc_var2(), FLA_Hess_UT_step_opc_var3(), FLA_Hess_UT_step_opc_var4(), FLA_LQ_UT_form_Q_opc_var1(), FLA_QR_UT_form_Q_opc_var1(), FLA_Tridiag_UT_l_step_ofc_var2(), FLA_Tridiag_UT_l_step_ofc_var3(), FLA_Tridiag_UT_l_step_opc_var1(), FLA_Tridiag_UT_l_step_opc_var2(), FLA_Tridiag_UT_l_step_opc_var3(), FLA_Trinv_ln_opc_var4(), FLA_Trinv_lu_opc_var1(), FLA_Trinv_lu_opc_var2(), FLA_Trinv_lu_opc_var3(), FLA_Trinv_lu_opc_var4(), FLA_Trinv_un_opc_var4(), FLA_Trinv_uu_opc_var1(), FLA_Trinv_uu_opc_var2(), FLA_Trinv_uu_opc_var3(), FLA_Trinv_uu_opc_var4(), FLA_Ttmm_l_opc_var1(), FLA_Ttmm_l_opc_var2(), FLA_Ttmm_u_opc_var1(), and FLA_Ttmm_u_opc_var2().
{
    // Scale the n-element vector x (stride incx) by alpha via bli_cscal().
    scomplex alpha_eff; // alpha after bli_ccopys() has processed conj

    // Zero length or a unit alpha: nothing to do.
    if ( bli_zero_dim1( n ) || bli_ceq1( alpha ) ) return;

    // Local copy of alpha; presumably conjugated when conj requests it.
    bli_ccopys( conj, alpha, &alpha_eff );

    bli_cscal( n, &alpha_eff, x, incx );
}
| void bli_cscopymr | ( | uplo_t | uplo, |
| int | m, | ||
| int | n, | ||
| scomplex * | a, | ||
| int | a_rs, | ||
| int | a_cs, | ||
| float * | b, | ||
| int | b_rs, | ||
| int | b_cs | ||
| ) |
References bli_cscopyv(), bli_is_row_storage(), bli_is_upper(), bli_zero_dim2(), and BLIS_NO_TRANSPOSE.
Referenced by FLA_Copyr_external().
{
    // Copy the triangle of the complex matrix A selected by uplo into the real
    // matrix B, one bli_cscopyv() call per column (or per row if B is row-major).
    scomplex* a_line;
    float*    b_line;
    int       ld_a, inc_a; // A: stride between passes / within a pass
    int       ld_b, inc_b; // B: stride between passes / within a pass
    int       n_pass;      // number of rows/columns visited
    int       len_cap;     // full length of one row/column
    int       len_pass;    // elements copied in the current pass
    int       k;

    // Nothing to copy for an empty matrix.
    if ( bli_zero_dim2( m, n ) ) return;

    // Column-major defaults.
    n_pass  = n;
    len_cap = m;
    ld_a    = a_cs;
    inc_a   = a_rs;
    ld_b    = b_cs;
    inc_b   = b_rs;

    // Row-major B: traverse both matrices by rows for locality; the triangle
    // flag is toggled to go with the swapped traversal.
    if ( bli_is_row_storage( b_rs, b_cs ) )
    {
        bli_swap_ints( n_pass, len_cap );
        bli_swap_ints( ld_a, inc_a );
        bli_swap_ints( ld_b, inc_b );
        bli_toggle_uplo( uplo );
    }

    if ( bli_is_upper( uplo ) )
    {
        // Pass k covers its first k+1 entries, capped at the full length.
        for ( k = 0; k < n_pass; k++ )
        {
            len_pass = bli_min( k + 1, len_cap );

            bli_cscopyv( BLIS_NO_TRANSPOSE,
                         len_pass,
                         a + k*ld_a, inc_a,
                         b + k*ld_b, inc_b );
        }
    }
    else // lower triangle
    {
        // Pass k starts on the diagonal; stop once no elements remain.
        for ( k = 0; k < n_pass && k < len_cap; k++ )
        {
            len_pass = len_cap - k;
            a_line   = a + k*ld_a + k*inc_a;
            b_line   = b + k*ld_b + k*inc_b;

            bli_cscopyv( BLIS_NO_TRANSPOSE,
                         len_pass,
                         a_line, inc_a,
                         b_line, inc_b );
        }
    }
}
| void bli_cscopymrt | ( | uplo_t | uplo, |
| trans_t | trans, | ||
| int | m, | ||
| int | n, | ||
| scomplex * | a, | ||
| int | a_rs, | ||
| int | a_cs, | ||
| float * | b, | ||
| int | b_rs, | ||
| int | b_cs | ||
| ) |
References bli_cscopyv(), bli_does_trans(), bli_is_col_storage(), bli_is_lower(), bli_proj_trans_to_conj(), and bli_zero_dim2().
Referenced by FLA_Copyrt_external().
{
    // Copy the triangle of (optionally transposed) complex A selected by uplo
    // into real B, one bli_cscopyv() call per column or row of B.
    scomplex* a_line;
    float*    b_line;
    int       ld_a, inc_a; // A: stride between passes / within a pass
    int       ld_b, inc_b; // B: stride between passes / within a pass
    int       n_pass;      // number of rows/columns visited
    int       len_cap;     // upper bound on the length of one pass
    int       len_pass;    // elements copied in the current pass
    int       shrinking;   // TRUE: passes start on the diagonal and get shorter
    int       diag_off;    // elements skipped at the start of a pass
    int       k;
    conj_t    conj;

    // Nothing to copy for an empty matrix.
    if ( bli_zero_dim2( m, n ) ) return;

    // Strides depend only on B's storage; pass count and length pattern also
    // depend on which triangle is requested.
    if ( bli_is_col_storage( b_rs, b_cs ) )
    {
        ld_a  = a_cs;
        inc_a = a_rs;
        ld_b  = b_cs;
        inc_b = b_rs;

        if ( bli_is_lower( uplo ) )
        {
            n_pass    = bli_min( m, n );
            len_cap   = m;
            shrinking = TRUE;
        }
        else // upper
        {
            n_pass    = n;
            len_cap   = bli_min( m, n );
            shrinking = FALSE;
        }
    }
    else // row storage
    {
        ld_a  = a_rs;
        inc_a = a_cs;
        ld_b  = b_rs;
        inc_b = b_cs;

        if ( bli_is_lower( uplo ) )
        {
            n_pass    = m;
            len_cap   = bli_min( m, n );
            shrinking = FALSE;
        }
        else // upper
        {
            n_pass    = bli_min( m, n );
            len_cap   = n;
            shrinking = TRUE;
        }
    }

    // A transpose of A is realized by exchanging A's two strides.
    if ( bli_does_trans( trans ) )
    {
        bli_swap_ints( ld_a, inc_a );
    }

    // Conjugation component of trans, handed on to bli_cscopyv().
    conj = bli_proj_trans_to_conj( trans );

    for ( k = 0; k < n_pass; k++ )
    {
        if ( shrinking )
        {
            // Start on the diagonal; one element fewer each pass.
            diag_off = k;
            len_pass = len_cap - k;
        }
        else
        {
            // Start at the edge; one element more each pass, up to the cap.
            diag_off = 0;
            len_pass = bli_min( k + 1, len_cap );
        }

        a_line = a + k*ld_a + diag_off*inc_a;
        b_line = b + k*ld_b + diag_off*inc_b;

        bli_cscopyv( conj,
                     len_pass,
                     a_line, inc_a,
                     b_line, inc_b );
    }
}
| void bli_cscopymt | ( | trans_t | trans, |
| int | m, | ||
| int | n, | ||
| scomplex * | a, | ||
| int | a_rs, | ||
| int | a_cs, | ||
| float * | b, | ||
| int | b_rs, | ||
| int | b_cs | ||
| ) |
References bli_cscopyv(), bli_does_trans(), bli_is_row_storage(), bli_is_vector(), bli_proj_trans_to_conj(), bli_vector_dim(), bli_vector_inc(), bli_zero_dim2(), and BLIS_NO_TRANSPOSE.
Referenced by FLA_Copy_external(), and FLA_Copyt_external().
{
    // Copy (optionally transposed) complex A into real B, one bli_cscopyv()
    // call per column (or row) of B, or a single call when they are vectors.
    scomplex* a_line;
    float*    b_line;
    int       ld_a, inc_a; // A: stride between passes / within a pass
    int       ld_b, inc_b; // B: stride between passes / within a pass
    int       n_pass;      // number of bli_cscopyv() calls
    int       len_pass;    // elements copied per call
    int       k;
    conj_t    conj;

    // Nothing to copy for an empty matrix.
    if ( bli_zero_dim2( m, n ) ) return;

    // Default: column-by-column traversal of B.
    n_pass   = n;
    len_pass = m;
    ld_a     = a_cs;
    inc_a    = a_rs;
    ld_b     = b_cs;
    inc_b    = b_rs;

    if ( bli_is_vector( m, n ) )
    {
        // Vectors are copied with exactly one call.
        n_pass   = 1;
        len_pass = bli_vector_dim( m, n );
        ld_a     = 1; // only ever multiplied by k == 0, so the value is moot
        inc_a    = bli_vector_inc( trans, m, n, a_rs, a_cs );
        ld_b     = 1; // likewise unused
        inc_b    = bli_vector_inc( BLIS_NO_TRANSPOSE, m, n, b_rs, b_cs );
    }
    else
    {
        // A transpose of A is realized by exchanging A's two strides.
        if ( bli_does_trans( trans ) )
        {
            bli_swap_ints( ld_a, inc_a );
        }

        // Row-major B: traverse by rows instead for better spatial locality.
        if ( bli_is_row_storage( b_rs, b_cs ) )
        {
            bli_swap_ints( n_pass, len_pass );
            bli_swap_ints( ld_a, inc_a );
            bli_swap_ints( ld_b, inc_b );
        }
    }

    // Conjugation component of trans, handed on to bli_cscopyv().
    conj = bli_proj_trans_to_conj( trans );

    for ( k = 0; k < n_pass; ++k )
    {
        a_line = a + k*ld_a;
        b_line = b + k*ld_b;

        bli_cscopyv( conj,
                     len_pass,
                     a_line, inc_a,
                     b_line, inc_b );
    }
}
| void bli_cscopyv | ( | conj_t | conj, |
| int | m, | ||
| scomplex * | x, | ||
| int | incx, | ||
| float * | y, | ||
| int | incy | ||
| ) |
References bli_zero_dim1(), and scomplex::real.
Referenced by bli_cscopymr(), bli_cscopymrt(), and bli_cscopymt().
{
    // Copy the real part of each of the m complex elements of x (stride incx)
    // into the float vector y (stride incy). The conj argument is not consulted.
    scomplex* src;
    float*    dst;
    int       left; // elements still to copy

    // Nothing to copy for a zero-length vector.
    if ( bli_zero_dim1( m ) ) return;

    src = x;
    dst = y;

    for ( left = m; left > 0; --left )
    {
        *dst = src->real;

        src += incx;
        dst += incy;
    }
}
| void bli_csinvscalm | ( | conj_t | conj, |
| int | m, | ||
| int | n, | ||
| float * | alpha, | ||
| scomplex * | a, | ||
| int | a_rs, | ||
| int | a_cs | ||
| ) |
References bli_csscal(), bli_is_row_storage(), bli_is_vector(), bli_sinvert2s(), bli_vector_dim(), bli_vector_inc(), bli_zero_dim2(), and BLIS_NO_TRANSPOSE.
Referenced by FLA_Inv_scal_external(), and FLA_Inv_scalc_external().
{
    // Scale the complex m-by-n matrix A by the value bli_sinvert2s() derives
    // from the real scalar alpha, one bli_csscal() call per column (or row).
    float     alpha_recip; // output of bli_sinvert2s()
    scomplex* a_line;      // first element of the row/column handled in a pass
    int       ld_a, inc_a; // stride between passes / within a pass
    int       n_pass;      // number of bli_csscal() calls
    int       len_pass;    // elements scaled per call
    int       k;

    // An empty matrix or a unit alpha leaves A untouched.
    if ( bli_zero_dim2( m, n ) ) return;
    if ( bli_seq1( alpha ) ) return;

    // Default: column-by-column traversal.
    n_pass   = n;
    len_pass = m;
    ld_a     = a_cs;
    inc_a    = a_rs;

    if ( bli_is_vector( m, n ) )
    {
        // A vector is handled with exactly one call to the scal routine.
        n_pass   = 1;
        len_pass = bli_vector_dim( m, n );
        ld_a     = 1; // only ever multiplied by k == 0, so the value is moot
        inc_a    = bli_vector_inc( BLIS_NO_TRANSPOSE, m, n, a_rs, a_cs );
    }
    else if ( bli_is_row_storage( a_rs, a_cs ) )
    {
        // Row-major A: traverse by rows instead for better spatial locality.
        bli_swap_ints( n_pass, len_pass );
        bli_swap_ints( ld_a, inc_a );
    }

    // Presumably stores the reciprocal of alpha — confirm against bli_sinvert2s.
    bli_sinvert2s( conj, alpha, &alpha_recip );

    for ( k = 0; k < n_pass; k++ )
    {
        a_line = a + k*ld_a;

        bli_csscal( len_pass, &alpha_recip, a_line, inc_a );
    }
}
| void bli_csinvscalv | ( | conj_t | conj, |
| int | n, | ||
| float * | alpha, | ||
| scomplex * | x, | ||
| int | incx | ||
| ) |
References bli_csscal().
{
    // Scale the complex vector x by the reciprocal of the real scalar alpha.
    // The conj argument is not consulted, and there is no zero-length check.
    float recip;

    // Dividing by one changes nothing.
    if ( bli_seq1( alpha ) ) return;

    recip = 1.0F / *alpha;

    bli_csscal( n, &recip, x, incx );
}
| void bli_csscal | ( | int | n, |
| float * | alpha, | ||
| scomplex * | x, | ||
| int | incx | ||
| ) |
References cblas_csscal(), and F77_csscal().
Referenced by bli_csinvscalm(), bli_csinvscalv(), bli_csscalm(), bli_csscalmr(), and bli_csscalv().
{
    // Forward (n, alpha, x, incx) to the BLAS csscal routine (real scalar,
    // complex vector) through the binding chosen at build time.
#ifndef BLIS_ENABLE_CBLAS_INTERFACES
    // Fortran-77 binding: everything is passed by address, alpha included.
    F77_csscal( &n, alpha, x, &incx );
#else
    // CBLAS binding: the scalar is passed by value, hence the dereference.
    cblas_csscal( n, *alpha, x, incx );
#endif
}
| void bli_csscalm | ( | conj_t | conj, |
| int | m, | ||
| int | n, | ||
| float * | alpha, | ||
| scomplex * | a, | ||
| int | a_rs, | ||
| int | a_cs | ||
| ) |
References bli_csscal(), bli_is_row_storage(), bli_is_vector(), bli_vector_dim(), bli_vector_inc(), bli_zero_dim2(), and BLIS_NO_TRANSPOSE.
Referenced by FLA_Scal_external(), and FLA_Scalc_external().
{
    // Scale the complex m-by-n matrix A by the real scalar alpha, one
    // bli_csscal() call per column (or row), or a single call for a vector.
    float     alpha_eff;   // alpha after bli_scopys() has processed conj
    scomplex* a_line;      // first element of the row/column handled in a pass
    int       ld_a, inc_a; // stride between passes / within a pass
    int       n_pass;      // number of bli_csscal() calls
    int       len_pass;    // elements scaled per call
    int       k;

    // An empty matrix or a unit alpha leaves A untouched.
    if ( bli_zero_dim2( m, n ) ) return;
    if ( bli_seq1( alpha ) ) return;

    // Default: column-by-column traversal.
    n_pass   = n;
    len_pass = m;
    ld_a     = a_cs;
    inc_a    = a_rs;

    if ( bli_is_vector( m, n ) )
    {
        // A vector is handled with exactly one call to the scal routine.
        n_pass   = 1;
        len_pass = bli_vector_dim( m, n );
        ld_a     = 1; // only ever multiplied by k == 0, so the value is moot
        inc_a    = bli_vector_inc( BLIS_NO_TRANSPOSE, m, n, a_rs, a_cs );
    }
    else if ( bli_is_row_storage( a_rs, a_cs ) )
    {
        // Row-major A: traverse by rows instead for better spatial locality.
        bli_swap_ints( n_pass, len_pass );
        bli_swap_ints( ld_a, inc_a );
    }

    // Local copy of alpha produced by bli_scopys() from conj and alpha.
    bli_scopys( conj, alpha, &alpha_eff );

    for ( k = 0; k < n_pass; k++ )
    {
        a_line = a + k*ld_a;

        bli_csscal( len_pass, &alpha_eff, a_line, inc_a );
    }
}
| void bli_csscalmr | ( | uplo_t | uplo, |
| int | m, | ||
| int | n, | ||
| float * | alpha, | ||
| scomplex * | a, | ||
| int | a_rs, | ||
| int | a_cs | ||
| ) |
References bli_csscal(), bli_is_row_storage(), bli_is_upper(), and bli_zero_dim2().
Referenced by bli_cher2k(), bli_cherk(), and FLA_Scalr_external().
{
    // Scale only the triangle of complex A selected by uplo by the real
    // scalar alpha, one bli_csscal() call per column (or row).
    scomplex* a_line;      // first element scaled in the current pass
    int       ld_a, inc_a; // stride between passes / within a pass
    int       n_pass;      // number of rows/columns visited
    int       len_cap;     // full length of one row/column
    int       len_pass;    // elements actually scaled in the current pass
    int       k;

    // An empty matrix or a unit alpha leaves A untouched.
    if ( bli_zero_dim2( m, n ) ) return;
    if ( bli_seq1( alpha ) ) return;

    // Column-major defaults.
    n_pass  = n;
    len_cap = m;
    ld_a    = a_cs;
    inc_a   = a_rs;

    // Row-major A: traverse by rows for locality; the triangle flag is
    // toggled to go with the swapped traversal.
    if ( bli_is_row_storage( a_rs, a_cs ) )
    {
        bli_swap_ints( n_pass, len_cap );
        bli_swap_ints( ld_a, inc_a );
        bli_toggle_uplo( uplo );
    }

    if ( bli_is_upper( uplo ) )
    {
        // Pass k covers its first k+1 entries, capped at the full length.
        for ( k = 0; k < n_pass; k++ )
        {
            len_pass = bli_min( k + 1, len_cap );

            bli_csscal( len_pass, alpha, a + k*ld_a, inc_a );
        }
    }
    else // lower triangle
    {
        // Pass k starts on the diagonal; stop once no elements remain.
        for ( k = 0; k < n_pass && k < len_cap; k++ )
        {
            len_pass = len_cap - k;
            a_line   = a + k*ld_a + k*inc_a;

            bli_csscal( len_pass, alpha, a_line, inc_a );
        }
    }
}
| void bli_csscalv | ( | conj_t | conj, |
| int | n, | ||
| float * | alpha, | ||
| scomplex * | x, | ||
| int | incx | ||
| ) |
References bli_csscal(), and bli_zero_dim1().
Referenced by bli_csapdiagmv().
{
    // Scale the complex vector x by the real scalar alpha via bli_csscal().
    // The conj argument is not consulted.

    // Zero length or a unit alpha: nothing to do.
    if ( bli_zero_dim1( n ) || bli_seq1( alpha ) ) return;

    bli_csscal( n, alpha, x, incx );
}
References cblas_cswap(), and F77_cswap().
Referenced by bli_cswapmt(), bli_cswapv(), FLA_SA_Apply_pivots(), and FLA_SA_LU_unb().
{
    // Forward (n, x, incx, y, incy) to the BLAS cswap routine through the
    // binding chosen at build time.
#if defined( BLIS_ENABLE_CBLAS_INTERFACES )
    // CBLAS binding: length and strides are passed by value.
    cblas_cswap( n, x, incx, y, incy );
#else
    // Fortran-77 binding: length and strides are passed by address.
    F77_cswap( &n, x, &incx, y, &incy );
#endif
}
| void bli_cswapmt | ( | trans_t | trans, |
| int | m, | ||
| int | n, | ||
| scomplex * | a, | ||
| int | a_rs, | ||
| int | a_cs, | ||
| scomplex * | b, | ||
| int | b_rs, | ||
| int | b_cs | ||
| ) |
References bli_cconjv(), bli_cswap(), bli_does_conj(), bli_does_notrans(), bli_does_trans(), bli_is_col_storage(), bli_is_row_storage(), bli_is_vector(), bli_vector_dim(), bli_vector_inc(), bli_zero_dim2(), and BLIS_NO_TRANSPOSE.
Referenced by FLA_Swap_external(), and FLA_Swapt_external().
{
    // Exchange the contents of (optionally transposed) A and B, one bli_cswap()
    // call per column (or row); when trans carries a conjugation, both swapped
    // lines are conjugated afterwards.
    scomplex* a_line;
    scomplex* b_line;
    int       ld_a, inc_a; // A: stride between passes / within a pass
    int       ld_b, inc_b; // B: stride between passes / within a pass
    int       n_pass;      // number of bli_cswap() calls
    int       len_pass;    // elements swapped per call
    int       k;

    // Nothing to swap for an empty matrix.
    if ( bli_zero_dim2( m, n ) ) return;

    // Default: column-by-column traversal.
    n_pass   = n;
    len_pass = m;
    ld_a     = a_cs;
    inc_a    = a_rs;
    ld_b     = b_cs;
    inc_b    = b_rs;

    if ( bli_is_vector( m, n ) )
    {
        // Vectors are swapped with exactly one call.
        n_pass   = 1;
        len_pass = bli_vector_dim( m, n );
        ld_a     = 1; // only ever multiplied by k == 0, so the value is moot
        inc_a    = bli_vector_inc( trans, m, n, a_rs, a_cs );
        ld_b     = 1; // likewise unused
        inc_b    = bli_vector_inc( BLIS_NO_TRANSPOSE, m, n, b_rs, b_cs );
    }
    else
    {
        // A transpose of A is realized by exchanging A's two strides.
        if ( bli_does_trans( trans ) )
        {
            bli_swap_ints( ld_a, inc_a );
        }

        // Traverse by rows only when B is row-major and A, after the possible
        // transposition, is effectively row-major as well.
        if ( bli_is_row_storage( b_rs, b_cs ) &&
             ( ( bli_is_col_storage( a_rs, a_cs ) && bli_does_trans( trans ) ) ||
               ( bli_is_row_storage( a_rs, a_cs ) && bli_does_notrans( trans ) ) ) )
        {
            bli_swap_ints( n_pass, len_pass );
            bli_swap_ints( ld_a, inc_a );
            bli_swap_ints( ld_b, inc_b );
        }
    }

    for ( k = 0; k < n_pass; k++ )
    {
        a_line = a + k*ld_a;
        b_line = b + k*ld_b;

        bli_cswap( len_pass,
                   a_line, inc_a,
                   b_line, inc_b );

        // Conjugate both freshly swapped lines, A first and then B.
        if ( bli_does_conj( trans ) )
        {
            bli_cconjv( len_pass, a_line, inc_a );
            bli_cconjv( len_pass, b_line, inc_b );
        }
    }
}
| void bli_cswapv | ( | int | n, |
| scomplex * | x, | ||
| int | incx, | ||
| scomplex * | y, | ||
| int | incy | ||
| ) |
References bli_cswap(), and bli_zero_dim1().
Referenced by FLA_Apply_pivots_macro_external().
{
    // Swap the n-element vectors x and y via bli_cswap(); a zero-length
    // request returns without calling it.
    if ( bli_zero_dim1( n ) ) return;

    bli_cswap( n, x, incx, y, incy );
}
| void bli_czcopymr | ( | uplo_t | uplo, |
| int | m, | ||
| int | n, | ||
| scomplex * | a, | ||
| int | a_rs, | ||
| int | a_cs, | ||
| dcomplex * | b, | ||
| int | b_rs, | ||
| int | b_cs | ||
| ) |
References bli_czcopyv(), bli_is_row_storage(), bli_is_upper(), bli_zero_dim2(), and BLIS_NO_TRANSPOSE.
Referenced by FLA_Copyr_external().
{
    // Copy the triangle of single-complex A selected by uplo into double-complex
    // B, one bli_czcopyv() call per column (or per row if B is row-major).
    scomplex* a_line;
    dcomplex* b_line;
    int       ld_a, inc_a; // A: stride between passes / within a pass
    int       ld_b, inc_b; // B: stride between passes / within a pass
    int       n_pass;      // number of rows/columns visited
    int       len_cap;     // full length of one row/column
    int       len_pass;    // elements copied in the current pass
    int       k;

    // Nothing to copy for an empty matrix.
    if ( bli_zero_dim2( m, n ) ) return;

    // Column-major defaults.
    n_pass  = n;
    len_cap = m;
    ld_a    = a_cs;
    inc_a   = a_rs;
    ld_b    = b_cs;
    inc_b   = b_rs;

    // Row-major B: traverse both matrices by rows for locality; the triangle
    // flag is toggled to go with the swapped traversal.
    if ( bli_is_row_storage( b_rs, b_cs ) )
    {
        bli_swap_ints( n_pass, len_cap );
        bli_swap_ints( ld_a, inc_a );
        bli_swap_ints( ld_b, inc_b );
        bli_toggle_uplo( uplo );
    }

    if ( bli_is_upper( uplo ) )
    {
        // Pass k covers its first k+1 entries, capped at the full length.
        for ( k = 0; k < n_pass; k++ )
        {
            len_pass = bli_min( k + 1, len_cap );

            bli_czcopyv( BLIS_NO_TRANSPOSE,
                         len_pass,
                         a + k*ld_a, inc_a,
                         b + k*ld_b, inc_b );
        }
    }
    else // lower triangle
    {
        // Pass k starts on the diagonal; stop once no elements remain.
        for ( k = 0; k < n_pass && k < len_cap; k++ )
        {
            len_pass = len_cap - k;
            a_line   = a + k*ld_a + k*inc_a;
            b_line   = b + k*ld_b + k*inc_b;

            bli_czcopyv( BLIS_NO_TRANSPOSE,
                         len_pass,
                         a_line, inc_a,
                         b_line, inc_b );
        }
    }
}
| void bli_czcopymrt | ( | uplo_t | uplo, |
| trans_t | trans, | ||
| int | m, | ||
| int | n, | ||
| scomplex * | a, | ||
| int | a_rs, | ||
| int | a_cs, | ||
| dcomplex * | b, | ||
| int | b_rs, | ||
| int | b_cs | ||
| ) |
References bli_czcopyv(), bli_does_trans(), bli_is_col_storage(), bli_is_lower(), bli_proj_trans_to_conj(), and bli_zero_dim2().
Referenced by FLA_Copyrt_external().
{
    // Copy the triangle of (optionally transposed) single-complex A selected by
    // uplo into double-complex B, one bli_czcopyv() call per column or row of B.
    scomplex* a_line;
    dcomplex* b_line;
    int       ld_a, inc_a; // A: stride between passes / within a pass
    int       ld_b, inc_b; // B: stride between passes / within a pass
    int       n_pass;      // number of rows/columns visited
    int       len_cap;     // upper bound on the length of one pass
    int       len_pass;    // elements copied in the current pass
    int       shrinking;   // TRUE: passes start on the diagonal and get shorter
    int       diag_off;    // elements skipped at the start of a pass
    int       k;
    conj_t    conj;

    // Nothing to copy for an empty matrix.
    if ( bli_zero_dim2( m, n ) ) return;

    // Strides depend only on B's storage; pass count and length pattern also
    // depend on which triangle is requested.
    if ( bli_is_col_storage( b_rs, b_cs ) )
    {
        ld_a  = a_cs;
        inc_a = a_rs;
        ld_b  = b_cs;
        inc_b = b_rs;

        if ( bli_is_lower( uplo ) )
        {
            n_pass    = bli_min( m, n );
            len_cap   = m;
            shrinking = TRUE;
        }
        else // upper
        {
            n_pass    = n;
            len_cap   = bli_min( m, n );
            shrinking = FALSE;
        }
    }
    else // row storage
    {
        ld_a  = a_rs;
        inc_a = a_cs;
        ld_b  = b_rs;
        inc_b = b_cs;

        if ( bli_is_lower( uplo ) )
        {
            n_pass    = m;
            len_cap   = bli_min( m, n );
            shrinking = FALSE;
        }
        else // upper
        {
            n_pass    = bli_min( m, n );
            len_cap   = n;
            shrinking = TRUE;
        }
    }

    // A transpose of A is realized by exchanging A's two strides.
    if ( bli_does_trans( trans ) )
    {
        bli_swap_ints( ld_a, inc_a );
    }

    // Conjugation component of trans, handed on to bli_czcopyv().
    conj = bli_proj_trans_to_conj( trans );

    for ( k = 0; k < n_pass; k++ )
    {
        if ( shrinking )
        {
            // Start on the diagonal; one element fewer each pass.
            diag_off = k;
            len_pass = len_cap - k;
        }
        else
        {
            // Start at the edge; one element more each pass, up to the cap.
            diag_off = 0;
            len_pass = bli_min( k + 1, len_cap );
        }

        a_line = a + k*ld_a + diag_off*inc_a;
        b_line = b + k*ld_b + diag_off*inc_b;

        bli_czcopyv( conj,
                     len_pass,
                     a_line, inc_a,
                     b_line, inc_b );
    }
}
| void bli_czcopymt | ( | trans_t | trans, |
| int | m, | ||
| int | n, | ||
| scomplex * | a, | ||
| int | a_rs, | ||
| int | a_cs, | ||
| dcomplex * | b, | ||
| int | b_rs, | ||
| int | b_cs | ||
| ) |
References bli_czcopyv(), bli_does_trans(), bli_is_row_storage(), bli_is_vector(), bli_proj_trans_to_conj(), bli_vector_dim(), bli_vector_inc(), bli_zero_dim2(), and BLIS_NO_TRANSPOSE.
Referenced by FLA_Copy_external(), and FLA_Copyt_external().
{
    // Copy (optionally transposed) single-complex A into double-complex B, one
    // bli_czcopyv() call per column (or row) of B, or one call for vectors.
    scomplex* a_line;
    dcomplex* b_line;
    int       ld_a, inc_a; // A: stride between passes / within a pass
    int       ld_b, inc_b; // B: stride between passes / within a pass
    int       n_pass;      // number of bli_czcopyv() calls
    int       len_pass;    // elements copied per call
    int       k;
    conj_t    conj;

    // Nothing to copy for an empty matrix.
    if ( bli_zero_dim2( m, n ) ) return;

    // Default: column-by-column traversal of B.
    n_pass   = n;
    len_pass = m;
    ld_a     = a_cs;
    inc_a    = a_rs;
    ld_b     = b_cs;
    inc_b    = b_rs;

    if ( bli_is_vector( m, n ) )
    {
        // Vectors are copied with exactly one call.
        n_pass   = 1;
        len_pass = bli_vector_dim( m, n );
        ld_a     = 1; // only ever multiplied by k == 0, so the value is moot
        inc_a    = bli_vector_inc( trans, m, n, a_rs, a_cs );
        ld_b     = 1; // likewise unused
        inc_b    = bli_vector_inc( BLIS_NO_TRANSPOSE, m, n, b_rs, b_cs );
    }
    else
    {
        // A transpose of A is realized by exchanging A's two strides.
        if ( bli_does_trans( trans ) )
        {
            bli_swap_ints( ld_a, inc_a );
        }

        // Row-major B: traverse by rows instead for better spatial locality.
        if ( bli_is_row_storage( b_rs, b_cs ) )
        {
            bli_swap_ints( n_pass, len_pass );
            bli_swap_ints( ld_a, inc_a );
            bli_swap_ints( ld_b, inc_b );
        }
    }

    // Conjugation component of trans, handed on to bli_czcopyv().
    conj = bli_proj_trans_to_conj( trans );

    for ( k = 0; k < n_pass; ++k )
    {
        a_line = a + k*ld_a;
        b_line = b + k*ld_b;

        bli_czcopyv( conj,
                     len_pass,
                     a_line, inc_a,
                     b_line, inc_b );
    }
}
| void bli_czcopyv | ( | conj_t | conj, |
| int | m, | ||
| scomplex * | x, | ||
| int | incx, | ||
| dcomplex * | y, | ||
| int | incy | ||
| ) |
References bli_is_conj(), bli_zconjv(), bli_zero_dim1(), scomplex::imag, dcomplex::imag, scomplex::real, and dcomplex::real.
Referenced by bli_czcopymr(), bli_czcopymrt(), and bli_czcopymt().
{
    // Widen the m single-complex elements of x (stride incx) into the
    // double-complex vector y (stride incy), then conjugate y if conj asks.
    scomplex* src;
    dcomplex* dst;
    int       left; // elements still to copy

    // Nothing to copy for a zero-length vector.
    if ( bli_zero_dim1( m ) ) return;

    src = x;
    dst = y;

    for ( left = m; left > 0; --left )
    {
        dst->real = src->real;
        dst->imag = src->imag;

        src += incx;
        dst += incy;
    }

    // Conjugation is applied to the destination after the copy.
    if ( bli_is_conj( conj ) )
    {
        bli_zconjv( m, y, incy );
    }
}
| void bli_damax | ( | int | n, |
| double * | x, | ||
| int | incx, | ||
| int * | index | ||
| ) |
References cblas_idamax(), and F77_idamax().
Referenced by FLA_Amax_external(), FLA_LU_piv_opd_var3(), FLA_LU_piv_opd_var4(), FLA_LU_piv_opd_var5(), and FLA_SA_LU_unb().
{
    // Store in *index the result of the BLAS idamax routine for the vector x,
    // through the binding chosen at build time.
#ifndef BLIS_ENABLE_CBLAS_INTERFACES
    // Fortran-77 binding: one is subtracted from the returned position,
    // presumably to match the zero-based value the CBLAS branch stores.
    *index = F77_idamax( &n, x, &incx ) - 1;
#else
    // CBLAS binding: the returned value is stored unchanged.
    *index = cblas_idamax( n, x, incx );
#endif
}
| void bli_dasum | ( | int | n, |
| double * | x, | ||
| int | incx, | ||
| double * | norm | ||
| ) |
References cblas_dasum(), and F77_dasum().
Referenced by FLA_Asum_external().
{
    // Store in *norm the value returned by the BLAS dasum routine for the
    // vector x, through the binding chosen at build time.
#if defined( BLIS_ENABLE_CBLAS_INTERFACES )
    // CBLAS binding: length and stride are passed by value.
    *norm = cblas_dasum( n, x, incx );
#else
    // Fortran-77 binding: length and stride are passed by address.
    *norm = F77_dasum( &n, x, &incx );
#endif
}
| void bli_daxpy | ( | int | n, |
| double * | alpha, | ||
| double * | x, | ||
| int | incx, | ||
| double * | y, | ||
| int | incy | ||
| ) |
References cblas_daxpy(), and F77_daxpy().
Referenced by bli_daxpymt(), bli_daxpysmt(), bli_daxpysv(), and bli_daxpyv().
{
    // Forward (n, alpha, x, incx, y, incy) to the BLAS daxpy routine through
    // the binding chosen at build time.
#ifndef BLIS_ENABLE_CBLAS_INTERFACES
    // Fortran-77 binding: everything is passed by address, alpha included.
    F77_daxpy( &n, alpha, x, &incx, y, &incy );
#else
    // CBLAS binding: the scalar is passed by value, hence the dereference.
    cblas_daxpy( n, *alpha, x, incx, y, incy );
#endif
}
| void bli_daxpymrt | ( | uplo_t | uplo, |
| trans_t | trans, | ||
| int | m, | ||
| int | n, | ||
| double * | alpha, | ||
| double * | a, | ||
| int | a_rs, | ||
| int | a_cs, | ||
| double * | b, | ||
| int | b_rs, | ||
| int | b_cs | ||
| ) |
References bli_daxpyv(), bli_does_trans(), bli_is_col_storage(), bli_is_lower(), bli_proj_trans_to_conj(), and bli_zero_dim2().
Referenced by FLA_Axpyrt_external().
{
    // Apply bli_daxpyv() with scalar alpha from the triangle of (optionally
    // transposed) A selected by uplo onto the same triangle of B, one call per
    // column or row of B.
    double* a_line;
    double* b_line;
    int     ld_a, inc_a; // A: stride between passes / within a pass
    int     ld_b, inc_b; // B: stride between passes / within a pass
    int     n_pass;      // number of rows/columns visited
    int     len_cap;     // upper bound on the length of one pass
    int     len_pass;    // elements processed in the current pass
    int     shrinking;   // TRUE: passes start on the diagonal and get shorter
    int     diag_off;    // elements skipped at the start of a pass
    int     k;
    conj_t  conj;

    // Nothing to do for an empty matrix.
    if ( bli_zero_dim2( m, n ) ) return;

    // Strides depend only on B's storage; pass count and length pattern also
    // depend on which triangle is requested.
    if ( bli_is_col_storage( b_rs, b_cs ) )
    {
        ld_a  = a_cs;
        inc_a = a_rs;
        ld_b  = b_cs;
        inc_b = b_rs;

        if ( bli_is_lower( uplo ) )
        {
            n_pass    = bli_min( m, n );
            len_cap   = m;
            shrinking = TRUE;
        }
        else // upper
        {
            n_pass    = n;
            len_cap   = bli_min( m, n );
            shrinking = FALSE;
        }
    }
    else // row storage
    {
        ld_a  = a_rs;
        inc_a = a_cs;
        ld_b  = b_rs;
        inc_b = b_cs;

        if ( bli_is_lower( uplo ) )
        {
            n_pass    = m;
            len_cap   = bli_min( m, n );
            shrinking = FALSE;
        }
        else // upper
        {
            n_pass    = bli_min( m, n );
            len_cap   = n;
            shrinking = TRUE;
        }
    }

    // A transpose of A is realized by exchanging A's two strides.
    if ( bli_does_trans( trans ) )
    {
        bli_swap_ints( ld_a, inc_a );
    }

    // Conjugation component of trans, handed on to bli_daxpyv().
    conj = bli_proj_trans_to_conj( trans );

    for ( k = 0; k < n_pass; k++ )
    {
        if ( shrinking )
        {
            // Start on the diagonal; one element fewer each pass.
            diag_off = k;
            len_pass = len_cap - k;
        }
        else
        {
            // Start at the edge; one element more each pass, up to the cap.
            diag_off = 0;
            len_pass = bli_min( k + 1, len_cap );
        }

        a_line = a + k*ld_a + diag_off*inc_a;
        b_line = b + k*ld_b + diag_off*inc_b;

        bli_daxpyv( conj,
                    len_pass,
                    alpha,
                    a_line, inc_a,
                    b_line, inc_b );
    }
}
| void bli_daxpymt | ( | trans_t | trans, |
| int | m, | ||
| int | n, | ||
| double * | alpha, | ||
| double * | a, | ||
| int | a_rs, | ||
| int | a_cs, | ||
| double * | b, | ||
| int | b_rs, | ||
| int | b_cs | ||
| ) |
References bli_daxpy(), bli_does_notrans(), bli_does_trans(), bli_is_col_storage(), bli_is_row_storage(), bli_is_vector(), bli_vector_dim(), bli_vector_inc(), bli_zero_dim2(), and BLIS_NO_TRANSPOSE.
Referenced by bli_dgemm(), bli_dsymm(), bli_dtrmmsx(), bli_dtrsmsx(), FLA_Axpy_external(), and FLA_Axpyt_external().
{
    // Apply bli_daxpy() with scalar alpha from (optionally transposed) A onto
    // B, one call per column (or row), or a single call for vectors.
    double* a_line;
    double* b_line;
    int     ld_a, inc_a; // A: stride between passes / within a pass
    int     ld_b, inc_b; // B: stride between passes / within a pass
    int     n_pass;      // number of bli_daxpy() calls
    int     len_pass;    // elements processed per call
    int     k;

    // Nothing to do for an empty matrix.
    if ( bli_zero_dim2( m, n ) ) return;

    // Default: column-by-column traversal.
    n_pass   = n;
    len_pass = m;
    ld_a     = a_cs;
    inc_a    = a_rs;
    ld_b     = b_cs;
    inc_b    = b_rs;

    if ( bli_is_vector( m, n ) )
    {
        // Vectors are handled with exactly one axpy call.
        n_pass   = 1;
        len_pass = bli_vector_dim( m, n );
        ld_a     = 1; // only ever multiplied by k == 0, so the value is moot
        inc_a    = bli_vector_inc( trans, m, n, a_rs, a_cs );
        ld_b     = 1; // likewise unused
        inc_b    = bli_vector_inc( BLIS_NO_TRANSPOSE, m, n, b_rs, b_cs );
    }
    else
    {
        // A transpose of A is realized by exchanging A's two strides.
        if ( bli_does_trans( trans ) )
        {
            bli_swap_ints( ld_a, inc_a );
        }

        // Traverse by rows only when B is row-major and A, after the possible
        // transposition, is effectively row-major as well.
        if ( bli_is_row_storage( b_rs, b_cs ) &&
             ( ( bli_is_col_storage( a_rs, a_cs ) && bli_does_trans( trans ) ) ||
               ( bli_is_row_storage( a_rs, a_cs ) && bli_does_notrans( trans ) ) ) )
        {
            bli_swap_ints( n_pass, len_pass );
            bli_swap_ints( ld_a, inc_a );
            bli_swap_ints( ld_b, inc_b );
        }
    }

    for ( k = 0; k < n_pass; k++ )
    {
        a_line = a + k*ld_a;
        b_line = b + k*ld_b;

        bli_daxpy( len_pass,
                   alpha,
                   a_line, inc_a,
                   b_line, inc_b );
    }
}
| void bli_daxpysmt | ( | trans_t | trans, |
| int | m, | ||
| int | n, | ||
| double * | alpha0, | ||
| double * | alpha1, | ||
| double * | a, | ||
| int | a_rs, | ||
| int | a_cs, | ||
| double * | beta, | ||
| double * | b, | ||
| int | b_rs, | ||
| int | b_cs | ||
| ) |
References bli_daxpy(), bli_does_notrans(), bli_does_trans(), bli_dscal(), bli_is_col_storage(), bli_is_row_storage(), bli_is_vector(), bli_vector_dim(), bli_vector_inc(), bli_zero_dim2(), and BLIS_NO_TRANSPOSE.
Referenced by FLA_Axpys_external().
{
    // For each column (or row): first scale that line of B by beta with
    // bli_dscal(), then apply bli_daxpy() with scalar (*alpha0 * *alpha1)
    // from the matching line of (optionally transposed) A.
    double* a_line;
    double* b_line;
    double  alpha_both;  // product of the two alpha scalars
    int     ld_a, inc_a; // A: stride between passes / within a pass
    int     ld_b, inc_b; // B: stride between passes / within a pass
    int     n_pass;      // number of scal+axpy pairs issued
    int     len_pass;    // elements processed per pass
    int     k;

    // Nothing to do for an empty matrix.
    if ( bli_zero_dim2( m, n ) ) return;

    alpha_both = (*alpha0) * (*alpha1);

    // Default: column-by-column traversal.
    n_pass   = n;
    len_pass = m;
    ld_a     = a_cs;
    inc_a    = a_rs;
    ld_b     = b_cs;
    inc_b    = b_rs;

    if ( bli_is_vector( m, n ) )
    {
        // Vectors are handled with exactly one scal+axpy pair.
        n_pass   = 1;
        len_pass = bli_vector_dim( m, n );
        ld_a     = 1; // only ever multiplied by k == 0, so the value is moot
        inc_a    = bli_vector_inc( trans, m, n, a_rs, a_cs );
        ld_b     = 1; // likewise unused
        inc_b    = bli_vector_inc( BLIS_NO_TRANSPOSE, m, n, b_rs, b_cs );
    }
    else
    {
        // A transpose of A is realized by exchanging A's two strides.
        if ( bli_does_trans( trans ) )
        {
            bli_swap_ints( ld_a, inc_a );
        }

        // Traverse by rows only when B is row-major and A, after the possible
        // transposition, is effectively row-major as well.
        if ( bli_is_row_storage( b_rs, b_cs ) &&
             ( ( bli_is_col_storage( a_rs, a_cs ) && bli_does_trans( trans ) ) ||
               ( bli_is_row_storage( a_rs, a_cs ) && bli_does_notrans( trans ) ) ) )
        {
            bli_swap_ints( n_pass, len_pass );
            bli_swap_ints( ld_a, inc_a );
            bli_swap_ints( ld_b, inc_b );
        }
    }

    for ( k = 0; k < n_pass; k++ )
    {
        a_line = a + k*ld_a;
        b_line = b + k*ld_b;

        // Scale the line of B before the contribution from A is applied.
        bli_dscal( len_pass,
                   beta,
                   b_line, inc_b );

        bli_daxpy( len_pass,
                   &alpha_both,
                   a_line, inc_a,
                   b_line, inc_b );
    }
}
| void bli_daxpysv | ( | int | n, |
| double * | alpha0, | ||
| double * | alpha1, | ||
| double * | x, | ||
| int | incx, | ||
| double * | beta, | ||
| double * | y, | ||
| int | incy | ||
| ) |
References bli_daxpy(), bli_dscal(), and bli_zero_dim1().
Referenced by FLA_Lyap_h_opd_var2(), FLA_Lyap_h_opd_var3(), FLA_Lyap_h_opd_var4(), FLA_Lyap_n_opd_var2(), FLA_Lyap_n_opd_var3(), and FLA_Lyap_n_opd_var4().
{
    // Scale y by beta with bli_dscal(), then apply bli_daxpy() from x onto y
    // with scalar (*alpha0 * *alpha1).
    double alpha_both; // product of the two alpha scalars

    // Nothing to do for a zero-length vector.
    if ( bli_zero_dim1( n ) ) return;

    alpha_both = (*alpha0) * (*alpha1);

    // y is scaled before the contribution from x is applied.
    bli_dscal( n, beta, y, incy );

    bli_daxpy( n, &alpha_both, x, incx, y, incy );
}
| void bli_daxpyv | ( | conj_t | conj, |
| int | n, | ||
| double * | alpha, | ||
| double * | x, | ||
| int | incx, | ||
| double * | y, | ||
| int | incy | ||
| ) |
References bli_daxpy().
Referenced by bli_daxpymrt(), bli_dtrmvsx(), bli_dtrsvsx(), FLA_Apply_H2_UT_l_opd_var1(), FLA_Apply_H2_UT_r_opd_var1(), FLA_Apply_HUD_UT_l_opd_var1(), FLA_Bidiag_UT_u_step_ofd_var2(), FLA_Bidiag_UT_u_step_ofd_var3(), FLA_Bidiag_UT_u_step_ofd_var4(), FLA_Bidiag_UT_u_step_opd_var2(), FLA_Bidiag_UT_u_step_opd_var3(), FLA_Bidiag_UT_u_step_opd_var4(), FLA_Bidiag_UT_u_step_opd_var5(), FLA_Eig_gest_il_opd_var1(), FLA_Eig_gest_il_opd_var2(), FLA_Eig_gest_il_opd_var3(), FLA_Eig_gest_il_opd_var4(), FLA_Eig_gest_il_opd_var5(), FLA_Eig_gest_iu_opd_var1(), FLA_Eig_gest_iu_opd_var2(), FLA_Eig_gest_iu_opd_var3(), FLA_Eig_gest_iu_opd_var4(), FLA_Eig_gest_iu_opd_var5(), FLA_Eig_gest_nl_opd_var1(), FLA_Eig_gest_nl_opd_var2(), FLA_Eig_gest_nl_opd_var4(), FLA_Eig_gest_nl_opd_var5(), FLA_Eig_gest_nu_opd_var1(), FLA_Eig_gest_nu_opd_var2(), FLA_Eig_gest_nu_opd_var4(), FLA_Eig_gest_nu_opd_var5(), FLA_Fused_Ahx_Axpy_Ax_opd_var1(), FLA_Fused_Gerc2_Ahx_Axpy_Ax_opd_var1(), FLA_Fused_Uhu_Yhu_Zhu_opd_var1(), FLA_Fused_UZhu_ZUhu_opd_var1(), FLA_Hess_UT_step_ofd_var2(), FLA_Hess_UT_step_ofd_var3(), FLA_Hess_UT_step_ofd_var4(), FLA_Hess_UT_step_opd_var2(), FLA_Hess_UT_step_opd_var3(), FLA_Hess_UT_step_opd_var4(), FLA_Hess_UT_step_opd_var5(), FLA_Tridiag_UT_l_step_ofd_var2(), FLA_Tridiag_UT_l_step_ofd_var3(), FLA_Tridiag_UT_l_step_opd_var1(), FLA_Tridiag_UT_l_step_opd_var2(), and FLA_Tridiag_UT_l_step_opd_var3().
{
	// Thin forwarder: conj is not consulted here; every other argument is
	// handed to bli_daxpy() unchanged.
	bli_daxpy( n, alpha, x, incx, y, incy );
}
| void bli_dccopymr | ( | uplo_t | uplo, |
| int | m, | ||
| int | n, | ||
| double * | a, | ||
| int | a_rs, | ||
| int | a_cs, | ||
| scomplex * | b, | ||
| int | b_rs, | ||
| int | b_cs | ||
| ) |
References bli_dccopyv(), bli_is_row_storage(), bli_is_upper(), bli_zero_dim2(), and BLIS_NO_TRANSPOSE.
Referenced by FLA_Copyr_external().
{
	// Copies the triangle of A selected by uplo into B, one column (or row)
	// segment at a time, widening each double to an scomplex through
	// bli_dccopyv().
	int a_ld, a_inc;
	int b_ld, b_inc;
	int n_outer;
	int seg_max;
	int seg_len;
	int k;

	// An empty matrix needs no work.
	if ( bli_zero_dim2( m, n ) )
	{
		return;
	}

	// Start from a column-by-column traversal.
	n_outer = n;
	seg_max = m;
	a_ld    = a_cs;
	a_inc   = a_rs;
	b_ld    = b_cs;
	b_inc   = b_rs;

	// When B is stored by rows, walk rows instead. The roles of the two
	// dimensions and strides are exchanged, and bli_toggle_uplo() is applied
	// so the triangle selection follows the exchange.
	if ( bli_is_row_storage( b_rs, b_cs ) )
	{
		bli_swap_ints( n_outer, seg_max );
		bli_swap_ints( a_ld, a_inc );
		bli_swap_ints( b_ld, b_inc );
		bli_toggle_uplo( uplo );
	}

	if ( bli_is_upper( uplo ) )
	{
		// Segment k starts at the beginning of column/row k and holds
		// k + 1 elements, capped at seg_max.
		for ( k = 0; k < n_outer; k++ )
		{
			seg_len = bli_min( k + 1, seg_max );

			bli_dccopyv( BLIS_NO_TRANSPOSE,
			             seg_len,
			             a + k*a_ld, a_inc,
			             b + k*b_ld, b_inc );
		}
	}
	else
	{
		// Segment k starts on the diagonal and holds seg_max - k elements;
		// the walk stops as soon as a segment would be empty.
		for ( k = 0; k < n_outer && k < seg_max; k++ )
		{
			seg_len = seg_max - k;

			bli_dccopyv( BLIS_NO_TRANSPOSE,
			             seg_len,
			             a + k*a_ld + k*a_inc, a_inc,
			             b + k*b_ld + k*b_inc, b_inc );
		}
	}
}
| void bli_dccopymrt | ( | uplo_t | uplo, |
| trans_t | trans, | ||
| int | m, | ||
| int | n, | ||
| double * | a, | ||
| int | a_rs, | ||
| int | a_cs, | ||
| scomplex * | b, | ||
| int | b_rs, | ||
| int | b_cs | ||
| ) |
References bli_dccopyv(), bli_does_trans(), bli_is_col_storage(), bli_is_lower(), bli_proj_trans_to_conj(), and bli_zero_dim2().
Referenced by FLA_Copyrt_external().
{
// Copies the triangle of A selected by uplo into B, widening each double to
// an scomplex through bli_dccopyv(). When trans requests a transposition, A
// is read with its two strides exchanged.
double* a_begin;
scomplex* b_begin;
int lda, inca;
int ldb, incb;
int n_iter;
int n_elem;
int n_elem_max;
int n_elem_is_descending;
int j;
conj_t conj;
// Return early if possible.
if ( bli_zero_dim2( m, n ) ) return;
// Initialize variables based on storage format of B and value of uplo.
// In every case the outer loop advances by ld* and each vector copy by
// inc*. n_elem_is_descending records whether successive segments start on
// the diagonal and shrink (TRUE) or start at the matrix edge and grow
// (FALSE).
if ( bli_is_col_storage( b_rs, b_cs ) )
{
// B is column-stored: iterate over columns.
if ( bli_is_lower( uplo ) )
{
n_iter = bli_min( m, n );
n_elem_max = m;
lda = a_cs;
inca = a_rs;
ldb = b_cs;
incb = b_rs;
n_elem_is_descending = TRUE;
}
else // if ( bli_is_upper( uplo ) )
{
n_iter = n;
n_elem_max = bli_min( m, n );
lda = a_cs;
inca = a_rs;
ldb = b_cs;
incb = b_rs;
n_elem_is_descending = FALSE;
}
}
else // if ( bli_is_row_storage( b_rs, b_cs ) )
{
// B is row-stored: iterate over rows, so the row/column strides trade
// places relative to the column-stored case.
if ( bli_is_lower( uplo ) )
{
n_iter = m;
n_elem_max = bli_min( m, n );
lda = a_rs;
inca = a_cs;
ldb = b_rs;
incb = b_cs;
n_elem_is_descending = FALSE;
}
else // if ( bli_is_upper( uplo ) )
{
n_iter = bli_min( m, n );
n_elem_max = n;
lda = a_rs;
inca = a_cs;
ldb = b_rs;
incb = b_cs;
n_elem_is_descending = TRUE;
}
}
// Swap lda and inca if we're doing a transpose.
if ( bli_does_trans( trans ) )
{
bli_swap_ints( lda, inca );
}
// Extract conj component from trans parameter.
conj = bli_proj_trans_to_conj( trans );
// Choose the loop based on whether n_elem will be shrinking or growing
// with each iteration.
if ( n_elem_is_descending )
{
for ( j = 0; j < n_iter; j++ )
{
// Segment j begins at the diagonal element (offset j*ld + j*inc) and is
// one element shorter than the previous one.
n_elem = n_elem_max - j;
a_begin = a + j*lda + j*inca;
b_begin = b + j*ldb + j*incb;
bli_dccopyv( conj,
n_elem,
a_begin, inca,
b_begin, incb );
}
}
else // if ( n_elem_is_ascending )
{
for ( j = 0; j < n_iter; j++ )
{
// Segment j begins at the start of column/row j and holds j + 1
// elements, capped at n_elem_max.
n_elem = bli_min( j + 1, n_elem_max );
a_begin = a + j*lda;
b_begin = b + j*ldb;
bli_dccopyv( conj,
n_elem,
a_begin, inca,
b_begin, incb );
}
}
}
| void bli_dccopymt | ( | trans_t | trans, |
| int | m, | ||
| int | n, | ||
| double * | a, | ||
| int | a_rs, | ||
| int | a_cs, | ||
| scomplex * | b, | ||
| int | b_rs, | ||
| int | b_cs | ||
| ) |
References bli_dccopyv(), bli_does_trans(), bli_is_row_storage(), bli_is_vector(), bli_proj_trans_to_conj(), bli_vector_dim(), bli_vector_inc(), bli_zero_dim2(), and BLIS_NO_TRANSPOSE.
Referenced by FLA_Copy_external(), and FLA_Copyt_external().
{
	// Copies the m x n double matrix A (read with exchanged strides when
	// trans requests a transposition) into the scomplex matrix B by issuing
	// one bli_dccopyv() per column or row.
	int    n_outer;
	int    seg_len;
	int    a_ld, a_inc;
	int    b_ld, b_inc;
	int    k;
	conj_t conj;

	// An empty matrix needs no work.
	if ( bli_zero_dim2( m, n ) )
	{
		return;
	}

	if ( bli_is_vector( m, n ) )
	{
		// Vector operands are handled with a single bli_dccopyv() call. The
		// leading dimensions are placeholders: they are only ever
		// multiplied by k == 0.
		n_outer = 1;
		seg_len = bli_vector_dim( m, n );
		a_ld    = 1;
		a_inc   = bli_vector_inc( trans, m, n, a_rs, a_cs );
		b_ld    = 1;
		b_inc   = bli_vector_inc( BLIS_NO_TRANSPOSE, m, n, b_rs, b_cs );
	}
	else
	{
		// General matrix: begin with a column-by-column walk of B.
		n_outer = n;
		seg_len = m;
		a_ld    = a_cs;
		a_inc   = a_rs;
		b_ld    = b_cs;
		b_inc   = b_rs;

		// A transposed A is read with its two strides exchanged.
		if ( bli_does_trans( trans ) )
		{
			bli_swap_ints( a_ld, a_inc );
		}

		// For a row-stored B, walk by rows instead for better locality.
		if ( bli_is_row_storage( b_rs, b_cs ) )
		{
			bli_swap_ints( n_outer, seg_len );
			bli_swap_ints( a_ld, a_inc );
			bli_swap_ints( b_ld, b_inc );
		}
	}

	// Only the conj component of trans is passed down to the vector copy.
	conj = bli_proj_trans_to_conj( trans );

	for ( k = 0; k < n_outer; ++k )
	{
		bli_dccopyv( conj,
		             seg_len,
		             a + k*a_ld, a_inc,
		             b + k*b_ld, b_inc );
	}
}
| void bli_dccopyv | ( | conj_t | conj, |
| int | m, | ||
| double * | x, | ||
| int | incx, | ||
| scomplex * | y, | ||
| int | incy | ||
| ) |
References bli_zero_dim1(), scomplex::imag, and scomplex::real.
Referenced by bli_dccopymr(), bli_dccopymrt(), and bli_dccopymt().
{
	// Widens m doubles read from x (stride incx) into scomplex entries of y
	// (stride incy): the real part receives the source value and the
	// imaginary part is cleared. conj is not consulted.
	double*   src;
	scomplex* dst;
	int       k;

	// Nothing to copy when bli_zero_dim1() reports an empty dimension.
	if ( bli_zero_dim1( m ) )
	{
		return;
	}

	for ( k = 0, src = x, dst = y; k < m; ++k, src += incx, dst += incy )
	{
		dst->real = *src;
		dst->imag = 0.0F;
	}
}
| void bli_dconjm | ( | int | m, |
| int | n, | ||
| double * | a, | ||
| int | a_rs, | ||
| int | a_cs | ||
| ) |
{
	// Intentionally empty: this double-precision real variant leaves the
	// matrix untouched and reads none of its arguments.
}
| void bli_dconjmr | ( | uplo_t | uplo, |
| int | m, | ||
| int | n, | ||
| double * | a, | ||
| int | a_rs, | ||
| int | a_cs | ||
| ) |
{
	// Intentionally empty: this double-precision real variant leaves the
	// matrix untouched and reads none of its arguments, uplo included.
}
| void bli_dconjv | ( | int | m, |
| double * | x, | ||
| int | incx | ||
| ) |
Referenced by FLA_Bidiag_UT_u_step_ofd_var3(), FLA_Bidiag_UT_u_step_ofd_var4(), FLA_Bidiag_UT_u_step_opd_var3(), and FLA_Bidiag_UT_u_step_opd_var4().
{
	// Intentionally empty: this double-precision real variant leaves the
	// vector untouched and reads none of its arguments.
}
| void bli_dcopy | ( | int | m, |
| double * | x, | ||
| int | incx, | ||
| double * | y, | ||
| int | incy | ||
| ) |
References cblas_dcopy(), and F77_dcopy().
Referenced by bli_dcopymr(), bli_dcopymt(), bli_dcopyv(), and FLA_SA_LU_unb().
{
	// Forwards the strided copy of m elements from x to y to whichever BLAS
	// binding this build was configured for.
#ifndef BLIS_ENABLE_CBLAS_INTERFACES
	// Fortran-77 binding: the integer arguments are passed by address.
	F77_dcopy( &m, x, &incx, y, &incy );
#else
	// CBLAS binding: the integer arguments are passed by value.
	cblas_dcopy( m, x, incx, y, incy );
#endif
}
| void bli_dcopymr | ( | uplo_t | uplo, |
| int | m, | ||
| int | n, | ||
| double * | a, | ||
| int | a_rs, | ||
| int | a_cs, | ||
| double * | b, | ||
| int | b_rs, | ||
| int | b_cs | ||
| ) |
References bli_dcopy(), bli_is_row_storage(), bli_is_upper(), and bli_zero_dim2().
Referenced by bli_dcreate_contigmr(), bli_dfree_saved_contigmr(), and FLA_Copyr_external().
{
	// Copies the triangle of A selected by uplo into B, one column (or row)
	// segment at a time, using bli_dcopy() for each segment.
	int a_ld, a_inc;
	int b_ld, b_inc;
	int n_outer;
	int seg_max;
	int seg_len;
	int k;

	// An empty matrix needs no work.
	if ( bli_zero_dim2( m, n ) )
	{
		return;
	}

	// Start from a column-by-column traversal.
	n_outer = n;
	seg_max = m;
	a_ld    = a_cs;
	a_inc   = a_rs;
	b_ld    = b_cs;
	b_inc   = b_rs;

	// Only when both A and B are stored by rows, walk rows instead. The
	// roles of the two dimensions and strides are exchanged, and
	// bli_toggle_uplo() is applied so the triangle selection follows the
	// exchange.
	if ( bli_is_row_storage( b_rs, b_cs ) && bli_is_row_storage( a_rs, a_cs ) )
	{
		bli_swap_ints( n_outer, seg_max );
		bli_swap_ints( a_ld, a_inc );
		bli_swap_ints( b_ld, b_inc );
		bli_toggle_uplo( uplo );
	}

	if ( bli_is_upper( uplo ) )
	{
		// Segment k starts at the beginning of column/row k and holds
		// k + 1 elements, capped at seg_max.
		for ( k = 0; k < n_outer; k++ )
		{
			seg_len = bli_min( k + 1, seg_max );

			bli_dcopy( seg_len,
			           a + k*a_ld, a_inc,
			           b + k*b_ld, b_inc );
		}
	}
	else
	{
		// Segment k starts on the diagonal and holds seg_max - k elements;
		// the walk stops as soon as a segment would be empty.
		for ( k = 0; k < n_outer && k < seg_max; k++ )
		{
			seg_len = seg_max - k;

			bli_dcopy( seg_len,
			           a + k*a_ld + k*a_inc, a_inc,
			           b + k*b_ld + k*b_inc, b_inc );
		}
	}
}
| void bli_dcopymrt | ( | uplo_t | uplo, |
| trans_t | trans, | ||
| int | m, | ||
| int | n, | ||
| double * | a, | ||
| int | a_rs, | ||
| int | a_cs, | ||
| double * | b, | ||
| int | b_rs, | ||
| int | b_cs | ||
| ) |
References bli_dcopyv(), bli_does_trans(), bli_is_col_storage(), bli_is_lower(), bli_proj_trans_to_conj(), and bli_zero_dim2().
Referenced by FLA_Copyrt_external(), FLA_Lyap_h_opd_var1(), FLA_Lyap_h_opd_var2(), FLA_Lyap_h_opd_var3(), FLA_Lyap_h_opd_var4(), FLA_Lyap_n_opd_var1(), FLA_Lyap_n_opd_var2(), FLA_Lyap_n_opd_var3(), and FLA_Lyap_n_opd_var4().
{
// Copies the triangle of A selected by uplo into B, one column (or row)
// segment at a time through bli_dcopyv(). When trans requests a
// transposition, A is read with its two strides exchanged.
double* a_begin;
double* b_begin;
int lda, inca;
int ldb, incb;
int n_iter;
int n_elem;
int n_elem_max;
int n_elem_is_descending;
int j;
conj_t conj;
// Return early if possible.
if ( bli_zero_dim2( m, n ) ) return;
// Initialize variables based on storage format of B and value of uplo.
// In every case the outer loop advances by ld* and each vector copy by
// inc*. n_elem_is_descending records whether successive segments start on
// the diagonal and shrink (TRUE) or start at the matrix edge and grow
// (FALSE).
if ( bli_is_col_storage( b_rs, b_cs ) )
{
// B is column-stored: iterate over columns.
if ( bli_is_lower( uplo ) )
{
n_iter = bli_min( m, n );
n_elem_max = m;
lda = a_cs;
inca = a_rs;
ldb = b_cs;
incb = b_rs;
n_elem_is_descending = TRUE;
}
else // if ( bli_is_upper( uplo ) )
{
n_iter = n;
n_elem_max = bli_min( m, n );
lda = a_cs;
inca = a_rs;
ldb = b_cs;
incb = b_rs;
n_elem_is_descending = FALSE;
}
}
else // if ( bli_is_row_storage( b_rs, b_cs ) )
{
// B is row-stored: iterate over rows, so the row/column strides trade
// places relative to the column-stored case.
if ( bli_is_lower( uplo ) )
{
n_iter = m;
n_elem_max = bli_min( m, n );
lda = a_rs;
inca = a_cs;
ldb = b_rs;
incb = b_cs;
n_elem_is_descending = FALSE;
}
else // if ( bli_is_upper( uplo ) )
{
n_iter = bli_min( m, n );
n_elem_max = n;
lda = a_rs;
inca = a_cs;
ldb = b_rs;
incb = b_cs;
n_elem_is_descending = TRUE;
}
}
// Swap lda and inca if we're doing a transpose.
if ( bli_does_trans( trans ) )
{
bli_swap_ints( lda, inca );
}
// Extract conj component from trans parameter.
conj = bli_proj_trans_to_conj( trans );
// Choose the loop based on whether n_elem will be shrinking or growing
// with each iteration.
if ( n_elem_is_descending )
{
for ( j = 0; j < n_iter; j++ )
{
// Segment j begins at the diagonal element (offset j*ld + j*inc) and is
// one element shorter than the previous one.
n_elem = n_elem_max - j;
a_begin = a + j*lda + j*inca;
b_begin = b + j*ldb + j*incb;
bli_dcopyv( conj,
n_elem,
a_begin, inca,
b_begin, incb );
}
}
else // if ( n_elem_is_ascending )
{
for ( j = 0; j < n_iter; j++ )
{
// Segment j begins at the start of column/row j and holds j + 1
// elements, capped at n_elem_max.
n_elem = bli_min( j + 1, n_elem_max );
a_begin = a + j*lda;
b_begin = b + j*ldb;
bli_dcopyv( conj,
n_elem,
a_begin, inca,
b_begin, incb );
}
}
}
| void bli_dcopymt | ( | trans_t | trans, |
| int | m, | ||
| int | n, | ||
| double * | a, | ||
| int | a_rs, | ||
| int | a_cs, | ||
| double * | b, | ||
| int | b_rs, | ||
| int | b_cs | ||
| ) |
References bli_dcopy(), bli_does_notrans(), bli_does_trans(), bli_is_col_storage(), bli_is_row_storage(), bli_is_vector(), bli_vector_dim(), bli_vector_inc(), bli_zero_dim2(), and BLIS_NO_TRANSPOSE.
Referenced by bli_dcreate_contigm(), bli_dcreate_contigmt(), bli_dfree_saved_contigm(), bli_dfree_saved_contigmsr(), bli_dsymm(), bli_dsyr2k(), bli_dtrmmsx(), bli_dtrsmsx(), FLA_Bsvd_v_opd_var2(), FLA_Copy_external(), FLA_Copyt_external(), FLA_Tevd_v_opd_var2(), and FLA_Tevd_v_opd_var4().
{
	// Copies the m x n matrix A (read with exchanged strides when trans
	// requests a transposition) into B by issuing one bli_dcopy() per column
	// or row.
	int n_outer;
	int seg_len;
	int a_ld, a_inc;
	int b_ld, b_inc;
	int k;

	// An empty matrix needs no work.
	if ( bli_zero_dim2( m, n ) )
	{
		return;
	}

	if ( bli_is_vector( m, n ) )
	{
		// Vector operands are handled with a single bli_dcopy() call. The
		// leading dimensions are placeholders: they are only ever
		// multiplied by k == 0.
		n_outer = 1;
		seg_len = bli_vector_dim( m, n );
		a_ld    = 1;
		a_inc   = bli_vector_inc( trans, m, n, a_rs, a_cs );
		b_ld    = 1;
		b_inc   = bli_vector_inc( BLIS_NO_TRANSPOSE, m, n, b_rs, b_cs );
	}
	else
	{
		// General matrix: begin with a column-by-column walk.
		n_outer = n;
		seg_len = m;
		a_ld    = a_cs;
		a_inc   = a_rs;
		b_ld    = b_cs;
		b_inc   = b_rs;

		// A transposed A is read with its two strides exchanged.
		if ( bli_does_trans( trans ) )
		{
			bli_swap_ints( a_ld, a_inc );
		}

		// Walk by rows instead when B is row-stored and A, after the
		// requested transposition, is effectively row-stored as well.
		if ( bli_is_row_storage( b_rs, b_cs ) &&
		     ( ( bli_is_col_storage( a_rs, a_cs ) && bli_does_trans( trans ) ) ||
		       ( bli_is_row_storage( a_rs, a_cs ) && bli_does_notrans( trans ) ) ) )
		{
			bli_swap_ints( n_outer, seg_len );
			bli_swap_ints( a_ld, a_inc );
			bli_swap_ints( b_ld, b_inc );
		}
	}

	for ( k = 0; k < n_outer; k++ )
	{
		bli_dcopy( seg_len,
		           a + k*a_ld, a_inc,
		           b + k*b_ld, b_inc );
	}
}
| void bli_dcopyv | ( | conj_t | conj, |
| int | m, | ||
| double * | x, | ||
| int | incx, | ||
| double * | y, | ||
| int | incy | ||
| ) |
References bli_dcopy().
Referenced by bli_dcopymrt(), bli_ddcopymr(), bli_ddcopymrt(), bli_ddcopymt(), bli_dsymmize(), bli_dtrmvsx(), bli_dtrsvsx(), FLA_Accum_T_UT_fc_opd_var1(), FLA_Accum_T_UT_fr_opd_var1(), FLA_Apply_H2_UT_l_opd_var1(), FLA_Apply_H2_UT_r_opd_var1(), FLA_Apply_HUD_UT_l_opd_var1(), FLA_Bidiag_UT_u_step_ofd_var2(), FLA_Bidiag_UT_u_step_ofd_var3(), FLA_Bidiag_UT_u_step_ofd_var4(), FLA_Bidiag_UT_u_step_opd_var1(), FLA_Bidiag_UT_u_step_opd_var2(), FLA_Bidiag_UT_u_step_opd_var3(), FLA_Bidiag_UT_u_step_opd_var4(), FLA_Bidiag_UT_u_step_opd_var5(), FLA_CAQR2_UT_opd_var1(), FLA_Eig_gest_il_opd_var3(), FLA_Eig_gest_iu_opd_var3(), FLA_Fused_UYx_ZVx_opd_var1(), FLA_Hess_UT_step_ofd_var3(), FLA_Hess_UT_step_opd_var3(), FLA_Hess_UT_step_opd_var4(), FLA_Hess_UT_step_opd_var5(), FLA_LQ_UT_opd_var2(), FLA_QR_UT_opd_var2(), FLA_Tridiag_UT_l_step_ofd_var2(), FLA_Tridiag_UT_l_step_opd_var2(), FLA_Tridiag_UT_l_step_opd_var3(), and FLA_Tridiag_UT_shift_U_l_opd().
{
	// Thin forwarder: conj is not consulted here; every other argument is
	// handed to bli_dcopy() unchanged.
	bli_dcopy( m, x, incx, y, incy );
}
| void bli_ddcopymr | ( | uplo_t | uplo, |
| int | m, | ||
| int | n, | ||
| double * | a, | ||
| int | a_rs, | ||
| int | a_cs, | ||
| double * | b, | ||
| int | b_rs, | ||
| int | b_cs | ||
| ) |
References bli_dcopyv(), bli_is_row_storage(), bli_is_upper(), bli_zero_dim2(), and BLIS_NO_TRANSPOSE.
{
	// Copies the triangle of A selected by uplo into B, one column (or row)
	// segment at a time, using bli_dcopyv() for each segment.
	int a_ld, a_inc;
	int b_ld, b_inc;
	int n_outer;
	int seg_max;
	int seg_len;
	int k;

	// An empty matrix needs no work.
	if ( bli_zero_dim2( m, n ) )
	{
		return;
	}

	// Start from a column-by-column traversal.
	n_outer = n;
	seg_max = m;
	a_ld    = a_cs;
	a_inc   = a_rs;
	b_ld    = b_cs;
	b_inc   = b_rs;

	// When B is stored by rows, walk rows instead. The roles of the two
	// dimensions and strides are exchanged, and bli_toggle_uplo() is applied
	// so the triangle selection follows the exchange.
	if ( bli_is_row_storage( b_rs, b_cs ) )
	{
		bli_swap_ints( n_outer, seg_max );
		bli_swap_ints( a_ld, a_inc );
		bli_swap_ints( b_ld, b_inc );
		bli_toggle_uplo( uplo );
	}

	if ( bli_is_upper( uplo ) )
	{
		// Segment k starts at the beginning of column/row k and holds
		// k + 1 elements, capped at seg_max.
		for ( k = 0; k < n_outer; k++ )
		{
			seg_len = bli_min( k + 1, seg_max );

			bli_dcopyv( BLIS_NO_TRANSPOSE,
			            seg_len,
			            a + k*a_ld, a_inc,
			            b + k*b_ld, b_inc );
		}
	}
	else
	{
		// Segment k starts on the diagonal and holds seg_max - k elements;
		// the walk stops as soon as a segment would be empty.
		for ( k = 0; k < n_outer && k < seg_max; k++ )
		{
			seg_len = seg_max - k;

			bli_dcopyv( BLIS_NO_TRANSPOSE,
			            seg_len,
			            a + k*a_ld + k*a_inc, a_inc,
			            b + k*b_ld + k*b_inc, b_inc );
		}
	}
}
| void bli_ddcopymrt | ( | uplo_t | uplo, |
| trans_t | trans, | ||
| int | m, | ||
| int | n, | ||
| double * | a, | ||
| int | a_rs, | ||
| int | a_cs, | ||
| double * | b, | ||
| int | b_rs, | ||
| int | b_cs | ||
| ) |
References bli_dcopyv(), bli_does_trans(), bli_is_col_storage(), bli_is_lower(), bli_proj_trans_to_conj(), and bli_zero_dim2().
{
// Copies the triangle of A selected by uplo into B, one column (or row)
// segment at a time through bli_dcopyv(). When trans requests a
// transposition, A is read with its two strides exchanged.
double* a_begin;
double* b_begin;
int lda, inca;
int ldb, incb;
int n_iter;
int n_elem;
int n_elem_max;
int n_elem_is_descending;
int j;
conj_t conj;
// Return early if possible.
if ( bli_zero_dim2( m, n ) ) return;
// Initialize variables based on storage format of B and value of uplo.
// In every case the outer loop advances by ld* and each vector copy by
// inc*. n_elem_is_descending records whether successive segments start on
// the diagonal and shrink (TRUE) or start at the matrix edge and grow
// (FALSE).
if ( bli_is_col_storage( b_rs, b_cs ) )
{
// B is column-stored: iterate over columns.
if ( bli_is_lower( uplo ) )
{
n_iter = bli_min( m, n );
n_elem_max = m;
lda = a_cs;
inca = a_rs;
ldb = b_cs;
incb = b_rs;
n_elem_is_descending = TRUE;
}
else // if ( bli_is_upper( uplo ) )
{
n_iter = n;
n_elem_max = bli_min( m, n );
lda = a_cs;
inca = a_rs;
ldb = b_cs;
incb = b_rs;
n_elem_is_descending = FALSE;
}
}
else // if ( bli_is_row_storage( b_rs, b_cs ) )
{
// B is row-stored: iterate over rows, so the row/column strides trade
// places relative to the column-stored case.
if ( bli_is_lower( uplo ) )
{
n_iter = m;
n_elem_max = bli_min( m, n );
lda = a_rs;
inca = a_cs;
ldb = b_rs;
incb = b_cs;
n_elem_is_descending = FALSE;
}
else // if ( bli_is_upper( uplo ) )
{
n_iter = bli_min( m, n );
n_elem_max = n;
lda = a_rs;
inca = a_cs;
ldb = b_rs;
incb = b_cs;
n_elem_is_descending = TRUE;
}
}
// Swap lda and inca if we're doing a transpose.
if ( bli_does_trans( trans ) )
{
bli_swap_ints( lda, inca );
}
// Extract conj component from trans parameter.
conj = bli_proj_trans_to_conj( trans );
// Choose the loop based on whether n_elem will be shrinking or growing
// with each iteration.
if ( n_elem_is_descending )
{
for ( j = 0; j < n_iter; j++ )
{
// Segment j begins at the diagonal element (offset j*ld + j*inc) and is
// one element shorter than the previous one.
n_elem = n_elem_max - j;
a_begin = a + j*lda + j*inca;
b_begin = b + j*ldb + j*incb;
bli_dcopyv( conj,
n_elem,
a_begin, inca,
b_begin, incb );
}
}
else // if ( n_elem_is_ascending )
{
for ( j = 0; j < n_iter; j++ )
{
// Segment j begins at the start of column/row j and holds j + 1
// elements, capped at n_elem_max.
n_elem = bli_min( j + 1, n_elem_max );
a_begin = a + j*lda;
b_begin = b + j*ldb;
bli_dcopyv( conj,
n_elem,
a_begin, inca,
b_begin, incb );
}
}
}
| void bli_ddcopymt | ( | trans_t | trans, |
| int | m, | ||
| int | n, | ||
| double * | a, | ||
| int | a_rs, | ||
| int | a_cs, | ||
| double * | b, | ||
| int | b_rs, | ||
| int | b_cs | ||
| ) |
References bli_dcopyv(), bli_does_trans(), bli_is_row_storage(), bli_is_vector(), bli_proj_trans_to_conj(), bli_vector_dim(), bli_vector_inc(), bli_zero_dim2(), and BLIS_NO_TRANSPOSE.
{
	// Copies the m x n matrix A (read with exchanged strides when trans
	// requests a transposition) into B by issuing one bli_dcopyv() per
	// column or row.
	int    n_outer;
	int    seg_len;
	int    a_ld, a_inc;
	int    b_ld, b_inc;
	int    k;
	conj_t conj;

	// An empty matrix needs no work.
	if ( bli_zero_dim2( m, n ) )
	{
		return;
	}

	if ( bli_is_vector( m, n ) )
	{
		// Vector operands are handled with a single bli_dcopyv() call. The
		// leading dimensions are placeholders: they are only ever
		// multiplied by k == 0.
		n_outer = 1;
		seg_len = bli_vector_dim( m, n );
		a_ld    = 1;
		a_inc   = bli_vector_inc( trans, m, n, a_rs, a_cs );
		b_ld    = 1;
		b_inc   = bli_vector_inc( BLIS_NO_TRANSPOSE, m, n, b_rs, b_cs );
	}
	else
	{
		// General matrix: begin with a column-by-column walk of B.
		n_outer = n;
		seg_len = m;
		a_ld    = a_cs;
		a_inc   = a_rs;
		b_ld    = b_cs;
		b_inc   = b_rs;

		// A transposed A is read with its two strides exchanged.
		if ( bli_does_trans( trans ) )
		{
			bli_swap_ints( a_ld, a_inc );
		}

		// For a row-stored B, walk by rows instead for better locality.
		if ( bli_is_row_storage( b_rs, b_cs ) )
		{
			bli_swap_ints( n_outer, seg_len );
			bli_swap_ints( a_ld, a_inc );
			bli_swap_ints( b_ld, b_inc );
		}
	}

	// Only the conj component of trans is passed down to the vector copy.
	conj = bli_proj_trans_to_conj( trans );

	for ( k = 0; k < n_outer; ++k )
	{
		bli_dcopyv( conj,
		            seg_len,
		            a + k*a_ld, a_inc,
		            b + k*b_ld, b_inc );
	}
}
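| void bli_ddot | ( | conj_t | conj, |
| int | n, | ||
| double * | x, | ||
| int | incx, | ||
| double * | y, | ||
| int | incy, | ||
| double * | rho | ||
| ) |
References cblas_ddot(), and F77_ddot().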
Referenced by bli_ddot2s(), bli_ddots(), FLA_Bidiag_UT_u_step_ofd_var2(), FLA_Bidiag_UT_u_step_ofd_var3(), FLA_Bidiag_UT_u_step_ofd_var4(), FLA_Bidiag_UT_u_step_opd_var2(), FLA_Bidiag_UT_u_step_opd_var3(), FLA_Bidiag_UT_u_step_opd_var4(), FLA_Bidiag_UT_u_step_opd_var5(), FLA_Dot_external(), FLA_Dotc_external(), FLA_Fused_Ahx_Axpy_Ax_opd_var1(), FLA_Fused_Gerc2_Ahx_Axpy_Ax_opd_var1(), FLA_Fused_UZhu_ZUhu_opd_var1(), FLA_Hess_UT_step_ofd_var2(), FLA_Hess_UT_step_ofd_var3(), FLA_Hess_UT_step_ofd_var4(), FLA_Hess_UT_step_opd_var2(), FLA_Hess_UT_step_opd_var3(), FLA_Hess_UT_step_opd_var4(), FLA_Hess_UT_step_opd_var5(), FLA_Sylv_hh_opd_var1(), FLA_Sylv_hn_opd_var1(), FLA_Sylv_nh_opd_var1(), FLA_Sylv_nn_opd_var1(), FLA_Tridiag_UT_l_step_ofd_var2(), FLA_Tridiag_UT_l_step_ofd_var3(), FLA_Tridiag_UT_l_step_opd_var1(), FLA_Tridiag_UT_l_step_opd_var2(), and FLA_Tridiag_UT_l_step_opd_var3().
{
	// Stores in *rho the value returned by the configured BLAS ddot routine
	// for the n-element strided vectors x and y. Only n, x, incx, y, incy
	// and rho are used.
#ifndef BLIS_ENABLE_CBLAS_INTERFACES
	// Fortran-77 binding: the integer arguments are passed by address.
	*rho = F77_ddot( &n, x, &incx, y, &incy );
#else
	// CBLAS binding: the integer arguments are passed by value.
	*rho = cblas_ddot( n, x, incx, y, incy );
#endif
}
| void bli_ddot2s | ( | conj_t | conj, |
| int | n, | ||
| double * | alpha, | ||
| double * | x, | ||
| int | incx, | ||
| double * | y, | ||
| int | incy, | ||
| double * | beta, | ||
| double * | rho | ||
| ) |
References bli_ddot().
Referenced by FLA_Dot2cs_external(), FLA_Dot2s_external(), FLA_Eig_gest_il_opd_var1(), FLA_Eig_gest_il_opd_var2(), FLA_Eig_gest_il_opd_var3(), FLA_Eig_gest_iu_opd_var1(), FLA_Eig_gest_iu_opd_var2(), FLA_Eig_gest_iu_opd_var3(), FLA_Eig_gest_nl_opd_var1(), FLA_Eig_gest_nl_opd_var2(), FLA_Eig_gest_nu_opd_var1(), FLA_Eig_gest_nu_opd_var2(), FLA_Lyap_h_opd_var1(), FLA_Lyap_h_opd_var2(), FLA_Lyap_h_opd_var3(), FLA_Lyap_n_opd_var1(), FLA_Lyap_n_opd_var2(), and FLA_Lyap_n_opd_var3().
{
	// Updates *rho in place to beta * rho + 2 * alpha * d, where d is the
	// value bli_ddot() produces for x and y.
	double xy;

	bli_ddot( conj, n, x, incx, y, incy, &xy );

	// The expression is kept as a single statement so the floating-point
	// evaluation is unchanged.
	*rho = (*beta) * (*rho) + 2.0 * (*alpha) * xy;
}
| void bli_ddots | ( | conj_t | conj, |
| int | n, | ||
| double * | alpha, | ||
| double * | x, | ||
| int | incx, | ||
| double * | y, | ||
| int | incy, | ||
| double * | beta, | ||
| double * | rho | ||
| ) |
References bli_ddot().
Referenced by FLA_Chol_l_opd_var1(), FLA_Chol_l_opd_var2(), FLA_Chol_u_opd_var1(), FLA_Chol_u_opd_var2(), FLA_Dotcs_external(), FLA_Dots_external(), FLA_Hess_UT_step_opd_var5(), FLA_LU_nopiv_opd_var1(), FLA_LU_nopiv_opd_var2(), FLA_LU_nopiv_opd_var3(), FLA_LU_nopiv_opd_var4(), FLA_LU_piv_opd_var3(), FLA_LU_piv_opd_var4(), FLA_Ttmm_l_opd_var2(), FLA_Ttmm_l_opd_var3(), FLA_Ttmm_u_opd_var2(), and FLA_Ttmm_u_opd_var3().
{
	// Updates *rho in place to beta * rho + alpha * d, where d is the value
	// bli_ddot() produces for x and y.
	double xy;

	bli_ddot( conj, n, x, incx, y, incy, &xy );

	// The expression is kept as a single statement so the floating-point
	// evaluation is unchanged.
	*rho = (*beta) * (*rho) + (*alpha) * xy;
}
| void bli_dfnorm | ( | int | m, |
| int | n, | ||
| double * | a, | ||
| int | a_rs, | ||
| int | a_cs, | ||
| double * | norm | ||
| ) |
References bli_is_row_storage(), bli_is_vector(), bli_vector_dim(), bli_vector_inc(), bli_zero_dim2(), and BLIS_NO_TRANSPOSE.
Referenced by FLA_Norm_frob().
{
	// Computes the Frobenius norm of the m x n matrix A -- the square root
	// of the sum of the squares of its entries -- and stores it in *norm.
	//
	//   m, n        dimensions of A
	//   a           pointer to the first element of A
	//   a_rs, a_cs  row and column strides of A
	//   norm        output; always written on return
	double* a_ij;
	double  sum;
	int     lda, inca;
	int     n_iter;
	int     n_elem;
	int     i, j;

	// An empty matrix has no entries to sum, so its norm is zero. Store that
	// explicitly so the caller never reads an unwritten *norm.
	if ( bli_zero_dim2( m, n ) )
	{
		*norm = 0.0;
		return;
	}

	// Handle cases where A is a vector separately.
	if ( bli_is_vector( m, n ) )
	{
		// Initialize with values appropriate for vectors.
		n_iter = 1;
		n_elem = bli_vector_dim( m, n );
		lda    = 1; // multiplied by zero when n_iter == 1; not needed.
		inca   = bli_vector_inc( BLIS_NO_TRANSPOSE, m, n, a_rs, a_cs );
	}
	else // matrix case
	{
		// Initialize with optimal values for column-major storage.
		n_iter = n;
		n_elem = m;
		lda    = a_cs;
		inca   = a_rs;

		// An optimization: if A is row-major, then let's access the matrix
		// by rows instead of by columns for increased spatial locality.
		if ( bli_is_row_storage( a_rs, a_cs ) )
		{
			bli_swap_ints( n_iter, n_elem );
			bli_swap_ints( lda, inca );
		}
	}

	// Accumulate the sum of squares over every element.
	sum = 0.0;

	for ( j = 0; j < n_iter; j++ )
	{
		for ( i = 0; i < n_elem; i++ )
		{
			a_ij = a + i*inca + j*lda;
			sum += (*a_ij) * (*a_ij);
		}
	}

	// Compute the norm and store the result.
	*norm = sqrt( sum );
}
| void bli_dinvscalm | ( | conj_t | conj, |
| int | m, | ||
| int | n, | ||
| double * | alpha, | ||
| double * | a, | ||
| int | a_rs, | ||
| int | a_cs | ||
| ) |
References bli_dinvert2s(), bli_dscal(), bli_is_row_storage(), bli_is_vector(), bli_vector_dim(), bli_vector_inc(), bli_zero_dim2(), and BLIS_NO_TRANSPOSE.
Referenced by FLA_Inv_scal_external(), and FLA_Inv_scalc_external().
{
	// Scales every element of the m x n matrix A by the value that
	// bli_dinvert2s() derives from alpha (presumably its reciprocal --
	// the macro is not defined on this page), one bli_dscal() call per
	// column or row.
	double alpha_recip;
	int    a_ld, a_inc;
	int    n_outer;
	int    seg_len;
	int    k;

	// An empty matrix needs no work.
	if ( bli_zero_dim2( m, n ) )
	{
		return;
	}

	// Skip the work entirely when bli_deq1() accepts alpha.
	if ( bli_deq1( alpha ) )
	{
		return;
	}

	if ( bli_is_vector( m, n ) )
	{
		// Vector operands are handled with a single bli_dscal() call. The
		// leading dimension is a placeholder: it is only ever multiplied
		// by k == 0.
		n_outer = 1;
		seg_len = bli_vector_dim( m, n );
		a_ld    = 1;
		a_inc   = bli_vector_inc( BLIS_NO_TRANSPOSE, m, n, a_rs, a_cs );
	}
	else
	{
		// General matrix: begin with a column-by-column walk.
		n_outer = n;
		seg_len = m;
		a_ld    = a_cs;
		a_inc   = a_rs;

		// For a row-stored A, walk by rows instead for better locality.
		if ( bli_is_row_storage( a_rs, a_cs ) )
		{
			bli_swap_ints( n_outer, seg_len );
			bli_swap_ints( a_ld, a_inc );
		}
	}

	bli_dinvert2s( conj, alpha, &alpha_recip );

	for ( k = 0; k < n_outer; k++ )
	{
		bli_dscal( seg_len, &alpha_recip, a + k*a_ld, a_inc );
	}
}
| void bli_dinvscalv | ( | conj_t | conj, |
| int | n, | ||
| double * | alpha, | ||
| double * | x, | ||
| int | incx | ||
| ) |
References bli_dscal().
Referenced by bli_drandmr(), FLA_Apply_H2_UT_l_opd_var1(), FLA_Apply_H2_UT_r_opd_var1(), FLA_Apply_HUD_UT_l_opd_var1(), FLA_Bidiag_UT_u_step_ofd_var2(), FLA_Bidiag_UT_u_step_ofd_var3(), FLA_Bidiag_UT_u_step_ofd_var4(), FLA_Bidiag_UT_u_step_opd_var2(), FLA_Bidiag_UT_u_step_opd_var3(), FLA_Bidiag_UT_u_step_opd_var4(), FLA_Bidiag_UT_u_step_opd_var5(), FLA_Chol_l_opd_var2(), FLA_Chol_l_opd_var3(), FLA_Chol_u_opd_var2(), FLA_Chol_u_opd_var3(), FLA_Eig_gest_il_opd_var1(), FLA_Eig_gest_il_opd_var2(), FLA_Eig_gest_il_opd_var3(), FLA_Eig_gest_il_opd_var4(), FLA_Eig_gest_il_opd_var5(), FLA_Eig_gest_iu_opd_var1(), FLA_Eig_gest_iu_opd_var2(), FLA_Eig_gest_iu_opd_var3(), FLA_Eig_gest_iu_opd_var4(), FLA_Eig_gest_iu_opd_var5(), FLA_Househ2_UT_l_opd(), FLA_Househ3UD_UT_opd(), FLA_LU_nopiv_opd_var3(), FLA_LU_nopiv_opd_var4(), FLA_LU_nopiv_opd_var5(), FLA_LU_piv_opd_var3(), FLA_LU_piv_opd_var4(), FLA_LU_piv_opd_var5(), FLA_Trinv_ln_opd_var1(), FLA_Trinv_ln_opd_var2(), FLA_Trinv_ln_opd_var3(), FLA_Trinv_un_opd_var1(), FLA_Trinv_un_opd_var2(), and FLA_Trinv_un_opd_var3().
{
	// Scales the n-element strided vector x by 1 / alpha through
	// bli_dscal(). conj is not consulted.
	double recip;

	// Skip the work entirely when bli_deq1() accepts alpha.
	if ( bli_deq1( alpha ) )
	{
		return;
	}

	recip = 1.0 / *alpha;

	bli_dscal( n, &recip, x, incx );
}
| void bli_dnrm2 | ( | int | n, |
| double * | x, | ||
| int | incx, | ||
| double * | norm | ||
| ) |
References cblas_dnrm2(), and F77_dnrm2().
Referenced by FLA_Househ2_UT_l_opd(), FLA_Househ2s_UT_l_opd(), FLA_Househ3UD_UT_opd(), and FLA_Nrm2_external().
{
	// Stores in *norm the value returned by the configured BLAS dnrm2
	// routine for the n-element strided vector x.
#ifndef BLIS_ENABLE_CBLAS_INTERFACES
	// Fortran-77 binding: the integer arguments are passed by address.
	*norm = F77_dnrm2( &n, x, &incx );
#else
	// CBLAS binding: the integer arguments are passed by value.
	*norm = cblas_dnrm2( n, x, incx );
#endif
}
| void bli_dscal | ( | int | n, |
| double * | alpha, | ||
| double * | x, | ||
| int | incx | ||
| ) |
References cblas_dscal(), and F77_dscal().
Referenced by bli_daxpysmt(), bli_daxpysv(), bli_dinvscalm(), bli_dinvscalv(), bli_dscalm(), bli_dscalmr(), bli_dscalv(), bli_zconjm(), bli_zconjmr(), bli_zconjv(), and FLA_SA_LU_unb().
{
	// Forwards the in-place scaling of the n-element strided vector x by
	// alpha to whichever BLAS binding this build was configured for.
#ifndef BLIS_ENABLE_CBLAS_INTERFACES
	// Fortran-77 binding: the scalar and the integers are passed by address.
	F77_dscal( &n, alpha, x, &incx );
#else
	// CBLAS binding: the scalar and the integers are passed by value.
	cblas_dscal( n, *alpha, x, incx );
#endif
}
| void bli_dscalm | ( | conj_t | conj, |
| int | m, | ||
| int | n, | ||
| double * | alpha, | ||
| double * | a, | ||
| int | a_rs, | ||
| int | a_cs | ||
| ) |
References bli_dscal(), bli_is_row_storage(), bli_is_vector(), bli_vector_dim(), bli_vector_inc(), bli_zero_dim2(), and BLIS_NO_TRANSPOSE.
Referenced by bli_dgemm(), bli_dsymm(), bli_dtrmmsx(), bli_dtrsmsx(), FLA_Lyap_h_opd_var1(), FLA_Lyap_h_opd_var2(), FLA_Lyap_h_opd_var3(), FLA_Lyap_h_opd_var4(), FLA_Lyap_n_opd_var1(), FLA_Lyap_n_opd_var2(), FLA_Lyap_n_opd_var3(), FLA_Lyap_n_opd_var4(), FLA_Scal_external(), and FLA_Scalc_external().
{
	// Scales every element of the m x n matrix A by the value that
	// bli_dcopys() derives from alpha and conj, one bli_dscal() call per
	// column or row.
	double alpha_used;
	int    a_ld, a_inc;
	int    n_outer;
	int    seg_len;
	int    k;

	// An empty matrix needs no work.
	if ( bli_zero_dim2( m, n ) )
	{
		return;
	}

	// Skip the work entirely when bli_deq1() accepts alpha.
	if ( bli_deq1( alpha ) )
	{
		return;
	}

	if ( bli_is_vector( m, n ) )
	{
		// Vector operands are handled with a single bli_dscal() call. The
		// leading dimension is a placeholder: it is only ever multiplied
		// by k == 0.
		n_outer = 1;
		seg_len = bli_vector_dim( m, n );
		a_ld    = 1;
		a_inc   = bli_vector_inc( BLIS_NO_TRANSPOSE, m, n, a_rs, a_cs );
	}
	else
	{
		// General matrix: begin with a column-by-column walk.
		n_outer = n;
		seg_len = m;
		a_ld    = a_cs;
		a_inc   = a_rs;

		// For a row-stored A, walk by rows instead for better locality.
		if ( bli_is_row_storage( a_rs, a_cs ) )
		{
			bli_swap_ints( n_outer, seg_len );
			bli_swap_ints( a_ld, a_inc );
		}
	}

	bli_dcopys( conj, alpha, &alpha_used );

	for ( k = 0; k < n_outer; k++ )
	{
		bli_dscal( seg_len, &alpha_used, a + k*a_ld, a_inc );
	}
}
| void bli_dscalmr | ( | uplo_t | uplo, |
| int | m, | ||
| int | n, | ||
| double * | alpha, | ||
| double * | a, | ||
| int | a_rs, | ||
| int | a_cs | ||
| ) |
References bli_dscal(), bli_is_row_storage(), bli_is_upper(), and bli_zero_dim2().
Referenced by FLA_Scalr_external().
{
	// Scales the triangle of A selected by uplo by alpha, one column (or
	// row) segment at a time, using bli_dscal() for each segment.
	int a_ld, a_inc;
	int n_outer;
	int seg_max;
	int seg_len;
	int k;

	// An empty matrix needs no work.
	if ( bli_zero_dim2( m, n ) )
	{
		return;
	}

	// Skip the work entirely when bli_deq1() accepts alpha.
	if ( bli_deq1( alpha ) )
	{
		return;
	}

	// Start from a column-by-column traversal.
	n_outer = n;
	seg_max = m;
	a_ld    = a_cs;
	a_inc   = a_rs;

	// When A is stored by rows, walk rows instead. The roles of the two
	// dimensions and strides are exchanged, and bli_toggle_uplo() is applied
	// so the triangle selection follows the exchange.
	if ( bli_is_row_storage( a_rs, a_cs ) )
	{
		bli_swap_ints( n_outer, seg_max );
		bli_swap_ints( a_ld, a_inc );
		bli_toggle_uplo( uplo );
	}

	if ( bli_is_upper( uplo ) )
	{
		// Segment k starts at the beginning of column/row k and holds
		// k + 1 elements, capped at seg_max.
		for ( k = 0; k < n_outer; k++ )
		{
			seg_len = bli_min( k + 1, seg_max );

			bli_dscal( seg_len, alpha, a + k*a_ld, a_inc );
		}
	}
	else
	{
		// Segment k starts on the diagonal and holds seg_max - k elements;
		// the walk stops as soon as a segment would be empty.
		for ( k = 0; k < n_outer && k < seg_max; k++ )
		{
			seg_len = seg_max - k;

			bli_dscal( seg_len, alpha, a + k*a_ld + k*a_inc, a_inc );
		}
	}
}
| void bli_dscalv | ( | conj_t | conj, |
| int | n, | ||
| double * | alpha, | ||
| double * | x, | ||
| int | incx | ||
| ) |
References bli_dscal(), and bli_zero_dim1().
Referenced by bli_dapdiagmv(), bli_dgemv(), bli_dtrmvsx(), bli_dtrsvsx(), FLA_Bsvd_v_opd_var1(), FLA_Bsvd_v_opd_var2(), FLA_Eig_gest_il_opd_var3(), FLA_Eig_gest_iu_opd_var3(), FLA_Eig_gest_nl_opd_var1(), FLA_Eig_gest_nl_opd_var2(), FLA_Eig_gest_nl_opd_var4(), FLA_Eig_gest_nl_opd_var5(), FLA_Eig_gest_nu_opd_var1(), FLA_Eig_gest_nu_opd_var2(), FLA_Eig_gest_nu_opd_var4(), FLA_Eig_gest_nu_opd_var5(), FLA_Hess_UT_step_ofd_var2(), FLA_Hess_UT_step_ofd_var3(), FLA_Hess_UT_step_ofd_var4(), FLA_Hess_UT_step_opd_var2(), FLA_Hess_UT_step_opd_var3(), FLA_Hess_UT_step_opd_var4(), FLA_LQ_UT_form_Q_opd_var1(), FLA_QR_UT_form_Q_opd_var1(), FLA_Tridiag_UT_l_step_ofd_var2(), FLA_Tridiag_UT_l_step_ofd_var3(), FLA_Tridiag_UT_l_step_opd_var1(), FLA_Tridiag_UT_l_step_opd_var2(), FLA_Tridiag_UT_l_step_opd_var3(), FLA_Trinv_ln_opd_var4(), FLA_Trinv_lu_opd_var1(), FLA_Trinv_lu_opd_var2(), FLA_Trinv_lu_opd_var3(), FLA_Trinv_lu_opd_var4(), FLA_Trinv_un_opd_var4(), FLA_Trinv_uu_opd_var1(), FLA_Trinv_uu_opd_var2(), FLA_Trinv_uu_opd_var3(), FLA_Trinv_uu_opd_var4(), FLA_Ttmm_l_opd_var1(), FLA_Ttmm_l_opd_var2(), FLA_Ttmm_u_opd_var1(), and FLA_Ttmm_u_opd_var2().
{
	// Scales the n-element strided vector x by alpha through bli_dscal().
	// conj is not consulted.

	// Nothing to scale when bli_zero_dim1() reports an empty dimension.
	if ( bli_zero_dim1( n ) )
	{
		return;
	}

	// Skip the work entirely when bli_deq1() accepts alpha.
	if ( bli_deq1( alpha ) )
	{
		return;
	}

	bli_dscal( n, alpha, x, incx );
}
| void bli_dscopymr | ( | uplo_t | uplo, |
| int | m, | ||
| int | n, | ||
| double * | a, | ||
| int | a_rs, | ||
| int | a_cs, | ||
| float * | b, | ||
| int | b_rs, | ||
| int | b_cs | ||
| ) |
References bli_dscopyv(), bli_is_row_storage(), bli_is_upper(), bli_zero_dim2(), and BLIS_NO_TRANSPOSE.
Referenced by FLA_Copyr_external().
{
    // Copy the part of the m x n double matrix A selected by uplo into the
    // same region of the float matrix B, issuing one bli_dscopyv() call per
    // column or row. Entries of B outside that region are never written.
    //
    // NOTE(review): BLIS_NO_TRANSPOSE (a trans_t constant) is passed in the
    // conj position of bli_dscopyv(); confirm whether a conj_t constant was
    // intended.
    int a_ld, a_inc;
    int b_ld, b_inc;
    int n_vecs;
    int len_cap;
    int len;
    int k;

    // An empty matrix needs no work.
    if ( bli_zero_dim2( m, n ) ) return;

    // Start with a column-by-column walk: n vectors of at most m entries.
    n_vecs  = n;
    len_cap = m;
    a_ld    = a_cs;
    a_inc   = a_rs;
    b_ld    = b_cs;
    b_inc   = b_rs;

    // If B is row-major, walk rows instead for increased spatial locality.
    // The dimensions and strides trade places, and uplo is toggled to go
    // with the exchanged traversal.
    if ( bli_is_row_storage( b_rs, b_cs ) )
    {
        bli_swap_ints( n_vecs, len_cap );
        bli_swap_ints( a_ld, a_inc );
        bli_swap_ints( b_ld, b_inc );
        bli_toggle_uplo( uplo );
    }

    if ( bli_is_upper( uplo ) )
    {
        // Vector k begins at its first entry and holds k + 1 entries,
        // capped at len_cap.
        for ( k = 0; k < n_vecs; ++k )
        {
            len = bli_min( k + 1, len_cap );

            bli_dscopyv( BLIS_NO_TRANSPOSE,
                         len,
                         a + k*a_ld, a_inc,
                         b + k*b_ld, b_inc );
        }
    }
    else // lower
    {
        // Vector k begins at offset k along both strides (the diagonal) and
        // holds the remaining len_cap - k entries; stop as soon as nothing
        // remains.
        for ( k = 0; k < n_vecs; ++k )
        {
            len = len_cap - k;
            if ( len <= 0 ) break;

            bli_dscopyv( BLIS_NO_TRANSPOSE,
                         len,
                         a + k*a_ld + k*a_inc, a_inc,
                         b + k*b_ld + k*b_inc, b_inc );
        }
    }
}
| void bli_dscopymrt | ( | uplo_t | uplo, |
| trans_t | trans, | ||
| int | m, | ||
| int | n, | ||
| double * | a, | ||
| int | a_rs, | ||
| int | a_cs, | ||
| float * | b, | ||
| int | b_rs, | ||
| int | b_cs | ||
| ) |
References bli_does_trans(), bli_dscopyv(), bli_is_col_storage(), bli_is_lower(), bli_proj_trans_to_conj(), and bli_zero_dim2().
Referenced by FLA_Copyrt_external().
{
    // Copy the part of the double matrix A selected by uplo (with A's
    // strides exchanged when trans requests a transpose) into the float
    // matrix B, issuing one bli_dscopyv() call per column or row of B.
    double* src;
    float*  dst;
    int     a_ld, a_inc;
    int     b_ld, b_inc;
    int     n_vecs;
    int     len;
    int     len_cap;
    int     min_dim;
    int     shrinking;
    int     k;
    conj_t  conj;

    // An empty matrix needs no work.
    if ( bli_zero_dim2( m, n ) ) return;

    min_dim = bli_min( m, n );

    // B's storage decides whether we walk columns or rows; uplo then fixes
    // the number of vectors, the length bound, and whether the vectors
    // shrink or grow from one iteration to the next.
    if ( bli_is_col_storage( b_rs, b_cs ) )
    {
        // Column-by-column walk.
        a_ld  = a_cs;
        a_inc = a_rs;
        b_ld  = b_cs;
        b_inc = b_rs;

        if ( bli_is_lower( uplo ) )
        {
            n_vecs    = min_dim;
            len_cap   = m;
            shrinking = TRUE;
        }
        else // upper
        {
            n_vecs    = n;
            len_cap   = min_dim;
            shrinking = FALSE;
        }
    }
    else // B is not column-stored
    {
        // Row-by-row walk.
        a_ld  = a_rs;
        a_inc = a_cs;
        b_ld  = b_rs;
        b_inc = b_cs;

        if ( bli_is_lower( uplo ) )
        {
            n_vecs    = m;
            len_cap   = min_dim;
            shrinking = FALSE;
        }
        else // upper
        {
            n_vecs    = min_dim;
            len_cap   = n;
            shrinking = TRUE;
        }
    }

    // A transposed A is read with its two strides exchanged.
    if ( bli_does_trans( trans ) )
    {
        bli_swap_ints( a_ld, a_inc );
    }

    // Only the conjugation component of trans is handed to the vector copy.
    conj = bli_proj_trans_to_conj( trans );

    for ( k = 0; k < n_vecs; ++k )
    {
        if ( shrinking )
        {
            // Start on the diagonal; one entry fewer each iteration.
            len = len_cap - k;
            src = a + k*a_ld + k*a_inc;
            dst = b + k*b_ld + k*b_inc;
        }
        else
        {
            // Start at the first entry; one entry more each iteration,
            // capped at len_cap.
            len = bli_min( k + 1, len_cap );
            src = a + k*a_ld;
            dst = b + k*b_ld;
        }

        bli_dscopyv( conj,
                     len,
                     src, a_inc,
                     dst, b_inc );
    }
}
| void bli_dscopymt | ( | trans_t | trans, |
| int | m, | ||
| int | n, | ||
| double * | a, | ||
| int | a_rs, | ||
| int | a_cs, | ||
| float * | b, | ||
| int | b_rs, | ||
| int | b_cs | ||
| ) |
References bli_does_trans(), bli_dscopyv(), bli_is_row_storage(), bli_is_vector(), bli_proj_trans_to_conj(), bli_vector_dim(), bli_vector_inc(), bli_zero_dim2(), and BLIS_NO_TRANSPOSE.
Referenced by FLA_Copy_external(), and FLA_Copyt_external().
{
    // Copy the double matrix A (with its strides exchanged when trans
    // requests a transpose) into the m x n float matrix B, issuing one
    // bli_dscopyv() call per column or row of B.
    int    a_ld, a_inc;
    int    b_ld, b_inc;
    int    n_vecs;
    int    len;
    int    k;
    conj_t conj;

    // An empty matrix needs no work.
    if ( bli_zero_dim2( m, n ) ) return;

    if ( bli_is_vector( m, n ) )
    {
        // Vector-shaped operands: arrange for exactly one bli_dscopyv()
        // call over the whole vector.
        n_vecs = 1;
        len    = bli_vector_dim( m, n );
        a_ld   = 1; // only ever multiplied by k == 0
        a_inc  = bli_vector_inc( trans, m, n, a_rs, a_cs );
        b_ld   = 1; // only ever multiplied by k == 0
        b_inc  = bli_vector_inc( BLIS_NO_TRANSPOSE, m, n, b_rs, b_cs );
    }
    else
    {
        // General matrix: begin with a column-by-column walk of B.
        n_vecs = n;
        len    = m;
        a_ld   = a_cs;
        a_inc  = a_rs;
        b_ld   = b_cs;
        b_inc  = b_rs;

        // A transposed A is read with its two strides exchanged.
        if ( bli_does_trans( trans ) )
        {
            bli_swap_ints( a_ld, a_inc );
        }

        // If B is row-major, walk rows instead for increased spatial
        // locality.
        if ( bli_is_row_storage( b_rs, b_cs ) )
        {
            bli_swap_ints( n_vecs, len );
            bli_swap_ints( a_ld, a_inc );
            bli_swap_ints( b_ld, b_inc );
        }
    }

    // Only the conjugation component of trans is handed to the vector copy.
    conj = bli_proj_trans_to_conj( trans );

    for ( k = 0; k < n_vecs; ++k )
    {
        bli_dscopyv( conj,
                     len,
                     a + k*a_ld, a_inc,
                     b + k*b_ld, b_inc );
    }
}
| void bli_dscopyv | ( | conj_t | conj, |
| int | m, | ||
| double * | x, | ||
| int | incx, | ||
| float * | y, | ||
| int | incy | ||
| ) |
References bli_zero_dim1().
Referenced by bli_dscopymr(), bli_dscopymrt(), and bli_dscopymt().
{
    // Store m doubles read from x (stride incx) as floats in y (stride
    // incy). The conj argument is accepted but never read.
    double* src;
    float*  dst;
    int     remaining;

    // An empty vector needs no work.
    if ( bli_zero_dim1( m ) ) return;

    src = x;
    dst = y;

    for ( remaining = m; remaining > 0; --remaining )
    {
        // Narrowing double -> float conversion happens on this store.
        *dst = ( float ) *src;

        src += incx;
        dst += incy;
    }
}
| void bli_dswap | ( | int | n, |
| double * | x, | ||
| int | incx, | ||
| double * | y, | ||
| int | incy | ||
| ) |
References cblas_dswap(), and F77_dswap().
Referenced by bli_dswapmt(), bli_dswapv(), FLA_SA_Apply_pivots(), and FLA_SA_LU_unb().
{
// Thin wrapper that forwards (n, x, incx, y, incy) to the BLAS dswap
// routine. The interface is chosen at compile time:
//   - BLIS_ENABLE_CBLAS_INTERFACES defined: cblas_dswap(), integers by value;
//   - otherwise: F77_dswap(), with n, incx and incy passed by address.
// No argument checking is done here.
#ifdef BLIS_ENABLE_CBLAS_INTERFACES
cblas_dswap( n,
x, incx,
y, incy );
#else
F77_dswap( &n,
x, &incx,
y, &incy );
#endif
}
| void bli_dswapmt | ( | trans_t | trans, |
| int | m, | ||
| int | n, | ||
| double * | a, | ||
| int | a_rs, | ||
| int | a_cs, | ||
| double * | b, | ||
| int | b_rs, | ||
| int | b_cs | ||
| ) |
References bli_does_notrans(), bli_does_trans(), bli_dswap(), bli_is_col_storage(), bli_is_row_storage(), bli_is_vector(), bli_vector_dim(), bli_vector_inc(), bli_zero_dim2(), and BLIS_NO_TRANSPOSE.
Referenced by FLA_Swap_external(), and FLA_Swapt_external().
{
    // Exchange the contents of the double matrices A (with its strides
    // exchanged when trans requests a transpose) and B (m x n), issuing one
    // bli_dswap() call per column or row.
    int a_ld, a_inc;
    int b_ld, b_inc;
    int n_vecs;
    int len;
    int k;

    // An empty matrix needs no work.
    if ( bli_zero_dim2( m, n ) ) return;

    if ( bli_is_vector( m, n ) )
    {
        // Vector-shaped operands: arrange for exactly one bli_dswap() call
        // over the whole vector.
        n_vecs = 1;
        len    = bli_vector_dim( m, n );
        a_ld   = 1; // only ever multiplied by k == 0
        a_inc  = bli_vector_inc( trans, m, n, a_rs, a_cs );
        b_ld   = 1; // only ever multiplied by k == 0
        b_inc  = bli_vector_inc( BLIS_NO_TRANSPOSE, m, n, b_rs, b_cs );
    }
    else
    {
        // General matrix: begin with a column-by-column walk.
        n_vecs = n;
        len    = m;
        a_ld   = a_cs;
        a_inc  = a_rs;
        b_ld   = b_cs;
        b_inc  = b_rs;

        // A transposed A is read with its two strides exchanged.
        if ( bli_does_trans( trans ) )
        {
            bli_swap_ints( a_ld, a_inc );
        }

        // Walk rows instead (for increased spatial locality) only when B is
        // row-major and A, once trans is applied, is effectively row-major
        // as well.
        if ( bli_is_row_storage( b_rs, b_cs ) &&
             ( ( bli_is_col_storage( a_rs, a_cs ) && bli_does_trans( trans ) ) ||
               ( bli_is_row_storage( a_rs, a_cs ) && bli_does_notrans( trans ) ) ) )
        {
            bli_swap_ints( n_vecs, len );
            bli_swap_ints( a_ld, a_inc );
            bli_swap_ints( b_ld, b_inc );
        }
    }

    for ( k = 0; k < n_vecs; ++k )
    {
        bli_dswap( len,
                   a + k*a_ld, a_inc,
                   b + k*b_ld, b_inc );
    }
}
| void bli_dswapv | ( | int | n, |
| double * | x, | ||
| int | incx, | ||
| double * | y, | ||
| int | incy | ||
| ) |
References bli_dswap(), and bli_zero_dim1().
Referenced by FLA_Apply_pivots_macro_external(), FLA_Sort_evd_b_opd(), FLA_Sort_evd_f_opd(), FLA_Sort_svd_b_opd(), and FLA_Sort_svd_f_opd().
{
    // Hand the two n-element vectors x and y (strides incx, incy) to
    // bli_dswap(), unless the length is flagged as empty.
    if ( bli_zero_dim1( n ) )
    {
        // Nothing to exchange.
        return;
    }
    else
    {
        bli_dswap( n,
                   x, incx,
                   y, incy );
    }
}
| void bli_dzcopymr | ( | uplo_t | uplo, |
| int | m, | ||
| int | n, | ||
| double * | a, | ||
| int | a_rs, | ||
| int | a_cs, | ||
| dcomplex * | b, | ||
| int | b_rs, | ||
| int | b_cs | ||
| ) |
References bli_dzcopyv(), bli_is_row_storage(), bli_is_upper(), bli_zero_dim2(), and BLIS_NO_TRANSPOSE.
Referenced by FLA_Copyr_external().
{
    // Copy the part of the m x n double matrix A selected by uplo into the
    // same region of the dcomplex matrix B, issuing one bli_dzcopyv() call
    // per column or row. Entries of B outside that region are never written.
    //
    // NOTE(review): BLIS_NO_TRANSPOSE (a trans_t constant) is passed in the
    // conj position of bli_dzcopyv(); confirm whether a conj_t constant was
    // intended.
    int a_ld, a_inc;
    int b_ld, b_inc;
    int n_vecs;
    int len_cap;
    int len;
    int k;

    // An empty matrix needs no work.
    if ( bli_zero_dim2( m, n ) ) return;

    // Start with a column-by-column walk: n vectors of at most m entries.
    n_vecs  = n;
    len_cap = m;
    a_ld    = a_cs;
    a_inc   = a_rs;
    b_ld    = b_cs;
    b_inc   = b_rs;

    // If B is row-major, walk rows instead for increased spatial locality.
    // The dimensions and strides trade places, and uplo is toggled to go
    // with the exchanged traversal.
    if ( bli_is_row_storage( b_rs, b_cs ) )
    {
        bli_swap_ints( n_vecs, len_cap );
        bli_swap_ints( a_ld, a_inc );
        bli_swap_ints( b_ld, b_inc );
        bli_toggle_uplo( uplo );
    }

    if ( bli_is_upper( uplo ) )
    {
        // Vector k begins at its first entry and holds k + 1 entries,
        // capped at len_cap.
        for ( k = 0; k < n_vecs; ++k )
        {
            len = bli_min( k + 1, len_cap );

            bli_dzcopyv( BLIS_NO_TRANSPOSE,
                         len,
                         a + k*a_ld, a_inc,
                         b + k*b_ld, b_inc );
        }
    }
    else // lower
    {
        // Vector k begins at offset k along both strides (the diagonal) and
        // holds the remaining len_cap - k entries; stop as soon as nothing
        // remains.
        for ( k = 0; k < n_vecs; ++k )
        {
            len = len_cap - k;
            if ( len <= 0 ) break;

            bli_dzcopyv( BLIS_NO_TRANSPOSE,
                         len,
                         a + k*a_ld + k*a_inc, a_inc,
                         b + k*b_ld + k*b_inc, b_inc );
        }
    }
}
| void bli_dzcopymrt | ( | uplo_t | uplo, |
| trans_t | trans, | ||
| int | m, | ||
| int | n, | ||
| double * | a, | ||
| int | a_rs, | ||
| int | a_cs, | ||
| dcomplex * | b, | ||
| int | b_rs, | ||
| int | b_cs | ||
| ) |
References bli_does_trans(), bli_dzcopyv(), bli_is_col_storage(), bli_is_lower(), bli_proj_trans_to_conj(), and bli_zero_dim2().
Referenced by FLA_Copyrt_external().
{
    // Copy the part of the double matrix A selected by uplo (with A's
    // strides exchanged when trans requests a transpose) into the dcomplex
    // matrix B, issuing one bli_dzcopyv() call per column or row of B.
    double*   src;
    dcomplex* dst;
    int       a_ld, a_inc;
    int       b_ld, b_inc;
    int       n_vecs;
    int       len;
    int       len_cap;
    int       min_dim;
    int       shrinking;
    int       k;
    conj_t    conj;

    // An empty matrix needs no work.
    if ( bli_zero_dim2( m, n ) ) return;

    min_dim = bli_min( m, n );

    // B's storage decides whether we walk columns or rows; uplo then fixes
    // the number of vectors, the length bound, and whether the vectors
    // shrink or grow from one iteration to the next.
    if ( bli_is_col_storage( b_rs, b_cs ) )
    {
        // Column-by-column walk.
        a_ld  = a_cs;
        a_inc = a_rs;
        b_ld  = b_cs;
        b_inc = b_rs;

        if ( bli_is_lower( uplo ) )
        {
            n_vecs    = min_dim;
            len_cap   = m;
            shrinking = TRUE;
        }
        else // upper
        {
            n_vecs    = n;
            len_cap   = min_dim;
            shrinking = FALSE;
        }
    }
    else // B is not column-stored
    {
        // Row-by-row walk.
        a_ld  = a_rs;
        a_inc = a_cs;
        b_ld  = b_rs;
        b_inc = b_cs;

        if ( bli_is_lower( uplo ) )
        {
            n_vecs    = m;
            len_cap   = min_dim;
            shrinking = FALSE;
        }
        else // upper
        {
            n_vecs    = min_dim;
            len_cap   = n;
            shrinking = TRUE;
        }
    }

    // A transposed A is read with its two strides exchanged.
    if ( bli_does_trans( trans ) )
    {
        bli_swap_ints( a_ld, a_inc );
    }

    // Only the conjugation component of trans is handed to the vector copy.
    conj = bli_proj_trans_to_conj( trans );

    for ( k = 0; k < n_vecs; ++k )
    {
        if ( shrinking )
        {
            // Start on the diagonal; one entry fewer each iteration.
            len = len_cap - k;
            src = a + k*a_ld + k*a_inc;
            dst = b + k*b_ld + k*b_inc;
        }
        else
        {
            // Start at the first entry; one entry more each iteration,
            // capped at len_cap.
            len = bli_min( k + 1, len_cap );
            src = a + k*a_ld;
            dst = b + k*b_ld;
        }

        bli_dzcopyv( conj,
                     len,
                     src, a_inc,
                     dst, b_inc );
    }
}
| void bli_dzcopymt | ( | trans_t | trans, |
| int | m, | ||
| int | n, | ||
| double * | a, | ||
| int | a_rs, | ||
| int | a_cs, | ||
| dcomplex * | b, | ||
| int | b_rs, | ||
| int | b_cs | ||
| ) |
References bli_does_trans(), bli_dzcopyv(), bli_is_row_storage(), bli_is_vector(), bli_proj_trans_to_conj(), bli_vector_dim(), bli_vector_inc(), bli_zero_dim2(), and BLIS_NO_TRANSPOSE.
Referenced by FLA_Copy_external(), and FLA_Copyt_external().
{
    // Copy the double matrix A (with its strides exchanged when trans
    // requests a transpose) into the m x n dcomplex matrix B, issuing one
    // bli_dzcopyv() call per column or row of B.
    int    a_ld, a_inc;
    int    b_ld, b_inc;
    int    n_vecs;
    int    len;
    int    k;
    conj_t conj;

    // An empty matrix needs no work.
    if ( bli_zero_dim2( m, n ) ) return;

    if ( bli_is_vector( m, n ) )
    {
        // Vector-shaped operands: arrange for exactly one bli_dzcopyv()
        // call over the whole vector.
        n_vecs = 1;
        len    = bli_vector_dim( m, n );
        a_ld   = 1; // only ever multiplied by k == 0
        a_inc  = bli_vector_inc( trans, m, n, a_rs, a_cs );
        b_ld   = 1; // only ever multiplied by k == 0
        b_inc  = bli_vector_inc( BLIS_NO_TRANSPOSE, m, n, b_rs, b_cs );
    }
    else
    {
        // General matrix: begin with a column-by-column walk of B.
        n_vecs = n;
        len    = m;
        a_ld   = a_cs;
        a_inc  = a_rs;
        b_ld   = b_cs;
        b_inc  = b_rs;

        // A transposed A is read with its two strides exchanged.
        if ( bli_does_trans( trans ) )
        {
            bli_swap_ints( a_ld, a_inc );
        }

        // If B is row-major, walk rows instead for increased spatial
        // locality.
        if ( bli_is_row_storage( b_rs, b_cs ) )
        {
            bli_swap_ints( n_vecs, len );
            bli_swap_ints( a_ld, a_inc );
            bli_swap_ints( b_ld, b_inc );
        }
    }

    // Only the conjugation component of trans is handed to the vector copy.
    conj = bli_proj_trans_to_conj( trans );

    for ( k = 0; k < n_vecs; ++k )
    {
        bli_dzcopyv( conj,
                     len,
                     a + k*a_ld, a_inc,
                     b + k*b_ld, b_inc );
    }
}
| void bli_dzcopyv | ( | conj_t | conj, |
| int | m, | ||
| double * | x, | ||
| int | incx, | ||
| dcomplex * | y, | ||
| int | incy | ||
| ) |
References bli_zero_dim1(), dcomplex::imag, and dcomplex::real.
Referenced by bli_dzcopymr(), bli_dzcopymrt(), and bli_dzcopymt().
{
    // Widen m doubles read from x (stride incx) into dcomplex entries of y
    // (stride incy): the real part takes the source value and the imaginary
    // part is set to zero. The conj argument is accepted but never read.
    double*   src;
    dcomplex* dst;
    int       remaining;

    // An empty vector needs no work.
    if ( bli_zero_dim1( m ) ) return;

    src = x;
    dst = y;

    for ( remaining = m; remaining > 0; --remaining )
    {
        dst->real = *src;
        dst->imag = 0.0;

        src += incx;
        dst += incy;
    }
}
| void bli_icopymt | ( | trans_t | trans, |
| int | m, | ||
| int | n, | ||
| int * | a, | ||
| int | a_rs, | ||
| int | a_cs, | ||
| int * | b, | ||
| int | b_rs, | ||
| int | b_cs | ||
| ) |
References bli_does_notrans(), bli_does_trans(), bli_icopyv(), bli_is_col_storage(), bli_is_row_storage(), bli_is_vector(), bli_vector_dim(), bli_vector_inc(), bli_zero_dim2(), and BLIS_NO_TRANSPOSE.
Referenced by FLA_Copy_external(), and FLA_Copyt_external().
{
    // Copy the int matrix A (with its strides exchanged when trans requests
    // a transpose) into the m x n int matrix B, issuing one bli_icopyv()
    // call per column or row.
    int*   a_begin;
    int*   b_begin;
    int    lda, inca;
    int    ldb, incb;
    int    n_iter;
    int    n_elem;
    int    j;
    conj_t conj;

    // Return early if possible.
    if ( bli_zero_dim2( m, n ) ) return;

    // Handle cases where A and B are vectors to ensure that the underlying
    // copy gets invoked only once.
    if ( bli_is_vector( m, n ) )
    {
        // Initialize with values appropriate for vectors.
        n_iter = 1;
        n_elem = bli_vector_dim( m, n );
        lda    = 1; // multiplied by zero when n_iter == 1; not needed.
        inca   = bli_vector_inc( trans, m, n, a_rs, a_cs );
        ldb    = 1; // multiplied by zero when n_iter == 1; not needed.
        incb   = bli_vector_inc( BLIS_NO_TRANSPOSE, m, n, b_rs, b_cs );
    }
    else // matrix case
    {
        // Initialize with optimal values for column-major storage.
        n_iter = n;
        n_elem = m;
        lda    = a_cs;
        inca   = a_rs;
        ldb    = b_cs;
        incb   = b_rs;

        // Handle the transposition of A.
        if ( bli_does_trans( trans ) )
        {
            bli_swap_ints( lda, inca );
        }

        // An optimization: if B is row-major and if A is effectively
        // row-major after a possible transposition, then let's access the
        // matrices by rows instead of by columns for increased spatial
        // locality.
        if ( bli_is_row_storage( b_rs, b_cs ) )
        {
            if ( ( bli_is_col_storage( a_rs, a_cs ) && bli_does_trans( trans ) ) ||
                 ( bli_is_row_storage( a_rs, a_cs ) && bli_does_notrans( trans ) ) )
            {
                bli_swap_ints( n_iter, n_elem );
                bli_swap_ints( lda, inca );
                bli_swap_ints( ldb, incb );
            }
        }
    }

    // bli_icopyv() takes a conj_t, not a trans_t. Extract the conj component
    // from trans, as the other *copymt routines do, instead of passing the
    // trans_t value straight through.
    conj = bli_proj_trans_to_conj( trans );

    for ( j = 0; j < n_iter; j++ )
    {
        a_begin = a + j*lda;
        b_begin = b + j*ldb;

        bli_icopyv( conj,
                    n_elem,
                    a_begin, inca,
                    b_begin, incb );
    }
}
| void bli_icopyv | ( | conj_t | conj, |
| int | m, | ||
| int * | x, | ||
| int | incx, | ||
| int * | y, | ||
| int | incy | ||
| ) |
References bli_zero_dim1().
Referenced by bli_icopymt().
{
    // Copy m ints from x (stride incx) to y (stride incy). The conj argument
    // is accepted but never read.
    int* src;
    int* dst;
    int  remaining;

    // An empty vector needs no work.
    if ( bli_zero_dim1( m ) ) return;

    src = x;
    dst = y;

    for ( remaining = m; remaining > 0; --remaining )
    {
        *dst = *src;

        src += incx;
        dst += incy;
    }
}
| void bli_samax | ( | int | n, |
| float * | x, | ||
| int | incx, | ||
| int * | index | ||
| ) |
References cblas_isamax(), and F77_isamax().
Referenced by FLA_Amax_external(), FLA_LU_piv_ops_var3(), FLA_LU_piv_ops_var4(), FLA_LU_piv_ops_var5(), and FLA_SA_LU_unb().
{
// Thin wrapper around the BLAS isamax routine; the result is stored in
// *index. The interface is chosen at compile time:
//   - BLIS_ENABLE_CBLAS_INTERFACES defined: cblas_isamax(), whose return
//     value is stored unchanged;
//   - otherwise: F77_isamax(), with n and incx passed by address.
#ifdef BLIS_ENABLE_CBLAS_INTERFACES
*index = cblas_isamax( n,
x, incx );
#else
// The Fortran routine reports a 1-based position; subtracting 1 yields the
// 0-based index that the CBLAS branch produces.
*index = F77_isamax( &n,
x, &incx ) - 1;
#endif
}
| void bli_sasum | ( | int | n, |
| float * | x, | ||
| int | incx, | ||
| float * | norm | ||
| ) |
References cblas_sasum(), and F77_sasum().
Referenced by FLA_Asum_external().
{
// Thin wrapper around the BLAS sasum routine; the value it returns is
// stored in *norm. The interface is chosen at compile time:
//   - BLIS_ENABLE_CBLAS_INTERFACES defined: cblas_sasum(), integers by value;
//   - otherwise: F77_sasum(), with n and incx passed by address.
#ifdef BLIS_ENABLE_CBLAS_INTERFACES
*norm = cblas_sasum( n,
x, incx );
#else
*norm = F77_sasum( &n,
x, &incx );
#endif
}
| void bli_saxpy | ( | int | n, |
| float * | alpha, | ||
| float * | x, | ||
| int | incx, | ||
| float * | y, | ||
| int | incy | ||
| ) |
References cblas_saxpy(), and F77_saxpy().
Referenced by bli_saxpymt(), bli_saxpysmt(), bli_saxpysv(), and bli_saxpyv().
{
// Thin wrapper that forwards (n, alpha, x, incx, y, incy) to the BLAS saxpy
// routine. The interface is chosen at compile time:
//   - BLIS_ENABLE_CBLAS_INTERFACES defined: cblas_saxpy(), which takes the
//     scalar by value, hence the dereference of alpha;
//   - otherwise: F77_saxpy(), with alpha passed as a pointer and n, incx,
//     incy passed by address.
#ifdef BLIS_ENABLE_CBLAS_INTERFACES
cblas_saxpy( n,
*alpha,
x, incx,
y, incy );
#else
F77_saxpy( &n,
alpha,
x, &incx,
y, &incy );
#endif
}
| void bli_saxpymrt | ( | uplo_t | uplo, |
| trans_t | trans, | ||
| int | m, | ||
| int | n, | ||
| float * | alpha, | ||
| float * | a, | ||
| int | a_rs, | ||
| int | a_cs, | ||
| float * | b, | ||
| int | b_rs, | ||
| int | b_cs | ||
| ) |
References bli_does_trans(), bli_is_col_storage(), bli_is_lower(), bli_proj_trans_to_conj(), bli_saxpyv(), and bli_zero_dim2().
Referenced by FLA_Axpyrt_external().
{
    // Apply bli_saxpyv() with scalar alpha to the part of the float matrices
    // A and B selected by uplo (A's strides are exchanged when trans
    // requests a transpose), one column or row of B at a time. Entries of B
    // outside that region are never touched.
    float* src;
    float* dst;
    int    a_ld, a_inc;
    int    b_ld, b_inc;
    int    n_vecs;
    int    len;
    int    len_cap;
    int    min_dim;
    int    shrinking;
    int    k;
    conj_t conj;

    // An empty matrix needs no work.
    if ( bli_zero_dim2( m, n ) ) return;

    min_dim = bli_min( m, n );

    // B's storage decides whether we walk columns or rows; uplo then fixes
    // the number of vectors, the length bound, and whether the vectors
    // shrink or grow from one iteration to the next.
    if ( bli_is_col_storage( b_rs, b_cs ) )
    {
        // Column-by-column walk.
        a_ld  = a_cs;
        a_inc = a_rs;
        b_ld  = b_cs;
        b_inc = b_rs;

        if ( bli_is_lower( uplo ) )
        {
            n_vecs    = min_dim;
            len_cap   = m;
            shrinking = TRUE;
        }
        else // upper
        {
            n_vecs    = n;
            len_cap   = min_dim;
            shrinking = FALSE;
        }
    }
    else // B is not column-stored
    {
        // Row-by-row walk.
        a_ld  = a_rs;
        a_inc = a_cs;
        b_ld  = b_rs;
        b_inc = b_cs;

        if ( bli_is_lower( uplo ) )
        {
            n_vecs    = m;
            len_cap   = min_dim;
            shrinking = FALSE;
        }
        else // upper
        {
            n_vecs    = min_dim;
            len_cap   = n;
            shrinking = TRUE;
        }
    }

    // A transposed A is read with its two strides exchanged.
    if ( bli_does_trans( trans ) )
    {
        bli_swap_ints( a_ld, a_inc );
    }

    // Only the conjugation component of trans is handed to the vector axpy.
    conj = bli_proj_trans_to_conj( trans );

    for ( k = 0; k < n_vecs; ++k )
    {
        if ( shrinking )
        {
            // Start on the diagonal; one entry fewer each iteration.
            len = len_cap - k;
            src = a + k*a_ld + k*a_inc;
            dst = b + k*b_ld + k*b_inc;
        }
        else
        {
            // Start at the first entry; one entry more each iteration,
            // capped at len_cap.
            len = bli_min( k + 1, len_cap );
            src = a + k*a_ld;
            dst = b + k*b_ld;
        }

        bli_saxpyv( conj,
                    len,
                    alpha,
                    src, a_inc,
                    dst, b_inc );
    }
}
| void bli_saxpymt | ( | trans_t | trans, |
| int | m, | ||
| int | n, | ||
| float * | alpha, | ||
| float * | a, | ||
| int | a_rs, | ||
| int | a_cs, | ||
| float * | b, | ||
| int | b_rs, | ||
| int | b_cs | ||
| ) |
References bli_does_notrans(), bli_does_trans(), bli_is_col_storage(), bli_is_row_storage(), bli_is_vector(), bli_saxpy(), bli_vector_dim(), bli_vector_inc(), bli_zero_dim2(), and BLIS_NO_TRANSPOSE.
Referenced by bli_sgemm(), bli_ssymm(), bli_strmmsx(), bli_strsmsx(), FLA_Axpy_external(), and FLA_Axpyt_external().
{
    // Apply bli_saxpy() with scalar alpha to the float matrices A (with its
    // strides exchanged when trans requests a transpose) and B (m x n), one
    // column or row at a time.
    int a_ld, a_inc;
    int b_ld, b_inc;
    int n_vecs;
    int len;
    int k;

    // An empty matrix needs no work.
    if ( bli_zero_dim2( m, n ) ) return;

    if ( bli_is_vector( m, n ) )
    {
        // Vector-shaped operands: arrange for exactly one bli_saxpy() call
        // over the whole vector.
        n_vecs = 1;
        len    = bli_vector_dim( m, n );
        a_ld   = 1; // only ever multiplied by k == 0
        a_inc  = bli_vector_inc( trans, m, n, a_rs, a_cs );
        b_ld   = 1; // only ever multiplied by k == 0
        b_inc  = bli_vector_inc( BLIS_NO_TRANSPOSE, m, n, b_rs, b_cs );
    }
    else
    {
        // General matrix: begin with a column-by-column walk.
        n_vecs = n;
        len    = m;
        a_ld   = a_cs;
        a_inc  = a_rs;
        b_ld   = b_cs;
        b_inc  = b_rs;

        // A transposed A is read with its two strides exchanged.
        if ( bli_does_trans( trans ) )
        {
            bli_swap_ints( a_ld, a_inc );
        }

        // Walk rows instead (for increased spatial locality) only when B is
        // row-major and A, once trans is applied, is effectively row-major
        // as well.
        if ( bli_is_row_storage( b_rs, b_cs ) &&
             ( ( bli_is_col_storage( a_rs, a_cs ) && bli_does_trans( trans ) ) ||
               ( bli_is_row_storage( a_rs, a_cs ) && bli_does_notrans( trans ) ) ) )
        {
            bli_swap_ints( n_vecs, len );
            bli_swap_ints( a_ld, a_inc );
            bli_swap_ints( b_ld, b_inc );
        }
    }

    for ( k = 0; k < n_vecs; ++k )
    {
        bli_saxpy( len,
                   alpha,
                   a + k*a_ld, a_inc,
                   b + k*b_ld, b_inc );
    }
}
| void bli_saxpysmt | ( | trans_t | trans, |
| int | m, | ||
| int | n, | ||
| float * | alpha0, | ||
| float * | alpha1, | ||
| float * | a, | ||
| int | a_rs, | ||
| int | a_cs, | ||
| float * | beta, | ||
| float * | b, | ||
| int | b_rs, | ||
| int | b_cs | ||
| ) |
References bli_does_notrans(), bli_does_trans(), bli_is_col_storage(), bli_is_row_storage(), bli_is_vector(), bli_saxpy(), bli_sscal(), bli_vector_dim(), bli_vector_inc(), bli_zero_dim2(), and BLIS_NO_TRANSPOSE.
Referenced by FLA_Axpys_external().
{
    // For each column or row of the m x n float matrix B: first scale it by
    // beta through bli_sscal(), then apply bli_saxpy() with the scalar
    // (*alpha0) * (*alpha1) and the matching vector of A (A's strides are
    // exchanged when trans requests a transpose).
    float scalar_ab;
    int   a_ld, a_inc;
    int   b_ld, b_inc;
    int   n_vecs;
    int   len;
    int   k;

    // An empty matrix needs no work.
    if ( bli_zero_dim2( m, n ) ) return;

    // The two alpha factors are folded into one scalar up front.
    scalar_ab = (*alpha0) * (*alpha1);

    if ( bli_is_vector( m, n ) )
    {
        // Vector-shaped operands: arrange for a single scal/axpy pair over
        // the whole vector.
        n_vecs = 1;
        len    = bli_vector_dim( m, n );
        a_ld   = 1; // only ever multiplied by k == 0
        a_inc  = bli_vector_inc( trans, m, n, a_rs, a_cs );
        b_ld   = 1; // only ever multiplied by k == 0
        b_inc  = bli_vector_inc( BLIS_NO_TRANSPOSE, m, n, b_rs, b_cs );
    }
    else
    {
        // General matrix: begin with a column-by-column walk.
        n_vecs = n;
        len    = m;
        a_ld   = a_cs;
        a_inc  = a_rs;
        b_ld   = b_cs;
        b_inc  = b_rs;

        // A transposed A is read with its two strides exchanged.
        if ( bli_does_trans( trans ) )
        {
            bli_swap_ints( a_ld, a_inc );
        }

        // Walk rows instead (for increased spatial locality) only when B is
        // row-major and A, once trans is applied, is effectively row-major
        // as well.
        if ( bli_is_row_storage( b_rs, b_cs ) &&
             ( ( bli_is_col_storage( a_rs, a_cs ) && bli_does_trans( trans ) ) ||
               ( bli_is_row_storage( a_rs, a_cs ) && bli_does_notrans( trans ) ) ) )
        {
            bli_swap_ints( n_vecs, len );
            bli_swap_ints( a_ld, a_inc );
            bli_swap_ints( b_ld, b_inc );
        }
    }

    for ( k = 0; k < n_vecs; ++k )
    {
        // Scale this vector of B first ...
        bli_sscal( len,
                   beta,
                   b + k*b_ld, b_inc );

        // ... then add the scaled vector of A into it.
        bli_saxpy( len,
                   &scalar_ab,
                   a + k*a_ld, a_inc,
                   b + k*b_ld, b_inc );
    }
}
| void bli_saxpysv | ( | int | n, |
| float * | alpha0, | ||
| float * | alpha1, | ||
| float * | x, | ||
| int | incx, | ||
| float * | beta, | ||
| float * | y, | ||
| int | incy | ||
| ) |
References bli_saxpy(), bli_sscal(), and bli_zero_dim1().
Referenced by FLA_Lyap_h_ops_var2(), FLA_Lyap_h_ops_var3(), FLA_Lyap_h_ops_var4(), FLA_Lyap_n_ops_var2(), FLA_Lyap_n_ops_var3(), and FLA_Lyap_n_ops_var4().
{
    // Scale the n-element vector y by beta through bli_sscal(), then apply
    // bli_saxpy() to x and y with the scalar (*alpha0) * (*alpha1).
    float scalar_ab;

    // An empty vector needs no work.
    if ( bli_zero_dim1( n ) ) return;

    // The two alpha factors are folded into one scalar up front.
    scalar_ab = alpha0[0] * alpha1[0];

    bli_sscal( n,
               beta,
               y, incy );

    bli_saxpy( n,
               &scalar_ab,
               x, incx,
               y, incy );
}
| void bli_saxpyv | ( | conj_t | conj, |
| int | n, | ||
| float * | alpha, | ||
| float * | x, | ||
| int | incx, | ||
| float * | y, | ||
| int | incy | ||
| ) |
References bli_saxpy().
Referenced by bli_saxpymrt(), bli_strmvsx(), bli_strsvsx(), FLA_Apply_H2_UT_l_ops_var1(), FLA_Apply_H2_UT_r_ops_var1(), FLA_Apply_HUD_UT_l_ops_var1(), FLA_Bidiag_UT_u_step_ofs_var2(), FLA_Bidiag_UT_u_step_ofs_var3(), FLA_Bidiag_UT_u_step_ofs_var4(), FLA_Bidiag_UT_u_step_ops_var2(), FLA_Bidiag_UT_u_step_ops_var3(), FLA_Bidiag_UT_u_step_ops_var4(), FLA_Bidiag_UT_u_step_ops_var5(), FLA_Eig_gest_il_ops_var1(), FLA_Eig_gest_il_ops_var2(), FLA_Eig_gest_il_ops_var3(), FLA_Eig_gest_il_ops_var4(), FLA_Eig_gest_il_ops_var5(), FLA_Eig_gest_iu_ops_var1(), FLA_Eig_gest_iu_ops_var2(), FLA_Eig_gest_iu_ops_var3(), FLA_Eig_gest_iu_ops_var4(), FLA_Eig_gest_iu_ops_var5(), FLA_Eig_gest_nl_ops_var1(), FLA_Eig_gest_nl_ops_var2(), FLA_Eig_gest_nl_ops_var4(), FLA_Eig_gest_nl_ops_var5(), FLA_Eig_gest_nu_ops_var1(), FLA_Eig_gest_nu_ops_var2(), FLA_Eig_gest_nu_ops_var4(), FLA_Eig_gest_nu_ops_var5(), FLA_Hess_UT_step_ofs_var2(), FLA_Hess_UT_step_ofs_var3(), FLA_Hess_UT_step_ofs_var4(), FLA_Hess_UT_step_ops_var2(), FLA_Hess_UT_step_ops_var3(), FLA_Hess_UT_step_ops_var4(), FLA_Hess_UT_step_ops_var5(), FLA_Tridiag_UT_l_step_ofs_var2(), FLA_Tridiag_UT_l_step_ofs_var3(), FLA_Tridiag_UT_l_step_ops_var1(), FLA_Tridiag_UT_l_step_ops_var2(), and FLA_Tridiag_UT_l_step_ops_var3().
{
// Forward (n, alpha, x, incx, y, incy) unchanged to bli_saxpy(). The conj
// argument is accepted but never read here, and there is no early return
// for an empty vector: that case is left to bli_saxpy().
bli_saxpy( n,
alpha,
x, incx,
y, incy );
}
| void bli_sccopymr | ( | uplo_t | uplo, |
| int | m, | ||
| int | n, | ||
| float * | a, | ||
| int | a_rs, | ||
| int | a_cs, | ||
| scomplex * | b, | ||
| int | b_rs, | ||
| int | b_cs | ||
| ) |
References bli_is_row_storage(), bli_is_upper(), bli_sccopyv(), bli_zero_dim2(), and BLIS_NO_TRANSPOSE.
Referenced by FLA_Copyr_external().
{
    // Copy the part of the m x n float matrix A selected by uplo into the
    // same region of the scomplex matrix B, issuing one bli_sccopyv() call
    // per column or row. Entries of B outside that region are never written.
    //
    // NOTE(review): BLIS_NO_TRANSPOSE (a trans_t constant) is passed in the
    // conj position of bli_sccopyv(); confirm whether a conj_t constant was
    // intended.
    int a_ld, a_inc;
    int b_ld, b_inc;
    int n_vecs;
    int len_cap;
    int len;
    int k;

    // An empty matrix needs no work.
    if ( bli_zero_dim2( m, n ) ) return;

    // Start with a column-by-column walk: n vectors of at most m entries.
    n_vecs  = n;
    len_cap = m;
    a_ld    = a_cs;
    a_inc   = a_rs;
    b_ld    = b_cs;
    b_inc   = b_rs;

    // If B is row-major, walk rows instead for increased spatial locality.
    // The dimensions and strides trade places, and uplo is toggled to go
    // with the exchanged traversal.
    if ( bli_is_row_storage( b_rs, b_cs ) )
    {
        bli_swap_ints( n_vecs, len_cap );
        bli_swap_ints( a_ld, a_inc );
        bli_swap_ints( b_ld, b_inc );
        bli_toggle_uplo( uplo );
    }

    if ( bli_is_upper( uplo ) )
    {
        // Vector k begins at its first entry and holds k + 1 entries,
        // capped at len_cap.
        for ( k = 0; k < n_vecs; ++k )
        {
            len = bli_min( k + 1, len_cap );

            bli_sccopyv( BLIS_NO_TRANSPOSE,
                         len,
                         a + k*a_ld, a_inc,
                         b + k*b_ld, b_inc );
        }
    }
    else // lower
    {
        // Vector k begins at offset k along both strides (the diagonal) and
        // holds the remaining len_cap - k entries; stop as soon as nothing
        // remains.
        for ( k = 0; k < n_vecs; ++k )
        {
            len = len_cap - k;
            if ( len <= 0 ) break;

            bli_sccopyv( BLIS_NO_TRANSPOSE,
                         len,
                         a + k*a_ld + k*a_inc, a_inc,
                         b + k*b_ld + k*b_inc, b_inc );
        }
    }
}
| void bli_sccopymrt | ( | uplo_t | uplo, |
| trans_t | trans, | ||
| int | m, | ||
| int | n, | ||
| float * | a, | ||
| int | a_rs, | ||
| int | a_cs, | ||
| scomplex * | b, | ||
| int | b_rs, | ||
| int | b_cs | ||
| ) |
References bli_does_trans(), bli_is_col_storage(), bli_is_lower(), bli_proj_trans_to_conj(), bli_sccopyv(), and bli_zero_dim2().
Referenced by FLA_Copyrt_external().
{
    // Copy the part of the float matrix A selected by uplo (with A's strides
    // exchanged when trans requests a transpose) into the scomplex matrix B,
    // issuing one bli_sccopyv() call per column or row of B.
    float*    src;
    scomplex* dst;
    int       a_ld, a_inc;
    int       b_ld, b_inc;
    int       n_vecs;
    int       len;
    int       len_cap;
    int       min_dim;
    int       shrinking;
    int       k;
    conj_t    conj;

    // An empty matrix needs no work.
    if ( bli_zero_dim2( m, n ) ) return;

    min_dim = bli_min( m, n );

    // B's storage decides whether we walk columns or rows; uplo then fixes
    // the number of vectors, the length bound, and whether the vectors
    // shrink or grow from one iteration to the next.
    if ( bli_is_col_storage( b_rs, b_cs ) )
    {
        // Column-by-column walk.
        a_ld  = a_cs;
        a_inc = a_rs;
        b_ld  = b_cs;
        b_inc = b_rs;

        if ( bli_is_lower( uplo ) )
        {
            n_vecs    = min_dim;
            len_cap   = m;
            shrinking = TRUE;
        }
        else // upper
        {
            n_vecs    = n;
            len_cap   = min_dim;
            shrinking = FALSE;
        }
    }
    else // B is not column-stored
    {
        // Row-by-row walk.
        a_ld  = a_rs;
        a_inc = a_cs;
        b_ld  = b_rs;
        b_inc = b_cs;

        if ( bli_is_lower( uplo ) )
        {
            n_vecs    = m;
            len_cap   = min_dim;
            shrinking = FALSE;
        }
        else // upper
        {
            n_vecs    = min_dim;
            len_cap   = n;
            shrinking = TRUE;
        }
    }

    // A transposed A is read with its two strides exchanged.
    if ( bli_does_trans( trans ) )
    {
        bli_swap_ints( a_ld, a_inc );
    }

    // Only the conjugation component of trans is handed to the vector copy.
    conj = bli_proj_trans_to_conj( trans );

    for ( k = 0; k < n_vecs; ++k )
    {
        if ( shrinking )
        {
            // Start on the diagonal; one entry fewer each iteration.
            len = len_cap - k;
            src = a + k*a_ld + k*a_inc;
            dst = b + k*b_ld + k*b_inc;
        }
        else
        {
            // Start at the first entry; one entry more each iteration,
            // capped at len_cap.
            len = bli_min( k + 1, len_cap );
            src = a + k*a_ld;
            dst = b + k*b_ld;
        }

        bli_sccopyv( conj,
                     len,
                     src, a_inc,
                     dst, b_inc );
    }
}
| void bli_sccopymt | ( | trans_t | trans, |
| int | m, | ||
| int | n, | ||
| float * | a, | ||
| int | a_rs, | ||
| int | a_cs, | ||
| scomplex * | b, | ||
| int | b_rs, | ||
| int | b_cs | ||
| ) |
References bli_does_trans(), bli_is_row_storage(), bli_is_vector(), bli_proj_trans_to_conj(), bli_sccopyv(), bli_vector_dim(), bli_vector_inc(), bli_zero_dim2(), and BLIS_NO_TRANSPOSE.
Referenced by FLA_Copy_external(), and FLA_Copyt_external().
{
    // Copy the float matrix A (with its strides exchanged when trans
    // requests a transpose) into the m x n scomplex matrix B, issuing one
    // bli_sccopyv() call per column or row of B.
    int    a_ld, a_inc;
    int    b_ld, b_inc;
    int    n_vecs;
    int    len;
    int    k;
    conj_t conj;

    // An empty matrix needs no work.
    if ( bli_zero_dim2( m, n ) ) return;

    if ( bli_is_vector( m, n ) )
    {
        // Vector-shaped operands: arrange for exactly one bli_sccopyv()
        // call over the whole vector.
        n_vecs = 1;
        len    = bli_vector_dim( m, n );
        a_ld   = 1; // only ever multiplied by k == 0
        a_inc  = bli_vector_inc( trans, m, n, a_rs, a_cs );
        b_ld   = 1; // only ever multiplied by k == 0
        b_inc  = bli_vector_inc( BLIS_NO_TRANSPOSE, m, n, b_rs, b_cs );
    }
    else
    {
        // General matrix: begin with a column-by-column walk of B.
        n_vecs = n;
        len    = m;
        a_ld   = a_cs;
        a_inc  = a_rs;
        b_ld   = b_cs;
        b_inc  = b_rs;

        // A transposed A is read with its two strides exchanged.
        if ( bli_does_trans( trans ) )
        {
            bli_swap_ints( a_ld, a_inc );
        }

        // If B is row-major, walk rows instead for increased spatial
        // locality.
        if ( bli_is_row_storage( b_rs, b_cs ) )
        {
            bli_swap_ints( n_vecs, len );
            bli_swap_ints( a_ld, a_inc );
            bli_swap_ints( b_ld, b_inc );
        }
    }

    // Only the conjugation component of trans is handed to the vector copy.
    conj = bli_proj_trans_to_conj( trans );

    for ( k = 0; k < n_vecs; ++k )
    {
        bli_sccopyv( conj,
                     len,
                     a + k*a_ld, a_inc,
                     b + k*b_ld, b_inc );
    }
}
| void bli_sccopyv | ( | conj_t | conj, |
| int | m, | ||
| float * | x, | ||
| int | incx, | ||
| scomplex * | y, | ||
| int | incy | ||
| ) |
References bli_zero_dim1(), scomplex::imag, and scomplex::real.
Referenced by bli_sccopymr(), bli_sccopymrt(), and bli_sccopymt().
{
    // Widen m floats read from x (stride incx) into scomplex entries of y
    // (stride incy): the real part takes the source value and the imaginary
    // part is set to zero. The conj argument is accepted but never read.
    float*    src;
    scomplex* dst;
    int       remaining;

    // An empty vector needs no work.
    if ( bli_zero_dim1( m ) ) return;

    src = x;
    dst = y;

    for ( remaining = m; remaining > 0; --remaining )
    {
        dst->real = *src;
        dst->imag = 0.0F;

        src += incx;
        dst += incy;
    }
}
| void bli_sconjm | ( | int | m, |
| int | n, | ||
| float * | a, | ||
| int | a_rs, | ||
| int | a_cs | ||
| ) |
{
// No-op: the body returns immediately and never reads or writes a.
// Presumably this is because conjugating real (float) data changes nothing.
return;
}
| void bli_sconjmr | ( | uplo_t | uplo, |
| int | m, | ||
| int | n, | ||
| float * | a, | ||
| int | a_rs, | ||
| int | a_cs | ||
| ) |
{
// No-op: the body returns immediately; uplo, the dimensions and a are unused.
// Presumably this is because conjugating real (float) data changes nothing.
return;
}
| void bli_sconjv | ( | int | m, |
| float * | x, | ||
| int | incx | ||
| ) |
Referenced by FLA_Bidiag_UT_u_step_ofs_var4(), FLA_Bidiag_UT_u_step_ops_var3(), and FLA_Bidiag_UT_u_step_ops_var4().
{
// No-op: the body returns immediately and never reads or writes x.
// Presumably this is because conjugating real (float) data changes nothing.
return;
}
| void bli_scopy | ( | int | m, |
| float * | x, | ||
| int | incx, | ||
| float * | y, | ||
| int | incy | ||
| ) |
References cblas_scopy(), and F77_scopy().
Referenced by bli_scopymr(), bli_scopymt(), bli_scopyv(), and FLA_SA_LU_unb().
{
// Forward the vector copy (m elements of x with stride incx into y with
// stride incy) to the BLAS binding selected at build time. The CBLAS entry
// point takes the length and strides by value; the Fortran-77 entry point
// takes them by address.
#ifdef BLIS_ENABLE_CBLAS_INTERFACES
cblas_scopy( m,
x, incx,
y, incy );
#else
F77_scopy( &m,
x, &incx,
y, &incy );
#endif
}
| void bli_scopymr | ( | uplo_t | uplo, |
| int | m, | ||
| int | n, | ||
| float * | a, | ||
| int | a_rs, | ||
| int | a_cs, | ||
| float * | b, | ||
| int | b_rs, | ||
| int | b_cs | ||
| ) |
References bli_is_row_storage(), bli_is_upper(), bli_scopy(), and bli_zero_dim2().
Referenced by bli_screate_contigmr(), bli_sfree_saved_contigmr(), and FLA_Copyr_external().
{
	float* src;
	float* dst;
	int    a_ld, a_inc;
	int    b_ld, b_inc;
	int    n_slices;
	int    len_cap;
	int    len;
	int    k;

	// Nothing to copy when either dimension is zero.
	if ( bli_zero_dim2( m, n ) ) return;

	// Default traversal: one slice per column, elements strided by rows.
	n_slices = n;
	len_cap  = m;
	a_ld     = a_cs;
	a_inc    = a_rs;
	b_ld     = b_cs;
	b_inc    = b_rs;

	// When B and A are both row-major, walk the matrices by rows instead
	// for better spatial locality. The roles of the two dimensions and of
	// the two strides are exchanged, and uplo is toggled to go with them.
	if ( bli_is_row_storage( b_rs, b_cs ) )
	{
		if ( bli_is_row_storage( a_rs, a_cs ) )
		{
			bli_swap_ints( n_slices, len_cap );
			bli_swap_ints( a_ld, a_inc );
			bli_swap_ints( b_ld, b_inc );
			bli_toggle_uplo( uplo );
		}
	}

	if ( bli_is_upper( uplo ) )
	{
		// Slice k starts at its first element and holds k+1 elements,
		// capped at len_cap.
		for ( k = 0; k < n_slices; k++ )
		{
			len = bli_min( k + 1, len_cap );
			src = a + k*a_ld;
			dst = b + k*b_ld;

			bli_scopy( len, src, a_inc, dst, b_inc );
		}

		return;
	}

	// Lower case: slice k starts on the diagonal and holds len_cap - k
	// elements; stop as soon as no elements remain.
	for ( k = 0; k < n_slices; k++ )
	{
		len = bli_max( 0, len_cap - k );
		src = a + k*a_ld + k*a_inc;
		dst = b + k*b_ld + k*b_inc;

		if ( len <= 0 ) break;

		bli_scopy( len, src, a_inc, dst, b_inc );
	}
}
| void bli_scopymrt | ( | uplo_t | uplo, |
| trans_t | trans, | ||
| int | m, | ||
| int | n, | ||
| float * | a, | ||
| int | a_rs, | ||
| int | a_cs, | ||
| float * | b, | ||
| int | b_rs, | ||
| int | b_cs | ||
| ) |
References bli_does_trans(), bli_is_col_storage(), bli_is_lower(), bli_proj_trans_to_conj(), bli_scopyv(), and bli_zero_dim2().
Referenced by FLA_Copyrt_external(), FLA_Lyap_h_ops_var1(), FLA_Lyap_h_ops_var2(), FLA_Lyap_h_ops_var3(), FLA_Lyap_h_ops_var4(), FLA_Lyap_n_ops_var1(), FLA_Lyap_n_ops_var2(), FLA_Lyap_n_ops_var3(), and FLA_Lyap_n_ops_var4().
{
// Copies part of A (optionally accessed as transposed, per trans) into B one
// slice at a time via bli_scopyv(). The slice length either shrinks by one
// per iteration starting from the diagonal, or grows by one per iteration,
// which corresponds to what appears to be the uplo-selected triangular or
// trapezoidal region of an m-by-n matrix.
float* a_begin;
float* b_begin;
int lda, inca;
int ldb, incb;
int n_iter;
int n_elem;
int n_elem_max;
int n_elem_is_descending;
int j;
conj_t conj;
// Return early if possible.
if ( bli_zero_dim2( m, n ) ) return;
// Initialize variables based on storage format of B and value of uplo.
// Column storage iterates over columns (lda/ldb are the column strides);
// row storage iterates over rows (lda/ldb are the row strides).
if ( bli_is_col_storage( b_rs, b_cs ) )
{
if ( bli_is_lower( uplo ) )
{
// Column j runs from the diagonal down: m - j elements.
n_iter = bli_min( m, n );
n_elem_max = m;
lda = a_cs;
inca = a_rs;
ldb = b_cs;
incb = b_rs;
n_elem_is_descending = TRUE;
}
else // if ( bli_is_upper( uplo ) )
{
// Column j runs from the top: j + 1 elements, capped at min( m, n ).
n_iter = n;
n_elem_max = bli_min( m, n );
lda = a_cs;
inca = a_rs;
ldb = b_cs;
incb = b_rs;
n_elem_is_descending = FALSE;
}
}
else // if ( bli_is_row_storage( b_rs, b_cs ) )
{
if ( bli_is_lower( uplo ) )
{
// Row j runs from the left: j + 1 elements, capped at min( m, n ).
n_iter = m;
n_elem_max = bli_min( m, n );
lda = a_rs;
inca = a_cs;
ldb = b_rs;
incb = b_cs;
n_elem_is_descending = FALSE;
}
else // if ( bli_is_upper( uplo ) )
{
// Row j runs from the diagonal rightwards: n - j elements.
n_iter = bli_min( m, n );
n_elem_max = n;
lda = a_rs;
inca = a_cs;
ldb = b_rs;
incb = b_cs;
n_elem_is_descending = TRUE;
}
}
// Swap lda and inca if we're doing a transpose.
// Only A's strides are exchanged; B is always accessed untransposed.
if ( bli_does_trans( trans ) )
{
bli_swap_ints( lda, inca );
}
// Extract conj component from trans parameter.
conj = bli_proj_trans_to_conj( trans );
// Choose the loop based on whether n_elem will be shrinking or growing
// with each iteration.
if ( n_elem_is_descending )
{
for ( j = 0; j < n_iter; j++ )
{
// Start on the diagonal (offset j along both strides); each slice is one
// element shorter than the previous one. n_iter <= n_elem_max here, so
// n_elem stays positive.
n_elem = n_elem_max - j;
a_begin = a + j*lda + j*inca;
b_begin = b + j*ldb + j*incb;
bli_scopyv( conj,
n_elem,
a_begin, inca,
b_begin, incb );
}
}
else // if ( n_elem_is_ascending )
{
for ( j = 0; j < n_iter; j++ )
{
// Start at the first element of slice j; the slice holds j + 1 elements
// until the n_elem_max cap is reached.
n_elem = bli_min( j + 1, n_elem_max );
a_begin = a + j*lda;
b_begin = b + j*ldb;
bli_scopyv( conj,
n_elem,
a_begin, inca,
b_begin, incb );
}
}
}
| void bli_scopymt | ( | trans_t | trans, |
| int | m, | ||
| int | n, | ||
| float * | a, | ||
| int | a_rs, | ||
| int | a_cs, | ||
| float * | b, | ||
| int | b_rs, | ||
| int | b_cs | ||
| ) |
References bli_does_notrans(), bli_does_trans(), bli_is_col_storage(), bli_is_row_storage(), bli_is_vector(), bli_scopy(), bli_vector_dim(), bli_vector_inc(), bli_zero_dim2(), and BLIS_NO_TRANSPOSE.
Referenced by bli_screate_contigm(), bli_screate_contigmt(), bli_sfree_saved_contigm(), bli_sfree_saved_contigmsr(), bli_ssymm(), bli_ssyr2k(), bli_strmmsx(), bli_strsmsx(), FLA_Copy_external(), and FLA_Copyt_external().
{
// Copies the m-by-n operand A (accessed as transposed when trans requests
// it) into B, issuing one bli_scopy() call per column or per row.
float* a_begin;
float* b_begin;
int lda, inca;
int ldb, incb;
int n_iter;
int n_elem;
int j;
// Return early if possible.
if ( bli_zero_dim2( m, n ) ) return;
// Handle cases where A and B are vectors to ensure that the underlying copy
// gets invoked only once.
if ( bli_is_vector( m, n ) )
{
// Initialize with values appropriate for vectors.
n_iter = 1;
n_elem = bli_vector_dim( m, n );
lda = 1; // multiplied by zero when n_iter == 1; not needed.
inca = bli_vector_inc( trans, m, n, a_rs, a_cs );
ldb = 1; // multiplied by zero when n_iter == 1; not needed.
incb = bli_vector_inc( BLIS_NO_TRANSPOSE, m, n, b_rs, b_cs );
}
else // matrix case
{
// Initialize with optimal values for column-major storage.
n_iter = n;
n_elem = m;
lda = a_cs;
inca = a_rs;
ldb = b_cs;
incb = b_rs;
// Handle the transposition of A.
// Exchanging A's strides makes column j of the traversal read row j of A.
if ( bli_does_trans( trans ) )
{
bli_swap_ints( lda, inca );
}
// An optimization: if B is row-major and if A is effectively row-major
// after a possible transposition, then let's access the matrix by rows
// instead of by columns for increased spatial locality.
if ( bli_is_row_storage( b_rs, b_cs ) )
{
if ( ( bli_is_col_storage( a_rs, a_cs ) && bli_does_trans( trans ) ) ||
( bli_is_row_storage( a_rs, a_cs ) && bli_does_notrans( trans ) ) )
{
// Iterate over the m rows, each of n elements, using the row strides
// as the leading dimensions.
bli_swap_ints( n_iter, n_elem );
bli_swap_ints( lda, inca );
bli_swap_ints( ldb, incb );
}
}
}
// Copy slice j of A onto slice j of B.
for ( j = 0; j < n_iter; j++ )
{
a_begin = a + j*lda;
b_begin = b + j*ldb;
bli_scopy( n_elem,
a_begin, inca,
b_begin, incb );
}
}
| void bli_scopyv | ( | conj_t | conj, |
| int | m, | ||
| float * | x, | ||
| int | incx, | ||
| float * | y, | ||
| int | incy | ||
| ) |
References bli_scopy().
Referenced by bli_scopymrt(), bli_sscopymr(), bli_sscopymrt(), bli_sscopymt(), bli_ssymmize(), bli_strmvsx(), bli_strsvsx(), FLA_Accum_T_UT_fc_ops_var1(), FLA_Accum_T_UT_fr_ops_var1(), FLA_Apply_H2_UT_l_ops_var1(), FLA_Apply_H2_UT_r_ops_var1(), FLA_Apply_HUD_UT_l_ops_var1(), FLA_Bidiag_UT_u_step_ofs_var2(), FLA_Bidiag_UT_u_step_ofs_var3(), FLA_Bidiag_UT_u_step_ofs_var4(), FLA_Bidiag_UT_u_step_ops_var1(), FLA_Bidiag_UT_u_step_ops_var2(), FLA_Bidiag_UT_u_step_ops_var3(), FLA_Bidiag_UT_u_step_ops_var4(), FLA_Bidiag_UT_u_step_ops_var5(), FLA_CAQR2_UT_ops_var1(), FLA_Eig_gest_il_ops_var3(), FLA_Eig_gest_iu_ops_var3(), FLA_Fused_UYx_ZVx_ops_var1(), FLA_Hess_UT_step_ofs_var3(), FLA_Hess_UT_step_ops_var3(), FLA_Hess_UT_step_ops_var4(), FLA_Hess_UT_step_ops_var5(), FLA_LQ_UT_ops_var2(), FLA_QR_UT_ops_var2(), FLA_Tridiag_UT_l_step_ofs_var2(), FLA_Tridiag_UT_l_step_ops_var2(), FLA_Tridiag_UT_l_step_ops_var3(), and FLA_Tridiag_UT_shift_U_l_ops().
{
// Thin wrapper: forwards the copy to bli_scopy(). The conj argument is
// accepted but not used by this body.
bli_scopy( m,
x, incx,
y, incy );
}
| void bli_sdcopymr | ( | uplo_t | uplo, |
| int | m, | ||
| int | n, | ||
| float * | a, | ||
| int | a_rs, | ||
| int | a_cs, | ||
| double * | b, | ||
| int | b_rs, | ||
| int | b_cs | ||
| ) |
References bli_is_row_storage(), bli_is_upper(), bli_sdcopyv(), bli_zero_dim2(), and BLIS_NO_TRANSPOSE.
Referenced by FLA_Copyr_external().
{
// Copies part of the float matrix A into the double matrix B one slice at a
// time via bli_sdcopyv(). Depending on uplo the slice length grows by one
// per iteration from the first element, or shrinks by one per iteration
// starting from the diagonal.
float* a_begin;
double* b_begin;
int lda, inca;
int ldb, incb;
int n_iter;
int n_elem_max;
int n_elem;
int j;
// Return early if possible.
if ( bli_zero_dim2( m, n ) ) return;
// We initialize for column-major.
n_iter = n;
n_elem_max = m;
lda = a_cs;
inca = a_rs;
ldb = b_cs;
incb = b_rs;
// An optimization: if B is row-major, then let's access the matrix
// by rows instead of by columns for increased spatial locality.
// uplo is toggled together with the exchanged dimensions and strides.
if ( bli_is_row_storage( b_rs, b_cs ) )
{
bli_swap_ints( n_iter, n_elem_max );
bli_swap_ints( lda, inca );
bli_swap_ints( ldb, incb );
bli_toggle_uplo( uplo );
}
// NOTE(review): BLIS_NO_TRANSPOSE is passed below in the conj_t slot of
// bli_sdcopyv(). bli_sdcopyv() never reads its conj argument, so this has
// no effect on the result, but a conj_t constant would be the expected
// value here -- confirm against the type definitions.
if ( bli_is_upper( uplo ) )
{
for ( j = 0; j < n_iter; j++ )
{
// Slice j starts at its first element and holds j + 1 elements, capped
// at n_elem_max.
n_elem = bli_min( j + 1, n_elem_max );
a_begin = a + j*lda;
b_begin = b + j*ldb;
bli_sdcopyv( BLIS_NO_TRANSPOSE,
n_elem,
a_begin, inca,
b_begin, incb );
}
}
else // if ( bli_is_lower( uplo ) )
{
for ( j = 0; j < n_iter; j++ )
{
// Slice j starts on the diagonal and holds n_elem_max - j elements; stop
// once no elements remain.
n_elem = bli_max( 0, n_elem_max - j );
a_begin = a + j*lda + j*inca;
b_begin = b + j*ldb + j*incb;
if ( n_elem <= 0 ) break;
bli_sdcopyv( BLIS_NO_TRANSPOSE,
n_elem,
a_begin, inca,
b_begin, incb );
}
}
}
| void bli_sdcopymrt | ( | uplo_t | uplo, |
| trans_t | trans, | ||
| int | m, | ||
| int | n, | ||
| float * | a, | ||
| int | a_rs, | ||
| int | a_cs, | ||
| double * | b, | ||
| int | b_rs, | ||
| int | b_cs | ||
| ) |
References bli_does_trans(), bli_is_col_storage(), bli_is_lower(), bli_proj_trans_to_conj(), bli_sdcopyv(), and bli_zero_dim2().
Referenced by FLA_Copyrt_external().
{
// Copies part of the float matrix A (optionally accessed as transposed, per
// trans) into the double matrix B one slice at a time via bli_sdcopyv().
// The slice length either shrinks by one per iteration starting from the
// diagonal, or grows by one per iteration, which corresponds to what appears
// to be the uplo-selected triangular or trapezoidal region.
float* a_begin;
double* b_begin;
int lda, inca;
int ldb, incb;
int n_iter;
int n_elem;
int n_elem_max;
int n_elem_is_descending;
int j;
conj_t conj;
// Return early if possible.
if ( bli_zero_dim2( m, n ) ) return;
// Initialize variables based on storage format of B and value of uplo.
// Column storage iterates over columns (lda/ldb are the column strides);
// row storage iterates over rows (lda/ldb are the row strides).
if ( bli_is_col_storage( b_rs, b_cs ) )
{
if ( bli_is_lower( uplo ) )
{
// Column j runs from the diagonal down: m - j elements.
n_iter = bli_min( m, n );
n_elem_max = m;
lda = a_cs;
inca = a_rs;
ldb = b_cs;
incb = b_rs;
n_elem_is_descending = TRUE;
}
else // if ( bli_is_upper( uplo ) )
{
// Column j runs from the top: j + 1 elements, capped at min( m, n ).
n_iter = n;
n_elem_max = bli_min( m, n );
lda = a_cs;
inca = a_rs;
ldb = b_cs;
incb = b_rs;
n_elem_is_descending = FALSE;
}
}
else // if ( bli_is_row_storage( b_rs, b_cs ) )
{
if ( bli_is_lower( uplo ) )
{
// Row j runs from the left: j + 1 elements, capped at min( m, n ).
n_iter = m;
n_elem_max = bli_min( m, n );
lda = a_rs;
inca = a_cs;
ldb = b_rs;
incb = b_cs;
n_elem_is_descending = FALSE;
}
else // if ( bli_is_upper( uplo ) )
{
// Row j runs from the diagonal rightwards: n - j elements.
n_iter = bli_min( m, n );
n_elem_max = n;
lda = a_rs;
inca = a_cs;
ldb = b_rs;
incb = b_cs;
n_elem_is_descending = TRUE;
}
}
// Swap lda and inca if we're doing a transpose.
// Only A's strides are exchanged; B is always accessed untransposed.
if ( bli_does_trans( trans ) )
{
bli_swap_ints( lda, inca );
}
// Extract conj component from trans parameter.
conj = bli_proj_trans_to_conj( trans );
// Choose the loop based on whether n_elem will be shrinking or growing
// with each iteration.
if ( n_elem_is_descending )
{
for ( j = 0; j < n_iter; j++ )
{
// Start on the diagonal (offset j along both strides); each slice is one
// element shorter than the previous one. n_iter <= n_elem_max here, so
// n_elem stays positive.
n_elem = n_elem_max - j;
a_begin = a + j*lda + j*inca;
b_begin = b + j*ldb + j*incb;
bli_sdcopyv( conj,
n_elem,
a_begin, inca,
b_begin, incb );
}
}
else // if ( n_elem_is_ascending )
{
for ( j = 0; j < n_iter; j++ )
{
// Start at the first element of slice j; the slice holds j + 1 elements
// until the n_elem_max cap is reached.
n_elem = bli_min( j + 1, n_elem_max );
a_begin = a + j*lda;
b_begin = b + j*ldb;
bli_sdcopyv( conj,
n_elem,
a_begin, inca,
b_begin, incb );
}
}
}
| void bli_sdcopymt | ( | trans_t | trans, |
| int | m, | ||
| int | n, | ||
| float * | a, | ||
| int | a_rs, | ||
| int | a_cs, | ||
| double * | b, | ||
| int | b_rs, | ||
| int | b_cs | ||
| ) |
References bli_does_trans(), bli_is_row_storage(), bli_is_vector(), bli_proj_trans_to_conj(), bli_sdcopyv(), bli_vector_dim(), bli_vector_inc(), bli_zero_dim2(), and BLIS_NO_TRANSPOSE.
Referenced by FLA_Copy_external(), and FLA_Copyt_external().
{
// Copies the m-by-n float operand A (accessed as transposed when trans
// requests it) into the double operand B, issuing one bli_sdcopyv() call per
// column or per row.
float* a_begin;
double* b_begin;
int lda, inca;
int ldb, incb;
int n_iter;
int n_elem;
int j;
conj_t conj;
// Return early if possible.
if ( bli_zero_dim2( m, n ) ) return;
// Handle cases where A and B are vectors to ensure that the underlying copy
// gets invoked only once.
if ( bli_is_vector( m, n ) )
{
// Initialize with values appropriate for vectors.
n_iter = 1;
n_elem = bli_vector_dim( m, n );
lda = 1; // multiplied by zero when n_iter == 1; not needed.
inca = bli_vector_inc( trans, m, n, a_rs, a_cs );
ldb = 1; // multiplied by zero when n_iter == 1; not needed.
incb = bli_vector_inc( BLIS_NO_TRANSPOSE, m, n, b_rs, b_cs );
}
else // matrix case
{
// Initialize with optimal values for column-major storage of B.
n_iter = n;
n_elem = m;
lda = a_cs;
inca = a_rs;
ldb = b_cs;
incb = b_rs;
// Handle the transposition of A.
// Exchanging A's strides makes column j of the traversal read row j of A.
if ( bli_does_trans( trans ) )
{
bli_swap_ints( lda, inca );
}
// An optimization: if B is row-major, then let's access the matrix by rows
// instead of by columns for increased spatial locality.
// Unlike the same-type copy, only B's storage is checked here.
if ( bli_is_row_storage( b_rs, b_cs ) )
{
bli_swap_ints( n_iter, n_elem );
bli_swap_ints( lda, inca );
bli_swap_ints( ldb, incb );
}
}
// Extract conj component from trans parameter.
conj = bli_proj_trans_to_conj( trans );
// Copy slice j of A onto slice j of B.
for ( j = 0; j < n_iter; ++j )
{
a_begin = a + j*lda;
b_begin = b + j*ldb;
bli_sdcopyv( conj,
n_elem,
a_begin, inca,
b_begin, incb );
}
}
| void bli_sdcopyv | ( | conj_t | conj, |
| int | m, | ||
| float * | x, | ||
| int | incx, | ||
| double * | y, | ||
| int | incy | ||
| ) |
References bli_zero_dim1().
Referenced by bli_sdcopymr(), bli_sdcopymrt(), and bli_sdcopymt().
{
	float*  src = x;
	double* dst = y;
	int     remaining;

	// An empty vector leaves y untouched.
	if ( bli_zero_dim1( m ) ) return;

	// Promote each float source element to double and store it in the
	// destination. The conj argument is not consulted.
	for ( remaining = m; remaining > 0; --remaining )
	{
		*dst = *src;

		src += incx;
		dst += incy;
	}
}
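| void bli_sdot | ( | conj_t | conj, |
| int | n, | ||
| float * | x, | ||
| int | incx, | ||
| float * | y, | ||
| int | incy, | ||
| float * | rho | ||
| ) |
References cblas_sdot(), and F77_sdot().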
Referenced by bli_sdot2s(), bli_sdots(), FLA_Bidiag_UT_u_step_ofs_var2(), FLA_Bidiag_UT_u_step_ofs_var3(), FLA_Bidiag_UT_u_step_ofs_var4(), FLA_Bidiag_UT_u_step_ops_var2(), FLA_Bidiag_UT_u_step_ops_var3(), FLA_Bidiag_UT_u_step_ops_var4(), FLA_Bidiag_UT_u_step_ops_var5(), FLA_Dot_external(), FLA_Dotc_external(), FLA_Hess_UT_step_ofs_var2(), FLA_Hess_UT_step_ofs_var3(), FLA_Hess_UT_step_ofs_var4(), FLA_Hess_UT_step_ops_var2(), FLA_Hess_UT_step_ops_var3(), FLA_Hess_UT_step_ops_var4(), FLA_Hess_UT_step_ops_var5(), FLA_Sylv_hh_ops_var1(), FLA_Sylv_hn_ops_var1(), FLA_Sylv_nh_ops_var1(), FLA_Sylv_nn_ops_var1(), FLA_Tridiag_UT_l_step_ofs_var2(), FLA_Tridiag_UT_l_step_ofs_var3(), FLA_Tridiag_UT_l_step_ops_var1(), FLA_Tridiag_UT_l_step_ops_var2(), and FLA_Tridiag_UT_l_step_ops_var3().
{
// Compute the dot product of the n-element vectors x (stride incx) and y
// (stride incy) with the BLAS binding selected at build time, and store the
// result in *rho. The CBLAS entry point takes the length and strides by
// value; the Fortran-77 entry point takes them by address.
#ifdef BLIS_ENABLE_CBLAS_INTERFACES
*rho = cblas_sdot( n,
x, incx,
y, incy );
#else
*rho = F77_sdot( &n,
x, &incx,
y, &incy );
#endif
}
| void bli_sdot2s | ( | conj_t | conj, |
| int | n, | ||
| float * | alpha, | ||
| float * | x, | ||
| int | incx, | ||
| float * | y, | ||
| int | incy, | ||
| float * | beta, | ||
| float * | rho | ||
| ) |
References bli_sdot().
Referenced by FLA_Dot2cs_external(), FLA_Dot2s_external(), FLA_Eig_gest_il_ops_var1(), FLA_Eig_gest_il_ops_var2(), FLA_Eig_gest_il_ops_var3(), FLA_Eig_gest_iu_ops_var1(), FLA_Eig_gest_iu_ops_var2(), FLA_Eig_gest_iu_ops_var3(), FLA_Eig_gest_nl_ops_var1(), FLA_Eig_gest_nl_ops_var2(), FLA_Eig_gest_nu_ops_var1(), FLA_Eig_gest_nu_ops_var2(), FLA_Lyap_h_ops_var1(), FLA_Lyap_h_ops_var2(), FLA_Lyap_h_ops_var3(), FLA_Lyap_n_ops_var1(), FLA_Lyap_n_ops_var2(), and FLA_Lyap_n_ops_var3().
{
	float xy;

	// xy := dot product of x and y, as computed by bli_sdot().
	bli_sdot( conj, n, x, incx, y, incy, &xy );

	// rho := beta * rho + 2 * alpha * xy
	*rho = (*beta) * (*rho) + 2.0F * (*alpha) * xy;
}
| void bli_sdots | ( | conj_t | conj, |
| int | n, | ||
| float * | alpha, | ||
| float * | x, | ||
| int | incx, | ||
| float * | y, | ||
| int | incy, | ||
| float * | beta, | ||
| float * | rho | ||
| ) |
References bli_sdot().
Referenced by FLA_Chol_l_ops_var1(), FLA_Chol_l_ops_var2(), FLA_Chol_u_ops_var1(), FLA_Chol_u_ops_var2(), FLA_Dotcs_external(), FLA_Dots_external(), FLA_Hess_UT_step_ops_var5(), FLA_LU_nopiv_ops_var1(), FLA_LU_nopiv_ops_var2(), FLA_LU_nopiv_ops_var3(), FLA_LU_nopiv_ops_var4(), FLA_LU_piv_ops_var3(), FLA_LU_piv_ops_var4(), FLA_Ttmm_l_ops_var2(), FLA_Ttmm_l_ops_var3(), FLA_Ttmm_u_ops_var2(), and FLA_Ttmm_u_ops_var3().
{
	float xy;

	// xy := dot product of x and y, as computed by bli_sdot().
	bli_sdot( conj, n, x, incx, y, incy, &xy );

	// rho := beta * rho + alpha * xy
	*rho = (*beta) * (*rho) + (*alpha) * xy;
}
| void bli_sfnorm | ( | int | m, |
| int | n, | ||
| float * | a, | ||
| int | a_rs, | ||
| int | a_cs, | ||
| float * | norm | ||
| ) |
References bli_is_row_storage(), bli_is_vector(), bli_vector_dim(), bli_vector_inc(), bli_zero_dim2(), and BLIS_NO_TRANSPOSE.
Referenced by FLA_Norm_frob().
{
	// Computes the Frobenius norm of the m-by-n matrix A, i.e. the square
	// root of the sum of the squares of its elements, and stores it in
	// *norm. a_rs and a_cs are the row and column strides of A.
	float* a_ij;
	float  sum;
	int    lda, inca;
	int    n_iter;
	int    n_elem;
	int    i, j;

	// An empty matrix has no elements to sum, so its norm is zero. Store
	// that result explicitly rather than returning with *norm unwritten,
	// which would leave the caller reading an indeterminate value.
	if ( bli_zero_dim2( m, n ) )
	{
		*norm = 0.0F;
		return;
	}

	// Handle cases where A is a vector separately.
	if ( bli_is_vector( m, n ) )
	{
		// Initialize with values appropriate for vectors.
		n_iter = 1;
		n_elem = bli_vector_dim( m, n );
		lda    = 1; // multiplied by zero when n_iter == 1; not needed.
		inca   = bli_vector_inc( BLIS_NO_TRANSPOSE, m, n, a_rs, a_cs );
	}
	else // matrix case
	{
		// Initialize with optimal values for column-major storage.
		n_iter = n;
		n_elem = m;
		lda    = a_cs;
		inca   = a_rs;

		// An optimization: if A is row-major, then let's access the matrix by
		// rows instead of by columns for increased spatial locality.
		if ( bli_is_row_storage( a_rs, a_cs ) )
		{
			bli_swap_ints( n_iter, n_elem );
			bli_swap_ints( lda, inca );
		}
	}

	// Initialize the accumulator variable.
	sum = 0.0F;

	// Accumulate the square of every element, slice by slice.
	for ( j = 0; j < n_iter; j++ )
	{
		for ( i = 0; i < n_elem; i++ )
		{
			a_ij = a + i*inca + j*lda;
			sum += (*a_ij) * (*a_ij);
		}
	}

	// Compute the norm and store the result.
	*norm = ( float ) sqrt( sum );
}
| void bli_sinvscalm | ( | conj_t | conj, |
| int | m, | ||
| int | n, | ||
| float * | alpha, | ||
| float * | a, | ||
| int | a_rs, | ||
| int | a_cs | ||
| ) |
References bli_is_row_storage(), bli_is_vector(), bli_sinvert2s(), bli_sscal(), bli_vector_dim(), bli_vector_inc(), bli_zero_dim2(), and BLIS_NO_TRANSPOSE.
Referenced by FLA_Inv_scal_external(), and FLA_Inv_scalc_external().
{
	float  scale;
	float* slice;
	int    outer_stride, inner_stride;
	int    n_slices;
	int    slice_len;
	int    k;

	// Nothing to do for an empty matrix, or when alpha is one.
	if ( bli_zero_dim2( m, n ) ) return;
	if ( bli_seq1( alpha ) ) return;

	if ( !( bli_is_vector( m, n ) ) )
	{
		// Matrix case: default to one slice per column.
		n_slices     = n;
		slice_len    = m;
		outer_stride = a_cs;
		inner_stride = a_rs;

		// For row-major A, walk by rows instead to improve spatial locality.
		if ( bli_is_row_storage( a_rs, a_cs ) )
		{
			bli_swap_ints( n_slices, slice_len );
			bli_swap_ints( outer_stride, inner_stride );
		}
	}
	else
	{
		// Vector case: a single slice, so the underlying scal is invoked
		// exactly once.
		n_slices     = 1;
		slice_len    = bli_vector_dim( m, n );
		outer_stride = 1; // only ever multiplied by zero; value is irrelevant.
		inner_stride = bli_vector_inc( BLIS_NO_TRANSPOSE, m, n, a_rs, a_cs );
	}

	// Obtain the scaling factor from alpha via bli_sinvert2s(), then apply
	// it to every slice.
	bli_sinvert2s( conj, alpha, &scale );

	for ( k = 0; k < n_slices; k++ )
	{
		slice = a + k*outer_stride;

		bli_sscal( slice_len, &scale, slice, inner_stride );
	}
}
| void bli_sinvscalv | ( | conj_t | conj, |
| int | n, | ||
| float * | alpha, | ||
| float * | x, | ||
| int | incx | ||
| ) |
References bli_sscal().
Referenced by bli_srandmr(), FLA_Apply_H2_UT_l_ops_var1(), FLA_Apply_H2_UT_r_ops_var1(), FLA_Apply_HUD_UT_l_ops_var1(), FLA_Bidiag_UT_u_step_ofs_var2(), FLA_Bidiag_UT_u_step_ofs_var3(), FLA_Bidiag_UT_u_step_ofs_var4(), FLA_Bidiag_UT_u_step_ops_var2(), FLA_Bidiag_UT_u_step_ops_var3(), FLA_Bidiag_UT_u_step_ops_var4(), FLA_Bidiag_UT_u_step_ops_var5(), FLA_Chol_l_ops_var2(), FLA_Chol_l_ops_var3(), FLA_Chol_u_ops_var2(), FLA_Chol_u_ops_var3(), FLA_Eig_gest_il_ops_var1(), FLA_Eig_gest_il_ops_var2(), FLA_Eig_gest_il_ops_var3(), FLA_Eig_gest_il_ops_var4(), FLA_Eig_gest_il_ops_var5(), FLA_Eig_gest_iu_ops_var1(), FLA_Eig_gest_iu_ops_var2(), FLA_Eig_gest_iu_ops_var3(), FLA_Eig_gest_iu_ops_var4(), FLA_Eig_gest_iu_ops_var5(), FLA_Househ2_UT_l_ops(), FLA_Househ3UD_UT_ops(), FLA_LU_nopiv_ops_var3(), FLA_LU_nopiv_ops_var4(), FLA_LU_nopiv_ops_var5(), FLA_LU_piv_ops_var3(), FLA_LU_piv_ops_var4(), FLA_LU_piv_ops_var5(), FLA_Trinv_ln_ops_var1(), FLA_Trinv_ln_ops_var2(), FLA_Trinv_ln_ops_var3(), FLA_Trinv_un_ops_var1(), FLA_Trinv_un_ops_var2(), and FLA_Trinv_un_ops_var3().
{
	float reciprocal;

	// Dividing by one changes nothing, so skip the call entirely.
	if ( bli_seq1( alpha ) ) return;

	// Scale x by 1/alpha. The conj argument is not consulted.
	reciprocal = 1.0F / *alpha;

	bli_sscal( n, &reciprocal, x, incx );
}
| void bli_snrm2 | ( | int | n, |
| float * | x, | ||
| int | incx, | ||
| float * | norm | ||
| ) |
References cblas_snrm2(), and F77_snrm2().
Referenced by FLA_Househ2_UT_l_ops(), FLA_Househ2s_UT_l_ops(), FLA_Househ3UD_UT_ops(), and FLA_Nrm2_external().
{
// Compute the 2-norm of the n-element vector x (stride incx) with the BLAS
// binding selected at build time, and store the result in *norm. The CBLAS
// entry point takes the length and stride by value; the Fortran-77 entry
// point takes them by address.
#ifdef BLIS_ENABLE_CBLAS_INTERFACES
*norm = cblas_snrm2( n,
x, incx );
#else
*norm = F77_snrm2( &n,
x, &incx );
#endif
}
| void bli_sscal | ( | int | n, |
| float * | alpha, | ||
| float * | x, | ||
| int | incx | ||
| ) |
References cblas_sscal(), and F77_sscal().
Referenced by bli_cconjm(), bli_cconjmr(), bli_cconjv(), bli_saxpysmt(), bli_saxpysv(), bli_sinvscalm(), bli_sinvscalv(), bli_sscalm(), bli_sscalmr(), bli_sscalv(), and FLA_SA_LU_unb().
{
// Scale the n-element vector x (stride incx) by *alpha using the BLAS
// binding selected at build time. The CBLAS entry point takes the length,
// scalar and stride by value; the Fortran-77 entry point takes them by
// address.
#ifdef BLIS_ENABLE_CBLAS_INTERFACES
cblas_sscal( n,
*alpha,
x, incx );
#else
F77_sscal( &n,
alpha,
x, &incx );
#endif
}
| void bli_sscalm | ( | conj_t | conj, |
| int | m, | ||
| int | n, | ||
| float * | alpha, | ||
| float * | a, | ||
| int | a_rs, | ||
| int | a_cs | ||
| ) |
References bli_is_row_storage(), bli_is_vector(), bli_sscal(), bli_vector_dim(), bli_vector_inc(), bli_zero_dim2(), and BLIS_NO_TRANSPOSE.
Referenced by bli_sgemm(), bli_ssymm(), bli_strmmsx(), bli_strsmsx(), FLA_Lyap_h_ops_var1(), FLA_Lyap_h_ops_var2(), FLA_Lyap_h_ops_var3(), FLA_Lyap_h_ops_var4(), FLA_Lyap_n_ops_var1(), FLA_Lyap_n_ops_var2(), FLA_Lyap_n_ops_var3(), FLA_Lyap_n_ops_var4(), FLA_Scal_external(), and FLA_Scalc_external().
{
	float  scale;
	float* slice;
	int    outer_stride, inner_stride;
	int    n_slices;
	int    slice_len;
	int    k;

	// Nothing to do for an empty matrix, or when alpha is one.
	if ( bli_zero_dim2( m, n ) ) return;
	if ( bli_seq1( alpha ) ) return;

	if ( !( bli_is_vector( m, n ) ) )
	{
		// Matrix case: default to one slice per column.
		n_slices     = n;
		slice_len    = m;
		outer_stride = a_cs;
		inner_stride = a_rs;

		// For row-major A, walk by rows instead to improve spatial locality.
		if ( bli_is_row_storage( a_rs, a_cs ) )
		{
			bli_swap_ints( n_slices, slice_len );
			bli_swap_ints( outer_stride, inner_stride );
		}
	}
	else
	{
		// Vector case: a single slice, so the underlying scal is invoked
		// exactly once.
		n_slices     = 1;
		slice_len    = bli_vector_dim( m, n );
		outer_stride = 1; // only ever multiplied by zero; value is irrelevant.
		inner_stride = bli_vector_inc( BLIS_NO_TRANSPOSE, m, n, a_rs, a_cs );
	}

	// Obtain the scaling factor from alpha via bli_scopys(), then apply it
	// to every slice.
	bli_scopys( conj, alpha, &scale );

	for ( k = 0; k < n_slices; k++ )
	{
		slice = a + k*outer_stride;

		bli_sscal( slice_len, &scale, slice, inner_stride );
	}
}
| void bli_sscalmr | ( | uplo_t | uplo, |
| int | m, | ||
| int | n, | ||
| float * | alpha, | ||
| float * | a, | ||
| int | a_rs, | ||
| int | a_cs | ||
| ) |
References bli_is_row_storage(), bli_is_upper(), bli_sscal(), and bli_zero_dim2().
Referenced by FLA_Scalr_external().
{
	float* slice;
	int    outer_stride, inner_stride;
	int    n_slices;
	int    len_cap;
	int    len;
	int    k;

	// Nothing to do for an empty matrix, or when alpha is one.
	if ( bli_zero_dim2( m, n ) ) return;
	if ( bli_seq1( alpha ) ) return;

	// Default traversal: one slice per column, elements strided by rows.
	n_slices     = n;
	len_cap      = m;
	outer_stride = a_cs;
	inner_stride = a_rs;

	// For row-major A, walk by rows instead to improve spatial locality.
	// The dimensions and strides trade places, and uplo is toggled to go
	// with them.
	if ( bli_is_row_storage( a_rs, a_cs ) )
	{
		bli_swap_ints( n_slices, len_cap );
		bli_swap_ints( outer_stride, inner_stride );
		bli_toggle_uplo( uplo );
	}

	if ( bli_is_upper( uplo ) )
	{
		// Slice k starts at its first element and holds k+1 elements,
		// capped at len_cap.
		for ( k = 0; k < n_slices; k++ )
		{
			len   = bli_min( k + 1, len_cap );
			slice = a + k*outer_stride;

			bli_sscal( len, alpha, slice, inner_stride );
		}

		return;
	}

	// Lower case: slice k starts on the diagonal and holds len_cap - k
	// elements; stop as soon as no elements remain.
	for ( k = 0; k < n_slices; k++ )
	{
		len   = bli_max( 0, len_cap - k );
		slice = a + k*outer_stride + k*inner_stride;

		if ( len <= 0 ) break;

		bli_sscal( len, alpha, slice, inner_stride );
	}
}
| void bli_sscalv | ( | conj_t | conj, |
| int | n, | ||
| float * | alpha, | ||
| float * | x, | ||
| int | incx | ||
| ) |
References bli_sscal(), and bli_zero_dim1().
Referenced by bli_sapdiagmv(), bli_sgemv(), bli_strmvsx(), bli_strsvsx(), FLA_Eig_gest_il_ops_var3(), FLA_Eig_gest_iu_ops_var3(), FLA_Eig_gest_nl_ops_var1(), FLA_Eig_gest_nl_ops_var2(), FLA_Eig_gest_nl_ops_var4(), FLA_Eig_gest_nl_ops_var5(), FLA_Eig_gest_nu_ops_var1(), FLA_Eig_gest_nu_ops_var2(), FLA_Eig_gest_nu_ops_var4(), FLA_Eig_gest_nu_ops_var5(), FLA_Hess_UT_step_ofs_var2(), FLA_Hess_UT_step_ofs_var3(), FLA_Hess_UT_step_ofs_var4(), FLA_Hess_UT_step_ops_var2(), FLA_Hess_UT_step_ops_var3(), FLA_Hess_UT_step_ops_var4(), FLA_LQ_UT_form_Q_ops_var1(), FLA_QR_UT_form_Q_ops_var1(), FLA_Tridiag_UT_l_step_ofs_var2(), FLA_Tridiag_UT_l_step_ofs_var3(), FLA_Tridiag_UT_l_step_ops_var1(), FLA_Tridiag_UT_l_step_ops_var2(), FLA_Tridiag_UT_l_step_ops_var3(), FLA_Trinv_ln_ops_var4(), FLA_Trinv_lu_ops_var1(), FLA_Trinv_lu_ops_var2(), FLA_Trinv_lu_ops_var3(), FLA_Trinv_lu_ops_var4(), FLA_Trinv_un_ops_var4(), FLA_Trinv_uu_ops_var1(), FLA_Trinv_uu_ops_var2(), FLA_Trinv_uu_ops_var3(), FLA_Trinv_uu_ops_var4(), FLA_Ttmm_l_ops_var1(), FLA_Ttmm_l_ops_var2(), FLA_Ttmm_u_ops_var1(), and FLA_Ttmm_u_ops_var2().
{
	// Skip the BLAS call when the vector is empty or when alpha is one;
	// in either case x would be left unchanged. alpha is only examined
	// when the vector is non-empty. The conj argument is not consulted.
	if ( ( bli_zero_dim1( n ) ) || ( bli_seq1( alpha ) ) ) return;

	bli_sscal( n, alpha, x, incx );
}
| void bli_sscopymr | ( | uplo_t | uplo, |
| int | m, | ||
| int | n, | ||
| float * | a, | ||
| int | a_rs, | ||
| int | a_cs, | ||
| float * | b, | ||
| int | b_rs, | ||
| int | b_cs | ||
| ) |
References bli_is_row_storage(), bli_is_upper(), bli_scopyv(), bli_zero_dim2(), and BLIS_NO_TRANSPOSE.
{
// Copies part of the float matrix A into the float matrix B one slice at a
// time via bli_scopyv(). Depending on uplo the slice length grows by one
// per iteration from the first element, or shrinks by one per iteration
// starting from the diagonal.
float* a_begin;
float* b_begin;
int lda, inca;
int ldb, incb;
int n_iter;
int n_elem_max;
int n_elem;
int j;
// Return early if possible.
if ( bli_zero_dim2( m, n ) ) return;
// We initialize for column-major.
n_iter = n;
n_elem_max = m;
lda = a_cs;
inca = a_rs;
ldb = b_cs;
incb = b_rs;
// An optimization: if B is row-major, then let's access the matrix
// by rows instead of by columns for increased spatial locality.
// uplo is toggled together with the exchanged dimensions and strides.
if ( bli_is_row_storage( b_rs, b_cs ) )
{
bli_swap_ints( n_iter, n_elem_max );
bli_swap_ints( lda, inca );
bli_swap_ints( ldb, incb );
bli_toggle_uplo( uplo );
}
// NOTE(review): BLIS_NO_TRANSPOSE is passed below in the conj_t slot of
// bli_scopyv(). bli_scopyv() never reads its conj argument, so this has no
// effect on the result, but a conj_t constant would be the expected value
// here -- confirm against the type definitions.
if ( bli_is_upper( uplo ) )
{
for ( j = 0; j < n_iter; j++ )
{
// Slice j starts at its first element and holds j + 1 elements, capped
// at n_elem_max.
n_elem = bli_min( j + 1, n_elem_max );
a_begin = a + j*lda;
b_begin = b + j*ldb;
bli_scopyv( BLIS_NO_TRANSPOSE,
n_elem,
a_begin, inca,
b_begin, incb );
}
}
else // if ( bli_is_lower( uplo ) )
{
for ( j = 0; j < n_iter; j++ )
{
// Slice j starts on the diagonal and holds n_elem_max - j elements; stop
// once no elements remain.
n_elem = bli_max( 0, n_elem_max - j );
a_begin = a + j*lda + j*inca;
b_begin = b + j*ldb + j*incb;
if ( n_elem <= 0 ) break;
bli_scopyv( BLIS_NO_TRANSPOSE,
n_elem,
a_begin, inca,
b_begin, incb );
}
}
}
| void bli_sscopymrt | ( | uplo_t | uplo, |
| trans_t | trans, | ||
| int | m, | ||
| int | n, | ||
| float * | a, | ||
| int | a_rs, | ||
| int | a_cs, | ||
| float * | b, | ||
| int | b_rs, | ||
| int | b_cs | ||
| ) |
References bli_does_trans(), bli_is_col_storage(), bli_is_lower(), bli_proj_trans_to_conj(), bli_scopyv(), and bli_zero_dim2().
{
// Copies part of the float matrix A (optionally accessed as transposed, per
// trans) into the float matrix B one slice at a time via bli_scopyv(). The
// slice length either shrinks by one per iteration starting from the
// diagonal, or grows by one per iteration, which corresponds to what appears
// to be the uplo-selected triangular or trapezoidal region.
float* a_begin;
float* b_begin;
int lda, inca;
int ldb, incb;
int n_iter;
int n_elem;
int n_elem_max;
int n_elem_is_descending;
int j;
conj_t conj;
// Return early if possible.
if ( bli_zero_dim2( m, n ) ) return;
// Initialize variables based on storage format of B and value of uplo.
// Column storage iterates over columns (lda/ldb are the column strides);
// row storage iterates over rows (lda/ldb are the row strides).
if ( bli_is_col_storage( b_rs, b_cs ) )
{
if ( bli_is_lower( uplo ) )
{
// Column j runs from the diagonal down: m - j elements.
n_iter = bli_min( m, n );
n_elem_max = m;
lda = a_cs;
inca = a_rs;
ldb = b_cs;
incb = b_rs;
n_elem_is_descending = TRUE;
}
else // if ( bli_is_upper( uplo ) )
{
// Column j runs from the top: j + 1 elements, capped at min( m, n ).
n_iter = n;
n_elem_max = bli_min( m, n );
lda = a_cs;
inca = a_rs;
ldb = b_cs;
incb = b_rs;
n_elem_is_descending = FALSE;
}
}
else // if ( bli_is_row_storage( b_rs, b_cs ) )
{
if ( bli_is_lower( uplo ) )
{
// Row j runs from the left: j + 1 elements, capped at min( m, n ).
n_iter = m;
n_elem_max = bli_min( m, n );
lda = a_rs;
inca = a_cs;
ldb = b_rs;
incb = b_cs;
n_elem_is_descending = FALSE;
}
else // if ( bli_is_upper( uplo ) )
{
// Row j runs from the diagonal rightwards: n - j elements.
n_iter = bli_min( m, n );
n_elem_max = n;
lda = a_rs;
inca = a_cs;
ldb = b_rs;
incb = b_cs;
n_elem_is_descending = TRUE;
}
}
// Swap lda and inca if we're doing a transpose.
// Only A's strides are exchanged; B is always accessed untransposed.
if ( bli_does_trans( trans ) )
{
bli_swap_ints( lda, inca );
}
// Extract conj component from trans parameter.
conj = bli_proj_trans_to_conj( trans );
// Choose the loop based on whether n_elem will be shrinking or growing
// with each iteration.
if ( n_elem_is_descending )
{
for ( j = 0; j < n_iter; j++ )
{
// Start on the diagonal (offset j along both strides); each slice is one
// element shorter than the previous one. n_iter <= n_elem_max here, so
// n_elem stays positive.
n_elem = n_elem_max - j;
a_begin = a + j*lda + j*inca;
b_begin = b + j*ldb + j*incb;
bli_scopyv( conj,
n_elem,
a_begin, inca,
b_begin, incb );
}
}
else // if ( n_elem_is_ascending )
{
for ( j = 0; j < n_iter; j++ )
{
// Start at the first element of slice j; the slice holds j + 1 elements
// until the n_elem_max cap is reached.
n_elem = bli_min( j + 1, n_elem_max );
a_begin = a + j*lda;
b_begin = b + j*ldb;
bli_scopyv( conj,
n_elem,
a_begin, inca,
b_begin, incb );
}
}
}
| void bli_sscopymt | ( | trans_t | trans, |
| int | m, | ||
| int | n, | ||
| float * | a, | ||
| int | a_rs, | ||
| int | a_cs, | ||
| float * | b, | ||
| int | b_rs, | ||
| int | b_cs | ||
| ) |
References bli_does_trans(), bli_is_row_storage(), bli_is_vector(), bli_proj_trans_to_conj(), bli_scopyv(), bli_vector_dim(), bli_vector_inc(), bli_zero_dim2(), and BLIS_NO_TRANSPOSE.
{
// Copies the m-by-n float operand A (accessed as transposed when trans
// requests it) into the float operand B, issuing one bli_scopyv() call per
// column or per row.
float* a_begin;
float* b_begin;
int lda, inca;
int ldb, incb;
int n_iter;
int n_elem;
int j;
conj_t conj;
// Return early if possible.
if ( bli_zero_dim2( m, n ) ) return;
// Handle cases where A and B are vectors to ensure that the underlying copy
// gets invoked only once.
if ( bli_is_vector( m, n ) )
{
// Initialize with values appropriate for vectors.
n_iter = 1;
n_elem = bli_vector_dim( m, n );
lda = 1; // multiplied by zero when n_iter == 1; not needed.
inca = bli_vector_inc( trans, m, n, a_rs, a_cs );
ldb = 1; // multiplied by zero when n_iter == 1; not needed.
incb = bli_vector_inc( BLIS_NO_TRANSPOSE, m, n, b_rs, b_cs );
}
else // matrix case
{
// Initialize with optimal values for column-major storage of B.
n_iter = n;
n_elem = m;
lda = a_cs;
inca = a_rs;
ldb = b_cs;
incb = b_rs;
// Handle the transposition of A.
// Exchanging A's strides makes column j of the traversal read row j of A.
if ( bli_does_trans( trans ) )
{
bli_swap_ints( lda, inca );
}
// An optimization: if B is row-major, then let's access the matrix by rows
// instead of by columns for increased spatial locality.
// Only B's storage is checked here; A's storage is not examined.
if ( bli_is_row_storage( b_rs, b_cs ) )
{
bli_swap_ints( n_iter, n_elem );
bli_swap_ints( lda, inca );
bli_swap_ints( ldb, incb );
}
}
// Extract conj component from trans parameter.
conj = bli_proj_trans_to_conj( trans );
// Copy slice j of A onto slice j of B.
for ( j = 0; j < n_iter; ++j )
{
a_begin = a + j*lda;
b_begin = b + j*ldb;
bli_scopyv( conj,
n_elem,
a_begin, inca,
b_begin, incb );
}
}
| void bli_sswap | ( | int | n, |
| float * | x, | ||
| int | incx, | ||
| float * | y, | ||
| int | incy | ||
| ) |
References cblas_sswap(), and F77_sswap().
Referenced by bli_sswapmt(), bli_sswapv(), FLA_SA_Apply_pivots(), and FLA_SA_LU_unb().
{
// Exchange the n-element vectors x (stride incx) and y (stride incy) using
// the BLAS binding selected at build time. The CBLAS entry point takes the
// length and strides by value; the Fortran-77 entry point takes them by
// address.
#ifdef BLIS_ENABLE_CBLAS_INTERFACES
cblas_sswap( n,
x, incx,
y, incy );
#else
F77_sswap( &n,
x, &incx,
y, &incy );
#endif
}
| void bli_sswapmt | ( | trans_t | trans, |
| int | m, | ||
| int | n, | ||
| float * | a, | ||
| int | a_rs, | ||
| int | a_cs, | ||
| float * | b, | ||
| int | b_rs, | ||
| int | b_cs | ||
| ) |
References bli_does_notrans(), bli_does_trans(), bli_is_col_storage(), bli_is_row_storage(), bli_is_vector(), bli_sswap(), bli_vector_dim(), bli_vector_inc(), bli_zero_dim2(), and BLIS_NO_TRANSPOSE.
Referenced by FLA_Swap_external(), and FLA_Swapt_external().
{
// Exchanges the contents of the m-by-n operand A (accessed as transposed
// when trans requests it) with those of B, issuing one bli_sswap() call per
// column or per row.
float* a_begin;
float* b_begin;
int lda, inca;
int ldb, incb;
int n_iter;
int n_elem;
int j;
// Return early if possible.
if ( bli_zero_dim2( m, n ) ) return;
// Handle cases where A and B are vectors to ensure that the underlying swap
// gets invoked only once.
if ( bli_is_vector( m, n ) )
{
// Initialize with values appropriate for vectors.
n_iter = 1;
n_elem = bli_vector_dim( m, n );
lda = 1; // multiplied by zero when n_iter == 1; not needed.
inca = bli_vector_inc( trans, m, n, a_rs, a_cs );
ldb = 1; // multiplied by zero when n_iter == 1; not needed.
incb = bli_vector_inc( BLIS_NO_TRANSPOSE, m, n, b_rs, b_cs );
}
else // matrix case
{
// Initialize with optimal values for column-major storage.
n_iter = n;
n_elem = m;
lda = a_cs;
inca = a_rs;
ldb = b_cs;
incb = b_rs;
// Handle the transposition of A.
// Exchanging A's strides makes column j of the traversal read row j of A.
if ( bli_does_trans( trans ) )
{
bli_swap_ints( lda, inca );
}
// An optimization: if B is row-major and if A is effectively row-major
// after a possible transposition, then let's access the matrix by rows
// instead of by columns for increased spatial locality.
if ( bli_is_row_storage( b_rs, b_cs ) )
{
if ( ( bli_is_col_storage( a_rs, a_cs ) && bli_does_trans( trans ) ) ||
( bli_is_row_storage( a_rs, a_cs ) && bli_does_notrans( trans ) ) )
{
// Iterate over the m rows, each of n elements, using the row strides
// as the leading dimensions.
bli_swap_ints( n_iter, n_elem );
bli_swap_ints( lda, inca );
bli_swap_ints( ldb, incb );
}
}
}
// Exchange slice j of A with slice j of B.
for ( j = 0; j < n_iter; j++ )
{
a_begin = a + j*lda;
b_begin = b + j*ldb;
bli_sswap( n_elem,
a_begin, inca,
b_begin, incb );
}
}
| void bli_sswapv | ( | int | n, |
| float * | x, | ||
| int | incx, | ||
| float * | y, | ||
| int | incy | ||
| ) |
References bli_sswap(), and bli_zero_dim1().
Referenced by FLA_Apply_pivots_macro_external().
{
    // Forward to bli_sswap() unless bli_zero_dim1() reports that there is
    // nothing to do for this length.
    if ( !bli_zero_dim1( n ) )
    {
        bli_sswap( n, x, incx, y, incy );
    }
}
| void bli_szcopymr | ( | uplo_t | uplo, |
| int | m, | ||
| int | n, | ||
| float * | a, | ||
| int | a_rs, | ||
| int | a_cs, | ||
| dcomplex * | b, | ||
| int | b_rs, | ||
| int | b_cs | ||
| ) |
References bli_is_row_storage(), bli_is_upper(), bli_szcopyv(), bli_zero_dim2(), and BLIS_NO_TRANSPOSE.
Referenced by FLA_Copyr_external().
{
    int n_iter, n_elem_max, n_elem;
    int lda, inca;
    int ldb, incb;
    int j;

    // Nothing to copy when bli_zero_dim2() flags the m-by-n operand.
    if ( bli_zero_dim2( m, n ) ) return;

    // Default traversal: one column of the triangle per iteration.
    n_iter     = n;
    n_elem_max = m;
    lda        = a_cs;
    inca       = a_rs;
    ldb        = b_cs;
    incb       = b_rs;

    // When B is row-major, walk by rows instead. Dimensions and strides
    // trade places and uplo is toggled to match the new orientation.
    if ( bli_is_row_storage( b_rs, b_cs ) )
    {
        bli_swap_ints( n_iter, n_elem_max );
        bli_swap_ints( lda, inca );
        bli_swap_ints( ldb, incb );
        bli_toggle_uplo( uplo );
    }

    // Note: bli_szcopyv() never reads its conj argument, so the
    // BLIS_NO_TRANSPOSE value passed below acts purely as a placeholder.
    if ( !bli_is_upper( uplo ) )
    {
        // Lower case: slice j starts on the diagonal and holds
        // n_elem_max - j elements; stop as soon as none are left.
        for ( j = 0; j < n_iter && j < n_elem_max; j++ )
        {
            n_elem = n_elem_max - j;

            bli_szcopyv( BLIS_NO_TRANSPOSE,
                         n_elem,
                         a + j*lda + j*inca, inca,
                         b + j*ldb + j*incb, incb );
        }
    }
    else
    {
        // Upper case: slice j starts at its first element and holds
        // j + 1 elements, capped at n_elem_max.
        for ( j = 0; j < n_iter; j++ )
        {
            n_elem = bli_min( j + 1, n_elem_max );

            bli_szcopyv( BLIS_NO_TRANSPOSE,
                         n_elem,
                         a + j*lda, inca,
                         b + j*ldb, incb );
        }
    }
}
| void bli_szcopymrt | ( | uplo_t | uplo, |
| trans_t | trans, | ||
| int | m, | ||
| int | n, | ||
| float * | a, | ||
| int | a_rs, | ||
| int | a_cs, | ||
| dcomplex * | b, | ||
| int | b_rs, | ||
| int | b_cs | ||
| ) |
References bli_does_trans(), bli_is_col_storage(), bli_is_lower(), bli_proj_trans_to_conj(), bli_szcopyv(), and bli_zero_dim2().
Referenced by FLA_Copyrt_external().
{
    int    n_iter, n_elem, n_elem_max;
    int    lda, inca;
    int    ldb, incb;
    int    shrinking; // TRUE when slices get shorter as j grows
    int    j;
    conj_t conj;

    // Nothing to copy when bli_zero_dim2() flags the m-by-n operand.
    if ( bli_zero_dim2( m, n ) ) return;

    // The strides depend only on how B is stored; the iteration counts and
    // the growth direction additionally depend on uplo.
    if ( bli_is_col_storage( b_rs, b_cs ) )
    {
        // Column storage: iterate over columns.
        lda  = a_cs;
        inca = a_rs;
        ldb  = b_cs;
        incb = b_rs;

        if ( bli_is_lower( uplo ) )
        {
            n_iter     = bli_min( m, n );
            n_elem_max = m;
            shrinking  = TRUE;
        }
        else
        {
            n_iter     = n;
            n_elem_max = bli_min( m, n );
            shrinking  = FALSE;
        }
    }
    else
    {
        // Row storage: iterate over rows.
        lda  = a_rs;
        inca = a_cs;
        ldb  = b_rs;
        incb = b_cs;

        if ( bli_is_lower( uplo ) )
        {
            n_iter     = m;
            n_elem_max = bli_min( m, n );
            shrinking  = FALSE;
        }
        else
        {
            n_iter     = bli_min( m, n );
            n_elem_max = n;
            shrinking  = TRUE;
        }
    }

    // A transposed A is traversed with its two strides exchanged.
    if ( bli_does_trans( trans ) )
    {
        bli_swap_ints( lda, inca );
    }

    // Only the conjugation component of trans is handed to the kernel.
    conj = bli_proj_trans_to_conj( trans );

    for ( j = 0; j < n_iter; j++ )
    {
        int skip; // leading elements of slice j that are not touched

        if ( shrinking )
        {
            // Slice starts on the diagonal and loses one element per step.
            skip   = j;
            n_elem = n_elem_max - j;
        }
        else
        {
            // Slice starts at its head and gains one element per step,
            // capped at n_elem_max.
            skip   = 0;
            n_elem = bli_min( j + 1, n_elem_max );
        }

        bli_szcopyv( conj,
                     n_elem,
                     a + j*lda + skip*inca, inca,
                     b + j*ldb + skip*incb, incb );
    }
}
| void bli_szcopymt | ( | trans_t | trans, |
| int | m, | ||
| int | n, | ||
| float * | a, | ||
| int | a_rs, | ||
| int | a_cs, | ||
| dcomplex * | b, | ||
| int | b_rs, | ||
| int | b_cs | ||
| ) |
References bli_does_trans(), bli_is_row_storage(), bli_is_vector(), bli_proj_trans_to_conj(), bli_szcopyv(), bli_vector_dim(), bli_vector_inc(), bli_zero_dim2(), and BLIS_NO_TRANSPOSE.
Referenced by FLA_Copy_external(), and FLA_Copyt_external().
{
    int    n_iter, n_elem;
    int    lda, inca;
    int    ldb, incb;
    int    j;
    conj_t conj;

    // Nothing to copy when bli_zero_dim2() flags the m-by-n operand.
    if ( bli_zero_dim2( m, n ) ) return;

    if ( !bli_is_vector( m, n ) )
    {
        // Matrix case: begin with a column-by-column walk of B.
        n_iter = n;
        n_elem = m;
        lda    = a_cs;
        inca   = a_rs;
        ldb    = b_cs;
        incb   = b_rs;

        // A transposed A is traversed with its two strides exchanged.
        if ( bli_does_trans( trans ) )
        {
            bli_swap_ints( lda, inca );
        }

        // A row-major B is walked by rows instead of by columns.
        if ( bli_is_row_storage( b_rs, b_cs ) )
        {
            bli_swap_ints( n_iter, n_elem );
            bli_swap_ints( lda, inca );
            bli_swap_ints( ldb, incb );
        }
    }
    else
    {
        // Vector case: a single pass, so the underlying copy is invoked
        // exactly once.
        n_iter = 1;
        n_elem = bli_vector_dim( m, n );
        lda    = 1; // only ever multiplied by j == 0
        inca   = bli_vector_inc( trans, m, n, a_rs, a_cs );
        ldb    = 1; // only ever multiplied by j == 0
        incb   = bli_vector_inc( BLIS_NO_TRANSPOSE, m, n, b_rs, b_cs );
    }

    // Only the conjugation component of trans is handed to the kernel.
    conj = bli_proj_trans_to_conj( trans );

    // Copy one slice (column or row) of A into the matching slice of B.
    for ( j = 0; j < n_iter; ++j )
    {
        bli_szcopyv( conj,
                     n_elem,
                     a + j*lda, inca,
                     b + j*ldb, incb );
    }
}
| void bli_szcopyv | ( | conj_t | conj, |
| int | m, | ||
| float * | x, | ||
| int | incx, | ||
| dcomplex * | y, | ||
| int | incy | ||
| ) |
References bli_zero_dim1(), dcomplex::imag, and dcomplex::real.
Referenced by bli_szcopymr(), bli_szcopymrt(), and bli_szcopymt().
{
    float*    src;
    dcomplex* dst;
    int       remaining;

    // Nothing to copy when bli_zero_dim1() flags the length.
    if ( bli_zero_dim1( m ) ) return;

    // Widen each real input into a complex output whose imaginary part is
    // zero. The conj argument is not consulted here.
    for ( src = x, dst = y, remaining = m;
          remaining > 0;
          --remaining, src += incx, dst += incy )
    {
        dst->real = *src;
        dst->imag = 0.0;
    }
}
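| void bli_zamax | ( | int | n, |
| dcomplex * | x, | ||
| int | incx, | ||
| int * | index | ||
| ) |
References cblas_izamax(), and F77_izamax().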
Referenced by FLA_Amax_external(), FLA_LU_piv_opz_var3(), FLA_LU_piv_opz_var4(), FLA_LU_piv_opz_var5(), and FLA_SA_LU_unb().
{
    // Store in *index the position reported by the BLAS izamax routine of
    // whichever binding this build was configured with.
#if !defined( BLIS_ENABLE_CBLAS_INTERFACES )
    // The Fortran-77 result is lowered by one before being stored
    // (presumably converting a 1-based position to 0-based -- confirm).
    *index = F77_izamax( &n, x, &incx ) - 1;
#else
    // The CBLAS result is stored unchanged.
    *index = cblas_izamax( n, x, incx );
#endif
}
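| void bli_zasum | ( | int | n, |
| dcomplex * | x, | ||
| int | incx, | ||
| double * | norm | ||
| ) |
References cblas_dzasum(), and F77_dzasum().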
Referenced by FLA_Asum_external().
{
    // Store in *norm the value returned by the BLAS dzasum routine of
    // whichever binding this build was configured with.
#if !defined( BLIS_ENABLE_CBLAS_INTERFACES )
    // Fortran-77 binding: the length and stride go by address.
    *norm = F77_dzasum( &n, x, &incx );
#else
    // CBLAS binding: the length and stride go by value.
    *norm = cblas_dzasum( n, x, incx );
#endif
}
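| void bli_zaxpy | ( | int | n, |
| dcomplex * | alpha, | ||
| dcomplex * | x, | ||
| int | incx, | ||
| dcomplex * | y, | ||
| int | incy | ||
| ) |
References cblas_zaxpy(), and F77_zaxpy().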
Referenced by bli_zaxpymt(), bli_zaxpysmt(), bli_zaxpysv(), and bli_zaxpyv().
{
    // Hand the double-complex axpy to whichever BLAS binding this build
    // was configured with. alpha is passed as a pointer in both variants.
#if !defined( BLIS_ENABLE_CBLAS_INTERFACES )
    // Fortran-77 binding: the length and both strides go by address.
    F77_zaxpy( &n, alpha, x, &incx, y, &incy );
#else
    // CBLAS binding: the length and both strides go by value.
    cblas_zaxpy( n, alpha, x, incx, y, incy );
#endif
}
| void bli_zaxpymrt | ( | uplo_t | uplo, |
| trans_t | trans, | ||
| int | m, | ||
| int | n, | ||
| dcomplex * | alpha, | ||
| dcomplex * | a, | ||
| int | a_rs, | ||
| int | a_cs, | ||
| dcomplex * | b, | ||
| int | b_rs, | ||
| int | b_cs | ||
| ) |
References bli_does_trans(), bli_is_col_storage(), bli_is_lower(), bli_proj_trans_to_conj(), bli_zaxpyv(), and bli_zero_dim2().
Referenced by bli_zher2k(), bli_zherk(), and FLA_Axpyrt_external().
{
    int    n_iter, n_elem, n_elem_max;
    int    lda, inca;
    int    ldb, incb;
    int    shrinking; // TRUE when slices get shorter as j grows
    int    j;
    conj_t conj;

    // Nothing to update when bli_zero_dim2() flags the m-by-n operand.
    if ( bli_zero_dim2( m, n ) ) return;

    // The strides depend only on how B is stored; the iteration counts and
    // the growth direction additionally depend on uplo.
    if ( bli_is_col_storage( b_rs, b_cs ) )
    {
        // Column storage: iterate over columns.
        lda  = a_cs;
        inca = a_rs;
        ldb  = b_cs;
        incb = b_rs;

        if ( bli_is_lower( uplo ) )
        {
            n_iter     = bli_min( m, n );
            n_elem_max = m;
            shrinking  = TRUE;
        }
        else
        {
            n_iter     = n;
            n_elem_max = bli_min( m, n );
            shrinking  = FALSE;
        }
    }
    else
    {
        // Row storage: iterate over rows.
        lda  = a_rs;
        inca = a_cs;
        ldb  = b_rs;
        incb = b_cs;

        if ( bli_is_lower( uplo ) )
        {
            n_iter     = m;
            n_elem_max = bli_min( m, n );
            shrinking  = FALSE;
        }
        else
        {
            n_iter     = bli_min( m, n );
            n_elem_max = n;
            shrinking  = TRUE;
        }
    }

    // A transposed A is traversed with its two strides exchanged.
    if ( bli_does_trans( trans ) )
    {
        bli_swap_ints( lda, inca );
    }

    // Only the conjugation component of trans is handed to the kernel.
    conj = bli_proj_trans_to_conj( trans );

    for ( j = 0; j < n_iter; j++ )
    {
        int skip; // leading elements of slice j that are not touched

        if ( shrinking )
        {
            // Slice starts on the diagonal and loses one element per step.
            skip   = j;
            n_elem = n_elem_max - j;
        }
        else
        {
            // Slice starts at its head and gains one element per step,
            // capped at n_elem_max.
            skip   = 0;
            n_elem = bli_min( j + 1, n_elem_max );
        }

        bli_zaxpyv( conj,
                    n_elem,
                    alpha,
                    a + j*lda + skip*inca, inca,
                    b + j*ldb + skip*incb, incb );
    }
}
| void bli_zaxpymt | ( | trans_t | trans, |
| int | m, | ||
| int | n, | ||
| dcomplex * | alpha, | ||
| dcomplex * | a, | ||
| int | a_rs, | ||
| int | a_cs, | ||
| dcomplex * | b, | ||
| int | b_rs, | ||
| int | b_cs | ||
| ) |
References bli_does_conj(), bli_does_notrans(), bli_does_trans(), bli_is_col_storage(), bli_is_row_storage(), bli_is_vector(), bli_proj_trans_to_conj(), bli_vector_dim(), bli_vector_inc(), bli_zallocv(), bli_zaxpy(), bli_zcopyv(), bli_zero_dim2(), bli_zfree(), and BLIS_NO_TRANSPOSE.
Referenced by bli_zgemm(), bli_zhemm(), bli_zsymm(), bli_ztrmmsx(), bli_ztrsmsx(), FLA_Axpy_external(), and FLA_Axpyt_external().
{
    dcomplex* a_conj; // scratch vector holding one conjugated slice of A
    conj_t    conj;
    int       n_iter, n_elem;
    int       lda, inca;
    int       ldb, incb;
    int       j;

    // Nothing to update when bli_zero_dim2() flags the m-by-n operand.
    if ( bli_zero_dim2( m, n ) ) return;

    if ( !bli_is_vector( m, n ) )
    {
        // Matrix case: begin with a column-by-column walk over A and B.
        n_iter = n;
        n_elem = m;
        lda    = a_cs;
        inca   = a_rs;
        ldb    = b_cs;
        incb   = b_rs;

        // A transposed A is traversed with its two strides exchanged.
        if ( bli_does_trans( trans ) )
        {
            bli_swap_ints( lda, inca );
        }

        // Walk by rows instead only when B is row-major and A, once trans
        // has been taken into account, is effectively row-major as well.
        if ( bli_is_row_storage( b_rs, b_cs ) &&
             ( ( bli_is_col_storage( a_rs, a_cs ) && bli_does_trans( trans ) ) ||
               ( bli_is_row_storage( a_rs, a_cs ) && bli_does_notrans( trans ) ) ) )
        {
            bli_swap_ints( n_iter, n_elem );
            bli_swap_ints( lda, inca );
            bli_swap_ints( ldb, incb );
        }
    }
    else
    {
        // Vector case: a single pass, so the underlying axpy is invoked
        // exactly once.
        n_iter = 1;
        n_elem = bli_vector_dim( m, n );
        lda    = 1; // only ever multiplied by j == 0
        inca   = bli_vector_inc( trans, m, n, a_rs, a_cs );
        ldb    = 1; // only ever multiplied by j == 0
        incb   = bli_vector_inc( BLIS_NO_TRANSPOSE, m, n, b_rs, b_cs );
    }

    // Without conjugation each slice of A can be fed to the axpy directly.
    if ( !bli_does_conj( trans ) )
    {
        for ( j = 0; j < n_iter; j++ )
        {
            bli_zaxpy( n_elem,
                       alpha,
                       a + j*lda, inca,
                       b + j*ldb, incb );
        }
        return;
    }

    // With conjugation, each slice of A is first copied (conjugated) into
    // a unit-stride scratch vector, which is then used as the axpy input.
    conj   = bli_proj_trans_to_conj( trans );
    a_conj = bli_zallocv( n_elem );

    for ( j = 0; j < n_iter; j++ )
    {
        bli_zcopyv( conj,
                    n_elem,
                    a + j*lda, inca,
                    a_conj,    1 );

        bli_zaxpy( n_elem,
                   alpha,
                   a_conj,    1,
                   b + j*ldb, incb );
    }

    bli_zfree( a_conj );
}
| void bli_zaxpysmt | ( | trans_t | trans, |
| int | m, | ||
| int | n, | ||
| dcomplex * | alpha0, | ||
| dcomplex * | alpha1, | ||
| dcomplex * | a, | ||
| int | a_rs, | ||
| int | a_cs, | ||
| dcomplex * | beta, | ||
| dcomplex * | b, | ||
| int | b_rs, | ||
| int | b_cs | ||
| ) |
References bli_does_conj(), bli_does_notrans(), bli_does_trans(), bli_is_col_storage(), bli_is_row_storage(), bli_is_vector(), bli_proj_trans_to_conj(), bli_vector_dim(), bli_vector_inc(), bli_zallocv(), bli_zaxpy(), bli_zcopyv(), bli_zero_dim2(), bli_zfree(), bli_zscal(), BLIS_NO_TRANSPOSE, dcomplex::imag, and dcomplex::real.
Referenced by FLA_Axpys_external().
{
    dcomplex* a_conj;  // scratch vector holding one conjugated slice of A
    dcomplex  alpha01; // complex product alpha0 * alpha1
    conj_t    conj;
    int       n_iter, n_elem;
    int       lda, inca;
    int       ldb, incb;
    int       j;

    // Nothing to update when bli_zero_dim2() flags the m-by-n operand.
    if ( bli_zero_dim2( m, n ) ) return;

    // Fold the two scalars applied to A into a single complex factor.
    alpha01.real = alpha0->real * alpha1->real - alpha0->imag * alpha1->imag;
    alpha01.imag = alpha0->real * alpha1->imag + alpha0->imag * alpha1->real;

    if ( !bli_is_vector( m, n ) )
    {
        // Matrix case: begin with a column-by-column walk over A and B.
        n_iter = n;
        n_elem = m;
        lda    = a_cs;
        inca   = a_rs;
        ldb    = b_cs;
        incb   = b_rs;

        // A transposed A is traversed with its two strides exchanged.
        if ( bli_does_trans( trans ) )
        {
            bli_swap_ints( lda, inca );
        }

        // Walk by rows instead only when B is row-major and A, once trans
        // has been taken into account, is effectively row-major as well.
        if ( bli_is_row_storage( b_rs, b_cs ) &&
             ( ( bli_is_col_storage( a_rs, a_cs ) && bli_does_trans( trans ) ) ||
               ( bli_is_row_storage( a_rs, a_cs ) && bli_does_notrans( trans ) ) ) )
        {
            bli_swap_ints( n_iter, n_elem );
            bli_swap_ints( lda, inca );
            bli_swap_ints( ldb, incb );
        }
    }
    else
    {
        // Vector case: a single pass, so the underlying scal/axpy pair is
        // invoked exactly once.
        n_iter = 1;
        n_elem = bli_vector_dim( m, n );
        lda    = 1; // only ever multiplied by j == 0
        inca   = bli_vector_inc( trans, m, n, a_rs, a_cs );
        ldb    = 1; // only ever multiplied by j == 0
        incb   = bli_vector_inc( BLIS_NO_TRANSPOSE, m, n, b_rs, b_cs );
    }

    // Without conjugation: scale the slice of B by beta, then add the
    // slice of A scaled by alpha0 * alpha1.
    if ( !bli_does_conj( trans ) )
    {
        for ( j = 0; j < n_iter; j++ )
        {
            dcomplex* b_slice = b + j*ldb;

            bli_zscal( n_elem,
                       beta,
                       b_slice, incb );

            bli_zaxpy( n_elem,
                       &alpha01,
                       a + j*lda, inca,
                       b_slice,   incb );
        }
        return;
    }

    // With conjugation: same update, but the slice of A is first copied
    // (conjugated) into a unit-stride scratch vector.
    conj   = bli_proj_trans_to_conj( trans );
    a_conj = bli_zallocv( n_elem );

    for ( j = 0; j < n_iter; j++ )
    {
        dcomplex* b_slice = b + j*ldb;

        bli_zcopyv( conj,
                    n_elem,
                    a + j*lda, inca,
                    a_conj,    1 );

        bli_zscal( n_elem,
                   beta,
                   b_slice, incb );

        bli_zaxpy( n_elem,
                   &alpha01,
                   a_conj,  1,
                   b_slice, incb );
    }

    bli_zfree( a_conj );
}
| void bli_zaxpysv | ( | int | n, |
| dcomplex * | alpha0, | ||
| dcomplex * | alpha1, | ||
| dcomplex * | x, | ||
| int | incx, | ||
| dcomplex * | beta, | ||
| dcomplex * | y, | ||
| int | incy | ||
| ) |
References bli_zaxpy(), bli_zero_dim1(), bli_zscal(), dcomplex::imag, and dcomplex::real.
Referenced by FLA_Lyap_h_opz_var2(), FLA_Lyap_h_opz_var3(), FLA_Lyap_h_opz_var4(), FLA_Lyap_n_opz_var2(), FLA_Lyap_n_opz_var3(), and FLA_Lyap_n_opz_var4().
{
    dcomplex alpha01; // complex product alpha0 * alpha1

    // Nothing to update when bli_zero_dim1() flags the length.
    if ( bli_zero_dim1( n ) ) return;

    // Fold the two scalars applied to x into a single complex factor.
    alpha01.real = alpha0->real * alpha1->real - alpha0->imag * alpha1->imag;
    alpha01.imag = alpha0->real * alpha1->imag + alpha0->imag * alpha1->real;

    // First scale y by beta, then add x scaled by alpha0 * alpha1.
    bli_zscal( n, beta, y, incy );
    bli_zaxpy( n, &alpha01, x, incx, y, incy );
}
| void bli_zaxpyv | ( | conj_t | conj, |
| int | n, | ||
| dcomplex * | alpha, | ||
| dcomplex * | x, | ||
| int | incx, | ||
| dcomplex * | y, | ||
| int | incy | ||
| ) |
References bli_is_conj(), bli_zallocv(), bli_zaxpy(), bli_zcopyv(), bli_zero_dim1(), and bli_zfree().
Referenced by bli_zaxpymrt(), bli_zgemv(), bli_zhemv(), bli_ztrmvsx(), bli_ztrsvsx(), FLA_Apply_H2_UT_l_opz_var1(), FLA_Apply_H2_UT_r_opz_var1(), FLA_Apply_HUD_UT_l_opz_var1(), FLA_Bidiag_UT_u_step_ofz_var2(), FLA_Bidiag_UT_u_step_ofz_var3(), FLA_Bidiag_UT_u_step_ofz_var4(), FLA_Bidiag_UT_u_step_opz_var2(), FLA_Bidiag_UT_u_step_opz_var3(), FLA_Bidiag_UT_u_step_opz_var4(), FLA_Bidiag_UT_u_step_opz_var5(), FLA_Eig_gest_il_opz_var1(), FLA_Eig_gest_il_opz_var2(), FLA_Eig_gest_il_opz_var3(), FLA_Eig_gest_il_opz_var4(), FLA_Eig_gest_il_opz_var5(), FLA_Eig_gest_iu_opz_var1(), FLA_Eig_gest_iu_opz_var2(), FLA_Eig_gest_iu_opz_var3(), FLA_Eig_gest_iu_opz_var4(), FLA_Eig_gest_iu_opz_var5(), FLA_Eig_gest_nl_opz_var1(), FLA_Eig_gest_nl_opz_var2(), FLA_Eig_gest_nl_opz_var4(), FLA_Eig_gest_nl_opz_var5(), FLA_Eig_gest_nu_opz_var1(), FLA_Eig_gest_nu_opz_var2(), FLA_Eig_gest_nu_opz_var4(), FLA_Eig_gest_nu_opz_var5(), FLA_Fused_Ahx_Axpy_Ax_opz_var1(), FLA_Fused_Gerc2_Ahx_Axpy_Ax_opz_var1(), FLA_Fused_UZhu_ZUhu_opz_var1(), FLA_Hess_UT_step_ofz_var2(), FLA_Hess_UT_step_ofz_var3(), FLA_Hess_UT_step_ofz_var4(), FLA_Hess_UT_step_opz_var2(), FLA_Hess_UT_step_opz_var3(), FLA_Hess_UT_step_opz_var4(), FLA_Hess_UT_step_opz_var5(), FLA_Tridiag_UT_l_step_ofz_var2(), FLA_Tridiag_UT_l_step_ofz_var3(), FLA_Tridiag_UT_l_step_opz_var1(), FLA_Tridiag_UT_l_step_opz_var2(), and FLA_Tridiag_UT_l_step_opz_var3().
{
    dcomplex* x_conj; // unit-stride scratch copy of x, conjugate case only

    // Nothing to update when bli_zero_dim1() flags the length.
    if ( bli_zero_dim1( n ) ) return;

    // Without conjugation x can be fed to the axpy as it is.
    if ( !bli_is_conj( conj ) )
    {
        bli_zaxpy( n,
                   alpha,
                   x, incx,
                   y, incy );
        return;
    }

    // With conjugation, copy x through bli_zcopyv() into a temporary
    // vector, use that as the axpy input, and release it afterwards.
    x_conj = bli_zallocv( n );

    bli_zcopyv( conj,
                n,
                x,      incx,
                x_conj, 1 );

    bli_zaxpy( n,
               alpha,
               x_conj, 1,
               y,      incy );

    bli_zfree( x_conj );
}
| void bli_zccopymr | ( | uplo_t | uplo, |
| int | m, | ||
| int | n, | ||
| dcomplex * | a, | ||
| int | a_rs, | ||
| int | a_cs, | ||
| scomplex * | b, | ||
| int | b_rs, | ||
| int | b_cs | ||
| ) |
References bli_is_row_storage(), bli_is_upper(), bli_zccopyv(), bli_zero_dim2(), and BLIS_NO_TRANSPOSE.
Referenced by FLA_Copyr_external().
{
    int n_iter, n_elem_max, n_elem;
    int lda, inca;
    int ldb, incb;
    int j;

    // Nothing to copy when bli_zero_dim2() flags the m-by-n operand.
    if ( bli_zero_dim2( m, n ) ) return;

    // Default traversal: one column of the triangle per iteration.
    n_iter     = n;
    n_elem_max = m;
    lda        = a_cs;
    inca       = a_rs;
    ldb        = b_cs;
    incb       = b_rs;

    // When B is row-major, walk by rows instead. Dimensions and strides
    // trade places and uplo is toggled to match the new orientation.
    if ( bli_is_row_storage( b_rs, b_cs ) )
    {
        bli_swap_ints( n_iter, n_elem_max );
        bli_swap_ints( lda, inca );
        bli_swap_ints( ldb, incb );
        bli_toggle_uplo( uplo );
    }

    // NOTE(review): BLIS_NO_TRANSPOSE is a trans_t constant passed where
    // bli_zccopyv() takes a conj_t. This presumably relies on that value
    // never testing as conjugate in bli_is_conj() -- confirm.
    if ( !bli_is_upper( uplo ) )
    {
        // Lower case: slice j starts on the diagonal and holds
        // n_elem_max - j elements; stop as soon as none are left.
        for ( j = 0; j < n_iter && j < n_elem_max; j++ )
        {
            n_elem = n_elem_max - j;

            bli_zccopyv( BLIS_NO_TRANSPOSE,
                         n_elem,
                         a + j*lda + j*inca, inca,
                         b + j*ldb + j*incb, incb );
        }
    }
    else
    {
        // Upper case: slice j starts at its first element and holds
        // j + 1 elements, capped at n_elem_max.
        for ( j = 0; j < n_iter; j++ )
        {
            n_elem = bli_min( j + 1, n_elem_max );

            bli_zccopyv( BLIS_NO_TRANSPOSE,
                         n_elem,
                         a + j*lda, inca,
                         b + j*ldb, incb );
        }
    }
}
| void bli_zccopymrt | ( | uplo_t | uplo, |
| trans_t | trans, | ||
| int | m, | ||
| int | n, | ||
| dcomplex * | a, | ||
| int | a_rs, | ||
| int | a_cs, | ||
| scomplex * | b, | ||
| int | b_rs, | ||
| int | b_cs | ||
| ) |
References bli_does_trans(), bli_is_col_storage(), bli_is_lower(), bli_proj_trans_to_conj(), bli_zccopyv(), and bli_zero_dim2().
Referenced by FLA_Copyrt_external().
{
    int    n_iter, n_elem, n_elem_max;
    int    lda, inca;
    int    ldb, incb;
    int    shrinking; // TRUE when slices get shorter as j grows
    int    j;
    conj_t conj;

    // Nothing to copy when bli_zero_dim2() flags the m-by-n operand.
    if ( bli_zero_dim2( m, n ) ) return;

    // The strides depend only on how B is stored; the iteration counts and
    // the growth direction additionally depend on uplo.
    if ( bli_is_col_storage( b_rs, b_cs ) )
    {
        // Column storage: iterate over columns.
        lda  = a_cs;
        inca = a_rs;
        ldb  = b_cs;
        incb = b_rs;

        if ( bli_is_lower( uplo ) )
        {
            n_iter     = bli_min( m, n );
            n_elem_max = m;
            shrinking  = TRUE;
        }
        else
        {
            n_iter     = n;
            n_elem_max = bli_min( m, n );
            shrinking  = FALSE;
        }
    }
    else
    {
        // Row storage: iterate over rows.
        lda  = a_rs;
        inca = a_cs;
        ldb  = b_rs;
        incb = b_cs;

        if ( bli_is_lower( uplo ) )
        {
            n_iter     = m;
            n_elem_max = bli_min( m, n );
            shrinking  = FALSE;
        }
        else
        {
            n_iter     = bli_min( m, n );
            n_elem_max = n;
            shrinking  = TRUE;
        }
    }

    // A transposed A is traversed with its two strides exchanged.
    if ( bli_does_trans( trans ) )
    {
        bli_swap_ints( lda, inca );
    }

    // Only the conjugation component of trans is handed to the kernel.
    conj = bli_proj_trans_to_conj( trans );

    for ( j = 0; j < n_iter; j++ )
    {
        int skip; // leading elements of slice j that are not touched

        if ( shrinking )
        {
            // Slice starts on the diagonal and loses one element per step.
            skip   = j;
            n_elem = n_elem_max - j;
        }
        else
        {
            // Slice starts at its head and gains one element per step,
            // capped at n_elem_max.
            skip   = 0;
            n_elem = bli_min( j + 1, n_elem_max );
        }

        bli_zccopyv( conj,
                     n_elem,
                     a + j*lda + skip*inca, inca,
                     b + j*ldb + skip*incb, incb );
    }
}
| void bli_zccopymt | ( | trans_t | trans, |
| int | m, | ||
| int | n, | ||
| dcomplex * | a, | ||
| int | a_rs, | ||
| int | a_cs, | ||
| scomplex * | b, | ||
| int | b_rs, | ||
| int | b_cs | ||
| ) |
References bli_does_trans(), bli_is_row_storage(), bli_is_vector(), bli_proj_trans_to_conj(), bli_vector_dim(), bli_vector_inc(), bli_zccopyv(), bli_zero_dim2(), and BLIS_NO_TRANSPOSE.
Referenced by FLA_Copy_external(), and FLA_Copyt_external().
{
    int    n_iter, n_elem;
    int    lda, inca;
    int    ldb, incb;
    int    j;
    conj_t conj;

    // Nothing to copy when bli_zero_dim2() flags the m-by-n operand.
    if ( bli_zero_dim2( m, n ) ) return;

    if ( !bli_is_vector( m, n ) )
    {
        // Matrix case: begin with a column-by-column walk of B.
        n_iter = n;
        n_elem = m;
        lda    = a_cs;
        inca   = a_rs;
        ldb    = b_cs;
        incb   = b_rs;

        // A transposed A is traversed with its two strides exchanged.
        if ( bli_does_trans( trans ) )
        {
            bli_swap_ints( lda, inca );
        }

        // A row-major B is walked by rows instead of by columns.
        if ( bli_is_row_storage( b_rs, b_cs ) )
        {
            bli_swap_ints( n_iter, n_elem );
            bli_swap_ints( lda, inca );
            bli_swap_ints( ldb, incb );
        }
    }
    else
    {
        // Vector case: a single pass, so the underlying copy is invoked
        // exactly once.
        n_iter = 1;
        n_elem = bli_vector_dim( m, n );
        lda    = 1; // only ever multiplied by j == 0
        inca   = bli_vector_inc( trans, m, n, a_rs, a_cs );
        ldb    = 1; // only ever multiplied by j == 0
        incb   = bli_vector_inc( BLIS_NO_TRANSPOSE, m, n, b_rs, b_cs );
    }

    // Only the conjugation component of trans is handed to the kernel.
    conj = bli_proj_trans_to_conj( trans );

    // Copy one slice (column or row) of A into the matching slice of B.
    for ( j = 0; j < n_iter; ++j )
    {
        bli_zccopyv( conj,
                     n_elem,
                     a + j*lda, inca,
                     b + j*ldb, incb );
    }
}
| void bli_zccopyv | ( | conj_t | conj, |
| int | m, | ||
| dcomplex * | x, | ||
| int | incx, | ||
| scomplex * | y, | ||
| int | incy | ||
| ) |
References bli_cconjv(), bli_is_conj(), bli_zero_dim1(), scomplex::imag, dcomplex::imag, scomplex::real, and dcomplex::real.
Referenced by bli_zccopymr(), bli_zccopymrt(), and bli_zccopymt().
{
    dcomplex* src;
    scomplex* dst;
    int       remaining;

    // Nothing to copy when bli_zero_dim1() flags the length.
    if ( bli_zero_dim1( m ) ) return;

    // Copy both components of every element from the dcomplex input into
    // the scomplex output.
    for ( src = x, dst = y, remaining = m;
          remaining > 0;
          --remaining, src += incx, dst += incy )
    {
        dst->real = src->real;
        dst->imag = src->imag;
    }

    // Conjugation, when requested, is applied to the output in a second
    // pass over y.
    if ( bli_is_conj( conj ) )
    {
        bli_cconjv( m, y, incy );
    }
}
| void bli_zconjm | ( | int | m, |
| int | n, | ||
| dcomplex * | a, | ||
| int | a_rs, | ||
| int | a_cs | ||
| ) |
References bli_dm1(), bli_dscal(), bli_is_row_storage(), bli_is_vector(), bli_vector_dim(), bli_vector_inc(), bli_zero_dim2(), and BLIS_NO_TRANSPOSE.
Referenced by bli_zgemm(), and FLA_Conjugate().
{
    double scale = bli_dm1(); // factor applied to the imaginary parts
    int    n_iter, n_elem;
    int    lda, inca;
    int    j;

    // Nothing to do when bli_zero_dim2() flags the m-by-n operand.
    if ( bli_zero_dim2( m, n ) ) return;

    if ( !bli_is_vector( m, n ) )
    {
        // Matrix case: begin with a column-by-column walk of A.
        n_iter = n;
        n_elem = m;
        lda    = a_cs;
        inca   = a_rs;

        // A row-major A is walked by rows instead of by columns.
        if ( bli_is_row_storage( a_rs, a_cs ) )
        {
            bli_swap_ints( n_iter, n_elem );
            bli_swap_ints( lda, inca );
        }
    }
    else
    {
        // Vector case: a single pass, so the underlying scal is invoked
        // exactly once.
        n_iter = 1;
        n_elem = bli_vector_dim( m, n );
        lda    = 1; // only ever multiplied by j == 0
        inca   = bli_vector_inc( BLIS_NO_TRANSPOSE, m, n, a_rs, a_cs );
    }

    for ( j = 0; j < n_iter; ++j )
    {
        // View the slice as an array of doubles and start one double past
        // its first element; a stride of 2*inca doubles then visits the
        // same component of every element in the slice. (Assumes dcomplex
        // is laid out as a {real, imag} pair of doubles -- confirm.)
        double* imag_part = ( double* )( a + j*lda ) + 1;

        bli_dscal( n_elem,
                   &scale,
                   imag_part, 2*inca );
    }
}
| void bli_zconjmr | ( | uplo_t | uplo, |
| int | m, | ||
| int | n, | ||
| dcomplex * | a, | ||
| int | a_rs, | ||
| int | a_cs | ||
| ) |
References bli_dm1(), bli_dscal(), bli_is_row_storage(), bli_is_upper(), and bli_zero_dim2().
Referenced by bli_zhemm(), bli_ztrmm(), bli_ztrsm(), and FLA_Conjugate_r().
{
    double scale = bli_dm1(); // factor applied to the imaginary parts
    int    n_iter, n_elem_max, n_elem;
    int    lda, inca;
    int    j;

    // Nothing to do when bli_zero_dim2() flags the m-by-n operand.
    if ( bli_zero_dim2( m, n ) ) return;

    // Default traversal: one column of the triangle per iteration.
    n_iter     = n;
    n_elem_max = m;
    lda        = a_cs;
    inca       = a_rs;

    // When A is row-major, walk by rows instead. Dimensions and strides
    // trade places and uplo is toggled to match the new orientation.
    if ( bli_is_row_storage( a_rs, a_cs ) )
    {
        bli_swap_ints( n_iter, n_elem_max );
        bli_swap_ints( lda, inca );
        bli_toggle_uplo( uplo );
    }

    // In both loops the slice is viewed as doubles, offset by one double,
    // and scaled with a stride of 2*inca doubles. (Assumes dcomplex is laid
    // out as a {real, imag} pair of doubles -- confirm.)
    if ( !bli_is_upper( uplo ) )
    {
        // Lower case: slice j starts on the diagonal and holds
        // n_elem_max - j elements; stop as soon as none are left.
        for ( j = 0; j < n_iter && j < n_elem_max; ++j )
        {
            double* imag_part = ( double* )( a + j*lda + j*inca ) + 1;

            n_elem = n_elem_max - j;

            bli_dscal( n_elem,
                       &scale,
                       imag_part, 2*inca );
        }
    }
    else
    {
        // Upper case: slice j starts at its first element and holds
        // j + 1 elements, capped at n_elem_max.
        for ( j = 0; j < n_iter; ++j )
        {
            double* imag_part = ( double* )( a + j*lda ) + 1;

            n_elem = bli_min( j + 1, n_elem_max );

            bli_dscal( n_elem,
                       &scale,
                       imag_part, 2*inca );
        }
    }
}
| void bli_zconjv | ( | int | m, |
| dcomplex * | x, | ||
| int | incx | ||
| ) |
References bli_dm1(), and bli_dscal().
Referenced by bli_czcopyv(), bli_zcopymt(), bli_zcopyv(), bli_zgemv(), bli_zswapmt(), FLA_Bidiag_UT_u_step_ofz_var3(), FLA_Bidiag_UT_u_step_ofz_var4(), FLA_Bidiag_UT_u_step_opz_var3(), FLA_Bidiag_UT_u_step_opz_var4(), FLA_Househ2_UT_r_opz(), and FLA_LQ_UT_form_Q_opz_var1().
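| void bli_zcopy | ( | int | m, |
| dcomplex * | x, | ||
| int | incx, | ||
| dcomplex * | y, | ||
| int | incy | ||
| ) |
References cblas_zcopy(), and F77_zcopy().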
Referenced by bli_zcopymr(), bli_zcopymt(), bli_zcopyv(), and FLA_SA_LU_unb().
{
    // Hand the double-complex copy of x into y to whichever BLAS binding
    // this build was configured with.
#if !defined( BLIS_ENABLE_CBLAS_INTERFACES )
    // Fortran-77 binding: the length and both strides go by address.
    F77_zcopy( &m, x, &incx, y, &incy );
#else
    // CBLAS binding: the length and both strides go by value.
    cblas_zcopy( m, x, incx, y, incy );
#endif
}
| void bli_zcopymr | ( | uplo_t | uplo, |
| int | m, | ||
| int | n, | ||
| dcomplex * | a, | ||
| int | a_rs, | ||
| int | a_cs, | ||
| dcomplex * | b, | ||
| int | b_rs, | ||
| int | b_cs | ||
| ) |
References bli_is_row_storage(), bli_is_upper(), bli_zcopy(), and bli_zero_dim2().
Referenced by bli_zcreate_contigmr(), bli_zfree_saved_contigmr(), bli_zfree_saved_contigmsr(), and FLA_Copyr_external().
{
    int n_iter, n_elem_max, n_elem;
    int lda, inca;
    int ldb, incb;
    int j;

    // Nothing to copy when bli_zero_dim2() flags the m-by-n operand.
    if ( bli_zero_dim2( m, n ) ) return;

    // Default traversal: one column of the triangle per iteration.
    n_iter     = n;
    n_elem_max = m;
    lda        = a_cs;
    inca       = a_rs;
    ldb        = b_cs;
    incb       = b_rs;

    // Only when both B and A are row-major, walk by rows instead.
    // Dimensions and strides trade places and uplo is toggled to match
    // the new orientation.
    if ( bli_is_row_storage( b_rs, b_cs ) && bli_is_row_storage( a_rs, a_cs ) )
    {
        bli_swap_ints( n_iter, n_elem_max );
        bli_swap_ints( lda, inca );
        bli_swap_ints( ldb, incb );
        bli_toggle_uplo( uplo );
    }

    if ( !bli_is_upper( uplo ) )
    {
        // Lower case: slice j starts on the diagonal and holds
        // n_elem_max - j elements; stop as soon as none are left.
        for ( j = 0; j < n_iter && j < n_elem_max; j++ )
        {
            n_elem = n_elem_max - j;

            bli_zcopy( n_elem,
                       a + j*lda + j*inca, inca,
                       b + j*ldb + j*incb, incb );
        }
    }
    else
    {
        // Upper case: slice j starts at its first element and holds
        // j + 1 elements, capped at n_elem_max.
        for ( j = 0; j < n_iter; j++ )
        {
            n_elem = bli_min( j + 1, n_elem_max );

            bli_zcopy( n_elem,
                       a + j*lda, inca,
                       b + j*ldb, incb );
        }
    }
}
| void bli_zcopymrt | ( | uplo_t | uplo, |
| trans_t | trans, | ||
| int | m, | ||
| int | n, | ||
| dcomplex * | a, | ||
| int | a_rs, | ||
| int | a_cs, | ||
| dcomplex * | b, | ||
| int | b_rs, | ||
| int | b_cs | ||
| ) |
References bli_does_trans(), bli_is_col_storage(), bli_is_lower(), bli_proj_trans_to_conj(), bli_zcopyv(), and bli_zero_dim2().
Referenced by bli_zhemm(), bli_ztrmm(), bli_ztrsm(), FLA_Copyrt_external(), FLA_Lyap_h_opz_var1(), FLA_Lyap_h_opz_var2(), FLA_Lyap_h_opz_var3(), FLA_Lyap_h_opz_var4(), FLA_Lyap_n_opz_var1(), FLA_Lyap_n_opz_var2(), FLA_Lyap_n_opz_var3(), and FLA_Lyap_n_opz_var4().
{
    int    n_iter, n_elem, n_elem_max;
    int    lda, inca;
    int    ldb, incb;
    int    shrinking; // TRUE when slices get shorter as j grows
    int    j;
    conj_t conj;

    // Nothing to copy when bli_zero_dim2() flags the m-by-n operand.
    if ( bli_zero_dim2( m, n ) ) return;

    // The strides depend only on how B is stored; the iteration counts and
    // the growth direction additionally depend on uplo.
    if ( bli_is_col_storage( b_rs, b_cs ) )
    {
        // Column storage: iterate over columns.
        lda  = a_cs;
        inca = a_rs;
        ldb  = b_cs;
        incb = b_rs;

        if ( bli_is_lower( uplo ) )
        {
            n_iter     = bli_min( m, n );
            n_elem_max = m;
            shrinking  = TRUE;
        }
        else
        {
            n_iter     = n;
            n_elem_max = bli_min( m, n );
            shrinking  = FALSE;
        }
    }
    else
    {
        // Row storage: iterate over rows.
        lda  = a_rs;
        inca = a_cs;
        ldb  = b_rs;
        incb = b_cs;

        if ( bli_is_lower( uplo ) )
        {
            n_iter     = m;
            n_elem_max = bli_min( m, n );
            shrinking  = FALSE;
        }
        else
        {
            n_iter     = bli_min( m, n );
            n_elem_max = n;
            shrinking  = TRUE;
        }
    }

    // A transposed A is traversed with its two strides exchanged.
    if ( bli_does_trans( trans ) )
    {
        bli_swap_ints( lda, inca );
    }

    // Only the conjugation component of trans is handed to the kernel.
    conj = bli_proj_trans_to_conj( trans );

    for ( j = 0; j < n_iter; j++ )
    {
        int skip; // leading elements of slice j that are not touched

        if ( shrinking )
        {
            // Slice starts on the diagonal and loses one element per step.
            skip   = j;
            n_elem = n_elem_max - j;
        }
        else
        {
            // Slice starts at its head and gains one element per step,
            // capped at n_elem_max.
            skip   = 0;
            n_elem = bli_min( j + 1, n_elem_max );
        }

        bli_zcopyv( conj,
                    n_elem,
                    a + j*lda + skip*inca, inca,
                    b + j*ldb + skip*incb, incb );
    }
}
| void bli_zcopymt | ( | trans_t | trans, |
| int | m, | ||
| int | n, | ||
| dcomplex * | a, | ||
| int | a_rs, | ||
| int | a_cs, | ||
| dcomplex * | b, | ||
| int | b_rs, | ||
| int | b_cs | ||
| ) |
References bli_does_conj(), bli_does_notrans(), bli_does_trans(), bli_is_col_storage(), bli_is_row_storage(), bli_is_vector(), bli_vector_dim(), bli_vector_inc(), bli_zconjv(), bli_zcopy(), bli_zero_dim2(), and BLIS_NO_TRANSPOSE.
Referenced by bli_zcreate_contigm(), bli_zcreate_contigmt(), bli_zfree_saved_contigm(), bli_zgemm(), bli_zhemm(), bli_zher2k(), bli_zsymm(), bli_zsyr2k(), bli_ztrmmsx(), bli_ztrsmsx(), FLA_Bsvd_v_opz_var2(), FLA_Copy_external(), FLA_Copyt_external(), FLA_Tevd_v_opz_var2(), and FLA_Tevd_v_opz_var4().
{
    int n_iter, n_elem;
    int lda, inca;
    int ldb, incb;
    int j;

    // Nothing to copy when bli_zero_dim2() flags the m-by-n operand.
    if ( bli_zero_dim2( m, n ) ) return;

    if ( !bli_is_vector( m, n ) )
    {
        // Matrix case: begin with a column-by-column walk over A and B.
        n_iter = n;
        n_elem = m;
        lda    = a_cs;
        inca   = a_rs;
        ldb    = b_cs;
        incb   = b_rs;

        // A transposed A is traversed with its two strides exchanged.
        if ( bli_does_trans( trans ) )
        {
            bli_swap_ints( lda, inca );
        }

        // Walk by rows instead only when B is row-major and A, once trans
        // has been taken into account, is effectively row-major as well.
        if ( bli_is_row_storage( b_rs, b_cs ) &&
             ( ( bli_is_col_storage( a_rs, a_cs ) && bli_does_trans( trans ) ) ||
               ( bli_is_row_storage( a_rs, a_cs ) && bli_does_notrans( trans ) ) ) )
        {
            bli_swap_ints( n_iter, n_elem );
            bli_swap_ints( lda, inca );
            bli_swap_ints( ldb, incb );
        }
    }
    else
    {
        // Vector case: a single pass, so the underlying copy is invoked
        // exactly once.
        n_iter = 1;
        n_elem = bli_vector_dim( m, n );
        lda    = 1; // only ever multiplied by j == 0
        inca   = bli_vector_inc( trans, m, n, a_rs, a_cs );
        ldb    = 1; // only ever multiplied by j == 0
        incb   = bli_vector_inc( BLIS_NO_TRANSPOSE, m, n, b_rs, b_cs );
    }

    for ( j = 0; j < n_iter; j++ )
    {
        dcomplex* b_slice = b + j*ldb;

        // Copy the slice of A into B ...
        bli_zcopy( n_elem,
                   a + j*lda, inca,
                   b_slice,   incb );

        // ... and, when trans asks for it, conjugate the copy in place.
        if ( bli_does_conj( trans ) )
        {
            bli_zconjv( n_elem,
                        b_slice, incb );
        }
    }
}
| void bli_zcopyv | ( | conj_t | conj, |
| int | m, | ||
| dcomplex * | x, | ||
| int | incx, | ||
| dcomplex * | y, | ||
| int | incy | ||
| ) |
References bli_is_conj(), bli_zconjv(), bli_zcopy(), and bli_zero_dim1().
Referenced by bli_zaxpymt(), bli_zaxpysmt(), bli_zaxpyv(), bli_zcopymrt(), bli_zgemv(), bli_zger(), bli_zhemv(), bli_zher(), bli_zher2(), bli_zsymmize(), bli_zsymv_blas(), bli_zsyr2_blas(), bli_zsyr_blas(), bli_ztrmv(), bli_ztrmvsx(), bli_ztrsv(), bli_ztrsvsx(), bli_zzcopymr(), bli_zzcopymrt(), bli_zzcopymt(), FLA_Accum_T_UT_fc_opz_var1(), FLA_Accum_T_UT_fr_opz_var1(), FLA_Apply_H2_UT_l_opz_var1(), FLA_Apply_H2_UT_r_opz_var1(), FLA_Apply_HUD_UT_l_opz_var1(), FLA_Bidiag_UT_u_step_ofz_var2(), FLA_Bidiag_UT_u_step_ofz_var3(), FLA_Bidiag_UT_u_step_ofz_var4(), FLA_Bidiag_UT_u_step_opz_var1(), FLA_Bidiag_UT_u_step_opz_var2(), FLA_Bidiag_UT_u_step_opz_var3(), FLA_Bidiag_UT_u_step_opz_var4(), FLA_Bidiag_UT_u_step_opz_var5(), FLA_CAQR2_UT_opz_var1(), FLA_Eig_gest_il_opz_var3(), FLA_Eig_gest_iu_opz_var3(), FLA_Fused_UYx_ZVx_opz_var1(), FLA_Hess_UT_step_ofz_var3(), FLA_Hess_UT_step_opz_var3(), FLA_Hess_UT_step_opz_var4(), FLA_Hess_UT_step_opz_var5(), FLA_LQ_UT_opz_var2(), FLA_QR_UT_opz_var2(), FLA_Tridiag_UT_l_step_ofz_var2(), FLA_Tridiag_UT_l_step_opz_var2(), FLA_Tridiag_UT_l_step_opz_var3(), and FLA_Tridiag_UT_shift_U_l_opz().
{
    // Copies the length-m vector x into y, conjugating the copy when conj
    // requests it.

    // An empty vector requires no work.
    if ( bli_zero_dim1( m ) ) return;

    // First copy x into y verbatim...
    bli_zcopy( m, x, incx, y, incy );

    // ...then conjugate y in place if a conjugated copy was requested.
    if ( bli_is_conj( conj ) )
    {
        bli_zconjv( m, y, incy );
    }
}
| void bli_zdcopymr | ( | uplo_t | uplo, |
| int | m, | ||
| int | n, | ||
| dcomplex * | a, | ||
| int | a_rs, | ||
| int | a_cs, | ||
| double * | b, | ||
| int | b_rs, | ||
| int | b_cs | ||
| ) |
References bli_is_row_storage(), bli_is_upper(), bli_zdcopyv(), bli_zero_dim2(), and BLIS_NO_TRANSPOSE.
Referenced by FLA_Copyr_external().
{
    dcomplex* a_vec;
    double*   b_vec;
    int       a_ld, a_inc;
    int       b_ld, b_inc;
    int       num_vecs;
    int       max_len;
    int       len;
    int       k;

    // Copies the triangle of A selected by uplo into the real matrix B via
    // bli_zdcopyv(), one strided vector at a time.

    // Nothing to copy when either dimension is zero.
    if ( bli_zero_dim2( m, n ) ) return;

    // Default traversal: one column per iteration.
    num_vecs = n;
    max_len  = m;
    a_inc    = a_rs;
    a_ld     = a_cs;
    b_inc    = b_rs;
    b_ld     = b_cs;

    // If B is row-major, walk rows instead of columns for increased spatial
    // locality. uplo is toggled together with the exchange of the row and
    // column roles.
    if ( bli_is_row_storage( b_rs, b_cs ) )
    {
        bli_swap_ints( num_vecs, max_len );
        bli_swap_ints( a_ld, a_inc );
        bli_swap_ints( b_ld, b_inc );
        bli_toggle_uplo( uplo );
    }

    // NOTE(review): BLIS_NO_TRANSPOSE is passed below in the conj_t slot of
    // bli_zdcopyv(); a conj_t constant looks intended -- confirm. It is
    // harmless here because bli_zdcopyv() never reads its conj argument.
    if ( bli_is_upper( uplo ) )
    {
        // Vector k starts at its first element and holds k+1 elements,
        // capped at max_len.
        for ( k = 0; k < num_vecs; k++ )
        {
            len   = bli_min( k + 1, max_len );
            a_vec = a + k*a_ld;
            b_vec = b + k*b_ld;

            bli_zdcopyv( BLIS_NO_TRANSPOSE,
                         len,
                         a_vec, a_inc,
                         b_vec, b_inc );
        }
    }
    else // lower
    {
        // Vector k starts on the diagonal and shrinks by one element per
        // iteration; stop as soon as nothing is left to copy.
        for ( k = 0; k < num_vecs; k++ )
        {
            len = max_len - k;
            if ( len <= 0 ) break;

            a_vec = a + k*a_ld + k*a_inc;
            b_vec = b + k*b_ld + k*b_inc;

            bli_zdcopyv( BLIS_NO_TRANSPOSE,
                         len,
                         a_vec, a_inc,
                         b_vec, b_inc );
        }
    }
}
| void bli_zdcopymrt | ( | uplo_t | uplo, |
| trans_t | trans, | ||
| int | m, | ||
| int | n, | ||
| dcomplex * | a, | ||
| int | a_rs, | ||
| int | a_cs, | ||
| double * | b, | ||
| int | b_rs, | ||
| int | b_cs | ||
| ) |
References bli_does_trans(), bli_is_col_storage(), bli_is_lower(), bli_proj_trans_to_conj(), bli_zdcopyv(), and bli_zero_dim2().
Referenced by FLA_Copyrt_external().
{
    dcomplex* a_vec;
    double*   b_vec;
    int       a_ld, a_inc;
    int       b_ld, b_inc;
    int       num_vecs;
    int       max_len;
    int       len;
    int       shrinking;
    int       k;
    conj_t    conj_a;

    // Copies the uplo triangle of A (read with the transposition encoded in
    // trans) into the real m x n matrix B via bli_zdcopyv().

    // Nothing to copy when either dimension is zero.
    if ( bli_zero_dim2( m, n ) ) return;

    // Strides depend only on how B is stored; the iteration shape depends on
    // both the storage of B and the triangle being referenced.
    if ( bli_is_col_storage( b_rs, b_cs ) )
    {
        // Traverse by columns.
        a_ld  = a_cs;
        a_inc = a_rs;
        b_ld  = b_cs;
        b_inc = b_rs;

        if ( bli_is_lower( uplo ) )
        {
            num_vecs  = bli_min( m, n );
            max_len   = m;
            shrinking = TRUE;
        }
        else // upper
        {
            num_vecs  = n;
            max_len   = bli_min( m, n );
            shrinking = FALSE;
        }
    }
    else // B is row-stored
    {
        // Traverse by rows.
        a_ld  = a_rs;
        a_inc = a_cs;
        b_ld  = b_rs;
        b_inc = b_cs;

        if ( bli_is_lower( uplo ) )
        {
            num_vecs  = m;
            max_len   = bli_min( m, n );
            shrinking = FALSE;
        }
        else // upper
        {
            num_vecs  = bli_min( m, n );
            max_len   = n;
            shrinking = TRUE;
        }
    }

    // A transposed A is read with its two strides exchanged.
    if ( bli_does_trans( trans ) )
    {
        bli_swap_ints( a_ld, a_inc );
    }

    // Only the conjugation component of trans is forwarded to the
    // vector copy.
    conj_a = bli_proj_trans_to_conj( trans );

    for ( k = 0; k < num_vecs; k++ )
    {
        if ( shrinking )
        {
            // Start on the diagonal; one element fewer per iteration.
            len   = max_len - k;
            a_vec = a + k*a_ld + k*a_inc;
            b_vec = b + k*b_ld + k*b_inc;
        }
        else
        {
            // Start at the first element; grow until capped at max_len.
            len   = bli_min( k + 1, max_len );
            a_vec = a + k*a_ld;
            b_vec = b + k*b_ld;
        }

        bli_zdcopyv( conj_a,
                     len,
                     a_vec, a_inc,
                     b_vec, b_inc );
    }
}
| void bli_zdcopymt | ( | trans_t | trans, |
| int | m, | ||
| int | n, | ||
| dcomplex * | a, | ||
| int | a_rs, | ||
| int | a_cs, | ||
| double * | b, | ||
| int | b_rs, | ||
| int | b_cs | ||
| ) |
References bli_does_trans(), bli_is_row_storage(), bli_is_vector(), bli_proj_trans_to_conj(), bli_vector_dim(), bli_vector_inc(), bli_zdcopyv(), bli_zero_dim2(), and BLIS_NO_TRANSPOSE.
Referenced by FLA_Copy_external(), and FLA_Copyt_external().
{
    dcomplex* a_vec;
    double*   b_vec;
    int       a_ld, a_inc;
    int       b_ld, b_inc;
    int       num_vecs;
    int       vec_len;
    int       k;
    conj_t    conj_a;

    // Copies the m x n matrix A (optionally transposed, per trans) into the
    // real matrix B via bli_zdcopyv(), one strided vector at a time.

    // Nothing to copy when either dimension is zero.
    if ( bli_zero_dim2( m, n ) ) return;

    if ( bli_is_vector( m, n ) )
    {
        // A and B are vectors, so the underlying copy runs exactly once.
        // The leading dimensions are only ever multiplied by k == 0, so
        // their values are irrelevant.
        a_ld     = 1;
        b_ld     = 1;
        num_vecs = 1;
        vec_len  = bli_vector_dim( m, n );
        a_inc    = bli_vector_inc( trans, m, n, a_rs, a_cs );
        b_inc    = bli_vector_inc( BLIS_NO_TRANSPOSE, m, n, b_rs, b_cs );
    }
    else
    {
        // General matrix: begin with a column-by-column traversal of B.
        num_vecs = n;
        vec_len  = m;
        a_inc    = a_rs;
        a_ld     = a_cs;
        b_inc    = b_rs;
        b_ld     = b_cs;

        // A transposed A is read with its two strides exchanged.
        if ( bli_does_trans( trans ) )
        {
            bli_swap_ints( a_ld, a_inc );
        }

        // If B is row-major, traverse by rows instead of by columns for
        // increased spatial locality.
        if ( bli_is_row_storage( b_rs, b_cs ) )
        {
            bli_swap_ints( num_vecs, vec_len );
            bli_swap_ints( a_ld, a_inc );
            bli_swap_ints( b_ld, b_inc );
        }
    }

    // Only the conjugation component of trans is forwarded to the
    // vector copy.
    conj_a = bli_proj_trans_to_conj( trans );

    for ( k = 0; k < num_vecs; ++k )
    {
        a_vec = a + k*a_ld;
        b_vec = b + k*b_ld;

        bli_zdcopyv( conj_a,
                     vec_len,
                     a_vec, a_inc,
                     b_vec, b_inc );
    }
}
| void bli_zdcopyv | ( | conj_t | conj, |
| int | m, | ||
| dcomplex * | x, | ||
| int | incx, | ||
| double * | y, | ||
| int | incy | ||
| ) |
References bli_zero_dim1(), and dcomplex::real.
Referenced by bli_zdcopymr(), bli_zdcopymrt(), and bli_zdcopymt().
{
    dcomplex* src;
    double*   dst;
    int       remaining;

    // Stores the real part of each of the m elements of x into y. The conj
    // argument is not consulted: only real parts are copied.

    // An empty vector requires no work.
    if ( bli_zero_dim1( m ) ) return;

    src = x;
    dst = y;

    for ( remaining = m; remaining > 0; --remaining )
    {
        *dst = src->real;

        src += incx;
        dst += incy;
    }
}
| void bli_zdinvscalm | ( | conj_t | conj, |
| int | m, | ||
| int | n, | ||
| double * | alpha, | ||
| dcomplex * | a, | ||
| int | a_rs, | ||
| int | a_cs | ||
| ) |
References bli_dinvert2s(), bli_is_row_storage(), bli_is_vector(), bli_vector_dim(), bli_vector_inc(), bli_zdscal(), bli_zero_dim2(), and BLIS_NO_TRANSPOSE.
Referenced by FLA_Inv_scal_external(), and FLA_Inv_scalc_external().
{
    double    alpha_recip;
    dcomplex* a_vec;
    int       a_ld, a_inc;
    int       num_vecs;
    int       vec_len;
    int       k;

    // Scales the m x n complex matrix A by the value bli_dinvert2s()
    // derives from the real scalar alpha, one strided vector at a time.

    // Nothing to do for an empty matrix, or when alpha is one.
    if ( bli_zero_dim2( m, n ) ) return;
    if ( bli_deq1( alpha ) ) return;

    if ( bli_is_vector( m, n ) )
    {
        // A is a vector, so the underlying scal runs exactly once. The
        // leading dimension is only ever multiplied by k == 0.
        a_ld     = 1;
        num_vecs = 1;
        vec_len  = bli_vector_dim( m, n );
        a_inc    = bli_vector_inc( BLIS_NO_TRANSPOSE, m, n, a_rs, a_cs );
    }
    else
    {
        // General matrix: begin with a column-by-column traversal.
        num_vecs = n;
        vec_len  = m;
        a_inc    = a_rs;
        a_ld     = a_cs;

        // If A is row-major, traverse by rows instead of by columns to
        // increase spatial locality.
        if ( bli_is_row_storage( a_rs, a_cs ) )
        {
            bli_swap_ints( num_vecs, vec_len );
            bli_swap_ints( a_ld, a_inc );
        }
    }

    // Compute the scaling factor once, outside the loop.
    bli_dinvert2s( conj, alpha, &alpha_recip );

    for ( k = 0; k < num_vecs; k++ )
    {
        a_vec = a + k*a_ld;

        bli_zdscal( vec_len,
                    &alpha_recip,
                    a_vec, a_inc );
    }
}
| void bli_zdinvscalv | ( | conj_t | conj, |
| int | n, | ||
| double * | alpha, | ||
| dcomplex * | x, | ||
| int | incx | ||
| ) |
References bli_zdscal().
{
    double recip;

    // Scales the length-n complex vector x by 1/alpha, where alpha is real.
    // The conj argument is not consulted here.

    // Dividing by one changes nothing.
    if ( bli_deq1( alpha ) ) return;

    recip = 1.0 / *alpha;

    bli_zdscal( n, &recip, x, incx );
}
| void bli_zdot | ( | conj_t | conj, |
| int | n, | ||
| dcomplex * | x, | ||
| int | incx, | ||
| dcomplex * | y, | ||
| int | incy, | ||
| dcomplex * | rho | ||
| ) |
References bli_is_conj(), bli_zdot_in(), cblas_zdotc_sub(), and cblas_zdotu_sub().
Referenced by bli_zdot2s(), bli_zdots(), FLA_Bidiag_UT_u_step_ofz_var2(), FLA_Bidiag_UT_u_step_ofz_var3(), FLA_Bidiag_UT_u_step_ofz_var4(), FLA_Bidiag_UT_u_step_opz_var2(), FLA_Bidiag_UT_u_step_opz_var3(), FLA_Bidiag_UT_u_step_opz_var4(), FLA_Bidiag_UT_u_step_opz_var5(), FLA_Dot_external(), FLA_Dotc_external(), FLA_Fused_Ahx_Axpy_Ax_opz_var1(), FLA_Fused_Gerc2_Ahx_Axpy_Ax_opz_var1(), FLA_Fused_UZhu_ZUhu_opz_var1(), FLA_Hess_UT_step_ofz_var2(), FLA_Hess_UT_step_ofz_var3(), FLA_Hess_UT_step_ofz_var4(), FLA_Hess_UT_step_opz_var2(), FLA_Hess_UT_step_opz_var3(), FLA_Hess_UT_step_opz_var4(), FLA_Hess_UT_step_opz_var5(), FLA_Sylv_hh_opz_var1(), FLA_Sylv_hn_opz_var1(), FLA_Sylv_nh_opz_var1(), FLA_Sylv_nn_opz_var1(), FLA_Tridiag_UT_l_step_ofz_var2(), FLA_Tridiag_UT_l_step_ofz_var3(), FLA_Tridiag_UT_l_step_opz_var1(), FLA_Tridiag_UT_l_step_opz_var2(), and FLA_Tridiag_UT_l_step_opz_var3().
{
    // Computes the dot product of x and y into rho, dispatching either to
    // the built-in implementation or to the CBLAS interface depending on
    // how the library was configured.
#ifndef BLIS_ENABLE_CBLAS_INTERFACES
    bli_zdot_in( conj, n, x, incx, y, incy, rho );
#else
    if ( bli_is_conj( conj ) )
    {
        // Conjugated variant.
        cblas_zdotc_sub( n, x, incx, y, incy, rho );
    }
    else
    {
        // Unconjugated variant.
        cblas_zdotu_sub( n, x, incx, y, incy, rho );
    }
#endif
}
| void bli_zdot2s | ( | conj_t | conj, |
| int | n, | ||
| dcomplex * | alpha, | ||
| dcomplex * | x, | ||
| int | incx, | ||
| dcomplex * | y, | ||
| int | incy, | ||
| dcomplex * | beta, | ||
| dcomplex * | rho | ||
| ) |
References bli_zdot(), dcomplex::imag, and dcomplex::real.
Referenced by FLA_Dot2cs_external(), FLA_Dot2s_external(), FLA_Eig_gest_il_opz_var1(), FLA_Eig_gest_il_opz_var2(), FLA_Eig_gest_il_opz_var3(), FLA_Eig_gest_iu_opz_var1(), FLA_Eig_gest_iu_opz_var2(), FLA_Eig_gest_iu_opz_var3(), FLA_Eig_gest_nl_opz_var1(), FLA_Eig_gest_nl_opz_var2(), FLA_Eig_gest_nu_opz_var1(), FLA_Eig_gest_nu_opz_var2(), FLA_Lyap_h_opz_var1(), FLA_Lyap_h_opz_var2(), FLA_Lyap_h_opz_var3(), FLA_Lyap_n_opz_var1(), FLA_Lyap_n_opz_var2(), and FLA_Lyap_n_opz_var3().
{
    // Computes rho := beta * rho + alpha * dot(x,y) + conj(alpha) * dot(y,x),
    // where both dot products are taken with the same conj setting.

    // Snapshot every scalar operand before anything is written, so the
    // update below only ever reads the original values.
    dcomplex a_val  = *alpha;
    dcomplex ac_val = *alpha;
    dcomplex b_val  = *beta;
    dcomplex r_val  = *rho;
    dcomplex xy;
    dcomplex yx;

    // ac_val holds the complex conjugate of alpha.
    ac_val.imag *= -1.0;

    bli_zdot( conj, n, x, incx, y, incy, &xy );
    bli_zdot( conj, n, y, incy, x, incx, &yx );

    // Real part of the three complex products, summed left to right.
    rho->real = b_val.real * r_val.real - b_val.imag * r_val.imag +
                a_val.real * xy.real - a_val.imag * xy.imag +
                ac_val.real * yx.real - ac_val.imag * yx.imag;

    // Imaginary part of the same three products.
    rho->imag = b_val.real * r_val.imag + b_val.imag * r_val.real +
                a_val.real * xy.imag + a_val.imag * xy.real +
                ac_val.real * yx.imag + ac_val.imag * yx.real;
}
| void bli_zdot_in | ( | conj_t | conj, |
| int | n, | ||
| dcomplex * | x, | ||
| int | incx, | ||
| dcomplex * | y, | ||
| int | incy, | ||
| dcomplex * | rho | ||
| ) |
References bli_is_conj(), dcomplex::imag, and dcomplex::real.
Referenced by bli_zdot().
{
    dcomplex* xp;
    dcomplex* yp;
    double    x_re, x_im;
    double    y_re, y_im;
    double    acc_re;
    double    acc_im;
    int       k;

    // Built-in complex dot product: accumulates conj(x[k]) * y[k] when conj
    // is set and x[k] * y[k] otherwise, then stores the sum in rho. With
    // n <= 0 neither loop runs and rho is set to zero.

    acc_re = 0.0;
    acc_im = 0.0;

    xp = x;
    yp = y;

    if ( bli_is_conj( conj ) )
    {
        for ( k = 0; k < n; ++k )
        {
            x_re = xp->real;
            x_im = xp->imag;
            y_re = yp->real;
            y_im = yp->imag;

            // The imaginary part of x enters negated (conjugated).
            acc_re += x_re * y_re - -x_im * y_im;
            acc_im += x_re * y_im + -x_im * y_re;

            xp += incx;
            yp += incy;
        }
    }
    else
    {
        for ( k = 0; k < n; ++k )
        {
            x_re = xp->real;
            x_im = xp->imag;
            y_re = yp->real;
            y_im = yp->imag;

            acc_re += x_re * y_re - x_im * y_im;
            acc_im += x_re * y_im + x_im * y_re;

            xp += incx;
            yp += incy;
        }
    }

    rho->real = acc_re;
    rho->imag = acc_im;
}
| void bli_zdots | ( | conj_t | conj, |
| int | n, | ||
| dcomplex * | alpha, | ||
| dcomplex * | x, | ||
| int | incx, | ||
| dcomplex * | y, | ||
| int | incy, | ||
| dcomplex * | beta, | ||
| dcomplex * | rho | ||
| ) |
References bli_zdot(), dcomplex::imag, and dcomplex::real.
Referenced by FLA_Chol_l_opz_var1(), FLA_Chol_l_opz_var2(), FLA_Chol_u_opz_var1(), FLA_Chol_u_opz_var2(), FLA_Dotcs_external(), FLA_Dots_external(), FLA_Hess_UT_step_opz_var5(), FLA_LU_nopiv_opz_var1(), FLA_LU_nopiv_opz_var2(), FLA_LU_nopiv_opz_var3(), FLA_LU_nopiv_opz_var4(), FLA_LU_piv_opz_var3(), FLA_LU_piv_opz_var4(), FLA_Ttmm_l_opz_var2(), FLA_Ttmm_l_opz_var3(), FLA_Ttmm_u_opz_var2(), and FLA_Ttmm_u_opz_var3().
{
    // Computes rho := beta * rho + alpha * dot(x,y).
    //
    // All three scalar operands are snapshotted before rho is written.
    // rho->real is stored before rho->imag is computed, so reading alpha or
    // beta through their pointers at that point would pick up the new real
    // part if either pointer aliases rho. This mirrors what bli_zdot2s()
    // does.
    dcomplex alpha_d  = *alpha;
    dcomplex beta_d   = *beta;
    dcomplex rho_orig = *rho;
    dcomplex dot_prod;

    bli_zdot( conj,
              n,
              x, incx,
              y, incy,
              &dot_prod );

    // Real part of beta * rho + alpha * dot_prod.
    rho->real = beta_d.real * rho_orig.real - beta_d.imag * rho_orig.imag +
                alpha_d.real * dot_prod.real - alpha_d.imag * dot_prod.imag;

    // Imaginary part of beta * rho + alpha * dot_prod.
    rho->imag = beta_d.real * rho_orig.imag + beta_d.imag * rho_orig.real +
                alpha_d.real * dot_prod.imag + alpha_d.imag * dot_prod.real;
}
| void bli_zdscal | ( | int | n, |
| double * | alpha, | ||
| dcomplex * | x, | ||
| int | incx | ||
| ) |
References cblas_zdscal(), and F77_zdscal().
Referenced by bli_zdinvscalm(), bli_zdinvscalv(), bli_zdscalm(), bli_zdscalmr(), and bli_zdscalv().
{
    // Thin wrapper that scales the complex vector x by the real scalar
    // alpha using whichever BLAS interface the library was configured for.
#ifndef BLIS_ENABLE_CBLAS_INTERFACES
    // Fortran-77 interface: every argument is passed by address.
    F77_zdscal( &n, alpha, x, &incx );
#else
    // CBLAS interface: the real scalar is passed by value.
    cblas_zdscal( n, *alpha, x, incx );
#endif
}
| void bli_zdscalm | ( | conj_t | conj, |
| int | m, | ||
| int | n, | ||
| double * | alpha, | ||
| dcomplex * | a, | ||
| int | a_rs, | ||
| int | a_cs | ||
| ) |
References bli_is_row_storage(), bli_is_vector(), bli_vector_dim(), bli_vector_inc(), bli_zdscal(), bli_zero_dim2(), and BLIS_NO_TRANSPOSE.
Referenced by FLA_Scal_external(), and FLA_Scalc_external().
{
    double    alpha_used;
    dcomplex* a_vec;
    int       a_ld, a_inc;
    int       num_vecs;
    int       vec_len;
    int       k;

    // Scales the m x n complex matrix A by the real scalar alpha (as
    // processed by bli_dcopys() with conj), one strided vector at a time.

    // Nothing to do for an empty matrix, or when alpha is one.
    if ( bli_zero_dim2( m, n ) ) return;
    if ( bli_deq1( alpha ) ) return;

    if ( bli_is_vector( m, n ) )
    {
        // A is a vector, so the underlying scal runs exactly once. The
        // leading dimension is only ever multiplied by k == 0.
        a_ld     = 1;
        num_vecs = 1;
        vec_len  = bli_vector_dim( m, n );
        a_inc    = bli_vector_inc( BLIS_NO_TRANSPOSE, m, n, a_rs, a_cs );
    }
    else
    {
        // General matrix: begin with a column-by-column traversal.
        num_vecs = n;
        vec_len  = m;
        a_inc    = a_rs;
        a_ld     = a_cs;

        // If A is row-major, traverse by rows instead of by columns to
        // increase spatial locality.
        if ( bli_is_row_storage( a_rs, a_cs ) )
        {
            bli_swap_ints( num_vecs, vec_len );
            bli_swap_ints( a_ld, a_inc );
        }
    }

    // Obtain the scalar actually applied once, outside the loop.
    bli_dcopys( conj, alpha, &alpha_used );

    for ( k = 0; k < num_vecs; k++ )
    {
        a_vec = a + k*a_ld;

        bli_zdscal( vec_len,
                    &alpha_used,
                    a_vec, a_inc );
    }
}
| void bli_zdscalmr | ( | uplo_t | uplo, |
| int | m, | ||
| int | n, | ||
| double * | alpha, | ||
| dcomplex * | a, | ||
| int | a_rs, | ||
| int | a_cs | ||
| ) |
References bli_is_row_storage(), bli_is_upper(), bli_zdscal(), and bli_zero_dim2().
Referenced by bli_zher2k(), bli_zherk(), and FLA_Scalr_external().
{
    dcomplex* a_vec;
    int       a_ld, a_inc;
    int       num_vecs;
    int       max_len;
    int       len;
    int       k;

    // Scales the triangle of the complex matrix A selected by uplo by the
    // real scalar alpha, one strided vector at a time.

    // Nothing to do for an empty matrix, or when alpha is one.
    if ( bli_zero_dim2( m, n ) ) return;
    if ( bli_deq1( alpha ) ) return;

    // Default traversal: one column per iteration.
    num_vecs = n;
    max_len  = m;
    a_inc    = a_rs;
    a_ld     = a_cs;

    // If A is row-major, walk rows instead of columns to increase spatial
    // locality. uplo is toggled together with the exchange of the row and
    // column roles.
    if ( bli_is_row_storage( a_rs, a_cs ) )
    {
        bli_swap_ints( num_vecs, max_len );
        bli_swap_ints( a_ld, a_inc );
        bli_toggle_uplo( uplo );
    }

    if ( bli_is_upper( uplo ) )
    {
        // Vector k starts at its first element and holds k+1 elements,
        // capped at max_len.
        for ( k = 0; k < num_vecs; k++ )
        {
            len   = bli_min( k + 1, max_len );
            a_vec = a + k*a_ld;

            bli_zdscal( len,
                        alpha,
                        a_vec, a_inc );
        }
    }
    else // lower
    {
        // Vector k starts on the diagonal and shrinks by one element per
        // iteration; stop as soon as nothing is left to scale.
        for ( k = 0; k < num_vecs; k++ )
        {
            len = max_len - k;
            if ( len <= 0 ) break;

            a_vec = a + k*a_ld + k*a_inc;

            bli_zdscal( len,
                        alpha,
                        a_vec, a_inc );
        }
    }
}
| void bli_zdscalv | ( | conj_t | conj, |
| int | n, | ||
| double * | alpha, | ||
| dcomplex * | x, | ||
| int | incx | ||
| ) |
References bli_zdscal(), and bli_zero_dim1().
Referenced by bli_zdapdiagmv(), FLA_Bsvd_v_opz_var1(), and FLA_Bsvd_v_opz_var2().
{
    // Scales the length-n complex vector x by the real scalar alpha. The
    // conj argument is not consulted here.

    // Skip the call entirely for an empty vector or a unit scalar.
    if ( bli_zero_dim1( n ) || bli_deq1( alpha ) ) return;

    bli_zdscal( n, alpha, x, incx );
}
| void bli_zfnorm | ( | int | m, |
| int | n, | ||
| dcomplex * | a, | ||
| int | a_rs, | ||
| int | a_cs, | ||
| double * | norm | ||
| ) |
References bli_is_row_storage(), bli_is_vector(), bli_vector_dim(), bli_vector_inc(), bli_zero_dim2(), BLIS_NO_TRANSPOSE, dcomplex::imag, and dcomplex::real.
Referenced by FLA_Norm_frob().
{
    dcomplex* a_ij;
    double    sum;
    int       lda, inca;
    int       n_iter;
    int       n_elem;
    int       i, j;

    // Computes the Frobenius norm of the m x n complex matrix A and stores
    // it in *norm.

    // The Frobenius norm of an empty matrix is zero. Store that result
    // rather than returning with *norm untouched, so the caller never reads
    // an indeterminate value.
    if ( bli_zero_dim2( m, n ) )
    {
        *norm = 0.0;
        return;
    }

    // Handle cases where A is a vector separately.
    if ( bli_is_vector( m, n ) )
    {
        // Initialize with values appropriate for vectors.
        n_iter = 1;
        n_elem = bli_vector_dim( m, n );
        lda    = 1; // multiplied by zero when n_iter == 1; not needed.
        inca   = bli_vector_inc( BLIS_NO_TRANSPOSE, m, n, a_rs, a_cs );
    }
    else // matrix case
    {
        // Initialize with optimal values for column-major storage.
        n_iter = n;
        n_elem = m;
        lda    = a_cs;
        inca   = a_rs;

        // An optimization: if A is row-major, then let's access the matrix
        // by rows instead of by columns for increased spatial locality.
        if ( bli_is_row_storage( a_rs, a_cs ) )
        {
            bli_swap_ints( n_iter, n_elem );
            bli_swap_ints( lda, inca );
        }
    }

    // Accumulate |a_ij|^2 = real^2 + imag^2 over every element. The squares
    // are summed without any scaling.
    sum = 0.0;

    for ( j = 0; j < n_iter; j++ )
    {
        for ( i = 0; i < n_elem; i++ )
        {
            a_ij = a + i*inca + j*lda;

            sum += a_ij->real * a_ij->real + a_ij->imag * a_ij->imag;
        }
    }

    // Compute the norm and store the result.
    *norm = sqrt( sum );
}
| void bli_zinvscalm | ( | conj_t | conj, |
| int | m, | ||
| int | n, | ||
| dcomplex * | alpha, | ||
| dcomplex * | a, | ||
| int | a_rs, | ||
| int | a_cs | ||
| ) |
References bli_is_row_storage(), bli_is_vector(), bli_vector_dim(), bli_vector_inc(), bli_zero_dim2(), bli_zinvert2s(), bli_zscal(), and BLIS_NO_TRANSPOSE.
Referenced by FLA_Inv_scal_external(), and FLA_Inv_scalc_external().
{
    dcomplex  alpha_recip;
    dcomplex* a_vec;
    int       a_ld, a_inc;
    int       num_vecs;
    int       vec_len;
    int       k;

    // Scales the m x n complex matrix A by the value bli_zinvert2s()
    // derives from the complex scalar alpha, one strided vector at a time.

    // Nothing to do for an empty matrix, or when alpha is one.
    if ( bli_zero_dim2( m, n ) ) return;
    if ( bli_zeq1( alpha ) ) return;

    if ( bli_is_vector( m, n ) )
    {
        // A is a vector, so the underlying scal runs exactly once. The
        // leading dimension is only ever multiplied by k == 0.
        a_ld     = 1;
        num_vecs = 1;
        vec_len  = bli_vector_dim( m, n );
        a_inc    = bli_vector_inc( BLIS_NO_TRANSPOSE, m, n, a_rs, a_cs );
    }
    else
    {
        // General matrix: begin with a column-by-column traversal.
        num_vecs = n;
        vec_len  = m;
        a_inc    = a_rs;
        a_ld     = a_cs;

        // If A is row-major, traverse by rows instead of by columns to
        // increase spatial locality.
        if ( bli_is_row_storage( a_rs, a_cs ) )
        {
            bli_swap_ints( num_vecs, vec_len );
            bli_swap_ints( a_ld, a_inc );
        }
    }

    // Compute the scaling factor once, outside the loop.
    bli_zinvert2s( conj, alpha, &alpha_recip );

    for ( k = 0; k < num_vecs; k++ )
    {
        a_vec = a + k*a_ld;

        bli_zscal( vec_len,
                   &alpha_recip,
                   a_vec, a_inc );
    }
}
| void bli_zinvscalv | ( | conj_t | conj, |
| int | n, | ||
| dcomplex * | alpha, | ||
| dcomplex * | x, | ||
| int | incx | ||
| ) |
References bli_zinvert2s(), and bli_zscal().
Referenced by bli_zrandmr(), FLA_Apply_H2_UT_l_opz_var1(), FLA_Apply_H2_UT_r_opz_var1(), FLA_Apply_HUD_UT_l_opz_var1(), FLA_Bidiag_UT_u_step_ofz_var2(), FLA_Bidiag_UT_u_step_ofz_var3(), FLA_Bidiag_UT_u_step_ofz_var4(), FLA_Bidiag_UT_u_step_opz_var2(), FLA_Bidiag_UT_u_step_opz_var3(), FLA_Bidiag_UT_u_step_opz_var4(), FLA_Bidiag_UT_u_step_opz_var5(), FLA_Chol_l_opz_var2(), FLA_Chol_l_opz_var3(), FLA_Chol_u_opz_var2(), FLA_Chol_u_opz_var3(), FLA_Eig_gest_il_opz_var1(), FLA_Eig_gest_il_opz_var2(), FLA_Eig_gest_il_opz_var3(), FLA_Eig_gest_il_opz_var4(), FLA_Eig_gest_il_opz_var5(), FLA_Eig_gest_iu_opz_var1(), FLA_Eig_gest_iu_opz_var2(), FLA_Eig_gest_iu_opz_var3(), FLA_Eig_gest_iu_opz_var4(), FLA_Eig_gest_iu_opz_var5(), FLA_Househ2_UT_l_opz(), FLA_Househ3UD_UT_opz(), FLA_LU_nopiv_opz_var3(), FLA_LU_nopiv_opz_var4(), FLA_LU_nopiv_opz_var5(), FLA_LU_piv_opz_var3(), FLA_LU_piv_opz_var4(), FLA_LU_piv_opz_var5(), FLA_Trinv_ln_opz_var1(), FLA_Trinv_ln_opz_var2(), FLA_Trinv_ln_opz_var3(), FLA_Trinv_un_opz_var1(), FLA_Trinv_un_opz_var2(), and FLA_Trinv_un_opz_var3().
{
    dcomplex recip;

    // Scales the length-n complex vector x by the value bli_zinvert2s()
    // derives from the complex scalar alpha.

    // A unit scalar leaves x unchanged.
    if ( bli_zeq1( alpha ) ) return;

    bli_zinvert2s( conj, alpha, &recip );

    bli_zscal( n, &recip, x, incx );
}
References cblas_dznrm2(), and F77_dznrm2().
Referenced by FLA_Househ2_UT_l_opz(), FLA_Househ2s_UT_l_opz(), FLA_Househ3UD_UT_opz(), and FLA_Nrm2_external().
{
    // Thin wrapper that stores the 2-norm of the complex vector x in *norm
    // using whichever BLAS interface the library was configured for.
#ifndef BLIS_ENABLE_CBLAS_INTERFACES
    // Fortran-77 interface: length and stride are passed by address.
    *norm = F77_dznrm2( &n, x, &incx );
#else
    *norm = cblas_dznrm2( n, x, incx );
#endif
}
References cblas_zscal(), and F77_zscal().
Referenced by bli_zaxpysmt(), bli_zaxpysv(), bli_zinvscalm(), bli_zinvscalv(), bli_zscalm(), bli_zscalmr(), bli_zscalv(), and FLA_SA_LU_unb().
{
    // Thin wrapper that scales the complex vector x by the complex scalar
    // alpha using whichever BLAS interface the library was configured for.
#ifndef BLIS_ENABLE_CBLAS_INTERFACES
    // Fortran-77 interface: length and stride are passed by address.
    F77_zscal( &n, alpha, x, &incx );
#else
    cblas_zscal( n, alpha, x, incx );
#endif
}
| void bli_zscalm | ( | conj_t | conj, |
| int | m, | ||
| int | n, | ||
| dcomplex * | alpha, | ||
| dcomplex * | a, | ||
| int | a_rs, | ||
| int | a_cs | ||
| ) |
References bli_is_row_storage(), bli_is_vector(), bli_vector_dim(), bli_vector_inc(), bli_zero_dim2(), bli_zscal(), and BLIS_NO_TRANSPOSE.
Referenced by bli_zgemm(), bli_zhemm(), bli_zsymm(), bli_ztrmmsx(), bli_ztrsmsx(), FLA_Lyap_h_opz_var1(), FLA_Lyap_h_opz_var2(), FLA_Lyap_h_opz_var3(), FLA_Lyap_h_opz_var4(), FLA_Lyap_n_opz_var1(), FLA_Lyap_n_opz_var2(), FLA_Lyap_n_opz_var3(), FLA_Lyap_n_opz_var4(), FLA_Scal_external(), and FLA_Scalc_external().
{
    dcomplex  alpha_used;
    dcomplex* a_vec;
    int       a_ld, a_inc;
    int       num_vecs;
    int       vec_len;
    int       k;

    // Scales the m x n complex matrix A by the complex scalar alpha (as
    // processed by bli_zcopys() with conj), one strided vector at a time.

    // Nothing to do for an empty matrix, or when alpha is one.
    if ( bli_zero_dim2( m, n ) ) return;
    if ( bli_zeq1( alpha ) ) return;

    if ( bli_is_vector( m, n ) )
    {
        // A is a vector, so the underlying scal runs exactly once. The
        // leading dimension is only ever multiplied by k == 0.
        a_ld     = 1;
        num_vecs = 1;
        vec_len  = bli_vector_dim( m, n );
        a_inc    = bli_vector_inc( BLIS_NO_TRANSPOSE, m, n, a_rs, a_cs );
    }
    else
    {
        // General matrix: begin with a column-by-column traversal.
        num_vecs = n;
        vec_len  = m;
        a_inc    = a_rs;
        a_ld     = a_cs;

        // If A is row-major, traverse by rows instead of by columns to
        // increase spatial locality.
        if ( bli_is_row_storage( a_rs, a_cs ) )
        {
            bli_swap_ints( num_vecs, vec_len );
            bli_swap_ints( a_ld, a_inc );
        }
    }

    // Obtain the scalar actually applied once, outside the loop.
    bli_zcopys( conj, alpha, &alpha_used );

    for ( k = 0; k < num_vecs; k++ )
    {
        a_vec = a + k*a_ld;

        bli_zscal( vec_len,
                   &alpha_used,
                   a_vec, a_inc );
    }
}
| void bli_zscalmr | ( | uplo_t | uplo, |
| int | m, | ||
| int | n, | ||
| dcomplex * | alpha, | ||
| dcomplex * | a, | ||
| int | a_rs, | ||
| int | a_cs | ||
| ) |
References bli_is_row_storage(), bli_is_upper(), bli_zero_dim2(), and bli_zscal().
Referenced by FLA_Scalr_external().
{
    dcomplex* a_vec;
    int       a_ld, a_inc;
    int       num_vecs;
    int       max_len;
    int       len;
    int       k;

    // Scales the triangle of the complex matrix A selected by uplo by the
    // complex scalar alpha, one strided vector at a time.

    // Nothing to do for an empty matrix, or when alpha is one.
    if ( bli_zero_dim2( m, n ) ) return;
    if ( bli_zeq1( alpha ) ) return;

    // Default traversal: one column per iteration.
    num_vecs = n;
    max_len  = m;
    a_inc    = a_rs;
    a_ld     = a_cs;

    // If A is row-major, walk rows instead of columns to increase spatial
    // locality. uplo is toggled together with the exchange of the row and
    // column roles.
    if ( bli_is_row_storage( a_rs, a_cs ) )
    {
        bli_swap_ints( num_vecs, max_len );
        bli_swap_ints( a_ld, a_inc );
        bli_toggle_uplo( uplo );
    }

    if ( bli_is_upper( uplo ) )
    {
        // Vector k starts at its first element and holds k+1 elements,
        // capped at max_len.
        for ( k = 0; k < num_vecs; k++ )
        {
            len   = bli_min( k + 1, max_len );
            a_vec = a + k*a_ld;

            bli_zscal( len,
                       alpha,
                       a_vec, a_inc );
        }
    }
    else // lower
    {
        // Vector k starts on the diagonal and shrinks by one element per
        // iteration; stop as soon as nothing is left to scale.
        for ( k = 0; k < num_vecs; k++ )
        {
            len = max_len - k;
            if ( len <= 0 ) break;

            a_vec = a + k*a_ld + k*a_inc;

            bli_zscal( len,
                       alpha,
                       a_vec, a_inc );
        }
    }
}
| void bli_zscalv | ( | conj_t | conj, |
| int | n, | ||
| dcomplex * | alpha, | ||
| dcomplex * | x, | ||
| int | incx | ||
| ) |
References bli_zero_dim1(), and bli_zscal().
Referenced by bli_zapdiagmv(), bli_zgemv(), bli_zhemv(), bli_ztrmvsx(), bli_ztrsvsx(), FLA_Eig_gest_il_opz_var3(), FLA_Eig_gest_iu_opz_var3(), FLA_Eig_gest_nl_opz_var1(), FLA_Eig_gest_nl_opz_var2(), FLA_Eig_gest_nl_opz_var4(), FLA_Eig_gest_nl_opz_var5(), FLA_Eig_gest_nu_opz_var1(), FLA_Eig_gest_nu_opz_var2(), FLA_Eig_gest_nu_opz_var4(), FLA_Eig_gest_nu_opz_var5(), FLA_Hess_UT_step_ofz_var2(), FLA_Hess_UT_step_ofz_var3(), FLA_Hess_UT_step_ofz_var4(), FLA_Hess_UT_step_opz_var2(), FLA_Hess_UT_step_opz_var3(), FLA_Hess_UT_step_opz_var4(), FLA_LQ_UT_form_Q_opz_var1(), FLA_QR_UT_form_Q_opz_var1(), FLA_Tridiag_UT_l_step_ofz_var2(), FLA_Tridiag_UT_l_step_ofz_var3(), FLA_Tridiag_UT_l_step_opz_var1(), FLA_Tridiag_UT_l_step_opz_var2(), FLA_Tridiag_UT_l_step_opz_var3(), FLA_Trinv_ln_opz_var4(), FLA_Trinv_lu_opz_var1(), FLA_Trinv_lu_opz_var2(), FLA_Trinv_lu_opz_var3(), FLA_Trinv_lu_opz_var4(), FLA_Trinv_un_opz_var4(), FLA_Trinv_uu_opz_var1(), FLA_Trinv_uu_opz_var2(), FLA_Trinv_uu_opz_var3(), FLA_Trinv_uu_opz_var4(), FLA_Ttmm_l_opz_var1(), FLA_Ttmm_l_opz_var2(), FLA_Ttmm_u_opz_var1(), and FLA_Ttmm_u_opz_var2().
{
    dcomplex alpha_used;

    // Scales the length-n complex vector x by the complex scalar alpha (as
    // processed by bli_zcopys() with conj).

    // Skip the call entirely for an empty vector or a unit scalar.
    if ( bli_zero_dim1( n ) || bli_zeq1( alpha ) ) return;

    bli_zcopys( conj, alpha, &alpha_used );

    bli_zscal( n, &alpha_used, x, incx );
}
| void bli_zscopymr | ( | uplo_t | uplo, |
| int | m, | ||
| int | n, | ||
| dcomplex * | a, | ||
| int | a_rs, | ||
| int | a_cs, | ||
| float * | b, | ||
| int | b_rs, | ||
| int | b_cs | ||
| ) |
References bli_is_row_storage(), bli_is_upper(), bli_zero_dim2(), bli_zscopyv(), and BLIS_NO_TRANSPOSE.
Referenced by FLA_Copyr_external().
{
    dcomplex* a_vec;
    float*    b_vec;
    int       a_ld, a_inc;
    int       b_ld, b_inc;
    int       num_vecs;
    int       max_len;
    int       len;
    int       k;

    // Copies the triangle of A selected by uplo into the single-precision
    // real matrix B via bli_zscopyv(), one strided vector at a time.

    // Nothing to copy when either dimension is zero.
    if ( bli_zero_dim2( m, n ) ) return;

    // Default traversal: one column per iteration.
    num_vecs = n;
    max_len  = m;
    a_inc    = a_rs;
    a_ld     = a_cs;
    b_inc    = b_rs;
    b_ld     = b_cs;

    // If B is row-major, walk rows instead of columns for increased spatial
    // locality. uplo is toggled together with the exchange of the row and
    // column roles.
    if ( bli_is_row_storage( b_rs, b_cs ) )
    {
        bli_swap_ints( num_vecs, max_len );
        bli_swap_ints( a_ld, a_inc );
        bli_swap_ints( b_ld, b_inc );
        bli_toggle_uplo( uplo );
    }

    // NOTE(review): BLIS_NO_TRANSPOSE is passed below in the conj_t slot of
    // bli_zscopyv(); a conj_t constant looks intended -- confirm. It is
    // harmless here because bli_zscopyv() never reads its conj argument.
    if ( bli_is_upper( uplo ) )
    {
        // Vector k starts at its first element and holds k+1 elements,
        // capped at max_len.
        for ( k = 0; k < num_vecs; k++ )
        {
            len   = bli_min( k + 1, max_len );
            a_vec = a + k*a_ld;
            b_vec = b + k*b_ld;

            bli_zscopyv( BLIS_NO_TRANSPOSE,
                         len,
                         a_vec, a_inc,
                         b_vec, b_inc );
        }
    }
    else // lower
    {
        // Vector k starts on the diagonal and shrinks by one element per
        // iteration; stop as soon as nothing is left to copy.
        for ( k = 0; k < num_vecs; k++ )
        {
            len = max_len - k;
            if ( len <= 0 ) break;

            a_vec = a + k*a_ld + k*a_inc;
            b_vec = b + k*b_ld + k*b_inc;

            bli_zscopyv( BLIS_NO_TRANSPOSE,
                         len,
                         a_vec, a_inc,
                         b_vec, b_inc );
        }
    }
}
| void bli_zscopymrt | ( | uplo_t | uplo, |
| trans_t | trans, | ||
| int | m, | ||
| int | n, | ||
| dcomplex * | a, | ||
| int | a_rs, | ||
| int | a_cs, | ||
| float * | b, | ||
| int | b_rs, | ||
| int | b_cs | ||
| ) |
References bli_does_trans(), bli_is_col_storage(), bli_is_lower(), bli_proj_trans_to_conj(), bli_zero_dim2(), and bli_zscopyv().
Referenced by FLA_Copyrt_external().
{
    dcomplex* a_vec;
    float*    b_vec;
    int       a_ld, a_inc;
    int       b_ld, b_inc;
    int       num_vecs;
    int       max_len;
    int       len;
    int       shrinking;
    int       k;
    conj_t    conj_a;

    // Copies the uplo triangle of A (read with the transposition encoded in
    // trans) into the single-precision real m x n matrix B via
    // bli_zscopyv().

    // Nothing to copy when either dimension is zero.
    if ( bli_zero_dim2( m, n ) ) return;

    // Strides depend only on how B is stored; the iteration shape depends on
    // both the storage of B and the triangle being referenced.
    if ( bli_is_col_storage( b_rs, b_cs ) )
    {
        // Traverse by columns.
        a_ld  = a_cs;
        a_inc = a_rs;
        b_ld  = b_cs;
        b_inc = b_rs;

        if ( bli_is_lower( uplo ) )
        {
            num_vecs  = bli_min( m, n );
            max_len   = m;
            shrinking = TRUE;
        }
        else // upper
        {
            num_vecs  = n;
            max_len   = bli_min( m, n );
            shrinking = FALSE;
        }
    }
    else // B is row-stored
    {
        // Traverse by rows.
        a_ld  = a_rs;
        a_inc = a_cs;
        b_ld  = b_rs;
        b_inc = b_cs;

        if ( bli_is_lower( uplo ) )
        {
            num_vecs  = m;
            max_len   = bli_min( m, n );
            shrinking = FALSE;
        }
        else // upper
        {
            num_vecs  = bli_min( m, n );
            max_len   = n;
            shrinking = TRUE;
        }
    }

    // A transposed A is read with its two strides exchanged.
    if ( bli_does_trans( trans ) )
    {
        bli_swap_ints( a_ld, a_inc );
    }

    // Only the conjugation component of trans is forwarded to the
    // vector copy.
    conj_a = bli_proj_trans_to_conj( trans );

    for ( k = 0; k < num_vecs; k++ )
    {
        if ( shrinking )
        {
            // Start on the diagonal; one element fewer per iteration.
            len   = max_len - k;
            a_vec = a + k*a_ld + k*a_inc;
            b_vec = b + k*b_ld + k*b_inc;
        }
        else
        {
            // Start at the first element; grow until capped at max_len.
            len   = bli_min( k + 1, max_len );
            a_vec = a + k*a_ld;
            b_vec = b + k*b_ld;
        }

        bli_zscopyv( conj_a,
                     len,
                     a_vec, a_inc,
                     b_vec, b_inc );
    }
}
| void bli_zscopymt | ( | trans_t | trans, |
| int | m, | ||
| int | n, | ||
| dcomplex * | a, | ||
| int | a_rs, | ||
| int | a_cs, | ||
| float * | b, | ||
| int | b_rs, | ||
| int | b_cs | ||
| ) |
References bli_does_trans(), bli_is_row_storage(), bli_is_vector(), bli_proj_trans_to_conj(), bli_vector_dim(), bli_vector_inc(), bli_zero_dim2(), bli_zscopyv(), and BLIS_NO_TRANSPOSE.
Referenced by FLA_Copy_external(), and FLA_Copyt_external().
{
    dcomplex* a_vec;
    float*    b_vec;
    int       a_ld, a_inc;
    int       b_ld, b_inc;
    int       num_vecs;
    int       vec_len;
    int       k;
    conj_t    conj_a;

    // Copies the m x n matrix A (optionally transposed, per trans) into the
    // single-precision real matrix B via bli_zscopyv(), one strided vector
    // at a time.

    // Nothing to copy when either dimension is zero.
    if ( bli_zero_dim2( m, n ) ) return;

    if ( bli_is_vector( m, n ) )
    {
        // A and B are vectors, so the underlying copy runs exactly once.
        // The leading dimensions are only ever multiplied by k == 0, so
        // their values are irrelevant.
        a_ld     = 1;
        b_ld     = 1;
        num_vecs = 1;
        vec_len  = bli_vector_dim( m, n );
        a_inc    = bli_vector_inc( trans, m, n, a_rs, a_cs );
        b_inc    = bli_vector_inc( BLIS_NO_TRANSPOSE, m, n, b_rs, b_cs );
    }
    else
    {
        // General matrix: begin with a column-by-column traversal of B.
        num_vecs = n;
        vec_len  = m;
        a_inc    = a_rs;
        a_ld     = a_cs;
        b_inc    = b_rs;
        b_ld     = b_cs;

        // A transposed A is read with its two strides exchanged.
        if ( bli_does_trans( trans ) )
        {
            bli_swap_ints( a_ld, a_inc );
        }

        // If B is row-major, traverse by rows instead of by columns for
        // increased spatial locality.
        if ( bli_is_row_storage( b_rs, b_cs ) )
        {
            bli_swap_ints( num_vecs, vec_len );
            bli_swap_ints( a_ld, a_inc );
            bli_swap_ints( b_ld, b_inc );
        }
    }

    // Only the conjugation component of trans is forwarded to the
    // vector copy.
    conj_a = bli_proj_trans_to_conj( trans );

    for ( k = 0; k < num_vecs; ++k )
    {
        a_vec = a + k*a_ld;
        b_vec = b + k*b_ld;

        bli_zscopyv( conj_a,
                     vec_len,
                     a_vec, a_inc,
                     b_vec, b_inc );
    }
}
| void bli_zscopyv | ( | conj_t | conj, |
| int | m, | ||
| dcomplex * | x, | ||
| int | incx, | ||
| float * | y, | ||
| int | incy | ||
| ) |
References bli_zero_dim1(), and dcomplex::real.
Referenced by bli_zscopymr(), bli_zscopymrt(), and bli_zscopymt().
{
    // Stores the real component of each of the m elements of x (stride incx)
    // into the corresponding element of y (stride incy). The conj argument
    // is not consulted in this body: only the real component is read.
    dcomplex* src;
    float*    dst;
    int       remaining;

    // Nothing to do for an empty vector.
    if ( bli_zero_dim1( m ) ) return;

    src = x;
    dst = y;

    for ( remaining = m; remaining > 0; --remaining )
    {
        *dst = src->real;

        src += incx;
        dst += incy;
    }
}
References cblas_zswap(), and F77_zswap().
Referenced by bli_zswapmt(), bli_zswapv(), FLA_SA_Apply_pivots(), and FLA_SA_LU_unb().
{
// Forwards a vector swap of x and y (length n, strides incx and incy) to the
// external BLAS. NOTE(review): the signature of this routine is not visible
// in this listing; parameter names are taken from the calls below.
#ifdef BLIS_ENABLE_CBLAS_INTERFACES
// CBLAS build: the length and strides are passed by value.
cblas_zswap( n,
x, incx,
y, incy );
#else
// Otherwise call the F77_zswap binding, which receives the length and
// strides by address.
F77_zswap( &n,
x, &incx,
y, &incy );
#endif
}
| void bli_zswapmt | ( | trans_t | trans, |
| int | m, | ||
| int | n, | ||
| dcomplex * | a, | ||
| int | a_rs, | ||
| int | a_cs, | ||
| dcomplex * | b, | ||
| int | b_rs, | ||
| int | b_cs | ||
| ) |
References bli_does_conj(), bli_does_notrans(), bli_does_trans(), bli_is_col_storage(), bli_is_row_storage(), bli_is_vector(), bli_vector_dim(), bli_vector_inc(), bli_zconjv(), bli_zero_dim2(), bli_zswap(), and BLIS_NO_TRANSPOSE.
Referenced by FLA_Swap_external(), and FLA_Swapt_external().
{
    // Exchanges the contents of the m-by-n matrices A (optionally transposed,
    // as encoded in trans) and B, one row or column at a time via bli_zswap().
    // When trans carries a conjugation, both swapped vectors are conjugated
    // in place after each exchange.
    dcomplex* a_vec;
    dcomplex* b_vec;
    int       a_ld, a_inc;   // A: stride between vectors / between elements
    int       b_ld, b_inc;   // B: stride between vectors / between elements
    int       num_vecs;
    int       vec_len;
    int       k;

    // Nothing to swap when either dimension is reported as empty.
    if ( bli_zero_dim2( m, n ) ) return;

    if ( bli_is_vector( m, n ) )
    {
        // Vector operands: issue exactly one call to the underlying swap.
        // The leading dimensions are only ever multiplied by k == 0, so
        // their value is irrelevant here.
        num_vecs = 1;
        vec_len  = bli_vector_dim( m, n );
        a_ld     = 1;
        a_inc    = bli_vector_inc( trans, m, n, a_rs, a_cs );
        b_ld     = 1;
        b_inc    = bli_vector_inc( BLIS_NO_TRANSPOSE, m, n, b_rs, b_cs );
    }
    else
    {
        // Matrix operands: start from a column-by-column traversal.
        num_vecs = n;
        vec_len  = m;
        b_ld     = b_cs;
        b_inc    = b_rs;

        // A transposed A is walked with its two strides exchanged.
        if ( bli_does_trans( trans ) )
        {
            a_ld  = a_rs;
            a_inc = a_cs;
        }
        else
        {
            a_ld  = a_cs;
            a_inc = a_rs;
        }

        // Switch to a row-by-row traversal only when B is row-major AND A is
        // effectively row-major once the requested transposition is taken
        // into account; otherwise keep the column traversal.
        if ( bli_is_row_storage( b_rs, b_cs ) &&
             ( ( bli_is_col_storage( a_rs, a_cs ) && bli_does_trans( trans ) ) ||
               ( bli_is_row_storage( a_rs, a_cs ) && bli_does_notrans( trans ) ) ) )
        {
            bli_swap_ints( num_vecs, vec_len );
            bli_swap_ints( a_ld, a_inc );
            bli_swap_ints( b_ld, b_inc );
        }
    }

    for ( k = 0; k < num_vecs; k++ )
    {
        a_vec = a + k*a_ld;
        b_vec = b + k*b_ld;

        bli_zswap( vec_len, a_vec, a_inc, b_vec, b_inc );

        // Conjugate both freshly swapped vectors when trans requests it.
        if ( bli_does_conj( trans ) )
        {
            bli_zconjv( vec_len, a_vec, a_inc );
            bli_zconjv( vec_len, b_vec, b_inc );
        }
    }
}
| void bli_zswapv | ( | int | n, |
| dcomplex * | x, | ||
| int | incx, | ||
| dcomplex * | y, | ||
| int | incy | ||
| ) |
References bli_zero_dim1(), and bli_zswap().
Referenced by FLA_Apply_pivots_macro_external(), FLA_Sort_evd_b_opz(), FLA_Sort_evd_f_opz(), FLA_Sort_svd_b_opz(), and FLA_Sort_svd_f_opz().
{
    // Swaps vectors x and y by forwarding to bli_zswap(), unless
    // bli_zero_dim1() reports the length as empty.
    if ( bli_zero_dim1( n ) )
    {
        return;
    }

    bli_zswap( n, x, incx, y, incy );
}
| void bli_zzcopymr | ( | uplo_t | uplo, |
| int | m, | ||
| int | n, | ||
| dcomplex * | a, | ||
| int | a_rs, | ||
| int | a_cs, | ||
| dcomplex * | b, | ||
| int | b_rs, | ||
| int | b_cs | ||
| ) |
References bli_is_row_storage(), bli_is_upper(), bli_zcopyv(), bli_zero_dim2(), and BLIS_NO_TRANSPOSE.
{
    // Copies the part of the m-by-n matrix A selected by uplo (upper: from
    // the top of each column down to the diagonal; lower: from the diagonal
    // to the end of each column) into the same positions of B, one vector
    // at a time via bli_zcopyv().
    //
    // NOTE(review): bli_zcopyv() is handed BLIS_NO_TRANSPOSE here, whereas
    // other callers in this file pass it a conj_t value; presumably the two
    // encodings agree for the "no-op" case -- confirm.
    dcomplex* a_vec;
    dcomplex* b_vec;
    int       a_ld, a_inc;   // A: stride between vectors / between elements
    int       b_ld, b_inc;   // B: stride between vectors / between elements
    int       num_vecs;
    int       max_len;
    int       len;
    int       k;

    // Nothing to copy when either dimension is reported as empty.
    if ( bli_zero_dim2( m, n ) ) return;

    if ( bli_is_row_storage( b_rs, b_cs ) )
    {
        // B is row-major: traverse by rows for contiguous access to B. Rows
        // of one triangle are walked like columns of the other, so uplo is
        // toggled to match.
        num_vecs = m;
        max_len  = n;
        a_ld     = a_rs;
        a_inc    = a_cs;
        b_ld     = b_rs;
        b_inc    = b_cs;
        bli_toggle_uplo( uplo );
    }
    else
    {
        // Default: traverse by columns.
        num_vecs = n;
        max_len  = m;
        a_ld     = a_cs;
        a_inc    = a_rs;
        b_ld     = b_cs;
        b_inc    = b_rs;
    }

    if ( bli_is_upper( uplo ) )
    {
        // Vector k holds k+1 stored elements, capped at the vector length.
        for ( k = 0; k < num_vecs; k++ )
        {
            len   = bli_min( k + 1, max_len );
            a_vec = a + k*a_ld;
            b_vec = b + k*b_ld;

            bli_zcopyv( BLIS_NO_TRANSPOSE, len, a_vec, a_inc, b_vec, b_inc );
        }
    }
    else // lower
    {
        // Vector k starts on the diagonal and shrinks by one element per
        // iteration; stop as soon as nothing is left below the diagonal.
        for ( k = 0; k < num_vecs; k++ )
        {
            len = max_len - k;
            if ( len <= 0 ) break;

            a_vec = a + k*a_ld + k*a_inc;
            b_vec = b + k*b_ld + k*b_inc;

            bli_zcopyv( BLIS_NO_TRANSPOSE, len, a_vec, a_inc, b_vec, b_inc );
        }
    }
}
| void bli_zzcopymrt | ( | uplo_t | uplo, |
| trans_t | trans, | ||
| int | m, | ||
| int | n, | ||
| dcomplex * | a, | ||
| int | a_rs, | ||
| int | a_cs, | ||
| dcomplex * | b, | ||
| int | b_rs, | ||
| int | b_cs | ||
| ) |
References bli_does_trans(), bli_is_col_storage(), bli_is_lower(), bli_proj_trans_to_conj(), bli_zcopyv(), and bli_zero_dim2().
{
    // Copies A (optionally transposed and/or conjugated, as encoded in
    // trans) into the part of the m-by-n matrix B selected by uplo, one
    // vector at a time via bli_zcopyv().
    dcomplex* a_vec;
    dcomplex* b_vec;
    int       a_ld, a_inc;   // A: stride between vectors / between elements
    int       b_ld, b_inc;   // B: stride between vectors / between elements
    int       num_vecs;
    int       len;
    int       max_len;
    int       len_shrinks;   // TRUE: vectors start on the diagonal and shrink
    int       min_dim;
    int       k;
    conj_t    conj;

    // Nothing to copy when either dimension is reported as empty.
    if ( bli_zero_dim2( m, n ) ) return;

    min_dim = bli_min( m, n );

    // Pick the traversal direction from B's storage, then the iteration
    // count and vector-length pattern from uplo.
    if ( bli_is_col_storage( b_rs, b_cs ) )
    {
        // Column-by-column traversal.
        a_ld  = a_cs;
        a_inc = a_rs;
        b_ld  = b_cs;
        b_inc = b_rs;

        if ( bli_is_lower( uplo ) )
        {
            num_vecs    = min_dim;
            max_len     = m;
            len_shrinks = TRUE;
        }
        else // upper
        {
            num_vecs    = n;
            max_len     = min_dim;
            len_shrinks = FALSE;
        }
    }
    else // row storage
    {
        // Row-by-row traversal.
        a_ld  = a_rs;
        a_inc = a_cs;
        b_ld  = b_rs;
        b_inc = b_cs;

        if ( bli_is_lower( uplo ) )
        {
            num_vecs    = m;
            max_len     = min_dim;
            len_shrinks = FALSE;
        }
        else // upper
        {
            num_vecs    = min_dim;
            max_len     = n;
            len_shrinks = TRUE;
        }
    }

    // A transposed A is walked with its two strides exchanged.
    if ( bli_does_trans( trans ) )
    {
        bli_swap_ints( a_ld, a_inc );
    }

    // The conjugation component of trans is forwarded to the vector copy.
    conj = bli_proj_trans_to_conj( trans );

    if ( len_shrinks )
    {
        // Each vector begins on the diagonal and is one element shorter
        // than the previous one.
        for ( k = 0; k < num_vecs; k++ )
        {
            len   = max_len - k;
            a_vec = a + k*a_ld + k*a_inc;
            b_vec = b + k*b_ld + k*b_inc;

            bli_zcopyv( conj, len, a_vec, a_inc, b_vec, b_inc );
        }
    }
    else
    {
        // Each vector begins at its first element and grows by one element
        // per iteration, capped at max_len.
        for ( k = 0; k < num_vecs; k++ )
        {
            len   = bli_min( k + 1, max_len );
            a_vec = a + k*a_ld;
            b_vec = b + k*b_ld;

            bli_zcopyv( conj, len, a_vec, a_inc, b_vec, b_inc );
        }
    }
}
| void bli_zzcopymt | ( | trans_t | trans, |
| int | m, | ||
| int | n, | ||
| dcomplex * | a, | ||
| int | a_rs, | ||
| int | a_cs, | ||
| dcomplex * | b, | ||
| int | b_rs, | ||
| int | b_cs | ||
| ) |
References bli_does_trans(), bli_is_row_storage(), bli_is_vector(), bli_proj_trans_to_conj(), bli_vector_dim(), bli_vector_inc(), bli_zcopyv(), bli_zero_dim2(), and BLIS_NO_TRANSPOSE.
{
    // Copies the m-by-n dcomplex matrix A (optionally transposed and/or
    // conjugated, as encoded in trans) into the dcomplex matrix B, one row
    // or column of B at a time via bli_zcopyv().
    dcomplex* a_vec;
    dcomplex* b_vec;
    int       a_ld, a_inc;   // A: stride between vectors / between elements
    int       b_ld, b_inc;   // B: stride between vectors / between elements
    int       num_vecs;
    int       vec_len;
    int       k;
    conj_t    conj;

    // Nothing to copy when either dimension is reported as empty.
    if ( bli_zero_dim2( m, n ) ) return;

    // The conjugation component of trans is forwarded to the vector copy.
    conj = bli_proj_trans_to_conj( trans );

    if ( bli_is_vector( m, n ) )
    {
        // Vector operands: issue exactly one call to the vector copy.
        // The leading dimensions are only ever multiplied by k == 0, so
        // their value is irrelevant here.
        num_vecs = 1;
        vec_len  = bli_vector_dim( m, n );
        a_ld     = 1;
        a_inc    = bli_vector_inc( trans, m, n, a_rs, a_cs );
        b_ld     = 1;
        b_inc    = bli_vector_inc( BLIS_NO_TRANSPOSE, m, n, b_rs, b_cs );
    }
    else
    {
        // Matrix operands: start from a column-by-column traversal of B.
        num_vecs = n;
        vec_len  = m;
        b_ld     = b_cs;
        b_inc    = b_rs;

        // A transposed A is walked with its two strides exchanged.
        if ( bli_does_trans( trans ) )
        {
            a_ld  = a_rs;
            a_inc = a_cs;
        }
        else
        {
            a_ld  = a_cs;
            a_inc = a_rs;
        }

        // If B is row-major, traverse by rows instead so that accesses to B
        // are contiguous.
        if ( bli_is_row_storage( b_rs, b_cs ) )
        {
            bli_swap_ints( num_vecs, vec_len );
            bli_swap_ints( a_ld, a_inc );
            bli_swap_ints( b_ld, b_inc );
        }
    }

    for ( k = 0; k < num_vecs; ++k )
    {
        a_vec = a + k*a_ld;
        b_vec = b + k*b_ld;

        bli_zcopyv( conj, vec_len, a_vec, a_inc, b_vec, b_inc );
    }
}
1.7.6.1