|
libflame
revision_anchor
|
Go to the source code of this file.
Functions | |
| FLA_Error | FLA_Apply_G_rf_opt_var1 (FLA_Obj G, FLA_Obj A) |
| FLA_Error | FLA_Apply_G_rf_ops_var1 (int k_G, int m_A, int n_A, scomplex *buff_G, int rs_G, int cs_G, float *buff_A, int rs_A, int cs_A) |
| FLA_Error | FLA_Apply_G_rf_opd_var1 (int k_G, int m_A, int n_A, dcomplex *buff_G, int rs_G, int cs_G, double *buff_A, int rs_A, int cs_A) |
| FLA_Error | FLA_Apply_G_rf_opc_var1 (int k_G, int m_A, int n_A, scomplex *buff_G, int rs_G, int cs_G, scomplex *buff_A, int rs_A, int cs_A) |
| FLA_Error | FLA_Apply_G_rf_opz_var1 (int k_G, int m_A, int n_A, dcomplex *buff_G, int rs_G, int cs_G, dcomplex *buff_A, int rs_A, int cs_A) |
| FLA_Error | FLA_Apply_G_rf_asm_var1 (FLA_Obj G, FLA_Obj A) |
| FLA_Error | FLA_Apply_G_rf_ass_var1 (int k_G, int m_A, int n_A, scomplex *buff_G, int rs_G, int cs_G, float *buff_A, int rs_A, int cs_A) |
| FLA_Error | FLA_Apply_G_rf_asd_var1 (int k_G, int m_A, int n_A, dcomplex *buff_G, int rs_G, int cs_G, double *buff_A, int rs_A, int cs_A) |
| FLA_Error | FLA_Apply_G_rf_asc_var1 (int k_G, int m_A, int n_A, scomplex *buff_G, int rs_G, int cs_G, scomplex *buff_A, int rs_A, int cs_A) |
| FLA_Error | FLA_Apply_G_rf_asz_var1 (int k_G, int m_A, int n_A, dcomplex *buff_G, int rs_G, int cs_G, dcomplex *buff_A, int rs_A, int cs_A) |
| FLA_Error | FLA_Apply_G_rf_blk_var1 (FLA_Obj G, FLA_Obj A, dim_t b_alg) |
| FLA_Error | FLA_Apply_G_rf_bls_var1 (int k_G, int m_A, int n_A, scomplex *buff_G, int rs_G, int cs_G, float *buff_A, int rs_A, int cs_A, int b_alg) |
| FLA_Error | FLA_Apply_G_rf_bld_var1 (int k_G, int m_A, int n_A, dcomplex *buff_G, int rs_G, int cs_G, double *buff_A, int rs_A, int cs_A, int b_alg) |
| FLA_Error | FLA_Apply_G_rf_blc_var1 (int k_G, int m_A, int n_A, scomplex *buff_G, int rs_G, int cs_G, scomplex *buff_A, int rs_A, int cs_A, int b_alg) |
| FLA_Error | FLA_Apply_G_rf_blz_var1 (int k_G, int m_A, int n_A, dcomplex *buff_G, int rs_G, int cs_G, dcomplex *buff_A, int rs_A, int cs_A, int b_alg) |
| FLA_Error | FLA_Apply_G_rf_opt_var2 (FLA_Obj G, FLA_Obj A) |
| FLA_Error | FLA_Apply_G_rf_ops_var2 (int k_G, int m_A, int n_A, scomplex *buff_G, int rs_G, int cs_G, float *buff_A, int rs_A, int cs_A) |
| FLA_Error | FLA_Apply_G_rf_opd_var2 (int k_G, int m_A, int n_A, dcomplex *buff_G, int rs_G, int cs_G, double *buff_A, int rs_A, int cs_A) |
| FLA_Error | FLA_Apply_G_rf_opc_var2 (int k_G, int m_A, int n_A, scomplex *buff_G, int rs_G, int cs_G, scomplex *buff_A, int rs_A, int cs_A) |
| FLA_Error | FLA_Apply_G_rf_opz_var2 (int k_G, int m_A, int n_A, dcomplex *buff_G, int rs_G, int cs_G, dcomplex *buff_A, int rs_A, int cs_A) |
| FLA_Error | FLA_Apply_G_rf_asm_var2 (FLA_Obj G, FLA_Obj A) |
| FLA_Error | FLA_Apply_G_rf_ass_var2 (int k_G, int m_A, int n_A, scomplex *buff_G, int rs_G, int cs_G, float *buff_A, int rs_A, int cs_A) |
| FLA_Error | FLA_Apply_G_rf_asd_var2 (int k_G, int m_A, int n_A, dcomplex *buff_G, int rs_G, int cs_G, double *buff_A, int rs_A, int cs_A) |
| FLA_Error | FLA_Apply_G_rf_asc_var2 (int k_G, int m_A, int n_A, scomplex *buff_G, int rs_G, int cs_G, scomplex *buff_A, int rs_A, int cs_A) |
| FLA_Error | FLA_Apply_G_rf_asz_var2 (int k_G, int m_A, int n_A, dcomplex *buff_G, int rs_G, int cs_G, dcomplex *buff_A, int rs_A, int cs_A) |
| FLA_Error | FLA_Apply_G_rf_blk_var2 (FLA_Obj G, FLA_Obj A, dim_t b_alg) |
| FLA_Error | FLA_Apply_G_rf_bls_var2 (int k_G, int m_A, int n_A, scomplex *buff_G, int rs_G, int cs_G, float *buff_A, int rs_A, int cs_A, int b_alg) |
| FLA_Error | FLA_Apply_G_rf_bld_var2 (int k_G, int m_A, int n_A, dcomplex *buff_G, int rs_G, int cs_G, double *buff_A, int rs_A, int cs_A, int b_alg) |
| FLA_Error | FLA_Apply_G_rf_blc_var2 (int k_G, int m_A, int n_A, scomplex *buff_G, int rs_G, int cs_G, scomplex *buff_A, int rs_A, int cs_A, int b_alg) |
| FLA_Error | FLA_Apply_G_rf_blz_var2 (int k_G, int m_A, int n_A, dcomplex *buff_G, int rs_G, int cs_G, dcomplex *buff_A, int rs_A, int cs_A, int b_alg) |
| FLA_Error | FLA_Apply_G_rf_opt_var3 (FLA_Obj G, FLA_Obj A) |
| FLA_Error | FLA_Apply_G_rf_ops_var3 (int k_G, int m_A, int n_A, scomplex *buff_G, int rs_G, int cs_G, float *buff_A, int rs_A, int cs_A) |
| FLA_Error | FLA_Apply_G_rf_opd_var3 (int k_G, int m_A, int n_A, dcomplex *buff_G, int rs_G, int cs_G, double *buff_A, int rs_A, int cs_A) |
| FLA_Error | FLA_Apply_G_rf_opc_var3 (int k_G, int m_A, int n_A, scomplex *buff_G, int rs_G, int cs_G, scomplex *buff_A, int rs_A, int cs_A) |
| FLA_Error | FLA_Apply_G_rf_opz_var3 (int k_G, int m_A, int n_A, dcomplex *buff_G, int rs_G, int cs_G, dcomplex *buff_A, int rs_A, int cs_A) |
| FLA_Error | FLA_Apply_G_rf_asm_var3 (FLA_Obj G, FLA_Obj A) |
| FLA_Error | FLA_Apply_G_rf_ass_var3 (int k_G, int m_A, int n_A, scomplex *buff_G, int rs_G, int cs_G, float *buff_A, int rs_A, int cs_A) |
| FLA_Error | FLA_Apply_G_rf_asd_var3 (int k_G, int m_A, int n_A, dcomplex *buff_G, int rs_G, int cs_G, double *buff_A, int rs_A, int cs_A) |
| FLA_Error | FLA_Apply_G_rf_asc_var3 (int k_G, int m_A, int n_A, scomplex *buff_G, int rs_G, int cs_G, scomplex *buff_A, int rs_A, int cs_A) |
| FLA_Error | FLA_Apply_G_rf_asz_var3 (int k_G, int m_A, int n_A, dcomplex *buff_G, int rs_G, int cs_G, dcomplex *buff_A, int rs_A, int cs_A) |
| FLA_Error | FLA_Apply_G_rf_blk_var3 (FLA_Obj G, FLA_Obj A, dim_t b_alg) |
| FLA_Error | FLA_Apply_G_rf_bls_var3 (int k_G, int m_A, int n_A, scomplex *buff_G, int rs_G, int cs_G, float *buff_A, int rs_A, int cs_A, int b_alg) |
| FLA_Error | FLA_Apply_G_rf_bld_var3 (int k_G, int m_A, int n_A, dcomplex *buff_G, int rs_G, int cs_G, double *buff_A, int rs_A, int cs_A, int b_alg) |
| FLA_Error | FLA_Apply_G_rf_blc_var3 (int k_G, int m_A, int n_A, scomplex *buff_G, int rs_G, int cs_G, scomplex *buff_A, int rs_A, int cs_A, int b_alg) |
| FLA_Error | FLA_Apply_G_rf_blz_var3 (int k_G, int m_A, int n_A, dcomplex *buff_G, int rs_G, int cs_G, dcomplex *buff_A, int rs_A, int cs_A, int b_alg) |
| FLA_Error | FLA_Apply_G_rf_opt_var4 (FLA_Obj G, FLA_Obj A) |
| FLA_Error | FLA_Apply_G_rf_ops_var4 (int k_G, int m_A, int n_A, scomplex *buff_G, int rs_G, int cs_G, float *buff_A, int rs_A, int cs_A) |
| FLA_Error | FLA_Apply_G_rf_opd_var4 (int k_G, int m_A, int n_A, dcomplex *buff_G, int rs_G, int cs_G, double *buff_A, int rs_A, int cs_A) |
| FLA_Error | FLA_Apply_G_rf_opc_var4 (int k_G, int m_A, int n_A, scomplex *buff_G, int rs_G, int cs_G, scomplex *buff_A, int rs_A, int cs_A) |
| FLA_Error | FLA_Apply_G_rf_opz_var4 (int k_G, int m_A, int n_A, dcomplex *buff_G, int rs_G, int cs_G, dcomplex *buff_A, int rs_A, int cs_A) |
| FLA_Error | FLA_Apply_G_rf_asm_var4 (FLA_Obj G, FLA_Obj A) |
| FLA_Error | FLA_Apply_G_rf_ass_var4 (int k_G, int m_A, int n_A, scomplex *buff_G, int rs_G, int cs_G, float *buff_A, int rs_A, int cs_A) |
| FLA_Error | FLA_Apply_G_rf_asd_var4 (int k_G, int m_A, int n_A, dcomplex *buff_G, int rs_G, int cs_G, double *buff_A, int rs_A, int cs_A) |
| FLA_Error | FLA_Apply_G_rf_asc_var4 (int k_G, int m_A, int n_A, scomplex *buff_G, int rs_G, int cs_G, scomplex *buff_A, int rs_A, int cs_A) |
| FLA_Error | FLA_Apply_G_rf_asz_var4 (int k_G, int m_A, int n_A, dcomplex *buff_G, int rs_G, int cs_G, dcomplex *buff_A, int rs_A, int cs_A) |
| FLA_Error | FLA_Apply_G_rf_blk_var4 (FLA_Obj G, FLA_Obj A, dim_t b_alg) |
| FLA_Error | FLA_Apply_G_rf_bls_var4 (int k_G, int m_A, int n_A, scomplex *buff_G, int rs_G, int cs_G, float *buff_A, int rs_A, int cs_A, int b_alg) |
| FLA_Error | FLA_Apply_G_rf_bld_var4 (int k_G, int m_A, int n_A, dcomplex *buff_G, int rs_G, int cs_G, double *buff_A, int rs_A, int cs_A, int b_alg) |
| FLA_Error | FLA_Apply_G_rf_blc_var4 (int k_G, int m_A, int n_A, scomplex *buff_G, int rs_G, int cs_G, scomplex *buff_A, int rs_A, int cs_A, int b_alg) |
| FLA_Error | FLA_Apply_G_rf_blz_var4 (int k_G, int m_A, int n_A, dcomplex *buff_G, int rs_G, int cs_G, dcomplex *buff_A, int rs_A, int cs_A, int b_alg) |
| FLA_Error | FLA_Apply_G_rf_opt_var5 (FLA_Obj G, FLA_Obj A) |
| FLA_Error | FLA_Apply_G_rf_ops_var5 (int k_G, int m_A, int n_A, scomplex *buff_G, int rs_G, int cs_G, float *buff_A, int rs_A, int cs_A) |
| FLA_Error | FLA_Apply_G_rf_opd_var5 (int k_G, int m_A, int n_A, dcomplex *buff_G, int rs_G, int cs_G, double *buff_A, int rs_A, int cs_A) |
| FLA_Error | FLA_Apply_G_rf_opc_var5 (int k_G, int m_A, int n_A, scomplex *buff_G, int rs_G, int cs_G, scomplex *buff_A, int rs_A, int cs_A) |
| FLA_Error | FLA_Apply_G_rf_opz_var5 (int k_G, int m_A, int n_A, dcomplex *buff_G, int rs_G, int cs_G, dcomplex *buff_A, int rs_A, int cs_A) |
| FLA_Error | FLA_Apply_G_rf_asm_var5 (FLA_Obj G, FLA_Obj A) |
| FLA_Error | FLA_Apply_G_rf_ass_var5 (int k_G, int m_A, int n_A, scomplex *buff_G, int rs_G, int cs_G, float *buff_A, int rs_A, int cs_A) |
| FLA_Error | FLA_Apply_G_rf_asd_var5 (int k_G, int m_A, int n_A, dcomplex *buff_G, int rs_G, int cs_G, double *buff_A, int rs_A, int cs_A) |
| FLA_Error | FLA_Apply_G_rf_asc_var5 (int k_G, int m_A, int n_A, scomplex *buff_G, int rs_G, int cs_G, scomplex *buff_A, int rs_A, int cs_A) |
| FLA_Error | FLA_Apply_G_rf_asz_var5 (int k_G, int m_A, int n_A, dcomplex *buff_G, int rs_G, int cs_G, dcomplex *buff_A, int rs_A, int cs_A) |
| FLA_Error | FLA_Apply_G_rf_blk_var5 (FLA_Obj G, FLA_Obj A, dim_t b_alg) |
| FLA_Error | FLA_Apply_G_rf_bls_var5 (int k_G, int m_A, int n_A, scomplex *buff_G, int rs_G, int cs_G, float *buff_A, int rs_A, int cs_A, int b_alg) |
| FLA_Error | FLA_Apply_G_rf_bld_var5 (int k_G, int m_A, int n_A, dcomplex *buff_G, int rs_G, int cs_G, double *buff_A, int rs_A, int cs_A, int b_alg) |
| FLA_Error | FLA_Apply_G_rf_blc_var5 (int k_G, int m_A, int n_A, scomplex *buff_G, int rs_G, int cs_G, scomplex *buff_A, int rs_A, int cs_A, int b_alg) |
| FLA_Error | FLA_Apply_G_rf_blz_var5 (int k_G, int m_A, int n_A, dcomplex *buff_G, int rs_G, int cs_G, dcomplex *buff_A, int rs_A, int cs_A, int b_alg) |
| FLA_Error | FLA_Apply_G_rf_opt_var6 (FLA_Obj G, FLA_Obj A) |
| FLA_Error | FLA_Apply_G_rf_ops_var6 (int k_G, int m_A, int n_A, scomplex *buff_G, int rs_G, int cs_G, float *buff_A, int rs_A, int cs_A) |
| FLA_Error | FLA_Apply_G_rf_opd_var6 (int k_G, int m_A, int n_A, dcomplex *buff_G, int rs_G, int cs_G, double *buff_A, int rs_A, int cs_A) |
| FLA_Error | FLA_Apply_G_rf_opc_var6 (int k_G, int m_A, int n_A, scomplex *buff_G, int rs_G, int cs_G, scomplex *buff_A, int rs_A, int cs_A) |
| FLA_Error | FLA_Apply_G_rf_opz_var6 (int k_G, int m_A, int n_A, dcomplex *buff_G, int rs_G, int cs_G, dcomplex *buff_A, int rs_A, int cs_A) |
| FLA_Error | FLA_Apply_G_rf_asm_var6 (FLA_Obj G, FLA_Obj A) |
| FLA_Error | FLA_Apply_G_rf_ass_var6 (int k_G, int m_A, int n_A, scomplex *buff_G, int rs_G, int cs_G, float *buff_A, int rs_A, int cs_A) |
| FLA_Error | FLA_Apply_G_rf_asd_var6 (int k_G, int m_A, int n_A, dcomplex *buff_G, int rs_G, int cs_G, double *buff_A, int rs_A, int cs_A) |
| FLA_Error | FLA_Apply_G_rf_asc_var6 (int k_G, int m_A, int n_A, scomplex *buff_G, int rs_G, int cs_G, scomplex *buff_A, int rs_A, int cs_A) |
| FLA_Error | FLA_Apply_G_rf_asz_var6 (int k_G, int m_A, int n_A, dcomplex *buff_G, int rs_G, int cs_G, dcomplex *buff_A, int rs_A, int cs_A) |
| FLA_Error | FLA_Apply_G_rf_blk_var6 (FLA_Obj G, FLA_Obj A, dim_t b_alg) |
| FLA_Error | FLA_Apply_G_rf_bls_var6 (int k_G, int m_A, int n_A, scomplex *buff_G, int rs_G, int cs_G, float *buff_A, int rs_A, int cs_A, int b_alg) |
| FLA_Error | FLA_Apply_G_rf_bld_var6 (int k_G, int m_A, int n_A, dcomplex *buff_G, int rs_G, int cs_G, double *buff_A, int rs_A, int cs_A, int b_alg) |
| FLA_Error | FLA_Apply_G_rf_blc_var6 (int k_G, int m_A, int n_A, scomplex *buff_G, int rs_G, int cs_G, scomplex *buff_A, int rs_A, int cs_A, int b_alg) |
| FLA_Error | FLA_Apply_G_rf_blz_var6 (int k_G, int m_A, int n_A, dcomplex *buff_G, int rs_G, int cs_G, dcomplex *buff_A, int rs_A, int cs_A, int b_alg) |
| FLA_Error | FLA_Apply_G_rf_opt_var7 (FLA_Obj G, FLA_Obj A) |
| FLA_Error | FLA_Apply_G_rf_ops_var7 (int k_G, int m_A, int n_A, scomplex *buff_G, int rs_G, int cs_G, float *buff_A, int rs_A, int cs_A) |
| FLA_Error | FLA_Apply_G_rf_opd_var7 (int k_G, int m_A, int n_A, dcomplex *buff_G, int rs_G, int cs_G, double *buff_A, int rs_A, int cs_A) |
| FLA_Error | FLA_Apply_G_rf_opc_var7 (int k_G, int m_A, int n_A, scomplex *buff_G, int rs_G, int cs_G, scomplex *buff_A, int rs_A, int cs_A) |
| FLA_Error | FLA_Apply_G_rf_opz_var7 (int k_G, int m_A, int n_A, dcomplex *buff_G, int rs_G, int cs_G, dcomplex *buff_A, int rs_A, int cs_A) |
| FLA_Error | FLA_Apply_G_rf_asm_var7 (FLA_Obj G, FLA_Obj A) |
| FLA_Error | FLA_Apply_G_rf_ass_var7 (int k_G, int m_A, int n_A, scomplex *buff_G, int rs_G, int cs_G, float *buff_A, int rs_A, int cs_A) |
| FLA_Error | FLA_Apply_G_rf_asd_var7 (int k_G, int m_A, int n_A, dcomplex *buff_G, int rs_G, int cs_G, double *buff_A, int rs_A, int cs_A) |
| FLA_Error | FLA_Apply_G_rf_asc_var7 (int k_G, int m_A, int n_A, scomplex *buff_G, int rs_G, int cs_G, scomplex *buff_A, int rs_A, int cs_A) |
| FLA_Error | FLA_Apply_G_rf_asz_var7 (int k_G, int m_A, int n_A, dcomplex *buff_G, int rs_G, int cs_G, dcomplex *buff_A, int rs_A, int cs_A) |
| FLA_Error | FLA_Apply_G_rf_blk_var7 (FLA_Obj G, FLA_Obj A, dim_t b_alg) |
| FLA_Error | FLA_Apply_G_rf_bls_var7 (int k_G, int m_A, int n_A, scomplex *buff_G, int rs_G, int cs_G, float *buff_A, int rs_A, int cs_A, int b_alg) |
| FLA_Error | FLA_Apply_G_rf_bld_var7 (int k_G, int m_A, int n_A, dcomplex *buff_G, int rs_G, int cs_G, double *buff_A, int rs_A, int cs_A, int b_alg) |
| FLA_Error | FLA_Apply_G_rf_blc_var7 (int k_G, int m_A, int n_A, scomplex *buff_G, int rs_G, int cs_G, scomplex *buff_A, int rs_A, int cs_A, int b_alg) |
| FLA_Error | FLA_Apply_G_rf_blz_var7 (int k_G, int m_A, int n_A, dcomplex *buff_G, int rs_G, int cs_G, dcomplex *buff_A, int rs_A, int cs_A, int b_alg) |
| FLA_Error | FLA_Apply_G_rf_opt_var8 (FLA_Obj G, FLA_Obj A) |
| FLA_Error | FLA_Apply_G_rf_ops_var8 (int k_G, int m_A, int n_A, scomplex *buff_G, int rs_G, int cs_G, float *buff_A, int rs_A, int cs_A) |
| FLA_Error | FLA_Apply_G_rf_opd_var8 (int k_G, int m_A, int n_A, dcomplex *buff_G, int rs_G, int cs_G, double *buff_A, int rs_A, int cs_A) |
| FLA_Error | FLA_Apply_G_rf_opc_var8 (int k_G, int m_A, int n_A, scomplex *buff_G, int rs_G, int cs_G, scomplex *buff_A, int rs_A, int cs_A) |
| FLA_Error | FLA_Apply_G_rf_opz_var8 (int k_G, int m_A, int n_A, dcomplex *buff_G, int rs_G, int cs_G, dcomplex *buff_A, int rs_A, int cs_A) |
| FLA_Error | FLA_Apply_G_rf_asm_var8 (FLA_Obj G, FLA_Obj A) |
| FLA_Error | FLA_Apply_G_rf_ass_var8 (int k_G, int m_A, int n_A, scomplex *buff_G, int rs_G, int cs_G, float *buff_A, int rs_A, int cs_A) |
| FLA_Error | FLA_Apply_G_rf_asd_var8 (int k_G, int m_A, int n_A, dcomplex *buff_G, int rs_G, int cs_G, double *buff_A, int rs_A, int cs_A) |
| FLA_Error | FLA_Apply_G_rf_asc_var8 (int k_G, int m_A, int n_A, scomplex *buff_G, int rs_G, int cs_G, scomplex *buff_A, int rs_A, int cs_A) |
| FLA_Error | FLA_Apply_G_rf_asz_var8 (int k_G, int m_A, int n_A, dcomplex *buff_G, int rs_G, int cs_G, dcomplex *buff_A, int rs_A, int cs_A) |
| FLA_Error | FLA_Apply_G_rf_blk_var8 (FLA_Obj G, FLA_Obj A, dim_t b_alg) |
| FLA_Error | FLA_Apply_G_rf_bls_var8 (int k_G, int m_A, int n_A, scomplex *buff_G, int rs_G, int cs_G, float *buff_A, int rs_A, int cs_A, int b_alg) |
| FLA_Error | FLA_Apply_G_rf_bld_var8 (int k_G, int m_A, int n_A, dcomplex *buff_G, int rs_G, int cs_G, double *buff_A, int rs_A, int cs_A, int b_alg) |
| FLA_Error | FLA_Apply_G_rf_blc_var8 (int k_G, int m_A, int n_A, scomplex *buff_G, int rs_G, int cs_G, scomplex *buff_A, int rs_A, int cs_A, int b_alg) |
| FLA_Error | FLA_Apply_G_rf_blz_var8 (int k_G, int m_A, int n_A, dcomplex *buff_G, int rs_G, int cs_G, dcomplex *buff_A, int rs_A, int cs_A, int b_alg) |
| FLA_Error | FLA_Apply_G_rf_opt_var9 (FLA_Obj G, FLA_Obj A) |
| FLA_Error | FLA_Apply_G_rf_ops_var9 (int k_G, int m_A, int n_A, scomplex *buff_G, int rs_G, int cs_G, float *buff_A, int rs_A, int cs_A) |
| FLA_Error | FLA_Apply_G_rf_opd_var9 (int k_G, int m_A, int n_A, dcomplex *buff_G, int rs_G, int cs_G, double *buff_A, int rs_A, int cs_A) |
| FLA_Error | FLA_Apply_G_rf_opc_var9 (int k_G, int m_A, int n_A, scomplex *buff_G, int rs_G, int cs_G, scomplex *buff_A, int rs_A, int cs_A) |
| FLA_Error | FLA_Apply_G_rf_opz_var9 (int k_G, int m_A, int n_A, dcomplex *buff_G, int rs_G, int cs_G, dcomplex *buff_A, int rs_A, int cs_A) |
| FLA_Error | FLA_Apply_G_rf_asm_var9 (FLA_Obj G, FLA_Obj A) |
| FLA_Error | FLA_Apply_G_rf_ass_var9 (int k_G, int m_A, int n_A, scomplex *buff_G, int rs_G, int cs_G, float *buff_A, int rs_A, int cs_A) |
| FLA_Error | FLA_Apply_G_rf_asd_var9 (int k_G, int m_A, int n_A, dcomplex *buff_G, int rs_G, int cs_G, double *buff_A, int rs_A, int cs_A) |
| FLA_Error | FLA_Apply_G_rf_asc_var9 (int k_G, int m_A, int n_A, scomplex *buff_G, int rs_G, int cs_G, scomplex *buff_A, int rs_A, int cs_A) |
| FLA_Error | FLA_Apply_G_rf_asz_var9 (int k_G, int m_A, int n_A, dcomplex *buff_G, int rs_G, int cs_G, dcomplex *buff_A, int rs_A, int cs_A) |
| FLA_Error | FLA_Apply_G_rf_blk_var9 (FLA_Obj G, FLA_Obj A, dim_t b_alg) |
| FLA_Error | FLA_Apply_G_rf_bls_var9 (int k_G, int m_A, int n_A, scomplex *buff_G, int rs_G, int cs_G, float *buff_A, int rs_A, int cs_A, int b_alg) |
| FLA_Error | FLA_Apply_G_rf_bld_var9 (int k_G, int m_A, int n_A, dcomplex *buff_G, int rs_G, int cs_G, double *buff_A, int rs_A, int cs_A, int b_alg) |
| FLA_Error | FLA_Apply_G_rf_blc_var9 (int k_G, int m_A, int n_A, scomplex *buff_G, int rs_G, int cs_G, scomplex *buff_A, int rs_A, int cs_A, int b_alg) |
| FLA_Error | FLA_Apply_G_rf_blz_var9 (int k_G, int m_A, int n_A, dcomplex *buff_G, int rs_G, int cs_G, dcomplex *buff_A, int rs_A, int cs_A, int b_alg) |
| FLA_Error | FLA_Apply_G_rf_asm_var3b (FLA_Obj G, FLA_Obj A) |
| FLA_Error | FLA_Apply_G_rf_ass_var3b (int k_G, int m_A, int n_A, int i_k, int iTL, scomplex *buff_G, int rs_G, int cs_G, float *buff_A, int rs_A, int cs_A) |
| FLA_Error | FLA_Apply_G_rf_asd_var3b (int k_G, int m_A, int n_A, int i_k, int iTL, dcomplex *buff_G, int rs_G, int cs_G, double *buff_A, int rs_A, int cs_A) |
| FLA_Error | FLA_Apply_G_rf_asc_var3b (int k_G, int m_A, int n_A, int i_k, int iTL, scomplex *buff_G, int rs_G, int cs_G, scomplex *buff_A, int rs_A, int cs_A) |
| FLA_Error | FLA_Apply_G_rf_asz_var3b (int k_G, int m_A, int n_A, int i_k, int iTL, dcomplex *buff_G, int rs_G, int cs_G, dcomplex *buff_A, int rs_A, int cs_A) |
| FLA_Error | FLA_Apply_G_rf_blk_var3b (FLA_Obj G, FLA_Obj A, dim_t b_alg) |
| FLA_Error | FLA_Apply_G_rf_bls_var3b (int k_G, int m_A, int n_A, int i_k, scomplex *buff_G, int rs_G, int cs_G, float *buff_A, int rs_A, int cs_A, int b_alg) |
| FLA_Error | FLA_Apply_G_rf_bld_var3b (int k_G, int m_A, int n_A, int i_k, dcomplex *buff_G, int rs_G, int cs_G, double *buff_A, int rs_A, int cs_A, int b_alg) |
| FLA_Error | FLA_Apply_G_rf_blc_var3b (int k_G, int m_A, int n_A, int i_k, scomplex *buff_G, int rs_G, int cs_G, scomplex *buff_A, int rs_A, int cs_A, int b_alg) |
| FLA_Error | FLA_Apply_G_rf_blz_var3b (int k_G, int m_A, int n_A, int i_k, dcomplex *buff_G, int rs_G, int cs_G, dcomplex *buff_A, int rs_A, int cs_A, int b_alg) |
| FLA_Error | FLA_Apply_G_rf_asm_var5b (FLA_Obj G, FLA_Obj A) |
| FLA_Error | FLA_Apply_G_rf_ass_var5b (int k_G, int m_A, int n_A, int i_k, int iTL, scomplex *buff_G, int rs_G, int cs_G, float *buff_A, int rs_A, int cs_A) |
| FLA_Error | FLA_Apply_G_rf_asd_var5b (int k_G, int m_A, int n_A, int i_k, int iTL, dcomplex *buff_G, int rs_G, int cs_G, double *buff_A, int rs_A, int cs_A) |
| FLA_Error | FLA_Apply_G_rf_asc_var5b (int k_G, int m_A, int n_A, int i_k, int iTL, scomplex *buff_G, int rs_G, int cs_G, scomplex *buff_A, int rs_A, int cs_A) |
| FLA_Error | FLA_Apply_G_rf_asz_var5b (int k_G, int m_A, int n_A, int i_k, int iTL, dcomplex *buff_G, int rs_G, int cs_G, dcomplex *buff_A, int rs_A, int cs_A) |
| FLA_Error | FLA_Apply_G_rf_blk_var5b (FLA_Obj G, FLA_Obj A, dim_t b_alg) |
| FLA_Error | FLA_Apply_G_rf_bls_var5b (int k_G, int m_A, int n_A, int i_k, scomplex *buff_G, int rs_G, int cs_G, float *buff_A, int rs_A, int cs_A, int b_alg) |
| FLA_Error | FLA_Apply_G_rf_bld_var5b (int k_G, int m_A, int n_A, int i_k, dcomplex *buff_G, int rs_G, int cs_G, double *buff_A, int rs_A, int cs_A, int b_alg) |
| FLA_Error | FLA_Apply_G_rf_blc_var5b (int k_G, int m_A, int n_A, int i_k, scomplex *buff_G, int rs_G, int cs_G, scomplex *buff_A, int rs_A, int cs_A, int b_alg) |
| FLA_Error | FLA_Apply_G_rf_blz_var5b (int k_G, int m_A, int n_A, int i_k, dcomplex *buff_G, int rs_G, int cs_G, dcomplex *buff_A, int rs_A, int cs_A, int b_alg) |
| FLA_Error | FLA_Apply_G_rf_asm_var6b (FLA_Obj G, FLA_Obj A) |
| FLA_Error | FLA_Apply_G_rf_ass_var6b (int k_G, int m_A, int n_A, int i_k, int iTL, scomplex *buff_G, int rs_G, int cs_G, float *buff_A, int rs_A, int cs_A) |
| FLA_Error | FLA_Apply_G_rf_asd_var6b (int k_G, int m_A, int n_A, int i_k, int iTL, dcomplex *buff_G, int rs_G, int cs_G, double *buff_A, int rs_A, int cs_A) |
| FLA_Error | FLA_Apply_G_rf_asc_var6b (int k_G, int m_A, int n_A, int i_k, int iTL, scomplex *buff_G, int rs_G, int cs_G, scomplex *buff_A, int rs_A, int cs_A) |
| FLA_Error | FLA_Apply_G_rf_asz_var6b (int k_G, int m_A, int n_A, int i_k, int iTL, dcomplex *buff_G, int rs_G, int cs_G, dcomplex *buff_A, int rs_A, int cs_A) |
| FLA_Error | FLA_Apply_G_rf_blk_var6b (FLA_Obj G, FLA_Obj A, dim_t b_alg) |
| FLA_Error | FLA_Apply_G_rf_bls_var6b (int k_G, int m_A, int n_A, int i_k, scomplex *buff_G, int rs_G, int cs_G, float *buff_A, int rs_A, int cs_A, int b_alg) |
| FLA_Error | FLA_Apply_G_rf_bld_var6b (int k_G, int m_A, int n_A, int i_k, dcomplex *buff_G, int rs_G, int cs_G, double *buff_A, int rs_A, int cs_A, int b_alg) |
| FLA_Error | FLA_Apply_G_rf_blc_var6b (int k_G, int m_A, int n_A, int i_k, scomplex *buff_G, int rs_G, int cs_G, scomplex *buff_A, int rs_A, int cs_A, int b_alg) |
| FLA_Error | FLA_Apply_G_rf_blz_var6b (int k_G, int m_A, int n_A, int i_k, dcomplex *buff_G, int rs_G, int cs_G, dcomplex *buff_A, int rs_A, int cs_A, int b_alg) |
| FLA_Error | FLA_Apply_G_rf_asm_var8b (FLA_Obj G, FLA_Obj A) |
| FLA_Error | FLA_Apply_G_rf_ass_var8b (int k_G, int m_A, int n_A, int i_k, int iTL, scomplex *buff_G, int rs_G, int cs_G, float *buff_A, int rs_A, int cs_A) |
| FLA_Error | FLA_Apply_G_rf_asd_var8b (int k_G, int m_A, int n_A, int i_k, int iTL, dcomplex *buff_G, int rs_G, int cs_G, double *buff_A, int rs_A, int cs_A) |
| FLA_Error | FLA_Apply_G_rf_asc_var8b (int k_G, int m_A, int n_A, int i_k, int iTL, scomplex *buff_G, int rs_G, int cs_G, scomplex *buff_A, int rs_A, int cs_A) |
| FLA_Error | FLA_Apply_G_rf_asz_var8b (int k_G, int m_A, int n_A, int i_k, int iTL, dcomplex *buff_G, int rs_G, int cs_G, dcomplex *buff_A, int rs_A, int cs_A) |
| FLA_Error | FLA_Apply_G_rf_blk_var8b (FLA_Obj G, FLA_Obj A, dim_t b_alg) |
| FLA_Error | FLA_Apply_G_rf_bls_var8b (int k_G, int m_A, int n_A, int i_k, scomplex *buff_G, int rs_G, int cs_G, float *buff_A, int rs_A, int cs_A, int b_alg) |
| FLA_Error | FLA_Apply_G_rf_bld_var8b (int k_G, int m_A, int n_A, int i_k, dcomplex *buff_G, int rs_G, int cs_G, double *buff_A, int rs_A, int cs_A, int b_alg) |
| FLA_Error | FLA_Apply_G_rf_blc_var8b (int k_G, int m_A, int n_A, int i_k, scomplex *buff_G, int rs_G, int cs_G, scomplex *buff_A, int rs_A, int cs_A, int b_alg) |
| FLA_Error | FLA_Apply_G_rf_blz_var8b (int k_G, int m_A, int n_A, int i_k, dcomplex *buff_G, int rs_G, int cs_G, dcomplex *buff_A, int rs_A, int cs_A, int b_alg) |
| FLA_Error | FLA_Apply_G_rf_bhs_var3 (int k_G, int m_A, int n_A, scomplex *buff_G, int rs_G, int cs_G, float *buff_A, int rs_A, int cs_A, int b_alg) |
| FLA_Error | FLA_Apply_G_rf_bhd_var3 (int k_G, int m_A, int n_A, dcomplex *buff_G, int rs_G, int cs_G, double *buff_A, int rs_A, int cs_A, int b_alg) |
| FLA_Error | FLA_Apply_G_rf_bhc_var3 (int k_G, int m_A, int n_A, scomplex *buff_G, int rs_G, int cs_G, scomplex *buff_A, int rs_A, int cs_A, int b_alg) |
| FLA_Error | FLA_Apply_G_rf_bhz_var3 (int k_G, int m_A, int n_A, dcomplex *buff_G, int rs_G, int cs_G, dcomplex *buff_A, int rs_A, int cs_A, int b_alg) |
| FLA_Error | FLA_Apply_G_rf_asm_var9b (FLA_Obj G, FLA_Obj A) |
| FLA_Error | FLA_Apply_G_rf_ass_var9b (int k_G, int m_A, int n_A, int i_k, int iTL, scomplex *buff_G, int rs_G, int cs_G, float *buff_A, int rs_A, int cs_A) |
| FLA_Error | FLA_Apply_G_rf_asd_var9b (int k_G, int m_A, int n_A, int i_k, int iTL, dcomplex *buff_G, int rs_G, int cs_G, double *buff_A, int rs_A, int cs_A) |
| FLA_Error | FLA_Apply_G_rf_asc_var9b (int k_G, int m_A, int n_A, int i_k, int iTL, scomplex *buff_G, int rs_G, int cs_G, scomplex *buff_A, int rs_A, int cs_A) |
| FLA_Error | FLA_Apply_G_rf_asz_var9b (int k_G, int m_A, int n_A, int i_k, int iTL, dcomplex *buff_G, int rs_G, int cs_G, dcomplex *buff_A, int rs_A, int cs_A) |
| FLA_Error | FLA_Apply_G_rf_blk_var9b (FLA_Obj G, FLA_Obj A, dim_t b_alg) |
| FLA_Error | FLA_Apply_G_rf_bls_var9b (int k_G, int m_A, int n_A, int i_k, scomplex *buff_G, int rs_G, int cs_G, float *buff_A, int rs_A, int cs_A, int b_alg) |
| FLA_Error | FLA_Apply_G_rf_bld_var9b (int k_G, int m_A, int n_A, int i_k, dcomplex *buff_G, int rs_G, int cs_G, double *buff_A, int rs_A, int cs_A, int b_alg) |
| FLA_Error | FLA_Apply_G_rf_blc_var9b (int k_G, int m_A, int n_A, int i_k, scomplex *buff_G, int rs_G, int cs_G, scomplex *buff_A, int rs_A, int cs_A, int b_alg) |
| FLA_Error | FLA_Apply_G_rf_blz_var9b (int k_G, int m_A, int n_A, int i_k, dcomplex *buff_G, int rs_G, int cs_G, dcomplex *buff_A, int rs_A, int cs_A, int b_alg) |
| FLA_Error FLA_Apply_G_rf_asc_var1 | ( | int | k_G, |
| int | m_A, | ||
| int | n_A, | ||
| scomplex * | buff_G, | ||
| int | rs_G, | ||
| int | cs_G, | ||
| scomplex * | buff_A, | ||
| int | rs_A, | ||
| int | cs_A | ||
| ) |
References bli_s0(), bli_s1(), scomplex::imag, and scomplex::real.
Referenced by FLA_Apply_G_rf_asc_var2(), FLA_Apply_G_rf_asc_var3(), FLA_Apply_G_rf_asc_var6(), FLA_Apply_G_rf_asc_var9(), FLA_Apply_G_rf_asm_var1(), and FLA_Apply_G_rf_blc_var1().
// Simple (unfused) variant: walks G one column at a time and, for each entry,
// applies the rotation it encodes to a pair of adjacent columns of A.
//
//   k_G            number of columns of G that are visited (outer loop count)
//   m_A            length passed to MAC_Apply_G_mx2_asc for each column pair
//   n_A            number of columns of A; n_A - 1 entries of each G column are used
//   buff_G         base of G; entry (j,l) is read at buff_G + j*rs_G + l*cs_G
//   buff_A         base of A; column j starts at buff_A + j*cs_A
//   rs_A           not referenced in this body (the macro is passed a stride of 1)
//
// Always returns FLA_SUCCESS.
{
// Reference values used to recognise an identity rotation
// (presumably 1.0f and 0.0f, judging by the helper names).
float one = bli_s1();
float zero = bli_s0();
// Number of rotations taken from each column of G.
int nG_app = n_A - 1;
int l, j;
float gamma;
float sigma;
scomplex* a1;
scomplex* a2;
scomplex* g1;
scomplex* g11;
// g1 tracks the top of the current column of G.
g1 = buff_G;
for ( l = 0; l < k_G; ++l )
{
// Restart at the first two columns of A for every column of G.
a1 = buff_A;
a2 = buff_A + cs_A;
g11 = g1;
for ( j = 0; j < nG_app; ++j )
{
// The rotation is stored in a complex entry: real part is gamma,
// imaginary part is sigma.
gamma = g11->real;
sigma = g11->imag;
// Skip the current iteration if the rotation is identity.
if ( gamma != one || sigma != zero )
{
MAC_Apply_G_mx2_asc( m_A,
&gamma,
&sigma,
a1, 1,
a2, 1 );
}
// Slide the column pair one column to the right and move to the next
// entry in the current column of G.
a1 += cs_A;
a2 += cs_A;
g11 += rs_G;
}
// Advance to the next column of G.
g1 += cs_G;
}
return FLA_SUCCESS;
}
| FLA_Error FLA_Apply_G_rf_asc_var2 | ( | int | k_G, |
| int | m_A, | ||
| int | n_A, | ||
| scomplex * | buff_G, | ||
| int | rs_G, | ||
| int | cs_G, | ||
| scomplex * | buff_A, | ||
| int | rs_A, | ||
| int | cs_A | ||
| ) |
References bli_s0(), bli_s1(), FLA_Apply_G_rf_asc_var1(), scomplex::imag, and scomplex::real.
Referenced by FLA_Apply_G_rf_asm_var2(), and FLA_Apply_G_rf_blc_var2().
// Variant that visits the entries of G along anti-diagonals instead of column
// by column: in every inner loop the G column index k increases while the G
// row index g decreases. Entry (g,k) of G is applied to columns g and g+1 of A.
// The traversal is split into a start-up phase (partial anti-diagonals that
// begin in column 0), a pipeline phase (full anti-diagonals of k_G entries),
// and a shutdown phase (partial anti-diagonals that end in the last row).
//
//   k_G            number of columns of G
//   m_A            length passed to MAC_Apply_G_mx2_asc for each column pair
//   n_A            number of columns of A; G supplies n_A - 1 rotations per column
//   buff_G         base of G; entry (g,k) is read at buff_G + g*rs_G + k*cs_G
//   buff_A         base of A; column g starts at buff_A + g*cs_A
//   rs_A           only forwarded to FLA_Apply_G_rf_asc_var1; the macro calls
//                  below are passed a stride of 1
//
// Always returns FLA_SUCCESS (the return value of the var1 fallback is not
// inspected).
{
// Reference values used to recognise an identity rotation
// (presumably 1.0f and 0.0f, judging by the helper names).
float one = bli_s1();
float zero = bli_s0();
float gamma;
float sigma;
scomplex* a1;
scomplex* a2;
scomplex* g11;
int j, g, k;
int nG, nG_app;
int k_minus_1;
k_minus_1 = k_G - 1;
// Number of rotations stored in each column of G.
nG = n_A - 1;
// Use the simple variant for nG < (k - 1) or k == 1.
if ( nG < k_minus_1 || k_G == 1 )
{
FLA_Apply_G_rf_asc_var1( k_G,
m_A,
n_A,
buff_G, rs_G, cs_G,
buff_A, rs_A, cs_A );
return FLA_SUCCESS;
}
// Start-up phase.
// Anti-diagonal j starts at G(j,0) and holds j + 1 entries, ending at G(0,j).
for ( j = 0; j < k_minus_1; ++j )
{
nG_app = j + 1;
for ( k = 0, g = nG_app - 1; k < nG_app; ++k, --g )
{
g11 = buff_G + (g )*rs_G + (k )*cs_G;
a1 = buff_A + (g )*cs_A;
a2 = buff_A + (g + 1)*cs_A;
gamma = g11->real;
sigma = g11->imag;
// Skip the current iteration if the rotation is identity.
if ( gamma == one && sigma == zero ) continue;
MAC_Apply_G_mx2_asc( m_A,
&gamma,
&sigma,
a1, 1,
a2, 1 );
}
}
// Pipeline stage
// Full anti-diagonals: each starts at G(j,0) and touches all k_G columns.
for ( j = k_minus_1; j < nG; ++j )
{
nG_app = k_G;
for ( k = 0, g = j; k < nG_app; ++k, --g )
{
g11 = buff_G + (g )*rs_G + (k )*cs_G;
a1 = buff_A + (g )*cs_A;
a2 = buff_A + (g + 1)*cs_A;
gamma = g11->real;
sigma = g11->imag;
// Skip the current iteration if the rotation is identity.
if ( gamma == one && sigma == zero ) continue;
MAC_Apply_G_mx2_asc( m_A,
&gamma,
&sigma,
a1, 1,
a2, 1 );
}
}
// Shutdown stage
// Remaining partial anti-diagonals: each starts in the last row (g = nG - 1)
// at column k_G - nG_app and runs to the last column of G, with nG_app
// shrinking from k_G - 1 down to 1.
for ( j = nG - k_minus_1; j < nG; ++j )
{
nG_app = nG - j;
for ( k = k_G - nG_app, g = nG - 1; k < k_G; ++k, --g )
{
g11 = buff_G + (g )*rs_G + (k )*cs_G;
a1 = buff_A + (g )*cs_A;
a2 = buff_A + (g + 1)*cs_A;
gamma = g11->real;
sigma = g11->imag;
// Skip the current iteration if the rotation is identity.
if ( gamma == one && sigma == zero ) continue;
MAC_Apply_G_mx2_asc( m_A,
&gamma,
&sigma,
a1, 1,
a2, 1 );
}
}
return FLA_SUCCESS;
}
| FLA_Error FLA_Apply_G_rf_asc_var3 | ( | int | k_G, |
| int | m_A, | ||
| int | n_A, | ||
| scomplex * | buff_G, | ||
| int | rs_G, | ||
| int | cs_G, | ||
| scomplex * | buff_A, | ||
| int | rs_A, | ||
| int | cs_A | ||
| ) |
References bli_s0(), bli_s1(), FLA_Apply_G_rf_asc_var1(), scomplex::imag, and scomplex::real.
Referenced by FLA_Apply_G_rf_asm_var3(), and FLA_Apply_G_rf_blc_var3().
{
float one = bli_s1();
float zero = bli_s0();
float gamma23_k1;
float sigma23_k1;
float gamma34_k1;
float sigma34_k1;
float gamma12_k2;
float sigma12_k2;
float gamma23_k2;
float sigma23_k2;
scomplex* a1;
scomplex* a2;
scomplex* a3;
scomplex* a4;
scomplex* g23_k1;
scomplex* g34_k1;
scomplex* g12_k2;
scomplex* g23_k2;
int i, j, g, k;
int nG, nG_app;
int n_iter;
int n_left;
int k_minus_1;
int n_fuse;
int k_fuse;
int is_ident23_k1, is_ident34_k1;
int is_ident12_k2, is_ident23_k2;
int has_ident;
k_minus_1 = k_G - 1;
nG = n_A - 1;
n_fuse = 2;
k_fuse = 2;
// Use the simple variant for nG < (k - 1) or k == 1.
if ( nG < 2*k_minus_1 || k_G == 1 )
{
FLA_Apply_G_rf_asc_var1( k_G,
m_A,
n_A,
buff_G, rs_G, cs_G,
buff_A, rs_A, cs_A );
return FLA_SUCCESS;
}
// Start-up phase.
for ( j = -1; j < k_minus_1; j += n_fuse )
{
nG_app = j + 2;
n_iter = nG_app / k_fuse;
//n_iter = nG_app % k_fuse;
n_left = 1;
for ( i = 0, k = 0, g = j; i < n_iter; ++i, k += k_fuse, g -= n_fuse )
{
g23_k1 = buff_G + (g )*rs_G + (k )*cs_G;
g34_k1 = buff_G + (g + 1)*rs_G + (k )*cs_G;
g12_k2 = buff_G + (g - 1)*rs_G + (k + 1)*cs_G;
g23_k2 = buff_G + (g )*rs_G + (k + 1)*cs_G;
a1 = buff_A + (g - 1)*cs_A;
a2 = buff_A + (g )*cs_A;
a3 = buff_A + (g + 1)*cs_A;
a4 = buff_A + (g + 2)*cs_A;
gamma23_k1 = g23_k1->real;
sigma23_k1 = g23_k1->imag;
gamma34_k1 = g34_k1->real;
sigma34_k1 = g34_k1->imag;
gamma12_k2 = g12_k2->real;
sigma12_k2 = g12_k2->imag;
gamma23_k2 = g23_k2->real;
sigma23_k2 = g23_k2->imag;
is_ident23_k1 = ( gamma23_k1 == one && sigma23_k1 == zero );
is_ident34_k1 = ( gamma34_k1 == one && sigma34_k1 == zero );
is_ident12_k2 = ( gamma12_k2 == one && sigma12_k2 == zero );
is_ident23_k2 = ( gamma23_k2 == one && sigma23_k2 == zero );
has_ident = ( is_ident23_k1 || is_ident34_k1 ||
is_ident12_k2 || is_ident23_k2 );
if ( has_ident )
{
// Apply to pairs of columns as needed.
if ( !is_ident23_k1 )
MAC_Apply_G_mx2_asc( m_A,
&gamma23_k1,
&sigma23_k1,
a2, 1,
a3, 1 );
if ( !is_ident34_k1 )
MAC_Apply_G_mx2_asc( m_A,
&gamma34_k1,
&sigma34_k1,
a3, 1,
a4, 1 );
if ( !is_ident12_k2 )
MAC_Apply_G_mx2_asc( m_A,
&gamma12_k2,
&sigma12_k2,
a1, 1,
a2, 1 );
if ( !is_ident23_k2 )
MAC_Apply_G_mx2_asc( m_A,
&gamma23_k2,
&sigma23_k2,
a2, 1,
a3, 1 );
}
else
{
// Apply to all four columns.
MAC_Apply_G_mx4s_asc( m_A,
&gamma23_k1,
&sigma23_k1,
&gamma34_k1,
&sigma34_k1,
&gamma12_k2,
&sigma12_k2,
&gamma23_k2,
&sigma23_k2,
a1, 1,
a2, 1,
a3, 1,
a4, 1 );
}
}
if ( n_left == 1 )
{
g34_k1 = buff_G + (g + 1)*rs_G + (k )*cs_G;
a3 = buff_A + (g + 1)*cs_A;
a4 = buff_A + (g + 2)*cs_A;
gamma34_k1 = g34_k1->real;
sigma34_k1 = g34_k1->imag;
is_ident34_k1 = ( gamma34_k1 == one && sigma34_k1 == zero );
if ( !is_ident34_k1 )
MAC_Apply_G_mx2_asc( m_A,
&gamma34_k1,
&sigma34_k1,
a3, 1,
a4, 1 );
}
}
// Pipeline stage
for ( ; j < nG - 1; j += n_fuse )
{
nG_app = k_G;
n_iter = nG_app / k_fuse;
n_left = nG_app % k_fuse;
for ( i = 0, k = 0, g = j; i < n_iter; ++i, k += k_fuse, g -= n_fuse )
{
g23_k1 = buff_G + (g )*rs_G + (k )*cs_G;
g34_k1 = buff_G + (g + 1)*rs_G + (k )*cs_G;
g12_k2 = buff_G + (g - 1)*rs_G + (k + 1)*cs_G;
g23_k2 = buff_G + (g )*rs_G + (k + 1)*cs_G;
a1 = buff_A + (g - 1)*cs_A;
a2 = buff_A + (g )*cs_A;
a3 = buff_A + (g + 1)*cs_A;
a4 = buff_A + (g + 2)*cs_A;
gamma23_k1 = g23_k1->real;
sigma23_k1 = g23_k1->imag;
gamma34_k1 = g34_k1->real;
sigma34_k1 = g34_k1->imag;
gamma12_k2 = g12_k2->real;
sigma12_k2 = g12_k2->imag;
gamma23_k2 = g23_k2->real;
sigma23_k2 = g23_k2->imag;
is_ident23_k1 = ( gamma23_k1 == one && sigma23_k1 == zero );
is_ident34_k1 = ( gamma34_k1 == one && sigma34_k1 == zero );
is_ident12_k2 = ( gamma12_k2 == one && sigma12_k2 == zero );
is_ident23_k2 = ( gamma23_k2 == one && sigma23_k2 == zero );
has_ident = ( is_ident23_k1 || is_ident34_k1 ||
is_ident12_k2 || is_ident23_k2 );
if ( has_ident )
{
// Apply to pairs of columns as needed.
if ( !is_ident23_k1 )
MAC_Apply_G_mx2_asc( m_A,
&gamma23_k1,
&sigma23_k1,
a2, 1,
a3, 1 );
if ( !is_ident34_k1 )
MAC_Apply_G_mx2_asc( m_A,
&gamma34_k1,
&sigma34_k1,
a3, 1,
a4, 1 );
if ( !is_ident12_k2 )
MAC_Apply_G_mx2_asc( m_A,
&gamma12_k2,
&sigma12_k2,
a1, 1,
a2, 1 );
if ( !is_ident23_k2 )
MAC_Apply_G_mx2_asc( m_A,
&gamma23_k2,
&sigma23_k2,
a2, 1,
a3, 1 );
}
else
{
// Apply to all four columns.
MAC_Apply_G_mx4s_asc( m_A,
&gamma23_k1,
&sigma23_k1,
&gamma34_k1,
&sigma34_k1,
&gamma12_k2,
&sigma12_k2,
&gamma23_k2,
&sigma23_k2,
a1, 1,
a2, 1,
a3, 1,
a4, 1 );
}
}
if ( n_left == 1 )
{
g23_k1 = buff_G + (g )*rs_G + (k )*cs_G;
g34_k1 = buff_G + (g + 1)*rs_G + (k )*cs_G;
a2 = buff_A + (g )*cs_A;
a3 = buff_A + (g + 1)*cs_A;
a4 = buff_A + (g + 2)*cs_A;
gamma23_k1 = g23_k1->real;
sigma23_k1 = g23_k1->imag;
gamma34_k1 = g34_k1->real;
sigma34_k1 = g34_k1->imag;
is_ident23_k1 = ( gamma23_k1 == one && sigma23_k1 == zero );
is_ident34_k1 = ( gamma34_k1 == one && sigma34_k1 == zero );
if ( !is_ident23_k1 && is_ident34_k1 )
{
MAC_Apply_G_mx2_asc( m_A,
&gamma23_k1,
&sigma23_k1,
a2, 1,
a3, 1 );
}
else if ( is_ident23_k1 && !is_ident34_k1 )
{
MAC_Apply_G_mx2_asc( m_A,
&gamma34_k1,
&sigma34_k1,
a3, 1,
a4, 1 );
}
else
{
MAC_Apply_G_mx3_asc( m_A,
&gamma23_k1,
&sigma23_k1,
&gamma34_k1,
&sigma34_k1,
a2, 1,
a3, 1,
a4, 1 );
}
}
}
// Shutdown stage
for ( j = nG % n_fuse; j < k_G; j += n_fuse )
{
g = nG - 1;
k = j;
//n_left = 1;
//if ( n_left == 1 )
{
g23_k1 = buff_G + (g )*rs_G + (k )*cs_G;
a2 = buff_A + (g )*cs_A;
a3 = buff_A + (g + 1)*cs_A;
gamma23_k1 = g23_k1->real;
sigma23_k1 = g23_k1->imag;
is_ident23_k1 = ( gamma23_k1 == one && sigma23_k1 == zero );
if ( !is_ident23_k1 )
MAC_Apply_G_mx2_asc( m_A,
&gamma23_k1,
&sigma23_k1,
a2, 1,
a3, 1 );
++k;
--g;
}
nG_app = k_minus_1 - j;
n_iter = nG_app / k_fuse;
n_left = nG_app % k_fuse;
for ( i = 0; i < n_iter; ++i, k += k_fuse, g -= n_fuse )
{
g23_k1 = buff_G + (g )*rs_G + (k )*cs_G;
g34_k1 = buff_G + (g + 1)*rs_G + (k )*cs_G;
g12_k2 = buff_G + (g - 1)*rs_G + (k + 1)*cs_G;
g23_k2 = buff_G + (g )*rs_G + (k + 1)*cs_G;
a1 = buff_A + (g - 1)*cs_A;
a2 = buff_A + (g )*cs_A;
a3 = buff_A + (g + 1)*cs_A;
a4 = buff_A + (g + 2)*cs_A;
gamma23_k1 = g23_k1->real;
sigma23_k1 = g23_k1->imag;
gamma34_k1 = g34_k1->real;
sigma34_k1 = g34_k1->imag;
gamma12_k2 = g12_k2->real;
sigma12_k2 = g12_k2->imag;
gamma23_k2 = g23_k2->real;
sigma23_k2 = g23_k2->imag;
is_ident23_k1 = ( gamma23_k1 == one && sigma23_k1 == zero );
is_ident34_k1 = ( gamma34_k1 == one && sigma34_k1 == zero );
is_ident12_k2 = ( gamma12_k2 == one && sigma12_k2 == zero );
is_ident23_k2 = ( gamma23_k2 == one && sigma23_k2 == zero );
has_ident = ( is_ident23_k1 || is_ident34_k1 ||
is_ident12_k2 || is_ident23_k2 );
if ( has_ident )
{
// Apply to pairs of columns as needed.
if ( !is_ident23_k1 )
MAC_Apply_G_mx2_asc( m_A,
&gamma23_k1,
&sigma23_k1,
a2, 1,
a3, 1 );
if ( !is_ident34_k1 )
MAC_Apply_G_mx2_asc( m_A,
&gamma34_k1,
&sigma34_k1,
a3, 1,
a4, 1 );
if ( !is_ident12_k2 )
MAC_Apply_G_mx2_asc( m_A,
&gamma12_k2,
&sigma12_k2,
a1, 1,
a2, 1 );
if ( !is_ident23_k2 )
MAC_Apply_G_mx2_asc( m_A,
&gamma23_k2,
&sigma23_k2,
a2, 1,
a3, 1 );
}
else
{
// Apply to all four columns.
MAC_Apply_G_mx4s_asc( m_A,
&gamma23_k1,
&sigma23_k1,
&gamma34_k1,
&sigma34_k1,
&gamma12_k2,
&sigma12_k2,
&gamma23_k2,
&sigma23_k2,
a1, 1,
a2, 1,
a3, 1,
a4, 1 );
}
}
if ( n_left == 1 )
{
g23_k1 = buff_G + (g )*rs_G + (k )*cs_G;
g34_k1 = buff_G + (g + 1)*rs_G + (k )*cs_G;
a2 = buff_A + (g )*cs_A;
a3 = buff_A + (g + 1)*cs_A;
a4 = buff_A + (g + 2)*cs_A;
gamma23_k1 = g23_k1->real;
sigma23_k1 = g23_k1->imag;
gamma34_k1 = g34_k1->real;
sigma34_k1 = g34_k1->imag;
is_ident23_k1 = ( gamma23_k1 == one && sigma23_k1 == zero );
is_ident34_k1 = ( gamma34_k1 == one && sigma34_k1 == zero );
if ( !is_ident23_k1 && is_ident34_k1 )
{
MAC_Apply_G_mx2_asc( m_A,
&gamma23_k1,
&sigma23_k1,
a2, 1,
a3, 1 );
}
else if ( is_ident23_k1 && !is_ident34_k1 )
{
MAC_Apply_G_mx2_asc( m_A,
&gamma34_k1,
&sigma34_k1,
a3, 1,
a4, 1 );
}
else
{
MAC_Apply_G_mx3_asc( m_A,
&gamma23_k1,
&sigma23_k1,
&gamma34_k1,
&sigma34_k1,
a2, 1,
a3, 1,
a4, 1 );
}
}
}
return FLA_SUCCESS;
}
| FLA_Error FLA_Apply_G_rf_asc_var3b | ( | int | k_G, |
| int | m_A, | ||
| int | n_A, | ||
| int | i_k, | ||
| int | iTL, | ||
| scomplex * | buff_G, | ||
| int | rs_G, | ||
| int | cs_G, | ||
| scomplex * | buff_A, | ||
| int | rs_A, | ||
| int | cs_A | ||
| ) |
Referenced by FLA_Apply_G_rf_asm_var3b().
{
// Placeholder body: this variant has no implementation. It hands
// FLA_NOT_YET_IMPLEMENTED to FLA_Check_error_code() and performs no work
// on G or A; none of the parameters are referenced.
FLA_Check_error_code( FLA_NOT_YET_IMPLEMENTED );
// NOTE(review): FLA_SUCCESS is returned if FLA_Check_error_code() hands
// control back; whether it does is not visible here -- confirm.
return FLA_SUCCESS;
}
| FLA_Error FLA_Apply_G_rf_asc_var4 | ( | int | k_G, |
| int | m_A, | ||
| int | n_A, | ||
| scomplex * | buff_G, | ||
| int | rs_G, | ||
| int | cs_G, | ||
| scomplex * | buff_A, | ||
| int | rs_A, | ||
| int | cs_A | ||
| ) |
| FLA_Error FLA_Apply_G_rf_asc_var5 | ( | int | k_G, |
| int | m_A, | ||
| int | n_A, | ||
| scomplex * | buff_G, | ||
| int | rs_G, | ||
| int | cs_G, | ||
| scomplex * | buff_A, | ||
| int | rs_A, | ||
| int | cs_A | ||
| ) |
| FLA_Error FLA_Apply_G_rf_asc_var5b | ( | int | k_G, |
| int | m_A, | ||
| int | n_A, | ||
| int | i_k, | ||
| int | iTL, | ||
| scomplex * | buff_G, | ||
| int | rs_G, | ||
| int | cs_G, | ||
| scomplex * | buff_A, | ||
| int | rs_A, | ||
| int | cs_A | ||
| ) |
| FLA_Error FLA_Apply_G_rf_asc_var6 | ( | int | k_G, |
| int | m_A, | ||
| int | n_A, | ||
| scomplex * | buff_G, | ||
| int | rs_G, | ||
| int | cs_G, | ||
| scomplex * | buff_A, | ||
| int | rs_A, | ||
| int | cs_A | ||
| ) |
References bli_s0(), bli_s1(), FLA_Apply_G_rf_asc_var1(), scomplex::imag, and scomplex::real.
Referenced by FLA_Apply_G_rf_asm_var6(), and FLA_Apply_G_rf_blc_var6().
{
	// Applies k_G sets of Givens rotations to the columns of A, visiting the
	// rotations of G along anti-diagonals and fusing neighbouring rotations
	// into a three-column kernel where possible.
	//
	// The rotation stored in row g, column k of G (gamma in .real, sigma in
	// .imag) acts on columns g and g+1 of A. Anti-diagonal "wave" holds the
	// rotations with g + k == wave; within one anti-diagonal k increases and
	// g decreases. The anti-diagonals first grow (start-up), then have full
	// length k_G (pipeline), and finally shrink while starting in the last
	// row of G (shutdown).
	//
	// rs_A is not referenced; the kernels are given unit element stride.
	const float one       = bli_s1();
	const float zero      = bli_s0();
	const int   n_fuse    = 2;
	const int   k_minus_1 = k_G - 1;
	const int   nG        = n_A - 1;
	float       gamma12, sigma12;
	float       gamma23, sigma23;
	scomplex*   a1;
	scomplex*   a2;
	scomplex*   a3;
	scomplex*   g12;
	scomplex*   g23;
	int         wave, n_waves;
	int         n_app, n_pairs;
	int         i, g, k;
	int         is_ident12, is_ident23;

	// A single set, or fewer than k_G - 1 rotations per set: hand the whole
	// job to the unfused variant.
	if ( k_G == 1 || nG < k_minus_1 )
	{
		FLA_Apply_G_rf_asc_var1( k_G, m_A, n_A,
		                         buff_G, rs_G, cs_G,
		                         buff_A, rs_A, cs_A );
		return FLA_SUCCESS;
	}

	n_waves = nG + k_minus_1;

	for ( wave = 0; wave < n_waves; ++wave )
	{
		// First rotation of this anti-diagonal. Once wave reaches nG the
		// anti-diagonal starts in the last row of G and in set wave - (nG - 1).
		g = ( wave < nG ? wave : nG - 1 );
		k = wave - g;

		// Rotations on this anti-diagonal: wave + 1 while it is still growing,
		// otherwise everything from set k up to the last set.
		n_app   = ( wave < k_minus_1 ? wave + 1 : k_G ) - k;
		n_pairs = n_app / n_fuse;

		// Consume the anti-diagonal two rotations at a time.
		for ( i = 0; i < n_pairs; ++i, k += n_fuse, g -= n_fuse )
		{
			g12 = buff_G + (g - 1)*rs_G + (k + 1)*cs_G;
			g23 = buff_G + (g    )*rs_G + (k    )*cs_G;

			a1 = buff_A + (g - 1)*cs_A;
			a2 = buff_A + (g    )*cs_A;
			a3 = buff_A + (g + 1)*cs_A;

			gamma12 = g12->real;
			sigma12 = g12->imag;
			gamma23 = g23->real;
			sigma23 = g23->imag;

			is_ident12 = ( gamma12 == one && sigma12 == zero );
			is_ident23 = ( gamma23 == one && sigma23 == zero );

			if ( is_ident12 )
			{
				// Only the rotation on columns 2 and 3 can be non-trivial.
				if ( !is_ident23 )
				{
					MAC_Apply_G_mx2_asc( m_A, &gamma23, &sigma23, a2, 1, a3, 1 );
				}
			}
			else if ( is_ident23 )
			{
				// Only the rotation on columns 1 and 2 is non-trivial.
				MAC_Apply_G_mx2_asc( m_A, &gamma12, &sigma12, a1, 1, a2, 1 );
			}
			else
			{
				// Both rotations are non-trivial: fused three-column kernel.
				MAC_Apply_G_mx3b_asc( m_A,
				                      &gamma12, &sigma12,
				                      &gamma23, &sigma23,
				                      a1, 1, a2, 1, a3, 1 );
			}
		}

		// An odd count leaves one rotation, applied to columns g and g+1.
		if ( n_app % n_fuse == 1 )
		{
			g23 = buff_G + (g    )*rs_G + (k    )*cs_G;

			a2 = buff_A + (g    )*cs_A;
			a3 = buff_A + (g + 1)*cs_A;

			gamma23 = g23->real;
			sigma23 = g23->imag;

			if ( gamma23 != one || sigma23 != zero )
			{
				MAC_Apply_G_mx2_asc( m_A, &gamma23, &sigma23, a2, 1, a3, 1 );
			}
		}
	}

	return FLA_SUCCESS;
}
| FLA_Error FLA_Apply_G_rf_asc_var6b | ( | int | k_G, |
| int | m_A, | ||
| int | n_A, | ||
| int | i_k, | ||
| int | iTL, | ||
| scomplex * | buff_G, | ||
| int | rs_G, | ||
| int | cs_G, | ||
| scomplex * | buff_A, | ||
| int | rs_A, | ||
| int | cs_A | ||
| ) |
Referenced by FLA_Apply_G_rf_asm_var6b().
{
// Placeholder body: this variant has no implementation. It hands
// FLA_NOT_YET_IMPLEMENTED to FLA_Check_error_code() and performs no work
// on G or A; none of the parameters are referenced.
FLA_Check_error_code( FLA_NOT_YET_IMPLEMENTED );
// NOTE(review): FLA_SUCCESS is returned if FLA_Check_error_code() hands
// control back; whether it does is not visible here -- confirm.
return FLA_SUCCESS;
}
| FLA_Error FLA_Apply_G_rf_asc_var7 | ( | int | k_G, |
| int | m_A, | ||
| int | n_A, | ||
| scomplex * | buff_G, | ||
| int | rs_G, | ||
| int | cs_G, | ||
| scomplex * | buff_A, | ||
| int | rs_A, | ||
| int | cs_A | ||
| ) |
| FLA_Error FLA_Apply_G_rf_asc_var8 | ( | int | k_G, |
| int | m_A, | ||
| int | n_A, | ||
| scomplex * | buff_G, | ||
| int | rs_G, | ||
| int | cs_G, | ||
| scomplex * | buff_A, | ||
| int | rs_A, | ||
| int | cs_A | ||
| ) |
| FLA_Error FLA_Apply_G_rf_asc_var8b | ( | int | k_G, |
| int | m_A, | ||
| int | n_A, | ||
| int | i_k, | ||
| int | iTL, | ||
| scomplex * | buff_G, | ||
| int | rs_G, | ||
| int | cs_G, | ||
| scomplex * | buff_A, | ||
| int | rs_A, | ||
| int | cs_A | ||
| ) |
| FLA_Error FLA_Apply_G_rf_asc_var9 | ( | int | k_G, |
| int | m_A, | ||
| int | n_A, | ||
| scomplex * | buff_G, | ||
| int | rs_G, | ||
| int | cs_G, | ||
| scomplex * | buff_A, | ||
| int | rs_A, | ||
| int | cs_A | ||
| ) |
References bli_s0(), bli_s1(), FLA_Apply_G_rf_asc_var1(), scomplex::imag, and scomplex::real.
Referenced by FLA_Apply_G_rf_asm_var9(), and FLA_Apply_G_rf_blc_var9().
{
	// Applies k_G sets of Givens rotations to the columns of A, fusing two
	// consecutive rotations of the same set (rows g and g+1 of G, set k) into
	// one three-column kernel acting on columns g, g+1 and g+2 of A.
	//
	// Work is organised in sweeps indexed by j, which advances by n_fuse.
	// A sweep walks k upwards and g downwards one step at a time:
	//   start-up : j = -1, 1, 3, ... < k_G - 1. j + 1 fused steps from row j,
	//              set 0, followed by one lone rotation. After the fused
	//              steps g == -1, so only row g + 1 == 0 is left to apply.
	//   pipeline : j continues up to nG - 1 (exclusive). k_G fused steps from
	//              row j, set 0.
	//   shutdown : j = nG % n_fuse, ... < k_G. One lone rotation in the last
	//              row of G, set j, then k_G - 1 - j fused steps.
	//
	// rs_A is not referenced; the kernels are given unit element stride.
	enum { PHASE_STARTUP, PHASE_PIPELINE, PHASE_SHUTDOWN };

	const float one       = bli_s1();
	const float zero      = bli_s0();
	const int   n_fuse    = 2;
	const int   k_minus_1 = k_G - 1;
	const int   nG        = n_A - 1;
	float       gamma12, sigma12;
	float       gamma23, sigma23;
	scomplex*   a1;
	scomplex*   a2;
	scomplex*   a3;
	scomplex*   g12;
	scomplex*   g23;
	int         i, j, g, k;
	int         phase, j_end;
	int         n_iter;
	int         is_ident12, is_ident23;

	// A single set, or fewer than 2*(k_G - 1) rotations per set: hand the
	// whole job to the unfused variant.
	if ( k_G == 1 || nG < 2*k_minus_1 )
	{
		FLA_Apply_G_rf_asc_var1( k_G, m_A, n_A,
		                         buff_G, rs_G, cs_G,
		                         buff_A, rs_A, cs_A );
		return FLA_SUCCESS;
	}

	j = -1;

	for ( phase = PHASE_STARTUP; phase <= PHASE_SHUTDOWN; ++phase )
	{
		if ( phase == PHASE_STARTUP )
		{
			j_end = k_minus_1;
		}
		else if ( phase == PHASE_PIPELINE )
		{
			// j deliberately carries over from the start-up phase.
			j_end = nG - 1;
		}
		else
		{
			j     = nG % n_fuse;
			j_end = k_G;
		}

		for ( ; j < j_end; j += n_fuse )
		{
			if ( phase == PHASE_SHUTDOWN )
			{
				// Lone rotation in the last row of G, acting on the last two
				// columns of A, before the fused steps begin.
				g = nG - 1;
				k = j;

				g12 = buff_G + (g    )*rs_G + (k    )*cs_G;

				a1 = buff_A + (g    )*cs_A;
				a2 = buff_A + (g + 1)*cs_A;

				gamma12 = g12->real;
				sigma12 = g12->imag;

				if ( gamma12 != one || sigma12 != zero )
				{
					MAC_Apply_G_mx2_asc( m_A, &gamma12, &sigma12, a1, 1, a2, 1 );
				}

				++k;
				--g;

				n_iter = k_minus_1 - j;
			}
			else
			{
				g = j;
				k = 0;

				n_iter = ( phase == PHASE_STARTUP ? j + 1 : k_G );
			}

			// Fused steps: rotations in rows g and g+1 of set k.
			for ( i = 0; i < n_iter; ++i, ++k, --g )
			{
				g12 = buff_G + (g    )*rs_G + (k    )*cs_G;
				g23 = buff_G + (g + 1)*rs_G + (k    )*cs_G;

				a1 = buff_A + (g    )*cs_A;
				a2 = buff_A + (g + 1)*cs_A;
				a3 = buff_A + (g + 2)*cs_A;

				gamma12 = g12->real;
				sigma12 = g12->imag;
				gamma23 = g23->real;
				sigma23 = g23->imag;

				is_ident12 = ( gamma12 == one && sigma12 == zero );
				is_ident23 = ( gamma23 == one && sigma23 == zero );

				if ( is_ident12 )
				{
					// Only the rotation on columns 2 and 3 can be non-trivial.
					if ( !is_ident23 )
					{
						MAC_Apply_G_mx2_asc( m_A, &gamma23, &sigma23, a2, 1, a3, 1 );
					}
				}
				else if ( is_ident23 )
				{
					// Only the rotation on columns 1 and 2 is non-trivial.
					MAC_Apply_G_mx2_asc( m_A, &gamma12, &sigma12, a1, 1, a2, 1 );
				}
				else
				{
					// Both rotations are non-trivial: fused three-column kernel.
					MAC_Apply_G_mx3_asc( m_A,
					                     &gamma12, &sigma12,
					                     &gamma23, &sigma23,
					                     a1, 1, a2, 1, a3, 1 );
				}
			}

			if ( phase == PHASE_STARTUP )
			{
				// Lone trailing rotation of a start-up sweep (row g + 1).
				g23 = buff_G + (g + 1)*rs_G + (k    )*cs_G;

				a2 = buff_A + (g + 1)*cs_A;
				a3 = buff_A + (g + 2)*cs_A;

				gamma23 = g23->real;
				sigma23 = g23->imag;

				if ( gamma23 != one || sigma23 != zero )
				{
					MAC_Apply_G_mx2_asc( m_A, &gamma23, &sigma23, a2, 1, a3, 1 );
				}
			}
		}
	}

	return FLA_SUCCESS;
}
| FLA_Error FLA_Apply_G_rf_asc_var9b | ( | int | k_G, |
| int | m_A, | ||
| int | n_A, | ||
| int | i_k, | ||
| int | iTL, | ||
| scomplex * | buff_G, | ||
| int | rs_G, | ||
| int | cs_G, | ||
| scomplex * | buff_A, | ||
| int | rs_A, | ||
| int | cs_A | ||
| ) |
Referenced by FLA_Apply_G_rf_asm_var9b().
{
// Placeholder body: this variant has no implementation. It hands
// FLA_NOT_YET_IMPLEMENTED to FLA_Check_error_code() and performs no work
// on G or A; none of the parameters are referenced.
FLA_Check_error_code( FLA_NOT_YET_IMPLEMENTED );
// NOTE(review): FLA_SUCCESS is returned if FLA_Check_error_code() hands
// control back; whether it does is not visible here -- confirm.
return FLA_SUCCESS;
}
| FLA_Error FLA_Apply_G_rf_asd_var1 | ( | int | k_G, |
| int | m_A, | ||
| int | n_A, | ||
| dcomplex * | buff_G, | ||
| int | rs_G, | ||
| int | cs_G, | ||
| double * | buff_A, | ||
| int | rs_A, | ||
| int | cs_A | ||
| ) |
References bli_d0(), bli_d1(), dcomplex::imag, and dcomplex::real.
Referenced by FLA_Apply_G_rf_asd_var2(), FLA_Apply_G_rf_asd_var3(), FLA_Apply_G_rf_asd_var3b(), FLA_Apply_G_rf_asd_var6(), FLA_Apply_G_rf_asd_var6b(), FLA_Apply_G_rf_asd_var9(), FLA_Apply_G_rf_asd_var9b(), FLA_Apply_G_rf_asm_var1(), and FLA_Apply_G_rf_bld_var1().
{
	// Reference (unfused) variant: applies k_G sets of n_A - 1 Givens
	// rotations to the columns of the real matrix A, one set after another.
	//
	// Set l lives in column l of G (column stride cs_G); its j-th rotation is
	// the entry j rows further down (row stride rs_G), with gamma in .real
	// and sigma in .imag, and acts on columns j and j+1 of A (column stride
	// cs_A). Rotations equal to (1, 0) are skipped.
	//
	// rs_A is not referenced; the kernel is given unit element stride.
	const double one   = bli_d1();
	const double zero  = bli_d0();
	const int    n_rot = n_A - 1;
	double       gamma;
	double       sigma;
	double*      a1;
	double*      a2;
	dcomplex*    g_set;
	dcomplex*    g_rot;
	int          set, rot;

	for ( set = 0, g_set = buff_G; set < k_G; ++set, g_set += cs_G )
	{
		// Walk down the current set while sliding the column pair across A.
		for ( rot = 0, g_rot = g_set, a1 = buff_A, a2 = buff_A + cs_A;
		      rot < n_rot;
		      ++rot, g_rot += rs_G, a1 += cs_A, a2 += cs_A )
		{
			gamma = g_rot->real;
			sigma = g_rot->imag;

			// Identity rotation: nothing to do for this column pair.
			if ( gamma == one && sigma == zero ) continue;

			MAC_Apply_G_mx2_asd( m_A, &gamma, &sigma, a1, 1, a2, 1 );
		}
	}

	return FLA_SUCCESS;
}
| FLA_Error FLA_Apply_G_rf_asd_var2 | ( | int | k_G, |
| int | m_A, | ||
| int | n_A, | ||
| dcomplex * | buff_G, | ||
| int | rs_G, | ||
| int | cs_G, | ||
| double * | buff_A, | ||
| int | rs_A, | ||
| int | cs_A | ||
| ) |
References bli_d0(), bli_d1(), FLA_Apply_G_rf_asd_var1(), dcomplex::imag, and dcomplex::real.
Referenced by FLA_Apply_G_rf_asm_var2(), and FLA_Apply_G_rf_bld_var2().
{
	// Applies k_G sets of Givens rotations to the columns of the real matrix
	// A in wavefront order, one rotation at a time (no fusing).
	//
	// The rotation in row g, column k of G (gamma in .real, sigma in .imag)
	// acts on columns g and g+1 of A. Rotations are visited along the
	// anti-diagonals g + k == wave of G, with k increasing and g decreasing
	// inside each anti-diagonal. The anti-diagonals first grow (start-up),
	// then have full length k_G (pipeline), then shrink while starting in
	// the last row of G (shutdown).
	//
	// rs_A is not referenced; the kernel is given unit element stride.
	const double one       = bli_d1();
	const double zero      = bli_d0();
	const int    k_minus_1 = k_G - 1;
	const int    nG        = n_A - 1;
	double       gamma;
	double       sigma;
	double*      a1;
	double*      a2;
	dcomplex*    g11;
	int          wave, n_waves;
	int          g, k, k_end;

	// A single set, or fewer than k_G - 1 rotations per set: hand the whole
	// job to the simple variant.
	if ( k_G == 1 || nG < k_minus_1 )
	{
		FLA_Apply_G_rf_asd_var1( k_G, m_A, n_A,
		                         buff_G, rs_G, cs_G,
		                         buff_A, rs_A, cs_A );
		return FLA_SUCCESS;
	}

	n_waves = nG + k_minus_1;

	for ( wave = 0; wave < n_waves; ++wave )
	{
		// Starting row: the anti-diagonal's own index until it would leave
		// G, after which every anti-diagonal starts in the last row.
		g = ( wave < nG ? wave : nG - 1 );

		// One past the last set touched: wave + 1 while the anti-diagonal is
		// still growing, k_G afterwards.
		k_end = ( wave < k_minus_1 ? wave + 1 : k_G );

		for ( k = wave - g; k < k_end; ++k, --g )
		{
			g11 = buff_G + (g    )*rs_G + (k    )*cs_G;

			gamma = g11->real;
			sigma = g11->imag;

			// Only non-identity rotations touch A.
			if ( gamma != one || sigma != zero )
			{
				a1 = buff_A + (g    )*cs_A;
				a2 = buff_A + (g + 1)*cs_A;

				MAC_Apply_G_mx2_asd( m_A, &gamma, &sigma, a1, 1, a2, 1 );
			}
		}
	}

	return FLA_SUCCESS;
}
| FLA_Error FLA_Apply_G_rf_asd_var3 | ( | int | k_G, |
| int | m_A, | ||
| int | n_A, | ||
| dcomplex * | buff_G, | ||
| int | rs_G, | ||
| int | cs_G, | ||
| double * | buff_A, | ||
| int | rs_A, | ||
| int | cs_A | ||
| ) |
References bli_d0(), bli_d1(), FLA_Apply_G_rf_asd_var1(), dcomplex::imag, and dcomplex::real.
Referenced by FLA_Apply_G_rf_asm_var3(), and FLA_Apply_G_rf_bld_var3().
{
	// Applies k_G sets of Givens rotations to the columns of the real matrix
	// A, fusing up to four rotations (two rows of G from set k and two rows
	// from set k+1) into one four-column kernel.
	//
	// A rotation in row r of G (gamma in .real, sigma in .imag) acts on
	// columns r and r+1 of A. One fused step at position (g, k) covers
	//   g23_k1 : row g,     set k     -> columns a2, a3
	//   g34_k1 : row g + 1, set k     -> columns a3, a4
	//   g12_k2 : row g - 1, set k + 1 -> columns a1, a2
	//   g23_k2 : row g,     set k + 1 -> columns a2, a3
	// with a1..a4 being columns g-1 .. g+2 of A. If any of the four is the
	// identity (1, 0) the remaining ones are applied pairwise in the order
	// listed above; otherwise the fused kernel is used.
	//
	// Work is organised in sweeps indexed by j, which advances by n_fuse:
	//   start-up : j = -1, 1, 3, ... < k_G - 1. (j + 2) / 2 fused steps from
	//              (j, 0), then one lone g34 rotation. After the fused steps
	//              g == -1, so only row g + 1 == 0 is left to apply.
	//   pipeline : j continues up to nG - 1 (exclusive). k_G / 2 fused steps
	//              from (j, 0); if k_G is odd, one trailing g23/g34 pair.
	//   shutdown : j = nG % n_fuse, ... < k_G. One lone rotation in the last
	//              row of G, set j, then (k_G - 1 - j) / 2 fused steps and,
	//              if that count is odd, one trailing g23/g34 pair.
	//
	// A trailing pair in which both rotations are the identity is skipped,
	// matching how identity rotations are treated everywhere else in this
	// function.
	//
	// rs_A is not referenced; the kernels are given unit element stride.
	enum { PHASE_STARTUP, PHASE_PIPELINE, PHASE_SHUTDOWN };

	const double one       = bli_d1();
	const double zero      = bli_d0();
	const int    n_fuse    = 2;
	const int    k_fuse    = 2;
	const int    k_minus_1 = k_G - 1;
	const int    nG        = n_A - 1;
	double       gamma23_k1, sigma23_k1;
	double       gamma34_k1, sigma34_k1;
	double       gamma12_k2, sigma12_k2;
	double       gamma23_k2, sigma23_k2;
	double*      a1;
	double*      a2;
	double*      a3;
	double*      a4;
	dcomplex*    g23_k1;
	dcomplex*    g34_k1;
	dcomplex*    g12_k2;
	dcomplex*    g23_k2;
	int          i, j, g, k;
	int          phase, j_end;
	int          nG_app;
	int          n_iter;
	int          n_left;
	int          is_ident23_k1, is_ident34_k1;
	int          is_ident12_k2, is_ident23_k2;

	// A single set, or fewer than 2*(k_G - 1) rotations per set: hand the
	// whole job to the simple variant.
	if ( k_G == 1 || nG < 2*k_minus_1 )
	{
		FLA_Apply_G_rf_asd_var1( k_G, m_A, n_A,
		                         buff_G, rs_G, cs_G,
		                         buff_A, rs_A, cs_A );
		return FLA_SUCCESS;
	}

	j = -1;

	for ( phase = PHASE_STARTUP; phase <= PHASE_SHUTDOWN; ++phase )
	{
		if ( phase == PHASE_STARTUP )
		{
			j_end = k_minus_1;
		}
		else if ( phase == PHASE_PIPELINE )
		{
			// j deliberately carries over from the start-up phase.
			j_end = nG - 1;
		}
		else
		{
			j     = nG % n_fuse;
			j_end = k_G;
		}

		for ( ; j < j_end; j += n_fuse )
		{
			if ( phase == PHASE_SHUTDOWN )
			{
				// Lone rotation in the last row of G, acting on the last two
				// columns of A, before the fused steps begin.
				g = nG - 1;
				k = j;

				g23_k1 = buff_G + (g    )*rs_G + (k    )*cs_G;

				a2 = buff_A + (g    )*cs_A;
				a3 = buff_A + (g + 1)*cs_A;

				gamma23_k1 = g23_k1->real;
				sigma23_k1 = g23_k1->imag;

				if ( gamma23_k1 != one || sigma23_k1 != zero )
				{
					MAC_Apply_G_mx2_asd( m_A, &gamma23_k1, &sigma23_k1, a2, 1, a3, 1 );
				}

				++k;
				--g;

				nG_app = k_minus_1 - j;
			}
			else
			{
				g = j;
				k = 0;

				nG_app = ( phase == PHASE_STARTUP ? j + 2 : k_G );
			}

			n_iter = nG_app / k_fuse;
			n_left = nG_app % k_fuse;

			// Fused steps: two rotations from set k and two from set k + 1.
			for ( i = 0; i < n_iter; ++i, k += k_fuse, g -= n_fuse )
			{
				g23_k1 = buff_G + (g    )*rs_G + (k    )*cs_G;
				g34_k1 = buff_G + (g + 1)*rs_G + (k    )*cs_G;
				g12_k2 = buff_G + (g - 1)*rs_G + (k + 1)*cs_G;
				g23_k2 = buff_G + (g    )*rs_G + (k + 1)*cs_G;

				a1 = buff_A + (g - 1)*cs_A;
				a2 = buff_A + (g    )*cs_A;
				a3 = buff_A + (g + 1)*cs_A;
				a4 = buff_A + (g + 2)*cs_A;

				gamma23_k1 = g23_k1->real;
				sigma23_k1 = g23_k1->imag;
				gamma34_k1 = g34_k1->real;
				sigma34_k1 = g34_k1->imag;
				gamma12_k2 = g12_k2->real;
				sigma12_k2 = g12_k2->imag;
				gamma23_k2 = g23_k2->real;
				sigma23_k2 = g23_k2->imag;

				is_ident23_k1 = ( gamma23_k1 == one && sigma23_k1 == zero );
				is_ident34_k1 = ( gamma34_k1 == one && sigma34_k1 == zero );
				is_ident12_k2 = ( gamma12_k2 == one && sigma12_k2 == zero );
				is_ident23_k2 = ( gamma23_k2 == one && sigma23_k2 == zero );

				if ( !is_ident23_k1 && !is_ident34_k1 &&
				     !is_ident12_k2 && !is_ident23_k2 )
				{
					// No identity among the four: fused four-column kernel.
					MAC_Apply_G_mx4s_asd( m_A,
					                      &gamma23_k1, &sigma23_k1,
					                      &gamma34_k1, &sigma34_k1,
					                      &gamma12_k2, &sigma12_k2,
					                      &gamma23_k2, &sigma23_k2,
					                      a1, 1, a2, 1, a3, 1, a4, 1 );
				}
				else
				{
					// At least one identity: apply the others pairwise, keeping
					// the set-k rotations ahead of the set-(k+1) rotations.
					if ( !is_ident23_k1 )
					{
						MAC_Apply_G_mx2_asd( m_A, &gamma23_k1, &sigma23_k1, a2, 1, a3, 1 );
					}
					if ( !is_ident34_k1 )
					{
						MAC_Apply_G_mx2_asd( m_A, &gamma34_k1, &sigma34_k1, a3, 1, a4, 1 );
					}
					if ( !is_ident12_k2 )
					{
						MAC_Apply_G_mx2_asd( m_A, &gamma12_k2, &sigma12_k2, a1, 1, a2, 1 );
					}
					if ( !is_ident23_k2 )
					{
						MAC_Apply_G_mx2_asd( m_A, &gamma23_k2, &sigma23_k2, a2, 1, a3, 1 );
					}
				}
			}

			if ( phase == PHASE_STARTUP )
			{
				// Lone trailing rotation of a start-up sweep (row g + 1).
				g34_k1 = buff_G + (g + 1)*rs_G + (k    )*cs_G;

				a3 = buff_A + (g + 1)*cs_A;
				a4 = buff_A + (g + 2)*cs_A;

				gamma34_k1 = g34_k1->real;
				sigma34_k1 = g34_k1->imag;

				if ( gamma34_k1 != one || sigma34_k1 != zero )
				{
					MAC_Apply_G_mx2_asd( m_A, &gamma34_k1, &sigma34_k1, a3, 1, a4, 1 );
				}
			}
			else if ( n_left == 1 )
			{
				// One set is left over: apply its two rotations (rows g and
				// g + 1 of set k) to columns a2, a3, a4.
				g23_k1 = buff_G + (g    )*rs_G + (k    )*cs_G;
				g34_k1 = buff_G + (g + 1)*rs_G + (k    )*cs_G;

				a2 = buff_A + (g    )*cs_A;
				a3 = buff_A + (g + 1)*cs_A;
				a4 = buff_A + (g + 2)*cs_A;

				gamma23_k1 = g23_k1->real;
				sigma23_k1 = g23_k1->imag;
				gamma34_k1 = g34_k1->real;
				sigma34_k1 = g34_k1->imag;

				is_ident23_k1 = ( gamma23_k1 == one && sigma23_k1 == zero );
				is_ident34_k1 = ( gamma34_k1 == one && sigma34_k1 == zero );

				if ( is_ident23_k1 )
				{
					// Nothing at all is applied when both are the identity.
					if ( !is_ident34_k1 )
					{
						MAC_Apply_G_mx2_asd( m_A, &gamma34_k1, &sigma34_k1, a3, 1, a4, 1 );
					}
				}
				else if ( is_ident34_k1 )
				{
					MAC_Apply_G_mx2_asd( m_A, &gamma23_k1, &sigma23_k1, a2, 1, a3, 1 );
				}
				else
				{
					// Both rotations are non-trivial: fused three-column kernel.
					MAC_Apply_G_mx3_asd( m_A,
					                     &gamma23_k1, &sigma23_k1,
					                     &gamma34_k1, &sigma34_k1,
					                     a2, 1, a3, 1, a4, 1 );
				}
			}
		}
	}

	return FLA_SUCCESS;
}
| FLA_Error FLA_Apply_G_rf_asd_var3b | ( | int | k_G, |
| int | m_A, | ||
| int | n_A, | ||
| int | i_k, | ||
| int | iTL, | ||
| dcomplex * | buff_G, | ||
| int | rs_G, | ||
| int | cs_G, | ||
| double * | buff_A, | ||
| int | rs_A, | ||
| int | cs_A | ||
| ) |
References bli_d0(), bli_d1(), FLA_Apply_G_rf_asd_var1(), dcomplex::imag, and dcomplex::real.
Referenced by FLA_Apply_G_rf_asm_var3b(), and FLA_Apply_G_rf_bld_var3b().
{
double one = bli_d1();
double zero = bli_d0();
double gamma23_k1;
double sigma23_k1;
double gamma34_k1;
double sigma34_k1;
double gamma12_k2;
double sigma12_k2;
double gamma23_k2;
double sigma23_k2;
double* a1;
double* a2;
double* a3;
double* a4;
dcomplex* g23_k1;
dcomplex* g34_k1;
dcomplex* g12_k2;
dcomplex* g23_k2;
int i, j, g, k;
int nG, nG_app;
int n_iter;
int n_left;
int k_minus_1;
int n_fuse;
int k_fuse;
int is_ident23_k1, is_ident34_k1;
int is_ident12_k2, is_ident23_k2;
int has_ident;
int m_app;
k_minus_1 = k_G - 1;
nG = n_A - 1;
n_fuse = 2;
k_fuse = 2;
// Use the simple variant for nG < (k - 1) or k == 1.
if ( nG < 2*k_minus_1 || k_G == 1 )
{
FLA_Apply_G_rf_asd_var1( k_G,
m_A,
n_A,
buff_G, rs_G, cs_G,
buff_A, rs_A, cs_A );
return FLA_SUCCESS;
}
// Start-up phase.
for ( j = -1; j < k_minus_1; j += n_fuse )
{
nG_app = j + 2;
n_iter = nG_app / k_fuse;
//n_iter = nG_app % k_fuse;
n_left = 1;
for ( i = 0, k = 0, g = j; i < n_iter; ++i, k += k_fuse, g -= n_fuse )
{
g23_k1 = buff_G + (g )*rs_G + (k )*cs_G;
g34_k1 = buff_G + (g + 1)*rs_G + (k )*cs_G;
g12_k2 = buff_G + (g - 1)*rs_G + (k + 1)*cs_G;
g23_k2 = buff_G + (g )*rs_G + (k + 1)*cs_G;
a1 = buff_A + (g - 1)*cs_A;
a2 = buff_A + (g )*cs_A;
a3 = buff_A + (g + 1)*cs_A;
a4 = buff_A + (g + 2)*cs_A;
gamma23_k1 = g23_k1->real;
sigma23_k1 = g23_k1->imag;
gamma34_k1 = g34_k1->real;
sigma34_k1 = g34_k1->imag;
gamma12_k2 = g12_k2->real;
sigma12_k2 = g12_k2->imag;
gamma23_k2 = g23_k2->real;
sigma23_k2 = g23_k2->imag;
is_ident23_k1 = ( gamma23_k1 == one && sigma23_k1 == zero );
is_ident34_k1 = ( gamma34_k1 == one && sigma34_k1 == zero );
is_ident12_k2 = ( gamma12_k2 == one && sigma12_k2 == zero );
is_ident23_k2 = ( gamma23_k2 == one && sigma23_k2 == zero );
has_ident = ( is_ident23_k1 || is_ident34_k1 ||
is_ident12_k2 || is_ident23_k2 );
m_app = min( i_k + 3 + j - iTL, m_A );
m_app = max( m_app, 0 );
if ( has_ident )
{
// Apply to pairs of columns as needed.
if ( !is_ident23_k1 )
MAC_Apply_G_mx2_asd( m_app,
&gamma23_k1,
&sigma23_k1,
a2, 1,
a3, 1 );
if ( !is_ident34_k1 )
MAC_Apply_G_mx2_asd( m_app,
&gamma34_k1,
&sigma34_k1,
a3, 1,
a4, 1 );
if ( !is_ident12_k2 )
MAC_Apply_G_mx2_asd( m_app,
&gamma12_k2,
&sigma12_k2,
a1, 1,
a2, 1 );
if ( !is_ident23_k2 )
MAC_Apply_G_mx2_asd( m_app,
&gamma23_k2,
&sigma23_k2,
a2, 1,
a3, 1 );
}
else
{
// Apply to all four columns.
MAC_Apply_G_mx4s_asd( m_app,
&gamma23_k1,
&sigma23_k1,
&gamma34_k1,
&sigma34_k1,
&gamma12_k2,
&sigma12_k2,
&gamma23_k2,
&sigma23_k2,
a1, 1,
a2, 1,
a3, 1,
a4, 1 );
}
}
if ( n_left == 1 )
{
g34_k1 = buff_G + (g + 1)*rs_G + (k )*cs_G;
a3 = buff_A + (g + 1)*cs_A;
a4 = buff_A + (g + 2)*cs_A;
gamma34_k1 = g34_k1->real;
sigma34_k1 = g34_k1->imag;
is_ident34_k1 = ( gamma34_k1 == one && sigma34_k1 == zero );
m_app = min( i_k + 3 + j - iTL, m_A );
m_app = max( m_app, 0 );
if ( !is_ident34_k1 )
MAC_Apply_G_mx2_asd( m_app,
&gamma34_k1,
&sigma34_k1,
a3, 1,
a4, 1 );
}
}
// Pipeline stage
for ( ; j < nG - 1; j += n_fuse )
{
nG_app = k_G;
n_iter = nG_app / k_fuse;
n_left = nG_app % k_fuse;
for ( i = 0, k = 0, g = j; i < n_iter; ++i, k += k_fuse, g -= n_fuse )
{
g23_k1 = buff_G + (g )*rs_G + (k )*cs_G;
g34_k1 = buff_G + (g + 1)*rs_G + (k )*cs_G;
g12_k2 = buff_G + (g - 1)*rs_G + (k + 1)*cs_G;
g23_k2 = buff_G + (g )*rs_G + (k + 1)*cs_G;
a1 = buff_A + (g - 1)*cs_A;
a2 = buff_A + (g )*cs_A;
a3 = buff_A + (g + 1)*cs_A;
a4 = buff_A + (g + 2)*cs_A;
gamma23_k1 = g23_k1->real;
sigma23_k1 = g23_k1->imag;
gamma34_k1 = g34_k1->real;
sigma34_k1 = g34_k1->imag;
gamma12_k2 = g12_k2->real;
sigma12_k2 = g12_k2->imag;
gamma23_k2 = g23_k2->real;
sigma23_k2 = g23_k2->imag;
is_ident23_k1 = ( gamma23_k1 == one && sigma23_k1 == zero );
is_ident34_k1 = ( gamma34_k1 == one && sigma34_k1 == zero );
is_ident12_k2 = ( gamma12_k2 == one && sigma12_k2 == zero );
is_ident23_k2 = ( gamma23_k2 == one && sigma23_k2 == zero );
has_ident = ( is_ident23_k1 || is_ident34_k1 ||
is_ident12_k2 || is_ident23_k2 );
m_app = min( i_k + 3 + j - iTL, m_A );
m_app = max( m_app, 0 );
if ( has_ident )
{
// Apply to pairs of columns as needed.
if ( !is_ident23_k1 )
MAC_Apply_G_mx2_asd( m_app,
&gamma23_k1,
&sigma23_k1,
a2, 1,
a3, 1 );
if ( !is_ident34_k1 )
MAC_Apply_G_mx2_asd( m_app,
&gamma34_k1,
&sigma34_k1,
a3, 1,
a4, 1 );
if ( !is_ident12_k2 )
MAC_Apply_G_mx2_asd( m_app,
&gamma12_k2,
&sigma12_k2,
a1, 1,
a2, 1 );
if ( !is_ident23_k2 )
MAC_Apply_G_mx2_asd( m_app,
&gamma23_k2,
&sigma23_k2,
a2, 1,
a3, 1 );
}
else
{
// Apply to all four columns.
MAC_Apply_G_mx4s_asd( m_app,
&gamma23_k1,
&sigma23_k1,
&gamma34_k1,
&sigma34_k1,
&gamma12_k2,
&sigma12_k2,
&gamma23_k2,
&sigma23_k2,
a1, 1,
a2, 1,
a3, 1,
a4, 1 );
}
}
if ( n_left == 1 )
{
g23_k1 = buff_G + (g )*rs_G + (k )*cs_G;
g34_k1 = buff_G + (g + 1)*rs_G + (k )*cs_G;
a2 = buff_A + (g )*cs_A;
a3 = buff_A + (g + 1)*cs_A;
a4 = buff_A + (g + 2)*cs_A;
gamma23_k1 = g23_k1->real;
sigma23_k1 = g23_k1->imag;
gamma34_k1 = g34_k1->real;
sigma34_k1 = g34_k1->imag;
is_ident23_k1 = ( gamma23_k1 == one && sigma23_k1 == zero );
is_ident34_k1 = ( gamma34_k1 == one && sigma34_k1 == zero );
m_app = min( i_k + 3 + j - iTL, m_A );
m_app = max( m_app, 0 );
if ( !is_ident23_k1 && is_ident34_k1 )
{
MAC_Apply_G_mx2_asd( m_app,
&gamma23_k1,
&sigma23_k1,
a2, 1,
a3, 1 );
}
else if ( is_ident23_k1 && !is_ident34_k1 )
{
MAC_Apply_G_mx2_asd( m_app,
&gamma34_k1,
&sigma34_k1,
a3, 1,
a4, 1 );
}
else
{
MAC_Apply_G_mx3_asd( m_app,
&gamma23_k1,
&sigma23_k1,
&gamma34_k1,
&sigma34_k1,
a2, 1,
a3, 1,
a4, 1 );
}
}
}
// Shutdown stage
for ( j = nG % n_fuse; j < k_G; j += n_fuse )
{
g = nG - 1;
k = j;
//n_left = 1;
//if ( n_left == 1 )
{
g23_k1 = buff_G + (g )*rs_G + (k )*cs_G;
a2 = buff_A + (g )*cs_A;
a3 = buff_A + (g + 1)*cs_A;
gamma23_k1 = g23_k1->real;
sigma23_k1 = g23_k1->imag;
is_ident23_k1 = ( gamma23_k1 == one && sigma23_k1 == zero );
m_app = m_A;
if ( !is_ident23_k1 )
MAC_Apply_G_mx2_asd( m_app,
&gamma23_k1,
&sigma23_k1,
a2, 1,
a3, 1 );
++k;
--g;
}
nG_app = k_minus_1 - j;
n_iter = nG_app / k_fuse;
n_left = nG_app % k_fuse;
for ( i = 0; i < n_iter; ++i, k += k_fuse, g -= n_fuse )
{
g23_k1 = buff_G + (g )*rs_G + (k )*cs_G;
g34_k1 = buff_G + (g + 1)*rs_G + (k )*cs_G;
g12_k2 = buff_G + (g - 1)*rs_G + (k + 1)*cs_G;
g23_k2 = buff_G + (g )*rs_G + (k + 1)*cs_G;
a1 = buff_A + (g - 1)*cs_A;
a2 = buff_A + (g )*cs_A;
a3 = buff_A + (g + 1)*cs_A;
a4 = buff_A + (g + 2)*cs_A;
gamma23_k1 = g23_k1->real;
sigma23_k1 = g23_k1->imag;
gamma34_k1 = g34_k1->real;
sigma34_k1 = g34_k1->imag;
gamma12_k2 = g12_k2->real;
sigma12_k2 = g12_k2->imag;
gamma23_k2 = g23_k2->real;
sigma23_k2 = g23_k2->imag;
is_ident23_k1 = ( gamma23_k1 == one && sigma23_k1 == zero );
is_ident34_k1 = ( gamma34_k1 == one && sigma34_k1 == zero );
is_ident12_k2 = ( gamma12_k2 == one && sigma12_k2 == zero );
is_ident23_k2 = ( gamma23_k2 == one && sigma23_k2 == zero );
has_ident = ( is_ident23_k1 || is_ident34_k1 ||
is_ident12_k2 || is_ident23_k2 );
m_app = m_A;
if ( has_ident )
{
// Apply to pairs of columns as needed.
if ( !is_ident23_k1 )
MAC_Apply_G_mx2_asd( m_app,
&gamma23_k1,
&sigma23_k1,
a2, 1,
a3, 1 );
if ( !is_ident34_k1 )
MAC_Apply_G_mx2_asd( m_app,
&gamma34_k1,
&sigma34_k1,
a3, 1,
a4, 1 );
if ( !is_ident12_k2 )
MAC_Apply_G_mx2_asd( m_app,
&gamma12_k2,
&sigma12_k2,
a1, 1,
a2, 1 );
if ( !is_ident23_k2 )
MAC_Apply_G_mx2_asd( m_app,
&gamma23_k2,
&sigma23_k2,
a2, 1,
a3, 1 );
}
else
{
// Apply to all four columns.
MAC_Apply_G_mx4s_asd( m_app,
&gamma23_k1,
&sigma23_k1,
&gamma34_k1,
&sigma34_k1,
&gamma12_k2,
&sigma12_k2,
&gamma23_k2,
&sigma23_k2,
a1, 1,
a2, 1,
a3, 1,
a4, 1 );
}
}
if ( n_left == 1 )
{
g23_k1 = buff_G + (g )*rs_G + (k )*cs_G;
g34_k1 = buff_G + (g + 1)*rs_G + (k )*cs_G;
a2 = buff_A + (g )*cs_A;
a3 = buff_A + (g + 1)*cs_A;
a4 = buff_A + (g + 2)*cs_A;
gamma23_k1 = g23_k1->real;
sigma23_k1 = g23_k1->imag;
gamma34_k1 = g34_k1->real;
sigma34_k1 = g34_k1->imag;
is_ident23_k1 = ( gamma23_k1 == one && sigma23_k1 == zero );
is_ident34_k1 = ( gamma34_k1 == one && sigma34_k1 == zero );
m_app = m_A;
if ( !is_ident23_k1 && is_ident34_k1 )
{
MAC_Apply_G_mx2_asd( m_app,
&gamma23_k1,
&sigma23_k1,
a2, 1,
a3, 1 );
}
else if ( is_ident23_k1 && !is_ident34_k1 )
{
MAC_Apply_G_mx2_asd( m_app,
&gamma34_k1,
&sigma34_k1,
a3, 1,
a4, 1 );
}
else
{
MAC_Apply_G_mx3_asd( m_app,
&gamma23_k1,
&sigma23_k1,
&gamma34_k1,
&sigma34_k1,
a2, 1,
a3, 1,
a4, 1 );
}
}
}
return FLA_SUCCESS;
}
| FLA_Error FLA_Apply_G_rf_asd_var4 | ( | int | k_G, |
| int | m_A, | ||
| int | n_A, | ||
| dcomplex * | buff_G, | ||
| int | rs_G, | ||
| int | cs_G, | ||
| double * | buff_A, | ||
| int | rs_A, | ||
| int | cs_A | ||
| ) |
| FLA_Error FLA_Apply_G_rf_asd_var5 | ( | int | k_G, |
| int | m_A, | ||
| int | n_A, | ||
| dcomplex * | buff_G, | ||
| int | rs_G, | ||
| int | cs_G, | ||
| double * | buff_A, | ||
| int | rs_A, | ||
| int | cs_A | ||
| ) |
| FLA_Error FLA_Apply_G_rf_asd_var5b | ( | int | k_G, |
| int | m_A, | ||
| int | n_A, | ||
| int | i_k, | ||
| int | iTL, | ||
| dcomplex * | buff_G, | ||
| int | rs_G, | ||
| int | cs_G, | ||
| double * | buff_A, | ||
| int | rs_A, | ||
| int | cs_A | ||
| ) |
| FLA_Error FLA_Apply_G_rf_asd_var6 | ( | int | k_G, |
| int | m_A, | ||
| int | n_A, | ||
| dcomplex * | buff_G, | ||
| int | rs_G, | ||
| int | cs_G, | ||
| double * | buff_A, | ||
| int | rs_A, | ||
| int | cs_A | ||
| ) |
References bli_d0(), bli_d1(), FLA_Apply_G_rf_asd_var1(), dcomplex::imag, and dcomplex::real.
Referenced by FLA_Apply_G_rf_asm_var6(), and FLA_Apply_G_rf_bld_var6().
{
// Applies the rotation data stored in G to the columns of the real
// double-precision matrix A in three stages (start-up, pipeline, shutdown),
// fusing two rotations per kernel call where possible.
//
// Parameters (as used by the code below):
//   k_G            - number of columns of G that are traversed.
//   m_A            - row count handed to every kernel call.
//   n_A            - number of columns of A; nG = n_A - 1 rows of G are used.
//   buff_G,rs_G,cs_G - base pointer and row/column strides of G. Each entry is
//                    read as a pair: real part -> gamma, imaginary part -> sigma.
//   buff_A,cs_A    - base pointer and column stride of A.
//   rs_A           - only forwarded to the fallback FLA_Apply_G_rf_asd_var1();
//                    the fused path passes a literal 1 to the kernels
//                    (presumably the row stride, i.e. column-major A is
//                    assumed -- TODO confirm against the macro definitions).
//
// Returns FLA_SUCCESS on every path.
//
// Index convention: entry (g, k) of G is paired with columns g and g+1 of A.
// Within one outer step the visited entries satisfy g + k == const, walking
// down in g while k increases.
double one = bli_d1();
double zero = bli_d0();
double gamma12;
double sigma12;
double gamma23;
double sigma23;
double* a1;
double* a2;
double* a3;
dcomplex* g12;
dcomplex* g23;
int i, j, g, k;
int nG, nG_app;
int n_iter;
int n_left;
int k_minus_1;
int n_fuse;
int is_ident12, is_ident23;
k_minus_1 = k_G - 1;
nG = n_A - 1;
// Two rotations are fused per inner iteration.
n_fuse = 2;
// Use the simple variant for nG < (k - 1) or k == 1.
if ( nG < k_minus_1 || k_G == 1 )
{
FLA_Apply_G_rf_asd_var1( k_G,
m_A,
n_A,
buff_G, rs_G, cs_G,
buff_A, rs_A, cs_A );
return FLA_SUCCESS;
}
// Start-up phase.
// Step j touches j + 1 entries of G, starting at (j, 0) and moving to
// (j-1, 1), (j-2, 2), ...
for ( j = 0; j < k_minus_1; ++j )
{
nG_app = j + 1;
n_iter = nG_app / n_fuse;
n_left = nG_app % n_fuse;
// Note: the loop initialiser sets k and g even when n_iter == 0; the
// leftover block below relies on the values k and g hold on loop exit.
for ( i = 0, k = 0, g = nG_app - 1; i < n_iter; ++i, k += n_fuse, g -= n_fuse )
{
// g23 is entry (g, k); g12 is entry (g-1, k+1), i.e. the next one on the
// same anti-diagonal. Together they span columns g-1, g, g+1 of A.
g12 = buff_G + (g - 1)*rs_G + (k + 1)*cs_G;
g23 = buff_G + (g )*rs_G + (k )*cs_G;
a1 = buff_A + (g - 1)*cs_A;
a2 = buff_A + (g )*cs_A;
a3 = buff_A + (g + 1)*cs_A;
gamma12 = g12->real;
sigma12 = g12->imag;
gamma23 = g23->real;
sigma23 = g23->imag;
// A (gamma, sigma) pair equal to (one, zero) is treated as an identity
// and skipped.
is_ident12 = ( gamma12 == one && sigma12 == zero );
is_ident23 = ( gamma23 == one && sigma23 == zero );
if ( !is_ident12 && is_ident23 )
{
// Apply only to columns 1 and 2.
MAC_Apply_G_mx2_asd( m_A,
&gamma12,
&sigma12,
a1, 1,
a2, 1 );
}
else if ( is_ident12 && !is_ident23 )
{
// Apply only to columns 2 and 3.
MAC_Apply_G_mx2_asd( m_A,
&gamma23,
&sigma23,
a2, 1,
a3, 1 );
}
else if ( !is_ident12 && !is_ident23 )
{
// Apply to all three columns.
MAC_Apply_G_mx3b_asd( m_A,
&gamma12,
&sigma12,
&gamma23,
&sigma23,
a1, 1,
a2, 1,
a3, 1 );
}
// If both are identities nothing is applied.
}
// Odd count: one unfused entry (g, k) remains, acting on columns g, g+1.
if ( n_left == 1 )
{
g23 = buff_G + (g )*rs_G + (k )*cs_G;
a2 = buff_A + (g )*cs_A;
a3 = buff_A + (g + 1)*cs_A;
gamma23 = g23->real;
sigma23 = g23->imag;
is_ident23 = ( gamma23 == one && sigma23 == zero );
if ( !is_ident23 )
MAC_Apply_G_mx2_asd( m_A,
&gamma23,
&sigma23,
a2, 1,
a3, 1 );
}
}
// Pipeline stage
// Every step touches exactly k_G entries, starting at (j, 0).
for ( j = k_minus_1; j < nG; ++j )
{
nG_app = k_G;
n_iter = nG_app / n_fuse;
n_left = nG_app % n_fuse;
for ( i = 0, k = 0, g = j; i < n_iter; ++i, k += n_fuse, g -= n_fuse )
{
g12 = buff_G + (g - 1)*rs_G + (k + 1)*cs_G;
g23 = buff_G + (g )*rs_G + (k )*cs_G;
a1 = buff_A + (g - 1)*cs_A;
a2 = buff_A + (g )*cs_A;
a3 = buff_A + (g + 1)*cs_A;
gamma12 = g12->real;
sigma12 = g12->imag;
gamma23 = g23->real;
sigma23 = g23->imag;
is_ident12 = ( gamma12 == one && sigma12 == zero );
is_ident23 = ( gamma23 == one && sigma23 == zero );
if ( !is_ident12 && is_ident23 )
{
// Apply only to columns 1 and 2.
MAC_Apply_G_mx2_asd( m_A,
&gamma12,
&sigma12,
a1, 1,
a2, 1 );
}
else if ( is_ident12 && !is_ident23 )
{
// Apply only to columns 2 and 3.
MAC_Apply_G_mx2_asd( m_A,
&gamma23,
&sigma23,
a2, 1,
a3, 1 );
}
else if ( !is_ident12 && !is_ident23 )
{
// Apply to all three columns.
MAC_Apply_G_mx3b_asd( m_A,
&gamma12,
&sigma12,
&gamma23,
&sigma23,
a1, 1,
a2, 1,
a3, 1 );
}
}
// Leftover single entry when k_G is odd.
if ( n_left == 1 )
{
g23 = buff_G + (g )*rs_G + (k )*cs_G;
a2 = buff_A + (g )*cs_A;
a3 = buff_A + (g + 1)*cs_A;
gamma23 = g23->real;
sigma23 = g23->imag;
is_ident23 = ( gamma23 == one && sigma23 == zero );
if ( !is_ident23 )
MAC_Apply_G_mx2_asd( m_A,
&gamma23,
&sigma23,
a2, 1,
a3, 1 );
}
}
// Shutdown stage
// Step j touches the k_G - j remaining entries, starting at (nG-1, j).
for ( j = 1; j < k_G; ++j )
{
nG_app = k_G - j;
n_iter = nG_app / n_fuse;
n_left = nG_app % n_fuse;
for ( i = 0, k = j, g = nG - 1; i < n_iter; ++i, k += n_fuse, g -= n_fuse )
{
g12 = buff_G + (g - 1)*rs_G + (k + 1)*cs_G;
g23 = buff_G + (g )*rs_G + (k )*cs_G;
a1 = buff_A + (g - 1)*cs_A;
a2 = buff_A + (g )*cs_A;
a3 = buff_A + (g + 1)*cs_A;
gamma12 = g12->real;
sigma12 = g12->imag;
gamma23 = g23->real;
sigma23 = g23->imag;
is_ident12 = ( gamma12 == one && sigma12 == zero );
is_ident23 = ( gamma23 == one && sigma23 == zero );
if ( !is_ident12 && is_ident23 )
{
// Apply only to columns 1 and 2.
MAC_Apply_G_mx2_asd( m_A,
&gamma12,
&sigma12,
a1, 1,
a2, 1 );
}
else if ( is_ident12 && !is_ident23 )
{
// Apply only to columns 2 and 3.
MAC_Apply_G_mx2_asd( m_A,
&gamma23,
&sigma23,
a2, 1,
a3, 1 );
}
else if ( !is_ident12 && !is_ident23 )
{
// Apply to all three columns.
MAC_Apply_G_mx3b_asd( m_A,
&gamma12,
&sigma12,
&gamma23,
&sigma23,
a1, 1,
a2, 1,
a3, 1 );
}
}
// Leftover single entry when k_G - j is odd.
if ( n_left == 1 )
{
g23 = buff_G + (g )*rs_G + (k )*cs_G;
a2 = buff_A + (g )*cs_A;
a3 = buff_A + (g + 1)*cs_A;
gamma23 = g23->real;
sigma23 = g23->imag;
is_ident23 = ( gamma23 == one && sigma23 == zero );
if ( !is_ident23 )
MAC_Apply_G_mx2_asd( m_A,
&gamma23,
&sigma23,
a2, 1,
a3, 1 );
}
}
return FLA_SUCCESS;
}
| FLA_Error FLA_Apply_G_rf_asd_var6b | ( | int | k_G, |
| int | m_A, | ||
| int | n_A, | ||
| int | i_k, | ||
| int | iTL, | ||
| dcomplex * | buff_G, | ||
| int | rs_G, | ||
| int | cs_G, | ||
| double * | buff_A, | ||
| int | rs_A, | ||
| int | cs_A | ||
| ) |
References bli_d0(), bli_d1(), FLA_Apply_G_rf_asd_var1(), dcomplex::imag, and dcomplex::real.
Referenced by FLA_Apply_G_rf_asm_var6b(), and FLA_Apply_G_rf_bld_var6b().
{
// Same three-stage (start-up, pipeline, shutdown) traversal of G as the
// non-"b" variant 6, but the row count handed to the kernels is limited to
// m_app during the start-up and pipeline stages.
//
// Parameters (as used by the code below):
//   k_G            - number of columns of G that are traversed.
//   m_A            - upper bound on the row count; used unmodified in the
//                    shutdown stage and in the fallback path.
//   n_A            - number of columns of A; nG = n_A - 1 rows of G are used.
//   i_k, iTL       - only enter through m_app = i_k + 2 + j - iTL, clamped to
//                    [0, m_A]. Their meaning is not visible here
//                    (NOTE(review): presumably offsets describing which rows
//                    of A can be nonzero -- confirm with the callers).
//   buff_G,rs_G,cs_G - base pointer and strides of G; real part -> gamma,
//                    imaginary part -> sigma.
//   buff_A,cs_A    - base pointer and column stride of A.
//   rs_A           - only forwarded to FLA_Apply_G_rf_asd_var1(); the fused
//                    path passes a literal 1 to the kernels (presumably the
//                    row stride -- TODO confirm).
//
// Returns FLA_SUCCESS on every path.
double one = bli_d1();
double zero = bli_d0();
double gamma12;
double sigma12;
double gamma23;
double sigma23;
double* a1;
double* a2;
double* a3;
dcomplex* g12;
dcomplex* g23;
int i, j, g, k;
int nG, nG_app;
int n_iter;
int n_left;
int k_minus_1;
int n_fuse;
int is_ident12, is_ident23;
int m_app;
k_minus_1 = k_G - 1;
nG = n_A - 1;
// Two rotations are fused per inner iteration.
n_fuse = 2;
// Use the simple variant for nG < (k - 1) or k == 1.
// Note: the fallback ignores i_k and iTL and works on all m_A rows.
if ( nG < k_minus_1 || k_G == 1 )
{
FLA_Apply_G_rf_asd_var1( k_G,
m_A,
n_A,
buff_G, rs_G, cs_G,
buff_A, rs_A, cs_A );
return FLA_SUCCESS;
}
// Start-up phase.
// Step j touches j + 1 entries of G, starting at (j, 0) and moving to
// (j-1, 1), (j-2, 2), ...
for ( j = 0; j < k_minus_1; ++j )
{
nG_app = j + 1;
n_iter = nG_app / n_fuse;
n_left = nG_app % n_fuse;
// The loop initialiser sets k and g even when n_iter == 0; the leftover
// block below relies on the values they hold on loop exit.
for ( i = 0, k = 0, g = nG_app - 1; i < n_iter; ++i, k += n_fuse, g -= n_fuse )
{
// g23 is entry (g, k); g12 is entry (g-1, k+1). Together they span
// columns g-1, g, g+1 of A.
g12 = buff_G + (g - 1)*rs_G + (k + 1)*cs_G;
g23 = buff_G + (g )*rs_G + (k )*cs_G;
a1 = buff_A + (g - 1)*cs_A;
a2 = buff_A + (g )*cs_A;
a3 = buff_A + (g + 1)*cs_A;
gamma12 = g12->real;
sigma12 = g12->imag;
gamma23 = g23->real;
sigma23 = g23->imag;
// (gamma, sigma) == (one, zero) is treated as an identity and skipped.
is_ident12 = ( gamma12 == one && sigma12 == zero );
is_ident23 = ( gamma23 == one && sigma23 == zero );
// Row count for this step, clamped to the range [0, m_A]. It depends
// only on j, not on the inner position (g, k).
m_app = min( i_k + 2 + j - iTL, m_A );
m_app = max( m_app, 0 );
if ( !is_ident12 && is_ident23 )
{
// Apply only to columns 1 and 2.
MAC_Apply_G_mx2_asd( m_app,
&gamma12,
&sigma12,
a1, 1,
a2, 1 );
}
else if ( is_ident12 && !is_ident23 )
{
// Apply only to columns 2 and 3.
MAC_Apply_G_mx2_asd( m_app,
&gamma23,
&sigma23,
a2, 1,
a3, 1 );
}
else if ( !is_ident12 && !is_ident23 )
{
// Apply to all three columns.
MAC_Apply_G_mx3b_asd( m_app,
&gamma12,
&sigma12,
&gamma23,
&sigma23,
a1, 1,
a2, 1,
a3, 1 );
}
}
// Odd count: one unfused entry (g, k) remains, acting on columns g, g+1.
if ( n_left == 1 )
{
g23 = buff_G + (g )*rs_G + (k )*cs_G;
a2 = buff_A + (g )*cs_A;
a3 = buff_A + (g + 1)*cs_A;
gamma23 = g23->real;
sigma23 = g23->imag;
is_ident23 = ( gamma23 == one && sigma23 == zero );
m_app = min( i_k + 2 + j - iTL, m_A );
m_app = max( m_app, 0 );
if ( !is_ident23 )
MAC_Apply_G_mx2_asd( m_app,
&gamma23,
&sigma23,
a2, 1,
a3, 1 );
}
}
// Pipeline stage
// Every step touches exactly k_G entries, starting at (j, 0).
for ( j = k_minus_1; j < nG; ++j )
{
nG_app = k_G;
n_iter = nG_app / n_fuse;
n_left = nG_app % n_fuse;
for ( i = 0, k = 0, g = j; i < n_iter; ++i, k += n_fuse, g -= n_fuse )
{
g12 = buff_G + (g - 1)*rs_G + (k + 1)*cs_G;
g23 = buff_G + (g )*rs_G + (k )*cs_G;
a1 = buff_A + (g - 1)*cs_A;
a2 = buff_A + (g )*cs_A;
a3 = buff_A + (g + 1)*cs_A;
gamma12 = g12->real;
sigma12 = g12->imag;
gamma23 = g23->real;
sigma23 = g23->imag;
is_ident12 = ( gamma12 == one && sigma12 == zero );
is_ident23 = ( gamma23 == one && sigma23 == zero );
// Same clamped row count as in the start-up phase.
m_app = min( i_k + 2 + j - iTL, m_A );
m_app = max( m_app, 0 );
if ( !is_ident12 && is_ident23 )
{
// Apply only to columns 1 and 2.
MAC_Apply_G_mx2_asd( m_app,
&gamma12,
&sigma12,
a1, 1,
a2, 1 );
}
else if ( is_ident12 && !is_ident23 )
{
// Apply only to columns 2 and 3.
MAC_Apply_G_mx2_asd( m_app,
&gamma23,
&sigma23,
a2, 1,
a3, 1 );
}
else if ( !is_ident12 && !is_ident23 )
{
// Apply to all three columns.
MAC_Apply_G_mx3b_asd( m_app,
&gamma12,
&sigma12,
&gamma23,
&sigma23,
a1, 1,
a2, 1,
a3, 1 );
}
}
// Leftover single entry when k_G is odd.
if ( n_left == 1 )
{
g23 = buff_G + (g )*rs_G + (k )*cs_G;
a2 = buff_A + (g )*cs_A;
a3 = buff_A + (g + 1)*cs_A;
gamma23 = g23->real;
sigma23 = g23->imag;
is_ident23 = ( gamma23 == one && sigma23 == zero );
m_app = min( i_k + 2 + j - iTL, m_A );
m_app = max( m_app, 0 );
if ( !is_ident23 )
MAC_Apply_G_mx2_asd( m_app,
&gamma23,
&sigma23,
a2, 1,
a3, 1 );
}
}
// Shutdown stage
// Step j touches the k_G - j remaining entries, starting at (nG-1, j).
// Unlike the earlier stages, the full row count m_A is used here.
for ( j = 1; j < k_G; ++j )
{
nG_app = k_G - j;
n_iter = nG_app / n_fuse;
n_left = nG_app % n_fuse;
for ( i = 0, k = j, g = nG - 1; i < n_iter; ++i, k += n_fuse, g -= n_fuse )
{
g12 = buff_G + (g - 1)*rs_G + (k + 1)*cs_G;
g23 = buff_G + (g )*rs_G + (k )*cs_G;
a1 = buff_A + (g - 1)*cs_A;
a2 = buff_A + (g )*cs_A;
a3 = buff_A + (g + 1)*cs_A;
gamma12 = g12->real;
sigma12 = g12->imag;
gamma23 = g23->real;
sigma23 = g23->imag;
is_ident12 = ( gamma12 == one && sigma12 == zero );
is_ident23 = ( gamma23 == one && sigma23 == zero );
m_app = m_A;
if ( !is_ident12 && is_ident23 )
{
// Apply only to columns 1 and 2.
MAC_Apply_G_mx2_asd( m_app,
&gamma12,
&sigma12,
a1, 1,
a2, 1 );
}
else if ( is_ident12 && !is_ident23 )
{
// Apply only to columns 2 and 3.
MAC_Apply_G_mx2_asd( m_app,
&gamma23,
&sigma23,
a2, 1,
a3, 1 );
}
else if ( !is_ident12 && !is_ident23 )
{
// Apply to all three columns.
MAC_Apply_G_mx3b_asd( m_app,
&gamma12,
&sigma12,
&gamma23,
&sigma23,
a1, 1,
a2, 1,
a3, 1 );
}
}
//for ( k = 0; k < nG_app_left; k += 1, g -= 1 )
// Leftover single entry when k_G - j is odd.
if ( n_left == 1 )
{
g23 = buff_G + (g )*rs_G + (k )*cs_G;
a2 = buff_A + (g )*cs_A;
a3 = buff_A + (g + 1)*cs_A;
gamma23 = g23->real;
sigma23 = g23->imag;
is_ident23 = ( gamma23 == one && sigma23 == zero );
m_app = m_A;
if ( !is_ident23 )
MAC_Apply_G_mx2_asd( m_app,
&gamma23,
&sigma23,
a2, 1,
a3, 1 );
}
}
return FLA_SUCCESS;
}
| FLA_Error FLA_Apply_G_rf_asd_var7 | ( | int | k_G, |
| int | m_A, | ||
| int | n_A, | ||
| dcomplex * | buff_G, | ||
| int | rs_G, | ||
| int | cs_G, | ||
| double * | buff_A, | ||
| int | rs_A, | ||
| int | cs_A | ||
| ) |
| FLA_Error FLA_Apply_G_rf_asd_var8 | ( | int | k_G, |
| int | m_A, | ||
| int | n_A, | ||
| dcomplex * | buff_G, | ||
| int | rs_G, | ||
| int | cs_G, | ||
| double * | buff_A, | ||
| int | rs_A, | ||
| int | cs_A | ||
| ) |
| FLA_Error FLA_Apply_G_rf_asd_var8b | ( | int | k_G, |
| int | m_A, | ||
| int | n_A, | ||
| int | i_k, | ||
| int | iTL, | ||
| dcomplex * | buff_G, | ||
| int | rs_G, | ||
| int | cs_G, | ||
| double * | buff_A, | ||
| int | rs_A, | ||
| int | cs_A | ||
| ) |
| FLA_Error FLA_Apply_G_rf_asd_var9 | ( | int | k_G, |
| int | m_A, | ||
| int | n_A, | ||
| dcomplex * | buff_G, | ||
| int | rs_G, | ||
| int | cs_G, | ||
| double * | buff_A, | ||
| int | rs_A, | ||
| int | cs_A | ||
| ) |
References bli_d0(), bli_d1(), FLA_Apply_G_rf_asd_var1(), dcomplex::imag, and dcomplex::real.
Referenced by FLA_Apply_G_rf_asm_var9(), and FLA_Apply_G_rf_bld_var9().
{
// Applies the rotation data stored in G to the columns of the real
// double-precision matrix A in three stages (start-up, pipeline, shutdown).
// In contrast to variant 6, each fused kernel call pairs two entries from the
// SAME column k of G -- rows g and g+1 -- and the outer index j advances by
// n_fuse = 2 per step.
//
// Parameters (as used by the code below):
//   k_G            - number of columns of G that are traversed.
//   m_A            - row count handed to every kernel call.
//   n_A            - number of columns of A; nG = n_A - 1 rows of G are used.
//   buff_G,rs_G,cs_G - base pointer and strides of G; real part -> gamma,
//                    imaginary part -> sigma.
//   buff_A,cs_A    - base pointer and column stride of A.
//   rs_A           - only forwarded to FLA_Apply_G_rf_asd_var1(); the fused
//                    path passes a literal 1 to the kernels (presumably the
//                    row stride -- TODO confirm).
//
// Returns FLA_SUCCESS on every path.
double one = bli_d1();
double zero = bli_d0();
double gamma12;
double sigma12;
double gamma23;
double sigma23;
double* a1;
double* a2;
double* a3;
dcomplex* g12;
dcomplex* g23;
int i, j, g, k;
int nG, nG_app;
int n_iter;
int n_left;
int k_minus_1;
int n_fuse;
int is_ident12, is_ident23;
k_minus_1 = k_G - 1;
nG = n_A - 1;
n_fuse = 2;
// Use the simple variant for nG < 2*(k - 1) or k == 1.
if ( nG < 2*k_minus_1 || k_G == 1 )
{
FLA_Apply_G_rf_asd_var1( k_G,
m_A,
n_A,
buff_G, rs_G, cs_G,
buff_A, rs_A, cs_A );
return FLA_SUCCESS;
}
// Start-up phase.
// j starts at -1 so that the very first step performs no fused iterations
// (n_iter == 0) and only the single leftover entry (0, 0).
for ( j = -1; j < k_minus_1; j += n_fuse )
{
nG_app = j + 1;
n_iter = nG_app;
// Every start-up step ends with one unfused entry.
n_left = 1;
// The loop initialiser sets k and g even when n_iter == 0; the leftover
// block below relies on the values they hold on loop exit.
for ( i = 0, k = 0, g = j; i < n_iter; ++i, ++k, --g )
{
// g12 is entry (g, k) and g23 is entry (g+1, k): two consecutive rows of
// the same column of G, spanning columns g, g+1, g+2 of A.
g12 = buff_G + (g )*rs_G + (k )*cs_G;
g23 = buff_G + (g + 1)*rs_G + (k )*cs_G;
a1 = buff_A + (g )*cs_A;
a2 = buff_A + (g + 1)*cs_A;
a3 = buff_A + (g + 2)*cs_A;
gamma12 = g12->real;
sigma12 = g12->imag;
gamma23 = g23->real;
sigma23 = g23->imag;
// (gamma, sigma) == (one, zero) is treated as an identity and skipped.
is_ident12 = ( gamma12 == one && sigma12 == zero );
is_ident23 = ( gamma23 == one && sigma23 == zero );
if ( !is_ident12 && is_ident23 )
{
// Apply only to columns 1 and 2.
MAC_Apply_G_mx2_asd( m_A,
&gamma12,
&sigma12,
a1, 1,
a2, 1 );
}
else if ( is_ident12 && !is_ident23 )
{
// Apply only to columns 2 and 3.
MAC_Apply_G_mx2_asd( m_A,
&gamma23,
&sigma23,
a2, 1,
a3, 1 );
}
else if ( !is_ident12 && !is_ident23 )
{
// Apply to all three columns.
MAC_Apply_G_mx3_asd( m_A,
&gamma12,
&sigma12,
&gamma23,
&sigma23,
a1, 1,
a2, 1,
a3, 1 );
}
// If both are identities nothing is applied.
}
// Single entry (g+1, k) acting on columns g+1, g+2 (here g == -1 on exit
// from the loop above, so this is entry (0, j+1) on columns 0 and 1).
if ( n_left == 1 )
{
g23 = buff_G + (g + 1)*rs_G + (k )*cs_G;
a2 = buff_A + (g + 1)*cs_A;
a3 = buff_A + (g + 2)*cs_A;
gamma23 = g23->real;
sigma23 = g23->imag;
is_ident23 = ( gamma23 == one && sigma23 == zero );
if ( !is_ident23 )
MAC_Apply_G_mx2_asd( m_A,
&gamma23,
&sigma23,
a2, 1,
a3, 1 );
}
}
// Pipeline stage
// Continues from the j value left by the start-up loop; each step performs
// k_G fused iterations and has no leftover entry.
for ( ; j < nG - 1; j += n_fuse )
{
nG_app = k_G;
n_iter = nG_app;
// Assigned for symmetry with the other stages; not read in this loop.
n_left = 0;
for ( i = 0, k = 0, g = j; i < n_iter; ++i, ++k, --g )
{
g12 = buff_G + (g )*rs_G + (k )*cs_G;
g23 = buff_G + (g + 1)*rs_G + (k )*cs_G;
a1 = buff_A + (g )*cs_A;
a2 = buff_A + (g + 1)*cs_A;
a3 = buff_A + (g + 2)*cs_A;
gamma12 = g12->real;
sigma12 = g12->imag;
gamma23 = g23->real;
sigma23 = g23->imag;
is_ident12 = ( gamma12 == one && sigma12 == zero );
is_ident23 = ( gamma23 == one && sigma23 == zero );
if ( !is_ident12 && is_ident23 )
{
// Apply only to columns 1 and 2.
MAC_Apply_G_mx2_asd( m_A,
&gamma12,
&sigma12,
a1, 1,
a2, 1 );
}
else if ( is_ident12 && !is_ident23 )
{
// Apply only to columns 2 and 3.
MAC_Apply_G_mx2_asd( m_A,
&gamma23,
&sigma23,
a2, 1,
a3, 1 );
}
else if ( !is_ident12 && !is_ident23 )
{
// Apply to all three columns.
MAC_Apply_G_mx3_asd( m_A,
&gamma12,
&sigma12,
&gamma23,
&sigma23,
a1, 1,
a2, 1,
a3, 1 );
}
}
}
// Shutdown stage
// The starting column j depends on the parity of nG. Each step begins with a
// single entry (nG-1, j) on the last two columns of A, followed by
// k_minus_1 - j fused iterations.
for ( j = nG % n_fuse; j < k_G; j += n_fuse )
{
g = nG - 1;
k = j;
// Always true here; the leading single entry is applied on every step.
n_left = 1;
if ( n_left == 1 )
{
g12 = buff_G + (g )*rs_G + (k )*cs_G;
a1 = buff_A + (g )*cs_A;
a2 = buff_A + (g + 1)*cs_A;
gamma12 = g12->real;
sigma12 = g12->imag;
is_ident12 = ( gamma12 == one && sigma12 == zero );
if ( !is_ident12 )
MAC_Apply_G_mx2_asd( m_A,
&gamma12,
&sigma12,
a1, 1,
a2, 1 );
// Advance to the next column of G / previous row pair for the fused loop.
++k;
--g;
}
nG_app = k_minus_1 - j;
n_iter = nG_app;
for ( i = 0; i < n_iter; ++i, ++k, --g )
{
g12 = buff_G + (g )*rs_G + (k )*cs_G;
g23 = buff_G + (g + 1)*rs_G + (k )*cs_G;
a1 = buff_A + (g )*cs_A;
a2 = buff_A + (g + 1)*cs_A;
a3 = buff_A + (g + 2)*cs_A;
gamma12 = g12->real;
sigma12 = g12->imag;
gamma23 = g23->real;
sigma23 = g23->imag;
is_ident12 = ( gamma12 == one && sigma12 == zero );
is_ident23 = ( gamma23 == one && sigma23 == zero );
if ( !is_ident12 && is_ident23 )
{
// Apply only to columns 1 and 2.
MAC_Apply_G_mx2_asd( m_A,
&gamma12,
&sigma12,
a1, 1,
a2, 1 );
}
else if ( is_ident12 && !is_ident23 )
{
// Apply only to columns 2 and 3.
MAC_Apply_G_mx2_asd( m_A,
&gamma23,
&sigma23,
a2, 1,
a3, 1 );
}
else if ( !is_ident12 && !is_ident23 )
{
// Apply to all three columns.
MAC_Apply_G_mx3_asd( m_A,
&gamma12,
&sigma12,
&gamma23,
&sigma23,
a1, 1,
a2, 1,
a3, 1 );
}
}
}
return FLA_SUCCESS;
}
| FLA_Error FLA_Apply_G_rf_asd_var9b | ( | int | k_G, |
| int | m_A, | ||
| int | n_A, | ||
| int | i_k, | ||
| int | iTL, | ||
| dcomplex * | buff_G, | ||
| int | rs_G, | ||
| int | cs_G, | ||
| double * | buff_A, | ||
| int | rs_A, | ||
| int | cs_A | ||
| ) |
References bli_d0(), bli_d1(), FLA_Apply_G_rf_asd_var1(), dcomplex::imag, and dcomplex::real.
Referenced by FLA_Apply_G_rf_asm_var9b(), and FLA_Apply_G_rf_bld_var9b().
{
// Same three-stage (start-up, pipeline, shutdown) traversal of G as the
// non-"b" variant 9 (two consecutive rows of one G column fused per kernel
// call, outer index advancing by n_fuse = 2), but the row count handed to the
// kernels is limited to m_app during the start-up and pipeline stages.
//
// Parameters (as used by the code below):
//   k_G            - number of columns of G that are traversed.
//   m_A            - upper bound on the row count; used unmodified in the
//                    shutdown stage and in the fallback path.
//   n_A            - number of columns of A; nG = n_A - 1 rows of G are used.
//   i_k, iTL       - only enter through m_app = i_k + 3 + j - iTL, clamped to
//                    [0, m_A]. Their meaning is not visible here
//                    (NOTE(review): presumably offsets describing which rows
//                    of A can be nonzero -- confirm with the callers).
//   buff_G,rs_G,cs_G - base pointer and strides of G; real part -> gamma,
//                    imaginary part -> sigma.
//   buff_A,cs_A    - base pointer and column stride of A.
//   rs_A           - only forwarded to FLA_Apply_G_rf_asd_var1(); the fused
//                    path passes a literal 1 to the kernels (presumably the
//                    row stride -- TODO confirm).
//
// Returns FLA_SUCCESS on every path.
double one = bli_d1();
double zero = bli_d0();
double gamma12;
double sigma12;
double gamma23;
double sigma23;
double* a1;
double* a2;
double* a3;
dcomplex* g12;
dcomplex* g23;
int i, j, g, k;
int nG, nG_app;
int n_iter;
int n_left;
int k_minus_1;
int n_fuse;
int is_ident12, is_ident23;
int m_app;
k_minus_1 = k_G - 1;
nG = n_A - 1;
n_fuse = 2;
// Use the simple variant for nG < 2*(k - 1) or k == 1.
// Note: the fallback ignores i_k and iTL and works on all m_A rows.
if ( nG < 2*k_minus_1 || k_G == 1 )
{
FLA_Apply_G_rf_asd_var1( k_G,
m_A,
n_A,
buff_G, rs_G, cs_G,
buff_A, rs_A, cs_A );
return FLA_SUCCESS;
}
// Start-up phase.
// j starts at -1 so that the very first step performs no fused iterations
// (n_iter == 0) and only the single leftover entry (0, 0).
for ( j = -1; j < k_minus_1; j += n_fuse )
{
nG_app = j + 1;
n_iter = nG_app;
// Every start-up step ends with one unfused entry.
n_left = 1;
// The loop initialiser sets k and g even when n_iter == 0; the leftover
// block below relies on the values they hold on loop exit.
for ( i = 0, k = 0, g = j; i < n_iter; ++i, ++k, --g )
{
// g12 is entry (g, k) and g23 is entry (g+1, k): two consecutive rows of
// the same column of G, spanning columns g, g+1, g+2 of A.
g12 = buff_G + (g )*rs_G + (k )*cs_G;
g23 = buff_G + (g + 1)*rs_G + (k )*cs_G;
a1 = buff_A + (g )*cs_A;
a2 = buff_A + (g + 1)*cs_A;
a3 = buff_A + (g + 2)*cs_A;
gamma12 = g12->real;
sigma12 = g12->imag;
gamma23 = g23->real;
sigma23 = g23->imag;
// (gamma, sigma) == (one, zero) is treated as an identity and skipped.
is_ident12 = ( gamma12 == one && sigma12 == zero );
is_ident23 = ( gamma23 == one && sigma23 == zero );
// Row count for this step, clamped to the range [0, m_A]. It depends
// only on j, not on the inner position (g, k).
m_app = min( i_k + 3 + j - iTL, m_A );
m_app = max( m_app, 0 );
if ( !is_ident12 && is_ident23 )
{
// Apply only to columns 1 and 2.
MAC_Apply_G_mx2_asd( m_app,
&gamma12,
&sigma12,
a1, 1,
a2, 1 );
}
else if ( is_ident12 && !is_ident23 )
{
// Apply only to columns 2 and 3.
MAC_Apply_G_mx2_asd( m_app,
&gamma23,
&sigma23,
a2, 1,
a3, 1 );
}
else if ( !is_ident12 && !is_ident23 )
{
// Apply to all three columns.
MAC_Apply_G_mx3_asd( m_app,
&gamma12,
&sigma12,
&gamma23,
&sigma23,
a1, 1,
a2, 1,
a3, 1 );
}
}
// Single entry (g+1, k) acting on columns g+1, g+2 (g == -1 on exit from
// the loop above, so this is entry (0, j+1) on columns 0 and 1).
if ( n_left == 1 )
{
g23 = buff_G + (g + 1)*rs_G + (k )*cs_G;
a2 = buff_A + (g + 1)*cs_A;
a3 = buff_A + (g + 2)*cs_A;
gamma23 = g23->real;
sigma23 = g23->imag;
is_ident23 = ( gamma23 == one && sigma23 == zero );
m_app = min( i_k + 3 + j - iTL, m_A );
m_app = max( m_app, 0 );
if ( !is_ident23 )
MAC_Apply_G_mx2_asd( m_app,
&gamma23,
&sigma23,
a2, 1,
a3, 1 );
}
}
// Pipeline stage
// Continues from the j value left by the start-up loop; each step performs
// k_G fused iterations and has no leftover entry.
for ( ; j < nG - 1; j += n_fuse )
{
nG_app = k_G;
n_iter = nG_app;
// Assigned for symmetry with the other stages; not read in this loop.
n_left = 0;
for ( i = 0, k = 0, g = j; i < n_iter; ++i, ++k, --g )
{
g12 = buff_G + (g )*rs_G + (k )*cs_G;
g23 = buff_G + (g + 1)*rs_G + (k )*cs_G;
a1 = buff_A + (g )*cs_A;
a2 = buff_A + (g + 1)*cs_A;
a3 = buff_A + (g + 2)*cs_A;
gamma12 = g12->real;
sigma12 = g12->imag;
gamma23 = g23->real;
sigma23 = g23->imag;
is_ident12 = ( gamma12 == one && sigma12 == zero );
is_ident23 = ( gamma23 == one && sigma23 == zero );
// Same clamped row count as in the start-up phase.
m_app = min( i_k + 3 + j - iTL, m_A );
m_app = max( m_app, 0 );
if ( !is_ident12 && is_ident23 )
{
// Apply only to columns 1 and 2.
MAC_Apply_G_mx2_asd( m_app,
&gamma12,
&sigma12,
a1, 1,
a2, 1 );
}
else if ( is_ident12 && !is_ident23 )
{
// Apply only to columns 2 and 3.
MAC_Apply_G_mx2_asd( m_app,
&gamma23,
&sigma23,
a2, 1,
a3, 1 );
}
else if ( !is_ident12 && !is_ident23 )
{
// Apply to all three columns.
MAC_Apply_G_mx3_asd( m_app,
&gamma12,
&sigma12,
&gamma23,
&sigma23,
a1, 1,
a2, 1,
a3, 1 );
}
}
}
// Shutdown stage
// The starting column j depends on the parity of nG. Each step begins with a
// single entry (nG-1, j) on the last two columns of A, followed by
// k_minus_1 - j fused iterations. The full row count m_A is used here.
for ( j = nG % n_fuse; j < k_G; j += n_fuse )
{
g = nG - 1;
k = j;
// Always true here; the leading single entry is applied on every step.
n_left = 1;
if ( n_left == 1 )
{
g12 = buff_G + (g )*rs_G + (k )*cs_G;
a1 = buff_A + (g )*cs_A;
a2 = buff_A + (g + 1)*cs_A;
gamma12 = g12->real;
sigma12 = g12->imag;
is_ident12 = ( gamma12 == one && sigma12 == zero );
m_app = m_A;
if ( !is_ident12 )
MAC_Apply_G_mx2_asd( m_app,
&gamma12,
&sigma12,
a1, 1,
a2, 1 );
// Advance to the next column of G / previous row pair for the fused loop.
++k;
--g;
}
nG_app = k_minus_1 - j;
n_iter = nG_app;
for ( i = 0; i < n_iter; ++i, ++k, --g )
{
g12 = buff_G + (g )*rs_G + (k )*cs_G;
g23 = buff_G + (g + 1)*rs_G + (k )*cs_G;
a1 = buff_A + (g )*cs_A;
a2 = buff_A + (g + 1)*cs_A;
a3 = buff_A + (g + 2)*cs_A;
gamma12 = g12->real;
sigma12 = g12->imag;
gamma23 = g23->real;
sigma23 = g23->imag;
is_ident12 = ( gamma12 == one && sigma12 == zero );
is_ident23 = ( gamma23 == one && sigma23 == zero );
m_app = m_A;
if ( !is_ident12 && is_ident23 )
{
// Apply only to columns 1 and 2.
MAC_Apply_G_mx2_asd( m_app,
&gamma12,
&sigma12,
a1, 1,
a2, 1 );
}
else if ( is_ident12 && !is_ident23 )
{
// Apply only to columns 2 and 3.
MAC_Apply_G_mx2_asd( m_app,
&gamma23,
&sigma23,
a2, 1,
a3, 1 );
}
else if ( !is_ident12 && !is_ident23 )
{
// Apply to all three columns.
MAC_Apply_G_mx3_asd( m_app,
&gamma12,
&sigma12,
&gamma23,
&sigma23,
a1, 1,
a2, 1,
a3, 1 );
}
}
}
return FLA_SUCCESS;
}
| FLA_Error FLA_Apply_G_rf_asm_var1 | ( | FLA_Obj | G, |
| FLA_Obj | A | ||
| ) |
References FLA_Apply_G_rf_asc_var1(), FLA_Apply_G_rf_asd_var1(), FLA_Apply_G_rf_ass_var1(), FLA_Apply_G_rf_asz_var1(), FLA_Obj_col_stride(), FLA_Obj_datatype(), FLA_Obj_length(), FLA_Obj_row_stride(), and FLA_Obj_width().
{
  // Object-level front end for variant 1: pulls dimensions, strides and
  // buffers out of G and A, then forwards to the typed kernel selected by the
  // datatype of A. G is always read as a complex buffer (scomplex for the
  // single-precision cases, dcomplex for the double-precision ones).
  // Datatypes other than the four handled below are ignored silently.
  // Always returns FLA_SUCCESS.
  FLA_Datatype dt_A   = FLA_Obj_datatype( A );
  int          n_colG = FLA_Obj_width( G );
  int          n_rowA = FLA_Obj_length( A );
  int          n_colA = FLA_Obj_width( A );
  int          g_rs   = FLA_Obj_row_stride( G );
  int          g_cs   = FLA_Obj_col_stride( G );
  int          a_rs   = FLA_Obj_row_stride( A );
  int          a_cs   = FLA_Obj_col_stride( A );

  if ( dt_A == FLA_FLOAT )
  {
    // Real single precision.
    scomplex* g_ptr = ( scomplex* ) FLA_COMPLEX_PTR( G );
    float*    a_ptr = ( float*    ) FLA_FLOAT_PTR( A );

    FLA_Apply_G_rf_ass_var1( n_colG, n_rowA, n_colA,
                             g_ptr, g_rs, g_cs,
                             a_ptr, a_rs, a_cs );
  }
  else if ( dt_A == FLA_DOUBLE )
  {
    // Real double precision.
    dcomplex* g_ptr = ( dcomplex* ) FLA_DOUBLE_COMPLEX_PTR( G );
    double*   a_ptr = ( double*   ) FLA_DOUBLE_PTR( A );

    FLA_Apply_G_rf_asd_var1( n_colG, n_rowA, n_colA,
                             g_ptr, g_rs, g_cs,
                             a_ptr, a_rs, a_cs );
  }
  else if ( dt_A == FLA_COMPLEX )
  {
    // Complex single precision.
    scomplex* g_ptr = ( scomplex* ) FLA_COMPLEX_PTR( G );
    scomplex* a_ptr = ( scomplex* ) FLA_COMPLEX_PTR( A );

    FLA_Apply_G_rf_asc_var1( n_colG, n_rowA, n_colA,
                             g_ptr, g_rs, g_cs,
                             a_ptr, a_rs, a_cs );
  }
  else if ( dt_A == FLA_DOUBLE_COMPLEX )
  {
    // Complex double precision.
    dcomplex* g_ptr = ( dcomplex* ) FLA_DOUBLE_COMPLEX_PTR( G );
    dcomplex* a_ptr = ( dcomplex* ) FLA_DOUBLE_COMPLEX_PTR( A );

    FLA_Apply_G_rf_asz_var1( n_colG, n_rowA, n_colA,
                             g_ptr, g_rs, g_cs,
                             a_ptr, a_rs, a_cs );
  }

  return FLA_SUCCESS;
}
| FLA_Error FLA_Apply_G_rf_asm_var2 | ( | FLA_Obj | G, |
| FLA_Obj | A | ||
| ) |
References FLA_Apply_G_rf_asc_var2(), FLA_Apply_G_rf_asd_var2(), FLA_Apply_G_rf_ass_var2(), FLA_Apply_G_rf_asz_var2(), FLA_Obj_col_stride(), FLA_Obj_datatype(), FLA_Obj_length(), FLA_Obj_row_stride(), and FLA_Obj_width().
{
  // Object-level front end for variant 2: pulls dimensions, strides and
  // buffers out of G and A, then forwards to the typed kernel selected by the
  // datatype of A. G is always read as a complex buffer (scomplex for the
  // single-precision cases, dcomplex for the double-precision ones).
  // Datatypes other than the four handled below are ignored silently.
  // Always returns FLA_SUCCESS.
  FLA_Datatype dt_A   = FLA_Obj_datatype( A );
  int          n_colG = FLA_Obj_width( G );
  int          n_rowA = FLA_Obj_length( A );
  int          n_colA = FLA_Obj_width( A );
  int          g_rs   = FLA_Obj_row_stride( G );
  int          g_cs   = FLA_Obj_col_stride( G );
  int          a_rs   = FLA_Obj_row_stride( A );
  int          a_cs   = FLA_Obj_col_stride( A );

  if ( dt_A == FLA_FLOAT )
  {
    // Real single precision.
    scomplex* g_ptr = ( scomplex* ) FLA_COMPLEX_PTR( G );
    float*    a_ptr = ( float*    ) FLA_FLOAT_PTR( A );

    FLA_Apply_G_rf_ass_var2( n_colG, n_rowA, n_colA,
                             g_ptr, g_rs, g_cs,
                             a_ptr, a_rs, a_cs );
  }
  else if ( dt_A == FLA_DOUBLE )
  {
    // Real double precision.
    dcomplex* g_ptr = ( dcomplex* ) FLA_DOUBLE_COMPLEX_PTR( G );
    double*   a_ptr = ( double*   ) FLA_DOUBLE_PTR( A );

    FLA_Apply_G_rf_asd_var2( n_colG, n_rowA, n_colA,
                             g_ptr, g_rs, g_cs,
                             a_ptr, a_rs, a_cs );
  }
  else if ( dt_A == FLA_COMPLEX )
  {
    // Complex single precision.
    scomplex* g_ptr = ( scomplex* ) FLA_COMPLEX_PTR( G );
    scomplex* a_ptr = ( scomplex* ) FLA_COMPLEX_PTR( A );

    FLA_Apply_G_rf_asc_var2( n_colG, n_rowA, n_colA,
                             g_ptr, g_rs, g_cs,
                             a_ptr, a_rs, a_cs );
  }
  else if ( dt_A == FLA_DOUBLE_COMPLEX )
  {
    // Complex double precision.
    dcomplex* g_ptr = ( dcomplex* ) FLA_DOUBLE_COMPLEX_PTR( G );
    dcomplex* a_ptr = ( dcomplex* ) FLA_DOUBLE_COMPLEX_PTR( A );

    FLA_Apply_G_rf_asz_var2( n_colG, n_rowA, n_colA,
                             g_ptr, g_rs, g_cs,
                             a_ptr, a_rs, a_cs );
  }

  return FLA_SUCCESS;
}
| FLA_Error FLA_Apply_G_rf_asm_var3 | ( | FLA_Obj | G, |
| FLA_Obj | A | ||
| ) |
References FLA_Apply_G_rf_asc_var3(), FLA_Apply_G_rf_asd_var3(), FLA_Apply_G_rf_ass_var3(), FLA_Apply_G_rf_asz_var3(), FLA_Obj_col_stride(), FLA_Obj_datatype(), FLA_Obj_length(), FLA_Obj_row_stride(), and FLA_Obj_width().
{
  // Object-level front end for variant 3: pulls dimensions, strides and
  // buffers out of G and A, then forwards to the typed kernel selected by the
  // datatype of A. G is always read as a complex buffer (scomplex for the
  // single-precision cases, dcomplex for the double-precision ones).
  // Datatypes other than the four handled below are ignored silently.
  // Always returns FLA_SUCCESS.
  FLA_Datatype dt_A   = FLA_Obj_datatype( A );
  int          n_colG = FLA_Obj_width( G );
  int          n_rowA = FLA_Obj_length( A );
  int          n_colA = FLA_Obj_width( A );
  int          g_rs   = FLA_Obj_row_stride( G );
  int          g_cs   = FLA_Obj_col_stride( G );
  int          a_rs   = FLA_Obj_row_stride( A );
  int          a_cs   = FLA_Obj_col_stride( A );

  if ( dt_A == FLA_FLOAT )
  {
    // Real single precision.
    scomplex* g_ptr = ( scomplex* ) FLA_COMPLEX_PTR( G );
    float*    a_ptr = ( float*    ) FLA_FLOAT_PTR( A );

    FLA_Apply_G_rf_ass_var3( n_colG, n_rowA, n_colA,
                             g_ptr, g_rs, g_cs,
                             a_ptr, a_rs, a_cs );
  }
  else if ( dt_A == FLA_DOUBLE )
  {
    // Real double precision.
    dcomplex* g_ptr = ( dcomplex* ) FLA_DOUBLE_COMPLEX_PTR( G );
    double*   a_ptr = ( double*   ) FLA_DOUBLE_PTR( A );

    FLA_Apply_G_rf_asd_var3( n_colG, n_rowA, n_colA,
                             g_ptr, g_rs, g_cs,
                             a_ptr, a_rs, a_cs );
  }
  else if ( dt_A == FLA_COMPLEX )
  {
    // Complex single precision.
    scomplex* g_ptr = ( scomplex* ) FLA_COMPLEX_PTR( G );
    scomplex* a_ptr = ( scomplex* ) FLA_COMPLEX_PTR( A );

    FLA_Apply_G_rf_asc_var3( n_colG, n_rowA, n_colA,
                             g_ptr, g_rs, g_cs,
                             a_ptr, a_rs, a_cs );
  }
  else if ( dt_A == FLA_DOUBLE_COMPLEX )
  {
    // Complex double precision.
    dcomplex* g_ptr = ( dcomplex* ) FLA_DOUBLE_COMPLEX_PTR( G );
    dcomplex* a_ptr = ( dcomplex* ) FLA_DOUBLE_COMPLEX_PTR( A );

    FLA_Apply_G_rf_asz_var3( n_colG, n_rowA, n_colA,
                             g_ptr, g_rs, g_cs,
                             a_ptr, a_rs, a_cs );
  }

  return FLA_SUCCESS;
}
| FLA_Error FLA_Apply_G_rf_asm_var3b | ( | FLA_Obj | G, |
| FLA_Obj | A | ||
| ) |
References FLA_Apply_G_rf_asc_var3b(), FLA_Apply_G_rf_asd_var3b(), FLA_Apply_G_rf_ass_var3b(), FLA_Apply_G_rf_asz_var3b(), FLA_Obj_col_stride(), FLA_Obj_datatype(), FLA_Obj_length(), FLA_Obj_row_stride(), and FLA_Obj_width().
{
  // Object-level front end for variant 3b: pulls dimensions, strides and
  // buffers out of G and A, then forwards to the typed kernel selected by the
  // datatype of A. G is always read as a complex buffer (scomplex for the
  // single-precision cases, dcomplex for the double-precision ones).
  // The fourth and fifth kernel arguments are passed as literal zeros
  // (presumably the i_k and iTL offsets of the "b" kernels -- TODO confirm
  // against the var3b kernel prototypes).
  // Datatypes other than the four handled below are ignored silently.
  // Always returns FLA_SUCCESS.
  FLA_Datatype dt_A   = FLA_Obj_datatype( A );
  int          n_colG = FLA_Obj_width( G );
  int          n_rowA = FLA_Obj_length( A );
  int          n_colA = FLA_Obj_width( A );
  int          g_rs   = FLA_Obj_row_stride( G );
  int          g_cs   = FLA_Obj_col_stride( G );
  int          a_rs   = FLA_Obj_row_stride( A );
  int          a_cs   = FLA_Obj_col_stride( A );

  if ( dt_A == FLA_FLOAT )
  {
    // Real single precision.
    scomplex* g_ptr = ( scomplex* ) FLA_COMPLEX_PTR( G );
    float*    a_ptr = ( float*    ) FLA_FLOAT_PTR( A );

    FLA_Apply_G_rf_ass_var3b( n_colG, n_rowA, n_colA,
                              0, 0,
                              g_ptr, g_rs, g_cs,
                              a_ptr, a_rs, a_cs );
  }
  else if ( dt_A == FLA_DOUBLE )
  {
    // Real double precision.
    dcomplex* g_ptr = ( dcomplex* ) FLA_DOUBLE_COMPLEX_PTR( G );
    double*   a_ptr = ( double*   ) FLA_DOUBLE_PTR( A );

    FLA_Apply_G_rf_asd_var3b( n_colG, n_rowA, n_colA,
                              0, 0,
                              g_ptr, g_rs, g_cs,
                              a_ptr, a_rs, a_cs );
  }
  else if ( dt_A == FLA_COMPLEX )
  {
    // Complex single precision.
    scomplex* g_ptr = ( scomplex* ) FLA_COMPLEX_PTR( G );
    scomplex* a_ptr = ( scomplex* ) FLA_COMPLEX_PTR( A );

    FLA_Apply_G_rf_asc_var3b( n_colG, n_rowA, n_colA,
                              0, 0,
                              g_ptr, g_rs, g_cs,
                              a_ptr, a_rs, a_cs );
  }
  else if ( dt_A == FLA_DOUBLE_COMPLEX )
  {
    // Complex double precision.
    dcomplex* g_ptr = ( dcomplex* ) FLA_DOUBLE_COMPLEX_PTR( G );
    dcomplex* a_ptr = ( dcomplex* ) FLA_DOUBLE_COMPLEX_PTR( A );

    FLA_Apply_G_rf_asz_var3b( n_colG, n_rowA, n_colA,
                              0, 0,
                              g_ptr, g_rs, g_cs,
                              a_ptr, a_rs, a_cs );
  }

  return FLA_SUCCESS;
}
| FLA_Error FLA_Apply_G_rf_asm_var4 | ( | FLA_Obj | G, |
| FLA_Obj | A | ||
| ) |
| FLA_Error FLA_Apply_G_rf_asm_var5 | ( | FLA_Obj | G, |
| FLA_Obj | A | ||
| ) |
| FLA_Error FLA_Apply_G_rf_asm_var5b | ( | FLA_Obj | G, |
| FLA_Obj | A | ||
| ) |
| FLA_Error FLA_Apply_G_rf_asm_var6 | ( | FLA_Obj | G, |
| FLA_Obj | A | ||
| ) |
References FLA_Apply_G_rf_asc_var6(), FLA_Apply_G_rf_asd_var6(), FLA_Apply_G_rf_ass_var6(), FLA_Apply_G_rf_asz_var6(), FLA_Obj_col_stride(), FLA_Obj_datatype(), FLA_Obj_length(), FLA_Obj_row_stride(), and FLA_Obj_width().
{
  // Object-level front end for variant 6: pulls dimensions, strides and
  // buffers out of G and A, then forwards to the typed kernel selected by the
  // datatype of A. G is always read as a complex buffer (scomplex for the
  // single-precision cases, dcomplex for the double-precision ones).
  // Datatypes other than the four handled below are ignored silently.
  // Always returns FLA_SUCCESS.
  FLA_Datatype dt_A   = FLA_Obj_datatype( A );
  int          n_colG = FLA_Obj_width( G );
  int          n_rowA = FLA_Obj_length( A );
  int          n_colA = FLA_Obj_width( A );
  int          g_rs   = FLA_Obj_row_stride( G );
  int          g_cs   = FLA_Obj_col_stride( G );
  int          a_rs   = FLA_Obj_row_stride( A );
  int          a_cs   = FLA_Obj_col_stride( A );

  if ( dt_A == FLA_FLOAT )
  {
    // Real single precision.
    scomplex* g_ptr = ( scomplex* ) FLA_COMPLEX_PTR( G );
    float*    a_ptr = ( float*    ) FLA_FLOAT_PTR( A );

    FLA_Apply_G_rf_ass_var6( n_colG, n_rowA, n_colA,
                             g_ptr, g_rs, g_cs,
                             a_ptr, a_rs, a_cs );
  }
  else if ( dt_A == FLA_DOUBLE )
  {
    // Real double precision.
    dcomplex* g_ptr = ( dcomplex* ) FLA_DOUBLE_COMPLEX_PTR( G );
    double*   a_ptr = ( double*   ) FLA_DOUBLE_PTR( A );

    FLA_Apply_G_rf_asd_var6( n_colG, n_rowA, n_colA,
                             g_ptr, g_rs, g_cs,
                             a_ptr, a_rs, a_cs );
  }
  else if ( dt_A == FLA_COMPLEX )
  {
    // Complex single precision.
    scomplex* g_ptr = ( scomplex* ) FLA_COMPLEX_PTR( G );
    scomplex* a_ptr = ( scomplex* ) FLA_COMPLEX_PTR( A );

    FLA_Apply_G_rf_asc_var6( n_colG, n_rowA, n_colA,
                             g_ptr, g_rs, g_cs,
                             a_ptr, a_rs, a_cs );
  }
  else if ( dt_A == FLA_DOUBLE_COMPLEX )
  {
    // Complex double precision.
    dcomplex* g_ptr = ( dcomplex* ) FLA_DOUBLE_COMPLEX_PTR( G );
    dcomplex* a_ptr = ( dcomplex* ) FLA_DOUBLE_COMPLEX_PTR( A );

    FLA_Apply_G_rf_asz_var6( n_colG, n_rowA, n_colA,
                             g_ptr, g_rs, g_cs,
                             a_ptr, a_rs, a_cs );
  }

  return FLA_SUCCESS;
}
| FLA_Error FLA_Apply_G_rf_asm_var6b | ( | FLA_Obj | G, |
| FLA_Obj | A | ||
| ) |
References FLA_Apply_G_rf_asc_var6b(), FLA_Apply_G_rf_asd_var6b(), FLA_Apply_G_rf_ass_var6b(), FLA_Apply_G_rf_asz_var6b(), FLA_Obj_col_stride(), FLA_Obj_datatype(), FLA_Obj_length(), FLA_Obj_row_stride(), and FLA_Obj_width().
{
  // Datatype dispatcher for variant 6b.
  //
  // Queries the datatype of A plus the dimensions and strides of G and A,
  // then forwards them to the FLA_Apply_G_rf_as{s,d,c,z}_var6b() routine
  // that matches the datatype of A.  The two extra integer arguments of
  // the "b" routines (i_k and iTL in their signatures) are both passed as
  // the literal 0.  G is always read through a complex pointer macro.
  //
  // If the datatype matches none of the four cases, nothing is called.
  // FLA_SUCCESS is returned unconditionally; the callee's return value is
  // not inspected.
  const FLA_Datatype dt_A   = FLA_Obj_datatype( A );
  const int          g_cols = FLA_Obj_width( G );
  const int          a_rows = FLA_Obj_length( A );
  const int          a_cols = FLA_Obj_width( A );
  const int          g_rs   = FLA_Obj_row_stride( G );
  const int          g_cs   = FLA_Obj_col_stride( G );
  const int          a_rs   = FLA_Obj_row_stride( A );
  const int          a_cs   = FLA_Obj_col_stride( A );

  if ( dt_A == FLA_FLOAT )
  {
    scomplex* g_buf = ( scomplex* ) FLA_COMPLEX_PTR( G );
    float*    a_buf = ( float* ) FLA_FLOAT_PTR( A );

    FLA_Apply_G_rf_ass_var6b( g_cols, a_rows, a_cols,
                              0, 0,
                              g_buf, g_rs, g_cs,
                              a_buf, a_rs, a_cs );
  }
  else if ( dt_A == FLA_DOUBLE )
  {
    dcomplex* g_buf = ( dcomplex* ) FLA_DOUBLE_COMPLEX_PTR( G );
    double*   a_buf = ( double* ) FLA_DOUBLE_PTR( A );

    FLA_Apply_G_rf_asd_var6b( g_cols, a_rows, a_cols,
                              0, 0,
                              g_buf, g_rs, g_cs,
                              a_buf, a_rs, a_cs );
  }
  else if ( dt_A == FLA_COMPLEX )
  {
    scomplex* g_buf = ( scomplex* ) FLA_COMPLEX_PTR( G );
    scomplex* a_buf = ( scomplex* ) FLA_COMPLEX_PTR( A );

    FLA_Apply_G_rf_asc_var6b( g_cols, a_rows, a_cols,
                              0, 0,
                              g_buf, g_rs, g_cs,
                              a_buf, a_rs, a_cs );
  }
  else if ( dt_A == FLA_DOUBLE_COMPLEX )
  {
    dcomplex* g_buf = ( dcomplex* ) FLA_DOUBLE_COMPLEX_PTR( G );
    dcomplex* a_buf = ( dcomplex* ) FLA_DOUBLE_COMPLEX_PTR( A );

    FLA_Apply_G_rf_asz_var6b( g_cols, a_rows, a_cols,
                              0, 0,
                              g_buf, g_rs, g_cs,
                              a_buf, a_rs, a_cs );
  }

  return FLA_SUCCESS;
}
| FLA_Error FLA_Apply_G_rf_asm_var7 | ( | FLA_Obj | G, |
| FLA_Obj | A | ||
| ) |
| FLA_Error FLA_Apply_G_rf_asm_var8 | ( | FLA_Obj | G, |
| FLA_Obj | A | ||
| ) |
| FLA_Error FLA_Apply_G_rf_asm_var8b | ( | FLA_Obj | G, |
| FLA_Obj | A | ||
| ) |
| FLA_Error FLA_Apply_G_rf_asm_var9 | ( | FLA_Obj | G, |
| FLA_Obj | A | ||
| ) |
References FLA_Apply_G_rf_asc_var9(), FLA_Apply_G_rf_asd_var9(), FLA_Apply_G_rf_ass_var9(), FLA_Apply_G_rf_asz_var9(), FLA_Obj_col_stride(), FLA_Obj_datatype(), FLA_Obj_length(), FLA_Obj_row_stride(), and FLA_Obj_width().
{
  // Datatype dispatcher for variant 9.
  //
  // Queries the datatype of A plus the dimensions and strides of G and A,
  // then forwards them to the FLA_Apply_G_rf_as{s,d,c,z}_var9() routine
  // that matches the datatype of A.  G is always read through a complex
  // pointer macro: single precision for FLA_FLOAT / FLA_COMPLEX, double
  // precision for FLA_DOUBLE / FLA_DOUBLE_COMPLEX.
  //
  // If the datatype matches none of the four cases, nothing is called.
  // FLA_SUCCESS is returned unconditionally; the callee's return value is
  // not inspected.
  const FLA_Datatype dt_A   = FLA_Obj_datatype( A );
  const int          g_cols = FLA_Obj_width( G );
  const int          a_rows = FLA_Obj_length( A );
  const int          a_cols = FLA_Obj_width( A );
  const int          g_rs   = FLA_Obj_row_stride( G );
  const int          g_cs   = FLA_Obj_col_stride( G );
  const int          a_rs   = FLA_Obj_row_stride( A );
  const int          a_cs   = FLA_Obj_col_stride( A );

  if ( dt_A == FLA_FLOAT )
  {
    scomplex* g_buf = ( scomplex* ) FLA_COMPLEX_PTR( G );
    float*    a_buf = ( float* ) FLA_FLOAT_PTR( A );

    FLA_Apply_G_rf_ass_var9( g_cols, a_rows, a_cols,
                             g_buf, g_rs, g_cs,
                             a_buf, a_rs, a_cs );
  }
  else if ( dt_A == FLA_DOUBLE )
  {
    dcomplex* g_buf = ( dcomplex* ) FLA_DOUBLE_COMPLEX_PTR( G );
    double*   a_buf = ( double* ) FLA_DOUBLE_PTR( A );

    FLA_Apply_G_rf_asd_var9( g_cols, a_rows, a_cols,
                             g_buf, g_rs, g_cs,
                             a_buf, a_rs, a_cs );
  }
  else if ( dt_A == FLA_COMPLEX )
  {
    scomplex* g_buf = ( scomplex* ) FLA_COMPLEX_PTR( G );
    scomplex* a_buf = ( scomplex* ) FLA_COMPLEX_PTR( A );

    FLA_Apply_G_rf_asc_var9( g_cols, a_rows, a_cols,
                             g_buf, g_rs, g_cs,
                             a_buf, a_rs, a_cs );
  }
  else if ( dt_A == FLA_DOUBLE_COMPLEX )
  {
    dcomplex* g_buf = ( dcomplex* ) FLA_DOUBLE_COMPLEX_PTR( G );
    dcomplex* a_buf = ( dcomplex* ) FLA_DOUBLE_COMPLEX_PTR( A );

    FLA_Apply_G_rf_asz_var9( g_cols, a_rows, a_cols,
                             g_buf, g_rs, g_cs,
                             a_buf, a_rs, a_cs );
  }

  return FLA_SUCCESS;
}
| FLA_Error FLA_Apply_G_rf_asm_var9b | ( | FLA_Obj | G, |
| FLA_Obj | A | ||
| ) |
References FLA_Apply_G_rf_asc_var9b(), FLA_Apply_G_rf_asd_var9b(), FLA_Apply_G_rf_ass_var9b(), FLA_Apply_G_rf_asz_var9b(), FLA_Obj_col_stride(), FLA_Obj_datatype(), FLA_Obj_length(), FLA_Obj_row_stride(), and FLA_Obj_width().
{
  // Datatype dispatcher for variant 9b.
  //
  // Queries the datatype of A plus the dimensions and strides of G and A,
  // then forwards them to the FLA_Apply_G_rf_as{s,d,c,z}_var9b() routine
  // that matches the datatype of A.  The two extra integer arguments of
  // the "b" routines (i_k and iTL in their signatures) are both passed as
  // the literal 0.  G is always read through a complex pointer macro.
  //
  // If the datatype matches none of the four cases, nothing is called.
  // FLA_SUCCESS is returned unconditionally; the callee's return value is
  // not inspected.
  const FLA_Datatype dt_A   = FLA_Obj_datatype( A );
  const int          g_cols = FLA_Obj_width( G );
  const int          a_rows = FLA_Obj_length( A );
  const int          a_cols = FLA_Obj_width( A );
  const int          g_rs   = FLA_Obj_row_stride( G );
  const int          g_cs   = FLA_Obj_col_stride( G );
  const int          a_rs   = FLA_Obj_row_stride( A );
  const int          a_cs   = FLA_Obj_col_stride( A );

  if ( dt_A == FLA_FLOAT )
  {
    scomplex* g_buf = ( scomplex* ) FLA_COMPLEX_PTR( G );
    float*    a_buf = ( float* ) FLA_FLOAT_PTR( A );

    FLA_Apply_G_rf_ass_var9b( g_cols, a_rows, a_cols,
                              0, 0,
                              g_buf, g_rs, g_cs,
                              a_buf, a_rs, a_cs );
  }
  else if ( dt_A == FLA_DOUBLE )
  {
    dcomplex* g_buf = ( dcomplex* ) FLA_DOUBLE_COMPLEX_PTR( G );
    double*   a_buf = ( double* ) FLA_DOUBLE_PTR( A );

    FLA_Apply_G_rf_asd_var9b( g_cols, a_rows, a_cols,
                              0, 0,
                              g_buf, g_rs, g_cs,
                              a_buf, a_rs, a_cs );
  }
  else if ( dt_A == FLA_COMPLEX )
  {
    scomplex* g_buf = ( scomplex* ) FLA_COMPLEX_PTR( G );
    scomplex* a_buf = ( scomplex* ) FLA_COMPLEX_PTR( A );

    FLA_Apply_G_rf_asc_var9b( g_cols, a_rows, a_cols,
                              0, 0,
                              g_buf, g_rs, g_cs,
                              a_buf, a_rs, a_cs );
  }
  else if ( dt_A == FLA_DOUBLE_COMPLEX )
  {
    dcomplex* g_buf = ( dcomplex* ) FLA_DOUBLE_COMPLEX_PTR( G );
    dcomplex* a_buf = ( dcomplex* ) FLA_DOUBLE_COMPLEX_PTR( A );

    FLA_Apply_G_rf_asz_var9b( g_cols, a_rows, a_cols,
                              0, 0,
                              g_buf, g_rs, g_cs,
                              a_buf, a_rs, a_cs );
  }

  return FLA_SUCCESS;
}
| FLA_Error FLA_Apply_G_rf_ass_var1 | ( | int | k_G, |
| int | m_A, | ||
| int | n_A, | ||
| scomplex * | buff_G, | ||
| int | rs_G, | ||
| int | cs_G, | ||
| float * | buff_A, | ||
| int | rs_A, | ||
| int | cs_A | ||
| ) |
References bli_s0(), bli_s1(), scomplex::imag, and scomplex::real.
Referenced by FLA_Apply_G_rf_asm_var1(), FLA_Apply_G_rf_ass_var2(), FLA_Apply_G_rf_ass_var3(), FLA_Apply_G_rf_ass_var6(), FLA_Apply_G_rf_ass_var9(), and FLA_Apply_G_rf_bls_var1().
{
  // Simple (unfused) application of the rotations stored in G to A.
  //
  // For each of the k_G columns of G, walks the n_A - 1 entries of that
  // column (stepping by rs_G) and applies entry j -- (real, imag) read as
  // (gamma, sigma) -- to the adjacent column pair j, j+1 of A (columns are
  // cs_A apart).  Entries equal to (1, 0) are skipped.
  //
  // The columns of A are handed to MAC_Apply_G_mx2_ass() with the literal
  // increment 1; rs_A is not consulted by this routine.
  //
  // Always returns FLA_SUCCESS.
  float     one    = bli_s1();
  float     zero   = bli_s0();
  int       nG_app = n_A - 1;
  int       l, j;
  float     gamma;
  float     sigma;
  float*    a1;
  float*    a2;
  scomplex* g1;
  scomplex* g11;

  // Outer loop: one pass over A per column of G.
  for ( l = 0, g1 = buff_G; l < k_G; ++l, g1 += cs_G )
  {
    // Inner loop: slide the column pair (a1, a2) across A while stepping
    // down the current column of G.
    for ( j = 0, g11 = g1, a1 = buff_A;
          j < nG_app;
          ++j, g11 += rs_G, a1 += cs_A )
    {
      a2    = a1 + cs_A;
      gamma = g11->real;
      sigma = g11->imag;

      // Nothing to do for an identity rotation.
      if ( gamma == one && sigma == zero ) continue;

      MAC_Apply_G_mx2_ass( m_A,
                           &gamma,
                           &sigma,
                           a1, 1,
                           a2, 1 );
    }
  }

  return FLA_SUCCESS;
}
| FLA_Error FLA_Apply_G_rf_ass_var2 | ( | int | k_G, |
| int | m_A, | ||
| int | n_A, | ||
| scomplex * | buff_G, | ||
| int | rs_G, | ||
| int | cs_G, | ||
| float * | buff_A, | ||
| int | rs_A, | ||
| int | cs_A | ||
| ) |
References bli_s0(), bli_s1(), FLA_Apply_G_rf_ass_var1(), scomplex::imag, and scomplex::real.
Referenced by FLA_Apply_G_rf_asm_var2(), and FLA_Apply_G_rf_bls_var2().
{
  // Wavefront-ordered application of the rotations stored in G to A.
  //
  // Entry (g, k) of G -- row g, column k, (real, imag) read as
  // (gamma, sigma) -- is applied to columns g and g+1 of A.  Instead of
  // finishing one column of G before starting the next, entries are visited
  // one anti-diagonal (g + k == j) at a time, and within an anti-diagonal in
  // order of increasing k.  The first k_G - 1 anti-diagonals are the
  // start-up phase (they grow by one entry each), the last k_G - 1 are the
  // shutdown phase (they shrink by one entry each), and the ones in between
  // touch every column of G.
  //
  // Entries equal to (1, 0) are skipped.  Columns of A are handed to
  // MAC_Apply_G_mx2_ass() with the literal increment 1; rs_A is only
  // forwarded to the fallback routine.
  //
  // Always returns FLA_SUCCESS.
  float     one  = bli_s1();
  float     zero = bli_s0();
  float     gamma;
  float     sigma;
  float*    a1;
  float*    a2;
  scomplex* g11;
  int       j, g, k;
  int       k_first, k_last;
  int       nG        = n_A - 1;
  int       k_minus_1 = k_G - 1;

  // Use the simple variant when k == 1 or nG < (k - 1).
  if ( k_G == 1 || nG < k_minus_1 )
  {
    FLA_Apply_G_rf_ass_var1( k_G,
                             m_A,
                             n_A,
                             buff_G, rs_G, cs_G,
                             buff_A, rs_A, cs_A );
    return FLA_SUCCESS;
  }

  // One iteration per anti-diagonal j = g + k, covering start-up,
  // pipeline and shutdown in that order.
  for ( j = 0; j < nG + k_minus_1; ++j )
  {
    // Clip the anti-diagonal to the valid rows (g <= nG - 1) and
    // columns (k <= k_G - 1) of G.
    k_first = ( j < nG  ? 0 : j - nG + 1 );
    k_last  = ( j < k_G ? j : k_minus_1 );

    for ( k = k_first, g = j - k_first; k <= k_last; ++k, --g )
    {
      g11 = buff_G + (g    )*rs_G + (k    )*cs_G;
      a1  = buff_A + (g    )*cs_A;
      a2  = buff_A + (g + 1)*cs_A;

      gamma = g11->real;
      sigma = g11->imag;

      // Skip the current iteration if the rotation is identity.
      if ( gamma == one && sigma == zero ) continue;

      MAC_Apply_G_mx2_ass( m_A,
                           &gamma,
                           &sigma,
                           a1, 1,
                           a2, 1 );
    }
  }

  return FLA_SUCCESS;
}
| FLA_Error FLA_Apply_G_rf_ass_var3 | ( | int | k_G, |
| int | m_A, | ||
| int | n_A, | ||
| scomplex * | buff_G, | ||
| int | rs_G, | ||
| int | cs_G, | ||
| float * | buff_A, | ||
| int | rs_A, | ||
| int | cs_A | ||
| ) |
References bli_s0(), bli_s1(), FLA_Apply_G_rf_ass_var1(), scomplex::imag, and scomplex::real.
Referenced by FLA_Apply_G_rf_asm_var3(), and FLA_Apply_G_rf_bls_var3().
{
// Fused wavefront application of the rotations stored in G to A.
//
// Entry (g, k) of G (row g, column k; real part read as gamma, imaginary
// part as sigma) is applied to columns g and g+1 of A.  This variant
// groups rotations four at a time: two consecutive rows from column k of
// G ("23_k1", "34_k1") and two from column k+1 ("12_k2", "23_k2"), acting
// on four consecutive columns a1..a4 of A.  When none of the four is the
// identity (1, 0) they are applied with one MAC_Apply_G_mx4s_ass() call;
// otherwise the non-identity ones are applied pairwise in the order
// 23_k1, 34_k1, 12_k2, 23_k2.
//
// Columns of A are handed to the macros with the literal increment 1;
// rs_A is only forwarded to the fallback routine.
//
// Always returns FLA_SUCCESS.
float one = bli_s1();
float zero = bli_s0();
float gamma23_k1;
float sigma23_k1;
float gamma34_k1;
float sigma34_k1;
float gamma12_k2;
float sigma12_k2;
float gamma23_k2;
float sigma23_k2;
float* a1;
float* a2;
float* a3;
float* a4;
scomplex* g23_k1;
scomplex* g34_k1;
scomplex* g12_k2;
scomplex* g23_k2;
int i, j, g, k;
int nG, nG_app;
int n_iter;
int n_left;
int k_minus_1;
int n_fuse;
int k_fuse;
int is_ident23_k1, is_ident34_k1;
int is_ident12_k2, is_ident23_k2;
int has_ident;
k_minus_1 = k_G - 1;
nG = n_A - 1;
// n_fuse is the step applied to the row index g (and to j);
// k_fuse is the step applied to the column index k.
n_fuse = 2;
k_fuse = 2;
// Use the simple variant for nG < 2*(k - 1) or k == 1.
if ( nG < 2*k_minus_1 || k_G == 1 )
{
FLA_Apply_G_rf_ass_var1( k_G,
m_A,
n_A,
buff_G, rs_G, cs_G,
buff_A, rs_A, cs_A );
return FLA_SUCCESS;
}
// Start-up phase.
// j starts at -1 and advances by n_fuse.  On the first pass n_iter is 0,
// so the fused loop body does not run, but its initialiser still sets
// k = 0 and g = j; the remainder block below relies on those values.
for ( j = -1; j < k_minus_1; j += n_fuse )
{
nG_app = j + 2;
n_iter = nG_app / k_fuse;
n_left = 1;
for ( i = 0, k = 0, g = j; i < n_iter; ++i, k += k_fuse, g -= n_fuse )
{
g23_k1 = buff_G + (g )*rs_G + (k )*cs_G;
g34_k1 = buff_G + (g + 1)*rs_G + (k )*cs_G;
g12_k2 = buff_G + (g - 1)*rs_G + (k + 1)*cs_G;
g23_k2 = buff_G + (g )*rs_G + (k + 1)*cs_G;
a1 = buff_A + (g - 1)*cs_A;
a2 = buff_A + (g )*cs_A;
a3 = buff_A + (g + 1)*cs_A;
a4 = buff_A + (g + 2)*cs_A;
gamma23_k1 = g23_k1->real;
sigma23_k1 = g23_k1->imag;
gamma34_k1 = g34_k1->real;
sigma34_k1 = g34_k1->imag;
gamma12_k2 = g12_k2->real;
sigma12_k2 = g12_k2->imag;
gamma23_k2 = g23_k2->real;
sigma23_k2 = g23_k2->imag;
is_ident23_k1 = ( gamma23_k1 == one && sigma23_k1 == zero );
is_ident34_k1 = ( gamma34_k1 == one && sigma34_k1 == zero );
is_ident12_k2 = ( gamma12_k2 == one && sigma12_k2 == zero );
is_ident23_k2 = ( gamma23_k2 == one && sigma23_k2 == zero );
has_ident = ( is_ident23_k1 || is_ident34_k1 ||
is_ident12_k2 || is_ident23_k2 );
if ( has_ident )
{
// Apply to pairs of columns as needed.
if ( !is_ident23_k1 )
MAC_Apply_G_mx2_ass( m_A,
&gamma23_k1,
&sigma23_k1,
a2, 1,
a3, 1 );
if ( !is_ident34_k1 )
MAC_Apply_G_mx2_ass( m_A,
&gamma34_k1,
&sigma34_k1,
a3, 1,
a4, 1 );
if ( !is_ident12_k2 )
MAC_Apply_G_mx2_ass( m_A,
&gamma12_k2,
&sigma12_k2,
a1, 1,
a2, 1 );
if ( !is_ident23_k2 )
MAC_Apply_G_mx2_ass( m_A,
&gamma23_k2,
&sigma23_k2,
a2, 1,
a3, 1 );
}
else
{
// Apply to all four columns.
MAC_Apply_G_mx4s_ass( m_A,
&gamma23_k1,
&sigma23_k1,
&gamma34_k1,
&sigma34_k1,
&gamma12_k2,
&sigma12_k2,
&gamma23_k2,
&sigma23_k2,
a1, 1,
a2, 1,
a3, 1,
a4, 1 );
}
}
// Remainder: n_left is always 1 in this phase, so the single rotation at
// (g + 1, k) is applied to columns g+1 and g+2 using the g and k values
// left behind by the loop above.
if ( n_left == 1 )
{
g34_k1 = buff_G + (g + 1)*rs_G + (k )*cs_G;
a3 = buff_A + (g + 1)*cs_A;
a4 = buff_A + (g + 2)*cs_A;
gamma34_k1 = g34_k1->real;
sigma34_k1 = g34_k1->imag;
is_ident34_k1 = ( gamma34_k1 == one && sigma34_k1 == zero );
if ( !is_ident34_k1 )
MAC_Apply_G_mx2_ass( m_A,
&gamma34_k1,
&sigma34_k1,
a3, 1,
a4, 1 );
}
}
// Pipeline stage
// Continues from the value of j left by the start-up loop.  Each pass
// processes k_G columns of G: k_G / k_fuse fused groups plus, when k_G is
// odd, one leftover column handled by the remainder block.
for ( ; j < nG - 1; j += n_fuse )
{
nG_app = k_G;
n_iter = nG_app / k_fuse;
n_left = nG_app % k_fuse;
for ( i = 0, k = 0, g = j; i < n_iter; ++i, k += k_fuse, g -= n_fuse )
{
g23_k1 = buff_G + (g )*rs_G + (k )*cs_G;
g34_k1 = buff_G + (g + 1)*rs_G + (k )*cs_G;
g12_k2 = buff_G + (g - 1)*rs_G + (k + 1)*cs_G;
g23_k2 = buff_G + (g )*rs_G + (k + 1)*cs_G;
a1 = buff_A + (g - 1)*cs_A;
a2 = buff_A + (g )*cs_A;
a3 = buff_A + (g + 1)*cs_A;
a4 = buff_A + (g + 2)*cs_A;
gamma23_k1 = g23_k1->real;
sigma23_k1 = g23_k1->imag;
gamma34_k1 = g34_k1->real;
sigma34_k1 = g34_k1->imag;
gamma12_k2 = g12_k2->real;
sigma12_k2 = g12_k2->imag;
gamma23_k2 = g23_k2->real;
sigma23_k2 = g23_k2->imag;
is_ident23_k1 = ( gamma23_k1 == one && sigma23_k1 == zero );
is_ident34_k1 = ( gamma34_k1 == one && sigma34_k1 == zero );
is_ident12_k2 = ( gamma12_k2 == one && sigma12_k2 == zero );
is_ident23_k2 = ( gamma23_k2 == one && sigma23_k2 == zero );
has_ident = ( is_ident23_k1 || is_ident34_k1 ||
is_ident12_k2 || is_ident23_k2 );
if ( has_ident )
{
// Apply to pairs of columns as needed.
if ( !is_ident23_k1 )
MAC_Apply_G_mx2_ass( m_A,
&gamma23_k1,
&sigma23_k1,
a2, 1,
a3, 1 );
if ( !is_ident34_k1 )
MAC_Apply_G_mx2_ass( m_A,
&gamma34_k1,
&sigma34_k1,
a3, 1,
a4, 1 );
if ( !is_ident12_k2 )
MAC_Apply_G_mx2_ass( m_A,
&gamma12_k2,
&sigma12_k2,
a1, 1,
a2, 1 );
if ( !is_ident23_k2 )
MAC_Apply_G_mx2_ass( m_A,
&gamma23_k2,
&sigma23_k2,
a2, 1,
a3, 1 );
}
else
{
// Apply to all four columns.
MAC_Apply_G_mx4s_ass( m_A,
&gamma23_k1,
&sigma23_k1,
&gamma34_k1,
&sigma34_k1,
&gamma12_k2,
&sigma12_k2,
&gamma23_k2,
&sigma23_k2,
a1, 1,
a2, 1,
a3, 1,
a4, 1 );
}
}
// Remainder (odd k_G): the two rotations at (g, k) and (g + 1, k) act on
// columns g, g+1, g+2.
// NOTE(review): the final else is taken both when neither rotation is the
// identity and when both are, so an all-identity pair still goes through
// MAC_Apply_G_mx3_ass() -- confirm whether that is intended.
if ( n_left == 1 )
{
g23_k1 = buff_G + (g )*rs_G + (k )*cs_G;
g34_k1 = buff_G + (g + 1)*rs_G + (k )*cs_G;
a2 = buff_A + (g )*cs_A;
a3 = buff_A + (g + 1)*cs_A;
a4 = buff_A + (g + 2)*cs_A;
gamma23_k1 = g23_k1->real;
sigma23_k1 = g23_k1->imag;
gamma34_k1 = g34_k1->real;
sigma34_k1 = g34_k1->imag;
is_ident23_k1 = ( gamma23_k1 == one && sigma23_k1 == zero );
is_ident34_k1 = ( gamma34_k1 == one && sigma34_k1 == zero );
if ( !is_ident23_k1 && is_ident34_k1 )
{
MAC_Apply_G_mx2_ass( m_A,
&gamma23_k1,
&sigma23_k1,
a2, 1,
a3, 1 );
}
else if ( is_ident23_k1 && !is_ident34_k1 )
{
MAC_Apply_G_mx2_ass( m_A,
&gamma34_k1,
&sigma34_k1,
a3, 1,
a4, 1 );
}
else
{
MAC_Apply_G_mx3_ass( m_A,
&gamma23_k1,
&sigma23_k1,
&gamma34_k1,
&sigma34_k1,
a2, 1,
a3, 1,
a4, 1 );
}
}
}
// Shutdown stage
// j now indexes columns of G, starting at nG % n_fuse and advancing by
// n_fuse.  Each pass starts at the last row of G (g = nG - 1).
for ( j = nG % n_fuse; j < k_G; j += n_fuse )
{
g = nG - 1;
k = j;
// Always apply the single rotation at (nG - 1, j) to the last two columns
// of A, then step to the next column of G and the previous row.
// (This block used to be guarded by "n_left == 1"; it is now unconditional.)
{
g23_k1 = buff_G + (g )*rs_G + (k )*cs_G;
a2 = buff_A + (g )*cs_A;
a3 = buff_A + (g + 1)*cs_A;
gamma23_k1 = g23_k1->real;
sigma23_k1 = g23_k1->imag;
is_ident23_k1 = ( gamma23_k1 == one && sigma23_k1 == zero );
if ( !is_ident23_k1 )
MAC_Apply_G_mx2_ass( m_A,
&gamma23_k1,
&sigma23_k1,
a2, 1,
a3, 1 );
++k;
--g;
}
// The remaining k_minus_1 - j columns of G are consumed in fused groups,
// with one leftover column when that count is odd.
nG_app = k_minus_1 - j;
n_iter = nG_app / k_fuse;
n_left = nG_app % k_fuse;
for ( i = 0; i < n_iter; ++i, k += k_fuse, g -= n_fuse )
{
g23_k1 = buff_G + (g )*rs_G + (k )*cs_G;
g34_k1 = buff_G + (g + 1)*rs_G + (k )*cs_G;
g12_k2 = buff_G + (g - 1)*rs_G + (k + 1)*cs_G;
g23_k2 = buff_G + (g )*rs_G + (k + 1)*cs_G;
a1 = buff_A + (g - 1)*cs_A;
a2 = buff_A + (g )*cs_A;
a3 = buff_A + (g + 1)*cs_A;
a4 = buff_A + (g + 2)*cs_A;
gamma23_k1 = g23_k1->real;
sigma23_k1 = g23_k1->imag;
gamma34_k1 = g34_k1->real;
sigma34_k1 = g34_k1->imag;
gamma12_k2 = g12_k2->real;
sigma12_k2 = g12_k2->imag;
gamma23_k2 = g23_k2->real;
sigma23_k2 = g23_k2->imag;
is_ident23_k1 = ( gamma23_k1 == one && sigma23_k1 == zero );
is_ident34_k1 = ( gamma34_k1 == one && sigma34_k1 == zero );
is_ident12_k2 = ( gamma12_k2 == one && sigma12_k2 == zero );
is_ident23_k2 = ( gamma23_k2 == one && sigma23_k2 == zero );
has_ident = ( is_ident23_k1 || is_ident34_k1 ||
is_ident12_k2 || is_ident23_k2 );
if ( has_ident )
{
// Apply to pairs of columns as needed.
if ( !is_ident23_k1 )
MAC_Apply_G_mx2_ass( m_A,
&gamma23_k1,
&sigma23_k1,
a2, 1,
a3, 1 );
if ( !is_ident34_k1 )
MAC_Apply_G_mx2_ass( m_A,
&gamma34_k1,
&sigma34_k1,
a3, 1,
a4, 1 );
if ( !is_ident12_k2 )
MAC_Apply_G_mx2_ass( m_A,
&gamma12_k2,
&sigma12_k2,
a1, 1,
a2, 1 );
if ( !is_ident23_k2 )
MAC_Apply_G_mx2_ass( m_A,
&gamma23_k2,
&sigma23_k2,
a2, 1,
a3, 1 );
}
else
{
// Apply to all four columns.
MAC_Apply_G_mx4s_ass( m_A,
&gamma23_k1,
&sigma23_k1,
&gamma34_k1,
&sigma34_k1,
&gamma12_k2,
&sigma12_k2,
&gamma23_k2,
&sigma23_k2,
a1, 1,
a2, 1,
a3, 1,
a4, 1 );
}
}
// Remainder: same two-rotation / three-column handling as in the pipeline
// stage, including the catch-all else branch.
if ( n_left == 1 )
{
g23_k1 = buff_G + (g )*rs_G + (k )*cs_G;
g34_k1 = buff_G + (g + 1)*rs_G + (k )*cs_G;
a2 = buff_A + (g )*cs_A;
a3 = buff_A + (g + 1)*cs_A;
a4 = buff_A + (g + 2)*cs_A;
gamma23_k1 = g23_k1->real;
sigma23_k1 = g23_k1->imag;
gamma34_k1 = g34_k1->real;
sigma34_k1 = g34_k1->imag;
is_ident23_k1 = ( gamma23_k1 == one && sigma23_k1 == zero );
is_ident34_k1 = ( gamma34_k1 == one && sigma34_k1 == zero );
if ( !is_ident23_k1 && is_ident34_k1 )
{
MAC_Apply_G_mx2_ass( m_A,
&gamma23_k1,
&sigma23_k1,
a2, 1,
a3, 1 );
}
else if ( is_ident23_k1 && !is_ident34_k1 )
{
MAC_Apply_G_mx2_ass( m_A,
&gamma34_k1,
&sigma34_k1,
a3, 1,
a4, 1 );
}
else
{
MAC_Apply_G_mx3_ass( m_A,
&gamma23_k1,
&sigma23_k1,
&gamma34_k1,
&sigma34_k1,
a2, 1,
a3, 1,
a4, 1 );
}
}
}
return FLA_SUCCESS;
}
| FLA_Error FLA_Apply_G_rf_ass_var3b | ( | int | k_G, |
| int | m_A, | ||
| int | n_A, | ||
| int | i_k, | ||
| int | iTL, | ||
| scomplex * | buff_G, | ||
| int | rs_G, | ||
| int | cs_G, | ||
| float * | buff_A, | ||
| int | rs_A, | ||
| int | cs_A | ||
| ) |
Referenced by FLA_Apply_G_rf_asm_var3b(), and FLA_Apply_G_rf_bls_var3b().
{
// Placeholder: this variant has no implementation.  The only action is to
// pass FLA_NOT_YET_IMPLEMENTED to FLA_Check_error_code(); none of the
// arguments are read.
// NOTE(review): FLA_SUCCESS is returned if FLA_Check_error_code() hands
// control back -- presumably it aborts on an error code; confirm against
// its definition.
FLA_Check_error_code( FLA_NOT_YET_IMPLEMENTED );
return FLA_SUCCESS;
}
| FLA_Error FLA_Apply_G_rf_ass_var4 | ( | int | k_G, |
| int | m_A, | ||
| int | n_A, | ||
| scomplex * | buff_G, | ||
| int | rs_G, | ||
| int | cs_G, | ||
| float * | buff_A, | ||
| int | rs_A, | ||
| int | cs_A | ||
| ) |
| FLA_Error FLA_Apply_G_rf_ass_var5 | ( | int | k_G, |
| int | m_A, | ||
| int | n_A, | ||
| scomplex * | buff_G, | ||
| int | rs_G, | ||
| int | cs_G, | ||
| float * | buff_A, | ||
| int | rs_A, | ||
| int | cs_A | ||
| ) |
| FLA_Error FLA_Apply_G_rf_ass_var5b | ( | int | k_G, |
| int | m_A, | ||
| int | n_A, | ||
| int | i_k, | ||
| int | iTL, | ||
| scomplex * | buff_G, | ||
| int | rs_G, | ||
| int | cs_G, | ||
| float * | buff_A, | ||
| int | rs_A, | ||
| int | cs_A | ||
| ) |
| FLA_Error FLA_Apply_G_rf_ass_var6 | ( | int | k_G, |
| int | m_A, | ||
| int | n_A, | ||
| scomplex * | buff_G, | ||
| int | rs_G, | ||
| int | cs_G, | ||
| float * | buff_A, | ||
| int | rs_A, | ||
| int | cs_A | ||
| ) |
References bli_s0(), bli_s1(), FLA_Apply_G_rf_ass_var1(), scomplex::imag, and scomplex::real.
Referenced by FLA_Apply_G_rf_asm_var6(), and FLA_Apply_G_rf_bls_var6().
{
// Fused wavefront application of the rotations stored in G to A.
//
// Entry (g, k) of G (row g, column k; real part read as gamma, imaginary
// part as sigma) is applied to columns g and g+1 of A.  This variant
// pairs the rotation at (g, k) ("23", columns a2/a3) with the rotation at
// (g - 1, k + 1) ("12", columns a1/a2).  When neither is the identity
// (1, 0) the pair is applied with one MAC_Apply_G_mx3b_ass() call; when
// exactly one is the identity the other is applied alone; when both are,
// nothing is done.
//
// Columns of A are handed to the macros with the literal increment 1;
// rs_A is only forwarded to the fallback routine.
//
// Always returns FLA_SUCCESS.
float one = bli_s1();
float zero = bli_s0();
float gamma12;
float sigma12;
float gamma23;
float sigma23;
float* a1;
float* a2;
float* a3;
scomplex* g12;
scomplex* g23;
int i, j, g, k;
int nG, nG_app;
int n_iter;
int n_left;
int k_minus_1;
int n_fuse;
int is_ident12, is_ident23;
k_minus_1 = k_G - 1;
nG = n_A - 1;
// n_fuse is the number of rotations handled per fused step; both k and g
// move by this amount between steps.
n_fuse = 2;
// Use the simple variant for nG < (k - 1) or k == 1.
if ( nG < k_minus_1 || k_G == 1 )
{
FLA_Apply_G_rf_ass_var1( k_G,
m_A,
n_A,
buff_G, rs_G, cs_G,
buff_A, rs_A, cs_A );
return FLA_SUCCESS;
}
// Start-up phase.
// Pass j touches j + 1 rotations, starting at (j, 0) and moving up one row
// and right one column per rotation.
for ( j = 0; j < k_minus_1; ++j )
{
nG_app = j + 1;
n_iter = nG_app / n_fuse;
n_left = nG_app % n_fuse;
for ( i = 0, k = 0, g = nG_app - 1; i < n_iter; ++i, k += n_fuse, g -= n_fuse )
{
g12 = buff_G + (g - 1)*rs_G + (k + 1)*cs_G;
g23 = buff_G + (g )*rs_G + (k )*cs_G;
a1 = buff_A + (g - 1)*cs_A;
a2 = buff_A + (g )*cs_A;
a3 = buff_A + (g + 1)*cs_A;
gamma12 = g12->real;
sigma12 = g12->imag;
gamma23 = g23->real;
sigma23 = g23->imag;
is_ident12 = ( gamma12 == one && sigma12 == zero );
is_ident23 = ( gamma23 == one && sigma23 == zero );
if ( !is_ident12 && is_ident23 )
{
// Apply only to columns 1 and 2.
MAC_Apply_G_mx2_ass( m_A,
&gamma12,
&sigma12,
a1, 1,
a2, 1 );
}
else if ( is_ident12 && !is_ident23 )
{
// Apply only to columns 2 and 3.
MAC_Apply_G_mx2_ass( m_A,
&gamma23,
&sigma23,
a2, 1,
a3, 1 );
}
else if ( !is_ident12 && !is_ident23 )
{
// Apply to all three columns.
MAC_Apply_G_mx3b_ass( m_A,
&gamma12,
&sigma12,
&gamma23,
&sigma23,
a1, 1,
a2, 1,
a3, 1 );
}
}
// Odd rotation count: one rotation at (g, k) is left over.  g and k hold
// the values left by the loop above (or by its initialiser when n_iter
// is 0).
if ( n_left == 1 )
{
g23 = buff_G + (g )*rs_G + (k )*cs_G;
a2 = buff_A + (g )*cs_A;
a3 = buff_A + (g + 1)*cs_A;
gamma23 = g23->real;
sigma23 = g23->imag;
is_ident23 = ( gamma23 == one && sigma23 == zero );
if ( !is_ident23 )
MAC_Apply_G_mx2_ass( m_A,
&gamma23,
&sigma23,
a2, 1,
a3, 1 );
}
}
// Pipeline stage
// Every pass touches k_G rotations, one from each column of G, starting
// at (j, 0).
for ( j = k_minus_1; j < nG; ++j )
{
nG_app = k_G;
n_iter = nG_app / n_fuse;
n_left = nG_app % n_fuse;
for ( i = 0, k = 0, g = j; i < n_iter; ++i, k += n_fuse, g -= n_fuse )
{
g12 = buff_G + (g - 1)*rs_G + (k + 1)*cs_G;
g23 = buff_G + (g )*rs_G + (k )*cs_G;
a1 = buff_A + (g - 1)*cs_A;
a2 = buff_A + (g )*cs_A;
a3 = buff_A + (g + 1)*cs_A;
gamma12 = g12->real;
sigma12 = g12->imag;
gamma23 = g23->real;
sigma23 = g23->imag;
is_ident12 = ( gamma12 == one && sigma12 == zero );
is_ident23 = ( gamma23 == one && sigma23 == zero );
if ( !is_ident12 && is_ident23 )
{
// Apply only to columns 1 and 2.
MAC_Apply_G_mx2_ass( m_A,
&gamma12,
&sigma12,
a1, 1,
a2, 1 );
}
else if ( is_ident12 && !is_ident23 )
{
// Apply only to columns 2 and 3.
MAC_Apply_G_mx2_ass( m_A,
&gamma23,
&sigma23,
a2, 1,
a3, 1 );
}
else if ( !is_ident12 && !is_ident23 )
{
// Apply to all three columns.
MAC_Apply_G_mx3b_ass( m_A,
&gamma12,
&sigma12,
&gamma23,
&sigma23,
a1, 1,
a2, 1,
a3, 1 );
}
}
// Odd k_G: one rotation at (g, k) is left over.
if ( n_left == 1 )
{
g23 = buff_G + (g )*rs_G + (k )*cs_G;
a2 = buff_A + (g )*cs_A;
a3 = buff_A + (g + 1)*cs_A;
gamma23 = g23->real;
sigma23 = g23->imag;
is_ident23 = ( gamma23 == one && sigma23 == zero );
if ( !is_ident23 )
MAC_Apply_G_mx2_ass( m_A,
&gamma23,
&sigma23,
a2, 1,
a3, 1 );
}
}
// Shutdown stage
// Pass j touches the k_G - j rotations that start at (nG - 1, j), i.e. at
// the last row of G, and move up one row and right one column each.
for ( j = 1; j < k_G; ++j )
{
nG_app = k_G - j;
n_iter = nG_app / n_fuse;
n_left = nG_app % n_fuse;
for ( i = 0, k = j, g = nG - 1; i < n_iter; ++i, k += n_fuse, g -= n_fuse )
{
g12 = buff_G + (g - 1)*rs_G + (k + 1)*cs_G;
g23 = buff_G + (g )*rs_G + (k )*cs_G;
a1 = buff_A + (g - 1)*cs_A;
a2 = buff_A + (g )*cs_A;
a3 = buff_A + (g + 1)*cs_A;
gamma12 = g12->real;
sigma12 = g12->imag;
gamma23 = g23->real;
sigma23 = g23->imag;
is_ident12 = ( gamma12 == one && sigma12 == zero );
is_ident23 = ( gamma23 == one && sigma23 == zero );
if ( !is_ident12 && is_ident23 )
{
// Apply only to columns 1 and 2.
MAC_Apply_G_mx2_ass( m_A,
&gamma12,
&sigma12,
a1, 1,
a2, 1 );
}
else if ( is_ident12 && !is_ident23 )
{
// Apply only to columns 2 and 3.
MAC_Apply_G_mx2_ass( m_A,
&gamma23,
&sigma23,
a2, 1,
a3, 1 );
}
else if ( !is_ident12 && !is_ident23 )
{
// Apply to all three columns.
MAC_Apply_G_mx3b_ass( m_A,
&gamma12,
&sigma12,
&gamma23,
&sigma23,
a1, 1,
a2, 1,
a3, 1 );
}
}
// Odd rotation count: one rotation at (g, k) is left over.
if ( n_left == 1 )
{
g23 = buff_G + (g )*rs_G + (k )*cs_G;
a2 = buff_A + (g )*cs_A;
a3 = buff_A + (g + 1)*cs_A;
gamma23 = g23->real;
sigma23 = g23->imag;
is_ident23 = ( gamma23 == one && sigma23 == zero );
if ( !is_ident23 )
MAC_Apply_G_mx2_ass( m_A,
&gamma23,
&sigma23,
a2, 1,
a3, 1 );
}
}
return FLA_SUCCESS;
}
| FLA_Error FLA_Apply_G_rf_ass_var6b | ( | int | k_G, |
| int | m_A, | ||
| int | n_A, | ||
| int | i_k, | ||
| int | iTL, | ||
| scomplex * | buff_G, | ||
| int | rs_G, | ||
| int | cs_G, | ||
| float * | buff_A, | ||
| int | rs_A, | ||
| int | cs_A | ||
| ) |
Referenced by FLA_Apply_G_rf_asm_var6b(), and FLA_Apply_G_rf_bls_var6b().
{
// Placeholder: this variant has no implementation.  The only action is to
// pass FLA_NOT_YET_IMPLEMENTED to FLA_Check_error_code(); none of the
// arguments are read.
// NOTE(review): FLA_SUCCESS is returned if FLA_Check_error_code() hands
// control back -- presumably it aborts on an error code; confirm against
// its definition.
FLA_Check_error_code( FLA_NOT_YET_IMPLEMENTED );
return FLA_SUCCESS;
}
| FLA_Error FLA_Apply_G_rf_ass_var7 | ( | int | k_G, |
| int | m_A, | ||
| int | n_A, | ||
| scomplex * | buff_G, | ||
| int | rs_G, | ||
| int | cs_G, | ||
| float * | buff_A, | ||
| int | rs_A, | ||
| int | cs_A | ||
| ) |
| FLA_Error FLA_Apply_G_rf_ass_var8 | ( | int | k_G, |
| int | m_A, | ||
| int | n_A, | ||
| scomplex * | buff_G, | ||
| int | rs_G, | ||
| int | cs_G, | ||
| float * | buff_A, | ||
| int | rs_A, | ||
| int | cs_A | ||
| ) |
| FLA_Error FLA_Apply_G_rf_ass_var8b | ( | int | k_G, |
| int | m_A, | ||
| int | n_A, | ||
| int | i_k, | ||
| int | iTL, | ||
| scomplex * | buff_G, | ||
| int | rs_G, | ||
| int | cs_G, | ||
| float * | buff_A, | ||
| int | rs_A, | ||
| int | cs_A | ||
| ) |
| FLA_Error FLA_Apply_G_rf_ass_var9 | ( | int | k_G, |
| int | m_A, | ||
| int | n_A, | ||
| scomplex * | buff_G, | ||
| int | rs_G, | ||
| int | cs_G, | ||
| float * | buff_A, | ||
| int | rs_A, | ||
| int | cs_A | ||
| ) |
References bli_s0(), bli_s1(), FLA_Apply_G_rf_ass_var1(), scomplex::imag, and scomplex::real.
Referenced by FLA_Apply_G_rf_asm_var9(), and FLA_Apply_G_rf_bls_var9().
{
// Fused wavefront application of the rotations stored in G to A.
//
// Entry (g, k) of G (row g, column k; real part read as gamma, imaginary
// part as sigma) is applied to columns g and g+1 of A.  This variant
// pairs two consecutive rotations from the SAME column of G: (g, k)
// ("12", columns a1/a2) and (g + 1, k) ("23", columns a2/a3).  When
// neither is the identity (1, 0) the pair is applied with one
// MAC_Apply_G_mx3_ass() call; when exactly one is the identity the other
// is applied alone; when both are, nothing is done.
//
// Columns of A are handed to the macros with the literal increment 1;
// rs_A is only forwarded to the fallback routine.
//
// Always returns FLA_SUCCESS.
float one = bli_s1();
float zero = bli_s0();
float gamma12;
float sigma12;
float gamma23;
float sigma23;
float* a1;
float* a2;
float* a3;
scomplex* g12;
scomplex* g23;
int i, j, g, k;
int nG, nG_app;
int n_iter;
int n_left;
int k_minus_1;
int n_fuse;
int is_ident12, is_ident23;
k_minus_1 = k_G - 1;
nG = n_A - 1;
// n_fuse is the step applied to j, i.e. how many rows of G each pass
// advances by.
n_fuse = 2;
// Use the simple variant for nG < 2*(k - 1) or k == 1.
if ( nG < 2*k_minus_1 || k_G == 1 )
{
FLA_Apply_G_rf_ass_var1( k_G,
m_A,
n_A,
buff_G, rs_G, cs_G,
buff_A, rs_A, cs_A );
return FLA_SUCCESS;
}
// Start-up phase.
// j starts at -1 and advances by n_fuse.  On the first pass n_iter is 0,
// so the fused loop body does not run, but its initialiser still sets
// k = 0 and g = j; the remainder block below relies on those values.
for ( j = -1; j < k_minus_1; j += n_fuse )
{
nG_app = j + 1;
n_iter = nG_app;
n_left = 1;
for ( i = 0, k = 0, g = j; i < n_iter; ++i, ++k, --g )
{
g12 = buff_G + (g )*rs_G + (k )*cs_G;
g23 = buff_G + (g + 1)*rs_G + (k )*cs_G;
a1 = buff_A + (g )*cs_A;
a2 = buff_A + (g + 1)*cs_A;
a3 = buff_A + (g + 2)*cs_A;
gamma12 = g12->real;
sigma12 = g12->imag;
gamma23 = g23->real;
sigma23 = g23->imag;
is_ident12 = ( gamma12 == one && sigma12 == zero );
is_ident23 = ( gamma23 == one && sigma23 == zero );
if ( !is_ident12 && is_ident23 )
{
// Apply only to columns 1 and 2.
MAC_Apply_G_mx2_ass( m_A,
&gamma12,
&sigma12,
a1, 1,
a2, 1 );
}
else if ( is_ident12 && !is_ident23 )
{
// Apply only to columns 2 and 3.
MAC_Apply_G_mx2_ass( m_A,
&gamma23,
&sigma23,
a2, 1,
a3, 1 );
}
else if ( !is_ident12 && !is_ident23 )
{
// Apply to all three columns.
MAC_Apply_G_mx3_ass( m_A,
&gamma12,
&sigma12,
&gamma23,
&sigma23,
a1, 1,
a2, 1,
a3, 1 );
}
}
// Remainder: n_left is always 1 in this phase, so the single rotation at
// (g + 1, k) is applied to columns g+1 and g+2 using the g and k values
// left behind by the loop above.
if ( n_left == 1 )
{
g23 = buff_G + (g + 1)*rs_G + (k )*cs_G;
a2 = buff_A + (g + 1)*cs_A;
a3 = buff_A + (g + 2)*cs_A;
gamma23 = g23->real;
sigma23 = g23->imag;
is_ident23 = ( gamma23 == one && sigma23 == zero );
if ( !is_ident23 )
MAC_Apply_G_mx2_ass( m_A,
&gamma23,
&sigma23,
a2, 1,
a3, 1 );
}
}
// Pipeline stage
// Continues from the value of j left by the start-up loop.  Each pass
// visits all k_G columns of G, applying the rotation pair at rows g and
// g + 1 of each column.  n_left is set to 0 here and is not read in this
// stage.
for ( ; j < nG - 1; j += n_fuse )
{
nG_app = k_G;
n_iter = nG_app;
n_left = 0;
for ( i = 0, k = 0, g = j; i < n_iter; ++i, ++k, --g )
{
g12 = buff_G + (g )*rs_G + (k )*cs_G;
g23 = buff_G + (g + 1)*rs_G + (k )*cs_G;
a1 = buff_A + (g )*cs_A;
a2 = buff_A + (g + 1)*cs_A;
a3 = buff_A + (g + 2)*cs_A;
gamma12 = g12->real;
sigma12 = g12->imag;
gamma23 = g23->real;
sigma23 = g23->imag;
is_ident12 = ( gamma12 == one && sigma12 == zero );
is_ident23 = ( gamma23 == one && sigma23 == zero );
if ( !is_ident12 && is_ident23 )
{
// Apply only to columns 1 and 2.
MAC_Apply_G_mx2_ass( m_A,
&gamma12,
&sigma12,
a1, 1,
a2, 1 );
}
else if ( is_ident12 && !is_ident23 )
{
// Apply only to columns 2 and 3.
MAC_Apply_G_mx2_ass( m_A,
&gamma23,
&sigma23,
a2, 1,
a3, 1 );
}
else if ( !is_ident12 && !is_ident23 )
{
// Apply to all three columns.
MAC_Apply_G_mx3_ass( m_A,
&gamma12,
&sigma12,
&gamma23,
&sigma23,
a1, 1,
a2, 1,
a3, 1 );
}
}
}
// Shutdown stage
// j now indexes columns of G, starting at nG % n_fuse and advancing by
// n_fuse.  Each pass starts at the last row of G (g = nG - 1).
for ( j = nG % n_fuse; j < k_G; j += n_fuse )
{
g = nG - 1;
k = j;
// n_left is forced to 1, so the single rotation at (nG - 1, j) is always
// applied to the last two columns of A before the fused loop below.
n_left = 1;
if ( n_left == 1 )
{
g12 = buff_G + (g )*rs_G + (k )*cs_G;
a1 = buff_A + (g )*cs_A;
a2 = buff_A + (g + 1)*cs_A;
gamma12 = g12->real;
sigma12 = g12->imag;
is_ident12 = ( gamma12 == one && sigma12 == zero );
if ( !is_ident12 )
MAC_Apply_G_mx2_ass( m_A,
&gamma12,
&sigma12,
a1, 1,
a2, 1 );
++k;
--g;
}
// The remaining k_minus_1 - j columns of G each contribute one rotation
// pair, continuing from the k and g values updated above.
nG_app = k_minus_1 - j;
n_iter = nG_app;
for ( i = 0; i < n_iter; ++i, ++k, --g )
{
g12 = buff_G + (g )*rs_G + (k )*cs_G;
g23 = buff_G + (g + 1)*rs_G + (k )*cs_G;
a1 = buff_A + (g )*cs_A;
a2 = buff_A + (g + 1)*cs_A;
a3 = buff_A + (g + 2)*cs_A;
gamma12 = g12->real;
sigma12 = g12->imag;
gamma23 = g23->real;
sigma23 = g23->imag;
is_ident12 = ( gamma12 == one && sigma12 == zero );
is_ident23 = ( gamma23 == one && sigma23 == zero );
if ( !is_ident12 && is_ident23 )
{
// Apply only to columns 1 and 2.
MAC_Apply_G_mx2_ass( m_A,
&gamma12,
&sigma12,
a1, 1,
a2, 1 );
}
else if ( is_ident12 && !is_ident23 )
{
// Apply only to columns 2 and 3.
MAC_Apply_G_mx2_ass( m_A,
&gamma23,
&sigma23,
a2, 1,
a3, 1 );
}
else if ( !is_ident12 && !is_ident23 )
{
// Apply to all three columns.
MAC_Apply_G_mx3_ass( m_A,
&gamma12,
&sigma12,
&gamma23,
&sigma23,
a1, 1,
a2, 1,
a3, 1 );
}
}
}
return FLA_SUCCESS;
}
| FLA_Error FLA_Apply_G_rf_ass_var9b | ( | int | k_G, |
| int | m_A, | ||
| int | n_A, | ||
| int | i_k, | ||
| int | iTL, | ||
| scomplex * | buff_G, | ||
| int | rs_G, | ||
| int | cs_G, | ||
| float * | buff_A, | ||
| int | rs_A, | ||
| int | cs_A | ||
| ) |
Referenced by FLA_Apply_G_rf_asm_var9b(), and FLA_Apply_G_rf_bls_var9b().
{
// Placeholder: this variant has no implementation.  The only action is to
// pass FLA_NOT_YET_IMPLEMENTED to FLA_Check_error_code(); none of the
// arguments are read.
// NOTE(review): FLA_SUCCESS is returned if FLA_Check_error_code() hands
// control back -- presumably it aborts on an error code; confirm against
// its definition.
FLA_Check_error_code( FLA_NOT_YET_IMPLEMENTED );
return FLA_SUCCESS;
}
| FLA_Error FLA_Apply_G_rf_asz_var1 | ( | int | k_G, |
| int | m_A, | ||
| int | n_A, | ||
| dcomplex * | buff_G, | ||
| int | rs_G, | ||
| int | cs_G, | ||
| dcomplex * | buff_A, | ||
| int | rs_A, | ||
| int | cs_A | ||
| ) |
References bli_d0(), bli_d1(), dcomplex::imag, and dcomplex::real.
Referenced by FLA_Apply_G_rf_asm_var1(), FLA_Apply_G_rf_asz_var2(), FLA_Apply_G_rf_asz_var3(), FLA_Apply_G_rf_asz_var6(), FLA_Apply_G_rf_asz_var9(), and FLA_Apply_G_rf_blz_var1().
{
  // Simple (unfused) application of the rotations stored in G to A.
  //
  // For each of the k_G columns of G, walks the n_A - 1 entries of that
  // column (stepping by rs_G) and applies entry j -- (real, imag) read as
  // (gamma, sigma) -- to the adjacent column pair j, j+1 of A (columns are
  // cs_A apart).  Entries equal to (1, 0) are skipped.
  //
  // The columns of A are handed to MAC_Apply_G_mx2_asz() with the literal
  // increment 1; rs_A is not consulted by this routine.
  //
  // Always returns FLA_SUCCESS.
  double    one    = bli_d1();
  double    zero   = bli_d0();
  int       nG_app = n_A - 1;
  int       l, j;
  double    gamma;
  double    sigma;
  dcomplex* a1;
  dcomplex* a2;
  dcomplex* g1;
  dcomplex* g11;

  // Outer loop: one pass over A per column of G.
  for ( l = 0, g1 = buff_G; l < k_G; ++l, g1 += cs_G )
  {
    // Inner loop: slide the column pair (a1, a2) across A while stepping
    // down the current column of G.
    for ( j = 0, g11 = g1, a1 = buff_A;
          j < nG_app;
          ++j, g11 += rs_G, a1 += cs_A )
    {
      a2    = a1 + cs_A;
      gamma = g11->real;
      sigma = g11->imag;

      // Nothing to do for an identity rotation.
      if ( gamma == one && sigma == zero ) continue;

      MAC_Apply_G_mx2_asz( m_A,
                           &gamma,
                           &sigma,
                           a1, 1,
                           a2, 1 );
    }
  }

  return FLA_SUCCESS;
}
| FLA_Error FLA_Apply_G_rf_asz_var2 | ( | int | k_G, |
| int | m_A, | ||
| int | n_A, | ||
| dcomplex * | buff_G, | ||
| int | rs_G, | ||
| int | cs_G, | ||
| dcomplex * | buff_A, | ||
| int | rs_A, | ||
| int | cs_A | ||
| ) |
References bli_d0(), bli_d1(), FLA_Apply_G_rf_asz_var1(), dcomplex::imag, and dcomplex::real.
Referenced by FLA_Apply_G_rf_asm_var2(), and FLA_Apply_G_rf_blz_var2().
{
  // Wavefront-ordered application of the rotations stored in G to A.
  //
  // Entry (g, k) of G -- row g, column k, (real, imag) read as
  // (gamma, sigma) -- is applied to columns g and g+1 of A.  Instead of
  // finishing one column of G before starting the next, entries are visited
  // one anti-diagonal (g + k == j) at a time, and within an anti-diagonal in
  // order of increasing k.  The first k_G - 1 anti-diagonals are the
  // start-up phase (they grow by one entry each), the last k_G - 1 are the
  // shutdown phase (they shrink by one entry each), and the ones in between
  // touch every column of G.
  //
  // Entries equal to (1, 0) are skipped.  Columns of A are handed to
  // MAC_Apply_G_mx2_asz() with the literal increment 1; rs_A is only
  // forwarded to the fallback routine.
  //
  // Always returns FLA_SUCCESS.
  double    one  = bli_d1();
  double    zero = bli_d0();
  double    gamma;
  double    sigma;
  dcomplex* a1;
  dcomplex* a2;
  dcomplex* g11;
  int       j, g, k;
  int       k_first, k_last;
  int       nG        = n_A - 1;
  int       k_minus_1 = k_G - 1;

  // Use the simple variant when k == 1 or nG < (k - 1).
  if ( k_G == 1 || nG < k_minus_1 )
  {
    FLA_Apply_G_rf_asz_var1( k_G,
                             m_A,
                             n_A,
                             buff_G, rs_G, cs_G,
                             buff_A, rs_A, cs_A );
    return FLA_SUCCESS;
  }

  // One iteration per anti-diagonal j = g + k, covering start-up,
  // pipeline and shutdown in that order.
  for ( j = 0; j < nG + k_minus_1; ++j )
  {
    // Clip the anti-diagonal to the valid rows (g <= nG - 1) and
    // columns (k <= k_G - 1) of G.
    k_first = ( j < nG  ? 0 : j - nG + 1 );
    k_last  = ( j < k_G ? j : k_minus_1 );

    for ( k = k_first, g = j - k_first; k <= k_last; ++k, --g )
    {
      g11 = buff_G + (g    )*rs_G + (k    )*cs_G;
      a1  = buff_A + (g    )*cs_A;
      a2  = buff_A + (g + 1)*cs_A;

      gamma = g11->real;
      sigma = g11->imag;

      // Skip the current iteration if the rotation is identity.
      if ( gamma == one && sigma == zero ) continue;

      MAC_Apply_G_mx2_asz( m_A,
                           &gamma,
                           &sigma,
                           a1, 1,
                           a2, 1 );
    }
  }

  return FLA_SUCCESS;
}
| FLA_Error FLA_Apply_G_rf_asz_var3 | ( | int | k_G, |
| int | m_A, | ||
| int | n_A, | ||
| dcomplex * | buff_G, | ||
| int | rs_G, | ||
| int | cs_G, | ||
| dcomplex * | buff_A, | ||
| int | rs_A, | ||
| int | cs_A | ||
| ) |
References bli_d0(), bli_d1(), FLA_Apply_G_rf_asz_var1(), dcomplex::imag, and dcomplex::real.
Referenced by FLA_Apply_G_rf_asm_var3(), and FLA_Apply_G_rf_blz_var3().
{
    // Applies the rotations stored in G to adjacent column pairs of A,
    // fusing up to four rotations (two neighbouring rows of G taken from two
    // neighbouring columns of G) into a single four-column kernel call.
    //
    //   k_G            number of columns of G that are traversed
    //   m_A            length handed to every MAC_Apply_G_* kernel
    //   n_A            A has n_A columns, so each column of G supplies
    //                  nG = n_A - 1 rotations
    //   buff_G, rs_G, cs_G
    //                  entry (g, k) of G lives at buff_G + g*rs_G + k*cs_G;
    //                  .real is read as gamma and .imag as sigma
    //   buff_A, rs_A, cs_A
    //                  column c of A starts at buff_A + c*cs_A and is passed
    //                  to the kernels with unit stride; rs_A is only
    //                  forwarded to FLA_Apply_G_rf_asz_var1()
    //
    // Always returns FLA_SUCCESS.
    //
    // A rotation with (gamma, sigma) == (1, 0) is the identity and is never
    // applied.
    double    one  = bli_d1();
    double    zero = bli_d0();
    double    gamma23_k1, sigma23_k1;
    double    gamma34_k1, sigma34_k1;
    double    gamma12_k2, sigma12_k2;
    double    gamma23_k2, sigma23_k2;
    dcomplex* a1;
    dcomplex* a2;
    dcomplex* a3;
    dcomplex* a4;
    dcomplex* g23_k1;
    dcomplex* g34_k1;
    dcomplex* g12_k2;
    dcomplex* g23_k2;
    int       i, j, g, k;
    int       nG, nG_app;
    int       n_iter;
    int       n_left;
    int       k_minus_1;
    int       n_fuse;
    int       k_fuse;
    int       is_ident23_k1, is_ident34_k1;
    int       is_ident12_k2, is_ident23_k2;
    int       has_ident;

    k_minus_1 = k_G - 1;
    nG        = n_A - 1;
    n_fuse    = 2;
    k_fuse    = 2;

    // Use the simple variant for nG < 2*(k_G - 1) or k_G == 1.
    if ( nG < 2*k_minus_1 || k_G == 1 )
    {
        FLA_Apply_G_rf_asz_var1( k_G,
                                 m_A,
                                 n_A,
                                 buff_G, rs_G, cs_G,
                                 buff_A, rs_A, cs_A );
        return FLA_SUCCESS;
    }

    // Start-up phase.  j starts at -1 and advances by n_fuse; every step
    // performs n_iter fused 2x2 applications followed by exactly one
    // single-rotation application (n_left is forced to 1).
    for ( j = -1; j < k_minus_1; j += n_fuse )
    {
        nG_app = j + 2;
        n_iter = nG_app / k_fuse;
        n_left = 1;

        for ( i = 0, k = 0, g = j; i < n_iter; ++i, k += k_fuse, g -= n_fuse )
        {
            g23_k1 = buff_G + (g    )*rs_G + (k    )*cs_G;
            g34_k1 = buff_G + (g + 1)*rs_G + (k    )*cs_G;
            g12_k2 = buff_G + (g - 1)*rs_G + (k + 1)*cs_G;
            g23_k2 = buff_G + (g    )*rs_G + (k + 1)*cs_G;
            a1     = buff_A + (g - 1)*cs_A;
            a2     = buff_A + (g    )*cs_A;
            a3     = buff_A + (g + 1)*cs_A;
            a4     = buff_A + (g + 2)*cs_A;

            gamma23_k1 = g23_k1->real;
            sigma23_k1 = g23_k1->imag;
            gamma34_k1 = g34_k1->real;
            sigma34_k1 = g34_k1->imag;
            gamma12_k2 = g12_k2->real;
            sigma12_k2 = g12_k2->imag;
            gamma23_k2 = g23_k2->real;
            sigma23_k2 = g23_k2->imag;

            is_ident23_k1 = ( gamma23_k1 == one && sigma23_k1 == zero );
            is_ident34_k1 = ( gamma34_k1 == one && sigma34_k1 == zero );
            is_ident12_k2 = ( gamma12_k2 == one && sigma12_k2 == zero );
            is_ident23_k2 = ( gamma23_k2 == one && sigma23_k2 == zero );
            has_ident     = ( is_ident23_k1 || is_ident34_k1 ||
                              is_ident12_k2 || is_ident23_k2 );

            if ( has_ident )
            {
                // At least one rotation is the identity: apply the
                // remaining ones pair by pair, in the same order the fused
                // kernel receives them.
                if ( !is_ident23_k1 )
                {
                    MAC_Apply_G_mx2_asz( m_A, &gamma23_k1, &sigma23_k1, a2, 1, a3, 1 );
                }
                if ( !is_ident34_k1 )
                {
                    MAC_Apply_G_mx2_asz( m_A, &gamma34_k1, &sigma34_k1, a3, 1, a4, 1 );
                }
                if ( !is_ident12_k2 )
                {
                    MAC_Apply_G_mx2_asz( m_A, &gamma12_k2, &sigma12_k2, a1, 1, a2, 1 );
                }
                if ( !is_ident23_k2 )
                {
                    MAC_Apply_G_mx2_asz( m_A, &gamma23_k2, &sigma23_k2, a2, 1, a3, 1 );
                }
            }
            else
            {
                // Apply to all four columns.
                MAC_Apply_G_mx4s_asz( m_A,
                                      &gamma23_k1, &sigma23_k1,
                                      &gamma34_k1, &sigma34_k1,
                                      &gamma12_k2, &sigma12_k2,
                                      &gamma23_k2, &sigma23_k2,
                                      a1, 1,
                                      a2, 1,
                                      a3, 1,
                                      a4, 1 );
            }
        }

        if ( n_left == 1 )
        {
            // Trailing single rotation of this start-up step.
            g34_k1 = buff_G + (g + 1)*rs_G + (k    )*cs_G;
            a3     = buff_A + (g + 1)*cs_A;
            a4     = buff_A + (g + 2)*cs_A;

            gamma34_k1 = g34_k1->real;
            sigma34_k1 = g34_k1->imag;

            is_ident34_k1 = ( gamma34_k1 == one && sigma34_k1 == zero );

            if ( !is_ident34_k1 )
            {
                MAC_Apply_G_mx2_asz( m_A, &gamma34_k1, &sigma34_k1, a3, 1, a4, 1 );
            }
        }
    }

    // Pipeline stage.  j continues from where the start-up phase stopped.
    // Each step covers all k_G columns of G: k_G / k_fuse fused 2x2
    // applications plus, when k_G is odd, one leftover column of G that
    // contributes two rotations.
    for ( ; j < nG - 1; j += n_fuse )
    {
        nG_app = k_G;
        n_iter = nG_app / k_fuse;
        n_left = nG_app % k_fuse;

        for ( i = 0, k = 0, g = j; i < n_iter; ++i, k += k_fuse, g -= n_fuse )
        {
            g23_k1 = buff_G + (g    )*rs_G + (k    )*cs_G;
            g34_k1 = buff_G + (g + 1)*rs_G + (k    )*cs_G;
            g12_k2 = buff_G + (g - 1)*rs_G + (k + 1)*cs_G;
            g23_k2 = buff_G + (g    )*rs_G + (k + 1)*cs_G;
            a1     = buff_A + (g - 1)*cs_A;
            a2     = buff_A + (g    )*cs_A;
            a3     = buff_A + (g + 1)*cs_A;
            a4     = buff_A + (g + 2)*cs_A;

            gamma23_k1 = g23_k1->real;
            sigma23_k1 = g23_k1->imag;
            gamma34_k1 = g34_k1->real;
            sigma34_k1 = g34_k1->imag;
            gamma12_k2 = g12_k2->real;
            sigma12_k2 = g12_k2->imag;
            gamma23_k2 = g23_k2->real;
            sigma23_k2 = g23_k2->imag;

            is_ident23_k1 = ( gamma23_k1 == one && sigma23_k1 == zero );
            is_ident34_k1 = ( gamma34_k1 == one && sigma34_k1 == zero );
            is_ident12_k2 = ( gamma12_k2 == one && sigma12_k2 == zero );
            is_ident23_k2 = ( gamma23_k2 == one && sigma23_k2 == zero );
            has_ident     = ( is_ident23_k1 || is_ident34_k1 ||
                              is_ident12_k2 || is_ident23_k2 );

            if ( has_ident )
            {
                // Apply to pairs of columns as needed.
                if ( !is_ident23_k1 )
                {
                    MAC_Apply_G_mx2_asz( m_A, &gamma23_k1, &sigma23_k1, a2, 1, a3, 1 );
                }
                if ( !is_ident34_k1 )
                {
                    MAC_Apply_G_mx2_asz( m_A, &gamma34_k1, &sigma34_k1, a3, 1, a4, 1 );
                }
                if ( !is_ident12_k2 )
                {
                    MAC_Apply_G_mx2_asz( m_A, &gamma12_k2, &sigma12_k2, a1, 1, a2, 1 );
                }
                if ( !is_ident23_k2 )
                {
                    MAC_Apply_G_mx2_asz( m_A, &gamma23_k2, &sigma23_k2, a2, 1, a3, 1 );
                }
            }
            else
            {
                // Apply to all four columns.
                MAC_Apply_G_mx4s_asz( m_A,
                                      &gamma23_k1, &sigma23_k1,
                                      &gamma34_k1, &sigma34_k1,
                                      &gamma12_k2, &sigma12_k2,
                                      &gamma23_k2, &sigma23_k2,
                                      a1, 1,
                                      a2, 1,
                                      a3, 1,
                                      a4, 1 );
            }
        }

        if ( n_left == 1 )
        {
            // Leftover column of G: two rotations acting on three columns.
            g23_k1 = buff_G + (g    )*rs_G + (k    )*cs_G;
            g34_k1 = buff_G + (g + 1)*rs_G + (k    )*cs_G;
            a2     = buff_A + (g    )*cs_A;
            a3     = buff_A + (g + 1)*cs_A;
            a4     = buff_A + (g + 2)*cs_A;

            gamma23_k1 = g23_k1->real;
            sigma23_k1 = g23_k1->imag;
            gamma34_k1 = g34_k1->real;
            sigma34_k1 = g34_k1->imag;

            is_ident23_k1 = ( gamma23_k1 == one && sigma23_k1 == zero );
            is_ident34_k1 = ( gamma34_k1 == one && sigma34_k1 == zero );

            if ( !is_ident23_k1 && is_ident34_k1 )
            {
                MAC_Apply_G_mx2_asz( m_A, &gamma23_k1, &sigma23_k1, a2, 1, a3, 1 );
            }
            else if ( is_ident23_k1 && !is_ident34_k1 )
            {
                MAC_Apply_G_mx2_asz( m_A, &gamma34_k1, &sigma34_k1, a3, 1, a4, 1 );
            }
            else if ( !is_ident23_k1 && !is_ident34_k1 )
            {
                // The three-column kernel runs only when neither rotation is
                // the identity; when both are, nothing is applied, matching
                // how identity rotations are skipped everywhere else.
                MAC_Apply_G_mx3_asz( m_A,
                                     &gamma23_k1, &sigma23_k1,
                                     &gamma34_k1, &sigma34_k1,
                                     a2, 1,
                                     a3, 1,
                                     a4, 1 );
            }
        }
    }

    // Shutdown stage.  Each step starts at the last row of G (nG - 1) in
    // column j of G with one single-rotation application, then continues
    // with fused applications over the remaining k_minus_1 - j columns.
    for ( j = nG % n_fuse; j < k_G; j += n_fuse )
    {
        g = nG - 1;
        k = j;

        {
            g23_k1 = buff_G + (g    )*rs_G + (k    )*cs_G;
            a2     = buff_A + (g    )*cs_A;
            a3     = buff_A + (g + 1)*cs_A;

            gamma23_k1 = g23_k1->real;
            sigma23_k1 = g23_k1->imag;

            is_ident23_k1 = ( gamma23_k1 == one && sigma23_k1 == zero );

            if ( !is_ident23_k1 )
            {
                MAC_Apply_G_mx2_asz( m_A, &gamma23_k1, &sigma23_k1, a2, 1, a3, 1 );
            }

            ++k;
            --g;
        }

        nG_app = k_minus_1 - j;
        n_iter = nG_app / k_fuse;
        n_left = nG_app % k_fuse;

        // k and g carry over from the single application above.
        for ( i = 0; i < n_iter; ++i, k += k_fuse, g -= n_fuse )
        {
            g23_k1 = buff_G + (g    )*rs_G + (k    )*cs_G;
            g34_k1 = buff_G + (g + 1)*rs_G + (k    )*cs_G;
            g12_k2 = buff_G + (g - 1)*rs_G + (k + 1)*cs_G;
            g23_k2 = buff_G + (g    )*rs_G + (k + 1)*cs_G;
            a1     = buff_A + (g - 1)*cs_A;
            a2     = buff_A + (g    )*cs_A;
            a3     = buff_A + (g + 1)*cs_A;
            a4     = buff_A + (g + 2)*cs_A;

            gamma23_k1 = g23_k1->real;
            sigma23_k1 = g23_k1->imag;
            gamma34_k1 = g34_k1->real;
            sigma34_k1 = g34_k1->imag;
            gamma12_k2 = g12_k2->real;
            sigma12_k2 = g12_k2->imag;
            gamma23_k2 = g23_k2->real;
            sigma23_k2 = g23_k2->imag;

            is_ident23_k1 = ( gamma23_k1 == one && sigma23_k1 == zero );
            is_ident34_k1 = ( gamma34_k1 == one && sigma34_k1 == zero );
            is_ident12_k2 = ( gamma12_k2 == one && sigma12_k2 == zero );
            is_ident23_k2 = ( gamma23_k2 == one && sigma23_k2 == zero );
            has_ident     = ( is_ident23_k1 || is_ident34_k1 ||
                              is_ident12_k2 || is_ident23_k2 );

            if ( has_ident )
            {
                // Apply to pairs of columns as needed.
                if ( !is_ident23_k1 )
                {
                    MAC_Apply_G_mx2_asz( m_A, &gamma23_k1, &sigma23_k1, a2, 1, a3, 1 );
                }
                if ( !is_ident34_k1 )
                {
                    MAC_Apply_G_mx2_asz( m_A, &gamma34_k1, &sigma34_k1, a3, 1, a4, 1 );
                }
                if ( !is_ident12_k2 )
                {
                    MAC_Apply_G_mx2_asz( m_A, &gamma12_k2, &sigma12_k2, a1, 1, a2, 1 );
                }
                if ( !is_ident23_k2 )
                {
                    MAC_Apply_G_mx2_asz( m_A, &gamma23_k2, &sigma23_k2, a2, 1, a3, 1 );
                }
            }
            else
            {
                // Apply to all four columns.
                MAC_Apply_G_mx4s_asz( m_A,
                                      &gamma23_k1, &sigma23_k1,
                                      &gamma34_k1, &sigma34_k1,
                                      &gamma12_k2, &sigma12_k2,
                                      &gamma23_k2, &sigma23_k2,
                                      a1, 1,
                                      a2, 1,
                                      a3, 1,
                                      a4, 1 );
            }
        }

        if ( n_left == 1 )
        {
            // Leftover column of G: two rotations acting on three columns.
            g23_k1 = buff_G + (g    )*rs_G + (k    )*cs_G;
            g34_k1 = buff_G + (g + 1)*rs_G + (k    )*cs_G;
            a2     = buff_A + (g    )*cs_A;
            a3     = buff_A + (g + 1)*cs_A;
            a4     = buff_A + (g + 2)*cs_A;

            gamma23_k1 = g23_k1->real;
            sigma23_k1 = g23_k1->imag;
            gamma34_k1 = g34_k1->real;
            sigma34_k1 = g34_k1->imag;

            is_ident23_k1 = ( gamma23_k1 == one && sigma23_k1 == zero );
            is_ident34_k1 = ( gamma34_k1 == one && sigma34_k1 == zero );

            if ( !is_ident23_k1 && is_ident34_k1 )
            {
                MAC_Apply_G_mx2_asz( m_A, &gamma23_k1, &sigma23_k1, a2, 1, a3, 1 );
            }
            else if ( is_ident23_k1 && !is_ident34_k1 )
            {
                MAC_Apply_G_mx2_asz( m_A, &gamma34_k1, &sigma34_k1, a3, 1, a4, 1 );
            }
            else if ( !is_ident23_k1 && !is_ident34_k1 )
            {
                // Skipped when both rotations are the identity (see the
                // pipeline stage).
                MAC_Apply_G_mx3_asz( m_A,
                                     &gamma23_k1, &sigma23_k1,
                                     &gamma34_k1, &sigma34_k1,
                                     a2, 1,
                                     a3, 1,
                                     a4, 1 );
            }
        }
    }

    return FLA_SUCCESS;
}
| FLA_Error FLA_Apply_G_rf_asz_var3b | ( | int | k_G, |
| int | m_A, | ||
| int | n_A, | ||
| int | i_k, | ||
| int | iTL, | ||
| dcomplex * | buff_G, | ||
| int | rs_G, | ||
| int | cs_G, | ||
| dcomplex * | buff_A, | ||
| int | rs_A, | ||
| int | cs_A | ||
| ) |
Referenced by FLA_Apply_G_rf_asm_var3b().
{
// Placeholder body: none of the arguments are used.  The only action is to
// pass FLA_NOT_YET_IMPLEMENTED to FLA_Check_error_code().
FLA_Check_error_code( FLA_NOT_YET_IMPLEMENTED );
// NOTE(review): whether FLA_Check_error_code() returns control here is not
// visible in this file; if it does, the caller receives FLA_SUCCESS.
return FLA_SUCCESS;
}
| FLA_Error FLA_Apply_G_rf_asz_var4 | ( | int | k_G, |
| int | m_A, | ||
| int | n_A, | ||
| dcomplex * | buff_G, | ||
| int | rs_G, | ||
| int | cs_G, | ||
| dcomplex * | buff_A, | ||
| int | rs_A, | ||
| int | cs_A | ||
| ) |
| FLA_Error FLA_Apply_G_rf_asz_var5 | ( | int | k_G, |
| int | m_A, | ||
| int | n_A, | ||
| dcomplex * | buff_G, | ||
| int | rs_G, | ||
| int | cs_G, | ||
| dcomplex * | buff_A, | ||
| int | rs_A, | ||
| int | cs_A | ||
| ) |
| FLA_Error FLA_Apply_G_rf_asz_var5b | ( | int | k_G, |
| int | m_A, | ||
| int | n_A, | ||
| int | i_k, | ||
| int | iTL, | ||
| dcomplex * | buff_G, | ||
| int | rs_G, | ||
| int | cs_G, | ||
| dcomplex * | buff_A, | ||
| int | rs_A, | ||
| int | cs_A | ||
| ) |
| FLA_Error FLA_Apply_G_rf_asz_var6 | ( | int | k_G, |
| int | m_A, | ||
| int | n_A, | ||
| dcomplex * | buff_G, | ||
| int | rs_G, | ||
| int | cs_G, | ||
| dcomplex * | buff_A, | ||
| int | rs_A, | ||
| int | cs_A | ||
| ) |
References bli_d0(), bli_d1(), FLA_Apply_G_rf_asz_var1(), dcomplex::imag, and dcomplex::real.
Referenced by FLA_Apply_G_rf_asm_var6(), and FLA_Apply_G_rf_blz_var6().
{
    // Applies the rotations stored in G to adjacent column pairs of A,
    // pairing rotation (g, k) with rotation (g - 1, k + 1) so that both can
    // be handed to a single three-column kernel call.  Each entry of G is
    // read as (gamma, sigma) = (real, imag); (1, 0) is the identity and is
    // never applied.
    double    one  = bli_d1();
    double    zero = bli_d0();
    double    gamma12, sigma12;
    double    gamma23, sigma23;
    dcomplex* a1;
    dcomplex* a2;
    dcomplex* a3;
    dcomplex* g12;
    dcomplex* g23;
    int       i, j, g, k;
    int       nG_app, n_iter, n_left;
    int       is_ident12, is_ident23;
    int       k_minus_1 = k_G - 1;
    int       nG        = n_A - 1;
    int       n_fuse    = 2;

    // Hand the work to the simple variant when k_G == 1 or nG < k_G - 1.
    if ( k_G == 1 || nG < k_minus_1 )
    {
        FLA_Apply_G_rf_asz_var1( k_G, m_A, n_A,
                                 buff_G, rs_G, cs_G,
                                 buff_A, rs_A, cs_A );
        return FLA_SUCCESS;
    }

    // Start-up phase: step j visits j + 1 rotations, two at a time, with a
    // single trailing rotation when that count is odd.
    for ( j = 0; j < k_minus_1; ++j )
    {
        nG_app = j + 1;
        n_iter = nG_app / n_fuse;
        n_left = nG_app % n_fuse;

        k = 0;
        g = nG_app - 1;

        for ( i = 0; i < n_iter; ++i )
        {
            g12 = buff_G + (g - 1)*rs_G + (k + 1)*cs_G;
            g23 = buff_G + (g    )*rs_G + (k    )*cs_G;
            a1  = buff_A + (g - 1)*cs_A;
            a2  = buff_A + (g    )*cs_A;
            a3  = buff_A + (g + 1)*cs_A;

            gamma12 = g12->real;
            sigma12 = g12->imag;
            gamma23 = g23->real;
            sigma23 = g23->imag;

            is_ident12 = ( gamma12 == one && sigma12 == zero );
            is_ident23 = ( gamma23 == one && sigma23 == zero );

            if ( !is_ident12 )
            {
                if ( is_ident23 )
                {
                    // Only columns 1 and 2 are touched.
                    MAC_Apply_G_mx2_asz( m_A, &gamma12, &sigma12, a1, 1, a2, 1 );
                }
                else
                {
                    // Both rotations are live: all three columns.
                    MAC_Apply_G_mx3b_asz( m_A,
                                          &gamma12, &sigma12,
                                          &gamma23, &sigma23,
                                          a1, 1, a2, 1, a3, 1 );
                }
            }
            else if ( !is_ident23 )
            {
                // Only columns 2 and 3 are touched.
                MAC_Apply_G_mx2_asz( m_A, &gamma23, &sigma23, a2, 1, a3, 1 );
            }

            k += n_fuse;
            g -= n_fuse;
        }

        if ( n_left == 1 )
        {
            g23 = buff_G + (g    )*rs_G + (k    )*cs_G;
            a2  = buff_A + (g    )*cs_A;
            a3  = buff_A + (g + 1)*cs_A;

            gamma23 = g23->real;
            sigma23 = g23->imag;

            is_ident23 = ( gamma23 == one && sigma23 == zero );

            if ( !is_ident23 )
            {
                MAC_Apply_G_mx2_asz( m_A, &gamma23, &sigma23, a2, 1, a3, 1 );
            }
        }
    }

    // Pipeline phase: every step visits one rotation from each of the k_G
    // columns of G, starting at row j of the first column.
    for ( j = k_minus_1; j < nG; ++j )
    {
        nG_app = k_G;
        n_iter = nG_app / n_fuse;
        n_left = nG_app % n_fuse;

        k = 0;
        g = j;

        for ( i = 0; i < n_iter; ++i )
        {
            g12 = buff_G + (g - 1)*rs_G + (k + 1)*cs_G;
            g23 = buff_G + (g    )*rs_G + (k    )*cs_G;
            a1  = buff_A + (g - 1)*cs_A;
            a2  = buff_A + (g    )*cs_A;
            a3  = buff_A + (g + 1)*cs_A;

            gamma12 = g12->real;
            sigma12 = g12->imag;
            gamma23 = g23->real;
            sigma23 = g23->imag;

            is_ident12 = ( gamma12 == one && sigma12 == zero );
            is_ident23 = ( gamma23 == one && sigma23 == zero );

            if ( !is_ident12 )
            {
                if ( is_ident23 )
                {
                    MAC_Apply_G_mx2_asz( m_A, &gamma12, &sigma12, a1, 1, a2, 1 );
                }
                else
                {
                    MAC_Apply_G_mx3b_asz( m_A,
                                          &gamma12, &sigma12,
                                          &gamma23, &sigma23,
                                          a1, 1, a2, 1, a3, 1 );
                }
            }
            else if ( !is_ident23 )
            {
                MAC_Apply_G_mx2_asz( m_A, &gamma23, &sigma23, a2, 1, a3, 1 );
            }

            k += n_fuse;
            g -= n_fuse;
        }

        if ( n_left == 1 )
        {
            g23 = buff_G + (g    )*rs_G + (k    )*cs_G;
            a2  = buff_A + (g    )*cs_A;
            a3  = buff_A + (g + 1)*cs_A;

            gamma23 = g23->real;
            sigma23 = g23->imag;

            is_ident23 = ( gamma23 == one && sigma23 == zero );

            if ( !is_ident23 )
            {
                MAC_Apply_G_mx2_asz( m_A, &gamma23, &sigma23, a2, 1, a3, 1 );
            }
        }
    }

    // Shutdown phase: step j starts at the last row of G (nG - 1) in column
    // j of G and visits the remaining k_G - j rotations.
    for ( j = 1; j < k_G; ++j )
    {
        nG_app = k_G - j;
        n_iter = nG_app / n_fuse;
        n_left = nG_app % n_fuse;

        k = j;
        g = nG - 1;

        for ( i = 0; i < n_iter; ++i )
        {
            g12 = buff_G + (g - 1)*rs_G + (k + 1)*cs_G;
            g23 = buff_G + (g    )*rs_G + (k    )*cs_G;
            a1  = buff_A + (g - 1)*cs_A;
            a2  = buff_A + (g    )*cs_A;
            a3  = buff_A + (g + 1)*cs_A;

            gamma12 = g12->real;
            sigma12 = g12->imag;
            gamma23 = g23->real;
            sigma23 = g23->imag;

            is_ident12 = ( gamma12 == one && sigma12 == zero );
            is_ident23 = ( gamma23 == one && sigma23 == zero );

            if ( !is_ident12 )
            {
                if ( is_ident23 )
                {
                    MAC_Apply_G_mx2_asz( m_A, &gamma12, &sigma12, a1, 1, a2, 1 );
                }
                else
                {
                    MAC_Apply_G_mx3b_asz( m_A,
                                          &gamma12, &sigma12,
                                          &gamma23, &sigma23,
                                          a1, 1, a2, 1, a3, 1 );
                }
            }
            else if ( !is_ident23 )
            {
                MAC_Apply_G_mx2_asz( m_A, &gamma23, &sigma23, a2, 1, a3, 1 );
            }

            k += n_fuse;
            g -= n_fuse;
        }

        if ( n_left == 1 )
        {
            g23 = buff_G + (g    )*rs_G + (k    )*cs_G;
            a2  = buff_A + (g    )*cs_A;
            a3  = buff_A + (g + 1)*cs_A;

            gamma23 = g23->real;
            sigma23 = g23->imag;

            is_ident23 = ( gamma23 == one && sigma23 == zero );

            if ( !is_ident23 )
            {
                MAC_Apply_G_mx2_asz( m_A, &gamma23, &sigma23, a2, 1, a3, 1 );
            }
        }
    }

    return FLA_SUCCESS;
}
| FLA_Error FLA_Apply_G_rf_asz_var6b | ( | int | k_G, |
| int | m_A, | ||
| int | n_A, | ||
| int | i_k, | ||
| int | iTL, | ||
| dcomplex * | buff_G, | ||
| int | rs_G, | ||
| int | cs_G, | ||
| dcomplex * | buff_A, | ||
| int | rs_A, | ||
| int | cs_A | ||
| ) |
Referenced by FLA_Apply_G_rf_asm_var6b().
{
// Placeholder body: none of the arguments are used.  The only action is to
// pass FLA_NOT_YET_IMPLEMENTED to FLA_Check_error_code().
FLA_Check_error_code( FLA_NOT_YET_IMPLEMENTED );
// NOTE(review): whether FLA_Check_error_code() returns control here is not
// visible in this file; if it does, the caller receives FLA_SUCCESS.
return FLA_SUCCESS;
}
| FLA_Error FLA_Apply_G_rf_asz_var7 | ( | int | k_G, |
| int | m_A, | ||
| int | n_A, | ||
| dcomplex * | buff_G, | ||
| int | rs_G, | ||
| int | cs_G, | ||
| dcomplex * | buff_A, | ||
| int | rs_A, | ||
| int | cs_A | ||
| ) |
| FLA_Error FLA_Apply_G_rf_asz_var8 | ( | int | k_G, |
| int | m_A, | ||
| int | n_A, | ||
| dcomplex * | buff_G, | ||
| int | rs_G, | ||
| int | cs_G, | ||
| dcomplex * | buff_A, | ||
| int | rs_A, | ||
| int | cs_A | ||
| ) |
| FLA_Error FLA_Apply_G_rf_asz_var8b | ( | int | k_G, |
| int | m_A, | ||
| int | n_A, | ||
| int | i_k, | ||
| int | iTL, | ||
| dcomplex * | buff_G, | ||
| int | rs_G, | ||
| int | cs_G, | ||
| dcomplex * | buff_A, | ||
| int | rs_A, | ||
| int | cs_A | ||
| ) |
| FLA_Error FLA_Apply_G_rf_asz_var9 | ( | int | k_G, |
| int | m_A, | ||
| int | n_A, | ||
| dcomplex * | buff_G, | ||
| int | rs_G, | ||
| int | cs_G, | ||
| dcomplex * | buff_A, | ||
| int | rs_A, | ||
| int | cs_A | ||
| ) |
References bli_d0(), bli_d1(), FLA_Apply_G_rf_asz_var1(), dcomplex::imag, and dcomplex::real.
Referenced by FLA_Apply_G_rf_asm_var9(), and FLA_Apply_G_rf_blz_var9().
{
    // Applies the rotations stored in G to adjacent column pairs of A,
    // pairing rotation (g, k) with rotation (g + 1, k) of the same column
    // of G so that both can be handed to a single three-column kernel call.
    // Each entry of G is read as (gamma, sigma) = (real, imag); (1, 0) is
    // the identity and is never applied.
    double    one  = bli_d1();
    double    zero = bli_d0();
    double    gamma12, sigma12;
    double    gamma23, sigma23;
    dcomplex* a1;
    dcomplex* a2;
    dcomplex* a3;
    dcomplex* g12;
    dcomplex* g23;
    int       i, j, g, k;
    int       nG_app, n_iter, n_left;
    int       is_ident12, is_ident23;
    int       k_minus_1 = k_G - 1;
    int       nG        = n_A - 1;
    int       n_fuse    = 2;

    // Hand the work to the simple variant when k_G == 1 or
    // nG < 2*(k_G - 1).
    if ( k_G == 1 || nG < 2*k_minus_1 )
    {
        FLA_Apply_G_rf_asz_var1( k_G, m_A, n_A,
                                 buff_G, rs_G, cs_G,
                                 buff_A, rs_A, cs_A );
        return FLA_SUCCESS;
    }

    // Start-up phase.  j starts at -1 and advances by n_fuse; each step
    // performs j + 1 fused applications and then exactly one
    // single-rotation application (n_left is forced to 1).
    for ( j = -1; j < k_minus_1; j += n_fuse )
    {
        nG_app = j + 1;
        n_iter = nG_app;
        n_left = 1;

        k = 0;
        g = j;

        for ( i = 0; i < n_iter; ++i )
        {
            g12 = buff_G + (g    )*rs_G + (k    )*cs_G;
            g23 = buff_G + (g + 1)*rs_G + (k    )*cs_G;
            a1  = buff_A + (g    )*cs_A;
            a2  = buff_A + (g + 1)*cs_A;
            a3  = buff_A + (g + 2)*cs_A;

            gamma12 = g12->real;
            sigma12 = g12->imag;
            gamma23 = g23->real;
            sigma23 = g23->imag;

            is_ident12 = ( gamma12 == one && sigma12 == zero );
            is_ident23 = ( gamma23 == one && sigma23 == zero );

            if ( !is_ident12 )
            {
                if ( is_ident23 )
                {
                    // Only columns 1 and 2 are touched.
                    MAC_Apply_G_mx2_asz( m_A, &gamma12, &sigma12, a1, 1, a2, 1 );
                }
                else
                {
                    // Both rotations are live: all three columns.
                    MAC_Apply_G_mx3_asz( m_A,
                                         &gamma12, &sigma12,
                                         &gamma23, &sigma23,
                                         a1, 1, a2, 1, a3, 1 );
                }
            }
            else if ( !is_ident23 )
            {
                // Only columns 2 and 3 are touched.
                MAC_Apply_G_mx2_asz( m_A, &gamma23, &sigma23, a2, 1, a3, 1 );
            }

            ++k;
            --g;
        }

        if ( n_left == 1 )
        {
            g23 = buff_G + (g + 1)*rs_G + (k    )*cs_G;
            a2  = buff_A + (g + 1)*cs_A;
            a3  = buff_A + (g + 2)*cs_A;

            gamma23 = g23->real;
            sigma23 = g23->imag;

            is_ident23 = ( gamma23 == one && sigma23 == zero );

            if ( !is_ident23 )
            {
                MAC_Apply_G_mx2_asz( m_A, &gamma23, &sigma23, a2, 1, a3, 1 );
            }
        }
    }

    // Pipeline phase.  j continues from where the start-up phase stopped;
    // each step performs k_G fused applications and has no leftover.
    for ( ; j < nG - 1; j += n_fuse )
    {
        nG_app = k_G;
        n_iter = nG_app;
        n_left = 0;

        k = 0;
        g = j;

        for ( i = 0; i < n_iter; ++i )
        {
            g12 = buff_G + (g    )*rs_G + (k    )*cs_G;
            g23 = buff_G + (g + 1)*rs_G + (k    )*cs_G;
            a1  = buff_A + (g    )*cs_A;
            a2  = buff_A + (g + 1)*cs_A;
            a3  = buff_A + (g + 2)*cs_A;

            gamma12 = g12->real;
            sigma12 = g12->imag;
            gamma23 = g23->real;
            sigma23 = g23->imag;

            is_ident12 = ( gamma12 == one && sigma12 == zero );
            is_ident23 = ( gamma23 == one && sigma23 == zero );

            if ( !is_ident12 )
            {
                if ( is_ident23 )
                {
                    MAC_Apply_G_mx2_asz( m_A, &gamma12, &sigma12, a1, 1, a2, 1 );
                }
                else
                {
                    MAC_Apply_G_mx3_asz( m_A,
                                         &gamma12, &sigma12,
                                         &gamma23, &sigma23,
                                         a1, 1, a2, 1, a3, 1 );
                }
            }
            else if ( !is_ident23 )
            {
                MAC_Apply_G_mx2_asz( m_A, &gamma23, &sigma23, a2, 1, a3, 1 );
            }

            ++k;
            --g;
        }
    }

    // Shutdown phase.  Each step begins with one single-rotation
    // application at the last row of G (nG - 1) in column j of G, then
    // performs k_minus_1 - j fused applications.
    for ( j = nG % n_fuse; j < k_G; j += n_fuse )
    {
        g      = nG - 1;
        k      = j;
        n_left = 1;

        if ( n_left == 1 )
        {
            g12 = buff_G + (g    )*rs_G + (k    )*cs_G;
            a1  = buff_A + (g    )*cs_A;
            a2  = buff_A + (g + 1)*cs_A;

            gamma12 = g12->real;
            sigma12 = g12->imag;

            is_ident12 = ( gamma12 == one && sigma12 == zero );

            if ( !is_ident12 )
            {
                MAC_Apply_G_mx2_asz( m_A, &gamma12, &sigma12, a1, 1, a2, 1 );
            }

            ++k;
            --g;
        }

        nG_app = k_minus_1 - j;
        n_iter = nG_app;

        // k and g carry over from the single application above.
        for ( i = 0; i < n_iter; ++i )
        {
            g12 = buff_G + (g    )*rs_G + (k    )*cs_G;
            g23 = buff_G + (g + 1)*rs_G + (k    )*cs_G;
            a1  = buff_A + (g    )*cs_A;
            a2  = buff_A + (g + 1)*cs_A;
            a3  = buff_A + (g + 2)*cs_A;

            gamma12 = g12->real;
            sigma12 = g12->imag;
            gamma23 = g23->real;
            sigma23 = g23->imag;

            is_ident12 = ( gamma12 == one && sigma12 == zero );
            is_ident23 = ( gamma23 == one && sigma23 == zero );

            if ( !is_ident12 )
            {
                if ( is_ident23 )
                {
                    MAC_Apply_G_mx2_asz( m_A, &gamma12, &sigma12, a1, 1, a2, 1 );
                }
                else
                {
                    MAC_Apply_G_mx3_asz( m_A,
                                         &gamma12, &sigma12,
                                         &gamma23, &sigma23,
                                         a1, 1, a2, 1, a3, 1 );
                }
            }
            else if ( !is_ident23 )
            {
                MAC_Apply_G_mx2_asz( m_A, &gamma23, &sigma23, a2, 1, a3, 1 );
            }

            ++k;
            --g;
        }
    }

    return FLA_SUCCESS;
}
| FLA_Error FLA_Apply_G_rf_asz_var9b | ( | int | k_G, |
| int | m_A, | ||
| int | n_A, | ||
| int | i_k, | ||
| int | iTL, | ||
| dcomplex * | buff_G, | ||
| int | rs_G, | ||
| int | cs_G, | ||
| dcomplex * | buff_A, | ||
| int | rs_A, | ||
| int | cs_A | ||
| ) |
Referenced by FLA_Apply_G_rf_asm_var9b().
{
// Placeholder body: none of the arguments are used.  The only action is to
// pass FLA_NOT_YET_IMPLEMENTED to FLA_Check_error_code().
FLA_Check_error_code( FLA_NOT_YET_IMPLEMENTED );
// NOTE(review): whether FLA_Check_error_code() returns control here is not
// visible in this file; if it does, the caller receives FLA_SUCCESS.
return FLA_SUCCESS;
}
| FLA_Error FLA_Apply_G_rf_bhc_var3 | ( | int | k_G, |
| int | m_A, | ||
| int | n_A, | ||
| scomplex * | buff_G, | ||
| int | rs_G, | ||
| int | cs_G, | ||
| scomplex * | buff_A, | ||
| int | rs_A, | ||
| int | cs_A, | ||
| int | b_alg | ||
| ) |
| FLA_Error FLA_Apply_G_rf_bhd_var3 | ( | int | k_G, |
| int | m_A, | ||
| int | n_A, | ||
| dcomplex * | buff_G, | ||
| int | rs_G, | ||
| int | cs_G, | ||
| double * | buff_A, | ||
| int | rs_A, | ||
| int | cs_A, | ||
| int | b_alg | ||
| ) |
| FLA_Error FLA_Apply_G_rf_bhs_var3 | ( | int | k_G, |
| int | m_A, | ||
| int | n_A, | ||
| scomplex * | buff_G, | ||
| int | rs_G, | ||
| int | cs_G, | ||
| float * | buff_A, | ||
| int | rs_A, | ||
| int | cs_A, | ||
| int | b_alg | ||
| ) |
| FLA_Error FLA_Apply_G_rf_bhz_var3 | ( | int | k_G, |
| int | m_A, | ||
| int | n_A, | ||
| dcomplex * | buff_G, | ||
| int | rs_G, | ||
| int | cs_G, | ||
| FLA_Obj * | buff_A, | ||
| int | rs_A, | ||
| int | cs_A, | ||
| int | b_alg | ||
| ) |
| FLA_Error FLA_Apply_G_rf_blc_var1 | ( | int | k_G, |
| int | m_A, | ||
| int | n_A, | ||
| scomplex * | buff_G, | ||
| int | rs_G, | ||
| int | cs_G, | ||
| scomplex * | buff_A, | ||
| int | rs_A, | ||
| int | cs_A, | ||
| int | b_alg | ||
| ) |
References FLA_Apply_G_rf_asc_var1().
Referenced by FLA_Apply_G_rf_blk_var1().
{
    // Sweeps A from its first row downward in panels of at most b_alg rows
    // and runs FLA_Apply_G_rf_asc_var1() on each panel with the full G.
    // NOTE(review): with b_alg <= 0 the step is never positive, so the loop
    // would not terminate normally; callers presumably pass b_alg >= 1.
    int row    = 0;   // index of the first row of the current panel
    int n_rows = 0;   // number of rows in the current panel

    while ( row < m_A )
    {
        scomplex* panel     = buff_A + row * rs_A;
        int       rows_left = max( 0, m_A - row );

        n_rows = min( b_alg, rows_left );

        FLA_Apply_G_rf_asc_var1( k_G, n_rows, n_A,
                                 buff_G, rs_G, cs_G,
                                 panel, rs_A, cs_A );

        row += n_rows;
    }

    return FLA_SUCCESS;
}
| FLA_Error FLA_Apply_G_rf_blc_var2 | ( | int | k_G, |
| int | m_A, | ||
| int | n_A, | ||
| scomplex * | buff_G, | ||
| int | rs_G, | ||
| int | cs_G, | ||
| scomplex * | buff_A, | ||
| int | rs_A, | ||
| int | cs_A, | ||
| int | b_alg | ||
| ) |
References FLA_Apply_G_rf_asc_var2().
Referenced by FLA_Apply_G_rf_blk_var2().
{
    // Sweeps A from its first row downward in panels of at most b_alg rows
    // and runs FLA_Apply_G_rf_asc_var2() on each panel with the full G.
    // NOTE(review): with b_alg <= 0 the step is never positive, so the loop
    // would not terminate normally; callers presumably pass b_alg >= 1.
    int row    = 0;   // index of the first row of the current panel
    int n_rows = 0;   // number of rows in the current panel

    while ( row < m_A )
    {
        scomplex* panel     = buff_A + row * rs_A;
        int       rows_left = max( 0, m_A - row );

        n_rows = min( b_alg, rows_left );

        FLA_Apply_G_rf_asc_var2( k_G, n_rows, n_A,
                                 buff_G, rs_G, cs_G,
                                 panel, rs_A, cs_A );

        row += n_rows;
    }

    return FLA_SUCCESS;
}
| FLA_Error FLA_Apply_G_rf_blc_var3 | ( | int | k_G, |
| int | m_A, | ||
| int | n_A, | ||
| scomplex * | buff_G, | ||
| int | rs_G, | ||
| int | cs_G, | ||
| scomplex * | buff_A, | ||
| int | rs_A, | ||
| int | cs_A, | ||
| int | b_alg | ||
| ) |
References FLA_Apply_G_rf_asc_var3().
Referenced by FLA_Apply_G_rf_blk_var3().
{
    // Sweeps A from its first row downward in panels of at most b_alg rows
    // and runs FLA_Apply_G_rf_asc_var3() on each panel with the full G.
    // NOTE(review): with b_alg <= 0 the step is never positive, so the loop
    // would not terminate normally; callers presumably pass b_alg >= 1.
    int row    = 0;   // index of the first row of the current panel
    int n_rows = 0;   // number of rows in the current panel

    while ( row < m_A )
    {
        scomplex* panel     = buff_A + row * rs_A;
        int       rows_left = max( 0, m_A - row );

        n_rows = min( b_alg, rows_left );

        FLA_Apply_G_rf_asc_var3( k_G, n_rows, n_A,
                                 buff_G, rs_G, cs_G,
                                 panel, rs_A, cs_A );

        row += n_rows;
    }

    return FLA_SUCCESS;
}
| FLA_Error FLA_Apply_G_rf_blc_var3b | ( | int | k_G, |
| int | m_A, | ||
| int | n_A, | ||
| int | i_k, | ||
| scomplex * | buff_G, | ||
| int | rs_G, | ||
| int | cs_G, | ||
| scomplex * | buff_A, | ||
| int | rs_A, | ||
| int | cs_A, | ||
| int | b_alg | ||
| ) |
Referenced by FLA_Apply_G_rf_blk_var3b().
{
// Placeholder body: none of the arguments are used.  The only action is to
// pass FLA_NOT_YET_IMPLEMENTED to FLA_Check_error_code().
FLA_Check_error_code( FLA_NOT_YET_IMPLEMENTED );
// NOTE(review): whether FLA_Check_error_code() returns control here is not
// visible in this file; if it does, the caller receives FLA_SUCCESS.
return FLA_SUCCESS;
}
| FLA_Error FLA_Apply_G_rf_blc_var4 | ( | int | k_G, |
| int | m_A, | ||
| int | n_A, | ||
| scomplex * | buff_G, | ||
| int | rs_G, | ||
| int | cs_G, | ||
| scomplex * | buff_A, | ||
| int | rs_A, | ||
| int | cs_A, | ||
| int | b_alg | ||
| ) |
| FLA_Error FLA_Apply_G_rf_blc_var5 | ( | int | k_G, |
| int | m_A, | ||
| int | n_A, | ||
| scomplex * | buff_G, | ||
| int | rs_G, | ||
| int | cs_G, | ||
| scomplex * | buff_A, | ||
| int | rs_A, | ||
| int | cs_A, | ||
| int | b_alg | ||
| ) |
| FLA_Error FLA_Apply_G_rf_blc_var5b | ( | int | k_G, |
| int | m_A, | ||
| int | n_A, | ||
| int | i_k, | ||
| scomplex * | buff_G, | ||
| int | rs_G, | ||
| int | cs_G, | ||
| scomplex * | buff_A, | ||
| int | rs_A, | ||
| int | cs_A, | ||
| int | b_alg | ||
| ) |
| FLA_Error FLA_Apply_G_rf_blc_var6 | ( | int | k_G, |
| int | m_A, | ||
| int | n_A, | ||
| scomplex * | buff_G, | ||
| int | rs_G, | ||
| int | cs_G, | ||
| scomplex * | buff_A, | ||
| int | rs_A, | ||
| int | cs_A, | ||
| int | b_alg | ||
| ) |
References FLA_Apply_G_rf_asc_var6().
Referenced by FLA_Apply_G_rf_blk_var6().
{
    // Sweeps A from its first row downward in panels of at most b_alg rows
    // and runs FLA_Apply_G_rf_asc_var6() on each panel with the full G.
    // NOTE(review): with b_alg <= 0 the step is never positive, so the loop
    // would not terminate normally; callers presumably pass b_alg >= 1.
    int row    = 0;   // index of the first row of the current panel
    int n_rows = 0;   // number of rows in the current panel

    while ( row < m_A )
    {
        scomplex* panel     = buff_A + row * rs_A;
        int       rows_left = max( 0, m_A - row );

        n_rows = min( b_alg, rows_left );

        FLA_Apply_G_rf_asc_var6( k_G, n_rows, n_A,
                                 buff_G, rs_G, cs_G,
                                 panel, rs_A, cs_A );

        row += n_rows;
    }

    return FLA_SUCCESS;
}
| FLA_Error FLA_Apply_G_rf_blc_var6b | ( | int | k_G, |
| int | m_A, | ||
| int | n_A, | ||
| int | i_k, | ||
| scomplex * | buff_G, | ||
| int | rs_G, | ||
| int | cs_G, | ||
| scomplex * | buff_A, | ||
| int | rs_A, | ||
| int | cs_A, | ||
| int | b_alg | ||
| ) |
Referenced by FLA_Apply_G_rf_blk_var6b().
{
// Placeholder body: none of the arguments are used.  The only action is to
// pass FLA_NOT_YET_IMPLEMENTED to FLA_Check_error_code().
FLA_Check_error_code( FLA_NOT_YET_IMPLEMENTED );
// NOTE(review): whether FLA_Check_error_code() returns control here is not
// visible in this file; if it does, the caller receives FLA_SUCCESS.
return FLA_SUCCESS;
}
| FLA_Error FLA_Apply_G_rf_blc_var7 | ( | int | k_G, |
| int | m_A, | ||
| int | n_A, | ||
| scomplex * | buff_G, | ||
| int | rs_G, | ||
| int | cs_G, | ||
| scomplex * | buff_A, | ||
| int | rs_A, | ||
| int | cs_A, | ||
| int | b_alg | ||
| ) |
| FLA_Error FLA_Apply_G_rf_blc_var8 | ( | int | k_G, |
| int | m_A, | ||
| int | n_A, | ||
| scomplex * | buff_G, | ||
| int | rs_G, | ||
| int | cs_G, | ||
| scomplex * | buff_A, | ||
| int | rs_A, | ||
| int | cs_A, | ||
| int | b_alg | ||
| ) |
| FLA_Error FLA_Apply_G_rf_blc_var8b | ( | int | k_G, |
| int | m_A, | ||
| int | n_A, | ||
| int | i_k, | ||
| scomplex * | buff_G, | ||
| int | rs_G, | ||
| int | cs_G, | ||
| scomplex * | buff_A, | ||
| int | rs_A, | ||
| int | cs_A, | ||
| int | b_alg | ||
| ) |
| FLA_Error FLA_Apply_G_rf_blc_var9 | ( | int | k_G, |
| int | m_A, | ||
| int | n_A, | ||
| scomplex * | buff_G, | ||
| int | rs_G, | ||
| int | cs_G, | ||
| scomplex * | buff_A, | ||
| int | rs_A, | ||
| int | cs_A, | ||
| int | b_alg | ||
| ) |
References FLA_Apply_G_rf_asc_var9().
Referenced by FLA_Apply_G_rf_blk_var9().
{
    // Sweeps A from its first row downward in panels of at most b_alg rows
    // and runs FLA_Apply_G_rf_asc_var9() on each panel with the full G.
    // NOTE(review): with b_alg <= 0 the step is never positive, so the loop
    // would not terminate normally; callers presumably pass b_alg >= 1.
    int row    = 0;   // index of the first row of the current panel
    int n_rows = 0;   // number of rows in the current panel

    while ( row < m_A )
    {
        scomplex* panel     = buff_A + row * rs_A;
        int       rows_left = max( 0, m_A - row );

        n_rows = min( b_alg, rows_left );

        FLA_Apply_G_rf_asc_var9( k_G, n_rows, n_A,
                                 buff_G, rs_G, cs_G,
                                 panel, rs_A, cs_A );

        row += n_rows;
    }

    return FLA_SUCCESS;
}
| FLA_Error FLA_Apply_G_rf_blc_var9b | ( | int | k_G, |
| int | m_A, | ||
| int | n_A, | ||
| int | i_k, | ||
| scomplex * | buff_G, | ||
| int | rs_G, | ||
| int | cs_G, | ||
| scomplex * | buff_A, | ||
| int | rs_A, | ||
| int | cs_A, | ||
| int | b_alg | ||
| ) |
Referenced by FLA_Apply_G_rf_blk_var9b().
{
// Placeholder body: none of the arguments are used.  The only action is to
// pass FLA_NOT_YET_IMPLEMENTED to FLA_Check_error_code().
FLA_Check_error_code( FLA_NOT_YET_IMPLEMENTED );
// NOTE(review): whether FLA_Check_error_code() returns control here is not
// visible in this file; if it does, the caller receives FLA_SUCCESS.
return FLA_SUCCESS;
}
| FLA_Error FLA_Apply_G_rf_bld_var1 | ( | int | k_G, |
| int | m_A, | ||
| int | n_A, | ||
| dcomplex * | buff_G, | ||
| int | rs_G, | ||
| int | cs_G, | ||
| double * | buff_A, | ||
| int | rs_A, | ||
| int | cs_A, | ||
| int | b_alg | ||
| ) |
References FLA_Apply_G_rf_asd_var1().
Referenced by FLA_Apply_G_rf_blk_var1().
{
    // Sweeps A from its first row downward in panels of at most b_alg rows
    // and runs FLA_Apply_G_rf_asd_var1() on each panel with the full G.
    // NOTE(review): with b_alg <= 0 the step is never positive, so the loop
    // would not terminate normally; callers presumably pass b_alg >= 1.
    int row    = 0;   // index of the first row of the current panel
    int n_rows = 0;   // number of rows in the current panel

    while ( row < m_A )
    {
        double* panel     = buff_A + row * rs_A;
        int     rows_left = max( 0, m_A - row );

        n_rows = min( b_alg, rows_left );

        FLA_Apply_G_rf_asd_var1( k_G, n_rows, n_A,
                                 buff_G, rs_G, cs_G,
                                 panel, rs_A, cs_A );

        row += n_rows;
    }

    return FLA_SUCCESS;
}
| FLA_Error FLA_Apply_G_rf_bld_var2 | ( | int | k_G, |
| int | m_A, | ||
| int | n_A, | ||
| dcomplex * | buff_G, | ||
| int | rs_G, | ||
| int | cs_G, | ||
| double * | buff_A, | ||
| int | rs_A, | ||
| int | cs_A, | ||
| int | b_alg | ||
| ) |
References FLA_Apply_G_rf_asd_var2().
Referenced by FLA_Apply_G_rf_blk_var2().
{
    // Sweeps A from its first row downward in panels of at most b_alg rows
    // and runs FLA_Apply_G_rf_asd_var2() on each panel with the full G.
    // NOTE(review): with b_alg <= 0 the step is never positive, so the loop
    // would not terminate normally; callers presumably pass b_alg >= 1.
    int row    = 0;   // index of the first row of the current panel
    int n_rows = 0;   // number of rows in the current panel

    while ( row < m_A )
    {
        double* panel     = buff_A + row * rs_A;
        int     rows_left = max( 0, m_A - row );

        n_rows = min( b_alg, rows_left );

        FLA_Apply_G_rf_asd_var2( k_G, n_rows, n_A,
                                 buff_G, rs_G, cs_G,
                                 panel, rs_A, cs_A );

        row += n_rows;
    }

    return FLA_SUCCESS;
}
| FLA_Error FLA_Apply_G_rf_bld_var3 | ( | int | k_G, |
| int | m_A, | ||
| int | n_A, | ||
| dcomplex * | buff_G, | ||
| int | rs_G, | ||
| int | cs_G, | ||
| double * | buff_A, | ||
| int | rs_A, | ||
| int | cs_A, | ||
| int | b_alg | ||
| ) |
References FLA_Apply_G_rf_asd_var3().
Referenced by FLA_Apply_G_rf_blk_var3(), FLA_Bsvd_v_opd_var1(), FLA_Tevd_v_opd_var1(), and FLA_Tevd_v_opd_var3().
{
    // Sweeps A from its first row downward in panels of at most b_alg rows
    // and runs FLA_Apply_G_rf_asd_var3() on each panel with the full G.
    // NOTE(review): with b_alg <= 0 the step is never positive, so the loop
    // would not terminate normally; callers presumably pass b_alg >= 1.
    int row    = 0;   // index of the first row of the current panel
    int n_rows = 0;   // number of rows in the current panel

    while ( row < m_A )
    {
        double* panel     = buff_A + row * rs_A;
        int     rows_left = max( 0, m_A - row );

        n_rows = min( b_alg, rows_left );

        FLA_Apply_G_rf_asd_var3( k_G, n_rows, n_A,
                                 buff_G, rs_G, cs_G,
                                 panel, rs_A, cs_A );

        row += n_rows;
    }

    return FLA_SUCCESS;
}
| FLA_Error FLA_Apply_G_rf_bld_var3b | ( | int | k_G, |
| int | m_A, | ||
| int | n_A, | ||
| int | i_k, | ||
| dcomplex * | buff_G, | ||
| int | rs_G, | ||
| int | cs_G, | ||
| double * | buff_A, | ||
| int | rs_A, | ||
| int | cs_A, | ||
| int | b_alg | ||
| ) |
References FLA_Apply_G_rf_asd_var3b().
Referenced by FLA_Apply_G_rf_blk_var3b(), FLA_Bsvd_v_opd_var2(), FLA_Bsvd_v_opz_var2(), FLA_Tevd_v_opd_var2(), FLA_Tevd_v_opd_var4(), FLA_Tevd_v_opz_var2(), and FLA_Tevd_v_opz_var4().
{
    // Sweeps A from its first row downward in panels of at most b_alg rows
    // and runs FLA_Apply_G_rf_asd_var3b() on each panel with the full G.
    // Besides i_k, the kernel also receives the number of rows of A that
    // precede the panel.
    // NOTE(review): with b_alg <= 0 the step is never positive, so the loop
    // would not terminate normally; callers presumably pass b_alg >= 1.
    int row    = 0;   // index of the first row of the current panel
    int n_rows = 0;   // number of rows in the current panel

    while ( row < m_A )
    {
        double* panel       = buff_A + row * rs_A;
        int     rows_before = row;
        int     rows_left   = max( 0, m_A - row );

        n_rows = min( b_alg, rows_left );

        FLA_Apply_G_rf_asd_var3b( k_G, n_rows, n_A,
                                  i_k, rows_before,
                                  buff_G, rs_G, cs_G,
                                  panel, rs_A, cs_A );

        row += n_rows;
    }

    return FLA_SUCCESS;
}
| FLA_Error FLA_Apply_G_rf_bld_var4 | ( | int | k_G, |
| int | m_A, | ||
| int | n_A, | ||
| dcomplex * | buff_G, | ||
| int | rs_G, | ||
| int | cs_G, | ||
| double * | buff_A, | ||
| int | rs_A, | ||
| int | cs_A, | ||
| int | b_alg | ||
| ) |
| FLA_Error FLA_Apply_G_rf_bld_var5 | ( | int | k_G, |
| int | m_A, | ||
| int | n_A, | ||
| dcomplex * | buff_G, | ||
| int | rs_G, | ||
| int | cs_G, | ||
| double * | buff_A, | ||
| int | rs_A, | ||
| int | cs_A, | ||
| int | b_alg | ||
| ) |
| FLA_Error FLA_Apply_G_rf_bld_var5b | ( | int | k_G, |
| int | m_A, | ||
| int | n_A, | ||
| int | i_k, | ||
| dcomplex * | buff_G, | ||
| int | rs_G, | ||
| int | cs_G, | ||
| double * | buff_A, | ||
| int | rs_A, | ||
| int | cs_A, | ||
| int | b_alg | ||
| ) |
| FLA_Error FLA_Apply_G_rf_bld_var6 | ( | int | k_G, |
| int | m_A, | ||
| int | n_A, | ||
| dcomplex * | buff_G, | ||
| int | rs_G, | ||
| int | cs_G, | ||
| double * | buff_A, | ||
| int | rs_A, | ||
| int | cs_A, | ||
| int | b_alg | ||
| ) |
References FLA_Apply_G_rf_asd_var6().
Referenced by FLA_Apply_G_rf_blk_var6().
{
    // Sweeps A from its first row downward in panels of at most b_alg rows
    // and runs FLA_Apply_G_rf_asd_var6() on each panel with the full G.
    // NOTE(review): with b_alg <= 0 the step is never positive, so the loop
    // would not terminate normally; callers presumably pass b_alg >= 1.
    int row    = 0;   // index of the first row of the current panel
    int n_rows = 0;   // number of rows in the current panel

    while ( row < m_A )
    {
        double* panel     = buff_A + row * rs_A;
        int     rows_left = max( 0, m_A - row );

        n_rows = min( b_alg, rows_left );

        FLA_Apply_G_rf_asd_var6( k_G, n_rows, n_A,
                                 buff_G, rs_G, cs_G,
                                 panel, rs_A, cs_A );

        row += n_rows;
    }

    return FLA_SUCCESS;
}
| FLA_Error FLA_Apply_G_rf_bld_var6b | ( | int | k_G, |
| int | m_A, | ||
| int | n_A, | ||
| int | i_k, | ||
| dcomplex * | buff_G, | ||
| int | rs_G, | ||
| int | cs_G, | ||
| double * | buff_A, | ||
| int | rs_A, | ||
| int | cs_A, | ||
| int | b_alg | ||
| ) |
References FLA_Apply_G_rf_asd_var6b().
Referenced by FLA_Apply_G_rf_blk_var6b().
{
    // Sweeps A from its first row downward in panels of at most b_alg rows
    // and runs FLA_Apply_G_rf_asd_var6b() on each panel with the full G.
    // Besides i_k, the kernel also receives the number of rows of A that
    // precede the panel.
    // NOTE(review): with b_alg <= 0 the step is never positive, so the loop
    // would not terminate normally; callers presumably pass b_alg >= 1.
    int row    = 0;   // index of the first row of the current panel
    int n_rows = 0;   // number of rows in the current panel

    while ( row < m_A )
    {
        double* panel       = buff_A + row * rs_A;
        int     rows_before = row;
        int     rows_left   = max( 0, m_A - row );

        n_rows = min( b_alg, rows_left );

        FLA_Apply_G_rf_asd_var6b( k_G, n_rows, n_A,
                                  i_k, rows_before,
                                  buff_G, rs_G, cs_G,
                                  panel, rs_A, cs_A );

        row += n_rows;
    }

    return FLA_SUCCESS;
}
| FLA_Error FLA_Apply_G_rf_bld_var7 | ( | int | k_G, |
| int | m_A, | ||
| int | n_A, | ||
| dcomplex * | buff_G, | ||
| int | rs_G, | ||
| int | cs_G, | ||
| double * | buff_A, | ||
| int | rs_A, | ||
| int | cs_A, | ||
| int | b_alg | ||
| ) |
| FLA_Error FLA_Apply_G_rf_bld_var8 | ( | int | k_G, |
| int | m_A, | ||
| int | n_A, | ||
| dcomplex * | buff_G, | ||
| int | rs_G, | ||
| int | cs_G, | ||
| double * | buff_A, | ||
| int | rs_A, | ||
| int | cs_A, | ||
| int | b_alg | ||
| ) |
| FLA_Error FLA_Apply_G_rf_bld_var8b | ( | int | k_G, |
| int | m_A, | ||
| int | n_A, | ||
| int | i_k, | ||
| dcomplex * | buff_G, | ||
| int | rs_G, | ||
| int | cs_G, | ||
| double * | buff_A, | ||
| int | rs_A, | ||
| int | cs_A, | ||
| int | b_alg | ||
| ) |
| FLA_Error FLA_Apply_G_rf_bld_var9 | ( | int | k_G, |
| int | m_A, | ||
| int | n_A, | ||
| dcomplex * | buff_G, | ||
| int | rs_G, | ||
| int | cs_G, | ||
| double * | buff_A, | ||
| int | rs_A, | ||
| int | cs_A, | ||
| int | b_alg | ||
| ) |
References FLA_Apply_G_rf_asd_var9().
Referenced by FLA_Apply_G_rf_blk_var9().
{
  // Blocked driver: covers the m_A rows of A in panels of at most b_alg
  // rows and calls FLA_Apply_G_rf_asd_var9() once per panel with the same
  // G. The panel beginning at row i starts at buff_A + i*rs_A. The kernel's
  // return value is not inspected; this routine always returns FLA_SUCCESS.
  //
  // Fix: a non-positive b_alg previously gave b <= 0, so i never advanced
  // to m_A. Such a value now selects a single panel holding all m_A rows.
  const int b_max = ( b_alg > 0 ? b_alg : m_A );
  int       i;
  int       b;

  for ( i = 0; i < m_A; i += b )
  {
    double*   A1      = buff_A + i * rs_A;
    const int m_ahead = m_A - i;

    // The last panel may hold fewer than b_max rows.
    b = min( b_max, m_ahead );

    FLA_Apply_G_rf_asd_var9( k_G, b, n_A,
                             buff_G, rs_G, cs_G,
                             A1, rs_A, cs_A );
  }

  return FLA_SUCCESS;
}
| FLA_Error FLA_Apply_G_rf_bld_var9b | ( | int | k_G, |
| int | m_A, | ||
| int | n_A, | ||
| int | i_k, | ||
| dcomplex * | buff_G, | ||
| int | rs_G, | ||
| int | cs_G, | ||
| double * | buff_A, | ||
| int | rs_A, | ||
| int | cs_A, | ||
| int | b_alg | ||
| ) |
References FLA_Apply_G_rf_asd_var9b().
Referenced by FLA_Apply_G_rf_blk_var9b().
{
  // Blocked driver: covers the m_A rows of A in panels of at most b_alg
  // rows and calls FLA_Apply_G_rf_asd_var9b() once per panel with the same
  // G. The panel beginning at row i starts at buff_A + i*rs_A; i_k is
  // forwarded unchanged and i itself is passed right after it (m_behind).
  // The kernel's return value is not inspected; this routine always returns
  // FLA_SUCCESS.
  //
  // Fix: a non-positive b_alg previously gave b <= 0, so i never advanced
  // to m_A. Such a value now selects a single panel holding all m_A rows.
  const int b_max = ( b_alg > 0 ? b_alg : m_A );
  int       i;
  int       b;

  for ( i = 0; i < m_A; i += b )
  {
    double*   A1       = buff_A + i * rs_A;
    const int m_behind = i;
    const int m_ahead  = m_A - i;

    // The last panel may hold fewer than b_max rows.
    b = min( b_max, m_ahead );

    FLA_Apply_G_rf_asd_var9b( k_G, b, n_A,
                              i_k, m_behind,
                              buff_G, rs_G, cs_G,
                              A1, rs_A, cs_A );
  }

  return FLA_SUCCESS;
}
| FLA_Error FLA_Apply_G_rf_blk_var1 | ( | FLA_Obj | G, |
| FLA_Obj | A, | ||
| dim_t | b_alg | ||
| ) |
References FLA_Apply_G_rf_blc_var1(), FLA_Apply_G_rf_bld_var1(), FLA_Apply_G_rf_bls_var1(), FLA_Apply_G_rf_blz_var1(), FLA_Obj_col_stride(), FLA_Obj_datatype(), FLA_Obj_length(), FLA_Obj_row_stride(), and FLA_Obj_width().
{
  // Datatype dispatcher for the blocked variant 1. Reads the dimensions and
  // strides of G and A, then forwards to the kernel matching A's datatype
  // (bls: float, bld: double, blc: scomplex, blz: dcomplex). When A is real
  // the buffer of G is still taken as complex of the same precision. Any
  // other datatype does no work. The kernel's return value is ignored and
  // FLA_SUCCESS is always returned.
  const FLA_Datatype dt_A = FLA_Obj_datatype( A );

  const int k_G  = FLA_Obj_width( G );
  const int m_A  = FLA_Obj_length( A );
  const int n_A  = FLA_Obj_width( A );

  const int rs_G = FLA_Obj_row_stride( G );
  const int cs_G = FLA_Obj_col_stride( G );
  const int rs_A = FLA_Obj_row_stride( A );
  const int cs_A = FLA_Obj_col_stride( A );

  if ( dt_A == FLA_FLOAT )
  {
    scomplex* buff_G = ( scomplex* ) FLA_COMPLEX_PTR( G );
    float*    buff_A = ( float* ) FLA_FLOAT_PTR( A );

    FLA_Apply_G_rf_bls_var1( k_G, m_A, n_A,
                             buff_G, rs_G, cs_G,
                             buff_A, rs_A, cs_A,
                             b_alg );
  }
  else if ( dt_A == FLA_DOUBLE )
  {
    dcomplex* buff_G = ( dcomplex* ) FLA_DOUBLE_COMPLEX_PTR( G );
    double*   buff_A = ( double* ) FLA_DOUBLE_PTR( A );

    FLA_Apply_G_rf_bld_var1( k_G, m_A, n_A,
                             buff_G, rs_G, cs_G,
                             buff_A, rs_A, cs_A,
                             b_alg );
  }
  else if ( dt_A == FLA_COMPLEX )
  {
    scomplex* buff_G = ( scomplex* ) FLA_COMPLEX_PTR( G );
    scomplex* buff_A = ( scomplex* ) FLA_COMPLEX_PTR( A );

    FLA_Apply_G_rf_blc_var1( k_G, m_A, n_A,
                             buff_G, rs_G, cs_G,
                             buff_A, rs_A, cs_A,
                             b_alg );
  }
  else if ( dt_A == FLA_DOUBLE_COMPLEX )
  {
    dcomplex* buff_G = ( dcomplex* ) FLA_DOUBLE_COMPLEX_PTR( G );
    dcomplex* buff_A = ( dcomplex* ) FLA_DOUBLE_COMPLEX_PTR( A );

    FLA_Apply_G_rf_blz_var1( k_G, m_A, n_A,
                             buff_G, rs_G, cs_G,
                             buff_A, rs_A, cs_A,
                             b_alg );
  }

  return FLA_SUCCESS;
}
| FLA_Error FLA_Apply_G_rf_blk_var2 | ( | FLA_Obj | G, |
| FLA_Obj | A, | ||
| dim_t | b_alg | ||
| ) |
References FLA_Apply_G_rf_blc_var2(), FLA_Apply_G_rf_bld_var2(), FLA_Apply_G_rf_bls_var2(), FLA_Apply_G_rf_blz_var2(), FLA_Obj_col_stride(), FLA_Obj_datatype(), FLA_Obj_length(), FLA_Obj_row_stride(), and FLA_Obj_width().
{
  // Datatype dispatcher for the blocked variant 2. Reads the dimensions and
  // strides of G and A, then forwards to the kernel matching A's datatype
  // (bls: float, bld: double, blc: scomplex, blz: dcomplex). When A is real
  // the buffer of G is still taken as complex of the same precision. Any
  // other datatype does no work. The kernel's return value is ignored and
  // FLA_SUCCESS is always returned.
  const FLA_Datatype dt_A = FLA_Obj_datatype( A );

  const int k_G  = FLA_Obj_width( G );
  const int m_A  = FLA_Obj_length( A );
  const int n_A  = FLA_Obj_width( A );

  const int rs_G = FLA_Obj_row_stride( G );
  const int cs_G = FLA_Obj_col_stride( G );
  const int rs_A = FLA_Obj_row_stride( A );
  const int cs_A = FLA_Obj_col_stride( A );

  if ( dt_A == FLA_FLOAT )
  {
    scomplex* buff_G = ( scomplex* ) FLA_COMPLEX_PTR( G );
    float*    buff_A = ( float* ) FLA_FLOAT_PTR( A );

    FLA_Apply_G_rf_bls_var2( k_G, m_A, n_A,
                             buff_G, rs_G, cs_G,
                             buff_A, rs_A, cs_A,
                             b_alg );
  }
  else if ( dt_A == FLA_DOUBLE )
  {
    dcomplex* buff_G = ( dcomplex* ) FLA_DOUBLE_COMPLEX_PTR( G );
    double*   buff_A = ( double* ) FLA_DOUBLE_PTR( A );

    FLA_Apply_G_rf_bld_var2( k_G, m_A, n_A,
                             buff_G, rs_G, cs_G,
                             buff_A, rs_A, cs_A,
                             b_alg );
  }
  else if ( dt_A == FLA_COMPLEX )
  {
    scomplex* buff_G = ( scomplex* ) FLA_COMPLEX_PTR( G );
    scomplex* buff_A = ( scomplex* ) FLA_COMPLEX_PTR( A );

    FLA_Apply_G_rf_blc_var2( k_G, m_A, n_A,
                             buff_G, rs_G, cs_G,
                             buff_A, rs_A, cs_A,
                             b_alg );
  }
  else if ( dt_A == FLA_DOUBLE_COMPLEX )
  {
    dcomplex* buff_G = ( dcomplex* ) FLA_DOUBLE_COMPLEX_PTR( G );
    dcomplex* buff_A = ( dcomplex* ) FLA_DOUBLE_COMPLEX_PTR( A );

    FLA_Apply_G_rf_blz_var2( k_G, m_A, n_A,
                             buff_G, rs_G, cs_G,
                             buff_A, rs_A, cs_A,
                             b_alg );
  }

  return FLA_SUCCESS;
}
| FLA_Error FLA_Apply_G_rf_blk_var3 | ( | FLA_Obj | G, |
| FLA_Obj | A, | ||
| dim_t | b_alg | ||
| ) |
References FLA_Apply_G_rf_blc_var3(), FLA_Apply_G_rf_bld_var3(), FLA_Apply_G_rf_bls_var3(), FLA_Apply_G_rf_blz_var3(), FLA_Obj_col_stride(), FLA_Obj_datatype(), FLA_Obj_length(), FLA_Obj_row_stride(), and FLA_Obj_width().
{
  // Datatype dispatcher for the blocked variant 3. Reads the dimensions and
  // strides of G and A, then forwards to the kernel matching A's datatype
  // (bls: float, bld: double, blc: scomplex, blz: dcomplex). When A is real
  // the buffer of G is still taken as complex of the same precision. Any
  // other datatype does no work. The kernel's return value is ignored and
  // FLA_SUCCESS is always returned.
  const FLA_Datatype dt_A = FLA_Obj_datatype( A );

  const int k_G  = FLA_Obj_width( G );
  const int m_A  = FLA_Obj_length( A );
  const int n_A  = FLA_Obj_width( A );

  const int rs_G = FLA_Obj_row_stride( G );
  const int cs_G = FLA_Obj_col_stride( G );
  const int rs_A = FLA_Obj_row_stride( A );
  const int cs_A = FLA_Obj_col_stride( A );

  if ( dt_A == FLA_FLOAT )
  {
    scomplex* buff_G = ( scomplex* ) FLA_COMPLEX_PTR( G );
    float*    buff_A = ( float* ) FLA_FLOAT_PTR( A );

    FLA_Apply_G_rf_bls_var3( k_G, m_A, n_A,
                             buff_G, rs_G, cs_G,
                             buff_A, rs_A, cs_A,
                             b_alg );
  }
  else if ( dt_A == FLA_DOUBLE )
  {
    dcomplex* buff_G = ( dcomplex* ) FLA_DOUBLE_COMPLEX_PTR( G );
    double*   buff_A = ( double* ) FLA_DOUBLE_PTR( A );

    FLA_Apply_G_rf_bld_var3( k_G, m_A, n_A,
                             buff_G, rs_G, cs_G,
                             buff_A, rs_A, cs_A,
                             b_alg );
  }
  else if ( dt_A == FLA_COMPLEX )
  {
    scomplex* buff_G = ( scomplex* ) FLA_COMPLEX_PTR( G );
    scomplex* buff_A = ( scomplex* ) FLA_COMPLEX_PTR( A );

    FLA_Apply_G_rf_blc_var3( k_G, m_A, n_A,
                             buff_G, rs_G, cs_G,
                             buff_A, rs_A, cs_A,
                             b_alg );
  }
  else if ( dt_A == FLA_DOUBLE_COMPLEX )
  {
    dcomplex* buff_G = ( dcomplex* ) FLA_DOUBLE_COMPLEX_PTR( G );
    dcomplex* buff_A = ( dcomplex* ) FLA_DOUBLE_COMPLEX_PTR( A );

    FLA_Apply_G_rf_blz_var3( k_G, m_A, n_A,
                             buff_G, rs_G, cs_G,
                             buff_A, rs_A, cs_A,
                             b_alg );
  }

  return FLA_SUCCESS;
}
| FLA_Error FLA_Apply_G_rf_blk_var3b | ( | FLA_Obj | G, |
| FLA_Obj | A, | ||
| dim_t | b_alg | ||
| ) |
References FLA_Apply_G_rf_blc_var3b(), FLA_Apply_G_rf_bld_var3b(), FLA_Apply_G_rf_bls_var3b(), FLA_Apply_G_rf_blz_var3b(), FLA_Obj_col_stride(), FLA_Obj_datatype(), FLA_Obj_length(), FLA_Obj_row_stride(), and FLA_Obj_width().
{
  // Datatype dispatcher for the blocked variant 3b. Reads the dimensions
  // and strides of G and A, then forwards to the kernel matching A's
  // datatype (bls: float, bld: double, blc: scomplex, blz: dcomplex),
  // always passing 0 for the kernel's i_k argument. When A is real the
  // buffer of G is still taken as complex of the same precision. Any other
  // datatype does no work. The kernel's return value is ignored and
  // FLA_SUCCESS is always returned.
  const FLA_Datatype dt_A = FLA_Obj_datatype( A );

  const int k_G  = FLA_Obj_width( G );
  const int m_A  = FLA_Obj_length( A );
  const int n_A  = FLA_Obj_width( A );

  const int rs_G = FLA_Obj_row_stride( G );
  const int cs_G = FLA_Obj_col_stride( G );
  const int rs_A = FLA_Obj_row_stride( A );
  const int cs_A = FLA_Obj_col_stride( A );

  if ( dt_A == FLA_FLOAT )
  {
    scomplex* buff_G = ( scomplex* ) FLA_COMPLEX_PTR( G );
    float*    buff_A = ( float* ) FLA_FLOAT_PTR( A );

    FLA_Apply_G_rf_bls_var3b( k_G, m_A, n_A,
                              0,
                              buff_G, rs_G, cs_G,
                              buff_A, rs_A, cs_A,
                              b_alg );
  }
  else if ( dt_A == FLA_DOUBLE )
  {
    dcomplex* buff_G = ( dcomplex* ) FLA_DOUBLE_COMPLEX_PTR( G );
    double*   buff_A = ( double* ) FLA_DOUBLE_PTR( A );

    FLA_Apply_G_rf_bld_var3b( k_G, m_A, n_A,
                              0,
                              buff_G, rs_G, cs_G,
                              buff_A, rs_A, cs_A,
                              b_alg );
  }
  else if ( dt_A == FLA_COMPLEX )
  {
    scomplex* buff_G = ( scomplex* ) FLA_COMPLEX_PTR( G );
    scomplex* buff_A = ( scomplex* ) FLA_COMPLEX_PTR( A );

    FLA_Apply_G_rf_blc_var3b( k_G, m_A, n_A,
                              0,
                              buff_G, rs_G, cs_G,
                              buff_A, rs_A, cs_A,
                              b_alg );
  }
  else if ( dt_A == FLA_DOUBLE_COMPLEX )
  {
    dcomplex* buff_G = ( dcomplex* ) FLA_DOUBLE_COMPLEX_PTR( G );
    dcomplex* buff_A = ( dcomplex* ) FLA_DOUBLE_COMPLEX_PTR( A );

    FLA_Apply_G_rf_blz_var3b( k_G, m_A, n_A,
                              0,
                              buff_G, rs_G, cs_G,
                              buff_A, rs_A, cs_A,
                              b_alg );
  }

  return FLA_SUCCESS;
}
| FLA_Error FLA_Apply_G_rf_blk_var4 | ( | FLA_Obj | G, |
| FLA_Obj | A, | ||
| dim_t | b_alg | ||
| ) |
| FLA_Error FLA_Apply_G_rf_blk_var5 | ( | FLA_Obj | G, |
| FLA_Obj | A, | ||
| dim_t | b_alg | ||
| ) |
| FLA_Error FLA_Apply_G_rf_blk_var5b | ( | FLA_Obj | G, |
| FLA_Obj | A, | ||
| dim_t | b_alg | ||
| ) |
| FLA_Error FLA_Apply_G_rf_blk_var6 | ( | FLA_Obj | G, |
| FLA_Obj | A, | ||
| dim_t | b_alg | ||
| ) |
References FLA_Apply_G_rf_blc_var6(), FLA_Apply_G_rf_bld_var6(), FLA_Apply_G_rf_bls_var6(), FLA_Apply_G_rf_blz_var6(), FLA_Obj_col_stride(), FLA_Obj_datatype(), FLA_Obj_length(), FLA_Obj_row_stride(), and FLA_Obj_width().
{
  // Datatype dispatcher for the blocked variant 6. Reads the dimensions and
  // strides of G and A, then forwards to the kernel matching A's datatype
  // (bls: float, bld: double, blc: scomplex, blz: dcomplex). When A is real
  // the buffer of G is still taken as complex of the same precision. Any
  // other datatype does no work. The kernel's return value is ignored and
  // FLA_SUCCESS is always returned.
  const FLA_Datatype dt_A = FLA_Obj_datatype( A );

  const int k_G  = FLA_Obj_width( G );
  const int m_A  = FLA_Obj_length( A );
  const int n_A  = FLA_Obj_width( A );

  const int rs_G = FLA_Obj_row_stride( G );
  const int cs_G = FLA_Obj_col_stride( G );
  const int rs_A = FLA_Obj_row_stride( A );
  const int cs_A = FLA_Obj_col_stride( A );

  if ( dt_A == FLA_FLOAT )
  {
    scomplex* buff_G = ( scomplex* ) FLA_COMPLEX_PTR( G );
    float*    buff_A = ( float* ) FLA_FLOAT_PTR( A );

    FLA_Apply_G_rf_bls_var6( k_G, m_A, n_A,
                             buff_G, rs_G, cs_G,
                             buff_A, rs_A, cs_A,
                             b_alg );
  }
  else if ( dt_A == FLA_DOUBLE )
  {
    dcomplex* buff_G = ( dcomplex* ) FLA_DOUBLE_COMPLEX_PTR( G );
    double*   buff_A = ( double* ) FLA_DOUBLE_PTR( A );

    FLA_Apply_G_rf_bld_var6( k_G, m_A, n_A,
                             buff_G, rs_G, cs_G,
                             buff_A, rs_A, cs_A,
                             b_alg );
  }
  else if ( dt_A == FLA_COMPLEX )
  {
    scomplex* buff_G = ( scomplex* ) FLA_COMPLEX_PTR( G );
    scomplex* buff_A = ( scomplex* ) FLA_COMPLEX_PTR( A );

    FLA_Apply_G_rf_blc_var6( k_G, m_A, n_A,
                             buff_G, rs_G, cs_G,
                             buff_A, rs_A, cs_A,
                             b_alg );
  }
  else if ( dt_A == FLA_DOUBLE_COMPLEX )
  {
    dcomplex* buff_G = ( dcomplex* ) FLA_DOUBLE_COMPLEX_PTR( G );
    dcomplex* buff_A = ( dcomplex* ) FLA_DOUBLE_COMPLEX_PTR( A );

    FLA_Apply_G_rf_blz_var6( k_G, m_A, n_A,
                             buff_G, rs_G, cs_G,
                             buff_A, rs_A, cs_A,
                             b_alg );
  }

  return FLA_SUCCESS;
}
| FLA_Error FLA_Apply_G_rf_blk_var6b | ( | FLA_Obj | G, |
| FLA_Obj | A, | ||
| dim_t | b_alg | ||
| ) |
References FLA_Apply_G_rf_blc_var6b(), FLA_Apply_G_rf_bld_var6b(), FLA_Apply_G_rf_bls_var6b(), FLA_Apply_G_rf_blz_var6b(), FLA_Obj_col_stride(), FLA_Obj_datatype(), FLA_Obj_length(), FLA_Obj_row_stride(), and FLA_Obj_width().
{
  // Datatype dispatcher for the blocked variant 6b. Reads the dimensions
  // and strides of G and A, then forwards to the kernel matching A's
  // datatype (bls: float, bld: double, blc: scomplex, blz: dcomplex),
  // always passing 0 for the kernel's i_k argument. When A is real the
  // buffer of G is still taken as complex of the same precision. Any other
  // datatype does no work. The kernel's return value is ignored and
  // FLA_SUCCESS is always returned.
  const FLA_Datatype dt_A = FLA_Obj_datatype( A );

  const int k_G  = FLA_Obj_width( G );
  const int m_A  = FLA_Obj_length( A );
  const int n_A  = FLA_Obj_width( A );

  const int rs_G = FLA_Obj_row_stride( G );
  const int cs_G = FLA_Obj_col_stride( G );
  const int rs_A = FLA_Obj_row_stride( A );
  const int cs_A = FLA_Obj_col_stride( A );

  if ( dt_A == FLA_FLOAT )
  {
    scomplex* buff_G = ( scomplex* ) FLA_COMPLEX_PTR( G );
    float*    buff_A = ( float* ) FLA_FLOAT_PTR( A );

    FLA_Apply_G_rf_bls_var6b( k_G, m_A, n_A,
                              0,
                              buff_G, rs_G, cs_G,
                              buff_A, rs_A, cs_A,
                              b_alg );
  }
  else if ( dt_A == FLA_DOUBLE )
  {
    dcomplex* buff_G = ( dcomplex* ) FLA_DOUBLE_COMPLEX_PTR( G );
    double*   buff_A = ( double* ) FLA_DOUBLE_PTR( A );

    FLA_Apply_G_rf_bld_var6b( k_G, m_A, n_A,
                              0,
                              buff_G, rs_G, cs_G,
                              buff_A, rs_A, cs_A,
                              b_alg );
  }
  else if ( dt_A == FLA_COMPLEX )
  {
    scomplex* buff_G = ( scomplex* ) FLA_COMPLEX_PTR( G );
    scomplex* buff_A = ( scomplex* ) FLA_COMPLEX_PTR( A );

    FLA_Apply_G_rf_blc_var6b( k_G, m_A, n_A,
                              0,
                              buff_G, rs_G, cs_G,
                              buff_A, rs_A, cs_A,
                              b_alg );
  }
  else if ( dt_A == FLA_DOUBLE_COMPLEX )
  {
    dcomplex* buff_G = ( dcomplex* ) FLA_DOUBLE_COMPLEX_PTR( G );
    dcomplex* buff_A = ( dcomplex* ) FLA_DOUBLE_COMPLEX_PTR( A );

    FLA_Apply_G_rf_blz_var6b( k_G, m_A, n_A,
                              0,
                              buff_G, rs_G, cs_G,
                              buff_A, rs_A, cs_A,
                              b_alg );
  }

  return FLA_SUCCESS;
}
| FLA_Error FLA_Apply_G_rf_blk_var7 | ( | FLA_Obj | G, |
| FLA_Obj | A, | ||
| dim_t | b_alg | ||
| ) |
| FLA_Error FLA_Apply_G_rf_blk_var8 | ( | FLA_Obj | G, |
| FLA_Obj | A, | ||
| dim_t | b_alg | ||
| ) |
| FLA_Error FLA_Apply_G_rf_blk_var8b | ( | FLA_Obj | G, |
| FLA_Obj | A, | ||
| dim_t | b_alg | ||
| ) |
| FLA_Error FLA_Apply_G_rf_blk_var9 | ( | FLA_Obj | G, |
| FLA_Obj | A, | ||
| dim_t | b_alg | ||
| ) |
References FLA_Apply_G_rf_blc_var9(), FLA_Apply_G_rf_bld_var9(), FLA_Apply_G_rf_bls_var9(), FLA_Apply_G_rf_blz_var9(), FLA_Obj_col_stride(), FLA_Obj_datatype(), FLA_Obj_length(), FLA_Obj_row_stride(), and FLA_Obj_width().
{
  // Datatype dispatcher for the blocked variant 9. Reads the dimensions and
  // strides of G and A, then forwards to the kernel matching A's datatype
  // (bls: float, bld: double, blc: scomplex, blz: dcomplex). When A is real
  // the buffer of G is still taken as complex of the same precision. Any
  // other datatype does no work. The kernel's return value is ignored and
  // FLA_SUCCESS is always returned.
  const FLA_Datatype dt_A = FLA_Obj_datatype( A );

  const int k_G  = FLA_Obj_width( G );
  const int m_A  = FLA_Obj_length( A );
  const int n_A  = FLA_Obj_width( A );

  const int rs_G = FLA_Obj_row_stride( G );
  const int cs_G = FLA_Obj_col_stride( G );
  const int rs_A = FLA_Obj_row_stride( A );
  const int cs_A = FLA_Obj_col_stride( A );

  if ( dt_A == FLA_FLOAT )
  {
    scomplex* buff_G = ( scomplex* ) FLA_COMPLEX_PTR( G );
    float*    buff_A = ( float* ) FLA_FLOAT_PTR( A );

    FLA_Apply_G_rf_bls_var9( k_G, m_A, n_A,
                             buff_G, rs_G, cs_G,
                             buff_A, rs_A, cs_A,
                             b_alg );
  }
  else if ( dt_A == FLA_DOUBLE )
  {
    dcomplex* buff_G = ( dcomplex* ) FLA_DOUBLE_COMPLEX_PTR( G );
    double*   buff_A = ( double* ) FLA_DOUBLE_PTR( A );

    FLA_Apply_G_rf_bld_var9( k_G, m_A, n_A,
                             buff_G, rs_G, cs_G,
                             buff_A, rs_A, cs_A,
                             b_alg );
  }
  else if ( dt_A == FLA_COMPLEX )
  {
    scomplex* buff_G = ( scomplex* ) FLA_COMPLEX_PTR( G );
    scomplex* buff_A = ( scomplex* ) FLA_COMPLEX_PTR( A );

    FLA_Apply_G_rf_blc_var9( k_G, m_A, n_A,
                             buff_G, rs_G, cs_G,
                             buff_A, rs_A, cs_A,
                             b_alg );
  }
  else if ( dt_A == FLA_DOUBLE_COMPLEX )
  {
    dcomplex* buff_G = ( dcomplex* ) FLA_DOUBLE_COMPLEX_PTR( G );
    dcomplex* buff_A = ( dcomplex* ) FLA_DOUBLE_COMPLEX_PTR( A );

    FLA_Apply_G_rf_blz_var9( k_G, m_A, n_A,
                             buff_G, rs_G, cs_G,
                             buff_A, rs_A, cs_A,
                             b_alg );
  }

  return FLA_SUCCESS;
}
| FLA_Error FLA_Apply_G_rf_blk_var9b | ( | FLA_Obj | G, |
| FLA_Obj | A, | ||
| dim_t | b_alg | ||
| ) |
References FLA_Apply_G_rf_blc_var9b(), FLA_Apply_G_rf_bld_var9b(), FLA_Apply_G_rf_bls_var9b(), FLA_Apply_G_rf_blz_var9b(), FLA_Obj_col_stride(), FLA_Obj_datatype(), FLA_Obj_length(), FLA_Obj_row_stride(), and FLA_Obj_width().
{
  // Datatype dispatcher for the blocked variant 9b. Reads the dimensions
  // and strides of G and A, then forwards to the kernel matching A's
  // datatype (bls: float, bld: double, blc: scomplex, blz: dcomplex),
  // always passing 0 for the kernel's i_k argument. When A is real the
  // buffer of G is still taken as complex of the same precision. Any other
  // datatype does no work. The kernel's return value is ignored and
  // FLA_SUCCESS is always returned.
  const FLA_Datatype dt_A = FLA_Obj_datatype( A );

  const int k_G  = FLA_Obj_width( G );
  const int m_A  = FLA_Obj_length( A );
  const int n_A  = FLA_Obj_width( A );

  const int rs_G = FLA_Obj_row_stride( G );
  const int cs_G = FLA_Obj_col_stride( G );
  const int rs_A = FLA_Obj_row_stride( A );
  const int cs_A = FLA_Obj_col_stride( A );

  if ( dt_A == FLA_FLOAT )
  {
    scomplex* buff_G = ( scomplex* ) FLA_COMPLEX_PTR( G );
    float*    buff_A = ( float* ) FLA_FLOAT_PTR( A );

    FLA_Apply_G_rf_bls_var9b( k_G, m_A, n_A,
                              0,
                              buff_G, rs_G, cs_G,
                              buff_A, rs_A, cs_A,
                              b_alg );
  }
  else if ( dt_A == FLA_DOUBLE )
  {
    dcomplex* buff_G = ( dcomplex* ) FLA_DOUBLE_COMPLEX_PTR( G );
    double*   buff_A = ( double* ) FLA_DOUBLE_PTR( A );

    FLA_Apply_G_rf_bld_var9b( k_G, m_A, n_A,
                              0,
                              buff_G, rs_G, cs_G,
                              buff_A, rs_A, cs_A,
                              b_alg );
  }
  else if ( dt_A == FLA_COMPLEX )
  {
    scomplex* buff_G = ( scomplex* ) FLA_COMPLEX_PTR( G );
    scomplex* buff_A = ( scomplex* ) FLA_COMPLEX_PTR( A );

    FLA_Apply_G_rf_blc_var9b( k_G, m_A, n_A,
                              0,
                              buff_G, rs_G, cs_G,
                              buff_A, rs_A, cs_A,
                              b_alg );
  }
  else if ( dt_A == FLA_DOUBLE_COMPLEX )
  {
    dcomplex* buff_G = ( dcomplex* ) FLA_DOUBLE_COMPLEX_PTR( G );
    dcomplex* buff_A = ( dcomplex* ) FLA_DOUBLE_COMPLEX_PTR( A );

    FLA_Apply_G_rf_blz_var9b( k_G, m_A, n_A,
                              0,
                              buff_G, rs_G, cs_G,
                              buff_A, rs_A, cs_A,
                              b_alg );
  }

  return FLA_SUCCESS;
}
| FLA_Error FLA_Apply_G_rf_bls_var1 | ( | int | k_G, |
| int | m_A, | ||
| int | n_A, | ||
| scomplex * | buff_G, | ||
| int | rs_G, | ||
| int | cs_G, | ||
| float * | buff_A, | ||
| int | rs_A, | ||
| int | cs_A, | ||
| int | b_alg | ||
| ) |
References FLA_Apply_G_rf_ass_var1().
Referenced by FLA_Apply_G_rf_blk_var1().
{
  // Blocked driver: covers the m_A rows of A in panels of at most b_alg
  // rows and calls FLA_Apply_G_rf_ass_var1() once per panel with the same
  // G. The panel beginning at row i starts at buff_A + i*rs_A. The kernel's
  // return value is not inspected; this routine always returns FLA_SUCCESS.
  //
  // Fix: a non-positive b_alg previously gave b <= 0, so i never advanced
  // to m_A. Such a value now selects a single panel holding all m_A rows.
  const int b_max = ( b_alg > 0 ? b_alg : m_A );
  int       i;
  int       b;

  for ( i = 0; i < m_A; i += b )
  {
    float*    A1      = buff_A + i * rs_A;
    const int m_ahead = m_A - i;

    // The last panel may hold fewer than b_max rows.
    b = min( b_max, m_ahead );

    FLA_Apply_G_rf_ass_var1( k_G, b, n_A,
                             buff_G, rs_G, cs_G,
                             A1, rs_A, cs_A );
  }

  return FLA_SUCCESS;
}
| FLA_Error FLA_Apply_G_rf_bls_var2 | ( | int | k_G, |
| int | m_A, | ||
| int | n_A, | ||
| scomplex * | buff_G, | ||
| int | rs_G, | ||
| int | cs_G, | ||
| float * | buff_A, | ||
| int | rs_A, | ||
| int | cs_A, | ||
| int | b_alg | ||
| ) |
References FLA_Apply_G_rf_ass_var2().
Referenced by FLA_Apply_G_rf_blk_var2().
{
  // Blocked driver: covers the m_A rows of A in panels of at most b_alg
  // rows and calls FLA_Apply_G_rf_ass_var2() once per panel with the same
  // G. The panel beginning at row i starts at buff_A + i*rs_A. The kernel's
  // return value is not inspected; this routine always returns FLA_SUCCESS.
  //
  // Fix: a non-positive b_alg previously gave b <= 0, so i never advanced
  // to m_A. Such a value now selects a single panel holding all m_A rows.
  const int b_max = ( b_alg > 0 ? b_alg : m_A );
  int       i;
  int       b;

  for ( i = 0; i < m_A; i += b )
  {
    float*    A1      = buff_A + i * rs_A;
    const int m_ahead = m_A - i;

    // The last panel may hold fewer than b_max rows.
    b = min( b_max, m_ahead );

    FLA_Apply_G_rf_ass_var2( k_G, b, n_A,
                             buff_G, rs_G, cs_G,
                             A1, rs_A, cs_A );
  }

  return FLA_SUCCESS;
}
| FLA_Error FLA_Apply_G_rf_bls_var3 | ( | int | k_G, |
| int | m_A, | ||
| int | n_A, | ||
| scomplex * | buff_G, | ||
| int | rs_G, | ||
| int | cs_G, | ||
| float * | buff_A, | ||
| int | rs_A, | ||
| int | cs_A, | ||
| int | b_alg | ||
| ) |
References FLA_Apply_G_rf_ass_var3().
Referenced by FLA_Apply_G_rf_blk_var3().
{
  // Blocked driver: covers the m_A rows of A in panels of at most b_alg
  // rows and calls FLA_Apply_G_rf_ass_var3() once per panel with the same
  // G. The panel beginning at row i starts at buff_A + i*rs_A. The kernel's
  // return value is not inspected; this routine always returns FLA_SUCCESS.
  //
  // Fix: a non-positive b_alg previously gave b <= 0, so i never advanced
  // to m_A. Such a value now selects a single panel holding all m_A rows.
  const int b_max = ( b_alg > 0 ? b_alg : m_A );
  int       i;
  int       b;

  for ( i = 0; i < m_A; i += b )
  {
    float*    A1      = buff_A + i * rs_A;
    const int m_ahead = m_A - i;

    // The last panel may hold fewer than b_max rows.
    b = min( b_max, m_ahead );

    FLA_Apply_G_rf_ass_var3( k_G, b, n_A,
                             buff_G, rs_G, cs_G,
                             A1, rs_A, cs_A );
  }

  return FLA_SUCCESS;
}
| FLA_Error FLA_Apply_G_rf_bls_var3b | ( | int | k_G, |
| int | m_A, | ||
| int | n_A, | ||
| int | i_k, | ||
| scomplex * | buff_G, | ||
| int | rs_G, | ||
| int | cs_G, | ||
| float * | buff_A, | ||
| int | rs_A, | ||
| int | cs_A, | ||
| int | b_alg | ||
| ) |
References FLA_Apply_G_rf_ass_var3b().
Referenced by FLA_Apply_G_rf_blk_var3b().
{
  // Blocked driver: covers the m_A rows of A in panels of at most b_alg
  // rows and calls FLA_Apply_G_rf_ass_var3b() once per panel with the same
  // G. The panel beginning at row i starts at buff_A + i*rs_A; i_k is
  // forwarded unchanged and i itself is passed right after it (m_behind).
  // The kernel's return value is not inspected; this routine always returns
  // FLA_SUCCESS.
  //
  // Fix: a non-positive b_alg previously gave b <= 0, so i never advanced
  // to m_A. Such a value now selects a single panel holding all m_A rows.
  const int b_max = ( b_alg > 0 ? b_alg : m_A );
  int       i;
  int       b;

  for ( i = 0; i < m_A; i += b )
  {
    float*    A1       = buff_A + i * rs_A;
    const int m_behind = i;
    const int m_ahead  = m_A - i;

    // The last panel may hold fewer than b_max rows.
    b = min( b_max, m_ahead );

    FLA_Apply_G_rf_ass_var3b( k_G, b, n_A,
                              i_k, m_behind,
                              buff_G, rs_G, cs_G,
                              A1, rs_A, cs_A );
  }

  return FLA_SUCCESS;
}
| FLA_Error FLA_Apply_G_rf_bls_var4 | ( | int | k_G, |
| int | m_A, | ||
| int | n_A, | ||
| scomplex * | buff_G, | ||
| int | rs_G, | ||
| int | cs_G, | ||
| float * | buff_A, | ||
| int | rs_A, | ||
| int | cs_A, | ||
| int | b_alg | ||
| ) |
| FLA_Error FLA_Apply_G_rf_bls_var5 | ( | int | k_G, |
| int | m_A, | ||
| int | n_A, | ||
| scomplex * | buff_G, | ||
| int | rs_G, | ||
| int | cs_G, | ||
| float * | buff_A, | ||
| int | rs_A, | ||
| int | cs_A, | ||
| int | b_alg | ||
| ) |
| FLA_Error FLA_Apply_G_rf_bls_var5b | ( | int | k_G, |
| int | m_A, | ||
| int | n_A, | ||
| int | i_k, | ||
| scomplex * | buff_G, | ||
| int | rs_G, | ||
| int | cs_G, | ||
| float * | buff_A, | ||
| int | rs_A, | ||
| int | cs_A, | ||
| int | b_alg | ||
| ) |
| FLA_Error FLA_Apply_G_rf_bls_var6 | ( | int | k_G, |
| int | m_A, | ||
| int | n_A, | ||
| scomplex * | buff_G, | ||
| int | rs_G, | ||
| int | cs_G, | ||
| float * | buff_A, | ||
| int | rs_A, | ||
| int | cs_A, | ||
| int | b_alg | ||
| ) |
References FLA_Apply_G_rf_ass_var6().
Referenced by FLA_Apply_G_rf_blk_var6().
{
  // Blocked driver: covers the m_A rows of A in panels of at most b_alg
  // rows and calls FLA_Apply_G_rf_ass_var6() once per panel with the same
  // G. The panel beginning at row i starts at buff_A + i*rs_A. The kernel's
  // return value is not inspected; this routine always returns FLA_SUCCESS.
  //
  // Fix: a non-positive b_alg previously gave b <= 0, so i never advanced
  // to m_A. Such a value now selects a single panel holding all m_A rows.
  const int b_max = ( b_alg > 0 ? b_alg : m_A );
  int       i;
  int       b;

  for ( i = 0; i < m_A; i += b )
  {
    float*    A1      = buff_A + i * rs_A;
    const int m_ahead = m_A - i;

    // The last panel may hold fewer than b_max rows.
    b = min( b_max, m_ahead );

    FLA_Apply_G_rf_ass_var6( k_G, b, n_A,
                             buff_G, rs_G, cs_G,
                             A1, rs_A, cs_A );
  }

  return FLA_SUCCESS;
}
| FLA_Error FLA_Apply_G_rf_bls_var6b | ( | int | k_G, |
| int | m_A, | ||
| int | n_A, | ||
| int | i_k, | ||
| scomplex * | buff_G, | ||
| int | rs_G, | ||
| int | cs_G, | ||
| float * | buff_A, | ||
| int | rs_A, | ||
| int | cs_A, | ||
| int | b_alg | ||
| ) |
References FLA_Apply_G_rf_ass_var6b().
Referenced by FLA_Apply_G_rf_blk_var6b().
{
  // Blocked driver: covers the m_A rows of A in panels of at most b_alg
  // rows and calls FLA_Apply_G_rf_ass_var6b() once per panel with the same
  // G. The panel beginning at row i starts at buff_A + i*rs_A; i_k is
  // forwarded unchanged and i itself is passed right after it (m_behind).
  // The kernel's return value is not inspected; this routine always returns
  // FLA_SUCCESS.
  //
  // Fix: a non-positive b_alg previously gave b <= 0, so i never advanced
  // to m_A. Such a value now selects a single panel holding all m_A rows.
  const int b_max = ( b_alg > 0 ? b_alg : m_A );
  int       i;
  int       b;

  for ( i = 0; i < m_A; i += b )
  {
    float*    A1       = buff_A + i * rs_A;
    const int m_behind = i;
    const int m_ahead  = m_A - i;

    // The last panel may hold fewer than b_max rows.
    b = min( b_max, m_ahead );

    FLA_Apply_G_rf_ass_var6b( k_G, b, n_A,
                              i_k, m_behind,
                              buff_G, rs_G, cs_G,
                              A1, rs_A, cs_A );
  }

  return FLA_SUCCESS;
}
| FLA_Error FLA_Apply_G_rf_bls_var7 | ( | int | k_G, |
| int | m_A, | ||
| int | n_A, | ||
| scomplex * | buff_G, | ||
| int | rs_G, | ||
| int | cs_G, | ||
| float * | buff_A, | ||
| int | rs_A, | ||
| int | cs_A, | ||
| int | b_alg | ||
| ) |
| FLA_Error FLA_Apply_G_rf_bls_var8 | ( | int | k_G, |
| int | m_A, | ||
| int | n_A, | ||
| scomplex * | buff_G, | ||
| int | rs_G, | ||
| int | cs_G, | ||
| float * | buff_A, | ||
| int | rs_A, | ||
| int | cs_A, | ||
| int | b_alg | ||
| ) |
| FLA_Error FLA_Apply_G_rf_bls_var8b | ( | int | k_G, |
| int | m_A, | ||
| int | n_A, | ||
| int | i_k, | ||
| scomplex * | buff_G, | ||
| int | rs_G, | ||
| int | cs_G, | ||
| float * | buff_A, | ||
| int | rs_A, | ||
| int | cs_A, | ||
| int | b_alg | ||
| ) |
| FLA_Error FLA_Apply_G_rf_bls_var9 | ( | int | k_G, |
| int | m_A, | ||
| int | n_A, | ||
| scomplex * | buff_G, | ||
| int | rs_G, | ||
| int | cs_G, | ||
| float * | buff_A, | ||
| int | rs_A, | ||
| int | cs_A, | ||
| int | b_alg | ||
| ) |
References FLA_Apply_G_rf_ass_var9().
Referenced by FLA_Apply_G_rf_blk_var9().
{
  // Blocked driver: covers the m_A rows of A in panels of at most b_alg
  // rows and calls FLA_Apply_G_rf_ass_var9() once per panel with the same
  // G. The panel beginning at row i starts at buff_A + i*rs_A. The kernel's
  // return value is not inspected; this routine always returns FLA_SUCCESS.
  //
  // Fix: a non-positive b_alg previously gave b <= 0, so i never advanced
  // to m_A. Such a value now selects a single panel holding all m_A rows.
  const int b_max = ( b_alg > 0 ? b_alg : m_A );
  int       i;
  int       b;

  for ( i = 0; i < m_A; i += b )
  {
    float*    A1      = buff_A + i * rs_A;
    const int m_ahead = m_A - i;

    // The last panel may hold fewer than b_max rows.
    b = min( b_max, m_ahead );

    FLA_Apply_G_rf_ass_var9( k_G, b, n_A,
                             buff_G, rs_G, cs_G,
                             A1, rs_A, cs_A );
  }

  return FLA_SUCCESS;
}
| FLA_Error FLA_Apply_G_rf_bls_var9b | ( | int | k_G, |
| int | m_A, | ||
| int | n_A, | ||
| int | i_k, | ||
| scomplex * | buff_G, | ||
| int | rs_G, | ||
| int | cs_G, | ||
| float * | buff_A, | ||
| int | rs_A, | ||
| int | cs_A, | ||
| int | b_alg | ||
| ) |
References FLA_Apply_G_rf_ass_var9b().
Referenced by FLA_Apply_G_rf_blk_var9b().
{
  // Blocked driver: covers the m_A rows of A in panels of at most b_alg
  // rows and calls FLA_Apply_G_rf_ass_var9b() once per panel with the same
  // G. The panel beginning at row i starts at buff_A + i*rs_A; i_k is
  // forwarded unchanged and i itself is passed right after it (m_behind).
  // The kernel's return value is not inspected; this routine always returns
  // FLA_SUCCESS.
  //
  // Fix: a non-positive b_alg previously gave b <= 0, so i never advanced
  // to m_A. Such a value now selects a single panel holding all m_A rows.
  const int b_max = ( b_alg > 0 ? b_alg : m_A );
  int       i;
  int       b;

  for ( i = 0; i < m_A; i += b )
  {
    float*    A1       = buff_A + i * rs_A;
    const int m_behind = i;
    const int m_ahead  = m_A - i;

    // The last panel may hold fewer than b_max rows.
    b = min( b_max, m_ahead );

    FLA_Apply_G_rf_ass_var9b( k_G, b, n_A,
                              i_k, m_behind,
                              buff_G, rs_G, cs_G,
                              A1, rs_A, cs_A );
  }

  return FLA_SUCCESS;
}
| FLA_Error FLA_Apply_G_rf_blz_var1 | ( | int | k_G, |
| int | m_A, | ||
| int | n_A, | ||
| dcomplex * | buff_G, | ||
| int | rs_G, | ||
| int | cs_G, | ||
| dcomplex * | buff_A, | ||
| int | rs_A, | ||
| int | cs_A, | ||
| int | b_alg | ||
| ) |
References FLA_Apply_G_rf_asz_var1().
Referenced by FLA_Apply_G_rf_blk_var1().
{
  // Blocked driver: covers the m_A rows of A in panels of at most b_alg
  // rows and calls FLA_Apply_G_rf_asz_var1() once per panel with the same
  // G. The panel beginning at row i starts at buff_A + i*rs_A. The kernel's
  // return value is not inspected; this routine always returns FLA_SUCCESS.
  //
  // Fix: a non-positive b_alg previously gave b <= 0, so i never advanced
  // to m_A. Such a value now selects a single panel holding all m_A rows.
  const int b_max = ( b_alg > 0 ? b_alg : m_A );
  int       i;
  int       b;

  for ( i = 0; i < m_A; i += b )
  {
    dcomplex* A1      = buff_A + i * rs_A;
    const int m_ahead = m_A - i;

    // The last panel may hold fewer than b_max rows.
    b = min( b_max, m_ahead );

    FLA_Apply_G_rf_asz_var1( k_G, b, n_A,
                             buff_G, rs_G, cs_G,
                             A1, rs_A, cs_A );
  }

  return FLA_SUCCESS;
}
| FLA_Error FLA_Apply_G_rf_blz_var2 | ( | int | k_G, |
| int | m_A, | ||
| int | n_A, | ||
| dcomplex * | buff_G, | ||
| int | rs_G, | ||
| int | cs_G, | ||
| dcomplex * | buff_A, | ||
| int | rs_A, | ||
| int | cs_A, | ||
| int | b_alg | ||
| ) |
References FLA_Apply_G_rf_asz_var2().
Referenced by FLA_Apply_G_rf_blk_var2().
{
  // Blocked driver: covers the m_A rows of A in panels of at most b_alg
  // rows and calls FLA_Apply_G_rf_asz_var2() once per panel with the same
  // G. The panel beginning at row i starts at buff_A + i*rs_A. The kernel's
  // return value is not inspected; this routine always returns FLA_SUCCESS.
  //
  // Fix: a non-positive b_alg previously gave b <= 0, so i never advanced
  // to m_A. Such a value now selects a single panel holding all m_A rows.
  const int b_max = ( b_alg > 0 ? b_alg : m_A );
  int       i;
  int       b;

  for ( i = 0; i < m_A; i += b )
  {
    dcomplex* A1      = buff_A + i * rs_A;
    const int m_ahead = m_A - i;

    // The last panel may hold fewer than b_max rows.
    b = min( b_max, m_ahead );

    FLA_Apply_G_rf_asz_var2( k_G, b, n_A,
                             buff_G, rs_G, cs_G,
                             A1, rs_A, cs_A );
  }

  return FLA_SUCCESS;
}
| FLA_Error FLA_Apply_G_rf_blz_var3 | ( | int | k_G, |
| int | m_A, | ||
| int | n_A, | ||
| dcomplex * | buff_G, | ||
| int | rs_G, | ||
| int | cs_G, | ||
| dcomplex * | buff_A, | ||
| int | rs_A, | ||
| int | cs_A, | ||
| int | b_alg | ||
| ) |
References FLA_Apply_G_rf_asz_var3().
Referenced by FLA_Apply_G_rf_blk_var3(), FLA_Bsvd_v_opz_var1(), FLA_Tevd_v_opz_var1(), and FLA_Tevd_v_opz_var3().
{
  // Blocked driver: covers the m_A rows of A in panels of at most b_alg
  // rows and calls FLA_Apply_G_rf_asz_var3() once per panel with the same
  // G. The panel beginning at row i starts at buff_A + i*rs_A. The kernel's
  // return value is not inspected; this routine always returns FLA_SUCCESS.
  //
  // Fix: a non-positive b_alg previously gave b <= 0, so i never advanced
  // to m_A. Such a value now selects a single panel holding all m_A rows.
  const int b_max = ( b_alg > 0 ? b_alg : m_A );
  int       i;
  int       b;

  for ( i = 0; i < m_A; i += b )
  {
    dcomplex* A1      = buff_A + i * rs_A;
    const int m_ahead = m_A - i;

    // The last panel may hold fewer than b_max rows.
    b = min( b_max, m_ahead );

    FLA_Apply_G_rf_asz_var3( k_G, b, n_A,
                             buff_G, rs_G, cs_G,
                             A1, rs_A, cs_A );
  }

  return FLA_SUCCESS;
}
| FLA_Error FLA_Apply_G_rf_blz_var3b | ( | int | k_G, |
| int | m_A, | ||
| int | n_A, | ||
| int | i_k, | ||
| dcomplex * | buff_G, | ||
| int | rs_G, | ||
| int | cs_G, | ||
| dcomplex * | buff_A, | ||
| int | rs_A, | ||
| int | cs_A, | ||
| int | b_alg | ||
| ) |
Referenced by FLA_Apply_G_rf_blk_var3b().
{
  // Placeholder: the dcomplex form of blocked variant 3b is not
  // implemented. No argument is used; the condition is reported through the
  // library's error check.
  FLA_Check_error_code( FLA_NOT_YET_IMPLEMENTED );

  // Only reached if the check above hands control back. Report the same
  // code rather than FLA_SUCCESS so the missing implementation stays
  // visible to the caller.
  return FLA_NOT_YET_IMPLEMENTED;
}
| FLA_Error FLA_Apply_G_rf_blz_var4 | ( | int | k_G, |
| int | m_A, | ||
| int | n_A, | ||
| dcomplex * | buff_G, | ||
| int | rs_G, | ||
| int | cs_G, | ||
| dcomplex * | buff_A, | ||
| int | rs_A, | ||
| int | cs_A, | ||
| int | b_alg | ||
| ) |
| FLA_Error FLA_Apply_G_rf_blz_var5 | ( | int | k_G, |
| int | m_A, | ||
| int | n_A, | ||
| dcomplex * | buff_G, | ||
| int | rs_G, | ||
| int | cs_G, | ||
| dcomplex * | buff_A, | ||
| int | rs_A, | ||
| int | cs_A, | ||
| int | b_alg | ||
| ) |
| FLA_Error FLA_Apply_G_rf_blz_var5b | ( | int | k_G, |
| int | m_A, | ||
| int | n_A, | ||
| int | i_k, | ||
| dcomplex * | buff_G, | ||
| int | rs_G, | ||
| int | cs_G, | ||
| dcomplex * | buff_A, | ||
| int | rs_A, | ||
| int | cs_A, | ||
| int | b_alg | ||
| ) |
| FLA_Error FLA_Apply_G_rf_blz_var6 | ( | int | k_G, |
| int | m_A, | ||
| int | n_A, | ||
| dcomplex * | buff_G, | ||
| int | rs_G, | ||
| int | cs_G, | ||
| dcomplex * | buff_A, | ||
| int | rs_A, | ||
| int | cs_A, | ||
| int | b_alg | ||
| ) |
References FLA_Apply_G_rf_asz_var6().
Referenced by FLA_Apply_G_rf_blk_var6().
{
  // Blocked driver: covers the m_A rows of A in panels of at most b_alg
  // rows and calls FLA_Apply_G_rf_asz_var6() once per panel with the same
  // G. The panel beginning at row i starts at buff_A + i*rs_A. The kernel's
  // return value is not inspected; this routine always returns FLA_SUCCESS.
  //
  // Fix: a non-positive b_alg previously gave b <= 0, so i never advanced
  // to m_A. Such a value now selects a single panel holding all m_A rows.
  const int b_max = ( b_alg > 0 ? b_alg : m_A );
  int       i;
  int       b;

  for ( i = 0; i < m_A; i += b )
  {
    dcomplex* A1      = buff_A + i * rs_A;
    const int m_ahead = m_A - i;

    // The last panel may hold fewer than b_max rows.
    b = min( b_max, m_ahead );

    FLA_Apply_G_rf_asz_var6( k_G, b, n_A,
                             buff_G, rs_G, cs_G,
                             A1, rs_A, cs_A );
  }

  return FLA_SUCCESS;
}
| FLA_Error FLA_Apply_G_rf_blz_var6b | ( | int | k_G, |
| int | m_A, | ||
| int | n_A, | ||
| int | i_k, | ||
| dcomplex * | buff_G, | ||
| int | rs_G, | ||
| int | cs_G, | ||
| dcomplex * | buff_A, | ||
| int | rs_A, | ||
| int | cs_A, | ||
| int | b_alg | ||
| ) |
Referenced by FLA_Apply_G_rf_blk_var6b().
{
  // Placeholder: the dcomplex form of blocked variant 6b is not
  // implemented. No argument is used; the condition is reported through the
  // library's error check.
  FLA_Check_error_code( FLA_NOT_YET_IMPLEMENTED );

  // Only reached if the check above hands control back. Report the same
  // code rather than FLA_SUCCESS so the missing implementation stays
  // visible to the caller.
  return FLA_NOT_YET_IMPLEMENTED;
}
| FLA_Error FLA_Apply_G_rf_blz_var7 | ( | int | k_G, |
| int | m_A, | ||
| int | n_A, | ||
| dcomplex * | buff_G, | ||
| int | rs_G, | ||
| int | cs_G, | ||
| dcomplex * | buff_A, | ||
| int | rs_A, | ||
| int | cs_A, | ||
| int | b_alg | ||
| ) |
| FLA_Error FLA_Apply_G_rf_blz_var8 | ( | int | k_G, |
| int | m_A, | ||
| int | n_A, | ||
| dcomplex * | buff_G, | ||
| int | rs_G, | ||
| int | cs_G, | ||
| dcomplex * | buff_A, | ||
| int | rs_A, | ||
| int | cs_A, | ||
| int | b_alg | ||
| ) |
| FLA_Error FLA_Apply_G_rf_blz_var8b | ( | int | k_G, |
| int | m_A, | ||
| int | n_A, | ||
| int | i_k, | ||
| dcomplex * | buff_G, | ||
| int | rs_G, | ||
| int | cs_G, | ||
| dcomplex * | buff_A, | ||
| int | rs_A, | ||
| int | cs_A, | ||
| int | b_alg | ||
| ) |
| FLA_Error FLA_Apply_G_rf_blz_var9 | ( | int | k_G, |
| int | m_A, | ||
| int | n_A, | ||
| dcomplex * | buff_G, | ||
| int | rs_G, | ||
| int | cs_G, | ||
| dcomplex * | buff_A, | ||
| int | rs_A, | ||
| int | cs_A, | ||
| int | b_alg | ||
| ) |
References FLA_Apply_G_rf_asz_var9().
Referenced by FLA_Apply_G_rf_blk_var9().
{
  // Blocked driver: covers the m_A rows of A in panels of at most b_alg
  // rows and calls FLA_Apply_G_rf_asz_var9() once per panel with the same
  // G. The panel beginning at row i starts at buff_A + i*rs_A. The kernel's
  // return value is not inspected; this routine always returns FLA_SUCCESS.
  //
  // Fix: a non-positive b_alg previously gave b <= 0, so i never advanced
  // to m_A. Such a value now selects a single panel holding all m_A rows.
  const int b_max = ( b_alg > 0 ? b_alg : m_A );
  int       i;
  int       b;

  for ( i = 0; i < m_A; i += b )
  {
    dcomplex* A1      = buff_A + i * rs_A;
    const int m_ahead = m_A - i;

    // The last panel may hold fewer than b_max rows.
    b = min( b_max, m_ahead );

    FLA_Apply_G_rf_asz_var9( k_G, b, n_A,
                             buff_G, rs_G, cs_G,
                             A1, rs_A, cs_A );
  }

  return FLA_SUCCESS;
}
| FLA_Error FLA_Apply_G_rf_blz_var9b | ( | int | k_G, |
| int | m_A, | ||
| int | n_A, | ||
| int | i_k, | ||
| dcomplex * | buff_G, | ||
| int | rs_G, | ||
| int | cs_G, | ||
| dcomplex * | buff_A, | ||
| int | rs_A, | ||
| int | cs_A, | ||
| int | b_alg | ||
| ) |
Referenced by FLA_Apply_G_rf_blk_var9b().
{
  // Placeholder: the dcomplex form of blocked variant 9b is not
  // implemented. No argument is used; the condition is reported through the
  // library's error check.
  FLA_Check_error_code( FLA_NOT_YET_IMPLEMENTED );

  // Only reached if the check above hands control back. Report the same
  // code rather than FLA_SUCCESS so the missing implementation stays
  // visible to the caller.
  return FLA_NOT_YET_IMPLEMENTED;
}
| FLA_Error FLA_Apply_G_rf_opc_var1 | ( | int | k_G, |
| int | m_A, | ||
| int | n_A, | ||
| scomplex * | buff_G, | ||
| int | rs_G, | ||
| int | cs_G, | ||
| scomplex * | buff_A, | ||
| int | rs_A, | ||
| int | cs_A | ||
| ) |
References bli_s0(), bli_s1(), scomplex::imag, and scomplex::real.
Referenced by FLA_Apply_G_rf_opc_var2(), FLA_Apply_G_rf_opc_var3(), FLA_Apply_G_rf_opc_var6(), FLA_Apply_G_rf_opc_var9(), and FLA_Apply_G_rf_opt_var1().
{
  // Applies the k_G sets of rotations stored in G to A, finishing one set
  // before starting the next. Each set holds n_A - 1 rotations. Entry
  // (j, k) of G, located at buff_G + j*rs_G + k*cs_G, carries gamma in its
  // real part and sigma in its imaginary part; it is handed to
  // MAC_Apply_G_mx2_opc() together with the two vectors of A that start at
  // buff_A + j*cs_A and buff_A + (j+1)*cs_A (length m_A, stride rs_A).
  // Always returns FLA_SUCCESS.
  const float one    = bli_s1();
  const float zero   = bli_s0();
  const int   nG_app = n_A - 1;
  float       gamma;
  float       sigma;
  scomplex*   a1;
  scomplex*   a2;
  scomplex*   g11;
  int         j, k;

  for ( k = 0; k < k_G; ++k )
  {
    for ( j = 0; j < nG_app; ++j )
    {
      g11 = buff_G + j * rs_G + k * cs_G;
      a1  = buff_A + j * cs_A;
      a2  = a1 + cs_A;

      gamma = g11->real;
      sigma = g11->imag;

      // gamma == 1 and sigma == 0 is the identity rotation: nothing to do.
      if ( gamma == one && sigma == zero ) continue;

      MAC_Apply_G_mx2_opc( m_A,
                           &gamma,
                           &sigma,
                           a1, rs_A,
                           a2, rs_A );
    }
  }

  return FLA_SUCCESS;
}
| FLA_Error FLA_Apply_G_rf_opc_var2 | ( | int | k_G, |
| int | m_A, | ||
| int | n_A, | ||
| scomplex * | buff_G, | ||
| int | rs_G, | ||
| int | cs_G, | ||
| scomplex * | buff_A, | ||
| int | rs_A, | ||
| int | cs_A | ||
| ) |
References bli_s0(), bli_s1(), FLA_Apply_G_rf_opc_var1(), scomplex::imag, and scomplex::real.
Referenced by FLA_Apply_G_rf_opt_var2().
{
// Applies the k_G sets of rotations stored in G to A in a pipelined
// ("wavefront") order rather than one whole set after another. Entry
// (g, k) of G, at buff_G + g*rs_G + k*cs_G, carries gamma in its real part
// and sigma in its imaginary part, and is applied through
// MAC_Apply_G_mx2_opc() to the two vectors of A that start at
// buff_A + g*cs_A and buff_A + (g+1)*cs_A (length m_A, stride rs_A).
// Always returns FLA_SUCCESS.
float one = bli_s1();
float zero = bli_s0();
float gamma;
float sigma;
scomplex* a1;
scomplex* a2;
scomplex* g11;
int j, g, k;
int nG, nG_app;
int k_minus_1;
k_minus_1 = k_G - 1;
// Number of rotations in each set.
nG = n_A - 1;
// Use the simple variant for nG < (k - 1) or k == 1.
if ( nG < k_minus_1 || k_G == 1 )
{
FLA_Apply_G_rf_opc_var1( k_G,
m_A,
n_A,
buff_G, rs_G, cs_G,
buff_A, rs_A, cs_A );
return FLA_SUCCESS;
}
// Start-up phase.
// Step j (0 .. k_G-2) applies j + 1 rotations: set k uses rotation
// g = j - k, for k = 0 .. j.
for ( j = 0; j < k_minus_1; ++j )
{
nG_app = j + 1;
for ( k = 0, g = nG_app - 1; k < nG_app; ++k, --g )
{
g11 = buff_G + (g )*rs_G + (k )*cs_G;
a1 = buff_A + (g )*cs_A;
a2 = buff_A + (g + 1)*cs_A;
gamma = g11->real;
sigma = g11->imag;
// Skip the current iteration if the rotation is identity.
if ( gamma == one && sigma == zero ) continue;
MAC_Apply_G_mx2_opc( m_A,
&gamma,
&sigma,
a1, rs_A,
a2, rs_A );
}
}
// Pipeline stage
// Step j (k_G-1 .. nG-1) applies one rotation from every set: set k uses
// rotation g = j - k, for k = 0 .. k_G-1.
for ( j = k_minus_1; j < nG; ++j )
{
nG_app = k_G;
for ( k = 0, g = j; k < nG_app; ++k, --g )
{
g11 = buff_G + (g )*rs_G + (k )*cs_G;
a1 = buff_A + (g )*cs_A;
a2 = buff_A + (g + 1)*cs_A;
gamma = g11->real;
sigma = g11->imag;
// Skip the current iteration if the rotation is identity.
if ( gamma == one && sigma == zero ) continue;
MAC_Apply_G_mx2_opc( m_A,
&gamma,
&sigma,
a1, rs_A,
a2, rs_A );
}
}
// Shutdown stage
// Step j (nG-(k_G-1) .. nG-1) applies the nG - j rotations still pending:
// sets k = k_G - nG_app .. k_G-1, with g counting down from nG - 1.
for ( j = nG - k_minus_1; j < nG; ++j )
{
nG_app = nG - j;
for ( k = k_G - nG_app, g = nG - 1; k < k_G; ++k, --g )
{
g11 = buff_G + (g )*rs_G + (k )*cs_G;
a1 = buff_A + (g )*cs_A;
a2 = buff_A + (g + 1)*cs_A;
gamma = g11->real;
sigma = g11->imag;
// Skip the current iteration if the rotation is identity.
if ( gamma == one && sigma == zero ) continue;
MAC_Apply_G_mx2_opc( m_A,
&gamma,
&sigma,
a1, rs_A,
a2, rs_A );
}
}
return FLA_SUCCESS;
}
| FLA_Error FLA_Apply_G_rf_opc_var3 | ( | int | k_G, |
| int | m_A, | ||
| int | n_A, | ||
| scomplex * | buff_G, | ||
| int | rs_G, | ||
| int | cs_G, | ||
| scomplex * | buff_A, | ||
| int | rs_A, | ||
| int | cs_A | ||
| ) |
References bli_s0(), bli_s1(), FLA_Apply_G_rf_opc_var1(), scomplex::imag, and scomplex::real.
Referenced by FLA_Apply_G_rf_opt_var3().
{
    // Applies k_G sets of rotations, read from G, to adjacent columns of the
    // single-precision complex matrix A, fusing up to four rotations (two
    // neighbouring rotations from each of two consecutive sets) per macro call.
    //
    // Rotation (g,k) lives at buff_G + g*rs_G + k*cs_G; its .real field is read
    // as gamma and its .imag field as sigma. It is handed to the MAC_Apply_G_*
    // macros together with the columns of A at offsets g*cs_A and (g+1)*cs_A.
    // A rotation with gamma == 1 and sigma == 0 is treated as the identity and
    // is never applied.
    //
    // Always returns FLA_SUCCESS.
    float     one  = bli_s1();
    float     zero = bli_s0();
    float     gamma23_k1;
    float     sigma23_k1;
    float     gamma34_k1;
    float     sigma34_k1;
    float     gamma12_k2;
    float     sigma12_k2;
    float     gamma23_k2;
    float     sigma23_k2;
    scomplex* a1;
    scomplex* a2;
    scomplex* a3;
    scomplex* a4;
    scomplex* g23_k1;
    scomplex* g34_k1;
    scomplex* g12_k2;
    scomplex* g23_k2;
    int       i, j, g, k;
    int       nG, nG_app;
    int       n_iter;
    int       n_left;
    int       k_minus_1;
    int       n_fuse;
    int       k_fuse;
    int       is_ident23_k1, is_ident34_k1;
    int       is_ident12_k2, is_ident23_k2;
    int       has_ident;

    k_minus_1 = k_G - 1;
    nG        = n_A - 1;
    n_fuse    = 2;
    k_fuse    = 2;

    // Use the simple variant for nG < 2(k - 1) or k == 1.
    if ( nG < 2*k_minus_1 || k_G == 1 )
    {
        FLA_Apply_G_rf_opc_var1( k_G,
                                 m_A,
                                 n_A,
                                 buff_G, rs_G, cs_G,
                                 buff_A, rs_A, cs_A );
        return FLA_SUCCESS;
    }

    // Start-up phase. j advances by n_fuse from -1; its final value is
    // carried into the pipeline stage below.
    for ( j = -1; j < k_minus_1; j += n_fuse )
    {
        nG_app = j + 2;
        n_iter = nG_app / k_fuse;

        for ( i = 0, k = 0, g = j; i < n_iter; ++i, k += k_fuse, g -= n_fuse )
        {
            g23_k1 = buff_G + (g    )*rs_G + (k    )*cs_G;
            g34_k1 = buff_G + (g + 1)*rs_G + (k    )*cs_G;
            g12_k2 = buff_G + (g - 1)*rs_G + (k + 1)*cs_G;
            g23_k2 = buff_G + (g    )*rs_G + (k + 1)*cs_G;
            a1     = buff_A + (g - 1)*cs_A;
            a2     = buff_A + (g    )*cs_A;
            a3     = buff_A + (g + 1)*cs_A;
            a4     = buff_A + (g + 2)*cs_A;

            gamma23_k1 = g23_k1->real;
            sigma23_k1 = g23_k1->imag;
            gamma34_k1 = g34_k1->real;
            sigma34_k1 = g34_k1->imag;
            gamma12_k2 = g12_k2->real;
            sigma12_k2 = g12_k2->imag;
            gamma23_k2 = g23_k2->real;
            sigma23_k2 = g23_k2->imag;

            is_ident23_k1 = ( gamma23_k1 == one && sigma23_k1 == zero );
            is_ident34_k1 = ( gamma34_k1 == one && sigma34_k1 == zero );
            is_ident12_k2 = ( gamma12_k2 == one && sigma12_k2 == zero );
            is_ident23_k2 = ( gamma23_k2 == one && sigma23_k2 == zero );
            has_ident     = ( is_ident23_k1 || is_ident34_k1 ||
                              is_ident12_k2 || is_ident23_k2 );

            if ( has_ident )
            {
                // Apply to pairs of columns as needed.
                if ( !is_ident23_k1 )
                {
                    MAC_Apply_G_mx2_opc( m_A,
                                         &gamma23_k1,
                                         &sigma23_k1,
                                         a2, rs_A,
                                         a3, rs_A );
                }
                if ( !is_ident34_k1 )
                {
                    MAC_Apply_G_mx2_opc( m_A,
                                         &gamma34_k1,
                                         &sigma34_k1,
                                         a3, rs_A,
                                         a4, rs_A );
                }
                if ( !is_ident12_k2 )
                {
                    MAC_Apply_G_mx2_opc( m_A,
                                         &gamma12_k2,
                                         &sigma12_k2,
                                         a1, rs_A,
                                         a2, rs_A );
                }
                if ( !is_ident23_k2 )
                {
                    MAC_Apply_G_mx2_opc( m_A,
                                         &gamma23_k2,
                                         &sigma23_k2,
                                         a2, rs_A,
                                         a3, rs_A );
                }
            }
            else
            {
                // Apply to all four columns.
                MAC_Apply_G_mx4s_opc( m_A,
                                      &gamma23_k1,
                                      &sigma23_k1,
                                      &gamma34_k1,
                                      &sigma34_k1,
                                      &gamma12_k2,
                                      &sigma12_k2,
                                      &gamma23_k2,
                                      &sigma23_k2,
                                      a1, rs_A,
                                      a2, rs_A,
                                      a3, rs_A,
                                      a4, rs_A );
            }
        }

        // Every start-up step ends with exactly one unfused rotation, taken
        // from the g and k values left behind by the loop above.
        {
            g34_k1 = buff_G + (g + 1)*rs_G + (k    )*cs_G;
            a3     = buff_A + (g + 1)*cs_A;
            a4     = buff_A + (g + 2)*cs_A;

            gamma34_k1 = g34_k1->real;
            sigma34_k1 = g34_k1->imag;

            is_ident34_k1 = ( gamma34_k1 == one && sigma34_k1 == zero );

            if ( !is_ident34_k1 )
            {
                MAC_Apply_G_mx2_opc( m_A,
                                     &gamma34_k1,
                                     &sigma34_k1,
                                     a3, rs_A,
                                     a4, rs_A );
            }
        }
    }

    // Pipeline stage. Continues from the j reached by the start-up phase.
    for ( ; j < nG - 1; j += n_fuse )
    {
        nG_app = k_G;
        n_iter = nG_app / k_fuse;
        n_left = nG_app % k_fuse;

        for ( i = 0, k = 0, g = j; i < n_iter; ++i, k += k_fuse, g -= n_fuse )
        {
            g23_k1 = buff_G + (g    )*rs_G + (k    )*cs_G;
            g34_k1 = buff_G + (g + 1)*rs_G + (k    )*cs_G;
            g12_k2 = buff_G + (g - 1)*rs_G + (k + 1)*cs_G;
            g23_k2 = buff_G + (g    )*rs_G + (k + 1)*cs_G;
            a1     = buff_A + (g - 1)*cs_A;
            a2     = buff_A + (g    )*cs_A;
            a3     = buff_A + (g + 1)*cs_A;
            a4     = buff_A + (g + 2)*cs_A;

            gamma23_k1 = g23_k1->real;
            sigma23_k1 = g23_k1->imag;
            gamma34_k1 = g34_k1->real;
            sigma34_k1 = g34_k1->imag;
            gamma12_k2 = g12_k2->real;
            sigma12_k2 = g12_k2->imag;
            gamma23_k2 = g23_k2->real;
            sigma23_k2 = g23_k2->imag;

            is_ident23_k1 = ( gamma23_k1 == one && sigma23_k1 == zero );
            is_ident34_k1 = ( gamma34_k1 == one && sigma34_k1 == zero );
            is_ident12_k2 = ( gamma12_k2 == one && sigma12_k2 == zero );
            is_ident23_k2 = ( gamma23_k2 == one && sigma23_k2 == zero );
            has_ident     = ( is_ident23_k1 || is_ident34_k1 ||
                              is_ident12_k2 || is_ident23_k2 );

            if ( has_ident )
            {
                // Apply to pairs of columns as needed.
                if ( !is_ident23_k1 )
                {
                    MAC_Apply_G_mx2_opc( m_A,
                                         &gamma23_k1,
                                         &sigma23_k1,
                                         a2, rs_A,
                                         a3, rs_A );
                }
                if ( !is_ident34_k1 )
                {
                    MAC_Apply_G_mx2_opc( m_A,
                                         &gamma34_k1,
                                         &sigma34_k1,
                                         a3, rs_A,
                                         a4, rs_A );
                }
                if ( !is_ident12_k2 )
                {
                    MAC_Apply_G_mx2_opc( m_A,
                                         &gamma12_k2,
                                         &sigma12_k2,
                                         a1, rs_A,
                                         a2, rs_A );
                }
                if ( !is_ident23_k2 )
                {
                    MAC_Apply_G_mx2_opc( m_A,
                                         &gamma23_k2,
                                         &sigma23_k2,
                                         a2, rs_A,
                                         a3, rs_A );
                }
            }
            else
            {
                // Apply to all four columns.
                MAC_Apply_G_mx4s_opc( m_A,
                                      &gamma23_k1,
                                      &sigma23_k1,
                                      &gamma34_k1,
                                      &sigma34_k1,
                                      &gamma12_k2,
                                      &sigma12_k2,
                                      &gamma23_k2,
                                      &sigma23_k2,
                                      a1, rs_A,
                                      a2, rs_A,
                                      a3, rs_A,
                                      a4, rs_A );
            }
        }

        // Odd k_G: one set is left over, contributing two neighbouring
        // rotations that touch three columns.
        if ( n_left == 1 )
        {
            g23_k1 = buff_G + (g    )*rs_G + (k    )*cs_G;
            g34_k1 = buff_G + (g + 1)*rs_G + (k    )*cs_G;
            a2     = buff_A + (g    )*cs_A;
            a3     = buff_A + (g + 1)*cs_A;
            a4     = buff_A + (g + 2)*cs_A;

            gamma23_k1 = g23_k1->real;
            sigma23_k1 = g23_k1->imag;
            gamma34_k1 = g34_k1->real;
            sigma34_k1 = g34_k1->imag;

            is_ident23_k1 = ( gamma23_k1 == one && sigma23_k1 == zero );
            is_ident34_k1 = ( gamma34_k1 == one && sigma34_k1 == zero );

            if ( !is_ident23_k1 && is_ident34_k1 )
            {
                MAC_Apply_G_mx2_opc( m_A,
                                     &gamma23_k1,
                                     &sigma23_k1,
                                     a2, rs_A,
                                     a3, rs_A );
            }
            else if ( is_ident23_k1 && !is_ident34_k1 )
            {
                MAC_Apply_G_mx2_opc( m_A,
                                     &gamma34_k1,
                                     &sigma34_k1,
                                     a3, rs_A,
                                     a4, rs_A );
            }
            else if ( !is_ident23_k1 && !is_ident34_k1 )
            {
                // Both rotations are real work; when both are the identity
                // nothing is applied, as in the fused loop above.
                MAC_Apply_G_mx3_opc( m_A,
                                     &gamma23_k1,
                                     &sigma23_k1,
                                     &gamma34_k1,
                                     &sigma34_k1,
                                     a2, rs_A,
                                     a3, rs_A,
                                     a4, rs_A );
            }
        }
    }

    // Shutdown stage
    for ( j = nG % n_fuse; j < k_G; j += n_fuse )
    {
        g = nG - 1;
        k = j;

        // Each shutdown step opens with one unfused rotation on the last
        // column pair, then steps to the next set and the previous pair.
        {
            g23_k1 = buff_G + (g    )*rs_G + (k    )*cs_G;
            a2     = buff_A + (g    )*cs_A;
            a3     = buff_A + (g + 1)*cs_A;

            gamma23_k1 = g23_k1->real;
            sigma23_k1 = g23_k1->imag;

            is_ident23_k1 = ( gamma23_k1 == one && sigma23_k1 == zero );

            if ( !is_ident23_k1 )
            {
                MAC_Apply_G_mx2_opc( m_A,
                                     &gamma23_k1,
                                     &sigma23_k1,
                                     a2, rs_A,
                                     a3, rs_A );
            }
            ++k;
            --g;
        }

        nG_app = k_minus_1 - j;
        n_iter = nG_app / k_fuse;
        n_left = nG_app % k_fuse;

        for ( i = 0; i < n_iter; ++i, k += k_fuse, g -= n_fuse )
        {
            g23_k1 = buff_G + (g    )*rs_G + (k    )*cs_G;
            g34_k1 = buff_G + (g + 1)*rs_G + (k    )*cs_G;
            g12_k2 = buff_G + (g - 1)*rs_G + (k + 1)*cs_G;
            g23_k2 = buff_G + (g    )*rs_G + (k + 1)*cs_G;
            a1     = buff_A + (g - 1)*cs_A;
            a2     = buff_A + (g    )*cs_A;
            a3     = buff_A + (g + 1)*cs_A;
            a4     = buff_A + (g + 2)*cs_A;

            gamma23_k1 = g23_k1->real;
            sigma23_k1 = g23_k1->imag;
            gamma34_k1 = g34_k1->real;
            sigma34_k1 = g34_k1->imag;
            gamma12_k2 = g12_k2->real;
            sigma12_k2 = g12_k2->imag;
            gamma23_k2 = g23_k2->real;
            sigma23_k2 = g23_k2->imag;

            is_ident23_k1 = ( gamma23_k1 == one && sigma23_k1 == zero );
            is_ident34_k1 = ( gamma34_k1 == one && sigma34_k1 == zero );
            is_ident12_k2 = ( gamma12_k2 == one && sigma12_k2 == zero );
            is_ident23_k2 = ( gamma23_k2 == one && sigma23_k2 == zero );
            has_ident     = ( is_ident23_k1 || is_ident34_k1 ||
                              is_ident12_k2 || is_ident23_k2 );

            if ( has_ident )
            {
                // Apply to pairs of columns as needed.
                if ( !is_ident23_k1 )
                {
                    MAC_Apply_G_mx2_opc( m_A,
                                         &gamma23_k1,
                                         &sigma23_k1,
                                         a2, rs_A,
                                         a3, rs_A );
                }
                if ( !is_ident34_k1 )
                {
                    MAC_Apply_G_mx2_opc( m_A,
                                         &gamma34_k1,
                                         &sigma34_k1,
                                         a3, rs_A,
                                         a4, rs_A );
                }
                if ( !is_ident12_k2 )
                {
                    MAC_Apply_G_mx2_opc( m_A,
                                         &gamma12_k2,
                                         &sigma12_k2,
                                         a1, rs_A,
                                         a2, rs_A );
                }
                if ( !is_ident23_k2 )
                {
                    MAC_Apply_G_mx2_opc( m_A,
                                         &gamma23_k2,
                                         &sigma23_k2,
                                         a2, rs_A,
                                         a3, rs_A );
                }
            }
            else
            {
                // Apply to all four columns.
                MAC_Apply_G_mx4s_opc( m_A,
                                      &gamma23_k1,
                                      &sigma23_k1,
                                      &gamma34_k1,
                                      &sigma34_k1,
                                      &gamma12_k2,
                                      &sigma12_k2,
                                      &gamma23_k2,
                                      &sigma23_k2,
                                      a1, rs_A,
                                      a2, rs_A,
                                      a3, rs_A,
                                      a4, rs_A );
            }
        }

        // One set left over: two neighbouring rotations over three columns.
        if ( n_left == 1 )
        {
            g23_k1 = buff_G + (g    )*rs_G + (k    )*cs_G;
            g34_k1 = buff_G + (g + 1)*rs_G + (k    )*cs_G;
            a2     = buff_A + (g    )*cs_A;
            a3     = buff_A + (g + 1)*cs_A;
            a4     = buff_A + (g + 2)*cs_A;

            gamma23_k1 = g23_k1->real;
            sigma23_k1 = g23_k1->imag;
            gamma34_k1 = g34_k1->real;
            sigma34_k1 = g34_k1->imag;

            is_ident23_k1 = ( gamma23_k1 == one && sigma23_k1 == zero );
            is_ident34_k1 = ( gamma34_k1 == one && sigma34_k1 == zero );

            if ( !is_ident23_k1 && is_ident34_k1 )
            {
                MAC_Apply_G_mx2_opc( m_A,
                                     &gamma23_k1,
                                     &sigma23_k1,
                                     a2, rs_A,
                                     a3, rs_A );
            }
            else if ( is_ident23_k1 && !is_ident34_k1 )
            {
                MAC_Apply_G_mx2_opc( m_A,
                                     &gamma34_k1,
                                     &sigma34_k1,
                                     a3, rs_A,
                                     a4, rs_A );
            }
            else if ( !is_ident23_k1 && !is_ident34_k1 )
            {
                // Skip entirely when both rotations are the identity.
                MAC_Apply_G_mx3_opc( m_A,
                                     &gamma23_k1,
                                     &sigma23_k1,
                                     &gamma34_k1,
                                     &sigma34_k1,
                                     a2, rs_A,
                                     a3, rs_A,
                                     a4, rs_A );
            }
        }
    }

    return FLA_SUCCESS;
}
| FLA_Error FLA_Apply_G_rf_opc_var4 | ( | int | k_G, |
| int | m_A, | ||
| int | n_A, | ||
| scomplex * | buff_G, | ||
| int | rs_G, | ||
| int | cs_G, | ||
| scomplex * | buff_A, | ||
| int | rs_A, | ||
| int | cs_A | ||
| ) |
| FLA_Error FLA_Apply_G_rf_opc_var5 | ( | int | k_G, |
| int | m_A, | ||
| int | n_A, | ||
| scomplex * | buff_G, | ||
| int | rs_G, | ||
| int | cs_G, | ||
| scomplex * | buff_A, | ||
| int | rs_A, | ||
| int | cs_A | ||
| ) |
| FLA_Error FLA_Apply_G_rf_opc_var6 | ( | int | k_G, |
| int | m_A, | ||
| int | n_A, | ||
| scomplex * | buff_G, | ||
| int | rs_G, | ||
| int | cs_G, | ||
| scomplex * | buff_A, | ||
| int | rs_A, | ||
| int | cs_A | ||
| ) |
References bli_s0(), bli_s1(), FLA_Apply_G_rf_opc_var1(), scomplex::imag, and scomplex::real.
Referenced by FLA_Apply_G_rf_opt_var6().
{
    // Applies k_G sets of rotations from G to adjacent columns of the
    // single-precision complex matrix A, pairing rotation (g,k) with rotation
    // (g-1,k+1) so that three neighbouring columns are handled per macro call.
    //
    // Rotation (g,k) is read from buff_G + g*rs_G + k*cs_G (.real -> gamma,
    // .imag -> sigma) and goes with the columns at g*cs_A and (g+1)*cs_A.
    // Rotations with gamma == 1 and sigma == 0 are skipped.
    //
    // Always returns FLA_SUCCESS.
    float     one  = bli_s1();
    float     zero = bli_s0();
    float     gamma12, sigma12;
    float     gamma23, sigma23;
    scomplex* a1;
    scomplex* a2;
    scomplex* a3;
    scomplex* rot12;
    scomplex* rot23;
    int       last_set = k_G - 1;
    int       n_rot    = n_A - 1;
    int       stride   = 2;
    int       wave, pairs, n_app;
    int       g, k;
    int       skip12, skip23;

    // With a single set, or fewer rotations per set than k_G - 1, defer to
    // the unfused variant.
    if ( k_G == 1 || n_rot < last_set )
    {
        FLA_Apply_G_rf_opc_var1( k_G,
                                 m_A,
                                 n_A,
                                 buff_G, rs_G, cs_G,
                                 buff_A, rs_A, cs_A );
        return FLA_SUCCESS;
    }

    // Start-up: wave w walks sets 0..w, beginning at rotation index w.
    for ( wave = 0; wave < last_set; ++wave )
    {
        n_app = wave + 1;
        k     = 0;
        g     = wave;

        for ( pairs = n_app / stride; pairs > 0; --pairs )
        {
            rot12 = buff_G + (g - 1)*rs_G + (k + 1)*cs_G;
            rot23 = buff_G + (g    )*rs_G + (k    )*cs_G;
            a1    = buff_A + (g - 1)*cs_A;
            a2    = buff_A + (g    )*cs_A;
            a3    = buff_A + (g + 1)*cs_A;

            gamma12 = rot12->real;
            sigma12 = rot12->imag;
            gamma23 = rot23->real;
            sigma23 = rot23->imag;

            skip12 = ( gamma12 == one && sigma12 == zero );
            skip23 = ( gamma23 == one && sigma23 == zero );

            if ( skip12 )
            {
                if ( !skip23 )
                {
                    // Only the rotation on columns 2 and 3 does any work.
                    MAC_Apply_G_mx2_opc( m_A,
                                         &gamma23,
                                         &sigma23,
                                         a2, rs_A,
                                         a3, rs_A );
                }
            }
            else if ( skip23 )
            {
                // Only the rotation on columns 1 and 2 does any work.
                MAC_Apply_G_mx2_opc( m_A,
                                     &gamma12,
                                     &sigma12,
                                     a1, rs_A,
                                     a2, rs_A );
            }
            else
            {
                // Both rotations are live: fused three-column update.
                MAC_Apply_G_mx3b_opc( m_A,
                                      &gamma12,
                                      &sigma12,
                                      &gamma23,
                                      &sigma23,
                                      a1, rs_A,
                                      a2, rs_A,
                                      a3, rs_A );
            }

            k += stride;
            g -= stride;
        }

        // An odd count leaves one unpaired rotation at the current (g,k).
        if ( n_app % stride == 1 )
        {
            rot23 = buff_G + (g    )*rs_G + (k    )*cs_G;
            a2    = buff_A + (g    )*cs_A;
            a3    = buff_A + (g + 1)*cs_A;

            gamma23 = rot23->real;
            sigma23 = rot23->imag;

            skip23 = ( gamma23 == one && sigma23 == zero );

            if ( !skip23 )
            {
                MAC_Apply_G_mx2_opc( m_A,
                                     &gamma23,
                                     &sigma23,
                                     a2, rs_A,
                                     a3, rs_A );
            }
        }
    }

    // Pipeline: every wave now walks all k_G sets.
    for ( wave = last_set; wave < n_rot; ++wave )
    {
        n_app = k_G;
        k     = 0;
        g     = wave;

        for ( pairs = n_app / stride; pairs > 0; --pairs )
        {
            rot12 = buff_G + (g - 1)*rs_G + (k + 1)*cs_G;
            rot23 = buff_G + (g    )*rs_G + (k    )*cs_G;
            a1    = buff_A + (g - 1)*cs_A;
            a2    = buff_A + (g    )*cs_A;
            a3    = buff_A + (g + 1)*cs_A;

            gamma12 = rot12->real;
            sigma12 = rot12->imag;
            gamma23 = rot23->real;
            sigma23 = rot23->imag;

            skip12 = ( gamma12 == one && sigma12 == zero );
            skip23 = ( gamma23 == one && sigma23 == zero );

            if ( skip12 )
            {
                if ( !skip23 )
                {
                    // Only the rotation on columns 2 and 3 does any work.
                    MAC_Apply_G_mx2_opc( m_A,
                                         &gamma23,
                                         &sigma23,
                                         a2, rs_A,
                                         a3, rs_A );
                }
            }
            else if ( skip23 )
            {
                // Only the rotation on columns 1 and 2 does any work.
                MAC_Apply_G_mx2_opc( m_A,
                                     &gamma12,
                                     &sigma12,
                                     a1, rs_A,
                                     a2, rs_A );
            }
            else
            {
                // Both rotations are live: fused three-column update.
                MAC_Apply_G_mx3b_opc( m_A,
                                      &gamma12,
                                      &sigma12,
                                      &gamma23,
                                      &sigma23,
                                      a1, rs_A,
                                      a2, rs_A,
                                      a3, rs_A );
            }

            k += stride;
            g -= stride;
        }

        // An odd count leaves one unpaired rotation at the current (g,k).
        if ( n_app % stride == 1 )
        {
            rot23 = buff_G + (g    )*rs_G + (k    )*cs_G;
            a2    = buff_A + (g    )*cs_A;
            a3    = buff_A + (g + 1)*cs_A;

            gamma23 = rot23->real;
            sigma23 = rot23->imag;

            skip23 = ( gamma23 == one && sigma23 == zero );

            if ( !skip23 )
            {
                MAC_Apply_G_mx2_opc( m_A,
                                     &gamma23,
                                     &sigma23,
                                     a2, rs_A,
                                     a3, rs_A );
            }
        }
    }

    // Shutdown: wave w starts at set w and at the last rotation index.
    for ( wave = 1; wave < k_G; ++wave )
    {
        n_app = k_G - wave;
        k     = wave;
        g     = n_rot - 1;

        for ( pairs = n_app / stride; pairs > 0; --pairs )
        {
            rot12 = buff_G + (g - 1)*rs_G + (k + 1)*cs_G;
            rot23 = buff_G + (g    )*rs_G + (k    )*cs_G;
            a1    = buff_A + (g - 1)*cs_A;
            a2    = buff_A + (g    )*cs_A;
            a3    = buff_A + (g + 1)*cs_A;

            gamma12 = rot12->real;
            sigma12 = rot12->imag;
            gamma23 = rot23->real;
            sigma23 = rot23->imag;

            skip12 = ( gamma12 == one && sigma12 == zero );
            skip23 = ( gamma23 == one && sigma23 == zero );

            if ( skip12 )
            {
                if ( !skip23 )
                {
                    // Only the rotation on columns 2 and 3 does any work.
                    MAC_Apply_G_mx2_opc( m_A,
                                         &gamma23,
                                         &sigma23,
                                         a2, rs_A,
                                         a3, rs_A );
                }
            }
            else if ( skip23 )
            {
                // Only the rotation on columns 1 and 2 does any work.
                MAC_Apply_G_mx2_opc( m_A,
                                     &gamma12,
                                     &sigma12,
                                     a1, rs_A,
                                     a2, rs_A );
            }
            else
            {
                // Both rotations are live: fused three-column update.
                MAC_Apply_G_mx3b_opc( m_A,
                                      &gamma12,
                                      &sigma12,
                                      &gamma23,
                                      &sigma23,
                                      a1, rs_A,
                                      a2, rs_A,
                                      a3, rs_A );
            }

            k += stride;
            g -= stride;
        }

        // An odd count leaves one unpaired rotation at the current (g,k).
        if ( n_app % stride == 1 )
        {
            rot23 = buff_G + (g    )*rs_G + (k    )*cs_G;
            a2    = buff_A + (g    )*cs_A;
            a3    = buff_A + (g + 1)*cs_A;

            gamma23 = rot23->real;
            sigma23 = rot23->imag;

            skip23 = ( gamma23 == one && sigma23 == zero );

            if ( !skip23 )
            {
                MAC_Apply_G_mx2_opc( m_A,
                                     &gamma23,
                                     &sigma23,
                                     a2, rs_A,
                                     a3, rs_A );
            }
        }
    }

    return FLA_SUCCESS;
}
| FLA_Error FLA_Apply_G_rf_opc_var7 | ( | int | k_G, |
| int | m_A, | ||
| int | n_A, | ||
| scomplex * | buff_G, | ||
| int | rs_G, | ||
| int | cs_G, | ||
| scomplex * | buff_A, | ||
| int | rs_A, | ||
| int | cs_A | ||
| ) |
| FLA_Error FLA_Apply_G_rf_opc_var8 | ( | int | k_G, |
| int | m_A, | ||
| int | n_A, | ||
| scomplex * | buff_G, | ||
| int | rs_G, | ||
| int | cs_G, | ||
| scomplex * | buff_A, | ||
| int | rs_A, | ||
| int | cs_A | ||
| ) |
| FLA_Error FLA_Apply_G_rf_opc_var9 | ( | int | k_G, |
| int | m_A, | ||
| int | n_A, | ||
| scomplex * | buff_G, | ||
| int | rs_G, | ||
| int | cs_G, | ||
| scomplex * | buff_A, | ||
| int | rs_A, | ||
| int | cs_A | ||
| ) |
References bli_s0(), bli_s1(), FLA_Apply_G_rf_opc_var1(), scomplex::imag, and scomplex::real.
Referenced by FLA_Apply_G_rf_opt_var9().
{
    // Applies k_G sets of rotations from G to adjacent columns of the
    // single-precision complex matrix A, fusing two neighbouring rotations
    // of the same set, (g,k) and (g+1,k), into one three-column macro call.
    //
    // Rotation (g,k) is read from buff_G + g*rs_G + k*cs_G (.real -> gamma,
    // .imag -> sigma) and goes with the columns at g*cs_A and (g+1)*cs_A.
    // Rotations with gamma == 1 and sigma == 0 are skipped.
    //
    // Always returns FLA_SUCCESS.
    float     one  = bli_s1();
    float     zero = bli_s0();
    float     gamma12, sigma12;
    float     gamma23, sigma23;
    scomplex* a1;
    scomplex* a2;
    scomplex* a3;
    scomplex* rot12;
    scomplex* rot23;
    int       last_set = k_G - 1;
    int       n_rot    = n_A - 1;
    int       step     = 2;
    int       j, g, k;
    int       skip12, skip23;

    // With a single set, or fewer than 2*(k_G - 1) rotations per set, defer
    // to the unfused variant.
    if ( k_G == 1 || n_rot < 2*last_set )
    {
        FLA_Apply_G_rf_opc_var1( k_G,
                                 m_A,
                                 n_A,
                                 buff_G, rs_G, cs_G,
                                 buff_A, rs_A, cs_A );
        return FLA_SUCCESS;
    }

    // Start-up: j + 1 fused pairs followed by one unfused rotation. The
    // final j is carried over into the pipeline stage.
    for ( j = -1; j < last_set; j += step )
    {
        for ( k = 0, g = j; k < j + 1; ++k, --g )
        {
            rot12 = buff_G + (g    )*rs_G + (k    )*cs_G;
            rot23 = buff_G + (g + 1)*rs_G + (k    )*cs_G;
            a1    = buff_A + (g    )*cs_A;
            a2    = buff_A + (g + 1)*cs_A;
            a3    = buff_A + (g + 2)*cs_A;

            gamma12 = rot12->real;
            sigma12 = rot12->imag;
            gamma23 = rot23->real;
            sigma23 = rot23->imag;

            skip12 = ( gamma12 == one && sigma12 == zero );
            skip23 = ( gamma23 == one && sigma23 == zero );

            if ( skip12 )
            {
                if ( !skip23 )
                {
                    // Only the rotation on columns 2 and 3 does any work.
                    MAC_Apply_G_mx2_opc( m_A,
                                         &gamma23,
                                         &sigma23,
                                         a2, rs_A,
                                         a3, rs_A );
                }
            }
            else if ( skip23 )
            {
                // Only the rotation on columns 1 and 2 does any work.
                MAC_Apply_G_mx2_opc( m_A,
                                     &gamma12,
                                     &sigma12,
                                     a1, rs_A,
                                     a2, rs_A );
            }
            else
            {
                // Both rotations are live: fused three-column update.
                MAC_Apply_G_mx3_opc( m_A,
                                     &gamma12,
                                     &sigma12,
                                     &gamma23,
                                     &sigma23,
                                     a1, rs_A,
                                     a2, rs_A,
                                     a3, rs_A );
            }
        }

        // Trailing unfused rotation, addressed from the g and k left behind
        // by the loop above.
        rot23 = buff_G + (g + 1)*rs_G + (k    )*cs_G;
        a2    = buff_A + (g + 1)*cs_A;
        a3    = buff_A + (g + 2)*cs_A;

        gamma23 = rot23->real;
        sigma23 = rot23->imag;

        skip23 = ( gamma23 == one && sigma23 == zero );

        if ( !skip23 )
        {
            MAC_Apply_G_mx2_opc( m_A,
                                 &gamma23,
                                 &sigma23,
                                 a2, rs_A,
                                 a3, rs_A );
        }
    }

    // Pipeline: k_G fused pairs per step, no leftover.
    for ( ; j < n_rot - 1; j += step )
    {
        for ( k = 0, g = j; k < k_G; ++k, --g )
        {
            rot12 = buff_G + (g    )*rs_G + (k    )*cs_G;
            rot23 = buff_G + (g + 1)*rs_G + (k    )*cs_G;
            a1    = buff_A + (g    )*cs_A;
            a2    = buff_A + (g + 1)*cs_A;
            a3    = buff_A + (g + 2)*cs_A;

            gamma12 = rot12->real;
            sigma12 = rot12->imag;
            gamma23 = rot23->real;
            sigma23 = rot23->imag;

            skip12 = ( gamma12 == one && sigma12 == zero );
            skip23 = ( gamma23 == one && sigma23 == zero );

            if ( skip12 )
            {
                if ( !skip23 )
                {
                    // Only the rotation on columns 2 and 3 does any work.
                    MAC_Apply_G_mx2_opc( m_A,
                                         &gamma23,
                                         &sigma23,
                                         a2, rs_A,
                                         a3, rs_A );
                }
            }
            else if ( skip23 )
            {
                // Only the rotation on columns 1 and 2 does any work.
                MAC_Apply_G_mx2_opc( m_A,
                                     &gamma12,
                                     &sigma12,
                                     a1, rs_A,
                                     a2, rs_A );
            }
            else
            {
                // Both rotations are live: fused three-column update.
                MAC_Apply_G_mx3_opc( m_A,
                                     &gamma12,
                                     &sigma12,
                                     &gamma23,
                                     &sigma23,
                                     a1, rs_A,
                                     a2, rs_A,
                                     a3, rs_A );
            }
        }
    }

    // Shutdown: one unfused rotation on the last column pair, then fused
    // pairs through the remaining sets.
    for ( j = n_rot % step; j < k_G; j += step )
    {
        g = n_rot - 1;
        k = j;

        rot12 = buff_G + (g    )*rs_G + (k    )*cs_G;
        a1    = buff_A + (g    )*cs_A;
        a2    = buff_A + (g + 1)*cs_A;

        gamma12 = rot12->real;
        sigma12 = rot12->imag;

        skip12 = ( gamma12 == one && sigma12 == zero );

        if ( !skip12 )
        {
            MAC_Apply_G_mx2_opc( m_A,
                                 &gamma12,
                                 &sigma12,
                                 a1, rs_A,
                                 a2, rs_A );
        }

        // Continue from the next set and the previous column pair.
        for ( ++k, --g; k < k_G; ++k, --g )
        {
            rot12 = buff_G + (g    )*rs_G + (k    )*cs_G;
            rot23 = buff_G + (g + 1)*rs_G + (k    )*cs_G;
            a1    = buff_A + (g    )*cs_A;
            a2    = buff_A + (g + 1)*cs_A;
            a3    = buff_A + (g + 2)*cs_A;

            gamma12 = rot12->real;
            sigma12 = rot12->imag;
            gamma23 = rot23->real;
            sigma23 = rot23->imag;

            skip12 = ( gamma12 == one && sigma12 == zero );
            skip23 = ( gamma23 == one && sigma23 == zero );

            if ( skip12 )
            {
                if ( !skip23 )
                {
                    // Only the rotation on columns 2 and 3 does any work.
                    MAC_Apply_G_mx2_opc( m_A,
                                         &gamma23,
                                         &sigma23,
                                         a2, rs_A,
                                         a3, rs_A );
                }
            }
            else if ( skip23 )
            {
                // Only the rotation on columns 1 and 2 does any work.
                MAC_Apply_G_mx2_opc( m_A,
                                     &gamma12,
                                     &sigma12,
                                     a1, rs_A,
                                     a2, rs_A );
            }
            else
            {
                // Both rotations are live: fused three-column update.
                MAC_Apply_G_mx3_opc( m_A,
                                     &gamma12,
                                     &sigma12,
                                     &gamma23,
                                     &sigma23,
                                     a1, rs_A,
                                     a2, rs_A,
                                     a3, rs_A );
            }
        }
    }

    return FLA_SUCCESS;
}
| FLA_Error FLA_Apply_G_rf_opd_var1 | ( | int | k_G, |
| int | m_A, | ||
| int | n_A, | ||
| dcomplex * | buff_G, | ||
| int | rs_G, | ||
| int | cs_G, | ||
| double * | buff_A, | ||
| int | rs_A, | ||
| int | cs_A | ||
| ) |
References bli_d0(), bli_d1(), dcomplex::imag, and dcomplex::real.
Referenced by FLA_Apply_G_rf_opd_var2(), FLA_Apply_G_rf_opd_var3(), FLA_Apply_G_rf_opd_var6(), FLA_Apply_G_rf_opd_var9(), and FLA_Apply_G_rf_opt_var1().
{
    // Reference (unfused) kernel: for each of the k_G sets in turn, walks the
    // n_A - 1 rotations of that set in order and hands each one, together
    // with its pair of adjacent columns of the real double-precision matrix
    // A, to MAC_Apply_G_mx2_opd.
    //
    // Rotation (col,set) is read from buff_G + col*rs_G + set*cs_G, with
    // .real taken as gamma and .imag as sigma. Rotations with gamma == 1 and
    // sigma == 0 are skipped.
    //
    // Always returns FLA_SUCCESS.
    double    one  = bli_d1();
    double    zero = bli_d0();
    double    gamma;
    double    sigma;
    double*   a1;
    double*   a2;
    dcomplex* rot;
    int       n_rot = n_A - 1;
    int       set, col;

    for ( set = 0; set < k_G; ++set )
    {
        for ( col = 0; col < n_rot; ++col )
        {
            rot = buff_G + col*rs_G + set*cs_G;

            gamma = rot->real;
            sigma = rot->imag;

            // Nothing to do for an identity rotation.
            if ( gamma == one && sigma == zero )
            {
                continue;
            }

            a1 = buff_A + col*cs_A;
            a2 = a1 + cs_A;

            MAC_Apply_G_mx2_opd( m_A,
                                 &gamma,
                                 &sigma,
                                 a1, rs_A,
                                 a2, rs_A );
        }
    }

    return FLA_SUCCESS;
}
| FLA_Error FLA_Apply_G_rf_opd_var2 | ( | int | k_G, |
| int | m_A, | ||
| int | n_A, | ||
| dcomplex * | buff_G, | ||
| int | rs_G, | ||
| int | cs_G, | ||
| double * | buff_A, | ||
| int | rs_A, | ||
| int | cs_A | ||
| ) |
References bli_d0(), bli_d1(), FLA_Apply_G_rf_opd_var1(), dcomplex::imag, and dcomplex::real.
Referenced by FLA_Apply_G_rf_opt_var2().
{
    // Applies k_G sets of rotations from G to adjacent columns of the real
    // double-precision matrix A, one rotation per macro call, visiting the
    // rotations along anti-diagonals of G: within one wave the set index
    // rises while the rotation index falls.
    //
    // Rotation (col,set) is read from buff_G + col*rs_G + set*cs_G (.real ->
    // gamma, .imag -> sigma) and goes with the columns at col*cs_A and
    // (col+1)*cs_A. Rotations with gamma == 1 and sigma == 0 are skipped.
    //
    // Always returns FLA_SUCCESS.
    double    one  = bli_d1();
    double    zero = bli_d0();
    double    gamma;
    double    sigma;
    double*   a1;
    double*   a2;
    dcomplex* rot;
    int       last_set = k_G - 1;
    int       n_rot    = n_A - 1;
    int       wave, set, col;
    int       n_app, first_set;

    // With a single set, or fewer rotations per set than k_G - 1, defer to
    // the simple variant.
    if ( k_G == 1 || n_rot < last_set )
    {
        FLA_Apply_G_rf_opd_var1( k_G,
                                 m_A,
                                 n_A,
                                 buff_G, rs_G, cs_G,
                                 buff_A, rs_A, cs_A );
        return FLA_SUCCESS;
    }

    // Start-up: wave w covers sets 0..w, starting from rotation index w.
    for ( wave = 0; wave < last_set; ++wave )
    {
        for ( set = 0; set <= wave; ++set )
        {
            col = wave - set;
            rot = buff_G + col*rs_G + set*cs_G;
            a1  = buff_A + (col    )*cs_A;
            a2  = buff_A + (col + 1)*cs_A;

            gamma = rot->real;
            sigma = rot->imag;

            // Apply only rotations that are not the identity.
            if ( gamma != one || sigma != zero )
            {
                MAC_Apply_G_mx2_opd( m_A,
                                     &gamma,
                                     &sigma,
                                     a1, rs_A,
                                     a2, rs_A );
            }
        }
    }

    // Pipeline: each wave covers all k_G sets.
    for ( wave = last_set; wave < n_rot; ++wave )
    {
        for ( set = 0; set < k_G; ++set )
        {
            col = wave - set;
            rot = buff_G + col*rs_G + set*cs_G;
            a1  = buff_A + (col    )*cs_A;
            a2  = buff_A + (col + 1)*cs_A;

            gamma = rot->real;
            sigma = rot->imag;

            // Apply only rotations that are not the identity.
            if ( gamma != one || sigma != zero )
            {
                MAC_Apply_G_mx2_opd( m_A,
                                     &gamma,
                                     &sigma,
                                     a1, rs_A,
                                     a2, rs_A );
            }
        }
    }

    // Shutdown: the number of sets still to visit shrinks from k_G - 1 to 1;
    // every wave starts at the last rotation index.
    for ( n_app = last_set; n_app >= 1; --n_app )
    {
        first_set = k_G - n_app;

        for ( set = first_set; set < k_G; ++set )
        {
            col = ( n_rot - 1 ) - ( set - first_set );
            rot = buff_G + col*rs_G + set*cs_G;
            a1  = buff_A + (col    )*cs_A;
            a2  = buff_A + (col + 1)*cs_A;

            gamma = rot->real;
            sigma = rot->imag;

            // Apply only rotations that are not the identity.
            if ( gamma != one || sigma != zero )
            {
                MAC_Apply_G_mx2_opd( m_A,
                                     &gamma,
                                     &sigma,
                                     a1, rs_A,
                                     a2, rs_A );
            }
        }
    }

    return FLA_SUCCESS;
}
| FLA_Error FLA_Apply_G_rf_opd_var3 | ( | int | k_G, |
| int | m_A, | ||
| int | n_A, | ||
| dcomplex * | buff_G, | ||
| int | rs_G, | ||
| int | cs_G, | ||
| double * | buff_A, | ||
| int | rs_A, | ||
| int | cs_A | ||
| ) |
References bli_d0(), bli_d1(), FLA_Apply_G_rf_opd_var1(), dcomplex::imag, and dcomplex::real.
Referenced by FLA_Apply_G_rf_opt_var3().
{
    // Applies k_G sets of rotations, read from G, to adjacent columns of the
    // real double-precision matrix A, fusing up to four rotations (two
    // neighbouring rotations from each of two consecutive sets) per macro call.
    //
    // Rotation (g,k) lives at buff_G + g*rs_G + k*cs_G; its .real field is read
    // as gamma and its .imag field as sigma. It is handed to the MAC_Apply_G_*
    // macros together with the columns of A at offsets g*cs_A and (g+1)*cs_A.
    // A rotation with gamma == 1 and sigma == 0 is treated as the identity and
    // is never applied.
    //
    // Always returns FLA_SUCCESS.
    double    one  = bli_d1();
    double    zero = bli_d0();
    double    gamma23_k1;
    double    sigma23_k1;
    double    gamma34_k1;
    double    sigma34_k1;
    double    gamma12_k2;
    double    sigma12_k2;
    double    gamma23_k2;
    double    sigma23_k2;
    double*   a1;
    double*   a2;
    double*   a3;
    double*   a4;
    dcomplex* g23_k1;
    dcomplex* g34_k1;
    dcomplex* g12_k2;
    dcomplex* g23_k2;
    int       i, j, g, k;
    int       nG, nG_app;
    int       n_iter;
    int       n_left;
    int       k_minus_1;
    int       n_fuse;
    int       k_fuse;
    int       is_ident23_k1, is_ident34_k1;
    int       is_ident12_k2, is_ident23_k2;
    int       has_ident;

    k_minus_1 = k_G - 1;
    nG        = n_A - 1;
    n_fuse    = 2;
    k_fuse    = 2;

    // Use the simple variant for nG < 2(k - 1) or k == 1.
    if ( nG < 2*k_minus_1 || k_G == 1 )
    {
        FLA_Apply_G_rf_opd_var1( k_G,
                                 m_A,
                                 n_A,
                                 buff_G, rs_G, cs_G,
                                 buff_A, rs_A, cs_A );
        return FLA_SUCCESS;
    }

    // Start-up phase. j advances by n_fuse from -1; its final value is
    // carried into the pipeline stage below.
    for ( j = -1; j < k_minus_1; j += n_fuse )
    {
        nG_app = j + 2;
        n_iter = nG_app / k_fuse;

        for ( i = 0, k = 0, g = j; i < n_iter; ++i, k += k_fuse, g -= n_fuse )
        {
            g23_k1 = buff_G + (g    )*rs_G + (k    )*cs_G;
            g34_k1 = buff_G + (g + 1)*rs_G + (k    )*cs_G;
            g12_k2 = buff_G + (g - 1)*rs_G + (k + 1)*cs_G;
            g23_k2 = buff_G + (g    )*rs_G + (k + 1)*cs_G;
            a1     = buff_A + (g - 1)*cs_A;
            a2     = buff_A + (g    )*cs_A;
            a3     = buff_A + (g + 1)*cs_A;
            a4     = buff_A + (g + 2)*cs_A;

            gamma23_k1 = g23_k1->real;
            sigma23_k1 = g23_k1->imag;
            gamma34_k1 = g34_k1->real;
            sigma34_k1 = g34_k1->imag;
            gamma12_k2 = g12_k2->real;
            sigma12_k2 = g12_k2->imag;
            gamma23_k2 = g23_k2->real;
            sigma23_k2 = g23_k2->imag;

            is_ident23_k1 = ( gamma23_k1 == one && sigma23_k1 == zero );
            is_ident34_k1 = ( gamma34_k1 == one && sigma34_k1 == zero );
            is_ident12_k2 = ( gamma12_k2 == one && sigma12_k2 == zero );
            is_ident23_k2 = ( gamma23_k2 == one && sigma23_k2 == zero );
            has_ident     = ( is_ident23_k1 || is_ident34_k1 ||
                              is_ident12_k2 || is_ident23_k2 );

            if ( has_ident )
            {
                // Apply to pairs of columns as needed.
                if ( !is_ident23_k1 )
                {
                    MAC_Apply_G_mx2_opd( m_A,
                                         &gamma23_k1,
                                         &sigma23_k1,
                                         a2, rs_A,
                                         a3, rs_A );
                }
                if ( !is_ident34_k1 )
                {
                    MAC_Apply_G_mx2_opd( m_A,
                                         &gamma34_k1,
                                         &sigma34_k1,
                                         a3, rs_A,
                                         a4, rs_A );
                }
                if ( !is_ident12_k2 )
                {
                    MAC_Apply_G_mx2_opd( m_A,
                                         &gamma12_k2,
                                         &sigma12_k2,
                                         a1, rs_A,
                                         a2, rs_A );
                }
                if ( !is_ident23_k2 )
                {
                    MAC_Apply_G_mx2_opd( m_A,
                                         &gamma23_k2,
                                         &sigma23_k2,
                                         a2, rs_A,
                                         a3, rs_A );
                }
            }
            else
            {
                // Apply to all four columns.
                MAC_Apply_G_mx4s_opd( m_A,
                                      &gamma23_k1,
                                      &sigma23_k1,
                                      &gamma34_k1,
                                      &sigma34_k1,
                                      &gamma12_k2,
                                      &sigma12_k2,
                                      &gamma23_k2,
                                      &sigma23_k2,
                                      a1, rs_A,
                                      a2, rs_A,
                                      a3, rs_A,
                                      a4, rs_A );
            }
        }

        // Every start-up step ends with exactly one unfused rotation, taken
        // from the g and k values left behind by the loop above.
        {
            g34_k1 = buff_G + (g + 1)*rs_G + (k    )*cs_G;
            a3     = buff_A + (g + 1)*cs_A;
            a4     = buff_A + (g + 2)*cs_A;

            gamma34_k1 = g34_k1->real;
            sigma34_k1 = g34_k1->imag;

            is_ident34_k1 = ( gamma34_k1 == one && sigma34_k1 == zero );

            if ( !is_ident34_k1 )
            {
                MAC_Apply_G_mx2_opd( m_A,
                                     &gamma34_k1,
                                     &sigma34_k1,
                                     a3, rs_A,
                                     a4, rs_A );
            }
        }
    }

    // Pipeline stage. Continues from the j reached by the start-up phase.
    for ( ; j < nG - 1; j += n_fuse )
    {
        nG_app = k_G;
        n_iter = nG_app / k_fuse;
        n_left = nG_app % k_fuse;

        for ( i = 0, k = 0, g = j; i < n_iter; ++i, k += k_fuse, g -= n_fuse )
        {
            g23_k1 = buff_G + (g    )*rs_G + (k    )*cs_G;
            g34_k1 = buff_G + (g + 1)*rs_G + (k    )*cs_G;
            g12_k2 = buff_G + (g - 1)*rs_G + (k + 1)*cs_G;
            g23_k2 = buff_G + (g    )*rs_G + (k + 1)*cs_G;
            a1     = buff_A + (g - 1)*cs_A;
            a2     = buff_A + (g    )*cs_A;
            a3     = buff_A + (g + 1)*cs_A;
            a4     = buff_A + (g + 2)*cs_A;

            gamma23_k1 = g23_k1->real;
            sigma23_k1 = g23_k1->imag;
            gamma34_k1 = g34_k1->real;
            sigma34_k1 = g34_k1->imag;
            gamma12_k2 = g12_k2->real;
            sigma12_k2 = g12_k2->imag;
            gamma23_k2 = g23_k2->real;
            sigma23_k2 = g23_k2->imag;

            is_ident23_k1 = ( gamma23_k1 == one && sigma23_k1 == zero );
            is_ident34_k1 = ( gamma34_k1 == one && sigma34_k1 == zero );
            is_ident12_k2 = ( gamma12_k2 == one && sigma12_k2 == zero );
            is_ident23_k2 = ( gamma23_k2 == one && sigma23_k2 == zero );
            has_ident     = ( is_ident23_k1 || is_ident34_k1 ||
                              is_ident12_k2 || is_ident23_k2 );

            if ( has_ident )
            {
                // Apply to pairs of columns as needed.
                if ( !is_ident23_k1 )
                {
                    MAC_Apply_G_mx2_opd( m_A,
                                         &gamma23_k1,
                                         &sigma23_k1,
                                         a2, rs_A,
                                         a3, rs_A );
                }
                if ( !is_ident34_k1 )
                {
                    MAC_Apply_G_mx2_opd( m_A,
                                         &gamma34_k1,
                                         &sigma34_k1,
                                         a3, rs_A,
                                         a4, rs_A );
                }
                if ( !is_ident12_k2 )
                {
                    MAC_Apply_G_mx2_opd( m_A,
                                         &gamma12_k2,
                                         &sigma12_k2,
                                         a1, rs_A,
                                         a2, rs_A );
                }
                if ( !is_ident23_k2 )
                {
                    MAC_Apply_G_mx2_opd( m_A,
                                         &gamma23_k2,
                                         &sigma23_k2,
                                         a2, rs_A,
                                         a3, rs_A );
                }
            }
            else
            {
                // Apply to all four columns.
                MAC_Apply_G_mx4s_opd( m_A,
                                      &gamma23_k1,
                                      &sigma23_k1,
                                      &gamma34_k1,
                                      &sigma34_k1,
                                      &gamma12_k2,
                                      &sigma12_k2,
                                      &gamma23_k2,
                                      &sigma23_k2,
                                      a1, rs_A,
                                      a2, rs_A,
                                      a3, rs_A,
                                      a4, rs_A );
            }
        }

        // Odd k_G: one set is left over, contributing two neighbouring
        // rotations that touch three columns.
        if ( n_left == 1 )
        {
            g23_k1 = buff_G + (g    )*rs_G + (k    )*cs_G;
            g34_k1 = buff_G + (g + 1)*rs_G + (k    )*cs_G;
            a2     = buff_A + (g    )*cs_A;
            a3     = buff_A + (g + 1)*cs_A;
            a4     = buff_A + (g + 2)*cs_A;

            gamma23_k1 = g23_k1->real;
            sigma23_k1 = g23_k1->imag;
            gamma34_k1 = g34_k1->real;
            sigma34_k1 = g34_k1->imag;

            is_ident23_k1 = ( gamma23_k1 == one && sigma23_k1 == zero );
            is_ident34_k1 = ( gamma34_k1 == one && sigma34_k1 == zero );

            if ( !is_ident23_k1 && is_ident34_k1 )
            {
                MAC_Apply_G_mx2_opd( m_A,
                                     &gamma23_k1,
                                     &sigma23_k1,
                                     a2, rs_A,
                                     a3, rs_A );
            }
            else if ( is_ident23_k1 && !is_ident34_k1 )
            {
                MAC_Apply_G_mx2_opd( m_A,
                                     &gamma34_k1,
                                     &sigma34_k1,
                                     a3, rs_A,
                                     a4, rs_A );
            }
            else if ( !is_ident23_k1 && !is_ident34_k1 )
            {
                // Both rotations are real work; when both are the identity
                // nothing is applied, as in the fused loop above.
                MAC_Apply_G_mx3_opd( m_A,
                                     &gamma23_k1,
                                     &sigma23_k1,
                                     &gamma34_k1,
                                     &sigma34_k1,
                                     a2, rs_A,
                                     a3, rs_A,
                                     a4, rs_A );
            }
        }
    }

    // Shutdown stage
    for ( j = nG % n_fuse; j < k_G; j += n_fuse )
    {
        g = nG - 1;
        k = j;

        // Each shutdown step opens with one unfused rotation on the last
        // column pair, then steps to the next set and the previous pair.
        {
            g23_k1 = buff_G + (g    )*rs_G + (k    )*cs_G;
            a2     = buff_A + (g    )*cs_A;
            a3     = buff_A + (g + 1)*cs_A;

            gamma23_k1 = g23_k1->real;
            sigma23_k1 = g23_k1->imag;

            is_ident23_k1 = ( gamma23_k1 == one && sigma23_k1 == zero );

            if ( !is_ident23_k1 )
            {
                MAC_Apply_G_mx2_opd( m_A,
                                     &gamma23_k1,
                                     &sigma23_k1,
                                     a2, rs_A,
                                     a3, rs_A );
            }
            ++k;
            --g;
        }

        nG_app = k_minus_1 - j;
        n_iter = nG_app / k_fuse;
        n_left = nG_app % k_fuse;

        for ( i = 0; i < n_iter; ++i, k += k_fuse, g -= n_fuse )
        {
            g23_k1 = buff_G + (g    )*rs_G + (k    )*cs_G;
            g34_k1 = buff_G + (g + 1)*rs_G + (k    )*cs_G;
            g12_k2 = buff_G + (g - 1)*rs_G + (k + 1)*cs_G;
            g23_k2 = buff_G + (g    )*rs_G + (k + 1)*cs_G;
            a1     = buff_A + (g - 1)*cs_A;
            a2     = buff_A + (g    )*cs_A;
            a3     = buff_A + (g + 1)*cs_A;
            a4     = buff_A + (g + 2)*cs_A;

            gamma23_k1 = g23_k1->real;
            sigma23_k1 = g23_k1->imag;
            gamma34_k1 = g34_k1->real;
            sigma34_k1 = g34_k1->imag;
            gamma12_k2 = g12_k2->real;
            sigma12_k2 = g12_k2->imag;
            gamma23_k2 = g23_k2->real;
            sigma23_k2 = g23_k2->imag;

            is_ident23_k1 = ( gamma23_k1 == one && sigma23_k1 == zero );
            is_ident34_k1 = ( gamma34_k1 == one && sigma34_k1 == zero );
            is_ident12_k2 = ( gamma12_k2 == one && sigma12_k2 == zero );
            is_ident23_k2 = ( gamma23_k2 == one && sigma23_k2 == zero );
            has_ident     = ( is_ident23_k1 || is_ident34_k1 ||
                              is_ident12_k2 || is_ident23_k2 );

            if ( has_ident )
            {
                // Apply to pairs of columns as needed.
                if ( !is_ident23_k1 )
                {
                    MAC_Apply_G_mx2_opd( m_A,
                                         &gamma23_k1,
                                         &sigma23_k1,
                                         a2, rs_A,
                                         a3, rs_A );
                }
                if ( !is_ident34_k1 )
                {
                    MAC_Apply_G_mx2_opd( m_A,
                                         &gamma34_k1,
                                         &sigma34_k1,
                                         a3, rs_A,
                                         a4, rs_A );
                }
                if ( !is_ident12_k2 )
                {
                    MAC_Apply_G_mx2_opd( m_A,
                                         &gamma12_k2,
                                         &sigma12_k2,
                                         a1, rs_A,
                                         a2, rs_A );
                }
                if ( !is_ident23_k2 )
                {
                    MAC_Apply_G_mx2_opd( m_A,
                                         &gamma23_k2,
                                         &sigma23_k2,
                                         a2, rs_A,
                                         a3, rs_A );
                }
            }
            else
            {
                // Apply to all four columns.
                MAC_Apply_G_mx4s_opd( m_A,
                                      &gamma23_k1,
                                      &sigma23_k1,
                                      &gamma34_k1,
                                      &sigma34_k1,
                                      &gamma12_k2,
                                      &sigma12_k2,
                                      &gamma23_k2,
                                      &sigma23_k2,
                                      a1, rs_A,
                                      a2, rs_A,
                                      a3, rs_A,
                                      a4, rs_A );
            }
        }

        // One set left over: two neighbouring rotations over three columns.
        if ( n_left == 1 )
        {
            g23_k1 = buff_G + (g    )*rs_G + (k    )*cs_G;
            g34_k1 = buff_G + (g + 1)*rs_G + (k    )*cs_G;
            a2     = buff_A + (g    )*cs_A;
            a3     = buff_A + (g + 1)*cs_A;
            a4     = buff_A + (g + 2)*cs_A;

            gamma23_k1 = g23_k1->real;
            sigma23_k1 = g23_k1->imag;
            gamma34_k1 = g34_k1->real;
            sigma34_k1 = g34_k1->imag;

            is_ident23_k1 = ( gamma23_k1 == one && sigma23_k1 == zero );
            is_ident34_k1 = ( gamma34_k1 == one && sigma34_k1 == zero );

            if ( !is_ident23_k1 && is_ident34_k1 )
            {
                MAC_Apply_G_mx2_opd( m_A,
                                     &gamma23_k1,
                                     &sigma23_k1,
                                     a2, rs_A,
                                     a3, rs_A );
            }
            else if ( is_ident23_k1 && !is_ident34_k1 )
            {
                MAC_Apply_G_mx2_opd( m_A,
                                     &gamma34_k1,
                                     &sigma34_k1,
                                     a3, rs_A,
                                     a4, rs_A );
            }
            else if ( !is_ident23_k1 && !is_ident34_k1 )
            {
                // Skip entirely when both rotations are the identity.
                MAC_Apply_G_mx3_opd( m_A,
                                     &gamma23_k1,
                                     &sigma23_k1,
                                     &gamma34_k1,
                                     &sigma34_k1,
                                     a2, rs_A,
                                     a3, rs_A,
                                     a4, rs_A );
            }
        }
    }

    return FLA_SUCCESS;
}
| FLA_Error FLA_Apply_G_rf_opd_var4 | ( | int | k_G, |
| int | m_A, | ||
| int | n_A, | ||
| dcomplex * | buff_G, | ||
| int | rs_G, | ||
| int | cs_G, | ||
| double * | buff_A, | ||
| int | rs_A, | ||
| int | cs_A | ||
| ) |
| FLA_Error FLA_Apply_G_rf_opd_var5 | ( | int | k_G, |
| int | m_A, | ||
| int | n_A, | ||
| dcomplex * | buff_G, | ||
| int | rs_G, | ||
| int | cs_G, | ||
| double * | buff_A, | ||
| int | rs_A, | ||
| int | cs_A | ||
| ) |
| FLA_Error FLA_Apply_G_rf_opd_var6 | ( | int | k_G, |
| int | m_A, | ||
| int | n_A, | ||
| dcomplex * | buff_G, | ||
| int | rs_G, | ||
| int | cs_G, | ||
| double * | buff_A, | ||
| int | rs_A, | ||
| int | cs_A | ||
| ) |
References bli_d0(), bli_d1(), FLA_Apply_G_rf_opd_var1(), dcomplex::imag, and dcomplex::real.
Referenced by FLA_Apply_G_rf_opt_var6().
{
// Fused variant of the rotation sweep for real double-precision A.
// G is walked along anti-diagonal wavefronts (row index g decreases while
// column index k increases). Each fused step handles two rotations taken
// from adjacent columns of G -- G(g, k) and G(g-1, k+1) -- which together
// touch three adjacent columns of A (g-1, g, g+1).
// Each entry of G is read as gamma = real, sigma = imag. An entry with
// gamma == one and sigma == zero is treated as an identity rotation and is
// not applied.
// G(r, c) is addressed as buff_G + r*rs_G + c*cs_G; column c of A starts at
// buff_A + c*cs_A and is handed to the kernels together with stride rs_A.
// Always returns FLA_SUCCESS.
// Constants used by the identity test (presumably 1.0 and 0.0 -- confirm
// against bli_d1()/bli_d0()).
double one = bli_d1();
double zero = bli_d0();
double gamma12;
double sigma12;
double gamma23;
double sigma23;
double* a1;
double* a2;
double* a3;
dcomplex* g12;
dcomplex* g23;
int i, j, g, k;
int nG, nG_app;
int n_iter;
int n_left;
int k_minus_1;
int n_fuse;
int is_ident12, is_ident23;
k_minus_1 = k_G - 1;
// One rotation per pair of adjacent columns of A.
nG = n_A - 1;
// Number of rotations handled by one fused step.
n_fuse = 2;
// Use the simple variant for nG < (k - 1) or k == 1.
if ( nG < k_minus_1 || k_G == 1 )
{
FLA_Apply_G_rf_opd_var1( k_G,
m_A,
n_A,
buff_G, rs_G, cs_G,
buff_A, rs_A, cs_A );
return FLA_SUCCESS;
}
// Start-up phase.
// Wavefront j starts at G(j, 0) and holds j + 1 rotations, so the
// wavefronts grow until all k_G columns of G participate.
for ( j = 0; j < k_minus_1; ++j )
{
nG_app = j + 1;
// Split the wavefront into fused pairs plus an optional single left-over.
n_iter = nG_app / n_fuse;
n_left = nG_app % n_fuse;
for ( i = 0, k = 0, g = nG_app - 1; i < n_iter; ++i, k += n_fuse, g -= n_fuse )
{
// g23 = G(g, k) acts on columns (g, g+1); g12 = G(g-1, k+1) acts on
// columns (g-1, g).
g12 = buff_G + (g - 1)*rs_G + (k + 1)*cs_G;
g23 = buff_G + (g )*rs_G + (k )*cs_G;
a1 = buff_A + (g - 1)*cs_A;
a2 = buff_A + (g )*cs_A;
a3 = buff_A + (g + 1)*cs_A;
gamma12 = g12->real;
sigma12 = g12->imag;
gamma23 = g23->real;
sigma23 = g23->imag;
is_ident12 = ( gamma12 == one && sigma12 == zero );
is_ident23 = ( gamma23 == one && sigma23 == zero );
// If both rotations are identity, none of the branches below fires.
if ( !is_ident12 && is_ident23 )
{
// Apply only to columns 1 and 2.
MAC_Apply_G_mx2_opd( m_A,
&gamma12,
&sigma12,
a1, rs_A,
a2, rs_A );
}
else if ( is_ident12 && !is_ident23 )
{
// Apply only to columns 2 and 3.
MAC_Apply_G_mx2_opd( m_A,
&gamma23,
&sigma23,
a2, rs_A,
a3, rs_A );
}
else if ( !is_ident12 && !is_ident23 )
{
// Apply to all three columns.
// NOTE(review): the "b" kernel presumably applies the (2,3) rotation
// before the (1,2) rotation -- confirm against the macro definition.
MAC_Apply_G_mx3b_opd( m_A,
&gamma12,
&sigma12,
&gamma23,
&sigma23,
a1, rs_A,
a2, rs_A,
a3, rs_A );
}
}
// Odd wavefront length: apply the single remaining rotation G(g, k),
// using the g and k values left behind by the fused loop.
if ( n_left == 1 )
{
g23 = buff_G + (g )*rs_G + (k )*cs_G;
a2 = buff_A + (g )*cs_A;
a3 = buff_A + (g + 1)*cs_A;
gamma23 = g23->real;
sigma23 = g23->imag;
is_ident23 = ( gamma23 == one && sigma23 == zero );
if ( !is_ident23 )
MAC_Apply_G_mx2_opd( m_A,
&gamma23,
&sigma23,
a2, rs_A,
a3, rs_A );
}
}
// Pipeline stage
// Every wavefront now starts at G(j, 0) and holds exactly k_G rotations,
// one from each column of G.
for ( j = k_minus_1; j < nG; ++j )
{
nG_app = k_G;
n_iter = nG_app / n_fuse;
n_left = nG_app % n_fuse;
for ( i = 0, k = 0, g = j; i < n_iter; ++i, k += n_fuse, g -= n_fuse )
{
// Same fused step as in the start-up phase.
g12 = buff_G + (g - 1)*rs_G + (k + 1)*cs_G;
g23 = buff_G + (g )*rs_G + (k )*cs_G;
a1 = buff_A + (g - 1)*cs_A;
a2 = buff_A + (g )*cs_A;
a3 = buff_A + (g + 1)*cs_A;
gamma12 = g12->real;
sigma12 = g12->imag;
gamma23 = g23->real;
sigma23 = g23->imag;
is_ident12 = ( gamma12 == one && sigma12 == zero );
is_ident23 = ( gamma23 == one && sigma23 == zero );
if ( !is_ident12 && is_ident23 )
{
// Apply only to columns 1 and 2.
MAC_Apply_G_mx2_opd( m_A,
&gamma12,
&sigma12,
a1, rs_A,
a2, rs_A );
}
else if ( is_ident12 && !is_ident23 )
{
// Apply only to columns 2 and 3.
MAC_Apply_G_mx2_opd( m_A,
&gamma23,
&sigma23,
a2, rs_A,
a3, rs_A );
}
else if ( !is_ident12 && !is_ident23 )
{
// Apply to all three columns.
MAC_Apply_G_mx3b_opd( m_A,
&gamma12,
&sigma12,
&gamma23,
&sigma23,
a1, rs_A,
a2, rs_A,
a3, rs_A );
}
}
// Odd k_G: apply the single remaining rotation G(g, k).
if ( n_left == 1 )
{
g23 = buff_G + (g )*rs_G + (k )*cs_G;
a2 = buff_A + (g )*cs_A;
a3 = buff_A + (g + 1)*cs_A;
gamma23 = g23->real;
sigma23 = g23->imag;
is_ident23 = ( gamma23 == one && sigma23 == zero );
if ( !is_ident23 )
MAC_Apply_G_mx2_opd( m_A,
&gamma23,
&sigma23,
a2, rs_A,
a3, rs_A );
}
}
// Shutdown stage
// Wavefront j starts at the last rotation row, G(nG-1, j), and holds the
// k_G - j rotations that remain in columns j .. k_G-1 of G.
for ( j = 1; j < k_G; ++j )
{
nG_app = k_G - j;
n_iter = nG_app / n_fuse;
n_left = nG_app % n_fuse;
for ( i = 0, k = j, g = nG - 1; i < n_iter; ++i, k += n_fuse, g -= n_fuse )
{
// Same fused step as in the earlier phases.
g12 = buff_G + (g - 1)*rs_G + (k + 1)*cs_G;
g23 = buff_G + (g )*rs_G + (k )*cs_G;
a1 = buff_A + (g - 1)*cs_A;
a2 = buff_A + (g )*cs_A;
a3 = buff_A + (g + 1)*cs_A;
gamma12 = g12->real;
sigma12 = g12->imag;
gamma23 = g23->real;
sigma23 = g23->imag;
is_ident12 = ( gamma12 == one && sigma12 == zero );
is_ident23 = ( gamma23 == one && sigma23 == zero );
if ( !is_ident12 && is_ident23 )
{
// Apply only to columns 1 and 2.
MAC_Apply_G_mx2_opd( m_A,
&gamma12,
&sigma12,
a1, rs_A,
a2, rs_A );
}
else if ( is_ident12 && !is_ident23 )
{
// Apply only to columns 2 and 3.
MAC_Apply_G_mx2_opd( m_A,
&gamma23,
&sigma23,
a2, rs_A,
a3, rs_A );
}
else if ( !is_ident12 && !is_ident23 )
{
// Apply to all three columns.
MAC_Apply_G_mx3b_opd( m_A,
&gamma12,
&sigma12,
&gamma23,
&sigma23,
a1, rs_A,
a2, rs_A,
a3, rs_A );
}
}
// Odd number of remaining rotations: apply the last one, G(g, k).
if ( n_left == 1 )
{
g23 = buff_G + (g )*rs_G + (k )*cs_G;
a2 = buff_A + (g )*cs_A;
a3 = buff_A + (g + 1)*cs_A;
gamma23 = g23->real;
sigma23 = g23->imag;
is_ident23 = ( gamma23 == one && sigma23 == zero );
if ( !is_ident23 )
MAC_Apply_G_mx2_opd( m_A,
&gamma23,
&sigma23,
a2, rs_A,
a3, rs_A );
}
}
return FLA_SUCCESS;
}
| FLA_Error FLA_Apply_G_rf_opd_var7 | ( | int | k_G, |
| int | m_A, | ||
| int | n_A, | ||
| dcomplex * | buff_G, | ||
| int | rs_G, | ||
| int | cs_G, | ||
| double * | buff_A, | ||
| int | rs_A, | ||
| int | cs_A | ||
| ) |
| FLA_Error FLA_Apply_G_rf_opd_var8 | ( | int | k_G, |
| int | m_A, | ||
| int | n_A, | ||
| dcomplex * | buff_G, | ||
| int | rs_G, | ||
| int | cs_G, | ||
| double * | buff_A, | ||
| int | rs_A, | ||
| int | cs_A | ||
| ) |
| FLA_Error FLA_Apply_G_rf_opd_var9 | ( | int | k_G, |
| int | m_A, | ||
| int | n_A, | ||
| dcomplex * | buff_G, | ||
| int | rs_G, | ||
| int | cs_G, | ||
| double * | buff_A, | ||
| int | rs_A, | ||
| int | cs_A | ||
| ) |
References bli_d0(), bli_d1(), FLA_Apply_G_rf_opd_var1(), dcomplex::imag, and dcomplex::real.
Referenced by FLA_Apply_G_rf_opt_var9().
{
// Fused variant of the rotation sweep for real double-precision A.
// Each fused step handles two consecutive rotations from the SAME column
// of G -- G(g, k) and G(g+1, k) -- which together touch three adjacent
// columns of A (g, g+1, g+2). The outer loops therefore advance the
// wavefront start row j by n_fuse = 2.
// Each entry of G is read as gamma = real, sigma = imag. An entry with
// gamma == one and sigma == zero is treated as an identity rotation and is
// not applied.
// G(r, c) is addressed as buff_G + r*rs_G + c*cs_G; column c of A starts at
// buff_A + c*cs_A and is handed to the kernels together with stride rs_A.
// Always returns FLA_SUCCESS.
// Constants used by the identity test (presumably 1.0 and 0.0 -- confirm
// against bli_d1()/bli_d0()).
double one = bli_d1();
double zero = bli_d0();
double gamma12;
double sigma12;
double gamma23;
double sigma23;
double* a1;
double* a2;
double* a3;
dcomplex* g12;
dcomplex* g23;
int i, j, g, k;
int nG, nG_app;
int n_iter;
int n_left;
int k_minus_1;
int n_fuse;
int is_ident12, is_ident23;
k_minus_1 = k_G - 1;
// One rotation per pair of adjacent columns of A.
nG = n_A - 1;
// Number of rotations handled by one fused step.
n_fuse = 2;
// Use the simple variant for nG < 2(k - 1) or k == 1.
if ( nG < 2*k_minus_1 || k_G == 1 )
{
FLA_Apply_G_rf_opd_var1( k_G,
m_A,
n_A,
buff_G, rs_G, cs_G,
buff_A, rs_A, cs_A );
return FLA_SUCCESS;
}
// Start-up phase.
// j takes the values -1, 1, 3, ... Wavefront j performs j + 1 fused steps
// starting at G(j, 0) and always finishes with one single rotation.
for ( j = -1; j < k_minus_1; j += n_fuse )
{
nG_app = j + 1;
n_iter = nG_app;
// Always 1 here, so the single-rotation block below always runs.
n_left = 1;
for ( i = 0, k = 0, g = j; i < n_iter; ++i, ++k, --g )
{
// g12 = G(g, k) acts on columns (g, g+1); g23 = G(g+1, k) acts on
// columns (g+1, g+2).
g12 = buff_G + (g )*rs_G + (k )*cs_G;
g23 = buff_G + (g + 1)*rs_G + (k )*cs_G;
a1 = buff_A + (g )*cs_A;
a2 = buff_A + (g + 1)*cs_A;
a3 = buff_A + (g + 2)*cs_A;
gamma12 = g12->real;
sigma12 = g12->imag;
gamma23 = g23->real;
sigma23 = g23->imag;
is_ident12 = ( gamma12 == one && sigma12 == zero );
is_ident23 = ( gamma23 == one && sigma23 == zero );
// If both rotations are identity, none of the branches below fires.
if ( !is_ident12 && is_ident23 )
{
// Apply only to columns 1 and 2.
MAC_Apply_G_mx2_opd( m_A,
&gamma12,
&sigma12,
a1, rs_A,
a2, rs_A );
}
else if ( is_ident12 && !is_ident23 )
{
// Apply only to columns 2 and 3.
MAC_Apply_G_mx2_opd( m_A,
&gamma23,
&sigma23,
a2, rs_A,
a3, rs_A );
}
else if ( !is_ident12 && !is_ident23 )
{
// Apply to all three columns.
// NOTE(review): presumably applies the (1,2) rotation before the (2,3)
// rotation -- confirm against the macro definition.
MAC_Apply_G_mx3_opd( m_A,
&gamma12,
&sigma12,
&gamma23,
&sigma23,
a1, rs_A,
a2, rs_A,
a3, rs_A );
}
}
// After the fused loop g == -1 and k == j + 1, so this applies the single
// rotation G(0, j+1) to columns 0 and 1 of A.
if ( n_left == 1 )
{
g23 = buff_G + (g + 1)*rs_G + (k )*cs_G;
a2 = buff_A + (g + 1)*cs_A;
a3 = buff_A + (g + 2)*cs_A;
gamma23 = g23->real;
sigma23 = g23->imag;
is_ident23 = ( gamma23 == one && sigma23 == zero );
if ( !is_ident23 )
MAC_Apply_G_mx2_opd( m_A,
&gamma23,
&sigma23,
a2, rs_A,
a3, rs_A );
}
}
// Pipeline stage
// Continues from the j value left by the start-up loop. Every wavefront
// performs k_G fused steps, one per column of G.
for ( ; j < nG - 1; j += n_fuse )
{
nG_app = k_G;
n_iter = nG_app;
// Not read in this stage; there is no left-over rotation here.
n_left = 0;
for ( i = 0, k = 0, g = j; i < n_iter; ++i, ++k, --g )
{
// Same fused step as in the start-up phase.
g12 = buff_G + (g )*rs_G + (k )*cs_G;
g23 = buff_G + (g + 1)*rs_G + (k )*cs_G;
a1 = buff_A + (g )*cs_A;
a2 = buff_A + (g + 1)*cs_A;
a3 = buff_A + (g + 2)*cs_A;
gamma12 = g12->real;
sigma12 = g12->imag;
gamma23 = g23->real;
sigma23 = g23->imag;
is_ident12 = ( gamma12 == one && sigma12 == zero );
is_ident23 = ( gamma23 == one && sigma23 == zero );
if ( !is_ident12 && is_ident23 )
{
// Apply only to columns 1 and 2.
MAC_Apply_G_mx2_opd( m_A,
&gamma12,
&sigma12,
a1, rs_A,
a2, rs_A );
}
else if ( is_ident12 && !is_ident23 )
{
// Apply only to columns 2 and 3.
MAC_Apply_G_mx2_opd( m_A,
&gamma23,
&sigma23,
a2, rs_A,
a3, rs_A );
}
else if ( !is_ident12 && !is_ident23 )
{
// Apply to all three columns.
MAC_Apply_G_mx3_opd( m_A,
&gamma12,
&sigma12,
&gamma23,
&sigma23,
a1, rs_A,
a2, rs_A,
a3, rs_A );
}
}
}
// Shutdown stage
// The starting column j has the parity of nG. Each wavefront first applies
// the single rotation G(nG-1, j) to the last two columns of A, then
// performs k_minus_1 - j fused steps starting at G(nG-2, j+1).
for ( j = nG % n_fuse; j < k_G; j += n_fuse )
{
g = nG - 1;
k = j;
// Always 1 here, so the single-rotation block below always runs.
n_left = 1;
if ( n_left == 1 )
{
g12 = buff_G + (g )*rs_G + (k )*cs_G;
a1 = buff_A + (g )*cs_A;
a2 = buff_A + (g + 1)*cs_A;
gamma12 = g12->real;
sigma12 = g12->imag;
is_ident12 = ( gamma12 == one && sigma12 == zero );
if ( !is_ident12 )
MAC_Apply_G_mx2_opd( m_A,
&gamma12,
&sigma12,
a1, rs_A,
a2, rs_A );
// Move to the next column of G and one rotation row up.
++k;
--g;
}
nG_app = k_minus_1 - j;
n_iter = nG_app;
for ( i = 0; i < n_iter; ++i, ++k, --g )
{
// Same fused step as in the earlier phases.
g12 = buff_G + (g )*rs_G + (k )*cs_G;
g23 = buff_G + (g + 1)*rs_G + (k )*cs_G;
a1 = buff_A + (g )*cs_A;
a2 = buff_A + (g + 1)*cs_A;
a3 = buff_A + (g + 2)*cs_A;
gamma12 = g12->real;
sigma12 = g12->imag;
gamma23 = g23->real;
sigma23 = g23->imag;
is_ident12 = ( gamma12 == one && sigma12 == zero );
is_ident23 = ( gamma23 == one && sigma23 == zero );
if ( !is_ident12 && is_ident23 )
{
// Apply only to columns 1 and 2.
MAC_Apply_G_mx2_opd( m_A,
&gamma12,
&sigma12,
a1, rs_A,
a2, rs_A );
}
else if ( is_ident12 && !is_ident23 )
{
// Apply only to columns 2 and 3.
MAC_Apply_G_mx2_opd( m_A,
&gamma23,
&sigma23,
a2, rs_A,
a3, rs_A );
}
else if ( !is_ident12 && !is_ident23 )
{
// Apply to all three columns.
MAC_Apply_G_mx3_opd( m_A,
&gamma12,
&sigma12,
&gamma23,
&sigma23,
a1, rs_A,
a2, rs_A,
a3, rs_A );
}
}
}
return FLA_SUCCESS;
}
| FLA_Error FLA_Apply_G_rf_ops_var1 | ( | int | k_G, |
| int | m_A, | ||
| int | n_A, | ||
| scomplex * | buff_G, | ||
| int | rs_G, | ||
| int | cs_G, | ||
| float * | buff_A, | ||
| int | rs_A, | ||
| int | cs_A | ||
| ) |
References bli_s0(), bli_s1(), scomplex::imag, and scomplex::real.
Referenced by FLA_Apply_G_rf_ops_var2(), FLA_Apply_G_rf_ops_var3(), FLA_Apply_G_rf_ops_var6(), FLA_Apply_G_rf_ops_var9(), and FLA_Apply_G_rf_opt_var1().
{
    // Simple (unfused) variant for real single-precision A.
    // For each of the k_G columns of G, in order, the n_A - 1 rotations in
    // that column are applied one after another to adjacent column pairs
    // (j, j+1) of A. A rotation is read as gamma = real, sigma = imag and is
    // skipped when it equals (one, zero), i.e. the identity.
    // Always returns FLA_SUCCESS.
    float     one   = bli_s1();
    float     zero  = bli_s0();
    int       n_rot = n_A - 1;
    int       col, row;
    float     gamma;
    float     sigma;
    float*    a1;
    float*    a2;
    scomplex* g_col;
    scomplex* g_cur;

    // g_col tracks the top of the current column of G.
    for ( col = 0, g_col = buff_G; col < k_G; ++col, g_col += cs_G )
    {
        // Restart at the first pair of columns of A for every column of G.
        a1    = buff_A;
        a2    = buff_A + cs_A;
        g_cur = g_col;

        // Slide the column pair and the rotation pointer forward together.
        for ( row = 0; row < n_rot; ++row, a1 += cs_A, a2 += cs_A, g_cur += rs_G )
        {
            gamma = g_cur->real;
            sigma = g_cur->imag;

            // Identity rotation: nothing to apply.
            if ( gamma == one && sigma == zero ) continue;

            MAC_Apply_G_mx2_ops( m_A, &gamma, &sigma, a1, rs_A, a2, rs_A );
        }
    }

    return FLA_SUCCESS;
}
| FLA_Error FLA_Apply_G_rf_ops_var2 | ( | int | k_G, |
| int | m_A, | ||
| int | n_A, | ||
| scomplex * | buff_G, | ||
| int | rs_G, | ||
| int | cs_G, | ||
| float * | buff_A, | ||
| int | rs_A, | ||
| int | cs_A | ||
| ) |
References bli_s0(), bli_s1(), FLA_Apply_G_rf_ops_var1(), scomplex::imag, and scomplex::real.
Referenced by FLA_Apply_G_rf_opt_var2().
{
    // Wavefront (unfused) variant for real single-precision A.
    // Rotations are applied one at a time, but G is traversed along
    // anti-diagonals: within a wavefront the row index decreases while the
    // column index increases. A rotation is read as gamma = real,
    // sigma = imag and is skipped when it equals (one, zero).
    // G(r, c) is addressed as buff_G + r*rs_G + c*cs_G; column c of A starts
    // at buff_A + c*cs_A.
    // Always returns FLA_SUCCESS.
    float     one  = bli_s1();
    float     zero = bli_s0();
    float     gamma;
    float     sigma;
    float*    a1;
    float*    a2;
    scomplex* g11;
    int       front, row, col;
    int       n_rot, n_apply;
    int       k_less_1;

    k_less_1 = k_G - 1;
    n_rot    = n_A - 1;

    // Use the simple variant when nG < (k - 1) or k == 1.
    if ( n_rot < k_less_1 || k_G == 1 )
    {
        FLA_Apply_G_rf_ops_var1( k_G, m_A, n_A,
                                 buff_G, rs_G, cs_G,
                                 buff_A, rs_A, cs_A );
        return FLA_SUCCESS;
    }

    // Start-up: wavefront `front` begins at G(front, 0) and contains
    // front + 1 rotations.
    for ( front = 0; front < k_less_1; ++front )
    {
        n_apply = front + 1;

        for ( col = 0, row = n_apply - 1; col < n_apply; ++col, --row )
        {
            g11 = buff_G + row*rs_G + col*cs_G;
            a1  = buff_A + row*cs_A;
            a2  = buff_A + (row + 1)*cs_A;

            gamma = g11->real;
            sigma = g11->imag;

            // Only non-identity rotations are applied.
            if ( gamma != one || sigma != zero )
            {
                MAC_Apply_G_mx2_ops( m_A, &gamma, &sigma, a1, rs_A, a2, rs_A );
            }
        }
    }

    // Pipeline: every wavefront begins at G(front, 0) and contains one
    // rotation from each of the k_G columns of G.
    for ( front = k_less_1; front < n_rot; ++front )
    {
        n_apply = k_G;

        for ( col = 0, row = front; col < n_apply; ++col, --row )
        {
            g11 = buff_G + row*rs_G + col*cs_G;
            a1  = buff_A + row*cs_A;
            a2  = buff_A + (row + 1)*cs_A;

            gamma = g11->real;
            sigma = g11->imag;

            if ( gamma != one || sigma != zero )
            {
                MAC_Apply_G_mx2_ops( m_A, &gamma, &sigma, a1, rs_A, a2, rs_A );
            }
        }
    }

    // Shutdown: wavefronts begin at the last rotation row, G(nG-1, c), and
    // shrink from k_G - 1 rotations down to one.
    for ( front = n_rot - k_less_1; front < n_rot; ++front )
    {
        n_apply = n_rot - front;

        for ( col = k_G - n_apply, row = n_rot - 1; col < k_G; ++col, --row )
        {
            g11 = buff_G + row*rs_G + col*cs_G;
            a1  = buff_A + row*cs_A;
            a2  = buff_A + (row + 1)*cs_A;

            gamma = g11->real;
            sigma = g11->imag;

            if ( gamma != one || sigma != zero )
            {
                MAC_Apply_G_mx2_ops( m_A, &gamma, &sigma, a1, rs_A, a2, rs_A );
            }
        }
    }

    return FLA_SUCCESS;
}
| FLA_Error FLA_Apply_G_rf_ops_var3 | ( | int | k_G, |
| int | m_A, | ||
| int | n_A, | ||
| scomplex * | buff_G, | ||
| int | rs_G, | ||
| int | cs_G, | ||
| float * | buff_A, | ||
| int | rs_A, | ||
| int | cs_A | ||
| ) |
References bli_s0(), bli_s1(), FLA_Apply_G_rf_ops_var1(), scomplex::imag, and scomplex::real.
Referenced by FLA_Apply_G_rf_opt_var3().
{
    // Doubly fused variant of the rotation sweep for real single-precision A.
    // Each fused step handles four rotations: two consecutive rotations from
    // column k of G -- G(g, k) and G(g+1, k) -- and two from column k+1 --
    // G(g-1, k+1) and G(g, k+1). Together they touch four adjacent columns
    // of A (g-1 .. g+2).
    // Each entry of G is read as gamma = real, sigma = imag. An entry with
    // gamma == one and sigma == zero is an identity rotation and is never
    // applied.
    // G(r, c) is addressed as buff_G + r*rs_G + c*cs_G; column c of A starts
    // at buff_A + c*cs_A and is handed to the kernels with stride rs_A.
    // Always returns FLA_SUCCESS.
    float     one  = bli_s1();
    float     zero = bli_s0();
    float     gamma23_k1;
    float     sigma23_k1;
    float     gamma34_k1;
    float     sigma34_k1;
    float     gamma12_k2;
    float     sigma12_k2;
    float     gamma23_k2;
    float     sigma23_k2;
    float*    a1;
    float*    a2;
    float*    a3;
    float*    a4;
    scomplex* g23_k1;
    scomplex* g34_k1;
    scomplex* g12_k2;
    scomplex* g23_k2;
    int       i, j, g, k;
    int       nG, nG_app;
    int       n_iter;
    int       n_left;
    int       k_minus_1;
    int       n_fuse;
    int       k_fuse;
    int       is_ident23_k1, is_ident34_k1;
    int       is_ident12_k2, is_ident23_k2;
    int       has_ident;

    k_minus_1 = k_G - 1;
    nG        = n_A - 1;   // one rotation per adjacent column pair of A
    n_fuse    = 2;         // rotations fused within one column of G
    k_fuse    = 2;         // columns of G fused per step

    // Use the simple variant for nG < 2(k - 1) or k == 1.
    if ( nG < 2*k_minus_1 || k_G == 1 )
    {
        FLA_Apply_G_rf_ops_var1( k_G, m_A, n_A,
                                 buff_G, rs_G, cs_G,
                                 buff_A, rs_A, cs_A );
        return FLA_SUCCESS;
    }

    // Start-up phase: j = -1, 1, 3, ... Each wavefront performs
    // (j + 2) / 2 fused steps and then one single rotation.
    for ( j = -1; j < k_minus_1; j += n_fuse )
    {
        nG_app = j + 2;
        n_iter = nG_app / k_fuse;
        n_left = 1;

        for ( i = 0, k = 0, g = j; i < n_iter; ++i, k += k_fuse, g -= n_fuse )
        {
            g23_k1 = buff_G + (g    )*rs_G + (k    )*cs_G;
            g34_k1 = buff_G + (g + 1)*rs_G + (k    )*cs_G;
            g12_k2 = buff_G + (g - 1)*rs_G + (k + 1)*cs_G;
            g23_k2 = buff_G + (g    )*rs_G + (k + 1)*cs_G;

            a1 = buff_A + (g - 1)*cs_A;
            a2 = buff_A + (g    )*cs_A;
            a3 = buff_A + (g + 1)*cs_A;
            a4 = buff_A + (g + 2)*cs_A;

            gamma23_k1 = g23_k1->real;
            sigma23_k1 = g23_k1->imag;
            gamma34_k1 = g34_k1->real;
            sigma34_k1 = g34_k1->imag;
            gamma12_k2 = g12_k2->real;
            sigma12_k2 = g12_k2->imag;
            gamma23_k2 = g23_k2->real;
            sigma23_k2 = g23_k2->imag;

            is_ident23_k1 = ( gamma23_k1 == one && sigma23_k1 == zero );
            is_ident34_k1 = ( gamma34_k1 == one && sigma34_k1 == zero );
            is_ident12_k2 = ( gamma12_k2 == one && sigma12_k2 == zero );
            is_ident23_k2 = ( gamma23_k2 == one && sigma23_k2 == zero );
            has_ident     = ( is_ident23_k1 || is_ident34_k1 ||
                              is_ident12_k2 || is_ident23_k2 );

            if ( has_ident )
            {
                // At least one rotation is identity: apply the remaining
                // ones pair by pair, in dependency order.
                if ( !is_ident23_k1 )
                    MAC_Apply_G_mx2_ops( m_A, &gamma23_k1, &sigma23_k1, a2, rs_A, a3, rs_A );
                if ( !is_ident34_k1 )
                    MAC_Apply_G_mx2_ops( m_A, &gamma34_k1, &sigma34_k1, a3, rs_A, a4, rs_A );
                if ( !is_ident12_k2 )
                    MAC_Apply_G_mx2_ops( m_A, &gamma12_k2, &sigma12_k2, a1, rs_A, a2, rs_A );
                if ( !is_ident23_k2 )
                    MAC_Apply_G_mx2_ops( m_A, &gamma23_k2, &sigma23_k2, a2, rs_A, a3, rs_A );
            }
            else
            {
                // Apply to all four columns.
                MAC_Apply_G_mx4s_ops( m_A,
                                      &gamma23_k1, &sigma23_k1,
                                      &gamma34_k1, &sigma34_k1,
                                      &gamma12_k2, &sigma12_k2,
                                      &gamma23_k2, &sigma23_k2,
                                      a1, rs_A,
                                      a2, rs_A,
                                      a3, rs_A,
                                      a4, rs_A );
            }
        }

        // Single trailing rotation G(g+1, k) on columns (g+1, g+2), using
        // the g and k values left behind by the fused loop.
        if ( n_left == 1 )
        {
            g34_k1 = buff_G + (g + 1)*rs_G + (k    )*cs_G;
            a3     = buff_A + (g + 1)*cs_A;
            a4     = buff_A + (g + 2)*cs_A;

            gamma34_k1 = g34_k1->real;
            sigma34_k1 = g34_k1->imag;

            is_ident34_k1 = ( gamma34_k1 == one && sigma34_k1 == zero );

            if ( !is_ident34_k1 )
                MAC_Apply_G_mx2_ops( m_A, &gamma34_k1, &sigma34_k1, a3, rs_A, a4, rs_A );
        }
    }

    // Pipeline stage: continues from the j left by the start-up loop. Each
    // wavefront covers all k_G columns of G, two at a time, plus one
    // left-over column when k_G is odd.
    for ( ; j < nG - 1; j += n_fuse )
    {
        nG_app = k_G;
        n_iter = nG_app / k_fuse;
        n_left = nG_app % k_fuse;

        for ( i = 0, k = 0, g = j; i < n_iter; ++i, k += k_fuse, g -= n_fuse )
        {
            g23_k1 = buff_G + (g    )*rs_G + (k    )*cs_G;
            g34_k1 = buff_G + (g + 1)*rs_G + (k    )*cs_G;
            g12_k2 = buff_G + (g - 1)*rs_G + (k + 1)*cs_G;
            g23_k2 = buff_G + (g    )*rs_G + (k + 1)*cs_G;

            a1 = buff_A + (g - 1)*cs_A;
            a2 = buff_A + (g    )*cs_A;
            a3 = buff_A + (g + 1)*cs_A;
            a4 = buff_A + (g + 2)*cs_A;

            gamma23_k1 = g23_k1->real;
            sigma23_k1 = g23_k1->imag;
            gamma34_k1 = g34_k1->real;
            sigma34_k1 = g34_k1->imag;
            gamma12_k2 = g12_k2->real;
            sigma12_k2 = g12_k2->imag;
            gamma23_k2 = g23_k2->real;
            sigma23_k2 = g23_k2->imag;

            is_ident23_k1 = ( gamma23_k1 == one && sigma23_k1 == zero );
            is_ident34_k1 = ( gamma34_k1 == one && sigma34_k1 == zero );
            is_ident12_k2 = ( gamma12_k2 == one && sigma12_k2 == zero );
            is_ident23_k2 = ( gamma23_k2 == one && sigma23_k2 == zero );
            has_ident     = ( is_ident23_k1 || is_ident34_k1 ||
                              is_ident12_k2 || is_ident23_k2 );

            if ( has_ident )
            {
                // Apply to pairs of columns as needed.
                if ( !is_ident23_k1 )
                    MAC_Apply_G_mx2_ops( m_A, &gamma23_k1, &sigma23_k1, a2, rs_A, a3, rs_A );
                if ( !is_ident34_k1 )
                    MAC_Apply_G_mx2_ops( m_A, &gamma34_k1, &sigma34_k1, a3, rs_A, a4, rs_A );
                if ( !is_ident12_k2 )
                    MAC_Apply_G_mx2_ops( m_A, &gamma12_k2, &sigma12_k2, a1, rs_A, a2, rs_A );
                if ( !is_ident23_k2 )
                    MAC_Apply_G_mx2_ops( m_A, &gamma23_k2, &sigma23_k2, a2, rs_A, a3, rs_A );
            }
            else
            {
                // Apply to all four columns.
                MAC_Apply_G_mx4s_ops( m_A,
                                      &gamma23_k1, &sigma23_k1,
                                      &gamma34_k1, &sigma34_k1,
                                      &gamma12_k2, &sigma12_k2,
                                      &gamma23_k2, &sigma23_k2,
                                      a1, rs_A,
                                      a2, rs_A,
                                      a3, rs_A,
                                      a4, rs_A );
            }
        }

        // Odd k_G: the last column of G contributes two rotations, G(g, k)
        // and G(g+1, k), on columns (g, g+1, g+2).
        if ( n_left == 1 )
        {
            g23_k1 = buff_G + (g    )*rs_G + (k    )*cs_G;
            g34_k1 = buff_G + (g + 1)*rs_G + (k    )*cs_G;
            a2     = buff_A + (g    )*cs_A;
            a3     = buff_A + (g + 1)*cs_A;
            a4     = buff_A + (g + 2)*cs_A;

            gamma23_k1 = g23_k1->real;
            sigma23_k1 = g23_k1->imag;
            gamma34_k1 = g34_k1->real;
            sigma34_k1 = g34_k1->imag;

            is_ident23_k1 = ( gamma23_k1 == one && sigma23_k1 == zero );
            is_ident34_k1 = ( gamma34_k1 == one && sigma34_k1 == zero );

            if ( !is_ident23_k1 && is_ident34_k1 )
            {
                MAC_Apply_G_mx2_ops( m_A, &gamma23_k1, &sigma23_k1, a2, rs_A, a3, rs_A );
            }
            else if ( is_ident23_k1 && !is_ident34_k1 )
            {
                MAC_Apply_G_mx2_ops( m_A, &gamma34_k1, &sigma34_k1, a3, rs_A, a4, rs_A );
            }
            else if ( !is_ident23_k1 && !is_ident34_k1 )
            {
                // Both rotations are non-trivial. When both are identity
                // nothing is applied, matching the fused loop above.
                MAC_Apply_G_mx3_ops( m_A,
                                     &gamma23_k1, &sigma23_k1,
                                     &gamma34_k1, &sigma34_k1,
                                     a2, rs_A,
                                     a3, rs_A,
                                     a4, rs_A );
            }
        }
    }

    // Shutdown stage: the starting column j has the parity of nG. Each
    // wavefront first applies the single rotation G(nG-1, j) to the last two
    // columns of A and then covers the remaining k_minus_1 - j columns of G.
    for ( j = nG % n_fuse; j < k_G; j += n_fuse )
    {
        g = nG - 1;
        k = j;

        // Leading single rotation on the last column pair.
        g23_k1 = buff_G + (g    )*rs_G + (k    )*cs_G;
        a2     = buff_A + (g    )*cs_A;
        a3     = buff_A + (g + 1)*cs_A;

        gamma23_k1 = g23_k1->real;
        sigma23_k1 = g23_k1->imag;

        is_ident23_k1 = ( gamma23_k1 == one && sigma23_k1 == zero );

        if ( !is_ident23_k1 )
            MAC_Apply_G_mx2_ops( m_A, &gamma23_k1, &sigma23_k1, a2, rs_A, a3, rs_A );

        // Move to the next column of G and one rotation row up.
        ++k;
        --g;

        nG_app = k_minus_1 - j;
        n_iter = nG_app / k_fuse;
        n_left = nG_app % k_fuse;

        for ( i = 0; i < n_iter; ++i, k += k_fuse, g -= n_fuse )
        {
            g23_k1 = buff_G + (g    )*rs_G + (k    )*cs_G;
            g34_k1 = buff_G + (g + 1)*rs_G + (k    )*cs_G;
            g12_k2 = buff_G + (g - 1)*rs_G + (k + 1)*cs_G;
            g23_k2 = buff_G + (g    )*rs_G + (k + 1)*cs_G;

            a1 = buff_A + (g - 1)*cs_A;
            a2 = buff_A + (g    )*cs_A;
            a3 = buff_A + (g + 1)*cs_A;
            a4 = buff_A + (g + 2)*cs_A;

            gamma23_k1 = g23_k1->real;
            sigma23_k1 = g23_k1->imag;
            gamma34_k1 = g34_k1->real;
            sigma34_k1 = g34_k1->imag;
            gamma12_k2 = g12_k2->real;
            sigma12_k2 = g12_k2->imag;
            gamma23_k2 = g23_k2->real;
            sigma23_k2 = g23_k2->imag;

            is_ident23_k1 = ( gamma23_k1 == one && sigma23_k1 == zero );
            is_ident34_k1 = ( gamma34_k1 == one && sigma34_k1 == zero );
            is_ident12_k2 = ( gamma12_k2 == one && sigma12_k2 == zero );
            is_ident23_k2 = ( gamma23_k2 == one && sigma23_k2 == zero );
            has_ident     = ( is_ident23_k1 || is_ident34_k1 ||
                              is_ident12_k2 || is_ident23_k2 );

            if ( has_ident )
            {
                // Apply to pairs of columns as needed.
                if ( !is_ident23_k1 )
                    MAC_Apply_G_mx2_ops( m_A, &gamma23_k1, &sigma23_k1, a2, rs_A, a3, rs_A );
                if ( !is_ident34_k1 )
                    MAC_Apply_G_mx2_ops( m_A, &gamma34_k1, &sigma34_k1, a3, rs_A, a4, rs_A );
                if ( !is_ident12_k2 )
                    MAC_Apply_G_mx2_ops( m_A, &gamma12_k2, &sigma12_k2, a1, rs_A, a2, rs_A );
                if ( !is_ident23_k2 )
                    MAC_Apply_G_mx2_ops( m_A, &gamma23_k2, &sigma23_k2, a2, rs_A, a3, rs_A );
            }
            else
            {
                // Apply to all four columns.
                MAC_Apply_G_mx4s_ops( m_A,
                                      &gamma23_k1, &sigma23_k1,
                                      &gamma34_k1, &sigma34_k1,
                                      &gamma12_k2, &sigma12_k2,
                                      &gamma23_k2, &sigma23_k2,
                                      a1, rs_A,
                                      a2, rs_A,
                                      a3, rs_A,
                                      a4, rs_A );
            }
        }

        // Odd number of remaining columns: the last one contributes G(g, k)
        // and G(g+1, k) on columns (g, g+1, g+2).
        if ( n_left == 1 )
        {
            g23_k1 = buff_G + (g    )*rs_G + (k    )*cs_G;
            g34_k1 = buff_G + (g + 1)*rs_G + (k    )*cs_G;
            a2     = buff_A + (g    )*cs_A;
            a3     = buff_A + (g + 1)*cs_A;
            a4     = buff_A + (g + 2)*cs_A;

            gamma23_k1 = g23_k1->real;
            sigma23_k1 = g23_k1->imag;
            gamma34_k1 = g34_k1->real;
            sigma34_k1 = g34_k1->imag;

            is_ident23_k1 = ( gamma23_k1 == one && sigma23_k1 == zero );
            is_ident34_k1 = ( gamma34_k1 == one && sigma34_k1 == zero );

            if ( !is_ident23_k1 && is_ident34_k1 )
            {
                MAC_Apply_G_mx2_ops( m_A, &gamma23_k1, &sigma23_k1, a2, rs_A, a3, rs_A );
            }
            else if ( is_ident23_k1 && !is_ident34_k1 )
            {
                MAC_Apply_G_mx2_ops( m_A, &gamma34_k1, &sigma34_k1, a3, rs_A, a4, rs_A );
            }
            else if ( !is_ident23_k1 && !is_ident34_k1 )
            {
                // Both rotations are non-trivial. When both are identity
                // nothing is applied, matching the fused loop above.
                MAC_Apply_G_mx3_ops( m_A,
                                     &gamma23_k1, &sigma23_k1,
                                     &gamma34_k1, &sigma34_k1,
                                     a2, rs_A,
                                     a3, rs_A,
                                     a4, rs_A );
            }
        }
    }

    return FLA_SUCCESS;
}
| FLA_Error FLA_Apply_G_rf_ops_var4 | ( | int | k_G, |
| int | m_A, | ||
| int | n_A, | ||
| scomplex * | buff_G, | ||
| int | rs_G, | ||
| int | cs_G, | ||
| float * | buff_A, | ||
| int | rs_A, | ||
| int | cs_A | ||
| ) |
| FLA_Error FLA_Apply_G_rf_ops_var5 | ( | int | k_G, |
| int | m_A, | ||
| int | n_A, | ||
| scomplex * | buff_G, | ||
| int | rs_G, | ||
| int | cs_G, | ||
| float * | buff_A, | ||
| int | rs_A, | ||
| int | cs_A | ||
| ) |
| FLA_Error FLA_Apply_G_rf_ops_var6 | ( | int | k_G, |
| int | m_A, | ||
| int | n_A, | ||
| scomplex * | buff_G, | ||
| int | rs_G, | ||
| int | cs_G, | ||
| float * | buff_A, | ||
| int | rs_A, | ||
| int | cs_A | ||
| ) |
References bli_s0(), bli_s1(), FLA_Apply_G_rf_ops_var1(), scomplex::imag, and scomplex::real.
Referenced by FLA_Apply_G_rf_opt_var6().
{
// Fused variant of the rotation sweep for real single-precision A.
// G is walked along anti-diagonal wavefronts (row index g decreases while
// column index k increases). Each fused step handles two rotations taken
// from adjacent columns of G -- G(g, k) and G(g-1, k+1) -- which together
// touch three adjacent columns of A (g-1, g, g+1).
// Each entry of G is read as gamma = real, sigma = imag. An entry with
// gamma == one and sigma == zero is treated as an identity rotation and is
// not applied.
// G(r, c) is addressed as buff_G + r*rs_G + c*cs_G; column c of A starts at
// buff_A + c*cs_A and is handed to the kernels together with stride rs_A.
// Always returns FLA_SUCCESS.
// Constants used by the identity test (presumably 1.0f and 0.0f -- confirm
// against bli_s1()/bli_s0()).
float one = bli_s1();
float zero = bli_s0();
float gamma12;
float sigma12;
float gamma23;
float sigma23;
float* a1;
float* a2;
float* a3;
scomplex* g12;
scomplex* g23;
int i, j, g, k;
int nG, nG_app;
int n_iter;
int n_left;
int k_minus_1;
int n_fuse;
int is_ident12, is_ident23;
k_minus_1 = k_G - 1;
// One rotation per pair of adjacent columns of A.
nG = n_A - 1;
// Number of rotations handled by one fused step.
n_fuse = 2;
// Use the simple variant for nG < (k - 1) or k == 1.
if ( nG < k_minus_1 || k_G == 1 )
{
FLA_Apply_G_rf_ops_var1( k_G,
m_A,
n_A,
buff_G, rs_G, cs_G,
buff_A, rs_A, cs_A );
return FLA_SUCCESS;
}
// Start-up phase.
// Wavefront j starts at G(j, 0) and holds j + 1 rotations, so the
// wavefronts grow until all k_G columns of G participate.
for ( j = 0; j < k_minus_1; ++j )
{
nG_app = j + 1;
// Split the wavefront into fused pairs plus an optional single left-over.
n_iter = nG_app / n_fuse;
n_left = nG_app % n_fuse;
for ( i = 0, k = 0, g = nG_app - 1; i < n_iter; ++i, k += n_fuse, g -= n_fuse )
{
// g23 = G(g, k) acts on columns (g, g+1); g12 = G(g-1, k+1) acts on
// columns (g-1, g).
g12 = buff_G + (g - 1)*rs_G + (k + 1)*cs_G;
g23 = buff_G + (g )*rs_G + (k )*cs_G;
a1 = buff_A + (g - 1)*cs_A;
a2 = buff_A + (g )*cs_A;
a3 = buff_A + (g + 1)*cs_A;
gamma12 = g12->real;
sigma12 = g12->imag;
gamma23 = g23->real;
sigma23 = g23->imag;
is_ident12 = ( gamma12 == one && sigma12 == zero );
is_ident23 = ( gamma23 == one && sigma23 == zero );
// If both rotations are identity, none of the branches below fires.
if ( !is_ident12 && is_ident23 )
{
// Apply only to columns 1 and 2.
MAC_Apply_G_mx2_ops( m_A,
&gamma12,
&sigma12,
a1, rs_A,
a2, rs_A );
}
else if ( is_ident12 && !is_ident23 )
{
// Apply only to columns 2 and 3.
MAC_Apply_G_mx2_ops( m_A,
&gamma23,
&sigma23,
a2, rs_A,
a3, rs_A );
}
else if ( !is_ident12 && !is_ident23 )
{
// Apply to all three columns.
// NOTE(review): the "b" kernel presumably applies the (2,3) rotation
// before the (1,2) rotation -- confirm against the macro definition.
MAC_Apply_G_mx3b_ops( m_A,
&gamma12,
&sigma12,
&gamma23,
&sigma23,
a1, rs_A,
a2, rs_A,
a3, rs_A );
}
}
// Odd wavefront length: apply the single remaining rotation G(g, k),
// using the g and k values left behind by the fused loop.
if ( n_left == 1 )
{
g23 = buff_G + (g )*rs_G + (k )*cs_G;
a2 = buff_A + (g )*cs_A;
a3 = buff_A + (g + 1)*cs_A;
gamma23 = g23->real;
sigma23 = g23->imag;
is_ident23 = ( gamma23 == one && sigma23 == zero );
if ( !is_ident23 )
MAC_Apply_G_mx2_ops( m_A,
&gamma23,
&sigma23,
a2, rs_A,
a3, rs_A );
}
}
// Pipeline stage
// Every wavefront now starts at G(j, 0) and holds exactly k_G rotations,
// one from each column of G.
for ( j = k_minus_1; j < nG; ++j )
{
nG_app = k_G;
n_iter = nG_app / n_fuse;
n_left = nG_app % n_fuse;
for ( i = 0, k = 0, g = j; i < n_iter; ++i, k += n_fuse, g -= n_fuse )
{
// Same fused step as in the start-up phase.
g12 = buff_G + (g - 1)*rs_G + (k + 1)*cs_G;
g23 = buff_G + (g )*rs_G + (k )*cs_G;
a1 = buff_A + (g - 1)*cs_A;
a2 = buff_A + (g )*cs_A;
a3 = buff_A + (g + 1)*cs_A;
gamma12 = g12->real;
sigma12 = g12->imag;
gamma23 = g23->real;
sigma23 = g23->imag;
is_ident12 = ( gamma12 == one && sigma12 == zero );
is_ident23 = ( gamma23 == one && sigma23 == zero );
if ( !is_ident12 && is_ident23 )
{
// Apply only to columns 1 and 2.
MAC_Apply_G_mx2_ops( m_A,
&gamma12,
&sigma12,
a1, rs_A,
a2, rs_A );
}
else if ( is_ident12 && !is_ident23 )
{
// Apply only to columns 2 and 3.
MAC_Apply_G_mx2_ops( m_A,
&gamma23,
&sigma23,
a2, rs_A,
a3, rs_A );
}
else if ( !is_ident12 && !is_ident23 )
{
// Apply to all three columns.
MAC_Apply_G_mx3b_ops( m_A,
&gamma12,
&sigma12,
&gamma23,
&sigma23,
a1, rs_A,
a2, rs_A,
a3, rs_A );
}
}
// Odd k_G: apply the single remaining rotation G(g, k).
if ( n_left == 1 )
{
g23 = buff_G + (g )*rs_G + (k )*cs_G;
a2 = buff_A + (g )*cs_A;
a3 = buff_A + (g + 1)*cs_A;
gamma23 = g23->real;
sigma23 = g23->imag;
is_ident23 = ( gamma23 == one && sigma23 == zero );
if ( !is_ident23 )
MAC_Apply_G_mx2_ops( m_A,
&gamma23,
&sigma23,
a2, rs_A,
a3, rs_A );
}
}
// Shutdown stage
// Wavefront j starts at the last rotation row, G(nG-1, j), and holds the
// k_G - j rotations that remain in columns j .. k_G-1 of G.
for ( j = 1; j < k_G; ++j )
{
nG_app = k_G - j;
n_iter = nG_app / n_fuse;
n_left = nG_app % n_fuse;
for ( i = 0, k = j, g = nG - 1; i < n_iter; ++i, k += n_fuse, g -= n_fuse )
{
// Same fused step as in the earlier phases.
g12 = buff_G + (g - 1)*rs_G + (k + 1)*cs_G;
g23 = buff_G + (g )*rs_G + (k )*cs_G;
a1 = buff_A + (g - 1)*cs_A;
a2 = buff_A + (g )*cs_A;
a3 = buff_A + (g + 1)*cs_A;
gamma12 = g12->real;
sigma12 = g12->imag;
gamma23 = g23->real;
sigma23 = g23->imag;
is_ident12 = ( gamma12 == one && sigma12 == zero );
is_ident23 = ( gamma23 == one && sigma23 == zero );
if ( !is_ident12 && is_ident23 )
{
// Apply only to columns 1 and 2.
MAC_Apply_G_mx2_ops( m_A,
&gamma12,
&sigma12,
a1, rs_A,
a2, rs_A );
}
else if ( is_ident12 && !is_ident23 )
{
// Apply only to columns 2 and 3.
MAC_Apply_G_mx2_ops( m_A,
&gamma23,
&sigma23,
a2, rs_A,
a3, rs_A );
}
else if ( !is_ident12 && !is_ident23 )
{
// Apply to all three columns.
MAC_Apply_G_mx3b_ops( m_A,
&gamma12,
&sigma12,
&gamma23,
&sigma23,
a1, rs_A,
a2, rs_A,
a3, rs_A );
}
}
// Odd number of remaining rotations: apply the last one, G(g, k).
if ( n_left == 1 )
{
g23 = buff_G + (g )*rs_G + (k )*cs_G;
a2 = buff_A + (g )*cs_A;
a3 = buff_A + (g + 1)*cs_A;
gamma23 = g23->real;
sigma23 = g23->imag;
is_ident23 = ( gamma23 == one && sigma23 == zero );
if ( !is_ident23 )
MAC_Apply_G_mx2_ops( m_A,
&gamma23,
&sigma23,
a2, rs_A,
a3, rs_A );
}
}
return FLA_SUCCESS;
}
| FLA_Error FLA_Apply_G_rf_ops_var7 | ( | int | k_G, |
| int | m_A, | ||
| int | n_A, | ||
| scomplex * | buff_G, | ||
| int | rs_G, | ||
| int | cs_G, | ||
| float * | buff_A, | ||
| int | rs_A, | ||
| int | cs_A | ||
| ) |
| FLA_Error FLA_Apply_G_rf_ops_var8 | ( | int | k_G, |
| int | m_A, | ||
| int | n_A, | ||
| scomplex * | buff_G, | ||
| int | rs_G, | ||
| int | cs_G, | ||
| float * | buff_A, | ||
| int | rs_A, | ||
| int | cs_A | ||
| ) |
| FLA_Error FLA_Apply_G_rf_ops_var9 | ( | int | k_G, |
| int | m_A, | ||
| int | n_A, | ||
| scomplex * | buff_G, | ||
| int | rs_G, | ||
| int | cs_G, | ||
| float * | buff_A, | ||
| int | rs_A, | ||
| int | cs_A | ||
| ) |
References bli_s0(), bli_s1(), FLA_Apply_G_rf_ops_var1(), scomplex::imag, and scomplex::real.
Referenced by FLA_Apply_G_rf_opt_var9().
{
// Fused variant of the rotation sweep for real single-precision A.
// Each fused step handles two consecutive rotations from the SAME column
// of G -- G(g, k) and G(g+1, k) -- which together touch three adjacent
// columns of A (g, g+1, g+2). The outer loops therefore advance the
// wavefront start row j by n_fuse = 2.
// Each entry of G is read as gamma = real, sigma = imag. An entry with
// gamma == one and sigma == zero is treated as an identity rotation and is
// not applied.
// G(r, c) is addressed as buff_G + r*rs_G + c*cs_G; column c of A starts at
// buff_A + c*cs_A and is handed to the kernels together with stride rs_A.
// Always returns FLA_SUCCESS.
// Constants used by the identity test (presumably 1.0f and 0.0f -- confirm
// against bli_s1()/bli_s0()).
float one = bli_s1();
float zero = bli_s0();
float gamma12;
float sigma12;
float gamma23;
float sigma23;
float* a1;
float* a2;
float* a3;
scomplex* g12;
scomplex* g23;
int i, j, g, k;
int nG, nG_app;
int n_iter;
int n_left;
int k_minus_1;
int n_fuse;
int is_ident12, is_ident23;
k_minus_1 = k_G - 1;
// One rotation per pair of adjacent columns of A.
nG = n_A - 1;
// Number of rotations handled by one fused step.
n_fuse = 2;
// Use the simple variant for nG < 2(k - 1) or k == 1.
if ( nG < 2*k_minus_1 || k_G == 1 )
{
FLA_Apply_G_rf_ops_var1( k_G,
m_A,
n_A,
buff_G, rs_G, cs_G,
buff_A, rs_A, cs_A );
return FLA_SUCCESS;
}
// Start-up phase.
// j takes the values -1, 1, 3, ... Wavefront j performs j + 1 fused steps
// starting at G(j, 0) and always finishes with one single rotation.
for ( j = -1; j < k_minus_1; j += n_fuse )
{
nG_app = j + 1;
n_iter = nG_app;
// Always 1 here, so the single-rotation block below always runs.
n_left = 1;
for ( i = 0, k = 0, g = j; i < n_iter; ++i, ++k, --g )
{
// g12 = G(g, k) acts on columns (g, g+1); g23 = G(g+1, k) acts on
// columns (g+1, g+2).
g12 = buff_G + (g )*rs_G + (k )*cs_G;
g23 = buff_G + (g + 1)*rs_G + (k )*cs_G;
a1 = buff_A + (g )*cs_A;
a2 = buff_A + (g + 1)*cs_A;
a3 = buff_A + (g + 2)*cs_A;
gamma12 = g12->real;
sigma12 = g12->imag;
gamma23 = g23->real;
sigma23 = g23->imag;
is_ident12 = ( gamma12 == one && sigma12 == zero );
is_ident23 = ( gamma23 == one && sigma23 == zero );
// If both rotations are identity, none of the branches below fires.
if ( !is_ident12 && is_ident23 )
{
// Apply only to columns 1 and 2.
MAC_Apply_G_mx2_ops( m_A,
&gamma12,
&sigma12,
a1, rs_A,
a2, rs_A );
}
else if ( is_ident12 && !is_ident23 )
{
// Apply only to columns 2 and 3.
MAC_Apply_G_mx2_ops( m_A,
&gamma23,
&sigma23,
a2, rs_A,
a3, rs_A );
}
else if ( !is_ident12 && !is_ident23 )
{
// Apply to all three columns.
// NOTE(review): presumably applies the (1,2) rotation before the (2,3)
// rotation -- confirm against the macro definition.
MAC_Apply_G_mx3_ops( m_A,
&gamma12,
&sigma12,
&gamma23,
&sigma23,
a1, rs_A,
a2, rs_A,
a3, rs_A );
}
}
// After the fused loop g == -1 and k == j + 1, so this applies the single
// rotation G(0, j+1) to columns 0 and 1 of A.
if ( n_left == 1 )
{
g23 = buff_G + (g + 1)*rs_G + (k )*cs_G;
a2 = buff_A + (g + 1)*cs_A;
a3 = buff_A + (g + 2)*cs_A;
gamma23 = g23->real;
sigma23 = g23->imag;
is_ident23 = ( gamma23 == one && sigma23 == zero );
if ( !is_ident23 )
MAC_Apply_G_mx2_ops( m_A,
&gamma23,
&sigma23,
a2, rs_A,
a3, rs_A );
}
}
// Pipeline stage
// Continues from the j value left by the start-up loop. Every wavefront
// performs k_G fused steps, one per column of G.
for ( ; j < nG - 1; j += n_fuse )
{
nG_app = k_G;
n_iter = nG_app;
// Not read in this stage; there is no left-over rotation here.
n_left = 0;
for ( i = 0, k = 0, g = j; i < n_iter; ++i, ++k, --g )
{
// Same fused step as in the start-up phase.
g12 = buff_G + (g )*rs_G + (k )*cs_G;
g23 = buff_G + (g + 1)*rs_G + (k )*cs_G;
a1 = buff_A + (g )*cs_A;
a2 = buff_A + (g + 1)*cs_A;
a3 = buff_A + (g + 2)*cs_A;
gamma12 = g12->real;
sigma12 = g12->imag;
gamma23 = g23->real;
sigma23 = g23->imag;
is_ident12 = ( gamma12 == one && sigma12 == zero );
is_ident23 = ( gamma23 == one && sigma23 == zero );
if ( !is_ident12 && is_ident23 )
{
// Apply only to columns 1 and 2.
MAC_Apply_G_mx2_ops( m_A,
&gamma12,
&sigma12,
a1, rs_A,
a2, rs_A );
}
else if ( is_ident12 && !is_ident23 )
{
// Apply only to columns 2 and 3.
MAC_Apply_G_mx2_ops( m_A,
&gamma23,
&sigma23,
a2, rs_A,
a3, rs_A );
}
else if ( !is_ident12 && !is_ident23 )
{
// Apply to all three columns.
MAC_Apply_G_mx3_ops( m_A,
&gamma12,
&sigma12,
&gamma23,
&sigma23,
a1, rs_A,
a2, rs_A,
a3, rs_A );
}
}
}
// Shutdown stage
// The starting column j has the parity of nG. Each wavefront first applies
// the single rotation G(nG-1, j) to the last two columns of A, then
// performs k_minus_1 - j fused steps starting at G(nG-2, j+1).
for ( j = nG % n_fuse; j < k_G; j += n_fuse )
{
g = nG - 1;
k = j;
// Always 1 here, so the single-rotation block below always runs.
n_left = 1;
if ( n_left == 1 )
{
g12 = buff_G + (g )*rs_G + (k )*cs_G;
a1 = buff_A + (g )*cs_A;
a2 = buff_A + (g + 1)*cs_A;
gamma12 = g12->real;
sigma12 = g12->imag;
is_ident12 = ( gamma12 == one && sigma12 == zero );
if ( !is_ident12 )
MAC_Apply_G_mx2_ops( m_A,
&gamma12,
&sigma12,
a1, rs_A,
a2, rs_A );
// Move to the next column of G and one rotation row up.
++k;
--g;
}
nG_app = k_minus_1 - j;
n_iter = nG_app;
for ( i = 0; i < n_iter; ++i, ++k, --g )
{
// Same fused step as in the earlier phases.
g12 = buff_G + (g )*rs_G + (k )*cs_G;
g23 = buff_G + (g + 1)*rs_G + (k )*cs_G;
a1 = buff_A + (g )*cs_A;
a2 = buff_A + (g + 1)*cs_A;
a3 = buff_A + (g + 2)*cs_A;
gamma12 = g12->real;
sigma12 = g12->imag;
gamma23 = g23->real;
sigma23 = g23->imag;
is_ident12 = ( gamma12 == one && sigma12 == zero );
is_ident23 = ( gamma23 == one && sigma23 == zero );
if ( !is_ident12 && is_ident23 )
{
// Apply only to columns 1 and 2.
MAC_Apply_G_mx2_ops( m_A,
&gamma12,
&sigma12,
a1, rs_A,
a2, rs_A );
}
else if ( is_ident12 && !is_ident23 )
{
// Apply only to columns 2 and 3.
MAC_Apply_G_mx2_ops( m_A,
&gamma23,
&sigma23,
a2, rs_A,
a3, rs_A );
}
else if ( !is_ident12 && !is_ident23 )
{
// Apply to all three columns.
MAC_Apply_G_mx3_ops( m_A,
&gamma12,
&sigma12,
&gamma23,
&sigma23,
a1, rs_A,
a2, rs_A,
a3, rs_A );
}
}
}
return FLA_SUCCESS;
}
| FLA_Error FLA_Apply_G_rf_opt_var1 | ( | FLA_Obj | G, |
| FLA_Obj | A | ||
| ) |
References FLA_Apply_G_rf_opc_var1(), FLA_Apply_G_rf_opd_var1(), FLA_Apply_G_rf_ops_var1(), FLA_Apply_G_rf_opz_var1(), FLA_Obj_col_stride(), FLA_Obj_datatype(), FLA_Obj_length(), FLA_Obj_row_stride(), and FLA_Obj_width().
Referenced by FLA_Apply_G_internal().
{
    // Object-level front end for variant 1: extracts the dimensions and
    // strides of G and A, then dispatches on the datatype of A to the
    // matching typed kernel. G is read through a complex pointer of the
    // matching precision. A datatype that matches none of the four cases
    // falls through without any work. Always returns FLA_SUCCESS.
    FLA_Datatype dt_A = FLA_Obj_datatype( A );
    int          k_G  = FLA_Obj_width( G );
    int          m_A  = FLA_Obj_length( A );
    int          n_A  = FLA_Obj_width( A );
    int          rs_G = FLA_Obj_row_stride( G );
    int          cs_G = FLA_Obj_col_stride( G );
    int          rs_A = FLA_Obj_row_stride( A );
    int          cs_A = FLA_Obj_col_stride( A );

    if ( dt_A == FLA_FLOAT )
    {
        scomplex* g_ptr = ( scomplex* ) FLA_COMPLEX_PTR( G );
        float*    a_ptr = ( float* ) FLA_FLOAT_PTR( A );

        FLA_Apply_G_rf_ops_var1( k_G, m_A, n_A,
                                 g_ptr, rs_G, cs_G,
                                 a_ptr, rs_A, cs_A );
    }
    else if ( dt_A == FLA_DOUBLE )
    {
        dcomplex* g_ptr = ( dcomplex* ) FLA_DOUBLE_COMPLEX_PTR( G );
        double*   a_ptr = ( double* ) FLA_DOUBLE_PTR( A );

        FLA_Apply_G_rf_opd_var1( k_G, m_A, n_A,
                                 g_ptr, rs_G, cs_G,
                                 a_ptr, rs_A, cs_A );
    }
    else if ( dt_A == FLA_COMPLEX )
    {
        scomplex* g_ptr = ( scomplex* ) FLA_COMPLEX_PTR( G );
        scomplex* a_ptr = ( scomplex* ) FLA_COMPLEX_PTR( A );

        FLA_Apply_G_rf_opc_var1( k_G, m_A, n_A,
                                 g_ptr, rs_G, cs_G,
                                 a_ptr, rs_A, cs_A );
    }
    else if ( dt_A == FLA_DOUBLE_COMPLEX )
    {
        dcomplex* g_ptr = ( dcomplex* ) FLA_DOUBLE_COMPLEX_PTR( G );
        dcomplex* a_ptr = ( dcomplex* ) FLA_DOUBLE_COMPLEX_PTR( A );

        FLA_Apply_G_rf_opz_var1( k_G, m_A, n_A,
                                 g_ptr, rs_G, cs_G,
                                 a_ptr, rs_A, cs_A );
    }

    return FLA_SUCCESS;
}
| FLA_Error FLA_Apply_G_rf_opt_var2 | ( | FLA_Obj | G, |
| FLA_Obj | A | ||
| ) |
References FLA_Apply_G_rf_opc_var2(), FLA_Apply_G_rf_opd_var2(), FLA_Apply_G_rf_ops_var2(), FLA_Apply_G_rf_opz_var2(), FLA_Obj_col_stride(), FLA_Obj_datatype(), FLA_Obj_length(), FLA_Obj_row_stride(), and FLA_Obj_width().
{
    // Object-level front end for variant 2: extracts the dimensions and
    // strides of G and A, then dispatches on the datatype of A to the
    // matching typed kernel. G is read through a complex pointer of the
    // matching precision. A datatype that matches none of the four cases
    // falls through without any work. Always returns FLA_SUCCESS.
    FLA_Datatype dt_A = FLA_Obj_datatype( A );
    int          k_G  = FLA_Obj_width( G );
    int          m_A  = FLA_Obj_length( A );
    int          n_A  = FLA_Obj_width( A );
    int          rs_G = FLA_Obj_row_stride( G );
    int          cs_G = FLA_Obj_col_stride( G );
    int          rs_A = FLA_Obj_row_stride( A );
    int          cs_A = FLA_Obj_col_stride( A );

    if ( dt_A == FLA_FLOAT )
    {
        scomplex* g_ptr = ( scomplex* ) FLA_COMPLEX_PTR( G );
        float*    a_ptr = ( float* ) FLA_FLOAT_PTR( A );

        FLA_Apply_G_rf_ops_var2( k_G, m_A, n_A,
                                 g_ptr, rs_G, cs_G,
                                 a_ptr, rs_A, cs_A );
    }
    else if ( dt_A == FLA_DOUBLE )
    {
        dcomplex* g_ptr = ( dcomplex* ) FLA_DOUBLE_COMPLEX_PTR( G );
        double*   a_ptr = ( double* ) FLA_DOUBLE_PTR( A );

        FLA_Apply_G_rf_opd_var2( k_G, m_A, n_A,
                                 g_ptr, rs_G, cs_G,
                                 a_ptr, rs_A, cs_A );
    }
    else if ( dt_A == FLA_COMPLEX )
    {
        scomplex* g_ptr = ( scomplex* ) FLA_COMPLEX_PTR( G );
        scomplex* a_ptr = ( scomplex* ) FLA_COMPLEX_PTR( A );

        FLA_Apply_G_rf_opc_var2( k_G, m_A, n_A,
                                 g_ptr, rs_G, cs_G,
                                 a_ptr, rs_A, cs_A );
    }
    else if ( dt_A == FLA_DOUBLE_COMPLEX )
    {
        dcomplex* g_ptr = ( dcomplex* ) FLA_DOUBLE_COMPLEX_PTR( G );
        dcomplex* a_ptr = ( dcomplex* ) FLA_DOUBLE_COMPLEX_PTR( A );

        FLA_Apply_G_rf_opz_var2( k_G, m_A, n_A,
                                 g_ptr, rs_G, cs_G,
                                 a_ptr, rs_A, cs_A );
    }

    return FLA_SUCCESS;
}
| FLA_Error FLA_Apply_G_rf_opt_var3 | ( | FLA_Obj | G, |
| FLA_Obj | A | ||
| ) |
References FLA_Apply_G_rf_opc_var3(), FLA_Apply_G_rf_opd_var3(), FLA_Apply_G_rf_ops_var3(), FLA_Apply_G_rf_opz_var3(), FLA_Obj_col_stride(), FLA_Obj_datatype(), FLA_Obj_length(), FLA_Obj_row_stride(), and FLA_Obj_width().
{
    // Front end for variant 3: reads the datatype of A plus the dimensions
    // and strides of G and A, then forwards to the typed kernel.
    const FLA_Datatype dt_A = FLA_Obj_datatype( A );

    const int g_width  = FLA_Obj_width( G );
    const int a_length = FLA_Obj_length( A );
    const int a_width  = FLA_Obj_width( A );

    const int g_rs = FLA_Obj_row_stride( G );
    const int g_cs = FLA_Obj_col_stride( G );
    const int a_rs = FLA_Obj_row_stride( A );
    const int a_cs = FLA_Obj_col_stride( A );

    // G is always accessed through the complex pointer macro of the same
    // precision as A; A is accessed with its own datatype.
    if ( dt_A == FLA_FLOAT )
    {
        scomplex* g_buf = ( scomplex* ) FLA_COMPLEX_PTR( G );
        float*    a_buf = ( float* ) FLA_FLOAT_PTR( A );

        FLA_Apply_G_rf_ops_var3( g_width, a_length, a_width,
                                 g_buf, g_rs, g_cs,
                                 a_buf, a_rs, a_cs );
    }
    else if ( dt_A == FLA_DOUBLE )
    {
        dcomplex* g_buf = ( dcomplex* ) FLA_DOUBLE_COMPLEX_PTR( G );
        double*   a_buf = ( double* ) FLA_DOUBLE_PTR( A );

        FLA_Apply_G_rf_opd_var3( g_width, a_length, a_width,
                                 g_buf, g_rs, g_cs,
                                 a_buf, a_rs, a_cs );
    }
    else if ( dt_A == FLA_COMPLEX )
    {
        scomplex* g_buf = ( scomplex* ) FLA_COMPLEX_PTR( G );
        scomplex* a_buf = ( scomplex* ) FLA_COMPLEX_PTR( A );

        FLA_Apply_G_rf_opc_var3( g_width, a_length, a_width,
                                 g_buf, g_rs, g_cs,
                                 a_buf, a_rs, a_cs );
    }
    else if ( dt_A == FLA_DOUBLE_COMPLEX )
    {
        dcomplex* g_buf = ( dcomplex* ) FLA_DOUBLE_COMPLEX_PTR( G );
        dcomplex* a_buf = ( dcomplex* ) FLA_DOUBLE_COMPLEX_PTR( A );

        FLA_Apply_G_rf_opz_var3( g_width, a_length, a_width,
                                 g_buf, g_rs, g_cs,
                                 a_buf, a_rs, a_cs );
    }

    // Any other datatype performs no work; the result is FLA_SUCCESS either way.
    return FLA_SUCCESS;
}
| FLA_Error FLA_Apply_G_rf_opt_var4 | ( | FLA_Obj | G, |
| FLA_Obj | A | ||
| ) |
| FLA_Error FLA_Apply_G_rf_opt_var5 | ( | FLA_Obj | G, |
| FLA_Obj | A | ||
| ) |
| FLA_Error FLA_Apply_G_rf_opt_var6 | ( | FLA_Obj | G, |
| FLA_Obj | A | ||
| ) |
References FLA_Apply_G_rf_opc_var6(), FLA_Apply_G_rf_opd_var6(), FLA_Apply_G_rf_ops_var6(), FLA_Apply_G_rf_opz_var6(), FLA_Obj_col_stride(), FLA_Obj_datatype(), FLA_Obj_length(), FLA_Obj_row_stride(), and FLA_Obj_width().
{
    // Front end for variant 6: reads the datatype of A plus the dimensions
    // and strides of G and A, then forwards to the typed kernel.
    const FLA_Datatype dt_A = FLA_Obj_datatype( A );

    const int g_width  = FLA_Obj_width( G );
    const int a_length = FLA_Obj_length( A );
    const int a_width  = FLA_Obj_width( A );

    const int g_rs = FLA_Obj_row_stride( G );
    const int g_cs = FLA_Obj_col_stride( G );
    const int a_rs = FLA_Obj_row_stride( A );
    const int a_cs = FLA_Obj_col_stride( A );

    // G is always accessed through the complex pointer macro of the same
    // precision as A; A is accessed with its own datatype.
    if ( dt_A == FLA_FLOAT )
    {
        scomplex* g_buf = ( scomplex* ) FLA_COMPLEX_PTR( G );
        float*    a_buf = ( float* ) FLA_FLOAT_PTR( A );

        FLA_Apply_G_rf_ops_var6( g_width, a_length, a_width,
                                 g_buf, g_rs, g_cs,
                                 a_buf, a_rs, a_cs );
    }
    else if ( dt_A == FLA_DOUBLE )
    {
        dcomplex* g_buf = ( dcomplex* ) FLA_DOUBLE_COMPLEX_PTR( G );
        double*   a_buf = ( double* ) FLA_DOUBLE_PTR( A );

        FLA_Apply_G_rf_opd_var6( g_width, a_length, a_width,
                                 g_buf, g_rs, g_cs,
                                 a_buf, a_rs, a_cs );
    }
    else if ( dt_A == FLA_COMPLEX )
    {
        scomplex* g_buf = ( scomplex* ) FLA_COMPLEX_PTR( G );
        scomplex* a_buf = ( scomplex* ) FLA_COMPLEX_PTR( A );

        FLA_Apply_G_rf_opc_var6( g_width, a_length, a_width,
                                 g_buf, g_rs, g_cs,
                                 a_buf, a_rs, a_cs );
    }
    else if ( dt_A == FLA_DOUBLE_COMPLEX )
    {
        dcomplex* g_buf = ( dcomplex* ) FLA_DOUBLE_COMPLEX_PTR( G );
        dcomplex* a_buf = ( dcomplex* ) FLA_DOUBLE_COMPLEX_PTR( A );

        FLA_Apply_G_rf_opz_var6( g_width, a_length, a_width,
                                 g_buf, g_rs, g_cs,
                                 a_buf, a_rs, a_cs );
    }

    // Any other datatype performs no work; the result is FLA_SUCCESS either way.
    return FLA_SUCCESS;
}
| FLA_Error FLA_Apply_G_rf_opt_var7 | ( | FLA_Obj | G, |
| FLA_Obj | A | ||
| ) |
| FLA_Error FLA_Apply_G_rf_opt_var8 | ( | FLA_Obj | G, |
| FLA_Obj | A | ||
| ) |
| FLA_Error FLA_Apply_G_rf_opt_var9 | ( | FLA_Obj | G, |
| FLA_Obj | A | ||
| ) |
References FLA_Apply_G_rf_opc_var9(), FLA_Apply_G_rf_opd_var9(), FLA_Apply_G_rf_ops_var9(), FLA_Apply_G_rf_opz_var9(), FLA_Obj_col_stride(), FLA_Obj_datatype(), FLA_Obj_length(), FLA_Obj_row_stride(), and FLA_Obj_width().
{
    // Front end for variant 9: reads the datatype of A plus the dimensions
    // and strides of G and A, then forwards to the typed kernel.
    const FLA_Datatype dt_A = FLA_Obj_datatype( A );

    const int g_width  = FLA_Obj_width( G );
    const int a_length = FLA_Obj_length( A );
    const int a_width  = FLA_Obj_width( A );

    const int g_rs = FLA_Obj_row_stride( G );
    const int g_cs = FLA_Obj_col_stride( G );
    const int a_rs = FLA_Obj_row_stride( A );
    const int a_cs = FLA_Obj_col_stride( A );

    // G is always accessed through the complex pointer macro of the same
    // precision as A; A is accessed with its own datatype.
    if ( dt_A == FLA_FLOAT )
    {
        scomplex* g_buf = ( scomplex* ) FLA_COMPLEX_PTR( G );
        float*    a_buf = ( float* ) FLA_FLOAT_PTR( A );

        FLA_Apply_G_rf_ops_var9( g_width, a_length, a_width,
                                 g_buf, g_rs, g_cs,
                                 a_buf, a_rs, a_cs );
    }
    else if ( dt_A == FLA_DOUBLE )
    {
        dcomplex* g_buf = ( dcomplex* ) FLA_DOUBLE_COMPLEX_PTR( G );
        double*   a_buf = ( double* ) FLA_DOUBLE_PTR( A );

        FLA_Apply_G_rf_opd_var9( g_width, a_length, a_width,
                                 g_buf, g_rs, g_cs,
                                 a_buf, a_rs, a_cs );
    }
    else if ( dt_A == FLA_COMPLEX )
    {
        scomplex* g_buf = ( scomplex* ) FLA_COMPLEX_PTR( G );
        scomplex* a_buf = ( scomplex* ) FLA_COMPLEX_PTR( A );

        FLA_Apply_G_rf_opc_var9( g_width, a_length, a_width,
                                 g_buf, g_rs, g_cs,
                                 a_buf, a_rs, a_cs );
    }
    else if ( dt_A == FLA_DOUBLE_COMPLEX )
    {
        dcomplex* g_buf = ( dcomplex* ) FLA_DOUBLE_COMPLEX_PTR( G );
        dcomplex* a_buf = ( dcomplex* ) FLA_DOUBLE_COMPLEX_PTR( A );

        FLA_Apply_G_rf_opz_var9( g_width, a_length, a_width,
                                 g_buf, g_rs, g_cs,
                                 a_buf, a_rs, a_cs );
    }

    // Any other datatype performs no work; the result is FLA_SUCCESS either way.
    return FLA_SUCCESS;
}
| FLA_Error FLA_Apply_G_rf_opz_var1 | ( | int | k_G, |
| int | m_A, | ||
| int | n_A, | ||
| dcomplex * | buff_G, | ||
| int | rs_G, | ||
| int | cs_G, | ||
| dcomplex * | buff_A, | ||
| int | rs_A, | ||
| int | cs_A | ||
| ) |
References bli_d0(), bli_d1(), dcomplex::imag, and dcomplex::real.
Referenced by FLA_Apply_G_rf_opt_var1(), FLA_Apply_G_rf_opz_var2(), FLA_Apply_G_rf_opz_var3(), FLA_Apply_G_rf_opz_var6(), and FLA_Apply_G_rf_opz_var9().
{
    // Straightforward application: for each of the k_G columns of G, walk
    // down that column and rotate each adjacent pair of columns of A in
    // turn. Entry j of a G column supplies gamma (real part) and sigma
    // (imaginary part) for the rotation of columns j and j+1 of A.
    double    one  = bli_d1();
    double    zero = bli_d0();
    double    gamma;
    double    sigma;
    dcomplex* a1;
    dcomplex* a2;
    dcomplex* g_col;
    dcomplex* g_entry;
    int       n_rot = n_A - 1;
    int       sweep, pair;

    for ( sweep = 0, g_col = buff_G; sweep < k_G; ++sweep, g_col += cs_G )
    {
        // All three cursors advance together, one column pair per step,
        // whether or not the rotation at that step is applied.
        for ( pair = 0, g_entry = g_col, a1 = buff_A, a2 = buff_A + cs_A;
              pair < n_rot;
              ++pair, g_entry += rs_G, a1 += cs_A, a2 += cs_A )
        {
            gamma = g_entry->real;
            sigma = g_entry->imag;

            // An identity rotation leaves the column pair untouched.
            if ( gamma == one && sigma == zero )
                continue;

            MAC_Apply_G_mx2_opz( m_A,
                                 &gamma,
                                 &sigma,
                                 a1, rs_A,
                                 a2, rs_A );
        }
    }

    return FLA_SUCCESS;
}
| FLA_Error FLA_Apply_G_rf_opz_var2 | ( | int | k_G, |
| int | m_A, | ||
| int | n_A, | ||
| dcomplex * | buff_G, | ||
| int | rs_G, | ||
| int | cs_G, | ||
| dcomplex * | buff_A, | ||
| int | rs_A, | ||
| int | cs_A | ||
| ) |
References bli_d0(), bli_d1(), FLA_Apply_G_rf_opz_var1(), dcomplex::imag, and dcomplex::real.
Referenced by FLA_Apply_G_rf_opt_var2().
{
// Applies the k_G columns of rotations held in buff_G to the columns of
// buff_A. Entry (g,k) of G supplies gamma (its real part) and sigma (its
// imaginary part) for a rotation of columns g and g+1 of A. Rotations are
// visited so that the row index g decreases while the column index k
// increases within each outer step, in three phases: start-up, pipeline,
// and shutdown. Always returns FLA_SUCCESS.
double one = bli_d1();
double zero = bli_d0();
double gamma;
double sigma;
dcomplex* a1;
dcomplex* a2;
dcomplex* g11;
int j, g, k;
int nG, nG_app;
int k_minus_1;
k_minus_1 = k_G - 1;
// nG is the number of adjacent column pairs in A.
nG = n_A - 1;
// Use the simple variant for nG < (k - 1) or k == 1.
if ( nG < k_minus_1 || k_G == 1 )
{
FLA_Apply_G_rf_opz_var1( k_G,
m_A,
n_A,
buff_G, rs_G, cs_G,
buff_A, rs_A, cs_A );
return FLA_SUCCESS;
}
// Start-up phase.
// Step j applies j + 1 rotations: G(j,0), G(j-1,1), ..., G(0,j).
for ( j = 0; j < k_minus_1; ++j )
{
nG_app = j + 1;
for ( k = 0, g = nG_app - 1; k < nG_app; ++k, --g )
{
g11 = buff_G + (g )*rs_G + (k )*cs_G;
a1 = buff_A + (g )*cs_A;
a2 = buff_A + (g + 1)*cs_A;
gamma = g11->real;
sigma = g11->imag;
// Skip the current iteration if the rotation is identity.
if ( gamma == one && sigma == zero ) continue;
MAC_Apply_G_mx2_opz( m_A,
&gamma,
&sigma,
a1, rs_A,
a2, rs_A );
}
}
// Pipeline stage
// Step j applies k_G rotations: G(j,0), G(j-1,1), ..., G(j-k_G+1,k_G-1).
for ( j = k_minus_1; j < nG; ++j )
{
nG_app = k_G;
for ( k = 0, g = j; k < nG_app; ++k, --g )
{
g11 = buff_G + (g )*rs_G + (k )*cs_G;
a1 = buff_A + (g )*cs_A;
a2 = buff_A + (g + 1)*cs_A;
gamma = g11->real;
sigma = g11->imag;
// Skip the current iteration if the rotation is identity.
if ( gamma == one && sigma == zero ) continue;
MAC_Apply_G_mx2_opz( m_A,
&gamma,
&sigma,
a1, rs_A,
a2, rs_A );
}
}
// Shutdown stage
// Each step starts at the last row of rotations (g = nG - 1) and at column
// k = k_G - nG_app, applying the nG_app = nG - j rotations that remain.
for ( j = nG - k_minus_1; j < nG; ++j )
{
nG_app = nG - j;
for ( k = k_G - nG_app, g = nG - 1; k < k_G; ++k, --g )
{
g11 = buff_G + (g )*rs_G + (k )*cs_G;
a1 = buff_A + (g )*cs_A;
a2 = buff_A + (g + 1)*cs_A;
gamma = g11->real;
sigma = g11->imag;
// Skip the current iteration if the rotation is identity.
if ( gamma == one && sigma == zero ) continue;
MAC_Apply_G_mx2_opz( m_A,
&gamma,
&sigma,
a1, rs_A,
a2, rs_A );
}
}
return FLA_SUCCESS;
}
| FLA_Error FLA_Apply_G_rf_opz_var3 | ( | int | k_G, |
| int | m_A, | ||
| int | n_A, | ||
| dcomplex * | buff_G, | ||
| int | rs_G, | ||
| int | cs_G, | ||
| dcomplex * | buff_A, | ||
| int | rs_A, | ||
| int | cs_A | ||
| ) |
References bli_d0(), bli_d1(), FLA_Apply_G_rf_opz_var1(), dcomplex::imag, and dcomplex::real.
Referenced by FLA_Apply_G_rf_opt_var3().
{
// Applies the k_G columns of rotations held in buff_G to the columns of
// buff_A, grouping rotations in 2x2 blocks of G. Entry (g,k) of G supplies
// gamma (real part) and sigma (imaginary part) for a rotation of columns
// g and g+1 of A. A fused step reads G(g,k), G(g+1,k), G(g-1,k+1) and
// G(g,k+1) and updates the four columns g-1 .. g+2 of A (named a1 .. a4).
// Work proceeds in three phases: start-up, pipeline, and shutdown.
// Always returns FLA_SUCCESS.
double one = bli_d1();
double zero = bli_d0();
double gamma23_k1;
double sigma23_k1;
double gamma34_k1;
double sigma34_k1;
double gamma12_k2;
double sigma12_k2;
double gamma23_k2;
double sigma23_k2;
dcomplex* a1;
dcomplex* a2;
dcomplex* a3;
dcomplex* a4;
dcomplex* g23_k1;
dcomplex* g34_k1;
dcomplex* g12_k2;
dcomplex* g23_k2;
int i, j, g, k;
int nG, nG_app;
int n_iter;
int n_left;
int k_minus_1;
int n_fuse;
int k_fuse;
int is_ident23_k1, is_ident34_k1;
int is_ident12_k2, is_ident23_k2;
int has_ident;
k_minus_1 = k_G - 1;
// nG is the number of adjacent column pairs in A.
nG = n_A - 1;
// Fusing factors: two rows of G (n_fuse) by two columns of G (k_fuse).
n_fuse = 2;
k_fuse = 2;
// Use the simple variant for nG < 2(k - 1) or k == 1.
if ( nG < 2*k_minus_1 || k_G == 1 )
{
FLA_Apply_G_rf_opz_var1( k_G,
m_A,
n_A,
buff_G, rs_G, cs_G,
buff_A, rs_A, cs_A );
return FLA_SUCCESS;
}
// Start-up phase.
// j starts at -1 so that the first step runs no fused iterations
// (n_iter == 0) and only the trailing single rotation G(0,0).
for ( j = -1; j < k_minus_1; j += n_fuse )
{
nG_app = j + 2;
n_iter = nG_app / k_fuse;
n_left = 1;
for ( i = 0, k = 0, g = j; i < n_iter; ++i, k += k_fuse, g -= n_fuse )
{
g23_k1 = buff_G + (g )*rs_G + (k )*cs_G;
g34_k1 = buff_G + (g + 1)*rs_G + (k )*cs_G;
g12_k2 = buff_G + (g - 1)*rs_G + (k + 1)*cs_G;
g23_k2 = buff_G + (g )*rs_G + (k + 1)*cs_G;
a1 = buff_A + (g - 1)*cs_A;
a2 = buff_A + (g )*cs_A;
a3 = buff_A + (g + 1)*cs_A;
a4 = buff_A + (g + 2)*cs_A;
gamma23_k1 = g23_k1->real;
sigma23_k1 = g23_k1->imag;
gamma34_k1 = g34_k1->real;
sigma34_k1 = g34_k1->imag;
gamma12_k2 = g12_k2->real;
sigma12_k2 = g12_k2->imag;
gamma23_k2 = g23_k2->real;
sigma23_k2 = g23_k2->imag;
// A rotation counts as identity when gamma == one and sigma == zero.
is_ident23_k1 = ( gamma23_k1 == one && sigma23_k1 == zero );
is_ident34_k1 = ( gamma34_k1 == one && sigma34_k1 == zero );
is_ident12_k2 = ( gamma12_k2 == one && sigma12_k2 == zero );
is_ident23_k2 = ( gamma23_k2 == one && sigma23_k2 == zero );
has_ident = ( is_ident23_k1 || is_ident34_k1 ||
is_ident12_k2 || is_ident23_k2 );
// If any of the four is identity, apply the others one pair at a time;
// otherwise use the fused four-column macro.
if ( has_ident )
{
// Apply to pairs of columns as needed.
if ( !is_ident23_k1 )
MAC_Apply_G_mx2_opz( m_A,
&gamma23_k1,
&sigma23_k1,
a2, rs_A,
a3, rs_A );
if ( !is_ident34_k1 )
MAC_Apply_G_mx2_opz( m_A,
&gamma34_k1,
&sigma34_k1,
a3, rs_A,
a4, rs_A );
if ( !is_ident12_k2 )
MAC_Apply_G_mx2_opz( m_A,
&gamma12_k2,
&sigma12_k2,
a1, rs_A,
a2, rs_A );
if ( !is_ident23_k2 )
MAC_Apply_G_mx2_opz( m_A,
&gamma23_k2,
&sigma23_k2,
a2, rs_A,
a3, rs_A );
}
else
{
// Apply to all four columns.
MAC_Apply_G_mx4s_opz( m_A,
&gamma23_k1,
&sigma23_k1,
&gamma34_k1,
&sigma34_k1,
&gamma12_k2,
&sigma12_k2,
&gamma23_k2,
&sigma23_k2,
a1, rs_A,
a2, rs_A,
a3, rs_A,
a4, rs_A );
}
}
// Trailing single rotation G(g+1,k) on columns g+1 and g+2, using the
// values of g and k left by the fused loop. n_left is always 1 here.
if ( n_left == 1 )
{
g34_k1 = buff_G + (g + 1)*rs_G + (k )*cs_G;
a3 = buff_A + (g + 1)*cs_A;
a4 = buff_A + (g + 2)*cs_A;
gamma34_k1 = g34_k1->real;
sigma34_k1 = g34_k1->imag;
is_ident34_k1 = ( gamma34_k1 == one && sigma34_k1 == zero );
if ( !is_ident34_k1 )
MAC_Apply_G_mx2_opz( m_A,
&gamma34_k1,
&sigma34_k1,
a3, rs_A,
a4, rs_A );
}
}
// Pipeline stage
// j carries over from the start-up loop. Every step covers all k_G columns
// of G: k_G / 2 fused iterations plus, when k_G is odd, one remainder step.
for ( ; j < nG - 1; j += n_fuse )
{
nG_app = k_G;
n_iter = nG_app / k_fuse;
n_left = nG_app % k_fuse;
for ( i = 0, k = 0, g = j; i < n_iter; ++i, k += k_fuse, g -= n_fuse )
{
g23_k1 = buff_G + (g )*rs_G + (k )*cs_G;
g34_k1 = buff_G + (g + 1)*rs_G + (k )*cs_G;
g12_k2 = buff_G + (g - 1)*rs_G + (k + 1)*cs_G;
g23_k2 = buff_G + (g )*rs_G + (k + 1)*cs_G;
a1 = buff_A + (g - 1)*cs_A;
a2 = buff_A + (g )*cs_A;
a3 = buff_A + (g + 1)*cs_A;
a4 = buff_A + (g + 2)*cs_A;
gamma23_k1 = g23_k1->real;
sigma23_k1 = g23_k1->imag;
gamma34_k1 = g34_k1->real;
sigma34_k1 = g34_k1->imag;
gamma12_k2 = g12_k2->real;
sigma12_k2 = g12_k2->imag;
gamma23_k2 = g23_k2->real;
sigma23_k2 = g23_k2->imag;
is_ident23_k1 = ( gamma23_k1 == one && sigma23_k1 == zero );
is_ident34_k1 = ( gamma34_k1 == one && sigma34_k1 == zero );
is_ident12_k2 = ( gamma12_k2 == one && sigma12_k2 == zero );
is_ident23_k2 = ( gamma23_k2 == one && sigma23_k2 == zero );
has_ident = ( is_ident23_k1 || is_ident34_k1 ||
is_ident12_k2 || is_ident23_k2 );
if ( has_ident )
{
// Apply to pairs of columns as needed.
if ( !is_ident23_k1 )
MAC_Apply_G_mx2_opz( m_A,
&gamma23_k1,
&sigma23_k1,
a2, rs_A,
a3, rs_A );
if ( !is_ident34_k1 )
MAC_Apply_G_mx2_opz( m_A,
&gamma34_k1,
&sigma34_k1,
a3, rs_A,
a4, rs_A );
if ( !is_ident12_k2 )
MAC_Apply_G_mx2_opz( m_A,
&gamma12_k2,
&sigma12_k2,
a1, rs_A,
a2, rs_A );
if ( !is_ident23_k2 )
MAC_Apply_G_mx2_opz( m_A,
&gamma23_k2,
&sigma23_k2,
a2, rs_A,
a3, rs_A );
}
else
{
// Apply to all four columns.
MAC_Apply_G_mx4s_opz( m_A,
&gamma23_k1,
&sigma23_k1,
&gamma34_k1,
&sigma34_k1,
&gamma12_k2,
&sigma12_k2,
&gamma23_k2,
&sigma23_k2,
a1, rs_A,
a2, rs_A,
a3, rs_A,
a4, rs_A );
}
}
// Remainder for odd k_G: the two rotations G(g,k) and G(g+1,k) of the last
// column of G, acting on columns g, g+1, g+2.
if ( n_left == 1 )
{
g23_k1 = buff_G + (g )*rs_G + (k )*cs_G;
g34_k1 = buff_G + (g + 1)*rs_G + (k )*cs_G;
a2 = buff_A + (g )*cs_A;
a3 = buff_A + (g + 1)*cs_A;
a4 = buff_A + (g + 2)*cs_A;
gamma23_k1 = g23_k1->real;
sigma23_k1 = g23_k1->imag;
gamma34_k1 = g34_k1->real;
sigma34_k1 = g34_k1->imag;
is_ident23_k1 = ( gamma23_k1 == one && sigma23_k1 == zero );
is_ident34_k1 = ( gamma34_k1 == one && sigma34_k1 == zero );
if ( !is_ident23_k1 && is_ident34_k1 )
{
MAC_Apply_G_mx2_opz( m_A,
&gamma23_k1,
&sigma23_k1,
a2, rs_A,
a3, rs_A );
}
else if ( is_ident23_k1 && !is_ident34_k1 )
{
MAC_Apply_G_mx2_opz( m_A,
&gamma34_k1,
&sigma34_k1,
a3, rs_A,
a4, rs_A );
}
// NOTE(review): this else is also taken when both rotations are identity,
// so the three-column macro still runs in that case - confirm intended.
else
{
MAC_Apply_G_mx3_opz( m_A,
&gamma23_k1,
&sigma23_k1,
&gamma34_k1,
&sigma34_k1,
a2, rs_A,
a3, rs_A,
a4, rs_A );
}
}
}
// Shutdown stage
// Each step starts at the last row of rotations (g = nG - 1) in column j
// of G; j starts at nG % n_fuse and advances by n_fuse.
for ( j = nG % n_fuse; j < k_G; j += n_fuse )
{
g = nG - 1;
k = j;
// Leading single rotation G(g,k) on columns g and g+1; always performed,
// after which k is advanced and g is decremented by one.
{
g23_k1 = buff_G + (g )*rs_G + (k )*cs_G;
a2 = buff_A + (g )*cs_A;
a3 = buff_A + (g + 1)*cs_A;
gamma23_k1 = g23_k1->real;
sigma23_k1 = g23_k1->imag;
is_ident23_k1 = ( gamma23_k1 == one && sigma23_k1 == zero );
if ( !is_ident23_k1 )
MAC_Apply_G_mx2_opz( m_A,
&gamma23_k1,
&sigma23_k1,
a2, rs_A,
a3, rs_A );
++k;
--g;
}
// Remaining columns of G for this step, processed in fused 2x2 groups
// with an optional single-column remainder.
nG_app = k_minus_1 - j;
n_iter = nG_app / k_fuse;
n_left = nG_app % k_fuse;
for ( i = 0; i < n_iter; ++i, k += k_fuse, g -= n_fuse )
{
g23_k1 = buff_G + (g )*rs_G + (k )*cs_G;
g34_k1 = buff_G + (g + 1)*rs_G + (k )*cs_G;
g12_k2 = buff_G + (g - 1)*rs_G + (k + 1)*cs_G;
g23_k2 = buff_G + (g )*rs_G + (k + 1)*cs_G;
a1 = buff_A + (g - 1)*cs_A;
a2 = buff_A + (g )*cs_A;
a3 = buff_A + (g + 1)*cs_A;
a4 = buff_A + (g + 2)*cs_A;
gamma23_k1 = g23_k1->real;
sigma23_k1 = g23_k1->imag;
gamma34_k1 = g34_k1->real;
sigma34_k1 = g34_k1->imag;
gamma12_k2 = g12_k2->real;
sigma12_k2 = g12_k2->imag;
gamma23_k2 = g23_k2->real;
sigma23_k2 = g23_k2->imag;
is_ident23_k1 = ( gamma23_k1 == one && sigma23_k1 == zero );
is_ident34_k1 = ( gamma34_k1 == one && sigma34_k1 == zero );
is_ident12_k2 = ( gamma12_k2 == one && sigma12_k2 == zero );
is_ident23_k2 = ( gamma23_k2 == one && sigma23_k2 == zero );
has_ident = ( is_ident23_k1 || is_ident34_k1 ||
is_ident12_k2 || is_ident23_k2 );
if ( has_ident )
{
// Apply to pairs of columns as needed.
if ( !is_ident23_k1 )
MAC_Apply_G_mx2_opz( m_A,
&gamma23_k1,
&sigma23_k1,
a2, rs_A,
a3, rs_A );
if ( !is_ident34_k1 )
MAC_Apply_G_mx2_opz( m_A,
&gamma34_k1,
&sigma34_k1,
a3, rs_A,
a4, rs_A );
if ( !is_ident12_k2 )
MAC_Apply_G_mx2_opz( m_A,
&gamma12_k2,
&sigma12_k2,
a1, rs_A,
a2, rs_A );
if ( !is_ident23_k2 )
MAC_Apply_G_mx2_opz( m_A,
&gamma23_k2,
&sigma23_k2,
a2, rs_A,
a3, rs_A );
}
else
{
// Apply to all four columns.
MAC_Apply_G_mx4s_opz( m_A,
&gamma23_k1,
&sigma23_k1,
&gamma34_k1,
&sigma34_k1,
&gamma12_k2,
&sigma12_k2,
&gamma23_k2,
&sigma23_k2,
a1, rs_A,
a2, rs_A,
a3, rs_A,
a4, rs_A );
}
}
// Remainder: the two rotations G(g,k) and G(g+1,k) of one column of G,
// acting on columns g, g+1, g+2.
if ( n_left == 1 )
{
g23_k1 = buff_G + (g )*rs_G + (k )*cs_G;
g34_k1 = buff_G + (g + 1)*rs_G + (k )*cs_G;
a2 = buff_A + (g )*cs_A;
a3 = buff_A + (g + 1)*cs_A;
a4 = buff_A + (g + 2)*cs_A;
gamma23_k1 = g23_k1->real;
sigma23_k1 = g23_k1->imag;
gamma34_k1 = g34_k1->real;
sigma34_k1 = g34_k1->imag;
is_ident23_k1 = ( gamma23_k1 == one && sigma23_k1 == zero );
is_ident34_k1 = ( gamma34_k1 == one && sigma34_k1 == zero );
if ( !is_ident23_k1 && is_ident34_k1 )
{
MAC_Apply_G_mx2_opz( m_A,
&gamma23_k1,
&sigma23_k1,
a2, rs_A,
a3, rs_A );
}
else if ( is_ident23_k1 && !is_ident34_k1 )
{
MAC_Apply_G_mx2_opz( m_A,
&gamma34_k1,
&sigma34_k1,
a3, rs_A,
a4, rs_A );
}
// NOTE(review): this else is also taken when both rotations are identity,
// so the three-column macro still runs in that case - confirm intended.
else
{
MAC_Apply_G_mx3_opz( m_A,
&gamma23_k1,
&sigma23_k1,
&gamma34_k1,
&sigma34_k1,
a2, rs_A,
a3, rs_A,
a4, rs_A );
}
}
}
return FLA_SUCCESS;
}
| FLA_Error FLA_Apply_G_rf_opz_var4 | ( | int | k_G, |
| int | m_A, | ||
| int | n_A, | ||
| dcomplex * | buff_G, | ||
| int | rs_G, | ||
| int | cs_G, | ||
| dcomplex * | buff_A, | ||
| int | rs_A, | ||
| int | cs_A | ||
| ) |
| FLA_Error FLA_Apply_G_rf_opz_var5 | ( | int | k_G, |
| int | m_A, | ||
| int | n_A, | ||
| dcomplex * | buff_G, | ||
| int | rs_G, | ||
| int | cs_G, | ||
| dcomplex * | buff_A, | ||
| int | rs_A, | ||
| int | cs_A | ||
| ) |
| FLA_Error FLA_Apply_G_rf_opz_var6 | ( | int | k_G, |
| int | m_A, | ||
| int | n_A, | ||
| dcomplex * | buff_G, | ||
| int | rs_G, | ||
| int | cs_G, | ||
| dcomplex * | buff_A, | ||
| int | rs_A, | ||
| int | cs_A | ||
| ) |
References bli_d0(), bli_d1(), FLA_Apply_G_rf_opz_var1(), dcomplex::imag, and dcomplex::real.
Referenced by FLA_Apply_G_rf_opt_var6().
{
// Applies the k_G columns of rotations held in buff_G to the columns of
// buff_A, fusing rotations in pairs taken from two adjacent columns of G.
// Entry (g,k) of G supplies gamma (real part) and sigma (imaginary part)
// for a rotation of columns g and g+1 of A. A fused step reads
// G(g-1,k+1) (named g12) and G(g,k) (named g23) and updates the three
// columns g-1, g, g+1 of A (named a1, a2, a3). Work proceeds in three
// phases: start-up, pipeline, and shutdown. Always returns FLA_SUCCESS.
double one = bli_d1();
double zero = bli_d0();
double gamma12;
double sigma12;
double gamma23;
double sigma23;
dcomplex* a1;
dcomplex* a2;
dcomplex* a3;
dcomplex* g12;
dcomplex* g23;
int i, j, g, k;
int nG, nG_app;
int n_iter;
int n_left;
int k_minus_1;
int n_fuse;
int is_ident12, is_ident23;
k_minus_1 = k_G - 1;
// nG is the number of adjacent column pairs in A.
nG = n_A - 1;
// Two rotations are fused per inner iteration.
n_fuse = 2;
// Use the simple variant for nG < (k - 1) or k == 1.
if ( nG < k_minus_1 || k_G == 1 )
{
FLA_Apply_G_rf_opz_var1( k_G,
m_A,
n_A,
buff_G, rs_G, cs_G,
buff_A, rs_A, cs_A );
return FLA_SUCCESS;
}
// Start-up phase.
// Step j handles j + 1 rotations: fused pairs first, then one single
// rotation if the count is odd.
for ( j = 0; j < k_minus_1; ++j )
{
nG_app = j + 1;
n_iter = nG_app / n_fuse;
n_left = nG_app % n_fuse;
for ( i = 0, k = 0, g = nG_app - 1; i < n_iter; ++i, k += n_fuse, g -= n_fuse )
{
g12 = buff_G + (g - 1)*rs_G + (k + 1)*cs_G;
g23 = buff_G + (g )*rs_G + (k )*cs_G;
a1 = buff_A + (g - 1)*cs_A;
a2 = buff_A + (g )*cs_A;
a3 = buff_A + (g + 1)*cs_A;
gamma12 = g12->real;
sigma12 = g12->imag;
gamma23 = g23->real;
sigma23 = g23->imag;
// A rotation counts as identity when gamma == one and sigma == zero.
// When both are identity, none of the branches below runs.
is_ident12 = ( gamma12 == one && sigma12 == zero );
is_ident23 = ( gamma23 == one && sigma23 == zero );
if ( !is_ident12 && is_ident23 )
{
// Apply only to columns 1 and 2.
MAC_Apply_G_mx2_opz( m_A,
&gamma12,
&sigma12,
a1, rs_A,
a2, rs_A );
}
else if ( is_ident12 && !is_ident23 )
{
// Apply only to columns 2 and 3.
MAC_Apply_G_mx2_opz( m_A,
&gamma23,
&sigma23,
a2, rs_A,
a3, rs_A );
}
else if ( !is_ident12 && !is_ident23 )
{
// Apply to all three columns.
MAC_Apply_G_mx3b_opz( m_A,
&gamma12,
&sigma12,
&gamma23,
&sigma23,
a1, rs_A,
a2, rs_A,
a3, rs_A );
}
}
// Odd count: one unfused rotation G(g,k) remains, using the values of g
// and k left by the fused loop.
if ( n_left == 1 )
{
g23 = buff_G + (g )*rs_G + (k )*cs_G;
a2 = buff_A + (g )*cs_A;
a3 = buff_A + (g + 1)*cs_A;
gamma23 = g23->real;
sigma23 = g23->imag;
is_ident23 = ( gamma23 == one && sigma23 == zero );
if ( !is_ident23 )
MAC_Apply_G_mx2_opz( m_A,
&gamma23,
&sigma23,
a2, rs_A,
a3, rs_A );
}
}
// Pipeline stage
// Every step handles k_G rotations starting from row g = j of G.
for ( j = k_minus_1; j < nG; ++j )
{
nG_app = k_G;
n_iter = nG_app / n_fuse;
n_left = nG_app % n_fuse;
for ( i = 0, k = 0, g = j; i < n_iter; ++i, k += n_fuse, g -= n_fuse )
{
g12 = buff_G + (g - 1)*rs_G + (k + 1)*cs_G;
g23 = buff_G + (g )*rs_G + (k )*cs_G;
a1 = buff_A + (g - 1)*cs_A;
a2 = buff_A + (g )*cs_A;
a3 = buff_A + (g + 1)*cs_A;
gamma12 = g12->real;
sigma12 = g12->imag;
gamma23 = g23->real;
sigma23 = g23->imag;
is_ident12 = ( gamma12 == one && sigma12 == zero );
is_ident23 = ( gamma23 == one && sigma23 == zero );
if ( !is_ident12 && is_ident23 )
{
// Apply only to columns 1 and 2.
MAC_Apply_G_mx2_opz( m_A,
&gamma12,
&sigma12,
a1, rs_A,
a2, rs_A );
}
else if ( is_ident12 && !is_ident23 )
{
// Apply only to columns 2 and 3.
MAC_Apply_G_mx2_opz( m_A,
&gamma23,
&sigma23,
a2, rs_A,
a3, rs_A );
}
else if ( !is_ident12 && !is_ident23 )
{
// Apply to all three columns.
MAC_Apply_G_mx3b_opz( m_A,
&gamma12,
&sigma12,
&gamma23,
&sigma23,
a1, rs_A,
a2, rs_A,
a3, rs_A );
}
}
// Odd k_G: one unfused rotation G(g,k) remains.
if ( n_left == 1 )
{
g23 = buff_G + (g )*rs_G + (k )*cs_G;
a2 = buff_A + (g )*cs_A;
a3 = buff_A + (g + 1)*cs_A;
gamma23 = g23->real;
sigma23 = g23->imag;
is_ident23 = ( gamma23 == one && sigma23 == zero );
if ( !is_ident23 )
MAC_Apply_G_mx2_opz( m_A,
&gamma23,
&sigma23,
a2, rs_A,
a3, rs_A );
}
}
// Shutdown stage
// Step j starts at the last row of rotations (g = nG - 1) in column j of G
// and handles the k_G - j rotations that remain.
for ( j = 1; j < k_G; ++j )
{
nG_app = k_G - j;
n_iter = nG_app / n_fuse;
n_left = nG_app % n_fuse;
for ( i = 0, k = j, g = nG - 1; i < n_iter; ++i, k += n_fuse, g -= n_fuse )
{
g12 = buff_G + (g - 1)*rs_G + (k + 1)*cs_G;
g23 = buff_G + (g )*rs_G + (k )*cs_G;
a1 = buff_A + (g - 1)*cs_A;
a2 = buff_A + (g )*cs_A;
a3 = buff_A + (g + 1)*cs_A;
gamma12 = g12->real;
sigma12 = g12->imag;
gamma23 = g23->real;
sigma23 = g23->imag;
is_ident12 = ( gamma12 == one && sigma12 == zero );
is_ident23 = ( gamma23 == one && sigma23 == zero );
if ( !is_ident12 && is_ident23 )
{
// Apply only to columns 1 and 2.
MAC_Apply_G_mx2_opz( m_A,
&gamma12,
&sigma12,
a1, rs_A,
a2, rs_A );
}
else if ( is_ident12 && !is_ident23 )
{
// Apply only to columns 2 and 3.
MAC_Apply_G_mx2_opz( m_A,
&gamma23,
&sigma23,
a2, rs_A,
a3, rs_A );
}
else if ( !is_ident12 && !is_ident23 )
{
// Apply to all three columns.
MAC_Apply_G_mx3b_opz( m_A,
&gamma12,
&sigma12,
&gamma23,
&sigma23,
a1, rs_A,
a2, rs_A,
a3, rs_A );
}
}
// Odd count: one unfused rotation G(g,k) remains.
if ( n_left == 1 )
{
g23 = buff_G + (g )*rs_G + (k )*cs_G;
a2 = buff_A + (g )*cs_A;
a3 = buff_A + (g + 1)*cs_A;
gamma23 = g23->real;
sigma23 = g23->imag;
is_ident23 = ( gamma23 == one && sigma23 == zero );
if ( !is_ident23 )
MAC_Apply_G_mx2_opz( m_A,
&gamma23,
&sigma23,
a2, rs_A,
a3, rs_A );
}
}
return FLA_SUCCESS;
}
| FLA_Error FLA_Apply_G_rf_opz_var7 | ( | int | k_G, |
| int | m_A, | ||
| int | n_A, | ||
| dcomplex * | buff_G, | ||
| int | rs_G, | ||
| int | cs_G, | ||
| dcomplex * | buff_A, | ||
| int | rs_A, | ||
| int | cs_A | ||
| ) |
| FLA_Error FLA_Apply_G_rf_opz_var8 | ( | int | k_G, |
| int | m_A, | ||
| int | n_A, | ||
| dcomplex * | buff_G, | ||
| int | rs_G, | ||
| int | cs_G, | ||
| dcomplex * | buff_A, | ||
| int | rs_A, | ||
| int | cs_A | ||
| ) |
| FLA_Error FLA_Apply_G_rf_opz_var9 | ( | int | k_G, |
| int | m_A, | ||
| int | n_A, | ||
| dcomplex * | buff_G, | ||
| int | rs_G, | ||
| int | cs_G, | ||
| dcomplex * | buff_A, | ||
| int | rs_A, | ||
| int | cs_A | ||
| ) |
References bli_d0(), bli_d1(), FLA_Apply_G_rf_opz_var1(), dcomplex::imag, and dcomplex::real.
Referenced by FLA_Apply_G_rf_opt_var9().
{
// Applies the k_G columns of rotations held in buff_G to the columns of
// buff_A, fusing rotations in pairs taken from two adjacent rows of the
// same column of G. Entry (g,k) of G supplies gamma (real part) and sigma
// (imaginary part) for a rotation of columns g and g+1 of A. A fused step
// reads G(g,k) (named g12) and G(g+1,k) (named g23) and updates the three
// columns g, g+1, g+2 of A (named a1, a2, a3). Work proceeds in three
// phases: start-up, pipeline, and shutdown. Always returns FLA_SUCCESS.
double one = bli_d1();
double zero = bli_d0();
double gamma12;
double sigma12;
double gamma23;
double sigma23;
dcomplex* a1;
dcomplex* a2;
dcomplex* a3;
dcomplex* g12;
dcomplex* g23;
int i, j, g, k;
int nG, nG_app;
int n_iter;
int n_left;
int k_minus_1;
int n_fuse;
int is_ident12, is_ident23;
k_minus_1 = k_G - 1;
// nG is the number of adjacent column pairs in A.
nG = n_A - 1;
// The outer loops advance two rows of G per step.
n_fuse = 2;
// Use the simple variant for nG < 2(k - 1) or k == 1.
if ( nG < 2*k_minus_1 || k_G == 1 )
{
FLA_Apply_G_rf_opz_var1( k_G,
m_A,
n_A,
buff_G, rs_G, cs_G,
buff_A, rs_A, cs_A );
return FLA_SUCCESS;
}
// Start-up phase.
// j starts at -1 so that the first step runs no fused iterations
// (n_iter == 0) and only the trailing single rotation G(0,0).
for ( j = -1; j < k_minus_1; j += n_fuse )
{
nG_app = j + 1;
n_iter = nG_app;
n_left = 1;
for ( i = 0, k = 0, g = j; i < n_iter; ++i, ++k, --g )
{
g12 = buff_G + (g )*rs_G + (k )*cs_G;
g23 = buff_G + (g + 1)*rs_G + (k )*cs_G;
a1 = buff_A + (g )*cs_A;
a2 = buff_A + (g + 1)*cs_A;
a3 = buff_A + (g + 2)*cs_A;
gamma12 = g12->real;
sigma12 = g12->imag;
gamma23 = g23->real;
sigma23 = g23->imag;
// A rotation counts as identity when gamma == one and sigma == zero.
// When both are identity, none of the branches below runs.
is_ident12 = ( gamma12 == one && sigma12 == zero );
is_ident23 = ( gamma23 == one && sigma23 == zero );
if ( !is_ident12 && is_ident23 )
{
// Apply only to columns 1 and 2.
MAC_Apply_G_mx2_opz( m_A,
&gamma12,
&sigma12,
a1, rs_A,
a2, rs_A );
}
else if ( is_ident12 && !is_ident23 )
{
// Apply only to columns 2 and 3.
MAC_Apply_G_mx2_opz( m_A,
&gamma23,
&sigma23,
a2, rs_A,
a3, rs_A );
}
else if ( !is_ident12 && !is_ident23 )
{
// Apply to all three columns.
MAC_Apply_G_mx3_opz( m_A,
&gamma12,
&sigma12,
&gamma23,
&sigma23,
a1, rs_A,
a2, rs_A,
a3, rs_A );
}
}
// Trailing single rotation G(g+1,k) on columns g+1 and g+2, using the
// values of g and k left by the fused loop. n_left is always 1 here.
if ( n_left == 1 )
{
g23 = buff_G + (g + 1)*rs_G + (k )*cs_G;
a2 = buff_A + (g + 1)*cs_A;
a3 = buff_A + (g + 2)*cs_A;
gamma23 = g23->real;
sigma23 = g23->imag;
is_ident23 = ( gamma23 == one && sigma23 == zero );
if ( !is_ident23 )
MAC_Apply_G_mx2_opz( m_A,
&gamma23,
&sigma23,
a2, rs_A,
a3, rs_A );
}
}
// Pipeline stage
// j carries over from the start-up loop. Every step runs k_G fused
// iterations, one per column of G; there is no remainder step.
for ( ; j < nG - 1; j += n_fuse )
{
nG_app = k_G;
n_iter = nG_app;
n_left = 0;
for ( i = 0, k = 0, g = j; i < n_iter; ++i, ++k, --g )
{
g12 = buff_G + (g )*rs_G + (k )*cs_G;
g23 = buff_G + (g + 1)*rs_G + (k )*cs_G;
a1 = buff_A + (g )*cs_A;
a2 = buff_A + (g + 1)*cs_A;
a3 = buff_A + (g + 2)*cs_A;
gamma12 = g12->real;
sigma12 = g12->imag;
gamma23 = g23->real;
sigma23 = g23->imag;
is_ident12 = ( gamma12 == one && sigma12 == zero );
is_ident23 = ( gamma23 == one && sigma23 == zero );
if ( !is_ident12 && is_ident23 )
{
// Apply only to columns 1 and 2.
MAC_Apply_G_mx2_opz( m_A,
&gamma12,
&sigma12,
a1, rs_A,
a2, rs_A );
}
else if ( is_ident12 && !is_ident23 )
{
// Apply only to columns 2 and 3.
MAC_Apply_G_mx2_opz( m_A,
&gamma23,
&sigma23,
a2, rs_A,
a3, rs_A );
}
else if ( !is_ident12 && !is_ident23 )
{
// Apply to all three columns.
MAC_Apply_G_mx3_opz( m_A,
&gamma12,
&sigma12,
&gamma23,
&sigma23,
a1, rs_A,
a2, rs_A,
a3, rs_A );
}
}
}
// Shutdown stage
// Each step starts at the last row of rotations (g = nG - 1) in column j
// of G; j starts at nG % n_fuse and advances by n_fuse.
for ( j = nG % n_fuse; j < k_G; j += n_fuse )
{
g = nG - 1;
k = j;
// Leading single rotation G(g,k) on columns g and g+1. n_left is set to 1
// immediately before the test, so this block always runs.
n_left = 1;
if ( n_left == 1 )
{
g12 = buff_G + (g )*rs_G + (k )*cs_G;
a1 = buff_A + (g )*cs_A;
a2 = buff_A + (g + 1)*cs_A;
gamma12 = g12->real;
sigma12 = g12->imag;
is_ident12 = ( gamma12 == one && sigma12 == zero );
if ( !is_ident12 )
MAC_Apply_G_mx2_opz( m_A,
&gamma12,
&sigma12,
a1, rs_A,
a2, rs_A );
++k;
--g;
}
// Remaining columns of G for this step, one fused pair per column.
nG_app = k_minus_1 - j;
n_iter = nG_app;
for ( i = 0; i < n_iter; ++i, ++k, --g )
{
g12 = buff_G + (g )*rs_G + (k )*cs_G;
g23 = buff_G + (g + 1)*rs_G + (k )*cs_G;
a1 = buff_A + (g )*cs_A;
a2 = buff_A + (g + 1)*cs_A;
a3 = buff_A + (g + 2)*cs_A;
gamma12 = g12->real;
sigma12 = g12->imag;
gamma23 = g23->real;
sigma23 = g23->imag;
is_ident12 = ( gamma12 == one && sigma12 == zero );
is_ident23 = ( gamma23 == one && sigma23 == zero );
if ( !is_ident12 && is_ident23 )
{
// Apply only to columns 1 and 2.
MAC_Apply_G_mx2_opz( m_A,
&gamma12,
&sigma12,
a1, rs_A,
a2, rs_A );
}
else if ( is_ident12 && !is_ident23 )
{
// Apply only to columns 2 and 3.
MAC_Apply_G_mx2_opz( m_A,
&gamma23,
&sigma23,
a2, rs_A,
a3, rs_A );
}
else if ( !is_ident12 && !is_ident23 )
{
// Apply to all three columns.
MAC_Apply_G_mx3_opz( m_A,
&gamma12,
&sigma12,
&gamma23,
&sigma23,
a1, rs_A,
a2, rs_A,
a3, rs_A );
}
}
}
return FLA_SUCCESS;
}
1.7.6.1