|
libflame
revision_anchor
|
Go to the source code of this file.
Functions | |
| FLA_Error | FLA_CAQR2_UT_blk_var1 (FLA_Obj U, FLA_Obj D, FLA_Obj T, fla_caqr2ut_t *cntl) |
| FLA_Error | FLA_CAQR2_UT_blk_var2 (FLA_Obj U, FLA_Obj D, FLA_Obj T, fla_caqr2ut_t *cntl) |
| FLA_Error | FLA_CAQR2_UT_unb_var1 (FLA_Obj U, FLA_Obj D, FLA_Obj T) |
| FLA_Error | FLA_CAQR2_UT_opt_var1 (FLA_Obj U, FLA_Obj D, FLA_Obj T) |
| FLA_Error | FLA_CAQR2_UT_ops_var1 (int m_UT, int m_D, float *U, int rs_U, int cs_U, float *D, int rs_D, int cs_D, float *T, int rs_T, int cs_T) |
| FLA_Error | FLA_CAQR2_UT_opd_var1 (int m_UT, int m_D, double *U, int rs_U, int cs_U, double *D, int rs_D, int cs_D, double *T, int rs_T, int cs_T) |
| FLA_Error | FLA_CAQR2_UT_opc_var1 (int m_UT, int m_D, scomplex *U, int rs_U, int cs_U, scomplex *D, int rs_D, int cs_D, scomplex *T, int rs_T, int cs_T) |
| FLA_Error | FLA_CAQR2_UT_opz_var1 (int m_UT, int m_D, dcomplex *U, int rs_U, int cs_U, dcomplex *D, int rs_D, int cs_D, dcomplex *T, int rs_T, int cs_T) |
FLA_Error FLA_CAQR2_UT_blk_var1( FLA_Obj U, FLA_Obj D, FLA_Obj T, fla_caqr2ut_t *cntl )
References FLA_Axpy_internal(), FLA_CAQR2_UT_internal(), FLA_Cont_with_1x3_to_1x2(), FLA_Cont_with_3x3_to_2x2(), FLA_Copy_internal(), FLA_Gemm_internal(), FLA_Merge_2x1(), FLA_MINUS_ONE, FLA_Obj_length(), FLA_Obj_min_dim(), FLA_Obj_width(), FLA_ONE, FLA_Part_1x2(), FLA_Part_2x1(), FLA_Part_2x2(), FLA_Repart_1x2_to_1x3(), FLA_Repart_2x2_to_3x3(), FLA_Trmm_internal(), and FLA_Trsm_internal().
Referenced by FLA_CAQR2_UT_internal().
{
// Blocked variant 1 of the CAQR2_UT operation on the pair ( U, D ).
//
// U and D are swept diagonally in panels of at most b_alg columns. Each
// panel ( U11 together with D1 = [ D01; D11 ] ) is handed to
// FLA_CAQR2_UT_internal() along with the top b rows of the matching panel
// of T; the columns to the right ( U12, D02, D12 ) are then updated from
// D01, D11 and T1T.
//
// Parameters:
//   U    - updated in place; partitioned starting at its top-left corner.
//   D    - updated in place; partitioned starting at row
//          length( D ) - width( D ), column 0.
//   T    - its length sets the algorithmic block size; the columns of T to
//          the right of the current panel are used as workspace ( W12 ).
//   cntl - control tree supplying the control nodes of the sub-operations.
//
// Returns FLA_SUCCESS.
//
// NOTE(review): m_DT is computed as length( D ) - width( D ); presumably D
// has at least as many rows as columns -- confirm against the callers.
FLA_Obj UTL, UTR, U00, U01, U02,
UBL, UBR, U10, U11, U12,
U20, U21, U22;
FLA_Obj DTL, DTR, D00, D01, D02,
DBL, DBR, D10, D11, D12,
D20, D21, D22;
FLA_Obj TL, TR, T0, T1, W12;
FLA_Obj D1;
FLA_Obj W12T, W12B;
FLA_Obj T1T, T2B;
dim_t b_alg, b;
dim_t m_DT;
// Query the algorithmic blocksize by inspecting the length of T.
b_alg = FLA_Obj_length( T );
// Begin partitioning diagonally through D with m - n rows above
// the diagonal.
m_DT = FLA_Obj_length( D ) - FLA_Obj_width( D );
FLA_Part_2x2( U, &UTL, &UTR,
&UBL, &UBR, 0, 0, FLA_TL );
FLA_Part_2x2( D, &DTL, &DTR,
&DBL, &DBR, m_DT, 0, FLA_TL );
FLA_Part_1x2( T, &TL, &TR, 0, FLA_LEFT );
// Iterate until the bottom-right part of U has been exhausted.
while ( FLA_Obj_min_dim( UBR ) > 0 ){
// Panel width: the block size, clipped to what remains of UBR.
b = min( b_alg, FLA_Obj_min_dim( UBR ) );
FLA_Repart_2x2_to_3x3( UTL, /**/ UTR, &U00, /**/ &U01, &U02,
/* ************* */ /* ******************** */
&U10, /**/ &U11, &U12,
UBL, /**/ UBR, &U20, /**/ &U21, &U22,
b, b, FLA_BR );
FLA_Repart_2x2_to_3x3( DTL, /**/ DTR, &D00, /**/ &D01, &D02,
/* ************* */ /* ******************** */
&D10, /**/ &D11, &D12,
DBL, /**/ DBR, &D20, /**/ &D21, &D22,
b, b, FLA_BR );
// T1 is the current b-column panel of T; the columns right of it are
// exposed as W12 and used as workspace below.
FLA_Repart_1x2_to_1x3( TL, /**/ TR, &T0, /**/ &T1, &W12,
b, FLA_RIGHT );
/*------------------------------------------------------------*/
// T1T = FLA_Top_part( T1, b );
FLA_Part_2x1( T1, &T1T,
&T2B, b, FLA_TOP );
// View D01 stacked on top of D11 as the single panel D1.
FLA_Merge_2x1( D01,
D11, &D1 );
// [ U11, ...
// D1, T1 ] = FLA_CAQR2_UT( U11
// D1, T1T );
FLA_CAQR2_UT_internal( U11,
D1, T1T,
FLA_Cntl_sub_caqr2ut( cntl ) );
// The trailing update is skipped when no columns remain to the right.
if ( FLA_Obj_width( U12 ) > 0 )
{
// W12T = FLA_Top_part( W12, b );
FLA_Part_2x1( W12, &W12T,
&W12B, b, FLA_TOP );
// W12T = inv( triu( T1T ) )' * ( U12 + D1' * D2 );
// = inv( triu( T1T ) )' * ( U12 + D01' * D02 + D11' * D12 );
// The sum is accumulated in W12T term by term: D12 is copied in and
// multiplied by D11' (D11 used as upper triangular), D01' * D02 and U12
// are added, and the result is solved against T1T'.
FLA_Copy_internal( D12, W12T,
FLA_Cntl_sub_copy( cntl ) );
FLA_Trmm_internal( FLA_LEFT, FLA_UPPER_TRIANGULAR,
FLA_CONJ_TRANSPOSE, FLA_NONUNIT_DIAG,
FLA_ONE, D11, W12T,
FLA_Cntl_sub_trmm1( cntl ) );
FLA_Gemm_internal( FLA_CONJ_TRANSPOSE, FLA_NO_TRANSPOSE,
FLA_ONE, D01, D02, FLA_ONE, W12T,
FLA_Cntl_sub_gemm1( cntl ) );
FLA_Axpy_internal( FLA_ONE, U12, W12T,
FLA_Cntl_sub_axpy1( cntl ) );
FLA_Trsm_internal( FLA_LEFT, FLA_UPPER_TRIANGULAR,
FLA_CONJ_TRANSPOSE, FLA_NONUNIT_DIAG,
FLA_ONE, T1T, W12T,
FLA_Cntl_sub_trsm( cntl ) );
// U12 = U12 - W12T;
// D2 = D2 - D1 * W12T;
// => D02 = D02 - D01 * W12T;
// D12 = D12 - D11 * W12T;
// U12 and D02 are updated first because the TRMM below overwrites W12T
// with D11 * W12T before it is subtracted from D12.
FLA_Axpy_internal( FLA_MINUS_ONE, W12T, U12,
FLA_Cntl_sub_axpy2( cntl ) );
FLA_Gemm_internal( FLA_NO_TRANSPOSE, FLA_NO_TRANSPOSE,
FLA_MINUS_ONE, D01, W12T, FLA_ONE, D02,
FLA_Cntl_sub_gemm2( cntl ) );
FLA_Trmm_internal( FLA_LEFT, FLA_UPPER_TRIANGULAR,
FLA_NO_TRANSPOSE, FLA_NONUNIT_DIAG,
FLA_ONE, D11, W12T,
FLA_Cntl_sub_trmm2( cntl ) );
FLA_Axpy_internal( FLA_MINUS_ONE, W12T, D12,
FLA_Cntl_sub_axpy3( cntl ) );
}
/*------------------------------------------------------------*/
FLA_Cont_with_3x3_to_2x2( &UTL, /**/ &UTR, U00, U01, /**/ U02,
U10, U11, /**/ U12,
/* ************** */ /* ****************** */
&UBL, /**/ &UBR, U20, U21, /**/ U22,
FLA_TL );
FLA_Cont_with_3x3_to_2x2( &DTL, /**/ &DTR, D00, D01, /**/ D02,
D10, D11, /**/ D12,
/* ************** */ /* ****************** */
&DBL, /**/ &DBR, D20, D21, /**/ D22,
FLA_TL );
FLA_Cont_with_1x3_to_1x2( &TL, /**/ &TR, T0, T1, /**/ W12,
FLA_LEFT );
}
return FLA_SUCCESS;
}
FLA_Error FLA_CAQR2_UT_blk_var2( FLA_Obj U, FLA_Obj D, FLA_Obj T, fla_caqr2ut_t *cntl )
References FLA_CAQR2_UT_internal(), FLA_Cont_with_3x1_to_2x1(), FLA_Determine_blocksize(), FLA_Obj_length(), FLA_Part_2x1(), and FLA_Repart_2x1_to_3x1().
Referenced by FLA_CAQR2_UT_internal().
{
// Blocked variant 2 of the CAQR2_UT operation.
//
// D and T are walked from top to bottom in row panels of b rows, where b
// is chosen by FLA_Determine_blocksize() from the remaining part of D and
// the blocksize stored in cntl. For every panel the same U is combined
// with the current row panels D1 and T1 through FLA_CAQR2_UT_internal().
//
// Parameters:
//   U    - passed unchanged to every sub-call.
//   D    - partitioned into row panels D1.
//   T    - partitioned into row panels T1, using the same b as for D.
//   cntl - control tree supplying the blocksize and the sub-operation node.
//
// Returns FLA_SUCCESS.
FLA_Obj DT, D0,
DB, D1,
D2;
FLA_Obj TT, T0,
TB, T1,
T2;
dim_t b;
FLA_Part_2x1( D, &DT,
&DB, 0, FLA_TOP );
FLA_Part_2x1( T, &TT,
&TB, 0, FLA_TOP );
// Loop until the processed top part DT covers all rows of D.
while ( FLA_Obj_length( DT ) < FLA_Obj_length( D ) ){
b = FLA_Determine_blocksize( DB, FLA_BOTTOM, FLA_Cntl_blocksize( cntl ) );
FLA_Repart_2x1_to_3x1( DT, &D0,
/* ** */ /* ****** */
&D1,
DB, &D2, b, FLA_BOTTOM );
FLA_Repart_2x1_to_3x1( TT, &T0,
/* ** */ /* ****** */
&T1,
TB, &T2, b, FLA_BOTTOM );
/*------------------------------------------------------------*/
// [ U, ...
// D1, T1 ] = FLA_CAQR2_UT( U
// D1, T1 );
FLA_CAQR2_UT_internal( U,
D1, T1,
FLA_Cntl_sub_caqr2ut( cntl ) );
/*------------------------------------------------------------*/
FLA_Cont_with_3x1_to_2x1( &DT, D0,
D1,
/* ** */ /* ****** */
&DB, D2, FLA_TOP );
FLA_Cont_with_3x1_to_2x1( &TT, T0,
T1,
/* ** */ /* ****** */
&TB, T2, FLA_TOP );
}
return FLA_SUCCESS;
}
FLA_Error FLA_CAQR2_UT_opc_var1( int m_UT, int m_D,
                                 scomplex *U, int rs_U, int cs_U,
                                 scomplex *D, int rs_D, int cs_D,
                                 scomplex *T, int rs_T, int cs_T )
References bli_ccopyv(), bli_cgemv(), bli_ctrmv(), BLIS_CONJ_TRANSPOSE, BLIS_NO_CONJUGATE, BLIS_NONUNIT_DIAG, BLIS_UPPER_TRIANGULAR, FLA_Apply_H2_UT_l_opc_var1(), FLA_Househ2_UT_l_opc(), and FLA_ONE.
Referenced by FLA_CAQR2_UT_opt_var1().
{
// scomplex kernel for variant 1 of CAQR2_UT, operating directly on raw
// buffers addressed through row and column strides.
//
// NOTE(review): the body refers to mn_UT, buff_U, buff_D and buff_T while
// the rendered signature lists m_UT, U, D and T -- presumably the
// definition names its parameters as the body does; confirm against the
// source file.
//
// For each column j = 0 .. mn_UT-1 (with i = m_DT + j):
//   1. FLA_Househ2_UT_l_opc() is called with upsilon11 = U( j, j ), the
//      first i + 1 entries of column j of D, and tau11 = T( j, j ).
//   2. FLA_Apply_H2_UT_l_opc_var1() is called on the mn_ahead columns to
//      the right ( u12t in U, D2 in D ).
//   3. t01 = T( 0:j-1, j ) is formed as D00B' * d1B + D00' * d1, where the
//      second term uses only the top m_DT rows and D00B is used as upper
//      triangular.
//
// Returns FLA_SUCCESS.
scomplex* buff_1 = FLA_COMPLEX_PTR( FLA_ONE );
int i, j;
// Number of rows of D above the row where the diagonal sweep starts.
int m_DT = m_D - mn_UT;
for ( i = m_DT, j = 0; j < mn_UT; ++i, ++j )
{
// Element and sub-block addresses for the current column j.
scomplex* upsilon11 = buff_U + (j )*cs_U + (j )*rs_U;
scomplex* u12t = buff_U + (j+1)*cs_U + (j )*rs_U;
scomplex* D00 = buff_D + (0 )*cs_D + (0 )*rs_D;
scomplex* d1 = buff_D + (j )*cs_D + (0 )*rs_D;
scomplex* D2 = buff_D + (j+1)*cs_D + (0 )*rs_D;
scomplex* tau11 = buff_T + (j )*cs_T + (j )*rs_T;
scomplex* t01 = buff_T + (j )*cs_T + (0 )*rs_T;
// Parts of d1 and D00 that start m_DT rows down.
scomplex* d1B = d1 + (m_DT)*rs_D;
scomplex* D00B = D00 + (m_DT)*rs_D;
int m_behind = i;
int n_behind = j;
int mn_ahead = mn_UT - j - 1;
//------------------------------------------------------------//
// FLA_Househ2_UT( FLA_LEFT,
// upsilon11,
// d1, tau11 );
FLA_Househ2_UT_l_opc( m_behind + 1,
upsilon11,
d1, rs_D,
tau11 );
// FLA_Apply_H2_UT( FLA_LEFT, tau11, d1, u12t,
// D2 );
FLA_Apply_H2_UT_l_opc_var1( m_behind + 1,
mn_ahead,
tau11,
d1, rs_D,
u12t, cs_U,
D2, rs_D, cs_D );
// FLA_Copy_external( d01B, t01 );
// FLA_Trmv_external( FLA_UPPER_TRIANGULAR, FLA_CONJ_TRANSPOSE, FLA_NONUNIT_DIAG,
// D00B, t01 );
// FLA_Gemv_external( FLA_CONJ_TRANSPOSE, FLA_ONE, D00T, d01T, FLA_ONE, t01 );
// In the calls below d01B corresponds to d1B, and D00T / d01T to the
// first m_DT rows of D00 / d1.
bli_ccopyv( BLIS_NO_CONJUGATE,
n_behind,
d1B, rs_D,
t01, rs_T );
bli_ctrmv( BLIS_UPPER_TRIANGULAR,
BLIS_CONJ_TRANSPOSE,
BLIS_NONUNIT_DIAG,
n_behind,
D00B, rs_D, cs_D,
t01, rs_T );
bli_cgemv( BLIS_CONJ_TRANSPOSE,
BLIS_NO_CONJUGATE,
m_DT,
n_behind,
buff_1,
D00, rs_D, cs_D,
d1, rs_D,
buff_1,
t01, rs_T );
//------------------------------------------------------------//
}
return FLA_SUCCESS;
}
FLA_Error FLA_CAQR2_UT_opd_var1( int m_UT, int m_D,
                                 double *U, int rs_U, int cs_U,
                                 double *D, int rs_D, int cs_D,
                                 double *T, int rs_T, int cs_T )
References bli_dcopyv(), bli_dgemv(), bli_dtrmv(), BLIS_CONJ_TRANSPOSE, BLIS_NO_CONJUGATE, BLIS_NONUNIT_DIAG, BLIS_UPPER_TRIANGULAR, FLA_Apply_H2_UT_l_opd_var1(), FLA_Househ2_UT_l_opd(), and FLA_ONE.
Referenced by FLA_CAQR2_UT_opt_var1().
{
// double kernel for variant 1 of CAQR2_UT, operating directly on raw
// buffers addressed through row and column strides.
//
// NOTE(review): the body refers to mn_UT, buff_U, buff_D and buff_T while
// the rendered signature lists m_UT, U, D and T -- presumably the
// definition names its parameters as the body does; confirm against the
// source file.
//
// For each column j = 0 .. mn_UT-1 (with i = m_DT + j):
//   1. FLA_Househ2_UT_l_opd() is called with upsilon11 = U( j, j ), the
//      first i + 1 entries of column j of D, and tau11 = T( j, j ).
//   2. FLA_Apply_H2_UT_l_opd_var1() is called on the mn_ahead columns to
//      the right ( u12t in U, D2 in D ).
//   3. t01 = T( 0:j-1, j ) is formed as D00B' * d1B + D00' * d1, where the
//      second term uses only the top m_DT rows and D00B is used as upper
//      triangular.
//
// Returns FLA_SUCCESS.
double* buff_1 = FLA_DOUBLE_PTR( FLA_ONE );
int i, j;
// Number of rows of D above the row where the diagonal sweep starts.
int m_DT = m_D - mn_UT;
for ( i = m_DT, j = 0; j < mn_UT; ++i, ++j )
{
// Element and sub-block addresses for the current column j.
double* upsilon11 = buff_U + (j )*cs_U + (j )*rs_U;
double* u12t = buff_U + (j+1)*cs_U + (j )*rs_U;
double* D00 = buff_D + (0 )*cs_D + (0 )*rs_D;
double* d1 = buff_D + (j )*cs_D + (0 )*rs_D;
double* D2 = buff_D + (j+1)*cs_D + (0 )*rs_D;
double* tau11 = buff_T + (j )*cs_T + (j )*rs_T;
double* t01 = buff_T + (j )*cs_T + (0 )*rs_T;
// Parts of d1 and D00 that start m_DT rows down.
double* d1B = d1 + (m_DT)*rs_D;
double* D00B = D00 + (m_DT)*rs_D;
int m_behind = i;
int n_behind = j;
int mn_ahead = mn_UT - j - 1;
//------------------------------------------------------------//
// FLA_Househ2_UT( FLA_LEFT,
// upsilon11,
// d1, tau11 );
FLA_Househ2_UT_l_opd( m_behind + 1,
upsilon11,
d1, rs_D,
tau11 );
// FLA_Apply_H2_UT( FLA_LEFT, tau11, d1, u12t,
// D2 );
FLA_Apply_H2_UT_l_opd_var1( m_behind + 1,
mn_ahead,
tau11,
d1, rs_D,
u12t, cs_U,
D2, rs_D, cs_D );
// FLA_Copy_external( d01B, t01 );
// FLA_Trmv_external( FLA_UPPER_TRIANGULAR, FLA_CONJ_TRANSPOSE, FLA_NONUNIT_DIAG,
// D00B, t01 );
// FLA_Gemv_external( FLA_CONJ_TRANSPOSE, FLA_ONE, D00T, d01T, FLA_ONE, t01 );
// In the calls below d01B corresponds to d1B, and D00T / d01T to the
// first m_DT rows of D00 / d1.
bli_dcopyv( BLIS_NO_CONJUGATE,
n_behind,
d1B, rs_D,
t01, rs_T );
bli_dtrmv( BLIS_UPPER_TRIANGULAR,
BLIS_CONJ_TRANSPOSE,
BLIS_NONUNIT_DIAG,
n_behind,
D00B, rs_D, cs_D,
t01, rs_T );
bli_dgemv( BLIS_CONJ_TRANSPOSE,
BLIS_NO_CONJUGATE,
m_DT,
n_behind,
buff_1,
D00, rs_D, cs_D,
d1, rs_D,
buff_1,
t01, rs_T );
//------------------------------------------------------------//
}
return FLA_SUCCESS;
}
FLA_Error FLA_CAQR2_UT_ops_var1( int m_UT, int m_D,
                                 float *U, int rs_U, int cs_U,
                                 float *D, int rs_D, int cs_D,
                                 float *T, int rs_T, int cs_T )
References bli_scopyv(), bli_sgemv(), bli_strmv(), BLIS_CONJ_TRANSPOSE, BLIS_NO_CONJUGATE, BLIS_NONUNIT_DIAG, BLIS_UPPER_TRIANGULAR, FLA_Apply_H2_UT_l_ops_var1(), FLA_Househ2_UT_l_ops(), and FLA_ONE.
Referenced by FLA_CAQR2_UT_opt_var1().
{
// float kernel for variant 1 of CAQR2_UT, operating directly on raw
// buffers addressed through row and column strides.
//
// NOTE(review): the body refers to mn_UT, buff_U, buff_D and buff_T while
// the rendered signature lists m_UT, U, D and T -- presumably the
// definition names its parameters as the body does; confirm against the
// source file.
//
// For each column j = 0 .. mn_UT-1 (with i = m_DT + j):
//   1. FLA_Househ2_UT_l_ops() is called with upsilon11 = U( j, j ), the
//      first i + 1 entries of column j of D, and tau11 = T( j, j ).
//   2. FLA_Apply_H2_UT_l_ops_var1() is called on the mn_ahead columns to
//      the right ( u12t in U, D2 in D ).
//   3. t01 = T( 0:j-1, j ) is formed as D00B' * d1B + D00' * d1, where the
//      second term uses only the top m_DT rows and D00B is used as upper
//      triangular.
//
// Returns FLA_SUCCESS.
float* buff_1 = FLA_FLOAT_PTR( FLA_ONE );
int i, j;
// Number of rows of D above the row where the diagonal sweep starts.
int m_DT = m_D - mn_UT;
for ( i = m_DT, j = 0; j < mn_UT; ++i, ++j )
{
// Element and sub-block addresses for the current column j.
float* upsilon11 = buff_U + (j )*cs_U + (j )*rs_U;
float* u12t = buff_U + (j+1)*cs_U + (j )*rs_U;
float* D00 = buff_D + (0 )*cs_D + (0 )*rs_D;
float* d1 = buff_D + (j )*cs_D + (0 )*rs_D;
float* D2 = buff_D + (j+1)*cs_D + (0 )*rs_D;
float* tau11 = buff_T + (j )*cs_T + (j )*rs_T;
float* t01 = buff_T + (j )*cs_T + (0 )*rs_T;
// Parts of d1 and D00 that start m_DT rows down.
float* d1B = d1 + (m_DT)*rs_D;
float* D00B = D00 + (m_DT)*rs_D;
int m_behind = i;
int n_behind = j;
int mn_ahead = mn_UT - j - 1;
//------------------------------------------------------------//
// FLA_Househ2_UT( FLA_LEFT,
// upsilon11,
// d1, tau11 );
FLA_Househ2_UT_l_ops( m_behind + 1,
upsilon11,
d1, rs_D,
tau11 );
// FLA_Apply_H2_UT( FLA_LEFT, tau11, d1, u12t,
// D2 );
FLA_Apply_H2_UT_l_ops_var1( m_behind + 1,
mn_ahead,
tau11,
d1, rs_D,
u12t, cs_U,
D2, rs_D, cs_D );
// FLA_Copy_external( d01B, t01 );
// FLA_Trmv_external( FLA_UPPER_TRIANGULAR, FLA_CONJ_TRANSPOSE, FLA_NONUNIT_DIAG,
// D00B, t01 );
// FLA_Gemv_external( FLA_CONJ_TRANSPOSE, FLA_ONE, D00T, d01T, FLA_ONE, t01 );
// In the calls below d01B corresponds to d1B, and D00T / d01T to the
// first m_DT rows of D00 / d1.
bli_scopyv( BLIS_NO_CONJUGATE,
n_behind,
d1B, rs_D,
t01, rs_T );
bli_strmv( BLIS_UPPER_TRIANGULAR,
BLIS_CONJ_TRANSPOSE,
BLIS_NONUNIT_DIAG,
n_behind,
D00B, rs_D, cs_D,
t01, rs_T );
bli_sgemv( BLIS_CONJ_TRANSPOSE,
BLIS_NO_CONJUGATE,
m_DT,
n_behind,
buff_1,
D00, rs_D, cs_D,
d1, rs_D,
buff_1,
t01, rs_T );
//------------------------------------------------------------//
}
return FLA_SUCCESS;
}
FLA_Error FLA_CAQR2_UT_opt_var1( FLA_Obj U, FLA_Obj D, FLA_Obj T )
References FLA_CAQR2_UT_opc_var1(), FLA_CAQR2_UT_opd_var1(), FLA_CAQR2_UT_ops_var1(), FLA_CAQR2_UT_opz_var1(), FLA_Obj_col_stride(), FLA_Obj_datatype(), FLA_Obj_length(), FLA_Obj_row_stride(), and FLA_Obj_width().
Referenced by FLA_CAQR2_UT_internal().
{
  // Datatype-dispatching front end for the optimized variant 1 kernels.
  //
  // The datatype of U selects one of the typed kernels (ops / opd / opc /
  // opz), which receives the width of U, the length of D, and the buffer
  // pointer plus row and column strides of each of U, D and T. A datatype
  // outside the four handled cases results in no kernel call.
  //
  // Returns FLA_SUCCESS.
  const FLA_Datatype dt_U = FLA_Obj_datatype( U );

  // Dimensions forwarded to the kernels.
  const int width_U  = FLA_Obj_width( U );
  const int length_D = FLA_Obj_length( D );

  // Row and column strides of the three operands.
  const int u_rs = FLA_Obj_row_stride( U );
  const int u_cs = FLA_Obj_col_stride( U );
  const int d_rs = FLA_Obj_row_stride( D );
  const int d_cs = FLA_Obj_col_stride( D );
  const int t_rs = FLA_Obj_row_stride( T );
  const int t_cs = FLA_Obj_col_stride( T );

  if ( dt_U == FLA_FLOAT )
  {
    FLA_CAQR2_UT_ops_var1( width_U, length_D,
                           FLA_FLOAT_PTR( U ), u_rs, u_cs,
                           FLA_FLOAT_PTR( D ), d_rs, d_cs,
                           FLA_FLOAT_PTR( T ), t_rs, t_cs );
  }
  else if ( dt_U == FLA_DOUBLE )
  {
    FLA_CAQR2_UT_opd_var1( width_U, length_D,
                           FLA_DOUBLE_PTR( U ), u_rs, u_cs,
                           FLA_DOUBLE_PTR( D ), d_rs, d_cs,
                           FLA_DOUBLE_PTR( T ), t_rs, t_cs );
  }
  else if ( dt_U == FLA_COMPLEX )
  {
    FLA_CAQR2_UT_opc_var1( width_U, length_D,
                           FLA_COMPLEX_PTR( U ), u_rs, u_cs,
                           FLA_COMPLEX_PTR( D ), d_rs, d_cs,
                           FLA_COMPLEX_PTR( T ), t_rs, t_cs );
  }
  else if ( dt_U == FLA_DOUBLE_COMPLEX )
  {
    FLA_CAQR2_UT_opz_var1( width_U, length_D,
                           FLA_DOUBLE_COMPLEX_PTR( U ), u_rs, u_cs,
                           FLA_DOUBLE_COMPLEX_PTR( D ), d_rs, d_cs,
                           FLA_DOUBLE_COMPLEX_PTR( T ), t_rs, t_cs );
  }

  return FLA_SUCCESS;
}
FLA_Error FLA_CAQR2_UT_opz_var1( int m_UT, int m_D,
                                 dcomplex *U, int rs_U, int cs_U,
                                 dcomplex *D, int rs_D, int cs_D,
                                 dcomplex *T, int rs_T, int cs_T )
References bli_zcopyv(), bli_zgemv(), bli_ztrmv(), BLIS_CONJ_TRANSPOSE, BLIS_NO_CONJUGATE, BLIS_NONUNIT_DIAG, BLIS_UPPER_TRIANGULAR, FLA_Apply_H2_UT_l_opz_var1(), FLA_Househ2_UT_l_opz(), and FLA_ONE.
Referenced by FLA_CAQR2_UT_opt_var1().
{
// dcomplex kernel for variant 1 of CAQR2_UT, operating directly on raw
// buffers addressed through row and column strides.
//
// NOTE(review): the body refers to mn_UT, buff_U, buff_D and buff_T while
// the rendered signature lists m_UT, U, D and T -- presumably the
// definition names its parameters as the body does; confirm against the
// source file.
//
// For each column j = 0 .. mn_UT-1 (with i = m_DT + j):
//   1. FLA_Househ2_UT_l_opz() is called with upsilon11 = U( j, j ), the
//      first i + 1 entries of column j of D, and tau11 = T( j, j ).
//   2. FLA_Apply_H2_UT_l_opz_var1() is called on the mn_ahead columns to
//      the right ( u12t in U, D2 in D ).
//   3. t01 = T( 0:j-1, j ) is formed as D00B' * d1B + D00' * d1, where the
//      second term uses only the top m_DT rows and D00B is used as upper
//      triangular.
//
// Returns FLA_SUCCESS.
dcomplex* buff_1 = FLA_DOUBLE_COMPLEX_PTR( FLA_ONE );
int i, j;
// Number of rows of D above the row where the diagonal sweep starts.
int m_DT = m_D - mn_UT;
for ( i = m_DT, j = 0; j < mn_UT; ++i, ++j )
{
// Element and sub-block addresses for the current column j.
dcomplex* upsilon11 = buff_U + (j )*cs_U + (j )*rs_U;
dcomplex* u12t = buff_U + (j+1)*cs_U + (j )*rs_U;
dcomplex* D00 = buff_D + (0 )*cs_D + (0 )*rs_D;
dcomplex* d1 = buff_D + (j )*cs_D + (0 )*rs_D;
dcomplex* D2 = buff_D + (j+1)*cs_D + (0 )*rs_D;
dcomplex* tau11 = buff_T + (j )*cs_T + (j )*rs_T;
dcomplex* t01 = buff_T + (j )*cs_T + (0 )*rs_T;
// Parts of d1 and D00 that start m_DT rows down.
dcomplex* d1B = d1 + (m_DT)*rs_D;
dcomplex* D00B = D00 + (m_DT)*rs_D;
int m_behind = i;
int n_behind = j;
int mn_ahead = mn_UT - j - 1;
//------------------------------------------------------------//
// FLA_Househ2_UT( FLA_LEFT,
// upsilon11,
// d1, tau11 );
FLA_Househ2_UT_l_opz( m_behind + 1,
upsilon11,
d1, rs_D,
tau11 );
// FLA_Apply_H2_UT( FLA_LEFT, tau11, d1, u12t,
// D2 );
FLA_Apply_H2_UT_l_opz_var1( m_behind + 1,
mn_ahead,
tau11,
d1, rs_D,
u12t, cs_U,
D2, rs_D, cs_D );
// FLA_Copy_external( d01B, t01 );
// FLA_Trmv_external( FLA_UPPER_TRIANGULAR, FLA_CONJ_TRANSPOSE, FLA_NONUNIT_DIAG,
// D00B, t01 );
// FLA_Gemv_external( FLA_CONJ_TRANSPOSE, FLA_ONE, D00T, d01T, FLA_ONE, t01 );
// In the calls below d01B corresponds to d1B, and D00T / d01T to the
// first m_DT rows of D00 / d1.
bli_zcopyv( BLIS_NO_CONJUGATE,
n_behind,
d1B, rs_D,
t01, rs_T );
bli_ztrmv( BLIS_UPPER_TRIANGULAR,
BLIS_CONJ_TRANSPOSE,
BLIS_NONUNIT_DIAG,
n_behind,
D00B, rs_D, cs_D,
t01, rs_T );
bli_zgemv( BLIS_CONJ_TRANSPOSE,
BLIS_NO_CONJUGATE,
m_DT,
n_behind,
buff_1,
D00, rs_D, cs_D,
d1, rs_D,
buff_1,
t01, rs_T );
//------------------------------------------------------------//
}
return FLA_SUCCESS;
}
FLA_Error FLA_CAQR2_UT_unb_var1( FLA_Obj U, FLA_Obj D, FLA_Obj T )
References FLA_Apply_H2_UT(), FLA_Cont_with_3x3_to_2x2(), FLA_Copy_external(), FLA_Gemv_external(), FLA_Househ2_UT(), FLA_Merge_2x1(), FLA_Obj_length(), FLA_Obj_min_dim(), FLA_Obj_width(), FLA_ONE, FLA_Part_2x1(), FLA_Part_2x2(), FLA_Repart_2x2_to_3x3(), and FLA_Trmv_external().
Referenced by FLA_CAQR2_UT_internal().
{
// Unblocked variant 1 of the CAQR2_UT operation, written with FLA_Obj
// partitioning.
//
// U, D and T are swept diagonally one column at a time. For each column a
// Householder transform is computed from upsilon11 and d1 = [ d01; delta11 ],
// applied to the columns to the right ( u12t and D2 = [ D02; d12t ] ), and
// the column t01 of T above tau11 is then formed from D00 and d01.
//
// Parameters:
//   U - updated in place; partitioned starting at its top-left corner.
//   D - updated in place; partitioned starting at row
//       length( D ) - width( D ), column 0.
//   T - receives tau11 on its diagonal and t01 above it.
//
// Returns FLA_SUCCESS.
//
// NOTE(review): m_DT is computed as length( D ) - width( D ); presumably D
// has at least as many rows as columns -- confirm against the callers.
FLA_Obj UTL, UTR, U00, u01, U02,
UBL, UBR, u10t, upsilon11, u12t,
U20, u21, U22;
FLA_Obj DTL, DTR, D00, d01, D02,
DBL, DBR, d10t, delta11, d12t,
D20, d21, D22;
FLA_Obj TTL, TTR, T00, t01, T02,
TBL, TBR, t10t, tau11, t12t,
T20, t21, T22;
FLA_Obj d1, D2;
FLA_Obj d01T,
d01B;
FLA_Obj D00T,
D00B;
dim_t m_DT;
// Begin partitioning diagonally through D with m - n rows above
// the diagonal.
m_DT = FLA_Obj_length( D ) - FLA_Obj_width( D );
FLA_Part_2x2( U, &UTL, &UTR,
&UBL, &UBR, 0, 0, FLA_TL );
FLA_Part_2x2( D, &DTL, &DTR,
&DBL, &DBR, m_DT, 0, FLA_TL );
FLA_Part_2x2( T, &TTL, &TTR,
&TBL, &TBR, 0, 0, FLA_TL );
// Iterate until the bottom-right part of U has been exhausted.
while ( FLA_Obj_min_dim( UBR ) > 0 ){
FLA_Repart_2x2_to_3x3( UTL, /**/ UTR, &U00, /**/ &u01, &U02,
/* ************* */ /* ************************** */
&u10t, /**/ &upsilon11, &u12t,
UBL, /**/ UBR, &U20, /**/ &u21, &U22,
1, 1, FLA_BR );
FLA_Repart_2x2_to_3x3( DTL, /**/ DTR, &D00, /**/ &d01, &D02,
/* ************* */ /* ************************** */
&d10t, /**/ &delta11, &d12t,
DBL, /**/ DBR, &D20, /**/ &d21, &D22,
1, 1, FLA_BR );
FLA_Repart_2x2_to_3x3( TTL, /**/ TTR, &T00, /**/ &t01, &T02,
/* ************* */ /* ************************ */
&t10t, /**/ &tau11, &t12t,
TBL, /**/ TBR, &T20, /**/ &t21, &T22,
1, 1, FLA_BR );
/*------------------------------------------------------------*/
// View the current column of D down to and including delta11 as d1, and
// the columns to its right (same rows) as D2.
FLA_Merge_2x1( d01,
delta11, &d1 );
FLA_Merge_2x1( D02,
d12t, &D2 );
// Compute tau11 and u2 from upsilon11 and d1 such that tau11 and u2
// determine a Householder transform H such that applying H from the
// left to the column vector consisting of upsilon11 and d1 annihilates
// the entries in d1 (and updates upsilon11).
FLA_Househ2_UT( FLA_LEFT,
upsilon11,
d1, tau11 );
// / u12t \ = H / u12t \
// \ D2 / \ D2 /
//
// where H is formed from tau11 and d1.
FLA_Apply_H2_UT( FLA_LEFT, tau11, d1, u12t,
D2 );
// Split d01 and D00 after their first m_DT rows; the bottom part of D00
// is used as upper triangular below.
FLA_Part_2x1( d01, &d01T,
&d01B, m_DT, FLA_TOP );
FLA_Part_2x1( D00, &D00T,
&D00B, m_DT, FLA_TOP );
// t01 = D00' * d01;
// = D00T' * d01T + triu( D00B )' * d01B;
FLA_Copy_external( d01B, t01 );
FLA_Trmv_external( FLA_UPPER_TRIANGULAR, FLA_CONJ_TRANSPOSE, FLA_NONUNIT_DIAG,
D00B, t01 );
FLA_Gemv_external( FLA_CONJ_TRANSPOSE, FLA_ONE, D00T, d01T, FLA_ONE, t01 );
/*------------------------------------------------------------*/
FLA_Cont_with_3x3_to_2x2( &UTL, /**/ &UTR, U00, u01, /**/ U02,
u10t, upsilon11, /**/ u12t,
/* ************** */ /* ************************ */
&UBL, /**/ &UBR, U20, u21, /**/ U22,
FLA_TL );
FLA_Cont_with_3x3_to_2x2( &DTL, /**/ &DTR, D00, d01, /**/ D02,
d10t, delta11, /**/ d12t,
/* ************** */ /* ************************ */
&DBL, /**/ &DBR, D20, d21, /**/ D22,
FLA_TL );
FLA_Cont_with_3x3_to_2x2( &TTL, /**/ &TTR, T00, t01, /**/ T02,
t10t, tau11, /**/ t12t,
/* ************** */ /* ********************** */
&TBL, /**/ &TBR, T20, t21, /**/ T22,
FLA_TL );
}
return FLA_SUCCESS;
}
1.7.6.1