|
libflame
revision_anchor
|
Go to the source code of this file.
| FLA_Error FLA_Tridiag_UT | ( | FLA_Uplo | uplo, |
| FLA_Obj | A, | ||
| FLA_Obj | T | ||
| ) |
References FLA_Check_error_level(), FLA_Obj_is_double_precision(), FLA_Obj_row_stride(), FLA_Tridiag_UT_check(), and FLA_Tridiag_UT_internal().
Referenced by FLA_Hevd_lv_unb_var1(), and FLA_Hevd_lv_unb_var2().
{
FLA_Error r_val;
// Check parameters.
if ( FLA_Check_error_level() >= FLA_MIN_ERROR_CHECKING )
FLA_Tridiag_UT_check( uplo, A, T );
if ( FLA_Obj_row_stride( A ) == 1 &&
FLA_Obj_is_double_precision( A ) )
r_val = FLA_Tridiag_UT_internal( uplo, A, T, fla_tridiagut_cntl_fused );
else
r_val = FLA_Tridiag_UT_internal( uplo, A, T, fla_tridiagut_cntl_nofus );
return r_val;
}
| FLA_Error FLA_Tridiag_UT_create_T | ( | FLA_Obj | A, |
| FLA_Obj * | T | ||
| ) |
References FLA_Obj_create(), FLA_Obj_datatype(), FLA_Obj_min_dim(), FLA_Obj_row_stride(), and FLA_Query_blocksize().
Referenced by FLA_Hevd_lv_unb_var1(), and FLA_Hevd_lv_unb_var2().
{
FLA_Datatype datatype;
dim_t b_alg, k;
dim_t rs_T, cs_T;
// Query the datatype of A.
datatype = FLA_Obj_datatype( A );
// Query the blocksize from the library.
b_alg = FLA_Query_blocksize( datatype, FLA_DIMENSION_MIN );
// Scale the blocksize by a pre-set global constant.
b_alg = ( dim_t )( ( ( double ) b_alg ) * FLA_TRIDIAG_INNER_TO_OUTER_B_RATIO );
// Query the minimum dimension of A.
k = FLA_Obj_min_dim( A );
// Figure out whether T should be row-major or column-major.
if ( FLA_Obj_row_stride( A ) == 1 )
{
rs_T = 1;
cs_T = b_alg;
}
else // if ( FLA_Obj_col_stride( A ) == 1 )
{
rs_T = k;
cs_T = 1;
}
// Create a b_alg x k matrix to hold the block Householder transforms that
// will be accumulated within the tridiagonal reduction algorithm.
FLA_Obj_create( datatype, b_alg, k, rs_T, cs_T, T );
return FLA_SUCCESS;
}
References FLA_Check_error_level(), FLA_Tridiag_UT_extract_diagonals_check(), and FLA_Tridiag_UT_l_extract_diagonals().
Referenced by FLA_Hevd_lv_unb_var1(), and FLA_Hevd_lv_unb_var2().
{
FLA_Error r_val = FLA_SUCCESS;
if ( FLA_Check_error_level() >= FLA_MIN_ERROR_CHECKING )
FLA_Tridiag_UT_extract_diagonals_check( uplo, A, d, e );
if ( uplo == FLA_LOWER_TRIANGULAR )
r_val = FLA_Tridiag_UT_l_extract_diagonals( A, d, e );
//else
// FLA_Tridiag_UT_u_extract_diagonals( A, d, e );
return r_val;
}
| FLA_Error FLA_Tridiag_UT_form_Q | ( | FLA_Uplo | uplo, |
| FLA_Obj | A, | ||
| FLA_Obj | T | ||
| ) |
References FLA_Check_error_level(), FLA_Part_1x2(), FLA_Part_2x2(), FLA_QR_UT_form_Q(), FLA_Tridiag_UT_form_Q_check(), and FLA_Tridiag_UT_shift_U().
Referenced by FLA_Hevd_lv_unb_var1(), and FLA_Hevd_lv_unb_var2().
{
FLA_Error r_val = FLA_SUCCESS;
FLA_Obj ATL, ATR,
ABL, ABR;
FLA_Obj TL, TR;
if ( FLA_Check_error_level() >= FLA_MIN_ERROR_CHECKING )
FLA_Tridiag_UT_form_Q_check( uplo, A, T );
// Shift the Householder vectors one row/column towards the diagonal.
FLA_Tridiag_UT_shift_U( uplo, A );
FLA_Part_2x2( A, &ATL, &ATR,
&ABL, &ABR, 1, 1, FLA_TL );
FLA_Part_1x2( T, &TL, &TR, 1, FLA_RIGHT );
if ( uplo == FLA_LOWER_TRIANGULAR )
{
FLA_QR_UT_form_Q( ABR, TL, ABR );
}
else // if ( uplo == FLA_UPPER_TRIANGULAR )
{
//FLA_LQ_UT_form_Q( A, T, A );
}
return r_val;
}
| FLA_Error FLA_Tridiag_UT_form_Q_l_blk_var1 | ( | FLA_Obj | A, |
| FLA_Obj | T, | ||
| FLA_Obj | W | ||
| ) |
| FLA_Error FLA_Tridiag_UT_form_Q_l_opc_var1 | ( | int | m_A, |
| int | n_AT, | ||
| scomplex * | buff_A, | ||
| int | rs_A, | ||
| int | cs_A, | ||
| scomplex * | buff_T, | ||
| int | rs_T, | ||
| int | cs_T | ||
| ) |
| FLA_Error FLA_Tridiag_UT_form_Q_l_opd_var1 | ( | int | m_A, |
| int | n_AT, | ||
| double * | buff_A, | ||
| int | rs_A, | ||
| int | cs_A, | ||
| double * | buff_T, | ||
| int | rs_T, | ||
| int | cs_T | ||
| ) |
| FLA_Error FLA_Tridiag_UT_form_Q_l_ops_var1 | ( | int | m_A, |
| int | n_AT, | ||
| float * | buff_A, | ||
| int | rs_A, | ||
| int | cs_A, | ||
| float * | buff_T, | ||
| int | rs_T, | ||
| int | cs_T | ||
| ) |
| FLA_Error FLA_Tridiag_UT_form_Q_l_opz_var1 | ( | int | m_A, |
| int | n_AT, | ||
| dcomplex * | buff_A, | ||
| int | rs_A, | ||
| int | cs_A, | ||
| dcomplex * | buff_T, | ||
| int | rs_T, | ||
| int | cs_T | ||
| ) |
| FLA_Error FLA_Tridiag_UT_form_Q_u_blk_var1 | ( | FLA_Obj | A, |
| FLA_Obj | T, | ||
| FLA_Obj | W | ||
| ) |
| FLA_Error FLA_Tridiag_UT_internal | ( | FLA_Uplo | uplo, |
| FLA_Obj | A, | ||
| FLA_Obj | T, | ||
| fla_tridiagut_t * | cntl | ||
| ) |
References FLA_Check_error_level(), FLA_Tridiag_UT_internal_check(), FLA_Tridiag_UT_l(), and FLA_Tridiag_UT_u().
Referenced by FLA_Tridiag_UT().
{
FLA_Error r_val = FLA_SUCCESS;
if ( FLA_Check_error_level() == FLA_FULL_ERROR_CHECKING )
FLA_Tridiag_UT_internal_check( uplo, A, T, cntl );
if ( uplo == FLA_LOWER_TRIANGULAR )
{
r_val = FLA_Tridiag_UT_l( A, T, cntl );
}
else // if ( uplo == FLA_UPPER_TRIANGULAR )
{
r_val = FLA_Tridiag_UT_u( A, T, cntl );
}
return r_val;
}
| FLA_Error FLA_Tridiag_UT_l | ( | FLA_Obj | A, |
| FLA_Obj | T, | ||
| fla_tridiagut_t * | cntl | ||
| ) |
References FLA_Tridiag_UT_l_blf_var2(), FLA_Tridiag_UT_l_blf_var3(), FLA_Tridiag_UT_l_blk_var1(), FLA_Tridiag_UT_l_blk_var2(), FLA_Tridiag_UT_l_blk_var3(), FLA_Tridiag_UT_l_opt_var1(), FLA_Tridiag_UT_l_opt_var2(), FLA_Tridiag_UT_l_opt_var3(), FLA_Tridiag_UT_l_unb_var1(), FLA_Tridiag_UT_l_unb_var2(), and FLA_Tridiag_UT_l_unb_var3().
Referenced by FLA_Tridiag_UT_internal().
{
FLA_Error r_val = FLA_SUCCESS;
if ( FLA_Cntl_variant( cntl ) == FLA_UNBLOCKED_VARIANT1 )
{
r_val = FLA_Tridiag_UT_l_unb_var1( A, T );
}
else if ( FLA_Cntl_variant( cntl ) == FLA_UNBLOCKED_VARIANT2 )
{
r_val = FLA_Tridiag_UT_l_unb_var2( A, T );
}
else if ( FLA_Cntl_variant( cntl ) == FLA_UNBLOCKED_VARIANT3 )
{
r_val = FLA_Tridiag_UT_l_unb_var3( A, T );
}
else if ( FLA_Cntl_variant( cntl ) == FLA_UNB_OPT_VARIANT1 )
{
r_val = FLA_Tridiag_UT_l_opt_var1( A, T );
}
else if ( FLA_Cntl_variant( cntl ) == FLA_UNB_OPT_VARIANT2 )
{
r_val = FLA_Tridiag_UT_l_opt_var2( A, T );
}
else if ( FLA_Cntl_variant( cntl ) == FLA_UNB_OPT_VARIANT3 )
{
r_val = FLA_Tridiag_UT_l_opt_var3( A, T );
}
else if ( FLA_Cntl_variant( cntl ) == FLA_BLOCKED_VARIANT1 )
{
r_val = FLA_Tridiag_UT_l_blk_var1( A, T );
}
else if ( FLA_Cntl_variant( cntl ) == FLA_BLOCKED_VARIANT2 )
{
r_val = FLA_Tridiag_UT_l_blk_var2( A, T );
}
else if ( FLA_Cntl_variant( cntl ) == FLA_BLOCKED_VARIANT3 )
{
r_val = FLA_Tridiag_UT_l_blk_var3( A, T );
}
else if ( FLA_Cntl_variant( cntl ) == FLA_BLK_FUS_VARIANT2 )
{
r_val = FLA_Tridiag_UT_l_blf_var2( A, T );
}
else if ( FLA_Cntl_variant( cntl ) == FLA_BLK_FUS_VARIANT3 )
{
r_val = FLA_Tridiag_UT_l_blf_var3( A, T );
}
else
{
FLA_Check_error_code( FLA_NOT_YET_IMPLEMENTED );
}
return r_val;
}
| FLA_Error FLA_Tridiag_UT_l_extract_diagonals | ( | FLA_Obj | A, |
| FLA_Obj | d, | ||
| FLA_Obj | e | ||
| ) |
References FLA_Obj_col_stride(), FLA_Obj_datatype(), FLA_Obj_length(), FLA_Obj_row_stride(), FLA_Obj_vector_inc(), scomplex::real, and dcomplex::real.
Referenced by FLA_Tridiag_UT_extract_diagonals().
{
FLA_Datatype datatype;
int m_A;
int rs_A, cs_A;
int inc_d;
int inc_e;
int i;
datatype = FLA_Obj_datatype( A );
m_A = FLA_Obj_length( A );
rs_A = FLA_Obj_row_stride( A );
cs_A = FLA_Obj_col_stride( A );
inc_d = FLA_Obj_vector_inc( d );
inc_e = FLA_Obj_vector_inc( e );
switch ( datatype )
{
case FLA_FLOAT:
{
float* buff_A = FLA_FLOAT_PTR( A );
float* buff_d = FLA_FLOAT_PTR( d );
float* buff_e = FLA_FLOAT_PTR( e );
for ( i = 0; i < m_A; ++i )
{
float* alpha11 = buff_A + (i )*cs_A + (i )*rs_A;
float* a21_t = buff_A + (i )*cs_A + (i+1)*rs_A;
float* delta1 = buff_d + (i )*inc_d;
float* epsilon1 = buff_e + (i )*inc_e;
int m_ahead = m_A - i - 1;
// delta1 = alpha11;
*delta1 = *alpha11;
// epsilon1 = a21_t;
if ( m_ahead > 0 )
*epsilon1 = *a21_t;
}
break;
}
case FLA_DOUBLE:
{
double* buff_A = FLA_DOUBLE_PTR( A );
double* buff_d = FLA_DOUBLE_PTR( d );
double* buff_e = FLA_DOUBLE_PTR( e );
for ( i = 0; i < m_A; ++i )
{
double* alpha11 = buff_A + (i )*cs_A + (i )*rs_A;
double* a21_t = buff_A + (i )*cs_A + (i+1)*rs_A;
double* delta1 = buff_d + (i )*inc_d;
double* epsilon1 = buff_e + (i )*inc_e;
int m_ahead = m_A - i - 1;
// delta1 = alpha11;
*delta1 = *alpha11;
// epsilon1 = a21_t;
if ( m_ahead > 0 )
*epsilon1 = *a21_t;
}
break;
}
case FLA_COMPLEX:
{
scomplex* buff_A = FLA_COMPLEX_PTR( A );
float* buff_d = FLA_FLOAT_PTR( d );
float* buff_e = FLA_FLOAT_PTR( e );
for ( i = 0; i < m_A; ++i )
{
scomplex* alpha11 = buff_A + (i )*cs_A + (i )*rs_A;
scomplex* a21_t = buff_A + (i )*cs_A + (i+1)*rs_A;
float* delta1 = buff_d + (i )*inc_d;
float* epsilon1 = buff_e + (i )*inc_e;
int m_ahead = m_A - i - 1;
// delta1 = alpha11;
*delta1 = alpha11->real;
// epsilon1 = a21_t;
if ( m_ahead > 0 )
*epsilon1 = a21_t->real;
}
break;
}
case FLA_DOUBLE_COMPLEX:
{
dcomplex* buff_A = FLA_DOUBLE_COMPLEX_PTR( A );
double* buff_d = FLA_DOUBLE_PTR( d );
double* buff_e = FLA_DOUBLE_PTR( e );
for ( i = 0; i < m_A; ++i )
{
dcomplex* alpha11 = buff_A + (i )*cs_A + (i )*rs_A;
dcomplex* a21_t = buff_A + (i )*cs_A + (i+1)*rs_A;
double* delta1 = buff_d + (i )*inc_d;
double* epsilon1 = buff_e + (i )*inc_e;
int m_ahead = m_A - i - 1;
// delta1 = alpha11;
*delta1 = alpha11->real;
// epsilon1 = a21_t;
if ( m_ahead > 0 )
*epsilon1 = a21_t->real;
}
break;
}
}
return FLA_SUCCESS;
}
References bli_csetv(), bli_dsetv(), bli_ssetv(), bli_zsetv(), BLIS_CONJUGATE, FLA_Obj_col_stride(), FLA_Obj_datatype(), FLA_Obj_length(), FLA_Obj_row_stride(), FLA_Obj_vector_inc(), and FLA_ONE.
Referenced by FLA_Tridiag_UT_realify().
{
FLA_Datatype datatype;
int m_A;
int rs_A, cs_A;
int inc_d;
int i;
datatype = FLA_Obj_datatype( A );
m_A = FLA_Obj_length( A );
rs_A = FLA_Obj_row_stride( A );
cs_A = FLA_Obj_col_stride( A );
inc_d = FLA_Obj_vector_inc( d );
switch ( datatype )
{
case FLA_FLOAT:
{
float* buff_d = FLA_FLOAT_PTR( d );
float* buff_1 = FLA_FLOAT_PTR( FLA_ONE );
bli_ssetv( m_A,
buff_1,
buff_d, inc_d );
break;
}
case FLA_DOUBLE:
{
double* buff_d = FLA_DOUBLE_PTR( d );
double* buff_1 = FLA_DOUBLE_PTR( FLA_ONE );
bli_dsetv( m_A,
buff_1,
buff_d, inc_d );
break;
}
case FLA_COMPLEX:
{
scomplex* buff_A = FLA_COMPLEX_PTR( A );
scomplex* buff_d = FLA_COMPLEX_PTR( d );
scomplex* buff_1 = FLA_COMPLEX_PTR( FLA_ONE );
bli_csetv( 1,
buff_1,
buff_d, inc_d );
for ( i = 1; i < m_A; ++i )
{
scomplex* a10t_r = buff_A + (i-1)*cs_A + (i )*rs_A;
scomplex* a21_t = buff_A + (i )*cs_A + (i+1)*rs_A;
scomplex* delta1 = buff_d + (i )*inc_d;
scomplex absv;
scomplex conj_delta1;
int m_ahead = m_A - i - 1;
// FLA_Copyt( FLA_CONJ_NO_TRANSPOSE, a10t_r, delta1 );
// FLA_Copyt( FLA_NO_TRANSPOSE, a10t_r, absv );
// FLA_Absolute_value( absv );
// FLA_Inv_scal( absv, delta1 );
bli_ccopys( BLIS_CONJUGATE, a10t_r, delta1 );
bli_cabsval2( a10t_r, &absv );
bli_cinvscals( &absv, delta1 );
// FLA_Copyt( FLA_NO_TRANSPOSE, absv, a10t_r );
// FLA_Scalc( FLA_CONJUGATE, delta1, a21_t );
*a10t_r = absv;
if ( m_ahead > 0 )
{
bli_ccopyconj( delta1, &conj_delta1 );
bli_cscals( &conj_delta1, a21_t );
}
}
break;
}
case FLA_DOUBLE_COMPLEX:
{
dcomplex* buff_A = FLA_DOUBLE_COMPLEX_PTR( A );
dcomplex* buff_d = FLA_DOUBLE_COMPLEX_PTR( d );
dcomplex* buff_1 = FLA_DOUBLE_COMPLEX_PTR( FLA_ONE );
bli_zsetv( 1,
buff_1,
buff_d, inc_d );
for ( i = 1; i < m_A; ++i )
{
dcomplex* a10t_r = buff_A + (i-1)*cs_A + (i )*rs_A;
dcomplex* a21_t = buff_A + (i )*cs_A + (i+1)*rs_A;
dcomplex* delta1 = buff_d + (i )*inc_d;
dcomplex absv;
dcomplex conj_delta1;
int m_ahead = m_A - i - 1;
// FLA_Copyt( FLA_CONJ_NO_TRANSPOSE, a10t_r, delta1 );
// FLA_Copyt( FLA_NO_TRANSPOSE, a10t_r, absv );
// FLA_Absolute_value( absv );
// FLA_Inv_scal( absv, delta1 );
bli_zcopys( BLIS_CONJUGATE, a10t_r, delta1 );
bli_zabsval2( a10t_r, &absv );
bli_zinvscals( &absv, delta1 );
// FLA_Copyt( FLA_NO_TRANSPOSE, absv, a10t_r );
// FLA_Scalc( FLA_CONJUGATE, delta1, a21_t );
*a10t_r = absv;
if ( m_ahead > 0 )
{
bli_zcopyconj( delta1, &conj_delta1 );
bli_zscals( &conj_delta1, a21_t );
}
}
break;
}
}
return FLA_SUCCESS;
}
References FLA_Absolute_value(), FLA_Cont_with_3x1_to_2x1(), FLA_Cont_with_3x3_to_2x2(), FLA_Copyt(), FLA_Inv_scal(), FLA_Obj_create(), FLA_Obj_datatype(), FLA_Obj_free(), FLA_Obj_min_dim(), FLA_ONE, FLA_Part_1x2(), FLA_Part_2x1(), FLA_Part_2x2(), FLA_Repart_2x1_to_3x1(), FLA_Repart_2x2_to_3x3(), FLA_Scalc(), and FLA_Set().
{
FLA_Obj ATL, ATR, A00, a01, A02,
ABL, ABR, a10t, alpha11, a12t,
A20, a21, A22;
FLA_Obj dT, d0,
dB, delta1,
d2;
FLA_Obj a10t_l, a10t_r;
FLA_Obj a21_t,
a21_b;
FLA_Obj absv;
FLA_Obj_create( FLA_Obj_datatype( A ), 1, 1, 0, 0, &absv );
FLA_Part_2x2( A, &ATL, &ATR,
&ABL, &ABR, 1, 1, FLA_TL );
FLA_Part_2x1( d, &dT,
&dB, 1, FLA_TOP );
// Set first element of vector d to one.
FLA_Set( FLA_ONE, dT );
while ( FLA_Obj_min_dim( ABR ) > 0 )
{
FLA_Repart_2x2_to_3x3( ATL, /**/ ATR, &A00, /**/ &a01, &A02,
/* ************* */ /* ************************** */
&a10t, /**/ &alpha11, &a12t,
ABL, /**/ ABR, &A20, /**/ &a21, &A22,
1, 1, FLA_BR );
FLA_Repart_2x1_to_3x1( dT, &d0,
/* ** */ /* ****** */
&delta1,
dB, &d2, 1, FLA_BOTTOM );
/*------------------------------------------------------------*/
FLA_Part_1x2( a10t, &a10t_l, &a10t_r, 1, FLA_RIGHT );
FLA_Part_2x1( a21, &a21_t,
&a21_b, 1, FLA_TOP );
// delta1 = conj(a10t_r) / abs(a10t_r);
FLA_Copyt( FLA_CONJ_NO_TRANSPOSE, a10t_r, delta1 );
FLA_Copyt( FLA_NO_TRANSPOSE, a10t_r, absv );
FLA_Absolute_value( absv );
FLA_Inv_scal( absv, delta1 );
// a10t_r = delta1 * a10t_r;
// = abs(a10t_r);
// alpha11 = delta1 * alpha11 * conj(delta1);
// = alpha11;
// a21_t = a21_t * conj(delta1);
FLA_Copyt( FLA_NO_TRANSPOSE, absv, a10t_r );
FLA_Scalc( FLA_CONJUGATE, delta1, a21_t );
/*------------------------------------------------------------*/
FLA_Cont_with_3x3_to_2x2( &ATL, /**/ &ATR, A00, a01, /**/ A02,
a10t, alpha11, /**/ a12t,
/* ************** */ /* ************************ */
&ABL, /**/ &ABR, A20, a21, /**/ A22,
FLA_TL );
FLA_Cont_with_3x1_to_2x1( &dT, d0,
delta1,
/* ** */ /* ****** */
&dB, d2, FLA_TOP );
}
FLA_Obj_free( &absv );
return FLA_SUCCESS;
}
| FLA_Error FLA_Tridiag_UT_realify | ( | FLA_Uplo | uplo, |
| FLA_Obj | A, | ||
| FLA_Obj | d | ||
| ) |
References FLA_Check_error_level(), FLA_Obj_is_real(), FLA_ONE, FLA_Set(), FLA_Tridiag_UT_l_realify_opt(), FLA_Tridiag_UT_realify_check(), and FLA_Tridiag_UT_u_realify_opt().
Referenced by FLA_Hevd_lv_unb_var1(), and FLA_Hevd_lv_unb_var2().
{
FLA_Error r_val = FLA_SUCCESS;
if ( FLA_Check_error_level() >= FLA_MIN_ERROR_CHECKING )
FLA_Tridiag_UT_realify_check( uplo, A, d );
if ( FLA_Obj_is_real( A ) )
{
FLA_Set( FLA_ONE, d );
return FLA_SUCCESS;
}
if ( uplo == FLA_LOWER_TRIANGULAR )
//r_val = FLA_Tridiag_UT_l_realify_unb( A, d );
r_val = FLA_Tridiag_UT_l_realify_opt( A, d );
else
//r_val = FLA_Tridiag_UT_u_realify_unb( A, d );
r_val = FLA_Tridiag_UT_u_realify_opt( A, d );
return r_val;
}
References FLA_Check_error_level(), FLA_Cont_with_1x3_to_1x2(), FLA_Cont_with_3x1_to_2x1(), FLA_Obj_length(), FLA_Part_1x2(), FLA_Part_2x1(), FLA_Repart_1x2_to_1x3(), FLA_Repart_2x1_to_3x1(), FLA_Tridiag_UT_recover_tau_check(), and FLA_Tridiag_UT_recover_tau_submatrix().
{
FLA_Obj TL, TR, T0, T1, T2;
FLA_Obj tT, t0,
tB, t1,
t2;
dim_t b_alg, b;
if ( FLA_Check_error_level() >= FLA_MIN_ERROR_CHECKING )
FLA_Tridiag_UT_recover_tau_check( T, t );
b_alg = FLA_Obj_length( T );
FLA_Part_1x2( T, &TL, &TR, 0, FLA_LEFT );
FLA_Part_2x1( t, &tT,
&tB, 0, FLA_TOP );
while ( FLA_Obj_length( tT ) < FLA_Obj_length( t ) ){
b = min( FLA_Obj_length( tB ), b_alg );
FLA_Repart_1x2_to_1x3( TL, /**/ TR, &T0, /**/ &T1, &T2,
b, FLA_RIGHT );
FLA_Repart_2x1_to_3x1( tT, &t0,
/* ** */ /* ** */
&t1,
tB, &t2, b, FLA_BOTTOM );
/*------------------------------------------------------------*/
FLA_Tridiag_UT_recover_tau_submatrix( T1, t1 );
/*------------------------------------------------------------*/
FLA_Cont_with_1x3_to_1x2( &TL, /**/ &TR, T0, T1, /**/ T2,
FLA_LEFT );
FLA_Cont_with_3x1_to_2x1( &tT, t0,
t1,
/* ** */ /* ** */
&tB, t2, FLA_TOP );
}
return FLA_SUCCESS;
}
| FLA_Error FLA_Tridiag_UT_shift_U | ( | FLA_Uplo | uplo, |
| FLA_Obj | A | ||
| ) |
References FLA_Check_error_level(), FLA_Obj_col_stride(), FLA_Obj_datatype(), FLA_Obj_length(), FLA_Obj_row_stride(), FLA_Tridiag_UT_shift_U_check(), FLA_Tridiag_UT_shift_U_l_opc(), FLA_Tridiag_UT_shift_U_l_opd(), FLA_Tridiag_UT_shift_U_l_ops(), and FLA_Tridiag_UT_shift_U_l_opz().
Referenced by FLA_Tridiag_UT_form_Q().
{
FLA_Datatype datatype;
int m_A;
int rs_A, cs_A;
datatype = FLA_Obj_datatype( A );
m_A = FLA_Obj_length( A );
rs_A = FLA_Obj_row_stride( A );
cs_A = FLA_Obj_col_stride( A );
if ( FLA_Check_error_level() >= FLA_MIN_ERROR_CHECKING )
FLA_Tridiag_UT_shift_U_check( uplo, A );
switch ( datatype )
{
case FLA_FLOAT:
{
float* buff_A = ( float* ) FLA_FLOAT_PTR( A );
if ( uplo == FLA_LOWER_TRIANGULAR )
FLA_Tridiag_UT_shift_U_l_ops( m_A,
buff_A, rs_A, cs_A );
//else // if ( uplo == FLA_UPPER_TRIANGULAR )
// FLA_Tridiag_UT_shift_U_u_ops( m_A,
// buff_A, rs_A, cs_A );
break;
}
case FLA_DOUBLE:
{
double* buff_A = ( double* ) FLA_DOUBLE_PTR( A );
if ( uplo == FLA_LOWER_TRIANGULAR )
FLA_Tridiag_UT_shift_U_l_opd( m_A,
buff_A, rs_A, cs_A );
//else // if ( uplo == FLA_UPPER_TRIANGULAR )
// FLA_Tridiag_UT_shift_U_u_opd( m_A,
// buff_A, rs_A, cs_A );
break;
}
case FLA_COMPLEX:
{
scomplex* buff_A = ( scomplex* ) FLA_COMPLEX_PTR( A );
if ( uplo == FLA_LOWER_TRIANGULAR )
FLA_Tridiag_UT_shift_U_l_opc( m_A,
buff_A, rs_A, cs_A );
//else // if ( uplo == FLA_UPPER_TRIANGULAR )
// FLA_Tridiag_UT_shift_U_u_opc( m_A,
// buff_A, rs_A, cs_A );
break;
}
case FLA_DOUBLE_COMPLEX:
{
dcomplex* buff_A = ( dcomplex* ) FLA_DOUBLE_COMPLEX_PTR( A );
if ( uplo == FLA_LOWER_TRIANGULAR )
FLA_Tridiag_UT_shift_U_l_opz( m_A,
buff_A, rs_A, cs_A );
//else // if ( uplo == FLA_UPPER_TRIANGULAR )
// FLA_Tridiag_UT_shift_U_u_opz( m_A,
// buff_A, rs_A, cs_A );
break;
}
}
return FLA_SUCCESS;
}
| FLA_Error FLA_Tridiag_UT_shift_U_l_opc | ( | int | m_A, |
| scomplex * | buff_A, | ||
| int | rs_A, | ||
| int | cs_A | ||
| ) |
References bli_c0(), bli_c1(), bli_ccopyv(), bli_csetv(), and BLIS_NO_CONJUGATE.
Referenced by FLA_Tridiag_UT_shift_U().
{
scomplex* a00 = buff_A;
scomplex* a10 = buff_A + rs_A;
scomplex zero = bli_c0();
scomplex one = bli_c1();
int j;
for ( j = m_A - 1; j > 0; --j )
{
scomplex* alpha01 = buff_A + (j )*cs_A + (0 )*rs_A;
scomplex* alpha11 = buff_A + (j )*cs_A + (j )*rs_A;
scomplex* a20 = buff_A + (j-1)*cs_A + (j+1)*rs_A;
scomplex* a21 = buff_A + (j )*cs_A + (j+1)*rs_A;
int m_ahead = m_A - j - 1;
*alpha01 = zero;
*alpha11 = one;
bli_ccopyv( BLIS_NO_CONJUGATE,
m_ahead,
a20, rs_A,
a21, rs_A );
}
*a00 = one;
bli_csetv( m_A - 1,
&zero,
a10, rs_A );
return FLA_SUCCESS;
}
| FLA_Error FLA_Tridiag_UT_shift_U_l_opd | ( | int | m_A, |
| double * | buff_A, | ||
| int | rs_A, | ||
| int | cs_A | ||
| ) |
References bli_d0(), bli_d1(), bli_dcopyv(), bli_dsetv(), and BLIS_NO_CONJUGATE.
Referenced by FLA_Tridiag_UT_shift_U().
{
double* a00 = buff_A;
double* a10 = buff_A + rs_A;
double zero = bli_d0();
double one = bli_d1();
int j;
for ( j = m_A - 1; j > 0; --j )
{
double* alpha01 = buff_A + (j )*cs_A + (0 )*rs_A;
double* alpha11 = buff_A + (j )*cs_A + (j )*rs_A;
double* a20 = buff_A + (j-1)*cs_A + (j+1)*rs_A;
double* a21 = buff_A + (j )*cs_A + (j+1)*rs_A;
int m_ahead = m_A - j - 1;
*alpha01 = zero;
*alpha11 = one;
bli_dcopyv( BLIS_NO_CONJUGATE,
m_ahead,
a20, rs_A,
a21, rs_A );
}
*a00 = one;
bli_dsetv( m_A - 1,
&zero,
a10, rs_A );
return FLA_SUCCESS;
}
| FLA_Error FLA_Tridiag_UT_shift_U_l_ops | ( | int | m_A, |
| float * | buff_A, | ||
| int | rs_A, | ||
| int | cs_A | ||
| ) |
References bli_s0(), bli_s1(), bli_scopyv(), bli_ssetv(), and BLIS_NO_CONJUGATE.
Referenced by FLA_Tridiag_UT_shift_U().
{
float* a00 = buff_A;
float* a10 = buff_A + rs_A;
float zero = bli_s0();
float one = bli_s1();
int j;
for ( j = m_A - 1; j > 0; --j )
{
float* alpha01 = buff_A + (j )*cs_A + (0 )*rs_A;
float* alpha11 = buff_A + (j )*cs_A + (j )*rs_A;
float* a20 = buff_A + (j-1)*cs_A + (j+1)*rs_A;
float* a21 = buff_A + (j )*cs_A + (j+1)*rs_A;
int m_ahead = m_A - j - 1;
*alpha01 = zero;
*alpha11 = one;
bli_scopyv( BLIS_NO_CONJUGATE,
m_ahead,
a20, rs_A,
a21, rs_A );
}
*a00 = one;
bli_ssetv( m_A - 1,
&zero,
a10, rs_A );
return FLA_SUCCESS;
}
| FLA_Error FLA_Tridiag_UT_shift_U_l_opz | ( | int | m_A, |
| dcomplex * | buff_A, | ||
| int | rs_A, | ||
| int | cs_A | ||
| ) |
References bli_z0(), bli_z1(), bli_zcopyv(), bli_zsetv(), and BLIS_NO_CONJUGATE.
Referenced by FLA_Tridiag_UT_shift_U().
{
dcomplex* a00 = buff_A;
dcomplex* a10 = buff_A + rs_A;
dcomplex zero = bli_z0();
dcomplex one = bli_z1();
int j;
for ( j = m_A - 1; j > 0; --j )
{
dcomplex* alpha01 = buff_A + (j )*cs_A + (0 )*rs_A;
dcomplex* alpha11 = buff_A + (j )*cs_A + (j )*rs_A;
dcomplex* a20 = buff_A + (j-1)*cs_A + (j+1)*rs_A;
dcomplex* a21 = buff_A + (j )*cs_A + (j+1)*rs_A;
int m_ahead = m_A - j - 1;
*alpha01 = zero;
*alpha11 = one;
bli_zcopyv( BLIS_NO_CONJUGATE,
m_ahead,
a20, rs_A,
a21, rs_A );
}
*a00 = one;
bli_zsetv( m_A - 1,
&zero,
a10, rs_A );
return FLA_SUCCESS;
}
| FLA_Error FLA_Tridiag_UT_shift_U_u_opc | ( | int | m_A, |
| scomplex * | buff_A, | ||
| int | rs_A, | ||
| int | cs_A | ||
| ) |
| FLA_Error FLA_Tridiag_UT_shift_U_u_opd | ( | int | m_A, |
| double * | buff_A, | ||
| int | rs_A, | ||
| int | cs_A | ||
| ) |
| FLA_Error FLA_Tridiag_UT_shift_U_u_ops | ( | int | m_A, |
| float * | buff_A, | ||
| int | rs_A, | ||
| int | cs_A | ||
| ) |
| FLA_Error FLA_Tridiag_UT_shift_U_u_opz | ( | int | m_A, |
| dcomplex * | buff_A, | ||
| int | rs_A, | ||
| int | cs_A | ||
| ) |
| FLA_Error FLA_Tridiag_UT_u | ( | FLA_Obj | A, |
| FLA_Obj | T, | ||
| fla_tridiagut_t * | cntl | ||
| ) |
Referenced by FLA_Tridiag_UT_internal().
{
FLA_Error r_val = FLA_SUCCESS;
/*
if ( FLA_Cntl_variant( cntl ) == FLA_UNBLOCKED_VARIANT1 )
{
r_val = FLA_Tridiag_UT_u_unb_var1( A, T );
}
else if ( FLA_Cntl_variant( cntl ) == FLA_UNBLOCKED_VARIANT2 )
{
r_val = FLA_Tridiag_UT_u_unb_var2( A, T );
}
else if ( FLA_Cntl_variant( cntl ) == FLA_UNBLOCKED_VARIANT3 )
{
r_val = FLA_Tridiag_UT_u_unb_var3( A, T );
}
else if ( FLA_Cntl_variant( cntl ) == FLA_UNB_OPT_VARIANT1 )
{
r_val = FLA_Tridiag_UT_u_opt_var1( A, T );
}
else if ( FLA_Cntl_variant( cntl ) == FLA_UNB_OPT_VARIANT2 )
{
r_val = FLA_Tridiag_UT_u_opt_var2( A, T );
}
else if ( FLA_Cntl_variant( cntl ) == FLA_UNB_OPT_VARIANT3 )
{
r_val = FLA_Tridiag_UT_u_opt_var3( A, T );
}
else if ( FLA_Cntl_variant( cntl ) == FLA_BLOCKED_VARIANT1 )
{
r_val = FLA_Tridiag_UT_u_blk_var1( A, T );
}
else if ( FLA_Cntl_variant( cntl ) == FLA_BLOCKED_VARIANT2 )
{
r_val = FLA_Tridiag_UT_u_blk_var2( A, T );
}
else if ( FLA_Cntl_variant( cntl ) == FLA_BLOCKED_VARIANT3 )
{
r_val = FLA_Tridiag_UT_u_blk_var3( A, T );
}
else
*/
{
FLA_Check_error_code( FLA_NOT_YET_IMPLEMENTED );
}
return r_val;
}
| FLA_Error FLA_Tridiag_UT_u_extract_diagonals | ( | FLA_Obj | A, |
| FLA_Obj | d, | ||
| FLA_Obj | e | ||
| ) |
References bli_csetv(), bli_dsetv(), bli_ssetv(), bli_zsetv(), BLIS_CONJUGATE, FLA_Obj_col_stride(), FLA_Obj_datatype(), FLA_Obj_length(), FLA_Obj_row_stride(), FLA_Obj_vector_inc(), and FLA_ONE.
Referenced by FLA_Tridiag_UT_realify().
{
FLA_Datatype datatype;
int m_A;
int rs_A, cs_A;
int inc_d;
int i;
datatype = FLA_Obj_datatype( A );
m_A = FLA_Obj_length( A );
rs_A = FLA_Obj_row_stride( A );
cs_A = FLA_Obj_col_stride( A );
inc_d = FLA_Obj_vector_inc( d );
switch ( datatype )
{
case FLA_FLOAT:
{
float* buff_d = FLA_FLOAT_PTR( d );
float* buff_1 = FLA_FLOAT_PTR( FLA_ONE );
bli_ssetv( m_A,
buff_1,
buff_d, inc_d );
break;
}
case FLA_DOUBLE:
{
double* buff_d = FLA_DOUBLE_PTR( d );
double* buff_1 = FLA_DOUBLE_PTR( FLA_ONE );
bli_dsetv( m_A,
buff_1,
buff_d, inc_d );
break;
}
case FLA_COMPLEX:
{
scomplex* buff_A = FLA_COMPLEX_PTR( A );
scomplex* buff_d = FLA_COMPLEX_PTR( d );
scomplex* buff_1 = FLA_COMPLEX_PTR( FLA_ONE );
bli_csetv( 1,
buff_1,
buff_d, inc_d );
for ( i = 1; i < m_A; ++i )
{
scomplex* a01_b = buff_A + (i )*cs_A + (i-1)*rs_A;
scomplex* a12t_l = buff_A + (i+1)*cs_A + (i )*rs_A;
scomplex* delta1 = buff_d + (i )*inc_d;
scomplex absv;
scomplex conj_delta1;
int m_ahead = m_A - i - 1;
// FLA_Copyt( FLA_CONJ_NO_TRANSPOSE, a01_b, delta1 );
// FLA_Copyt( FLA_NO_TRANSPOSE, a01_b, absv );
// FLA_Absolute_value( absv );
// FLA_Inv_scal( absv, delta1 );
bli_ccopys( BLIS_CONJUGATE, a01_b, delta1 );
bli_cabsval2( a01_b, &absv );
bli_cinvscals( &absv, delta1 );
// FLA_Copyt( FLA_NO_TRANSPOSE, absv, a01_b );
// FLA_Scalc( FLA_CONJUGATE, delta1, a12t_l );
*a01_b = absv;
if ( m_ahead > 0 )
{
bli_ccopyconj( delta1, &conj_delta1 );
bli_cscals( &conj_delta1, a12t_l );
}
}
break;
}
case FLA_DOUBLE_COMPLEX:
{
dcomplex* buff_A = FLA_DOUBLE_COMPLEX_PTR( A );
dcomplex* buff_d = FLA_DOUBLE_COMPLEX_PTR( d );
dcomplex* buff_1 = FLA_DOUBLE_COMPLEX_PTR( FLA_ONE );
bli_zsetv( 1,
buff_1,
buff_d, inc_d );
for ( i = 1; i < m_A; ++i )
{
dcomplex* a01_b = buff_A + (i )*cs_A + (i-1)*rs_A;
dcomplex* a12t_l = buff_A + (i+1)*cs_A + (i )*rs_A;
dcomplex* delta1 = buff_d + (i )*inc_d;
dcomplex absv;
dcomplex conj_delta1;
int m_ahead = m_A - i - 1;
// FLA_Copyt( FLA_CONJ_NO_TRANSPOSE, a01_b, delta1 );
// FLA_Copyt( FLA_NO_TRANSPOSE, a01_b, absv );
// FLA_Absolute_value( absv );
// FLA_Inv_scal( absv, delta1 );
bli_zcopys( BLIS_CONJUGATE, a01_b, delta1 );
bli_zabsval2( a01_b, &absv );
bli_zinvscals( &absv, delta1 );
// FLA_Copyt( FLA_NO_TRANSPOSE, absv, a01_b );
// FLA_Scalc( FLA_CONJUGATE, delta1, a12t_l );
*a01_b = absv;
if ( m_ahead > 0 )
{
bli_zcopyconj( delta1, &conj_delta1 );
bli_zscals( &conj_delta1, a12t_l );
}
}
break;
}
}
return FLA_SUCCESS;
}
References FLA_Absolute_value(), FLA_Cont_with_3x1_to_2x1(), FLA_Cont_with_3x3_to_2x2(), FLA_Copyt(), FLA_Inv_scal(), FLA_Obj_create(), FLA_Obj_datatype(), FLA_Obj_free(), FLA_Obj_min_dim(), FLA_ONE, FLA_Part_1x2(), FLA_Part_2x1(), FLA_Part_2x2(), FLA_Repart_2x1_to_3x1(), FLA_Repart_2x2_to_3x3(), FLA_Scalc(), and FLA_Set().
{
FLA_Obj ATL, ATR, A00, a01, A02,
ABL, ABR, a10t, alpha11, a12t,
A20, a21, A22;
FLA_Obj dT, d0,
dB, delta1,
d2;
FLA_Obj a01_t,
a01_b;
FLA_Obj a12t_l, a12t_r;
FLA_Obj absv;
FLA_Obj_create( FLA_Obj_datatype( A ), 1, 1, 0, 0, &absv );
FLA_Part_2x2( A, &ATL, &ATR,
&ABL, &ABR, 1, 1, FLA_TL );
FLA_Part_2x1( d, &dT,
&dB, 1, FLA_TOP );
// Set first element of vector d to one.
FLA_Set( FLA_ONE, dT );
while ( FLA_Obj_min_dim( ABR ) > 0 )
{
FLA_Repart_2x2_to_3x3( ATL, /**/ ATR, &A00, /**/ &a01, &A02,
/* ************* */ /* ************************** */
&a10t, /**/ &alpha11, &a12t,
ABL, /**/ ABR, &A20, /**/ &a21, &A22,
1, 1, FLA_BR );
FLA_Repart_2x1_to_3x1( dT, &d0,
/* ** */ /* ****** */
&delta1,
dB, &d2, 1, FLA_BOTTOM );
/*------------------------------------------------------------*/
FLA_Part_2x1( a01, &a01_t,
&a01_b, 1, FLA_BOTTOM );
FLA_Part_1x2( a12t, &a12t_l, &a12t_r, 1, FLA_LEFT );
// delta1 = conj(a01_b) / abs(a01_b);
FLA_Copyt( FLA_CONJ_NO_TRANSPOSE, a01_b, delta1 );
FLA_Copyt( FLA_NO_TRANSPOSE, a01_b, absv );
FLA_Absolute_value( absv );
FLA_Inv_scal( absv, delta1 );
// a01_b = delta1 * a01_b;
// = abs(a01_b);
// alpha11 = delta1 * alpha11 * conj(delta1);
// = alpha11;
// a12t_l = a12t_l * conj(delta1);
FLA_Copyt( FLA_NO_TRANSPOSE, absv, a01_b );
FLA_Scalc( FLA_CONJUGATE, delta1, a12t_l );
/*------------------------------------------------------------*/
FLA_Cont_with_3x3_to_2x2( &ATL, /**/ &ATR, A00, a01, /**/ A02,
a10t, alpha11, /**/ a12t,
/* ************** */ /* ************************ */
&ABL, /**/ &ABR, A20, a21, /**/ A22,
FLA_TL );
FLA_Cont_with_3x1_to_2x1( &dT, d0,
delta1,
/* ** */ /* ****** */
&dB, d2, FLA_TOP );
}
FLA_Obj_free( &absv );
return FLA_SUCCESS;
}
1.7.6.1