|
libflame
revision_anchor
|
Go to the source code of this file.
Functions | |
| FLA_Error | FLA_Bsvd_compute_shift (FLA_Obj tol, FLA_Obj sminl, FLA_Obj smax, FLA_Obj d, FLA_Obj e, FLA_Obj shift) |
| FLA_Error | FLA_Bsvd_compute_shift_ops (int m_A, float tol, float sminl, float smax, float *buff_d, int inc_d, float *buff_e, int inc_e, float *shift) |
| FLA_Error | FLA_Bsvd_compute_shift_opd (int m_A, double tol, double sminl, double smax, double *buff_d, int inc_d, double *buff_e, int inc_e, double *shift) |
| FLA_Error | FLA_Bsvd_compute_tol_thresh (FLA_Obj tolmul, FLA_Obj maxit, FLA_Obj d, FLA_Obj e, FLA_Obj tol, FLA_Obj thresh) |
| FLA_Error | FLA_Bsvd_compute_tol_thresh_ops (int m_A, float tolmul, float maxit, float *buff_d, int inc_d, float *buff_e, int inc_e, float *tol, float *thresh) |
| FLA_Error | FLA_Bsvd_compute_tol_thresh_opd (int m_A, double tolmul, double maxit, double *buff_d, int inc_d, double *buff_e, int inc_e, double *tol, double *thresh) |
| FLA_Error | FLA_Bsvd_find_converged (FLA_Obj tol, FLA_Obj d, FLA_Obj e, FLA_Obj sminl) |
| FLA_Error | FLA_Bsvd_find_converged_ops (int m_A, float tol, float *buff_d, int inc_d, float *buff_e, int inc_e, float *sminl) |
| FLA_Error | FLA_Bsvd_find_converged_opd (int m_A, double tol, double *buff_d, int inc_d, double *buff_e, int inc_e, double *sminl) |
| FLA_Error | FLA_Bsvd_find_max_min (FLA_Obj d, FLA_Obj e, FLA_Obj smax, FLA_Obj smin) |
| FLA_Error | FLA_Bsvd_find_max_min_ops (int m_A, float *buff_d, int inc_d, float *buff_e, int inc_e, float *smax, float *smin) |
| FLA_Error | FLA_Bsvd_find_max_min_opd (int m_A, double *buff_d, int inc_d, double *buff_e, int inc_e, double *smax, double *smin) |
| FLA_Error | FLA_Bsvd_find_submatrix_ops (int mn_A, int ij_begin, float *buff_d, int inc_d, float *buff_e, int inc_e, int *ijTL, int *ijBR) |
| FLA_Error | FLA_Bsvd_find_submatrix_opd (int mn_A, int ij_begin, double *buff_d, int inc_d, double *buff_e, int inc_e, int *ijTL, int *ijBR) |
| FLA_Error | FLA_Bsvd_v_opt_var1 (dim_t n_iter_max, FLA_Obj d, FLA_Obj e, FLA_Obj G, FLA_Obj H, FLA_Obj U, FLA_Obj V, dim_t b_alg) |
| FLA_Error | FLA_Bsvd_v_ops_var1 (int min_m_n, int m_U, int m_V, int n_GH, int n_iter_max, float *buff_d, int inc_d, float *buff_e, int inc_e, scomplex *buff_G, int rs_G, int cs_G, scomplex *buff_H, int rs_H, int cs_H, float *buff_U, int rs_U, int cs_U, float *buff_V, int rs_V, int cs_V, int b_alg) |
| FLA_Error | FLA_Bsvd_v_opd_var1 (int min_m_n, int m_U, int m_V, int n_GH, int n_iter_max, double *buff_d, int inc_d, double *buff_e, int inc_e, dcomplex *buff_G, int rs_G, int cs_G, dcomplex *buff_H, int rs_H, int cs_H, double *buff_U, int rs_U, int cs_U, double *buff_V, int rs_V, int cs_V, int b_alg) |
| FLA_Error | FLA_Bsvd_v_opc_var1 (int min_m_n, int m_U, int m_V, int n_GH, int n_iter_max, float *buff_d, int inc_d, float *buff_e, int inc_e, scomplex *buff_G, int rs_G, int cs_G, scomplex *buff_H, int rs_H, int cs_H, scomplex *buff_U, int rs_U, int cs_U, scomplex *buff_V, int rs_V, int cs_V, int b_alg) |
| FLA_Error | FLA_Bsvd_v_opz_var1 (int min_m_n, int m_U, int m_V, int n_GH, int n_iter_max, double *buff_d, int inc_d, double *buff_e, int inc_e, dcomplex *buff_G, int rs_G, int cs_G, dcomplex *buff_H, int rs_H, int cs_H, dcomplex *buff_U, int rs_U, int cs_U, dcomplex *buff_V, int rs_V, int cs_V, int b_alg) |
| FLA_Error | FLA_Bsvd_v_opt_var2 (dim_t n_iter_max, FLA_Obj d, FLA_Obj e, FLA_Obj G, FLA_Obj H, FLA_Obj RG, FLA_Obj RH, FLA_Obj W, FLA_Obj U, FLA_Obj V, dim_t b_alg) |
| FLA_Error | FLA_Bsvd_v_ops_var2 (int min_m_n, int m_U, int m_V, int n_GH, int n_iter_max, float *buff_d, int inc_d, float *buff_e, int inc_e, scomplex *buff_G, int rs_G, int cs_G, scomplex *buff_H, int rs_H, int cs_H, float *buff_RG, int rs_RG, int cs_RG, float *buff_RH, int rs_RH, int cs_RH, float *buff_W, int rs_W, int cs_W, float *buff_U, int rs_U, int cs_U, float *buff_V, int rs_V, int cs_V, int b_alg) |
| FLA_Error | FLA_Bsvd_v_opd_var2 (int min_m_n, int m_U, int m_V, int n_GH, int n_iter_max, double *buff_d, int inc_d, double *buff_e, int inc_e, dcomplex *buff_G, int rs_G, int cs_G, dcomplex *buff_H, int rs_H, int cs_H, double *buff_RG, int rs_RG, int cs_RG, double *buff_RH, int rs_RH, int cs_RH, double *buff_W, int rs_W, int cs_W, double *buff_U, int rs_U, int cs_U, double *buff_V, int rs_V, int cs_V, int b_alg) |
| FLA_Error | FLA_Bsvd_v_opc_var2 (int min_m_n, int m_U, int m_V, int n_GH, int n_iter_max, float *buff_d, int inc_d, float *buff_e, int inc_e, scomplex *buff_G, int rs_G, int cs_G, scomplex *buff_H, int rs_H, int cs_H, float *buff_RG, int rs_RG, int cs_RG, float *buff_RH, int rs_RH, int cs_RH, scomplex *buff_W, int rs_W, int cs_W, scomplex *buff_U, int rs_U, int cs_U, scomplex *buff_V, int rs_V, int cs_V, int b_alg) |
| FLA_Error | FLA_Bsvd_v_opz_var2 (int min_m_n, int m_U, int m_V, int n_GH, int n_iter_max, double *buff_d, int inc_d, double *buff_e, int inc_e, dcomplex *buff_G, int rs_G, int cs_G, dcomplex *buff_H, int rs_H, int cs_H, double *buff_RG, int rs_RG, int cs_RG, double *buff_RH, int rs_RH, int cs_RH, dcomplex *buff_W, int rs_W, int cs_W, dcomplex *buff_U, int rs_U, int cs_U, dcomplex *buff_V, int rs_V, int cs_V, int b_alg) |
| FLA_Error FLA_Bsvd_compute_shift | ( | FLA_Obj | tol, |
| FLA_Obj | sminl, | ||
| FLA_Obj | smax, | ||
| FLA_Obj | d, | ||
| FLA_Obj | e, | ||
| FLA_Obj | shift | ||
| ) |
References FLA_Bsvd_compute_shift_opd(), FLA_Bsvd_compute_shift_ops(), FLA_Obj_datatype(), FLA_Obj_vector_dim(), and FLA_Obj_vector_inc().
{
FLA_Datatype datatype;
int m_A;
int inc_d;
int inc_e;
datatype = FLA_Obj_datatype( d );
m_A = FLA_Obj_vector_dim( d );
inc_d = FLA_Obj_vector_inc( d );
inc_e = FLA_Obj_vector_inc( e );
switch ( datatype )
{
case FLA_FLOAT:
{
float* buff_tol = FLA_FLOAT_PTR( tol );
float* buff_sminl = FLA_FLOAT_PTR( sminl );
float* buff_smax = FLA_FLOAT_PTR( smax );
float* buff_d = FLA_FLOAT_PTR( d );
float* buff_e = FLA_FLOAT_PTR( e );
float* buff_shift = FLA_FLOAT_PTR( shift );
FLA_Bsvd_compute_shift_ops( m_A,
*buff_tol,
*buff_sminl,
*buff_smax,
buff_d, inc_d,
buff_e, inc_e,
buff_shift );
break;
}
case FLA_DOUBLE:
{
double* buff_tol = FLA_DOUBLE_PTR( tol );
double* buff_sminl = FLA_DOUBLE_PTR( sminl );
double* buff_smax = FLA_DOUBLE_PTR( smax );
double* buff_d = FLA_DOUBLE_PTR( d );
double* buff_e = FLA_DOUBLE_PTR( e );
double* buff_shift = FLA_DOUBLE_PTR( shift );
FLA_Bsvd_compute_shift_opd( m_A,
*buff_tol,
*buff_sminl,
*buff_smax,
buff_d, inc_d,
buff_e, inc_e,
buff_shift );
break;
}
}
return FLA_SUCCESS;
}
| FLA_Error FLA_Bsvd_compute_shift_opd | ( | int | m_A, |
| double | tol, | ||
| double | sminl, | ||
| double | smax, | ||
| double * | buff_d, | ||
| int | inc_d, | ||
| double * | buff_e, | ||
| int | inc_e, | ||
| double * | shift | ||
| ) |
References FLA_Mach_params_opd(), and FLA_Sv_2x2_opd().
Referenced by FLA_Bsvd_compute_shift(), and FLA_Bsvd_sinval_v_opd_var1().
{
double hndrth = 0.01;
double eps;
double* d_first;
double* e_last;
double* d_last_m1;
double* d_last;
double sll, temp;
eps = FLA_Mach_params_opd( FLA_MACH_EPS );
d_first = buff_d + (0 )*inc_d;
e_last = buff_e + (m_A-2)*inc_e;
d_last_m1 = buff_d + (m_A-2)*inc_d;
d_last = buff_d + (m_A-1)*inc_d;
// If the shift would ruin relative accuracy, set it to zero.
if ( m_A * tol * ( sminl / smax ) <= max( eps, hndrth * tol ) )
{
#ifdef PRINTF
printf( "FLA_Bsvd_compute_shift_opd: shift would ruin accuracy; setting shift to 0.\n" );
printf( " m_A = %d \n", m_A );
printf( " tol = %20.15e\n", tol );
printf( " sminl = %20.15e\n", sminl );
printf( " smax = %20.15e\n", smax );
printf( " LHS = %20.15e\n", m_A * tol * ( sminl / smax ) );
printf( " max(eps,0.01*tol)= %20.15e\n", max( eps, hndrth * tol ) );
#endif
*shift = 0.0;
}
else
{
// Compute the shift from the last 2x2 matrix.
FLA_Sv_2x2_opd( d_last_m1,
e_last,
d_last,
shift,
&temp );
sll = fabs( *d_first );
// Check if the shift is negligible; if so, set it to zero.
if ( sll > 0.0 )
{
temp = ( *shift / sll );
if ( temp * temp < eps )
{
#ifdef PRINTF
printf( "FLA_Bsvd_compute_shift_opd: shift is negligible; setting shift to 0.\n" );
#endif
*shift = 0.0;
}
}
}
return FLA_SUCCESS;
}
| FLA_Error FLA_Bsvd_compute_shift_ops | ( | int | m_A, |
| float | tol, | ||
| float | sminl, | ||
| float | smax, | ||
| float * | buff_d, | ||
| int | inc_d, | ||
| float * | buff_e, | ||
| int | inc_e, | ||
| float * | shift | ||
| ) |
Referenced by FLA_Bsvd_compute_shift().
{
FLA_Check_error_code( FLA_NOT_YET_IMPLEMENTED );
return FLA_SUCCESS;
}
| FLA_Error FLA_Bsvd_compute_tol_thresh | ( | FLA_Obj | tolmul, |
| FLA_Obj | maxit, | ||
| FLA_Obj | d, | ||
| FLA_Obj | e, | ||
| FLA_Obj | tol, | ||
| FLA_Obj | thresh | ||
| ) |
References FLA_Bsvd_compute_tol_thresh_opd(), FLA_Bsvd_compute_tol_thresh_ops(), FLA_Obj_datatype(), FLA_Obj_vector_dim(), and FLA_Obj_vector_inc().
{
FLA_Datatype datatype;
int n_A;
int inc_d;
int inc_e;
datatype = FLA_Obj_datatype( d );
n_A = FLA_Obj_vector_dim( d );
inc_d = FLA_Obj_vector_inc( d );
inc_e = FLA_Obj_vector_inc( e );
switch ( datatype )
{
case FLA_FLOAT:
{
float* buff_tolmul = FLA_FLOAT_PTR( tolmul );
float* buff_maxitr = FLA_FLOAT_PTR( maxitr );
float* buff_d = FLA_FLOAT_PTR( d );
float* buff_e = FLA_FLOAT_PTR( e );
float* buff_tol = FLA_FLOAT_PTR( tol );
float* buff_thresh = FLA_FLOAT_PTR( thresh );
FLA_Bsvd_compute_tol_thresh_ops( n_A,
*buff_tolmul,
*buff_maxitr,
buff_d, inc_d,
buff_e, inc_e,
buff_tol,
buff_thresh );
break;
}
case FLA_DOUBLE:
{
double* buff_tolmul = FLA_DOUBLE_PTR( tolmul );
double* buff_maxitr = FLA_DOUBLE_PTR( maxitr );
double* buff_d = FLA_DOUBLE_PTR( d );
double* buff_e = FLA_DOUBLE_PTR( e );
double* buff_tol = FLA_DOUBLE_PTR( tol );
double* buff_thresh = FLA_DOUBLE_PTR( thresh );
FLA_Bsvd_compute_tol_thresh_opd( n_A,
*buff_tolmul,
*buff_maxitr,
buff_d, inc_d,
buff_e, inc_e,
buff_tol,
buff_thresh );
break;
}
}
return FLA_SUCCESS;
}
| FLA_Error FLA_Bsvd_compute_tol_thresh_opd | ( | int | m_A, |
| double | tolmul, | ||
| double | maxit, | ||
| double * | buff_d, | ||
| int | inc_d, | ||
| double * | buff_e, | ||
| int | inc_e, | ||
| double * | tol, | ||
| double * | thresh | ||
| ) |
References bli_d0(), and FLA_Mach_params_opd().
Referenced by FLA_Bsvd_compute_tol_thresh(), FLA_Bsvd_v_opd_var1(), FLA_Bsvd_v_opd_var2(), FLA_Bsvd_v_opz_var1(), and FLA_Bsvd_v_opz_var2().
{
double zero = bli_d0();
double smin;
double eps, unfl;
double mu;
int i;
// Query machine epsilon and the safe minimum.
eps = FLA_Mach_params_opd( FLA_MACH_EPS );
unfl = FLA_Mach_params_opd( FLA_MACH_SFMIN );
// Compute tol as the product of machine epsilon and tolmul.
*tol = tolmul * eps;
// Compute the approximate maximum singular value.
// NOT NEEDED unless we're supporting absolute accuracy.
//FLA_Bsvd_sinval_find_max( n_A,
// buff_d, inc_d,
// buff_e, inc_e,
// &smax );
// Compute the approximate minimum singular value.
smin = fabs( *buff_d );
// Skip the accumulation of smin if the first element is zero.
if ( smin != zero )
{
mu = smin;
for ( i = 1; i < n_A; ++i )
{
double* epsilon1 = buff_e + (i-1)*inc_e;
double* delta2 = buff_d + (i )*inc_d;
mu = fabs( *delta2 ) * ( mu / ( mu + fabs( *epsilon1 ) ) );
smin = min( smin, mu );
// Stop early if we encountered a zero.
if ( smin == zero ) break;
}
}
// Compute thresh either in terms of tol or as a function of the
// maximum total number of iterations, the problem size, and the
// safe minimum.
smin = smin / sqrt( ( double ) n_A );
*thresh = max( *tol * smin, maxitr * n_A * n_A * unfl );
return FLA_SUCCESS;
}
| FLA_Error FLA_Bsvd_compute_tol_thresh_ops | ( | int | m_A, |
| float | tolmul, | ||
| float | maxit, | ||
| float * | buff_d, | ||
| int | inc_d, | ||
| float * | buff_e, | ||
| int | inc_e, | ||
| float * | tol, | ||
| float * | thresh | ||
| ) |
Referenced by FLA_Bsvd_compute_tol_thresh().
{
FLA_Check_error_code( FLA_NOT_YET_IMPLEMENTED );
return FLA_SUCCESS;
}
References FLA_Bsvd_find_converged_opd(), FLA_Bsvd_find_converged_ops(), FLA_Obj_datatype(), FLA_Obj_vector_dim(), and FLA_Obj_vector_inc().
{
FLA_Datatype datatype;
int m_A;
int inc_d;
int inc_e;
datatype = FLA_Obj_datatype( d );
m_A = FLA_Obj_vector_dim( d );
inc_d = FLA_Obj_vector_inc( d );
inc_e = FLA_Obj_vector_inc( e );
switch ( datatype )
{
case FLA_FLOAT:
{
float* buff_tol = FLA_FLOAT_PTR( tol );
float* buff_d = FLA_FLOAT_PTR( d );
float* buff_e = FLA_FLOAT_PTR( e );
float* buff_sminl = FLA_FLOAT_PTR( sminl );
FLA_Bsvd_find_converged_ops( m_A,
*buff_tol,
buff_d, inc_d,
buff_e, inc_e,
buff_sminl );
break;
}
case FLA_DOUBLE:
{
double* buff_tol = FLA_DOUBLE_PTR( tol );
double* buff_d = FLA_DOUBLE_PTR( d );
double* buff_e = FLA_DOUBLE_PTR( e );
double* buff_sminl = FLA_DOUBLE_PTR( sminl );
FLA_Bsvd_find_converged_opd( m_A,
*buff_tol,
buff_d, inc_d,
buff_e, inc_e,
buff_sminl );
break;
}
}
return FLA_SUCCESS;
}
| FLA_Error FLA_Bsvd_find_converged_opd | ( | int | m_A, |
| double | tol, | ||
| double * | buff_d, | ||
| int | inc_d, | ||
| double * | buff_e, | ||
| int | inc_e, | ||
| double * | sminl | ||
| ) |
Referenced by FLA_Bsvd_find_converged(), and FLA_Bsvd_sinval_v_opd_var1().
{
double* epsilon_last;
double* delta_last;
double mu;
int i;
epsilon_last = buff_e + (m_A-2)*inc_e;
delta_last = buff_d + (m_A-1)*inc_d;
// Check convergence at the bottom of the matrix first.
if ( MAC_Bsvd_sinval_is_converged_opd( tol, *delta_last, *epsilon_last ) )
{
//*epsilon_last = 0.0;
*sminl = 0.0;
return m_A - 2;
}
// If the last element is not yet converged, check interior elements.
// Also, accumulate sminl for later use when it comes time to check
// the shift.
mu = fabs( *buff_d );
*sminl = mu;
for ( i = 0; i < m_A - 1; ++i )
{
double* epsilon1 = buff_e + (i )*inc_e;
double* delta2 = buff_d + (i+1)*inc_d;
// Check convergence of epsilon1 against the value of mu accumulated
// so far.
if ( MAC_Bsvd_sinval_is_converged_opd( tol, mu, *epsilon1 ) )
{
//printf( "FLA_Bsvd_sinval_find_converged: Split occurred in col %d\n", i );
//printf( "FLA_Bsvd_sinval_find_converged: mu alpha12 = %23.19e %23.19e\n", mu, *epsilon1 );
//printf( "FLA_Bsvd_sinval_find_converged: alpha22 = %43.19e\n", *delta2 );
//*epsilon1 = 0.0;
//return FLA_FAILURE;
return i;
}
// Update mu and sminl.
mu = fabs( *delta2 ) * ( mu / ( mu + fabs( *epsilon1 ) ) );
*sminl = min( *sminl, mu );
}
// Return with no convergence found.
return FLA_SUCCESS;
}
| FLA_Error FLA_Bsvd_find_converged_ops | ( | int | m_A, |
| float | tol, | ||
| float * | buff_d, | ||
| int | inc_d, | ||
| float * | buff_e, | ||
| int | inc_e, | ||
| float * | sminl | ||
| ) |
Referenced by FLA_Bsvd_find_converged().
{
FLA_Check_error_code( FLA_NOT_YET_IMPLEMENTED );
return FLA_SUCCESS;
}
| FLA_Error FLA_Bsvd_find_max_min_opd | ( | int | m_A, |
| double * | buff_d, | ||
| int | inc_d, | ||
| double * | buff_e, | ||
| int | inc_e, | ||
| double * | smax, | ||
| double * | smin | ||
| ) |
Referenced by FLA_Bsvd_find_max(), and FLA_Bsvd_sinval_v_opd_var1().
{
double smax_cand;
double smin_cand;
int i;
smax_cand = fabs( buff_d[ (m_A-1)*inc_d ] );
smin_cand = smax_cand;
for ( i = 0; i < m_A - 1; ++i )
{
double abs_di = fabs( buff_d[ i*inc_d ] );
double abs_ei = fabs( buff_e[ i*inc_e ] );
// Track the minimum element.
smin_cand = min( smin_cand, abs_di );
// Track the maximum element.
smax_cand = max( smax_cand, abs_di );
smax_cand = max( smax_cand, abs_ei );
}
// Save the results of the search.
*smax = smax_cand;
*smin = smin_cand;
return FLA_SUCCESS;
}
| FLA_Error FLA_Bsvd_find_max_min_ops | ( | int | m_A, |
| float * | buff_d, | ||
| int | inc_d, | ||
| float * | buff_e, | ||
| int | inc_e, | ||
| float * | smax, | ||
| float * | smin | ||
| ) |
Referenced by FLA_Bsvd_find_max().
{
FLA_Check_error_code( FLA_NOT_YET_IMPLEMENTED );
return FLA_SUCCESS;
}
| FLA_Error FLA_Bsvd_find_submatrix_opd | ( | int | mn_A, |
| int | ij_begin, | ||
| double * | buff_d, | ||
| int | inc_d, | ||
| double * | buff_e, | ||
| int | inc_e, | ||
| int * | ijTL, | ||
| int * | ijBR | ||
| ) |
References bli_d0().
Referenced by FLA_Bsvd_v_opd_var1(), FLA_Bsvd_v_opd_var2(), FLA_Bsvd_v_opz_var1(), and FLA_Bsvd_v_opz_var2().
{
double rzero = bli_d0();
int ij_tl;
int ij_br;
// Search for the first non-zero superdiagonal element starting at
// the index specified by ij_begin.
for ( ij_tl = ij_begin; ij_tl < mn_A - 1; ++ij_tl )
{
double* e1 = buff_e + (ij_tl )*inc_e;
// If we find a non-zero element, record it and break out of this
// loop.
if ( *e1 != rzero )
{
#ifdef PRINTF
printf( "FLA_Bsvd_find_submatrix_opd: found non-zero superdiagonal element\n" );
printf( " e[%3d] = %22.19e\n", ij_tl, *e1 );
#endif
*ijTL = ij_tl;
break;
}
}
// If ij_tl was incremented all the way up to mn_A - 1, then we didn't
// find any non-zeros.
if ( ij_tl == mn_A - 1 )
{
#ifdef PRINTF
printf( "FLA_Bsvd_find_submatrix_opd: no submatrices found.\n" );
#endif
return FLA_FAILURE;
}
// If we've gotten this far, then a non-zero superdiagonal element was
// found. Now we must walk the remaining portion of the superdiagonal
// to find the first zero element, or if one is not found, we simply
// use the last element of the superdiagonal.
for ( ij_br = ij_tl; ij_br < mn_A - 1; ++ij_br )
{
double* e1 = buff_e + (ij_br )*inc_e;
// If we find a zero element, record it and break out of this
// loop.
if ( *e1 == rzero )
{
#ifdef PRINTF
printf( "FLA_Bsvd_find_submatrix_opd: found zero superdiagonal element\n" );
printf( " e[%3d] = %22.19e\n", ij_br, *e1 );
#endif
break;
}
}
// If a zero element was found, then ij_br should hold the index of
// that element. If a zero element was not found, then ij_br should
// hold mn_A - 1. Either way, we save the value and return success.
*ijBR = ij_br;
return FLA_SUCCESS;
}
| FLA_Error FLA_Bsvd_find_submatrix_ops | ( | int | mn_A, |
| int | ij_begin, | ||
| float * | buff_d, | ||
| int | inc_d, | ||
| float * | buff_e, | ||
| int | inc_e, | ||
| int * | ijTL, | ||
| int * | ijBR | ||
| ) |
{
FLA_Check_error_code( FLA_NOT_YET_IMPLEMENTED );
return FLA_SUCCESS;
}
| FLA_Error FLA_Bsvd_v_opc_var1 | ( | int | min_m_n, |
| int | m_U, | ||
| int | m_V, | ||
| int | n_GH, | ||
| int | n_iter_max, | ||
| float * | buff_d, | ||
| int | inc_d, | ||
| float * | buff_e, | ||
| int | inc_e, | ||
| scomplex * | buff_G, | ||
| int | rs_G, | ||
| int | cs_G, | ||
| scomplex * | buff_H, | ||
| int | rs_H, | ||
| int | cs_H, | ||
| scomplex * | buff_U, | ||
| int | rs_U, | ||
| int | cs_U, | ||
| scomplex * | buff_V, | ||
| int | rs_V, | ||
| int | cs_V, | ||
| int | b_alg | ||
| ) |
Referenced by FLA_Bsvd_v_opt_var1().
{
FLA_Check_error_code( FLA_NOT_YET_IMPLEMENTED );
return FLA_SUCCESS;
}
| FLA_Error FLA_Bsvd_v_opc_var2 | ( | int | min_m_n, |
| int | m_U, | ||
| int | m_V, | ||
| int | n_GH, | ||
| int | n_iter_max, | ||
| float * | buff_d, | ||
| int | inc_d, | ||
| float * | buff_e, | ||
| int | inc_e, | ||
| scomplex * | buff_G, | ||
| int | rs_G, | ||
| int | cs_G, | ||
| scomplex * | buff_H, | ||
| int | rs_H, | ||
| int | cs_H, | ||
| float * | buff_RG, | ||
| int | rs_RG, | ||
| int | cs_RG, | ||
| float * | buff_RH, | ||
| int | rs_RH, | ||
| int | cs_RH, | ||
| scomplex * | buff_W, | ||
| int | rs_W, | ||
| int | cs_W, | ||
| scomplex * | buff_U, | ||
| int | rs_U, | ||
| int | cs_U, | ||
| scomplex * | buff_V, | ||
| int | rs_V, | ||
| int | cs_V, | ||
| int | b_alg | ||
| ) |
Referenced by FLA_Bsvd_v_opt_var2().
{
FLA_Check_error_code( FLA_NOT_YET_IMPLEMENTED );
return FLA_SUCCESS;
}
| FLA_Error FLA_Bsvd_v_opd_var1 | ( | int | min_m_n, |
| int | m_U, | ||
| int | m_V, | ||
| int | n_GH, | ||
| int | n_iter_max, | ||
| double * | buff_d, | ||
| int | inc_d, | ||
| double * | buff_e, | ||
| int | inc_e, | ||
| dcomplex * | buff_G, | ||
| int | rs_G, | ||
| int | cs_G, | ||
| dcomplex * | buff_H, | ||
| int | rs_H, | ||
| int | cs_H, | ||
| double * | buff_U, | ||
| int | rs_U, | ||
| int | cs_U, | ||
| double * | buff_V, | ||
| int | rs_V, | ||
| int | cs_V, | ||
| int | b_alg | ||
| ) |
References bli_d0(), bli_dm1(), bli_dscalv(), bli_z1(), bli_zsetm(), BLIS_NO_CONJUGATE, FLA_Abort(), FLA_Apply_G_rf_bld_var3(), FLA_Bsvd_compute_tol_thresh_opd(), FLA_Bsvd_find_submatrix_opd(), FLA_Bsvd_iteracc_v_opd_var1(), and FLA_Mach_params_opd().
Referenced by FLA_Bsvd_v_opt_var1().
{
dcomplex one = bli_z1();
double rzero = bli_d0();
int maxitr = 6;
double eps;
double tolmul;
double tol;
double thresh;
dcomplex* G;
dcomplex* H;
double* d1;
double* e1;
int r_val;
int done;
int m_GH_sweep_max;
int ij_begin;
int ijTL, ijBR;
int m_A11;
int n_iter_perf;
int n_UV_apply;
int total_deflations;
int n_deflations;
int n_iter_prev;
int n_iter_perf_sweep_max;
// Compute some convergence constants.
eps = FLA_Mach_params_opd( FLA_MACH_EPS );
tolmul = max( 10.0, min( 100.0, pow( eps, -0.125 ) ) );
FLA_Bsvd_compute_tol_thresh_opd( min_m_n,
tolmul,
maxitr,
buff_d, inc_d,
buff_e, inc_e,
&tol,
&thresh );
#ifdef PRINTF
printf( "FLA_Bsvd_v_opd_var1: tolmul = %12.6e\n", tolmul );
printf( "FLA_Bsvd_v_opd_var1: tol = %12.6e\n", tol );
printf( "FLA_Bsvd_v_opd_var1: thresh = %12.6e\n", thresh );
#endif
// Initialize our completion flag.
done = FALSE;
// Initialize a counter that holds the maximum number of rows of G
// and H that we would need to initialize for the next sweep.
m_GH_sweep_max = min_m_n - 1;
// Initialize a counter for the total number of iterations performed.
n_iter_prev = 0;
// Iterate until the matrix has completely deflated.
for ( total_deflations = 0; done != TRUE; )
{
// Initialize G and H to contain only identity rotations.
bli_zsetm( m_GH_sweep_max,
n_GH,
&one,
buff_G, rs_G, cs_G );
bli_zsetm( m_GH_sweep_max,
n_GH,
&one,
buff_H, rs_H, cs_H );
// Keep track of the maximum number of iterations performed in the
// current sweep. This is used when applying the sweep's Givens
// rotations.
n_iter_perf_sweep_max = 0;
// Perform a sweep: Move through the matrix and perform a bidiagonal
// SVD on each non-zero submatrix that is encountered. During the
// first time through, ijTL will be 0 and ijBR will be min_m_n - 1.
for ( ij_begin = 0; ij_begin < min_m_n; )
{
#ifdef PRINTF
if ( ij_begin == 0 )
printf( "FLA_Bsvd_v_opd_var1: beginning new sweep (ij_begin = %d)\n", ij_begin );
#endif
// Search for the first submatrix along the diagonal that is
// bounded by zeroes (or endpoints of the matrix). If no
// submatrix is found (ie: if the entire superdiagonal is zero
// then FLA_FAILURE is returned. This function also inspects
// superdiagonal elements for proximity to zero. If a given
// element is close enough to zero, then it is deemed
// converged and manually set to zero.
r_val = FLA_Bsvd_find_submatrix_opd( min_m_n,
ij_begin,
buff_d, inc_d,
buff_e, inc_e,
&ijTL,
&ijBR );
// Verify that a submatrix was found. If one was not found,
// then we are done with the current sweep. Furthermore, if
// a submatrix was not found AND we began our search at the
// beginning of the matrix (ie: ij_begin == 0), then the
// matrix has completely deflated and so we are done with
// Francis step iteration.
if ( r_val == FLA_FAILURE )
{
if ( ij_begin == 0 )
{
#ifdef PRINTF
printf( "FLA_Bsvd_v_opd_var1: superdiagonal is completely zero.\n" );
printf( "FLA_Bsvd_v_opd_var1: Francis iteration is done!\n" );
#endif
done = TRUE;
}
// Break out of the current sweep so we can apply the last
// remaining Givens rotations.
break;
}
// If we got this far, then:
// (a) ijTL refers to the index of the first non-zero
// superdiagonal along the diagonal, and
// (b) ijBR refers to either:
// - the first zero element that occurs after ijTL, or
// - the the last diagonal element.
// Note that ijTL and ijBR also correspond to the first and
// last diagonal elements of the submatrix of interest. Thus,
// we may compute the dimension of this submatrix as:
m_A11 = ijBR - ijTL + 1;
#ifdef PRINTF
printf( "FLA_Bsvd_v_opd_var1: ij_begin = %d\n", ij_begin );
printf( "FLA_Bsvd_v_opd_var1: ijTL = %d\n", ijTL );
printf( "FLA_Bsvd_v_opd_var1: ijBR = %d\n", ijBR );
printf( "FLA_Bsvd_v_opd_var1: m_A11 = %d\n", m_A11 );
#endif
// Adjust ij_begin, which gets us ready for the next submatrix
// search in the current sweep.
ij_begin = ijBR + 1;
// Index to the submatrices upon which we will operate.
d1 = buff_d + ijTL * inc_d;
e1 = buff_e + ijTL * inc_e;
G = buff_G + ijTL * rs_G;
H = buff_H + ijTL * rs_H;
// Search for a batch of singular values, recursing on deflated
// subproblems whenever a split occurs. Iteration continues as
// long as
// (a) there is still matrix left to operate on, and
// (b) the number of iterations performed in this batch is
// less than n_G.
// If/when either of the two above conditions fails to hold,
// the function returns.
n_deflations = FLA_Bsvd_iteracc_v_opd_var1( m_A11,
n_GH,
ijTL,
tol,
thresh,
d1, inc_d,
e1, inc_e,
G, rs_G, cs_G,
H, rs_H, cs_H,
&n_iter_perf );
// Record the number of deflations that were observed.
total_deflations += n_deflations;
// Update the maximum number of iterations performed in the
// current sweep.
n_iter_perf_sweep_max = max( n_iter_perf_sweep_max, n_iter_perf );
#ifdef PRINTF
printf( "FLA_Bsvd_v_opd_var1: deflations observed = %d\n", n_deflations );
printf( "FLA_Bsvd_v_opd_var1: total deflations observed = %d\n", total_deflations );
printf( "FLA_Bsvd_v_opd_var1: num iterations performed = %d\n", n_iter_perf );
#endif
// Store the most recent value of ijBR in m_G_sweep_max.
// When the sweep is done, this value will contain the minimum
// number of rows of G and H we can apply and safely include all
// non-identity rotations that were computed during the
// singular value searches.
m_GH_sweep_max = ijBR;
// Make sure we haven't exceeded our maximum iteration count.
if ( n_iter_prev >= n_iter_max * min_m_n )
{
#ifdef PRINTF
printf( "FLA_Bsvd_v_opd_var1: reached maximum total number of iterations: %d\n", n_iter_prev );
#endif
FLA_Abort();
//return FLA_FAILURE;
}
}
// The sweep is complete. Now we must apply the Givens rotations
// that were accumulated during the sweep.
// Recall that the number of columns of U and V to which we apply
// rotations is one more than the number of rotations.
n_UV_apply = m_GH_sweep_max + 1;
#ifdef PRINTF
printf( "FLA_Bsvd_v_opd_var1: applying %d sets of Givens rotations\n", n_iter_perf_sweep_max );
printf( "FLA_Bsvd_v_opd_var1: m_U = %d\n", m_U );
printf( "FLA_Bsvd_v_opd_var1: m_V = %d\n", m_V );
printf( "FLA_Bsvd_v_opd_var1: napp= %d\n", n_UV_apply );
#endif
// Apply the Givens rotations. Note that we only apply k sets of
// rotations, where k = n_iter_perf_sweep_max. Also note that we only
// apply to n_UV_apply columns of U and V since this is the most we
// need to touch given the most recent value stored to m_GH_sweep_max.
//FLA_Apply_G_rf_bld_var5( n_iter_perf_sweep_max,
FLA_Apply_G_rf_bld_var3( n_iter_perf_sweep_max,
//FLA_Apply_G_rf_bld_var9( n_iter_perf_sweep_max,
//FLA_Apply_G_rf_bld_var6( n_iter_perf_sweep_max,
m_U,
n_UV_apply,
buff_G, rs_G, cs_G,
buff_U, rs_U, cs_U,
b_alg );
//FLA_Apply_G_rf_bld_var5( n_iter_perf_sweep_max,
FLA_Apply_G_rf_bld_var3( n_iter_perf_sweep_max,
//FLA_Apply_G_rf_bld_var9( n_iter_perf_sweep_max,
//FLA_Apply_G_rf_bld_var6( n_iter_perf_sweep_max,
m_V,
n_UV_apply,
buff_H, rs_H, cs_H,
buff_V, rs_V, cs_V,
b_alg );
// Increment the total number of iterations previously performed.
n_iter_prev += n_iter_perf_sweep_max;
#ifdef PRINTF
printf( "FLA_Bsvd_v_opd_var1: total number of iterations performed: %d\n", n_iter_prev );
#endif
}
// Make all the singular values positive.
{
int i;
double minus_one = bli_dm1();
for ( i = 0; i < min_m_n; ++i )
{
if ( buff_d[ (i )*inc_d ] < rzero )
{
buff_d[ (i )*inc_d ] = -buff_d[ (i )*inc_d ];
// Scale the right singular vectors.
bli_dscalv( BLIS_NO_CONJUGATE,
m_V,
&minus_one,
buff_V + (i )*cs_V, rs_V );
}
}
}
return n_iter_prev;
}
| FLA_Error FLA_Bsvd_v_opd_var2 | ( | int | min_m_n, |
| int | m_U, | ||
| int | m_V, | ||
| int | n_GH, | ||
| int | n_iter_max, | ||
| double * | buff_d, | ||
| int | inc_d, | ||
| double * | buff_e, | ||
| int | inc_e, | ||
| dcomplex * | buff_G, | ||
| int | rs_G, | ||
| int | cs_G, | ||
| dcomplex * | buff_H, | ||
| int | rs_H, | ||
| int | cs_H, | ||
| double * | buff_RG, | ||
| int | rs_RG, | ||
| int | cs_RG, | ||
| double * | buff_RH, | ||
| int | rs_RH, | ||
| int | cs_RH, | ||
| double * | buff_W, | ||
| int | rs_W, | ||
| int | cs_W, | ||
| double * | buff_U, | ||
| int | rs_U, | ||
| int | cs_U, | ||
| double * | buff_V, | ||
| int | rs_V, | ||
| int | cs_V, | ||
| int | b_alg | ||
| ) |
References bli_d0(), bli_d1(), bli_dcopymt(), bli_dgemm(), bli_dident(), bli_dm1(), bli_dscalv(), bli_z1(), bli_zsetm(), BLIS_NO_CONJUGATE, BLIS_NO_TRANSPOSE, FLA_Apply_G_rf_bld_var3b(), FLA_Bsvd_compute_tol_thresh_opd(), FLA_Bsvd_find_submatrix_opd(), FLA_Bsvd_iteracc_v_opd_var1(), and FLA_Mach_params_opd().
Referenced by FLA_Bsvd_v_opt_var2().
{
dcomplex one = bli_z1();
double rone = bli_d1();
double rzero = bli_d0();
int maxitr = 6;
double eps;
double tolmul;
double tol;
double thresh;
dcomplex* G;
dcomplex* H;
double* d1;
double* e1;
int r_val;
int done;
int m_GH_sweep_max;
int ij_begin;
int ijTL, ijBR;
int m_A11;
int n_iter_perf;
int n_UV_apply;
int total_deflations;
int n_deflations;
int n_iter_prev;
int n_iter_perf_sweep_max;
// Compute some convergence constants.
eps = FLA_Mach_params_opd( FLA_MACH_EPS );
tolmul = max( 10.0, min( 100.0, pow( eps, -0.125 ) ) );
FLA_Bsvd_compute_tol_thresh_opd( min_m_n,
tolmul,
maxitr,
buff_d, inc_d,
buff_e, inc_e,
&tol,
&thresh );
// Initialize our completion flag.
done = FALSE;
// Initialize a counter that holds the maximum number of rows of G
// and H that we would need to initialize for the next sweep.
m_GH_sweep_max = min_m_n - 1;
// Initialize a counter for the total number of iterations performed.
n_iter_prev = 0;
// Initialize RG and RH to identity.
bli_dident( min_m_n,
buff_RG, rs_RG, cs_RG );
bli_dident( min_m_n,
buff_RH, rs_RH, cs_RH );
// Iterate until the matrix has completely deflated.
for ( total_deflations = 0; done != TRUE; )
{
// Initialize G and H to contain only identity rotations.
bli_zsetm( m_GH_sweep_max,
n_GH,
&one,
buff_G, rs_G, cs_G );
bli_zsetm( m_GH_sweep_max,
n_GH,
&one,
buff_H, rs_H, cs_H );
// Keep track of the maximum number of iterations performed in the
// current sweep. This is used when applying the sweep's Givens
// rotations.
n_iter_perf_sweep_max = 0;
// Perform a sweep: Move through the matrix and perform a bidiagonal
// SVD on each non-zero submatrix that is encountered. During the
// first time through, ijTL will be 0 and ijBR will be min_m_n - 1.
for ( ij_begin = 0; ij_begin < min_m_n; )
{
#ifdef PRINTF
if ( ij_begin == 0 )
printf( "FLA_Bsvd_v_opd_var2: beginning new sweep (ij_begin = %d)\n", ij_begin );
#endif
// Search for the first submatrix along the diagonal that is
// bounded by zeroes (or endpoints of the matrix). If no
// submatrix is found (ie: if the entire superdiagonal is zero
// then FLA_FAILURE is returned. This function also inspects
// superdiagonal elements for proximity to zero. If a given
// element is close enough to zero, then it is deemed
// converged and manually set to zero.
r_val = FLA_Bsvd_find_submatrix_opd( min_m_n,
ij_begin,
buff_d, inc_d,
buff_e, inc_e,
&ijTL,
&ijBR );
// Verify that a submatrix was found. If one was not found,
// then we are done with the current sweep. Furthermore, if
// a submatrix was not found AND we began our search at the
// beginning of the matrix (ie: ij_begin == 0), then the
// matrix has completely deflated and so we are done with
// Francis step iteration.
if ( r_val == FLA_FAILURE )
{
if ( ij_begin == 0 )
{
#ifdef PRINTF
printf( "FLA_Bsvd_v_opd_var2: superdiagonal is completely zero.\n" );
printf( "FLA_Bsvd_v_opd_var2: Francis iteration is done!\n" );
#endif
done = TRUE;
}
// Break out of the current sweep so we can apply the last
// remaining Givens rotations.
break;
}
// If we got this far, then:
// (a) ijTL refers to the index of the first non-zero
// superdiagonal along the diagonal, and
// (b) ijBR refers to either:
// - the first zero element that occurs after ijTL, or
// - the the last diagonal element.
// Note that ijTL and ijBR also correspond to the first and
// last diagonal elements of the submatrix of interest. Thus,
// we may compute the dimension of this submatrix as:
m_A11 = ijBR - ijTL + 1;
#ifdef PRINTF
printf( "FLA_Bsvd_v_opd_var2: ij_begin = %d\n", ij_begin );
printf( "FLA_Bsvd_v_opd_var2: ijTL = %d\n", ijTL );
printf( "FLA_Bsvd_v_opd_var2: ijBR = %d\n", ijBR );
printf( "FLA_Bsvd_v_opd_var2: m_A11 = %d\n", m_A11 );
#endif
// Adjust ij_begin, which gets us ready for the next submatrix
// search in the current sweep.
ij_begin = ijBR + 1;
// Index to the submatrices upon which we will operate.
d1 = buff_d + ijTL * inc_d;
e1 = buff_e + ijTL * inc_e;
G = buff_G + ijTL * rs_G;
H = buff_H + ijTL * rs_H;
// Search for a batch of singular values, recursing on deflated
// subproblems whenever possible. A new singular value search is
// performed as long as
// (a) there is still matrix left to operate on, and
// (b) the number of iterations performed in this batch is
// less than n_G.
// If/when either of the two above conditions fails to hold,
// the function returns.
n_deflations = FLA_Bsvd_iteracc_v_opd_var1( m_A11,
n_GH,
ijTL,
tol,
thresh,
d1, inc_d,
e1, inc_e,
G, rs_G, cs_G,
H, rs_H, cs_H,
&n_iter_perf );
// Record the number of deflations that occurred.
total_deflations += n_deflations;
// Update the maximum number of iterations performed in the
// current sweep.
n_iter_perf_sweep_max = max( n_iter_perf_sweep_max, n_iter_perf );
#ifdef PRINTF
printf( "FLA_Bsvd_v_opd_var2: deflations observed = %d\n", n_deflations );
printf( "FLA_Bsvd_v_opd_var2: total deflations observed = %d\n", total_deflations );
printf( "FLA_Bsvd_v_opd_var2: num iterations performed = %d\n", n_iter_perf );
#endif
// Store the most recent value of ijBR in m_G_sweep_max.
// When the sweep is done, this value will contain the minimum
// number of rows of G and H we can apply and safely include all
// non-identity rotations that were computed during the
// singular value searches.
m_GH_sweep_max = ijBR;
}
// The sweep is complete. Now we must apply the Givens rotations
// that were accumulated during the sweep.
// Recall that the number of columns of U and V to which we apply
// rotations is one more than the number of rotations.
n_UV_apply = m_GH_sweep_max + 1;
#ifdef PRINTF
printf( "FLA_Bsvd_v_opd_var2: applying %d sets of Givens rotations\n", n_iter_perf_sweep_max );
printf( "FLA_Bsvd_v_opd_var2: m_U = %d\n", m_U );
printf( "FLA_Bsvd_v_opd_var2: m_V = %d\n", m_V );
printf( "FLA_Bsvd_v_opd_var2: napp= %d\n", n_UV_apply );
#endif
// Apply the Givens rotations. Note that we only apply k sets of
// rotations, where k = n_iter_perf_sweep_max. Also note that we only
// apply to n_UV_apply columns of U and V since this is the most we
// need to touch given the most recent value stored to m_GH_sweep_max.
//FLA_Apply_G_rf_bld_var5b( n_iter_perf_sweep_max,
FLA_Apply_G_rf_bld_var3b( n_iter_perf_sweep_max,
//FLA_Apply_G_rf_bld_var9b( n_iter_perf_sweep_max,
//FLA_Apply_G_rf_bld_var6b( n_iter_perf_sweep_max,
min_m_n,
n_UV_apply,
n_iter_prev,
buff_G, rs_G, cs_G,
buff_RG, rs_RG, cs_RG,
b_alg );
//FLA_Apply_G_rf_bld_var5b( n_iter_perf_sweep_max,
FLA_Apply_G_rf_bld_var3b( n_iter_perf_sweep_max,
//FLA_Apply_G_rf_bld_var9b( n_iter_perf_sweep_max,
//FLA_Apply_G_rf_bld_var6b( n_iter_perf_sweep_max,
min_m_n,
n_UV_apply,
n_iter_prev,
buff_H, rs_H, cs_H,
buff_RH, rs_RH, cs_RH,
b_alg );
// Increment the total number of iterations previously performed.
n_iter_prev += n_iter_perf_sweep_max;
#ifdef PRINTF
printf( "FLA_Bsvd_v_opd_var2: total number of iterations performed: %d\n", n_iter_prev );
#endif
}
// Copy the contents of Q to temporary storage.
bli_dcopymt( BLIS_NO_TRANSPOSE,
m_U,
m_V,
buff_U, rs_U, cs_U,
buff_W, rs_W, cs_W );
// W needs to be max_m_n-by-min_m_n!!!!!!!!!!!!!!!
// Multiply U by R, overwriting U.
bli_dgemm( BLIS_NO_TRANSPOSE,
BLIS_NO_TRANSPOSE,
m_U,
m_V,
m_V,
&rone,
( double* )buff_W, rs_W, cs_W,
buff_RG, rs_RG, cs_RG,
&rzero,
( double* )buff_U, rs_U, cs_U );
bli_dcopymt( BLIS_NO_TRANSPOSE,
m_V,
m_V,
buff_V, rs_V, cs_V,
buff_W, rs_W, cs_W );
// Multiply V by R, overwriting V.
bli_dgemm( BLIS_NO_TRANSPOSE,
BLIS_NO_TRANSPOSE,
m_V,
m_V,
m_V,
&rone,
( double* )buff_W, rs_W, cs_W,
buff_RH, rs_RH, cs_RH,
&rzero,
( double* )buff_V, rs_V, cs_V );
// Make all the singular values positive.
{
int i;
double minus_one = bli_dm1();
for ( i = 0; i < min_m_n; ++i )
{
if ( buff_d[ (i )*inc_d ] < rzero )
{
buff_d[ (i )*inc_d ] = -buff_d[ (i )*inc_d ];
// Scale the right singular vectors.
bli_dscalv( BLIS_NO_CONJUGATE,
m_V,
&minus_one,
buff_V + (i )*cs_V, rs_V );
}
}
}
return n_iter_prev;
}
| FLA_Error FLA_Bsvd_v_ops_var1 | ( | int | min_m_n, |
| int | m_U, | ||
| int | m_V, | ||
| int | n_GH, | ||
| int | n_iter_max, | ||
| float * | buff_d, | ||
| int | inc_d, | ||
| float * | buff_e, | ||
| int | inc_e, | ||
| scomplex * | buff_G, | ||
| int | rs_G, | ||
| int | cs_G, | ||
| scomplex * | buff_H, | ||
| int | rs_H, | ||
| int | cs_H, | ||
| float * | buff_U, | ||
| int | rs_U, | ||
| int | cs_U, | ||
| float * | buff_V, | ||
| int | rs_V, | ||
| int | cs_V, | ||
| int | b_alg | ||
| ) |
Referenced by FLA_Bsvd_v_opt_var1().
{
FLA_Check_error_code( FLA_NOT_YET_IMPLEMENTED );
return FLA_SUCCESS;
}
| FLA_Error FLA_Bsvd_v_ops_var2 | ( | int | min_m_n, |
| int | m_U, | ||
| int | m_V, | ||
| int | n_GH, | ||
| int | n_iter_max, | ||
| float * | buff_d, | ||
| int | inc_d, | ||
| float * | buff_e, | ||
| int | inc_e, | ||
| scomplex * | buff_G, | ||
| int | rs_G, | ||
| int | cs_G, | ||
| scomplex * | buff_H, | ||
| int | rs_H, | ||
| int | cs_H, | ||
| float * | buff_RG, | ||
| int | rs_RG, | ||
| int | cs_RG, | ||
| float * | buff_RH, | ||
| int | rs_RH, | ||
| int | cs_RH, | ||
| float * | buff_W, | ||
| int | rs_W, | ||
| int | cs_W, | ||
| float * | buff_U, | ||
| int | rs_U, | ||
| int | cs_U, | ||
| float * | buff_V, | ||
| int | rs_V, | ||
| int | cs_V, | ||
| int | b_alg | ||
| ) |
Referenced by FLA_Bsvd_v_opt_var2().
{
FLA_Check_error_code( FLA_NOT_YET_IMPLEMENTED );
return FLA_SUCCESS;
}
| FLA_Error FLA_Bsvd_v_opt_var1 | ( | dim_t | n_iter_max, |
| FLA_Obj | d, | ||
| FLA_Obj | e, | ||
| FLA_Obj | G, | ||
| FLA_Obj | H, | ||
| FLA_Obj | U, | ||
| FLA_Obj | V, | ||
| dim_t | b_alg | ||
| ) |
References FLA_Bsvd_v_opc_var1(), FLA_Bsvd_v_opd_var1(), FLA_Bsvd_v_ops_var1(), FLA_Bsvd_v_opz_var1(), FLA_Obj_col_stride(), FLA_Obj_datatype(), FLA_Obj_length(), FLA_Obj_row_stride(), FLA_Obj_vector_inc(), and FLA_Obj_width().
Referenced by FLA_Svd_uv_unb_var1().
{
FLA_Error r_val = FLA_SUCCESS;
FLA_Datatype datatype;
int m_U, m_V, n_GH;
int inc_d;
int inc_e;
int rs_G, cs_G;
int rs_H, cs_H;
int rs_U, cs_U;
int rs_V, cs_V;
datatype = FLA_Obj_datatype( U );
m_U = FLA_Obj_length( U );
m_V = FLA_Obj_length( V );
n_GH = FLA_Obj_width( G );
inc_d = FLA_Obj_vector_inc( d );
inc_e = FLA_Obj_vector_inc( e );
rs_G = FLA_Obj_row_stride( G );
cs_G = FLA_Obj_col_stride( G );
rs_H = FLA_Obj_row_stride( H );
cs_H = FLA_Obj_col_stride( H );
rs_U = FLA_Obj_row_stride( U );
cs_U = FLA_Obj_col_stride( U );
rs_V = FLA_Obj_row_stride( V );
cs_V = FLA_Obj_col_stride( V );
switch ( datatype )
{
case FLA_FLOAT:
{
float* buff_d = FLA_FLOAT_PTR( d );
float* buff_e = FLA_FLOAT_PTR( e );
scomplex* buff_G = FLA_COMPLEX_PTR( G );
scomplex* buff_H = FLA_COMPLEX_PTR( H );
float* buff_U = FLA_FLOAT_PTR( U );
float* buff_V = FLA_FLOAT_PTR( V );
r_val = FLA_Bsvd_v_ops_var1( min( m_U, m_V ),
m_U,
m_V,
n_GH,
n_iter_max,
buff_d, inc_d,
buff_e, inc_e,
buff_G, rs_G, cs_G,
buff_H, rs_H, cs_H,
buff_U, rs_U, cs_U,
buff_V, rs_V, cs_V,
b_alg );
break;
}
case FLA_DOUBLE:
{
double* buff_d = FLA_DOUBLE_PTR( d );
double* buff_e = FLA_DOUBLE_PTR( e );
dcomplex* buff_G = FLA_DOUBLE_COMPLEX_PTR( G );
dcomplex* buff_H = FLA_DOUBLE_COMPLEX_PTR( H );
double* buff_U = FLA_DOUBLE_PTR( U );
double* buff_V = FLA_DOUBLE_PTR( V );
r_val = FLA_Bsvd_v_opd_var1( min( m_U, m_V ),
m_U,
m_V,
n_GH,
n_iter_max,
buff_d, inc_d,
buff_e, inc_e,
buff_G, rs_G, cs_G,
buff_H, rs_H, cs_H,
buff_U, rs_U, cs_U,
buff_V, rs_V, cs_V,
b_alg );
break;
}
case FLA_COMPLEX:
{
float* buff_d = FLA_FLOAT_PTR( d );
float* buff_e = FLA_FLOAT_PTR( e );
scomplex* buff_G = FLA_COMPLEX_PTR( G );
scomplex* buff_H = FLA_COMPLEX_PTR( H );
scomplex* buff_U = FLA_COMPLEX_PTR( U );
scomplex* buff_V = FLA_COMPLEX_PTR( V );
r_val = FLA_Bsvd_v_opc_var1( min( m_U, m_V ),
m_U,
m_V,
n_GH,
n_iter_max,
buff_d, inc_d,
buff_e, inc_e,
buff_G, rs_G, cs_G,
buff_H, rs_H, cs_H,
buff_U, rs_U, cs_U,
buff_V, rs_V, cs_V,
b_alg );
break;
}
case FLA_DOUBLE_COMPLEX:
{
double* buff_d = FLA_DOUBLE_PTR( d );
double* buff_e = FLA_DOUBLE_PTR( e );
dcomplex* buff_G = FLA_DOUBLE_COMPLEX_PTR( G );
dcomplex* buff_H = FLA_DOUBLE_COMPLEX_PTR( H );
dcomplex* buff_U = FLA_DOUBLE_COMPLEX_PTR( U );
dcomplex* buff_V = FLA_DOUBLE_COMPLEX_PTR( V );
r_val = FLA_Bsvd_v_opz_var1( min( m_U, m_V ),
m_U,
m_V,
n_GH,
n_iter_max,
buff_d, inc_d,
buff_e, inc_e,
buff_G, rs_G, cs_G,
buff_H, rs_H, cs_H,
buff_U, rs_U, cs_U,
buff_V, rs_V, cs_V,
b_alg );
break;
}
}
return r_val;
}
| FLA_Error FLA_Bsvd_v_opt_var2 | ( | dim_t | n_iter_max, |
| FLA_Obj | d, | ||
| FLA_Obj | e, | ||
| FLA_Obj | G, | ||
| FLA_Obj | H, | ||
| FLA_Obj | RG, | ||
| FLA_Obj | RH, | ||
| FLA_Obj | W, | ||
| FLA_Obj | U, | ||
| FLA_Obj | V, | ||
| dim_t | b_alg | ||
| ) |
References FLA_Bsvd_v_opc_var2(), FLA_Bsvd_v_opd_var2(), FLA_Bsvd_v_ops_var2(), FLA_Bsvd_v_opz_var2(), FLA_Obj_col_stride(), FLA_Obj_datatype(), FLA_Obj_length(), FLA_Obj_row_stride(), FLA_Obj_vector_inc(), and FLA_Obj_width().
Referenced by FLA_Svd_uv_unb_var2().
{
FLA_Error r_val = FLA_SUCCESS;
FLA_Datatype datatype;
int m_U, m_V, n_GH;
int inc_d;
int inc_e;
int rs_G, cs_G;
int rs_H, cs_H;
int rs_RG, cs_RG;
int rs_RH, cs_RH;
int rs_W, cs_W;
int rs_U, cs_U;
int rs_V, cs_V;
datatype = FLA_Obj_datatype( U );
m_U = FLA_Obj_length( U );
m_V = FLA_Obj_length( V );
n_GH = FLA_Obj_width( G );
inc_d = FLA_Obj_vector_inc( d );
inc_e = FLA_Obj_vector_inc( e );
rs_G = FLA_Obj_row_stride( G );
cs_G = FLA_Obj_col_stride( G );
rs_H = FLA_Obj_row_stride( H );
cs_H = FLA_Obj_col_stride( H );
rs_RG = FLA_Obj_row_stride( RG );
cs_RG = FLA_Obj_col_stride( RG );
rs_RH = FLA_Obj_row_stride( RH );
cs_RH = FLA_Obj_col_stride( RH );
rs_W = FLA_Obj_row_stride( W );
cs_W = FLA_Obj_col_stride( W );
rs_U = FLA_Obj_row_stride( U );
cs_U = FLA_Obj_col_stride( U );
rs_V = FLA_Obj_row_stride( V );
cs_V = FLA_Obj_col_stride( V );
switch ( datatype )
{
case FLA_FLOAT:
{
float* buff_d = FLA_FLOAT_PTR( d );
float* buff_e = FLA_FLOAT_PTR( e );
scomplex* buff_G = FLA_COMPLEX_PTR( G );
scomplex* buff_H = FLA_COMPLEX_PTR( H );
float* buff_RG = FLA_FLOAT_PTR( RG );
float* buff_RH = FLA_FLOAT_PTR( RH );
float* buff_W = FLA_FLOAT_PTR( W );
float* buff_U = FLA_FLOAT_PTR( U );
float* buff_V = FLA_FLOAT_PTR( V );
r_val = FLA_Bsvd_v_ops_var2( min( m_U, m_V ),
m_U,
m_V,
n_GH,
n_iter_max,
buff_d, inc_d,
buff_e, inc_e,
buff_G, rs_G, cs_G,
buff_H, rs_H, cs_H,
buff_RG, rs_RG, cs_RG,
buff_RH, rs_RH, cs_RH,
buff_W, rs_W, cs_W,
buff_U, rs_U, cs_U,
buff_V, rs_V, cs_V,
b_alg );
break;
}
case FLA_DOUBLE:
{
double* buff_d = FLA_DOUBLE_PTR( d );
double* buff_e = FLA_DOUBLE_PTR( e );
dcomplex* buff_G = FLA_DOUBLE_COMPLEX_PTR( G );
dcomplex* buff_H = FLA_DOUBLE_COMPLEX_PTR( H );
double* buff_RG = FLA_DOUBLE_PTR( RG );
double* buff_RH = FLA_DOUBLE_PTR( RH );
double* buff_W = FLA_DOUBLE_PTR( W );
double* buff_U = FLA_DOUBLE_PTR( U );
double* buff_V = FLA_DOUBLE_PTR( V );
r_val = FLA_Bsvd_v_opd_var2( min( m_U, m_V ),
m_U,
m_V,
n_GH,
n_iter_max,
buff_d, inc_d,
buff_e, inc_e,
buff_G, rs_G, cs_G,
buff_H, rs_H, cs_H,
buff_RG, rs_RG, cs_RG,
buff_RH, rs_RH, cs_RH,
buff_W, rs_W, cs_W,
buff_U, rs_U, cs_U,
buff_V, rs_V, cs_V,
b_alg );
break;
}
case FLA_COMPLEX:
{
float* buff_d = FLA_FLOAT_PTR( d );
float* buff_e = FLA_FLOAT_PTR( e );
scomplex* buff_G = FLA_COMPLEX_PTR( G );
scomplex* buff_H = FLA_COMPLEX_PTR( H );
float* buff_RG = FLA_FLOAT_PTR( RG );
float* buff_RH = FLA_FLOAT_PTR( RH );
scomplex* buff_W = FLA_COMPLEX_PTR( W );
scomplex* buff_U = FLA_COMPLEX_PTR( U );
scomplex* buff_V = FLA_COMPLEX_PTR( V );
r_val = FLA_Bsvd_v_opc_var2( min( m_U, m_V ),
m_U,
m_V,
n_GH,
n_iter_max,
buff_d, inc_d,
buff_e, inc_e,
buff_G, rs_G, cs_G,
buff_H, rs_H, cs_H,
buff_RG, rs_RG, cs_RG,
buff_RH, rs_RH, cs_RH,
buff_W, rs_W, cs_W,
buff_U, rs_U, cs_U,
buff_V, rs_V, cs_V,
b_alg );
break;
}
case FLA_DOUBLE_COMPLEX:
{
double* buff_d = FLA_DOUBLE_PTR( d );
double* buff_e = FLA_DOUBLE_PTR( e );
dcomplex* buff_G = FLA_DOUBLE_COMPLEX_PTR( G );
dcomplex* buff_H = FLA_DOUBLE_COMPLEX_PTR( H );
double* buff_RG = FLA_DOUBLE_PTR( RG );
double* buff_RH = FLA_DOUBLE_PTR( RH );
dcomplex* buff_W = FLA_DOUBLE_COMPLEX_PTR( W );
dcomplex* buff_U = FLA_DOUBLE_COMPLEX_PTR( U );
dcomplex* buff_V = FLA_DOUBLE_COMPLEX_PTR( V );
r_val = FLA_Bsvd_v_opz_var2( min( m_U, m_V ),
m_U,
m_V,
n_GH,
n_iter_max,
buff_d, inc_d,
buff_e, inc_e,
buff_G, rs_G, cs_G,
buff_H, rs_H, cs_H,
buff_RG, rs_RG, cs_RG,
buff_RH, rs_RH, cs_RH,
buff_W, rs_W, cs_W,
buff_U, rs_U, cs_U,
buff_V, rs_V, cs_V,
b_alg );
break;
}
}
return r_val;
}
| FLA_Error FLA_Bsvd_v_opz_var1 | ( | int | min_m_n, |
| int | m_U, | ||
| int | m_V, | ||
| int | n_GH, | ||
| int | n_iter_max, | ||
| double * | buff_d, | ||
| int | inc_d, | ||
| double * | buff_e, | ||
| int | inc_e, | ||
| dcomplex * | buff_G, | ||
| int | rs_G, | ||
| int | cs_G, | ||
| dcomplex * | buff_H, | ||
| int | rs_H, | ||
| int | cs_H, | ||
| dcomplex * | buff_U, | ||
| int | rs_U, | ||
| int | cs_U, | ||
| dcomplex * | buff_V, | ||
| int | rs_V, | ||
| int | cs_V, | ||
| int | b_alg | ||
| ) |
References bli_d0(), bli_dm1(), bli_z1(), bli_zdscalv(), bli_zsetm(), BLIS_NO_CONJUGATE, FLA_Abort(), FLA_Apply_G_rf_blz_var3(), FLA_Bsvd_compute_tol_thresh_opd(), FLA_Bsvd_find_submatrix_opd(), FLA_Bsvd_iteracc_v_opd_var1(), and FLA_Mach_params_opd().
Referenced by FLA_Bsvd_v_opt_var1().
{
dcomplex one = bli_z1();
double rzero = bli_d0();
int maxitr = 6;
double eps;
double tolmul;
double tol;
double thresh;
dcomplex* G;
dcomplex* H;
double* d1;
double* e1;
int r_val;
int done;
int m_GH_sweep_max;
int ij_begin;
int ijTL, ijBR;
int m_A11;
int n_iter_perf;
int n_UV_apply;
int total_deflations;
int n_deflations;
int n_iter_prev;
int n_iter_perf_sweep_max;
// Compute some convergence constants.
eps = FLA_Mach_params_opd( FLA_MACH_EPS );
tolmul = max( 10.0, min( 100.0, pow( eps, -0.125 ) ) );
FLA_Bsvd_compute_tol_thresh_opd( min_m_n,
tolmul,
maxitr,
buff_d, inc_d,
buff_e, inc_e,
&tol,
&thresh );
#ifdef PRINTF
printf( "FLA_Bsvd_v_opz_var1: tolmul = %12.6e\n", tolmul );
printf( "FLA_Bsvd_v_opz_var1: tol = %12.6e\n", tol );
printf( "FLA_Bsvd_v_opz_var1: thresh = %12.6e\n", thresh );
#endif
// Initialize our completion flag.
done = FALSE;
// Initialize a counter that holds the maximum number of rows of G
// and H that we would need to initialize for the next sweep.
m_GH_sweep_max = min_m_n - 1;
// Initialize a counter for the total number of iterations performed.
n_iter_prev = 0;
// Iterate until the matrix has completely deflated.
for ( total_deflations = 0; done != TRUE; )
{
// Initialize G and H to contain only identity rotations.
bli_zsetm( m_GH_sweep_max,
n_GH,
&one,
buff_G, rs_G, cs_G );
bli_zsetm( m_GH_sweep_max,
n_GH,
&one,
buff_H, rs_H, cs_H );
// Keep track of the maximum number of iterations performed in the
// current sweep. This is used when applying the sweep's Givens
// rotations.
n_iter_perf_sweep_max = 0;
// Perform a sweep: Move through the matrix and perform a bidiagonal
// SVD on each non-zero submatrix that is encountered. During the
// first time through, ijTL will be 0 and ijBR will be min_m_n - 1.
for ( ij_begin = 0; ij_begin < min_m_n; )
{
#ifdef PRINTF
if ( ij_begin == 0 )
printf( "FLA_Bsvd_v_opz_var1: beginning new sweep (ij_begin = %d)\n", ij_begin );
#endif
// Search for the first submatrix along the diagonal that is
// bounded by zeroes (or endpoints of the matrix). If no
// submatrix is found (ie: if the entire superdiagonal is zero
// then FLA_FAILURE is returned. This function also inspects
// superdiagonal elements for proximity to zero. If a given
// element is close enough to zero, then it is deemed
// converged and manually set to zero.
r_val = FLA_Bsvd_find_submatrix_opd( min_m_n,
ij_begin,
buff_d, inc_d,
buff_e, inc_e,
&ijTL,
&ijBR );
// Verify that a submatrix was found. If one was not found,
// then we are done with the current sweep. Furthermore, if
// a submatrix was not found AND we began our search at the
// beginning of the matrix (ie: ij_begin == 0), then the
// matrix has completely deflated and so we are done with
// Francis step iteration.
if ( r_val == FLA_FAILURE )
{
if ( ij_begin == 0 )
{
#ifdef PRINTF
printf( "FLA_Bsvd_v_opz_var1: superdiagonal is completely zero.\n" );
printf( "FLA_Bsvd_v_opz_var1: Francis iteration is done!\n" );
#endif
done = TRUE;
}
// Break out of the current sweep so we can apply the last
// remaining Givens rotations.
break;
}
// If we got this far, then:
// (a) ijTL refers to the index of the first non-zero
// superdiagonal along the diagonal, and
// (b) ijBR refers to either:
// - the first zero element that occurs after ijTL, or
// - the the last diagonal element.
// Note that ijTL and ijBR also correspond to the first and
// last diagonal elements of the submatrix of interest. Thus,
// we may compute the dimension of this submatrix as:
m_A11 = ijBR - ijTL + 1;
#ifdef PRINTF
printf( "FLA_Bsvd_v_opz_var1: ij_begin = %d\n", ij_begin );
printf( "FLA_Bsvd_v_opz_var1: ijTL = %d\n", ijTL );
printf( "FLA_Bsvd_v_opz_var1: ijBR = %d\n", ijBR );
printf( "FLA_Bsvd_v_opz_var1: m_A11 = %d\n", m_A11 );
#endif
// Adjust ij_begin, which gets us ready for the next submatrix
// search in the current sweep.
ij_begin = ijBR + 1;
// Index to the submatrices upon which we will operate.
d1 = buff_d + ijTL * inc_d;
e1 = buff_e + ijTL * inc_e;
G = buff_G + ijTL * rs_G;
H = buff_H + ijTL * rs_H;
// Search for a batch of singular values, recursing on deflated
// subproblems whenever a split occurs. Iteration continues as
// long as
// (a) there is still matrix left to operate on, and
// (b) the number of iterations performed in this batch is
// less than n_G.
// If/when either of the two above conditions fails to hold,
// the function returns.
n_deflations = FLA_Bsvd_iteracc_v_opd_var1( m_A11,
n_GH,
ijTL,
tol,
thresh,
d1, inc_d,
e1, inc_e,
G, rs_G, cs_G,
H, rs_H, cs_H,
&n_iter_perf );
// Record the number of deflations that were observed.
total_deflations += n_deflations;
// Update the maximum number of iterations performed in the
// current sweep.
n_iter_perf_sweep_max = max( n_iter_perf_sweep_max, n_iter_perf );
#ifdef PRINTF
printf( "FLA_Bsvd_v_opz_var1: deflations observed = %d\n", n_deflations );
printf( "FLA_Bsvd_v_opz_var1: total deflations observed = %d\n", total_deflations );
printf( "FLA_Bsvd_v_opz_var1: num iterations performed = %d\n", n_iter_perf );
#endif
// Store the most recent value of ijBR in m_G_sweep_max.
// When the sweep is done, this value will contain the minimum
// number of rows of G and H we can apply and safely include all
// non-identity rotations that were computed during the
// singular value searches.
m_GH_sweep_max = ijBR;
// Make sure we haven't exceeded our maximum iteration count.
if ( n_iter_prev >= n_iter_max * min_m_n )
{
#ifdef PRINTF
printf( "FLA_Bsvd_v_opz_var1: reached maximum total number of iterations: %d\n", n_iter_prev );
#endif
FLA_Abort();
//return FLA_FAILURE;
}
}
// The sweep is complete. Now we must apply the Givens rotations
// that were accumulated during the sweep.
// Recall that the number of columns of U and V to which we apply
// rotations is one more than the number of rotations.
n_UV_apply = m_GH_sweep_max + 1;
#ifdef PRINTF
printf( "FLA_Bsvd_v_opz_var1: applying %d sets of Givens rotations\n", n_iter_perf_sweep_max );
printf( "FLA_Bsvd_v_opz_var1: m_U = %d\n", m_U );
printf( "FLA_Bsvd_v_opz_var1: m_V = %d\n", m_V );
printf( "FLA_Bsvd_v_opz_var1: napp= %d\n", n_UV_apply );
#endif
// Apply the Givens rotations. Note that we only apply k sets of
// rotations, where k = n_iter_perf_sweep_max. Also note that we only
// apply to n_UV_apply columns of U and V since this is the most we
// need to touch given the most recent value stored to m_GH_sweep_max.
//FLA_Apply_G_rf_blz_var5( n_iter_perf_sweep_max,
FLA_Apply_G_rf_blz_var3( n_iter_perf_sweep_max,
//FLA_Apply_G_rf_blz_var9( n_iter_perf_sweep_max,
//FLA_Apply_G_rf_blz_var6( n_iter_perf_sweep_max,
m_U,
n_UV_apply,
buff_G, rs_G, cs_G,
buff_U, rs_U, cs_U,
b_alg );
//FLA_Apply_G_rf_blz_var5( n_iter_perf_sweep_max,
FLA_Apply_G_rf_blz_var3( n_iter_perf_sweep_max,
//FLA_Apply_G_rf_blz_var9( n_iter_perf_sweep_max,
//FLA_Apply_G_rf_blz_var6( n_iter_perf_sweep_max,
m_V,
n_UV_apply,
buff_H, rs_H, cs_H,
buff_V, rs_V, cs_V,
b_alg );
// Increment the total number of iterations previously performed.
n_iter_prev += n_iter_perf_sweep_max;
#ifdef PRINTF
printf( "FLA_Bsvd_v_opz_var1: total number of iterations performed: %d\n", n_iter_prev );
#endif
}
// Make all the singular values positive.
{
int i;
double minus_one = bli_dm1();
for ( i = 0; i < min_m_n; ++i )
{
if ( buff_d[ (i )*inc_d ] < rzero )
{
buff_d[ (i )*inc_d ] = -buff_d[ (i )*inc_d ];
// Scale the right singular vectors.
bli_zdscalv( BLIS_NO_CONJUGATE,
m_V,
&minus_one,
buff_V + (i )*cs_V, rs_V );
}
}
}
return n_iter_prev;
}
| FLA_Error FLA_Bsvd_v_opz_var2 | ( | int | min_m_n, |
| int | m_U, | ||
| int | m_V, | ||
| int | n_GH, | ||
| int | n_iter_max, | ||
| double * | buff_d, | ||
| int | inc_d, | ||
| double * | buff_e, | ||
| int | inc_e, | ||
| dcomplex * | buff_G, | ||
| int | rs_G, | ||
| int | cs_G, | ||
| dcomplex * | buff_H, | ||
| int | rs_H, | ||
| int | cs_H, | ||
| double * | buff_RG, | ||
| int | rs_RG, | ||
| int | cs_RG, | ||
| double * | buff_RH, | ||
| int | rs_RH, | ||
| int | cs_RH, | ||
| dcomplex * | buff_W, | ||
| int | rs_W, | ||
| int | cs_W, | ||
| dcomplex * | buff_U, | ||
| int | rs_U, | ||
| int | cs_U, | ||
| dcomplex * | buff_V, | ||
| int | rs_V, | ||
| int | cs_V, | ||
| int | b_alg | ||
| ) |
References bli_d0(), bli_d1(), bli_dgemm(), bli_dident(), bli_dm1(), bli_z1(), bli_zcopymt(), bli_zdscalv(), bli_zsetm(), BLIS_NO_CONJUGATE, BLIS_NO_TRANSPOSE, FLA_Apply_G_rf_bld_var3b(), FLA_Bsvd_compute_tol_thresh_opd(), FLA_Bsvd_find_submatrix_opd(), FLA_Bsvd_iteracc_v_opd_var1(), and FLA_Mach_params_opd().
Referenced by FLA_Bsvd_v_opt_var2().
{
dcomplex one = bli_z1();
double rone = bli_d1();
double rzero = bli_d0();
int maxitr = 6;
double eps;
double tolmul;
double tol;
double thresh;
dcomplex* G;
dcomplex* H;
double* d1;
double* e1;
int r_val;
int done;
int m_GH_sweep_max;
int ij_begin;
int ijTL, ijBR;
int m_A11;
int n_iter_perf;
int n_UV_apply;
int total_deflations;
int n_deflations;
int n_iter_prev;
int n_iter_perf_sweep_max;
// Compute some convergence constants.
eps = FLA_Mach_params_opd( FLA_MACH_EPS );
tolmul = max( 10.0, min( 100.0, pow( eps, -0.125 ) ) );
FLA_Bsvd_compute_tol_thresh_opd( min_m_n,
tolmul,
maxitr,
buff_d, inc_d,
buff_e, inc_e,
&tol,
&thresh );
// Initialize our completion flag.
done = FALSE;
// Initialize a counter that holds the maximum number of rows of G
// and H that we would need to initialize for the next sweep.
m_GH_sweep_max = min_m_n - 1;
// Initialize a counter for the total number of iterations performed.
n_iter_prev = 0;
// Initialize RG and RH to identity.
bli_dident( min_m_n,
buff_RG, rs_RG, cs_RG );
bli_dident( min_m_n,
buff_RH, rs_RH, cs_RH );
// Iterate until the matrix has completely deflated.
for ( total_deflations = 0; done != TRUE; )
{
// Initialize G and H to contain only identity rotations.
bli_zsetm( m_GH_sweep_max,
n_GH,
&one,
buff_G, rs_G, cs_G );
bli_zsetm( m_GH_sweep_max,
n_GH,
&one,
buff_H, rs_H, cs_H );
// Keep track of the maximum number of iterations performed in the
// current sweep. This is used when applying the sweep's Givens
// rotations.
n_iter_perf_sweep_max = 0;
// Perform a sweep: Move through the matrix and perform a bidiagonal
// SVD on each non-zero submatrix that is encountered. During the
// first time through, ijTL will be 0 and ijBR will be min_m_n - 1.
for ( ij_begin = 0; ij_begin < min_m_n; )
{
#ifdef PRINTF
if ( ij_begin == 0 )
printf( "FLA_Bsvd_v_opz_var2: beginning new sweep (ij_begin = %d)\n", ij_begin );
#endif
// Search for the first submatrix along the diagonal that is
// bounded by zeroes (or endpoints of the matrix). If no
// submatrix is found (ie: if the entire superdiagonal is zero
// then FLA_FAILURE is returned. This function also inspects
// superdiagonal elements for proximity to zero. If a given
// element is close enough to zero, then it is deemed
// converged and manually set to zero.
r_val = FLA_Bsvd_find_submatrix_opd( min_m_n,
ij_begin,
buff_d, inc_d,
buff_e, inc_e,
&ijTL,
&ijBR );
// Verify that a submatrix was found. If one was not found,
// then we are done with the current sweep. Furthermore, if
// a submatrix was not found AND we began our search at the
// beginning of the matrix (ie: ij_begin == 0), then the
// matrix has completely deflated and so we are done with
// Francis step iteration.
if ( r_val == FLA_FAILURE )
{
if ( ij_begin == 0 )
{
#ifdef PRINTF
printf( "FLA_Bsvd_v_opz_var2: superdiagonal is completely zero.\n" );
printf( "FLA_Bsvd_v_opz_var2: Francis iteration is done!\n" );
#endif
done = TRUE;
}
// Break out of the current sweep so we can apply the last
// remaining Givens rotations.
break;
}
// If we got this far, then:
// (a) ijTL refers to the index of the first non-zero
// superdiagonal along the diagonal, and
// (b) ijBR refers to either:
// - the first zero element that occurs after ijTL, or
// - the the last diagonal element.
// Note that ijTL and ijBR also correspond to the first and
// last diagonal elements of the submatrix of interest. Thus,
// we may compute the dimension of this submatrix as:
m_A11 = ijBR - ijTL + 1;
#ifdef PRINTF
printf( "FLA_Bsvd_v_opz_var2: ij_begin = %d\n", ij_begin );
printf( "FLA_Bsvd_v_opz_var2: ijTL = %d\n", ijTL );
printf( "FLA_Bsvd_v_opz_var2: ijBR = %d\n", ijBR );
printf( "FLA_Bsvd_v_opz_var2: m_A11 = %d\n", m_A11 );
#endif
// Adjust ij_begin, which gets us ready for the next submatrix
// search in the current sweep.
ij_begin = ijBR + 1;
// Index to the submatrices upon which we will operate.
d1 = buff_d + ijTL * inc_d;
e1 = buff_e + ijTL * inc_e;
G = buff_G + ijTL * rs_G;
H = buff_H + ijTL * rs_H;
// Search for a batch of singular values, recursing on deflated
// subproblems whenever possible. A new singular value search is
// performed as long as
// (a) there is still matrix left to operate on, and
// (b) the number of iterations performed in this batch is
// less than n_G.
// If/when either of the two above conditions fails to hold,
// the function returns.
n_deflations = FLA_Bsvd_iteracc_v_opd_var1( m_A11,
n_GH,
ijTL,
tol,
thresh,
d1, inc_d,
e1, inc_e,
G, rs_G, cs_G,
H, rs_H, cs_H,
&n_iter_perf );
// Record the number of deflations that occurred.
total_deflations += n_deflations;
// Update the maximum number of iterations performed in the
// current sweep.
n_iter_perf_sweep_max = max( n_iter_perf_sweep_max, n_iter_perf );
#ifdef PRINTF
printf( "FLA_Bsvd_v_opz_var2: deflations observed = %d\n", n_deflations );
printf( "FLA_Bsvd_v_opz_var2: total deflations observed = %d\n", total_deflations );
printf( "FLA_Bsvd_v_opz_var2: num iterations performed = %d\n", n_iter_perf );
#endif
// Store the most recent value of ijBR in m_G_sweep_max.
// When the sweep is done, this value will contain the minimum
// number of rows of G and H we can apply and safely include all
// non-identity rotations that were computed during the
// singular value searches.
m_GH_sweep_max = ijBR;
}
// The sweep is complete. Now we must apply the Givens rotations
// that were accumulated during the sweep.
// Recall that the number of columns of U and V to which we apply
// rotations is one more than the number of rotations.
n_UV_apply = m_GH_sweep_max + 1;
#ifdef PRINTF
printf( "FLA_Bsvd_v_opz_var2: applying %d sets of Givens rotations\n", n_iter_perf_sweep_max );
printf( "FLA_Bsvd_v_opz_var2: m_U = %d\n", m_U );
printf( "FLA_Bsvd_v_opz_var2: m_V = %d\n", m_V );
printf( "FLA_Bsvd_v_opz_var2: napp= %d\n", n_UV_apply );
#endif
// Apply the Givens rotations. Note that we only apply k sets of
// rotations, where k = n_iter_perf_sweep_max. Also note that we only
// apply to n_UV_apply columns of U and V since this is the most we
// need to touch given the most recent value stored to m_GH_sweep_max.
//FLA_Apply_G_rf_bld_var5b( n_iter_perf_sweep_max,
FLA_Apply_G_rf_bld_var3b( n_iter_perf_sweep_max,
//FLA_Apply_G_rf_bld_var9b( n_iter_perf_sweep_max,
//FLA_Apply_G_rf_bld_var6b( n_iter_perf_sweep_max,
min_m_n,
n_UV_apply,
n_iter_prev,
buff_G, rs_G, cs_G,
buff_RG, rs_RG, cs_RG,
b_alg );
//FLA_Apply_G_rf_bld_var5b( n_iter_perf_sweep_max,
FLA_Apply_G_rf_bld_var3b( n_iter_perf_sweep_max,
//FLA_Apply_G_rf_bld_var9b( n_iter_perf_sweep_max,
//FLA_Apply_G_rf_bld_var6b( n_iter_perf_sweep_max,
min_m_n,
n_UV_apply,
n_iter_prev,
buff_H, rs_H, cs_H,
buff_RH, rs_RH, cs_RH,
b_alg );
// Increment the total number of iterations previously performed.
n_iter_prev += n_iter_perf_sweep_max;
#ifdef PRINTF
printf( "FLA_Bsvd_v_opz_var2: total number of iterations performed: %d\n", n_iter_prev );
#endif
}
// Copy the contents of Q to temporary storage.
bli_zcopymt( BLIS_NO_TRANSPOSE,
m_U,
m_V,
buff_U, rs_U, cs_U,
buff_W, rs_W, cs_W );
// W needs to be max_m_n-by-min_m_n!!!!!!!!!!!!!!!
// Multiply U by R, overwriting U.
bli_dgemm( BLIS_NO_TRANSPOSE,
BLIS_NO_TRANSPOSE,
2*m_U,
m_V,
m_V,
&rone,
( double* )buff_W, rs_W, 2*cs_W,
buff_RG, rs_RG, cs_RG,
&rzero,
( double* )buff_U, rs_U, 2*cs_U );
bli_zcopymt( BLIS_NO_TRANSPOSE,
m_V,
m_V,
buff_V, rs_V, cs_V,
buff_W, rs_W, cs_W );
// Multiply V by R, overwriting V.
bli_dgemm( BLIS_NO_TRANSPOSE,
BLIS_NO_TRANSPOSE,
2*m_V,
m_V,
m_V,
&rone,
( double* )buff_W, rs_W, 2*cs_W,
buff_RH, rs_RH, cs_RH,
&rzero,
( double* )buff_V, rs_V, 2*cs_V );
// Make all the singular values positive.
{
int i;
double minus_one = bli_dm1();
for ( i = 0; i < min_m_n; ++i )
{
if ( buff_d[ (i )*inc_d ] < rzero )
{
buff_d[ (i )*inc_d ] = -buff_d[ (i )*inc_d ];
// Scale the right singular vectors.
bli_zdscalv( BLIS_NO_CONJUGATE,
m_V,
&minus_one,
buff_V + (i )*cs_V, rs_V );
}
}
}
return n_iter_prev;
}
1.7.6.1