|
libflame
revision_anchor
|
Functions | |
| FLA_Error | FLA_Fused_UZhu_ZUhu_opt_var1 (FLA_Obj delta, FLA_Obj U, FLA_Obj Z, FLA_Obj t, FLA_Obj u, FLA_Obj w) |
| FLA_Error | FLA_Fused_UZhu_ZUhu_ops_var1 (int m_U, int n_U, float *buff_delta, float *buff_U, int rs_U, int cs_U, float *buff_Z, int rs_Z, int cs_Z, float *buff_t, int inc_t, float *buff_u, int inc_u, float *buff_w, int inc_w) |
| FLA_Error | FLA_Fused_UZhu_ZUhu_opd_var1 (int m_U, int n_U, double *buff_delta, double *buff_U, int rs_U, int cs_U, double *buff_Z, int rs_Z, int cs_Z, double *buff_t, int inc_t, double *buff_u, int inc_u, double *buff_w, int inc_w) |
| FLA_Error | FLA_Fused_UZhu_ZUhu_opc_var1 (int m_U, int n_U, scomplex *buff_delta, scomplex *buff_U, int rs_U, int cs_U, scomplex *buff_Z, int rs_Z, int cs_Z, scomplex *buff_t, int inc_t, scomplex *buff_u, int inc_u, scomplex *buff_w, int inc_w) |
| FLA_Error | FLA_Fused_UZhu_ZUhu_opz_var1 (int m_U, int n_U, dcomplex *buff_delta, dcomplex *buff_U, int rs_U, int cs_U, dcomplex *buff_Z, int rs_Z, int cs_Z, dcomplex *buff_t, int inc_t, dcomplex *buff_u, int inc_u, dcomplex *buff_w, int inc_w) |
| FLA_Error FLA_Fused_UZhu_ZUhu_opc_var1 | ( | int | m_U, |
| int | n_U, | ||
| scomplex * | buff_delta, | ||
| scomplex * | buff_U, | ||
| int | rs_U, | ||
| int | cs_U, | ||
| scomplex * | buff_Z, | ||
| int | rs_Z, | ||
| int | cs_Z, | ||
| scomplex * | buff_t, | ||
| int | inc_t, | ||
| scomplex * | buff_u, | ||
| int | inc_u, | ||
| scomplex * | buff_w, | ||
| int | inc_w | ||
| ) |
References bli_cdot(), BLIS_CONJUGATE, and F77_caxpy().
Referenced by FLA_Fused_UZhu_ZUhu_opt_var1() and FLA_Tridiag_UT_l_step_ofc_var3().
{
    /*
     * Single-precision complex kernel.  Visits the n_U columns of U and Z
     * one at a time; for each column it forms two dot products against u,
     * stores the U-column one in t, scales both by *buff_delta and
     * accumulates the two scaled columns into w.
     */
    scomplex* const scale = buff_delta;
    scomplex* const x     = buff_u;
    scomplex* const acc   = buff_w;
    int             j;

    for ( j = 0; j < n_U; ++j )
    {
        /* Start of column j of U and of Z, and the matching entry of t. */
        scomplex* col_u = buff_U + j * cs_U;
        scomplex* col_z = buff_Z + j * cs_Z;
        scomplex* t_j   = buff_t + j * inc_t;
        scomplex  zu;   /* dot of Z column with u */
        scomplex  uu;   /* dot of U column with u */

        /*------------------------------------------------------------*/

        bli_cdot( BLIS_CONJUGATE, m_U, col_z, rs_Z, x, inc_u, &zu );
        bli_cdot( BLIS_CONJUGATE, m_U, col_u, rs_U, x, inc_u, &uu );

        /* t receives the unscaled U-column dot product. */
        *t_j = uu;

        /* Apply delta to both coefficients before the updates
           (presumably zu := delta * zu, uu := delta * uu). */
        bli_cscals( scale, &zu );
        bli_cscals( scale, &uu );

        /* acc += zu * U(:,j) */
        F77_caxpy( &m_U, &zu, col_u, &rs_U, acc, &inc_w );

        /* acc += uu * Z(:,j) */
        F77_caxpy( &m_U, &uu, col_z, &rs_Z, acc, &inc_w );

        /*------------------------------------------------------------*/
    }

    return FLA_SUCCESS;
}
| FLA_Error FLA_Fused_UZhu_ZUhu_opd_var1 | ( | int | m_U, |
| int | n_U, | ||
| double * | buff_delta, | ||
| double * | buff_U, | ||
| int | rs_U, | ||
| int | cs_U, | ||
| double * | buff_Z, | ||
| int | rs_Z, | ||
| int | cs_Z, | ||
| double * | buff_t, | ||
| int | inc_t, | ||
| double * | buff_u, | ||
| int | inc_u, | ||
| double * | buff_w, | ||
| int | inc_w | ||
| ) |
References bli_d0(), bli_daxpyv(), bli_daxpyv2b(), bli_ddot(), bli_ddotaxpy(), bli_ddotsv2(), bli_ddotv2axpyv2b(), BLIS_CONJUGATE, and BLIS_NO_CONJUGATE.
Referenced by FLA_Fused_UZhu_ZUhu_opt_var1() and FLA_Tridiag_UT_l_step_ofd_var3().
{
    /*
     * Double-precision real kernel.  Columns of U and Z are processed two
     * at a time with the fused BLIS kernels; a trailing odd column is
     * handled with the single-column kernels.  For every column the dot
     * product of the U column with u is stored in the matching entry of t.
     *
     * NOTE(review): buff_delta is never read here.  The coefficients are
     * negated with bli_dneg1() instead, so this variant appears to assume
     * delta == -1 -- confirm against the callers.
     */
    double  zero    = bli_d0();
    int     n_pairs = n_U / 2;
    int     n_left  = n_U % 2;
    int     i;

    double* u    = buff_u;
    double* w    = buff_w;

    /* Cursors to the first column of the current pair (or the leftover
       column once the paired loop has finished). */
    double* u1   = buff_U;
    double* z1   = buff_Z;
    double* tau1 = buff_t;

    (void) buff_delta;

    for ( i = 0; i < n_pairs; ++i )
    {
        /* The second column of the pair is derived only when a full pair
           exists, so no pointer past the matrix is formed for n_U < 2. */
        double* u2   = u1   + cs_U;
        double* z2   = z1   + cs_Z;
        double* tau2 = tau1 + inc_t;

        double  rho_z1u;
        double  rho_z2u;
        double  rho_u1u;
        double  rho_u2u;

        /*------------------------------------------------------------*/

        /* Dot products of the two Z columns with u. */
        bli_ddotsv2( BLIS_CONJUGATE,
                     m_U,
                     z1, rs_Z,
                     z2, rs_Z,
                     u,  inc_u,
                     &zero,
                     &rho_z1u,
                     &rho_z2u );

        bli_dneg1( &rho_z1u );
        bli_dneg1( &rho_z2u );

        /* Fused step: dot products of the two U columns with u, plus the
           update of w by the U columns using the negated Z coefficients. */
        bli_ddotv2axpyv2b( m_U,
                           u1, rs_U,
                           u2, rs_U,
                           u,  inc_u,
                           &rho_z1u,
                           &rho_z2u,
                           &rho_u1u,
                           &rho_u2u,
                           w,  inc_w );

        /* t receives the un-negated U-column dot products. */
        *tau1 = rho_u1u;
        *tau2 = rho_u2u;

        bli_dneg1( &rho_u1u );
        bli_dneg1( &rho_u2u );

        /* Update of w by the two Z columns using the negated U coefficients. */
        bli_daxpyv2b( m_U,
                      &rho_u1u,
                      &rho_u2u,
                      z1, rs_Z,
                      z2, rs_Z,
                      w,  inc_w );

        /*------------------------------------------------------------*/

        u1   += 2 * cs_U;
        z1   += 2 * cs_Z;
        tau1 += 2 * inc_t;
    }

    /* n_U odd: one column remains (n_left is 0 or 1 for n_U >= 0). */
    if ( n_left > 0 )
    {
        double rho_z1u;
        double rho_u1u;

        bli_ddot( BLIS_CONJUGATE,
                  m_U,
                  z1, rs_Z,
                  u,  inc_u,
                  &rho_z1u );

        bli_dneg1( &rho_z1u );

        bli_ddotaxpy( m_U,
                      u1, rs_U,
                      u,  inc_u,
                      &rho_z1u,
                      &rho_u1u,
                      w,  inc_w );

        *tau1 = rho_u1u;

        bli_dneg1( &rho_u1u );

        bli_daxpyv( BLIS_NO_CONJUGATE,
                    m_U,
                    &rho_u1u,
                    z1, rs_Z,
                    w,  inc_w );
    }

    return FLA_SUCCESS;
}
| FLA_Error FLA_Fused_UZhu_ZUhu_ops_var1 | ( | int | m_U, |
| int | n_U, | ||
| float * | buff_delta, | ||
| float * | buff_U, | ||
| int | rs_U, | ||
| int | cs_U, | ||
| float * | buff_Z, | ||
| int | rs_Z, | ||
| int | cs_Z, | ||
| float * | buff_t, | ||
| int | inc_t, | ||
| float * | buff_u, | ||
| int | inc_u, | ||
| float * | buff_w, | ||
| int | inc_w | ||
| ) |
References F77_saxpy() and F77_sdot().
Referenced by FLA_Fused_UZhu_ZUhu_opt_var1() and FLA_Tridiag_UT_l_step_ofs_var3().
{
    /*
     * Single-precision real kernel.  For each of the n_U columns: take the
     * dot products of the Z column and of the U column with u, store the
     * latter in t, scale both by *buff_delta, then add the scaled U and Z
     * columns into w via BLAS saxpy.
     */
    float* const scale = buff_delta;
    float* const x     = buff_u;
    float* const acc   = buff_w;
    int          j;

    for ( j = 0; j < n_U; ++j )
    {
        /* Start of column j of U and of Z, and the matching entry of t. */
        float* col_u = buff_U + j * cs_U;
        float* col_z = buff_Z + j * cs_Z;
        float* t_j   = buff_t + j * inc_t;
        float  zu;
        float  uu;

        /*------------------------------------------------------------*/

        zu = F77_sdot( &m_U, col_z, &rs_Z, x, &inc_u );
        uu = F77_sdot( &m_U, col_u, &rs_U, x, &inc_u );

        /* t receives the unscaled U-column dot product. */
        *t_j = uu;

        zu = zu * ( *scale );
        uu = uu * ( *scale );

        /* acc += zu * U(:,j) */
        F77_saxpy( &m_U, &zu, col_u, &rs_U, acc, &inc_w );

        /* acc += uu * Z(:,j) */
        F77_saxpy( &m_U, &uu, col_z, &rs_Z, acc, &inc_w );

        /*------------------------------------------------------------*/
    }

    return FLA_SUCCESS;
}
| FLA_Error FLA_Fused_UZhu_ZUhu_opt_var1 | ( | FLA_Obj | delta, |
| FLA_Obj | U, | ||
| FLA_Obj | Z, | ||
| FLA_Obj | t, | ||
| FLA_Obj | u, | ||
| FLA_Obj | w | ||
| ) |
References FLA_Fused_UZhu_ZUhu_opc_var1(), FLA_Fused_UZhu_ZUhu_opd_var1(), FLA_Fused_UZhu_ZUhu_ops_var1(), FLA_Fused_UZhu_ZUhu_opz_var1(), FLA_Obj_col_stride(), FLA_Obj_datatype(), FLA_Obj_length(), FLA_Obj_row_stride(), FLA_Obj_vector_inc(), and FLA_Obj_width().
{
    /*
       Effective computation:
       w = w + delta * ( U ( Z' u ) + Z ( U' u ) );
       t = U' u;

       Object-level front end: reads dimensions and strides from the
       FLA_Obj arguments and dispatches on the datatype of U to the
       matching typed kernel.  Every kernel takes its vector arguments in
       the order (t, u, w).
    */
    FLA_Datatype datatype;
    int          m_U, n_U;
    int          rs_U, cs_U;
    int          rs_Z, cs_Z;
    int          inc_u, inc_w, inc_t;

    datatype = FLA_Obj_datatype( U );

    m_U      = FLA_Obj_length( U );
    n_U      = FLA_Obj_width( U );

    rs_U     = FLA_Obj_row_stride( U );
    cs_U     = FLA_Obj_col_stride( U );

    rs_Z     = FLA_Obj_row_stride( Z );
    cs_Z     = FLA_Obj_col_stride( Z );

    inc_u    = FLA_Obj_vector_inc( u );
    inc_w    = FLA_Obj_vector_inc( w );
    inc_t    = FLA_Obj_vector_inc( t );

    switch ( datatype )
    {
        case FLA_FLOAT:
        {
            float* buff_U     = FLA_FLOAT_PTR( U );
            float* buff_Z     = FLA_FLOAT_PTR( Z );
            float* buff_t     = FLA_FLOAT_PTR( t );
            float* buff_u     = FLA_FLOAT_PTR( u );
            float* buff_w     = FLA_FLOAT_PTR( w );
            float* buff_delta = FLA_FLOAT_PTR( delta );

            FLA_Fused_UZhu_ZUhu_ops_var1( m_U,
                                          n_U,
                                          buff_delta,
                                          buff_U, rs_U, cs_U,
                                          buff_Z, rs_Z, cs_Z,
                                          buff_t, inc_t,
                                          buff_u, inc_u,
                                          buff_w, inc_w );
            break;
        }

        case FLA_DOUBLE:
        {
            double* buff_U     = FLA_DOUBLE_PTR( U );
            double* buff_Z     = FLA_DOUBLE_PTR( Z );
            double* buff_t     = FLA_DOUBLE_PTR( t );
            double* buff_u     = FLA_DOUBLE_PTR( u );
            double* buff_w     = FLA_DOUBLE_PTR( w );
            double* buff_delta = FLA_DOUBLE_PTR( delta );

            FLA_Fused_UZhu_ZUhu_opd_var1( m_U,
                                          n_U,
                                          buff_delta,
                                          buff_U, rs_U, cs_U,
                                          buff_Z, rs_Z, cs_Z,
                                          buff_t, inc_t,
                                          buff_u, inc_u,
                                          buff_w, inc_w );
            break;
        }

        case FLA_COMPLEX:
        {
            scomplex* buff_U     = FLA_COMPLEX_PTR( U );
            scomplex* buff_Z     = FLA_COMPLEX_PTR( Z );
            scomplex* buff_t     = FLA_COMPLEX_PTR( t );
            scomplex* buff_u     = FLA_COMPLEX_PTR( u );
            scomplex* buff_w     = FLA_COMPLEX_PTR( w );
            scomplex* buff_delta = FLA_COMPLEX_PTR( delta );

            /* t must precede u, exactly as in the other three cases;
               passing them swapped would make the kernel read t as the
               input vector and overwrite u with the dot products. */
            FLA_Fused_UZhu_ZUhu_opc_var1( m_U,
                                          n_U,
                                          buff_delta,
                                          buff_U, rs_U, cs_U,
                                          buff_Z, rs_Z, cs_Z,
                                          buff_t, inc_t,
                                          buff_u, inc_u,
                                          buff_w, inc_w );
            break;
        }

        case FLA_DOUBLE_COMPLEX:
        {
            dcomplex* buff_U     = FLA_DOUBLE_COMPLEX_PTR( U );
            dcomplex* buff_Z     = FLA_DOUBLE_COMPLEX_PTR( Z );
            dcomplex* buff_t     = FLA_DOUBLE_COMPLEX_PTR( t );
            dcomplex* buff_u     = FLA_DOUBLE_COMPLEX_PTR( u );
            dcomplex* buff_w     = FLA_DOUBLE_COMPLEX_PTR( w );
            dcomplex* buff_delta = FLA_DOUBLE_COMPLEX_PTR( delta );

            FLA_Fused_UZhu_ZUhu_opz_var1( m_U,
                                          n_U,
                                          buff_delta,
                                          buff_U, rs_U, cs_U,
                                          buff_Z, rs_Z, cs_Z,
                                          buff_t, inc_t,
                                          buff_u, inc_u,
                                          buff_w, inc_w );
            break;
        }

        default:
            /* Any other datatype: no kernel is run, as before. */
            break;
    }

    return FLA_SUCCESS;
}
| FLA_Error FLA_Fused_UZhu_ZUhu_opz_var1 | ( | int | m_U, |
| int | n_U, | ||
| dcomplex * | buff_delta, | ||
| dcomplex * | buff_U, | ||
| int | rs_U, | ||
| int | cs_U, | ||
| dcomplex * | buff_Z, | ||
| int | rs_Z, | ||
| int | cs_Z, | ||
| dcomplex * | buff_t, | ||
| int | inc_t, | ||
| dcomplex * | buff_u, | ||
| int | inc_u, | ||
| dcomplex * | buff_w, | ||
| int | inc_w | ||
| ) |
References bli_zaxpyv(), bli_zdot(), bli_zdotaxpy(), BLIS_CONJUGATE, and BLIS_NO_CONJUGATE.
Referenced by FLA_Fused_UZhu_ZUhu_opt_var1() and FLA_Tridiag_UT_l_step_ofz_var3().
{
    /*
     * Double-precision complex kernel.
     *
     * Effective computation:
     *   w = w + delta * ( U ( Z' u ) + Z ( U' u ) );
     *
     * Columns are processed one at a time (unroll factor 1), so there is
     * never a leftover column and no remainder path is needed.  For every
     * column the dot product of the U column with u is stored in the
     * matching entry of t.
     *
     * NOTE(review): buff_delta is never read here.  The coefficients are
     * negated with bli_zneg1() instead, so this variant appears to assume
     * delta == -1 -- confirm against the callers.
     */
    dcomplex* u    = buff_u;
    dcomplex* w    = buff_w;

    /* Cursors to the current column of U and Z and the current entry of t. */
    dcomplex* u1   = buff_U;
    dcomplex* z1   = buff_Z;
    dcomplex* tau1 = buff_t;
    int       i;

    (void) buff_delta;

    for ( i = 0; i < n_U; ++i )
    {
        dcomplex rho_z1u;
        dcomplex rho_u1u;

        /*------------------------------------------------------------*/

        /* Dot product of the Z column with u. */
        bli_zdot( BLIS_CONJUGATE,
                  m_U,
                  z1, rs_Z,
                  u,  inc_u,
                  &rho_z1u );

        bli_zneg1( &rho_z1u );

        /* Fused step: dot product of the U column with u, plus the update
           of w by the U column using the negated Z coefficient. */
        bli_zdotaxpy( m_U,
                      u1, rs_U,
                      u,  inc_u,
                      &rho_z1u,
                      &rho_u1u,
                      w,  inc_w );

        /* t receives the un-negated U-column dot product. */
        *tau1 = rho_u1u;

        bli_zneg1( &rho_u1u );

        /* Update of w by the Z column using the negated U coefficient. */
        bli_zaxpyv( BLIS_NO_CONJUGATE,
                    m_U,
                    &rho_u1u,
                    z1, rs_Z,
                    w,  inc_w );

        /*------------------------------------------------------------*/

        u1   += cs_U;
        z1   += cs_Z;
        tau1 += inc_t;
    }

    return FLA_SUCCESS;
}
1.7.6.1