|
libflame
revision_anchor
|
Functions | |
| void | bli_saxmyv2 (conj_t conjx, int n, float *alpha, float *beta, float *x, int inc_x, float *y, int inc_y, float *z, int inc_z) |
| void | bli_daxmyv2 (conj_t conjx, int n, double *alpha, double *beta, double *x, int inc_x, double *y, int inc_y, double *z, int inc_z) |
| void bli_daxmyv2 | ( | conj_t | conjx, |
| int | n, | ||
| double * | alpha, | ||
| double * | beta, | ||
| double * | x, | ||
| int | inc_x, | ||
| double * | y, | ||
| int | inc_y, | ||
| double * | z, | ||
| int | inc_z | ||
| ) |
References bli_abort() and v2df_t::v.
Referenced by FLA_Fused_UYx_ZVx_opd_var1().
{
    // Fused update of two unit-stride double-precision vectors by a shared x:
    //
    //   y := y - alpha * x
    //   z := z - beta  * x
    //
    // conjx is accepted but never referenced here. alpha and beta point to
    // the scalars, x is only read, and y and z are updated in place.
    // bli_abort() is called when any stride is not 1, or when the 16-byte
    // alignments of x, y and z are incompatible with the aligned SSE
    // loads/stores used by the main loop.
    double* restrict chi1;
    double* restrict psi1;
    double* restrict zeta1;
    int i;
    int n_pre;
    int n_run;
    int n_left;
    v2df_t a1v, b1v;
    v2df_t x1v, y1v, z1v;
    v2df_t x2v, y2v, z2v;

    // Only contiguous vectors are supported.
    if ( inc_x != 1 ||
         inc_y != 1 ||
         inc_z != 1 ) bli_abort();

    // Nothing to do for an empty vector. Without this guard, a z that is
    // off a 16-byte boundary would make the peeling step below read and
    // write one element of a zero-length vector.
    if ( n <= 0 ) return;

    n_pre = 0;
    if ( ( unsigned long ) z % 16 != 0 )
    {
        // z is off a 16-byte boundary, so one scalar iteration is peeled.
        // x and y must be off the boundary too, presumably so that all
        // three pointers land on a 16-byte boundary after that element
        // (assumes naturally aligned doubles -- TODO confirm).
        if ( ( unsigned long ) x % 16 == 0 ||
             ( unsigned long ) y % 16 == 0 ) bli_abort();
        n_pre = 1;
    }

    // The main loop consumes four elements per iteration; whatever remains
    // is handled by the scalar tail.
    n_run  = ( n - n_pre ) / 4;
    n_left = ( n - n_pre ) % 4;

    // z is already aligned and nothing is peeled: if the vector loop is
    // going to run, x and y must be aligned as well, because
    // _mm_load_pd/_mm_store_pd require 16-byte-aligned addresses. Short
    // vectors that only use the scalar tail are left unrestricted.
    if ( n_pre == 0 && n_run > 0 &&
         ( ( unsigned long ) x % 16 != 0 ||
           ( unsigned long ) y % 16 != 0 ) ) bli_abort();

    chi1  = x;
    psi1  = y;
    zeta1 = z;

    // Peeled scalar iteration (at most one element).
    if ( n_pre == 1 )
    {
        double alpha_c = *alpha;
        double beta_c  = *beta;
        double chi1_c  = *chi1;

        *psi1  -= alpha_c * chi1_c;
        *zeta1 -= beta_c  * chi1_c;

        chi1  += inc_x;
        psi1  += inc_y;
        zeta1 += inc_z;
    }

    // Broadcast each scalar into both lanes of a vector register.
    a1v.v = _mm_loaddup_pd( ( double* )alpha );
    b1v.v = _mm_loaddup_pd( ( double* )beta );

    // Main loop: two 2-wide vectors (four doubles) of x, y and z per pass.
    for ( i = 0; i < n_run; ++i )
    {
        x1v.v = _mm_load_pd( ( double* )chi1 );
        y1v.v = _mm_load_pd( ( double* )psi1 );
        z1v.v = _mm_load_pd( ( double* )zeta1 );

        x2v.v = _mm_load_pd( ( double* )(chi1 + 2) );
        y2v.v = _mm_load_pd( ( double* )(psi1 + 2) );
        z2v.v = _mm_load_pd( ( double* )(zeta1 + 2) );

        y1v.v = y1v.v - a1v.v * x1v.v;
        z1v.v = z1v.v - b1v.v * x1v.v;

        _mm_store_pd( ( double* )psi1, y1v.v );
        _mm_store_pd( ( double* )zeta1, z1v.v );

        y2v.v = y2v.v - a1v.v * x2v.v;
        z2v.v = z2v.v - b1v.v * x2v.v;

        _mm_store_pd( ( double* )(psi1 + 2), y2v.v );
        _mm_store_pd( ( double* )(zeta1 + 2), z2v.v );

        chi1  += 4;
        psi1  += 4;
        zeta1 += 4;
    }

    // Scalar tail: the 0..3 elements the vector loop did not cover.
    if ( n_left > 0 )
    {
        double alpha_c = *alpha;
        double beta_c  = *beta;

        for( i = 0; i < n_left; ++i )
        {
            double chi1_c = *chi1;

            *psi1  -= alpha_c * chi1_c;
            *zeta1 -= beta_c  * chi1_c;

            chi1  += inc_x;
            psi1  += inc_y;
            zeta1 += inc_z;
        }
    }
}
| void bli_saxmyv2 | ( | conj_t | conjx, |
| int | n, | ||
| float * | alpha, | ||
| float * | beta, | ||
| float * | x, | ||
| int | inc_x, | ||
| float * | y, | ||
| int | inc_y, | ||
| float * | z, | ||
| int | inc_z | ||
| ) |
References bli_abort().
{
// The single-precision variant has no implementation here: the body
// consists solely of a call to bli_abort(), and none of the arguments
// are read.
bli_abort();
}
1.7.6.1