|
libflame
revision_anchor
|
Functions | |
| FLA_Error | FLA_Apply_G_rf_asm_var6b (FLA_Obj G, FLA_Obj A) |
| FLA_Error | FLA_Apply_G_rf_ass_var6b (int k_G, int m_A, int n_A, int i_k, int iTL, scomplex *buff_G, int rs_G, int cs_G, float *buff_A, int rs_A, int cs_A) |
| FLA_Error | FLA_Apply_G_rf_asd_var6b (int k_G, int m_A, int n_A, int i_k, int iTL, dcomplex *buff_G, int rs_G, int cs_G, double *buff_A, int rs_A, int cs_A) |
| FLA_Error | FLA_Apply_G_rf_asc_var6b (int k_G, int m_A, int n_A, int i_k, int iTL, scomplex *buff_G, int rs_G, int cs_G, scomplex *buff_A, int rs_A, int cs_A) |
| FLA_Error | FLA_Apply_G_rf_asz_var6b (int k_G, int m_A, int n_A, int i_k, int iTL, dcomplex *buff_G, int rs_G, int cs_G, dcomplex *buff_A, int rs_A, int cs_A) |
FLA_Error FLA_Apply_G_rf_asc_var6b( int k_G,
                                    int m_A,
                                    int n_A,
                                    int i_k,
                                    int iTL,
                                    scomplex* buff_G, int rs_G, int cs_G,
                                    scomplex* buff_A, int rs_A, int cs_A )
Referenced by FLA_Apply_G_rf_asm_var6b().
{
    // Placeholder for the single-precision complex variant: no work is done
    // and none of the arguments are read.
    FLA_Error r_val = FLA_SUCCESS;

    // Report that this variant has not been written yet. What happens next
    // (abort vs. return) is decided by FLA_Check_error_code, which is not
    // visible here.
    FLA_Check_error_code( FLA_NOT_YET_IMPLEMENTED );

    // Only reached if the check above returns control to us.
    return r_val;
}
FLA_Error FLA_Apply_G_rf_asd_var6b( int k_G,
                                    int m_A,
                                    int n_A,
                                    int i_k,
                                    int iTL,
                                    dcomplex* buff_G, int rs_G, int cs_G,
                                    double* buff_A, int rs_A, int cs_A )
References bli_d0(), bli_d1(), FLA_Apply_G_rf_asd_var1(), dcomplex::imag, and dcomplex::real.
Referenced by FLA_Apply_G_rf_asm_var6b() and FLA_Apply_G_rf_bld_var6b().
{
// Applies the rotations stored in buff_G to columns of the real matrix buff_A,
// fusing two rotations (three adjacent columns of A) per kernel call where
// possible.
//
// Layout as read by this routine:
//   - G is addressed as buff_G + g*rs_G + k*cs_G. Each dcomplex entry holds one
//     rotation: .real is read as gamma and .imag as sigma (presumably the
//     cosine/sine pair of a Givens rotation -- confirm against the code that
//     fills G).
//   - Entry (g, k) of G is paired with columns g and g+1 of A; column c of A
//     is addressed as buff_A + c*cs_A.
//   - An entry with gamma == 1 and sigma == 0 is treated as the identity and
//     skipped.
//
// Traversal: entries of G are visited along anti-diagonals (g decreasing while
// k increasing) in three phases -- start-up, pipeline, shutdown -- so that the
// number of rotations per sweep ramps up to k_G, stays at k_G, then ramps down.
//
// i_k and iTL only enter through m_app, the row count handed to the kernels in
// the start-up and pipeline phases; the shutdown phase always uses m_A rows.
//
// NOTE(review): rs_A is never read; every column is passed to the kernels with
// a stride of 1, so this presumably requires A to have unit row stride --
// confirm with callers.
//
// Always returns FLA_SUCCESS.
double one = bli_d1();
double zero = bli_d0();
// gammaXY/sigmaXY: the rotation acting on columns aX and aY of the current
// three-column window (a1, a2, a3).
double gamma12;
double sigma12;
double gamma23;
double sigma23;
double* a1;
double* a2;
double* a3;
dcomplex* g12;
dcomplex* g23;
// i: fused-pair counter; j: sweep counter; g: row index into G (also the left
// column index in A); k: column index into G.
int i, j, g, k;
int nG, nG_app;
int n_iter;
int n_left;
int k_minus_1;
int n_fuse;
int is_ident12, is_ident23;
int m_app;
k_minus_1 = k_G - 1;
// Largest row index of G used plus one: one rotation per adjacent column pair
// of A.
nG = n_A - 1;
// Rotations are consumed two at a time by the fused kernel.
n_fuse = 2;
// Use the simple variant for nG < (k - 1) or k == 1.
// Note that the fallback is not passed i_k or iTL.
if ( nG < k_minus_1 || k_G == 1 )
{
FLA_Apply_G_rf_asd_var1( k_G,
m_A,
n_A,
buff_G, rs_G, cs_G,
buff_A, rs_A, cs_A );
return FLA_SUCCESS;
}
// Start-up phase.
// Sweep j applies j + 1 rotations, taken from the anti-diagonal g + k == j of
// G, starting at (g, k) = (j, 0).
for ( j = 0; j < k_minus_1; ++j )
{
nG_app = j + 1;
// Split this sweep into fused pairs plus at most one leftover rotation.
n_iter = nG_app / n_fuse;
n_left = nG_app % n_fuse;
for ( i = 0, k = 0, g = nG_app - 1; i < n_iter; ++i, k += n_fuse, g -= n_fuse )
{
// g23 is entry (g, k) acting on columns g, g+1; g12 is entry (g-1, k+1)
// acting on columns g-1, g.
g12 = buff_G + (g - 1)*rs_G + (k + 1)*cs_G;
g23 = buff_G + (g )*rs_G + (k )*cs_G;
a1 = buff_A + (g - 1)*cs_A;
a2 = buff_A + (g )*cs_A;
a3 = buff_A + (g + 1)*cs_A;
gamma12 = g12->real;
sigma12 = g12->imag;
gamma23 = g23->real;
sigma23 = g23->imag;
is_ident12 = ( gamma12 == one && sigma12 == zero );
is_ident23 = ( gamma23 == one && sigma23 == zero );
// Row count for the kernels: grows with j, capped at m_A and floored at 0.
m_app = min( i_k + 2 + j - iTL, m_A );
m_app = max( m_app, 0 );
// If both rotations are the identity, none of the branches below runs.
if ( !is_ident12 && is_ident23 )
{
// Apply only to columns 1 and 2.
MAC_Apply_G_mx2_asd( m_app,
&gamma12,
&sigma12,
a1, 1,
a2, 1 );
}
else if ( is_ident12 && !is_ident23 )
{
// Apply only to columns 2 and 3.
MAC_Apply_G_mx2_asd( m_app,
&gamma23,
&sigma23,
a2, 1,
a3, 1 );
}
else if ( !is_ident12 && !is_ident23 )
{
// Apply to all three columns.
MAC_Apply_G_mx3b_asd( m_app,
&gamma12,
&sigma12,
&gamma23,
&sigma23,
a1, 1,
a2, 1,
a3, 1 );
}
}
// Leftover single rotation: g and k keep the values the fused loop ended on.
if ( n_left == 1 )
{
g23 = buff_G + (g )*rs_G + (k )*cs_G;
a2 = buff_A + (g )*cs_A;
a3 = buff_A + (g + 1)*cs_A;
gamma23 = g23->real;
sigma23 = g23->imag;
is_ident23 = ( gamma23 == one && sigma23 == zero );
m_app = min( i_k + 2 + j - iTL, m_A );
m_app = max( m_app, 0 );
if ( !is_ident23 )
MAC_Apply_G_mx2_asd( m_app,
&gamma23,
&sigma23,
a2, 1,
a3, 1 );
}
}
// Pipeline stage
// Every sweep applies a full set of k_G rotations from the anti-diagonal
// g + k == j, starting at (g, k) = (j, 0).
for ( j = k_minus_1; j < nG; ++j )
{
nG_app = k_G;
n_iter = nG_app / n_fuse;
n_left = nG_app % n_fuse;
for ( i = 0, k = 0, g = j; i < n_iter; ++i, k += n_fuse, g -= n_fuse )
{
// Same three-column window as in the start-up phase.
g12 = buff_G + (g - 1)*rs_G + (k + 1)*cs_G;
g23 = buff_G + (g )*rs_G + (k )*cs_G;
a1 = buff_A + (g - 1)*cs_A;
a2 = buff_A + (g )*cs_A;
a3 = buff_A + (g + 1)*cs_A;
gamma12 = g12->real;
sigma12 = g12->imag;
gamma23 = g23->real;
sigma23 = g23->imag;
is_ident12 = ( gamma12 == one && sigma12 == zero );
is_ident23 = ( gamma23 == one && sigma23 == zero );
// Row count for the kernels: grows with j, capped at m_A and floored at 0.
m_app = min( i_k + 2 + j - iTL, m_A );
m_app = max( m_app, 0 );
if ( !is_ident12 && is_ident23 )
{
// Apply only to columns 1 and 2.
MAC_Apply_G_mx2_asd( m_app,
&gamma12,
&sigma12,
a1, 1,
a2, 1 );
}
else if ( is_ident12 && !is_ident23 )
{
// Apply only to columns 2 and 3.
MAC_Apply_G_mx2_asd( m_app,
&gamma23,
&sigma23,
a2, 1,
a3, 1 );
}
else if ( !is_ident12 && !is_ident23 )
{
// Apply to all three columns.
MAC_Apply_G_mx3b_asd( m_app,
&gamma12,
&sigma12,
&gamma23,
&sigma23,
a1, 1,
a2, 1,
a3, 1 );
}
}
// Leftover single rotation (only when k_G is odd); g and k carry over from
// the fused loop.
if ( n_left == 1 )
{
g23 = buff_G + (g )*rs_G + (k )*cs_G;
a2 = buff_A + (g )*cs_A;
a3 = buff_A + (g + 1)*cs_A;
gamma23 = g23->real;
sigma23 = g23->imag;
is_ident23 = ( gamma23 == one && sigma23 == zero );
m_app = min( i_k + 2 + j - iTL, m_A );
m_app = max( m_app, 0 );
if ( !is_ident23 )
MAC_Apply_G_mx2_asd( m_app,
&gamma23,
&sigma23,
a2, 1,
a3, 1 );
}
}
// Shutdown stage
// Sweep j applies the remaining k_G - j rotations, starting at
// (g, k) = (nG - 1, j), i.e. at the last column pair of A.
for ( j = 1; j < k_G; ++j )
{
nG_app = k_G - j;
n_iter = nG_app / n_fuse;
n_left = nG_app % n_fuse;
for ( i = 0, k = j, g = nG - 1; i < n_iter; ++i, k += n_fuse, g -= n_fuse )
{
g12 = buff_G + (g - 1)*rs_G + (k + 1)*cs_G;
g23 = buff_G + (g )*rs_G + (k )*cs_G;
a1 = buff_A + (g - 1)*cs_A;
a2 = buff_A + (g )*cs_A;
a3 = buff_A + (g + 1)*cs_A;
gamma12 = g12->real;
sigma12 = g12->imag;
gamma23 = g23->real;
sigma23 = g23->imag;
is_ident12 = ( gamma12 == one && sigma12 == zero );
is_ident23 = ( gamma23 == one && sigma23 == zero );
// Unlike the earlier phases, the shutdown phase always uses all m_A rows.
m_app = m_A;
if ( !is_ident12 && is_ident23 )
{
// Apply only to columns 1 and 2.
MAC_Apply_G_mx2_asd( m_app,
&gamma12,
&sigma12,
a1, 1,
a2, 1 );
}
else if ( is_ident12 && !is_ident23 )
{
// Apply only to columns 2 and 3.
MAC_Apply_G_mx2_asd( m_app,
&gamma23,
&sigma23,
a2, 1,
a3, 1 );
}
else if ( !is_ident12 && !is_ident23 )
{
// Apply to all three columns.
MAC_Apply_G_mx3b_asd( m_app,
&gamma12,
&sigma12,
&gamma23,
&sigma23,
a1, 1,
a2, 1,
a3, 1 );
}
}
//for ( k = 0; k < nG_app_left; k += 1, g -= 1 )
// Leftover single rotation; g and k carry over from the fused loop above.
if ( n_left == 1 )
{
g23 = buff_G + (g )*rs_G + (k )*cs_G;
a2 = buff_A + (g )*cs_A;
a3 = buff_A + (g + 1)*cs_A;
gamma23 = g23->real;
sigma23 = g23->imag;
is_ident23 = ( gamma23 == one && sigma23 == zero );
m_app = m_A;
if ( !is_ident23 )
MAC_Apply_G_mx2_asd( m_app,
&gamma23,
&sigma23,
a2, 1,
a3, 1 );
}
}
return FLA_SUCCESS;
}
FLA_Error FLA_Apply_G_rf_asm_var6b( FLA_Obj G,
                                    FLA_Obj A )
References FLA_Apply_G_rf_asc_var6b(), FLA_Apply_G_rf_asd_var6b(), FLA_Apply_G_rf_ass_var6b(), FLA_Apply_G_rf_asz_var6b(), FLA_Obj_col_stride(), FLA_Obj_datatype(), FLA_Obj_length(), FLA_Obj_row_stride(), and FLA_Obj_width().
{
    // Object-level front end: reads the datatype, dimensions and strides from
    // the FLA_Obj arguments and forwards them to the matching typed variant.
    // The datatype of A selects the variant; G is always read through a
    // complex pointer of the corresponding precision.
    const FLA_Datatype dt_A = FLA_Obj_datatype( A );

    const int k_G  = FLA_Obj_width( G );
    const int m_A  = FLA_Obj_length( A );
    const int n_A  = FLA_Obj_width( A );

    const int rs_G = FLA_Obj_row_stride( G );
    const int cs_G = FLA_Obj_col_stride( G );
    const int rs_A = FLA_Obj_row_stride( A );
    const int cs_A = FLA_Obj_col_stride( A );

    // This entry point always passes zero for both offset arguments of the
    // typed variants.
    const int i_k = 0;
    const int iTL = 0;

    if ( dt_A == FLA_FLOAT )
    {
        scomplex* G_buf = ( scomplex* ) FLA_COMPLEX_PTR( G );
        float*    A_buf = ( float* ) FLA_FLOAT_PTR( A );

        FLA_Apply_G_rf_ass_var6b( k_G, m_A, n_A, i_k, iTL,
                                  G_buf, rs_G, cs_G,
                                  A_buf, rs_A, cs_A );
    }
    else if ( dt_A == FLA_DOUBLE )
    {
        dcomplex* G_buf = ( dcomplex* ) FLA_DOUBLE_COMPLEX_PTR( G );
        double*   A_buf = ( double* ) FLA_DOUBLE_PTR( A );

        FLA_Apply_G_rf_asd_var6b( k_G, m_A, n_A, i_k, iTL,
                                  G_buf, rs_G, cs_G,
                                  A_buf, rs_A, cs_A );
    }
    else if ( dt_A == FLA_COMPLEX )
    {
        scomplex* G_buf = ( scomplex* ) FLA_COMPLEX_PTR( G );
        scomplex* A_buf = ( scomplex* ) FLA_COMPLEX_PTR( A );

        FLA_Apply_G_rf_asc_var6b( k_G, m_A, n_A, i_k, iTL,
                                  G_buf, rs_G, cs_G,
                                  A_buf, rs_A, cs_A );
    }
    else if ( dt_A == FLA_DOUBLE_COMPLEX )
    {
        dcomplex* G_buf = ( dcomplex* ) FLA_DOUBLE_COMPLEX_PTR( G );
        dcomplex* A_buf = ( dcomplex* ) FLA_DOUBLE_COMPLEX_PTR( A );

        FLA_Apply_G_rf_asz_var6b( k_G, m_A, n_A, i_k, iTL,
                                  G_buf, rs_G, cs_G,
                                  A_buf, rs_A, cs_A );
    }

    // Any other datatype falls through without doing work. The status
    // returned by the typed variant is not propagated.
    return FLA_SUCCESS;
}
FLA_Error FLA_Apply_G_rf_ass_var6b( int k_G,
                                    int m_A,
                                    int n_A,
                                    int i_k,
                                    int iTL,
                                    scomplex* buff_G, int rs_G, int cs_G,
                                    float* buff_A, int rs_A, int cs_A )
Referenced by FLA_Apply_G_rf_asm_var6b() and FLA_Apply_G_rf_bls_var6b().
{
    // Placeholder for the single-precision real variant: no work is done and
    // none of the arguments are read.
    FLA_Error r_val = FLA_SUCCESS;

    // Report that this variant has not been written yet. What happens next
    // (abort vs. return) is decided by FLA_Check_error_code, which is not
    // visible here.
    FLA_Check_error_code( FLA_NOT_YET_IMPLEMENTED );

    // Only reached if the check above returns control to us.
    return r_val;
}
FLA_Error FLA_Apply_G_rf_asz_var6b( int k_G,
                                    int m_A,
                                    int n_A,
                                    int i_k,
                                    int iTL,
                                    dcomplex* buff_G, int rs_G, int cs_G,
                                    dcomplex* buff_A, int rs_A, int cs_A )
Referenced by FLA_Apply_G_rf_asm_var6b().
{
    // Placeholder for the double-precision complex variant: no work is done
    // and none of the arguments are read.
    FLA_Error r_val = FLA_SUCCESS;

    // Report that this variant has not been written yet. What happens next
    // (abort vs. return) is decided by FLA_Check_error_code, which is not
    // visible here.
    FLA_Check_error_code( FLA_NOT_YET_IMPLEMENTED );

    // Only reached if the check above returns control to us.
    return r_val;
}
1.7.6.1