|
libflame
revision_anchor
|
Go to the source code of this file.
FLA_Error FLA_Sylv_hh_blk_var1( FLA_Obj isgn, FLA_Obj A, FLA_Obj B, FLA_Obj C, FLA_Obj scale, fla_sylv_t * cntl )
References FLA_Cont_with_3x3_to_2x2(), FLA_Determine_blocksize(), FLA_Gemm_internal(), FLA_MINUS_ONE, FLA_Obj_length(), FLA_ONE, FLA_Part_2x2(), FLA_Repart_2x2_to_3x3(), and FLA_Sylv_internal().
Referenced by FLA_Sylv_hh().
{
// Blocked variant 1 of the "hh" Sylvester routine. A, B and C are each viewed
// as 2x2 partitionings that are advanced together by b rows/columns per
// iteration; the loop stops once ATL is as long as A.
//
// isgn  - handed to every FLA_Sylv_internal call; FLA_NEGATE( isgn ) is the
//         alpha of the C * B' gemm updates (written "-/+" in the step comments).
// A, B  - operands whose sub-blocks feed the gemm/sylv calls below.
// C     - its sub-blocks are the outputs of those calls.
// scale - handed unchanged to every FLA_Sylv_internal call.
// cntl  - provides the blocksize and one sub-control tree per gemm/sylv call.
//
// Each iteration performs three Sylvester sub-solves (C12, C01, C11) and four
// gemm updates. Always returns FLA_SUCCESS.
FLA_Obj ATL, ATR, A00, A01, A02,
ABL, ABR, A10, A11, A12,
A20, A21, A22;
FLA_Obj BTL, BTR, B00, B01, B02,
BBL, BBR, B10, B11, B12,
B20, B21, B22;
FLA_Obj CTL, CTR, C00, C01, C02,
CBL, CBR, C10, C11, C12,
C20, C21, C22;
// Step size shared by the repartitioning of A, B and C in one iteration.
dim_t b;
// Initial partitionings with a 0 x 0 quadrant: A anchored at TL, B at BR and
// C at TR (presumably the named quadrant is the empty one - confirm against
// the FLA_Part_2x2 documentation).
FLA_Part_2x2( A, &ATL, &ATR,
&ABL, &ABR, 0, 0, FLA_TL );
FLA_Part_2x2( B, &BTL, &BTR,
&BBL, &BBR, 0, 0, FLA_BR );
FLA_Part_2x2( C, &CTL, &CTR,
&CBL, &CBR, 0, 0, FLA_TR );
// NOTE(review): the guard watches only ATL while b is derived from CBL;
// presumably b stays positive until A is exhausted - confirm for non-square C.
while ( FLA_Obj_length( ATL ) < FLA_Obj_length( A ) ){
b = FLA_Determine_blocksize( CBL, FLA_BL, FLA_Cntl_blocksize( cntl ) );
// Expose the next b x b diagonal blocks A11 and B11 and the matching 3x3
// view of C.
FLA_Repart_2x2_to_3x3( ATL, /**/ ATR, &A00, /**/ &A01, &A02,
/* ************* */ /* ******************** */
&A10, /**/ &A11, &A12,
ABL, /**/ ABR, &A20, /**/ &A21, &A22,
b, b, FLA_BR );
FLA_Repart_2x2_to_3x3( BTL, /**/ BTR, &B00, &B01, /**/ &B02,
&B10, &B11, /**/ &B12,
/* ************* */ /* ******************** */
BBL, /**/ BBR, &B20, &B21, /**/ &B22,
b, b, FLA_TL );
FLA_Repart_2x2_to_3x3( CTL, /**/ CTR, &C00, &C01, /**/ &C02,
/* ************* */ /* ******************** */
&C10, &C11, /**/ &C12,
CBL, /**/ CBR, &C20, &C21, /**/ &C22,
b, b, FLA_BL );
// NOTE(review): the loop-invariant entries below are blank in the original.
// Loop Invariant:
// CTL =
// CTR =
// CBL =
// CBR =
/*------------------------------------------------------------*/
// C12 = sylv( A11', B22', C12 - A01' * C02 );
FLA_Gemm_internal( FLA_CONJ_TRANSPOSE, FLA_NO_TRANSPOSE,
FLA_MINUS_ONE, A01, C02, FLA_ONE, C12,
FLA_Cntl_sub_gemm1( cntl ) );
FLA_Sylv_internal( FLA_CONJ_TRANSPOSE, FLA_CONJ_TRANSPOSE,
isgn, A11, B22, C12, scale,
FLA_Cntl_sub_sylv1( cntl ) );
// C01 = sylv( A00', B11', C01 -/+ C02 * B12' );
FLA_Gemm_internal( FLA_NO_TRANSPOSE, FLA_CONJ_TRANSPOSE,
FLA_NEGATE( isgn ), C02, B12, FLA_ONE, C01,
FLA_Cntl_sub_gemm2( cntl ) );
FLA_Sylv_internal( FLA_CONJ_TRANSPOSE, FLA_CONJ_TRANSPOSE,
isgn, A00, B11, C01, scale,
FLA_Cntl_sub_sylv2( cntl ) );
// C11 = sylv( A11', B11', C11 - A01' * C01 -/+ C12 * B12' );
// Uses the C12 and C01 blocks produced by the two solves above.
FLA_Gemm_internal( FLA_NO_TRANSPOSE, FLA_CONJ_TRANSPOSE,
FLA_NEGATE( isgn ), C12, B12, FLA_ONE, C11,
FLA_Cntl_sub_gemm3( cntl ) );
FLA_Gemm_internal( FLA_CONJ_TRANSPOSE, FLA_NO_TRANSPOSE,
FLA_MINUS_ONE, A01, C01, FLA_ONE, C11,
FLA_Cntl_sub_gemm4( cntl ) );
FLA_Sylv_internal( FLA_CONJ_TRANSPOSE, FLA_CONJ_TRANSPOSE,
isgn, A11, B11, C11, scale,
FLA_Cntl_sub_sylv3( cntl ) );
/*------------------------------------------------------------*/
// Fold the processed blocks back into the 2x2 views: A toward TL, B toward
// BR, C toward TR.
FLA_Cont_with_3x3_to_2x2( &ATL, /**/ &ATR, A00, A01, /**/ A02,
A10, A11, /**/ A12,
/* ************** */ /* ****************** */
&ABL, /**/ &ABR, A20, A21, /**/ A22,
FLA_TL );
FLA_Cont_with_3x3_to_2x2( &BTL, /**/ &BTR, B00, /**/ B01, B02,
/* ************** */ /* ****************** */
B10, /**/ B11, B12,
&BBL, /**/ &BBR, B20, /**/ B21, B22,
FLA_BR );
FLA_Cont_with_3x3_to_2x2( &CTL, /**/ &CTR, C00, /**/ C01, C02,
C10, /**/ C11, C12,
/* ************** */ /* ****************** */
&CBL, /**/ &CBR, C20, /**/ C21, C22,
FLA_TR );
}
return FLA_SUCCESS;
}
FLA_Error FLA_Sylv_hh_blk_var10( FLA_Obj isgn, FLA_Obj A, FLA_Obj B, FLA_Obj C, FLA_Obj scale, fla_sylv_t * cntl )
References FLA_Cont_with_3x3_to_2x2(), FLA_Determine_blocksize(), FLA_Gemm_internal(), FLA_MINUS_ONE, FLA_Obj_length(), FLA_ONE, FLA_Part_2x2(), FLA_Repart_2x2_to_3x3(), and FLA_Sylv_internal().
Referenced by FLA_Sylv_hh().
{
// Blocked variant 10 of the "hh" Sylvester routine. A, B and C are each viewed
// as 2x2 partitionings that are advanced together by b rows/columns per
// iteration; the loop stops once ATL is as long as A.
//
// isgn  - handed to every FLA_Sylv_internal call; FLA_NEGATE( isgn ) is the
//         alpha of the C * B' gemm updates (written "-/+" in the step comments).
// A, B  - operands whose sub-blocks feed the gemm/sylv calls below.
// C     - its sub-blocks are the outputs of those calls.
// scale - handed unchanged to every FLA_Sylv_internal call.
// cntl  - provides the blocksize and one sub-control tree per gemm/sylv call.
//
// Each iteration performs three Sylvester sub-solves down the current block
// column (C01, C11, C21) and six gemm updates, three of which push that
// column's results into C20, C10 and C00. Always returns FLA_SUCCESS.
FLA_Obj ATL, ATR, A00, A01, A02,
ABL, ABR, A10, A11, A12,
A20, A21, A22;
FLA_Obj BTL, BTR, B00, B01, B02,
BBL, BBR, B10, B11, B12,
B20, B21, B22;
FLA_Obj CTL, CTR, C00, C01, C02,
CBL, CBR, C10, C11, C12,
C20, C21, C22;
// Step size shared by the repartitioning of A, B and C in one iteration.
dim_t b;
// Initial partitionings with a 0 x 0 quadrant: A anchored at TL, B at BR and
// C at TR (presumably the named quadrant is the empty one - confirm against
// the FLA_Part_2x2 documentation).
FLA_Part_2x2( A, &ATL, &ATR,
&ABL, &ABR, 0, 0, FLA_TL );
FLA_Part_2x2( B, &BTL, &BTR,
&BBL, &BBR, 0, 0, FLA_BR );
FLA_Part_2x2( C, &CTL, &CTR,
&CBL, &CBR, 0, 0, FLA_TR );
// NOTE(review): the guard watches only ATL while b is derived from CBL;
// presumably b stays positive until A is exhausted - confirm for non-square C.
while ( FLA_Obj_length( ATL ) < FLA_Obj_length( A ) ){
b = FLA_Determine_blocksize( CBL, FLA_BL, FLA_Cntl_blocksize( cntl ) );
// Expose the next b x b diagonal blocks A11 and B11 and the matching 3x3
// view of C.
FLA_Repart_2x2_to_3x3( ATL, /**/ ATR, &A00, /**/ &A01, &A02,
/* ************* */ /* ******************** */
&A10, /**/ &A11, &A12,
ABL, /**/ ABR, &A20, /**/ &A21, &A22,
b, b, FLA_BR );
FLA_Repart_2x2_to_3x3( BTL, /**/ BTR, &B00, &B01, /**/ &B02,
&B10, &B11, /**/ &B12,
/* ************* */ /* ******************** */
BBL, /**/ BBR, &B20, &B21, /**/ &B22,
b, b, FLA_TL );
FLA_Repart_2x2_to_3x3( CTL, /**/ CTR, &C00, &C01, /**/ &C02,
/* ************* */ /* ******************** */
&C10, &C11, /**/ &C12,
CBL, /**/ CBR, &C20, &C21, /**/ &C22,
b, b, FLA_BL );
// NOTE(review): the loop-invariant entries below are blank in the original.
// Loop Invariant:
// CTL =
// CTR =
// CBL =
// CBR =
/*------------------------------------------------------------*/
// C01 = sylv( A00', B11', C01 );
FLA_Sylv_internal( FLA_CONJ_TRANSPOSE, FLA_CONJ_TRANSPOSE,
isgn, A00, B11, C01, scale,
FLA_Cntl_sub_sylv1( cntl ) );
// C11 = sylv( A11', B11', C11 - A01' * C01 );
FLA_Gemm_internal( FLA_CONJ_TRANSPOSE, FLA_NO_TRANSPOSE,
FLA_MINUS_ONE, A01, C01, FLA_ONE, C11,
FLA_Cntl_sub_gemm1( cntl ) );
FLA_Sylv_internal( FLA_CONJ_TRANSPOSE, FLA_CONJ_TRANSPOSE,
isgn, A11, B11, C11, scale,
FLA_Cntl_sub_sylv2( cntl ) );
// C21 = sylv( A22', B11', C21 - A12' * C11 - A02' * C01 );
FLA_Gemm_internal( FLA_CONJ_TRANSPOSE, FLA_NO_TRANSPOSE,
FLA_MINUS_ONE, A02, C01, FLA_ONE, C21,
FLA_Cntl_sub_gemm2( cntl ) );
FLA_Gemm_internal( FLA_CONJ_TRANSPOSE, FLA_NO_TRANSPOSE,
FLA_MINUS_ONE, A12, C11, FLA_ONE, C21,
FLA_Cntl_sub_gemm3( cntl ) );
FLA_Sylv_internal( FLA_CONJ_TRANSPOSE, FLA_CONJ_TRANSPOSE,
isgn, A22, B11, C21, scale,
FLA_Cntl_sub_sylv3( cntl ) );
// The three updates below apply the freshly solved column (C01, C11, C21)
// to the column on its left through B01'.
// C20 = C20 -/+ C21 * B01';
FLA_Gemm_internal( FLA_NO_TRANSPOSE, FLA_CONJ_TRANSPOSE,
FLA_NEGATE( isgn ), C21, B01, FLA_ONE, C20,
FLA_Cntl_sub_gemm4( cntl ) );
// C10 = C10 -/+ C11 * B01';
FLA_Gemm_internal( FLA_NO_TRANSPOSE, FLA_CONJ_TRANSPOSE,
FLA_NEGATE( isgn ), C11, B01, FLA_ONE, C10,
FLA_Cntl_sub_gemm5( cntl ) );
// C00 = C00 -/+ C01 * B01';
FLA_Gemm_internal( FLA_NO_TRANSPOSE, FLA_CONJ_TRANSPOSE,
FLA_NEGATE( isgn ), C01, B01, FLA_ONE, C00,
FLA_Cntl_sub_gemm6( cntl ) );
/*------------------------------------------------------------*/
// Fold the processed blocks back into the 2x2 views: A toward TL, B toward
// BR, C toward TR.
FLA_Cont_with_3x3_to_2x2( &ATL, /**/ &ATR, A00, A01, /**/ A02,
A10, A11, /**/ A12,
/* ************** */ /* ****************** */
&ABL, /**/ &ABR, A20, A21, /**/ A22,
FLA_TL );
FLA_Cont_with_3x3_to_2x2( &BTL, /**/ &BTR, B00, /**/ B01, B02,
/* ************** */ /* ****************** */
B10, /**/ B11, B12,
&BBL, /**/ &BBR, B20, /**/ B21, B22,
FLA_BR );
FLA_Cont_with_3x3_to_2x2( &CTL, /**/ &CTR, C00, /**/ C01, C02,
C10, /**/ C11, C12,
/* ************** */ /* ****************** */
&CBL, /**/ &CBR, C20, /**/ C21, C22,
FLA_TR );
}
return FLA_SUCCESS;
}
FLA_Error FLA_Sylv_hh_blk_var11( FLA_Obj isgn, FLA_Obj A, FLA_Obj B, FLA_Obj C, FLA_Obj scale, fla_sylv_t * cntl )
References FLA_Cont_with_3x3_to_2x2(), FLA_Determine_blocksize(), FLA_Gemm_internal(), FLA_MINUS_ONE, FLA_Obj_length(), FLA_ONE, FLA_Part_2x2(), FLA_Repart_2x2_to_3x3(), and FLA_Sylv_internal().
Referenced by FLA_Sylv_hh().
{
// Blocked variant 11 of the "hh" Sylvester routine. A, B and C are each viewed
// as 2x2 partitionings that are advanced together by b rows/columns per
// iteration; the loop stops once ATL is as long as A.
//
// isgn  - handed to every FLA_Sylv_internal call; FLA_NEGATE( isgn ) is the
//         alpha of the C * B' gemm updates (written "-/+" in the step comments).
// A, B  - operands whose sub-blocks feed the gemm/sylv calls below.
// C     - its sub-blocks are the outputs of those calls.
// scale - handed unchanged to every FLA_Sylv_internal call.
// cntl  - provides the blocksize and one sub-control tree per gemm/sylv call.
//
// Each iteration performs three Sylvester sub-solves along the current block
// row (C12, C11, C10), each followed by a gemm through A12' into the block
// directly below it; six gemm updates in total. Always returns FLA_SUCCESS.
FLA_Obj ATL, ATR, A00, A01, A02,
ABL, ABR, A10, A11, A12,
A20, A21, A22;
FLA_Obj BTL, BTR, B00, B01, B02,
BBL, BBR, B10, B11, B12,
B20, B21, B22;
FLA_Obj CTL, CTR, C00, C01, C02,
CBL, CBR, C10, C11, C12,
C20, C21, C22;
// Step size shared by the repartitioning of A, B and C in one iteration.
dim_t b;
// Initial partitionings with a 0 x 0 quadrant: A anchored at TL, B at BR and
// C at TR (presumably the named quadrant is the empty one - confirm against
// the FLA_Part_2x2 documentation).
FLA_Part_2x2( A, &ATL, &ATR,
&ABL, &ABR, 0, 0, FLA_TL );
FLA_Part_2x2( B, &BTL, &BTR,
&BBL, &BBR, 0, 0, FLA_BR );
FLA_Part_2x2( C, &CTL, &CTR,
&CBL, &CBR, 0, 0, FLA_TR );
// NOTE(review): the guard watches only ATL while b is derived from CBL;
// presumably b stays positive until A is exhausted - confirm for non-square C.
while ( FLA_Obj_length( ATL ) < FLA_Obj_length( A ) ){
b = FLA_Determine_blocksize( CBL, FLA_BL, FLA_Cntl_blocksize( cntl ) );
// Expose the next b x b diagonal blocks A11 and B11 and the matching 3x3
// view of C.
FLA_Repart_2x2_to_3x3( ATL, /**/ ATR, &A00, /**/ &A01, &A02,
/* ************* */ /* ******************** */
&A10, /**/ &A11, &A12,
ABL, /**/ ABR, &A20, /**/ &A21, &A22,
b, b, FLA_BR );
FLA_Repart_2x2_to_3x3( BTL, /**/ BTR, &B00, &B01, /**/ &B02,
&B10, &B11, /**/ &B12,
/* ************* */ /* ******************** */
BBL, /**/ BBR, &B20, &B21, /**/ &B22,
b, b, FLA_TL );
FLA_Repart_2x2_to_3x3( CTL, /**/ CTR, &C00, &C01, /**/ &C02,
/* ************* */ /* ******************** */
&C10, &C11, /**/ &C12,
CBL, /**/ CBR, &C20, &C21, /**/ &C22,
b, b, FLA_BL );
// NOTE(review): the loop-invariant entries below are blank in the original.
// Loop Invariant:
// CTL =
// CTR =
// CBL =
// CBR =
/*------------------------------------------------------------*/
// C12 = sylv( A11', B22', C12 );
FLA_Sylv_internal( FLA_CONJ_TRANSPOSE, FLA_CONJ_TRANSPOSE,
isgn, A11, B22, C12, scale,
FLA_Cntl_sub_sylv1( cntl ) );
// C22 = C22 - A12' * C12;
FLA_Gemm_internal( FLA_CONJ_TRANSPOSE, FLA_NO_TRANSPOSE,
FLA_MINUS_ONE, A12, C12, FLA_ONE, C22,
FLA_Cntl_sub_gemm1( cntl ) );
// C11 = sylv( A11', B11', C11 -/+ C12 * B12' );
FLA_Gemm_internal( FLA_NO_TRANSPOSE, FLA_CONJ_TRANSPOSE,
FLA_NEGATE( isgn ), C12, B12, FLA_ONE, C11,
FLA_Cntl_sub_gemm2( cntl ) );
FLA_Sylv_internal( FLA_CONJ_TRANSPOSE, FLA_CONJ_TRANSPOSE,
isgn, A11, B11, C11, scale,
FLA_Cntl_sub_sylv2( cntl ) );
// C21 = C21 - A12' * C11;
FLA_Gemm_internal( FLA_CONJ_TRANSPOSE, FLA_NO_TRANSPOSE,
FLA_MINUS_ONE, A12, C11, FLA_ONE, C21,
FLA_Cntl_sub_gemm3( cntl ) );
// C10 = sylv( A11', B00', C10 -/+ C12 * B02' -/+ C11 * B01' );
FLA_Gemm_internal( FLA_NO_TRANSPOSE, FLA_CONJ_TRANSPOSE,
FLA_NEGATE( isgn ), C11, B01, FLA_ONE, C10,
FLA_Cntl_sub_gemm4( cntl ) );
FLA_Gemm_internal( FLA_NO_TRANSPOSE, FLA_CONJ_TRANSPOSE,
FLA_NEGATE( isgn ), C12, B02, FLA_ONE, C10,
FLA_Cntl_sub_gemm5( cntl ) );
FLA_Sylv_internal( FLA_CONJ_TRANSPOSE, FLA_CONJ_TRANSPOSE,
isgn, A11, B00, C10, scale,
FLA_Cntl_sub_sylv3( cntl ) );
// C20 = C20 - A12' * C10;
FLA_Gemm_internal( FLA_CONJ_TRANSPOSE, FLA_NO_TRANSPOSE,
FLA_MINUS_ONE, A12, C10, FLA_ONE, C20,
FLA_Cntl_sub_gemm6( cntl ) );
/*------------------------------------------------------------*/
// Fold the processed blocks back into the 2x2 views: A toward TL, B toward
// BR, C toward TR.
FLA_Cont_with_3x3_to_2x2( &ATL, /**/ &ATR, A00, A01, /**/ A02,
A10, A11, /**/ A12,
/* ************** */ /* ****************** */
&ABL, /**/ &ABR, A20, A21, /**/ A22,
FLA_TL );
FLA_Cont_with_3x3_to_2x2( &BTL, /**/ &BTR, B00, /**/ B01, B02,
/* ************** */ /* ****************** */
B10, /**/ B11, B12,
&BBL, /**/ &BBR, B20, /**/ B21, B22,
FLA_BR );
FLA_Cont_with_3x3_to_2x2( &CTL, /**/ &CTR, C00, /**/ C01, C02,
C10, /**/ C11, C12,
/* ************** */ /* ****************** */
&CBL, /**/ &CBR, C20, /**/ C21, C22,
FLA_TR );
}
return FLA_SUCCESS;
}
FLA_Error FLA_Sylv_hh_blk_var12( FLA_Obj isgn, FLA_Obj A, FLA_Obj B, FLA_Obj C, FLA_Obj scale, fla_sylv_t * cntl )
References FLA_Cont_with_3x3_to_2x2(), FLA_Determine_blocksize(), FLA_Gemm_internal(), FLA_MINUS_ONE, FLA_Obj_length(), FLA_ONE, FLA_Part_2x2(), FLA_Repart_2x2_to_3x3(), and FLA_Sylv_internal().
Referenced by FLA_Sylv_hh().
{
// Blocked variant 12 of the "hh" Sylvester routine. A, B and C are each viewed
// as 2x2 partitionings that are advanced together by b rows/columns per
// iteration; the loop stops once ATL is as long as A.
//
// isgn  - handed to every FLA_Sylv_internal call; FLA_NEGATE( isgn ) is the
//         alpha of the C * B' gemm updates (written "-/+" in the step comments).
// A, B  - operands whose sub-blocks feed the gemm/sylv calls below.
// C     - its sub-blocks are the outputs of those calls.
// scale - handed unchanged to every FLA_Sylv_internal call.
// cntl  - provides the blocksize and one sub-control tree per gemm/sylv call.
//
// Each iteration performs three Sylvester sub-solves (C11, C21, C10) and six
// gemm updates. Always returns FLA_SUCCESS.
FLA_Obj ATL, ATR, A00, A01, A02,
ABL, ABR, A10, A11, A12,
A20, A21, A22;
FLA_Obj BTL, BTR, B00, B01, B02,
BBL, BBR, B10, B11, B12,
B20, B21, B22;
FLA_Obj CTL, CTR, C00, C01, C02,
CBL, CBR, C10, C11, C12,
C20, C21, C22;
// Step size shared by the repartitioning of A, B and C in one iteration.
dim_t b;
// Initial partitionings with a 0 x 0 quadrant: A anchored at TL, B at BR and
// C at TR (presumably the named quadrant is the empty one - confirm against
// the FLA_Part_2x2 documentation).
FLA_Part_2x2( A, &ATL, &ATR,
&ABL, &ABR, 0, 0, FLA_TL );
FLA_Part_2x2( B, &BTL, &BTR,
&BBL, &BBR, 0, 0, FLA_BR );
FLA_Part_2x2( C, &CTL, &CTR,
&CBL, &CBR, 0, 0, FLA_TR );
// NOTE(review): the guard watches only ATL while b is derived from CBL;
// presumably b stays positive until A is exhausted - confirm for non-square C.
while ( FLA_Obj_length( ATL ) < FLA_Obj_length( A ) ){
b = FLA_Determine_blocksize( CBL, FLA_BL, FLA_Cntl_blocksize( cntl ) );
// Expose the next b x b diagonal blocks A11 and B11 and the matching 3x3
// view of C.
FLA_Repart_2x2_to_3x3( ATL, /**/ ATR, &A00, /**/ &A01, &A02,
/* ************* */ /* ******************** */
&A10, /**/ &A11, &A12,
ABL, /**/ ABR, &A20, /**/ &A21, &A22,
b, b, FLA_BR );
FLA_Repart_2x2_to_3x3( BTL, /**/ BTR, &B00, &B01, /**/ &B02,
&B10, &B11, /**/ &B12,
/* ************* */ /* ******************** */
BBL, /**/ BBR, &B20, &B21, /**/ &B22,
b, b, FLA_TL );
FLA_Repart_2x2_to_3x3( CTL, /**/ CTR, &C00, &C01, /**/ &C02,
/* ************* */ /* ******************** */
&C10, &C11, /**/ &C12,
CBL, /**/ CBR, &C20, &C21, /**/ &C22,
b, b, FLA_BL );
// NOTE(review): the loop-invariant entries below are blank in the original.
// Loop Invariant:
// CTL =
// CTR =
// CBL =
// CBR =
/*------------------------------------------------------------*/
// C11 = sylv( A11', B11', C11 - A01' * C01 );
FLA_Gemm_internal( FLA_CONJ_TRANSPOSE, FLA_NO_TRANSPOSE,
FLA_MINUS_ONE, A01, C01, FLA_ONE, C11,
FLA_Cntl_sub_gemm1( cntl ) );
FLA_Sylv_internal( FLA_CONJ_TRANSPOSE, FLA_CONJ_TRANSPOSE,
isgn, A11, B11, C11, scale,
FLA_Cntl_sub_sylv1( cntl ) );
// C21 = sylv( A22', B11', C21 - A12' * C11 - A02' * C01 );
FLA_Gemm_internal( FLA_CONJ_TRANSPOSE, FLA_NO_TRANSPOSE,
FLA_MINUS_ONE, A02, C01, FLA_ONE, C21,
FLA_Cntl_sub_gemm2( cntl ) );
FLA_Gemm_internal( FLA_CONJ_TRANSPOSE, FLA_NO_TRANSPOSE,
FLA_MINUS_ONE, A12, C11, FLA_ONE, C21,
FLA_Cntl_sub_gemm3( cntl ) );
FLA_Sylv_internal( FLA_CONJ_TRANSPOSE, FLA_CONJ_TRANSPOSE,
isgn, A22, B11, C21, scale,
FLA_Cntl_sub_sylv2( cntl ) );
// C20 = C20 -/+ C21 * B01';
FLA_Gemm_internal( FLA_NO_TRANSPOSE, FLA_CONJ_TRANSPOSE,
FLA_NEGATE( isgn ), C21, B01, FLA_ONE, C20,
FLA_Cntl_sub_gemm4( cntl ) );
// C10 = sylv( A11', B00', C10 - A01' * C00 -/+ C11 * B01' );
FLA_Gemm_internal( FLA_NO_TRANSPOSE, FLA_CONJ_TRANSPOSE,
FLA_NEGATE( isgn ), C11, B01, FLA_ONE, C10,
FLA_Cntl_sub_gemm5( cntl ) );
FLA_Gemm_internal( FLA_CONJ_TRANSPOSE, FLA_NO_TRANSPOSE,
FLA_MINUS_ONE, A01, C00, FLA_ONE, C10,
FLA_Cntl_sub_gemm6( cntl ) );
FLA_Sylv_internal( FLA_CONJ_TRANSPOSE, FLA_CONJ_TRANSPOSE,
isgn, A11, B00, C10, scale,
FLA_Cntl_sub_sylv3( cntl ) );
/*------------------------------------------------------------*/
// Fold the processed blocks back into the 2x2 views: A toward TL, B toward
// BR, C toward TR.
FLA_Cont_with_3x3_to_2x2( &ATL, /**/ &ATR, A00, A01, /**/ A02,
A10, A11, /**/ A12,
/* ************** */ /* ****************** */
&ABL, /**/ &ABR, A20, A21, /**/ A22,
FLA_TL );
FLA_Cont_with_3x3_to_2x2( &BTL, /**/ &BTR, B00, /**/ B01, B02,
/* ************** */ /* ****************** */
B10, /**/ B11, B12,
&BBL, /**/ &BBR, B20, /**/ B21, B22,
FLA_BR );
FLA_Cont_with_3x3_to_2x2( &CTL, /**/ &CTR, C00, /**/ C01, C02,
C10, /**/ C11, C12,
/* ************** */ /* ****************** */
&CBL, /**/ &CBR, C20, /**/ C21, C22,
FLA_TR );
}
return FLA_SUCCESS;
}
FLA_Error FLA_Sylv_hh_blk_var13( FLA_Obj isgn, FLA_Obj A, FLA_Obj B, FLA_Obj C, FLA_Obj scale, fla_sylv_t * cntl )
References FLA_Cont_with_3x3_to_2x2(), FLA_Determine_blocksize(), FLA_Gemm_internal(), FLA_MINUS_ONE, FLA_Obj_length(), FLA_ONE, FLA_Part_2x2(), FLA_Repart_2x2_to_3x3(), and FLA_Sylv_internal().
Referenced by FLA_Sylv_hh().
{
// Blocked variant 13 of the "hh" Sylvester routine. A, B and C are each viewed
// as 2x2 partitionings that are advanced together by b rows/columns per
// iteration; the loop stops once ATL is as long as A.
//
// isgn  - handed to every FLA_Sylv_internal call; FLA_NEGATE( isgn ) is the
//         alpha of the C * B' gemm updates (written "-/+" in the step comments).
// A, B  - operands whose sub-blocks feed the gemm/sylv calls below.
// C     - its sub-blocks are the outputs of those calls.
// scale - handed unchanged to every FLA_Sylv_internal call.
// cntl  - provides the blocksize and one sub-control tree per gemm/sylv call.
//
// Each iteration performs three Sylvester sub-solves (C11, C21, C10) and six
// gemm updates. Always returns FLA_SUCCESS.
FLA_Obj ATL, ATR, A00, A01, A02,
ABL, ABR, A10, A11, A12,
A20, A21, A22;
FLA_Obj BTL, BTR, B00, B01, B02,
BBL, BBR, B10, B11, B12,
B20, B21, B22;
FLA_Obj CTL, CTR, C00, C01, C02,
CBL, CBR, C10, C11, C12,
C20, C21, C22;
// Step size shared by the repartitioning of A, B and C in one iteration.
dim_t b;
// Initial partitionings with a 0 x 0 quadrant: A anchored at TL, B at BR and
// C at TR (presumably the named quadrant is the empty one - confirm against
// the FLA_Part_2x2 documentation).
FLA_Part_2x2( A, &ATL, &ATR,
&ABL, &ABR, 0, 0, FLA_TL );
FLA_Part_2x2( B, &BTL, &BTR,
&BBL, &BBR, 0, 0, FLA_BR );
FLA_Part_2x2( C, &CTL, &CTR,
&CBL, &CBR, 0, 0, FLA_TR );
// NOTE(review): the guard watches only ATL while b is derived from CBL;
// presumably b stays positive until A is exhausted - confirm for non-square C.
while ( FLA_Obj_length( ATL ) < FLA_Obj_length( A ) ){
b = FLA_Determine_blocksize( CBL, FLA_BL, FLA_Cntl_blocksize( cntl ) );
// Expose the next b x b diagonal blocks A11 and B11 and the matching 3x3
// view of C.
FLA_Repart_2x2_to_3x3( ATL, /**/ ATR, &A00, /**/ &A01, &A02,
/* ************* */ /* ******************** */
&A10, /**/ &A11, &A12,
ABL, /**/ ABR, &A20, /**/ &A21, &A22,
b, b, FLA_BR );
FLA_Repart_2x2_to_3x3( BTL, /**/ BTR, &B00, &B01, /**/ &B02,
&B10, &B11, /**/ &B12,
/* ************* */ /* ******************** */
BBL, /**/ BBR, &B20, &B21, /**/ &B22,
b, b, FLA_TL );
FLA_Repart_2x2_to_3x3( CTL, /**/ CTR, &C00, &C01, /**/ &C02,
/* ************* */ /* ******************** */
&C10, &C11, /**/ &C12,
CBL, /**/ CBR, &C20, &C21, /**/ &C22,
b, b, FLA_BL );
// NOTE(review): the loop-invariant entries below are blank in the original.
// Loop Invariant:
// CTL =
// CTR =
// CBL =
// CBR =
/*------------------------------------------------------------*/
// C11 = sylv( A11', B11', C11 -/+ C12 * B12' );
FLA_Gemm_internal( FLA_NO_TRANSPOSE, FLA_CONJ_TRANSPOSE,
FLA_NEGATE( isgn ), C12, B12, FLA_ONE, C11,
FLA_Cntl_sub_gemm1( cntl ) );
FLA_Sylv_internal( FLA_CONJ_TRANSPOSE, FLA_CONJ_TRANSPOSE,
isgn, A11, B11, C11, scale,
FLA_Cntl_sub_sylv1( cntl ) );
// C21 = sylv( A22', B11', C21 - A12' * C11 -/+ C22 * B12' );
FLA_Gemm_internal( FLA_NO_TRANSPOSE, FLA_CONJ_TRANSPOSE,
FLA_NEGATE( isgn ), C22, B12, FLA_ONE, C21,
FLA_Cntl_sub_gemm2( cntl ) );
FLA_Gemm_internal( FLA_CONJ_TRANSPOSE, FLA_NO_TRANSPOSE,
FLA_MINUS_ONE, A12, C11, FLA_ONE, C21,
FLA_Cntl_sub_gemm3( cntl ) );
FLA_Sylv_internal( FLA_CONJ_TRANSPOSE, FLA_CONJ_TRANSPOSE,
isgn, A22, B11, C21, scale,
FLA_Cntl_sub_sylv2( cntl ) );
// C10 = sylv( A11', B00', C10 -/+ C12 * B02' -/+ C11 * B01' );
FLA_Gemm_internal( FLA_NO_TRANSPOSE, FLA_CONJ_TRANSPOSE,
FLA_NEGATE( isgn ), C11, B01, FLA_ONE, C10,
FLA_Cntl_sub_gemm4( cntl ) );
FLA_Gemm_internal( FLA_NO_TRANSPOSE, FLA_CONJ_TRANSPOSE,
FLA_NEGATE( isgn ), C12, B02, FLA_ONE, C10,
FLA_Cntl_sub_gemm5( cntl ) );
FLA_Sylv_internal( FLA_CONJ_TRANSPOSE, FLA_CONJ_TRANSPOSE,
isgn, A11, B00, C10, scale,
FLA_Cntl_sub_sylv3( cntl ) );
// C20 = C20 - A12' * C10;
FLA_Gemm_internal( FLA_CONJ_TRANSPOSE, FLA_NO_TRANSPOSE,
FLA_MINUS_ONE, A12, C10, FLA_ONE, C20,
FLA_Cntl_sub_gemm6( cntl ) );
/*------------------------------------------------------------*/
// Fold the processed blocks back into the 2x2 views: A toward TL, B toward
// BR, C toward TR.
FLA_Cont_with_3x3_to_2x2( &ATL, /**/ &ATR, A00, A01, /**/ A02,
A10, A11, /**/ A12,
/* ************** */ /* ****************** */
&ABL, /**/ &ABR, A20, A21, /**/ A22,
FLA_TL );
FLA_Cont_with_3x3_to_2x2( &BTL, /**/ &BTR, B00, /**/ B01, B02,
/* ************** */ /* ****************** */
B10, /**/ B11, B12,
&BBL, /**/ &BBR, B20, /**/ B21, B22,
FLA_BR );
FLA_Cont_with_3x3_to_2x2( &CTL, /**/ &CTR, C00, /**/ C01, C02,
C10, /**/ C11, C12,
/* ************** */ /* ****************** */
&CBL, /**/ &CBR, C20, /**/ C21, C22,
FLA_TR );
}
return FLA_SUCCESS;
}
FLA_Error FLA_Sylv_hh_blk_var14( FLA_Obj isgn, FLA_Obj A, FLA_Obj B, FLA_Obj C, FLA_Obj scale, fla_sylv_t * cntl )
References FLA_Cont_with_3x3_to_2x2(), FLA_Determine_blocksize(), FLA_Gemm_internal(), FLA_MINUS_ONE, FLA_Obj_length(), FLA_ONE, FLA_Part_2x2(), FLA_Repart_2x2_to_3x3(), and FLA_Sylv_internal().
Referenced by FLA_Sylv_hh().
{
// Blocked variant 14 of the "hh" Sylvester routine. A, B and C are each viewed
// as 2x2 partitionings that are advanced together by b rows/columns per
// iteration; the loop stops once ATL is as long as A.
//
// isgn  - handed to every FLA_Sylv_internal call; FLA_NEGATE( isgn ) is the
//         alpha of the C * B' gemm updates (written "-/+" in the step comments).
// A, B  - operands whose sub-blocks feed the gemm/sylv calls below.
// C     - its sub-blocks are the outputs of those calls.
// scale - handed unchanged to every FLA_Sylv_internal call.
// cntl  - provides the blocksize and one sub-control tree per gemm/sylv call.
//
// Each iteration performs three Sylvester sub-solves (C11, C21, C10) and four
// gemm updates, the last two of which only accumulate into C20.
// Always returns FLA_SUCCESS.
FLA_Obj ATL, ATR, A00, A01, A02,
ABL, ABR, A10, A11, A12,
A20, A21, A22;
FLA_Obj BTL, BTR, B00, B01, B02,
BBL, BBR, B10, B11, B12,
B20, B21, B22;
FLA_Obj CTL, CTR, C00, C01, C02,
CBL, CBR, C10, C11, C12,
C20, C21, C22;
// Step size shared by the repartitioning of A, B and C in one iteration.
dim_t b;
// Initial partitionings with a 0 x 0 quadrant: A anchored at TL, B at BR and
// C at TR (presumably the named quadrant is the empty one - confirm against
// the FLA_Part_2x2 documentation).
FLA_Part_2x2( A, &ATL, &ATR,
&ABL, &ABR, 0, 0, FLA_TL );
FLA_Part_2x2( B, &BTL, &BTR,
&BBL, &BBR, 0, 0, FLA_BR );
FLA_Part_2x2( C, &CTL, &CTR,
&CBL, &CBR, 0, 0, FLA_TR );
// NOTE(review): the guard watches only ATL while b is derived from CBL;
// presumably b stays positive until A is exhausted - confirm for non-square C.
while ( FLA_Obj_length( ATL ) < FLA_Obj_length( A ) ){
b = FLA_Determine_blocksize( CBL, FLA_BL, FLA_Cntl_blocksize( cntl ) );
// Expose the next b x b diagonal blocks A11 and B11 and the matching 3x3
// view of C.
FLA_Repart_2x2_to_3x3( ATL, /**/ ATR, &A00, /**/ &A01, &A02,
/* ************* */ /* ******************** */
&A10, /**/ &A11, &A12,
ABL, /**/ ABR, &A20, /**/ &A21, &A22,
b, b, FLA_BR );
FLA_Repart_2x2_to_3x3( BTL, /**/ BTR, &B00, &B01, /**/ &B02,
&B10, &B11, /**/ &B12,
/* ************* */ /* ******************** */
BBL, /**/ BBR, &B20, &B21, /**/ &B22,
b, b, FLA_TL );
FLA_Repart_2x2_to_3x3( CTL, /**/ CTR, &C00, &C01, /**/ &C02,
/* ************* */ /* ******************** */
&C10, &C11, /**/ &C12,
CBL, /**/ CBR, &C20, &C21, /**/ &C22,
b, b, FLA_BL );
// NOTE(review): the loop-invariant entries below are blank in the original.
// Loop Invariant:
// CTL =
// CTR =
// CBL =
// CBR =
/*------------------------------------------------------------*/
// C11 = sylv( A11', B11', C11 );
FLA_Sylv_internal( FLA_CONJ_TRANSPOSE, FLA_CONJ_TRANSPOSE,
isgn, A11, B11, C11, scale,
FLA_Cntl_sub_sylv1( cntl ) );
// C21 = sylv( A22', B11', C21 - A12' * C11 );
FLA_Gemm_internal( FLA_CONJ_TRANSPOSE, FLA_NO_TRANSPOSE,
FLA_MINUS_ONE, A12, C11, FLA_ONE, C21,
FLA_Cntl_sub_gemm1( cntl ) );
FLA_Sylv_internal( FLA_CONJ_TRANSPOSE, FLA_CONJ_TRANSPOSE,
isgn, A22, B11, C21, scale,
FLA_Cntl_sub_sylv2( cntl ) );
// C10 = sylv( A11', B00', C10 -/+ C11 * B01' );
// Note the control-tree numbering: this solve uses sub_sylv3, after gemm2.
FLA_Gemm_internal( FLA_NO_TRANSPOSE, FLA_CONJ_TRANSPOSE,
FLA_NEGATE( isgn ), C11, B01, FLA_ONE, C10,
FLA_Cntl_sub_gemm2( cntl ) );
FLA_Sylv_internal( FLA_CONJ_TRANSPOSE, FLA_CONJ_TRANSPOSE,
isgn, A11, B00, C10, scale,
FLA_Cntl_sub_sylv3( cntl ) );
// C20 = C20 - A12' * C10 -/+ C21 * B01';
FLA_Gemm_internal( FLA_NO_TRANSPOSE, FLA_CONJ_TRANSPOSE,
FLA_NEGATE( isgn ), C21, B01, FLA_ONE, C20,
FLA_Cntl_sub_gemm3( cntl ) );
FLA_Gemm_internal( FLA_CONJ_TRANSPOSE, FLA_NO_TRANSPOSE,
FLA_MINUS_ONE, A12, C10, FLA_ONE, C20,
FLA_Cntl_sub_gemm4( cntl ) );
/*------------------------------------------------------------*/
// Fold the processed blocks back into the 2x2 views: A toward TL, B toward
// BR, C toward TR.
FLA_Cont_with_3x3_to_2x2( &ATL, /**/ &ATR, A00, A01, /**/ A02,
A10, A11, /**/ A12,
/* ************** */ /* ****************** */
&ABL, /**/ &ABR, A20, A21, /**/ A22,
FLA_TL );
FLA_Cont_with_3x3_to_2x2( &BTL, /**/ &BTR, B00, /**/ B01, B02,
/* ************** */ /* ****************** */
B10, /**/ B11, B12,
&BBL, /**/ &BBR, B20, /**/ B21, B22,
FLA_BR );
FLA_Cont_with_3x3_to_2x2( &CTL, /**/ &CTR, C00, /**/ C01, C02,
C10, /**/ C11, C12,
/* ************** */ /* ****************** */
&CBL, /**/ &CBR, C20, /**/ C21, C22,
FLA_TR );
}
return FLA_SUCCESS;
}
FLA_Error FLA_Sylv_hh_blk_var15( FLA_Obj isgn, FLA_Obj A, FLA_Obj B, FLA_Obj C, FLA_Obj scale, fla_sylv_t * cntl )
References FLA_Cont_with_3x1_to_2x1(), FLA_Cont_with_3x3_to_2x2(), FLA_Determine_blocksize(), FLA_Gemm_internal(), FLA_MINUS_ONE, FLA_Obj_length(), FLA_ONE, FLA_Part_2x1(), FLA_Part_2x2(), FLA_Repart_2x1_to_3x1(), FLA_Repart_2x2_to_3x3(), and FLA_Sylv_internal().
Referenced by FLA_Sylv_hh().
{
// Blocked variant 15 of the "hh" Sylvester routine. Only A is partitioned
// (2x2, growing ATL); C is split into row panels CT / CB that advance with it,
// and B is passed whole to every sub-solve.
//
// isgn  - handed unchanged to the FLA_Sylv_internal call.
// A     - its sub-blocks A01 and A11 feed the gemm and sylv calls.
// B     - used unpartitioned as the second operand of the sub-solve.
// C     - row panel C1 is updated and then solved in each iteration.
// scale - handed unchanged to the FLA_Sylv_internal call.
// cntl  - provides the blocksize and the gemm1 / sylv1 sub-control trees.
//
// Each iteration first subtracts A01' * C0 from C1 (using the panels already
// processed), then solves for C1. Always returns FLA_SUCCESS.
FLA_Obj ATL, ATR, A00, A01, A02,
ABL, ABR, A10, A11, A12,
A20, A21, A22;
FLA_Obj CT, C0,
CB, C1,
C2;
// Number of rows taken from CB (and rows/columns from ABR) per iteration.
dim_t b;
FLA_Part_2x2( A, &ATL, &ATR,
&ABL, &ABR, 0, 0, FLA_TL );
FLA_Part_2x1( C, &CT,
&CB, 0, FLA_TOP );
// Runs until ATL is as long as A.
while ( FLA_Obj_length( ATL ) < FLA_Obj_length( A ) ){
b = FLA_Determine_blocksize( CB, FLA_BOTTOM, FLA_Cntl_blocksize( cntl ) );
FLA_Repart_2x2_to_3x3( ATL, /**/ ATR, &A00, /**/ &A01, &A02,
/* ************* */ /* ******************** */
&A10, /**/ &A11, &A12,
ABL, /**/ ABR, &A20, /**/ &A21, &A22,
b, b, FLA_BR );
FLA_Repart_2x1_to_3x1( CT, &C0,
/* ** */ /* ** */
&C1,
CB, &C2, b, FLA_BOTTOM );
// Loop Invariant:
// CT = sylv( ATL', B', CT )
// CB = CB
/*------------------------------------------------------------*/
// C1 = sylv( A11', B', C1 - A01' * C0 );
FLA_Gemm_internal( FLA_CONJ_TRANSPOSE, FLA_NO_TRANSPOSE,
FLA_MINUS_ONE, A01, C0, FLA_ONE, C1,
FLA_Cntl_sub_gemm1( cntl ) );
FLA_Sylv_internal( FLA_CONJ_TRANSPOSE, FLA_CONJ_TRANSPOSE,
isgn, A11, B, C1, scale,
FLA_Cntl_sub_sylv1( cntl ) );
/*------------------------------------------------------------*/
// Move the processed blocks into ATL and CT for the next iteration.
FLA_Cont_with_3x3_to_2x2( &ATL, /**/ &ATR, A00, A01, /**/ A02,
A10, A11, /**/ A12,
/* ************** */ /* ****************** */
&ABL, /**/ &ABR, A20, A21, /**/ A22,
FLA_TL );
FLA_Cont_with_3x1_to_2x1( &CT, C0,
C1,
/* ** */ /* ** */
&CB, C2, FLA_TOP );
}
return FLA_SUCCESS;
}
FLA_Error FLA_Sylv_hh_blk_var16( FLA_Obj isgn, FLA_Obj A, FLA_Obj B, FLA_Obj C, FLA_Obj scale, fla_sylv_t * cntl )
References FLA_Cont_with_3x1_to_2x1(), FLA_Cont_with_3x3_to_2x2(), FLA_Determine_blocksize(), FLA_Gemm_internal(), FLA_MINUS_ONE, FLA_Obj_length(), FLA_ONE, FLA_Part_2x1(), FLA_Part_2x2(), FLA_Repart_2x1_to_3x1(), FLA_Repart_2x2_to_3x3(), and FLA_Sylv_internal().
Referenced by FLA_Sylv_hh().
{
// Blocked variant 16 of the "hh" Sylvester routine. Only A is partitioned
// (2x2, growing ATL); C is split into row panels CT / CB that advance with it,
// and B is passed whole to every sub-solve.
//
// isgn  - handed unchanged to the FLA_Sylv_internal call.
// A     - its sub-blocks A11 and A12 feed the sylv and gemm calls.
// B     - used unpartitioned as the second operand of the sub-solve.
// C     - row panel C1 is solved, then C2 is updated, in each iteration.
// scale - handed unchanged to the FLA_Sylv_internal call.
// cntl  - provides the blocksize and the sylv1 / gemm1 sub-control trees.
//
// Each iteration solves for C1 first and then subtracts A12' * C1 from the
// not-yet-processed panel C2. Always returns FLA_SUCCESS.
FLA_Obj ATL, ATR, A00, A01, A02,
ABL, ABR, A10, A11, A12,
A20, A21, A22;
FLA_Obj CT, C0,
CB, C1,
C2;
// Number of rows taken from CB (and rows/columns from ABR) per iteration.
dim_t b;
FLA_Part_2x2( A, &ATL, &ATR,
&ABL, &ABR, 0, 0, FLA_TL );
FLA_Part_2x1( C, &CT,
&CB, 0, FLA_TOP );
// Runs until ATL is as long as A.
while ( FLA_Obj_length( ATL ) < FLA_Obj_length( A ) ){
b = FLA_Determine_blocksize( CB, FLA_BOTTOM, FLA_Cntl_blocksize( cntl ) );
FLA_Repart_2x2_to_3x3( ATL, /**/ ATR, &A00, /**/ &A01, &A02,
/* ************* */ /* ******************** */
&A10, /**/ &A11, &A12,
ABL, /**/ ABR, &A20, /**/ &A21, &A22,
b, b, FLA_BR );
FLA_Repart_2x1_to_3x1( CT, &C0,
/* ** */ /* ** */
&C1,
CB, &C2, b, FLA_BOTTOM );
// Loop Invariant:
// CT = sylv( ATL', B', CT )
// CB = CB - ATR' * sylv( ATL', B', CT )
/*------------------------------------------------------------*/
// C1 = sylv( A11', B', C1 );
FLA_Sylv_internal( FLA_CONJ_TRANSPOSE, FLA_CONJ_TRANSPOSE,
isgn, A11, B, C1, scale,
FLA_Cntl_sub_sylv1( cntl ) );
// C2 = C2 - A12' * C1;
FLA_Gemm_internal( FLA_CONJ_TRANSPOSE, FLA_NO_TRANSPOSE,
FLA_MINUS_ONE, A12, C1, FLA_ONE, C2,
FLA_Cntl_sub_gemm1( cntl ) );
/*------------------------------------------------------------*/
// Move the processed blocks into ATL and CT for the next iteration.
FLA_Cont_with_3x3_to_2x2( &ATL, /**/ &ATR, A00, A01, /**/ A02,
A10, A11, /**/ A12,
/* ************** */ /* ****************** */
&ABL, /**/ &ABR, A20, A21, /**/ A22,
FLA_TL );
FLA_Cont_with_3x1_to_2x1( &CT, C0,
C1,
/* ** */ /* ** */
&CB, C2, FLA_TOP );
}
return FLA_SUCCESS;
}
FLA_Error FLA_Sylv_hh_blk_var17( FLA_Obj isgn, FLA_Obj A, FLA_Obj B, FLA_Obj C, FLA_Obj scale, fla_sylv_t * cntl )
References FLA_Cont_with_1x3_to_1x2(), FLA_Cont_with_3x3_to_2x2(), FLA_Determine_blocksize(), FLA_Gemm_internal(), FLA_Obj_length(), FLA_ONE, FLA_Part_1x2(), FLA_Part_2x2(), FLA_Repart_1x2_to_1x3(), FLA_Repart_2x2_to_3x3(), and FLA_Sylv_internal().
Referenced by FLA_Sylv_hh().
{
// Blocked variant 17 of the "hh" Sylvester routine. Only B is partitioned
// (2x2, growing BBR); C is split into column panels CL / CR that advance with
// it, and A is passed whole to every sub-solve.
//
// isgn  - handed to the FLA_Sylv_internal call; FLA_NEGATE( isgn ) is the
//         alpha of the C2 * B12' gemm update ("-/+" in the step comment).
// A     - used unpartitioned as the first operand of the sub-solve.
// B     - its sub-blocks B11 and B12 feed the sylv and gemm calls.
// C     - column panel C1 is updated and then solved in each iteration.
// scale - handed unchanged to the FLA_Sylv_internal call.
// cntl  - provides the blocksize and the gemm1 / sylv1 sub-control trees.
//
// Always returns FLA_SUCCESS.
FLA_Obj BTL, BTR, B00, B01, B02,
BBL, BBR, B10, B11, B12,
B20, B21, B22;
FLA_Obj CL, CR, C0, C1, C2;
// Number of columns taken from CL (and rows/columns from BTL) per iteration.
dim_t b;
FLA_Part_2x2( B, &BTL, &BTR,
&BBL, &BBR, 0, 0, FLA_BR );
FLA_Part_1x2( C, &CL, &CR, 0, FLA_RIGHT );
// Runs until BBR is as long as B.
while ( FLA_Obj_length( BBR ) < FLA_Obj_length( B ) ){
b = FLA_Determine_blocksize( CL, FLA_LEFT, FLA_Cntl_blocksize( cntl ) );
FLA_Repart_2x2_to_3x3( BTL, /**/ BTR, &B00, &B01, /**/ &B02,
&B10, &B11, /**/ &B12,
/* ************* */ /* ******************** */
BBL, /**/ BBR, &B20, &B21, /**/ &B22,
b, b, FLA_TL );
FLA_Repart_1x2_to_1x3( CL, /**/ CR, &C0, &C1, /**/ &C2,
b, FLA_LEFT );
// NOTE(review): the loop-invariant entries below are blank in the original.
// Loop Invariant:
// CL =
// CR =
/*------------------------------------------------------------*/
// C1 = sylv( A', B11', C1 -/+ C2 * B12' );
FLA_Gemm_internal( FLA_NO_TRANSPOSE, FLA_CONJ_TRANSPOSE,
FLA_NEGATE( isgn ), C2, B12, FLA_ONE, C1,
FLA_Cntl_sub_gemm1( cntl ) );
FLA_Sylv_internal( FLA_CONJ_TRANSPOSE, FLA_CONJ_TRANSPOSE,
isgn, A, B11, C1, scale,
FLA_Cntl_sub_sylv1( cntl ) );
/*------------------------------------------------------------*/
// Move the processed blocks into BBR and CR for the next iteration.
FLA_Cont_with_3x3_to_2x2( &BTL, /**/ &BTR, B00, /**/ B01, B02,
/* ************** */ /* ****************** */
B10, /**/ B11, B12,
&BBL, /**/ &BBR, B20, /**/ B21, B22,
FLA_BR );
FLA_Cont_with_1x3_to_1x2( &CL, /**/ &CR, C0, /**/ C1, C2,
FLA_RIGHT );
}
return FLA_SUCCESS;
}
FLA_Error FLA_Sylv_hh_blk_var18( FLA_Obj isgn, FLA_Obj A, FLA_Obj B, FLA_Obj C, FLA_Obj scale, fla_sylv_t * cntl )
References FLA_Cont_with_1x3_to_1x2(), FLA_Cont_with_3x3_to_2x2(), FLA_Determine_blocksize(), FLA_Gemm_internal(), FLA_Obj_length(), FLA_ONE, FLA_Part_1x2(), FLA_Part_2x2(), FLA_Repart_1x2_to_1x3(), FLA_Repart_2x2_to_3x3(), and FLA_Sylv_internal().
Referenced by FLA_Sylv_hh().
{
// Blocked variant 18 of the "hh" Sylvester routine. Only B is partitioned
// (2x2, growing BBR); C is split into column panels CL / CR that advance with
// it, and A is passed whole to every sub-solve.
//
// isgn  - handed to the FLA_Sylv_internal call; FLA_NEGATE( isgn ) is the
//         alpha of the C1 * B01' gemm update ("-/+" in the step comment).
// A     - used unpartitioned as the first operand of the sub-solve.
// B     - its sub-blocks B11 and B01 feed the sylv and gemm calls.
// C     - column panel C1 is solved, then C0 is updated, in each iteration.
// scale - handed unchanged to the FLA_Sylv_internal call.
// cntl  - provides the blocksize and the sylv1 / gemm1 sub-control trees.
//
// Always returns FLA_SUCCESS.
FLA_Obj BTL, BTR, B00, B01, B02,
BBL, BBR, B10, B11, B12,
B20, B21, B22;
FLA_Obj CL, CR, C0, C1, C2;
// Number of columns taken from CL (and rows/columns from BTL) per iteration.
dim_t b;
FLA_Part_2x2( B, &BTL, &BTR,
&BBL, &BBR, 0, 0, FLA_BR );
FLA_Part_1x2( C, &CL, &CR, 0, FLA_RIGHT );
// Runs until BBR is as long as B.
while ( FLA_Obj_length( BBR ) < FLA_Obj_length( B ) ){
b = FLA_Determine_blocksize( CL, FLA_LEFT, FLA_Cntl_blocksize( cntl ) );
FLA_Repart_2x2_to_3x3( BTL, /**/ BTR, &B00, &B01, /**/ &B02,
&B10, &B11, /**/ &B12,
/* ************* */ /* ******************** */
BBL, /**/ BBR, &B20, &B21, /**/ &B22,
b, b, FLA_TL );
FLA_Repart_1x2_to_1x3( CL, /**/ CR, &C0, &C1, /**/ &C2,
b, FLA_LEFT );
// NOTE(review): the loop-invariant entries below are blank in the original.
// Loop Invariant:
// CL =
// CR =
/*------------------------------------------------------------*/
// C1 = sylv( A', B11', C1 );
FLA_Sylv_internal( FLA_CONJ_TRANSPOSE, FLA_CONJ_TRANSPOSE,
isgn, A, B11, C1, scale,
FLA_Cntl_sub_sylv1( cntl ) );
// C0 = C0 -/+ C1 * B01';
FLA_Gemm_internal( FLA_NO_TRANSPOSE, FLA_CONJ_TRANSPOSE,
FLA_NEGATE( isgn ), C1, B01, FLA_ONE, C0,
FLA_Cntl_sub_gemm1( cntl ) );
/*------------------------------------------------------------*/
// Move the processed blocks into BBR and CR for the next iteration.
FLA_Cont_with_3x3_to_2x2( &BTL, /**/ &BTR, B00, /**/ B01, B02,
/* ************** */ /* ****************** */
B10, /**/ B11, B12,
&BBL, /**/ &BBR, B20, /**/ B21, B22,
FLA_BR );
FLA_Cont_with_1x3_to_1x2( &CL, /**/ &CR, C0, /**/ C1, C2,
FLA_RIGHT );
}
return FLA_SUCCESS;
}
FLA_Error FLA_Sylv_hh_blk_var2( FLA_Obj isgn, FLA_Obj A, FLA_Obj B, FLA_Obj C, FLA_Obj scale, fla_sylv_t * cntl )
References FLA_Cont_with_3x3_to_2x2(), FLA_Determine_blocksize(), FLA_Gemm_internal(), FLA_MINUS_ONE, FLA_Obj_length(), FLA_ONE, FLA_Part_2x2(), FLA_Repart_2x2_to_3x3(), and FLA_Sylv_internal().
Referenced by FLA_Sylv_hh().
{
// Blocked variant 2 of the "hh" Sylvester routine. A, B and C are each viewed
// as 2x2 partitionings that are advanced together by b rows/columns per
// iteration; the loop stops once ATL is as long as A.
//
// isgn  - handed to every FLA_Sylv_internal call; FLA_NEGATE( isgn ) is the
//         alpha of the C * B' gemm updates (written "-/+" in the step comments).
// A, B  - operands whose sub-blocks feed the gemm/sylv calls below.
// C     - its sub-blocks are the outputs of those calls.
// scale - handed unchanged to every FLA_Sylv_internal call.
// cntl  - provides the blocksize and one sub-control tree per gemm/sylv call.
//
// Each iteration performs three Sylvester sub-solves (C12, C01, C11) and six
// gemm updates, including accumulations into C22 and C21 that are not solved
// in the same iteration. Always returns FLA_SUCCESS.
FLA_Obj ATL, ATR, A00, A01, A02,
ABL, ABR, A10, A11, A12,
A20, A21, A22;
FLA_Obj BTL, BTR, B00, B01, B02,
BBL, BBR, B10, B11, B12,
B20, B21, B22;
FLA_Obj CTL, CTR, C00, C01, C02,
CBL, CBR, C10, C11, C12,
C20, C21, C22;
// Step size shared by the repartitioning of A, B and C in one iteration.
dim_t b;
// Initial partitionings with a 0 x 0 quadrant: A anchored at TL, B at BR and
// C at TR (presumably the named quadrant is the empty one - confirm against
// the FLA_Part_2x2 documentation).
FLA_Part_2x2( A, &ATL, &ATR,
&ABL, &ABR, 0, 0, FLA_TL );
FLA_Part_2x2( B, &BTL, &BTR,
&BBL, &BBR, 0, 0, FLA_BR );
FLA_Part_2x2( C, &CTL, &CTR,
&CBL, &CBR, 0, 0, FLA_TR );
// NOTE(review): the guard watches only ATL while b is derived from CBL;
// presumably b stays positive until A is exhausted - confirm for non-square C.
while ( FLA_Obj_length( ATL ) < FLA_Obj_length( A ) ){
b = FLA_Determine_blocksize( CBL, FLA_BL, FLA_Cntl_blocksize( cntl ) );
// Expose the next b x b diagonal blocks A11 and B11 and the matching 3x3
// view of C.
FLA_Repart_2x2_to_3x3( ATL, /**/ ATR, &A00, /**/ &A01, &A02,
/* ************* */ /* ******************** */
&A10, /**/ &A11, &A12,
ABL, /**/ ABR, &A20, /**/ &A21, &A22,
b, b, FLA_BR );
FLA_Repart_2x2_to_3x3( BTL, /**/ BTR, &B00, &B01, /**/ &B02,
&B10, &B11, /**/ &B12,
/* ************* */ /* ******************** */
BBL, /**/ BBR, &B20, &B21, /**/ &B22,
b, b, FLA_TL );
FLA_Repart_2x2_to_3x3( CTL, /**/ CTR, &C00, &C01, /**/ &C02,
/* ************* */ /* ******************** */
&C10, &C11, /**/ &C12,
CBL, /**/ CBR, &C20, &C21, /**/ &C22,
b, b, FLA_BL );
// NOTE(review): the loop-invariant entries below are blank in the original.
// Loop Invariant:
// CTL =
// CTR =
// CBL =
// CBR =
/*------------------------------------------------------------*/
// C12 = sylv( A11', B22', C12 );
FLA_Sylv_internal( FLA_CONJ_TRANSPOSE, FLA_CONJ_TRANSPOSE,
isgn, A11, B22, C12, scale,
FLA_Cntl_sub_sylv1( cntl ) );
// C22 = C22 - A12' * C12;
FLA_Gemm_internal( FLA_CONJ_TRANSPOSE, FLA_NO_TRANSPOSE,
FLA_MINUS_ONE, A12, C12, FLA_ONE, C22,
FLA_Cntl_sub_gemm1( cntl ) );
// C01 = sylv( A00', B11', C01 -/+ C02 * B12' );
FLA_Gemm_internal( FLA_NO_TRANSPOSE, FLA_CONJ_TRANSPOSE,
FLA_NEGATE( isgn ), C02, B12, FLA_ONE, C01,
FLA_Cntl_sub_gemm2( cntl ) );
FLA_Sylv_internal( FLA_CONJ_TRANSPOSE, FLA_CONJ_TRANSPOSE,
isgn, A00, B11, C01, scale,
FLA_Cntl_sub_sylv2( cntl ) );
// C11 = sylv( A11', B11', C11 - A01' * C01 -/+ C12 * B12' );
FLA_Gemm_internal( FLA_NO_TRANSPOSE, FLA_CONJ_TRANSPOSE,
FLA_NEGATE( isgn ), C12, B12, FLA_ONE, C11,
FLA_Cntl_sub_gemm3( cntl ) );
FLA_Gemm_internal( FLA_CONJ_TRANSPOSE, FLA_NO_TRANSPOSE,
FLA_MINUS_ONE, A01, C01, FLA_ONE, C11,
FLA_Cntl_sub_gemm4( cntl ) );
FLA_Sylv_internal( FLA_CONJ_TRANSPOSE, FLA_CONJ_TRANSPOSE,
isgn, A11, B11, C11, scale,
FLA_Cntl_sub_sylv3( cntl ) );
// C21 = C21 - A12' * C11 - A02' * C01;
FLA_Gemm_internal( FLA_CONJ_TRANSPOSE, FLA_NO_TRANSPOSE,
FLA_MINUS_ONE, A02, C01, FLA_ONE, C21,
FLA_Cntl_sub_gemm5( cntl ) );
FLA_Gemm_internal( FLA_CONJ_TRANSPOSE, FLA_NO_TRANSPOSE,
FLA_MINUS_ONE, A12, C11, FLA_ONE, C21,
FLA_Cntl_sub_gemm6( cntl ) );
/*------------------------------------------------------------*/
// Fold the processed blocks back into the 2x2 views: A toward TL, B toward
// BR, C toward TR.
FLA_Cont_with_3x3_to_2x2( &ATL, /**/ &ATR, A00, A01, /**/ A02,
A10, A11, /**/ A12,
/* ************** */ /* ****************** */
&ABL, /**/ &ABR, A20, A21, /**/ A22,
FLA_TL );
FLA_Cont_with_3x3_to_2x2( &BTL, /**/ &BTR, B00, /**/ B01, B02,
/* ************** */ /* ****************** */
B10, /**/ B11, B12,
&BBL, /**/ &BBR, B20, /**/ B21, B22,
FLA_BR );
FLA_Cont_with_3x3_to_2x2( &CTL, /**/ &CTR, C00, /**/ C01, C02,
C10, /**/ C11, C12,
/* ************** */ /* ****************** */
&CBL, /**/ &CBR, C20, /**/ C21, C22,
FLA_TR );
}
return FLA_SUCCESS;
}
FLA_Error FLA_Sylv_hh_blk_var3( FLA_Obj isgn, FLA_Obj A, FLA_Obj B, FLA_Obj C, FLA_Obj scale, fla_sylv_t * cntl )
References FLA_Cont_with_3x3_to_2x2(), FLA_Determine_blocksize(), FLA_Gemm_internal(), FLA_MINUS_ONE, FLA_Obj_length(), FLA_ONE, FLA_Part_2x2(), FLA_Repart_2x2_to_3x3(), and FLA_Sylv_internal().
Referenced by FLA_Sylv_hh().
{
// Blocked variant 3 of the "hh" Sylvester solver: every FLA_Sylv_internal
// call below passes FLA_CONJ_TRANSPOSE for both the A and the B operand.
// Each pass finishes the middle block column of C: C01, C11 and C21 receive
// their pending gemm updates and are then handed to FLA_Sylv_internal.
// C02, C12 and C22 are only read in this pass.
// isgn and scale are forwarded to FLA_Sylv_internal (isgn also feeds
// FLA_NEGATE); cntl supplies the blocksize and the sub-control trees.
// Always returns FLA_SUCCESS: results of the internal calls are not checked.
FLA_Obj ATL, ATR, A00, A01, A02,
ABL, ABR, A10, A11, A12,
A20, A21, A22;
FLA_Obj BTL, BTR, B00, B01, B02,
BBL, BBR, B10, B11, B12,
B20, B21, B22;
FLA_Obj CTL, CTR, C00, C01, C02,
CBL, CBR, C10, C11, C12,
C20, C21, C22;
dim_t b;
// Initial 2x2 views; the 0 x 0 quadrant requested is TL for A, BR for B and
// TR for C.
FLA_Part_2x2( A, &ATL, &ATR,
&ABL, &ABR, 0, 0, FLA_TL );
FLA_Part_2x2( B, &BTL, &BTR,
&BBL, &BBR, 0, 0, FLA_BR );
FLA_Part_2x2( C, &CTL, &CTR,
&CBL, &CBR, 0, 0, FLA_TR );
// Keep going until ATL has as many rows as A.
while ( FLA_Obj_length( ATL ) < FLA_Obj_length( A ) ){
// Step size for this pass, derived from CBL and the blocksize stored in cntl.
b = FLA_Determine_blocksize( CBL, FLA_BL, FLA_Cntl_blocksize( cntl ) );
// Expose 3x3 views; per the quadrant arguments the b x b request carves A11
// out of ABR, B11 out of BTL and C11 out of CBL.
FLA_Repart_2x2_to_3x3( ATL, /**/ ATR, &A00, /**/ &A01, &A02,
/* ************* */ /* ******************** */
&A10, /**/ &A11, &A12,
ABL, /**/ ABR, &A20, /**/ &A21, &A22,
b, b, FLA_BR );
FLA_Repart_2x2_to_3x3( BTL, /**/ BTR, &B00, &B01, /**/ &B02,
&B10, &B11, /**/ &B12,
/* ************* */ /* ******************** */
BBL, /**/ BBR, &B20, &B21, /**/ &B22,
b, b, FLA_TL );
FLA_Repart_2x2_to_3x3( CTL, /**/ CTR, &C00, &C01, /**/ &C02,
/* ************* */ /* ******************** */
&C10, &C11, /**/ &C12,
CBL, /**/ CBR, &C20, &C21, /**/ &C22,
b, b, FLA_BL );
// Loop Invariant:
// CTL =
// CTR =
// CBL =
// CBR =
/*------------------------------------------------------------*/
// FLA_NEGATE( isgn ) supplies the "-/+" factor written in the formulas
// (presumably the negated sign; confirm against the macro definition).
// C01 = sylv( A00', B11', C01 -/+ C02 * B12' );
FLA_Gemm_internal( FLA_NO_TRANSPOSE, FLA_CONJ_TRANSPOSE,
FLA_NEGATE( isgn ), C02, B12, FLA_ONE, C01,
FLA_Cntl_sub_gemm1( cntl ) );
FLA_Sylv_internal( FLA_CONJ_TRANSPOSE, FLA_CONJ_TRANSPOSE,
isgn, A00, B11, C01, scale,
FLA_Cntl_sub_sylv1( cntl ) );
// C01 must be solved first: it is an input to the C11 update below.
// C11 = sylv( A11', B11', C11 - A01' * C01 -/+ C12 * B12' );
FLA_Gemm_internal( FLA_NO_TRANSPOSE, FLA_CONJ_TRANSPOSE,
FLA_NEGATE( isgn ), C12, B12, FLA_ONE, C11,
FLA_Cntl_sub_gemm2( cntl ) );
FLA_Gemm_internal( FLA_CONJ_TRANSPOSE, FLA_NO_TRANSPOSE,
FLA_MINUS_ONE, A01, C01, FLA_ONE, C11,
FLA_Cntl_sub_gemm3( cntl ) );
FLA_Sylv_internal( FLA_CONJ_TRANSPOSE, FLA_CONJ_TRANSPOSE,
isgn, A11, B11, C11, scale,
FLA_Cntl_sub_sylv2( cntl ) );
// The C21 update consumes both the solved C01 and the solved C11.
// C21 = sylv( A22', B11', C21 - A12' * C11 - A02' * C01 -/+ C22 * B12' );
FLA_Gemm_internal( FLA_NO_TRANSPOSE, FLA_CONJ_TRANSPOSE,
FLA_NEGATE( isgn ), C22, B12, FLA_ONE, C21,
FLA_Cntl_sub_gemm4( cntl ) );
FLA_Gemm_internal( FLA_CONJ_TRANSPOSE, FLA_NO_TRANSPOSE,
FLA_MINUS_ONE, A02, C01, FLA_ONE, C21,
FLA_Cntl_sub_gemm5( cntl ) );
FLA_Gemm_internal( FLA_CONJ_TRANSPOSE, FLA_NO_TRANSPOSE,
FLA_MINUS_ONE, A12, C11, FLA_ONE, C21,
FLA_Cntl_sub_gemm6( cntl ) );
FLA_Sylv_internal( FLA_CONJ_TRANSPOSE, FLA_CONJ_TRANSPOSE,
isgn, A22, B11, C21, scale,
FLA_Cntl_sub_sylv3( cntl ) );
/*------------------------------------------------------------*/
// Collapse back to 2x2 views; the quadrant arguments move the middle blocks
// into ATL, BBR and CTR.
FLA_Cont_with_3x3_to_2x2( &ATL, /**/ &ATR, A00, A01, /**/ A02,
A10, A11, /**/ A12,
/* ************** */ /* ****************** */
&ABL, /**/ &ABR, A20, A21, /**/ A22,
FLA_TL );
FLA_Cont_with_3x3_to_2x2( &BTL, /**/ &BTR, B00, /**/ B01, B02,
/* ************** */ /* ****************** */
B10, /**/ B11, B12,
&BBL, /**/ &BBR, B20, /**/ B21, B22,
FLA_BR );
FLA_Cont_with_3x3_to_2x2( &CTL, /**/ &CTR, C00, /**/ C01, C02,
C10, /**/ C11, C12,
/* ************** */ /* ****************** */
&CBL, /**/ &CBR, C20, /**/ C21, C22,
FLA_TR );
}
return FLA_SUCCESS;
}
| FLA_Error FLA_Sylv_hh_blk_var4 | ( | FLA_Obj | isgn, |
| FLA_Obj | A, | ||
| FLA_Obj | B, | ||
| FLA_Obj | C, | ||
| FLA_Obj | scale, | ||
| fla_sylv_t * | cntl | ||
| ) |
References FLA_Cont_with_3x3_to_2x2(), FLA_Determine_blocksize(), FLA_Gemm_internal(), FLA_MINUS_ONE, FLA_Obj_length(), FLA_ONE, FLA_Part_2x2(), FLA_Repart_2x2_to_3x3(), and FLA_Sylv_internal().
Referenced by FLA_Sylv_hh().
{
// Blocked variant 4 of the "hh" Sylvester solver: every FLA_Sylv_internal
// call below passes FLA_CONJ_TRANSPOSE for both the A and the B operand.
// Each pass solves C12, C01 and C11 through FLA_Sylv_internal and then
// applies gemm-only updates to C10 and C00 (no solve is issued for those).
// isgn and scale are forwarded to FLA_Sylv_internal (isgn also feeds
// FLA_NEGATE); cntl supplies the blocksize and the sub-control trees.
// Always returns FLA_SUCCESS: results of the internal calls are not checked.
FLA_Obj ATL, ATR, A00, A01, A02,
ABL, ABR, A10, A11, A12,
A20, A21, A22;
FLA_Obj BTL, BTR, B00, B01, B02,
BBL, BBR, B10, B11, B12,
B20, B21, B22;
FLA_Obj CTL, CTR, C00, C01, C02,
CBL, CBR, C10, C11, C12,
C20, C21, C22;
dim_t b;
// Initial 2x2 views; the 0 x 0 quadrant requested is TL for A, BR for B and
// TR for C.
FLA_Part_2x2( A, &ATL, &ATR,
&ABL, &ABR, 0, 0, FLA_TL );
FLA_Part_2x2( B, &BTL, &BTR,
&BBL, &BBR, 0, 0, FLA_BR );
FLA_Part_2x2( C, &CTL, &CTR,
&CBL, &CBR, 0, 0, FLA_TR );
// Keep going until ATL has as many rows as A.
while ( FLA_Obj_length( ATL ) < FLA_Obj_length( A ) ){
// Step size for this pass, derived from CBL and the blocksize stored in cntl.
b = FLA_Determine_blocksize( CBL, FLA_BL, FLA_Cntl_blocksize( cntl ) );
// Expose 3x3 views; per the quadrant arguments the b x b request carves A11
// out of ABR, B11 out of BTL and C11 out of CBL.
FLA_Repart_2x2_to_3x3( ATL, /**/ ATR, &A00, /**/ &A01, &A02,
/* ************* */ /* ******************** */
&A10, /**/ &A11, &A12,
ABL, /**/ ABR, &A20, /**/ &A21, &A22,
b, b, FLA_BR );
FLA_Repart_2x2_to_3x3( BTL, /**/ BTR, &B00, &B01, /**/ &B02,
&B10, &B11, /**/ &B12,
/* ************* */ /* ******************** */
BBL, /**/ BBR, &B20, &B21, /**/ &B22,
b, b, FLA_TL );
FLA_Repart_2x2_to_3x3( CTL, /**/ CTR, &C00, &C01, /**/ &C02,
/* ************* */ /* ******************** */
&C10, &C11, /**/ &C12,
CBL, /**/ CBR, &C20, &C21, /**/ &C22,
b, b, FLA_BL );
// Loop Invariant:
// CTL =
// CTR =
// CBL =
// CBR =
/*------------------------------------------------------------*/
// C12 = sylv( A11', B22', C12 - A01' * C02 );
FLA_Gemm_internal( FLA_CONJ_TRANSPOSE, FLA_NO_TRANSPOSE,
FLA_MINUS_ONE, A01, C02, FLA_ONE, C12,
FLA_Cntl_sub_gemm1( cntl ) );
FLA_Sylv_internal( FLA_CONJ_TRANSPOSE, FLA_CONJ_TRANSPOSE,
isgn, A11, B22, C12, scale,
FLA_Cntl_sub_sylv1( cntl ) );
// C01 = sylv( A00', B11', C01 );
FLA_Sylv_internal( FLA_CONJ_TRANSPOSE, FLA_CONJ_TRANSPOSE,
isgn, A00, B11, C01, scale,
FLA_Cntl_sub_sylv2( cntl ) );
// Both C12 and C01 are solved above because the C11 update reads them.
// FLA_NEGATE( isgn ) supplies the "-/+" factor written in the formulas
// (presumably the negated sign; confirm against the macro definition).
// C11 = sylv( A11', B11', C11 - A01' * C01 -/+ C12 * B12' );
FLA_Gemm_internal( FLA_NO_TRANSPOSE, FLA_CONJ_TRANSPOSE,
FLA_NEGATE( isgn ), C12, B12, FLA_ONE, C11,
FLA_Cntl_sub_gemm2( cntl ) );
FLA_Gemm_internal( FLA_CONJ_TRANSPOSE, FLA_NO_TRANSPOSE,
FLA_MINUS_ONE, A01, C01, FLA_ONE, C11,
FLA_Cntl_sub_gemm3( cntl ) );
FLA_Sylv_internal( FLA_CONJ_TRANSPOSE, FLA_CONJ_TRANSPOSE,
isgn, A11, B11, C11, scale,
FLA_Cntl_sub_sylv3( cntl ) );
// Update only (no solve): push the solved C11 and C12 into C10.
// C10 = C10 -/+ C12 * B02' -/+ C11 * B01';
FLA_Gemm_internal( FLA_NO_TRANSPOSE, FLA_CONJ_TRANSPOSE,
FLA_NEGATE( isgn ), C11, B01, FLA_ONE, C10,
FLA_Cntl_sub_gemm4( cntl ) );
FLA_Gemm_internal( FLA_NO_TRANSPOSE, FLA_CONJ_TRANSPOSE,
FLA_NEGATE( isgn ), C12, B02, FLA_ONE, C10,
FLA_Cntl_sub_gemm5( cntl ) );
// Update only (no solve): push the solved C01 into C00.
// C00 = C00 -/+ C01 * B01';
FLA_Gemm_internal( FLA_NO_TRANSPOSE, FLA_CONJ_TRANSPOSE,
FLA_NEGATE( isgn ), C01, B01, FLA_ONE, C00,
FLA_Cntl_sub_gemm6( cntl ) );
/*------------------------------------------------------------*/
// Collapse back to 2x2 views; the quadrant arguments move the middle blocks
// into ATL, BBR and CTR.
FLA_Cont_with_3x3_to_2x2( &ATL, /**/ &ATR, A00, A01, /**/ A02,
A10, A11, /**/ A12,
/* ************** */ /* ****************** */
&ABL, /**/ &ABR, A20, A21, /**/ A22,
FLA_TL );
FLA_Cont_with_3x3_to_2x2( &BTL, /**/ &BTR, B00, /**/ B01, B02,
/* ************** */ /* ****************** */
B10, /**/ B11, B12,
&BBL, /**/ &BBR, B20, /**/ B21, B22,
FLA_BR );
FLA_Cont_with_3x3_to_2x2( &CTL, /**/ &CTR, C00, /**/ C01, C02,
C10, /**/ C11, C12,
/* ************** */ /* ****************** */
&CBL, /**/ &CBR, C20, /**/ C21, C22,
FLA_TR );
}
return FLA_SUCCESS;
}
| FLA_Error FLA_Sylv_hh_blk_var5 | ( | FLA_Obj | isgn, |
| FLA_Obj | A, | ||
| FLA_Obj | B, | ||
| FLA_Obj | C, | ||
| FLA_Obj | scale, | ||
| fla_sylv_t * | cntl | ||
| ) |
References FLA_Cont_with_3x3_to_2x2(), FLA_Determine_blocksize(), FLA_Gemm_internal(), FLA_MINUS_ONE, FLA_Obj_length(), FLA_ONE, FLA_Part_2x2(), FLA_Repart_2x2_to_3x3(), and FLA_Sylv_internal().
Referenced by FLA_Sylv_hh().
{
// Blocked variant 5 of the "hh" Sylvester solver: every FLA_Sylv_internal
// call below passes FLA_CONJ_TRANSPOSE for both the A and the B operand.
// Each pass finishes the middle block row of C: C12, C11 and C10 receive
// their pending gemm updates and are then handed to FLA_Sylv_internal.
// C02, C01 and C00 are only read in this pass.
// isgn and scale are forwarded to FLA_Sylv_internal (isgn also feeds
// FLA_NEGATE); cntl supplies the blocksize and the sub-control trees.
// Always returns FLA_SUCCESS: results of the internal calls are not checked.
FLA_Obj ATL, ATR, A00, A01, A02,
ABL, ABR, A10, A11, A12,
A20, A21, A22;
FLA_Obj BTL, BTR, B00, B01, B02,
BBL, BBR, B10, B11, B12,
B20, B21, B22;
FLA_Obj CTL, CTR, C00, C01, C02,
CBL, CBR, C10, C11, C12,
C20, C21, C22;
dim_t b;
// Initial 2x2 views; the 0 x 0 quadrant requested is TL for A, BR for B and
// TR for C.
FLA_Part_2x2( A, &ATL, &ATR,
&ABL, &ABR, 0, 0, FLA_TL );
FLA_Part_2x2( B, &BTL, &BTR,
&BBL, &BBR, 0, 0, FLA_BR );
FLA_Part_2x2( C, &CTL, &CTR,
&CBL, &CBR, 0, 0, FLA_TR );
// Keep going until ATL has as many rows as A.
while ( FLA_Obj_length( ATL ) < FLA_Obj_length( A ) ){
// Step size for this pass, derived from CBL and the blocksize stored in cntl.
b = FLA_Determine_blocksize( CBL, FLA_BL, FLA_Cntl_blocksize( cntl ) );
// Expose 3x3 views; per the quadrant arguments the b x b request carves A11
// out of ABR, B11 out of BTL and C11 out of CBL.
FLA_Repart_2x2_to_3x3( ATL, /**/ ATR, &A00, /**/ &A01, &A02,
/* ************* */ /* ******************** */
&A10, /**/ &A11, &A12,
ABL, /**/ ABR, &A20, /**/ &A21, &A22,
b, b, FLA_BR );
FLA_Repart_2x2_to_3x3( BTL, /**/ BTR, &B00, &B01, /**/ &B02,
&B10, &B11, /**/ &B12,
/* ************* */ /* ******************** */
BBL, /**/ BBR, &B20, &B21, /**/ &B22,
b, b, FLA_TL );
FLA_Repart_2x2_to_3x3( CTL, /**/ CTR, &C00, &C01, /**/ &C02,
/* ************* */ /* ******************** */
&C10, &C11, /**/ &C12,
CBL, /**/ CBR, &C20, &C21, /**/ &C22,
b, b, FLA_BL );
// Loop Invariant:
// CTL =
// CTR =
// CBL =
// CBR =
/*------------------------------------------------------------*/
// C12 = sylv( A11', B22', C12 - A01' * C02 );
FLA_Gemm_internal( FLA_CONJ_TRANSPOSE, FLA_NO_TRANSPOSE,
FLA_MINUS_ONE, A01, C02, FLA_ONE, C12,
FLA_Cntl_sub_gemm1( cntl ) );
FLA_Sylv_internal( FLA_CONJ_TRANSPOSE, FLA_CONJ_TRANSPOSE,
isgn, A11, B22, C12, scale,
FLA_Cntl_sub_sylv1( cntl ) );
// C12 must be solved first: it is an input to the C11 update below.
// FLA_NEGATE( isgn ) supplies the "-/+" factor written in the formulas
// (presumably the negated sign; confirm against the macro definition).
// C11 = sylv( A11', B11', C11 - A01' * C01 -/+ C12 * B12' );
FLA_Gemm_internal( FLA_NO_TRANSPOSE, FLA_CONJ_TRANSPOSE,
FLA_NEGATE( isgn ), C12, B12, FLA_ONE, C11,
FLA_Cntl_sub_gemm2( cntl ) );
FLA_Gemm_internal( FLA_CONJ_TRANSPOSE, FLA_NO_TRANSPOSE,
FLA_MINUS_ONE, A01, C01, FLA_ONE, C11,
FLA_Cntl_sub_gemm3( cntl ) );
FLA_Sylv_internal( FLA_CONJ_TRANSPOSE, FLA_CONJ_TRANSPOSE,
isgn, A11, B11, C11, scale,
FLA_Cntl_sub_sylv2( cntl ) );
// The C10 update consumes both the solved C11 and the solved C12.
// C10 = sylv( A11', B00', C10 - A01' * C00 -/+ C12 * B02' -/+ C11 * B01' );
FLA_Gemm_internal( FLA_NO_TRANSPOSE, FLA_CONJ_TRANSPOSE,
FLA_NEGATE( isgn ), C11, B01, FLA_ONE, C10,
FLA_Cntl_sub_gemm4( cntl ) );
FLA_Gemm_internal( FLA_NO_TRANSPOSE, FLA_CONJ_TRANSPOSE,
FLA_NEGATE( isgn ), C12, B02, FLA_ONE, C10,
FLA_Cntl_sub_gemm5( cntl ) );
FLA_Gemm_internal( FLA_CONJ_TRANSPOSE, FLA_NO_TRANSPOSE,
FLA_MINUS_ONE, A01, C00, FLA_ONE, C10,
FLA_Cntl_sub_gemm6( cntl ) );
FLA_Sylv_internal( FLA_CONJ_TRANSPOSE, FLA_CONJ_TRANSPOSE,
isgn, A11, B00, C10, scale,
FLA_Cntl_sub_sylv3( cntl ) );
/*------------------------------------------------------------*/
// Collapse back to 2x2 views; the quadrant arguments move the middle blocks
// into ATL, BBR and CTR.
FLA_Cont_with_3x3_to_2x2( &ATL, /**/ &ATR, A00, A01, /**/ A02,
A10, A11, /**/ A12,
/* ************** */ /* ****************** */
&ABL, /**/ &ABR, A20, A21, /**/ A22,
FLA_TL );
FLA_Cont_with_3x3_to_2x2( &BTL, /**/ &BTR, B00, /**/ B01, B02,
/* ************** */ /* ****************** */
B10, /**/ B11, B12,
&BBL, /**/ &BBR, B20, /**/ B21, B22,
FLA_BR );
FLA_Cont_with_3x3_to_2x2( &CTL, /**/ &CTR, C00, /**/ C01, C02,
C10, /**/ C11, C12,
/* ************** */ /* ****************** */
&CBL, /**/ &CBR, C20, /**/ C21, C22,
FLA_TR );
}
return FLA_SUCCESS;
}
| FLA_Error FLA_Sylv_hh_blk_var6 | ( | FLA_Obj | isgn, |
| FLA_Obj | A, | ||
| FLA_Obj | B, | ||
| FLA_Obj | C, | ||
| FLA_Obj | scale, | ||
| fla_sylv_t * | cntl | ||
| ) |
References FLA_Cont_with_3x3_to_2x2(), FLA_Determine_blocksize(), FLA_Gemm_internal(), FLA_MINUS_ONE, FLA_Obj_length(), FLA_ONE, FLA_Part_2x2(), FLA_Repart_2x2_to_3x3(), and FLA_Sylv_internal().
Referenced by FLA_Sylv_hh().
{
// Blocked variant 6 of the "hh" Sylvester solver: every FLA_Sylv_internal
// call below passes FLA_CONJ_TRANSPOSE for both the A and the B operand.
// Each pass solves C12, C01 and C11 through FLA_Sylv_internal and applies
// gemm-only updates to C22, C21, C10 and C00 (no solve is issued for those).
// isgn and scale are forwarded to FLA_Sylv_internal (isgn also feeds
// FLA_NEGATE); cntl supplies the blocksize and the sub-control trees.
// Always returns FLA_SUCCESS: results of the internal calls are not checked.
FLA_Obj ATL, ATR, A00, A01, A02,
ABL, ABR, A10, A11, A12,
A20, A21, A22;
FLA_Obj BTL, BTR, B00, B01, B02,
BBL, BBR, B10, B11, B12,
B20, B21, B22;
FLA_Obj CTL, CTR, C00, C01, C02,
CBL, CBR, C10, C11, C12,
C20, C21, C22;
dim_t b;
// Initial 2x2 views; the 0 x 0 quadrant requested is TL for A, BR for B and
// TR for C.
FLA_Part_2x2( A, &ATL, &ATR,
&ABL, &ABR, 0, 0, FLA_TL );
FLA_Part_2x2( B, &BTL, &BTR,
&BBL, &BBR, 0, 0, FLA_BR );
FLA_Part_2x2( C, &CTL, &CTR,
&CBL, &CBR, 0, 0, FLA_TR );
// Keep going until ATL has as many rows as A.
while ( FLA_Obj_length( ATL ) < FLA_Obj_length( A ) ){
// Step size for this pass, derived from CBL and the blocksize stored in cntl.
b = FLA_Determine_blocksize( CBL, FLA_BL, FLA_Cntl_blocksize( cntl ) );
// Expose 3x3 views; per the quadrant arguments the b x b request carves A11
// out of ABR, B11 out of BTL and C11 out of CBL.
FLA_Repart_2x2_to_3x3( ATL, /**/ ATR, &A00, /**/ &A01, &A02,
/* ************* */ /* ******************** */
&A10, /**/ &A11, &A12,
ABL, /**/ ABR, &A20, /**/ &A21, &A22,
b, b, FLA_BR );
FLA_Repart_2x2_to_3x3( BTL, /**/ BTR, &B00, &B01, /**/ &B02,
&B10, &B11, /**/ &B12,
/* ************* */ /* ******************** */
BBL, /**/ BBR, &B20, &B21, /**/ &B22,
b, b, FLA_TL );
FLA_Repart_2x2_to_3x3( CTL, /**/ CTR, &C00, &C01, /**/ &C02,
/* ************* */ /* ******************** */
&C10, &C11, /**/ &C12,
CBL, /**/ CBR, &C20, &C21, /**/ &C22,
b, b, FLA_BL );
// Loop Invariant:
// CTL =
// CTR =
// CBL =
// CBR =
/*------------------------------------------------------------*/
// C12 = sylv( A11', B22', C12 );
FLA_Sylv_internal( FLA_CONJ_TRANSPOSE, FLA_CONJ_TRANSPOSE,
isgn, A11, B22, C12, scale,
FLA_Cntl_sub_sylv1( cntl ) );
// Update only (no solve): push the solved C12 into C22.
// C22 = C22 - A12' * C12;
FLA_Gemm_internal( FLA_CONJ_TRANSPOSE, FLA_NO_TRANSPOSE,
FLA_MINUS_ONE, A12, C12, FLA_ONE, C22,
FLA_Cntl_sub_gemm1( cntl ) );
// C01 = sylv( A00', B11', C01 );
FLA_Sylv_internal( FLA_CONJ_TRANSPOSE, FLA_CONJ_TRANSPOSE,
isgn, A00, B11, C01, scale,
FLA_Cntl_sub_sylv2( cntl ) );
// Both C12 and C01 are solved above because the C11 update reads them.
// FLA_NEGATE( isgn ) supplies the "-/+" factor written in the formulas
// (presumably the negated sign; confirm against the macro definition).
// C11 = sylv( A11', B11', C11 - A01' * C01 -/+ C12 * B12' );
FLA_Gemm_internal( FLA_NO_TRANSPOSE, FLA_CONJ_TRANSPOSE,
FLA_NEGATE( isgn ), C12, B12, FLA_ONE, C11,
FLA_Cntl_sub_gemm2( cntl ) );
FLA_Gemm_internal( FLA_CONJ_TRANSPOSE, FLA_NO_TRANSPOSE,
FLA_MINUS_ONE, A01, C01, FLA_ONE, C11,
FLA_Cntl_sub_gemm3( cntl ) );
FLA_Sylv_internal( FLA_CONJ_TRANSPOSE, FLA_CONJ_TRANSPOSE,
isgn, A11, B11, C11, scale,
FLA_Cntl_sub_sylv3( cntl ) );
// Update only (no solve): push the solved C01 and C11 into C21.
// C21 = C21 - A12' * C11 - A02' * C01;
FLA_Gemm_internal( FLA_CONJ_TRANSPOSE, FLA_NO_TRANSPOSE,
FLA_MINUS_ONE, A02, C01, FLA_ONE, C21,
FLA_Cntl_sub_gemm4( cntl ) );
FLA_Gemm_internal( FLA_CONJ_TRANSPOSE, FLA_NO_TRANSPOSE,
FLA_MINUS_ONE, A12, C11, FLA_ONE, C21,
FLA_Cntl_sub_gemm5( cntl ) );
// Update only (no solve): push the solved C11 and C12 into C10.
// C10 = C10 -/+ C12 * B02' -/+ C11 * B01';
FLA_Gemm_internal( FLA_NO_TRANSPOSE, FLA_CONJ_TRANSPOSE,
FLA_NEGATE( isgn ), C11, B01, FLA_ONE, C10,
FLA_Cntl_sub_gemm6( cntl ) );
FLA_Gemm_internal( FLA_NO_TRANSPOSE, FLA_CONJ_TRANSPOSE,
FLA_NEGATE( isgn ), C12, B02, FLA_ONE, C10,
FLA_Cntl_sub_gemm7( cntl ) );
// Update only (no solve): push the solved C01 into C00.
// C00 = C00 -/+ C01 * B01';
FLA_Gemm_internal( FLA_NO_TRANSPOSE, FLA_CONJ_TRANSPOSE,
FLA_NEGATE( isgn ), C01, B01, FLA_ONE, C00,
FLA_Cntl_sub_gemm8( cntl ) );
/*------------------------------------------------------------*/
// Collapse back to 2x2 views; the quadrant arguments move the middle blocks
// into ATL, BBR and CTR.
FLA_Cont_with_3x3_to_2x2( &ATL, /**/ &ATR, A00, A01, /**/ A02,
A10, A11, /**/ A12,
/* ************** */ /* ****************** */
&ABL, /**/ &ABR, A20, A21, /**/ A22,
FLA_TL );
FLA_Cont_with_3x3_to_2x2( &BTL, /**/ &BTR, B00, /**/ B01, B02,
/* ************** */ /* ****************** */
B10, /**/ B11, B12,
&BBL, /**/ &BBR, B20, /**/ B21, B22,
FLA_BR );
FLA_Cont_with_3x3_to_2x2( &CTL, /**/ &CTR, C00, /**/ C01, C02,
C10, /**/ C11, C12,
/* ************** */ /* ****************** */
&CBL, /**/ &CBR, C20, /**/ C21, C22,
FLA_TR );
}
return FLA_SUCCESS;
}
| FLA_Error FLA_Sylv_hh_blk_var7 | ( | FLA_Obj | isgn, |
| FLA_Obj | A, | ||
| FLA_Obj | B, | ||
| FLA_Obj | C, | ||
| FLA_Obj | scale, | ||
| fla_sylv_t * | cntl | ||
| ) |
References FLA_Cont_with_3x3_to_2x2(), FLA_Determine_blocksize(), FLA_Gemm_internal(), FLA_MINUS_ONE, FLA_Obj_length(), FLA_ONE, FLA_Part_2x2(), FLA_Repart_2x2_to_3x3(), and FLA_Sylv_internal().
Referenced by FLA_Sylv_hh().
{
// Blocked variant 7 of the "hh" Sylvester solver: every FLA_Sylv_internal
// call below passes FLA_CONJ_TRANSPOSE for both the A and the B operand.
// Each pass solves the middle block column C01, C11 and C21 through
// FLA_Sylv_internal and then applies gemm-only updates to C10 and C00
// (no solve is issued for those two).
// isgn and scale are forwarded to FLA_Sylv_internal (isgn also feeds
// FLA_NEGATE); cntl supplies the blocksize and the sub-control trees.
// Always returns FLA_SUCCESS: results of the internal calls are not checked.
FLA_Obj ATL, ATR, A00, A01, A02,
ABL, ABR, A10, A11, A12,
A20, A21, A22;
FLA_Obj BTL, BTR, B00, B01, B02,
BBL, BBR, B10, B11, B12,
B20, B21, B22;
FLA_Obj CTL, CTR, C00, C01, C02,
CBL, CBR, C10, C11, C12,
C20, C21, C22;
dim_t b;
// Initial 2x2 views; the 0 x 0 quadrant requested is TL for A, BR for B and
// TR for C.
FLA_Part_2x2( A, &ATL, &ATR,
&ABL, &ABR, 0, 0, FLA_TL );
FLA_Part_2x2( B, &BTL, &BTR,
&BBL, &BBR, 0, 0, FLA_BR );
FLA_Part_2x2( C, &CTL, &CTR,
&CBL, &CBR, 0, 0, FLA_TR );
// Keep going until ATL has as many rows as A.
while ( FLA_Obj_length( ATL ) < FLA_Obj_length( A ) ){
// Step size for this pass, derived from CBL and the blocksize stored in cntl.
b = FLA_Determine_blocksize( CBL, FLA_BL, FLA_Cntl_blocksize( cntl ) );
// Expose 3x3 views; per the quadrant arguments the b x b request carves A11
// out of ABR, B11 out of BTL and C11 out of CBL.
FLA_Repart_2x2_to_3x3( ATL, /**/ ATR, &A00, /**/ &A01, &A02,
/* ************* */ /* ******************** */
&A10, /**/ &A11, &A12,
ABL, /**/ ABR, &A20, /**/ &A21, &A22,
b, b, FLA_BR );
FLA_Repart_2x2_to_3x3( BTL, /**/ BTR, &B00, &B01, /**/ &B02,
&B10, &B11, /**/ &B12,
/* ************* */ /* ******************** */
BBL, /**/ BBR, &B20, &B21, /**/ &B22,
b, b, FLA_TL );
FLA_Repart_2x2_to_3x3( CTL, /**/ CTR, &C00, &C01, /**/ &C02,
/* ************* */ /* ******************** */
&C10, &C11, /**/ &C12,
CBL, /**/ CBR, &C20, &C21, /**/ &C22,
b, b, FLA_BL );
// Loop Invariant:
// CTL =
// CTR =
// CBL =
// CBR =
/*------------------------------------------------------------*/
// C01 = sylv( A00', B11', C01 );
FLA_Sylv_internal( FLA_CONJ_TRANSPOSE, FLA_CONJ_TRANSPOSE,
isgn, A00, B11, C01, scale,
FLA_Cntl_sub_sylv1( cntl ) );
// C01 must be solved first: it is an input to the C11 update below.
// FLA_NEGATE( isgn ) supplies the "-/+" factor written in the formulas
// (presumably the negated sign; confirm against the macro definition).
// C11 = sylv( A11', B11', C11 - A01' * C01 -/+ C12 * B12' );
FLA_Gemm_internal( FLA_NO_TRANSPOSE, FLA_CONJ_TRANSPOSE,
FLA_NEGATE( isgn ), C12, B12, FLA_ONE, C11,
FLA_Cntl_sub_gemm1( cntl ) );
FLA_Gemm_internal( FLA_CONJ_TRANSPOSE, FLA_NO_TRANSPOSE,
FLA_MINUS_ONE, A01, C01, FLA_ONE, C11,
FLA_Cntl_sub_gemm2( cntl ) );
FLA_Sylv_internal( FLA_CONJ_TRANSPOSE, FLA_CONJ_TRANSPOSE,
isgn, A11, B11, C11, scale,
FLA_Cntl_sub_sylv2( cntl ) );
// The C21 update consumes both the solved C01 and the solved C11.
// C21 = sylv( A22', B11', C21 - A12' * C11 - A02' * C01 -/+ C22 * B12' );
FLA_Gemm_internal( FLA_NO_TRANSPOSE, FLA_CONJ_TRANSPOSE,
FLA_NEGATE( isgn ), C22, B12, FLA_ONE, C21,
FLA_Cntl_sub_gemm3( cntl ) );
FLA_Gemm_internal( FLA_CONJ_TRANSPOSE, FLA_NO_TRANSPOSE,
FLA_MINUS_ONE, A02, C01, FLA_ONE, C21,
FLA_Cntl_sub_gemm4( cntl ) );
FLA_Gemm_internal( FLA_CONJ_TRANSPOSE, FLA_NO_TRANSPOSE,
FLA_MINUS_ONE, A12, C11, FLA_ONE, C21,
FLA_Cntl_sub_gemm5( cntl ) );
FLA_Sylv_internal( FLA_CONJ_TRANSPOSE, FLA_CONJ_TRANSPOSE,
isgn, A22, B11, C21, scale,
FLA_Cntl_sub_sylv3( cntl ) );
// Update only (no solve): push the solved C11 (and C12) into C10.
// C10 = C10 -/+ C12 * B02' -/+ C11 * B01';
FLA_Gemm_internal( FLA_NO_TRANSPOSE, FLA_CONJ_TRANSPOSE,
FLA_NEGATE( isgn ), C11, B01, FLA_ONE, C10,
FLA_Cntl_sub_gemm6( cntl ) );
FLA_Gemm_internal( FLA_NO_TRANSPOSE, FLA_CONJ_TRANSPOSE,
FLA_NEGATE( isgn ), C12, B02, FLA_ONE, C10,
FLA_Cntl_sub_gemm7( cntl ) );
// Update only (no solve): push the solved C01 into C00.
// C00 = C00 -/+ C01 * B01';
FLA_Gemm_internal( FLA_NO_TRANSPOSE, FLA_CONJ_TRANSPOSE,
FLA_NEGATE( isgn ), C01, B01, FLA_ONE, C00,
FLA_Cntl_sub_gemm8( cntl ) );
/*------------------------------------------------------------*/
// Collapse back to 2x2 views; the quadrant arguments move the middle blocks
// into ATL, BBR and CTR.
FLA_Cont_with_3x3_to_2x2( &ATL, /**/ &ATR, A00, A01, /**/ A02,
A10, A11, /**/ A12,
/* ************** */ /* ****************** */
&ABL, /**/ &ABR, A20, A21, /**/ A22,
FLA_TL );
FLA_Cont_with_3x3_to_2x2( &BTL, /**/ &BTR, B00, /**/ B01, B02,
/* ************** */ /* ****************** */
B10, /**/ B11, B12,
&BBL, /**/ &BBR, B20, /**/ B21, B22,
FLA_BR );
FLA_Cont_with_3x3_to_2x2( &CTL, /**/ &CTR, C00, /**/ C01, C02,
C10, /**/ C11, C12,
/* ************** */ /* ****************** */
&CBL, /**/ &CBR, C20, /**/ C21, C22,
FLA_TR );
}
return FLA_SUCCESS;
}
| FLA_Error FLA_Sylv_hh_blk_var8 | ( | FLA_Obj | isgn, |
| FLA_Obj | A, | ||
| FLA_Obj | B, | ||
| FLA_Obj | C, | ||
| FLA_Obj | scale, | ||
| fla_sylv_t * | cntl | ||
| ) |
References FLA_Cont_with_3x3_to_2x2(), FLA_Determine_blocksize(), FLA_Gemm_internal(), FLA_MINUS_ONE, FLA_Obj_length(), FLA_ONE, FLA_Part_2x2(), FLA_Repart_2x2_to_3x3(), and FLA_Sylv_internal().
Referenced by FLA_Sylv_hh().
{
// Blocked variant 8 of the "hh" Sylvester solver: every FLA_Sylv_internal
// call below passes FLA_CONJ_TRANSPOSE for both the A and the B operand.
// Each pass solves the middle block row C12, C11 and C10 through
// FLA_Sylv_internal and applies gemm-only updates to C22 and C21
// (no solve is issued for those two).
// isgn and scale are forwarded to FLA_Sylv_internal (isgn also feeds
// FLA_NEGATE); cntl supplies the blocksize and the sub-control trees.
// Always returns FLA_SUCCESS: results of the internal calls are not checked.
FLA_Obj ATL, ATR, A00, A01, A02,
ABL, ABR, A10, A11, A12,
A20, A21, A22;
FLA_Obj BTL, BTR, B00, B01, B02,
BBL, BBR, B10, B11, B12,
B20, B21, B22;
FLA_Obj CTL, CTR, C00, C01, C02,
CBL, CBR, C10, C11, C12,
C20, C21, C22;
dim_t b;
// Initial 2x2 views; the 0 x 0 quadrant requested is TL for A, BR for B and
// TR for C.
FLA_Part_2x2( A, &ATL, &ATR,
&ABL, &ABR, 0, 0, FLA_TL );
FLA_Part_2x2( B, &BTL, &BTR,
&BBL, &BBR, 0, 0, FLA_BR );
FLA_Part_2x2( C, &CTL, &CTR,
&CBL, &CBR, 0, 0, FLA_TR );
// Keep going until ATL has as many rows as A.
while ( FLA_Obj_length( ATL ) < FLA_Obj_length( A ) ){
// Step size for this pass, derived from CBL and the blocksize stored in cntl.
b = FLA_Determine_blocksize( CBL, FLA_BL, FLA_Cntl_blocksize( cntl ) );
// Expose 3x3 views; per the quadrant arguments the b x b request carves A11
// out of ABR, B11 out of BTL and C11 out of CBL.
FLA_Repart_2x2_to_3x3( ATL, /**/ ATR, &A00, /**/ &A01, &A02,
/* ************* */ /* ******************** */
&A10, /**/ &A11, &A12,
ABL, /**/ ABR, &A20, /**/ &A21, &A22,
b, b, FLA_BR );
FLA_Repart_2x2_to_3x3( BTL, /**/ BTR, &B00, &B01, /**/ &B02,
&B10, &B11, /**/ &B12,
/* ************* */ /* ******************** */
BBL, /**/ BBR, &B20, &B21, /**/ &B22,
b, b, FLA_TL );
FLA_Repart_2x2_to_3x3( CTL, /**/ CTR, &C00, &C01, /**/ &C02,
/* ************* */ /* ******************** */
&C10, &C11, /**/ &C12,
CBL, /**/ CBR, &C20, &C21, /**/ &C22,
b, b, FLA_BL );
// Loop Invariant:
// CTL =
// CTR =
// CBL =
// CBR =
/*------------------------------------------------------------*/
// C12 = sylv( A11', B22', C12 );
FLA_Sylv_internal( FLA_CONJ_TRANSPOSE, FLA_CONJ_TRANSPOSE,
isgn, A11, B22, C12, scale,
FLA_Cntl_sub_sylv1( cntl ) );
// Update only (no solve): push the solved C12 into C22.
// C22 = C22 - A12' * C12;
FLA_Gemm_internal( FLA_CONJ_TRANSPOSE, FLA_NO_TRANSPOSE,
FLA_MINUS_ONE, A12, C12, FLA_ONE, C22,
FLA_Cntl_sub_gemm1( cntl ) );
// FLA_NEGATE( isgn ) supplies the "-/+" factor written in the formulas
// (presumably the negated sign; confirm against the macro definition).
// C11 = sylv( A11', B11', C11 - A01' * C01 -/+ C12 * B12' );
FLA_Gemm_internal( FLA_NO_TRANSPOSE, FLA_CONJ_TRANSPOSE,
FLA_NEGATE( isgn ), C12, B12, FLA_ONE, C11,
FLA_Cntl_sub_gemm2( cntl ) );
FLA_Gemm_internal( FLA_CONJ_TRANSPOSE, FLA_NO_TRANSPOSE,
FLA_MINUS_ONE, A01, C01, FLA_ONE, C11,
FLA_Cntl_sub_gemm3( cntl ) );
FLA_Sylv_internal( FLA_CONJ_TRANSPOSE, FLA_CONJ_TRANSPOSE,
isgn, A11, B11, C11, scale,
FLA_Cntl_sub_sylv2( cntl ) );
// Update only (no solve): push C01 and the solved C11 into C21.
// C21 = C21 - A12' * C11 - A02' * C01;
FLA_Gemm_internal( FLA_CONJ_TRANSPOSE, FLA_NO_TRANSPOSE,
FLA_MINUS_ONE, A02, C01, FLA_ONE, C21,
FLA_Cntl_sub_gemm4( cntl ) );
FLA_Gemm_internal( FLA_CONJ_TRANSPOSE, FLA_NO_TRANSPOSE,
FLA_MINUS_ONE, A12, C11, FLA_ONE, C21,
FLA_Cntl_sub_gemm5( cntl ) );
// The C10 update consumes both the solved C11 and the solved C12.
// C10 = sylv( A11', B00', C10 - A01' * C00 -/+ C12 * B02' -/+ C11 * B01' );
FLA_Gemm_internal( FLA_NO_TRANSPOSE, FLA_CONJ_TRANSPOSE,
FLA_NEGATE( isgn ), C11, B01, FLA_ONE, C10,
FLA_Cntl_sub_gemm6( cntl ) );
FLA_Gemm_internal( FLA_NO_TRANSPOSE, FLA_CONJ_TRANSPOSE,
FLA_NEGATE( isgn ), C12, B02, FLA_ONE, C10,
FLA_Cntl_sub_gemm7( cntl ) );
FLA_Gemm_internal( FLA_CONJ_TRANSPOSE, FLA_NO_TRANSPOSE,
FLA_MINUS_ONE, A01, C00, FLA_ONE, C10,
FLA_Cntl_sub_gemm8( cntl ) );
FLA_Sylv_internal( FLA_CONJ_TRANSPOSE, FLA_CONJ_TRANSPOSE,
isgn, A11, B00, C10, scale,
FLA_Cntl_sub_sylv3( cntl ) );
/*------------------------------------------------------------*/
// Collapse back to 2x2 views; the quadrant arguments move the middle blocks
// into ATL, BBR and CTR.
FLA_Cont_with_3x3_to_2x2( &ATL, /**/ &ATR, A00, A01, /**/ A02,
A10, A11, /**/ A12,
/* ************** */ /* ****************** */
&ABL, /**/ &ABR, A20, A21, /**/ A22,
FLA_TL );
FLA_Cont_with_3x3_to_2x2( &BTL, /**/ &BTR, B00, /**/ B01, B02,
/* ************** */ /* ****************** */
B10, /**/ B11, B12,
&BBL, /**/ &BBR, B20, /**/ B21, B22,
FLA_BR );
FLA_Cont_with_3x3_to_2x2( &CTL, /**/ &CTR, C00, /**/ C01, C02,
C10, /**/ C11, C12,
/* ************** */ /* ****************** */
&CBL, /**/ &CBR, C20, /**/ C21, C22,
FLA_TR );
}
return FLA_SUCCESS;
}
| FLA_Error FLA_Sylv_hh_blk_var9 | ( | FLA_Obj | isgn, |
| FLA_Obj | A, | ||
| FLA_Obj | B, | ||
| FLA_Obj | C, | ||
| FLA_Obj | scale, | ||
| fla_sylv_t * | cntl | ||
| ) |
References FLA_Cont_with_3x3_to_2x2(), FLA_Determine_blocksize(), FLA_Gemm_internal(), FLA_MINUS_ONE, FLA_Obj_length(), FLA_ONE, FLA_Part_2x2(), FLA_Repart_2x2_to_3x3(), and FLA_Sylv_internal().
Referenced by FLA_Sylv_hh().
{
// Blocked variant 9 of the "hh" Sylvester solver: every FLA_Sylv_internal
// call below passes FLA_CONJ_TRANSPOSE for both the A and the B operand.
// Each pass solves C11, C21 and C10 through FLA_Sylv_internal after applying
// their pending gemm updates; the other blocks of C are only read.
// isgn and scale are forwarded to FLA_Sylv_internal (isgn also feeds
// FLA_NEGATE); cntl supplies the blocksize and the sub-control trees.
// Always returns FLA_SUCCESS: results of the internal calls are not checked.
FLA_Obj ATL, ATR, A00, A01, A02,
ABL, ABR, A10, A11, A12,
A20, A21, A22;
FLA_Obj BTL, BTR, B00, B01, B02,
BBL, BBR, B10, B11, B12,
B20, B21, B22;
FLA_Obj CTL, CTR, C00, C01, C02,
CBL, CBR, C10, C11, C12,
C20, C21, C22;
dim_t b;
// Initial 2x2 views; the 0 x 0 quadrant requested is TL for A, BR for B and
// TR for C.
FLA_Part_2x2( A, &ATL, &ATR,
&ABL, &ABR, 0, 0, FLA_TL );
FLA_Part_2x2( B, &BTL, &BTR,
&BBL, &BBR, 0, 0, FLA_BR );
FLA_Part_2x2( C, &CTL, &CTR,
&CBL, &CBR, 0, 0, FLA_TR );
// Keep going until ATL has as many rows as A.
while ( FLA_Obj_length( ATL ) < FLA_Obj_length( A ) ){
// Step size for this pass, derived from CBL and the blocksize stored in cntl.
b = FLA_Determine_blocksize( CBL, FLA_BL, FLA_Cntl_blocksize( cntl ) );
// Expose 3x3 views; per the quadrant arguments the b x b request carves A11
// out of ABR, B11 out of BTL and C11 out of CBL.
FLA_Repart_2x2_to_3x3( ATL, /**/ ATR, &A00, /**/ &A01, &A02,
/* ************* */ /* ******************** */
&A10, /**/ &A11, &A12,
ABL, /**/ ABR, &A20, /**/ &A21, &A22,
b, b, FLA_BR );
FLA_Repart_2x2_to_3x3( BTL, /**/ BTR, &B00, &B01, /**/ &B02,
&B10, &B11, /**/ &B12,
/* ************* */ /* ******************** */
BBL, /**/ BBR, &B20, &B21, /**/ &B22,
b, b, FLA_TL );
FLA_Repart_2x2_to_3x3( CTL, /**/ CTR, &C00, &C01, /**/ &C02,
/* ************* */ /* ******************** */
&C10, &C11, /**/ &C12,
CBL, /**/ CBR, &C20, &C21, /**/ &C22,
b, b, FLA_BL );
// Loop Invariant:
// CTL =
// CTR =
// CBL =
// CBR =
/*------------------------------------------------------------*/
// FLA_NEGATE( isgn ) supplies the "-/+" factor written in the formulas
// (presumably the negated sign; confirm against the macro definition).
// C11 = sylv( A11', B11', C11 - A01' * C01 -/+ C12 * B12' );
FLA_Gemm_internal( FLA_NO_TRANSPOSE, FLA_CONJ_TRANSPOSE,
FLA_NEGATE( isgn ), C12, B12, FLA_ONE, C11,
FLA_Cntl_sub_gemm1( cntl ) );
FLA_Gemm_internal( FLA_CONJ_TRANSPOSE, FLA_NO_TRANSPOSE,
FLA_MINUS_ONE, A01, C01, FLA_ONE, C11,
FLA_Cntl_sub_gemm2( cntl ) );
FLA_Sylv_internal( FLA_CONJ_TRANSPOSE, FLA_CONJ_TRANSPOSE,
isgn, A11, B11, C11, scale,
FLA_Cntl_sub_sylv1( cntl ) );
// C11 must be solved first: it is an input to both updates below.
// C21 = sylv( A22', B11', C21 - A12' * C11 - A02' * C01 -/+ C22 * B12' );
FLA_Gemm_internal( FLA_NO_TRANSPOSE, FLA_CONJ_TRANSPOSE,
FLA_NEGATE( isgn ), C22, B12, FLA_ONE, C21,
FLA_Cntl_sub_gemm3( cntl ) );
FLA_Gemm_internal( FLA_CONJ_TRANSPOSE, FLA_NO_TRANSPOSE,
FLA_MINUS_ONE, A02, C01, FLA_ONE, C21,
FLA_Cntl_sub_gemm4( cntl ) );
FLA_Gemm_internal( FLA_CONJ_TRANSPOSE, FLA_NO_TRANSPOSE,
FLA_MINUS_ONE, A12, C11, FLA_ONE, C21,
FLA_Cntl_sub_gemm5( cntl ) );
FLA_Sylv_internal( FLA_CONJ_TRANSPOSE, FLA_CONJ_TRANSPOSE,
isgn, A22, B11, C21, scale,
FLA_Cntl_sub_sylv2( cntl ) );
// C10 = sylv( A11', B00', C10 - A01' * C00 -/+ C12 * B02' -/+ C11 * B01' );
FLA_Gemm_internal( FLA_NO_TRANSPOSE, FLA_CONJ_TRANSPOSE,
FLA_NEGATE( isgn ), C11, B01, FLA_ONE, C10,
FLA_Cntl_sub_gemm6( cntl ) );
FLA_Gemm_internal( FLA_NO_TRANSPOSE, FLA_CONJ_TRANSPOSE,
FLA_NEGATE( isgn ), C12, B02, FLA_ONE, C10,
FLA_Cntl_sub_gemm7( cntl ) );
FLA_Gemm_internal( FLA_CONJ_TRANSPOSE, FLA_NO_TRANSPOSE,
FLA_MINUS_ONE, A01, C00, FLA_ONE, C10,
FLA_Cntl_sub_gemm8( cntl ) );
FLA_Sylv_internal( FLA_CONJ_TRANSPOSE, FLA_CONJ_TRANSPOSE,
isgn, A11, B00, C10, scale,
FLA_Cntl_sub_sylv3( cntl ) );
/*------------------------------------------------------------*/
// Collapse back to 2x2 views; the quadrant arguments move the middle blocks
// into ATL, BBR and CTR.
FLA_Cont_with_3x3_to_2x2( &ATL, /**/ &ATR, A00, A01, /**/ A02,
A10, A11, /**/ A12,
/* ************** */ /* ****************** */
&ABL, /**/ &ABR, A20, A21, /**/ A22,
FLA_TL );
FLA_Cont_with_3x3_to_2x2( &BTL, /**/ &BTR, B00, /**/ B01, B02,
/* ************** */ /* ****************** */
B10, /**/ B11, B12,
&BBL, /**/ &BBR, B20, /**/ B21, B22,
FLA_BR );
FLA_Cont_with_3x3_to_2x2( &CTL, /**/ &CTR, C00, /**/ C01, C02,
C10, /**/ C11, C12,
/* ************** */ /* ****************** */
&CBL, /**/ &CBR, C20, /**/ C21, C22,
FLA_TR );
}
return FLA_SUCCESS;
}
| FLA_Error FLA_Sylv_hh_opc_var1 | ( | float | sgn, |
| int | m_C, | ||
| int | n_C, | ||
| scomplex * | buff_A, | ||
| int | rs_A, | ||
| int | cs_A, | ||
| scomplex * | buff_B, | ||
| int | rs_B, | ||
| int | cs_B, | ||
| scomplex * | buff_C, | ||
| int | rs_C, | ||
| int | cs_C, | ||
| scomplex * | buff_scale, | ||
| int * | info | ||
| ) |
References bli_cdot(), BLIS_CONJUGATE, scomplex::imag, and scomplex::real.
Referenced by FLA_Sylv_hh_opt_var1().
{
// Unblocked single-precision complex kernel. C is overwritten one entry at
// a time: columns are visited from last to first and, within a column, rows
// from first to last. For the entry at (row, col):
//   rhs   = c - ( dot_a + sgn * conj-adjusted dot_b )
//   denom = conj( A(row,row) ) + sgn * conj( B(col,col) )
//   c     = rhs / denom   (via bli_cdiv3)
// buff_scale and info are accepted but never read or written here.
// Always returns FLA_SUCCESS.
int row, col;

for ( col = n_C - 1; col >= 0; col-- )
{
// Quantities that depend only on the current column index.
scomplex* b_diag = buff_B + (col )*cs_B + (col )*rs_B;
scomplex* b_tail = buff_B + (col+1)*cs_B + (col )*rs_B;
scomplex* c_top = buff_C + (col )*cs_C + (0 )*rs_C;
int n_tail = n_C - col - 1;

for ( row = 0; row < m_C; row++ )
{
scomplex* a_top = buff_A + (row )*cs_A + (0 )*rs_A;
scomplex* a_diag = buff_A + (row )*cs_A + (row )*rs_A;
scomplex* c_tail = buff_C + (col+1)*cs_C + (row )*rs_C;
scomplex* c_elem = buff_C + (col )*cs_C + (row )*rs_C;
scomplex dot_a;
scomplex dot_b;
scomplex rhs;
scomplex denom;
scomplex soln;
/*------------------------------------------------------------*/
// dot_a: first `row` entries of A's column `row` against the first `row`
// entries of C's column `col` (already overwritten earlier in this column).
bli_cdot( BLIS_CONJUGATE,
row,
a_top, rs_A,
c_top, rs_C,
&dot_a );
// dot_b: the n_tail entries to the right of (row, col) in C against the
// n_tail entries to the right of B's diagonal entry in row `col`.
bli_cdot( BLIS_CONJUGATE,
n_tail,
c_tail, cs_C,
b_tail, cs_B,
&dot_b );
// The imaginary part of dot_b enters with its sign flipped.
rhs.real = c_elem->real - ( dot_a.real + sgn * dot_b.real );
rhs.imag = c_elem->imag - ( dot_a.imag + sgn * -dot_b.imag );
// Both diagonal entries contribute with negated imaginary parts.
denom.real = a_diag->real + sgn * b_diag->real;
denom.imag = -a_diag->imag + sgn * -b_diag->imag;
bli_cdiv3( &rhs, &denom, &soln );
*c_elem = soln;
/*------------------------------------------------------------*/
}
}
return FLA_SUCCESS;
}
| FLA_Error FLA_Sylv_hh_opd_var1 | ( | double | sgn, |
| int | m_C, | ||
| int | n_C, | ||
| double * | buff_A, | ||
| int | rs_A, | ||
| int | cs_A, | ||
| double * | buff_B, | ||
| int | rs_B, | ||
| int | cs_B, | ||
| double * | buff_C, | ||
| int | rs_C, | ||
| int | cs_C, | ||
| double * | buff_scale, | ||
| int * | info | ||
| ) |
References bli_ddot(), and BLIS_CONJUGATE.
Referenced by FLA_Sylv_hh_opt_var1().
{
// Unblocked double-precision real kernel. C is overwritten one entry at a
// time: columns are visited from last to first and, within a column, rows
// from first to last. For the entry at (row, col):
//   c = ( c - ( dot_a + sgn * dot_b ) ) / ( A(row,row) + sgn * B(col,col) )
// buff_scale and info are accepted but never read or written here.
// Always returns FLA_SUCCESS.
int row, col;

for ( col = n_C - 1; col >= 0; col-- )
{
// Quantities that depend only on the current column index.
double* b_diag = buff_B + (col )*cs_B + (col )*rs_B;
double* b_tail = buff_B + (col+1)*cs_B + (col )*rs_B;
double* c_top = buff_C + (col )*cs_C + (0 )*rs_C;
int n_tail = n_C - col - 1;

for ( row = 0; row < m_C; row++ )
{
double* a_top = buff_A + (row )*cs_A + (0 )*rs_A;
double* a_diag = buff_A + (row )*cs_A + (row )*rs_A;
double* c_tail = buff_C + (col+1)*cs_C + (row )*rs_C;
double* c_elem = buff_C + (col )*cs_C + (row )*rs_C;
double dot_a;
double dot_b;
double rhs;
double denom;
double soln;
/*------------------------------------------------------------*/
// dot_a: first `row` entries of A's column `row` against the first `row`
// entries of C's column `col` (already overwritten earlier in this column).
bli_ddot( BLIS_CONJUGATE,
row,
a_top, rs_A,
c_top, rs_C,
&dot_a );
// dot_b: the n_tail entries to the right of (row, col) in C against the
// n_tail entries to the right of B's diagonal entry in row `col`.
bli_ddot( BLIS_CONJUGATE,
n_tail,
c_tail, cs_C,
b_tail, cs_B,
&dot_b );
rhs = (*c_elem) - ( dot_a + sgn * dot_b );
denom = (*a_diag) + sgn * (*b_diag);
bli_ddiv3( &rhs, &denom, &soln );
*c_elem = soln;
/*------------------------------------------------------------*/
}
}
return FLA_SUCCESS;
}
| FLA_Error FLA_Sylv_hh_ops_var1 | ( | float | sgn, |
| int | m_C, | ||
| int | n_C, | ||
| float * | buff_A, | ||
| int | rs_A, | ||
| int | cs_A, | ||
| float * | buff_B, | ||
| int | rs_B, | ||
| int | cs_B, | ||
| float * | buff_C, | ||
| int | rs_C, | ||
| int | cs_C, | ||
| float * | buff_scale, | ||
| int * | info | ||
| ) |
References bli_sdot(), and BLIS_CONJUGATE.
Referenced by FLA_Sylv_hh_opt_var1().
{
// Unblocked single-precision real kernel. C is overwritten one entry at a
// time: columns are visited from last to first and, within a column, rows
// from first to last. For the entry at (row, col):
//   c = ( c - ( dot_a + sgn * dot_b ) ) / ( A(row,row) + sgn * B(col,col) )
// buff_scale and info are accepted but never read or written here.
// Always returns FLA_SUCCESS.
int row, col;

for ( col = n_C - 1; col >= 0; col-- )
{
// Quantities that depend only on the current column index.
float* b_diag = buff_B + (col )*cs_B + (col )*rs_B;
float* b_tail = buff_B + (col+1)*cs_B + (col )*rs_B;
float* c_top = buff_C + (col )*cs_C + (0 )*rs_C;
int n_tail = n_C - col - 1;

for ( row = 0; row < m_C; row++ )
{
float* a_top = buff_A + (row )*cs_A + (0 )*rs_A;
float* a_diag = buff_A + (row )*cs_A + (row )*rs_A;
float* c_tail = buff_C + (col+1)*cs_C + (row )*rs_C;
float* c_elem = buff_C + (col )*cs_C + (row )*rs_C;
float dot_a;
float dot_b;
float rhs;
float denom;
float soln;
/*------------------------------------------------------------*/
// dot_a: first `row` entries of A's column `row` against the first `row`
// entries of C's column `col` (already overwritten earlier in this column).
bli_sdot( BLIS_CONJUGATE,
row,
a_top, rs_A,
c_top, rs_C,
&dot_a );
// dot_b: the n_tail entries to the right of (row, col) in C against the
// n_tail entries to the right of B's diagonal entry in row `col`.
bli_sdot( BLIS_CONJUGATE,
n_tail,
c_tail, cs_C,
b_tail, cs_B,
&dot_b );
rhs = (*c_elem) - ( dot_a + sgn * dot_b );
denom = (*a_diag) + sgn * (*b_diag);
bli_sdiv3( &rhs, &denom, &soln );
*c_elem = soln;
/*------------------------------------------------------------*/
}
}
return FLA_SUCCESS;
}
References FLA_Obj_col_stride(), FLA_Obj_datatype(), FLA_Obj_length(), FLA_Obj_row_stride(), FLA_Obj_width(), FLA_Sylv_hh_opc_var1(), FLA_Sylv_hh_opd_var1(), FLA_Sylv_hh_ops_var1(), and FLA_Sylv_hh_opz_var1().
Referenced by FLA_Sylv_hh(), FLA_Sylv_hh_opt_var10(), FLA_Sylv_hh_opt_var11(), FLA_Sylv_hh_opt_var12(), FLA_Sylv_hh_opt_var13(), FLA_Sylv_hh_opt_var14(), FLA_Sylv_hh_opt_var15(), FLA_Sylv_hh_opt_var16(), FLA_Sylv_hh_opt_var17(), FLA_Sylv_hh_opt_var18(), FLA_Sylv_hh_opt_var2(), FLA_Sylv_hh_opt_var3(), FLA_Sylv_hh_opt_var4(), FLA_Sylv_hh_opt_var5(), FLA_Sylv_hh_opt_var6(), FLA_Sylv_hh_opt_var7(), FLA_Sylv_hh_opt_var8(), and FLA_Sylv_hh_opt_var9().
{
// Datatype-dispatching front end for the unblocked variant-1 kernels.
// Reads strides from A, B and C, takes the problem dimensions from C, and
// calls the kernel matching A's datatype with raw buffers. isgn is read as
// an int and converted to the kernel's real type. A datatype other than the
// four handled below results in no kernel call. The `info` value filled by
// the kernels is not inspected; the function always returns FLA_SUCCESS.
FLA_Datatype dt;
int m_C, n_C;
int rs_A, cs_A, rs_B, cs_B, rs_C, cs_C;
int info;

m_C = FLA_Obj_length( C );
n_C = FLA_Obj_width( C );

rs_A = FLA_Obj_row_stride( A );
cs_A = FLA_Obj_col_stride( A );
rs_B = FLA_Obj_row_stride( B );
cs_B = FLA_Obj_col_stride( B );
rs_C = FLA_Obj_row_stride( C );
cs_C = FLA_Obj_col_stride( C );

// The datatype of A alone selects the kernel.
dt = FLA_Obj_datatype( A );

if ( dt == FLA_FLOAT )
{
int* sign_ptr = FLA_INT_PTR( isgn );
float* ptr_A = FLA_FLOAT_PTR( A );
float* ptr_B = FLA_FLOAT_PTR( B );
float* ptr_C = FLA_FLOAT_PTR( C );
float* ptr_scale = FLA_FLOAT_PTR( scale );
float sgn = ( float ) *sign_ptr;
FLA_Sylv_hh_ops_var1( sgn,
m_C,
n_C,
ptr_A, rs_A, cs_A,
ptr_B, rs_B, cs_B,
ptr_C, rs_C, cs_C,
ptr_scale,
&info );
}
else if ( dt == FLA_DOUBLE )
{
int* sign_ptr = FLA_INT_PTR( isgn );
double* ptr_A = FLA_DOUBLE_PTR( A );
double* ptr_B = FLA_DOUBLE_PTR( B );
double* ptr_C = FLA_DOUBLE_PTR( C );
double* ptr_scale = FLA_DOUBLE_PTR( scale );
double sgn = ( double ) *sign_ptr;
FLA_Sylv_hh_opd_var1( sgn,
m_C,
n_C,
ptr_A, rs_A, cs_A,
ptr_B, rs_B, cs_B,
ptr_C, rs_C, cs_C,
ptr_scale,
&info );
}
else if ( dt == FLA_COMPLEX )
{
int* sign_ptr = FLA_INT_PTR( isgn );
scomplex* ptr_A = FLA_COMPLEX_PTR( A );
scomplex* ptr_B = FLA_COMPLEX_PTR( B );
scomplex* ptr_C = FLA_COMPLEX_PTR( C );
scomplex* ptr_scale = FLA_COMPLEX_PTR( scale );
float sgn = ( float ) *sign_ptr;
FLA_Sylv_hh_opc_var1( sgn,
m_C,
n_C,
ptr_A, rs_A, cs_A,
ptr_B, rs_B, cs_B,
ptr_C, rs_C, cs_C,
ptr_scale,
&info );
}
else if ( dt == FLA_DOUBLE_COMPLEX )
{
int* sign_ptr = FLA_INT_PTR( isgn );
dcomplex* ptr_A = FLA_DOUBLE_COMPLEX_PTR( A );
dcomplex* ptr_B = FLA_DOUBLE_COMPLEX_PTR( B );
dcomplex* ptr_C = FLA_DOUBLE_COMPLEX_PTR( C );
dcomplex* ptr_scale = FLA_DOUBLE_COMPLEX_PTR( scale );
double sgn = ( double ) *sign_ptr;
FLA_Sylv_hh_opz_var1( sgn,
m_C,
n_C,
ptr_A, rs_A, cs_A,
ptr_B, rs_B, cs_B,
ptr_C, rs_C, cs_C,
ptr_scale,
&info );
}
return FLA_SUCCESS;
}
References FLA_Sylv_hh_opt_var1().
{
// Thin forwarder: passes isgn, A, B, C and scale straight through to
// FLA_Sylv_hh_opt_var1() and returns whatever it returns.
return FLA_Sylv_hh_opt_var1( isgn, A, B, C, scale );
}
References FLA_Sylv_hh_opt_var1().
{
// Thin forwarder: passes isgn, A, B, C and scale straight through to
// FLA_Sylv_hh_opt_var1() and returns whatever it returns.
return FLA_Sylv_hh_opt_var1( isgn, A, B, C, scale );
}
References FLA_Sylv_hh_opt_var1().
{
// Thin forwarder: passes isgn, A, B, C and scale straight through to
// FLA_Sylv_hh_opt_var1() and returns whatever it returns.
return FLA_Sylv_hh_opt_var1( isgn, A, B, C, scale );
}
References FLA_Sylv_hh_opt_var1().
{
// Thin forwarder: passes isgn, A, B, C and scale straight through to
// FLA_Sylv_hh_opt_var1() and returns whatever it returns.
return FLA_Sylv_hh_opt_var1( isgn, A, B, C, scale );
}
References FLA_Sylv_hh_opt_var1().
{
// Thin forwarder: passes isgn, A, B, C and scale straight through to
// FLA_Sylv_hh_opt_var1() and returns whatever it returns.
return FLA_Sylv_hh_opt_var1( isgn, A, B, C, scale );
}
References FLA_Sylv_hh_opt_var1().
{
// Thin forwarder: passes isgn, A, B, C and scale straight through to
// FLA_Sylv_hh_opt_var1() and returns whatever it returns.
return FLA_Sylv_hh_opt_var1( isgn, A, B, C, scale );
}
References FLA_Sylv_hh_opt_var1().
{
// Thin forwarder: passes isgn, A, B, C and scale straight through to
// FLA_Sylv_hh_opt_var1() and returns whatever it returns.
return FLA_Sylv_hh_opt_var1( isgn, A, B, C, scale );
}
References FLA_Sylv_hh_opt_var1().
{
// Thin forwarder: passes isgn, A, B, C and scale straight through to
// FLA_Sylv_hh_opt_var1() and returns whatever it returns.
return FLA_Sylv_hh_opt_var1( isgn, A, B, C, scale );
}
References FLA_Sylv_hh_opt_var1().
{
// Thin forwarder: passes isgn, A, B, C and scale straight through to
// FLA_Sylv_hh_opt_var1() and returns whatever it returns.
return FLA_Sylv_hh_opt_var1( isgn, A, B, C, scale );
}
References FLA_Sylv_hh_opt_var1().
{
// Thin forwarder: passes isgn, A, B, C and scale straight through to
// FLA_Sylv_hh_opt_var1() and returns whatever it returns.
return FLA_Sylv_hh_opt_var1( isgn, A, B, C, scale );
}
References FLA_Sylv_hh_opt_var1().
{
// Thin forwarder: passes isgn, A, B, C and scale straight through to
// FLA_Sylv_hh_opt_var1() and returns whatever it returns.
return FLA_Sylv_hh_opt_var1( isgn, A, B, C, scale );
}
References FLA_Sylv_hh_opt_var1().
{
// Thin forwarder: passes isgn, A, B, C and scale straight through to
// FLA_Sylv_hh_opt_var1() and returns whatever it returns.
return FLA_Sylv_hh_opt_var1( isgn, A, B, C, scale );
}
References FLA_Sylv_hh_opt_var1().
{
// Thin forwarder: passes isgn, A, B, C and scale straight through to
// FLA_Sylv_hh_opt_var1() and returns whatever it returns.
return FLA_Sylv_hh_opt_var1( isgn, A, B, C, scale );
}
References FLA_Sylv_hh_opt_var1().
{
// Thin forwarder: passes isgn, A, B, C and scale straight through to
// FLA_Sylv_hh_opt_var1() and returns whatever it returns.
return FLA_Sylv_hh_opt_var1( isgn, A, B, C, scale );
}
References FLA_Sylv_hh_opt_var1().
{
// Thin forwarder: passes isgn, A, B, C and scale straight through to
// FLA_Sylv_hh_opt_var1() and returns whatever it returns.
return FLA_Sylv_hh_opt_var1( isgn, A, B, C, scale );
}
References FLA_Sylv_hh_opt_var1().
{
// Thin forwarder: passes isgn, A, B, C and scale straight through to
// FLA_Sylv_hh_opt_var1() and returns whatever it returns.
return FLA_Sylv_hh_opt_var1( isgn, A, B, C, scale );
}
References FLA_Sylv_hh_opt_var1().
{
// Thin forwarder: passes isgn, A, B, C and scale straight through to
// FLA_Sylv_hh_opt_var1() and returns whatever it returns.
return FLA_Sylv_hh_opt_var1( isgn, A, B, C, scale );
}
| FLA_Error FLA_Sylv_hh_opz_var1 | ( | double | sgn, |
| int | m_C, | ||
| int | n_C, | ||
| dcomplex * | buff_A, | ||
| int | rs_A, | ||
| int | cs_A, | ||
| dcomplex * | buff_B, | ||
| int | rs_B, | ||
| int | cs_B, | ||
| dcomplex * | buff_C, | ||
| int | rs_C, | ||
| int | cs_C, | ||
| dcomplex * | buff_scale, | ||
| int * | info | ||
| ) |
References bli_zdot(), BLIS_CONJUGATE, dcomplex::imag, and dcomplex::real.
Referenced by FLA_Sylv_hh_opt_var1().
{
  /*
   * Entry-by-entry double-complex kernel. Columns of C are visited from the
   * last one down to the first; within a column, rows go from first to last.
   * Each visited entry of C is overwritten in place with
   *
   *     ( C(row,col) - ( dot_a + sgn * conj( dot_b ) ) )
   *       / ( conj( A(row,row) ) + sgn * conj( B(col,col) ) )
   *
   * where dot_a and dot_b are the two bli_zdot() results described below.
   * buff_scale and info are neither read nor written by this routine, and
   * the return value is always FLA_SUCCESS.
   */
  int col, row;

  for ( col = n_C - 1; col >= 0; --col )
  {
    for ( row = 0; row < m_C; ++row )
    {
      /* Entry being solved for, plus the diagonal entries that form the divisor. */
      dcomplex* c_entry = buff_C + col * cs_C + row * rs_C;
      dcomplex* a_diag  = buff_A + row * cs_A + row * rs_A;
      dcomplex* b_diag  = buff_B + col * cs_B + col * rs_B;

      /* First `row` entries of column `row` of A and of column `col` of C. */
      dcomplex* a_above = buff_A + row * cs_A;
      dcomplex* c_above = buff_C + col * cs_C;

      /* Entries to the right of column `col`: row `row` of C, row `col` of B. */
      dcomplex* c_right = buff_C + ( col + 1 ) * cs_C + row * rs_C;
      dcomplex* b_right = buff_B + ( col + 1 ) * cs_B + col * rs_B;

      int n_above = row;
      int n_right = n_C - col - 1;

      dcomplex dot_a;
      dcomplex dot_b;
      dcomplex rhs;
      dcomplex denom;
      dcomplex solved;

      /* NOTE(review): BLIS_CONJUGATE presumably conjugates the first vector
         operand of bli_zdot(); confirm against its definition. */
      bli_zdot( BLIS_CONJUGATE,
                n_above,
                a_above, rs_A,
                c_above, rs_C,
                &dot_a );

      bli_zdot( BLIS_CONJUGATE,
                n_right,
                c_right, cs_C,
                b_right, cs_B,
                &dot_b );

      /* dot_b enters with its imaginary part negated, i.e. conjugated. */
      rhs.real = c_entry->real - ( dot_a.real + sgn * dot_b.real );
      rhs.imag = c_entry->imag - ( dot_a.imag + sgn * -dot_b.imag );

      /* Divisor is built from the conjugates of both diagonal entries. */
      denom.real = a_diag->real + sgn * b_diag->real;
      denom.imag = -a_diag->imag + sgn * -b_diag->imag;

      /* No guard against a zero divisor is applied here. */
      bli_zdiv3( &rhs, &denom, &solved );

      *c_entry = solved;
    }
  }

  return FLA_SUCCESS;
}
1.7.6.1