|
libflame
revision_anchor
|
Go to the source code of this file.
FLA_Error FLA_Sylv_nn_blk_var1( FLA_Obj isgn,
                                FLA_Obj A,
                                FLA_Obj B,
                                FLA_Obj C,
                                FLA_Obj scale,
                                fla_sylv_t* cntl )
References FLA_Cont_with_3x3_to_2x2(), FLA_Determine_blocksize(), FLA_Gemm_internal(), FLA_MINUS_ONE, FLA_Obj_length(), FLA_ONE, FLA_Part_2x2(), FLA_Repart_2x2_to_3x3(), and FLA_Sylv_internal().
Referenced by FLA_Sylv_nn().
{
// Body of FLA_Sylv_nn_blk_var1: blocked "sylv" update of C with A and B used
// untransposed (presumably a Sylvester-equation solve -- confirm against
// FLA_Sylv_internal). Each iteration updates the blocks C10, C21 and C11 as
// described by the comments inside the loop.
// isgn and scale are forwarded to FLA_Sylv_internal; FLA_NEGATE( isgn ) is the
// coefficient of the C * B products. cntl supplies the block size and the
// sub-controls handed to the internal calls.
// Always returns FLA_SUCCESS; return values of the internal calls are not
// inspected.
FLA_Obj ATL, ATR, A00, A01, A02,
ABL, ABR, A10, A11, A12,
A20, A21, A22;
FLA_Obj BTL, BTR, B00, B01, B02,
BBL, BBR, B10, B11, B12,
B20, B21, B22;
FLA_Obj CTL, CTR, C00, C01, C02,
CBL, CBR, C10, C11, C12,
C20, C21, C22;
dim_t b;
// Initial 2x2 partitioning: the quadrants ABR, BTL and CBL are requested with
// sizes 0, 0 and are the ones that grow as the loop proceeds.
FLA_Part_2x2( A, &ATL, &ATR,
&ABL, &ABR, 0, 0, FLA_BR );
FLA_Part_2x2( B, &BTL, &BTR,
&BBL, &BBR, 0, 0, FLA_TL );
FLA_Part_2x2( C, &CTL, &CTR,
&CBL, &CBR, 0, 0, FLA_BL );
// NOTE(review): only A is tested by the guard although B and C advance by the
// same b each step; this appears to assume A and B have equal order -- confirm
// against the caller.
while ( FLA_Obj_length( ABR ) < FLA_Obj_length( A ) ){
// Step size for this iteration, derived from CTR and the blocksize in cntl.
b = FLA_Determine_blocksize( CTR, FLA_TR, FLA_Cntl_blocksize( cntl ) );
// Repartition each operand 2x2 -> 3x3 using step b.
FLA_Repart_2x2_to_3x3( ATL, /**/ ATR, &A00, &A01, /**/ &A02,
&A10, &A11, /**/ &A12,
/* ************* */ /* ******************** */
ABL, /**/ ABR, &A20, &A21, /**/ &A22,
b, b, FLA_TL );
FLA_Repart_2x2_to_3x3( BTL, /**/ BTR, &B00, /**/ &B01, &B02,
/* ************* */ /* ******************** */
&B10, /**/ &B11, &B12,
BBL, /**/ BBR, &B20, /**/ &B21, &B22,
b, b, FLA_BR );
FLA_Repart_2x2_to_3x3( CTL, /**/ CTR, &C00, /**/ &C01, &C02,
&C10, /**/ &C11, &C12,
/* ************* */ /* ******************** */
CBL, /**/ CBR, &C20, /**/ &C21, &C22,
b, b, FLA_TR );
// Loop Invariant:
// CTL = CTL
// CTR = CTR
// CBL = sylv( ABR, BTL, CBL )
// CBR = CBR
/*------------------------------------------------------------*/
// "-/+" in the comments below stands for the FLA_NEGATE( isgn ) coefficient.
// Statement order matters: C10 and C21 are computed first because both feed
// the C11 update further down.
// C10 = sylv( A11, B00, C10 - A12 * C20 );
FLA_Gemm_internal( FLA_NO_TRANSPOSE, FLA_NO_TRANSPOSE,
FLA_MINUS_ONE, A12, C20, FLA_ONE, C10,
FLA_Cntl_sub_gemm1( cntl ) );
FLA_Sylv_internal( FLA_NO_TRANSPOSE, FLA_NO_TRANSPOSE,
isgn, A11, B00, C10, scale,
FLA_Cntl_sub_sylv1( cntl ) );
// C21 = sylv( A22, B11, C21 -/+ C20 * B01 );
FLA_Gemm_internal( FLA_NO_TRANSPOSE, FLA_NO_TRANSPOSE,
FLA_NEGATE( isgn ), C20, B01, FLA_ONE, C21,
FLA_Cntl_sub_gemm2( cntl ) );
FLA_Sylv_internal( FLA_NO_TRANSPOSE, FLA_NO_TRANSPOSE,
isgn, A22, B11, C21, scale,
FLA_Cntl_sub_sylv2( cntl ) );
// C11 = sylv( A11, B11, C11 - A12 * C21 -/+ C10 * B01 );
FLA_Gemm_internal( FLA_NO_TRANSPOSE, FLA_NO_TRANSPOSE,
FLA_NEGATE( isgn ), C10, B01, FLA_ONE, C11,
FLA_Cntl_sub_gemm3( cntl ) );
FLA_Gemm_internal( FLA_NO_TRANSPOSE, FLA_NO_TRANSPOSE,
FLA_MINUS_ONE, A12, C21, FLA_ONE, C11,
FLA_Cntl_sub_gemm4( cntl ) );
FLA_Sylv_internal( FLA_NO_TRANSPOSE, FLA_NO_TRANSPOSE,
isgn, A11, B11, C11, scale,
FLA_Cntl_sub_sylv3( cntl ) );
/*------------------------------------------------------------*/
// Merge the 3x3 views back into 2x2 ones so that ABR, BTL and CBL absorb the
// blocks handled in this iteration.
FLA_Cont_with_3x3_to_2x2( &ATL, /**/ &ATR, A00, /**/ A01, A02,
/* ************** */ /* ****************** */
A10, /**/ A11, A12,
&ABL, /**/ &ABR, A20, /**/ A21, A22,
FLA_BR );
FLA_Cont_with_3x3_to_2x2( &BTL, /**/ &BTR, B00, B01, /**/ B02,
B10, B11, /**/ B12,
/* ************** */ /* ****************** */
&BBL, /**/ &BBR, B20, B21, /**/ B22,
FLA_TL );
FLA_Cont_with_3x3_to_2x2( &CTL, /**/ &CTR, C00, C01, /**/ C02,
/* ************** */ /* ****************** */
C10, C11, /**/ C12,
&CBL, /**/ &CBR, C20, C21, /**/ C22,
FLA_BL );
}
return FLA_SUCCESS;
}
FLA_Error FLA_Sylv_nn_blk_var10( FLA_Obj isgn,
                                 FLA_Obj A,
                                 FLA_Obj B,
                                 FLA_Obj C,
                                 FLA_Obj scale,
                                 fla_sylv_t* cntl )
References FLA_Cont_with_3x3_to_2x2(), FLA_Determine_blocksize(), FLA_Gemm_internal(), FLA_MINUS_ONE, FLA_Obj_length(), FLA_ONE, FLA_Part_2x2(), FLA_Repart_2x2_to_3x3(), and FLA_Sylv_internal().
Referenced by FLA_Sylv_nn().
{
// Body of FLA_Sylv_nn_blk_var10: blocked "sylv" update of C with A and B used
// untransposed (presumably a Sylvester-equation solve -- confirm against
// FLA_Sylv_internal). Each iteration solves for C21, C11 and C01 and then
// applies the C * B12 updates to C02, C12 and C22, as described by the
// comments inside the loop.
// isgn and scale are forwarded to FLA_Sylv_internal; FLA_NEGATE( isgn ) is the
// coefficient of the C * B products. cntl supplies the block size and the
// sub-controls handed to the internal calls.
// Always returns FLA_SUCCESS; return values of the internal calls are not
// inspected.
FLA_Obj ATL, ATR, A00, A01, A02,
ABL, ABR, A10, A11, A12,
A20, A21, A22;
FLA_Obj BTL, BTR, B00, B01, B02,
BBL, BBR, B10, B11, B12,
B20, B21, B22;
FLA_Obj CTL, CTR, C00, C01, C02,
CBL, CBR, C10, C11, C12,
C20, C21, C22;
dim_t b;
// Initial 2x2 partitioning: the quadrants ABR, BTL and CBL are requested with
// sizes 0, 0 and are the ones that grow as the loop proceeds.
FLA_Part_2x2( A, &ATL, &ATR,
&ABL, &ABR, 0, 0, FLA_BR );
FLA_Part_2x2( B, &BTL, &BTR,
&BBL, &BBR, 0, 0, FLA_TL );
FLA_Part_2x2( C, &CTL, &CTR,
&CBL, &CBR, 0, 0, FLA_BL );
// NOTE(review): only A is tested by the guard although B and C advance by the
// same b each step; this appears to assume A and B have equal order -- confirm
// against the caller.
while ( FLA_Obj_length( ABR ) < FLA_Obj_length( A ) ){
// Step size for this iteration, derived from CTR and the blocksize in cntl.
b = FLA_Determine_blocksize( CTR, FLA_TR, FLA_Cntl_blocksize( cntl ) );
// Repartition each operand 2x2 -> 3x3 using step b.
FLA_Repart_2x2_to_3x3( ATL, /**/ ATR, &A00, &A01, /**/ &A02,
&A10, &A11, /**/ &A12,
/* ************* */ /* ******************** */
ABL, /**/ ABR, &A20, &A21, /**/ &A22,
b, b, FLA_TL );
FLA_Repart_2x2_to_3x3( BTL, /**/ BTR, &B00, /**/ &B01, &B02,
/* ************* */ /* ******************** */
&B10, /**/ &B11, &B12,
BBL, /**/ BBR, &B20, /**/ &B21, &B22,
b, b, FLA_BR );
FLA_Repart_2x2_to_3x3( CTL, /**/ CTR, &C00, /**/ &C01, &C02,
&C10, /**/ &C11, &C12,
/* ************* */ /* ******************** */
CBL, /**/ CBR, &C20, /**/ &C21, &C22,
b, b, FLA_TR );
// Loop Invariant:
// CTL = sylv( ATL, BTL, CTL - ATR * sylv( ABR, BTL, CBL ) )
// CTR = CTR - sylv( ATL, BTL, CTL - ATR * sylv( ABR, BTL, CBL ) ) * BTR
// CBL = sylv( ABR, BTL, CBL )
// CBR = CBR - sylv( ABR, BTL, CBL ) * BTR
/*------------------------------------------------------------*/
// "-/+" in the comments below stands for the FLA_NEGATE( isgn ) coefficient.
// Statement order matters: C21 feeds C11 and C01, C11 feeds C01, and the
// three trailing updates consume the finished C01, C11 and C21.
// C21 = sylv( A22, B11, C21 );
FLA_Sylv_internal( FLA_NO_TRANSPOSE, FLA_NO_TRANSPOSE,
isgn, A22, B11, C21, scale,
FLA_Cntl_sub_sylv1( cntl ) );
// C11 = sylv( A11, B11, C11 - A12 * C21 );
FLA_Gemm_internal( FLA_NO_TRANSPOSE, FLA_NO_TRANSPOSE,
FLA_MINUS_ONE, A12, C21, FLA_ONE, C11,
FLA_Cntl_sub_gemm1( cntl ) );
FLA_Sylv_internal( FLA_NO_TRANSPOSE, FLA_NO_TRANSPOSE,
isgn, A11, B11, C11, scale,
FLA_Cntl_sub_sylv2( cntl ) );
// C01 = sylv( A00, B11, C01 - A01 * C11 - A02 * C21 );
FLA_Gemm_internal( FLA_NO_TRANSPOSE, FLA_NO_TRANSPOSE,
FLA_MINUS_ONE, A02, C21, FLA_ONE, C01,
FLA_Cntl_sub_gemm2( cntl ) );
FLA_Gemm_internal( FLA_NO_TRANSPOSE, FLA_NO_TRANSPOSE,
FLA_MINUS_ONE, A01, C11, FLA_ONE, C01,
FLA_Cntl_sub_gemm3( cntl ) );
FLA_Sylv_internal( FLA_NO_TRANSPOSE, FLA_NO_TRANSPOSE,
isgn, A00, B11, C01, scale,
FLA_Cntl_sub_sylv3( cntl ) );
// C02 = C02 -/+ C01 * B12;
FLA_Gemm_internal( FLA_NO_TRANSPOSE, FLA_NO_TRANSPOSE,
FLA_NEGATE( isgn ), C01, B12, FLA_ONE, C02,
FLA_Cntl_sub_gemm4( cntl ) );
// C12 = C12 -/+ C11 * B12;
FLA_Gemm_internal( FLA_NO_TRANSPOSE, FLA_NO_TRANSPOSE,
FLA_NEGATE( isgn ), C11, B12, FLA_ONE, C12,
FLA_Cntl_sub_gemm5( cntl ) );
// C22 = C22 -/+ C21 * B12;
FLA_Gemm_internal( FLA_NO_TRANSPOSE, FLA_NO_TRANSPOSE,
FLA_NEGATE( isgn ), C21, B12, FLA_ONE, C22,
FLA_Cntl_sub_gemm6( cntl ) );
/*------------------------------------------------------------*/
// Merge the 3x3 views back into 2x2 ones so that ABR, BTL and CBL absorb the
// blocks handled in this iteration.
FLA_Cont_with_3x3_to_2x2( &ATL, /**/ &ATR, A00, /**/ A01, A02,
/* ************** */ /* ****************** */
A10, /**/ A11, A12,
&ABL, /**/ &ABR, A20, /**/ A21, A22,
FLA_BR );
FLA_Cont_with_3x3_to_2x2( &BTL, /**/ &BTR, B00, B01, /**/ B02,
B10, B11, /**/ B12,
/* ************** */ /* ****************** */
&BBL, /**/ &BBR, B20, B21, /**/ B22,
FLA_TL );
FLA_Cont_with_3x3_to_2x2( &CTL, /**/ &CTR, C00, C01, /**/ C02,
/* ************** */ /* ****************** */
C10, C11, /**/ C12,
&CBL, /**/ &CBR, C20, C21, /**/ C22,
FLA_BL );
}
return FLA_SUCCESS;
}
FLA_Error FLA_Sylv_nn_blk_var11( FLA_Obj isgn,
                                 FLA_Obj A,
                                 FLA_Obj B,
                                 FLA_Obj C,
                                 FLA_Obj scale,
                                 fla_sylv_t* cntl )
References FLA_Cont_with_3x3_to_2x2(), FLA_Determine_blocksize(), FLA_Gemm_internal(), FLA_MINUS_ONE, FLA_Obj_length(), FLA_ONE, FLA_Part_2x2(), FLA_Repart_2x2_to_3x3(), and FLA_Sylv_internal().
Referenced by FLA_Sylv_nn().
{
// Body of FLA_Sylv_nn_blk_var11: blocked "sylv" update of C with A and B used
// untransposed (presumably a Sylvester-equation solve -- confirm against
// FLA_Sylv_internal). Each iteration solves for C10, C11 and C12 and pushes
// each result into the row above (C00, C01, C02) via A01, as described by the
// comments inside the loop.
// isgn and scale are forwarded to FLA_Sylv_internal; FLA_NEGATE( isgn ) is the
// coefficient of the C * B products. cntl supplies the block size and the
// sub-controls handed to the internal calls.
// Always returns FLA_SUCCESS; return values of the internal calls are not
// inspected.
FLA_Obj ATL, ATR, A00, A01, A02,
ABL, ABR, A10, A11, A12,
A20, A21, A22;
FLA_Obj BTL, BTR, B00, B01, B02,
BBL, BBR, B10, B11, B12,
B20, B21, B22;
FLA_Obj CTL, CTR, C00, C01, C02,
CBL, CBR, C10, C11, C12,
C20, C21, C22;
dim_t b;
// Initial 2x2 partitioning: the quadrants ABR, BTL and CBL are requested with
// sizes 0, 0 and are the ones that grow as the loop proceeds.
FLA_Part_2x2( A, &ATL, &ATR,
&ABL, &ABR, 0, 0, FLA_BR );
FLA_Part_2x2( B, &BTL, &BTR,
&BBL, &BBR, 0, 0, FLA_TL );
FLA_Part_2x2( C, &CTL, &CTR,
&CBL, &CBR, 0, 0, FLA_BL );
// NOTE(review): only A is tested by the guard although B and C advance by the
// same b each step; this appears to assume A and B have equal order -- confirm
// against the caller.
while ( FLA_Obj_length( ABR ) < FLA_Obj_length( A ) ){
// Step size for this iteration, derived from CTR and the blocksize in cntl.
b = FLA_Determine_blocksize( CTR, FLA_TR, FLA_Cntl_blocksize( cntl ) );
// Repartition each operand 2x2 -> 3x3 using step b.
FLA_Repart_2x2_to_3x3( ATL, /**/ ATR, &A00, &A01, /**/ &A02,
&A10, &A11, /**/ &A12,
/* ************* */ /* ******************** */
ABL, /**/ ABR, &A20, &A21, /**/ &A22,
b, b, FLA_TL );
FLA_Repart_2x2_to_3x3( BTL, /**/ BTR, &B00, /**/ &B01, &B02,
/* ************* */ /* ******************** */
&B10, /**/ &B11, &B12,
BBL, /**/ BBR, &B20, /**/ &B21, &B22,
b, b, FLA_BR );
FLA_Repart_2x2_to_3x3( CTL, /**/ CTR, &C00, /**/ &C01, &C02,
&C10, /**/ &C11, &C12,
/* ************* */ /* ******************** */
CBL, /**/ CBR, &C20, /**/ &C21, &C22,
b, b, FLA_TR );
// Loop Invariant:
// CTL = CTL - ATR * sylv( ABR, BTL, CBL )
// CTR = CTR - ATR * sylv( ABR, BBR, CBR - sylv( ABR, BTL, CBL ) * BTR )
// CBL = sylv( ABR, BTL, CBL )
// CBR = sylv( ABR, BBR, CBR - sylv( ABR, BTL, CBL ) * BTR )
/*------------------------------------------------------------*/
// "-/+" in the comments below stands for the FLA_NEGATE( isgn ) coefficient.
// Statement order matters: C10 feeds C00, C11 and C12; C11 feeds C01 and C12;
// C12 feeds C02.
// C10 = sylv( A11, B00, C10 );
FLA_Sylv_internal( FLA_NO_TRANSPOSE, FLA_NO_TRANSPOSE,
isgn, A11, B00, C10, scale,
FLA_Cntl_sub_sylv1( cntl ) );
// C00 = C00 - A01 * C10;
FLA_Gemm_internal( FLA_NO_TRANSPOSE, FLA_NO_TRANSPOSE,
FLA_MINUS_ONE, A01, C10, FLA_ONE, C00,
FLA_Cntl_sub_gemm1( cntl ) );
// C11 = sylv( A11, B11, C11 -/+ C10 * B01 );
FLA_Gemm_internal( FLA_NO_TRANSPOSE, FLA_NO_TRANSPOSE,
FLA_NEGATE( isgn ), C10, B01, FLA_ONE, C11,
FLA_Cntl_sub_gemm2( cntl ) );
FLA_Sylv_internal( FLA_NO_TRANSPOSE, FLA_NO_TRANSPOSE,
isgn, A11, B11, C11, scale,
FLA_Cntl_sub_sylv2( cntl ) );
// C01 = C01 - A01 * C11;
FLA_Gemm_internal( FLA_NO_TRANSPOSE, FLA_NO_TRANSPOSE,
FLA_MINUS_ONE, A01, C11, FLA_ONE, C01,
FLA_Cntl_sub_gemm3( cntl ) );
// C12 = sylv( A11, B22, C12 -/+ C10 * B02 -/+ C11 * B12 );
FLA_Gemm_internal( FLA_NO_TRANSPOSE, FLA_NO_TRANSPOSE,
FLA_NEGATE( isgn ), C11, B12, FLA_ONE, C12,
FLA_Cntl_sub_gemm4( cntl ) );
FLA_Gemm_internal( FLA_NO_TRANSPOSE, FLA_NO_TRANSPOSE,
FLA_NEGATE( isgn ), C10, B02, FLA_ONE, C12,
FLA_Cntl_sub_gemm5( cntl ) );
FLA_Sylv_internal( FLA_NO_TRANSPOSE, FLA_NO_TRANSPOSE,
isgn, A11, B22, C12, scale,
FLA_Cntl_sub_sylv3( cntl ) );
// C02 = C02 - A01 * C12;
FLA_Gemm_internal( FLA_NO_TRANSPOSE, FLA_NO_TRANSPOSE,
FLA_MINUS_ONE, A01, C12, FLA_ONE, C02,
FLA_Cntl_sub_gemm6( cntl ) );
/*------------------------------------------------------------*/
// Merge the 3x3 views back into 2x2 ones so that ABR, BTL and CBL absorb the
// blocks handled in this iteration.
FLA_Cont_with_3x3_to_2x2( &ATL, /**/ &ATR, A00, /**/ A01, A02,
/* ************** */ /* ****************** */
A10, /**/ A11, A12,
&ABL, /**/ &ABR, A20, /**/ A21, A22,
FLA_BR );
FLA_Cont_with_3x3_to_2x2( &BTL, /**/ &BTR, B00, B01, /**/ B02,
B10, B11, /**/ B12,
/* ************** */ /* ****************** */
&BBL, /**/ &BBR, B20, B21, /**/ B22,
FLA_TL );
FLA_Cont_with_3x3_to_2x2( &CTL, /**/ &CTR, C00, C01, /**/ C02,
/* ************** */ /* ****************** */
C10, C11, /**/ C12,
&CBL, /**/ &CBR, C20, C21, /**/ C22,
FLA_BL );
}
return FLA_SUCCESS;
}
FLA_Error FLA_Sylv_nn_blk_var12( FLA_Obj isgn,
                                 FLA_Obj A,
                                 FLA_Obj B,
                                 FLA_Obj C,
                                 FLA_Obj scale,
                                 fla_sylv_t* cntl )
References FLA_Cont_with_3x3_to_2x2(), FLA_Determine_blocksize(), FLA_Gemm_internal(), FLA_MINUS_ONE, FLA_Obj_length(), FLA_ONE, FLA_Part_2x2(), FLA_Repart_2x2_to_3x3(), and FLA_Sylv_internal().
Referenced by FLA_Sylv_nn().
{
// Body of FLA_Sylv_nn_blk_var12: blocked "sylv" update of C with A and B used
// untransposed (presumably a Sylvester-equation solve -- confirm against
// FLA_Sylv_internal). Each iteration solves for C11, C01 and C12 and updates
// C02, as described by the comments inside the loop.
// isgn and scale are forwarded to FLA_Sylv_internal; FLA_NEGATE( isgn ) is the
// coefficient of the C * B products. cntl supplies the block size and the
// sub-controls handed to the internal calls.
// Always returns FLA_SUCCESS; return values of the internal calls are not
// inspected.
FLA_Obj ATL, ATR, A00, A01, A02,
ABL, ABR, A10, A11, A12,
A20, A21, A22;
FLA_Obj BTL, BTR, B00, B01, B02,
BBL, BBR, B10, B11, B12,
B20, B21, B22;
FLA_Obj CTL, CTR, C00, C01, C02,
CBL, CBR, C10, C11, C12,
C20, C21, C22;
dim_t b;
// Initial 2x2 partitioning: the quadrants ABR, BTL and CBL are requested with
// sizes 0, 0 and are the ones that grow as the loop proceeds.
FLA_Part_2x2( A, &ATL, &ATR,
&ABL, &ABR, 0, 0, FLA_BR );
FLA_Part_2x2( B, &BTL, &BTR,
&BBL, &BBR, 0, 0, FLA_TL );
FLA_Part_2x2( C, &CTL, &CTR,
&CBL, &CBR, 0, 0, FLA_BL );
// NOTE(review): only A is tested by the guard although B and C advance by the
// same b each step; this appears to assume A and B have equal order -- confirm
// against the caller.
while ( FLA_Obj_length( ABR ) < FLA_Obj_length( A ) ){
// Step size for this iteration, derived from CTR and the blocksize in cntl.
b = FLA_Determine_blocksize( CTR, FLA_TR, FLA_Cntl_blocksize( cntl ) );
// Repartition each operand 2x2 -> 3x3 using step b.
FLA_Repart_2x2_to_3x3( ATL, /**/ ATR, &A00, &A01, /**/ &A02,
&A10, &A11, /**/ &A12,
/* ************* */ /* ******************** */
ABL, /**/ ABR, &A20, &A21, /**/ &A22,
b, b, FLA_TL );
FLA_Repart_2x2_to_3x3( BTL, /**/ BTR, &B00, /**/ &B01, &B02,
/* ************* */ /* ******************** */
&B10, /**/ &B11, &B12,
BBL, /**/ BBR, &B20, /**/ &B21, &B22,
b, b, FLA_BR );
FLA_Repart_2x2_to_3x3( CTL, /**/ CTR, &C00, /**/ &C01, &C02,
&C10, /**/ &C11, &C12,
/* ************* */ /* ******************** */
CBL, /**/ CBR, &C20, /**/ &C21, &C22,
b, b, FLA_TR );
// Loop Invariant:
// CTL = sylv( ATL, BTL, CTL - ATR * sylv( ABR, BTL, CBL ) )
// CTR = CTR - sylv( ATL, BTL, CTL - ATR * sylv( ABR, BTL, CBL ) ) * BTR
// CBL = sylv( ABR, BTL, CBL )
// CBR = sylv( ABR, BBR, CBR - sylv( ABR, BTL, CBL ) * BTR )
/*------------------------------------------------------------*/
// "-/+" in the comments below stands for the FLA_NEGATE( isgn ) coefficient.
// Statement order matters: C11 feeds C01 and C12, and C01 feeds C02.
// C11 = sylv( A11, B11, C11 - A12 * C21 );
FLA_Gemm_internal( FLA_NO_TRANSPOSE, FLA_NO_TRANSPOSE,
FLA_MINUS_ONE, A12, C21, FLA_ONE, C11,
FLA_Cntl_sub_gemm1( cntl ) );
FLA_Sylv_internal( FLA_NO_TRANSPOSE, FLA_NO_TRANSPOSE,
isgn, A11, B11, C11, scale,
FLA_Cntl_sub_sylv1( cntl ) );
// C01 = sylv( A00, B11, C01 - A01 * C11 - A02 * C21 );
FLA_Gemm_internal( FLA_NO_TRANSPOSE, FLA_NO_TRANSPOSE,
FLA_MINUS_ONE, A02, C21, FLA_ONE, C01,
FLA_Cntl_sub_gemm2( cntl ) );
FLA_Gemm_internal( FLA_NO_TRANSPOSE, FLA_NO_TRANSPOSE,
FLA_MINUS_ONE, A01, C11, FLA_ONE, C01,
FLA_Cntl_sub_gemm3( cntl ) );
FLA_Sylv_internal( FLA_NO_TRANSPOSE, FLA_NO_TRANSPOSE,
isgn, A00, B11, C01, scale,
FLA_Cntl_sub_sylv2( cntl ) );
// C02 = C02 -/+ C01 * B12;
FLA_Gemm_internal( FLA_NO_TRANSPOSE, FLA_NO_TRANSPOSE,
FLA_NEGATE( isgn ), C01, B12, FLA_ONE, C02,
FLA_Cntl_sub_gemm4( cntl ) );
// C12 = sylv( A11, B22, C12 - A12 * C22 -/+ C11 * B12 );
FLA_Gemm_internal( FLA_NO_TRANSPOSE, FLA_NO_TRANSPOSE,
FLA_NEGATE( isgn ), C11, B12, FLA_ONE, C12,
FLA_Cntl_sub_gemm5( cntl ) );
FLA_Gemm_internal( FLA_NO_TRANSPOSE, FLA_NO_TRANSPOSE,
FLA_MINUS_ONE, A12, C22, FLA_ONE, C12,
FLA_Cntl_sub_gemm6( cntl ) );
FLA_Sylv_internal( FLA_NO_TRANSPOSE, FLA_NO_TRANSPOSE,
isgn, A11, B22, C12, scale,
FLA_Cntl_sub_sylv3( cntl ) );
/*------------------------------------------------------------*/
// Merge the 3x3 views back into 2x2 ones so that ABR, BTL and CBL absorb the
// blocks handled in this iteration.
FLA_Cont_with_3x3_to_2x2( &ATL, /**/ &ATR, A00, /**/ A01, A02,
/* ************** */ /* ****************** */
A10, /**/ A11, A12,
&ABL, /**/ &ABR, A20, /**/ A21, A22,
FLA_BR );
FLA_Cont_with_3x3_to_2x2( &BTL, /**/ &BTR, B00, B01, /**/ B02,
B10, B11, /**/ B12,
/* ************** */ /* ****************** */
&BBL, /**/ &BBR, B20, B21, /**/ B22,
FLA_TL );
FLA_Cont_with_3x3_to_2x2( &CTL, /**/ &CTR, C00, C01, /**/ C02,
/* ************** */ /* ****************** */
C10, C11, /**/ C12,
&CBL, /**/ &CBR, C20, C21, /**/ C22,
FLA_BL );
}
return FLA_SUCCESS;
}
FLA_Error FLA_Sylv_nn_blk_var13( FLA_Obj isgn,
                                 FLA_Obj A,
                                 FLA_Obj B,
                                 FLA_Obj C,
                                 FLA_Obj scale,
                                 fla_sylv_t* cntl )
References FLA_Cont_with_3x3_to_2x2(), FLA_Determine_blocksize(), FLA_Gemm_internal(), FLA_MINUS_ONE, FLA_Obj_length(), FLA_ONE, FLA_Part_2x2(), FLA_Repart_2x2_to_3x3(), and FLA_Sylv_internal().
Referenced by FLA_Sylv_nn().
{
// Body of FLA_Sylv_nn_blk_var13: blocked "sylv" update of C with A and B used
// untransposed (presumably a Sylvester-equation solve -- confirm against
// FLA_Sylv_internal). Each iteration solves for C11, C01 and C12 and updates
// C02, as described by the comments inside the loop.
// isgn and scale are forwarded to FLA_Sylv_internal; FLA_NEGATE( isgn ) is the
// coefficient of the C * B products. cntl supplies the block size and the
// sub-controls handed to the internal calls.
// Always returns FLA_SUCCESS; return values of the internal calls are not
// inspected.
FLA_Obj ATL, ATR, A00, A01, A02,
ABL, ABR, A10, A11, A12,
A20, A21, A22;
FLA_Obj BTL, BTR, B00, B01, B02,
BBL, BBR, B10, B11, B12,
B20, B21, B22;
FLA_Obj CTL, CTR, C00, C01, C02,
CBL, CBR, C10, C11, C12,
C20, C21, C22;
dim_t b;
// Initial 2x2 partitioning: the quadrants ABR, BTL and CBL are requested with
// sizes 0, 0 and are the ones that grow as the loop proceeds.
FLA_Part_2x2( A, &ATL, &ATR,
&ABL, &ABR, 0, 0, FLA_BR );
FLA_Part_2x2( B, &BTL, &BTR,
&BBL, &BBR, 0, 0, FLA_TL );
FLA_Part_2x2( C, &CTL, &CTR,
&CBL, &CBR, 0, 0, FLA_BL );
// NOTE(review): only A is tested by the guard although B and C advance by the
// same b each step; this appears to assume A and B have equal order -- confirm
// against the caller.
while ( FLA_Obj_length( ABR ) < FLA_Obj_length( A ) ){
// Step size for this iteration, derived from CTR and the blocksize in cntl.
b = FLA_Determine_blocksize( CTR, FLA_TR, FLA_Cntl_blocksize( cntl ) );
// Repartition each operand 2x2 -> 3x3 using step b.
FLA_Repart_2x2_to_3x3( ATL, /**/ ATR, &A00, &A01, /**/ &A02,
&A10, &A11, /**/ &A12,
/* ************* */ /* ******************** */
ABL, /**/ ABR, &A20, &A21, /**/ &A22,
b, b, FLA_TL );
FLA_Repart_2x2_to_3x3( BTL, /**/ BTR, &B00, /**/ &B01, &B02,
/* ************* */ /* ******************** */
&B10, /**/ &B11, &B12,
BBL, /**/ BBR, &B20, /**/ &B21, &B22,
b, b, FLA_BR );
FLA_Repart_2x2_to_3x3( CTL, /**/ CTR, &C00, /**/ &C01, &C02,
&C10, /**/ &C11, &C12,
/* ************* */ /* ******************** */
CBL, /**/ CBR, &C20, /**/ &C21, &C22,
b, b, FLA_TR );
// Loop Invariant:
// CTL = sylv( ATL, BTL, CTL - ATR * sylv( ABR, BTL, CBL ) )
// CTR = CTR - ATR * sylv( ABR, BBR, CBR - sylv( ABR, BTL, CBL ) * BTR )
// CBL = sylv( ABR, BTL, CBL )
// CBR = sylv( ABR, BBR, CBR - sylv( ABR, BTL, CBL ) * BTR )
/*------------------------------------------------------------*/
// "-/+" in the comments below stands for the FLA_NEGATE( isgn ) coefficient.
// Statement order matters: C11 feeds C01 and C12, and C12 feeds C02.
// C11 = sylv( A11, B11, C11 -/+ C10 * B01 );
FLA_Gemm_internal( FLA_NO_TRANSPOSE, FLA_NO_TRANSPOSE,
FLA_NEGATE( isgn ), C10, B01, FLA_ONE, C11,
FLA_Cntl_sub_gemm1( cntl ) );
FLA_Sylv_internal( FLA_NO_TRANSPOSE, FLA_NO_TRANSPOSE,
isgn, A11, B11, C11, scale,
FLA_Cntl_sub_sylv1( cntl ) );
// C01 = sylv( A00, B11, C01 - A01 * C11 -/+ C00 * B01 );
FLA_Gemm_internal( FLA_NO_TRANSPOSE, FLA_NO_TRANSPOSE,
FLA_NEGATE( isgn ), C00, B01, FLA_ONE, C01,
FLA_Cntl_sub_gemm2( cntl ) );
FLA_Gemm_internal( FLA_NO_TRANSPOSE, FLA_NO_TRANSPOSE,
FLA_MINUS_ONE, A01, C11, FLA_ONE, C01,
FLA_Cntl_sub_gemm3( cntl ) );
FLA_Sylv_internal( FLA_NO_TRANSPOSE, FLA_NO_TRANSPOSE,
isgn, A00, B11, C01, scale,
FLA_Cntl_sub_sylv2( cntl ) );
// C12 = sylv( A11, B22, C12 -/+ C10 * B02 -/+ C11 * B12 );
FLA_Gemm_internal( FLA_NO_TRANSPOSE, FLA_NO_TRANSPOSE,
FLA_NEGATE( isgn ), C11, B12, FLA_ONE, C12,
FLA_Cntl_sub_gemm4( cntl ) );
FLA_Gemm_internal( FLA_NO_TRANSPOSE, FLA_NO_TRANSPOSE,
FLA_NEGATE( isgn ), C10, B02, FLA_ONE, C12,
FLA_Cntl_sub_gemm5( cntl ) );
FLA_Sylv_internal( FLA_NO_TRANSPOSE, FLA_NO_TRANSPOSE,
isgn, A11, B22, C12, scale,
FLA_Cntl_sub_sylv3( cntl ) );
// C02 = C02 - A01 * C12;
FLA_Gemm_internal( FLA_NO_TRANSPOSE, FLA_NO_TRANSPOSE,
FLA_MINUS_ONE, A01, C12, FLA_ONE, C02,
FLA_Cntl_sub_gemm6( cntl ) );
/*------------------------------------------------------------*/
// Merge the 3x3 views back into 2x2 ones so that ABR, BTL and CBL absorb the
// blocks handled in this iteration.
FLA_Cont_with_3x3_to_2x2( &ATL, /**/ &ATR, A00, /**/ A01, A02,
/* ************** */ /* ****************** */
A10, /**/ A11, A12,
&ABL, /**/ &ABR, A20, /**/ A21, A22,
FLA_BR );
FLA_Cont_with_3x3_to_2x2( &BTL, /**/ &BTR, B00, B01, /**/ B02,
B10, B11, /**/ B12,
/* ************** */ /* ****************** */
&BBL, /**/ &BBR, B20, B21, /**/ B22,
FLA_TL );
FLA_Cont_with_3x3_to_2x2( &CTL, /**/ &CTR, C00, C01, /**/ C02,
/* ************** */ /* ****************** */
C10, C11, /**/ C12,
&CBL, /**/ &CBR, C20, C21, /**/ C22,
FLA_BL );
}
return FLA_SUCCESS;
}
FLA_Error FLA_Sylv_nn_blk_var14( FLA_Obj isgn,
                                 FLA_Obj A,
                                 FLA_Obj B,
                                 FLA_Obj C,
                                 FLA_Obj scale,
                                 fla_sylv_t* cntl )
References FLA_Cont_with_3x3_to_2x2(), FLA_Determine_blocksize(), FLA_Gemm_internal(), FLA_MINUS_ONE, FLA_Obj_length(), FLA_ONE, FLA_Part_2x2(), FLA_Repart_2x2_to_3x3(), and FLA_Sylv_internal().
Referenced by FLA_Sylv_nn().
{
// Body of FLA_Sylv_nn_blk_var14: blocked "sylv" update of C with A and B used
// untransposed (presumably a Sylvester-equation solve -- confirm against
// FLA_Sylv_internal). Each iteration solves for C11, C01 and C12 and then
// updates C02, as described by the comments inside the loop.
// isgn and scale are forwarded to FLA_Sylv_internal; FLA_NEGATE( isgn ) is the
// coefficient of the C * B products. cntl supplies the block size and the
// sub-controls handed to the internal calls.
// Always returns FLA_SUCCESS; return values of the internal calls are not
// inspected.
FLA_Obj ATL, ATR, A00, A01, A02,
ABL, ABR, A10, A11, A12,
A20, A21, A22;
FLA_Obj BTL, BTR, B00, B01, B02,
BBL, BBR, B10, B11, B12,
B20, B21, B22;
FLA_Obj CTL, CTR, C00, C01, C02,
CBL, CBR, C10, C11, C12,
C20, C21, C22;
dim_t b;
// Initial 2x2 partitioning: the quadrants ABR, BTL and CBL are requested with
// sizes 0, 0 and are the ones that grow as the loop proceeds.
FLA_Part_2x2( A, &ATL, &ATR,
&ABL, &ABR, 0, 0, FLA_BR );
FLA_Part_2x2( B, &BTL, &BTR,
&BBL, &BBR, 0, 0, FLA_TL );
FLA_Part_2x2( C, &CTL, &CTR,
&CBL, &CBR, 0, 0, FLA_BL );
// NOTE(review): only A is tested by the guard although B and C advance by the
// same b each step; this appears to assume A and B have equal order -- confirm
// against the caller.
while ( FLA_Obj_length( ABR ) < FLA_Obj_length( A ) ){
// Step size for this iteration, derived from CTR and the blocksize in cntl.
b = FLA_Determine_blocksize( CTR, FLA_TR, FLA_Cntl_blocksize( cntl ) );
// Repartition each operand 2x2 -> 3x3 using step b.
FLA_Repart_2x2_to_3x3( ATL, /**/ ATR, &A00, &A01, /**/ &A02,
&A10, &A11, /**/ &A12,
/* ************* */ /* ******************** */
ABL, /**/ ABR, &A20, &A21, /**/ &A22,
b, b, FLA_TL );
FLA_Repart_2x2_to_3x3( BTL, /**/ BTR, &B00, /**/ &B01, &B02,
/* ************* */ /* ******************** */
&B10, /**/ &B11, &B12,
BBL, /**/ BBR, &B20, /**/ &B21, &B22,
b, b, FLA_BR );
FLA_Repart_2x2_to_3x3( CTL, /**/ CTR, &C00, /**/ &C01, &C02,
&C10, /**/ &C11, &C12,
/* ************* */ /* ******************** */
CBL, /**/ CBR, &C20, /**/ &C21, &C22,
b, b, FLA_TR );
// Loop Invariant:
// CTL = sylv( ATL, BTL, CTL - ATR * sylv( ABR, BTL, CBL ) )
// CTR = CTR - ATR * sylv( ABR, BBR, CBR - sylv( ABR, BTL, CBL ) * BTR )
// - sylv( ATL, BTL, CTL - ATR * sylv( ABR, BTL, CBL ) ) * BTR
// CBL = sylv( ABR, BTL, CBL )
// CBR = sylv( ABR, BBR, CBR - sylv( ABR, BTL, CBL ) * BTR )
/*------------------------------------------------------------*/
// "-/+" in the comments below stands for the FLA_NEGATE( isgn ) coefficient.
// Statement order matters: C11 feeds C01 and C12, and both of those feed C02.
// C11 = sylv( A11, B11, C11 );
FLA_Sylv_internal( FLA_NO_TRANSPOSE, FLA_NO_TRANSPOSE,
isgn, A11, B11, C11, scale,
FLA_Cntl_sub_sylv1( cntl ) );
// C01 = sylv( A00, B11, C01 - A01 * C11 );
FLA_Gemm_internal( FLA_NO_TRANSPOSE, FLA_NO_TRANSPOSE,
FLA_MINUS_ONE, A01, C11, FLA_ONE, C01,
FLA_Cntl_sub_gemm1( cntl ) );
FLA_Sylv_internal( FLA_NO_TRANSPOSE, FLA_NO_TRANSPOSE,
isgn, A00, B11, C01, scale,
FLA_Cntl_sub_sylv2( cntl ) );
// C12 = sylv( A11, B22, C12 -/+ C11 * B12 );
FLA_Gemm_internal( FLA_NO_TRANSPOSE, FLA_NO_TRANSPOSE,
FLA_NEGATE( isgn ), C11, B12, FLA_ONE, C12,
FLA_Cntl_sub_gemm2( cntl ) );
FLA_Sylv_internal( FLA_NO_TRANSPOSE, FLA_NO_TRANSPOSE,
isgn, A11, B22, C12, scale,
FLA_Cntl_sub_sylv3( cntl ) );
// C02 = C02 - A01 * C12 -/+ C01 * B12;
FLA_Gemm_internal( FLA_NO_TRANSPOSE, FLA_NO_TRANSPOSE,
FLA_NEGATE( isgn ), C01, B12, FLA_ONE, C02,
FLA_Cntl_sub_gemm3( cntl ) );
FLA_Gemm_internal( FLA_NO_TRANSPOSE, FLA_NO_TRANSPOSE,
FLA_MINUS_ONE, A01, C12, FLA_ONE, C02,
FLA_Cntl_sub_gemm4( cntl ) );
/*------------------------------------------------------------*/
// Merge the 3x3 views back into 2x2 ones so that ABR, BTL and CBL absorb the
// blocks handled in this iteration.
FLA_Cont_with_3x3_to_2x2( &ATL, /**/ &ATR, A00, /**/ A01, A02,
/* ************** */ /* ****************** */
A10, /**/ A11, A12,
&ABL, /**/ &ABR, A20, /**/ A21, A22,
FLA_BR );
FLA_Cont_with_3x3_to_2x2( &BTL, /**/ &BTR, B00, B01, /**/ B02,
B10, B11, /**/ B12,
/* ************** */ /* ****************** */
&BBL, /**/ &BBR, B20, B21, /**/ B22,
FLA_TL );
FLA_Cont_with_3x3_to_2x2( &CTL, /**/ &CTR, C00, C01, /**/ C02,
/* ************** */ /* ****************** */
C10, C11, /**/ C12,
&CBL, /**/ &CBR, C20, C21, /**/ C22,
FLA_BL );
}
return FLA_SUCCESS;
}
FLA_Error FLA_Sylv_nn_blk_var15( FLA_Obj isgn,
                                 FLA_Obj A,
                                 FLA_Obj B,
                                 FLA_Obj C,
                                 FLA_Obj scale,
                                 fla_sylv_t* cntl )
References FLA_Cont_with_3x1_to_2x1(), FLA_Cont_with_3x3_to_2x2(), FLA_Determine_blocksize(), FLA_Gemm_internal(), FLA_MINUS_ONE, FLA_Obj_length(), FLA_ONE, FLA_Part_2x1(), FLA_Part_2x2(), FLA_Repart_2x1_to_3x1(), FLA_Repart_2x2_to_3x3(), and FLA_Sylv_internal().
Referenced by FLA_Sylv_nn().
{
// Body of FLA_Sylv_nn_blk_var15: blocked "sylv" update of C in which only A
// is partitioned (2x2) and C is split into row panels (2x1); B is passed
// whole to every FLA_Sylv_internal call. ("sylv" is presumably a
// Sylvester-equation solve -- confirm against FLA_Sylv_internal.)
// Each iteration first subtracts A12 * C2 from C1 and then solves for C1.
// isgn and scale are forwarded to FLA_Sylv_internal; cntl supplies the block
// size and the sub-controls handed to the internal calls.
// Always returns FLA_SUCCESS; return values of the internal calls are not
// inspected.
FLA_Obj ATL, ATR, A00, A01, A02,
ABL, ABR, A10, A11, A12,
A20, A21, A22;
FLA_Obj CT, C0,
CB, C1,
C2;
dim_t b;
// Initial partitioning: ABR and CB are requested with size 0 and grow as the
// loop proceeds.
FLA_Part_2x2( A, &ATL, &ATR,
&ABL, &ABR, 0, 0, FLA_BR );
FLA_Part_2x1( C, &CT,
&CB, 0, FLA_BOTTOM );
// Iterate until ABR has as many rows as A.
while ( FLA_Obj_length( ABR ) < FLA_Obj_length( A ) ){
// Step size for this iteration, derived from CT and the blocksize in cntl.
b = FLA_Determine_blocksize( CT, FLA_TOP, FLA_Cntl_blocksize( cntl ) );
// Repartition A 2x2 -> 3x3 and C 2x1 -> 3x1 using step b.
FLA_Repart_2x2_to_3x3( ATL, /**/ ATR, &A00, &A01, /**/ &A02,
&A10, &A11, /**/ &A12,
/* ************* */ /* ******************** */
ABL, /**/ ABR, &A20, &A21, /**/ &A22,
b, b, FLA_TL );
FLA_Repart_2x1_to_3x1( CT, &C0,
&C1,
/* ** */ /* ** */
CB, &C2, b, FLA_TOP );
// Loop Invariant:
// CT = CT
// CB = sylv( ABR, B, CB )
/*------------------------------------------------------------*/
// The gemm must precede the sylv call: it forms the right-hand side
// C1 - A12 * C2 that the solve then overwrites.
// C1 = sylv( A11, B, C1 - A12 * C2 );
FLA_Gemm_internal( FLA_NO_TRANSPOSE, FLA_NO_TRANSPOSE,
FLA_MINUS_ONE, A12, C2, FLA_ONE, C1,
FLA_Cntl_sub_gemm1( cntl ) );
FLA_Sylv_internal( FLA_NO_TRANSPOSE, FLA_NO_TRANSPOSE,
isgn, A11, B, C1, scale,
FLA_Cntl_sub_sylv1( cntl ) );
/*------------------------------------------------------------*/
// Merge the 3x3 / 3x1 views back so that ABR and CB absorb the blocks handled
// in this iteration.
FLA_Cont_with_3x3_to_2x2( &ATL, /**/ &ATR, A00, /**/ A01, A02,
/* ************** */ /* ****************** */
A10, /**/ A11, A12,
&ABL, /**/ &ABR, A20, /**/ A21, A22,
FLA_BR );
FLA_Cont_with_3x1_to_2x1( &CT, C0,
/* ** */ /* ** */
C1,
&CB, C2, FLA_BOTTOM );
}
return FLA_SUCCESS;
}
FLA_Error FLA_Sylv_nn_blk_var16( FLA_Obj isgn,
                                 FLA_Obj A,
                                 FLA_Obj B,
                                 FLA_Obj C,
                                 FLA_Obj scale,
                                 fla_sylv_t* cntl )
References FLA_Cont_with_3x1_to_2x1(), FLA_Cont_with_3x3_to_2x2(), FLA_Determine_blocksize(), FLA_Gemm_internal(), FLA_MINUS_ONE, FLA_Obj_length(), FLA_ONE, FLA_Part_2x1(), FLA_Part_2x2(), FLA_Repart_2x1_to_3x1(), FLA_Repart_2x2_to_3x3(), and FLA_Sylv_internal().
Referenced by FLA_Sylv_nn().
{
// Body of FLA_Sylv_nn_blk_var16: blocked "sylv" update of C in which only A
// is partitioned (2x2) and C is split into row panels (2x1); B is passed
// whole to every FLA_Sylv_internal call. ("sylv" is presumably a
// Sylvester-equation solve -- confirm against FLA_Sylv_internal.)
// Each iteration solves for C1 and then subtracts A01 * C1 from C0.
// isgn and scale are forwarded to FLA_Sylv_internal; cntl supplies the block
// size and the sub-controls handed to the internal calls.
// Always returns FLA_SUCCESS; return values of the internal calls are not
// inspected.
FLA_Obj ATL, ATR, A00, A01, A02,
ABL, ABR, A10, A11, A12,
A20, A21, A22;
FLA_Obj CT, C0,
CB, C1,
C2;
dim_t b;
// Initial partitioning: ABR and CB are requested with size 0 and grow as the
// loop proceeds.
FLA_Part_2x2( A, &ATL, &ATR,
&ABL, &ABR, 0, 0, FLA_BR );
FLA_Part_2x1( C, &CT,
&CB, 0, FLA_BOTTOM );
// Iterate until ABR has as many rows as A.
while ( FLA_Obj_length( ABR ) < FLA_Obj_length( A ) ){
// Step size for this iteration, derived from CT and the blocksize in cntl.
b = FLA_Determine_blocksize( CT, FLA_TOP, FLA_Cntl_blocksize( cntl ) );
// Repartition A 2x2 -> 3x3 and C 2x1 -> 3x1 using step b.
FLA_Repart_2x2_to_3x3( ATL, /**/ ATR, &A00, &A01, /**/ &A02,
&A10, &A11, /**/ &A12,
/* ************* */ /* ******************** */
ABL, /**/ ABR, &A20, &A21, /**/ &A22,
b, b, FLA_TL );
FLA_Repart_2x1_to_3x1( CT, &C0,
&C1,
/* ** */ /* ** */
CB, &C2, b, FLA_TOP );
// Loop Invariant:
// CT = CT - ATR * sylv( ABR, B, CB )
// CB = sylv( ABR, B, CB )
/*------------------------------------------------------------*/
// The sylv call must precede the gemm: the C0 update consumes the solved C1.
// C1 = sylv( A11, B, C1 );
FLA_Sylv_internal( FLA_NO_TRANSPOSE, FLA_NO_TRANSPOSE,
isgn, A11, B, C1, scale,
FLA_Cntl_sub_sylv1( cntl ) );
// C0 = C0 - A01 * C1;
FLA_Gemm_internal( FLA_NO_TRANSPOSE, FLA_NO_TRANSPOSE,
FLA_MINUS_ONE, A01, C1, FLA_ONE, C0,
FLA_Cntl_sub_gemm1( cntl ) );
/*------------------------------------------------------------*/
// Merge the 3x3 / 3x1 views back so that ABR and CB absorb the blocks handled
// in this iteration.
FLA_Cont_with_3x3_to_2x2( &ATL, /**/ &ATR, A00, /**/ A01, A02,
/* ************** */ /* ****************** */
A10, /**/ A11, A12,
&ABL, /**/ &ABR, A20, /**/ A21, A22,
FLA_BR );
FLA_Cont_with_3x1_to_2x1( &CT, C0,
/* ** */ /* ** */
C1,
&CB, C2, FLA_BOTTOM );
}
return FLA_SUCCESS;
}
FLA_Error FLA_Sylv_nn_blk_var17( FLA_Obj isgn,
                                 FLA_Obj A,
                                 FLA_Obj B,
                                 FLA_Obj C,
                                 FLA_Obj scale,
                                 fla_sylv_t* cntl )
References FLA_Cont_with_1x3_to_1x2(), FLA_Cont_with_3x3_to_2x2(), FLA_Determine_blocksize(), FLA_Gemm_internal(), FLA_Obj_length(), FLA_ONE, FLA_Part_1x2(), FLA_Part_2x2(), FLA_Repart_1x2_to_1x3(), FLA_Repart_2x2_to_3x3(), and FLA_Sylv_internal().
Referenced by FLA_Sylv_nn().
{
// Body of FLA_Sylv_nn_blk_var17: blocked "sylv" update of C in which only B
// is partitioned (2x2) and C is split into column panels (1x2); A is passed
// whole to every FLA_Sylv_internal call. ("sylv" is presumably a
// Sylvester-equation solve -- confirm against FLA_Sylv_internal.)
// Each iteration first adds FLA_NEGATE( isgn ) * C0 * B01 to C1 and then
// solves for C1.
// isgn and scale are forwarded to FLA_Sylv_internal; cntl supplies the block
// size and the sub-controls handed to the internal calls.
// Always returns FLA_SUCCESS; return values of the internal calls are not
// inspected.
FLA_Obj BTL, BTR, B00, B01, B02,
BBL, BBR, B10, B11, B12,
B20, B21, B22;
FLA_Obj CL, CR, C0, C1, C2;
dim_t b;
// Initial partitioning: BTL and CL are requested with size 0 and grow as the
// loop proceeds.
FLA_Part_2x2( B, &BTL, &BTR,
&BBL, &BBR, 0, 0, FLA_TL );
FLA_Part_1x2( C, &CL, &CR, 0, FLA_LEFT );
// Iterate until BTL has as many rows as B.
while ( FLA_Obj_length( BTL ) < FLA_Obj_length( B ) ){
// Step size for this iteration, derived from CR and the blocksize in cntl.
b = FLA_Determine_blocksize( CR, FLA_RIGHT, FLA_Cntl_blocksize( cntl ) );
// Repartition B 2x2 -> 3x3 and C 1x2 -> 1x3 using step b.
FLA_Repart_2x2_to_3x3( BTL, /**/ BTR, &B00, /**/ &B01, &B02,
/* ************* */ /* ******************** */
&B10, /**/ &B11, &B12,
BBL, /**/ BBR, &B20, /**/ &B21, &B22,
b, b, FLA_BR );
FLA_Repart_1x2_to_1x3( CL, /**/ CR, &C0, /**/ &C1, &C2,
b, FLA_RIGHT );
// Loop Invariant:
// CL =
// CR =
// NOTE(review): the invariant above is blank in the original. From the update
// below it is presumably CL = sylv( A, BTL, CL ) and CR = CR -- confirm.
/*------------------------------------------------------------*/
// "-/+" in the comment below stands for the FLA_NEGATE( isgn ) coefficient.
// The gemm must precede the sylv call: it forms the right-hand side that the
// solve then overwrites.
// C1 = sylv( A, B11, C1 -/+ C0 * B01 );
FLA_Gemm_internal( FLA_NO_TRANSPOSE, FLA_NO_TRANSPOSE,
FLA_NEGATE( isgn ), C0, B01, FLA_ONE, C1,
FLA_Cntl_sub_gemm1( cntl ) );
FLA_Sylv_internal( FLA_NO_TRANSPOSE, FLA_NO_TRANSPOSE,
isgn, A, B11, C1, scale,
FLA_Cntl_sub_sylv1( cntl ) );
/*------------------------------------------------------------*/
// Merge the 3x3 / 1x3 views back so that BTL and CL absorb the blocks handled
// in this iteration.
FLA_Cont_with_3x3_to_2x2( &BTL, /**/ &BTR, B00, B01, /**/ B02,
B10, B11, /**/ B12,
/* ************** */ /* ****************** */
&BBL, /**/ &BBR, B20, B21, /**/ B22,
FLA_TL );
FLA_Cont_with_1x3_to_1x2( &CL, /**/ &CR, C0, C1, /**/ C2,
FLA_LEFT );
}
return FLA_SUCCESS;
}
FLA_Error FLA_Sylv_nn_blk_var18( FLA_Obj isgn,
                                 FLA_Obj A,
                                 FLA_Obj B,
                                 FLA_Obj C,
                                 FLA_Obj scale,
                                 fla_sylv_t* cntl )
References FLA_Cont_with_1x3_to_1x2(), FLA_Cont_with_3x3_to_2x2(), FLA_Determine_blocksize(), FLA_Gemm_internal(), FLA_Obj_length(), FLA_ONE, FLA_Part_1x2(), FLA_Part_2x2(), FLA_Repart_1x2_to_1x3(), FLA_Repart_2x2_to_3x3(), and FLA_Sylv_internal().
Referenced by FLA_Sylv_nn().
{
// Body of FLA_Sylv_nn_blk_var18: blocked "sylv" update of C in which only B
// is partitioned (2x2) and C is split into column panels (1x2); A is passed
// whole to every FLA_Sylv_internal call. ("sylv" is presumably a
// Sylvester-equation solve -- confirm against FLA_Sylv_internal.)
// Each iteration solves for C1 and then adds FLA_NEGATE( isgn ) * C1 * B12
// to C2.
// isgn and scale are forwarded to FLA_Sylv_internal; cntl supplies the block
// size and the sub-controls handed to the internal calls.
// Always returns FLA_SUCCESS; return values of the internal calls are not
// inspected.
FLA_Obj BTL, BTR, B00, B01, B02,
BBL, BBR, B10, B11, B12,
B20, B21, B22;
FLA_Obj CL, CR, C0, C1, C2;
dim_t b;
// Initial partitioning: BTL and CL are requested with size 0 and grow as the
// loop proceeds.
FLA_Part_2x2( B, &BTL, &BTR,
&BBL, &BBR, 0, 0, FLA_TL );
FLA_Part_1x2( C, &CL, &CR, 0, FLA_LEFT );
// Iterate until BTL has as many rows as B.
while ( FLA_Obj_length( BTL ) < FLA_Obj_length( B ) ){
// Step size for this iteration, derived from CR and the blocksize in cntl.
b = FLA_Determine_blocksize( CR, FLA_RIGHT, FLA_Cntl_blocksize( cntl ) );
// Repartition B 2x2 -> 3x3 and C 1x2 -> 1x3 using step b.
FLA_Repart_2x2_to_3x3( BTL, /**/ BTR, &B00, /**/ &B01, &B02,
/* ************* */ /* ******************** */
&B10, /**/ &B11, &B12,
BBL, /**/ BBR, &B20, /**/ &B21, &B22,
b, b, FLA_BR );
FLA_Repart_1x2_to_1x3( CL, /**/ CR, &C0, /**/ &C1, &C2,
b, FLA_RIGHT );
// Loop Invariant:
// CL =
// CR =
// NOTE(review): the invariant above is blank in the original. From the updates
// below it is presumably CL = sylv( A, BTL, CL ) and
// CR = CR -/+ sylv( A, BTL, CL ) * BTR -- confirm.
/*------------------------------------------------------------*/
// "-/+" in the comment below stands for the FLA_NEGATE( isgn ) coefficient.
// The sylv call must precede the gemm: the C2 update consumes the solved C1.
// C1 = sylv( A, B11, C1 );
FLA_Sylv_internal( FLA_NO_TRANSPOSE, FLA_NO_TRANSPOSE,
isgn, A, B11, C1, scale,
FLA_Cntl_sub_sylv1( cntl ) );
// C2 = C2 -/+ C1 * B12;
FLA_Gemm_internal( FLA_NO_TRANSPOSE, FLA_NO_TRANSPOSE,
FLA_NEGATE( isgn ), C1, B12, FLA_ONE, C2,
FLA_Cntl_sub_gemm1( cntl ) );
/*------------------------------------------------------------*/
// Merge the 3x3 / 1x3 views back so that BTL and CL absorb the blocks handled
// in this iteration.
FLA_Cont_with_3x3_to_2x2( &BTL, /**/ &BTR, B00, B01, /**/ B02,
B10, B11, /**/ B12,
/* ************** */ /* ****************** */
&BBL, /**/ &BBR, B20, B21, /**/ B22,
FLA_TL );
FLA_Cont_with_1x3_to_1x2( &CL, /**/ &CR, C0, C1, /**/ C2,
FLA_LEFT );
}
return FLA_SUCCESS;
}
FLA_Error FLA_Sylv_nn_blk_var2( FLA_Obj isgn,
                                FLA_Obj A,
                                FLA_Obj B,
                                FLA_Obj C,
                                FLA_Obj scale,
                                fla_sylv_t* cntl )
References FLA_Cont_with_3x3_to_2x2(), FLA_Determine_blocksize(), FLA_Gemm_internal(), FLA_MINUS_ONE, FLA_Obj_length(), FLA_ONE, FLA_Part_2x2(), FLA_Repart_2x2_to_3x3(), and FLA_Sylv_internal().
Referenced by FLA_Sylv_nn().
{
// Body of FLA_Sylv_nn_blk_var2: blocked "sylv" update of C with A and B used
// untransposed (presumably a Sylvester-equation solve -- confirm against
// FLA_Sylv_internal). Each iteration solves for C10, C21 and C11 and updates
// C00 and C01, as described by the comments inside the loop.
// isgn and scale are forwarded to FLA_Sylv_internal; FLA_NEGATE( isgn ) is the
// coefficient of the C * B products. cntl supplies the block size and the
// sub-controls handed to the internal calls.
// Always returns FLA_SUCCESS; return values of the internal calls are not
// inspected.
FLA_Obj ATL, ATR, A00, A01, A02,
ABL, ABR, A10, A11, A12,
A20, A21, A22;
FLA_Obj BTL, BTR, B00, B01, B02,
BBL, BBR, B10, B11, B12,
B20, B21, B22;
FLA_Obj CTL, CTR, C00, C01, C02,
CBL, CBR, C10, C11, C12,
C20, C21, C22;
dim_t b;
// Initial 2x2 partitioning: the quadrants ABR, BTL and CBL are requested with
// sizes 0, 0 and are the ones that grow as the loop proceeds.
FLA_Part_2x2( A, &ATL, &ATR,
&ABL, &ABR, 0, 0, FLA_BR );
FLA_Part_2x2( B, &BTL, &BTR,
&BBL, &BBR, 0, 0, FLA_TL );
FLA_Part_2x2( C, &CTL, &CTR,
&CBL, &CBR, 0, 0, FLA_BL );
// NOTE(review): only A is tested by the guard although B and C advance by the
// same b each step; this appears to assume A and B have equal order -- confirm
// against the caller.
while ( FLA_Obj_length( ABR ) < FLA_Obj_length( A ) ){
// Step size for this iteration, derived from CTR and the blocksize in cntl.
b = FLA_Determine_blocksize( CTR, FLA_TR, FLA_Cntl_blocksize( cntl ) );
// Repartition each operand 2x2 -> 3x3 using step b.
FLA_Repart_2x2_to_3x3( ATL, /**/ ATR, &A00, &A01, /**/ &A02,
&A10, &A11, /**/ &A12,
/* ************* */ /* ******************** */
ABL, /**/ ABR, &A20, &A21, /**/ &A22,
b, b, FLA_TL );
FLA_Repart_2x2_to_3x3( BTL, /**/ BTR, &B00, /**/ &B01, &B02,
/* ************* */ /* ******************** */
&B10, /**/ &B11, &B12,
BBL, /**/ BBR, &B20, /**/ &B21, &B22,
b, b, FLA_BR );
FLA_Repart_2x2_to_3x3( CTL, /**/ CTR, &C00, /**/ &C01, &C02,
&C10, /**/ &C11, &C12,
/* ************* */ /* ******************** */
CBL, /**/ CBR, &C20, /**/ &C21, &C22,
b, b, FLA_TR );
// Loop Invariant:
// CTL = CTL - ATR * sylv( ABR, BTL, CBL)
// CTR = CTR
// CBL = sylv( ABR, BTL, CBL )
// CBR = CBR
/*------------------------------------------------------------*/
// "-/+" in the comments below stands for the FLA_NEGATE( isgn ) coefficient.
// Statement order matters: C10 feeds C00 and C11, C21 feeds C11 and C01, and
// C11 feeds C01.
// C10 = sylv( A11, B00, C10 );
FLA_Sylv_internal( FLA_NO_TRANSPOSE, FLA_NO_TRANSPOSE,
isgn, A11, B00, C10, scale,
FLA_Cntl_sub_sylv1( cntl ) );
// C00 = C00 - A01 * C10;
FLA_Gemm_internal( FLA_NO_TRANSPOSE, FLA_NO_TRANSPOSE,
FLA_MINUS_ONE, A01, C10, FLA_ONE, C00,
FLA_Cntl_sub_gemm1( cntl ) );
// C21 = sylv( A22, B11, C21 -/+ C20 * B01 );
FLA_Gemm_internal( FLA_NO_TRANSPOSE, FLA_NO_TRANSPOSE,
FLA_NEGATE( isgn ), C20, B01, FLA_ONE, C21,
FLA_Cntl_sub_gemm2( cntl ) );
FLA_Sylv_internal( FLA_NO_TRANSPOSE, FLA_NO_TRANSPOSE,
isgn, A22, B11, C21, scale,
FLA_Cntl_sub_sylv2( cntl ) );
// C11 = sylv( A11, B11, C11 - A12 * C21 -/+ C10 * B01 );
FLA_Gemm_internal( FLA_NO_TRANSPOSE, FLA_NO_TRANSPOSE,
FLA_NEGATE( isgn ), C10, B01, FLA_ONE, C11,
FLA_Cntl_sub_gemm3( cntl ) );
FLA_Gemm_internal( FLA_NO_TRANSPOSE, FLA_NO_TRANSPOSE,
FLA_MINUS_ONE, A12, C21, FLA_ONE, C11,
FLA_Cntl_sub_gemm4( cntl ) );
FLA_Sylv_internal( FLA_NO_TRANSPOSE, FLA_NO_TRANSPOSE,
isgn, A11, B11, C11, scale,
FLA_Cntl_sub_sylv3( cntl ) );
// C01 = C01 - A01 * C11 - A02 * C21;
FLA_Gemm_internal( FLA_NO_TRANSPOSE, FLA_NO_TRANSPOSE,
FLA_MINUS_ONE, A02, C21, FLA_ONE, C01,
FLA_Cntl_sub_gemm5( cntl ) );
FLA_Gemm_internal( FLA_NO_TRANSPOSE, FLA_NO_TRANSPOSE,
FLA_MINUS_ONE, A01, C11, FLA_ONE, C01,
FLA_Cntl_sub_gemm6( cntl ) );
/*------------------------------------------------------------*/
// Merge the 3x3 views back into 2x2 ones so that ABR, BTL and CBL absorb the
// blocks handled in this iteration.
FLA_Cont_with_3x3_to_2x2( &ATL, /**/ &ATR, A00, /**/ A01, A02,
/* ************** */ /* ****************** */
A10, /**/ A11, A12,
&ABL, /**/ &ABR, A20, /**/ A21, A22,
FLA_BR );
FLA_Cont_with_3x3_to_2x2( &BTL, /**/ &BTR, B00, B01, /**/ B02,
B10, B11, /**/ B12,
/* ************** */ /* ****************** */
&BBL, /**/ &BBR, B20, B21, /**/ B22,
FLA_TL );
FLA_Cont_with_3x3_to_2x2( &CTL, /**/ &CTR, C00, C01, /**/ C02,
/* ************** */ /* ****************** */
C10, C11, /**/ C12,
&CBL, /**/ &CBR, C20, C21, /**/ C22,
FLA_BL );
}
return FLA_SUCCESS;
}
FLA_Error FLA_Sylv_nn_blk_var3( FLA_Obj isgn,
                                FLA_Obj A,
                                FLA_Obj B,
                                FLA_Obj C,
                                FLA_Obj scale,
                                fla_sylv_t* cntl )
References FLA_Cont_with_3x3_to_2x2(), FLA_Determine_blocksize(), FLA_Gemm_internal(), FLA_MINUS_ONE, FLA_Obj_length(), FLA_ONE, FLA_Part_2x2(), FLA_Repart_2x2_to_3x3(), and FLA_Sylv_internal().
Referenced by FLA_Sylv_nn().
{
// Blocked variant 3 of the Sylvester solver for the case where neither A nor B
// is transposed (every sub-call passes FLA_NO_TRANSPOSE for both operands).
// Each iteration runs three sub-solves, on C21, C11 and C01, each preceded by
// the GEMM updates of its right-hand side.
// Notation in the step comments: sylv( X, Y, Z ) denotes the sub-solve that
// FLA_Sylv_internal() performs on block Z with coefficient blocks X and Y, and
// "-/+" marks a GEMM whose alpha is FLA_NEGATE( isgn ).
// scale is only forwarded to the sub-solves. The function always returns
// FLA_SUCCESS; the return values of the sub-calls are not inspected.
FLA_Obj ATL, ATR, A00, A01, A02,
ABL, ABR, A10, A11, A12,
A20, A21, A22;
FLA_Obj BTL, BTR, B00, B01, B02,
BBL, BBR, B10, B11, B12,
B20, B21, B22;
FLA_Obj CTL, CTR, C00, C01, C02,
CBL, CBR, C10, C11, C12,
C20, C21, C22;
// Step size used by the current iteration.
dim_t b;
// Initial partitioning: ABR, BTL and CBL are requested as 0 x 0 quadrants.
FLA_Part_2x2( A, &ATL, &ATR,
&ABL, &ABR, 0, 0, FLA_BR );
FLA_Part_2x2( B, &BTL, &BTR,
&BBL, &BBR, 0, 0, FLA_TL );
FLA_Part_2x2( C, &CTL, &CTR,
&CBL, &CBR, 0, 0, FLA_BL );
// Keep going until ABR has as many rows as A.
while ( FLA_Obj_length( ABR ) < FLA_Obj_length( A ) ){
// Step size derived from CTR and the blocksize stored in the control tree.
b = FLA_Determine_blocksize( CTR, FLA_TR, FLA_Cntl_blocksize( cntl ) );
// Repartition each operand from 2x2 to 3x3 using a b x b step.
FLA_Repart_2x2_to_3x3( ATL, /**/ ATR, &A00, &A01, /**/ &A02,
&A10, &A11, /**/ &A12,
/* ************* */ /* ******************** */
ABL, /**/ ABR, &A20, &A21, /**/ &A22,
b, b, FLA_TL );
FLA_Repart_2x2_to_3x3( BTL, /**/ BTR, &B00, /**/ &B01, &B02,
/* ************* */ /* ******************** */
&B10, /**/ &B11, &B12,
BBL, /**/ BBR, &B20, /**/ &B21, &B22,
b, b, FLA_BR );
FLA_Repart_2x2_to_3x3( CTL, /**/ CTR, &C00, /**/ &C01, &C02,
&C10, /**/ &C11, &C12,
/* ************* */ /* ******************** */
CBL, /**/ CBR, &C20, /**/ &C21, &C22,
b, b, FLA_TR );
// Loop Invariant:
// CTL = sylv( ATL, BTL, CTL - ATR * sylv( ABR, BTL, CBL ) )
// CTR = CTR
// CBL = sylv( ABR, BTL, CBL )
// CBR = CBR
/*------------------------------------------------------------*/
// C21 = sylv( A22, B11, C21 -/+ C20 * B01 );
FLA_Gemm_internal( FLA_NO_TRANSPOSE, FLA_NO_TRANSPOSE,
FLA_NEGATE( isgn ), C20, B01, FLA_ONE, C21,
FLA_Cntl_sub_gemm1( cntl ) );
FLA_Sylv_internal( FLA_NO_TRANSPOSE, FLA_NO_TRANSPOSE,
isgn, A22, B11, C21, scale,
FLA_Cntl_sub_sylv1( cntl ) );
// C11 = sylv( A11, B11, C11 - A12 * C21 -/+ C10 * B01 );
FLA_Gemm_internal( FLA_NO_TRANSPOSE, FLA_NO_TRANSPOSE,
FLA_NEGATE( isgn ), C10, B01, FLA_ONE, C11,
FLA_Cntl_sub_gemm2( cntl ) );
FLA_Gemm_internal( FLA_NO_TRANSPOSE, FLA_NO_TRANSPOSE,
FLA_MINUS_ONE, A12, C21, FLA_ONE, C11,
FLA_Cntl_sub_gemm3( cntl ) );
FLA_Sylv_internal( FLA_NO_TRANSPOSE, FLA_NO_TRANSPOSE,
isgn, A11, B11, C11, scale,
FLA_Cntl_sub_sylv2( cntl ) );
// C01 = sylv( A00, B11, C01 - A01 * C11 - A02 * C21 -/+ C00 * B01 );
FLA_Gemm_internal( FLA_NO_TRANSPOSE, FLA_NO_TRANSPOSE,
FLA_NEGATE( isgn ), C00, B01, FLA_ONE, C01,
FLA_Cntl_sub_gemm4( cntl ) );
FLA_Gemm_internal( FLA_NO_TRANSPOSE, FLA_NO_TRANSPOSE,
FLA_MINUS_ONE, A02, C21, FLA_ONE, C01,
FLA_Cntl_sub_gemm5( cntl ) );
FLA_Gemm_internal( FLA_NO_TRANSPOSE, FLA_NO_TRANSPOSE,
FLA_MINUS_ONE, A01, C11, FLA_ONE, C01,
FLA_Cntl_sub_gemm6( cntl ) );
FLA_Sylv_internal( FLA_NO_TRANSPOSE, FLA_NO_TRANSPOSE,
isgn, A00, B11, C01, scale,
FLA_Cntl_sub_sylv3( cntl ) );
/*------------------------------------------------------------*/
// Merge the 3x3 views back into 2x2 partitions for the next iteration.
FLA_Cont_with_3x3_to_2x2( &ATL, /**/ &ATR, A00, /**/ A01, A02,
/* ************** */ /* ****************** */
A10, /**/ A11, A12,
&ABL, /**/ &ABR, A20, /**/ A21, A22,
FLA_BR );
FLA_Cont_with_3x3_to_2x2( &BTL, /**/ &BTR, B00, B01, /**/ B02,
B10, B11, /**/ B12,
/* ************** */ /* ****************** */
&BBL, /**/ &BBR, B20, B21, /**/ B22,
FLA_TL );
FLA_Cont_with_3x3_to_2x2( &CTL, /**/ &CTR, C00, C01, /**/ C02,
/* ************** */ /* ****************** */
C10, C11, /**/ C12,
&CBL, /**/ &CBR, C20, C21, /**/ C22,
FLA_BL );
}
return FLA_SUCCESS;
}
FLA_Error FLA_Sylv_nn_blk_var4( FLA_Obj isgn,
                                FLA_Obj A,
                                FLA_Obj B,
                                FLA_Obj C,
                                FLA_Obj scale,
                                fla_sylv_t* cntl )
References FLA_Cont_with_3x3_to_2x2(), FLA_Determine_blocksize(), FLA_Gemm_internal(), FLA_MINUS_ONE, FLA_Obj_length(), FLA_ONE, FLA_Part_2x2(), FLA_Repart_2x2_to_3x3(), and FLA_Sylv_internal().
Referenced by FLA_Sylv_nn().
{
// Blocked variant 4 of the Sylvester solver for the case where neither A nor B
// is transposed (every sub-call passes FLA_NO_TRANSPOSE for both operands).
// Each iteration runs sub-solves on C10, C21 and C11, then applies GEMM
// updates to C12 and C22.
// Notation in the step comments: sylv( X, Y, Z ) denotes the sub-solve that
// FLA_Sylv_internal() performs on block Z with coefficient blocks X and Y, and
// "-/+" marks a GEMM whose alpha is FLA_NEGATE( isgn ).
// scale is only forwarded to the sub-solves. The function always returns
// FLA_SUCCESS; the return values of the sub-calls are not inspected.
FLA_Obj ATL, ATR, A00, A01, A02,
ABL, ABR, A10, A11, A12,
A20, A21, A22;
FLA_Obj BTL, BTR, B00, B01, B02,
BBL, BBR, B10, B11, B12,
B20, B21, B22;
FLA_Obj CTL, CTR, C00, C01, C02,
CBL, CBR, C10, C11, C12,
C20, C21, C22;
// Step size used by the current iteration.
dim_t b;
// Initial partitioning: ABR, BTL and CBL are requested as 0 x 0 quadrants.
FLA_Part_2x2( A, &ATL, &ATR,
&ABL, &ABR, 0, 0, FLA_BR );
FLA_Part_2x2( B, &BTL, &BTR,
&BBL, &BBR, 0, 0, FLA_TL );
FLA_Part_2x2( C, &CTL, &CTR,
&CBL, &CBR, 0, 0, FLA_BL );
// Keep going until ABR has as many rows as A.
while ( FLA_Obj_length( ABR ) < FLA_Obj_length( A ) ){
// Step size derived from CTR and the blocksize stored in the control tree.
b = FLA_Determine_blocksize( CTR, FLA_TR, FLA_Cntl_blocksize( cntl ) );
// Repartition each operand from 2x2 to 3x3 using a b x b step.
FLA_Repart_2x2_to_3x3( ATL, /**/ ATR, &A00, &A01, /**/ &A02,
&A10, &A11, /**/ &A12,
/* ************* */ /* ******************** */
ABL, /**/ ABR, &A20, &A21, /**/ &A22,
b, b, FLA_TL );
FLA_Repart_2x2_to_3x3( BTL, /**/ BTR, &B00, /**/ &B01, &B02,
/* ************* */ /* ******************** */
&B10, /**/ &B11, &B12,
BBL, /**/ BBR, &B20, /**/ &B21, &B22,
b, b, FLA_BR );
FLA_Repart_2x2_to_3x3( CTL, /**/ CTR, &C00, /**/ &C01, &C02,
&C10, /**/ &C11, &C12,
/* ************* */ /* ******************** */
CBL, /**/ CBR, &C20, /**/ &C21, &C22,
b, b, FLA_TR );
// Loop Invariant:
// CTL = CTL
// CTR = CTR
// CBL = sylv( ABR, BTL, CBL )
// CBR = CBR - sylv( ABR, BTL, CBL ) * BTR
/*------------------------------------------------------------*/
// C10 = sylv( A11, B00, C10 - A12 * C20 );
FLA_Gemm_internal( FLA_NO_TRANSPOSE, FLA_NO_TRANSPOSE,
FLA_MINUS_ONE, A12, C20, FLA_ONE, C10,
FLA_Cntl_sub_gemm1( cntl ) );
FLA_Sylv_internal( FLA_NO_TRANSPOSE, FLA_NO_TRANSPOSE,
isgn, A11, B00, C10, scale,
FLA_Cntl_sub_sylv1( cntl ) );
// C21 = sylv( A22, B11, C21 );
FLA_Sylv_internal( FLA_NO_TRANSPOSE, FLA_NO_TRANSPOSE,
isgn, A22, B11, C21, scale,
FLA_Cntl_sub_sylv2( cntl ) );
// C11 = sylv( A11, B11, C11 - A12 * C21 -/+ C10 * B01 );
FLA_Gemm_internal( FLA_NO_TRANSPOSE, FLA_NO_TRANSPOSE,
FLA_NEGATE( isgn ), C10, B01, FLA_ONE, C11,
FLA_Cntl_sub_gemm2( cntl ) );
FLA_Gemm_internal( FLA_NO_TRANSPOSE, FLA_NO_TRANSPOSE,
FLA_MINUS_ONE, A12, C21, FLA_ONE, C11,
FLA_Cntl_sub_gemm3( cntl ) );
FLA_Sylv_internal( FLA_NO_TRANSPOSE, FLA_NO_TRANSPOSE,
isgn, A11, B11, C11, scale,
FLA_Cntl_sub_sylv3( cntl ) );
// C12 = C12 -/+ C10 * B02 -/+ C11 * B12;
FLA_Gemm_internal( FLA_NO_TRANSPOSE, FLA_NO_TRANSPOSE,
FLA_NEGATE( isgn ), C11, B12, FLA_ONE, C12,
FLA_Cntl_sub_gemm4( cntl ) );
FLA_Gemm_internal( FLA_NO_TRANSPOSE, FLA_NO_TRANSPOSE,
FLA_NEGATE( isgn ), C10, B02, FLA_ONE, C12,
FLA_Cntl_sub_gemm5( cntl ) );
// C22 = C22 -/+ C21 * B12;
FLA_Gemm_internal( FLA_NO_TRANSPOSE, FLA_NO_TRANSPOSE,
FLA_NEGATE( isgn ), C21, B12, FLA_ONE, C22,
FLA_Cntl_sub_gemm6( cntl ) );
/*------------------------------------------------------------*/
// Merge the 3x3 views back into 2x2 partitions for the next iteration.
FLA_Cont_with_3x3_to_2x2( &ATL, /**/ &ATR, A00, /**/ A01, A02,
/* ************** */ /* ****************** */
A10, /**/ A11, A12,
&ABL, /**/ &ABR, A20, /**/ A21, A22,
FLA_BR );
FLA_Cont_with_3x3_to_2x2( &BTL, /**/ &BTR, B00, B01, /**/ B02,
B10, B11, /**/ B12,
/* ************** */ /* ****************** */
&BBL, /**/ &BBR, B20, B21, /**/ B22,
FLA_TL );
FLA_Cont_with_3x3_to_2x2( &CTL, /**/ &CTR, C00, C01, /**/ C02,
/* ************** */ /* ****************** */
C10, C11, /**/ C12,
&CBL, /**/ &CBR, C20, C21, /**/ C22,
FLA_BL );
}
return FLA_SUCCESS;
}
FLA_Error FLA_Sylv_nn_blk_var5( FLA_Obj isgn,
                                FLA_Obj A,
                                FLA_Obj B,
                                FLA_Obj C,
                                FLA_Obj scale,
                                fla_sylv_t* cntl )
References FLA_Cont_with_3x3_to_2x2(), FLA_Determine_blocksize(), FLA_Gemm_internal(), FLA_MINUS_ONE, FLA_Obj_length(), FLA_ONE, FLA_Part_2x2(), FLA_Repart_2x2_to_3x3(), and FLA_Sylv_internal().
Referenced by FLA_Sylv_nn().
{
// Blocked variant 5 of the Sylvester solver for the case where neither A nor B
// is transposed (every sub-call passes FLA_NO_TRANSPOSE for both operands).
// Each iteration runs three sub-solves, on C10, C11 and C12, each preceded by
// the GEMM updates of its right-hand side.
// Notation in the step comments: sylv( X, Y, Z ) denotes the sub-solve that
// FLA_Sylv_internal() performs on block Z with coefficient blocks X and Y, and
// "-/+" marks a GEMM whose alpha is FLA_NEGATE( isgn ).
// scale is only forwarded to the sub-solves. The function always returns
// FLA_SUCCESS; the return values of the sub-calls are not inspected.
FLA_Obj ATL, ATR, A00, A01, A02,
ABL, ABR, A10, A11, A12,
A20, A21, A22;
FLA_Obj BTL, BTR, B00, B01, B02,
BBL, BBR, B10, B11, B12,
B20, B21, B22;
FLA_Obj CTL, CTR, C00, C01, C02,
CBL, CBR, C10, C11, C12,
C20, C21, C22;
// Step size used by the current iteration.
dim_t b;
// Initial partitioning: ABR, BTL and CBL are requested as 0 x 0 quadrants.
FLA_Part_2x2( A, &ATL, &ATR,
&ABL, &ABR, 0, 0, FLA_BR );
FLA_Part_2x2( B, &BTL, &BTR,
&BBL, &BBR, 0, 0, FLA_TL );
FLA_Part_2x2( C, &CTL, &CTR,
&CBL, &CBR, 0, 0, FLA_BL );
// Keep going until ABR has as many rows as A.
while ( FLA_Obj_length( ABR ) < FLA_Obj_length( A ) ){
// Step size derived from CTR and the blocksize stored in the control tree.
b = FLA_Determine_blocksize( CTR, FLA_TR, FLA_Cntl_blocksize( cntl ) );
// Repartition each operand from 2x2 to 3x3 using a b x b step.
FLA_Repart_2x2_to_3x3( ATL, /**/ ATR, &A00, &A01, /**/ &A02,
&A10, &A11, /**/ &A12,
/* ************* */ /* ******************** */
ABL, /**/ ABR, &A20, &A21, /**/ &A22,
b, b, FLA_TL );
FLA_Repart_2x2_to_3x3( BTL, /**/ BTR, &B00, /**/ &B01, &B02,
/* ************* */ /* ******************** */
&B10, /**/ &B11, &B12,
BBL, /**/ BBR, &B20, /**/ &B21, &B22,
b, b, FLA_BR );
FLA_Repart_2x2_to_3x3( CTL, /**/ CTR, &C00, /**/ &C01, &C02,
&C10, /**/ &C11, &C12,
/* ************* */ /* ******************** */
CBL, /**/ CBR, &C20, /**/ &C21, &C22,
b, b, FLA_TR );
// Loop Invariant:
// CTL = CTL
// CTR = CTR
// CBL = sylv( ABR, BTL, CBL )
// CBR = sylv( ABR, BBR, CBR - sylv( ABR, BTL, CBL ) * BTR )
/*------------------------------------------------------------*/
// C10 = sylv( A11, B00, C10 - A12 * C20 );
FLA_Gemm_internal( FLA_NO_TRANSPOSE, FLA_NO_TRANSPOSE,
FLA_MINUS_ONE, A12, C20, FLA_ONE, C10,
FLA_Cntl_sub_gemm1( cntl ) );
FLA_Sylv_internal( FLA_NO_TRANSPOSE, FLA_NO_TRANSPOSE,
isgn, A11, B00, C10, scale,
FLA_Cntl_sub_sylv1( cntl ) );
// C11 = sylv( A11, B11, C11 - A12 * C21 -/+ C10 * B01 );
FLA_Gemm_internal( FLA_NO_TRANSPOSE, FLA_NO_TRANSPOSE,
FLA_NEGATE( isgn ), C10, B01, FLA_ONE, C11,
FLA_Cntl_sub_gemm2( cntl ) );
FLA_Gemm_internal( FLA_NO_TRANSPOSE, FLA_NO_TRANSPOSE,
FLA_MINUS_ONE, A12, C21, FLA_ONE, C11,
FLA_Cntl_sub_gemm3( cntl ) );
FLA_Sylv_internal( FLA_NO_TRANSPOSE, FLA_NO_TRANSPOSE,
isgn, A11, B11, C11, scale,
FLA_Cntl_sub_sylv2( cntl ) );
// C12 = sylv( A11, B22, C12 - A12 * C22 -/+ C10 * B02 -/+ C11 * B12 );
FLA_Gemm_internal( FLA_NO_TRANSPOSE, FLA_NO_TRANSPOSE,
FLA_NEGATE( isgn ), C11, B12, FLA_ONE, C12,
FLA_Cntl_sub_gemm4( cntl ) );
FLA_Gemm_internal( FLA_NO_TRANSPOSE, FLA_NO_TRANSPOSE,
FLA_NEGATE( isgn ), C10, B02, FLA_ONE, C12,
FLA_Cntl_sub_gemm5( cntl ) );
FLA_Gemm_internal( FLA_NO_TRANSPOSE, FLA_NO_TRANSPOSE,
FLA_MINUS_ONE, A12, C22, FLA_ONE, C12,
FLA_Cntl_sub_gemm6( cntl ) );
FLA_Sylv_internal( FLA_NO_TRANSPOSE, FLA_NO_TRANSPOSE,
isgn, A11, B22, C12, scale,
FLA_Cntl_sub_sylv3( cntl ) );
/*------------------------------------------------------------*/
// Merge the 3x3 views back into 2x2 partitions for the next iteration.
FLA_Cont_with_3x3_to_2x2( &ATL, /**/ &ATR, A00, /**/ A01, A02,
/* ************** */ /* ****************** */
A10, /**/ A11, A12,
&ABL, /**/ &ABR, A20, /**/ A21, A22,
FLA_BR );
FLA_Cont_with_3x3_to_2x2( &BTL, /**/ &BTR, B00, B01, /**/ B02,
B10, B11, /**/ B12,
/* ************** */ /* ****************** */
&BBL, /**/ &BBR, B20, B21, /**/ B22,
FLA_TL );
FLA_Cont_with_3x3_to_2x2( &CTL, /**/ &CTR, C00, C01, /**/ C02,
/* ************** */ /* ****************** */
C10, C11, /**/ C12,
&CBL, /**/ &CBR, C20, C21, /**/ C22,
FLA_BL );
}
return FLA_SUCCESS;
}
FLA_Error FLA_Sylv_nn_blk_var6( FLA_Obj isgn,
                                FLA_Obj A,
                                FLA_Obj B,
                                FLA_Obj C,
                                FLA_Obj scale,
                                fla_sylv_t* cntl )
References FLA_Cont_with_3x3_to_2x2(), FLA_Determine_blocksize(), FLA_Gemm_internal(), FLA_MINUS_ONE, FLA_Obj_length(), FLA_ONE, FLA_Part_2x2(), FLA_Repart_2x2_to_3x3(), and FLA_Sylv_internal().
Referenced by FLA_Sylv_nn().
{
// Blocked variant 6 of the Sylvester solver for the case where neither A nor B
// is transposed (every sub-call passes FLA_NO_TRANSPOSE for both operands).
// Each iteration runs sub-solves on C10, C21 and C11 and applies GEMM updates
// to C00, C01, C12 and C22.
// Notation in the step comments: sylv( X, Y, Z ) denotes the sub-solve that
// FLA_Sylv_internal() performs on block Z with coefficient blocks X and Y, and
// "-/+" marks a GEMM whose alpha is FLA_NEGATE( isgn ).
// scale is only forwarded to the sub-solves. The function always returns
// FLA_SUCCESS; the return values of the sub-calls are not inspected.
FLA_Obj ATL, ATR, A00, A01, A02,
ABL, ABR, A10, A11, A12,
A20, A21, A22;
FLA_Obj BTL, BTR, B00, B01, B02,
BBL, BBR, B10, B11, B12,
B20, B21, B22;
FLA_Obj CTL, CTR, C00, C01, C02,
CBL, CBR, C10, C11, C12,
C20, C21, C22;
// Step size used by the current iteration.
dim_t b;
// Initial partitioning: ABR, BTL and CBL are requested as 0 x 0 quadrants.
FLA_Part_2x2( A, &ATL, &ATR,
&ABL, &ABR, 0, 0, FLA_BR );
FLA_Part_2x2( B, &BTL, &BTR,
&BBL, &BBR, 0, 0, FLA_TL );
FLA_Part_2x2( C, &CTL, &CTR,
&CBL, &CBR, 0, 0, FLA_BL );
// Keep going until ABR has as many rows as A.
while ( FLA_Obj_length( ABR ) < FLA_Obj_length( A ) ){
// Step size derived from CTR and the blocksize stored in the control tree.
b = FLA_Determine_blocksize( CTR, FLA_TR, FLA_Cntl_blocksize( cntl ) );
// Repartition each operand from 2x2 to 3x3 using a b x b step.
FLA_Repart_2x2_to_3x3( ATL, /**/ ATR, &A00, &A01, /**/ &A02,
&A10, &A11, /**/ &A12,
/* ************* */ /* ******************** */
ABL, /**/ ABR, &A20, &A21, /**/ &A22,
b, b, FLA_TL );
FLA_Repart_2x2_to_3x3( BTL, /**/ BTR, &B00, /**/ &B01, &B02,
/* ************* */ /* ******************** */
&B10, /**/ &B11, &B12,
BBL, /**/ BBR, &B20, /**/ &B21, &B22,
b, b, FLA_BR );
FLA_Repart_2x2_to_3x3( CTL, /**/ CTR, &C00, /**/ &C01, &C02,
&C10, /**/ &C11, &C12,
/* ************* */ /* ******************** */
CBL, /**/ CBR, &C20, /**/ &C21, &C22,
b, b, FLA_TR );
// Loop Invariant:
// CTL = CTL - ATR * sylv( ABR, BTL, CBL )
// CTR = CTR
// CBL = sylv( ABR, BTL, CBL )
// CBR = CBR - sylv( ABR, BTL, CBL ) * BTR
/*------------------------------------------------------------*/
// C10 = sylv( A11, B00, C10 );
FLA_Sylv_internal( FLA_NO_TRANSPOSE, FLA_NO_TRANSPOSE,
isgn, A11, B00, C10, scale,
FLA_Cntl_sub_sylv1( cntl ) );
// C00 = C00 - A01 * C10;
FLA_Gemm_internal( FLA_NO_TRANSPOSE, FLA_NO_TRANSPOSE,
FLA_MINUS_ONE, A01, C10, FLA_ONE, C00,
FLA_Cntl_sub_gemm1( cntl ) );
// C21 = sylv( A22, B11, C21 );
FLA_Sylv_internal( FLA_NO_TRANSPOSE, FLA_NO_TRANSPOSE,
isgn, A22, B11, C21, scale,
FLA_Cntl_sub_sylv2( cntl ) );
// C11 = sylv( A11, B11, C11 - A12 * C21 -/+ C10 * B01 );
FLA_Gemm_internal( FLA_NO_TRANSPOSE, FLA_NO_TRANSPOSE,
FLA_NEGATE( isgn ), C10, B01, FLA_ONE, C11,
FLA_Cntl_sub_gemm2( cntl ) );
FLA_Gemm_internal( FLA_NO_TRANSPOSE, FLA_NO_TRANSPOSE,
FLA_MINUS_ONE, A12, C21, FLA_ONE, C11,
FLA_Cntl_sub_gemm3( cntl ) );
FLA_Sylv_internal( FLA_NO_TRANSPOSE, FLA_NO_TRANSPOSE,
isgn, A11, B11, C11, scale,
FLA_Cntl_sub_sylv3( cntl ) );
// C01 = C01 - A01 * C11 - A02 * C21;
FLA_Gemm_internal( FLA_NO_TRANSPOSE, FLA_NO_TRANSPOSE,
FLA_MINUS_ONE, A02, C21, FLA_ONE, C01,
FLA_Cntl_sub_gemm4( cntl ) );
FLA_Gemm_internal( FLA_NO_TRANSPOSE, FLA_NO_TRANSPOSE,
FLA_MINUS_ONE, A01, C11, FLA_ONE, C01,
FLA_Cntl_sub_gemm5( cntl ) );
// C12 = C12 -/+ C10 * B02 -/+ C11 * B12;
FLA_Gemm_internal( FLA_NO_TRANSPOSE, FLA_NO_TRANSPOSE,
FLA_NEGATE( isgn ), C11, B12, FLA_ONE, C12,
FLA_Cntl_sub_gemm6( cntl ) );
FLA_Gemm_internal( FLA_NO_TRANSPOSE, FLA_NO_TRANSPOSE,
FLA_NEGATE( isgn ), C10, B02, FLA_ONE, C12,
FLA_Cntl_sub_gemm7( cntl ) );
// C22 = C22 -/+ C21 * B12;
FLA_Gemm_internal( FLA_NO_TRANSPOSE, FLA_NO_TRANSPOSE,
FLA_NEGATE( isgn ), C21, B12, FLA_ONE, C22,
FLA_Cntl_sub_gemm8( cntl ) );
/*------------------------------------------------------------*/
// Merge the 3x3 views back into 2x2 partitions for the next iteration.
FLA_Cont_with_3x3_to_2x2( &ATL, /**/ &ATR, A00, /**/ A01, A02,
/* ************** */ /* ****************** */
A10, /**/ A11, A12,
&ABL, /**/ &ABR, A20, /**/ A21, A22,
FLA_BR );
FLA_Cont_with_3x3_to_2x2( &BTL, /**/ &BTR, B00, B01, /**/ B02,
B10, B11, /**/ B12,
/* ************** */ /* ****************** */
&BBL, /**/ &BBR, B20, B21, /**/ B22,
FLA_TL );
FLA_Cont_with_3x3_to_2x2( &CTL, /**/ &CTR, C00, C01, /**/ C02,
/* ************** */ /* ****************** */
C10, C11, /**/ C12,
&CBL, /**/ &CBR, C20, C21, /**/ C22,
FLA_BL );
}
return FLA_SUCCESS;
}
FLA_Error FLA_Sylv_nn_blk_var7( FLA_Obj isgn,
                                FLA_Obj A,
                                FLA_Obj B,
                                FLA_Obj C,
                                FLA_Obj scale,
                                fla_sylv_t* cntl )
References FLA_Cont_with_3x3_to_2x2(), FLA_Determine_blocksize(), FLA_Gemm_internal(), FLA_MINUS_ONE, FLA_Obj_length(), FLA_ONE, FLA_Part_2x2(), FLA_Repart_2x2_to_3x3(), and FLA_Sylv_internal().
Referenced by FLA_Sylv_nn().
{
// Blocked variant 7 of the Sylvester solver for the case where neither A nor B
// is transposed (every sub-call passes FLA_NO_TRANSPOSE for both operands).
// Each iteration runs sub-solves on C21, C11 and C01, then applies GEMM
// updates to C12 and C22.
// Notation in the step comments: sylv( X, Y, Z ) denotes the sub-solve that
// FLA_Sylv_internal() performs on block Z with coefficient blocks X and Y, and
// "-/+" marks a GEMM whose alpha is FLA_NEGATE( isgn ).
// scale is only forwarded to the sub-solves. The function always returns
// FLA_SUCCESS; the return values of the sub-calls are not inspected.
FLA_Obj ATL, ATR, A00, A01, A02,
ABL, ABR, A10, A11, A12,
A20, A21, A22;
FLA_Obj BTL, BTR, B00, B01, B02,
BBL, BBR, B10, B11, B12,
B20, B21, B22;
FLA_Obj CTL, CTR, C00, C01, C02,
CBL, CBR, C10, C11, C12,
C20, C21, C22;
// Step size used by the current iteration.
dim_t b;
// Initial partitioning: ABR, BTL and CBL are requested as 0 x 0 quadrants.
FLA_Part_2x2( A, &ATL, &ATR,
&ABL, &ABR, 0, 0, FLA_BR );
FLA_Part_2x2( B, &BTL, &BTR,
&BBL, &BBR, 0, 0, FLA_TL );
FLA_Part_2x2( C, &CTL, &CTR,
&CBL, &CBR, 0, 0, FLA_BL );
// Keep going until ABR has as many rows as A.
while ( FLA_Obj_length( ABR ) < FLA_Obj_length( A ) ){
// Step size derived from CTR and the blocksize stored in the control tree.
b = FLA_Determine_blocksize( CTR, FLA_TR, FLA_Cntl_blocksize( cntl ) );
// Repartition each operand from 2x2 to 3x3 using a b x b step.
FLA_Repart_2x2_to_3x3( ATL, /**/ ATR, &A00, &A01, /**/ &A02,
&A10, &A11, /**/ &A12,
/* ************* */ /* ******************** */
ABL, /**/ ABR, &A20, &A21, /**/ &A22,
b, b, FLA_TL );
FLA_Repart_2x2_to_3x3( BTL, /**/ BTR, &B00, /**/ &B01, &B02,
/* ************* */ /* ******************** */
&B10, /**/ &B11, &B12,
BBL, /**/ BBR, &B20, /**/ &B21, &B22,
b, b, FLA_BR );
FLA_Repart_2x2_to_3x3( CTL, /**/ CTR, &C00, /**/ &C01, &C02,
&C10, /**/ &C11, &C12,
/* ************* */ /* ******************** */
CBL, /**/ CBR, &C20, /**/ &C21, &C22,
b, b, FLA_TR );
// Loop Invariant:
// CTL = sylv( ATL, BTL, CTL - ATR * sylv( ABR, BTL, CBL ) )
// CTR = CTR
// CBL = sylv( ABR, BTL, CBL )
// CBR = CBR - sylv( ABR, BTL, CBL ) * BTR
/*------------------------------------------------------------*/
// C21 = sylv( A22, B11, C21 );
FLA_Sylv_internal( FLA_NO_TRANSPOSE, FLA_NO_TRANSPOSE,
isgn, A22, B11, C21, scale,
FLA_Cntl_sub_sylv1( cntl ) );
// C11 = sylv( A11, B11, C11 - A12 * C21 -/+ C10 * B01 );
FLA_Gemm_internal( FLA_NO_TRANSPOSE, FLA_NO_TRANSPOSE,
FLA_NEGATE( isgn ), C10, B01, FLA_ONE, C11,
FLA_Cntl_sub_gemm1( cntl ) );
FLA_Gemm_internal( FLA_NO_TRANSPOSE, FLA_NO_TRANSPOSE,
FLA_MINUS_ONE, A12, C21, FLA_ONE, C11,
FLA_Cntl_sub_gemm2( cntl ) );
FLA_Sylv_internal( FLA_NO_TRANSPOSE, FLA_NO_TRANSPOSE,
isgn, A11, B11, C11, scale,
FLA_Cntl_sub_sylv2( cntl ) );
// C01 = sylv( A00, B11, C01 - A01 * C11 - A02 * C21 -/+ C00 * B01 );
FLA_Gemm_internal( FLA_NO_TRANSPOSE, FLA_NO_TRANSPOSE,
FLA_NEGATE( isgn ), C00, B01, FLA_ONE, C01,
FLA_Cntl_sub_gemm3( cntl ) );
FLA_Gemm_internal( FLA_NO_TRANSPOSE, FLA_NO_TRANSPOSE,
FLA_MINUS_ONE, A02, C21, FLA_ONE, C01,
FLA_Cntl_sub_gemm4( cntl ) );
FLA_Gemm_internal( FLA_NO_TRANSPOSE, FLA_NO_TRANSPOSE,
FLA_MINUS_ONE, A01, C11, FLA_ONE, C01,
FLA_Cntl_sub_gemm5( cntl ) );
FLA_Sylv_internal( FLA_NO_TRANSPOSE, FLA_NO_TRANSPOSE,
isgn, A00, B11, C01, scale,
FLA_Cntl_sub_sylv3( cntl ) );
// C12 = C12 -/+ C10 * B02 -/+ C11 * B12;
FLA_Gemm_internal( FLA_NO_TRANSPOSE, FLA_NO_TRANSPOSE,
FLA_NEGATE( isgn ), C11, B12, FLA_ONE, C12,
FLA_Cntl_sub_gemm6( cntl ) );
FLA_Gemm_internal( FLA_NO_TRANSPOSE, FLA_NO_TRANSPOSE,
FLA_NEGATE( isgn ), C10, B02, FLA_ONE, C12,
FLA_Cntl_sub_gemm7( cntl ) );
// C22 = C22 -/+ C21 * B12;
FLA_Gemm_internal( FLA_NO_TRANSPOSE, FLA_NO_TRANSPOSE,
FLA_NEGATE( isgn ), C21, B12, FLA_ONE, C22,
FLA_Cntl_sub_gemm8( cntl ) );
/*------------------------------------------------------------*/
// Merge the 3x3 views back into 2x2 partitions for the next iteration.
FLA_Cont_with_3x3_to_2x2( &ATL, /**/ &ATR, A00, /**/ A01, A02,
/* ************** */ /* ****************** */
A10, /**/ A11, A12,
&ABL, /**/ &ABR, A20, /**/ A21, A22,
FLA_BR );
FLA_Cont_with_3x3_to_2x2( &BTL, /**/ &BTR, B00, B01, /**/ B02,
B10, B11, /**/ B12,
/* ************** */ /* ****************** */
&BBL, /**/ &BBR, B20, B21, /**/ B22,
FLA_TL );
FLA_Cont_with_3x3_to_2x2( &CTL, /**/ &CTR, C00, C01, /**/ C02,
/* ************** */ /* ****************** */
C10, C11, /**/ C12,
&CBL, /**/ &CBR, C20, C21, /**/ C22,
FLA_BL );
}
return FLA_SUCCESS;
}
FLA_Error FLA_Sylv_nn_blk_var8( FLA_Obj isgn,
                                FLA_Obj A,
                                FLA_Obj B,
                                FLA_Obj C,
                                FLA_Obj scale,
                                fla_sylv_t* cntl )
References FLA_Cont_with_3x3_to_2x2(), FLA_Determine_blocksize(), FLA_Gemm_internal(), FLA_MINUS_ONE, FLA_Obj_length(), FLA_ONE, FLA_Part_2x2(), FLA_Repart_2x2_to_3x3(), and FLA_Sylv_internal().
Referenced by FLA_Sylv_nn().
{
// Blocked variant 8 of the Sylvester solver for the case where neither A nor B
// is transposed (every sub-call passes FLA_NO_TRANSPOSE for both operands).
// Each iteration runs sub-solves on C10, C11 and C12 and applies GEMM updates
// to C00 and C01.
// Notation in the step comments: sylv( X, Y, Z ) denotes the sub-solve that
// FLA_Sylv_internal() performs on block Z with coefficient blocks X and Y, and
// "-/+" marks a GEMM whose alpha is FLA_NEGATE( isgn ).
// scale is only forwarded to the sub-solves. The function always returns
// FLA_SUCCESS; the return values of the sub-calls are not inspected.
FLA_Obj ATL, ATR, A00, A01, A02,
ABL, ABR, A10, A11, A12,
A20, A21, A22;
FLA_Obj BTL, BTR, B00, B01, B02,
BBL, BBR, B10, B11, B12,
B20, B21, B22;
FLA_Obj CTL, CTR, C00, C01, C02,
CBL, CBR, C10, C11, C12,
C20, C21, C22;
// Step size used by the current iteration.
dim_t b;
// Initial partitioning: ABR, BTL and CBL are requested as 0 x 0 quadrants.
FLA_Part_2x2( A, &ATL, &ATR,
&ABL, &ABR, 0, 0, FLA_BR );
FLA_Part_2x2( B, &BTL, &BTR,
&BBL, &BBR, 0, 0, FLA_TL );
FLA_Part_2x2( C, &CTL, &CTR,
&CBL, &CBR, 0, 0, FLA_BL );
// Keep going until ABR has as many rows as A.
while ( FLA_Obj_length( ABR ) < FLA_Obj_length( A ) ){
// Step size derived from CTR and the blocksize stored in the control tree.
b = FLA_Determine_blocksize( CTR, FLA_TR, FLA_Cntl_blocksize( cntl ) );
// Repartition each operand from 2x2 to 3x3 using a b x b step.
FLA_Repart_2x2_to_3x3( ATL, /**/ ATR, &A00, &A01, /**/ &A02,
&A10, &A11, /**/ &A12,
/* ************* */ /* ******************** */
ABL, /**/ ABR, &A20, &A21, /**/ &A22,
b, b, FLA_TL );
FLA_Repart_2x2_to_3x3( BTL, /**/ BTR, &B00, /**/ &B01, &B02,
/* ************* */ /* ******************** */
&B10, /**/ &B11, &B12,
BBL, /**/ BBR, &B20, /**/ &B21, &B22,
b, b, FLA_BR );
FLA_Repart_2x2_to_3x3( CTL, /**/ CTR, &C00, /**/ &C01, &C02,
&C10, /**/ &C11, &C12,
/* ************* */ /* ******************** */
CBL, /**/ CBR, &C20, /**/ &C21, &C22,
b, b, FLA_TR );
// Loop Invariant:
// CTL = CTL - ATR * sylv( ABR, BTL, CBL )
// CTR = CTR
// CBL = sylv( ABR, BTL, CBL )
// CBR = sylv( ABR, BBR, CBR - sylv( ABR, BTL, CBL ) * BTR )
/*------------------------------------------------------------*/
// C10 = sylv( A11, B00, C10 );
FLA_Sylv_internal( FLA_NO_TRANSPOSE, FLA_NO_TRANSPOSE,
isgn, A11, B00, C10, scale,
FLA_Cntl_sub_sylv1( cntl ) );
// C00 = C00 - A01 * C10;
FLA_Gemm_internal( FLA_NO_TRANSPOSE, FLA_NO_TRANSPOSE,
FLA_MINUS_ONE, A01, C10, FLA_ONE, C00,
FLA_Cntl_sub_gemm1( cntl ) );
// C11 = sylv( A11, B11, C11 - A12 * C21 -/+ C10 * B01 );
FLA_Gemm_internal( FLA_NO_TRANSPOSE, FLA_NO_TRANSPOSE,
FLA_NEGATE( isgn ), C10, B01, FLA_ONE, C11,
FLA_Cntl_sub_gemm2( cntl ) );
FLA_Gemm_internal( FLA_NO_TRANSPOSE, FLA_NO_TRANSPOSE,
FLA_MINUS_ONE, A12, C21, FLA_ONE, C11,
FLA_Cntl_sub_gemm3( cntl ) );
FLA_Sylv_internal( FLA_NO_TRANSPOSE, FLA_NO_TRANSPOSE,
isgn, A11, B11, C11, scale,
FLA_Cntl_sub_sylv2( cntl ) );
// C01 = C01 - A01 * C11 - A02 * C21;
FLA_Gemm_internal( FLA_NO_TRANSPOSE, FLA_NO_TRANSPOSE,
FLA_MINUS_ONE, A02, C21, FLA_ONE, C01,
FLA_Cntl_sub_gemm4( cntl ) );
FLA_Gemm_internal( FLA_NO_TRANSPOSE, FLA_NO_TRANSPOSE,
FLA_MINUS_ONE, A01, C11, FLA_ONE, C01,
FLA_Cntl_sub_gemm5( cntl ) );
// C12 = sylv( A11, B22, C12 - A12 * C22 -/+ C10 * B02 -/+ C11 * B12 );
FLA_Gemm_internal( FLA_NO_TRANSPOSE, FLA_NO_TRANSPOSE,
FLA_NEGATE( isgn ), C11, B12, FLA_ONE, C12,
FLA_Cntl_sub_gemm6( cntl ) );
FLA_Gemm_internal( FLA_NO_TRANSPOSE, FLA_NO_TRANSPOSE,
FLA_NEGATE( isgn ), C10, B02, FLA_ONE, C12,
FLA_Cntl_sub_gemm7( cntl ) );
FLA_Gemm_internal( FLA_NO_TRANSPOSE, FLA_NO_TRANSPOSE,
FLA_MINUS_ONE, A12, C22, FLA_ONE, C12,
FLA_Cntl_sub_gemm8( cntl ) );
FLA_Sylv_internal( FLA_NO_TRANSPOSE, FLA_NO_TRANSPOSE,
isgn, A11, B22, C12, scale,
FLA_Cntl_sub_sylv3( cntl ) );
/*------------------------------------------------------------*/
// Merge the 3x3 views back into 2x2 partitions for the next iteration.
FLA_Cont_with_3x3_to_2x2( &ATL, /**/ &ATR, A00, /**/ A01, A02,
/* ************** */ /* ****************** */
A10, /**/ A11, A12,
&ABL, /**/ &ABR, A20, /**/ A21, A22,
FLA_BR );
FLA_Cont_with_3x3_to_2x2( &BTL, /**/ &BTR, B00, B01, /**/ B02,
B10, B11, /**/ B12,
/* ************** */ /* ****************** */
&BBL, /**/ &BBR, B20, B21, /**/ B22,
FLA_TL );
FLA_Cont_with_3x3_to_2x2( &CTL, /**/ &CTR, C00, C01, /**/ C02,
/* ************** */ /* ****************** */
C10, C11, /**/ C12,
&CBL, /**/ &CBR, C20, C21, /**/ C22,
FLA_BL );
}
return FLA_SUCCESS;
}
FLA_Error FLA_Sylv_nn_blk_var9( FLA_Obj isgn,
                                FLA_Obj A,
                                FLA_Obj B,
                                FLA_Obj C,
                                FLA_Obj scale,
                                fla_sylv_t* cntl )
References FLA_Cont_with_3x3_to_2x2(), FLA_Determine_blocksize(), FLA_Gemm_internal(), FLA_MINUS_ONE, FLA_Obj_length(), FLA_ONE, FLA_Part_2x2(), FLA_Repart_2x2_to_3x3(), and FLA_Sylv_internal().
Referenced by FLA_Sylv_nn().
{
// Blocked variant 9 of the Sylvester solver for the case where neither A nor B
// is transposed (every sub-call passes FLA_NO_TRANSPOSE for both operands).
// Each iteration runs three sub-solves, on C11, C01 and C12, each preceded by
// the GEMM updates of its right-hand side.
// Notation in the step comments: sylv( X, Y, Z ) denotes the sub-solve that
// FLA_Sylv_internal() performs on block Z with coefficient blocks X and Y, and
// "-/+" marks a GEMM whose alpha is FLA_NEGATE( isgn ).
// scale is only forwarded to the sub-solves. The function always returns
// FLA_SUCCESS; the return values of the sub-calls are not inspected.
FLA_Obj ATL, ATR, A00, A01, A02,
ABL, ABR, A10, A11, A12,
A20, A21, A22;
FLA_Obj BTL, BTR, B00, B01, B02,
BBL, BBR, B10, B11, B12,
B20, B21, B22;
FLA_Obj CTL, CTR, C00, C01, C02,
CBL, CBR, C10, C11, C12,
C20, C21, C22;
// Step size used by the current iteration.
dim_t b;
// Initial partitioning: ABR, BTL and CBL are requested as 0 x 0 quadrants.
FLA_Part_2x2( A, &ATL, &ATR,
&ABL, &ABR, 0, 0, FLA_BR );
FLA_Part_2x2( B, &BTL, &BTR,
&BBL, &BBR, 0, 0, FLA_TL );
FLA_Part_2x2( C, &CTL, &CTR,
&CBL, &CBR, 0, 0, FLA_BL );
// Keep going until ABR has as many rows as A.
while ( FLA_Obj_length( ABR ) < FLA_Obj_length( A ) ){
// Step size derived from CTR and the blocksize stored in the control tree.
b = FLA_Determine_blocksize( CTR, FLA_TR, FLA_Cntl_blocksize( cntl ) );
// Repartition each operand from 2x2 to 3x3 using a b x b step.
FLA_Repart_2x2_to_3x3( ATL, /**/ ATR, &A00, &A01, /**/ &A02,
&A10, &A11, /**/ &A12,
/* ************* */ /* ******************** */
ABL, /**/ ABR, &A20, &A21, /**/ &A22,
b, b, FLA_TL );
FLA_Repart_2x2_to_3x3( BTL, /**/ BTR, &B00, /**/ &B01, &B02,
/* ************* */ /* ******************** */
&B10, /**/ &B11, &B12,
BBL, /**/ BBR, &B20, /**/ &B21, &B22,
b, b, FLA_BR );
FLA_Repart_2x2_to_3x3( CTL, /**/ CTR, &C00, /**/ &C01, &C02,
&C10, /**/ &C11, &C12,
/* ************* */ /* ******************** */
CBL, /**/ CBR, &C20, /**/ &C21, &C22,
b, b, FLA_TR );
// Loop Invariant:
// CTL = sylv( ATL, BTL, CTL - ATR * sylv( ABR, BTL, CBL ) )
// CTR = CTR
// CBL = sylv( ABR, BTL, CBL )
// CBR = sylv( ABR, BBR, CBR - sylv( ABR, BTL, CBL ) * BTR )
/*------------------------------------------------------------*/
// C11 = sylv( A11, B11, C11 - A12 * C21 -/+ C10 * B01 );
FLA_Gemm_internal( FLA_NO_TRANSPOSE, FLA_NO_TRANSPOSE,
FLA_NEGATE( isgn ), C10, B01, FLA_ONE, C11,
FLA_Cntl_sub_gemm1( cntl ) );
FLA_Gemm_internal( FLA_NO_TRANSPOSE, FLA_NO_TRANSPOSE,
FLA_MINUS_ONE, A12, C21, FLA_ONE, C11,
FLA_Cntl_sub_gemm2( cntl ) );
FLA_Sylv_internal( FLA_NO_TRANSPOSE, FLA_NO_TRANSPOSE,
isgn, A11, B11, C11, scale,
FLA_Cntl_sub_sylv1( cntl ) );
// C01 = sylv( A00, B11, C01 - A01 * C11 - A02 * C21 -/+ C00 * B01 );
FLA_Gemm_internal( FLA_NO_TRANSPOSE, FLA_NO_TRANSPOSE,
FLA_NEGATE( isgn ), C00, B01, FLA_ONE, C01,
FLA_Cntl_sub_gemm3( cntl ) );
FLA_Gemm_internal( FLA_NO_TRANSPOSE, FLA_NO_TRANSPOSE,
FLA_MINUS_ONE, A02, C21, FLA_ONE, C01,
FLA_Cntl_sub_gemm4( cntl ) );
FLA_Gemm_internal( FLA_NO_TRANSPOSE, FLA_NO_TRANSPOSE,
FLA_MINUS_ONE, A01, C11, FLA_ONE, C01,
FLA_Cntl_sub_gemm5( cntl ) );
FLA_Sylv_internal( FLA_NO_TRANSPOSE, FLA_NO_TRANSPOSE,
isgn, A00, B11, C01, scale,
FLA_Cntl_sub_sylv2( cntl ) );
// C12 = sylv( A11, B22, C12 - A12 * C22 -/+ C10 * B02 -/+ C11 * B12 );
FLA_Gemm_internal( FLA_NO_TRANSPOSE, FLA_NO_TRANSPOSE,
FLA_NEGATE( isgn ), C11, B12, FLA_ONE, C12,
FLA_Cntl_sub_gemm6( cntl ) );
FLA_Gemm_internal( FLA_NO_TRANSPOSE, FLA_NO_TRANSPOSE,
FLA_NEGATE( isgn ), C10, B02, FLA_ONE, C12,
FLA_Cntl_sub_gemm7( cntl ) );
FLA_Gemm_internal( FLA_NO_TRANSPOSE, FLA_NO_TRANSPOSE,
FLA_MINUS_ONE, A12, C22, FLA_ONE, C12,
FLA_Cntl_sub_gemm8( cntl ) );
FLA_Sylv_internal( FLA_NO_TRANSPOSE, FLA_NO_TRANSPOSE,
isgn, A11, B22, C12, scale,
FLA_Cntl_sub_sylv3( cntl ) );
/*------------------------------------------------------------*/
// Merge the 3x3 views back into 2x2 partitions for the next iteration.
FLA_Cont_with_3x3_to_2x2( &ATL, /**/ &ATR, A00, /**/ A01, A02,
/* ************** */ /* ****************** */
A10, /**/ A11, A12,
&ABL, /**/ &ABR, A20, /**/ A21, A22,
FLA_BR );
FLA_Cont_with_3x3_to_2x2( &BTL, /**/ &BTR, B00, B01, /**/ B02,
B10, B11, /**/ B12,
/* ************** */ /* ****************** */
&BBL, /**/ &BBR, B20, B21, /**/ B22,
FLA_TL );
FLA_Cont_with_3x3_to_2x2( &CTL, /**/ &CTR, C00, C01, /**/ C02,
/* ************** */ /* ****************** */
C10, C11, /**/ C12,
&CBL, /**/ &CBR, C20, C21, /**/ C22,
FLA_BL );
}
return FLA_SUCCESS;
}
FLA_Error FLA_Sylv_nn_opc_var1( float sgn,
                                int m_C,
                                int n_C,
                                scomplex* buff_A, int rs_A, int cs_A,
                                scomplex* buff_B, int rs_B, int cs_B,
                                scomplex* buff_C, int rs_C, int cs_C,
                                scomplex* buff_scale,
                                int* info )
References bli_cdot(), BLIS_NO_CONJUGATE, scomplex::imag, and scomplex::real.
Referenced by FLA_Sylv_nn_opt_var1().
{
  // Unblocked single-precision complex kernel. C is overwritten one element
  // at a time: columns are visited left to right and, within a column, rows
  // bottom to top. For element (row, col) the kernel evaluates
  //   x = ( c - ( a_right . c_below + sgn * c_left . b_above ) )
  //       / ( a_diag + sgn * b_diag )
  // and stores x back into C. Only the diagonal and the entries to the right
  // of it (A) or above it (B) are read from the coefficient matrices.
  // buff_scale and info are accepted but never accessed here.
  int col;
  int row;

  for ( col = 0; col < n_C; ++col )
  {
    // Column col of B: its first col entries and its diagonal element.
    scomplex* b_above = buff_B + col * cs_B;
    scomplex* b_diag  = b_above + col * rs_B;
    // Top of column col of C.
    scomplex* c_col   = buff_C + col * cs_C;

    for ( row = m_C - 1; row >= 0; --row )
    {
      // Row `row` of A: diagonal element and the part right of it.
      scomplex* a_diag  = buff_A + row * cs_A + row * rs_A;
      scomplex* a_right = buff_A + ( row + 1 ) * cs_A + row * rs_A;
      // Row `row` of C from column 0, the current element, and the
      // entries below it in the current column.
      scomplex* c_left  = buff_C + row * rs_C;
      scomplex* c_elem  = c_col + row * rs_C;
      scomplex* c_below = c_col + ( row + 1 ) * rs_C;
      int       rows_below = m_C - row - 1;
      scomplex  dot_a;
      scomplex  dot_b;
      scomplex  rhs;
      scomplex  denom;
      scomplex  soln;

      /*------------------------------------------------------------*/

      // dot_a = a_right . c_below (rows_below terms)
      bli_cdot( BLIS_NO_CONJUGATE,
                rows_below,
                a_right, cs_A,
                c_below, rs_C,
                &dot_a );

      // dot_b = c_left . b_above (col terms)
      bli_cdot( BLIS_NO_CONJUGATE,
                col,
                c_left, cs_C,
                b_above, rs_B,
                &dot_b );

      // sgn is real, so it scales the real and imaginary parts separately.
      rhs.real = c_elem->real - ( dot_a.real + sgn * dot_b.real );
      rhs.imag = c_elem->imag - ( dot_a.imag + sgn * dot_b.imag );

      denom.real = a_diag->real + sgn * b_diag->real;
      denom.imag = a_diag->imag + sgn * b_diag->imag;

      bli_cdiv3( &rhs, &denom, &soln );

      *c_elem = soln;

      /*------------------------------------------------------------*/
    }
  }

  return FLA_SUCCESS;
}
FLA_Error FLA_Sylv_nn_opd_var1( double sgn,
                                int m_C,
                                int n_C,
                                double* buff_A, int rs_A, int cs_A,
                                double* buff_B, int rs_B, int cs_B,
                                double* buff_C, int rs_C, int cs_C,
                                double* buff_scale,
                                int* info )
References bli_ddot(), and BLIS_NO_CONJUGATE.
Referenced by FLA_Sylv_nn_opt_var1().
{
  // Unblocked double-precision real kernel. C is overwritten one element at
  // a time: columns are visited left to right and, within a column, rows
  // bottom to top. For element (row, col) the kernel evaluates
  //   x = ( c - ( a_right . c_below + sgn * c_left . b_above ) )
  //       / ( a_diag + sgn * b_diag )
  // and stores x back into C. Only the diagonal and the entries to the right
  // of it (A) or above it (B) are read from the coefficient matrices.
  // buff_scale and info are accepted but never accessed here.
  int col;
  int row;

  for ( col = 0; col < n_C; ++col )
  {
    // Column col of B: its first col entries and its diagonal element.
    double* b_above = buff_B + col * cs_B;
    double* b_diag  = b_above + col * rs_B;
    // Top of column col of C.
    double* c_col   = buff_C + col * cs_C;

    for ( row = m_C - 1; row >= 0; --row )
    {
      // Row `row` of A: diagonal element and the part right of it.
      double* a_diag  = buff_A + row * cs_A + row * rs_A;
      double* a_right = buff_A + ( row + 1 ) * cs_A + row * rs_A;
      // Row `row` of C from column 0, the current element, and the
      // entries below it in the current column.
      double* c_left  = buff_C + row * rs_C;
      double* c_elem  = c_col + row * rs_C;
      double* c_below = c_col + ( row + 1 ) * rs_C;
      int     rows_below = m_C - row - 1;
      double  dot_a;
      double  dot_b;
      double  rhs;
      double  denom;
      double  soln;

      /*------------------------------------------------------------*/

      // dot_a = a_right . c_below (rows_below terms)
      bli_ddot( BLIS_NO_CONJUGATE,
                rows_below,
                a_right, cs_A,
                c_below, rs_C,
                &dot_a );

      // dot_b = c_left . b_above (col terms)
      bli_ddot( BLIS_NO_CONJUGATE,
                col,
                c_left, cs_C,
                b_above, rs_B,
                &dot_b );

      rhs = (*c_elem) - ( dot_a + sgn * dot_b );

      denom = (*a_diag) + sgn * (*b_diag);

      bli_ddiv3( &rhs, &denom, &soln );

      *c_elem = soln;

      /*------------------------------------------------------------*/
    }
  }

  return FLA_SUCCESS;
}
FLA_Error FLA_Sylv_nn_ops_var1( float sgn,
                                int m_C,
                                int n_C,
                                float* buff_A, int rs_A, int cs_A,
                                float* buff_B, int rs_B, int cs_B,
                                float* buff_C, int rs_C, int cs_C,
                                float* buff_scale,
                                int* info )
References bli_sdot(), and BLIS_NO_CONJUGATE.
Referenced by FLA_Sylv_nn_opt_var1().
{
  // Unblocked single-precision real kernel. C is overwritten one element at
  // a time: columns are visited left to right and, within a column, rows
  // bottom to top. For element (row, col) the kernel evaluates
  //   x = ( c - ( a_right . c_below + sgn * c_left . b_above ) )
  //       / ( a_diag + sgn * b_diag )
  // and stores x back into C. Only the diagonal and the entries to the right
  // of it (A) or above it (B) are read from the coefficient matrices.
  // buff_scale and info are accepted but never accessed here.
  int col;
  int row;

  for ( col = 0; col < n_C; ++col )
  {
    // Column col of B: its first col entries and its diagonal element.
    float* b_above = buff_B + col * cs_B;
    float* b_diag  = b_above + col * rs_B;
    // Top of column col of C.
    float* c_col   = buff_C + col * cs_C;

    for ( row = m_C - 1; row >= 0; --row )
    {
      // Row `row` of A: diagonal element and the part right of it.
      float* a_diag  = buff_A + row * cs_A + row * rs_A;
      float* a_right = buff_A + ( row + 1 ) * cs_A + row * rs_A;
      // Row `row` of C from column 0, the current element, and the
      // entries below it in the current column.
      float* c_left  = buff_C + row * rs_C;
      float* c_elem  = c_col + row * rs_C;
      float* c_below = c_col + ( row + 1 ) * rs_C;
      int    rows_below = m_C - row - 1;
      float  dot_a;
      float  dot_b;
      float  rhs;
      float  denom;
      float  soln;

      /*------------------------------------------------------------*/

      // dot_a = a_right . c_below (rows_below terms)
      bli_sdot( BLIS_NO_CONJUGATE,
                rows_below,
                a_right, cs_A,
                c_below, rs_C,
                &dot_a );

      // dot_b = c_left . b_above (col terms)
      bli_sdot( BLIS_NO_CONJUGATE,
                col,
                c_left, cs_C,
                b_above, rs_B,
                &dot_b );

      rhs = (*c_elem) - ( dot_a + sgn * dot_b );

      denom = (*a_diag) + sgn * (*b_diag);

      bli_sdiv3( &rhs, &denom, &soln );

      *c_elem = soln;

      /*------------------------------------------------------------*/
    }
  }

  return FLA_SUCCESS;
}
References FLA_Obj_col_stride(), FLA_Obj_datatype(), FLA_Obj_length(), FLA_Obj_row_stride(), FLA_Obj_width(), FLA_Sylv_nn_opc_var1(), FLA_Sylv_nn_opd_var1(), FLA_Sylv_nn_ops_var1(), and FLA_Sylv_nn_opz_var1().
Referenced by FLA_Sylv_nn(), FLA_Sylv_nn_opt_var10(), FLA_Sylv_nn_opt_var11(), FLA_Sylv_nn_opt_var12(), FLA_Sylv_nn_opt_var13(), FLA_Sylv_nn_opt_var14(), FLA_Sylv_nn_opt_var15(), FLA_Sylv_nn_opt_var16(), FLA_Sylv_nn_opt_var17(), FLA_Sylv_nn_opt_var18(), FLA_Sylv_nn_opt_var2(), FLA_Sylv_nn_opt_var3(), FLA_Sylv_nn_opt_var4(), FLA_Sylv_nn_opt_var5(), FLA_Sylv_nn_opt_var6(), FLA_Sylv_nn_opt_var7(), FLA_Sylv_nn_opt_var8(), and FLA_Sylv_nn_opt_var9().
{
  // Object-level front end for the unblocked variant-1 kernels: reads the
  // strides of A, B and C and the dimensions of C, then hands the raw buffers
  // to the kernel matching the datatype of A. The integer behind isgn is
  // converted to the kernel's real type (float or double) before the call.
  // A datatype outside the four handled below results in no kernel call.
  // FLA_SUCCESS is returned in every case; info is handed to the kernel by
  // address and never read afterwards.
  FLA_Datatype datatype = FLA_Obj_datatype( A );

  int rs_A = FLA_Obj_row_stride( A );
  int cs_A = FLA_Obj_col_stride( A );

  int rs_B = FLA_Obj_row_stride( B );
  int cs_B = FLA_Obj_col_stride( B );

  int m_C  = FLA_Obj_length( C );
  int n_C  = FLA_Obj_width( C );
  int rs_C = FLA_Obj_row_stride( C );
  int cs_C = FLA_Obj_col_stride( C );

  int info;

  if ( datatype == FLA_FLOAT )
  {
    int*   buff_isgn  = FLA_INT_PTR( isgn );
    float* buff_A     = FLA_FLOAT_PTR( A );
    float* buff_B     = FLA_FLOAT_PTR( B );
    float* buff_C     = FLA_FLOAT_PTR( C );
    float* buff_scale = FLA_FLOAT_PTR( scale );

    FLA_Sylv_nn_ops_var1( ( float ) *buff_isgn,
                          m_C,
                          n_C,
                          buff_A, rs_A, cs_A,
                          buff_B, rs_B, cs_B,
                          buff_C, rs_C, cs_C,
                          buff_scale,
                          &info );
  }
  else if ( datatype == FLA_DOUBLE )
  {
    int*    buff_isgn  = FLA_INT_PTR( isgn );
    double* buff_A     = FLA_DOUBLE_PTR( A );
    double* buff_B     = FLA_DOUBLE_PTR( B );
    double* buff_C     = FLA_DOUBLE_PTR( C );
    double* buff_scale = FLA_DOUBLE_PTR( scale );

    FLA_Sylv_nn_opd_var1( ( double ) *buff_isgn,
                          m_C,
                          n_C,
                          buff_A, rs_A, cs_A,
                          buff_B, rs_B, cs_B,
                          buff_C, rs_C, cs_C,
                          buff_scale,
                          &info );
  }
  else if ( datatype == FLA_COMPLEX )
  {
    int*      buff_isgn  = FLA_INT_PTR( isgn );
    scomplex* buff_A     = FLA_COMPLEX_PTR( A );
    scomplex* buff_B     = FLA_COMPLEX_PTR( B );
    scomplex* buff_C     = FLA_COMPLEX_PTR( C );
    scomplex* buff_scale = FLA_COMPLEX_PTR( scale );

    FLA_Sylv_nn_opc_var1( ( float ) *buff_isgn,
                          m_C,
                          n_C,
                          buff_A, rs_A, cs_A,
                          buff_B, rs_B, cs_B,
                          buff_C, rs_C, cs_C,
                          buff_scale,
                          &info );
  }
  else if ( datatype == FLA_DOUBLE_COMPLEX )
  {
    int*      buff_isgn  = FLA_INT_PTR( isgn );
    dcomplex* buff_A     = FLA_DOUBLE_COMPLEX_PTR( A );
    dcomplex* buff_B     = FLA_DOUBLE_COMPLEX_PTR( B );
    dcomplex* buff_C     = FLA_DOUBLE_COMPLEX_PTR( C );
    dcomplex* buff_scale = FLA_DOUBLE_COMPLEX_PTR( scale );

    FLA_Sylv_nn_opz_var1( ( double ) *buff_isgn,
                          m_C,
                          n_C,
                          buff_A, rs_A, cs_A,
                          buff_B, rs_B, cs_B,
                          buff_C, rs_C, cs_C,
                          buff_scale,
                          &info );
  }

  return FLA_SUCCESS;
}
References FLA_Sylv_nn_opt_var1().
{
/* Header not shown in this listing; presumably FLA_Sylv_nn_opt_var10 -- confirm.
   Forwards all five operands unchanged to variant 1 and returns its status. */
return FLA_Sylv_nn_opt_var1( isgn, A, B, C, scale );
}
References FLA_Sylv_nn_opt_var1().
{
/* Header not shown in this listing; presumably FLA_Sylv_nn_opt_var11 -- confirm.
   Forwards all five operands unchanged to variant 1 and returns its status. */
return FLA_Sylv_nn_opt_var1( isgn, A, B, C, scale );
}
References FLA_Sylv_nn_opt_var1().
{
/* Header not shown in this listing; presumably FLA_Sylv_nn_opt_var12 -- confirm.
   Forwards all five operands unchanged to variant 1 and returns its status. */
return FLA_Sylv_nn_opt_var1( isgn, A, B, C, scale );
}
References FLA_Sylv_nn_opt_var1().
{
/* Header not shown in this listing; presumably FLA_Sylv_nn_opt_var13 -- confirm.
   Forwards all five operands unchanged to variant 1 and returns its status. */
return FLA_Sylv_nn_opt_var1( isgn, A, B, C, scale );
}
References FLA_Sylv_nn_opt_var1().
{
/* Header not shown in this listing; presumably FLA_Sylv_nn_opt_var14 -- confirm.
   Forwards all five operands unchanged to variant 1 and returns its status. */
return FLA_Sylv_nn_opt_var1( isgn, A, B, C, scale );
}
References FLA_Sylv_nn_opt_var1().
{
/* Header not shown in this listing; presumably FLA_Sylv_nn_opt_var15 -- confirm.
   Forwards all five operands unchanged to variant 1 and returns its status. */
return FLA_Sylv_nn_opt_var1( isgn, A, B, C, scale );
}
References FLA_Sylv_nn_opt_var1().
{
/* Header not shown in this listing; presumably FLA_Sylv_nn_opt_var16 -- confirm.
   Forwards all five operands unchanged to variant 1 and returns its status. */
return FLA_Sylv_nn_opt_var1( isgn, A, B, C, scale );
}
References FLA_Sylv_nn_opt_var1().
{
/* Header not shown in this listing; presumably FLA_Sylv_nn_opt_var17 -- confirm.
   Forwards all five operands unchanged to variant 1 and returns its status. */
return FLA_Sylv_nn_opt_var1( isgn, A, B, C, scale );
}
References FLA_Sylv_nn_opt_var1().
{
/* Header not shown in this listing; presumably FLA_Sylv_nn_opt_var18 -- confirm.
   Forwards all five operands unchanged to variant 1 and returns its status. */
return FLA_Sylv_nn_opt_var1( isgn, A, B, C, scale );
}
References FLA_Sylv_nn_opt_var1().
{
/* Header not shown in this listing; presumably FLA_Sylv_nn_opt_var2 -- confirm.
   Forwards all five operands unchanged to variant 1 and returns its status. */
return FLA_Sylv_nn_opt_var1( isgn, A, B, C, scale );
}
References FLA_Sylv_nn_opt_var1().
{
/* Header not shown in this listing; presumably FLA_Sylv_nn_opt_var3 -- confirm.
   Forwards all five operands unchanged to variant 1 and returns its status. */
return FLA_Sylv_nn_opt_var1( isgn, A, B, C, scale );
}
References FLA_Sylv_nn_opt_var1().
{
/* Header not shown in this listing; presumably FLA_Sylv_nn_opt_var4 -- confirm.
   Forwards all five operands unchanged to variant 1 and returns its status. */
return FLA_Sylv_nn_opt_var1( isgn, A, B, C, scale );
}
References FLA_Sylv_nn_opt_var1().
{
/* Header not shown in this listing; presumably FLA_Sylv_nn_opt_var5 -- confirm.
   Forwards all five operands unchanged to variant 1 and returns its status. */
return FLA_Sylv_nn_opt_var1( isgn, A, B, C, scale );
}
References FLA_Sylv_nn_opt_var1().
{
/* Header not shown in this listing; presumably FLA_Sylv_nn_opt_var6 -- confirm.
   Forwards all five operands unchanged to variant 1 and returns its status. */
return FLA_Sylv_nn_opt_var1( isgn, A, B, C, scale );
}
References FLA_Sylv_nn_opt_var1().
{
/* Header not shown in this listing; presumably FLA_Sylv_nn_opt_var7 -- confirm.
   Forwards all five operands unchanged to variant 1 and returns its status. */
return FLA_Sylv_nn_opt_var1( isgn, A, B, C, scale );
}
References FLA_Sylv_nn_opt_var1().
{
/* Header not shown in this listing; presumably FLA_Sylv_nn_opt_var8 -- confirm.
   Forwards all five operands unchanged to variant 1 and returns its status. */
return FLA_Sylv_nn_opt_var1( isgn, A, B, C, scale );
}
References FLA_Sylv_nn_opt_var1().
{
/* Header not shown in this listing; presumably FLA_Sylv_nn_opt_var9 -- confirm.
   Forwards all five operands unchanged to variant 1 and returns its status. */
return FLA_Sylv_nn_opt_var1( isgn, A, B, C, scale );
}
FLA_Error FLA_Sylv_nn_opz_var1( double    sgn,
                                int       m_C,
                                int       n_C,
                                dcomplex* buff_A, int rs_A, int cs_A,
                                dcomplex* buff_B, int rs_B, int cs_B,
                                dcomplex* buff_C, int rs_C, int cs_C,
                                dcomplex* buff_scale,
                                int*      info )
References bli_zdot(), BLIS_NO_CONJUGATE, dcomplex::imag, and dcomplex::real.
Referenced by FLA_Sylv_nn_opt_var1().
{
  /*
   * Double-precision complex unblocked kernel: overwrites every entry of the
   * m_C x n_C matrix C in place, one scalar at a time.
   *
   * Columns of C are visited left to right and, inside a column, rows are
   * visited bottom to top.  When entry (row, col) is reached, the entries
   * below it in the same column and to its left in the same row have
   * already been overwritten by earlier iterations.  Only A(row, row..m_C-1)
   * and B(0..col, col) are read.  sgn is real, so it scales the real and
   * imaginary parts separately.
   *
   * NOTE(review): this is consistent with substitution for
   * A*X + sgn*X*B = C with upper-triangular A and B -- confirm against the
   * caller.  buff_scale and info are not referenced in this body.
   */
  int col, row;

  for ( col = 0; col < n_C; col++ )
  {
    for ( row = m_C - 1; row >= 0; row-- )
    {
      /* Lengths of the two dot products. */
      int       n_left  = col;
      int       m_below = m_C - row - 1;

      /* Diagonal entries of A and B, and the entry of C being updated. */
      dcomplex* a_diag  = buff_A + (row  )*cs_A + (row  )*rs_A;
      dcomplex* b_diag  = buff_B + (col  )*cs_B + (col  )*rs_B;
      dcomplex* c_entry = buff_C + (col  )*cs_C + (row  )*rs_C;

      /* Row of A to the right of the diagonal and the part of the current
         column of C below the current entry. */
      dcomplex* a_right = buff_A + (row+1)*cs_A + (row  )*rs_A;
      dcomplex* c_below = buff_C + (col  )*cs_C + (row+1)*rs_C;

      /* Current row of C from column 0 and the current column of B from
         row 0; n_left elements of each are used. */
      dcomplex* c_left  = buff_C + (0    )*cs_C + (row  )*rs_C;
      dcomplex* b_above = buff_B + (col  )*cs_B + (0    )*rs_B;

      dcomplex  dot_a;
      dcomplex  dot_b;
      dcomplex  rhs;
      dcomplex  denom;
      dcomplex  result;

      /*------------------------------------------------------------*/

      /* dot_a = a_right . c_below  (m_below terms, no conjugation) */
      bli_zdot( BLIS_NO_CONJUGATE,
                m_below,
                a_right, cs_A,
                c_below, rs_C,
                &dot_a );

      /* dot_b = c_left . b_above  (n_left terms, no conjugation) */
      bli_zdot( BLIS_NO_CONJUGATE,
                n_left,
                c_left,  cs_C,
                b_above, rs_B,
                &dot_b );

      /* rhs = c_entry - ( dot_a + sgn * dot_b ), component by component. */
      rhs.real   = c_entry->real - ( dot_a.real + sgn * dot_b.real );
      rhs.imag   = c_entry->imag - ( dot_a.imag + sgn * dot_b.imag );

      /* denom = a_diag + sgn * b_diag, component by component. */
      denom.real = a_diag->real + sgn * b_diag->real;
      denom.imag = a_diag->imag + sgn * b_diag->imag;

      /* Presumably result = rhs / denom; no zero check on denom is made
         in this routine. */
      bli_zdiv3( &rhs, &denom, &result );

      *c_entry = result;

      /*------------------------------------------------------------*/
    }
  }

  return FLA_SUCCESS;
}
1.7.6.1