Skip to content

Commit

Permalink
cleanup
Browse files Browse the repository at this point in the history
  • Loading branch information
lvanroye committed Jun 19, 2024
1 parent 5ba6e18 commit 8059687
Showing 1 changed file with 0 additions and 112 deletions.
112 changes: 0 additions & 112 deletions fatrop/blasfeo_wrapper/LinearAlgebraBlasfeo.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -36,118 +36,6 @@ namespace fatrop
}
}

// void fatrop_potrf_l_mn(fatrop_int m, fatrop_int n, struct blasfeo_dmat *sC, fatrop_int ci, fatrop_int cj, struct blasfeo_dmat *sD, fatrop_int di, fatrop_int dj)
// {
// blasfeo_dpotrf_l_mn(m, n, sC, ci, cj, sD, di, dj);
// fatrop_int minmn = (m < n) ? m : n;
// for (fatrop_int i =0; i<minmn; i++){
// assert(MATEL(sD, di+i,dj+i)>0);
// }
// }
/** \brief D <= alpha * B * A^{-1}, with A lower triangular, employing the explicit inverse of the
 *  diagonal. fatrop uses its own (naive) implementation since this routine is not yet
 *  implemented in blasfeo. A is n x n (lower triangle referenced), B and D are m x n. */
void fatrop_dtrsm_rlnn(fatrop_int m, fatrop_int n, double alpha, MAT *sA, fatrop_int offs_ai, fatrop_int offs_aj, MAT *sB, fatrop_int offs_bi, fatrop_int offs_bj, MAT *sD, fatrop_int offs_di, fatrop_int offs_dj)
{
    // invalidate blasfeo's cached-diagonal flag on D, since D is overwritten below
    sD->use_dA = 0;
    // Backward substitution over the columns of D: column j only depends on
    // columns j+1 .. n-1, which have already been computed in earlier iterations.
    for (fatrop_int j = n - 1; j >= 0; j--)
    {
        const double diag = MATEL(sA, offs_ai + j, offs_aj + j);
        const double inv_diag = 1.0 / diag;
        const double col_scale = alpha * inv_diag;
        // D[:, j] = (alpha / A[j, j]) * B[:, j]
        for (fatrop_int r = 0; r < m; r++)
        {
            MATEL(sD, offs_di + r, offs_dj + j) = col_scale * MATEL(sB, offs_bi + r, offs_bj + j);
        }
        // D[:, j] -= (A[i, j] / A[j, j]) * D[:, i] for every row i below the diagonal.
        for (fatrop_int i = j + 1; i < n; i++)
        {
            const double coeff = -inv_diag * MATEL(sA, offs_ai + i, offs_aj + j);
            for (fatrop_int r = 0; r < m; r++)
            {
                // NOTE: this algorithm is "store bounded"; the loops can be switched
                // (see the alt version) to make this more efficient.
                MATEL(sD, offs_di + r, offs_dj + j) += coeff * MATEL(sD, offs_di + r, offs_dj + i);
            }
        }
    }
}
/** \brief D <= alpha * B * A^{-1}, with A lower triangular, employing the explicit inverse of the
 *  diagonal. More efficient variant of fatrop_dtrsm_rlnn: the two inner loops are swapped so each
 *  row accumulates its update into a scalar ("load bounded" instead of "store bounded"),
 *  achieving a speed-up of about a factor 3 w.r.t. the naive implementation.
 *  A is n x n (lower triangle referenced), B and D are m x n. */
void fatrop_dtrsm_rlnn_alt(fatrop_int m, fatrop_int n, double alpha, MAT *sA, fatrop_int offs_ai, fatrop_int offs_aj, MAT *sB, fatrop_int offs_bi, fatrop_int offs_bj, MAT *sD, fatrop_int offs_di, fatrop_int offs_dj)
{
    // invalidate blasfeo's cached-diagonal flag on D before overwriting it,
    // consistent with fatrop_dtrsm_rlnn; omitting this could leave a stale
    // diagonal cache inside the blasfeo matrix struct
    sD->use_dA = 0;
    for (fatrop_int aj = n - 1; aj >= 0; aj--)
    {
        double ajj = MATEL(sA, offs_ai + aj, aj + offs_aj);
        double inv_ajj = 1.0 / ajj;
        double scjj = alpha * inv_ajj;
        // D[:, aj] = (alpha / A[aj, aj]) * B[:, aj]
        for (fatrop_int k = 0; k < m; k++)
        {
            // todo, check if possible to include in main loop
            MATEL(sD, offs_di + k, offs_dj + aj) = scjj * MATEL(sB, offs_bi + k, offs_bj + aj);
        }
        // Subtract the contribution of the already-computed columns aj+1 .. n-1,
        // accumulating per row so each element of D[:, aj] is written only once.
        for (fatrop_int k = 0; k < m; k++)
        {
            double res = 0.0;
            for (fatrop_int ai = aj + 1; ai < n; ai = ai + 1)
            {
                // todo unroll loop -> more independent operations -> filled pipelines
                double sc = -inv_ajj * MATEL(sA, offs_ai + ai, offs_aj + aj);
                res += sc * MATEL(sD, offs_di + k, offs_dj + ai);
            }
            MATEL(sD, offs_di + k, offs_dj + aj) += res;
        }
    }
}
// /** \brief D <= alpha * B * A^{-1} , with A lower triangular employing explicit inverse of diagonal, fatrop uses its own (naive) implementation since it not implemented yet in blasfeo*/
// // this is an experimental, more efficient, but the corner cases are not treated in unrolled loop!! it achieves a speed-up of about factor 3 w.r.t naive implementation
// void fatrop_dtrsm_rlnn_alt(fatrop_int m, fatrop_int n, double alpha, MAT *sA, fatrop_int offs_ai, fatrop_int offs_aj, MAT *sB, fatrop_int offs_bi, fatrop_int offs_bj, MAT *sD, fatrop_int offs_di, fatrop_int offs_dj)
// {
// for (fatrop_int aj = n - 1; aj >= 0; aj--)
// {
// double ajj = MATEL(sA, offs_ai + aj, aj + offs_aj);
// double inv_ajj = 1.0 / ajj;
// double scjj = alpha * inv_ajj;
// for (fatrop_int k = 0; k < m; k++)
// {
// // todo, check if possible to incude in main loop
// MATEL(sD, offs_di + k, offs_dj + aj) = scjj * MATEL(sB, offs_bi + k, offs_bj + aj);
// }
// for (fatrop_int k = 0; k < m; k++)
// {
// double res = 0.0;
// double res1 = 0.0;
// double res2 = 0.0;
// double res3 = 0.0;
// // double res4 = 0.0;
// // double res5 = 0.0;
// // double res6 = 0.0;
// // double res7 = 0.0;
// for (fatrop_int ai = aj + 1; ai < n; ai = ai + 4)
// {
// // todo unroll loop -> more independent operations -> filled pipelines
// double sc = -inv_ajj * MATEL(sA, offs_ai + ai, offs_aj + aj);
// res += sc * MATEL(sD, offs_di + k, offs_dj + ai);
// double sc1 = -inv_ajj * MATEL(sA, offs_ai + ai + 1, offs_aj + aj);
// res1 += sc1 * MATEL(sD, offs_di + k, offs_dj + ai + 1);
// double sc2 = -inv_ajj * MATEL(sA, offs_ai + ai + 2, offs_aj + aj);
// res2 += sc2 * MATEL(sD, offs_di + k, offs_dj + ai + 2);
// double sc3 = -inv_ajj * MATEL(sA, offs_ai + ai + 3, offs_aj + aj);
// res3 += sc3 * MATEL(sD, offs_di + k, offs_dj + ai + 3);
// // double sc4 = -inv_ajj * MATEL(sA, offs_ai + ai+4, offs_aj + aj);
// // res4 += sc * MATEL(sD, offs_di + k, offs_dj + ai+4);
// // double sc5 = -inv_ajj * MATEL(sA, offs_ai + ai+5, offs_aj + aj);
// // res5 += sc * MATEL(sD, offs_di + k, offs_dj + ai+5);
// // double sc6 = -inv_ajj * MATEL(sA, offs_ai + ai+6, offs_aj + aj);
// // res6 += sc * MATEL(sD, offs_di + k, offs_dj + ai+6);
// // double sc7 = -inv_ajj * MATEL(sA, offs_ai + ai+7, offs_aj + aj);
// // res7 += sc * MATEL(sD, offs_di + k, offs_dj + ai+7);
// }
// // MATEL(sD, offs_di + k, offs_dj + aj) += (res+res1+res2+res3+res4+res5+res6+res7);
// MATEL(sD, offs_di + k, offs_dj + aj) += (res + res1) + (res2 + res3);
// }
// }
// }
// B <= B + alpha*A^T (B is mxn)
void fatrop_dgead_transposed(fatrop_int m, fatrop_int n, double alpha, struct blasfeo_dmat *sA, fatrop_int offs_ai, fatrop_int offs_aj, struct blasfeo_dmat *sB, fatrop_int offs_bi, fatrop_int offs_bj)
{
Expand Down

0 comments on commit 8059687

Please sign in to comment.