From 1e960f237e3856a716c0db92362a405051f2f68a Mon Sep 17 00:00:00 2001 From: huiscliu Date: Mon, 8 Jan 2024 16:44:28 -0800 Subject: [PATCH] add LU for coarsest level solver --- example/amg.c | 2 +- include/amg-coarse-solver.h | 2 +- include/amg-type-def.h | 31 +++++++++++ include/utils.h | 9 ++++ src/.mat.c.swp | Bin 16384 -> 0 bytes src/amg-coarse-solver.c | 100 +++++++++++++++++++++++++--------- src/amg-cycle.c | 2 +- src/amg-utils.c | 3 ++ src/internal.h | 28 ---------- src/utils.c | 105 ++++++++++++++++++++++++++++++++++++ 10 files changed, 225 insertions(+), 57 deletions(-) delete mode 100644 src/.mat.c.swp diff --git a/example/amg.c b/example/amg.c index b064279..450942c 100644 --- a/example/amg.c +++ b/example/amg.c @@ -10,7 +10,7 @@ int main(void) char *mat_file = "A.dat"; SX_RTN rtn; SX_INT prob = 3; - SX_INT ncx = 23, ncy = 13, ncz = 14; + SX_INT ncx = 130, ncy = 130, ncz = 14; SX_INT nglobal = 0; /* create distributed matrix */ diff --git a/include/amg-coarse-solver.h b/include/amg-coarse-solver.h index 2c2b76d..42bd38e 100644 --- a/include/amg-coarse-solver.h +++ b/include/amg-coarse-solver.h @@ -10,7 +10,7 @@ extern "C" { #endif -void sx_amg_coarest_solve(SX_MAT *A, SX_VEC *b, SX_VEC *x, const SX_FLT ctol, const SX_INT verb); +void sx_amg_coarest_solve(SX_AMG_COMP *cg, const SX_FLT ctol, const SX_INT verb); #ifdef __cplusplus } diff --git a/include/amg-type-def.h b/include/amg-type-def.h index b74e417..acc6963 100644 --- a/include/amg-type-def.h +++ b/include/amg-type-def.h @@ -194,6 +194,9 @@ typedef struct SX_AMG_COMP_ SX_VEC wp; /** cache work space */ + SX_FLT *LU; + int *pvt; + } SX_AMG_COMP; /** @@ -225,4 +228,32 @@ typedef struct SX_KRYLOV_ } SX_KRYLOV; +typedef enum +{ + ERROR_OPEN_FILE = -10, /**< fail to open a file */ + ERROR_WRONG_FILE = -11, /**< input contains wrong format */ + ERROR_INPUT_PAR = -12, /**< wrong input argument */ + ERROR_MAT_SIZE = -13, /**< wrong problem size */ + ERROR_MISC = -14, /**< other error */ + + ERROR_ALLOC_MEM = -20, /**< fail to allocate memory */ + ERROR_DATA_STRUCTURE = -21, /**< problem with data structures */ + ERROR_DATA_ZERODIAG = -22, /**< matrix has zero diagonal entries */ + ERROR_DUMMY_VAR = -23, /**< unexpected input data */ + + ERROR_AMG_INTERP_TYPE = -30, /**< unknown interpolation type */ + ERROR_AMG_SMOOTH_TYPE = -31, /**< unknown smoother type */ + ERROR_AMG_COARSE_TYPE = -32, /**< unknown coarsening type */ + ERROR_AMG_COARSEING = -33, /**< coarsening step failed to complete */ + + ERROR_SOLVER_STAG = -42, /**< solver stagnates */ + ERROR_SOLVER_SOLSTAG = -43, /**< solver's solution is too small */ + ERROR_SOLVER_TOLSMALL = -44, /**< solver's tolerance is too small */ + ERROR_SOLVER_MAXIT = -48, /**< maximal iteration number exceeded */ + ERROR_SOLVER_EXIT = -49, /**< solver does not quit successfully */ + + ERROR_UNKNOWN = -99, /**< an unknown error type */ + +} SX_ERROR_CODE; + #endif diff --git a/include/utils.h b/include/utils.h index 44b378f..f184af7 100644 --- a/include/utils.h +++ b/include/utils.h @@ -21,6 +21,15 @@ SX_FLT sx_get_mem(void); void sx_exit_on_errcode(const SX_INT status, const char *fctname); +/* LU factorization */ +int sx_mat_dense_lu(int n, SX_FLT *A, int pvt[]); + +/* solves L*U*X = B */ +void sx_mat_dense_sv(int n, SX_FLT *A, int pvt[], int m, SX_FLT *B); + +/* solves AX = B, returns 1 if successful and 0 if A is singular */ +int sx_mat_dense_solve(int n, int m, SX_FLT *A, SX_FLT *B); + #ifdef __cplusplus } #endif diff --git a/src/.mat.c.swp b/src/.mat.c.swp deleted file mode 100644 index d98298c2ed1a2b1a5a91d7db5f1e0a5a0819662c..0000000000000000000000000000000000000000 GIT binary patch literal 0 HcmV?d00001 literal 16384 zcmeI2ZEPIH8OMh-l-B?ON~tQukRWn*_W30NLE|{pH7ZghC$3_R8CMV4>to<_lLul zQ#&vm2F2l;6AcyDZp-Fc1*`(6tH1`aZR=<})i-atMqYO5%;_4k^{fI`0jq#jz$#!B zunJfOtO8bnGf;u3zFypk-|LL;C(QY*p7SYl{NAKL)|3B*$-m#EZ#M=~J3lb#KQ`wV zdh$)OJy`{;0#*U5fK|XMU=^?mSOu&CRspMkRlq9n9Vph8n-h-FnA$S1p1|M7~!wg&o8(6MAFa{a;;9Mczf!E>h za1x$@N8us36AnWiJgC8ya0Q$TpPfUS@HV^%|A4399=IEBhZ*=8Y=fW63ue}qTi zC>()5zynYQ1zTYl&W3gH#o0o<32(sP;8}PSj=@nl3{^M?Q&51P!%o-^8{s1O=XzoX zPQYK_INSqwK^YFfB#grjxE3yjjqo{}MeoDA@D98M$KVJ|z;3t>&V&CDx9@_E-?wxO z_Z~7gEM$&9d0X8HLRpa<1<#MlDv-*Hf<@_ive_=+>H?FV>YLfrnZNQ~FQRtjORPvI z2%N>VjrqV;3o>jv^*GPXM`h`FC0WrIs;X)x)!m-^N>&W#azgx0XtxXborRVhmwRU> zCdT5Fur*gxwYh~rsZ5-k9V5z3(aM@dE1gB_YZk4iMcoCNDai4}gRBfx)CjyWaY)4d z!x@)<0GcbQQ2)BHe>nz{j3Z&=Tn4m`gBGq+aqk+hp(pVcOYwrKe4K*Lmi?f;UNnqBpIh- zeKAuQDzpZr#`Bh#)SH&09!rnAJF4&Zj_3ZY(JS%x+NzJRS;#ES=X4rPCV$$tMugQiJ%?YDMzyMkePDKAw3_N0mOf!7|j&} zvgtz9GqQytuTh)Bi;x~IJCmf+>o_%WFiE3z zomMlwXb**s4Kmv6o2oL|5A_U~DU~j{xoj*u%V5!|R{f$Ln8A*)>Ijs6!`9V$_ma6< z>$=pp)H;39YwxPoYC4Mh^d^=hjS3xz<8f4U#H<$wXCk{PSTjj+Wj)B9!4lu3OL%tx zn|P=#M~2tF@|L<5_cStaL_Z)s)r_NQG|H03jB~&+&t?& z-Jx~?f~isSI`V7X`8u%DM?mu6*cDH~uleXmrRl52@>s1Z$LdSq_cu|@9R5F3rE3MY zlcj5+?VKz6y6;l|jc3XkH&IrT;ww%Q)#5$8{jxS+b;7y$b$l*VQKswN*c(J$6UVK= z<*X#-4yVdPadm6O`dY@?h8Ojtwa1LRmP7ibKiA=ozVGXdvC`KOW5h&HE_PzG>lLew zl9HR^ZxxS!!7pzT?Nr_~bb`>||C4-ozX1CCKmC3FCBE?=hX)~oNyx+1@FTbw*2BN~ z);|tSn1LJNC-5ou{tIt{-UE0IUIg9F^I#9FfK|XMU=^?mSOu&CRspMkRlq7>6*w&g zl5NOz`)qo})@Xa1Qg3;6-|3XXG&_m2mHlic9@Kk=$;O|~Ne^A3GJ8;$a98FYBssOQ zGFR_->fe_tv-SNetuOe2%;;@QcU~QEy)jvrJEW_;(kPb$1A2!%bwZ!bXKqs17_QH{ z`v;Rc?G)V-JL75HxEYu1kumAg0u2oexT*ENo3-6Kb>C^TX^%0whH9-uwfF}%1ZaDd LdiFg?yR7V&;>g}; diff --git a/src/amg-coarse-solver.c b/src/amg-coarse-solver.c index 0ea8a93..7cfe277 100644 --- a/src/amg-coarse-solver.c +++ b/src/amg-coarse-solver.c @@ -582,43 +582,91 @@ static SX_INT sx_solver_gmres(SX_KRYLOV *ks) } /** - * \fn static void sx_amg_coarest_solve(SX_MAT *A, SX_VEC *b, SX_VEC *x, - * const SX_FLT ctol, const SX_INT verb) + * \fn static void sx_amg_coarest_solve(SX_AMG_COMP *cg, const SX_FLT ctol, const SX_INT verb) * * \brief Iterative on the coarset level * - * \pars A pointer to matrix data - * \pars b pointer to rhs data - * \pars x pointer to sol data - * \pars ctol tolerance for the coarsest level + * \pars cg pointer to matrix data + * \pars ctol tolerance for the coarsest level * \pars verb level of output * */ -void sx_amg_coarest_solve(SX_MAT *A, SX_VEC *b, SX_VEC *x, const SX_FLT ctol, const SX_INT verb) +void sx_amg_coarest_solve(SX_AMG_COMP *cg, const SX_FLT ctol, const SX_INT verb) { + SX_MAT *A = &cg->A; + SX_VEC *b = &cg->b; + SX_VEC *x = &cg->x; const SX_INT n = A->num_rows; const SX_INT maxit = SX_MAX(250, SX_MIN(n * n, 1000)); + int use_krylov = 0; - SX_INT status; - SX_KRYLOV ks; - - /* try cg first */ - ks.A = A; - ks.b = b; - ks.u = x; - ks.tol = ctol; - ks.maxit = maxit; - ks.stop_type = 1; - ks.verb = 0; - status = sx_solver_cg(&ks); - - /* try GMRES if cg fails */ - if (status < 0) { - ks.restart = MAX_RESTART; - status = sx_solver_gmres(&ks); + /* init */ + if (!use_krylov) { + if (cg->LU == NULL) { + SX_INT i, k, j, ibegin, iend; + int rtn; + + cg->LU = sx_calloc(n * (size_t) n, sizeof(*cg->LU)); + cg->pvt = sx_calloc(n, sizeof(*cg->pvt)); + + /* init LU */ + for (i = 0; i < n; ++i) { + ibegin = A->Ap[i]; + iend = A->Ap[i + 1]; + for (k = ibegin; k < iend; ++k) { + j = A->Aj[k]; + + cg->LU[i * n + j] = A->Ax[k]; + } + } + + /* LU factorization */ + rtn = sx_mat_dense_lu(n, cg->LU, cg->pvt); + + /* failed */ + if (!rtn) { + use_krylov = 1; + + sx_free(cg->LU); + sx_free(cg->pvt); + cg->LU = NULL; + cg->pvt = NULL; + } + } + } + + if (use_krylov) { + SX_INT status; + SX_KRYLOV ks; + + /* try cg first */ + ks.A = A; + ks.b = b; + ks.u = x; + ks.tol = ctol; + ks.maxit = maxit; + ks.stop_type = 1; + ks.verb = 0; + status = sx_solver_cg(&ks); + + /* try GMRES if cg fails */ + if (status < 0) { + ks.restart = MAX_RESTART; + status = sx_solver_gmres(&ks); + } + + if (status < 0 && verb >= 2) { + sx_printf("### WARNING: Coarse level solver failed to converge!\n"); + } } + else { + assert(cg->LU != NULL); + assert(cg->pvt != NULL); + + /* init x */ + memcpy(x->d, b->d, n * sizeof(*x->d)); - if (status < 0 && verb >= 2) { - sx_printf("### WARNING: Coarse level solver failed to converge!\n"); + /* solve */ + sx_mat_dense_sv(n, cg->LU, cg->pvt, 1, x->d); } } diff --git a/src/amg-cycle.c b/src/amg-cycle.c index c40359e..1ac8369 100644 --- a/src/amg-cycle.c +++ b/src/amg-cycle.c @@ -59,7 +59,7 @@ void sx_amg_cycle(SX_AMG *mg) } // call the coarse space solver: - sx_amg_coarest_solve(&mg->cg[nl - 1].A, &mg->cg[nl - 1].b, &mg->cg[nl - 1].x, tol, verb); + sx_amg_coarest_solve(&mg->cg[nl - 1], tol, verb); // BackwardSweep: while (l > 0) { diff --git a/src/amg-utils.c b/src/amg-utils.c index 25ed632..463ea3b 100644 --- a/src/amg-utils.c +++ b/src/amg-utils.c @@ -58,6 +58,9 @@ void sx_amg_data_destroy(SX_AMG *mg) sx_vec_destroy(&mg->cg[i].wp); sx_ivec_destroy(&mg->cg[i].cfmark); + + sx_free(mg->cg[i].LU); + sx_free(mg->cg[i].pvt); } sx_free(mg->cg); diff --git a/src/internal.h b/src/internal.h index 2f754cf..651ee97 100644 --- a/src/internal.h +++ b/src/internal.h @@ -46,34 +46,6 @@ typedef enum } SX_STOP_TYPE; -typedef enum -{ - ERROR_OPEN_FILE = -10, /**< fail to open a file */ - ERROR_WRONG_FILE = -11, /**< input contains wrong format */ - ERROR_INPUT_PAR = -12, /**< wrong input argument */ - ERROR_MAT_SIZE = -13, /**< wrong problem size */ - ERROR_MISC = -14, /**< other error */ - - ERROR_ALLOC_MEM = -20, /**< fail to allocate memory */ - ERROR_DATA_STRUCTURE = -21, /**< problem with data structures */ - ERROR_DATA_ZERODIAG = -22, /**< matrix has zero diagonal entries */ - ERROR_DUMMY_VAR = -23, /**< unexpected input data */ - - ERROR_AMG_INTERP_TYPE = -30, /**< unknown interpolation type */ - ERROR_AMG_SMOOTH_TYPE = -31, /**< unknown smoother type */ - ERROR_AMG_COARSE_TYPE = -32, /**< unknown coarsening type */ - ERROR_AMG_COARSEING = -33, /**< coarsening step failed to complete */ - - ERROR_SOLVER_STAG = -42, /**< solver stagnates */ - ERROR_SOLVER_SOLSTAG = -43, /**< solver's solution is too small */ - ERROR_SOLVER_TOLSMALL = -44, /**< solver's tolerance is too small */ - ERROR_SOLVER_MAXIT = -48, /**< maximal iteration number exceeded */ - ERROR_SOLVER_EXIT = -49, /**< solver does not quit successfully */ - - ERROR_UNKNOWN = -99, /**< an unknown error type */ - -} SX_ERROR_CODE; - #ifdef __cplusplus extern "C" { #endif diff --git a/src/utils.c b/src/utils.c index 9fa8b39..5e7d6c7 100644 --- a/src/utils.c +++ b/src/utils.c @@ -342,3 +342,108 @@ void sx_iarray_cp(const SX_INT n, SX_INT *x, SX_INT *y) memcpy(y, x, n * sizeof(SX_INT)); } + +/* LU factorization */ +int sx_mat_dense_lu(int n, SX_FLT *a0, int pvt[]) +{ + int i, j, k; + SX_FLT d, r; + +#define a(i,j) (a0[(i) * n + (j)]) + + for (i = 0; i < n; i++) { + d = fabs(a(i,i)); + k = i; + for (j = i + 1; j < n; j++) { + if ((r = fabs(a(j,i))) > d) { + d = r; + k = j; + } + } + + if (d == 0.0) { + return 0; + } + + pvt[i] = k; + if (k != i) { + /* exchange row i and row k */ + for (j = i; j < n; j++) { + d = a(i,j); + a(i,j) = a(k,j); + a(k,j) = d; + } + } + + if ((d = a(i,i)) != 1.0) { + a(i,i) = d = 1.0 / d; + for (j = i + 1; j < n; j++) a(i,j) *= d; + } + + for (j = i + 1; j < n; j++) { + if ((d = a(j,i)) == 0.0) continue; + + for (k = i + 1; k < n; k++) a(j,k) -= d * a(i,k); + } + } + +#undef a + + return 1; +} + +/* solves L*U*X = B */ +void sx_mat_dense_sv(int n, SX_FLT *a0, int pvt[], int m, SX_FLT *b0) +{ + int i, j, k; + SX_FLT d; + +#define a(i,j) (a0[(i) * n + (j)]) +#define b(i,j) (b0[(i) * m + (j)]) + + for (i = 0; i < n; i++) { + k = pvt[i]; + if (k != i) { + /* exchange row i with row k */ + for (j = 0; j < m; j++) { + d = b(i,j); + b(i,j) = b(k,j); + b(k,j) = d; + } + } + + if ((d = a(i,i)) != 1.0) { + for (j = 0; j < m; j++) + b(i,j) *= d; + } + + for (j = i + 1; j < n; j++) { + if ((d = a(j,i)) == 0.0) continue; + + for (k = 0; k < m; k++) b(j,k) -= d * b(i,k); + } + } + + for (i = n - 2; i >= 0; i--) { + for (j = i + 1; j < n; j++) { + d = a(i,j); + for (k = 0; k < m; k++) b(i,k) -= d * b(j,k); + } + } + +#undef a +#undef b +} + +/* solves AX = B, returns 1 if successful and 0 if A is singular */ +int sx_mat_dense_solve(int n, int m, SX_FLT *a0, SX_FLT *b0) +{ + int *pvt = malloc(n * sizeof(*pvt)); + + if (sx_mat_dense_lu(n, a0, pvt) == 0) return 0; + + sx_mat_dense_sv(n, a0, pvt, m, b0); + free(pvt); + + return 1; +}