21#include "fasp_functs.h"
27#include "PreMGUtil.inl"
52 const INT nb = diag->
nb;
78 const INT nb2 = nb * nb;
84 INT myid, mybegin, myend;
85 INT nthreads = fasp_get_num_threads();
86#pragma omp parallel for private(myid, mybegin, myend, i)
87 for (myid = 0; myid < nthreads; myid++) {
89 for (i = mybegin; i < myend; ++i) {
96 for (i = 0; i < m; ++i) {
134 INT myid, mybegin, myend;
135 INT nthreads = fasp_get_num_threads();
136#pragma omp parallel for private(myid, mybegin, myend, i)
137 for (myid = 0; myid < nthreads; myid++) {
139 for (i = mybegin; i < myend; ++i) {
145 for (i = 0; i < m; ++i) {
179 INT myid, mybegin, myend;
180 INT nthreads = fasp_get_num_threads();
181#pragma omp parallel for private(myid, mybegin, myend, i)
182 for (myid = 0; myid < nthreads; myid++) {
184 for (i = mybegin; i < myend; ++i) {
190 for (i = 0; i < m; ++i) {
224 INT myid, mybegin, myend;
225 INT nthreads = fasp_get_num_threads();
226#pragma omp parallel for private(myid, mybegin, myend, i)
227 for (myid = 0; myid < nthreads; myid++) {
229 for (i = mybegin; i < myend; ++i) {
235 for (i = 0; i < m; ++i) {
269 INT myid, mybegin, myend;
270 INT nthreads = fasp_get_num_threads();
271#pragma omp parallel for private(myid, mybegin, myend, i)
272 for (myid = 0; myid < nthreads; myid++) {
274 for (i = mybegin; i < myend; ++i) {
280 for (i = 0; i < m; ++i) {
314 INT myid, mybegin, myend;
315 INT nthreads = fasp_get_num_threads();
316#pragma omp parallel for private(myid, mybegin, myend, i)
317 for (myid = 0; myid < nthreads; myid++) {
319 for (i = mybegin; i < myend; ++i) {
325 for (i = 0; i < m; ++i) {
350 const INT m = iludata->
row, mm1 = m - 1, mm2 = m - 2, memneed = 2 * m;
351 const INT nb = iludata->
nb, nb2 = nb * nb, size = m * nb;
356 INT ib, ibstart, ibstart1;
357 INT i, j, jj, begin_row, end_row;
358 REAL *zz, *zr, *mult;
360 if (iludata->
nwork < memneed) {
361 printf(
"### ERROR: Need %d memory, only %d available!\n", memneed,
370 memcpy(zr, r, size *
sizeof(
REAL));
378 for (i = 1; i <= mm1; ++i) {
380 end_row = ijlu[i + 1] - 1;
381 for (j = begin_row; j <= end_row; ++j) {
384 zr[i] -= lu[j] * zz[jj];
392 z[mm1] = zz[mm1] * lu[mm1];
393 for (i = mm2; i >= 0; i--) {
395 end_row = ijlu[i + 1] - 1;
396 for (j = end_row; j >= begin_row; j--) {
399 zz[i] -= lu[j] * z[jj];
403 z[i] = zz[i] * lu[i];
415 for (i = 1; i <= mm1; ++i) {
417 end_row = ijlu[i + 1] - 1;
419 for (j = begin_row; j <= end_row; ++j) {
423 for (ib = 0; ib < nb; ++ib) zr[ibstart + ib] -= mult[ib];
428 zz[ibstart] = zr[ibstart];
429 zz[ibstart + 1] = zr[ibstart + 1];
430 zz[ibstart + 2] = zr[ibstart + 2];
438 for (i = mm2; i >= 0; i--) {
440 end_row = ijlu[i + 1] - 1;
443 for (j = end_row; j >= begin_row; j--) {
447 for (ib = 0; ib < nb; ++ib) zz[ibstart1 + ib] -= mult[ib];
464 for (i = 1; i <= mm1; ++i) {
466 end_row = ijlu[i + 1] - 1;
468 for (j = begin_row; j <= end_row; ++j) {
472 for (ib = 0; ib < nb; ++ib) zr[ibstart + ib] -= mult[ib];
485 for (i = mm2; i >= 0; i--) {
487 end_row = ijlu[i + 1] - 1;
490 for (j = end_row; j >= begin_row; j--) {
494 for (ib = 0; ib < nb; ++ib) zz[ibstart1 + ib] -= mult[ib];
511 for (i = 1; i <= mm1; ++i) {
513 end_row = ijlu[i + 1] - 1;
515 for (j = begin_row; j <= end_row; ++j) {
519 for (ib = 0; ib < nb; ++ib) zr[ibstart + ib] -= mult[ib];
532 for (i = mm2; i >= 0; i--) {
534 end_row = ijlu[i + 1] - 1;
537 for (j = end_row; j >= begin_row; j--) {
541 for (ib = 0; ib < nb; ++ib) zz[ibstart1 + ib] -= mult[ib];
558 for (i = 1; i <= mm1; ++i) {
560 end_row = ijlu[i + 1] - 1;
562 for (j = begin_row; j <= end_row; ++j) {
566 for (ib = 0; ib < nb; ++ib) zr[ibstart + ib] -= mult[ib];
579 for (i = mm2; i >= 0; i--) {
581 end_row = ijlu[i + 1] - 1;
584 for (j = end_row; j >= begin_row; j--) {
588 for (ib = 0; ib < nb; ++ib) zz[ibstart1 + ib] -= mult[ib];
622 const INT m = iludata->
row, memneed = 2 * m;
623 const INT nb = iludata->
nb, nb2 = nb * nb, size = m * nb;
630 INT ib, ibstart, ibstart1;
631 INT i, j, jj, k, begin_row, end_row;
632 REAL *zz, *zr, *mult;
634 if (iludata->
nwork < memneed) {
635 printf(
"### ERROR: Need %d memory, only %d available!\n", memneed,
643 memcpy(zr, r, size *
sizeof(
REAL));
649 for (k = 0; k < ncolors; ++k) {
650#pragma omp parallel for private(i, begin_row, end_row, j, jj)
651 for (i = ic[k]; i < ic[k + 1]; ++i) {
653 end_row = ijlu[i + 1] - 1;
654 for (j = begin_row; j <= end_row; ++j) {
657 zr[i] -= lu[j] * zz[jj];
665 for (k = ncolors - 1; k >= 0; k--) {
666#pragma omp parallel for private(i, begin_row, end_row, j, jj)
667 for (i = ic[k + 1] - 1; i >= ic[k]; i--) {
669 end_row = ijlu[i + 1] - 1;
670 for (j = end_row; j >= begin_row; j--) {
673 zz[i] -= lu[j] * z[jj];
677 z[i] = zz[i] * lu[i];
685 for (k = 0; k < ncolors; ++k) {
686#pragma omp parallel private(i, begin_row, end_row, ibstart, j, jj, ib, mult)
690 for (i = ic[k]; i < ic[k + 1]; ++i) {
692 end_row = ijlu[i + 1] - 1;
694 for (j = begin_row; j <= end_row; ++j) {
699 for (ib = 0; ib < nb; ++ib)
700 zr[ibstart + ib] -= mult[ib];
705 zz[ibstart] = zr[ibstart];
706 zz[ibstart + 1] = zr[ibstart + 1];
714 for (k = ncolors - 1; k >= 0; k--) {
715#pragma omp parallel private(i, begin_row, end_row, ibstart, ibstart1, j, jj, ib, mult)
719 for (i = ic[k + 1] - 1; i >= ic[k]; i--) {
721 end_row = ijlu[i + 1] - 1;
724 for (j = end_row; j >= begin_row; j--) {
729 for (ib = 0; ib < nb; ++ib)
730 zz[ibstart1 + ib] -= mult[ib];
749 for (k = 0; k < ncolors; ++k) {
750#pragma omp parallel private(i, begin_row, end_row, ibstart, j, jj, ib, mult)
754 for (i = ic[k]; i < ic[k + 1]; ++i) {
756 end_row = ijlu[i + 1] - 1;
758 for (j = begin_row; j <= end_row; ++j) {
763 for (ib = 0; ib < nb; ++ib)
764 zr[ibstart + ib] -= mult[ib];
769 zz[ibstart] = zr[ibstart];
770 zz[ibstart + 1] = zr[ibstart + 1];
771 zz[ibstart + 2] = zr[ibstart + 2];
779 for (k = ncolors - 1; k >= 0; k--) {
780#pragma omp parallel private(i, begin_row, end_row, ibstart, ibstart1, j, jj, ib, mult)
784 for (i = ic[k + 1] - 1; i >= ic[k]; i--) {
786 end_row = ijlu[i + 1] - 1;
789 for (j = end_row; j >= begin_row; j--) {
794 for (ib = 0; ib < nb; ++ib)
795 zz[ibstart1 + ib] -= mult[ib];
816 printf(
"### ERROR: Multi-thread Parallel ILU for %d components \
817 has not yet been implemented!!!",
848 const INT m = iludata->
row, memneed = 2 * m;
849 const INT nb = iludata->
nb, nb2 = nb * nb, size = m * nb;
860 INT ib, ibstart, ibstart1;
861 INT i, ii, j, jj, k, begin_row, end_row;
862 REAL *zz, *zr, *mult;
864 if (iludata->
nwork < memneed) {
865 printf(
"### ERROR: Need %d memory, only %d available!\n", memneed,
874 memcpy(zr, r, size *
sizeof(
REAL));
880 for (k = 0; k < nlevL; ++k) {
881#pragma omp parallel for private(i, ii, begin_row, end_row, j, jj)
882 for (ii = ilevL[k]; ii < ilevL[k + 1]; ++ii) {
885 end_row = ijlu[i + 1] - 1;
886 for (j = begin_row; j <= end_row; ++j) {
889 zr[i] -= lu[j] * zz[jj];
897 for (k = 0; k < nlevU; k++) {
898#pragma omp parallel for private(i, ii, begin_row, end_row, j, jj)
899 for (ii = ilevU[k + 1] - 1; ii >= ilevU[k]; ii--) {
902 end_row = ijlu[i + 1] - 1;
903 for (j = end_row; j >= begin_row; j--) {
906 zz[i] -= lu[j] * z[jj];
910 z[i] = zz[i] * lu[i];
918 for (k = 0; k < nlevL; ++k) {
919#pragma omp parallel private(i, ii, begin_row, end_row, ibstart, j, jj, ib, mult)
923 for (ii = ilevL[k]; ii < ilevL[k + 1]; ++ii) {
926 end_row = ijlu[i + 1] - 1;
928 for (j = begin_row; j <= end_row; ++j) {
933 for (ib = 0; ib < nb; ++ib)
934 zr[ibstart + ib] -= mult[ib];
939 zz[ibstart] = zr[ibstart];
940 zz[ibstart + 1] = zr[ibstart + 1];
948 for (k = 0; k < nlevU; k++) {
949#pragma omp parallel private(i, ii, begin_row, end_row, ibstart, ibstart1, j, jj, ib, \
954 for (ii = ilevU[k + 1] - 1; ii >= ilevU[k]; ii--) {
957 end_row = ijlu[i + 1] - 1;
960 for (j = end_row; j >= begin_row; j--) {
965 for (ib = 0; ib < nb; ++ib)
966 zz[ibstart1 + ib] -= mult[ib];
985 for (k = 0; k < nlevL; ++k) {
986#pragma omp parallel private(i, ii, begin_row, end_row, ibstart, j, jj, ib, mult)
990 for (ii = ilevL[k]; ii < ilevL[k + 1]; ++ii) {
993 end_row = ijlu[i + 1] - 1;
995 for (j = begin_row; j <= end_row; ++j) {
1000 for (ib = 0; ib < nb; ++ib)
1001 zr[ibstart + ib] -= mult[ib];
1006 zz[ibstart] = zr[ibstart];
1007 zz[ibstart + 1] = zr[ibstart + 1];
1008 zz[ibstart + 2] = zr[ibstart + 2];
1016 for (k = 0; k < nlevU; k++) {
1017#pragma omp parallel private(i, ii, begin_row, end_row, ibstart, ibstart1, j, jj, ib, \
1022 for (ii = ilevU[k + 1] - 1; ii >= ilevU[k]; ii--) {
1024 begin_row = ijlu[i];
1025 end_row = ijlu[i + 1] - 1;
1028 for (j = end_row; j >= begin_row; j--) {
1033 for (ib = 0; ib < nb; ++ib)
1034 zz[ibstart1 + ib] -= mult[ib];
1055 for (k = 0; k < nlevL; ++k) {
1056#pragma omp parallel private(i, ii, begin_row, end_row, ibstart, j, jj, ib, mult)
1060 for (ii = ilevL[k]; ii < ilevL[k + 1]; ++ii) {
1062 begin_row = ijlu[i];
1063 end_row = ijlu[i + 1] - 1;
1065 for (j = begin_row; j <= end_row; ++j) {
1070 for (ib = 0; ib < nb; ++ib)
1071 zr[ibstart + ib] -= mult[ib];
1076 for (j = 0; j < nb; j++)
1086 for (k = 0; k < nlevU; k++) {
1087#pragma omp parallel private(i, ii, begin_row, end_row, ibstart, ibstart1, j, jj, ib, \
1092 for (ii = ilevU[k + 1] - 1; ii >= ilevU[k]; ii--) {
1094 begin_row = ijlu[i];
1095 end_row = ijlu[i + 1] - 1;
1098 for (j = end_row; j >= begin_row; j--) {
1103 for (ib = 0; ib < nb; ++ib)
1104 zz[ibstart1 + ib] -= mult[ib];
1112 &(z[ibstart1]), nb);
1155 const INT m = row * nb;
1200 const INT m = row * nb;
1225 fasp_coarse_itsolver(A_nk, &r_nk, &z_nk, 1e-12, 0);
1269 fasp_coarse_itsolver(A_nk, &r_nk, &z_nk, 1e-12, 0);
1276double PreSmoother_time_zl = 0.0;
1277double PostSmoother_time_zl = 0.0;
1278double Krylov_time_zl = 0.0;
1279double Coarsen_time_zl = 0.0;
1280double AMLI_cycle_time_zl = 0.0;
1301 const INT m = row * nb;
void fasp_darray_set(const INT n, REAL *x, const REAL val)
Set initial value for an array to be x=val.
void fasp_darray_cp(const INT n, const REAL *x, REAL *y)
Copy an array to the other y=x.
void fasp_mem_free(void *mem)
Free up previous allocated memory body and set pointer to NULL.
void * fasp_mem_calloc(const unsigned int size, const unsigned int type)
Allocate, initiate, and check memory.
void fasp_chkerr(const SHORT status, const char *fctname)
Check error status and print out error messages before quit.
void fasp_param_precbsr_to_amg(AMG_param *amgparam, const precond_data_bsr *pcdata)
Set AMG_param with precond_data.
void fasp_param_amg_init(AMG_param *amgparam)
Initialize AMG parameters.
void fasp_get_start_end(const INT procid, const INT nprocs, const INT n, INT *start, INT *end)
Assign Load to each thread.
void fasp_dvec_set(INT n, dvector *x, const REAL val)
Initialize dvector x[i]=val for i=0:n-1.
void fasp_dvec_alloc(const INT m, dvector *u)
Create dvector data space of REAL type.
void fasp_blas_smat_mxv_nc7(const REAL *a, const REAL *b, REAL *c)
Compute the product of a 7*7 matrix a and a array b, stored in c.
void fasp_blas_smat_mxv_nc5(const REAL *a, const REAL *b, REAL *c)
Compute the product of a 5*5 matrix a and a array b, stored in c.
void fasp_blas_smat_mxv_nc4(const REAL *a, const REAL *b, REAL *c)
Compute the product of a 4*4 matrix a and a array b, stored in c.
void fasp_blas_smat_mxv(const REAL *a, const REAL *b, REAL *c, const INT n)
Compute the product of a small full matrix a and a array b, stored in c.
void fasp_blas_smat_mxv_nc3(const REAL *a, const REAL *b, REAL *c)
Compute the product of a 3*3 matrix a and a array b, stored in c.
void fasp_blas_smat_mxv_nc2(const REAL *a, const REAL *b, REAL *c)
Compute the product of a 2*2 matrix a and a array b, stored in c.
void fasp_blas_dbsr_aAxpy(const REAL alpha, const dBSRmat *A, const REAL *x, REAL *y)
Compute y := alpha*A*x + y.
void fasp_blas_dcsr_mxv(const dCSRmat *A, const REAL *x, REAL *y)
Matrix-vector multiplication y = A*x.
void fasp_blas_dcsr_aAxpy(const REAL alpha, const dCSRmat *A, const REAL *x, REAL *y)
Matrix-vector multiplication y = alpha*A*x + y.
void fasp_precond_dbsr_diag_nc7(REAL *r, REAL *z, void *data)
Diagonal preconditioner z=inv(D)*r.
void fasp_precond_dbsr_amg(REAL *r, REAL *z, void *data)
AMG preconditioner.
void fasp_precond_dbsr_ilu_mc_omp(REAL *r, REAL *z, void *data)
Multi-thread Parallel ILU preconditioner based on graph coloring.
void fasp_precond_dbsr_diag_nc5(REAL *r, REAL *z, void *data)
Diagonal preconditioner z=inv(D)*r.
void fasp_precond_dbsr_diag_nc4(REAL *r, REAL *z, void *data)
Diagonal preconditioner z=inv(D)*r.
void fasp_precond_dbsr_ilu_ls_omp(REAL *r, REAL *z, void *data)
Multi-thread Parallel ILU preconditioner based on level schedule strategy.
void fasp_precond_dbsr_amg_nk(REAL *r, REAL *z, void *data)
AMG with extra near kernel solve preconditioner.
void fasp_precond_dbsr_diag(REAL *r, REAL *z, void *data)
Diagonal preconditioner z=inv(D)*r.
void fasp_precond_dbsr_ilu(REAL *r, REAL *z, void *data)
ILU preconditioner.
void fasp_precond_dbsr_diag_nc2(REAL *r, REAL *z, void *data)
Diagonal preconditioner z=inv(D)*r.
void fasp_precond_dbsr_diag_nc3(REAL *r, REAL *z, void *data)
Diagonal preconditioner z=inv(D)*r.
void fasp_precond_dbsr_namli(REAL *r, REAL *z, void *data)
Nonlinear AMLI-cycle AMG preconditioner.
void fasp_solver_mgcycle_bsr(AMG_data_bsr *mgl, AMG_param *param)
Solve Ax=b with non-recursive multigrid cycle.
void fasp_solver_namli_bsr(AMG_data_bsr *mgl, AMG_param *param, INT l, INT num_levels)
Solve Ax=b with recursive nonlinear AMLI-cycle.
INT fasp_solver_umfpack(dCSRmat *ptrA, dvector *b, dvector *u, const SHORT prtlvl)
Solve Au=b by UMFpack.
Main header file for the FASP project.
#define SHORT
FASP integer and floating point numbers.
Data for multigrid levels in dBSRmat format.
dBSRmat A
pointer to the matrix at level level_num
dvector b
pointer to the right-hand side at level level_num
INT ILU_levels
number of levels use ILU smoother
dvector x
pointer to the iterative solution at level level_num
Parameters for AMG methods.
SHORT coarse_scaling
switch of scaling of the coarse grid correction
SHORT ILU_levels
number of levels use ILU smoother
REAL relaxation
relaxation parameter for Jacobi and SOR smoother
SHORT smoother
smoother type
SHORT cycle_type
type of AMG cycle
REAL tentative_smooth
relaxation parameter for smoothing the tentative prolongation
SHORT postsmooth_iter
number of postsmoothers
SHORT presmooth_iter
number of presmoothers
SHORT smooth_order
smoother order
INT * ijlu
integer array of row pointers and column indexes, the size is nzlu
REAL * luval
nonzero entries of LU
INT * jlevU
mapping from row to color for upper triangle
INT nlevU
number of colors for upper triangle
INT nb
block size for BSR type only
INT row
row number of matrix LU, m
INT nlevL
number of colors for lower triangle
INT * jlevL
mapping from row to color for lower triangle
INT * ilevL
number of vertices in each color for lower triangle
INT * ilevU
number of vertices in each color for upper triangle
INT nb
dimension of each sub-block
INT ROW
number of rows of sub-blocks in matrix A, M
Sparse matrix of REAL type in CSR format.
INT row
row number of matrix A, m
Vector with n entries of REAL type.
REAL * val
actual vector entries
Data for preconditioners in dBSRmat format.
SHORT coarse_scaling
switch of scaling of the coarse grid correction
dCSRmat * A_nk
Matrix data for near kernal.
REAL relaxation
relaxation parameter for SOR smoother
SHORT smoother
AMG smoother type.
SHORT cycle_type
AMG cycle type.
REAL tentative_smooth
smooth factor for smoothing the tentative prolongation
SHORT postsmooth_iter
number of postsmoothing
dCSRmat * R_nk
Resriction for near kernal.
AMG_data_bsr * mgl_data
AMG preconditioner data.
INT max_levels
max number of AMG levels
dCSRmat * P_nk
Prolongation for near kernal.
SHORT presmooth_iter
number of presmoothing
INT maxit
max number of iterations of AMG preconditioner
SHORT smooth_order
AMG smoother ordering.
Data for diagnal preconditioners in dBSRmat format.
INT nb
dimension of each sub-block
dvector diag
diagnal elements