24#include "fasp_functs.h"
64 const INT nb2 = nb * nb;
65 const INT size = ROW * nb2;
66 const INT* IA = A->
IA;
67 const INT* JA = A->
JA;
78 nthreads = fasp_get_num_threads();
84 INT mybegin, myend, myid;
86#pragma omp parallel for private(myid, mybegin, myend, i, k)
88 for (myid = 0; myid < nthreads; myid++) {
90 for (i = mybegin; i < myend; i++) {
91 for (k = IA[i]; k < IA[i + 1]; ++k)
93 memcpy(diaginv + i * nb2, val + k * nb2, nb2 *
sizeof(
REAL));
97 for (i = 0; i < ROW; ++i) {
98 for (k = IA[i]; k < IA[i + 1]; ++k) {
100 memcpy(diaginv + i * nb2, val + k * nb2, nb2 *
sizeof(
REAL));
108 INT mybegin, myend, myid;
110#pragma omp parallel for private(myid, mybegin, myend, i)
112 for (myid = 0; myid < nthreads; myid++) {
114 for (i = mybegin; i < myend; i++) {
119 for (i = 0; i < ROW; ++i) {
125 INT mybegin, myend, myid;
127#pragma omp parallel for private(myid, mybegin, myend, i)
129 for (myid = 0; myid < nthreads; myid++) {
131 for (i = mybegin; i < myend; i++) {
132 diaginv[i] = 1.0 / diaginv[i];
136 for (i = 0; i < ROW; ++i) {
138 diaginv[i] = 1.0 / diaginv[i];
167 const INT nb = A->
nb;
168 const INT nb2 = nb * nb;
169 const INT* IA = A->
IA;
170 const INT* JA = A->
JA;
181 nthreads = fasp_get_num_threads();
187 INT mybegin, myend, myid;
189#pragma omp parallel for private(myid, mybegin, myend, i, k)
191 for (myid = 0; myid < nthreads; myid++) {
193 for (i = mybegin; i < myend; i++) {
194 for (k = IA[i]; k < IA[i + 1]; ++k)
196 memcpy(diaginv + i * nb2, val + k * nb2, nb2 *
sizeof(
REAL));
200 for (i = 0; i < ROW; ++i) {
201 for (k = IA[i]; k < IA[i + 1]; ++k) {
203 memcpy(diaginv + i * nb2, val + k * nb2, nb2 *
sizeof(
REAL));
211 INT mybegin, myend, myid;
213#pragma omp parallel for private(myid, mybegin, myend, i)
215 for (myid = 0; myid < nthreads; myid++) {
217 for (i = mybegin; i < myend; i++) {
222 for (i = 0; i < ROW; ++i) {
228 INT mybegin, myend, myid;
230#pragma omp parallel for private(myid, mybegin, myend, i)
232 for (myid = 0; myid < nthreads; myid++) {
234 for (i = mybegin; i < myend; i++) {
235 diaginv[i] = 1.0 / diaginv[i];
239 for (i = 0; i < ROW; ++i) {
241 diaginv[i] = 1.0 / diaginv[i];
267 const INT nb = A->
nb;
268 const INT nb2 = nb * nb;
269 const INT size = ROW * nb;
270 const INT* IA = A->
IA;
271 const INT* JA = A->
JA;
279 nthreads = fasp_get_num_threads();
296 memcpy(b_tmp, b_val, size *
sizeof(
REAL));
301 INT mybegin, myend, myid;
303#pragma omp parallel for private(myid, mybegin, myend, i, j, k)
305 for (myid = 0; myid < nthreads; myid++) {
307 for (i = mybegin; i < myend; i++) {
308 for (k = IA[i]; k < IA[i + 1]; ++k) {
310 if (j != i) b_tmp[i] -= val[k] * u_val[j];
315#pragma omp parallel for private(myid, mybegin, myend, i)
317 for (myid = 0; myid < nthreads; myid++) {
319 for (i = mybegin; i < myend; i++) {
320 u_val[i] = b_tmp[i] * diaginv[i];
324 for (i = 0; i < ROW; ++i) {
325 for (k = IA[i]; k < IA[i + 1]; ++k) {
327 if (j != i) b_tmp[i] -= val[k] * u_val[j];
330 for (i = 0; i < ROW; ++i) {
331 u_val[i] = b_tmp[i] * diaginv[i];
339 INT mybegin, myend, myid;
341#pragma omp parallel for private(myid, mybegin, myend, i, pb, k, j)
343 for (myid = 0; myid < nthreads; myid++) {
345 for (i = mybegin; i < myend; i++) {
347 for (k = IA[i]; k < IA[i + 1]; ++k) {
356#pragma omp parallel for private(myid, mybegin, myend, i, pb)
358 for (myid = 0; myid < nthreads; myid++) {
360 for (i = mybegin; i < myend; i++) {
366 for (i = 0; i < ROW; ++i) {
368 for (k = IA[i]; k < IA[i + 1]; ++k) {
376 for (i = 0; i < ROW; ++i) {
384 printf(
"### ERROR: nb is illegal! [%s:%d]\n", __FILE__, __LINE__);
414 const INT nb = A->
nb;
415 const INT nb2 = nb * nb;
416 const INT size = ROW * nb2;
417 const INT* IA = A->
IA;
418 const INT* JA = A->
JA;
429 nthreads = fasp_get_num_threads();
435 INT mybegin, myend, myid;
437#pragma omp parallel for private(myid, mybegin, myend, i, k)
439 for (myid = 0; myid < nthreads; myid++) {
441 for (i = mybegin; i < myend; i++) {
442 for (k = IA[i]; k < IA[i + 1]; ++k)
444 memcpy(diaginv + i * nb2, val + k * nb2, nb2 *
sizeof(
REAL));
448 for (i = 0; i < ROW; ++i) {
449 for (k = IA[i]; k < IA[i + 1]; ++k) {
451 memcpy(diaginv + i * nb2, val + k * nb2, nb2 *
sizeof(
REAL));
459 INT mybegin, myend, myid;
461#pragma omp parallel for private(myid, mybegin, myend, i)
463 for (myid = 0; myid < nthreads; myid++) {
465 for (i = mybegin; i < myend; i++) {
470 for (i = 0; i < ROW; ++i) {
476 INT mybegin, myend, myid;
478#pragma omp parallel for private(myid, mybegin, myend, i)
480 for (myid = 0; myid < nthreads; myid++) {
482 for (i = mybegin; i < myend; i++) {
483 diaginv[i] = 1.0 / diaginv[i];
487 for (i = 0; i < ROW; ++i) {
489 diaginv[i] = 1.0 / diaginv[i];
556 const INT nb = A->
nb;
557 const INT nb2 = nb * nb;
558 const INT* IA = A->
IA;
559 const INT* JA = A->
JA;
572 for (i = 0; i < ROW; ++i) {
574 for (k = IA[i]; k < IA[i + 1]; ++k) {
576 if (j != i) rhs -= val[k] * u_val[j];
578 u_val[i] = rhs * diaginv[i];
583 for (i = 0; i < ROW; ++i) {
585 memcpy(b_tmp, b_val + pb, nb *
sizeof(
REAL));
586 for (k = IA[i]; k < IA[i + 1]; ++k) {
597 printf(
"### ERROR: nb is illegal! [%s:%d]\n", __FILE__, __LINE__);
623 const INT nb = A->
nb;
624 const INT nb2 = nb * nb;
625 const INT* IA = A->
IA;
626 const INT* JA = A->
JA;
639 for (i = 0; i < ROW; ++i) {
641 for (k = IA[i]; k < IA[i + 1]; ++k) {
643 if (j != i) rhs -= val[k] * u_val[j];
650 for (i = 0; i < ROW; ++i) {
652 memcpy(b_tmp, b_val + pb, nb *
sizeof(
REAL));
653 for (k = IA[i]; k < IA[i + 1]; ++k) {
658 memcpy(u_val + pb, b_tmp, nb *
sizeof(
REAL));
664 printf(
"### ERROR: nb is illegal! [%s:%d]\n", __FILE__, __LINE__);
687 const INT nb = A->
nb;
688 const INT nb2 = nb * nb;
689 const INT* IA = A->
IA;
690 const INT* JA = A->
JA;
703 for (i = ROW - 1; i >= 0; i--) {
705 for (k = IA[i]; k < IA[i + 1]; ++k) {
707 if (j != i) rhs -= val[k] * u_val[j];
709 u_val[i] = rhs * diaginv[i];
714 for (i = ROW - 1; i >= 0; i--) {
716 memcpy(b_tmp, b_val + pb, nb *
sizeof(
REAL));
717 for (k = IA[i]; k < IA[i + 1]; ++k) {
728 printf(
"### ERROR: nb is illegal! [%s:%d]\n", __FILE__, __LINE__);
755 const INT nb = A->
nb;
756 const INT nb2 = nb * nb;
757 const INT* IA = A->
IA;
758 const INT* JA = A->
JA;
771 for (i = ROW - 1; i >= 0; i--) {
773 for (k = IA[i]; k < IA[i + 1]; ++k) {
775 if (j != i) rhs -= val[k] * u_val[j];
782 for (i = ROW - 1; i >= 0; i--) {
784 memcpy(b_tmp, b_val + pb, nb *
sizeof(
REAL));
785 for (k = IA[i]; k < IA[i + 1]; ++k) {
790 memcpy(u_val + pb, b_tmp, nb *
sizeof(
REAL));
796 printf(
"### ERROR: nb is illegal! [%s:%d]\n", __FILE__, __LINE__);
821 const INT nb = A->
nb;
822 const INT nb2 = nb * nb;
823 const INT* IA = A->
IA;
824 const INT* JA = A->
JA;
837 for (I = 0; I < ROW; ++I) {
840 for (k = IA[i]; k < IA[i + 1]; ++k) {
842 if (j != i) rhs -= val[k] * u_val[j];
844 u_val[i] = rhs * diaginv[i];
849 for (I = 0; I < ROW; ++I) {
852 memcpy(b_tmp, b_val + pb, nb *
sizeof(
REAL));
853 for (k = IA[i]; k < IA[i + 1]; ++k) {
893 const INT nb = A->
nb;
894 const INT nb2 = nb * nb;
895 const INT* IA = A->
IA;
896 const INT* JA = A->
JA;
911 for (I = 0; I < ROW; ++I) {
914 for (k = IA[i]; k < IA[i + 1]; ++k) {
916 if (j != i) rhs -= val[k] * u_val[j];
921 for (I = 0; I < ROW; ++I) {
924 memcpy(b_tmp, b_val + pb, nb *
sizeof(
REAL));
925 for (k = IA[i]; k < IA[i + 1]; ++k) {
930 memcpy(u_val + pb, b_tmp, nb *
sizeof(
REAL));
964 const INT nb = A->
nb;
965 const INT nb2 = nb * nb;
966 const INT size = ROW * nb2;
967 const INT* IA = A->
IA;
968 const INT* JA = A->
JA;
973 REAL* diaginv = NULL;
980 nthreads = fasp_get_num_threads();
989 INT mybegin, myend, myid;
991#pragma omp parallel for private(myid, mybegin, myend, i, k)
993 for (myid = 0; myid < nthreads; myid++) {
995 for (i = mybegin; i < myend; i++) {
996 for (k = IA[i]; k < IA[i + 1]; ++k)
998 memcpy(diaginv + i * nb2, val + k * nb2, nb2 *
sizeof(
REAL));
1002 for (i = 0; i < ROW; ++i) {
1003 for (k = IA[i]; k < IA[i + 1]; ++k) {
1005 memcpy(diaginv + i * nb2, val + k * nb2, nb2 *
sizeof(
REAL));
1013 INT mybegin, myend, myid;
1015#pragma omp parallel for private(myid, mybegin, myend, i)
1017 for (myid = 0; myid < nthreads; myid++) {
1019 for (i = mybegin; i < myend; i++) {
1024 for (i = 0; i < ROW; ++i) {
1030 INT mybegin, myend, myid;
1032#pragma omp parallel for private(myid, mybegin, myend, i)
1034 for (myid = 0; myid < nthreads; myid++) {
1036 for (i = mybegin; i < myend; i++) {
1037 diaginv[i] = 1.0 / diaginv[i];
1041 for (i = 0; i < ROW; ++i) {
1043 diaginv[i] = 1.0 / diaginv[i];
1120 const INT nb = A->
nb;
1121 const INT* IA = A->
IA;
1122 const INT* JA = A->
JA;
1130 const INT nb2 = nb * nb;
1134 REAL one_minus_weight = 1.0 - weight;
1138 INT myid, mybegin, myend;
1139 INT nthreads = fasp_get_num_threads();
1145#pragma omp parallel for private(myid, mybegin, myend, i, rhs, k, j)
1146 for (myid = 0; myid < nthreads; myid++) {
1148 for (i = mybegin; i < myend; i++) {
1150 for (k = IA[i]; k < IA[i + 1]; ++k) {
1152 if (j != i) rhs -= val[k] * u_val[j];
1155 one_minus_weight * u_val[i] + weight * (rhs * diaginv[i]);
1160 for (i = 0; i < ROW; ++i) {
1162 for (k = IA[i]; k < IA[i + 1]; ++k) {
1164 if (j != i) rhs -= val[k] * u_val[j];
1166 u_val[i] = one_minus_weight * u_val[i] + weight * (rhs * diaginv[i]);
1171 }
else if (nb > 1) {
1175#pragma omp parallel for private(myid, mybegin, myend, i, pb, k, j)
1176 for (myid = 0; myid < nthreads; myid++) {
1178 for (i = mybegin; i < myend; i++) {
1180 memcpy(b_tmp + myid * nb, b_val + pb, nb *
sizeof(
REAL));
1181 for (k = IA[i]; k < IA[i + 1]; ++k) {
1188 one_minus_weight, u_val + pb, nb);
1196 for (i = 0; i < ROW; ++i) {
1198 memcpy(b_tmp, b_val + pb, nb *
sizeof(
REAL));
1199 for (k = IA[i]; k < IA[i + 1]; ++k) {
1205 one_minus_weight, u_val + pb, nb);
1239 const INT nb = A->
nb;
1240 const INT nb2 = nb * nb;
1241 const INT* IA = A->
IA;
1242 const INT* JA = A->
JA;
1244 const REAL one_minus_weight = 1.0 - weight;
1257 INT myid, mybegin, myend;
1258 INT nthreads = fasp_get_num_threads();
1264#pragma omp parallel for private(myid, mybegin, myend, i, rhs, k, j)
1265 for (myid = 0; myid < nthreads; myid++) {
1267 mybegin = ROW - 1 - mybegin;
1268 myend = ROW - 1 - myend;
1269 for (i = mybegin; i > myend; i--) {
1271 for (k = IA[i]; k < IA[i + 1]; ++k) {
1273 if (j != i) rhs -= val[k] * u_val[j];
1276 one_minus_weight * u_val[i] + weight * (rhs * diaginv[i]);
1281 for (i = ROW - 1; i >= 0; i--) {
1283 for (k = IA[i]; k < IA[i + 1]; ++k) {
1285 if (j != i) rhs -= val[k] * u_val[j];
1287 u_val[i] = one_minus_weight * u_val[i] + weight * (rhs * diaginv[i]);
1292 }
else if (nb > 1) {
1296#pragma omp parallel for private(myid, mybegin, myend, i, pb, k, j)
1297 for (myid = 0; myid < nthreads; myid++) {
1299 mybegin = ROW - 1 - mybegin;
1300 myend = ROW - 1 - myend;
1301 for (i = mybegin; i > myend; i--) {
1303 memcpy(b_tmp + myid * nb, b_val + pb, nb *
sizeof(
REAL));
1304 for (k = IA[i]; k < IA[i + 1]; ++k) {
1308 b_tmp + myid * nb, nb);
1311 one_minus_weight, u_val + pb, nb);
1319 for (i = ROW - 1; i >= 0; i--) {
1321 memcpy(b_tmp, b_val + pb, nb *
sizeof(
REAL));
1322 for (k = IA[i]; k < IA[i + 1]; ++k) {
1328 one_minus_weight, u_val + pb, nb);
1363 const INT nb = A->
nb;
1364 const INT nb2 = nb * nb;
1365 const INT* IA = A->
IA;
1366 const INT* JA = A->
JA;
1368 const REAL one_minus_weight = 1.0 - weight;
1381 INT myid, mybegin, myend;
1382 INT nthreads = fasp_get_num_threads();
1388#pragma omp parallel for private(myid, mybegin, myend, I, i, rhs, k, j)
1389 for (myid = 0; myid < nthreads; myid++) {
1391 for (I = mybegin; I < myend; ++I) {
1394 for (k = IA[i]; k < IA[i + 1]; ++k) {
1396 if (j != i) rhs -= val[k] * u_val[j];
1399 one_minus_weight * u_val[i] + weight * (rhs * diaginv[i]);
1404 for (I = 0; I < ROW; ++I) {
1407 for (k = IA[i]; k < IA[i + 1]; ++k) {
1409 if (j != i) rhs -= val[k] * u_val[j];
1411 u_val[i] = one_minus_weight * u_val[i] + weight * (rhs * diaginv[i]);
1416 }
else if (nb > 1) {
1420#pragma omp parallel for private(myid, mybegin, myend, I, i, pb, k, j)
1421 for (myid = 0; myid < nthreads; myid++) {
1423 for (I = mybegin; I < myend; ++I) {
1426 memcpy(b_tmp + myid * nb, b_val + pb, nb *
sizeof(
REAL));
1427 for (k = IA[i]; k < IA[i + 1]; ++k) {
1431 b_tmp + myid * nb, nb);
1434 one_minus_weight, u_val + pb, nb);
1442 for (I = 0; I < ROW; ++I) {
1445 memcpy(b_tmp, b_val + pb, nb *
sizeof(
REAL));
1446 for (k = IA[i]; k < IA[i + 1]; ++k) {
1452 one_minus_weight, u_val + pb, nb);
1482 const INT nb = iludata->
nb, m = A->
ROW * nb, memneed = 5 * m;
1490 if (iludata->
nwork < memneed)
goto MEMERR;
1502 perm(A->
ROW, A->
nb, zr, iludata->
jlevL, tzr);
1508 invperm(A->
ROW, A->
nb, tz, iludata->
jlevL, z);
1536 printf(
"### ERROR: ILU needs %d memory, only %d available! [%s:%d]\n", memneed,
1537 iludata->
nwork, __FILE__, __LINE__);
1564static inline void perm(
const INT n,
const INT nb,
const REAL* x,
const INT* p,
REAL* y)
1566 INT i, j, indx, indy;
1569#pragma omp parallel for private(i, j, indx, indy)
1571 for (i = 0; i < n; ++i) {
1574 for (j = 0; j < nb; ++j) {
1575 y[indy + j] = x[indx + j];
1598 INT i, j, indx, indy;
1601#pragma omp parallel for private(i, j, indx, indy)
1603 for (i = 0; i < n; ++i) {
1606 for (j = 0; j < nb; ++j) {
1607 y[indy + j] = x[indx + j];
void fasp_darray_cp(const INT n, const REAL *x, REAL *y)
Copy an array to the other y=x.
void fasp_mem_free(void *mem)
Free up previous allocated memory body and set pointer to NULL.
void * fasp_mem_calloc(const unsigned int size, const unsigned int type)
Allocate, initiate, and check memory.
void fasp_chkerr(const SHORT status, const char *fctname)
Check error status and print out error messages before quit.
void fasp_get_start_end(const INT procid, const INT nprocs, const INT n, INT *start, INT *end)
Assign Load to each thread.
void fasp_gettime(REAL *time)
Get system time.
void fasp_blas_darray_axpy(const INT n, const REAL a, const REAL *x, REAL *y)
y = a*x + y
SHORT fasp_smat_inv(REAL *a, const INT n)
Compute the inverse matrix of a small full matrix a.
void fasp_blas_smat_aAxpby(const REAL alpha, const REAL *A, const REAL *x, const REAL beta, REAL *y, const INT n)
Compute y:=alpha*A*x + beta*y.
void fasp_blas_smat_mxv(const REAL *a, const REAL *b, REAL *c, const INT n)
Compute the product of a small full matrix a and a array b, stored in c.
void fasp_blas_smat_ymAx(const REAL *A, const REAL *x, REAL *y, const INT n)
Compute y := y - Ax, where 'A' is a n*n dense matrix.
void fasp_blas_dbsr_aAxpy(const REAL alpha, const dBSRmat *A, const REAL *x, REAL *y)
Compute y := alpha*A*x + y.
void fasp_smoother_dbsr_gs_descend(dBSRmat *A, dvector *b, dvector *u, REAL *diaginv)
Gauss-Seidel relaxation in the descending order.
void fasp_smoother_dbsr_gs1(dBSRmat *A, dvector *b, dvector *u, INT order, INT *mark, REAL *diaginv)
Gauss-Seidel relaxation.
void fasp_smoother_dbsr_gs_order2(dBSRmat *A, dvector *b, dvector *u, INT *mark, REAL *work)
Gauss-Seidel relaxation in the user-defined order.
void fasp_smoother_dbsr_gs_descend1(dBSRmat *A, dvector *b, dvector *u)
Gauss-Seidel relaxation in the descending order.
void fasp_smoother_dbsr_jacobi1(dBSRmat *A, dvector *b, dvector *u, REAL *diaginv)
Jacobi relaxation.
void fasp_smoother_dbsr_gs_ascend(dBSRmat *A, dvector *b, dvector *u, REAL *diaginv)
Gauss-Seidel relaxation in the ascending order.
void fasp_smoother_dbsr_gs_ascend1(dBSRmat *A, dvector *b, dvector *u)
Gauss-Seidel relaxation in the ascending order.
void fasp_smoother_dbsr_sor_ascend(dBSRmat *A, dvector *b, dvector *u, REAL *diaginv, REAL weight)
SOR relaxation in the ascending order.
void fasp_smoother_dbsr_ilu(dBSRmat *A, dvector *b, dvector *x, void *data)
ILU method as the smoother in solving Au=b with multigrid method.
void fasp_smoother_dbsr_jacobi(dBSRmat *A, dvector *b, dvector *u)
Jacobi relaxation.
void fasp_smoother_dbsr_gs(dBSRmat *A, dvector *b, dvector *u, INT order, INT *mark)
Gauss-Seidel relaxation.
void fasp_smoother_dbsr_sor1(dBSRmat *A, dvector *b, dvector *u, INT order, INT *mark, REAL *diaginv, REAL weight)
SOR relaxation.
void fasp_smoother_dbsr_sor_order(dBSRmat *A, dvector *b, dvector *u, REAL *diaginv, INT *mark, REAL weight)
SOR relaxation in the user-defined order.
void fasp_smoother_dbsr_sor_descend(dBSRmat *A, dvector *b, dvector *u, REAL *diaginv, REAL weight)
SOR relaxation in the descending order.
void fasp_smoother_dbsr_jacobi_setup(dBSRmat *A, REAL *diaginv)
Setup for jacobi relaxation, fetch the diagonal sub-block matrixes and make them inverse first.
void fasp_smoother_dbsr_gs_order1(dBSRmat *A, dvector *b, dvector *u, REAL *diaginv, INT *mark)
Gauss-Seidel relaxation in the user-defined order.
void fasp_smoother_dbsr_sor(dBSRmat *A, dvector *b, dvector *u, INT order, INT *mark, REAL weight)
SOR relaxation.
void fasp_precond_dbsr_ilu_mc_omp(REAL *r, REAL *z, void *data)
Multi-thread Parallel ILU preconditioner based on graph coloring.
void fasp_precond_dbsr_ilu_ls_omp(REAL *r, REAL *z, void *data)
Multi-thread Parallel ILU preconditioner based on level schedule strategy.
void fasp_precond_dbsr_ilu(REAL *r, REAL *z, void *data)
ILU preconditioner.
Main header file for the FASP project.
#define SHORT
FASP integer and floating point numbers.
#define TRUE
Definition of logic type.
INT nb
block size for BSR type only
INT * jlevL
mapping from row to color for lower triangle
Block sparse row storage matrix of REAL type.
INT nb
dimension of each sub-block
INT * IA
integer array of row pointers, the size is ROW+1
INT ROW
number of rows of sub-blocks in matrix A, M
Vector with n entries of REAL type.
REAL * val
actual vector entries