20#include "fasp_functs.h"
61 const INT n = A->
COL, nnz = A->
NNZ, nb = A->
nb, nb2 = nb*nb;
65 INT ierr, iwk, nzlu, nwork, *ijlu, *uptr;
67 REAL setup_start, setup_end, setup_duration;
70 printf(
"### DEBUG: [-Begin-] %s ...\n", __FUNCTION__);
71 printf(
"### DEBUG: m = %d, n = %d, nnz = %d\n", A->
ROW, n, nnz);
81 printf(
"### WARNING: iludata->type = %d not supported!\n",
88 iludata->
iperm = NULL;
90 iludata->
row = iludata->
col = n;
99 printf(
"### DEBUG: symbolic factorization ... \n");
107 printf(
"### ERROR: ILU setup failed (ierr=%d)! [%s]\n", ierr, __FUNCTION__);
115 printf(
"### DEBUG: numerical factorization ... \n");
119 status = numfactor(A, iludata->
luval, ijlu, uptr);
122 printf(
"### ERROR: ILU factorization failed! [%s]\n", __FUNCTION__);
128 nwork = 20*A->
ROW*A->
nb;
129 iludata->
nzlu = nzlu;
130 iludata->
nwork = nwork;
133 memcpy(iludata->
ijlu,ijlu,nzlu*
sizeof(
INT));
138 printf(
"### DEBUG: fill-in = %d, nwork = %d\n", lfil, nwork);
139 printf(
"### DEBUG: iwk = %d, nzlu = %d\n", iwk, nzlu);
143 printf(
"### ERROR: ILU needs more RAM %d! [%s]\n", iwk-nzlu, __FUNCTION__);
150 setup_duration = setup_end - setup_start;
151 printf(
"BSR ILU(%d)-seq setup costs %f seconds.\n", lfil, setup_duration);
159 printf(
"### DEBUG: [--End--] %s ...\n", __FUNCTION__);
194 const INT n = A->
COL, nnz = A->
NNZ, nb = A->
nb, nb2 = nb*nb;
198 static INT ierr, iwk, nzlu, nwork, *ijlu, *uptr;
201 REAL setup_start, setup_end, setup_duration;
204 printf(
"### DEBUG: [-Begin-] %s ...\n", __FUNCTION__);
205 printf(
"### DEBUG: m = %d, n = %d, nnz = %d\n", A->
ROW, n, nnz);
216 printf(
"### WARNING: iludata->type = %d not supported!\n",
223 iludata->
iperm = NULL;
225 iludata->
row = iludata->
col = n;
236 printf(
"### DEBUG: symbolic factorization ... \n");
247 printf(
"### DEBUG: numerical factorization ... \n");
251 nwork = 5*A->
ROW*A->
nb;
252 iludata->
nwork = nwork;
253 iludata->
nzlu = nzlu;
256 memcpy(iludata->
ijlu,ijlu,nzlu*
sizeof(
INT));
263 printf(
"### DEBUG: fill-in = %d, nwork = %d\n", lfil, nwork);
264 printf(
"### DEBUG: iwk = %d, nzlu = %d\n", iwk, nzlu);
268 printf(
"### ERROR: ILU setup failed (ierr=%d)! [%s]\n", ierr, __FUNCTION__);
274 printf(
"### ERROR: ILU needs more RAM %d! [%s]\n", iwk-nzlu, __FUNCTION__);
281 numfactor(A, iludata->
luval, iludata->
ijlu, uptr);
291 setup_duration = setup_end - setup_start;
292 printf(
"BSR ILU(%d) setup costs %f seconds.\n", lfil, setup_duration);
296 printf(
"### DEBUG: [--End--] %s ...\n", __FUNCTION__);
326 const INT n = A->
COL, nnz = A->
NNZ, nb = A->
nb, nb2 = nb*nb;
330 INT ierr, iwk, nzlu, nwork, *ijlu, *uptr;
333 REAL setup_start, setup_end, setup_duration;
334 REAL symbolic_start, symbolic_end, numfac_start, numfac_end;
337 printf(
"### DEBUG: [-Begin-] %s ...\n", __FUNCTION__);
338 printf(
"### DEBUG: m = %d, n = %d, nnz = %d\n", A->
ROW, n, nnz);
348 printf(
"### WARNING: iludata->type = %d not supported any more!\n",
355 iludata->
iperm = NULL;
357 iludata->
row = iludata->
col = n;
364 printf(
"### DEBUG: symbolic factorization ... \n");
375#if prtlvl > PRINT_MIN
376 printf(
"ILU symbolic factorization time = %f\n", symbolic_end-symbolic_start);
379 nwork = 5*A->
ROW*A->
nb;
380 iludata->
nzlu = nzlu;
381 iludata->
nwork = nwork;
385 memcpy(iludata->
ijlu,ijlu,nzlu*
sizeof(
INT));
389 printf(
"### DEBUG: numerical factorization ... \n");
395 numfactor_mulcol(A, iludata->
luval, ijlu, uptr, iludata->
nlevL,
400#if prtlvl > PRINT_MIN
401 printf(
"ILU numerical factorization time = %f\n", numfac_end-numfac_start);
405 printf(
"### DEBUG: fill-in = %d, nwork = %d\n", lfil, nwork);
406 printf(
"### DEBUG: iwk = %d, nzlu = %d\n", iwk, nzlu);
410 printf(
"### ERROR: ILU setup failed (ierr=%d)! [%s]\n", ierr, __FUNCTION__);
416 printf(
"### ERROR: ILU needs more RAM %d! [%s]\n", iwk-nzlu, __FUNCTION__);
423 setup_duration = setup_end - setup_start;
424 printf(
"BSR ILU(%d)-mc setup costs %f seconds.\n", lfil, setup_duration);
432 printf(
"### DEBUG: [--End--] %s ...\n", __FUNCTION__);
461 const INT n = A->
COL, nnz = A->
NNZ, nb = A->
nb, nb2 = nb*nb;
465 INT ierr, iwk, nzlu, nwork, *ijlu, *uptr;
468 REAL setup_start, setup_end, setup_duration;
469 REAL symbolic_start, symbolic_end, numfac_start, numfac_end;
472 printf(
"### DEBUG: [-Begin-] %s ...\n", __FUNCTION__);
473 printf(
"### DEBUG: m=%d, n=%d, nnz=%d\n", A->
ROW, n, nnz);
483 printf(
"### WARNING: iludata->type = %d not supported!\n",
490 iludata->
iperm = NULL;
492 iludata->
row = iludata->
col=n;
499 printf(
"### DEBUG: symbolic factorization ... \n");
510#if prtlvl > PRINT_MIN
511 printf(
"ILU symbolic factorization time = %f\n", symbolic_end-symbolic_start);
514 nwork = 5*A->
ROW*A->
nb;
515 iludata->
nzlu = nzlu;
516 iludata->
nwork = nwork;
520 memcpy(iludata->
ijlu,ijlu,nzlu*
sizeof(
INT));
522 iludata->
uptr = NULL; iludata->
ic = NULL; iludata->
icmap = NULL;
527 printf(
"### DEBUG: numerical factorization ... \n");
533 numfactor_levsch(A, iludata->
luval, ijlu, uptr, iludata->
nlevL,
538#if prtlvl > PRINT_MIN
539 printf(
"ILU numerical factorization time = %f\n", numfac_end-numfac_start);
543 printf(
"### DEBUG: fill-in = %d, nwork = %d\n", lfil, nwork);
544 printf(
"### DEBUG: iwk = %d, nzlu = %d\n", iwk, nzlu);
548 printf(
"### ERROR: ILU setup failed (ierr=%d)! [%s]\n", ierr, __FUNCTION__);
554 printf(
"### ERROR: ILU needs more RAM %d! [%s]\n", iwk-nzlu, __FUNCTION__);
561 setup_duration = setup_end - setup_start;
562 printf(
"BSR ILU(%d)-ls setup costs %f seconds.\n", lfil, setup_duration);
570 printf(
"### DEBUG: [--End--] %s ...\n", __FUNCTION__);
603 const INT n = A->
COL, nnz = A->
NNZ, nb = A->
nb, nb2 = nb*nb;
607 static INT ierr, iwk, nzlu, nwork, *ijlu, *uptr;
610 REAL setup_start, setup_end, setup_duration;
611 REAL symbolic_start, symbolic_end, numfac_start, numfac_end;
614 printf(
"### DEBUG: [-Begin-] %s ...\n", __FUNCTION__);
615 printf(
"### DEBUG: m=%d, n=%d, nnz=%d\n", A->
ROW, n, nnz);
616 printf(
"### DEBUG: step=%d(1: symbolic factoration, 2: numerical factoration)\n", step);
626 printf(
"### WARNING: iludata->type = %d not supported!\n",
633 iludata->
iperm = NULL;
635 iludata->
row = iludata->
col=n;
645 printf(
"### DEBUG: symbolic factorization ... \n");
656 #if prtlvl > PRINT_MIN
657 printf(
"ILU symbolic factorization time = %f\n", symbolic_end-symbolic_start);
660 nwork = 5*A->
ROW*A->
nb;
661 iludata->
nzlu = nzlu;
662 iludata->
nwork = nwork;
666 memcpy(iludata->
ijlu,ijlu,nzlu*
sizeof(
INT));
670 iludata->
uptr = NULL; iludata->
ic = NULL; iludata->
icmap = NULL;
674 printf(
"### DEBUG: fill-in = %d, nwork = %d\n", lfil, nwork);
675 printf(
"### DEBUG: iwk = %d, nzlu = %d\n", iwk, nzlu);
679 printf(
"### ERROR: ILU setup failed (ierr=%d)! [%s]\n", ierr, __FUNCTION__);
685 printf(
"### ERROR: ILU needs more RAM %d! [%s]\n", iwk-nzlu, __FUNCTION__);
689 }
else if (step==2) {
692 printf(
"### DEBUG: numerical factorization ... \n");
698 numfactor_levsch(A, iludata->
luval, iludata->
ijlu, uptr, iludata->
nlevL,
702#if prtlvl > PRINT_MIN
703 printf(
"ILU numerical factorization time = %f\n", numfac_end-numfac_start);
714 setup_duration = setup_end - setup_start;
715 printf(
"BSR ILU(%d)-ls setup costs %f seconds.\n", lfil, setup_duration);
719 printf(
"### DEBUG: [--End--] %s ...\n", __FUNCTION__);
778 iludata->
ilevU = NULL;
779 iludata->
jlevU = NULL;
784 printf(
"### WARNING: iludata->type = %d not supported!\n",
791 iludata->
iperm = NULL;
824 INT n=A->
ROW,nb=A->
nb, nb2=nb*nb, ib, ibstart,ibstart1;
825 INT k, indj, inds, indja,jluj, jlus, ijaj;
849 memset(colptrs, 0,
sizeof(
INT)*n);
855 for (k = 0; k < n; ++k) {
857 for (indj = jlu[k]; indj < jlu[k+1]; ++indj) {
858 colptrs[jlu[indj]] = indj;
860 for (ib=0;ib<nb2;++ib) luval[ibstart+ib] = 0;
865 for (indja = A->
IA[k]; indja < A->IA[k+1]; ++indja) {
867 ibstart=colptrs[ijaj]*nb2;
869 for (ib=0;ib<nb2;++ib) luval[ibstart+ib] = A->
val[ibstart1+ib];
872 for (indj = jlu[k]; indj < uptr[k]; ++indj) {
876 luval[indj] = luval[indj]*luval[jluj];
877 mult[0] = luval[indj];
879 for (inds = uptr[jluj]; inds < jlu[jluj+1]; ++inds) {
881 if (colptrs[jlus] != 0)
882 luval[colptrs[jlus]] = luval[colptrs[jlus]] - mult[0]*luval[inds];
887 for (indj = jlu[k]; indj < jlu[k+1]; ++indj) colptrs[jlu[indj]] = 0;
890 luval[k] = 1.0/luval[k];
897 for (k = 0; k < n; ++k) {
899 for (indj = jlu[k]; indj < jlu[k+1]; ++indj) {
900 colptrs[jlu[indj]] = indj;
902 for (ib=0;ib<nb2;++ib) luval[ibstart+ib] = 0;
907 for (indja = A->
IA[k]; indja < A->IA[k+1]; ++indja) {
909 ibstart=colptrs[ijaj]*nb2;
911 for (ib=0;ib<nb2;++ib) luval[ibstart+ib] = A->
val[ibstart1+ib];
914 for (indj = jlu[k]; indj < uptr[k]; ++indj) {
919 for (ib=0;ib<nb2;++ib) luval[ibstart+ib]=mult[ib];
921 for (inds = uptr[jluj]; inds < jlu[jluj+1]; ++inds) {
923 if (colptrs[jlus] != 0) {
925 ibstart=colptrs[jlus]*nb2;
926 for (ib=0;ib<nb2;++ib) luval[ibstart+ib]-=mult1[ib];
932 for (indj = jlu[k]; indj < jlu[k+1]; ++indj) colptrs[jlu[indj]] = 0;
943 for (k = 0; k < n; ++k) {
945 for (indj = jlu[k]; indj < jlu[k+1]; ++indj) {
946 colptrs[jlu[indj]] = indj;
948 for (ib=0;ib<nb2;++ib) luval[ibstart+ib] = 0;
953 for (indja = A->
IA[k]; indja < A->IA[k+1]; ++indja) {
955 ibstart=colptrs[ijaj]*nb2;
957 for (ib=0;ib<nb2;++ib) luval[ibstart+ib] = A->
val[ibstart1+ib];
960 for (indj = jlu[k]; indj < uptr[k]; ++indj) {
965 for (ib=0;ib<nb2;++ib) luval[ibstart+ib]=mult[ib];
967 for (inds = uptr[jluj]; inds < jlu[jluj+1]; ++inds) {
969 if (colptrs[jlus] != 0) {
971 ibstart=colptrs[jlus]*nb2;
972 for (ib=0;ib<nb2;++ib) luval[ibstart+ib]-=mult1[ib];
978 for (indj = jlu[k]; indj < jlu[k+1]; ++indj) colptrs[jlu[indj]] = 0;
990 for (k = 0; k < n; ++k) {
992 for (indj = jlu[k]; indj < jlu[k+1]; ++indj) {
993 colptrs[jlu[indj]] = indj;
995 for (ib=0;ib<nb2;++ib) luval[ibstart+ib] = 0;
1000 for (indja = A->
IA[k]; indja < A->IA[k+1]; ++indja) {
1001 ijaj = A->
JA[indja];
1002 ibstart=colptrs[ijaj]*nb2;
1004 for (ib=0;ib<nb2;++ib) luval[ibstart+ib] = A->
val[ibstart1+ib];
1007 for (indj = jlu[k]; indj < uptr[k]; ++indj) {
1012 for (ib=0;ib<nb2;++ib) luval[ibstart+ib]=mult[ib];
1014 for (inds = uptr[jluj]; inds < jlu[jluj+1]; ++inds) {
1016 if (colptrs[jlus] != 0) {
1018 ibstart=colptrs[jlus]*nb2;
1019 for (ib=0;ib<nb2;++ib) luval[ibstart+ib]-=mult1[ib];
1025 for (indj = jlu[k]; indj < jlu[k+1]; ++indj) colptrs[jlu[indj]] = 0;
1039 for (indj = jlu[k]; indj < jlu[k+1]; ++indj) {
1040 colptrs[jlu[indj]] = indj;
1042 for (ib=0;ib<nb2;++ib) luval[ibstart+ib] = 0;
1047 for (indja = A->
IA[k]; indja < A->IA[k+1]; indja++) {
1048 ijaj = A->
JA[indja];
1049 ibstart=colptrs[ijaj]*nb2;
1051 for (ib=0;ib<nb2;++ib) luval[ibstart+ib] = A->
val[ibstart1+ib];
1054 for (indj = jlu[k]; indj < uptr[k]; ++indj) {
1059 for (ib=0;ib<nb2;++ib) luval[ibstart+ib]=mult[ib];
1061 for (inds = uptr[jluj]; inds < jlu[jluj+1]; inds++) {
1063 if (colptrs[jlus] != 0) {
1065 ibstart=colptrs[jlus]*nb2;
1066 for (ib=0;ib<nb2;++ib) luval[ibstart+ib]-=mult1[ib];
1072 for (indj = jlu[k]; indj < jlu[k+1]; ++indj)
1073 colptrs[jlu[indj]] = 0;
1118 INT n = A->
ROW, nb = A->
nb, nb2 = nb*nb;
1119 INT ib, ibstart,ibstart1;
1120 INT k, i, indj, inds, indja,jluj, jlus, ijaj, tmp;
1141 for (i = 0; i < ncolors; ++i) {
1142#pragma omp parallel private(k,indj,ibstart,ib,indja,ijaj,ibstart1,jluj,inds,jlus,colptrs,tmp)
1145 memset(colptrs, 0,
sizeof(
INT)*n);
1147 for (k = ic[i]; k < ic[i+1]; ++k) {
1148 for (indj = jlu[k]; indj < jlu[k+1]; ++indj) {
1149 colptrs[jlu[indj]] = indj;
1151 for (ib=0;ib<nb2;++ib) luval[ibstart+ib] = 0;
1154 for (indja = A->
IA[k]; indja < A->IA[k+1]; ++indja) {
1155 ijaj = A->
JA[indja];
1156 ibstart=colptrs[ijaj]*nb2;
1158 for (ib=0;ib<nb2;++ib) luval[ibstart+ib] = A->
val[ibstart1+ib];
1160 for (indj = jlu[k]; indj < uptr[k]; ++indj) {
1162 luval[indj] = luval[indj]*luval[jluj];
1164 for (inds = uptr[jluj]; inds < jlu[jluj+1]; ++inds) {
1166 if (colptrs[jlus] != 0)
1167 luval[colptrs[jlus]] = luval[colptrs[jlus]] - tmp*luval[inds];
1171 for (indj = jlu[k]; indj < jlu[k+1]; ++indj) colptrs[jlu[indj]] = 0;
1173 luval[k] = 1.0/luval[k];
1183 for (i = 0; i < ncolors; ++i) {
1184#pragma omp parallel private(k,indj,ibstart,ib,indja,ijaj,ibstart1,jluj,inds,jlus,mult,mult1,colptrs)
1187 memset(colptrs, 0,
sizeof(
INT)*n);
1191 for (k = ic[i]; k < ic[i+1]; ++k) {
1192 for (indj = jlu[k]; indj < jlu[k+1]; ++indj) {
1193 colptrs[jlu[indj]] = indj;
1195 for (ib=0;ib<nb2;++ib) luval[ibstart+ib] = 0;
1198 for (indja = A->
IA[k]; indja < A->IA[k+1]; ++indja) {
1199 ijaj = A->
JA[indja];
1200 ibstart=colptrs[ijaj]*nb2;
1202 for (ib=0;ib<nb2;++ib) luval[ibstart+ib] = A->
val[ibstart1+ib];
1204 for (indj = jlu[k]; indj < uptr[k]; ++indj) {
1208 for (ib=0;ib<nb2;++ib) luval[ibstart+ib]=mult[ib];
1209 for (inds = uptr[jluj]; inds < jlu[jluj+1]; ++inds) {
1211 if (colptrs[jlus] != 0) {
1213 ibstart=colptrs[jlus]*nb2;
1214 for (ib=0;ib<nb2;++ib) luval[ibstart+ib]-=mult1[ib];
1218 for (indj = jlu[k]; indj < jlu[k+1]; ++indj) colptrs[jlu[indj]] = 0;
1231 for (i = 0; i < ncolors; ++i) {
1232#pragma omp parallel private(k,indj,ibstart,ib,indja,ijaj,ibstart1,jluj,inds,jlus,mult,mult1,colptrs)
1235 memset(colptrs, 0,
sizeof(
INT)*n);
1239 for (k = ic[i]; k < ic[i+1]; ++k) {
1240 for (indj = jlu[k]; indj < jlu[k+1]; ++indj) {
1241 colptrs[jlu[indj]] = indj;
1243 for (ib=0;ib<nb2;++ib) luval[ibstart+ib] = 0;
1246 for (indja = A->
IA[k]; indja < A->IA[k+1]; ++indja) {
1247 ijaj = A->
JA[indja];
1248 ibstart=colptrs[ijaj]*nb2;
1250 for (ib=0;ib<nb2;++ib) luval[ibstart+ib] = A->
val[ibstart1+ib];
1252 for (indj = jlu[k]; indj < uptr[k]; ++indj) {
1256 for (ib=0;ib<nb2;++ib) luval[ibstart+ib]=mult[ib];
1257 for (inds = uptr[jluj]; inds < jlu[jluj+1]; ++inds) {
1259 if (colptrs[jlus] != 0) {
1261 ibstart=colptrs[jlus]*nb2;
1262 for (ib=0;ib<nb2;++ib) luval[ibstart+ib]-=mult1[ib];
1266 for (indj = jlu[k]; indj < jlu[k+1]; ++indj) colptrs[jlu[indj]] = 0;
1279 if (nb > 3) printf(
"Multi-thread ILU numerical decomposition for %d\
1280 components has not been implemented!!!", nb);
1320 INT n = A->
ROW, nb = A->
nb, nb2 = nb*nb;
1321 INT ib, ibstart,ibstart1;
1322 INT k, i, indj, inds, indja, jluj, jlus, ijaj, tmp, ii;
1343 for (i = 0; i < ncolors; ++i) {
1344#pragma omp parallel private(k,indj,ibstart,ib,indja,ijaj,ibstart1,jluj,inds,jlus,colptrs,tmp)
1347 memset(colptrs, 0,
sizeof(
INT)*n);
1349 for (k = ic[i]; k < ic[i+1]; ++k) {
1350 for (indj = jlu[k]; indj < jlu[k+1]; ++indj) {
1351 colptrs[jlu[indj]] = indj;
1353 for (ib=0;ib<nb2;++ib) luval[ibstart+ib] = 0;
1356 for (indja = A->
IA[k]; indja < A->IA[k+1]; ++indja) {
1357 ijaj = A->
JA[indja];
1358 ibstart=colptrs[ijaj]*nb2;
1360 for (ib=0;ib<nb2;++ib) luval[ibstart+ib] = A->
val[ibstart1+ib];
1362 for (indj = jlu[k]; indj < uptr[k]; ++indj) {
1364 luval[indj] = luval[indj]*luval[jluj];
1366 for (inds = uptr[jluj]; inds < jlu[jluj+1]; ++inds) {
1368 if (colptrs[jlus] != 0)
1369 luval[colptrs[jlus]] = luval[colptrs[jlus]] - tmp*luval[inds];
1373 for (indj = jlu[k]; indj < jlu[k+1]; ++indj) colptrs[jlu[indj]] = 0;
1375 luval[k] = 1.0/luval[k];
1384 for (i = 0; i < ncolors; ++i) {
1385#pragma omp parallel private(k,indj,ibstart,ib,indja,ijaj,ibstart1,jluj,inds,jlus,mult,mult1,colptrs,ii)
1388 memset(colptrs, 0,
sizeof(
INT)*n);
1392 for (ii = ic[i]; ii < ic[i+1]; ++ii) {
1394 for (indj = jlu[k]; indj < jlu[k+1]; ++indj) {
1395 colptrs[jlu[indj]] = indj;
1397 for (ib=0;ib<nb2;++ib) luval[ibstart+ib] = 0;
1400 for (indja = A->
IA[k]; indja < A->IA[k+1]; ++indja) {
1401 ijaj = A->
JA[indja];
1402 ibstart=colptrs[ijaj]*nb2;
1404 for (ib=0;ib<nb2;++ib) luval[ibstart+ib] = A->
val[ibstart1+ib];
1406 for (indj = jlu[k]; indj < uptr[k]; ++indj) {
1410 for (ib=0;ib<nb2;++ib) luval[ibstart+ib]=mult[ib];
1411 for (inds = uptr[jluj]; inds < jlu[jluj+1]; ++inds) {
1413 if (colptrs[jlus] != 0) {
1415 ibstart=colptrs[jlus]*nb2;
1416 for (ib=0;ib<nb2;++ib) luval[ibstart+ib]-=mult1[ib];
1420 for (indj = jlu[k]; indj < jlu[k+1]; ++indj) colptrs[jlu[indj]] = 0;
1433 for (i = 0; i < ncolors; ++i) {
1434#pragma omp parallel private(k,indj,ibstart,ib,indja,ijaj,ibstart1,jluj,inds,jlus,mult,mult1,colptrs,ii)
1437 memset(colptrs, 0,
sizeof(
INT)*n);
1441 for (ii = ic[i]; ii < ic[i+1]; ++ii) {
1443 for (indj = jlu[k]; indj < jlu[k+1]; ++indj) {
1444 colptrs[jlu[indj]] = indj;
1446 for (ib=0;ib<nb2;++ib) luval[ibstart+ib] = 0;
1449 for (indja = A->
IA[k]; indja < A->IA[k+1]; ++indja) {
1450 ijaj = A->
JA[indja];
1451 ibstart=colptrs[ijaj]*nb2;
1453 for (ib=0;ib<nb2;++ib) luval[ibstart+ib] = A->
val[ibstart1+ib];
1455 for (indj = jlu[k]; indj < uptr[k]; ++indj) {
1459 for (ib=0;ib<nb2;++ib) luval[ibstart+ib]=mult[ib];
1460 for (inds = uptr[jluj]; inds < jlu[jluj+1]; ++inds) {
1462 if (colptrs[jlus] != 0) {
1464 ibstart=colptrs[jlus]*nb2;
1465 for (ib=0;ib<nb2;++ib) luval[ibstart+ib]-=mult1[ib];
1469 for (indj = jlu[k]; indj < jlu[k+1]; ++indj) colptrs[jlu[indj]] = 0;
1482 for (i = 0; i < ncolors; ++i) {
1483#pragma omp parallel private(k,indj,ibstart,ib,indja,ijaj,ibstart1,jluj,inds,jlus,mult,mult1,colptrs,ii)
1486 memset(colptrs, 0,
sizeof(
INT)*n);
1490 for (ii = ic[i]; ii < ic[i+1]; ++ii) {
1492 for (indj = jlu[k]; indj < jlu[k+1]; ++indj) {
1493 colptrs[jlu[indj]] = indj;
1495 for (ib=0;ib<nb2;++ib) luval[ibstart+ib] = 0;
1498 for (indja = A->
IA[k]; indja < A->IA[k+1]; ++indja) {
1499 ijaj = A->
JA[indja];
1500 ibstart=colptrs[ijaj]*nb2;
1502 for (ib=0;ib<nb2;++ib) luval[ibstart+ib] = A->
val[ibstart1+ib];
1504 for (indj = jlu[k]; indj < uptr[k]; ++indj) {
1508 for (ib=0;ib<nb2;++ib) luval[ibstart+ib]=mult[ib];
1509 for (inds = uptr[jluj]; inds < jlu[jluj+1]; ++inds) {
1511 if (colptrs[jlus] != 0) {
1513 ibstart=colptrs[jlus]*nb2;
1514 for (ib=0;ib<nb2;++ib) luval[ibstart+ib]-=mult1[ib];
1518 for (indj = jlu[k]; indj < jlu[k+1]; ++indj) colptrs[jlu[indj]] = 0;
1531 for (i = 0; i < ncolors; ++i) {
1532#pragma omp parallel private(k,indj,ibstart,ib,indja,ijaj,ibstart1,jluj,inds,jlus,mult,mult1,colptrs,ii)
1535 memset(colptrs, 0,
sizeof(
INT)*n);
1539 for (ii = ic[i]; ii < ic[i+1]; ++ii) {
1541 for (indj = jlu[k]; indj < jlu[k+1]; ++indj) {
1542 colptrs[jlu[indj]] = indj;
1544 for (ib=0;ib<nb2;++ib) luval[ibstart+ib] = 0;
1547 for (indja = A->
IA[k]; indja < A->IA[k+1]; ++indja) {
1548 ijaj = A->
JA[indja];
1549 ibstart=colptrs[ijaj]*nb2;
1551 for (ib=0;ib<nb2;++ib) luval[ibstart+ib] = A->
val[ibstart1+ib];
1553 for (indj = jlu[k]; indj < uptr[k]; ++indj) {
1557 for (ib=0;ib<nb2;++ib) luval[ibstart+ib]=mult[ib];
1558 for (inds = uptr[jluj]; inds < jlu[jluj+1]; ++inds) {
1560 if (colptrs[jlus] != 0) {
1562 ibstart=colptrs[jlus]*nb2;
1563 for (ib=0;ib<nb2;++ib) luval[ibstart+ib]-=mult1[ib];
1567 for (indj = jlu[k]; indj < jlu[k+1]; ++indj) colptrs[jlu[indj]] = 0;
1579 for (i = 0; i < ncolors; ++i) {
1580#pragma omp parallel private(k,indj,ibstart,ib,indja,ijaj,ibstart1,jluj,inds,jlus,mult,mult1,colptrs,ii)
1583 memset(colptrs, 0,
sizeof(
INT) * n);
1587 for (ii = ic[i]; ii < ic[i + 1]; ++ii) {
1589 for (indj = jlu[k]; indj < jlu[k + 1]; ++indj) {
1590 colptrs[jlu[indj]] = indj;
1591 ibstart = indj * nb2;
1592 for (ib = 0; ib < nb2; ++ib) luval[ibstart + ib] = 0;
1595 for (indja = A->
IA[k]; indja < A->IA[k + 1]; ++indja) {
1596 ijaj = A->
JA[indja];
1597 ibstart = colptrs[ijaj] * nb2;
1598 ibstart1 = indja * nb2;
1599 for (ib = 0; ib < nb2; ++ib) luval[ibstart + ib] = A->
val[ibstart1 + ib];
1601 for (indj = jlu[k]; indj < uptr[k]; ++indj) {
1603 ibstart = indj * nb2;
1605 for (ib = 0; ib < nb2; ++ib) luval[ibstart + ib] = mult[ib];
1606 for (inds = uptr[jluj]; inds < jlu[jluj + 1]; ++inds) {
1608 if (colptrs[jlus] != 0) {
1610 ibstart = colptrs[jlus] * nb2;
1611 for (ib = 0; ib < nb2; ++ib) luval[ibstart + ib] -= mult1[ib];
1615 for (indj = jlu[k]; indj < jlu[k + 1]; ++indj) colptrs[jlu[indj]] = 0;
1646static void generate_S_theta (
dCSRmat *A,
1651 const INT row_plus_one = row+1;
1652 const INT nnz=A->
IA[row]-A->
IA[0];
1654 INT index, i, j, begin_row, end_row;
1674 for (i=0;i<row;++i) {
1678 begin_row=ia[i]; end_row=ia[i+1];
1680 for (j=begin_row;j<end_row;j++) row_abs_sum+=
ABS(aj[j]);
1682 row_abs_sum = row_abs_sum*theta;
1690 for (j=begin_row;j<end_row;j++){
1692 if ( (row_abs_sum >=
ABS(aj[j])) && (ja[j] !=i) ) S->
JA[j]=-1;
1698 for (i=0;i<row;++i) {
1700 begin_row=ia[i]; end_row=ia[i+1]-1;
1701 for (j=begin_row;j<=end_row;j++) {
1703 S->
JA[index]=S->
JA[j];
1734static void multicoloring (
AMG_data *mgl,
1739 INT k, i, j, pre, group, iend;
1749 S.
IA = S.
JA = NULL; S.
val = NULL;
1751 theta =
MAX(0.0,
MIN(1.0, theta));
1753 if (theta > 0.0 && theta < 1.0) {
1754 generate_S_theta(&A, &S, theta);
1758 else if (theta == 1.0) {
1760 mgl->
ic = (
INT*)malloc(
sizeof(
INT)*2);
1764 for(k=0; k<n; k++) mgl->
icmap[k]= k;
1770 printf(
"### WARNING: Theta = %lf! [%s]\n", theta, __FUNCTION__);
1779 cq = (
INT *)malloc(
sizeof(
INT)*(n+1));
1780 newr = (
INT *)malloc(
sizeof(
INT)*(n+1));
1783#pragma omp parallel for private(k)
1785 for ( k=0; k<n; k++ ) cq[k]= k;
1788 for ( k=0; k<n; k++ ) {
1789 if ((A.
IA[k+1] - A.
IA[k]) > group ) group = A.
IA[k+1] - A.
IA[k];
1793 mgl->
ic = (
INT *)malloc(
sizeof(
INT)*(group+2));
1799 memset(newr, -1,
sizeof(
INT)*(n+1));
1800 memset(mgl->
icmap, 0,
sizeof(
INT)*n);
1810 if (front == n ) front =0;
1814 mgl->
ic[group] = icount;
1815 mgl->
icmap[icount] = i;
1819 if ((IA[i+1]-IA[i]) > igold)
1820 iend =
MIN(IA[i+1], (IA[i] + igold));
1825 for (j= IA[i]; j< iend; j++) newr[JA[j]] = group;
1827 else if (newr[i] == group) {
1830 if (rear == n) rear = 0;
1834 mgl->
icmap[icount] = i;
1837 if ((IA[i+1] - IA[i]) > igold) iend =
MIN(IA[i+1], (IA[i] + igold));
1841 for (j = IA[i]; j< iend; j++) newr[JA[j]] = group;
1845 }
while(rear != front);
1847 mgl->
ic[group] = icount;
1889 for (i=0; i<n; i++) {
1891 for(j=ijlu[i]; j<ijlu[i+1]; j++)
if (ijlu[j]<=i) l =
MAX(l, level[ijlu[j]]);
1894 nlevL =
MAX(nlevL, l+1);
1897 for (i=1; i<=nlevL; i++) ilevL[i] += ilevL[i-1];
1899 for (i=0; i<n; i++) {
1900 k = ilevL[level[i]-1];
1902 ilevL[level[i]-1]++;
1905 for (i=nlevL-1; i>0; i--) ilevL[i] = ilevL[i-1];
1911 for (i=0; i<n; i++) level[i] = 0;
1915 for (i=n-1; i>=0; i--) {
1917 for (j=ijlu[i]; j<ijlu[i+1]; j++)
if (ijlu[j]>=i) l =
MAX(l, level[ijlu[j]]);
1920 nlevU =
MAX(nlevU, l+1);
1923 for (i=1; i<=nlevU; i++) ilevU[i] += ilevU[i-1];
1925 for (i=n-1; i>=0; i--) {
1926 k = ilevU[level[i]-1];
1928 ilevU[level[i]-1]++;
1931 for (i=nlevU-1; i>0; i--) ilevU[i] = ilevU[i-1];
1935 iludata->
nlevL = nlevL+1; iludata->
ilevL = ilevL;iludata->
jlevL = jlevL;
1936 iludata->
nlevU = nlevU+1; iludata->
ilevU = ilevU;iludata->
jlevU = jlevU;
1956 INT Colors, rowmax, level, prtlvl = 0;
1963#pragma omp parallel for private(level,rowmax,Colors) schedule(static, 1)
1965 for ( level=0; level<maxlvl; level++ ) {
1967 multicoloring(&mgl[level], theta, &rowmax, &Colors);
1971 printf(
"mgl[%3d].A.row = %12d rowmax = %5d rowavg = %7.2lf colors = %5d theta = %le\n",
1972 level, mgl[level].A.
row, rowmax, (
double)mgl[level].
A.
nnz/mgl[level].
A.
row,
1973 mgl[level].
colors, theta);
void fasp_darray_set(const INT n, REAL *x, const REAL val)
Set initial value for an array to be x=val.
void fasp_iarray_cp(const INT n, const INT *x, INT *y)
Copy an array to the other y=x.
void fasp_mem_free(void *mem)
Free up previous allocated memory body and set pointer to NULL.
void * fasp_mem_realloc(void *oldmem, const LONGLONG tsize)
Reallocate, initiate, and check memory.
void * fasp_mem_calloc(const unsigned int size, const unsigned int type)
Allocate, initiate, and check memory.
void fasp_gettime(REAL *time)
Get system time.
SHORT fasp_ilu_dbsr_setup_levsch_omp(dBSRmat *A, ILU_data *iludata, ILU_param *iluparam)
Get ILU decoposition of a BSR matrix A based on level schedule strategy.
SHORT fasp_ilu_dbsr_setup_mc_omp(dBSRmat *A, dCSRmat *Ap, ILU_data *iludata, ILU_param *iluparam)
Multi-thread ILU decoposition of a BSR matrix A based on graph coloring.
void topologic_sort_ILU(ILU_data *iludata)
Reordering vertices according to level schedule strategy.
SHORT fasp_ilu_dbsr_setup_step(dBSRmat *A, ILU_data *iludata, ILU_param *iluparam, INT step)
Get ILU decoposition of a BSR matrix A.
SHORT fasp_ilu_dbsr_setup(dBSRmat *A, ILU_data *iludata, ILU_param *iluparam)
Get ILU decoposition of a BSR matrix A.
void mulcol_independ_set(AMG_data *mgl, INT gslvl)
Multi-coloring vertices of adjacency graph of A.
SHORT fasp_ilu_dbsr_setup_omp(dBSRmat *A, ILU_data *iludata, ILU_param *iluparam)
Multi-thread ILU decoposition of a BSR matrix A based on graph coloring.
SHORT fasp_ilu_dbsr_setup_levsch_step(dBSRmat *A, ILU_data *iludata, ILU_param *iluparam, INT step)
Get ILU decoposition of a BSR matrix A based on level schedule strategy.
void fasp_symbfactor(INT n, INT *colind, INT *rwptr, INT levfill, INT nzmax, INT *nzlu, INT *ijlu, INT *uptr, INT *ierr)
Symbolic factorization of a CSR matrix A in compressed sparse row format, with resulting factors stor...
void fasp_smat_inv_nc3(REAL *a)
Compute the inverse matrix of a 3*3 full matrix A (in place)
void fasp_smat_inv_nc4(REAL *a)
Compute the inverse matrix of a 4*4 full matrix A (in place)
SHORT fasp_smat_invp_nc(REAL *a, const INT n)
Compute the inverse of a matrix using Gauss Elimination with Pivoting.
void fasp_smat_inv_nc2(REAL *a)
Compute the inverse matrix of a 2*2 full matrix A (in place)
void fasp_smat_inv_nc5(REAL *a)
Compute the inverse matrix of a 5*5 full matrix A (in place)
void fasp_blas_smat_mul_nc4(const REAL *a, const REAL *b, REAL *c)
Compute the matrix product of two 4*4 matrices a and b, stored in c.
void fasp_blas_smat_mul(const REAL *a, const REAL *b, REAL *c, const INT n)
Compute the matrix product of two small full matrices a and b, stored in c.
void fasp_blas_smat_mul_nc2(const REAL *a, const REAL *b, REAL *c)
Compute the matrix product of two 2* matrices a and b, stored in c.
void fasp_blas_smat_mul_nc3(const REAL *a, const REAL *b, REAL *c)
Compute the matrix product of two 3*3 matrices a and b, stored in c.
void fasp_blas_smat_mul_nc7(const REAL *a, const REAL *b, REAL *c)
Compute the matrix product of two 7*7 matrices a and b, stored in c.
void fasp_blas_smat_mul_nc5(const REAL *a, const REAL *b, REAL *c)
Compute the matrix product of two 5*5 matrices a and b, stored in c.
dBSRmat fasp_dbsr_perm(const dBSRmat *A, const INT *P)
Apply permutation of A, i.e. Aperm=PAP' by the orders given in P.
void fasp_dbsr_free(dBSRmat *A)
Free memory space for BSR format sparse matrix.
dCSRmat fasp_dcsr_create(const INT m, const INT n, const INT nnz)
Create CSR sparse matrix data memory space.
void fasp_dcsr_free(dCSRmat *A)
Free CSR sparse matrix data memory space.
void fasp_dcsr_cp(const dCSRmat *A, dCSRmat *B)
copy a dCSRmat to a new one B=A
dCSRmat fasp_dcsr_sympart(dCSRmat *A)
Get symmetric part of a dCSRmat matrix.
void fasp_blas_dcsr_mxm(const dCSRmat *A, const dCSRmat *B, dCSRmat *C)
Sparse matrix multiplication C=A*B.
AMG_data * fasp_amg_data_create(SHORT max_levels)
Create and initialize AMG_data for classical and SA AMG.
Main header file for the FASP project.
#define SHORT
FASP integer and floating point numbers.
#define MAX(a, b)
Definition of max, min, abs.
#define FASP_SUCCESS
Definition of return status and error messages.
#define ERROR_SOLVER_ILUSETUP
#define PRINT_NONE
Print level for all subroutines – not including DEBUG output.
dCSRmat A
pointer to the matrix at level level_num
INT colors
number of colors
INT * ic
indices for different colors
INT * icmap
mapping from vertex to color
SHORT num_levels
number of levels in use <= max_levels
INT * ijlu
integer array of row pointers and column indexes, the size is nzlu
REAL * luval
nonzero entries of LU
INT col
column of matrix LU, n
INT * jlevU
mapping from row to color for upper triangle
INT nlevU
number of colors for upper triangle
INT nb
block size for BSR type only
INT row
row number of matrix LU, m
INT nlevL
number of colors for lower triangle
INT * ic
indices for different colors
INT * icmap
mapping from vertex to color
INT * iperm
permutation arrays for ILUtp
INT nzlu
number of nonzero entries
INT * jlevL
mapping from row to color for lower triangle
INT * uptr
temporary work space
INT * ilevL
number of vertices in each color for lower triangle
dCSRmat * A
pointer to the original coefficient matrix
INT * ilevU
number of vertices in each color for upper triangle
INT ILU_lfil
level of fill-in for ILUk
SHORT print_level
print level
SHORT ILU_type
ILU type for decomposition.
Block sparse row storage matrix of REAL type.
INT COL
number of cols of sub-blocks in matrix A, N
INT NNZ
number of nonzero sub-blocks in matrix A, NNZ
INT nb
dimension of each sub-block
INT * IA
integer array of row pointers, the size is ROW+1
INT ROW
number of rows of sub-blocks in matrix A, M
Sparse matrix of REAL type in CSR format.
INT col
column of matrix A, n
REAL * val
nonzero entries of A
INT row
row number of matrix A, m
INT * IA
integer array of row pointers, the size is m+1
INT nnz
number of nonzero entries
INT * JA
integer array of column indexes, the size is nnz
Sparse matrix of INT type in CSR format.
INT col
column of matrix A, n
INT row
row number of matrix A, m
INT * IA
integer array of row pointers, the size is m+1
INT nnz
number of nonzero entries
INT * JA
integer array of column indexes, the size is nnz
INT * val
nonzero entries of A