18#include "fasp_functs.h"
65 for (block = 0; block < ngrid; block ++) {
134 for (point = 0; point < ngrid; point ++) {
135 for (band = 0; band < nband; band ++) {
136 width = offsets[band];
137 column = point + width;
140 b_tmp[point] -= offdiag[band][column]*u_val[column];
144 if (column < ngrid) {
145 b_tmp[point] -= offdiag[band][point]*u_val[column];
151 for (point = 0; point < ngrid; point ++) {
153 u_val[point] = b_tmp[point] / diag[point];
157 for (block = 0; block < ngrid; block ++) {
158 start_DATA = nc2*block;
159 start_vecb = nc*block;
160 for (band = 0; band < nband; band ++) {
161 width = offsets[band];
162 column = block + width;
165 start_data = nc2*column;
166 start_vecu = nc*column;
167 blkcontr2( start_data, start_vecu, start_vecb,
168 nc, offdiag[band], u_val, b_tmp );
172 if (column < ngrid) {
173 start_vecu = nc*column;
174 blkcontr2( start_DATA, start_vecu, start_vecb,
175 nc, offdiag[band], u_val, b_tmp );
181 for (block = 0; block < ngrid; block ++) {
187 printf(
"### ERROR: nc is illegal! [%s:%d]\n", __FILE__, __LINE__);
226 REAL *diaginv = NULL;
229 INT size = nc2*ngrid;
241 for (block = 0; block < ngrid; block ++) {
353 REAL *vec_tmp = NULL;
358 for (point = 0; point < ngrid; point ++) {
360 for (band = 0; band < nband; band ++) {
361 width = offsets[band];
362 column = point + width;
365 rhs -= offdiag[band][column]*u_val[column];
369 if (column < ngrid) {
370 rhs -= offdiag[band][point]*u_val[column];
376 u_val[point] = rhs / diag[point];
383 for (block = 0; block < ngrid; block ++) {
385 for (point = 0; point < nc; point ++) {
386 vec_tmp[point] = b_val[ncb+point];
388 start_DATA = nc2*block;
389 for (band = 0; band < nband; band ++) {
390 width = offsets[band];
391 column = block + width;
394 start_data = nc2*column;
395 start_vecu = nc*column;
396 blkcontr2( start_data, start_vecu, 0, nc,
397 offdiag[band], u_val, vec_tmp );
401 if (column < ngrid) {
402 start_vecu = nc*column;
403 blkcontr2( start_DATA, start_vecu, 0, nc,
404 offdiag[band], u_val, vec_tmp );
416 printf(
"### ERROR: nc is illegal! [%s:%d]\n", __FILE__, __LINE__);
469 REAL *vec_tmp = NULL;
474 for (point = ngrid-1; point >= 0; point --) {
476 for (band = 0; band < nband; band ++) {
477 width = offsets[band];
478 column = point + width;
481 rhs -= offdiag[band][column]*u_val[column];
485 if (column < ngrid) {
486 rhs -= offdiag[band][point]*u_val[column];
492 u_val[point] = rhs / diag[point];
499 for (block = ngrid-1; block >= 0; block --) {
501 for (point = 0; point < nc; point ++) {
502 vec_tmp[point] = b_val[ncb+point];
504 start_DATA = nc2*block;
505 for (band = 0; band < nband; band ++) {
506 width = offsets[band];
507 column = block + width;
510 start_data = nc2*column;
511 start_vecu = nc*column;
512 blkcontr2( start_data, start_vecu, 0, nc,
513 offdiag[band], u_val, vec_tmp );
517 if (column < ngrid) {
518 start_vecu = nc*column;
519 blkcontr2( start_DATA, start_vecu, 0, nc,
520 offdiag[band], u_val, vec_tmp );
533 printf(
"### ERROR: nc is illegal! [%s:%d]\n", __FILE__, __LINE__);
589 REAL *vec_tmp = NULL;
594 for (index = 0; index < ngrid; index ++) {
597 for (band = 0; band < nband; band ++) {
598 width = offsets[band];
599 column = point + width;
602 rhs -= offdiag[band][column]*u_val[column];
606 if (column < ngrid) {
607 rhs -= offdiag[band][point]*u_val[column];
613 u_val[point] = rhs / diag[point];
620 for (index = 0; index < ngrid; index ++) {
623 for (point = 0; point < nc; point ++) {
624 vec_tmp[point] = b_val[ncb+point];
626 start_DATA = nc2*block;
627 for (band = 0; band < nband; band ++) {
628 width = offsets[band];
629 column = block + width;
632 start_data = nc2*column;
633 start_vecu = nc*column;
634 blkcontr2( start_data, start_vecu, 0, nc,
635 offdiag[band], u_val, vec_tmp );
639 if (column < ngrid) {
640 start_vecu = nc*column;
641 blkcontr2( start_DATA, start_vecu, 0, nc,
642 offdiag[band], u_val, vec_tmp );
654 printf(
"### ERROR: nc is illegal! [%s:%d]\n", __FILE__, __LINE__);
716 REAL *vec_tmp = NULL;
722 for (point = 0; point < ngrid; point ++) {
723 if (mark[point] == FIRST) {
725 for (band = 0; band < nband; band ++) {
726 width = offsets[band];
727 column = point + width;
730 rhs -= offdiag[band][column]*u_val[column];
734 if (column < ngrid) {
735 rhs -= offdiag[band][point]*u_val[column];
741 u_val[point] = rhs / diag[point];
746 for (point = 0; point < ngrid; point ++) {
747 if (mark[point] == SECOND) {
749 for (band = 0; band < nband; band ++) {
750 width = offsets[band];
751 column = point + width;
754 rhs -= offdiag[band][column]*u_val[column];
758 if (column < ngrid) {
759 rhs -= offdiag[band][point]*u_val[column];
765 u_val[point] = rhs / diag[point];
773 for (block = 0; block < ngrid; block ++) {
774 if (mark[block] == FIRST) {
776 for (point = 0; point < nc; point ++) {
777 vec_tmp[point] = b_val[ncb+point];
779 start_DATA = nc2*block;
780 for (band = 0; band < nband; band ++) {
781 width = offsets[band];
782 column = block + width;
785 start_data = nc2*column;
786 start_vecu = nc*column;
787 blkcontr2( start_data, start_vecu, 0, nc,
788 offdiag[band], u_val, vec_tmp );
792 if (column < ngrid) {
793 start_vecu = nc*column;
794 blkcontr2( start_DATA, start_vecu, 0, nc,
795 offdiag[band], u_val, vec_tmp );
807 for (block = 0; block < ngrid; block ++) {
808 if (mark[block] == SECOND) {
810 for (point = 0; point < nc; point ++) {
811 vec_tmp[point] = b_val[ncb+point];
813 start_DATA = nc2*block;
814 for (band = 0; band < nband; band ++) {
815 width = offsets[band];
816 column = block + width;
819 start_data = nc2*column;
820 start_vecu = nc*column;
821 blkcontr2( start_data, start_vecu, 0, nc,
822 offdiag[band], u_val, vec_tmp );
826 if (column < ngrid) {
827 start_vecu = nc*column;
828 blkcontr2( start_DATA, start_vecu, 0, nc,
829 offdiag[band], u_val, vec_tmp );
842 printf(
"### ERROR: nc is illegal! [%s:%d]\n", __FILE__, __LINE__);
883 REAL *diaginv = NULL;
886 INT size = nc2*ngrid;
898 for (block = 0; block < ngrid; block ++) {
1011 REAL one_minus_weight = 1.0 - weight;
1014 REAL *vec_tmp = NULL;
1019 for (point = 0; point < ngrid; point ++) {
1021 for (band = 0; band < nband; band ++) {
1022 width = offsets[band];
1023 column = point + width;
1026 rhs -= offdiag[band][column]*u_val[column];
1030 if (column < ngrid) {
1031 rhs -= offdiag[band][point]*u_val[column];
1037 u_val[point] = one_minus_weight*u_val[point] +
1038 weight*(rhs / diag[point]);
1045 for (block = 0; block < ngrid; block ++) {
1047 for (point = 0; point < nc; point ++) {
1048 vec_tmp[point] = b_val[ncb+point];
1050 start_DATA = nc2*block;
1051 for (band = 0; band < nband; band ++) {
1052 width = offsets[band];
1053 column = block + width;
1056 start_data = nc2*column;
1057 start_vecu = nc*column;
1058 blkcontr2( start_data, start_vecu, 0, nc,
1059 offdiag[band], u_val, vec_tmp );
1063 if (column < ngrid) {
1064 start_vecu = nc*column;
1065 blkcontr2( start_DATA, start_vecu, 0, nc,
1066 offdiag[band], u_val, vec_tmp );
1072 aAxpby(weight, one_minus_weight, nc,
1073 diaginv+start_DATA, vec_tmp, u_val+nc*block);
1079 printf(
"### ERROR: nc is illegal! [%s:%d]\n", __FILE__, __LINE__);
1132 REAL one_minus_weight = 1.0 - weight;
1135 REAL *vec_tmp = NULL;
1140 for (point = ngrid-1; point >= 0; point --) {
1142 for (band = 0; band < nband; band ++) {
1143 width = offsets[band];
1144 column = point + width;
1147 rhs -= offdiag[band][column]*u_val[column];
1151 if (column < ngrid) {
1152 rhs -= offdiag[band][point]*u_val[column];
1158 u_val[point] = one_minus_weight*u_val[point] +
1159 weight*(rhs / diag[point]);
1166 for (block = ngrid-1; block >= 0; block --) {
1168 for (point = 0; point < nc; point ++) {
1169 vec_tmp[point] = b_val[ncb+point];
1171 start_DATA = nc2*block;
1172 for (band = 0; band < nband; band ++) {
1173 width = offsets[band];
1174 column = block + width;
1177 start_data = nc2*column;
1178 start_vecu = nc*column;
1179 blkcontr2( start_data, start_vecu, 0, nc,
1180 offdiag[band], u_val, vec_tmp );
1184 if (column < ngrid) {
1185 start_vecu = nc*column;
1186 blkcontr2( start_DATA, start_vecu, 0, nc,
1187 offdiag[band], u_val, vec_tmp );
1193 aAxpby(weight, one_minus_weight, nc,
1194 diaginv+start_DATA, vec_tmp, u_val+nc*block);
1200 printf(
"### ERROR: nc is illegal! [%s:%d]\n", __FILE__, __LINE__);
1256 REAL one_minus_weight = 1.0 - weight;
1259 REAL *vec_tmp = NULL;
1264 for (index = 0; index < ngrid; index ++) {
1265 point = mark[index];
1267 for (band = 0; band < nband; band ++) {
1268 width = offsets[band];
1269 column = point + width;
1272 rhs -= offdiag[band][column]*u_val[column];
1276 if (column < ngrid) {
1277 rhs -= offdiag[band][point]*u_val[column];
1283 u_val[point] = one_minus_weight*u_val[point] +
1284 weight*(rhs / diag[point]);
1291 for (index = 0; index < ngrid; index ++) {
1292 block = mark[index];
1294 for (point = 0; point < nc; point ++) {
1295 vec_tmp[point] = b_val[ncb+point];
1297 start_DATA = nc2*block;
1298 for (band = 0; band < nband; band ++) {
1299 width = offsets[band];
1300 column = block + width;
1303 start_data = nc2*column;
1304 start_vecu = nc*column;
1305 blkcontr2( start_data, start_vecu, 0, nc,
1306 offdiag[band], u_val, vec_tmp );
1310 if (column < ngrid) {
1311 start_vecu = nc*column;
1312 blkcontr2( start_DATA, start_vecu, 0, nc,
1313 offdiag[band], u_val, vec_tmp );
1319 aAxpby(weight, one_minus_weight, nc,
1320 diaginv+start_DATA, vec_tmp, u_val+nc*block);
1327 printf(
"### ERROR: nc is illegal! [%s:%d]\n", __FILE__, __LINE__);
1387 REAL one_minus_weight = 1.0 - weight;
1389 INT SECOND = -order;
1392 REAL *vec_tmp = NULL;
1398 for (point = 0; point < ngrid; point ++) {
1399 if (mark[point] == FIRST) {
1401 for (band = 0; band < nband; band ++) {
1402 width = offsets[band];
1403 column = point + width;
1406 rhs -= offdiag[band][column]*u_val[column];
1410 if (column < ngrid) {
1411 rhs -= offdiag[band][point]*u_val[column];
1417 u_val[point] = one_minus_weight*u_val[point] +
1418 weight*(rhs / diag[point]);
1424 for (point = 0; point < ngrid; point ++) {
1425 if (mark[point] == SECOND) {
1427 for (band = 0; band < nband; band ++) {
1428 width = offsets[band];
1429 column = point + width;
1432 rhs -= offdiag[band][column]*u_val[column];
1436 if (column < ngrid) {
1437 rhs -= offdiag[band][point]*u_val[column];
1443 u_val[point] = rhs / diag[point];
1451 for (block = 0; block < ngrid; block ++) {
1452 if (mark[block] == FIRST) {
1454 for (point = 0; point < nc; point ++) {
1455 vec_tmp[point] = b_val[ncb+point];
1457 start_DATA = nc2*block;
1458 for (band = 0; band < nband; band ++) {
1459 width = offsets[band];
1460 column = block + width;
1463 start_data = nc2*column;
1464 start_vecu = nc*column;
1465 blkcontr2( start_data, start_vecu, 0, nc,
1466 offdiag[band], u_val, vec_tmp );
1470 if (column < ngrid) {
1471 start_vecu = nc*column;
1472 blkcontr2( start_DATA, start_vecu, 0, nc,
1473 offdiag[band], u_val, vec_tmp );
1479 aAxpby(weight, one_minus_weight, nc,
1480 diaginv+start_DATA, vec_tmp, u_val+nc*block);
1486 for (block = 0; block < ngrid; block ++) {
1487 if (mark[block] == SECOND) {
1489 for (point = 0; point < nc; point ++) {
1490 vec_tmp[point] = b_val[ncb+point];
1492 start_DATA = nc2*block;
1493 for (band = 0; band < nband; band ++) {
1494 width = offsets[band];
1495 column = block + width;
1498 start_data = nc2*column;
1499 start_vecu = nc*column;
1500 blkcontr2( start_data, start_vecu, 0, nc,
1501 offdiag[band], u_val, vec_tmp );
1505 if (column < ngrid) {
1506 start_vecu = nc*column;
1507 blkcontr2( start_DATA, start_vecu, 0, nc,
1508 offdiag[band], u_val, vec_tmp );
1514 aAxpby(weight, one_minus_weight, nc,
1515 diaginv+start_DATA, vec_tmp, u_val+nc*block);
1522 printf(
"### ERROR: nc is illegal! [%s:%d]\n", __FILE__, __LINE__);
1549 const INT nc = A->
nc;
1563 nneigh= neigh->
row/ngrid;
1567 INT i, j, k, l, m, n, nbd, p;
1578 for (i=0; i<ngrid; ++i) {
1581 for (l=0; l<nneigh; ++l) {
1582 if (neigh->
val[i*nneigh+l] >= 0) count++ ;
1586 block_size = count*nc;
1588 diaginv[i].
row = block_size*block_size;
1589 diaginv[i].
val = temp + mem_inv;
1590 mem_inv += diaginv[i].
row;
1592 pivot[i].
row = block_size;
1593 pivot[i].
val = tmp + mem_pivot;
1594 mem_pivot += pivot[i].
row;
1597 for (j=0; j<nc; ++j) {
1598 for (k=0; k<nc; ++k) {
1599 diaginv[i].
val[j*block_size+k] = diag[i*nc*nc + j*nc + k];
1605 for (l=0; l<nneigh; ++l) {
1606 p = neigh->
val[i*nneigh+l];
1609 for (j=0; j<nc; ++j) {
1610 for (k=0; k<nc; ++k) {
1611 m = count*nc + j; n = count*nc+k;
1612 diaginv[i].
val[m*block_size+n] = diag[p*nc*nc+j*nc+k];
1616 for (nbd=0; nbd<nband; nbd++) {
1618 if ( offsets[nbd] == (p-i) ) {
1619 for (j=0; j<nc; ++j) {
1620 for(k=0; k<nc; ++k) {
1621 m = j; n = count*nc + k;
1622 diaginv[i].
val[m*block_size+n] = offdiag[nbd][(p-
MAX(p-i, 0))*nc*nc+j*nc+k];
1628 if ( offsets[nbd] == (i-p) ) {
1629 for (j=0; j<nc; ++j) {
1630 for(k=0; k<nc; ++k) {
1631 m = count*nc + j; n = k;
1632 diaginv[i].
val[m*block_size+n] = offdiag[nbd][(i-
MAX(i-p, 0))*nc*nc+j*nc+k];
1665void fasp_smoother_dstr_swz (
dSTRmat *A,
1675 const INT nc = A->
nc;
1683 nneigh= neigh->
row/ngrid;
1687 INT i, j, k, l, p, ti;
1693 e.
row = (nneigh+1)*nc; e.
val = temp + b->
row;
1694 ri.
row = (nneigh+1)*nc; ri.
val = temp + b->
row + (nneigh+1)*nc;
1701 for (i=0; i<ngrid; ++i) {
1705 for (j=0; j<nc; ++j) {
1706 ri.
val[j] = r.
val[i*nc + j];
1710 for (l=0; l<nneigh; ++l) {
1711 p=neigh->
val[nneigh*i+l];
1713 for (j=0; j<nc; ++j) {
1714 ri.
val[k*nc+j] = r.
val[p*nc+j];
1732 for (j=0; j<nc; ++j) {
1733 u->
val[i*nc + j] += e.
val[j];
1737 for (l=0; l<nneigh; ++l) {
1738 p=neigh->
val[nneigh*i+l];
1740 for (j=0; j<nc; ++j) {
1741 u->
val[p*nc+j] += e.
val[k*nc+j];
1754 for (i=0; i<ngrid; ++i) {
1759 for (j=0; j<nc; ++j) {
1760 ri.
val[j] = r.
val[ti*nc + j];
1764 for (l=0; l<nneigh; ++l) {
1765 p=neigh->
val[nneigh*ti+l];
1767 for (j=0; j<nc; ++j) {
1768 ri.
val[k*nc+j] = r.
val[p*nc+j];
1786 for (j=0; j<nc; ++j) {
1787 u->
val[ti*nc + j] += e.
val[j];
1791 for (l=0; l<nneigh; ++l) {
1792 p=neigh->
val[nneigh*ti+l];
1794 for (j=0; j<nc; ++j) {
1795 u->
val[p*nc+j] += e.
val[k*nc+j];
1833static void blkcontr2 (
INT start_data,
1842 if (start_vecy == 0) {
1843 for (i = 0; i < nc; i ++) {
1844 k = start_data + i*nc;
1845 for (j = 0; j < nc; j ++) {
1846 y[i] -= data[k+j]*x[start_vecx+j];
1851 for (i = 0; i < nc; i ++) {
1852 k = start_data + i*nc;
1854 for (j = 0; j < nc; j ++) {
1855 y[m] -= data[k+j]*x[start_vecx+j];
1876static void aAxpby (
REAL alpha,
1886 for (i = 0; i < size; i ++) {
1893 for (i = 0; i < size; i ++) {
1897 for (i = 0; i < size; i ++) {
1898 for (j = 0; j < size; j ++) {
1899 y[i] += A[i*size+j]*x[j];
1903 for (i = 0; i < size; i ++) {
void fasp_darray_cp(const INT n, const REAL *x, REAL *y)
Copy an array to the other y=x.
void fasp_mem_free(void *mem)
Free up previous allocated memory body and set pointer to NULL.
void * fasp_mem_calloc(const unsigned int size, const unsigned int type)
Allocate, initiate, and check memory.
void fasp_dvec_cp(const dvector *x, dvector *y)
Copy dvector x to dvector y.
SHORT fasp_smat_inv(REAL *a, const INT n)
Compute the inverse matrix of a small full matrix a.
SHORT fasp_smat_lu_decomp(REAL *A, INT pivot[], const INT n)
LU decomposition of A using Doolittle's method.
SHORT fasp_smat_lu_solve(const REAL *A, REAL b[], const INT pivot[], REAL x[], const INT n)
Solving Ax=b using LU decomposition.
void fasp_blas_smat_mxv(const REAL *a, const REAL *b, REAL *c, const INT n)
Compute the product of a small full matrix a and a array b, stored in c.
void fasp_blas_dstr_aAxpy(const REAL alpha, const dSTRmat *A, const REAL *x, REAL *y)
Matrix-vector multiplication y = alpha*A*x + y.
void fasp_smoother_dstr_jacobi(dSTRmat *A, dvector *b, dvector *u)
Jacobi method as the smoother.
void fasp_smoother_dstr_sor_cf(dSTRmat *A, dvector *b, dvector *u, REAL *diaginv, INT *mark, const INT order, const REAL weight)
SOR method as the smoother in the C-F manner.
void fasp_smoother_dstr_gs_cf(dSTRmat *A, dvector *b, dvector *u, REAL *diaginv, INT *mark, const INT order)
Gauss method as the smoother in the C-F manner.
void fasp_smoother_dstr_jacobi1(dSTRmat *A, dvector *b, dvector *u, REAL *diaginv)
Jacobi method as the smoother with diag_inv given.
void fasp_smoother_dstr_sor(dSTRmat *A, dvector *b, dvector *u, const INT order, INT *mark, const REAL weight)
SOR method as the smoother.
void fasp_smoother_dstr_sor_ascend(dSTRmat *A, dvector *b, dvector *u, REAL *diaginv, REAL weight)
SOR method as the smoother in the ascending manner.
void fasp_smoother_dstr_gs1(dSTRmat *A, dvector *b, dvector *u, const INT order, INT *mark, REAL *diaginv)
Gauss-Seidel method as the smoother with diag_inv given.
void fasp_smoother_dstr_gs_descend(dSTRmat *A, dvector *b, dvector *u, REAL *diaginv)
Gauss-Seidel method as the smoother in the descending manner.
void fasp_smoother_dstr_sor_order(dSTRmat *A, dvector *b, dvector *u, REAL *diaginv, INT *mark, REAL weight)
SOR method as the smoother in the user-defined order.
void fasp_smoother_dstr_gs_ascend(dSTRmat *A, dvector *b, dvector *u, REAL *diaginv)
Gauss-Seidel method as the smoother in the ascending manner.
void fasp_smoother_dstr_sor1(dSTRmat *A, dvector *b, dvector *u, const INT order, INT *mark, REAL *diaginv, const REAL weight)
SOR method as the smoother.
void fasp_smoother_dstr_gs_order(dSTRmat *A, dvector *b, dvector *u, REAL *diaginv, INT *mark)
Gauss method as the smoother in the user-defined order.
void fasp_generate_diaginv_block(dSTRmat *A, ivector *neigh, dvector *diaginv, ivector *pivot)
Generate inverse of diagonal block for block smoothers.
void fasp_smoother_dstr_gs(dSTRmat *A, dvector *b, dvector *u, const INT order, INT *mark)
Gauss-Seidel method as the smoother.
void fasp_smoother_dstr_sor_descend(dSTRmat *A, dvector *b, dvector *u, REAL *diaginv, REAL weight)
SOR method as the smoother in the descending manner.
Main header file for the FASP project.
#define MAX(a, b)
Definition of max, min, abs.
#define USERDEFINED
Type of ordering for smoothers.
Structure matrix of REAL type.
INT * offsets
offsets of the off-diagonals (length is nband)
INT nband
number of off-diag bands
REAL * diag
diagonal entries (length is ngrid*(nc^2))
INT nc
size of each block (number of components)
REAL ** offdiag
off-diagonal entries (dimension is nband * [(ngrid-|offsets|) * nc^2])
Vector with n entries of REAL type.
REAL * val
actual vector entries
Vector with n entries of INT type.
INT * val
actual vector entries