22#include "fasp_functs.h"
28static inline void smat_amxv_nc3(
const REAL,
const REAL *,
const REAL *,
REAL *);
29static inline void smat_amxv_nc5(
const REAL,
const REAL *,
const REAL *,
REAL *);
40static inline void blkcontr_str(
const INT,
const INT,
const INT,
const INT,
73 str_spaAxpy_2D_nc1(alpha, A, x, y);
77 str_spaAxpy_2D_nc3(alpha, A, x, y);
81 str_spaAxpy_2D_nc5(alpha, A, x, y);
85 str_spaAxpy_2D_blk(alpha, A, x, y);
95 str_spaAxpy_3D_nc1(alpha, A, x, y);
99 str_spaAxpy_3D_nc3(alpha, A, x, y);
103 str_spaAxpy_3D_nc5(alpha, A, x, y);
107 str_spaAxpy_3D_blk(alpha, A, x, y);
113 str_spaAxpy(alpha, A, x, y);
137 memset(y, 0, n*
sizeof(
REAL));
159 const INT nc2=nc*nc, size=ngrid*nc2;
164 INT myid, mybegin, myend;
165 INT nthreads = fasp_get_num_threads();
177#pragma omp parallel for private(myid, mybegin, myend, i, ic2, j)
178 for (myid=0; myid<nthreads; myid++) {
180 for (i=mybegin; i<myend; i++) {
182 for (j=0; j<nc2; j++) {
183 if (j/nc == j%nc) B->
diag[ic2+j]=1;
184 else B->
diag[ic2+j]=0;
191 for (i=0;i<ngrid;++i) {
193 for (j=0;j<nc2;++j) {
194 if (j/nc == j%nc) B->
diag[ic2+j]=1;
195 else B->
diag[ic2+j]=0;
204 for (i=0;i<nband;++i) {
208 for (j=0;j<ngrid-nb1;++j)
212 for (j=0;j<ngrid-nb1;++j)
241static inline void smat_amxv_nc3 (
const REAL alpha,
246 c[0] += alpha*(a[0]*b[0] + a[1]*b[1] + a[2]*b[2]);
247 c[1] += alpha*(a[3]*b[0] + a[4]*b[1] + a[5]*b[2]);
248 c[2] += alpha*(a[6]*b[0] + a[7]*b[1] + a[8]*b[2]);
265static inline void smat_amxv_nc5 (
const REAL alpha,
270 c[0] += alpha*(a[0]*b[0] + a[1]*b[1] + a[2]*b[2] + a[3] * b[3] + a[4] * b[4]);
271 c[1] += alpha*(a[5]*b[0] + a[6]*b[1] + a[7]*b[2] + a[8] * b[3] + a[9] * b[4]);
272 c[2] += alpha*(a[10]*b[0] + a[11]*b[1] + a[12]*b[2] + a[13] * b[3] + a[14] * b[4]);
273 c[3] += alpha*(a[15]*b[0] + a[16]*b[1] + a[17]*b[2] + a[18] * b[3] + a[19] * b[4]);
274 c[4] += alpha*(a[20]*b[0] + a[21]*b[1] + a[22]*b[2] + a[23] * b[3] + a[24] * b[4]);
294static inline void smat_amxv (
const REAL alpha,
305 INT myid, mybegin, myend;
306 INT nthreads = fasp_get_num_threads();
311#pragma omp parallel for private(myid, mybegin, myend, i, in, j)
312 for (myid=0; myid<nthreads; myid++) {
314 for (i=mybegin; i<myend; i++) {
317 c[i] += alpha*a[in+j]*b[j];
326 c[i] += alpha*a[in+j]*b[j];
356static inline void blkcontr_str (
const INT start_data,
357 const INT start_vecx,
358 const INT start_vecy,
368 INT myid, mybegin, myend;
369 INT nthreads = fasp_get_num_threads();
374#pragma omp parallel for private(myid, mybegin, myend, i, k, m, j)
375 for (myid = 0; myid < nthreads; myid++) {
377 for (i = mybegin; i < myend; i ++) {
378 k = start_data + i*nc;
380 for (j = 0; j < nc; j ++) {
381 y[m] += data[k+j]*x[start_vecx+j];
388 for (i = 0; i < nc; i ++) {
389 k = start_data + i*nc;
391 for (j = 0; j < nc; j ++) {
392 y[m] += data[k+j]*x[start_vecx+j];
420static inline void str_spaAxpy_2D_nc1 (
const REAL alpha,
432 INT myid, mybegin, myend, idx;
433 INT nthreads = fasp_get_num_threads();
442 REAL *offdiag0=NULL, *offdiag1=NULL, *offdiag2=NULL, *offdiag3=NULL;
451 for (i=0; i<nband; ++i) {
458 else if (A->
offsets[i] == -nline) {
461 else if (A->
offsets[i] == nline) {
465 printf(
"### WARNING: offsets for 2D scalar is illegal! %s\n", __FUNCTION__);
466 str_spaAxpy(alpha, A, x, y);
474 y[0] += alpha*(diag[0]*x[0] + offdiag1[0]*x[1] + offdiag3[0]*x[nline]);
478#pragma omp parallel for private(myid, mybegin, myend, i, idx1, idx)
479 for (myid=0; myid<nthreads; myid++) {
481 for (i=mybegin; i<myend; i++) {
484 y[idx] += alpha*(offdiag0[idx1]*x[idx1] + diag[idx]*x[idx] +
485 offdiag1[idx]*x[idx+1] + offdiag3[idx]*x[idx+nline]);
491 for (i=1; i<nline; ++i) {
493 y[i] += alpha*(offdiag0[idx1]*x[idx1] + diag[i]*x[i] +
494 offdiag1[i]*x[i+1] + offdiag3[i]*x[i+nline]);
502#pragma omp parallel for private(myid, i, mybegin, myend, idx1, idx2, idx)
503 for (myid=0; myid<nthreads; myid++) {
505 for (i=mybegin; i<myend; ++i) {
509 y[idx] += alpha*(offdiag2[idx2]*x[idx2] + offdiag0[idx1]*x[idx1] +
510 diag[idx]*x[idx] + offdiag1[idx]*x[idx+1] +
511 offdiag3[idx]*x[idx+nline]);
517 for (i=nline; i<end2; ++i) {
520 y[i] += alpha*(offdiag2[idx2]*x[idx2] + offdiag0[idx1]*x[idx1] +
521 diag[i]*x[i] + offdiag1[i]*x[i+1] + offdiag3[i]*x[i+nline]);
529#pragma omp parallel for private(myid, i, mybegin, myend, idx1, idx2, idx)
530 for (myid=0; myid<nthreads; myid++) {
532 for (i=mybegin; i<myend; ++i) {
536 y[idx] += alpha*(offdiag2[idx2]*x[idx2] + offdiag0[idx1]*x[idx1] +
537 diag[idx]*x[idx] + offdiag1[idx]*x[idx+1]);
543 for (i=end2; i<end1; ++i) {
546 y[i] += alpha*(offdiag2[idx2]*x[idx2] + offdiag0[idx1]*x[idx1] +
547 diag[i]*x[i] + offdiag1[i]*x[i+1]);
555 y[end1] += alpha*(offdiag2[idx2]*x[idx2] + offdiag0[idx1]*x[idx1] + diag[end1]*x[end1]);
581static inline void str_spaAxpy_2D_nc3 (
const REAL alpha,
588 INT matidx, matidx1, matidx2;
600 INT myid, mybegin, myend, up;
601 INT nthreads = fasp_get_num_threads();
605 REAL *offdiag0=NULL, *offdiag1=NULL, *offdiag2=NULL, *offdiag3=NULL;
615 for (i=0; i<nband; ++i) {
623 else if (A->
offsets[i] == -nline) {
626 else if (A->
offsets[i] == nline) {
630 printf(
"### WARNING: offsets for 2D scalar is illegal! %s\n", __FUNCTION__);
631 str_spaAxpy(alpha, A, x, y);
640 smat_amxv_nc3(alpha, diag, x, y);
641 smat_amxv_nc3(alpha, offdiag1, x+nc, y);
642 smat_amxv_nc3(alpha, offdiag3, x+nlinenc, y);
647#pragma omp parallel for private(myid, mybegin, myend, i, idx, matidx, idx1, matidx1)
648 for (myid=0; myid<nthreads; myid++) {
650 for (i=mybegin; i<myend; i++) {
655 smat_amxv_nc3(alpha, offdiag0+matidx1, x+idx1, y+idx);
656 smat_amxv_nc3(alpha, diag+matidx, x+idx, y+idx);
657 smat_amxv_nc3(alpha, offdiag1+matidx, x+idx+nc, y+idx);
658 smat_amxv_nc3(alpha, offdiag3+matidx, x+idx+nlinenc, y+idx);
664 for (i=1; i<nline; ++i) {
669 smat_amxv_nc3(alpha, offdiag0+matidx1, x+idx1, y+idx);
670 smat_amxv_nc3(alpha, diag+matidx, x+idx, y+idx);
671 smat_amxv_nc3(alpha, offdiag1+matidx, x+idx+nc, y+idx);
672 smat_amxv_nc3(alpha, offdiag3+matidx, x+idx+nlinenc, y+idx);
681#pragma omp parallel for private(myid, mybegin, myend, idx, idx1, idx2, matidx, matidx1, matidx2)
682 for (myid=0; myid<nthreads; myid++) {
684 for (i=mybegin; i<myend; i++) {
691 smat_amxv_nc3(alpha, offdiag2+matidx2, x+idx2, y+idx);
692 smat_amxv_nc3(alpha, offdiag0+matidx1, x+idx1, y+idx);
693 smat_amxv_nc3(alpha, diag+matidx, x+idx, y+idx);
694 smat_amxv_nc3(alpha, offdiag1+matidx, x+idx+nc, y+idx);
695 smat_amxv_nc3(alpha, offdiag3+matidx, x+idx+nlinenc, y+idx);
701 for (i=nx; i<end2; ++i) {
708 smat_amxv_nc3(alpha, offdiag2+matidx2, x+idx2, y+idx);
709 smat_amxv_nc3(alpha, offdiag0+matidx1, x+idx1, y+idx);
710 smat_amxv_nc3(alpha, diag+matidx, x+idx, y+idx);
711 smat_amxv_nc3(alpha, offdiag1+matidx, x+idx+nc, y+idx);
712 smat_amxv_nc3(alpha, offdiag3+matidx, x+idx+nlinenc, y+idx);
721#pragma omp parallel for private(myid, mybegin, myend, idx, idx1, idx2, matidx, matidx1, matidx2)
722 for (myid=0; myid<nthreads; myid++) {
724 for (i=mybegin; i<myend; i++) {
731 smat_amxv_nc3(alpha, offdiag2+matidx2, x+idx2, y+idx);
732 smat_amxv_nc3(alpha, offdiag0+matidx1, x+idx1, y+idx);
733 smat_amxv_nc3(alpha, diag+matidx, x+idx, y+idx);
734 smat_amxv_nc3(alpha, offdiag1+matidx, x+idx+nc, y+idx);
740 for (i=end2; i<end1; ++i) {
747 smat_amxv_nc3(alpha, offdiag2+matidx2, x+idx2, y+idx);
748 smat_amxv_nc3(alpha, offdiag0+matidx1, x+idx1, y+idx);
749 smat_amxv_nc3(alpha, diag+matidx, x+idx, y+idx);
750 smat_amxv_nc3(alpha, offdiag1+matidx, x+idx+nc, y+idx);
762 smat_amxv_nc3(alpha, offdiag2+matidx2, x+idx2, y+idx);
763 smat_amxv_nc3(alpha, offdiag0+matidx1, x+idx1, y+idx);
764 smat_amxv_nc3(alpha, diag+matidx, x+idx, y+idx);
787static inline void str_spaAxpy_2D_nc5 (
const REAL alpha,
794 INT matidx, matidx1, matidx2;
806 INT myid, mybegin, myend, up;
807 INT nthreads = fasp_get_num_threads();
811 REAL *offdiag0=NULL, *offdiag1=NULL, *offdiag2=NULL, *offdiag3=NULL;
821 for (i=0; i<nband; ++i) {
829 else if (A->
offsets[i] == -nline) {
832 else if (A->
offsets[i] == nline) {
836 printf(
"### WARNING: offsets for 2D scalar is illegal! %s\n", __FUNCTION__);
837 str_spaAxpy(alpha, A, x, y);
845 smat_amxv_nc5(alpha, diag, x, y);
846 smat_amxv_nc5(alpha, offdiag1, x+nc, y);
847 smat_amxv_nc5(alpha, offdiag3, x+nlinenc, y);
852#pragma omp parallel for private(myid, mybegin, myend, i, idx, matidx, idx1, matidx1)
853 for (myid=0; myid<nthreads; myid++) {
855 for (i=mybegin; i<myend; i++) {
860 smat_amxv_nc5(alpha, offdiag0+matidx1, x+idx1, y+idx);
861 smat_amxv_nc5(alpha, diag+matidx, x+idx, y+idx);
862 smat_amxv_nc5(alpha, offdiag1+matidx, x+idx+nc, y+idx);
863 smat_amxv_nc5(alpha, offdiag3+matidx, x+idx+nlinenc, y+idx);
869 for (i=1; i<nline; ++i) {
874 smat_amxv_nc5(alpha, offdiag0+matidx1, x+idx1, y+idx);
875 smat_amxv_nc5(alpha, diag+matidx, x+idx, y+idx);
876 smat_amxv_nc5(alpha, offdiag1+matidx, x+idx+nc, y+idx);
877 smat_amxv_nc5(alpha, offdiag3+matidx, x+idx+nlinenc, y+idx);
886#pragma omp parallel for private(myid, mybegin, myend, idx, idx1, idx2, matidx, matidx1, matidx2)
887 for (myid=0; myid<nthreads; myid++) {
889 for (i=mybegin; i<myend; i++) {
896 smat_amxv_nc5(alpha, offdiag2+matidx2, x+idx2, y+idx);
897 smat_amxv_nc5(alpha, offdiag0+matidx1, x+idx1, y+idx);
898 smat_amxv_nc5(alpha, diag+matidx, x+idx, y+idx);
899 smat_amxv_nc5(alpha, offdiag1+matidx, x+idx+nc, y+idx);
900 smat_amxv_nc5(alpha, offdiag3+matidx, x+idx+nlinenc, y+idx);
906 for (i=nx; i<end2; ++i) {
913 smat_amxv_nc5(alpha, offdiag2+matidx2, x+idx2, y+idx);
914 smat_amxv_nc5(alpha, offdiag0+matidx1, x+idx1, y+idx);
915 smat_amxv_nc5(alpha, diag+matidx, x+idx, y+idx);
916 smat_amxv_nc5(alpha, offdiag1+matidx, x+idx+nc, y+idx);
917 smat_amxv_nc5(alpha, offdiag3+matidx, x+idx+nlinenc, y+idx);
926#pragma omp parallel for private(myid, mybegin, myend, idx, idx1, idx2, matidx, matidx1, matidx2)
927 for (myid=0; myid<nthreads; myid++) {
929 for (i=mybegin; i<myend; i++) {
936 smat_amxv_nc5(alpha, offdiag2+matidx2, x+idx2, y+idx);
937 smat_amxv_nc5(alpha, offdiag0+matidx1, x+idx1, y+idx);
938 smat_amxv_nc5(alpha, diag+matidx, x+idx, y+idx);
939 smat_amxv_nc5(alpha, offdiag1+matidx, x+idx+nc, y+idx);
945 for (i=end2; i<end1; ++i) {
952 smat_amxv_nc5(alpha, offdiag2+matidx2, x+idx2, y+idx);
953 smat_amxv_nc5(alpha, offdiag0+matidx1, x+idx1, y+idx);
954 smat_amxv_nc5(alpha, diag+matidx, x+idx, y+idx);
955 smat_amxv_nc5(alpha, offdiag1+matidx, x+idx+nc, y+idx);
968 smat_amxv_nc5(alpha, offdiag2+matidx2, x+idx2, y+idx);
969 smat_amxv_nc5(alpha, offdiag0+matidx1, x+idx1, y+idx);
970 smat_amxv_nc5(alpha, diag+matidx, x+idx, y+idx);
994static inline void str_spaAxpy_2D_blk (
const REAL alpha,
1001 INT matidx, matidx1, matidx2;
1012 REAL *offdiag0=NULL, *offdiag1=NULL, *offdiag2=NULL, *offdiag3=NULL;
1022 for (i=0; i<nband; ++i) {
1027 else if (A->
offsets[i] == 1) {
1030 else if (A->
offsets[i] == -nline) {
1033 else if (A->
offsets[i] == nline) {
1037 printf(
"### WARNING: offsets for 2D scalar is illegal! %s\n", __FUNCTION__);
1038 str_spaAxpy(alpha, A, x, y);
1047 smat_amxv(alpha, diag, x, nc, y);
1048 smat_amxv(alpha, offdiag1, x+nc, nc, y);
1049 smat_amxv(alpha, offdiag3, x+nlinenc, nc, y);
1051 for (i=1; i<nline; ++i) {
1056 smat_amxv(alpha, offdiag0+matidx1, x+idx1, nc, y+idx);
1057 smat_amxv(alpha, diag+matidx, x+idx, nc, y+idx);
1058 smat_amxv(alpha, offdiag1+matidx, x+idx+nc, nc, y+idx);
1059 smat_amxv(alpha, offdiag3+matidx, x+idx+nlinenc, nc, y+idx);
1062 for (i=nx; i<end2; ++i) {
1069 smat_amxv(alpha, offdiag2+matidx2, x+idx2, nc, y+idx);
1070 smat_amxv(alpha, offdiag0+matidx1, x+idx1, nc, y+idx);
1071 smat_amxv(alpha, diag+matidx, x+idx, nc, y+idx);
1072 smat_amxv(alpha, offdiag1+matidx, x+idx+nc, nc, y+idx);
1073 smat_amxv(alpha, offdiag3+matidx, x+idx+nlinenc, nc, y+idx);
1076 for (i=end2; i<end1; ++i) {
1083 smat_amxv(alpha, offdiag2+matidx2, x+idx2, nc, y+idx);
1084 smat_amxv(alpha, offdiag0+matidx1, x+idx1, nc, y+idx);
1085 smat_amxv(alpha, diag+matidx, x+idx, nc, y+idx);
1086 smat_amxv(alpha, offdiag1+matidx, x+idx+nc, nc, y+idx);
1096 smat_amxv(alpha, offdiag2+matidx2, x+idx2, nc, y+idx);
1097 smat_amxv(alpha, offdiag0+matidx1, x+idx1, nc, y+idx);
1098 smat_amxv(alpha, diag+matidx, x+idx, nc, y+idx);
1121static inline void str_spaAxpy_3D_nc1 (
const REAL alpha,
1128 INT end1, end2, end3;
1136 REAL *offdiag0=NULL, *offdiag1=NULL, *offdiag2=NULL,
1137 *offdiag3=NULL, *offdiag4=NULL, *offdiag5=NULL;
1139 for (i=0; i<nband; ++i) {
1144 else if (A->
offsets[i] == 1) {
1147 else if (A->
offsets[i] == -nx) {
1150 else if (A->
offsets[i] == nx) {
1153 else if (A->
offsets[i] == -nxy) {
1156 else if (A->
offsets[i] == nxy) {
1160 printf(
"### WARNING: offsets for 3D scalar is illegal! %s\n", __FUNCTION__);
1161 str_spaAxpy(alpha, A, x, y);
1170 y[0] += alpha*(diag[0]*x[0] + offdiag1[0]*x[1] + offdiag3[0]*x[nx] + offdiag5[0]*x[nxy]);
1172 for (i=1; i<nx; ++i) {
1174 y[i] += alpha*(offdiag0[idx1]*x[idx1] + diag[i]*x[i] + offdiag1[i]*x[i+1] +
1175 offdiag3[i]*x[i+nx] + offdiag5[i]*x[i+nxy]);
1178 for (i=nx; i<nxy; ++i) {
1181 y[i] += alpha*(offdiag2[idx2]*x[idx2] + offdiag0[idx1]*x[idx1]
1182 + diag[i]*x[i] + offdiag1[i]*x[i+1] + offdiag3[i]*x[i+nx]
1183 + offdiag5[i]*x[i+nxy]);
1186 for (i=nxy; i<end3; ++i) {
1190 y[i] += alpha*(offdiag4[idx3]*x[idx3] + offdiag2[idx2]*x[idx2]
1191 + offdiag0[idx1]*x[idx1] + diag[i]*x[i] + offdiag1[i]*x[i+1]
1192 + offdiag3[i]*x[i+nx] + offdiag5[i]*x[i+nxy]);
1195 for (i=end3; i<end2; ++i) {
1199 y[i] += alpha*(offdiag4[idx3]*x[idx3] + offdiag2[idx2]*x[idx2]
1200 + offdiag0[idx1]*x[idx1] + diag[i]*x[i]
1201 + offdiag1[i]*x[i+1] + offdiag3[i]*x[i+nx]);
1204 for (i=end2; i<end1; ++i) {
1208 y[i] += alpha*(offdiag4[idx3]*x[idx3] + offdiag2[idx2]*x[idx2]
1209 + offdiag0[idx1]*x[idx1] + diag[i]*x[i]
1210 + offdiag1[i]*x[i+1]);
1216 y[end1] += alpha*(offdiag4[idx3]*x[idx3] + offdiag2[idx2]*x[idx2] +
1217 offdiag0[idx1]*x[idx1] + diag[end1]*x[end1]);
1240static inline void str_spaAxpy_3D_nc3 (
const REAL alpha,
1246 INT idx,idx1,idx2,idx3;
1247 INT matidx, matidx1, matidx2, matidx3;
1248 INT end1, end2, end3;
1259 REAL *offdiag0=NULL, *offdiag1=NULL, *offdiag2=NULL,
1260 *offdiag3=NULL, *offdiag4=NULL, *offdiag5=NULL;
1262 for (i=0; i<nband; ++i) {
1267 else if (A->
offsets[i] == 1) {
1270 else if (A->
offsets[i] == -nx) {
1273 else if (A->
offsets[i] == nx) {
1276 else if (A->
offsets[i] == -nxy) {
1279 else if (A->
offsets[i] == nxy) {
1283 printf(
"### WARNING: offsets for 2D scalar is illegal! %s\n", __FUNCTION__);
1284 str_spaAxpy(alpha, A, x, y);
1293 smat_amxv_nc3(alpha, diag, x, y);
1294 smat_amxv_nc3(alpha, offdiag1, x+nc, y);
1295 smat_amxv_nc3(alpha, offdiag3, x+nxnc, y);
1296 smat_amxv_nc3(alpha, offdiag5, x+nxync, y);
1298 for (i=1; i<nx; ++i) {
1303 smat_amxv_nc3(alpha, offdiag0+matidx1, x+idx1, y+idx);
1304 smat_amxv_nc3(alpha, diag+matidx, x+idx, y+idx);
1305 smat_amxv_nc3(alpha, offdiag1+matidx, x+idx+nc, y+idx);
1306 smat_amxv_nc3(alpha, offdiag3+matidx, x+idx+nxnc, y+idx);
1307 smat_amxv_nc3(alpha, offdiag5+matidx, x+idx+nxync, y+idx);
1310 for (i=nx; i<nxy; ++i) {
1317 smat_amxv_nc3(alpha, offdiag2+matidx2, x+idx2, y+idx);
1318 smat_amxv_nc3(alpha, offdiag0+matidx1, x+idx1, y+idx);
1319 smat_amxv_nc3(alpha, diag+matidx, x+idx, y+idx);
1320 smat_amxv_nc3(alpha, offdiag1+matidx, x+idx+nc, y+idx);
1321 smat_amxv_nc3(alpha, offdiag3+matidx, x+idx+nxnc, y+idx);
1322 smat_amxv_nc3(alpha, offdiag5+matidx, x+idx+nxync, y+idx);
1326 for (i=nxy; i<end3; ++i) {
1335 smat_amxv_nc3(alpha, offdiag4+matidx3, x+idx3, y+idx);
1336 smat_amxv_nc3(alpha, offdiag2+matidx2, x+idx2, y+idx);
1337 smat_amxv_nc3(alpha, offdiag0+matidx1, x+idx1, y+idx);
1338 smat_amxv_nc3(alpha, diag+matidx, x+idx, y+idx);
1339 smat_amxv_nc3(alpha, offdiag1+matidx, x+idx+nc, y+idx);
1340 smat_amxv_nc3(alpha, offdiag3+matidx, x+idx+nxnc, y+idx);
1341 smat_amxv_nc3(alpha, offdiag5+matidx, x+idx+nxync, y+idx);
1344 for (i=end3; i<end2; ++i) {
1353 smat_amxv_nc3(alpha, offdiag4+matidx3, x+idx3, y+idx);
1354 smat_amxv_nc3(alpha, offdiag2+matidx2, x+idx2, y+idx);
1355 smat_amxv_nc3(alpha, offdiag0+matidx1, x+idx1, y+idx);
1356 smat_amxv_nc3(alpha, diag+matidx, x+idx, y+idx);
1357 smat_amxv_nc3(alpha, offdiag1+matidx, x+idx+nc, y+idx);
1358 smat_amxv_nc3(alpha, offdiag3+matidx, x+idx+nxnc, y+idx);
1361 for (i=end2; i<end1; ++i) {
1370 smat_amxv_nc3(alpha, offdiag4+matidx3, x+idx3, y+idx);
1371 smat_amxv_nc3(alpha, offdiag2+matidx2, x+idx2, y+idx);
1372 smat_amxv_nc3(alpha, offdiag0+matidx1, x+idx1, y+idx);
1373 smat_amxv_nc3(alpha, diag+matidx, x+idx, y+idx);
1374 smat_amxv_nc3(alpha, offdiag1+matidx, x+idx+nc, y+idx);
1386 smat_amxv_nc3(alpha, offdiag4+matidx3, x+idx3, y+idx);
1387 smat_amxv_nc3(alpha, offdiag2+matidx2, x+idx2, y+idx);
1388 smat_amxv_nc3(alpha, offdiag0+matidx1, x+idx1, y+idx);
1389 smat_amxv_nc3(alpha, diag+matidx, x+idx, y+idx);
1413static inline void str_spaAxpy_3D_nc5 (
const REAL alpha,
1419 INT idx,idx1,idx2,idx3;
1420 INT matidx, matidx1, matidx2, matidx3;
1421 INT end1, end2, end3;
1432 REAL *offdiag0=NULL, *offdiag1=NULL, *offdiag2=NULL,
1433 *offdiag3=NULL, *offdiag4=NULL, *offdiag5=NULL;
1435 for (i=0; i<nband; ++i) {
1440 else if (A->
offsets[i] == 1) {
1443 else if (A->
offsets[i] == -nx) {
1446 else if (A->
offsets[i] == nx) {
1449 else if (A->
offsets[i] == -nxy) {
1452 else if (A->
offsets[i] == nxy) {
1456 printf(
"### WARNING: offsets for 2D scalar is illegal! %s\n", __FUNCTION__);
1457 str_spaAxpy(alpha, A, x, y);
1466 smat_amxv_nc5(alpha, diag, x, y);
1467 smat_amxv_nc5(alpha, offdiag1, x+nc, y);
1468 smat_amxv_nc5(alpha, offdiag3, x+nxnc, y);
1469 smat_amxv_nc5(alpha, offdiag5, x+nxync, y);
1471 for (i=1; i<nx; ++i) {
1476 smat_amxv_nc5(alpha, offdiag0+matidx1, x+idx1, y+idx);
1477 smat_amxv_nc5(alpha, diag+matidx, x+idx, y+idx);
1478 smat_amxv_nc5(alpha, offdiag1+matidx, x+idx+nc, y+idx);
1479 smat_amxv_nc5(alpha, offdiag3+matidx, x+idx+nxnc, y+idx);
1480 smat_amxv_nc5(alpha, offdiag5+matidx, x+idx+nxync, y+idx);
1483 for (i=nx; i<nxy; ++i) {
1490 smat_amxv_nc5(alpha, offdiag2+matidx2, x+idx2, y+idx);
1491 smat_amxv_nc5(alpha, offdiag0+matidx1, x+idx1, y+idx);
1492 smat_amxv_nc5(alpha, diag+matidx, x+idx, y+idx);
1493 smat_amxv_nc5(alpha, offdiag1+matidx, x+idx+nc, y+idx);
1494 smat_amxv_nc5(alpha, offdiag3+matidx, x+idx+nxnc, y+idx);
1495 smat_amxv_nc5(alpha, offdiag5+matidx, x+idx+nxync, y+idx);
1499 for (i=nxy; i<end3; ++i) {
1508 smat_amxv_nc5(alpha, offdiag4+matidx3, x+idx3, y+idx);
1509 smat_amxv_nc5(alpha, offdiag2+matidx2, x+idx2, y+idx);
1510 smat_amxv_nc5(alpha, offdiag0+matidx1, x+idx1, y+idx);
1511 smat_amxv_nc5(alpha, diag+matidx, x+idx, y+idx);
1512 smat_amxv_nc5(alpha, offdiag1+matidx, x+idx+nc, y+idx);
1513 smat_amxv_nc5(alpha, offdiag3+matidx, x+idx+nxnc, y+idx);
1514 smat_amxv_nc5(alpha, offdiag5+matidx, x+idx+nxync, y+idx);
1517 for (i=end3; i<end2; ++i) {
1526 smat_amxv_nc5(alpha, offdiag4+matidx3, x+idx3, y+idx);
1527 smat_amxv_nc5(alpha, offdiag2+matidx2, x+idx2, y+idx);
1528 smat_amxv_nc5(alpha, offdiag0+matidx1, x+idx1, y+idx);
1529 smat_amxv_nc5(alpha, diag+matidx, x+idx, y+idx);
1530 smat_amxv_nc5(alpha, offdiag1+matidx, x+idx+nc, y+idx);
1531 smat_amxv_nc5(alpha, offdiag3+matidx, x+idx+nxnc, y+idx);
1534 for (i=end2; i<end1; ++i) {
1543 smat_amxv_nc5(alpha, offdiag4+matidx3, x+idx3, y+idx);
1544 smat_amxv_nc5(alpha, offdiag2+matidx2, x+idx2, y+idx);
1545 smat_amxv_nc5(alpha, offdiag0+matidx1, x+idx1, y+idx);
1546 smat_amxv_nc5(alpha, diag+matidx, x+idx, y+idx);
1547 smat_amxv_nc5(alpha, offdiag1+matidx, x+idx+nc, y+idx);
1559 smat_amxv_nc5(alpha, offdiag4+matidx3, x+idx3, y+idx);
1560 smat_amxv_nc5(alpha, offdiag2+matidx2, x+idx2, y+idx);
1561 smat_amxv_nc5(alpha, offdiag0+matidx1, x+idx1, y+idx);
1562 smat_amxv_nc5(alpha, diag+matidx, x+idx, y+idx);
1586static inline void str_spaAxpy_3D_blk (
const REAL alpha,
1592 INT idx,idx1,idx2,idx3;
1593 INT matidx, matidx1, matidx2, matidx3;
1594 INT end1, end2, end3;
1605 REAL *offdiag0=NULL, *offdiag1=NULL, *offdiag2=NULL,
1606 *offdiag3=NULL, *offdiag4=NULL, *offdiag5=NULL;
1608 for (i=0; i<nband; ++i) {
1613 else if (A->
offsets[i] == 1) {
1616 else if (A->
offsets[i] == -nx) {
1619 else if (A->
offsets[i] == nx) {
1622 else if (A->
offsets[i] == -nxy) {
1625 else if (A->
offsets[i] == nxy) {
1629 printf(
"### WARNING: offsets for 2D scalar is illegal! %s\n", __FUNCTION__);
1630 str_spaAxpy(alpha, A, x, y);
1639 smat_amxv(alpha, diag, x, nc, y);
1640 smat_amxv(alpha, offdiag1, x+nc, nc, y);
1641 smat_amxv(alpha, offdiag3, x+nxnc, nc, y);
1642 smat_amxv(alpha, offdiag5, x+nxync, nc, y);
1644 for (i=1; i<nx; ++i) {
1649 smat_amxv(alpha, offdiag0+matidx1, x+idx1, nc, y+idx);
1650 smat_amxv(alpha, diag+matidx, x+idx, nc, y+idx);
1651 smat_amxv(alpha, offdiag1+matidx, x+idx+nc, nc, y+idx);
1652 smat_amxv(alpha, offdiag3+matidx, x+idx+nxnc, nc, y+idx);
1653 smat_amxv(alpha, offdiag5+matidx, x+idx+nxync, nc, y+idx);
1656 for (i=nx; i<nxy; ++i) {
1663 smat_amxv(alpha, offdiag2+matidx2, x+idx2, nc, y+idx);
1664 smat_amxv(alpha, offdiag0+matidx1, x+idx1, nc, y+idx);
1665 smat_amxv(alpha, diag+matidx, x+idx, nc, y+idx);
1666 smat_amxv(alpha, offdiag1+matidx, x+idx+nc, nc, y+idx);
1667 smat_amxv(alpha, offdiag3+matidx, x+idx+nxnc, nc, y+idx);
1668 smat_amxv(alpha, offdiag5+matidx, x+idx+nxync, nc, y+idx);
1672 for (i=nxy; i<end3; ++i) {
1681 smat_amxv(alpha, offdiag4+matidx3, x+idx3, nc, y+idx);
1682 smat_amxv(alpha, offdiag2+matidx2, x+idx2, nc, y+idx);
1683 smat_amxv(alpha, offdiag0+matidx1, x+idx1, nc, y+idx);
1684 smat_amxv(alpha, diag+matidx, x+idx, nc, y+idx);
1685 smat_amxv(alpha, offdiag1+matidx, x+idx+nc, nc, y+idx);
1686 smat_amxv(alpha, offdiag3+matidx, x+idx+nxnc, nc, y+idx);
1687 smat_amxv(alpha, offdiag5+matidx, x+idx+nxync, nc, y+idx);
1690 for (i=end3; i<end2; ++i) {
1699 smat_amxv(alpha, offdiag4+matidx3, x+idx3, nc, y+idx);
1700 smat_amxv(alpha, offdiag2+matidx2, x+idx2, nc, y+idx);
1701 smat_amxv(alpha, offdiag0+matidx1, x+idx1, nc, y+idx);
1702 smat_amxv(alpha, diag+matidx, x+idx, nc, y+idx);
1703 smat_amxv(alpha, offdiag1+matidx, x+idx+nc, nc, y+idx);
1704 smat_amxv(alpha, offdiag3+matidx, x+idx+nxnc, nc, y+idx);
1707 for (i=end2; i<end1; ++i) {
1716 smat_amxv(alpha, offdiag4+matidx3, x+idx3, nc, y+idx);
1717 smat_amxv(alpha, offdiag2+matidx2, x+idx2, nc, y+idx);
1718 smat_amxv(alpha, offdiag0+matidx1, x+idx1, nc, y+idx);
1719 smat_amxv(alpha, diag+matidx, x+idx, nc, y+idx);
1720 smat_amxv(alpha, offdiag1+matidx, x+idx+nc, nc, y+idx);
1732 smat_amxv(alpha, offdiag4+matidx3, x+idx3, nc, y+idx);
1733 smat_amxv(alpha, offdiag2+matidx2, x+idx2, nc, y+idx);
1734 smat_amxv(alpha, offdiag0+matidx1, x+idx1, nc, y+idx);
1735 smat_amxv(alpha, diag+matidx, x+idx, nc, y+idx);
1755static inline void str_spaAxpy (
const REAL alpha,
1774 INT size = nc*ngrid;
1790 for (k = 0; k < size; ++k) {
1797 for (block = 0; block < ngrid; ++block) {
1798 start_data = nc2*block;
1799 start_vect = nc*block;
1800 blkcontr_str(start_data,start_vect,start_vect,nc,diag,x,y);
1804 for (band = 0; band < nband; band ++) {
1805 width = offsets[band];
1808 for (block = 0; block < ngrid+width; ++block) {
1809 start_data = nc2*block;
1810 start_vecx = nc*block;
1811 start_vecy = start_vecx - ncw;
1812 blkcontr_str(start_data,start_vecx,start_vecy,nc,offdiag[band],x,y);
1816 for (block = 0; block < ngrid-width; ++block) {
1817 start_data = nc2*block;
1818 start_vecy = nc*block;
1819 start_vecx = start_vecy + ncw;
1820 blkcontr_str(start_data,start_vecx,start_vecy,nc,offdiag[band],x,y);
1827 for (point = 0; point < ngrid; point ++) {
1828 y[point] += diag[point]*x[point];
1832 for (band = 0; band < nband; band ++) {
1833 width = offsets[band];
1835 for (point = 0; point < ngrid+width; point ++) {
1836 y[point-width] += offdiag[band][point]*x[point];
1840 for (point = 0; point < ngrid-width; point ++) {
1841 y[point] += offdiag[band][point]*x[point+width];
1847 printf(
"### WARNING: nc is illegal! %s\n", __FUNCTION__);
1852 for (k = 0; k < size; ++k) {
void fasp_darray_cp(const INT n, const REAL *x, REAL *y)
Copy an array to the other y=x.
void fasp_mem_free(void *mem)
Free up previous allocated memory body and set pointer to NULL.
void * fasp_mem_calloc(const unsigned int size, const unsigned int type)
Allocate, initiate, and check memory.
void fasp_get_start_end(const INT procid, const INT nprocs, const INT n, INT *start, INT *end)
Assign Load to each thread.
SHORT fasp_smat_inv(REAL *a, const INT n)
Compute the inverse matrix of a small full matrix a.
void fasp_blas_smat_mul(const REAL *a, const REAL *b, REAL *c, const INT n)
Compute the matrix product of two small full matrices a and b, stored in c.
void fasp_dstr_alloc(const INT nx, const INT ny, const INT nz, const INT nxy, const INT ngrid, const INT nband, const INT nc, INT *offsets, dSTRmat *A)
Allocate STR sparse matrix memory space.
void fasp_blas_dstr_aAxpy(const REAL alpha, const dSTRmat *A, const REAL *x, REAL *y)
Matrix-vector multiplication y = alpha*A*x + y.
INT fasp_blas_dstr_diagscale(const dSTRmat *A, dSTRmat *B)
B=D^{-1}A.
void fasp_blas_dstr_mxv(const dSTRmat *A, const REAL *x, REAL *y)
Matrix-vector multiplication y = A*x.
Main header file for the FASP project.
Structure matrix of REAL type.
INT * offsets
offsets of the off-diagonals (length is nband)
INT nx
number of grids in x direction
INT nxy
number of grids on x-y plane
INT ny
number of grids in y direction
INT nband
number of off-diag bands
REAL * diag
diagonal entries (length is ngrid*(nc^2))
INT nc
size of each block (number of components)
INT nz
number of grids in z direction
REAL ** offdiag
off-diagonal entries (dimension is nband * [(ngrid-|offsets|) * nc^2])