Fast Auxiliary Space Preconditioning 2.7.7 Aug/28/2022
Loading...
Searching...
No Matches
BlaSpmvSTR.c
Go to the documentation of this file.
1
15#include <math.h>
16
17#ifdef _OPENMP
18#include <omp.h>
19#endif
20
21#include "fasp.h"
22#include "fasp_functs.h"
23
24/*---------------------------------*/
25/*-- Declare Private Functions --*/
26/*---------------------------------*/
27
28static inline void smat_amxv_nc3(const REAL, const REAL *, const REAL *, REAL *);
29static inline void smat_amxv_nc5(const REAL, const REAL *, const REAL *, REAL *);
30static inline void smat_amxv(const REAL, const REAL *, const REAL *, const INT, REAL *);
31static inline void str_spaAxpy_2D_nc1(const REAL, const dSTRmat *, const REAL *, REAL *);
32static inline void str_spaAxpy_2D_nc3(const REAL, const dSTRmat *, const REAL *, REAL *);
33static inline void str_spaAxpy_2D_nc5(const REAL, const dSTRmat *, const REAL *, REAL *);
34static inline void str_spaAxpy_2D_blk(const REAL, const dSTRmat *, const REAL *, REAL *);
35static inline void str_spaAxpy_3D_nc1(const REAL, const dSTRmat *, const REAL *, REAL *);
36static inline void str_spaAxpy_3D_nc3(const REAL, const dSTRmat *, const REAL *, REAL *);
37static inline void str_spaAxpy_3D_nc5(const REAL, const dSTRmat *, const REAL *, REAL *);
38static inline void str_spaAxpy_3D_blk(const REAL, const dSTRmat *, const REAL *, REAL *);
39static inline void str_spaAxpy(const REAL, const dSTRmat *, const REAL *, REAL *);
40static inline void blkcontr_str(const INT, const INT, const INT, const INT,
41 const REAL *, const REAL *, REAL *);
42
43/*---------------------------------*/
44/*-- Public Functions --*/
45/*---------------------------------*/
46
61void fasp_blas_dstr_aAxpy (const REAL alpha,
62 const dSTRmat *A,
63 const REAL *x,
64 REAL *y)
65{
66
67 switch (A->nband) {
68
69 case 4:
70
71 switch (A->nc) {
72 case 1:
73 str_spaAxpy_2D_nc1(alpha, A, x, y);
74 break;
75
76 case 3:
77 str_spaAxpy_2D_nc3(alpha, A, x, y);
78 break;
79
80 case 5:
81 str_spaAxpy_2D_nc5(alpha, A, x, y);
82 break;
83
84 default:
85 str_spaAxpy_2D_blk(alpha, A, x, y);
86 break;
87 }
88
89 break;
90
91 case 6:
92
93 switch (A->nc) {
94 case 1:
95 str_spaAxpy_3D_nc1(alpha, A, x, y);
96 break;
97
98 case 3:
99 str_spaAxpy_3D_nc3(alpha, A, x, y);
100 break;
101
102 case 5:
103 str_spaAxpy_3D_nc5(alpha, A, x, y);
104 break;
105
106 default:
107 str_spaAxpy_3D_blk(alpha, A, x, y);
108 break;
109 }
110 break;
111
112 default:
113 str_spaAxpy(alpha, A, x, y);
114 break;
115 }
116
117}
118
132 const REAL *x,
133 REAL *y)
134{
135 int n = (A->ngrid)*(A->nc)*(A->nc);
136
137 memset(y, 0, n*sizeof(REAL));
138
139 fasp_blas_dstr_aAxpy(1.0, A, x, y);
140}
141
156 dSTRmat *B)
157{
158 const INT ngrid=A->ngrid, nc=A->nc, nband=A->nband;
159 const INT nc2=nc*nc, size=ngrid*nc2;
160 INT i,j,ic2,nb,nb1;
161
162#ifdef _OPENMP
163 //variables for OpenMP
164 INT myid, mybegin, myend;
165 INT nthreads = fasp_get_num_threads();
166#endif
167
168 REAL *diag=(REAL *)fasp_mem_calloc(size,sizeof(REAL));
169
170 fasp_darray_cp(size,A->diag,diag);
171
172 fasp_dstr_alloc(A->nx, A->ny, A->nz,A->nxy,ngrid, nband,nc,A->offsets, B);
173
174 //compute diagnal elements of B
175#ifdef _OPENMP
176 if (ngrid > OPENMP_HOLDS) {
177#pragma omp parallel for private(myid, mybegin, myend, i, ic2, j)
178 for (myid=0; myid<nthreads; myid++) {
179 fasp_get_start_end(myid, nthreads, ngrid, &mybegin, &myend);
180 for (i=mybegin; i<myend; i++) {
181 ic2=i*nc2;
182 for (j=0; j<nc2; j++) {
183 if (j/nc == j%nc) B->diag[ic2+j]=1;
184 else B->diag[ic2+j]=0;
185 }
186 }
187 }
188 }
189 else {
190#endif
191 for (i=0;i<ngrid;++i) {
192 ic2=i*nc2;
193 for (j=0;j<nc2;++j) {
194 if (j/nc == j%nc) B->diag[ic2+j]=1;
195 else B->diag[ic2+j]=0;
196 }
197 }
198#ifdef _OPENMP
199 }
200#endif
201
202 for (i=0;i<ngrid;++i) fasp_smat_inv(&(diag[i*nc2]),nc);
203
204 for (i=0;i<nband;++i) {
205 nb=A->offsets[i];
206 nb1=abs(nb);
207 if (nb<0) {
208 for (j=0;j<ngrid-nb1;++j)
209 fasp_blas_smat_mul(&(diag[(j+nb1)*nc2]),&(A->offdiag[i][j*nc2]),&(B->offdiag[i][j*nc2]),nc);
210 }
211 else {
212 for (j=0;j<ngrid-nb1;++j)
213 fasp_blas_smat_mul(&(diag[j*nc2]),&(A->offdiag[i][j*nc2]),&(B->offdiag[i][j*nc2]),nc);
214 }
215
216 }
217
218 fasp_mem_free(diag); diag = NULL;
219
220 return (0);
221}
222
223/*---------------------------------*/
224/*-- Private Functions --*/
225/*---------------------------------*/
226
241static inline void smat_amxv_nc3 (const REAL alpha,
242 const REAL *a,
243 const REAL *b,
244 REAL *c)
245{
246 c[0] += alpha*(a[0]*b[0] + a[1]*b[1] + a[2]*b[2]);
247 c[1] += alpha*(a[3]*b[0] + a[4]*b[1] + a[5]*b[2]);
248 c[2] += alpha*(a[6]*b[0] + a[7]*b[1] + a[8]*b[2]);
249}
250
265static inline void smat_amxv_nc5 (const REAL alpha,
266 const REAL *a,
267 const REAL *b,
268 REAL *c)
269{
270 c[0] += alpha*(a[0]*b[0] + a[1]*b[1] + a[2]*b[2] + a[3] * b[3] + a[4] * b[4]);
271 c[1] += alpha*(a[5]*b[0] + a[6]*b[1] + a[7]*b[2] + a[8] * b[3] + a[9] * b[4]);
272 c[2] += alpha*(a[10]*b[0] + a[11]*b[1] + a[12]*b[2] + a[13] * b[3] + a[14] * b[4]);
273 c[3] += alpha*(a[15]*b[0] + a[16]*b[1] + a[17]*b[2] + a[18] * b[3] + a[19] * b[4]);
274 c[4] += alpha*(a[20]*b[0] + a[21]*b[1] + a[22]*b[2] + a[23] * b[3] + a[24] * b[4]);
275}
276
294static inline void smat_amxv (const REAL alpha,
295 const REAL *a,
296 const REAL *b,
297 const INT n,
298 REAL *c)
299{
300 INT i,j;
301 INT in;
302
303#ifdef _OPENMP
304 // variables for OpenMP
305 INT myid, mybegin, myend;
306 INT nthreads = fasp_get_num_threads();
307#endif
308
309#ifdef _OPENMP
310 if (n > OPENMP_HOLDS) {
311#pragma omp parallel for private(myid, mybegin, myend, i, in, j)
312 for (myid=0; myid<nthreads; myid++) {
313 fasp_get_start_end(myid, nthreads, n, &mybegin, &myend);
314 for (i=mybegin; i<myend; i++) {
315 in = i*n;
316 for (j=0; j<n; j++)
317 c[i] += alpha*a[in+j]*b[j];
318 }
319 }
320 }
321 else {
322#endif
323 for (i=0;i<n;++i) {
324 in = i*n;
325 for (j=0;j<n;++j)
326 c[i] += alpha*a[in+j]*b[j];
327 }
328#ifdef _OPENMP
329 }
330#endif
331 return;
332}
333
356static inline void blkcontr_str (const INT start_data,
357 const INT start_vecx,
358 const INT start_vecy,
359 const INT nc,
360 const REAL *data,
361 const REAL *x,
362 REAL *y)
363{
364 INT i,j,k,m;
365
366#ifdef _OPENMP
367 //variables for OpenMP
368 INT myid, mybegin, myend;
369 INT nthreads = fasp_get_num_threads();
370#endif
371
372#ifdef _OPENMP
373 if (nc > OPENMP_HOLDS) {
374#pragma omp parallel for private(myid, mybegin, myend, i, k, m, j)
375 for (myid = 0; myid < nthreads; myid++) {
376 fasp_get_start_end(myid, nthreads, nc, &mybegin, &myend);
377 for (i = mybegin; i < myend; i ++) {
378 k = start_data + i*nc;
379 m = start_vecy + i;
380 for (j = 0; j < nc; j ++) {
381 y[m] += data[k+j]*x[start_vecx+j];
382 }
383 }
384 }
385 }
386 else {
387#endif
388 for (i = 0; i < nc; i ++) {
389 k = start_data + i*nc;
390 m = start_vecy + i;
391 for (j = 0; j < nc; j ++) {
392 y[m] += data[k+j]*x[start_vecx+j];
393 }
394 }
395#ifdef _OPENMP
396 }
397#endif
398}
399
420static inline void str_spaAxpy_2D_nc1 (const REAL alpha,
421 const dSTRmat *A,
422 const REAL *x,
423 REAL *y)
424{
425 INT i;
426 INT idx1, idx2;
427 INT end1, end2;
428 INT nline;
429
430#ifdef _OPENMP
431 //variables for OpenMP
432 INT myid, mybegin, myend, idx;
433 INT nthreads = fasp_get_num_threads();
434#endif
435
436 // information of A
437 INT nx = A->nx;
438 INT ngrid = A->ngrid; // number of grids
439 INT nband = A->nband;
440
441 REAL *diag = A->diag;
442 REAL *offdiag0=NULL, *offdiag1=NULL, *offdiag2=NULL, *offdiag3=NULL;
443
444 if (nx == 1) {
445 nline = A->ny;
446 }
447 else {
448 nline = nx;
449 }
450
451 for (i=0; i<nband; ++i) {
452 if (A->offsets[i] == -1) {
453 offdiag0 = A->offdiag[i];
454 }
455 else if (A->offsets[i] == 1) {
456 offdiag1 = A->offdiag[i];
457 }
458 else if (A->offsets[i] == -nline) {
459 offdiag2 = A->offdiag[i];
460 }
461 else if (A->offsets[i] == nline) {
462 offdiag3 = A->offdiag[i];
463 }
464 else {
465 printf("### WARNING: offsets for 2D scalar is illegal! %s\n", __FUNCTION__);
466 str_spaAxpy(alpha, A, x, y);
467 return;
468 }
469 }
470
471 end1 = ngrid-1;
472 end2 = ngrid-nline;
473
474 y[0] += alpha*(diag[0]*x[0] + offdiag1[0]*x[1] + offdiag3[0]*x[nline]);
475
476#ifdef _OPENMP
477 if (nline-1 > OPENMP_HOLDS) {
478#pragma omp parallel for private(myid, mybegin, myend, i, idx1, idx)
479 for (myid=0; myid<nthreads; myid++) {
480 fasp_get_start_end(myid, nthreads, nline-1, &mybegin, &myend);
481 for (i=mybegin; i<myend; i++) {
482 idx1 = i;
483 idx = i+1;
484 y[idx] += alpha*(offdiag0[idx1]*x[idx1] + diag[idx]*x[idx] +
485 offdiag1[idx]*x[idx+1] + offdiag3[idx]*x[idx+nline]);
486 }
487 }
488 }
489 else {
490#endif
491 for (i=1; i<nline; ++i) {
492 idx1 = i-1;
493 y[i] += alpha*(offdiag0[idx1]*x[idx1] + diag[i]*x[i] +
494 offdiag1[i]*x[i+1] + offdiag3[i]*x[i+nline]);
495 }
496#ifdef _OPENMP
497 }
498#endif
499
500#ifdef _OPENMP
501 if (end2-nline > OPENMP_HOLDS) {
502#pragma omp parallel for private(myid, i, mybegin, myend, idx1, idx2, idx)
503 for (myid=0; myid<nthreads; myid++) {
504 fasp_get_start_end(myid, nthreads, end2-nline, &mybegin, &myend);
505 for (i=mybegin; i<myend; ++i) {
506 idx = i+nline;
507 idx1 = idx-1; //idx1 = i-1+nline;
508 idx2 = i;
509 y[idx] += alpha*(offdiag2[idx2]*x[idx2] + offdiag0[idx1]*x[idx1] +
510 diag[idx]*x[idx] + offdiag1[idx]*x[idx+1] +
511 offdiag3[idx]*x[idx+nline]);
512 }
513 }
514 }
515 else {
516#endif
517 for (i=nline; i<end2; ++i) {
518 idx1 = i-1;
519 idx2 = i-nline;
520 y[i] += alpha*(offdiag2[idx2]*x[idx2] + offdiag0[idx1]*x[idx1] +
521 diag[i]*x[i] + offdiag1[i]*x[i+1] + offdiag3[i]*x[i+nline]);
522 }
523#ifdef _OPENMP
524 }
525#endif
526
527#ifdef _OPENMP
528 if (end1-end2 > OPENMP_HOLDS) {
529#pragma omp parallel for private(myid, i, mybegin, myend, idx1, idx2, idx)
530 for (myid=0; myid<nthreads; myid++) {
531 fasp_get_start_end(myid, nthreads, end1-end2, &mybegin, &myend);
532 for (i=mybegin; i<myend; ++i) {
533 idx = i+end2;
534 idx1 = idx-1; //idx1 = i-1+end2;
535 idx2 = idx-nline; //idx2 = i-nline+end2;
536 y[idx] += alpha*(offdiag2[idx2]*x[idx2] + offdiag0[idx1]*x[idx1] +
537 diag[idx]*x[idx] + offdiag1[idx]*x[idx+1]);
538 }
539 }
540 }
541 else {
542#endif
543 for (i=end2; i<end1; ++i) {
544 idx1 = i-1;
545 idx2 = i-nline;
546 y[i] += alpha*(offdiag2[idx2]*x[idx2] + offdiag0[idx1]*x[idx1] +
547 diag[i]*x[i] + offdiag1[i]*x[i+1]);
548 }
549#ifdef _OPENMP
550 }
551#endif
552
553 idx1 = end1-1;
554 idx2 = end1-nline;
555 y[end1] += alpha*(offdiag2[idx2]*x[idx2] + offdiag0[idx1]*x[idx1] + diag[end1]*x[end1]);
556
557 return;
558
559}
560
581static inline void str_spaAxpy_2D_nc3 (const REAL alpha,
582 const dSTRmat *A,
583 const REAL *x,
584 REAL *y)
585{
586 INT i;
587 INT idx,idx1,idx2;
588 INT matidx, matidx1, matidx2;
589 INT end1, end2;
590 INT nline, nlinenc;
591
592 // information of A
593 INT nx = A->nx;
594 INT ngrid = A->ngrid; // number of grids
595 INT nc = A->nc;
596 INT nband = A->nband;
597
598#ifdef _OPENMP
599 // variables for OpenMP
600 INT myid, mybegin, myend, up;
601 INT nthreads = fasp_get_num_threads();
602#endif
603
604 REAL *diag = A->diag;
605 REAL *offdiag0=NULL, *offdiag1=NULL, *offdiag2=NULL, *offdiag3=NULL;
606
607 if (nx == 1) {
608 nline = A->ny;
609 }
610 else {
611 nline = nx;
612 }
613 nlinenc = nline*nc;
614
615 for (i=0; i<nband; ++i) {
616
617 if (A->offsets[i] == -1) {
618 offdiag0 = A->offdiag[i];
619 }
620 else if (A->offsets[i] == 1) {
621 offdiag1 = A->offdiag[i];
622 }
623 else if (A->offsets[i] == -nline) {
624 offdiag2 = A->offdiag[i];
625 }
626 else if (A->offsets[i] == nline) {
627 offdiag3 = A->offdiag[i];
628 }
629 else {
630 printf("### WARNING: offsets for 2D scalar is illegal! %s\n", __FUNCTION__);
631 str_spaAxpy(alpha, A, x, y);
632 return;
633 }
634
635 }
636
637 end1 = ngrid-1;
638 end2 = ngrid-nline;
639
640 smat_amxv_nc3(alpha, diag, x, y);
641 smat_amxv_nc3(alpha, offdiag1, x+nc, y);
642 smat_amxv_nc3(alpha, offdiag3, x+nlinenc, y);
643
644#ifdef _OPENMP
645 up = nline - 1;
646 if (up > OPENMP_HOLDS) {
647#pragma omp parallel for private(myid, mybegin, myend, i, idx, matidx, idx1, matidx1)
648 for (myid=0; myid<nthreads; myid++) {
649 fasp_get_start_end(myid, nthreads, up, &mybegin, &myend);
650 for (i=mybegin; i<myend; i++) {
651 idx = (i+1)*nc;
652 matidx = idx*nc;
653 idx1 = i*nc;
654 matidx1 = idx1*nc;
655 smat_amxv_nc3(alpha, offdiag0+matidx1, x+idx1, y+idx);
656 smat_amxv_nc3(alpha, diag+matidx, x+idx, y+idx);
657 smat_amxv_nc3(alpha, offdiag1+matidx, x+idx+nc, y+idx);
658 smat_amxv_nc3(alpha, offdiag3+matidx, x+idx+nlinenc, y+idx);
659 }
660 }
661 }
662 else {
663#endif
664 for (i=1; i<nline; ++i) {
665 idx = i*nc;
666 matidx = idx*nc;
667 idx1 = idx - nc;
668 matidx1 = idx1*nc;
669 smat_amxv_nc3(alpha, offdiag0+matidx1, x+idx1, y+idx);
670 smat_amxv_nc3(alpha, diag+matidx, x+idx, y+idx);
671 smat_amxv_nc3(alpha, offdiag1+matidx, x+idx+nc, y+idx);
672 smat_amxv_nc3(alpha, offdiag3+matidx, x+idx+nlinenc, y+idx);
673 }
674#ifdef _OPENMP
675 }
676#endif
677
678#ifdef _OPENMP
679 up = end2 - nx;
680 if (up > OPENMP_HOLDS) {
681#pragma omp parallel for private(myid, mybegin, myend, idx, idx1, idx2, matidx, matidx1, matidx2)
682 for (myid=0; myid<nthreads; myid++) {
683 fasp_get_start_end(myid, nthreads, up, &mybegin, &myend);
684 for (i=mybegin; i<myend; i++) {
685 idx = (i+nx)*nc;
686 idx1 = idx-nc;
687 idx2 = idx-nlinenc;
688 matidx = idx*nc;
689 matidx1 = idx1*nc;
690 matidx2 = idx2*nc;
691 smat_amxv_nc3(alpha, offdiag2+matidx2, x+idx2, y+idx);
692 smat_amxv_nc3(alpha, offdiag0+matidx1, x+idx1, y+idx);
693 smat_amxv_nc3(alpha, diag+matidx, x+idx, y+idx);
694 smat_amxv_nc3(alpha, offdiag1+matidx, x+idx+nc, y+idx);
695 smat_amxv_nc3(alpha, offdiag3+matidx, x+idx+nlinenc, y+idx);
696 }
697 }
698 }
699 else {
700#endif
701 for (i=nx; i<end2; ++i) {
702 idx = i*nc;
703 idx1 = idx-nc;
704 idx2 = idx-nlinenc;
705 matidx = idx*nc;
706 matidx1 = idx1*nc;
707 matidx2 = idx2*nc;
708 smat_amxv_nc3(alpha, offdiag2+matidx2, x+idx2, y+idx);
709 smat_amxv_nc3(alpha, offdiag0+matidx1, x+idx1, y+idx);
710 smat_amxv_nc3(alpha, diag+matidx, x+idx, y+idx);
711 smat_amxv_nc3(alpha, offdiag1+matidx, x+idx+nc, y+idx);
712 smat_amxv_nc3(alpha, offdiag3+matidx, x+idx+nlinenc, y+idx);
713 }
714#ifdef _OPENMP
715 }
716#endif
717
718#ifdef _OPENMP
719 up = end1 - end2;
720 if (up > OPENMP_HOLDS) {
721#pragma omp parallel for private(myid, mybegin, myend, idx, idx1, idx2, matidx, matidx1, matidx2)
722 for (myid=0; myid<nthreads; myid++) {
723 fasp_get_start_end(myid, nthreads, up, &mybegin, &myend);
724 for (i=mybegin; i<myend; i++) {
725 idx = (i+end2)*nc;
726 idx1 = idx-nc;
727 idx2 = idx-nlinenc;
728 matidx = idx*nc;
729 matidx1 = idx1*nc;
730 matidx2 = idx2*nc;
731 smat_amxv_nc3(alpha, offdiag2+matidx2, x+idx2, y+idx);
732 smat_amxv_nc3(alpha, offdiag0+matidx1, x+idx1, y+idx);
733 smat_amxv_nc3(alpha, diag+matidx, x+idx, y+idx);
734 smat_amxv_nc3(alpha, offdiag1+matidx, x+idx+nc, y+idx);
735 }
736 }
737 }
738 else {
739#endif
740 for (i=end2; i<end1; ++i) {
741 idx = i*nc;
742 idx1 = idx-nc;
743 idx2 = idx-nlinenc;
744 matidx = idx*nc;
745 matidx1 = idx1*nc;
746 matidx2 = idx2*nc;
747 smat_amxv_nc3(alpha, offdiag2+matidx2, x+idx2, y+idx);
748 smat_amxv_nc3(alpha, offdiag0+matidx1, x+idx1, y+idx);
749 smat_amxv_nc3(alpha, diag+matidx, x+idx, y+idx);
750 smat_amxv_nc3(alpha, offdiag1+matidx, x+idx+nc, y+idx);
751 }
752#ifdef _OPENMP
753 }
754#endif
755 i=end1;
756 idx = i*nc;
757 idx1 = idx-nc;
758 idx2 = idx-nlinenc;
759 matidx = idx*nc;
760 matidx1 = idx1*nc;
761 matidx2 = idx2*nc;
762 smat_amxv_nc3(alpha, offdiag2+matidx2, x+idx2, y+idx);
763 smat_amxv_nc3(alpha, offdiag0+matidx1, x+idx1, y+idx);
764 smat_amxv_nc3(alpha, diag+matidx, x+idx, y+idx);
765
766 return;
767}
768
787static inline void str_spaAxpy_2D_nc5 (const REAL alpha,
788 const dSTRmat *A,
789 const REAL *x,
790 REAL *y)
791{
792 INT i;
793 INT idx,idx1,idx2;
794 INT matidx, matidx1, matidx2;
795 INT end1, end2;
796 INT nline, nlinenc;
797
798 // information of A
799 INT nx = A->nx;
800 INT ngrid = A->ngrid; // number of grids
801 INT nc = A->nc;
802 INT nband = A->nband;
803
804#ifdef _OPENMP
805 // variables for OpenMP
806 INT myid, mybegin, myend, up;
807 INT nthreads = fasp_get_num_threads();
808#endif
809
810 REAL *diag = A->diag;
811 REAL *offdiag0=NULL, *offdiag1=NULL, *offdiag2=NULL, *offdiag3=NULL;
812
813 if (nx == 1) {
814 nline = A->ny;
815 }
816 else {
817 nline = nx;
818 }
819 nlinenc = nline*nc;
820
821 for (i=0; i<nband; ++i) {
822
823 if (A->offsets[i] == -1) {
824 offdiag0 = A->offdiag[i];
825 }
826 else if (A->offsets[i] == 1) {
827 offdiag1 = A->offdiag[i];
828 }
829 else if (A->offsets[i] == -nline) {
830 offdiag2 = A->offdiag[i];
831 }
832 else if (A->offsets[i] == nline) {
833 offdiag3 = A->offdiag[i];
834 }
835 else {
836 printf("### WARNING: offsets for 2D scalar is illegal! %s\n", __FUNCTION__);
837 str_spaAxpy(alpha, A, x, y);
838 return;
839 }
840 }
841
842 end1 = ngrid-1;
843 end2 = ngrid-nline;
844
845 smat_amxv_nc5(alpha, diag, x, y);
846 smat_amxv_nc5(alpha, offdiag1, x+nc, y);
847 smat_amxv_nc5(alpha, offdiag3, x+nlinenc, y);
848
849#ifdef _OPENMP
850 up = nline - 1;
851 if (up > OPENMP_HOLDS) {
852#pragma omp parallel for private(myid, mybegin, myend, i, idx, matidx, idx1, matidx1)
853 for (myid=0; myid<nthreads; myid++) {
854 fasp_get_start_end(myid, nthreads, up, &mybegin, &myend);
855 for (i=mybegin; i<myend; i++) {
856 idx = (i+1)*nc;
857 matidx = idx*nc;
858 idx1 = i*nc; // idx1 = idx - nc;
859 matidx1 = idx1*nc;
860 smat_amxv_nc5(alpha, offdiag0+matidx1, x+idx1, y+idx);
861 smat_amxv_nc5(alpha, diag+matidx, x+idx, y+idx);
862 smat_amxv_nc5(alpha, offdiag1+matidx, x+idx+nc, y+idx);
863 smat_amxv_nc5(alpha, offdiag3+matidx, x+idx+nlinenc, y+idx);
864 }
865 }
866 }
867 else {
868#endif
869 for (i=1; i<nline; ++i) {
870 idx = i*nc;
871 matidx = idx*nc;
872 idx1 = idx - nc;
873 matidx1 = idx1*nc;
874 smat_amxv_nc5(alpha, offdiag0+matidx1, x+idx1, y+idx);
875 smat_amxv_nc5(alpha, diag+matidx, x+idx, y+idx);
876 smat_amxv_nc5(alpha, offdiag1+matidx, x+idx+nc, y+idx);
877 smat_amxv_nc5(alpha, offdiag3+matidx, x+idx+nlinenc, y+idx);
878 }
879#ifdef _OPENMP
880 }
881#endif
882
883#ifdef _OPENMP
884 up = end2 - nx;
885 if (up > OPENMP_HOLDS) {
886#pragma omp parallel for private(myid, mybegin, myend, idx, idx1, idx2, matidx, matidx1, matidx2)
887 for (myid=0; myid<nthreads; myid++) {
888 fasp_get_start_end(myid, nthreads, up, &mybegin, &myend);
889 for (i=mybegin; i<myend; i++) {
890 idx = (i+nx)*nc;
891 idx1 = idx-nc;
892 idx2 = idx-nlinenc;
893 matidx = idx*nc;
894 matidx1 = idx1*nc;
895 matidx2 = idx2*nc;
896 smat_amxv_nc5(alpha, offdiag2+matidx2, x+idx2, y+idx);
897 smat_amxv_nc5(alpha, offdiag0+matidx1, x+idx1, y+idx);
898 smat_amxv_nc5(alpha, diag+matidx, x+idx, y+idx);
899 smat_amxv_nc5(alpha, offdiag1+matidx, x+idx+nc, y+idx);
900 smat_amxv_nc5(alpha, offdiag3+matidx, x+idx+nlinenc, y+idx);
901 }
902 }
903 }
904 else {
905#endif
906 for (i=nx; i<end2; ++i) {
907 idx = i*nc;
908 idx1 = idx-nc;
909 idx2 = idx-nlinenc;
910 matidx = idx*nc;
911 matidx1 = idx1*nc;
912 matidx2 = idx2*nc;
913 smat_amxv_nc5(alpha, offdiag2+matidx2, x+idx2, y+idx);
914 smat_amxv_nc5(alpha, offdiag0+matidx1, x+idx1, y+idx);
915 smat_amxv_nc5(alpha, diag+matidx, x+idx, y+idx);
916 smat_amxv_nc5(alpha, offdiag1+matidx, x+idx+nc, y+idx);
917 smat_amxv_nc5(alpha, offdiag3+matidx, x+idx+nlinenc, y+idx);
918 }
919#ifdef _OPENMP
920 }
921#endif
922
923#ifdef _OPENMP
924 up = end1 - end2;
925 if (up > OPENMP_HOLDS) {
926#pragma omp parallel for private(myid, mybegin, myend, idx, idx1, idx2, matidx, matidx1, matidx2)
927 for (myid=0; myid<nthreads; myid++) {
928 fasp_get_start_end(myid, nthreads, up, &mybegin, &myend);
929 for (i=mybegin; i<myend; i++) {
930 idx = (i+end2)*nc;
931 idx1 = idx-nc;
932 idx2 = idx-nlinenc;
933 matidx = idx*nc;
934 matidx1 = idx1*nc;
935 matidx2 = idx2*nc;
936 smat_amxv_nc5(alpha, offdiag2+matidx2, x+idx2, y+idx);
937 smat_amxv_nc5(alpha, offdiag0+matidx1, x+idx1, y+idx);
938 smat_amxv_nc5(alpha, diag+matidx, x+idx, y+idx);
939 smat_amxv_nc5(alpha, offdiag1+matidx, x+idx+nc, y+idx);
940 }
941 }
942 }
943 else {
944#endif
945 for (i=end2; i<end1; ++i) {
946 idx = i*nc;
947 idx1 = idx-nc;
948 idx2 = idx-nlinenc;
949 matidx = idx*nc;
950 matidx1 = idx1*nc;
951 matidx2 = idx2*nc;
952 smat_amxv_nc5(alpha, offdiag2+matidx2, x+idx2, y+idx);
953 smat_amxv_nc5(alpha, offdiag0+matidx1, x+idx1, y+idx);
954 smat_amxv_nc5(alpha, diag+matidx, x+idx, y+idx);
955 smat_amxv_nc5(alpha, offdiag1+matidx, x+idx+nc, y+idx);
956 }
957#ifdef _OPENMP
958 }
959#endif
960
961 i=end1;
962 idx = i*nc;
963 idx1 = idx-nc;
964 idx2 = idx-nlinenc;
965 matidx = idx*nc;
966 matidx1 = idx1*nc;
967 matidx2 = idx2*nc;
968 smat_amxv_nc5(alpha, offdiag2+matidx2, x+idx2, y+idx);
969 smat_amxv_nc5(alpha, offdiag0+matidx1, x+idx1, y+idx);
970 smat_amxv_nc5(alpha, diag+matidx, x+idx, y+idx);
971
972 return;
973
974}
975
994static inline void str_spaAxpy_2D_blk (const REAL alpha,
995 const dSTRmat *A,
996 const REAL *x,
997 REAL *y)
998{
999 INT i;
1000 INT idx,idx1,idx2;
1001 INT matidx, matidx1, matidx2;
1002 INT end1, end2;
1003 INT nline, nlinenc;
1004
1005 // information of A
1006 INT nx = A->nx;
1007 INT ngrid = A->ngrid; // number of grids
1008 INT nc = A->nc;
1009 INT nband = A->nband;
1010
1011 REAL *diag = A->diag;
1012 REAL *offdiag0=NULL, *offdiag1=NULL, *offdiag2=NULL, *offdiag3=NULL;
1013
1014 if (nx == 1) {
1015 nline = A->ny;
1016 }
1017 else {
1018 nline = nx;
1019 }
1020 nlinenc = nline*nc;
1021
1022 for (i=0; i<nband; ++i) {
1023
1024 if (A->offsets[i] == -1) {
1025 offdiag0 = A->offdiag[i];
1026 }
1027 else if (A->offsets[i] == 1) {
1028 offdiag1 = A->offdiag[i];
1029 }
1030 else if (A->offsets[i] == -nline) {
1031 offdiag2 = A->offdiag[i];
1032 }
1033 else if (A->offsets[i] == nline) {
1034 offdiag3 = A->offdiag[i];
1035 }
1036 else {
1037 printf("### WARNING: offsets for 2D scalar is illegal! %s\n", __FUNCTION__);
1038 str_spaAxpy(alpha, A, x, y);
1039 return;
1040 }
1041
1042 }
1043
1044 end1 = ngrid-1;
1045 end2 = ngrid-nline;
1046
1047 smat_amxv(alpha, diag, x, nc, y);
1048 smat_amxv(alpha, offdiag1, x+nc, nc, y);
1049 smat_amxv(alpha, offdiag3, x+nlinenc, nc, y);
1050
1051 for (i=1; i<nline; ++i) {
1052 idx = i*nc;
1053 matidx = idx*nc;
1054 idx1 = idx - nc;
1055 matidx1 = idx1*nc;
1056 smat_amxv(alpha, offdiag0+matidx1, x+idx1, nc, y+idx);
1057 smat_amxv(alpha, diag+matidx, x+idx, nc, y+idx);
1058 smat_amxv(alpha, offdiag1+matidx, x+idx+nc, nc, y+idx);
1059 smat_amxv(alpha, offdiag3+matidx, x+idx+nlinenc, nc, y+idx);
1060 }
1061
1062 for (i=nx; i<end2; ++i) {
1063 idx = i*nc;
1064 idx1 = idx-nc;
1065 idx2 = idx-nlinenc;
1066 matidx = idx*nc;
1067 matidx1 = idx1*nc;
1068 matidx2 = idx2*nc;
1069 smat_amxv(alpha, offdiag2+matidx2, x+idx2, nc, y+idx);
1070 smat_amxv(alpha, offdiag0+matidx1, x+idx1, nc, y+idx);
1071 smat_amxv(alpha, diag+matidx, x+idx, nc, y+idx);
1072 smat_amxv(alpha, offdiag1+matidx, x+idx+nc, nc, y+idx);
1073 smat_amxv(alpha, offdiag3+matidx, x+idx+nlinenc, nc, y+idx);
1074 }
1075
1076 for (i=end2; i<end1; ++i) {
1077 idx = i*nc;
1078 idx1 = idx-nc;
1079 idx2 = idx-nlinenc;
1080 matidx = idx*nc;
1081 matidx1 = idx1*nc;
1082 matidx2 = idx2*nc;
1083 smat_amxv(alpha, offdiag2+matidx2, x+idx2, nc, y+idx);
1084 smat_amxv(alpha, offdiag0+matidx1, x+idx1, nc, y+idx);
1085 smat_amxv(alpha, diag+matidx, x+idx, nc, y+idx);
1086 smat_amxv(alpha, offdiag1+matidx, x+idx+nc, nc, y+idx);
1087 }
1088
1089 i=end1;
1090 idx = i*nc;
1091 idx1 = idx-nc;
1092 idx2 = idx-nlinenc;
1093 matidx = idx*nc;
1094 matidx1 = idx1*nc;
1095 matidx2 = idx2*nc;
1096 smat_amxv(alpha, offdiag2+matidx2, x+idx2, nc, y+idx);
1097 smat_amxv(alpha, offdiag0+matidx1, x+idx1, nc, y+idx);
1098 smat_amxv(alpha, diag+matidx, x+idx, nc, y+idx);
1099
1100 return;
1101}
1102
1121static inline void str_spaAxpy_3D_nc1 (const REAL alpha,
1122 const dSTRmat *A,
1123 const REAL *x,
1124 REAL *y)
1125{
1126 INT i;
1127 INT idx1,idx2,idx3;
1128 INT end1, end2, end3;
1129 // information of A
1130 INT nx = A->nx;
1131 INT nxy = A->nxy;
1132 INT ngrid = A->ngrid; // number of grids
1133 INT nband = A->nband;
1134
1135 REAL *diag = A->diag;
1136 REAL *offdiag0=NULL, *offdiag1=NULL, *offdiag2=NULL,
1137 *offdiag3=NULL, *offdiag4=NULL, *offdiag5=NULL;
1138
1139 for (i=0; i<nband; ++i) {
1140
1141 if (A->offsets[i] == -1) {
1142 offdiag0 = A->offdiag[i];
1143 }
1144 else if (A->offsets[i] == 1) {
1145 offdiag1 = A->offdiag[i];
1146 }
1147 else if (A->offsets[i] == -nx) {
1148 offdiag2 = A->offdiag[i];
1149 }
1150 else if (A->offsets[i] == nx) {
1151 offdiag3 = A->offdiag[i];
1152 }
1153 else if (A->offsets[i] == -nxy) {
1154 offdiag4 = A->offdiag[i];
1155 }
1156 else if (A->offsets[i] == nxy) {
1157 offdiag5 = A->offdiag[i];
1158 }
1159 else {
1160 printf("### WARNING: offsets for 3D scalar is illegal! %s\n", __FUNCTION__);
1161 str_spaAxpy(alpha, A, x, y);
1162 return;
1163 }
1164 }
1165
1166 end1 = ngrid-1;
1167 end2 = ngrid-nx;
1168 end3 = ngrid-nxy;
1169
1170 y[0] += alpha*(diag[0]*x[0] + offdiag1[0]*x[1] + offdiag3[0]*x[nx] + offdiag5[0]*x[nxy]);
1171
1172 for (i=1; i<nx; ++i) {
1173 idx1 = i-1;
1174 y[i] += alpha*(offdiag0[idx1]*x[idx1] + diag[i]*x[i] + offdiag1[i]*x[i+1] +
1175 offdiag3[i]*x[i+nx] + offdiag5[i]*x[i+nxy]);
1176 }
1177
1178 for (i=nx; i<nxy; ++i) {
1179 idx1 = i-1;
1180 idx2 = i-nx;
1181 y[i] += alpha*(offdiag2[idx2]*x[idx2] + offdiag0[idx1]*x[idx1]
1182 + diag[i]*x[i] + offdiag1[i]*x[i+1] + offdiag3[i]*x[i+nx]
1183 + offdiag5[i]*x[i+nxy]);
1184 }
1185
1186 for (i=nxy; i<end3; ++i) {
1187 idx1 = i-1;
1188 idx2 = i-nx;
1189 idx3 = i-nxy;
1190 y[i] += alpha*(offdiag4[idx3]*x[idx3] + offdiag2[idx2]*x[idx2]
1191 + offdiag0[idx1]*x[idx1] + diag[i]*x[i] + offdiag1[i]*x[i+1]
1192 + offdiag3[i]*x[i+nx] + offdiag5[i]*x[i+nxy]);
1193 }
1194
1195 for (i=end3; i<end2; ++i) {
1196 idx1 = i-1;
1197 idx2 = i-nx;
1198 idx3 = i-nxy;
1199 y[i] += alpha*(offdiag4[idx3]*x[idx3] + offdiag2[idx2]*x[idx2]
1200 + offdiag0[idx1]*x[idx1] + diag[i]*x[i]
1201 + offdiag1[i]*x[i+1] + offdiag3[i]*x[i+nx]);
1202 }
1203
1204 for (i=end2; i<end1; ++i) {
1205 idx1 = i-1;
1206 idx2 = i-nx;
1207 idx3 = i-nxy;
1208 y[i] += alpha*(offdiag4[idx3]*x[idx3] + offdiag2[idx2]*x[idx2]
1209 + offdiag0[idx1]*x[idx1] + diag[i]*x[i]
1210 + offdiag1[i]*x[i+1]);
1211 }
1212
1213 idx1 = end1-1;
1214 idx2 = end1-nx;
1215 idx3 = end1-nxy;
1216 y[end1] += alpha*(offdiag4[idx3]*x[idx3] + offdiag2[idx2]*x[idx2] +
1217 offdiag0[idx1]*x[idx1] + diag[end1]*x[end1]);
1218
1219 return;
1220}
1221
1240static inline void str_spaAxpy_3D_nc3 (const REAL alpha,
1241 const dSTRmat *A,
1242 const REAL *x,
1243 REAL *y)
1244{
1245 INT i;
1246 INT idx,idx1,idx2,idx3;
1247 INT matidx, matidx1, matidx2, matidx3;
1248 INT end1, end2, end3;
1249 // information of A
1250 INT nx = A->nx;
1251 INT nxy = A->nxy;
1252 INT ngrid = A->ngrid; // number of grids
1253 INT nc = A->nc;
1254 INT nxnc = nx*nc;
1255 INT nxync = nxy*nc;
1256 INT nband = A->nband;
1257
1258 REAL *diag = A->diag;
1259 REAL *offdiag0=NULL, *offdiag1=NULL, *offdiag2=NULL,
1260 *offdiag3=NULL, *offdiag4=NULL, *offdiag5=NULL;
1261
1262 for (i=0; i<nband; ++i) {
1263
1264 if (A->offsets[i] == -1) {
1265 offdiag0 = A->offdiag[i];
1266 }
1267 else if (A->offsets[i] == 1) {
1268 offdiag1 = A->offdiag[i];
1269 }
1270 else if (A->offsets[i] == -nx) {
1271 offdiag2 = A->offdiag[i];
1272 }
1273 else if (A->offsets[i] == nx) {
1274 offdiag3 = A->offdiag[i];
1275 }
1276 else if (A->offsets[i] == -nxy) {
1277 offdiag4 = A->offdiag[i];
1278 }
1279 else if (A->offsets[i] == nxy) {
1280 offdiag5 = A->offdiag[i];
1281 }
1282 else {
1283 printf("### WARNING: offsets for 2D scalar is illegal! %s\n", __FUNCTION__);
1284 str_spaAxpy(alpha, A, x, y);
1285 return;
1286 }
1287 }
1288
1289 end1 = ngrid-1;
1290 end2 = ngrid-nx;
1291 end3 = ngrid-nxy;
1292
1293 smat_amxv_nc3(alpha, diag, x, y);
1294 smat_amxv_nc3(alpha, offdiag1, x+nc, y);
1295 smat_amxv_nc3(alpha, offdiag3, x+nxnc, y);
1296 smat_amxv_nc3(alpha, offdiag5, x+nxync, y);
1297
1298 for (i=1; i<nx; ++i) {
1299 idx = i*nc;
1300 matidx = idx*nc;
1301 idx1 = idx - nc;
1302 matidx1 = idx1*nc;
1303 smat_amxv_nc3(alpha, offdiag0+matidx1, x+idx1, y+idx);
1304 smat_amxv_nc3(alpha, diag+matidx, x+idx, y+idx);
1305 smat_amxv_nc3(alpha, offdiag1+matidx, x+idx+nc, y+idx);
1306 smat_amxv_nc3(alpha, offdiag3+matidx, x+idx+nxnc, y+idx);
1307 smat_amxv_nc3(alpha, offdiag5+matidx, x+idx+nxync, y+idx);
1308 }
1309
1310 for (i=nx; i<nxy; ++i) {
1311 idx = i*nc;
1312 idx1 = idx-nc;
1313 idx2 = idx-nxnc;
1314 matidx = idx*nc;
1315 matidx1 = idx1*nc;
1316 matidx2 = idx2*nc;
1317 smat_amxv_nc3(alpha, offdiag2+matidx2, x+idx2, y+idx);
1318 smat_amxv_nc3(alpha, offdiag0+matidx1, x+idx1, y+idx);
1319 smat_amxv_nc3(alpha, diag+matidx, x+idx, y+idx);
1320 smat_amxv_nc3(alpha, offdiag1+matidx, x+idx+nc, y+idx);
1321 smat_amxv_nc3(alpha, offdiag3+matidx, x+idx+nxnc, y+idx);
1322 smat_amxv_nc3(alpha, offdiag5+matidx, x+idx+nxync, y+idx);
1323
1324 }
1325
1326 for (i=nxy; i<end3; ++i) {
1327 idx = i*nc;
1328 idx1 = idx-nc;
1329 idx2 = idx-nxnc;
1330 idx3 = idx-nxync;
1331 matidx = idx*nc;
1332 matidx1 = idx1*nc;
1333 matidx2 = idx2*nc;
1334 matidx3 = idx3*nc;
1335 smat_amxv_nc3(alpha, offdiag4+matidx3, x+idx3, y+idx);
1336 smat_amxv_nc3(alpha, offdiag2+matidx2, x+idx2, y+idx);
1337 smat_amxv_nc3(alpha, offdiag0+matidx1, x+idx1, y+idx);
1338 smat_amxv_nc3(alpha, diag+matidx, x+idx, y+idx);
1339 smat_amxv_nc3(alpha, offdiag1+matidx, x+idx+nc, y+idx);
1340 smat_amxv_nc3(alpha, offdiag3+matidx, x+idx+nxnc, y+idx);
1341 smat_amxv_nc3(alpha, offdiag5+matidx, x+idx+nxync, y+idx);
1342 }
1343
1344 for (i=end3; i<end2; ++i) {
1345 idx = i*nc;
1346 idx1 = idx-nc;
1347 idx2 = idx-nxnc;
1348 idx3 = idx-nxync;
1349 matidx = idx*nc;
1350 matidx1 = idx1*nc;
1351 matidx2 = idx2*nc;
1352 matidx3 = idx3*nc;
1353 smat_amxv_nc3(alpha, offdiag4+matidx3, x+idx3, y+idx);
1354 smat_amxv_nc3(alpha, offdiag2+matidx2, x+idx2, y+idx);
1355 smat_amxv_nc3(alpha, offdiag0+matidx1, x+idx1, y+idx);
1356 smat_amxv_nc3(alpha, diag+matidx, x+idx, y+idx);
1357 smat_amxv_nc3(alpha, offdiag1+matidx, x+idx+nc, y+idx);
1358 smat_amxv_nc3(alpha, offdiag3+matidx, x+idx+nxnc, y+idx);
1359 }
1360
1361 for (i=end2; i<end1; ++i) {
1362 idx = i*nc;
1363 idx1 = idx-nc;
1364 idx2 = idx-nxnc;
1365 idx3 = idx-nxync;
1366 matidx = idx*nc;
1367 matidx1 = idx1*nc;
1368 matidx2 = idx2*nc;
1369 matidx3 = idx3*nc;
1370 smat_amxv_nc3(alpha, offdiag4+matidx3, x+idx3, y+idx);
1371 smat_amxv_nc3(alpha, offdiag2+matidx2, x+idx2, y+idx);
1372 smat_amxv_nc3(alpha, offdiag0+matidx1, x+idx1, y+idx);
1373 smat_amxv_nc3(alpha, diag+matidx, x+idx, y+idx);
1374 smat_amxv_nc3(alpha, offdiag1+matidx, x+idx+nc, y+idx);
1375 }
1376
1377 i=end1;
1378 idx = i*nc;
1379 idx1 = idx-nc;
1380 idx2 = idx-nxnc;
1381 idx3 = idx-nxync;
1382 matidx = idx*nc;
1383 matidx1 = idx1*nc;
1384 matidx2 = idx2*nc;
1385 matidx3 = idx3*nc;
1386 smat_amxv_nc3(alpha, offdiag4+matidx3, x+idx3, y+idx);
1387 smat_amxv_nc3(alpha, offdiag2+matidx2, x+idx2, y+idx);
1388 smat_amxv_nc3(alpha, offdiag0+matidx1, x+idx1, y+idx);
1389 smat_amxv_nc3(alpha, diag+matidx, x+idx, y+idx);
1390
1391 return;
1392
1393}
1394
1413static inline void str_spaAxpy_3D_nc5 (const REAL alpha,
1414 const dSTRmat *A,
1415 const REAL *x,
1416 REAL *y)
1417{
1418 INT i;
1419 INT idx,idx1,idx2,idx3;
1420 INT matidx, matidx1, matidx2, matidx3;
1421 INT end1, end2, end3;
1422 // information of A
1423 INT nx = A->nx;
1424 INT nxy = A->nxy;
1425 INT ngrid = A->ngrid; // number of grids
1426 INT nc = A->nc;
1427 INT nxnc = nx*nc;
1428 INT nxync = nxy*nc;
1429 INT nband = A->nband;
1430
1431 REAL *diag = A->diag;
1432 REAL *offdiag0=NULL, *offdiag1=NULL, *offdiag2=NULL,
1433 *offdiag3=NULL, *offdiag4=NULL, *offdiag5=NULL;
1434
1435 for (i=0; i<nband; ++i) {
1436
1437 if (A->offsets[i] == -1) {
1438 offdiag0 = A->offdiag[i];
1439 }
1440 else if (A->offsets[i] == 1) {
1441 offdiag1 = A->offdiag[i];
1442 }
1443 else if (A->offsets[i] == -nx) {
1444 offdiag2 = A->offdiag[i];
1445 }
1446 else if (A->offsets[i] == nx) {
1447 offdiag3 = A->offdiag[i];
1448 }
1449 else if (A->offsets[i] == -nxy) {
1450 offdiag4 = A->offdiag[i];
1451 }
1452 else if (A->offsets[i] == nxy) {
1453 offdiag5 = A->offdiag[i];
1454 }
1455 else {
1456 printf("### WARNING: offsets for 2D scalar is illegal! %s\n", __FUNCTION__);
1457 str_spaAxpy(alpha, A, x, y);
1458 return;
1459 }
1460 }
1461
1462 end1 = ngrid-1;
1463 end2 = ngrid-nx;
1464 end3 = ngrid-nxy;
1465
1466 smat_amxv_nc5(alpha, diag, x, y);
1467 smat_amxv_nc5(alpha, offdiag1, x+nc, y);
1468 smat_amxv_nc5(alpha, offdiag3, x+nxnc, y);
1469 smat_amxv_nc5(alpha, offdiag5, x+nxync, y);
1470
1471 for (i=1; i<nx; ++i) {
1472 idx = i*nc;
1473 matidx = idx*nc;
1474 idx1 = idx - nc;
1475 matidx1 = idx1*nc;
1476 smat_amxv_nc5(alpha, offdiag0+matidx1, x+idx1, y+idx);
1477 smat_amxv_nc5(alpha, diag+matidx, x+idx, y+idx);
1478 smat_amxv_nc5(alpha, offdiag1+matidx, x+idx+nc, y+idx);
1479 smat_amxv_nc5(alpha, offdiag3+matidx, x+idx+nxnc, y+idx);
1480 smat_amxv_nc5(alpha, offdiag5+matidx, x+idx+nxync, y+idx);
1481 }
1482
1483 for (i=nx; i<nxy; ++i) {
1484 idx = i*nc;
1485 idx1 = idx-nc;
1486 idx2 = idx-nxnc;
1487 matidx = idx*nc;
1488 matidx1 = idx1*nc;
1489 matidx2 = idx2*nc;
1490 smat_amxv_nc5(alpha, offdiag2+matidx2, x+idx2, y+idx);
1491 smat_amxv_nc5(alpha, offdiag0+matidx1, x+idx1, y+idx);
1492 smat_amxv_nc5(alpha, diag+matidx, x+idx, y+idx);
1493 smat_amxv_nc5(alpha, offdiag1+matidx, x+idx+nc, y+idx);
1494 smat_amxv_nc5(alpha, offdiag3+matidx, x+idx+nxnc, y+idx);
1495 smat_amxv_nc5(alpha, offdiag5+matidx, x+idx+nxync, y+idx);
1496
1497 }
1498
1499 for (i=nxy; i<end3; ++i) {
1500 idx = i*nc;
1501 idx1 = idx-nc;
1502 idx2 = idx-nxnc;
1503 idx3 = idx-nxync;
1504 matidx = idx*nc;
1505 matidx1 = idx1*nc;
1506 matidx2 = idx2*nc;
1507 matidx3 = idx3*nc;
1508 smat_amxv_nc5(alpha, offdiag4+matidx3, x+idx3, y+idx);
1509 smat_amxv_nc5(alpha, offdiag2+matidx2, x+idx2, y+idx);
1510 smat_amxv_nc5(alpha, offdiag0+matidx1, x+idx1, y+idx);
1511 smat_amxv_nc5(alpha, diag+matidx, x+idx, y+idx);
1512 smat_amxv_nc5(alpha, offdiag1+matidx, x+idx+nc, y+idx);
1513 smat_amxv_nc5(alpha, offdiag3+matidx, x+idx+nxnc, y+idx);
1514 smat_amxv_nc5(alpha, offdiag5+matidx, x+idx+nxync, y+idx);
1515 }
1516
1517 for (i=end3; i<end2; ++i) {
1518 idx = i*nc;
1519 idx1 = idx-nc;
1520 idx2 = idx-nxnc;
1521 idx3 = idx-nxync;
1522 matidx = idx*nc;
1523 matidx1 = idx1*nc;
1524 matidx2 = idx2*nc;
1525 matidx3 = idx3*nc;
1526 smat_amxv_nc5(alpha, offdiag4+matidx3, x+idx3, y+idx);
1527 smat_amxv_nc5(alpha, offdiag2+matidx2, x+idx2, y+idx);
1528 smat_amxv_nc5(alpha, offdiag0+matidx1, x+idx1, y+idx);
1529 smat_amxv_nc5(alpha, diag+matidx, x+idx, y+idx);
1530 smat_amxv_nc5(alpha, offdiag1+matidx, x+idx+nc, y+idx);
1531 smat_amxv_nc5(alpha, offdiag3+matidx, x+idx+nxnc, y+idx);
1532 }
1533
1534 for (i=end2; i<end1; ++i) {
1535 idx = i*nc;
1536 idx1 = idx-nc;
1537 idx2 = idx-nxnc;
1538 idx3 = idx-nxync;
1539 matidx = idx*nc;
1540 matidx1 = idx1*nc;
1541 matidx2 = idx2*nc;
1542 matidx3 = idx3*nc;
1543 smat_amxv_nc5(alpha, offdiag4+matidx3, x+idx3, y+idx);
1544 smat_amxv_nc5(alpha, offdiag2+matidx2, x+idx2, y+idx);
1545 smat_amxv_nc5(alpha, offdiag0+matidx1, x+idx1, y+idx);
1546 smat_amxv_nc5(alpha, diag+matidx, x+idx, y+idx);
1547 smat_amxv_nc5(alpha, offdiag1+matidx, x+idx+nc, y+idx);
1548 }
1549
1550 i=end1;
1551 idx = i*nc;
1552 idx1 = idx-nc;
1553 idx2 = idx-nxnc;
1554 idx3 = idx-nxync;
1555 matidx = idx*nc;
1556 matidx1 = idx1*nc;
1557 matidx2 = idx2*nc;
1558 matidx3 = idx3*nc;
1559 smat_amxv_nc5(alpha, offdiag4+matidx3, x+idx3, y+idx);
1560 smat_amxv_nc5(alpha, offdiag2+matidx2, x+idx2, y+idx);
1561 smat_amxv_nc5(alpha, offdiag0+matidx1, x+idx1, y+idx);
1562 smat_amxv_nc5(alpha, diag+matidx, x+idx, y+idx);
1563
1564 return;
1565
1566}
1567
1586static inline void str_spaAxpy_3D_blk (const REAL alpha,
1587 const dSTRmat *A,
1588 const REAL *x,
1589 REAL *y)
1590{
1591 INT i;
1592 INT idx,idx1,idx2,idx3;
1593 INT matidx, matidx1, matidx2, matidx3;
1594 INT end1, end2, end3;
1595 // information of A
1596 INT nx = A->nx;
1597 INT nxy = A->nxy;
1598 INT ngrid = A->ngrid; // number of grids
1599 INT nc = A->nc;
1600 INT nxnc = nx*nc;
1601 INT nxync = nxy*nc;
1602 INT nband = A->nband;
1603
1604 REAL *diag = A->diag;
1605 REAL *offdiag0=NULL, *offdiag1=NULL, *offdiag2=NULL,
1606 *offdiag3=NULL, *offdiag4=NULL, *offdiag5=NULL;
1607
1608 for (i=0; i<nband; ++i) {
1609
1610 if (A->offsets[i] == -1) {
1611 offdiag0 = A->offdiag[i];
1612 }
1613 else if (A->offsets[i] == 1) {
1614 offdiag1 = A->offdiag[i];
1615 }
1616 else if (A->offsets[i] == -nx) {
1617 offdiag2 = A->offdiag[i];
1618 }
1619 else if (A->offsets[i] == nx) {
1620 offdiag3 = A->offdiag[i];
1621 }
1622 else if (A->offsets[i] == -nxy) {
1623 offdiag4 = A->offdiag[i];
1624 }
1625 else if (A->offsets[i] == nxy) {
1626 offdiag5 = A->offdiag[i];
1627 }
1628 else {
1629 printf("### WARNING: offsets for 2D scalar is illegal! %s\n", __FUNCTION__);
1630 str_spaAxpy(alpha, A, x, y);
1631 return;
1632 }
1633 }
1634
1635 end1 = ngrid-1;
1636 end2 = ngrid-nx;
1637 end3 = ngrid-nxy;
1638
1639 smat_amxv(alpha, diag, x, nc, y);
1640 smat_amxv(alpha, offdiag1, x+nc, nc, y);
1641 smat_amxv(alpha, offdiag3, x+nxnc, nc, y);
1642 smat_amxv(alpha, offdiag5, x+nxync, nc, y);
1643
1644 for (i=1; i<nx; ++i) {
1645 idx = i*nc;
1646 matidx = idx*nc;
1647 idx1 = idx - nc;
1648 matidx1 = idx1*nc;
1649 smat_amxv(alpha, offdiag0+matidx1, x+idx1, nc, y+idx);
1650 smat_amxv(alpha, diag+matidx, x+idx, nc, y+idx);
1651 smat_amxv(alpha, offdiag1+matidx, x+idx+nc, nc, y+idx);
1652 smat_amxv(alpha, offdiag3+matidx, x+idx+nxnc, nc, y+idx);
1653 smat_amxv(alpha, offdiag5+matidx, x+idx+nxync, nc, y+idx);
1654 }
1655
1656 for (i=nx; i<nxy; ++i) {
1657 idx = i*nc;
1658 idx1 = idx-nc;
1659 idx2 = idx-nxnc;
1660 matidx = idx*nc;
1661 matidx1 = idx1*nc;
1662 matidx2 = idx2*nc;
1663 smat_amxv(alpha, offdiag2+matidx2, x+idx2, nc, y+idx);
1664 smat_amxv(alpha, offdiag0+matidx1, x+idx1, nc, y+idx);
1665 smat_amxv(alpha, diag+matidx, x+idx, nc, y+idx);
1666 smat_amxv(alpha, offdiag1+matidx, x+idx+nc, nc, y+idx);
1667 smat_amxv(alpha, offdiag3+matidx, x+idx+nxnc, nc, y+idx);
1668 smat_amxv(alpha, offdiag5+matidx, x+idx+nxync, nc, y+idx);
1669
1670 }
1671
1672 for (i=nxy; i<end3; ++i) {
1673 idx = i*nc;
1674 idx1 = idx-nc;
1675 idx2 = idx-nxnc;
1676 idx3 = idx-nxync;
1677 matidx = idx*nc;
1678 matidx1 = idx1*nc;
1679 matidx2 = idx2*nc;
1680 matidx3 = idx3*nc;
1681 smat_amxv(alpha, offdiag4+matidx3, x+idx3, nc, y+idx);
1682 smat_amxv(alpha, offdiag2+matidx2, x+idx2, nc, y+idx);
1683 smat_amxv(alpha, offdiag0+matidx1, x+idx1, nc, y+idx);
1684 smat_amxv(alpha, diag+matidx, x+idx, nc, y+idx);
1685 smat_amxv(alpha, offdiag1+matidx, x+idx+nc, nc, y+idx);
1686 smat_amxv(alpha, offdiag3+matidx, x+idx+nxnc, nc, y+idx);
1687 smat_amxv(alpha, offdiag5+matidx, x+idx+nxync, nc, y+idx);
1688 }
1689
1690 for (i=end3; i<end2; ++i) {
1691 idx = i*nc;
1692 idx1 = idx-nc;
1693 idx2 = idx-nxnc;
1694 idx3 = idx-nxync;
1695 matidx = idx*nc;
1696 matidx1 = idx1*nc;
1697 matidx2 = idx2*nc;
1698 matidx3 = idx3*nc;
1699 smat_amxv(alpha, offdiag4+matidx3, x+idx3, nc, y+idx);
1700 smat_amxv(alpha, offdiag2+matidx2, x+idx2, nc, y+idx);
1701 smat_amxv(alpha, offdiag0+matidx1, x+idx1, nc, y+idx);
1702 smat_amxv(alpha, diag+matidx, x+idx, nc, y+idx);
1703 smat_amxv(alpha, offdiag1+matidx, x+idx+nc, nc, y+idx);
1704 smat_amxv(alpha, offdiag3+matidx, x+idx+nxnc, nc, y+idx);
1705 }
1706
1707 for (i=end2; i<end1; ++i) {
1708 idx = i*nc;
1709 idx1 = idx-nc;
1710 idx2 = idx-nxnc;
1711 idx3 = idx-nxync;
1712 matidx = idx*nc;
1713 matidx1 = idx1*nc;
1714 matidx2 = idx2*nc;
1715 matidx3 = idx3*nc;
1716 smat_amxv(alpha, offdiag4+matidx3, x+idx3, nc, y+idx);
1717 smat_amxv(alpha, offdiag2+matidx2, x+idx2, nc, y+idx);
1718 smat_amxv(alpha, offdiag0+matidx1, x+idx1, nc, y+idx);
1719 smat_amxv(alpha, diag+matidx, x+idx, nc, y+idx);
1720 smat_amxv(alpha, offdiag1+matidx, x+idx+nc, nc, y+idx);
1721 }
1722
1723 i=end1;
1724 idx = i*nc;
1725 idx1 = idx-nc;
1726 idx2 = idx-nxnc;
1727 idx3 = idx-nxync;
1728 matidx = idx*nc;
1729 matidx1 = idx1*nc;
1730 matidx2 = idx2*nc;
1731 matidx3 = idx3*nc;
1732 smat_amxv(alpha, offdiag4+matidx3, x+idx3, nc, y+idx);
1733 smat_amxv(alpha, offdiag2+matidx2, x+idx2, nc, y+idx);
1734 smat_amxv(alpha, offdiag0+matidx1, x+idx1, nc, y+idx);
1735 smat_amxv(alpha, diag+matidx, x+idx, nc, y+idx);
1736
1737 return;
1738
1739}
1740
1755static inline void str_spaAxpy (const REAL alpha,
1756 const dSTRmat *A,
1757 const REAL *x,
1758 REAL *y)
1759{
1760 // information of A
1761 INT ngrid = A->ngrid; // number of grids
1762 INT nc = A->nc; // size of each block (number of components)
1763 INT nband = A->nband ; // number of off-diag band
1764 INT *offsets = A->offsets; // offsets of the off-diagals
1765 REAL *diag = A->diag; // Diagonal entries
1766 REAL **offdiag = A->offdiag; // Off-diagonal entries
1767
1768 // local variables
1769 INT k;
1770 INT block = 0;
1771 INT point = 0;
1772 INT band = 0;
1773 INT width = 0;
1774 INT size = nc*ngrid;
1775 INT nc2 = nc*nc;
1776 INT ncw = 0;
1777 INT start_data = 0;
1778 INT start_vecx = 0;
1779 INT start_vecy = 0;
1780 INT start_vect = 0;
1781 REAL beta = 0.0;
1782
1783 if (alpha == 0) {
1784 return; // nothing should be done
1785 }
1786
1787 beta = 1.0/alpha;
1788
1789 // y: = beta*y
1790 for (k = 0; k < size; ++k) {
1791 y[k] *= beta;
1792 }
1793
1794 // y: = y + A*x
1795 if (nc > 1) {
1796 // Deal with the diagonal band
1797 for (block = 0; block < ngrid; ++block) {
1798 start_data = nc2*block;
1799 start_vect = nc*block;
1800 blkcontr_str(start_data,start_vect,start_vect,nc,diag,x,y);
1801 }
1802
1803 // Deal with the off-diagonal bands
1804 for (band = 0; band < nband; band ++) {
1805 width = offsets[band];
1806 ncw = nc*width;
1807 if (width < 0) {
1808 for (block = 0; block < ngrid+width; ++block) {
1809 start_data = nc2*block;
1810 start_vecx = nc*block;
1811 start_vecy = start_vecx - ncw;
1812 blkcontr_str(start_data,start_vecx,start_vecy,nc,offdiag[band],x,y);
1813 }
1814 }
1815 else {
1816 for (block = 0; block < ngrid-width; ++block) {
1817 start_data = nc2*block;
1818 start_vecy = nc*block;
1819 start_vecx = start_vecy + ncw;
1820 blkcontr_str(start_data,start_vecx,start_vecy,nc,offdiag[band],x,y);
1821 }
1822 }
1823 }
1824 }
1825 else if (nc == 1) {
1826 // Deal with the diagonal band
1827 for (point = 0; point < ngrid; point ++) {
1828 y[point] += diag[point]*x[point];
1829 }
1830
1831 // Deal with the off-diagonal bands
1832 for (band = 0; band < nband; band ++) {
1833 width = offsets[band];
1834 if (width < 0) {
1835 for (point = 0; point < ngrid+width; point ++) {
1836 y[point-width] += offdiag[band][point]*x[point];
1837 }
1838 }
1839 else {
1840 for (point = 0; point < ngrid-width; point ++) {
1841 y[point] += offdiag[band][point]*x[point+width];
1842 }
1843 }
1844 }
1845 }
1846 else {
1847 printf("### WARNING: nc is illegal! %s\n", __FUNCTION__);
1848 return;
1849 }
1850
1851 // y: = alpha*y
1852 for (k = 0; k < size; ++k) {
1853 y[k] *= alpha;
1854 }
1855}
1856
1857/*---------------------------------*/
1858/*-- End of File --*/
1859/*---------------------------------*/
void fasp_darray_cp(const INT n, const REAL *x, REAL *y)
Copy an array to the other y=x.
Definition: AuxArray.c:210
void fasp_mem_free(void *mem)
Free up previous allocated memory body and set pointer to NULL.
Definition: AuxMemory.c:152
void * fasp_mem_calloc(const unsigned int size, const unsigned int type)
Allocate, initiate, and check memory.
Definition: AuxMemory.c:65
void fasp_get_start_end(const INT procid, const INT nprocs, const INT n, INT *start, INT *end)
Assign Load to each thread.
Definition: AuxThreads.c:93
SHORT fasp_smat_inv(REAL *a, const INT n)
Compute the inverse matrix of a small full matrix a.
void fasp_blas_smat_mul(const REAL *a, const REAL *b, REAL *c, const INT n)
Compute the matrix product of two small full matrices a and b, stored in c.
Definition: BlaSmallMat.c:596
void fasp_dstr_alloc(const INT nx, const INT ny, const INT nz, const INT nxy, const INT ngrid, const INT nband, const INT nc, INT *offsets, dSTRmat *A)
Allocate STR sparse matrix memory space.
Definition: BlaSparseSTR.c:93
void fasp_blas_dstr_aAxpy(const REAL alpha, const dSTRmat *A, const REAL *x, REAL *y)
Matrix-vector multiplication y = alpha*A*x + y.
Definition: BlaSpmvSTR.c:61
INT fasp_blas_dstr_diagscale(const dSTRmat *A, dSTRmat *B)
B=D^{-1}A.
Definition: BlaSpmvSTR.c:155
void fasp_blas_dstr_mxv(const dSTRmat *A, const REAL *x, REAL *y)
Matrix-vector multiplication y = A*x.
Definition: BlaSpmvSTR.c:131
Main header file for the FASP project.
#define REAL
Definition: fasp.h:75
#define INT
Definition: fasp.h:72
#define OPENMP_HOLDS
Definition: fasp_const.h:269
Structure matrix of REAL type.
Definition: fasp.h:316
INT * offsets
offsets of the off-diagonals (length is nband)
Definition: fasp.h:343
INT nx
number of grids in x direction
Definition: fasp.h:319
INT nxy
number of grids on x-y plane
Definition: fasp.h:328
INT ny
number of grids in y direction
Definition: fasp.h:322
INT nband
number of off-diag bands
Definition: fasp.h:340
REAL * diag
diagonal entries (length is ngrid*(nc^2))
Definition: fasp.h:337
INT ngrid
number of grids
Definition: fasp.h:334
INT nc
size of each block (number of components)
Definition: fasp.h:331
INT nz
number of grids in z direction
Definition: fasp.h:325
REAL ** offdiag
off-diagonal entries (dimension is nband * [(ngrid-|offsets|) * nc^2])
Definition: fasp.h:346