21#include "fasp_functs.h"
49 INT count = 0, added, countrow;
50 INT nb = A->nb, nb2 = nb * nb;
55 INT mybegin, myend, myid, nthreads;
58 nthreads = fasp_get_num_threads();
63 printf("### ERROR: Matrix sizes do not match!\n");
68 if (A == NULL && B == NULL) {
78 if (A->NNZ == 0 && B->NNZ == 0) {
89 if (A->NNZ == 0 || A == NULL) {
91 memcpy(C->IA, B->IA, (B->ROW + 1) * sizeof(INT));
96#pragma omp parallel private(myid, mybegin, myend, i)
98 myid = omp_get_thread_num();
100 for (i = mybegin; i < myend; ++i)
105 for (i = 0; i < B->NNZ; ++i) {
116 if (B->NNZ == 0 || B == NULL) {
118 memcpy(C->IA, A->IA, (A->ROW + 1) * sizeof(INT));
123 INT mybegin, myend, myid;
124#pragma omp parallel private(myid, mybegin, myend, i)
126 myid = omp_get_thread_num();
128 for (i = mybegin; i < myend; ++i)
133 for (i = 0; i < A->NNZ; ++i)
156 memset(C->IA, 0, sizeof(INT) * (C->ROW + 1));
157 memset(C->JA, -1, sizeof(INT) * (A->NNZ + B->NNZ));
159 for (i = 0; i < A->ROW; ++i) {
161 for (j = A->IA[i]; j < A->IA[i + 1]; ++j) {
164 C->JA[count] = A->JA[j];
170 for (k = B->IA[i]; k < B->IA[i + 1]; ++k) {
173 for (l = C->IA[i]; l < C->IA[i] + countrow + 1; l++) {
174 if (B->JA[k] == C->JA[l]) {
177 beta, &C->val[l * nb2]);
186 C->JA[count] = B->JA[k];
193 C->IA[i + 1] += C->IA[i];
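
The fragment above (fasp_blas_dbsr_add) merges the block-sparsity patterns of A and B row by row: each block row of C first receives alpha times A's blocks, and every block of B is then either added into an existing block of C (when its column index already appears in the row) or appended as a new block; the row pointers are accumulated at the end. A minimal sketch of that merge for one block row, using plain int/double in place of the FASP typedefs and a hypothetical add_block helper standing in for fasp_blas_smat_add:

    /* Hypothetical helper: c = alpha*a + beta*b for one nb x nb block stored as nb2 doubles. */
    static void add_block(const double *a, const double *b, int nb2,
                          double alpha, double beta, double *c)
    {
        for (int t = 0; t < nb2; ++t) c[t] = alpha * a[t] + beta * b[t];
    }

    /* Merge block row i of alpha*A + beta*B into C.  *count is the global block
     * counter (equal to C's row start for row i when rows are filled in order). */
    static void merge_row(int i, int nb2, double alpha, double beta,
                          const int *AIA, const int *AJA, const double *Aval,
                          const int *BIA, const int *BJA, const double *Bval,
                          int *CIA, int *CJA, double *Cval, int *count)
    {
        const int row_start = *count;
        int countrow = 0, l;

        for (int j = AIA[i]; j < AIA[i + 1]; ++j) {      /* copy alpha * A blocks */
            add_block(Aval + j * nb2, Aval + j * nb2, nb2, alpha, 0.0, Cval + (*count) * nb2);
            CJA[(*count)++] = AJA[j];
            ++countrow;
        }
        for (int k = BIA[i]; k < BIA[i + 1]; ++k) {      /* fold in beta * B blocks */
            for (l = row_start; l < row_start + countrow; ++l) {
                if (BJA[k] == CJA[l]) {                  /* column already present: add in place */
                    add_block(Cval + l * nb2, Bval + k * nb2, nb2, 1.0, beta, Cval + l * nb2);
                    break;
                }
            }
            if (l == row_start + countrow) {             /* new column: append a block */
                add_block(Bval + k * nb2, Bval + k * nb2, nb2, beta, 0.0, Cval + (*count) * nb2);
                CJA[(*count)++] = BJA[k];
                ++countrow;
            }
        }
        CIA[i + 1] = CIA[i] + countrow;                  /* cumulative row pointer */
    }
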
218 const INT nb = A->nb;
268 nthreads = fasp_get_num_threads();
288 memset(y, 0X0, size * sizeof(REAL));
304 INT myid, mybegin, myend;
306#pragma omp parallel for private(myid, mybegin, myend, i, py0, k, j, pA, px0, py, iend)
308 for (myid = 0; myid < nthreads; myid++) {
310 for (i = mybegin; i < myend; ++i) {
313 for (k = IA[i]; k < iend; ++k) {
323 for (i = 0; i < ROW; ++i) {
326 for (k = IA[i]; k < iend; ++k) {
341 INT myid, mybegin, myend;
343#pragma omp parallel for private(myid, mybegin, myend, i, py0, k, j, pA, px0, py, iend)
345 for (myid = 0; myid < nthreads; myid++) {
347 for (i = mybegin; i < myend; ++i) {
350 for (k = IA[i]; k < iend; ++k) {
360 for (i = 0; i < ROW; ++i) {
363 for (k = IA[i]; k < iend; ++k) {
378 INT myid, mybegin, myend;
380#pragma omp parallel for private(myid, mybegin, myend, i, py0, k, j, pA, px0, py, iend)
382 for (myid = 0; myid < nthreads; myid++) {
384 for (i = mybegin; i < myend; ++i) {
387 for (k = IA[i]; k < iend; ++k) {
397 for (i = 0; i < ROW; ++i) {
400 for (k = IA[i]; k < iend; ++k) {
415 INT myid, mybegin, myend;
417#pragma omp parallel for private(myid, mybegin, myend, i, py0, k, j, pA, px0, py, iend)
419 for (myid = 0; myid < nthreads; myid++) {
421 for (i = mybegin; i < myend; ++i) {
424 for (k = IA[i]; k < iend; ++k) {
434 for (i = 0; i < ROW; ++i) {
437 for (k = IA[i]; k < iend; ++k) {
452 INT myid, mybegin, myend;
454#pragma omp parallel for private(myid, mybegin, myend, i, py0, k, j, pA, px0, py, iend)
456 for (myid = 0; myid < nthreads; myid++) {
458 for (i = mybegin; i < myend; ++i) {
461 for (k = IA[i]; k < iend; ++k) {
471 for (i = 0; i < ROW; ++i) {
474 for (k = IA[i]; k < iend; ++k) {
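
Every OpenMP branch in these kernels follows the same pattern: a parallel for over thread ids, with fasp_get_start_end (listed in the references below) handing each thread a contiguous chunk of block rows. A minimal sketch of that partitioning, with plain types and a self-contained start/end split standing in for the FASP helper:

    /* Contiguous 1-D partition of n items over nprocs workers (stands in for fasp_get_start_end). */
    static void get_start_end(int procid, int nprocs, int n, int *start, int *end)
    {
        int chunk = n / nprocs, rem = n % nprocs;
        *start = procid * chunk + (procid < rem ? procid : rem);
        *end   = *start + chunk + (procid < rem ? 1 : 0);
    }

    void process_rows(int ROW, int nthreads)
    {
        int myid, mybegin, myend, i;
    #pragma omp parallel for private(myid, mybegin, myend, i)
        for (myid = 0; myid < nthreads; myid++) {
            get_start_end(myid, nthreads, ROW, &mybegin, &myend);
            for (i = mybegin; i < myend; ++i) {
                /* work on block row i, exactly like the loops over IA[i]..IA[i+1] above */
            }
        }
    }
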
518 const INT nb = A->nb;
519 const INT* IA = A->IA;
520 const INT* JA = A->JA;
524 const REAL* pA = NULL;
525 const REAL* px0 = NULL;
539 nthreads = fasp_get_num_threads();
569 INT myid, mybegin, myend;
571#pragma omp parallel for private(myid, mybegin, myend, i, py0, k, j, pA, px0, py, iend)
573 for (myid = 0; myid < nthreads; myid++) {
575 for (i = mybegin; i < myend; ++i) {
578 for (k = IA[i]; k < iend; ++k) {
588 for (i = 0; i < ROW; ++i) {
591 for (k = IA[i]; k < iend; ++k) {
606 INT myid, mybegin, myend;
608#pragma omp parallel for private(myid, mybegin, myend, i, py0, k, j, pA, px0, py, iend)
610 for (myid = 0; myid < nthreads; myid++) {
612 for (i = mybegin; i < myend; ++i) {
615 for (k = IA[i]; k < iend; ++k) {
625 for (i = 0; i < ROW; ++i) {
628 for (k = IA[i]; k < iend; ++k) {
643 INT myid, mybegin, myend;
645#pragma omp parallel for private(myid, mybegin, myend, i, py0, k, j, pA, px0, py, iend)
647 for (myid = 0; myid < nthreads; myid++) {
649 for (i = mybegin; i < myend; ++i) {
652 for (k = IA[i]; k < iend; ++k) {
662 for (i = 0; i < ROW; ++i) {
665 for (k = IA[i]; k < iend; ++k) {
680 INT myid, mybegin, myend;
682#pragma omp parallel for private(myid, mybegin, myend, i, py0, k, j, pA, px0, py, iend)
684 for (myid = 0; myid < nthreads; myid++) {
686 for (i = mybegin; i < myend; ++i) {
689 for (k = IA[i]; k < iend; ++k) {
699 for (i = 0; i < ROW; ++i) {
702 for (k = IA[i]; k < iend; ++k) {
717 INT myid, mybegin, myend;
719#pragma omp parallel for private(myid, mybegin, myend, i, py0, k, j, pA, px0, py, iend)
721 for (myid = 0; myid < nthreads; myid++) {
723 for (i = mybegin; i < myend; ++i) {
726 for (k = IA[i]; k < iend; ++k) {
736 for (i = 0; i < ROW; ++i) {
739 for (k = IA[i]; k < iend; ++k) {
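
For the common block sizes the loops above dispatch to the fixed-size kernels fasp_blas_smat_ypAx_nc2/3/5/7 listed in the references; every other nb goes through a generic dense nb-by-nb block update per stored block. A sketch of that generic inner update, with plain types and row-major blocks assumed:

    /* y_i += A_block * x_j for one stored nb x nb block (row-major assumed). */
    static void block_ypAx(const double *pA, const double *px0, double *py, int nb)
    {
        for (int r = 0; r < nb; ++r) {
            double s = 0.0;
            for (int c = 0; c < nb; ++c) s += pA[r * nb + c] * px0[c];
            py[r] += s;
        }
    }

    /* One block row of y += A*x: py points at y + i*nb, block k lives at val + k*nb*nb,
     * and the x segment it multiplies starts at x + JA[k]*nb. */
    static void bsr_row_ypAx(int i, int nb, const int *IA, const int *JA,
                             const double *val, const double *x, double *y)
    {
        double *py = y + i * nb;
        for (int k = IA[i]; k < IA[i + 1]; ++k) {
            const double *pA  = val + (long)k * nb * nb;
            const double *px0 = x + (long)JA[k] * nb;
            block_ypAx(pA, px0, py, nb);
        }
    }
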
785 const INT nb = A->nb;
786 const INT* IA = A->IA;
787 const INT* JA = A->JA;
790 const REAL* px0 = NULL;
791 REAL * py0 = NULL, *py = NULL;
801 nthreads = fasp_get_num_threads();
831 INT myid, mybegin, myend;
833#pragma omp parallel for private(myid, mybegin, myend, i, py0, k, j, px0, py, iend)
835 for (myid = 0; myid < nthreads; myid++) {
837 for (i = mybegin; i < myend; ++i) {
840 for (k = IA[i]; k < iend; ++k) {
850 for (i = 0; i < ROW; ++i) {
853 for (k = IA[i]; k < iend; ++k) {
868 INT myid, mybegin, myend;
870#pragma omp parallel for private(myid, mybegin, myend, i, py0, k, j, px0, py, iend)
872 for (myid = 0; myid < nthreads; myid++) {
874 for (i = mybegin; i < myend; ++i) {
877 for (k = IA[i]; k < iend; ++k) {
888 for (i = 0; i < ROW; ++i) {
891 for (k = IA[i]; k < iend; ++k) {
907 INT myid, mybegin, myend;
909#pragma omp parallel for private(myid, mybegin, myend, i, py0, k, j, px0, py, iend)
911 for (myid = 0; myid < nthreads; myid++) {
913 for (i = mybegin; i < myend; ++i) {
916 for (k = IA[i]; k < iend; ++k) {
929 for (i = 0; i < ROW; ++i) {
932 for (k = IA[i]; k < iend; ++k) {
950 INT myid, mybegin, myend;
952#pragma omp parallel for private(myid, mybegin, myend, i, py0, k, j, px0, py, iend)
954 for (myid = 0; myid < nthreads; myid++) {
956 for (i = mybegin; i < myend; ++i) {
959 for (k = IA[i]; k < iend; ++k) {
974 for (i = 0; i < ROW; ++i) {
977 for (k = IA[i]; k < iend; ++k) {
997 INT myid, mybegin, myend;
999#pragma omp parallel for private(myid, mybegin, myend, i, py0, k, j, px0, py, iend)
1001 for (myid = 0; myid < nthreads; myid++) {
1003 for (i = mybegin; i < myend; ++i) {
1006 for (k = IA[i]; k < iend; ++k) {
1015 for (i = 0; i < ROW; ++i) {
1018 for (k = IA[i]; k < iend; ++k) {
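
In the _agg variant every stored block is an identity matrix, which is why no pA pointer appears in the private clauses above: the per-block product reduces to adding the matching nb-long segment of x into y. A sketch of one block row, plain types:

    /* One block row of y += alpha*A*x when every stored block of A is the identity:
     * the block product A_k * x_{JA[k]} is just the x segment itself. */
    static void bsr_row_axpy_agg(int i, int nb, double alpha, const int *IA,
                                 const int *JA, const double *x, double *y)
    {
        double *py = y + i * nb;
        for (int k = IA[i]; k < IA[i + 1]; ++k) {
            const double *px0 = x + (long)JA[k] * nb;
            for (int r = 0; r < nb; ++r) py[r] += alpha * px0[r];
        }
    }
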
1059 const INT nb = A->nb;
1060 const INT* IA = A->IA;
1061 const INT* JA = A->JA;
1065 INT size = ROW * nb;
1067 INT i, j, k, num_nnz_row;
1069 const REAL* pA = NULL;
1070 const REAL* px0 = NULL;
1077 INT myid, mybegin, myend, nthreads;
1080 nthreads = fasp_get_num_threads();
1099#pragma omp parallel private(myid, mybegin, myend, i, py0, num_nnz_row, k, j, pA, px0, \
1102 myid = omp_get_thread_num();
1104 for (i = mybegin; i < myend; ++i) {
1106 num_nnz_row = IA[i + 1] - IA[i];
1107 switch (num_nnz_row) {
1299 for (k = IA[i]; k < IA[i + 1]; ++k) {
1312 for (i = 0; i < ROW; ++i) {
1314 num_nnz_row = IA[i + 1] - IA[i];
1315 switch (num_nnz_row) {
1507 for (k = IA[i]; k < IA[i + 1]; ++k) {
1525#pragma omp parallel private(myid, mybegin, myend, i, py0, num_nnz_row, k, j, pA, px0, \
1528 myid = omp_get_thread_num();
1530 for (i = mybegin; i < myend; ++i) {
1532 num_nnz_row = IA[i + 1] - IA[i];
1533 switch (num_nnz_row) {
1725 for (k = IA[i]; k < IA[i + 1]; ++k) {
1738 for (i = 0; i < ROW; ++i) {
1740 num_nnz_row = IA[i + 1] - IA[i];
1741 switch (num_nnz_row) {
1933 for (k = IA[i]; k < IA[i + 1]; ++k) {
1951#pragma omp parallel private(myid, mybegin, myend, i, py0, num_nnz_row, k, j, pA, px0, \
1954 myid = omp_get_thread_num();
1956 for (i = mybegin; i < myend; ++i) {
1958 num_nnz_row = IA[i + 1] - IA[i];
1959 switch (num_nnz_row) {
2151 for (k = IA[i]; k < IA[i + 1]; ++k) {
2164 for (i = 0; i < ROW; ++i) {
2166 num_nnz_row = IA[i + 1] - IA[i];
2167 switch (num_nnz_row) {
2359 for (k = IA[i]; k < IA[i + 1]; ++k) {
2377#pragma omp parallel private(myid, mybegin, myend, i, py0, num_nnz_row, k, j, pA, px0, \
2380 myid = omp_get_thread_num();
2382 for (i = mybegin; i < myend; ++i) {
2384 num_nnz_row = IA[i + 1] - IA[i];
2385 switch (num_nnz_row) {
2389 pA = val + k * jump;
2396 pA = val + k * jump;
2403 pA = val + k * jump;
2413 pA = val + k * jump;
2420 pA = val + k * jump;
2427 pA = val + k * jump;
2434 pA = val + k * jump;
2444 pA = val + k * jump;
2451 pA = val + k * jump;
2458 pA = val + k * jump;
2465 pA = val + k * jump;
2472 pA = val + k * jump;
2482 pA = val + k * jump;
2489 pA = val + k * jump;
2496 pA = val + k * jump;
2503 pA = val + k * jump;
2510 pA = val + k * jump;
2517 pA = val + k * jump;
2527 pA = val + k * jump;
2534 pA = val + k * jump;
2541 pA = val + k * jump;
2548 pA = val + k * jump;
2555 pA = val + k * jump;
2562 pA = val + k * jump;
2569 pA = val + k * jump;
2577 for (k = IA[i]; k < IA[i + 1]; ++k) {
2579 pA = val + k * jump;
2590 for (i = 0; i < ROW; ++i) {
2592 num_nnz_row = IA[i + 1] - IA[i];
2593 switch (num_nnz_row) {
2597 pA = val + k * jump;
2604 pA = val + k * jump;
2611 pA = val + k * jump;
2621 pA = val + k * jump;
2628 pA = val + k * jump;
2635 pA = val + k * jump;
2642 pA = val + k * jump;
2652 pA = val + k * jump;
2659 pA = val + k * jump;
2666 pA = val + k * jump;
2673 pA = val + k * jump;
2680 pA = val + k * jump;
2690 pA = val + k * jump;
2697 pA = val + k * jump;
2704 pA = val + k * jump;
2711 pA = val + k * jump;
2718 pA = val + k * jump;
2725 pA = val + k * jump;
2735 pA = val + k * jump;
2742 pA = val + k * jump;
2749 pA = val + k * jump;
2756 pA = val + k * jump;
2763 pA = val + k * jump;
2770 pA = val + k * jump;
2777 pA = val + k * jump;
2785 for (k = IA[i]; k < IA[i + 1]; ++k) {
2787 pA = val + k * jump;
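
fasp_blas_dbsr_mxv computes num_nnz_row = IA[i+1] - IA[i] and switches on it so that short rows (the common case for stencil-like matrices) run a fully unrolled body, while anything outside the handled range falls back to the plain loop over k. A compressed sketch of that dispatch (only one unrolled case shown), assuming y was zeroed beforehand as in the memset earlier in the file:

    /* y += A*x for one nb x nb row-major block (same role as block_ypAx earlier). */
    static void blk_ypAx(const double *pA, const double *px, double *py, int nb)
    {
        for (int r = 0; r < nb; ++r)
            for (int c = 0; c < nb; ++c) py[r] += pA[r * nb + c] * px[c];
    }

    /* Row dispatch: unroll short rows, fall back to the generic loop otherwise. */
    static void bsr_row_mxv_dispatch(int i, int nb, const int *IA, const int *JA,
                                     const double *val, const double *x, double *y)
    {
        const int nb2 = nb * nb;
        double *py = y + i * nb;
        int k = IA[i];
        switch (IA[i + 1] - IA[i]) {
            case 3:   /* e.g. a 1-D stencil row: three fully unrolled block products */
                blk_ypAx(val + (long)k * nb2, x + (long)JA[k] * nb, py, nb); ++k;
                blk_ypAx(val + (long)k * nb2, x + (long)JA[k] * nb, py, nb); ++k;
                blk_ypAx(val + (long)k * nb2, x + (long)JA[k] * nb, py, nb);
                break;
            default:  /* generic fallback, identical to the plain BSR row loop */
                for (k = IA[i]; k < IA[i + 1]; ++k)
                    blk_ypAx(val + (long)k * nb2, x + (long)JA[k] * nb, py, nb);
                break;
        }
    }
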
2819 const INT nb = A->nb;
2820 const INT size = ROW * nb;
2821 const INT* IA = A->IA;
2822 const INT* JA = A->JA;
2825 const REAL* px0 = NULL;
2826 REAL * py0 = NULL, *py = NULL;
2827 INT i, j, k, num_nnz_row;
2833 INT myid, mybegin, myend, nthreads;
2836 nthreads = fasp_get_num_threads();
2855#pragma omp parallel private(myid, mybegin, myend, i, py0, num_nnz_row, k, j, pA, px0, \
2858 myid = omp_get_thread_num();
2860 for (i = mybegin; i < myend; ++i) {
2862 num_nnz_row = IA[i + 1] - IA[i];
2863 switch (num_nnz_row) {
3055 for (k = IA[i]; k < IA[i + 1]; ++k) {
3068 for (i = 0; i < ROW; ++i) {
3070 num_nnz_row = IA[i + 1] - IA[i];
3071 switch (num_nnz_row) {
3288 for (k = IA[i]; k < IA[i + 1]; ++k) {
3307#pragma omp parallel private(myid, mybegin, myend, i, py0, num_nnz_row, k, j, pA, px0, \
3310 myid = omp_get_thread_num();
3312 for (i = mybegin; i < myend; ++i) {
3314 num_nnz_row = IA[i + 1] - IA[i];
3315 switch (num_nnz_row) {
3508 for (k = IA[i]; k < IA[i + 1]; ++k) {
3521 for (i = 0; i < ROW; ++i) {
3523 num_nnz_row = IA[i + 1] - IA[i];
3524 switch (num_nnz_row) {
3792 for (k = IA[i]; k < IA[i + 1]; ++k) {
3813#pragma omp parallel private(myid, mybegin, myend, i, py0, num_nnz_row, k, j, pA, px0, \
3816 myid = omp_get_thread_num();
3818 for (i = mybegin; i < myend; ++i) {
3820 num_nnz_row = IA[i + 1] - IA[i];
3821 switch (num_nnz_row) {
4014 for (k = IA[i]; k < IA[i + 1]; ++k) {
4027 for (i = 0; i < ROW; ++i) {
4029 num_nnz_row = IA[i + 1] - IA[i];
4030 switch (num_nnz_row) {
4348 for (k = IA[i]; k < IA[i + 1]; ++k) {
4371#pragma omp parallel private(myid, mybegin, myend, i, py0, num_nnz_row, k, j, pA, px0, \
4374 myid = omp_get_thread_num();
4376 for (i = mybegin; i < myend; ++i) {
4378 num_nnz_row = IA[i + 1] - IA[i];
4379 switch (num_nnz_row) {
4547 for (k = IA[i]; k < IA[i + 1]; ++k) {
4559 for (i = 0; i < ROW; ++i) {
4561 num_nnz_row = IA[i + 1] - IA[i];
4562 switch (num_nnz_row) {
4730 for (k = IA[i]; k < IA[i + 1]; ++k) {
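
A usage-level sketch of the two products documented in the references below: the general fasp_blas_dbsr_mxv and the aggregation variant fasp_blas_dbsr_mxv_agg, which is only valid when every stored block really is an identity block. The header name fasp.h is an assumption; fasp_functs.h is the header included at the top of this file:

    #include "fasp.h"        /* assumed FASP main header: dBSRmat, REAL */
    #include "fasp_functs.h" /* function prototypes, as included above */

    void apply_bsr(const dBSRmat *A, const dBSRmat *P, const REAL *x, REAL *y, REAL *z)
    {
        /* y := A*x, general blocks */
        fasp_blas_dbsr_mxv(A, x, y);

        /* z := P*x, valid only if every stored block of P is an identity block */
        fasp_blas_dbsr_mxv_agg(P, x, z);
    }
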
4763 INT i, j, k, l, count;
4766 const INT nb = A->nb;
4767 const INT nb2 = nb * nb;
4770 if ((A->COL != B->ROW) && (A->nb != B->nb)) {
4771 printf("### ERROR: Matrix sizes do not match!\n");
4786 for (i = 0; i < B->COL; ++i) JD[i] = -1;
4789 for (i = 0; i < C->ROW; ++i) {
4792 for (k = A->IA[i]; k < A->IA[i + 1]; ++k) {
4793 for (j = B->IA[A->JA[k]]; j < B->IA[A->JA[k] + 1]; ++j) {
4794 for (l = 0; l < count; l++) {
4795 if (JD[l] == B->JA[j]) break;
4799 JD[count] = B->JA[j];
4804 C->IA[i + 1] = count;
4805 for (j = 0; j < count; ++j) {
4810 for (i = 0; i < C->ROW; ++i) C->IA[i + 1] += C->IA[i];
4817 for (i = 0; i < C->ROW; ++i) {
4820 for (k = A->IA[i]; k < A->IA[i + 1]; ++k) {
4821 for (j = B->IA[A->JA[k]]; j < B->IA[A->JA[k] + 1]; ++j) {
4822 for (l = 0; l < countJD; l++) {
4823 if (JD[l] == B->JA[j]) break;
4827 C->JA[count] = B->JA[j];
4828 JD[countJD] = B->JA[j];
4845 for (i = 0; i < C->ROW; ++i) {
4846 for (j = C->IA[i]; j < C->IA[i + 1]; ++j) {
4850 for (k = A->IA[i]; k < A->IA[i + 1]; ++k) {
4851 for (l = B->IA[A->JA[k]]; l < B->IA[A->JA[k] + 1]; l++) {
4852 if (B->JA[l] == C->JA[j]) {
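
fasp_blas_dbsr_mxm works in three passes over the rows of C = A*B: count the distinct block columns per row with a scratch list JD (linear search per candidate), fill C->JA the same way, then accumulate the block products. A sketch of the counting pass for one row, plain types:

    /* Count the distinct block-column indices of row i of C = A*B.
     * JD is scratch of length B->COL; the result becomes C->IA[i+1] before the prefix sum. */
    static int count_row_nnz(int i, const int *AIA, const int *AJA,
                             const int *BIA, const int *BJA, int *JD)
    {
        int count = 0;
        for (int k = AIA[i]; k < AIA[i + 1]; ++k) {
            for (int j = BIA[AJA[k]]; j < BIA[AJA[k] + 1]; ++j) {
                int l;
                for (l = 0; l < count; l++)               /* already recorded? */
                    if (JD[l] == BJA[j]) break;
                if (l == count) JD[count++] = BJA[j];     /* new block column */
            }
        }
        return count;
    }
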
4897 INT ia, ib, ic, ja, jb;
4898 INT num_nonzeros = 0;
4899 INT row_start, counter;
4900 REAL *a_entry, *b_entry;
4901 INT* B_marker = NULL;
4903 const INT nb = A->nb;
4904 const INT nb2 = nb * nb;
4909 if ((A->COL != B->ROW) && (A->nb != B->nb)) {
4910 printf("### ERROR: Matrix sizes do not match!\n");
4927 for (ic = 0; ic < nrows_A; ic++) {
4928 for (ia = A_i[ic]; ia < A_i[ic + 1]; ia++) {
4930 for (ib = B_i[ja]; ib < B_i[ja + 1]; ib++) {
4932 if (B_marker[jb] != ic) {
4938 C->IA[ic + 1] = num_nonzeros;
4941 C->NNZ = num_nonzeros;
4952 for (ic = 0; ic < nrows_A; ic++) {
4953 row_start = C->IA[ic];
4954 for (ia = A_i[ic]; ia < A_i[ic + 1]; ia++) {
4956 a_entry = A_data + ia * nb2;
4957 for (ib = B_i[ja]; ib < B_i[ja + 1]; ib++) {
4959 b_entry = B_data + ib * nb2;
4962 if (B_marker[jb] < row_start) {
4963 B_marker[jb] = counter;
4964 C->JA[B_marker[jb]] = jb;
4966 memcpy(C->val + B_marker[jb] * nb2, temp, nb2 * sizeof(REAL));
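
fasp_blas_dbsr_mxm2 replaces the linear search with a marker array of length B->COL: a block column jb is new for row ic exactly when B_marker[jb] != ic in the symbolic pass (and when B_marker[jb] < row_start in the numeric pass). A sketch of the symbolic counting pass, plain types:

    /* Symbolic pass of C = A*B with a marker array: B_marker[jb] records the last
     * row ic in which block column jb was counted.  Returns the total nonzero blocks. */
    static int symbolic_mxm(int nrows_A, const int *A_i, const int *A_j,
                            const int *B_i, const int *B_j,
                            int ncols_B, int *B_marker, int *C_i)
    {
        int num_nonzeros = 0;
        for (int jb = 0; jb < ncols_B; ++jb) B_marker[jb] = -1;   /* no row yet */
        C_i[0] = 0;
        for (int ic = 0; ic < nrows_A; ic++) {
            for (int ia = A_i[ic]; ia < A_i[ic + 1]; ia++) {
                int ja = A_j[ia];
                for (int ib = B_i[ja]; ib < B_i[ja + 1]; ib++) {
                    int jb = B_j[ib];
                    if (B_marker[jb] != ic) {     /* first time jb appears in row ic */
                        B_marker[jb] = ic;
                        num_nonzeros++;
                    }
                }
            }
            C_i[ic + 1] = num_nonzeros;
        }
        return num_nonzeros;
    }
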
5011 INT ia, ib, ic, ja, jb;
5012 INT num_nonzeros = 0;
5013 INT row_start, counter;
5014 REAL *a_entry, *b_entry, *d_entry;
5015 INT* B_marker = NULL;
5017 const INT nb = A->nb;
5018 const INT nb2 = nb * nb;
5024 printf("### ERROR: Matrix sizes do not match!\n");
5041 for (ic = 0; ic < nrows_A; ic++) {
5042 for (ia = A_i[ic]; ia < A_i[ic + 1]; ia++) {
5044 for (ib = B_i[ja]; ib < B_i[ja + 1]; ib++) {
5046 if (B_marker[jb] != ic) {
5052 C->IA[ic + 1] = num_nonzeros;
5055 C->NNZ = num_nonzeros;
5067 for (ic = 0; ic < nrows_A; ic++) {
5068 row_start = C->IA[ic];
5069 for (ia = A_i[ic]; ia < A_i[ic + 1]; ia++) {
5071 a_entry = A_data + ia * nb2;
5072 d_entry = D->val + ja * nb2;
5075 for (ib = B_i[ja]; ib < B_i[ja + 1]; ib++) {
5077 b_entry = B_data + ib * nb2;
5080 if (B_marker[jb] < row_start) {
5081 B_marker[jb] = counter;
5082 C->JA[B_marker[jb]] = jb;
5084 memcpy(C->val + B_marker[jb] * nb2, temp, nb2 * sizeof(REAL));
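
In fasp_blas_dbsr_mxm_adb the numeric pass forms, for every block pair, the dense products temp = a_entry * d_entry and then temp * b_entry (d_entry being the block of the block-diagonal D at column ja) and scatters the result into C->val with the same marker logic. A sketch of one such contribution, with a plain dense multiply standing in for fasp_blas_smat_mul and accumulation into an assumed zero-initialized C block (the original distinguishes a first-touch memcpy from later accumulation):

    /* c = a*b for nb x nb row-major blocks (stands in for fasp_blas_smat_mul). */
    static void blk_mul(const double *a, const double *b, double *c, int nb)
    {
        for (int r = 0; r < nb; ++r)
            for (int j = 0; j < nb; ++j) {
                double s = 0.0;
                for (int t = 0; t < nb; ++t) s += a[r * nb + t] * b[t * nb + j];
                c[r * nb + j] = s;
            }
    }

    /* One contribution A(ic,ja)*D(ja)*B(ja,jb) accumulated into C(ic,jb).
     * ad and adb are caller-provided nb2-sized scratch blocks. */
    static void adb_contrib(const double *a_entry, const double *d_entry,
                            const double *b_entry, double *c_entry,
                            double *ad, double *adb, int nb)
    {
        blk_mul(a_entry, d_entry, ad,  nb);   /* ad  = A(ic,ja) * D(ja)    */
        blk_mul(ad,      b_entry, adb, nb);   /* adb = (A*D)    * B(ja,jb) */
        for (int t = 0; t < nb * nb; ++t) c_entry[t] += adb[t];
    }
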
5117void fasp_blas_dbsr_schur(
5132 INT ia, ib, ic, ja, jb, i;
5133 INT num_nonzeros = 0;
5134 INT row_start, counter;
5135 REAL *a_entry, *b_entry, *d1_entry, *d2_entry;
5136 INT* B_marker = NULL;
5138 const INT nb = A->nb;
5139 const INT nb2 = nb * nb;
5145 printf("### ERROR: Matrix sizes do not match!\n");
5162 for (ic = 0; ic < nrows_A; ic++) {
5163 for (ia = A_i[ic]; ia < A_i[ic + 1]; ia++) {
5165 for (ib = B_i[ja]; ib < B_i[ja + 1]; ib++) {
5167 if (B_marker[jb] != ic) {
5173 C->IA[ic + 1] = num_nonzeros;
5176 C->NNZ = num_nonzeros;
5188 for (i = 0; i < D1->row; i++) {
5189 D1_neg.val[i] = -D1->val[i];
5194 for (ic = 0; ic < nrows_A; ic++) {
5195 row_start = C->IA[ic];
5196 d2_entry = D2->val + ic * nb2;
5197 for (ia = A_i[ic]; ia < A_i[ic + 1]; ia++) {
5199 a_entry = A_data + ia * nb2;
5200 d1_entry = D1_neg.val + ja * nb2;
5203 for (ib = B_i[ja]; ib < B_i[ja + 1]; ib++) {
5205 b_entry = B_data + ib * nb2;
5208 if (B_marker[jb] < row_start) {
5209 B_marker[jb] = counter;
5210 C->JA[B_marker[jb]] = jb;
5215 C->val + B_marker[jb] * nb2);
5218 memcpy(C->val + B_marker[jb] * nb2, temp, nb2 * sizeof(REAL));
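
The Schur routine first builds a negated copy of the block-diagonal D1 (the dvector D1_neg above), so the accumulated A*D1_neg*B products subtract from the D2 blocks placed on the diagonal of C. A sketch of that negation step using the dvector helpers from the references; the header name fasp.h and reading D1->row as the number of stored REAL entries are assumptions taken from the fragment:

    #include "fasp.h"        /* assumed FASP main header: dvector, INT, REAL */
    #include "fasp_functs.h"

    /* Build D1_neg with D1_neg.val[i] = -D1->val[i], as in the fragment above. */
    dvector negate_block_diagonal(const dvector *D1)
    {
        dvector D1_neg = fasp_dvec_create(D1->row);
        for (INT i = 0; i < D1->row; i++) D1_neg.val[i] = -D1->val[i];
        return D1_neg;   /* caller releases it later with fasp_dvec_free(&D1_neg) */
    }
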
5260 const INT row = R->ROW, col = P->COL, nb = A->nb, nb2 = A->nb * A->nb;
5263 const INT * ir = R->IA, *ia = A->IA, *ip = P->IA;
5264 const INT * jr = R->JA, *ja = A->JA, *jp = P->JA;
5270 INT i, i1, j, jj, k, length;
5271 INT begin_row, end_row, begin_rowA, end_rowA, begin_rowR, end_rowR;
5272 INT istart, iistart, count;
5280 for (i = 0; i < A->COL; ++i) index[i] = -2;
5282 memcpy(iindex, index, col * sizeof(INT));
5293 for (i = 0; i < row; ++i) {
5302 for (jj = begin_rowR; jj < end_rowR; ++jj) {
5306 end_rowA = ia[j + 1];
5307 for (k = begin_rowA; k < end_rowA; ++k) {
5308 if (index[ja[k]] == -2) {
5309 index[ja[k]] = istart;
5322 for (j = 0; j < count; ++j) {
5324 istart = index[istart];
5329 end_row = ip[jj + 1];
5330 for (k = begin_row; k < end_row; ++k) {
5332 if (iindex[jp[k]] == -2) {
5333 iindex[jp[k]] = iistart;
5341 iac[i1] = iac[i] + length;
5351 for (j = begin_row; j < end_row; ++j) {
5355 iistart = iindex[iistart];
5357 iindex[jac[j]] = -2;
5368 for (i = 0; i < row; ++i) {
5373 for (j = begin_row; j < end_row; ++j) {
5374 BTindex[jac[j]] = j;
5383 for (jj = begin_rowR; jj < end_rowR; ++jj) {
5387 end_rowA = ia[j + 1];
5388 for (k = begin_rowA; k < end_rowA; ++k) {
5389 if (index[ja[k]] == -2) {
5390 index[ja[k]] = istart;
5405 for (j = 0; j < length; ++j) {
5407 istart = index[istart];
5412 end_row = ip[jj + 1];
5413 for (k = begin_row; k < end_row; ++k) {
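
The serial RAP collects the column pattern of each row of R*A*P with an implicit linked list: index[] is filled with -2, each newly seen column is chained in front via index[new] = istart; istart = new, and a second walk over the chain emits the indices while restoring the -2 sentinel. A small self-contained illustration of that device on a plain list of candidate columns:

    #include <stdio.h>

    /* Collect the distinct values in cand[0..n) with the -2 sentinel linked list
     * used by the RAP routines: no sorting and no extra pass to clear the marks. */
    static int collect_distinct(const int *cand, int n, int *index, int *out)
    {
        int istart = -1, length = 0;

        for (int k = 0; k < n; ++k) {
            int col = cand[k];
            if (index[col] == -2) {   /* first time this column appears */
                index[col] = istart;  /* chain it in front of the list */
                istart = col;
                ++length;
            }
        }
        /* walk the chain: emit columns and restore the -2 sentinel for the next row */
        for (int j = 0; j < length; ++j) {
            out[j] = istart;
            int next = index[istart];
            index[istart] = -2;
            istart = next;
        }
        return length;
    }

    int main(void)
    {
        int index[8]; for (int i = 0; i < 8; ++i) index[i] = -2;
        int cand[6] = {3, 1, 3, 5, 1, 0}, out[8];
        int len = collect_distinct(cand, 6, index, out);
        for (int j = 0; j < len; ++j) printf("%d ", out[j]);  /* prints: 0 5 1 3 */
        return 0;
    }
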
5471 const INT row = R->ROW, col = P->COL, nb = A->nb, nb2 = A->nb * A->nb;
5474 const INT * ir = R->IA, *ia = A->IA, *ip = P->IA;
5475 const INT * jr = R->JA, *ja = A->JA, *jp = P->JA;
5480 INT* Ps_marker = NULL;
5481 INT* As_marker = NULL;
5484 INT* P_marker = NULL;
5485 INT* A_marker = NULL;
5486 REAL* smat_tmp = NULL;
5489 INT i, i1, i2, i3, jj1, jj2, jj3;
5490 INT counter, jj_row_begining;
5495 INT myid, mybegin, myend, Ctemp;
5496 nthreads = fasp_get_num_threads();
5501 INT coarse_mul_nthreads = n_coarse * nthreads;
5502 INT fine_mul_nthreads = n_fine * nthreads;
5503 INT coarse_add_nthreads = n_coarse + nthreads;
5504 INT minus_one_length = coarse_mul_nthreads + fine_mul_nthreads;
5505 INT total_calloc = minus_one_length + coarse_add_nthreads + nthreads;
5508 As_marker = Ps_marker + coarse_mul_nthreads;
5520 INT* RAP_temp = As_marker + fine_mul_nthreads;
5521 INT* part_end = RAP_temp + coarse_add_nthreads;
5524#pragma omp parallel for private(myid, mybegin, myend, Ctemp, P_marker, A_marker, \
5525 counter, i, jj_row_begining, jj1, i1, jj2, i2, \
5527 for (myid = 0; myid < nthreads; myid++) {
5529 P_marker = Ps_marker + myid * n_coarse;
5530 A_marker = As_marker + myid * n_fine;
5532 for (i = mybegin; i < myend; ++i) {
5533 P_marker[i] = counter;
5534 jj_row_begining = counter;
5536 for (jj1 = ir[i]; jj1 < ir[i + 1]; ++jj1) {
5538 for (jj2 = ia[i1]; jj2 < ia[i1 + 1]; ++jj2) {
5540 if (A_marker[i2] != i) {
5542 for (jj3 = ip[i2]; jj3 < ip[i2 + 1]; ++jj3) {
5544 if (P_marker[i3] < jj_row_begining) {
5545 P_marker[i3] = counter;
5552 RAP_temp[i + myid] = jj_row_begining;
5554 RAP_temp[myend + myid] = counter;
5555 part_end[myid] = myend + myid + 1;
5558 counter = part_end[0];
5560 for (i1 = 1; i1 < nthreads; i1++) {
5561 Ctemp += RAP_temp[part_end[i1 - 1] - 1];
5562 for (jj1 = part_end[i1 - 1] + 1; jj1 < part_end[i1]; jj1++) {
5563 iac[counter] = RAP_temp[jj1] + Ctemp;
5570 for (i = 0; i < row; ++i) {
5571 Ps_marker[i] = counter;
5572 jj_row_begining = counter;
5575 for (jj1 = ir[i]; jj1 < ir[i + 1]; ++jj1) {
5577 for (jj2 = ia[i1]; jj2 < ia[i1 + 1]; ++jj2) {
5579 if (As_marker[i2] != i) {
5581 for (jj3 = ip[i2]; jj3 < ip[i2 + 1]; ++jj3) {
5583 if (Ps_marker[i3] < jj_row_begining) {
5584 Ps_marker[i3] = counter;
5591 iac[i] = jj_row_begining;
5610#pragma omp parallel for private(myid, mybegin, myend, Ctemp, P_marker, A_marker, \
5611 counter, i, jj_row_begining, jj1, i1, jj2, i2, \
5613 for (myid = 0; myid < nthreads; myid++) {
5615 P_marker = Ps_marker + myid * n_coarse;
5616 A_marker = As_marker + myid * n_fine;
5617 smat_tmp = tmp + myid * 2 * nb2;
5618 counter = iac[mybegin];
5619 for (i = mybegin; i < myend; ++i) {
5620 P_marker[i] = counter;
5621 jj_row_begining = counter;
5626 for (jj1 = ir[i]; jj1 < ir[i + 1]; ++jj1) {
5628 for (jj2 = ia[i1]; jj2 < ia[i1 + 1]; ++jj2) {
5632 if (A_marker[i2] != i) {
5634 for (jj3 = ip[i2]; jj3 < ip[i2 + 1]; ++jj3) {
5637 smat_tmp + nb2, nb);
5638 if (P_marker[i3] < jj_row_begining) {
5639 P_marker[i3] = counter;
5641 &acj[counter * nb2]);
5646 &acj[P_marker[i3] * nb2]);
5650 for (jj3 = ip[i2]; jj3 < ip[i2 + 1]; jj3++) {
5653 smat_tmp + nb2, nb);
5655 &acj[P_marker[i3] * nb2]);
5665 for (i = 0; i < row; ++i) {
5666 Ps_marker[i] = counter;
5667 jj_row_begining = counter;
5672 for (jj1 = ir[i]; jj1 < ir[i + 1]; ++jj1) {
5674 for (jj2 = ia[i1]; jj2 < ia[i1 + 1]; ++jj2) {
5677 if (As_marker[i2] != i) {
5679 for (jj3 = ip[i2]; jj3 < ip[i2 + 1]; ++jj3) {
5682 if (Ps_marker[i3] < jj_row_begining) {
5683 Ps_marker[i3] = counter;
5689 &acj[Ps_marker[i3] * nb2]);
5693 for (jj3 = ip[i2]; jj3 < ip[i2 + 1]; jj3++) {
5697 &acj[Ps_marker[i3] * nb2]);
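
In the OpenMP version each thread counts its chunk of rows from a private counter, stores the per-row offsets in RAP_temp, and records its chunk boundary in part_end; a serial stitch then adds a running Ctemp offset so that iac[] becomes one global row-pointer array. A simplified sketch of that stitch, done here as a plain prefix sum over per-thread local row counts rather than the RAP_temp/part_end packing used above:

    /* local_counts[t][r] holds the block count of the r-th row owned by thread t.
     * The stitch turns these local counts into the global row pointer iac[]. */
    static void stitch_row_pointers(int nthreads, const int *rows_per_thread,
                                    int *const *local_counts, int *iac)
    {
        int pos = 0, offset = 0;
        iac[0] = 0;
        for (int t = 0; t < nthreads; ++t)
            for (int r = 0; r < rows_per_thread[t]; ++r) {
                offset += local_counts[t][r];
                iac[++pos] = offset;
            }
    }
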
5747 const INT * ir = R->IA, *ia = A->IA, *ip = P->IA;
5748 const INT * jr = R->JA, *ja = A->JA, *jp = P->JA;
5752 INT* Ps_marker = NULL;
5753 INT* As_marker = NULL;
5756 INT* P_marker = NULL;
5757 INT* A_marker = NULL;
5760 INT i, i1, i2, i3, jj1, jj2, jj3;
5761 INT counter, jj_row_begining;
5767 INT myid, mybegin, myend, Ctemp;
5768 nthreads = fasp_get_num_threads();
5773 INT coarse_mul_nthreads = n_coarse * nthreads;
5774 INT fine_mul_nthreads = n_fine * nthreads;
5775 INT coarse_add_nthreads = n_coarse + nthreads;
5776 INT minus_one_length = coarse_mul_nthreads + fine_mul_nthreads;
5777 INT total_calloc = minus_one_length + coarse_add_nthreads + nthreads;
5780 As_marker = Ps_marker + coarse_mul_nthreads;
5790 INT* RAP_temp = As_marker + fine_mul_nthreads;
5791 INT* part_end = RAP_temp + coarse_add_nthreads;
5794#pragma omp parallel for private(myid, mybegin, myend, Ctemp, P_marker, A_marker, \
5795 counter, i, jj_row_begining, jj1, i1, jj2, i2, \
5797 for (myid = 0; myid < nthreads; myid++) {
5799 P_marker = Ps_marker + myid * n_coarse;
5800 A_marker = As_marker + myid * n_fine;
5802 for (i = mybegin; i < myend; ++i) {
5803 P_marker[i] = counter;
5804 jj_row_begining = counter;
5806 for (jj1 = ir[i]; jj1 < ir[i + 1]; ++jj1) {
5808 for (jj2 = ia[i1]; jj2 < ia[i1 + 1]; ++jj2) {
5810 if (A_marker[i2] != i) {
5812 for (jj3 = ip[i2]; jj3 < ip[i2 + 1]; ++jj3) {
5814 if (P_marker[i3] < jj_row_begining) {
5815 P_marker[i3] = counter;
5822 RAP_temp[i + myid] = jj_row_begining;
5824 RAP_temp[myend + myid] = counter;
5825 part_end[myid] = myend + myid + 1;
5828 counter = part_end[0];
5830 for (i1 = 1; i1 < nthreads; i1++) {
5831 Ctemp += RAP_temp[part_end[i1 - 1] - 1];
5832 for (jj1 = part_end[i1 - 1] + 1; jj1 < part_end[i1]; jj1++) {
5833 iac[counter] = RAP_temp[jj1] + Ctemp;
5841 for (i = 0; i < row; ++i) {
5842 Ps_marker[i] = counter;
5843 jj_row_begining = counter;
5846 for (jj1 = ir[i]; jj1 < ir[i + 1]; ++jj1) {
5848 for (jj2 = ia[i1]; jj2 < ia[i1 + 1]; ++jj2) {
5850 if (As_marker[i2] != i) {
5852 for (jj3 = ip[i2]; jj3 < ip[i2 + 1]; ++jj3) {
5854 if (Ps_marker[i3] < jj_row_begining) {
5855 Ps_marker[i3] = counter;
5862 iac[i] = jj_row_begining;
5884#pragma omp parallel for private(myid, mybegin, myend, P_marker, A_marker, counter, i, \
5885 jj_row_begining, jj1, i1, jj2, i2, jj3, i3)
5886 for (myid = 0; myid < nthreads; myid++) {
5888 P_marker = Ps_marker + myid * n_coarse;
5889 A_marker = As_marker + myid * n_fine;
5890 counter = iac[mybegin];
5891 for (i = mybegin; i < myend; ++i) {
5892 P_marker[i] = counter;
5893 jj_row_begining = counter;
5899 for (jj1 = ir[i]; jj1 < ir[i + 1]; ++jj1) {
5901 for (jj2 = ia[i1]; jj2 < ia[i1 + 1]; ++jj2) {
5904 if (A_marker[i2] != i) {
5906 for (jj3 = ip[i2]; jj3 < ip[i2 + 1]; ++jj3) {
5909 if (P_marker[i3] < jj_row_begining) {
5910 P_marker[i3] = counter;
5912 &acj[counter * nb2]);
5917 &acj[P_marker[i3] * nb2]);
5921 for (jj3 = ip[i2]; jj3 < ip[i2 + 1]; jj3++) {
5924 &acj[P_marker[i3] * nb2]);
5934 for (i = 0; i < row; ++i) {
5935 Ps_marker[i] = counter;
5936 jj_row_begining = counter;
5941 for (jj1 = ir[i]; jj1 < ir[i + 1]; ++jj1) {
5943 for (jj2 = ia[i1]; jj2 < ia[i1 + 1]; ++jj2) {
5946 if (As_marker[i2] != i) {
5948 for (jj3 = ip[i2]; jj3 < ip[i2 + 1]; ++jj3) {
5950 if (Ps_marker[i3] < jj_row_begining) {
5951 Ps_marker[i3] = counter;
5953 &acj[counter * nb2]);
5958 &acj[Ps_marker[i3] * nb2]);
5962 for (jj3 = ip[i2]; jj3 < ip[i2 + 1]; jj3++) {
5965 &acj[Ps_marker[i3] * nb2]);
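
A usage-level sketch of the triple product documented in the references below, as used to form a Galerkin coarse operator B = R*A*P in BSR format. The header name fasp.h and the assumption that the output matrix is allocated inside the routine (suggested, but not confirmed, by the fragments above) are assumptions:

    #include "fasp.h"        /* assumed FASP main header: dBSRmat */
    #include "fasp_functs.h"

    /* Form the coarse-level matrix Ac = R*A*P. */
    void form_coarse_operator(const dBSRmat *R, const dBSRmat *A, const dBSRmat *P,
                              dBSRmat *Ac)
    {
        fasp_blas_dbsr_rap(R, A, P, Ac);

        /* If R and P come from unsmoothed aggregation (identity blocks only),
           the cheaper variant below produces the same product:
           fasp_blas_dbsr_rap_agg(R, A, P, Ac);                              */
    }
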
void fasp_darray_set(const INT n, REAL *x, const REAL val)
Set initial value for an array to be x=val.
void fasp_iarray_set(const INT n, INT *x, const INT val)
Set initial value for an array to be x=val.
void fasp_darray_cp(const INT n, const REAL *x, REAL *y)
Copy one array to another, y=x.
void fasp_iarray_cp(const INT n, const INT *x, INT *y)
Copy one array to another, y=x.
void fasp_mem_free(void *mem)
Free previously allocated memory and set the pointer to NULL.
void * fasp_mem_realloc(void *oldmem, const LONGLONG tsize)
Reallocate, initialize, and check memory.
void * fasp_mem_calloc(const unsigned int size, const unsigned int type)
Allocate, initialize, and check memory.
void fasp_chkerr(const SHORT status, const char *fctname)
Check error status and print out error messages before quitting.
void fasp_get_start_end(const INT procid, const INT nprocs, const INT n, INT *start, INT *end)
Assign load to each thread.
void fasp_dvec_free(dvector *u)
Free vector data space of REAL type.
dvector fasp_dvec_create(const INT m)
Create dvector data space of REAL type.
void fasp_blas_darray_ax(const INT n, const REAL a, REAL *x)
x = a*x
void fasp_blas_darray_axpy(const INT n, const REAL a, const REAL *x, REAL *y)
y = a*x + y
void fasp_blas_smat_ypAx(const REAL *A, const REAL *x, REAL *y, const INT n)
Compute y := y + Ax, where 'A' is an n*n dense matrix.
void fasp_blas_smat_ypAx_nc3(const REAL *A, const REAL *x, REAL *y)
Compute y := y + Ax, where 'A' is a 3*3 dense matrix.
void fasp_blas_smat_ypAx_nc2(const REAL *A, const REAL *x, REAL *y)
Compute y := y + Ax, where 'A' is a 2*2 dense matrix.
void fasp_blas_smat_mul(const REAL *a, const REAL *b, REAL *c, const INT n)
Compute the matrix product of two small full matrices a and b, stored in c.
void fasp_blas_smat_add(const REAL *a, const REAL *b, const INT n, const REAL alpha, const REAL beta, REAL *c)
Compute c = alpha*a + beta*b.
void fasp_blas_smat_axm1(REAL *a, const INT n, const REAL alpha, REAL *b)
Compute b = alpha*a (in place)
void fasp_blas_smat_ypAx_nc5(const REAL *A, const REAL *x, REAL *y)
Compute y := y + Ax, where 'A' is a 5*5 dense matrix.
void fasp_blas_smat_ypAx_nc7(const REAL *A, const REAL *x, REAL *y)
Compute y := y + Ax, where 'A' is a 7*7 dense matrix.
void fasp_dbsr_alloc(const INT ROW, const INT COL, const INT NNZ, const INT nb, const INT storage_manner, dBSRmat *A)
Allocate memory space for BSR format sparse matrix.
void fasp_blas_dbsr_rap1(const dBSRmat *R, const dBSRmat *A, const dBSRmat *P, dBSRmat *B)
dBSRmat sparse matrix multiplication B=R*A*P
void fasp_blas_dbsr_axm(dBSRmat *A, const REAL alpha)
Multiply a sparse matrix A in BSR format by a scalar alpha.
void fasp_blas_dbsr_mxm(const dBSRmat *A, const dBSRmat *B, dBSRmat *C)
Sparse matrix multiplication C=A*B.
void fasp_blas_dbsr_rap_agg(const dBSRmat *R, const dBSRmat *A, const dBSRmat *P, dBSRmat *B)
dBSRmat sparse matrix multiplication B=R*A*P, where the small block matrices in P and R are identity matrices.
void fasp_blas_dbsr_aAxpby(const REAL alpha, dBSRmat *A, REAL *x, const REAL beta, REAL *y)
Compute y := alpha*A*x + beta*y.
void fasp_blas_dbsr_mxv_agg(const dBSRmat *A, const REAL *x, REAL *y)
Compute y := A*x, where each small block matrices of A is an identity.
void fasp_blas_dbsr_mxm2(const dBSRmat *A, const dBSRmat *B, dBSRmat *C)
Sparse matrix multiplication C=A*B.
void fasp_blas_dbsr_mxm_adb(const dBSRmat *A, dvector *D, const dBSRmat *B, dBSRmat *C)
Sparse matrix multiplication C=A*D*B, where D is a diagonal matrix.
void fasp_blas_dbsr_aAxpy(const REAL alpha, const dBSRmat *A, const REAL *x, REAL *y)
Compute y := alpha*A*x + y.
void fasp_blas_dbsr_mxv(const dBSRmat *A, const REAL *x, REAL *y)
Compute y := A*x.
SHORT fasp_blas_dbsr_add(const dBSRmat *A, const REAL alpha, const dBSRmat *B, const REAL beta, dBSRmat *C)
Compute C = alpha*A + beta*B in BSR format.
void fasp_blas_dbsr_aAxpy_agg(const REAL alpha, const dBSRmat *A, const REAL *x, REAL *y)
Compute y := alpha*A*x + y where each small block matrix is an identity matrix.
void fasp_blas_dbsr_rap(const dBSRmat *R, const dBSRmat *A, const dBSRmat *P, dBSRmat *B)
dBSRmat sparse matrix multiplication B=R*A*P
Main header file for the FASP project.
#define SHORT
FASP integer and floating point numbers.
#define FASP_SUCCESS
Definition of return status and error messages.
#define TRUE
Definition of logic type.
Block sparse row storage matrix of REAL type.
INT COL
number of cols of sub-blocks in matrix A, N
INT NNZ
number of nonzero sub-blocks in matrix A, NNZ
INT nb
dimension of each sub-block
INT * IA
integer array of row pointers, the size is ROW+1
INT ROW
number of rows of sub-blocks in matrix A, M
INT storage_manner
storage manner for each sub-block
Vector with n entries of REAL type.
REAL * val
actual vector entries
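
Tying the structure fields above together: a dBSRmat stores ROW block rows, COL block columns, NNZ blocks of size nb x nb, row pointers IA of length ROW+1, block column indices JA of length NNZ, and NNZ*nb*nb REAL entries in val. A hedged sketch that builds a 2x2 block identity with fasp_dbsr_alloc and applies it with fasp_blas_dbsr_mxv; the header name fasp.h and storage_manner = 0 meaning row-major blocks are assumptions:

    #include "fasp.h"        /* assumed FASP main header: dBSRmat, INT, REAL */
    #include "fasp_functs.h"

    void tiny_bsr_example(void)
    {
        dBSRmat A;
        /* 2 block rows, 2 block cols, 2 nonzero blocks, block size 2, storage_manner 0 (assumed row-major) */
        fasp_dbsr_alloc(2, 2, 2, 2, 0, &A);

        INT  IA[3]  = {0, 1, 2};          /* one block per block row     */
        INT  JA[2]  = {0, 1};             /* both blocks on the diagonal */
        REAL val[8] = {1, 0, 0, 1,        /* block (0,0) = I             */
                       1, 0, 0, 1};       /* block (1,1) = I             */
        fasp_iarray_cp(3, IA, A.IA);
        fasp_iarray_cp(2, JA, A.JA);
        fasp_darray_cp(8, val, A.val);

        REAL x[4] = {1, 2, 3, 4}, y[4];
        fasp_blas_dbsr_mxv(&A, x, y);     /* y = A*x = x for this block identity */
        /* release A with the matching BSR free routine afterwards */
    }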