32#include "fasp_functs.h"
64 INT count = 0, added, countrow;
69 INT mybegin, myend, myid, nthreads;
72 nthreads = fasp_get_num_threads();
77 printf(
"### ERROR: Matrix sizes do not match!\n");
82 if (A == NULL && B == NULL) {
90 if (A->
nnz == 0 && B->
nnz == 0) {
99 if (A->
nnz == 0 || A == NULL) {
101 memcpy(C->
IA, B->
IA, (B->
row + 1) *
sizeof(
INT));
106#pragma omp parallel private(myid, mybegin, myend, i)
108 myid = omp_get_thread_num();
110 for (i = mybegin; i < myend; ++i) C->
val[i] = B->
val[i] * beta;
114 for (i = 0; i < B->
nnz; ++i)
115 C->
val[i] = B->
val[i] * beta;
123 if (B->
nnz == 0 || B == NULL) {
125 memcpy(C->
IA, A->
IA, (A->
row + 1) *
sizeof(
INT));
130 INT mybegin, myend, myid;
131#pragma omp parallel private(myid, mybegin, myend, i)
133 myid = omp_get_thread_num();
135 for (i = mybegin; i < myend; ++i) C->
val[i] = A->
val[i] * alpha;
139 for (i = 0; i < A->
nnz; ++i) C->
val[i] = A->
val[i] * alpha;
157 memset(C->
IA, 0,
sizeof(
INT) * (C->
row + 1));
158 memset(C->
JA, -1,
sizeof(
INT) * (A->
nnz + B->
nnz));
160 for (i = 0; i < A->
row; ++i) {
162 for (j = A->
IA[i]; j < A->IA[i + 1]; ++j) {
163 C->
val[count] = alpha * A->
val[j];
164 C->
JA[count] = A->
JA[j];
170 for (k = B->
IA[i]; k < B->IA[i + 1]; ++k) {
173 for (l = C->
IA[i]; l < C->IA[i] + countrow + 1; l++) {
174 if (B->
JA[k] == C->
JA[l]) {
175 C->
val[l] = C->
val[l] + beta * B->
val[k];
182 C->
val[count] = beta * B->
val[k];
183 C->
JA[count] = B->
JA[k];
190 C->
IA[i + 1] += C->
IA[i];
245 const INT * ia = A->
IA, *ja = A->
JA;
248 INT i, k, begin_row, end_row, nnz_row;
256 nthreads = fasp_get_num_threads();
261 INT myid, mybegin, myend;
264#pragma omp parallel for private(myid, mybegin, myend, i, temp, begin_row, end_row, \
267 for (myid = 0; myid < nthreads; myid++) {
269 for (i = mybegin; i < myend; ++i) {
273 nnz_row = end_row - begin_row;
277 temp += aj[k] * x[ja[k]];
279 temp += aj[k] * x[ja[k]];
281 temp += aj[k] * x[ja[k]];
285 temp += aj[k] * x[ja[k]];
287 temp += aj[k] * x[ja[k]];
289 temp += aj[k] * x[ja[k]];
291 temp += aj[k] * x[ja[k]];
295 temp += aj[k] * x[ja[k]];
297 temp += aj[k] * x[ja[k]];
299 temp += aj[k] * x[ja[k]];
301 temp += aj[k] * x[ja[k]];
303 temp += aj[k] * x[ja[k]];
307 temp += aj[k] * x[ja[k]];
309 temp += aj[k] * x[ja[k]];
311 temp += aj[k] * x[ja[k]];
313 temp += aj[k] * x[ja[k]];
315 temp += aj[k] * x[ja[k]];
317 temp += aj[k] * x[ja[k]];
321 temp += aj[k] * x[ja[k]];
323 temp += aj[k] * x[ja[k]];
325 temp += aj[k] * x[ja[k]];
327 temp += aj[k] * x[ja[k]];
329 temp += aj[k] * x[ja[k]];
331 temp += aj[k] * x[ja[k]];
333 temp += aj[k] * x[ja[k]];
336 for (k = begin_row; k < end_row; ++k) {
337 temp += aj[k] * x[ja[k]];
347 for (i = 0; i < m; ++i) {
351 nnz_row = end_row - begin_row;
355 temp += aj[k] * x[ja[k]];
357 temp += aj[k] * x[ja[k]];
359 temp += aj[k] * x[ja[k]];
363 temp += aj[k] * x[ja[k]];
365 temp += aj[k] * x[ja[k]];
367 temp += aj[k] * x[ja[k]];
369 temp += aj[k] * x[ja[k]];
373 temp += aj[k] * x[ja[k]];
375 temp += aj[k] * x[ja[k]];
377 temp += aj[k] * x[ja[k]];
379 temp += aj[k] * x[ja[k]];
381 temp += aj[k] * x[ja[k]];
385 temp += aj[k] * x[ja[k]];
387 temp += aj[k] * x[ja[k]];
389 temp += aj[k] * x[ja[k]];
391 temp += aj[k] * x[ja[k]];
393 temp += aj[k] * x[ja[k]];
395 temp += aj[k] * x[ja[k]];
399 temp += aj[k] * x[ja[k]];
401 temp += aj[k] * x[ja[k]];
403 temp += aj[k] * x[ja[k]];
405 temp += aj[k] * x[ja[k]];
407 temp += aj[k] * x[ja[k]];
409 temp += aj[k] * x[ja[k]];
411 temp += aj[k] * x[ja[k]];
414 for (k = begin_row; k < end_row; ++k) {
415 temp += aj[k] * x[ja[k]];
441 const INT * ia = A->
IA, *ja = A->
JA;
442 INT i, k, begin_row, end_row;
447 INT myid, mybegin, myend;
448 INT nthreads = fasp_get_num_threads();
453#pragma omp parallel for private(myid, i, mybegin, myend, temp, begin_row, end_row, k)
454 for (myid = 0; myid < nthreads; myid++) {
456 for (i = mybegin; i < myend; i++) {
460 for (k = begin_row; k < end_row; ++k) temp += x[ja[k]];
466 for (i = 0; i < m; ++i) {
470 for (k = begin_row; k < end_row; ++k) temp += x[ja[k]];
497 const INT * ia = A->
IA, *ja = A->
JA;
499 INT i, k, begin_row, end_row;
506 nthreads = fasp_get_num_threads();
511 INT myid, mybegin, myend;
513#pragma omp parallel for private(myid, mybegin, myend, i, temp, begin_row, end_row, k)
515 for (myid = 0; myid < nthreads; myid++) {
517 for (i = mybegin; i < myend; ++i) {
521 for (k = begin_row; k < end_row; ++k) temp += aj[k] * x[ja[k]];
526 for (i = 0; i < m; ++i) {
530 for (k = begin_row; k < end_row; ++k) temp += aj[k] * x[ja[k]];
536 else if (alpha == -1.0) {
538 INT myid, mybegin, myend;
541#pragma omp parallel for private(myid, mybegin, myend, temp, i, begin_row, end_row, k)
544 for (myid = 0; myid < nthreads; myid++) {
546 for (i = mybegin; i < myend; ++i) {
550 for (k = begin_row; k < end_row; ++k) temp += aj[k] * x[ja[k]];
555 for (i = 0; i < m; ++i) {
559 for (k = begin_row; k < end_row; ++k) temp += aj[k] * x[ja[k]];
567 INT myid, mybegin, myend;
569#pragma omp parallel for private(myid, mybegin, myend, i, temp, begin_row, end_row, k)
571 for (myid = 0; myid < nthreads; myid++) {
573 for (i = mybegin; i < myend; ++i) {
577 for (k = begin_row; k < end_row; ++k) temp += aj[k] * x[ja[k]];
578 y[i] += temp * alpha;
582 for (i = 0; i < m; ++i) {
586 for (k = begin_row; k < end_row; ++k) temp += aj[k] * x[ja[k]];
587 y[i] += temp * alpha;
615 const INT * ia = A->
IA, *ja = A->
JA;
617 INT i, k, begin_row, end_row;
625 nthreads = fasp_get_num_threads();
631 INT myid, mybegin, myend;
633#pragma omp parallel for private(myid, mybegin, myend, i, temp, begin_row, end_row, k)
635 for (myid = 0; myid < nthreads; myid++) {
637 for (i = mybegin; i < myend; ++i) {
641 for (k = begin_row; k < end_row; ++k) temp += aj[k] * x[ja[k]];
646 for (i = 0; i < m; ++i) {
650 for (k = begin_row; k < end_row; ++k) temp += aj[k] * x[ja[k]];
656 else if (alpha == -1.0) {
658 INT myid, mybegin, myend;
660#pragma omp parallel for private(myid, mybegin, myend, temp, i, begin_row, end_row, k)
662 for (myid = 0; myid < nthreads; myid++) {
664 for (i = mybegin; i < myend; ++i) {
668 for (k = begin_row; k < end_row; ++k) temp += aj[k] * x[ja[k]];
673 for (i = 0; i < m; ++i) {
677 for (k = begin_row; k < end_row; ++k) temp += aj[k] * x[ja[k]];
685 INT myid, mybegin, myend;
687#pragma omp parallel for private(myid, mybegin, myend, i, temp, begin_row, end_row, k)
689 for (myid = 0; myid < nthreads; myid++) {
691 for (i = mybegin; i < myend; ++i) {
695 for (k = begin_row; k < end_row; ++k) temp += aj[k] * x[ja[k]];
696 y[i] += temp * alpha;
700 for (i = 0; i < m; ++i) {
704 for (k = begin_row; k < end_row; ++k) temp += aj[k] * x[ja[k]];
705 y[i] += temp * alpha;
733 const INT *ia = A->
IA, *ja = A->
JA;
735 INT i, k, begin_row, end_row;
741 INT myid, mybegin, myend;
742 INT nthreads = fasp_get_num_threads();
743#pragma omp parallel for private(myid, i, mybegin, myend, begin_row, end_row, temp, k)
744 for (myid = 0; myid < nthreads; myid++) {
746 for (i = mybegin; i < myend; ++i) {
750 for (k = begin_row; k < end_row; ++k) temp += x[ja[k]];
756 for (i = 0; i < m; ++i) {
760 for (k = begin_row; k < end_row; ++k) temp += x[ja[k]];
766 }
else if (alpha == -1.0) {
769 INT myid, mybegin, myend;
770 INT nthreads = fasp_get_num_threads();
771#pragma omp parallel for private(myid, i, mybegin, myend, begin_row, end_row, temp, k)
772 for (myid = 0; myid < nthreads; myid++) {
774 for (i = mybegin; i < myend; ++i) {
778 for (k = begin_row; k < end_row; ++k) temp += x[ja[k]];
784 for (i = 0; i < m; ++i) {
788 for (k = begin_row; k < end_row; ++k) temp += x[ja[k]];
799 INT myid, mybegin, myend;
800 INT nthreads = fasp_get_num_threads();
801#pragma omp parallel for private(myid, i, mybegin, myend, begin_row, end_row, temp, k)
802 for (myid = 0; myid < nthreads; myid++) {
804 for (i = mybegin; i < myend; ++i) {
808 for (k = begin_row; k < end_row; ++k) temp += x[ja[k]];
809 y[i] += temp * alpha;
814 for (i = 0; i < m; ++i) {
818 for (k = begin_row; k < end_row; ++k) temp += x[ja[k]];
819 y[i] += temp * alpha;
841 register REAL value = 0.0;
843 const INT * ia = A->
IA, *ja = A->
JA;
845 INT i, k, begin_row, end_row;
858#pragma omp parallel for reduction(+ : value) private(i, temp, begin_row, end_row, k)
860 for (i = 0; i < m; ++i) {
864 for (k = begin_row; k < end_row; ++k) temp += aj[k] * x[ja[k]];
865 value += y[i] * temp;
868 for (i = 0; i < m; ++i) {
872 for (k = begin_row; k < end_row; ++k) temp += aj[k] * x[ja[k]];
873 value += y[i] * temp;
895 INT i, j, k, l, count;
905 for (i = 0; i < B->
col; ++i) JD[i] = -1;
908 for (i = 0; i < C->
row; ++i) {
911 for (k = A->
IA[i]; k < A->IA[i + 1]; ++k) {
912 for (j = B->
IA[A->
JA[k]]; j < B->IA[A->
JA[k] + 1]; ++j) {
913 for (l = 0; l < count; l++) {
914 if (JD[l] == B->
JA[j])
break;
918 JD[count] = B->
JA[j];
923 C->
IA[i + 1] = count;
924 for (j = 0; j < count; ++j) {
929 for (i = 0; i < C->
row; ++i) C->
IA[i + 1] += C->
IA[i];
936 for (i = 0; i < C->
row; ++i) {
939 for (k = A->
IA[i]; k < A->IA[i + 1]; ++k) {
940 for (j = B->
IA[A->
JA[k]]; j < B->IA[A->
JA[k] + 1]; ++j) {
941 for (l = 0; l < countJD; l++) {
942 if (JD[l] == B->
JA[j])
break;
946 C->
JA[count] = B->
JA[j];
947 JD[countJD] = B->
JA[j];
964 for (i = 0; i < C->
row; ++i) {
965 for (j = C->
IA[i]; j < C->IA[i + 1]; ++j) {
967 for (k = A->
IA[i]; k < A->IA[i + 1]; ++k) {
968 for (l = B->
IA[A->
JA[k]]; l < B->IA[A->
JA[k] + 1]; l++) {
969 if (B->
JA[l] == C->
JA[j]) {
1004 const INT n_coarse = R->
row;
1005 const INT* R_i = R->
IA;
1006 const INT* R_j = R->
JA;
1009 const INT n_fine = A->
row;
1010 const INT* A_i = A->
IA;
1011 const INT* A_j = A->
JA;
1014 const INT* P_i = P->
IA;
1015 const INT* P_j = P->
JA;
1021 REAL* RAP_data = NULL;
1024 INT* P_marker = NULL;
1025 INT* A_marker = NULL;
1028 INT* Ps_marker = NULL;
1029 INT* As_marker = NULL;
1031 INT ic, i1, i2, i3, jj1, jj2, jj3;
1032 INT jj_counter, jj_row_begining;
1033 REAL r_entry, r_a_product, r_a_p_product;
1038 INT myid, mybegin, myend, Ctemp;
1039 nthreads = fasp_get_num_threads();
1042 INT coarse_mul_nthreads = n_coarse * nthreads;
1043 INT fine_mul_nthreads = n_fine * nthreads;
1044 INT coarse_add_nthreads = n_coarse + nthreads;
1045 INT minus_one_length = coarse_mul_nthreads + fine_mul_nthreads;
1046 INT total_calloc = minus_one_length + coarse_add_nthreads + nthreads;
1049 As_marker = Ps_marker + coarse_mul_nthreads;
1059 INT* RAP_temp = As_marker + fine_mul_nthreads;
1060 INT* part_end = RAP_temp + coarse_add_nthreads;
1063#pragma omp parallel for private(myid, mybegin, myend, Ctemp, P_marker, A_marker, \
1064 jj_counter, ic, jj_row_begining, jj1, i1, jj2, \
1066 for (myid = 0; myid < nthreads; myid++) {
1068 P_marker = Ps_marker + myid * n_coarse;
1069 A_marker = As_marker + myid * n_fine;
1071 for (ic = mybegin; ic < myend; ic++) {
1072 P_marker[ic] = jj_counter;
1073 jj_row_begining = jj_counter;
1076 for (jj1 = R_i[ic]; jj1 < R_i[ic + 1]; jj1++) {
1078 for (jj2 = A_i[i1]; jj2 < A_i[i1 + 1]; jj2++) {
1080 if (A_marker[i2] != ic) {
1082 for (jj3 = P_i[i2]; jj3 < P_i[i2 + 1]; jj3++) {
1084 if (P_marker[i3] < jj_row_begining) {
1085 P_marker[i3] = jj_counter;
1093 RAP_temp[ic + myid] = jj_row_begining;
1095 RAP_temp[myend + myid] = jj_counter;
1097 part_end[myid] = myend + myid + 1;
1100 jj_counter = part_end[0];
1102 for (i1 = 1; i1 < nthreads; i1++) {
1103 Ctemp += RAP_temp[part_end[i1 - 1] - 1];
1104 for (jj1 = part_end[i1 - 1] + 1; jj1 < part_end[i1]; jj1++) {
1105 RAP_i[jj_counter] = RAP_temp[jj1] + Ctemp;
1109 RAP_size = RAP_i[n_coarse];
1115 for (ic = 0; ic < n_coarse; ic++) {
1116 Ps_marker[ic] = jj_counter;
1117 jj_row_begining = jj_counter;
1120 for (jj1 = R_i[ic]; jj1 < R_i[ic + 1]; jj1++) {
1123 for (jj2 = A_i[i1]; jj2 < A_i[i1 + 1]; jj2++) {
1125 if (As_marker[i2] != ic) {
1127 for (jj3 = P_i[i2]; jj3 < P_i[i2 + 1]; jj3++) {
1129 if (Ps_marker[i3] < jj_row_begining) {
1130 Ps_marker[i3] = jj_counter;
1138 RAP_i[ic] = jj_row_begining;
1141 RAP_i[n_coarse] = jj_counter;
1142 RAP_size = jj_counter;
1154#pragma omp parallel for private(myid, mybegin, myend, P_marker, A_marker, jj_counter, \
1155 ic, jj_row_begining, jj1, r_entry, i1, jj2, \
1156 r_a_product, i2, jj3, r_a_p_product, i3)
1157 for (myid = 0; myid < nthreads; myid++) {
1159 P_marker = Ps_marker + myid * n_coarse;
1160 A_marker = As_marker + myid * n_fine;
1161 jj_counter = RAP_i[mybegin];
1162 for (ic = mybegin; ic < myend; ic++) {
1163 P_marker[ic] = jj_counter;
1164 jj_row_begining = jj_counter;
1165 RAP_j[jj_counter] = ic;
1166 RAP_data[jj_counter] = 0.0;
1168 for (jj1 = R_i[ic]; jj1 < R_i[ic + 1]; jj1++) {
1169 r_entry = R_data[jj1];
1172 for (jj2 = A_i[i1]; jj2 < A_i[i1 + 1]; jj2++) {
1173 r_a_product = r_entry * A_data[jj2];
1176 if (A_marker[i2] != ic) {
1178 for (jj3 = P_i[i2]; jj3 < P_i[i2 + 1]; jj3++) {
1179 r_a_p_product = r_a_product * P_data[jj3];
1182 if (P_marker[i3] < jj_row_begining) {
1183 P_marker[i3] = jj_counter;
1184 RAP_data[jj_counter] = r_a_p_product;
1185 RAP_j[jj_counter] = i3;
1188 RAP_data[P_marker[i3]] += r_a_p_product;
1192 for (jj3 = P_i[i2]; jj3 < P_i[i2 + 1]; jj3++) {
1194 r_a_p_product = r_a_product * P_data[jj3];
1195 RAP_data[P_marker[i3]] += r_a_p_product;
1205 for (ic = 0; ic < n_coarse; ic++) {
1206 Ps_marker[ic] = jj_counter;
1207 jj_row_begining = jj_counter;
1208 RAP_j[jj_counter] = ic;
1209 RAP_data[jj_counter] = 0.0;
1212 for (jj1 = R_i[ic]; jj1 < R_i[ic + 1]; jj1++) {
1213 r_entry = R_data[jj1];
1216 for (jj2 = A_i[i1]; jj2 < A_i[i1 + 1]; jj2++) {
1217 r_a_product = r_entry * A_data[jj2];
1220 if (As_marker[i2] != ic) {
1222 for (jj3 = P_i[i2]; jj3 < P_i[i2 + 1]; jj3++) {
1223 r_a_p_product = r_a_product * P_data[jj3];
1226 if (Ps_marker[i3] < jj_row_begining) {
1227 Ps_marker[i3] = jj_counter;
1228 RAP_data[jj_counter] = r_a_p_product;
1229 RAP_j[jj_counter] = i3;
1232 RAP_data[Ps_marker[i3]] += r_a_p_product;
1236 for (jj3 = P_i[i2]; jj3 < P_i[i2 + 1]; jj3++) {
1238 r_a_p_product = r_a_product * P_data[jj3];
1239 RAP_data[Ps_marker[i3]] += r_a_p_product;
1249 RAP->
row = n_coarse;
1250 RAP->
col = n_coarse;
1251 RAP->
nnz = RAP_size;
1254 RAP->
val = RAP_data;
1281 const INT n_coarse = R->
row;
1282 const INT* R_i = R->
IA;
1283 const INT* R_j = R->
JA;
1285 const INT n_fine = A->
row;
1286 const INT* A_i = A->
IA;
1287 const INT* A_j = A->
JA;
1290 const INT* P_i = P->
IA;
1291 const INT* P_j = P->
JA;
1296 REAL* RAP_data = NULL;
1299 INT* P_marker = NULL;
1300 INT* A_marker = NULL;
1303 INT* Ps_marker = NULL;
1304 INT* As_marker = NULL;
1306 INT ic, i1, i2, i3, jj1, jj2, jj3;
1307 INT jj_counter, jj_row_begining;
1312 INT myid, mybegin, myend, Ctemp;
1313 nthreads = fasp_get_num_threads();
1316 INT coarse_mul_nthreads = n_coarse * nthreads;
1317 INT fine_mul_nthreads = n_fine * nthreads;
1318 INT coarse_add_nthreads = n_coarse + nthreads;
1319 INT minus_one_length = coarse_mul_nthreads + fine_mul_nthreads;
1320 INT total_calloc = minus_one_length + coarse_add_nthreads + nthreads;
1323 As_marker = Ps_marker + coarse_mul_nthreads;
1333 INT* RAP_temp = As_marker + fine_mul_nthreads;
1334 INT* part_end = RAP_temp + coarse_add_nthreads;
1337#pragma omp parallel for private(myid, mybegin, myend, Ctemp, P_marker, A_marker, \
1338 jj_counter, ic, jj_row_begining, jj1, i1, jj2, \
1340 for (myid = 0; myid < nthreads; myid++) {
1342 P_marker = Ps_marker + myid * n_coarse;
1343 A_marker = As_marker + myid * n_fine;
1345 for (ic = mybegin; ic < myend; ic++) {
1346 P_marker[ic] = jj_counter;
1347 jj_row_begining = jj_counter;
1350 for (jj1 = R_i[ic]; jj1 < R_i[ic + 1]; jj1++) {
1352 for (jj2 = A_i[i1]; jj2 < A_i[i1 + 1]; jj2++) {
1354 if (A_marker[i2] != ic) {
1356 for (jj3 = P_i[i2]; jj3 < P_i[i2 + 1]; jj3++) {
1358 if (P_marker[i3] < jj_row_begining) {
1359 P_marker[i3] = jj_counter;
1367 RAP_temp[ic + myid] = jj_row_begining;
1369 RAP_temp[myend + myid] = jj_counter;
1371 part_end[myid] = myend + myid + 1;
1374 jj_counter = part_end[0];
1376 for (i1 = 1; i1 < nthreads; i1++) {
1377 Ctemp += RAP_temp[part_end[i1 - 1] - 1];
1378 for (jj1 = part_end[i1 - 1] + 1; jj1 < part_end[i1]; jj1++) {
1379 RAP_i[jj_counter] = RAP_temp[jj1] + Ctemp;
1383 RAP_size = RAP_i[n_coarse];
1389 for (ic = 0; ic < n_coarse; ic++) {
1390 Ps_marker[ic] = jj_counter;
1391 jj_row_begining = jj_counter;
1394 for (jj1 = R_i[ic]; jj1 < R_i[ic + 1]; jj1++) {
1397 for (jj2 = A_i[i1]; jj2 < A_i[i1 + 1]; jj2++) {
1399 if (As_marker[i2] != ic) {
1401 for (jj3 = P_i[i2]; jj3 < P_i[i2 + 1]; jj3++) {
1403 if (Ps_marker[i3] < jj_row_begining) {
1404 Ps_marker[i3] = jj_counter;
1412 RAP_i[ic] = jj_row_begining;
1415 RAP_i[n_coarse] = jj_counter;
1416 RAP_size = jj_counter;
1428#pragma omp parallel for private(myid, mybegin, myend, P_marker, A_marker, jj_counter, \
1429 ic, jj_row_begining, jj1, i1, jj2, i2, jj3, i3)
1430 for (myid = 0; myid < nthreads; myid++) {
1432 P_marker = Ps_marker + myid * n_coarse;
1433 A_marker = As_marker + myid * n_fine;
1434 jj_counter = RAP_i[mybegin];
1435 for (ic = mybegin; ic < myend; ic++) {
1436 P_marker[ic] = jj_counter;
1437 jj_row_begining = jj_counter;
1438 RAP_j[jj_counter] = ic;
1439 RAP_data[jj_counter] = 0.0;
1441 for (jj1 = R_i[ic]; jj1 < R_i[ic + 1]; jj1++) {
1444 for (jj2 = A_i[i1]; jj2 < A_i[i1 + 1]; jj2++) {
1447 if (A_marker[i2] != ic) {
1449 for (jj3 = P_i[i2]; jj3 < P_i[i2 + 1]; jj3++) {
1452 if (P_marker[i3] < jj_row_begining) {
1453 P_marker[i3] = jj_counter;
1454 RAP_data[jj_counter] = A_data[jj2];
1455 RAP_j[jj_counter] = i3;
1458 RAP_data[P_marker[i3]] += A_data[jj2];
1462 for (jj3 = P_i[i2]; jj3 < P_i[i2 + 1]; jj3++) {
1464 RAP_data[P_marker[i3]] += A_data[jj2];
1474 for (ic = 0; ic < n_coarse; ic++) {
1475 Ps_marker[ic] = jj_counter;
1476 jj_row_begining = jj_counter;
1477 RAP_j[jj_counter] = ic;
1478 RAP_data[jj_counter] = 0.0;
1481 for (jj1 = R_i[ic]; jj1 < R_i[ic + 1]; jj1++) {
1483 for (jj2 = A_i[i1]; jj2 < A_i[i1 + 1]; jj2++) {
1485 if (As_marker[i2] != ic) {
1487 for (jj3 = P_i[i2]; jj3 < P_i[i2 + 1]; jj3++) {
1489 if (Ps_marker[i3] < jj_row_begining) {
1490 Ps_marker[i3] = jj_counter;
1491 RAP_data[jj_counter] = A_data[jj2];
1492 RAP_j[jj_counter] = i3;
1495 RAP_data[Ps_marker[i3]] += A_data[jj2];
1499 for (jj3 = P_i[i2]; jj3 < P_i[i2 + 1]; jj3++) {
1501 RAP_data[Ps_marker[i3]] += A_data[jj2];
1511 RAP->
row = n_coarse;
1512 RAP->
col = n_coarse;
1513 RAP->
nnz = RAP_size;
1516 RAP->
val = RAP_data;
1545 const INT * ir = R->
IA, *ia = A->
IA, *ip = P->
IA;
1546 const INT * jr = R->
JA, *ja = A->
JA, *jp = P->
JA;
1556 INT i, i1, j, jj, k, length;
1557 INT begin_row, end_row, begin_rowA, end_rowA, begin_rowR, end_rowR;
1558 INT istart, iistart, count;
1575 for (i = 0; i < row; ++i) {
1584 for (jj = begin_rowR; jj < end_rowR; ++jj) {
1588 end_rowA = ia[j + 1];
1589 for (k = begin_rowA; k < end_rowA; ++k) {
1590 if (index[ja[k]] == -2) {
1591 index[ja[k]] = istart;
1604 for (j = 0; j < count; ++j) {
1606 istart = index[istart];
1611 end_row = ip[jj + 1];
1612 for (k = begin_row; k < end_row; ++k) {
1614 if (iindex[jp[k]] == -2) {
1615 iindex[jp[k]] = iistart;
1623 iac[i1] = iac[i] + length;
1633 for (j = begin_row; j < end_row; ++j) {
1637 iistart = iindex[iistart];
1639 iindex[jac[j]] = -2;
1651 for (i = 0; i < row; ++i) {
1657 for (j = begin_row; j < end_row; ++j) {
1658 BTindex[jac[j]] = j;
1668 for (jj = begin_rowR; jj < end_rowR; ++jj) {
1673 end_rowA = ia[j + 1];
1674 for (k = begin_rowA; k < end_rowA; ++k) {
1675 if (index[ja[k]] == -2) {
1676 index[ja[k]] = istart;
1680 temp[ja[k]] += aj[k];
1686 for (j = 0; j < length; ++j) {
1688 istart = index[istart];
1693 end_row = ip[jj + 1];
1694 for (k = begin_row; k < end_row; ++k) {
1696 acj[BTindex[jp[k]]] += temp[jj];
1755#pragma omp parallel for if (n > OPENMP_HOLDS)
1757 for (i = 0; i <= n; ++i) {
1763#pragma omp parallel for if (nnzA > OPENMP_HOLDS)
1765 for (i = 0; i < nnzA; ++i) {
1770#pragma omp parallel for if (nc > OPENMP_HOLDS)
1772 for (i = 0; i <= nc; ++i) {
1777#pragma omp parallel for if (nnzP > OPENMP_HOLDS)
1779 for (i = 0; i < nnzP; ++i) {
1787 Pt->
JA, Pt->
val, n, nc, &maxrpout, P->
IA, P->
JA);
1798#pragma omp parallel for if (Ac->row > OPENMP_HOLDS)
1800 for (i = 0; i <= Ac->
row; ++i) Ac->
IA[i]--;
1805 for (i = 0; i < Ac->
nnz; ++i) Ac->
JA[i]--;
1810 for (i = 0; i <= n; ++i) A->
IA[i]--;
1815 for (i = 0; i < nnzA; ++i) A->
JA[i]--;
1820 for (i = 0; i <= n; ++i) P->
IA[i]--;
1825 for (i = 0; i < nnzP; ++i) P->
JA[i]--;
1830 for (i = 0; i <= nc; ++i) Pt->
IA[i]--;
1835 for (i = 0; i < nnzP; ++i) Pt->
JA[i]--;
1871 INT n1, nc1, nnzp, maxrp;
1872 INT * ip = NULL, *jp = NULL;
1884 ip = (
INT*)calloc(n1,
sizeof(
INT));
1885 jp = (
INT*)calloc(nnzp,
sizeof(
INT));
1904 ac.
IA = (
INT*)calloc(nc1,
sizeof(
INT));
1911 fasp_sparse_rapms_(ir, jr, ia, ja, ip, jp, &n, &nc, ac.
IA, ac.
JA, &maxrp);
1912 ac.
nnz = ac.
IA[nc] - 1;
1918 fasp_sparse_rapms_(ir, jr, ia, ja, ip, jp, &n, &nc, ac.
IA, ac.
JA, &maxrp);
1925 fasp_sparse_rapcmp_(ir, jr, r, ia, ja, a, ipt, jpt, pt, &n, &nc, ac.
IA, ac.
JA,
1961 nthreads = fasp_get_num_threads();
1967 INT * ir = R->
IA, *ia = A->
IA, *ip = P->
IA;
1968 INT * jr = R->
JA, *ja = A->
JA, *jp = P->
JA;
1970 INT istart, iistart;
1971 INT end_row, end_rowA, end_rowR;
1972 INT i, j, jj, k, length, myid, mybegin, myend;
1973 INT jj_counter, ic, jj_row_begining, jj1, i1, jj2, i2, jj3, i3;
1976 INT* BTindex = NULL;
1978 INT FiveMyid, min_A, min_P, A_pos, P_pos, FiveIc;
1979 INT minus_one_length_A = icor_ysk[5 * nthreads];
1980 INT minus_one_length_P = icor_ysk[5 * nthreads + 1];
1981 INT minus_one_length = minus_one_length_A + minus_one_length_P;
1989 INT* indexs = iindexs + minus_one_length_P;
1990 INT* BTindexs = indexs + minus_one_length_A;
2004 INT* iac_temp = part_end + nthreads;
2011#pragma omp parallel for private(myid, FiveMyid, mybegin, myend, min_A, min_P, index, \
2012 iindex, A_pos, P_pos, ic, FiveIc, jj_counter, \
2013 jj_row_begining, end_rowR, jj1, i1, end_rowA, \
2014 jj2, i2, end_row, jj3, i3)
2016 for (myid = 0; myid < nthreads; myid++) {
2017 FiveMyid = myid * 5;
2018 mybegin = icor_ysk[FiveMyid];
2019 if (myid == nthreads - 1) {
2022 myend = icor_ysk[FiveMyid + 5];
2024 min_A = icor_ysk[FiveMyid + 2];
2025 min_P = icor_ysk[FiveMyid + 4];
2028 for (ic = myid - 1; ic >= 0; ic--) {
2030 A_pos += icor_ysk[FiveIc + 1];
2031 P_pos += icor_ysk[FiveIc + 3];
2033 iindex_array[myid] = iindex = iindexs + P_pos - min_P;
2034 index_array[myid] = index = indexs + A_pos - min_A;
2036 for (ic = mybegin; ic < myend; ic++) {
2037 iindex[ic] = jj_counter;
2038 jj_row_begining = jj_counter;
2040 end_rowR = ir[ic + 1];
2041 for (jj1 = ir[ic]; jj1 < end_rowR; jj1++) {
2043 end_rowA = ia[i1 + 1];
2044 for (jj2 = ia[i1]; jj2 < end_rowA; jj2++) {
2046 if (index[i2] != ic) {
2048 end_row = ip[i2 + 1];
2049 for (jj3 = ip[i2]; jj3 < end_row; jj3++) {
2051 if (iindex[i3] < jj_row_begining) {
2052 iindex[i3] = jj_counter;
2059 iac_temp[ic + myid] = jj_row_begining;
2061 iac_temp[myend + myid] = jj_counter;
2062 part_end[myid] = myend + myid + 1;
2065 jj_counter = part_end[0];
2067 for (i1 = 1; i1 < nthreads; i1++) {
2068 Ctemp += iac_temp[part_end[i1 - 1] - 1];
2069 for (jj1 = part_end[i1 - 1] + 1; jj1 < part_end[i1]; jj1++) {
2070 iac[jj_counter] = iac_temp[jj1] + Ctemp;
2080#pragma omp parallel for private(myid, index, iindex, FiveMyid, mybegin, myend, i, \
2081 istart, length, i1, end_rowR, jj, j, end_rowA, k, \
2084 for (myid = 0; myid < nthreads; myid++) {
2085 iindex = iindex_array[myid];
2086 index = index_array[myid];
2087 FiveMyid = myid * 5;
2088 mybegin = icor_ysk[FiveMyid];
2089 if (myid == nthreads - 1) {
2092 myend = icor_ysk[FiveMyid + 5];
2094 for (i = mybegin; i < myend; ++i) {
2100 for (jj = ir[i]; jj < end_rowR; ++jj) {
2103 end_rowA = ia[j + 1];
2104 for (k = ia[j]; k < end_rowA; ++k) {
2105 if (index[ja[k]] == -2) {
2106 index[ja[k]] = istart;
2118 for (j = 0; j < length; ++j) {
2120 istart = index[istart];
2123 end_row = ip[jj + 1];
2124 for (k = ip[jj]; k < end_row; ++k) {
2126 if (iindex[jp[k]] == -2) {
2127 iindex[jp[k]] = iistart;
2135 for (j = iac[i]; j < end_row; ++j) {
2139 iistart = iindex[iistart];
2141 iindex[jac[j]] = -2;
2156#pragma omp parallel for private(myid, index, FiveMyid, mybegin, myend, min_A, min_P, \
2157 A_pos, P_pos, ic, FiveIc, BTindex, temp, i, i1, \
2158 end_row, j, istart, length, end_rowR, jj, \
2161 for (myid = 0; myid < nthreads; myid++) {
2162 index = index_array[myid];
2163 FiveMyid = myid * 5;
2164 mybegin = icor_ysk[FiveMyid];
2165 if (myid == nthreads - 1) {
2168 myend = icor_ysk[FiveMyid + 5];
2170 min_A = icor_ysk[FiveMyid + 2];
2171 min_P = icor_ysk[FiveMyid + 4];
2174 for (ic = myid - 1; ic >= 0; ic--) {
2176 A_pos += icor_ysk[FiveIc + 1];
2177 P_pos += icor_ysk[FiveIc + 3];
2179 BTindex = BTindexs + P_pos - min_P;
2180 temp = temps + A_pos - min_A;
2181 for (i = mybegin; i < myend; ++i) {
2185 for (j = iac[i]; j < end_row; ++j) {
2186 BTindex[jac[j]] = j;
2193 for (jj = ir[i]; jj < end_rowR; ++jj) {
2196 end_rowA = ia[j + 1];
2197 for (k = ia[j]; k < end_rowA; ++k) {
2198 if (index[ja[k]] == -2) {
2199 index[ja[k]] = istart;
2203 temp[ja[k]] += rj[jj] * aj[k];
2208 for (j = 0; j < length; ++j) {
2210 istart = index[istart];
2213 end_row = ip[jj + 1];
2214 for (k = ip[jj]; k < end_row; ++k) {
2216 acj[BTindex[jp[k]]] += temp[jj] * pj[k];
2237 iindex_array = NULL;
void fasp_iarray_set(const INT n, INT *x, const INT val)
Set initial value for an array to be x=val.
void fasp_iarray_cp(const INT n, const INT *x, INT *y)
Copy an array to the other y=x.
void fasp_mem_free(void *mem)
Free up previous allocated memory body and set pointer to NULL.
void * fasp_mem_realloc(void *oldmem, const LONGLONG tsize)
Reallocate, initiate, and check memory.
void * fasp_mem_calloc(const unsigned int size, const unsigned int type)
Allocate, initiate, and check memory.
void fasp_get_start_end(const INT procid, const INT nprocs, const INT n, INT *start, INT *end)
Assign Load to each thread.
void fasp_blas_darray_ax(const INT n, const REAL a, REAL *x)
x = a*x
void fasp_dcsr_alloc(const INT m, const INT n, const INT nnz, dCSRmat *A)
Allocate CSR sparse matrix memory space.
void fasp_sparse_rapms_(INT *ir, INT *jr, INT *ia, INT *ja, INT *ip, INT *jp, INT *nin, INT *ncin, INT *iac, INT *jac, INT *maxrout)
Calculates the nonzero structure of R*A*P, if jac is not null. If jac is null only finds num of nonze...
void fasp_sparse_iit_(INT *ia, INT *ja, INT *na, INT *ma, INT *iat, INT *jat)
Transpose a boolean matrix (only given by ia, ja)
void fasp_sparse_rapcmp_(INT *ir, INT *jr, REAL *r, INT *ia, INT *ja, REAL *a, INT *ipt, INT *jpt, REAL *pt, INT *nin, INT *ncin, INT *iac, INT *jac, REAL *ac, INT *idummy)
Calculates R*A*P after the nonzero structure of the result is known. iac,jac,ac have to be allocated ...
void fasp_blas_dcsr_mxv_agg(const dCSRmat *A, const REAL *x, REAL *y)
Matrix-vector multiplication y = A*x (nonzeros of A = 1)
void fasp_blas_ldcsr_aAxpy(const REAL alpha, const dCSRmat *A, const LONGREAL *x, REAL *y)
Matrix-vector multiplication y = alpha*A*x + y.
void fasp_blas_dcsr_axm(dCSRmat *A, const REAL alpha)
Multiply a sparse matrix A in CSR format by a scalar alpha.
void fasp_blas_dcsr_mxv(const dCSRmat *A, const REAL *x, REAL *y)
Matrix-vector multiplication y = A*x.
dCSRmat fasp_blas_dcsr_rap2(INT *ir, INT *jr, REAL *r, INT *ia, INT *ja, REAL *a, INT *ipt, INT *jpt, REAL *pt, INT n, INT nc, INT *maxrpout, INT *ipin, INT *jpin)
Compute R*A*P.
void fasp_blas_dcsr_aAxpy_agg(const REAL alpha, const dCSRmat *A, const REAL *x, REAL *y)
Matrix-vector multiplication y = alpha*A*x + y (nonzeros of A = 1)
unsigned long total_alloc_mem
void fasp_blas_dcsr_mxm(const dCSRmat *A, const dCSRmat *B, dCSRmat *C)
Sparse matrix multiplication C=A*B.
void fasp_blas_dcsr_rap_agg1(const dCSRmat *R, const dCSRmat *A, const dCSRmat *P, dCSRmat *B)
Triple sparse matrix multiplication B=R*A*P (nonzeros of R, P = 1)
void fasp_blas_dcsr_rap_agg(const dCSRmat *R, const dCSRmat *A, const dCSRmat *P, dCSRmat *RAP)
Triple sparse matrix multiplication B=R*A*P (nonzeros of R, P = 1)
REAL fasp_blas_dcsr_vmv(const dCSRmat *A, const REAL *x, const REAL *y)
vector-Matrix-vector multiplication alpha = y'*A*x
SHORT fasp_blas_dcsr_add(const dCSRmat *A, const REAL alpha, const dCSRmat *B, const REAL beta, dCSRmat *C)
compute C = alpha*A + beta*B in CSR format
unsigned long total_alloc_count
void fasp_blas_dcsr_rap4(dCSRmat *R, dCSRmat *A, dCSRmat *P, dCSRmat *B, INT *icor_ysk)
Triple sparse matrix multiplication B=R*A*P.
void fasp_blas_dcsr_rap(const dCSRmat *R, const dCSRmat *A, const dCSRmat *P, dCSRmat *RAP)
Triple sparse matrix multiplication B=R*A*P.
void fasp_blas_dcsr_aAxpy(const REAL alpha, const dCSRmat *A, const REAL *x, REAL *y)
Matrix-vector multiplication y = alpha*A*x + y.
void fasp_blas_dcsr_ptap(const dCSRmat *Pt, const dCSRmat *A, const dCSRmat *P, dCSRmat *Ac)
Triple sparse matrix multiplication B=P'*A*P.
Main header file for the FASP project.
#define SHORT
FASP integer and floating point numbers.
#define FASP_SUCCESS
Definition of return status and error messages.
#define TRUE
Definition of logic type.
Sparse matrix of REAL type in CSR format.
INT col
column of matrix A, n
REAL * val
nonzero entries of A
INT row
row number of matrix A, m
INT * IA
integer array of row pointers, the size is m+1
INT nnz
number of nonzero entries
INT * JA
integer array of column indexes, the size is nnz