Fast Auxiliary Space Preconditioning 2.7.7 Aug/28/2022
Loading...
Searching...
No Matches
BlaSpmvCSR.c
Go to the documentation of this file.
1
24#include <math.h>
25#include <time.h>
26
27#ifdef _OPENMP
28#include <omp.h>
29#endif
30
31#include "fasp.h"
32#include "fasp_functs.h"
33
34extern unsigned long total_alloc_mem;
35extern unsigned long total_alloc_count;
37/*---------------------------------*/
38/*-- Public Functions --*/
39/*---------------------------------*/
40
61 const dCSRmat* A, const REAL alpha, const dCSRmat* B, const REAL beta, dCSRmat* C)
62{
63 INT i, j, k, l;
64 INT count = 0, added, countrow;
65
66 SHORT status = FASP_SUCCESS, use_openmp = FALSE;
67
68#ifdef _OPENMP
69 INT mybegin, myend, myid, nthreads;
70 if (A->nnz > OPENMP_HOLDS) {
71 use_openmp = TRUE;
72 nthreads = fasp_get_num_threads();
73 }
74#endif
75
76 if (A->row != B->row || A->col != B->col) {
77 printf("### ERROR: Matrix sizes do not match!\n");
78 status = ERROR_MAT_SIZE;
79 goto FINISHED;
80 }
81
82 if (A == NULL && B == NULL) {
83 C->row = 0;
84 C->col = 0;
85 C->nnz = 0;
86 status = FASP_SUCCESS;
87 goto FINISHED;
88 }
89
90 if (A->nnz == 0 && B->nnz == 0) {
91 C->row = A->row;
92 C->col = A->col;
93 C->nnz = A->nnz;
94 status = FASP_SUCCESS;
95 goto FINISHED;
96 }
97
98 // empty matrix A
99 if (A->nnz == 0 || A == NULL) {
100 fasp_dcsr_alloc(B->row, B->col, B->nnz, C);
101 memcpy(C->IA, B->IA, (B->row + 1) * sizeof(INT));
102 memcpy(C->JA, B->JA, (B->nnz) * sizeof(INT));
103
104 if (use_openmp) {
105#ifdef _OPENMP
106#pragma omp parallel private(myid, mybegin, myend, i)
107 {
108 myid = omp_get_thread_num();
109 fasp_get_start_end(myid, nthreads, B->nnz, &mybegin, &myend);
110 for (i = mybegin; i < myend; ++i) C->val[i] = B->val[i] * beta;
111 }
112#endif
113 } else {
114 for (i = 0; i < B->nnz; ++i)
115 C->val[i] = B->val[i] * beta; // revised bugs, Li Zhao 07/03/2023
116 }
117
118 status = FASP_SUCCESS;
119 goto FINISHED;
120 }
121
122 // empty matrix B
123 if (B->nnz == 0 || B == NULL) {
124 fasp_dcsr_alloc(A->row, A->col, A->nnz, C);
125 memcpy(C->IA, A->IA, (A->row + 1) * sizeof(INT));
126 memcpy(C->JA, A->JA, (A->nnz) * sizeof(INT));
127
128 if (use_openmp) {
129#ifdef _OPENMP
130 INT mybegin, myend, myid;
131#pragma omp parallel private(myid, mybegin, myend, i)
132 {
133 myid = omp_get_thread_num();
134 fasp_get_start_end(myid, nthreads, A->nnz, &mybegin, &myend);
135 for (i = mybegin; i < myend; ++i) C->val[i] = A->val[i] * alpha;
136 }
137#endif
138 } else {
139 for (i = 0; i < A->nnz; ++i) C->val[i] = A->val[i] * alpha;
140 }
141
142 status = FASP_SUCCESS;
143 goto FINISHED;
144 }
145
146 C->row = A->row;
147 C->col = A->col;
148
149 C->IA = (INT*)fasp_mem_calloc(C->row + 1, sizeof(INT));
150
151 // allocate work space for C->JA and C->val
152 C->JA = (INT*)fasp_mem_calloc(A->nnz + B->nnz, sizeof(INT));
153
154 C->val = (REAL*)fasp_mem_calloc(A->nnz + B->nnz, sizeof(REAL));
155
156 // initial C->IA
157 memset(C->IA, 0, sizeof(INT) * (C->row + 1));
158 memset(C->JA, -1, sizeof(INT) * (A->nnz + B->nnz));
159
160 for (i = 0; i < A->row; ++i) {
161 countrow = 0;
162 for (j = A->IA[i]; j < A->IA[i + 1]; ++j) {
163 C->val[count] = alpha * A->val[j];
164 C->JA[count] = A->JA[j];
165 C->IA[i + 1]++;
166 count++;
167 countrow++;
168 } // end for js
169
170 for (k = B->IA[i]; k < B->IA[i + 1]; ++k) {
171 added = 0;
172
173 for (l = C->IA[i]; l < C->IA[i] + countrow + 1; l++) {
174 if (B->JA[k] == C->JA[l]) {
175 C->val[l] = C->val[l] + beta * B->val[k];
176 added = 1;
177 break;
178 }
179 } // end for l
180
181 if (added == 0) {
182 C->val[count] = beta * B->val[k];
183 C->JA[count] = B->JA[k];
184 C->IA[i + 1]++;
185 count++;
186 }
187
188 } // end for k
189
190 C->IA[i + 1] += C->IA[i];
191 }
192
193 C->nnz = count;
194 C->JA = (INT*)fasp_mem_realloc(C->JA, (count) * sizeof(INT));
195 C->val = (REAL*)fasp_mem_realloc(C->val, (count) * sizeof(REAL));
196
197#if MULTI_COLOR_ORDER
198 C->color = 0;
199 C->IC = NULL;
200 C->ICMAP = NULL;
201#endif
202
203FINISHED:
204 return status;
205}
206
220void fasp_blas_dcsr_axm(dCSRmat* A, const REAL alpha)
221{
222 const INT nnz = A->nnz;
223
224 // A direct calculation can be written as:
225 fasp_blas_darray_ax(nnz, alpha, A->val);
226}
227
242void fasp_blas_dcsr_mxv(const dCSRmat* A, const REAL* x, REAL* y)
243{
244 const INT m = A->row;
245 const INT * ia = A->IA, *ja = A->JA;
246 const REAL* aj = A->val;
247
248 INT i, k, begin_row, end_row, nnz_row;
249 register REAL temp;
250
251 SHORT nthreads = 1, use_openmp = FALSE;
252
253#ifdef _OPENMP
254 if (m > OPENMP_HOLDS) {
255 use_openmp = TRUE;
256 nthreads = fasp_get_num_threads();
257 }
258#endif
259
260 if (use_openmp) {
261 INT myid, mybegin, myend;
262
263#ifdef _OPENMP
264#pragma omp parallel for private(myid, mybegin, myend, i, temp, begin_row, end_row, \
265 nnz_row, k)
266#endif
267 for (myid = 0; myid < nthreads; myid++) {
268 fasp_get_start_end(myid, nthreads, m, &mybegin, &myend);
269 for (i = mybegin; i < myend; ++i) {
270 temp = 0.0;
271 begin_row = ia[i];
272 end_row = ia[i + 1];
273 nnz_row = end_row - begin_row;
274 switch (nnz_row) {
275 case 3:
276 k = begin_row;
277 temp += aj[k] * x[ja[k]];
278 k++;
279 temp += aj[k] * x[ja[k]];
280 k++;
281 temp += aj[k] * x[ja[k]];
282 break;
283 case 4:
284 k = begin_row;
285 temp += aj[k] * x[ja[k]];
286 k++;
287 temp += aj[k] * x[ja[k]];
288 k++;
289 temp += aj[k] * x[ja[k]];
290 k++;
291 temp += aj[k] * x[ja[k]];
292 break;
293 case 5:
294 k = begin_row;
295 temp += aj[k] * x[ja[k]];
296 k++;
297 temp += aj[k] * x[ja[k]];
298 k++;
299 temp += aj[k] * x[ja[k]];
300 k++;
301 temp += aj[k] * x[ja[k]];
302 k++;
303 temp += aj[k] * x[ja[k]];
304 break;
305 case 6:
306 k = begin_row;
307 temp += aj[k] * x[ja[k]];
308 k++;
309 temp += aj[k] * x[ja[k]];
310 k++;
311 temp += aj[k] * x[ja[k]];
312 k++;
313 temp += aj[k] * x[ja[k]];
314 k++;
315 temp += aj[k] * x[ja[k]];
316 k++;
317 temp += aj[k] * x[ja[k]];
318 break;
319 case 7:
320 k = begin_row;
321 temp += aj[k] * x[ja[k]];
322 k++;
323 temp += aj[k] * x[ja[k]];
324 k++;
325 temp += aj[k] * x[ja[k]];
326 k++;
327 temp += aj[k] * x[ja[k]];
328 k++;
329 temp += aj[k] * x[ja[k]];
330 k++;
331 temp += aj[k] * x[ja[k]];
332 k++;
333 temp += aj[k] * x[ja[k]];
334 break;
335 default:
336 for (k = begin_row; k < end_row; ++k) {
337 temp += aj[k] * x[ja[k]];
338 }
339 break;
340 }
341 y[i] = temp;
342 }
343 }
344 }
345
346 else {
347 for (i = 0; i < m; ++i) {
348 temp = 0.0;
349 begin_row = ia[i];
350 end_row = ia[i + 1];
351 nnz_row = end_row - begin_row;
352 switch (nnz_row) {
353 case 3:
354 k = begin_row;
355 temp += aj[k] * x[ja[k]];
356 k++;
357 temp += aj[k] * x[ja[k]];
358 k++;
359 temp += aj[k] * x[ja[k]];
360 break;
361 case 4:
362 k = begin_row;
363 temp += aj[k] * x[ja[k]];
364 k++;
365 temp += aj[k] * x[ja[k]];
366 k++;
367 temp += aj[k] * x[ja[k]];
368 k++;
369 temp += aj[k] * x[ja[k]];
370 break;
371 case 5:
372 k = begin_row;
373 temp += aj[k] * x[ja[k]];
374 k++;
375 temp += aj[k] * x[ja[k]];
376 k++;
377 temp += aj[k] * x[ja[k]];
378 k++;
379 temp += aj[k] * x[ja[k]];
380 k++;
381 temp += aj[k] * x[ja[k]];
382 break;
383 case 6:
384 k = begin_row;
385 temp += aj[k] * x[ja[k]];
386 k++;
387 temp += aj[k] * x[ja[k]];
388 k++;
389 temp += aj[k] * x[ja[k]];
390 k++;
391 temp += aj[k] * x[ja[k]];
392 k++;
393 temp += aj[k] * x[ja[k]];
394 k++;
395 temp += aj[k] * x[ja[k]];
396 break;
397 case 7:
398 k = begin_row;
399 temp += aj[k] * x[ja[k]];
400 k++;
401 temp += aj[k] * x[ja[k]];
402 k++;
403 temp += aj[k] * x[ja[k]];
404 k++;
405 temp += aj[k] * x[ja[k]];
406 k++;
407 temp += aj[k] * x[ja[k]];
408 k++;
409 temp += aj[k] * x[ja[k]];
410 k++;
411 temp += aj[k] * x[ja[k]];
412 break;
413 default:
414 for (k = begin_row; k < end_row; ++k) {
415 temp += aj[k] * x[ja[k]];
416 }
417 break;
418 }
419 y[i] = temp;
420 }
421 }
422}
423
438void fasp_blas_dcsr_mxv_agg(const dCSRmat* A, const REAL* x, REAL* y)
439{
440 const INT m = A->row;
441 const INT * ia = A->IA, *ja = A->JA;
442 INT i, k, begin_row, end_row;
443 register REAL temp;
444
445#ifdef _OPENMP
446 // variables for OpenMP
447 INT myid, mybegin, myend;
448 INT nthreads = fasp_get_num_threads();
449#endif
450
451#ifdef _OPENMP
452 if (m > OPENMP_HOLDS) {
453#pragma omp parallel for private(myid, i, mybegin, myend, temp, begin_row, end_row, k)
454 for (myid = 0; myid < nthreads; myid++) {
455 fasp_get_start_end(myid, nthreads, m, &mybegin, &myend);
456 for (i = mybegin; i < myend; i++) {
457 temp = 0.0;
458 begin_row = ia[i];
459 end_row = ia[i + 1];
460 for (k = begin_row; k < end_row; ++k) temp += x[ja[k]];
461 y[i] = temp;
462 }
463 }
464 } else {
465#endif
466 for (i = 0; i < m; ++i) {
467 temp = 0.0;
468 begin_row = ia[i];
469 end_row = ia[i + 1];
470 for (k = begin_row; k < end_row; ++k) temp += x[ja[k]];
471 y[i] = temp;
472 }
473#ifdef _OPENMP
474 }
475#endif
476}
477
494void fasp_blas_dcsr_aAxpy(const REAL alpha, const dCSRmat* A, const REAL* x, REAL* y)
495{
496 const INT m = A->row;
497 const INT * ia = A->IA, *ja = A->JA;
498 const REAL* aj = A->val;
499 INT i, k, begin_row, end_row;
500 register REAL temp;
501 SHORT nthreads = 1, use_openmp = FALSE;
502
503#ifdef _OPENMP
504 if (m > OPENMP_HOLDS) {
505 use_openmp = TRUE;
506 nthreads = fasp_get_num_threads();
507 }
508#endif
509 if (alpha == 1.0) {
510 if (use_openmp) {
511 INT myid, mybegin, myend;
512#ifdef _OPENMP
513#pragma omp parallel for private(myid, mybegin, myend, i, temp, begin_row, end_row, k)
514#endif
515 for (myid = 0; myid < nthreads; myid++) {
516 fasp_get_start_end(myid, nthreads, m, &mybegin, &myend);
517 for (i = mybegin; i < myend; ++i) {
518 temp = 0.0;
519 begin_row = ia[i];
520 end_row = ia[i + 1];
521 for (k = begin_row; k < end_row; ++k) temp += aj[k] * x[ja[k]];
522 y[i] += temp;
523 }
524 }
525 } else {
526 for (i = 0; i < m; ++i) {
527 temp = 0.0;
528 begin_row = ia[i];
529 end_row = ia[i + 1];
530 for (k = begin_row; k < end_row; ++k) temp += aj[k] * x[ja[k]];
531 y[i] += temp;
532 }
533 }
534 }
535
536 else if (alpha == -1.0) {
537 if (use_openmp) {
538 INT myid, mybegin, myend;
539 INT S = 0;
540#ifdef _OPENMP
541#pragma omp parallel for private(myid, mybegin, myend, temp, i, begin_row, end_row, k)
542#endif
543
544 for (myid = 0; myid < nthreads; myid++) {
545 fasp_get_start_end(myid, nthreads, m, &mybegin, &myend);
546 for (i = mybegin; i < myend; ++i) {
547 temp = 0.0;
548 begin_row = ia[i];
549 end_row = ia[i + 1];
550 for (k = begin_row; k < end_row; ++k) temp += aj[k] * x[ja[k]];
551 y[i] -= temp;
552 }
553 }
554 } else {
555 for (i = 0; i < m; ++i) {
556 temp = 0.0;
557 begin_row = ia[i];
558 end_row = ia[i + 1];
559 for (k = begin_row; k < end_row; ++k) temp += aj[k] * x[ja[k]];
560 y[i] -= temp;
561 }
562 }
563 }
564
565 else {
566 if (use_openmp) {
567 INT myid, mybegin, myend;
568#ifdef _OPENMP
569#pragma omp parallel for private(myid, mybegin, myend, i, temp, begin_row, end_row, k)
570#endif
571 for (myid = 0; myid < nthreads; myid++) {
572 fasp_get_start_end(myid, nthreads, m, &mybegin, &myend);
573 for (i = mybegin; i < myend; ++i) {
574 temp = 0.0;
575 begin_row = ia[i];
576 end_row = ia[i + 1];
577 for (k = begin_row; k < end_row; ++k) temp += aj[k] * x[ja[k]];
578 y[i] += temp * alpha;
579 }
580 }
581 } else {
582 for (i = 0; i < m; ++i) {
583 temp = 0.0;
584 begin_row = ia[i];
585 end_row = ia[i + 1];
586 for (k = begin_row; k < end_row; ++k) temp += aj[k] * x[ja[k]];
587 y[i] += temp * alpha;
588 }
589 }
590 }
591}
592
610 const dCSRmat* A,
611 const LONGREAL* x,
612 REAL* y)
613{
614 const INT m = A->row;
615 const INT * ia = A->IA, *ja = A->JA;
616 const REAL* aj = A->val;
617 INT i, k, begin_row, end_row;
618 register LONGREAL temp;
619
620 SHORT nthreads = 1, use_openmp = FALSE;
621
622#ifdef _OPENMP
623 if (m > OPENMP_HOLDS) {
624 use_openmp = TRUE;
625 nthreads = fasp_get_num_threads();
626 }
627#endif
628
629 if (alpha == 1.0) {
630 if (use_openmp) {
631 INT myid, mybegin, myend;
632#ifdef _OPENMP
633#pragma omp parallel for private(myid, mybegin, myend, i, temp, begin_row, end_row, k)
634#endif
635 for (myid = 0; myid < nthreads; myid++) {
636 fasp_get_start_end(myid, nthreads, m, &mybegin, &myend);
637 for (i = mybegin; i < myend; ++i) {
638 temp = 0.0;
639 begin_row = ia[i];
640 end_row = ia[i + 1];
641 for (k = begin_row; k < end_row; ++k) temp += aj[k] * x[ja[k]];
642 y[i] += temp;
643 }
644 }
645 } else {
646 for (i = 0; i < m; ++i) {
647 temp = 0.0;
648 begin_row = ia[i];
649 end_row = ia[i + 1];
650 for (k = begin_row; k < end_row; ++k) temp += aj[k] * x[ja[k]];
651 y[i] += temp;
652 }
653 }
654 }
655
656 else if (alpha == -1.0) {
657 if (use_openmp) {
658 INT myid, mybegin, myend;
659#ifdef _OPENMP
660#pragma omp parallel for private(myid, mybegin, myend, temp, i, begin_row, end_row, k)
661#endif
662 for (myid = 0; myid < nthreads; myid++) {
663 fasp_get_start_end(myid, nthreads, m, &mybegin, &myend);
664 for (i = mybegin; i < myend; ++i) {
665 temp = 0.0;
666 begin_row = ia[i];
667 end_row = ia[i + 1];
668 for (k = begin_row; k < end_row; ++k) temp += aj[k] * x[ja[k]];
669 y[i] -= temp;
670 }
671 }
672 } else {
673 for (i = 0; i < m; ++i) {
674 temp = 0.0;
675 begin_row = ia[i];
676 end_row = ia[i + 1];
677 for (k = begin_row; k < end_row; ++k) temp += aj[k] * x[ja[k]];
678 y[i] -= temp;
679 }
680 }
681 }
682
683 else {
684 if (use_openmp) {
685 INT myid, mybegin, myend;
686#ifdef _OPENMP
687#pragma omp parallel for private(myid, mybegin, myend, i, temp, begin_row, end_row, k)
688#endif
689 for (myid = 0; myid < nthreads; myid++) {
690 fasp_get_start_end(myid, nthreads, m, &mybegin, &myend);
691 for (i = mybegin; i < myend; ++i) {
692 temp = 0.0;
693 begin_row = ia[i];
694 end_row = ia[i + 1];
695 for (k = begin_row; k < end_row; ++k) temp += aj[k] * x[ja[k]];
696 y[i] += temp * alpha;
697 }
698 }
699 } else {
700 for (i = 0; i < m; ++i) {
701 temp = 0.0;
702 begin_row = ia[i];
703 end_row = ia[i + 1];
704 for (k = begin_row; k < end_row; ++k) temp += aj[k] * x[ja[k]];
705 y[i] += temp * alpha;
706 }
707 }
708 }
709}
710
728 const dCSRmat* A,
729 const REAL* x,
730 REAL* y)
731{
732 const INT m = A->row;
733 const INT *ia = A->IA, *ja = A->JA;
734
735 INT i, k, begin_row, end_row;
736 register REAL temp;
737
738 if (alpha == 1.0) {
739#ifdef _OPENMP
740 if (m > OPENMP_HOLDS) {
741 INT myid, mybegin, myend;
742 INT nthreads = fasp_get_num_threads();
743#pragma omp parallel for private(myid, i, mybegin, myend, begin_row, end_row, temp, k)
744 for (myid = 0; myid < nthreads; myid++) {
745 fasp_get_start_end(myid, nthreads, m, &mybegin, &myend);
746 for (i = mybegin; i < myend; ++i) {
747 temp = 0.0;
748 begin_row = ia[i];
749 end_row = ia[i + 1];
750 for (k = begin_row; k < end_row; ++k) temp += x[ja[k]];
751 y[i] += temp;
752 }
753 }
754 } else {
755#endif
756 for (i = 0; i < m; ++i) {
757 temp = 0.0;
758 begin_row = ia[i];
759 end_row = ia[i + 1];
760 for (k = begin_row; k < end_row; ++k) temp += x[ja[k]];
761 y[i] += temp;
762 }
763#ifdef _OPENMP
764 }
765#endif
766 } else if (alpha == -1.0) {
767#ifdef _OPENMP
768 if (m > OPENMP_HOLDS) {
769 INT myid, mybegin, myend;
770 INT nthreads = fasp_get_num_threads();
771#pragma omp parallel for private(myid, i, mybegin, myend, begin_row, end_row, temp, k)
772 for (myid = 0; myid < nthreads; myid++) {
773 fasp_get_start_end(myid, nthreads, m, &mybegin, &myend);
774 for (i = mybegin; i < myend; ++i) {
775 temp = 0.0;
776 begin_row = ia[i];
777 end_row = ia[i + 1];
778 for (k = begin_row; k < end_row; ++k) temp += x[ja[k]];
779 y[i] -= temp;
780 }
781 }
782 } else {
783#endif
784 for (i = 0; i < m; ++i) {
785 temp = 0.0;
786 begin_row = ia[i];
787 end_row = ia[i + 1];
788 for (k = begin_row; k < end_row; ++k) temp += x[ja[k]];
789 y[i] -= temp;
790 }
791#ifdef _OPENMP
792 }
793#endif
794 }
795
796 else {
797#ifdef _OPENMP
798 if (m > OPENMP_HOLDS) {
799 INT myid, mybegin, myend;
800 INT nthreads = fasp_get_num_threads();
801#pragma omp parallel for private(myid, i, mybegin, myend, begin_row, end_row, temp, k)
802 for (myid = 0; myid < nthreads; myid++) {
803 fasp_get_start_end(myid, nthreads, m, &mybegin, &myend);
804 for (i = mybegin; i < myend; ++i) {
805 temp = 0.0;
806 begin_row = ia[i];
807 end_row = ia[i + 1];
808 for (k = begin_row; k < end_row; ++k) temp += x[ja[k]];
809 y[i] += temp * alpha;
810 }
811 }
812 } else {
813#endif
814 for (i = 0; i < m; ++i) {
815 temp = 0.0;
816 begin_row = ia[i];
817 end_row = ia[i + 1];
818 for (k = begin_row; k < end_row; ++k) temp += x[ja[k]];
819 y[i] += temp * alpha;
820 }
821#ifdef _OPENMP
822 }
823#endif
824 }
825}
826
839REAL fasp_blas_dcsr_vmv(const dCSRmat* A, const REAL* x, const REAL* y)
840{
841 register REAL value = 0.0;
842 const INT m = A->row;
843 const INT * ia = A->IA, *ja = A->JA;
844 const REAL* aj = A->val;
845 INT i, k, begin_row, end_row;
846 register REAL temp;
847
848 SHORT use_openmp = FALSE;
849
850#ifdef _OPENMP
851 if (m > OPENMP_HOLDS) {
852 use_openmp = TRUE;
853 }
854#endif
855
856 if (use_openmp) {
857#ifdef _OPENMP
858#pragma omp parallel for reduction(+ : value) private(i, temp, begin_row, end_row, k)
859#endif
860 for (i = 0; i < m; ++i) {
861 temp = 0.0;
862 begin_row = ia[i];
863 end_row = ia[i + 1];
864 for (k = begin_row; k < end_row; ++k) temp += aj[k] * x[ja[k]];
865 value += y[i] * temp;
866 }
867 } else {
868 for (i = 0; i < m; ++i) {
869 temp = 0.0;
870 begin_row = ia[i];
871 end_row = ia[i + 1];
872 for (k = begin_row; k < end_row; ++k) temp += aj[k] * x[ja[k]];
873 value += y[i] * temp;
874 }
875 }
876 return value;
877}
878
893void fasp_blas_dcsr_mxm(const dCSRmat* A, const dCSRmat* B, dCSRmat* C)
894{
895 INT i, j, k, l, count;
896
897 INT* JD = (INT*)fasp_mem_calloc(B->col, sizeof(INT));
898
899 C->row = A->row;
900 C->col = B->col;
901 C->val = NULL;
902 C->JA = NULL;
903 C->IA = (INT*)fasp_mem_calloc(C->row + 1, sizeof(INT));
904
905 for (i = 0; i < B->col; ++i) JD[i] = -1;
906
907 // step 1: Find first the structure IA of C
908 for (i = 0; i < C->row; ++i) {
909 count = 0;
910
911 for (k = A->IA[i]; k < A->IA[i + 1]; ++k) {
912 for (j = B->IA[A->JA[k]]; j < B->IA[A->JA[k] + 1]; ++j) {
913 for (l = 0; l < count; l++) {
914 if (JD[l] == B->JA[j]) break;
915 }
916
917 if (l == count) {
918 JD[count] = B->JA[j];
919 count++;
920 }
921 }
922 }
923 C->IA[i + 1] = count;
924 for (j = 0; j < count; ++j) {
925 JD[j] = -1;
926 }
927 }
928
929 for (i = 0; i < C->row; ++i) C->IA[i + 1] += C->IA[i];
930
931 // step 2: Find the structure JA of C
932 INT countJD;
933
934 C->JA = (INT*)fasp_mem_calloc(C->IA[C->row], sizeof(INT));
935
936 for (i = 0; i < C->row; ++i) {
937 countJD = 0;
938 count = C->IA[i];
939 for (k = A->IA[i]; k < A->IA[i + 1]; ++k) {
940 for (j = B->IA[A->JA[k]]; j < B->IA[A->JA[k] + 1]; ++j) {
941 for (l = 0; l < countJD; l++) {
942 if (JD[l] == B->JA[j]) break;
943 }
944
945 if (l == countJD) {
946 C->JA[count] = B->JA[j];
947 JD[countJD] = B->JA[j];
948 count++;
949 countJD++;
950 }
951 }
952 }
953
954 // for (j=0;j<countJD;++j) JD[j]=-1;
955 fasp_iarray_set(countJD, JD, -1);
956 }
957
958 fasp_mem_free(JD);
959 JD = NULL;
960
961 // step 3: Find the structure A of C
962 C->val = (REAL*)fasp_mem_calloc(C->IA[C->row], sizeof(REAL));
963
964 for (i = 0; i < C->row; ++i) {
965 for (j = C->IA[i]; j < C->IA[i + 1]; ++j) {
966 C->val[j] = 0;
967 for (k = A->IA[i]; k < A->IA[i + 1]; ++k) {
968 for (l = B->IA[A->JA[k]]; l < B->IA[A->JA[k] + 1]; l++) {
969 if (B->JA[l] == C->JA[j]) {
970 C->val[j] += A->val[k] * B->val[l];
971 } // end if
972 } // end for l
973 } // end for k
974 } // end for j
975 } // end for i
976
977 C->nnz = C->IA[C->row] - C->IA[0];
978}
979
1000 const dCSRmat* A,
1001 const dCSRmat* P,
1002 dCSRmat* RAP)
1003{
1004 const INT n_coarse = R->row;
1005 const INT* R_i = R->IA;
1006 const INT* R_j = R->JA;
1007 const REAL* R_data = R->val;
1008
1009 const INT n_fine = A->row;
1010 const INT* A_i = A->IA;
1011 const INT* A_j = A->JA;
1012 const REAL* A_data = A->val;
1013
1014 const INT* P_i = P->IA;
1015 const INT* P_j = P->JA;
1016 const REAL* P_data = P->val;
1017
1018 INT RAP_size;
1019 INT* RAP_i = NULL;
1020 INT* RAP_j = NULL;
1021 REAL* RAP_data = NULL;
1022
1023#ifdef _OPENMP
1024 INT* P_marker = NULL;
1025 INT* A_marker = NULL;
1026#endif
1027
1028 INT* Ps_marker = NULL;
1029 INT* As_marker = NULL;
1030
1031 INT ic, i1, i2, i3, jj1, jj2, jj3;
1032 INT jj_counter, jj_row_begining;
1033 REAL r_entry, r_a_product, r_a_p_product;
1034
1035 INT nthreads = 1;
1036
1037#ifdef _OPENMP
1038 INT myid, mybegin, myend, Ctemp;
1039 nthreads = fasp_get_num_threads();
1040#endif
1041
1042 INT coarse_mul_nthreads = n_coarse * nthreads;
1043 INT fine_mul_nthreads = n_fine * nthreads;
1044 INT coarse_add_nthreads = n_coarse + nthreads;
1045 INT minus_one_length = coarse_mul_nthreads + fine_mul_nthreads;
1046 INT total_calloc = minus_one_length + coarse_add_nthreads + nthreads;
1047
1048 Ps_marker = (INT*)fasp_mem_calloc(total_calloc, sizeof(INT));
1049 As_marker = Ps_marker + coarse_mul_nthreads;
1050
1051 /*------------------------------------------------------*
1052 * First Pass: Determine size of RAP and set up RAP_i *
1053 *------------------------------------------------------*/
1054 RAP_i = (INT*)fasp_mem_calloc(n_coarse + 1, sizeof(INT));
1055
1056 fasp_iarray_set(minus_one_length, Ps_marker, -1);
1057
1058#ifdef _OPENMP
1059 INT* RAP_temp = As_marker + fine_mul_nthreads;
1060 INT* part_end = RAP_temp + coarse_add_nthreads;
1061
1062 if (n_coarse > OPENMP_HOLDS) {
1063#pragma omp parallel for private(myid, mybegin, myend, Ctemp, P_marker, A_marker, \
1064 jj_counter, ic, jj_row_begining, jj1, i1, jj2, \
1065 i2, jj3, i3)
1066 for (myid = 0; myid < nthreads; myid++) {
1067 fasp_get_start_end(myid, nthreads, n_coarse, &mybegin, &myend);
1068 P_marker = Ps_marker + myid * n_coarse;
1069 A_marker = As_marker + myid * n_fine;
1070 jj_counter = 0;
1071 for (ic = mybegin; ic < myend; ic++) {
1072 P_marker[ic] = jj_counter;
1073 jj_row_begining = jj_counter;
1074 jj_counter++;
1075
1076 for (jj1 = R_i[ic]; jj1 < R_i[ic + 1]; jj1++) {
1077 i1 = R_j[jj1];
1078 for (jj2 = A_i[i1]; jj2 < A_i[i1 + 1]; jj2++) {
1079 i2 = A_j[jj2];
1080 if (A_marker[i2] != ic) {
1081 A_marker[i2] = ic;
1082 for (jj3 = P_i[i2]; jj3 < P_i[i2 + 1]; jj3++) {
1083 i3 = P_j[jj3];
1084 if (P_marker[i3] < jj_row_begining) {
1085 P_marker[i3] = jj_counter;
1086 jj_counter++;
1087 }
1088 }
1089 }
1090 }
1091 }
1092
1093 RAP_temp[ic + myid] = jj_row_begining;
1094 }
1095 RAP_temp[myend + myid] = jj_counter;
1096
1097 part_end[myid] = myend + myid + 1;
1098 }
1099 fasp_iarray_cp(part_end[0], RAP_temp, RAP_i);
1100 jj_counter = part_end[0];
1101 Ctemp = 0;
1102 for (i1 = 1; i1 < nthreads; i1++) {
1103 Ctemp += RAP_temp[part_end[i1 - 1] - 1];
1104 for (jj1 = part_end[i1 - 1] + 1; jj1 < part_end[i1]; jj1++) {
1105 RAP_i[jj_counter] = RAP_temp[jj1] + Ctemp;
1106 jj_counter++;
1107 }
1108 }
1109 RAP_size = RAP_i[n_coarse];
1110 }
1111
1112 else {
1113#endif
1114 jj_counter = 0;
1115 for (ic = 0; ic < n_coarse; ic++) {
1116 Ps_marker[ic] = jj_counter;
1117 jj_row_begining = jj_counter;
1118 jj_counter++;
1119
1120 for (jj1 = R_i[ic]; jj1 < R_i[ic + 1]; jj1++) {
1121 i1 = R_j[jj1];
1122
1123 for (jj2 = A_i[i1]; jj2 < A_i[i1 + 1]; jj2++) {
1124 i2 = A_j[jj2];
1125 if (As_marker[i2] != ic) {
1126 As_marker[i2] = ic;
1127 for (jj3 = P_i[i2]; jj3 < P_i[i2 + 1]; jj3++) {
1128 i3 = P_j[jj3];
1129 if (Ps_marker[i3] < jj_row_begining) {
1130 Ps_marker[i3] = jj_counter;
1131 jj_counter++;
1132 }
1133 }
1134 }
1135 }
1136 }
1137
1138 RAP_i[ic] = jj_row_begining;
1139 }
1140
1141 RAP_i[n_coarse] = jj_counter;
1142 RAP_size = jj_counter;
1143#ifdef _OPENMP
1144 }
1145#endif
1146
1147 RAP_j = (INT*)fasp_mem_calloc(RAP_size, sizeof(INT));
1148 RAP_data = (REAL*)fasp_mem_calloc(RAP_size, sizeof(REAL));
1149
1150 fasp_iarray_set(minus_one_length, Ps_marker, -1);
1151
1152#ifdef _OPENMP
1153 if (n_coarse > OPENMP_HOLDS) {
1154#pragma omp parallel for private(myid, mybegin, myend, P_marker, A_marker, jj_counter, \
1155 ic, jj_row_begining, jj1, r_entry, i1, jj2, \
1156 r_a_product, i2, jj3, r_a_p_product, i3)
1157 for (myid = 0; myid < nthreads; myid++) {
1158 fasp_get_start_end(myid, nthreads, n_coarse, &mybegin, &myend);
1159 P_marker = Ps_marker + myid * n_coarse;
1160 A_marker = As_marker + myid * n_fine;
1161 jj_counter = RAP_i[mybegin];
1162 for (ic = mybegin; ic < myend; ic++) {
1163 P_marker[ic] = jj_counter;
1164 jj_row_begining = jj_counter;
1165 RAP_j[jj_counter] = ic;
1166 RAP_data[jj_counter] = 0.0;
1167 jj_counter++;
1168 for (jj1 = R_i[ic]; jj1 < R_i[ic + 1]; jj1++) {
1169 r_entry = R_data[jj1];
1170
1171 i1 = R_j[jj1];
1172 for (jj2 = A_i[i1]; jj2 < A_i[i1 + 1]; jj2++) {
1173 r_a_product = r_entry * A_data[jj2];
1174
1175 i2 = A_j[jj2];
1176 if (A_marker[i2] != ic) {
1177 A_marker[i2] = ic;
1178 for (jj3 = P_i[i2]; jj3 < P_i[i2 + 1]; jj3++) {
1179 r_a_p_product = r_a_product * P_data[jj3];
1180
1181 i3 = P_j[jj3];
1182 if (P_marker[i3] < jj_row_begining) {
1183 P_marker[i3] = jj_counter;
1184 RAP_data[jj_counter] = r_a_p_product;
1185 RAP_j[jj_counter] = i3;
1186 jj_counter++;
1187 } else {
1188 RAP_data[P_marker[i3]] += r_a_p_product;
1189 }
1190 }
1191 } else {
1192 for (jj3 = P_i[i2]; jj3 < P_i[i2 + 1]; jj3++) {
1193 i3 = P_j[jj3];
1194 r_a_p_product = r_a_product * P_data[jj3];
1195 RAP_data[P_marker[i3]] += r_a_p_product;
1196 }
1197 }
1198 }
1199 }
1200 }
1201 }
1202 } else {
1203#endif
1204 jj_counter = 0;
1205 for (ic = 0; ic < n_coarse; ic++) {
1206 Ps_marker[ic] = jj_counter;
1207 jj_row_begining = jj_counter;
1208 RAP_j[jj_counter] = ic;
1209 RAP_data[jj_counter] = 0.0;
1210 jj_counter++;
1211
1212 for (jj1 = R_i[ic]; jj1 < R_i[ic + 1]; jj1++) {
1213 r_entry = R_data[jj1];
1214
1215 i1 = R_j[jj1];
1216 for (jj2 = A_i[i1]; jj2 < A_i[i1 + 1]; jj2++) {
1217 r_a_product = r_entry * A_data[jj2];
1218
1219 i2 = A_j[jj2];
1220 if (As_marker[i2] != ic) {
1221 As_marker[i2] = ic;
1222 for (jj3 = P_i[i2]; jj3 < P_i[i2 + 1]; jj3++) {
1223 r_a_p_product = r_a_product * P_data[jj3];
1224
1225 i3 = P_j[jj3];
1226 if (Ps_marker[i3] < jj_row_begining) {
1227 Ps_marker[i3] = jj_counter;
1228 RAP_data[jj_counter] = r_a_p_product;
1229 RAP_j[jj_counter] = i3;
1230 jj_counter++;
1231 } else {
1232 RAP_data[Ps_marker[i3]] += r_a_p_product;
1233 }
1234 }
1235 } else {
1236 for (jj3 = P_i[i2]; jj3 < P_i[i2 + 1]; jj3++) {
1237 i3 = P_j[jj3];
1238 r_a_p_product = r_a_product * P_data[jj3];
1239 RAP_data[Ps_marker[i3]] += r_a_p_product;
1240 }
1241 }
1242 }
1243 }
1244 }
1245#ifdef _OPENMP
1246 }
1247#endif
1248
1249 RAP->row = n_coarse;
1250 RAP->col = n_coarse;
1251 RAP->nnz = RAP_size;
1252 RAP->IA = RAP_i;
1253 RAP->JA = RAP_j;
1254 RAP->val = RAP_data;
1255
1256 fasp_mem_free(Ps_marker);
1257 Ps_marker = NULL;
1258}
1259
1277 const dCSRmat* A,
1278 const dCSRmat* P,
1279 dCSRmat* RAP)
1280{
1281 const INT n_coarse = R->row;
1282 const INT* R_i = R->IA;
1283 const INT* R_j = R->JA;
1284
1285 const INT n_fine = A->row;
1286 const INT* A_i = A->IA;
1287 const INT* A_j = A->JA;
1288 const REAL* A_data = A->val;
1289
1290 const INT* P_i = P->IA;
1291 const INT* P_j = P->JA;
1292
1293 INT RAP_size;
1294 INT* RAP_i = NULL;
1295 INT* RAP_j = NULL;
1296 REAL* RAP_data = NULL;
1297
1298#ifdef _OPENMP
1299 INT* P_marker = NULL;
1300 INT* A_marker = NULL;
1301#endif
1302
1303 INT* Ps_marker = NULL;
1304 INT* As_marker = NULL;
1305
1306 INT ic, i1, i2, i3, jj1, jj2, jj3;
1307 INT jj_counter, jj_row_begining;
1308
1309 INT nthreads = 1;
1310
1311#ifdef _OPENMP
1312 INT myid, mybegin, myend, Ctemp;
1313 nthreads = fasp_get_num_threads();
1314#endif
1315
1316 INT coarse_mul_nthreads = n_coarse * nthreads;
1317 INT fine_mul_nthreads = n_fine * nthreads;
1318 INT coarse_add_nthreads = n_coarse + nthreads;
1319 INT minus_one_length = coarse_mul_nthreads + fine_mul_nthreads;
1320 INT total_calloc = minus_one_length + coarse_add_nthreads + nthreads;
1321
1322 Ps_marker = (INT*)fasp_mem_calloc(total_calloc, sizeof(INT));
1323 As_marker = Ps_marker + coarse_mul_nthreads;
1324
1325 /*------------------------------------------------------*
1326 * First Pass: Determine size of RAP and set up RAP_i *
1327 *------------------------------------------------------*/
1328 RAP_i = (INT*)fasp_mem_calloc(n_coarse + 1, sizeof(INT));
1329
1330 fasp_iarray_set(minus_one_length, Ps_marker, -1);
1331
1332#ifdef _OPENMP
1333 INT* RAP_temp = As_marker + fine_mul_nthreads;
1334 INT* part_end = RAP_temp + coarse_add_nthreads;
1335
1336 if (n_coarse > OPENMP_HOLDS) {
1337#pragma omp parallel for private(myid, mybegin, myend, Ctemp, P_marker, A_marker, \
1338 jj_counter, ic, jj_row_begining, jj1, i1, jj2, \
1339 i2, jj3, i3)
1340 for (myid = 0; myid < nthreads; myid++) {
1341 fasp_get_start_end(myid, nthreads, n_coarse, &mybegin, &myend);
1342 P_marker = Ps_marker + myid * n_coarse;
1343 A_marker = As_marker + myid * n_fine;
1344 jj_counter = 0;
1345 for (ic = mybegin; ic < myend; ic++) {
1346 P_marker[ic] = jj_counter;
1347 jj_row_begining = jj_counter;
1348 jj_counter++;
1349
1350 for (jj1 = R_i[ic]; jj1 < R_i[ic + 1]; jj1++) {
1351 i1 = R_j[jj1];
1352 for (jj2 = A_i[i1]; jj2 < A_i[i1 + 1]; jj2++) {
1353 i2 = A_j[jj2];
1354 if (A_marker[i2] != ic) {
1355 A_marker[i2] = ic;
1356 for (jj3 = P_i[i2]; jj3 < P_i[i2 + 1]; jj3++) {
1357 i3 = P_j[jj3];
1358 if (P_marker[i3] < jj_row_begining) {
1359 P_marker[i3] = jj_counter;
1360 jj_counter++;
1361 }
1362 }
1363 }
1364 }
1365 }
1366
1367 RAP_temp[ic + myid] = jj_row_begining;
1368 }
1369 RAP_temp[myend + myid] = jj_counter;
1370
1371 part_end[myid] = myend + myid + 1;
1372 }
1373 fasp_iarray_cp(part_end[0], RAP_temp, RAP_i);
1374 jj_counter = part_end[0];
1375 Ctemp = 0;
1376 for (i1 = 1; i1 < nthreads; i1++) {
1377 Ctemp += RAP_temp[part_end[i1 - 1] - 1];
1378 for (jj1 = part_end[i1 - 1] + 1; jj1 < part_end[i1]; jj1++) {
1379 RAP_i[jj_counter] = RAP_temp[jj1] + Ctemp;
1380 jj_counter++;
1381 }
1382 }
1383 RAP_size = RAP_i[n_coarse];
1384 }
1385
1386 else {
1387#endif
1388 jj_counter = 0;
1389 for (ic = 0; ic < n_coarse; ic++) {
1390 Ps_marker[ic] = jj_counter;
1391 jj_row_begining = jj_counter;
1392 jj_counter++;
1393
1394 for (jj1 = R_i[ic]; jj1 < R_i[ic + 1]; jj1++) {
1395 i1 = R_j[jj1];
1396
1397 for (jj2 = A_i[i1]; jj2 < A_i[i1 + 1]; jj2++) {
1398 i2 = A_j[jj2];
1399 if (As_marker[i2] != ic) {
1400 As_marker[i2] = ic;
1401 for (jj3 = P_i[i2]; jj3 < P_i[i2 + 1]; jj3++) {
1402 i3 = P_j[jj3];
1403 if (Ps_marker[i3] < jj_row_begining) {
1404 Ps_marker[i3] = jj_counter;
1405 jj_counter++;
1406 }
1407 }
1408 }
1409 }
1410 }
1411
1412 RAP_i[ic] = jj_row_begining;
1413 }
1414
1415 RAP_i[n_coarse] = jj_counter;
1416 RAP_size = jj_counter;
1417#ifdef _OPENMP
1418 }
1419#endif
1420
1421 RAP_j = (INT*)fasp_mem_calloc(RAP_size, sizeof(INT));
1422 RAP_data = (REAL*)fasp_mem_calloc(RAP_size, sizeof(REAL));
1423
1424 fasp_iarray_set(minus_one_length, Ps_marker, -1);
1425
1426#ifdef _OPENMP
1427 if (n_coarse > OPENMP_HOLDS) {
1428#pragma omp parallel for private(myid, mybegin, myend, P_marker, A_marker, jj_counter, \
1429 ic, jj_row_begining, jj1, i1, jj2, i2, jj3, i3)
1430 for (myid = 0; myid < nthreads; myid++) {
1431 fasp_get_start_end(myid, nthreads, n_coarse, &mybegin, &myend);
1432 P_marker = Ps_marker + myid * n_coarse;
1433 A_marker = As_marker + myid * n_fine;
1434 jj_counter = RAP_i[mybegin];
1435 for (ic = mybegin; ic < myend; ic++) {
1436 P_marker[ic] = jj_counter;
1437 jj_row_begining = jj_counter;
1438 RAP_j[jj_counter] = ic;
1439 RAP_data[jj_counter] = 0.0;
1440 jj_counter++;
1441 for (jj1 = R_i[ic]; jj1 < R_i[ic + 1]; jj1++) {
1442
1443 i1 = R_j[jj1];
1444 for (jj2 = A_i[i1]; jj2 < A_i[i1 + 1]; jj2++) {
1445
1446 i2 = A_j[jj2];
1447 if (A_marker[i2] != ic) {
1448 A_marker[i2] = ic;
1449 for (jj3 = P_i[i2]; jj3 < P_i[i2 + 1]; jj3++) {
1450
1451 i3 = P_j[jj3];
1452 if (P_marker[i3] < jj_row_begining) {
1453 P_marker[i3] = jj_counter;
1454 RAP_data[jj_counter] = A_data[jj2];
1455 RAP_j[jj_counter] = i3;
1456 jj_counter++;
1457 } else {
1458 RAP_data[P_marker[i3]] += A_data[jj2];
1459 }
1460 }
1461 } else {
1462 for (jj3 = P_i[i2]; jj3 < P_i[i2 + 1]; jj3++) {
1463 i3 = P_j[jj3];
1464 RAP_data[P_marker[i3]] += A_data[jj2];
1465 }
1466 }
1467 }
1468 }
1469 }
1470 }
1471 } else {
1472#endif
1473 jj_counter = 0;
1474 for (ic = 0; ic < n_coarse; ic++) {
1475 Ps_marker[ic] = jj_counter;
1476 jj_row_begining = jj_counter;
1477 RAP_j[jj_counter] = ic;
1478 RAP_data[jj_counter] = 0.0;
1479 jj_counter++;
1480
1481 for (jj1 = R_i[ic]; jj1 < R_i[ic + 1]; jj1++) {
1482 i1 = R_j[jj1];
1483 for (jj2 = A_i[i1]; jj2 < A_i[i1 + 1]; jj2++) {
1484 i2 = A_j[jj2];
1485 if (As_marker[i2] != ic) {
1486 As_marker[i2] = ic;
1487 for (jj3 = P_i[i2]; jj3 < P_i[i2 + 1]; jj3++) {
1488 i3 = P_j[jj3];
1489 if (Ps_marker[i3] < jj_row_begining) {
1490 Ps_marker[i3] = jj_counter;
1491 RAP_data[jj_counter] = A_data[jj2];
1492 RAP_j[jj_counter] = i3;
1493 jj_counter++;
1494 } else {
1495 RAP_data[Ps_marker[i3]] += A_data[jj2];
1496 }
1497 }
1498 } else {
1499 for (jj3 = P_i[i2]; jj3 < P_i[i2 + 1]; jj3++) {
1500 i3 = P_j[jj3];
1501 RAP_data[Ps_marker[i3]] += A_data[jj2];
1502 }
1503 }
1504 }
1505 }
1506 }
1507#ifdef _OPENMP
1508 }
1509#endif
1510
1511 RAP->row = n_coarse;
1512 RAP->col = n_coarse;
1513 RAP->nnz = RAP_size;
1514 RAP->IA = RAP_i;
1515 RAP->JA = RAP_j;
1516 RAP->val = RAP_data;
1517
1518 fasp_mem_free(Ps_marker);
1519 Ps_marker = NULL;
1520}
1521
1540 const dCSRmat* A,
1541 const dCSRmat* P,
1542 dCSRmat* B)
1543{
1544 const INT row = R->row, col = P->col;
1545 const INT * ir = R->IA, *ia = A->IA, *ip = P->IA;
1546 const INT * jr = R->JA, *ja = A->JA, *jp = P->JA;
1547 const REAL* aj = A->val;
1548
1549 INT * iac, *jac;
1550 REAL* acj;
1551
1552 INT* index = (INT*)fasp_mem_calloc(A->col, sizeof(INT));
1553 INT* iindex = (INT*)fasp_mem_calloc(col, sizeof(INT));
1554
1555 INT nB = A->nnz;
1556 INT i, i1, j, jj, k, length;
1557 INT begin_row, end_row, begin_rowA, end_rowA, begin_rowR, end_rowR;
1558 INT istart, iistart, count;
1559
1560 // for (i=0; i<A->col; ++i) index[i] = -2;
1561 fasp_iarray_set(A->col, index, -2);
1562
1563 // memcpy(iindex,index,col*sizeof(INT));
1564 fasp_iarray_cp(col, index, iindex);
1565
1566 jac = (INT*)fasp_mem_calloc(nB, sizeof(INT));
1567
1568 iac = (INT*)fasp_mem_calloc(row + 1, sizeof(INT));
1569
1570 REAL* temp = (REAL*)fasp_mem_calloc(A->col, sizeof(REAL));
1571
1572 iac[0] = 0;
1573
1574 // First loop: form sparsity partern of R*A*P
1575 for (i = 0; i < row; ++i) {
1576 // reset istart and length at the begining of each loop
1577 istart = -1;
1578 length = 0;
1579 i1 = i + 1;
1580
1581 // go across the rows in R
1582 begin_rowR = ir[i];
1583 end_rowR = ir[i1];
1584 for (jj = begin_rowR; jj < end_rowR; ++jj) {
1585 j = jr[jj];
1586 // for each column in A
1587 begin_rowA = ia[j];
1588 end_rowA = ia[j + 1];
1589 for (k = begin_rowA; k < end_rowA; ++k) {
1590 if (index[ja[k]] == -2) {
1591 index[ja[k]] = istart;
1592 istart = ja[k];
1593 ++length;
1594 }
1595 }
1596 }
1597
1598 // book-keeping [reseting length and setting iistart]
1599 count = length;
1600 iistart = -1;
1601 length = 0;
1602
1603 // use each column that would have resulted from R*A
1604 for (j = 0; j < count; ++j) {
1605 jj = istart;
1606 istart = index[istart];
1607 index[jj] = -2;
1608
1609 // go across the row of P
1610 begin_row = ip[jj];
1611 end_row = ip[jj + 1];
1612 for (k = begin_row; k < end_row; ++k) {
1613 // pull out the appropriate columns of P
1614 if (iindex[jp[k]] == -2) {
1615 iindex[jp[k]] = iistart;
1616 iistart = jp[k];
1617 ++length;
1618 }
1619 } // end for k
1620 } // end for j
1621
1622 // set B->IA
1623 iac[i1] = iac[i] + length;
1624
1625 if (iac[i1] > nB) { // Memory not enough!!!
1626 nB = nB * 2;
1627 jac = (INT*)fasp_mem_realloc(jac, nB * sizeof(INT));
1628 }
1629
1630 // put the correct columns of p into the column list of the products
1631 begin_row = iac[i];
1632 end_row = iac[i1];
1633 for (j = begin_row; j < end_row; ++j) {
1634 // put the value in B->JA
1635 jac[j] = iistart;
1636 // set istart to the next value
1637 iistart = iindex[iistart];
1638 // set the iindex spot to 0
1639 iindex[jac[j]] = -2;
1640 } // end j
1641
1642 } // end i: First loop
1643
1644 jac = (INT*)fasp_mem_realloc(jac, (iac[row]) * sizeof(INT));
1645
1646 acj = (REAL*)fasp_mem_calloc(iac[row], sizeof(REAL));
1647
1648 INT* BTindex = (INT*)fasp_mem_calloc(col, sizeof(INT));
1649
1650 // Second loop: compute entries of R*A*P
1651 for (i = 0; i < row; ++i) {
1652 i1 = i + 1;
1653
1654 // each col of B
1655 begin_row = iac[i];
1656 end_row = iac[i1];
1657 for (j = begin_row; j < end_row; ++j) {
1658 BTindex[jac[j]] = j;
1659 }
1660
1661 // reset istart and length at the beginning of each loop
1662 istart = -1;
1663 length = 0;
1664
1665 // go across the rows in R
1666 begin_rowR = ir[i];
1667 end_rowR = ir[i1];
1668 for (jj = begin_rowR; jj < end_rowR; ++jj) {
1669 j = jr[jj];
1670
1671 // for each column in A
1672 begin_rowA = ia[j];
1673 end_rowA = ia[j + 1];
1674 for (k = begin_rowA; k < end_rowA; ++k) {
1675 if (index[ja[k]] == -2) {
1676 index[ja[k]] = istart;
1677 istart = ja[k];
1678 ++length;
1679 }
1680 temp[ja[k]] += aj[k];
1681 }
1682 }
1683
1684 // book-keeping [resetting length and setting iistart]
1685 // use each column that would have resulted from R*A
1686 for (j = 0; j < length; ++j) {
1687 jj = istart;
1688 istart = index[istart];
1689 index[jj] = -2;
1690
1691 // go across the row of P
1692 begin_row = ip[jj];
1693 end_row = ip[jj + 1];
1694 for (k = begin_row; k < end_row; ++k) {
1695 // pull out the appropriate columns of P
1696 acj[BTindex[jp[k]]] += temp[jj];
1697 }
1698 temp[jj] = 0.0;
1699 }
1700
1701 } // end for i: Second loop
1702
1703 // setup coarse matrix B
1704 B->row = row;
1705 B->col = col;
1706 B->IA = iac;
1707 B->JA = jac;
1708 B->val = acj;
1709 B->nnz = B->IA[B->row] - B->IA[0];
1710
1711 fasp_mem_free(temp);
1712 temp = NULL;
1713 fasp_mem_free(index);
1714 index = NULL;
1715 fasp_mem_free(iindex);
1716 iindex = NULL;
1717 fasp_mem_free(BTindex);
1718 BTindex = NULL;
1719}
1720
1746 const dCSRmat* A,
1747 const dCSRmat* P,
1748 dCSRmat* Ac)
1749{
1750 const INT nc = Pt->row, n = Pt->col, nnzP = P->nnz, nnzA = A->nnz;
1751 INT i, maxrpout;
1752
1753 // shift A from usual to ltz format
1754#ifdef _OPENMP
1755#pragma omp parallel for if (n > OPENMP_HOLDS)
1756#endif
1757 for (i = 0; i <= n; ++i) {
1758 A->IA[i]++;
1759 P->IA[i]++;
1760 }
1761
1762#ifdef _OPENMP
1763#pragma omp parallel for if (nnzA > OPENMP_HOLDS)
1764#endif
1765 for (i = 0; i < nnzA; ++i) {
1766 A->JA[i]++;
1767 }
1768
1769#ifdef _OPENMP
1770#pragma omp parallel for if (nc > OPENMP_HOLDS)
1771#endif
1772 for (i = 0; i <= nc; ++i) {
1773 Pt->IA[i]++;
1774 }
1775
1776#ifdef _OPENMP
1777#pragma omp parallel for if (nnzP > OPENMP_HOLDS)
1778#endif
1779 for (i = 0; i < nnzP; ++i) {
1780 P->JA[i]++;
1781 Pt->JA[i]++;
1782 }
1783
1784 // compute P' A P
1785 dCSRmat PtAP =
1786 fasp_blas_dcsr_rap2(Pt->IA, Pt->JA, Pt->val, A->IA, A->JA, A->val, Pt->IA,
1787 Pt->JA, Pt->val, n, nc, &maxrpout, P->IA, P->JA);
1788
1789 Ac->row = PtAP.row;
1790 Ac->col = PtAP.col;
1791 Ac->nnz = PtAP.nnz;
1792 Ac->IA = PtAP.IA;
1793 Ac->JA = PtAP.JA;
1794 Ac->val = PtAP.val;
1795
1796 // shift A back from ltz format
1797#ifdef _OPENMP
1798#pragma omp parallel for if (Ac->row > OPENMP_HOLDS)
1799#endif
1800 for (i = 0; i <= Ac->row; ++i) Ac->IA[i]--;
1801
1802#ifdef _OPENMP
1803#pragma omp parallel for if (Ac->nnz > OPENMP_HOLDS)
1804#endif
1805 for (i = 0; i < Ac->nnz; ++i) Ac->JA[i]--;
1806
1807#ifdef _OPENMP
1808#pragma omp parallel for if (n > OPENMP_HOLDS)
1809#endif
1810 for (i = 0; i <= n; ++i) A->IA[i]--;
1811
1812#ifdef _OPENMP
1813#pragma omp parallel for if (nnzA > OPENMP_HOLDS)
1814#endif
1815 for (i = 0; i < nnzA; ++i) A->JA[i]--;
1816
1817#ifdef _OPENMP
1818#pragma omp parallel for if (n > OPENMP_HOLDS)
1819#endif
1820 for (i = 0; i <= n; ++i) P->IA[i]--;
1821
1822#ifdef _OPENMP
1823#pragma omp parallel for if (nnzP > OPENMP_HOLDS)
1824#endif
1825 for (i = 0; i < nnzP; ++i) P->JA[i]--;
1826
1827#ifdef _OPENMP
1828#pragma omp parallel for if (nc > OPENMP_HOLDS)
1829#endif
1830 for (i = 0; i <= nc; ++i) Pt->IA[i]--;
1831
1832#ifdef _OPENMP
1833#pragma omp parallel for if (nnzP > OPENMP_HOLDS)
1834#endif
1835 for (i = 0; i < nnzP; ++i) Pt->JA[i]--;
1836
1837 return;
1838}
1839
1856 INT* jr,
1857 REAL* r,
1858 INT* ia,
1859 INT* ja,
1860 REAL* a,
1861 INT* ipt,
1862 INT* jpt,
1863 REAL* pt,
1864 INT n,
1865 INT nc,
1866 INT* maxrpout,
1867 INT* ipin,
1868 INT* jpin)
1869{
1870 dCSRmat ac;
1871 INT n1, nc1, nnzp, maxrp;
1872 INT * ip = NULL, *jp = NULL;
1873
1874 /*
1875 if ipin is null, this
1876 means that we need to do the transpose of p here; otherwise,
1877 these are considered to be input
1878 */
1879 maxrp = 0;
1880 nnzp = ipt[nc] - 1;
1881 n1 = n + 1;
1882
1883 if (!ipin) {
1884 ip = (INT*)calloc(n1, sizeof(INT));
1885 jp = (INT*)calloc(nnzp, sizeof(INT));
1886 /* these must be null anyway, so no need to assign null
1887 ipin=NULL;
1888 jpin=NULL;
1889 */
1890 } else {
1891 ip = ipin;
1892 jp = jpin;
1893 }
1894
1895 fasp_sparse_iit_(ipt, jpt, &nc, &n, ip, jp);
1896
1897 /* triple matrix product: R * A * transpose(P^T)=R*A*P.*/
1898 /* A is square n by n*/
1899 /* Note: to compute R*A* P the input are R, A and P^T */
1900 /* we need to transpose now the structure of P, because the input is P^T */
1901 /* end of transpose of the boolean corresponding to P */
1902 /* ic are the addresses of the rows of the output */
1903 nc1 = nc + 1;
1904 ac.IA = (INT*)calloc(nc1, sizeof(INT));
1905
1906 /*
1907 First call is with jc=null so that we find the number of
1908 nonzeros in the result
1909 */
1910 ac.JA = NULL;
1911 fasp_sparse_rapms_(ir, jr, ia, ja, ip, jp, &n, &nc, ac.IA, ac.JA, &maxrp);
1912 ac.nnz = ac.IA[nc] - 1;
1913 ac.JA = (INT*)calloc(ac.nnz, sizeof(INT));
1914
1915 /*
1916 second call is to fill the column indexes array jc.
1917 */
1918 fasp_sparse_rapms_(ir, jr, ia, ja, ip, jp, &n, &nc, ac.IA, ac.JA, &maxrp);
1919 if (!ipin) {
1920 if (ip) free(ip);
1921 if (jp) free(jp);
1922 }
1923 ac.val = (REAL*)calloc(ac.nnz, sizeof(REAL));
1924 /* this is the compute with the entries */
1925 fasp_sparse_rapcmp_(ir, jr, r, ia, ja, a, ipt, jpt, pt, &n, &nc, ac.IA, ac.JA,
1926 ac.val, &maxrp);
1927 ac.row = nc;
1928 ac.col = nc;
1929
1930 /*=========================================================*/
1931 *maxrpout = maxrp;
1932
1933 return ac;
1934}
1935
1954void fasp_blas_dcsr_rap4(dCSRmat* R, dCSRmat* A, dCSRmat* P, dCSRmat* B, INT* icor_ysk)
1955{
1956 SHORT nthreads = 1, use_openmp = FALSE;
1957
1958#ifdef _OPENMP
1959 if (R->row > OPENMP_HOLDS) {
1960 use_openmp = TRUE;
1961 nthreads = fasp_get_num_threads();
1962 }
1963#endif
1964
1965 if (use_openmp) {
1966 const INT row = R->row, col = P->col;
1967 INT * ir = R->IA, *ia = A->IA, *ip = P->IA;
1968 INT * jr = R->JA, *ja = A->JA, *jp = P->JA;
1969 REAL * rj = R->val, *aj = A->val, *pj = P->val;
1970 INT istart, iistart;
1971 INT end_row, end_rowA, end_rowR;
1972 INT i, j, jj, k, length, myid, mybegin, myend;
1973 INT jj_counter, ic, jj_row_begining, jj1, i1, jj2, i2, jj3, i3;
1974 INT* index = NULL;
1975 INT* iindex = NULL;
1976 INT* BTindex = NULL;
1977 REAL* temp = NULL;
1978 INT FiveMyid, min_A, min_P, A_pos, P_pos, FiveIc;
1979 INT minus_one_length_A = icor_ysk[5 * nthreads];
1980 INT minus_one_length_P = icor_ysk[5 * nthreads + 1];
1981 INT minus_one_length = minus_one_length_A + minus_one_length_P;
1982
1983 INT* iindexs =
1984 (INT*)fasp_mem_calloc(minus_one_length + minus_one_length_P, sizeof(INT));
1985
1986#if DEBUG_MODE > 1
1987 total_alloc_mem += minus_one_length * sizeof(INT);
1988#endif
1989 INT* indexs = iindexs + minus_one_length_P;
1990 INT* BTindexs = indexs + minus_one_length_A;
1991
1992 INT* iac = (INT*)fasp_mem_calloc(row + 1, sizeof(INT));
1993
1994#if DEBUG_MODE > 1
1995 total_alloc_mem += (row + 1) * sizeof(INT);
1996#endif
1997
1998 INT* part_end = (INT*)fasp_mem_calloc(2 * nthreads + row, sizeof(INT));
1999
2000#if DEBUG_MODE > 1
2001 total_alloc_mem += (2 * nthreads + row) * sizeof(INT);
2002#endif
2003
2004 INT* iac_temp = part_end + nthreads;
2005 INT** iindex_array = (INT**)fasp_mem_calloc(nthreads, sizeof(INT*));
2006 INT** index_array = (INT**)fasp_mem_calloc(nthreads, sizeof(INT*));
2007
2008 fasp_iarray_set(minus_one_length, iindexs, -2);
2009
2010#ifdef _OPENMP
2011#pragma omp parallel for private(myid, FiveMyid, mybegin, myend, min_A, min_P, index, \
2012 iindex, A_pos, P_pos, ic, FiveIc, jj_counter, \
2013 jj_row_begining, end_rowR, jj1, i1, end_rowA, \
2014 jj2, i2, end_row, jj3, i3)
2015#endif
2016 for (myid = 0; myid < nthreads; myid++) {
2017 FiveMyid = myid * 5;
2018 mybegin = icor_ysk[FiveMyid];
2019 if (myid == nthreads - 1) {
2020 myend = row;
2021 } else {
2022 myend = icor_ysk[FiveMyid + 5];
2023 }
2024 min_A = icor_ysk[FiveMyid + 2];
2025 min_P = icor_ysk[FiveMyid + 4];
2026 A_pos = 0;
2027 P_pos = 0;
2028 for (ic = myid - 1; ic >= 0; ic--) {
2029 FiveIc = ic * 5;
2030 A_pos += icor_ysk[FiveIc + 1];
2031 P_pos += icor_ysk[FiveIc + 3];
2032 }
2033 iindex_array[myid] = iindex = iindexs + P_pos - min_P;
2034 index_array[myid] = index = indexs + A_pos - min_A;
2035 jj_counter = 0;
2036 for (ic = mybegin; ic < myend; ic++) {
2037 iindex[ic] = jj_counter;
2038 jj_row_begining = jj_counter;
2039 jj_counter++;
2040 end_rowR = ir[ic + 1];
2041 for (jj1 = ir[ic]; jj1 < end_rowR; jj1++) {
2042 i1 = jr[jj1];
2043 end_rowA = ia[i1 + 1];
2044 for (jj2 = ia[i1]; jj2 < end_rowA; jj2++) {
2045 i2 = ja[jj2];
2046 if (index[i2] != ic) {
2047 index[i2] = ic;
2048 end_row = ip[i2 + 1];
2049 for (jj3 = ip[i2]; jj3 < end_row; jj3++) {
2050 i3 = jp[jj3];
2051 if (iindex[i3] < jj_row_begining) {
2052 iindex[i3] = jj_counter;
2053 jj_counter++;
2054 }
2055 }
2056 }
2057 }
2058 }
2059 iac_temp[ic + myid] = jj_row_begining;
2060 }
2061 iac_temp[myend + myid] = jj_counter;
2062 part_end[myid] = myend + myid + 1;
2063 }
2064 fasp_iarray_cp(part_end[0], iac_temp, iac);
2065 jj_counter = part_end[0];
2066 INT Ctemp = 0;
2067 for (i1 = 1; i1 < nthreads; i1++) {
2068 Ctemp += iac_temp[part_end[i1 - 1] - 1];
2069 for (jj1 = part_end[i1 - 1] + 1; jj1 < part_end[i1]; jj1++) {
2070 iac[jj_counter] = iac_temp[jj1] + Ctemp;
2071 jj_counter++;
2072 }
2073 }
2074 INT* jac = (INT*)fasp_mem_calloc(iac[row], sizeof(INT));
2075#if DEBUG_MODE > 1
2076 total_alloc_mem += iac[row] * sizeof(INT);
2077#endif
2078 fasp_iarray_set(minus_one_length, iindexs, -2);
2079#ifdef _OPENMP
2080#pragma omp parallel for private(myid, index, iindex, FiveMyid, mybegin, myend, i, \
2081 istart, length, i1, end_rowR, jj, j, end_rowA, k, \
2082 iistart, end_row)
2083#endif
2084 for (myid = 0; myid < nthreads; myid++) {
2085 iindex = iindex_array[myid];
2086 index = index_array[myid];
2087 FiveMyid = myid * 5;
2088 mybegin = icor_ysk[FiveMyid];
2089 if (myid == nthreads - 1) {
2090 myend = row;
2091 } else {
2092 myend = icor_ysk[FiveMyid + 5];
2093 }
2094 for (i = mybegin; i < myend; ++i) {
2095 istart = -1;
2096 length = 0;
2097 i1 = i + 1;
2098 // go across the rows in R
2099 end_rowR = ir[i1];
2100 for (jj = ir[i]; jj < end_rowR; ++jj) {
2101 j = jr[jj];
2102 // for each column in A
2103 end_rowA = ia[j + 1];
2104 for (k = ia[j]; k < end_rowA; ++k) {
2105 if (index[ja[k]] == -2) {
2106 index[ja[k]] = istart;
2107 istart = ja[k];
2108 ++length;
2109 }
2110 }
2111 }
2112 // book-keeping [resetting length and setting iistart]
2113 // count = length;
2114 iistart = -1;
2115 // length = 0;
2116 // use each column that would have resulted from R*A
2117 // for (j = 0; j < count; ++ j) {
2118 for (j = 0; j < length; ++j) {
2119 jj = istart;
2120 istart = index[istart];
2121 index[jj] = -2;
2122 // go across the row of P
2123 end_row = ip[jj + 1];
2124 for (k = ip[jj]; k < end_row; ++k) {
2125 // pull out the appropriate columns of P
2126 if (iindex[jp[k]] == -2) {
2127 iindex[jp[k]] = iistart;
2128 iistart = jp[k];
2129 //++length;
2130 }
2131 } // end for k
2132 } // end for j
2133 // put the correct columns of p into the column list of the products
2134 end_row = iac[i1];
2135 for (j = iac[i]; j < end_row; ++j) {
2136 // put the value in B->JA
2137 jac[j] = iistart;
2138 // set istart to the next value
2139 iistart = iindex[iistart];
2140 // set the iindex spot to 0
2141 iindex[jac[j]] = -2;
2142 } // end j
2143 }
2144 }
2145 // Third loop: compute entries of R*A*P
2146 REAL* acj = (REAL*)fasp_mem_calloc(iac[row], sizeof(REAL));
2147#if DEBUG_MODE > 1
2148 total_alloc_mem += iac[row] * sizeof(REAL);
2149#endif
2150 REAL* temps = (REAL*)fasp_mem_calloc(minus_one_length_A, sizeof(REAL));
2151#if DEBUG_MODE > 1
2152 total_alloc_mem += minus_one_length_A * sizeof(REAL);
2153#endif
2154
2155#ifdef _OPENMP
2156#pragma omp parallel for private(myid, index, FiveMyid, mybegin, myend, min_A, min_P, \
2157 A_pos, P_pos, ic, FiveIc, BTindex, temp, i, i1, \
2158 end_row, j, istart, length, end_rowR, jj, \
2159 end_rowA, k)
2160#endif
2161 for (myid = 0; myid < nthreads; myid++) {
2162 index = index_array[myid];
2163 FiveMyid = myid * 5;
2164 mybegin = icor_ysk[FiveMyid];
2165 if (myid == nthreads - 1) {
2166 myend = row;
2167 } else {
2168 myend = icor_ysk[FiveMyid + 5];
2169 }
2170 min_A = icor_ysk[FiveMyid + 2];
2171 min_P = icor_ysk[FiveMyid + 4];
2172 A_pos = 0;
2173 P_pos = 0;
2174 for (ic = myid - 1; ic >= 0; ic--) {
2175 FiveIc = ic * 5;
2176 A_pos += icor_ysk[FiveIc + 1];
2177 P_pos += icor_ysk[FiveIc + 3];
2178 }
2179 BTindex = BTindexs + P_pos - min_P;
2180 temp = temps + A_pos - min_A;
2181 for (i = mybegin; i < myend; ++i) {
2182 i1 = i + 1;
2183 // each col of B
2184 end_row = iac[i1];
2185 for (j = iac[i]; j < end_row; ++j) {
2186 BTindex[jac[j]] = j;
2187 }
2188 // reset istart and length at the beginning of each loop
2189 istart = -1;
2190 length = 0;
2191 // go across the rows in R
2192 end_rowR = ir[i1];
2193 for (jj = ir[i]; jj < end_rowR; ++jj) {
2194 j = jr[jj];
2195 // for each column in A
2196 end_rowA = ia[j + 1];
2197 for (k = ia[j]; k < end_rowA; ++k) {
2198 if (index[ja[k]] == -2) {
2199 index[ja[k]] = istart;
2200 istart = ja[k];
2201 ++length;
2202 }
2203 temp[ja[k]] += rj[jj] * aj[k];
2204 }
2205 }
2206 // book-keeping [resetting length and setting iistart]
2207 // use each column that would have resulted from R*A
2208 for (j = 0; j < length; ++j) {
2209 jj = istart;
2210 istart = index[istart];
2211 index[jj] = -2;
2212 // go across the row of P
2213 end_row = ip[jj + 1];
2214 for (k = ip[jj]; k < end_row; ++k) {
2215 // pull out the appropriate columns of P
2216 acj[BTindex[jp[k]]] += temp[jj] * pj[k];
2217 }
2218 temp[jj] = 0.0;
2219 }
2220 }
2221 }
2222 // setup coarse matrix B
2223 B->row = row;
2224 B->col = col;
2225 B->IA = iac;
2226 B->JA = jac;
2227 B->val = acj;
2228 B->nnz = B->IA[B->row] - B->IA[0];
2229
2230 fasp_mem_free(temps);
2231 temps = NULL;
2232 fasp_mem_free(iindexs);
2233 iindexs = NULL;
2234 fasp_mem_free(part_end);
2235 part_end = NULL;
2236 fasp_mem_free(iindex_array);
2237 iindex_array = NULL;
2238 fasp_mem_free(index_array);
2239 index_array = NULL;
2240 } else {
2241 fasp_blas_dcsr_rap(R, A, P, B);
2242 }
2243}
2244
2245/*---------------------------------*/
2246/*-- End of File --*/
2247/*---------------------------------*/
void fasp_iarray_set(const INT n, INT *x, const INT val)
Set initial value for an array to be x=val.
Definition: AuxArray.c:98
void fasp_iarray_cp(const INT n, const INT *x, INT *y)
Copy an array to the other y=x.
Definition: AuxArray.c:227
void fasp_mem_free(void *mem)
Free up previous allocated memory body and set pointer to NULL.
Definition: AuxMemory.c:152
void * fasp_mem_realloc(void *oldmem, const LONGLONG tsize)
Reallocate, initiate, and check memory.
Definition: AuxMemory.c:113
void * fasp_mem_calloc(const unsigned int size, const unsigned int type)
Allocate, initiate, and check memory.
Definition: AuxMemory.c:65
void fasp_get_start_end(const INT procid, const INT nprocs, const INT n, INT *start, INT *end)
Assign Load to each thread.
Definition: AuxThreads.c:93
void fasp_blas_darray_ax(const INT n, const REAL a, REAL *x)
x = a*x
Definition: BlaArray.c:43
void fasp_dcsr_alloc(const INT m, const INT n, const INT nnz, dCSRmat *A)
Allocate CSR sparse matrix memory space.
Definition: BlaSparseCSR.c:138
void fasp_sparse_rapms_(INT *ir, INT *jr, INT *ia, INT *ja, INT *ip, INT *jp, INT *nin, INT *ncin, INT *iac, INT *jac, INT *maxrout)
Calculates the nonzero structure of R*A*P, if jac is not null. If jac is null only finds num of nonze...
void fasp_sparse_iit_(INT *ia, INT *ja, INT *na, INT *ma, INT *iat, INT *jat)
Transpose a boolean matrix (only given by ia, ja)
void fasp_sparse_rapcmp_(INT *ir, INT *jr, REAL *r, INT *ia, INT *ja, REAL *a, INT *ipt, INT *jpt, REAL *pt, INT *nin, INT *ncin, INT *iac, INT *jac, REAL *ac, INT *idummy)
Calculates R*A*P after the nonzero structure of the result is known. iac,jac,ac have to be allocated ...
void fasp_blas_dcsr_mxv_agg(const dCSRmat *A, const REAL *x, REAL *y)
Matrix-vector multiplication y = A*x (nonzeros of A = 1)
Definition: BlaSpmvCSR.c:438
void fasp_blas_ldcsr_aAxpy(const REAL alpha, const dCSRmat *A, const LONGREAL *x, REAL *y)
Matrix-vector multiplication y = alpha*A*x + y.
Definition: BlaSpmvCSR.c:609
void fasp_blas_dcsr_axm(dCSRmat *A, const REAL alpha)
Multiply a sparse matrix A in CSR format by a scalar alpha.
Definition: BlaSpmvCSR.c:220
void fasp_blas_dcsr_mxv(const dCSRmat *A, const REAL *x, REAL *y)
Matrix-vector multiplication y = A*x.
Definition: BlaSpmvCSR.c:242
dCSRmat fasp_blas_dcsr_rap2(INT *ir, INT *jr, REAL *r, INT *ia, INT *ja, REAL *a, INT *ipt, INT *jpt, REAL *pt, INT n, INT nc, INT *maxrpout, INT *ipin, INT *jpin)
Compute R*A*P.
Definition: BlaSpmvCSR.c:1855
void fasp_blas_dcsr_aAxpy_agg(const REAL alpha, const dCSRmat *A, const REAL *x, REAL *y)
Matrix-vector multiplication y = alpha*A*x + y (nonzeros of A = 1)
Definition: BlaSpmvCSR.c:727
unsigned long total_alloc_mem
void fasp_blas_dcsr_mxm(const dCSRmat *A, const dCSRmat *B, dCSRmat *C)
Sparse matrix multiplication C=A*B.
Definition: BlaSpmvCSR.c:893
void fasp_blas_dcsr_rap_agg1(const dCSRmat *R, const dCSRmat *A, const dCSRmat *P, dCSRmat *B)
Triple sparse matrix multiplication B=R*A*P (nonzeros of R, P = 1)
Definition: BlaSpmvCSR.c:1539
void fasp_blas_dcsr_rap_agg(const dCSRmat *R, const dCSRmat *A, const dCSRmat *P, dCSRmat *RAP)
Triple sparse matrix multiplication B=R*A*P (nonzeros of R, P = 1)
Definition: BlaSpmvCSR.c:1276
REAL fasp_blas_dcsr_vmv(const dCSRmat *A, const REAL *x, const REAL *y)
vector-Matrix-vector multiplication alpha = y'*A*x
Definition: BlaSpmvCSR.c:839
SHORT fasp_blas_dcsr_add(const dCSRmat *A, const REAL alpha, const dCSRmat *B, const REAL beta, dCSRmat *C)
compute C = alpha*A + beta*B in CSR format
Definition: BlaSpmvCSR.c:60
unsigned long total_alloc_count
void fasp_blas_dcsr_rap4(dCSRmat *R, dCSRmat *A, dCSRmat *P, dCSRmat *B, INT *icor_ysk)
Triple sparse matrix multiplication B=R*A*P.
Definition: BlaSpmvCSR.c:1954
void fasp_blas_dcsr_rap(const dCSRmat *R, const dCSRmat *A, const dCSRmat *P, dCSRmat *RAP)
Triple sparse matrix multiplication B=R*A*P.
Definition: BlaSpmvCSR.c:999
void fasp_blas_dcsr_aAxpy(const REAL alpha, const dCSRmat *A, const REAL *x, REAL *y)
Matrix-vector multiplication y = alpha*A*x + y.
Definition: BlaSpmvCSR.c:494
void fasp_blas_dcsr_ptap(const dCSRmat *Pt, const dCSRmat *A, const dCSRmat *P, dCSRmat *Ac)
Triple sparse matrix multiplication B=P'*A*P.
Definition: BlaSpmvCSR.c:1745
Main header file for the FASP project.
#define REAL
Definition: fasp.h:75
#define SHORT
FASP integer and floating point numbers.
Definition: fasp.h:71
#define LONGREAL
Definition: fasp.h:76
#define INT
Definition: fasp.h:72
#define FASP_SUCCESS
Definition of return status and error messages.
Definition: fasp_const.h:19
#define OPENMP_HOLDS
Definition: fasp_const.h:269
#define TRUE
Definition of logic type.
Definition: fasp_const.h:61
#define FALSE
Definition: fasp_const.h:62
#define ERROR_MAT_SIZE
Definition: fasp_const.h:26
Sparse matrix of REAL type in CSR format.
Definition: fasp.h:151
INT col
column of matrix A, n
Definition: fasp.h:157
REAL * val
nonzero entries of A
Definition: fasp.h:169
INT row
row number of matrix A, m
Definition: fasp.h:154
INT * IA
integer array of row pointers, the size is m+1
Definition: fasp.h:163
INT nnz
number of nonzero entries
Definition: fasp.h:160
INT * JA
integer array of column indexes, the size is nnz
Definition: fasp.h:166