Fast Auxiliary Space Preconditioning 2.7.7 Aug/28/2022
Loading...
Searching...
No Matches
BlaSparseCSR.c
Go to the documentation of this file.
1
15#include <math.h>
16#include <time.h>
17
18#ifdef _OPENMP
19#include <omp.h>
20#endif
21
22#include "fasp.h"
23#include "fasp_functs.h"
24
25#if MULTI_COLOR_ORDER
26static void generate_S_theta(dCSRmat*, iCSRmat*, REAL);
27#endif
28
29/*---------------------------------*/
30/*-- Public Functions --*/
31/*---------------------------------*/
32
47dCSRmat fasp_dcsr_create(const INT m, const INT n, const INT nnz)
48{
49 dCSRmat A;
50
51 if (m > 0) {
52 A.IA = (INT*)fasp_mem_calloc(m + 1, sizeof(INT));
53 } else {
54 A.IA = NULL;
55 }
56
57 if (n > 0) {
58 A.JA = (INT*)fasp_mem_calloc(nnz, sizeof(INT));
59 } else {
60 A.JA = NULL;
61 }
62
63 if (nnz > 0) {
64 A.val = (REAL*)fasp_mem_calloc(nnz, sizeof(REAL));
65 } else {
66 A.val = NULL;
67 }
68
69 A.row = m;
70 A.col = n;
71 A.nnz = nnz;
72
73#if MULTI_COLOR_ORDER
74 A.color = 0;
75 A.IC = NULL;
76 A.ICMAP = NULL;
77#endif
78
79 return A;
80}
81
96iCSRmat fasp_icsr_create(const INT m, const INT n, const INT nnz)
97{
98 iCSRmat A;
99
100 if (m > 0) {
101 A.IA = (INT*)fasp_mem_calloc(m + 1, sizeof(INT));
102 } else {
103 A.IA = NULL;
104 }
105
106 if (n > 0) {
107 A.JA = (INT*)fasp_mem_calloc(nnz, sizeof(INT));
108 } else {
109 A.JA = NULL;
110 }
111
112 if (nnz > 0) {
113 A.val = (INT*)fasp_mem_calloc(nnz, sizeof(INT));
114 } else {
115 A.val = NULL;
116 }
117
118 A.row = m;
119 A.col = n;
120 A.nnz = nnz;
121
122 return A;
123}
124
138void fasp_dcsr_alloc(const INT m, const INT n, const INT nnz, dCSRmat* A)
139{
140 if (m <= 0 || n <= 0) {
141 printf("### ERROR: Matrix dim %d, %d must be positive! [%s]\n", m, n,
142 __FUNCTION__);
143 return;
144 }
145
146 if (m > 0) {
147 A->IA = (INT*)fasp_mem_calloc(m + 1, sizeof(INT));
148 } else {
149 A->IA = NULL;
150 }
151
152 if (nnz > 0) {
153 A->JA = (INT*)fasp_mem_calloc(nnz, sizeof(INT));
154 A->val = (REAL*)fasp_mem_calloc(nnz, sizeof(REAL));
155 } else {
156 A->JA = NULL;
157 A->val = NULL;
158 }
159
160 A->row = m;
161 A->col = n;
162 A->nnz = nnz;
163
164#if MULTI_COLOR_ORDER
165 A->color = 0;
166 A->IC = NULL;
167 A->ICMAP = NULL;
168#endif
169
170 return;
171}
172
185{
186 if (A == NULL) return;
187
188 fasp_mem_free(A->IA);
189 A->IA = NULL;
190 fasp_mem_free(A->JA);
191 A->JA = NULL;
192 fasp_mem_free(A->val);
193 A->val = NULL;
194
195#if MULTI_COLOR_ORDER
196 fasp_mem_free(A->IC);
197 A->IC = NULL;
198 fasp_mem_free(A->ICMAP);
199 A->ICMAP = NULL;
200#endif
201
202 A->col = 0;
203 A->row = 0;
204 A->nnz = 0;
205 A = NULL;
206}
207
220{
221 if (A == NULL) return;
222
223 fasp_mem_free(A->IA);
224 A->IA = NULL;
225 fasp_mem_free(A->JA);
226 A->JA = NULL;
227 fasp_mem_free(A->val);
228 A->val = NULL;
229 A->col = 0;
230 A->row = 0;
231 A->nnz = 0;
232 A = NULL;
233}
234
246{
247 const INT row = A->row;
248 const INT* ia = A->IA;
249 INT i, max;
250
251 for (max = i = 0; i < row; ++i) max = MAX(max, ia[i + 1] - ia[i]);
252
253 return (max);
254}
255
276{
277 const INT n = A->row, nnz = A->nnz;
278 const INT * ia = A->IA, *ja = A->JA;
279 const REAL* Aval = A->val;
280 INT i, j, k, jaj, i1, i2, start;
281 SHORT nthreads = 1, use_openmp = FALSE;
282
283#ifdef _OPENMP
284 if (MIN(n, nnz) > OPENMP_HOLDS) {
285 use_openmp = TRUE;
286 nthreads = fasp_get_num_threads();
287 }
288#endif
289
290 dCSRmat Aperm = fasp_dcsr_create(n, n, nnz);
291
292 // form the transpose of P
293 INT* Pt = (INT*)fasp_mem_calloc(n, sizeof(INT));
294
295 if (use_openmp) {
296 INT myid, mybegin, myend;
297#ifdef _OPENMP
298#pragma omp parallel for private(myid, mybegin, myend, i)
299#endif
300 for (myid = 0; myid < nthreads; ++myid) {
301 fasp_get_start_end(myid, nthreads, n, &mybegin, &myend);
302 for (i = mybegin; i < myend; ++i) Pt[P[i]] = i;
303 }
304 } else {
305 for (i = 0; i < n; ++i) Pt[P[i]] = i;
306 }
307
308 // compute IA of P*A (row permutation)
309 Aperm.IA[0] = 0;
310 for (i = 0; i < n; ++i) {
311 k = P[i];
312 Aperm.IA[i + 1] = Aperm.IA[i] + (ia[k + 1] - ia[k]);
313 }
314
315 // perform actual P*A
316 if (use_openmp) {
317 INT myid, mybegin, myend;
318#ifdef _OPENMP
319#pragma omp parallel for private(myid, mybegin, myend, i1, i2, k, start, j, jaj)
320#endif
321 for (myid = 0; myid < nthreads; ++myid) {
322 fasp_get_start_end(myid, nthreads, n, &mybegin, &myend);
323 for (i = mybegin; i < myend; ++i) {
324 i1 = Aperm.IA[i];
325 i2 = Aperm.IA[i + 1] - 1;
326 k = P[i];
327 start = ia[k];
328 for (j = i1; j <= i2; ++j) {
329 jaj = start + j - i1;
330 Aperm.JA[j] = ja[jaj];
331 Aperm.val[j] = Aval[jaj];
332 }
333 }
334 }
335 } else {
336 for (i = 0; i < n; ++i) {
337 i1 = Aperm.IA[i];
338 i2 = Aperm.IA[i + 1] - 1;
339 k = P[i];
340 start = ia[k];
341 for (j = i1; j <= i2; ++j) {
342 jaj = start + j - i1;
343 Aperm.JA[j] = ja[jaj];
344 Aperm.val[j] = Aval[jaj];
345 }
346 }
347 }
348
349 // perform P*A*P' (column permutation)
350 if (use_openmp) {
351 INT myid, mybegin, myend;
352#ifdef _OPENMP
353#pragma omp parallel for private(myid, mybegin, myend, k, j)
354#endif
355 for (myid = 0; myid < nthreads; ++myid) {
356 fasp_get_start_end(myid, nthreads, nnz, &mybegin, &myend);
357 for (k = mybegin; k < myend; ++k) {
358 j = Aperm.JA[k];
359 Aperm.JA[k] = Pt[j];
360 }
361 }
362 } else {
363 for (k = 0; k < nnz; ++k) {
364 j = Aperm.JA[k];
365 Aperm.JA[k] = Pt[j];
366 }
367 }
368
369 fasp_mem_free(Pt);
370 Pt = NULL;
371
372 return (Aperm);
373}
374
386{
387 const INT n = A->col;
388 INT i, j, start, row_length;
389
390 // temp memory for sorting rows of A
391 INT * index, *ja;
392 REAL* a;
393
394 index = (INT*)fasp_mem_calloc(n, sizeof(INT));
395 ja = (INT*)fasp_mem_calloc(n, sizeof(INT));
396 a = (REAL*)fasp_mem_calloc(n, sizeof(REAL));
397
398 for (i = 0; i < n; ++i) {
399 start = A->IA[i];
400 row_length = A->IA[i + 1] - start;
401
402 for (j = 0; j < row_length; ++j) index[j] = j;
403
404 fasp_aux_iQuickSortIndex(&(A->JA[start]), 0, row_length - 1, index);
405
406 for (j = 0; j < row_length; ++j) {
407 ja[j] = A->JA[start + index[j]];
408 a[j] = A->val[start + index[j]];
409 }
410
411 for (j = 0; j < row_length; ++j) {
412 A->JA[start + j] = ja[j];
413 A->val[start + j] = a[j];
414 }
415 }
416
417 // clean up memory
418 fasp_mem_free(index);
419 index = NULL;
420 fasp_mem_free(ja);
421 ja = NULL;
422 fasp_mem_free(a);
423 a = NULL;
424}
425
446SHORT fasp_dcsr_getblk(const dCSRmat* A, const INT* Is, const INT* Js, const INT m,
447 const INT n, dCSRmat* B)
448{
449 SHORT use_openmp = FALSE;
450 SHORT status = FASP_SUCCESS;
451 INT i, j, k, nnz = 0;
452 INT* col_flag;
453
454#ifdef _OPENMP
455 INT stride_i, mybegin, myend, myid, nthreads;
456 if (n > OPENMP_HOLDS) {
457 use_openmp = TRUE;
458 nthreads = fasp_get_num_threads();
459 }
460#endif
461
462 // create column flags
463 col_flag = (INT*)fasp_mem_calloc(A->col, sizeof(INT));
464
465 B->row = m;
466 B->col = n;
467
468 B->IA = (INT*)fasp_mem_calloc(m + 1, sizeof(INT));
469 B->JA = (INT*)fasp_mem_calloc(A->nnz, sizeof(INT));
470 B->val = (REAL*)fasp_mem_calloc(A->nnz, sizeof(REAL));
471
472#if MULTI_COLOR_ORDER
473 B->color = 0;
474 B->IC = NULL;
475 B->ICMAP = NULL;
476#endif
477
478 if (use_openmp) {
479#ifdef _OPENMP
480 stride_i = n / nthreads;
481#pragma omp parallel private(myid, mybegin, myend, i) num_threads(nthreads)
482 {
483 myid = omp_get_thread_num();
484 mybegin = myid * stride_i;
485 if (myid < nthreads - 1)
486 myend = mybegin + stride_i;
487 else
488 myend = n;
489 for (i = mybegin; i < myend; ++i) {
490 col_flag[Js[i]] = i + 1;
491 }
492 }
493#endif
494 } else {
495 for (i = 0; i < n; ++i) col_flag[Js[i]] = i + 1;
496 }
497
498 // Count nonzeros for sub matrix and fill in
499 B->IA[0] = 0;
500 for (i = 0; i < m; ++i) {
501 for (k = A->IA[Is[i]]; k < A->IA[Is[i] + 1]; ++k) {
502 j = A->JA[k];
503 if (col_flag[j] > 0) {
504 B->JA[nnz] = col_flag[j] - 1;
505 B->val[nnz] = A->val[k];
506 nnz++;
507 }
508 } /* end for k */
509 B->IA[i + 1] = nnz;
510 } /* end for i */
511 B->nnz = nnz;
512
513 // re-allocate memory space
514 B->JA = (INT*)fasp_mem_realloc(B->JA, sizeof(INT) * nnz);
515 B->val = (REAL*)fasp_mem_realloc(B->val, sizeof(REAL) * nnz);
516
517 fasp_mem_free(col_flag);
518 col_flag = NULL;
519
520 return (status);
521}
522
537void fasp_dcsr_getdiag(INT n, const dCSRmat* A, dvector* diag)
538{
539 INT i, k, j, ibegin, iend;
540
541 SHORT nthreads = 1, use_openmp = FALSE;
542
543 if (n == 0 || n > A->row || n > A->col) n = MIN(A->row, A->col);
544
545#ifdef _OPENMP
546 if (n > OPENMP_HOLDS) {
547 use_openmp = TRUE;
548 nthreads = fasp_get_num_threads();
549 }
550#endif
551
552 fasp_dvec_alloc(n, diag);
553
554 if (use_openmp) {
555 INT mybegin, myend, myid;
556#ifdef _OPENMP
557#pragma omp parallel for private(myid, mybegin, myend, i, ibegin, iend, k, j)
558#endif
559 for (myid = 0; myid < nthreads; myid++) {
560 fasp_get_start_end(myid, nthreads, n, &mybegin, &myend);
561 for (i = mybegin; i < myend; i++) {
562 ibegin = A->IA[i];
563 iend = A->IA[i + 1];
564 for (k = ibegin; k < iend; ++k) {
565 j = A->JA[k];
566 if ((j - i) == 0) {
567 diag->val[i] = A->val[k];
568 break;
569 } // end if
570 } // end for k
571 } // end for i
572 }
573 } else {
574 for (i = 0; i < n; ++i) {
575 ibegin = A->IA[i];
576 iend = A->IA[i + 1];
577 for (k = ibegin; k < iend; ++k) {
578 j = A->JA[k];
579 if ((j - i) == 0) {
580 diag->val[i] = A->val[k];
581 break;
582 } // end if
583 } // end for k
584 } // end for i
585 }
586}
587
602void fasp_dcsr_getcol(const INT n, const dCSRmat* A, REAL* col)
603{
604 INT i, j, row_begin, row_end;
605 INT nrow = A->row, ncol = A->col;
606 INT status = FASP_SUCCESS;
607
608 SHORT nthreads = 1, use_openmp = FALSE;
609
610#ifdef _OPENMP
611 if (nrow > OPENMP_HOLDS) {
612 use_openmp = TRUE;
613 nthreads = fasp_get_num_threads();
614 }
615#endif
616
617 // check the column index n
618 if (n < 0 || n >= ncol) {
619 printf("### ERROR: Illegal column index %d! [%s]\n", n, __FUNCTION__);
620 status = ERROR_DUMMY_VAR;
621 goto FINISHED;
622 }
623
624 // get the column
625 if (use_openmp) {
626 INT mybegin, myend, myid;
627
628#ifdef _OPENMP
629#pragma omp parallel for private(myid, mybegin, myend, i, j, row_begin, row_end)
630#endif
631 for (myid = 0; myid < nthreads; myid++) {
632 fasp_get_start_end(myid, nthreads, nrow, &mybegin, &myend);
633 for (i = mybegin; i < myend; i++) {
634 col[i] = 0.0;
635 row_begin = A->IA[i];
636 row_end = A->IA[i + 1];
637 for (j = row_begin; j < row_end; ++j) {
638 if (A->JA[j] == n) {
639 col[i] = A->val[j];
640 }
641 } // end for j
642 } // end for i
643 }
644 } else {
645 for (i = 0; i < nrow; ++i) {
646 // set the entry to zero
647 col[i] = 0.0;
648 row_begin = A->IA[i];
649 row_end = A->IA[i + 1];
650 for (j = row_begin; j < row_end; ++j) {
651 if (A->JA[j] == n) {
652 col[i] = A->val[j];
653 }
654 } // end for j
655 } // end for i
656 }
657
658FINISHED:
659 fasp_chkerr(status, __FUNCTION__);
660}
661
681{
682 const INT num_rowsA = A->row;
683 REAL* A_data = A->val;
684 INT* A_i = A->IA;
685 INT* A_j = A->JA;
686
687 // Local variable
688 INT i, j;
689 INT tempi, row_size;
690 REAL tempd;
691
692#ifdef _OPENMP
693 // variables for OpenMP
694 INT myid, mybegin, myend, ibegin, iend;
695 INT nthreads = fasp_get_num_threads();
696#endif
697
698#if DEBUG_MODE > 0
699 printf("### DEBUG: [-Begin-] %s ...\n", __FUNCTION__);
700#endif
701
702#ifdef _OPENMP
703 if (num_rowsA > OPENMP_HOLDS) {
704#pragma omp parallel for private(myid, i, j, ibegin, iend, tempi, tempd, mybegin, myend)
705 for (myid = 0; myid < nthreads; myid++) {
706 fasp_get_start_end(myid, nthreads, num_rowsA, &mybegin, &myend);
707 for (i = mybegin; i < myend; i++) {
708 ibegin = A_i[i];
709 iend = A_i[i + 1];
710 // check whether the first entry is already diagonal
711 if (A_j[ibegin] != i) {
712 for (j = ibegin + 1; j < iend; j++) {
713 if (A_j[j] == i) {
714#if DEBUG_MODE > 2
715 printf("### DEBUG: Switch entry_%d with entry_0\n", j);
716#endif
717 tempi = A_j[ibegin];
718 A_j[ibegin] = A_j[j];
719 A_j[j] = tempi;
720
721 tempd = A_data[ibegin];
722 A_data[ibegin] = A_data[j];
723 A_data[j] = tempd;
724 break;
725 }
726 }
727 if (j == iend) {
728 printf("### ERROR: Diagonal entry %d is zero!\n", i);
729 fasp_chkerr(ERROR_MISC, __FUNCTION__);
730 }
731 }
732 }
733 }
734 } else {
735#endif
736 for (i = 0; i < num_rowsA; i++) {
737 row_size = A_i[i + 1] - A_i[i];
738 // check whether the first entry is already diagonal
739 if (A_j[0] != i) {
740 for (j = 1; j < row_size; j++) {
741 if (A_j[j] == i) {
742#if DEBUG_MODE > 2
743 printf("### DEBUG: Switch entry_%d with entry_0\n", j);
744#endif
745 tempi = A_j[0];
746 A_j[0] = A_j[j];
747 A_j[j] = tempi;
748
749 tempd = A_data[0];
750 A_data[0] = A_data[j];
751 A_data[j] = tempd;
752
753 break;
754 }
755 }
756 if (j == row_size) {
757 printf("### ERROR: Diagonal entry %d is zero!\n", i);
758 fasp_chkerr(ERROR_MISC, __FUNCTION__);
759 }
760 }
761 A_j += row_size;
762 A_data += row_size;
763 }
764#ifdef _OPENMP
765 }
766#endif
767
768#if DEBUG_MODE > 0
769 printf("### DEBUG: [--End--] %s ...\n", __FUNCTION__);
770#endif
771}
772
787{
788 const INT m = A->row;
789 const INT *ia = A->IA, *ja = A->JA;
790 REAL* aj = A->val;
791
792 // Local variables
793 INT i, j, k, begin_row, end_row;
794 SHORT status = ERROR_UNKNOWN;
795
796 for (i = 0; i < m; ++i) {
797 begin_row = ia[i];
798 end_row = ia[i + 1];
799 for (k = begin_row; k < end_row; ++k) {
800 j = ja[k];
801 if (i == j) {
802 if (aj[k] < 0.0)
803 goto FINISHED;
804 else if (aj[k] < SMALLREAL)
805 aj[k] = value;
806 }
807 } // end for k
808 } // end for i
809
810 status = FASP_SUCCESS;
811
812FINISHED:
813 return status;
814}
815
827void fasp_icsr_cp(const iCSRmat* A, iCSRmat* B)
828{
829 B->row = A->row;
830 B->col = A->col;
831 B->nnz = A->nnz;
832
833 fasp_iarray_cp(A->row + 1, A->IA, B->IA);
834 fasp_iarray_cp(A->nnz, A->JA, B->JA);
835 fasp_iarray_cp(A->nnz, A->val, B->val);
836}
837
851void fasp_dcsr_cp(const dCSRmat* A, dCSRmat* B)
852{
853 B->row = A->row;
854 B->col = A->col;
855 B->nnz = A->nnz;
856
857 fasp_iarray_cp(A->row + 1, A->IA, B->IA);
858 fasp_iarray_cp(A->nnz, A->JA, B->JA);
859 fasp_darray_cp(A->nnz, A->val, B->val);
860}
861
875void fasp_icsr_trans(const iCSRmat* A, iCSRmat* AT)
876{
877 const INT n = A->row, m = A->col, nnz = A->nnz, m1 = m - 1;
878
879 // Local variables
880 INT i, j, k, p;
881 INT ibegin, iend;
882
883#if DEBUG_MODE > 1
884 printf("### DEBUG: m=%d, n=%d, nnz=%d\n", m, n, nnz);
885#endif
886
887 AT->row = m;
888 AT->col = n;
889 AT->nnz = nnz;
890
891 AT->IA = (INT*)fasp_mem_calloc(m + 1, sizeof(INT));
892
893 AT->JA = (INT*)fasp_mem_calloc(nnz, sizeof(INT));
894
895 if (A->val) {
896 AT->val = (INT*)fasp_mem_calloc(nnz, sizeof(INT));
897 } else {
898 AT->val = NULL;
899 }
900
901 // first pass: find the Number of nonzeros in the first m-1 columns of A
902 // Note: these Numbers are stored in the array AT.IA from 1 to m-1
903 fasp_iarray_set(m + 1, AT->IA, 0);
904
905 for (j = 0; j < nnz; ++j) {
906 i = A->JA[j]; // column Number of A = row Number of A'
907 if (i < m1) AT->IA[i + 2]++;
908 }
909
910 for (i = 2; i <= m; ++i) AT->IA[i] += AT->IA[i - 1];
911
912 // second pass: form A'
913 if (A->val != NULL) {
914 for (i = 0; i < n; ++i) {
915 ibegin = A->IA[i];
916 iend = A->IA[i + 1];
917 for (p = ibegin; p < iend; p++) {
918 j = A->JA[p] + 1;
919 k = AT->IA[j];
920 AT->JA[k] = i;
921 AT->val[k] = A->val[p];
922 AT->IA[j] = k + 1;
923 } // end for p
924 } // end for i
925 } else {
926 for (i = 0; i < n; ++i) {
927 ibegin = A->IA[i];
928 iend = A->IA[i + 1];
929 for (p = ibegin; p < iend; p++) {
930 j = A->JA[p] + 1;
931 k = AT->IA[j];
932 AT->JA[k] = i;
933 AT->IA[j] = k + 1;
934 } // end for p
935 } // end for i
936 } // end if
937}
938
953{
954 const INT n = A->row, m = A->col, nnz = A->nnz;
955
956 // Local variables
957 INT i, j, k, p;
958
959 AT->row = m;
960 AT->col = n;
961 AT->nnz = nnz;
962
963 AT->IA = (INT*)fasp_mem_calloc(m + 1, sizeof(INT));
964
965 AT->JA = (INT*)fasp_mem_calloc(nnz, sizeof(INT));
966
967 if (A->val) {
968 AT->val = (REAL*)fasp_mem_calloc(nnz, sizeof(REAL));
969
970 } else {
971 AT->val = NULL;
972 }
973
974#if MULTI_COLOR_ORDER
975 AT->color = 0;
976 AT->IC = NULL;
977 AT->ICMAP = NULL;
978#endif
979
980 // first pass: find the Number of nonzeros in the first m-1 columns of A
981 // Note: these Numbers are stored in the array AT.IA from 1 to m-1
982
983 // fasp_iarray_set(m+1, AT->IA, 0);
984 memset(AT->IA, 0, sizeof(INT) * (m + 1));
985
986 for (j = 0; j < nnz; ++j) {
987 i = A->JA[j]; // column Number of A = row Number of A'
988 if (i < m - 1) AT->IA[i + 2]++;
989 }
990
991 for (i = 2; i <= m; ++i) AT->IA[i] += AT->IA[i - 1];
992
993 // second pass: form A'
994 if (A->val) {
995 for (i = 0; i < n; ++i) {
996 INT ibegin = A->IA[i], iend = A->IA[i + 1];
997 for (p = ibegin; p < iend; p++) {
998 j = A->JA[p] + 1;
999 k = AT->IA[j];
1000 AT->JA[k] = i;
1001 AT->val[k] = A->val[p];
1002 AT->IA[j] = k + 1;
1003 } // end for p
1004 } // end for i
1005 } else {
1006 for (i = 0; i < n; ++i) {
1007 INT ibegin = A->IA[i], iend1 = A->IA[i + 1];
1008 for (p = ibegin; p < iend1; p++) {
1009 j = A->JA[p] + 1;
1010 k = AT->IA[j];
1011 AT->JA[k] = i;
1012 AT->IA[j] = k + 1;
1013 } // end for p
1014 } // end of i
1015 } // end if
1016
1017 return FASP_SUCCESS;
1018}
1019
1037void fasp_dcsr_transpose(INT* row[2], INT* col[2], REAL* val[2], INT* nn, INT* tniz)
1038{
1039 const INT nca = nn[1]; // Number of columns
1040
1041 INT* izc = (INT*)fasp_mem_calloc(nn[1], sizeof(INT));
1042 INT* izcaux = (INT*)fasp_mem_calloc(nn[1], sizeof(INT));
1043
1044 // Local variables
1045 INT i, m, itmp;
1046
1047 // first pass: to set order right
1048 for (i = 0; i < tniz[0]; ++i) izc[col[0][i]]++;
1049
1050 izcaux[0] = 0;
1051 for (i = 1; i < nca; ++i) izcaux[i] = izcaux[i - 1] + izc[i - 1];
1052
1053 // second pass: form transpose
1054 memset(izc, 0, nca * sizeof(INT));
1055
1056 for (i = 0; i < tniz[0]; ++i) {
1057 m = col[0][i];
1058 itmp = izcaux[m] + izc[m];
1059 row[1][itmp] = m;
1060 col[1][itmp] = row[0][i];
1061 val[1][itmp] = val[0][i];
1062 izc[m]++;
1063 }
1064
1065 fasp_mem_free(izc);
1066 izc = NULL;
1067 fasp_mem_free(izcaux);
1068 izcaux = NULL;
1069}
1070
1086void fasp_dcsr_compress(const dCSRmat* A, dCSRmat* B, const REAL dtol)
1087{
1088 INT i, j, k;
1089 INT ibegin, iend1;
1090
1091 SHORT nthreads = 1, use_openmp = FALSE;
1092
1093#ifdef _OPENMP
1094 if (B->nnz > OPENMP_HOLDS) {
1095 use_openmp = TRUE;
1096 nthreads = fasp_get_num_threads();
1097 }
1098#endif
1099
1100 INT* index = (INT*)fasp_mem_calloc(A->nnz, sizeof(INT));
1101
1102 B->row = A->row;
1103 B->col = A->col;
1104
1105 B->IA = (INT*)fasp_mem_calloc(A->row + 1, sizeof(INT));
1106
1107 B->IA[0] = A->IA[0];
1108
1109 // first pass: determine the size of B
1110 k = 0;
1111 for (i = 0; i < A->row; ++i) {
1112 ibegin = A->IA[i];
1113 iend1 = A->IA[i + 1];
1114 for (j = ibegin; j < iend1; ++j)
1115 if (ABS(A->val[j]) > dtol) {
1116 index[k] = j;
1117 ++k;
1118 } /* end of j */
1119 B->IA[i + 1] = k;
1120 } /* end of i */
1121 B->nnz = k;
1122 B->JA = (INT*)fasp_mem_calloc(B->nnz, sizeof(INT));
1123 B->val = (REAL*)fasp_mem_calloc(B->nnz, sizeof(REAL));
1124
1125 // second pass: generate the index and element to B
1126 if (use_openmp) {
1127 INT myid, mybegin, myend;
1128#ifdef _OPENMP
1129#pragma omp parallel for private(myid, i, mybegin, myend)
1130#endif
1131 for (myid = 0; myid < nthreads; myid++) {
1132 fasp_get_start_end(myid, nthreads, B->nnz, &mybegin, &myend);
1133 for (i = mybegin; i < myend; ++i) {
1134 B->JA[i] = A->JA[index[i]];
1135 B->val[i] = A->val[index[i]];
1136 }
1137 }
1138 } else {
1139 for (i = 0; i < B->nnz; ++i) {
1140 B->JA[i] = A->JA[index[i]];
1141 B->val[i] = A->val[index[i]];
1142 }
1143 }
1144
1145 fasp_mem_free(index);
1146 index = NULL;
1147}
1148
1167{
1168 const INT row = A->row;
1169 const INT nnz = A->nnz;
1170
1171 INT i, j, k;
1172 INT ibegin, iend = A->IA[0];
1173 SHORT status = FASP_SUCCESS;
1174 k = 0;
1175 for (i = 0; i < row; ++i) {
1176 ibegin = iend;
1177 iend = A->IA[i + 1];
1178 for (j = ibegin; j < iend; ++j)
1179 if (ABS(A->val[j]) > dtol || i == A->JA[j]) {
1180 A->JA[k] = A->JA[j];
1181 A->val[k] = A->val[j];
1182 ++k;
1183 } /* end of j */
1184 A->IA[i + 1] = k;
1185 } /* end of i */
1186
1187 if (k <= nnz) {
1188 A->nnz = k;
1189 A->JA = (INT*)fasp_mem_realloc(A->JA, k * sizeof(INT));
1190 A->val = (REAL*)fasp_mem_realloc(A->val, k * sizeof(REAL));
1191 } else {
1192 printf("### WARNING: Size of compressed matrix is bigger than original!\n");
1193 status = ERROR_UNKNOWN;
1194 }
1195
1196 return (status);
1197}
1198
1212void fasp_dcsr_shift(dCSRmat* A, const INT offset)
1213{
1214 const INT nnz = A->nnz;
1215 const INT n = A->row + 1;
1216 INT i, *ai = A->IA, *aj = A->JA;
1217 SHORT nthreads = 1, use_openmp = FALSE;
1218
1219#ifdef _OPENMP
1220 if (MIN(n, nnz) > OPENMP_HOLDS) {
1221 use_openmp = TRUE;
1222 nthreads = fasp_get_num_threads();
1223 }
1224#endif
1225
1226 if (use_openmp) {
1227 INT myid, mybegin, myend;
1228#ifdef _OPENMP
1229#pragma omp parallel for private(myid, mybegin, myend, i)
1230#endif
1231 for (myid = 0; myid < nthreads; myid++) {
1232 fasp_get_start_end(myid, nthreads, n, &mybegin, &myend);
1233 for (i = mybegin; i < myend; i++) {
1234 ai[i] += offset;
1235 }
1236 }
1237 } else {
1238 for (i = 0; i < n; ++i) ai[i] += offset;
1239 }
1240
1241 if (use_openmp) {
1242 INT myid, mybegin, myend;
1243#ifdef _OPENMP
1244#pragma omp parallel for private(myid, mybegin, myend, i)
1245#endif
1246 for (myid = 0; myid < nthreads; myid++) {
1247 fasp_get_start_end(myid, nthreads, nnz, &mybegin, &myend);
1248 for (i = mybegin; i < myend; i++) {
1249 aj[i] += offset;
1250 }
1251 }
1252 } else {
1253 for (i = 0; i < nnz; ++i) aj[i] += offset;
1254 }
1255}
1256
1271{
1272 // information about matrix A
1273 const INT n = A->row;
1274 const INT* IA = A->IA;
1275 const INT* JA = A->JA;
1276 REAL* val = A->val;
1277 REAL* work;
1278
1279 SHORT nthreads = 1, use_openmp = FALSE;
1280
1281 // local variables
1282 INT i, j, k, row_start, row_end;
1283
1284#ifdef _OPENMP
1285 if (n > OPENMP_HOLDS) {
1286 use_openmp = TRUE;
1287 nthreads = fasp_get_num_threads();
1288 }
1289#endif
1290
1291 if (diag->row != n) {
1292 printf("### ERROR: Size of diag = %d != size of matrix = %d!", diag->row, n);
1293 fasp_chkerr(ERROR_MISC, __FUNCTION__);
1294 }
1295
1296 // work space
1297 work = (REAL*)fasp_mem_calloc(n, sizeof(REAL));
1298
1299 if (use_openmp) {
1300 INT myid, mybegin, myend;
1301#ifdef _OPENMP
1302#pragma omp parallel for private(myid, mybegin, myend, i)
1303#endif
1304 for (myid = 0; myid < nthreads; myid++) {
1305 fasp_get_start_end(myid, nthreads, n, &mybegin, &myend);
1306 for (i = mybegin; i < myend; i++) work[i] = sqrt(diag->val[i]);
1307 }
1308 } else {
1309 // square root of diagonal entries
1310 for (i = 0; i < n; i++) work[i] = sqrt(diag->val[i]);
1311 }
1312
1313 if (use_openmp) {
1314 INT myid, mybegin, myend;
1315#ifdef _OPENMP
1316#pragma omp parallel for private(myid, mybegin, myend, row_start, row_end, i, j, k)
1317#endif
1318 for (myid = 0; myid < nthreads; myid++) {
1319 fasp_get_start_end(myid, nthreads, n, &mybegin, &myend);
1320 for (i = mybegin; i < myend; i++) {
1321 row_start = IA[i];
1322 row_end = IA[i + 1];
1323 for (j = row_start; j < row_end; j++) {
1324 k = JA[j];
1325 val[j] = val[j] / (work[i] * work[k]);
1326 }
1327 }
1328 }
1329 } else {
1330 // main loop
1331 for (i = 0; i < n; i++) {
1332 row_start = IA[i];
1333 row_end = IA[i + 1];
1334 for (j = row_start; j < row_end; j++) {
1335 k = JA[j];
1336 val[j] = val[j] / (work[i] * work[k]);
1337 }
1338 }
1339 }
1340
1341 // free work space
1342 fasp_mem_free(work);
1343 work = NULL;
1344}
1345
1358{
1359 // local variable
1360 dCSRmat AT;
1361
1362 // return variable
1363 dCSRmat SA;
1364
1365#if MULTI_COLOR_ORDER
1366 AT.IC = NULL;
1367 SA.IC = NULL;
1368 AT.ICMAP = NULL;
1369 SA.ICMAP = NULL;
1370#endif
1371
1372 // get the transpose of A
1373 fasp_dcsr_trans(A, &AT);
1374
1375 // get symmetrized A
1376 fasp_blas_dcsr_add(A, 1.0, &AT, 0.0, &SA);
1377
1378 // clean
1379 fasp_dcsr_free(&AT);
1380
1381 // return
1382 return SA;
1383}
1384
1417{
1418 /* tested for permutation and transposition */
1419 /* transpose or permute; if A.val is null ===> transpose the
1420 structure only */
1421 const INT n = A->row, m = A->col, nnz = A->nnz;
1422 const INT * ia = NULL, *ja = NULL;
1423 const REAL* a = NULL;
1424 INT m1 = m + 1;
1425 ia = A->IA;
1426 ja = A->JA;
1427 a = A->val;
1428 /* introducing few extra pointers hould not hurt too much the speed */
1429 INT * iat = NULL, *jat = NULL;
1430 REAL* at = NULL;
1431
1432 /* loop variables */
1433 INT i, j, jp, pi, iabeg, iaend, k;
1434
1435 /* initialize */
1436 AT->row = m;
1437 AT->col = n;
1438 AT->nnz = nnz;
1439
1440 /* all these should be allocated or change this to allocate them here */
1441 iat = AT->IA;
1442 jat = AT->JA;
1443 at = AT->val;
1444 for (i = 0; i < m1; ++i) iat[i] = 0;
1445 iaend = ia[n];
1446 for (i = 0; i < iaend; ++i) {
1447 j = ja[i] + 2;
1448 if (j < m1) iat[j]++;
1449 }
1450 iat[0] = 0;
1451 iat[1] = 0;
1452 if (m != 1) {
1453 for (i = 2; i < m1; ++i) {
1454 iat[i] += iat[i - 1];
1455 }
1456 }
1457
1458 if (p && a) {
1459 /* so we permute and also use matrix entries */
1460 for (i = 0; i < n; ++i) {
1461 pi = p[i];
1462 iabeg = ia[pi];
1463 iaend = ia[pi + 1];
1464 if (iaend > iabeg) {
1465 for (jp = iabeg; jp < iaend; ++jp) {
1466 j = ja[jp] + 1;
1467 k = iat[j];
1468 jat[k] = i;
1469 at[k] = a[jp];
1470 iat[j] = k + 1;
1471 }
1472 }
1473 }
1474 } else if (a && !p) {
1475 /* transpose values, no permutation */
1476 for (i = 0; i < n; ++i) {
1477 iabeg = ia[i];
1478 iaend = ia[i + 1];
1479 if (iaend > iabeg) {
1480 for (jp = iabeg; jp < iaend; ++jp) {
1481 j = ja[jp] + 1;
1482 k = iat[j];
1483 jat[k] = i;
1484 at[k] = a[jp];
1485 iat[j] = k + 1;
1486 }
1487 }
1488 }
1489 } else if (!a && p) {
1490 /* Only integers and permutation (only a is null) */
1491 for (i = 0; i < n; ++i) {
1492 pi = p[i];
1493 iabeg = ia[pi];
1494 iaend = ia[pi + 1];
1495 if (iaend > iabeg) {
1496 for (jp = iabeg; jp < iaend; ++jp) {
1497 j = ja[jp] + 1;
1498 k = iat[j];
1499 jat[k] = i;
1500 iat[j] = k + 1;
1501 }
1502 }
1503 }
1504 } else {
1505 /* Only integers and no permutation (both a and p are null */
1506 for (i = 0; i < n; ++i) {
1507 iabeg = ia[i];
1508 iaend = ia[i + 1];
1509 if (iaend > iabeg) {
1510 for (jp = iabeg; jp < iaend; ++jp) {
1511 j = ja[jp] + 1;
1512 k = iat[j];
1513 jat[k] = i;
1514 iat[j] = k + 1;
1515 }
1516 }
1517 }
1518 }
1519
1520 return;
1521}
1522
1541{
1542 const INT n = A->row, nnz = A->nnz;
1543 dCSRmat Aperm1, Aperm;
1544
1545 Aperm1 = fasp_dcsr_create(n, n, nnz);
1546 Aperm = fasp_dcsr_create(n, n, nnz);
1547
1548 fasp_dcsr_transz(A, p, &Aperm1);
1549 fasp_dcsr_transz(&Aperm1, p, &Aperm);
1550
1551 // clean up
1552 fasp_dcsr_free(&Aperm1);
1553
1554 return (Aperm);
1555}
1556
1571void fasp_dcsr_sortz(dCSRmat* A, const SHORT isym)
1572{
1573 const INT n = A->row, m = A->col, nnz = A->nnz;
1574 dCSRmat AT = fasp_dcsr_create(m, n, nnz);
1575
1576 /* watch carefully who is a pointer and who is not in fasp_dcsr_transz() */
1577 fasp_dcsr_transz(A, NULL, &AT);
1578
1579 /* if the matrix is symmetric, then only one transpose is needed
1580 and now we just copy */
1581 if ((m == n) && (isym))
1582 fasp_dcsr_cp(&AT, A);
1583 else
1584 fasp_dcsr_transz(&AT, NULL, A);
1585
1586 // clean up
1587 fasp_dcsr_free(&AT);
1588}
1589
1602void fasp_dcsr_multicoloring(dCSRmat* A, INT* flags, INT* groups)
1603{
1604#if MULTI_COLOR_ORDER
1605 INT k, i, j, pre, group;
1606 INT iend;
1607 INT icount;
1608 INT front, rear;
1609 INT n = A->row;
1610 INT* IA = A->IA;
1611 INT* JA = A->JA;
1612 INT* cq = (INT*)malloc(sizeof(INT) * (n + 1));
1613 INT* newr = (INT*)malloc(sizeof(INT) * (n + 1));
1614
1615#ifdef _OPENMP
1616#pragma omp parallel for private(k)
1617#endif
1618 for (k = 0; k < n; k++) cq[k] = k;
1619
1620 group = 0;
1621 for (k = 0; k < n; k++) {
1622 if ((IA[k + 1] - IA[k]) > group) group = IA[k + 1] - IA[k];
1623 }
1624
1625 A->IC = (INT*)malloc(sizeof(INT) * (group + 2));
1626 A->ICMAP = (INT*)malloc(sizeof(INT) * (n));
1627
1628 front = n - 1;
1629 rear = n - 1;
1630
1631 memset(newr, -1, sizeof(INT) * (n + 1));
1632 memset(A->ICMAP, 0, sizeof(INT) * n);
1633
1634 group = 0;
1635 icount = 0;
1636 A->IC[0] = 0;
1637 pre = 0;
1638
1639 do {
1640 front++;
1641 if (front == n) front = 0;
1642 i = cq[front];
1643 if (i <= pre) {
1644 A->IC[group] = icount;
1645 A->ICMAP[icount] = i;
1646 group++;
1647 icount++;
1648 iend = IA[i + 1];
1649 for (j = IA[i]; j < iend; j++) newr[JA[j]] = group;
1650 } else if (newr[i] == group) {
1651 rear++;
1652 if (rear == n) rear = 0;
1653 cq[rear] = i;
1654 } else {
1655 A->ICMAP[icount] = i;
1656 icount++;
1657 iend = IA[i + 1];
1658 for (j = IA[i]; j < iend; j++) newr[JA[j]] = group;
1659 }
1660 pre = i;
1661
1662 } while (rear != front);
1663
1664 A->IC[group] = icount;
1665 A->color = group;
1666 free(cq);
1667 free(newr);
1668 *groups = group;
1669#else
1670 printf("### ERROR: %s has not been defined!\n", __FUNCTION__);
1671#endif
1672}
1673
1687void dCSRmat_Multicoloring(dCSRmat* A, INT* rowmax, INT* groups)
1688{
1689#if MULTI_COLOR_ORDER
1690 INT k, i, j, pre, group;
1691 INT igold, iend, iavg;
1692 INT icount;
1693 INT front, rear;
1694 INT n = A->row;
1695 INT* IA = A->IA;
1696 INT* JA = A->JA;
1697
1698 INT* cq = (INT*)malloc(sizeof(INT) * (n + 1));
1699 INT* newr = (INT*)malloc(sizeof(INT) * (n + 1));
1700
1701 for (k = 0; k < n; k++) cq[k] = k;
1702
1703 group = 0;
1704
1705 for (k = 0; k < n; k++) {
1706 if ((IA[k + 1] - IA[k]) > group) group = IA[k + 1] - IA[k];
1707 }
1708 *rowmax = group;
1709#if 0
1710 iavg = IA[n]/n ;
1711 igold = (INT)MAX(iavg,group*0.618) +1;
1712 igold =group ;
1713#endif
1714
1715 A->IC = (INT*)malloc(sizeof(INT) * (group + 2));
1716 A->ICMAP = (INT*)malloc(sizeof(INT) * (n));
1717
1718 front = n - 1;
1719 rear = n - 1;
1720
1721 memset(newr, -1, sizeof(INT) * (n + 1));
1722 memset(A->ICMAP, 0, sizeof(INT) * n);
1723
1724 group = 0;
1725 icount = 0;
1726 A->IC[0] = 0;
1727 pre = 0;
1728
1729 do {
1730 // front = (front+1)%n;
1731 front++;
1732 if (front == n) front = 0; // front = front < n ? front : 0 ;
1733 i = cq[front];
1734
1735 if (i <= pre) {
1736 A->IC[group] = icount;
1737 A->ICMAP[icount] = i;
1738 group++;
1739 icount++;
1740 iend = IA[i + 1];
1741 for (j = IA[i]; j < iend; j++) newr[JA[j]] = group;
1742 } else if (newr[i] == group) {
1743 // rear = (rear +1)%n;
1744 rear++;
1745 if (rear == n) rear = 0;
1746 cq[rear] = i;
1747 } else {
1748 A->ICMAP[icount] = i;
1749 icount++;
1750 iend = IA[i + 1];
1751 for (j = IA[i]; j < iend; j++) newr[JA[j]] = group;
1752 }
1753 pre = i;
1754 } while (rear != front);
1755
1756 A->IC[group] = icount;
1757 A->color = group;
1758
1759#if 0
1760 for(i=0; i < A->color; i++ ){
1761 for(j=A -> IC[i]; j < A-> IC[i+1];j++)
1762 printf("color %d ICMAP[%d] = %d \n", i,j,A-> ICMAP[j]);
1763 printf( "A.color = %d A.row= %d %d\n",A -> color,A -> row,A-> IC[i+1] - A-> IC[i] );
1764 getchar();
1765 }
1766#endif
1767
1768 // printf(" Max Row Numbers %d avg %d igold %d max %d %d\n", group, iavg, igold,
1769 // (INT)MAX(iavg,group*0.618),A->IA[n]/n );
1770 free(cq);
1771 free(newr);
1772 *groups = group;
1773#endif
1774}
1775
1776#if MULTI_COLOR_ORDER
1777static void generate_S_theta(dCSRmat* A, iCSRmat* S, REAL theta)
1778{
1779 const INT row = A->row, col = A->col;
1780 const INT row_plus_one = row + 1;
1781 const INT nnz = A->IA[row] - A->IA[0];
1782
1783 INT index, i, j, begin_row, end_row;
1784 INT * ia = A->IA, *ja = A->JA;
1785 REAL* aj = A->val;
1786
1787 // get the diagnal entry of A
1788 // dvector diag; fasp_dcsr_getdiag(0, A, &diag);
1789
1790 /* generate S */
1791 REAL row_abs_sum;
1792
1793 // copy the structure of A to S
1794 S->row = row;
1795 S->col = col;
1796 S->nnz = nnz;
1797 S->val = NULL;
1798
1799 S->IA = (INT*)fasp_mem_calloc(row_plus_one, sizeof(INT));
1800
1801 S->JA = (INT*)fasp_mem_calloc(nnz, sizeof(INT));
1802
1803 fasp_iarray_cp(row_plus_one, ia, S->IA);
1804 fasp_iarray_cp(nnz, ja, S->JA);
1805
1806#ifdef _OPENMP
1807#pragma omp parallel for private(i, j, begin_row, end_row, row_abs_sum)
1808#endif
1809 for (i = 0; i < row; ++i) {
1810 /* compute scaling factor and row sum */
1811 row_abs_sum = 0;
1812 begin_row = ia[i];
1813 end_row = ia[i + 1];
1814 for (j = begin_row; j < end_row; j++) {
1815 row_abs_sum += ABS(aj[j]);
1816 }
1817 row_abs_sum = row_abs_sum * theta;
1818
1819 /* deal with the element of S */
1820 for (j = begin_row; j < end_row; j++) {
1821 if ((row_abs_sum >= ABS(aj[j])) && (ja[j] != i)) {
1822 S->JA[j] = -1;
1823 }
1824 }
1825 } // end for i
1826
1827 /* Compress the strength matrix */
1828 index = 0;
1829 for (i = 0; i < row; ++i) {
1830 S->IA[i] = index;
1831 begin_row = ia[i];
1832 end_row = ia[i + 1] - 1;
1833 for (j = begin_row; j <= end_row; j++) {
1834 if (S->JA[j] > -1) {
1835 S->JA[index] = S->JA[j];
1836 index++;
1837 }
1838 }
1839 }
1840
1841 if (index > 0) {
1842 S->IA[row] = index;
1843 S->nnz = index;
1844 S->JA = (INT*)fasp_mem_realloc(S->JA, index * sizeof(INT));
1845 } else {
1846 S->nnz = 0;
1847 S->JA = NULL;
1848 }
1849}
1850#endif
1851
1868 INT* groups)
1869{
1870#if MULTI_COLOR_ORDER
1871 INT k, i, j, pre, group;
1872 INT igold, iend, iavg;
1873 INT icount;
1874 INT front, rear;
1875 INT n = A->row;
1876 INT* IA = S->IA;
1877 INT* JA = S->JA;
1878
1879 INT* cq = (INT*)malloc(sizeof(INT) * (n + 1));
1880 INT* newr = (INT*)malloc(sizeof(INT) * (n + 1));
1881
1882#ifdef _OPENMP
1883#pragma omp parallel for private(k)
1884#endif
1885 for (k = 0; k < n; k++) {
1886 cq[k] = k;
1887 }
1888 group = 0;
1889 for (k = 0; k < n; k++) {
1890 if ((IA[k + 1] - IA[k]) > group) group = IA[k + 1] - IA[k];
1891 }
1892 *flags = group;
1893#if 1
1894 iavg = IA[n] / n;
1895 igold = (INT)MAX(iavg, group * 0.618) + 1;
1896 igold = group;
1897#endif
1898
1899 A->IC = (INT*)malloc(sizeof(INT) * (group + 2));
1900 A->ICMAP = (INT*)malloc(sizeof(INT) * (n + 1));
1901
1902 front = n - 1;
1903 rear = n - 1;
1904
1905 memset(newr, -1, sizeof(INT) * (n + 1));
1906 memset(A->ICMAP, 0, sizeof(INT) * n);
1907
1908 group = 0;
1909 icount = 0;
1910 A->IC[0] = 0;
1911 pre = 0;
1912
1913 do {
1914 // front = (front+1)%n;
1915 front++;
1916 if (front == n) front = 0; // front = front < n ? front : 0 ;
1917 i = cq[front];
1918
1919 if (i <= pre) {
1920 A->IC[group] = icount;
1921 A->ICMAP[icount] = i;
1922 group++;
1923 icount++;
1924#if 0
1925 if ((IA[i+1]-IA[i]) > igold)
1926 iend = MIN(IA[i+1], (IA[i] + igold));
1927 else
1928#endif
1929 iend = IA[i + 1];
1930 for (j = IA[i]; j < iend; j++) newr[JA[j]] = group;
1931 } else if (newr[i] == group) {
1932 // rear = (rear +1)%n;
1933 rear++;
1934 if (rear == n) rear = 0;
1935 cq[rear] = i;
1936 } else {
1937 A->ICMAP[icount] = i;
1938 icount++;
1939#if 0
1940 if ((IA[i+1] - IA[i]) > igold) iend =MIN(IA[i+1], (IA[i] + igold));
1941 else
1942#endif
1943 iend = IA[i + 1];
1944 for (j = IA[i]; j < iend; j++) newr[JA[j]] = group;
1945 }
1946 pre = i;
1947
1948 } while (rear != front);
1949
1950 A->IC[group] = icount;
1951 A->color = group;
1952
1953#if 0
1954 for(i=0; i < A->color; i++ ){
1955 for(j=A -> IC[i]; j < A-> IC[i+1];j++)
1956 printf("color %d ICMAP[%d] = %d \n", i,j,A-> ICMAP[j]);
1957 printf( "A.color = %d A.row= %d %d\n",A -> color,A -> row,A-> IC[i+1] - A-> IC[i] );
1958 getchar();
1959 }
1960#endif
1961 printf(" Max Row Numbers %d avg %d igold %d max %d %d\n", group, iavg, igold,
1962 (INT)MAX(iavg, group * 0.618), A->IA[n] / n);
1963 free(cq);
1964 free(newr);
1965 *groups = group;
1966#endif
1967}
1968
1984void dCSRmat_Multicoloring_Theta(dCSRmat* A, REAL theta, INT* rowmax, INT* groups)
1985{
1986#if MULTI_COLOR_ORDER
1987 INT k, i, j, pre, group;
1988 INT igold, iend, iavg;
1989 INT icount;
1990 INT front, rear;
1991 INT n = A->row;
1992 //---------------------------------------------------------------------------
1993 iCSRmat S;
1994 INT * IA, *JA;
1995 if (theta > 0 && theta < 1.0) {
1996 generate_S_theta(A, &S, theta);
1997 IA = S.IA;
1998 JA = S.JA;
1999 } else if (theta == 1.0) {
2000
2001 A->IC = (INT*)malloc(sizeof(INT) * 2);
2002 A->ICMAP = (INT*)malloc(sizeof(INT) * (n + 1));
2003 A->IC[0] = 0;
2004 A->IC[1] = n;
2005#ifdef _OPENMP
2006#pragma omp parallel for private(k)
2007#endif
2008 for (k = 0; k < n; k++) A->ICMAP[k] = k;
2009
2010 A->color = 1;
2011 *groups = 1;
2012 *rowmax = 1;
2013 printf("Theta = %lf \n", theta);
2014
2015 return;
2016
2017 } else {
2018 IA = A->IA;
2019 JA = A->JA;
2020 }
2021 //---------------------------------------------------------------------------
2022 INT* cq = (INT*)malloc(sizeof(INT) * (n + 1));
2023 INT* newr = (INT*)malloc(sizeof(INT) * (n + 1));
2024
2025#ifdef _OPENMP
2026#pragma omp parallel for private(k)
2027#endif
2028 for (k = 0; k < n; k++) {
2029 cq[k] = k;
2030 }
2031 group = 0;
2032 for (k = 0; k < n; k++) {
2033 if ((A->IA[k + 1] - A->IA[k]) > group) group = A->IA[k + 1] - A->IA[k];
2034 }
2035 *rowmax = group;
2036
2037#if 0
2038 iavg = IA[n]/n ;
2039 igold = (INT)MAX(iavg,group*0.618) +1;
2040 igold = group ;
2041#endif
2042
2043 A->IC = (INT*)malloc(sizeof(INT) * (group + 2));
2044 A->ICMAP = (INT*)malloc(sizeof(INT) * (n + 1));
2045
2046 front = n - 1;
2047 rear = n - 1;
2048
2049 memset(newr, -1, sizeof(INT) * (n + 1));
2050 memset(A->ICMAP, 0, sizeof(INT) * n);
2051
2052 group = 0;
2053 icount = 0;
2054 A->IC[0] = 0;
2055 pre = 0;
2056
2057 do {
2058 // front = (front+1)%n;
2059 front++;
2060 if (front == n) front = 0; // front = front < n ? front : 0 ;
2061 i = cq[front];
2062
2063 if (i <= pre) {
2064 A->IC[group] = icount;
2065 A->ICMAP[icount] = i;
2066 group++;
2067 icount++;
2068#if 0
2069 if ((IA[i+1]-IA[i]) > igold)
2070 iend = MIN(IA[i+1], (IA[i] + igold));
2071 else
2072#endif
2073 iend = IA[i + 1];
2074 for (j = IA[i]; j < iend; j++) newr[JA[j]] = group;
2075 } else if (newr[i] == group) {
2076 // rear = (rear +1)%n;
2077 rear++;
2078 if (rear == n) rear = 0;
2079 cq[rear] = i;
2080 } else {
2081 A->ICMAP[icount] = i;
2082 icount++;
2083#if 0
2084 if ((IA[i+1] - IA[i]) > igold) iend =MIN(IA[i+1], (IA[i] + igold));
2085 else
2086#endif
2087 iend = IA[i + 1];
2088 for (j = IA[i]; j < iend; j++) newr[JA[j]] = group;
2089 }
2090 pre = i;
2091
2092 // printf("pre = %d\n",pre);
2093 } while (rear != front);
2094
2095 // printf("group\n");
2096 A->IC[group] = icount;
2097 A->color = group;
2098
2099#if 0
2100 for(i=0; i < A->color; i++ ){
2101 for(j=A -> IC[i]; j < A-> IC[i+1];j++)
2102 printf("color %d ICMAP[%d] = %d \n", i,j,A-> ICMAP[j]);
2103 printf( "A.color = %d A.row= %d %d\n",A -> color,A -> row,A-> IC[i+1] - A-> IC[i] );
2104 getchar();
2105 }
2106 printf(" Max Row Numbers %d avg %d igold %d max %d %d\n", group, iavg, igold, (INT)MAX(iavg,group*0.618),A->IA[n]/n );
2107#endif
2108 free(cq);
2109 free(newr);
2110 if (theta > 0) {
2111 fasp_mem_free(S.IA);
2112 fasp_mem_free(S.JA);
2113 }
2114 *groups = group;
2115#endif
2116 return;
2117}
2118
2119/*
2120 * TODO: Why it is not in ItrSmootherCSR.c? Move?
2121 * TODO: Add Doxygen!
2122 */
2123void fasp_smoother_dcsr_gs_multicolor(dvector* u, dCSRmat* A, dvector* b, INT L,
2124 const INT order)
2125{
2126#if MULTI_COLOR_ORDER
2127 const INT nrow = A->row; // number of rows
2128 const INT * ia = A->IA, *ja = A->JA;
2129 const REAL *aj = A->val, *bval = b->val;
2130 REAL* uval = u->val;
2131
2132 INT i, j, k, begin_row, end_row;
2133 REAL t, d = 0.0;
2134
2135 INT myid, mybegin, myend;
2136 INT color = A->color;
2137 INT* IC = A->IC;
2138 INT* ICMAP = A->ICMAP;
2139 INT I;
2140
2141 // From color to 0 order
2142 if (order == -1) {
2143 while (L--) {
2144 for (myid = color - 1; myid > -1; myid--) {
2145 mybegin = IC[myid];
2146 myend = IC[myid + 1];
2147#ifdef _OPENMP
2148#pragma omp parallel for private(I, i, t, begin_row, end_row, k, j, d)
2149#endif
2150 for (I = mybegin; I < myend; I++) {
2151 i = ICMAP[I];
2152 t = bval[i];
2153 begin_row = ia[i], end_row = ia[i + 1];
2154 for (k = begin_row; k < end_row; k++) {
2155 j = ja[k];
2156 if (i != j)
2157 t -= aj[k] * uval[j];
2158 else
2159 d = aj[k];
2160 } // end for k
2161 if (ABS(d) > SMALLREAL) uval[i] = t / d;
2162 } // end for I
2163 } // end for myid
2164 } // end while
2165 }
2166 // From 0 to color order
2167 else {
2168 while (L--) {
2169 for (myid = 0; myid < color; myid++) {
2170 mybegin = IC[myid];
2171 myend = IC[myid + 1];
2172#ifdef _OPENMP
2173#pragma omp parallel for private(I, i, t, begin_row, end_row, k, j, d)
2174#endif
2175 for (I = mybegin; I < myend; I++) {
2176 i = ICMAP[I];
2177 t = bval[i];
2178 begin_row = ia[i], end_row = ia[i + 1];
2179 for (k = begin_row; k < end_row; k++) {
2180 j = ja[k];
2181 if (i != j)
2182 t -= aj[k] * uval[j];
2183 else
2184 d = aj[k];
2185 } // end for k
2186 if (ABS(d) > SMALLREAL) uval[i] = t / d;
2187 } // end for I
2188 } // end for myid
2189 } // end while
2190 } // end if order
2191#else
2192 printf("### ERROR: MULTI_COLOR_ORDER has not been turn on!!! \n");
2193#endif
2194 return;
2195}
2196
2197/*---------------------------------*/
2198/*-- End of File --*/
2199/*---------------------------------*/
void fasp_iarray_set(const INT n, INT *x, const INT val)
Set initial value for an array to be x=val.
Definition: AuxArray.c:98
void fasp_darray_cp(const INT n, const REAL *x, REAL *y)
Copy an array to the other y=x.
Definition: AuxArray.c:210
void fasp_iarray_cp(const INT n, const INT *x, INT *y)
Copy an array to the other y=x.
Definition: AuxArray.c:227
void fasp_mem_free(void *mem)
Free up previous allocated memory body and set pointer to NULL.
Definition: AuxMemory.c:152
void * fasp_mem_realloc(void *oldmem, const LONGLONG tsize)
Reallocate, initiate, and check memory.
Definition: AuxMemory.c:113
void * fasp_mem_calloc(const unsigned int size, const unsigned int type)
Allocate, initiate, and check memory.
Definition: AuxMemory.c:65
void fasp_chkerr(const SHORT status, const char *fctname)
Check error status and print out error messages before quit.
Definition: AuxMessage.c:213
void fasp_aux_iQuickSortIndex(INT *a, INT left, INT right, INT *index)
Reorder the index of (INT type) so that 'a' is in ascending order.
Definition: AuxSort.c:286
void fasp_get_start_end(const INT procid, const INT nprocs, const INT n, INT *start, INT *end)
Assign Load to each thread.
Definition: AuxThreads.c:93
void fasp_dvec_alloc(const INT m, dvector *u)
Create dvector data space of REAL type.
Definition: AuxVector.c:105
void fasp_dcsr_diagpref(dCSRmat *A)
Re-order the column and data arrays of a CSR matrix, so that the first entry in each row is the diago...
Definition: BlaSparseCSR.c:680
dCSRmat fasp_dcsr_create(const INT m, const INT n, const INT nnz)
Create CSR sparse matrix data memory space.
Definition: BlaSparseCSR.c:47
void fasp_dcsr_shift(dCSRmat *A, const INT offset)
Re-index a REAL matrix in CSR format to make the index starting from 0 or 1.
void fasp_icsr_trans(const iCSRmat *A, iCSRmat *AT)
Find transpose of iCSRmat matrix A.
Definition: BlaSparseCSR.c:875
void dCSRmat_Multicoloring_Theta(dCSRmat *A, REAL theta, INT *rowmax, INT *groups)
Use the greedy multicoloring algorithm to get color groups for for the adjacency graph of A.
SHORT fasp_dcsr_getblk(const dCSRmat *A, const INT *Is, const INT *Js, const INT m, const INT n, dCSRmat *B)
Get a sub CSR matrix of A with specified rows and columns.
Definition: BlaSparseCSR.c:446
void fasp_dcsr_compress(const dCSRmat *A, dCSRmat *B, const REAL dtol)
Compress a CSR matrix A and store in CSR matrix B by dropping small entries abs(aij)<=dtol.
void fasp_dcsr_free(dCSRmat *A)
Free CSR sparse matrix data memory space.
Definition: BlaSparseCSR.c:184
void dCSRmat_Multicoloring_Strong_Coupled(dCSRmat *A, iCSRmat *S, INT *flags, INT *groups)
Use the greedy multicoloring algorithm to get color groups for the adjacency graph of A.
void fasp_dcsr_transz(dCSRmat *A, INT *p, dCSRmat *AT)
Generalized transpose of A: (n x m) matrix given in dCSRmat format.
SHORT fasp_dcsr_regdiag(dCSRmat *A, const REAL value)
Regularize diagonal entries of a CSR sparse matrix.
Definition: BlaSparseCSR.c:786
void fasp_dcsr_sort(dCSRmat *A)
Sort each row of A in ascending order w.r.t. column indices.
Definition: BlaSparseCSR.c:385
void fasp_icsr_free(iCSRmat *A)
Free CSR sparse matrix data memory space.
Definition: BlaSparseCSR.c:219
void fasp_dcsr_multicoloring(dCSRmat *A, INT *flags, INT *groups)
Use the greedy multi-coloring to get color groups of the adjacency graph of A.
void dCSRmat_Multicoloring(dCSRmat *A, INT *rowmax, INT *groups)
Use the greedy multicoloring algorithm to get color groups for for the adjacency graph of A.
void fasp_dcsr_cp(const dCSRmat *A, dCSRmat *B)
copy a dCSRmat to a new one B=A
Definition: BlaSparseCSR.c:851
void fasp_dcsr_transpose(INT *row[2], INT *col[2], REAL *val[2], INT *nn, INT *tniz)
Transpose of a dCSRmat matrix.
SHORT fasp_dcsr_compress_inplace(dCSRmat *A, const REAL dtol)
Compress a CSR matrix A IN PLACE by dropping small entries abs(aij)<=dtol.
iCSRmat fasp_icsr_create(const INT m, const INT n, const INT nnz)
Create CSR sparse matrix data memory space.
Definition: BlaSparseCSR.c:96
void fasp_dcsr_getcol(const INT n, const dCSRmat *A, REAL *col)
Get the n-th column of a CSR matrix A.
Definition: BlaSparseCSR.c:602
INT fasp_dcsr_bandwidth(const dCSRmat *A)
Get bandwith of matrix.
Definition: BlaSparseCSR.c:245
void fasp_dcsr_getdiag(INT n, const dCSRmat *A, dvector *diag)
Get first n diagonal entries of a CSR matrix A.
Definition: BlaSparseCSR.c:537
dCSRmat fasp_dcsr_permz(dCSRmat *A, INT *p)
Permute rows and cols of A, i.e. A=PAP' by the ordering in p.
void fasp_icsr_cp(const iCSRmat *A, iCSRmat *B)
Copy a iCSRmat to a new one B=A.
Definition: BlaSparseCSR.c:827
void fasp_dcsr_alloc(const INT m, const INT n, const INT nnz, dCSRmat *A)
Allocate CSR sparse matrix memory space.
Definition: BlaSparseCSR.c:138
void fasp_dcsr_symdiagscale(dCSRmat *A, const dvector *diag)
Symmetric diagonal scaling D^{-1/2}AD^{-1/2}.
void fasp_dcsr_sortz(dCSRmat *A, const SHORT isym)
Sort each row of A in ascending order w.r.t. column indices.
dCSRmat fasp_dcsr_sympart(dCSRmat *A)
Get symmetric part of a dCSRmat matrix.
dCSRmat fasp_dcsr_perm(dCSRmat *A, INT *P)
Apply permutation of A, i.e. Aperm=PAP' by the orders given in P.
Definition: BlaSparseCSR.c:275
INT fasp_dcsr_trans(const dCSRmat *A, dCSRmat *AT)
Find transpose of dCSRmat matrix A.
Definition: BlaSparseCSR.c:952
SHORT fasp_blas_dcsr_add(const dCSRmat *A, const REAL alpha, const dCSRmat *B, const REAL beta, dCSRmat *C)
compute C = alpha*A + beta*B in CSR format
Definition: BlaSpmvCSR.c:60
Main header file for the FASP project.
#define MIN(a, b)
Definition: fasp.h:83
#define REAL
Definition: fasp.h:75
#define SHORT
FASP integer and floating point numbers.
Definition: fasp.h:71
#define ABS(a)
Definition: fasp.h:84
#define MAX(a, b)
Definition of max, min, abs.
Definition: fasp.h:82
#define INT
Definition: fasp.h:72
#define ERROR_MISC
Definition: fasp_const.h:28
#define FASP_SUCCESS
Definition of return status and error messages.
Definition: fasp_const.h:19
#define OPENMP_HOLDS
Definition: fasp_const.h:269
#define ERROR_DUMMY_VAR
Definition: fasp_const.h:33
#define TRUE
Definition of logic type.
Definition: fasp_const.h:61
#define FALSE
Definition: fasp_const.h:62
#define ERROR_UNKNOWN
Definition: fasp_const.h:56
#define SMALLREAL
Definition: fasp_const.h:256
Sparse matrix of REAL type in CSR format.
Definition: fasp.h:151
INT col
column of matrix A, n
Definition: fasp.h:157
REAL * val
nonzero entries of A
Definition: fasp.h:169
INT row
row number of matrix A, m
Definition: fasp.h:154
INT * IA
integer array of row pointers, the size is m+1
Definition: fasp.h:163
INT nnz
number of nonzero entries
Definition: fasp.h:160
INT * JA
integer array of column indexes, the size is nnz
Definition: fasp.h:166
Vector with n entries of REAL type.
Definition: fasp.h:354
REAL * val
actual vector entries
Definition: fasp.h:360
INT row
number of rows
Definition: fasp.h:357
Sparse matrix of INT type in CSR format.
Definition: fasp.h:190
INT col
column of matrix A, n
Definition: fasp.h:196
INT row
row number of matrix A, m
Definition: fasp.h:193
INT * IA
integer array of row pointers, the size is m+1
Definition: fasp.h:202
INT nnz
number of nonzero entries
Definition: fasp.h:199
INT * JA
integer array of column indexes, the size is nnz
Definition: fasp.h:205
INT * val
nonzero entries of A
Definition: fasp.h:208