Fast Auxiliary Space Preconditioning 2.7.7 Aug/28/2022
Loading...
Searching...
No Matches
BlaILUSetupBSR.c
Go to the documentation of this file.
1
16#include <math.h>
17#include <time.h>
18
19#include "fasp.h"
20#include "fasp_functs.h"
21
22/*---------------------------------*/
23/*-- Declare Private Functions --*/
24/*---------------------------------*/
25
26static INT numfactor (dBSRmat *, REAL *, INT *, INT *);
27static INT numfactor_mulcol (dBSRmat *, REAL *, INT *, INT *, INT, INT *, INT *);
28static INT numfactor_levsch (dBSRmat *, REAL *, INT *, INT *, INT, INT *, INT *);
29static void generate_S_theta(dCSRmat *, iCSRmat *, REAL);
30// static void topologic_sort_ILU (ILU_data *);
31// static void mulcol_independ_set (AMG_data *, INT);
32
33/*---------------------------------*/
34/*-- Public Functions --*/
35/*---------------------------------*/
36
56 ILU_data *iludata,
57 ILU_param *iluparam)
58{
59
60 const SHORT prtlvl = iluparam->print_level;
61 const INT n = A->COL, nnz = A->NNZ, nb = A->nb, nb2 = nb*nb;
62
63 // local variables
64 INT lfil = iluparam->ILU_lfil;
65 INT ierr, iwk, nzlu, nwork, *ijlu, *uptr;
66 SHORT status = FASP_SUCCESS;
67 REAL setup_start, setup_end, setup_duration;
68
69#if DEBUG_MODE > 0
70 printf("### DEBUG: [-Begin-] %s ...\n", __FUNCTION__);
71 printf("### DEBUG: m = %d, n = %d, nnz = %d\n", A->ROW, n, nnz);
72#endif
73
74 fasp_gettime(&setup_start);
75
76 // Expected amount of memory for ILU needed and allocate memory
77 iwk = (lfil+2)*nnz;
78
79#if DEBUG_MODE > 0
80 if (iluparam->ILU_type == ILUtp) {
81 printf("### WARNING: iludata->type = %d not supported!\n",
82 iluparam->ILU_type);
83 }
84#endif
85
86 // setup preconditioner
87 iludata->type = 0; // Must be initialized
88 iludata->iperm = NULL;
89 iludata->A = NULL; // No need for BSR matrix
90 iludata->row = iludata->col = n;
91 iludata->nb = nb;
92 iludata->ilevL = iludata->jlevL = NULL;
93 iludata->ilevU = iludata->jlevU = NULL;
94
95 ijlu = (INT*)fasp_mem_calloc(iwk,sizeof(INT));
96 uptr = (INT*)fasp_mem_calloc(A->ROW,sizeof(INT));
97
98#if DEBUG_MODE > 1
99 printf("### DEBUG: symbolic factorization ... \n");
100#endif
101
102 // ILU decomposition
103 // (1) symbolic factoration
104 fasp_symbfactor(A->ROW,A->JA,A->IA,lfil,iwk,&nzlu,ijlu,uptr,&ierr);
105
106 if ( ierr != 0 ) {
107 printf("### ERROR: ILU setup failed (ierr=%d)! [%s]\n", ierr, __FUNCTION__);
108 status = ERROR_SOLVER_ILUSETUP;
109 goto FINISHED;
110 }
111
112 iludata->luval = (REAL*)fasp_mem_calloc(nzlu*nb2,sizeof(REAL));
113
114#if DEBUG_MODE > 1
115 printf("### DEBUG: numerical factorization ... \n");
116#endif
117
118 // (2) numerical factoration
119 status = numfactor(A, iludata->luval, ijlu, uptr);
120
121 if ( status < 0 ) {
122 printf("### ERROR: ILU factorization failed! [%s]\n", __FUNCTION__);
123 status = ERROR_SOLVER_ILUSETUP;
124 goto FINISHED;
125 }
126
127 //nwork = 6*nzlu*nb;
128 nwork = 20*A->ROW*A->nb;
129 iludata->nzlu = nzlu;
130 iludata->nwork = nwork;
131 iludata->ijlu = (INT*)fasp_mem_calloc(nzlu, sizeof(INT));
132
133 memcpy(iludata->ijlu,ijlu,nzlu*sizeof(INT));
134 iludata->work = (REAL*)fasp_mem_calloc(nwork, sizeof(REAL));
135 // Check: Is the work space too large? --Xiaozhe
136
137#if DEBUG_MODE > 1
138 printf("### DEBUG: fill-in = %d, nwork = %d\n", lfil, nwork);
139 printf("### DEBUG: iwk = %d, nzlu = %d\n", iwk, nzlu);
140#endif
141
142 if ( iwk < nzlu ) {
143 printf("### ERROR: ILU needs more RAM %d! [%s]\n", iwk-nzlu, __FUNCTION__);
144 status = ERROR_SOLVER_ILUSETUP;
145 goto FINISHED;
146 }
147
148 if ( prtlvl > PRINT_NONE ) {
149 fasp_gettime(&setup_end);
150 setup_duration = setup_end - setup_start;
151 printf("BSR ILU(%d)-seq setup costs %f seconds.\n", lfil, setup_duration);
152 }
153
154FINISHED:
155 fasp_mem_free(ijlu); ijlu = NULL;
156 fasp_mem_free(uptr); uptr = NULL;
157
158#if DEBUG_MODE > 0
159 printf("### DEBUG: [--End--] %s ...\n", __FUNCTION__);
160#endif
161
162 return status;
163}
164
188 ILU_data *iludata,
189 ILU_param *iluparam,
190 INT step)
191{
192
193 const SHORT prtlvl = iluparam->print_level;
194 const INT n = A->COL, nnz = A->NNZ, nb = A->nb, nb2 = nb*nb;
195
196 // local variables
197 INT lfil = iluparam->ILU_lfil;
198 static INT ierr, iwk, nzlu, nwork, *ijlu, *uptr;
199 SHORT status = FASP_SUCCESS;
200
201 REAL setup_start, setup_end, setup_duration;
202
203#if DEBUG_MODE > 0
204 printf("### DEBUG: [-Begin-] %s ...\n", __FUNCTION__);
205 printf("### DEBUG: m = %d, n = %d, nnz = %d\n", A->ROW, n, nnz);
206#endif
207
208 fasp_gettime(&setup_start);
209
210 if (step==1) {
211 // Expected amount of memory for ILU needed and allocate memory
212 iwk = (lfil+2)*nnz;
213
214 #if DEBUG_MODE > 0
215 if (iluparam->ILU_type == ILUtp) {
216 printf("### WARNING: iludata->type = %d not supported!\n",
217 iluparam->ILU_type);
218 }
219 #endif
220
221 // setup preconditioner
222 iludata->type = 0; // Must be initialized
223 iludata->iperm = NULL;
224 iludata->A = NULL; // No need for BSR matrix
225 iludata->row = iludata->col = n;
226 iludata->nb = nb;
227 iludata->ilevL = iludata->jlevL = NULL;
228 iludata->ilevU = iludata->jlevU = NULL;
229
230 ijlu = (INT*)fasp_mem_calloc(iwk,sizeof(INT));
231
232 if (uptr != NULL) fasp_mem_free(uptr);
233 uptr = (INT*)fasp_mem_calloc(A->ROW,sizeof(INT));
234
235 #if DEBUG_MODE > 1
236 printf("### DEBUG: symbolic factorization ... \n");
237 #endif
238
239 // ILU decomposition
240 // (1) symbolic factoration
241 fasp_symbfactor(A->ROW,A->JA,A->IA,lfil,iwk,&nzlu,ijlu,uptr,&ierr);
242
243 iludata->luval = (REAL*)fasp_mem_calloc(nzlu*nb2,sizeof(REAL));
244
245
246 #if DEBUG_MODE > 1
247 printf("### DEBUG: numerical factorization ... \n");
248 #endif
249
250 //nwork = 6*nzlu*nb;
251 nwork = 5*A->ROW*A->nb;
252 iludata->nwork = nwork;
253 iludata->nzlu = nzlu;
254 iludata->ijlu = (INT*)fasp_mem_calloc(nzlu, sizeof(INT));
255
256 memcpy(iludata->ijlu,ijlu,nzlu*sizeof(INT));
257 fasp_mem_free(ijlu); ijlu = NULL;
258
259 iludata->work = (REAL*)fasp_mem_calloc(nwork, sizeof(REAL));
260 // Check: Is the work space too large? --Xiaozhe
261
262 #if DEBUG_MODE > 1
263 printf("### DEBUG: fill-in = %d, nwork = %d\n", lfil, nwork);
264 printf("### DEBUG: iwk = %d, nzlu = %d\n", iwk, nzlu);
265 #endif
266
267 if ( ierr != 0 ) {
268 printf("### ERROR: ILU setup failed (ierr=%d)! [%s]\n", ierr, __FUNCTION__);
269 status = ERROR_SOLVER_ILUSETUP;
270 goto FINISHED;
271 }
272
273 if ( iwk < nzlu ) {
274 printf("### ERROR: ILU needs more RAM %d! [%s]\n", iwk-nzlu, __FUNCTION__);
275 status = ERROR_SOLVER_ILUSETUP;
276 goto FINISHED;
277 }
278 }
279 else if (step==2) {
280 // (2) numerical factoration
281 numfactor(A, iludata->luval, iludata->ijlu, uptr);
282
283 } else {
284
285FINISHED:
286 fasp_mem_free(uptr); uptr = NULL;
287 }
288
289 if ( prtlvl > PRINT_NONE ) {
290 fasp_gettime(&setup_end);
291 setup_duration = setup_end - setup_start;
292 printf("BSR ILU(%d) setup costs %f seconds.\n", lfil, setup_duration);
293 }
294
295#if DEBUG_MODE > 0
296 printf("### DEBUG: [--End--] %s ...\n", __FUNCTION__);
297#endif
298
299 return status;
300}
301
321 ILU_data *iludata,
322 ILU_param *iluparam)
323{
324
325 const SHORT prtlvl = iluparam->print_level;
326 const INT n = A->COL, nnz = A->NNZ, nb = A->nb, nb2 = nb*nb;
327
328 // local variables
329 INT lfil = iluparam->ILU_lfil;
330 INT ierr, iwk, nzlu, nwork, *ijlu, *uptr;
331 SHORT status = FASP_SUCCESS;
332
333 REAL setup_start, setup_end, setup_duration;
334 REAL symbolic_start, symbolic_end, numfac_start, numfac_end;
335
336#if DEBUG_MODE > 0
337 printf("### DEBUG: [-Begin-] %s ...\n", __FUNCTION__);
338 printf("### DEBUG: m = %d, n = %d, nnz = %d\n", A->ROW, n, nnz);
339#endif
340
341 fasp_gettime(&setup_start);
342
343 // Expected amount of memory for ILU needed and allocate memory
344 iwk = (lfil+2)*nnz;
345
346#if DEBUG_MODE > 0
347 if (iluparam->ILU_type == ILUtp) {
348 printf("### WARNING: iludata->type = %d not supported any more!\n",
349 iluparam->ILU_type);
350 }
351#endif
352
353 // setup preconditioner
354 iludata->type = 0; // Must be initialized
355 iludata->iperm = NULL;
356 iludata->A = NULL; // No need for BSR matrix
357 iludata->row = iludata->col = n;
358 iludata->nb = nb;
359
360 ijlu = (INT *) fasp_mem_calloc(iwk, sizeof(INT));
361 uptr = (INT *) fasp_mem_calloc(A->ROW,sizeof(INT));
362
363#if DEBUG_MODE > 1
364 printf("### DEBUG: symbolic factorization ... \n");
365#endif
366
367 // ILU decomposition
368 // (1) symbolic factoration
369 fasp_gettime(&symbolic_start);
370
371 fasp_symbfactor(A->ROW,A->JA,A->IA,lfil,iwk,&nzlu,ijlu,uptr,&ierr);
372
373 fasp_gettime(&symbolic_end);
374
375#if prtlvl > PRINT_MIN
376 printf("ILU symbolic factorization time = %f\n", symbolic_end-symbolic_start);
377#endif
378
379 nwork = 5*A->ROW*A->nb;
380 iludata->nzlu = nzlu;
381 iludata->nwork = nwork;
382 iludata->ijlu = (INT*)fasp_mem_calloc(nzlu,sizeof(INT));
383 iludata->luval = (REAL*)fasp_mem_calloc(nzlu*nb2,sizeof(REAL));
384 iludata->work = (REAL*)fasp_mem_calloc(nwork, sizeof(REAL));
385 memcpy(iludata->ijlu,ijlu,nzlu*sizeof(INT));
386 fasp_darray_set(nzlu*nb2, iludata->luval, 0.0);
387
388#if DEBUG_MODE > 1
389 printf("### DEBUG: numerical factorization ... \n");
390#endif
391
392 // (2) numerical factoration
393 fasp_gettime(&numfac_start);
394
395 numfactor_mulcol(A, iludata->luval, ijlu, uptr, iludata->nlevL,
396 iludata->ilevL, iludata->jlevL);
397
398 fasp_gettime(&numfac_end);
399
400#if prtlvl > PRINT_MIN
401 printf("ILU numerical factorization time = %f\n", numfac_end-numfac_start);
402#endif
403
404#if DEBUG_MODE > 1
405 printf("### DEBUG: fill-in = %d, nwork = %d\n", lfil, nwork);
406 printf("### DEBUG: iwk = %d, nzlu = %d\n", iwk, nzlu);
407#endif
408
409 if ( ierr != 0 ) {
410 printf("### ERROR: ILU setup failed (ierr=%d)! [%s]\n", ierr, __FUNCTION__);
411 status = ERROR_SOLVER_ILUSETUP;
412 goto FINISHED;
413 }
414
415 if ( iwk < nzlu ) {
416 printf("### ERROR: ILU needs more RAM %d! [%s]\n", iwk-nzlu, __FUNCTION__);
417 status = ERROR_SOLVER_ILUSETUP;
418 goto FINISHED;
419 }
420
421 if ( prtlvl > PRINT_NONE ) {
422 fasp_gettime(&setup_end);
423 setup_duration = setup_end - setup_start;
424 printf("BSR ILU(%d)-mc setup costs %f seconds.\n", lfil, setup_duration);
425 }
426
427FINISHED:
428 fasp_mem_free(ijlu); ijlu = NULL;
429 fasp_mem_free(uptr); uptr = NULL;
430
431#if DEBUG_MODE > 0
432 printf("### DEBUG: [--End--] %s ...\n", __FUNCTION__);
433#endif
434
435 return status;
436}
437
457 ILU_data *iludata,
458 ILU_param *iluparam)
459{
460 const SHORT prtlvl = iluparam->print_level;
461 const INT n = A->COL, nnz = A->NNZ, nb = A->nb, nb2 = nb*nb;
462
463 // local variables
464 INT lfil = iluparam->ILU_lfil;
465 INT ierr, iwk, nzlu, nwork, *ijlu, *uptr;
466 SHORT status = FASP_SUCCESS;
467
468 REAL setup_start, setup_end, setup_duration;
469 REAL symbolic_start, symbolic_end, numfac_start, numfac_end;
470
471#if DEBUG_MODE > 0
472 printf("### DEBUG: [-Begin-] %s ...\n", __FUNCTION__);
473 printf("### DEBUG: m=%d, n=%d, nnz=%d\n", A->ROW, n, nnz);
474#endif
475
476 fasp_gettime(&setup_start);
477
478 // Expected amount of memory for ILU needed and allocate memory
479 iwk = (lfil+2)*nnz;
480
481#if DEBUG_MODE > 0
482 if (iluparam->ILU_type == ILUtp) {
483 printf("### WARNING: iludata->type = %d not supported!\n",
484 iluparam->ILU_type);
485 }
486#endif
487
488 // setup preconditioner
489 iludata->type = 0; // Must be initialized
490 iludata->iperm = NULL;
491 iludata->A = NULL; // No need for BSR matrix
492 iludata->row = iludata->col=n;
493 iludata->nb = nb;
494
495 ijlu = (INT*)fasp_mem_calloc(iwk,sizeof(INT));
496 uptr = (INT*)fasp_mem_calloc(A->ROW,sizeof(INT));
497
498#if DEBUG_MODE > 1
499 printf("### DEBUG: symbolic factorization ... \n");
500#endif
501
502 fasp_gettime(&symbolic_start);
503
504 // ILU decomposition
505 // (1) symbolic factoration
506 fasp_symbfactor(A->ROW,A->JA,A->IA,lfil,iwk,&nzlu,ijlu,uptr,&ierr);
507
508 fasp_gettime(&symbolic_end);
509
510#if prtlvl > PRINT_MIN
511 printf("ILU symbolic factorization time = %f\n", symbolic_end-symbolic_start);
512#endif
513
514 nwork = 5*A->ROW*A->nb;
515 iludata->nzlu = nzlu;
516 iludata->nwork = nwork;
517 iludata->ijlu = (INT*)fasp_mem_calloc(nzlu,sizeof(INT));
518 iludata->luval = (REAL*)fasp_mem_calloc(nzlu*nb2,sizeof(REAL));
519 iludata->work = (REAL*)fasp_mem_calloc(nwork, sizeof(REAL));
520 memcpy(iludata->ijlu,ijlu,nzlu*sizeof(INT));
521 fasp_darray_set(nzlu*nb2, iludata->luval, 0.0);
522 iludata->uptr = NULL; iludata->ic = NULL; iludata->icmap = NULL;
523
524 topologic_sort_ILU(iludata);
525
526#if DEBUG_MODE > 1
527 printf("### DEBUG: numerical factorization ... \n");
528#endif
529
530 fasp_gettime(&numfac_start);
531
532 // (2) numerical factoration
533 numfactor_levsch(A, iludata->luval, ijlu, uptr, iludata->nlevL,
534 iludata->ilevL, iludata->jlevL);
535
536 fasp_gettime(&numfac_end);
537
538#if prtlvl > PRINT_MIN
539 printf("ILU numerical factorization time = %f\n", numfac_end-numfac_start);
540#endif
541
542#if DEBUG_MODE > 1
543 printf("### DEBUG: fill-in = %d, nwork = %d\n", lfil, nwork);
544 printf("### DEBUG: iwk = %d, nzlu = %d\n", iwk, nzlu);
545#endif
546
547 if ( ierr != 0 ) {
548 printf("### ERROR: ILU setup failed (ierr=%d)! [%s]\n", ierr, __FUNCTION__);
549 status = ERROR_SOLVER_ILUSETUP;
550 goto FINISHED;
551 }
552
553 if ( iwk < nzlu ) {
554 printf("### ERROR: ILU needs more RAM %d! [%s]\n", iwk-nzlu, __FUNCTION__);
555 status = ERROR_SOLVER_ILUSETUP;
556 goto FINISHED;
557 }
558
559 if ( prtlvl > PRINT_NONE ) {
560 fasp_gettime(&setup_end);
561 setup_duration = setup_end - setup_start;
562 printf("BSR ILU(%d)-ls setup costs %f seconds.\n", lfil, setup_duration);
563 }
564
565FINISHED:
566 fasp_mem_free(ijlu); ijlu = NULL;
567 fasp_mem_free(uptr); uptr = NULL;
568
569#if DEBUG_MODE > 0
570 printf("### DEBUG: [--End--] %s ...\n", __FUNCTION__);
571#endif
572
573 return status;
574}
575
598 ILU_data *iludata,
599 ILU_param *iluparam,
600 INT step)
601{
602 const SHORT prtlvl = iluparam->print_level;
603 const INT n = A->COL, nnz = A->NNZ, nb = A->nb, nb2 = nb*nb;
604
605 // local variables
606 INT lfil = iluparam->ILU_lfil;
607 static INT ierr, iwk, nzlu, nwork, *ijlu, *uptr;
608 SHORT status = FASP_SUCCESS;
609
610 REAL setup_start, setup_end, setup_duration;
611 REAL symbolic_start, symbolic_end, numfac_start, numfac_end;
612
613#if DEBUG_MODE > 0
614 printf("### DEBUG: [-Begin-] %s ...\n", __FUNCTION__);
615 printf("### DEBUG: m=%d, n=%d, nnz=%d\n", A->ROW, n, nnz);
616 printf("### DEBUG: step=%d(1: symbolic factoration, 2: numerical factoration)\n", step);// zhaoli 2021.03.24
617#endif
618
619 fasp_gettime(&setup_start);
620 if (step==1) {
621 // Expected amount of memory for ILU needed and allocate memory
622 iwk = (lfil+2)*nnz;
623
624 #if DEBUG_MODE > 0
625 if (iluparam->ILU_type == ILUtp) {
626 printf("### WARNING: iludata->type = %d not supported!\n",
627 iluparam->ILU_type);
628 }
629 #endif
630
631 // setup preconditioner
632 iludata->type = 0; // Must be initialized
633 iludata->iperm = NULL;
634 iludata->A = NULL; // No need for BSR matrix
635 iludata->row = iludata->col=n;
636 iludata->nb = nb;
637
638 fasp_mem_free(ijlu);
639 ijlu = (INT*)fasp_mem_calloc(iwk,sizeof(INT));
640
641 fasp_mem_free(uptr);
642 uptr = (INT*)fasp_mem_calloc(A->ROW,sizeof(INT));
643
644 #if DEBUG_MODE > 1
645 printf("### DEBUG: symbolic factorization ... \n");
646 #endif
647
648 fasp_gettime(&symbolic_start);
649
650 // ILU decomposition
651 // (1) symbolic factoration
652 fasp_symbfactor(A->ROW,A->JA,A->IA,lfil,iwk,&nzlu,ijlu,uptr,&ierr);
653
654 fasp_gettime(&symbolic_end);
655
656 #if prtlvl > PRINT_MIN
657 printf("ILU symbolic factorization time = %f\n", symbolic_end-symbolic_start);
658 #endif
659
660 nwork = 5*A->ROW*A->nb;
661 iludata->nzlu = nzlu;
662 iludata->nwork = nwork;
663 iludata->ijlu = (INT*)fasp_mem_calloc(nzlu,sizeof(INT));
664 iludata->luval = (REAL*)fasp_mem_calloc(nzlu*nb2,sizeof(REAL));
665 iludata->work = (REAL*)fasp_mem_calloc(nwork, sizeof(REAL));
666 memcpy(iludata->ijlu,ijlu,nzlu*sizeof(INT));
667 fasp_mem_free(ijlu); ijlu = NULL;
668
669 fasp_darray_set(nzlu*nb2, iludata->luval, 0.0);
670 iludata->uptr = NULL; iludata->ic = NULL; iludata->icmap = NULL;
671
672 topologic_sort_ILU(iludata);
673 #if DEBUG_MODE > 1
674 printf("### DEBUG: fill-in = %d, nwork = %d\n", lfil, nwork);
675 printf("### DEBUG: iwk = %d, nzlu = %d\n", iwk, nzlu);
676 #endif
677
678 if ( ierr != 0 ) {
679 printf("### ERROR: ILU setup failed (ierr=%d)! [%s]\n", ierr, __FUNCTION__);
680 status = ERROR_SOLVER_ILUSETUP;
681 goto FINISHED;
682 }
683
684 if ( iwk < nzlu ) {
685 printf("### ERROR: ILU needs more RAM %d! [%s]\n", iwk-nzlu, __FUNCTION__);
686 status = ERROR_SOLVER_ILUSETUP;
687 goto FINISHED;
688 }
689 } else if (step==2) {
690
691#if DEBUG_MODE > 1
692 printf("### DEBUG: numerical factorization ... \n");
693#endif
694
695 fasp_gettime(&numfac_start);
696
697 // (2) numerical factoration
698 numfactor_levsch(A, iludata->luval, iludata->ijlu, uptr, iludata->nlevL,
699 iludata->ilevL, iludata->jlevL);
700 fasp_gettime(&numfac_end);
701
702#if prtlvl > PRINT_MIN
703 printf("ILU numerical factorization time = %f\n", numfac_end-numfac_start);
704#endif
705 } else {
706
707FINISHED:
708// fasp_mem_free(ijlu); ijlu = NULL;
709 fasp_mem_free(uptr); uptr = NULL;
710 }
711
712 if ( prtlvl > PRINT_NONE ) {
713 fasp_gettime(&setup_end);
714 setup_duration = setup_end - setup_start;
715 printf("BSR ILU(%d)-ls setup costs %f seconds.\n", lfil, setup_duration);
716 }
717
718#if DEBUG_MODE > 0
719 printf("### DEBUG: [--End--] %s ...\n", __FUNCTION__);
720#endif
721
722
723 return status;
724}
725
746 dCSRmat *Ap,
747 ILU_data *iludata,
748 ILU_param *iluparam)
749{
750 INT status;
752 dCSRmat pp, Ap1;
753 dBSRmat A_LU;
754
755 if (iluparam->ILU_lfil==0) { //for ILU0
756 mgl[0].A = fasp_dcsr_sympart(Ap);
757 }
758 else if (iluparam->ILU_lfil==1) { // for ILU1
759 Ap1 = fasp_dcsr_create(Ap->row,Ap->col, Ap->nnz);
760 fasp_dcsr_cp(Ap, &Ap1);
761 fasp_blas_dcsr_mxm (Ap,&Ap1,&pp);
762 mgl[0].A = fasp_dcsr_sympart(&pp);
763 fasp_dcsr_free(&Ap1);
764 fasp_dcsr_free(&pp);
765 }
766
767 mgl->num_levels = 20;
768
769 mulcol_independ_set(mgl, 1);
770
771 A_LU = fasp_dbsr_perm(A, mgl[0].icmap);
772
773 // hold color info with nlevl, ilevL and jlevL.
774 iludata->nlevL = mgl[0].colors;
775 iludata->ilevL = mgl[0].ic;
776 iludata->jlevL = mgl[0].icmap;
777 iludata->nlevU = 0;
778 iludata->ilevU = NULL;
779 iludata->jlevU = NULL;
780 iludata->A = NULL; // No need for BSR matrix
781
782#if DEBUG_MODE > 0
783 if (iluparam->ILU_type == ILUtp) {
784 printf("### WARNING: iludata->type = %d not supported!\n",
785 iluparam->ILU_type);
786 }
787#endif
788
789 // setup preconditioner
790 iludata->type = 0; // Must be initialized
791 iludata->iperm = NULL;
792
793 status = fasp_ilu_dbsr_setup_omp(&A_LU,iludata,iluparam);
794
795 fasp_dcsr_free(&mgl[0].A);
796 fasp_dbsr_free(&A_LU);
797
798 return status;
799}
800
801/*---------------------------------*/
802/*-- Private Functions --*/
803/*---------------------------------*/
804
819static INT numfactor (dBSRmat *A,
820 REAL *luval,
821 INT *jlu,
822 INT *uptr)
823{
824 INT n=A->ROW,nb=A->nb, nb2=nb*nb, ib, ibstart,ibstart1;
825 INT k, indj, inds, indja,jluj, jlus, ijaj;
826 REAL *mult,*mult1;
827 INT *colptrs;
828 INT status=FASP_SUCCESS;
829
830 colptrs=(INT*)fasp_mem_calloc(n,sizeof(INT));
831 mult=(REAL*)fasp_mem_calloc(nb2,sizeof(REAL));
832 mult1=(REAL*)fasp_mem_calloc(nb2,sizeof(REAL));
833
848 //for (k=0;k<n;k++) colptrs[k]=0;
849 memset(colptrs, 0, sizeof(INT)*n);
850
851 switch (nb) {
852
853 case 1:
854
855 for (k = 0; k < n; ++k) {
856
857 for (indj = jlu[k]; indj < jlu[k+1]; ++indj) {
858 colptrs[jlu[indj]] = indj;
859 ibstart=indj*nb2;
860 for (ib=0;ib<nb2;++ib) luval[ibstart+ib] = 0;
861 }
862
863 colptrs[k] = k;
864
865 for (indja = A->IA[k]; indja < A->IA[k+1]; ++indja) {
866 ijaj = A->JA[indja];
867 ibstart=colptrs[ijaj]*nb2;
868 ibstart1=indja*nb2;
869 for (ib=0;ib<nb2;++ib) luval[ibstart+ib] = A->val[ibstart1+ib];
870 }
871
872 for (indj = jlu[k]; indj < uptr[k]; ++indj) {
873
874 jluj = jlu[indj];
875
876 luval[indj] = luval[indj]*luval[jluj];
877 mult[0] = luval[indj];
878
879 for (inds = uptr[jluj]; inds < jlu[jluj+1]; ++inds) {
880 jlus = jlu[inds];
881 if (colptrs[jlus] != 0)
882 luval[colptrs[jlus]] = luval[colptrs[jlus]] - mult[0]*luval[inds];
883 }
884
885 }
886
887 for (indj = jlu[k]; indj < jlu[k+1]; ++indj) colptrs[jlu[indj]] = 0;
888
889 colptrs[k] = 0;
890 luval[k] = 1.0/luval[k];
891 }
892
893 break;
894
895 case 3:
896
897 for (k = 0; k < n; ++k) {
898
899 for (indj = jlu[k]; indj < jlu[k+1]; ++indj) {
900 colptrs[jlu[indj]] = indj;
901 ibstart=indj*nb2;
902 for (ib=0;ib<nb2;++ib) luval[ibstart+ib] = 0;
903 }
904
905 colptrs[k] = k;
906
907 for (indja = A->IA[k]; indja < A->IA[k+1]; ++indja) {
908 ijaj = A->JA[indja];
909 ibstart=colptrs[ijaj]*nb2;
910 ibstart1=indja*nb2;
911 for (ib=0;ib<nb2;++ib) luval[ibstart+ib] = A->val[ibstart1+ib];
912 }
913
914 for (indj = jlu[k]; indj < uptr[k]; ++indj) {
915 jluj = jlu[indj];
916
917 ibstart=indj*nb2;
918 fasp_blas_smat_mul_nc3(&(luval[ibstart]),&(luval[jluj*nb2]),mult);
919 for (ib=0;ib<nb2;++ib) luval[ibstart+ib]=mult[ib];
920
921 for (inds = uptr[jluj]; inds < jlu[jluj+1]; ++inds) {
922 jlus = jlu[inds];
923 if (colptrs[jlus] != 0) {
924 fasp_blas_smat_mul_nc3(mult,&(luval[inds*nb2]),mult1);
925 ibstart=colptrs[jlus]*nb2;
926 for (ib=0;ib<nb2;++ib) luval[ibstart+ib]-=mult1[ib];
927 }
928 }
929
930 }
931
932 for (indj = jlu[k]; indj < jlu[k+1]; ++indj) colptrs[jlu[indj]] = 0;
933
934 colptrs[k] = 0;
935
936 fasp_smat_inv_nc3(&(luval[k*nb2]));
937 }
938
939 break;
940
941 case -5:
942
943 for (k = 0; k < n; ++k) {
944
945 for (indj = jlu[k]; indj < jlu[k+1]; ++indj) {
946 colptrs[jlu[indj]] = indj;
947 ibstart=indj*nb2;
948 for (ib=0;ib<nb2;++ib) luval[ibstart+ib] = 0;
949 }
950
951 colptrs[k] = k;
952
953 for (indja = A->IA[k]; indja < A->IA[k+1]; ++indja) {
954 ijaj = A->JA[indja];
955 ibstart=colptrs[ijaj]*nb2;
956 ibstart1=indja*nb2;
957 for (ib=0;ib<nb2;++ib) luval[ibstart+ib] = A->val[ibstart1+ib];
958 }
959
960 for (indj = jlu[k]; indj < uptr[k]; ++indj) {
961 jluj = jlu[indj];
962
963 ibstart=indj*nb2;
964 fasp_blas_smat_mul_nc5(&(luval[ibstart]),&(luval[jluj*nb2]),mult);
965 for (ib=0;ib<nb2;++ib) luval[ibstart+ib]=mult[ib];
966
967 for (inds = uptr[jluj]; inds < jlu[jluj+1]; ++inds) {
968 jlus = jlu[inds];
969 if (colptrs[jlus] != 0) {
970 fasp_blas_smat_mul_nc5(mult,&(luval[inds*nb2]),mult1);
971 ibstart=colptrs[jlus]*nb2;
972 for (ib=0;ib<nb2;++ib) luval[ibstart+ib]-=mult1[ib];
973 }
974 }
975
976 }
977
978 for (indj = jlu[k]; indj < jlu[k+1]; ++indj) colptrs[jlu[indj]] = 0;
979
980 colptrs[k] = 0;
981
982 // fasp_smat_inv_nc5(&(luval[k*nb2])); // not numerically stable --zcs 04/26/2021
983 status = fasp_smat_invp_nc(&(luval[k*nb2]), 5);
984 }
985
986 break;
987
988 case -7:
989
990 for (k = 0; k < n; ++k) {
991
992 for (indj = jlu[k]; indj < jlu[k+1]; ++indj) {
993 colptrs[jlu[indj]] = indj;
994 ibstart=indj*nb2;
995 for (ib=0;ib<nb2;++ib) luval[ibstart+ib] = 0;
996 }
997
998 colptrs[k] = k;
999
1000 for (indja = A->IA[k]; indja < A->IA[k+1]; ++indja) {
1001 ijaj = A->JA[indja];
1002 ibstart=colptrs[ijaj]*nb2;
1003 ibstart1=indja*nb2;
1004 for (ib=0;ib<nb2;++ib) luval[ibstart+ib] = A->val[ibstart1+ib];
1005 }
1006
1007 for (indj = jlu[k]; indj < uptr[k]; ++indj) {
1008 jluj = jlu[indj];
1009
1010 ibstart=indj*nb2;
1011 fasp_blas_smat_mul_nc7(&(luval[ibstart]),&(luval[jluj*nb2]),mult);
1012 for (ib=0;ib<nb2;++ib) luval[ibstart+ib]=mult[ib];
1013
1014 for (inds = uptr[jluj]; inds < jlu[jluj+1]; ++inds) {
1015 jlus = jlu[inds];
1016 if (colptrs[jlus] != 0) {
1017 fasp_blas_smat_mul_nc7(mult,&(luval[inds*nb2]),mult1);
1018 ibstart=colptrs[jlus]*nb2;
1019 for (ib=0;ib<nb2;++ib) luval[ibstart+ib]-=mult1[ib];
1020 }
1021 }
1022
1023 }
1024
1025 for (indj = jlu[k]; indj < jlu[k+1]; ++indj) colptrs[jlu[indj]] = 0;
1026
1027 colptrs[k] = 0;
1028
1029 // fasp_smat_inv(&(luval[k*nb2]),nb); // not numerically stable --zcs 04/26/2021
1030 status = fasp_smat_invp_nc(&(luval[k*nb2]), nb);
1031 }
1032
1033 break;
1034
1035 default:
1036
1037 for (k=0;k<n;k++) {
1038
1039 for (indj = jlu[k]; indj < jlu[k+1]; ++indj) {
1040 colptrs[jlu[indj]] = indj;
1041 ibstart=indj*nb2;
1042 for (ib=0;ib<nb2;++ib) luval[ibstart+ib] = 0;
1043 }
1044
1045 colptrs[k] = k;
1046
1047 for (indja = A->IA[k]; indja < A->IA[k+1]; indja++) {
1048 ijaj = A->JA[indja];
1049 ibstart=colptrs[ijaj]*nb2;
1050 ibstart1=indja*nb2;
1051 for (ib=0;ib<nb2;++ib) luval[ibstart+ib] = A->val[ibstart1+ib];
1052 }
1053
1054 for (indj = jlu[k]; indj < uptr[k]; ++indj) {
1055 jluj = jlu[indj];
1056
1057 ibstart=indj*nb2;
1058 fasp_blas_smat_mul(&(luval[ibstart]),&(luval[jluj*nb2]),mult,nb);
1059 for (ib=0;ib<nb2;++ib) luval[ibstart+ib]=mult[ib];
1060
1061 for (inds = uptr[jluj]; inds < jlu[jluj+1]; inds++) {
1062 jlus = jlu[inds];
1063 if (colptrs[jlus] != 0) {
1064 fasp_blas_smat_mul(mult,&(luval[inds*nb2]),mult1,nb);
1065 ibstart=colptrs[jlus]*nb2;
1066 for (ib=0;ib<nb2;++ib) luval[ibstart+ib]-=mult1[ib];
1067 }
1068 }
1069
1070 }
1071
1072 for (indj = jlu[k]; indj < jlu[k+1]; ++indj)
1073 colptrs[jlu[indj]] = 0;
1074
1075 colptrs[k] = 0;
1076
1077 //fasp_smat_inv(&(luval[k*nb2]),nb); // not numerically stable --zcs 04/26/2021
1078 status = fasp_smat_invp_nc(&(luval[k * nb2]), nb);
1079 }
1080 }
1081
1082 fasp_mem_free(colptrs); colptrs = NULL;
1083 fasp_mem_free(mult); mult = NULL;
1084 fasp_mem_free(mult1); mult1 = NULL;
1085
1086 return status;
1087}
1088
1107static INT numfactor_mulcol (dBSRmat *A,
1108 REAL *luval,
1109 INT *jlu,
1110 INT *uptr,
1111 INT ncolors,
1112 INT *ic,
1113 INT *icmap)
1114{
1115 INT status = FASP_SUCCESS;
1116
1117#ifdef _OPENMP
1118 INT n = A->ROW, nb = A->nb, nb2 = nb*nb;
1119 INT ib, ibstart,ibstart1;
1120 INT k, i, indj, inds, indja,jluj, jlus, ijaj, tmp;
1121 REAL *mult, *mult1;
1122 INT *colptrs;
1123
1138 switch (nb) {
1139
1140 case 1:
1141 for (i = 0; i < ncolors; ++i) {
1142#pragma omp parallel private(k,indj,ibstart,ib,indja,ijaj,ibstart1,jluj,inds,jlus,colptrs,tmp)
1143 {
1144 colptrs=(INT*)fasp_mem_calloc(n,sizeof(INT));
1145 memset(colptrs, 0, sizeof(INT)*n);
1146#pragma omp for
1147 for (k = ic[i]; k < ic[i+1]; ++k) {
1148 for (indj = jlu[k]; indj < jlu[k+1]; ++indj) {
1149 colptrs[jlu[indj]] = indj;
1150 ibstart=indj*nb2;
1151 for (ib=0;ib<nb2;++ib) luval[ibstart+ib] = 0;
1152 }
1153 colptrs[k] = k;
1154 for (indja = A->IA[k]; indja < A->IA[k+1]; ++indja) {
1155 ijaj = A->JA[indja];
1156 ibstart=colptrs[ijaj]*nb2;
1157 ibstart1=indja*nb2;
1158 for (ib=0;ib<nb2;++ib) luval[ibstart+ib] = A->val[ibstart1+ib];
1159 }
1160 for (indj = jlu[k]; indj < uptr[k]; ++indj) {
1161 jluj = jlu[indj];
1162 luval[indj] = luval[indj]*luval[jluj];
1163 tmp = luval[indj];
1164 for (inds = uptr[jluj]; inds < jlu[jluj+1]; ++inds) {
1165 jlus = jlu[inds];
1166 if (colptrs[jlus] != 0)
1167 luval[colptrs[jlus]] = luval[colptrs[jlus]] - tmp*luval[inds];
1168 }
1169
1170 }
1171 for (indj = jlu[k]; indj < jlu[k+1]; ++indj) colptrs[jlu[indj]] = 0;
1172 colptrs[k] = 0;
1173 luval[k] = 1.0/luval[k];
1174 }
1175 fasp_mem_free(colptrs); colptrs = NULL;
1176 }
1177 }
1178
1179 break;
1180
1181 case 2:
1182
1183 for (i = 0; i < ncolors; ++i) {
1184#pragma omp parallel private(k,indj,ibstart,ib,indja,ijaj,ibstart1,jluj,inds,jlus,mult,mult1,colptrs)
1185 {
1186 colptrs=(INT*)fasp_mem_calloc(n,sizeof(INT));
1187 memset(colptrs, 0, sizeof(INT)*n);
1188 mult=(REAL*)fasp_mem_calloc(nb2,sizeof(REAL));
1189 mult1=(REAL*)fasp_mem_calloc(nb2,sizeof(REAL));
1190#pragma omp for
1191 for (k = ic[i]; k < ic[i+1]; ++k) {
1192 for (indj = jlu[k]; indj < jlu[k+1]; ++indj) {
1193 colptrs[jlu[indj]] = indj;
1194 ibstart=indj*nb2;
1195 for (ib=0;ib<nb2;++ib) luval[ibstart+ib] = 0;
1196 }
1197 colptrs[k] = k;
1198 for (indja = A->IA[k]; indja < A->IA[k+1]; ++indja) {
1199 ijaj = A->JA[indja];
1200 ibstart=colptrs[ijaj]*nb2;
1201 ibstart1=indja*nb2;
1202 for (ib=0;ib<nb2;++ib) luval[ibstart+ib] = A->val[ibstart1+ib];
1203 }
1204 for (indj = jlu[k]; indj < uptr[k]; ++indj) {
1205 jluj = jlu[indj];
1206 ibstart=indj*nb2;
1207 fasp_blas_smat_mul_nc2(&(luval[ibstart]),&(luval[jluj*nb2]),mult);
1208 for (ib=0;ib<nb2;++ib) luval[ibstart+ib]=mult[ib];
1209 for (inds = uptr[jluj]; inds < jlu[jluj+1]; ++inds) {
1210 jlus = jlu[inds];
1211 if (colptrs[jlus] != 0) {
1212 fasp_blas_smat_mul_nc2(mult,&(luval[inds*nb2]),mult1);
1213 ibstart=colptrs[jlus]*nb2;
1214 for (ib=0;ib<nb2;++ib) luval[ibstart+ib]-=mult1[ib];
1215 }
1216 }
1217 }
1218 for (indj = jlu[k]; indj < jlu[k+1]; ++indj) colptrs[jlu[indj]] = 0;
1219 colptrs[k] = 0;
1220 fasp_smat_inv_nc2(&(luval[k*nb2]));
1221 }
1222 fasp_mem_free(colptrs); colptrs = NULL;
1223 fasp_mem_free(mult); mult = NULL;
1224 fasp_mem_free(mult1); mult1 = NULL;
1225 }
1226 }
1227 break;
1228
1229 case 3:
1230
1231 for (i = 0; i < ncolors; ++i) {
1232#pragma omp parallel private(k,indj,ibstart,ib,indja,ijaj,ibstart1,jluj,inds,jlus,mult,mult1,colptrs)
1233 {
1234 colptrs=(INT*)fasp_mem_calloc(n,sizeof(INT));
1235 memset(colptrs, 0, sizeof(INT)*n);
1236 mult=(REAL*)fasp_mem_calloc(nb2,sizeof(REAL));
1237 mult1=(REAL*)fasp_mem_calloc(nb2,sizeof(REAL));
1238#pragma omp for
1239 for (k = ic[i]; k < ic[i+1]; ++k) {
1240 for (indj = jlu[k]; indj < jlu[k+1]; ++indj) {
1241 colptrs[jlu[indj]] = indj;
1242 ibstart=indj*nb2;
1243 for (ib=0;ib<nb2;++ib) luval[ibstart+ib] = 0;
1244 }
1245 colptrs[k] = k;
1246 for (indja = A->IA[k]; indja < A->IA[k+1]; ++indja) {
1247 ijaj = A->JA[indja];
1248 ibstart=colptrs[ijaj]*nb2;
1249 ibstart1=indja*nb2;
1250 for (ib=0;ib<nb2;++ib) luval[ibstart+ib] = A->val[ibstart1+ib];
1251 }
1252 for (indj = jlu[k]; indj < uptr[k]; ++indj) {
1253 jluj = jlu[indj];
1254 ibstart=indj*nb2;
1255 fasp_blas_smat_mul_nc3(&(luval[ibstart]),&(luval[jluj*nb2]),mult);
1256 for (ib=0;ib<nb2;++ib) luval[ibstart+ib]=mult[ib];
1257 for (inds = uptr[jluj]; inds < jlu[jluj+1]; ++inds) {
1258 jlus = jlu[inds];
1259 if (colptrs[jlus] != 0) {
1260 fasp_blas_smat_mul_nc3(mult,&(luval[inds*nb2]),mult1);
1261 ibstart=colptrs[jlus]*nb2;
1262 for (ib=0;ib<nb2;++ib) luval[ibstart+ib]-=mult1[ib];
1263 }
1264 }
1265 }
1266 for (indj = jlu[k]; indj < jlu[k+1]; ++indj) colptrs[jlu[indj]] = 0;
1267 colptrs[k] = 0;
1268 fasp_smat_inv_nc3(&(luval[k*nb2]));
1269 }
1270 fasp_mem_free(colptrs); colptrs = NULL;
1271 fasp_mem_free(mult); mult = NULL;
1272 fasp_mem_free(mult1); mult1 = NULL;
1273 }
1274 }
1275 break;
1276
1277 default:
1278 {
1279 if (nb > 3) printf("Multi-thread ILU numerical decomposition for %d\
1280 components has not been implemented!!!", nb);
1281 exit(0);
1282 }
1283 }
1284
1285#endif
1286
1287 return status;
1288}
1289
1309static INT numfactor_levsch (dBSRmat *A,
1310 REAL *luval,
1311 INT *jlu,
1312 INT *uptr,
1313 INT ncolors,
1314 INT *ic,
1315 INT *icmap)
1316{
1317 INT status = FASP_SUCCESS;
1318
1319#ifdef _OPENMP
1320 INT n = A->ROW, nb = A->nb, nb2 = nb*nb;
1321 INT ib, ibstart,ibstart1;
1322 INT k, i, indj, inds, indja, jluj, jlus, ijaj, tmp, ii;
1323 REAL *mult, *mult1;
1324 INT *colptrs;
1325
1340 switch (nb) {
1341
1342 case 1:
1343 for (i = 0; i < ncolors; ++i) {
1344#pragma omp parallel private(k,indj,ibstart,ib,indja,ijaj,ibstart1,jluj,inds,jlus,colptrs,tmp)
1345 {
1346 colptrs=(INT*)fasp_mem_calloc(n,sizeof(INT));
1347 memset(colptrs, 0, sizeof(INT)*n);
1348#pragma omp for
1349 for (k = ic[i]; k < ic[i+1]; ++k) {
1350 for (indj = jlu[k]; indj < jlu[k+1]; ++indj) {
1351 colptrs[jlu[indj]] = indj;
1352 ibstart=indj*nb2;
1353 for (ib=0;ib<nb2;++ib) luval[ibstart+ib] = 0;
1354 }
1355 colptrs[k] = k;
1356 for (indja = A->IA[k]; indja < A->IA[k+1]; ++indja) {
1357 ijaj = A->JA[indja];
1358 ibstart=colptrs[ijaj]*nb2;
1359 ibstart1=indja*nb2;
1360 for (ib=0;ib<nb2;++ib) luval[ibstart+ib] = A->val[ibstart1+ib];
1361 }
1362 for (indj = jlu[k]; indj < uptr[k]; ++indj) {
1363 jluj = jlu[indj];
1364 luval[indj] = luval[indj]*luval[jluj];
1365 tmp = luval[indj];
1366 for (inds = uptr[jluj]; inds < jlu[jluj+1]; ++inds) {
1367 jlus = jlu[inds];
1368 if (colptrs[jlus] != 0)
1369 luval[colptrs[jlus]] = luval[colptrs[jlus]] - tmp*luval[inds];
1370 }
1371
1372 }
1373 for (indj = jlu[k]; indj < jlu[k+1]; ++indj) colptrs[jlu[indj]] = 0;
1374 colptrs[k] = 0;
1375 luval[k] = 1.0/luval[k];
1376 }
1377 fasp_mem_free(colptrs); colptrs = NULL;
1378 }
1379 }
1380
1381 break;
1382 case 2:
1383
1384 for (i = 0; i < ncolors; ++i) {
1385#pragma omp parallel private(k,indj,ibstart,ib,indja,ijaj,ibstart1,jluj,inds,jlus,mult,mult1,colptrs,ii)
1386 {
1387 colptrs=(INT*)fasp_mem_calloc(n,sizeof(INT));
1388 memset(colptrs, 0, sizeof(INT)*n);
1389 mult=(REAL*)fasp_mem_calloc(nb2,sizeof(REAL));
1390 mult1=(REAL*)fasp_mem_calloc(nb2,sizeof(REAL));
1391#pragma omp for
1392 for (ii = ic[i]; ii < ic[i+1]; ++ii) {
1393 k = icmap[ii];
1394 for (indj = jlu[k]; indj < jlu[k+1]; ++indj) {
1395 colptrs[jlu[indj]] = indj;
1396 ibstart=indj*nb2;
1397 for (ib=0;ib<nb2;++ib) luval[ibstart+ib] = 0;
1398 }
1399 colptrs[k] = k;
1400 for (indja = A->IA[k]; indja < A->IA[k+1]; ++indja) {
1401 ijaj = A->JA[indja];
1402 ibstart=colptrs[ijaj]*nb2;
1403 ibstart1=indja*nb2;
1404 for (ib=0;ib<nb2;++ib) luval[ibstart+ib] = A->val[ibstart1+ib];
1405 }
1406 for (indj = jlu[k]; indj < uptr[k]; ++indj) {
1407 jluj = jlu[indj];
1408 ibstart=indj*nb2;
1409 fasp_blas_smat_mul_nc2(&(luval[ibstart]),&(luval[jluj*nb2]),mult);
1410 for (ib=0;ib<nb2;++ib) luval[ibstart+ib]=mult[ib];
1411 for (inds = uptr[jluj]; inds < jlu[jluj+1]; ++inds) {
1412 jlus = jlu[inds];
1413 if (colptrs[jlus] != 0) {
1414 fasp_blas_smat_mul_nc2(mult,&(luval[inds*nb2]),mult1);
1415 ibstart=colptrs[jlus]*nb2;
1416 for (ib=0;ib<nb2;++ib) luval[ibstart+ib]-=mult1[ib];
1417 }
1418 }
1419 }
1420 for (indj = jlu[k]; indj < jlu[k+1]; ++indj) colptrs[jlu[indj]] = 0;
1421 colptrs[k] = 0;
1422 fasp_smat_inv_nc2(&(luval[k*nb2]));
1423 }
1424 fasp_mem_free(colptrs); colptrs = NULL;
1425 fasp_mem_free(mult); mult = NULL;
1426 fasp_mem_free(mult1); mult1 = NULL;
1427 }
1428 }
1429 break;
1430
1431 case 3:
1432
1433 for (i = 0; i < ncolors; ++i) {
1434#pragma omp parallel private(k,indj,ibstart,ib,indja,ijaj,ibstart1,jluj,inds,jlus,mult,mult1,colptrs,ii)
1435 {
1436 colptrs=(INT*)fasp_mem_calloc(n,sizeof(INT));
1437 memset(colptrs, 0, sizeof(INT)*n);
1438 mult=(REAL*)fasp_mem_calloc(nb2,sizeof(REAL));
1439 mult1=(REAL*)fasp_mem_calloc(nb2,sizeof(REAL));
1440#pragma omp for
1441 for (ii = ic[i]; ii < ic[i+1]; ++ii) {
1442 k = icmap[ii];
1443 for (indj = jlu[k]; indj < jlu[k+1]; ++indj) {
1444 colptrs[jlu[indj]] = indj;
1445 ibstart=indj*nb2;
1446 for (ib=0;ib<nb2;++ib) luval[ibstart+ib] = 0;
1447 }
1448 colptrs[k] = k;
1449 for (indja = A->IA[k]; indja < A->IA[k+1]; ++indja) {
1450 ijaj = A->JA[indja];
1451 ibstart=colptrs[ijaj]*nb2;
1452 ibstart1=indja*nb2;
1453 for (ib=0;ib<nb2;++ib) luval[ibstart+ib] = A->val[ibstart1+ib];
1454 }
1455 for (indj = jlu[k]; indj < uptr[k]; ++indj) {
1456 jluj = jlu[indj];
1457 ibstart=indj*nb2;
1458 fasp_blas_smat_mul_nc3(&(luval[ibstart]),&(luval[jluj*nb2]),mult);
1459 for (ib=0;ib<nb2;++ib) luval[ibstart+ib]=mult[ib];
1460 for (inds = uptr[jluj]; inds < jlu[jluj+1]; ++inds) {
1461 jlus = jlu[inds];
1462 if (colptrs[jlus] != 0) {
1463 fasp_blas_smat_mul_nc3(mult,&(luval[inds*nb2]),mult1);
1464 ibstart=colptrs[jlus]*nb2;
1465 for (ib=0;ib<nb2;++ib) luval[ibstart+ib]-=mult1[ib];
1466 }
1467 }
1468 }
1469 for (indj = jlu[k]; indj < jlu[k+1]; ++indj) colptrs[jlu[indj]] = 0;
1470 colptrs[k] = 0;
1471 fasp_smat_inv_nc3(&(luval[k*nb2]));
1472 }
1473 fasp_mem_free(colptrs); colptrs = NULL;
1474 fasp_mem_free(mult); mult = NULL;
1475 fasp_mem_free(mult1); mult1 = NULL;
1476 }
1477 }
1478 break;
1479
1480 case 4:
1481
1482 for (i = 0; i < ncolors; ++i) {
1483#pragma omp parallel private(k,indj,ibstart,ib,indja,ijaj,ibstart1,jluj,inds,jlus,mult,mult1,colptrs,ii)
1484 {
1485 colptrs=(INT*)fasp_mem_calloc(n,sizeof(INT));
1486 memset(colptrs, 0, sizeof(INT)*n);
1487 mult=(REAL*)fasp_mem_calloc(nb2,sizeof(REAL));
1488 mult1=(REAL*)fasp_mem_calloc(nb2,sizeof(REAL));
1489#pragma omp for
1490 for (ii = ic[i]; ii < ic[i+1]; ++ii) {
1491 k = icmap[ii];
1492 for (indj = jlu[k]; indj < jlu[k+1]; ++indj) {
1493 colptrs[jlu[indj]] = indj;
1494 ibstart=indj*nb2;
1495 for (ib=0;ib<nb2;++ib) luval[ibstart+ib] = 0;
1496 }
1497 colptrs[k] = k;
1498 for (indja = A->IA[k]; indja < A->IA[k+1]; ++indja) {
1499 ijaj = A->JA[indja];
1500 ibstart=colptrs[ijaj]*nb2;
1501 ibstart1=indja*nb2;
1502 for (ib=0;ib<nb2;++ib) luval[ibstart+ib] = A->val[ibstart1+ib];
1503 }
1504 for (indj = jlu[k]; indj < uptr[k]; ++indj) {
1505 jluj = jlu[indj];
1506 ibstart=indj*nb2;
1507 fasp_blas_smat_mul_nc4(&(luval[ibstart]),&(luval[jluj*nb2]),mult);
1508 for (ib=0;ib<nb2;++ib) luval[ibstart+ib]=mult[ib];
1509 for (inds = uptr[jluj]; inds < jlu[jluj+1]; ++inds) {
1510 jlus = jlu[inds];
1511 if (colptrs[jlus] != 0) {
1512 fasp_blas_smat_mul_nc4(mult,&(luval[inds*nb2]),mult1);
1513 ibstart=colptrs[jlus]*nb2;
1514 for (ib=0;ib<nb2;++ib) luval[ibstart+ib]-=mult1[ib];
1515 }
1516 }
1517 }
1518 for (indj = jlu[k]; indj < jlu[k+1]; ++indj) colptrs[jlu[indj]] = 0;
1519 colptrs[k] = 0;
1520 fasp_smat_inv_nc4(&(luval[k*nb2]));
1521 }
1522 fasp_mem_free(colptrs); colptrs = NULL;
1523 fasp_mem_free(mult); mult = NULL;
1524 fasp_mem_free(mult1); mult1 = NULL;
1525 }
1526 }
1527 break;
1528
1529 case 5:
1530
1531 for (i = 0; i < ncolors; ++i) {
1532#pragma omp parallel private(k,indj,ibstart,ib,indja,ijaj,ibstart1,jluj,inds,jlus,mult,mult1,colptrs,ii)
1533 {
1534 colptrs=(INT*)fasp_mem_calloc(n,sizeof(INT));
1535 memset(colptrs, 0, sizeof(INT)*n);
1536 mult=(REAL*)fasp_mem_calloc(nb2,sizeof(REAL));
1537 mult1=(REAL*)fasp_mem_calloc(nb2,sizeof(REAL));
1538#pragma omp for
1539 for (ii = ic[i]; ii < ic[i+1]; ++ii) {
1540 k = icmap[ii];
1541 for (indj = jlu[k]; indj < jlu[k+1]; ++indj) {
1542 colptrs[jlu[indj]] = indj;
1543 ibstart=indj*nb2;
1544 for (ib=0;ib<nb2;++ib) luval[ibstart+ib] = 0;
1545 }
1546 colptrs[k] = k;
1547 for (indja = A->IA[k]; indja < A->IA[k+1]; ++indja) {
1548 ijaj = A->JA[indja];
1549 ibstart=colptrs[ijaj]*nb2;
1550 ibstart1=indja*nb2;
1551 for (ib=0;ib<nb2;++ib) luval[ibstart+ib] = A->val[ibstart1+ib];
1552 }
1553 for (indj = jlu[k]; indj < uptr[k]; ++indj) {
1554 jluj = jlu[indj];
1555 ibstart=indj*nb2;
1556 fasp_blas_smat_mul_nc5(&(luval[ibstart]),&(luval[jluj*nb2]),mult);
1557 for (ib=0;ib<nb2;++ib) luval[ibstart+ib]=mult[ib];
1558 for (inds = uptr[jluj]; inds < jlu[jluj+1]; ++inds) {
1559 jlus = jlu[inds];
1560 if (colptrs[jlus] != 0) {
1561 fasp_blas_smat_mul_nc5(mult,&(luval[inds*nb2]),mult1);
1562 ibstart=colptrs[jlus]*nb2;
1563 for (ib=0;ib<nb2;++ib) luval[ibstart+ib]-=mult1[ib];
1564 }
1565 }
1566 }
1567 for (indj = jlu[k]; indj < jlu[k+1]; ++indj) colptrs[jlu[indj]] = 0;
1568 colptrs[k] = 0;
1569 fasp_smat_inv_nc5(&(luval[k*nb2]));
1570 }
1571 fasp_mem_free(colptrs); colptrs = NULL;
1572 fasp_mem_free(mult); mult = NULL;
1573 fasp_mem_free(mult1); mult1 = NULL;
1574 }
1575 }
1576 break;
1577
1578 default:
1579 for (i = 0; i < ncolors; ++i) {
1580#pragma omp parallel private(k,indj,ibstart,ib,indja,ijaj,ibstart1,jluj,inds,jlus,mult,mult1,colptrs,ii)
1581 {
1582 colptrs = (INT*)fasp_mem_calloc(n, sizeof(INT));
1583 memset(colptrs, 0, sizeof(INT) * n);
1584 mult = (REAL*)fasp_mem_calloc(nb2, sizeof(REAL));
1585 mult1 = (REAL*)fasp_mem_calloc(nb2, sizeof(REAL));
1586#pragma omp for
1587 for (ii = ic[i]; ii < ic[i + 1]; ++ii) {
1588 k = icmap[ii];
1589 for (indj = jlu[k]; indj < jlu[k + 1]; ++indj) {
1590 colptrs[jlu[indj]] = indj;
1591 ibstart = indj * nb2;
1592 for (ib = 0; ib < nb2; ++ib) luval[ibstart + ib] = 0;
1593 }
1594 colptrs[k] = k;
1595 for (indja = A->IA[k]; indja < A->IA[k + 1]; ++indja) {
1596 ijaj = A->JA[indja];
1597 ibstart = colptrs[ijaj] * nb2;
1598 ibstart1 = indja * nb2;
1599 for (ib = 0; ib < nb2; ++ib) luval[ibstart + ib] = A->val[ibstart1 + ib];
1600 }
1601 for (indj = jlu[k]; indj < uptr[k]; ++indj) {
1602 jluj = jlu[indj];
1603 ibstart = indj * nb2;
1604 fasp_blas_smat_mul(&(luval[ibstart]), &(luval[jluj * nb2]), mult, nb);
1605 for (ib = 0; ib < nb2; ++ib) luval[ibstart + ib] = mult[ib];
1606 for (inds = uptr[jluj]; inds < jlu[jluj + 1]; ++inds) {
1607 jlus = jlu[inds];
1608 if (colptrs[jlus] != 0) {
1609 fasp_blas_smat_mul(mult, &(luval[inds * nb2]), mult1, nb);
1610 ibstart = colptrs[jlus] * nb2;
1611 for (ib = 0; ib < nb2; ++ib) luval[ibstart + ib] -= mult1[ib];
1612 }
1613 }
1614 }
1615 for (indj = jlu[k]; indj < jlu[k + 1]; ++indj) colptrs[jlu[indj]] = 0;
1616 colptrs[k] = 0;
1617 fasp_smat_invp_nc(&(luval[k * nb2]), nb);
1618 }
1619 fasp_mem_free(colptrs); colptrs = NULL;
1620 fasp_mem_free(mult); mult = NULL;
1621 fasp_mem_free(mult1); mult1 = NULL;
1622 }
1623 }
1624 //if (nb > 5) printf("Multi-thread ILU numerical decomposition for %d components has not been implemented!!!\n", nb);
1625 //exit(0);
1626 break;
1627 }
1628
1629#endif
1630
1631 return status;
1632}
1633
1646static void generate_S_theta (dCSRmat *A,
1647 iCSRmat *S,
1648 REAL theta)
1649{
1650 const INT row=A->row, col=A->col;
1651 const INT row_plus_one = row+1;
1652 const INT nnz=A->IA[row]-A->IA[0];
1653
1654 INT index, i, j, begin_row, end_row;
1655 INT *ia=A->IA, *ja=A->JA;
1656 REAL *aj=A->val;
1657
1658 // get the diagnal entry of A
1659 //dvector diag; fasp_dcsr_getdiag(0, A, &diag);
1660
1661 /* generate S */
1662 REAL row_abs_sum;
1663
1664 // copy the structure of A to S
1665 S->row=row; S->col=col; S->nnz=nnz; S->val=NULL;
1666
1667 S->IA=(INT*)fasp_mem_calloc(row_plus_one, sizeof(INT));
1668
1669 S->JA=(INT*)fasp_mem_calloc(nnz, sizeof(INT));
1670
1671 fasp_iarray_cp(row_plus_one, ia, S->IA);
1672 fasp_iarray_cp(nnz, ja, S->JA);
1673
1674 for (i=0;i<row;++i) {
1675 /* compute scaling factor and row sum */
1676 row_abs_sum=0;
1677
1678 begin_row=ia[i]; end_row=ia[i+1];
1679
1680 for (j=begin_row;j<end_row;j++) row_abs_sum+=ABS(aj[j]);
1681
1682 row_abs_sum = row_abs_sum*theta;
1683
1684 /* deal with the diagonal element of S */
1685 // for (j=begin_row;j<end_row;j++) {
1686 // if (ja[j]==i) {S->JA[j]=-1; break;}
1687 // }
1688
1689 /* deal with the element of S */
1690 for (j=begin_row;j<end_row;j++){
1691 /* if $\sum_{j=1}^n |a_{ij}|*theta>= |a_{ij}|$ */
1692 if ( (row_abs_sum >= ABS(aj[j])) && (ja[j] !=i) ) S->JA[j]=-1;
1693 }
1694 } // end for i
1695
1696 /* Compress the strength matrix */
1697 index=0;
1698 for (i=0;i<row;++i) {
1699 S->IA[i]=index;
1700 begin_row=ia[i]; end_row=ia[i+1]-1;
1701 for (j=begin_row;j<=end_row;j++) {
1702 if (S->JA[j]>-1) {
1703 S->JA[index]=S->JA[j];
1704 index++;
1705 }
1706 }
1707 }
1708
1709 if (index > 0) {
1710 S->IA[row]=index;
1711 S->nnz=index;
1712 S->JA=(INT*)fasp_mem_realloc(S->JA,index*sizeof(INT));
1713 }
1714 else {
1715 S->nnz = 0;
1716 S->JA = NULL;
1717 }
1718}
1719
1734static void multicoloring (AMG_data *mgl,
1735 REAL theta,
1736 INT *rowmax,
1737 INT *groups)
1738{
1739 INT k, i, j, pre, group, iend;
1740 INT icount;
1741 INT front, rear;
1742 INT *IA, *JA;
1743 INT *cq, *newr;
1744
1745 const INT n = mgl->A.row;
1746 dCSRmat A = mgl->A;
1747 iCSRmat S;
1748
1749 S.IA = S.JA = NULL; S.val = NULL;
1750
1751 theta = MAX(0.0, MIN(1.0, theta));
1752
1753 if (theta > 0.0 && theta < 1.0) {
1754 generate_S_theta(&A, &S, theta);
1755 IA = S.IA;
1756 JA = S.JA;
1757 }
1758 else if (theta == 1.0) {
1759
1760 mgl->ic = (INT*)malloc(sizeof(INT)*2);
1761 mgl->icmap = (INT *)malloc(sizeof(INT)*(n+1));
1762 mgl->ic[0] = 0;
1763 mgl->ic[1] = n;
1764 for(k=0; k<n; k++) mgl->icmap[k]= k;
1765
1766 mgl->colors = 1;
1767 *groups = 1;
1768 *rowmax = 1;
1769
1770 printf("### WARNING: Theta = %lf! [%s]\n", theta, __FUNCTION__);
1771
1772 return;
1773 }
1774 else {
1775 IA = A.IA;
1776 JA = A.JA;
1777 }
1778
1779 cq = (INT *)malloc(sizeof(INT)*(n+1));
1780 newr = (INT *)malloc(sizeof(INT)*(n+1));
1781
1782#ifdef _OPENMP
1783#pragma omp parallel for private(k)
1784#endif
1785 for ( k=0; k<n; k++ ) cq[k]= k;
1786
1787 group = 0;
1788 for ( k=0; k<n; k++ ) {
1789 if ((A.IA[k+1] - A.IA[k]) > group ) group = A.IA[k+1] - A.IA[k];
1790 }
1791 *rowmax = group;
1792
1793 mgl->ic = (INT *)malloc(sizeof(INT)*(group+2));
1794 mgl->icmap = (INT *)malloc(sizeof(INT)*(n+1));
1795
1796 front = n-1;
1797 rear = n-1;
1798
1799 memset(newr, -1, sizeof(INT)*(n+1));
1800 memset(mgl->icmap, 0, sizeof(INT)*n);
1801
1802 group=0;
1803 icount = 0;
1804 mgl->ic[0] = 0;
1805 pre=0;
1806
1807 do {
1808 //front = (front+1)%n;
1809 front ++;
1810 if (front == n ) front =0; // front = front < n ? front : 0 ;
1811 i = cq[front];
1812
1813 if(i <= pre) {
1814 mgl->ic[group] = icount;
1815 mgl->icmap[icount] = i;
1816 group++;
1817 icount++;
1818#if 0
1819 if ((IA[i+1]-IA[i]) > igold)
1820 iend = MIN(IA[i+1], (IA[i] + igold));
1821 else
1822#endif
1823 iend = IA[i+1];
1824
1825 for (j= IA[i]; j< iend; j++) newr[JA[j]] = group;
1826 }
1827 else if (newr[i] == group) {
1828 //rear = (rear +1)%n;
1829 rear ++;
1830 if (rear == n) rear = 0;
1831 cq[rear] = i;
1832 }
1833 else {
1834 mgl->icmap[icount] = i;
1835 icount++;
1836#if 0
1837 if ((IA[i+1] - IA[i]) > igold) iend =MIN(IA[i+1], (IA[i] + igold));
1838 else
1839#endif
1840 iend = IA[i+1];
1841 for (j = IA[i]; j< iend; j++) newr[JA[j]] = group;
1842 }
1843 pre=i;
1844
1845 } while(rear != front);
1846
1847 mgl->ic[group] = icount;
1848 mgl->colors = group;
1849 *groups = group;
1850
1851 free(cq);
1852 free(newr);
1853
1854 fasp_mem_free(S.IA); S.IA = NULL;
1855 fasp_mem_free(S.JA); S.JA = NULL;
1856
1857 return;
1858}
1859
1871{
1872 INT i, j, k, l;
1873 INT nlevL, nlevU;
1874
1875 INT n = iludata->row;
1876 INT *ijlu = iludata->ijlu;
1877
1878 INT *level = (INT *)fasp_mem_calloc(n, sizeof(INT));
1879 INT *jlevL = (INT *)fasp_mem_calloc(n, sizeof(INT));
1880 INT *ilevL = (INT *)fasp_mem_calloc(n+1, sizeof(INT));
1881
1882 INT *jlevU = (INT *)fasp_mem_calloc(n, sizeof(INT));
1883 INT *ilevU = (INT *)fasp_mem_calloc(n+1, sizeof(INT));
1884
1885 nlevL = 0;
1886 ilevL[0] = 0;
1887
1888 // form level for each row of lower triangular matrix.
1889 for (i=0; i<n; i++) {
1890 l = 0;
1891 for(j=ijlu[i]; j<ijlu[i+1]; j++) if (ijlu[j]<=i) l = MAX(l, level[ijlu[j]]);
1892 level[i] = l+1;
1893 ilevL[l+1] ++;
1894 nlevL = MAX(nlevL, l+1);
1895 }
1896
1897 for (i=1; i<=nlevL; i++) ilevL[i] += ilevL[i-1];
1898
1899 for (i=0; i<n; i++) {
1900 k = ilevL[level[i]-1];
1901 jlevL[k] = i;
1902 ilevL[level[i]-1]++;
1903 }
1904
1905 for (i=nlevL-1; i>0; i--) ilevL[i] = ilevL[i-1];
1906
1907 // form level for each row of upper triangular matrix.
1908 nlevU = 0;
1909 ilevL[0] = 0;
1910
1911 for (i=0; i<n; i++) level[i] = 0;
1912
1913 ilevU[0] = 0;
1914
1915 for (i=n-1; i>=0; i--) {
1916 l = 0;
1917 for (j=ijlu[i]; j<ijlu[i+1]; j++) if (ijlu[j]>=i) l = MAX(l, level[ijlu[j]]);
1918 level[i] = l+1;
1919 ilevU[l+1] ++;
1920 nlevU = MAX(nlevU, l+1);
1921 }
1922
1923 for (i=1; i<=nlevU; i++) ilevU[i] += ilevU[i-1];
1924
1925 for (i=n-1; i>=0; i--) {
1926 k = ilevU[level[i]-1];
1927 jlevU[k] = i;
1928 ilevU[level[i]-1]++;
1929 }
1930
1931 for (i=nlevU-1; i>0; i--) ilevU[i] = ilevU[i-1];
1932
1933 ilevU[0] = 0;
1934
1935 iludata->nlevL = nlevL+1; iludata->ilevL = ilevL;iludata->jlevL = jlevL;
1936 iludata->nlevU = nlevU+1; iludata->ilevU = ilevU;iludata->jlevU = jlevU;
1937
1938 fasp_mem_free(level); level = NULL;
1939}
1940
1953 INT gslvl)
1954{
1955
1956 INT Colors, rowmax, level, prtlvl = 0;
1957
1958 REAL theta = 0.00;
1959
1960 INT maxlvl = MIN(gslvl, mgl->num_levels-1);
1961
1962#ifdef _OPENMP
1963#pragma omp parallel for private(level,rowmax,Colors) schedule(static, 1)
1964#endif
1965 for ( level=0; level<maxlvl; level++ ) {
1966
1967 multicoloring(&mgl[level], theta, &rowmax, &Colors);
1968
1969 // print
1970 if ( prtlvl > PRINT_MIN )
1971 printf("mgl[%3d].A.row = %12d rowmax = %5d rowavg = %7.2lf colors = %5d theta = %le\n",
1972 level, mgl[level].A.row, rowmax, (double)mgl[level].A.nnz/mgl[level].A.row,
1973 mgl[level].colors, theta);
1974 }
1975}
1976
1977/*---------------------------------*/
1978/*-- End of File --*/
1979/*---------------------------------*/
void fasp_darray_set(const INT n, REAL *x, const REAL val)
Set initial value for an array to be x=val.
Definition: AuxArray.c:41
void fasp_iarray_cp(const INT n, const INT *x, INT *y)
Copy an array to the other y=x.
Definition: AuxArray.c:227
void fasp_mem_free(void *mem)
Free up previous allocated memory body and set pointer to NULL.
Definition: AuxMemory.c:152
void * fasp_mem_realloc(void *oldmem, const LONGLONG tsize)
Reallocate, initiate, and check memory.
Definition: AuxMemory.c:113
void * fasp_mem_calloc(const unsigned int size, const unsigned int type)
Allocate, initiate, and check memory.
Definition: AuxMemory.c:65
void fasp_gettime(REAL *time)
Get system time.
Definition: AuxTiming.c:37
SHORT fasp_ilu_dbsr_setup_levsch_omp(dBSRmat *A, ILU_data *iludata, ILU_param *iluparam)
Get ILU decoposition of a BSR matrix A based on level schedule strategy.
SHORT fasp_ilu_dbsr_setup_mc_omp(dBSRmat *A, dCSRmat *Ap, ILU_data *iludata, ILU_param *iluparam)
Multi-thread ILU decoposition of a BSR matrix A based on graph coloring.
void topologic_sort_ILU(ILU_data *iludata)
Reordering vertices according to level schedule strategy.
SHORT fasp_ilu_dbsr_setup_step(dBSRmat *A, ILU_data *iludata, ILU_param *iluparam, INT step)
Get ILU decoposition of a BSR matrix A.
SHORT fasp_ilu_dbsr_setup(dBSRmat *A, ILU_data *iludata, ILU_param *iluparam)
Get ILU decoposition of a BSR matrix A.
void mulcol_independ_set(AMG_data *mgl, INT gslvl)
Multi-coloring vertices of adjacency graph of A.
SHORT fasp_ilu_dbsr_setup_omp(dBSRmat *A, ILU_data *iludata, ILU_param *iluparam)
Multi-thread ILU decoposition of a BSR matrix A based on graph coloring.
SHORT fasp_ilu_dbsr_setup_levsch_step(dBSRmat *A, ILU_data *iludata, ILU_param *iluparam, INT step)
Get ILU decoposition of a BSR matrix A based on level schedule strategy.
void fasp_symbfactor(INT n, INT *colind, INT *rwptr, INT levfill, INT nzmax, INT *nzlu, INT *ijlu, INT *uptr, INT *ierr)
Symbolic factorization of a CSR matrix A in compressed sparse row format, with resulting factors stor...
Definition: BlaILU.c:1372
void fasp_smat_inv_nc3(REAL *a)
Compute the inverse matrix of a 3*3 full matrix A (in place)
void fasp_smat_inv_nc4(REAL *a)
Compute the inverse matrix of a 4*4 full matrix A (in place)
SHORT fasp_smat_invp_nc(REAL *a, const INT n)
Compute the inverse of a matrix using Gauss Elimination with Pivoting.
void fasp_smat_inv_nc2(REAL *a)
Compute the inverse matrix of a 2*2 full matrix A (in place)
void fasp_smat_inv_nc5(REAL *a)
Compute the inverse matrix of a 5*5 full matrix A (in place)
void fasp_blas_smat_mul_nc4(const REAL *a, const REAL *b, REAL *c)
Compute the matrix product of two 4*4 matrices a and b, stored in c.
Definition: BlaSmallMat.c:350
void fasp_blas_smat_mul(const REAL *a, const REAL *b, REAL *c, const INT n)
Compute the matrix product of two small full matrices a and b, stored in c.
Definition: BlaSmallMat.c:596
void fasp_blas_smat_mul_nc2(const REAL *a, const REAL *b, REAL *c)
Compute the matrix product of two 2* matrices a and b, stored in c.
Definition: BlaSmallMat.c:289
void fasp_blas_smat_mul_nc3(const REAL *a, const REAL *b, REAL *c)
Compute the matrix product of two 3*3 matrices a and b, stored in c.
Definition: BlaSmallMat.c:315
void fasp_blas_smat_mul_nc7(const REAL *a, const REAL *b, REAL *c)
Compute the matrix product of two 7*7 matrices a and b, stored in c.
Definition: BlaSmallMat.c:452
void fasp_blas_smat_mul_nc5(const REAL *a, const REAL *b, REAL *c)
Compute the matrix product of two 5*5 matrices a and b, stored in c.
Definition: BlaSmallMat.c:395
dBSRmat fasp_dbsr_perm(const dBSRmat *A, const INT *P)
Apply permutation of A, i.e. Aperm=PAP' by the orders given in P.
void fasp_dbsr_free(dBSRmat *A)
Free memory space for BSR format sparse matrix.
Definition: BlaSparseBSR.c:140
dCSRmat fasp_dcsr_create(const INT m, const INT n, const INT nnz)
Create CSR sparse matrix data memory space.
Definition: BlaSparseCSR.c:47
void fasp_dcsr_free(dCSRmat *A)
Free CSR sparse matrix data memory space.
Definition: BlaSparseCSR.c:184
void fasp_dcsr_cp(const dCSRmat *A, dCSRmat *B)
copy a dCSRmat to a new one B=A
Definition: BlaSparseCSR.c:851
dCSRmat fasp_dcsr_sympart(dCSRmat *A)
Get symmetric part of a dCSRmat matrix.
void fasp_blas_dcsr_mxm(const dCSRmat *A, const dCSRmat *B, dCSRmat *C)
Sparse matrix multiplication C=A*B.
Definition: BlaSpmvCSR.c:893
AMG_data * fasp_amg_data_create(SHORT max_levels)
Create and initialize AMG_data for classical and SA AMG.
Definition: PreDataInit.c:64
Main header file for the FASP project.
#define MIN(a, b)
Definition: fasp.h:83
#define REAL
Definition: fasp.h:75
#define SHORT
FASP integer and floating point numbers.
Definition: fasp.h:71
#define ABS(a)
Definition: fasp.h:84
#define MAX(a, b)
Definition of max, min, abs.
Definition: fasp.h:82
#define INT
Definition: fasp.h:72
#define FASP_SUCCESS
Definition of return status and error messages.
Definition: fasp_const.h:19
#define ERROR_SOLVER_ILUSETUP
Definition: fasp_const.h:46
#define ILUtp
Definition: fasp_const.h:151
#define PRINT_NONE
Print level for all subroutines – not including DEBUG output.
Definition: fasp_const.h:73
#define PRINT_MIN
Definition: fasp_const.h:74
Data for AMG methods.
Definition: fasp.h:804
dCSRmat A
pointer to the matrix at level level_num
Definition: fasp.h:817
INT colors
number of colors
Definition: fasp.h:878
INT * ic
indices for different colors
Definition: fasp.h:872
INT * icmap
mapping from vertex to color
Definition: fasp.h:875
SHORT num_levels
number of levels in use <= max_levels
Definition: fasp.h:812
Data for ILU setup.
Definition: fasp.h:651
INT * ijlu
integer array of row pointers and column indexes, the size is nzlu
Definition: fasp.h:669
REAL * luval
nonzero entries of LU
Definition: fasp.h:672
INT col
column of matrix LU, n
Definition: fasp.h:663
INT * jlevU
mapping from row to color for upper triangle
Definition: fasp.h:716
INT nlevU
number of colors for upper triangle
Definition: fasp.h:704
INT nwork
work space size
Definition: fasp.h:678
INT nb
block size for BSR type only
Definition: fasp.h:675
INT row
row number of matrix LU, m
Definition: fasp.h:660
INT nlevL
number of colors for lower triangle
Definition: fasp.h:701
INT type
type of ILUdata
Definition: fasp.h:657
INT * ic
indices for different colors
Definition: fasp.h:692
INT * icmap
mapping from vertex to color
Definition: fasp.h:695
INT * iperm
permutation arrays for ILUtp
Definition: fasp.h:684
INT nzlu
number of nonzero entries
Definition: fasp.h:666
INT * jlevL
mapping from row to color for lower triangle
Definition: fasp.h:713
REAL * work
work space
Definition: fasp.h:681
INT * uptr
temporary work space
Definition: fasp.h:698
INT * ilevL
number of vertices in each color for lower triangle
Definition: fasp.h:707
dCSRmat * A
pointer to the original coefficient matrix
Definition: fasp.h:654
INT * ilevU
number of vertices in each color for upper triangle
Definition: fasp.h:710
Parameters for ILU.
Definition: fasp.h:404
INT ILU_lfil
level of fill-in for ILUk
Definition: fasp.h:413
SHORT print_level
print level
Definition: fasp.h:407
SHORT ILU_type
ILU type for decomposition.
Definition: fasp.h:410
Block sparse row storage matrix of REAL type.
Definition: fasp_block.h:34
INT COL
number of cols of sub-blocks in matrix A, N
Definition: fasp_block.h:40
INT NNZ
number of nonzero sub-blocks in matrix A, NNZ
Definition: fasp_block.h:43
REAL * val
Definition: fasp_block.h:57
INT nb
dimension of each sub-block
Definition: fasp_block.h:46
INT * IA
integer array of row pointers, the size is ROW+1
Definition: fasp_block.h:60
INT ROW
number of rows of sub-blocks in matrix A, M
Definition: fasp_block.h:37
INT * JA
Definition: fasp_block.h:64
Sparse matrix of REAL type in CSR format.
Definition: fasp.h:151
INT col
column of matrix A, n
Definition: fasp.h:157
REAL * val
nonzero entries of A
Definition: fasp.h:169
INT row
row number of matrix A, m
Definition: fasp.h:154
INT * IA
integer array of row pointers, the size is m+1
Definition: fasp.h:163
INT nnz
number of nonzero entries
Definition: fasp.h:160
INT * JA
integer array of column indexes, the size is nnz
Definition: fasp.h:166
Sparse matrix of INT type in CSR format.
Definition: fasp.h:190
INT col
column of matrix A, n
Definition: fasp.h:196
INT row
row number of matrix A, m
Definition: fasp.h:193
INT * IA
integer array of row pointers, the size is m+1
Definition: fasp.h:202
INT nnz
number of nonzero entries
Definition: fasp.h:199
INT * JA
integer array of column indexes, the size is nnz
Definition: fasp.h:205
INT * val
nonzero entries of A
Definition: fasp.h:208