/* fasp/BlaILUSetupBSR_8c_source.html */

#include <math.h>

#include <time.h>


#include "fasp.h"

#include "fasp_functs.h"


/*---------------------------------*/

/*--  Declare Private Functions  --*/

/*---------------------------------*/


static INT numfactor (dBSRmat *, REAL *, INT *, INT *);

static INT numfactor_mulcol (dBSRmat *, REAL *, INT *, INT *, INT, INT *, INT *);

static INT numfactor_levsch (dBSRmat *, REAL *, INT *, INT *, INT, INT *, INT *);

static void generate_S_theta(dCSRmat *, iCSRmat *, REAL);

// static void topologic_sort_ILU (ILU_data *);

// static void mulcol_independ_set (AMG_data *, INT);


/*---------------------------------*/

/*--      Public Functions       --*/

/*---------------------------------*/


/**
 * \fn SHORT fasp_ilu_dbsr_setup (dBSRmat *A, ILU_data *iludata, ILU_param *iluparam)
 *
 * \brief Sequential ILU(k) setup for a matrix stored in BSR format
 *
 * \param A         Matrix to factorize; IA/JA drive the symbolic phase and the
 *                  block values feed the numerical phase
 * \param iludata   Output: receives row/col/nb, nzlu, nwork and newly allocated
 *                  ijlu, luval and work arrays; permutation and level pointers
 *                  are reset to NULL
 * \param iluparam  Supplies print_level, the fill level ILU_lfil and ILU_type
 *
 * \return          FASP_SUCCESS, or ERROR_SOLVER_ILUSETUP when the symbolic
 *                  phase reports ierr != 0, the index workspace is too small,
 *                  or the numerical phase returns a negative status
 *
 * \note  Output arrays that were not built before a failure are left as NULL.
 */
SHORT fasp_ilu_dbsr_setup(dBSRmat *A,
                          ILU_data *iludata,
                          ILU_param *iluparam)
{
    const SHORT  prtlvl = iluparam->print_level;
    const INT    n = A->COL, nnz = A->NNZ, nb = A->nb, nb2 = nb*nb;

    // local variables
    INT     lfil = iluparam->ILU_lfil;
    INT     ierr, iwk, nzlu, nwork, *ijlu, *uptr;
    SHORT   status = FASP_SUCCESS;
    REAL    setup_start, setup_end, setup_duration;

#if DEBUG_MODE > 0
    printf("### DEBUG: [-Begin-] %s ...\n", __FUNCTION__);
    printf("### DEBUG: m = %d, n = %d, nnz = %d\n", A->ROW, n, nnz);
#endif

    fasp_gettime(&setup_start);

    // Expected number of index entries needed for the ILU pattern
    iwk = (lfil+2)*nnz;

#if DEBUG_MODE > 0
    if (iluparam->ILU_type == ILUtp) {
        printf("### WARNING: iludata->type = %d not supported!\n",
               iluparam->ILU_type);
    }
#endif

    // setup preconditioner
    iludata->type  = 0; // Must be initialized
    iludata->iperm = NULL;
    iludata->A     = NULL; // No need for BSR matrix
    iludata->row   = iludata->col = n;
    iludata->nb    = nb;
    iludata->ilevL = iludata->jlevL = NULL;
    iludata->ilevU = iludata->jlevU = NULL;

    // Nothing has been built yet: keep the output arrays well defined so an
    // early exit never leaves indeterminate pointers in iludata
    iludata->ijlu  = NULL;
    iludata->luval = NULL;
    iludata->work  = NULL;

    ijlu = (INT*)fasp_mem_calloc(iwk,sizeof(INT));
    uptr = (INT*)fasp_mem_calloc(A->ROW,sizeof(INT));

#if DEBUG_MODE > 1
    printf("### DEBUG: symbolic factorization ... \n");
#endif

    // ILU decomposition
    // (1) symbolic factorization
    fasp_symbfactor(A->ROW,A->JA,A->IA,lfil,iwk,&nzlu,ijlu,uptr,&ierr);

    if ( ierr != 0 ) {
        printf("### ERROR: ILU setup failed (ierr=%d)! [%s]\n", ierr, __FUNCTION__);
        status = ERROR_SOLVER_ILUSETUP;
        goto FINISHED;
    }

    // Verify the workspace size BEFORE nzlu entries are read out of ijlu
    // (which only holds iwk entries)
    if ( iwk < nzlu ) {
        printf("### ERROR: ILU needs more RAM %d! [%s]\n", iwk-nzlu, __FUNCTION__);
        status = ERROR_SOLVER_ILUSETUP;
        goto FINISHED;
    }

    iludata->luval = (REAL*)fasp_mem_calloc(nzlu*nb2,sizeof(REAL));

#if DEBUG_MODE > 1
    printf("### DEBUG: numerical factorization ... \n");
#endif

    // (2) numerical factorization
    status = numfactor(A, iludata->luval, ijlu, uptr);

    if ( status < 0 ) {
        printf("### ERROR: ILU factorization failed! [%s]\n", __FUNCTION__);
        status = ERROR_SOLVER_ILUSETUP;
        goto FINISHED;
    }

    //nwork = 6*nzlu*nb;
    nwork = 20*A->ROW*A->nb;
    iludata->nzlu  = nzlu;
    iludata->nwork = nwork;

    // Keep only the nzlu index entries that are actually used
    iludata->ijlu  = (INT*)fasp_mem_calloc(nzlu, sizeof(INT));
    memcpy(iludata->ijlu,ijlu,nzlu*sizeof(INT));

    iludata->work = (REAL*)fasp_mem_calloc(nwork, sizeof(REAL));
    // Check: Is the work space too large? --Xiaozhe

#if DEBUG_MODE > 1
    printf("### DEBUG: fill-in = %d, nwork = %d\n", lfil, nwork);
    printf("### DEBUG: iwk = %d, nzlu = %d\n", iwk, nzlu);
#endif

    if ( prtlvl > PRINT_NONE ) {
        fasp_gettime(&setup_end);
        setup_duration = setup_end - setup_start;
        printf("BSR ILU(%d)-seq setup costs %f seconds.\n", lfil, setup_duration);
    }

FINISHED:
    // Scratch arrays are local to this call on every path
    fasp_mem_free(ijlu);  ijlu = NULL;
    fasp_mem_free(uptr);  uptr = NULL;

#if DEBUG_MODE > 0
    printf("### DEBUG: [--End--] %s ...\n", __FUNCTION__);
#endif

    return status;
}


/**
 * \fn SHORT fasp_ilu_dbsr_setup_step (dBSRmat *A, ILU_data *iludata,
 *                                     ILU_param *iluparam, INT step)
 *
 * \brief Sequential BSR ILU(k) setup split into separately callable steps
 *
 * \param A         Matrix to factorize (BSR format)
 * \param iludata   ILU data; filled in step 1, luval is computed in step 2
 * \param iluparam  Supplies print_level, the fill level ILU_lfil and ILU_type
 * \param step      1: symbolic factorization and allocation of the ILU arrays;
 *                  2: numerical factorization using the pattern from step 1;
 *                  any other value: release the row-pointer array kept
 *                  between calls
 *
 * \return          FASP_SUCCESS, or ERROR_SOLVER_ILUSETUP when step 1 fails
 *                  (ierr != 0 or workspace too small) or when the numerical
 *                  factorization of step 2 returns a negative status
 *
 * \note  State shared between the steps lives in function-local static
 *        variables, so interleaving setups of different matrices is not
 *        supported by this routine.
 */
SHORT fasp_ilu_dbsr_setup_step (dBSRmat    *A,
                                ILU_data   *iludata,
                                ILU_param  *iluparam,
                                INT step)
{
    const SHORT  prtlvl = iluparam->print_level;
    const INT    n = A->COL, nnz = A->NNZ, nb = A->nb, nb2 = nb*nb;

    // local variables
    INT     lfil = iluparam->ILU_lfil;
    // static: carried over from one step to the next
    static INT     ierr, iwk, nzlu, nwork, *ijlu, *uptr;
    SHORT   status = FASP_SUCCESS;

    REAL    setup_start, setup_end, setup_duration;

#if DEBUG_MODE > 0
    printf("### DEBUG: [-Begin-] %s ...\n", __FUNCTION__);
    printf("### DEBUG: m = %d, n = %d, nnz = %d\n", A->ROW, n, nnz);
#endif

    fasp_gettime(&setup_start);

    if (step==1) {
        // Expected number of index entries needed for the ILU pattern
        iwk = (lfil+2)*nnz;

#if DEBUG_MODE > 0
        if (iluparam->ILU_type == ILUtp) {
            printf("### WARNING: iludata->type = %d not supported!\n",
                iluparam->ILU_type);
        }
#endif

        // setup preconditioner
        iludata->type  = 0; // Must be initialized
        iludata->iperm = NULL;
        iludata->A     = NULL; // No need for BSR matrix
        iludata->row   = iludata->col = n;
        iludata->nb    = nb;
        iludata->ilevL = iludata->jlevL = NULL;
        iludata->ilevU = iludata->jlevU = NULL;

        // Nothing has been built yet: keep the output arrays well defined
        // in case the symbolic phase fails below
        iludata->ijlu  = NULL;
        iludata->luval = NULL;
        iludata->work  = NULL;

        ijlu = (INT*)fasp_mem_calloc(iwk,sizeof(INT));

        // Drop the row pointers of a previous setup that was never cleaned up
        if (uptr != NULL)   fasp_mem_free(uptr);
        uptr = (INT*)fasp_mem_calloc(A->ROW,sizeof(INT));

#if DEBUG_MODE > 1
        printf("### DEBUG: symbolic factorization ... \n");
#endif

        // ILU decomposition
        // (1) symbolic factorization
        fasp_symbfactor(A->ROW,A->JA,A->IA,lfil,iwk,&nzlu,ijlu,uptr,&ierr);

        // Validate the symbolic result before nzlu is used for any
        // allocation or copy
        if ( ierr != 0 ) {
            printf("### ERROR: ILU setup failed (ierr=%d)! [%s]\n", ierr, __FUNCTION__);
            status = ERROR_SOLVER_ILUSETUP;
        }
        else if ( iwk < nzlu ) {
            printf("### ERROR: ILU needs more RAM %d! [%s]\n", iwk-nzlu, __FUNCTION__);
            status = ERROR_SOLVER_ILUSETUP;
        }

        if ( status == FASP_SUCCESS ) {
            iludata->luval = (REAL*)fasp_mem_calloc(nzlu*nb2,sizeof(REAL));

            //nwork = 6*nzlu*nb;
            nwork = 5*A->ROW*A->nb;
            iludata->nwork = nwork;
            iludata->nzlu  = nzlu;

            // Keep only the nzlu index entries that are actually used
            iludata->ijlu  = (INT*)fasp_mem_calloc(nzlu, sizeof(INT));
            memcpy(iludata->ijlu,ijlu,nzlu*sizeof(INT));

            iludata->work = (REAL*)fasp_mem_calloc(nwork, sizeof(REAL));
            // Check: Is the work space too large? --Xiaozhe

#if DEBUG_MODE > 1
            printf("### DEBUG: fill-in = %d, nwork = %d\n", lfil, nwork);
            printf("### DEBUG: iwk = %d, nzlu = %d\n", iwk, nzlu);
#endif
        }

        // The oversized index buffer is never needed after step 1
        fasp_mem_free(ijlu);  ijlu = NULL;

        // A failed step 1 leaves nothing for the later steps to use
        if ( status != FASP_SUCCESS ) {
            fasp_mem_free(uptr);  uptr = NULL;
        }
    }
    else if (step==2) {
#if DEBUG_MODE > 1
        printf("### DEBUG: numerical factorization ... \n");
#endif

        // (2) numerical factorization; uptr is the array kept from step 1
        if ( numfactor(A, iludata->luval, iludata->ijlu, uptr) < 0 ) {
            printf("### ERROR: ILU factorization failed! [%s]\n", __FUNCTION__);
            status = ERROR_SOLVER_ILUSETUP;
        }
    }
    else {
        // clean-up step: release the state kept between calls
        fasp_mem_free(uptr);  uptr = NULL;
    }

    if ( prtlvl > PRINT_NONE ) {
        fasp_gettime(&setup_end);
        setup_duration = setup_end - setup_start;
        printf("BSR ILU(%d) setup costs %f seconds.\n", lfil, setup_duration);
    }

#if DEBUG_MODE > 0
    printf("### DEBUG: [--End--] %s ...\n", __FUNCTION__);
#endif

    return status;
}


/**
 * \fn SHORT fasp_ilu_dbsr_setup_omp (dBSRmat *A, ILU_data *iludata, ILU_param *iluparam)
 *
 * \brief BSR ILU(k) setup whose numerical phase uses the multicolor routine
 *
 * \param A         Matrix to factorize (BSR format)
 * \param iludata   In: nlevL, ilevL and jlevL are passed on to
 *                  numfactor_mulcol as the number of colors and the color
 *                  arrays. Out: nzlu, nwork, ijlu, luval and work
 * \param iluparam  Supplies print_level, the fill level ILU_lfil and ILU_type
 *
 * \return          FASP_SUCCESS, or ERROR_SOLVER_ILUSETUP when the symbolic
 *                  phase reports ierr != 0 or the index workspace is too small
 *
 * \note  Output arrays are left as NULL when the symbolic phase fails.
 */
SHORT fasp_ilu_dbsr_setup_omp (dBSRmat    *A,
                               ILU_data   *iludata,
                               ILU_param  *iluparam)
{
    const SHORT  prtlvl = iluparam->print_level;
    const INT    n = A->COL, nnz = A->NNZ, nb = A->nb, nb2 = nb*nb;

    // local variables
    INT     lfil = iluparam->ILU_lfil;
    INT     ierr, iwk, nzlu, nwork, *ijlu, *uptr;
    SHORT   status = FASP_SUCCESS;

    REAL    setup_start, setup_end, setup_duration;
    REAL    symbolic_start, symbolic_end, numfac_start, numfac_end;

#if DEBUG_MODE > 0
    printf("### DEBUG: [-Begin-] %s ...\n", __FUNCTION__);
    printf("### DEBUG: m = %d, n = %d, nnz = %d\n", A->ROW, n, nnz);
#endif

    fasp_gettime(&setup_start);

    // Expected number of index entries needed for the ILU pattern
    iwk = (lfil+2)*nnz;

#if DEBUG_MODE > 0
    if (iluparam->ILU_type == ILUtp) {
        printf("### WARNING: iludata->type = %d not supported any more!\n",
               iluparam->ILU_type);
    }
#endif

    // setup preconditioner (the level/color fields are inputs: left untouched)
    iludata->type  = 0; // Must be initialized
    iludata->iperm = NULL;
    iludata->A     = NULL; // No need for BSR matrix
    iludata->row   = iludata->col = n;
    iludata->nb    = nb;

    // Nothing has been built yet: keep the output arrays well defined in
    // case the symbolic phase fails below
    iludata->ijlu  = NULL;
    iludata->luval = NULL;
    iludata->work  = NULL;

    ijlu = (INT *) fasp_mem_calloc(iwk,   sizeof(INT));
    uptr = (INT *) fasp_mem_calloc(A->ROW,sizeof(INT));

#if DEBUG_MODE > 1
    printf("### DEBUG: symbolic factorization ... \n");
#endif

    // ILU decomposition
    // (1) symbolic factorization
    fasp_gettime(&symbolic_start);

    fasp_symbfactor(A->ROW,A->JA,A->IA,lfil,iwk,&nzlu,ijlu,uptr,&ierr);

    fasp_gettime(&symbolic_end);

    // prtlvl is a run-time value, so it has to be tested with a regular if;
    // inside #if the identifier is replaced by 0 and the print is compiled out
    if ( prtlvl > PRINT_MIN ) {
        printf("ILU symbolic factorization time = %f\n", symbolic_end-symbolic_start);
    }

    // Validate the symbolic result before nzlu is used for any allocation,
    // copy or numerical work
    if ( ierr != 0 ) {
        printf("### ERROR: ILU setup failed (ierr=%d)! [%s]\n", ierr, __FUNCTION__);
        status = ERROR_SOLVER_ILUSETUP;
        goto FINISHED;
    }

    if ( iwk < nzlu ) {
        printf("### ERROR: ILU needs more RAM %d! [%s]\n", iwk-nzlu, __FUNCTION__);
        status = ERROR_SOLVER_ILUSETUP;
        goto FINISHED;
    }

    nwork = 5*A->ROW*A->nb;
    iludata->nzlu  = nzlu;
    iludata->nwork = nwork;
    iludata->ijlu  = (INT*)fasp_mem_calloc(nzlu,sizeof(INT));
    iludata->luval = (REAL*)fasp_mem_calloc(nzlu*nb2,sizeof(REAL));
    iludata->work  = (REAL*)fasp_mem_calloc(nwork, sizeof(REAL));
    memcpy(iludata->ijlu,ijlu,nzlu*sizeof(INT));
    fasp_darray_set(nzlu*nb2, iludata->luval, 0.0);

#if DEBUG_MODE > 1
    printf("### DEBUG: numerical factorization ... \n");
#endif

    // (2) numerical factorization (its return value is not inspected here)
    fasp_gettime(&numfac_start);

    numfactor_mulcol(A, iludata->luval, ijlu, uptr, iludata->nlevL,
                     iludata->ilevL, iludata->jlevL);

    fasp_gettime(&numfac_end);

    if ( prtlvl > PRINT_MIN ) {
        printf("ILU numerical factorization time = %f\n", numfac_end-numfac_start);
    }

#if DEBUG_MODE > 1
    printf("### DEBUG: fill-in = %d, nwork = %d\n", lfil, nwork);
    printf("### DEBUG: iwk = %d, nzlu = %d\n", iwk, nzlu);
#endif

    if ( prtlvl > PRINT_NONE ) {
        fasp_gettime(&setup_end);
        setup_duration = setup_end - setup_start;
        printf("BSR ILU(%d)-mc setup costs %f seconds.\n", lfil, setup_duration);
    }

FINISHED:
    // Scratch arrays are local to this call on every path
    fasp_mem_free(ijlu);  ijlu = NULL;
    fasp_mem_free(uptr);  uptr = NULL;

#if DEBUG_MODE > 0
    printf("### DEBUG: [--End--] %s ...\n", __FUNCTION__);
#endif

    return status;
}


/**
 * \fn SHORT fasp_ilu_dbsr_setup_levsch_omp (dBSRmat *A, ILU_data *iludata,
 *                                           ILU_param *iluparam)
 *
 * \brief BSR ILU(k) setup whose numerical phase uses the level-scheduling routine
 *
 * \param A         Matrix to factorize (BSR format)
 * \param iludata   Output: nzlu, nwork, ijlu, luval and work; after
 *                  topologic_sort_ILU() has been applied to it, its nlevL,
 *                  ilevL and jlevL fields are passed to numfactor_levsch
 * \param iluparam  Supplies print_level, the fill level ILU_lfil and ILU_type
 *
 * \return          FASP_SUCCESS, or ERROR_SOLVER_ILUSETUP when the symbolic
 *                  phase reports ierr != 0 or the index workspace is too small
 *
 * \note  Output arrays are left as NULL when the symbolic phase fails.
 */
SHORT fasp_ilu_dbsr_setup_levsch_omp (dBSRmat    *A,
                                      ILU_data   *iludata,
                                      ILU_param  *iluparam)
{
    const SHORT  prtlvl = iluparam->print_level;
    const INT    n = A->COL, nnz = A->NNZ, nb = A->nb, nb2 = nb*nb;

    // local variables
    INT lfil = iluparam->ILU_lfil;
    INT ierr, iwk, nzlu, nwork, *ijlu, *uptr;
    SHORT   status = FASP_SUCCESS;

    REAL    setup_start, setup_end, setup_duration;
    REAL    symbolic_start, symbolic_end, numfac_start, numfac_end;

#if DEBUG_MODE > 0
    printf("### DEBUG: [-Begin-] %s ...\n", __FUNCTION__);
    printf("### DEBUG: m=%d, n=%d, nnz=%d\n", A->ROW, n, nnz);
#endif

    fasp_gettime(&setup_start);

    // Expected number of index entries needed for the ILU pattern
    iwk = (lfil+2)*nnz;

#if DEBUG_MODE > 0
    if (iluparam->ILU_type == ILUtp) {
        printf("### WARNING: iludata->type = %d not supported!\n",
               iluparam->ILU_type);
    }
#endif

    // setup preconditioner
    iludata->type  = 0; // Must be initialized
    iludata->iperm = NULL;
    iludata->A     = NULL; // No need for BSR matrix
    iludata->row   = iludata->col=n;
    iludata->nb    = nb;
    iludata->uptr  = NULL; iludata->ic = NULL; iludata->icmap = NULL;

    // Nothing has been built yet: keep the output arrays well defined in
    // case the symbolic phase fails below
    iludata->ijlu  = NULL;
    iludata->luval = NULL;
    iludata->work  = NULL;

    ijlu = (INT*)fasp_mem_calloc(iwk,sizeof(INT));
    uptr = (INT*)fasp_mem_calloc(A->ROW,sizeof(INT));

#if DEBUG_MODE > 1
    printf("### DEBUG: symbolic factorization ... \n");
#endif

    fasp_gettime(&symbolic_start);

    // ILU decomposition
    // (1) symbolic factorization
    fasp_symbfactor(A->ROW,A->JA,A->IA,lfil,iwk,&nzlu,ijlu,uptr,&ierr);

    fasp_gettime(&symbolic_end);

    // prtlvl is a run-time value, so it has to be tested with a regular if;
    // inside #if the identifier is replaced by 0 and the print is compiled out
    if ( prtlvl > PRINT_MIN ) {
        printf("ILU symbolic factorization time = %f\n", symbolic_end-symbolic_start);
    }

    // Validate the symbolic result before nzlu is used for any allocation,
    // copy, level sorting or numerical work
    if ( ierr != 0 ) {
        printf("### ERROR: ILU setup failed (ierr=%d)! [%s]\n", ierr, __FUNCTION__);
        status = ERROR_SOLVER_ILUSETUP;
        goto FINISHED;
    }

    if ( iwk < nzlu ) {
        printf("### ERROR: ILU needs more RAM %d! [%s]\n", iwk-nzlu, __FUNCTION__);
        status = ERROR_SOLVER_ILUSETUP;
        goto FINISHED;
    }

    nwork = 5*A->ROW*A->nb;
    iludata->nzlu  = nzlu;
    iludata->nwork = nwork;
    iludata->ijlu  = (INT*)fasp_mem_calloc(nzlu,sizeof(INT));
    iludata->luval = (REAL*)fasp_mem_calloc(nzlu*nb2,sizeof(REAL));
    iludata->work  = (REAL*)fasp_mem_calloc(nwork, sizeof(REAL));
    memcpy(iludata->ijlu,ijlu,nzlu*sizeof(INT));
    fasp_darray_set(nzlu*nb2, iludata->luval, 0.0);

    // NOTE(review): presumably fills the level fields (nlevL/ilevL/jlevL)
    // consumed by numfactor_levsch below -- confirm against its definition
    topologic_sort_ILU(iludata);

#if DEBUG_MODE > 1
    printf("### DEBUG: numerical factorization ... \n");
#endif

    fasp_gettime(&numfac_start);

    // (2) numerical factorization (its return value is not inspected here)
    numfactor_levsch(A, iludata->luval, ijlu, uptr, iludata->nlevL,
                     iludata->ilevL, iludata->jlevL);

    fasp_gettime(&numfac_end);

    if ( prtlvl > PRINT_MIN ) {
        printf("ILU numerical factorization time = %f\n", numfac_end-numfac_start);
    }

#if DEBUG_MODE > 1
    printf("### DEBUG: fill-in = %d, nwork = %d\n", lfil, nwork);
    printf("### DEBUG: iwk = %d, nzlu = %d\n", iwk, nzlu);
#endif

    if ( prtlvl > PRINT_NONE ) {
        fasp_gettime(&setup_end);
        setup_duration = setup_end - setup_start;
        printf("BSR ILU(%d)-ls setup costs %f seconds.\n", lfil, setup_duration);
    }

FINISHED:
    // Scratch arrays are local to this call on every path
    fasp_mem_free(ijlu);  ijlu = NULL;
    fasp_mem_free(uptr);  uptr = NULL;

#if DEBUG_MODE > 0
    printf("### DEBUG: [--End--] %s ...\n", __FUNCTION__);
#endif

    return status;
}


/**
 * \fn SHORT fasp_ilu_dbsr_setup_levsch_step (dBSRmat *A, ILU_data *iludata,
 *                                            ILU_param *iluparam, INT step)
 *
 * \brief Level-scheduling BSR ILU(k) setup split into separately callable steps
 *
 * \param A         Matrix to factorize (BSR format)
 * \param iludata   ILU data; filled in step 1, luval is computed in step 2
 * \param iluparam  Supplies print_level, the fill level ILU_lfil and ILU_type
 * \param step      1: symbolic factorization, allocation of the ILU arrays and
 *                     call of topologic_sort_ILU();
 *                  2: numerical factorization via numfactor_levsch;
 *                  any other value: release the row-pointer array kept
 *                  between calls
 *
 * \return          FASP_SUCCESS, or ERROR_SOLVER_ILUSETUP when step 1 fails
 *                  (ierr != 0 or workspace too small)
 *
 * \note  State shared between the steps lives in function-local static
 *        variables, so interleaving setups of different matrices is not
 *        supported by this routine.
 */
SHORT fasp_ilu_dbsr_setup_levsch_step (dBSRmat    *A,
                                       ILU_data   *iludata,
                                       ILU_param  *iluparam,
                                       INT step)
{
    const SHORT  prtlvl = iluparam->print_level;
    const INT    n = A->COL, nnz = A->NNZ, nb = A->nb, nb2 = nb*nb;

    // local variables
    INT lfil = iluparam->ILU_lfil;
    // static: carried over from one step to the next
    static INT ierr, iwk, nzlu, nwork, *ijlu, *uptr;
    SHORT   status = FASP_SUCCESS;

    REAL    setup_start, setup_end, setup_duration;
    REAL    symbolic_start, symbolic_end, numfac_start, numfac_end;

#if DEBUG_MODE > 0
    printf("### DEBUG: [-Begin-] %s ...\n", __FUNCTION__);
    printf("### DEBUG: m=%d, n=%d, nnz=%d\n", A->ROW, n, nnz);
    printf("### DEBUG: step=%d(1: symbolic factoration, 2: numerical factoration)\n", step);// zhaoli 2021.03.24
#endif

    fasp_gettime(&setup_start);

    if (step==1) {
        // Expected number of index entries needed for the ILU pattern
        iwk = (lfil+2)*nnz;

#if DEBUG_MODE > 0
        if (iluparam->ILU_type == ILUtp) {
            printf("### WARNING: iludata->type = %d not supported!\n",
                iluparam->ILU_type);
        }
#endif

        // setup preconditioner
        iludata->type  = 0; // Must be initialized
        iludata->iperm = NULL;
        iludata->A     = NULL; // No need for BSR matrix
        iludata->row   = iludata->col=n;
        iludata->nb    = nb;
        iludata->uptr  = NULL; iludata->ic = NULL; iludata->icmap = NULL;

        // Nothing has been built yet: keep the output arrays well defined
        // in case the symbolic phase fails below
        iludata->ijlu  = NULL;
        iludata->luval = NULL;
        iludata->work  = NULL;

        // Drop whatever a previous, never cleaned-up setup left behind
        fasp_mem_free(ijlu);
        ijlu = (INT*)fasp_mem_calloc(iwk,sizeof(INT));

        fasp_mem_free(uptr);
        uptr = (INT*)fasp_mem_calloc(A->ROW,sizeof(INT));

#if DEBUG_MODE > 1
        printf("### DEBUG: symbolic factorization ... \n");
#endif

        fasp_gettime(&symbolic_start);

        // ILU decomposition
        // (1) symbolic factorization
        fasp_symbfactor(A->ROW,A->JA,A->IA,lfil,iwk,&nzlu,ijlu,uptr,&ierr);

        fasp_gettime(&symbolic_end);

        // prtlvl is a run-time value, so it has to be tested with a regular
        // if; inside #if the identifier is replaced by 0 and the print is
        // compiled out
        if ( prtlvl > PRINT_MIN ) {
            printf("ILU symbolic factorization time = %f\n", symbolic_end-symbolic_start);
        }

        // Validate the symbolic result before nzlu is used for any
        // allocation, copy or level sorting
        if ( ierr != 0 ) {
            printf("### ERROR: ILU setup failed (ierr=%d)! [%s]\n", ierr, __FUNCTION__);
            status = ERROR_SOLVER_ILUSETUP;
        }
        else if ( iwk < nzlu ) {
            printf("### ERROR: ILU needs more RAM %d! [%s]\n", iwk-nzlu, __FUNCTION__);
            status = ERROR_SOLVER_ILUSETUP;
        }

        if ( status == FASP_SUCCESS ) {
            nwork = 5*A->ROW*A->nb;
            iludata->nzlu  = nzlu;
            iludata->nwork = nwork;
            iludata->ijlu  = (INT*)fasp_mem_calloc(nzlu,sizeof(INT));
            iludata->luval = (REAL*)fasp_mem_calloc(nzlu*nb2,sizeof(REAL));
            iludata->work  = (REAL*)fasp_mem_calloc(nwork, sizeof(REAL));
            memcpy(iludata->ijlu,ijlu,nzlu*sizeof(INT));

            fasp_darray_set(nzlu*nb2, iludata->luval, 0.0);

            // NOTE(review): presumably fills the level fields
            // (nlevL/ilevL/jlevL) used in step 2 -- confirm against its
            // definition
            topologic_sort_ILU(iludata);

#if DEBUG_MODE > 1
            printf("### DEBUG: fill-in = %d, nwork = %d\n", lfil, nwork);
            printf("### DEBUG: iwk = %d, nzlu = %d\n", iwk, nzlu);
#endif
        }

        // The oversized index buffer is never needed after step 1
        fasp_mem_free(ijlu);  ijlu = NULL;

        // A failed step 1 leaves nothing for the later steps to use
        if ( status != FASP_SUCCESS ) {
            fasp_mem_free(uptr);  uptr = NULL;
        }
    }
    else if (step==2) {

#if DEBUG_MODE > 1
        printf("### DEBUG: numerical factorization ... \n");
#endif

        fasp_gettime(&numfac_start);

        // (2) numerical factorization; uptr is the array kept from step 1
        //     (the return value is not inspected here)
        numfactor_levsch(A, iludata->luval, iludata->ijlu, uptr, iludata->nlevL,
                         iludata->ilevL, iludata->jlevL);

        fasp_gettime(&numfac_end);

        if ( prtlvl > PRINT_MIN ) {
            printf("ILU numerical factorization time = %f\n", numfac_end-numfac_start);
        }
    }
    else {
        // clean-up step: release the state kept between calls
        fasp_mem_free(uptr);  uptr = NULL;
    }

    if ( prtlvl > PRINT_NONE ) {
        fasp_gettime(&setup_end);
        setup_duration = setup_end - setup_start;
        printf("BSR ILU(%d)-ls setup costs %f seconds.\n", lfil, setup_duration);
    }

#if DEBUG_MODE > 0
    printf("### DEBUG: [--End--] %s ...\n", __FUNCTION__);
#endif

    return status;
}


/**
 * \fn SHORT fasp_ilu_dbsr_setup_mc_omp (dBSRmat *A, dCSRmat *Ap,
 *                                       ILU_data *iludata, ILU_param *iluparam)
 *
 * \brief Multicolor BSR ILU setup: color, permute, then factorize
 *
 * \param A         Matrix to factorize (BSR format)
 * \param Ap        CSR matrix used to build the graph that is colored:
 *                  fasp_dcsr_sympart(Ap) for ILU_lfil == 0 and
 *                  fasp_dcsr_sympart(Ap*Ap) for ILU_lfil == 1
 * \param iludata   Output ILU data; nlevL/ilevL/jlevL receive the number of
 *                  colors and the ic/icmap arrays of the coloring
 * \param iluparam  Supplies ILU_lfil and is forwarded to the setup routine
 *
 * \return          Status returned by fasp_ilu_dbsr_setup_omp on the
 *                  permuted matrix
 *
 * \note  NOTE(review): only ILU_lfil values 0 and 1 build a graph here; for
 *        other values mgl[0].A keeps whatever fasp_amg_data_create left in
 *        it -- confirm that callers never request a higher fill level.
 */
SHORT fasp_ilu_dbsr_setup_mc_omp (dBSRmat    *A,
                                  dCSRmat    *Ap,
                                  ILU_data   *iludata,
                                  ILU_param  *iluparam)
{
    SHORT status;
    AMG_data *mgl=fasp_amg_data_create(1);
    dCSRmat pp, Ap1;
    dBSRmat A_LU;

    if (iluparam->ILU_lfil==0) {  //for ILU0
        mgl[0].A = fasp_dcsr_sympart(Ap);
    }
    else if (iluparam->ILU_lfil==1) {  // for ILU1
        // color the pattern of Ap*Ap instead of Ap
        Ap1 = fasp_dcsr_create(Ap->row,Ap->col, Ap->nnz);
        fasp_dcsr_cp(Ap, &Ap1);
        fasp_blas_dcsr_mxm (Ap,&Ap1,&pp);
        mgl[0].A = fasp_dcsr_sympart(&pp);
        fasp_dcsr_free(&Ap1);
        fasp_dcsr_free(&pp);
    }

    mgl->num_levels = 20;

    mulcol_independ_set(mgl, 1);

    // reorder A with the map produced by the coloring
    A_LU = fasp_dbsr_perm(A, mgl[0].icmap);

    // hold color info with nlevL, ilevL and jlevL.
    // From here on ic and icmap are referenced by iludata, so they must not
    // be released in this function.
    iludata->nlevL = mgl[0].colors;
    iludata->ilevL = mgl[0].ic;
    iludata->jlevL = mgl[0].icmap;
    iludata->nlevU = 0;
    iludata->ilevU = NULL;
    iludata->jlevU = NULL;
    iludata->A     = NULL; // No need for BSR matrix

#if DEBUG_MODE > 0
    if (iluparam->ILU_type == ILUtp) {
        printf("### WARNING: iludata->type = %d not supported!\n",
               iluparam->ILU_type);
    }
#endif

    // setup preconditioner
    iludata->type  = 0; // Must be initialized
    iludata->iperm = NULL;

    status = fasp_ilu_dbsr_setup_omp(&A_LU,iludata,iluparam);

    fasp_dcsr_free(&mgl[0].A);
    fasp_dbsr_free(&A_LU);

    // Release the AMG_data array itself; only the array is freed, the ic and
    // icmap arrays now referenced by iludata are left alone
    fasp_mem_free(mgl);  mgl = NULL;

    return status;
}


/*---------------------------------*/

/*--      Private Functions      --*/

/*---------------------------------*/


/* Zero the blocks of row k in luval, record in colptrs where the block of
 * each column of the row pattern is stored, then copy row k of A into place. */
static void bsr_ilu_scatter_row (const dBSRmat *A,
                                 REAL          *luval,
                                 const INT     *jlu,
                                 INT           *colptrs,
                                 const INT      k,
                                 const INT      nb2)
{
    INT indj, indja, ib, ibstart, ibstart1;

    for (indj = jlu[k]; indj < jlu[k+1]; ++indj) {
        colptrs[jlu[indj]] = indj;
        ibstart = indj*nb2;
        for (ib = 0; ib < nb2; ++ib) luval[ibstart+ib] = 0;
    }

    // the diagonal block of row k is stored at position k
    colptrs[k] = k;

    for (indja = A->IA[k]; indja < A->IA[k+1]; ++indja) {
        ibstart  = colptrs[A->JA[indja]]*nb2;
        ibstart1 = indja*nb2;
        for (ib = 0; ib < nb2; ++ib) luval[ibstart+ib] = A->val[ibstart1+ib];
    }
}

/* Reset the colptrs entries that bsr_ilu_scatter_row set for row k. */
static void bsr_ilu_clear_row (const INT *jlu,
                               INT       *colptrs,
                               const INT  k)
{
    INT indj;

    for (indj = jlu[k]; indj < jlu[k+1]; ++indj) colptrs[jlu[indj]] = 0;

    colptrs[k] = 0;
}

/**
 * \fn static INT numfactor (dBSRmat *A, REAL *luval, INT *jlu, INT *uptr)
 *
 * \brief Numerical phase of the sequential block ILU factorization
 *
 * \param A      Matrix to factorize (BSR format)
 * \param luval  Output: nb*nb blocks of the factors, laid out following jlu;
 *               the block at position k (the diagonal of row k) is stored
 *               inverted
 * \param jlu    Pattern of the factors: for row k the off-diagonal entries
 *               occupy positions jlu[k] .. jlu[k+1]-1 and jlu[pos] is the
 *               column of the entry at pos
 * \param uptr   uptr[k] is the first position of row k that belongs to the
 *               upper part; positions jlu[k] .. uptr[k]-1 are eliminated
 *
 * \return       FASP_SUCCESS, or the status of the first diagonal block
 *               inversion (fasp_smat_invp_nc) that did not succeed
 *
 * \note  A colptrs value of 0 means "column not in the current row", which is
 *        why every row resets the entries it touched.
 */
static INT numfactor (dBSRmat   *A,
                      REAL      *luval,
                      INT       *jlu,
                      INT       *uptr)
{
    INT n=A->ROW,nb=A->nb, nb2=nb*nb, ib, ibstart;
    INT k, indj, inds, jluj, jlus;
    REAL  *mult,*mult1;
    INT *colptrs;
    INT status=FASP_SUCCESS;
    INT ret;

    colptrs=(INT*)fasp_mem_calloc(n,sizeof(INT));
    mult=(REAL*)fasp_mem_calloc(nb2,sizeof(REAL));
    mult1=(REAL*)fasp_mem_calloc(nb2,sizeof(REAL));

    //for (k=0;k<n;k++) colptrs[k]=0;
    memset(colptrs, 0, sizeof(INT)*n);

    switch (nb) {

        case 1:

            // scalar entries: blocks are single numbers
            for (k = 0; k < n; ++k) {

                bsr_ilu_scatter_row(A, luval, jlu, colptrs, k, nb2);

                for (indj = jlu[k]; indj < uptr[k]; ++indj) {

                    jluj = jlu[indj];

                    // multiplier = entry times the stored inverse pivot
                    luval[indj] = luval[indj]*luval[jluj];
                    mult[0] = luval[indj];

                    for (inds = uptr[jluj]; inds < jlu[jluj+1]; ++inds) {
                        jlus = jlu[inds];
                        if (colptrs[jlus] != 0)
                            luval[colptrs[jlus]] = luval[colptrs[jlus]] - mult[0]*luval[inds];
                    }

                }

                bsr_ilu_clear_row(jlu, colptrs, k);

                luval[k] = 1.0/luval[k];
            }

            break;

        case 3:

            for (k = 0; k < n; ++k) {

                bsr_ilu_scatter_row(A, luval, jlu, colptrs, k, nb2);

                for (indj = jlu[k]; indj < uptr[k]; ++indj) {
                    jluj = jlu[indj];

                    // multiplier block = entry times the stored inverse pivot
                    ibstart=indj*nb2;
                    fasp_blas_smat_mul_nc3(&(luval[ibstart]),&(luval[jluj*nb2]),mult);
                    for (ib=0;ib<nb2;++ib) luval[ibstart+ib]=mult[ib];

                    for (inds = uptr[jluj]; inds < jlu[jluj+1]; ++inds) {
                        jlus = jlu[inds];
                        if (colptrs[jlus] != 0) {
                            fasp_blas_smat_mul_nc3(mult,&(luval[inds*nb2]),mult1);
                            ibstart=colptrs[jlus]*nb2;
                            for (ib=0;ib<nb2;++ib) luval[ibstart+ib]-=mult1[ib];
                        }
                    }

                }

                bsr_ilu_clear_row(jlu, colptrs, k);

                fasp_smat_inv_nc3(&(luval[k*nb2]));
            }

            break;

        // NOTE(review): the labels -5 and -7 never match a block size, so
        // nb = 5 and nb = 7 are handled by the default branch; this looks
        // like a deliberate way of parking the specialised paths -- confirm.
        case -5:

            for (k = 0; k < n; ++k) {

                bsr_ilu_scatter_row(A, luval, jlu, colptrs, k, nb2);

                for (indj = jlu[k]; indj < uptr[k]; ++indj) {
                    jluj = jlu[indj];

                    ibstart=indj*nb2;
                    fasp_blas_smat_mul_nc5(&(luval[ibstart]),&(luval[jluj*nb2]),mult);
                    for (ib=0;ib<nb2;++ib) luval[ibstart+ib]=mult[ib];

                    for (inds = uptr[jluj]; inds < jlu[jluj+1]; ++inds) {
                        jlus = jlu[inds];
                        if (colptrs[jlus] != 0) {
                            fasp_blas_smat_mul_nc5(mult,&(luval[inds*nb2]),mult1);
                            ibstart=colptrs[jlus]*nb2;
                            for (ib=0;ib<nb2;++ib) luval[ibstart+ib]-=mult1[ib];
                        }
                    }

                }

                bsr_ilu_clear_row(jlu, colptrs, k);

                // fasp_smat_inv_nc5(&(luval[k*nb2])); // not numerically stable --zcs 04/26/2021
                ret = fasp_smat_invp_nc(&(luval[k*nb2]), 5);
                // remember the first failure instead of the last result
                if ( ret != FASP_SUCCESS && status == FASP_SUCCESS ) status = ret;
            }

            break;

        case -7:

            for (k = 0; k < n; ++k) {

                bsr_ilu_scatter_row(A, luval, jlu, colptrs, k, nb2);

                for (indj = jlu[k]; indj < uptr[k]; ++indj) {
                    jluj = jlu[indj];

                    ibstart=indj*nb2;
                    fasp_blas_smat_mul_nc7(&(luval[ibstart]),&(luval[jluj*nb2]),mult);
                    for (ib=0;ib<nb2;++ib) luval[ibstart+ib]=mult[ib];

                    for (inds = uptr[jluj]; inds < jlu[jluj+1]; ++inds) {
                        jlus = jlu[inds];
                        if (colptrs[jlus] != 0) {
                            fasp_blas_smat_mul_nc7(mult,&(luval[inds*nb2]),mult1);
                            ibstart=colptrs[jlus]*nb2;
                            for (ib=0;ib<nb2;++ib) luval[ibstart+ib]-=mult1[ib];
                        }
                    }

                }

                bsr_ilu_clear_row(jlu, colptrs, k);

                // fasp_smat_inv(&(luval[k*nb2]),nb); // not numerically stable --zcs 04/26/2021
                ret = fasp_smat_invp_nc(&(luval[k*nb2]), nb);
                // remember the first failure instead of the last result
                if ( ret != FASP_SUCCESS && status == FASP_SUCCESS ) status = ret;
            }

            break;

        default:

            // generic block size
            for (k=0;k<n;k++) {

                bsr_ilu_scatter_row(A, luval, jlu, colptrs, k, nb2);

                for (indj = jlu[k]; indj < uptr[k]; ++indj) {
                    jluj = jlu[indj];

                    // multiplier block = entry times the stored inverse pivot
                    ibstart=indj*nb2;
                    fasp_blas_smat_mul(&(luval[ibstart]),&(luval[jluj*nb2]),mult,nb);
                    for (ib=0;ib<nb2;++ib) luval[ibstart+ib]=mult[ib];

                    for (inds = uptr[jluj]; inds < jlu[jluj+1]; inds++) {
                        jlus = jlu[inds];
                        if (colptrs[jlus] != 0) {
                            fasp_blas_smat_mul(mult,&(luval[inds*nb2]),mult1,nb);
                            ibstart=colptrs[jlus]*nb2;
                            for (ib=0;ib<nb2;++ib) luval[ibstart+ib]-=mult1[ib];
                        }
                    }

                }

                bsr_ilu_clear_row(jlu, colptrs, k);

                //fasp_smat_inv(&(luval[k*nb2]),nb); // not numerically stable --zcs 04/26/2021
                ret = fasp_smat_invp_nc(&(luval[k * nb2]), nb);
                // remember the first failure instead of the last result
                if ( ret != FASP_SUCCESS && status == FASP_SUCCESS ) status = ret;
            }
    }

    fasp_mem_free(colptrs);  colptrs = NULL;
    fasp_mem_free(mult);     mult    = NULL;
    fasp_mem_free(mult1);    mult1   = NULL;

    return status;
}


static INT numfactor_mulcol (dBSRmat   *A,

                             REAL      *luval,

                             INT       *jlu,

                             INT       *uptr,

                             INT        ncolors,

                             INT       *ic,

                             INT       *icmap)

{

    INT status = FASP_SUCCESS;


#ifdef _OPENMP

    INT   n = A->ROW, nb = A->nb, nb2 = nb*nb;

    INT   ib, ibstart,ibstart1;

    INT   k, i, indj, inds, indja,jluj, jlus, ijaj, tmp;

    REAL  *mult, *mult1;

    INT   *colptrs;


    switch (nb) {


        case 1:

            for (i = 0; i < ncolors; ++i) {

#pragma omp parallel private(k,indj,ibstart,ib,indja,ijaj,ibstart1,jluj,inds,jlus,colptrs,tmp)

                {

                    colptrs=(INT*)fasp_mem_calloc(n,sizeof(INT));

                    memset(colptrs, 0, sizeof(INT)*n);

#pragma omp for

                    for (k = ic[i]; k < ic[i+1]; ++k) {

                        for (indj = jlu[k]; indj < jlu[k+1]; ++indj) {

                            colptrs[jlu[indj]] = indj;

                            ibstart=indj*nb2;

                            for (ib=0;ib<nb2;++ib) luval[ibstart+ib] = 0;

                        }

                        colptrs[k] =  k;

                        for (indja = A->IA[k]; indja < A->IA[k+1]; ++indja) {

                            ijaj = A->JA[indja];

                            ibstart=colptrs[ijaj]*nb2;

                            ibstart1=indja*nb2;

                            for (ib=0;ib<nb2;++ib) luval[ibstart+ib] = A->val[ibstart1+ib];

                        }

                        for (indj = jlu[k]; indj < uptr[k]; ++indj) {

                            jluj = jlu[indj];

                            luval[indj] = luval[indj]*luval[jluj];

                            tmp = luval[indj];

                            for (inds = uptr[jluj]; inds < jlu[jluj+1]; ++inds) {

                                jlus = jlu[inds];

                                if (colptrs[jlus] != 0)

                                    luval[colptrs[jlus]] = luval[colptrs[jlus]] - tmp*luval[inds];

                            }


                        }

                        for (indj = jlu[k]; indj < jlu[k+1]; ++indj) colptrs[jlu[indj]] = 0;

                        colptrs[k] =  0;

                        luval[k] = 1.0/luval[k];

                    }

                    fasp_mem_free(colptrs); colptrs = NULL;

                }

            }


            break;


        case 2:


            for (i = 0; i < ncolors; ++i) {

#pragma omp parallel private(k,indj,ibstart,ib,indja,ijaj,ibstart1,jluj,inds,jlus,mult,mult1,colptrs)

                {

                    colptrs=(INT*)fasp_mem_calloc(n,sizeof(INT));

                    memset(colptrs, 0, sizeof(INT)*n);

                    mult=(REAL*)fasp_mem_calloc(nb2,sizeof(REAL));

                    mult1=(REAL*)fasp_mem_calloc(nb2,sizeof(REAL));

#pragma omp for

                    for (k = ic[i]; k < ic[i+1]; ++k) {

                        for (indj = jlu[k]; indj < jlu[k+1]; ++indj) {

                            colptrs[jlu[indj]] = indj;

                            ibstart=indj*nb2;

                            for (ib=0;ib<nb2;++ib) luval[ibstart+ib] = 0;

                        }

                        colptrs[k] =  k;

                        for (indja = A->IA[k]; indja < A->IA[k+1]; ++indja) {

                            ijaj = A->JA[indja];

                            ibstart=colptrs[ijaj]*nb2;

                            ibstart1=indja*nb2;

                            for (ib=0;ib<nb2;++ib) luval[ibstart+ib] = A->val[ibstart1+ib];

                        }

                        for (indj = jlu[k]; indj < uptr[k]; ++indj) {

                            jluj = jlu[indj];

                            ibstart=indj*nb2;

                            fasp_blas_smat_mul_nc2(&(luval[ibstart]),&(luval[jluj*nb2]),mult);

                            for (ib=0;ib<nb2;++ib) luval[ibstart+ib]=mult[ib];

                            for (inds = uptr[jluj]; inds < jlu[jluj+1]; ++inds) {

                                jlus = jlu[inds];

                                if (colptrs[jlus] != 0) {

                                    fasp_blas_smat_mul_nc2(mult,&(luval[inds*nb2]),mult1);

                                    ibstart=colptrs[jlus]*nb2;

                                    for (ib=0;ib<nb2;++ib) luval[ibstart+ib]-=mult1[ib];

                                }

                            }

                        }

                        for (indj = jlu[k]; indj < jlu[k+1]; ++indj) colptrs[jlu[indj]] = 0;

                        colptrs[k] =  0;

                        fasp_smat_inv_nc2(&(luval[k*nb2]));

                    }

                    fasp_mem_free(colptrs); colptrs = NULL;

                    fasp_mem_free(mult);    mult    = NULL;

                    fasp_mem_free(mult1);   mult1   = NULL;

                }

            }

            break;


        case 3:


            for (i = 0; i < ncolors; ++i) {

#pragma omp parallel private(k,indj,ibstart,ib,indja,ijaj,ibstart1,jluj,inds,jlus,mult,mult1,colptrs)

                {

                    colptrs=(INT*)fasp_mem_calloc(n,sizeof(INT));

                    memset(colptrs, 0, sizeof(INT)*n);

                    mult=(REAL*)fasp_mem_calloc(nb2,sizeof(REAL));

                    mult1=(REAL*)fasp_mem_calloc(nb2,sizeof(REAL));

#pragma omp for

                    for (k = ic[i]; k < ic[i+1]; ++k) {

                        for (indj = jlu[k]; indj < jlu[k+1]; ++indj) {

                            colptrs[jlu[indj]] = indj;

                            ibstart=indj*nb2;

                            for (ib=0;ib<nb2;++ib) luval[ibstart+ib] = 0;

                        }

                        colptrs[k] =  k;

                        for (indja = A->IA[k]; indja < A->IA[k+1]; ++indja) {

                            ijaj = A->JA[indja];

                            ibstart=colptrs[ijaj]*nb2;

                            ibstart1=indja*nb2;

                            for (ib=0;ib<nb2;++ib) luval[ibstart+ib] = A->val[ibstart1+ib];

                        }

                        for (indj = jlu[k]; indj < uptr[k]; ++indj) {

                            jluj = jlu[indj];

                            ibstart=indj*nb2;

                            fasp_blas_smat_mul_nc3(&(luval[ibstart]),&(luval[jluj*nb2]),mult);

                            for (ib=0;ib<nb2;++ib) luval[ibstart+ib]=mult[ib];

                            for (inds = uptr[jluj]; inds < jlu[jluj+1]; ++inds) {

                                jlus = jlu[inds];

                                if (colptrs[jlus] != 0) {

                                    fasp_blas_smat_mul_nc3(mult,&(luval[inds*nb2]),mult1);

                                    ibstart=colptrs[jlus]*nb2;

                                    for (ib=0;ib<nb2;++ib) luval[ibstart+ib]-=mult1[ib];

                                }

                            }

                        }

                        for (indj = jlu[k]; indj < jlu[k+1]; ++indj) colptrs[jlu[indj]] = 0;

                        colptrs[k] =  0;

                        fasp_smat_inv_nc3(&(luval[k*nb2]));

                    }

                    fasp_mem_free(colptrs); colptrs = NULL;

                    fasp_mem_free(mult);    mult    = NULL;

                    fasp_mem_free(mult1);   mult1   = NULL;

                }

            }

            break;


        default:

        {

            if (nb > 3) printf("Multi-thread ILU numerical decomposition for %d\

                               components has not been implemented!!!", nb);

            exit(0);

        }

    }


#endif


    return status;

}


#ifdef _OPENMP

/**
 * \fn static void levsch_block_mul (const REAL *a, const REAL *b, REAL *c,
 *                                   const INT nb)
 *
 * \brief Dense block product c = a*b for nb-by-nb blocks
 *
 * \param a   Left factor (nb*nb entries)
 * \param b   Right factor (nb*nb entries)
 * \param c   Result (nb*nb entries), must not overlap a or b
 * \param nb  Block size
 *
 * \note nb = 1 is a plain scalar product, nb = 2..5 use the unrolled kernels
 *       and every other size uses the generic kernel.
 */
static void levsch_block_mul (const REAL *a,
                              const REAL *b,
                              REAL       *c,
                              const INT   nb)
{
    switch (nb) {
        case 1:  c[0] = a[0]*b[0];                break;
        case 2:  fasp_blas_smat_mul_nc2(a, b, c); break;
        case 3:  fasp_blas_smat_mul_nc3(a, b, c); break;
        case 4:  fasp_blas_smat_mul_nc4(a, b, c); break;
        case 5:  fasp_blas_smat_mul_nc5(a, b, c); break;
        default: fasp_blas_smat_mul(a, b, c, nb); break;
    }
}

/**
 * \fn static void levsch_block_inv (REAL *a, const INT nb)
 *
 * \brief In-place inverse of one nb-by-nb dense block
 *
 * \param a   Block to invert (overwritten by its inverse)
 * \param nb  Block size
 *
 * \note The return value of fasp_smat_invp_nc is not inspected, exactly as in
 *       the previous implementation.
 */
static void levsch_block_inv (REAL      *a,
                              const INT  nb)
{
    switch (nb) {
        case 1:  a[0] = 1.0/a[0];                 break;
        case 2:  fasp_smat_inv_nc2(a);            break;
        case 3:  fasp_smat_inv_nc3(a);            break;
        case 4:  fasp_smat_inv_nc4(a);            break;
        case 5:  fasp_smat_inv_nc5(a);            break;
        default: (void)fasp_smat_invp_nc(a, nb);  break;
    }
}

/**
 * \fn static void levsch_factor_row (const dBSRmat *A, REAL *luval,
 *                                    const INT *jlu, const INT *uptr,
 *                                    const INT k, const INT nb, INT *colptrs,
 *                                    REAL *mult, REAL *mult1)
 *
 * \brief Numerical factorization of block row k
 *
 * \param A        Matrix being factorized
 * \param luval    Block values of the factors; block p starts at luval[p*nb*nb]
 * \param jlu      Row pointers (entries 0..n) followed by column indices
 * \param uptr     uptr[k] is the position in jlu where the U part of row k starts
 * \param k        Row to factorize
 * \param nb       Block size
 * \param colptrs  Work array of length n; must be all zero on entry, the
 *                 entries touched here are reset to zero before returning
 * \param mult     Work block (nb*nb entries)
 * \param mult1    Work block (nb*nb entries)
 *
 * \note The diagonal block of a finished row is stored inverted, so
 *       multiplying by luval[jluj*nb*nb] applies the inverse pivot of row jluj.
 */
static void levsch_factor_row (const dBSRmat *A,
                               REAL          *luval,
                               const INT     *jlu,
                               const INT     *uptr,
                               const INT      k,
                               const INT      nb,
                               INT           *colptrs,
                               REAL          *mult,
                               REAL          *mult1)
{
    const INT nb2 = nb*nb;
    INT ib, indj, inds, indja, jluj, jlus, dst, src;

    // Record where each column of row k lives and clear those blocks
    for (indj = jlu[k]; indj < jlu[k+1]; ++indj) {
        colptrs[jlu[indj]] = indj;
        dst = indj*nb2;
        for (ib = 0; ib < nb2; ++ib) luval[dst+ib] = 0;
    }
    colptrs[k] = k; // the diagonal block is stored at position k

    // Scatter row k of A into the factor storage
    for (indja = A->IA[k]; indja < A->IA[k+1]; ++indja) {
        dst = colptrs[A->JA[indja]]*nb2;
        src = indja*nb2;
        for (ib = 0; ib < nb2; ++ib) luval[dst+ib] = A->val[src+ib];
    }

    // Eliminate the L part of row k, one earlier row jluj at a time
    for (indj = jlu[k]; indj < uptr[k]; ++indj) {

        jluj = jlu[indj];
        dst  = indj*nb2;

        // multiplier = current block times inverted pivot of row jluj
        levsch_block_mul(&(luval[dst]), &(luval[jluj*nb2]), mult, nb);
        for (ib = 0; ib < nb2; ++ib) luval[dst+ib] = mult[ib];

        // update only positions that exist in the pattern of row k
        for (inds = uptr[jluj]; inds < jlu[jluj+1]; ++inds) {
            jlus = jlu[inds];
            if (colptrs[jlus] != 0) {
                levsch_block_mul(mult, &(luval[inds*nb2]), mult1, nb);
                dst = colptrs[jlus]*nb2;
                for (ib = 0; ib < nb2; ++ib) luval[dst+ib] -= mult1[ib];
            }
        }
    }

    // Leave the work array clean for the next row handled by this thread
    for (indj = jlu[k]; indj < jlu[k+1]; ++indj) colptrs[jlu[indj]] = 0;
    colptrs[k] = 0;

    // Store the pivot block inverted
    levsch_block_inv(&(luval[k*nb2]), nb);
}

#endif // _OPENMP

/**
 * \fn static INT numfactor_levsch (dBSRmat *A, REAL *luval, INT *jlu,
 *                                  INT *uptr, INT ncolors, INT *ic, INT *icmap)
 *
 * \brief Multi-thread numerical ILU factorization, one level at a time
 *
 * \param A        Matrix to be factorized
 * \param luval    Block values of the factors (output)
 * \param jlu      Row pointers followed by column indices of the factors
 * \param uptr     Start of the U part of every row inside jlu
 * \param ncolors  Number of levels
 * \param ic       Level pointers: level i owns positions ic[i]..ic[i+1]-1
 * \param icmap    Row number stored at each position of the level ordering
 *
 * \return         FASP_SUCCESS (no failure is detected here)
 *
 * \note Levels are processed one after another; the rows of one level are
 *       distributed over the threads. Without OpenMP this routine does nothing.
 *
 * \note The scalar case (nb = 1) now follows icmap like every other block
 *       size, and its multiplier is kept in REAL precision; it used to be
 *       stored in an INT and therefore truncated.
 */
static INT numfactor_levsch (dBSRmat *A,
                             REAL *luval,
                             INT *jlu,
                             INT *uptr,
                             INT ncolors,
                             INT *ic,
                             INT *icmap)
{
    INT status = FASP_SUCCESS;

#ifdef _OPENMP
    const INT n = A->ROW, nb = A->nb, nb2 = nb*nb;
    INT i;

    for (i = 0; i < ncolors; ++i) {

#pragma omp parallel
        {
            // Everything declared in this block is private to the thread.
            // fasp_mem_calloc returns zero-filled memory, which is the state
            // levsch_factor_row expects for colptrs.
            INT   ii;
            INT  *colptrs = (INT*)fasp_mem_calloc(n, sizeof(INT));
            REAL *mult    = (REAL*)fasp_mem_calloc(nb2, sizeof(REAL));
            REAL *mult1   = (REAL*)fasp_mem_calloc(nb2, sizeof(REAL));

#pragma omp for
            for (ii = ic[i]; ii < ic[i+1]; ++ii) {
                levsch_factor_row(A, luval, jlu, uptr, icmap[ii], nb,
                                  colptrs, mult, mult1);
            }

            fasp_mem_free(colptrs); colptrs = NULL;
            fasp_mem_free(mult);    mult    = NULL;
            fasp_mem_free(mult1);   mult1   = NULL;
        }
    }
#endif

    return status;
}


/**
 * \fn static void generate_S_theta (dCSRmat *A, iCSRmat *S, REAL theta)
 *
 * \brief Build the filtered sparsity pattern S of A
 *
 * \param A      Input matrix; its index arrays are used as direct offsets
 *               into the entry arrays
 * \param S      Output pattern; S->IA and S->JA are allocated here and S->val
 *               is set to NULL
 * \param theta  Filtering threshold
 *
 * \note An off-diagonal entry a_ij is dropped when
 *       theta * sum_j |a_ij| >= |a_ij|; diagonal entries are always kept.
 *
 * \note If nothing survives the filter, S->JA is released and set to NULL and
 *       S->IA[row] is set to 0, so S is a consistent empty pattern.
 */
static void generate_S_theta (dCSRmat *A,
                              iCSRmat *S,
                              REAL     theta)
{
    const INT row = A->row, col = A->col;
    const INT row_plus_one = row+1;
    const INT nnz = A->IA[row]-A->IA[0];

    INT index, i, j, begin_row, end_row;
    INT *ia = A->IA, *ja = A->JA;
    REAL *aj = A->val;
    REAL row_abs_sum;

    // start from a copy of the structure of A
    S->row = row; S->col = col; S->nnz = nnz; S->val = NULL;

    S->IA = (INT*)fasp_mem_calloc(row_plus_one, sizeof(INT));
    S->JA = (INT*)fasp_mem_calloc(nnz, sizeof(INT));

    fasp_iarray_cp(row_plus_one, ia, S->IA);
    fasp_iarray_cp(nnz, ja, S->JA);

    // mark the entries to be dropped with column index -1
    for (i = 0; i < row; ++i) {

        begin_row = ia[i]; end_row = ia[i+1];

        // threshold of this row: theta times the sum of absolute values
        row_abs_sum = 0;
        for (j = begin_row; j < end_row; j++) row_abs_sum += ABS(aj[j]);
        row_abs_sum = row_abs_sum*theta;

        for (j = begin_row; j < end_row; j++) {
            if ( (row_abs_sum >= ABS(aj[j])) && (ja[j] != i) ) S->JA[j] = -1;
        }

    } // end for i

    // compress: shift the surviving column indices to the front of S->JA
    index = 0;
    for (i = 0; i < row; ++i) {
        S->IA[i] = index;
        begin_row = ia[i]; end_row = ia[i+1];
        for (j = begin_row; j < end_row; j++) {
            if (S->JA[j] > -1) {
                S->JA[index] = S->JA[j];
                index++;
            }
        }
    }

    // the closing row pointer and the entry count always match the result
    S->IA[row] = index;
    S->nnz     = index;

    if (index > 0) {
        S->JA = (INT*)fasp_mem_realloc(S->JA, index*sizeof(INT));
    }
    else {
        // nothing survived: release the index array instead of leaking it
        fasp_mem_free(S->JA);
        S->JA = NULL;
    }
}


/**
 * \fn static void multicoloring (AMG_data *mgl, REAL theta, INT *rowmax,
 *                                INT *groups)
 *
 * \brief Group the rows of mgl->A into colors
 *
 * \param mgl     Level data; mgl->ic, mgl->icmap and mgl->colors are written
 * \param theta   Threshold, clamped to [0,1]. Strictly between 0 and 1 the
 *                filtered pattern from generate_S_theta is used; equal to 1
 *                all rows are put into a single color; otherwise the pattern
 *                of A itself is used
 * \param rowmax  Output: largest number of entries in a row of A (set to 1
 *                when theta equals 1)
 * \param groups  Output: number of colors
 *
 * \note Color c owns positions mgl->ic[c]..mgl->ic[c+1]-1 of mgl->icmap.
 *       Both arrays are obtained with malloc.
 */
static void multicoloring (AMG_data *mgl,
                           REAL      theta,
                           INT      *rowmax,
                           INT      *groups)
{
    const INT n = mgl->A.row;
    dCSRmat   A = mgl->A;
    iCSRmat   S;

    INT  *rowptr, *colidx;   // pattern that defines the neighbours
    INT  *queue, *mark;      // circular queue of rows / color stamp per row
    INT   v, p, stop, prev;
    INT   ncolor, nplaced, widest, len;
    INT   head, tail;
    INT   opens_color;

    S.IA  = NULL;
    S.JA  = NULL;
    S.val = NULL;

    theta = MAX(0.0, MIN(1.0, theta));

    // theta = 1: one color holding every row in natural order
    if (theta == 1.0) {

        mgl->ic = (INT*)malloc(sizeof(INT)*2);
        mgl->icmap = (INT *)malloc(sizeof(INT)*(n+1));
        mgl->ic[0] = 0;
        mgl->ic[1] = n;
        for (v = 0; v < n; v++) mgl->icmap[v] = v;

        mgl->colors = 1;
        *groups = 1;
        *rowmax = 1;

        printf("### WARNING: Theta = %lf! [%s]\n", theta, __FUNCTION__);

        return;
    }

    if (theta > 0.0 && theta < 1.0) {
        generate_S_theta(&A, &S, theta);
        rowptr = S.IA;
        colidx = S.JA;
    }
    else {
        rowptr = A.IA;
        colidx = A.JA;
    }

    queue = (INT *)malloc(sizeof(INT)*(n+1));
    mark  = (INT *)malloc(sizeof(INT)*(n+1));

#ifdef _OPENMP
#pragma omp parallel for private(v)
#endif
    for ( v = 0; v < n; v++ ) queue[v] = v;

    // widest row of A; it also sizes the color pointer array
    widest = 0;
    for ( v = 0; v < n; v++ ) {
        len = A.IA[v+1] - A.IA[v];
        if ( len > widest ) widest = len;
    }
    *rowmax = widest;

    mgl->ic    = (INT *)malloc(sizeof(INT)*(widest+2));
    mgl->icmap = (INT *)malloc(sizeof(INT)*(n+1));

    head = n-1;
    tail = n-1;

    memset(mark, -1, sizeof(INT)*(n+1));
    memset(mgl->icmap, 0, sizeof(INT)*n);

    ncolor     = 0;
    nplaced    = 0;
    mgl->ic[0] = 0;
    prev       = 0;

    do {
        // take the next row from the circular queue
        head = (head+1 == n) ? 0 : head+1;
        v = queue[head];

        // a row index that does not increase means the queue wrapped around
        opens_color = (v <= prev);

        if ( !opens_color && mark[v] == ncolor ) {
            // a neighbour already sits in the current color: postpone the row
            tail = (tail+1 == n) ? 0 : tail+1;
            queue[tail] = v;
        }
        else {
            if ( opens_color ) {
                mgl->ic[ncolor] = nplaced;
                ncolor++;
            }

            mgl->icmap[nplaced] = v;
            nplaced++;

            // stamp every neighbour of the row with the current color
            stop = rowptr[v+1];
            for ( p = rowptr[v]; p < stop; p++ ) mark[colidx[p]] = ncolor;
        }

        prev = v;

    } while ( tail != head );

    mgl->ic[ncolor] = nplaced;
    mgl->colors     = ncolor;
    *groups         = ncolor;

    free(queue);
    free(mark);

    fasp_mem_free(S.IA); S.IA = NULL;
    fasp_mem_free(S.JA); S.JA = NULL;

    return;
}


/**
 * \fn void topologic_sort_ILU (ILU_data *iludata)
 *
 * \brief Reordering vertices according to level schedule strategy
 *
 * \param iludata  ILU data; reads row and ijlu, writes nlevL, ilevL, jlevL,
 *                 nlevU, ilevU and jlevU (the four arrays are allocated here)
 *
 * \note For the lower part, the level of row i is one more than the largest
 *       level among the columns <= i of that row; for the upper part the rows
 *       are visited backwards and columns >= i are used. jlevL/jlevU list the
 *       rows grouped by level and ilevL[l]..ilevL[l+1]-1 (resp. ilevU) is the
 *       range of the (l+1)-th level.
 *
 * \note The stored counts are nlevL+1 and nlevU+1, i.e. one more than the
 *       number of levels found. The pointer arrays therefore hold n+2
 *       zero-initialised entries, so that reading ilev[i+1] for every
 *       i < count stays inside the allocation even when there are n levels.
 *       NOTE(review): presumably the consumers loop over i < count in this
 *       way -- confirm against the callers.
 */
void topologic_sort_ILU (ILU_data *iludata)
{
    INT i, j, k, l;
    INT nlevL, nlevU;

    INT n = iludata->row;
    INT *ijlu = iludata->ijlu;

    INT *level = (INT *)fasp_mem_calloc(n, sizeof(INT));

    INT *jlevL = (INT *)fasp_mem_calloc(n, sizeof(INT));
    INT *ilevL = (INT *)fasp_mem_calloc(n+2, sizeof(INT));

    INT *jlevU = (INT *)fasp_mem_calloc(n, sizeof(INT));
    INT *ilevU = (INT *)fasp_mem_calloc(n+2, sizeof(INT));

    // form level for each row of lower triangular matrix.
    nlevL = 0;
    ilevL[0] = 0;

    for (i=0; i<n; i++) {
        l = 0;
        for (j=ijlu[i]; j<ijlu[i+1]; j++) if (ijlu[j]<=i) l = MAX(l, level[ijlu[j]]);
        level[i] = l+1;
        ilevL[l+1] ++;               // count the rows of each level
        nlevL = MAX(nlevL, l+1);
    }

    // counts -> end offsets; ilevL[lev-1] is now the start of level lev
    for (i=1; i<=nlevL; i++) ilevL[i] += ilevL[i-1];

    // place the rows; every start offset advances to the end of its level
    for (i=0; i<n; i++) {
        k = ilevL[level[i]-1];
        jlevL[k] = i;
        ilevL[level[i]-1]++;
    }

    // shift back by one entry to recover the start offsets
    for (i=nlevL-1; i>0; i--) ilevL[i] = ilevL[i-1];
    ilevL[0] = 0;

    // form level for each row of upper triangular matrix.
    nlevU = 0;
    ilevU[0] = 0;

    for (i=0; i<n; i++) level[i] = 0;

    for (i=n-1; i>=0; i--) {
        l = 0;
        for (j=ijlu[i]; j<ijlu[i+1]; j++) if (ijlu[j]>=i) l = MAX(l, level[ijlu[j]]);
        level[i] = l+1;
        ilevU[l+1] ++;
        nlevU = MAX(nlevU, l+1);
    }

    for (i=1; i<=nlevU; i++) ilevU[i] += ilevU[i-1];

    for (i=n-1; i>=0; i--) {
        k = ilevU[level[i]-1];
        jlevU[k] = i;
        ilevU[level[i]-1]++;
    }

    for (i=nlevU-1; i>0; i--) ilevU[i] = ilevU[i-1];
    ilevU[0] = 0;

    iludata->nlevL = nlevL+1; iludata->ilevL = ilevL; iludata->jlevL = jlevL;
    iludata->nlevU = nlevU+1; iludata->ilevU = ilevU; iludata->jlevU = jlevU;

    fasp_mem_free(level); level = NULL;
}


/**
 * \fn void mulcol_independ_set (AMG_data *mgl, INT gslvl)
 *
 * \brief Multi-coloring vertices of adjacency graph of A
 *
 * \param mgl    Array of level data; multicoloring is run on levels
 *               0 .. min(gslvl, mgl->num_levels-1) - 1
 * \param gslvl  Upper bound on the number of levels to be colored
 *
 * \note The threshold passed to multicoloring is fixed to 0 and the print
 *       level is fixed to 0 inside this routine.
 */
void mulcol_independ_set (AMG_data *mgl,
                          INT       gslvl)
{
    const REAL theta  = 0.00;
    const INT  prtlvl = 0;
    const INT  maxlvl = MIN(gslvl, mgl->num_levels-1);

    INT lvl, widest, ncolors;

#ifdef _OPENMP
#pragma omp parallel for private(lvl,widest,ncolors) schedule(static, 1)
#endif
    for ( lvl = 0; lvl < maxlvl; lvl++ ) {

        AMG_data *cur = &mgl[lvl];

        multicoloring(cur, theta, &widest, &ncolors);

        // print
        if ( prtlvl > PRINT_MIN ) {
            printf("mgl[%3d].A.row = %12d rowmax = %5d rowavg = %7.2lf colors = %5d theta = %le\n",
                   lvl, cur->A.row, widest, (double)cur->A.nnz/cur->A.row,
                   cur->colors, theta);
        }
    }
}


/*---------------------------------*/

/*--        End of File          --*/

/*---------------------------------*/

fasp_darray_set
void fasp_darray_set(const INT n, REAL *x, const REAL val)
Set initial value for an array to be x=val.
Definition: AuxArray.c:41

fasp_iarray_cp
void fasp_iarray_cp(const INT n, const INT *x, INT *y)
Copy an array to the other y=x.
Definition: AuxArray.c:227

fasp_mem_free
void fasp_mem_free(void *mem)
Free up previously allocated memory body and set pointer to NULL.
Definition: AuxMemory.c:152

fasp_mem_realloc
void * fasp_mem_realloc(void *oldmem, const LONGLONG tsize)
Reallocate, initiate, and check memory.
Definition: AuxMemory.c:113

fasp_mem_calloc
void * fasp_mem_calloc(const unsigned int size, const unsigned int type)
Allocate, initiate, and check memory.
Definition: AuxMemory.c:65

fasp_gettime
void fasp_gettime(REAL *time)
Get system time.
Definition: AuxTiming.c:37

fasp_ilu_dbsr_setup_levsch_omp
SHORT fasp_ilu_dbsr_setup_levsch_omp(dBSRmat *A, ILU_data *iludata, ILU_param *iluparam)
Get ILU decomposition of a BSR matrix A based on level schedule strategy.
Definition: BlaILUSetupBSR.c:456

fasp_ilu_dbsr_setup_mc_omp
SHORT fasp_ilu_dbsr_setup_mc_omp(dBSRmat *A, dCSRmat *Ap, ILU_data *iludata, ILU_param *iluparam)
Multi-thread ILU decomposition of a BSR matrix A based on graph coloring.
Definition: BlaILUSetupBSR.c:745

topologic_sort_ILU
void topologic_sort_ILU(ILU_data *iludata)
Reordering vertices according to level schedule strategy.
Definition: BlaILUSetupBSR.c:1870

fasp_ilu_dbsr_setup_step
SHORT fasp_ilu_dbsr_setup_step(dBSRmat *A, ILU_data *iludata, ILU_param *iluparam, INT step)
Get ILU decomposition of a BSR matrix A.
Definition: BlaILUSetupBSR.c:187

fasp_ilu_dbsr_setup
SHORT fasp_ilu_dbsr_setup(dBSRmat *A, ILU_data *iludata, ILU_param *iluparam)
Get ILU decomposition of a BSR matrix A.
Definition: BlaILUSetupBSR.c:55

mulcol_independ_set
void mulcol_independ_set(AMG_data *mgl, INT gslvl)
Multi-coloring vertices of adjacency graph of A.
Definition: BlaILUSetupBSR.c:1952

fasp_ilu_dbsr_setup_omp
SHORT fasp_ilu_dbsr_setup_omp(dBSRmat *A, ILU_data *iludata, ILU_param *iluparam)
Multi-thread ILU decomposition of a BSR matrix A based on graph coloring.
Definition: BlaILUSetupBSR.c:320

fasp_ilu_dbsr_setup_levsch_step
SHORT fasp_ilu_dbsr_setup_levsch_step(dBSRmat *A, ILU_data *iludata, ILU_param *iluparam, INT step)
Get ILU decomposition of a BSR matrix A based on level schedule strategy.
Definition: BlaILUSetupBSR.c:597

fasp_symbfactor
void fasp_symbfactor(INT n, INT *colind, INT *rwptr, INT levfill, INT nzmax, INT *nzlu, INT *ijlu, INT *uptr, INT *ierr)
Symbolic factorization of a CSR matrix A in compressed sparse row format, with resulting factors stor...
Definition: BlaILU.c:1372

fasp_smat_inv_nc3
void fasp_smat_inv_nc3(REAL *a)
Compute the inverse matrix of a 3*3 full matrix A (in place)
Definition: BlaSmallMatInv.c:67

fasp_smat_inv_nc4
void fasp_smat_inv_nc4(REAL *a)
Compute the inverse matrix of a 4*4 full matrix A (in place)
Definition: BlaSmallMatInv.c:111

fasp_smat_invp_nc
SHORT fasp_smat_invp_nc(REAL *a, const INT n)
Compute the inverse of a matrix using Gauss Elimination with Pivoting.
Definition: BlaSmallMatInv.c:508

fasp_smat_inv_nc2
void fasp_smat_inv_nc2(REAL *a)
Compute the inverse matrix of a 2*2 full matrix A (in place)
Definition: BlaSmallMatInv.c:33

fasp_smat_inv_nc5
void fasp_smat_inv_nc5(REAL *a)
Compute the inverse matrix of a 5*5 full matrix A (in place)
Definition: BlaSmallMatInv.c:170

fasp_blas_smat_mul_nc4
void fasp_blas_smat_mul_nc4(const REAL *a, const REAL *b, REAL *c)
Compute the matrix product of two 4*4 matrices a and b, stored in c.
Definition: BlaSmallMat.c:350

fasp_blas_smat_mul
void fasp_blas_smat_mul(const REAL *a, const REAL *b, REAL *c, const INT n)
Compute the matrix product of two small full matrices a and b, stored in c.
Definition: BlaSmallMat.c:596

fasp_blas_smat_mul_nc2
void fasp_blas_smat_mul_nc2(const REAL *a, const REAL *b, REAL *c)
Compute the matrix product of two 2*2 matrices a and b, stored in c.
Definition: BlaSmallMat.c:289

fasp_blas_smat_mul_nc3
void fasp_blas_smat_mul_nc3(const REAL *a, const REAL *b, REAL *c)
Compute the matrix product of two 3*3 matrices a and b, stored in c.
Definition: BlaSmallMat.c:315

fasp_blas_smat_mul_nc7
void fasp_blas_smat_mul_nc7(const REAL *a, const REAL *b, REAL *c)
Compute the matrix product of two 7*7 matrices a and b, stored in c.
Definition: BlaSmallMat.c:452

fasp_blas_smat_mul_nc5
void fasp_blas_smat_mul_nc5(const REAL *a, const REAL *b, REAL *c)
Compute the matrix product of two 5*5 matrices a and b, stored in c.
Definition: BlaSmallMat.c:395

fasp_dbsr_perm
dBSRmat fasp_dbsr_perm(const dBSRmat *A, const INT *P)
Apply permutation of A, i.e. Aperm=PAP' by the orders given in P.
Definition: BlaSparseBSR.c:2106

fasp_dbsr_free
void fasp_dbsr_free(dBSRmat *A)
Free memory space for BSR format sparse matrix.
Definition: BlaSparseBSR.c:140

fasp_dcsr_create
dCSRmat fasp_dcsr_create(const INT m, const INT n, const INT nnz)
Create CSR sparse matrix data memory space.
Definition: BlaSparseCSR.c:47

fasp_dcsr_free
void fasp_dcsr_free(dCSRmat *A)
Free CSR sparse matrix data memory space.
Definition: BlaSparseCSR.c:184

fasp_dcsr_cp
void fasp_dcsr_cp(const dCSRmat *A, dCSRmat *B)
Copy a dCSRmat to a new one, B=A.
Definition: BlaSparseCSR.c:851

fasp_dcsr_sympart
dCSRmat fasp_dcsr_sympart(dCSRmat *A)
Get symmetric part of a dCSRmat matrix.
Definition: BlaSparseCSR.c:1357

fasp_blas_dcsr_mxm
void fasp_blas_dcsr_mxm(const dCSRmat *A, const dCSRmat *B, dCSRmat *C)
Sparse matrix multiplication C=A*B.
Definition: BlaSpmvCSR.c:893

fasp_amg_data_create
AMG_data * fasp_amg_data_create(SHORT max_levels)
Create and initialize AMG_data for classical and SA AMG.
Definition: PreDataInit.c:64

fasp.h
Main header file for the FASP project.

MIN
#define MIN(a, b)
Definition: fasp.h:83

REAL
#define REAL
Definition: fasp.h:75

SHORT
#define SHORT
FASP integer and floating point numbers.
Definition: fasp.h:71

ABS
#define ABS(a)
Definition: fasp.h:84

MAX
#define MAX(a, b)
Definition of max, min, abs.
Definition: fasp.h:82

INT
#define INT
Definition: fasp.h:72

FASP_SUCCESS
#define FASP_SUCCESS
Definition of return status and error messages.
Definition: fasp_const.h:19

ERROR_SOLVER_ILUSETUP
#define ERROR_SOLVER_ILUSETUP
Definition: fasp_const.h:46

ILUtp
#define ILUtp
Definition: fasp_const.h:151

PRINT_NONE
#define PRINT_NONE
Print level for all subroutines – not including DEBUG output.
Definition: fasp_const.h:73

PRINT_MIN
#define PRINT_MIN
Definition: fasp_const.h:74

AMG_data
Data for AMG methods.
Definition: fasp.h:804

AMG_data::A
dCSRmat A
pointer to the matrix at level level_num
Definition: fasp.h:817

AMG_data::colors
INT colors
number of colors
Definition: fasp.h:878

AMG_data::ic
INT * ic
indices for different colors
Definition: fasp.h:872

AMG_data::icmap
INT * icmap
mapping from vertex to color
Definition: fasp.h:875

AMG_data::num_levels
SHORT num_levels
number of levels in use <= max_levels
Definition: fasp.h:812

ILU_data
Data for ILU setup.
Definition: fasp.h:651

ILU_data::ijlu
INT * ijlu
integer array of row pointers and column indexes, the size is nzlu
Definition: fasp.h:669

ILU_data::luval
REAL * luval
nonzero entries of LU
Definition: fasp.h:672

ILU_data::col
INT col
column of matrix LU, n
Definition: fasp.h:663

ILU_data::jlevU
INT * jlevU
mapping from row to color for upper triangle
Definition: fasp.h:716

ILU_data::nlevU
INT nlevU
number of colors for upper triangle
Definition: fasp.h:704

ILU_data::nwork
INT nwork
work space size
Definition: fasp.h:678

ILU_data::nb
INT nb
block size for BSR type only
Definition: fasp.h:675

ILU_data::row
INT row
row number of matrix LU, m
Definition: fasp.h:660

ILU_data::nlevL
INT nlevL
number of colors for lower triangle
Definition: fasp.h:701

ILU_data::type
INT type
type of ILUdata
Definition: fasp.h:657

ILU_data::ic
INT * ic
indices for different colors
Definition: fasp.h:692

ILU_data::icmap
INT * icmap
mapping from vertex to color
Definition: fasp.h:695

ILU_data::iperm
INT * iperm
permutation arrays for ILUtp
Definition: fasp.h:684

ILU_data::nzlu
INT nzlu
number of nonzero entries
Definition: fasp.h:666

ILU_data::jlevL
INT * jlevL
mapping from row to color for lower triangle
Definition: fasp.h:713

ILU_data::work
REAL * work
work space
Definition: fasp.h:681

ILU_data::uptr
INT * uptr
temporary work space
Definition: fasp.h:698

ILU_data::ilevL
INT * ilevL
number of vertices in each color for lower triangle
Definition: fasp.h:707

ILU_data::A
dCSRmat * A
pointer to the original coefficient matrix
Definition: fasp.h:654

ILU_data::ilevU
INT * ilevU
number of vertices in each color for upper triangle
Definition: fasp.h:710

ILU_param
Parameters for ILU.
Definition: fasp.h:404

ILU_param::ILU_lfil
INT ILU_lfil
level of fill-in for ILUk
Definition: fasp.h:413

ILU_param::print_level
SHORT print_level
print level
Definition: fasp.h:407

ILU_param::ILU_type
SHORT ILU_type
ILU type for decomposition.
Definition: fasp.h:410

dBSRmat
Block sparse row storage matrix of REAL type.
Definition: fasp_block.h:34

dBSRmat::COL
INT COL
number of cols of sub-blocks in matrix A, N
Definition: fasp_block.h:40

dBSRmat::NNZ
INT NNZ
number of nonzero sub-blocks in matrix A, NNZ
Definition: fasp_block.h:43

dBSRmat::val
REAL * val
Definition: fasp_block.h:57

dBSRmat::nb
INT nb
dimension of each sub-block
Definition: fasp_block.h:46

dBSRmat::IA
INT * IA
integer array of row pointers, the size is ROW+1
Definition: fasp_block.h:60

dBSRmat::ROW
INT ROW
number of rows of sub-blocks in matrix A, M
Definition: fasp_block.h:37

dBSRmat::JA
INT * JA
Definition: fasp_block.h:64

dCSRmat
Sparse matrix of REAL type in CSR format.
Definition: fasp.h:151

dCSRmat::col
INT col
column of matrix A, n
Definition: fasp.h:157

dCSRmat::val
REAL * val
nonzero entries of A
Definition: fasp.h:169

dCSRmat::row
INT row
row number of matrix A, m
Definition: fasp.h:154

dCSRmat::IA
INT * IA
integer array of row pointers, the size is m+1
Definition: fasp.h:163

dCSRmat::nnz
INT nnz
number of nonzero entries
Definition: fasp.h:160

dCSRmat::JA
INT * JA
integer array of column indexes, the size is nnz
Definition: fasp.h:166

iCSRmat
Sparse matrix of INT type in CSR format.
Definition: fasp.h:190

iCSRmat::col
INT col
column of matrix A, n
Definition: fasp.h:196

iCSRmat::row
INT row
row number of matrix A, m
Definition: fasp.h:193

iCSRmat::IA
INT * IA
integer array of row pointers, the size is m+1
Definition: fasp.h:202

iCSRmat::nnz
INT nnz
number of nonzero entries
Definition: fasp.h:199

iCSRmat::JA
INT * JA
integer array of column indexes, the size is nnz
Definition: fasp.h:205

iCSRmat::val
INT * val
nonzero entries of A
Definition: fasp.h:208