17#include "fasp_functs.h"
46 memset(ind, 0,
sizeof(
INT)*(m+1));
47 for ( i=0; i<nnz; ++i ) ind[A->
rowind[i]+1]++;
50 for ( i=1; i<=m; ++i ) {
51 ia[i] = ia[i-1]+ind[i];
56 for ( i=0; i<nnz; ++i ) {
57 iind = A->
rowind[i]; jind = ind[iind];
94#pragma omp parallel for if(m>OPENMP_HOLDS) private(i, j)
97 for (j=A->
IA[i];j<A->IA[i+1];++j) B->
rowind[j]=i;
123 const INT nc = A->
nc;
132 const INT glo_row = nc*ngrid;
145 INT row_start,col_start;
150 INT val_L_start,val_R_start;
160 for (ROW = 0; ROW < ngrid; ++ROW) {
162 for (BAND = 0; BAND < nband; ++BAND) {
163 width = offsets[BAND];
166 if (COL >= 0) ++block;
169 if (COL < ngrid) ++block;
176 for (i = 0; i < nc; i ++) {
178 ia[row+1] = ia[row] + ncb;
183 glo_nnz = ia[glo_row];
188 for (ROW = 0; ROW < ngrid; ++ROW) {
190 val_L_start = ROW*nc2;
193 for (i = 0; i < nc; i ++) {
197 for (j = 0; j < nc; j ++) {
199 ja[pos] = row_start + j;
200 a[pos] = diag[val_L_start+nci+j];
206 for (BAND = 0; BAND < nband; ++BAND) {
207 width = offsets[BAND];
214 val_R_start = COL*nc2;
215 for (i = 0; i < nc; i ++) {
219 for (j = 0 ; j < nc; j ++) {
220 pos = start + ncb + j;
221 ja[pos] = col_start + j;
222 a[pos] = offdiag[BAND][val_R_start+nci+j];
230 for (i = 0; i < nc; i ++) {
234 for (j = 0; j < nc; j ++) {
235 pos = start + ncb + j;
236 ja[pos] = col_start + j;
237 a[pos] = offdiag[BAND][val_L_start+nci+j];
249 for (ROW = 0; ROW < ngrid; ++ROW) {
251 for (j = 1; j < nc; j ++) {
299 INT i,j,ij,ir,i1,length,
ilength,start,irmrow,irmrow1;
308 for (i=0;i<mb;++i) { m+=blockptr[i*nb]->
row; row[i+1]=m; }
309 for (i=0;i<nb;++i) { n+=blockptr[i]->
col; col[i+1]=n; }
312#pragma omp parallel for reduction(+:nnz) if (nbl>OPENMP_HOLDS) private(i)
314 for (i=0;i<nbl;++i) { nnz+=blockptr[i]->
nnz; }
323 for (ir=row[i];ir<row[i+1];ir++) {
325 for (length=j=0;j<nb;++j) {
326 ij=i*nb+j; blockptrij=blockptr[ij];
327 if (blockptrij->nnz>0) {
328 start=A.
IA[ir]+length;
329 irmrow=ir-row[i]; irmrow1=irmrow+1;
330 ilength=blockptrij->IA[irmrow1]-blockptrij->IA[irmrow];
332 memcpy(&(A.val[start]),&(blockptrij->val[blockptrij->IA[irmrow]]),
ilength*
sizeof(
REAL));
333 memcpy(&(A.JA[start]), &(blockptrij->JA[blockptrij->IA[irmrow]]),
ilength*
sizeof(
INT));
334 for (i1=0;i1<
ilength;i1++) A.JA[start+i1]+=col[j];
340 A.IA[ir+1]=A.IA[ir]+length;
365 REAL *DATA = A -> val;
368 INT num_rows = A -> row;
369 INT num_cols = A -> col;
370 INT num_nonzeros = A -> nnz;
374 INT *nzdifnum = NULL;
375 INT *rowstart = NULL;
383 INT *invnzdif = NULL;
385 INT i,j,k,cnt,maxnzrow;
392 for (i = 0; i < num_rows; i ++) {
393 nzrow[i] = IA[i+1] - IA[i];
394 if (nzrow[i] > maxnzrow) {
401 for (i = 0; i < num_rows; i ++) {
402 counter[nzrow[i]] ++;
409 for (dif = 0, i = 0; i < maxnzrow + 1; i ++) {
410 if (counter[i] > 0) dif ++;
421 for (cnt = 0, i = 0; i < maxnzrow + 1; i ++) {
422 if (counter[i] > 0) {
425 rowstart[cnt+1] = rowstart[cnt] + counter[i];
434 for (i = 0; i < num_rows; i ++) {
435 j = invnzdif[nzrow[i]];
436 rowindex[rowstart[j]] = i;
440 for (i = dif; i > 0; i --) {
441 rowstart[i] = rowstart[i-1];
449 for (cnt = 0, i = 0; i < num_rows; i ++) {
451 for (j = IA[k]; j < IA[k+1]; j ++) {
464 B -> nz_diff = nzdifnum;
465 B -> index = rowindex;
466 B -> start = rowstart;
523 INT rowstart0,rowstart,colstart0,colstart;
524 INT colblock,nzperrow;
533 INT stride_i,mybegin,myend,myid,nthreads;
536 nthreads = fasp_get_num_threads();
555 stride_i = ROW/nthreads;
556#pragma omp parallel private(myid, mybegin, myend, i, rowstart, colblock, nzperrow, j)
558 myid = omp_get_thread_num();
559 mybegin = myid*stride_i;
560 if(myid < nthreads-1) myend = mybegin+stride_i;
562 for (i=mybegin; i<myend; ++i)
565 colblock = IA[i+1] - IA[i];
566 nzperrow = colblock*nb;
567 for (j = 0; j < nb; ++j)
569 ia[rowstart+j] = nzperrow;
576 for (i = 0; i < ROW; ++i)
579 colblock = IA[i+1] - IA[i];
580 nzperrow = colblock*nb;
581 for (j = 0; j < nb; ++j)
583 ia[rowstart+j] = nzperrow;
593 for (i = 1; i <= rowA; ++i)
602 switch (storage_manner)
608#pragma omp parallel private(myid, mybegin, myend, i, k, j, rowstart, colstart, vp, mr, ap, jap, mc)
610 myid = omp_get_thread_num();
611 mybegin = myid*stride_i;
612 if(myid < nthreads-1) myend = mybegin+stride_i;
614 for (i=mybegin; i<myend; ++i)
616 for (k = IA[i]; k < IA[i+1]; ++k)
622 for (mr = 0; mr < nb; mr ++)
624 ap = &a[ia[rowstart]];
625 jap = &ja[ia[rowstart]];
626 for (mc = 0; mc < nb; mc ++)
629 *jap = colstart + mc;
630 vp ++; ap ++; jap ++;
641 for (i = 0; i < ROW; ++i)
643 for (k = IA[i]; k < IA[i+1]; ++k)
649 for (mr = 0; mr < nb; mr ++)
651 ap = &a[ia[rowstart]];
652 jap = &ja[ia[rowstart]];
653 for (mc = 0; mc < nb; mc ++)
656 *jap = colstart + mc;
657 vp ++; ap ++; jap ++;
670 for (i = 0; i < ROW; ++i)
672 for (k = IA[i]; k < IA[i+1]; ++k)
678 for (mc = 0; mc < nb; mc ++)
680 rowstart = rowstart0;
681 colstart = colstart0 + mc;
682 for (mr = 0; mr < nb; mr ++)
684 a[ia[rowstart]] = *vp;
685 ja[ia[rowstart]] = colstart;
686 vp ++; ia[rowstart]++; rowstart++;
699 for (i = rowA; i > 0; i --) {
726 INT i, j, k, ii, jj, kk, l, mod, nnz;
735 INT *col_flag, *ia, *ja;
738 if ((A->
row)%nb!=0) {
739 printf(
"### ERROR: A.row=%d is not a multiplication of nb=%d!\n",
744 if ((A->
col)%nb!=0) {
745 printf(
"### ERROR: A.col=%d is not a multiplication of nb=%d!\n",
763 for (i=0; i<row; ++i) {
765 for (j=0; j<nb; ++j) {
767 for (k=IA[jj]; k<IA[jj+1]; ++k) {
769 if (col_flag[kk]!=0) {
789 for (i=0; i<row; ++i) {
791 for(j=0; j<nb; ++j) {
793 for(k=IA[jj]; k<IA[jj+1]; ++k) {
795 if (col_flag[kk]!=0) {
807 for (i=0; i<row; ++i) {
809 for (j=0; j<nb; ++j) {
811 for (k=IA[jj]; k<IA[jj+1]; ++k) {
812 for (l=ia[i]; l<ia[i+1]; ++l) {
813 if (JA[k]/nb ==ja[l]) {
815 bval[l*nb2+j*nb+mod] = val[k];
864 INT ngridplus1 = ngrid + 1;
868 for (i = 0; i < nband; ++i) {
869 NNZ += (ngrid - abs(offsets[i]));
879 for (i = 1; i < ngridplus1; ++i) IA[i] = 1;
880 for (i = 0; i < nband; ++i) {
883 for (j = -k+1; j < ngridplus1; ++j) {
889 for (j = 1; j < m; ++j)
896 for (i = 1; i < ngridplus1; ++i) {
901 for (i = 0 ; i < ngrid; ++i) {
902 memcpy(val + IA[i]*nc2, diag + i*nc2, nc2*
sizeof(
REAL));
907 for (i = 0; i < nband; ++i) {
910 for (j = -k; j < ngrid; ++j) {
912 memcpy(val+IA[j]*nc2, offdiag[i]+m*nc2, nc2*
sizeof(
REAL));
919 for (j = 0; j < m; ++j) {
920 memcpy(val + IA[j]*nc2, offdiag[i] + j*nc2, nc2*
sizeof(
REAL));
928 for (i = ngrid; i > 0; i --) {
961 INT num_nonzeros = NNZ*nb2;
967 INT row_start, col_start;
975 A->
nnz = num_nonzeros;
984 for (i = 0; i < ROW; ++i) {
986 for (k = IA[i]; k < IA[i+1]; ++k) {
991 for (mr = 0; mr < nb; mr ++) {
992 for (mc = 0; mc < nb; mc ++) {
993 rowA[cnt] = row_start;
994 colA[cnt] = col_start + mc;
void fasp_iarray_set(const INT n, INT *x, const INT val)
Set initial value for an array to be x=val.
void fasp_mem_free(void *mem)
Free up previous allocated memory body and set pointer to NULL.
void * fasp_mem_calloc(const unsigned int size, const unsigned int type)
Allocate, initiate, and check memory.
void fasp_chkerr(const SHORT status, const char *fctname)
Check error status and print out error messages before quit.
dBSRmat fasp_dbsr_create(const INT ROW, const INT COL, const INT NNZ, const INT nb, const INT storage_manner)
Create BSR sparse matrix data memory space.
dCSRLmat * fasp_dcsrl_create(const INT num_rows, const INT num_cols, const INT num_nonzeros)
Create a dCSRLmat object.
dCSRmat fasp_dcsr_create(const INT m, const INT n, const INT nnz)
Create CSR sparse matrix data memory space.
void fasp_dcsr_alloc(const INT m, const INT n, const INT nnz, dCSRmat *A)
Allocate CSR sparse matrix memory space.
Main header file for the FASP project.
#define SHORT
FASP integer and floating point numbers.
Header file for FASP block matrices.
#define FASP_SUCCESS
Definition of return status and error messages.
#define TRUE
Definition of logic type.
Block REAL CSR matrix format.
INT brow
row number of blocks in A, m
dCSRmat ** blocks
blocks of dCSRmat, point to blocks[brow][bcol]
INT bcol
column number of blocks A, n
Block sparse row storage matrix of REAL type.
INT COL
number of cols of sub-blocks in matrix A, N
INT NNZ
number of nonzero sub-blocks in matrix A, NNZ
INT nb
dimension of each sub-block
INT * IA
integer array of row pointers, the size is ROW+1
INT ROW
number of rows of sub-blocks in matrix A, M
INT storage_manner
storage manner for each sub-block
Sparse matrix of REAL type in COO (IJ) format.
INT * colind
integer array of column indices, the size is nnz
INT col
column of matrix A, n
INT * rowind
integer array of row indices, the size is nnz
REAL * val
nonzero entries of A
INT row
row number of matrix A, m
INT nnz
number of nonzero entries
Sparse matrix of REAL type in CSRL format.
Sparse matrix of REAL type in CSR format.
INT col
column of matrix A, n
REAL * val
nonzero entries of A
INT row
row number of matrix A, m
INT * IA
integer array of row pointers, the size is m+1
INT nnz
number of nonzero entries
INT * JA
integer array of column indexes, the size is nnz
Structure matrix of REAL type.
INT * offsets
offsets of the off-diagonals (length is nband)
INT nband
number of off-diag bands
REAL * diag
diagonal entries (length is ngrid*(nc^2))
INT nc
size of each block (number of components)
REAL ** offdiag
off-diagonal entries (dimension is nband * [(ngrid-|offsets|) * nc^2])