29#include "fasp_functs.h"
74 printf(
"### DEBUG: [-Begin-] %s ...\n", __FUNCTION__);
77 switch (interp_type) {
80 interp_DIR(A, vertices, P, param);
84 interp_STD(A, vertices, P, S, param);
88 interp_EXT(A, vertices, P, S, param);
96 interp_RDC(A, vertices, P, param);
104 printf(
"### DEBUG: [--End--] %s ...\n", __FUNCTION__);
130 const INT nnzold = P->
nnz;
136 REAL Min_neg, Max_pos;
137 REAL Fac_neg, Fac_pos;
138 REAL Sum_neg, TSum_neg;
139 REAL Sum_pos, TSum_pos;
141 INT index1 = 0, index2 = 0, begin, end;
145 printf(
"### DEBUG: [-Begin-] %s ...\n", __FUNCTION__);
148 for (i = 0; i < row; ++i) {
153 P->
IA[i] = num_nonzero;
154 Min_neg = Max_pos = 0;
155 Sum_neg = Sum_pos = 0;
156 TSum_neg = TSum_pos = 0;
159 for (j = begin; j < end; ++j) {
162 Sum_pos += P->
val[j];
163 Max_pos =
MAX(Max_pos, P->
val[j]);
167 Sum_neg += P->
val[j];
168 Min_neg =
MIN(Min_neg, P->
val[j]);
177 for (j = begin; j < end; ++j) {
179 if (P->
val[j] >= Max_pos) {
181 P->
JA[index1++] = P->
JA[j];
182 TSum_pos += P->
val[j];
185 else if (P->
val[j] <= Min_neg) {
187 P->
JA[index1++] = P->
JA[j];
188 TSum_neg += P->
val[j];
194 Fac_pos = Sum_pos / TSum_pos;
200 Fac_neg = Sum_neg / TSum_neg;
205 for (j = begin; j < end; ++j) {
207 if (P->
val[j] >= Max_pos)
208 P->
val[index2++] = P->
val[j] * Fac_pos;
210 else if (P->
val[j] <= Min_neg)
211 P->
val[index2++] = P->
val[j] * Fac_neg;
216 P->
nnz = P->
IA[row] = num_nonzero;
221 printf(
"NNZ in prolongator: before truncation = %10d, after = %10d\n", nnzold,
226 printf(
"### DEBUG: [--End--] %s ...\n", __FUNCTION__);
246 INT i, j, k, l, index, idiag;
247 const REAL alpha = 2.0 - 1.0 / param->
theta;
250 for (index = i = 0; i < row; ++i) {
251 if (vec[i] ==
CGPT) {
252 P->
val[index++] = 1.0;
254 for (j = A->
IA[i]; j < A->IA[i + 1]; ++j) {
260 REAL Dii = alpha * A->
val[idiag];
262 for (j = A->
IA[i]; j < A->IA[i + 1]; ++j) {
263 if (vec[A->
JA[j]] ==
CGPT) {
264 P->
val[index++] = -A->
val[j] / Dii;
273 for (index = i = 0; i < row; ++i) {
274 if (vec[i] ==
CGPT) cindex[i] = index++;
276 for (i = 0; i < P->
nnz; ++i) P->
JA[i] = cindex[P->
JA[i]];
310 INT begin_row, end_row;
311 INT i, j, k, l, index = 0, idiag;
314 REAL amN, amP, apN, apP;
315 REAL alpha, beta, aii = 0.0;
323 INT myid, mybegin, myend, stride_i, nthreads;
327 nthreads = fasp_get_num_threads();
334 stride_i = row / nthreads;
335#pragma omp parallel private(myid, mybegin, myend, i, begin_row, end_row, idiag, aii, \
336 amN, amP, apN, apP, num_pcouple, j, k, alpha, beta, l) \
337 num_threads(nthreads)
339 myid = omp_get_thread_num();
340 mybegin = myid * stride_i;
341 if (myid < nthreads - 1)
342 myend = mybegin + stride_i;
347 for (i = mybegin; i < myend; ++i) {
348 begin_row = A->
IA[i];
349 end_row = A->
IA[i + 1] - 1;
350 for (idiag = begin_row; idiag <= end_row; idiag++) {
351 if (A->
JA[idiag] == i) {
357 amN = 0, amP = 0, apN = 0, apP = 0, num_pcouple = 0;
358 for (j = begin_row; j <= end_row; ++j) {
359 if (j == idiag)
continue;
360 for (k = P->
IA[i]; k < P->IA[i + 1]; ++k) {
361 if (P->
JA[k] == A->
JA[j])
break;
365 if (k < P->IA[i + 1]) {
371 if (k < P->IA[i + 1]) {
381 if (num_pcouple > 0) {
387 for (j = P->
IA[i]; j < P->IA[i + 1]; ++j) {
389 for (l = A->
IA[i]; l < A->IA[i + 1]; l++) {
390 if (A->
JA[l] == k)
break;
393 P->
val[j] = -beta * A->
val[l] / aii;
395 P->
val[j] = -alpha * A->
val[l] / aii;
398 }
else if (vec[i] == 2)
402 P->
val[P->
IA[i]] = 1;
411 for (i = 0; i < row; ++i) {
413 begin_row = A->
IA[i];
414 end_row = A->
IA[i + 1];
417 for (idiag = begin_row; idiag < end_row; idiag++) {
418 if (A->
JA[idiag] == i) {
424 if (vec[i] ==
FGPT) {
426 amN = amP = apN = apP = 0.0;
430 for (j = begin_row; j < end_row; ++j) {
432 if (j == idiag)
continue;
436 for (k = P->
IA[i]; k < P->IA[i + 1]; ++k) {
437 if (P->
JA[k] == A->
JA[j]) {
462 if (num_pcouple > 0) {
470 for (j = P->
IA[i]; j < P->IA[i + 1]; ++j) {
472 for (l = A->
IA[i]; l < A->IA[i + 1]; l++) {
473 if (A->
JA[l] == k)
break;
476 P->
val[j] = -beta * A->
val[l] / aii;
478 P->
val[j] = -alpha * A->
val[l] / aii;
484 else if (vec[i] ==
CGPT) {
485 P->
val[P->
IA[i]] = 1.0;
491 for (index = i = 0; i < row; ++i) {
492 if (vec[i] ==
CGPT) cindex[i] = index++;
498 stride_i = P->
IA[P->
row] / nthreads;
499#pragma omp parallel private(myid, mybegin, myend, i, j) num_threads(nthreads)
501 myid = omp_get_thread_num();
502 mybegin = myid * stride_i;
503 if (myid < nthreads - 1)
504 myend = mybegin + stride_i;
506 myend = P->
IA[P->
row];
507 for (i = mybegin; i < myend; ++i) {
509 P->
JA[i] = cindex[j];
514 for (i = 0; i < P->
nnz; ++i) {
516 P->
JA[i] = cindex[j];
525 amg_interp_trunc(P, param);
553 INT i, j, k, l, m, index;
554 REAL alpha = 1.0, factor, alN, alP;
555 REAL akk, akl, aik, aki;
588 for (i = 0; i < row; i++) {
591 for (j = S->
IA[i]; j < S->IA[i + 1]; j++) {
593 if (vec[k] ==
CGPT) cindex[k] = i;
596 for (j = A->
IA[i]; j < A->IA[i + 1]; j++) {
599 if (cindex[k] == i) csum[i] += A->
val[j];
601 if (k == i) diag[i] = A->
val[j];
604 nsum[i] += A->
val[j];
605 if (vec[k] !=
ISPT) {
606 psum[i] += A->
val[j];
611 nsum[i] += A->
val[j];
617 for (i = 0; i < row; i++) {
619 if (vec[i] ==
FGPT) {
628 for (j = A->
IA[i]; j < A->IA[i + 1]; j++) rindi[A->
JA[j]] = j;
631 for (j = P->
IA[i]; j < P->IA[i + 1]; j++) Ahat[P->
JA[j]] = 0.0;
636 for (j = S->
IA[i]; j < S->IA[i + 1]; j++) {
639 aik = A->
val[rindi[k]];
644 else if (vec[k] ==
FGPT) {
649 for (m = A->
IA[k]; m < A->IA[k + 1]; m++) rindk[A->
JA[m]] = m;
657 for ( m = S->
IA[k]; m < S->IA[k+1]; m++ ) {
659 akl = A->
val[rindk[l]];
660 if ( vec[l] ==
CGPT ) Ahat[l] -= factor * akl;
662 aki = akl; Ahat[l] -= factor * aki;
666 for (m = A->
IA[k]; m < A->IA[k + 1]; m++) {
669 Ahat[i] -= factor * aki;
673 for (m = S->
IA[k]; m < S->IA[k + 1]; m++) {
675 akl = A->
val[rindk[l]];
676 if (vec[l] ==
CGPT) Ahat[l] -= factor * akl;
680 alN -= factor * (nsum[k] - aki + akk);
681 alP -= factor * csum[k];
688 if (P->
IA[i] < P->
IA[i + 1]) alpha = alN / alP;
691 for (j = P->
IA[i]; j < P->IA[i + 1]; j++) {
693 P->
val[j] = -alpha * Ahat[k] / Ahat[i];
698 else if (vec[i] ==
CGPT) {
699 P->
val[P->
IA[i]] = 1.0;
705 for (index = i = 0; i < row; ++i) {
706 if (vec[i] ==
CGPT) cindex[i] = index++;
711#pragma omp parallel for private(i, j) if (P->IA[P->row] > OPENMP_HOLDS)
713 for (i = 0; i < P->
IA[P->
row]; ++i) {
715 P->
JA[i] = cindex[j];
740 amg_interp_trunc(P, param);
766 INT i, j, k, l, m, index;
767 REAL alpha = 1.0, factor, alN, alP;
768 REAL akk, akl, aik, aki;
800 for (i = 0; i < row; i++) {
803 for (j = S->
IA[i]; j < S->IA[i + 1]; j++) {
805 if (vec[k] ==
CGPT) cindex[k] = i;
808 for (j = A->
IA[i]; j < A->IA[i + 1]; j++) {
811 if (cindex[k] == i) csum[i] += A->
val[j];
813 if (k == i) diag[i] = A->
val[j];
816 nsum[i] += A->
val[j];
817 if (vec[k] !=
ISPT) {
818 psum[i] += A->
val[j];
823 nsum[i] += A->
val[j];
829 for (i = 0; i < row; i++) {
831 if (vec[i] ==
FGPT) {
840 for (j = A->
IA[i]; j < A->IA[i + 1]; j++) rindi[A->
JA[j]] = j;
843 for (j = P->
IA[i]; j < P->IA[i + 1]; j++) Ahat[P->
JA[j]] = 0.0;
848 for (j = S->
IA[i]; j < S->IA[i + 1]; j++) {
851 aik = A->
val[rindi[k]];
856 else if (vec[k] ==
FGPT) {
861 for (m = A->
IA[k]; m < A->IA[k + 1]; m++) rindk[A->
JA[m]] = m;
869 for ( m = S->
IA[k]; m < S->IA[k+1]; m++ ) {
871 akl = A->
val[rindk[l]];
872 if ( vec[l] ==
CGPT ) Ahat[l] -= factor * akl;
874 aki = akl; Ahat[l] -= factor * aki;
878 for (m = A->
IA[k]; m < A->IA[k + 1]; m++) {
881 Ahat[i] -= factor * aki;
885 for (m = S->
IA[k]; m < S->IA[k + 1]; m++) {
887 akl = A->
val[rindk[l]];
888 if (vec[l] ==
CGPT) Ahat[l] -= factor * akl;
892 alN -= factor * (nsum[k] - aki + akk);
893 alP -= factor * csum[k];
900 if (P->
IA[i] < P->
IA[i + 1]) alpha = alN / alP;
903 for (j = P->
IA[i]; j < P->IA[i + 1]; j++) {
905 P->
val[j] = -alpha * Ahat[k] / Ahat[i];
910 else if (vec[i] ==
CGPT) {
911 P->
val[P->
IA[i]] = 1.0;
917 for (index = i = 0; i < row; ++i) {
918 if (vec[i] ==
CGPT) cindex[i] = index++;
923#pragma omp parallel for private(i, j) if (P->IA[P->row] > OPENMP_HOLDS)
925 for (i = 0; i < P->
IA[P->
row]; ++i) {
927 P->
JA[i] = cindex[j];
952 amg_interp_trunc(P, param);
void fasp_darray_set(const INT n, REAL *x, const REAL val)
Set initial value for an array to be x=val.
void fasp_iarray_set(const INT n, INT *x, const INT val)
Set initial value for an array to be x=val.
void fasp_mem_free(void *mem)
Free up previous allocated memory body and set pointer to NULL.
void * fasp_mem_realloc(void *oldmem, const LONGLONG tsize)
Reallocate, initiate, and check memory.
void * fasp_mem_calloc(const unsigned int size, const unsigned int type)
Allocate, initiate, and check memory.
void fasp_chkerr(const SHORT status, const char *fctname)
Check error status and print out error messages before quit.
void fasp_amg_interp_em(dCSRmat *A, ivector *vertices, dCSRmat *P, AMG_param *param)
Energy-min interpolation.
void fasp_amg_interp(dCSRmat *A, ivector *vertices, dCSRmat *P, iCSRmat *S, AMG_param *param)
Generate interpolation operator P.
Main header file for the FASP project.
#define SHORT
FASP integer and floating point numbers.
#define MAX(a, b)
Definition of max, min, abs.
#define ERROR_AMG_INTERP_TYPE
#define INTERP_DIR
Definition of interpolation types.
#define TRUE
Definition of logic type.
Parameters for AMG methods.
SHORT interpolation_type
interpolation type
SHORT print_level
print level for AMG
SHORT coarsening_type
coarsening type
REAL theta
theta for reduction-based amg
REAL truncation_threshold
truncation threshold
Sparse matrix of REAL type in CSR format.
INT col
column of matrix A, n
REAL * val
nonzero entries of A
INT row
row number of matrix A, m
INT * IA
integer array of row pointers, the size is m+1
INT nnz
number of nonzero entries
INT * JA
integer array of column indexes, the size is nnz
Sparse matrix of INT type in CSR format.
INT * IA
integer array of row pointers, the size is m+1
INT * JA
integer array of column indexes, the size is nnz
Vector with n entries of INT type.
INT * val
actual vector entries