21#include "fasp_functs.h"
52 INT myid, mybegin, myend, nthreads;
55 nthreads = fasp_get_num_threads();
61#pragma omp parallel private(myid, mybegin, myend, i)
63 myid = omp_get_thread_num();
65 for (i = mybegin; i < myend; ++i) x[i] *= a;
69 for (i = 0; i < n; ++i) x[i] *= a;
96 INT myid, mybegin, myend, nthreads;
99 nthreads = fasp_get_num_threads();
106#pragma omp parallel private(myid, mybegin, myend, i) num_threads(nthreads)
108 myid = omp_get_thread_num();
110 for (i = mybegin; i < myend; ++i) y[i] += x[i];
114 for (i = 0; i < n; ++i) y[i] += x[i];
118 else if (a == -1.0) {
121#pragma omp parallel private(myid, mybegin, myend, i) num_threads(nthreads)
123 myid = omp_get_thread_num();
125 for (i = mybegin; i < myend; ++i) y[i] -= x[i];
129 for (i = 0; i < n; ++i) y[i] -= x[i];
136#pragma omp parallel private(myid, mybegin, myend, i) num_threads(nthreads)
138 myid = omp_get_thread_num();
140 for (i = mybegin; i < myend; ++i) y[i] += a * x[i];
144 for (i = 0; i < n; ++i) y[i] += a * x[i];
169 INT myid, mybegin, myend, nthreads;
172 nthreads = fasp_get_num_threads();
179#pragma omp parallel private(myid, mybegin, myend, i) num_threads(nthreads)
181 myid = omp_get_thread_num();
183 for (i = mybegin; i < myend; ++i) y[i] += x[i];
187 for (i = 0; i < n; ++i) y[i] += x[i];
191 else if (a == -1.0) {
194#pragma omp parallel private(myid, mybegin, myend, i) num_threads(nthreads)
196 myid = omp_get_thread_num();
198 for (i = mybegin; i < myend; ++i) y[i] -= x[i];
202 for (i = 0; i < n; ++i) y[i] -= x[i];
209#pragma omp parallel private(myid, mybegin, myend, i) num_threads(nthreads)
211 myid = omp_get_thread_num();
213 for (i = mybegin; i < myend; ++i) y[i] += a * x[i];
217 for (i = 0; i < n; ++i) y[i] += a * x[i];
410 INT myid, mybegin, myend, nthreads;
413 nthreads = fasp_get_num_threads();
419#pragma omp parallel private(myid, mybegin, myend, i) num_threads(nthreads)
421 myid = omp_get_thread_num();
423 for (i = mybegin; i < myend; ++i) z[i] = a * x[i] + y[i];
427 for (i = 0; i < n; ++i) z[i] = a * x[i] + y[i];
447 z[0] = a * x[0] + y[0];
448 z[1] = a * x[1] + y[1];
450 z[2] = a * x[2] + y[2];
451 z[3] = a * x[3] + y[3];
470 z[0] = a * x[0] + y[0];
471 z[1] = a * x[1] + y[1];
472 z[2] = a * x[2] + y[2];
474 z[3] = a * x[3] + y[3];
475 z[4] = a * x[4] + y[4];
476 z[5] = a * x[5] + y[5];
478 z[6] = a * x[6] + y[6];
479 z[7] = a * x[7] + y[7];
480 z[8] = a * x[8] + y[8];
499 z[0] = a * x[0] + y[0];
500 z[1] = a * x[1] + y[1];
501 z[2] = a * x[2] + y[2];
502 z[3] = a * x[3] + y[3];
503 z[4] = a * x[4] + y[4];
505 z[5] = a * x[5] + y[5];
506 z[6] = a * x[6] + y[6];
507 z[7] = a * x[7] + y[7];
508 z[8] = a * x[8] + y[8];
509 z[9] = a * x[9] + y[9];
511 z[10] = a * x[10] + y[10];
512 z[11] = a * x[11] + y[11];
513 z[12] = a * x[12] + y[12];
514 z[13] = a * x[13] + y[13];
515 z[14] = a * x[14] + y[14];
517 z[15] = a * x[15] + y[15];
518 z[16] = a * x[16] + y[16];
519 z[17] = a * x[17] + y[17];
520 z[18] = a * x[18] + y[18];
521 z[19] = a * x[19] + y[19];
523 z[20] = a * x[20] + y[20];
524 z[21] = a * x[21] + y[21];
525 z[22] = a * x[22] + y[22];
526 z[23] = a * x[23] + y[23];
527 z[24] = a * x[24] + y[24];
546 z[0] = a * x[0] + y[0];
547 z[1] = a * x[1] + y[1];
548 z[2] = a * x[2] + y[2];
549 z[3] = a * x[3] + y[3];
550 z[4] = a * x[4] + y[4];
551 z[5] = a * x[5] + y[5];
552 z[6] = a * x[6] + y[6];
554 z[7] = a * x[7] + y[7];
555 z[8] = a * x[8] + y[8];
556 z[9] = a * x[9] + y[9];
557 z[10] = a * x[10] + y[10];
558 z[11] = a * x[11] + y[11];
559 z[12] = a * x[12] + y[12];
560 z[13] = a * x[13] + y[13];
562 z[14] = a * x[14] + y[14];
563 z[15] = a * x[15] + y[15];
564 z[16] = a * x[16] + y[16];
565 z[17] = a * x[17] + y[17];
566 z[18] = a * x[18] + y[18];
567 z[19] = a * x[19] + y[19];
568 z[20] = a * x[20] + y[20];
570 z[21] = a * x[21] + y[21];
571 z[22] = a * x[22] + y[22];
572 z[23] = a * x[23] + y[23];
573 z[24] = a * x[24] + y[24];
574 z[25] = a * x[25] + y[25];
575 z[26] = a * x[26] + y[26];
576 z[27] = a * x[27] + y[27];
578 z[28] = a * x[28] + y[28];
579 z[29] = a * x[29] + y[29];
580 z[30] = a * x[30] + y[30];
581 z[31] = a * x[31] + y[31];
582 z[32] = a * x[32] + y[32];
583 z[33] = a * x[33] + y[33];
584 z[34] = a * x[34] + y[34];
586 z[35] = a * x[35] + y[35];
587 z[36] = a * x[36] + y[36];
588 z[37] = a * x[37] + y[37];
589 z[38] = a * x[38] + y[38];
590 z[39] = a * x[39] + y[39];
591 z[40] = a * x[40] + y[40];
592 z[41] = a * x[41] + y[41];
594 z[42] = a * x[42] + y[42];
595 z[43] = a * x[43] + y[43];
596 z[44] = a * x[44] + y[44];
597 z[45] = a * x[45] + y[45];
598 z[46] = a * x[46] + y[46];
599 z[47] = a * x[47] + y[47];
600 z[48] = a * x[48] + y[48];
627 INT myid, mybegin, myend, nthreads;
630 nthreads = fasp_get_num_threads();
636#pragma omp parallel private(myid, mybegin, myend, i) num_threads(nthreads)
638 myid = omp_get_thread_num();
640 for (i = mybegin; i < myend; ++i) y[i] = a * x[i] + b * y[i];
644 for (i = 0; i < n; ++i) y[i] = a * x[i] + b * y[i];
665 register REAL onenorm = 0.0;
669#pragma omp parallel for reduction(+ : onenorm) private(i)
671 for (i = 0; i < n; ++i) onenorm +=
ABS(x[i]);
693 register REAL twonorm = 0.0;
697#pragma omp parallel for reduction(+ : twonorm) private(i)
699 for (i = 0; i < n; ++i) twonorm += x[i] * x[i];
701 return sqrt(twonorm);
722 register REAL infnorm = 0.0;
726 INT myid, mybegin, myend, nthreads;
729 nthreads = fasp_get_num_threads();
735 REAL infnorm_loc = 0.0;
736#pragma omp parallel firstprivate(infnorm_loc) private(myid, mybegin, myend, i)
738 myid = omp_get_thread_num();
740 for (i = mybegin; i < myend; ++i) infnorm_loc =
MAX(infnorm_loc,
ABS(x[i]));
742 if (infnorm_loc > infnorm) {
744 infnorm =
MAX(infnorm_loc, infnorm);
749 for (i = 0; i < n; ++i) infnorm =
MAX(infnorm,
ABS(x[i]));
774 register REAL value = 0.0;
783#pragma omp parallel for reduction(+ : value) private(i)
785 for (i = 0; i < n; ++i) value += x[i] * y[i];
787 for (i = 0; i < n; ++i) value += x[i] * y[i];
void fasp_get_start_end(const INT procid, const INT nprocs, const INT n, INT *start, INT *end)
Assign load to each thread.
REAL fasp_blas_darray_dotprod(const INT n, const REAL *x, const REAL *y)
Inner product of two arrays x and y.
void fasp_blas_darray_axpy_nc3(const REAL a, const REAL *x, REAL *y)
y = a*x + y, length of x and y should be 3
REAL fasp_blas_darray_norminf(const INT n, const REAL *x)
Linf norm of array x.
void fasp_blas_darray_axpyz(const INT n, const REAL a, const REAL *x, const REAL *y, REAL *z)
z = a*x + y
REAL fasp_blas_darray_norm1(const INT n, const REAL *x)
L1 norm of array x.
void fasp_blas_ldarray_axpy(const INT n, const REAL a, const REAL *x, LONGREAL *y)
y = a*x + y
void fasp_blas_darray_axpy_nc7(const REAL a, const REAL *x, REAL *y)
y = a*x + y, length of x and y should be 7
void fasp_blas_darray_axpy_nc5(const REAL a, const REAL *x, REAL *y)
y = a*x + y, length of x and y should be 5
void fasp_blas_darray_axpby(const INT n, const REAL a, const REAL *x, const REAL b, REAL *y)
y = a*x + b*y
void fasp_blas_darray_axpyz_nc7(const REAL a, const REAL *x, const REAL *y, REAL *z)
z = a*x + y, length of x, y and z should be 7
void fasp_blas_darray_axpy_nc2(const REAL a, const REAL *x, REAL *y)
y = a*x + y, length of x and y should be 2
void fasp_blas_darray_axpyz_nc2(const REAL a, const REAL *x, const REAL *y, REAL *z)
z = a*x + y, length of x, y and z should be 2
REAL fasp_blas_darray_norm2(const INT n, const REAL *x)
L2 norm of array x.
void fasp_blas_darray_ax(const INT n, const REAL a, REAL *x)
x = a*x
void fasp_blas_darray_axpyz_nc3(const REAL a, const REAL *x, const REAL *y, REAL *z)
z = a*x + y, length of x, y and z should be 3
void fasp_blas_darray_axpy(const INT n, const REAL a, const REAL *x, REAL *y)
y = a*x + y
void fasp_blas_darray_axpyz_nc5(const REAL a, const REAL *x, const REAL *y, REAL *z)
z = a*x + y, length of x, y and z should be 5
Main header file for the FASP project.
#define SHORT
FASP integer and floating point numbers.
#define MAX(a, b)
Definition of max, min, abs.
#define TRUE
Definition of logic type.