Fast Auxiliary Space Preconditioning 2.7.7 Aug/28/2022
Loading...
Searching...
No Matches
BlaArray.c
Go to the documentation of this file.
1
14#include <math.h>
15
16#ifdef _OPENMP
17#include <omp.h>
18#endif
19
20#include "fasp.h"
21#include "fasp_functs.h"
22
23/*---------------------------------*/
24/*-- Public Functions --*/
25/*---------------------------------*/
26
43void fasp_blas_darray_ax(const INT n, const REAL a, REAL* x)
44{
45 if (a == 1.0) return; // do nothing
46
47 {
48 SHORT use_openmp = FALSE;
49 INT i;
50
51#ifdef _OPENMP
52 INT myid, mybegin, myend, nthreads;
53 if (n > OPENMP_HOLDS) {
54 use_openmp = TRUE;
55 nthreads = fasp_get_num_threads();
56 }
57#endif
58
59 if (use_openmp) {
60#ifdef _OPENMP
61#pragma omp parallel private(myid, mybegin, myend, i)
62 {
63 myid = omp_get_thread_num();
64 fasp_get_start_end(myid, nthreads, n, &mybegin, &myend);
65 for (i = mybegin; i < myend; ++i) x[i] *= a;
66 }
67#endif
68 } else {
69 for (i = 0; i < n; ++i) x[i] *= a;
70 }
71 }
72}
73
90void fasp_blas_darray_axpy(const INT n, const REAL a, const REAL* x, REAL* y)
91{
92 SHORT use_openmp = FALSE;
93 INT i;
94
95#ifdef _OPENMP
96 INT myid, mybegin, myend, nthreads;
97 if (n > OPENMP_HOLDS) {
98 use_openmp = TRUE;
99 nthreads = fasp_get_num_threads();
100 }
101#endif
102
103 if (a == 1.0) {
104 if (use_openmp) {
105#ifdef _OPENMP
106#pragma omp parallel private(myid, mybegin, myend, i) num_threads(nthreads)
107 {
108 myid = omp_get_thread_num();
109 fasp_get_start_end(myid, nthreads, n, &mybegin, &myend);
110 for (i = mybegin; i < myend; ++i) y[i] += x[i];
111 }
112#endif
113 } else {
114 for (i = 0; i < n; ++i) y[i] += x[i];
115 }
116 }
117
118 else if (a == -1.0) {
119 if (use_openmp) {
120#ifdef _OPENMP
121#pragma omp parallel private(myid, mybegin, myend, i) num_threads(nthreads)
122 {
123 myid = omp_get_thread_num();
124 fasp_get_start_end(myid, nthreads, n, &mybegin, &myend);
125 for (i = mybegin; i < myend; ++i) y[i] -= x[i];
126 }
127#endif
128 } else {
129 for (i = 0; i < n; ++i) y[i] -= x[i];
130 }
131 }
132
133 else {
134 if (use_openmp) {
135#ifdef _OPENMP
136#pragma omp parallel private(myid, mybegin, myend, i) num_threads(nthreads)
137 {
138 myid = omp_get_thread_num();
139 fasp_get_start_end(myid, nthreads, n, &mybegin, &myend);
140 for (i = mybegin; i < myend; ++i) y[i] += a * x[i];
141 }
142#endif
143 } else {
144 for (i = 0; i < n; ++i) y[i] += a * x[i];
145 }
146 }
147}
148
163void fasp_blas_ldarray_axpy(const INT n, const REAL a, const REAL* x, LONGREAL* y)
164{
165 SHORT use_openmp = FALSE;
166 INT i;
167
168#ifdef _OPENMP
169 INT myid, mybegin, myend, nthreads;
170 if (n > OPENMP_HOLDS) {
171 use_openmp = TRUE;
172 nthreads = fasp_get_num_threads();
173 }
174#endif
175
176 if (a == 1.0) {
177 if (use_openmp) {
178#ifdef _OPENMP
179#pragma omp parallel private(myid, mybegin, myend, i) num_threads(nthreads)
180 {
181 myid = omp_get_thread_num();
182 fasp_get_start_end(myid, nthreads, n, &mybegin, &myend);
183 for (i = mybegin; i < myend; ++i) y[i] += x[i];
184 }
185#endif
186 } else {
187 for (i = 0; i < n; ++i) y[i] += x[i];
188 }
189 }
190
191 else if (a == -1.0) {
192 if (use_openmp) {
193#ifdef _OPENMP
194#pragma omp parallel private(myid, mybegin, myend, i) num_threads(nthreads)
195 {
196 myid = omp_get_thread_num();
197 fasp_get_start_end(myid, nthreads, n, &mybegin, &myend);
198 for (i = mybegin; i < myend; ++i) y[i] -= x[i];
199 }
200#endif
201 } else {
202 for (i = 0; i < n; ++i) y[i] -= x[i];
203 }
204 }
205
206 else {
207 if (use_openmp) {
208#ifdef _OPENMP
209#pragma omp parallel private(myid, mybegin, myend, i) num_threads(nthreads)
210 {
211 myid = omp_get_thread_num();
212 fasp_get_start_end(myid, nthreads, n, &mybegin, &myend);
213 for (i = mybegin; i < myend; ++i) y[i] += a * x[i];
214 }
215#endif
216 } else {
217 for (i = 0; i < n; ++i) y[i] += a * x[i];
218 }
219 }
220}
221
234void fasp_blas_darray_axpy_nc2(const REAL a, const REAL* x, REAL* y)
235{
236 y[0] += a * x[0];
237 y[1] += a * x[1];
238
239 y[2] += a * x[2];
240 y[3] += a * x[3];
241}
242
255void fasp_blas_darray_axpy_nc3(const REAL a, const REAL* x, REAL* y)
256{
257 y[0] += a * x[0];
258 y[1] += a * x[1];
259 y[2] += a * x[2];
260
261 y[3] += a * x[3];
262 y[4] += a * x[4];
263 y[5] += a * x[5];
264
265 y[6] += a * x[6];
266 y[7] += a * x[7];
267 y[8] += a * x[8];
268}
269
282void fasp_blas_darray_axpy_nc5(const REAL a, const REAL* x, REAL* y)
283{
284 y[0] += a * x[0];
285 y[1] += a * x[1];
286 y[2] += a * x[2];
287 y[3] += a * x[3];
288 y[4] += a * x[4];
289
290 y[5] += a * x[5];
291 y[6] += a * x[6];
292 y[7] += a * x[7];
293 y[8] += a * x[8];
294 y[9] += a * x[9];
295
296 y[10] += a * x[10];
297 y[11] += a * x[11];
298 y[12] += a * x[12];
299 y[13] += a * x[13];
300 y[14] += a * x[14];
301
302 y[15] += a * x[15];
303 y[16] += a * x[16];
304 y[17] += a * x[17];
305 y[18] += a * x[18];
306 y[19] += a * x[19];
307
308 y[20] += a * x[20];
309 y[21] += a * x[21];
310 y[22] += a * x[22];
311 y[23] += a * x[23];
312 y[24] += a * x[24];
313}
314
327void fasp_blas_darray_axpy_nc7(const REAL a, const REAL* x, REAL* y)
328{
329 y[0] += a * x[0];
330 y[1] += a * x[1];
331 y[2] += a * x[2];
332 y[3] += a * x[3];
333 y[4] += a * x[4];
334 y[5] += a * x[5];
335 y[6] += a * x[6];
336
337 y[7] += a * x[7];
338 y[8] += a * x[8];
339 y[9] += a * x[9];
340 y[10] += a * x[10];
341 y[11] += a * x[11];
342 y[12] += a * x[12];
343 y[13] += a * x[13];
344
345 y[14] += a * x[14];
346 y[15] += a * x[15];
347 y[16] += a * x[16];
348 y[17] += a * x[17];
349 y[18] += a * x[18];
350 y[19] += a * x[19];
351 y[20] += a * x[20];
352
353 y[21] += a * x[21];
354 y[22] += a * x[22];
355 y[23] += a * x[23];
356 y[24] += a * x[24];
357 y[25] += a * x[25];
358 y[26] += a * x[26];
359 y[27] += a * x[27];
360
361 y[28] += a * x[28];
362 y[29] += a * x[29];
363 y[30] += a * x[30];
364 y[31] += a * x[31];
365 y[32] += a * x[32];
366 y[33] += a * x[33];
367 y[34] += a * x[34];
368
369 y[35] += a * x[35];
370 y[36] += a * x[36];
371 y[37] += a * x[37];
372 y[38] += a * x[38];
373 y[39] += a * x[39];
374 y[40] += a * x[40];
375 y[41] += a * x[41];
376
377 y[42] += a * x[42];
378 y[43] += a * x[43];
379 y[44] += a * x[44];
380 y[45] += a * x[45];
381 y[46] += a * x[46];
382 y[47] += a * x[47];
383 y[48] += a * x[48];
384}
385
403void fasp_blas_darray_axpyz(const INT n, const REAL a, const REAL* x, const REAL* y,
404 REAL* z)
405{
406 SHORT use_openmp = FALSE;
407 INT i;
408
409#ifdef _OPENMP
410 INT myid, mybegin, myend, nthreads;
411 if (n > OPENMP_HOLDS) {
412 use_openmp = TRUE;
413 nthreads = fasp_get_num_threads();
414 }
415#endif
416
417 if (use_openmp) {
418#ifdef _OPENMP
419#pragma omp parallel private(myid, mybegin, myend, i) num_threads(nthreads)
420 {
421 myid = omp_get_thread_num();
422 fasp_get_start_end(myid, nthreads, n, &mybegin, &myend);
423 for (i = mybegin; i < myend; ++i) z[i] = a * x[i] + y[i];
424 }
425#endif
426 } else {
427 for (i = 0; i < n; ++i) z[i] = a * x[i] + y[i];
428 }
429}
430
445void fasp_blas_darray_axpyz_nc2(const REAL a, const REAL* x, const REAL* y, REAL* z)
446{
447 z[0] = a * x[0] + y[0];
448 z[1] = a * x[1] + y[1];
449
450 z[2] = a * x[2] + y[2];
451 z[3] = a * x[3] + y[3];
452}
453
468void fasp_blas_darray_axpyz_nc3(const REAL a, const REAL* x, const REAL* y, REAL* z)
469{
470 z[0] = a * x[0] + y[0];
471 z[1] = a * x[1] + y[1];
472 z[2] = a * x[2] + y[2];
473
474 z[3] = a * x[3] + y[3];
475 z[4] = a * x[4] + y[4];
476 z[5] = a * x[5] + y[5];
477
478 z[6] = a * x[6] + y[6];
479 z[7] = a * x[7] + y[7];
480 z[8] = a * x[8] + y[8];
481}
482
497void fasp_blas_darray_axpyz_nc5(const REAL a, const REAL* x, const REAL* y, REAL* z)
498{
499 z[0] = a * x[0] + y[0];
500 z[1] = a * x[1] + y[1];
501 z[2] = a * x[2] + y[2];
502 z[3] = a * x[3] + y[3];
503 z[4] = a * x[4] + y[4];
504
505 z[5] = a * x[5] + y[5];
506 z[6] = a * x[6] + y[6];
507 z[7] = a * x[7] + y[7];
508 z[8] = a * x[8] + y[8];
509 z[9] = a * x[9] + y[9];
510
511 z[10] = a * x[10] + y[10];
512 z[11] = a * x[11] + y[11];
513 z[12] = a * x[12] + y[12];
514 z[13] = a * x[13] + y[13];
515 z[14] = a * x[14] + y[14];
516
517 z[15] = a * x[15] + y[15];
518 z[16] = a * x[16] + y[16];
519 z[17] = a * x[17] + y[17];
520 z[18] = a * x[18] + y[18];
521 z[19] = a * x[19] + y[19];
522
523 z[20] = a * x[20] + y[20];
524 z[21] = a * x[21] + y[21];
525 z[22] = a * x[22] + y[22];
526 z[23] = a * x[23] + y[23];
527 z[24] = a * x[24] + y[24];
528}
529
544void fasp_blas_darray_axpyz_nc7(const REAL a, const REAL* x, const REAL* y, REAL* z)
545{
546 z[0] = a * x[0] + y[0];
547 z[1] = a * x[1] + y[1];
548 z[2] = a * x[2] + y[2];
549 z[3] = a * x[3] + y[3];
550 z[4] = a * x[4] + y[4];
551 z[5] = a * x[5] + y[5];
552 z[6] = a * x[6] + y[6];
553
554 z[7] = a * x[7] + y[7];
555 z[8] = a * x[8] + y[8];
556 z[9] = a * x[9] + y[9];
557 z[10] = a * x[10] + y[10];
558 z[11] = a * x[11] + y[11];
559 z[12] = a * x[12] + y[12];
560 z[13] = a * x[13] + y[13];
561
562 z[14] = a * x[14] + y[14];
563 z[15] = a * x[15] + y[15];
564 z[16] = a * x[16] + y[16];
565 z[17] = a * x[17] + y[17];
566 z[18] = a * x[18] + y[18];
567 z[19] = a * x[19] + y[19];
568 z[20] = a * x[20] + y[20];
569
570 z[21] = a * x[21] + y[21];
571 z[22] = a * x[22] + y[22];
572 z[23] = a * x[23] + y[23];
573 z[24] = a * x[24] + y[24];
574 z[25] = a * x[25] + y[25];
575 z[26] = a * x[26] + y[26];
576 z[27] = a * x[27] + y[27];
577
578 z[28] = a * x[28] + y[28];
579 z[29] = a * x[29] + y[29];
580 z[30] = a * x[30] + y[30];
581 z[31] = a * x[31] + y[31];
582 z[32] = a * x[32] + y[32];
583 z[33] = a * x[33] + y[33];
584 z[34] = a * x[34] + y[34];
585
586 z[35] = a * x[35] + y[35];
587 z[36] = a * x[36] + y[36];
588 z[37] = a * x[37] + y[37];
589 z[38] = a * x[38] + y[38];
590 z[39] = a * x[39] + y[39];
591 z[40] = a * x[40] + y[40];
592 z[41] = a * x[41] + y[41];
593
594 z[42] = a * x[42] + y[42];
595 z[43] = a * x[43] + y[43];
596 z[44] = a * x[44] + y[44];
597 z[45] = a * x[45] + y[45];
598 z[46] = a * x[46] + y[46];
599 z[47] = a * x[47] + y[47];
600 z[48] = a * x[48] + y[48];
601}
602
620void fasp_blas_darray_axpby(const INT n, const REAL a, const REAL* x, const REAL b,
621 REAL* y)
622{
623 SHORT use_openmp = FALSE;
624 INT i;
625
626#ifdef _OPENMP
627 INT myid, mybegin, myend, nthreads;
628 if (n > OPENMP_HOLDS) {
629 use_openmp = TRUE;
630 nthreads = fasp_get_num_threads();
631 }
632#endif
633
634 if (use_openmp) {
635#ifdef _OPENMP
636#pragma omp parallel private(myid, mybegin, myend, i) num_threads(nthreads)
637 {
638 myid = omp_get_thread_num();
639 fasp_get_start_end(myid, nthreads, n, &mybegin, &myend);
640 for (i = mybegin; i < myend; ++i) y[i] = a * x[i] + b * y[i];
641 }
642#endif
643 } else {
644 for (i = 0; i < n; ++i) y[i] = a * x[i] + b * y[i];
645 }
646}
647
664{
665 register REAL onenorm = 0.0;
666 INT i;
667
668#ifdef _OPENMP
669#pragma omp parallel for reduction(+ : onenorm) private(i)
670#endif
671 for (i = 0; i < n; ++i) onenorm += ABS(x[i]);
672
673 return onenorm;
674}
675
692{
693 register REAL twonorm = 0.0;
694 INT i;
695
696#ifdef _OPENMP
697#pragma omp parallel for reduction(+ : twonorm) private(i)
698#endif
699 for (i = 0; i < n; ++i) twonorm += x[i] * x[i];
700
701 return sqrt(twonorm);
702}
703
720{
721 SHORT use_openmp = FALSE;
722 register REAL infnorm = 0.0;
723 INT i;
724
725#ifdef _OPENMP
726 INT myid, mybegin, myend, nthreads;
727 if (n > OPENMP_HOLDS) {
728 use_openmp = TRUE;
729 nthreads = fasp_get_num_threads();
730 }
731#endif
732
733 if (use_openmp) {
734#ifdef _OPENMP
735 REAL infnorm_loc = 0.0;
736#pragma omp parallel firstprivate(infnorm_loc) private(myid, mybegin, myend, i)
737 {
738 myid = omp_get_thread_num();
739 fasp_get_start_end(myid, nthreads, n, &mybegin, &myend);
740 for (i = mybegin; i < myend; ++i) infnorm_loc = MAX(infnorm_loc, ABS(x[i]));
741
742 if (infnorm_loc > infnorm) {
743#pragma omp critical
744 infnorm = MAX(infnorm_loc, infnorm);
745 }
746 }
747#endif
748 } else {
749 for (i = 0; i < n; ++i) infnorm = MAX(infnorm, ABS(x[i]));
750 }
751
752 return infnorm;
753}
754
771REAL fasp_blas_darray_dotprod(const INT n, const REAL* x, const REAL* y)
772{
773 SHORT use_openmp = FALSE;
774 register REAL value = 0.0;
775 INT i;
776
777#ifdef _OPENMP
778 if (n > OPENMP_HOLDS) use_openmp = TRUE;
779#endif
780
781 if (use_openmp) {
782#ifdef _OPENMP
783#pragma omp parallel for reduction(+ : value) private(i)
784#endif
785 for (i = 0; i < n; ++i) value += x[i] * y[i];
786 } else {
787 for (i = 0; i < n; ++i) value += x[i] * y[i];
788 }
789
790 return value;
791}
792
793/*---------------------------------*/
794/*-- End of File --*/
795/*---------------------------------*/
void fasp_get_start_end(const INT procid, const INT nprocs, const INT n, INT *start, INT *end)
Assign Load to each thread.
Definition: AuxThreads.c:93
REAL fasp_blas_darray_dotprod(const INT n, const REAL *x, const REAL *y)
Inner product of two arraies x and y.
Definition: BlaArray.c:771
void fasp_blas_darray_axpy_nc3(const REAL a, const REAL *x, REAL *y)
y = a*x + y, length of x and y should be 3
Definition: BlaArray.c:255
REAL fasp_blas_darray_norminf(const INT n, const REAL *x)
Linf norm of array x.
Definition: BlaArray.c:719
void fasp_blas_darray_axpyz(const INT n, const REAL a, const REAL *x, const REAL *y, REAL *z)
z = a*x + y
Definition: BlaArray.c:403
REAL fasp_blas_darray_norm1(const INT n, const REAL *x)
L1 norm of array x.
Definition: BlaArray.c:663
void fasp_blas_ldarray_axpy(const INT n, const REAL a, const REAL *x, LONGREAL *y)
y = a*x + y
Definition: BlaArray.c:163
void fasp_blas_darray_axpy_nc7(const REAL a, const REAL *x, REAL *y)
y = a*x + y, length of x and y should be 7
Definition: BlaArray.c:327
void fasp_blas_darray_axpy_nc5(const REAL a, const REAL *x, REAL *y)
y = a*x + y, length of x and y should be 5
Definition: BlaArray.c:282
void fasp_blas_darray_axpby(const INT n, const REAL a, const REAL *x, const REAL b, REAL *y)
y = a*x + b*y
Definition: BlaArray.c:620
void fasp_blas_darray_axpyz_nc7(const REAL a, const REAL *x, const REAL *y, REAL *z)
z = a*x + y, length of x, y and z should be 7
Definition: BlaArray.c:544
void fasp_blas_darray_axpy_nc2(const REAL a, const REAL *x, REAL *y)
y = a*x + y, length of x and y should be 2
Definition: BlaArray.c:234
void fasp_blas_darray_axpyz_nc2(const REAL a, const REAL *x, const REAL *y, REAL *z)
z = a*x + y, length of x, y and z should be 2
Definition: BlaArray.c:445
REAL fasp_blas_darray_norm2(const INT n, const REAL *x)
L2 norm of array x.
Definition: BlaArray.c:691
void fasp_blas_darray_ax(const INT n, const REAL a, REAL *x)
x = a*x
Definition: BlaArray.c:43
void fasp_blas_darray_axpyz_nc3(const REAL a, const REAL *x, const REAL *y, REAL *z)
z = a*x + y, length of x, y and z should be 3
Definition: BlaArray.c:468
void fasp_blas_darray_axpy(const INT n, const REAL a, const REAL *x, REAL *y)
y = a*x + y
Definition: BlaArray.c:90
void fasp_blas_darray_axpyz_nc5(const REAL a, const REAL *x, const REAL *y, REAL *z)
z = a*x + y, length of x, y and z should be 5
Definition: BlaArray.c:497
Main header file for the FASP project.
#define REAL
Definition: fasp.h:75
#define SHORT
FASP integer and floating point numbers.
Definition: fasp.h:71
#define LONGREAL
Definition: fasp.h:76
#define ABS(a)
Definition: fasp.h:84
#define MAX(a, b)
Definition of max, min, abs.
Definition: fasp.h:82
#define INT
Definition: fasp.h:72
#define OPENMP_HOLDS
Definition: fasp_const.h:269
#define TRUE
Definition of logic type.
Definition: fasp_const.h:61
#define FALSE
Definition: fasp_const.h:62