|
| 1 | +#include <stdio.h> |
| 2 | +#include <stdlib.h> |
| 3 | +#include <cstdint> |
| 4 | +#include <omp.h> |
| 5 | +#include <cstdlib> |
| 6 | +#include <ctime> |
| 7 | +#include <iostream> |
| 8 | +#include <iomanip> |
| 9 | +using namespace std; |
| 10 | + |
// Element (i, j, k) of the flat array A, stored with k as the fastest-varying
// index. NOTE: the expansion reads `jmax` and `kmax` from the scope in which
// the macro is used. Arguments and the whole expansion are parenthesised so
// that expressions such as `phi(base + offset, i, j, k)` expand correctly.
#define phi(A, i, j, k) ((A)[(i) * jmax * kmax + (j) * kmax + (k)])
| 12 | + |
| 13 | + |
/**
 * Serial in-place Gauss-Seidel sweep over the interior of a 3-D grid.
 *
 * Every interior point (1 <= i <= imax-2, 1 <= j <= jmax-2, 1 <= k <= kmax-2)
 * is overwritten with the sum of its six axis neighbours multiplied by `osth`.
 * The update is in place, so neighbours already visited in the current sweep
 * contribute their new values.
 *
 * @param A     Grid of imax*jmax*kmax doubles, element (i,j,k) stored at
 *              i*jmax*kmax + j*kmax + k.
 * @param osth  Factor applied to the neighbour sum (the caller passes 1/6).
 * @param iter  Number of full sweeps.
 * @param imax  Grid extent in i.
 * @param jmax  Grid extent in j.
 * @param kmax  Grid extent in k.
 *
 * Does nothing when A is null or any extent is below 3 (no interior points).
 */
void GaussSeidel(double *A, double osth, uint64_t iter, uint64_t imax, uint64_t jmax, uint64_t kmax) {
    // The extents are unsigned: without this guard `kmax - 1` would wrap
    // around for kmax == 0 and the loops would run far out of bounds.
    if (A == nullptr || imax < 3 || jmax < 3 || kmax < 3) {
        return;
    }

    const uint64_t strideI = jmax * kmax;  // distance between (i,j,k) and (i+1,j,k)
    const uint64_t strideJ = kmax;         // distance between (i,j,k) and (i,j+1,k)

    for (uint64_t it = 0; it < iter; it++) {
        // NOTE: the innermost loop walks i, the index with the largest stride
        // in this layout; the traversal order is kept as the benchmark defines it.
        for (uint64_t k = 1; k < kmax - 1; k++) {
            for (uint64_t j = 1; j < jmax - 1; j++) {
                for (uint64_t i = 1; i < imax - 1; i++) {
                    double *centre = A + i * strideI + j * strideJ + k;
                    *centre = ( *(centre - strideI) + *(centre + strideI)
                              + *(centre - strideJ) + *(centre + strideJ)
                              + *(centre - 1)       + *(centre + 1) ) * osth;
                }
            }
        }
    }
}
| 27 | + |
| 28 | + |
/**
 * Pipeline-parallel (wavefront) in-place Gauss-Seidel sweep.
 *
 * The interior rows 1 .. jmax-2 are split into one contiguous block per
 * thread. In pipeline step s, thread t updates plane k = s - t + 1 of its own
 * block, so thread t always trails thread t-1 by exactly one plane. A barrier
 * after every step guarantees that the rows written by the neighbouring
 * thread in the previous step are complete before they are read; with that,
 * every update sees the same neighbour values as in the serial k/j/i sweep.
 *
 * @param A     Grid of imax*jmax*kmax doubles, element (i,j,k) stored at
 *              i*jmax*kmax + j*kmax + k.
 * @param osth  Factor applied to the neighbour sum (the caller passes 1/6).
 * @param iter  Number of full sweeps.
 * @param imax  Grid extent in i.
 * @param jmax  Grid extent in j.
 * @param kmax  Grid extent in k.
 *
 * Does nothing when A is null or any extent is below 3 (no interior points).
 * Prints the team size once per sweep, as the original code did.
 * When compiled without OpenMP the pragmas are ignored and the function runs
 * as a single-thread sweep.
 */
void GaussSeidelParallel(double *A, double osth, uint64_t iter, uint64_t imax, uint64_t jmax, uint64_t kmax) {
    // Unsigned extents: guard before computing `extent - 2` below.
    if (A == nullptr || imax < 3 || jmax < 3 || kmax < 3) {
        return;
    }

    const uint64_t strideI = jmax * kmax;  // distance between (i,j,k) and (i+1,j,k)
    const uint64_t strideJ = kmax;         // distance between (i,j,k) and (i,j+1,k)
    const uint64_t rowsJ   = jmax - 2;     // number of interior rows
    const uint64_t planesK = kmax - 2;     // number of interior planes

    for (uint64_t it = 0; it < iter; it++) {
        #pragma omp parallel
        {
            // Everything declared inside the region is private to each thread.
#ifdef _OPENMP
            const uint64_t tid        = static_cast<uint64_t>(omp_get_thread_num());
            const uint64_t numthreads = static_cast<uint64_t>(omp_get_num_threads());
#else
            const uint64_t tid        = 0;
            const uint64_t numthreads = 1;
#endif

            #pragma omp single
            {
                std::cout << "numthreads : " << numthreads << std::endl;
            } // implicit barrier at the end of single

            // Balanced, non-overlapping split of rows 1 .. jmax-2: the first
            // `extra` threads get one row more. Threads beyond the row count
            // get an empty block but still take part in every barrier.
            const uint64_t base   = rowsJ / numthreads;
            const uint64_t extra  = rowsJ % numthreads;
            const uint64_t jStart = 1 + tid * base + (tid < extra ? tid : extra);
            const uint64_t jEnd   = jStart + base + (tid < extra ? 1 : 0);  // exclusive

            // The last thread starts numthreads-1 steps late, hence the extra steps.
            const uint64_t steps = planesK + numthreads - 1;
            for (uint64_t step = 0; step < steps; step++) {
                // Check the range before subtracting: the values are unsigned.
                if (step >= tid && step - tid < planesK) {
                    const uint64_t k = step - tid + 1;
                    for (uint64_t j = jStart; j < jEnd; j++) {
                        for (uint64_t i = 1; i < imax - 1; i++) {
                            double *centre = A + i * strideI + j * strideJ + k;
                            *centre = ( *(centre - strideI) + *(centre + strideI)
                                      + *(centre - strideJ) + *(centre + strideJ)
                                      + *(centre - 1)       + *(centre + 1) ) * osth;
                        }
                    }
                }
                // All threads must finish this step before any thread reads
                // block-boundary rows in the next one.
                #pragma omp barrier
            }
        }
    }
}
| 59 | + |
/**
 * Elapsed time between two timestamps, in seconds.
 *
 * @param start  Earlier timestamp (only read).
 * @param end    Later timestamp (only read).
 * @return (end - start) as a double: the whole-second difference plus the
 *         nanosecond difference scaled by 1e-9. The nanosecond difference
 *         may be negative; the sum is still the correct interval.
 */
double get_time(const struct timespec *start,
                const struct timespec *end)
{
    const double wholeSeconds = static_cast<double>(end->tv_sec - start->tv_sec);
    const double nanoseconds  = static_cast<double>(end->tv_nsec - start->tv_nsec);
    return wholeSeconds + nanoseconds * 1e-9;
}
| 66 | + |
| 67 | +#define N (imax * jmax * kmax) |
| 68 | +int main(int argc, char* argv[]){ |
| 69 | + uint64_t imax = 1024, jmax = 1681, kmax = 1024; |
| 70 | + //uint64_t imax = 16, jmax = 17, kmax = 16; |
| 71 | + |
| 72 | + double *A = nullptr; |
| 73 | + A = (double *)malloc(N * sizeof(double)); |
| 74 | + |
| 75 | + double time_used, lups, perf; |
| 76 | + struct timespec start, end; |
| 77 | + |
| 78 | + #pragma omp for |
| 79 | + for(int i = 0; i < N; i++) { |
| 80 | + A[i] = random() % 100; |
| 81 | + } |
| 82 | + |
| 83 | + cout << "imax:" << imax << ", jmax:" << jmax << ", kmax:" << kmax << endl; |
| 84 | + cout << setw(12) << "threadsnum" << "\t" << setw(10) <<"lup" << endl; |
| 85 | + |
| 86 | + //serial GaussSeidel |
| 87 | + clock_gettime(CLOCK_MONOTONIC_RAW, &start); |
| 88 | + GaussSeidel(A, 1/6.0, 1, imax, jmax, kmax); |
| 89 | + clock_gettime(CLOCK_MONOTONIC_RAW, &end); |
| 90 | + time_used = get_time(&start, &end); |
| 91 | + lups = (imax - 2) * (jmax - 2) * (kmax - 2); |
| 92 | + perf = 1.0 * lups / time_used * 1e-6; // unit MLUP/s |
| 93 | + cout << setw(12) << "1" << "\t" << setprecision(4) << perf << endl; |
| 94 | + |
| 95 | + //parallel GaussSeidel |
| 96 | + for(int threadnum = 2; threadnum <= 16; threadnum +=2 ) { |
| 97 | + omp_set_num_threads(threadnum); |
| 98 | + clock_gettime(CLOCK_MONOTONIC_RAW, &start); |
| 99 | + GaussSeidelParallel(A, 1/6.0, 1, imax, jmax, kmax); |
| 100 | + clock_gettime(CLOCK_MONOTONIC_RAW, &end); |
| 101 | + time_used = get_time(&start, &end); |
| 102 | + lups = (imax - 2) * (jmax - 2) * (kmax - 2); |
| 103 | + perf = 1.0 * lups / time_used * 1e-6; // unit MLUP/s |
| 104 | + cout << setw(12) << threadnum << "\t" << setprecision(4) << perf << endl; |
| 105 | + } |
| 106 | + return 0; |
| 107 | +} |
| 108 | + |
0 commit comments