Add missing files for lec09

CSWater · CSWater · commit 4ddbc53d0f69 · 2024-07-11T20:50:52.000+08:00
diff --git a/src/code/lec09-openmp/gauss_seidel.cpp b/src/code/lec09-openmp/gauss_seidel.cpp
@@ -0,0 +1,142 @@
+#include <stdio.h>
+#include <stdlib.h>
+#include <cstdint>
+#include <omp.h>
+#include <cstdlib>
+#include <ctime>
+#include <iostream>
+#include <iomanip>
+using namespace std;
+
+// Element (i, j, k) of the flattened grid A; k is the fastest-varying index.
+// Expands to an expression that uses jmax and kmax from the enclosing scope.
+#define phi(A, i,j,k) A[(i) * jmax * kmax + (j) * kmax + (k)]
+
+
+// Serial reference kernel: `iter` in-place Gauss-Seidel sweeps over the
+// interior points (1 .. max-2 in every dimension) of an imax*jmax*kmax grid.
+// Each interior point becomes the sum of its six neighbours times `osth`;
+// boundary points are only read.
+void  GaussSeidel(double *A, double osth , uint64_t iter, uint64_t imax, uint64_t jmax, uint64_t kmax) {
+  // No interior points: the unsigned bounds below (e.g. kmax-1) would wrap.
+  if(imax < 3 || jmax < 3 || kmax < 3) return;
+  for(uint64_t it = 0; it < iter; it++) {
+    for(uint64_t k = 1; k < kmax-1; k++) {
+      for(uint64_t j = 1; j < jmax-1; j++) {
+        for(uint64_t i = 1; i < imax-1; i++) {
+          phi(A, i, j, k) = ( phi(A, i-1, j,   k) +   phi(A, i+1, j,   k)
+                            + phi(A, i,   j-1, k) +   phi(A, i,   j+1, k)
+                            + phi(A, i,   j,   k-1) + phi(A, i,   j,   k+1) )* osth;
+        }
+      }
+    }
+  }
+}
+
+
+// Pipelined (wavefront) OpenMP version of GaussSeidel with the same arguments.
+// The interior j range is split into one contiguous block per thread. In
+// pipeline step l, thread t updates plane k = l - t of its block, so the
+// neighbouring block of thread t-1 finished that plane one step earlier.
+// A barrier after every step keeps the threads in lockstep, which preserves
+// the update order of the serial kernel.
+void  GaussSeidelParallel(double *A,   double osth , uint64_t iter, uint64_t imax, uint64_t jmax, uint64_t kmax) {
+  // No interior points: the unsigned bounds below (e.g. kmax-1) would wrap.
+  if(imax < 3 || jmax < 3 || kmax < 3) return;
+  const uint64_t jInterior = jmax - 2;  // interior rows are j = 1 .. jmax-2
+  int numthreads = 1;
+  for(uint64_t it = 0; it < iter; it++) {
+    #pragma omp parallel shared(numthreads)
+    {
+      const uint64_t tid = omp_get_thread_num();
+      #pragma omp single
+      {
+        numthreads = omp_get_num_threads();
+        cout << "numthreads : " << numthreads << endl;
+      }//default barrier for all threads
+      const uint64_t nt = numthreads;
+      // Disjoint blocks [jStart, jEnd) that cover exactly the interior rows;
+      // the first (jInterior % nt) threads take one extra row.
+      const uint64_t chunk = jInterior / nt;
+      const uint64_t extra = jInterior % nt;
+      const uint64_t jStart = 1 + tid * chunk + (tid < extra ? tid : extra);
+      const uint64_t jEnd = jStart + chunk + (tid < extra ? 1 : 0);
+      // The last thread reaches the last interior plane (kmax-2) in this step.
+      const uint64_t lastStep = (kmax - 2) + (nt - 1);
+      for(uint64_t l = 1; l <= lastStep; l++) {
+        // Test l > tid first: l - tid is unsigned and would wrap otherwise.
+        if(l > tid && l - tid < kmax - 1) {
+          const uint64_t k = l - tid;
+          for(uint64_t j = jStart; j < jEnd; j++) {
+            for(uint64_t i = 1; i < imax - 1; i++) {
+              phi(A, i, j, k) = ( phi(A, i-1, j,  k)    + phi(A, i+1, j,   k)
+                                + phi(A, i,   j-1, k)   + phi(A, i,   j+1, k)
+                                + phi(A, i,   j,   k-1) + phi(A, i,   j,   k+1) ) * osth;
+            }
+          }
+        }
+        // Every thread must pass here in every step, even when it was idle.
+        #pragma omp barrier
+      }
+    }
+  }
+}
+
+// Elapsed time in seconds from *start to *end.
+double get_time(struct timespec *start,
+  struct timespec *end)
+{
+  return end->tv_sec - start->tv_sec +
+    (end->tv_nsec - start->tv_nsec) * 1e-9;
+}
+
+int main(int argc, char* argv[]){
+  const uint64_t imax = 1024, jmax = 1681, kmax = 1024;
+  //uint64_t imax = 16, jmax = 17, kmax = 16;
+  const uint64_t n = imax * jmax * kmax;  // number of grid points
+
+  double *A = (double *)malloc(n * sizeof(double));
+  if(A == nullptr) {
+    cerr << "malloc of " << n * sizeof(double) << " bytes failed" << endl;
+    return 1;
+  }
+
+  double time_used, perf;
+  struct timespec start, end;
+  // Interior lattice site updates performed by one sweep.
+  const double lups = (imax - 2) * (jmax - 2) * (kmax - 2);
+
+  // Parallel fill. `parallel for` opens the team that a bare `omp for` outside
+  // a parallel region lacks. Values are derived from the index because
+  // random() draws from one generator state shared by all threads.
+  #pragma omp parallel for
+  for(uint64_t i = 0; i < n; i++) {
+    A[i] = i % 100;
+  }
+
+  cout << "imax:" << imax << ", jmax:" << jmax << ", kmax:" << kmax << endl;
+  cout << setw(12) << "threadsnum" << "\t" << setw(10) <<"lup" << endl;
+
+  //serial GaussSeidel
+  clock_gettime(CLOCK_MONOTONIC_RAW, &start);
+  GaussSeidel(A, 1/6.0, 1, imax, jmax, kmax);
+  clock_gettime(CLOCK_MONOTONIC_RAW, &end);
+  time_used = get_time(&start, &end);
+  perf = 1.0 * lups / time_used * 1e-6;  // unit MLUP/s
+  cout << setw(12) << "1" << "\t" << setprecision(4) << perf << endl;
+
+  //parallel GaussSeidel
+  for(int threadnum = 2; threadnum <= 16; threadnum +=2 ) {
+    omp_set_num_threads(threadnum);
+    clock_gettime(CLOCK_MONOTONIC_RAW, &start);
+    GaussSeidelParallel(A, 1/6.0, 1, imax, jmax, kmax);
+    clock_gettime(CLOCK_MONOTONIC_RAW, &end);
+    time_used = get_time(&start, &end);
+    perf = 1.0 * lups / time_used * 1e-6;  // unit MLUP/s
+    cout << setw(12) << threadnum << "\t" << setprecision(4) << perf << endl;
+  }
+
+  free(A);
+  return 0;
+}
+
diff --git a/src/code/lec09-openmp/histogram.cpp b/src/code/lec09-openmp/histogram.cpp
@@ -0,0 +1,38 @@
+#include <stdio.h>
+#include <stdlib.h>
+#include <omp.h>
+
+#define N 100
+// Builds an 8-bin histogram of N random values: each thread counts into its
+// own row of S, then adds that row to the shared total H.
+int main(int argc, char* argv[]){
+    int S[16][8] = {};   // per-thread bin counts, must start at zero
+    int H[8] = {};       // combined histogram
+    int A[N];
+    for(int i = 0; i < N; i++) {
+      A[i] = rand() % 8;
+    }
+    omp_set_num_threads(16);  // matches the 16 rows of S
+    #pragma omp parallel
+    {
+      const int ID = omp_get_thread_num();
+      #pragma omp for nowait
+      for(int i = 0; i < N; i++) {
+        const int IND = A[i];
+        S[ID][IND] = S[ID][IND] + 1;
+      }
+      // Row S[ID] is complete once this thread leaves the loop, so it can be
+      // merged without waiting for the other threads (hence nowait). The
+      // merge target is a separate array because the rows of S may still be
+      // written by threads that have not finished counting.
+      #pragma omp critical
+      {
+        for(int i = 0; i < 8; i++) {
+          H[i] = H[i] + S[ID][i];
+        }
+      }
+    }
+
+    return 0;
+}
+
diff --git a/src/code/lec09-openmp/loop_overhead.cpp b/src/code/lec09-openmp/loop_overhead.cpp
@@ -0,0 +1,60 @@
+#include <iostream>
+#include <stdio.h>
+#include <stdlib.h>
+#include <omp.h>
+#include <unistd.h>
+#include <sys/syscall.h>
+#include <string.h>
+#include <sys/ioctl.h>
+#include <linux/perf_event.h>
+#include <linux/hw_breakpoint.h>
+#include <asm/unistd.h>
+#include <errno.h>
+#include <stdint.h>
+#include <inttypes.h>
+#include <time.h>
+
+using namespace std;
+
+#define N 512
+#define LOOP 100000000
+
+// Elapsed time in seconds from *start to *end.
+double get_time(struct timespec *start,
+  struct timespec *end)
+{
+  return end->tv_sec - start->tv_sec +
+    (end->tv_nsec - start->tv_nsec) * 1e-9;
+}
+
+// Times LOOP repetitions of a short (N-element) work-shared loop, each inside
+// its own parallel region, and prints the achieved floating-point rate.
+int main(int argc, char* argv[]){
+  float A[N], B[N], C[N], D[N];
+  for(int i = 0; i < N; i++) {
+    // Give every operand a defined value before the kernel reads it.
+    A[i] = B[i] = C[i] = D[i] = 1.0f;
+  }
+
+  struct timespec start, end;
+  clock_gettime(CLOCK_MONOTONIC_RAW, &start);
+  // Exactly LOOP repetitions, matching the flop count below.
+  for(int iter = 0; iter < LOOP; iter++) {
+    #pragma omp parallel
+    {
+      // `omp for` shares the iterations among the threads of the enclosing
+      // region; a nested `parallel for` would open a new region per thread
+      // and have each of them run the whole loop.
+      #pragma omp for
+      for(int i = 0; i < N; i++) {
+        A[i] = B[i] + C[i] * D[i];
+      }
+    }
+  }
+  clock_gettime(CLOCK_MONOTONIC_RAW, &end);
+  double used_time = get_time(&start, &end);
+  double flops = 1.0 * N * 2 * LOOP;  // one multiply + one add per element
+  cout << "achieved flops:" << flops / used_time << endl;
+  return 0;
+}
+
diff --git a/src/code/lec09-openmp/numa_test.cpp b/src/code/lec09-openmp/numa_test.cpp
@@ -0,0 +1,24 @@
+#include <stdio.h>
+#include <stdlib.h>
+#include <omp.h>
+#include <vector>
+
+#define N 10000000
+// Fills A with values in [0, 100) and B with their squares using a
+// work-shared OpenMP loop.
+int main(int argc, char* argv[]){
+    // Heap storage: two arrays of N doubles (80 MB each) far exceed typical
+    // default stack limits. The vectors are zero-filled on construction.
+    std::vector<double> A(N), B(N);
+    // `parallel for` opens the parallel region that a bare `omp for` would
+    // need around it in order to distribute the iterations.
+#pragma omp parallel for
+    for(int i = 0; i < N; i++) {
+      // Derived from the index rather than random(): random() draws from one
+      // process-wide generator state shared by all threads.
+      A[i] = i % 100;
+      B[i] = A[i] * A[i];
+    }
+    return 0;
+}
+
diff --git a/src/code/lec09-openmp/page_migration.cpp b/src/code/lec09-openmp/page_migration.cpp
@@ -0,0 +1,88 @@
+#include <stdio.h>
+#include <omp.h>
+#include <cstdint>
+#include <cstdlib>
+#include <ctime>
+#include <iostream>
+#include <iomanip>
+using namespace std;
+
+const int64_t N = 30000;
+const int64_t M = 30000;
+
+// Elapsed time in seconds from *start to *end.
+double get_time(struct timespec *start,
+  struct timespec *end)
+{
+  return end->tv_sec - start->tv_sec +
+    (end->tv_nsec - start->tv_nsec) * 1e-9;
+}
+
+// Dense matrix-vector multiply-accumulate: lhs += mat * rhs.
+// mat is n x m in row-major order, rhs has m entries and lhs has n entries.
+// Rows are divided statically among the OpenMP threads.
+void dmvm(uint64_t n, uint64_t m, double *lhs, double *rhs, double *mat) {
+  #pragma omp parallel for schedule(static)
+  for(uint64_t r = 0; r < n; ++r) {
+    const uint64_t offset = m * r;  // index of the first element of row r
+    for(uint64_t c = 0; c < m; ++c) {
+      lhs[r] += mat[c + offset]* rhs[c];
+    }
+  }
+}
+
+
+// Prints one throughput line per dmvm call while stepping through the thread
+// counts 2, 4, ..., 32 and, for each, the repeat counts listed in iters.
+int main(int argc, char* argv[]){
+  // dmvm(rows, cols, y, x, mat) reads cols entries of x, updates rows of y.
+  const uint64_t rows = N, cols = M;
+
+  //malloc data
+  double *mat = (double *)malloc(rows * cols * sizeof(double));
+  double *x = (double *)malloc(cols * sizeof(double));
+  double *y = (double *)malloc(rows * sizeof(double));
+  if(mat == nullptr || x == nullptr || y == nullptr) {
+    cerr << "malloc failed" << endl;
+    free(mat);
+    free(x);
+    free(y);
+    return 1;
+  }
+
+  // Serial init: every page is first written by this one thread (presumably
+  // so that all data starts on one NUMA node; depends on the OS policy).
+  for(uint64_t i = 0; i < rows * cols; i++) {
+    mat[i] = i % 100;
+  }
+  for(uint64_t i = 0; i < cols; i++) {
+    x[i] = i % 10;
+  }
+  for(uint64_t i = 0; i < rows; i++) {
+    y[i] = i % 10;
+  }
+
+  double time_used, flops;
+  struct timespec start, end;
+  const int iters[] = {200, 500, 1000, 2000};
+  cout << setw(10) << "iter" << "\t" << setw(10) << "threads" << "\t" << setw(10) << "perf" << endl;
+  for(int nthreads = 2; nthreads <=32; nthreads = nthreads * 2) {
+    omp_set_num_threads(nthreads);  // constant for all timed calls below
+    for(const int iter : iters) {
+      for(int j = 0; j < iter; j++) {
+        clock_gettime(CLOCK_MONOTONIC_RAW, &start);
+        dmvm(rows, cols, y, x, mat);
+        clock_gettime(CLOCK_MONOTONIC_RAW, &end);
+        time_used = get_time(&start, &end);
+        flops = M * N * 2 / time_used;
+        cout << setw(10) << iter << "\t" << setw(10) << nthreads << "\t" << setw(10) << setprecision(4) << flops << endl;
+      }
+    }
+  }
+
+  free(mat);
+  free(x);
+  free(y);
+  return 0;
+}
+