Skip to content

Commit 99f3b75

Browse files
committed
Added min&max of result vector. Updated samples
1 parent 3c02462 commit 99f3b75

File tree

4 files changed

+49
-23
lines changed

4 files changed

+49
-23
lines changed

sample_output/OUT_WHAMO_HIP_CC

Lines changed: 9 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -2,13 +2,16 @@ srun -n 1 -p amd ./sw4ck sw4ck.in
22
Reading from file sw4ck.in
33
Launching sw4 kernels
44

5-
Kernel 1 time 58.0426
6-
Kernel 2 time 24.1525
7-
Kernel 3 time 23.6964
8-
Kernel 4 time 17.5566
9-
Kernel 5 time 50.5733
5+
Kernel 1 time 57.4456
6+
Kernel 2 time 24.1552
7+
Kernel 3 time 23.7349
8+
Kernel 4 time 17.6128
9+
Kernel 5 time 50.536
1010

11-
Total kernel runtime = 174
11+
Total kernel runtime = 173
12+
13+
MIN = -1.573545887801061974e-05
14+
MAX = 0.010639705916308216105
1215

1316
Norm of output 0x1.941a40ae9d6fap+7
1417
Norm of output 202.05127473518206216

sample_output/OUT_WHAMO_HIP_HIPCC

Lines changed: 11 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -1,14 +1,17 @@
1-
srun -n 1 -p amd ./sw4ck sw4ck.in
2-
Reading from file sw4ck.in
1+
srun -n 1 -p amd bin/sw4ck ../src/sw4ck.in
2+
Reading from file ../src/sw4ck.in
33
Launching sw4 kernels
44

5-
Kernel 1 time 15.3942
6-
Kernel 2 time 4.6632
7-
Kernel 3 time 3.43617
8-
Kernel 4 time 3.74249
9-
Kernel 5 time 15.4687
5+
Kernel 1 time 15.5369
6+
Kernel 2 time 4.70258
7+
Kernel 3 time 3.47073
8+
Kernel 4 time 3.778
9+
Kernel 5 time 15.4499
1010

11-
Total kernel runtime = 42
11+
Total kernel runtime = 43
12+
13+
MIN = -1.5735458151501329865e-05
14+
MAX = 0.010639705916308216105
1215

1316
Norm of output 0x1.941a40aec142ep+7
1417
Norm of output 202.0512747393526638

sample_output/OUT_WHAMO_HIP_RAJA

Lines changed: 10 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -1,14 +1,18 @@
1+
!s
12
srun -n 1 -p amd bin/sw4ck ../src/sw4ck.in
23
Reading from file ../src/sw4ck.in
34
Launching sw4 kernels
45

5-
Kernel 1 time 21.0319
6-
Kernel 2 time 30.7601
7-
Kernel 3 time 37.5654
8-
Kernel 4 time 28.6686
9-
Kernel 5 time 22.607
6+
Kernel 1 time 20.4535
7+
Kernel 2 time 30.6751
8+
Kernel 3 time 37.4144
9+
Kernel 4 time 28.5405
10+
Kernel 5 time 22.2167
1011

11-
Total kernel runtime = 140
12+
Total kernel runtime = 139
13+
14+
MIN = -1.5735458151501329865e-05
15+
MAX = 0.010639705916308216105
1216

1317
Norm of output 0x1.941a40aec142ep+7
1418
Norm of output 202.0512747393526638

src/curvitest.C

Lines changed: 19 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -4,7 +4,9 @@
44
#include <map>
55
#include <sstream>
66
#include <vector>
7+
#include <tuple>
78
#include <chrono>
9+
#include <limits>
810
#define float_sw4 double
911
#include "SW4CKConfig.h"
1012
#include "foralls.h"
@@ -20,6 +22,7 @@ class Sarray {
2022
void init();
2123
void init2();
2224
double norm();
25+
std::tuple<double,double> minmax();
2326
int m_nc, m_ni, m_nj, m_nk;
2427
int m_ib, m_ie, m_jb, m_je, m_kb, m_ke;
2528
ssize_t m_base;
@@ -104,6 +107,15 @@ double Sarray::norm() {
104107
for (size_t i = 0; i < size / 8; i++) ret += m_data[i] * m_data[i];
105108
return ret;
106109
}
110+
std::tuple<double,double> Sarray::minmax(){
111+
double min = std::numeric_limits<double>::max();
112+
double max = std::numeric_limits<double>::min();
113+
for (size_t i = 0; i < size / 8; i++) {
114+
min=std::min(min,m_data[i]);
115+
max=std::max(max,m_data[i]);
116+
}
117+
return std::make_tuple(min,max);
118+
}
107119

108120
void curvilinear4sg_ci(
109121
int ifirst, int ilast, int jfirst, int jlast, int kfirst, int klast,
@@ -253,13 +265,17 @@ int main(int argc, char* argv[]) {
253265
#endif
254266
auto stop = std::chrono::high_resolution_clock::now();
255267
std::cout<<"\nTotal kernel runtime = "<<std::chrono::duration_cast<std::chrono::milliseconds>(stop-start).count()<<"\n\n";
268+
auto minmax =arrays[i]["a_Uacc"]->minmax();
269+
std::cout << "MIN = " << std::defaultfloat << std::setprecision(20)
270+
<< std::get<0>(minmax)<<"\nMAX = "<<std::get<1>(minmax) << "\n\n";
271+
double norm=arrays[i]["a_Uacc"]->norm();
256272
std::cout << "Norm of output " << std::hexfloat
257-
<< arrays[i]["a_Uacc"]->norm() << "\n";
273+
<< norm << "\n";
258274
std::cout << "Norm of output " << std::defaultfloat << std::setprecision(20)
259-
<< arrays[i]["a_Uacc"]->norm() << "\n";
275+
<< norm << "\n";
260276
//const double exact_norm = 9.86238393426104e+17;
261277
const double exact_norm = 202.0512747393526638; // for init2
262-
double err = (arrays[i]["a_Uacc"]->norm() - exact_norm) / exact_norm * 100;
278+
double err = (norm - exact_norm) / exact_norm * 100;
263279
std::cout << "Error = " << std::setprecision(2) << err << " %\n";
264280
}
265281
}

0 commit comments

Comments
 (0)