-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathbenchmarks.cpp
150 lines (129 loc) · 3.71 KB
/
benchmarks.cpp
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
#include <benchmark/benchmark.h>
#include <float.h>
#include <math.h>
#include <random>
#ifndef _MM_FROUND_RAISE_EXC
#define _MM_FROUND_RAISE_EXC 0
#endif
#include "deps/simd_utils/simd_utils.h"
// Sweep configuration shared by every benchmark in this file.
// NOTE(review): min_value is not referenced by any code visible in this file.
int64_t min_value{1};
// First vector length (in elements) handed to each benchmark.
int64_t min_vec_size{128};
// Inclusive upper bound of the sweep: 32768 elements
// (presumably 128 KiB worth of 4-byte floats -- confirm).
int64_t max_vec_size{128 * 1024 / 4};
// Factor by which the vector length is multiplied between successive runs.
int64_t step_compression_unit{2};
/// Scalar reference implementation: finds the smallest and largest value of a
/// float array in a single pass.
///
/// @param src        Pointer to the first of `len` floats to scan (not modified).
/// @param len        Number of elements to scan.
/// @param min_value  Receives the smallest element.
/// @param max_value  Receives the largest element.
///
/// For `len == 0` the outputs are left at the search seeds: `*min_value` is
/// FLT_MAX and `*max_value` is -FLT_MAX.
static inline void minmax32f(float *src, const size_t len, float *min_value,
                             float *max_value) {
  float min = FLT_MAX;
  // FLT_MIN is the smallest *positive* normalized float, so it is not a valid
  // seed for a maximum search over data that may be negative; use -FLT_MAX.
  float max = -FLT_MAX;
  for (size_t i = 0; i < len; i++) {
    if (src[i] < min)
      min = src[i];
    // Compare against the running maximum (not the running minimum).
    if (src[i] > max)
      max = src[i];
  }
  *max_value = max;
  *min_value = min;
}
/// Adds one single-argument run to benchmark `b` for every vector length in
/// the sweep: starting at min_vec_size, multiplying by step_compression_unit
/// each time, and stopping once the length exceeds max_vec_size.
static void generate_arguments_pairs(benchmark::internal::Benchmark *b) {
  int64_t length = min_vec_size;
  while (length <= max_vec_size) {
    b = b->Args({length});
    length *= step_compression_unit;
  }
}
/// Benchmarks the scalar minmax32f() on `state.range(0)` random floats.
///
/// The input is generated once, outside the timed loop. The timed loop contains
/// only the call under test plus optimizer barriers, so no heap allocation is
/// measured. Throughput is reported via SetItemsProcessed as the total number
/// of floats scanned across all iterations.
static void BM_minmax32f(benchmark::State &state) {
  const int64_t stream_size = state.range(0);

  // Uniform random values in [0, 1); seeded from random_device, so the data
  // differs from run to run.
  std::vector<float> input(static_cast<size_t>(stream_size), 0.0f);
  std::mt19937_64 rng;
  rng.seed(std::random_device()());
  std::uniform_real_distribution<float> dist(0, 1);
  for (float &value : input) {
    value = dist(rng);
  }

  float min = 0.0f;
  float max = 0.0f;
  while (state.KeepRunning()) {
    minmax32f(input.data(), stream_size, &min, &max);
    // Keep both results alive so neither half of the computation can be
    // removed as dead code.
    benchmark::DoNotOptimize(min);
    benchmark::DoNotOptimize(max);
    // Make the compiler assume memory changed, so the call is not hoisted out
    // of the loop.
    benchmark::ClobberMemory();
  }
  // Items processed is a total over the whole run, set once after the loop.
  state.SetItemsProcessed(static_cast<int64_t>(state.iterations()) *
                          stream_size);
}
/// Benchmarks minmax128f() (from simd_utils) on `state.range(0)` random floats.
///
/// The input is generated once, outside the timed loop. The timed loop contains
/// only the call under test plus optimizer barriers, so no heap allocation is
/// measured. Throughput is reported via SetItemsProcessed as the total number
/// of floats scanned across all iterations.
static void BM_minmax128f(benchmark::State &state) {
  const int64_t stream_size = state.range(0);

  // Uniform random values in [0, 1); seeded from random_device, so the data
  // differs from run to run.
  std::vector<float> input(static_cast<size_t>(stream_size), 0.0f);
  std::mt19937_64 rng;
  rng.seed(std::random_device()());
  std::uniform_real_distribution<float> dist(0, 1);
  for (float &value : input) {
    value = dist(rng);
  }

  float min = 0.0f;
  float max = 0.0f;
  while (state.KeepRunning()) {
    minmax128f(input.data(), stream_size, &min, &max);
    // Keep both results alive so neither half of the computation can be
    // removed as dead code.
    benchmark::DoNotOptimize(min);
    benchmark::DoNotOptimize(max);
    // Make the compiler assume memory changed, so the call is not hoisted out
    // of the loop.
    benchmark::ClobberMemory();
  }
  // Items processed is a total over the whole run, set once after the loop.
  state.SetItemsProcessed(static_cast<int64_t>(state.iterations()) *
                          stream_size);
}
#ifdef AVX
/// Benchmarks minmax256f() (from simd_utils) on `state.range(0)` random floats.
/// Only compiled when AVX is defined.
///
/// The input is generated once, outside the timed loop. The timed loop contains
/// only the call under test plus optimizer barriers, so no heap allocation is
/// measured. Throughput is reported via SetItemsProcessed as the total number
/// of floats scanned across all iterations.
static void BM_minmax256f(benchmark::State &state) {
  const int64_t stream_size = state.range(0);

  // Uniform random values in [0, 1); seeded from random_device, so the data
  // differs from run to run.
  std::vector<float> input(static_cast<size_t>(stream_size), 0.0f);
  std::mt19937_64 rng;
  rng.seed(std::random_device()());
  std::uniform_real_distribution<float> dist(0, 1);
  for (float &value : input) {
    value = dist(rng);
  }

  float min = 0.0f;
  float max = 0.0f;
  while (state.KeepRunning()) {
    minmax256f(input.data(), stream_size, &min, &max);
    // Keep both results alive so neither half of the computation can be
    // removed as dead code.
    benchmark::DoNotOptimize(min);
    benchmark::DoNotOptimize(max);
    // Make the compiler assume memory changed, so the call is not hoisted out
    // of the loop.
    benchmark::ClobberMemory();
  }
  // Items processed is a total over the whole run, set once after the loop.
  state.SetItemsProcessed(static_cast<int64_t>(state.iterations()) *
                          stream_size);
}
#endif
#ifdef AVX512
/// Benchmarks minmax512f() (from simd_utils) on `state.range(0)` random floats.
/// Only compiled when AVX512 is defined.
///
/// The input is generated once, outside the timed loop. The timed loop contains
/// only the call under test plus optimizer barriers, so no heap allocation is
/// measured. Throughput is reported via SetItemsProcessed as the total number
/// of floats scanned across all iterations.
static void BM_minmax512f(benchmark::State &state) {
  const int64_t stream_size = state.range(0);

  // Uniform random values in [0, 1); seeded from random_device, so the data
  // differs from run to run.
  std::vector<float> input(static_cast<size_t>(stream_size), 0.0f);
  std::mt19937_64 rng;
  rng.seed(std::random_device()());
  std::uniform_real_distribution<float> dist(0, 1);
  for (float &value : input) {
    value = dist(rng);
  }

  float min = 0.0f;
  float max = 0.0f;
  while (state.KeepRunning()) {
    minmax512f(input.data(), stream_size, &min, &max);
    // Keep both results alive so neither half of the computation can be
    // removed as dead code.
    benchmark::DoNotOptimize(min);
    benchmark::DoNotOptimize(max);
    // Make the compiler assume memory changed, so the call is not hoisted out
    // of the loop.
    benchmark::ClobberMemory();
  }
  // Items processed is a total over the whole run, set once after the loop.
  state.SetItemsProcessed(static_cast<int64_t>(state.iterations()) *
                          stream_size);
}
#endif
// Register the scalar reference (minmax32f) and the SSE variant (minmax128f)
// as benchmarks; each is run once per vector length produced by
// generate_arguments_pairs.
BENCHMARK(BM_minmax32f)->Apply(generate_arguments_pairs);
BENCHMARK(BM_minmax128f)->Apply(generate_arguments_pairs);
// Register the AVX variant as a benchmark (only when AVX is defined)
#ifdef AVX
BENCHMARK(BM_minmax256f)->Apply(generate_arguments_pairs);
#endif
// Register the AVX512 variant as a benchmark (only when AVX512 is defined)
#ifdef AVX512
BENCHMARK(BM_minmax512f)->Apply(generate_arguments_pairs);
#endif
// Google Benchmark macro that supplies main() and runs the registered benchmarks.
BENCHMARK_MAIN();