Skip to content

Commit 82f1a2b

Browse files
committed
skip more halfway cases
1 parent 6347dd1 commit 82f1a2b

File tree

2 files changed

+196
-50
lines changed

2 files changed

+196
-50
lines changed

tests/test_gemm_3.cpp

+100-27
Original file line numberDiff line numberDiff line change
@@ -23,7 +23,10 @@ static void RandomizeA(ncnn::Mat& m, int transA, float absmax)
2323
for (int i = 0; i < h; i++)
2424
{
2525
float* p = m.dims == 3 ? m.channel(i) : m.row(i);
26-
const float randabsmax = RandomFloat(absmax * 0.5f, absmax);
26+
float randabsmax = RandomFloat(absmax * 0.5f, absmax);
27+
randabsmax = ncnn::float16_to_float32(ncnn::float32_to_float16(randabsmax));
28+
randabsmax = ncnn::bfloat16_to_float32(ncnn::float32_to_bfloat16(randabsmax));
29+
2730
for (int j = 0; j < m.w; j++)
2831
{
2932
p[j] = RandomFloat(-randabsmax, randabsmax);
@@ -33,16 +36,33 @@ static void RandomizeA(ncnn::Mat& m, int transA, float absmax)
3336
p[RandomInt(0, m.w - 1)] = -randabsmax;
3437
p[RandomInt(0, m.w - 1)] = randabsmax;
3538

36-
// drop 0.4 ~ 0.6
39+
// drop 0.45 ~ 0.55
3740
for (int j = 0; j < m.w; j++)
3841
{
39-
float v = p[j] / randabsmax * 127.f;
42+
float v = p[j] * (127.f / randabsmax);
4043
float vv = fabs(v - (int)v);
41-
while (vv > 0.4f && vv < 0.6f)
44+
45+
float hp = ncnn::float16_to_float32(ncnn::float32_to_float16(p[j]));
46+
float hv = hp * (127.f / randabsmax);
47+
float hvv = fabs(hv - (int)hv);
48+
49+
float bp = ncnn::bfloat16_to_float32(ncnn::float32_to_bfloat16(p[j]));
50+
float bv = bp * (127.f / randabsmax);
51+
float bvv = fabs(bv - (int)bv);
52+
53+
while ((vv > 0.45f && vv < 0.55f) || (hvv > 0.45f && hvv < 0.55f) || (bvv > 0.45f && bvv < 0.55f))
4254
{
4355
p[j] = RandomFloat(-randabsmax, randabsmax);
44-
v = p[j] / randabsmax * 127.f;
56+
v = p[j] * (127.f / randabsmax);
4557
vv = fabs(v - (int)v);
58+
59+
hp = ncnn::float16_to_float32(ncnn::float32_to_float16(p[j]));
60+
hv = hp * (127.f / randabsmax);
61+
hvv = fabs(hv - (int)hv);
62+
63+
bp = ncnn::bfloat16_to_float32(ncnn::float32_to_bfloat16(p[j]));
64+
bv = bp * (127.f / randabsmax);
65+
bvv = fabs(bv - (int)bv);
4666
}
4767
}
4868
}
@@ -52,7 +72,10 @@ static void RandomizeA(ncnn::Mat& m, int transA, float absmax)
5272
std::vector<float> randabsmaxes(m.w);
5373
for (int j = 0; j < m.w; j++)
5474
{
55-
randabsmaxes[j] = RandomFloat(absmax * 0.5f, absmax);
75+
float randabsmax = RandomFloat(absmax * 0.5f, absmax);
76+
randabsmax = ncnn::float16_to_float32(ncnn::float32_to_float16(randabsmax));
77+
randabsmax = ncnn::bfloat16_to_float32(ncnn::float32_to_bfloat16(randabsmax));
78+
randabsmaxes[j] = randabsmax;
5679
}
5780

5881
const int h = m.dims == 3 ? m.c : m.h;
@@ -65,17 +88,34 @@ static void RandomizeA(ncnn::Mat& m, int transA, float absmax)
6588
p[j] = RandomFloat(-randabsmax, randabsmax);
6689
}
6790

68-
// drop 0.4 ~ 0.6
91+
// drop 0.45 ~ 0.55
6992
for (int j = 0; j < m.w; j++)
7093
{
7194
const float randabsmax = randabsmaxes[j];
72-
float v = p[j] / randabsmax * 127.f;
95+
float v = p[j] * (127.f / randabsmax);
7396
float vv = fabs(v - (int)v);
74-
while (vv > 0.4f && vv < 0.6f)
97+
98+
float hp = ncnn::float16_to_float32(ncnn::float32_to_float16(p[j]));
99+
float hv = hp * (127.f / randabsmax);
100+
float hvv = fabs(hv - (int)hv);
101+
102+
float bp = ncnn::bfloat16_to_float32(ncnn::float32_to_bfloat16(p[j]));
103+
float bv = bp * (127.f / randabsmax);
104+
float bvv = fabs(bv - (int)bv);
105+
106+
while ((vv > 0.45f && vv < 0.55f) || (hvv > 0.45f && hvv < 0.55f) || (bvv > 0.45f && bvv < 0.55f))
75107
{
76108
p[j] = RandomFloat(-randabsmax, randabsmax);
77-
v = p[j] / randabsmax * 127.f;
109+
v = p[j] * (127.f / randabsmax);
78110
vv = fabs(v - (int)v);
111+
112+
hp = ncnn::float16_to_float32(ncnn::float32_to_float16(p[j]));
113+
hv = hp * (127.f / randabsmax);
114+
hvv = fabs(hv - (int)hv);
115+
116+
bp = ncnn::bfloat16_to_float32(ncnn::float32_to_bfloat16(p[j]));
117+
bv = bp * (127.f / randabsmax);
118+
bvv = fabs(bv - (int)bv);
79119
}
80120
}
81121
}
@@ -98,25 +138,58 @@ static void RandomizeA(ncnn::Mat& m, int transA, float absmax)
98138

99139
static void RandomizeB(ncnn::Mat& m, float absmax)
100140
{
141+
absmax = ncnn::float16_to_float32(ncnn::float32_to_float16(absmax));
142+
absmax = ncnn::bfloat16_to_float32(ncnn::float32_to_bfloat16(absmax));
143+
144+
const int h = m.dims == 3 ? m.c : m.h;
101145
float* p = m;
102-
for (int i = 0; i < m.total(); i++)
146+
for (int i = 0; i < h; i++)
103147
{
104-
p[i] = RandomFloat(-absmax, absmax);
148+
float* p = m.dims == 3 ? m.channel(i) : m.row(i);
149+
for (int j = 0; j < m.w; j++)
150+
{
151+
p[j] = RandomFloat(-absmax, absmax);
105152

106-
// set random a and b
107-
p[RandomInt(0, m.total() - 1)] = -absmax;
108-
p[RandomInt(0, m.total() - 1)] = absmax;
153+
// drop 0.45 ~ 0.55
154+
float v = p[j] * (127.f / absmax);
155+
float vv = fabs(v - (int)v);
109156

110-
// drop 0.4 ~ 0.6
111-
float v = p[i] / absmax * 127.f;
112-
float vv = fabs(v - (int)v);
113-
while (vv > 0.4f && vv < 0.6f)
114-
{
115-
p[i] = RandomFloat(-absmax, absmax);
116-
v = p[i] / absmax * 127.f;
117-
vv = fabs(v - (int)v);
157+
float hp = ncnn::float16_to_float32(ncnn::float32_to_float16(p[j]));
158+
float hv = hp * (127.f / absmax);
159+
float hvv = fabs(hv - (int)hv);
160+
161+
float bp = ncnn::bfloat16_to_float32(ncnn::float32_to_bfloat16(p[j]));
162+
float bv = bp * (127.f / absmax);
163+
float bvv = fabs(bv - (int)bv);
164+
165+
while ((vv > 0.45f && vv < 0.55f) || (hvv > 0.45f && hvv < 0.55f) || (bvv > 0.45f && bvv < 0.55f))
166+
{
167+
p[j] = RandomFloat(-absmax, absmax);
168+
v = p[j] * (127.f / absmax);
169+
vv = fabs(v - (int)v);
170+
171+
hp = ncnn::float16_to_float32(ncnn::float32_to_float16(p[j]));
172+
hv = hp * (127.f / absmax);
173+
hvv = fabs(hv - (int)hv);
174+
175+
bp = ncnn::bfloat16_to_float32(ncnn::float32_to_bfloat16(p[j]));
176+
bv = bp * (127.f / absmax);
177+
bvv = fabs(bv - (int)bv);
178+
}
118179
}
119180
}
181+
182+
// set random a and b
183+
if (m.dims == 3)
184+
{
185+
m.channel(RandomInt(0, h - 1))[RandomInt(0, m.w - 1)] = -absmax;
186+
m.channel(RandomInt(0, h - 1))[RandomInt(0, m.w - 1)] = absmax;
187+
}
188+
else
189+
{
190+
m.row(RandomInt(0, h - 1))[RandomInt(0, m.w - 1)] = -absmax;
191+
m.row(RandomInt(0, h - 1))[RandomInt(0, m.w - 1)] = absmax;
192+
}
120193
}
121194

122195
static int test_gemm_int8(int M, int N, int K, float alpha, int transA, int transB, int output_elemtype, int output_transpose, int constantA, int constantB, int output_N1M)
@@ -139,8 +212,8 @@ static int test_gemm_int8(int M, int N, int K, float alpha, int transA, int tran
139212
pd.set(18, 2); // int8_scale_term
140213

141214
std::vector<ncnn::Mat> weights;
142-
if (constantA) weights.push_back(transA ? (output_N1M ? RandomS8Mat(M, 1, K) : RandomS8Mat(M, K)) : (output_N1M ? RandomS8Mat(K, 1, M) : RandomS8Mat(K, M)));
143-
if (constantB) weights.push_back(transB ? (output_N1M ? RandomS8Mat(K, 1, N) : RandomS8Mat(K, N)) : (output_N1M ? RandomS8Mat(N, 1, K) : RandomS8Mat(N, K)));
215+
if (constantA) weights.push_back(transA ? RandomS8Mat(M, K) : RandomS8Mat(K, M));
216+
if (constantB) weights.push_back(transB ? RandomS8Mat(K, N) : RandomS8Mat(N, K));
144217
if (constantA) weights.push_back(RandomMat(M, 10.f, 20.f));
145218
if (constantB) weights.push_back(RandomMat(1, 10.f, 20.f));
146219

@@ -266,8 +339,8 @@ static int test_gemm_int8_fp16s(int M, int N, int K, float alpha, int transA, in
266339
pd.set(18, 2); // int8_scale_term
267340

268341
std::vector<ncnn::Mat> weights;
269-
if (constantA) weights.push_back(transA ? (output_N1M ? RandomS8Mat(M, 1, K) : RandomS8Mat(M, K)) : (output_N1M ? RandomS8Mat(K, 1, M) : RandomS8Mat(K, M)));
270-
if (constantB) weights.push_back(transB ? (output_N1M ? RandomS8Mat(K, 1, N) : RandomS8Mat(K, N)) : (output_N1M ? RandomS8Mat(N, 1, K) : RandomS8Mat(N, K)));
342+
if (constantA) weights.push_back(transA ? RandomS8Mat(M, K) : RandomS8Mat(K, M));
343+
if (constantB) weights.push_back(transB ? RandomS8Mat(K, N) : RandomS8Mat(N, K));
271344
if (constantA) weights.push_back(RandomMat(M, 10.f, 20.f));
272345
if (constantB) weights.push_back(RandomMat(1, 10.f, 20.f));
273346

tests/test_gemm_4.cpp

+96-23
Original file line numberDiff line numberDiff line change
@@ -23,7 +23,10 @@ static void RandomizeA(ncnn::Mat& m, int transA, float absmax)
2323
for (int i = 0; i < h; i++)
2424
{
2525
float* p = m.dims == 3 ? m.channel(i) : m.row(i);
26-
const float randabsmax = RandomFloat(absmax * 0.5f, absmax);
26+
float randabsmax = RandomFloat(absmax * 0.5f, absmax);
27+
randabsmax = ncnn::float16_to_float32(ncnn::float32_to_float16(randabsmax));
28+
randabsmax = ncnn::bfloat16_to_float32(ncnn::float32_to_bfloat16(randabsmax));
29+
2730
for (int j = 0; j < m.w; j++)
2831
{
2932
p[j] = RandomFloat(-randabsmax, randabsmax);
@@ -33,16 +36,33 @@ static void RandomizeA(ncnn::Mat& m, int transA, float absmax)
3336
p[RandomInt(0, m.w - 1)] = -randabsmax;
3437
p[RandomInt(0, m.w - 1)] = randabsmax;
3538

36-
// drop 0.4 ~ 0.6
39+
// drop 0.45 ~ 0.55
3740
for (int j = 0; j < m.w; j++)
3841
{
39-
float v = p[j] / randabsmax * 127.f;
42+
float v = p[j] * (127.f / randabsmax);
4043
float vv = fabs(v - (int)v);
41-
while (vv > 0.4f && vv < 0.6f)
44+
45+
float hp = ncnn::float16_to_float32(ncnn::float32_to_float16(p[j]));
46+
float hv = hp * (127.f / randabsmax);
47+
float hvv = fabs(hv - (int)hv);
48+
49+
float bp = ncnn::bfloat16_to_float32(ncnn::float32_to_bfloat16(p[j]));
50+
float bv = bp * (127.f / randabsmax);
51+
float bvv = fabs(bv - (int)bv);
52+
53+
while ((vv > 0.45f && vv < 0.55f) || (hvv > 0.45f && hvv < 0.55f) || (bvv > 0.45f && bvv < 0.55f))
4254
{
4355
p[j] = RandomFloat(-randabsmax, randabsmax);
44-
v = p[j] / randabsmax * 127.f;
56+
v = p[j] * (127.f / randabsmax);
4557
vv = fabs(v - (int)v);
58+
59+
hp = ncnn::float16_to_float32(ncnn::float32_to_float16(p[j]));
60+
hv = hp * (127.f / randabsmax);
61+
hvv = fabs(hv - (int)hv);
62+
63+
bp = ncnn::bfloat16_to_float32(ncnn::float32_to_bfloat16(p[j]));
64+
bv = bp * (127.f / randabsmax);
65+
bvv = fabs(bv - (int)bv);
4666
}
4767
}
4868
}
@@ -52,7 +72,10 @@ static void RandomizeA(ncnn::Mat& m, int transA, float absmax)
5272
std::vector<float> randabsmaxes(m.w);
5373
for (int j = 0; j < m.w; j++)
5474
{
55-
randabsmaxes[j] = RandomFloat(absmax * 0.5f, absmax);
75+
float randabsmax = RandomFloat(absmax * 0.5f, absmax);
76+
randabsmax = ncnn::float16_to_float32(ncnn::float32_to_float16(randabsmax));
77+
randabsmax = ncnn::bfloat16_to_float32(ncnn::float32_to_bfloat16(randabsmax));
78+
randabsmaxes[j] = randabsmax;
5679
}
5780

5881
const int h = m.dims == 3 ? m.c : m.h;
@@ -65,17 +88,34 @@ static void RandomizeA(ncnn::Mat& m, int transA, float absmax)
6588
p[j] = RandomFloat(-randabsmax, randabsmax);
6689
}
6790

68-
// drop 0.4 ~ 0.6
91+
// drop 0.45 ~ 0.55
6992
for (int j = 0; j < m.w; j++)
7093
{
7194
const float randabsmax = randabsmaxes[j];
72-
float v = p[j] / randabsmax * 127.f;
95+
float v = p[j] * (127.f / randabsmax);
7396
float vv = fabs(v - (int)v);
74-
while (vv > 0.4f && vv < 0.6f)
97+
98+
float hp = ncnn::float16_to_float32(ncnn::float32_to_float16(p[j]));
99+
float hv = hp * (127.f / randabsmax);
100+
float hvv = fabs(hv - (int)hv);
101+
102+
float bp = ncnn::bfloat16_to_float32(ncnn::float32_to_bfloat16(p[j]));
103+
float bv = bp * (127.f / randabsmax);
104+
float bvv = fabs(bv - (int)bv);
105+
106+
while ((vv > 0.45f && vv < 0.55f) || (hvv > 0.45f && hvv < 0.55f) || (bvv > 0.45f && bvv < 0.55f))
75107
{
76108
p[j] = RandomFloat(-randabsmax, randabsmax);
77-
v = p[j] / randabsmax * 127.f;
109+
v = p[j] * (127.f / randabsmax);
78110
vv = fabs(v - (int)v);
111+
112+
hp = ncnn::float16_to_float32(ncnn::float32_to_float16(p[j]));
113+
hv = hp * (127.f / randabsmax);
114+
hvv = fabs(hv - (int)hv);
115+
116+
bp = ncnn::bfloat16_to_float32(ncnn::float32_to_bfloat16(p[j]));
117+
bv = bp * (127.f / randabsmax);
118+
bvv = fabs(bv - (int)bv);
79119
}
80120
}
81121
}
@@ -98,25 +138,58 @@ static void RandomizeA(ncnn::Mat& m, int transA, float absmax)
98138

99139
static void RandomizeB(ncnn::Mat& m, float absmax)
100140
{
141+
absmax = ncnn::float16_to_float32(ncnn::float32_to_float16(absmax));
142+
absmax = ncnn::bfloat16_to_float32(ncnn::float32_to_bfloat16(absmax));
143+
144+
const int h = m.dims == 3 ? m.c : m.h;
101145
float* p = m;
102-
for (int i = 0; i < m.total(); i++)
146+
for (int i = 0; i < h; i++)
103147
{
104-
p[i] = RandomFloat(-absmax, absmax);
148+
float* p = m.dims == 3 ? m.channel(i) : m.row(i);
149+
for (int j = 0; j < m.w; j++)
150+
{
151+
p[j] = RandomFloat(-absmax, absmax);
105152

106-
// set random a and b
107-
p[RandomInt(0, m.total() - 1)] = -absmax;
108-
p[RandomInt(0, m.total() - 1)] = absmax;
153+
// drop 0.45 ~ 0.55
154+
float v = p[j] * (127.f / absmax);
155+
float vv = fabs(v - (int)v);
109156

110-
// drop 0.4 ~ 0.6
111-
float v = p[i] / absmax * 127.f;
112-
float vv = fabs(v - (int)v);
113-
while (vv > 0.4f && vv < 0.6f)
114-
{
115-
p[i] = RandomFloat(-absmax, absmax);
116-
v = p[i] / absmax * 127.f;
117-
vv = fabs(v - (int)v);
157+
float hp = ncnn::float16_to_float32(ncnn::float32_to_float16(p[j]));
158+
float hv = hp * (127.f / absmax);
159+
float hvv = fabs(hv - (int)hv);
160+
161+
float bp = ncnn::bfloat16_to_float32(ncnn::float32_to_bfloat16(p[j]));
162+
float bv = bp * (127.f / absmax);
163+
float bvv = fabs(bv - (int)bv);
164+
165+
while ((vv > 0.45f && vv < 0.55f) || (hvv > 0.45f && hvv < 0.55f) || (bvv > 0.45f && bvv < 0.55f))
166+
{
167+
p[j] = RandomFloat(-absmax, absmax);
168+
v = p[j] * (127.f / absmax);
169+
vv = fabs(v - (int)v);
170+
171+
hp = ncnn::float16_to_float32(ncnn::float32_to_float16(p[j]));
172+
hv = hp * (127.f / absmax);
173+
hvv = fabs(hv - (int)hv);
174+
175+
bp = ncnn::bfloat16_to_float32(ncnn::float32_to_bfloat16(p[j]));
176+
bv = bp * (127.f / absmax);
177+
bvv = fabs(bv - (int)bv);
178+
}
118179
}
119180
}
181+
182+
// set random a and b
183+
if (m.dims == 3)
184+
{
185+
m.channel(RandomInt(0, h - 1))[RandomInt(0, m.w - 1)] = -absmax;
186+
m.channel(RandomInt(0, h - 1))[RandomInt(0, m.w - 1)] = absmax;
187+
}
188+
else
189+
{
190+
m.row(RandomInt(0, h - 1))[RandomInt(0, m.w - 1)] = -absmax;
191+
m.row(RandomInt(0, h - 1))[RandomInt(0, m.w - 1)] = absmax;
192+
}
120193
}
121194

122195
static int test_gemm_int8(int M, int N, int K, int TILE_M, int TILE_N, int TILE_K, float alpha, int transA, int transB, int output_transpose)

0 commit comments

Comments (0)