@@ -23,7 +23,10 @@ static void RandomizeA(ncnn::Mat& m, int transA, float absmax)
23
23
for (int i = 0 ; i < h; i++)
24
24
{
25
25
float * p = m.dims == 3 ? m.channel (i) : m.row (i);
26
- const float randabsmax = RandomFloat (absmax * 0 .5f , absmax);
26
+ float randabsmax = RandomFloat (absmax * 0 .5f , absmax);
27
+ randabsmax = ncnn::float16_to_float32 (ncnn::float32_to_float16 (randabsmax));
28
+ randabsmax = ncnn::bfloat16_to_float32 (ncnn::float32_to_bfloat16 (randabsmax));
29
+
27
30
for (int j = 0 ; j < m.w ; j++)
28
31
{
29
32
p[j] = RandomFloat (-randabsmax, randabsmax);
@@ -33,16 +36,33 @@ static void RandomizeA(ncnn::Mat& m, int transA, float absmax)
33
36
p[RandomInt (0 , m.w - 1 )] = -randabsmax;
34
37
p[RandomInt (0 , m.w - 1 )] = randabsmax;
35
38
36
- // drop 0.4 ~ 0.6
39
+ // drop 0.45 ~ 0.55
37
40
for (int j = 0 ; j < m.w ; j++)
38
41
{
39
- float v = p[j] / randabsmax * 127 .f ;
42
+ float v = p[j] * ( 127 .f / randabsmax) ;
40
43
float vv = fabs (v - (int )v);
41
- while (vv > 0 .4f && vv < 0 .6f )
44
+
45
+ float hp = ncnn::float16_to_float32 (ncnn::float32_to_float16 (p[j]));
46
+ float hv = hp * (127 .f / randabsmax);
47
+ float hvv = fabs (hv - (int )hv);
48
+
49
+ float bp = ncnn::bfloat16_to_float32 (ncnn::float32_to_bfloat16 (p[j]));
50
+ float bv = bp * (127 .f / randabsmax);
51
+ float bvv = fabs (bv - (int )bv);
52
+
53
+ while ((vv > 0 .45f && vv < 0 .55f ) || (hvv > 0 .45f && hvv < 0 .55f ) || (bvv > 0 .45f && bvv < 0 .55f ))
42
54
{
43
55
p[j] = RandomFloat (-randabsmax, randabsmax);
44
- v = p[j] / randabsmax * 127 .f ;
56
+ v = p[j] * ( 127 .f / randabsmax) ;
45
57
vv = fabs (v - (int )v);
58
+
59
+ hp = ncnn::float16_to_float32 (ncnn::float32_to_float16 (p[j]));
60
+ hv = hp * (127 .f / randabsmax);
61
+ hvv = fabs (hv - (int )hv);
62
+
63
+ bp = ncnn::bfloat16_to_float32 (ncnn::float32_to_bfloat16 (p[j]));
64
+ bv = bp * (127 .f / randabsmax);
65
+ bvv = fabs (bv - (int )bv);
46
66
}
47
67
}
48
68
}
@@ -52,7 +72,10 @@ static void RandomizeA(ncnn::Mat& m, int transA, float absmax)
52
72
std::vector<float > randabsmaxes (m.w );
53
73
for (int j = 0 ; j < m.w ; j++)
54
74
{
55
- randabsmaxes[j] = RandomFloat (absmax * 0 .5f , absmax);
75
+ float randabsmax = RandomFloat (absmax * 0 .5f , absmax);
76
+ randabsmax = ncnn::float16_to_float32 (ncnn::float32_to_float16 (randabsmax));
77
+ randabsmax = ncnn::bfloat16_to_float32 (ncnn::float32_to_bfloat16 (randabsmax));
78
+ randabsmaxes[j] = randabsmax;
56
79
}
57
80
58
81
const int h = m.dims == 3 ? m.c : m.h ;
@@ -65,17 +88,34 @@ static void RandomizeA(ncnn::Mat& m, int transA, float absmax)
65
88
p[j] = RandomFloat (-randabsmax, randabsmax);
66
89
}
67
90
68
- // drop 0.4 ~ 0.6
91
+ // drop 0.45 ~ 0.55
69
92
for (int j = 0 ; j < m.w ; j++)
70
93
{
71
94
const float randabsmax = randabsmaxes[j];
72
- float v = p[j] / randabsmax * 127 .f ;
95
+ float v = p[j] * ( 127 .f / randabsmax) ;
73
96
float vv = fabs (v - (int )v);
74
- while (vv > 0 .4f && vv < 0 .6f )
97
+
98
+ float hp = ncnn::float16_to_float32 (ncnn::float32_to_float16 (p[j]));
99
+ float hv = hp * (127 .f / randabsmax);
100
+ float hvv = fabs (hv - (int )hv);
101
+
102
+ float bp = ncnn::bfloat16_to_float32 (ncnn::float32_to_bfloat16 (p[j]));
103
+ float bv = bp * (127 .f / randabsmax);
104
+ float bvv = fabs (bv - (int )bv);
105
+
106
+ while ((vv > 0 .45f && vv < 0 .55f ) || (hvv > 0 .45f && hvv < 0 .55f ) || (bvv > 0 .45f && bvv < 0 .55f ))
75
107
{
76
108
p[j] = RandomFloat (-randabsmax, randabsmax);
77
- v = p[j] / randabsmax * 127 .f ;
109
+ v = p[j] * ( 127 .f / randabsmax) ;
78
110
vv = fabs (v - (int )v);
111
+
112
+ hp = ncnn::float16_to_float32 (ncnn::float32_to_float16 (p[j]));
113
+ hv = hp * (127 .f / randabsmax);
114
+ hvv = fabs (hv - (int )hv);
115
+
116
+ bp = ncnn::bfloat16_to_float32 (ncnn::float32_to_bfloat16 (p[j]));
117
+ bv = bp * (127 .f / randabsmax);
118
+ bvv = fabs (bv - (int )bv);
79
119
}
80
120
}
81
121
}
@@ -98,25 +138,58 @@ static void RandomizeA(ncnn::Mat& m, int transA, float absmax)
98
138
99
139
// Returns true when the fractional part of x * scale lies strictly inside
// (0.45, 0.55), i.e. the scaled value sits close to a .5 rounding boundary.
static bool RandomizeB_near_half(float x, float scale)
{
    const float v = x * scale;
    const float frac = fabs(v - (int)v);
    return frac > 0.45f && frac < 0.55f;
}

// Returns true when x, or x after a round trip through fp16 or bf16, scales
// to a value close to a .5 rounding boundary (see RandomizeB_near_half).
static bool RandomizeB_is_ambiguous(float x, float scale)
{
    const float as_fp16 = ncnn::float16_to_float32(ncnn::float32_to_float16(x));
    const float as_bf16 = ncnn::bfloat16_to_float32(ncnn::float32_to_bfloat16(x));

    return RandomizeB_near_half(x, scale)
           || RandomizeB_near_half(as_fp16, scale)
           || RandomizeB_near_half(as_bf16, scale);
}

// Fill m with random values in [-absmax, absmax].
//
// absmax is first round-tripped through fp16 and then bf16, and the rounded
// value is the one used for everything below. Each element is re-drawn until
// neither its fp32 value nor its fp16 / bf16 round-trip, multiplied by
// 127 / absmax, has a fractional part inside (0.45, 0.55).
// NOTE(review): presumably this keeps int8 quantization rounding identical
// across the fp32, fp16 and bf16 code paths - confirm against the callers.
//
// Finally one randomly chosen element is set to -absmax and one to +absmax.
// Both positions are drawn independently, so they may coincide.
//
// An empty matrix (no rows/channels or zero width) is left untouched.
static void RandomizeB(ncnn::Mat& m, float absmax)
{
    absmax = ncnn::float16_to_float32(ncnn::float32_to_float16(absmax));
    absmax = ncnn::bfloat16_to_float32(ncnn::float32_to_bfloat16(absmax));

    const int h = m.dims == 3 ? m.c : m.h;

    // nothing to fill, and RandomInt(0, -1) below would index out of range
    if (h <= 0 || m.w <= 0)
        return;

    const float scale = 127.f / absmax;

    for (int i = 0; i < h; i++)
    {
        float* p = m.dims == 3 ? m.channel(i) : m.row(i);
        for (int j = 0; j < m.w; j++)
        {
            p[j] = RandomFloat(-absmax, absmax);

            // drop 0.45 ~ 0.55
            while (RandomizeB_is_ambiguous(p[j], scale))
            {
                p[j] = RandomFloat(-absmax, absmax);
            }
        }
    }

    // set random a and b
    // draw the row index before the column index so the RNG sequence does
    // not depend on the compiler's operand evaluation order
    const int neg_i = RandomInt(0, h - 1);
    const int neg_j = RandomInt(0, m.w - 1);
    const int pos_i = RandomInt(0, h - 1);
    const int pos_j = RandomInt(0, m.w - 1);

    if (m.dims == 3)
    {
        m.channel(neg_i)[neg_j] = -absmax;
        m.channel(pos_i)[pos_j] = absmax;
    }
    else
    {
        m.row(neg_i)[neg_j] = -absmax;
        m.row(pos_i)[pos_j] = absmax;
    }
}
121
194
122
195
static int test_gemm_int8 (int M, int N, int K, float alpha, int transA, int transB, int output_elemtype, int output_transpose, int constantA, int constantB, int output_N1M)
@@ -139,8 +212,8 @@ static int test_gemm_int8(int M, int N, int K, float alpha, int transA, int tran
139
212
pd.set (18 , 2 ); // int8_scale_term
140
213
141
214
std::vector<ncnn::Mat> weights;
142
- if (constantA) weights.push_back (transA ? (output_N1M ? RandomS8Mat (M, 1 , K) : RandomS8Mat (M, K)) : (output_N1M ? RandomS8Mat (K, 1 , M) : RandomS8Mat (K, M) ));
143
- if (constantB) weights.push_back (transB ? (output_N1M ? RandomS8Mat (K, 1 , N) : RandomS8Mat (K, N)) : (output_N1M ? RandomS8Mat (N, 1 , K) : RandomS8Mat (N, K) ));
215
+ if (constantA) weights.push_back (transA ? RandomS8Mat (M, K) : RandomS8Mat (K, M ));
216
+ if (constantB) weights.push_back (transB ? RandomS8Mat (K, N) : RandomS8Mat (N, K ));
144
217
if (constantA) weights.push_back (RandomMat (M, 10 .f , 20 .f ));
145
218
if (constantB) weights.push_back (RandomMat (1 , 10 .f , 20 .f ));
146
219
@@ -266,8 +339,8 @@ static int test_gemm_int8_fp16s(int M, int N, int K, float alpha, int transA, in
266
339
pd.set (18 , 2 ); // int8_scale_term
267
340
268
341
std::vector<ncnn::Mat> weights;
269
- if (constantA) weights.push_back (transA ? (output_N1M ? RandomS8Mat (M, 1 , K) : RandomS8Mat (M, K)) : (output_N1M ? RandomS8Mat (K, 1 , M) : RandomS8Mat (K, M) ));
270
- if (constantB) weights.push_back (transB ? (output_N1M ? RandomS8Mat (K, 1 , N) : RandomS8Mat (K, N)) : (output_N1M ? RandomS8Mat (N, 1 , K) : RandomS8Mat (N, K) ));
342
+ if (constantA) weights.push_back (transA ? RandomS8Mat (M, K) : RandomS8Mat (K, M ));
343
+ if (constantB) weights.push_back (transB ? RandomS8Mat (K, N) : RandomS8Mat (N, K ));
271
344
if (constantA) weights.push_back (RandomMat (M, 10 .f , 20 .f ));
272
345
if (constantB) weights.push_back (RandomMat (1 , 10 .f , 20 .f ));
273
346
0 commit comments