Skip to content

Commit c31ed2a

Browse files
authored
vulkan: define all quant data structures in types.comp (#10440)
1 parent 5b3466b commit c31ed2a

File tree

1 file changed

+76
-71
lines changed

1 file changed

+76
-71
lines changed

ggml/src/ggml-vulkan/vulkan-shaders/types.comp

+76 -71
Original file line number | Diff line number | Diff line change
@@ -30,10 +30,8 @@
3030
#endif
3131
#endif
3232

33-
#if defined(DATA_A_Q4_0)
34-
#extension GL_EXT_shader_16bit_storage : require
35-
#define QUANT_K 32
36-
#define QUANT_R 2
33+
#define QUANT_K_Q4_0 32
34+
#define QUANT_R_Q4_0 2
3735

3836
struct block_q4_0
3937
{
@@ -46,14 +44,15 @@ struct block_q4_0_packed16
4644
uint16_t qs[16/2];
4745
};
4846

47+
#if defined(DATA_A_Q4_0)
48+
#define QUANT_K QUANT_K_Q4_0
49+
#define QUANT_R QUANT_R_Q4_0
4950
#define A_TYPE block_q4_0
5051
#define A_TYPE_PACKED16 block_q4_0_packed16
5152
#endif
5253

53-
#if defined(DATA_A_Q4_1)
54-
#extension GL_EXT_shader_16bit_storage : require
55-
#define QUANT_K 32
56-
#define QUANT_R 2
54+
#define QUANT_K_Q4_1 32
55+
#define QUANT_R_Q4_1 2
5756

5857
struct block_q4_1
5958
{
@@ -69,15 +68,15 @@ struct block_q4_1_packed16
6968
uint16_t qs[16/2];
7069
};
7170

71+
#if defined(DATA_A_Q4_1)
72+
#define QUANT_K QUANT_K_Q4_1
73+
#define QUANT_R QUANT_R_Q4_1
7274
#define A_TYPE block_q4_1
7375
#define A_TYPE_PACKED16 block_q4_1_packed16
7476
#endif
7577

76-
#if defined(DATA_A_Q5_0)
77-
#extension GL_EXT_shader_16bit_storage : require
78-
#extension GL_EXT_shader_explicit_arithmetic_types_int16 : require
79-
#define QUANT_K 32
80-
#define QUANT_R 2
78+
#define QUANT_K_Q5_0 32
79+
#define QUANT_R_Q5_0 2
8180

8281
struct block_q5_0
8382
{
@@ -93,15 +92,15 @@ struct block_q5_0_packed16
9392
uint16_t qs[16/2];
9493
};
9594

95+
#if defined(DATA_A_Q5_0)
96+
#define QUANT_K QUANT_K_Q5_0
97+
#define QUANT_R QUANT_R_Q5_0
9698
#define A_TYPE block_q5_0
9799
#define A_TYPE_PACKED16 block_q5_0_packed16
98100
#endif
99101

100-
#if defined(DATA_A_Q5_1)
101-
#extension GL_EXT_shader_16bit_storage : require
102-
#extension GL_EXT_shader_explicit_arithmetic_types_int16 : require
103-
#define QUANT_K 32
104-
#define QUANT_R 2
102+
#define QUANT_K_Q5_1 32
103+
#define QUANT_R_Q5_1 2
105104

106105
struct block_q5_1
107106
{
@@ -119,14 +118,15 @@ struct block_q5_1_packed16
119118
uint16_t qs[16/2];
120119
};
121120

121+
#if defined(DATA_A_Q5_1)
122+
#define QUANT_K QUANT_K_Q5_1
123+
#define QUANT_R QUANT_R_Q5_1
122124
#define A_TYPE block_q5_1
123125
#define A_TYPE_PACKED16 block_q5_1_packed16
124126
#endif
125127

126-
#if defined(DATA_A_Q8_0)
127-
#extension GL_EXT_shader_16bit_storage : require
128-
#define QUANT_K 32
129-
#define QUANT_R 1
128+
#define QUANT_K_Q8_0 32
129+
#define QUANT_R_Q8_0 1
130130

131131
struct block_q8_0
132132
{
@@ -139,164 +139,164 @@ struct block_q8_0_packed16
139139
uint16_t qs[32/2];
140140
};
141141

142+
#if defined(DATA_A_Q8_0)
143+
#define QUANT_K QUANT_K_Q8_0
144+
#define QUANT_R QUANT_R_Q8_0
142145
#define A_TYPE block_q8_0
143146
#define A_TYPE_PACKED16 block_q8_0_packed16
144147
#endif
145148

146149
// K-quants
147-
#if defined(DATA_A_Q2_K)
148-
#extension GL_EXT_shader_16bit_storage : require
149-
#define QUANT_K 256
150+
#define QUANT_K_Q2_K 256
150151

151152
struct block_q2_K
152153
{
153-
uint8_t scales[QUANT_K/16];
154-
uint8_t qs[QUANT_K/4];
154+
uint8_t scales[QUANT_K_Q2_K/16];
155+
uint8_t qs[QUANT_K_Q2_K/4];
155156
f16vec2 d;
156157
};
157158

158159
struct block_q2_K_packed16
159160
{
160-
uint16_t scales[QUANT_K/16/2];
161-
uint16_t qs[QUANT_K/4/2];
161+
uint16_t scales[QUANT_K_Q2_K/16/2];
162+
uint16_t qs[QUANT_K_Q2_K/4/2];
162163
f16vec2 d;
163164
};
164165

165166
struct block_q2_K_packed32
166167
{
167-
uint32_t scales[QUANT_K/16/4];
168-
uint32_t qs[QUANT_K/4/4];
168+
uint32_t scales[QUANT_K_Q2_K/16/4];
169+
uint32_t qs[QUANT_K_Q2_K/4/4];
169170
f16vec2 d;
170171
};
171172

173+
#if defined(DATA_A_Q2_K)
174+
#define QUANT_K QUANT_K_Q2_K
172175
#define A_TYPE block_q2_K
173176
#define A_TYPE_PACKED16 block_q2_K_packed16
174177
#define A_TYPE_PACKED32 block_q2_K_packed32
175178
#endif
176179

177-
#if defined(DATA_A_Q3_K)
178-
#extension GL_EXT_shader_16bit_storage : require
179-
#define QUANT_K 256
180+
#define QUANT_K_Q3_K 256
180181

181182
struct block_q3_K
182183
{
183-
uint8_t hmask[QUANT_K/8];
184-
uint8_t qs[QUANT_K/4];
184+
uint8_t hmask[QUANT_K_Q3_K/8];
185+
uint8_t qs[QUANT_K_Q3_K/4];
185186
uint8_t scales[12];
186187
float16_t d;
187188
};
188189

189190
struct block_q3_K_packed16
190191
{
191-
uint16_t hmask[QUANT_K/8/2];
192-
uint16_t qs[QUANT_K/4/2];
192+
uint16_t hmask[QUANT_K_Q3_K/8/2];
193+
uint16_t qs[QUANT_K_Q3_K/4/2];
193194
uint16_t scales[12/2];
194195
float16_t d;
195196
};
196197

198+
#if defined(DATA_A_Q3_K)
199+
#define QUANT_K QUANT_K_Q3_K
197200
#define A_TYPE block_q3_K
198201
#define A_TYPE_PACKED16 block_q3_K_packed16
199202
#endif
200203

201-
#if defined(DATA_A_Q4_K)
202-
#extension GL_EXT_shader_16bit_storage : require
203-
#define QUANT_K 256
204+
#define QUANT_K_Q4_K 256
204205

205206
struct block_q4_K
206207
{
207208
f16vec2 d;
208-
uint8_t scales[3*QUANT_K/64];
209-
uint8_t qs[QUANT_K/2];
209+
uint8_t scales[3*QUANT_K_Q4_K/64];
210+
uint8_t qs[QUANT_K_Q4_K/2];
210211
};
211212

212213
struct block_q4_K_packed16
213214
{
214215
f16vec2 d;
215-
uint16_t scales[3*QUANT_K/64/2];
216-
uint16_t qs[QUANT_K/2/2];
216+
uint16_t scales[3*QUANT_K_Q4_K/64/2];
217+
uint16_t qs[QUANT_K_Q4_K/2/2];
217218
};
218219

219220
struct block_q4_K_packed32
220221
{
221222
f16vec2 d;
222-
uint32_t scales[3*QUANT_K/64/4];
223-
uint32_t qs[QUANT_K/2/4];
223+
uint32_t scales[3*QUANT_K_Q4_K/64/4];
224+
uint32_t qs[QUANT_K_Q4_K/2/4];
224225
};
225226

227+
#if defined(DATA_A_Q4_K)
228+
#define QUANT_K QUANT_K_Q4_K
226229
#define A_TYPE block_q4_K
227230
#define A_TYPE_PACKED16 block_q4_K_packed16
228231
#define A_TYPE_PACKED32 block_q4_K_packed32
229232
#endif
230233

231-
#if defined(DATA_A_Q5_K)
232-
#extension GL_EXT_shader_16bit_storage : require
233-
#define QUANT_K 256
234+
#define QUANT_K_Q5_K 256
234235

235236
struct block_q5_K
236237
{
237238
f16vec2 d;
238239
uint8_t scales[12];
239-
uint8_t qh[QUANT_K/8];
240-
uint8_t qs[QUANT_K/2];
240+
uint8_t qh[QUANT_K_Q5_K/8];
241+
uint8_t qs[QUANT_K_Q5_K/2];
241242
};
242243

243244
struct block_q5_K_packed16
244245
{
245246
f16vec2 d;
246247
uint16_t scales[12/2];
247-
uint16_t qh[QUANT_K/8/2];
248-
uint16_t qs[QUANT_K/2/2];
248+
uint16_t qh[QUANT_K_Q5_K/8/2];
249+
uint16_t qs[QUANT_K_Q5_K/2/2];
249250
};
250251

252+
#if defined(DATA_A_Q5_K)
253+
#define QUANT_K QUANT_K_Q5_K
251254
#define A_TYPE block_q5_K
252255
#define A_TYPE_PACKED16 block_q5_K_packed16
253256
#endif
254257

255-
#if defined(DATA_A_Q6_K)
256-
#extension GL_EXT_shader_16bit_storage : require
257-
#define QUANT_K 256
258+
#define QUANT_K_Q6_K 256
258259

259260
struct block_q6_K
260261
{
261-
uint8_t ql[QUANT_K/2];
262-
uint8_t qh[QUANT_K/4];
263-
int8_t scales[QUANT_K/16];
262+
uint8_t ql[QUANT_K_Q6_K/2];
263+
uint8_t qh[QUANT_K_Q6_K/4];
264+
int8_t scales[QUANT_K_Q6_K/16];
264265
float16_t d;
265266
};
266267

267268
struct block_q6_K_packed16
268269
{
269-
uint16_t ql[QUANT_K/2/2];
270-
uint16_t qh[QUANT_K/4/2];
271-
int8_t scales[QUANT_K/16];
270+
uint16_t ql[QUANT_K_Q6_K/2/2];
271+
uint16_t qh[QUANT_K_Q6_K/4/2];
272+
int8_t scales[QUANT_K_Q6_K/16];
272273
float16_t d;
273274
};
274275

276+
#if defined(DATA_A_Q6_K)
277+
#define QUANT_K QUANT_K_Q6_K
275278
#define A_TYPE block_q6_K
276279
#define A_TYPE_PACKED16 block_q6_K_packed16
277280
#endif
278281

279282
// IQuants
280283

281-
#if defined(DATA_A_IQ4_NL)
282-
#extension GL_EXT_shader_16bit_storage : require
283-
#define QUANT_K 32
284-
#define QUANT_R 2
284+
#define QUANT_K_IQ4_NL 32
285+
#define QUANT_R_IQ4_NL 2
285286

286287
struct block_iq4_nl
287288
{
288289
float16_t d;
289-
uint8_t qs[QUANT_K/2];
290+
uint8_t qs[QUANT_K_IQ4_NL/2];
290291
};
291292

292293
struct block_iq4_nl_packed16
293294
{
294295
float16_t d;
295-
uint16_t qs[QUANT_K/2/2];
296+
uint16_t qs[QUANT_K_IQ4_NL/2/2];
296297
};
297298

298-
#define A_TYPE block_iq4_nl
299-
#define A_TYPE_PACKED16 block_iq4_nl_packed16
299+
#if defined(DATA_A_IQ4_NL)
300300

301301
const int8_t kvalues_iq4nl_const[16] = {
302302
int8_t(-127), int8_t(-104), int8_t(-83), int8_t(-65), int8_t(-49), int8_t(-35), int8_t(-22), int8_t(-10),
@@ -313,6 +313,11 @@ void init_iq4nl_shmem()
313313
}
314314
barrier();
315315
}
316+
317+
#define QUANT_K QUANT_K_IQ4_NL
318+
#define QUANT_R QUANT_R_IQ4_NL
319+
#define A_TYPE block_iq4_nl
320+
#define A_TYPE_PACKED16 block_iq4_nl_packed16
316321
#endif
317322

318323
#endif // !defined(GGML_TYPES_COMP)

0 commit comments

Comments
 (0)