30
30
#endif
31
31
#endif
32
32
33
// Q4_0 constants. Removed lines: QUANT_K/QUANT_R (32/2) defined behind the DATA_A_Q4_0 guard together
// with the 16-bit storage #extension. Added lines: type-suffixed QUANT_K_Q4_0/QUANT_R_Q4_0 with the same values.
- #if defined(DATA_A_Q4_0)
34
- #extension GL_EXT_shader_16bit_storage : require
35
- #define QUANT_K 32
36
- #define QUANT_R 2
33
+ #define QUANT_K_Q4_0 32
34
+ #define QUANT_R_Q4_0 2
37
35
38
36
struct block_q4_0
39
37
{
@@ -46,14 +44,15 @@ struct block_q4_0_packed16
46
44
uint16_t qs[16/2];
47
45
};
48
46
47
// When DATA_A_Q4_0 is defined, map the generic QUANT_K/QUANT_R names to the Q4_0 constants and
// A_TYPE/A_TYPE_PACKED16 to the Q4_0 block structs.
+ #if defined(DATA_A_Q4_0)
48
+ #define QUANT_K QUANT_K_Q4_0
49
+ #define QUANT_R QUANT_R_Q4_0
49
50
#define A_TYPE block_q4_0
50
51
#define A_TYPE_PACKED16 block_q4_0_packed16
51
52
#endif
52
53
53
// Q4_1 constants. Removed lines: QUANT_K/QUANT_R (32/2) defined behind the DATA_A_Q4_1 guard together
// with the 16-bit storage #extension. Added lines: type-suffixed QUANT_K_Q4_1/QUANT_R_Q4_1 with the same values.
- #if defined(DATA_A_Q4_1)
54
- #extension GL_EXT_shader_16bit_storage : require
55
- #define QUANT_K 32
56
- #define QUANT_R 2
54
+ #define QUANT_K_Q4_1 32
55
+ #define QUANT_R_Q4_1 2
57
56
58
57
struct block_q4_1
59
58
{
@@ -69,15 +68,15 @@ struct block_q4_1_packed16
69
68
uint16_t qs[16/2];
70
69
};
71
70
71
// When DATA_A_Q4_1 is defined, map the generic QUANT_K/QUANT_R names to the Q4_1 constants and
// A_TYPE/A_TYPE_PACKED16 to the Q4_1 block structs.
+ #if defined(DATA_A_Q4_1)
72
+ #define QUANT_K QUANT_K_Q4_1
73
+ #define QUANT_R QUANT_R_Q4_1
72
74
#define A_TYPE block_q4_1
73
75
#define A_TYPE_PACKED16 block_q4_1_packed16
74
76
#endif
75
77
76
// Q5_0 constants. Removed lines: QUANT_K/QUANT_R (32/2) defined behind the DATA_A_Q5_0 guard together
// with two #extension requirements. Added lines: type-suffixed QUANT_K_Q5_0/QUANT_R_Q5_0 with the same values.
- #if defined(DATA_A_Q5_0)
77
- #extension GL_EXT_shader_16bit_storage : require
78
- #extension GL_EXT_shader_explicit_arithmetic_types_int16 : require
79
- #define QUANT_K 32
80
- #define QUANT_R 2
78
+ #define QUANT_K_Q5_0 32
79
+ #define QUANT_R_Q5_0 2
81
80
82
81
struct block_q5_0
83
82
{
@@ -93,15 +92,15 @@ struct block_q5_0_packed16
93
92
uint16_t qs[16/2];
94
93
};
95
94
95
// When DATA_A_Q5_0 is defined, map the generic QUANT_K/QUANT_R names to the Q5_0 constants and
// A_TYPE/A_TYPE_PACKED16 to the Q5_0 block structs.
+ #if defined(DATA_A_Q5_0)
96
+ #define QUANT_K QUANT_K_Q5_0
97
+ #define QUANT_R QUANT_R_Q5_0
96
98
#define A_TYPE block_q5_0
97
99
#define A_TYPE_PACKED16 block_q5_0_packed16
98
100
#endif
99
101
100
// Q5_1 constants. Removed lines: QUANT_K/QUANT_R (32/2) defined behind the DATA_A_Q5_1 guard together
// with two #extension requirements. Added lines: type-suffixed QUANT_K_Q5_1/QUANT_R_Q5_1 with the same values.
- #if defined(DATA_A_Q5_1)
101
- #extension GL_EXT_shader_16bit_storage : require
102
- #extension GL_EXT_shader_explicit_arithmetic_types_int16 : require
103
- #define QUANT_K 32
104
- #define QUANT_R 2
102
+ #define QUANT_K_Q5_1 32
103
+ #define QUANT_R_Q5_1 2
105
104
106
105
struct block_q5_1
107
106
{
@@ -119,14 +118,15 @@ struct block_q5_1_packed16
119
118
uint16_t qs[16/2];
120
119
};
121
120
121
// When DATA_A_Q5_1 is defined, map the generic QUANT_K/QUANT_R names to the Q5_1 constants and
// A_TYPE/A_TYPE_PACKED16 to the Q5_1 block structs.
+ #if defined(DATA_A_Q5_1)
122
+ #define QUANT_K QUANT_K_Q5_1
123
+ #define QUANT_R QUANT_R_Q5_1
122
124
#define A_TYPE block_q5_1
123
125
#define A_TYPE_PACKED16 block_q5_1_packed16
124
126
#endif
125
127
126
// Q8_0 constants. Removed lines: QUANT_K/QUANT_R (32/1) defined behind the DATA_A_Q8_0 guard together
// with the 16-bit storage #extension. Added lines: type-suffixed QUANT_K_Q8_0/QUANT_R_Q8_0 with the same values.
- #if defined(DATA_A_Q8_0)
127
- #extension GL_EXT_shader_16bit_storage : require
128
- #define QUANT_K 32
129
- #define QUANT_R 1
128
+ #define QUANT_K_Q8_0 32
129
+ #define QUANT_R_Q8_0 1
130
130
131
131
struct block_q8_0
132
132
{
@@ -139,164 +139,164 @@ struct block_q8_0_packed16
139
139
uint16_t qs[32/2];
140
140
};
141
141
142
// When DATA_A_Q8_0 is defined, map the generic QUANT_K/QUANT_R names to the Q8_0 constants and
// A_TYPE/A_TYPE_PACKED16 to the Q8_0 block structs.
+ #if defined(DATA_A_Q8_0)
143
+ #define QUANT_K QUANT_K_Q8_0
144
+ #define QUANT_R QUANT_R_Q8_0
142
145
#define A_TYPE block_q8_0
143
146
#define A_TYPE_PACKED16 block_q8_0_packed16
144
147
#endif
145
148
146
149
// K-quants
147
// Q2_K constant. Removed lines: QUANT_K (256) defined behind the DATA_A_Q2_K guard together with the
// 16-bit storage #extension. Added line: type-suffixed QUANT_K_Q2_K with the same value.
- #if defined(DATA_A_Q2_K)
148
- #extension GL_EXT_shader_16bit_storage : require
149
- #define QUANT_K 256
150
+ #define QUANT_K_Q2_K 256
150
151
151
152
// Q2_K block with byte-wide arrays: scales[QUANT_K_Q2_K/16], qs[QUANT_K_Q2_K/4], plus an f16vec2 d.
// The diff only replaces QUANT_K with QUANT_K_Q2_K in the array sizes.
struct block_q2_K
152
153
{
153
- uint8_t scales[QUANT_K /16];
154
- uint8_t qs[QUANT_K /4];
154
+ uint8_t scales[QUANT_K_Q2_K /16];
155
+ uint8_t qs[QUANT_K_Q2_K /4];
155
156
f16vec2 d;
156
157
};
157
158
158
159
// Q2_K block declared with uint16_t arrays; the element counts carry an extra /2 relative to the
// byte-array sizes (QUANT_K_Q2_K/16 and QUANT_K_Q2_K/4). The diff only renames QUANT_K to QUANT_K_Q2_K.
struct block_q2_K_packed16
159
160
{
160
- uint16_t scales[QUANT_K /16/2];
161
- uint16_t qs[QUANT_K /4/2];
161
+ uint16_t scales[QUANT_K_Q2_K /16/2];
162
+ uint16_t qs[QUANT_K_Q2_K /4/2];
162
163
f16vec2 d;
163
164
};
164
165
165
166
// Q2_K block declared with uint32_t arrays; the element counts carry an extra /4 relative to the
// byte-array sizes (QUANT_K_Q2_K/16 and QUANT_K_Q2_K/4). The diff only renames QUANT_K to QUANT_K_Q2_K.
struct block_q2_K_packed32
166
167
{
167
- uint32_t scales[QUANT_K /16/4];
168
- uint32_t qs[QUANT_K /4/4];
168
+ uint32_t scales[QUANT_K_Q2_K /16/4];
169
+ uint32_t qs[QUANT_K_Q2_K /4/4];
169
170
f16vec2 d;
170
171
};
171
172
173
// When DATA_A_Q2_K is defined, map the generic QUANT_K name to QUANT_K_Q2_K and
// A_TYPE/A_TYPE_PACKED16/A_TYPE_PACKED32 to the Q2_K block structs.
+ #if defined(DATA_A_Q2_K)
174
+ #define QUANT_K QUANT_K_Q2_K
172
175
#define A_TYPE block_q2_K
173
176
#define A_TYPE_PACKED16 block_q2_K_packed16
174
177
#define A_TYPE_PACKED32 block_q2_K_packed32
175
178
#endif
176
179
177
// Q3_K constant. Removed lines: QUANT_K (256) defined behind the DATA_A_Q3_K guard together with the
// 16-bit storage #extension. Added line: type-suffixed QUANT_K_Q3_K with the same value.
- #if defined(DATA_A_Q3_K)
178
- #extension GL_EXT_shader_16bit_storage : require
179
- #define QUANT_K 256
180
+ #define QUANT_K_Q3_K 256
180
181
181
182
// Q3_K block with byte-wide arrays: hmask[QUANT_K_Q3_K/8], qs[QUANT_K_Q3_K/4], a fixed scales[12],
// plus a float16_t d. The diff only replaces QUANT_K with QUANT_K_Q3_K in the array sizes.
struct block_q3_K
182
183
{
183
- uint8_t hmask[QUANT_K /8];
184
- uint8_t qs[QUANT_K /4];
184
+ uint8_t hmask[QUANT_K_Q3_K /8];
185
+ uint8_t qs[QUANT_K_Q3_K /4];
185
186
uint8_t scales[12];
186
187
float16_t d;
187
188
};
188
189
189
190
// Q3_K block declared with uint16_t arrays; hmask, qs and scales each carry an extra /2 in their
// element counts (QUANT_K_Q3_K/8/2, QUANT_K_Q3_K/4/2, 12/2). The diff only renames QUANT_K to QUANT_K_Q3_K.
struct block_q3_K_packed16
190
191
{
191
- uint16_t hmask[QUANT_K /8/2];
192
- uint16_t qs[QUANT_K /4/2];
192
+ uint16_t hmask[QUANT_K_Q3_K /8/2];
193
+ uint16_t qs[QUANT_K_Q3_K /4/2];
193
194
uint16_t scales[12/2];
194
195
float16_t d;
195
196
};
196
197
198
// When DATA_A_Q3_K is defined, map the generic QUANT_K name to QUANT_K_Q3_K and
// A_TYPE/A_TYPE_PACKED16 to the Q3_K block structs.
+ #if defined(DATA_A_Q3_K)
199
+ #define QUANT_K QUANT_K_Q3_K
197
200
#define A_TYPE block_q3_K
198
201
#define A_TYPE_PACKED16 block_q3_K_packed16
199
202
#endif
200
203
201
// Q4_K constant. Removed lines: QUANT_K (256) defined behind the DATA_A_Q4_K guard together with the
// 16-bit storage #extension. Added line: type-suffixed QUANT_K_Q4_K with the same value.
- #if defined(DATA_A_Q4_K)
202
- #extension GL_EXT_shader_16bit_storage : require
203
- #define QUANT_K 256
204
+ #define QUANT_K_Q4_K 256
204
205
205
206
// Q4_K block: an f16vec2 d followed by byte-wide arrays scales[3*QUANT_K_Q4_K/64] and qs[QUANT_K_Q4_K/2].
// The diff only replaces QUANT_K with QUANT_K_Q4_K in the array sizes.
struct block_q4_K
206
207
{
207
208
f16vec2 d;
208
- uint8_t scales[3*QUANT_K /64];
209
- uint8_t qs[QUANT_K /2];
209
+ uint8_t scales[3*QUANT_K_Q4_K /64];
210
+ uint8_t qs[QUANT_K_Q4_K /2];
210
211
};
211
212
212
213
// Q4_K block declared with uint16_t arrays; the element counts carry an extra /2 relative to the
// byte-array sizes (3*QUANT_K_Q4_K/64 and QUANT_K_Q4_K/2). The diff only renames QUANT_K to QUANT_K_Q4_K.
struct block_q4_K_packed16
213
214
{
214
215
f16vec2 d;
215
- uint16_t scales[3*QUANT_K /64/2];
216
- uint16_t qs[QUANT_K /2/2];
216
+ uint16_t scales[3*QUANT_K_Q4_K /64/2];
217
+ uint16_t qs[QUANT_K_Q4_K /2/2];
217
218
};
218
219
219
220
// Q4_K block declared with uint32_t arrays; the element counts carry an extra /4 relative to the
// byte-array sizes (3*QUANT_K_Q4_K/64 and QUANT_K_Q4_K/2). The diff only renames QUANT_K to QUANT_K_Q4_K.
struct block_q4_K_packed32
220
221
{
221
222
f16vec2 d;
222
- uint32_t scales[3*QUANT_K /64/4];
223
- uint32_t qs[QUANT_K /2/4];
223
+ uint32_t scales[3*QUANT_K_Q4_K /64/4];
224
+ uint32_t qs[QUANT_K_Q4_K /2/4];
224
225
};
225
226
227
// When DATA_A_Q4_K is defined, map the generic QUANT_K name to QUANT_K_Q4_K and
// A_TYPE/A_TYPE_PACKED16/A_TYPE_PACKED32 to the Q4_K block structs.
+ #if defined(DATA_A_Q4_K)
228
+ #define QUANT_K QUANT_K_Q4_K
226
229
#define A_TYPE block_q4_K
227
230
#define A_TYPE_PACKED16 block_q4_K_packed16
228
231
#define A_TYPE_PACKED32 block_q4_K_packed32
229
232
#endif
230
233
231
// Q5_K constant. Removed lines: QUANT_K (256) defined behind the DATA_A_Q5_K guard together with the
// 16-bit storage #extension. Added line: type-suffixed QUANT_K_Q5_K with the same value.
- #if defined(DATA_A_Q5_K)
232
- #extension GL_EXT_shader_16bit_storage : require
233
- #define QUANT_K 256
234
+ #define QUANT_K_Q5_K 256
234
235
235
236
// Q5_K block: an f16vec2 d, a fixed scales[12], and byte-wide arrays qh[QUANT_K_Q5_K/8] and
// qs[QUANT_K_Q5_K/2]. The diff only replaces QUANT_K with QUANT_K_Q5_K in the array sizes.
struct block_q5_K
236
237
{
237
238
f16vec2 d;
238
239
uint8_t scales[12];
239
- uint8_t qh[QUANT_K /8];
240
- uint8_t qs[QUANT_K /2];
240
+ uint8_t qh[QUANT_K_Q5_K /8];
241
+ uint8_t qs[QUANT_K_Q5_K /2];
241
242
};
242
243
243
244
// Q5_K block declared with uint16_t arrays; scales, qh and qs each carry an extra /2 in their
// element counts (12/2, QUANT_K_Q5_K/8/2, QUANT_K_Q5_K/2/2). The diff only renames QUANT_K to QUANT_K_Q5_K.
struct block_q5_K_packed16
244
245
{
245
246
f16vec2 d;
246
247
uint16_t scales[12/2];
247
- uint16_t qh[QUANT_K /8/2];
248
- uint16_t qs[QUANT_K /2/2];
248
+ uint16_t qh[QUANT_K_Q5_K /8/2];
249
+ uint16_t qs[QUANT_K_Q5_K /2/2];
249
250
};
250
251
252
// When DATA_A_Q5_K is defined, map the generic QUANT_K name to QUANT_K_Q5_K and
// A_TYPE/A_TYPE_PACKED16 to the Q5_K block structs.
+ #if defined(DATA_A_Q5_K)
253
+ #define QUANT_K QUANT_K_Q5_K
251
254
#define A_TYPE block_q5_K
252
255
#define A_TYPE_PACKED16 block_q5_K_packed16
253
256
#endif
254
257
255
// Q6_K constant. Removed lines: QUANT_K (256) defined behind the DATA_A_Q6_K guard together with the
// 16-bit storage #extension. Added line: type-suffixed QUANT_K_Q6_K with the same value.
- #if defined(DATA_A_Q6_K)
256
- #extension GL_EXT_shader_16bit_storage : require
257
- #define QUANT_K 256
258
+ #define QUANT_K_Q6_K 256
258
259
259
260
// Q6_K block: byte-wide arrays ql[QUANT_K_Q6_K/2] and qh[QUANT_K_Q6_K/4], a signed int8_t
// scales[QUANT_K_Q6_K/16], plus a float16_t d. The diff only replaces QUANT_K with QUANT_K_Q6_K.
struct block_q6_K
260
261
{
261
- uint8_t ql[QUANT_K /2];
262
- uint8_t qh[QUANT_K /4];
263
- int8_t scales[QUANT_K /16];
262
+ uint8_t ql[QUANT_K_Q6_K /2];
263
+ uint8_t qh[QUANT_K_Q6_K /4];
264
+ int8_t scales[QUANT_K_Q6_K /16];
264
265
float16_t d;
265
266
};
266
267
267
268
// Q6_K block with ql and qh declared as uint16_t arrays (element counts carry an extra /2).
// Note that scales stays an int8_t array of QUANT_K_Q6_K/16 elements here; it is not widened to 16 bits.
struct block_q6_K_packed16
268
269
{
269
- uint16_t ql[QUANT_K /2/2];
270
- uint16_t qh[QUANT_K /4/2];
271
- int8_t scales[QUANT_K /16];
270
+ uint16_t ql[QUANT_K_Q6_K /2/2];
271
+ uint16_t qh[QUANT_K_Q6_K /4/2];
272
+ int8_t scales[QUANT_K_Q6_K /16];
272
273
float16_t d;
273
274
};
274
275
276
// When DATA_A_Q6_K is defined, map the generic QUANT_K name to QUANT_K_Q6_K and
// A_TYPE/A_TYPE_PACKED16 to the Q6_K block structs.
+ #if defined(DATA_A_Q6_K)
277
+ #define QUANT_K QUANT_K_Q6_K
275
278
#define A_TYPE block_q6_K
276
279
#define A_TYPE_PACKED16 block_q6_K_packed16
277
280
#endif
278
281
279
282
// IQuants
280
283
281
// IQ4_NL constants. Removed lines: QUANT_K/QUANT_R (32/2) defined behind the DATA_A_IQ4_NL guard together
// with the 16-bit storage #extension. Added lines: type-suffixed QUANT_K_IQ4_NL/QUANT_R_IQ4_NL with the same values.
- #if defined(DATA_A_IQ4_NL)
282
- #extension GL_EXT_shader_16bit_storage : require
283
- #define QUANT_K 32
284
- #define QUANT_R 2
284
+ #define QUANT_K_IQ4_NL 32
285
+ #define QUANT_R_IQ4_NL 2
285
286
286
287
// IQ4_NL block: a float16_t d followed by a byte-wide qs[QUANT_K_IQ4_NL/2].
// The diff only replaces QUANT_K with QUANT_K_IQ4_NL in the array size.
struct block_iq4_nl
287
288
{
288
289
float16_t d;
289
- uint8_t qs[QUANT_K /2];
290
+ uint8_t qs[QUANT_K_IQ4_NL /2];
290
291
};
291
292
292
293
// IQ4_NL block with qs declared as a uint16_t array of QUANT_K_IQ4_NL/2/2 elements.
// The diff only replaces QUANT_K with QUANT_K_IQ4_NL in the array size.
struct block_iq4_nl_packed16
293
294
{
294
295
float16_t d;
295
- uint16_t qs[QUANT_K /2/2];
296
+ uint16_t qs[QUANT_K_IQ4_NL /2/2];
296
297
};
297
298
298
- #define A_TYPE block_iq4_nl
299
- #define A_TYPE_PACKED16 block_iq4_nl_packed16
299
+ #if defined(DATA_A_IQ4_NL)
300
300
301
301
const int8_t kvalues_iq4nl_const[16] = {
302
302
int8_t(-127), int8_t(-104), int8_t(-83), int8_t(-65), int8_t(-49), int8_t(-35), int8_t(-22), int8_t(-10),
@@ -313,6 +313,11 @@ void init_iq4nl_shmem()
313
313
}
314
314
barrier();
315
315
}
316
+
317
// Added lines: map the generic QUANT_K/QUANT_R names to the IQ4_NL constants and
// A_TYPE/A_TYPE_PACKED16 to the IQ4_NL block structs.
// NOTE(review): presumably still inside the DATA_A_IQ4_NL guard closed by the following #endif — part of that region is hidden by the diff hunk; confirm.
+ #define QUANT_K QUANT_K_IQ4_NL
318
+ #define QUANT_R QUANT_R_IQ4_NL
319
+ #define A_TYPE block_iq4_nl
320
+ #define A_TYPE_PACKED16 block_iq4_nl_packed16
316
321
#endif
317
322
318
323
#endif // !defined(GGML_TYPES_COMP)
0 commit comments