-
Notifications
You must be signed in to change notification settings - Fork 33
/
inflate.c
512 lines (473 loc) · 14.9 KB
/
inflate.c
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
433
434
435
436
437
438
439
440
441
442
443
444
445
446
447
448
449
450
451
452
453
454
455
456
457
458
459
460
461
462
463
464
465
466
467
468
469
470
471
472
473
474
475
476
477
478
479
480
481
482
483
484
485
486
487
488
489
490
491
492
493
494
495
496
497
498
499
500
501
502
503
504
505
506
507
508
509
510
511
/* Slightly modified from its original form so as not to exit the
* program on errors. The resulting file remains in the public
* domain for all to use. */
/* --- GZIP file format uncompression routines --- */
/* The following routines (notably the unzip() function below)
* uncompress gzipped data. They are terribly slow at the task, but
* it is presumed that they work reasonably well. They don't do any
* error checking, but they're probably not too vulnerable to buggy
* data either. Another important limitation (but it would be pretty
* easy to get around) is that the data must reside in memory, it is
* not read as a stream. They have been very little tested. Anyway,
* whatever these functions are good for, I put them in the public
* domain. -- David Madore <[email protected]> 1999/11/21 */
static unsigned int
peek_bits (const unsigned char *data, long p, int q)
     /* Return the q-bit field that begins at bit offset p of data,
      * without consuming it.  Bits are numbered little-endian: bit 0 is
      * the least significant bit of data[0], and the first bit read
      * ends up as the least significant bit of the result. */
{
  unsigned int result = 0;
  int done = 0;              /* Bits gathered so far */

  while ( done < q )
    {
      long pos = p + done;             /* Absolute bit position */
      int shift = (int) (pos % 8);     /* Offset inside the current byte */
      int take = 8 - shift;            /* Bits left in the current byte */
      unsigned int chunk;

      /* Do not take more than what is still wanted. */
      if ( take > q - done )
        take = q - done;
      chunk = ((unsigned int) data[pos / 8] >> shift) & ((1U << take) - 1);
      result |= chunk << done;
      done += take;
    }
  return result;
}
static unsigned int
read_bits (const unsigned char *data, long *p, int q)
     /* Consume q bits: fetch them exactly as peek_bits() would from
      * bit offset *p, then advance *p past them. */
{
  const long start = *p;
  const unsigned int value = peek_bits (data, start, q);

  *p = start + q;
  return value;
}
static void
make_code_table (const char size_table[], int table_length,
                 unsigned int code_table[], int maxbits)
     /* Assign Huffman codes from code lengths (rfc1951, section 3.2.2).
      * size_table[s] is the code length of symbol s and code_table[s]
      * receives its code; table_length is the alphabet size and maxbits
      * the longest length considered.  Codes are handed out in order of
      * increasing length and, within one length, of increasing symbol.
      * Entries whose length is 0 (or above maxbits) are left untouched. */
{
  unsigned int next_code = 0;
  int len;

  /* After each length the running code gains one more bit. */
  for ( len = 1 ; len <= maxbits ; len++, next_code <<= 1 )
    {
      int sym;

      for ( sym = 0 ; sym < table_length ; sym++ )
        {
          if ( size_table[sym] != len )
            continue;
          code_table[sym] = next_code;
          next_code++;
        }
    }
}
static int
decode_one (const unsigned char *data, long *p,
            const char size_table[], int table_length,
            const unsigned int code_table[], int maxbits)
     /* Decode a single Huffman symbol located at bit offset *p.
      * size_table/code_table describe the code (see make_code_table())
      * and table_length is the alphabet size.  On success the symbol
      * index is returned and *p is advanced by the length of its code;
      * if no symbol matches, -1 is returned and *p is left alone. */
{
  unsigned int window = 0;
  int bit, sym;

  /* Look ahead maxbits bits.  Huffman codes are stored most significant
   * bit first, so each new bit is shifted in at the bottom. */
  for ( bit = 0 ; bit < maxbits ; bit++ )
    window = (window << 1) | peek_bits (data, *p + bit, 1);

  /* The first symbol (in index order) whose code equals the leading
   * bits of the window wins.  Zero-length symbols are unused. */
  for ( sym = 0 ; sym < table_length ; sym++ )
    {
      const int len = size_table[sym];

      if ( len == 0 )
        continue;
      if ( (window >> (maxbits - len)) != code_table[sym] )
        continue;
      *p += len;
      return sym;
    }
  return -1;
}
/* Longest code length, in bits, passed to make_code_table() and
 * decode_one() for each of the three alphabets.  The original author
 * chose 15 as a safe upper bound that still fits in the integer sizes
 * guaranteed by the C standard.  From the code in get_tables():
 * literal/length and distance code lengths are decoded as symbols
 * 0-15, so 15 covers them; the lengths of the code-length alphabet
 * are read as 3-bit fields, so they never exceed 7 and 15 is merely an
 * over-estimate there (decode_one() just peeks more bits than needed). */
#define CLEN_MAXBITS 15
#define HLIT_MAXBITS 15
#define HDIST_MAXBITS 15
/* Alphabet sizes: code-length symbols, literal/length symbols and
 * distance symbols respectively. */
#define CLEN_TSIZE 19
#define HLIT_TSIZE 288
#define HDIST_TSIZE 30
static int
get_tables (const unsigned char *data, long *p,
            char hlit_size_table[HLIT_TSIZE],
            unsigned int hlit_code_table[HLIT_TSIZE],
            char hdist_size_table[HDIST_TSIZE],
            unsigned int hdist_code_table[HDIST_TSIZE])
     /* Read the header of a dynamic Huffman block (rfc1951, section
      * 3.2.7) starting at bit offset *p and fill in the literal/length
      * and distance tables (lengths and codes).  *p is advanced past
      * the header.  Returns 0 on success and -1 when the header is
      * invalid: an undecodable code-length symbol, a "repeat previous"
      * code with nothing before it, or more distance codes than the
      * distance table can hold.
      *
      * The literal/length and distance code lengths form ONE sequence
      * of (257+HLIT) + (1+HDIST) values, and repeat codes may cross the
      * boundary between the two.  They are therefore decoded into a
      * single scratch array and split afterwards.  A run that would go
      * past the end of the sequence is silently truncated. */
{
  static const int clen_weird_tangle[CLEN_TSIZE]
    = { 16, 17, 18, 0, 8, 7, 9, 6, 10, 5, 11, 4, 12, 3, 13, 2, 14, 1, 15 };
  char clen_size_table[CLEN_TSIZE];
  unsigned int clen_code_table[CLEN_TSIZE];
  char lengths[HLIT_TSIZE + HDIST_TSIZE];   /* Combined length sequence */
  int nlit, ndist, nclen;                   /* Counts announced in header */
  int total, filled;
  int j;

  nlit = 257 + (int) read_bits (data, p, 5);
  ndist = 1 + (int) read_bits (data, p, 5);
  nclen = 4 + (int) read_bits (data, p, 4);
  /* The 5-bit HDIST field can announce up to 32 distance codes, but
   * only HDIST_TSIZE exist; accepting more would overrun the tables. */
  if ( nlit > HLIT_TSIZE || ndist > HDIST_TSIZE )
    return -1;

  /* Lengths of the code-length alphabet, stored in a permuted order;
   * the ones not transmitted are zero. */
  for ( j=0 ; j<nclen ; j++ )
    clen_size_table[clen_weird_tangle[j]] = (char) read_bits (data, p, 3);
  for ( ; j<CLEN_TSIZE ; j++ )
    clen_size_table[clen_weird_tangle[j]] = 0;
  make_code_table (clen_size_table, CLEN_TSIZE,
                   clen_code_table, CLEN_MAXBITS);

  total = nlit + ndist;
  filled = 0;
  while ( filled < total )
    {
      int sym;       /* Signed, so that a decode failure is detectable */
      int repeat;
      char value;

      sym = decode_one (data, p, clen_size_table, CLEN_TSIZE,
                        clen_code_table, CLEN_MAXBITS);
      if ( sym < 0 )
        return -1;
      if ( sym < 16 )
        {
          /* A literal code length. */
          lengths[filled++] = (char) sym;
          continue;
        }
      switch ( sym )
        {
        case 16:
          /* Repeat the previous length 3-6 times. */
          if ( filled == 0 )
            return -1;
          value = lengths[filled-1];
          repeat = 3 + (int) read_bits (data, p, 2);
          break;
        case 17:
          /* 3-10 zero lengths. */
          value = 0;
          repeat = 3 + (int) read_bits (data, p, 3);
          break;
        case 18:
          /* 11-138 zero lengths. */
          value = 0;
          repeat = 11 + (int) read_bits (data, p, 7);
          break;
        default:
          return -1;
        }
      while ( repeat-- > 0 && filled < total )
        lengths[filled++] = value;
    }

  /* Split the sequence, padding the unused tail of each table with 0. */
  for ( j=0 ; j<nlit ; j++ )
    hlit_size_table[j] = lengths[j];
  for ( ; j<HLIT_TSIZE ; j++ )
    hlit_size_table[j] = 0;
  make_code_table (hlit_size_table, HLIT_TSIZE,
                   hlit_code_table, HLIT_MAXBITS);

  for ( j=0 ; j<ndist ; j++ )
    hdist_size_table[j] = lengths[nlit+j];
  for ( ; j<HDIST_TSIZE ; j++ )
    hdist_size_table[j] = 0;
  make_code_table (hdist_size_table, HDIST_TSIZE,
                   hdist_code_table, HDIST_MAXBITS);
  return 0;
}
/* The (circular) output buffer. This lets us track
 * backreferences: every byte produced is stored by pushout() and
 * fetched again by pushin().  Both objects below are file-scope
 * statics, so their contents persist from one call to the next. */
/* Minimal buffer size. Also the only useful value: the largest
 * distance get_data() can produce is 24577 + 8191 = 32768. */
#define BUFFER_SIZE 32768
/* Index of the slot the next output byte will be written to */
static unsigned int buffer_ptr = 0;
/* The buffer itself */
static unsigned char buffer[BUFFER_SIZE];
static void
pushout (unsigned char ch)
     /* Record one output byte in the circular history buffer, so that
      * a later back-reference can fetch it, and step the write index
      * (wrapping around at BUFFER_SIZE). */
{
  buffer[buffer_ptr] = ch;
  buffer_ptr = (buffer_ptr + 1) % BUFFER_SIZE;
}
static unsigned char
pushin (unsigned int dist)
     /* Fetch from the circular history buffer the byte that was stored
      * dist positions before the current write index. */
{
  unsigned int slot;

  /* Adding BUFFER_SIZE before subtracting keeps the index arithmetic
   * from going below zero for any dist up to BUFFER_SIZE. */
  slot = buffer_ptr + (BUFFER_SIZE - dist);
  slot %= BUFFER_SIZE;
  return buffer[slot];
}
static int
get_data (const unsigned char *data, long *p,
          const char hlit_size_table[HLIT_TSIZE],
          const unsigned int hlit_code_table[HLIT_TSIZE],
          const char hdist_size_table[HDIST_TSIZE],
          const unsigned int hdist_code_table[HDIST_TSIZE],
          int (* callback) (unsigned char d))
     /* Decode the body of one compressed block, starting at bit offset
      * *p, with the given literal/length and distance tables.  Every
      * byte produced is stored in the history buffer and handed to
      * callback.  Returns 0 once the end-of-block symbol has been read,
      * and -1 if a symbol cannot be decoded, is out of range, or if
      * callback returns non-zero. */
{
  /* Base value and number of extra bits for length symbols 257..285. */
  static const unsigned short length_base[29] =
    { 3, 4, 5, 6, 7, 8, 9, 10, 11, 13, 15, 17, 19, 23, 27, 31,
      35, 43, 51, 59, 67, 83, 99, 115, 131, 163, 195, 227, 258 };
  static const unsigned char length_extra[29] =
    { 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 2, 2, 2, 2,
      3, 3, 3, 3, 4, 4, 4, 4, 5, 5, 5, 5, 0 };
  /* Base value and number of extra bits for distance symbols 0..29. */
  static const unsigned short dist_base[30] =
    { 1, 2, 3, 4, 5, 7, 9, 13, 17, 25, 33, 49, 65, 97, 129, 193,
      257, 385, 513, 769, 1025, 1537, 2049, 3073, 4097, 6145,
      8193, 12289, 16385, 24577 };
  static const unsigned char dist_extra[30] =
    { 0, 0, 0, 0, 1, 1, 2, 2, 3, 3, 4, 4, 5, 5, 6, 6,
      7, 7, 8, 8, 9, 9, 10, 10, 11, 11, 12, 12, 13, 13 };

  for ( ; ; )
    {
      int sym, dsym;
      unsigned int length, dist, copied;

      sym = decode_one (data, p, hlit_size_table, HLIT_TSIZE,
                        hlit_code_table, HLIT_MAXBITS);
      if ( sym < 0 )
        return -1;

      if ( sym < 256 )
        {
          /* Literal byte */
          pushout ((unsigned char) sym);
          if ( callback ((unsigned char) sym) )
            return -1;
          continue;
        }
      if ( sym == 256 )
        /* End of block */
        return 0;
      if ( sym > 285 )
        /* 286 and 287 are not valid length symbols */
        return -1;

      /* Back reference: the length comes first, then the distance. */
      length = length_base[sym-257];
      if ( length_extra[sym-257] )
        length += read_bits (data, p, length_extra[sym-257]);

      dsym = decode_one (data, p, hdist_size_table, HDIST_TSIZE,
                         hdist_code_table, HDIST_MAXBITS);
      if ( dsym < 0 || dsym >= 30 )
        return -1;
      dist = dist_base[dsym];
      if ( dist_extra[dsym] )
        dist += read_bits (data, p, dist_extra[dsym]);

      /* Copy one byte at a time: source and destination may overlap. */
      for ( copied = 0 ; copied < length ; copied++ )
        {
          unsigned char ch = pushin (dist);

          pushout (ch);
          if ( callback (ch) )
            return -1;
        }
    }
}
int
inflate (const unsigned char *data, long *p,
         int (* callback) (unsigned char d))
     /* Main uncompression function for the deflate method (rfc1951).
      * data points to the compressed stream and *p is the BIT offset at
      * which it starts; *p is advanced as input is consumed.  callback
      * is invoked once per uncompressed byte, in order; a non-zero
      * return from it aborts decompression.  Blocks are processed until
      * one carrying the "final block" flag has been decoded.
      * Returns 0 on success, -1 on invalid data or callback abort. */
{
  char hlit_size_table[HLIT_TSIZE];
  unsigned int hlit_code_table[HLIT_TSIZE];
  char hdist_size_table[HDIST_TSIZE];
  unsigned int hdist_code_table[HDIST_TSIZE];
  unsigned int last_block;

  do
    {
      unsigned int btype;

      last_block = read_bits (data, p, 1);
      btype = read_bits (data, p, 2);
      switch ( btype )
        {
        case 0:
          /* Non compressed block */
          {
            unsigned int len, nlen;
            unsigned int l;

            /* Skip to the next byte boundary.  *p counts bits, so it
             * is rounded up to a multiple of 8. */
            *p = ((*p + 7) / 8) * 8;
            len = read_bits (data, p, 16);
            nlen = read_bits (data, p, 16);
            /* NLEN must be the one's complement of LEN. */
            if ( (len ^ nlen) != 0xffffU )
              return -1;
            for ( l=0 ; l<len ; l++ )
              {
                unsigned char b = (unsigned char) read_bits (data, p, 8);

                pushout (b);
                if ( callback (b) )
                  return -1;
              }
          }
          break;

        case 1:
          /* Fixed Huffman codes (rfc1951, section 3.2.6) */
          {
            int j;

            for ( j=0 ; j<144 ; j++ )
              hlit_size_table[j] = 8;
            for ( ; j<256 ; j++ )
              hlit_size_table[j] = 9;
            for ( ; j<280 ; j++ )
              hlit_size_table[j] = 7;
            for ( ; j<HLIT_TSIZE ; j++ )
              hlit_size_table[j] = 8;
            make_code_table (hlit_size_table, HLIT_TSIZE,
                             hlit_code_table, HLIT_MAXBITS);
            for ( j=0 ; j<HDIST_TSIZE ; j++ )
              hdist_size_table[j] = 5;
            make_code_table (hdist_size_table, HDIST_TSIZE,
                             hdist_code_table, HDIST_MAXBITS);
          }
          if ( get_data (data, p,
                         hlit_size_table, hlit_code_table,
                         hdist_size_table, hdist_code_table,
                         callback) < 0 )
            return -1;
          break;

        case 2:
          /* Dynamic Huffman tables */
          if ( get_tables (data, p,
                           hlit_size_table, hlit_code_table,
                           hdist_size_table, hdist_code_table) < 0 )
            return -1;
          if ( get_data (data, p,
                         hlit_size_table, hlit_code_table,
                         hdist_size_table, hdist_code_table,
                         callback) < 0 )
            return -1;
          break;

        default:
          /* Block type 3 is reserved */
          return -1;
        }
    }
  while ( ! last_block );
  return 0;
}
int
unzip (const unsigned char *data, long *p,
       int (* callback) (unsigned char d))
     /* Uncompress gzipped data held in memory.  data points to the gzip
      * member and p to a long holding the bit offset to start from
      * (normally initialized to 0); it is advanced as input is read.
      * callback receives each uncompressed byte in turn and returns an
      * int: a non-zero value aborts decompression.  Returns 0 on
      * success and -1 if the magic number or compression method is
      * wrong, or if inflate() fails.  The trailing CRC32 and ISIZE
      * fields are neither read nor checked. */
{
  enum
    {
      GZ_FHCRC = 0x2,        /* Header CRC16 present */
      GZ_FEXTRA = 0x4,       /* Extra field present */
      GZ_FNAME = 0x8,        /* Zero-terminated file name present */
      GZ_FCOMMENT = 0x10     /* Zero-terminated comment present */
    };
  unsigned int flags;

  /* Magic number, then compression method (8 = deflate). */
  if ( read_bits (data, p, 8) != 0x1f )
    return -1;
  if ( read_bits (data, p, 8) != 0x8b )
    return -1;
  if ( read_bits (data, p, 8) != 0x8 )
    return -1;

  /* Flag byte; the three top (reserved) bits are not examined. */
  flags = read_bits (data, p, 8);

  read_bits (data, p, 32);   /* Modification time, ignored */
  read_bits (data, p, 8);    /* Extra flags, ignored */
  read_bits (data, p, 8);    /* OS type, ignored */

  if ( flags & GZ_FEXTRA )
    {
      /* 16-bit byte count followed by that many bytes. */
      unsigned int xlen = read_bits (data, p, 16);

      *p += ((long) xlen) * 8;
    }
  if ( flags & GZ_FNAME )
    {
      do { } while ( read_bits (data, p, 8) != 0 );
    }
  if ( flags & GZ_FCOMMENT )
    {
      do { } while ( read_bits (data, p, 8) != 0 );
    }
  if ( flags & GZ_FHCRC )
    read_bits (data, p, 16);   /* Header CRC16, ignored */

  return inflate (data, p, callback);
}