Skip to content

Commit 9cfea77

Browse files
committed
Init commit.
1 parent 996a4ae commit 9cfea77

14 files changed

+25963
-0
lines changed

data/compressed.dat

319 KB
Binary file not shown.

data/reuncompressed.txt

Lines changed: 12648 additions & 0 deletions
Large diffs are not rendered by default.

data/uncompressed.txt

Lines changed: 12648 additions & 0 deletions
Large diffs are not rendered by default.

src/ac/ArithmeticDecoder.java

Lines changed: 121 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,121 @@
1+
package ac;
2+
3+
import java.io.IOException;
4+
5+
import io.BitSource;
6+
import io.InsufficientBitsLeftException;
7+
8+
public class ArithmeticDecoder<T> {
9+
private SourceModel<T> _model;
10+
private long _low;
11+
private long _high;
12+
private int _range_bit_width;
13+
private long _range_mask;
14+
private long _input_buffer;
15+
private boolean _first_fill;
16+
private int _bits_consumed;
17+
private boolean _tracing;
18+
19+
public ArithmeticDecoder(SourceModel<T> model, int rangeBitWidth) {
20+
this(model, rangeBitWidth, false);
21+
}
22+
23+
public ArithmeticDecoder(SourceModel<T> model, int rangeBitWidth, boolean tracing) {
24+
assert rangeBitWidth < 63;
25+
assert model != null;
26+
27+
_range_bit_width = rangeBitWidth;
28+
_model = model;
29+
_low = 0;
30+
_high = (0x1L << rangeBitWidth) - 1L;
31+
_range_mask = ~(0xffffffffffffffffL << _range_bit_width);
32+
_first_fill = true;
33+
_input_buffer = 0;
34+
_bits_consumed = 0;
35+
_tracing = tracing;
36+
}
37+
38+
public T decode(BitSource bit_source) throws InsufficientBitsLeftException, IOException {
39+
if (_first_fill) {
40+
for (int i=0; i<_range_bit_width; i++) {
41+
_input_buffer = ((_input_buffer << 1) & _range_mask) | ((long) bit_source.next(1));
42+
}
43+
_first_fill = false;
44+
}
45+
46+
long range_width = _high - _low + 1;
47+
int sym_idx = -1;
48+
long sym_low = 0;
49+
long sym_high = 0;
50+
51+
// Find the next symbol according to the symbol model.
52+
53+
for (int i=0; i<_model.size(); i++) {
54+
sym_low = _low + ((long) (range_width * _model.cdfLow(i)));
55+
sym_high =_low + ((long) (range_width * _model.cdfHigh(i))) -1L;
56+
57+
if (_input_buffer >= sym_low && _input_buffer <= sym_high) {
58+
// Found it
59+
sym_idx = i;
60+
break;
61+
}
62+
}
63+
assert sym_idx != -1;
64+
65+
_low = sym_low;
66+
_high = sym_high;
67+
68+
int high_bit = highBit(_input_buffer);
69+
70+
while((highBit(_low) == high_bit) &&
71+
(highBit(_high) == high_bit)) {
72+
73+
_input_buffer = ((_input_buffer << 1) & _range_mask) | ((long) bit_source.next(1));
74+
_low = (_low << 1) & _range_mask;
75+
_high =((_high << 1) & _range_mask) | 0x1L;
76+
77+
high_bit = highBit(_input_buffer);
78+
_bits_consumed++;
79+
}
80+
81+
// Are we in the middle?
82+
long one_quarter_mark = (0x1L << _range_bit_width) / 4L;
83+
long three_quarter_mark = one_quarter_mark * 3L;
84+
85+
86+
while (_low > three_quarter_mark && _high < one_quarter_mark) {
87+
// Yes, so shift out the second highest bits until we are not.
88+
89+
_input_buffer = ((_input_buffer & (_range_mask>>2)) << 1) | // Mask out top two bits and shift left
90+
((long) bit_source.next(1)) | // Bring in next bit from source
91+
(((long) high_bit) << (_range_bit_width-1)); // Restore top bit to old value
92+
93+
_low = ((_low & (_range_mask>>2)) << 1); // Mask out top two bits and shift left
94+
// Low order bit already 0 from shift
95+
// Top bit still 0 after mask
96+
97+
_high = ((_high & (_range_mask>>2)) << 1) | // Mask out top two bits and shift left
98+
0x1L | // Set low order bit to 1
99+
(0x1L << (_range_bit_width-1)); // Restore top bit as 1.
100+
101+
_bits_consumed++;
102+
}
103+
104+
T symbol = _model.get(sym_idx);
105+
106+
if (_tracing) {
107+
System.out.println("Decoded: " + symbol.toString());
108+
System.out.println(" High: " + String.format("%16x", _high));
109+
System.out.println(" Low: " + String.format("%16x", _low));
110+
}
111+
return symbol;
112+
}
113+
114+
public int getBitsConsumed() {
115+
return _bits_consumed;
116+
}
117+
118+
private int highBit(long value) {
119+
return (int) ((value >> (_range_bit_width-1)) & 0x1L);
120+
}
121+
}

src/ac/ArithmeticEncoder.java

Lines changed: 117 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,117 @@
1+
package ac;
2+
3+
import java.io.IOException;
4+
5+
import io.BitSink;
6+
7+
public class ArithmeticEncoder<T> {
8+
private SourceModel<T> _model;
9+
private long _low;
10+
private long _high;
11+
private int _pending;
12+
private int _range_bit_width;
13+
private long _range_mask;
14+
private boolean _tracing;
15+
16+
public ArithmeticEncoder(SourceModel<T> model, int rangeBitWidth) {
17+
this(model, rangeBitWidth, false);
18+
}
19+
20+
public ArithmeticEncoder(SourceModel<T> model, int rangeBitWidth, boolean tracing) {
21+
assert rangeBitWidth < 63;
22+
assert model != null;
23+
24+
_range_bit_width = rangeBitWidth;
25+
_model = model;
26+
_low = 0;
27+
_high = (0x1L << rangeBitWidth) - 1L;
28+
_pending = 0;
29+
_range_mask = ~(0xffffffffffffffffL << _range_bit_width);
30+
_tracing = tracing;
31+
}
32+
33+
public int encode(T symbol, BitSink bitSink) throws IOException {
34+
int num_bits_emitted = 0;
35+
long range_width = _high - _low + 1;
36+
37+
long old_low = _low;
38+
39+
_low = old_low + ((long) (range_width * _model.cdfLow(symbol)));
40+
_high = old_low + ((long) (range_width * _model.cdfHigh(symbol))) -1L;
41+
42+
assert _high > _low;
43+
44+
// While top bit matches, emit bits
45+
46+
while (highBit(_low) == highBit(_high)) {
47+
int high_bit = highBit(_low);
48+
bitSink.write(high_bit, 1);
49+
num_bits_emitted++;
50+
51+
// Write out pending bits if we have any.
52+
53+
while (_pending > 0) {
54+
bitSink.write(1-high_bit, 1);
55+
num_bits_emitted++;
56+
_pending--;
57+
}
58+
59+
// Shift low and high
60+
61+
_low = (_low << 1) & _range_mask;
62+
_high =((_high << 1) | 0x1L) & _range_mask;
63+
}
64+
65+
// Are we in the middle?
66+
long one_quarter_mark = (0x1L << _range_bit_width) / 4L;
67+
long three_quarter_mark = one_quarter_mark * 3L;
68+
69+
while (_low > three_quarter_mark && _high < one_quarter_mark) {
70+
// Yes, so shift out the second highest bit and accumulate pending bits
71+
72+
// We know that:
73+
// _low must be in form 01xxxx...
74+
// _high must be in form 10xxxx...
75+
//
76+
// To shift out the second bit, we'll mask out everything but the top two bits
77+
// (i.e., the xxxx... part of above) and shift it over by 1 and then fix up the
78+
// top bit and low order bit as follows:
79+
//
80+
// _low needs its top bit to remain 0 and new low order bit to be 0
81+
// _high needs its top bit to remain 1 and new low order bit to be 1
82+
83+
84+
_low = ((_low & (_range_mask>>2)) << 1); // Mask out all but top two bits and shift left one.
85+
// 0 comes in bottom as part of shift left.
86+
// Top bit still 0 after the mask as before.
87+
88+
89+
_high = ((_high & (_range_mask>>2)) << 1) | // Mask out all but top two bits and shift left one
90+
0x1L | // Shift in 1 at the bottom.
91+
(0x1L << (_range_bit_width-1)); // Restore top bit to be a 1
92+
93+
// Accumulate pending bits
94+
_pending++;
95+
96+
}
97+
98+
if (_tracing) {
99+
System.out.println("Encoded: " + symbol.toString());
100+
System.out.println(" High: " + String.format("%16x", _high));
101+
System.out.println(" Low: " + String.format("%16x", _low));
102+
System.out.println("Emitted: " + num_bits_emitted);
103+
}
104+
return num_bits_emitted;
105+
}
106+
107+
public void emitMiddle(BitSink bitSink) throws IOException {
108+
bitSink.write("1");
109+
for (int i=1; i<_range_bit_width; i++) {
110+
bitSink.write("0");
111+
}
112+
}
113+
114+
private int highBit(long value) {
115+
return (int) ((value >> (_range_bit_width-1)) & 0x1L);
116+
}
117+
}

src/ac/SourceModel.java

Lines changed: 49 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,49 @@
1+
package ac;
2+
3+
/**
 * A probability model over an indexed, finite set of symbols, exposed as a
 * cumulative distribution function (CDF).
 *
 * <p>Symbol {@code i} occupies the half-open CDF span
 * {@code [cdfLow(i), cdfHigh(i))}, where {@code cdfHigh(i)} defaults to
 * {@code cdfLow(i + 1)} and to {@code 1.0} for the last symbol.
 *
 * @param <T> the symbol type
 */
public interface SourceModel<T> {

    /** Returns the number of symbols in the model. */
    int size();

    /**
     * Returns the symbol stored at {@code index}.
     *
     * @param index position of the symbol
     */
    T get(int index);

    /**
     * Finds the index of {@code symbol} by a linear scan using {@code equals}.
     *
     * @param symbol the symbol to locate; may be null
     * @return the first index whose symbol equals {@code symbol}
     * @throws IllegalArgumentException (a {@link RuntimeException}) if the symbol is
     *                                  not in the model
     */
    default int lookup(T symbol) {
        final int count = size();
        for (int i = 0; i < count; i++) {
            T candidate = get(i);
            // Null-safe comparison so a null entry or null query cannot NPE.
            boolean same = (candidate == null) ? (symbol == null) : candidate.equals(symbol);
            if (same) {
                return i;
            }
        }
        // String concatenation renders null as "null" instead of throwing, so the
        // caller always gets the intended "not in model" error.
        throw new IllegalArgumentException("Symbol " + symbol + " not in model");
    }

    /**
     * Returns the lower CDF bound of the symbol at {@code index}.
     *
     * @param index position of the symbol
     */
    double cdfLow(int index);

    /**
     * Returns the upper CDF bound of the symbol at {@code index}: the lower bound of
     * the next symbol, or {@code 1.0} for the last symbol.
     *
     * @param index position of the symbol
     * @throws IndexOutOfBoundsException if {@code index} is not in {@code [0, size() - 1]}
     */
    default double cdfHigh(int index) {
        final int last = size() - 1;
        if (index < 0 || index > last) {
            throw new IndexOutOfBoundsException(
                    "index " + index + " out of range [0, " + last + "]");
        }
        return (index == last) ? 1.0 : cdfLow(index + 1);
    }

    /**
     * Returns the lower CDF bound of {@code symbol}.
     *
     * @throws IllegalArgumentException if the symbol is not in the model
     */
    default double cdfLow(T symbol) {
        return cdfLow(lookup(symbol));
    }

    /**
     * Returns the upper CDF bound of {@code symbol}.
     *
     * @throws IllegalArgumentException if the symbol is not in the model
     */
    default double cdfHigh(T symbol) {
        return cdfHigh(lookup(symbol));
    }

    /**
     * Renders one line per symbol of the form
     * {@code "Symbol: <symbol> cdfLevel: <cdfLow>"}, each terminated by a newline.
     */
    default String cdfTable() {
        final int count = size();
        StringBuilder table = new StringBuilder();
        for (int i = 0; i < count; i++) {
            table.append("Symbol: ").append(get(i))
                 .append(" cdfLevel: ").append(cdfLow(i))
                 .append("\n");
        }
        return table.toString();
    }
}

src/app/ACDecodeTextFile.java

Lines changed: 65 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,65 @@
1+
package app;
2+
3+
import java.io.FileInputStream;
4+
import java.io.FileOutputStream;
5+
import java.io.IOException;
6+
import java.util.Scanner;
7+
8+
import ac.ArithmeticDecoder;
9+
import io.InputStreamBitSource;
10+
import io.InsufficientBitsLeftException;
11+
12+
public class ACDecodeTextFile {
13+
14+
public static void main(String[] args) throws InsufficientBitsLeftException, IOException {
15+
String input_file_name = "data/compressed.dat";
16+
String output_file_name = "data/reuncompressed.txt";
17+
18+
FileInputStream fis = new FileInputStream(input_file_name);
19+
20+
InputStreamBitSource bit_source = new InputStreamBitSource(fis);
21+
22+
// Read in symbol counts and set up model
23+
24+
int[] symbol_counts = new int[256];
25+
Integer[] symbols = new Integer[256];
26+
27+
for (int i=0; i<256; i++) {
28+
symbol_counts[i] = bit_source.next(32);
29+
symbols[i] = i;
30+
}
31+
32+
FreqCountIntegerSymbolModel model = new FreqCountIntegerSymbolModel(symbols, symbol_counts);
33+
System.out.println(model.cdfTable());
34+
35+
// Read in number of symbols encoded
36+
37+
int num_symbols = bit_source.next(32);
38+
39+
System.out.println("File has " + num_symbols + " symbols encoded");
40+
41+
// Read in range bit width and setup the decoder
42+
43+
int range_bit_width = bit_source.next(8);
44+
ArithmeticDecoder<Integer> decoder = new ArithmeticDecoder<Integer>(model, range_bit_width);
45+
46+
// Decode and produce output.
47+
48+
FileOutputStream fos = new FileOutputStream(output_file_name);
49+
50+
// Scanner s = new Scanner(System.in);
51+
for (int i=0; i<num_symbols; i++) {
52+
// s.next();
53+
int sym = decoder.decode(bit_source);
54+
fos.write(sym);
55+
56+
if (i%10000 == 0) {
57+
System.out.println("After " + i + " symbols, consumed " + decoder.getBitsConsumed());
58+
}
59+
}
60+
61+
fos.flush();
62+
fos.close();
63+
fis.close();
64+
}
65+
}

0 commit comments

Comments
 (0)