Skip to content

Commit

Permalink
compress.gzip: add compression flags (fix #14994) (#23370)
Browse files Browse the repository at this point in the history
  • Loading branch information
kbkpbot authored Jan 5, 2025
1 parent af1ef92 commit 7040514
Show file tree
Hide file tree
Showing 2 changed files with 94 additions and 3 deletions.
76 changes: 73 additions & 3 deletions vlib/compress/gzip/gzip.v
Original file line number Diff line number Diff line change
Expand Up @@ -6,10 +6,64 @@ module gzip
import compress as compr
import hash.crc32

// CompressFlags holds extra option bits that `compress` combines with
// `compression_level` before calling the underlying compressor.
// `compress` clears the low 12 bits of these flags and replaces them with the level.
// TODO: These flags have no use now
@[flag]
pub enum CompressFlags {
// The low 12 bits will be overwritten by `compression_level`.
// The twelve placeholder members below only occupy those bit positions, so that
// the named flags after them start at 0x01000 (see their trailing comments).
compression_level_overwrite_flag01
compression_level_overwrite_flag02
compression_level_overwrite_flag03
compression_level_overwrite_flag04
compression_level_overwrite_flag05
compression_level_overwrite_flag06
compression_level_overwrite_flag07
compression_level_overwrite_flag08
compression_level_overwrite_flag09
compression_level_overwrite_flag10
compression_level_overwrite_flag11
compression_level_overwrite_flag12

// If set, the compressor outputs a zlib header before the deflate data, and the Adler-32 of the source data at the end. Otherwise, you'll get raw deflate data.
write_zlib_header //= 0x01000
// Always compute the Adler-32 of the input data (even when not writing zlib headers).
compute_adler32 //= 0x02000
// Set to use faster greedy parsing, instead of more efficient lazy parsing.
greedy_parsing_flag //= 0x04000
// Enable to decrease the compressor's initialization time to the minimum, but the output may vary from run to run given the same input (depending on the contents of memory).
nondeterministic_parsing_flag //= 0x08000
// Only look for RLE matches (matches with a distance of 1).
rle_matches //= 0x10000
// Discards matches <= 5 chars if enabled.
filter_matches //= 0x20000
// Disable usage of optimized Huffman tables.
force_all_static_blocks //= 0x40000
// Only use raw (uncompressed) deflate blocks.
force_all_raw_blocks //= 0x80000
}

// CompressParams holds the optional parameters accepted by `compress`.
// `compression_level` selects the compression effort; `compress` returns an
// error when it is outside 0..4095 (inclusive). Documented levels:
// 0: Huffman only;
// 1: Huffman+LZ (fastest, weakest compression);
// 128: default_max_probes (the default used when no level is given);
// 4095: Huffman+LZ (slowest/best compression)
// `flags` carries additional CompressFlags; `compress` ignores its low 12 bits
// and uses `compression_level` for them instead.
@[params]
pub struct CompressParams {
pub:
compression_level int = 128 // 0~4095
flags CompressFlags
}

// compresses an array of bytes using gzip and returns the compressed bytes in a new array
// Example: compressed := gzip.compress(b)!
pub fn compress(data []u8) ![]u8 {
compressed := compr.compress(data, 0)!
// Example: compressed := gzip.compress(b, compression_level:4095)!
// Note: compression_level 0~4095
pub fn compress(data []u8, params CompressParams) ![]u8 {
if params.compression_level !in 0..4096 {
return error('compression level should in [0,4095]')
}
// The low 12 bits are reserved to control the max # of hash probes per dictionary lookup.
flags := params.compression_level | (int(params.flags) & ~int(4095))
compressed := compr.compress(data, flags)!
// header
mut result := [
u8(0x1f), // magic numbers (1F 8B)
Expand Down Expand Up @@ -40,12 +94,28 @@ pub fn compress(data []u8) ![]u8 {
return result
}

// DecompressFlags holds extra option bits for decompression; they are carried
// in the `flags` field of DecompressParams.
// TODO: These flags have no use now
@[flag]
pub enum DecompressFlags {
// If set, the input has a valid zlib header and ends with an Adler-32 checksum (it's a valid zlib stream). Otherwise, the input is a raw deflate stream.
parse_zlib_header
// If set, there are more input bytes available beyond the end of the supplied input buffer. If clear, the input buffer contains all remaining input.
has_more_input
// If set, the output buffer is large enough to hold the entire decompressed stream. If clear, the output buffer is at least the size of the dictionary (typically 32KB).
using_non_wrapping_output_buf
// Force Adler-32 checksum computation of the decompressed bytes.
compute_adler32
}

// DecompressParams holds the optional parameters for decompression.
// All three `verify_*` switches default to true.
// NOTE(review): presumably each `verify_*` field enables the matching validation
// step (header checksum, length, data checksum) while decompressing - confirm
// against the decompression code, which is not visible here.
// `flags` carries additional DecompressFlags.
@[params]
pub struct DecompressParams {
pub:
verify_header_checksum bool = true
verify_length bool = true
verify_checksum bool = true
flags DecompressFlags
}

// reserved_bits is a mask selecting the three highest bits of a byte.
// NOTE(review): presumably these are the reserved bits of the gzip header flags
// byte that must be zero - confirm against the header validation code.
pub const reserved_bits = 0b1110_0000
Expand Down
21 changes: 21 additions & 0 deletions vlib/compress/gzip/read_gz_files_test.v
Original file line number Diff line number Diff line change
Expand Up @@ -39,3 +39,24 @@ fn test_reading_gzip_files_compressed_with_different_options() {
assert content9 == content5
assert content5 == content1
}

// test_compress_with_deferent_level checks that a payload compressed at the
// lowest level, with the default parameters, and at the highest level
// decompresses back to the original bytes each time.
fn test_compress_with_deferent_level() {
	// Reference payload: the decompressed contents of the level-9 fixture file.
	expected := gzip.decompress(os.read_bytes(s('readme_level_9.gz'))!)!

	// Level 0: Huffman only.
	packed_min := gzip.compress(expected, compression_level: 0)!
	unpacked_min := gzip.decompress(packed_min)!

	// No explicit level: uses the default (default_max_probes = 128).
	packed_default := gzip.compress(expected)!
	unpacked_default := gzip.decompress(packed_default)!

	// Level 4095: Huffman+LZ, slowest/best compression.
	packed_max := gzip.compress(expected, compression_level: 4095)!
	unpacked_max := gzip.decompress(packed_max)!

	assert expected == unpacked_min
	assert expected == unpacked_default
	assert expected == unpacked_max
}

0 comments on commit 7040514

Please sign in to comment.