From 8cba36d87376c0ea49e3473b87a76a4844f070eb Mon Sep 17 00:00:00 2001 From: Dmitri Tikhonov Date: Mon, 13 May 2019 08:51:39 -0400 Subject: [PATCH] Release 1.21.2 - [OPTIMIZATION] HPACK: use history to improve compression performance --- CHANGELOG | 4 + include/lsquic.h | 2 +- src/liblsquic/lsquic_frame_reader.c | 2 +- src/lshpack/lshpack.c | 249 +++++++++++++++++++++++----- src/lshpack/lshpack.h | 41 +++-- test/unittests/test_frame_writer.c | 1 + 6 files changed, 241 insertions(+), 58 deletions(-) diff --git a/CHANGELOG b/CHANGELOG index a87d48d24..e59f928a3 100644 --- a/CHANGELOG +++ b/CHANGELOG @@ -1,3 +1,7 @@ +2019-05-13 + - 1.21.2 + - [OPTIMIZATION] HPACK: use history to improve compression performance + 2019-05-06 - 1.21.1 - [BUGFIX] If FIN or RST not received, don't delay stream destruction. diff --git a/include/lsquic.h b/include/lsquic.h index 2a8fbd27c..ff0463c1b 100644 --- a/include/lsquic.h +++ b/include/lsquic.h @@ -25,7 +25,7 @@ extern "C" { #define LSQUIC_MAJOR_VERSION 1 #define LSQUIC_MINOR_VERSION 21 -#define LSQUIC_PATCH_VERSION 1 +#define LSQUIC_PATCH_VERSION 2 /** * Engine flags: diff --git a/src/liblsquic/lsquic_frame_reader.c b/src/liblsquic/lsquic_frame_reader.c index 9d53924a4..55d43079a 100644 --- a/src/liblsquic/lsquic_frame_reader.c +++ b/src/liblsquic/lsquic_frame_reader.c @@ -515,7 +515,7 @@ decode_and_pass_payload (struct lsquic_frame_reader *fr) enum frame_reader_error err; int s; uint32_t name_idx; - lshpack_strlen_t name_len, val_len; + unsigned name_len, val_len; char *buf; struct uncompressed_headers *uh = NULL; void *hset = NULL; diff --git a/src/lshpack/lshpack.c b/src/lshpack/lshpack.c index 2858c5cb7..1284d1dd4 100644 --- a/src/lshpack/lshpack.c +++ b/src/lshpack/lshpack.c @@ -54,8 +54,8 @@ SOFTWARE. static const struct { - lshpack_strlen_t name_len; - lshpack_strlen_t val_len; + unsigned name_len; + unsigned val_len; const char *name; const char *val; } @@ -5352,8 +5352,8 @@ struct lshpack_enc_table_entry unsigned ete_id; unsigned ete_nameval_hash; unsigned ete_name_hash; - lshpack_strlen_t ete_name_len; - lshpack_strlen_t ete_val_len; + unsigned ete_name_len; + unsigned ete_val_len; char ete_buf[0]; }; @@ -5364,6 +5364,20 @@ struct lshpack_enc_table_entry #define N_BUCKETS(n_bits) (1U << (n_bits)) #define BUCKNO(n_bits, hash) ((hash) & (N_BUCKETS(n_bits) - 1)) + +/* We estimate average number of entries in the dynamic table to be 1/3 + * of the theoretical maximum. This number is used to size the history + * buffer: we want it large enough to cover recent entries, yet not too + * large to cover entries that appear with a period larger than the + * dynamic table. + */ +static unsigned +henc_hist_size (unsigned max_capacity) +{ + return max_capacity / DYNAMIC_ENTRY_OVERHEAD / 3; +} + + int lshpack_enc_init (struct lshpack_enc *enc) { @@ -5407,10 +5421,58 @@ lshpack_enc_cleanup (struct lshpack_enc *enc) next = STAILQ_NEXT(entry, ete_next_all); free(entry); } + free(enc->hpe_hist_buf); free(enc->hpe_buckets); } +static int +henc_use_hist (struct lshpack_enc *enc) +{ + unsigned hist_size; + + if (enc->hpe_hist_buf) + return 0; + + hist_size = henc_hist_size(INITIAL_DYNAMIC_TABLE_SIZE); + if (!hist_size) + return 0; + + enc->hpe_hist_buf = malloc(sizeof(enc->hpe_hist_buf[0]) * (hist_size + 1)); + if (!enc->hpe_hist_buf) + return -1; + + enc->hpe_hist_size = hist_size; + enc->hpe_flags |= LSHPACK_ENC_USE_HIST; + return 0; +} + + +int +lshpack_enc_use_hist (struct lshpack_enc *enc, int on) +{ + if (on) + return henc_use_hist(enc); + else + { + enc->hpe_flags &= ~LSHPACK_ENC_USE_HIST; + free(enc->hpe_hist_buf); + enc->hpe_hist_buf = NULL; + enc->hpe_hist_size = 0; + enc->hpe_hist_idx = 0; + enc->hpe_hist_wrapped = 0; + return 0; + } +} + + +int +lshpack_enc_hist_used (const struct lshpack_enc *enc) +{ + return (enc->hpe_flags & LSHPACK_ENC_USE_HIST) != 0; +} + + #define LSHPACK_XXH_SEED 0 #define XXH_NAMEVAL_WIDTH 9 #define XXH_NAMEVAL_SHIFT 2 @@ -5446,7 +5508,7 @@ static #endif unsigned lshpack_enc_get_static_nameval (uint32_t nameval_hash, const char *name, - lshpack_strlen_t name_len, const char *val, lshpack_strlen_t val_len) + unsigned name_len, const char *val, unsigned val_len) { unsigned i; @@ -5472,7 +5534,7 @@ static #endif unsigned lshpack_enc_get_static_name (uint32_t name_hash, const char *name, - lshpack_strlen_t name_len) + unsigned name_len) { unsigned i; @@ -5492,8 +5554,8 @@ lshpack_enc_get_static_name (uint32_t name_hash, const char *name, unsigned -lshpack_enc_get_stx_tab_id (const char *name, lshpack_strlen_t name_len, - const char *val, lshpack_strlen_t val_len) +lshpack_enc_get_stx_tab_id (const char *name, unsigned name_len, + const char *val, unsigned val_len) { uint32_t name_hash, nameval_hash; unsigned i; @@ -5543,8 +5605,8 @@ henc_calc_table_id (const struct lshpack_enc *enc, static unsigned henc_find_table_id (struct lshpack_enc *enc, uint32_t name_hash, uint32_t nameval_hash, const char *name, - lshpack_strlen_t name_len, const char *value, - lshpack_strlen_t value_len, int *val_matched) + unsigned name_len, const char *value, + unsigned value_len, int *val_matched) { struct lshpack_enc_table_entry *entry; unsigned buckno, static_table_id; @@ -5593,9 +5655,12 @@ henc_find_table_id (struct lshpack_enc *enc, uint32_t name_hash, } -static unsigned char * -henc_enc_int (unsigned char *dst, unsigned char *const end, uint32_t value, - uint8_t prefix_bits) +#if !LS_HPACK_EMIT_TEST_CODE +static +#endif + unsigned char * +lshpack_enc_enc_int (unsigned char *dst, unsigned char *const end, + uint32_t value, uint8_t prefix_bits) { unsigned char *const dst_orig = dst; @@ -5674,7 +5739,7 @@ static #endif int lshpack_enc_enc_str (unsigned char *const dst, size_t dst_len, - const unsigned char *str, lshpack_strlen_t str_len) + const unsigned char *str, unsigned str_len) { unsigned char size_buf[4]; unsigned char *p; @@ -5727,7 +5792,7 @@ lshpack_enc_enc_str (unsigned char *const dst, size_t dst_len, /* The guess of one-byte size was incorrect. Perform necessary * adjustments. */ - p = henc_enc_int(size_buf, size_buf + sizeof(size_buf), str_len, 7); + p = lshpack_enc_enc_int(size_buf, size_buf + sizeof(size_buf), str_len, 7); if (p == size_buf) return -1; @@ -5828,8 +5893,8 @@ static #endif int lshpack_enc_push_entry (struct lshpack_enc *enc, uint32_t name_hash, - uint32_t nameval_hash, const char *name, lshpack_strlen_t name_len, - const char *value, lshpack_strlen_t value_len) + uint32_t nameval_hash, const char *name, unsigned name_len, + const char *value, unsigned value_len) { struct lshpack_enc_table_entry *entry; unsigned buckno; @@ -5867,10 +5932,78 @@ lshpack_enc_push_entry (struct lshpack_enc *enc, uint32_t name_hash, } +static void +henc_resize_history (struct lshpack_enc *enc) +{ + uint32_t *hist_buf; + unsigned hist_size, first, count, i, j; + + hist_size = henc_hist_size(enc->hpe_max_capacity); + + if (hist_size == enc->hpe_hist_size) + return; + + if (hist_size == 0) + { + free(enc->hpe_hist_buf); + enc->hpe_hist_buf = NULL; + enc->hpe_hist_size = 0; + enc->hpe_hist_idx = 0; + enc->hpe_hist_wrapped = 0; + return; + } + + hist_buf = malloc(sizeof(hist_buf[0]) * (hist_size + 1)); + if (!hist_buf) + return; + + if (enc->hpe_hist_wrapped) + { + first = (enc->hpe_hist_idx + 1) % enc->hpe_hist_size; + count = enc->hpe_hist_size; + } + else + { + first = 0; + count = enc->hpe_hist_idx; + } + for (i = 0, j = 0; count > 0 && j < hist_size; ++i, ++j, --count) + hist_buf[j] = enc->hpe_hist_buf[ (first + i) % enc->hpe_hist_size ]; + enc->hpe_hist_size = hist_size; + enc->hpe_hist_idx = j % hist_size; + enc->hpe_hist_wrapped = enc->hpe_hist_idx == 0; + free(enc->hpe_hist_buf); + enc->hpe_hist_buf = hist_buf; +} + + +/* Returns true if `nameval_hash' was already in history, false otherwise. */ +static int +henc_hist_add (struct lshpack_enc *enc, uint32_t nameval_hash) +{ + unsigned last; + uint32_t *p; + + if (enc->hpe_hist_wrapped) + last = enc->hpe_hist_size; + else + last = enc->hpe_hist_idx; + + enc->hpe_hist_buf[ last ] = nameval_hash; + for (p = enc->hpe_hist_buf; *p != nameval_hash; ++p) + ; + enc->hpe_hist_buf[ enc->hpe_hist_idx ] = nameval_hash; + enc->hpe_hist_idx = (enc->hpe_hist_idx + 1) % enc->hpe_hist_size; + enc->hpe_hist_wrapped |= enc->hpe_hist_idx == 0; + + return p < enc->hpe_hist_buf + last; +} + + unsigned char * lshpack_enc_encode2 (struct lshpack_enc *enc, unsigned char *dst, - unsigned char *dst_end, const char *name, lshpack_strlen_t name_len, - const char *value, lshpack_strlen_t value_len, int indexed_type) + unsigned char *dst_end, const char *name, unsigned name_len, + const char *value, unsigned value_len, int indexed_type) { //indexed_type: 0, Add, 1,: without, 2: never static const char indexed_prefix_number[] = {0x40, 0x00, 0x10}; @@ -5891,6 +6024,13 @@ lshpack_enc_encode2 (struct lshpack_enc *enc, unsigned char *dst, XXH32_update(&hash_state, value, value_len); nameval_hash = XXH32_digest(&hash_state); + if (enc->hpe_hist_buf) + { + rc = henc_hist_add(enc, nameval_hash); + if (!rc && enc->hpe_hist_wrapped && indexed_type == 0) + indexed_type = 1; + } + table_id = henc_find_table_id(enc, name_hash, nameval_hash, name, name_len, value, value_len, &val_matched); if (table_id > 0) @@ -5898,7 +6038,7 @@ lshpack_enc_encode2 (struct lshpack_enc *enc, unsigned char *dst, if (val_matched) { *dst = 0x80; - dst = henc_enc_int(dst, dst_end, table_id, 7); + dst = lshpack_enc_enc_int(dst, dst_end, table_id, 7); /* No need to check return value: we pass it up as-is because * the behavior is the same. */ @@ -5907,7 +6047,7 @@ lshpack_enc_encode2 (struct lshpack_enc *enc, unsigned char *dst, else { *dst = indexed_prefix_number[indexed_type]; - dst = henc_enc_int(dst, dst_end, table_id, + dst = lshpack_enc_enc_int(dst, dst_end, table_id, ((indexed_type == 0) ? 6 : 4)); if (dst == dst_org) return dst_org; @@ -5956,6 +6096,8 @@ lshpack_enc_set_max_capacity (struct lshpack_enc *enc, unsigned max_capacity) { enc->hpe_max_capacity = max_capacity; henc_remove_overflow_entries(enc); + if (lshpack_enc_hist_used(enc)) + henc_resize_history(enc); } #if LS_HPACK_EMIT_TEST_CODE @@ -5992,8 +6134,8 @@ lshpack_enc_iter_next (struct lshpack_enc *enc, void **iter, /* Dynamic table entry: */ struct dec_table_entry { - uint16_t dte_name_len; - uint16_t dte_val_len; + unsigned dte_name_len; + unsigned dte_val_len; uint8_t dte_name_idx; char dte_buf[0]; /* Contains both name and value */ }; @@ -6039,37 +6181,60 @@ lshpack_dec_cleanup (struct lshpack_dec *dec) } +/* Maximum number of bytes required to encode a 32-bit integer */ +#define LSHPACK_UINT32_ENC_SZ 6 + + +/* Assumption: we have at least one byte to work with */ #if !LS_HPACK_EMIT_TEST_CODE static #endif int -lshpack_dec_dec_int (const unsigned char **src, const unsigned char *src_end, - uint8_t prefix_bits, uint32_t *value) +lshpack_dec_dec_int (const unsigned char **src_p, const unsigned char *src_end, + unsigned prefix_bits, uint32_t *value_p) { - uint32_t B, M; - uint8_t prefix_max = (1 << prefix_bits) - 1; + const unsigned char *const orig_src = *src_p; + const unsigned char *src; + unsigned prefix_max, M; + uint32_t val, B; - *value = (*(*src)++ & prefix_max); + src = *src_p; - if (*value < prefix_max) + prefix_max = (1 << prefix_bits) - 1; + val = *src++; + val &= prefix_max; + + if (val < prefix_max) + { + *src_p = src; + *value_p = val; return 0; + } - /* To optimize the loop for the normal case, the overflow is checked - * outside the loop. The decoder is limited to 28-bit integer values, - * which is far above limitations imposed by the APIs (16-bit integers). - */ M = 0; do { - if ((*src) >= src_end) + if (src < src_end) + { + B = *src++; + val = val + ((B & 0x7f) << M); + M += 7; + } + else if (src - orig_src < LSHPACK_UINT32_ENC_SZ) return -1; - B = *(*src)++; - *value = *value + ((B & 0x7f) << M); - M += 7; + else + return -2; } while (B & 0x80); - return -(M > sizeof(*value) * 8); + if (M <= 28 || (M == 35 && src[-1] <= 0xF && val - (src[-1] << 28) < val)) + { + *src_p = src; + *value_p = val; + return 0; + } + else + return -2; } @@ -6229,7 +6394,7 @@ static #endif int lshpack_dec_push_entry (struct lshpack_dec *dec, uint8_t name_idx, const char *name, - uint16_t name_len, const char *val, uint16_t val_len) + unsigned name_len, const char *val, unsigned val_len) { struct dec_table_entry *entry; size_t size; @@ -6258,7 +6423,7 @@ lshpack_dec_push_entry (struct lshpack_dec *dec, uint8_t name_idx, const char *n int lshpack_dec_decode (struct lshpack_dec *dec, const unsigned char **src, const unsigned char *src_end, - char *dst, char *const dst_end, uint16_t *name_len, uint16_t *val_len, + char *dst, char *const dst_end, unsigned *name_len, unsigned *val_len, uint32_t *name_idx) { struct dec_table_entry *entry; @@ -6380,8 +6545,6 @@ lshpack_dec_decode (struct lshpack_dec *dec, len = hdec_dec_str((unsigned char *)name, dst_end - dst, src, src_end); if (len < 0) return len; //error - if (len > UINT16_MAX) - return -2; *name_len = len; } @@ -6389,8 +6552,6 @@ lshpack_dec_decode (struct lshpack_dec *dec, dst_end - dst - *name_len, src, src_end); if (len < 0) return len; //error - if (len > UINT16_MAX) - return -2; *val_len = len; if (indexed_type == 0) diff --git a/src/lshpack/lshpack.h b/src/lshpack/lshpack.h index 907f03ef8..e53240758 100644 --- a/src/lshpack/lshpack.h +++ b/src/lshpack/lshpack.h @@ -30,6 +30,7 @@ SOFTWARE. extern "C" { #endif +#include #include #ifndef WIN32 #include @@ -37,13 +38,8 @@ extern "C" { #include "vc_compat.h" #endif -/** - * Strings up to 65535 characters in length are supported. - */ -typedef uint16_t lshpack_strlen_t; - /** Maximum length is defined for convenience */ -#define LSHPACK_MAX_STRLEN UINT16_MAX +#define LSHPACK_MAX_STRLEN UINT_MAX struct lshpack_enc; struct lshpack_dec; @@ -157,8 +153,8 @@ lshpack_enc_cleanup (struct lshpack_enc *); */ unsigned char * lshpack_enc_encode2 (struct lshpack_enc *henc, unsigned char *dst, - unsigned char *dst_end, const char *name, lshpack_strlen_t name_len, - const char *value, lshpack_strlen_t value_len, int indexed_type); + unsigned char *dst_end, const char *name, unsigned name_len, + const char *value, unsigned value_len, int indexed_type); /** @@ -183,6 +179,20 @@ lshpack_enc_encode (struct lshpack_enc *henc, unsigned char *dst, void lshpack_enc_set_max_capacity (struct lshpack_enc *, unsigned); +/** + * Turn history on or off. Turning history on may fail (malloc), in + * which case -1 is returned. + */ +int +lshpack_enc_use_hist (struct lshpack_enc *, int on); + +/** + * Return true if history is used, false otherwise. By default, + * history is off. + */ +int +lshpack_enc_hist_used (const struct lshpack_enc *); + /** * Initialize HPACK decoder structure. */ @@ -204,8 +214,8 @@ lshpack_dec_cleanup (struct lshpack_dec *); int lshpack_dec_decode (struct lshpack_dec *dec, const unsigned char **src, const unsigned char *src_end, - char *dst, char *const dst_end, lshpack_strlen_t *name_len, - lshpack_strlen_t *val_len, uint32_t *name_idx); + char *dst, char *const dst_end, unsigned *name_len, + unsigned *val_len, uint32_t *name_idx); void lshpack_dec_set_max_capacity (struct lshpack_dec *, unsigned); @@ -242,6 +252,13 @@ struct lshpack_enc hpe_all_entries; struct lshpack_double_enc_head *hpe_buckets; + + uint32_t *hpe_hist_buf; + unsigned hpe_hist_size, hpe_hist_idx; + int hpe_hist_wrapped; + enum { + LSHPACK_ENC_USE_HIST = 1 << 0, + } hpe_flags; }; struct lshpack_arr @@ -261,8 +278,8 @@ struct lshpack_dec }; unsigned -lshpack_enc_get_stx_tab_id (const char *name, lshpack_strlen_t name_len, - const char *val, lshpack_strlen_t val_len); +lshpack_enc_get_stx_tab_id (const char *name, unsigned name_len, + const char *val, unsigned val_len); #ifdef __cplusplus } diff --git a/test/unittests/test_frame_writer.c b/test/unittests/test_frame_writer.c index 18fdd33fd..9830181de 100644 --- a/test/unittests/test_frame_writer.c +++ b/test/unittests/test_frame_writer.c @@ -173,6 +173,7 @@ test_oversize_header (void) &s_conn_stats, #endif 0); + lsquic_frame_writer_max_header_list_size(fw, 1 << 16); reset_output(0); value = malloc(big_len);