From 69f9448497a4f0ee8f91b65ad92d8e4fe168179e Mon Sep 17 00:00:00 2001 From: Stefan Rua Date: Fri, 24 Jun 2022 13:47:09 +0200 Subject: [PATCH] Add bsc 3.2.4 --- Makefile | 27 +- README.md | 1 + _lzbench/compressors.cpp | 67 + _lzbench/compressors.h | 17 + _lzbench/lzbench.h | 10 +- libbsc/AUTHORS | 15 + libbsc/CHANGES | 113 + libbsc/LICENSE | 202 + libbsc/README | 71 + libbsc/VERSION | 1 + libbsc/bsc.cpp | 885 +++ libbsc/libbsc/adler32/adler32.cpp | 210 + libbsc/libbsc/adler32/adler32.h | 57 + libbsc/libbsc/bwt/bwt.cpp | 129 + libbsc/libbsc/bwt/bwt.h | 71 + libbsc/libbsc/bwt/libsais/VERSION | 1 + libbsc/libbsc/bwt/libsais/libsais.c | 7654 +++++++++++++++++++++++ libbsc/libbsc/bwt/libsais/libsais.h | 310 + libbsc/libbsc/coder/coder.cpp | 351 ++ libbsc/libbsc/coder/coder.h | 76 + libbsc/libbsc/coder/common/predictor.h | 220 + libbsc/libbsc/coder/common/rangecoder.h | 261 + libbsc/libbsc/coder/common/tables.h | 1872 ++++++ libbsc/libbsc/coder/qlfc/qlfc.cpp | 2208 +++++++ libbsc/libbsc/coder/qlfc/qlfc.h | 109 + libbsc/libbsc/coder/qlfc/qlfc_model.cpp | 91 + libbsc/libbsc/coder/qlfc/qlfc_model.h | 269 + libbsc/libbsc/filters.h | 109 + libbsc/libbsc/filters/detectors.cpp | 585 ++ libbsc/libbsc/filters/preprocessing.cpp | 180 + libbsc/libbsc/filters/tables.h | 754 +++ libbsc/libbsc/libbsc.h | 157 + libbsc/libbsc/libbsc/libbsc.cpp | 620 ++ libbsc/libbsc/lzp/lzp.cpp | 884 +++ libbsc/libbsc/lzp/lzp.h | 72 + libbsc/libbsc/platform/platform.cpp | 264 + libbsc/libbsc/platform/platform.h | 226 + libbsc/libbsc/st/st.cpp | 1533 +++++ libbsc/libbsc/st/st.cu | 416 ++ libbsc/libbsc/st/st.cuh | 69 + libbsc/libbsc/st/st.h | 80 + libbsc/makefile | 125 + 42 files changed, 21366 insertions(+), 6 deletions(-) create mode 100644 libbsc/AUTHORS create mode 100644 libbsc/CHANGES create mode 100644 libbsc/LICENSE create mode 100644 libbsc/README create mode 100644 libbsc/VERSION create mode 100644 libbsc/bsc.cpp create mode 100644 libbsc/libbsc/adler32/adler32.cpp create mode 100644 
libbsc/libbsc/adler32/adler32.h create mode 100644 libbsc/libbsc/bwt/bwt.cpp create mode 100644 libbsc/libbsc/bwt/bwt.h create mode 100644 libbsc/libbsc/bwt/libsais/VERSION create mode 100644 libbsc/libbsc/bwt/libsais/libsais.c create mode 100644 libbsc/libbsc/bwt/libsais/libsais.h create mode 100644 libbsc/libbsc/coder/coder.cpp create mode 100644 libbsc/libbsc/coder/coder.h create mode 100644 libbsc/libbsc/coder/common/predictor.h create mode 100644 libbsc/libbsc/coder/common/rangecoder.h create mode 100644 libbsc/libbsc/coder/common/tables.h create mode 100644 libbsc/libbsc/coder/qlfc/qlfc.cpp create mode 100644 libbsc/libbsc/coder/qlfc/qlfc.h create mode 100644 libbsc/libbsc/coder/qlfc/qlfc_model.cpp create mode 100644 libbsc/libbsc/coder/qlfc/qlfc_model.h create mode 100644 libbsc/libbsc/filters.h create mode 100644 libbsc/libbsc/filters/detectors.cpp create mode 100644 libbsc/libbsc/filters/preprocessing.cpp create mode 100644 libbsc/libbsc/filters/tables.h create mode 100644 libbsc/libbsc/libbsc.h create mode 100644 libbsc/libbsc/libbsc/libbsc.cpp create mode 100644 libbsc/libbsc/lzp/lzp.cpp create mode 100644 libbsc/libbsc/lzp/lzp.h create mode 100644 libbsc/libbsc/platform/platform.cpp create mode 100644 libbsc/libbsc/platform/platform.h create mode 100644 libbsc/libbsc/st/st.cpp create mode 100644 libbsc/libbsc/st/st.cu create mode 100644 libbsc/libbsc/st/st.cuh create mode 100644 libbsc/libbsc/st/st.h create mode 100644 libbsc/makefile diff --git a/Makefile b/Makefile index 2271fdc6..6c0ebdf0 100644 --- a/Makefile +++ b/Makefile @@ -308,6 +308,25 @@ ifneq "$(DONT_BUILD_NVCOMP)" "1" NVCOMP_CU_OBJ = $(NVCOMP_CU_SRC:%=%.o) NVCOMP_FILES = $(NVCOMP_CU_OBJ) $(NVCOMP_CPP_OBJ) endif +ifneq "$(DONT_BUILD_BSC)" "1" + DEFINES += -DBENCH_HAS_BSC + # bsc is built unless DONT_BUILD_BSC=1: BENCH_HAS_BSC and the libbsc objects below must always be enabled together + CFLAGS += -DLIBBSC_CUDA_SUPPORT + CFLAGS += -DLIBBSC_SORT_TRANSFORM_SUPPORT + BSC_FILES = libbsc/libbsc/adler32/adler32.o + BSC_FILES += libbsc/libbsc/bwt/libsais/libsais.o + BSC_FILES += libbsc/libbsc/bwt/bwt.o + 
BSC_FILES += libbsc/libbsc/coder/coder.o + BSC_FILES += libbsc/libbsc/coder/qlfc/qlfc.o + BSC_FILES += libbsc/libbsc/coder/qlfc/qlfc_model.o + BSC_FILES += libbsc/libbsc/filters/detectors.o + BSC_FILES += libbsc/libbsc/filters/preprocessing.o + BSC_FILES += libbsc/libbsc/libbsc/libbsc.o + BSC_FILES += libbsc/libbsc/lzp/lzp.o + BSC_FILES += libbsc/libbsc/platform/platform.o + BSC_FILES += libbsc/libbsc/st/st.o + BSC_FILES += libbsc/libbsc/st/st_cu.o +endif endif all: lzbench @@ -347,13 +366,17 @@ $(NVCOMP_CPP_OBJ): %.cpp.o: %.cpp @$(MKDIR) $(dir $@) $(CXX) $(CFLAGS) -c $< -o $@ +libbsc/libbsc/st/st_cu.o: libbsc/libbsc/st/st.cu + @$(MKDIR) $(dir $@) + $(CUDA_CC) $(CUDA_CFLAGS) $(CFLAGS) -c $< -o $@ + # disable the implicit rule for making a binary out of a single object file %: %.o _lzbench/lzbench.o: _lzbench/lzbench.cpp _lzbench/lzbench.h -lzbench: $(BZIP2_FILES) $(DENSITY_FILES) $(FASTLZMA2_OBJ) $(ZSTD_FILES) $(GLZA_FILES) $(LZSSE_FILES) $(LZFSE_FILES) $(XPACK_FILES) $(GIPFELI_FILES) $(XZ_FILES) $(LIBLZG_FILES) $(BRIEFLZ_FILES) $(LZF_FILES) $(LZRW_FILES) $(BROTLI_FILES) $(CSC_FILES) $(LZMA_FILES) $(ZLING_FILES) $(QUICKLZ_FILES) $(SNAPPY_FILES) $(ZLIB_FILES) $(LZHAM_FILES) $(LZO_FILES) $(UCL_FILES) $(LZMAT_FILES) $(LZ4_FILES) $(LIBDEFLATE_FILES) $(MISC_FILES) $(NVCOMP_FILES) $(LZBENCH_FILES) +lzbench: $(BSC_FILES) $(BZIP2_FILES) $(DENSITY_FILES) $(FASTLZMA2_OBJ) $(ZSTD_FILES) $(GLZA_FILES) $(LZSSE_FILES) $(LZFSE_FILES) $(XPACK_FILES) $(GIPFELI_FILES) $(XZ_FILES) $(LIBLZG_FILES) $(BRIEFLZ_FILES) $(LZF_FILES) $(LZRW_FILES) $(BROTLI_FILES) $(CSC_FILES) $(LZMA_FILES) $(ZLING_FILES) $(QUICKLZ_FILES) $(SNAPPY_FILES) $(ZLIB_FILES) $(LZHAM_FILES) $(LZO_FILES) $(UCL_FILES) $(LZMAT_FILES) $(LZ4_FILES) $(LIBDEFLATE_FILES) $(MISC_FILES) $(NVCOMP_FILES) $(LZBENCH_FILES) $(CXX) $^ -o $@ $(LDFLAGS) @echo Linked GCC_VERSION=$(GCC_VERSION) CLANG_VERSION=$(CLANG_VERSION) COMPILER=$(COMPILER) @@ -370,4 +393,4 @@ lzbench: $(BZIP2_FILES) $(DENSITY_FILES) $(FASTLZMA2_OBJ) $(ZSTD_FILES) 
$(GLZA_F $(CXX) $(CFLAGS) $< -c -o $@ clean: - rm -rf lzbench lzbench.exe *.o _lzbench/*.o bzip2/*.o fast-lzma2/*.o slz/*.o zstd/lib/*.o zstd/lib/*.a zstd/lib/common/*.o zstd/lib/compress/*.o zstd/lib/decompress/*.o zstd/lib/dictBuilder/*.o lzsse/lzsse2/*.o lzsse/lzsse4/*.o lzsse/lzsse8/*.o lzfse/*.o xpack/lib/*.o blosclz/*.o gipfeli/*.o xz/*.o xz/common/*.o xz/check/*.o xz/lzma/*.o xz/lz/*.o xz/rangecoder/*.o liblzg/*.o lzlib/*.o brieflz/*.o brotli/common/*.o brotli/enc/*.o brotli/dec/*.o libcsc/*.o wflz/*.o lzjb/*.o lzma/*.o density/buffers/*.o density/algorithms/*.o density/algorithms/cheetah/core/*.o density/algorithms/*.o density/algorithms/lion/forms/*.o density/algorithms/lion/core/*.o density/algorithms/chameleon/core/*.o density/*.o density/structure/*.o pithy/*.o glza/*.o libzling/*.o yappy/*.o shrinker/*.o fastlz/*.o ucl/*.o zlib/*.o lzham/*.o lzmat/*.o lizard/*.o lz4/*.o crush/*.o lzf/*.o lzrw/*.o lzo/*.o snappy/*.o quicklz/*.o tornado/*.o libdeflate/lib/*.o libdeflate/lib/x86/*.o libdeflate/lib/arm/*.o nakamichi/*.o nvcomp/*.o + rm -rf lzbench lzbench.exe *.o _lzbench/*.o bzip2/*.o libbsc/libbsc/adler32/*.o libbsc/libbsc/bwt/libsais/*.o libbsc/libbsc/bwt/*.o libbsc/libbsc/coder/*.o libbsc/libbsc/coder/qlfc/*.o libbsc/libbsc/filters/*.o libbsc/libbsc/libbsc/*.o libbsc/libbsc/lzp/*.o libbsc/libbsc/platform/*.o libbsc/libbsc/st/*.o fast-lzma2/*.o slz/*.o zstd/lib/*.o zstd/lib/*.a zstd/lib/common/*.o zstd/lib/compress/*.o zstd/lib/decompress/*.o zstd/lib/dictBuilder/*.o lzsse/lzsse2/*.o lzsse/lzsse4/*.o lzsse/lzsse8/*.o lzfse/*.o xpack/lib/*.o blosclz/*.o gipfeli/*.o xz/*.o xz/common/*.o xz/check/*.o xz/lzma/*.o xz/lz/*.o xz/rangecoder/*.o liblzg/*.o lzlib/*.o brieflz/*.o brotli/common/*.o brotli/enc/*.o brotli/dec/*.o libcsc/*.o wflz/*.o lzjb/*.o lzma/*.o density/buffers/*.o density/algorithms/*.o density/algorithms/cheetah/core/*.o density/algorithms/*.o density/algorithms/lion/forms/*.o density/algorithms/lion/core/*.o 
density/algorithms/chameleon/core/*.o density/*.o density/structure/*.o pithy/*.o glza/*.o libzling/*.o yappy/*.o shrinker/*.o fastlz/*.o ucl/*.o zlib/*.o lzham/*.o lzmat/*.o lizard/*.o lz4/*.o crush/*.o lzf/*.o lzrw/*.o lzo/*.o snappy/*.o quicklz/*.o tornado/*.o libdeflate/lib/*.o libdeflate/lib/x86/*.o libdeflate/lib/arm/*.o nakamichi/*.o nvcomp/*.o diff --git a/README.md b/README.md index 4bed5442..90f1ca38 100644 --- a/README.md +++ b/README.md @@ -83,6 +83,7 @@ see the [CompFuzz Results](https://github.com/nemequ/compfuzz/wiki/Results) page - [blosclz 2.0.0](https://github.com/Blosc/c-blosc2) - [brieflz 1.3.0](https://github.com/jibsen/brieflz) - [brotli 1.0.9](https://github.com/google/brotli) + - [bsc 3.2.4](https://github.com/IlyaGrebnov/libbsc) - [bzip2 1.0.8](http://www.bzip.org/downloads.html) - [crush 1.0](https://sourceforge.net/projects/crush/) - [csc 2016-10-13](https://github.com/fusiyuan2010/CSC) - WARNING: it can throw SEGFAULT compiled with Apple LLVM version 7.3.0 (clang-703.0.31) diff --git a/_lzbench/compressors.cpp b/_lzbench/compressors.cpp index 08039a68..c5045bf1 100644 --- a/_lzbench/compressors.cpp +++ b/_lzbench/compressors.cpp @@ -92,6 +92,73 @@ int64_t lzbench_brotli_decompress(char *inbuf, size_t insize, char *outbuf, size +#ifdef BENCH_HAS_BSC +#include "libbsc/libbsc/libbsc.h" + +char *lzbench_bsc_init(size_t insize, size_t level, size_t) +{ + int features = LIBBSC_FEATURE_FASTMODE | LIBBSC_FEATURE_MULTITHREADING; + bsc_init(features); + return 0; +} + +int64_t lzbench_bsc_compress(char *inbuf, size_t insize, char *outbuf, size_t outsize, size_t level, size_t, char*) +{ + int features = LIBBSC_FEATURE_FASTMODE | LIBBSC_FEATURE_MULTITHREADING; + int lzpHashSize = 15; // -H + int lzpMinLen = 128; // -M + int blockSorter = level == 2 ? 
1 : (int)level; // -m, note: 2 doesn't exist, default to 1 + int coder = 1; // -e + + int res = bsc_compress((unsigned char *)inbuf, (unsigned char *)outbuf, (int)insize, lzpHashSize, lzpMinLen, blockSorter, coder, features); + return res; +} + +int64_t lzbench_bsc_decompress(char *inbuf, size_t insize, char *outbuf, size_t outsize, size_t level, size_t, char*) +{ + int features = LIBBSC_FEATURE_FASTMODE | LIBBSC_FEATURE_MULTITHREADING; + int insize_bsc; + int outsize_bsc; + // Bail out with 0 instead of reporting success when the header is bad or a reported size exceeds the caller's buffers + if (insize < LIBBSC_HEADER_SIZE || bsc_block_info((unsigned char *)inbuf, LIBBSC_HEADER_SIZE, &insize_bsc, &outsize_bsc, features) != LIBBSC_NO_ERROR) return 0; + if (insize_bsc < 0 || (size_t)insize_bsc > insize || outsize_bsc < 0 || (size_t)outsize_bsc > outsize || bsc_decompress((unsigned char *)inbuf, insize_bsc, (unsigned char *)outbuf, outsize_bsc, features) != LIBBSC_NO_ERROR) return 0; + return outsize_bsc; +} + +char *lzbench_bsc_cuda_init(size_t insize, size_t level, size_t) +{ + int features = LIBBSC_FEATURE_FASTMODE | LIBBSC_FEATURE_MULTITHREADING | LIBBSC_FEATURE_CUDA; + bsc_init(features); + return 0; +} + +int64_t lzbench_bsc_cuda_compress(char *inbuf, size_t insize, char *outbuf, size_t outsize, size_t level, size_t, char*) +{ + int features = LIBBSC_FEATURE_FASTMODE | LIBBSC_FEATURE_MULTITHREADING | LIBBSC_FEATURE_CUDA; + int lzpHashSize = 15; + int lzpMinLen = 128; + int blockSorter = (int)level; + int coder = 1; + + int res = bsc_compress((unsigned char *)inbuf, (unsigned char *)outbuf, (int)insize, lzpHashSize, lzpMinLen, blockSorter, coder, features); + return res; +} + +int64_t lzbench_bsc_cuda_decompress(char *inbuf, size_t insize, char *outbuf, size_t outsize, size_t level, size_t, char*) +{ + int features = LIBBSC_FEATURE_FASTMODE | LIBBSC_FEATURE_MULTITHREADING | LIBBSC_FEATURE_CUDA; + int insize_bsc; + int outsize_bsc; + // Bail out with 0 instead of reporting success when the header is bad or a reported size exceeds the caller's buffers + if (insize < LIBBSC_HEADER_SIZE || bsc_block_info((unsigned char *)inbuf, LIBBSC_HEADER_SIZE, &insize_bsc, &outsize_bsc, features) != LIBBSC_NO_ERROR) return 0; + if (insize_bsc < 0 || (size_t)insize_bsc > insize || outsize_bsc < 0 || (size_t)outsize_bsc > outsize || bsc_decompress((unsigned char *)inbuf, insize_bsc, (unsigned char *)outbuf, outsize_bsc, features) != LIBBSC_NO_ERROR) return 0; + return outsize_bsc; +} + +#endif // BENCH_HAS_BSC + + + #ifndef BENCH_REMOVE_BZIP2 #include "bzip2/bzlib.h" diff --git a/_lzbench/compressors.h 
b/_lzbench/compressors.h index 6141dc31..b31afa50 100644 --- a/_lzbench/compressors.h +++ b/_lzbench/compressors.h @@ -40,6 +40,23 @@ int64_t lzbench_return_0(char *inbuf, size_t insize, char *outbuf, size_t outsiz #endif +#ifdef BENCH_HAS_BSC + char* lzbench_bsc_init(size_t insize, size_t level, size_t); + int64_t lzbench_bsc_compress(char *inbuf, size_t insize, char *outbuf, size_t outsize, size_t level, size_t, char*); + int64_t lzbench_bsc_decompress(char *inbuf, size_t insize, char *outbuf, size_t outsize, size_t, size_t, char*); + char* lzbench_bsc_cuda_init(size_t insize, size_t level, size_t); + int64_t lzbench_bsc_cuda_compress(char *inbuf, size_t insize, char *outbuf, size_t outsize, size_t level, size_t, char*); + int64_t lzbench_bsc_cuda_decompress(char *inbuf, size_t insize, char *outbuf, size_t outsize, size_t, size_t, char*); +#else + #define lzbench_bsc_init NULL + #define lzbench_bsc_compress NULL + #define lzbench_bsc_decompress NULL + #define lzbench_bsc_cuda_init NULL + #define lzbench_bsc_cuda_compress NULL + #define lzbench_bsc_cuda_decompress NULL +#endif // BENCH_HAS_BSC + + #ifndef BENCH_REMOVE_BZIP2 int64_t lzbench_bzip2_compress(char *inbuf, size_t insize, char *outbuf, size_t outsize, size_t level, size_t, char*); int64_t lzbench_bzip2_decompress(char *inbuf, size_t insize, char *outbuf, size_t outsize, size_t, size_t, char*); diff --git a/_lzbench/lzbench.h b/_lzbench/lzbench.h index 9a660eb2..2267600c 100644 --- a/_lzbench/lzbench.h +++ b/_lzbench/lzbench.h @@ -137,7 +137,7 @@ typedef struct -#define LZBENCH_COMPRESSOR_COUNT 73 +#define LZBENCH_COMPRESSOR_COUNT 75 static const compressor_desc_t comp_desc[LZBENCH_COMPRESSOR_COUNT] = { @@ -147,6 +147,8 @@ static const compressor_desc_t comp_desc[LZBENCH_COMPRESSOR_COUNT] = { "brotli", "1.0.9", 0, 11, 0, 0, lzbench_brotli_compress, lzbench_brotli_decompress, NULL, NULL }, { "brotli22", "1.0.9", 0, 11, 22, 0, lzbench_brotli_compress, lzbench_brotli_decompress, NULL, NULL }, { "brotli24", 
"1.0.9", 0, 11, 24, 0, lzbench_brotli_compress, lzbench_brotli_decompress, NULL, NULL }, + { "bsc", "3.2.4", 1, 6, 0, 0, lzbench_bsc_compress, lzbench_bsc_decompress, lzbench_bsc_init, NULL }, + { "bsc_cuda", "3.2.4", 5, 8, 0, 0, lzbench_bsc_cuda_compress, lzbench_bsc_cuda_decompress, lzbench_bsc_cuda_init, NULL }, { "bzip2", "1.0.8", 1, 9, 0, 0, lzbench_bzip2_compress, lzbench_bzip2_decompress, NULL, NULL }, { "crush", "1.0", 0, 2, 0, 0, lzbench_crush_compress, lzbench_crush_decompress, NULL, NULL }, { "csc", "2016-10-13", 1, 5, 0, 0, lzbench_csc_compress, lzbench_csc_decompress, NULL, NULL }, @@ -225,14 +227,14 @@ static const alias_desc_t alias_desc[LZBENCH_ALIASES_COUNT] = { "fast", "density/fastlz/lizard,10,11,12,13,14/lz4/lz4fast,3,17/lzf/lzfse/lzjb/lzo1b,1/lzo1c,1/lzo1f,1/lzo1x,1/lzo1y,1/" \ "lzrw,1,3,4,5/lzsse4fast/lzsse8fast/lzvn/pithy,0,3,6,9/quicklz,1,2/shrinker/snappy/tornado,1,2,3/zstd,1,2,3,4,5" }, // default alias #if !defined(__arm__) && !defined(__aarch64__) - { "all", "blosclz,1,3,6,9/brieflz,1,3,6,8/brotli,0,2,5,8,11/bzip2,1,5,9/" \ + { "all", "blosclz,1,3,6,9/brieflz,1,3,6,8/brotli,0,2,5,8,11/bsc,1,3,6/bsc_cuda,5,7,8/bzip2,1,5,9/" \ "crush,0,1,2/csc,1,3,5/density,1,2,3/fastlz,1,2/fastlzma2,1,3,5,8,10/gipfeli/libdeflate,1,3,6,9,12/lizard,10,12,15,19,20,22,25,29,30,32,35,39,40,42,45,49/lz4/lz4fast,3,17/lz4hc,1,4,9,12/" \ "lzf,0,1/lzfse/lzg,1,4,6,8/lzham,0,1/lzjb/lzlib,0,3,6,9/lzma,0,2,4,5,9/lzo1/lzo1a/lzo1b,1,3,6,9,99,999/lzo1c,1,3,6,9,99,999/lzo1f/lzo1x/lzo1y/lzo1z/lzo2a/" \ "lzrw,1,3,4,5/lzsse2,1,6,12,16/lzsse4,1,6,12,16/lzsse8,1,6,12,16/lzvn/pithy,0,3,6,9/quicklz,1,2,3/slz_gzip/snappy/tornado,1,2,3,4,5,6,7,10,13,16/" \ "ucl_nrv2b,1,6,9/ucl_nrv2d,1,6,9/ucl_nrv2e,1,6,9/xpack,1,6,9/xz,0,3,6,9/yalz77,1,4,8,12/yappy,1,10,100/zlib,1,6,9/zling,0,1,2,3,4/zstd,1,2,5,8,11,15,18,22/" \ "shrinker/wflz/lzmat" }, // these can SEGFAULT #else - { "all", "blosclz,1,3,6,9/brieflz,1,3,6,8/brotli,0,2,5,8/bzip2,1,5,9/" \ + { "all", 
"blosclz,1,3,6,9/brieflz,1,3,6,8/brotli,0,2,5,8/bsc,1,3,6/bsc_cuda,5,7,8/bzip2,1,5,9/" \ "crush,0,1,2/csc,1,3,5/density,1,2,3/fastlz,1,2/gipfeli/libdeflate,1,3,6,9,12/lizard,10,12,15,20,22,25,30,32,35,40,42,45/lz4/lz4fast,3,17/lz4hc,1,4,9/" \ "lzf,0,1/lzfse/lzg,1,4,6,8/lzham,0,1/lzjb/lzlib,0,3,6,9/lzma,0,2,4,5/lzo1/lzo1a/lzo1b,1,3,6,9,99,999/lzo1c,1,3,6,9,99,999/lzo1f/lzo1x/lzo1y/lzo1z/lzo2a/" \ "lzrw,1,3,4,5/lzsse2,1,6,12,16/lzsse4,1,6,12,16/lzsse8,1,6,12,16/lzvn/pithy,0,3,6,9/quicklz,1,2,3/slz_gzip/snappy/tornado,1,2,3,4,5,6,7,10,13,16/" \ @@ -250,7 +252,7 @@ static const alias_desc_t alias_desc[LZBENCH_ALIASES_COUNT] = { "lzo1y", "lzo1y,1,999" }, { "lzo", "lzo1/lzo1a/lzo1b/lzo1c/lzo1f/lzo1x/lzo1y/lzo1z/lzo2a" }, { "ucl", "ucl_nrv2b/ucl_nrv2d/ucl_nrv2e" }, - { "cuda", "cudaMemcpy/nvcomp_lz4,0,1,3,5" }, + { "cuda", "cudaMemcpy/nvcomp_lz4,0,1,3,5/bsc_cuda,5,6,7,8" }, }; #endif diff --git a/libbsc/AUTHORS b/libbsc/AUTHORS new file mode 100644 index 00000000..73f67585 --- /dev/null +++ b/libbsc/AUTHORS @@ -0,0 +1,15 @@ +-- Authors of bsc and libbsc + + Ilya Grebnov + +-- This program is based on (at least) the work of + + Yuta Mori, Charles Bloom, Julian Seward, Mike Burrows, Matt Mahoney, + David Wheeler, Sebastian Deorowicz, Florin Ghido, Peter Fenwick, + Michael Schindler, Bulat Ziganshin, Eugene Shelwien, Yann Collet, + Dmitry Shkarin, Mark Adler, Przemyslaw Skibinski, Duane Merrill, + Michael Maniscalco, Jarek Duda, Fabian Giesen, Pascal Massimino, + James K. Bonfield, Nania Francesco. + + + diff --git a/libbsc/CHANGES b/libbsc/CHANGES new file mode 100644 index 00000000..fbdf96d5 --- /dev/null +++ b/libbsc/CHANGES @@ -0,0 +1,113 @@ +Changes in 3.2.4 (18 January, 18 2022) +- Improved performance for AArch64 (ARM64) platform. + +Changes in 3.2.3 (September, 30 2021) +- Fixed various out-of-bound memory access bugs found by LibFuzzer. +- Fixed data corruption issue found by LibFuzzer. +- Due to these fixes, an upgrade to this version is strongly recommended. 
+ +Changes in 3.2.2 (September, 18 2021) +- Improved performance of LZP algorithm. + +Changes in 3.2.1 (September, 17 2021) +- Improved performance of LZP algorithm. + +Changes in 3.2.0 (September, 10 2021) +- New BWT / ST post-coder for fast compression and decompression. + +Changes in 3.1.9 (August, 25 2021) +- Updated makefile to use Clang compiler and AVX2 instruction set for maximum performance. +- Slightly improved compression and decompression performance. + +Changes in 3.1.8 (August, 18 2021) +- Slightly improved compression performance. + +Changes in 3.1.7 (August, 15 2021) +- Slightly improved compression performance. + +Changes in 3.1.6 (August, 12 2021) +- Slightly improved decompression performance. + +Changes in 3.1.5 (August, 10 2021) +- Improved Adler-32 performance with SIMD (SSSE3). +- Improved reverse MTF performance with SIMD (SSE4.1). + +Changes in 3.1.4 (August, 4 2021) +- Implemented dynamic CPU Dispatching to SSE2, AVX and AVX2. +- Further improved forward MTF performance. 
+ +Changes in 3.1.3 (July, 14 2021) +- Maximum compression block size increased to 2047 megabytes +- Improved forward MTF performance with SIMD (SSE2) + +Changes in 3.1.2 (July, 14 2021) +- Improved reverse BWT performance with libsais 2.4.0 + +Changes in 3.1.1 (June, 24 2021) +- divsufsort library is replaced with libsais 2.3.0 +- back40computing library is replaced with cub from CUDA Toolkit 11.3 + +Changes in 3.1.0 (July 8, 2012) +- Added Kepler GPU support with CUDA Toolkit 4.2 + +Changes in 3.0.0 (August 26, 2011) +- NVIDIA GPU acceleration of forward ST algorithms +- Added Sort Transform of order 7 & 8 (GPU only) + +Changes in 2.8.0 (August 8, 2011) +- Added parallel version of LZP algorithm +- Large RAM pages (2 MB) support for Windows +- Improved performance of ST and BWT algorithms + +Changes in 2.7.0 (June 5, 2011) +- Improved performance of LZP algorithm + +Changes in 2.6.1 (May 4, 2011) +- Fixed bug in segmentation algorithm + +Changes in 2.6.0 (April 30, 2011) +- Added Sort Transform of order 6 + +Changes in 2.5.0 (March 20, 2011) +- Some minor performance improvments +- CRC32 replaced with Adler32 + +Changes in 2.4.5 (January 3, 2011) +- Improved performance of reverse BWT and ST algorithms + +Changes in 2.4.0 (October 18, 2010) +- Improved performance of reverse BWT and ST algorithms + +Changes in 2.3.0 (August 9, 2010) +- Improved performance of QLFC algorithm + +Changes in 2.2.5 (July 5, 2010) +- Added parallel version of segmentation algorithm + +Changes in 2.2.0 (June 15, 2010) +- Added parallel version of reverse BWT transform +- Added parallel version of forward ST transform + +Changes in 2.1.5 (June 1, 2010) +- Improved multi-core systems support +- Improved segmentation algorithm + +Changes in 2.1.0 (May 17, 2010) +- Added GNU C++ compiler support +- Added makefile + +Changes in 2.0.0 (May 3, 2010) +- Released source code under LGPL license +- Added multi-core systems support +- Added fast "-f" compression mode +- Added Sort Transform of 
order 3 + +Changes in 1.0.3 (April 11, 2010) +- Fixed bug in block-sorting algorithm +- Added support for large files(>2Gb long) + +Changes in 1.0.1 (April 8, 2010) +- Decreased memory usage from 6 to 5 times per block size + +Changes in 1.0.0 (April 7, 2010) +- First public version for community technology preview diff --git a/libbsc/LICENSE b/libbsc/LICENSE new file mode 100644 index 00000000..d6456956 --- /dev/null +++ b/libbsc/LICENSE @@ -0,0 +1,202 @@ + + Apache License + Version 2.0, January 2004 + http://www.apache.org/licenses/ + + TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION + + 1. Definitions. + + "License" shall mean the terms and conditions for use, reproduction, + and distribution as defined by Sections 1 through 9 of this document. + + "Licensor" shall mean the copyright owner or entity authorized by + the copyright owner that is granting the License. + + "Legal Entity" shall mean the union of the acting entity and all + other entities that control, are controlled by, or are under common + control with that entity. For the purposes of this definition, + "control" means (i) the power, direct or indirect, to cause the + direction or management of such entity, whether by contract or + otherwise, or (ii) ownership of fifty percent (50%) or more of the + outstanding shares, or (iii) beneficial ownership of such entity. + + "You" (or "Your") shall mean an individual or Legal Entity + exercising permissions granted by this License. + + "Source" form shall mean the preferred form for making modifications, + including but not limited to software source code, documentation + source, and configuration files. + + "Object" form shall mean any form resulting from mechanical + transformation or translation of a Source form, including but + not limited to compiled object code, generated documentation, + and conversions to other media types. 
+ + "Work" shall mean the work of authorship, whether in Source or + Object form, made available under the License, as indicated by a + copyright notice that is included in or attached to the work + (an example is provided in the Appendix below). + + "Derivative Works" shall mean any work, whether in Source or Object + form, that is based on (or derived from) the Work and for which the + editorial revisions, annotations, elaborations, or other modifications + represent, as a whole, an original work of authorship. For the purposes + of this License, Derivative Works shall not include works that remain + separable from, or merely link (or bind by name) to the interfaces of, + the Work and Derivative Works thereof. + + "Contribution" shall mean any work of authorship, including + the original version of the Work and any modifications or additions + to that Work or Derivative Works thereof, that is intentionally + submitted to Licensor for inclusion in the Work by the copyright owner + or by an individual or Legal Entity authorized to submit on behalf of + the copyright owner. For the purposes of this definition, "submitted" + means any form of electronic, verbal, or written communication sent + to the Licensor or its representatives, including but not limited to + communication on electronic mailing lists, source code control systems, + and issue tracking systems that are managed by, or on behalf of, the + Licensor for the purpose of discussing and improving the Work, but + excluding communication that is conspicuously marked or otherwise + designated in writing by the copyright owner as "Not a Contribution." + + "Contributor" shall mean Licensor and any individual or Legal Entity + on behalf of whom a Contribution has been received by Licensor and + subsequently incorporated within the Work. + + 2. Grant of Copyright License. 
Subject to the terms and conditions of + this License, each Contributor hereby grants to You a perpetual, + worldwide, non-exclusive, no-charge, royalty-free, irrevocable + copyright license to reproduce, prepare Derivative Works of, + publicly display, publicly perform, sublicense, and distribute the + Work and such Derivative Works in Source or Object form. + + 3. Grant of Patent License. Subject to the terms and conditions of + this License, each Contributor hereby grants to You a perpetual, + worldwide, non-exclusive, no-charge, royalty-free, irrevocable + (except as stated in this section) patent license to make, have made, + use, offer to sell, sell, import, and otherwise transfer the Work, + where such license applies only to those patent claims licensable + by such Contributor that are necessarily infringed by their + Contribution(s) alone or by combination of their Contribution(s) + with the Work to which such Contribution(s) was submitted. If You + institute patent litigation against any entity (including a + cross-claim or counterclaim in a lawsuit) alleging that the Work + or a Contribution incorporated within the Work constitutes direct + or contributory patent infringement, then any patent licenses + granted to You under this License for that Work shall terminate + as of the date such litigation is filed. + + 4. Redistribution. 
You may reproduce and distribute copies of the + Work or Derivative Works thereof in any medium, with or without + modifications, and in Source or Object form, provided that You + meet the following conditions: + + (a) You must give any other recipients of the Work or + Derivative Works a copy of this License; and + + (b) You must cause any modified files to carry prominent notices + stating that You changed the files; and + + (c) You must retain, in the Source form of any Derivative Works + that You distribute, all copyright, patent, trademark, and + attribution notices from the Source form of the Work, + excluding those notices that do not pertain to any part of + the Derivative Works; and + + (d) If the Work includes a "NOTICE" text file as part of its + distribution, then any Derivative Works that You distribute must + include a readable copy of the attribution notices contained + within such NOTICE file, excluding those notices that do not + pertain to any part of the Derivative Works, in at least one + of the following places: within a NOTICE text file distributed + as part of the Derivative Works; within the Source form or + documentation, if provided along with the Derivative Works; or, + within a display generated by the Derivative Works, if and + wherever such third-party notices normally appear. The contents + of the NOTICE file are for informational purposes only and + do not modify the License. You may add Your own attribution + notices within Derivative Works that You distribute, alongside + or as an addendum to the NOTICE text from the Work, provided + that such additional attribution notices cannot be construed + as modifying the License. 
+ + You may add Your own copyright statement to Your modifications and + may provide additional or different license terms and conditions + for use, reproduction, or distribution of Your modifications, or + for any such Derivative Works as a whole, provided Your use, + reproduction, and distribution of the Work otherwise complies with + the conditions stated in this License. + + 5. Submission of Contributions. Unless You explicitly state otherwise, + any Contribution intentionally submitted for inclusion in the Work + by You to the Licensor shall be under the terms and conditions of + this License, without any additional terms or conditions. + Notwithstanding the above, nothing herein shall supersede or modify + the terms of any separate license agreement you may have executed + with Licensor regarding such Contributions. + + 6. Trademarks. This License does not grant permission to use the trade + names, trademarks, service marks, or product names of the Licensor, + except as required for reasonable and customary use in describing the + origin of the Work and reproducing the content of the NOTICE file. + + 7. Disclaimer of Warranty. Unless required by applicable law or + agreed to in writing, Licensor provides the Work (and each + Contributor provides its Contributions) on an "AS IS" BASIS, + WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or + implied, including, without limitation, any warranties or conditions + of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A + PARTICULAR PURPOSE. You are solely responsible for determining the + appropriateness of using or redistributing the Work and assume any + risks associated with Your exercise of permissions under this License. + + 8. Limitation of Liability. 
In no event and under no legal theory, + whether in tort (including negligence), contract, or otherwise, + unless required by applicable law (such as deliberate and grossly + negligent acts) or agreed to in writing, shall any Contributor be + liable to You for damages, including any direct, indirect, special, + incidental, or consequential damages of any character arising as a + result of this License or out of the use or inability to use the + Work (including but not limited to damages for loss of goodwill, + work stoppage, computer failure or malfunction, or any and all + other commercial damages or losses), even if such Contributor + has been advised of the possibility of such damages. + + 9. Accepting Warranty or Additional Liability. While redistributing + the Work or Derivative Works thereof, You may choose to offer, + and charge a fee for, acceptance of support, warranty, indemnity, + or other liability obligations and/or rights consistent with this + License. However, in accepting such obligations, You may act only + on Your own behalf and on Your sole responsibility, not on behalf + of any other Contributor, and only if You agree to indemnify, + defend, and hold each Contributor harmless for any liability + incurred by, or claims asserted against, such Contributor by reason + of your accepting any such warranty or additional liability. + + END OF TERMS AND CONDITIONS + + APPENDIX: How to apply the Apache License to your work. + + To apply the Apache License to your work, attach the following + boilerplate notice, with the fields enclosed by brackets "[]" + replaced with your own identifying information. (Don't include + the brackets!) The text should be enclosed in the appropriate + comment syntax for the file format. We also recommend that a + file or class name and description of purpose be included on the + same "printed page" as the copyright notice for easier + identification within third-party archives. 
+ + Copyright [yyyy] [name of copyright owner] + + Licensed under the Apache License, Version 2.0 (the "License"); + you may not use this file except in compliance with the License. + You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + + Unless required by applicable law or agreed to in writing, software + distributed under the License is distributed on an "AS IS" BASIS, + WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + See the License for the specific language governing permissions and + limitations under the License. diff --git a/libbsc/README b/libbsc/README new file mode 100644 index 00000000..1e7d0f50 --- /dev/null +++ b/libbsc/README @@ -0,0 +1,71 @@ +Introduction: +------------- + +This file is a part of bsc and/or libbsc, a program and a library for +lossless, block-sorting data compression. + +bsc is a high performance file compressor based on lossless, +block-sorting data compression algorithms. + +libbsc is a library based on bsc, it uses the same algorithms +as bsc and enables you to compress memory blocks. + +Copyright (c) 2009-2021 Ilya Grebnov + +See file AUTHORS for a full list of contributors. + +See the bsc and libbsc web site: + http://libbsc.com/ for more information. + + +Software License: +----------------- + +Copyright (c) 2009-2021 Ilya Grebnov + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. + +Please see the file LICENSE for full copyright information and file AUTHORS +for full list of contributors. 
+ +See also the bsc and libbsc web site: + http://libbsc.com/ for more information. + + +Memory usage: +------------- + +bsc compresses large files in blocks. Multiple blocks can be processed in +parallel on multiple-core CPU. At decompression time, the block size used +for compression is read from the header of the compressed file. The block +size and number of blocks processed in parallel affects both the compression +ratio achieved, and the amount of memory needed for compression and decompression. +Compression and decompression requirements are the same and in bytes, can +be estimated as 16Mb + 5 x block size x number of blocks processed in parallel. + +GPU memory usage for NVIDIA CUDA technology is different from CPU memory usage +and can be estimated as 20 x block size. + + +NVIDIA GPU acceleration: +------------------------ + +1. libbsc uses NVIDIA CUDA technology, resulting in a performance boost on computers +with NVIDIA GPU of compute capability 3.5 or higher. Lists of supported GPUs +can be found on the NVIDIA website http://developer.nvidia.com/cuda-gpus. +You also need to install latest graphics drivers that support CUDA. + +2. Individual kernels are limited to a 2-second runtime by Windows. Kernels that run for +longer than 2 seconds will trigger the Timeout Detection and Recovery (TDR) mechanism. +Detailed information on disabling the Windows TDR is available at: +http://msdn.microsoft.com/en-us/windows/hardware/gg487368.aspx#E2 diff --git a/libbsc/VERSION b/libbsc/VERSION new file mode 100644 index 00000000..9b7a431d --- /dev/null +++ b/libbsc/VERSION @@ -0,0 +1 @@ +3.2.4 \ No newline at end of file diff --git a/libbsc/bsc.cpp b/libbsc/bsc.cpp new file mode 100644 index 00000000..9aebc918 --- /dev/null +++ b/libbsc/bsc.cpp @@ -0,0 +1,885 @@ +/*-----------------------------------------------------------*/ +/* Block Sorting, Lossless Data Compression Library. 
*/ +/* Block Sorting Compressor */ +/*-----------------------------------------------------------*/ + +/*-- + +This file is a part of bsc and/or libbsc, a program and a library for +lossless, block-sorting data compression. + + Copyright (c) 2009-2021 Ilya Grebnov + + Licensed under the Apache License, Version 2.0 (the "License"); + you may not use this file except in compliance with the License. + You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + + Unless required by applicable law or agreed to in writing, software + distributed under the License is distributed on an "AS IS" BASIS, + WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + See the License for the specific language governing permissions and + limitations under the License. + +Please see the file LICENSE for full copyright information and file AUTHORS +for full list of contributors. + +See also the bsc and libbsc web site: + http://libbsc.com/ for more information. 
+ +--*/ + +#define _CRT_SECURE_NO_WARNINGS + +#include +#include +#include +#include +#include +#include + +#include "libbsc/libbsc.h" +#include "libbsc/filters.h" +#include "libbsc/platform/platform.h" + +#pragma pack(push, 1) + +#define LIBBSC_CONTEXTS_AUTODETECT 3 + +unsigned char bscFileSign[4] = {'b', 's', 'c', 0x31}; + +typedef struct BSC_BLOCK_HEADER +{ + long long blockOffset; + signed char recordSize; + signed char sortingContexts; +} BSC_BLOCK_HEADER; + +#pragma pack(pop) + +int paramBlockSize = 25 * 1024 * 1024; +int paramBlockSorter = LIBBSC_BLOCKSORTER_BWT; +int paramCoder = LIBBSC_CODER_QLFC_STATIC; +int paramSortingContexts = LIBBSC_CONTEXTS_FOLLOWING; + +int paramEnableParallelProcessing = 1; +int paramEnableMultiThreading = 1; +int paramEnableFastMode = 1; +int paramEnableLargePages = 0; +int paramEnableCUDA = 0; +int paramEnableSegmentation = 0; +int paramEnableReordering = 0; +int paramEnableLZP = 1; +int paramLZPHashSize = 15; +int paramLZPMinLen = 128; + +int paramFeatures() +{ + int features = + (paramEnableFastMode ? LIBBSC_FEATURE_FASTMODE : LIBBSC_FEATURE_NONE) | + (paramEnableMultiThreading ? LIBBSC_FEATURE_MULTITHREADING : LIBBSC_FEATURE_NONE) | + (paramEnableLargePages ? LIBBSC_FEATURE_LARGEPAGES : LIBBSC_FEATURE_NONE) | + (paramEnableCUDA ? 
LIBBSC_FEATURE_CUDA : LIBBSC_FEATURE_NONE) + ; + + return features; +} + +#if defined(__GNUC__) && (defined(_GLIBCXX_USE_LFS) || defined(__MINGW32__)) + #define BSC_FSEEK fseeko64 + #define BSC_FTELL ftello64 + #define BSC_FILEOFFSET off64_t +#elif defined(_MSC_VER) && _MSC_VER >= 1400 + #define BSC_FSEEK _fseeki64 + #define BSC_FTELL _ftelli64 + #define BSC_FILEOFFSET __int64 +#else + #define BSC_FSEEK fseek + #define BSC_FTELL ftell + #define BSC_FILEOFFSET long +#endif + +#if defined(WIN32) || defined(_WIN32) || defined(__CYGWIN__) || defined(__MINGW32__) || defined(__BORLANDC__) || defined(_MSC_VER) + #include + double BSC_CLOCK() { return 0.001 * GetTickCount(); } +#elif defined (__unix) || defined (__linux__) || defined (__QNX__) || defined (_AIX) || defined (__NetBSD__) || defined(macintosh) || defined (_MAC) + #include + double BSC_CLOCK() { timeval tv; gettimeofday(&tv, 0); return tv.tv_sec + tv.tv_usec * 0.000001; } +#else + double BSC_CLOCK() { return (double)clock() / CLOCKS_PER_SEC; } +#endif + +int segmentedBlock[256]; + +void Compression(char * argv[]) +{ + if (!paramEnableLZP) + { + paramLZPHashSize = 0; + paramLZPMinLen = 0; + } + + FILE * fInput = fopen(argv[2], "rb"); + if (fInput == NULL) + { + fprintf(stderr, "Can't open input file: %s!\n", argv[2]); + exit(1); + } + + FILE * fOutput = fopen(argv[3], "wb"); + if (fOutput == NULL) + { + fprintf(stderr, "Can't create output file: %s!\n", argv[3]); + exit(1); + } + + if (BSC_FSEEK(fInput, 0, SEEK_END)) + { + fprintf(stderr, "IO error on file: %s!\n", argv[2]); + exit(1); + } + + BSC_FILEOFFSET fileSize = BSC_FTELL(fInput); + if (fileSize < 0) + { + fprintf(stderr, "IO error on file: %s!\n", argv[2]); + exit(1); + } + + if (BSC_FSEEK(fInput, 0, SEEK_SET)) + { + fprintf(stderr, "IO error on file: %s!\n", argv[2]); + exit(1); + } + + if (paramBlockSize > fileSize) + { + paramBlockSize = (int)fileSize; + } + + if (fwrite(bscFileSign, sizeof(bscFileSign), 1, fOutput) != 1) + { + fprintf(stderr, "IO 
error on file: %s!\n", argv[3]); + exit(1); + } + + int nBlocks = paramBlockSize > 0 ? (int)((fileSize + paramBlockSize - 1) / paramBlockSize) : 0; + if (fwrite(&nBlocks, sizeof(nBlocks), 1, fOutput) != 1) + { + fprintf(stderr, "IO error on file: %s!\n", argv[3]); + exit(1); + } + + double startTime = BSC_CLOCK(); + +#ifdef LIBBSC_OPENMP + + int numThreads = 1; + if (paramEnableParallelProcessing) + { + numThreads = omp_get_max_threads(); + if (numThreads <= nBlocks) paramEnableMultiThreading = 0; + if (numThreads >= nBlocks) numThreads = nBlocks; + } + +#endif + + int segmentationStart = 0, segmentationEnd = 0; + +#ifdef LIBBSC_OPENMP + #pragma omp parallel num_threads(numThreads) if(numThreads > 1) +#endif + { + unsigned char * buffer = (unsigned char *)bsc_malloc(paramBlockSize + LIBBSC_HEADER_SIZE); + if (buffer == NULL) + { +#ifdef LIBBSC_OPENMP + #pragma omp critical(print) +#endif + { + + fprintf(stderr, "Not enough memory! Please check README file for more information.\n"); + exit(2); + } + } + + while (true) + { + BSC_FILEOFFSET blockOffset = 0; + int dataSize = 0; + +#ifdef LIBBSC_OPENMP + #pragma omp critical(input) +#endif + { + if ((feof(fInput) == 0) && (BSC_FTELL(fInput) != fileSize)) + { +#ifdef LIBBSC_OPENMP + #pragma omp master +#endif + { + double progress = (100.0 * (double)BSC_FTELL(fInput)) / fileSize; + fprintf(stdout, "\rCompressing %.55s(%02d%%)", argv[2], (int)progress); + fflush(stdout); + } + + blockOffset = BSC_FTELL(fInput); + + int currentBlockSize = paramBlockSize; + if (paramEnableSegmentation) + { + if (segmentationEnd - segmentationStart > 1) currentBlockSize = segmentedBlock[segmentationStart]; + } + + dataSize = (int)fread(buffer, 1, currentBlockSize, fInput); + if (dataSize <= 0) + { + fprintf(stderr, "\nIO error on file: %s!\n", argv[2]); + exit(1); + } + + if (paramEnableSegmentation) + { + bool bSegmentation = false; + + if (segmentationStart == segmentationEnd) bSegmentation = true; + if ((segmentationEnd - 
segmentationStart == 1) && (dataSize != segmentedBlock[segmentationStart])) bSegmentation = true; + + if (bSegmentation) + { + segmentationStart = 0; segmentationEnd = bsc_detect_segments(buffer, dataSize, segmentedBlock, 256, paramFeatures()); + if (segmentationEnd <= LIBBSC_NO_ERROR) + { + switch (segmentationEnd) + { + case LIBBSC_NOT_ENOUGH_MEMORY : fprintf(stderr, "\nNot enough memory! Please check README file for more information.\n"); break; + default : fprintf(stderr, "\nInternal program error, please contact the author!\n"); + } + exit(2); + } + } + + int newDataSize = segmentedBlock[segmentationStart++]; + if (dataSize != newDataSize) + { + BSC_FILEOFFSET pos = BSC_FTELL(fInput) - dataSize + newDataSize; + BSC_FSEEK(fInput, pos, SEEK_SET); + dataSize = newDataSize; + } + } + } + } + + if (dataSize == 0) break; + + signed char recordSize = 1; + if (paramEnableReordering) + { + recordSize = bsc_detect_recordsize(buffer, dataSize, paramFeatures()); + if (recordSize < LIBBSC_NO_ERROR) + { +#ifdef LIBBSC_OPENMP + #pragma omp critical(print) +#endif + { + switch (recordSize) + { + case LIBBSC_NOT_ENOUGH_MEMORY : fprintf(stderr, "\nNot enough memory! Please check README file for more information.\n"); break; + default : fprintf(stderr, "\nInternal program error, please contact the author!\n"); + } + exit(2); + } + } + if (recordSize > 1) + { + int result = bsc_reorder_forward(buffer, dataSize, recordSize, paramFeatures()); + if (result != LIBBSC_NO_ERROR) + { +#ifdef LIBBSC_OPENMP + #pragma omp critical(print) +#endif + { + switch (result) + { + case LIBBSC_NOT_ENOUGH_MEMORY : fprintf(stderr, "\nNot enough memory! 
Please check README file for more information.\n"); break; + default : fprintf(stderr, "\nInternal program error, please contact the author!\n"); + } + exit(2); + } + } + } + } + + signed char sortingContexts = paramSortingContexts; + if (paramSortingContexts == LIBBSC_CONTEXTS_AUTODETECT) + { + sortingContexts = bsc_detect_contextsorder(buffer, dataSize, paramFeatures()); + if (sortingContexts < LIBBSC_NO_ERROR) + { +#ifdef LIBBSC_OPENMP + #pragma omp critical(print) +#endif + { + switch (sortingContexts) + { + case LIBBSC_NOT_ENOUGH_MEMORY : fprintf(stderr, "\nNot enough memory!\n"); break; + default : fprintf(stderr, "\nInternal program error, please contact the author!\n"); + } + exit(2); + } + } + } + if (sortingContexts == LIBBSC_CONTEXTS_PRECEDING) + { + int result = bsc_reverse_block(buffer, dataSize, paramFeatures()); + if (result != LIBBSC_NO_ERROR) + { +#ifdef LIBBSC_OPENMP + #pragma omp critical(print) +#endif + { + fprintf(stderr, "\nInternal program error, please contact the author!\n"); + exit(2); + } + } + } + + int blockSize = bsc_compress(buffer, buffer, dataSize, paramLZPHashSize, paramLZPMinLen, paramBlockSorter, paramCoder, paramFeatures()); + if (blockSize == LIBBSC_NOT_COMPRESSIBLE) + { +#ifdef LIBBSC_OPENMP + #pragma omp critical(input) +#endif + { + sortingContexts = LIBBSC_CONTEXTS_FOLLOWING; recordSize = 1; + + BSC_FILEOFFSET pos = BSC_FTELL(fInput); + { + BSC_FSEEK(fInput, blockOffset, SEEK_SET); + if (dataSize != (int)fread(buffer, 1, dataSize, fInput)) + { + fprintf(stderr, "\nInternal program error, please contact the author!\n"); + exit(2); + } + } + BSC_FSEEK(fInput, pos, SEEK_SET); + } + + blockSize = bsc_store(buffer, buffer, dataSize, paramFeatures()); + } + if (blockSize < LIBBSC_NO_ERROR) + { +#ifdef LIBBSC_OPENMP + #pragma omp critical(print) +#endif + { + switch (blockSize) + { + case LIBBSC_NOT_ENOUGH_MEMORY : fprintf(stderr, "\nNot enough memory! 
Please check README file for more information.\n"); break; + case LIBBSC_NOT_SUPPORTED : fprintf(stderr, "\nSpecified compression method is not supported on this platform!\n"); break; + case LIBBSC_GPU_ERROR : fprintf(stderr, "\nGeneral GPU failure! Please check README file for more information.\n"); break; + case LIBBSC_GPU_NOT_SUPPORTED : fprintf(stderr, "\nYour GPU is not supported! Please check README file for more information.\n"); break; + case LIBBSC_GPU_NOT_ENOUGH_MEMORY : fprintf(stderr, "\nNot enough GPU memory! Please check README file for more information.\n"); break; + + default : fprintf(stderr, "\nInternal program error, please contact the author!\n"); + } + exit(2); + } + } + +#ifdef LIBBSC_OPENMP + #pragma omp critical(output) +#endif + { + BSC_BLOCK_HEADER header = {blockOffset, recordSize, sortingContexts}; + + if (fwrite(&header, sizeof(BSC_BLOCK_HEADER), 1, fOutput) != 1) + { + fprintf(stderr, "\nIO error on file: %s!\n", argv[3]); + exit(1); + } + + if ((int)fwrite(buffer, 1, blockSize, fOutput) != blockSize) + { + fprintf(stderr, "\nIO error on file: %s!\n", argv[3]); + exit(1); + } + } + + } + + bsc_free(buffer); + } + + fprintf(stdout, "\r%.55s compressed %.0f into %.0f in %.3f seconds.\n", argv[2], (double)fileSize, (double)BSC_FTELL(fOutput), BSC_CLOCK() - startTime); + + fclose(fInput); fclose(fOutput); +} + +void Decompression(char * argv[]) +{ + FILE * fInput = fopen(argv[2], "rb"); + if (fInput == NULL) + { + fprintf(stderr, "Can't open input file: %s!\n", argv[2]); + exit(1); + } + + FILE * fOutput = fopen(argv[3], "wb"); + if (fOutput == NULL) + { + fprintf(stderr, "Can't create output file: %s!\n", argv[3]); + exit(1); + } + + if (BSC_FSEEK(fInput, 0, SEEK_END)) + { + fprintf(stderr, "IO error on file: %s!\n", argv[2]); + exit(1); + } + + BSC_FILEOFFSET fileSize = BSC_FTELL(fInput); + if (fileSize < 0) + { + fprintf(stderr, "IO error on file: %s!\n", argv[2]); + exit(1); + } + + if (BSC_FSEEK(fInput, 0, SEEK_SET)) + { + 
fprintf(stderr, "IO error on file: %s!\n", argv[2]); + exit(1); + } + + unsigned char inputFileSign[sizeof(bscFileSign)]; + + if (fread(inputFileSign, sizeof(bscFileSign), 1, fInput) != 1) + { + fprintf(stderr, "This is not bsc archive!\n"); + exit(1); + } + + if (memcmp(inputFileSign, bscFileSign, sizeof(bscFileSign)) != 0) + { + fprintf(stderr, "This is not bsc archive or invalid compression method!\n"); + exit(2); + } + + int nBlocks = 0; + if (fread(&nBlocks, sizeof(nBlocks), 1, fInput) != 1) + { + fprintf(stderr, "This is not bsc archive!\n"); + exit(1); + } + + double startTime = BSC_CLOCK(); + +#ifdef LIBBSC_OPENMP + + int numThreads = 1; + if (paramEnableParallelProcessing) + { + numThreads = omp_get_max_threads(); + if (numThreads <= nBlocks) paramEnableMultiThreading = 0; + if (numThreads >= nBlocks) numThreads = nBlocks; + } + + #pragma omp parallel num_threads(numThreads) if(numThreads > 1) +#endif + { + int bufferSize = -1; unsigned char * buffer = NULL; + + while (true) + { + BSC_FILEOFFSET blockOffset = 0; + + signed char sortingContexts = 0; + signed char recordSize = 0; + int blockSize = 0; + int dataSize = 0; + +#ifdef LIBBSC_OPENMP + #pragma omp critical(input) +#endif + { + if ((feof(fInput) == 0) && (BSC_FTELL(fInput) != fileSize)) + { +#ifdef LIBBSC_OPENMP + #pragma omp master +#endif + { + double progress = (100.0 * (double)BSC_FTELL(fInput)) / fileSize; + fprintf(stdout, "\rDecompressing %.55s(%02d%%)", argv[2], (int)progress); + fflush(stdout); + } + + BSC_BLOCK_HEADER header = {0, 0, 0}; + if (fread(&header, sizeof(BSC_BLOCK_HEADER), 1, fInput) != 1) + { + fprintf(stderr, "\nUnexpected end of file: %s!\n", argv[2]); + exit(1); + } + + recordSize = header.recordSize; + if (recordSize < 1) + { + fprintf(stderr, "\nThis is not bsc archive or invalid compression method!\n"); + exit(2); + } + + sortingContexts = header.sortingContexts; + if ((sortingContexts != LIBBSC_CONTEXTS_FOLLOWING) && (sortingContexts != LIBBSC_CONTEXTS_PRECEDING)) + { + 
fprintf(stderr, "\nThis is not bsc archive or invalid compression method!\n"); + exit(2); + } + + blockOffset = (BSC_FILEOFFSET)header.blockOffset; + + unsigned char bscBlockHeader[LIBBSC_HEADER_SIZE]; + + if (fread(bscBlockHeader, LIBBSC_HEADER_SIZE, 1, fInput) != 1) + { + fprintf(stderr, "\nUnexpected end of file: %s!\n", argv[2]); + exit(1); + } + + if (bsc_block_info(bscBlockHeader, LIBBSC_HEADER_SIZE, &blockSize, &dataSize, paramFeatures()) != LIBBSC_NO_ERROR) + { + fprintf(stderr, "\nThis is not bsc archive or invalid compression method!\n"); + exit(2); + } + + if ((blockSize > bufferSize) || (dataSize > bufferSize)) + { + if (blockSize > bufferSize) bufferSize = blockSize; + if (dataSize > bufferSize) bufferSize = dataSize; + + if (buffer != NULL) { bsc_free(buffer); } buffer = (unsigned char *)bsc_malloc(bufferSize); + } + + if (buffer == NULL) + { + fprintf(stderr, "\nNot enough memory! Please check README file for more information.\n"); + exit(2); + } + + memcpy(buffer, bscBlockHeader, LIBBSC_HEADER_SIZE); + + if (fread(buffer + LIBBSC_HEADER_SIZE, blockSize - LIBBSC_HEADER_SIZE, 1, fInput) != 1) + { + fprintf(stderr, "\nUnexpected end of file: %s!\n", argv[2]); + exit(1); + } + } + } + + if (dataSize == 0) break; + + int result = bsc_decompress(buffer, blockSize, buffer, dataSize, paramFeatures()); + if (result < LIBBSC_NO_ERROR) + { +#ifdef LIBBSC_OPENMP + #pragma omp critical(print) +#endif + { + switch (result) + { + case LIBBSC_DATA_CORRUPT : fprintf(stderr, "\nThe compressed data is corrupted!\n"); break; + case LIBBSC_NOT_ENOUGH_MEMORY : fprintf(stderr, "\nNot enough memory! Please check README file for more information.\n"); break; + case LIBBSC_GPU_ERROR : fprintf(stderr, "\nGeneral GPU failure! Please check README file for more information.\n"); break; + case LIBBSC_GPU_NOT_SUPPORTED : fprintf(stderr, "\nYour GPU is not supported! 
Please check README file for more information.\n"); break; + case LIBBSC_GPU_NOT_ENOUGH_MEMORY : fprintf(stderr, "\nNot enough GPU memory! Please check README file for more information.\n"); break; + + default : fprintf(stderr, "\nInternal program error, please contact the author!\n"); + } + exit(2); + } + } + + if (sortingContexts == LIBBSC_CONTEXTS_PRECEDING) + { + result = bsc_reverse_block(buffer, dataSize, paramFeatures()); + if (result != LIBBSC_NO_ERROR) + { +#ifdef LIBBSC_OPENMP + #pragma omp critical(print) +#endif + { + fprintf(stderr, "\nInternal program error, please contact the author!\n"); + exit(2); + } + } + } + + if (recordSize > 1) + { + result = bsc_reorder_reverse(buffer, dataSize, recordSize, paramFeatures()); + if (result != LIBBSC_NO_ERROR) + { +#ifdef LIBBSC_OPENMP + #pragma omp critical(print) +#endif + { + switch (result) + { + case LIBBSC_NOT_ENOUGH_MEMORY : fprintf(stderr, "\nNot enough memory! Please check README file for more information.\n"); break; + default : fprintf(stderr, "\nInternal program error, please contact the author!\n"); + } + exit(2); + } + } + } + +#ifdef LIBBSC_OPENMP + #pragma omp critical(output) +#endif + { + if (BSC_FSEEK(fOutput, blockOffset, SEEK_SET)) + { + fprintf(stderr, "\nIO error on file: %s!\n", argv[3]); + exit(1); + } + + if ((int)fwrite(buffer, 1, dataSize, fOutput) != dataSize) + { + fprintf(stderr, "\nIO error on file: %s!\n", argv[3]); + exit(1); + } + } + } + + if (buffer != NULL) bsc_free(buffer); + } + + if (BSC_FSEEK(fOutput, 0, SEEK_END)) + { + fprintf(stderr, "IO error on file: %s!\n", argv[3]); + exit(1); + } + + fprintf(stdout, "\r%.55s decompressed %.0f into %.0f in %.3f seconds.\n", argv[2], (double)fileSize, (double)BSC_FTELL(fOutput), BSC_CLOCK() - startTime); + + fclose(fInput); fclose(fOutput); +} + +void ShowUsage(void) +{ +#if !defined(BSC_DECOMPRESSION_ONLY) + fprintf(stdout, "Usage: bsc inputfile outputfile \n\n"); +#elif defined(LIBBSC_CUDA_SUPPORT) || defined(_WIN32) || 
defined(LIBBSC_OPENMP) + fprintf(stdout, "Usage: bsc d inputfile outputfile \n\n"); +#else + fprintf(stdout, "Usage: bsc d inputfile outputfile\n\n"); +#endif + +#if !defined(BSC_DECOMPRESSION_ONLY) + fprintf(stdout, "Block sorting options:\n"); + fprintf(stdout, " -b Block size in megabytes, default: -b25\n"); + fprintf(stdout, " minimum: -b1, maximum: -b2047\n"); + fprintf(stdout, " -m Block sorting algorithm, default: -m0\n"); + fprintf(stdout, " -m0 Burrows Wheeler Transform (default)\n"); +#ifdef LIBBSC_SORT_TRANSFORM_SUPPORT + fprintf(stdout, " -m3..8 Sort Transform of order n\n"); +#endif + fprintf(stdout, " -c Contexts for sorting, default: -cf\n"); + fprintf(stdout, " -cf Following contexts (default)\n"); + fprintf(stdout, " -cp Preceding contexts\n"); + fprintf(stdout, " -ca Autodetect (experimental)\n"); + fprintf(stdout, " -e Entropy encoding algorithm, default: -e1\n"); + fprintf(stdout, " -e0 Fast Quantized Local Frequency Coding\n"); + fprintf(stdout, " -e1 Static Quantized Local Frequency Coding (default)\n"); + fprintf(stdout, " -e2 Adaptive Quantized Local Frequency Coding (best compression)\n"); + + fprintf(stdout, "\nPreprocessing options:\n"); + fprintf(stdout, " -p Disable all preprocessing techniques\n"); + fprintf(stdout, " -s Enable segmentation (adaptive block size), default: disable\n"); + fprintf(stdout, " -r Enable structured data reordering, default: disable\n"); + fprintf(stdout, " -l Enable Lempel-Ziv preprocessing, default: enable\n"); + fprintf(stdout, " -H LZP dictionary size in bits, default: -H15\n"); + fprintf(stdout, " minimum: -H10, maximum: -H28\n"); + fprintf(stdout, " -M LZP minimum match length, default: -M128\n"); + fprintf(stdout, " minimum: -M4, maximum: -M255\n\n"); +#endif + +#if defined(LIBBSC_CUDA_SUPPORT) || defined(_WIN32) || defined(LIBBSC_OPENMP) + fprintf(stdout, "Platform specific options:\n"); +#ifdef LIBBSC_CUDA_SUPPORT + fprintf(stdout, " -G Enable Sort Transform acceleration on NVIDIA GPU, default: 
disable\n"); +#endif +#ifdef _WIN32 + fprintf(stdout, " -P Enable large 2MB RAM pages, default: disable\n"); +#endif +#ifdef LIBBSC_OPENMP + fprintf(stdout, " -t Disable parallel blocks processing, default: enable\n"); + fprintf(stdout, " -T Disable multi-core systems support, default: enable\n"); +#endif + fprintf(stdout, "\n"); +#endif + +#if !defined(BSC_DECOMPRESSION_ONLY) || defined(LIBBSC_CUDA_SUPPORT) || defined(_WIN32) || defined(LIBBSC_OPENMP) + fprintf(stdout,"Options may be combined into one, like -b128p -m5e1\n"); +#endif + + exit(0); +} + +void ProcessSwitch(char * s) +{ + if (*s == 0) + { + ShowUsage(); + } + + for (; *s != 0; ) + { + switch (*s++) + { + case 'b': + { + char * strNum = s; while ((*s >= '0') && (*s <= '9')) s++; + paramBlockSize = atoi(strNum) * 1024 * 1024; + if ((paramBlockSize < 1024 * 1024) || (paramBlockSize > 2047 * 1024 * 1024)) ShowUsage(); + break; + } + + case 'm': + { + char * strNum = s; while ((*s >= '0') && (*s <= '9')) s++; + switch (atoi(strNum)) + { + case 0 : paramBlockSorter = LIBBSC_BLOCKSORTER_BWT; break; + +#ifdef LIBBSC_SORT_TRANSFORM_SUPPORT + case 3 : paramBlockSorter = LIBBSC_BLOCKSORTER_ST3; break; + case 4 : paramBlockSorter = LIBBSC_BLOCKSORTER_ST4; break; + case 5 : paramBlockSorter = LIBBSC_BLOCKSORTER_ST5; break; + case 6 : paramBlockSorter = LIBBSC_BLOCKSORTER_ST6; break; + case 7 : paramBlockSorter = LIBBSC_BLOCKSORTER_ST7; paramEnableCUDA = 1; break; + case 8 : paramBlockSorter = LIBBSC_BLOCKSORTER_ST8; paramEnableCUDA = 1; break; +#endif + + default : ShowUsage(); + } + break; + } + + case 'c': + { + switch (*s++) + { + case 'f' : paramSortingContexts = LIBBSC_CONTEXTS_FOLLOWING; break; + case 'p' : paramSortingContexts = LIBBSC_CONTEXTS_PRECEDING; break; + case 'a' : paramSortingContexts = LIBBSC_CONTEXTS_AUTODETECT; break; + default : ShowUsage(); + } + break; + } + + case 'e': + { + switch (*s++) + { + case '0' : paramCoder = LIBBSC_CODER_QLFC_FAST; break; + case '1' : paramCoder = 
LIBBSC_CODER_QLFC_STATIC; break; + case '2' : paramCoder = LIBBSC_CODER_QLFC_ADAPTIVE; break; + default : ShowUsage(); + } + break; + } + + case 'H': + { + char * strNum = s; while ((*s >= '0') && (*s <= '9')) s++; + paramLZPHashSize = atoi(strNum); + if ((paramLZPHashSize < 10) || (paramLZPHashSize > 28)) ShowUsage(); + break; + } + + case 'M': + { + char * strNum = s; while ((*s >= '0') && (*s <= '9')) s++; + paramLZPMinLen = atoi(strNum); + if ((paramLZPMinLen < 4) || (paramLZPMinLen > 255)) ShowUsage(); + break; + } + + case 'l': paramEnableLZP = 1; break; + case 's': paramEnableSegmentation = 1; break; + case 'r': paramEnableReordering = 1; break; + + case 'p': paramEnableLZP = paramEnableSegmentation = paramEnableReordering = 0; break; + +#ifdef LIBBSC_OPENMP + case 't': paramEnableParallelProcessing = 0; break; + case 'T': paramEnableParallelProcessing = paramEnableMultiThreading = 0; break; +#endif + +#ifdef LIBBSC_CUDA_SUPPORT + case 'G': paramEnableCUDA = 1; break; +#endif + +#ifdef _WIN32 + case 'P': paramEnableLargePages = 1; break; +#endif + + default : ShowUsage(); + } + } +} + +void ProcessCommandline(int argc, char * argv[]) +{ + if (argc < 4 || strlen(argv[1]) != 1) + { + ShowUsage(); + } + + for (int i = 4; i < argc; ++i) + { + if (argv[i][0] == '-') + { + ProcessSwitch(&argv[i][1]); + } + else + { + ShowUsage(); + } + } +} + +int main(int argc, char * argv[]) +{ + fprintf(stdout, "This is bsc, Block Sorting Compressor. Version 3.2.4. 
18 January 2022.\n"); + fprintf(stdout, "Copyright (c) 2009-2021 Ilya Grebnov .\n\n"); + +#if defined(_OPENMP) && defined(__INTEL_COMPILER) + + kmp_set_warnings_off(); + +#endif + + ProcessCommandline(argc, argv); + + if (bsc_init(paramFeatures()) != LIBBSC_NO_ERROR) + { + fprintf(stderr, "\nInternal program error, please contact the author!\n"); + exit(2); + } + + switch (*argv[1]) + { +#if !defined(BSC_DECOMPRESSION_ONLY) + case 'e' : case 'E' : Compression(argv); break; +#endif + case 'd' : case 'D' : Decompression(argv); break; + default : ShowUsage(); + } + + return 0; +} + +/*-----------------------------------------------------------*/ +/* End bsc.cpp */ +/*-----------------------------------------------------------*/ diff --git a/libbsc/libbsc/adler32/adler32.cpp b/libbsc/libbsc/adler32/adler32.cpp new file mode 100644 index 00000000..ea0381a3 --- /dev/null +++ b/libbsc/libbsc/adler32/adler32.cpp @@ -0,0 +1,210 @@ +/*-----------------------------------------------------------*/ +/* Block Sorting, Lossless Data Compression Library. */ +/* Adler-32 checksum functions */ +/*-----------------------------------------------------------*/ + +/*-- + +This file is a part of bsc and/or libbsc, a program and a library for +lossless, block-sorting data compression. + + Copyright (c) 2009-2021 Ilya Grebnov + + Licensed under the Apache License, Version 2.0 (the "License"); + you may not use this file except in compliance with the License. + You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + + Unless required by applicable law or agreed to in writing, software + distributed under the License is distributed on an "AS IS" BASIS, + WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + See the License for the specific language governing permissions and + limitations under the License. + +Please see the file LICENSE for full copyright information and file AUTHORS +for full list of contributors. 
+ +See also the bsc and libbsc web site: + http://libbsc.com/ for more information. + +--*/ + +#include +#include +#include +#include + +#include "adler32.h" + +#include "../platform/platform.h" +#include "../libbsc.h" + +#define BASE 65521UL +#define NMAX 5536 + +#define DO1(buf, i) { sum1 += (buf)[i]; sum2 += sum1; } +#define DO2(buf, i) DO1(buf, i); DO1(buf, i + 1); +#define DO4(buf, i) DO2(buf, i); DO2(buf, i + 2); +#define DO8(buf, i) DO4(buf, i); DO4(buf, i + 4); +#define DO16(buf) DO8(buf, 0); DO8(buf, 8); +#define MOD(a) a %= BASE + +#if defined(LIBBSC_DYNAMIC_CPU_DISPATCH) + unsigned int bsc_adler32(const unsigned char * T, int n, int features); + unsigned int bsc_adler32_avx(const unsigned char * T, int n, int features); + unsigned int bsc_adler32_ssse3(const unsigned char * T, int n, int features); + unsigned int bsc_adler32_sse2(const unsigned char * T, int n, int features); + + #if LIBBSC_CPU_FEATURE == LIBBSC_CPU_FEATURE_SSE2 + unsigned int bsc_adler32(const unsigned char * T, int n, int features) + { + if (bsc_get_cpu_features() >= LIBBSC_CPU_FEATURE_AVX) { return bsc_adler32_avx (T, n, features); } + if (bsc_get_cpu_features() >= LIBBSC_CPU_FEATURE_SSSE3) { return bsc_adler32_ssse3(T, n, features); } + + return bsc_adler32_sse2(T, n, features); + } + #endif + + #if LIBBSC_CPU_FEATURE == LIBBSC_CPU_FEATURE_AVX + #define ADLER32_FUNCTION_NAME bsc_adler32_avx + #elif LIBBSC_CPU_FEATURE == LIBBSC_CPU_FEATURE_SSSE3 + #define ADLER32_FUNCTION_NAME bsc_adler32_ssse3 + #elif LIBBSC_CPU_FEATURE == LIBBSC_CPU_FEATURE_SSE2 + #define ADLER32_FUNCTION_NAME bsc_adler32_sse2 + #endif +#else + #define ADLER32_FUNCTION_NAME bsc_adler32 +#endif + +#if defined(ADLER32_FUNCTION_NAME) + +#define make_uint32x4(d0, d1, d2, d3) vcombine_u32(vcreate_u32(((unsigned long long)(d0) << 0) + ((unsigned long long)(d1) << 32)), vcreate_u32(((unsigned long long)(d2) << 0) + ((unsigned long long)(d3) << 32))) +#define make_uint16x4(w0, w1, w2, w3) vcreate_u16(((unsigned long 
long)(w0) << 0) + ((unsigned long long)(w1) << 16) + ((unsigned long long)(w2) << 32) + ((unsigned long long)(w3) << 48)) + +unsigned int ADLER32_FUNCTION_NAME (const unsigned char * T, int n, int features) +{ + unsigned int sum1 = 1; + unsigned int sum2 = 0; + +#if LIBBSC_CPU_FEATURE >= LIBBSC_CPU_FEATURE_SSSE3 || LIBBSC_CPU_FEATURE == LIBBSC_CPU_FEATURE_A64 + + while ((((uintptr_t)T & 15) != 0) && n > 0) + { + DO1(T, 0); T += 1; n -= 1; + } + +#endif + + while (n >= NMAX) + { +#if LIBBSC_CPU_FEATURE >= LIBBSC_CPU_FEATURE_SSSE3 + const __m128i tap1 = _mm_setr_epi8(32, 31, 30, 29, 28, 27, 26, 25, 24, 23, 22, 21, 20, 19, 18, 17); + const __m128i tap2 = _mm_setr_epi8(16, 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1); + const __m128i zero = _mm_setr_epi8(0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0); + const __m128i ones = _mm_setr_epi16(1, 1, 1, 1, 1, 1, 1, 1); + + __m128i v_ps = _mm_set_epi32(0, 0, 0, sum1 * (NMAX / 32)); + __m128i v_s2 = _mm_set_epi32(0, 0, 0, sum2); + __m128i v_s1 = _mm_set_epi32(0, 0, 0, 0); + + for (int i = 0; i < NMAX / 32; ++i) + { + const __m128i bytes1 = _mm_load_si128((__m128i *)(T)); + const __m128i bytes2 = _mm_load_si128((__m128i *)(T + 16)); + + v_ps = _mm_add_epi32(v_ps, v_s1); + + v_s1 = _mm_add_epi32(v_s1, _mm_sad_epu8(bytes1, zero)); + v_s2 = _mm_add_epi32(v_s2, _mm_madd_epi16(_mm_maddubs_epi16(bytes1, tap1), ones)); + + v_s1 = _mm_add_epi32(v_s1, _mm_sad_epu8(bytes2, zero)); + v_s2 = _mm_add_epi32(v_s2, _mm_madd_epi16(_mm_maddubs_epi16(bytes2, tap2), ones)); + + T += 32; + } + + v_s2 = _mm_add_epi32(v_s2, _mm_slli_epi32(v_ps, 5)); + + v_s1 = _mm_add_epi32(v_s1, _mm_shuffle_epi32(v_s1, _MM_SHUFFLE(2, 3, 0, 1))); + v_s1 = _mm_add_epi32(v_s1, _mm_shuffle_epi32(v_s1, _MM_SHUFFLE(1, 0, 3, 2))); + sum1 += _mm_cvtsi128_si32(v_s1); + + v_s2 = _mm_add_epi32(v_s2, _mm_shuffle_epi32(v_s2, _MM_SHUFFLE(2, 3, 0, 1))); + v_s2 = _mm_add_epi32(v_s2, _mm_shuffle_epi32(v_s2, _MM_SHUFFLE(1, 0, 3, 2))); + sum2 = _mm_cvtsi128_si32(v_s2); +#elif 
LIBBSC_CPU_FEATURE == LIBBSC_CPU_FEATURE_A64 + + unsigned int sum1_n = sum1 * (NMAX / 32); + + uint32x4_t v_s2 = make_uint32x4(0, 0, 0, sum1_n); + uint32x4_t v_s1 = make_uint32x4(0, 0, 0, 0); + uint16x8_t v_column_sum_1 = vdupq_n_u16(0); + uint16x8_t v_column_sum_2 = vdupq_n_u16(0); + uint16x8_t v_column_sum_3 = vdupq_n_u16(0); + uint16x8_t v_column_sum_4 = vdupq_n_u16(0); + + for (int i = 0; i < NMAX / 32; ++i) + { + const uint8x16_t bytes1 = vld1q_u8((uint8_t *)(T)); + const uint8x16_t bytes2 = vld1q_u8((uint8_t *)(T + 16)); + + v_s2 = vaddq_u32(v_s2, v_s1); + v_s1 = vpadalq_u16(v_s1, vpadalq_u8(vpaddlq_u8(bytes1), bytes2)); + + v_column_sum_1 = vaddw_u8(v_column_sum_1, vget_low_u8(bytes1)); + v_column_sum_2 = vaddw_u8(v_column_sum_2, vget_high_u8(bytes1)); + v_column_sum_3 = vaddw_u8(v_column_sum_3, vget_low_u8(bytes2)); + v_column_sum_4 = vaddw_u8(v_column_sum_4, vget_high_u8(bytes2)); + + T += 32; + } + + v_s2 = vshlq_n_u32(v_s2, 5); + + v_s2 = vmlal_u16(v_s2, vget_low_u16 (v_column_sum_1), make_uint16x4(32, 31, 30, 29)); + v_s2 = vmlal_u16(v_s2, vget_high_u16(v_column_sum_1), make_uint16x4(28, 27, 26, 25)); + v_s2 = vmlal_u16(v_s2, vget_low_u16 (v_column_sum_2), make_uint16x4(24, 23, 22, 21)); + v_s2 = vmlal_u16(v_s2, vget_high_u16(v_column_sum_2), make_uint16x4(20, 19, 18, 17)); + v_s2 = vmlal_u16(v_s2, vget_low_u16 (v_column_sum_3), make_uint16x4(16, 15, 14, 13)); + v_s2 = vmlal_u16(v_s2, vget_high_u16(v_column_sum_3), make_uint16x4(12, 11, 10, 9)); + v_s2 = vmlal_u16(v_s2, vget_low_u16 (v_column_sum_4), make_uint16x4( 8, 7, 6, 5)); + v_s2 = vmlal_u16(v_s2, vget_high_u16(v_column_sum_4), make_uint16x4( 4, 3, 2, 1)); + + uint32x2_t v_sum1 = vpadd_u32(vget_low_u32(v_s1), vget_high_u32(v_s1)); + uint32x2_t v_sum2 = vpadd_u32(vget_low_u32(v_s2), vget_high_u32(v_s2)); + uint32x2_t v_s1s2 = vpadd_u32(v_sum1, v_sum2); + + sum1 += vget_lane_u32(v_s1s2, 0); + sum2 += vget_lane_u32(v_s1s2, 1); + +#else + for (int i = 0; i < NMAX / 16; ++i) + { + DO16(T); T += 16; + } 
+#endif + + MOD(sum1); MOD(sum2); n -= NMAX; + } + + while (n >= 16) + { + DO16(T); T += 16; n -= 16; + } + + while (n > 0) + { + DO1(T, 0); T += 1; n -= 1; + } + + MOD(sum1); MOD(sum2); + + return sum1 | (sum2 << 16); +} + +#endif + +/*-----------------------------------------------------------*/ +/* End adler32.cpp */ +/*-----------------------------------------------------------*/ diff --git a/libbsc/libbsc/adler32/adler32.h b/libbsc/libbsc/adler32/adler32.h new file mode 100644 index 00000000..f9458bf0 --- /dev/null +++ b/libbsc/libbsc/adler32/adler32.h @@ -0,0 +1,57 @@ +/*-----------------------------------------------------------*/ +/* Block Sorting, Lossless Data Compression Library. */ +/* Interface to Adler-32 checksum functions */ +/*-----------------------------------------------------------*/ + +/*-- + +This file is a part of bsc and/or libbsc, a program and a library for +lossless, block-sorting data compression. + + Copyright (c) 2009-2021 Ilya Grebnov + + Licensed under the Apache License, Version 2.0 (the "License"); + you may not use this file except in compliance with the License. + You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + + Unless required by applicable law or agreed to in writing, software + distributed under the License is distributed on an "AS IS" BASIS, + WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + See the License for the specific language governing permissions and + limitations under the License. + +Please see the file LICENSE for full copyright information and file AUTHORS +for full list of contributors. + +See also the bsc and libbsc web site: + http://libbsc.com/ for more information. + +--*/ + +#ifndef _LIBBSC_ADLER32_H +#define _LIBBSC_ADLER32_H + +#ifdef __cplusplus +extern "C" { +#endif + + /** + * Calculates Adler-32 checksum for input memory block. + * @param T - the input memory block of n bytes. + * @param n - the length of the input memory block. 
+ * @param features - the set of additional features. + * @return the Adler-32 checksum of the input memory block. + */ + unsigned int bsc_adler32(const unsigned char * T, int n, int features); + +#ifdef __cplusplus +} +#endif + +#endif + +/*-----------------------------------------------------------*/ +/* End adler32.h */ +/*-----------------------------------------------------------*/ diff --git a/libbsc/libbsc/bwt/bwt.cpp b/libbsc/libbsc/bwt/bwt.cpp new file mode 100644 index 00000000..a8678ebf --- /dev/null +++ b/libbsc/libbsc/bwt/bwt.cpp @@ -0,0 +1,129 @@ +/*-----------------------------------------------------------*/ +/* Block Sorting, Lossless Data Compression Library. */ +/* Burrows Wheeler Transform */ +/*-----------------------------------------------------------*/ + +/*-- + +This file is a part of bsc and/or libbsc, a program and a library for +lossless, block-sorting data compression. + + Copyright (c) 2009-2021 Ilya Grebnov + + Licensed under the Apache License, Version 2.0 (the "License"); + you may not use this file except in compliance with the License. + You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + + Unless required by applicable law or agreed to in writing, software + distributed under the License is distributed on an "AS IS" BASIS, + WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + See the License for the specific language governing permissions and + limitations under the License. + +Please see the file LICENSE for full copyright information and file AUTHORS +for full list of contributors. + +See also the bsc and libbsc web site: + http://libbsc.com/ for more information.
+
+--*/
+
+#include /* NOTE(review): no header name follows this #include (nor the next one) in the text under review - confirm against upstream */
+#include
+
+#include "bwt.h"
+
+#include "../platform/platform.h"
+#include "../libbsc.h"
+
+#include "libsais/libsais.h"
+/* Forward transform: runs the libsais BWT with T as both source and destination and returns indexes[0] (the primary index per bwt.h) or a LIBBSC_* error code. */
+int bsc_bwt_encode(unsigned char * T, int n, unsigned char * num_indexes, int * indexes, int features)
+{
+    if (int * RESTRICT A = (int *)bsc_malloc(n * sizeof(int))) /* scratch array of n ints handed to libsais */
+    {
+        int mod = n / 8;
+        { /* smear the highest set bit downwards, then halve: mod + 1 ends up a power of two */
+            mod |= mod >> 1; mod |= mod >> 2;
+            mod |= mod >> 4; mod |= mod >> 8;
+            mod |= mod >> 16; mod >>= 1;
+        }
+
+#ifdef LIBBSC_OPENMP
+        int index = libsais_bwt_aux_omp(T, T, A, n, 0, NULL, mod + 1, indexes, (features & LIBBSC_FEATURE_MULTITHREADING) > 0 ? 0 : 1); /* last argument is 0 when multithreading is requested, 1 otherwise */
+#else
+        int index = libsais_bwt_aux(T, T, A, n, 0, NULL, mod + 1, indexes);
+#endif
+
+        bsc_free(A); /* scratch is released before the result is inspected, so every exit below is leak-free */
+
+        switch (index)
+        {
+            case -1 : return LIBBSC_BAD_PARAMETER;
+            case -2 : return LIBBSC_NOT_ENOUGH_MEMORY;
+        }
+
+        num_indexes[0] = (unsigned char)((n - 1) / (mod + 1)); /* number of secondary indexes handed back; dereferenced unconditionally */
+        index = indexes[0]; for (int t = 0; t < num_indexes[0]; ++t) indexes[t] = indexes[t + 1] - 1; /* slot 0 becomes the return value; the remaining entries move down one slot, each reduced by 1 */
+
+        return index;
+    }
+    return LIBBSC_NOT_ENOUGH_MEMORY;
+}
+/* Inverse transform: validates the arguments, then undoes the BWT of T through libsais, using the secondary indexes when they are usable. */
+int bsc_bwt_decode(unsigned char * T, int n, int index, unsigned char num_indexes, int * indexes, int features)
+{
+    if ((T == NULL) || (n < 0) || (index <= 0) || (index > n))
+    {
+        return LIBBSC_BAD_PARAMETER;
+    }
+    if (n <= 1) /* strings of length 0 or 1 are returned untouched */
+    {
+        return LIBBSC_NO_ERROR;
+    }
+    if (int * P = (int *)bsc_malloc((n + 1) * sizeof(int))) /* scratch array of n + 1 ints handed to libsais */
+    {
+        int mod = n / 8;
+        { /* same computation as in bsc_bwt_encode, so both sides agree on mod + 1 */
+            mod |= mod >> 1; mod |= mod >> 2;
+            mod |= mod >> 4; mod |= mod >> 8;
+            mod |= mod >> 16; mod >>= 1;
+        }
+
+        if (num_indexes == (unsigned char)((n - 1) / (mod + 1)) && indexes != NULL) /* secondary indexes are used only when their count equals the value bsc_bwt_encode computes for this n */
+        {
+            int I[256]; I[0] = index; for (int t = 0; t < num_indexes; ++t) { I[t + 1] = indexes[t] + 1; } /* undo the flattening done by bsc_bwt_encode: primary index first, stored values raised by 1 */
+
+#ifdef LIBBSC_OPENMP
+            index = libsais_unbwt_aux_omp(T, T, P, n, NULL, mod + 1, I, (features & LIBBSC_FEATURE_MULTITHREADING) > 0 ? 0 : 1);
+#else
+            index = libsais_unbwt_aux(T, T, P, n, NULL, mod + 1, I);
+#endif
+        }
+        else /* otherwise decode from the primary index alone */
+        {
+#ifdef LIBBSC_OPENMP
+            index = libsais_unbwt_omp(T, T, P, n, NULL, index, (features & LIBBSC_FEATURE_MULTITHREADING) > 0 ? 0 : 1);
+#else
+            index = libsais_unbwt(T, T, P, n, NULL, index);
+#endif
+        }
+
+        bsc_free(P);
+
+        switch (index) /* index now holds the libsais status code, not a position */
+        {
+            case -1 : return LIBBSC_BAD_PARAMETER;
+            case -2 : return LIBBSC_NOT_ENOUGH_MEMORY;
+        }
+
+        return LIBBSC_NO_ERROR;
+    };
+    return LIBBSC_NOT_ENOUGH_MEMORY;
+}
+
+/*-----------------------------------------------------------*/
+/* End                                               bwt.cpp */
+/*-----------------------------------------------------------*/
diff --git a/libbsc/libbsc/bwt/bwt.h b/libbsc/libbsc/bwt/bwt.h
new file mode 100644
index 00000000..6b9bb7b2
--- /dev/null
+++ b/libbsc/libbsc/bwt/bwt.h
@@ -0,0 +1,71 @@
+/*-----------------------------------------------------------*/
+/* Block Sorting, Lossless Data Compression Library.         */
+/* Interface to Burrows Wheeler Transform                    */
+/*-----------------------------------------------------------*/
+
+/*--
+
+This file is a part of bsc and/or libbsc, a program and a library for
+lossless, block-sorting data compression.
+
+   Copyright (c) 2009-2021 Ilya Grebnov
+
+   Licensed under the Apache License, Version 2.0 (the "License");
+   you may not use this file except in compliance with the License.
+   You may obtain a copy of the License at
+
+     http://www.apache.org/licenses/LICENSE-2.0
+
+   Unless required by applicable law or agreed to in writing, software
+   distributed under the License is distributed on an "AS IS" BASIS,
+   WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+   See the License for the specific language governing permissions and
+   limitations under the License.
+
+Please see the file LICENSE for full copyright information and file AUTHORS
+for full list of contributors.
+
+See also the bsc and libbsc web site:
+  http://libbsc.com/ for more information.
+ +--*/ + +#ifndef _LIBBSC_BWT_H +#define _LIBBSC_BWT_H + +#ifdef __cplusplus +extern "C" { +#endif + + /** + * Constructs the burrows wheeler transformed string of a given string. + * @param T - the input/output string of n chars. + * @param n - the length of the given string. + * @param num_indexes - receives the length of secondary indexes array, must not be NULL. + * @param indexes - receives the secondary indexes array, must not be NULL. + * @param features - the set of additional features. + * @return the primary index if no error occurred, error code otherwise. + */ + int bsc_bwt_encode(unsigned char * T, int n, unsigned char * num_indexes, int * indexes, int features); + + /** + * Reconstructs the original string from burrows wheeler transformed string. + * @param T - the input/output string of n chars. + * @param n - the length of the given string. + * @param index - the primary index. + * @param num_indexes - the length of secondary indexes array, can be 0. + * @param indexes - the secondary indexes array, can be NULL. + * @param features - the set of additional features. + * @return LIBBSC_NO_ERROR if no error occurred, error code otherwise.
+ */ + int bsc_bwt_decode(unsigned char * T, int n, int index, unsigned char num_indexes, int * indexes, int features); + +#ifdef __cplusplus +} +#endif + +#endif + +/*-----------------------------------------------------------*/ +/* End bwt.h */ +/*-----------------------------------------------------------*/ diff --git a/libbsc/libbsc/bwt/libsais/VERSION b/libbsc/libbsc/bwt/libsais/VERSION new file mode 100644 index 00000000..68167133 --- /dev/null +++ b/libbsc/libbsc/bwt/libsais/VERSION @@ -0,0 +1 @@ +2.6.5 \ No newline at end of file diff --git a/libbsc/libbsc/bwt/libsais/libsais.c b/libbsc/libbsc/bwt/libsais/libsais.c new file mode 100644 index 00000000..c1e9505d --- /dev/null +++ b/libbsc/libbsc/bwt/libsais/libsais.c @@ -0,0 +1,7654 @@ +/*-- + +This file is a part of libsais, a library for linear time +suffix array and burrows wheeler transform construction. + + Copyright (c) 2021-2022 Ilya Grebnov + + Licensed under the Apache License, Version 2.0 (the "License"); + you may not use this file except in compliance with the License. + You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + + Unless required by applicable law or agreed to in writing, software + distributed under the License is distributed on an "AS IS" BASIS, + WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + See the License for the specific language governing permissions and + limitations under the License. + +Please see the file LICENSE for full copyright information. + +--*/ + +/*-- + +Changes made to the original file: + - July 14, 2021 Switched to internal bsc malloc / free functions. 
+
+--*/
+
+#include /* NOTE(review): none of the five #include directives here is followed by a header name in the text under review - confirm against upstream */
+#include
+#include
+#include
+#include
+
+#include "libsais.h"
+
+#include "../../platform/platform.h"
+
+#undef INLINE
+#undef RESTRICT
+#undef ALPHABET_SIZE
+
+#if defined(_OPENMP)
+    #include
+#else
+    #define UNUSED(_x)                  (void)(_x) /* silences unused-parameter warnings in the non-OpenMP build */
+#endif
+
+typedef int32_t                         sa_sint_t; /* signed 32-bit element type used for SA and bucket arrays below */
+typedef uint32_t                        sa_uint_t;
+typedef ptrdiff_t                       fast_sint_t; /* pointer-sized signed type used for loop counters and positions */
+typedef size_t                          fast_uint_t;
+
+#define SAINT_BIT                       (32)
+#define SAINT_MAX                       INT32_MAX
+#define SAINT_MIN                       INT32_MIN
+
+#define ALPHABET_SIZE                   (1 << CHAR_BIT) /* number of distinct values one char can take */
+#define UNBWT_FASTBITS                  (17)
+
+#define SUFFIX_GROUP_BIT                (SAINT_BIT - 1)
+#define SUFFIX_GROUP_MARKER             (((sa_sint_t)1) << (SUFFIX_GROUP_BIT - 1))
+
+#define BUCKETS_INDEX2(_c, _s)          (((_c) << 1) + (_s)) /* slot _s of symbol _c in a table holding 2 slots per symbol */
+#define BUCKETS_INDEX4(_c, _s)          (((_c) << 2) + (_s)) /* slot _s of symbol _c in a table holding 4 slots per symbol */
+
+#define LIBSAIS_PER_THREAD_CACHE_SIZE   (24576) /* number of LIBSAIS_THREAD_CACHE entries reserved per thread */
+
+typedef struct LIBSAIS_THREAD_CACHE
+{
+        sa_sint_t                       symbol; /* used as the destination position in SA by libsais_place_cached_suffixes */
+        sa_sint_t                       index; /* value stored at that position */
+} LIBSAIS_THREAD_CACHE;
+
+typedef union LIBSAIS_THREAD_STATE
+{
+    struct
+    {
+        fast_sint_t                     position;
+        fast_sint_t                     count;
+
+        fast_sint_t                     m;
+        fast_sint_t                     last_lms_suffix;
+
+        sa_sint_t *                     buckets; /* this thread's slice of the shared bucket allocation */
+        LIBSAIS_THREAD_CACHE *          cache; /* this thread's slice of the shared cache allocation */
+    } state;
+
+    uint8_t padding[64]; /* makes the union at least 64 bytes; presumably to keep per-thread states on separate cache lines */
+} LIBSAIS_THREAD_STATE;
+
+typedef struct LIBSAIS_CONTEXT
+{
+    sa_sint_t *                         buckets;
+    LIBSAIS_THREAD_STATE *              thread_state; /* NULL when the context is created for a single thread */
+    fast_sint_t                         threads;
+} LIBSAIS_CONTEXT;
+
+typedef struct LIBSAIS_UNBWT_CONTEXT
+{
+    sa_uint_t *                         bucket2;
+    uint16_t *                          fastbits;
+    sa_uint_t *                         buckets;
+    fast_sint_t                         threads;
+} LIBSAIS_UNBWT_CONTEXT;
+
+#if defined(__GNUC__) || defined(__clang__)
+    #define RESTRICT __restrict__
+#elif defined(_MSC_VER) || defined(__INTEL_COMPILER)
+    #define RESTRICT __restrict
+#else
+    #error Your compiler, configuration or platform is not supported.
+#endif
+
+#if defined(__has_builtin)
+    #if __has_builtin(__builtin_prefetch)
+        #define HAS_BUILTIN_PREFECTCH /* spelling (sic) is used consistently below */
+    #endif
+#elif defined(__GNUC__) && ((__GNUC__ == 3) && (__GNUC_MINOR__ >= 2)) || (__GNUC__ >= 4)
+    #define HAS_BUILTIN_PREFECTCH
+#endif
+
+#if defined(__has_builtin)
+    #if __has_builtin(__builtin_bswap16)
+        #define HAS_BUILTIN_BSWAP16
+    #endif
+#elif defined(__GNUC__) && ((__GNUC__ == 4) && (__GNUC_MINOR__ >= 8)) || (__GNUC__ >= 5)
+    #define HAS_BUILTIN_BSWAP16
+#endif
+
+#if defined(HAS_BUILTIN_PREFECTCH)
+    #define libsais_prefetch(address) __builtin_prefetch((const void *)(address), 0, 0) /* read prefetch */
+    #define libsais_prefetchw(address) __builtin_prefetch((const void *)(address), 1, 0) /* write prefetch */
+#elif defined (_M_IX86) || defined (_M_AMD64)
+    #include
+    #define libsais_prefetch(address) _mm_prefetch((const void *)(address), _MM_HINT_NTA)
+    #define libsais_prefetchw(address) _m_prefetchw((const void *)(address))
+#elif defined (_M_ARM)
+    #include
+    #define libsais_prefetch(address) __prefetch((const void *)(address))
+    #define libsais_prefetchw(address) __prefetchw((const void *)(address))
+#elif defined (_M_ARM64)
+    #include
+    #define libsais_prefetch(address) __prefetch2((const void *)(address), 1)
+    #define libsais_prefetchw(address) __prefetch2((const void *)(address), 17)
+#else
+    #error Your compiler, configuration or platform is not supported.
+#endif
+
+#if !defined(__LITTLE_ENDIAN__) && !defined(__BIG_ENDIAN__)
+    #if defined(_LITTLE_ENDIAN) \
+            || (defined(BYTE_ORDER) && defined(LITTLE_ENDIAN) && BYTE_ORDER == LITTLE_ENDIAN) \
+            || (defined(_BYTE_ORDER) && defined(_LITTLE_ENDIAN) && _BYTE_ORDER == _LITTLE_ENDIAN) \
+            || (defined(__BYTE_ORDER) && defined(__LITTLE_ENDIAN) && __BYTE_ORDER == __LITTLE_ENDIAN) \
+            || (defined(__BYTE_ORDER__) && defined(__ORDER_LITTLE_ENDIAN__) && __BYTE_ORDER__ == __ORDER_LITTLE_ENDIAN__)
+        #define __LITTLE_ENDIAN__
+    #elif defined(_BIG_ENDIAN) \
+            || (defined(BYTE_ORDER) && defined(BIG_ENDIAN) && BYTE_ORDER == BIG_ENDIAN) \
+            || (defined(_BYTE_ORDER) && defined(_BIG_ENDIAN) && _BYTE_ORDER == _BIG_ENDIAN) \
+            || (defined(__BYTE_ORDER) && defined(__BIG_ENDIAN) && __BYTE_ORDER == __BIG_ENDIAN) \
+            || (defined(__BYTE_ORDER__) && defined(__ORDER_BIG_ENDIAN__) && __BYTE_ORDER__ == __ORDER_BIG_ENDIAN__)
+        #define __BIG_ENDIAN__
+    #elif defined(_WIN32)
+        #define __LITTLE_ENDIAN__
+    #endif
+#endif
+
+#if defined(__LITTLE_ENDIAN__) && !defined(__BIG_ENDIAN__)
+    #if defined(HAS_BUILTIN_BSWAP16)
+        #define libsais_bswap16(x) (__builtin_bswap16(x))
+    #elif defined(_MSC_VER) && !defined(__INTEL_COMPILER)
+        #define libsais_bswap16(x) (_byteswap_ushort(x))
+    #else
+        #define libsais_bswap16(x) ((uint16_t)(x >> 8) | (uint16_t)(x << 8)) /* NOTE(review): x is not parenthesised, so pass only simple expressions */
+    #endif
+#elif !defined(__LITTLE_ENDIAN__) && defined(__BIG_ENDIAN__)
+    #define libsais_bswap16(x) (x) /* identity on big-endian targets */
+#else
+    #error Your compiler, configuration or platform is not supported.
+#endif
+/* Rounds address up to the next multiple of alignment; the mask trick assumes alignment is a power of two. */
+static void * libsais_align_up(const void * address, size_t alignment)
+{
+    return (void *)((((ptrdiff_t)address) + ((ptrdiff_t)alignment) - 1) & (-((ptrdiff_t)alignment)));
+}
+/* Allocates size bytes aligned to alignment via bsc_malloc; returns NULL on failure. Release with libsais_free_aligned only. */
+static void * libsais_alloc_aligned(size_t size, size_t alignment)
+{
+    void * address = bsc_malloc(size + sizeof(short) + alignment - 1); /* extra room for the offset slot and the worst-case alignment shift */
+    if (address != NULL)
+    {
+        void * aligned_address = libsais_align_up((void *)((ptrdiff_t)address + (ptrdiff_t)(sizeof(short))), alignment);
+        ((short *)aligned_address)[-1] = (short)((ptrdiff_t)aligned_address - (ptrdiff_t)address); /* distance back to the raw block, stored just below the returned pointer */
+
+        return aligned_address;
+    }
+
+    return NULL;
+}
+/* Frees a pointer obtained from libsais_alloc_aligned; NULL is accepted and ignored. */
+static void libsais_free_aligned(void * aligned_address)
+{
+    if (aligned_address != NULL)
+    {
+        bsc_free((void *)((ptrdiff_t)aligned_address - ((short *)aligned_address)[-1])); /* step back by the stored offset to recover the raw block */
+    }
+}
+/* Allocates the per-thread state array plus one shared bucket block and one shared cache block, and gives each thread its slice; returns NULL if any allocation fails. */
+static LIBSAIS_THREAD_STATE * libsais_alloc_thread_state(sa_sint_t threads)
+{
+    LIBSAIS_THREAD_STATE * RESTRICT thread_state    = (LIBSAIS_THREAD_STATE *)libsais_alloc_aligned((size_t)threads * sizeof(LIBSAIS_THREAD_STATE), 4096);
+    sa_sint_t * RESTRICT            thread_buckets  = (sa_sint_t *)libsais_alloc_aligned((size_t)threads * 4 * ALPHABET_SIZE * sizeof(sa_sint_t), 4096);
+    LIBSAIS_THREAD_CACHE * RESTRICT thread_cache    = (LIBSAIS_THREAD_CACHE *)libsais_alloc_aligned((size_t)threads * LIBSAIS_PER_THREAD_CACHE_SIZE * sizeof(LIBSAIS_THREAD_CACHE), 4096);
+
+    if (thread_state != NULL && thread_buckets != NULL && thread_cache != NULL)
+    {
+        fast_sint_t t;
+        for (t = 0; t < threads; ++t)
+        {
+            thread_state[t].state.buckets   = thread_buckets;   thread_buckets  += 4 * ALPHABET_SIZE; /* thread 0 keeps the base pointer of the shared block */
+            thread_state[t].state.cache     = thread_cache;     thread_cache    += LIBSAIS_PER_THREAD_CACHE_SIZE;
+        }
+
+        return thread_state;
+    }
+
+    libsais_free_aligned(thread_cache); /* partial failure: release whatever was obtained (NULL is ignored) */
+    libsais_free_aligned(thread_buckets);
+    libsais_free_aligned(thread_state);
+    return NULL;
+}
+/* Releases everything libsais_alloc_thread_state allocated; NULL is accepted. */
+static void libsais_free_thread_state(LIBSAIS_THREAD_STATE * thread_state)
+{
+    if (thread_state != NULL)
+    {
+        libsais_free_aligned(thread_state[0].state.cache); /* entry 0 holds the base pointers of the two shared blocks */
+        libsais_free_aligned(thread_state[0].state.buckets);
+        libsais_free_aligned(thread_state);
+    }
+}
+/* Builds a LIBSAIS_CONTEXT with its bucket table and, for threads > 1, per-thread state; returns NULL on any allocation failure. */
+static LIBSAIS_CONTEXT * libsais_create_ctx_main(sa_sint_t threads)
+{
+    LIBSAIS_CONTEXT * RESTRICT      ctx             = (LIBSAIS_CONTEXT *)libsais_alloc_aligned(sizeof(LIBSAIS_CONTEXT), 64);
+    sa_sint_t * RESTRICT            buckets         = (sa_sint_t *)libsais_alloc_aligned(8 * ALPHABET_SIZE * sizeof(sa_sint_t), 4096);
+    LIBSAIS_THREAD_STATE * RESTRICT thread_state    = threads > 1 ? libsais_alloc_thread_state(threads) : NULL;
+
+    if (ctx != NULL && buckets != NULL && (thread_state != NULL || threads == 1)) /* a NULL thread_state is acceptable only in the single-thread case */
+    {
+        ctx->buckets        = buckets;
+        ctx->threads        = threads;
+        ctx->thread_state   = thread_state;
+
+        return ctx;
+    }
+
+    libsais_free_thread_state(thread_state);
+    libsais_free_aligned(buckets);
+    libsais_free_aligned(ctx);
+    return NULL;
+}
+/* Destroys a context created by libsais_create_ctx_main; NULL is accepted. */
+static void libsais_free_ctx_main(LIBSAIS_CONTEXT * ctx)
+{
+    if (ctx != NULL)
+    {
+        libsais_free_thread_state(ctx->thread_state);
+        libsais_free_aligned(ctx->buckets);
+        libsais_free_aligned(ctx);
+    }
+}
+
+#if defined(_OPENMP)
+/* Counts the negative entries of SA in [omp_block_start, omp_block_start + omp_block_size). */
+static sa_sint_t libsais_count_negative_marked_suffixes(sa_sint_t * RESTRICT SA, fast_sint_t omp_block_start, fast_sint_t omp_block_size)
+{
+    sa_sint_t count = 0;
+
+    fast_sint_t i; for (i = omp_block_start; i < omp_block_start + omp_block_size; ++i) { count += (SA[i] < 0); }
+
+    return count;
+}
+/* Counts the zero entries of SA in [omp_block_start, omp_block_start + omp_block_size). */
+static sa_sint_t libsais_count_zero_marked_suffixes(sa_sint_t * RESTRICT SA, fast_sint_t omp_block_start, fast_sint_t omp_block_size)
+{
+    sa_sint_t count = 0;
+
+    fast_sint_t i; for (i = omp_block_start; i < omp_block_start + omp_block_size; ++i) { count += (SA[i] == 0); }
+
+    return count;
+}
+/* Writes each cache entry of the given block into SA: SA[entry.symbol] = entry.index. */
+static void libsais_place_cached_suffixes(sa_sint_t * RESTRICT SA, LIBSAIS_THREAD_CACHE * RESTRICT cache, fast_sint_t omp_block_start, fast_sint_t omp_block_size)
+{
+    const fast_sint_t prefetch_distance = 32;
+
+    fast_sint_t i, j;
+    for (i = omp_block_start, j = omp_block_start +
omp_block_size - prefetch_distance - 3; i < j; i += 4)
+    { /* main loop: 4 entries per iteration, stopping early so the look-ahead indices stay inside the block */
+        libsais_prefetch(&cache[i + 2 * prefetch_distance]);
+
+        libsais_prefetchw(&SA[cache[i + prefetch_distance + 0].symbol]);
+        libsais_prefetchw(&SA[cache[i + prefetch_distance + 1].symbol]);
+        libsais_prefetchw(&SA[cache[i + prefetch_distance + 2].symbol]);
+        libsais_prefetchw(&SA[cache[i + prefetch_distance + 3].symbol]);
+
+        SA[cache[i + 0].symbol] = cache[i + 0].index;
+        SA[cache[i + 1].symbol] = cache[i + 1].index;
+        SA[cache[i + 2].symbol] = cache[i + 2].index;
+        SA[cache[i + 3].symbol] = cache[i + 3].index;
+    }
+
+    for (j += prefetch_distance + 3; i < j; i += 1) /* tail: j is restored to the end of the block, remaining entries are placed one by one */
+    {
+        SA[cache[i].symbol] = cache[i].index;
+    }
+}
+/* Drops cache entries whose symbol is negative (compacting in place), then places the survivors into SA. */
+static void libsais_compact_and_place_cached_suffixes(sa_sint_t * RESTRICT SA, LIBSAIS_THREAD_CACHE * RESTRICT cache, fast_sint_t omp_block_start, fast_sint_t omp_block_size)
+{
+    const fast_sint_t prefetch_distance = 32;
+
+    fast_sint_t i, j, l;
+    for (i = omp_block_start, j = omp_block_start + omp_block_size - 3, l = omp_block_start; i < j; i += 4)
+    {
+        libsais_prefetchw(&cache[i + prefetch_distance]);
+
+        cache[l] = cache[i + 0]; l += cache[l].symbol >= 0; /* copy first, advance l only if the entry is kept */
+        cache[l] = cache[i + 1]; l += cache[l].symbol >= 0;
+        cache[l] = cache[i + 2]; l += cache[l].symbol >= 0;
+        cache[l] = cache[i + 3]; l += cache[l].symbol >= 0;
+    }
+
+    for (j += 3; i < j; i += 1)
+    {
+        cache[l] = cache[i]; l += cache[l].symbol >= 0;
+    }
+
+    libsais_place_cached_suffixes(SA, cache, omp_block_start, l - omp_block_start); /* l - omp_block_start entries survived */
+}
+/* The _N helpers below add, element-wise, the N - 1 arrays located bucket_stride, 2 * bucket_stride, ... elements before bucket00 into bucket00. */
+static void libsais_accumulate_counts_s32_2(sa_sint_t * RESTRICT bucket00, fast_sint_t bucket_size, fast_sint_t bucket_stride)
+{
+    sa_sint_t * RESTRICT bucket01 = bucket00 - bucket_stride;
+    fast_sint_t s; for (s = 0; s < bucket_size; s += 1) { bucket00[s] = bucket00[s] + bucket01[s]; }
+}
+
+static void libsais_accumulate_counts_s32_3(sa_sint_t * RESTRICT bucket00, fast_sint_t bucket_size, fast_sint_t bucket_stride)
+{
+    sa_sint_t * RESTRICT bucket01 = bucket00 - bucket_stride;
+    sa_sint_t * RESTRICT bucket02 = bucket01 - bucket_stride;
+    fast_sint_t s; for (s = 0; s < bucket_size; s += 1) { bucket00[s] = bucket00[s] + bucket01[s] + bucket02[s]; }
+}
+
+static void libsais_accumulate_counts_s32_4(sa_sint_t * RESTRICT bucket00, fast_sint_t bucket_size, fast_sint_t bucket_stride)
+{
+    sa_sint_t * RESTRICT bucket01 = bucket00 - bucket_stride;
+    sa_sint_t * RESTRICT bucket02 = bucket01 - bucket_stride;
+    sa_sint_t * RESTRICT bucket03 = bucket02 - bucket_stride;
+    fast_sint_t s; for (s = 0; s < bucket_size; s += 1) { bucket00[s] = bucket00[s] + bucket01[s] + bucket02[s] + bucket03[s]; }
+}
+
+static void libsais_accumulate_counts_s32_5(sa_sint_t * RESTRICT bucket00, fast_sint_t bucket_size, fast_sint_t bucket_stride)
+{
+    sa_sint_t * RESTRICT bucket01 = bucket00 - bucket_stride;
+    sa_sint_t * RESTRICT bucket02 = bucket01 - bucket_stride;
+    sa_sint_t * RESTRICT bucket03 = bucket02 - bucket_stride;
+    sa_sint_t * RESTRICT bucket04 = bucket03 - bucket_stride;
+    fast_sint_t s; for (s = 0; s < bucket_size; s += 1) { bucket00[s] = bucket00[s] + bucket01[s] + bucket02[s] + bucket03[s] + bucket04[s]; }
+}
+
+static void libsais_accumulate_counts_s32_6(sa_sint_t * RESTRICT bucket00, fast_sint_t bucket_size, fast_sint_t bucket_stride)
+{
+    sa_sint_t * RESTRICT bucket01 = bucket00 - bucket_stride;
+    sa_sint_t * RESTRICT bucket02 = bucket01 - bucket_stride;
+    sa_sint_t * RESTRICT bucket03 = bucket02 - bucket_stride;
+    sa_sint_t * RESTRICT bucket04 = bucket03 - bucket_stride;
+    sa_sint_t * RESTRICT bucket05 = bucket04 - bucket_stride;
+    fast_sint_t s; for (s = 0; s < bucket_size; s += 1) { bucket00[s] = bucket00[s] + bucket01[s] + bucket02[s] + bucket03[s] + bucket04[s] + bucket05[s]; }
+}
+
+static void libsais_accumulate_counts_s32_7(sa_sint_t * RESTRICT bucket00, fast_sint_t bucket_size, fast_sint_t bucket_stride)
+{
+    sa_sint_t * RESTRICT bucket01 = bucket00 - bucket_stride;
+    sa_sint_t * RESTRICT bucket02 = bucket01 - bucket_stride;
+    sa_sint_t * RESTRICT bucket03 = bucket02 - bucket_stride;
+    sa_sint_t * RESTRICT bucket04 = bucket03 - bucket_stride;
+    sa_sint_t * RESTRICT bucket05 = bucket04 - bucket_stride;
+    sa_sint_t * RESTRICT bucket06 = bucket05 - bucket_stride;
+    fast_sint_t s; for (s = 0; s < bucket_size; s += 1) { bucket00[s] = bucket00[s] + bucket01[s] + bucket02[s] + bucket03[s] + bucket04[s] + bucket05[s] + bucket06[s]; }
+}
+
+static void libsais_accumulate_counts_s32_8(sa_sint_t * RESTRICT bucket00, fast_sint_t bucket_size, fast_sint_t bucket_stride)
+{
+    sa_sint_t * RESTRICT bucket01 = bucket00 - bucket_stride;
+    sa_sint_t * RESTRICT bucket02 = bucket01 - bucket_stride;
+    sa_sint_t * RESTRICT bucket03 = bucket02 - bucket_stride;
+    sa_sint_t * RESTRICT bucket04 = bucket03 - bucket_stride;
+    sa_sint_t * RESTRICT bucket05 = bucket04 - bucket_stride;
+    sa_sint_t * RESTRICT bucket06 = bucket05 - bucket_stride;
+    sa_sint_t * RESTRICT bucket07 = bucket06 - bucket_stride;
+    fast_sint_t s; for (s = 0; s < bucket_size; s += 1) { bucket00[s] = bucket00[s] + bucket01[s] + bucket02[s] + bucket03[s] + bucket04[s] + bucket05[s] + bucket06[s] + bucket07[s]; }
+}
+
+static void libsais_accumulate_counts_s32_9(sa_sint_t * RESTRICT bucket00, fast_sint_t bucket_size, fast_sint_t bucket_stride)
+{
+    sa_sint_t * RESTRICT bucket01 = bucket00 - bucket_stride;
+    sa_sint_t * RESTRICT bucket02 = bucket01 - bucket_stride;
+    sa_sint_t * RESTRICT bucket03 = bucket02 - bucket_stride;
+    sa_sint_t * RESTRICT bucket04 = bucket03 - bucket_stride;
+    sa_sint_t * RESTRICT bucket05 = bucket04 - bucket_stride;
+    sa_sint_t * RESTRICT bucket06 = bucket05 - bucket_stride;
+    sa_sint_t * RESTRICT bucket07 = bucket06 - bucket_stride;
+    sa_sint_t * RESTRICT bucket08 = bucket07 - bucket_stride;
+    fast_sint_t s; for (s = 0; s < bucket_size; s += 1) { bucket00[s] = bucket00[s] + bucket01[s] + bucket02[s] + bucket03[s] + bucket04[s] + bucket05[s] + bucket06[s] + bucket07[s] + bucket08[s]; }
+}
+/* Sums num_buckets strided arrays (buckets and the num_buckets - 1 arrays before it) into buckets, folding 9 at a time and finishing with the matching _N helper. */
+static void libsais_accumulate_counts_s32(sa_sint_t * RESTRICT buckets, fast_sint_t bucket_size, fast_sint_t bucket_stride, fast_sint_t num_buckets)
+{
+    while (num_buckets >= 9)
+    {
+        libsais_accumulate_counts_s32_9(buckets - (num_buckets - 9) * bucket_stride, bucket_size, bucket_stride); num_buckets -= 8; /* the 9 farthest arrays collapse into one, so 8 fewer remain */
+    }
+
+    switch (num_buckets)
+    {
+        case 1: break;
+        case 2: libsais_accumulate_counts_s32_2(buckets, bucket_size, bucket_stride); break;
+        case 3: libsais_accumulate_counts_s32_3(buckets, bucket_size, bucket_stride); break;
+        case 4: libsais_accumulate_counts_s32_4(buckets, bucket_size, bucket_stride); break;
+        case 5: libsais_accumulate_counts_s32_5(buckets, bucket_size, bucket_stride); break;
+        case 6: libsais_accumulate_counts_s32_6(buckets, bucket_size, bucket_stride); break;
+        case 7: libsais_accumulate_counts_s32_7(buckets, bucket_size, bucket_stride); break;
+        case 8: libsais_accumulate_counts_s32_8(buckets, bucket_size, bucket_stride); break;
+    }
+}
+
+#endif
+/* Scans T backwards over the given block and stores accepted positions (LMS suffixes, going by the function name) into SA from slot m downwards. */
+static void libsais_gather_lms_suffixes_8u(const uint8_t * RESTRICT T, sa_sint_t * RESTRICT SA, sa_sint_t n, fast_sint_t m, fast_sint_t omp_block_start, fast_sint_t omp_block_size)
+{
+    if (omp_block_size > 0)
+    {
+        const fast_sint_t prefetch_distance = 128;
+
+        fast_sint_t i, j = omp_block_start + omp_block_size, c0 = T[omp_block_start + omp_block_size - 1], c1 = -1;
+
+        while (j < n && (c1 = T[j]) == c0) { ++j; } /* look past the block end for the first character that differs from the block's last one */
+
+        fast_uint_t s = c0 >= c1;
+
+        for (i = omp_block_start + omp_block_size - 2, j = omp_block_start + 3; i >= j; i -= 4)
+        {
+            libsais_prefetch(&T[i - prefetch_distance]);
+
+            c1 = T[i - 0]; s = (s << 1) + (fast_uint_t)(c1 > (c0 - (fast_sint_t)(s & 1))); SA[m] = (sa_sint_t)(i + 1); m -= ((s & 3) == 1); /* always store, but move m only when the last two comparison bits are 01 */
+            c0 = T[i - 1]; s = (s << 1) + (fast_uint_t)(c0 > (c1 - (fast_sint_t)(s & 1))); SA[m] = (sa_sint_t)(i - 0); m -= ((s & 3) == 1);
+            c1 = T[i - 2]; s = (s << 1) + (fast_uint_t)(c1 > (c0 - (fast_sint_t)(s & 1))); SA[m] = (sa_sint_t)(i - 1); m -= ((s & 3) == 1);
+            c0 = T[i - 3]; s = (s << 1) + (fast_uint_t)(c0 > (c1 - (fast_sint_t)(s & 1))); SA[m] =
(sa_sint_t)(i - 2); m -= ((s & 3) == 1);
+        }
+
+        for (j -= 3; i >= j; i -= 1) /* tail: j is restored to omp_block_start, remaining positions are handled one by one */
+        {
+            c1 = c0; c0 = T[i]; s = (s << 1) + (fast_uint_t)(c0 > (c1 - (fast_sint_t)(s & 1))); SA[m] = (sa_sint_t)(i + 1); m -= ((s & 3) == 1);
+        }
+
+        SA[m] = (sa_sint_t)(i + 1);
+    }
+}
+/* Splits T into per-thread blocks and runs libsais_gather_lms_suffixes_8u on each; without OpenMP it processes the whole input as a single block. */
+static void libsais_gather_lms_suffixes_8u_omp(const uint8_t * RESTRICT T, sa_sint_t * RESTRICT SA, sa_sint_t n, sa_sint_t threads, LIBSAIS_THREAD_STATE * RESTRICT thread_state)
+{
+#if defined(_OPENMP)
+    #pragma omp parallel num_threads(threads) if(threads > 1 && n >= 65536 && omp_get_dynamic() == 0)
+#endif
+    {
+#if defined(_OPENMP)
+        fast_sint_t omp_thread_num    = omp_get_thread_num();
+        fast_sint_t omp_num_threads   = omp_get_num_threads();
+#else
+        UNUSED(threads); UNUSED(thread_state);
+
+        fast_sint_t omp_thread_num    = 0;
+        fast_sint_t omp_num_threads   = 1;
+#endif
+        fast_sint_t omp_block_stride  = (n / omp_num_threads) & (-16); /* per-thread block length, rounded down to a multiple of 16 */
+        fast_sint_t omp_block_start   = omp_thread_num * omp_block_stride;
+        fast_sint_t omp_block_size    = omp_thread_num < omp_num_threads - 1 ? omp_block_stride : n - omp_block_start; /* the last thread also takes the remainder */
+
+        if (omp_num_threads == 1)
+        {
+            libsais_gather_lms_suffixes_8u(T, SA, n, (fast_sint_t)n - 1, omp_block_start, omp_block_size);
+        }
+#if defined(_OPENMP)
+        else
+        {
+            fast_sint_t t, m = 0; for (t = omp_num_threads - 1; t > omp_thread_num; --t) { m += thread_state[t].state.m; } /* total of state.m over all later threads, used as this thread's offset from the end of SA */
+
+            libsais_gather_lms_suffixes_8u(T, SA, n, (fast_sint_t)n - 1 - m, omp_block_start, omp_block_size);
+
+            #pragma omp barrier
+
+            if (thread_state[omp_thread_num].state.m > 0)
+            {
+                SA[(fast_sint_t)n - 1 - m] = (sa_sint_t)thread_state[omp_thread_num].state.last_lms_suffix; /* rewritten after the barrier, once every thread has finished its stores */
+            }
+        }
+#endif
+    }
+}
+/* Scans the 32-bit text T backwards, storing accepted positions into SA from slot n - 1 downwards; returns how many were stored. */
+static sa_sint_t libsais_gather_lms_suffixes_32s(const sa_sint_t * RESTRICT T, sa_sint_t * RESTRICT SA, sa_sint_t n)
+{
+    const fast_sint_t prefetch_distance = 32;
+
+    sa_sint_t             i   = n - 2;
+    sa_sint_t             m   = n - 1;
+    fast_uint_t           s   = 1;
+    fast_sint_t           c0  = T[n - 1];
+    fast_sint_t           c1  = 0;
+
+    for (; i >= 3; i -= 4)
+    {
+        libsais_prefetch(&T[i - prefetch_distance]);
+
+        c1 = T[i - 0]; s = (s << 1) + (fast_uint_t)(c1 > (c0 - (fast_sint_t)(s & 1))); SA[m] = i + 1; m -= ((s & 3) == 1); /* always store, but move m only when the last two comparison bits are 01 */
+        c0 = T[i - 1]; s = (s << 1) + (fast_uint_t)(c0 > (c1 - (fast_sint_t)(s & 1))); SA[m] = i - 0; m -= ((s & 3) == 1);
+        c1 = T[i - 2]; s = (s << 1) + (fast_uint_t)(c1 > (c0 - (fast_sint_t)(s & 1))); SA[m] = i - 1; m -= ((s & 3) == 1);
+        c0 = T[i - 3]; s = (s << 1) + (fast_uint_t)(c0 > (c1 - (fast_sint_t)(s & 1))); SA[m] = i - 2; m -= ((s & 3) == 1);
+    }
+
+    for (; i >= 0; i -= 1)
+    {
+        c1 = c0; c0 = T[i]; s = (s << 1) + (fast_uint_t)(c0 > (c1 - (fast_sint_t)(s & 1))); SA[m] = i + 1; m -= ((s & 3) == 1);
+    }
+
+    return n - 1 - m; /* number of slots consumed */
+}
+/* Same backward scan as libsais_gather_lms_suffixes_32s, but acceptance also depends on the sign of the character. */
+static sa_sint_t libsais_gather_compacted_lms_suffixes_32s(const sa_sint_t * RESTRICT T, sa_sint_t * RESTRICT SA, sa_sint_t n)
+{
+    const fast_sint_t prefetch_distance = 32;
+
+    sa_sint_t             i   = n - 2;
+    sa_sint_t             m   = n - 1;
+    fast_uint_t           s   = 1;
+    fast_sint_t           c0  = T[n - 1];
+    fast_sint_t           c1  = 0;
+
+    for (; i >= 3; i -= 4)
+    {
+        libsais_prefetch(&T[i -
prefetch_distance]); + + c1 = T[i - 0]; s = (s << 1) + (fast_uint_t)(c1 > (c0 - (fast_sint_t)(s & 1))); SA[m] = i + 1; m -= ((fast_sint_t)(s & 3) == (c0 >= 0)); + c0 = T[i - 1]; s = (s << 1) + (fast_uint_t)(c0 > (c1 - (fast_sint_t)(s & 1))); SA[m] = i - 0; m -= ((fast_sint_t)(s & 3) == (c1 >= 0)); + c1 = T[i - 2]; s = (s << 1) + (fast_uint_t)(c1 > (c0 - (fast_sint_t)(s & 1))); SA[m] = i - 1; m -= ((fast_sint_t)(s & 3) == (c0 >= 0)); + c0 = T[i - 3]; s = (s << 1) + (fast_uint_t)(c0 > (c1 - (fast_sint_t)(s & 1))); SA[m] = i - 2; m -= ((fast_sint_t)(s & 3) == (c1 >= 0)); + } + + for (; i >= 0; i -= 1) + { + c1 = c0; c0 = T[i]; s = (s << 1) + (fast_uint_t)(c0 > (c1 - (fast_sint_t)(s & 1))); SA[m] = i + 1; m -= ((fast_sint_t)(s & 3) == (c1 >= 0)); + } + + return n - 1 - m; +} + +#if defined(_OPENMP) + +static void libsais_count_lms_suffixes_32s_4k(const sa_sint_t * RESTRICT T, sa_sint_t n, sa_sint_t k, sa_sint_t * RESTRICT buckets) +{ + const fast_sint_t prefetch_distance = 32; + + memset(buckets, 0, 4 * (size_t)k * sizeof(sa_sint_t)); + + sa_sint_t i = n - 2; + fast_uint_t s = 1; + fast_sint_t c0 = T[n - 1]; + fast_sint_t c1 = 0; + + for (; i >= prefetch_distance + 3; i -= 4) + { + libsais_prefetch(&T[i - 2 * prefetch_distance]); + + libsais_prefetchw(&buckets[BUCKETS_INDEX4(T[i - prefetch_distance - 0], 0)]); + libsais_prefetchw(&buckets[BUCKETS_INDEX4(T[i - prefetch_distance - 1], 0)]); + libsais_prefetchw(&buckets[BUCKETS_INDEX4(T[i - prefetch_distance - 2], 0)]); + libsais_prefetchw(&buckets[BUCKETS_INDEX4(T[i - prefetch_distance - 3], 0)]); + + c1 = T[i - 0]; s = (s << 1) + (fast_uint_t)(c1 > (c0 - (fast_sint_t)(s & 1))); + buckets[BUCKETS_INDEX4((fast_uint_t)c0, s & 3)]++; + + c0 = T[i - 1]; s = (s << 1) + (fast_uint_t)(c0 > (c1 - (fast_sint_t)(s & 1))); + buckets[BUCKETS_INDEX4((fast_uint_t)c1, s & 3)]++; + + c1 = T[i - 2]; s = (s << 1) + (fast_uint_t)(c1 > (c0 - (fast_sint_t)(s & 1))); + buckets[BUCKETS_INDEX4((fast_uint_t)c0, s & 3)]++; + + c0 = T[i - 3]; s = 
(s << 1) + (fast_uint_t)(c0 > (c1 - (fast_sint_t)(s & 1))); + buckets[BUCKETS_INDEX4((fast_uint_t)c1, s & 3)]++; + } + + for (; i >= 0; i -= 1) + { + c1 = c0; c0 = T[i]; s = (s << 1) + (fast_uint_t)(c0 > (c1 - (fast_sint_t)(s & 1))); + buckets[BUCKETS_INDEX4((fast_uint_t)c1, s & 3)]++; + } + + buckets[BUCKETS_INDEX4((fast_uint_t)c0, (s << 1) & 3)]++; +} + +#endif + +static void libsais_count_lms_suffixes_32s_2k(const sa_sint_t * RESTRICT T, sa_sint_t n, sa_sint_t k, sa_sint_t * RESTRICT buckets) +{ + const fast_sint_t prefetch_distance = 32; + + memset(buckets, 0, 2 * (size_t)k * sizeof(sa_sint_t)); + + sa_sint_t i = n - 2; + fast_uint_t s = 1; + fast_sint_t c0 = T[n - 1]; + fast_sint_t c1 = 0; + + for (; i >= prefetch_distance + 3; i -= 4) + { + libsais_prefetch(&T[i - 2 * prefetch_distance]); + + libsais_prefetchw(&buckets[BUCKETS_INDEX2(T[i - prefetch_distance - 0], 0)]); + libsais_prefetchw(&buckets[BUCKETS_INDEX2(T[i - prefetch_distance - 1], 0)]); + libsais_prefetchw(&buckets[BUCKETS_INDEX2(T[i - prefetch_distance - 2], 0)]); + libsais_prefetchw(&buckets[BUCKETS_INDEX2(T[i - prefetch_distance - 3], 0)]); + + c1 = T[i - 0]; s = (s << 1) + (fast_uint_t)(c1 > (c0 - (fast_sint_t)(s & 1))); + buckets[BUCKETS_INDEX2((fast_uint_t)c0, (s & 3) == 1)]++; + + c0 = T[i - 1]; s = (s << 1) + (fast_uint_t)(c0 > (c1 - (fast_sint_t)(s & 1))); + buckets[BUCKETS_INDEX2((fast_uint_t)c1, (s & 3) == 1)]++; + + c1 = T[i - 2]; s = (s << 1) + (fast_uint_t)(c1 > (c0 - (fast_sint_t)(s & 1))); + buckets[BUCKETS_INDEX2((fast_uint_t)c0, (s & 3) == 1)]++; + + c0 = T[i - 3]; s = (s << 1) + (fast_uint_t)(c0 > (c1 - (fast_sint_t)(s & 1))); + buckets[BUCKETS_INDEX2((fast_uint_t)c1, (s & 3) == 1)]++; + } + + for (; i >= 0; i -= 1) + { + c1 = c0; c0 = T[i]; s = (s << 1) + (fast_uint_t)(c0 > (c1 - (fast_sint_t)(s & 1))); + buckets[BUCKETS_INDEX2((fast_uint_t)c1, (s & 3) == 1)]++; + } + + buckets[BUCKETS_INDEX2((fast_uint_t)c0, 0)]++; +} + +#if defined(_OPENMP) + +static void 
libsais_count_compacted_lms_suffixes_32s_2k(const sa_sint_t * RESTRICT T, sa_sint_t n, sa_sint_t k, sa_sint_t * RESTRICT buckets) +{ + const fast_sint_t prefetch_distance = 32; + + memset(buckets, 0, 2 * (size_t)k * sizeof(sa_sint_t)); + + sa_sint_t i = n - 2; + fast_uint_t s = 1; + fast_sint_t c0 = T[n - 1]; + fast_sint_t c1 = 0; + + for (; i >= prefetch_distance + 3; i -= 4) + { + libsais_prefetch(&T[i - 2 * prefetch_distance]); + + libsais_prefetchw(&buckets[BUCKETS_INDEX2(T[i - prefetch_distance - 0] & SAINT_MAX, 0)]); + libsais_prefetchw(&buckets[BUCKETS_INDEX2(T[i - prefetch_distance - 1] & SAINT_MAX, 0)]); + libsais_prefetchw(&buckets[BUCKETS_INDEX2(T[i - prefetch_distance - 2] & SAINT_MAX, 0)]); + libsais_prefetchw(&buckets[BUCKETS_INDEX2(T[i - prefetch_distance - 3] & SAINT_MAX, 0)]); + + c1 = T[i - 0]; s = (s << 1) + (fast_uint_t)(c1 > (c0 - (fast_sint_t)(s & 1))); + /* the comparison above used the raw c0; it is masked with SAINT_MAX only for bucket indexing */ c0 &= SAINT_MAX; buckets[BUCKETS_INDEX2((fast_uint_t)c0, (s & 3) == 1)]++; + + c0 = T[i - 1]; s = (s << 1) + (fast_uint_t)(c0 > (c1 - (fast_sint_t)(s & 1))); + c1 &= SAINT_MAX; buckets[BUCKETS_INDEX2((fast_uint_t)c1, (s & 3) == 1)]++; + + c1 = T[i - 2]; s = (s << 1) + (fast_uint_t)(c1 > (c0 - (fast_sint_t)(s & 1))); + c0 &= SAINT_MAX; buckets[BUCKETS_INDEX2((fast_uint_t)c0, (s & 3) == 1)]++; + + c0 = T[i - 3]; s = (s << 1) + (fast_uint_t)(c0 > (c1 - (fast_sint_t)(s & 1))); + c1 &= SAINT_MAX; buckets[BUCKETS_INDEX2((fast_uint_t)c1, (s & 3) == 1)]++; + } + + for (; i >= 0; i -= 1) + { + c1 = c0; c0 = T[i]; s = (s << 1) + (fast_uint_t)(c0 > (c1 - (fast_sint_t)(s & 1))); + c1 &= SAINT_MAX; buckets[BUCKETS_INDEX2((fast_uint_t)c1, (s & 3) == 1)]++; + } + + /* T[0] is always counted in slot 0 */ c0 &= SAINT_MAX; buckets[BUCKETS_INDEX2((fast_uint_t)c0, 0)]++; +} + +#endif + /* libsais_count_and_gather_lms_suffixes_8u: counts and gathers in one backward pass over the block [omp_block_start, omp_block_start + omp_block_size) of the byte text T. The 4 * ALPHABET_SIZE counters in buckets are zeroed first, even for an empty block. The state s is seeded by scanning forward from the block end while symbols equal the block's last symbol. At every step the candidate position is written to SA[m] and m is decremented only when (s & 3) == 1, so kept positions pile up downward from SA[omp_block_start + omp_block_size - 1]; the symbol is also counted in buckets[BUCKETS_INDEX4(symbol, s & 3)]. Returns the number of kept positions. NOTE(review): (s & 3) == 1 presumably identifies an LMS suffix, as the function name suggests - confirm. */ +static sa_sint_t libsais_count_and_gather_lms_suffixes_8u(const uint8_t * RESTRICT T, sa_sint_t * RESTRICT SA, sa_sint_t n, sa_sint_t * RESTRICT buckets, fast_sint_t omp_block_start, fast_sint_t omp_block_size) +{ + memset(buckets, 0, 4 * ALPHABET_SIZE *
sizeof(sa_sint_t)); + + /* m is the write cursor into SA; it starts at the last index of the block */ fast_sint_t m = omp_block_start + omp_block_size - 1; + + if (omp_block_size > 0) + { + const fast_sint_t prefetch_distance = 128; + + fast_sint_t i, j = m + 1, c0 = T[m], c1 = -1; + + /* scan forward from the block end while symbols equal c0; c1 keeps its initial -1 only when the block ends at n */ while (j < n && (c1 = T[j]) == c0) { ++j; } + + /* seed the rolling state from that look-ahead comparison */ fast_uint_t s = c0 >= c1; + + /* 4x unrolled backward scan; it stops while at least 3 positions remain above omp_block_start */ for (i = m - 1, j = omp_block_start + 3; i >= j; i -= 4) + { + libsais_prefetch(&T[i - prefetch_distance]); + + c1 = T[i - 0]; s = (s << 1) + (fast_uint_t)(c1 > (c0 - (fast_sint_t)(s & 1))); SA[m] = (sa_sint_t)(i + 1); m -= ((s & 3) == 1); + buckets[BUCKETS_INDEX4((fast_uint_t)c0, s & 3)]++; + + c0 = T[i - 1]; s = (s << 1) + (fast_uint_t)(c0 > (c1 - (fast_sint_t)(s & 1))); SA[m] = (sa_sint_t)(i - 0); m -= ((s & 3) == 1); + buckets[BUCKETS_INDEX4((fast_uint_t)c1, s & 3)]++; + + c1 = T[i - 2]; s = (s << 1) + (fast_uint_t)(c1 > (c0 - (fast_sint_t)(s & 1))); SA[m] = (sa_sint_t)(i - 1); m -= ((s & 3) == 1); + buckets[BUCKETS_INDEX4((fast_uint_t)c0, s & 3)]++; + + c0 = T[i - 3]; s = (s << 1) + (fast_uint_t)(c0 > (c1 - (fast_sint_t)(s & 1))); SA[m] = (sa_sint_t)(i - 2); m -= ((s & 3) == 1); + buckets[BUCKETS_INDEX4((fast_uint_t)c1, s & 3)]++; + } + + /* scalar tail: j is reset to omp_block_start */ for (j -= 3; i >= j; i -= 1) + { + c1 = c0; c0 = T[i]; s = (s << 1) + (fast_uint_t)(c0 > (c1 - (fast_sint_t)(s & 1))); SA[m] = (sa_sint_t)(i + 1); m -= ((s & 3) == 1); + buckets[BUCKETS_INDEX4((fast_uint_t)c1, s & 3)]++; + } + + /* final step: compare against the symbol just before the block, or -1 when the block starts at index 0 */ c1 = (i >= 0) ?
T[i] : -1; s = (s << 1) + (fast_uint_t)(c1 > (c0 - (fast_sint_t)(s & 1))); SA[m] = (sa_sint_t)(i + 1); m -= ((s & 3) == 1); + buckets[BUCKETS_INDEX4((fast_uint_t)c0, s & 3)]++; + } + + /* number of positions kept, i.e. how far the cursor m moved down from the block's last index */ return (sa_sint_t)(omp_block_start + omp_block_size - 1 - m); +} + /* libsais_count_and_gather_lms_suffixes_8u_omp: runs libsais_count_and_gather_lms_suffixes_8u over the whole text, optionally split across OpenMP threads (the parallel region is requested only when threads > 1, n >= 65536 and omp_get_dynamic() == 0). Each thread gets a block whose stride is n / omp_num_threads rounded down to a multiple of 16; the last thread also takes the remainder. With a single thread the result goes straight into buckets. Otherwise every thread counts into thread_state[t].state.buckets, and after a barrier the master thread packs the per-thread runs of positions toward the end of SA, sums the counters into buckets and leaves in each thread's buckets the partial sums of the threads after it. Returns the total number of gathered positions. */ +static sa_sint_t libsais_count_and_gather_lms_suffixes_8u_omp(const uint8_t * RESTRICT T, sa_sint_t * RESTRICT SA, sa_sint_t n, sa_sint_t * RESTRICT buckets, sa_sint_t threads, LIBSAIS_THREAD_STATE * RESTRICT thread_state) +{ + sa_sint_t m = 0; + +#if defined(_OPENMP) + #pragma omp parallel num_threads(threads) if(threads > 1 && n >= 65536 && omp_get_dynamic() == 0) +#endif + { +#if defined(_OPENMP) + fast_sint_t omp_thread_num = omp_get_thread_num(); + fast_sint_t omp_num_threads = omp_get_num_threads(); +#else + UNUSED(threads); UNUSED(thread_state); + + fast_sint_t omp_thread_num = 0; + fast_sint_t omp_num_threads = 1; +#endif + /* block stride is rounded down to a multiple of 16; the last thread takes whatever remains up to n */ fast_sint_t omp_block_stride = (n / omp_num_threads) & (-16); + fast_sint_t omp_block_start = omp_thread_num * omp_block_stride; + fast_sint_t omp_block_size = omp_thread_num < omp_num_threads - 1 ?
omp_block_stride : n - omp_block_start; + + if (omp_num_threads == 1) + { + m = libsais_count_and_gather_lms_suffixes_8u(T, SA, n, buckets, omp_block_start, omp_block_size); + } +#if defined(_OPENMP) + else + { + { + /* each thread records where its block ends and how many positions it kept */ thread_state[omp_thread_num].state.position = omp_block_start + omp_block_size; + thread_state[omp_thread_num].state.m = libsais_count_and_gather_lms_suffixes_8u(T, SA, n, thread_state[omp_thread_num].state.buckets, omp_block_start, omp_block_size); + + if (thread_state[omp_thread_num].state.m > 0) + { + /* remember the SA entry at the top of this thread's block */ thread_state[omp_thread_num].state.last_lms_suffix = SA[thread_state[omp_thread_num].state.position - 1]; + } + } + + #pragma omp barrier + + #pragma omp master + { + /* merge step: threads are visited from last to first so the runs end up contiguous at the end of SA */ memset(buckets, 0, 4 * ALPHABET_SIZE * sizeof(sa_sint_t)); + + fast_sint_t t; + for (t = omp_num_threads - 1; t >= 0; --t) + { + m += (sa_sint_t)thread_state[t].state.m; + + /* the last thread's block ends at n, so its run is already in place and is not copied */ if (t != omp_num_threads - 1 && thread_state[t].state.m > 0) + { + memcpy(&SA[n - m], &SA[thread_state[t].state.position - thread_state[t].state.m], (size_t)thread_state[t].state.m * sizeof(sa_sint_t)); + } + + { + sa_sint_t * RESTRICT temp_bucket = thread_state[t].state.buckets; + /* buckets accumulates the total; the thread's own array is replaced by the sum of the threads visited before it */ fast_sint_t s; for (s = 0; s < 4 * ALPHABET_SIZE; s += 1) { sa_sint_t A = buckets[s], B = temp_bucket[s]; buckets[s] = A + B; temp_bucket[s] = A; } + } + } + } + } +#endif + } + + return m; +} + /* libsais_count_and_gather_lms_suffixes_32s_4k: integer-alphabet version of the count-and-gather pass. Zeroes 4 * k counters in buckets, then scans the block [omp_block_start, omp_block_start + omp_block_size) of T backward, writing candidate positions to SA[m] (m is decremented only when (s & 3) == 1, so kept positions pile up downward from the block's last index) and incrementing buckets[BUCKETS_INDEX4(symbol, s & 3)]. Uses a prefetch distance of 32 and also prefetches the bucket slots of upcoming symbols. Returns the number of kept positions. NOTE(review): (s & 3) == 1 presumably identifies an LMS suffix - confirm. */ +static sa_sint_t libsais_count_and_gather_lms_suffixes_32s_4k(const sa_sint_t * RESTRICT T, sa_sint_t * RESTRICT SA, sa_sint_t n, sa_sint_t k, sa_sint_t * RESTRICT buckets, fast_sint_t omp_block_start, fast_sint_t omp_block_size) +{ + memset(buckets, 0, 4 * (size_t)k * sizeof(sa_sint_t)); + + fast_sint_t m = omp_block_start + omp_block_size - 1; + + if (omp_block_size > 0) + { + const fast_sint_t prefetch_distance = 32; + + fast_sint_t i, j = m + 1, c0 = T[m], c1 = -1; + + /* scan forward from the block end while symbols equal c0, then seed s from that comparison */ while (j < n && (c1 = T[j]) == c0) { ++j; } + + fast_uint_t s = c0 >= c1; + + for (i = m - 1, j = omp_block_start + prefetch_distance + 3; i >= j; i -= 4) + { +
libsais_prefetch(&T[i - 2 * prefetch_distance]); + + /* prefetch for writing the bucket slots of the symbols that lie prefetch_distance positions ahead of the scan */ libsais_prefetchw(&buckets[BUCKETS_INDEX4(T[i - prefetch_distance - 0], 0)]); + libsais_prefetchw(&buckets[BUCKETS_INDEX4(T[i - prefetch_distance - 1], 0)]); + libsais_prefetchw(&buckets[BUCKETS_INDEX4(T[i - prefetch_distance - 2], 0)]); + libsais_prefetchw(&buckets[BUCKETS_INDEX4(T[i - prefetch_distance - 3], 0)]); + + /* each step: load the next symbol, update s, tentatively store the position at SA[m], keep it only when (s & 3) == 1, then count the previous symbol */ c1 = T[i - 0]; s = (s << 1) + (fast_uint_t)(c1 > (c0 - (fast_sint_t)(s & 1))); SA[m] = (sa_sint_t)(i + 1); m -= ((s & 3) == 1); + buckets[BUCKETS_INDEX4((fast_uint_t)c0, s & 3)]++; + + c0 = T[i - 1]; s = (s << 1) + (fast_uint_t)(c0 > (c1 - (fast_sint_t)(s & 1))); SA[m] = (sa_sint_t)(i - 0); m -= ((s & 3) == 1); + buckets[BUCKETS_INDEX4((fast_uint_t)c1, s & 3)]++; + + c1 = T[i - 2]; s = (s << 1) + (fast_uint_t)(c1 > (c0 - (fast_sint_t)(s & 1))); SA[m] = (sa_sint_t)(i - 1); m -= ((s & 3) == 1); + buckets[BUCKETS_INDEX4((fast_uint_t)c0, s & 3)]++; + + c0 = T[i - 3]; s = (s << 1) + (fast_uint_t)(c0 > (c1 - (fast_sint_t)(s & 1))); SA[m] = (sa_sint_t)(i - 2); m -= ((s & 3) == 1); + buckets[BUCKETS_INDEX4((fast_uint_t)c1, s & 3)]++; + } + + /* scalar tail: j is reset to omp_block_start */ for (j -= prefetch_distance + 3; i >= j; i -= 1) + { + c1 = c0; c0 = T[i]; s = (s << 1) + (fast_uint_t)(c0 > (c1 - (fast_sint_t)(s & 1))); SA[m] = (sa_sint_t)(i + 1); m -= ((s & 3) == 1); + buckets[BUCKETS_INDEX4((fast_uint_t)c1, s & 3)]++; + } + + /* final step: compare against the symbol just before the block, or -1 when the block starts at index 0 */ c1 = (i >= 0) ?
T[i] : -1; s = (s << 1) + (fast_uint_t)(c1 > (c0 - (fast_sint_t)(s & 1))); SA[m] = (sa_sint_t)(i + 1); m -= ((s & 3) == 1); + buckets[BUCKETS_INDEX4((fast_uint_t)c0, s & 3)]++; + } + + /* number of positions kept in this block */ return (sa_sint_t)(omp_block_start + omp_block_size - 1 - m); +} + /* libsais_count_and_gather_lms_suffixes_32s_2k: same backward count-and-gather pass over the block [omp_block_start, omp_block_start + omp_block_size) of T, but with only 2 * k counters. Each symbol is counted in buckets[BUCKETS_INDEX2(symbol, (s & 3) == 1)], and a position is kept in SA (m decremented) when (s & 3) == 1. Returns the number of kept positions. NOTE(review): (s & 3) == 1 presumably identifies an LMS suffix - confirm. */ +static sa_sint_t libsais_count_and_gather_lms_suffixes_32s_2k(const sa_sint_t * RESTRICT T, sa_sint_t * RESTRICT SA, sa_sint_t n, sa_sint_t k, sa_sint_t * RESTRICT buckets, fast_sint_t omp_block_start, fast_sint_t omp_block_size) +{ + memset(buckets, 0, 2 * (size_t)k * sizeof(sa_sint_t)); + + fast_sint_t m = omp_block_start + omp_block_size - 1; + + if (omp_block_size > 0) + { + const fast_sint_t prefetch_distance = 32; + + fast_sint_t i, j = m + 1, c0 = T[m], c1 = -1; + + /* scan forward from the block end while symbols equal c0, then seed s from that comparison */ while (j < n && (c1 = T[j]) == c0) { ++j; } + + fast_uint_t s = c0 >= c1; + + for (i = m - 1, j = omp_block_start + prefetch_distance + 3; i >= j; i -= 4) + { + libsais_prefetch(&T[i - 2 * prefetch_distance]); + + libsais_prefetchw(&buckets[BUCKETS_INDEX2(T[i - prefetch_distance - 0], 0)]); + libsais_prefetchw(&buckets[BUCKETS_INDEX2(T[i - prefetch_distance - 1], 0)]); + libsais_prefetchw(&buckets[BUCKETS_INDEX2(T[i - prefetch_distance - 2], 0)]); + libsais_prefetchw(&buckets[BUCKETS_INDEX2(T[i - prefetch_distance - 3], 0)]); + + c1 = T[i - 0]; s = (s << 1) + (fast_uint_t)(c1 > (c0 - (fast_sint_t)(s & 1))); SA[m] = (sa_sint_t)(i + 1); m -= ((s & 3) == 1); + buckets[BUCKETS_INDEX2((fast_uint_t)c0, (s & 3) == 1)]++; + + c0 = T[i - 1]; s = (s << 1) + (fast_uint_t)(c0 > (c1 - (fast_sint_t)(s & 1))); SA[m] = (sa_sint_t)(i - 0); m -= ((s & 3) == 1); + buckets[BUCKETS_INDEX2((fast_uint_t)c1, (s & 3) == 1)]++; + + c1 = T[i - 2]; s = (s << 1) + (fast_uint_t)(c1 > (c0 - (fast_sint_t)(s & 1))); SA[m] = (sa_sint_t)(i - 1); m -= ((s & 3) == 1); + buckets[BUCKETS_INDEX2((fast_uint_t)c0, (s & 3) == 1)]++; + + c0 = T[i - 3]; s = (s << 1) + (fast_uint_t)(c0 > (c1 - (fast_sint_t)(s & 1))); SA[m] = (sa_sint_t)(i - 2); m -= ((s & 3) == 1); +
buckets[BUCKETS_INDEX2((fast_uint_t)c1, (s & 3) == 1)]++; + } + + /* scalar tail: j is reset to omp_block_start */ for (j -= prefetch_distance + 3; i >= j; i -= 1) + { + c1 = c0; c0 = T[i]; s = (s << 1) + (fast_uint_t)(c0 > (c1 - (fast_sint_t)(s & 1))); SA[m] = (sa_sint_t)(i + 1); m -= ((s & 3) == 1); + buckets[BUCKETS_INDEX2((fast_uint_t)c1, (s & 3) == 1)]++; + } + + /* final step: compare against the symbol just before the block, or -1 when the block starts at index 0 */ c1 = (i >= 0) ? T[i] : -1; s = (s << 1) + (fast_uint_t)(c1 > (c0 - (fast_sint_t)(s & 1))); SA[m] = (sa_sint_t)(i + 1); m -= ((s & 3) == 1); + buckets[BUCKETS_INDEX2((fast_uint_t)c0, (s & 3) == 1)]++; + } + + return (sa_sint_t)(omp_block_start + omp_block_size - 1 - m); +} + /* libsais_count_and_gather_compacted_lms_suffixes_32s_2k: variant of the 2 * k count-and-gather pass for text whose symbols may be negative. A position is kept in SA (m decremented) when (s & 3) equals (symbol >= 0), which for a non-negative symbol is the usual (s & 3) == 1 test. Every symbol is masked with SAINT_MAX before it indexes buckets, while the comparisons that feed s use the unmasked values. Returns the number of kept positions. NOTE(review): negative symbols presumably mark entries flagged by an earlier compaction step - confirm against the caller. */ +static sa_sint_t libsais_count_and_gather_compacted_lms_suffixes_32s_2k(const sa_sint_t * RESTRICT T, sa_sint_t * RESTRICT SA, sa_sint_t n, sa_sint_t k, sa_sint_t * RESTRICT buckets, fast_sint_t omp_block_start, fast_sint_t omp_block_size) +{ + memset(buckets, 0, 2 * (size_t)k * sizeof(sa_sint_t)); + + fast_sint_t m = omp_block_start + omp_block_size - 1; + + if (omp_block_size > 0) + { + const fast_sint_t prefetch_distance = 32; + + fast_sint_t i, j = m + 1, c0 = T[m], c1 = -1; + + while (j < n && (c1 = T[j]) == c0) { ++j; } + + fast_uint_t s = c0 >= c1; + + for (i = m - 1, j = omp_block_start + prefetch_distance + 3; i >= j; i -= 4) + { + libsais_prefetch(&T[i - 2 * prefetch_distance]); + + libsais_prefetchw(&buckets[BUCKETS_INDEX2(T[i - prefetch_distance - 0] & SAINT_MAX, 0)]); + libsais_prefetchw(&buckets[BUCKETS_INDEX2(T[i - prefetch_distance - 1] & SAINT_MAX, 0)]); + libsais_prefetchw(&buckets[BUCKETS_INDEX2(T[i - prefetch_distance - 2] & SAINT_MAX, 0)]); + libsais_prefetchw(&buckets[BUCKETS_INDEX2(T[i - prefetch_distance - 3] & SAINT_MAX, 0)]); + + c1 = T[i - 0]; s = (s << 1) + (fast_uint_t)(c1 > (c0 - (fast_sint_t)(s & 1))); SA[m] = (sa_sint_t)(i + 1); m -= ((fast_sint_t)(s & 3) == (c0 >= 0)); + c0 &= SAINT_MAX; buckets[BUCKETS_INDEX2((fast_uint_t)c0, (s & 3) == 1)]++; + + c0 = T[i - 1]; s = (s << 1) + (fast_uint_t)(c0 > (c1 - (fast_sint_t)(s & 1))); SA[m] =
(sa_sint_t)(i - 0); m -= ((fast_sint_t)(s & 3) == (c1 >= 0)); + c1 &= SAINT_MAX; buckets[BUCKETS_INDEX2((fast_uint_t)c1, (s & 3) == 1)]++; + + c1 = T[i - 2]; s = (s << 1) + (fast_uint_t)(c1 > (c0 - (fast_sint_t)(s & 1))); SA[m] = (sa_sint_t)(i - 1); m -= ((fast_sint_t)(s & 3) == (c0 >= 0)); + c0 &= SAINT_MAX; buckets[BUCKETS_INDEX2((fast_uint_t)c0, (s & 3) == 1)]++; + + c0 = T[i - 3]; s = (s << 1) + (fast_uint_t)(c0 > (c1 - (fast_sint_t)(s & 1))); SA[m] = (sa_sint_t)(i - 2); m -= ((fast_sint_t)(s & 3) == (c1 >= 0)); + c1 &= SAINT_MAX; buckets[BUCKETS_INDEX2((fast_uint_t)c1, (s & 3) == 1)]++; + } + + for (j -= prefetch_distance + 3; i >= j; i -= 1) + { + c1 = c0; c0 = T[i]; s = (s << 1) + (fast_uint_t)(c0 > (c1 - (fast_sint_t)(s & 1))); SA[m] = (sa_sint_t)(i + 1); m -= ((fast_sint_t)(s & 3) == (c1 >= 0)); + c1 &= SAINT_MAX; buckets[BUCKETS_INDEX2((fast_uint_t)c1, (s & 3) == 1)]++; + } + + c1 = (i >= 0) ? T[i] : -1; s = (s << 1) + (fast_uint_t)(c1 > (c0 - (fast_sint_t)(s & 1))); SA[m] = (sa_sint_t)(i + 1); m -= ((fast_sint_t)(s & 3) == (c0 >= 0)); + c0 &= SAINT_MAX; buckets[BUCKETS_INDEX2((fast_uint_t)c0, (s & 3) == 1)]++; + } + + return (sa_sint_t)(omp_block_start + omp_block_size - 1 - m); +} + +#if defined(_OPENMP) + /* libsais_get_bucket_stride: picks the spacing between per-thread counter areas. Returns bucket_size rounded up to a multiple of 1024 when free_space / (num_buckets - 1) is at least that large, otherwise bucket_size rounded up to a multiple of 16 when that fits, otherwise bucket_size unchanged. It divides by num_buckets - 1, so num_buckets must not be 1; the callers in this file reach it only when omp_num_threads != 1. */ +static fast_sint_t libsais_get_bucket_stride(fast_sint_t free_space, fast_sint_t bucket_size, fast_sint_t num_buckets) +{ + fast_sint_t bucket_size_1024 = (bucket_size + 1023) & (-1024); if (free_space / (num_buckets - 1) >= bucket_size_1024) { return bucket_size_1024; } + fast_sint_t bucket_size_16 = (bucket_size + 15) & (-16); if (free_space / (num_buckets - 1) >= bucket_size_16) { return bucket_size_16; } + + return bucket_size; +} + /* libsais_count_and_gather_lms_suffixes_32s_4k_fs_omp: multi-threaded count-and-gather with 4 * k counters that uses the free space between &SA[n] and buckets for per-thread counter copies. Thread t counts its text block into buckets - t * bucket_stride. After a barrier the last thread packs the per-thread runs of positions toward the end of SA and sums their sizes into m, while the remaining threads split the counter range among themselves and call libsais_accumulate_counts_s32. Returns m. NOTE(review): libsais_accumulate_counts_s32 is defined outside this view; presumably it adds the per-thread copies into buckets. */ +static sa_sint_t libsais_count_and_gather_lms_suffixes_32s_4k_fs_omp(const sa_sint_t * RESTRICT T, sa_sint_t * RESTRICT SA, sa_sint_t n, sa_sint_t k, sa_sint_t * RESTRICT buckets, sa_sint_t threads, LIBSAIS_THREAD_STATE * RESTRICT thread_state) +{ + sa_sint_t m = 0; + +#if defined(_OPENMP) + #pragma omp parallel
num_threads(threads) if(threads > 1 && n >= 65536) +#endif + { +#if defined(_OPENMP) + fast_sint_t omp_thread_num = omp_get_thread_num(); + fast_sint_t omp_num_threads = omp_get_num_threads(); +#else + UNUSED(threads); UNUSED(thread_state); + + fast_sint_t omp_thread_num = 0; + fast_sint_t omp_num_threads = 1; +#endif + /* text block per thread: stride rounded down to a multiple of 16, last thread takes the remainder */ fast_sint_t omp_block_stride = (n / omp_num_threads) & (-16); + fast_sint_t omp_block_start = omp_thread_num * omp_block_stride; + fast_sint_t omp_block_size = omp_thread_num < omp_num_threads - 1 ? omp_block_stride : n - omp_block_start; + + if (omp_num_threads == 1) + { + m = libsais_count_and_gather_lms_suffixes_32s_4k(T, SA, n, k, buckets, omp_block_start, omp_block_size); + } +#if defined(_OPENMP) + else + { + /* per-thread counter copies live below buckets, bucket_stride entries apart, inside the gap measured from &SA[n] */ fast_sint_t bucket_size = 4 * (fast_sint_t)k; + fast_sint_t bucket_stride = libsais_get_bucket_stride(buckets - &SA[n], bucket_size, omp_num_threads); + + { + thread_state[omp_thread_num].state.position = omp_block_start + omp_block_size; + thread_state[omp_thread_num].state.count = libsais_count_and_gather_lms_suffixes_32s_4k(T, SA, n, k, buckets - (omp_thread_num * bucket_stride), omp_block_start, omp_block_size); + } + + #pragma omp barrier + + if (omp_thread_num == omp_num_threads - 1) + { + /* last thread: visit threads from last to first and pack their runs at the end of SA; its own run is already in place */ fast_sint_t t; + for (t = omp_num_threads - 1; t >= 0; --t) + { + m += (sa_sint_t)thread_state[t].state.count; + + if (t != omp_num_threads - 1 && thread_state[t].state.count > 0) + { + memcpy(&SA[n - m], &SA[thread_state[t].state.position - thread_state[t].state.count], (size_t)thread_state[t].state.count * sizeof(sa_sint_t)); + } + } + } + else + { + /* the other threads share the counter range; omp_num_threads is reduced by one here because the last thread is busy merging */ omp_num_threads = omp_num_threads - 1; + omp_block_stride = (bucket_size / omp_num_threads) & (-16); + omp_block_start = omp_thread_num * omp_block_stride; + omp_block_size = omp_thread_num < omp_num_threads - 1 ?
omp_block_stride : bucket_size - omp_block_start; + + /* omp_num_threads was decremented above, so omp_num_threads + 1 is the original thread count */ libsais_accumulate_counts_s32(buckets + omp_block_start, omp_block_size, bucket_stride, omp_num_threads + 1); + } + } +#endif + } + + return m; +} + /* libsais_count_and_gather_lms_suffixes_32s_2k_fs_omp: multi-threaded count-and-gather with 2 * k counters that uses the free space between &SA[n] and buckets for per-thread counter copies. Thread t counts its text block into buckets - t * bucket_stride. After a barrier the last thread packs the per-thread runs of positions toward the end of SA and sums their sizes into m, while the remaining threads split the counter range among themselves and call libsais_accumulate_counts_s32. Returns m. NOTE(review): libsais_accumulate_counts_s32 is defined outside this view; presumably it adds the per-thread copies into buckets. */ +static sa_sint_t libsais_count_and_gather_lms_suffixes_32s_2k_fs_omp(const sa_sint_t * RESTRICT T, sa_sint_t * RESTRICT SA, sa_sint_t n, sa_sint_t k, sa_sint_t * RESTRICT buckets, sa_sint_t threads, LIBSAIS_THREAD_STATE * RESTRICT thread_state) +{ + sa_sint_t m = 0; + +#if defined(_OPENMP) + #pragma omp parallel num_threads(threads) if(threads > 1 && n >= 65536) +#endif + { +#if defined(_OPENMP) + fast_sint_t omp_thread_num = omp_get_thread_num(); + fast_sint_t omp_num_threads = omp_get_num_threads(); +#else + UNUSED(threads); UNUSED(thread_state); + + fast_sint_t omp_thread_num = 0; + fast_sint_t omp_num_threads = 1; +#endif + /* text block per thread: stride rounded down to a multiple of 16, last thread takes the remainder */ fast_sint_t omp_block_stride = (n / omp_num_threads) & (-16); + fast_sint_t omp_block_start = omp_thread_num * omp_block_stride; + fast_sint_t omp_block_size = omp_thread_num < omp_num_threads - 1 ?
omp_block_stride : n - omp_block_start; + + if (omp_num_threads == 1) + { + m = libsais_count_and_gather_lms_suffixes_32s_2k(T, SA, n, k, buckets, omp_block_start, omp_block_size); + } +#if defined(_OPENMP) + else + { + /* per-thread counter copies live below buckets, bucket_stride entries apart, inside the gap measured from &SA[n] */ fast_sint_t bucket_size = 2 * (fast_sint_t)k; + fast_sint_t bucket_stride = libsais_get_bucket_stride(buckets - &SA[n], bucket_size, omp_num_threads); + + { + thread_state[omp_thread_num].state.position = omp_block_start + omp_block_size; + thread_state[omp_thread_num].state.count = libsais_count_and_gather_lms_suffixes_32s_2k(T, SA, n, k, buckets - (omp_thread_num * bucket_stride), omp_block_start, omp_block_size); + } + + #pragma omp barrier + + if (omp_thread_num == omp_num_threads - 1) + { + /* last thread: visit threads from last to first and pack their runs at the end of SA; its own run is already in place */ fast_sint_t t; + for (t = omp_num_threads - 1; t >= 0; --t) + { + m += (sa_sint_t)thread_state[t].state.count; + + if (t != omp_num_threads - 1 && thread_state[t].state.count > 0) + { + memcpy(&SA[n - m], &SA[thread_state[t].state.position - thread_state[t].state.count], (size_t)thread_state[t].state.count * sizeof(sa_sint_t)); + } + } + } + else + { + /* the other threads share the counter range; omp_num_threads is reduced by one here because the last thread is busy merging */ omp_num_threads = omp_num_threads - 1; + omp_block_stride = (bucket_size / omp_num_threads) & (-16); + omp_block_start = omp_thread_num * omp_block_stride; + omp_block_size = omp_thread_num < omp_num_threads - 1 ?
omp_block_stride : bucket_size - omp_block_start; + + /* omp_num_threads was decremented above, so omp_num_threads + 1 is the original thread count */ libsais_accumulate_counts_s32(buckets + omp_block_start, omp_block_size, bucket_stride, omp_num_threads + 1); + } + } +#endif + } + + return m; +} + /* libsais_count_and_gather_compacted_lms_suffixes_32s_2k_fs_omp: multi-threaded compacted count-and-gather with 2 * k counters. Positions are gathered into the region starting at SA + n, and the per-thread counter copies are placed in the gap measured from &SA[n + n] up to buckets. After a barrier every thread copies its own run of positions to &SA[n - m], where m is the sum of the counts of itself and all later threads, and then all threads share the call to libsais_accumulate_counts_s32. Returns nothing. NOTE(review): libsais_accumulate_counts_s32 is defined outside this view; presumably it adds the per-thread copies into buckets. */ +static void libsais_count_and_gather_compacted_lms_suffixes_32s_2k_fs_omp(const sa_sint_t * RESTRICT T, sa_sint_t * RESTRICT SA, sa_sint_t n, sa_sint_t k, sa_sint_t * RESTRICT buckets, sa_sint_t threads, LIBSAIS_THREAD_STATE * RESTRICT thread_state) +{ +#if defined(_OPENMP) + #pragma omp parallel num_threads(threads) if(threads > 1 && n >= 65536) +#endif + { +#if defined(_OPENMP) + fast_sint_t omp_thread_num = omp_get_thread_num(); + fast_sint_t omp_num_threads = omp_get_num_threads(); +#else + UNUSED(threads); UNUSED(thread_state); + + fast_sint_t omp_thread_num = 0; + fast_sint_t omp_num_threads = 1; +#endif + /* text block per thread: stride rounded down to a multiple of 16, last thread takes the remainder */ fast_sint_t omp_block_stride = (n / omp_num_threads) & (-16); + fast_sint_t omp_block_start = omp_thread_num * omp_block_stride; + fast_sint_t omp_block_size = omp_thread_num < omp_num_threads - 1 ?
omp_block_stride : n - omp_block_start; + + if (omp_num_threads == 1) + { + libsais_count_and_gather_compacted_lms_suffixes_32s_2k(T, SA, n, k, buckets, omp_block_start, omp_block_size); + } +#if defined(_OPENMP) + else + { + /* the free space is measured from &SA[n + n] because the region starting at SA + n receives the gathered positions below */ fast_sint_t bucket_size = 2 * (fast_sint_t)k; + fast_sint_t bucket_stride = libsais_get_bucket_stride(buckets - &SA[n + n], bucket_size, omp_num_threads); + + { + thread_state[omp_thread_num].state.position = omp_block_start + omp_block_size; + thread_state[omp_thread_num].state.count = libsais_count_and_gather_compacted_lms_suffixes_32s_2k(T, SA + n, n, k, buckets - (omp_thread_num * bucket_stride), omp_block_start, omp_block_size); + } + + #pragma omp barrier + + { + /* m is the number of positions kept by this thread and all later threads; it fixes where this thread's run lands below SA[n] */ fast_sint_t t, m = 0; for (t = omp_num_threads - 1; t >= omp_thread_num; --t) { m += (sa_sint_t)thread_state[t].state.count; } + + if (thread_state[omp_thread_num].state.count > 0) + { + memcpy(&SA[n - m], &SA[n + thread_state[omp_thread_num].state.position - thread_state[omp_thread_num].state.count], (size_t)thread_state[omp_thread_num].state.count * sizeof(sa_sint_t)); + } + } + + { + /* all threads take part in the accumulation, each on its own slice of the counter range */ omp_block_stride = (bucket_size / omp_num_threads) & (-16); + omp_block_start = omp_thread_num * omp_block_stride; + omp_block_size = omp_thread_num < omp_num_threads - 1 ?
omp_block_stride : bucket_size - omp_block_start; + + libsais_accumulate_counts_s32(buckets + omp_block_start, omp_block_size, bucket_stride, omp_num_threads); + } + } +#endif + } +} + +#endif + /* libsais_count_and_gather_lms_suffixes_32s_4k_nofs_omp: count-and-gather with 4 * k counters that needs no extra counter copies. It requests at most two threads (only when threads > 1 and n >= 65536): thread 0 runs libsais_count_lms_suffixes_32s_4k and the other thread runs libsais_gather_lms_suffixes_32s, whose result becomes m. With a single thread the combined libsais_count_and_gather_lms_suffixes_32s_4k is called over the whole text. Returns m. */ +static sa_sint_t libsais_count_and_gather_lms_suffixes_32s_4k_nofs_omp(const sa_sint_t * RESTRICT T, sa_sint_t * RESTRICT SA, sa_sint_t n, sa_sint_t k, sa_sint_t * RESTRICT buckets, sa_sint_t threads) +{ + sa_sint_t m = 0; + +#if defined(_OPENMP) + #pragma omp parallel num_threads(2) if(threads > 1 && n >= 65536) +#endif + { +#if defined(_OPENMP) + fast_sint_t omp_thread_num = omp_get_thread_num(); + fast_sint_t omp_num_threads = omp_get_num_threads(); +#else + UNUSED(threads); + + fast_sint_t omp_num_threads = 1; +#endif + if (omp_num_threads == 1) + { + m = libsais_count_and_gather_lms_suffixes_32s_4k(T, SA, n, k, buckets, 0, n); + } +#if defined(_OPENMP) + else if (omp_thread_num == 0) + { + libsais_count_lms_suffixes_32s_4k(T, n, k, buckets); + } + else + { + m = libsais_gather_lms_suffixes_32s(T, SA, n); + } +#endif + } + + return m; +} + /* libsais_count_and_gather_lms_suffixes_32s_2k_nofs_omp: same two-thread split as the 4k variant but with 2 * k counters: thread 0 runs libsais_count_lms_suffixes_32s_2k, the other thread runs libsais_gather_lms_suffixes_32s, and a single thread falls back to libsais_count_and_gather_lms_suffixes_32s_2k over the whole text. Returns the number of gathered positions. */ +static sa_sint_t libsais_count_and_gather_lms_suffixes_32s_2k_nofs_omp(const sa_sint_t * RESTRICT T, sa_sint_t * RESTRICT SA, sa_sint_t n, sa_sint_t k, sa_sint_t * RESTRICT buckets, sa_sint_t threads) +{ + sa_sint_t m = 0; + +#if defined(_OPENMP) + #pragma omp parallel num_threads(2) if(threads > 1 && n >= 65536) +#endif + { +#if defined(_OPENMP) + fast_sint_t omp_thread_num = omp_get_thread_num(); + fast_sint_t omp_num_threads = omp_get_num_threads(); +#else + UNUSED(threads); + + fast_sint_t omp_num_threads = 1; +#endif + if (omp_num_threads == 1) + { + m = libsais_count_and_gather_lms_suffixes_32s_2k(T, SA, n, k, buckets, 0, n); + } +#if defined(_OPENMP) + else if (omp_thread_num == 0) + { + libsais_count_lms_suffixes_32s_2k(T, n, k, buckets); + } + else + { + m = libsais_gather_lms_suffixes_32s(T, SA, n); + } +#endif + } + + return m; +} + /* libsais_count_and_gather_compacted_lms_suffixes_32s_2k_nofs_omp (definition starts here): compacted counterpart of the two-thread split: thread 0 runs libsais_count_compacted_lms_suffixes_32s_2k, the other thread runs libsais_gather_compacted_lms_suffixes_32s, and a single thread falls back to libsais_count_and_gather_compacted_lms_suffixes_32s_2k over the whole text. Returns the number of gathered positions. */ +static sa_sint_t libsais_count_and_gather_compacted_lms_suffixes_32s_2k_nofs_omp(const
sa_sint_t * RESTRICT T, sa_sint_t * RESTRICT SA, sa_sint_t n, sa_sint_t k, sa_sint_t * RESTRICT buckets, sa_sint_t threads) +{ + sa_sint_t m = 0; + +#if defined(_OPENMP) + #pragma omp parallel num_threads(2) if(threads > 1 && n >= 65536) +#endif + { +#if defined(_OPENMP) + fast_sint_t omp_thread_num = omp_get_thread_num(); + fast_sint_t omp_num_threads = omp_get_num_threads(); +#else + UNUSED(threads); + + fast_sint_t omp_num_threads = 1; +#endif + if (omp_num_threads == 1) + { + m = libsais_count_and_gather_compacted_lms_suffixes_32s_2k(T, SA, n, k, buckets, 0, n); + } +#if defined(_OPENMP) + else if (omp_thread_num == 0) + { + /* thread 0 only counts; the other thread only gathers and supplies m */ libsais_count_compacted_lms_suffixes_32s_2k(T, n, k, buckets); + } + else + { + m = libsais_gather_compacted_lms_suffixes_32s(T, SA, n); + } +#endif + } + + return m; +} + /* libsais_count_and_gather_lms_suffixes_32s_4k_omp: dispatcher for the 4 * k count-and-gather pass. With OpenMP it computes how many per-thread counter copies (4 * k rounded up to a multiple of 16 each) fit in the gap between &SA[n] and buckets, capped by threads. If more than one fits, n >= 65536 and n / k >= 2, the thread count is further capped at n / 16 / k and the free-space variant is called with at least 2 threads; otherwise, and always without OpenMP, the no-free-space variant is called. Returns the number of gathered positions. NOTE(review): the expressions divide by k, so k is assumed to be non-zero - confirm with callers. */ +static sa_sint_t libsais_count_and_gather_lms_suffixes_32s_4k_omp(const sa_sint_t * RESTRICT T, sa_sint_t * RESTRICT SA, sa_sint_t n, sa_sint_t k, sa_sint_t * RESTRICT buckets, sa_sint_t threads, LIBSAIS_THREAD_STATE * RESTRICT thread_state) +{ + sa_sint_t m; + +#if defined(_OPENMP) + sa_sint_t max_threads = (sa_sint_t)((buckets - &SA[n]) / ((4 * (fast_sint_t)k + 15) & (-16))); if (max_threads > threads) { max_threads = threads; } + if (max_threads > 1 && n >= 65536 && n / k >= 2) + { + if (max_threads > n / 16 / k) { max_threads = n / 16 / k; } + m = libsais_count_and_gather_lms_suffixes_32s_4k_fs_omp(T, SA, n, k, buckets, max_threads > 2 ?
max_threads : 2, thread_state); + } + else +#else + UNUSED(thread_state); +#endif + { + m = libsais_count_and_gather_lms_suffixes_32s_4k_nofs_omp(T, SA, n, k, buckets, threads); + } + + return m; +} + /* libsais_count_and_gather_lms_suffixes_32s_2k_omp: dispatcher for the 2 * k count-and-gather pass. With OpenMP it computes how many per-thread counter copies (2 * k rounded up to a multiple of 16 each) fit in the gap between &SA[n] and buckets, capped by threads. If more than one fits, n >= 65536 and n / k >= 2, the thread count is further capped at n / 8 / k and the free-space variant is called with at least 2 threads; otherwise, and always without OpenMP, the no-free-space variant is called. Returns the number of gathered positions. NOTE(review): the expressions divide by k, so k is assumed to be non-zero - confirm with callers. */ +static sa_sint_t libsais_count_and_gather_lms_suffixes_32s_2k_omp(const sa_sint_t * RESTRICT T, sa_sint_t * RESTRICT SA, sa_sint_t n, sa_sint_t k, sa_sint_t * RESTRICT buckets, sa_sint_t threads, LIBSAIS_THREAD_STATE * RESTRICT thread_state) +{ + sa_sint_t m; + +#if defined(_OPENMP) + sa_sint_t max_threads = (sa_sint_t)((buckets - &SA[n]) / ((2 * (fast_sint_t)k + 15) & (-16))); if (max_threads > threads) { max_threads = threads; } + if (max_threads > 1 && n >= 65536 && n / k >= 2) + { + if (max_threads > n / 8 / k) { max_threads = n / 8 / k; } + m = libsais_count_and_gather_lms_suffixes_32s_2k_fs_omp(T, SA, n, k, buckets, max_threads > 2 ? max_threads : 2, thread_state); + } + else +#else + UNUSED(thread_state); +#endif + { + m = libsais_count_and_gather_lms_suffixes_32s_2k_nofs_omp(T, SA, n, k, buckets, threads); + } + + return m; +} + /* libsais_count_and_gather_compacted_lms_suffixes_32s_2k_omp: dispatcher for the compacted 2 * k pass. Same selection rule as the non-compacted 2k dispatcher, except that the free space is measured from &SA[n + n] and nothing is returned (the count produced by the no-free-space variant is discarded). */ +static void libsais_count_and_gather_compacted_lms_suffixes_32s_2k_omp(const sa_sint_t * RESTRICT T, sa_sint_t * RESTRICT SA, sa_sint_t n, sa_sint_t k, sa_sint_t * RESTRICT buckets, sa_sint_t threads, LIBSAIS_THREAD_STATE * RESTRICT thread_state) +{ +#if defined(_OPENMP) + sa_sint_t max_threads = (sa_sint_t)((buckets - &SA[n + n]) / ((2 * (fast_sint_t)k + 15) & (-16))); if (max_threads > threads) { max_threads = threads; } + if (max_threads > 1 && n >= 65536 && n / k >= 2) + { + if (max_threads > n / 8 / k) { max_threads = n / 8 / k; } + libsais_count_and_gather_compacted_lms_suffixes_32s_2k_fs_omp(T, SA, n, k, buckets, max_threads > 2 ?
max_threads : 2, thread_state); + } + else +#else + UNUSED(thread_state); +#endif + { + libsais_count_and_gather_compacted_lms_suffixes_32s_2k_nofs_omp(T, SA, n, k, buckets, threads); + } +} + /* libsais_count_suffixes_32s: zeroes k counters in buckets and builds a histogram of the n symbols of T, i.e. buckets[T[i]] is incremented for every i in [0, n). The main loop handles 8 symbols per iteration with a read prefetch 32 entries ahead; a scalar loop finishes the remainder. */ +static void libsais_count_suffixes_32s(const sa_sint_t * RESTRICT T, sa_sint_t n, sa_sint_t k, sa_sint_t * RESTRICT buckets) +{ + const fast_sint_t prefetch_distance = 32; + + memset(buckets, 0, (size_t)k * sizeof(sa_sint_t)); + + fast_sint_t i, j; + for (i = 0, j = (fast_sint_t)n - 7; i < j; i += 8) + { + libsais_prefetch(&T[i + prefetch_distance]); + + buckets[T[i + 0]]++; + buckets[T[i + 1]]++; + buckets[T[i + 2]]++; + buckets[T[i + 3]]++; + buckets[T[i + 4]]++; + buckets[T[i + 5]]++; + buckets[T[i + 6]]++; + buckets[T[i + 7]]++; + } + + /* j is restored to n for the scalar remainder */ for (j += 7; i < j; i += 1) + { + buckets[T[i]]++; + } +} + /* libsais_initialize_buckets_start_and_end_8u: turns the four per-symbol counters (4-slot layout at the beginning of buckets) into bucket boundaries. For every symbol j, bucket_start[j] (at buckets[6 * ALPHABET_SIZE]) receives the running total before the symbol and bucket_end[j] (at buckets[7 * ALPHABET_SIZE]) the running total after adding the symbol's four counters. When freq is not NULL the per-symbol total is also stored in freq[j]. */ +static void libsais_initialize_buckets_start_and_end_8u(sa_sint_t * RESTRICT buckets, sa_sint_t * RESTRICT freq) +{ + sa_sint_t * RESTRICT bucket_start = &buckets[6 * ALPHABET_SIZE]; + sa_sint_t * RESTRICT bucket_end = &buckets[7 * ALPHABET_SIZE]; + + if (freq != NULL) + { + fast_sint_t i, j; sa_sint_t sum = 0; + for (i = BUCKETS_INDEX4(0, 0), j = 0; i <= BUCKETS_INDEX4(ALPHABET_SIZE - 1, 0); i += BUCKETS_INDEX4(1, 0), j += 1) + { + bucket_start[j] = sum; + sum += (freq[j] = buckets[i + BUCKETS_INDEX4(0, 0)] + buckets[i + BUCKETS_INDEX4(0, 1)] + buckets[i + BUCKETS_INDEX4(0, 2)] + buckets[i + BUCKETS_INDEX4(0, 3)]); + bucket_end[j] = sum; + } + } + else + { + fast_sint_t i, j; sa_sint_t sum = 0; + for (i = BUCKETS_INDEX4(0, 0), j = 0; i <= BUCKETS_INDEX4(ALPHABET_SIZE - 1, 0); i += BUCKETS_INDEX4(1, 0), j += 1) + { + bucket_start[j] = sum; + sum += buckets[i + BUCKETS_INDEX4(0, 0)] + buckets[i + BUCKETS_INDEX4(0, 1)] + buckets[i + BUCKETS_INDEX4(0, 2)] + buckets[i + BUCKETS_INDEX4(0, 3)]; + bucket_end[j] = sum; + } + } +} + /* libsais_initialize_buckets_start_and_end_32s_6k: same boundary computation for an alphabet of k symbols: the four counters per symbol at the beginning of buckets are summed, with bucket starts written at buckets[4 * k] and bucket ends at buckets[5 * k]. */ +static void libsais_initialize_buckets_start_and_end_32s_6k(sa_sint_t k, sa_sint_t * RESTRICT buckets) +{ + sa_sint_t * RESTRICT bucket_start = &buckets[4 *
k]; + sa_sint_t * RESTRICT bucket_end = &buckets[5 * k]; + + fast_sint_t i, j; sa_sint_t sum = 0; + for (i = BUCKETS_INDEX4(0, 0), j = 0; i <= BUCKETS_INDEX4((fast_sint_t)k - 1, 0); i += BUCKETS_INDEX4(1, 0), j += 1) + { + bucket_start[j] = sum; + sum += buckets[i + BUCKETS_INDEX4(0, 0)] + buckets[i + BUCKETS_INDEX4(0, 1)] + buckets[i + BUCKETS_INDEX4(0, 2)] + buckets[i + BUCKETS_INDEX4(0, 3)]; + bucket_end[j] = sum; + } +} + /* libsais_initialize_buckets_start_and_end_32s_4k: sums the two counters per symbol stored at the beginning of buckets and writes, for every symbol j, the running total before the symbol to bucket_start[j] (at buckets[2 * k]) and the running total after it to bucket_end[j] (at buckets[3 * k]). */ +static void libsais_initialize_buckets_start_and_end_32s_4k(sa_sint_t k, sa_sint_t * RESTRICT buckets) +{ + sa_sint_t * RESTRICT bucket_start = &buckets[2 * k]; + sa_sint_t * RESTRICT bucket_end = &buckets[3 * k]; + + fast_sint_t i, j; sa_sint_t sum = 0; + for (i = BUCKETS_INDEX2(0, 0), j = 0; i <= BUCKETS_INDEX2((fast_sint_t)k - 1, 0); i += BUCKETS_INDEX2(1, 0), j += 1) + { + bucket_start[j] = sum; + sum += buckets[i + BUCKETS_INDEX2(0, 0)] + buckets[i + BUCKETS_INDEX2(0, 1)]; + bucket_end[j] = sum; + } +} + /* libsais_initialize_buckets_end_32s_2k: in-place pass over the 2-slot layout: slot 0 of every symbol is overwritten with the running total of both slots up to and including that symbol; slot 1 is left untouched. */ +static void libsais_initialize_buckets_end_32s_2k(sa_sint_t k, sa_sint_t * RESTRICT buckets) +{ + fast_sint_t i; sa_sint_t sum0 = 0; + for (i = BUCKETS_INDEX2(0, 0); i <= BUCKETS_INDEX2((fast_sint_t)k - 1, 0); i += BUCKETS_INDEX2(1, 0)) + { + sum0 += buckets[i + BUCKETS_INDEX2(0, 0)] + buckets[i + BUCKETS_INDEX2(0, 1)]; buckets[i + BUCKETS_INDEX2(0, 0)] = sum0; + } +} + /* libsais_initialize_buckets_start_and_end_32s_2k: first copies slot 0 of every 2-slot entry into the dense range buckets[0 .. k - 1]; then sets buckets[k] to 0 and copies buckets[0 .. k - 2] to buckets[k + 1 .. 2 * k - 1], so the second half holds the first half shifted up by one entry. The memcpy length is (k - 1) entries, so k is assumed to be at least 1 - TODO confirm with callers. NOTE(review): presumably the first half then holds bucket ends and the second half bucket starts. */ +static void libsais_initialize_buckets_start_and_end_32s_2k(sa_sint_t k, sa_sint_t * RESTRICT buckets) +{ + fast_sint_t i, j; + for (i = BUCKETS_INDEX2(0, 0), j = 0; i <= BUCKETS_INDEX2((fast_sint_t)k - 1, 0); i += BUCKETS_INDEX2(1, 0), j += 1) + { + buckets[j] = buckets[i]; + } + + buckets[k] = 0; memcpy(&buckets[k + 1], buckets, ((size_t)k - 1) * sizeof(sa_sint_t)); +} + /* libsais_initialize_buckets_start_32s_1k: replaces the k counters in buckets, in place, by their exclusive prefix sums (each entry becomes the sum of all entries before it). */ +static void libsais_initialize_buckets_start_32s_1k(sa_sint_t k, sa_sint_t * RESTRICT buckets) +{ + fast_sint_t i; sa_sint_t sum = 0; + for (i = 0; i <= (fast_sint_t)k - 1; i += 1) { sa_sint_t tmp = buckets[i]; buckets[i] = sum; sum += tmp; } +} + /* libsais_initialize_buckets_end_32s_1k (definition starts here): replaces the k counters in buckets, in place, by their inclusive prefix sums (each entry becomes the sum of itself and all entries before it). */ +static void
libsais_initialize_buckets_end_32s_1k(sa_sint_t k, sa_sint_t * RESTRICT buckets) +{ + fast_sint_t i; sa_sint_t sum = 0; + /* in-place inclusive prefix sum over the k counters */ for (i = 0; i <= (fast_sint_t)k - 1; i += 1) { sum += buckets[i]; buckets[i] = sum; } +} + /* libsais_initialize_buckets_for_lms_suffixes_radix_sort_8u: works in two stages. First it re-walks the text prefix T[0 .. first_lms_suffix] backward with the same rolling state s used by the counting passes (starting from s = 0) and decrements the matching counter for every symbol, removing that prefix's contribution from the 4-slot counters. Then it fills the 2-slot table at buckets[4 * ALPHABET_SIZE]: for each symbol, slot (symbol, 1) receives the running total before the symbol and slot (symbol, 0) the running total after adding the symbol's counters in slots 1 and 3. Returns the final running total. */ +static sa_sint_t libsais_initialize_buckets_for_lms_suffixes_radix_sort_8u(const uint8_t * RESTRICT T, sa_sint_t * RESTRICT buckets, sa_sint_t first_lms_suffix) +{ + { + fast_uint_t s = 0; + fast_sint_t c0 = T[first_lms_suffix]; + fast_sint_t c1 = 0; + + for (; --first_lms_suffix >= 0; ) + { + c1 = c0; c0 = T[first_lms_suffix]; s = (s << 1) + (fast_uint_t)(c0 > (c1 - (fast_sint_t)(s & 1))); + buckets[BUCKETS_INDEX4((fast_uint_t)c1, s & 3)]--; + } + + buckets[BUCKETS_INDEX4((fast_uint_t)c0, (s << 1) & 3)]--; + } + + { + sa_sint_t * RESTRICT temp_bucket = &buckets[4 * ALPHABET_SIZE]; + + fast_sint_t i, j; sa_sint_t sum = 0; + for (i = BUCKETS_INDEX4(0, 0), j = BUCKETS_INDEX2(0, 0); i <= BUCKETS_INDEX4(ALPHABET_SIZE - 1, 0); i += BUCKETS_INDEX4(1, 0), j += BUCKETS_INDEX2(1, 0)) + { + temp_bucket[j + BUCKETS_INDEX2(0, 1)] = sum; sum += buckets[i + BUCKETS_INDEX4(0, 1)] + buckets[i + BUCKETS_INDEX4(0, 3)]; temp_bucket[j] = sum; + } + + return sum; + } +} + /* libsais_initialize_buckets_for_lms_suffixes_radix_sort_32s_2k: first moves one count for the symbol T[first_lms_suffix] from slot 1 to slot 0, then makes a single in-place pass over the 2-slot layout: slot 0 of every symbol becomes the running total of both slots and slot 1 the running total of slot 1 only, both including the current symbol. */ +static void libsais_initialize_buckets_for_lms_suffixes_radix_sort_32s_2k(const sa_sint_t * RESTRICT T, sa_sint_t k, sa_sint_t * RESTRICT buckets, sa_sint_t first_lms_suffix) +{ + buckets[BUCKETS_INDEX2(T[first_lms_suffix], 0)]++; + buckets[BUCKETS_INDEX2(T[first_lms_suffix], 1)]--; + + fast_sint_t i; sa_sint_t sum0 = 0, sum1 = 0; + for (i = BUCKETS_INDEX2(0, 0); i <= BUCKETS_INDEX2((fast_sint_t)k - 1, 0); i += BUCKETS_INDEX2(1, 0)) + { + sum0 += buckets[i + BUCKETS_INDEX2(0, 0)] + buckets[i + BUCKETS_INDEX2(0, 1)]; + sum1 += buckets[i + BUCKETS_INDEX2(0, 1)]; + + buckets[i + BUCKETS_INDEX2(0, 0)] = sum0; + buckets[i + BUCKETS_INDEX2(0, 1)] = sum1; + } +} + /* libsais_initialize_buckets_for_lms_suffixes_radix_sort_32s_6k (definition starts here): integer-alphabet version of the 8u routine. It removes the contribution of the text prefix T[0 .. first_lms_suffix] from the 4-slot counters, then writes one entry per symbol at buckets[4 * k] holding the running total of the counters in slots 1 and 3 up to and including that symbol. Returns the final running total. */ +static sa_sint_t libsais_initialize_buckets_for_lms_suffixes_radix_sort_32s_6k(const sa_sint_t * RESTRICT T, sa_sint_t k, sa_sint_t * RESTRICT buckets,
sa_sint_t first_lms_suffix) +{ + { + fast_uint_t s = 0; + fast_sint_t c0 = T[first_lms_suffix]; + fast_sint_t c1 = 0; + + /* walk the prefix T[0 .. first_lms_suffix] backward and take its symbols back out of the counters */ for (; --first_lms_suffix >= 0; ) + { + c1 = c0; c0 = T[first_lms_suffix]; s = (s << 1) + (fast_uint_t)(c0 > (c1 - (fast_sint_t)(s & 1))); + buckets[BUCKETS_INDEX4((fast_uint_t)c1, s & 3)]--; + } + + buckets[BUCKETS_INDEX4((fast_uint_t)c0, (s << 1) & 3)]--; + } + + { + sa_sint_t * RESTRICT temp_bucket = &buckets[4 * k]; + + fast_sint_t i, j; sa_sint_t sum = 0; + for (i = BUCKETS_INDEX4(0, 0), j = 0; i <= BUCKETS_INDEX4((fast_sint_t)k - 1, 0); i += BUCKETS_INDEX4(1, 0), j += 1) + { + sum += buckets[i + BUCKETS_INDEX4(0, 1)] + buckets[i + BUCKETS_INDEX4(0, 3)]; temp_bucket[j] = sum; + } + + return sum; + } +} + /* libsais_initialize_buckets_for_radix_and_partial_sorting_32s_4k: first moves one count for the symbol T[first_lms_suffix] from slot 1 to slot 0 of the 2-slot counters. Then, for every symbol j, bucket_start[j] (at buckets[2 * k]) receives the running total of both slots before the symbol, slot 1 of the symbol is overwritten with the running total of slot 1 values including the symbol, and bucket_end[j] (at buckets[3 * k]) receives the running total of both slots including the symbol. */ +static void libsais_initialize_buckets_for_radix_and_partial_sorting_32s_4k(const sa_sint_t * RESTRICT T, sa_sint_t k, sa_sint_t * RESTRICT buckets, sa_sint_t first_lms_suffix) +{ + sa_sint_t * RESTRICT bucket_start = &buckets[2 * k]; + sa_sint_t * RESTRICT bucket_end = &buckets[3 * k]; + + buckets[BUCKETS_INDEX2(T[first_lms_suffix], 0)]++; + buckets[BUCKETS_INDEX2(T[first_lms_suffix], 1)]--; + + fast_sint_t i, j; sa_sint_t sum0 = 0, sum1 = 0; + for (i = BUCKETS_INDEX2(0, 0), j = 0; i <= BUCKETS_INDEX2((fast_sint_t)k - 1, 0); i += BUCKETS_INDEX2(1, 0), j += 1) + { + bucket_start[j] = sum1; + + sum0 += buckets[i + BUCKETS_INDEX2(0, 1)]; + sum1 += buckets[i + BUCKETS_INDEX2(0, 0)] + buckets[i + BUCKETS_INDEX2(0, 1)]; + buckets[i + BUCKETS_INDEX2(0, 1)] = sum0; + + bucket_end[j] = sum1; + } +} + /* libsais_radix_sort_lms_suffixes_8u: one distribution pass over SA[omp_block_start .. omp_block_start + omp_block_size - 1], processed from the highest index down. For every entry p the cursor induction_bucket[BUCKETS_INDEX2(T[p], 0)] is pre-decremented and p is stored at SA[cursor], so entries are placed by the symbol at T[p]. The main loop is 4x unrolled with prefetching of upcoming SA entries and of the text symbols they point to. */ +static void libsais_radix_sort_lms_suffixes_8u(const uint8_t * RESTRICT T, sa_sint_t * RESTRICT SA, sa_sint_t * RESTRICT induction_bucket, fast_sint_t omp_block_start, fast_sint_t omp_block_size) +{ + const fast_sint_t prefetch_distance = 32; + + fast_sint_t i, j; + for (i = omp_block_start + omp_block_size - 1, j = omp_block_start + prefetch_distance + 3; i >= j; i -= 4) + { + libsais_prefetch(&SA[i - 2 * prefetch_distance]); + +
libsais_prefetch(&T[SA[i - prefetch_distance - 0]]); + libsais_prefetch(&T[SA[i - prefetch_distance - 1]]); + libsais_prefetch(&T[SA[i - prefetch_distance - 2]]); + libsais_prefetch(&T[SA[i - prefetch_distance - 3]]); + + sa_sint_t p0 = SA[i - 0]; SA[--induction_bucket[BUCKETS_INDEX2(T[p0], 0)]] = p0; + sa_sint_t p1 = SA[i - 1]; SA[--induction_bucket[BUCKETS_INDEX2(T[p1], 0)]] = p1; + sa_sint_t p2 = SA[i - 2]; SA[--induction_bucket[BUCKETS_INDEX2(T[p2], 0)]] = p2; + sa_sint_t p3 = SA[i - 3]; SA[--induction_bucket[BUCKETS_INDEX2(T[p3], 0)]] = p3; + } + + for (j -= prefetch_distance + 3; i >= j; i -= 1) + { + sa_sint_t p = SA[i]; SA[--induction_bucket[BUCKETS_INDEX2(T[p], 0)]] = p; + } +} + +static void libsais_radix_sort_lms_suffixes_8u_omp(const uint8_t * RESTRICT T, sa_sint_t * RESTRICT SA, sa_sint_t n, sa_sint_t m, sa_sint_t * RESTRICT buckets, sa_sint_t threads, LIBSAIS_THREAD_STATE * RESTRICT thread_state) +{ +#if defined(_OPENMP) + #pragma omp parallel num_threads(threads) if(threads > 1 && n >= 65536 && m >= 65536 && omp_get_dynamic() == 0) +#endif + { +#if defined(_OPENMP) + fast_sint_t omp_thread_num = omp_get_thread_num(); + fast_sint_t omp_num_threads = omp_get_num_threads(); +#else + UNUSED(threads); UNUSED(thread_state); + + fast_sint_t omp_num_threads = 1; +#endif + if (omp_num_threads == 1) + { + libsais_radix_sort_lms_suffixes_8u(T, SA, &buckets[4 * ALPHABET_SIZE], (fast_sint_t)n - (fast_sint_t)m + 1, (fast_sint_t)m - 1); + } +#if defined(_OPENMP) + else + { + { + sa_sint_t * RESTRICT src_bucket = &buckets[4 * ALPHABET_SIZE]; + sa_sint_t * RESTRICT dst_bucket = thread_state[omp_thread_num].state.buckets; + + fast_sint_t i, j; + for (i = BUCKETS_INDEX2(0, 0), j = BUCKETS_INDEX4(0, 1); i <= BUCKETS_INDEX2(ALPHABET_SIZE - 1, 0); i += BUCKETS_INDEX2(1, 0), j += BUCKETS_INDEX4(1, 0)) + { + dst_bucket[i] = src_bucket[i] - dst_bucket[j]; + } + } + + { + fast_sint_t t, omp_block_start = 0, omp_block_size = thread_state[omp_thread_num].state.m; + for (t = 
omp_num_threads - 1; t >= omp_thread_num; --t) omp_block_start += thread_state[t].state.m; + + if (omp_block_start == (fast_sint_t)m && omp_block_size > 0) + { + omp_block_start -= 1; omp_block_size -= 1; + } + + libsais_radix_sort_lms_suffixes_8u(T, SA, thread_state[omp_thread_num].state.buckets, (fast_sint_t)n - omp_block_start, omp_block_size); + } + } +#endif + } +} + +static void libsais_radix_sort_lms_suffixes_32s_6k(const sa_sint_t * RESTRICT T, sa_sint_t * RESTRICT SA, sa_sint_t * RESTRICT induction_bucket, fast_sint_t omp_block_start, fast_sint_t omp_block_size) +{ + const fast_sint_t prefetch_distance = 32; + + fast_sint_t i, j; + for (i = omp_block_start + omp_block_size - 1, j = omp_block_start + 2 * prefetch_distance + 3; i >= j; i -= 4) + { + libsais_prefetch(&SA[i - 3 * prefetch_distance]); + + libsais_prefetch(&T[SA[i - 2 * prefetch_distance - 0]]); + libsais_prefetch(&T[SA[i - 2 * prefetch_distance - 1]]); + libsais_prefetch(&T[SA[i - 2 * prefetch_distance - 2]]); + libsais_prefetch(&T[SA[i - 2 * prefetch_distance - 3]]); + + libsais_prefetchw(&induction_bucket[T[SA[i - prefetch_distance - 0]]]); + libsais_prefetchw(&induction_bucket[T[SA[i - prefetch_distance - 1]]]); + libsais_prefetchw(&induction_bucket[T[SA[i - prefetch_distance - 2]]]); + libsais_prefetchw(&induction_bucket[T[SA[i - prefetch_distance - 3]]]); + + sa_sint_t p0 = SA[i - 0]; SA[--induction_bucket[T[p0]]] = p0; + sa_sint_t p1 = SA[i - 1]; SA[--induction_bucket[T[p1]]] = p1; + sa_sint_t p2 = SA[i - 2]; SA[--induction_bucket[T[p2]]] = p2; + sa_sint_t p3 = SA[i - 3]; SA[--induction_bucket[T[p3]]] = p3; + } + + for (j -= 2 * prefetch_distance + 3; i >= j; i -= 1) + { + sa_sint_t p = SA[i]; SA[--induction_bucket[T[p]]] = p; + } +} + +static void libsais_radix_sort_lms_suffixes_32s_2k(const sa_sint_t * RESTRICT T, sa_sint_t * RESTRICT SA, sa_sint_t * RESTRICT induction_bucket, fast_sint_t omp_block_start, fast_sint_t omp_block_size) +{ + const fast_sint_t prefetch_distance = 32; + + 
fast_sint_t i, j; + for (i = omp_block_start + omp_block_size - 1, j = omp_block_start + 2 * prefetch_distance + 3; i >= j; i -= 4) + { + libsais_prefetch(&SA[i - 3 * prefetch_distance]); + + libsais_prefetch(&T[SA[i - 2 * prefetch_distance - 0]]); + libsais_prefetch(&T[SA[i - 2 * prefetch_distance - 1]]); + libsais_prefetch(&T[SA[i - 2 * prefetch_distance - 2]]); + libsais_prefetch(&T[SA[i - 2 * prefetch_distance - 3]]); + + libsais_prefetchw(&induction_bucket[BUCKETS_INDEX2(T[SA[i - prefetch_distance - 0]], 0)]); + libsais_prefetchw(&induction_bucket[BUCKETS_INDEX2(T[SA[i - prefetch_distance - 1]], 0)]); + libsais_prefetchw(&induction_bucket[BUCKETS_INDEX2(T[SA[i - prefetch_distance - 2]], 0)]); + libsais_prefetchw(&induction_bucket[BUCKETS_INDEX2(T[SA[i - prefetch_distance - 3]], 0)]); + + sa_sint_t p0 = SA[i - 0]; SA[--induction_bucket[BUCKETS_INDEX2(T[p0], 0)]] = p0; + sa_sint_t p1 = SA[i - 1]; SA[--induction_bucket[BUCKETS_INDEX2(T[p1], 0)]] = p1; + sa_sint_t p2 = SA[i - 2]; SA[--induction_bucket[BUCKETS_INDEX2(T[p2], 0)]] = p2; + sa_sint_t p3 = SA[i - 3]; SA[--induction_bucket[BUCKETS_INDEX2(T[p3], 0)]] = p3; + } + + for (j -= 2 * prefetch_distance + 3; i >= j; i -= 1) + { + sa_sint_t p = SA[i]; SA[--induction_bucket[BUCKETS_INDEX2(T[p], 0)]] = p; + } +} + +#if defined(_OPENMP) + +static void libsais_radix_sort_lms_suffixes_32s_block_gather(const sa_sint_t * RESTRICT T, sa_sint_t * RESTRICT SA, LIBSAIS_THREAD_CACHE * RESTRICT cache, fast_sint_t omp_block_start, fast_sint_t omp_block_size) +{ + const fast_sint_t prefetch_distance = 32; + + fast_sint_t i, j; + for (i = omp_block_start, j = omp_block_start + omp_block_size - prefetch_distance - 3; i < j; i += 4) + { + libsais_prefetch(&SA[i + 2 * prefetch_distance]); + + libsais_prefetch(&T[SA[i + prefetch_distance + 0]]); + libsais_prefetch(&T[SA[i + prefetch_distance + 1]]); + libsais_prefetch(&T[SA[i + prefetch_distance + 2]]); + libsais_prefetch(&T[SA[i + prefetch_distance + 3]]); + + 
libsais_prefetchw(&cache[i + prefetch_distance]); + + cache[i + 0].symbol = T[cache[i + 0].index = SA[i + 0]]; + cache[i + 1].symbol = T[cache[i + 1].index = SA[i + 1]]; + cache[i + 2].symbol = T[cache[i + 2].index = SA[i + 2]]; + cache[i + 3].symbol = T[cache[i + 3].index = SA[i + 3]]; + } + + for (j += prefetch_distance + 3; i < j; i += 1) + { + cache[i].symbol = T[cache[i].index = SA[i]]; + } +} + +static void libsais_radix_sort_lms_suffixes_32s_6k_block_sort(sa_sint_t * RESTRICT induction_bucket, LIBSAIS_THREAD_CACHE * RESTRICT cache, fast_sint_t omp_block_start, fast_sint_t omp_block_size) +{ + const fast_sint_t prefetch_distance = 32; + + fast_sint_t i, j; + for (i = omp_block_start + omp_block_size - 1, j = omp_block_start + prefetch_distance + 3; i >= j; i -= 4) + { + libsais_prefetchw(&cache[i - 2 * prefetch_distance]); + + libsais_prefetchw(&induction_bucket[cache[i - prefetch_distance - 0].symbol]); + libsais_prefetchw(&induction_bucket[cache[i - prefetch_distance - 1].symbol]); + libsais_prefetchw(&induction_bucket[cache[i - prefetch_distance - 2].symbol]); + libsais_prefetchw(&induction_bucket[cache[i - prefetch_distance - 3].symbol]); + + cache[i - 0].symbol = --induction_bucket[cache[i - 0].symbol]; + cache[i - 1].symbol = --induction_bucket[cache[i - 1].symbol]; + cache[i - 2].symbol = --induction_bucket[cache[i - 2].symbol]; + cache[i - 3].symbol = --induction_bucket[cache[i - 3].symbol]; + } + + for (j -= prefetch_distance + 3; i >= j; i -= 1) + { + cache[i].symbol = --induction_bucket[cache[i].symbol]; + } +} + +static void libsais_radix_sort_lms_suffixes_32s_2k_block_sort(sa_sint_t * RESTRICT induction_bucket, LIBSAIS_THREAD_CACHE * RESTRICT cache, fast_sint_t omp_block_start, fast_sint_t omp_block_size) +{ + const fast_sint_t prefetch_distance = 32; + + fast_sint_t i, j; + for (i = omp_block_start + omp_block_size - 1, j = omp_block_start + prefetch_distance + 3; i >= j; i -= 4) + { + libsais_prefetchw(&cache[i - 2 * prefetch_distance]); + + 
libsais_prefetchw(&induction_bucket[BUCKETS_INDEX2(cache[i - prefetch_distance - 0].symbol, 0)]); + libsais_prefetchw(&induction_bucket[BUCKETS_INDEX2(cache[i - prefetch_distance - 1].symbol, 0)]); + libsais_prefetchw(&induction_bucket[BUCKETS_INDEX2(cache[i - prefetch_distance - 2].symbol, 0)]); + libsais_prefetchw(&induction_bucket[BUCKETS_INDEX2(cache[i - prefetch_distance - 3].symbol, 0)]); + + cache[i - 0].symbol = --induction_bucket[BUCKETS_INDEX2(cache[i - 0].symbol, 0)]; + cache[i - 1].symbol = --induction_bucket[BUCKETS_INDEX2(cache[i - 1].symbol, 0)]; + cache[i - 2].symbol = --induction_bucket[BUCKETS_INDEX2(cache[i - 2].symbol, 0)]; + cache[i - 3].symbol = --induction_bucket[BUCKETS_INDEX2(cache[i - 3].symbol, 0)]; + } + + for (j -= prefetch_distance + 3; i >= j; i -= 1) + { + cache[i].symbol = --induction_bucket[BUCKETS_INDEX2(cache[i].symbol, 0)]; + } +} + +static void libsais_radix_sort_lms_suffixes_32s_6k_block_omp(const sa_sint_t * RESTRICT T, sa_sint_t * RESTRICT SA, sa_sint_t * RESTRICT induction_bucket, LIBSAIS_THREAD_CACHE * RESTRICT cache, fast_sint_t block_start, fast_sint_t block_size, sa_sint_t threads) +{ +#if defined(_OPENMP) + #pragma omp parallel num_threads(threads) if(threads > 1 && block_size >= 16384) +#endif + { +#if defined(_OPENMP) + fast_sint_t omp_thread_num = omp_get_thread_num(); + fast_sint_t omp_num_threads = omp_get_num_threads(); +#else + UNUSED(threads); UNUSED(cache); + + fast_sint_t omp_thread_num = 0; + fast_sint_t omp_num_threads = 1; +#endif + fast_sint_t omp_block_stride = (block_size / omp_num_threads) & (-16); + fast_sint_t omp_block_start = omp_thread_num * omp_block_stride; + fast_sint_t omp_block_size = omp_thread_num < omp_num_threads - 1 ? 
omp_block_stride : block_size - omp_block_start; + + omp_block_start += block_start; + + if (omp_num_threads == 1) + { + libsais_radix_sort_lms_suffixes_32s_6k(T, SA, induction_bucket, omp_block_start, omp_block_size); + } +#if defined(_OPENMP) + else + { + { + libsais_radix_sort_lms_suffixes_32s_block_gather(T, SA, cache - block_start, omp_block_start, omp_block_size); + } + + #pragma omp barrier + + #pragma omp master + { + libsais_radix_sort_lms_suffixes_32s_6k_block_sort(induction_bucket, cache - block_start, block_start, block_size); + } + + #pragma omp barrier + + { + libsais_place_cached_suffixes(SA, cache - block_start, omp_block_start, omp_block_size); + } + } +#endif + } +} + +static void libsais_radix_sort_lms_suffixes_32s_2k_block_omp(const sa_sint_t * RESTRICT T, sa_sint_t * RESTRICT SA, sa_sint_t * RESTRICT induction_bucket, LIBSAIS_THREAD_CACHE * RESTRICT cache, fast_sint_t block_start, fast_sint_t block_size, sa_sint_t threads) +{ +#if defined(_OPENMP) + #pragma omp parallel num_threads(threads) if(threads > 1 && block_size >= 16384) +#endif + { +#if defined(_OPENMP) + fast_sint_t omp_thread_num = omp_get_thread_num(); + fast_sint_t omp_num_threads = omp_get_num_threads(); +#else + UNUSED(threads); UNUSED(cache); + + fast_sint_t omp_thread_num = 0; + fast_sint_t omp_num_threads = 1; +#endif + fast_sint_t omp_block_stride = (block_size / omp_num_threads) & (-16); + fast_sint_t omp_block_start = omp_thread_num * omp_block_stride; + fast_sint_t omp_block_size = omp_thread_num < omp_num_threads - 1 ? 
omp_block_stride : block_size - omp_block_start; + + omp_block_start += block_start; + + if (omp_num_threads == 1) + { + libsais_radix_sort_lms_suffixes_32s_2k(T, SA, induction_bucket, omp_block_start, omp_block_size); + } +#if defined(_OPENMP) + else + { + { + libsais_radix_sort_lms_suffixes_32s_block_gather(T, SA, cache - block_start, omp_block_start, omp_block_size); + } + + #pragma omp barrier + + #pragma omp master + { + libsais_radix_sort_lms_suffixes_32s_2k_block_sort(induction_bucket, cache - block_start, block_start, block_size); + } + + #pragma omp barrier + + { + libsais_place_cached_suffixes(SA, cache - block_start, omp_block_start, omp_block_size); + } + } +#endif + } +} + +#endif + +static void libsais_radix_sort_lms_suffixes_32s_6k_omp(const sa_sint_t * RESTRICT T, sa_sint_t * RESTRICT SA, sa_sint_t n, sa_sint_t m, sa_sint_t * RESTRICT induction_bucket, sa_sint_t threads, LIBSAIS_THREAD_STATE * RESTRICT thread_state) +{ + if (threads == 1 || m < 65536) + { + libsais_radix_sort_lms_suffixes_32s_6k(T, SA, induction_bucket, (fast_sint_t)n - (fast_sint_t)m + 1, (fast_sint_t)m - 1); + } +#if defined(_OPENMP) + else + { + fast_sint_t block_start, block_end; + for (block_start = 0; block_start < (fast_sint_t)m - 1; block_start = block_end) + { + block_end = block_start + (fast_sint_t)threads * LIBSAIS_PER_THREAD_CACHE_SIZE; if (block_end >= m) { block_end = (fast_sint_t)m - 1; } + + libsais_radix_sort_lms_suffixes_32s_6k_block_omp(T, SA, induction_bucket, thread_state[0].state.cache, (fast_sint_t)n - block_end, block_end - block_start, threads); + } + } +#else + UNUSED(thread_state); +#endif +} + +static void libsais_radix_sort_lms_suffixes_32s_2k_omp(const sa_sint_t * RESTRICT T, sa_sint_t * RESTRICT SA, sa_sint_t n, sa_sint_t m, sa_sint_t * RESTRICT induction_bucket, sa_sint_t threads, LIBSAIS_THREAD_STATE * RESTRICT thread_state) +{ + if (threads == 1 || m < 65536) + { + libsais_radix_sort_lms_suffixes_32s_2k(T, SA, induction_bucket, (fast_sint_t)n - 
(fast_sint_t)m + 1, (fast_sint_t)m - 1); + } +#if defined(_OPENMP) + else + { + fast_sint_t block_start, block_end; + for (block_start = 0; block_start < (fast_sint_t)m - 1; block_start = block_end) + { + block_end = block_start + (fast_sint_t)threads * LIBSAIS_PER_THREAD_CACHE_SIZE; if (block_end >= m) { block_end = (fast_sint_t)m - 1; } + + libsais_radix_sort_lms_suffixes_32s_2k_block_omp(T, SA, induction_bucket, thread_state[0].state.cache, (fast_sint_t)n - block_end, block_end - block_start, threads); + } + } +#else + UNUSED(thread_state); +#endif +} + +static sa_sint_t libsais_radix_sort_lms_suffixes_32s_1k(const sa_sint_t * RESTRICT T, sa_sint_t * RESTRICT SA, sa_sint_t n, sa_sint_t * RESTRICT buckets) +{ + const fast_sint_t prefetch_distance = 32; + + sa_sint_t i = n - 2; + sa_sint_t m = 0; + fast_uint_t s = 1; + fast_sint_t c0 = T[n - 1]; + fast_sint_t c1 = 0; + fast_sint_t c2 = 0; + + for (; i >= prefetch_distance + 3; i -= 4) + { + libsais_prefetch(&T[i - 2 * prefetch_distance]); + + libsais_prefetchw(&buckets[T[i - prefetch_distance - 0]]); + libsais_prefetchw(&buckets[T[i - prefetch_distance - 1]]); + libsais_prefetchw(&buckets[T[i - prefetch_distance - 2]]); + libsais_prefetchw(&buckets[T[i - prefetch_distance - 3]]); + + c1 = T[i - 0]; s = (s << 1) + (fast_uint_t)(c1 > (c0 - (fast_sint_t)(s & 1))); + if ((s & 3) == 1) { SA[--buckets[c2 = c0]] = i + 1; m++; } + + c0 = T[i - 1]; s = (s << 1) + (fast_uint_t)(c0 > (c1 - (fast_sint_t)(s & 1))); + if ((s & 3) == 1) { SA[--buckets[c2 = c1]] = i - 0; m++; } + + c1 = T[i - 2]; s = (s << 1) + (fast_uint_t)(c1 > (c0 - (fast_sint_t)(s & 1))); + if ((s & 3) == 1) { SA[--buckets[c2 = c0]] = i - 1; m++; } + + c0 = T[i - 3]; s = (s << 1) + (fast_uint_t)(c0 > (c1 - (fast_sint_t)(s & 1))); + if ((s & 3) == 1) { SA[--buckets[c2 = c1]] = i - 2; m++; } + } + + for (; i >= 0; i -= 1) + { + c1 = c0; c0 = T[i]; s = (s << 1) + (fast_uint_t)(c0 > (c1 - (fast_sint_t)(s & 1))); + if ((s & 3) == 1) { SA[--buckets[c2 = c1]] = i + 
1; m++; } + } + + if (m > 1) + { + SA[buckets[c2]] = 0; + } + + return m; +} + +static void libsais_radix_sort_set_markers_32s_6k(sa_sint_t * RESTRICT SA, sa_sint_t * RESTRICT induction_bucket, fast_sint_t omp_block_start, fast_sint_t omp_block_size) +{ + const fast_sint_t prefetch_distance = 32; + + fast_sint_t i, j; + for (i = omp_block_start, j = omp_block_start + omp_block_size - prefetch_distance - 3; i < j; i += 4) + { + libsais_prefetch(&induction_bucket[i + 2 * prefetch_distance]); + + libsais_prefetchw(&SA[induction_bucket[i + prefetch_distance + 0]]); + libsais_prefetchw(&SA[induction_bucket[i + prefetch_distance + 1]]); + libsais_prefetchw(&SA[induction_bucket[i + prefetch_distance + 2]]); + libsais_prefetchw(&SA[induction_bucket[i + prefetch_distance + 3]]); + + SA[induction_bucket[i + 0]] |= SAINT_MIN; + SA[induction_bucket[i + 1]] |= SAINT_MIN; + SA[induction_bucket[i + 2]] |= SAINT_MIN; + SA[induction_bucket[i + 3]] |= SAINT_MIN; + } + + for (j += prefetch_distance + 3; i < j; i += 1) + { + SA[induction_bucket[i]] |= SAINT_MIN; + } +} + +static void libsais_radix_sort_set_markers_32s_4k(sa_sint_t * RESTRICT SA, sa_sint_t * RESTRICT induction_bucket, fast_sint_t omp_block_start, fast_sint_t omp_block_size) +{ + const fast_sint_t prefetch_distance = 32; + + fast_sint_t i, j; + for (i = omp_block_start, j = omp_block_start + omp_block_size - prefetch_distance - 3; i < j; i += 4) + { + libsais_prefetch(&induction_bucket[BUCKETS_INDEX2(i + 2 * prefetch_distance, 0)]); + + libsais_prefetchw(&SA[induction_bucket[BUCKETS_INDEX2(i + prefetch_distance + 0, 0)]]); + libsais_prefetchw(&SA[induction_bucket[BUCKETS_INDEX2(i + prefetch_distance + 1, 0)]]); + libsais_prefetchw(&SA[induction_bucket[BUCKETS_INDEX2(i + prefetch_distance + 2, 0)]]); + libsais_prefetchw(&SA[induction_bucket[BUCKETS_INDEX2(i + prefetch_distance + 3, 0)]]); + + SA[induction_bucket[BUCKETS_INDEX2(i + 0, 0)]] |= SUFFIX_GROUP_MARKER; + SA[induction_bucket[BUCKETS_INDEX2(i + 1, 0)]] |= 
SUFFIX_GROUP_MARKER; + SA[induction_bucket[BUCKETS_INDEX2(i + 2, 0)]] |= SUFFIX_GROUP_MARKER; + SA[induction_bucket[BUCKETS_INDEX2(i + 3, 0)]] |= SUFFIX_GROUP_MARKER; + } + + for (j += prefetch_distance + 3; i < j; i += 1) + { + SA[induction_bucket[BUCKETS_INDEX2(i, 0)]] |= SUFFIX_GROUP_MARKER; + } +} + +static void libsais_radix_sort_set_markers_32s_6k_omp(sa_sint_t * RESTRICT SA, sa_sint_t k, sa_sint_t * RESTRICT induction_bucket, sa_sint_t threads) +{ +#if defined(_OPENMP) + #pragma omp parallel num_threads(threads) if(threads > 1 && k >= 65536) +#endif + { +#if defined(_OPENMP) + fast_sint_t omp_thread_num = omp_get_thread_num(); + fast_sint_t omp_num_threads = omp_get_num_threads(); + fast_sint_t omp_block_stride = (((fast_sint_t)k - 1) / omp_num_threads) & (-16); + fast_sint_t omp_block_start = omp_thread_num * omp_block_stride; + fast_sint_t omp_block_size = omp_thread_num < omp_num_threads - 1 ? omp_block_stride : (fast_sint_t)k - 1 - omp_block_start; +#else + UNUSED(threads); + + fast_sint_t omp_block_start = 0; + fast_sint_t omp_block_size = (fast_sint_t)k - 1; +#endif + + libsais_radix_sort_set_markers_32s_6k(SA, induction_bucket, omp_block_start, omp_block_size); + } +} + +static void libsais_radix_sort_set_markers_32s_4k_omp(sa_sint_t * RESTRICT SA, sa_sint_t k, sa_sint_t * RESTRICT induction_bucket, sa_sint_t threads) +{ +#if defined(_OPENMP) + #pragma omp parallel num_threads(threads) if(threads > 1 && k >= 65536) +#endif + { +#if defined(_OPENMP) + fast_sint_t omp_thread_num = omp_get_thread_num(); + fast_sint_t omp_num_threads = omp_get_num_threads(); + fast_sint_t omp_block_stride = (((fast_sint_t)k - 1) / omp_num_threads) & (-16); + fast_sint_t omp_block_start = omp_thread_num * omp_block_stride; + fast_sint_t omp_block_size = omp_thread_num < omp_num_threads - 1 ? 
omp_block_stride : (fast_sint_t)k - 1 - omp_block_start; +#else + UNUSED(threads); + + fast_sint_t omp_block_start = 0; + fast_sint_t omp_block_size = (fast_sint_t)k - 1; +#endif + + libsais_radix_sort_set_markers_32s_4k(SA, induction_bucket, omp_block_start, omp_block_size); + } +} + +static void libsais_initialize_buckets_for_partial_sorting_8u(const uint8_t * RESTRICT T, sa_sint_t * RESTRICT buckets, sa_sint_t first_lms_suffix, sa_sint_t left_suffixes_count) +{ + sa_sint_t * RESTRICT temp_bucket = &buckets[4 * ALPHABET_SIZE]; + + buckets[BUCKETS_INDEX4((fast_uint_t)T[first_lms_suffix], 1)]++; + + fast_sint_t i, j; sa_sint_t sum0 = left_suffixes_count + 1, sum1 = 0; + for (i = BUCKETS_INDEX4(0, 0), j = BUCKETS_INDEX2(0, 0); i <= BUCKETS_INDEX4(ALPHABET_SIZE - 1, 0); i += BUCKETS_INDEX4(1, 0), j += BUCKETS_INDEX2(1, 0)) + { + temp_bucket[j + BUCKETS_INDEX2(0, 0)] = sum0; + + sum0 += buckets[i + BUCKETS_INDEX4(0, 0)] + buckets[i + BUCKETS_INDEX4(0, 2)]; + sum1 += buckets[i + BUCKETS_INDEX4(0, 1)]; + + buckets[j + BUCKETS_INDEX2(0, 0)] = sum0; + buckets[j + BUCKETS_INDEX2(0, 1)] = sum1; + } +} + +static void libsais_initialize_buckets_for_partial_sorting_32s_6k(const sa_sint_t * RESTRICT T, sa_sint_t k, sa_sint_t * RESTRICT buckets, sa_sint_t first_lms_suffix, sa_sint_t left_suffixes_count) +{ + sa_sint_t * RESTRICT temp_bucket = &buckets[4 * k]; + + fast_sint_t i, j; sa_sint_t sum0 = left_suffixes_count + 1, sum1 = 0, sum2 = 0; + for (first_lms_suffix = T[first_lms_suffix], i = BUCKETS_INDEX4(0, 0), j = BUCKETS_INDEX2(0, 0); i <= BUCKETS_INDEX4((fast_sint_t)first_lms_suffix - 1, 0); i += BUCKETS_INDEX4(1, 0), j += BUCKETS_INDEX2(1, 0)) + { + sa_sint_t SS = buckets[i + BUCKETS_INDEX4(0, 0)]; + sa_sint_t LS = buckets[i + BUCKETS_INDEX4(0, 1)]; + sa_sint_t SL = buckets[i + BUCKETS_INDEX4(0, 2)]; + sa_sint_t LL = buckets[i + BUCKETS_INDEX4(0, 3)]; + + buckets[i + BUCKETS_INDEX4(0, 0)] = sum0; + buckets[i + BUCKETS_INDEX4(0, 1)] = sum2; + buckets[i + BUCKETS_INDEX4(0, 
2)] = 0; + buckets[i + BUCKETS_INDEX4(0, 3)] = 0; + + sum0 += SS + SL; sum1 += LS; sum2 += LS + LL; + + temp_bucket[j + BUCKETS_INDEX2(0, 0)] = sum0; + temp_bucket[j + BUCKETS_INDEX2(0, 1)] = sum1; + } + + for (sum1 += 1; i <= BUCKETS_INDEX4((fast_sint_t)k - 1, 0); i += BUCKETS_INDEX4(1, 0), j += BUCKETS_INDEX2(1, 0)) + { + sa_sint_t SS = buckets[i + BUCKETS_INDEX4(0, 0)]; + sa_sint_t LS = buckets[i + BUCKETS_INDEX4(0, 1)]; + sa_sint_t SL = buckets[i + BUCKETS_INDEX4(0, 2)]; + sa_sint_t LL = buckets[i + BUCKETS_INDEX4(0, 3)]; + + buckets[i + BUCKETS_INDEX4(0, 0)] = sum0; + buckets[i + BUCKETS_INDEX4(0, 1)] = sum2; + buckets[i + BUCKETS_INDEX4(0, 2)] = 0; + buckets[i + BUCKETS_INDEX4(0, 3)] = 0; + + sum0 += SS + SL; sum1 += LS; sum2 += LS + LL; + + temp_bucket[j + BUCKETS_INDEX2(0, 0)] = sum0; + temp_bucket[j + BUCKETS_INDEX2(0, 1)] = sum1; + } +} + +static sa_sint_t libsais_partial_sorting_scan_left_to_right_8u(const uint8_t * RESTRICT T, sa_sint_t * RESTRICT SA, sa_sint_t * RESTRICT buckets, sa_sint_t d, fast_sint_t omp_block_start, fast_sint_t omp_block_size) +{ + const fast_sint_t prefetch_distance = 32; + + sa_sint_t * RESTRICT induction_bucket = &buckets[4 * ALPHABET_SIZE]; + sa_sint_t * RESTRICT distinct_names = &buckets[2 * ALPHABET_SIZE]; + + fast_sint_t i, j; + for (i = omp_block_start, j = omp_block_start + omp_block_size - prefetch_distance - 1; i < j; i += 2) + { + libsais_prefetch(&SA[i + 2 * prefetch_distance]); + + libsais_prefetch(&T[SA[i + prefetch_distance + 0] & SAINT_MAX] - 1); + libsais_prefetch(&T[SA[i + prefetch_distance + 0] & SAINT_MAX] - 2); + libsais_prefetch(&T[SA[i + prefetch_distance + 1] & SAINT_MAX] - 1); + libsais_prefetch(&T[SA[i + prefetch_distance + 1] & SAINT_MAX] - 2); + + sa_sint_t p0 = SA[i + 0]; d += (p0 < 0); p0 &= SAINT_MAX; sa_sint_t v0 = BUCKETS_INDEX2(T[p0 - 1], T[p0 - 2] >= T[p0 - 1]); + SA[induction_bucket[v0]++] = (p0 - 1) | ((sa_sint_t)(distinct_names[v0] != d) << (SAINT_BIT - 1)); distinct_names[v0] = d; + + 
sa_sint_t p1 = SA[i + 1]; d += (p1 < 0); p1 &= SAINT_MAX; sa_sint_t v1 = BUCKETS_INDEX2(T[p1 - 1], T[p1 - 2] >= T[p1 - 1]); + SA[induction_bucket[v1]++] = (p1 - 1) | ((sa_sint_t)(distinct_names[v1] != d) << (SAINT_BIT - 1)); distinct_names[v1] = d; + } + + for (j += prefetch_distance + 1; i < j; i += 1) + { + sa_sint_t p = SA[i]; d += (p < 0); p &= SAINT_MAX; sa_sint_t v = BUCKETS_INDEX2(T[p - 1], T[p - 2] >= T[p - 1]); + SA[induction_bucket[v]++] = (p - 1) | ((sa_sint_t)(distinct_names[v] != d) << (SAINT_BIT - 1)); distinct_names[v] = d; + } + + return d; +} + +#if defined(_OPENMP) + +static void libsais_partial_sorting_scan_left_to_right_8u_block_prepare(const uint8_t * RESTRICT T, sa_sint_t * RESTRICT SA, sa_sint_t * RESTRICT buckets, LIBSAIS_THREAD_CACHE * RESTRICT cache, fast_sint_t omp_block_start, fast_sint_t omp_block_size, LIBSAIS_THREAD_STATE * RESTRICT state) +{ + const fast_sint_t prefetch_distance = 32; + + sa_sint_t * RESTRICT induction_bucket = &buckets[0 * ALPHABET_SIZE]; + sa_sint_t * RESTRICT distinct_names = &buckets[2 * ALPHABET_SIZE]; + + memset(buckets, 0, 4 * ALPHABET_SIZE * sizeof(sa_sint_t)); + + fast_sint_t i, j, count = 0; sa_sint_t d = 1; + for (i = omp_block_start, j = omp_block_start + omp_block_size - prefetch_distance - 1; i < j; i += 2) + { + libsais_prefetch(&SA[i + 2 * prefetch_distance]); + + libsais_prefetch(&T[SA[i + prefetch_distance + 0] & SAINT_MAX] - 1); + libsais_prefetch(&T[SA[i + prefetch_distance + 0] & SAINT_MAX] - 2); + libsais_prefetch(&T[SA[i + prefetch_distance + 1] & SAINT_MAX] - 1); + libsais_prefetch(&T[SA[i + prefetch_distance + 1] & SAINT_MAX] - 2); + + sa_sint_t p0 = cache[count].index = SA[i + 0]; d += (p0 < 0); p0 &= SAINT_MAX; sa_sint_t v0 = cache[count++].symbol = BUCKETS_INDEX2(T[p0 - 1], T[p0 - 2] >= T[p0 - 1]); induction_bucket[v0]++; distinct_names[v0] = d; + sa_sint_t p1 = cache[count].index = SA[i + 1]; d += (p1 < 0); p1 &= SAINT_MAX; sa_sint_t v1 = cache[count++].symbol = BUCKETS_INDEX2(T[p1 - 1], 
T[p1 - 2] >= T[p1 - 1]); induction_bucket[v1]++; distinct_names[v1] = d; + } + + for (j += prefetch_distance + 1; i < j; i += 1) + { + sa_sint_t p = cache[count].index = SA[i]; d += (p < 0); p &= SAINT_MAX; sa_sint_t v = cache[count++].symbol = BUCKETS_INDEX2(T[p - 1], T[p - 2] >= T[p - 1]); induction_bucket[v]++; distinct_names[v] = d; + } + + state[0].state.position = (fast_sint_t)d - 1; + state[0].state.count = count; +} + +static void libsais_partial_sorting_scan_left_to_right_8u_block_place(sa_sint_t * RESTRICT SA, sa_sint_t * RESTRICT buckets, LIBSAIS_THREAD_CACHE * RESTRICT cache, fast_sint_t count, sa_sint_t d) +{ + const fast_sint_t prefetch_distance = 32; + + sa_sint_t * RESTRICT induction_bucket = &buckets[0 * ALPHABET_SIZE]; + sa_sint_t * RESTRICT distinct_names = &buckets[2 * ALPHABET_SIZE]; + + fast_sint_t i, j; + for (i = 0, j = count - 1; i < j; i += 2) + { + libsais_prefetch(&cache[i + prefetch_distance]); + + sa_sint_t p0 = cache[i + 0].index; d += (p0 < 0); sa_sint_t v0 = cache[i + 0].symbol; + SA[induction_bucket[v0]++] = (p0 - 1) | ((sa_sint_t)(distinct_names[v0] != d) << (SAINT_BIT - 1)); distinct_names[v0] = d; + + sa_sint_t p1 = cache[i + 1].index; d += (p1 < 0); sa_sint_t v1 = cache[i + 1].symbol; + SA[induction_bucket[v1]++] = (p1 - 1) | ((sa_sint_t)(distinct_names[v1] != d) << (SAINT_BIT - 1)); distinct_names[v1] = d; + } + + for (j += 1; i < j; i += 1) + { + sa_sint_t p = cache[i].index; d += (p < 0); sa_sint_t v = cache[i].symbol; + SA[induction_bucket[v]++] = (p - 1) | ((sa_sint_t)(distinct_names[v] != d) << (SAINT_BIT - 1)); distinct_names[v] = d; + } +} + +static sa_sint_t libsais_partial_sorting_scan_left_to_right_8u_block_omp(const uint8_t * RESTRICT T, sa_sint_t * RESTRICT SA, sa_sint_t * RESTRICT buckets, sa_sint_t d, fast_sint_t block_start, fast_sint_t block_size, sa_sint_t threads, LIBSAIS_THREAD_STATE * RESTRICT thread_state) +{ +#if defined(_OPENMP) + #pragma omp parallel num_threads(threads) if(threads > 1 && block_size >= 
64 * ALPHABET_SIZE && omp_get_dynamic() == 0) +#endif + { +#if defined(_OPENMP) + fast_sint_t omp_thread_num = omp_get_thread_num(); + fast_sint_t omp_num_threads = omp_get_num_threads(); +#else + UNUSED(threads); UNUSED(thread_state); + + fast_sint_t omp_thread_num = 0; + fast_sint_t omp_num_threads = 1; +#endif + fast_sint_t omp_block_stride = (block_size / omp_num_threads) & (-16); + fast_sint_t omp_block_start = omp_thread_num * omp_block_stride; + fast_sint_t omp_block_size = omp_thread_num < omp_num_threads - 1 ? omp_block_stride : block_size - omp_block_start; + + omp_block_start += block_start; + + if (omp_num_threads == 1) + { + d = libsais_partial_sorting_scan_left_to_right_8u(T, SA, buckets, d, omp_block_start, omp_block_size); + } +#if defined(_OPENMP) + else + { + { + libsais_partial_sorting_scan_left_to_right_8u_block_prepare(T, SA, thread_state[omp_thread_num].state.buckets, thread_state[omp_thread_num].state.cache, omp_block_start, omp_block_size, &thread_state[omp_thread_num]); + } + + #pragma omp barrier + + #pragma omp master + { + sa_sint_t * RESTRICT induction_bucket = &buckets[4 * ALPHABET_SIZE]; + sa_sint_t * RESTRICT distinct_names = &buckets[2 * ALPHABET_SIZE]; + + fast_sint_t t; + for (t = 0; t < omp_num_threads; ++t) + { + sa_sint_t * RESTRICT temp_induction_bucket = &thread_state[t].state.buckets[0 * ALPHABET_SIZE]; + sa_sint_t * RESTRICT temp_distinct_names = &thread_state[t].state.buckets[2 * ALPHABET_SIZE]; + + fast_sint_t c; + for (c = 0; c < 2 * ALPHABET_SIZE; c += 1) { sa_sint_t A = induction_bucket[c], B = temp_induction_bucket[c]; induction_bucket[c] = A + B; temp_induction_bucket[c] = A; } + + for (d -= 1, c = 0; c < 2 * ALPHABET_SIZE; c += 1) { sa_sint_t A = distinct_names[c], B = temp_distinct_names[c], D = B + d; distinct_names[c] = B > 0 ? 
D : A; temp_distinct_names[c] = A; } + d += 1 + (sa_sint_t)thread_state[t].state.position; thread_state[t].state.position = (fast_sint_t)d - thread_state[t].state.position; + } + } + + #pragma omp barrier + + { + libsais_partial_sorting_scan_left_to_right_8u_block_place(SA, thread_state[omp_thread_num].state.buckets, thread_state[omp_thread_num].state.cache, thread_state[omp_thread_num].state.count, (sa_sint_t)thread_state[omp_thread_num].state.position); + } + } +#endif + } + + return d; +} + +#endif + +static sa_sint_t libsais_partial_sorting_scan_left_to_right_8u_omp(const uint8_t * RESTRICT T, sa_sint_t * RESTRICT SA, sa_sint_t n, sa_sint_t * RESTRICT buckets, sa_sint_t left_suffixes_count, sa_sint_t d, sa_sint_t threads, LIBSAIS_THREAD_STATE * RESTRICT thread_state) +{ + sa_sint_t * RESTRICT induction_bucket = &buckets[4 * ALPHABET_SIZE]; + sa_sint_t * RESTRICT distinct_names = &buckets[2 * ALPHABET_SIZE]; + + SA[induction_bucket[BUCKETS_INDEX2(T[n - 1], T[n - 2] >= T[n - 1])]++] = (n - 1) | SAINT_MIN; + distinct_names[BUCKETS_INDEX2(T[n - 1], T[n - 2] >= T[n - 1])] = ++d; + + if (threads == 1 || left_suffixes_count < 65536) + { + d = libsais_partial_sorting_scan_left_to_right_8u(T, SA, buckets, d, 0, left_suffixes_count); + } +#if defined(_OPENMP) + else + { + fast_sint_t block_start; + for (block_start = 0; block_start < left_suffixes_count; ) + { + if (SA[block_start] == 0) + { + block_start++; + } + else + { + fast_sint_t block_max_end = block_start + ((fast_sint_t)threads) * (LIBSAIS_PER_THREAD_CACHE_SIZE - 16 * (fast_sint_t)threads); if (block_max_end > left_suffixes_count) { block_max_end = left_suffixes_count;} + fast_sint_t block_end = block_start + 1; while (block_end < block_max_end && SA[block_end] != 0) { block_end++; } + fast_sint_t block_size = block_end - block_start; + + if (block_size < 32) + { + for (; block_start < block_end; block_start += 1) + { + sa_sint_t p = SA[block_start]; d += (p < 0); p &= SAINT_MAX; sa_sint_t v = 
BUCKETS_INDEX2(T[p - 1], T[p - 2] >= T[p - 1]); + SA[induction_bucket[v]++] = (p - 1) | ((sa_sint_t)(distinct_names[v] != d) << (SAINT_BIT - 1)); distinct_names[v] = d; + } + } + else + { + d = libsais_partial_sorting_scan_left_to_right_8u_block_omp(T, SA, buckets, d, block_start, block_size, threads, thread_state); + block_start = block_end; + } + } + } + } +#else + UNUSED(thread_state); +#endif + + return d; +} + +static sa_sint_t libsais_partial_sorting_scan_left_to_right_32s_6k(const sa_sint_t * RESTRICT T, sa_sint_t * RESTRICT SA, sa_sint_t * RESTRICT buckets, sa_sint_t d, fast_sint_t omp_block_start, fast_sint_t omp_block_size) +{ + const fast_sint_t prefetch_distance = 32; + + fast_sint_t i, j; + for (i = omp_block_start, j = omp_block_start + omp_block_size - 2 * prefetch_distance - 1; i < j; i += 2) + { + libsais_prefetch(&SA[i + 3 * prefetch_distance]); + + libsais_prefetch(&T[SA[i + 2 * prefetch_distance + 0] & SAINT_MAX] - 1); + libsais_prefetch(&T[SA[i + 2 * prefetch_distance + 0] & SAINT_MAX] - 2); + libsais_prefetch(&T[SA[i + 2 * prefetch_distance + 1] & SAINT_MAX] - 1); + libsais_prefetch(&T[SA[i + 2 * prefetch_distance + 1] & SAINT_MAX] - 2); + + sa_sint_t p0 = SA[i + prefetch_distance + 0] & SAINT_MAX; sa_sint_t v0 = BUCKETS_INDEX4(T[p0 - (p0 > 0)], 0); libsais_prefetchw(&buckets[v0]); + sa_sint_t p1 = SA[i + prefetch_distance + 1] & SAINT_MAX; sa_sint_t v1 = BUCKETS_INDEX4(T[p1 - (p1 > 0)], 0); libsais_prefetchw(&buckets[v1]); + + sa_sint_t p2 = SA[i + 0]; d += (p2 < 0); p2 &= SAINT_MAX; sa_sint_t v2 = BUCKETS_INDEX4(T[p2 - 1], T[p2 - 2] >= T[p2 - 1]); + SA[buckets[v2]++] = (p2 - 1) | ((sa_sint_t)(buckets[2 + v2] != d) << (SAINT_BIT - 1)); buckets[2 + v2] = d; + + sa_sint_t p3 = SA[i + 1]; d += (p3 < 0); p3 &= SAINT_MAX; sa_sint_t v3 = BUCKETS_INDEX4(T[p3 - 1], T[p3 - 2] >= T[p3 - 1]); + SA[buckets[v3]++] = (p3 - 1) | ((sa_sint_t)(buckets[2 + v3] != d) << (SAINT_BIT - 1)); buckets[2 + v3] = d; + } + + for (j += 2 * prefetch_distance + 1; i < j; i 
+= 1) + { + sa_sint_t p = SA[i]; d += (p < 0); p &= SAINT_MAX; sa_sint_t v = BUCKETS_INDEX4(T[p - 1], T[p - 2] >= T[p - 1]); + SA[buckets[v]++] = (p - 1) | ((sa_sint_t)(buckets[2 + v] != d) << (SAINT_BIT - 1)); buckets[2 + v] = d; + } + + return d; +} + +/* Sequential left-to-right pass over SA[omp_block_start, omp_block_start + omp_block_size) for the "4k" bucket layout: induction_bucket is buckets[2 * k ...] (indexed by symbol) and distinct_names is buckets[0 ...] (indexed by BUCKETS_INDEX2). Each visited slot is first rewritten as p & SAINT_MAX. If p > 0 the slot is zeroed, d is advanced by p >> (SUFFIX_GROUP_BIT - 1), the SUFFIX_GROUP_MARKER bit is cleared from p, and p - 1 is stored at SA[induction_bucket[T[p - 1]]++] with two flags OR-ed in: (T[p - 2] < T[p - 1]) at bit SAINT_BIT - 1 and (distinct_names[v] != d) at bit SUFFIX_GROUP_BIT - 1; distinct_names[v] is then set to d. Returns the updated d. */ static sa_sint_t libsais_partial_sorting_scan_left_to_right_32s_4k(const sa_sint_t * RESTRICT T, sa_sint_t * RESTRICT SA, sa_sint_t k, sa_sint_t * RESTRICT buckets, sa_sint_t d, fast_sint_t omp_block_start, fast_sint_t omp_block_size) +{ + const fast_sint_t prefetch_distance = 32; + + sa_sint_t * RESTRICT induction_bucket = &buckets[2 * k]; + sa_sint_t * RESTRICT distinct_names = &buckets[0 * k]; + + fast_sint_t i, j; + for (i = omp_block_start, j = omp_block_start + omp_block_size - 2 * prefetch_distance - 1; i < j; i += 2) + { + libsais_prefetchw(&SA[i + 3 * prefetch_distance]); + + /* prefetch T[s - 1] and T[s - 2] only for positive entries; NULL is passed otherwise */ sa_sint_t s0 = SA[i + 2 * prefetch_distance + 0]; const sa_sint_t * Ts0 = &T[s0 & ~SUFFIX_GROUP_MARKER] - 1; libsais_prefetch(s0 > 0 ? Ts0 : NULL); Ts0--; libsais_prefetch(s0 > 0 ? Ts0 : NULL); + sa_sint_t s1 = SA[i + 2 * prefetch_distance + 1]; const sa_sint_t * Ts1 = &T[s1 & ~SUFFIX_GROUP_MARKER] - 1; libsais_prefetch(s1 > 0 ? Ts1 : NULL); Ts1--; libsais_prefetch(s1 > 0 ? 
Ts1 : NULL); + sa_sint_t s2 = SA[i + 1 * prefetch_distance + 0]; if (s2 > 0) { const fast_sint_t Ts2 = T[(s2 & ~SUFFIX_GROUP_MARKER) - 1]; libsais_prefetchw(&induction_bucket[Ts2]); libsais_prefetchw(&distinct_names[BUCKETS_INDEX2(Ts2, 0)]); } + sa_sint_t s3 = SA[i + 1 * prefetch_distance + 1]; if (s3 > 0) { const fast_sint_t Ts3 = T[(s3 & ~SUFFIX_GROUP_MARKER) - 1]; libsais_prefetchw(&induction_bucket[Ts3]); libsais_prefetchw(&distinct_names[BUCKETS_INDEX2(Ts3, 0)]); } + + /* every visited slot is first rewritten without its top bit; positive entries are then consumed (slot zeroed) and induced */ sa_sint_t p0 = SA[i + 0]; SA[i + 0] = p0 & SAINT_MAX; + if (p0 > 0) + { + SA[i + 0] = 0; d += (p0 >> (SUFFIX_GROUP_BIT - 1)); p0 &= ~SUFFIX_GROUP_MARKER; sa_sint_t v0 = BUCKETS_INDEX2(T[p0 - 1], T[p0 - 2] < T[p0 - 1]); + SA[induction_bucket[T[p0 - 1]]++] = (p0 - 1) | ((sa_sint_t)(T[p0 - 2] < T[p0 - 1]) << (SAINT_BIT - 1)) | ((sa_sint_t)(distinct_names[v0] != d) << (SUFFIX_GROUP_BIT - 1)); distinct_names[v0] = d; + } + + sa_sint_t p1 = SA[i + 1]; SA[i + 1] = p1 & SAINT_MAX; + if (p1 > 0) + { + SA[i + 1] = 0; d += (p1 >> (SUFFIX_GROUP_BIT - 1)); p1 &= ~SUFFIX_GROUP_MARKER; sa_sint_t v1 = BUCKETS_INDEX2(T[p1 - 1], T[p1 - 2] < T[p1 - 1]); + SA[induction_bucket[T[p1 - 1]]++] = (p1 - 1) | ((sa_sint_t)(T[p1 - 2] < T[p1 - 1]) << (SAINT_BIT - 1)) | ((sa_sint_t)(distinct_names[v1] != d) << (SUFFIX_GROUP_BIT - 1)); distinct_names[v1] = d; + } + } + + /* tail: same per-entry work as above, one entry at a time and without prefetching */ for (j += 2 * prefetch_distance + 1; i < j; i += 1) + { + sa_sint_t p = SA[i]; SA[i] = p & SAINT_MAX; + if (p > 0) + { + SA[i] = 0; d += (p >> (SUFFIX_GROUP_BIT - 1)); p &= ~SUFFIX_GROUP_MARKER; sa_sint_t v = BUCKETS_INDEX2(T[p - 1], T[p - 2] < T[p - 1]); + SA[induction_bucket[T[p - 1]]++] = (p - 1) | ((sa_sint_t)(T[p - 2] < T[p - 1]) << (SAINT_BIT - 1)) | ((sa_sint_t)(distinct_names[v] != d) << (SUFFIX_GROUP_BIT - 1)); distinct_names[v] = d; + } + } + + return d; +} + +/* Sequential left-to-right pass over SA[omp_block_start, omp_block_start + omp_block_size) for the "1k" layout, which uses one induction_bucket array indexed by symbol and keeps no name counter (nothing is returned). Each visited slot is rewritten as p & SAINT_MAX; if p > 0 the slot is zeroed and p - 1 is stored at SA[induction_bucket[T[p - 1]]++] with (T[p - 2] < T[p - 1]) placed in bit SAINT_BIT - 1. */ static void libsais_partial_sorting_scan_left_to_right_32s_1k(const sa_sint_t * RESTRICT T, sa_sint_t * RESTRICT SA, sa_sint_t * RESTRICT induction_bucket, fast_sint_t omp_block_start, 
fast_sint_t omp_block_size) +{ + const fast_sint_t prefetch_distance = 32; + + fast_sint_t i, j; + for (i = omp_block_start, j = omp_block_start + omp_block_size - 2 * prefetch_distance - 1; i < j; i += 2) + { + libsais_prefetchw(&SA[i + 3 * prefetch_distance]); + + sa_sint_t s0 = SA[i + 2 * prefetch_distance + 0]; const sa_sint_t * Ts0 = &T[s0] - 1; libsais_prefetch(s0 > 0 ? Ts0 : NULL); + sa_sint_t s1 = SA[i + 2 * prefetch_distance + 1]; const sa_sint_t * Ts1 = &T[s1] - 1; libsais_prefetch(s1 > 0 ? Ts1 : NULL); + sa_sint_t s2 = SA[i + 1 * prefetch_distance + 0]; if (s2 > 0) { libsais_prefetchw(&induction_bucket[T[s2 - 1]]); libsais_prefetch(&T[s2] - 2); } + sa_sint_t s3 = SA[i + 1 * prefetch_distance + 1]; if (s3 > 0) { libsais_prefetchw(&induction_bucket[T[s3 - 1]]); libsais_prefetch(&T[s3] - 2); } + + sa_sint_t p0 = SA[i + 0]; SA[i + 0] = p0 & SAINT_MAX; if (p0 > 0) { SA[i + 0] = 0; SA[induction_bucket[T[p0 - 1]]++] = (p0 - 1) | ((sa_sint_t)(T[p0 - 2] < T[p0 - 1]) << (SAINT_BIT - 1)); } + sa_sint_t p1 = SA[i + 1]; SA[i + 1] = p1 & SAINT_MAX; if (p1 > 0) { SA[i + 1] = 0; SA[induction_bucket[T[p1 - 1]]++] = (p1 - 1) | ((sa_sint_t)(T[p1 - 2] < T[p1 - 1]) << (SAINT_BIT - 1)); } + } + + for (j += 2 * prefetch_distance + 1; i < j; i += 1) + { + sa_sint_t p = SA[i]; SA[i] = p & SAINT_MAX; if (p > 0) { SA[i] = 0; SA[induction_bucket[T[p - 1]]++] = (p - 1) | ((sa_sint_t)(T[p - 2] < T[p - 1]) << (SAINT_BIT - 1)); } + } +} + +#if defined(_OPENMP) + +/* Gather step of the block-wise 6k scan (compiled only when _OPENMP is defined). For each i in [omp_block_start, omp_block_start + omp_block_size) it copies SA[i] into cache[i].index and stores in cache[i].symbol the value BUCKETS_INDEX4(T[p - 1], T[p - 2] >= T[p - 1]) for p = SA[i] & SAINT_MAX, or 0 when p == 0. SA is only read and prefetched here; cache is indexed with the same i as SA. */ static void libsais_partial_sorting_scan_left_to_right_32s_6k_block_gather(const sa_sint_t * RESTRICT T, sa_sint_t * RESTRICT SA, LIBSAIS_THREAD_CACHE * RESTRICT cache, fast_sint_t omp_block_start, fast_sint_t omp_block_size) +{ + const fast_sint_t prefetch_distance = 32; + + fast_sint_t i, j; + for (i = omp_block_start, j = omp_block_start + omp_block_size - prefetch_distance - 1; i < j; i += 2) + { + libsais_prefetch(&SA[i + 2 * prefetch_distance]); + + libsais_prefetch(&T[SA[i + prefetch_distance + 0] & SAINT_MAX] - 1); + 
libsais_prefetch(&T[SA[i + prefetch_distance + 0] & SAINT_MAX] - 2); + libsais_prefetch(&T[SA[i + prefetch_distance + 1] & SAINT_MAX] - 1); + libsais_prefetch(&T[SA[i + prefetch_distance + 1] & SAINT_MAX] - 2); + + libsais_prefetchw(&cache[i + prefetch_distance]); + + /* index keeps the raw SA value (top bit included); symbol stays 0 when the masked value is 0 */ sa_sint_t p0 = cache[i + 0].index = SA[i + 0]; sa_sint_t symbol0 = 0; p0 &= SAINT_MAX; if (p0 != 0) { symbol0 = BUCKETS_INDEX4(T[p0 - 1], T[p0 - 2] >= T[p0 - 1]); } cache[i + 0].symbol = symbol0; + sa_sint_t p1 = cache[i + 1].index = SA[i + 1]; sa_sint_t symbol1 = 0; p1 &= SAINT_MAX; if (p1 != 0) { symbol1 = BUCKETS_INDEX4(T[p1 - 1], T[p1 - 2] >= T[p1 - 1]); } cache[i + 1].symbol = symbol1; + } + + for (j += prefetch_distance + 1; i < j; i += 1) + { + sa_sint_t p = cache[i].index = SA[i]; sa_sint_t symbol = 0; p &= SAINT_MAX; if (p != 0) { symbol = BUCKETS_INDEX4(T[p - 1], T[p - 2] >= T[p - 1]); } cache[i].symbol = symbol; + } +} + +/* Gather step of the block-wise 4k scan. For SA[i] = p > 0: cache[i].index receives p, cache[i].symbol receives BUCKETS_INDEX2(T[p - 1], T[p - 2] < T[p - 1]) computed after clearing SUFFIX_GROUP_MARKER from p, and SA[i] is set to 0. For p <= 0: cache[i].symbol is set to SAINT_MIN, cache[i].index is left untouched, and SA[i] becomes p & SAINT_MAX. */ static void libsais_partial_sorting_scan_left_to_right_32s_4k_block_gather(const sa_sint_t * RESTRICT T, sa_sint_t * RESTRICT SA, LIBSAIS_THREAD_CACHE * RESTRICT cache, fast_sint_t omp_block_start, fast_sint_t omp_block_size) +{ + const fast_sint_t prefetch_distance = 32; + + fast_sint_t i, j; + for (i = omp_block_start, j = omp_block_start + omp_block_size - prefetch_distance - 1; i < j; i += 2) + { + libsais_prefetchw(&SA[i + 2 * prefetch_distance]); + + sa_sint_t s0 = SA[i + prefetch_distance + 0]; const sa_sint_t * Ts0 = &T[s0 & ~SUFFIX_GROUP_MARKER] - 1; libsais_prefetch(s0 > 0 ? Ts0 : NULL); Ts0--; libsais_prefetch(s0 > 0 ? Ts0 : NULL); + sa_sint_t s1 = SA[i + prefetch_distance + 1]; const sa_sint_t * Ts1 = &T[s1 & ~SUFFIX_GROUP_MARKER] - 1; libsais_prefetch(s1 > 0 ? Ts1 : NULL); Ts1--; libsais_prefetch(s1 > 0 ? 
Ts1 : NULL); + + libsais_prefetchw(&cache[i + prefetch_distance]); + + /* SAINT_MIN in cache[].symbol marks an entry that was not gathered (p <= 0) */ sa_sint_t symbol0 = SAINT_MIN, p0 = SA[i + 0]; if (p0 > 0) { cache[i + 0].index = p0; p0 &= ~SUFFIX_GROUP_MARKER; symbol0 = BUCKETS_INDEX2(T[p0 - 1], T[p0 - 2] < T[p0 - 1]); p0 = 0; } cache[i + 0].symbol = symbol0; SA[i + 0] = p0 & SAINT_MAX; + sa_sint_t symbol1 = SAINT_MIN, p1 = SA[i + 1]; if (p1 > 0) { cache[i + 1].index = p1; p1 &= ~SUFFIX_GROUP_MARKER; symbol1 = BUCKETS_INDEX2(T[p1 - 1], T[p1 - 2] < T[p1 - 1]); p1 = 0; } cache[i + 1].symbol = symbol1; SA[i + 1] = p1 & SAINT_MAX; + } + + for (j += prefetch_distance + 1; i < j; i += 1) + { + sa_sint_t symbol = SAINT_MIN, p = SA[i]; if (p > 0) { cache[i].index = p; p &= ~SUFFIX_GROUP_MARKER; symbol = BUCKETS_INDEX2(T[p - 1], T[p - 2] < T[p - 1]); p = 0; } cache[i].symbol = symbol; SA[i] = p & SAINT_MAX; + } +} + +/* Gather step of the block-wise 1k scan. For SA[i] = p > 0: cache[i].index receives (p - 1) | ((T[p - 2] < T[p - 1]) << (SAINT_BIT - 1)), cache[i].symbol receives T[p - 1], and SA[i] is set to 0. For p <= 0: cache[i].symbol is set to SAINT_MIN, cache[i].index is left untouched, and SA[i] becomes p & SAINT_MAX. */ static void libsais_partial_sorting_scan_left_to_right_32s_1k_block_gather(const sa_sint_t * RESTRICT T, sa_sint_t * RESTRICT SA, LIBSAIS_THREAD_CACHE * RESTRICT cache, fast_sint_t omp_block_start, fast_sint_t omp_block_size) +{ + const fast_sint_t prefetch_distance = 32; + + fast_sint_t i, j; + for (i = omp_block_start, j = omp_block_start + omp_block_size - prefetch_distance - 1; i < j; i += 2) + { + libsais_prefetchw(&SA[i + 2 * prefetch_distance]); + + sa_sint_t s0 = SA[i + prefetch_distance + 0]; const sa_sint_t * Ts0 = &T[s0] - 1; libsais_prefetch(s0 > 0 ? Ts0 : NULL); Ts0--; libsais_prefetch(s0 > 0 ? Ts0 : NULL); + sa_sint_t s1 = SA[i + prefetch_distance + 1]; const sa_sint_t * Ts1 = &T[s1] - 1; libsais_prefetch(s1 > 0 ? Ts1 : NULL); Ts1--; libsais_prefetch(s1 > 0 ? 
Ts1 : NULL); + + libsais_prefetchw(&cache[i + prefetch_distance]); + + sa_sint_t symbol0 = SAINT_MIN, p0 = SA[i + 0]; if (p0 > 0) { cache[i + 0].index = (p0 - 1) | ((sa_sint_t)(T[p0 - 2] < T[p0 - 1]) << (SAINT_BIT - 1)); symbol0 = T[p0 - 1]; p0 = 0; } cache[i + 0].symbol = symbol0; SA[i + 0] = p0 & SAINT_MAX; + sa_sint_t symbol1 = SAINT_MIN, p1 = SA[i + 1]; if (p1 > 0) { cache[i + 1].index = (p1 - 1) | ((sa_sint_t)(T[p1 - 2] < T[p1 - 1]) << (SAINT_BIT - 1)); symbol1 = T[p1 - 1]; p1 = 0; } cache[i + 1].symbol = symbol1; SA[i + 1] = p1 & SAINT_MAX; + } + + for (j += prefetch_distance + 1; i < j; i += 1) + { + sa_sint_t symbol = SAINT_MIN, p = SA[i]; if (p > 0) { cache[i].index = (p - 1) | ((sa_sint_t)(T[p - 2] < T[p - 1]) << (SAINT_BIT - 1)); symbol = T[p - 1]; p = 0; } cache[i].symbol = symbol; SA[i] = p & SAINT_MAX; + } +} + +/* Sort step of the block-wise 6k scan; it does not touch SA. Walks cache[omp_block_start, omp_block_end) in increasing order: adds (index < 0) to d, replaces cache[i].symbol by the destination position buckets[v]++, and rewrites cache[i].index to (p - 1) with (buckets[2 + v] != d) in bit SAINT_BIT - 1, then sets buckets[2 + v] = d. When the destination is below omp_block_end, the new index is also copied to cache[destination] together with a freshly computed BUCKETS_INDEX4 symbol, so that entry is handled later in this same walk. Returns the updated d. */ static sa_sint_t libsais_partial_sorting_scan_left_to_right_32s_6k_block_sort(const sa_sint_t * RESTRICT T, sa_sint_t * RESTRICT buckets, sa_sint_t d, LIBSAIS_THREAD_CACHE * RESTRICT cache, fast_sint_t omp_block_start, fast_sint_t omp_block_size) +{ + const fast_sint_t prefetch_distance = 32; + + fast_sint_t i, j, omp_block_end = omp_block_start + omp_block_size; + for (i = omp_block_start, j = omp_block_end - prefetch_distance - 1; i < j; i += 2) + { + libsais_prefetchw(&cache[i + 2 * prefetch_distance]); + + libsais_prefetchw(&buckets[cache[i + prefetch_distance + 0].symbol]); + libsais_prefetchw(&buckets[cache[i + prefetch_distance + 1].symbol]); + + sa_sint_t v0 = cache[i + 0].symbol, p0 = cache[i + 0].index; d += (p0 < 0); cache[i + 0].symbol = buckets[v0]++; cache[i + 0].index = (p0 - 1) | ((sa_sint_t)(buckets[2 + v0] != d) << (SAINT_BIT - 1)); buckets[2 + v0] = d; + if (cache[i + 0].symbol < omp_block_end) { sa_sint_t s = cache[i + 0].symbol, q = (cache[s].index = cache[i + 0].index) & SAINT_MAX; cache[s].symbol = BUCKETS_INDEX4(T[q - 1], T[q - 2] >= T[q - 1]); } + + sa_sint_t v1 = cache[i + 1].symbol, p1 = cache[i + 1].index; d += (p1 
< 0); cache[i + 1].symbol = buckets[v1]++; cache[i + 1].index = (p1 - 1) | ((sa_sint_t)(buckets[2 + v1] != d) << (SAINT_BIT - 1)); buckets[2 + v1] = d; + if (cache[i + 1].symbol < omp_block_end) { sa_sint_t s = cache[i + 1].symbol, q = (cache[s].index = cache[i + 1].index) & SAINT_MAX; cache[s].symbol = BUCKETS_INDEX4(T[q - 1], T[q - 2] >= T[q - 1]); } + } + + for (j += prefetch_distance + 1; i < j; i += 1) + { + sa_sint_t v = cache[i].symbol, p = cache[i].index; d += (p < 0); cache[i].symbol = buckets[v]++; cache[i].index = (p - 1) | ((sa_sint_t)(buckets[2 + v] != d) << (SAINT_BIT - 1)); buckets[2 + v] = d; + /* destination lies inside the block being sorted: forward the entry into cache[] so this walk reaches it */ if (cache[i].symbol < omp_block_end) { sa_sint_t s = cache[i].symbol, q = (cache[s].index = cache[i].index) & SAINT_MAX; cache[s].symbol = BUCKETS_INDEX4(T[q - 1], T[q - 2] >= T[q - 1]); } + } + + return d; +} + +/* Sort step of the block-wise 4k scan; it does not touch SA. induction_bucket is buckets[2 * k ...] and distinct_names is buckets[0 ...]. Cache entries whose symbol v is negative are skipped. For the others, d is advanced by p >> (SUFFIX_GROUP_BIT - 1), cache[i].symbol becomes the destination induction_bucket[v >> 1]++, and cache[i].index becomes (p - 1) | (v << (SAINT_BIT - 1)) | ((distinct_names[v] != d) << (SUFFIX_GROUP_BIT - 1)); distinct_names[v] is then set to d. If the destination is below omp_block_end, a positive new index is forwarded to cache[destination] with its own BUCKETS_INDEX2 symbol and the current index is cleared to 0, while a non-positive new index is just masked with SAINT_MAX. Returns the updated d. */ static sa_sint_t libsais_partial_sorting_scan_left_to_right_32s_4k_block_sort(const sa_sint_t * RESTRICT T, sa_sint_t k, sa_sint_t * RESTRICT buckets, sa_sint_t d, LIBSAIS_THREAD_CACHE * RESTRICT cache, fast_sint_t omp_block_start, fast_sint_t omp_block_size) +{ + const fast_sint_t prefetch_distance = 32; + + sa_sint_t * RESTRICT induction_bucket = &buckets[2 * k]; + sa_sint_t * RESTRICT distinct_names = &buckets[0 * k]; + + fast_sint_t i, j, omp_block_end = omp_block_start + omp_block_size; + for (i = omp_block_start, j = omp_block_end - prefetch_distance - 1; i < j; i += 2) + { + libsais_prefetchw(&cache[i + 2 * prefetch_distance]); + + sa_sint_t s0 = cache[i + prefetch_distance + 0].symbol; const sa_sint_t * Is0 = &induction_bucket[s0 >> 1]; libsais_prefetchw(s0 >= 0 ? Is0 : NULL); const sa_sint_t * Ds0 = &distinct_names[s0]; libsais_prefetchw(s0 >= 0 ? Ds0 : NULL); + sa_sint_t s1 = cache[i + prefetch_distance + 1].symbol; const sa_sint_t * Is1 = &induction_bucket[s1 >> 1]; libsais_prefetchw(s1 >= 0 ? Is1 : NULL); const sa_sint_t * Ds1 = &distinct_names[s1]; libsais_prefetchw(s1 >= 0 ? 
Ds1 : NULL); + + sa_sint_t v0 = cache[i + 0].symbol; + /* negative symbols (SAINT_MIN from the gather step) mark entries with nothing to induce */ if (v0 >= 0) + { + sa_sint_t p0 = cache[i + 0].index; d += (p0 >> (SUFFIX_GROUP_BIT - 1)); cache[i + 0].symbol = induction_bucket[v0 >> 1]++; cache[i + 0].index = (p0 - 1) | (v0 << (SAINT_BIT - 1)) | ((sa_sint_t)(distinct_names[v0] != d) << (SUFFIX_GROUP_BIT - 1)); distinct_names[v0] = d; + if (cache[i + 0].symbol < omp_block_end) { sa_sint_t ni = cache[i + 0].symbol, np = cache[i + 0].index; if (np > 0) { cache[ni].index = np; np &= ~SUFFIX_GROUP_MARKER; cache[ni].symbol = BUCKETS_INDEX2(T[np - 1], T[np - 2] < T[np - 1]); np = 0; } cache[i + 0].index = np & SAINT_MAX; } + } + + sa_sint_t v1 = cache[i + 1].symbol; + if (v1 >= 0) + { + sa_sint_t p1 = cache[i + 1].index; d += (p1 >> (SUFFIX_GROUP_BIT - 1)); cache[i + 1].symbol = induction_bucket[v1 >> 1]++; cache[i + 1].index = (p1 - 1) | (v1 << (SAINT_BIT - 1)) | ((sa_sint_t)(distinct_names[v1] != d) << (SUFFIX_GROUP_BIT - 1)); distinct_names[v1] = d; + if (cache[i + 1].symbol < omp_block_end) { sa_sint_t ni = cache[i + 1].symbol, np = cache[i + 1].index; if (np > 0) { cache[ni].index = np; np &= ~SUFFIX_GROUP_MARKER; cache[ni].symbol = BUCKETS_INDEX2(T[np - 1], T[np - 2] < T[np - 1]); np = 0; } cache[i + 1].index = np & SAINT_MAX; } + } + } + + for (j += prefetch_distance + 1; i < j; i += 1) + { + sa_sint_t v = cache[i].symbol; + if (v >= 0) + { + sa_sint_t p = cache[i].index; d += (p >> (SUFFIX_GROUP_BIT - 1)); cache[i].symbol = induction_bucket[v >> 1]++; cache[i].index = (p - 1) | (v << (SAINT_BIT - 1)) | ((sa_sint_t)(distinct_names[v] != d) << (SUFFIX_GROUP_BIT - 1)); distinct_names[v] = d; + if (cache[i].symbol < omp_block_end) { sa_sint_t ni = cache[i].symbol, np = cache[i].index; if (np > 0) { cache[ni].index = np; np &= ~SUFFIX_GROUP_MARKER; cache[ni].symbol = BUCKETS_INDEX2(T[np - 1], T[np - 2] < T[np - 1]); np = 0; } cache[i].index = np & SAINT_MAX; } + } + } + + return d; +} + +/* libsais_partial_sorting_scan_left_to_right_32s_1k_block_sort: sort step of the block-wise 1k scan; it does not touch SA. For every cache entry with symbol v >= 0 the symbol is replaced by the destination induction_bucket[v]++. If that destination is below omp_block_end, a positive index np is forwarded to cache[destination] as (np - 1) with the (T[np - 2] < T[np - 1]) flag in bit SAINT_BIT - 1 and symbol T[np - 1], and the current index is cleared to 0; a non-positive index is masked with SAINT_MAX. */ static void 
libsais_partial_sorting_scan_left_to_right_32s_1k_block_sort(const sa_sint_t * RESTRICT T, sa_sint_t * RESTRICT induction_bucket, LIBSAIS_THREAD_CACHE * RESTRICT cache, fast_sint_t omp_block_start, fast_sint_t omp_block_size) +{ + /* number of cache entries the unrolled loop looks ahead when prefetching */ const fast_sint_t prefetch_distance = 32; + + fast_sint_t i, j, omp_block_end = omp_block_start + omp_block_size; + for (i = omp_block_start, j = omp_block_end - prefetch_distance - 1; i < j; i += 2) + { + libsais_prefetchw(&cache[i + 2 * prefetch_distance]); + + sa_sint_t s0 = cache[i + prefetch_distance + 0].symbol; const sa_sint_t * Is0 = &induction_bucket[s0]; libsais_prefetchw(s0 >= 0 ? Is0 : NULL); + sa_sint_t s1 = cache[i + prefetch_distance + 1].symbol; const sa_sint_t * Is1 = &induction_bucket[s1]; libsais_prefetchw(s1 >= 0 ? Is1 : NULL); + + sa_sint_t v0 = cache[i + 0].symbol; + /* entries with a negative symbol are skipped entirely */ if (v0 >= 0) + { + /* the symbol field is reused to hold the destination position taken from the bucket */ cache[i + 0].symbol = induction_bucket[v0]++; + /* destination inside this block: forward a positive index into cache[ni] so this walk processes it later */ if (cache[i + 0].symbol < omp_block_end) { sa_sint_t ni = cache[i + 0].symbol, np = cache[i + 0].index; if (np > 0) { cache[ni].index = (np - 1) | ((sa_sint_t)(T[np - 2] < T[np - 1]) << (SAINT_BIT - 1)); cache[ni].symbol = T[np - 1]; np = 0; } cache[i + 0].index = np & SAINT_MAX; } + } + + sa_sint_t v1 = cache[i + 1].symbol; + if (v1 >= 0) + { + cache[i + 1].symbol = induction_bucket[v1]++; + if (cache[i + 1].symbol < omp_block_end) { sa_sint_t ni = cache[i + 1].symbol, np = cache[i + 1].index; if (np > 0) { cache[ni].index = (np - 1) | ((sa_sint_t)(T[np - 2] < T[np - 1]) << (SAINT_BIT - 1)); cache[ni].symbol = T[np - 1]; np = 0; } cache[i + 1].index = np & SAINT_MAX; } + } + } + + /* tail: same per-entry work, one entry at a time and without prefetching */ for (j += prefetch_distance + 1; i < j; i += 1) + { + sa_sint_t v = cache[i].symbol; + if (v >= 0) + { + cache[i].symbol = induction_bucket[v]++; + if (cache[i].symbol < omp_block_end) { sa_sint_t ni = cache[i].symbol, np = cache[i].index; if (np > 0) { cache[ni].index = (np - 1) | ((sa_sint_t)(T[np - 2] < T[np - 1]) << (SAINT_BIT - 1)); cache[ni].symbol = T[np - 1]; np = 0; } cache[i].index = np & SAINT_MAX; } 
+ } + } +} + +/* Processes one block [block_start, block_start + block_size) of the 6k left-to-right scan. The parallel region is only active when threads > 1 and block_size >= 16384. Each thread gets a slice whose stride is block_size / omp_num_threads rounded down to a multiple of 16, with the last thread taking the remainder. With a single thread the sequential scan is called directly. Otherwise every thread gathers its slice into cache, the master thread alone runs the sort step over the whole block between two barriers, and every thread then calls libsais_place_cached_suffixes on its slice. cache is passed as cache - block_start so the helpers can index it with absolute SA positions. Returns the updated d. */ static sa_sint_t libsais_partial_sorting_scan_left_to_right_32s_6k_block_omp(const sa_sint_t * RESTRICT T, sa_sint_t * RESTRICT SA, sa_sint_t * RESTRICT buckets, sa_sint_t d, LIBSAIS_THREAD_CACHE * RESTRICT cache, fast_sint_t block_start, fast_sint_t block_size, sa_sint_t threads) +{ +#if defined(_OPENMP) + #pragma omp parallel num_threads(threads) if(threads > 1 && block_size >= 16384) +#endif + { +#if defined(_OPENMP) + fast_sint_t omp_thread_num = omp_get_thread_num(); + fast_sint_t omp_num_threads = omp_get_num_threads(); +#else + UNUSED(threads); UNUSED(cache); + + fast_sint_t omp_thread_num = 0; + fast_sint_t omp_num_threads = 1; +#endif + fast_sint_t omp_block_stride = (block_size / omp_num_threads) & (-16); + fast_sint_t omp_block_start = omp_thread_num * omp_block_stride; + fast_sint_t omp_block_size = omp_thread_num < omp_num_threads - 1 ? omp_block_stride : block_size - omp_block_start; + + omp_block_start += block_start; + + if (omp_num_threads == 1) + { + d = libsais_partial_sorting_scan_left_to_right_32s_6k(T, SA, buckets, d, omp_block_start, omp_block_size); + } +#if defined(_OPENMP) + else + { + { + libsais_partial_sorting_scan_left_to_right_32s_6k_block_gather(T, SA, cache - block_start, omp_block_start, omp_block_size); + } + + #pragma omp barrier + + #pragma omp master + { + /* the sort step covers the full block (block_start, block_size), not this thread's slice */ d = libsais_partial_sorting_scan_left_to_right_32s_6k_block_sort(T, buckets, d, cache - block_start, block_start, block_size); + } + + #pragma omp barrier + + { + libsais_place_cached_suffixes(SA, cache - block_start, omp_block_start, omp_block_size); + } + } +#endif + } + + return d; +} + +/* Processes one block of the 4k left-to-right scan with the same slicing as the 6k block driver: sequential scan when only one thread runs, otherwise per-thread gather, master-only sort over the whole block between two barriers, then libsais_compact_and_place_cached_suffixes per slice. Returns the updated d. */ static sa_sint_t libsais_partial_sorting_scan_left_to_right_32s_4k_block_omp(const sa_sint_t * RESTRICT T, sa_sint_t * RESTRICT SA, sa_sint_t k, sa_sint_t * RESTRICT buckets, sa_sint_t d, LIBSAIS_THREAD_CACHE * RESTRICT cache, fast_sint_t block_start, fast_sint_t block_size, sa_sint_t threads) +{ +#if defined(_OPENMP) + #pragma omp parallel num_threads(threads) if(threads > 1 
&& block_size >= 16384) +#endif + { +#if defined(_OPENMP) + fast_sint_t omp_thread_num = omp_get_thread_num(); + fast_sint_t omp_num_threads = omp_get_num_threads(); +#else + UNUSED(threads); UNUSED(cache); + + fast_sint_t omp_thread_num = 0; + fast_sint_t omp_num_threads = 1; +#endif + /* per-thread slice: stride rounded down to a multiple of 16, last thread takes whatever is left */ fast_sint_t omp_block_stride = (block_size / omp_num_threads) & (-16); + fast_sint_t omp_block_start = omp_thread_num * omp_block_stride; + fast_sint_t omp_block_size = omp_thread_num < omp_num_threads - 1 ? omp_block_stride : block_size - omp_block_start; + + omp_block_start += block_start; + + if (omp_num_threads == 1) + { + d = libsais_partial_sorting_scan_left_to_right_32s_4k(T, SA, k, buckets, d, omp_block_start, omp_block_size); + } +#if defined(_OPENMP) + else + { + { + libsais_partial_sorting_scan_left_to_right_32s_4k_block_gather(T, SA, cache - block_start, omp_block_start, omp_block_size); + } + + #pragma omp barrier + + #pragma omp master + { + d = libsais_partial_sorting_scan_left_to_right_32s_4k_block_sort(T, k, buckets, d, cache - block_start, block_start, block_size); + } + + #pragma omp barrier + + { + libsais_compact_and_place_cached_suffixes(SA, cache - block_start, omp_block_start, omp_block_size); + } + } +#endif + } + + return d; +} + +/* Processes one block [block_start, block_start + block_size) of the 1k left-to-right scan. The parallel region is only active when threads > 1 and block_size >= 16384. With a single thread the sequential 1k scan is called directly; otherwise every thread gathers its slice into cache, the master thread alone runs the 1k sort step over the whole block between two barriers, and every thread then calls libsais_compact_and_place_cached_suffixes on its slice. Nothing is returned because the 1k variant keeps no name counter. */ static void libsais_partial_sorting_scan_left_to_right_32s_1k_block_omp(const sa_sint_t * RESTRICT T, sa_sint_t * RESTRICT SA, sa_sint_t * RESTRICT buckets, LIBSAIS_THREAD_CACHE * RESTRICT cache, fast_sint_t block_start, fast_sint_t block_size, sa_sint_t threads) +{ +#if defined(_OPENMP) + #pragma omp parallel num_threads(threads) if(threads > 1 && block_size >= 16384) +#endif + { +#if defined(_OPENMP) + fast_sint_t omp_thread_num = omp_get_thread_num(); + fast_sint_t omp_num_threads = omp_get_num_threads(); +#else + UNUSED(threads); UNUSED(cache); + + fast_sint_t omp_thread_num = 0; + fast_sint_t omp_num_threads = 1; +#endif + fast_sint_t omp_block_stride = (block_size / omp_num_threads) & (-16); + fast_sint_t omp_block_start = 
omp_thread_num * omp_block_stride; + fast_sint_t omp_block_size = omp_thread_num < omp_num_threads - 1 ? omp_block_stride : block_size - omp_block_start; + + omp_block_start += block_start; + + if (omp_num_threads == 1) + { + libsais_partial_sorting_scan_left_to_right_32s_1k(T, SA, buckets, omp_block_start, omp_block_size); + } +#if defined(_OPENMP) + else + { + { + libsais_partial_sorting_scan_left_to_right_32s_1k_block_gather(T, SA, cache - block_start, omp_block_start, omp_block_size); + } + + #pragma omp barrier + + #pragma omp master + { + libsais_partial_sorting_scan_left_to_right_32s_1k_block_sort(T, buckets, cache - block_start, block_start, block_size); + } + + #pragma omp barrier + + { + libsais_compact_and_place_cached_suffixes(SA, cache - block_start, omp_block_start, omp_block_size); + } + } +#endif + } +} + +#endif + +/* Entry point of the 6k left-to-right scan. It first seeds the pass: suffix n - 1, tagged with SAINT_MIN, is stored at the position taken from its BUCKETS_INDEX4 bucket and ++d is recorded at buckets[2 + bucket]. It then scans SA[0, left_suffixes_count): sequentially when threads == 1 or left_suffixes_count < 65536, otherwise (OpenMP builds only) in blocks of threads * LIBSAIS_PER_THREAD_CACHE_SIZE entries through the 6k block driver, using thread_state[0].state.cache as the shared cache. Returns the updated d. */ static sa_sint_t libsais_partial_sorting_scan_left_to_right_32s_6k_omp(const sa_sint_t * RESTRICT T, sa_sint_t * RESTRICT SA, sa_sint_t n, sa_sint_t * RESTRICT buckets, sa_sint_t left_suffixes_count, sa_sint_t d, sa_sint_t threads, LIBSAIS_THREAD_STATE * RESTRICT thread_state) +{ + SA[buckets[BUCKETS_INDEX4(T[n - 1], T[n - 2] >= T[n - 1])]++] = (n - 1) | SAINT_MIN; + buckets[2 + BUCKETS_INDEX4(T[n - 1], T[n - 2] >= T[n - 1])] = ++d; + + if (threads == 1 || left_suffixes_count < 65536) + { + d = libsais_partial_sorting_scan_left_to_right_32s_6k(T, SA, buckets, d, 0, left_suffixes_count); + } +#if defined(_OPENMP) + else + { + fast_sint_t block_start, block_end; + for (block_start = 0; block_start < left_suffixes_count; block_start = block_end) + { + /* block length is capped so that the block fits the cache: threads * LIBSAIS_PER_THREAD_CACHE_SIZE entries */ block_end = block_start + (fast_sint_t)threads * LIBSAIS_PER_THREAD_CACHE_SIZE; if (block_end > left_suffixes_count) { block_end = left_suffixes_count; } + + d = libsais_partial_sorting_scan_left_to_right_32s_6k_block_omp(T, SA, buckets, d, thread_state[0].state.cache, block_start, block_end - block_start, threads); + } + } +#else + UNUSED(thread_state); +#endif + + return d; +} + +/* libsais_partial_sorting_scan_left_to_right_32s_4k_omp: entry point of the 4k left-to-right scan. It seeds SA with suffix n - 1 carrying SUFFIX_GROUP_MARKER and the (T[n - 2] < T[n - 1]) flag, records ++d in distinct_names for that suffix's bucket, then scans SA[0, n) either sequentially (threads == 1 or n < 65536) or block by block through the 4k block driver. Returns the updated d. */ static 
sa_sint_t libsais_partial_sorting_scan_left_to_right_32s_4k_omp(const sa_sint_t * RESTRICT T, sa_sint_t * RESTRICT SA, sa_sint_t n, sa_sint_t k, sa_sint_t * RESTRICT buckets, sa_sint_t d, sa_sint_t threads, LIBSAIS_THREAD_STATE * RESTRICT thread_state) +{ + sa_sint_t * RESTRICT induction_bucket = &buckets[2 * k]; + sa_sint_t * RESTRICT distinct_names = &buckets[0 * k]; + + /* seed the scan with the last suffix (n - 1) before walking SA */ SA[induction_bucket[T[n - 1]]++] = (n - 1) | ((sa_sint_t)(T[n - 2] < T[n - 1]) << (SAINT_BIT - 1)) | SUFFIX_GROUP_MARKER; + distinct_names[BUCKETS_INDEX2(T[n - 1], T[n - 2] < T[n - 1])] = ++d; + + if (threads == 1 || n < 65536) + { + d = libsais_partial_sorting_scan_left_to_right_32s_4k(T, SA, k, buckets, d, 0, n); + } +#if defined(_OPENMP) + else + { + fast_sint_t block_start, block_end; + for (block_start = 0; block_start < n; block_start = block_end) + { + block_end = block_start + (fast_sint_t)threads * LIBSAIS_PER_THREAD_CACHE_SIZE; if (block_end > n) { block_end = n; } + + d = libsais_partial_sorting_scan_left_to_right_32s_4k_block_omp(T, SA, k, buckets, d, thread_state[0].state.cache, block_start, block_end - block_start, threads); + } + } +#else + UNUSED(thread_state); +#endif + + return d; +} + +/* Entry point of the 1k left-to-right scan. It seeds SA with suffix n - 1 (with the (T[n - 2] < T[n - 1]) flag in bit SAINT_BIT - 1) at the position taken from buckets[T[n - 1]]++, then scans SA[0, n): sequentially when threads == 1 or n < 65536, otherwise (OpenMP builds only) in blocks of threads * LIBSAIS_PER_THREAD_CACHE_SIZE entries through the 1k block driver, using thread_state[0].state.cache. */ static void libsais_partial_sorting_scan_left_to_right_32s_1k_omp(const sa_sint_t * RESTRICT T, sa_sint_t * RESTRICT SA, sa_sint_t n, sa_sint_t * RESTRICT buckets, sa_sint_t threads, LIBSAIS_THREAD_STATE * RESTRICT thread_state) +{ + SA[buckets[T[n - 1]]++] = (n - 1) | ((sa_sint_t)(T[n - 2] < T[n - 1]) << (SAINT_BIT - 1)); + + if (threads == 1 || n < 65536) + { + libsais_partial_sorting_scan_left_to_right_32s_1k(T, SA, buckets, 0, n); + } +#if defined(_OPENMP) + else + { + fast_sint_t block_start, block_end; + for (block_start = 0; block_start < n; block_start = block_end) + { + block_end = block_start + (fast_sint_t)threads * LIBSAIS_PER_THREAD_CACHE_SIZE; if (block_end > n) { block_end = n; } + + libsais_partial_sorting_scan_left_to_right_32s_1k_block_omp(T, SA, buckets, 
thread_state[0].state.cache, block_start, block_end - block_start, threads); + } + } +#else + UNUSED(thread_state); +#endif +} + +/* Shifts the SAINT_MIN marker bits by one position inside each bucket range of SA. For every c from BUCKETS_INDEX2(ALPHABET_SIZE - 1, 0) down to BUCKETS_INDEX2(1, 0) it walks SA backwards from temp_bucket[c] - 1 to buckets[c - BUCKETS_INDEX2(1, 0)], where temp_bucket is buckets[4 * ALPHABET_SIZE ...]. A carried state s starts as SAINT_MIN; each entry ends up with the marker bit equal to the previous s, and s becomes that entry's original marker bit. The loop over c may run in parallel (static schedule, chunk 1) when threads > 1 and n >= 65536; this presumably relies on the per-c ranges being disjoint - TODO confirm. */ static void libsais_partial_sorting_shift_markers_8u_omp(sa_sint_t * RESTRICT SA, sa_sint_t n, const sa_sint_t * RESTRICT buckets, sa_sint_t threads) +{ + const fast_sint_t prefetch_distance = 32; + + const sa_sint_t * RESTRICT temp_bucket = &buckets[4 * ALPHABET_SIZE]; + + fast_sint_t c; + +#if defined(_OPENMP) + #pragma omp parallel for schedule(static, 1) num_threads(threads) if(threads > 1 && n >= 65536) +#else + UNUSED(threads); UNUSED(n); +#endif + for (c = BUCKETS_INDEX2(ALPHABET_SIZE - 1, 0); c >= BUCKETS_INDEX2(1, 0); c -= BUCKETS_INDEX2(1, 0)) + { + fast_sint_t i, j; sa_sint_t s = SAINT_MIN; + for (i = (fast_sint_t)temp_bucket[c] - 1, j = (fast_sint_t)buckets[c - BUCKETS_INDEX2(1, 0)] + 3; i >= j; i -= 4) + { + libsais_prefetchw(&SA[i - prefetch_distance]); + + /* q is the XOR difference between this entry's marker and s; applying it swaps the two */ sa_sint_t p0 = SA[i - 0], q0 = (p0 & SAINT_MIN) ^ s; s = s ^ q0; SA[i - 0] = p0 ^ q0; + sa_sint_t p1 = SA[i - 1], q1 = (p1 & SAINT_MIN) ^ s; s = s ^ q1; SA[i - 1] = p1 ^ q1; + sa_sint_t p2 = SA[i - 2], q2 = (p2 & SAINT_MIN) ^ s; s = s ^ q2; SA[i - 2] = p2 ^ q2; + sa_sint_t p3 = SA[i - 3], q3 = (p3 & SAINT_MIN) ^ s; s = s ^ q3; SA[i - 3] = p3 ^ q3; + } + + for (j -= 3; i >= j; i -= 1) + { + sa_sint_t p = SA[i], q = (p & SAINT_MIN) ^ s; s = s ^ q; SA[i] = p ^ q; + } + } +} + +/* Same marker shift for the 6k layout: for every c from k - 1 down to 1 it walks SA backwards from buckets[BUCKETS_INDEX4(c, 0)] - 1 to temp_bucket[BUCKETS_INDEX2(c - 1, 0)], where temp_bucket is buckets[4 * k ...], swapping each entry's SAINT_MIN bit with the carried state s (initially SAINT_MIN). The loop over c may run in parallel when threads > 1 and k >= 65536. */ static void libsais_partial_sorting_shift_markers_32s_6k_omp(sa_sint_t * RESTRICT SA, sa_sint_t k, const sa_sint_t * RESTRICT buckets, sa_sint_t threads) +{ + const fast_sint_t prefetch_distance = 32; + + const sa_sint_t * RESTRICT temp_bucket = &buckets[4 * k]; + + fast_sint_t c; + +#if defined(_OPENMP) + #pragma omp parallel for schedule(static, 1) num_threads(threads) if(threads > 1 && k >= 65536) +#else + UNUSED(threads); +#endif + for (c = (fast_sint_t)k - 1; c >= 1; c -= 1) + { + fast_sint_t i, j; sa_sint_t s = SAINT_MIN; + for (i = (fast_sint_t)buckets[BUCKETS_INDEX4(c, 0)] - 1, j = 
(fast_sint_t)temp_bucket[BUCKETS_INDEX2(c - 1, 0)] + 3; i >= j; i -= 4) + { + libsais_prefetchw(&SA[i - prefetch_distance]); + + sa_sint_t p0 = SA[i - 0], q0 = (p0 & SAINT_MIN) ^ s; s = s ^ q0; SA[i - 0] = p0 ^ q0; + sa_sint_t p1 = SA[i - 1], q1 = (p1 & SAINT_MIN) ^ s; s = s ^ q1; SA[i - 1] = p1 ^ q1; + sa_sint_t p2 = SA[i - 2], q2 = (p2 & SAINT_MIN) ^ s; s = s ^ q2; SA[i - 2] = p2 ^ q2; + sa_sint_t p3 = SA[i - 3], q3 = (p3 & SAINT_MIN) ^ s; s = s ^ q3; SA[i - 3] = p3 ^ q3; + } + + for (j -= 3; i >= j; i -= 1) + { + sa_sint_t p = SA[i], q = (p & SAINT_MIN) ^ s; s = s ^ q; SA[i] = p ^ q; + } + } +} + +/* Single backward walk over SA[n - 1 .. 0] that shifts the SUFFIX_GROUP_MARKER bit by one position among the positive entries: for p > 0 the entry's marker bit is swapped with the carried state s (initially SUFFIX_GROUP_MARKER); for p <= 0 the mask term is 0, so the entry and s are both left unchanged. NOTE(review): this assumes 1 << (SUFFIX_GROUP_BIT - 1) is the same bit as SUFFIX_GROUP_MARKER - confirm against the macro definitions. */ static void libsais_partial_sorting_shift_markers_32s_4k(sa_sint_t * RESTRICT SA, sa_sint_t n) +{ + const fast_sint_t prefetch_distance = 32; + + fast_sint_t i; sa_sint_t s = SUFFIX_GROUP_MARKER; + for (i = (fast_sint_t)n - 1; i >= 3; i -= 4) + { + libsais_prefetchw(&SA[i - prefetch_distance]); + + sa_sint_t p0 = SA[i - 0], q0 = ((p0 & SUFFIX_GROUP_MARKER) ^ s) & ((sa_sint_t)(p0 > 0) << ((SUFFIX_GROUP_BIT - 1))); s = s ^ q0; SA[i - 0] = p0 ^ q0; + sa_sint_t p1 = SA[i - 1], q1 = ((p1 & SUFFIX_GROUP_MARKER) ^ s) & ((sa_sint_t)(p1 > 0) << ((SUFFIX_GROUP_BIT - 1))); s = s ^ q1; SA[i - 1] = p1 ^ q1; + sa_sint_t p2 = SA[i - 2], q2 = ((p2 & SUFFIX_GROUP_MARKER) ^ s) & ((sa_sint_t)(p2 > 0) << ((SUFFIX_GROUP_BIT - 1))); s = s ^ q2; SA[i - 2] = p2 ^ q2; + sa_sint_t p3 = SA[i - 3], q3 = ((p3 & SUFFIX_GROUP_MARKER) ^ s) & ((sa_sint_t)(p3 > 0) << ((SUFFIX_GROUP_BIT - 1))); s = s ^ q3; SA[i - 3] = p3 ^ q3; + } + + for (; i >= 0; i -= 1) + { + sa_sint_t p = SA[i], q = ((p & SUFFIX_GROUP_MARKER) ^ s) & ((sa_sint_t)(p > 0) << ((SUFFIX_GROUP_BIT - 1))); s = s ^ q; SA[i] = p ^ q; + } +} + +/* Copies the 2-wide temp_bucket table (buckets[4 * k ...]) into the 4-wide layout at the start of buckets: for each i from BUCKETS_INDEX2(0, 0) to BUCKETS_INDEX2(k - 1, 0) in steps of BUCKETS_INDEX2(1, 0), the entries buckets[2 * i + BUCKETS_INDEX4(0, 0)] and buckets[2 * i + BUCKETS_INDEX4(0, 1)] receive temp_bucket[i + BUCKETS_INDEX2(0, 0)] and temp_bucket[i + BUCKETS_INDEX2(0, 1)]. */ static void libsais_partial_sorting_shift_buckets_32s_6k(sa_sint_t k, sa_sint_t * RESTRICT buckets) +{ + sa_sint_t * RESTRICT temp_bucket = &buckets[4 * k]; + + fast_sint_t i; + for (i = BUCKETS_INDEX2(0, 0); i <= BUCKETS_INDEX2((fast_sint_t)k - 1, 0); i += BUCKETS_INDEX2(1, 0)) + { + buckets[2 * i + 
BUCKETS_INDEX4(0, 0)] = temp_bucket[i + BUCKETS_INDEX2(0, 0)]; + buckets[2 * i + BUCKETS_INDEX4(0, 1)] = temp_bucket[i + BUCKETS_INDEX2(0, 1)]; + } +} + +/* Sequential right-to-left pass over SA[omp_block_start, omp_block_start + omp_block_size) for uint8_t text, walking from the highest index down. induction_bucket is buckets[0 ...] and distinct_names is buckets[2 * ALPHABET_SIZE ...]. Every entry p adds (p < 0) to d, is stripped with SAINT_MAX, and p - 1 is stored at SA[--induction_bucket[v]] with v = BUCKETS_INDEX2(T[p - 1], T[p - 2] > T[p - 1]); bit SAINT_BIT - 1 of the stored value is set when distinct_names[v] != d, after which distinct_names[v] is set to d. Returns the updated d. */ static sa_sint_t libsais_partial_sorting_scan_right_to_left_8u(const uint8_t * RESTRICT T, sa_sint_t * RESTRICT SA, sa_sint_t * RESTRICT buckets, sa_sint_t d, fast_sint_t omp_block_start, fast_sint_t omp_block_size) +{ + const fast_sint_t prefetch_distance = 32; + + sa_sint_t * RESTRICT induction_bucket = &buckets[0 * ALPHABET_SIZE]; + sa_sint_t * RESTRICT distinct_names = &buckets[2 * ALPHABET_SIZE]; + + fast_sint_t i, j; + for (i = omp_block_start + omp_block_size - 1, j = omp_block_start + prefetch_distance + 1; i >= j; i -= 2) + { + libsais_prefetch(&SA[i - 2 * prefetch_distance]); + + libsais_prefetch(&T[SA[i - prefetch_distance - 0] & SAINT_MAX] - 1); + libsais_prefetch(&T[SA[i - prefetch_distance - 0] & SAINT_MAX] - 2); + libsais_prefetch(&T[SA[i - prefetch_distance - 1] & SAINT_MAX] - 1); + libsais_prefetch(&T[SA[i - prefetch_distance - 1] & SAINT_MAX] - 2); + + sa_sint_t p0 = SA[i - 0]; d += (p0 < 0); p0 &= SAINT_MAX; sa_sint_t v0 = BUCKETS_INDEX2(T[p0 - 1], T[p0 - 2] > T[p0 - 1]); + /* buckets are filled from their end: pre-decrement, then store */ SA[--induction_bucket[v0]] = (p0 - 1) | ((sa_sint_t)(distinct_names[v0] != d) << (SAINT_BIT - 1)); distinct_names[v0] = d; + + sa_sint_t p1 = SA[i - 1]; d += (p1 < 0); p1 &= SAINT_MAX; sa_sint_t v1 = BUCKETS_INDEX2(T[p1 - 1], T[p1 - 2] > T[p1 - 1]); + SA[--induction_bucket[v1]] = (p1 - 1) | ((sa_sint_t)(distinct_names[v1] != d) << (SAINT_BIT - 1)); distinct_names[v1] = d; + } + + for (j -= prefetch_distance + 1; i >= j; i -= 1) + { + sa_sint_t p = SA[i]; d += (p < 0); p &= SAINT_MAX; sa_sint_t v = BUCKETS_INDEX2(T[p - 1], T[p - 2] > T[p - 1]); + SA[--induction_bucket[v]] = (p - 1) | ((sa_sint_t)(distinct_names[v] != d) << (SAINT_BIT - 1)); distinct_names[v] = d; + } + + return d; +} + +#if defined(_OPENMP) + +/* Per-thread prepare step of the block-wise right-to-left 8u scan. It zeroes the first 4 * ALPHABET_SIZE entries of the given buckets, then walks the thread's SA slice from the highest index down, appending each raw SA entry and its BUCKETS_INDEX2 bucket v to cache[] in visiting order while counting entries per bucket in induction_bucket[v] and recording a local counter d (starting at 1) in distinct_names[v]. On exit state[0].state.position holds d - 1 and state[0].state.count the number of cached entries. SA itself is not modified. */ static void libsais_partial_sorting_scan_right_to_left_8u_block_prepare(const uint8_t * RESTRICT T, sa_sint_t * 
RESTRICT SA, sa_sint_t * RESTRICT buckets, LIBSAIS_THREAD_CACHE * RESTRICT cache, fast_sint_t omp_block_start, fast_sint_t omp_block_size, LIBSAIS_THREAD_STATE * RESTRICT state) +{ + const fast_sint_t prefetch_distance = 32; + + sa_sint_t * RESTRICT induction_bucket = &buckets[0 * ALPHABET_SIZE]; + sa_sint_t * RESTRICT distinct_names = &buckets[2 * ALPHABET_SIZE]; + + /* start from empty counters; the buckets passed here are filled from scratch */ memset(buckets, 0, 4 * ALPHABET_SIZE * sizeof(sa_sint_t)); + + /* count indexes cache[] from 0 in visiting order; d is a local counter starting at 1 */ fast_sint_t i, j, count = 0; sa_sint_t d = 1; + for (i = omp_block_start + omp_block_size - 1, j = omp_block_start + prefetch_distance + 1; i >= j; i -= 2) + { + libsais_prefetch(&SA[i - 2 * prefetch_distance]); + + libsais_prefetch(&T[SA[i - prefetch_distance - 0] & SAINT_MAX] - 1); + libsais_prefetch(&T[SA[i - prefetch_distance - 0] & SAINT_MAX] - 2); + libsais_prefetch(&T[SA[i - prefetch_distance - 1] & SAINT_MAX] - 1); + libsais_prefetch(&T[SA[i - prefetch_distance - 1] & SAINT_MAX] - 2); + + sa_sint_t p0 = cache[count].index = SA[i - 0]; d += (p0 < 0); p0 &= SAINT_MAX; sa_sint_t v0 = cache[count++].symbol = BUCKETS_INDEX2(T[p0 - 1], T[p0 - 2] > T[p0 - 1]); induction_bucket[v0]++; distinct_names[v0] = d; + sa_sint_t p1 = cache[count].index = SA[i - 1]; d += (p1 < 0); p1 &= SAINT_MAX; sa_sint_t v1 = cache[count++].symbol = BUCKETS_INDEX2(T[p1 - 1], T[p1 - 2] > T[p1 - 1]); induction_bucket[v1]++; distinct_names[v1] = d; + } + + for (j -= prefetch_distance + 1; i >= j; i -= 1) + { + sa_sint_t p = cache[count].index = SA[i]; d += (p < 0); p &= SAINT_MAX; sa_sint_t v = cache[count++].symbol = BUCKETS_INDEX2(T[p - 1], T[p - 2] > T[p - 1]); induction_bucket[v]++; distinct_names[v] = d; + } + + /* d started at 1, so d - 1 is the number of entries seen with the top bit set */ state[0].state.position = (fast_sint_t)d - 1; + state[0].state.count = count; +} + +/* Place step of the block-wise right-to-left 8u scan: replays cache[0 .. count - 1] in order and, for each cached (index p, symbol v) pair, adds (p < 0) to d and stores (p - 1) at SA[--induction_bucket[v]] with (distinct_names[v] != d) in bit SAINT_BIT - 1, then sets distinct_names[v] = d. induction_bucket is buckets[0 ...] and distinct_names is buckets[2 * ALPHABET_SIZE ...]. Unlike the sequential scan, p is not masked with SAINT_MAX before the store. Nothing is returned; d is a local copy. */ static void libsais_partial_sorting_scan_right_to_left_8u_block_place(sa_sint_t * RESTRICT SA, sa_sint_t * RESTRICT buckets, LIBSAIS_THREAD_CACHE * RESTRICT cache, fast_sint_t count, sa_sint_t d) +{ + const fast_sint_t prefetch_distance = 32; + + sa_sint_t * RESTRICT 
induction_bucket = &buckets[0 * ALPHABET_SIZE]; + sa_sint_t * RESTRICT distinct_names = &buckets[2 * ALPHABET_SIZE]; + + fast_sint_t i, j; + for (i = 0, j = count - 1; i < j; i += 2) + { + libsais_prefetch(&cache[i + prefetch_distance]); + + sa_sint_t p0 = cache[i + 0].index; d += (p0 < 0); sa_sint_t v0 = cache[i + 0].symbol; + SA[--induction_bucket[v0]] = (p0 - 1) | ((sa_sint_t)(distinct_names[v0] != d) << (SAINT_BIT - 1)); distinct_names[v0] = d; + + sa_sint_t p1 = cache[i + 1].index; d += (p1 < 0); sa_sint_t v1 = cache[i + 1].symbol; + SA[--induction_bucket[v1]] = (p1 - 1) | ((sa_sint_t)(distinct_names[v1] != d) << (SAINT_BIT - 1)); distinct_names[v1] = d; + } + + for (j += 1; i < j; i += 1) + { + sa_sint_t p = cache[i].index; d += (p < 0); sa_sint_t v = cache[i].symbol; + SA[--induction_bucket[v]] = (p - 1) | ((sa_sint_t)(distinct_names[v] != d) << (SAINT_BIT - 1)); distinct_names[v] = d; + } +} + +static sa_sint_t libsais_partial_sorting_scan_right_to_left_8u_block_omp(const uint8_t * RESTRICT T, sa_sint_t * RESTRICT SA, sa_sint_t * RESTRICT buckets, sa_sint_t d, fast_sint_t block_start, fast_sint_t block_size, sa_sint_t threads, LIBSAIS_THREAD_STATE * RESTRICT thread_state) +{ +#if defined(_OPENMP) + #pragma omp parallel num_threads(threads) if(threads > 1 && block_size >= 64 * ALPHABET_SIZE && omp_get_dynamic() == 0) +#endif + { +#if defined(_OPENMP) + fast_sint_t omp_thread_num = omp_get_thread_num(); + fast_sint_t omp_num_threads = omp_get_num_threads(); +#else + UNUSED(threads); UNUSED(thread_state); + + fast_sint_t omp_thread_num = 0; + fast_sint_t omp_num_threads = 1; +#endif + fast_sint_t omp_block_stride = (block_size / omp_num_threads) & (-16); + fast_sint_t omp_block_start = omp_thread_num * omp_block_stride; + fast_sint_t omp_block_size = omp_thread_num < omp_num_threads - 1 ? 
omp_block_stride : block_size - omp_block_start; + + omp_block_start += block_start; + + if (omp_num_threads == 1) + { + d = libsais_partial_sorting_scan_right_to_left_8u(T, SA, buckets, d, omp_block_start, omp_block_size); + } +#if defined(_OPENMP) + else + { + { + libsais_partial_sorting_scan_right_to_left_8u_block_prepare(T, SA, thread_state[omp_thread_num].state.buckets, thread_state[omp_thread_num].state.cache, omp_block_start, omp_block_size, &thread_state[omp_thread_num]); + } + + #pragma omp barrier + + #pragma omp master + { + sa_sint_t * RESTRICT induction_bucket = &buckets[0 * ALPHABET_SIZE]; + sa_sint_t * RESTRICT distinct_names = &buckets[2 * ALPHABET_SIZE]; + + fast_sint_t t; + for (t = omp_num_threads - 1; t >= 0; --t) + { + sa_sint_t * RESTRICT temp_induction_bucket = &thread_state[t].state.buckets[0 * ALPHABET_SIZE]; + sa_sint_t * RESTRICT temp_distinct_names = &thread_state[t].state.buckets[2 * ALPHABET_SIZE]; + + fast_sint_t c; + for (c = 0; c < 2 * ALPHABET_SIZE; c += 1) { sa_sint_t A = induction_bucket[c], B = temp_induction_bucket[c]; induction_bucket[c] = A - B; temp_induction_bucket[c] = A; } + + for (d -= 1, c = 0; c < 2 * ALPHABET_SIZE; c += 1) { sa_sint_t A = distinct_names[c], B = temp_distinct_names[c], D = B + d; distinct_names[c] = B > 0 ? 
D : A; temp_distinct_names[c] = A; } + d += 1 + (sa_sint_t)thread_state[t].state.position; thread_state[t].state.position = (fast_sint_t)d - thread_state[t].state.position; + } + } + + #pragma omp barrier + + { + libsais_partial_sorting_scan_right_to_left_8u_block_place(SA, thread_state[omp_thread_num].state.buckets, thread_state[omp_thread_num].state.cache, thread_state[omp_thread_num].state.count, (sa_sint_t)thread_state[omp_thread_num].state.position); + } + } +#endif + } + + return d; +} + +#endif + +static void libsais_partial_sorting_scan_right_to_left_8u_omp(const uint8_t * RESTRICT T, sa_sint_t * RESTRICT SA, sa_sint_t n, sa_sint_t * RESTRICT buckets, sa_sint_t first_lms_suffix, sa_sint_t left_suffixes_count, sa_sint_t d, sa_sint_t threads, LIBSAIS_THREAD_STATE * RESTRICT thread_state) +{ + fast_sint_t scan_start = (fast_sint_t)left_suffixes_count + 1; + fast_sint_t scan_end = (fast_sint_t)n - (fast_sint_t)first_lms_suffix; + + if (threads == 1 || (scan_end - scan_start) < 65536) + { + libsais_partial_sorting_scan_right_to_left_8u(T, SA, buckets, d, scan_start, scan_end - scan_start); + } +#if defined(_OPENMP) + else + { + sa_sint_t * RESTRICT induction_bucket = &buckets[0 * ALPHABET_SIZE]; + sa_sint_t * RESTRICT distinct_names = &buckets[2 * ALPHABET_SIZE]; + + fast_sint_t block_start; + for (block_start = scan_end - 1; block_start >= scan_start; ) + { + if (SA[block_start] == 0) + { + block_start--; + } + else + { + fast_sint_t block_max_end = block_start - ((fast_sint_t)threads) * (LIBSAIS_PER_THREAD_CACHE_SIZE - 16 * (fast_sint_t)threads); if (block_max_end < scan_start) { block_max_end = scan_start - 1; } + fast_sint_t block_end = block_start - 1; while (block_end > block_max_end && SA[block_end] != 0) { block_end--; } + fast_sint_t block_size = block_start - block_end; + + if (block_size < 32) + { + for (; block_start > block_end; block_start -= 1) + { + sa_sint_t p = SA[block_start]; d += (p < 0); p &= SAINT_MAX; sa_sint_t v = BUCKETS_INDEX2(T[p - 
1], T[p - 2] > T[p - 1]); + SA[--induction_bucket[v]] = (p - 1) | ((sa_sint_t)(distinct_names[v] != d) << (SAINT_BIT - 1)); distinct_names[v] = d; + } + } + else + { + d = libsais_partial_sorting_scan_right_to_left_8u_block_omp(T, SA, buckets, d, block_end + 1, block_size, threads, thread_state); + block_start = block_end; + } + } + } + } +#else + UNUSED(thread_state); +#endif +} + +static sa_sint_t libsais_partial_sorting_scan_right_to_left_32s_6k(const sa_sint_t * RESTRICT T, sa_sint_t * RESTRICT SA, sa_sint_t * RESTRICT buckets, sa_sint_t d, fast_sint_t omp_block_start, fast_sint_t omp_block_size) +{ + const fast_sint_t prefetch_distance = 32; + + fast_sint_t i, j; + for (i = omp_block_start + omp_block_size - 1, j = omp_block_start + 2 * prefetch_distance + 1; i >= j; i -= 2) + { + libsais_prefetch(&SA[i - 3 * prefetch_distance]); + + libsais_prefetch(&T[SA[i - 2 * prefetch_distance - 0] & SAINT_MAX] - 1); + libsais_prefetch(&T[SA[i - 2 * prefetch_distance - 0] & SAINT_MAX] - 2); + libsais_prefetch(&T[SA[i - 2 * prefetch_distance - 1] & SAINT_MAX] - 1); + libsais_prefetch(&T[SA[i - 2 * prefetch_distance - 1] & SAINT_MAX] - 2); + + sa_sint_t p0 = SA[i - prefetch_distance - 0] & SAINT_MAX; sa_sint_t v0 = BUCKETS_INDEX4(T[p0 - (p0 > 0)], 0); libsais_prefetchw(&buckets[v0]); + sa_sint_t p1 = SA[i - prefetch_distance - 1] & SAINT_MAX; sa_sint_t v1 = BUCKETS_INDEX4(T[p1 - (p1 > 0)], 0); libsais_prefetchw(&buckets[v1]); + + sa_sint_t p2 = SA[i - 0]; d += (p2 < 0); p2 &= SAINT_MAX; sa_sint_t v2 = BUCKETS_INDEX4(T[p2 - 1], T[p2 - 2] > T[p2 - 1]); + SA[--buckets[v2]] = (p2 - 1) | ((sa_sint_t)(buckets[2 + v2] != d) << (SAINT_BIT - 1)); buckets[2 + v2] = d; + + sa_sint_t p3 = SA[i - 1]; d += (p3 < 0); p3 &= SAINT_MAX; sa_sint_t v3 = BUCKETS_INDEX4(T[p3 - 1], T[p3 - 2] > T[p3 - 1]); + SA[--buckets[v3]] = (p3 - 1) | ((sa_sint_t)(buckets[2 + v3] != d) << (SAINT_BIT - 1)); buckets[2 + v3] = d; + } + + for (j -= 2 * prefetch_distance + 1; i >= j; i -= 1) + { + sa_sint_t p = 
SA[i]; d += (p < 0); p &= SAINT_MAX; sa_sint_t v = BUCKETS_INDEX4(T[p - 1], T[p - 2] > T[p - 1]); + SA[--buckets[v]] = (p - 1) | ((sa_sint_t)(buckets[2 + v] != d) << (SAINT_BIT - 1)); buckets[2 + v] = d; + } + + return d; +} + +static sa_sint_t libsais_partial_sorting_scan_right_to_left_32s_4k(const sa_sint_t * RESTRICT T, sa_sint_t * RESTRICT SA, sa_sint_t k, sa_sint_t * RESTRICT buckets, sa_sint_t d, fast_sint_t omp_block_start, fast_sint_t omp_block_size) +{ + const fast_sint_t prefetch_distance = 32; + + sa_sint_t * RESTRICT induction_bucket = &buckets[3 * k]; + sa_sint_t * RESTRICT distinct_names = &buckets[0 * k]; + + fast_sint_t i, j; + for (i = omp_block_start + omp_block_size - 1, j = omp_block_start + 2 * prefetch_distance + 1; i >= j; i -= 2) + { + libsais_prefetchw(&SA[i - 3 * prefetch_distance]); + + sa_sint_t s0 = SA[i - 2 * prefetch_distance - 0]; const sa_sint_t * Ts0 = &T[s0 & ~SUFFIX_GROUP_MARKER] - 1; libsais_prefetch(s0 > 0 ? Ts0 : NULL); Ts0--; libsais_prefetch(s0 > 0 ? Ts0 : NULL); + sa_sint_t s1 = SA[i - 2 * prefetch_distance - 1]; const sa_sint_t * Ts1 = &T[s1 & ~SUFFIX_GROUP_MARKER] - 1; libsais_prefetch(s1 > 0 ? Ts1 : NULL); Ts1--; libsais_prefetch(s1 > 0 ? 
Ts1 : NULL); + sa_sint_t s2 = SA[i - 1 * prefetch_distance - 0]; if (s2 > 0) { const fast_sint_t Ts2 = T[(s2 & ~SUFFIX_GROUP_MARKER) - 1]; libsais_prefetchw(&induction_bucket[Ts2]); libsais_prefetchw(&distinct_names[BUCKETS_INDEX2(Ts2, 0)]); } + sa_sint_t s3 = SA[i - 1 * prefetch_distance - 1]; if (s3 > 0) { const fast_sint_t Ts3 = T[(s3 & ~SUFFIX_GROUP_MARKER) - 1]; libsais_prefetchw(&induction_bucket[Ts3]); libsais_prefetchw(&distinct_names[BUCKETS_INDEX2(Ts3, 0)]); } + + sa_sint_t p0 = SA[i - 0]; + if (p0 > 0) + { + SA[i - 0] = 0; d += (p0 >> (SUFFIX_GROUP_BIT - 1)); p0 &= ~SUFFIX_GROUP_MARKER; sa_sint_t v0 = BUCKETS_INDEX2(T[p0 - 1], T[p0 - 2] > T[p0 - 1]); + SA[--induction_bucket[T[p0 - 1]]] = (p0 - 1) | ((sa_sint_t)(T[p0 - 2] > T[p0 - 1]) << (SAINT_BIT - 1)) | ((sa_sint_t)(distinct_names[v0] != d) << (SUFFIX_GROUP_BIT - 1)); distinct_names[v0] = d; + } + + sa_sint_t p1 = SA[i - 1]; + if (p1 > 0) + { + SA[i - 1] = 0; d += (p1 >> (SUFFIX_GROUP_BIT - 1)); p1 &= ~SUFFIX_GROUP_MARKER; sa_sint_t v1 = BUCKETS_INDEX2(T[p1 - 1], T[p1 - 2] > T[p1 - 1]); + SA[--induction_bucket[T[p1 - 1]]] = (p1 - 1) | ((sa_sint_t)(T[p1 - 2] > T[p1 - 1]) << (SAINT_BIT - 1)) | ((sa_sint_t)(distinct_names[v1] != d) << (SUFFIX_GROUP_BIT - 1)); distinct_names[v1] = d; + } + } + + for (j -= 2 * prefetch_distance + 1; i >= j; i -= 1) + { + sa_sint_t p = SA[i]; + if (p > 0) + { + SA[i] = 0; d += (p >> (SUFFIX_GROUP_BIT - 1)); p &= ~SUFFIX_GROUP_MARKER; sa_sint_t v = BUCKETS_INDEX2(T[p - 1], T[p - 2] > T[p - 1]); + SA[--induction_bucket[T[p - 1]]] = (p - 1) | ((sa_sint_t)(T[p - 2] > T[p - 1]) << (SAINT_BIT - 1)) | ((sa_sint_t)(distinct_names[v] != d) << (SUFFIX_GROUP_BIT - 1)); distinct_names[v] = d; + } + } + + return d; +} + +static void libsais_partial_sorting_scan_right_to_left_32s_1k(const sa_sint_t * RESTRICT T, sa_sint_t * RESTRICT SA, sa_sint_t * RESTRICT induction_bucket, fast_sint_t omp_block_start, fast_sint_t omp_block_size) +{ + const fast_sint_t prefetch_distance = 32; + + 
fast_sint_t i, j; + for (i = omp_block_start + omp_block_size - 1, j = omp_block_start + 2 * prefetch_distance + 1; i >= j; i -= 2) + { + libsais_prefetchw(&SA[i - 3 * prefetch_distance]); + + sa_sint_t s0 = SA[i - 2 * prefetch_distance - 0]; const sa_sint_t * Ts0 = &T[s0] - 1; libsais_prefetch(s0 > 0 ? Ts0 : NULL); + sa_sint_t s1 = SA[i - 2 * prefetch_distance - 1]; const sa_sint_t * Ts1 = &T[s1] - 1; libsais_prefetch(s1 > 0 ? Ts1 : NULL); + sa_sint_t s2 = SA[i - 1 * prefetch_distance - 0]; if (s2 > 0) { libsais_prefetchw(&induction_bucket[T[s2 - 1]]); libsais_prefetch(&T[s2] - 2); } + sa_sint_t s3 = SA[i - 1 * prefetch_distance - 1]; if (s3 > 0) { libsais_prefetchw(&induction_bucket[T[s3 - 1]]); libsais_prefetch(&T[s3] - 2); } + + sa_sint_t p0 = SA[i - 0]; if (p0 > 0) { SA[i - 0] = 0; SA[--induction_bucket[T[p0 - 1]]] = (p0 - 1) | ((sa_sint_t)(T[p0 - 2] > T[p0 - 1]) << (SAINT_BIT - 1)); } + sa_sint_t p1 = SA[i - 1]; if (p1 > 0) { SA[i - 1] = 0; SA[--induction_bucket[T[p1 - 1]]] = (p1 - 1) | ((sa_sint_t)(T[p1 - 2] > T[p1 - 1]) << (SAINT_BIT - 1)); } + } + + for (j -= 2 * prefetch_distance + 1; i >= j; i -= 1) + { + sa_sint_t p = SA[i]; if (p > 0) { SA[i] = 0; SA[--induction_bucket[T[p - 1]]] = (p - 1) | ((sa_sint_t)(T[p - 2] > T[p - 1]) << (SAINT_BIT - 1)); } + } +} + +#if defined(_OPENMP) + +static void libsais_partial_sorting_scan_right_to_left_32s_6k_block_gather(const sa_sint_t * RESTRICT T, sa_sint_t * RESTRICT SA, LIBSAIS_THREAD_CACHE * RESTRICT cache, fast_sint_t omp_block_start, fast_sint_t omp_block_size) +{ + const fast_sint_t prefetch_distance = 32; + + fast_sint_t i, j; + for (i = omp_block_start, j = omp_block_start + omp_block_size - prefetch_distance - 1; i < j; i += 2) + { + libsais_prefetch(&SA[i + 2 * prefetch_distance]); + + libsais_prefetch(&T[SA[i + prefetch_distance + 0] & SAINT_MAX] - 1); + libsais_prefetch(&T[SA[i + prefetch_distance + 0] & SAINT_MAX] - 2); + libsais_prefetch(&T[SA[i + prefetch_distance + 1] & SAINT_MAX] - 1); + 
libsais_prefetch(&T[SA[i + prefetch_distance + 1] & SAINT_MAX] - 2); + + libsais_prefetchw(&cache[i + prefetch_distance]); + + sa_sint_t p0 = cache[i + 0].index = SA[i + 0]; sa_sint_t symbol0 = 0; p0 &= SAINT_MAX; if (p0 != 0) { symbol0 = BUCKETS_INDEX4(T[p0 - 1], T[p0 - 2] > T[p0 - 1]); } cache[i + 0].symbol = symbol0; + sa_sint_t p1 = cache[i + 1].index = SA[i + 1]; sa_sint_t symbol1 = 0; p1 &= SAINT_MAX; if (p1 != 0) { symbol1 = BUCKETS_INDEX4(T[p1 - 1], T[p1 - 2] > T[p1 - 1]); } cache[i + 1].symbol = symbol1; + } + + for (j += prefetch_distance + 1; i < j; i += 1) + { + sa_sint_t p = cache[i].index = SA[i]; sa_sint_t symbol = 0; p &= SAINT_MAX; if (p != 0) { symbol = BUCKETS_INDEX4(T[p - 1], T[p - 2] > T[p - 1]); } cache[i].symbol = symbol; + } +} + +static void libsais_partial_sorting_scan_right_to_left_32s_4k_block_gather(const sa_sint_t * RESTRICT T, sa_sint_t * RESTRICT SA, LIBSAIS_THREAD_CACHE * RESTRICT cache, fast_sint_t omp_block_start, fast_sint_t omp_block_size) +{ + const fast_sint_t prefetch_distance = 32; + + fast_sint_t i, j; + for (i = omp_block_start, j = omp_block_start + omp_block_size - prefetch_distance - 1; i < j; i += 2) + { + libsais_prefetchw(&SA[i + 2 * prefetch_distance]); + + sa_sint_t s0 = SA[i + prefetch_distance + 0]; const sa_sint_t * Ts0 = &T[s0 & ~SUFFIX_GROUP_MARKER] - 1; libsais_prefetch(s0 > 0 ? Ts0 : NULL); Ts0--; libsais_prefetch(s0 > 0 ? Ts0 : NULL); + sa_sint_t s1 = SA[i + prefetch_distance + 1]; const sa_sint_t * Ts1 = &T[s1 & ~SUFFIX_GROUP_MARKER] - 1; libsais_prefetch(s1 > 0 ? Ts1 : NULL); Ts1--; libsais_prefetch(s1 > 0 ? 
Ts1 : NULL); + + libsais_prefetchw(&cache[i + prefetch_distance]); + + sa_sint_t symbol0 = SAINT_MIN, p0 = SA[i + 0]; if (p0 > 0) { SA[i + 0] = 0; cache[i + 0].index = p0; p0 &= ~SUFFIX_GROUP_MARKER; symbol0 = BUCKETS_INDEX2(T[p0 - 1], T[p0 - 2] > T[p0 - 1]); } cache[i + 0].symbol = symbol0; + sa_sint_t symbol1 = SAINT_MIN, p1 = SA[i + 1]; if (p1 > 0) { SA[i + 1] = 0; cache[i + 1].index = p1; p1 &= ~SUFFIX_GROUP_MARKER; symbol1 = BUCKETS_INDEX2(T[p1 - 1], T[p1 - 2] > T[p1 - 1]); } cache[i + 1].symbol = symbol1; + } + + for (j += prefetch_distance + 1; i < j; i += 1) + { + sa_sint_t symbol = SAINT_MIN, p = SA[i]; if (p > 0) { SA[i] = 0; cache[i].index = p; p &= ~SUFFIX_GROUP_MARKER; symbol = BUCKETS_INDEX2(T[p - 1], T[p - 2] > T[p - 1]); } cache[i].symbol = symbol; + } +} + +static void libsais_partial_sorting_scan_right_to_left_32s_1k_block_gather(const sa_sint_t * RESTRICT T, sa_sint_t * RESTRICT SA, LIBSAIS_THREAD_CACHE * RESTRICT cache, fast_sint_t omp_block_start, fast_sint_t omp_block_size) +{ + const fast_sint_t prefetch_distance = 32; + + fast_sint_t i, j; + for (i = omp_block_start, j = omp_block_start + omp_block_size - prefetch_distance - 1; i < j; i += 2) + { + libsais_prefetchw(&SA[i + 2 * prefetch_distance]); + + sa_sint_t s0 = SA[i + prefetch_distance + 0]; const sa_sint_t * Ts0 = &T[s0] - 1; libsais_prefetch(s0 > 0 ? Ts0 : NULL); Ts0--; libsais_prefetch(s0 > 0 ? Ts0 : NULL); + sa_sint_t s1 = SA[i + prefetch_distance + 1]; const sa_sint_t * Ts1 = &T[s1] - 1; libsais_prefetch(s1 > 0 ? Ts1 : NULL); Ts1--; libsais_prefetch(s1 > 0 ? 
Ts1 : NULL); + + libsais_prefetchw(&cache[i + prefetch_distance]); + + sa_sint_t symbol0 = SAINT_MIN, p0 = SA[i + 0]; if (p0 > 0) { SA[i + 0] = 0; cache[i + 0].index = (p0 - 1) | ((sa_sint_t)(T[p0 - 2] > T[p0 - 1]) << (SAINT_BIT - 1)); symbol0 = T[p0 - 1]; } cache[i + 0].symbol = symbol0; + sa_sint_t symbol1 = SAINT_MIN, p1 = SA[i + 1]; if (p1 > 0) { SA[i + 1] = 0; cache[i + 1].index = (p1 - 1) | ((sa_sint_t)(T[p1 - 2] > T[p1 - 1]) << (SAINT_BIT - 1)); symbol1 = T[p1 - 1]; } cache[i + 1].symbol = symbol1; + } + + for (j += prefetch_distance + 1; i < j; i += 1) + { + sa_sint_t symbol = SAINT_MIN, p = SA[i]; if (p > 0) { SA[i] = 0; cache[i].index = (p - 1) | ((sa_sint_t)(T[p - 2] > T[p - 1]) << (SAINT_BIT - 1)); symbol = T[p - 1]; } cache[i].symbol = symbol; + } +} + +static sa_sint_t libsais_partial_sorting_scan_right_to_left_32s_6k_block_sort(const sa_sint_t * RESTRICT T, sa_sint_t * RESTRICT buckets, sa_sint_t d, LIBSAIS_THREAD_CACHE * RESTRICT cache, fast_sint_t omp_block_start, fast_sint_t omp_block_size) +{ + const fast_sint_t prefetch_distance = 32; + + fast_sint_t i, j; + for (i = omp_block_start + omp_block_size - 1, j = omp_block_start + prefetch_distance + 1; i >= j; i -= 2) + { + libsais_prefetchw(&cache[i - 2 * prefetch_distance]); + + libsais_prefetchw(&buckets[cache[i - prefetch_distance - 0].symbol]); + libsais_prefetchw(&buckets[cache[i - prefetch_distance - 1].symbol]); + + sa_sint_t v0 = cache[i - 0].symbol, p0 = cache[i - 0].index; d += (p0 < 0); cache[i - 0].symbol = --buckets[v0]; cache[i - 0].index = (p0 - 1) | ((sa_sint_t)(buckets[2 + v0] != d) << (SAINT_BIT - 1)); buckets[2 + v0] = d; + if (cache[i - 0].symbol >= omp_block_start) { sa_sint_t s = cache[i - 0].symbol, q = (cache[s].index = cache[i - 0].index) & SAINT_MAX; cache[s].symbol = BUCKETS_INDEX4(T[q - 1], T[q - 2] > T[q - 1]); } + + sa_sint_t v1 = cache[i - 1].symbol, p1 = cache[i - 1].index; d += (p1 < 0); cache[i - 1].symbol = --buckets[v1]; cache[i - 1].index = (p1 - 1) | 
((sa_sint_t)(buckets[2 + v1] != d) << (SAINT_BIT - 1)); buckets[2 + v1] = d; + if (cache[i - 1].symbol >= omp_block_start) { sa_sint_t s = cache[i - 1].symbol, q = (cache[s].index = cache[i - 1].index) & SAINT_MAX; cache[s].symbol = BUCKETS_INDEX4(T[q - 1], T[q - 2] > T[q - 1]); } + } + + for (j -= prefetch_distance + 1; i >= j; i -= 1) + { + sa_sint_t v = cache[i].symbol, p = cache[i].index; d += (p < 0); cache[i].symbol = --buckets[v]; cache[i].index = (p - 1) | ((sa_sint_t)(buckets[2 + v] != d) << (SAINT_BIT - 1)); buckets[2 + v] = d; + if (cache[i].symbol >= omp_block_start) { sa_sint_t s = cache[i].symbol, q = (cache[s].index = cache[i].index) & SAINT_MAX; cache[s].symbol = BUCKETS_INDEX4(T[q - 1], T[q - 2] > T[q - 1]); } + } + + return d; +} + +static sa_sint_t libsais_partial_sorting_scan_right_to_left_32s_4k_block_sort(const sa_sint_t * RESTRICT T, sa_sint_t k, sa_sint_t * RESTRICT buckets, sa_sint_t d, LIBSAIS_THREAD_CACHE * RESTRICT cache, fast_sint_t omp_block_start, fast_sint_t omp_block_size) +{ + const fast_sint_t prefetch_distance = 32; + + sa_sint_t * RESTRICT induction_bucket = &buckets[3 * k]; + sa_sint_t * RESTRICT distinct_names = &buckets[0 * k]; + + fast_sint_t i, j; + for (i = omp_block_start + omp_block_size - 1, j = omp_block_start + prefetch_distance + 1; i >= j; i -= 2) + { + libsais_prefetchw(&cache[i - 2 * prefetch_distance]); + + sa_sint_t s0 = cache[i - prefetch_distance - 0].symbol; const sa_sint_t * Is0 = &induction_bucket[s0 >> 1]; libsais_prefetchw(s0 >= 0 ? Is0 : NULL); const sa_sint_t * Ds0 = &distinct_names[s0]; libsais_prefetchw(s0 >= 0 ? Ds0 : NULL); + sa_sint_t s1 = cache[i - prefetch_distance - 1].symbol; const sa_sint_t * Is1 = &induction_bucket[s1 >> 1]; libsais_prefetchw(s1 >= 0 ? Is1 : NULL); const sa_sint_t * Ds1 = &distinct_names[s1]; libsais_prefetchw(s1 >= 0 ? 
Ds1 : NULL); + + sa_sint_t v0 = cache[i - 0].symbol; + if (v0 >= 0) + { + sa_sint_t p0 = cache[i - 0].index; d += (p0 >> (SUFFIX_GROUP_BIT - 1)); cache[i - 0].symbol = --induction_bucket[v0 >> 1]; cache[i - 0].index = (p0 - 1) | (v0 << (SAINT_BIT - 1)) | ((sa_sint_t)(distinct_names[v0] != d) << (SUFFIX_GROUP_BIT - 1)); distinct_names[v0] = d; + if (cache[i - 0].symbol >= omp_block_start) { sa_sint_t ni = cache[i - 0].symbol, np = cache[i - 0].index; if (np > 0) { cache[i - 0].index = 0; cache[ni].index = np; np &= ~SUFFIX_GROUP_MARKER; cache[ni].symbol = BUCKETS_INDEX2(T[np - 1], T[np - 2] > T[np - 1]); } } + } + + sa_sint_t v1 = cache[i - 1].symbol; + if (v1 >= 0) + { + sa_sint_t p1 = cache[i - 1].index; d += (p1 >> (SUFFIX_GROUP_BIT - 1)); cache[i - 1].symbol = --induction_bucket[v1 >> 1]; cache[i - 1].index = (p1 - 1) | (v1 << (SAINT_BIT - 1)) | ((sa_sint_t)(distinct_names[v1] != d) << (SUFFIX_GROUP_BIT - 1)); distinct_names[v1] = d; + if (cache[i - 1].symbol >= omp_block_start) { sa_sint_t ni = cache[i - 1].symbol, np = cache[i - 1].index; if (np > 0) { cache[i - 1].index = 0; cache[ni].index = np; np &= ~SUFFIX_GROUP_MARKER; cache[ni].symbol = BUCKETS_INDEX2(T[np - 1], T[np - 2] > T[np - 1]); } } + } + } + + for (j -= prefetch_distance + 1; i >= j; i -= 1) + { + sa_sint_t v = cache[i].symbol; + if (v >= 0) + { + sa_sint_t p = cache[i].index; d += (p >> (SUFFIX_GROUP_BIT - 1)); cache[i].symbol = --induction_bucket[v >> 1]; cache[i].index = (p - 1) | (v << (SAINT_BIT - 1)) | ((sa_sint_t)(distinct_names[v] != d) << (SUFFIX_GROUP_BIT - 1)); distinct_names[v] = d; + if (cache[i].symbol >= omp_block_start) { sa_sint_t ni = cache[i].symbol, np = cache[i].index; if (np > 0) { cache[i].index = 0; cache[ni].index = np; np &= ~SUFFIX_GROUP_MARKER; cache[ni].symbol = BUCKETS_INDEX2(T[np - 1], T[np - 2] > T[np - 1]); } } + } + } + + return d; +} + +static void libsais_partial_sorting_scan_right_to_left_32s_1k_block_sort(const sa_sint_t * RESTRICT T, sa_sint_t * RESTRICT 
induction_bucket, LIBSAIS_THREAD_CACHE * RESTRICT cache, fast_sint_t omp_block_start, fast_sint_t omp_block_size) +{ + const fast_sint_t prefetch_distance = 32; + + fast_sint_t i, j; + for (i = omp_block_start + omp_block_size - 1, j = omp_block_start + prefetch_distance + 1; i >= j; i -= 2) + { + libsais_prefetchw(&cache[i - 2 * prefetch_distance]); + + sa_sint_t s0 = cache[i - prefetch_distance - 0].symbol; const sa_sint_t * Is0 = &induction_bucket[s0]; libsais_prefetchw(s0 >= 0 ? Is0 : NULL); + sa_sint_t s1 = cache[i - prefetch_distance - 1].symbol; const sa_sint_t * Is1 = &induction_bucket[s1]; libsais_prefetchw(s1 >= 0 ? Is1 : NULL); + + sa_sint_t v0 = cache[i - 0].symbol; + if (v0 >= 0) + { + cache[i - 0].symbol = --induction_bucket[v0]; + if (cache[i - 0].symbol >= omp_block_start) { sa_sint_t ni = cache[i - 0].symbol, np = cache[i - 0].index; if (np > 0) { cache[i - 0].index = 0; cache[ni].index = (np - 1) | ((sa_sint_t)(T[np - 2] > T[np - 1]) << (SAINT_BIT - 1)); cache[ni].symbol = T[np - 1]; } } + } + + sa_sint_t v1 = cache[i - 1].symbol; + if (v1 >= 0) + { + cache[i - 1].symbol = --induction_bucket[v1]; + if (cache[i - 1].symbol >= omp_block_start) { sa_sint_t ni = cache[i - 1].symbol, np = cache[i - 1].index; if (np > 0) { cache[i - 1].index = 0; cache[ni].index = (np - 1) | ((sa_sint_t)(T[np - 2] > T[np - 1]) << (SAINT_BIT - 1)); cache[ni].symbol = T[np - 1]; }} + } + } + + for (j -= prefetch_distance + 1; i >= j; i -= 1) + { + sa_sint_t v = cache[i].symbol; + if (v >= 0) + { + cache[i].symbol = --induction_bucket[v]; + if (cache[i].symbol >= omp_block_start) { sa_sint_t ni = cache[i].symbol, np = cache[i].index; if (np > 0) { cache[i].index = 0; cache[ni].index = (np - 1) | ((sa_sint_t)(T[np - 2] > T[np - 1]) << (SAINT_BIT - 1)); cache[ni].symbol = T[np - 1]; } } + } + } +} + +static sa_sint_t libsais_partial_sorting_scan_right_to_left_32s_6k_block_omp(const sa_sint_t * RESTRICT T, sa_sint_t * RESTRICT SA, sa_sint_t * RESTRICT buckets, sa_sint_t d, 
LIBSAIS_THREAD_CACHE * RESTRICT cache, fast_sint_t block_start, fast_sint_t block_size, sa_sint_t threads) +{ +#if defined(_OPENMP) + #pragma omp parallel num_threads(threads) if(threads > 1 && block_size >= 16384) +#endif + { +#if defined(_OPENMP) + fast_sint_t omp_thread_num = omp_get_thread_num(); + fast_sint_t omp_num_threads = omp_get_num_threads(); +#else + UNUSED(threads); UNUSED(cache); + + fast_sint_t omp_thread_num = 0; + fast_sint_t omp_num_threads = 1; +#endif + fast_sint_t omp_block_stride = (block_size / omp_num_threads) & (-16); + fast_sint_t omp_block_start = omp_thread_num * omp_block_stride; + fast_sint_t omp_block_size = omp_thread_num < omp_num_threads - 1 ? omp_block_stride : block_size - omp_block_start; + + omp_block_start += block_start; + + if (omp_num_threads == 1) + { + d = libsais_partial_sorting_scan_right_to_left_32s_6k(T, SA, buckets, d, omp_block_start, omp_block_size); + } +#if defined(_OPENMP) + else + { + { + libsais_partial_sorting_scan_right_to_left_32s_6k_block_gather(T, SA, cache - block_start, omp_block_start, omp_block_size); + } + + #pragma omp barrier + + #pragma omp master + { + d = libsais_partial_sorting_scan_right_to_left_32s_6k_block_sort(T, buckets, d, cache - block_start, block_start, block_size); + } + + #pragma omp barrier + + { + libsais_place_cached_suffixes(SA, cache - block_start, omp_block_start, omp_block_size); + } + } +#endif + } + + return d; +} + +static sa_sint_t libsais_partial_sorting_scan_right_to_left_32s_4k_block_omp(const sa_sint_t * RESTRICT T, sa_sint_t * RESTRICT SA, sa_sint_t k, sa_sint_t * RESTRICT buckets, sa_sint_t d, LIBSAIS_THREAD_CACHE * RESTRICT cache, fast_sint_t block_start, fast_sint_t block_size, sa_sint_t threads) +{ +#if defined(_OPENMP) + #pragma omp parallel num_threads(threads) if(threads > 1 && block_size >= 16384) +#endif + { +#if defined(_OPENMP) + fast_sint_t omp_thread_num = omp_get_thread_num(); + fast_sint_t omp_num_threads = omp_get_num_threads(); +#else + 
UNUSED(threads); UNUSED(cache); + + fast_sint_t omp_thread_num = 0; + fast_sint_t omp_num_threads = 1; +#endif + fast_sint_t omp_block_stride = (block_size / omp_num_threads) & (-16); + fast_sint_t omp_block_start = omp_thread_num * omp_block_stride; + fast_sint_t omp_block_size = omp_thread_num < omp_num_threads - 1 ? omp_block_stride : block_size - omp_block_start; + + omp_block_start += block_start; + + if (omp_num_threads == 1) + { + d = libsais_partial_sorting_scan_right_to_left_32s_4k(T, SA, k, buckets, d, omp_block_start, omp_block_size); + } +#if defined(_OPENMP) + else + { + { + libsais_partial_sorting_scan_right_to_left_32s_4k_block_gather(T, SA, cache - block_start, omp_block_start, omp_block_size); + } + + #pragma omp barrier + + #pragma omp master + { + d = libsais_partial_sorting_scan_right_to_left_32s_4k_block_sort(T, k, buckets, d, cache - block_start, block_start, block_size); + } + + #pragma omp barrier + + { + libsais_compact_and_place_cached_suffixes(SA, cache - block_start, omp_block_start, omp_block_size); + } + } +#endif + } + + return d; +} + +static void libsais_partial_sorting_scan_right_to_left_32s_1k_block_omp(const sa_sint_t * RESTRICT T, sa_sint_t * RESTRICT SA, sa_sint_t * RESTRICT buckets, LIBSAIS_THREAD_CACHE * RESTRICT cache, fast_sint_t block_start, fast_sint_t block_size, sa_sint_t threads) +{ +#if defined(_OPENMP) + #pragma omp parallel num_threads(threads) if(threads > 1 && block_size >= 16384) +#endif + { +#if defined(_OPENMP) + fast_sint_t omp_thread_num = omp_get_thread_num(); + fast_sint_t omp_num_threads = omp_get_num_threads(); +#else + UNUSED(threads); UNUSED(cache); + + fast_sint_t omp_thread_num = 0; + fast_sint_t omp_num_threads = 1; +#endif + fast_sint_t omp_block_stride = (block_size / omp_num_threads) & (-16); + fast_sint_t omp_block_start = omp_thread_num * omp_block_stride; + fast_sint_t omp_block_size = omp_thread_num < omp_num_threads - 1 ? 
omp_block_stride : block_size - omp_block_start; + + omp_block_start += block_start; + + if (omp_num_threads == 1) + { + libsais_partial_sorting_scan_right_to_left_32s_1k(T, SA, buckets, omp_block_start, omp_block_size); + } +#if defined(_OPENMP) + else + { + { + libsais_partial_sorting_scan_right_to_left_32s_1k_block_gather(T, SA, cache - block_start, omp_block_start, omp_block_size); + } + + #pragma omp barrier + + #pragma omp master + { + libsais_partial_sorting_scan_right_to_left_32s_1k_block_sort(T, buckets, cache - block_start, block_start, block_size); + } + + #pragma omp barrier + + { + libsais_compact_and_place_cached_suffixes(SA, cache - block_start, omp_block_start, omp_block_size); + } + } +#endif + } +} + +#endif + +static sa_sint_t libsais_partial_sorting_scan_right_to_left_32s_6k_omp(const sa_sint_t * RESTRICT T, sa_sint_t * RESTRICT SA, sa_sint_t n, sa_sint_t * RESTRICT buckets, sa_sint_t first_lms_suffix, sa_sint_t left_suffixes_count, sa_sint_t d, sa_sint_t threads, LIBSAIS_THREAD_STATE * RESTRICT thread_state) +{ + fast_sint_t scan_start = (fast_sint_t)left_suffixes_count + 1; + fast_sint_t scan_end = (fast_sint_t)n - (fast_sint_t)first_lms_suffix; + + if (threads == 1 || (scan_end - scan_start) < 65536) + { + d = libsais_partial_sorting_scan_right_to_left_32s_6k(T, SA, buckets, d, scan_start, scan_end - scan_start); + } +#if defined(_OPENMP) + else + { + fast_sint_t block_start, block_end; + for (block_start = scan_end - 1; block_start >= scan_start; block_start = block_end) + { + block_end = block_start - (fast_sint_t)threads * LIBSAIS_PER_THREAD_CACHE_SIZE; if (block_end < scan_start) { block_end = scan_start - 1; } + + d = libsais_partial_sorting_scan_right_to_left_32s_6k_block_omp(T, SA, buckets, d, thread_state[0].state.cache, block_end + 1, block_start - block_end, threads); + } + } +#else + UNUSED(thread_state); +#endif + + return d; +} + +static sa_sint_t libsais_partial_sorting_scan_right_to_left_32s_4k_omp(const sa_sint_t * RESTRICT T, 
sa_sint_t * RESTRICT SA, sa_sint_t n, sa_sint_t k, sa_sint_t * RESTRICT buckets, sa_sint_t d, sa_sint_t threads, LIBSAIS_THREAD_STATE * RESTRICT thread_state) +{ + if (threads == 1 || n < 65536) + { + d = libsais_partial_sorting_scan_right_to_left_32s_4k(T, SA, k, buckets, d, 0, n); + } +#if defined(_OPENMP) + else + { + fast_sint_t block_start, block_end; + for (block_start = (fast_sint_t)n - 1; block_start >= 0; block_start = block_end) + { + block_end = block_start - (fast_sint_t)threads * LIBSAIS_PER_THREAD_CACHE_SIZE; if (block_end < 0) { block_end = -1; } + + d = libsais_partial_sorting_scan_right_to_left_32s_4k_block_omp(T, SA, k, buckets, d, thread_state[0].state.cache, block_end + 1, block_start - block_end, threads); + } + } +#else + UNUSED(thread_state); +#endif + + return d; +} + +static void libsais_partial_sorting_scan_right_to_left_32s_1k_omp(const sa_sint_t * RESTRICT T, sa_sint_t * RESTRICT SA, sa_sint_t n, sa_sint_t * RESTRICT buckets, sa_sint_t threads, LIBSAIS_THREAD_STATE * RESTRICT thread_state) +{ + if (threads == 1 || n < 65536) + { + libsais_partial_sorting_scan_right_to_left_32s_1k(T, SA, buckets, 0, n); + } +#if defined(_OPENMP) + else + { + fast_sint_t block_start, block_end; + for (block_start = (fast_sint_t)n - 1; block_start >= 0; block_start = block_end) + { + block_end = block_start - (fast_sint_t)threads * LIBSAIS_PER_THREAD_CACHE_SIZE; if (block_end < 0) { block_end = -1; } + + libsais_partial_sorting_scan_right_to_left_32s_1k_block_omp(T, SA, buckets, thread_state[0].state.cache, block_end + 1, block_start - block_end, threads); + } + } +#else + UNUSED(thread_state); +#endif +} + +static fast_sint_t libsais_partial_sorting_gather_lms_suffixes_32s_4k(sa_sint_t * RESTRICT SA, fast_sint_t omp_block_start, fast_sint_t omp_block_size) +{ + const fast_sint_t prefetch_distance = 32; + + fast_sint_t i, j, l; + for (i = omp_block_start, j = omp_block_start + omp_block_size - 3, l = omp_block_start; i < j; i += 4) + { + 
libsais_prefetch(&SA[i + prefetch_distance]); + + sa_sint_t s0 = SA[i + 0]; SA[l] = (s0 - SUFFIX_GROUP_MARKER) & (~SUFFIX_GROUP_MARKER); l += (s0 < 0); + sa_sint_t s1 = SA[i + 1]; SA[l] = (s1 - SUFFIX_GROUP_MARKER) & (~SUFFIX_GROUP_MARKER); l += (s1 < 0); + sa_sint_t s2 = SA[i + 2]; SA[l] = (s2 - SUFFIX_GROUP_MARKER) & (~SUFFIX_GROUP_MARKER); l += (s2 < 0); + sa_sint_t s3 = SA[i + 3]; SA[l] = (s3 - SUFFIX_GROUP_MARKER) & (~SUFFIX_GROUP_MARKER); l += (s3 < 0); + } + + for (j += 3; i < j; i += 1) + { + sa_sint_t s = SA[i]; SA[l] = (s - SUFFIX_GROUP_MARKER) & (~SUFFIX_GROUP_MARKER); l += (s < 0); + } + + return l; +} + +static fast_sint_t libsais_partial_sorting_gather_lms_suffixes_32s_1k(sa_sint_t * RESTRICT SA, fast_sint_t omp_block_start, fast_sint_t omp_block_size) +{ + const fast_sint_t prefetch_distance = 32; + + fast_sint_t i, j, l; + for (i = omp_block_start, j = omp_block_start + omp_block_size - 3, l = omp_block_start; i < j; i += 4) + { + libsais_prefetch(&SA[i + prefetch_distance]); + + sa_sint_t s0 = SA[i + 0]; SA[l] = s0 & SAINT_MAX; l += (s0 < 0); + sa_sint_t s1 = SA[i + 1]; SA[l] = s1 & SAINT_MAX; l += (s1 < 0); + sa_sint_t s2 = SA[i + 2]; SA[l] = s2 & SAINT_MAX; l += (s2 < 0); + sa_sint_t s3 = SA[i + 3]; SA[l] = s3 & SAINT_MAX; l += (s3 < 0); + } + + for (j += 3; i < j; i += 1) + { + sa_sint_t s = SA[i]; SA[l] = s & SAINT_MAX; l += (s < 0); + } + + return l; +} + +static void libsais_partial_sorting_gather_lms_suffixes_32s_4k_omp(sa_sint_t * RESTRICT SA, sa_sint_t n, sa_sint_t threads, LIBSAIS_THREAD_STATE * RESTRICT thread_state) +{ +#if defined(_OPENMP) + #pragma omp parallel num_threads(threads) if(threads > 1 && n >= 65536) +#endif + { +#if defined(_OPENMP) + fast_sint_t omp_thread_num = omp_get_thread_num(); + fast_sint_t omp_num_threads = omp_get_num_threads(); +#else + UNUSED(threads); UNUSED(thread_state); + + fast_sint_t omp_thread_num = 0; + fast_sint_t omp_num_threads = 1; +#endif + fast_sint_t omp_block_stride = (n / omp_num_threads) & 
(-16); + fast_sint_t omp_block_start = omp_thread_num * omp_block_stride; + fast_sint_t omp_block_size = omp_thread_num < omp_num_threads - 1 ? omp_block_stride : n - omp_block_start; + + if (omp_num_threads == 1) + { + libsais_partial_sorting_gather_lms_suffixes_32s_4k(SA, omp_block_start, omp_block_size); + } +#if defined(_OPENMP) + else + { + { + thread_state[omp_thread_num].state.position = omp_block_start; + thread_state[omp_thread_num].state.count = libsais_partial_sorting_gather_lms_suffixes_32s_4k(SA, omp_block_start, omp_block_size) - omp_block_start; + } + + #pragma omp barrier + + #pragma omp master + { + fast_sint_t t, position = 0; + for (t = 0; t < omp_num_threads; ++t) + { + if (t > 0 && thread_state[t].state.count > 0) + { + memmove(&SA[position], &SA[thread_state[t].state.position], (size_t)thread_state[t].state.count * sizeof(sa_sint_t)); + } + + position += thread_state[t].state.count; + } + } + } +#endif + } +} + +static void libsais_partial_sorting_gather_lms_suffixes_32s_1k_omp(sa_sint_t * RESTRICT SA, sa_sint_t n, sa_sint_t threads, LIBSAIS_THREAD_STATE * RESTRICT thread_state) +{ +#if defined(_OPENMP) + #pragma omp parallel num_threads(threads) if(threads > 1 && n >= 65536) +#endif + { +#if defined(_OPENMP) + fast_sint_t omp_thread_num = omp_get_thread_num(); + fast_sint_t omp_num_threads = omp_get_num_threads(); +#else + UNUSED(threads); UNUSED(thread_state); + + fast_sint_t omp_thread_num = 0; + fast_sint_t omp_num_threads = 1; +#endif + fast_sint_t omp_block_stride = (n / omp_num_threads) & (-16); + fast_sint_t omp_block_start = omp_thread_num * omp_block_stride; + fast_sint_t omp_block_size = omp_thread_num < omp_num_threads - 1 ? 
omp_block_stride : n - omp_block_start; + + if (omp_num_threads == 1) + { + libsais_partial_sorting_gather_lms_suffixes_32s_1k(SA, omp_block_start, omp_block_size); + } +#if defined(_OPENMP) + else + { + { + thread_state[omp_thread_num].state.position = omp_block_start; + thread_state[omp_thread_num].state.count = libsais_partial_sorting_gather_lms_suffixes_32s_1k(SA, omp_block_start, omp_block_size) - omp_block_start; + } + + #pragma omp barrier + + #pragma omp master + { + fast_sint_t t, position = 0; + for (t = 0; t < omp_num_threads; ++t) + { + if (t > 0 && thread_state[t].state.count > 0) + { + memmove(&SA[position], &SA[thread_state[t].state.position], (size_t)thread_state[t].state.count * sizeof(sa_sint_t)); + } + + position += thread_state[t].state.count; + } + } + } +#endif + } +} + +static void libsais_induce_partial_order_8u_omp(const uint8_t * RESTRICT T, sa_sint_t * RESTRICT SA, sa_sint_t n, sa_sint_t * RESTRICT buckets, sa_sint_t first_lms_suffix, sa_sint_t left_suffixes_count, sa_sint_t threads, LIBSAIS_THREAD_STATE * RESTRICT thread_state) +{ + memset(&buckets[2 * ALPHABET_SIZE], 0, 2 * ALPHABET_SIZE * sizeof(sa_sint_t)); + + sa_sint_t d = libsais_partial_sorting_scan_left_to_right_8u_omp(T, SA, n, buckets, left_suffixes_count, 0, threads, thread_state); + libsais_partial_sorting_shift_markers_8u_omp(SA, n, buckets, threads); + libsais_partial_sorting_scan_right_to_left_8u_omp(T, SA, n, buckets, first_lms_suffix, left_suffixes_count, d, threads, thread_state); +} + +static void libsais_induce_partial_order_32s_6k_omp(const sa_sint_t * RESTRICT T, sa_sint_t * RESTRICT SA, sa_sint_t n, sa_sint_t k, sa_sint_t * RESTRICT buckets, sa_sint_t first_lms_suffix, sa_sint_t left_suffixes_count, sa_sint_t threads, LIBSAIS_THREAD_STATE * RESTRICT thread_state) +{ + sa_sint_t d = libsais_partial_sorting_scan_left_to_right_32s_6k_omp(T, SA, n, buckets, left_suffixes_count, 0, threads, thread_state); + libsais_partial_sorting_shift_markers_32s_6k_omp(SA, k, 
buckets, threads); + libsais_partial_sorting_shift_buckets_32s_6k(k, buckets); + libsais_partial_sorting_scan_right_to_left_32s_6k_omp(T, SA, n, buckets, first_lms_suffix, left_suffixes_count, d, threads, thread_state); +} + +static void libsais_induce_partial_order_32s_4k_omp(const sa_sint_t * RESTRICT T, sa_sint_t * RESTRICT SA, sa_sint_t n, sa_sint_t k, sa_sint_t * RESTRICT buckets, sa_sint_t threads, LIBSAIS_THREAD_STATE * RESTRICT thread_state) +{ + memset(buckets, 0, 2 * (size_t)k * sizeof(sa_sint_t)); + + sa_sint_t d = libsais_partial_sorting_scan_left_to_right_32s_4k_omp(T, SA, n, k, buckets, 0, threads, thread_state); + libsais_partial_sorting_shift_markers_32s_4k(SA, n); + libsais_partial_sorting_scan_right_to_left_32s_4k_omp(T, SA, n, k, buckets, d, threads, thread_state); + libsais_partial_sorting_gather_lms_suffixes_32s_4k_omp(SA, n, threads, thread_state); +} + +static void libsais_induce_partial_order_32s_2k_omp(const sa_sint_t * RESTRICT T, sa_sint_t * RESTRICT SA, sa_sint_t n, sa_sint_t k, sa_sint_t * RESTRICT buckets, sa_sint_t threads, LIBSAIS_THREAD_STATE * RESTRICT thread_state) +{ + libsais_partial_sorting_scan_left_to_right_32s_1k_omp(T, SA, n, &buckets[1 * k], threads, thread_state); + libsais_partial_sorting_scan_right_to_left_32s_1k_omp(T, SA, n, &buckets[0 * k], threads, thread_state); + libsais_partial_sorting_gather_lms_suffixes_32s_1k_omp(SA, n, threads, thread_state); +} + +static void libsais_induce_partial_order_32s_1k_omp(const sa_sint_t * RESTRICT T, sa_sint_t * RESTRICT SA, sa_sint_t n, sa_sint_t k, sa_sint_t * RESTRICT buckets, sa_sint_t threads, LIBSAIS_THREAD_STATE * RESTRICT thread_state) +{ + libsais_count_suffixes_32s(T, n, k, buckets); + libsais_initialize_buckets_start_32s_1k(k, buckets); + libsais_partial_sorting_scan_left_to_right_32s_1k_omp(T, SA, n, buckets, threads, thread_state); + + libsais_count_suffixes_32s(T, n, k, buckets); + libsais_initialize_buckets_end_32s_1k(k, buckets); + 
libsais_partial_sorting_scan_right_to_left_32s_1k_omp(T, SA, n, buckets, threads, thread_state); + + libsais_partial_sorting_gather_lms_suffixes_32s_1k_omp(SA, n, threads, thread_state); +} + +static sa_sint_t libsais_renumber_lms_suffixes_8u(sa_sint_t * RESTRICT SA, sa_sint_t m, sa_sint_t name, fast_sint_t omp_block_start, fast_sint_t omp_block_size) +{ + const fast_sint_t prefetch_distance = 32; + + sa_sint_t * RESTRICT SAm = &SA[m]; + + fast_sint_t i, j; + for (i = omp_block_start, j = omp_block_start + omp_block_size - prefetch_distance - 3; i < j; i += 4) + { + libsais_prefetch(&SA[i + 2 * prefetch_distance]); + + libsais_prefetchw(&SAm[(SA[i + prefetch_distance + 0] & SAINT_MAX) >> 1]); + libsais_prefetchw(&SAm[(SA[i + prefetch_distance + 1] & SAINT_MAX) >> 1]); + libsais_prefetchw(&SAm[(SA[i + prefetch_distance + 2] & SAINT_MAX) >> 1]); + libsais_prefetchw(&SAm[(SA[i + prefetch_distance + 3] & SAINT_MAX) >> 1]); + + sa_sint_t p0 = SA[i + 0]; SAm[(p0 & SAINT_MAX) >> 1] = name | SAINT_MIN; name += p0 < 0; + sa_sint_t p1 = SA[i + 1]; SAm[(p1 & SAINT_MAX) >> 1] = name | SAINT_MIN; name += p1 < 0; + sa_sint_t p2 = SA[i + 2]; SAm[(p2 & SAINT_MAX) >> 1] = name | SAINT_MIN; name += p2 < 0; + sa_sint_t p3 = SA[i + 3]; SAm[(p3 & SAINT_MAX) >> 1] = name | SAINT_MIN; name += p3 < 0; + } + + for (j += prefetch_distance + 3; i < j; i += 1) + { + sa_sint_t p = SA[i]; SAm[(p & SAINT_MAX) >> 1] = name | SAINT_MIN; name += p < 0; + } + + return name; +} + +static fast_sint_t libsais_gather_marked_suffixes_8u(sa_sint_t * RESTRICT SA, sa_sint_t m, fast_sint_t l, fast_sint_t omp_block_start, fast_sint_t omp_block_size) +{ + const fast_sint_t prefetch_distance = 32; + + l -= 1; + + fast_sint_t i, j; + for (i = (fast_sint_t)m + omp_block_start + omp_block_size - 1, j = (fast_sint_t)m + omp_block_start + 3; i >= j; i -= 4) + { + libsais_prefetch(&SA[i - prefetch_distance]); + + sa_sint_t s0 = SA[i - 0]; SA[l] = s0 & SAINT_MAX; l -= s0 < 0; + sa_sint_t s1 = SA[i - 1]; SA[l] = s1 & 
SAINT_MAX; l -= s1 < 0; + sa_sint_t s2 = SA[i - 2]; SA[l] = s2 & SAINT_MAX; l -= s2 < 0; + sa_sint_t s3 = SA[i - 3]; SA[l] = s3 & SAINT_MAX; l -= s3 < 0; + } + + for (j -= 3; i >= j; i -= 1) + { + sa_sint_t s = SA[i]; SA[l] = s & SAINT_MAX; l -= s < 0; + } + + l += 1; + + return l; +} + +static sa_sint_t libsais_renumber_lms_suffixes_8u_omp(sa_sint_t * RESTRICT SA, sa_sint_t m, sa_sint_t threads, LIBSAIS_THREAD_STATE * RESTRICT thread_state) +{ + sa_sint_t name = 0; + +#if defined(_OPENMP) + #pragma omp parallel num_threads(threads) if(threads > 1 && m >= 65536) +#endif + { +#if defined(_OPENMP) + fast_sint_t omp_thread_num = omp_get_thread_num(); + fast_sint_t omp_num_threads = omp_get_num_threads(); +#else + UNUSED(threads); UNUSED(thread_state); + + fast_sint_t omp_thread_num = 0; + fast_sint_t omp_num_threads = 1; +#endif + fast_sint_t omp_block_stride = (m / omp_num_threads) & (-16); + fast_sint_t omp_block_start = omp_thread_num * omp_block_stride; + fast_sint_t omp_block_size = omp_thread_num < omp_num_threads - 1 ? 
omp_block_stride : m - omp_block_start; + + if (omp_num_threads == 1) + { + name = libsais_renumber_lms_suffixes_8u(SA, m, 0, omp_block_start, omp_block_size); + } +#if defined(_OPENMP) + else + { + { + thread_state[omp_thread_num].state.count = libsais_count_negative_marked_suffixes(SA, omp_block_start, omp_block_size); + } + + #pragma omp barrier + + { + fast_sint_t t, count = 0; for (t = 0; t < omp_thread_num; ++t) { count += thread_state[t].state.count; } + + if (omp_thread_num == omp_num_threads - 1) + { + name = (sa_sint_t)(count + thread_state[omp_thread_num].state.count); + } + + libsais_renumber_lms_suffixes_8u(SA, m, (sa_sint_t)count, omp_block_start, omp_block_size); + } + } +#endif + } + + return name; +} + +static void libsais_gather_marked_lms_suffixes_8u_omp(sa_sint_t * RESTRICT SA, sa_sint_t n, sa_sint_t m, sa_sint_t fs, sa_sint_t threads, LIBSAIS_THREAD_STATE * RESTRICT thread_state) +{ +#if defined(_OPENMP) + #pragma omp parallel num_threads(threads) if(threads > 1 && n >= 131072) +#endif + { +#if defined(_OPENMP) + fast_sint_t omp_thread_num = omp_get_thread_num(); + fast_sint_t omp_num_threads = omp_get_num_threads(); +#else + UNUSED(threads); UNUSED(thread_state); + + fast_sint_t omp_thread_num = 0; + fast_sint_t omp_num_threads = 1; +#endif + fast_sint_t omp_block_stride = (((fast_sint_t)n >> 1) / omp_num_threads) & (-16); + fast_sint_t omp_block_start = omp_thread_num * omp_block_stride; + fast_sint_t omp_block_size = omp_thread_num < omp_num_threads - 1 ? 
omp_block_stride : ((fast_sint_t)n >> 1) - omp_block_start; + + if (omp_num_threads == 1) + { + libsais_gather_marked_suffixes_8u(SA, m, (fast_sint_t)n + (fast_sint_t)fs, omp_block_start, omp_block_size); + } +#if defined(_OPENMP) + else + { + { + if (omp_thread_num < omp_num_threads - 1) + { + thread_state[omp_thread_num].state.position = libsais_gather_marked_suffixes_8u(SA, m, (fast_sint_t)m + omp_block_start + omp_block_size, omp_block_start, omp_block_size); + thread_state[omp_thread_num].state.count = (fast_sint_t)m + omp_block_start + omp_block_size - thread_state[omp_thread_num].state.position; + } + else + { + thread_state[omp_thread_num].state.position = libsais_gather_marked_suffixes_8u(SA, m, (fast_sint_t)n + (fast_sint_t)fs, omp_block_start, omp_block_size); + thread_state[omp_thread_num].state.count = (fast_sint_t)n + (fast_sint_t)fs - thread_state[omp_thread_num].state.position; + } + } + + #pragma omp barrier + + #pragma omp master + { + fast_sint_t t, position = (fast_sint_t)n + (fast_sint_t)fs; + + for (t = omp_num_threads - 1; t >= 0; --t) + { + position -= thread_state[t].state.count; + if (t != omp_num_threads - 1 && thread_state[t].state.count > 0) + { + memmove(&SA[position], &SA[thread_state[t].state.position], (size_t)thread_state[t].state.count * sizeof(sa_sint_t)); + } + } + } + } +#endif + } +} + +static sa_sint_t libsais_renumber_and_gather_lms_suffixes_8u_omp(sa_sint_t * RESTRICT SA, sa_sint_t n, sa_sint_t m, sa_sint_t fs, sa_sint_t threads, LIBSAIS_THREAD_STATE * RESTRICT thread_state) +{ + memset(&SA[m], 0, ((size_t)n >> 1) * sizeof(sa_sint_t)); + + sa_sint_t name = libsais_renumber_lms_suffixes_8u_omp(SA, m, threads, thread_state); + if (name < m) + { + libsais_gather_marked_lms_suffixes_8u_omp(SA, n, m, fs, threads, thread_state); + } + else + { + fast_sint_t i; for (i = 0; i < m; i += 1) { SA[i] &= SAINT_MAX; } + } + + return name; +} + +static sa_sint_t libsais_renumber_distinct_lms_suffixes_32s_4k(sa_sint_t * RESTRICT SA, 
sa_sint_t m, sa_sint_t name, fast_sint_t omp_block_start, fast_sint_t omp_block_size) +{ + const fast_sint_t prefetch_distance = 32; + + sa_sint_t * RESTRICT SAm = &SA[m]; + + fast_sint_t i, j; sa_sint_t p0, p1, p2, p3 = 0; + for (i = omp_block_start, j = omp_block_start + omp_block_size - prefetch_distance - 3; i < j; i += 4) + { + libsais_prefetchw(&SA[i + 2 * prefetch_distance]); + + libsais_prefetchw(&SAm[(SA[i + prefetch_distance + 0] & SAINT_MAX) >> 1]); + libsais_prefetchw(&SAm[(SA[i + prefetch_distance + 1] & SAINT_MAX) >> 1]); + libsais_prefetchw(&SAm[(SA[i + prefetch_distance + 2] & SAINT_MAX) >> 1]); + libsais_prefetchw(&SAm[(SA[i + prefetch_distance + 3] & SAINT_MAX) >> 1]); + + p0 = SA[i + 0]; SAm[(SA[i + 0] = p0 & SAINT_MAX) >> 1] = name | (p0 & p3 & SAINT_MIN); name += p0 < 0; + p1 = SA[i + 1]; SAm[(SA[i + 1] = p1 & SAINT_MAX) >> 1] = name | (p1 & p0 & SAINT_MIN); name += p1 < 0; + p2 = SA[i + 2]; SAm[(SA[i + 2] = p2 & SAINT_MAX) >> 1] = name | (p2 & p1 & SAINT_MIN); name += p2 < 0; + p3 = SA[i + 3]; SAm[(SA[i + 3] = p3 & SAINT_MAX) >> 1] = name | (p3 & p2 & SAINT_MIN); name += p3 < 0; + } + + for (j += prefetch_distance + 3; i < j; i += 1) + { + p2 = p3; p3 = SA[i]; SAm[(SA[i] = p3 & SAINT_MAX) >> 1] = name | (p3 & p2 & SAINT_MIN); name += p3 < 0; + } + + return name; +} + +static void libsais_mark_distinct_lms_suffixes_32s(sa_sint_t * RESTRICT SA, sa_sint_t m, fast_sint_t omp_block_start, fast_sint_t omp_block_size) +{ + const fast_sint_t prefetch_distance = 32; + + fast_sint_t i, j; sa_sint_t p0, p1, p2, p3 = 0; + for (i = (fast_sint_t)m + omp_block_start, j = (fast_sint_t)m + omp_block_start + omp_block_size - 3; i < j; i += 4) + { + libsais_prefetchw(&SA[i + prefetch_distance]); + + p0 = SA[i + 0]; SA[i + 0] = p0 & (p3 | SAINT_MAX); p0 = (p0 == 0) ? p3 : p0; + p1 = SA[i + 1]; SA[i + 1] = p1 & (p0 | SAINT_MAX); p1 = (p1 == 0) ? p0 : p1; + p2 = SA[i + 2]; SA[i + 2] = p2 & (p1 | SAINT_MAX); p2 = (p2 == 0) ? 
p1 : p2; + p3 = SA[i + 3]; SA[i + 3] = p3 & (p2 | SAINT_MAX); p3 = (p3 == 0) ? p2 : p3; + } + + for (j += 3; i < j; i += 1) + { + p2 = p3; p3 = SA[i]; SA[i] = p3 & (p2 | SAINT_MAX); p3 = (p3 == 0) ? p2 : p3; + } +} + +static void libsais_clamp_lms_suffixes_length_32s(sa_sint_t * RESTRICT SA, sa_sint_t m, fast_sint_t omp_block_start, fast_sint_t omp_block_size) +{ + const fast_sint_t prefetch_distance = 32; + + sa_sint_t * RESTRICT SAm = &SA[m]; + + fast_sint_t i, j; + for (i = omp_block_start, j = omp_block_start + omp_block_size - 3; i < j; i += 4) + { + libsais_prefetchw(&SAm[i + prefetch_distance]); + + SAm[i + 0] = (SAm[i + 0] < 0 ? SAm[i + 0] : 0) & SAINT_MAX; + SAm[i + 1] = (SAm[i + 1] < 0 ? SAm[i + 1] : 0) & SAINT_MAX; + SAm[i + 2] = (SAm[i + 2] < 0 ? SAm[i + 2] : 0) & SAINT_MAX; + SAm[i + 3] = (SAm[i + 3] < 0 ? SAm[i + 3] : 0) & SAINT_MAX; + } + + for (j += 3; i < j; i += 1) + { + SAm[i] = (SAm[i] < 0 ? SAm[i] : 0) & SAINT_MAX; + } +} + +static sa_sint_t libsais_renumber_distinct_lms_suffixes_32s_4k_omp(sa_sint_t * RESTRICT SA, sa_sint_t m, sa_sint_t threads, LIBSAIS_THREAD_STATE * RESTRICT thread_state) +{ + sa_sint_t name = 0; + +#if defined(_OPENMP) + #pragma omp parallel num_threads(threads) if(threads > 1 && m >= 65536) +#endif + { +#if defined(_OPENMP) + fast_sint_t omp_thread_num = omp_get_thread_num(); + fast_sint_t omp_num_threads = omp_get_num_threads(); +#else + UNUSED(threads); UNUSED(thread_state); + + fast_sint_t omp_thread_num = 0; + fast_sint_t omp_num_threads = 1; +#endif + fast_sint_t omp_block_stride = (m / omp_num_threads) & (-16); + fast_sint_t omp_block_start = omp_thread_num * omp_block_stride; + fast_sint_t omp_block_size = omp_thread_num < omp_num_threads - 1 ? 
omp_block_stride : m - omp_block_start; + + if (omp_num_threads == 1) + { + name = libsais_renumber_distinct_lms_suffixes_32s_4k(SA, m, 1, omp_block_start, omp_block_size); + } +#if defined(_OPENMP) + else + { + { + thread_state[omp_thread_num].state.count = libsais_count_negative_marked_suffixes(SA, omp_block_start, omp_block_size); + } + + #pragma omp barrier + + { + fast_sint_t t, count = 1; for (t = 0; t < omp_thread_num; ++t) { count += thread_state[t].state.count; } + + if (omp_thread_num == omp_num_threads - 1) + { + name = (sa_sint_t)(count + thread_state[omp_thread_num].state.count); + } + + libsais_renumber_distinct_lms_suffixes_32s_4k(SA, m, (sa_sint_t)count, omp_block_start, omp_block_size); + } + } +#endif + } + + return name - 1; +} + +static void libsais_mark_distinct_lms_suffixes_32s_omp(sa_sint_t * RESTRICT SA, sa_sint_t n, sa_sint_t m, sa_sint_t threads) +{ +#if defined(_OPENMP) + #pragma omp parallel num_threads(threads) if(threads > 1 && n >= 131072) +#endif + { +#if defined(_OPENMP) + fast_sint_t omp_thread_num = omp_get_thread_num(); + fast_sint_t omp_num_threads = omp_get_num_threads(); + fast_sint_t omp_block_stride = (((fast_sint_t)n >> 1) / omp_num_threads) & (-16); + fast_sint_t omp_block_start = omp_thread_num * omp_block_stride; + fast_sint_t omp_block_size = omp_thread_num < omp_num_threads - 1 ? 
omp_block_stride : ((fast_sint_t)n >> 1) - omp_block_start; +#else + UNUSED(threads); + + fast_sint_t omp_block_start = 0; + fast_sint_t omp_block_size = (fast_sint_t)n >> 1; +#endif + libsais_mark_distinct_lms_suffixes_32s(SA, m, omp_block_start, omp_block_size); + } +} + +static void libsais_clamp_lms_suffixes_length_32s_omp(sa_sint_t * RESTRICT SA, sa_sint_t n, sa_sint_t m, sa_sint_t threads) +{ +#if defined(_OPENMP) + #pragma omp parallel num_threads(threads) if(threads > 1 && n >= 131072) +#endif + { +#if defined(_OPENMP) + fast_sint_t omp_thread_num = omp_get_thread_num(); + fast_sint_t omp_num_threads = omp_get_num_threads(); + fast_sint_t omp_block_stride = (((fast_sint_t)n >> 1) / omp_num_threads) & (-16); + fast_sint_t omp_block_start = omp_thread_num * omp_block_stride; + fast_sint_t omp_block_size = omp_thread_num < omp_num_threads - 1 ? omp_block_stride : ((fast_sint_t)n >> 1) - omp_block_start; +#else + UNUSED(threads); + + fast_sint_t omp_block_start = 0; + fast_sint_t omp_block_size = (fast_sint_t)n >> 1; +#endif + libsais_clamp_lms_suffixes_length_32s(SA, m, omp_block_start, omp_block_size); + } +} + +static sa_sint_t libsais_renumber_and_mark_distinct_lms_suffixes_32s_4k_omp(sa_sint_t * RESTRICT SA, sa_sint_t n, sa_sint_t m, sa_sint_t threads, LIBSAIS_THREAD_STATE * RESTRICT thread_state) +{ + memset(&SA[m], 0, ((size_t)n >> 1) * sizeof(sa_sint_t)); + + sa_sint_t name = libsais_renumber_distinct_lms_suffixes_32s_4k_omp(SA, m, threads, thread_state); + if (name < m) + { + libsais_mark_distinct_lms_suffixes_32s_omp(SA, n, m, threads); + } + + return name; +} + +static sa_sint_t libsais_renumber_and_mark_distinct_lms_suffixes_32s_1k_omp(sa_sint_t * RESTRICT T, sa_sint_t * RESTRICT SA, sa_sint_t n, sa_sint_t m, sa_sint_t threads) +{ + const fast_sint_t prefetch_distance = 32; + + sa_sint_t * RESTRICT SAm = &SA[m]; + + { + libsais_gather_lms_suffixes_32s(T, SA, n); + + memset(&SA[m], 0, ((size_t)n - (size_t)m - (size_t)m) * sizeof(sa_sint_t)); + + 
fast_sint_t i, j; + for (i = (fast_sint_t)n - (fast_sint_t)m, j = (fast_sint_t)n - 1 - prefetch_distance - 3; i < j; i += 4) + { + libsais_prefetch(&SA[i + 2 * prefetch_distance]); + + libsais_prefetchw(&SAm[((sa_uint_t)SA[i + prefetch_distance + 0]) >> 1]); + libsais_prefetchw(&SAm[((sa_uint_t)SA[i + prefetch_distance + 1]) >> 1]); + libsais_prefetchw(&SAm[((sa_uint_t)SA[i + prefetch_distance + 2]) >> 1]); + libsais_prefetchw(&SAm[((sa_uint_t)SA[i + prefetch_distance + 3]) >> 1]); + + SAm[((sa_uint_t)SA[i + 0]) >> 1] = SA[i + 1] - SA[i + 0] + 1 + SAINT_MIN; + SAm[((sa_uint_t)SA[i + 1]) >> 1] = SA[i + 2] - SA[i + 1] + 1 + SAINT_MIN; + SAm[((sa_uint_t)SA[i + 2]) >> 1] = SA[i + 3] - SA[i + 2] + 1 + SAINT_MIN; + SAm[((sa_uint_t)SA[i + 3]) >> 1] = SA[i + 4] - SA[i + 3] + 1 + SAINT_MIN; + } + + for (j += prefetch_distance + 3; i < j; i += 1) + { + SAm[((sa_uint_t)SA[i]) >> 1] = SA[i + 1] - SA[i] + 1 + SAINT_MIN; + } + + SAm[((sa_uint_t)SA[n - 1]) >> 1] = 1 + SAINT_MIN; + } + + { + libsais_clamp_lms_suffixes_length_32s_omp(SA, n, m, threads); + } + + sa_sint_t name = 1; + + { + fast_sint_t i, j, p = SA[0], plen = SAm[p >> 1]; sa_sint_t pdiff = SAINT_MIN; + for (i = 1, j = m - prefetch_distance - 1; i < j; i += 2) + { + libsais_prefetch(&SA[i + 2 * prefetch_distance]); + + libsais_prefetchw(&SAm[((sa_uint_t)SA[i + prefetch_distance + 0]) >> 1]); libsais_prefetch(&T[((sa_uint_t)SA[i + prefetch_distance + 0])]); + libsais_prefetchw(&SAm[((sa_uint_t)SA[i + prefetch_distance + 1]) >> 1]); libsais_prefetch(&T[((sa_uint_t)SA[i + prefetch_distance + 1])]); + + fast_sint_t q = SA[i + 0], qlen = SAm[q >> 1]; sa_sint_t qdiff = SAINT_MIN; + if (plen == qlen) { fast_sint_t l = 0; do { if (T[p + l] != T[q + l]) { break; } } while (++l < qlen); qdiff = (sa_sint_t)(l - qlen) & SAINT_MIN; } + SAm[p >> 1] = name | (pdiff & qdiff); name += (qdiff < 0); + + p = SA[i + 1]; plen = SAm[p >> 1]; pdiff = SAINT_MIN; + if (qlen == plen) { fast_sint_t l = 0; do { if (T[q + l] != T[p + l]) { break; 
} } while (++l < plen); pdiff = (sa_sint_t)(l - plen) & SAINT_MIN; } + SAm[q >> 1] = name | (qdiff & pdiff); name += (pdiff < 0); + } + + for (j += prefetch_distance + 1; i < j; i += 1) + { + fast_sint_t q = SA[i], qlen = SAm[q >> 1]; sa_sint_t qdiff = SAINT_MIN; + if (plen == qlen) { fast_sint_t l = 0; do { if (T[p + l] != T[q + l]) { break; } } while (++l < plen); qdiff = (sa_sint_t)(l - plen) & SAINT_MIN; } + SAm[p >> 1] = name | (pdiff & qdiff); name += (qdiff < 0); + + p = q; plen = qlen; pdiff = qdiff; + } + + SAm[p >> 1] = name | pdiff; name++; + } + + if (name <= m) + { + libsais_mark_distinct_lms_suffixes_32s_omp(SA, n, m, threads); + } + + return name - 1; +} + +static void libsais_reconstruct_lms_suffixes(sa_sint_t * RESTRICT SA, sa_sint_t n, sa_sint_t m, fast_sint_t omp_block_start, fast_sint_t omp_block_size) +{ + const fast_sint_t prefetch_distance = 32; + + const sa_sint_t * RESTRICT SAnm = &SA[n - m]; + + fast_sint_t i, j; + for (i = omp_block_start, j = omp_block_start + omp_block_size - prefetch_distance - 3; i < j; i += 4) + { + libsais_prefetchw(&SA[i + 2 * prefetch_distance]); + + libsais_prefetch(&SAnm[SA[i + prefetch_distance + 0]]); + libsais_prefetch(&SAnm[SA[i + prefetch_distance + 1]]); + libsais_prefetch(&SAnm[SA[i + prefetch_distance + 2]]); + libsais_prefetch(&SAnm[SA[i + prefetch_distance + 3]]); + + SA[i + 0] = SAnm[SA[i + 0]]; + SA[i + 1] = SAnm[SA[i + 1]]; + SA[i + 2] = SAnm[SA[i + 2]]; + SA[i + 3] = SAnm[SA[i + 3]]; + } + + for (j += prefetch_distance + 3; i < j; i += 1) + { + SA[i] = SAnm[SA[i]]; + } +} + +static void libsais_reconstruct_lms_suffixes_omp(sa_sint_t * RESTRICT SA, sa_sint_t n, sa_sint_t m, sa_sint_t threads) +{ +#if defined(_OPENMP) + #pragma omp parallel num_threads(threads) if(threads > 1 && m >= 65536) +#endif + { +#if defined(_OPENMP) + fast_sint_t omp_thread_num = omp_get_thread_num(); + fast_sint_t omp_num_threads = omp_get_num_threads(); + fast_sint_t omp_block_stride = (m / omp_num_threads) & (-16); + 
fast_sint_t omp_block_start = omp_thread_num * omp_block_stride; + fast_sint_t omp_block_size = omp_thread_num < omp_num_threads - 1 ? omp_block_stride : m - omp_block_start; +#else + UNUSED(threads); + + fast_sint_t omp_block_start = 0; + fast_sint_t omp_block_size = m; +#endif + + libsais_reconstruct_lms_suffixes(SA, n, m, omp_block_start, omp_block_size); + } +} + +static void libsais_place_lms_suffixes_interval_8u(sa_sint_t * RESTRICT SA, sa_sint_t n, sa_sint_t m, const sa_sint_t * RESTRICT buckets) +{ + const sa_sint_t * RESTRICT bucket_end = &buckets[7 * ALPHABET_SIZE]; + + fast_sint_t c, j = n; + for (c = ALPHABET_SIZE - 2; c >= 0; --c) + { + fast_sint_t l = (fast_sint_t)buckets[BUCKETS_INDEX2(c, 1) + BUCKETS_INDEX2(1, 0)] - (fast_sint_t)buckets[BUCKETS_INDEX2(c, 1)]; + if (l > 0) + { + fast_sint_t i = bucket_end[c]; + if (j - i > 0) + { + memset(&SA[i], 0, (size_t)(j - i) * sizeof(sa_sint_t)); + } + + memmove(&SA[j = (i - l)], &SA[m -= (sa_sint_t)l], (size_t)l * sizeof(sa_sint_t)); + } + } + + memset(&SA[0], 0, (size_t)j * sizeof(sa_sint_t)); +} + +static void libsais_place_lms_suffixes_interval_32s_4k(sa_sint_t * RESTRICT SA, sa_sint_t n, sa_sint_t k, sa_sint_t m, const sa_sint_t * RESTRICT buckets) +{ + const sa_sint_t * RESTRICT bucket_end = &buckets[3 * k]; + + fast_sint_t c, j = n; + for (c = (fast_sint_t)k - 2; c >= 0; --c) + { + fast_sint_t l = (fast_sint_t)buckets[BUCKETS_INDEX2(c, 1) + BUCKETS_INDEX2(1, 0)] - (fast_sint_t)buckets[BUCKETS_INDEX2(c, 1)]; + if (l > 0) + { + fast_sint_t i = bucket_end[c]; + if (j - i > 0) + { + memset(&SA[i], 0, (size_t)(j - i) * sizeof(sa_sint_t)); + } + + memmove(&SA[j = (i - l)], &SA[m -= (sa_sint_t)l], (size_t)l * sizeof(sa_sint_t)); + } + } + + memset(&SA[0], 0, (size_t)j * sizeof(sa_sint_t)); +} + +static void libsais_place_lms_suffixes_interval_32s_2k(sa_sint_t * RESTRICT SA, sa_sint_t n, sa_sint_t k, sa_sint_t m, const sa_sint_t * RESTRICT buckets) +{ + fast_sint_t j = n; + + if (k > 1) + { + fast_sint_t c; + 
for (c = BUCKETS_INDEX2((fast_sint_t)k - 2, 0); c >= BUCKETS_INDEX2(0, 0); c -= BUCKETS_INDEX2(1, 0)) + { + fast_sint_t l = (fast_sint_t)buckets[c + BUCKETS_INDEX2(1, 1)] - (fast_sint_t)buckets[c + BUCKETS_INDEX2(0, 1)]; + if (l > 0) + { + fast_sint_t i = buckets[c]; + if (j - i > 0) + { + memset(&SA[i], 0, (size_t)(j - i) * sizeof(sa_sint_t)); + } + + memmove(&SA[j = (i - l)], &SA[m -= (sa_sint_t)l], (size_t)l * sizeof(sa_sint_t)); + } + } + } + + memset(&SA[0], 0, (size_t)j * sizeof(sa_sint_t)); +} + +static void libsais_place_lms_suffixes_interval_32s_1k(const sa_sint_t * RESTRICT T, sa_sint_t * RESTRICT SA, sa_sint_t k, sa_sint_t m, sa_sint_t * RESTRICT buckets) +{ + const fast_sint_t prefetch_distance = 32; + + sa_sint_t c = k - 1; fast_sint_t i, l = buckets[c]; + for (i = (fast_sint_t)m - 1; i >= prefetch_distance + 3; i -= 4) + { + libsais_prefetch(&SA[i - 2 * prefetch_distance]); + + libsais_prefetch(&T[SA[i - prefetch_distance - 0]]); + libsais_prefetch(&T[SA[i - prefetch_distance - 1]]); + libsais_prefetch(&T[SA[i - prefetch_distance - 2]]); + libsais_prefetch(&T[SA[i - prefetch_distance - 3]]); + + sa_sint_t p0 = SA[i - 0]; if (T[p0] != c) { c = T[p0]; memset(&SA[buckets[c]], 0, (size_t)(l - buckets[c]) * sizeof(sa_sint_t)); l = buckets[c]; } SA[--l] = p0; + sa_sint_t p1 = SA[i - 1]; if (T[p1] != c) { c = T[p1]; memset(&SA[buckets[c]], 0, (size_t)(l - buckets[c]) * sizeof(sa_sint_t)); l = buckets[c]; } SA[--l] = p1; + sa_sint_t p2 = SA[i - 2]; if (T[p2] != c) { c = T[p2]; memset(&SA[buckets[c]], 0, (size_t)(l - buckets[c]) * sizeof(sa_sint_t)); l = buckets[c]; } SA[--l] = p2; + sa_sint_t p3 = SA[i - 3]; if (T[p3] != c) { c = T[p3]; memset(&SA[buckets[c]], 0, (size_t)(l - buckets[c]) * sizeof(sa_sint_t)); l = buckets[c]; } SA[--l] = p3; + } + + for (; i >= 0; i -= 1) + { + sa_sint_t p = SA[i]; if (T[p] != c) { c = T[p]; memset(&SA[buckets[c]], 0, (size_t)(l - buckets[c]) * sizeof(sa_sint_t)); l = buckets[c]; } SA[--l] = p; + } + + memset(&SA[0], 0, 
(size_t)l * sizeof(sa_sint_t)); +} + +static void libsais_place_lms_suffixes_histogram_32s_6k(sa_sint_t * RESTRICT SA, sa_sint_t n, sa_sint_t k, sa_sint_t m, const sa_sint_t * RESTRICT buckets) +{ + const sa_sint_t * RESTRICT bucket_end = &buckets[5 * k]; + + fast_sint_t c, j = n; + for (c = (fast_sint_t)k - 2; c >= 0; --c) + { + fast_sint_t l = (fast_sint_t)buckets[BUCKETS_INDEX4(c, 1)]; + if (l > 0) + { + fast_sint_t i = bucket_end[c]; + if (j - i > 0) + { + memset(&SA[i], 0, (size_t)(j - i) * sizeof(sa_sint_t)); + } + + memmove(&SA[j = (i - l)], &SA[m -= (sa_sint_t)l], (size_t)l * sizeof(sa_sint_t)); + } + } + + memset(&SA[0], 0, (size_t)j * sizeof(sa_sint_t)); +} + +static void libsais_place_lms_suffixes_histogram_32s_4k(sa_sint_t * RESTRICT SA, sa_sint_t n, sa_sint_t k, sa_sint_t m, const sa_sint_t * RESTRICT buckets) +{ + const sa_sint_t * RESTRICT bucket_end = &buckets[3 * k]; + + fast_sint_t c, j = n; + for (c = (fast_sint_t)k - 2; c >= 0; --c) + { + fast_sint_t l = (fast_sint_t)buckets[BUCKETS_INDEX2(c, 1)]; + if (l > 0) + { + fast_sint_t i = bucket_end[c]; + if (j - i > 0) + { + memset(&SA[i], 0, (size_t)(j - i) * sizeof(sa_sint_t)); + } + + memmove(&SA[j = (i - l)], &SA[m -= (sa_sint_t)l], (size_t)l * sizeof(sa_sint_t)); + } + } + + memset(&SA[0], 0, (size_t)j * sizeof(sa_sint_t)); +} + +static void libsais_place_lms_suffixes_histogram_32s_2k(sa_sint_t * RESTRICT SA, sa_sint_t n, sa_sint_t k, sa_sint_t m, const sa_sint_t * RESTRICT buckets) +{ + fast_sint_t j = n; + + if (k > 1) + { + fast_sint_t c; + for (c = BUCKETS_INDEX2((fast_sint_t)k - 2, 0); c >= BUCKETS_INDEX2(0, 0); c -= BUCKETS_INDEX2(1, 0)) + { + fast_sint_t l = (fast_sint_t)buckets[c + BUCKETS_INDEX2(0, 1)]; + if (l > 0) + { + fast_sint_t i = buckets[c]; + if (j - i > 0) + { + memset(&SA[i], 0, (size_t)(j - i) * sizeof(sa_sint_t)); + } + + memmove(&SA[j = (i - l)], &SA[m -= (sa_sint_t)l], (size_t)l * sizeof(sa_sint_t)); + } + } + } + + memset(&SA[0], 0, (size_t)j * sizeof(sa_sint_t)); +} + 
+static void libsais_final_bwt_scan_left_to_right_8u(const uint8_t * RESTRICT T, sa_sint_t * RESTRICT SA, sa_sint_t * RESTRICT induction_bucket, fast_sint_t omp_block_start, fast_sint_t omp_block_size) +{ + const fast_sint_t prefetch_distance = 32; + + fast_sint_t i, j; + for (i = omp_block_start, j = omp_block_start + omp_block_size - prefetch_distance - 1; i < j; i += 2) + { + libsais_prefetchw(&SA[i + 2 * prefetch_distance]); + + sa_sint_t s0 = SA[i + prefetch_distance + 0]; const uint8_t * Ts0 = &T[s0] - 1; libsais_prefetch(s0 > 0 ? Ts0 : NULL); Ts0--; libsais_prefetch(s0 > 0 ? Ts0 : NULL); + sa_sint_t s1 = SA[i + prefetch_distance + 1]; const uint8_t * Ts1 = &T[s1] - 1; libsais_prefetch(s1 > 0 ? Ts1 : NULL); Ts1--; libsais_prefetch(s1 > 0 ? Ts1 : NULL); + + sa_sint_t p0 = SA[i + 0]; SA[i + 0] = p0 & SAINT_MAX; if (p0 > 0) { p0--; SA[i + 0] = T[p0] | SAINT_MIN; SA[induction_bucket[T[p0]]++] = p0 | ((sa_sint_t)(T[p0 - (p0 > 0)] < T[p0]) << (SAINT_BIT - 1)); } + sa_sint_t p1 = SA[i + 1]; SA[i + 1] = p1 & SAINT_MAX; if (p1 > 0) { p1--; SA[i + 1] = T[p1] | SAINT_MIN; SA[induction_bucket[T[p1]]++] = p1 | ((sa_sint_t)(T[p1 - (p1 > 0)] < T[p1]) << (SAINT_BIT - 1)); } + } + + for (j += prefetch_distance + 1; i < j; i += 1) + { + sa_sint_t p = SA[i]; SA[i] = p & SAINT_MAX; if (p > 0) { p--; SA[i] = T[p] | SAINT_MIN; SA[induction_bucket[T[p]]++] = p | ((sa_sint_t)(T[p - (p > 0)] < T[p]) << (SAINT_BIT - 1)); } + } +} + +static void libsais_final_bwt_aux_scan_left_to_right_8u(const uint8_t * RESTRICT T, sa_sint_t * RESTRICT SA, sa_sint_t rm, sa_sint_t * RESTRICT I, sa_sint_t * RESTRICT induction_bucket, fast_sint_t omp_block_start, fast_sint_t omp_block_size) +{ + const fast_sint_t prefetch_distance = 32; + + fast_sint_t i, j; + for (i = omp_block_start, j = omp_block_start + omp_block_size - prefetch_distance - 1; i < j; i += 2) + { + libsais_prefetchw(&SA[i + 2 * prefetch_distance]); + + sa_sint_t s0 = SA[i + prefetch_distance + 0]; const uint8_t * Ts0 = &T[s0] - 1; 
libsais_prefetch(s0 > 0 ? Ts0 : NULL); Ts0--; libsais_prefetch(s0 > 0 ? Ts0 : NULL); + sa_sint_t s1 = SA[i + prefetch_distance + 1]; const uint8_t * Ts1 = &T[s1] - 1; libsais_prefetch(s1 > 0 ? Ts1 : NULL); Ts1--; libsais_prefetch(s1 > 0 ? Ts1 : NULL); + + sa_sint_t p0 = SA[i + 0]; SA[i + 0] = p0 & SAINT_MAX; if (p0 > 0) { p0--; SA[i + 0] = T[p0] | SAINT_MIN; SA[induction_bucket[T[p0]]++] = p0 | ((sa_sint_t)(T[p0 - (p0 > 0)] < T[p0]) << (SAINT_BIT - 1)); if ((p0 & rm) == 0) { I[p0 / (rm + 1)] = induction_bucket[T[p0]]; }} + sa_sint_t p1 = SA[i + 1]; SA[i + 1] = p1 & SAINT_MAX; if (p1 > 0) { p1--; SA[i + 1] = T[p1] | SAINT_MIN; SA[induction_bucket[T[p1]]++] = p1 | ((sa_sint_t)(T[p1 - (p1 > 0)] < T[p1]) << (SAINT_BIT - 1)); if ((p1 & rm) == 0) { I[p1 / (rm + 1)] = induction_bucket[T[p1]]; }} + } + + for (j += prefetch_distance + 1; i < j; i += 1) + { + sa_sint_t p = SA[i]; SA[i] = p & SAINT_MAX; if (p > 0) { p--; SA[i] = T[p] | SAINT_MIN; SA[induction_bucket[T[p]]++] = p | ((sa_sint_t)(T[p - (p > 0)] < T[p]) << (SAINT_BIT - 1)); if ((p & rm) == 0) { I[p / (rm + 1)] = induction_bucket[T[p]]; } } + } +} + +static void libsais_final_sorting_scan_left_to_right_8u(const uint8_t * RESTRICT T, sa_sint_t * RESTRICT SA, sa_sint_t * RESTRICT induction_bucket, fast_sint_t omp_block_start, fast_sint_t omp_block_size) +{ + const fast_sint_t prefetch_distance = 32; + + fast_sint_t i, j; + for (i = omp_block_start, j = omp_block_start + omp_block_size - prefetch_distance - 1; i < j; i += 2) + { + libsais_prefetchw(&SA[i + 2 * prefetch_distance]); + + sa_sint_t s0 = SA[i + prefetch_distance + 0]; const uint8_t * Ts0 = &T[s0] - 1; libsais_prefetch(s0 > 0 ? Ts0 : NULL); Ts0--; libsais_prefetch(s0 > 0 ? Ts0 : NULL); + sa_sint_t s1 = SA[i + prefetch_distance + 1]; const uint8_t * Ts1 = &T[s1] - 1; libsais_prefetch(s1 > 0 ? Ts1 : NULL); Ts1--; libsais_prefetch(s1 > 0 ? 
Ts1 : NULL); + + sa_sint_t p0 = SA[i + 0]; SA[i + 0] = p0 ^ SAINT_MIN; if (p0 > 0) { p0--; SA[induction_bucket[T[p0]]++] = p0 | ((sa_sint_t)(T[p0 - (p0 > 0)] < T[p0]) << (SAINT_BIT - 1)); } + sa_sint_t p1 = SA[i + 1]; SA[i + 1] = p1 ^ SAINT_MIN; if (p1 > 0) { p1--; SA[induction_bucket[T[p1]]++] = p1 | ((sa_sint_t)(T[p1 - (p1 > 0)] < T[p1]) << (SAINT_BIT - 1)); } + } + + for (j += prefetch_distance + 1; i < j; i += 1) + { + sa_sint_t p = SA[i]; SA[i] = p ^ SAINT_MIN; if (p > 0) { p--; SA[induction_bucket[T[p]]++] = p | ((sa_sint_t)(T[p - (p > 0)] < T[p]) << (SAINT_BIT - 1)); } + } +} + +static void libsais_final_sorting_scan_left_to_right_32s(const sa_sint_t * RESTRICT T, sa_sint_t * RESTRICT SA, sa_sint_t * RESTRICT induction_bucket, fast_sint_t omp_block_start, fast_sint_t omp_block_size) +{ + const fast_sint_t prefetch_distance = 32; + + fast_sint_t i, j; + for (i = omp_block_start, j = omp_block_start + omp_block_size - 2 * prefetch_distance - 1; i < j; i += 2) + { + libsais_prefetchw(&SA[i + 3 * prefetch_distance]); + + sa_sint_t s0 = SA[i + 2 * prefetch_distance + 0]; const sa_sint_t * Ts0 = &T[s0] - 1; libsais_prefetch(s0 > 0 ? Ts0 : NULL); + sa_sint_t s1 = SA[i + 2 * prefetch_distance + 1]; const sa_sint_t * Ts1 = &T[s1] - 1; libsais_prefetch(s1 > 0 ? 
Ts1 : NULL); + sa_sint_t s2 = SA[i + 1 * prefetch_distance + 0]; if (s2 > 0) { libsais_prefetchw(&induction_bucket[T[s2 - 1]]); libsais_prefetch(&T[s2] - 2); } + sa_sint_t s3 = SA[i + 1 * prefetch_distance + 1]; if (s3 > 0) { libsais_prefetchw(&induction_bucket[T[s3 - 1]]); libsais_prefetch(&T[s3] - 2); } + + sa_sint_t p0 = SA[i + 0]; SA[i + 0] = p0 ^ SAINT_MIN; if (p0 > 0) { p0--; SA[induction_bucket[T[p0]]++] = p0 | ((sa_sint_t)(T[p0 - (p0 > 0)] < T[p0]) << (SAINT_BIT - 1)); } + sa_sint_t p1 = SA[i + 1]; SA[i + 1] = p1 ^ SAINT_MIN; if (p1 > 0) { p1--; SA[induction_bucket[T[p1]]++] = p1 | ((sa_sint_t)(T[p1 - (p1 > 0)] < T[p1]) << (SAINT_BIT - 1)); } + } + + for (j += 2 * prefetch_distance + 1; i < j; i += 1) + { + sa_sint_t p = SA[i]; SA[i] = p ^ SAINT_MIN; if (p > 0) { p--; SA[induction_bucket[T[p]]++] = p | ((sa_sint_t)(T[p - (p > 0)] < T[p]) << (SAINT_BIT - 1)); } + } +} + +#if defined(_OPENMP) + +static fast_sint_t libsais_final_bwt_scan_left_to_right_8u_block_prepare(const uint8_t * RESTRICT T, sa_sint_t * RESTRICT SA, sa_sint_t * RESTRICT buckets, LIBSAIS_THREAD_CACHE * RESTRICT cache, fast_sint_t omp_block_start, fast_sint_t omp_block_size) +{ + const fast_sint_t prefetch_distance = 32; + + memset(buckets, 0, ALPHABET_SIZE * sizeof(sa_sint_t)); + + fast_sint_t i, j, count = 0; + for (i = omp_block_start, j = omp_block_start + omp_block_size - prefetch_distance - 1; i < j; i += 2) + { + libsais_prefetchw(&SA[i + 2 * prefetch_distance]); + + sa_sint_t s0 = SA[i + prefetch_distance + 0]; const uint8_t * Ts0 = &T[s0] - 1; libsais_prefetch(s0 > 0 ? Ts0 : NULL); Ts0--; libsais_prefetch(s0 > 0 ? Ts0 : NULL); + sa_sint_t s1 = SA[i + prefetch_distance + 1]; const uint8_t * Ts1 = &T[s1] - 1; libsais_prefetch(s1 > 0 ? Ts1 : NULL); Ts1--; libsais_prefetch(s1 > 0 ? 
Ts1 : NULL); + + sa_sint_t p0 = SA[i + 0]; SA[i + 0] = p0 & SAINT_MAX; if (p0 > 0) { p0--; SA[i + 0] = T[p0] | SAINT_MIN; buckets[cache[count].symbol = T[p0]]++; cache[count++].index = p0 | ((sa_sint_t)(T[p0 - (p0 > 0)] < T[p0]) << (SAINT_BIT - 1)); } + sa_sint_t p1 = SA[i + 1]; SA[i + 1] = p1 & SAINT_MAX; if (p1 > 0) { p1--; SA[i + 1] = T[p1] | SAINT_MIN; buckets[cache[count].symbol = T[p1]]++; cache[count++].index = p1 | ((sa_sint_t)(T[p1 - (p1 > 0)] < T[p1]) << (SAINT_BIT - 1)); } + } + + for (j += prefetch_distance + 1; i < j; i += 1) + { + sa_sint_t p = SA[i]; SA[i] = p & SAINT_MAX; if (p > 0) { p--; SA[i] = T[p] | SAINT_MIN; buckets[cache[count].symbol = T[p]]++; cache[count++].index = p | ((sa_sint_t)(T[p - (p > 0)] < T[p]) << (SAINT_BIT - 1)); } + } + + return count; +} + +static fast_sint_t libsais_final_sorting_scan_left_to_right_8u_block_prepare(const uint8_t * RESTRICT T, sa_sint_t * RESTRICT SA, sa_sint_t * RESTRICT buckets, LIBSAIS_THREAD_CACHE * RESTRICT cache, fast_sint_t omp_block_start, fast_sint_t omp_block_size) +{ + const fast_sint_t prefetch_distance = 32; + + memset(buckets, 0, ALPHABET_SIZE * sizeof(sa_sint_t)); + + fast_sint_t i, j, count = 0; + for (i = omp_block_start, j = omp_block_start + omp_block_size - prefetch_distance - 1; i < j; i += 2) + { + libsais_prefetchw(&SA[i + 2 * prefetch_distance]); + + sa_sint_t s0 = SA[i + prefetch_distance + 0]; const uint8_t * Ts0 = &T[s0] - 1; libsais_prefetch(s0 > 0 ? Ts0 : NULL); Ts0--; libsais_prefetch(s0 > 0 ? Ts0 : NULL); + sa_sint_t s1 = SA[i + prefetch_distance + 1]; const uint8_t * Ts1 = &T[s1] - 1; libsais_prefetch(s1 > 0 ? Ts1 : NULL); Ts1--; libsais_prefetch(s1 > 0 ? 
Ts1 : NULL); + + sa_sint_t p0 = SA[i + 0]; SA[i + 0] = p0 ^ SAINT_MIN; if (p0 > 0) { p0--; buckets[cache[count].symbol = T[p0]]++; cache[count++].index = p0 | ((sa_sint_t)(T[p0 - (p0 > 0)] < T[p0]) << (SAINT_BIT - 1)); } + sa_sint_t p1 = SA[i + 1]; SA[i + 1] = p1 ^ SAINT_MIN; if (p1 > 0) { p1--; buckets[cache[count].symbol = T[p1]]++; cache[count++].index = p1 | ((sa_sint_t)(T[p1 - (p1 > 0)] < T[p1]) << (SAINT_BIT - 1)); } + } + + for (j += prefetch_distance + 1; i < j; i += 1) + { + sa_sint_t p = SA[i]; SA[i] = p ^ SAINT_MIN; if (p > 0) { p--; buckets[cache[count].symbol = T[p]]++; cache[count++].index = p | ((sa_sint_t)(T[p - (p > 0)] < T[p]) << (SAINT_BIT - 1)); } + } + + return count; +} + +static void libsais_final_order_scan_left_to_right_8u_block_place(sa_sint_t * RESTRICT SA, sa_sint_t * RESTRICT buckets, LIBSAIS_THREAD_CACHE * RESTRICT cache, fast_sint_t count) +{ + const fast_sint_t prefetch_distance = 32; + + fast_sint_t i, j; + for (i = 0, j = count - 3; i < j; i += 4) + { + libsais_prefetch(&cache[i + prefetch_distance]); + + SA[buckets[cache[i + 0].symbol]++] = cache[i + 0].index; + SA[buckets[cache[i + 1].symbol]++] = cache[i + 1].index; + SA[buckets[cache[i + 2].symbol]++] = cache[i + 2].index; + SA[buckets[cache[i + 3].symbol]++] = cache[i + 3].index; + } + + for (j += 3; i < j; i += 1) + { + SA[buckets[cache[i].symbol]++] = cache[i].index; + } +} + +static void libsais_final_bwt_aux_scan_left_to_right_8u_block_place(sa_sint_t * RESTRICT SA, sa_sint_t rm, sa_sint_t * RESTRICT I, sa_sint_t * RESTRICT buckets, LIBSAIS_THREAD_CACHE * RESTRICT cache, fast_sint_t count) +{ + const fast_sint_t prefetch_distance = 32; + + fast_sint_t i, j; + for (i = 0, j = count - 3; i < j; i += 4) + { + libsais_prefetch(&cache[i + prefetch_distance]); + + SA[buckets[cache[i + 0].symbol]++] = cache[i + 0].index; if ((cache[i + 0].index & rm) == 0) { I[(cache[i + 0].index & SAINT_MAX) / (rm + 1)] = buckets[cache[i + 0].symbol]; } + SA[buckets[cache[i + 1].symbol]++] = 
cache[i + 1].index; if ((cache[i + 1].index & rm) == 0) { I[(cache[i + 1].index & SAINT_MAX) / (rm + 1)] = buckets[cache[i + 1].symbol]; } + SA[buckets[cache[i + 2].symbol]++] = cache[i + 2].index; if ((cache[i + 2].index & rm) == 0) { I[(cache[i + 2].index & SAINT_MAX) / (rm + 1)] = buckets[cache[i + 2].symbol]; } + SA[buckets[cache[i + 3].symbol]++] = cache[i + 3].index; if ((cache[i + 3].index & rm) == 0) { I[(cache[i + 3].index & SAINT_MAX) / (rm + 1)] = buckets[cache[i + 3].symbol]; } + } + + for (j += 3; i < j; i += 1) + { + SA[buckets[cache[i].symbol]++] = cache[i].index; if ((cache[i].index & rm) == 0) { I[(cache[i].index & SAINT_MAX) / (rm + 1)] = buckets[cache[i].symbol]; } + } +} + +static void libsais_final_sorting_scan_left_to_right_32s_block_gather(const sa_sint_t * RESTRICT T, sa_sint_t * RESTRICT SA, LIBSAIS_THREAD_CACHE * RESTRICT cache, fast_sint_t omp_block_start, fast_sint_t omp_block_size) +{ + const fast_sint_t prefetch_distance = 32; + + fast_sint_t i, j; + for (i = omp_block_start, j = omp_block_start + omp_block_size - prefetch_distance - 1; i < j; i += 2) + { + libsais_prefetchw(&SA[i + 2 * prefetch_distance]); + + sa_sint_t s0 = SA[i + prefetch_distance + 0]; const sa_sint_t * Ts0 = &T[s0] - 1; libsais_prefetch(s0 > 0 ? Ts0 : NULL); Ts0--; libsais_prefetch(s0 > 0 ? Ts0 : NULL); + sa_sint_t s1 = SA[i + prefetch_distance + 1]; const sa_sint_t * Ts1 = &T[s1] - 1; libsais_prefetch(s1 > 0 ? Ts1 : NULL); Ts1--; libsais_prefetch(s1 > 0 ? 
Ts1 : NULL); + + libsais_prefetchw(&cache[i + prefetch_distance]); + + sa_sint_t symbol0 = SAINT_MIN, p0 = SA[i + 0]; SA[i + 0] = p0 ^ SAINT_MIN; if (p0 > 0) { p0--; cache[i + 0].index = p0 | ((sa_sint_t)(T[p0 - (p0 > 0)] < T[p0]) << (SAINT_BIT - 1)); symbol0 = T[p0]; } cache[i + 0].symbol = symbol0; + sa_sint_t symbol1 = SAINT_MIN, p1 = SA[i + 1]; SA[i + 1] = p1 ^ SAINT_MIN; if (p1 > 0) { p1--; cache[i + 1].index = p1 | ((sa_sint_t)(T[p1 - (p1 > 0)] < T[p1]) << (SAINT_BIT - 1)); symbol1 = T[p1]; } cache[i + 1].symbol = symbol1; + } + + for (j += prefetch_distance + 1; i < j; i += 1) + { + sa_sint_t symbol = SAINT_MIN, p = SA[i]; SA[i] = p ^ SAINT_MIN; if (p > 0) { p--; cache[i].index = p | ((sa_sint_t)(T[p - (p > 0)] < T[p]) << (SAINT_BIT - 1)); symbol = T[p]; } cache[i].symbol = symbol; + } +} + +static void libsais_final_sorting_scan_left_to_right_32s_block_sort(const sa_sint_t * RESTRICT T, sa_sint_t * RESTRICT induction_bucket, LIBSAIS_THREAD_CACHE * RESTRICT cache, fast_sint_t omp_block_start, fast_sint_t omp_block_size) +{ + const fast_sint_t prefetch_distance = 32; + + fast_sint_t i, j, omp_block_end = omp_block_start + omp_block_size; + for (i = omp_block_start, j = omp_block_end - prefetch_distance - 1; i < j; i += 2) + { + libsais_prefetchw(&cache[i + 2 * prefetch_distance]); + + sa_sint_t s0 = cache[i + prefetch_distance + 0].symbol; const sa_sint_t * Is0 = &induction_bucket[s0]; libsais_prefetchw(s0 >= 0 ? Is0 : NULL); + sa_sint_t s1 = cache[i + prefetch_distance + 1].symbol; const sa_sint_t * Is1 = &induction_bucket[s1]; libsais_prefetchw(s1 >= 0 ? 
Is1 : NULL); + + sa_sint_t v0 = cache[i + 0].symbol; + if (v0 >= 0) + { + cache[i + 0].symbol = induction_bucket[v0]++; + if (cache[i + 0].symbol < omp_block_end) { sa_sint_t ni = cache[i + 0].symbol, np = cache[i + 0].index; cache[i + 0].index = np ^ SAINT_MIN; if (np > 0) { np--; cache[ni].index = np | ((sa_sint_t)(T[np - (np > 0)] < T[np]) << (SAINT_BIT - 1)); cache[ni].symbol = T[np]; } } + } + + sa_sint_t v1 = cache[i + 1].symbol; + if (v1 >= 0) + { + cache[i + 1].symbol = induction_bucket[v1]++; + if (cache[i + 1].symbol < omp_block_end) { sa_sint_t ni = cache[i + 1].symbol, np = cache[i + 1].index; cache[i + 1].index = np ^ SAINT_MIN; if (np > 0) { np--; cache[ni].index = np | ((sa_sint_t)(T[np - (np > 0)] < T[np]) << (SAINT_BIT - 1)); cache[ni].symbol = T[np]; } } + } + } + + for (j += prefetch_distance + 1; i < j; i += 1) + { + sa_sint_t v = cache[i].symbol; + if (v >= 0) + { + cache[i].symbol = induction_bucket[v]++; + if (cache[i].symbol < omp_block_end) { sa_sint_t ni = cache[i].symbol, np = cache[i].index; cache[i].index = np ^ SAINT_MIN; if (np > 0) { np--; cache[ni].index = np | ((sa_sint_t)(T[np - (np > 0)] < T[np]) << (SAINT_BIT - 1)); cache[ni].symbol = T[np]; } } + } + } +} + +static void libsais_final_bwt_scan_left_to_right_8u_block_omp(const uint8_t * RESTRICT T, sa_sint_t * RESTRICT SA, sa_sint_t * RESTRICT induction_bucket, fast_sint_t block_start, fast_sint_t block_size, sa_sint_t threads, LIBSAIS_THREAD_STATE * RESTRICT thread_state) +{ +#if defined(_OPENMP) + #pragma omp parallel num_threads(threads) if(threads > 1 && block_size >= 64 * ALPHABET_SIZE && omp_get_dynamic() == 0) +#endif + { +#if defined(_OPENMP) + fast_sint_t omp_thread_num = omp_get_thread_num(); + fast_sint_t omp_num_threads = omp_get_num_threads(); +#else + UNUSED(threads); UNUSED(thread_state); + + fast_sint_t omp_thread_num = 0; + fast_sint_t omp_num_threads = 1; +#endif + fast_sint_t omp_block_stride = (block_size / omp_num_threads) & (-16); + fast_sint_t 
omp_block_start = omp_thread_num * omp_block_stride; + fast_sint_t omp_block_size = omp_thread_num < omp_num_threads - 1 ? omp_block_stride : block_size - omp_block_start; + + omp_block_start += block_start; + + if (omp_num_threads == 1) + { + libsais_final_bwt_scan_left_to_right_8u(T, SA, induction_bucket, omp_block_start, omp_block_size); + } +#if defined(_OPENMP) + else + { + { + thread_state[omp_thread_num].state.count = libsais_final_bwt_scan_left_to_right_8u_block_prepare(T, SA, thread_state[omp_thread_num].state.buckets, thread_state[omp_thread_num].state.cache, omp_block_start, omp_block_size); + } + + #pragma omp barrier + + #pragma omp master + { + fast_sint_t t; + for (t = 0; t < omp_num_threads; ++t) + { + sa_sint_t * RESTRICT temp_bucket = thread_state[t].state.buckets; + fast_sint_t c; for (c = 0; c < ALPHABET_SIZE; c += 1) { sa_sint_t A = induction_bucket[c], B = temp_bucket[c]; induction_bucket[c] = A + B; temp_bucket[c] = A; } + } + } + + #pragma omp barrier + + { + libsais_final_order_scan_left_to_right_8u_block_place(SA, thread_state[omp_thread_num].state.buckets, thread_state[omp_thread_num].state.cache, thread_state[omp_thread_num].state.count); + } + } +#endif + } +} + +static void libsais_final_bwt_aux_scan_left_to_right_8u_block_omp(const uint8_t * RESTRICT T, sa_sint_t * RESTRICT SA, sa_sint_t rm, sa_sint_t * RESTRICT I, sa_sint_t * RESTRICT induction_bucket, fast_sint_t block_start, fast_sint_t block_size, sa_sint_t threads, LIBSAIS_THREAD_STATE * RESTRICT thread_state) +{ +#if defined(_OPENMP) + #pragma omp parallel num_threads(threads) if(threads > 1 && block_size >= 64 * ALPHABET_SIZE && omp_get_dynamic() == 0) +#endif + { +#if defined(_OPENMP) + fast_sint_t omp_thread_num = omp_get_thread_num(); + fast_sint_t omp_num_threads = omp_get_num_threads(); +#else + UNUSED(threads); UNUSED(thread_state); + + fast_sint_t omp_thread_num = 0; + fast_sint_t omp_num_threads = 1; +#endif + fast_sint_t omp_block_stride = (block_size / 
omp_num_threads) & (-16); + fast_sint_t omp_block_start = omp_thread_num * omp_block_stride; + fast_sint_t omp_block_size = omp_thread_num < omp_num_threads - 1 ? omp_block_stride : block_size - omp_block_start; + + omp_block_start += block_start; + + if (omp_num_threads == 1) + { + libsais_final_bwt_aux_scan_left_to_right_8u(T, SA, rm, I, induction_bucket, omp_block_start, omp_block_size); + } +#if defined(_OPENMP) + else + { + { + thread_state[omp_thread_num].state.count = libsais_final_bwt_scan_left_to_right_8u_block_prepare(T, SA, thread_state[omp_thread_num].state.buckets, thread_state[omp_thread_num].state.cache, omp_block_start, omp_block_size); + } + + #pragma omp barrier + + #pragma omp master + { + fast_sint_t t; + for (t = 0; t < omp_num_threads; ++t) + { + sa_sint_t * RESTRICT temp_bucket = thread_state[t].state.buckets; + fast_sint_t c; for (c = 0; c < ALPHABET_SIZE; c += 1) { sa_sint_t A = induction_bucket[c], B = temp_bucket[c]; induction_bucket[c] = A + B; temp_bucket[c] = A; } + } + } + + #pragma omp barrier + + { + libsais_final_bwt_aux_scan_left_to_right_8u_block_place(SA, rm, I, thread_state[omp_thread_num].state.buckets, thread_state[omp_thread_num].state.cache, thread_state[omp_thread_num].state.count); + } + } +#endif + } +} + +static void libsais_final_sorting_scan_left_to_right_8u_block_omp(const uint8_t * RESTRICT T, sa_sint_t * RESTRICT SA, sa_sint_t * RESTRICT induction_bucket, fast_sint_t block_start, fast_sint_t block_size, sa_sint_t threads, LIBSAIS_THREAD_STATE * RESTRICT thread_state) +{ +#if defined(_OPENMP) + #pragma omp parallel num_threads(threads) if(threads > 1 && block_size >= 64 * ALPHABET_SIZE && omp_get_dynamic() == 0) +#endif + { +#if defined(_OPENMP) + fast_sint_t omp_thread_num = omp_get_thread_num(); + fast_sint_t omp_num_threads = omp_get_num_threads(); +#else + UNUSED(threads); UNUSED(thread_state); + + fast_sint_t omp_thread_num = 0; + fast_sint_t omp_num_threads = 1; +#endif + fast_sint_t omp_block_stride = 
(block_size / omp_num_threads) & (-16); + fast_sint_t omp_block_start = omp_thread_num * omp_block_stride; + fast_sint_t omp_block_size = omp_thread_num < omp_num_threads - 1 ? omp_block_stride : block_size - omp_block_start; + + omp_block_start += block_start; + + if (omp_num_threads == 1) + { + libsais_final_sorting_scan_left_to_right_8u(T, SA, induction_bucket, omp_block_start, omp_block_size); + } +#if defined(_OPENMP) + else + { + { + thread_state[omp_thread_num].state.count = libsais_final_sorting_scan_left_to_right_8u_block_prepare(T, SA, thread_state[omp_thread_num].state.buckets, thread_state[omp_thread_num].state.cache, omp_block_start, omp_block_size); + } + + #pragma omp barrier + + #pragma omp master + { + fast_sint_t t; + for (t = 0; t < omp_num_threads; ++t) + { + sa_sint_t * RESTRICT temp_bucket = thread_state[t].state.buckets; + fast_sint_t c; for (c = 0; c < ALPHABET_SIZE; c += 1) { sa_sint_t A = induction_bucket[c], B = temp_bucket[c]; induction_bucket[c] = A + B; temp_bucket[c] = A; } + } + } + + #pragma omp barrier + + { + libsais_final_order_scan_left_to_right_8u_block_place(SA, thread_state[omp_thread_num].state.buckets, thread_state[omp_thread_num].state.cache, thread_state[omp_thread_num].state.count); + } + } +#endif + } +} + +static void libsais_final_sorting_scan_left_to_right_32s_block_omp(const sa_sint_t * RESTRICT T, sa_sint_t * RESTRICT SA, sa_sint_t * RESTRICT buckets, LIBSAIS_THREAD_CACHE * RESTRICT cache, fast_sint_t block_start, fast_sint_t block_size, sa_sint_t threads) +{ +#if defined(_OPENMP) + #pragma omp parallel num_threads(threads) if(threads > 1 && block_size >= 16384) +#endif + { +#if defined(_OPENMP) + fast_sint_t omp_thread_num = omp_get_thread_num(); + fast_sint_t omp_num_threads = omp_get_num_threads(); +#else + UNUSED(threads); UNUSED(cache); + + fast_sint_t omp_thread_num = 0; + fast_sint_t omp_num_threads = 1; +#endif + fast_sint_t omp_block_stride = (block_size / omp_num_threads) & (-16); + fast_sint_t 
omp_block_start = omp_thread_num * omp_block_stride; + fast_sint_t omp_block_size = omp_thread_num < omp_num_threads - 1 ? omp_block_stride : block_size - omp_block_start; + + omp_block_start += block_start; + + if (omp_num_threads == 1) + { + libsais_final_sorting_scan_left_to_right_32s(T, SA, buckets, omp_block_start, omp_block_size); + } +#if defined(_OPENMP) + else + { + { + libsais_final_sorting_scan_left_to_right_32s_block_gather(T, SA, cache - block_start, omp_block_start, omp_block_size); + } + + #pragma omp barrier + + #pragma omp master + { + libsais_final_sorting_scan_left_to_right_32s_block_sort(T, buckets, cache - block_start, block_start, block_size); + } + + #pragma omp barrier + + { + libsais_compact_and_place_cached_suffixes(SA, cache - block_start, omp_block_start, omp_block_size); + } + } +#endif + } +} + +#endif + +static void libsais_final_bwt_scan_left_to_right_8u_omp(const uint8_t * RESTRICT T, sa_sint_t * RESTRICT SA, fast_sint_t n, sa_sint_t * RESTRICT induction_bucket, sa_sint_t threads, LIBSAIS_THREAD_STATE * RESTRICT thread_state) +{ + SA[induction_bucket[T[(sa_sint_t)n - 1]]++] = ((sa_sint_t)n - 1) | ((sa_sint_t)(T[(sa_sint_t)n - 2] < T[(sa_sint_t)n - 1]) << (SAINT_BIT - 1)); + + if (threads == 1 || n < 65536) + { + libsais_final_bwt_scan_left_to_right_8u(T, SA, induction_bucket, 0, n); + } +#if defined(_OPENMP) + else + { + fast_sint_t block_start; + for (block_start = 0; block_start < n; ) + { + if (SA[block_start] == 0) + { + block_start++; + } + else + { + fast_sint_t block_max_end = block_start + ((fast_sint_t)threads) * (LIBSAIS_PER_THREAD_CACHE_SIZE - 16 * (fast_sint_t)threads); if (block_max_end > n) { block_max_end = n;} + fast_sint_t block_end = block_start + 1; while (block_end < block_max_end && SA[block_end] != 0) { block_end++; } + fast_sint_t block_size = block_end - block_start; + + if (block_size < 32) + { + for (; block_start < block_end; block_start += 1) + { + sa_sint_t p = SA[block_start]; SA[block_start] = p & 
SAINT_MAX; if (p > 0) { p--; SA[block_start] = T[p] | SAINT_MIN; SA[induction_bucket[T[p]]++] = p | ((sa_sint_t)(T[p - (p > 0)] < T[p]) << (SAINT_BIT - 1)); } + } + } + else + { + libsais_final_bwt_scan_left_to_right_8u_block_omp(T, SA, induction_bucket, block_start, block_size, threads, thread_state); + block_start = block_end; + } + } + } + } +#else + UNUSED(thread_state); +#endif +} + +static void libsais_final_bwt_aux_scan_left_to_right_8u_omp(const uint8_t * RESTRICT T, sa_sint_t * RESTRICT SA, fast_sint_t n, sa_sint_t rm, sa_sint_t * RESTRICT I, sa_sint_t * RESTRICT induction_bucket, sa_sint_t threads, LIBSAIS_THREAD_STATE * RESTRICT thread_state) +{ + SA[induction_bucket[T[(sa_sint_t)n - 1]]++] = ((sa_sint_t)n - 1) | ((sa_sint_t)(T[(sa_sint_t)n - 2] < T[(sa_sint_t)n - 1]) << (SAINT_BIT - 1)); + + if ((((sa_sint_t)n - 1) & rm) == 0) { I[((sa_sint_t)n - 1) / (rm + 1)] = induction_bucket[T[(sa_sint_t)n - 1]]; } + + if (threads == 1 || n < 65536) + { + libsais_final_bwt_aux_scan_left_to_right_8u(T, SA, rm, I, induction_bucket, 0, n); + } +#if defined(_OPENMP) + else + { + fast_sint_t block_start; + for (block_start = 0; block_start < n; ) + { + if (SA[block_start] == 0) + { + block_start++; + } + else + { + fast_sint_t block_max_end = block_start + ((fast_sint_t)threads) * (LIBSAIS_PER_THREAD_CACHE_SIZE - 16 * (fast_sint_t)threads); if (block_max_end > n) { block_max_end = n;} + fast_sint_t block_end = block_start + 1; while (block_end < block_max_end && SA[block_end] != 0) { block_end++; } + fast_sint_t block_size = block_end - block_start; + + if (block_size < 32) + { + for (; block_start < block_end; block_start += 1) + { + sa_sint_t p = SA[block_start]; SA[block_start] = p & SAINT_MAX; if (p > 0) { p--; SA[block_start] = T[p] | SAINT_MIN; SA[induction_bucket[T[p]]++] = p | ((sa_sint_t)(T[p - (p > 0)] < T[p]) << (SAINT_BIT - 1)); if ((p & rm) == 0) { I[p / (rm + 1)] = induction_bucket[T[p]]; } } + } + } + else + { + 
libsais_final_bwt_aux_scan_left_to_right_8u_block_omp(T, SA, rm, I, induction_bucket, block_start, block_size, threads, thread_state); + block_start = block_end; + } + } + } + } +#else + UNUSED(thread_state); +#endif +} + +static void libsais_final_sorting_scan_left_to_right_8u_omp(const uint8_t * RESTRICT T, sa_sint_t * RESTRICT SA, fast_sint_t n, sa_sint_t * RESTRICT induction_bucket, sa_sint_t threads, LIBSAIS_THREAD_STATE * RESTRICT thread_state) +{ + SA[induction_bucket[T[(sa_sint_t)n - 1]]++] = ((sa_sint_t)n - 1) | ((sa_sint_t)(T[(sa_sint_t)n - 2] < T[(sa_sint_t)n - 1]) << (SAINT_BIT - 1)); + + if (threads == 1 || n < 65536) + { + libsais_final_sorting_scan_left_to_right_8u(T, SA, induction_bucket, 0, n); + } +#if defined(_OPENMP) + else + { + fast_sint_t block_start; + for (block_start = 0; block_start < n; ) + { + if (SA[block_start] == 0) + { + block_start++; + } + else + { + fast_sint_t block_max_end = block_start + ((fast_sint_t)threads) * (LIBSAIS_PER_THREAD_CACHE_SIZE - 16 * (fast_sint_t)threads); if (block_max_end > n) { block_max_end = n;} + fast_sint_t block_end = block_start + 1; while (block_end < block_max_end && SA[block_end] != 0) { block_end++; } + fast_sint_t block_size = block_end - block_start; + + if (block_size < 32) + { + for (; block_start < block_end; block_start += 1) + { + sa_sint_t p = SA[block_start]; SA[block_start] = p ^ SAINT_MIN; if (p > 0) { p--; SA[induction_bucket[T[p]]++] = p | ((sa_sint_t)(T[p - (p > 0)] < T[p]) << (SAINT_BIT - 1)); } + } + } + else + { + libsais_final_sorting_scan_left_to_right_8u_block_omp(T, SA, induction_bucket, block_start, block_size, threads, thread_state); + block_start = block_end; + } + } + } + } +#else + UNUSED(thread_state); +#endif +} + +static void libsais_final_sorting_scan_left_to_right_32s_omp(const sa_sint_t * RESTRICT T, sa_sint_t * RESTRICT SA, sa_sint_t n, sa_sint_t * RESTRICT induction_bucket, sa_sint_t threads, LIBSAIS_THREAD_STATE * RESTRICT thread_state) +{ + 
SA[induction_bucket[T[n - 1]]++] = (n - 1) | ((sa_sint_t)(T[n - 2] < T[n - 1]) << (SAINT_BIT - 1)); + + if (threads == 1 || n < 65536) + { + libsais_final_sorting_scan_left_to_right_32s(T, SA, induction_bucket, 0, n); + } +#if defined(_OPENMP) + else + { + fast_sint_t block_start, block_end; + for (block_start = 0; block_start < n; block_start = block_end) + { + block_end = block_start + (fast_sint_t)threads * LIBSAIS_PER_THREAD_CACHE_SIZE; if (block_end > n) { block_end = n; } + + libsais_final_sorting_scan_left_to_right_32s_block_omp(T, SA, induction_bucket, thread_state[0].state.cache, block_start, block_end - block_start, threads); + } + } +#else + UNUSED(thread_state); +#endif +} + +static sa_sint_t libsais_final_bwt_scan_right_to_left_8u(const uint8_t * RESTRICT T, sa_sint_t * RESTRICT SA, sa_sint_t * RESTRICT induction_bucket, fast_sint_t omp_block_start, fast_sint_t omp_block_size) +{ + const fast_sint_t prefetch_distance = 32; + + fast_sint_t i, j; sa_sint_t index = -1; + for (i = omp_block_start + omp_block_size - 1, j = omp_block_start + prefetch_distance + 1; i >= j; i -= 2) + { + libsais_prefetchw(&SA[i - 2 * prefetch_distance]); + + sa_sint_t s0 = SA[i - prefetch_distance - 0]; const uint8_t * Ts0 = &T[s0] - 1; libsais_prefetch(s0 > 0 ? Ts0 : NULL); Ts0--; libsais_prefetch(s0 > 0 ? Ts0 : NULL); + sa_sint_t s1 = SA[i - prefetch_distance - 1]; const uint8_t * Ts1 = &T[s1] - 1; libsais_prefetch(s1 > 0 ? Ts1 : NULL); Ts1--; libsais_prefetch(s1 > 0 ? Ts1 : NULL); + + sa_sint_t p0 = SA[i - 0]; index = (p0 == 0) ? (sa_sint_t)(i - 0) : index; + SA[i - 0] = p0 & SAINT_MAX; if (p0 > 0) { p0--; uint8_t c0 = T[p0 - (p0 > 0)], c1 = T[p0]; SA[i - 0] = c1; sa_sint_t t = c0 | SAINT_MIN; SA[--induction_bucket[c1]] = (c0 <= c1) ? p0 : t; } + + sa_sint_t p1 = SA[i - 1]; index = (p1 == 0) ? 
(sa_sint_t)(i - 1) : index; + SA[i - 1] = p1 & SAINT_MAX; if (p1 > 0) { p1--; uint8_t c0 = T[p1 - (p1 > 0)], c1 = T[p1]; SA[i - 1] = c1; sa_sint_t t = c0 | SAINT_MIN; SA[--induction_bucket[c1]] = (c0 <= c1) ? p1 : t; } + } + + for (j -= prefetch_distance + 1; i >= j; i -= 1) + { + sa_sint_t p = SA[i]; index = (p == 0) ? (sa_sint_t)i : index; + SA[i] = p & SAINT_MAX; if (p > 0) { p--; uint8_t c0 = T[p - (p > 0)], c1 = T[p]; SA[i] = c1; sa_sint_t t = c0 | SAINT_MIN; SA[--induction_bucket[c1]] = (c0 <= c1) ? p : t; } + } + + return index; +} + +static void libsais_final_bwt_aux_scan_right_to_left_8u(const uint8_t * RESTRICT T, sa_sint_t * RESTRICT SA, sa_sint_t rm, sa_sint_t * RESTRICT I, sa_sint_t * RESTRICT induction_bucket, fast_sint_t omp_block_start, fast_sint_t omp_block_size) +{ + const fast_sint_t prefetch_distance = 32; + + fast_sint_t i, j; + for (i = omp_block_start + omp_block_size - 1, j = omp_block_start + prefetch_distance + 1; i >= j; i -= 2) + { + libsais_prefetchw(&SA[i - 2 * prefetch_distance]); + + sa_sint_t s0 = SA[i - prefetch_distance - 0]; const uint8_t * Ts0 = &T[s0] - 1; libsais_prefetch(s0 > 0 ? Ts0 : NULL); Ts0--; libsais_prefetch(s0 > 0 ? Ts0 : NULL); + sa_sint_t s1 = SA[i - prefetch_distance - 1]; const uint8_t * Ts1 = &T[s1] - 1; libsais_prefetch(s1 > 0 ? Ts1 : NULL); Ts1--; libsais_prefetch(s1 > 0 ? Ts1 : NULL); + + sa_sint_t p0 = SA[i - 0]; + SA[i - 0] = p0 & SAINT_MAX; if (p0 > 0) { p0--; uint8_t c0 = T[p0 - (p0 > 0)], c1 = T[p0]; SA[i - 0] = c1; sa_sint_t t = c0 | SAINT_MIN; SA[--induction_bucket[c1]] = (c0 <= c1) ? p0 : t; if ((p0 & rm) == 0) { I[p0 / (rm + 1)] = induction_bucket[T[p0]] + 1; } } + + sa_sint_t p1 = SA[i - 1]; + SA[i - 1] = p1 & SAINT_MAX; if (p1 > 0) { p1--; uint8_t c0 = T[p1 - (p1 > 0)], c1 = T[p1]; SA[i - 1] = c1; sa_sint_t t = c0 | SAINT_MIN; SA[--induction_bucket[c1]] = (c0 <= c1) ? 
p1 : t; if ((p1 & rm) == 0) { I[p1 / (rm + 1)] = induction_bucket[T[p1]] + 1; } } + } + + for (j -= prefetch_distance + 1; i >= j; i -= 1) + { + sa_sint_t p = SA[i]; + SA[i] = p & SAINT_MAX; if (p > 0) { p--; uint8_t c0 = T[p - (p > 0)], c1 = T[p]; SA[i] = c1; sa_sint_t t = c0 | SAINT_MIN; SA[--induction_bucket[c1]] = (c0 <= c1) ? p : t; if ((p & rm) == 0) { I[p / (rm + 1)] = induction_bucket[T[p]] + 1; } } + } +} + +static void libsais_final_sorting_scan_right_to_left_8u(const uint8_t * RESTRICT T, sa_sint_t * RESTRICT SA, sa_sint_t * RESTRICT induction_bucket, fast_sint_t omp_block_start, fast_sint_t omp_block_size) +{ + const fast_sint_t prefetch_distance = 32; + + fast_sint_t i, j; + for (i = omp_block_start + omp_block_size - 1, j = omp_block_start + prefetch_distance + 1; i >= j; i -= 2) + { + libsais_prefetchw(&SA[i - 2 * prefetch_distance]); + + sa_sint_t s0 = SA[i - prefetch_distance - 0]; const uint8_t * Ts0 = &T[s0] - 1; libsais_prefetch(s0 > 0 ? Ts0 : NULL); Ts0--; libsais_prefetch(s0 > 0 ? Ts0 : NULL); + sa_sint_t s1 = SA[i - prefetch_distance - 1]; const uint8_t * Ts1 = &T[s1] - 1; libsais_prefetch(s1 > 0 ? Ts1 : NULL); Ts1--; libsais_prefetch(s1 > 0 ? 
Ts1 : NULL); + + sa_sint_t p0 = SA[i - 0]; SA[i - 0] = p0 & SAINT_MAX; if (p0 > 0) { p0--; SA[--induction_bucket[T[p0]]] = p0 | ((sa_sint_t)(T[p0 - (p0 > 0)] > T[p0]) << (SAINT_BIT - 1)); } + sa_sint_t p1 = SA[i - 1]; SA[i - 1] = p1 & SAINT_MAX; if (p1 > 0) { p1--; SA[--induction_bucket[T[p1]]] = p1 | ((sa_sint_t)(T[p1 - (p1 > 0)] > T[p1]) << (SAINT_BIT - 1)); } + } + + for (j -= prefetch_distance + 1; i >= j; i -= 1) + { + sa_sint_t p = SA[i]; SA[i] = p & SAINT_MAX; if (p > 0) { p--; SA[--induction_bucket[T[p]]] = p | ((sa_sint_t)(T[p - (p > 0)] > T[p]) << (SAINT_BIT - 1)); } + } +} + +static void libsais_final_sorting_scan_right_to_left_32s(const sa_sint_t * RESTRICT T, sa_sint_t * RESTRICT SA, sa_sint_t * RESTRICT induction_bucket, fast_sint_t omp_block_start, fast_sint_t omp_block_size) +{ + const fast_sint_t prefetch_distance = 32; + + fast_sint_t i, j; + for (i = omp_block_start + omp_block_size - 1, j = omp_block_start + 2 * prefetch_distance + 1; i >= j; i -= 2) + { + libsais_prefetchw(&SA[i - 3 * prefetch_distance]); + + sa_sint_t s0 = SA[i - 2 * prefetch_distance - 0]; const sa_sint_t * Ts0 = &T[s0] - 1; libsais_prefetch(s0 > 0 ? Ts0 : NULL); + sa_sint_t s1 = SA[i - 2 * prefetch_distance - 1]; const sa_sint_t * Ts1 = &T[s1] - 1; libsais_prefetch(s1 > 0 ? 
Ts1 : NULL); + sa_sint_t s2 = SA[i - 1 * prefetch_distance - 0]; if (s2 > 0) { libsais_prefetchw(&induction_bucket[T[s2 - 1]]); libsais_prefetch(&T[s2] - 2); } + sa_sint_t s3 = SA[i - 1 * prefetch_distance - 1]; if (s3 > 0) { libsais_prefetchw(&induction_bucket[T[s3 - 1]]); libsais_prefetch(&T[s3] - 2); } + + sa_sint_t p0 = SA[i - 0]; SA[i - 0] = p0 & SAINT_MAX; if (p0 > 0) { p0--; SA[--induction_bucket[T[p0]]] = p0 | ((sa_sint_t)(T[p0 - (p0 > 0)] > T[p0]) << (SAINT_BIT - 1)); } + sa_sint_t p1 = SA[i - 1]; SA[i - 1] = p1 & SAINT_MAX; if (p1 > 0) { p1--; SA[--induction_bucket[T[p1]]] = p1 | ((sa_sint_t)(T[p1 - (p1 > 0)] > T[p1]) << (SAINT_BIT - 1)); } + } + + for (j -= 2 * prefetch_distance + 1; i >= j; i -= 1) + { + sa_sint_t p = SA[i]; SA[i] = p & SAINT_MAX; if (p > 0) { p--; SA[--induction_bucket[T[p]]] = p | ((sa_sint_t)(T[p - (p > 0)] > T[p]) << (SAINT_BIT - 1)); } + } +} + +#if defined(_OPENMP) + +static fast_sint_t libsais_final_bwt_scan_right_to_left_8u_block_prepare(const uint8_t * RESTRICT T, sa_sint_t * RESTRICT SA, sa_sint_t * RESTRICT buckets, LIBSAIS_THREAD_CACHE * RESTRICT cache, fast_sint_t omp_block_start, fast_sint_t omp_block_size) +{ + const fast_sint_t prefetch_distance = 32; + + memset(buckets, 0, ALPHABET_SIZE * sizeof(sa_sint_t)); + + fast_sint_t i, j, count = 0; + for (i = omp_block_start + omp_block_size - 1, j = omp_block_start + prefetch_distance + 1; i >= j; i -= 2) + { + libsais_prefetchw(&SA[i - 2 * prefetch_distance]); + + sa_sint_t s0 = SA[i - prefetch_distance - 0]; const uint8_t * Ts0 = &T[s0] - 1; libsais_prefetch(s0 > 0 ? Ts0 : NULL); Ts0--; libsais_prefetch(s0 > 0 ? Ts0 : NULL); + sa_sint_t s1 = SA[i - prefetch_distance - 1]; const uint8_t * Ts1 = &T[s1] - 1; libsais_prefetch(s1 > 0 ? Ts1 : NULL); Ts1--; libsais_prefetch(s1 > 0 ? 
Ts1 : NULL); + + sa_sint_t p0 = SA[i - 0]; SA[i - 0] = p0 & SAINT_MAX; if (p0 > 0) { p0--; uint8_t c0 = T[p0 - (p0 > 0)], c1 = T[p0]; SA[i - 0] = c1; sa_sint_t t = c0 | SAINT_MIN; buckets[cache[count].symbol = c1]++; cache[count++].index = (c0 <= c1) ? p0 : t; } + sa_sint_t p1 = SA[i - 1]; SA[i - 1] = p1 & SAINT_MAX; if (p1 > 0) { p1--; uint8_t c0 = T[p1 - (p1 > 0)], c1 = T[p1]; SA[i - 1] = c1; sa_sint_t t = c0 | SAINT_MIN; buckets[cache[count].symbol = c1]++; cache[count++].index = (c0 <= c1) ? p1 : t; } + } + + for (j -= prefetch_distance + 1; i >= j; i -= 1) + { + sa_sint_t p = SA[i]; SA[i] = p & SAINT_MAX; if (p > 0) { p--; uint8_t c0 = T[p - (p > 0)], c1 = T[p]; SA[i] = c1; sa_sint_t t = c0 | SAINT_MIN; buckets[cache[count].symbol = c1]++; cache[count++].index = (c0 <= c1) ? p : t; } + } + + return count; +} + +static fast_sint_t libsais_final_bwt_aux_scan_right_to_left_8u_block_prepare(const uint8_t * RESTRICT T, sa_sint_t * RESTRICT SA, sa_sint_t * RESTRICT buckets, LIBSAIS_THREAD_CACHE * RESTRICT cache, fast_sint_t omp_block_start, fast_sint_t omp_block_size) +{ + const fast_sint_t prefetch_distance = 32; + + memset(buckets, 0, ALPHABET_SIZE * sizeof(sa_sint_t)); + + fast_sint_t i, j, count = 0; + for (i = omp_block_start + omp_block_size - 1, j = omp_block_start + prefetch_distance + 1; i >= j; i -= 2) + { + libsais_prefetchw(&SA[i - 2 * prefetch_distance]); + + sa_sint_t s0 = SA[i - prefetch_distance - 0]; const uint8_t * Ts0 = &T[s0] - 1; libsais_prefetch(s0 > 0 ? Ts0 : NULL); Ts0--; libsais_prefetch(s0 > 0 ? Ts0 : NULL); + sa_sint_t s1 = SA[i - prefetch_distance - 1]; const uint8_t * Ts1 = &T[s1] - 1; libsais_prefetch(s1 > 0 ? Ts1 : NULL); Ts1--; libsais_prefetch(s1 > 0 ? Ts1 : NULL); + + sa_sint_t p0 = SA[i - 0]; SA[i - 0] = p0 & SAINT_MAX; if (p0 > 0) { p0--; uint8_t c0 = T[p0 - (p0 > 0)], c1 = T[p0]; SA[i - 0] = c1; sa_sint_t t = c0 | SAINT_MIN; buckets[cache[count].symbol = c1]++; cache[count].index = (c0 <= c1) ? 
p0 : t; cache[count + 1].index = p0; count += 2; } + sa_sint_t p1 = SA[i - 1]; SA[i - 1] = p1 & SAINT_MAX; if (p1 > 0) { p1--; uint8_t c0 = T[p1 - (p1 > 0)], c1 = T[p1]; SA[i - 1] = c1; sa_sint_t t = c0 | SAINT_MIN; buckets[cache[count].symbol = c1]++; cache[count].index = (c0 <= c1) ? p1 : t; cache[count + 1].index = p1; count += 2; } + } + + /* Remainder loop: same per-entry work as above, one entry at a time and without prefetching. */+ for (j -= prefetch_distance + 1; i >= j; i -= 1) + { + sa_sint_t p = SA[i]; SA[i] = p & SAINT_MAX; if (p > 0) { p--; uint8_t c0 = T[p - (p > 0)], c1 = T[p]; SA[i] = c1; sa_sint_t t = c0 | SAINT_MIN; buckets[cache[count].symbol = c1]++; cache[count].index = (c0 <= c1) ? p : t; cache[count + 1].index = p; count += 2; } + } + + return count; +} + /* Per-thread prepare step for the final sorting (suffix array) right-to-left scan. Zeroes buckets, then walks SA over [omp_block_start, omp_block_start + omp_block_size) from the highest index down, clearing the top bit of each entry in place (SA[i] otherwise keeps its value). For an entry p > 0, after p--, it increments buckets[T[p]] and appends the record {symbol = T[p], index = p with bit (SAINT_BIT - 1) set when T[p - (p > 0)] > T[p]} to cache. Returns the number of records appended. */+static fast_sint_t libsais_final_sorting_scan_right_to_left_8u_block_prepare(const uint8_t * RESTRICT T, sa_sint_t * RESTRICT SA, sa_sint_t * RESTRICT buckets, LIBSAIS_THREAD_CACHE * RESTRICT cache, fast_sint_t omp_block_start, fast_sint_t omp_block_size) +{ + const fast_sint_t prefetch_distance = 32; + + memset(buckets, 0, ALPHABET_SIZE * sizeof(sa_sint_t)); + + fast_sint_t i, j, count = 0; + for (i = omp_block_start + omp_block_size - 1, j = omp_block_start + prefetch_distance + 1; i >= j; i -= 2) + { + libsais_prefetchw(&SA[i - 2 * prefetch_distance]); + + sa_sint_t s0 = SA[i - prefetch_distance - 0]; const uint8_t * Ts0 = &T[s0] - 1; libsais_prefetch(s0 > 0 ? Ts0 : NULL); Ts0--; libsais_prefetch(s0 > 0 ? Ts0 : NULL); + sa_sint_t s1 = SA[i - prefetch_distance - 1]; const uint8_t * Ts1 = &T[s1] - 1; libsais_prefetch(s1 > 0 ? Ts1 : NULL); Ts1--; libsais_prefetch(s1 > 0 ?
Ts1 : NULL); + + sa_sint_t p0 = SA[i - 0]; SA[i - 0] = p0 & SAINT_MAX; if (p0 > 0) { p0--; buckets[cache[count].symbol = T[p0]]++; cache[count++].index = p0 | ((sa_sint_t)(T[p0 - (p0 > 0)] > T[p0]) << (SAINT_BIT - 1)); } + sa_sint_t p1 = SA[i - 1]; SA[i - 1] = p1 & SAINT_MAX; if (p1 > 0) { p1--; buckets[cache[count].symbol = T[p1]]++; cache[count++].index = p1 | ((sa_sint_t)(T[p1 - (p1 > 0)] > T[p1]) << (SAINT_BIT - 1)); } + } + + for (j -= prefetch_distance + 1; i >= j; i -= 1) + { + sa_sint_t p = SA[i]; SA[i] = p & SAINT_MAX; if (p > 0) { p--; buckets[cache[count].symbol = T[p]]++; cache[count++].index = p | ((sa_sint_t)(T[p - (p > 0)] > T[p]) << (SAINT_BIT - 1)); } + } + + return count; +} + /* Place step: replays the first count cache records in order, storing each record's index at SA[--buckets[symbol]]. The main loop handles four records per iteration and prefetches cache ahead; the second loop handles the remaining records one at a time. */+static void libsais_final_order_scan_right_to_left_8u_block_place(sa_sint_t * RESTRICT SA, sa_sint_t * RESTRICT buckets, LIBSAIS_THREAD_CACHE * RESTRICT cache, fast_sint_t count) +{ + const fast_sint_t prefetch_distance = 32; + + fast_sint_t i, j; + for (i = 0, j = count - 3; i < j; i += 4) + { + libsais_prefetch(&cache[i + prefetch_distance]); + + SA[--buckets[cache[i + 0].symbol]] = cache[i + 0].index; + SA[--buckets[cache[i + 1].symbol]] = cache[i + 1].index; + SA[--buckets[cache[i + 2].symbol]] = cache[i + 2].index; + SA[--buckets[cache[i + 3].symbol]] = cache[i + 3].index; + } + + for (j += 3; i < j; i += 1) + { + SA[--buckets[cache[i].symbol]] = cache[i].index; + } +} + /* Place step for the aux-index variant: cache records are consumed two at a time. Record i supplies the symbol and the value stored at SA[--buckets[symbol]]; record i + 1 supplies a position in its index field, and when (index & rm) == 0 the slot I[index / (rm + 1)] receives buckets[symbol] + 1 (the bucket value just after the decrement, plus one). The main loop handles four pairs per iteration. */+static void libsais_final_bwt_aux_scan_right_to_left_8u_block_place(sa_sint_t * RESTRICT SA, sa_sint_t rm, sa_sint_t * RESTRICT I, sa_sint_t * RESTRICT buckets, LIBSAIS_THREAD_CACHE * RESTRICT cache, fast_sint_t count) +{ + const fast_sint_t prefetch_distance = 32; + + fast_sint_t i, j; + for (i = 0, j = count - 6; i < j; i += 8) + { + libsais_prefetch(&cache[i + prefetch_distance]); + + SA[--buckets[cache[i + 0].symbol]] = cache[i + 0].index; if ((cache[i + 1].index & rm) == 0) { I[cache[i + 1].index / (rm + 1)] = buckets[cache[i + 0].symbol] + 1; } + SA[--buckets[cache[i + 2].symbol]] = cache[i +
2].index; if ((cache[i + 3].index & rm) == 0) { I[cache[i + 3].index / (rm + 1)] = buckets[cache[i + 2].symbol] + 1; } + SA[--buckets[cache[i + 4].symbol]] = cache[i + 4].index; if ((cache[i + 5].index & rm) == 0) { I[cache[i + 5].index / (rm + 1)] = buckets[cache[i + 4].symbol] + 1; } + SA[--buckets[cache[i + 6].symbol]] = cache[i + 6].index; if ((cache[i + 7].index & rm) == 0) { I[cache[i + 7].index / (rm + 1)] = buckets[cache[i + 6].symbol] + 1; } + } + + /* Remainder loop: one {value, position} pair per iteration. The position is additionally masked with SAINT_MAX here before the division. */+ for (j += 6; i < j; i += 2) + { + SA[--buckets[cache[i].symbol]] = cache[i].index; if ((cache[i + 1].index & rm) == 0) { I[(cache[i + 1].index & SAINT_MAX) / (rm + 1)] = buckets[cache[i].symbol] + 1; } + } +} + /* Gather step for the 32-bit-alphabet final sorting scan. Walks SA over [omp_block_start, omp_block_start + omp_block_size) in increasing index, clearing the top bit of each entry in place and filling cache[i] at the same index i. For an entry p > 0, after p--, cache[i].index = p with bit (SAINT_BIT - 1) set when T[p - (p > 0)] > T[p], and cache[i].symbol = T[p]; for any other entry cache[i].symbol = SAINT_MIN and the index field is left untouched. */+static void libsais_final_sorting_scan_right_to_left_32s_block_gather(const sa_sint_t * RESTRICT T, sa_sint_t * RESTRICT SA, LIBSAIS_THREAD_CACHE * RESTRICT cache, fast_sint_t omp_block_start, fast_sint_t omp_block_size) +{ + const fast_sint_t prefetch_distance = 32; + + fast_sint_t i, j; + for (i = omp_block_start, j = omp_block_start + omp_block_size - prefetch_distance - 1; i < j; i += 2) + { + libsais_prefetchw(&SA[i + 2 * prefetch_distance]); + + sa_sint_t s0 = SA[i + prefetch_distance + 0]; const sa_sint_t * Ts0 = &T[s0] - 1; libsais_prefetch(s0 > 0 ? Ts0 : NULL); Ts0--; libsais_prefetch(s0 > 0 ? Ts0 : NULL); + sa_sint_t s1 = SA[i + prefetch_distance + 1]; const sa_sint_t * Ts1 = &T[s1] - 1; libsais_prefetch(s1 > 0 ? Ts1 : NULL); Ts1--; libsais_prefetch(s1 > 0 ?
Ts1 : NULL); + + libsais_prefetchw(&cache[i + prefetch_distance]); + + sa_sint_t symbol0 = SAINT_MIN, p0 = SA[i + 0]; SA[i + 0] = p0 & SAINT_MAX; if (p0 > 0) { p0--; cache[i + 0].index = p0 | ((sa_sint_t)(T[p0 - (p0 > 0)] > T[p0]) << (SAINT_BIT - 1)); symbol0 = T[p0]; } cache[i + 0].symbol = symbol0; + sa_sint_t symbol1 = SAINT_MIN, p1 = SA[i + 1]; SA[i + 1] = p1 & SAINT_MAX; if (p1 > 0) { p1--; cache[i + 1].index = p1 | ((sa_sint_t)(T[p1 - (p1 > 0)] > T[p1]) << (SAINT_BIT - 1)); symbol1 = T[p1]; } cache[i + 1].symbol = symbol1; + } + + /* Remainder loop: same per-entry work as above, one entry at a time and without prefetching. */+ for (j += prefetch_distance + 1; i < j; i += 1) + { + sa_sint_t symbol = SAINT_MIN, p = SA[i]; SA[i] = p & SAINT_MAX; if (p > 0) { p--; cache[i].index = p | ((sa_sint_t)(T[p - (p > 0)] > T[p]) << (SAINT_BIT - 1)); symbol = T[p]; } cache[i].symbol = symbol; + } +} + /* Sort step for the 32-bit-alphabet final sorting scan. Walks cache over [omp_block_start, omp_block_start + omp_block_size) from the highest index down. For a record whose symbol is >= 0, the symbol field is overwritten with --induction_bucket[symbol]. When that new value is >= omp_block_start, the record's index np has its top bit cleared in place and, if np > 0, a fresh record for np - 1 is written at cache[new value] (index with the comparison flag in the top bit, symbol = T[np - 1]), so it is picked up later in the same downward walk. Records with a negative symbol are skipped. */+static void libsais_final_sorting_scan_right_to_left_32s_block_sort(const sa_sint_t * RESTRICT T, sa_sint_t * RESTRICT induction_bucket, LIBSAIS_THREAD_CACHE * RESTRICT cache, fast_sint_t omp_block_start, fast_sint_t omp_block_size) +{ + const fast_sint_t prefetch_distance = 32; + + fast_sint_t i, j; + for (i = omp_block_start + omp_block_size - 1, j = omp_block_start + prefetch_distance + 1; i >= j; i -= 2) + { + libsais_prefetchw(&cache[i - 2 * prefetch_distance]); + + sa_sint_t s0 = cache[i - prefetch_distance - 0].symbol; const sa_sint_t * Is0 = &induction_bucket[s0]; libsais_prefetchw(s0 >= 0 ? Is0 : NULL); + sa_sint_t s1 = cache[i - prefetch_distance - 1].symbol; const sa_sint_t * Is1 = &induction_bucket[s1]; libsais_prefetchw(s1 >= 0 ?
Is1 : NULL); + + sa_sint_t v0 = cache[i - 0].symbol; + if (v0 >= 0) + { + cache[i - 0].symbol = --induction_bucket[v0]; + if (cache[i - 0].symbol >= omp_block_start) { sa_sint_t ni = cache[i - 0].symbol, np = cache[i - 0].index; cache[i - 0].index = np & SAINT_MAX; if (np > 0) { np--; cache[ni].index = np | ((sa_sint_t)(T[np - (np > 0)] > T[np]) << (SAINT_BIT - 1)); cache[ni].symbol = T[np]; } } + } + + sa_sint_t v1 = cache[i - 1].symbol; + if (v1 >= 0) + { + cache[i - 1].symbol = --induction_bucket[v1]; + if (cache[i - 1].symbol >= omp_block_start) { sa_sint_t ni = cache[i - 1].symbol, np = cache[i - 1].index; cache[i - 1].index = np & SAINT_MAX; if (np > 0) { np--; cache[ni].index = np | ((sa_sint_t)(T[np - (np > 0)] > T[np]) << (SAINT_BIT - 1)); cache[ni].symbol = T[np]; } } + } + } + + /* Remainder loop: same per-record work as above, one record at a time and without prefetching. */+ for (j -= prefetch_distance + 1; i >= j; i -= 1) + { + sa_sint_t v = cache[i].symbol; + if (v >= 0) + { + cache[i].symbol = --induction_bucket[v]; + if (cache[i].symbol >= omp_block_start) { sa_sint_t ni = cache[i].symbol, np = cache[i].index; cache[i].index = np & SAINT_MAX; if (np > 0) { np--; cache[ni].index = np | ((sa_sint_t)(T[np - (np > 0)] > T[np]) << (SAINT_BIT - 1)); cache[ni].symbol = T[np]; } } + } + } +} + /* Runs the BWT right-to-left scan over SA[block_start, block_start + block_size) with an OpenMP team. The parallel region is only enabled when threads > 1, block_size >= 64 * ALPHABET_SIZE and omp_get_dynamic() == 0. Each thread gets a sub-block whose stride is rounded down to a multiple of 16; the last thread also takes the remainder. With one thread the serial scan is called directly. Otherwise: every thread runs the prepare step into its own thread_state buckets/cache, then after a barrier the master thread walks the threads from last to first and, per symbol, hands each thread the current induction_bucket value while lowering induction_bucket by that thread's count, and after a second barrier every thread places its cached records. */+static void libsais_final_bwt_scan_right_to_left_8u_block_omp(const uint8_t * RESTRICT T, sa_sint_t * RESTRICT SA, sa_sint_t * RESTRICT induction_bucket, fast_sint_t block_start, fast_sint_t block_size, sa_sint_t threads, LIBSAIS_THREAD_STATE * RESTRICT thread_state) +{ +#if defined(_OPENMP) + #pragma omp parallel num_threads(threads) if(threads > 1 && block_size >= 64 * ALPHABET_SIZE && omp_get_dynamic() == 0) +#endif + { +#if defined(_OPENMP) + fast_sint_t omp_thread_num = omp_get_thread_num(); + fast_sint_t omp_num_threads = omp_get_num_threads(); +#else + UNUSED(threads); UNUSED(thread_state); + + fast_sint_t omp_thread_num = 0; + fast_sint_t omp_num_threads = 1; +#endif + fast_sint_t omp_block_stride = (block_size / omp_num_threads) & (-16); + fast_sint_t
omp_block_start = omp_thread_num * omp_block_stride; + fast_sint_t omp_block_size = omp_thread_num < omp_num_threads - 1 ? omp_block_stride : block_size - omp_block_start; + + omp_block_start += block_start; + + if (omp_num_threads == 1) + { + libsais_final_bwt_scan_right_to_left_8u(T, SA, induction_bucket, omp_block_start, omp_block_size); + } +#if defined(_OPENMP) + else + { + { + thread_state[omp_thread_num].state.count = libsais_final_bwt_scan_right_to_left_8u_block_prepare(T, SA, thread_state[omp_thread_num].state.buckets, thread_state[omp_thread_num].state.cache, omp_block_start, omp_block_size); + } + + #pragma omp barrier + + #pragma omp master + { /* Threads are visited from last to first: per symbol c, the thread's bucket receives the current induction_bucket[c] (A) and induction_bucket[c] is lowered by that thread's count (B). */+ fast_sint_t t; + for (t = omp_num_threads - 1; t >= 0; --t) + { + sa_sint_t * RESTRICT temp_bucket = thread_state[t].state.buckets; + fast_sint_t c; for (c = 0; c < ALPHABET_SIZE; c += 1) { sa_sint_t A = induction_bucket[c], B = temp_bucket[c]; induction_bucket[c] = A - B; temp_bucket[c] = A; } + } + } + + #pragma omp barrier + + { + libsais_final_order_scan_right_to_left_8u_block_place(SA, thread_state[omp_thread_num].state.buckets, thread_state[omp_thread_num].state.cache, thread_state[omp_thread_num].state.count); + } + } +#endif + } +} + /* Aux-index variant of the block-parallel BWT right-to-left scan over SA[block_start, block_start + block_size). Same structure as the plain BWT block routine: sub-block stride rounded down to a multiple of 16 with the last thread taking the remainder; the serial aux scan when only one thread runs; otherwise prepare, barrier, master-only bucket distribution, barrier, then the aux place step, which also receives rm and I. */+static void libsais_final_bwt_aux_scan_right_to_left_8u_block_omp(const uint8_t * RESTRICT T, sa_sint_t * RESTRICT SA, sa_sint_t rm, sa_sint_t * RESTRICT I, sa_sint_t * RESTRICT induction_bucket, fast_sint_t block_start, fast_sint_t block_size, sa_sint_t threads, LIBSAIS_THREAD_STATE * RESTRICT thread_state) +{ +#if defined(_OPENMP) + #pragma omp parallel num_threads(threads) if(threads > 1 && block_size >= 64 * ALPHABET_SIZE && omp_get_dynamic() == 0) +#endif + { +#if defined(_OPENMP) + fast_sint_t omp_thread_num = omp_get_thread_num(); + fast_sint_t omp_num_threads = omp_get_num_threads(); +#else + UNUSED(threads); UNUSED(thread_state); + + fast_sint_t omp_thread_num = 0; + fast_sint_t omp_num_threads = 1; +#endif + fast_sint_t omp_block_stride = (block_size /
omp_num_threads) & (-16); + fast_sint_t omp_block_start = omp_thread_num * omp_block_stride; + fast_sint_t omp_block_size = omp_thread_num < omp_num_threads - 1 ? omp_block_stride : block_size - omp_block_start; + + omp_block_start += block_start; + + if (omp_num_threads == 1) + { + libsais_final_bwt_aux_scan_right_to_left_8u(T, SA, rm, I, induction_bucket, omp_block_start, omp_block_size); + } +#if defined(_OPENMP) + else + { + { + thread_state[omp_thread_num].state.count = libsais_final_bwt_aux_scan_right_to_left_8u_block_prepare(T, SA, thread_state[omp_thread_num].state.buckets, thread_state[omp_thread_num].state.cache, omp_block_start, omp_block_size); + } + + #pragma omp barrier + + #pragma omp master + { /* Threads are visited from last to first: per symbol c, the thread's bucket receives the current induction_bucket[c] (A) and induction_bucket[c] is lowered by that thread's count (B). */+ fast_sint_t t; + for (t = omp_num_threads - 1; t >= 0; --t) + { + sa_sint_t * RESTRICT temp_bucket = thread_state[t].state.buckets; + fast_sint_t c; for (c = 0; c < ALPHABET_SIZE; c += 1) { sa_sint_t A = induction_bucket[c], B = temp_bucket[c]; induction_bucket[c] = A - B; temp_bucket[c] = A; } + } + } + + #pragma omp barrier + + { + libsais_final_bwt_aux_scan_right_to_left_8u_block_place(SA, rm, I, thread_state[omp_thread_num].state.buckets, thread_state[omp_thread_num].state.cache, thread_state[omp_thread_num].state.count); + } + } +#endif + } +} + /* Final-sorting variant of the block-parallel right-to-left scan over SA[block_start, block_start + block_size). Sub-block stride is rounded down to a multiple of 16 and the last thread takes the remainder. One thread: calls the serial sorting scan. Several threads: sorting prepare step per thread, barrier, master-only bucket distribution, barrier, then the generic place step per thread. */+static void libsais_final_sorting_scan_right_to_left_8u_block_omp(const uint8_t * RESTRICT T, sa_sint_t * RESTRICT SA, sa_sint_t * RESTRICT induction_bucket, fast_sint_t block_start, fast_sint_t block_size, sa_sint_t threads, LIBSAIS_THREAD_STATE * RESTRICT thread_state) +{ +#if defined(_OPENMP) + #pragma omp parallel num_threads(threads) if(threads > 1 && block_size >= 64 * ALPHABET_SIZE && omp_get_dynamic() == 0) +#endif + { +#if defined(_OPENMP) + fast_sint_t omp_thread_num = omp_get_thread_num(); + fast_sint_t omp_num_threads = omp_get_num_threads(); +#else + UNUSED(threads); UNUSED(thread_state); + + fast_sint_t omp_thread_num = 0; + fast_sint_t omp_num_threads = 1; +#endif + fast_sint_t omp_block_stride
= (block_size / omp_num_threads) & (-16); + fast_sint_t omp_block_start = omp_thread_num * omp_block_stride; + fast_sint_t omp_block_size = omp_thread_num < omp_num_threads - 1 ? omp_block_stride : block_size - omp_block_start; + + omp_block_start += block_start; + + if (omp_num_threads == 1) + { + libsais_final_sorting_scan_right_to_left_8u(T, SA, induction_bucket, omp_block_start, omp_block_size); + } +#if defined(_OPENMP) + else + { + { + thread_state[omp_thread_num].state.count = libsais_final_sorting_scan_right_to_left_8u_block_prepare(T, SA, thread_state[omp_thread_num].state.buckets, thread_state[omp_thread_num].state.cache, omp_block_start, omp_block_size); + } + + #pragma omp barrier + + #pragma omp master + { /* Threads are visited from last to first: per symbol c, the thread's bucket receives the current induction_bucket[c] (A) and induction_bucket[c] is lowered by that thread's count (B). */+ fast_sint_t t; + for (t = omp_num_threads - 1; t >= 0; --t) + { + sa_sint_t * RESTRICT temp_bucket = thread_state[t].state.buckets; + fast_sint_t c; for (c = 0; c < ALPHABET_SIZE; c += 1) { sa_sint_t A = induction_bucket[c], B = temp_bucket[c]; induction_bucket[c] = A - B; temp_bucket[c] = A; } + } + } + + #pragma omp barrier + + { + libsais_final_order_scan_right_to_left_8u_block_place(SA, thread_state[omp_thread_num].state.buckets, thread_state[omp_thread_num].state.cache, thread_state[omp_thread_num].state.count); + } + } +#endif + } +} + /* 32-bit-alphabet block-parallel final sorting scan over SA[block_start, block_start + block_size). The parallel region is enabled when threads > 1 and block_size >= 16384. One thread: calls the serial 32s scan. Several threads: every thread gathers its sub-block into the shared cache, then after a barrier the master thread alone runs the sort step over the whole block, and after a second barrier every thread calls libsais_compact_and_place_cached_suffixes on its sub-block. The cache pointer is passed as cache - block_start so the helpers can index it with absolute SA positions. */+static void libsais_final_sorting_scan_right_to_left_32s_block_omp(const sa_sint_t * RESTRICT T, sa_sint_t * RESTRICT SA, sa_sint_t * RESTRICT buckets, LIBSAIS_THREAD_CACHE * RESTRICT cache, fast_sint_t block_start, fast_sint_t block_size, sa_sint_t threads) +{ +#if defined(_OPENMP) + #pragma omp parallel num_threads(threads) if(threads > 1 && block_size >= 16384) +#endif + { +#if defined(_OPENMP) + fast_sint_t omp_thread_num = omp_get_thread_num(); + fast_sint_t omp_num_threads = omp_get_num_threads(); +#else + UNUSED(threads); UNUSED(cache); + + fast_sint_t omp_thread_num = 0; + fast_sint_t omp_num_threads = 1; +#endif + fast_sint_t omp_block_stride = (block_size / omp_num_threads) & (-16); + fast_sint_t
omp_block_start = omp_thread_num * omp_block_stride; + fast_sint_t omp_block_size = omp_thread_num < omp_num_threads - 1 ? omp_block_stride : block_size - omp_block_start; + + omp_block_start += block_start; + + if (omp_num_threads == 1) + { + libsais_final_sorting_scan_right_to_left_32s(T, SA, buckets, omp_block_start, omp_block_size); + } +#if defined(_OPENMP) + else + { + { + libsais_final_sorting_scan_right_to_left_32s_block_gather(T, SA, cache - block_start, omp_block_start, omp_block_size); + } + + #pragma omp barrier + + #pragma omp master + { /* Only the master thread runs the sort step, and it covers the whole block rather than a per-thread sub-block. */+ libsais_final_sorting_scan_right_to_left_32s_block_sort(T, buckets, cache - block_start, block_start, block_size); + } + + #pragma omp barrier + + { + libsais_compact_and_place_cached_suffixes(SA, cache - block_start, omp_block_start, omp_block_size); + } + } +#endif + } +} + +#endif + /* Right-to-left BWT scan over SA[0, n). When threads == 1 or n < 65536 the serial scan is used and its return value is returned. In the OpenMP path SA is walked from index n - 1 downward: an entry equal to 0 has its position stored in index and is skipped; otherwise a run of consecutive non-zero entries is collected, capped at threads * (LIBSAIS_PER_THREAD_CACHE_SIZE - 16 * threads) entries, and is processed inline when shorter than 32 entries or handed to the block-parallel routine otherwise. Returns index, which is initialised to -1. */+static sa_sint_t libsais_final_bwt_scan_right_to_left_8u_omp(const uint8_t * RESTRICT T, sa_sint_t * RESTRICT SA, sa_sint_t n, sa_sint_t * RESTRICT induction_bucket, sa_sint_t threads, LIBSAIS_THREAD_STATE * RESTRICT thread_state) +{ + sa_sint_t index = -1; + + if (threads == 1 || n < 65536) + { + index = libsais_final_bwt_scan_right_to_left_8u(T, SA, induction_bucket, 0, n); + } +#if defined(_OPENMP) + else + { + fast_sint_t block_start; + for (block_start = (fast_sint_t)n - 1; block_start >= 0; ) + { + if (SA[block_start] == 0) + { + index = (sa_sint_t)block_start--; + } + else + { + fast_sint_t block_max_end = block_start - ((fast_sint_t)threads) * (LIBSAIS_PER_THREAD_CACHE_SIZE - 16 * (fast_sint_t)threads); if (block_max_end < 0) { block_max_end = -1; } + fast_sint_t block_end = block_start - 1; while (block_end > block_max_end && SA[block_end] != 0) { block_end--; } + fast_sint_t block_size = block_start - block_end; + + if (block_size < 32) + { + for (; block_start > block_end; block_start -= 1) + { + sa_sint_t p = SA[block_start]; SA[block_start] = p & SAINT_MAX; if (p > 0) { p--; uint8_t c0 = T[p - (p > 0)], c1 = T[p];
SA[block_start] = c1; sa_sint_t t = c0 | SAINT_MIN; SA[--induction_bucket[c1]] = (c0 <= c1) ? p : t; } + } + } + else + { + libsais_final_bwt_scan_right_to_left_8u_block_omp(T, SA, induction_bucket, block_end + 1, block_size, threads, thread_state); + block_start = block_end; + } + } + } + } +#else + UNUSED(thread_state); +#endif + + return index; +} + /* Right-to-left BWT scan over SA[0, n) that also fills the auxiliary index array I. When threads == 1 or n < 65536 the serial aux scan is used. In the OpenMP path SA is walked from index n - 1 downward: zero entries are skipped; otherwise a run of consecutive non-zero entries is collected, capped at threads * ((LIBSAIS_PER_THREAD_CACHE_SIZE - 16 * threads) / 2) entries (the aux prepare step appends two cache records per entry), and is processed inline when shorter than 32 entries or handed to the block-parallel aux routine otherwise. */+static void libsais_final_bwt_aux_scan_right_to_left_8u_omp(const uint8_t * RESTRICT T, sa_sint_t * RESTRICT SA, sa_sint_t n, sa_sint_t rm, sa_sint_t * RESTRICT I, sa_sint_t * RESTRICT induction_bucket, sa_sint_t threads, LIBSAIS_THREAD_STATE * RESTRICT thread_state) +{ + if (threads == 1 || n < 65536) + { + libsais_final_bwt_aux_scan_right_to_left_8u(T, SA, rm, I, induction_bucket, 0, n); + } +#if defined(_OPENMP) + else + { + fast_sint_t block_start; + for (block_start = (fast_sint_t)n - 1; block_start >= 0; ) + { + if (SA[block_start] == 0) + { + block_start--; + } + else + { + fast_sint_t block_max_end = block_start - ((fast_sint_t)threads) * ((LIBSAIS_PER_THREAD_CACHE_SIZE - 16 * (fast_sint_t)threads) / 2); if (block_max_end < 0) { block_max_end = -1; } + fast_sint_t block_end = block_start - 1; while (block_end > block_max_end && SA[block_end] != 0) { block_end--; } + fast_sint_t block_size = block_start - block_end; + + if (block_size < 32) + { + for (; block_start > block_end; block_start -= 1) + { + sa_sint_t p = SA[block_start]; SA[block_start] = p & SAINT_MAX; if (p > 0) { p--; uint8_t c0 = T[p - (p > 0)], c1 = T[p]; SA[block_start] = c1; sa_sint_t t = c0 | SAINT_MIN; SA[--induction_bucket[c1]] = (c0 <= c1) ?
p : t; if ((p & rm) == 0) { I[p / (rm + 1)] = induction_bucket[T[p]] + 1; } } + } + } + else + { + libsais_final_bwt_aux_scan_right_to_left_8u_block_omp(T, SA, rm, I, induction_bucket, block_end + 1, block_size, threads, thread_state); + block_start = block_end; + } + } + } + } +#else + UNUSED(thread_state); +#endif +} + /* Right-to-left final sorting scan over SA[0, n) for the 8-bit alphabet. When threads == 1 or n < 65536 the serial scan is used. In the OpenMP path SA is walked from index n - 1 downward: zero entries are skipped; otherwise a run of consecutive non-zero entries is collected, capped at threads * (LIBSAIS_PER_THREAD_CACHE_SIZE - 16 * threads) entries, and is processed inline when shorter than 32 entries or handed to the block-parallel sorting routine otherwise. */+static void libsais_final_sorting_scan_right_to_left_8u_omp(const uint8_t * RESTRICT T, sa_sint_t * RESTRICT SA, sa_sint_t n, sa_sint_t * RESTRICT induction_bucket, sa_sint_t threads, LIBSAIS_THREAD_STATE * RESTRICT thread_state) +{ + if (threads == 1 || n < 65536) + { + libsais_final_sorting_scan_right_to_left_8u(T, SA, induction_bucket, 0, n); + } +#if defined(_OPENMP) + else + { + fast_sint_t block_start; + for (block_start = (fast_sint_t)n - 1; block_start >= 0; ) + { + if (SA[block_start] == 0) + { + block_start--; + } + else + { + fast_sint_t block_max_end = block_start - ((fast_sint_t)threads) * (LIBSAIS_PER_THREAD_CACHE_SIZE - 16 * (fast_sint_t)threads); if (block_max_end < -1) { block_max_end = -1; } + fast_sint_t block_end = block_start - 1; while (block_end > block_max_end && SA[block_end] != 0) { block_end--; } + fast_sint_t block_size = block_start - block_end; + + if (block_size < 32) + { + for (; block_start > block_end; block_start -= 1) + { + sa_sint_t p = SA[block_start]; SA[block_start] = p & SAINT_MAX; if (p > 0) { p--; SA[--induction_bucket[T[p]]] = p | ((sa_sint_t)(T[p - (p > 0)] > T[p]) << (SAINT_BIT - 1)); } + } + } + else + { + libsais_final_sorting_scan_right_to_left_8u_block_omp(T, SA, induction_bucket, block_end + 1, block_size, threads, thread_state); + block_start = block_end; + } + } + } + } +#else + UNUSED(thread_state); +#endif +} + /* Right-to-left final sorting scan over SA[0, n) for the 32-bit alphabet. When threads == 1 or n < 65536 the serial scan is used. In the OpenMP path SA is cut, from index n - 1 downward, into consecutive blocks of at most threads * LIBSAIS_PER_THREAD_CACHE_SIZE entries; each block is passed to the block-parallel 32s routine together with thread_state[0].state.cache. */+static void libsais_final_sorting_scan_right_to_left_32s_omp(const sa_sint_t * RESTRICT T, sa_sint_t * RESTRICT SA, sa_sint_t n, sa_sint_t * RESTRICT induction_bucket, sa_sint_t threads, LIBSAIS_THREAD_STATE * RESTRICT thread_state) +{ + if (threads == 1 || n < 65536) + {
libsais_final_sorting_scan_right_to_left_32s(T, SA, induction_bucket, 0, n); + } +#if defined(_OPENMP) + else + { + fast_sint_t block_start, block_end; + for (block_start = (fast_sint_t)n - 1; block_start >= 0; block_start = block_end) + { + block_end = block_start - (fast_sint_t)threads * LIBSAIS_PER_THREAD_CACHE_SIZE; if (block_end < 0) { block_end = -1; } + + libsais_final_sorting_scan_right_to_left_32s_block_omp(T, SA, induction_bucket, thread_state[0].state.cache, block_end + 1, block_start - block_end, threads); + } + } +#else + UNUSED(thread_state); +#endif +} + /* For every c in [0, k) with bucket_end[c] > bucket_start[c], zero-fills SA[bucket_start[c], bucket_end[c]). The loop over c is an OpenMP parallel for (static schedule, chunk 1) when threads > 1 and n >= 65536. */+static void libsais_clear_lms_suffixes_omp(sa_sint_t * RESTRICT SA, sa_sint_t n, sa_sint_t k, sa_sint_t * RESTRICT bucket_start, sa_sint_t * RESTRICT bucket_end, sa_sint_t threads) +{ + fast_sint_t c; + +#if defined(_OPENMP) + #pragma omp parallel for schedule(static, 1) num_threads(threads) if(threads > 1 && n >= 65536) +#else + UNUSED(threads); UNUSED(n); +#endif + for (c = 0; c < k; ++c) + { + if (bucket_end[c] > bucket_start[c]) + { + memset(&SA[bucket_start[c]], 0, ((size_t)bucket_end[c] - (size_t)bucket_start[c]) * sizeof(sa_sint_t)); + } + } +} + /* Final induction pass for the 8-bit alphabet, in one of three modes: plain suffix array (bwt == 0), BWT with auxiliary indexes (bwt != 0 and I != NULL, passing r - 1 as the mask rm), or plain BWT. Each mode runs the left-to-right scan with the bucket slice at 6 * ALPHABET_SIZE, optionally clears the ranges between the slices at 6 * and 7 * ALPHABET_SIZE (only when threads > 1 and n >= 65536), then runs the right-to-left scan with the slice at 7 * ALPHABET_SIZE. Returns the result of the right-to-left BWT scan in plain BWT mode and 0 in the other two modes. */+static sa_sint_t libsais_induce_final_order_8u_omp(const uint8_t * RESTRICT T, sa_sint_t * RESTRICT SA, sa_sint_t n, sa_sint_t bwt, sa_sint_t r, sa_sint_t * RESTRICT I, sa_sint_t * RESTRICT buckets, sa_sint_t threads, LIBSAIS_THREAD_STATE * RESTRICT thread_state) +{ + if (!bwt) + { + libsais_final_sorting_scan_left_to_right_8u_omp(T, SA, n, &buckets[6 * ALPHABET_SIZE], threads, thread_state); + if (threads > 1 && n >= 65536) { libsais_clear_lms_suffixes_omp(SA, n, ALPHABET_SIZE, &buckets[6 * ALPHABET_SIZE], &buckets[7 * ALPHABET_SIZE], threads); } + libsais_final_sorting_scan_right_to_left_8u_omp(T, SA, n, &buckets[7 * ALPHABET_SIZE], threads, thread_state); + return 0; + } + else if (I != NULL) + { + libsais_final_bwt_aux_scan_left_to_right_8u_omp(T, SA, n, r - 1, I, &buckets[6 * ALPHABET_SIZE], threads, thread_state); + if (threads > 1 && n >=
65536) { libsais_clear_lms_suffixes_omp(SA, n, ALPHABET_SIZE, &buckets[6 * ALPHABET_SIZE], &buckets[7 * ALPHABET_SIZE], threads); } + libsais_final_bwt_aux_scan_right_to_left_8u_omp(T, SA, n, r - 1, I, &buckets[7 * ALPHABET_SIZE], threads, thread_state); + return 0; + } + else + { + libsais_final_bwt_scan_left_to_right_8u_omp(T, SA, n, &buckets[6 * ALPHABET_SIZE], threads, thread_state); + if (threads > 1 && n >= 65536) { libsais_clear_lms_suffixes_omp(SA, n, ALPHABET_SIZE, &buckets[6 * ALPHABET_SIZE], &buckets[7 * ALPHABET_SIZE], threads); } + return libsais_final_bwt_scan_right_to_left_8u_omp(T, SA, n, &buckets[7 * ALPHABET_SIZE], threads, thread_state); + } +} + /* Final induction pass for the 32-bit alphabet with the 6k bucket layout: left-to-right scan using the bucket slice at 4 * k, then right-to-left scan using the slice at 5 * k. */+static void libsais_induce_final_order_32s_6k(const sa_sint_t * RESTRICT T, sa_sint_t * RESTRICT SA, sa_sint_t n, sa_sint_t k, sa_sint_t * RESTRICT buckets, sa_sint_t threads, LIBSAIS_THREAD_STATE * RESTRICT thread_state) +{ + libsais_final_sorting_scan_left_to_right_32s_omp(T, SA, n, &buckets[4 * k], threads, thread_state); + libsais_final_sorting_scan_right_to_left_32s_omp(T, SA, n, &buckets[5 * k], threads, thread_state); +} + /* Final induction pass for the 32-bit alphabet with the 4k bucket layout: left-to-right scan using the bucket slice at 2 * k, then right-to-left scan using the slice at 3 * k. */+static void libsais_induce_final_order_32s_4k(const sa_sint_t * RESTRICT T, sa_sint_t * RESTRICT SA, sa_sint_t n, sa_sint_t k, sa_sint_t * RESTRICT buckets, sa_sint_t threads, LIBSAIS_THREAD_STATE * RESTRICT thread_state) +{ + libsais_final_sorting_scan_left_to_right_32s_omp(T, SA, n, &buckets[2 * k], threads, thread_state); + libsais_final_sorting_scan_right_to_left_32s_omp(T, SA, n, &buckets[3 * k], threads, thread_state); +} + /* Final induction pass for the 32-bit alphabet with the 2k bucket layout: left-to-right scan using the bucket slice at 1 * k, then right-to-left scan using the slice at 0 * k. */+static void libsais_induce_final_order_32s_2k(const sa_sint_t * RESTRICT T, sa_sint_t * RESTRICT SA, sa_sint_t n, sa_sint_t k, sa_sint_t * RESTRICT buckets, sa_sint_t threads, LIBSAIS_THREAD_STATE * RESTRICT thread_state) +{ + libsais_final_sorting_scan_left_to_right_32s_omp(T, SA, n, &buckets[1 * k], threads, thread_state); + libsais_final_sorting_scan_right_to_left_32s_omp(T, SA, n, &buckets[0 * k], threads, thread_state); +} + +static void
libsais_induce_final_order_32s_1k(const sa_sint_t * RESTRICT T, sa_sint_t * RESTRICT SA, sa_sint_t n, sa_sint_t k, sa_sint_t * RESTRICT buckets, sa_sint_t threads, LIBSAIS_THREAD_STATE * RESTRICT thread_state) +{ /* Final induction pass for the 32-bit alphabet with a single k-entry bucket array: the array is recounted and re-initialised before each scan (bucket starts for the left-to-right scan, bucket ends for the right-to-left scan) because both scans share the same storage. */+ libsais_count_suffixes_32s(T, n, k, buckets); + libsais_initialize_buckets_start_32s_1k(k, buckets); + libsais_final_sorting_scan_left_to_right_32s_omp(T, SA, n, buckets, threads, thread_state); + + libsais_count_suffixes_32s(T, n, k, buckets); + libsais_initialize_buckets_end_32s_1k(k, buckets); + libsais_final_sorting_scan_right_to_left_32s_omp(T, SA, n, buckets, threads, thread_state); +} + /* Renumbering pass over SA[omp_block_start, omp_block_start + omp_block_size), using SAm = &SA[m] as a table indexed by (position >> 1). For each entry p = SA[i] with s = SAm[p >> 1]: when s < 0, the top bit of T[p] is set, f is incremented and s becomes i + SAINT_MIN + f; in every case SAm[p >> 1] is then written with s - f. f is the running count of entries found with a negative table value, seeded by the caller; the updated f is returned. The main loop handles four entries per iteration with prefetching; the second loop handles the remainder. */+static sa_sint_t libsais_renumber_unique_and_nonunique_lms_suffixes_32s(sa_sint_t * RESTRICT T, sa_sint_t * RESTRICT SA, sa_sint_t m, sa_sint_t f, fast_sint_t omp_block_start, fast_sint_t omp_block_size) +{ + const fast_sint_t prefetch_distance = 32; + + sa_sint_t * RESTRICT SAm = &SA[m]; + + sa_sint_t i, j; + for (i = (sa_sint_t)omp_block_start, j = (sa_sint_t)omp_block_start + (sa_sint_t)omp_block_size - 2 * (sa_sint_t)prefetch_distance - 3; i < j; i += 4) + { + libsais_prefetch(&SA[i + 3 * prefetch_distance]); + + libsais_prefetchw(&SAm[((sa_uint_t)SA[i + 2 * prefetch_distance + 0]) >> 1]); + libsais_prefetchw(&SAm[((sa_uint_t)SA[i + 2 * prefetch_distance + 1]) >> 1]); + libsais_prefetchw(&SAm[((sa_uint_t)SA[i + 2 * prefetch_distance + 2]) >> 1]); + libsais_prefetchw(&SAm[((sa_uint_t)SA[i + 2 * prefetch_distance + 3]) >> 1]); + + sa_uint_t q0 = (sa_uint_t)SA[i + prefetch_distance + 0]; const sa_sint_t * Tq0 = &T[q0]; libsais_prefetchw(SAm[q0 >> 1] < 0 ? Tq0 : NULL); + sa_uint_t q1 = (sa_uint_t)SA[i + prefetch_distance + 1]; const sa_sint_t * Tq1 = &T[q1]; libsais_prefetchw(SAm[q1 >> 1] < 0 ? Tq1 : NULL); + sa_uint_t q2 = (sa_uint_t)SA[i + prefetch_distance + 2]; const sa_sint_t * Tq2 = &T[q2]; libsais_prefetchw(SAm[q2 >> 1] < 0 ?
Tq2 : NULL); + sa_uint_t q3 = (sa_uint_t)SA[i + prefetch_distance + 3]; const sa_sint_t * Tq3 = &T[q3]; libsais_prefetchw(SAm[q3 >> 1] < 0 ? Tq3 : NULL); + + sa_uint_t p0 = (sa_uint_t)SA[i + 0]; sa_sint_t s0 = SAm[p0 >> 1]; if (s0 < 0) { T[p0] |= SAINT_MIN; f++; s0 = i + 0 + SAINT_MIN + f; } SAm[p0 >> 1] = s0 - f; + sa_uint_t p1 = (sa_uint_t)SA[i + 1]; sa_sint_t s1 = SAm[p1 >> 1]; if (s1 < 0) { T[p1] |= SAINT_MIN; f++; s1 = i + 1 + SAINT_MIN + f; } SAm[p1 >> 1] = s1 - f; + sa_uint_t p2 = (sa_uint_t)SA[i + 2]; sa_sint_t s2 = SAm[p2 >> 1]; if (s2 < 0) { T[p2] |= SAINT_MIN; f++; s2 = i + 2 + SAINT_MIN + f; } SAm[p2 >> 1] = s2 - f; + sa_uint_t p3 = (sa_uint_t)SA[i + 3]; sa_sint_t s3 = SAm[p3 >> 1]; if (s3 < 0) { T[p3] |= SAINT_MIN; f++; s3 = i + 3 + SAINT_MIN + f; } SAm[p3 >> 1] = s3 - f; + } + + /* Remainder loop: same per-entry work as above, one entry at a time and without prefetching. */+ for (j += 2 * (sa_sint_t)prefetch_distance + 3; i < j; i += 1) + { + sa_uint_t p = (sa_uint_t)SA[i]; sa_sint_t s = SAm[p >> 1]; if (s < 0) { T[p] |= SAINT_MIN; f++; s = i + SAINT_MIN + f; } SAm[p >> 1] = s - f; + } + + return f; +} + /* Compaction pass over SA[m + omp_block_start, m + omp_block_start + omp_block_size), walked from the highest index down. Two output cursors l = *pl - 1 and r = *pr - 1 both write into SA and move downward. Each entry p is written unconditionally as (p & SAINT_MAX) at SA[l] and as (p - 1) at SA[r], but l only advances when p < 0 and r only advances when p > 0, so a value is kept in a stream only when its cursor moves past it. On return *pl and *pr hold l + 1 and r + 1, the lowest index kept in each stream. */+static void libsais_compact_unique_and_nonunique_lms_suffixes_32s(sa_sint_t * RESTRICT SA, sa_sint_t m, fast_sint_t * pl, fast_sint_t * pr, fast_sint_t omp_block_start, fast_sint_t omp_block_size) +{ + const fast_sint_t prefetch_distance = 32; + + sa_sint_t * RESTRICT SAl = &SA[0]; + sa_sint_t * RESTRICT SAr = &SA[0]; + + fast_sint_t i, j, l = *pl - 1, r = *pr - 1; + for (i = (fast_sint_t)m + omp_block_start + omp_block_size - 1, j = (fast_sint_t)m + omp_block_start + 3; i >= j; i -= 4) + { + libsais_prefetch(&SA[i - prefetch_distance]); + + sa_sint_t p0 = SA[i - 0]; SAl[l] = p0 & SAINT_MAX; l -= p0 < 0; SAr[r] = p0 - 1; r -= p0 > 0; + sa_sint_t p1 = SA[i - 1]; SAl[l] = p1 & SAINT_MAX; l -= p1 < 0; SAr[r] = p1 - 1; r -= p1 > 0; + sa_sint_t p2 = SA[i - 2]; SAl[l] = p2 & SAINT_MAX; l -= p2 < 0; SAr[r] = p2 - 1; r -= p2 > 0; + sa_sint_t p3 = SA[i - 3]; SAl[l] = p3 & SAINT_MAX; l -= p3 < 0; SAr[r] = p3 - 1; r -= p3 > 0; + } + + for (j -= 3; i >= j; i
-= 1) + { + sa_sint_t p = SA[i]; SAl[l] = p & SAINT_MAX; l -= p < 0; SAr[r] = p - 1; r -= p > 0; + } + + *pl = l + 1; *pr = r + 1; +} + + +#if defined(_OPENMP) + /* Counts the entries i in [omp_block_start, omp_block_start + omp_block_size) for which SAm[SA[i] >> 1] < 0, where SAm = &SA[m]. Read-only: nothing in SA is modified. The main loop uses four independent accumulators and handles four entries per iteration; the second loop handles the remainder. */+static sa_sint_t libsais_count_unique_suffixes(sa_sint_t * RESTRICT SA, sa_sint_t m, fast_sint_t omp_block_start, fast_sint_t omp_block_size) +{ + const fast_sint_t prefetch_distance = 32; + + sa_sint_t * RESTRICT SAm = &SA[m]; + + fast_sint_t i, j; sa_sint_t f0 = 0, f1 = 0, f2 = 0, f3 = 0; + for (i = omp_block_start, j = omp_block_start + omp_block_size - prefetch_distance - 3; i < j; i += 4) + { + libsais_prefetch(&SA[i + 2 * prefetch_distance]); + + libsais_prefetch(&SAm[((sa_uint_t)SA[i + prefetch_distance + 0]) >> 1]); + libsais_prefetch(&SAm[((sa_uint_t)SA[i + prefetch_distance + 1]) >> 1]); + libsais_prefetch(&SAm[((sa_uint_t)SA[i + prefetch_distance + 2]) >> 1]); + libsais_prefetch(&SAm[((sa_uint_t)SA[i + prefetch_distance + 3]) >> 1]); + + f0 += SAm[((sa_uint_t)SA[i + 0]) >> 1] < 0; + f1 += SAm[((sa_uint_t)SA[i + 1]) >> 1] < 0; + f2 += SAm[((sa_uint_t)SA[i + 2]) >> 1] < 0; + f3 += SAm[((sa_uint_t)SA[i + 3]) >> 1] < 0; + } + + for (j += prefetch_distance + 3; i < j; i += 1) + { + f0 += SAm[((sa_uint_t)SA[i]) >> 1] < 0; + } + + return f0 + f1 + f2 + f3; +} + +#endif + /* Runs the renumbering pass over SA[0, m) and returns the total count f. The parallel region is enabled when threads > 1 and m >= 65536; each thread gets a sub-block whose stride is rounded down to a multiple of 16, the last thread taking the remainder. One thread: a single renumbering call seeded with f = 0. Several threads: every thread first counts its own sub-block, and after a barrier each thread sums the counts of the lower-numbered threads to seed its own renumbering call; the last thread also publishes the grand total as f. */+static sa_sint_t libsais_renumber_unique_and_nonunique_lms_suffixes_32s_omp(sa_sint_t * RESTRICT T, sa_sint_t * RESTRICT SA, sa_sint_t m, sa_sint_t threads, LIBSAIS_THREAD_STATE * RESTRICT thread_state) +{ + sa_sint_t f = 0; + +#if defined(_OPENMP) + #pragma omp parallel num_threads(threads) if(threads > 1 && m >= 65536) +#endif + { +#if defined(_OPENMP) + fast_sint_t omp_thread_num = omp_get_thread_num(); + fast_sint_t omp_num_threads = omp_get_num_threads(); +#else + UNUSED(threads); UNUSED(thread_state); + + fast_sint_t omp_thread_num = 0; + fast_sint_t omp_num_threads = 1; +#endif + fast_sint_t omp_block_stride = (m / omp_num_threads) & (-16); + fast_sint_t omp_block_start = omp_thread_num * omp_block_stride; + fast_sint_t
omp_block_size = omp_thread_num < omp_num_threads - 1 ? omp_block_stride : m - omp_block_start; + + if (omp_num_threads == 1) + { + f = libsais_renumber_unique_and_nonunique_lms_suffixes_32s(T, SA, m, 0, omp_block_start, omp_block_size); + } +#if defined(_OPENMP) + else + { + { + thread_state[omp_thread_num].state.count = libsais_count_unique_suffixes(SA, m, omp_block_start, omp_block_size); + } + + #pragma omp barrier + + { /* count = sum of the per-thread counts of all lower-numbered threads; it seeds this thread's renumbering call. */+ fast_sint_t t, count = 0; for (t = 0; t < omp_thread_num; ++t) { count += thread_state[t].state.count; } + + if (omp_thread_num == omp_num_threads - 1) + { + f = (sa_sint_t)(count + thread_state[omp_thread_num].state.count); + } + + libsais_renumber_unique_and_nonunique_lms_suffixes_32s(T, SA, m, (sa_sint_t)count, omp_block_start, omp_block_size); + } + } +#endif + } + + return f; +} + /* Runs the compaction pass over the (n >> 1) entries starting at SA[m], then copies f entries from SA[m - f] to SA[n + fs - m]. The parallel region is enabled when threads > 1, n >= 131072 and m < fs. One thread: a single compaction call with output cursors starting at m and n + fs. Several threads: each thread compacts its sub-block with output cursors starting at the end of a private range (tracked in thread_state position and count), and after a barrier the master thread copies the per-thread results together so that the two streams end at m and at n + fs. */+static void libsais_compact_unique_and_nonunique_lms_suffixes_32s_omp(sa_sint_t * RESTRICT SA, sa_sint_t n, sa_sint_t m, sa_sint_t fs, sa_sint_t f, sa_sint_t threads, LIBSAIS_THREAD_STATE * RESTRICT thread_state) +{ +#if defined(_OPENMP) + #pragma omp parallel num_threads(threads) if(threads > 1 && n >= 131072 && m < fs) +#endif + { +#if defined(_OPENMP) + fast_sint_t omp_thread_num = omp_get_thread_num(); + fast_sint_t omp_num_threads = omp_get_num_threads(); +#else + UNUSED(threads); UNUSED(thread_state); + + fast_sint_t omp_thread_num = 0; + fast_sint_t omp_num_threads = 1; +#endif + fast_sint_t omp_block_stride = (((fast_sint_t)n >> 1) / omp_num_threads) & (-16); + fast_sint_t omp_block_start = omp_thread_num * omp_block_stride; + fast_sint_t omp_block_size = omp_thread_num < omp_num_threads - 1 ?
omp_block_stride : ((fast_sint_t)n >> 1) - omp_block_start; + + if (omp_num_threads == 1) + { + fast_sint_t l = m, r = (fast_sint_t)n + (fast_sint_t)fs; + libsais_compact_unique_and_nonunique_lms_suffixes_32s(SA, m, &l, &r, omp_block_start, omp_block_size); + } +#if defined(_OPENMP) + else + { + { /* Each thread's output cursors start at the end of a private range: position at m + (n >> 1) + end of the sub-block, count at m + end of the sub-block. The compaction call moves them down to the lowest index it kept. */+ thread_state[omp_thread_num].state.position = (fast_sint_t)m + ((fast_sint_t)n >> 1) + omp_block_start + omp_block_size; + thread_state[omp_thread_num].state.count = (fast_sint_t)m + omp_block_start + omp_block_size; + + libsais_compact_unique_and_nonunique_lms_suffixes_32s(SA, m, &thread_state[omp_thread_num].state.position, &thread_state[omp_thread_num].state.count, omp_block_start, omp_block_size); + } + + #pragma omp barrier + + #pragma omp master + { /* Merge step, threads visited from last to first. First loop: each thread's kept range [state.position, m + (n >> 1) + sub-block end) is copied so the ranges become contiguous and end at index m. */+ fast_sint_t t, position; + + for (position = m, t = omp_num_threads - 1; t >= 0; --t) + { + fast_sint_t omp_block_end = t < omp_num_threads - 1 ? omp_block_stride * (t + 1) : ((fast_sint_t)n >> 1); + fast_sint_t count = ((fast_sint_t)m + ((fast_sint_t)n >> 1) + omp_block_end - thread_state[t].state.position); + + if (count > 0) + { + position -= count; memcpy(&SA[position], &SA[thread_state[t].state.position], (size_t)count * sizeof(sa_sint_t)); + } + } + + /* Second loop: each thread's kept range [state.count, m + sub-block end) is copied so the ranges become contiguous and end at index n + fs. */+ for (position = (fast_sint_t)n + (fast_sint_t)fs, t = omp_num_threads - 1; t >= 0; --t) + { + fast_sint_t omp_block_end = t < omp_num_threads - 1 ?
omp_block_stride * (t + 1) : ((fast_sint_t)n >> 1); + fast_sint_t count = ((fast_sint_t)m + omp_block_end - thread_state[t].state.count); + + if (count > 0) + { + position -= count; memcpy(&SA[position], &SA[thread_state[t].state.count], (size_t)count * sizeof(sa_sint_t)); + } + } + } + } +#endif + } + + /* Runs after the (possibly parallel) region: copy f entries from SA[m - f] to SA[n + fs - m]. */ memcpy(&SA[(fast_sint_t)n + (fast_sint_t)fs - (fast_sint_t)m], &SA[(fast_sint_t)m - (fast_sint_t)f], (size_t)f * sizeof(sa_sint_t)); +} + +/* Renumbers the LMS suffixes and then compacts them. Returns f, the value produced by the renumbering step, which is also passed on to the compaction step. */ static sa_sint_t libsais_compact_lms_suffixes_32s_omp(sa_sint_t * RESTRICT T, sa_sint_t * RESTRICT SA, sa_sint_t n, sa_sint_t m, sa_sint_t fs, sa_sint_t threads, LIBSAIS_THREAD_STATE * RESTRICT thread_state) +{ + sa_sint_t f = libsais_renumber_unique_and_nonunique_lms_suffixes_32s_omp(T, SA, m, threads, thread_state); + libsais_compact_unique_and_nonunique_lms_suffixes_32s_omp(SA, n, m, fs, f, threads, thread_state); + + return f; +} + +/* Scans T over [omp_block_start, omp_block_start + omp_block_size). For every negative T[i] the value is masked with SAINT_MAX and i is stored at SA[tmp], where the tmp values are read sequentially from SA starting at index n - m - 1 + l. After each hit i is advanced by one extra position, so the element directly following a marked one is not tested. */ static void libsais_merge_unique_lms_suffixes_32s(sa_sint_t * RESTRICT T, sa_sint_t * RESTRICT SA, sa_sint_t n, sa_sint_t m, fast_sint_t l, fast_sint_t omp_block_start, fast_sint_t omp_block_size) +{ + const fast_sint_t prefetch_distance = 32; + + const sa_sint_t * RESTRICT SAnm = &SA[(fast_sint_t)n - (fast_sint_t)m - 1 + l]; + + sa_sint_t i, j; fast_sint_t tmp = *SAnm++; + /* Main loop, unrolled by 4; the bound stops 6 short of the block end because the extra i++ on hits can push the reads up to T[i + 6] relative to the i tested by the loop condition. */ for (i = (sa_sint_t)omp_block_start, j = (sa_sint_t)omp_block_start + (sa_sint_t)omp_block_size - 6; i < j; i += 4) + { + libsais_prefetch(&T[i + prefetch_distance]); + + sa_sint_t c0 = T[i + 0]; if (c0 < 0) { T[i + 0] = c0 & SAINT_MAX; SA[tmp] = i + 0; i++; tmp = *SAnm++; } + sa_sint_t c1 = T[i + 1]; if (c1 < 0) { T[i + 1] = c1 & SAINT_MAX; SA[tmp] = i + 1; i++; tmp = *SAnm++; } + sa_sint_t c2 = T[i + 2]; if (c2 < 0) { T[i + 2] = c2 & SAINT_MAX; SA[tmp] = i + 2; i++; tmp = *SAnm++; } + sa_sint_t c3 = T[i + 3]; if (c3 < 0) { T[i + 3] = c3 & SAINT_MAX; SA[tmp] = i + 3; i++; tmp = *SAnm++; } + } + + /* Scalar loop over the remaining positions of the block. */ for (j += 6; i < j; i += 1) + { + sa_sint_t c = T[i]; if (c < 0) { T[i] = c & SAINT_MAX; SA[tmp] = i; i++; tmp = *SAnm++; } + } +}
+ +/* Replaces every zero entry of SA within [omp_block_start, omp_block_start + omp_block_size) by the next value read sequentially from SA starting at index n - m - 1 + l. */ static void libsais_merge_nonunique_lms_suffixes_32s(sa_sint_t * RESTRICT SA, sa_sint_t n, sa_sint_t m, fast_sint_t l, fast_sint_t omp_block_start, fast_sint_t omp_block_size) +{ + const fast_sint_t prefetch_distance = 32; + + const sa_sint_t * RESTRICT SAnm = &SA[(fast_sint_t)n - (fast_sint_t)m - 1 + l]; + + fast_sint_t i, j; sa_sint_t tmp = *SAnm++; + /* Main loop, unrolled by 4. */ for (i = omp_block_start, j = omp_block_start + omp_block_size - 3; i < j; i += 4) + { + libsais_prefetch(&SA[i + prefetch_distance]); + + if (SA[i + 0] == 0) { SA[i + 0] = tmp; tmp = *SAnm++; } + if (SA[i + 1] == 0) { SA[i + 1] = tmp; tmp = *SAnm++; } + if (SA[i + 2] == 0) { SA[i + 2] = tmp; tmp = *SAnm++; } + if (SA[i + 3] == 0) { SA[i + 3] = tmp; tmp = *SAnm++; } + } + + /* Scalar loop over the up-to-3 remaining positions. */ for (j += 3; i < j; i += 1) + { + if (SA[i] == 0) { SA[i] = tmp; tmp = *SAnm++; } + } +} + +/* Applies libsais_merge_unique_lms_suffixes_32s to T[0..n), in one call or split into per-thread blocks. The parallel path is taken only when threads > 1 and n >= 65536. */ static void libsais_merge_unique_lms_suffixes_32s_omp(sa_sint_t * RESTRICT T, sa_sint_t * RESTRICT SA, sa_sint_t n, sa_sint_t m, sa_sint_t threads, LIBSAIS_THREAD_STATE * RESTRICT thread_state) +{ +#if defined(_OPENMP) + #pragma omp parallel num_threads(threads) if(threads > 1 && n >= 65536) +#endif + { +#if defined(_OPENMP) + fast_sint_t omp_thread_num = omp_get_thread_num(); + fast_sint_t omp_num_threads = omp_get_num_threads(); +#else + UNUSED(threads); UNUSED(thread_state); + + fast_sint_t omp_thread_num = 0; + fast_sint_t omp_num_threads = 1; +#endif + /* Per-thread stride is n divided by the thread count, rounded down to a multiple of 16; the last thread takes the remainder. */ fast_sint_t omp_block_stride = (n / omp_num_threads) & (-16); + fast_sint_t omp_block_start = omp_thread_num * omp_block_stride; + fast_sint_t omp_block_size = omp_thread_num < omp_num_threads - 1 ?
omp_block_stride : n - omp_block_start; + + if (omp_num_threads == 1) + { + libsais_merge_unique_lms_suffixes_32s(T, SA, n, m, 0, omp_block_start, omp_block_size); + } +#if defined(_OPENMP) + else + { + { + /* Phase 1: each thread records how many negative-marked entries its own block of T contains. */ thread_state[omp_thread_num].state.count = libsais_count_negative_marked_suffixes(T, omp_block_start, omp_block_size); + } + + #pragma omp barrier + + { + /* Phase 2: the sum of the counts of all lower-numbered threads is this thread's starting offset l into the source run. */ fast_sint_t t, count = 0; for (t = 0; t < omp_thread_num; ++t) { count += thread_state[t].state.count; } + + libsais_merge_unique_lms_suffixes_32s(T, SA, n, m, count, omp_block_start, omp_block_size); + } + } +#endif + } +} + +/* Applies libsais_merge_nonunique_lms_suffixes_32s to SA[0..m), in one call or split into per-thread blocks, with f as the starting offset l. The parallel path is taken only when threads > 1 and m >= 65536. */ static void libsais_merge_nonunique_lms_suffixes_32s_omp(sa_sint_t * RESTRICT SA, sa_sint_t n, sa_sint_t m, sa_sint_t f, sa_sint_t threads, LIBSAIS_THREAD_STATE * RESTRICT thread_state) +{ +#if defined(_OPENMP) + #pragma omp parallel num_threads(threads) if(threads > 1 && m >= 65536) +#endif + { +#if defined(_OPENMP) + fast_sint_t omp_thread_num = omp_get_thread_num(); + fast_sint_t omp_num_threads = omp_get_num_threads(); +#else + UNUSED(threads); UNUSED(thread_state); + + fast_sint_t omp_thread_num = 0; + fast_sint_t omp_num_threads = 1; +#endif + /* Per-thread stride is m divided by the thread count, rounded down to a multiple of 16; the last thread takes the remainder. */ fast_sint_t omp_block_stride = (m / omp_num_threads) & (-16); + fast_sint_t omp_block_start = omp_thread_num * omp_block_stride; + fast_sint_t omp_block_size = omp_thread_num < omp_num_threads - 1 ?
omp_block_stride : m - omp_block_start; + + if (omp_num_threads == 1) + { + libsais_merge_nonunique_lms_suffixes_32s(SA, n, m, f, omp_block_start, omp_block_size); + } +#if defined(_OPENMP) + else + { + { + /* Phase 1: each thread records how many zero entries its own block of SA contains. */ thread_state[omp_thread_num].state.count = libsais_count_zero_marked_suffixes(SA, omp_block_start, omp_block_size); + } + + #pragma omp barrier + + { + /* Phase 2: f plus the counts of all lower-numbered threads is this thread's starting offset l. */ fast_sint_t t, count = f; for (t = 0; t < omp_thread_num; ++t) { count += thread_state[t].state.count; } + + libsais_merge_nonunique_lms_suffixes_32s(SA, n, m, count, omp_block_start, omp_block_size); + } + } +#endif + } +} + +/* Runs the unique merge over T and then the non-unique merge over SA, in that order. */ static void libsais_merge_compacted_lms_suffixes_32s_omp(sa_sint_t * RESTRICT T, sa_sint_t * RESTRICT SA, sa_sint_t n, sa_sint_t m, sa_sint_t f, sa_sint_t threads, LIBSAIS_THREAD_STATE * RESTRICT thread_state) +{ + libsais_merge_unique_lms_suffixes_32s_omp(T, SA, n, m, threads, thread_state); + libsais_merge_nonunique_lms_suffixes_32s_omp(SA, n, m, f, threads, thread_state); +} + +/* Post-recursion reconstruction for the code paths that keep bucket tables (the callers visible in this file are the 6k, 4k and 2k paths of libsais_main_32s). With f > 0 the compacted form is expanded again: f entries are moved from SA[n + fs - m] to SA[n - m - 1], the compacted LMS suffixes are counted/gathered and reconstructed for m - f entries, those are copied behind the f entries, SA[0..m) is zeroed, and the two runs are merged. With f == 0 a plain count-and-gather plus reconstruction of all m entries is done. */ static void libsais_reconstruct_compacted_lms_suffixes_32s_2k_omp(sa_sint_t * RESTRICT T, sa_sint_t * RESTRICT SA, sa_sint_t n, sa_sint_t k, sa_sint_t m, sa_sint_t fs, sa_sint_t f, sa_sint_t * RESTRICT buckets, sa_sint_t threads, LIBSAIS_THREAD_STATE * RESTRICT thread_state) +{ + if (f > 0) + { + /* memmove rather than memcpy: NOTE(review) source and destination ranges can presumably overlap here - confirm. */ memmove(&SA[n - m - 1], &SA[n + fs - m], (size_t)f * sizeof(sa_sint_t)); + + libsais_count_and_gather_compacted_lms_suffixes_32s_2k_omp(T, SA, n, k, buckets, threads, thread_state); + libsais_reconstruct_lms_suffixes_omp(SA, n, m - f, threads); + + memcpy(&SA[n - m - 1 + f], &SA[0], ((size_t)m - (size_t)f) * sizeof(sa_sint_t)); + memset(&SA[0], 0, (size_t)m * sizeof(sa_sint_t)); + + libsais_merge_compacted_lms_suffixes_32s_omp(T, SA, n, m, f, threads, thread_state); + } + else + { + libsais_count_and_gather_lms_suffixes_32s_2k(T, SA, n, k, buckets, 0, n); + libsais_reconstruct_lms_suffixes_omp(SA, n, m, threads); + } +} + +/* Same reconstruction as the _2k_omp variant but without a bucket table: it only gathers the LMS suffixes instead of counting and gathering them. */ static void libsais_reconstruct_compacted_lms_suffixes_32s_1k_omp(sa_sint_t * RESTRICT T, sa_sint_t
* RESTRICT SA, sa_sint_t n, sa_sint_t m, sa_sint_t fs, sa_sint_t f, sa_sint_t threads, LIBSAIS_THREAD_STATE * RESTRICT thread_state) +{ + if (f > 0) + { + /* Move f entries from SA[n + fs - m] to SA[n - m - 1], rebuild the remaining m - f entries at the front of SA, append them behind the f entries, clear SA[0..m) and merge. */ memmove(&SA[n - m - 1], &SA[n + fs - m], (size_t)f * sizeof(sa_sint_t)); + + libsais_gather_compacted_lms_suffixes_32s(T, SA, n); + libsais_reconstruct_lms_suffixes_omp(SA, n, m - f, threads); + + memcpy(&SA[n - m - 1 + f], &SA[0], ((size_t)m - (size_t)f) * sizeof(sa_sint_t)); + memset(&SA[0], 0, (size_t)m * sizeof(sa_sint_t)); + + libsais_merge_compacted_lms_suffixes_32s_omp(T, SA, n, m, f, threads, thread_state); + } + else + { + libsais_gather_lms_suffixes_32s(T, SA, n); + libsais_reconstruct_lms_suffixes_omp(SA, n, m, threads); + } +} + +/* Core routine for sa_sint_t input T of length n (presumably k is the alphabet size - it is used as the number of counters per bucket table). Chooses one of four code paths by how many k-sized tables fit into the fs spare entries behind SA[n]: 6*k, 4*k, 2*k, or a single k-sized table that is heap-allocated when even k entries do not fit. Recurses on the reduced problem when names < m. Returns 0 on success and -2 when the recursion or the bucket allocation fails. */ static sa_sint_t libsais_main_32s(sa_sint_t * RESTRICT T, sa_sint_t * RESTRICT SA, sa_sint_t n, sa_sint_t k, sa_sint_t fs, sa_sint_t threads, LIBSAIS_THREAD_STATE * RESTRICT thread_state) +{ + /* Clamp fs so that n + fs does not exceed SAINT_MAX. */ fs = fs < (SAINT_MAX - n) ? fs : (SAINT_MAX - n); + + /* 6k path: the spare space holds at least 6*k counters. */ if (k > 0 && fs / k >= 6) + { + /* Use an aligned bucket table (1024 or 16 elements) when the spare space can absorb the alignment slack; otherwise place it unaligned at the very end. */ sa_sint_t alignment = (fs - 1024) / k >= 6 ? 1024 : 16; + sa_sint_t * RESTRICT buckets = (fs - alignment) / k >= 6 ?
(sa_sint_t *)libsais_align_up(&SA[n + fs - 6 * k - alignment], (size_t)alignment * sizeof(sa_sint_t)) : &SA[n + fs - 6 * k]; + + sa_sint_t m = libsais_count_and_gather_lms_suffixes_32s_4k_omp(T, SA, n, k, buckets, threads, thread_state); + if (m > 1) + { + /* Clear SA[0..n - m); the entry at SA[n - m] is read right below as first_lms_suffix. */ memset(SA, 0, ((size_t)n - (size_t)m) * sizeof(sa_sint_t)); + + sa_sint_t first_lms_suffix = SA[n - m]; + sa_sint_t left_suffixes_count = libsais_initialize_buckets_for_lms_suffixes_radix_sort_32s_6k(T, k, buckets, first_lms_suffix); + + libsais_radix_sort_lms_suffixes_32s_6k_omp(T, SA, n, m, &buckets[4 * k], threads, thread_state); + libsais_radix_sort_set_markers_32s_6k_omp(SA, k, &buckets[4 * k], threads); + + /* Only when the multi-threaded condition holds: additionally clear SA[n - m..n). */ if (threads > 1 && n >= 65536) { memset(&SA[(fast_sint_t)n - (fast_sint_t)m], 0, (size_t)m * sizeof(sa_sint_t)); } + + libsais_initialize_buckets_for_partial_sorting_32s_6k(T, k, buckets, first_lms_suffix, left_suffixes_count); + libsais_induce_partial_order_32s_6k_omp(T, SA, n, k, buckets, first_lms_suffix, left_suffixes_count, threads, thread_state); + + sa_sint_t names = libsais_renumber_and_mark_distinct_lms_suffixes_32s_4k_omp(SA, n, m, threads, thread_state); + /* names < m: recurse on the compacted problem of length m - f with names - f names; the reduced text is placed at SA + n + fs - m + f and the remaining fs + n - 2 * m + f entries are passed as spare space. */ if (names < m) + { + sa_sint_t f = libsais_compact_lms_suffixes_32s_omp(T, SA, n, m, fs, threads, thread_state); + + if (libsais_main_32s(SA + n + fs - m + f, SA, m - f, names - f, fs + n - 2 * m + f, threads, thread_state) != 0) + { + return -2; + } + + libsais_reconstruct_compacted_lms_suffixes_32s_2k_omp(T, SA, n, k, m, fs, f, buckets, threads, thread_state); + } + else + { + /* names == m: no recursion; only the bucket counts are recomputed. */ libsais_count_lms_suffixes_32s_2k(T, n, k, buckets); + } + + libsais_initialize_buckets_start_and_end_32s_4k(k, buckets); + libsais_place_lms_suffixes_histogram_32s_4k(SA, n, k, m, buckets); + libsais_induce_final_order_32s_4k(T, SA, n, k, buckets, threads, thread_state); + } + else + { + /* m <= 1: copy SA[n - 1] to SA[0] and go straight to the final induction using the 6k helpers. */ SA[0] = SA[n - 1]; + + libsais_initialize_buckets_start_and_end_32s_6k(k, buckets); + libsais_place_lms_suffixes_histogram_32s_6k(SA, n, k, m, buckets); +
libsais_induce_final_order_32s_6k(T, SA, n, k, buckets, threads, thread_state); + } + + return 0; + } + /* 4k path: the spare space holds at least 4*k counters. */ else if (k > 0 && fs / k >= 4) + { + sa_sint_t alignment = (fs - 1024) / k >= 4 ? 1024 : 16; + sa_sint_t * RESTRICT buckets = (fs - alignment) / k >= 4 ? (sa_sint_t *)libsais_align_up(&SA[n + fs - 4 * k - alignment], (size_t)alignment * sizeof(sa_sint_t)) : &SA[n + fs - 4 * k]; + + sa_sint_t m = libsais_count_and_gather_lms_suffixes_32s_2k_omp(T, SA, n, k, buckets, threads, thread_state); + if (m > 1) + { + libsais_initialize_buckets_for_radix_and_partial_sorting_32s_4k(T, k, buckets, SA[n - m]); + + libsais_radix_sort_lms_suffixes_32s_2k_omp(T, SA, n, m, &buckets[1], threads, thread_state); + libsais_radix_sort_set_markers_32s_4k_omp(SA, k, &buckets[1], threads); + + libsais_place_lms_suffixes_interval_32s_4k(SA, n, k, m - 1, buckets); + libsais_induce_partial_order_32s_4k_omp(T, SA, n, k, buckets, threads, thread_state); + + sa_sint_t names = libsais_renumber_and_mark_distinct_lms_suffixes_32s_4k_omp(SA, n, m, threads, thread_state); + /* Same recursion scheme as in the 6k path. */ if (names < m) + { + sa_sint_t f = libsais_compact_lms_suffixes_32s_omp(T, SA, n, m, fs, threads, thread_state); + + if (libsais_main_32s(SA + n + fs - m + f, SA, m - f, names - f, fs + n - 2 * m + f, threads, thread_state) != 0) + { + return -2; + } + + libsais_reconstruct_compacted_lms_suffixes_32s_2k_omp(T, SA, n, k, m, fs, f, buckets, threads, thread_state); + } + else + { + libsais_count_lms_suffixes_32s_2k(T, n, k, buckets); + } + } + else + { + SA[0] = SA[n - 1]; + } + + /* Final induction is shared by the m > 1 and m <= 1 cases of this path. */ libsais_initialize_buckets_start_and_end_32s_4k(k, buckets); + libsais_place_lms_suffixes_histogram_32s_4k(SA, n, k, m, buckets); + libsais_induce_final_order_32s_4k(T, SA, n, k, buckets, threads, thread_state); + + return 0; + } + /* 2k path: the spare space holds at least 2*k counters. */ else if (k > 0 && fs / k >= 2) + { + sa_sint_t alignment = (fs - 1024) / k >= 2 ? 1024 : 16; + sa_sint_t * RESTRICT buckets = (fs - alignment) / k >= 2 ?
(sa_sint_t *)libsais_align_up(&SA[n + fs - 2 * k - alignment], (size_t)alignment * sizeof(sa_sint_t)) : &SA[n + fs - 2 * k]; + + sa_sint_t m = libsais_count_and_gather_lms_suffixes_32s_2k_omp(T, SA, n, k, buckets, threads, thread_state); + if (m > 1) + { + libsais_initialize_buckets_for_lms_suffixes_radix_sort_32s_2k(T, k, buckets, SA[n - m]); + + libsais_radix_sort_lms_suffixes_32s_2k_omp(T, SA, n, m, &buckets[1], threads, thread_state); + libsais_place_lms_suffixes_interval_32s_2k(SA, n, k, m - 1, buckets); + + libsais_initialize_buckets_start_and_end_32s_2k(k, buckets); + libsais_induce_partial_order_32s_2k_omp(T, SA, n, k, buckets, threads, thread_state); + + /* This path uses the _1k renumbering helper, which takes T and no thread_state. */ sa_sint_t names = libsais_renumber_and_mark_distinct_lms_suffixes_32s_1k_omp(T, SA, n, m, threads); + /* Same recursion scheme as in the 6k and 4k paths. */ if (names < m) + { + sa_sint_t f = libsais_compact_lms_suffixes_32s_omp(T, SA, n, m, fs, threads, thread_state); + + if (libsais_main_32s(SA + n + fs - m + f, SA, m - f, names - f, fs + n - 2 * m + f, threads, thread_state) != 0) + { + return -2; + } + + libsais_reconstruct_compacted_lms_suffixes_32s_2k_omp(T, SA, n, k, m, fs, f, buckets, threads, thread_state); + } + else + { + libsais_count_lms_suffixes_32s_2k(T, n, k, buckets); + } + } + else + { + SA[0] = SA[n - 1]; + } + + /* The bucket table is initialized twice: bucket ends before placing the LMS suffixes, then starts and ends before the final induction. */ libsais_initialize_buckets_end_32s_2k(k, buckets); + libsais_place_lms_suffixes_histogram_32s_2k(SA, n, k, m, buckets); + + libsais_initialize_buckets_start_and_end_32s_2k(k, buckets); + libsais_induce_final_order_32s_2k(T, SA, n, k, buckets, threads, thread_state); + + return 0; + } + /* 1k path (also taken when k <= 0): a single k-sized table, heap-allocated when fs < k. */ else + { + sa_sint_t * buffer = fs < k ? (sa_sint_t *)libsais_alloc_aligned((size_t)k * sizeof(sa_sint_t), 4096) : (sa_sint_t *)NULL; + + /* buckets points into the spare space (aligned when the slack fits, else unaligned) or at the heap buffer; it is NULL only if that allocation failed. */ sa_sint_t alignment = fs - 1024 >= k ? 1024 : 16; + sa_sint_t * RESTRICT buckets = fs - alignment >= k ? (sa_sint_t *)libsais_align_up(&SA[n + fs - k - alignment], (size_t)alignment * sizeof(sa_sint_t)) : fs >= k ?
&SA[n + fs - k] : buffer; + + if (buckets == NULL) { return -2; } + + memset(SA, 0, (size_t)n * sizeof(sa_sint_t)); + + libsais_count_suffixes_32s(T, n, k, buckets); + libsais_initialize_buckets_end_32s_1k(k, buckets); + + sa_sint_t m = libsais_radix_sort_lms_suffixes_32s_1k(T, SA, n, buckets); + if (m > 1) + { + libsais_induce_partial_order_32s_1k_omp(T, SA, n, k, buckets, threads, thread_state); + + sa_sint_t names = libsais_renumber_and_mark_distinct_lms_suffixes_32s_1k_omp(T, SA, n, m, threads); + if (names < m) + { + /* A heap-allocated bucket table is released before recursing and re-allocated afterwards, so it is not held across the recursive call. */ if (buffer != NULL) { libsais_free_aligned(buffer); buckets = NULL; } + + sa_sint_t f = libsais_compact_lms_suffixes_32s_omp(T, SA, n, m, fs, threads, thread_state); + + /* buffer was already freed above (if it was ever allocated), so returning here does not leak it. */ if (libsais_main_32s(SA + n + fs - m + f, SA, m - f, names - f, fs + n - 2 * m + f, threads, thread_state) != 0) + { + return -2; + } + + libsais_reconstruct_compacted_lms_suffixes_32s_1k_omp(T, SA, n, m, fs, f, threads, thread_state); + + if (buckets == NULL) { buckets = buffer = (sa_sint_t *)libsais_alloc_aligned((size_t)k * sizeof(sa_sint_t), 4096); } + if (buckets == NULL) { return -2; } + } + + /* Bucket counts are recomputed here (after a recursion the heap table may be a fresh allocation). */ libsais_count_suffixes_32s(T, n, k, buckets); + libsais_initialize_buckets_end_32s_1k(k, buckets); + libsais_place_lms_suffixes_interval_32s_1k(T, SA, k, m, buckets); + } + + libsais_induce_final_order_32s_1k(T, SA, n, k, buckets, threads, thread_state); + /* buffer is NULL when the table lives inside SA; this call relies on libsais_free_aligned accepting NULL. */ libsais_free_aligned(buffer); + + return 0; + } +} + +/* Core routine for byte input T of length n using the caller-provided bucket table. If the LMS names are not all distinct (names < m) the reduced problem is solved by libsais_main_32s. Returns -2 if that recursion fails, otherwise whatever libsais_induce_final_order_8u_omp returns. bwt, r and I are forwarded unchanged to that final step. */ static sa_sint_t libsais_main_8u(const uint8_t * T, sa_sint_t * SA, sa_sint_t n, sa_sint_t * RESTRICT buckets, sa_sint_t bwt, sa_sint_t r, sa_sint_t * RESTRICT I, sa_sint_t fs, sa_sint_t * freq, sa_sint_t threads, LIBSAIS_THREAD_STATE * RESTRICT thread_state) +{ + /* Clamp fs so that n + fs does not exceed SAINT_MAX. */ fs = fs < (SAINT_MAX - n) ?
fs : (SAINT_MAX - n); + + sa_sint_t m = libsais_count_and_gather_lms_suffixes_8u_omp(T, SA, n, buckets, threads, thread_state); + + libsais_initialize_buckets_start_and_end_8u(buckets, freq); + + if (m > 0) + { + sa_sint_t first_lms_suffix = SA[n - m]; + sa_sint_t left_suffixes_count = libsais_initialize_buckets_for_lms_suffixes_radix_sort_8u(T, buckets, first_lms_suffix); + + /* The two memsets around the radix sort run only under the multi-threaded condition (threads > 1 and n >= 65536). */ if (threads > 1 && n >= 65536) { memset(SA, 0, ((size_t)n - (size_t)m) * sizeof(sa_sint_t)); } + libsais_radix_sort_lms_suffixes_8u_omp(T, SA, n, m, buckets, threads, thread_state); + if (threads > 1 && n >= 65536) { memset(&SA[(fast_sint_t)n - (fast_sint_t)m], 0, (size_t)m * sizeof(sa_sint_t)); } + + libsais_initialize_buckets_for_partial_sorting_8u(T, buckets, first_lms_suffix, left_suffixes_count); + libsais_induce_partial_order_8u_omp(T, SA, n, buckets, first_lms_suffix, left_suffixes_count, threads, thread_state); + + sa_sint_t names = libsais_renumber_and_gather_lms_suffixes_8u_omp(SA, n, m, fs, threads, thread_state); + /* names < m: recurse with the reduced text at SA + n + fs - m (length m, names distinct names) and fs + n - 2 * m spare entries. */ if (names < m) + { + if (libsais_main_32s(SA + n + fs - m, SA, m, names, fs + n - 2 * m, threads, thread_state) != 0) + { + return -2; + } + + libsais_gather_lms_suffixes_8u_omp(T, SA, n, threads, thread_state); + libsais_reconstruct_lms_suffixes_omp(SA, n, m, threads); + } + + libsais_place_lms_suffixes_interval_8u(SA, n, m, buckets); + } + else + { + /* m == 0: nothing to sort first; SA is zero-filled before the final induction. */ memset(SA, 0, (size_t)n * sizeof(sa_sint_t)); + } + + return libsais_induce_final_order_8u_omp(T, SA, n, bwt, r, I, buckets, threads, thread_state); +} + +/* Allocates the per-call working memory (a bucket table of 8 * ALPHABET_SIZE entries and, for threads > 1, the thread state), runs libsais_main_8u and releases the memory again. Returns -2 if an allocation failed, otherwise the result of libsais_main_8u. */ static sa_sint_t libsais_main(const uint8_t * T, sa_sint_t * SA, sa_sint_t n, sa_sint_t bwt, sa_sint_t r, sa_sint_t * I, sa_sint_t fs, sa_sint_t * freq, sa_sint_t threads) +{ + LIBSAIS_THREAD_STATE * RESTRICT thread_state = threads > 1 ?
libsais_alloc_thread_state(threads) : NULL; + sa_sint_t * RESTRICT buckets = (sa_sint_t *)libsais_alloc_aligned(8 * ALPHABET_SIZE * sizeof(sa_sint_t), 4096); + + /* A NULL thread_state is acceptable only when threads == 1, where none was requested. */ sa_sint_t index = buckets != NULL && (thread_state != NULL || threads == 1) + ? libsais_main_8u(T, SA, n, buckets, bwt, r, I, fs, freq, threads, thread_state) + : -2; + + /* Both frees run unconditionally, including on the failure path where a pointer may be NULL. */ libsais_free_aligned(buckets); + libsais_free_thread_state(thread_state); + + return index; +} + +/* Integer-input counterpart of libsais_main: allocates the thread state when threads > 1, runs libsais_main_32s and frees the state. Returns -2 if the thread state could not be allocated, otherwise the result of libsais_main_32s. */ static int32_t libsais_main_int(sa_sint_t * T, sa_sint_t * SA, sa_sint_t n, sa_sint_t k, sa_sint_t fs, sa_sint_t threads) +{ + LIBSAIS_THREAD_STATE * RESTRICT thread_state = threads > 1 ? libsais_alloc_thread_state(threads) : NULL; + + sa_sint_t index = thread_state != NULL || threads == 1 + ? libsais_main_32s(T, SA, n, k, fs, threads, thread_state) + : -2; + + libsais_free_thread_state(thread_state); + + return index; +} + +/* Context-based counterpart of libsais_main: uses the bucket table, thread count and thread state stored in ctx instead of allocating them. Returns -2 if ctx is NULL, has no bucket table, or lacks a thread state while ctx->threads != 1. */ static sa_sint_t libsais_main_ctx(const LIBSAIS_CONTEXT * ctx, const uint8_t * T, sa_sint_t * SA, sa_sint_t n, sa_sint_t bwt, sa_sint_t r, sa_sint_t * I, sa_sint_t fs, sa_sint_t * freq) +{ + return ctx != NULL && (ctx->buckets != NULL && (ctx->thread_state != NULL || ctx->threads == 1)) + ?
libsais_main_8u(T, SA, n, ctx->buckets, bwt, r, I, fs, freq, (sa_sint_t)ctx->threads, ctx->thread_state) + : -2; +} + +/* Copies n elements from A to U, narrowing each sa_sint_t to uint8_t with a plain cast. */ static void libsais_bwt_copy_8u(uint8_t * RESTRICT U, sa_sint_t * RESTRICT A, sa_sint_t n) +{ + const fast_sint_t prefetch_distance = 32; + + fast_sint_t i, j; + /* Main loop, unrolled by 8, with a prefetch 32 elements ahead in A. */ for (i = 0, j = (fast_sint_t)n - 7; i < j; i += 8) + { + libsais_prefetch(&A[i + prefetch_distance]); + + U[i + 0] = (uint8_t)A[i + 0]; + U[i + 1] = (uint8_t)A[i + 1]; + U[i + 2] = (uint8_t)A[i + 2]; + U[i + 3] = (uint8_t)A[i + 3]; + U[i + 4] = (uint8_t)A[i + 4]; + U[i + 5] = (uint8_t)A[i + 5]; + U[i + 6] = (uint8_t)A[i + 6]; + U[i + 7] = (uint8_t)A[i + 7]; + } + + /* Scalar loop over the up-to-7 remaining elements. */ for (j += 7; i < j; i += 1) + { + U[i] = (uint8_t)A[i]; + } +} + +#if defined(_OPENMP) + +/* Multi-threaded wrapper around libsais_bwt_copy_8u: each thread copies its own contiguous block. The parallel path is taken only when threads > 1 and n >= 65536. This function is compiled only when _OPENMP is defined, so the inner #else branch below is never compiled. */ static void libsais_bwt_copy_8u_omp(uint8_t * RESTRICT U, sa_sint_t * RESTRICT A, sa_sint_t n, sa_sint_t threads) +{ +#if defined(_OPENMP) + #pragma omp parallel num_threads(threads) if(threads > 1 && n >= 65536) +#endif + { +#if defined(_OPENMP) + fast_sint_t omp_thread_num = omp_get_thread_num(); + fast_sint_t omp_num_threads = omp_get_num_threads(); + /* Per-thread stride is n divided by the thread count, rounded down to a multiple of 16; the last thread takes the remainder. */ fast_sint_t omp_block_stride = ((fast_sint_t)n / omp_num_threads) & (-16); + fast_sint_t omp_block_start = omp_thread_num * omp_block_stride; + fast_sint_t omp_block_size = omp_thread_num < omp_num_threads - 1 ?
omp_block_stride : (fast_sint_t)n - omp_block_start; +#else + UNUSED(threads); + + fast_sint_t omp_block_start = 0; + fast_sint_t omp_block_size = (fast_sint_t)n; +#endif + + libsais_bwt_copy_8u(U + omp_block_start, A + omp_block_start, (sa_sint_t)omp_block_size); + } +} + +#endif + +/* Creates a single-threaded context; returns whatever libsais_create_ctx_main(1) returns, as an opaque pointer. */ void * libsais_create_ctx(void) +{ + return (void *)libsais_create_ctx_main(1); +} + +/* Releases a context by forwarding it to libsais_free_ctx_main. */ void libsais_free_ctx(void * ctx) +{ + libsais_free_ctx_main((LIBSAIS_CONTEXT *)ctx); +} + +/* Public single-threaded entry point for byte input. Returns -1 if T or SA is NULL or n or fs is negative; returns 0 directly for n < 2 (writing SA[0] = 0 when n == 1); otherwise returns the result of libsais_main called with bwt = 0, r = 0, I = NULL and one thread. */ int32_t libsais(const uint8_t * T, int32_t * SA, int32_t n, int32_t fs, int32_t * freq) +{ + if ((T == NULL) || (SA == NULL) || (n < 0) || (fs < 0)) + { + return -1; + } + else if (n < 2) + { + if (n == 1) { SA[0] = 0; } + return 0; + } + + return libsais_main(T, SA, n, 0, 0, NULL, fs, freq, 1); +} + +/* Public single-threaded entry point for int32_t input with k passed through to libsais_main_int. Same argument checks and n < 2 shortcut as libsais; k itself is not validated here. */ int32_t libsais_int(int32_t * T, int32_t * SA, int32_t n, int32_t k, int32_t fs) +{ + if ((T == NULL) || (SA == NULL) || (n < 0) || (fs < 0)) + { + return -1; + } + else if (n < 2) + { + if (n == 1) { SA[0] = 0; } + return 0; + } + + return libsais_main_int(T, SA, n, k, fs, 1); +} + +/* Same as libsais but runs with the working memory and thread count held by ctx; additionally returns -1 when ctx is NULL. */ int32_t libsais_ctx(const void * ctx, const uint8_t * T, int32_t * SA, int32_t n, int32_t fs, int32_t * freq) +{ + if ((ctx == NULL) || (T == NULL) || (SA == NULL) || (n < 0) || (fs < 0)) + { + return -1; + } + else if (n < 2) + { + if (n == 1) { SA[0] = 0; } + return 0; + } + + return libsais_main_ctx((const LIBSAIS_CONTEXT *)ctx, T, SA, n, 0, 0, NULL, fs, freq); +} + +/* Public single-threaded BWT entry point: writes n bytes to U using A as working storage. Returns -1 on NULL pointers or negative n/fs; returns n directly for n <= 1 (copying the single byte when n == 1); otherwise returns the index from libsais_main plus one on success, or its negative result unchanged on failure. */ int32_t libsais_bwt(const uint8_t * T, uint8_t * U, int32_t * A, int32_t n, int32_t fs, int32_t * freq) +{ + if ((T == NULL) || (U == NULL) || (A == NULL) || (n < 0) || (fs < 0)) + { + return -1; + } + else if (n <= 1) + { + if (n == 1) { U[0] = T[0]; } + return n; + } + + sa_sint_t index = libsais_main(T, A, n, 1, 0, NULL, fs, freq, 1); + if (index >= 0) + { + index++; + + /* Output layout: U[0] is the last input byte, U[1..index) comes from A[0..index - 1), and U[index..n) comes from A[index..n); A[index - 1] is not copied. */ U[0] = T[n - 1]; + libsais_bwt_copy_8u(U + 1, A, index - 1); + libsais_bwt_copy_8u(U + index, A + index, n - index); + } + + return index; +} + +/* Variant of libsais_bwt that additionally takes a sampling rate r and an output array I; see the argument checks in the body for the extra constraints on r and I. */ int32_t libsais_bwt_aux(const uint8_t * T, uint8_t *
U, int32_t * A, int32_t n, int32_t fs, int32_t * freq, int32_t r, int32_t * I) +{ + /* Besides the usual NULL / negative checks, r must be at least 2 and a power of two, and I must not be NULL. */ if ((T == NULL) || (U == NULL) || (A == NULL) || (n < 0) || (fs < 0) || (r < 2) || ((r & (r - 1)) != 0) || (I == NULL)) + { + return -1; + } + else if (n <= 1) + { + if (n == 1) { U[0] = T[0]; } + + I[0] = n; + return 0; + } + + /* Unlike libsais_bwt, any non-zero result of libsais_main is reported as -2 and success is reported as 0. */ if (libsais_main(T, A, n, 1, r, I, fs, freq, 1) != 0) + { + return -2; + } + + /* I[0] plays the role that the returned index plays in libsais_bwt: A[I[0] - 1] is the one entry that is not copied. */ U[0] = T[n - 1]; + libsais_bwt_copy_8u(U + 1, A, I[0] - 1); + libsais_bwt_copy_8u(U + I[0], A + I[0], n - I[0]); + + return 0; +} + +/* Context-based variant of libsais_bwt: same checks plus ctx != NULL, same return convention; with OpenMP the copy into U uses ctx->threads threads. */ int32_t libsais_bwt_ctx(const void * ctx, const uint8_t * T, uint8_t * U, int32_t * A, int32_t n, int32_t fs, int32_t * freq) +{ + if ((ctx == NULL) || (T == NULL) || (U == NULL) || (A == NULL) || (n < 0) || (fs < 0)) + { + return -1; + } + else if (n <= 1) + { + if (n == 1) { U[0] = T[0]; } + return n; + } + + sa_sint_t index = libsais_main_ctx((const LIBSAIS_CONTEXT *)ctx, T, A, n, 1, 0, NULL, fs, freq); + if (index >= 0) + { + index++; + + U[0] = T[n - 1]; + +#if defined(_OPENMP) + libsais_bwt_copy_8u_omp(U + 1, A, index - 1, (sa_sint_t)((const LIBSAIS_CONTEXT *)ctx)->threads); + libsais_bwt_copy_8u_omp(U + index, A + index, n - index, (sa_sint_t)((const LIBSAIS_CONTEXT *)ctx)->threads); +#else + libsais_bwt_copy_8u(U + 1, A, index - 1); + libsais_bwt_copy_8u(U + index, A + index, n - index); +#endif + } + + return index; +} + +/* Context-based variant of libsais_bwt_aux: same checks plus ctx != NULL; returns 0 on success, -1 on invalid arguments and -2 when libsais_main_ctx returns non-zero. */ int32_t libsais_bwt_aux_ctx(const void * ctx, const uint8_t * T, uint8_t * U, int32_t * A, int32_t n, int32_t fs, int32_t * freq, int32_t r, int32_t * I) +{ + if ((ctx == NULL) || (T == NULL) || (U == NULL) || (A == NULL) || (n < 0) || (fs < 0) || (r < 2) || ((r & (r - 1)) != 0) || (I == NULL)) + { + return -1; + } + else if (n <= 1) + { + if (n == 1) { U[0] = T[0]; } + + I[0] = n; + return 0; + } + + if (libsais_main_ctx((const LIBSAIS_CONTEXT *)ctx, T, A, n, 1, r, I, fs, freq) != 0) + { + return -2; + } + + U[0] = T[n - 1]; + +#if defined(_OPENMP) + libsais_bwt_copy_8u_omp(U + 1, A, I[0] - 1, (sa_sint_t)((const
LIBSAIS_CONTEXT *)ctx)->threads); + libsais_bwt_copy_8u_omp(U + I[0], A + I[0], n - I[0], (sa_sint_t)((const LIBSAIS_CONTEXT *)ctx)->threads); +#else + libsais_bwt_copy_8u(U + 1, A, I[0] - 1); + libsais_bwt_copy_8u(U + I[0], A + I[0], n - I[0]); +#endif + + return 0; +} + +#if defined(_OPENMP) + +/* Creates a context for the given number of threads. Returns NULL for a negative count; a count of 0 is replaced by omp_get_max_threads(). */ void * libsais_create_ctx_omp(int32_t threads) +{ + if (threads < 0) { return NULL; } + + threads = threads > 0 ? threads : omp_get_max_threads(); + return (void *)libsais_create_ctx_main(threads); +} + +/* Multi-threaded counterpart of libsais: same checks plus threads >= 0; threads == 0 selects omp_get_max_threads(). */ int32_t libsais_omp(const uint8_t * T, int32_t * SA, int32_t n, int32_t fs, int32_t * freq, int32_t threads) +{ + if ((T == NULL) || (SA == NULL) || (n < 0) || (fs < 0) || (threads < 0)) + { + return -1; + } + else if (n < 2) + { + if (n == 1) { SA[0] = 0; } + return 0; + } + + threads = threads > 0 ? threads : omp_get_max_threads(); + + return libsais_main(T, SA, n, 0, 0, NULL, fs, freq, threads); +} + +/* Multi-threaded counterpart of libsais_int: same checks plus threads >= 0; threads == 0 selects omp_get_max_threads(). */ int32_t libsais_int_omp(int32_t * T, int32_t * SA, int32_t n, int32_t k, int32_t fs, int32_t threads) +{ + if ((T == NULL) || (SA == NULL) || (n < 0) || (fs < 0) || (threads < 0)) + { + return -1; + } + else if (n < 2) + { + if (n == 1) { SA[0] = 0; } + return 0; + } + + threads = threads > 0 ? threads : omp_get_max_threads(); + + return libsais_main_int(T, SA, n, k, fs, threads); +} + +/* Multi-threaded counterpart of libsais_bwt: same checks plus threads >= 0, same return convention; the copy into U is done with libsais_bwt_copy_8u_omp. */ int32_t libsais_bwt_omp(const uint8_t * T, uint8_t * U, int32_t * A, int32_t n, int32_t fs, int32_t * freq, int32_t threads) +{ + if ((T == NULL) || (U == NULL) || (A == NULL) || (n < 0) || (fs < 0) || (threads < 0)) + { + return -1; + } + else if (n <= 1) + { + if (n == 1) { U[0] = T[0]; } + return n; + } + + /* threads == 0 selects omp_get_max_threads(). */ threads = threads > 0 ?
threads : omp_get_max_threads(); + + sa_sint_t index = libsais_main(T, A, n, 1, 0, NULL, fs, freq, threads); + if (index >= 0) + { + index++; + + /* Same output layout as libsais_bwt: A[index - 1] is the one entry that is not copied. */ U[0] = T[n - 1]; + libsais_bwt_copy_8u_omp(U + 1, A, index - 1, threads); + libsais_bwt_copy_8u_omp(U + index, A + index, n - index, threads); + } + + return index; +} + +/* Multi-threaded counterpart of libsais_bwt_aux: same checks plus threads >= 0; returns 0 on success, -1 on invalid arguments, -2 when libsais_main returns non-zero. threads == 0 selects omp_get_max_threads(). */ int32_t libsais_bwt_aux_omp(const uint8_t * T, uint8_t * U, int32_t * A, int32_t n, int32_t fs, int32_t * freq, int32_t r, int32_t * I, int32_t threads) +{ + if ((T == NULL) || (U == NULL) || (A == NULL) || (n < 0) || (fs < 0) || (r < 2) || ((r & (r - 1)) != 0) || (I == NULL) || (threads < 0)) + { + return -1; + } + else if (n <= 1) + { + if (n == 1) { U[0] = T[0];} + + I[0] = n; + return 0; + } + + threads = threads > 0 ? threads : omp_get_max_threads(); + + if (libsais_main(T, A, n, 1, r, I, fs, freq, threads) != 0) + { + return -2; + } + + U[0] = T[n - 1]; + libsais_bwt_copy_8u_omp(U + 1, A, I[0] - 1, threads); + libsais_bwt_copy_8u_omp(U + I[0], A + I[0], n - I[0], threads); + + return 0; +} + +#endif + +/* Allocates an inverse-BWT context: the context struct, a bucket2 table of ALPHABET_SIZE * ALPHABET_SIZE entries, a fastbits table of 1 + (1 << UNBWT_FASTBITS) entries and, only for threads > 1, a per-thread buckets area. Returns the populated context, or NULL after freeing everything if any allocation failed. */ static LIBSAIS_UNBWT_CONTEXT * libsais_unbwt_create_ctx_main(sa_sint_t threads) +{ + LIBSAIS_UNBWT_CONTEXT * RESTRICT ctx = (LIBSAIS_UNBWT_CONTEXT *)libsais_alloc_aligned(sizeof(LIBSAIS_UNBWT_CONTEXT), 64); + sa_uint_t * RESTRICT bucket2 = (sa_uint_t *)libsais_alloc_aligned(ALPHABET_SIZE * ALPHABET_SIZE * sizeof(sa_uint_t), 4096); + uint16_t * RESTRICT fastbits = (uint16_t *)libsais_alloc_aligned((1 + (1 << UNBWT_FASTBITS)) * sizeof(uint16_t), 4096); + sa_uint_t * RESTRICT buckets = threads > 1 ?
(sa_uint_t *)libsais_alloc_aligned((size_t)threads * (ALPHABET_SIZE + (ALPHABET_SIZE * ALPHABET_SIZE)) * sizeof(sa_uint_t), 4096) : NULL; + + /* A NULL buckets pointer is acceptable only when threads == 1, where none was requested. */ if (ctx != NULL && bucket2 != NULL && fastbits != NULL && (buckets != NULL || threads == 1)) + { + ctx->bucket2 = bucket2; + ctx->fastbits = fastbits; + ctx->buckets = buckets; + ctx->threads = threads; + + return ctx; + } + + /* Failure path: release whatever was allocated; some of these pointers may be NULL. */ libsais_free_aligned(buckets); + libsais_free_aligned(fastbits); + libsais_free_aligned(bucket2); + libsais_free_aligned(ctx); + + return NULL; +} + +/* Frees an inverse-BWT context together with the three tables it owns; a NULL ctx is ignored. */ static void libsais_unbwt_free_ctx_main(LIBSAIS_UNBWT_CONTEXT * ctx) +{ + if (ctx != NULL) + { + libsais_free_aligned(ctx->buckets); + libsais_free_aligned(ctx->fastbits); + libsais_free_aligned(ctx->bucket2); + libsais_free_aligned(ctx); + } +} + +/* Adds the byte frequencies of T[0..n) to count[] (count is incremented, not reset). For n >= 1024 the bulk is processed in 64-byte chunks using 32-bit loads and four separate local tables that are summed into count at the end; the remaining bytes are counted one at a time. */ static void libsais_unbwt_compute_histogram(const uint8_t * RESTRICT T, fast_sint_t n, sa_uint_t * RESTRICT count) +{ + const fast_sint_t prefetch_distance = 256; + + const uint8_t * RESTRICT T_p = T; + + if (n >= 1024) + { + /* Four local tables, each padded by 16 entries; presumably the split keeps the four increments per 32-bit word independent of each other - confirm. */ sa_uint_t copy[4 * (ALPHABET_SIZE + 16)]; + + memset(copy, 0, 4 * (ALPHABET_SIZE + 16) * sizeof(sa_uint_t)); + + sa_uint_t * RESTRICT copy0 = copy + 0 * (ALPHABET_SIZE + 16); + sa_uint_t * RESTRICT copy1 = copy + 1 * (ALPHABET_SIZE + 16); + sa_uint_t * RESTRICT copy2 = copy + 2 * (ALPHABET_SIZE + 16); + sa_uint_t * RESTRICT copy3 = copy + 3 * (ALPHABET_SIZE + 16); + + /* Count single bytes until T_p reaches the next 64-byte aligned address. */ for (; T_p < (uint8_t * )((ptrdiff_t)(T + 63) & (-64)); T_p += 1) { copy0[T_p[0]]++; } + + /* x and y always hold the first two 32-bit words of the chunk about to be processed; they are loaded one chunk ahead. */ fast_uint_t x = ((const uint32_t *)(const void *)T_p)[0], y = ((const uint32_t *)(const void *)T_p)[1]; + + for (; T_p < (uint8_t * )((ptrdiff_t)(T + n - 8) & (-64)); T_p += 64) + { + libsais_prefetch(&T_p[prefetch_distance]); + + /* Each step loads the next two words (z, w or x, y) and then counts the four bytes of each of the two previously loaded words, lowest byte first. */ fast_uint_t z = ((const uint32_t *)(const void *)T_p)[2], w = ((const uint32_t *)(const void *)T_p)[3]; + copy0[(uint8_t)x]++; x >>= 8; copy1[(uint8_t)x]++; x >>= 8; copy2[(uint8_t)x]++; x >>= 8; copy3[x]++; + copy0[(uint8_t)y]++; y >>= 8; copy1[(uint8_t)y]++; y >>= 8; copy2[(uint8_t)y]++; y >>= 8; copy3[y]++;
+ + /* Continuation of the 64-byte chunk: words 4..15 are loaded two at a time while the previously loaded pair is counted. */ x = ((const uint32_t *)(const void *)T_p)[4]; y = ((const uint32_t *)(const void *)T_p)[5]; + copy0[(uint8_t)z]++; z >>= 8; copy1[(uint8_t)z]++; z >>= 8; copy2[(uint8_t)z]++; z >>= 8; copy3[z]++; + copy0[(uint8_t)w]++; w >>= 8; copy1[(uint8_t)w]++; w >>= 8; copy2[(uint8_t)w]++; w >>= 8; copy3[w]++; + + z = ((const uint32_t *)(const void *)T_p)[6]; w = ((const uint32_t *)(const void *)T_p)[7]; + copy0[(uint8_t)x]++; x >>= 8; copy1[(uint8_t)x]++; x >>= 8; copy2[(uint8_t)x]++; x >>= 8; copy3[x]++; + copy0[(uint8_t)y]++; y >>= 8; copy1[(uint8_t)y]++; y >>= 8; copy2[(uint8_t)y]++; y >>= 8; copy3[y]++; + + x = ((const uint32_t *)(const void *)T_p)[8]; y = ((const uint32_t *)(const void *)T_p)[9]; + copy0[(uint8_t)z]++; z >>= 8; copy1[(uint8_t)z]++; z >>= 8; copy2[(uint8_t)z]++; z >>= 8; copy3[z]++; + copy0[(uint8_t)w]++; w >>= 8; copy1[(uint8_t)w]++; w >>= 8; copy2[(uint8_t)w]++; w >>= 8; copy3[w]++; + + z = ((const uint32_t *)(const void *)T_p)[10]; w = ((const uint32_t *)(const void *)T_p)[11]; + copy0[(uint8_t)x]++; x >>= 8; copy1[(uint8_t)x]++; x >>= 8; copy2[(uint8_t)x]++; x >>= 8; copy3[x]++; + copy0[(uint8_t)y]++; y >>= 8; copy1[(uint8_t)y]++; y >>= 8; copy2[(uint8_t)y]++; y >>= 8; copy3[y]++; + + x = ((const uint32_t *)(const void *)T_p)[12]; y = ((const uint32_t *)(const void *)T_p)[13]; + copy0[(uint8_t)z]++; z >>= 8; copy1[(uint8_t)z]++; z >>= 8; copy2[(uint8_t)z]++; z >>= 8; copy3[z]++; + copy0[(uint8_t)w]++; w >>= 8; copy1[(uint8_t)w]++; w >>= 8; copy2[(uint8_t)w]++; w >>= 8; copy3[w]++; + + z = ((const uint32_t *)(const void *)T_p)[14]; w = ((const uint32_t *)(const void *)T_p)[15]; + copy0[(uint8_t)x]++; x >>= 8; copy1[(uint8_t)x]++; x >>= 8; copy2[(uint8_t)x]++; x >>= 8; copy3[x]++; + copy0[(uint8_t)y]++; y >>= 8; copy1[(uint8_t)y]++; y >>= 8; copy2[(uint8_t)y]++; y >>= 8; copy3[y]++; + + /* Words 16 and 17 are the first two words of the NEXT chunk: they are pre-loaded here and counted either at the top of the next iteration or right after the loop. */ x = ((const uint32_t *)(const void *)T_p)[16]; y = ((const uint32_t *)(const void *)T_p)[17]; + copy0[(uint8_t)z]++; z >>= 8; copy1[(uint8_t)z]++; z >>= 8;
copy2[(uint8_t)z]++; z >>= 8; copy3[z]++; + copy0[(uint8_t)w]++; w >>= 8; copy1[(uint8_t)w]++; w >>= 8; copy2[(uint8_t)w]++; w >>= 8; copy3[w]++; + } + + /* Count the 8 bytes still held in x and y from the last pre-load, then step T_p past them. */ copy0[(uint8_t)x]++; x >>= 8; copy1[(uint8_t)x]++; x >>= 8; copy2[(uint8_t)x]++; x >>= 8; copy3[x]++; + copy0[(uint8_t)y]++; y >>= 8; copy1[(uint8_t)y]++; y >>= 8; copy2[(uint8_t)y]++; y >>= 8; copy3[y]++; + + T_p += 8; + + /* Fold the four local tables into the caller's histogram. */ fast_uint_t i; for (i = 0; i < ALPHABET_SIZE; i++) { count[i] += copy0[i] + copy1[i] + copy2[i] + copy3[i]; } + } + + /* Tail (or the whole input when n < 1024): one byte at a time. */ for (; T_p < T + n; T_p += 1) { count[T_p[0]]++; } +} + +/* Transposes bucket2 in place, treating it as a square matrix with a row stride of 256 entries (the stride is hard-coded as << 8 and * 256, so this assumes ALPHABET_SIZE == 256 - confirm). The work is done in 16 x 16 tiles. */ static void libsais_unbwt_transpose_bucket2(sa_uint_t * RESTRICT bucket2) +{ + fast_uint_t x, y, c, d; + for (x = 0; x != ALPHABET_SIZE; x += 16) + { + /* Diagonal tile: swap each element above the diagonal with its mirror below it. */ for (c = x; c != x + 16; ++c) + { + for (d = c + 1; d != x + 16; ++d) + { + sa_uint_t tmp = bucket2[(d << 8) + c]; bucket2[(d << 8) + c] = bucket2[(c << 8) + d]; bucket2[(c << 8) + d] = tmp; + } + } + + /* Off-diagonal tiles: for each tile (x, y) with y > x, swap it element-wise with its mirror tile (y, x); the 16 swaps per c are fully unrolled. */ for (y = x + 16; y != ALPHABET_SIZE; y += 16) + { + for (c = x; c != x + 16; ++c) + { + sa_uint_t * bucket2_yc = &bucket2[(y << 8) + c]; + sa_uint_t * bucket2_cy = &bucket2[(c << 8) + y]; + + sa_uint_t tmp00 = bucket2_yc[ 0 * 256]; bucket2_yc[ 0 * 256] = bucket2_cy[ 0]; bucket2_cy[ 0] = tmp00; + sa_uint_t tmp01 = bucket2_yc[ 1 * 256]; bucket2_yc[ 1 * 256] = bucket2_cy[ 1]; bucket2_cy[ 1] = tmp01; + sa_uint_t tmp02 = bucket2_yc[ 2 * 256]; bucket2_yc[ 2 * 256] = bucket2_cy[ 2]; bucket2_cy[ 2] = tmp02; + sa_uint_t tmp03 = bucket2_yc[ 3 * 256]; bucket2_yc[ 3 * 256] = bucket2_cy[ 3]; bucket2_cy[ 3] = tmp03; + sa_uint_t tmp04 = bucket2_yc[ 4 * 256]; bucket2_yc[ 4 * 256] = bucket2_cy[ 4]; bucket2_cy[ 4] = tmp04; + sa_uint_t tmp05 = bucket2_yc[ 5 * 256]; bucket2_yc[ 5 * 256] = bucket2_cy[ 5]; bucket2_cy[ 5] = tmp05; + sa_uint_t tmp06 = bucket2_yc[ 6 * 256]; bucket2_yc[ 6 * 256] = bucket2_cy[ 6]; bucket2_cy[ 6] = tmp06; + sa_uint_t tmp07 = bucket2_yc[ 7 * 256]; bucket2_yc[ 7 * 256] = bucket2_cy[ 7]; bucket2_cy[ 7] = tmp07; + sa_uint_t tmp08 = bucket2_yc[ 8 * 256]; bucket2_yc[ 8 *
256] = bucket2_cy[ 8]; bucket2_cy[ 8] = tmp08; + sa_uint_t tmp09 = bucket2_yc[ 9 * 256]; bucket2_yc[ 9 * 256] = bucket2_cy[ 9]; bucket2_cy[ 9] = tmp09; + sa_uint_t tmp10 = bucket2_yc[10 * 256]; bucket2_yc[10 * 256] = bucket2_cy[10]; bucket2_cy[10] = tmp10; + sa_uint_t tmp11 = bucket2_yc[11 * 256]; bucket2_yc[11 * 256] = bucket2_cy[11]; bucket2_cy[11] = tmp11; + sa_uint_t tmp12 = bucket2_yc[12 * 256]; bucket2_yc[12 * 256] = bucket2_cy[12]; bucket2_cy[12] = tmp12; + sa_uint_t tmp13 = bucket2_yc[13 * 256]; bucket2_yc[13 * 256] = bucket2_cy[13]; bucket2_cy[13] = tmp13; + sa_uint_t tmp14 = bucket2_yc[14 * 256]; bucket2_yc[14 * 256] = bucket2_cy[14]; bucket2_cy[14] = tmp14; + sa_uint_t tmp15 = bucket2_yc[15 * 256]; bucket2_yc[15 * 256] = bucket2_cy[15]; bucket2_cy[15] = tmp15; + } + } + } +} + +static void libsais_unbwt_compute_bigram_histogram_single(const uint8_t * RESTRICT T, sa_uint_t * RESTRICT bucket1, sa_uint_t * RESTRICT bucket2, fast_uint_t index) +{ + fast_uint_t sum, c; + for (sum = 1, c = 0; c < ALPHABET_SIZE; ++c) + { + fast_uint_t prev = sum; sum += bucket1[c]; bucket1[c] = (sa_uint_t)prev; + if (prev != sum) + { + sa_uint_t * RESTRICT bucket2_p = &bucket2[c << 8]; + + { + fast_uint_t hi = index; if (sum < hi) { hi = sum; } + libsais_unbwt_compute_histogram(&T[prev], (fast_sint_t)(hi - prev), bucket2_p); + } + + { + fast_uint_t lo = index + 1; if (prev > lo) { lo = prev; } + libsais_unbwt_compute_histogram(&T[lo - 1], (fast_sint_t)(sum - lo), bucket2_p); + } + } + } + + libsais_unbwt_transpose_bucket2(bucket2); +} + +static void libsais_unbwt_calculate_fastbits(sa_uint_t * RESTRICT bucket2, uint16_t * RESTRICT fastbits, fast_uint_t lastc, fast_uint_t shift) +{ + fast_uint_t v, w, sum, c, d; + for (v = 0, w = 0, sum = 1, c = 0; c < ALPHABET_SIZE; ++c) + { + if (c == lastc) { sum += 1; } + + for (d = 0; d < ALPHABET_SIZE; ++d, ++w) + { + fast_uint_t prev = sum; sum += bucket2[w]; bucket2[w] = (sa_uint_t)prev; + if (prev != sum) + { + for (; v <= ((sum - 1) >> 
shift); ++v) { fastbits[v] = (uint16_t)w; } + } + } + } +} + +static void libsais_unbwt_calculate_biPSI(const uint8_t * RESTRICT T, sa_uint_t * RESTRICT P, sa_uint_t * RESTRICT bucket1, sa_uint_t * RESTRICT bucket2, fast_uint_t index, fast_sint_t omp_block_start, fast_sint_t omp_block_end) +{ + { + fast_sint_t i = omp_block_start, j = (fast_sint_t)index; if (omp_block_end < j) { j = omp_block_end; } + for (; i < j; ++i) + { + fast_uint_t c = T[i]; + fast_uint_t p = bucket1[c]++; + fast_sint_t t = (fast_sint_t)(index - p); + + if (t != 0) + { + fast_uint_t w = (((fast_uint_t)T[p + (fast_uint_t)(t >> ((sizeof(fast_sint_t) * 8) - 1))]) << 8) + c; + P[bucket2[w]++] = (sa_uint_t)i; + } + } + } + + { + fast_sint_t i = (fast_sint_t)index, j = omp_block_end; if (omp_block_start > i) { i = omp_block_start; } + for (i += 1; i <= j; ++i) + { + fast_uint_t c = T[i - 1]; + fast_uint_t p = bucket1[c]++; + fast_sint_t t = (fast_sint_t)(index - p); + + if (t != 0) + { + fast_uint_t w = (((fast_uint_t)T[p + (fast_uint_t)(t >> ((sizeof(fast_sint_t) * 8) - 1))]) << 8) + c; + P[bucket2[w]++] = (sa_uint_t)i; + } + } + } +} + +static void libsais_unbwt_init_single(const uint8_t * RESTRICT T, sa_uint_t * RESTRICT P, sa_sint_t n, const sa_sint_t * freq, const sa_uint_t * RESTRICT I, sa_uint_t * RESTRICT bucket2, uint16_t * RESTRICT fastbits) +{ + sa_uint_t bucket1[ALPHABET_SIZE]; + + fast_uint_t index = I[0]; + fast_uint_t lastc = T[0]; + fast_uint_t shift = 0; while ((n >> shift) > (1 << UNBWT_FASTBITS)) { shift++; } + + if (freq != NULL) + { + memcpy(bucket1, freq, ALPHABET_SIZE * sizeof(sa_uint_t)); + } + else + { + memset(bucket1, 0, ALPHABET_SIZE * sizeof(sa_uint_t)); + libsais_unbwt_compute_histogram(T, n, bucket1); + } + + memset(bucket2, 0, ALPHABET_SIZE * ALPHABET_SIZE * sizeof(sa_uint_t)); + libsais_unbwt_compute_bigram_histogram_single(T, bucket1, bucket2, index); + + libsais_unbwt_calculate_fastbits(bucket2, fastbits, lastc, shift); + libsais_unbwt_calculate_biPSI(T, P, 
bucket1, bucket2, index, 0, n); +} + +#if defined(_OPENMP) + +static void libsais_unbwt_compute_bigram_histogram_parallel(const uint8_t * RESTRICT T, fast_uint_t index, sa_uint_t * RESTRICT bucket1, sa_uint_t * RESTRICT bucket2, fast_sint_t omp_block_start, fast_sint_t omp_block_size) +{ + fast_sint_t i; + for (i = omp_block_start; i < omp_block_start + omp_block_size; ++i) + { + fast_uint_t c = T[i]; + fast_uint_t p = bucket1[c]++; + fast_sint_t t = (fast_sint_t)(index - p); + + if (t != 0) + { + fast_uint_t w = (((fast_uint_t)T[p + (fast_uint_t)(t >> ((sizeof(fast_sint_t) * 8) - 1))]) << 8) + c; + bucket2[w]++; + } + } +} + +static void libsais_unbwt_init_parallel(const uint8_t * RESTRICT T, sa_uint_t * RESTRICT P, sa_sint_t n, const sa_sint_t * freq, const sa_uint_t * RESTRICT I, sa_uint_t * RESTRICT bucket2, uint16_t * RESTRICT fastbits, sa_uint_t * RESTRICT buckets, sa_sint_t threads) +{ + sa_uint_t bucket1[ALPHABET_SIZE]; + + fast_uint_t index = I[0]; + fast_uint_t lastc = T[0]; + fast_uint_t shift = 0; while ((n >> shift) > (1 << UNBWT_FASTBITS)) { shift++; } + + memset(bucket1, 0, ALPHABET_SIZE * sizeof(sa_uint_t)); + memset(bucket2, 0, ALPHABET_SIZE * ALPHABET_SIZE * sizeof(sa_uint_t)); + + #pragma omp parallel num_threads(threads) if(threads > 1 && n >= 65536) + { + fast_sint_t omp_thread_num = omp_get_thread_num(); + fast_sint_t omp_num_threads = omp_get_num_threads(); + + if (omp_num_threads == 1) + { + libsais_unbwt_init_single(T, P, n, freq, I, bucket2, fastbits); + } + else + { + sa_uint_t * RESTRICT bucket1_local = buckets + omp_thread_num * (ALPHABET_SIZE + (ALPHABET_SIZE * ALPHABET_SIZE)); + sa_uint_t * RESTRICT bucket2_local = bucket1_local + ALPHABET_SIZE; + + fast_sint_t omp_block_stride = (n / omp_num_threads) & (-16); + fast_sint_t omp_block_start = omp_thread_num * omp_block_stride; + fast_sint_t omp_block_size = omp_thread_num < omp_num_threads - 1 ? 
omp_block_stride : n - omp_block_start; + + { + memset(bucket1_local, 0, ALPHABET_SIZE * sizeof(sa_uint_t)); + libsais_unbwt_compute_histogram(T + omp_block_start, omp_block_size, bucket1_local); + } + + #pragma omp barrier + + #pragma omp master + { + { + sa_uint_t * RESTRICT bucket1_temp = buckets; + + fast_sint_t t; + for (t = 0; t < omp_num_threads; ++t, bucket1_temp += ALPHABET_SIZE + (ALPHABET_SIZE * ALPHABET_SIZE)) + { + fast_sint_t c; for (c = 0; c < ALPHABET_SIZE; c += 1) { sa_uint_t A = bucket1[c], B = bucket1_temp[c]; bucket1[c] = A + B; bucket1_temp[c] = A; } + } + } + + { + fast_uint_t sum, c; + for (sum = 1, c = 0; c < ALPHABET_SIZE; ++c) { fast_uint_t prev = sum; sum += bucket1[c]; bucket1[c] = (sa_uint_t)prev; } + } + } + + #pragma omp barrier + + { + fast_sint_t c; for (c = 0; c < ALPHABET_SIZE; c += 1) { sa_uint_t A = bucket1[c], B = bucket1_local[c]; bucket1_local[c] = A + B; } + + memset(bucket2_local, 0, ALPHABET_SIZE * ALPHABET_SIZE * sizeof(sa_uint_t)); + libsais_unbwt_compute_bigram_histogram_parallel(T, index, bucket1_local, bucket2_local, omp_block_start, omp_block_size); + } + + #pragma omp barrier + + { + fast_sint_t omp_bucket2_stride = ((ALPHABET_SIZE * ALPHABET_SIZE) / omp_num_threads) & (-16); + fast_sint_t omp_bucket2_start = omp_thread_num * omp_bucket2_stride; + fast_sint_t omp_bucket2_size = omp_thread_num < omp_num_threads - 1 ? 
omp_bucket2_stride : (ALPHABET_SIZE * ALPHABET_SIZE) - omp_bucket2_start; + + sa_uint_t * RESTRICT bucket2_temp = buckets + ALPHABET_SIZE; + + fast_sint_t t; + for (t = 0; t < omp_num_threads; ++t, bucket2_temp += ALPHABET_SIZE + (ALPHABET_SIZE * ALPHABET_SIZE)) + { + fast_sint_t c; for (c = omp_bucket2_start; c < omp_bucket2_start + omp_bucket2_size; c += 1) { sa_uint_t A = bucket2[c], B = bucket2_temp[c]; bucket2[c] = A + B; bucket2_temp[c] = A; } + } + } + + #pragma omp barrier + + #pragma omp master + { + + libsais_unbwt_calculate_fastbits(bucket2, fastbits, lastc, shift); + + { + fast_sint_t t; + for (t = omp_num_threads - 1; t >= 1; --t) + { + sa_uint_t * RESTRICT dst_bucket1 = buckets + t * (ALPHABET_SIZE + (ALPHABET_SIZE * ALPHABET_SIZE)); + sa_uint_t * RESTRICT src_bucket1 = dst_bucket1 - (ALPHABET_SIZE + (ALPHABET_SIZE * ALPHABET_SIZE)); + + memcpy(dst_bucket1, src_bucket1, ALPHABET_SIZE * sizeof(sa_uint_t)); + } + + memcpy(buckets, bucket1, ALPHABET_SIZE * sizeof(sa_uint_t)); + } + } + + #pragma omp barrier + + { + fast_sint_t c; for (c = 0; c < ALPHABET_SIZE * ALPHABET_SIZE; c += 1) { sa_uint_t A = bucket2[c], B = bucket2_local[c]; bucket2_local[c] = A + B; } + + libsais_unbwt_calculate_biPSI(T, P, bucket1_local, bucket2_local, index, omp_block_start, omp_block_start + omp_block_size); + } + + #pragma omp barrier + + #pragma omp master + { + memcpy(bucket2, buckets + ALPHABET_SIZE + (omp_num_threads - 1) * (ALPHABET_SIZE + (ALPHABET_SIZE * ALPHABET_SIZE)), ALPHABET_SIZE * ALPHABET_SIZE * sizeof(sa_uint_t)); + } + } + } +} + +#endif + +static void libsais_unbwt_decode_1(uint8_t * RESTRICT U, sa_uint_t * RESTRICT P, sa_uint_t * RESTRICT bucket2, uint16_t * RESTRICT fastbits, fast_uint_t shift, fast_uint_t * i0, fast_uint_t k) +{ + uint16_t * RESTRICT U0 = (uint16_t *)(void *)U; + + fast_uint_t i, p0 = *i0; + + for (i = 0; i != k; ++i) + { + uint16_t c0 = fastbits[p0 >> shift]; if (bucket2[c0] <= p0) { do { c0++; } while (bucket2[c0] <= p0); } p0 = P[p0]; 
U0[i] = libsais_bswap16(c0); + } + + *i0 = p0; +} + +static void libsais_unbwt_decode_2(uint8_t * RESTRICT U, sa_uint_t * RESTRICT P, sa_uint_t * RESTRICT bucket2, uint16_t * RESTRICT fastbits, fast_uint_t shift, fast_uint_t r, fast_uint_t * i0, fast_uint_t * i1, fast_uint_t k) +{ + uint16_t * RESTRICT U0 = (uint16_t *)(void *)U; + uint16_t * RESTRICT U1 = (uint16_t *)(void *)(((uint8_t *)U0) + r); + + fast_uint_t i, p0 = *i0, p1 = *i1; + + for (i = 0; i != k; ++i) + { + uint16_t c0 = fastbits[p0 >> shift]; if (bucket2[c0] <= p0) { do { c0++; } while (bucket2[c0] <= p0); } p0 = P[p0]; U0[i] = libsais_bswap16(c0); + uint16_t c1 = fastbits[p1 >> shift]; if (bucket2[c1] <= p1) { do { c1++; } while (bucket2[c1] <= p1); } p1 = P[p1]; U1[i] = libsais_bswap16(c1); + } + + *i0 = p0; *i1 = p1; +} + +static void libsais_unbwt_decode_3(uint8_t * RESTRICT U, sa_uint_t * RESTRICT P, sa_uint_t * RESTRICT bucket2, uint16_t * RESTRICT fastbits, fast_uint_t shift, fast_uint_t r, fast_uint_t * i0, fast_uint_t * i1, fast_uint_t * i2, fast_uint_t k) +{ + uint16_t * RESTRICT U0 = (uint16_t *)(void *)U; + uint16_t * RESTRICT U1 = (uint16_t *)(void *)(((uint8_t *)U0) + r); + uint16_t * RESTRICT U2 = (uint16_t *)(void *)(((uint8_t *)U1) + r); + + fast_uint_t i, p0 = *i0, p1 = *i1, p2 = *i2; + + for (i = 0; i != k; ++i) + { + uint16_t c0 = fastbits[p0 >> shift]; if (bucket2[c0] <= p0) { do { c0++; } while (bucket2[c0] <= p0); } p0 = P[p0]; U0[i] = libsais_bswap16(c0); + uint16_t c1 = fastbits[p1 >> shift]; if (bucket2[c1] <= p1) { do { c1++; } while (bucket2[c1] <= p1); } p1 = P[p1]; U1[i] = libsais_bswap16(c1); + uint16_t c2 = fastbits[p2 >> shift]; if (bucket2[c2] <= p2) { do { c2++; } while (bucket2[c2] <= p2); } p2 = P[p2]; U2[i] = libsais_bswap16(c2); + } + + *i0 = p0; *i1 = p1; *i2 = p2; +} + +static void libsais_unbwt_decode_4(uint8_t * RESTRICT U, sa_uint_t * RESTRICT P, sa_uint_t * RESTRICT bucket2, uint16_t * RESTRICT fastbits, fast_uint_t shift, fast_uint_t r, fast_uint_t * i0, 
fast_uint_t * i1, fast_uint_t * i2, fast_uint_t * i3, fast_uint_t k) +{ + uint16_t * RESTRICT U0 = (uint16_t *)(void *)U; + uint16_t * RESTRICT U1 = (uint16_t *)(void *)(((uint8_t *)U0) + r); + uint16_t * RESTRICT U2 = (uint16_t *)(void *)(((uint8_t *)U1) + r); + uint16_t * RESTRICT U3 = (uint16_t *)(void *)(((uint8_t *)U2) + r); + + fast_uint_t i, p0 = *i0, p1 = *i1, p2 = *i2, p3 = *i3; + + for (i = 0; i != k; ++i) + { + uint16_t c0 = fastbits[p0 >> shift]; if (bucket2[c0] <= p0) { do { c0++; } while (bucket2[c0] <= p0); } p0 = P[p0]; U0[i] = libsais_bswap16(c0); + uint16_t c1 = fastbits[p1 >> shift]; if (bucket2[c1] <= p1) { do { c1++; } while (bucket2[c1] <= p1); } p1 = P[p1]; U1[i] = libsais_bswap16(c1); + uint16_t c2 = fastbits[p2 >> shift]; if (bucket2[c2] <= p2) { do { c2++; } while (bucket2[c2] <= p2); } p2 = P[p2]; U2[i] = libsais_bswap16(c2); + uint16_t c3 = fastbits[p3 >> shift]; if (bucket2[c3] <= p3) { do { c3++; } while (bucket2[c3] <= p3); } p3 = P[p3]; U3[i] = libsais_bswap16(c3); + } + + *i0 = p0; *i1 = p1; *i2 = p2; *i3 = p3; +} + +static void libsais_unbwt_decode_5(uint8_t * RESTRICT U, sa_uint_t * RESTRICT P, sa_uint_t * RESTRICT bucket2, uint16_t * RESTRICT fastbits, fast_uint_t shift, fast_uint_t r, fast_uint_t * i0, fast_uint_t * i1, fast_uint_t * i2, fast_uint_t * i3, fast_uint_t * i4, fast_uint_t k) +{ + uint16_t * RESTRICT U0 = (uint16_t *)(void *)U; + uint16_t * RESTRICT U1 = (uint16_t *)(void *)(((uint8_t *)U0) + r); + uint16_t * RESTRICT U2 = (uint16_t *)(void *)(((uint8_t *)U1) + r); + uint16_t * RESTRICT U3 = (uint16_t *)(void *)(((uint8_t *)U2) + r); + uint16_t * RESTRICT U4 = (uint16_t *)(void *)(((uint8_t *)U3) + r); + + fast_uint_t i, p0 = *i0, p1 = *i1, p2 = *i2, p3 = *i3, p4 = *i4; + + for (i = 0; i != k; ++i) + { + uint16_t c0 = fastbits[p0 >> shift]; if (bucket2[c0] <= p0) { do { c0++; } while (bucket2[c0] <= p0); } p0 = P[p0]; U0[i] = libsais_bswap16(c0); + uint16_t c1 = fastbits[p1 >> shift]; if (bucket2[c1] <= p1) { do { 
c1++; } while (bucket2[c1] <= p1); } p1 = P[p1]; U1[i] = libsais_bswap16(c1); + uint16_t c2 = fastbits[p2 >> shift]; if (bucket2[c2] <= p2) { do { c2++; } while (bucket2[c2] <= p2); } p2 = P[p2]; U2[i] = libsais_bswap16(c2); + uint16_t c3 = fastbits[p3 >> shift]; if (bucket2[c3] <= p3) { do { c3++; } while (bucket2[c3] <= p3); } p3 = P[p3]; U3[i] = libsais_bswap16(c3); + uint16_t c4 = fastbits[p4 >> shift]; if (bucket2[c4] <= p4) { do { c4++; } while (bucket2[c4] <= p4); } p4 = P[p4]; U4[i] = libsais_bswap16(c4); + } + + *i0 = p0; *i1 = p1; *i2 = p2; *i3 = p3; *i4 = p4; +} + +static void libsais_unbwt_decode_6(uint8_t * RESTRICT U, sa_uint_t * RESTRICT P, sa_uint_t * RESTRICT bucket2, uint16_t * RESTRICT fastbits, fast_uint_t shift, fast_uint_t r, fast_uint_t * i0, fast_uint_t * i1, fast_uint_t * i2, fast_uint_t * i3, fast_uint_t * i4, fast_uint_t * i5, fast_uint_t k) +{ + uint16_t * RESTRICT U0 = (uint16_t *)(void *)U; + uint16_t * RESTRICT U1 = (uint16_t *)(void *)(((uint8_t *)U0) + r); + uint16_t * RESTRICT U2 = (uint16_t *)(void *)(((uint8_t *)U1) + r); + uint16_t * RESTRICT U3 = (uint16_t *)(void *)(((uint8_t *)U2) + r); + uint16_t * RESTRICT U4 = (uint16_t *)(void *)(((uint8_t *)U3) + r); + uint16_t * RESTRICT U5 = (uint16_t *)(void *)(((uint8_t *)U4) + r); + + fast_uint_t i, p0 = *i0, p1 = *i1, p2 = *i2, p3 = *i3, p4 = *i4, p5 = *i5; + + for (i = 0; i != k; ++i) + { + uint16_t c0 = fastbits[p0 >> shift]; if (bucket2[c0] <= p0) { do { c0++; } while (bucket2[c0] <= p0); } p0 = P[p0]; U0[i] = libsais_bswap16(c0); + uint16_t c1 = fastbits[p1 >> shift]; if (bucket2[c1] <= p1) { do { c1++; } while (bucket2[c1] <= p1); } p1 = P[p1]; U1[i] = libsais_bswap16(c1); + uint16_t c2 = fastbits[p2 >> shift]; if (bucket2[c2] <= p2) { do { c2++; } while (bucket2[c2] <= p2); } p2 = P[p2]; U2[i] = libsais_bswap16(c2); + uint16_t c3 = fastbits[p3 >> shift]; if (bucket2[c3] <= p3) { do { c3++; } while (bucket2[c3] <= p3); } p3 = P[p3]; U3[i] = libsais_bswap16(c3); + uint16_t c4 = 
fastbits[p4 >> shift]; if (bucket2[c4] <= p4) { do { c4++; } while (bucket2[c4] <= p4); } p4 = P[p4]; U4[i] = libsais_bswap16(c4); + uint16_t c5 = fastbits[p5 >> shift]; if (bucket2[c5] <= p5) { do { c5++; } while (bucket2[c5] <= p5); } p5 = P[p5]; U5[i] = libsais_bswap16(c5); + } + + *i0 = p0; *i1 = p1; *i2 = p2; *i3 = p3; *i4 = p4; *i5 = p5; +} + +static void libsais_unbwt_decode_7(uint8_t * RESTRICT U, sa_uint_t * RESTRICT P, sa_uint_t * RESTRICT bucket2, uint16_t * RESTRICT fastbits, fast_uint_t shift, fast_uint_t r, fast_uint_t * i0, fast_uint_t * i1, fast_uint_t * i2, fast_uint_t * i3, fast_uint_t * i4, fast_uint_t * i5, fast_uint_t * i6, fast_uint_t k) +{ + uint16_t * RESTRICT U0 = (uint16_t *)(void *)U; + uint16_t * RESTRICT U1 = (uint16_t *)(void *)(((uint8_t *)U0) + r); + uint16_t * RESTRICT U2 = (uint16_t *)(void *)(((uint8_t *)U1) + r); + uint16_t * RESTRICT U3 = (uint16_t *)(void *)(((uint8_t *)U2) + r); + uint16_t * RESTRICT U4 = (uint16_t *)(void *)(((uint8_t *)U3) + r); + uint16_t * RESTRICT U5 = (uint16_t *)(void *)(((uint8_t *)U4) + r); + uint16_t * RESTRICT U6 = (uint16_t *)(void *)(((uint8_t *)U5) + r); + + fast_uint_t i, p0 = *i0, p1 = *i1, p2 = *i2, p3 = *i3, p4 = *i4, p5 = *i5, p6 = *i6; + + for (i = 0; i != k; ++i) + { + uint16_t c0 = fastbits[p0 >> shift]; if (bucket2[c0] <= p0) { do { c0++; } while (bucket2[c0] <= p0); } p0 = P[p0]; U0[i] = libsais_bswap16(c0); + uint16_t c1 = fastbits[p1 >> shift]; if (bucket2[c1] <= p1) { do { c1++; } while (bucket2[c1] <= p1); } p1 = P[p1]; U1[i] = libsais_bswap16(c1); + uint16_t c2 = fastbits[p2 >> shift]; if (bucket2[c2] <= p2) { do { c2++; } while (bucket2[c2] <= p2); } p2 = P[p2]; U2[i] = libsais_bswap16(c2); + uint16_t c3 = fastbits[p3 >> shift]; if (bucket2[c3] <= p3) { do { c3++; } while (bucket2[c3] <= p3); } p3 = P[p3]; U3[i] = libsais_bswap16(c3); + uint16_t c4 = fastbits[p4 >> shift]; if (bucket2[c4] <= p4) { do { c4++; } while (bucket2[c4] <= p4); } p4 = P[p4]; U4[i] = libsais_bswap16(c4); + 
uint16_t c5 = fastbits[p5 >> shift]; if (bucket2[c5] <= p5) { do { c5++; } while (bucket2[c5] <= p5); } p5 = P[p5]; U5[i] = libsais_bswap16(c5); + uint16_t c6 = fastbits[p6 >> shift]; if (bucket2[c6] <= p6) { do { c6++; } while (bucket2[c6] <= p6); } p6 = P[p6]; U6[i] = libsais_bswap16(c6); + } + + *i0 = p0; *i1 = p1; *i2 = p2; *i3 = p3; *i4 = p4; *i5 = p5; *i6 = p6; +} + +static void libsais_unbwt_decode_8(uint8_t * RESTRICT U, sa_uint_t * RESTRICT P, sa_uint_t * RESTRICT bucket2, uint16_t * RESTRICT fastbits, fast_uint_t shift, fast_uint_t r, fast_uint_t * i0, fast_uint_t * i1, fast_uint_t * i2, fast_uint_t * i3, fast_uint_t * i4, fast_uint_t * i5, fast_uint_t * i6, fast_uint_t * i7, fast_uint_t k) +{ + uint16_t * RESTRICT U0 = (uint16_t *)(void *)U; + uint16_t * RESTRICT U1 = (uint16_t *)(void *)(((uint8_t *)U0) + r); + uint16_t * RESTRICT U2 = (uint16_t *)(void *)(((uint8_t *)U1) + r); + uint16_t * RESTRICT U3 = (uint16_t *)(void *)(((uint8_t *)U2) + r); + uint16_t * RESTRICT U4 = (uint16_t *)(void *)(((uint8_t *)U3) + r); + uint16_t * RESTRICT U5 = (uint16_t *)(void *)(((uint8_t *)U4) + r); + uint16_t * RESTRICT U6 = (uint16_t *)(void *)(((uint8_t *)U5) + r); + uint16_t * RESTRICT U7 = (uint16_t *)(void *)(((uint8_t *)U6) + r); + + fast_uint_t i, p0 = *i0, p1 = *i1, p2 = *i2, p3 = *i3, p4 = *i4, p5 = *i5, p6 = *i6, p7 = *i7; + + for (i = 0; i != k; ++i) + { + uint16_t c0 = fastbits[p0 >> shift]; if (bucket2[c0] <= p0) { do { c0++; } while (bucket2[c0] <= p0); } p0 = P[p0]; U0[i] = libsais_bswap16(c0); + uint16_t c1 = fastbits[p1 >> shift]; if (bucket2[c1] <= p1) { do { c1++; } while (bucket2[c1] <= p1); } p1 = P[p1]; U1[i] = libsais_bswap16(c1); + uint16_t c2 = fastbits[p2 >> shift]; if (bucket2[c2] <= p2) { do { c2++; } while (bucket2[c2] <= p2); } p2 = P[p2]; U2[i] = libsais_bswap16(c2); + uint16_t c3 = fastbits[p3 >> shift]; if (bucket2[c3] <= p3) { do { c3++; } while (bucket2[c3] <= p3); } p3 = P[p3]; U3[i] = libsais_bswap16(c3); + uint16_t c4 = 
fastbits[p4 >> shift]; if (bucket2[c4] <= p4) { do { c4++; } while (bucket2[c4] <= p4); } p4 = P[p4]; U4[i] = libsais_bswap16(c4); + uint16_t c5 = fastbits[p5 >> shift]; if (bucket2[c5] <= p5) { do { c5++; } while (bucket2[c5] <= p5); } p5 = P[p5]; U5[i] = libsais_bswap16(c5); + uint16_t c6 = fastbits[p6 >> shift]; if (bucket2[c6] <= p6) { do { c6++; } while (bucket2[c6] <= p6); } p6 = P[p6]; U6[i] = libsais_bswap16(c6); + uint16_t c7 = fastbits[p7 >> shift]; if (bucket2[c7] <= p7) { do { c7++; } while (bucket2[c7] <= p7); } p7 = P[p7]; U7[i] = libsais_bswap16(c7); + } + + *i0 = p0; *i1 = p1; *i2 = p2; *i3 = p3; *i4 = p4; *i5 = p5; *i6 = p6; *i7 = p7; +} + +static void libsais_unbwt_decode(uint8_t * RESTRICT U, sa_uint_t * RESTRICT P, sa_sint_t n, sa_sint_t r, const sa_uint_t * RESTRICT I, sa_uint_t * RESTRICT bucket2, uint16_t * RESTRICT fastbits, fast_sint_t blocks, fast_uint_t reminder) +{ + fast_uint_t shift = 0; while ((n >> shift) > (1 << UNBWT_FASTBITS)) { shift++; } + fast_uint_t offset = 0; + + while (blocks > 8) + { + fast_uint_t i0 = I[0], i1 = I[1], i2 = I[2], i3 = I[3], i4 = I[4], i5 = I[5], i6 = I[6], i7 = I[7]; + libsais_unbwt_decode_8(U + offset, P, bucket2, fastbits, shift, (fast_uint_t)r, &i0, &i1, &i2, &i3, &i4, &i5, &i6, &i7, (fast_uint_t)r >> 1); + I += 8; blocks -= 8; offset += 8 * (fast_uint_t)r; + } + + if (blocks == 1) + { + fast_uint_t i0 = I[0]; + libsais_unbwt_decode_1(U + offset, P, bucket2, fastbits, shift, &i0, reminder >> 1); + } + else if (blocks == 2) + { + fast_uint_t i0 = I[0], i1 = I[1]; + libsais_unbwt_decode_2(U + offset, P, bucket2, fastbits, shift, (fast_uint_t)r, &i0, &i1, reminder >> 1); + libsais_unbwt_decode_1(U + offset + 2 * (reminder >> 1), P, bucket2, fastbits, shift, &i0, ((fast_uint_t)r >> 1) - (reminder >> 1)); + } + else if (blocks == 3) + { + fast_uint_t i0 = I[0], i1 = I[1], i2 = I[2]; + libsais_unbwt_decode_3(U + offset, P, bucket2, fastbits, shift, (fast_uint_t)r, &i0, &i1, &i2, reminder >> 1); + 
libsais_unbwt_decode_2(U + offset + 2 * (reminder >> 1), P, bucket2, fastbits, shift, (fast_uint_t)r, &i0, &i1, ((fast_uint_t)r >> 1) - (reminder >> 1)); + } + else if (blocks == 4) + { + fast_uint_t i0 = I[0], i1 = I[1], i2 = I[2], i3 = I[3]; + libsais_unbwt_decode_4(U + offset, P, bucket2, fastbits, shift, (fast_uint_t)r, &i0, &i1, &i2, &i3, reminder >> 1); + libsais_unbwt_decode_3(U + offset + 2 * (reminder >> 1), P, bucket2, fastbits, shift, (fast_uint_t)r, &i0, &i1, &i2, ((fast_uint_t)r >> 1) - (reminder >> 1)); + } + else if (blocks == 5) + { + fast_uint_t i0 = I[0], i1 = I[1], i2 = I[2], i3 = I[3], i4 = I[4]; + libsais_unbwt_decode_5(U + offset, P, bucket2, fastbits, shift, (fast_uint_t)r, &i0, &i1, &i2, &i3, &i4, reminder >> 1); + libsais_unbwt_decode_4(U + offset + 2 * (reminder >> 1), P, bucket2, fastbits, shift, (fast_uint_t)r, &i0, &i1, &i2, &i3, ((fast_uint_t)r >> 1) - (reminder >> 1)); + } + else if (blocks == 6) + { + fast_uint_t i0 = I[0], i1 = I[1], i2 = I[2], i3 = I[3], i4 = I[4], i5 = I[5]; + libsais_unbwt_decode_6(U + offset, P, bucket2, fastbits, shift, (fast_uint_t)r, &i0, &i1, &i2, &i3, &i4, &i5, reminder >> 1); + libsais_unbwt_decode_5(U + offset + 2 * (reminder >> 1), P, bucket2, fastbits, shift, (fast_uint_t)r, &i0, &i1, &i2, &i3, &i4, ((fast_uint_t)r >> 1) - (reminder >> 1)); + } + else if (blocks == 7) + { + fast_uint_t i0 = I[0], i1 = I[1], i2 = I[2], i3 = I[3], i4 = I[4], i5 = I[5], i6 = I[6]; + libsais_unbwt_decode_7(U + offset, P, bucket2, fastbits, shift, (fast_uint_t)r, &i0, &i1, &i2, &i3, &i4, &i5, &i6, reminder >> 1); + libsais_unbwt_decode_6(U + offset + 2 * (reminder >> 1), P, bucket2, fastbits, shift, (fast_uint_t)r, &i0, &i1, &i2, &i3, &i4, &i5, ((fast_uint_t)r >> 1) - (reminder >> 1)); + } + else + { + fast_uint_t i0 = I[0], i1 = I[1], i2 = I[2], i3 = I[3], i4 = I[4], i5 = I[5], i6 = I[6], i7 = I[7]; + libsais_unbwt_decode_8(U + offset, P, bucket2, fastbits, shift, (fast_uint_t)r, &i0, &i1, &i2, &i3, &i4, &i5, &i6, &i7, 
reminder >> 1); + libsais_unbwt_decode_7(U + offset + 2 * (reminder >> 1), P, bucket2, fastbits, shift, (fast_uint_t)r, &i0, &i1, &i2, &i3, &i4, &i5, &i6, ((fast_uint_t)r >> 1) - (reminder >> 1)); + } +} + +static void libsais_unbwt_decode_omp(const uint8_t * RESTRICT T, uint8_t * RESTRICT U, sa_uint_t * RESTRICT P, sa_sint_t n, sa_sint_t r, const sa_uint_t * RESTRICT I, sa_uint_t * RESTRICT bucket2, uint16_t * RESTRICT fastbits, sa_sint_t threads) +{ + fast_uint_t lastc = T[0]; + fast_sint_t blocks = 1 + (((fast_sint_t)n - 1) / (fast_sint_t)r); + fast_uint_t reminder = (fast_uint_t)n - ((fast_uint_t)r * ((fast_uint_t)blocks - 1)); + +#if defined(_OPENMP) + fast_sint_t max_threads = blocks < threads ? blocks : threads; + #pragma omp parallel num_threads(max_threads) if(max_threads > 1 && n >= 65536) +#endif + { +#if defined(_OPENMP) + fast_sint_t omp_thread_num = omp_get_thread_num(); + fast_sint_t omp_num_threads = omp_get_num_threads(); +#else + UNUSED(threads); + + fast_sint_t omp_thread_num = 0; + fast_sint_t omp_num_threads = 1; +#endif + + fast_sint_t omp_block_stride = blocks / omp_num_threads; + fast_sint_t omp_block_reminder = blocks % omp_num_threads; + fast_sint_t omp_block_size = omp_block_stride + (omp_thread_num < omp_block_reminder); + fast_sint_t omp_block_start = omp_block_stride * omp_thread_num + (omp_thread_num < omp_block_reminder ? omp_thread_num : omp_block_reminder); + + libsais_unbwt_decode(U + r * omp_block_start, P, n, r, I + omp_block_start, bucket2, fastbits, omp_block_size, omp_thread_num < omp_num_threads - 1 ? 
(fast_uint_t)r : reminder); + } + + U[n - 1] = (uint8_t)lastc; +} + +static sa_sint_t libsais_unbwt_core(const uint8_t * RESTRICT T, uint8_t * RESTRICT U, sa_uint_t * RESTRICT P, sa_sint_t n, const sa_sint_t * freq, sa_sint_t r, const sa_uint_t * RESTRICT I, sa_uint_t * RESTRICT bucket2, uint16_t * RESTRICT fastbits, sa_uint_t * RESTRICT buckets, sa_sint_t threads) +{ +#if defined(_OPENMP) + if (threads > 1 && n >= 262144) + { + libsais_unbwt_init_parallel(T, P, n, freq, I, bucket2, fastbits, buckets, threads); + } + else +#else + UNUSED(buckets); +#endif + { + libsais_unbwt_init_single(T, P, n, freq, I, bucket2, fastbits); + } + + libsais_unbwt_decode_omp(T, U, P, n, r, I, bucket2, fastbits, threads); + return 0; +} + +static sa_sint_t libsais_unbwt_main(const uint8_t * T, uint8_t * U, sa_uint_t * P, sa_sint_t n, const sa_sint_t * freq, sa_sint_t r, const sa_uint_t * I, sa_sint_t threads) +{ + fast_uint_t shift = 0; while ((n >> shift) > (1 << UNBWT_FASTBITS)) { shift++; } + + sa_uint_t * RESTRICT bucket2 = (sa_uint_t *)libsais_alloc_aligned(ALPHABET_SIZE * ALPHABET_SIZE * sizeof(sa_uint_t), 4096); + uint16_t * RESTRICT fastbits = (uint16_t *)libsais_alloc_aligned(((size_t)1 + (size_t)(n >> shift)) * sizeof(uint16_t), 4096); + sa_uint_t * RESTRICT buckets = threads > 1 && n >= 262144 ? (sa_uint_t *)libsais_alloc_aligned((size_t)threads * (ALPHABET_SIZE + (ALPHABET_SIZE * ALPHABET_SIZE)) * sizeof(sa_uint_t), 4096) : NULL; + + sa_sint_t index = bucket2 != NULL && fastbits != NULL && (buckets != NULL || threads == 1 || n < 262144) + ? 
libsais_unbwt_core(T, U, P, n, freq, r, I, bucket2, fastbits, buckets, threads) + : -2; + + libsais_free_aligned(buckets); + libsais_free_aligned(fastbits); + libsais_free_aligned(bucket2); + + return index; +} + +static sa_sint_t libsais_unbwt_main_ctx(const LIBSAIS_UNBWT_CONTEXT * ctx, const uint8_t * T, uint8_t * U, sa_uint_t * P, sa_sint_t n, const sa_sint_t * freq, sa_sint_t r, const sa_uint_t * I) +{ + return ctx != NULL && ctx->bucket2 != NULL && ctx->fastbits != NULL && (ctx->buckets != NULL || ctx->threads == 1) + ? libsais_unbwt_core(T, U, P, n, freq, r, I, ctx->bucket2, ctx->fastbits, ctx->buckets, (sa_sint_t)ctx->threads) + : -2; +} + +void * libsais_unbwt_create_ctx(void) +{ + return (void *)libsais_unbwt_create_ctx_main(1); +} + +void libsais_unbwt_free_ctx(void * ctx) +{ + libsais_unbwt_free_ctx_main((LIBSAIS_UNBWT_CONTEXT *)ctx); +} + +int32_t libsais_unbwt(const uint8_t * T, uint8_t * U, int32_t * A, int32_t n, const int32_t * freq, int32_t i) +{ + return libsais_unbwt_aux(T, U, A, n, freq, n, &i); +} + +int32_t libsais_unbwt_ctx(const void * ctx, const uint8_t * T, uint8_t * U, int32_t * A, int32_t n, const int32_t * freq, int32_t i) +{ + return libsais_unbwt_aux_ctx(ctx, T, U, A, n, freq, n, &i); +} + +int32_t libsais_unbwt_aux(const uint8_t * T, uint8_t * U, int32_t * A, int32_t n, const int32_t * freq, int32_t r, const int32_t * I) +{ + if ((T == NULL) || (U == NULL) || (A == NULL) || (n < 0) || ((r != n) && ((r < 2) || ((r & (r - 1)) != 0))) || (I == NULL)) + { + return -1; + } + else if (n <= 1) + { + if (I[0] != n) { return -1; } + if (n == 1) { U[0] = T[0]; } + return 0; + } + + fast_sint_t t; for (t = 0; t <= (n - 1) / r; ++t) { if (I[t] <= 0 || I[t] > n) { return -1; } } + + return libsais_unbwt_main(T, U, (sa_uint_t *)A, n, freq, r, (const sa_uint_t *)I, 1); +} + +int32_t libsais_unbwt_aux_ctx(const void * ctx, const uint8_t * T, uint8_t * U, int32_t * A, int32_t n, const int32_t * freq, int32_t r, const int32_t * I) +{ + if ((T == NULL) 
|| (U == NULL) || (A == NULL) || (n < 0) || ((r != n) && ((r < 2) || ((r & (r - 1)) != 0))) || (I == NULL)) + { + return -1; + } + else if (n <= 1) + { + if (I[0] != n) { return -1; } + if (n == 1) { U[0] = T[0]; } + return 0; + } + + fast_sint_t t; for (t = 0; t <= (n - 1) / r; ++t) { if (I[t] <= 0 || I[t] > n) { return -1; } } + + return libsais_unbwt_main_ctx((const LIBSAIS_UNBWT_CONTEXT *)ctx, T, U, (sa_uint_t *)A, n, freq, r, (const sa_uint_t *)I); +} + +#if defined(_OPENMP) + +void * libsais_unbwt_create_ctx_omp(int32_t threads) +{ + if (threads < 0) { return NULL; } + + threads = threads > 0 ? threads : omp_get_max_threads(); + return (void *)libsais_unbwt_create_ctx_main(threads); +} + +int32_t libsais_unbwt_omp(const uint8_t * T, uint8_t * U, int32_t * A, int32_t n, const int32_t * freq, int32_t i, int32_t threads) +{ + return libsais_unbwt_aux_omp(T, U, A, n, freq, n, &i, threads); +} + +int32_t libsais_unbwt_aux_omp(const uint8_t * T, uint8_t * U, int32_t * A, int32_t n, const int32_t * freq, int32_t r, const int32_t * I, int32_t threads) +{ + if ((T == NULL) || (U == NULL) || (A == NULL) || (n < 0) || ((r != n) && ((r < 2) || ((r & (r - 1)) != 0))) || (I == NULL) || (threads < 0)) + { + return -1; + } + else if (n <= 1) + { + if (I[0] != n) { return -1; } + if (n == 1) { U[0] = T[0]; } + return 0; + } + + fast_sint_t t; for (t = 0; t <= (n - 1) / r; ++t) { if (I[t] <= 0 || I[t] > n) { return -1; } } + + threads = threads > 0 ? threads : omp_get_max_threads(); + return libsais_unbwt_main(T, U, (sa_uint_t *)A, n, freq, r, (const sa_uint_t *)I, threads); +} + +#endif diff --git a/libbsc/libbsc/bwt/libsais/libsais.h b/libbsc/libbsc/bwt/libsais/libsais.h new file mode 100644 index 00000000..cc3b50bd --- /dev/null +++ b/libbsc/libbsc/bwt/libsais/libsais.h @@ -0,0 +1,310 @@ +/*-- + +This file is a part of libsais, a library for linear time +suffix array and burrows wheeler transform construction. 
+ + Copyright (c) 2021-2022 Ilya Grebnov + + Licensed under the Apache License, Version 2.0 (the "License"); + you may not use this file except in compliance with the License. + You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + + Unless required by applicable law or agreed to in writing, software + distributed under the License is distributed on an "AS IS" BASIS, + WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + See the License for the specific language governing permissions and + limitations under the License. + +Please see the file LICENSE for full copyright information. + +--*/ + +#ifndef LIBSAIS_H +#define LIBSAIS_H 1 + +#ifdef __cplusplus +extern "C" { +#endif + + #include + + /** + * Creates the libsais context that allows reusing allocated memory with each libsais operation. + * In multi-threaded environments, use one context per thread for parallel executions. + * @return the libsais context, NULL otherwise. + */ + void * libsais_create_ctx(void); + +#if defined(_OPENMP) + /** + * Creates the libsais context that allows reusing allocated memory with each parallel libsais operation using OpenMP. + * In multi-threaded environments, use one context per thread for parallel executions. + * @param threads The number of OpenMP threads to use (can be 0 for OpenMP default). + * @return the libsais context, NULL otherwise. + */ + void * libsais_create_ctx_omp(int32_t threads); +#endif + + /** + * Destroys the libsais context and frees previously allocated memory. + * @param ctx The libsais context (can be NULL). + */ + void libsais_free_ctx(void * ctx); + + /** + * Constructs the suffix array of a given string. + * @param T [0..n-1] The input string. + * @param SA [0..n-1+fs] The output array of suffixes. + * @param n The length of the given string. + * @param fs The extra space available at the end of SA array (0 should be enough for most cases). 
+ * @param freq [0..255] The output symbol frequency table (can be NULL). + * @return 0 if no error occurred, -1 or -2 otherwise. + */ + int32_t libsais(const uint8_t * T, int32_t * SA, int32_t n, int32_t fs, int32_t * freq); + + /** + * Constructs the suffix array of a given integer array. + * Note, during construction input array will be modified, but restored at the end if no errors occurred. + * @param T [0..n-1] The input integer array. + * @param SA [0..n-1+fs] The output array of suffixes. + * @param n The length of the integer array. + * @param k The alphabet size of the input integer array. + * @param fs Extra space available at the end of SA array (can be 0, but 4k or better 6k is recommended for optimal performance). + * @return 0 if no error occurred, -1 or -2 otherwise. + */ + int32_t libsais_int(int32_t * T, int32_t * SA, int32_t n, int32_t k, int32_t fs); + + /** + * Constructs the suffix array of a given string using libsais context. + * @param ctx The libsais context. + * @param T [0..n-1] The input string. + * @param SA [0..n-1+fs] The output array of suffixes. + * @param n The length of the given string. + * @param fs The extra space available at the end of SA array (0 should be enough for most cases). + * @param freq [0..255] The output symbol frequency table (can be NULL). + * @return 0 if no error occurred, -1 or -2 otherwise. + */ + int32_t libsais_ctx(const void * ctx, const uint8_t * T, int32_t * SA, int32_t n, int32_t fs, int32_t * freq); + +#if defined(_OPENMP) + /** + * Constructs the suffix array of a given string in parallel using OpenMP. + * @param T [0..n-1] The input string. + * @param SA [0..n-1+fs] The output array of suffixes. + * @param n The length of the given string. + * @param fs The extra space available at the end of SA array (0 should be enough for most cases). + * @param freq [0..255] The output symbol frequency table (can be NULL). + * @param threads The number of OpenMP threads to use (can be 0 for OpenMP default). 
+ * @return 0 if no error occurred, -1 or -2 otherwise. + */ + int32_t libsais_omp(const uint8_t * T, int32_t * SA, int32_t n, int32_t fs, int32_t * freq, int32_t threads); + + /** + * Constructs the suffix array of a given integer array in parallel using OpenMP. + * Note, during construction input array will be modified, but restored at the end if no errors occurred. + * @param T [0..n-1] The input integer array. + * @param SA [0..n-1+fs] The output array of suffixes. + * @param n The length of the integer array. + * @param k The alphabet size of the input integer array. + * @param fs Extra space available at the end of SA array (can be 0, but 4k or better 6k is recommended for optimal performance). + * @param threads The number of OpenMP threads to use (can be 0 for OpenMP default). + * @return 0 if no error occurred, -1 or -2 otherwise. + */ + int32_t libsais_int_omp(int32_t * T, int32_t * SA, int32_t n, int32_t k, int32_t fs, int32_t threads); +#endif + + /** + * Constructs the burrows-wheeler transformed string of a given string. + * @param T [0..n-1] The input string. + * @param U [0..n-1] The output string (can be T). + * @param A [0..n-1+fs] The temporary array. + * @param n The length of the given string. + * @param fs The extra space available at the end of A array (0 should be enough for most cases). + * @param freq [0..255] The output symbol frequency table (can be NULL). + * @return The primary index if no error occurred, -1 or -2 otherwise. + */ + int32_t libsais_bwt(const uint8_t * T, uint8_t * U, int32_t * A, int32_t n, int32_t fs, int32_t * freq); + + /** + * Constructs the burrows-wheeler transformed string of a given string with auxiliary indexes. + * @param T [0..n-1] The input string. + * @param U [0..n-1] The output string (can be T). + * @param A [0..n-1+fs] The temporary array. + * @param n The length of the given string. + * @param fs The extra space available at the end of A array (0 should be enough for most cases). 
+ * @param freq [0..255] The output symbol frequency table (can be NULL). + * @param r The sampling rate for auxiliary indexes (must be power of 2). + * @param I [0..(n-1)/r] The output auxiliary indexes. + * @return 0 if no error occurred, -1 or -2 otherwise. + */ + int32_t libsais_bwt_aux(const uint8_t * T, uint8_t * U, int32_t * A, int32_t n, int32_t fs, int32_t * freq, int32_t r, int32_t * I); + + /** + * Constructs the burrows-wheeler transformed string of a given string using libsais context. + * @param ctx The libsais context. + * @param T [0..n-1] The input string. + * @param U [0..n-1] The output string (can be T). + * @param A [0..n-1+fs] The temporary array. + * @param n The length of the given string. + * @param fs The extra space available at the end of A array (0 should be enough for most cases). + * @param freq [0..255] The output symbol frequency table (can be NULL). + * @return The primary index if no error occurred, -1 or -2 otherwise. + */ + int32_t libsais_bwt_ctx(const void * ctx, const uint8_t * T, uint8_t * U, int32_t * A, int32_t n, int32_t fs, int32_t * freq); + + /** + * Constructs the burrows-wheeler transformed string of a given string with auxiliary indexes using libsais context. + * @param ctx The libsais context. + * @param T [0..n-1] The input string. + * @param U [0..n-1] The output string (can be T). + * @param A [0..n-1+fs] The temporary array. + * @param n The length of the given string. + * @param fs The extra space available at the end of A array (0 should be enough for most cases). + * @param freq [0..255] The output symbol frequency table (can be NULL). + * @param r The sampling rate for auxiliary indexes (must be power of 2). + * @param I [0..(n-1)/r] The output auxiliary indexes. + * @return 0 if no error occurred, -1 or -2 otherwise. 
+ */ + int32_t libsais_bwt_aux_ctx(const void * ctx, const uint8_t * T, uint8_t * U, int32_t * A, int32_t n, int32_t fs, int32_t * freq, int32_t r, int32_t * I); + +#if defined(_OPENMP) + /** + * Constructs the burrows-wheeler transformed string of a given string in parallel using OpenMP. + * @param T [0..n-1] The input string. + * @param U [0..n-1] The output string (can be T). + * @param A [0..n-1+fs] The temporary array. + * @param n The length of the given string. + * @param fs The extra space available at the end of A array (0 should be enough for most cases). + * @param freq [0..255] The output symbol frequency table (can be NULL). + * @param threads The number of OpenMP threads to use (can be 0 for OpenMP default). + * @return The primary index if no error occurred, -1 or -2 otherwise. + */ + int32_t libsais_bwt_omp(const uint8_t * T, uint8_t * U, int32_t * A, int32_t n, int32_t fs, int32_t * freq, int32_t threads); + + /** + * Constructs the burrows-wheeler transformed string of a given string with auxiliary indexes in parallel using OpenMP. + * @param T [0..n-1] The input string. + * @param U [0..n-1] The output string (can be T). + * @param A [0..n-1+fs] The temporary array. + * @param n The length of the given string. + * @param fs The extra space available at the end of A array (0 should be enough for most cases). + * @param freq [0..255] The output symbol frequency table (can be NULL). + * @param r The sampling rate for auxiliary indexes (must be power of 2). + * @param I [0..(n-1)/r] The output auxiliary indexes. + * @param threads The number of OpenMP threads to use (can be 0 for OpenMP default). + * @return 0 if no error occurred, -1 or -2 otherwise. + */ + int32_t libsais_bwt_aux_omp(const uint8_t * T, uint8_t * U, int32_t * A, int32_t n, int32_t fs, int32_t * freq, int32_t r, int32_t * I, int32_t threads); +#endif + + /** + * Creates the libsais reverse BWT context that allows reusing allocated memory with each libsais_unbwt_* operation. 
+ * In multi-threaded environments, use one context per thread for parallel executions. + * @return the libsais context, NULL otherwise. + */ + void * libsais_unbwt_create_ctx(void); + +#if defined(_OPENMP) + /** + * Creates the libsais reverse BWT context that allows reusing allocated memory with each parallel libsais_unbwt_* operation using OpenMP. + * In multi-threaded environments, use one context per thread for parallel executions. + * @param threads The number of OpenMP threads to use (can be 0 for OpenMP default). + * @return the libsais context, NULL otherwise. + */ + void * libsais_unbwt_create_ctx_omp(int32_t threads); +#endif + + /** + * Destroys the libsais reverse BWT context and frees previously allocated memory. + * @param ctx The libsais context (can be NULL). + */ + void libsais_unbwt_free_ctx(void * ctx); + + /** + * Constructs the original string from a given burrows-wheeler transformed string with primary index. + * @param T [0..n-1] The input string. + * @param U [0..n-1] The output string (can be T). + * @param A [0..n] The temporary array (NOTE, temporary array must be n + 1 size). + * @param n The length of the given string. + * @param freq [0..255] The input symbol frequency table (can be NULL). + * @param i The primary index. + * @return 0 if no error occurred, -1 or -2 otherwise. + */ + int32_t libsais_unbwt(const uint8_t * T, uint8_t * U, int32_t * A, int32_t n, const int32_t * freq, int32_t i); + + /** + * Constructs the original string from a given burrows-wheeler transformed string with primary index using libsais reverse BWT context. + * @param ctx The libsais reverse BWT context. + * @param T [0..n-1] The input string. + * @param U [0..n-1] The output string (can be T). + * @param A [0..n] The temporary array (NOTE, temporary array must be n + 1 size). + * @param n The length of the given string. + * @param freq [0..255] The input symbol frequency table (can be NULL). + * @param i The primary index. 
+ * @return 0 if no error occurred, -1 or -2 otherwise. + */ + int32_t libsais_unbwt_ctx(const void * ctx, const uint8_t * T, uint8_t * U, int32_t * A, int32_t n, const int32_t * freq, int32_t i); + + /** + * Constructs the original string from a given burrows-wheeler transformed string with auxiliary indexes. + * @param T [0..n-1] The input string. + * @param U [0..n-1] The output string (can be T). + * @param A [0..n] The temporary array (NOTE, temporary array must be n + 1 size). + * @param n The length of the given string. + * @param freq [0..255] The input symbol frequency table (can be NULL). + * @param r The sampling rate for auxiliary indexes (must be power of 2). + * @param I [0..(n-1)/r] The input auxiliary indexes. + * @return 0 if no error occurred, -1 or -2 otherwise. + */ + int32_t libsais_unbwt_aux(const uint8_t * T, uint8_t * U, int32_t * A, int32_t n, const int32_t * freq, int32_t r, const int32_t * I); + + /** + * Constructs the original string from a given burrows-wheeler transformed string with auxiliary indexes using libsais reverse BWT context. + * @param ctx The libsais reverse BWT context. + * @param T [0..n-1] The input string. + * @param U [0..n-1] The output string (can be T). + * @param A [0..n] The temporary array (NOTE, temporary array must be n + 1 size). + * @param n The length of the given string. + * @param freq [0..255] The input symbol frequency table (can be NULL). + * @param r The sampling rate for auxiliary indexes (must be power of 2). + * @param I [0..(n-1)/r] The input auxiliary indexes. + * @return 0 if no error occurred, -1 or -2 otherwise. + */ + int32_t libsais_unbwt_aux_ctx(const void * ctx, const uint8_t * T, uint8_t * U, int32_t * A, int32_t n, const int32_t * freq, int32_t r, const int32_t * I); + +#if defined(_OPENMP) + /** + * Constructs the original string from a given burrows-wheeler transformed string with primary index in parallel using OpenMP. + * @param T [0..n-1] The input string. 
+ * @param U [0..n-1] The output string (can be T). + * @param A [0..n] The temporary array (NOTE, temporary array must be n + 1 size). + * @param n The length of the given string. + * @param freq [0..255] The input symbol frequency table (can be NULL). + * @param i The primary index. + * @param threads The number of OpenMP threads to use (can be 0 for OpenMP default). + * @return 0 if no error occurred, -1 or -2 otherwise. + */ + int32_t libsais_unbwt_omp(const uint8_t * T, uint8_t * U, int32_t * A, int32_t n, const int32_t * freq, int32_t i, int32_t threads); + + /** + * Constructs the original string from a given burrows-wheeler transformed string with auxiliary indexes in parallel using OpenMP. + * @param T [0..n-1] The input string. + * @param U [0..n-1] The output string (can be T). + * @param A [0..n] The temporary array (NOTE, temporary array must be n + 1 size). + * @param n The length of the given string. + * @param freq [0..255] The input symbol frequency table (can be NULL). + * @param r The sampling rate for auxiliary indexes (must be power of 2). + * @param I [0..(n-1)/r] The input auxiliary indexes. + * @param threads The number of OpenMP threads to use (can be 0 for OpenMP default). + * @return 0 if no error occurred, -1 or -2 otherwise. + */ + int32_t libsais_unbwt_aux_omp(const uint8_t * T, uint8_t * U, int32_t * A, int32_t n, const int32_t * freq, int32_t r, const int32_t * I, int32_t threads); +#endif + +#ifdef __cplusplus +} +#endif + +#endif diff --git a/libbsc/libbsc/coder/coder.cpp b/libbsc/libbsc/coder/coder.cpp new file mode 100644 index 00000000..5fcd2c2b --- /dev/null +++ b/libbsc/libbsc/coder/coder.cpp @@ -0,0 +1,351 @@ +/*-----------------------------------------------------------*/ +/* Block Sorting, Lossless Data Compression Library. 
*/ +/* Second stage encoding functions */ +/*-----------------------------------------------------------*/ + +/*-- + +This file is a part of bsc and/or libbsc, a program and a library for +lossless, block-sorting data compression. + + Copyright (c) 2009-2021 Ilya Grebnov + + Licensed under the Apache License, Version 2.0 (the "License"); + you may not use this file except in compliance with the License. + You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + + Unless required by applicable law or agreed to in writing, software + distributed under the License is distributed on an "AS IS" BASIS, + WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + See the License for the specific language governing permissions and + limitations under the License. + +Please see the file LICENSE for full copyright information and file AUTHORS +for full list of contributors. + +See also the bsc and libbsc web site: + http://libbsc.com/ for more information. 
+ +--*/ + +#include <stdlib.h> +#include <memory.h> + +#include "coder.h" + +#include "../libbsc.h" +#include "../platform/platform.h" + +#include "qlfc/qlfc.h" + +int bsc_coder_init(int features) +{ + int result = LIBBSC_NO_ERROR; + + if (result == LIBBSC_NO_ERROR) result = bsc_qlfc_init(features); + + return result; +} + +static INLINE int bsc_coder_num_blocks(int n) +{ + if (n < 256 * 1024) return 1; + if (n < 4 * 1024 * 1024) return 2; + if (n < 16 * 1024 * 1024) return 4; + + return 8; +} + +int bsc_coder_encode_block(const unsigned char * input, unsigned char * output, int inputSize, int outputSize, int coder) +{ + if (coder == LIBBSC_CODER_QLFC_STATIC) return bsc_qlfc_static_encode_block (input, output, inputSize, outputSize); + if (coder == LIBBSC_CODER_QLFC_ADAPTIVE) return bsc_qlfc_adaptive_encode_block(input, output, inputSize, outputSize); + if (coder == LIBBSC_CODER_QLFC_FAST) return bsc_qlfc_fast_encode_block (input, output, inputSize, outputSize); + + return LIBBSC_BAD_PARAMETER; +} + +void bsc_coder_split_blocks(const unsigned char * input, int n, int nBlocks, int * blockStart, int * blockSize) +{ + int rankSize = 0; + for (int i = 1; i < n; i += 32) + { + if (input[i] != input[i - 1]) rankSize++; + } + + if (rankSize > nBlocks) + { + int blockRankSize = rankSize / nBlocks; + + blockStart[0] = 0; rankSize = 0; + for (int id = 0, i = 1; i < n; i += 32) + { + if (input[i] != input[i - 1]) + { + rankSize++; + if (rankSize == blockRankSize) + { + rankSize = 0; + + blockSize[id] = i - blockStart[id]; + id++; blockStart[id] = i; + + if (id == nBlocks - 1) break; + } + } + } + blockSize[nBlocks - 1] = n - blockStart[nBlocks - 1]; + } + else + { + for (int p = 0; p < nBlocks; ++p) + { + blockStart[p] = (n / nBlocks) * p; + blockSize[p] = (p != nBlocks - 1) ? 
n / nBlocks : n - (n / nBlocks) * (nBlocks - 1); + } + } +} + +int bsc_coder_compress_serial(const unsigned char * input, unsigned char * output, int n, int coder) +{ + if (bsc_coder_num_blocks(n) == 1) + { + int result = bsc_coder_encode_block(input, output + 1, n, n - 1, coder); + if (result >= LIBBSC_NO_ERROR) result = (output[0] = 1, result + 1); + + return result; + } + + int compressedStart[ALPHABET_SIZE]; + int compressedSize[ALPHABET_SIZE]; + + int nBlocks = bsc_coder_num_blocks(n); + int outputPtr = 1 + 8 * nBlocks; + + bsc_coder_split_blocks(input, n, nBlocks, compressedStart, compressedSize); + + output[0] = nBlocks; + for (int blockId = 0; blockId < nBlocks; ++blockId) + { + int inputStart = compressedStart[blockId]; + int inputSize = compressedSize[blockId]; + int outputSize = inputSize; if (outputSize > n - outputPtr) outputSize = n - outputPtr; + + int result = bsc_coder_encode_block(input + inputStart, output + outputPtr, inputSize, outputSize, coder); + if (result < LIBBSC_NO_ERROR) + { + if (outputPtr + inputSize >= n) return LIBBSC_NOT_COMPRESSIBLE; + result = inputSize; memcpy(output + outputPtr, input + inputStart, inputSize); + } + +#if defined(LIBBSC_ALLOW_UNALIGNED_ACCESS) + *(int *)(output + 1 + 8 * blockId + 0) = inputSize; + *(int *)(output + 1 + 8 * blockId + 4) = result; +#else + memcpy(output + 1 + 8 * blockId + 0, &inputSize, sizeof(int)); + memcpy(output + 1 + 8 * blockId + 4, &result, sizeof(int)); +#endif + + outputPtr += result; + } + + return outputPtr; +} + +#ifdef LIBBSC_OPENMP + +int bsc_coder_compress_parallel(const unsigned char * input, unsigned char * output, int n, int coder) +{ + if (unsigned char * buffer = (unsigned char *)bsc_malloc(n * sizeof(unsigned char))) + { + int compressionResult[ALPHABET_SIZE]; + int compressedStart[ALPHABET_SIZE]; + int compressedSize[ALPHABET_SIZE]; + + int nBlocks = bsc_coder_num_blocks(n); + int result = LIBBSC_NO_ERROR; + + int numThreads = omp_get_max_threads(); + if (numThreads > 
nBlocks) numThreads = nBlocks; + + output[0] = nBlocks; + #pragma omp parallel num_threads(numThreads) if(numThreads > 1) + { + if (omp_get_num_threads() == 1) + { + result = bsc_coder_compress_serial(input, output, n, coder); + } + else + { + #pragma omp single + { + bsc_coder_split_blocks(input, n, nBlocks, compressedStart, compressedSize); + } + + #pragma omp for schedule(dynamic) + for (int blockId = 0; blockId < nBlocks; ++blockId) + { + int blockStart = compressedStart[blockId]; + int blockSize = compressedSize[blockId]; + + compressionResult[blockId] = bsc_coder_encode_block(input + blockStart, buffer + blockStart, blockSize, blockSize, coder); + if (compressionResult[blockId] < LIBBSC_NO_ERROR) compressionResult[blockId] = blockSize; + + memcpy(output + 1 + 8 * blockId + 0, &blockSize, sizeof(int)); /* header field offset (1 + 8 * blockId) is odd, so copy bytewise instead of an unaligned int store */ + memcpy(output + 1 + 8 * blockId + 4, &compressionResult[blockId], sizeof(int)); + } + + #pragma omp single + { + result = 1 + 8 * nBlocks; + for (int blockId = 0; blockId < nBlocks; ++blockId) + { + result += compressionResult[blockId]; + } + + if (result >= n) result = LIBBSC_NOT_COMPRESSIBLE; + } + + if (result >= LIBBSC_NO_ERROR) + { + #pragma omp for schedule(dynamic) + for (int blockId = 0; blockId < nBlocks; ++blockId) + { + int blockStart = compressedStart[blockId]; + int blockSize = compressedSize[blockId]; + + int outputPtr = 1 + 8 * nBlocks; + for (int p = 0; p < blockId; ++p) outputPtr += compressionResult[p]; + + if (compressionResult[blockId] != blockSize) + { + memcpy(output + outputPtr, buffer + blockStart, compressionResult[blockId]); + } + else + { + memcpy(output + outputPtr, input + blockStart, compressionResult[blockId]); + } + } + } + } + } + + bsc_free(buffer); + + return result; + } + return LIBBSC_NOT_ENOUGH_MEMORY; +} + +#endif + +int bsc_coder_compress(const unsigned char * input, unsigned char * output, int n, int coder, int features) +{ + if ((coder != LIBBSC_CODER_QLFC_STATIC) && (coder != LIBBSC_CODER_QLFC_ADAPTIVE) && (coder != 
LIBBSC_CODER_QLFC_FAST)) + { + return LIBBSC_BAD_PARAMETER; + } + +#ifdef LIBBSC_OPENMP + + if ((bsc_coder_num_blocks(n) != 1) && (features & LIBBSC_FEATURE_MULTITHREADING)) + { + return bsc_coder_compress_parallel(input, output, n, coder); + } + +#endif + + return bsc_coder_compress_serial(input, output, n, coder); +} + + +int bsc_coder_decode_block(const unsigned char * input, unsigned char * output, int coder) +{ + if (coder == LIBBSC_CODER_QLFC_STATIC) return bsc_qlfc_static_decode_block (input, output); + if (coder == LIBBSC_CODER_QLFC_ADAPTIVE) return bsc_qlfc_adaptive_decode_block(input, output); + if (coder == LIBBSC_CODER_QLFC_FAST) return bsc_qlfc_fast_decode_block (input, output); + + return LIBBSC_BAD_PARAMETER; +} + +int bsc_coder_decompress(const unsigned char * input, unsigned char * output, int coder, int features) +{ + if ((coder != LIBBSC_CODER_QLFC_STATIC) && (coder != LIBBSC_CODER_QLFC_ADAPTIVE) && (coder != LIBBSC_CODER_QLFC_FAST)) + { + return LIBBSC_BAD_PARAMETER; + } + + int nBlocks = input[0]; + if (nBlocks == 1) + { + return bsc_coder_decode_block(input + 1, output, coder); + } + + int decompressionResult[ALPHABET_SIZE]; + +#ifdef LIBBSC_OPENMP + + if (features & LIBBSC_FEATURE_MULTITHREADING) + { + #pragma omp parallel for schedule(dynamic) + for (int blockId = 0; blockId < nBlocks; ++blockId) + { + int inputPtr = 0; for (int p = 0; p < blockId; ++p) inputPtr += *(int *)(input + 1 + 8 * p + 4); + int outputPtr = 0; for (int p = 0; p < blockId; ++p) outputPtr += *(int *)(input + 1 + 8 * p + 0); + + inputPtr += 1 + 8 * nBlocks; + + int inputSize = *(int *)(input + 1 + 8 * blockId + 4); + int outputSize = *(int *)(input + 1 + 8 * blockId + 0); + + if (inputSize != outputSize) + { + decompressionResult[blockId] = bsc_coder_decode_block(input + inputPtr, output + outputPtr, coder); + } + else + { + decompressionResult[blockId] = inputSize; memcpy(output + outputPtr, input + inputPtr, inputSize); + } + } + } + else + +#endif + + { + for (int 
blockId = 0; blockId < nBlocks; ++blockId) + { + int inputPtr = 0; for (int p = 0; p < blockId; ++p) inputPtr += *(int *)(input + 1 + 8 * p + 4); + int outputPtr = 0; for (int p = 0; p < blockId; ++p) outputPtr += *(int *)(input + 1 + 8 * p + 0); + + inputPtr += 1 + 8 * nBlocks; + + int inputSize = *(int *)(input + 1 + 8 * blockId + 4); + int outputSize = *(int *)(input + 1 + 8 * blockId + 0); + + if (inputSize != outputSize) + { + decompressionResult[blockId] = bsc_coder_decode_block(input + inputPtr, output + outputPtr, coder); + } + else + { + decompressionResult[blockId] = inputSize; memcpy(output + outputPtr, input + inputPtr, inputSize); + } + } + } + + int dataSize = 0, result = LIBBSC_NO_ERROR; + for (int blockId = 0; blockId < nBlocks; ++blockId) + { + if (decompressionResult[blockId] < LIBBSC_NO_ERROR) result = decompressionResult[blockId]; + dataSize += decompressionResult[blockId]; + } + + return (result == LIBBSC_NO_ERROR) ? dataSize : result; +} + +/*-----------------------------------------------------------*/ +/* End coder.cpp */ +/*-----------------------------------------------------------*/ diff --git a/libbsc/libbsc/coder/coder.h b/libbsc/libbsc/coder/coder.h new file mode 100644 index 00000000..97bc040a --- /dev/null +++ b/libbsc/libbsc/coder/coder.h @@ -0,0 +1,76 @@ +/*-----------------------------------------------------------*/ +/* Block Sorting, Lossless Data Compression Library. */ +/* Interface to second stage encoding functions */ +/*-----------------------------------------------------------*/ + +/*-- + +This file is a part of bsc and/or libbsc, a program and a library for +lossless, block-sorting data compression. + + Copyright (c) 2009-2021 Ilya Grebnov + + Licensed under the Apache License, Version 2.0 (the "License"); + you may not use this file except in compliance with the License. 
+ You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + + Unless required by applicable law or agreed to in writing, software + distributed under the License is distributed on an "AS IS" BASIS, + WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + See the License for the specific language governing permissions and + limitations under the License. + +Please see the file LICENSE for full copyright information and file AUTHORS +for full list of contributors. + +See also the bsc and libbsc web site: + http://libbsc.com/ for more information. + +--*/ + +#ifndef _LIBBSC_CODER_H +#define _LIBBSC_CODER_H + +#ifdef __cplusplus +extern "C" { +#endif + + /** + * You should call this function before you call any of the other functions in coder. + * @param features - the set of additional features. + * @return LIBBSC_NO_ERROR if no error occurred, error code otherwise. + */ + int bsc_coder_init(int features); + + /** + * Compress a memory block using Quantized Local Frequency Coding. + * @param input - the input memory block of n bytes. + * @param output - the output memory block of n bytes. + * @param n - the length of the input memory block. + * @param coder - the entropy coding algorithm. + * @param features - the set of additional features. + * @return the length of compressed memory block if no error occurred, error code otherwise. + */ + int bsc_coder_compress(const unsigned char * input, unsigned char * output, int n, int coder, int features); + + /** + * Decompress a memory block using Quantized Local Frequency Coding. + * @param input - the input memory block. + * @param output - the output memory block. + * @param coder - the entropy coding algorithm. + * @param features - the set of additional features. + * @return the length of decompressed memory block if no error occurred, error code otherwise. 
+ */ + int bsc_coder_decompress(const unsigned char * input, unsigned char * output, int coder, int features); + +#ifdef __cplusplus +} +#endif + +#endif + +/*-----------------------------------------------------------*/ +/* End coder.h */ +/*-----------------------------------------------------------*/ diff --git a/libbsc/libbsc/coder/common/predictor.h b/libbsc/libbsc/coder/common/predictor.h new file mode 100644 index 00000000..34ca792d --- /dev/null +++ b/libbsc/libbsc/coder/common/predictor.h @@ -0,0 +1,220 @@ +/*-----------------------------------------------------------*/ +/* Block Sorting, Lossless Data Compression Library. */ +/* Probability counter and logistic mixer */ +/*-----------------------------------------------------------*/ + +/*-- + +This file is a part of bsc and/or libbsc, a program and a library for +lossless, block-sorting data compression. + + Copyright (c) 2009-2021 Ilya Grebnov + + Licensed under the Apache License, Version 2.0 (the "License"); + you may not use this file except in compliance with the License. + You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + + Unless required by applicable law or agreed to in writing, software + distributed under the License is distributed on an "AS IS" BASIS, + WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + See the License for the specific language governing permissions and + limitations under the License. + +Please see the file LICENSE for full copyright information and file AUTHORS +for full list of contributors. + +See also the bsc and libbsc web site: + http://libbsc.com/ for more information. 
+ +--*/ + +#ifndef _LIBBSC_CODER_PREDICTOR_H +#define _LIBBSC_CODER_PREDICTOR_H + +#include "../../platform/platform.h" + +#include "tables.h" + +struct ProbabilityCounter +{ + +public: + + static INLINE void UpdateBit(unsigned int bit, short & probability, const int threshold0, const int adaptationRate0, const int threshold1, const int adaptationRate1) + { + int delta0 = probability * adaptationRate0 - ((4096 - threshold0) * adaptationRate0 - 4095); + int delta1 = probability * adaptationRate1 - (threshold1 * adaptationRate1); + + probability = probability - ((bit ? delta1 : delta0) >> 12); + } + + static INLINE void UpdateBit0(short & probability, const int threshold, const int adaptationRate) + { + probability = probability + (((4096 - threshold - probability) * adaptationRate) >> 12); + }; + + static INLINE void UpdateBit1(short & probability, const int threshold, const int adaptationRate) + { + probability = probability - (((probability - threshold) * adaptationRate) >> 12); + }; + + template <int R> static INLINE void UpdateBit(unsigned int bit, short & probability, const int threshold0, const int threshold1) /* R: right-shift applied to (probability - threshold) */ + { + probability = probability - ((probability - (bit ? 
threshold1 : threshold0)) >> R); + } + + template <int R> static INLINE void UpdateBit(short & probability, const int threshold) /* R: right-shift applied to (probability - threshold) */ + { + probability = probability - ((probability - threshold) >> R); + }; +}; + +struct ProbabilityMixer +{ + +private: + + short stretchedProbability0; + short stretchedProbability1; + short stretchedProbability2; + int mixedProbability; + int index; + + short probabilityMap[17]; + + int weight0; + int weight1; + int weight2; + +public: + + INLINE void Init() + { + weight0 = weight1 = 2048 << 5; weight2 = 0; + for (int p = 0; p < 17; ++p) + { + probabilityMap[p] = bsc_squash((p - 8) * 256); + } + } + + INLINE int Mixup(const int probability0, const int probability1, const int probability2) + { + stretchedProbability0 = bsc_stretch(probability0); + stretchedProbability1 = bsc_stretch(probability1); + stretchedProbability2 = bsc_stretch(probability2); + + short stretchedProbability = (stretchedProbability0 * weight0 + stretchedProbability1 * weight1 + stretchedProbability2 * weight2) >> 17; + + if (stretchedProbability < -2047) stretchedProbability = -2047; + if (stretchedProbability > 2047) stretchedProbability = 2047; + + index = (stretchedProbability + 2048) >> 8; + const int weight = stretchedProbability & 255; + const int probability = bsc_squash(stretchedProbability); + const int mappedProbability = probabilityMap[index] + (((probabilityMap[index + 1] - probabilityMap[index]) * weight) >> 8); + + return mixedProbability = (3 * probability + mappedProbability) >> 2; + }; + + INLINE int MixupAndUpdateBit0(const int probability0, const int probability1, const int probability2, + const int learningRate0, const int learningRate1, const int learningRate2, + const int threshold, const int adaptationRate + ) + { + const short stretchedProbability0 = bsc_stretch(probability0); + const short stretchedProbability1 = bsc_stretch(probability1); + const short stretchedProbability2 = bsc_stretch(probability2); + + short stretchedProbability = 
(stretchedProbability0 * weight0 + stretchedProbability1 * weight1 + stretchedProbability2 * weight2) >> 17; + + if (stretchedProbability < -2047) stretchedProbability = -2047; + if (stretchedProbability > 2047) stretchedProbability = 2047; + + const int weight = stretchedProbability & 255; + const int index = (stretchedProbability + 2048) >> 8; + const int probability = bsc_squash(stretchedProbability); + const int mappedProbability = probabilityMap[index] + (((probabilityMap[index + 1] - probabilityMap[index]) * weight) >> 8); + const int mixedProbability = (3 * probability + mappedProbability) >> 2; + + ProbabilityCounter::UpdateBit0(probabilityMap[index], threshold, adaptationRate); + ProbabilityCounter::UpdateBit0(probabilityMap[index + 1], threshold, adaptationRate); + + const int eps = mixedProbability - 4095; + + weight0 -= (learningRate0 * eps * stretchedProbability0) >> 16; + weight1 -= (learningRate1 * eps * stretchedProbability1) >> 16; + weight2 -= (learningRate2 * eps * stretchedProbability2) >> 16; + + return mixedProbability; + }; + + INLINE int MixupAndUpdateBit1(const int probability0, const int probability1, const int probability2, + const int learningRate0, const int learningRate1, const int learningRate2, + const int threshold, const int adaptationRate + ) + { + const short stretchedProbability0 = bsc_stretch(probability0); + const short stretchedProbability1 = bsc_stretch(probability1); + const short stretchedProbability2 = bsc_stretch(probability2); + + short stretchedProbability = (stretchedProbability0 * weight0 + stretchedProbability1 * weight1 + stretchedProbability2 * weight2) >> 17; + + if (stretchedProbability < -2047) stretchedProbability = -2047; + if (stretchedProbability > 2047) stretchedProbability = 2047; + + const int weight = stretchedProbability & 255; + const int index = (stretchedProbability + 2048) >> 8; + const int probability = bsc_squash(stretchedProbability); + const int mappedProbability = probabilityMap[index] + 
(((probabilityMap[index + 1] - probabilityMap[index]) * weight) >> 8); + const int mixedProbability = (3 * probability + mappedProbability) >> 2; + + ProbabilityCounter::UpdateBit1(probabilityMap[index], threshold, adaptationRate); + ProbabilityCounter::UpdateBit1(probabilityMap[index + 1], threshold, adaptationRate); + + const int eps = mixedProbability - 1; + + weight0 -= (learningRate0 * eps * stretchedProbability0) >> 16; + weight1 -= (learningRate1 * eps * stretchedProbability1) >> 16; + weight2 -= (learningRate2 * eps * stretchedProbability2) >> 16; + + return mixedProbability; + }; + + INLINE void UpdateBit0(const int learningRate0, const int learningRate1, const int learningRate2, + const int threshold, const int adaptationRate + ) + { + ProbabilityCounter::UpdateBit0(probabilityMap[index], threshold, adaptationRate); + ProbabilityCounter::UpdateBit0(probabilityMap[index + 1], threshold, adaptationRate); + + const int eps = mixedProbability - 4095; + + weight0 -= (learningRate0 * eps * stretchedProbability0) >> 16; + weight1 -= (learningRate1 * eps * stretchedProbability1) >> 16; + weight2 -= (learningRate2 * eps * stretchedProbability2) >> 16; + }; + + INLINE void UpdateBit1(const int learningRate0, const int learningRate1, const int learningRate2, + const int threshold, const int adaptationRate + ) + { + ProbabilityCounter::UpdateBit1(probabilityMap[index], threshold, adaptationRate); + ProbabilityCounter::UpdateBit1(probabilityMap[index + 1], threshold, adaptationRate); + + const int eps = mixedProbability - 1; + + weight0 -= (learningRate0 * eps * stretchedProbability0) >> 16; + weight1 -= (learningRate1 * eps * stretchedProbability1) >> 16; + weight2 -= (learningRate2 * eps * stretchedProbability2) >> 16; + }; + +}; + + +#endif + +/*-----------------------------------------------------------*/ +/* End predictor.h */ +/*-----------------------------------------------------------*/ diff --git a/libbsc/libbsc/coder/common/rangecoder.h 
b/libbsc/libbsc/coder/common/rangecoder.h new file mode 100644 index 00000000..42752f1e --- /dev/null +++ b/libbsc/libbsc/coder/common/rangecoder.h @@ -0,0 +1,261 @@ +/*-----------------------------------------------------------*/ +/* Block Sorting, Lossless Data Compression Library. */ +/* Range coder */ +/*-----------------------------------------------------------*/ + +/*-- + +This file is a part of bsc and/or libbsc, a program and a library for +lossless, block-sorting data compression. + + Copyright (c) 2009-2021 Ilya Grebnov + + Licensed under the Apache License, Version 2.0 (the "License"); + you may not use this file except in compliance with the License. + You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + + Unless required by applicable law or agreed to in writing, software + distributed under the License is distributed on an "AS IS" BASIS, + WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + See the License for the specific language governing permissions and + limitations under the License. + +Please see the file LICENSE for full copyright information and file AUTHORS +for full list of contributors. + +See also the bsc and libbsc web site: + http://libbsc.com/ for more information. 
+ +--*/ + +#ifndef _LIBBSC_CODER_RANGECODER_H +#define _LIBBSC_CODER_RANGECODER_H + +#include "../../platform/platform.h" + +/* RangeCoder: binary range coder that writes and reads its stream in 16-bit units. The class holds both the encoder state (InitEncoder .. FinishEncoder, Encode methods) and the decoder state (InitDecoder, Decode methods). NOTE(review): several members below read "template INLINE ..." and use P with no visible declaration; presumably a template parameter list was lost when this text was extracted - confirm against the upstream header. */ class RangeCoder +{ + +private: + + /* 64-bit accumulator "low" overlaid with two 32-bit halves. NOTE(review): ShiftLow() treats u.low32 as the low half and u.carry as the overflow above bit 31, which presumes a little-endian layout - confirm. */ union ari + { + struct u + { + unsigned int low32; + unsigned int carry; + } u; + unsigned long long low; + } ari; + + /* Decoder value, compared against the range split in PeakBit()/DecodeBit(). */ unsigned int ari_code; + /* Number of output units deferred by ShiftLow() until the carry is known. */ unsigned int ari_ffnum; + /* Most recent unit held back by ShiftLow() so a later carry can still be added to it. */ unsigned int ari_cache; + /* Current range width; renormalized whenever it drops below 0x10000. */ unsigned int ari_range; + + const unsigned short * RESTRICT ari_input; + unsigned short * RESTRICT ari_output; + unsigned short * RESTRICT ari_outputEOB; + unsigned short * RESTRICT ari_outputStart; + + /* Stores one 16-bit unit at ari_output and advances the pointer; uses memcpy unless LIBBSC_ALLOW_UNALIGNED_ACCESS is defined. */ INLINE void OutputShort(unsigned short s) + { +#if defined(LIBBSC_ALLOW_UNALIGNED_ACCESS) + *ari_output++ = s; +#else + memcpy(ari_output++, &s, sizeof(unsigned short)); +#endif + }; + + /* Loads one 16-bit unit from ari_input and advances the pointer; uses memcpy unless LIBBSC_ALLOW_UNALIGNED_ACCESS is defined. */ INLINE unsigned short InputShort() + { +#if defined(LIBBSC_ALLOW_UNALIGNED_ACCESS) + return *ari_input++; +#else + unsigned short ret; + memcpy(&ret, ari_input++, sizeof(unsigned short)); + return ret; +#endif + }; + + /* Shifts low32 left by 16 after dealing with its top 16 bits. When low32 < 0xffff0000 or a carry is set, the held-back ari_cache plus the carry is written, followed by ari_ffnum deferred units whose value is carry - 1 truncated to 16 bits (0xffff for carry 0, 0 for carry 1), and the top 16 bits become the new cache; otherwise the unit is only counted in ari_ffnum. Returns ari_range << 16 without storing it, so callers assign the result themselves. */ NOINLINE unsigned int ShiftLow() + { + if (ari.u.low32 < 0xffff0000U || ari.u.carry) + { + OutputShort(ari_cache + ari.u.carry); + if (ari_ffnum) + { + unsigned short s = ari.u.carry - 1; + do { OutputShort(s); } while (--ari_ffnum); + } + ari_cache = ari.u.low32 >> 16; ari.u.carry = 0; + } else ari_ffnum++; + ari.u.low32 <<= 16; + + return ari_range << 16; + } + +public: + + /* True once the write position has reached the mark set by InitEncoder(), which lies 16 bytes before output + outputSize. */ INLINE bool CheckEOB() + { + return ari_output >= ari_outputEOB; + } + + /* Binds the encoder to the buffer at output and resets low, the deferred-unit count and the cache to 0 and the range to 0xffffffff. outputSize is used only to place the CheckEOB() mark. */ INLINE void InitEncoder(unsigned char * output, int outputSize) + { + ari_outputStart = (unsigned short *)output; + ari_output = (unsigned short *)output; + ari_outputEOB = (unsigned short *)(output + outputSize - 16); + ari.low = 0; + ari_ffnum = 0; + ari_cache = 0; + ari_range = 0xffffffff; + }; + + /* Flushes the remaining state with ShiftLow() (one extra call when the range is below 0x10000, then three more) and returns the number of bytes written since InitEncoder(). */ INLINE int FinishEncoder() + { + if (ari_range < 0x10000) + { + ShiftLow(); + } + + ShiftLow(); ShiftLow(); ShiftLow(); + return (int)(ari_output - ari_outputStart) * sizeof(ari_output[0]); + } + + /* Encodes a 0 bit: renormalizes through ShiftLow() when the range is below 0x10000, then narrows the range to (ari_range >> P) * probability. */ template INLINE void EncodeBit0(int probability) + { + if (ari_range < 0x10000)
+ { + ari_range = ShiftLow(); + } + + ari_range = (ari_range >> P) * probability; + } + + /* Encodes a 1 bit: after the same renormalization, adds (ari_range >> P) * probability to low and removes it from the range. */ template INLINE void EncodeBit1(int probability) + { + if (ari_range < 0x10000) + { + ari_range = ShiftLow(); + } + + unsigned int range = (ari_range >> P) * probability; + ari.low += range; ari_range -= range; + } + + /* Branch-free form of EncodeBit0/EncodeBit1: (~bit + 1u) is 0 for bit == 0 and all ones for bit == 1, so the masked terms take effect only for a 1 bit, where the range becomes ari_range - range. For any other bit value the mask is neither all zeros nor all ones, so callers must pass exactly 0 or 1. */ template INLINE void EncodeBit(unsigned int bit, int probability) + { + if (ari_range < 0x10000) + { + ari_range = ShiftLow(); + } + + unsigned int range = (ari_range >> P) * probability; + + ari.low = ari.low + ((~bit + 1u) & range); + ari_range = range + ((~bit + 1u) & (ari_range - range - range)); + } + + /* Encodes one bit with the fixed probability argument 2048; any non-zero bit value is treated as 1. */ INLINE void EncodeBit(unsigned int bit) + { + if (bit) EncodeBit1(2048); else EncodeBit0(2048); + }; + + /* Encodes the low 8 bits of byte, most significant bit first, through the one-argument EncodeBit(). */ INLINE void EncodeByte(unsigned int byte) + { + for (int bit = 7; bit >= 0; --bit) + { + EncodeBit(byte & (1 << bit)); + } + }; + + /* Encodes 32 bits of word, most significant bit first. NOTE(review): for bit == 31 the expression 1 << bit shifts into the sign bit of int; 1u << bit would avoid relying on that - confirm before changing. */ INLINE void EncodeWord(unsigned int word) + { + for (int bit = 31; bit >= 0; --bit) + { + EncodeBit(word & (1 << bit)); + } + }; + + /* Binds the decoder to input, sets the range to 0xffffffff and reads three 16-bit units into ari_code. NOTE(review): with a 32-bit unsigned int the first unit is shifted out again by the two later shifts; this looks like it skips the initial cache unit that the encoder writes first - confirm. */ INLINE void InitDecoder(const unsigned char * input) + { + ari_input = (unsigned short *)input; + ari_code = 0; + ari_range = 0xffffffff; + ari_code = (ari_code << 16) | InputShort(); + ari_code = (ari_code << 16) | InputShort(); + ari_code = (ari_code << 16) | InputShort(); + }; + + /* Evaluates the same comparison as DecodeBit() for this probability without applying the bit; the only state change is the renormalization step (range shifted left by 16, next input unit shifted into ari_code). */ template INLINE int PeakBit(int probability) + { + if (ari_range < 0x10000) + { + ari_range <<= 16; ari_code = (ari_code << 16) | InputShort(); + } + + return ari_code >= (ari_range >> P) * probability; + } + + /* Decodes one bit: renormalizes, splits the range at (ari_range >> P) * probability, and returns 1 when ari_code is at or above the split (the split is then subtracted from both code and range) or 0 otherwise (the range becomes the split). */ template INLINE int DecodeBit(int probability) + { + if (ari_range < 0x10000) + { + ari_range <<= 16; ari_code = (ari_code << 16) | InputShort(); + } + + unsigned int range = (ari_range >> P) * probability; + int bit = ari_code >= range; + + ari_range = bit ? ari_range - range : range; + ari_code = bit ?
ari_code - range : ari_code; + + return bit; + } + + /* Applies the range update for an already known 0 bit; no renormalization is done here. NOTE(review): presumably meant to follow PeakBit(), which has already renormalized - confirm. */ template INLINE void DecodeBit0(int probability) + { + ari_range = (ari_range >> P) * probability; + } + + /* Applies the code and range update for an already known 1 bit; no renormalization is done here. NOTE(review): presumably meant to follow PeakBit() - confirm. */ template INLINE void DecodeBit1(int probability) + { + unsigned int range = (ari_range >> P) * probability; + ari_code -= range; ari_range -= range; + } + + /* Decodes one bit with the fixed probability argument 2048, the same argument the one-argument EncodeBit() uses. */ INLINE unsigned int DecodeBit() + { + return DecodeBit(2048); + } + + /* Decodes 8 bits, most significant first, and returns them as one value. */ INLINE unsigned int DecodeByte() + { + unsigned int byte = 0; + for (int bit = 7; bit >= 0; --bit) + { + byte += byte + DecodeBit(); + } + return byte; + } + + /* Decodes 32 bits, most significant first, and returns them as one value. */ INLINE unsigned int DecodeWord() + { + unsigned int word = 0; + for (int bit = 31; bit >= 0; --bit) + { + word += word + DecodeBit(); + } + return word; + } +}; + +#endif + +/*-----------------------------------------------------------*/ +/* End rangecoder.h */ +/*-----------------------------------------------------------*/ diff --git a/libbsc/libbsc/coder/common/tables.h b/libbsc/libbsc/coder/common/tables.h new file mode 100644 index 00000000..a11bc9c3 --- /dev/null +++ b/libbsc/libbsc/coder/common/tables.h @@ -0,0 +1,1872 @@ +/*-----------------------------------------------------------*/ +/* Block Sorting, Lossless Data Compression Library. */ +/* Static tables of constant values */ +/*-----------------------------------------------------------*/ + +/*-- + +This file is a part of bsc and/or libbsc, a program and a library for +lossless, block-sorting data compression. + + Copyright (c) 2009-2021 Ilya Grebnov + + Licensed under the Apache License, Version 2.0 (the "License"); + you may not use this file except in compliance with the License. + You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + + Unless required by applicable law or agreed to in writing, software + distributed under the License is distributed on an "AS IS" BASIS, + WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ See the License for the specific language governing permissions and + limitations under the License. + +Please see the file LICENSE for full copyright information and file AUTHORS +for full list of contributors. + +See also the bsc and libbsc web site: + http://libbsc.com/ for more information. + +--*/ + +#ifndef _LIBBSC_CODER_TABLES_H +#define _LIBBSC_CODER_TABLES_H + +#include "../../platform/platform.h" + +static const short bsc_stretch_table[4097] = +{ + -2047,-2047,-1952,-1848,-1774,-1717,-1670,-1631,-1597,-1566,-1539,-1515,-1492,-1472,-1453,-1435, + -1419,-1403,-1388,-1374,-1361,-1349,-1337,-1325,-1314,-1304,-1294,-1284,-1275,-1266,-1257,-1248, + -1240,-1232,-1224,-1217,-1210,-1203,-1196,-1189,-1182,-1176,-1170,-1164,-1158,-1152,-1146,-1141, + -1135,-1130,-1125,-1120,-1115,-1110,-1105,-1100,-1095,-1091,-1086,-1082,-1077,-1073,-1069,-1065, + -1061,-1057,-1053,-1049,-1045,-1041,-1037,-1034,-1030,-1026,-1023,-1019,-1016,-1012,-1009,-1006, + -1002, -999, -996, -993, -990, -987, -984, -981, -978, -975, -972, -969, -966, -963, -960, -958, + -955, -952, -949, -947, -944, -941, -939, -936, -934, -931, -929, -926, -924, -921, -919, -917, + -914, -912, -910, -907, -905, -903, -901, -898, -896, -894, -892, -890, -887, -885, -883, -881, + -879, -877, -875, -873, -871, -869, -867, -865, -863, -861, -859, -857, -855, -853, -852, -850, + -848, -846, -844, -842, -841, -839, -837, -835, -834, -832, -830, -828, -827, -825, -823, -822, + -820, -818, -817, -815, -813, -812, -810, -808, -807, -805, -804, -802, -801, -799, -798, -796, + -794, -793, -791, -790, -788, -787, -785, -784, -783, -781, -780, -778, -777, -775, -774, -773, + -771, -770, -768, -767, -766, -764, -763, -762, -760, -759, -757, -756, -755, -754, -752, -751, + -750, -748, -747, -746, -744, -743, -742, -741, -739, -738, -737, -736, -734, -733, -732, -731, + -730, -728, -727, -726, -725, -724, -722, -721, -720, -719, -718, -717, -715, -714, -713, -712, + -711, -710, -709, -707, -706, -705, -704, -703, -702, 
-701, -700, -699, -698, -696, -695, -694, + -693, -692, -691, -690, -689, -688, -687, -686, -685, -684, -683, -682, -681, -680, -679, -678, + -677, -676, -675, -674, -673, -672, -671, -670, -669, -668, -667, -666, -665, -664, -663, -662, + -661, -660, -659, -658, -657, -656, -655, -654, -653, -652, -652, -651, -650, -649, -648, -647, + -646, -645, -644, -643, -642, -642, -641, -640, -639, -638, -637, -636, -635, -634, -634, -633, + -632, -631, -630, -629, -628, -628, -627, -626, -625, -624, -623, -622, -622, -621, -620, -619, + -618, -617, -617, -616, -615, -614, -613, -613, -612, -611, -610, -609, -608, -608, -607, -606, + -605, -604, -604, -603, -602, -601, -601, -600, -599, -598, -597, -597, -596, -595, -594, -594, + -593, -592, -591, -590, -590, -589, -588, -587, -587, -586, -585, -584, -584, -583, -582, -582, + -581, -580, -579, -579, -578, -577, -576, -576, -575, -574, -574, -573, -572, -571, -571, -570, + -569, -569, -568, -567, -566, -566, -565, -564, -564, -563, -562, -562, -561, -560, -559, -559, + -558, -557, -557, -556, -555, -555, -554, -553, -553, -552, -551, -551, -550, -549, -549, -548, + -547, -547, -546, -545, -545, -544, -543, -543, -542, -541, -541, -540, -539, -539, -538, -538, + -537, -536, -536, -535, -534, -534, -533, -532, -532, -531, -531, -530, -529, -529, -528, -527, + -527, -526, -526, -525, -524, -524, -523, -522, -522, -521, -521, -520, -519, -519, -518, -518, + -517, -516, -516, -515, -515, -514, -513, -513, -512, -512, -511, -510, -510, -509, -509, -508, + -507, -507, -506, -506, -505, -504, -504, -503, -503, -502, -502, -501, -500, -500, -499, -499, + -498, -498, -497, -496, -496, -495, -495, -494, -494, -493, -492, -492, -491, -491, -490, -490, + -489, -489, -488, -487, -487, -486, -486, -485, -485, -484, -484, -483, -483, -482, -481, -481, + -480, -480, -479, -479, -478, -478, -477, -477, -476, -475, -475, -474, -474, -473, -473, -472, + -472, -471, -471, -470, -470, -469, -469, -468, -468, -467, -467, -466, -465, -465, -464, 
-464, + -463, -463, -462, -462, -461, -461, -460, -460, -459, -459, -458, -458, -457, -457, -456, -456, + -455, -455, -454, -454, -453, -453, -452, -452, -451, -451, -450, -450, -449, -449, -448, -448, + -447, -447, -446, -446, -445, -445, -444, -444, -443, -443, -442, -442, -441, -441, -440, -440, + -439, -439, -438, -438, -437, -437, -436, -436, -436, -435, -435, -434, -434, -433, -433, -432, + -432, -431, -431, -430, -430, -429, -429, -428, -428, -427, -427, -427, -426, -426, -425, -425, + -424, -424, -423, -423, -422, -422, -421, -421, -421, -420, -420, -419, -419, -418, -418, -417, + -417, -416, -416, -415, -415, -415, -414, -414, -413, -413, -412, -412, -411, -411, -411, -410, + -410, -409, -409, -408, -408, -407, -407, -407, -406, -406, -405, -405, -404, -404, -403, -403, + -403, -402, -402, -401, -401, -400, -400, -399, -399, -399, -398, -398, -397, -397, -396, -396, + -396, -395, -395, -394, -394, -393, -393, -393, -392, -392, -391, -391, -390, -390, -390, -389, + -389, -388, -388, -387, -387, -387, -386, -386, -385, -385, -385, -384, -384, -383, -383, -382, + -382, -382, -381, -381, -380, -380, -380, -379, -379, -378, -378, -377, -377, -377, -376, -376, + -375, -375, -375, -374, -374, -373, -373, -373, -372, -372, -371, -371, -370, -370, -370, -369, + -369, -368, -368, -368, -367, -367, -366, -366, -366, -365, -365, -364, -364, -364, -363, -363, + -362, -362, -362, -361, -361, -360, -360, -360, -359, -359, -359, -358, -358, -357, -357, -357, + -356, -356, -355, -355, -355, -354, -354, -353, -353, -353, -352, -352, -351, -351, -351, -350, + -350, -350, -349, -349, -348, -348, -348, -347, -347, -346, -346, -346, -345, -345, -345, -344, + -344, -343, -343, -343, -342, -342, -342, -341, -341, -340, -340, -340, -339, -339, -338, -338, + -338, -337, -337, -337, -336, -336, -335, -335, -335, -334, -334, -334, -333, -333, -333, -332, + -332, -331, -331, -331, -330, -330, -330, -329, -329, -328, -328, -328, -327, -327, -327, -326, + -326, -326, -325, -325, -324, 
-324, -324, -323, -323, -323, -322, -322, -322, -321, -321, -320, + -320, -320, -319, -319, -319, -318, -318, -318, -317, -317, -316, -316, -316, -315, -315, -315, + -314, -314, -314, -313, -313, -313, -312, -312, -311, -311, -311, -310, -310, -310, -309, -309, + -309, -308, -308, -308, -307, -307, -307, -306, -306, -305, -305, -305, -304, -304, -304, -303, + -303, -303, -302, -302, -302, -301, -301, -301, -300, -300, -300, -299, -299, -299, -298, -298, + -298, -297, -297, -296, -296, -296, -295, -295, -295, -294, -294, -294, -293, -293, -293, -292, + -292, -292, -291, -291, -291, -290, -290, -290, -289, -289, -289, -288, -288, -288, -287, -287, + -287, -286, -286, -286, -285, -285, -285, -284, -284, -284, -283, -283, -283, -282, -282, -282, + -281, -281, -281, -280, -280, -280, -279, -279, -279, -278, -278, -278, -277, -277, -277, -276, + -276, -276, -275, -275, -275, -274, -274, -274, -273, -273, -273, -272, -272, -272, -271, -271, + -271, -270, -270, -270, -269, -269, -269, -268, -268, -268, -267, -267, -267, -266, -266, -266, + -265, -265, -265, -265, -264, -264, -264, -263, -263, -263, -262, -262, -262, -261, -261, -261, + -260, -260, -260, -259, -259, -259, -258, -258, -258, -257, -257, -257, -257, -256, -256, -256, + -255, -255, -255, -254, -254, -254, -253, -253, -253, -252, -252, -252, -251, -251, -251, -250, + -250, -250, -250, -249, -249, -249, -248, -248, -248, -247, -247, -247, -246, -246, -246, -245, + -245, -245, -245, -244, -244, -244, -243, -243, -243, -242, -242, -242, -241, -241, -241, -241, + -240, -240, -240, -239, -239, -239, -238, -238, -238, -237, -237, -237, -236, -236, -236, -236, + -235, -235, -235, -234, -234, -234, -233, -233, -233, -233, -232, -232, -232, -231, -231, -231, + -230, -230, -230, -229, -229, -229, -229, -228, -228, -228, -227, -227, -227, -226, -226, -226, + -226, -225, -225, -225, -224, -224, -224, -223, -223, -223, -223, -222, -222, -222, -221, -221, + -221, -220, -220, -220, -220, -219, -219, -219, -218, -218, -218, 
-217, -217, -217, -217, -216, + -216, -216, -215, -215, -215, -214, -214, -214, -214, -213, -213, -213, -212, -212, -212, -212, + -211, -211, -211, -210, -210, -210, -209, -209, -209, -209, -208, -208, -208, -207, -207, -207, + -207, -206, -206, -206, -205, -205, -205, -204, -204, -204, -204, -203, -203, -203, -202, -202, + -202, -202, -201, -201, -201, -200, -200, -200, -200, -199, -199, -199, -198, -198, -198, -197, + -197, -197, -197, -196, -196, -196, -195, -195, -195, -195, -194, -194, -194, -193, -193, -193, + -193, -192, -192, -192, -191, -191, -191, -191, -190, -190, -190, -189, -189, -189, -189, -188, + -188, -188, -187, -187, -187, -187, -186, -186, -186, -185, -185, -185, -185, -184, -184, -184, + -183, -183, -183, -183, -182, -182, -182, -181, -181, -181, -181, -180, -180, -180, -180, -179, + -179, -179, -178, -178, -178, -178, -177, -177, -177, -176, -176, -176, -176, -175, -175, -175, + -174, -174, -174, -174, -173, -173, -173, -172, -172, -172, -172, -171, -171, -171, -171, -170, + -170, -170, -169, -169, -169, -169, -168, -168, -168, -167, -167, -167, -167, -166, -166, -166, + -166, -165, -165, -165, -164, -164, -164, -164, -163, -163, -163, -162, -162, -162, -162, -161, + -161, -161, -161, -160, -160, -160, -159, -159, -159, -159, -158, -158, -158, -158, -157, -157, + -157, -156, -156, -156, -156, -155, -155, -155, -155, -154, -154, -154, -153, -153, -153, -153, + -152, -152, -152, -152, -151, -151, -151, -150, -150, -150, -150, -149, -149, -149, -149, -148, + -148, -148, -147, -147, -147, -147, -146, -146, -146, -146, -145, -145, -145, -144, -144, -144, + -144, -143, -143, -143, -143, -142, -142, -142, -141, -141, -141, -141, -140, -140, -140, -140, + -139, -139, -139, -139, -138, -138, -138, -137, -137, -137, -137, -136, -136, -136, -136, -135, + -135, -135, -135, -134, -134, -134, -133, -133, -133, -133, -132, -132, -132, -132, -131, -131, + -131, -131, -130, -130, -130, -129, -129, -129, -129, -128, -128, -128, -128, -127, -127, -127, + -127, 
-126, -126, -126, -125, -125, -125, -125, -124, -124, -124, -124, -123, -123, -123, -123, + -122, -122, -122, -121, -121, -121, -121, -120, -120, -120, -120, -119, -119, -119, -119, -118, + -118, -118, -118, -117, -117, -117, -116, -116, -116, -116, -115, -115, -115, -115, -114, -114, + -114, -114, -113, -113, -113, -113, -112, -112, -112, -111, -111, -111, -111, -110, -110, -110, + -110, -109, -109, -109, -109, -108, -108, -108, -108, -107, -107, -107, -107, -106, -106, -106, + -105, -105, -105, -105, -104, -104, -104, -104, -103, -103, -103, -103, -102, -102, -102, -102, + -101, -101, -101, -101, -100, -100, -100, -99, -99, -99, -99, -98, -98, -98, -98, -97, + -97, -97, -97, -96, -96, -96, -96, -95, -95, -95, -95, -94, -94, -94, -94, -93, + -93, -93, -92, -92, -92, -92, -91, -91, -91, -91, -90, -90, -90, -90, -89, -89, + -89, -89, -88, -88, -88, -88, -87, -87, -87, -87, -86, -86, -86, -86, -85, -85, + -85, -85, -84, -84, -84, -83, -83, -83, -83, -82, -82, -82, -82, -81, -81, -81, + -81, -80, -80, -80, -80, -79, -79, -79, -79, -78, -78, -78, -78, -77, -77, -77, + -77, -76, -76, -76, -76, -75, -75, -75, -75, -74, -74, -74, -74, -73, -73, -73, + -72, -72, -72, -72, -71, -71, -71, -71, -70, -70, -70, -70, -69, -69, -69, -69, + -68, -68, -68, -68, -67, -67, -67, -67, -66, -66, -66, -66, -65, -65, -65, -65, + -64, -64, -64, -64, -63, -63, -63, -63, -62, -62, -62, -62, -61, -61, -61, -61, + -60, -60, -60, -60, -59, -59, -59, -59, -58, -58, -58, -57, -57, -57, -57, -56, + -56, -56, -56, -55, -55, -55, -55, -54, -54, -54, -54, -53, -53, -53, -53, -52, + -52, -52, -52, -51, -51, -51, -51, -50, -50, -50, -50, -49, -49, -49, -49, -48, + -48, -48, -48, -47, -47, -47, -47, -46, -46, -46, -46, -45, -45, -45, -45, -44, + -44, -44, -44, -43, -43, -43, -43, -42, -42, -42, -42, -41, -41, -41, -41, -40, + -40, -40, -40, -39, -39, -39, -39, -38, -38, -38, -38, -37, -37, -37, -37, -36, + -36, -36, -36, -35, -35, -35, -35, -34, -34, -34, -34, -33, -33, -33, -33, -32, + -32, -32, -32, 
-31, -31, -31, -31, -30, -30, -30, -30, -29, -29, -29, -29, -28, + -28, -28, -28, -27, -27, -27, -27, -26, -26, -26, -26, -25, -25, -25, -25, -24, + -24, -24, -24, -23, -23, -23, -23, -22, -22, -22, -22, -21, -21, -21, -21, -20, + -20, -20, -20, -19, -19, -19, -19, -18, -18, -18, -18, -17, -17, -17, -17, -16, + -16, -16, -16, -15, -15, -15, -15, -14, -14, -14, -14, -13, -13, -13, -13, -12, + -12, -12, -12, -11, -11, -11, -11, -10, -10, -10, -10, -9, -9, -9, -9, -8, + -8, -8, -8, -7, -7, -7, -7, -6, -6, -6, -6, -5, -5, -5, -5, -4, + -4, -4, -4, -3, -3, -3, -3, -2, -2, -2, -2, -1, -1, -1, 0, 0, + 0, 0, 0, 1, 1, 1, 1, 2, 2, 2, 3, 3, 3, 3, 4, 4, + 4, 4, 5, 5, 5, 5, 6, 6, 6, 6, 7, 7, 7, 7, 8, 8, + 8, 8, 9, 9, 9, 9, 10, 10, 10, 10, 11, 11, 11, 11, 12, 12, + 12, 12, 13, 13, 13, 13, 14, 14, 14, 14, 15, 15, 15, 15, 16, 16, + 16, 16, 17, 17, 17, 17, 18, 18, 18, 18, 19, 19, 19, 19, 20, 20, + 20, 20, 21, 21, 21, 21, 22, 22, 22, 22, 23, 23, 23, 23, 24, 24, + 24, 24, 25, 25, 25, 25, 26, 26, 26, 26, 27, 27, 27, 27, 28, 28, + 28, 28, 29, 29, 29, 29, 30, 30, 30, 30, 31, 31, 31, 31, 32, 32, + 32, 32, 33, 33, 33, 33, 34, 34, 34, 34, 35, 35, 35, 35, 36, 36, + 36, 36, 37, 37, 37, 37, 38, 38, 38, 38, 39, 39, 39, 39, 40, 40, + 40, 40, 41, 41, 41, 41, 42, 42, 42, 42, 43, 43, 43, 43, 44, 44, + 44, 44, 45, 45, 45, 45, 46, 46, 46, 46, 47, 47, 47, 47, 48, 48, + 48, 48, 49, 49, 49, 49, 50, 50, 50, 50, 51, 51, 51, 51, 52, 52, + 52, 52, 53, 53, 53, 53, 54, 54, 54, 54, 55, 55, 55, 55, 56, 56, + 56, 56, 57, 57, 57, 57, 58, 58, 58, 59, 59, 59, 59, 60, 60, 60, + 60, 61, 61, 61, 61, 62, 62, 62, 62, 63, 63, 63, 63, 64, 64, 64, + 64, 65, 65, 65, 65, 66, 66, 66, 66, 67, 67, 67, 67, 68, 68, 68, + 68, 69, 69, 69, 69, 70, 70, 70, 70, 71, 71, 71, 71, 72, 72, 72, + 72, 73, 73, 73, 74, 74, 74, 74, 75, 75, 75, 75, 76, 76, 76, 76, + 77, 77, 77, 77, 78, 78, 78, 78, 79, 79, 79, 79, 80, 80, 80, 80, + 81, 81, 81, 81, 82, 82, 82, 82, 83, 83, 83, 83, 84, 84, 84, 85, + 85, 85, 85, 86, 86, 86, 86, 87, 87, 87, 87, 88, 
88, 88, 88, 89, + 89, 89, 89, 90, 90, 90, 90, 91, 91, 91, 91, 92, 92, 92, 92, 93, + 93, 93, 94, 94, 94, 94, 95, 95, 95, 95, 96, 96, 96, 96, 97, 97, + 97, 97, 98, 98, 98, 98, 99, 99, 99, 99, 100, 100, 100, 101, 101, 101, + 101, 102, 102, 102, 102, 103, 103, 103, 103, 104, 104, 104, 104, 105, 105, 105, + 105, 106, 106, 106, 107, 107, 107, 107, 108, 108, 108, 108, 109, 109, 109, 109, + 110, 110, 110, 110, 111, 111, 111, 111, 112, 112, 112, 113, 113, 113, 113, 114, + 114, 114, 114, 115, 115, 115, 115, 116, 116, 116, 116, 117, 117, 117, 118, 118, + 118, 118, 119, 119, 119, 119, 120, 120, 120, 120, 121, 121, 121, 121, 122, 122, + 122, 123, 123, 123, 123, 124, 124, 124, 124, 125, 125, 125, 125, 126, 126, 126, + 127, 127, 127, 127, 128, 128, 128, 128, 129, 129, 129, 129, 130, 130, 130, 131, + 131, 131, 131, 132, 132, 132, 132, 133, 133, 133, 133, 134, 134, 134, 135, 135, + 135, 135, 136, 136, 136, 136, 137, 137, 137, 137, 138, 138, 138, 139, 139, 139, + 139, 140, 140, 140, 140, 141, 141, 141, 141, 142, 142, 142, 143, 143, 143, 143, + 144, 144, 144, 144, 145, 145, 145, 146, 146, 146, 146, 147, 147, 147, 147, 148, + 148, 148, 149, 149, 149, 149, 150, 150, 150, 150, 151, 151, 151, 152, 152, 152, + 152, 153, 153, 153, 153, 154, 154, 154, 155, 155, 155, 155, 156, 156, 156, 156, + 157, 157, 157, 158, 158, 158, 158, 159, 159, 159, 159, 160, 160, 160, 161, 161, + 161, 161, 162, 162, 162, 162, 163, 163, 163, 164, 164, 164, 164, 165, 165, 165, + 166, 166, 166, 166, 167, 167, 167, 167, 168, 168, 168, 169, 169, 169, 169, 170, + 170, 170, 171, 171, 171, 171, 172, 172, 172, 172, 173, 173, 173, 174, 174, 174, + 174, 175, 175, 175, 176, 176, 176, 176, 177, 177, 177, 178, 178, 178, 178, 179, + 179, 179, 180, 180, 180, 180, 181, 181, 181, 181, 182, 182, 182, 183, 183, 183, + 183, 184, 184, 184, 185, 185, 185, 185, 186, 186, 186, 187, 187, 187, 187, 188, + 188, 188, 189, 189, 189, 189, 190, 190, 190, 191, 191, 191, 191, 192, 192, 192, + 193, 193, 193, 193, 194, 194, 194, 195, 195, 195, 195, 
196, 196, 196, 197, 197, + 197, 197, 198, 198, 198, 199, 199, 199, 200, 200, 200, 200, 201, 201, 201, 202, + 202, 202, 202, 203, 203, 203, 204, 204, 204, 204, 205, 205, 205, 206, 206, 206, + 207, 207, 207, 207, 208, 208, 208, 209, 209, 209, 209, 210, 210, 210, 211, 211, + 211, 212, 212, 212, 212, 213, 213, 213, 214, 214, 214, 214, 215, 215, 215, 216, + 216, 216, 217, 217, 217, 217, 218, 218, 218, 219, 219, 219, 220, 220, 220, 220, + 221, 221, 221, 222, 222, 222, 223, 223, 223, 223, 224, 224, 224, 225, 225, 225, + 226, 226, 226, 226, 227, 227, 227, 228, 228, 228, 229, 229, 229, 229, 230, 230, + 230, 231, 231, 231, 232, 232, 232, 233, 233, 233, 233, 234, 234, 234, 235, 235, + 235, 236, 236, 236, 236, 237, 237, 237, 238, 238, 238, 239, 239, 239, 240, 240, + 240, 241, 241, 241, 241, 242, 242, 242, 243, 243, 243, 244, 244, 244, 245, 245, + 245, 245, 246, 246, 246, 247, 247, 247, 248, 248, 248, 249, 249, 249, 250, 250, + 250, 250, 251, 251, 251, 252, 252, 252, 253, 253, 253, 254, 254, 254, 255, 255, + 255, 256, 256, 256, 257, 257, 257, 257, 258, 258, 258, 259, 259, 259, 260, 260, + 260, 261, 261, 261, 262, 262, 262, 263, 263, 263, 264, 264, 264, 265, 265, 265, + 265, 266, 266, 266, 267, 267, 267, 268, 268, 268, 269, 269, 269, 270, 270, 270, + 271, 271, 271, 272, 272, 272, 273, 273, 273, 274, 274, 274, 275, 275, 275, 276, + 276, 276, 277, 277, 277, 278, 278, 278, 279, 279, 279, 280, 280, 280, 281, 281, + 281, 282, 282, 282, 283, 283, 283, 284, 284, 284, 285, 285, 285, 286, 286, 286, + 287, 287, 287, 288, 288, 288, 289, 289, 289, 290, 290, 290, 291, 291, 291, 292, + 292, 292, 293, 293, 293, 294, 294, 294, 295, 295, 295, 296, 296, 296, 297, 297, + 298, 298, 298, 299, 299, 299, 300, 300, 300, 301, 301, 301, 302, 302, 302, 303, + 303, 303, 304, 304, 304, 305, 305, 305, 306, 306, 307, 307, 307, 308, 308, 308, + 309, 309, 309, 310, 310, 310, 311, 311, 311, 312, 312, 313, 313, 313, 314, 314, + 314, 315, 315, 315, 316, 316, 316, 317, 317, 318, 318, 318, 319, 319, 319, 320, + 320, 
320, 321, 321, 322, 322, 322, 323, 323, 323, 324, 324, 324, 325, 325, 326, + 326, 326, 327, 327, 327, 328, 328, 328, 329, 329, 330, 330, 330, 331, 331, 331, + 332, 332, 333, 333, 333, 334, 334, 334, 335, 335, 335, 336, 336, 337, 337, 337, + 338, 338, 338, 339, 339, 340, 340, 340, 341, 341, 342, 342, 342, 343, 343, 343, + 344, 344, 345, 345, 345, 346, 346, 346, 347, 347, 348, 348, 348, 349, 349, 350, + 350, 350, 351, 351, 351, 352, 352, 353, 353, 353, 354, 354, 355, 355, 355, 356, + 356, 357, 357, 357, 358, 358, 359, 359, 359, 360, 360, 360, 361, 361, 362, 362, + 362, 363, 363, 364, 364, 364, 365, 365, 366, 366, 366, 367, 367, 368, 368, 368, + 369, 369, 370, 370, 370, 371, 371, 372, 372, 373, 373, 373, 374, 374, 375, 375, + 375, 376, 376, 377, 377, 377, 378, 378, 379, 379, 380, 380, 380, 381, 381, 382, + 382, 382, 383, 383, 384, 384, 385, 385, 385, 386, 386, 387, 387, 387, 388, 388, + 389, 389, 390, 390, 390, 391, 391, 392, 392, 393, 393, 393, 394, 394, 395, 395, + 396, 396, 396, 397, 397, 398, 398, 399, 399, 399, 400, 400, 401, 401, 402, 402, + 403, 403, 403, 404, 404, 405, 405, 406, 406, 407, 407, 407, 408, 408, 409, 409, + 410, 410, 411, 411, 411, 412, 412, 413, 413, 414, 414, 415, 415, 415, 416, 416, + 417, 417, 418, 418, 419, 419, 420, 420, 421, 421, 421, 422, 422, 423, 423, 424, + 424, 425, 425, 426, 426, 427, 427, 427, 428, 428, 429, 429, 430, 430, 431, 431, + 432, 432, 433, 433, 434, 434, 435, 435, 436, 436, 436, 437, 437, 438, 438, 439, + 439, 440, 440, 441, 441, 442, 442, 443, 443, 444, 444, 445, 445, 446, 446, 447, + 447, 448, 448, 449, 449, 450, 450, 451, 451, 452, 452, 453, 453, 454, 454, 455, + 455, 456, 456, 457, 457, 458, 458, 459, 459, 460, 460, 461, 461, 462, 462, 463, + 463, 464, 464, 465, 465, 466, 467, 467, 468, 468, 469, 469, 470, 470, 471, 471, + 472, 472, 473, 473, 474, 474, 475, 475, 476, 477, 477, 478, 478, 479, 479, 480, + 480, 481, 481, 482, 483, 483, 484, 484, 485, 485, 486, 486, 487, 487, 488, 489, + 489, 490, 490, 491, 491, 492, 492, 
493, 494, 494, 495, 495, 496, 496, 497, 498, + 498, 499, 499, 500, 500, 501, 502, 502, 503, 503, 504, 504, 505, 506, 506, 507, + 507, 508, 509, 509, 510, 510, 511, 512, 512, 513, 513, 514, 515, 515, 516, 516, + 517, 518, 518, 519, 519, 520, 521, 521, 522, 522, 523, 524, 524, 525, 526, 526, + 527, 527, 528, 529, 529, 530, 531, 531, 532, 532, 533, 534, 534, 535, 536, 536, + 537, 538, 538, 539, 539, 540, 541, 541, 542, 543, 543, 544, 545, 545, 546, 547, + 547, 548, 549, 549, 550, 551, 551, 552, 553, 553, 554, 555, 555, 556, 557, 557, + 558, 559, 559, 560, 561, 562, 562, 563, 564, 564, 565, 566, 566, 567, 568, 569, + 569, 570, 571, 571, 572, 573, 574, 574, 575, 576, 576, 577, 578, 579, 579, 580, + 581, 582, 582, 583, 584, 584, 585, 586, 587, 587, 588, 589, 590, 590, 591, 592, + 593, 594, 594, 595, 596, 597, 597, 598, 599, 600, 601, 601, 602, 603, 604, 604, + 605, 606, 607, 608, 608, 609, 610, 611, 612, 613, 613, 614, 615, 616, 617, 617, + 618, 619, 620, 621, 622, 622, 623, 624, 625, 626, 627, 628, 628, 629, 630, 631, + 632, 633, 634, 634, 635, 636, 637, 638, 639, 640, 641, 642, 642, 643, 644, 645, + 646, 647, 648, 649, 650, 651, 652, 652, 653, 654, 655, 656, 657, 658, 659, 660, + 661, 662, 663, 664, 665, 666, 667, 668, 669, 670, 671, 672, 673, 674, 675, 676, + 677, 678, 679, 680, 681, 682, 683, 684, 685, 686, 687, 688, 689, 690, 691, 692, + 693, 694, 695, 696, 698, 699, 700, 701, 702, 703, 704, 705, 706, 707, 709, 710, + 711, 712, 713, 714, 715, 717, 718, 719, 720, 721, 722, 724, 725, 726, 727, 728, + 730, 731, 732, 733, 734, 736, 737, 738, 739, 741, 742, 743, 744, 746, 747, 748, + 750, 751, 752, 754, 755, 756, 757, 759, 760, 762, 763, 764, 766, 767, 768, 770, + 771, 773, 774, 775, 777, 778, 780, 781, 783, 784, 785, 787, 788, 790, 791, 793, + 794, 796, 798, 799, 801, 802, 804, 805, 807, 808, 810, 812, 813, 815, 817, 818, + 820, 822, 823, 825, 827, 828, 830, 832, 834, 835, 837, 839, 841, 842, 844, 846, + 848, 850, 852, 853, 855, 857, 859, 861, 863, 865, 867, 869, 871, 
873, 875, 877, + 879, 881, 883, 885, 887, 890, 892, 894, 896, 898, 901, 903, 905, 907, 910, 912, + 914, 917, 919, 921, 924, 926, 929, 931, 934, 936, 939, 941, 944, 947, 949, 952, + 955, 958, 960, 963, 966, 969, 972, 975, 978, 981, 984, 987, 990, 993, 996, 999, + 1002, 1006, 1009, 1012, 1016, 1019, 1023, 1026, 1030, 1034, 1037, 1041, 1045, 1049, 1053, 1057, + 1061, 1065, 1069, 1073, 1077, 1082, 1086, 1091, 1095, 1100, 1105, 1110, 1115, 1120, 1125, 1130, + 1135, 1141, 1146, 1152, 1158, 1164, 1170, 1176, 1182, 1189, 1196, 1203, 1210, 1217, 1224, 1232, + 1240, 1248, 1257, 1266, 1275, 1284, 1294, 1304, 1314, 1325, 1337, 1349, 1361, 1374, 1388, 1403, + 1419, 1435, 1453, 1472, 1492, 1515, 1539, 1566, 1597, 1631, 1670, 1717, 1774, 1848, 1952, 2047, + 2047 +}; + +static const short bsc_squash_table[4097] = +{ + 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, + 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, + 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, + 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, + 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, + 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, + 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, + 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, + 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, + 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, + 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, + 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, + 2, 2, 2, 2, 2, 2, 2, 2, 3, 3, 3, 3, 3, 3, 3, 3, + 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, + 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, + 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, + 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, + 3, 3, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, + 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, + 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, + 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 5, 5, 5, 5, 5, + 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, + 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, + 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 6, 6, 
6, 6, 6, 6, + 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, + 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, + 6, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, + 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, + 7, 7, 7, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, + 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, + 8, 8, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, + 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 10, 10, 10, + 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, + 10, 10, 10, 10, 10, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, + 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 12, 12, 12, 12, + 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, + 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, + 13, 13, 13, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, + 14, 14, 14, 14, 14, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, + 15, 15, 15, 15, 15, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, + 16, 16, 16, 16, 16, 17, 17, 17, 17, 17, 17, 17, 17, 17, 17, 17, + 17, 17, 17, 17, 18, 18, 18, 18, 18, 18, 18, 18, 18, 18, 18, 18, + 18, 18, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 20, + 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 21, 21, 21, 21, 21, + 21, 21, 21, 21, 21, 21, 21, 22, 22, 22, 22, 22, 22, 22, 22, 22, + 22, 22, 22, 23, 23, 23, 23, 23, 23, 23, 23, 23, 23, 23, 24, 24, + 24, 24, 24, 24, 24, 24, 24, 24, 25, 25, 25, 25, 25, 25, 25, 25, + 25, 25, 26, 26, 26, 26, 26, 26, 26, 26, 26, 26, 27, 27, 27, 27, + 27, 27, 27, 27, 27, 28, 28, 28, 28, 28, 28, 28, 28, 28, 29, 29, + 29, 29, 29, 29, 29, 29, 29, 30, 30, 30, 30, 30, 30, 30, 30, 30, + 31, 31, 31, 31, 31, 31, 31, 31, 32, 32, 32, 32, 32, 32, 32, 32, + 33, 33, 33, 33, 33, 33, 33, 33, 34, 34, 34, 34, 34, 34, 34, 35, + 35, 35, 35, 35, 35, 35, 36, 36, 36, 36, 36, 36, 36, 37, 37, 37, + 37, 37, 37, 37, 38, 38, 38, 38, 38, 38, 38, 39, 39, 39, 39, 39, + 39, 39, 40, 40, 40, 40, 40, 40, 41, 41, 41, 41, 41, 41, 42, 42, + 42, 42, 42, 42, 43, 43, 43, 43, 43, 43, 44, 44, 44, 44, 44, 44, 
+ 45, 45, 45, 45, 45, 45, 46, 46, 46, 46, 46, 47, 47, 47, 47, 47, + 47, 48, 48, 48, 48, 48, 49, 49, 49, 49, 49, 50, 50, 50, 50, 50, + 51, 51, 51, 51, 51, 52, 52, 52, 52, 52, 53, 53, 53, 53, 53, 54, + 54, 54, 54, 54, 55, 55, 55, 55, 55, 56, 56, 56, 56, 57, 57, 57, + 57, 57, 58, 58, 58, 58, 59, 59, 59, 59, 59, 60, 60, 60, 60, 61, + 61, 61, 61, 62, 62, 62, 62, 63, 63, 63, 63, 64, 64, 64, 64, 65, + 65, 65, 65, 66, 66, 66, 66, 67, 67, 67, 67, 68, 68, 68, 68, 69, + 69, 69, 69, 70, 70, 70, 71, 71, 71, 71, 72, 72, 72, 72, 73, 73, + 73, 74, 74, 74, 74, 75, 75, 75, 76, 76, 76, 76, 77, 77, 77, 78, + 78, 78, 79, 79, 79, 79, 80, 80, 80, 81, 81, 81, 82, 82, 82, 83, + 83, 83, 84, 84, 84, 85, 85, 85, 86, 86, 86, 87, 87, 87, 88, 88, + 88, 89, 89, 89, 90, 90, 90, 91, 91, 91, 92, 92, 92, 93, 93, 93, + 94, 94, 95, 95, 95, 96, 96, 96, 97, 97, 97, 98, 98, 99, 99, 99, + 100, 100, 100, 101, 101, 102, 102, 102, 103, 103, 104, 104, 104, 105, 105, 106, + 106, 106, 107, 107, 108, 108, 108, 109, 109, 110, 110, 111, 111, 111, 112, 112, + 113, 113, 114, 114, 114, 115, 115, 116, 116, 117, 117, 118, 118, 118, 119, 119, + 120, 120, 121, 121, 122, 122, 123, 123, 123, 124, 124, 125, 125, 126, 126, 127, + 127, 128, 128, 129, 129, 130, 130, 131, 131, 132, 132, 133, 133, 134, 134, 135, + 135, 136, 136, 137, 137, 138, 138, 139, 139, 140, 140, 141, 142, 142, 143, 143, + 144, 144, 145, 145, 146, 146, 147, 148, 148, 149, 149, 150, 150, 151, 152, 152, + 153, 153, 154, 154, 155, 156, 156, 157, 157, 158, 159, 159, 160, 160, 161, 162, + 162, 163, 163, 164, 165, 165, 166, 166, 167, 168, 168, 169, 170, 170, 171, 172, + 172, 173, 174, 174, 175, 175, 176, 177, 177, 178, 179, 179, 180, 181, 181, 182, + 183, 184, 184, 185, 186, 186, 187, 188, 188, 189, 190, 191, 191, 192, 193, 193, + 194, 195, 196, 196, 197, 198, 199, 199, 200, 201, 201, 202, 203, 204, 205, 205, + 206, 207, 208, 208, 209, 210, 211, 211, 212, 213, 214, 215, 215, 216, 217, 218, + 219, 219, 220, 221, 222, 223, 224, 224, 225, 226, 227, 228, 229, 229, 
230, 231, + 232, 233, 234, 235, 235, 236, 237, 238, 239, 240, 241, 242, 242, 243, 244, 245, + 246, 247, 248, 249, 250, 251, 252, 252, 253, 254, 255, 256, 257, 258, 259, 260, + 261, 262, 263, 264, 265, 266, 267, 268, 269, 270, 271, 272, 273, 274, 275, 276, + 277, 278, 279, 280, 281, 282, 283, 284, 285, 286, 287, 288, 289, 290, 291, 292, + 293, 294, 295, 296, 297, 299, 300, 301, 302, 303, 304, 305, 306, 307, 308, 310, + 311, 312, 313, 314, 315, 316, 317, 319, 320, 321, 322, 323, 324, 326, 327, 328, + 329, 330, 331, 333, 334, 335, 336, 337, 339, 340, 341, 342, 344, 345, 346, 347, + 348, 350, 351, 352, 353, 355, 356, 357, 359, 360, 361, 362, 364, 365, 366, 368, + 369, 370, 371, 373, 374, 375, 377, 378, 379, 381, 382, 384, 385, 386, 388, 389, + 390, 392, 393, 395, 396, 397, 399, 400, 402, 403, 404, 406, 407, 409, 410, 412, + 413, 414, 416, 417, 419, 420, 422, 423, 425, 426, 428, 429, 431, 432, 434, 435, + 437, 438, 440, 441, 443, 444, 446, 448, 449, 451, 452, 454, 455, 457, 459, 460, + 462, 463, 465, 467, 468, 470, 471, 473, 475, 476, 478, 480, 481, 483, 485, 486, + 488, 490, 491, 493, 495, 496, 498, 500, 501, 503, 505, 507, 508, 510, 512, 514, + 515, 517, 519, 521, 522, 524, 526, 528, 530, 531, 533, 535, 537, 539, 541, 542, + 544, 546, 548, 550, 552, 553, 555, 557, 559, 561, 563, 565, 567, 569, 571, 572, + 574, 576, 578, 580, 582, 584, 586, 588, 590, 592, 594, 596, 598, 600, 602, 604, + 606, 608, 610, 612, 614, 616, 618, 620, 622, 624, 626, 628, 631, 633, 635, 637, + 639, 641, 643, 645, 647, 650, 652, 654, 656, 658, 660, 663, 665, 667, 669, 671, + 673, 676, 678, 680, 682, 685, 687, 689, 691, 694, 696, 698, 700, 703, 705, 707, + 709, 712, 714, 716, 719, 721, 723, 726, 728, 730, 733, 735, 737, 740, 742, 745, + 747, 749, 752, 754, 757, 759, 761, 764, 766, 769, 771, 774, 776, 778, 781, 783, + 786, 788, 791, 793, 796, 798, 801, 803, 806, 809, 811, 814, 816, 819, 821, 824, + 826, 829, 832, 834, 837, 839, 842, 845, 847, 850, 853, 855, 858, 860, 863, 866, + 868, 871, 874, 877, 
879, 882, 885, 887, 890, 893, 896, 898, 901, 904, 907, 909, + 912, 915, 918, 920, 923, 926, 929, 932, 934, 937, 940, 943, 946, 949, 951, 954, + 957, 960, 963, 966, 969, 972, 975, 977, 980, 983, 986, 989, 992, 995, 998, 1001, + 1004, 1007, 1010, 1013, 1016, 1019, 1022, 1025, 1028, 1031, 1034, 1037, 1040, 1043, 1046, 1049, + 1052, 1055, 1058, 1061, 1064, 1067, 1070, 1073, 1077, 1080, 1083, 1086, 1089, 1092, 1095, 1098, + 1102, 1105, 1108, 1111, 1114, 1117, 1120, 1124, 1127, 1130, 1133, 1136, 1140, 1143, 1146, 1149, + 1153, 1156, 1159, 1162, 1165, 1169, 1172, 1175, 1179, 1182, 1185, 1188, 1192, 1195, 1198, 1202, + 1205, 1208, 1212, 1215, 1218, 1222, 1225, 1228, 1232, 1235, 1238, 1242, 1245, 1249, 1252, 1255, + 1259, 1262, 1266, 1269, 1272, 1276, 1279, 1283, 1286, 1290, 1293, 1296, 1300, 1303, 1307, 1310, + 1314, 1317, 1321, 1324, 1328, 1331, 1335, 1338, 1342, 1345, 1349, 1352, 1356, 1360, 1363, 1367, + 1370, 1374, 1377, 1381, 1384, 1388, 1392, 1395, 1399, 1402, 1406, 1410, 1413, 1417, 1420, 1424, + 1428, 1431, 1435, 1439, 1442, 1446, 1450, 1453, 1457, 1461, 1464, 1468, 1472, 1475, 1479, 1483, + 1486, 1490, 1494, 1497, 1501, 1505, 1509, 1512, 1516, 1520, 1524, 1527, 1531, 1535, 1539, 1542, + 1546, 1550, 1554, 1557, 1561, 1565, 1569, 1572, 1576, 1580, 1584, 1588, 1591, 1595, 1599, 1603, + 1607, 1610, 1614, 1618, 1622, 1626, 1630, 1633, 1637, 1641, 1645, 1649, 1653, 1656, 1660, 1664, + 1668, 1672, 1676, 1680, 1683, 1687, 1691, 1695, 1699, 1703, 1707, 1711, 1715, 1718, 1722, 1726, + 1730, 1734, 1738, 1742, 1746, 1750, 1754, 1758, 1761, 1765, 1769, 1773, 1777, 1781, 1785, 1789, + 1793, 1797, 1801, 1805, 1809, 1813, 1817, 1820, 1824, 1828, 1832, 1836, 1840, 1844, 1848, 1852, + 1856, 1860, 1864, 1868, 1872, 1876, 1880, 1884, 1888, 1892, 1896, 1900, 1904, 1908, 1912, 1916, + 1920, 1924, 1928, 1932, 1936, 1940, 1944, 1948, 1952, 1956, 1960, 1964, 1968, 1972, 1976, 1980, + 1984, 1988, 1992, 1996, 2000, 2004, 2008, 2012, 2016, 2020, 2024, 2028, 2032, 2036, 2040, 2044, + 2048, 
2052, 2056, 2059, 2063, 2067, 2071, 2075, 2079, 2083, 2087, 2091, 2095, 2099, 2103, 2107, + 2111, 2115, 2119, 2123, 2127, 2131, 2135, 2139, 2143, 2147, 2151, 2155, 2159, 2163, 2167, 2171, + 2175, 2179, 2183, 2187, 2191, 2195, 2199, 2203, 2207, 2211, 2215, 2219, 2223, 2227, 2231, 2235, + 2239, 2243, 2247, 2251, 2255, 2259, 2263, 2267, 2271, 2275, 2279, 2282, 2286, 2290, 2294, 2298, + 2302, 2306, 2310, 2314, 2318, 2322, 2326, 2330, 2334, 2338, 2341, 2345, 2349, 2353, 2357, 2361, + 2365, 2369, 2373, 2377, 2381, 2384, 2388, 2392, 2396, 2400, 2404, 2408, 2412, 2416, 2419, 2423, + 2427, 2431, 2435, 2439, 2443, 2446, 2450, 2454, 2458, 2462, 2466, 2469, 2473, 2477, 2481, 2485, + 2489, 2492, 2496, 2500, 2504, 2508, 2511, 2515, 2519, 2523, 2527, 2530, 2534, 2538, 2542, 2545, + 2549, 2553, 2557, 2560, 2564, 2568, 2572, 2575, 2579, 2583, 2587, 2590, 2594, 2598, 2602, 2605, + 2609, 2613, 2616, 2620, 2624, 2627, 2631, 2635, 2638, 2642, 2646, 2649, 2653, 2657, 2660, 2664, + 2668, 2671, 2675, 2679, 2682, 2686, 2689, 2693, 2697, 2700, 2704, 2707, 2711, 2715, 2718, 2722, + 2725, 2729, 2732, 2736, 2739, 2743, 2747, 2750, 2754, 2757, 2761, 2764, 2768, 2771, 2775, 2778, + 2782, 2785, 2789, 2792, 2796, 2799, 2803, 2806, 2809, 2813, 2816, 2820, 2823, 2827, 2830, 2833, + 2837, 2840, 2844, 2847, 2850, 2854, 2857, 2861, 2864, 2867, 2871, 2874, 2877, 2881, 2884, 2887, + 2891, 2894, 2897, 2901, 2904, 2907, 2911, 2914, 2917, 2920, 2924, 2927, 2930, 2934, 2937, 2940, + 2943, 2946, 2950, 2953, 2956, 2959, 2963, 2966, 2969, 2972, 2975, 2979, 2982, 2985, 2988, 2991, + 2994, 2997, 3001, 3004, 3007, 3010, 3013, 3016, 3019, 3022, 3026, 3029, 3032, 3035, 3038, 3041, + 3044, 3047, 3050, 3053, 3056, 3059, 3062, 3065, 3068, 3071, 3074, 3077, 3080, 3083, 3086, 3089, + 3092, 3095, 3098, 3101, 3104, 3107, 3110, 3113, 3116, 3118, 3121, 3124, 3127, 3130, 3133, 3136, + 3139, 3142, 3144, 3147, 3150, 3153, 3156, 3159, 3161, 3164, 3167, 3170, 3173, 3175, 3178, 3181, + 3184, 3186, 3189, 3192, 3195, 3197, 3200, 
3203, 3206, 3208, 3211, 3214, 3216, 3219, 3222, 3225, + 3227, 3230, 3233, 3235, 3238, 3240, 3243, 3246, 3248, 3251, 3254, 3256, 3259, 3261, 3264, 3267, + 3269, 3272, 3274, 3277, 3279, 3282, 3284, 3287, 3290, 3292, 3295, 3297, 3300, 3302, 3305, 3307, + 3310, 3312, 3315, 3317, 3319, 3322, 3324, 3327, 3329, 3332, 3334, 3336, 3339, 3341, 3344, 3346, + 3348, 3351, 3353, 3356, 3358, 3360, 3363, 3365, 3367, 3370, 3372, 3374, 3377, 3379, 3381, 3384, + 3386, 3388, 3390, 3393, 3395, 3397, 3399, 3402, 3404, 3406, 3408, 3411, 3413, 3415, 3417, 3420, + 3422, 3424, 3426, 3428, 3430, 3433, 3435, 3437, 3439, 3441, 3443, 3446, 3448, 3450, 3452, 3454, + 3456, 3458, 3460, 3462, 3465, 3467, 3469, 3471, 3473, 3475, 3477, 3479, 3481, 3483, 3485, 3487, + 3489, 3491, 3493, 3495, 3497, 3499, 3501, 3503, 3505, 3507, 3509, 3511, 3513, 3515, 3517, 3519, + 3521, 3523, 3525, 3526, 3528, 3530, 3532, 3534, 3536, 3538, 3540, 3542, 3544, 3545, 3547, 3549, + 3551, 3553, 3555, 3556, 3558, 3560, 3562, 3564, 3566, 3567, 3569, 3571, 3573, 3575, 3576, 3578, + 3580, 3582, 3583, 3585, 3587, 3589, 3590, 3592, 3594, 3596, 3597, 3599, 3601, 3602, 3604, 3606, + 3607, 3609, 3611, 3612, 3614, 3616, 3617, 3619, 3621, 3622, 3624, 3626, 3627, 3629, 3630, 3632, + 3634, 3635, 3637, 3638, 3640, 3642, 3643, 3645, 3646, 3648, 3649, 3651, 3653, 3654, 3656, 3657, + 3659, 3660, 3662, 3663, 3665, 3666, 3668, 3669, 3671, 3672, 3674, 3675, 3677, 3678, 3680, 3681, + 3683, 3684, 3685, 3687, 3688, 3690, 3691, 3693, 3694, 3695, 3697, 3698, 3700, 3701, 3702, 3704, + 3705, 3707, 3708, 3709, 3711, 3712, 3713, 3715, 3716, 3718, 3719, 3720, 3722, 3723, 3724, 3726, + 3727, 3728, 3729, 3731, 3732, 3733, 3735, 3736, 3737, 3738, 3740, 3741, 3742, 3744, 3745, 3746, + 3747, 3749, 3750, 3751, 3752, 3753, 3755, 3756, 3757, 3758, 3760, 3761, 3762, 3763, 3764, 3766, + 3767, 3768, 3769, 3770, 3771, 3773, 3774, 3775, 3776, 3777, 3778, 3780, 3781, 3782, 3783, 3784, + 3785, 3786, 3787, 3789, 3790, 3791, 3792, 3793, 3794, 3795, 3796, 3797, 3798, 
3800, 3801, 3802, + 3803, 3804, 3805, 3806, 3807, 3808, 3809, 3810, 3811, 3812, 3813, 3814, 3815, 3816, 3817, 3818, + 3819, 3820, 3821, 3822, 3823, 3824, 3825, 3826, 3827, 3828, 3829, 3830, 3831, 3832, 3833, 3834, + 3835, 3836, 3837, 3838, 3839, 3840, 3841, 3842, 3843, 3843, 3844, 3845, 3846, 3847, 3848, 3849, + 3850, 3851, 3852, 3853, 3853, 3854, 3855, 3856, 3857, 3858, 3859, 3860, 3860, 3861, 3862, 3863, + 3864, 3865, 3866, 3866, 3867, 3868, 3869, 3870, 3871, 3871, 3872, 3873, 3874, 3875, 3876, 3876, + 3877, 3878, 3879, 3880, 3880, 3881, 3882, 3883, 3884, 3884, 3885, 3886, 3887, 3887, 3888, 3889, + 3890, 3890, 3891, 3892, 3893, 3894, 3894, 3895, 3896, 3896, 3897, 3898, 3899, 3899, 3900, 3901, + 3902, 3902, 3903, 3904, 3904, 3905, 3906, 3907, 3907, 3908, 3909, 3909, 3910, 3911, 3911, 3912, + 3913, 3914, 3914, 3915, 3916, 3916, 3917, 3918, 3918, 3919, 3920, 3920, 3921, 3921, 3922, 3923, + 3923, 3924, 3925, 3925, 3926, 3927, 3927, 3928, 3929, 3929, 3930, 3930, 3931, 3932, 3932, 3933, + 3933, 3934, 3935, 3935, 3936, 3936, 3937, 3938, 3938, 3939, 3939, 3940, 3941, 3941, 3942, 3942, + 3943, 3943, 3944, 3945, 3945, 3946, 3946, 3947, 3947, 3948, 3949, 3949, 3950, 3950, 3951, 3951, + 3952, 3952, 3953, 3953, 3954, 3955, 3955, 3956, 3956, 3957, 3957, 3958, 3958, 3959, 3959, 3960, + 3960, 3961, 3961, 3962, 3962, 3963, 3963, 3964, 3964, 3965, 3965, 3966, 3966, 3967, 3967, 3968, + 3968, 3969, 3969, 3970, 3970, 3971, 3971, 3972, 3972, 3972, 3973, 3973, 3974, 3974, 3975, 3975, + 3976, 3976, 3977, 3977, 3977, 3978, 3978, 3979, 3979, 3980, 3980, 3981, 3981, 3981, 3982, 3982, + 3983, 3983, 3984, 3984, 3984, 3985, 3985, 3986, 3986, 3987, 3987, 3987, 3988, 3988, 3989, 3989, + 3989, 3990, 3990, 3991, 3991, 3991, 3992, 3992, 3993, 3993, 3993, 3994, 3994, 3995, 3995, 3995, + 3996, 3996, 3996, 3997, 3997, 3998, 3998, 3998, 3999, 3999, 3999, 4000, 4000, 4000, 4001, 4001, + 4002, 4002, 4002, 4003, 4003, 4003, 4004, 4004, 4004, 4005, 4005, 4005, 4006, 4006, 4006, 4007, + 4007, 4007, 4008, 
4008, 4008, 4009, 4009, 4009, 4010, 4010, 4010, 4011, 4011, 4011, 4012, 4012, + 4012, 4013, 4013, 4013, 4014, 4014, 4014, 4015, 4015, 4015, 4016, 4016, 4016, 4016, 4017, 4017, + 4017, 4018, 4018, 4018, 4019, 4019, 4019, 4019, 4020, 4020, 4020, 4021, 4021, 4021, 4021, 4022, + 4022, 4022, 4023, 4023, 4023, 4023, 4024, 4024, 4024, 4024, 4025, 4025, 4025, 4026, 4026, 4026, + 4026, 4027, 4027, 4027, 4027, 4028, 4028, 4028, 4028, 4029, 4029, 4029, 4029, 4030, 4030, 4030, + 4030, 4031, 4031, 4031, 4031, 4032, 4032, 4032, 4032, 4033, 4033, 4033, 4033, 4034, 4034, 4034, + 4034, 4035, 4035, 4035, 4035, 4036, 4036, 4036, 4036, 4036, 4037, 4037, 4037, 4037, 4038, 4038, + 4038, 4038, 4038, 4039, 4039, 4039, 4039, 4040, 4040, 4040, 4040, 4040, 4041, 4041, 4041, 4041, + 4041, 4042, 4042, 4042, 4042, 4042, 4043, 4043, 4043, 4043, 4043, 4044, 4044, 4044, 4044, 4044, + 4045, 4045, 4045, 4045, 4045, 4046, 4046, 4046, 4046, 4046, 4047, 4047, 4047, 4047, 4047, 4048, + 4048, 4048, 4048, 4048, 4048, 4049, 4049, 4049, 4049, 4049, 4050, 4050, 4050, 4050, 4050, 4050, + 4051, 4051, 4051, 4051, 4051, 4051, 4052, 4052, 4052, 4052, 4052, 4052, 4053, 4053, 4053, 4053, + 4053, 4053, 4054, 4054, 4054, 4054, 4054, 4054, 4055, 4055, 4055, 4055, 4055, 4055, 4056, 4056, + 4056, 4056, 4056, 4056, 4056, 4057, 4057, 4057, 4057, 4057, 4057, 4057, 4058, 4058, 4058, 4058, + 4058, 4058, 4058, 4059, 4059, 4059, 4059, 4059, 4059, 4059, 4060, 4060, 4060, 4060, 4060, 4060, + 4060, 4061, 4061, 4061, 4061, 4061, 4061, 4061, 4062, 4062, 4062, 4062, 4062, 4062, 4062, 4062, + 4063, 4063, 4063, 4063, 4063, 4063, 4063, 4063, 4064, 4064, 4064, 4064, 4064, 4064, 4064, 4064, + 4065, 4065, 4065, 4065, 4065, 4065, 4065, 4065, 4065, 4066, 4066, 4066, 4066, 4066, 4066, 4066, + 4066, 4066, 4067, 4067, 4067, 4067, 4067, 4067, 4067, 4067, 4067, 4068, 4068, 4068, 4068, 4068, + 4068, 4068, 4068, 4068, 4069, 4069, 4069, 4069, 4069, 4069, 4069, 4069, 4069, 4069, 4070, 4070, + 4070, 4070, 4070, 4070, 4070, 4070, 4070, 4070, 4071, 
4071, 4071, 4071, 4071, 4071, 4071, 4071, + 4071, 4071, 4072, 4072, 4072, 4072, 4072, 4072, 4072, 4072, 4072, 4072, 4072, 4073, 4073, 4073, + 4073, 4073, 4073, 4073, 4073, 4073, 4073, 4073, 4073, 4074, 4074, 4074, 4074, 4074, 4074, 4074, + 4074, 4074, 4074, 4074, 4074, 4075, 4075, 4075, 4075, 4075, 4075, 4075, 4075, 4075, 4075, 4075, + 4075, 4076, 4076, 4076, 4076, 4076, 4076, 4076, 4076, 4076, 4076, 4076, 4076, 4076, 4077, 4077, + 4077, 4077, 4077, 4077, 4077, 4077, 4077, 4077, 4077, 4077, 4077, 4077, 4078, 4078, 4078, 4078, + 4078, 4078, 4078, 4078, 4078, 4078, 4078, 4078, 4078, 4078, 4078, 4079, 4079, 4079, 4079, 4079, + 4079, 4079, 4079, 4079, 4079, 4079, 4079, 4079, 4079, 4079, 4079, 4080, 4080, 4080, 4080, 4080, + 4080, 4080, 4080, 4080, 4080, 4080, 4080, 4080, 4080, 4080, 4080, 4081, 4081, 4081, 4081, 4081, + 4081, 4081, 4081, 4081, 4081, 4081, 4081, 4081, 4081, 4081, 4081, 4081, 4081, 4082, 4082, 4082, + 4082, 4082, 4082, 4082, 4082, 4082, 4082, 4082, 4082, 4082, 4082, 4082, 4082, 4082, 4082, 4082, + 4083, 4083, 4083, 4083, 4083, 4083, 4083, 4083, 4083, 4083, 4083, 4083, 4083, 4083, 4083, 4083, + 4083, 4083, 4083, 4083, 4084, 4084, 4084, 4084, 4084, 4084, 4084, 4084, 4084, 4084, 4084, 4084, + 4084, 4084, 4084, 4084, 4084, 4084, 4084, 4084, 4084, 4084, 4084, 4085, 4085, 4085, 4085, 4085, + 4085, 4085, 4085, 4085, 4085, 4085, 4085, 4085, 4085, 4085, 4085, 4085, 4085, 4085, 4085, 4085, + 4085, 4085, 4085, 4086, 4086, 4086, 4086, 4086, 4086, 4086, 4086, 4086, 4086, 4086, 4086, 4086, + 4086, 4086, 4086, 4086, 4086, 4086, 4086, 4086, 4086, 4086, 4086, 4086, 4086, 4086, 4087, 4087, + 4087, 4087, 4087, 4087, 4087, 4087, 4087, 4087, 4087, 4087, 4087, 4087, 4087, 4087, 4087, 4087, + 4087, 4087, 4087, 4087, 4087, 4087, 4087, 4087, 4087, 4087, 4087, 4087, 4087, 4088, 4088, 4088, + 4088, 4088, 4088, 4088, 4088, 4088, 4088, 4088, 4088, 4088, 4088, 4088, 4088, 4088, 4088, 4088, + 4088, 4088, 4088, 4088, 4088, 4088, 4088, 4088, 4088, 4088, 4088, 4088, 4088, 4088, 4088, 
4089, + 4089, 4089, 4089, 4089, 4089, 4089, 4089, 4089, 4089, 4089, 4089, 4089, 4089, 4089, 4089, 4089, + 4089, 4089, 4089, 4089, 4089, 4089, 4089, 4089, 4089, 4089, 4089, 4089, 4089, 4089, 4089, 4089, + 4089, 4089, 4089, 4089, 4089, 4089, 4090, 4090, 4090, 4090, 4090, 4090, 4090, 4090, 4090, 4090, + 4090, 4090, 4090, 4090, 4090, 4090, 4090, 4090, 4090, 4090, 4090, 4090, 4090, 4090, 4090, 4090, + 4090, 4090, 4090, 4090, 4090, 4090, 4090, 4090, 4090, 4090, 4090, 4090, 4090, 4090, 4090, 4090, + 4090, 4090, 4090, 4090, 4090, 4091, 4091, 4091, 4091, 4091, 4091, 4091, 4091, 4091, 4091, 4091, + 4091, 4091, 4091, 4091, 4091, 4091, 4091, 4091, 4091, 4091, 4091, 4091, 4091, 4091, 4091, 4091, + 4091, 4091, 4091, 4091, 4091, 4091, 4091, 4091, 4091, 4091, 4091, 4091, 4091, 4091, 4091, 4091, + 4091, 4091, 4091, 4091, 4091, 4091, 4091, 4091, 4091, 4091, 4091, 4091, 4091, 4091, 4092, 4092, + 4092, 4092, 4092, 4092, 4092, 4092, 4092, 4092, 4092, 4092, 4092, 4092, 4092, 4092, 4092, 4092, + 4092, 4092, 4092, 4092, 4092, 4092, 4092, 4092, 4092, 4092, 4092, 4092, 4092, 4092, 4092, 4092, + 4092, 4092, 4092, 4092, 4092, 4092, 4092, 4092, 4092, 4092, 4092, 4092, 4092, 4092, 4092, 4092, + 4092, 4092, 4092, 4092, 4092, 4092, 4092, 4092, 4092, 4092, 4092, 4092, 4092, 4092, 4092, 4092, + 4092, 4092, 4092, 4092, 4092, 4092, 4092, 4092, 4093, 4093, 4093, 4093, 4093, 4093, 4093, 4093, + 4093, 4093, 4093, 4093, 4093, 4093, 4093, 4093, 4093, 4093, 4093, 4093, 4093, 4093, 4093, 4093, + 4093, 4093, 4093, 4093, 4093, 4093, 4093, 4093, 4093, 4093, 4093, 4093, 4093, 4093, 4093, 4093, + 4093, 4093, 4093, 4093, 4093, 4093, 4093, 4093, 4093, 4093, 4093, 4093, 4093, 4093, 4093, 4093, + 4093, 4093, 4093, 4093, 4093, 4093, 4093, 4093, 4093, 4093, 4093, 4093, 4093, 4093, 4093, 4093, + 4093, 4093, 4093, 4093, 4093, 4093, 4093, 4093, 4093, 4093, 4093, 4093, 4093, 4093, 4093, 4093, + 4093, 4093, 4093, 4093, 4093, 4093, 4093, 4093, 4093, 4093, 4093, 4093, 4093, 4093, 4093, 4093, + 4094, 4094, 4094, 4094, 4094, 
4094, 4094, 4094, 4094, 4094, 4094, 4094, 4094, 4094, 4094, 4094, + 4094, 4094, 4094, 4094, 4094, 4094, 4094, 4094, 4094, 4094, 4094, 4094, 4094, 4094, 4094, 4094, + 4094, 4094, 4094, 4094, 4094, 4094, 4094, 4094, 4094, 4094, 4094, 4094, 4094, 4094, 4094, 4094, + 4094, 4094, 4094, 4094, 4094, 4094, 4094, 4094, 4094, 4094, 4094, 4094, 4094, 4094, 4094, 4094, + 4094, 4094, 4094, 4094, 4094, 4094, 4094, 4094, 4094, 4094, 4094, 4094, 4094, 4094, 4094, 4094, + 4094, 4094, 4094, 4094, 4094, 4094, 4094, 4094, 4094, 4094, 4094, 4094, 4094, 4094, 4094, 4095, + 4095 +}; + +static const unsigned char model_rank_state_table[32768] = +{ + 224, 0, 0, 0, 0, 0, 0, 0, 205, 0, 0, 0, 0, 0, 0, 0, 173, 0, 0, 0, 0, 0, 0, 0, 255, 0, 0, 0, 0, 0, 0, 0, + 0, 159, 6, 176, 18, 11, 35, 46, 124, 159, 6, 75, 207, 180, 229, 24, 0, 234, 6, 176, 124, 231, 53, 209, 0, 110, 62, 176, 243, 53, 41, 38, + 0, 140, 6, 176, 241, 109, 42, 65, 120, 234, 6, 44, 92, 156, 183, 20, 0, 234, 206, 75, 18, 165, 253, 190, 0, 62, 62, 176, 99, 99, 198, 20, + 0, 54, 62, 176, 109, 42, 111, 244, 0, 74, 62, 176, 219, 170, 42, 0, 0, 21, 62, 241, 138, 5, 167, 12, 0, 107, 110, 176, 5, 218, 111, 183, + 150, 0, 0, 0, 0, 0, 0, 0, 150, 0, 0, 0, 0, 0, 0, 0, 150, 0, 0, 0, 0, 0, 0, 0, 51, 0, 0, 0, 0, 0, 0, 0, + 0, 82, 0, 0, 0, 0, 0, 0, 0, 82, 0, 0, 0, 0, 0, 0, 0, 82, 0, 0, 0, 0, 0, 0, 0, 238, 0, 0, 0, 0, 0, 0, + 0, 172, 0, 0, 0, 0, 0, 0, 0, 159, 0, 0, 0, 0, 0, 0, 0, 159, 0, 0, 0, 0, 0, 0, 0, 238, 0, 0, 0, 0, 0, 0, + 0, 238, 0, 0, 0, 0, 0, 0, 0, 107, 0, 0, 0, 0, 0, 0, 0, 172, 0, 0, 0, 0, 0, 0, 0, 107, 0, 0, 0, 0, 0, 0, + 150, 0, 0, 0, 0, 0, 0, 0, 15, 0, 0, 0, 0, 0, 0, 0, 15, 0, 0, 0, 0, 0, 0, 0, 51, 0, 0, 0, 0, 0, 0, 0, + 0, 234, 74, 254, 161, 180, 0, 0, 0, 16, 228, 176, 127, 232, 197, 166, 0, 141, 21, 197, 5, 212, 182, 0, 0, 246, 21, 3, 211, 92, 35, 150, + 0, 185, 52, 176, 1, 235, 10, 253, 0, 191, 54, 197, 3, 216, 219, 46, 0, 45, 47, 197, 73, 35, 0, 50, 0, 230, 6, 5, 243, 42, 76, 60, + 0, 82, 47, 115, 204, 31, 126, 164, 0, 172, 2, 
176, 161, 4, 23, 0, 0, 57, 74, 30, 222, 0, 53, 0, 0, 62, 62, 176, 5, 128, 109, 190, + 51, 0, 0, 0, 0, 0, 0, 0, 173, 0, 0, 0, 0, 0, 0, 0, 173, 0, 0, 0, 0, 0, 0, 0, 250, 0, 0, 0, 0, 0, 0, 0, + 0, 159, 26, 89, 192, 99, 62, 187, 0, 47, 74, 176, 194, 156, 216, 218, 0, 234, 62, 207, 153, 166, 167, 180, 23, 62, 62, 176, 122, 170, 231, 120, + 0, 172, 74, 151, 241, 109, 99, 23, 0, 139, 110, 237, 203, 92, 251, 14, 0, 172, 74, 3, 75, 11, 5, 0, 0, 39, 62, 89, 251, 23, 92, 163, + 17, 238, 123, 56, 161, 92, 165, 165, 0, 6, 110, 127, 122, 23, 170, 50, 0, 54, 54, 177, 122, 194, 11, 120, 35, 110, 110, 176, 5, 42, 167, 163, + 82, 0, 0, 0, 0, 0, 0, 0, 159, 0, 0, 0, 0, 0, 0, 0, 172, 0, 0, 0, 0, 0, 0, 0, 107, 0, 0, 0, 0, 0, 0, 0, + 0, 82, 0, 0, 0, 0, 0, 0, 0, 159, 0, 0, 0, 0, 0, 0, 0, 234, 0, 0, 0, 0, 0, 0, 0, 110, 0, 0, 0, 0, 0, 0, + 0, 103, 0, 0, 0, 0, 0, 0, 0, 172, 0, 0, 0, 0, 0, 0, 0, 172, 0, 0, 0, 0, 0, 0, 0, 54, 0, 0, 0, 0, 0, 0, + 0, 238, 0, 0, 0, 0, 0, 0, 0, 172, 0, 0, 0, 0, 0, 0, 0, 118, 0, 0, 0, 0, 0, 0, 0, 233, 0, 0, 0, 0, 0, 0, + 82, 0, 0, 0, 0, 0, 0, 0, 82, 0, 0, 0, 0, 0, 0, 0, 140, 0, 0, 0, 0, 0, 0, 0, 107, 0, 0, 0, 0, 0, 0, 0, + 0, 82, 0, 0, 0, 0, 0, 0, 0, 159, 0, 0, 0, 0, 0, 0, 0, 29, 0, 0, 0, 0, 0, 0, 0, 110, 0, 0, 0, 0, 0, 0, + 0, 82, 0, 0, 0, 0, 0, 0, 0, 136, 0, 0, 0, 0, 0, 0, 0, 162, 0, 0, 0, 0, 0, 0, 0, 222, 0, 0, 0, 0, 0, 0, + 0, 238, 0, 0, 0, 0, 0, 0, 0, 172, 0, 0, 0, 0, 0, 0, 0, 234, 0, 0, 0, 0, 0, 0, 0, 107, 0, 0, 0, 0, 0, 0, + 238, 0, 0, 0, 0, 0, 0, 0, 141, 0, 0, 0, 0, 0, 0, 0, 103, 0, 0, 0, 0, 0, 0, 0, 123, 0, 0, 0, 0, 0, 0, 0, + 0, 16, 71, 189, 18, 138, 12, 233, 0, 22, 214, 75, 153, 28, 0, 0, 0, 159, 2, 44, 99, 181, 243, 68, 0, 238, 30, 202, 128, 132, 46, 221, + 0, 141, 119, 91, 115, 124, 65, 159, 0, 16, 135, 94, 74, 17, 0, 0, 0, 191, 106, 110, 166, 0, 116, 108, 0, 136, 22, 75, 119, 208, 244, 0, + 0, 47, 6, 176, 156, 180, 231, 112, 78, 103, 6, 254, 194, 18, 153, 0, 0, 58, 206, 3, 217, 188, 0, 229, 0, 6, 74, 241, 18, 157, 204, 252, + 238, 0, 0, 0, 0, 0, 0, 0, 
136, 0, 0, 0, 0, 0, 0, 0, 47, 0, 0, 0, 0, 0, 0, 0, 107, 0, 0, 0, 0, 0, 0, 0, + 0, 145, 74, 62, 241, 255, 107, 0, 0, 159, 110, 49, 176, 86, 92, 49, 0, 22, 223, 240, 194, 178, 56, 168, 108, 54, 74, 176, 194, 28, 28, 180, + 0, 140, 74, 241, 3, 157, 221, 120, 0, 10, 2, 127, 106, 165, 239, 0, 0, 8, 117, 91, 18, 96, 112, 46, 0, 74, 62, 69, 174, 218, 224, 73, + 0, 238, 110, 176, 232, 144, 28, 46, 0, 110, 62, 32, 199, 194, 221, 254, 0, 52, 2, 176, 5, 35, 42, 50, 12, 110, 62, 176, 138, 53, 60, 24, + 238, 0, 0, 0, 0, 0, 0, 0, 84, 0, 0, 0, 0, 0, 0, 0, 87, 0, 0, 0, 0, 0, 0, 0, 110, 0, 0, 0, 0, 0, 0, 0, + 0, 185, 234, 40, 161, 198, 220, 203, 0, 16, 234, 49, 237, 197, 239, 19, 0, 87, 57, 85, 126, 195, 62, 0, 0, 96, 235, 148, 210, 219, 0, 0, + 0, 185, 6, 75, 219, 232, 250, 218, 0, 141, 6, 49, 129, 243, 195, 0, 0, 58, 2, 219, 195, 233, 0, 0, 231, 201, 85, 79, 73, 206, 0, 253, + 0, 19, 110, 3, 219, 42, 20, 0, 0, 54, 74, 31, 122, 198, 32, 253, 0, 58, 6, 169, 176, 94, 53, 0, 0, 172, 62, 176, 5, 109, 231, 168, + 64, 0, 0, 0, 0, 0, 0, 0, 141, 0, 0, 0, 0, 0, 0, 0, 103, 0, 0, 0, 0, 0, 0, 0, 123, 0, 0, 0, 0, 0, 0, 0, + 0, 34, 0, 0, 0, 0, 0, 0, 0, 34, 0, 0, 0, 0, 0, 0, 0, 103, 0, 0, 0, 0, 0, 0, 0, 226, 0, 0, 0, 0, 0, 0, + 0, 16, 0, 0, 0, 0, 0, 0, 0, 82, 0, 0, 0, 0, 0, 0, 0, 15, 0, 0, 0, 0, 0, 0, 0, 162, 0, 0, 0, 0, 0, 0, + 0, 103, 0, 0, 0, 0, 0, 0, 0, 103, 0, 0, 0, 0, 0, 0, 0, 172, 0, 0, 0, 0, 0, 0, 0, 172, 0, 0, 0, 0, 0, 0, + 173, 0, 0, 0, 0, 0, 0, 0, 172, 0, 0, 0, 0, 0, 0, 0, 67, 0, 0, 0, 0, 0, 0, 0, 172, 0, 0, 0, 0, 0, 0, 0, + 0, 45, 0, 0, 0, 0, 0, 0, 0, 136, 0, 0, 0, 0, 0, 0, 0, 141, 0, 0, 0, 0, 0, 0, 0, 15, 0, 0, 0, 0, 0, 0, + 0, 103, 0, 0, 0, 0, 0, 0, 0, 159, 0, 0, 0, 0, 0, 0, 0, 162, 0, 0, 0, 0, 0, 0, 0, 119, 0, 0, 0, 0, 0, 0, + 0, 250, 0, 0, 0, 0, 0, 0, 0, 172, 0, 0, 0, 0, 0, 0, 0, 228, 0, 0, 0, 0, 0, 0, 0, 47, 0, 0, 0, 0, 0, 0, + 238, 0, 0, 0, 0, 0, 0, 0, 82, 0, 0, 0, 0, 0, 0, 0, 67, 0, 0, 0, 0, 0, 0, 0, 222, 0, 0, 0, 0, 0, 0, 0, + 0, 159, 0, 0, 0, 0, 0, 0, 0, 123, 0, 0, 0, 0, 0, 
0, 0, 140, 0, 0, 0, 0, 0, 0, 0, 107, 0, 0, 0, 0, 0, 0, + 0, 175, 0, 0, 0, 0, 0, 0, 0, 118, 0, 0, 0, 0, 0, 0, 0, 140, 0, 0, 0, 0, 0, 0, 0, 123, 0, 0, 0, 0, 0, 0, + 0, 172, 0, 0, 0, 0, 0, 0, 0, 74, 0, 0, 0, 0, 0, 0, 0, 172, 0, 0, 0, 0, 0, 0, 0, 107, 0, 0, 0, 0, 0, 0, + 238, 0, 0, 0, 0, 0, 0, 0, 172, 0, 0, 0, 0, 0, 0, 0, 172, 0, 0, 0, 0, 0, 0, 0, 222, 0, 0, 0, 0, 0, 0, 0, + 0, 54, 62, 176, 216, 42, 100, 221, 0, 8, 74, 176, 18, 86, 12, 244, 0, 6, 6, 217, 156, 128, 149, 231, 0, 238, 110, 176, 109, 218, 231, 111, + 0, 54, 74, 176, 192, 144, 232, 20, 0, 57, 74, 62, 31, 42, 0, 0, 0, 141, 22, 192, 208, 42, 0, 20, 0, 107, 62, 176, 99, 18, 126, 128, + 180, 74, 62, 176, 5, 128, 144, 17, 12, 47, 74, 176, 219, 170, 168, 20, 179, 74, 62, 13, 5, 166, 187, 167, 0, 110, 110, 176, 5, 144, 126, 120, + 1, 0, 0, 0, 0, 0, 0, 0, 172, 0, 0, 0, 0, 0, 0, 0, 83, 0, 0, 0, 0, 0, 0, 0, 233, 0, 0, 0, 0, 0, 0, 0, + 0, 173, 0, 0, 0, 0, 0, 0, 0, 172, 0, 0, 0, 0, 0, 0, 0, 33, 0, 0, 0, 0, 0, 0, 0, 238, 0, 0, 0, 0, 0, 0, + 0, 255, 0, 0, 0, 0, 0, 0, 0, 123, 0, 0, 0, 0, 0, 0, 0, 107, 0, 0, 0, 0, 0, 0, 0, 26, 0, 0, 0, 0, 0, 0, + 0, 255, 0, 0, 0, 0, 0, 0, 0, 107, 0, 0, 0, 0, 0, 0, 0, 172, 0, 0, 0, 0, 0, 0, 0, 107, 0, 0, 0, 0, 0, 0, + 1, 0, 0, 0, 0, 0, 0, 0, 103, 0, 0, 0, 0, 0, 0, 0, 139, 0, 0, 0, 0, 0, 0, 0, 110, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 82, 94, 204, 141, 0, 0, 0, 0, 237, 174, 240, 180, 203, 0, 0, 0, 134, 110, 204, 92, 0, 0, 0, 0, 62, 176, 161, 42, 76, 198, + 76, 0, 172, 91, 13, 189, 34, 0, 0, 0, 9, 100, 25, 243, 244, 0, 0, 0, 2, 142, 151, 45, 196, 91, 179, 0, 62, 110, 5, 53, 172, 11, + 0, 0, 54, 90, 216, 109, 176, 168, 0, 0, 71, 75, 153, 251, 50, 209, 0, 0, 110, 56, 5, 229, 20, 28, 247, 0, 110, 176, 219, 12, 229, 163, + 1, 0, 0, 0, 0, 0, 0, 0, 172, 0, 0, 0, 0, 0, 0, 0, 123, 0, 0, 0, 0, 0, 0, 0, 222, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 107, 56, 18, 166, 72, 0, 72, 0, 172, 217, 5, 180, 208, 18, 0, 0, 47, 30, 42, 161, 50, 0, 199, 0, 62, 176, 99, 42, 76, 231, + 0, 0, 172, 107, 138, 128, 6, 183, 0, 0, 6, 91, 241, 
198, 219, 170, 30, 0, 6, 240, 56, 11, 126, 170, 58, 0, 62, 176, 18, 211, 42, 65, + 53, 0, 233, 56, 161, 166, 35, 17, 0, 0, 110, 176, 99, 92, 28, 50, 0, 0, 62, 62, 99, 72, 76, 46, 221, 0, 110, 241, 5, 218, 17, 17, + 63, 0, 0, 0, 0, 0, 0, 0, 150, 0, 0, 0, 0, 0, 0, 0, 15, 0, 0, 0, 0, 0, 0, 0, 250, 0, 0, 0, 0, 0, 0, 0, + 0, 159, 6, 176, 5, 11, 168, 111, 0, 159, 6, 75, 207, 195, 231, 163, 68, 234, 6, 176, 241, 92, 111, 126, 203, 74, 62, 176, 5, 11, 38, 0, + 0, 234, 6, 176, 99, 194, 231, 209, 0, 234, 206, 176, 99, 198, 190, 244, 0, 234, 206, 176, 49, 194, 53, 231, 0, 110, 62, 176, 5, 42, 168, 112, + 0, 110, 62, 176, 5, 42, 198, 28, 0, 47, 62, 176, 5, 109, 165, 137, 0, 6, 62, 176, 153, 72, 187, 231, 0, 110, 110, 176, 5, 42, 168, 24, + 150, 0, 0, 0, 0, 0, 0, 0, 150, 0, 0, 0, 0, 0, 0, 0, 150, 0, 0, 0, 0, 0, 0, 0, 51, 0, 0, 0, 0, 0, 0, 0, + 0, 82, 0, 0, 0, 0, 0, 0, 0, 82, 0, 0, 0, 0, 0, 0, 0, 82, 0, 0, 0, 0, 0, 0, 0, 238, 0, 0, 0, 0, 0, 0, + 0, 172, 0, 0, 0, 0, 0, 0, 0, 185, 0, 0, 0, 0, 0, 0, 0, 16, 0, 0, 0, 0, 0, 0, 0, 172, 0, 0, 0, 0, 0, 0, + 0, 250, 0, 0, 0, 0, 0, 0, 0, 54, 0, 0, 0, 0, 0, 0, 0, 123, 0, 0, 0, 0, 0, 0, 0, 107, 0, 0, 0, 0, 0, 0, + 150, 0, 0, 0, 0, 0, 0, 0, 150, 0, 0, 0, 0, 0, 0, 0, 150, 0, 0, 0, 0, 0, 0, 0, 150, 0, 0, 0, 0, 0, 0, 0, + 0, 140, 6, 3, 176, 23, 50, 0, 0, 140, 6, 3, 194, 170, 161, 211, 0, 140, 6, 179, 122, 11, 42, 253, 0, 145, 6, 176, 156, 144, 76, 65, + 0, 234, 6, 40, 72, 23, 53, 30, 0, 234, 6, 69, 219, 5, 7, 211, 85, 141, 22, 30, 219, 203, 81, 86, 0, 47, 6, 176, 207, 5, 231, 108, + 0, 54, 110, 69, 153, 166, 218, 138, 0, 159, 6, 176, 207, 168, 231, 6, 0, 110, 62, 91, 127, 2, 168, 46, 0, 62, 62, 176, 156, 166, 229, 120, + 51, 0, 0, 0, 0, 0, 0, 0, 150, 0, 0, 0, 0, 0, 0, 0, 250, 0, 0, 0, 0, 0, 0, 0, 250, 0, 0, 0, 0, 0, 0, 0, + 0, 246, 110, 89, 56, 109, 109, 203, 0, 246, 110, 176, 219, 144, 218, 111, 0, 54, 62, 176, 5, 42, 157, 221, 20, 54, 62, 176, 5, 218, 168, 190, + 0, 172, 62, 176, 204, 218, 24, 5, 0, 172, 62, 75, 31, 48, 229, 170, 0, 74, 6, 69, 
176, 12, 231, 24, 0, 110, 62, 176, 156, 53, 120, 0, + 0, 19, 110, 56, 192, 166, 166, 168, 0, 62, 110, 176, 138, 218, 111, 112, 0, 6, 110, 176, 161, 203, 168, 157, 18, 110, 110, 176, 5, 218, 167, 190, + 82, 0, 0, 0, 0, 0, 0, 0, 82, 0, 0, 0, 0, 0, 0, 0, 172, 0, 0, 0, 0, 0, 0, 0, 107, 0, 0, 0, 0, 0, 0, 0, + 0, 82, 0, 0, 0, 0, 0, 0, 0, 7, 0, 0, 0, 0, 0, 0, 0, 82, 0, 0, 0, 0, 0, 0, 0, 123, 0, 0, 0, 0, 0, 0, + 0, 103, 0, 0, 0, 0, 0, 0, 0, 29, 0, 0, 0, 0, 0, 0, 0, 140, 0, 0, 0, 0, 0, 0, 0, 123, 0, 0, 0, 0, 0, 0, + 0, 238, 0, 0, 0, 0, 0, 0, 0, 246, 0, 0, 0, 0, 0, 0, 0, 123, 0, 0, 0, 0, 0, 0, 0, 107, 0, 0, 0, 0, 0, 0, + 82, 0, 0, 0, 0, 0, 0, 0, 82, 0, 0, 0, 0, 0, 0, 0, 103, 0, 0, 0, 0, 0, 0, 0, 172, 0, 0, 0, 0, 0, 0, 0, + 0, 82, 0, 0, 0, 0, 0, 0, 0, 67, 0, 0, 0, 0, 0, 0, 0, 172, 0, 0, 0, 0, 0, 0, 0, 172, 0, 0, 0, 0, 0, 0, + 0, 15, 0, 0, 0, 0, 0, 0, 0, 67, 0, 0, 0, 0, 0, 0, 0, 185, 0, 0, 0, 0, 0, 0, 0, 107, 0, 0, 0, 0, 0, 0, + 0, 238, 0, 0, 0, 0, 0, 0, 0, 172, 0, 0, 0, 0, 0, 0, 0, 255, 0, 0, 0, 0, 0, 0, 0, 107, 0, 0, 0, 0, 0, 0, + 82, 0, 0, 0, 0, 0, 0, 0, 103, 0, 0, 0, 0, 0, 0, 0, 103, 0, 0, 0, 0, 0, 0, 0, 172, 0, 0, 0, 0, 0, 0, 0, + 0, 140, 6, 254, 71, 48, 0, 152, 0, 136, 228, 69, 148, 28, 56, 0, 0, 162, 2, 10, 146, 0, 0, 219, 0, 57, 21, 94, 85, 245, 0, 0, + 0, 45, 6, 197, 197, 72, 111, 0, 0, 242, 71, 4, 147, 112, 0, 30, 0, 58, 129, 197, 164, 0, 0, 0, 0, 234, 21, 240, 198, 74, 189, 48, + 0, 222, 62, 75, 180, 23, 203, 18, 0, 172, 62, 40, 122, 156, 120, 28, 0, 6, 62, 31, 32, 0, 0, 53, 0, 62, 139, 49, 86, 42, 108, 218, + 181, 0, 0, 0, 0, 0, 0, 0, 238, 0, 0, 0, 0, 0, 0, 0, 172, 0, 0, 0, 0, 0, 0, 0, 107, 0, 0, 0, 0, 0, 0, 0, + 0, 47, 254, 139, 5, 5, 253, 18, 0, 74, 62, 235, 138, 111, 17, 17, 0, 101, 21, 219, 192, 5, 211, 190, 0, 107, 62, 176, 156, 72, 195, 168, + 252, 118, 6, 176, 11, 28, 60, 241, 0, 16, 62, 3, 232, 232, 0, 253, 0, 242, 206, 44, 115, 3, 153, 181, 0, 123, 6, 30, 31, 232, 55, 50, + 23, 172, 110, 176, 5, 109, 111, 38, 0, 172, 62, 176, 99, 144, 167, 50, 0, 139, 6, 
176, 122, 231, 108, 8, 0, 62, 176, 176, 5, 128, 168, 209, + 51, 0, 0, 0, 0, 0, 0, 0, 172, 0, 0, 0, 0, 0, 0, 0, 140, 0, 0, 0, 0, 0, 0, 0, 123, 0, 0, 0, 0, 0, 0, 0, + 0, 140, 6, 4, 5, 2, 31, 0, 0, 140, 52, 204, 73, 136, 38, 0, 0, 87, 206, 179, 128, 23, 105, 192, 0, 228, 6, 241, 18, 72, 53, 0, + 0, 140, 6, 40, 237, 177, 209, 0, 0, 141, 6, 219, 130, 203, 194, 0, 0, 141, 54, 147, 243, 179, 190, 168, 0, 74, 62, 240, 18, 69, 231, 20, + 0, 140, 74, 3, 156, 17, 35, 172, 0, 246, 2, 134, 122, 142, 115, 73, 0, 234, 74, 3, 243, 204, 44, 0, 0, 123, 62, 176, 243, 124, 35, 28, + 181, 0, 0, 0, 0, 0, 0, 0, 15, 0, 0, 0, 0, 0, 0, 0, 82, 0, 0, 0, 0, 0, 0, 0, 172, 0, 0, 0, 0, 0, 0, 0, + 0, 15, 0, 0, 0, 0, 0, 0, 0, 15, 0, 0, 0, 0, 0, 0, 0, 82, 0, 0, 0, 0, 0, 0, 0, 173, 0, 0, 0, 0, 0, 0, + 0, 82, 0, 0, 0, 0, 0, 0, 0, 16, 0, 0, 0, 0, 0, 0, 0, 173, 0, 0, 0, 0, 0, 0, 0, 82, 0, 0, 0, 0, 0, 0, + 0, 172, 0, 0, 0, 0, 0, 0, 0, 173, 0, 0, 0, 0, 0, 0, 0, 255, 0, 0, 0, 0, 0, 0, 0, 172, 0, 0, 0, 0, 0, 0, + 51, 0, 0, 0, 0, 0, 0, 0, 51, 0, 0, 0, 0, 0, 0, 0, 7, 0, 0, 0, 0, 0, 0, 0, 172, 0, 0, 0, 0, 0, 0, 0, + 0, 15, 0, 0, 0, 0, 0, 0, 0, 82, 0, 0, 0, 0, 0, 0, 0, 140, 0, 0, 0, 0, 0, 0, 0, 172, 0, 0, 0, 0, 0, 0, + 0, 82, 0, 0, 0, 0, 0, 0, 0, 185, 0, 0, 0, 0, 0, 0, 0, 185, 0, 0, 0, 0, 0, 0, 0, 123, 0, 0, 0, 0, 0, 0, + 0, 181, 0, 0, 0, 0, 0, 0, 0, 172, 0, 0, 0, 0, 0, 0, 0, 172, 0, 0, 0, 0, 0, 0, 0, 172, 0, 0, 0, 0, 0, 0, + 51, 0, 0, 0, 0, 0, 0, 0, 172, 0, 0, 0, 0, 0, 0, 0, 238, 0, 0, 0, 0, 0, 0, 0, 255, 0, 0, 0, 0, 0, 0, 0, + 0, 172, 0, 0, 0, 0, 0, 0, 0, 173, 0, 0, 0, 0, 0, 0, 0, 172, 0, 0, 0, 0, 0, 0, 0, 172, 0, 0, 0, 0, 0, 0, + 0, 173, 0, 0, 0, 0, 0, 0, 0, 172, 0, 0, 0, 0, 0, 0, 0, 123, 0, 0, 0, 0, 0, 0, 0, 172, 0, 0, 0, 0, 0, 0, + 0, 172, 0, 0, 0, 0, 0, 0, 0, 123, 0, 0, 0, 0, 0, 0, 0, 172, 0, 0, 0, 0, 0, 0, 0, 107, 0, 0, 0, 0, 0, 0, + 181, 0, 0, 0, 0, 0, 0, 0, 172, 0, 0, 0, 0, 0, 0, 0, 123, 0, 0, 0, 0, 0, 0, 0, 222, 0, 0, 0, 0, 0, 0, 0, + 0, 47, 6, 176, 18, 249, 86, 38, 0, 47, 74, 176, 219, 195, 211, 108, 
0, 74, 74, 30, 91, 251, 108, 108, 0, 21, 62, 176, 99, 53, 126, 221, + 0, 123, 62, 89, 124, 240, 221, 194, 0, 6, 62, 176, 12, 11, 176, 37, 0, 6, 215, 75, 99, 194, 46, 244, 0, 47, 110, 75, 5, 35, 65, 17, + 0, 123, 110, 176, 5, 53, 111, 112, 0, 254, 110, 176, 31, 109, 126, 28, 0, 74, 62, 176, 5, 20, 76, 190, 0, 110, 62, 176, 5, 11, 111, 17, + 51, 0, 0, 0, 0, 0, 0, 0, 238, 0, 0, 0, 0, 0, 0, 0, 173, 0, 0, 0, 0, 0, 0, 0, 255, 0, 0, 0, 0, 0, 0, 0, + 0, 82, 0, 0, 0, 0, 0, 0, 0, 67, 0, 0, 0, 0, 0, 0, 0, 250, 0, 0, 0, 0, 0, 0, 0, 238, 0, 0, 0, 0, 0, 0, + 0, 173, 0, 0, 0, 0, 0, 0, 0, 16, 0, 0, 0, 0, 0, 0, 0, 103, 0, 0, 0, 0, 0, 0, 0, 123, 0, 0, 0, 0, 0, 0, + 0, 250, 0, 0, 0, 0, 0, 0, 0, 255, 0, 0, 0, 0, 0, 0, 0, 172, 0, 0, 0, 0, 0, 0, 0, 107, 0, 0, 0, 0, 0, 0, + 51, 0, 0, 0, 0, 0, 0, 0, 173, 0, 0, 0, 0, 0, 0, 0, 119, 0, 0, 0, 0, 0, 0, 0, 255, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 54, 89, 166, 145, 45, 0, 0, 0, 6, 75, 132, 20, 212, 0, 0, 0, 6, 30, 195, 195, 211, 0, 0, 0, 62, 176, 99, 72, 187, 20, + 0, 0, 62, 100, 109, 47, 0, 251, 0, 0, 62, 10, 243, 170, 136, 0, 0, 0, 62, 18, 242, 237, 73, 0, 0, 0, 62, 176, 195, 144, 170, 144, + 0, 0, 74, 89, 166, 3, 209, 1, 0, 0, 110, 85, 161, 166, 221, 221, 0, 0, 21, 91, 211, 180, 203, 237, 0, 0, 110, 176, 5, 218, 168, 221, + 1, 0, 0, 0, 0, 0, 0, 0, 173, 0, 0, 0, 0, 0, 0, 0, 172, 0, 0, 0, 0, 0, 0, 0, 107, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 62, 89, 5, 166, 38, 183, 0, 0, 62, 91, 219, 23, 208, 116, 0, 0, 110, 176, 18, 128, 99, 231, 0, 0, 110, 176, 5, 11, 187, 17, + 0, 0, 74, 176, 216, 23, 178, 253, 0, 0, 74, 30, 208, 180, 5, 65, 0, 0, 206, 176, 127, 176, 126, 251, 13, 0, 62, 176, 5, 198, 167, 112, + 211, 0, 110, 56, 161, 166, 35, 168, 100, 0, 62, 176, 18, 11, 168, 168, 0, 0, 110, 176, 122, 42, 72, 24, 214, 0, 110, 176, 5, 218, 167, 50, + 224, 0, 0, 0, 0, 0, 0, 0, 67, 0, 0, 0, 0, 0, 0, 0, 103, 0, 0, 0, 0, 0, 0, 0, 255, 0, 0, 0, 0, 0, 0, 0, + 28, 159, 6, 176, 31, 11, 35, 253, 0, 159, 6, 75, 5, 109, 229, 111, 0, 234, 6, 30, 249, 195, 112, 126, 221, 110, 62, 176, 
208, 198, 157, 108, + 0, 140, 6, 176, 99, 195, 35, 112, 0, 234, 6, 176, 156, 53, 112, 46, 0, 234, 6, 75, 18, 42, 20, 253, 0, 74, 62, 176, 219, 53, 24, 20, + 238, 54, 62, 176, 5, 109, 28, 190, 74, 74, 62, 176, 5, 42, 120, 0, 12, 110, 62, 176, 18, 198, 163, 231, 98, 110, 110, 176, 5, 218, 183, 50, + 150, 0, 0, 0, 0, 0, 0, 0, 82, 0, 0, 0, 0, 0, 0, 0, 15, 0, 0, 0, 0, 0, 0, 0, 173, 0, 0, 0, 0, 0, 0, 0, + 0, 82, 0, 0, 0, 0, 0, 0, 0, 82, 0, 0, 0, 0, 0, 0, 0, 238, 0, 0, 0, 0, 0, 0, 0, 238, 0, 0, 0, 0, 0, 0, + 0, 103, 0, 0, 0, 0, 0, 0, 0, 172, 0, 0, 0, 0, 0, 0, 0, 84, 0, 0, 0, 0, 0, 0, 0, 123, 0, 0, 0, 0, 0, 0, + 0, 255, 0, 0, 0, 0, 0, 0, 0, 255, 0, 0, 0, 0, 0, 0, 0, 123, 0, 0, 0, 0, 0, 0, 0, 107, 0, 0, 0, 0, 0, 0, + 51, 0, 0, 0, 0, 0, 0, 0, 67, 0, 0, 0, 0, 0, 0, 0, 15, 0, 0, 0, 0, 0, 0, 0, 173, 0, 0, 0, 0, 0, 0, 0, + 0, 16, 133, 89, 161, 214, 198, 20, 0, 57, 21, 44, 192, 177, 187, 231, 0, 141, 6, 91, 208, 33, 20, 99, 0, 54, 6, 30, 23, 53, 218, 112, + 196, 67, 6, 13, 18, 99, 111, 20, 0, 58, 52, 44, 53, 148, 0, 128, 0, 234, 110, 44, 3, 194, 100, 21, 0, 47, 6, 75, 138, 92, 253, 211, + 0, 172, 74, 127, 56, 31, 1, 0, 0, 96, 62, 207, 11, 229, 160, 0, 0, 134, 110, 75, 176, 180, 62, 20, 211, 107, 62, 176, 122, 210, 203, 74, + 51, 0, 0, 0, 0, 0, 0, 0, 173, 0, 0, 0, 0, 0, 0, 0, 173, 0, 0, 0, 0, 0, 0, 0, 250, 0, 0, 0, 0, 0, 0, 0, + 0, 47, 62, 89, 251, 229, 229, 107, 0, 6, 6, 91, 92, 216, 219, 142, 0, 57, 56, 176, 99, 53, 0, 203, 73, 107, 62, 176, 122, 180, 168, 73, + 0, 172, 233, 176, 161, 56, 157, 126, 0, 110, 110, 5, 99, 132, 18, 88, 0, 134, 62, 75, 73, 0, 180, 116, 0, 2, 74, 176, 5, 28, 17, 157, + 0, 54, 110, 62, 204, 109, 166, 111, 0, 62, 62, 176, 249, 180, 35, 244, 0, 6, 62, 176, 161, 72, 72, 84, 0, 222, 62, 176, 156, 218, 17, 17, + 82, 0, 0, 0, 0, 0, 0, 0, 159, 0, 0, 0, 0, 0, 0, 0, 140, 0, 0, 0, 0, 0, 0, 0, 110, 0, 0, 0, 0, 0, 0, 0, + 0, 82, 0, 0, 0, 0, 0, 0, 0, 159, 0, 0, 0, 0, 0, 0, 0, 140, 0, 0, 0, 0, 0, 0, 0, 238, 0, 0, 0, 0, 0, 0, + 0, 82, 0, 0, 0, 0, 0, 0, 0, 159, 0, 0, 
0, 0, 0, 0, 0, 16, 0, 0, 0, 0, 0, 0, 0, 172, 0, 0, 0, 0, 0, 0, + 0, 238, 0, 0, 0, 0, 0, 0, 0, 47, 0, 0, 0, 0, 0, 0, 0, 172, 0, 0, 0, 0, 0, 0, 0, 107, 0, 0, 0, 0, 0, 0, + 82, 0, 0, 0, 0, 0, 0, 0, 159, 0, 0, 0, 0, 0, 0, 0, 234, 0, 0, 0, 0, 0, 0, 0, 74, 0, 0, 0, 0, 0, 0, 0, + 0, 82, 0, 0, 0, 0, 0, 0, 0, 67, 0, 0, 0, 0, 0, 0, 0, 140, 0, 0, 0, 0, 0, 0, 0, 255, 0, 0, 0, 0, 0, 0, + 0, 82, 0, 0, 0, 0, 0, 0, 0, 7, 0, 0, 0, 0, 0, 0, 0, 103, 0, 0, 0, 0, 0, 0, 0, 123, 0, 0, 0, 0, 0, 0, + 0, 238, 0, 0, 0, 0, 0, 0, 0, 159, 0, 0, 0, 0, 0, 0, 0, 140, 0, 0, 0, 0, 0, 0, 0, 107, 0, 0, 0, 0, 0, 0, + 172, 0, 0, 0, 0, 0, 0, 0, 140, 0, 0, 0, 0, 0, 0, 0, 191, 0, 0, 0, 0, 0, 0, 0, 54, 0, 0, 0, 0, 0, 0, 0, + 0, 140, 62, 75, 99, 56, 28, 221, 0, 45, 215, 100, 211, 147, 144, 104, 0, 45, 22, 217, 211, 219, 140, 143, 0, 26, 62, 100, 174, 72, 220, 120, + 0, 140, 74, 179, 251, 11, 128, 65, 0, 141, 6, 75, 208, 92, 209, 211, 0, 214, 74, 176, 73, 38, 35, 24, 0, 123, 2, 176, 122, 72, 22, 60, + 0, 246, 74, 176, 138, 210, 35, 209, 0, 47, 6, 75, 5, 42, 20, 60, 0, 74, 6, 176, 177, 221, 38, 227, 0, 110, 62, 176, 5, 180, 17, 244, + 238, 0, 0, 0, 0, 0, 0, 0, 47, 0, 0, 0, 0, 0, 0, 0, 74, 0, 0, 0, 0, 0, 0, 0, 110, 0, 0, 0, 0, 0, 0, 0, + 0, 159, 74, 176, 99, 198, 157, 221, 0, 47, 6, 30, 122, 92, 23, 25, 0, 230, 62, 4, 5, 170, 127, 50, 0, 254, 62, 30, 208, 198, 218, 120, + 187, 238, 62, 75, 99, 12, 35, 253, 0, 71, 6, 69, 207, 216, 23, 24, 0, 234, 22, 200, 124, 12, 216, 60, 50, 110, 62, 200, 5, 42, 163, 253, + 0, 238, 62, 176, 5, 198, 35, 108, 0, 54, 62, 176, 5, 180, 229, 0, 0, 74, 62, 176, 156, 218, 168, 111, 0, 62, 110, 176, 5, 218, 167, 108, + 238, 0, 0, 0, 0, 0, 0, 0, 141, 0, 0, 0, 0, 0, 0, 0, 234, 0, 0, 0, 0, 0, 0, 0, 110, 0, 0, 0, 0, 0, 0, 0, + 0, 140, 74, 176, 99, 198, 109, 65, 0, 141, 6, 30, 170, 216, 11, 163, 0, 234, 6, 176, 219, 69, 0, 46, 0, 52, 62, 62, 170, 166, 216, 190, + 0, 140, 6, 176, 31, 124, 28, 116, 0, 141, 6, 75, 5, 218, 120, 174, 0, 140, 6, 75, 124, 22, 35, 0, 0, 54, 6, 30, 5, 195, 53, 233, 
+ 46, 246, 74, 176, 5, 195, 35, 221, 0, 47, 6, 176, 5, 12, 50, 137, 0, 110, 62, 176, 208, 128, 120, 0, 237, 110, 62, 3, 122, 42, 198, 168, + 82, 0, 0, 0, 0, 0, 0, 0, 47, 0, 0, 0, 0, 0, 0, 0, 172, 0, 0, 0, 0, 0, 0, 0, 47, 0, 0, 0, 0, 0, 0, 0, + 0, 15, 0, 0, 0, 0, 0, 0, 0, 82, 0, 0, 0, 0, 0, 0, 0, 67, 0, 0, 0, 0, 0, 0, 0, 238, 0, 0, 0, 0, 0, 0, + 0, 67, 0, 0, 0, 0, 0, 0, 0, 15, 0, 0, 0, 0, 0, 0, 0, 36, 0, 0, 0, 0, 0, 0, 0, 82, 0, 0, 0, 0, 0, 0, + 0, 82, 0, 0, 0, 0, 0, 0, 0, 82, 0, 0, 0, 0, 0, 0, 0, 103, 0, 0, 0, 0, 0, 0, 0, 173, 0, 0, 0, 0, 0, 0, + 103, 0, 0, 0, 0, 0, 0, 0, 82, 0, 0, 0, 0, 0, 0, 0, 140, 0, 0, 0, 0, 0, 0, 0, 172, 0, 0, 0, 0, 0, 0, 0, + 0, 82, 0, 0, 0, 0, 0, 0, 0, 141, 0, 0, 0, 0, 0, 0, 0, 140, 0, 0, 0, 0, 0, 0, 0, 119, 0, 0, 0, 0, 0, 0, + 0, 82, 0, 0, 0, 0, 0, 0, 0, 7, 0, 0, 0, 0, 0, 0, 0, 140, 0, 0, 0, 0, 0, 0, 0, 173, 0, 0, 0, 0, 0, 0, + 0, 172, 0, 0, 0, 0, 0, 0, 0, 67, 0, 0, 0, 0, 0, 0, 0, 250, 0, 0, 0, 0, 0, 0, 0, 107, 0, 0, 0, 0, 0, 0, + 172, 0, 0, 0, 0, 0, 0, 0, 96, 0, 0, 0, 0, 0, 0, 0, 107, 0, 0, 0, 0, 0, 0, 0, 110, 0, 0, 0, 0, 0, 0, 0, + 0, 246, 0, 0, 0, 0, 0, 0, 0, 172, 0, 0, 0, 0, 0, 0, 0, 250, 0, 0, 0, 0, 0, 0, 0, 250, 0, 0, 0, 0, 0, 0, + 0, 255, 0, 0, 0, 0, 0, 0, 0, 255, 0, 0, 0, 0, 0, 0, 0, 16, 0, 0, 0, 0, 0, 0, 0, 19, 0, 0, 0, 0, 0, 0, + 0, 172, 0, 0, 0, 0, 0, 0, 0, 123, 0, 0, 0, 0, 0, 0, 0, 172, 0, 0, 0, 0, 0, 0, 0, 107, 0, 0, 0, 0, 0, 0, + 238, 0, 0, 0, 0, 0, 0, 0, 47, 0, 0, 0, 0, 0, 0, 0, 74, 0, 0, 0, 0, 0, 0, 0, 110, 0, 0, 0, 0, 0, 0, 0, + 0, 47, 62, 176, 31, 166, 229, 35, 0, 47, 62, 75, 18, 11, 53, 50, 0, 74, 6, 176, 124, 99, 28, 72, 0, 110, 62, 176, 5, 92, 168, 46, + 0, 54, 62, 176, 99, 11, 187, 253, 0, 47, 62, 176, 31, 53, 76, 120, 0, 54, 6, 176, 99, 161, 20, 111, 0, 110, 62, 176, 156, 128, 128, 0, + 212, 110, 110, 176, 5, 42, 168, 0, 187, 74, 62, 176, 5, 11, 231, 65, 0, 110, 62, 176, 5, 11, 76, 190, 167, 110, 110, 176, 5, 144, 167, 190, + 238, 0, 0, 0, 0, 0, 0, 0, 123, 0, 0, 0, 0, 0, 0, 0, 54, 0, 0, 0, 0, 0, 0, 0, 110, 0, 0, 0, 
0, 0, 0, 0, + 0, 173, 0, 0, 0, 0, 0, 0, 0, 173, 0, 0, 0, 0, 0, 0, 0, 54, 0, 0, 0, 0, 0, 0, 0, 172, 0, 0, 0, 0, 0, 0, + 0, 238, 0, 0, 0, 0, 0, 0, 0, 54, 0, 0, 0, 0, 0, 0, 0, 172, 0, 0, 0, 0, 0, 0, 0, 172, 0, 0, 0, 0, 0, 0, + 0, 172, 0, 0, 0, 0, 0, 0, 0, 123, 0, 0, 0, 0, 0, 0, 0, 172, 0, 0, 0, 0, 0, 0, 0, 107, 0, 0, 0, 0, 0, 0, + 238, 0, 0, 0, 0, 0, 0, 0, 54, 0, 0, 0, 0, 0, 0, 0, 74, 0, 0, 0, 0, 0, 0, 0, 107, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 123, 62, 122, 79, 124, 0, 0, 0, 47, 3, 86, 139, 34, 0, 0, 0, 8, 197, 4, 185, 191, 16, 0, 0, 6, 30, 251, 187, 128, 153, + 0, 0, 110, 123, 232, 18, 6, 2, 0, 0, 6, 89, 153, 198, 65, 196, 0, 0, 6, 235, 40, 220, 97, 0, 0, 0, 6, 115, 211, 42, 241, 167, + 0, 0, 110, 56, 31, 109, 111, 229, 0, 0, 6, 176, 208, 218, 203, 168, 0, 0, 62, 75, 243, 195, 183, 108, 0, 0, 62, 176, 5, 218, 38, 112, + 238, 0, 0, 0, 0, 0, 0, 0, 123, 0, 0, 0, 0, 0, 0, 0, 74, 0, 0, 0, 0, 0, 0, 0, 110, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 62, 56, 161, 166, 35, 48, 0, 0, 6, 89, 90, 42, 110, 176, 0, 0, 110, 241, 249, 5, 168, 128, 0, 0, 110, 176, 5, 42, 53, 0, + 0, 0, 110, 176, 99, 170, 198, 30, 0, 0, 6, 179, 241, 180, 16, 110, 0, 0, 62, 127, 156, 219, 164, 72, 0, 0, 110, 176, 5, 194, 231, 35, + 0, 0, 110, 56, 192, 166, 229, 46, 0, 0, 110, 176, 99, 11, 231, 209, 133, 0, 110, 176, 99, 92, 168, 209, 244, 0, 110, 176, 5, 218, 167, 60, + 224, 0, 0, 0, 0, 0, 0, 0, 67, 0, 0, 0, 0, 0, 0, 0, 173, 0, 0, 0, 0, 0, 0, 0, 255, 0, 0, 0, 0, 0, 0, 0, + 25, 159, 6, 176, 207, 42, 231, 221, 0, 159, 6, 176, 207, 203, 28, 46, 0, 140, 6, 176, 5, 92, 111, 0, 0, 110, 62, 176, 5, 170, 167, 244, + 0, 140, 6, 176, 18, 180, 20, 126, 223, 234, 6, 176, 18, 12, 231, 209, 0, 234, 6, 176, 153, 194, 253, 244, 0, 74, 62, 176, 156, 128, 35, 20, + 101, 110, 62, 176, 5, 42, 111, 209, 140, 110, 62, 176, 5, 249, 11, 35, 75, 74, 62, 176, 208, 198, 187, 0, 168, 110, 62, 176, 5, 218, 112, 190, + 150, 0, 0, 0, 0, 0, 0, 0, 150, 0, 0, 0, 0, 0, 0, 0, 150, 0, 0, 0, 0, 0, 0, 0, 51, 0, 0, 0, 0, 0, 0, 0, + 0, 82, 0, 0, 0, 0, 0, 0, 0, 
15, 0, 0, 0, 0, 0, 0, 0, 15, 0, 0, 0, 0, 0, 0, 0, 173, 0, 0, 0, 0, 0, 0, + 0, 103, 0, 0, 0, 0, 0, 0, 0, 103, 0, 0, 0, 0, 0, 0, 0, 67, 0, 0, 0, 0, 0, 0, 0, 172, 0, 0, 0, 0, 0, 0, + 0, 250, 0, 0, 0, 0, 0, 0, 0, 255, 0, 0, 0, 0, 0, 0, 0, 173, 0, 0, 0, 0, 0, 0, 0, 172, 0, 0, 0, 0, 0, 0, + 15, 0, 0, 0, 0, 0, 0, 0, 15, 0, 0, 0, 0, 0, 0, 0, 15, 0, 0, 0, 0, 0, 0, 0, 173, 0, 0, 0, 0, 0, 0, 0, + 0, 140, 62, 179, 5, 198, 166, 167, 0, 140, 74, 75, 207, 38, 203, 16, 0, 141, 6, 91, 164, 126, 50, 120, 0, 47, 6, 176, 99, 198, 137, 0, + 0, 234, 6, 176, 192, 166, 180, 187, 0, 234, 6, 217, 75, 243, 244, 109, 0, 140, 6, 75, 3, 124, 56, 111, 0, 47, 74, 176, 5, 42, 23, 120, + 0, 62, 62, 176, 18, 53, 108, 163, 0, 119, 6, 176, 153, 210, 120, 190, 0, 110, 6, 100, 30, 5, 50, 20, 0, 110, 110, 176, 5, 144, 168, 221, + 51, 0, 0, 0, 0, 0, 0, 0, 173, 0, 0, 0, 0, 0, 0, 0, 173, 0, 0, 0, 0, 0, 0, 0, 250, 0, 0, 0, 0, 0, 0, 0, + 0, 47, 74, 176, 5, 166, 161, 166, 0, 47, 62, 176, 138, 195, 168, 190, 0, 123, 110, 176, 5, 198, 11, 221, 0, 123, 62, 176, 5, 42, 168, 244, + 0, 110, 62, 176, 86, 124, 20, 137, 0, 62, 62, 176, 219, 170, 38, 3, 0, 52, 110, 91, 99, 128, 142, 153, 144, 6, 62, 176, 5, 218, 165, 0, + 0, 107, 110, 176, 99, 92, 28, 167, 0, 74, 110, 176, 243, 42, 126, 209, 0, 74, 62, 176, 99, 195, 218, 116, 0, 110, 110, 176, 5, 218, 120, 190, + 82, 0, 0, 0, 0, 0, 0, 0, 159, 0, 0, 0, 0, 0, 0, 0, 140, 0, 0, 0, 0, 0, 0, 0, 110, 0, 0, 0, 0, 0, 0, 0, + 0, 82, 0, 0, 0, 0, 0, 0, 0, 67, 0, 0, 0, 0, 0, 0, 0, 172, 0, 0, 0, 0, 0, 0, 0, 255, 0, 0, 0, 0, 0, 0, + 0, 173, 0, 0, 0, 0, 0, 0, 0, 82, 0, 0, 0, 0, 0, 0, 0, 172, 0, 0, 0, 0, 0, 0, 0, 172, 0, 0, 0, 0, 0, 0, + 0, 238, 0, 0, 0, 0, 0, 0, 0, 172, 0, 0, 0, 0, 0, 0, 0, 172, 0, 0, 0, 0, 0, 0, 0, 107, 0, 0, 0, 0, 0, 0, + 82, 0, 0, 0, 0, 0, 0, 0, 159, 0, 0, 0, 0, 0, 0, 0, 140, 0, 0, 0, 0, 0, 0, 0, 238, 0, 0, 0, 0, 0, 0, 0, + 0, 150, 0, 0, 0, 0, 0, 0, 0, 67, 0, 0, 0, 0, 0, 0, 0, 173, 0, 0, 0, 0, 0, 0, 0, 173, 0, 0, 0, 0, 0, 0, + 0, 224, 0, 0, 0, 0, 0, 0, 0, 82, 0, 0, 
0, 0, 0, 0, 0, 103, 0, 0, 0, 0, 0, 0, 0, 173, 0, 0, 0, 0, 0, 0, + 0, 51, 0, 0, 0, 0, 0, 0, 0, 173, 0, 0, 0, 0, 0, 0, 0, 172, 0, 0, 0, 0, 0, 0, 0, 255, 0, 0, 0, 0, 0, 0, + 172, 0, 0, 0, 0, 0, 0, 0, 140, 0, 0, 0, 0, 0, 0, 0, 140, 0, 0, 0, 0, 0, 0, 0, 110, 0, 0, 0, 0, 0, 0, 0, + 105, 140, 6, 176, 73, 92, 35, 35, 0, 234, 6, 142, 251, 220, 46, 225, 0, 140, 6, 4, 32, 210, 136, 163, 0, 47, 6, 30, 5, 42, 72, 153, + 0, 141, 6, 176, 18, 177, 35, 190, 0, 141, 6, 174, 92, 72, 253, 221, 0, 234, 6, 30, 243, 137, 50, 0, 0, 54, 62, 176, 124, 72, 229, 180, + 0, 47, 74, 176, 5, 198, 53, 221, 0, 47, 6, 176, 5, 11, 35, 46, 0, 52, 62, 176, 216, 12, 120, 72, 0, 110, 62, 176, 122, 92, 41, 28, + 238, 0, 0, 0, 0, 0, 0, 0, 47, 0, 0, 0, 0, 0, 0, 0, 74, 0, 0, 0, 0, 0, 0, 0, 110, 0, 0, 0, 0, 0, 0, 0, + 0, 172, 74, 176, 122, 30, 35, 65, 0, 47, 6, 176, 208, 23, 46, 60, 0, 74, 6, 30, 122, 76, 195, 35, 0, 107, 62, 176, 99, 42, 35, 24, + 0, 123, 62, 176, 156, 12, 209, 245, 0, 74, 62, 100, 99, 5, 231, 224, 0, 234, 62, 75, 208, 128, 28, 24, 0, 47, 62, 176, 5, 194, 35, 168, + 0, 54, 110, 176, 5, 42, 35, 108, 0, 110, 62, 176, 5, 128, 168, 209, 0, 54, 62, 176, 156, 218, 76, 244, 0, 110, 110, 176, 5, 218, 120, 244, + 238, 0, 0, 0, 0, 0, 0, 0, 159, 0, 0, 0, 0, 0, 0, 0, 140, 0, 0, 0, 0, 0, 0, 0, 110, 0, 0, 0, 0, 0, 0, 0, + 46, 140, 6, 75, 18, 180, 165, 163, 0, 234, 6, 176, 31, 122, 168, 0, 0, 234, 6, 179, 208, 170, 208, 163, 0, 74, 62, 75, 161, 187, 229, 0, + 0, 140, 6, 75, 75, 194, 12, 221, 0, 141, 6, 75, 156, 249, 167, 166, 0, 141, 6, 3, 199, 3, 42, 72, 0, 6, 62, 30, 122, 109, 118, 38, + 0, 47, 74, 176, 5, 109, 35, 112, 0, 47, 6, 176, 156, 42, 157, 0, 0, 140, 62, 176, 5, 42, 28, 12, 0, 110, 62, 176, 5, 11, 35, 46, + 172, 0, 0, 0, 0, 0, 0, 0, 172, 0, 0, 0, 0, 0, 0, 0, 140, 0, 0, 0, 0, 0, 0, 0, 172, 0, 0, 0, 0, 0, 0, 0, + 0, 15, 0, 0, 0, 0, 0, 0, 0, 67, 0, 0, 0, 0, 0, 0, 0, 173, 0, 0, 0, 0, 0, 0, 0, 173, 0, 0, 0, 0, 0, 0, + 0, 82, 0, 0, 0, 0, 0, 0, 0, 15, 0, 0, 0, 0, 0, 0, 0, 15, 0, 0, 0, 0, 0, 0, 0, 15, 
0, 0, 0, 0, 0, 0, + 0, 173, 0, 0, 0, 0, 0, 0, 0, 173, 0, 0, 0, 0, 0, 0, 0, 173, 0, 0, 0, 0, 0, 0, 0, 173, 0, 0, 0, 0, 0, 0, + 172, 0, 0, 0, 0, 0, 0, 0, 172, 0, 0, 0, 0, 0, 0, 0, 140, 0, 0, 0, 0, 0, 0, 0, 47, 0, 0, 0, 0, 0, 0, 0, + 0, 173, 0, 0, 0, 0, 0, 0, 0, 172, 0, 0, 0, 0, 0, 0, 0, 172, 0, 0, 0, 0, 0, 0, 0, 173, 0, 0, 0, 0, 0, 0, + 0, 82, 0, 0, 0, 0, 0, 0, 0, 82, 0, 0, 0, 0, 0, 0, 0, 173, 0, 0, 0, 0, 0, 0, 0, 172, 0, 0, 0, 0, 0, 0, + 0, 238, 0, 0, 0, 0, 0, 0, 0, 172, 0, 0, 0, 0, 0, 0, 0, 172, 0, 0, 0, 0, 0, 0, 0, 172, 0, 0, 0, 0, 0, 0, + 172, 0, 0, 0, 0, 0, 0, 0, 172, 0, 0, 0, 0, 0, 0, 0, 172, 0, 0, 0, 0, 0, 0, 0, 107, 0, 0, 0, 0, 0, 0, 0, + 0, 172, 0, 0, 0, 0, 0, 0, 0, 172, 0, 0, 0, 0, 0, 0, 0, 172, 0, 0, 0, 0, 0, 0, 0, 172, 0, 0, 0, 0, 0, 0, + 0, 172, 0, 0, 0, 0, 0, 0, 0, 172, 0, 0, 0, 0, 0, 0, 0, 246, 0, 0, 0, 0, 0, 0, 0, 172, 0, 0, 0, 0, 0, 0, + 0, 172, 0, 0, 0, 0, 0, 0, 0, 172, 0, 0, 0, 0, 0, 0, 0, 172, 0, 0, 0, 0, 0, 0, 0, 107, 0, 0, 0, 0, 0, 0, + 238, 0, 0, 0, 0, 0, 0, 0, 47, 0, 0, 0, 0, 0, 0, 0, 110, 0, 0, 0, 0, 0, 0, 0, 110, 0, 0, 0, 0, 0, 0, 0, + 0, 47, 62, 176, 99, 11, 229, 229, 0, 47, 6, 75, 207, 42, 109, 89, 0, 246, 62, 176, 122, 203, 161, 0, 0, 110, 62, 176, 5, 195, 229, 23, + 0, 54, 62, 176, 176, 11, 165, 111, 0, 6, 74, 176, 207, 12, 38, 108, 0, 74, 62, 30, 5, 192, 20, 221, 0, 47, 62, 176, 99, 42, 76, 183, + 231, 110, 110, 176, 5, 42, 35, 209, 22, 47, 62, 176, 5, 11, 229, 24, 157, 110, 62, 176, 5, 42, 20, 209, 133, 110, 110, 176, 5, 218, 28, 190, + 238, 0, 0, 0, 0, 0, 0, 0, 172, 0, 0, 0, 0, 0, 0, 0, 123, 0, 0, 0, 0, 0, 0, 0, 110, 0, 0, 0, 0, 0, 0, 0, + 0, 173, 0, 0, 0, 0, 0, 0, 0, 173, 0, 0, 0, 0, 0, 0, 0, 173, 0, 0, 0, 0, 0, 0, 0, 250, 0, 0, 0, 0, 0, 0, + 0, 173, 0, 0, 0, 0, 0, 0, 0, 250, 0, 0, 0, 0, 0, 0, 0, 173, 0, 0, 0, 0, 0, 0, 0, 255, 0, 0, 0, 0, 0, 0, + 0, 250, 0, 0, 0, 0, 0, 0, 0, 255, 0, 0, 0, 0, 0, 0, 0, 250, 0, 0, 0, 0, 0, 0, 0, 255, 0, 0, 0, 0, 0, 0, + 238, 0, 0, 0, 0, 0, 0, 0, 172, 0, 0, 0, 0, 0, 0, 0, 172, 0, 0, 0, 0, 0, 0, 0, 107, 
0, 0, 0, 0, 0, 0, 0, + 0, 0, 62, 89, 249, 109, 172, 16, 0, 0, 74, 142, 96, 211, 165, 109, 0, 0, 6, 3, 127, 11, 126, 111, 0, 0, 110, 176, 5, 198, 35, 20, + 0, 0, 110, 176, 153, 23, 166, 1, 0, 0, 6, 56, 75, 99, 108, 183, 0, 0, 6, 176, 207, 203, 116, 0, 0, 0, 110, 176, 5, 180, 167, 108, + 0, 0, 110, 176, 161, 42, 120, 229, 0, 0, 6, 176, 207, 11, 229, 112, 0, 0, 110, 176, 5, 194, 20, 120, 13, 0, 110, 176, 5, 218, 168, 190, + 238, 0, 0, 0, 0, 0, 0, 0, 172, 0, 0, 0, 0, 0, 0, 0, 123, 0, 0, 0, 0, 0, 0, 0, 110, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 110, 56, 176, 194, 42, 187, 0, 0, 74, 176, 99, 216, 231, 53, 0, 0, 110, 176, 5, 232, 195, 46, 65, 0, 110, 176, 5, 218, 231, 108, + 0, 0, 110, 176, 5, 11, 170, 120, 0, 0, 110, 75, 207, 31, 53, 220, 0, 0, 62, 30, 5, 42, 166, 126, 232, 0, 110, 176, 5, 218, 168, 112, + 194, 0, 110, 176, 192, 42, 35, 46, 80, 0, 110, 176, 18, 42, 229, 116, 19, 0, 62, 176, 5, 42, 38, 60, 120, 0, 110, 176, 5, 218, 120, 190, + 63, 0, 0, 0, 0, 0, 0, 0, 150, 0, 0, 0, 0, 0, 0, 0, 173, 0, 0, 0, 0, 0, 0, 0, 250, 0, 0, 0, 0, 0, 0, 0, + 170, 159, 6, 176, 99, 11, 229, 112, 0, 159, 6, 176, 207, 195, 111, 116, 0, 234, 6, 176, 99, 42, 42, 112, 0, 54, 62, 176, 243, 5, 20, 168, + 0, 140, 62, 176, 99, 198, 157, 203, 0, 234, 6, 75, 5, 11, 76, 112, 0, 234, 62, 176, 99, 218, 46, 112, 0, 62, 62, 176, 219, 42, 20, 168, + 237, 238, 62, 176, 18, 42, 42, 183, 0, 54, 62, 176, 99, 168, 120, 60, 0, 62, 62, 176, 156, 42, 11, 244, 0, 110, 62, 176, 156, 128, 231, 65, + 150, 0, 0, 0, 0, 0, 0, 0, 150, 0, 0, 0, 0, 0, 0, 0, 150, 0, 0, 0, 0, 0, 0, 0, 51, 0, 0, 0, 0, 0, 0, 0, + 0, 159, 0, 0, 0, 0, 0, 0, 0, 82, 0, 0, 0, 0, 0, 0, 0, 172, 0, 0, 0, 0, 0, 0, 0, 238, 0, 0, 0, 0, 0, 0, + 0, 140, 0, 0, 0, 0, 0, 0, 0, 141, 0, 0, 0, 0, 0, 0, 0, 159, 0, 0, 0, 0, 0, 0, 0, 47, 0, 0, 0, 0, 0, 0, + 0, 123, 0, 0, 0, 0, 0, 0, 0, 47, 0, 0, 0, 0, 0, 0, 0, 47, 0, 0, 0, 0, 0, 0, 0, 110, 0, 0, 0, 0, 0, 0, + 150, 0, 0, 0, 0, 0, 0, 0, 150, 0, 0, 0, 0, 0, 0, 0, 150, 0, 0, 0, 0, 0, 0, 0, 51, 0, 0, 0, 0, 0, 0, 0, + 0, 140, 
110, 235, 192, 161, 35, 65, 0, 140, 74, 176, 5, 53, 42, 120, 0, 140, 62, 4, 75, 138, 167, 76, 108, 62, 62, 176, 5, 72, 203, 244, + 0, 140, 74, 89, 204, 11, 187, 0, 0, 16, 8, 44, 208, 36, 125, 28, 0, 234, 6, 30, 5, 199, 28, 24, 36, 74, 62, 176, 99, 218, 229, 219, + 0, 172, 107, 89, 161, 128, 56, 244, 0, 123, 62, 176, 18, 128, 168, 157, 0, 62, 62, 69, 156, 12, 168, 126, 0, 233, 62, 69, 5, 218, 221, 46, + 51, 0, 0, 0, 0, 0, 0, 0, 15, 0, 0, 0, 0, 0, 0, 0, 250, 0, 0, 0, 0, 0, 0, 0, 250, 0, 0, 0, 0, 0, 0, 0, + 170, 47, 110, 176, 192, 109, 165, 111, 90, 47, 74, 176, 5, 42, 35, 65, 0, 47, 6, 176, 86, 11, 218, 231, 163, 110, 62, 176, 5, 23, 168, 65, + 0, 54, 110, 176, 192, 161, 165, 35, 0, 47, 62, 176, 99, 198, 72, 111, 0, 62, 6, 30, 5, 11, 38, 190, 0, 74, 62, 176, 243, 218, 17, 163, + 0, 238, 110, 56, 192, 166, 165, 183, 240, 47, 110, 176, 31, 180, 165, 46, 0, 54, 110, 176, 156, 72, 180, 65, 163, 110, 110, 176, 5, 218, 38, 190, + 82, 0, 0, 0, 0, 0, 0, 0, 82, 0, 0, 0, 0, 0, 0, 0, 172, 0, 0, 0, 0, 0, 0, 0, 255, 0, 0, 0, 0, 0, 0, 0, + 0, 82, 0, 0, 0, 0, 0, 0, 0, 159, 0, 0, 0, 0, 0, 0, 0, 173, 0, 0, 0, 0, 0, 0, 0, 172, 0, 0, 0, 0, 0, 0, + 0, 172, 0, 0, 0, 0, 0, 0, 0, 29, 0, 0, 0, 0, 0, 0, 0, 16, 0, 0, 0, 0, 0, 0, 0, 172, 0, 0, 0, 0, 0, 0, + 0, 238, 0, 0, 0, 0, 0, 0, 0, 172, 0, 0, 0, 0, 0, 0, 0, 74, 0, 0, 0, 0, 0, 0, 0, 107, 0, 0, 0, 0, 0, 0, + 82, 0, 0, 0, 0, 0, 0, 0, 7, 0, 0, 0, 0, 0, 0, 0, 172, 0, 0, 0, 0, 0, 0, 0, 255, 0, 0, 0, 0, 0, 0, 0, + 0, 82, 0, 0, 0, 0, 0, 0, 0, 159, 0, 0, 0, 0, 0, 0, 0, 140, 0, 0, 0, 0, 0, 0, 0, 107, 0, 0, 0, 0, 0, 0, + 0, 172, 0, 0, 0, 0, 0, 0, 0, 47, 0, 0, 0, 0, 0, 0, 0, 234, 0, 0, 0, 0, 0, 0, 0, 74, 0, 0, 0, 0, 0, 0, + 0, 238, 0, 0, 0, 0, 0, 0, 0, 47, 0, 0, 0, 0, 0, 0, 0, 234, 0, 0, 0, 0, 0, 0, 0, 110, 0, 0, 0, 0, 0, 0, + 224, 0, 0, 0, 0, 0, 0, 0, 159, 0, 0, 0, 0, 0, 0, 0, 103, 0, 0, 0, 0, 0, 0, 0, 172, 0, 0, 0, 0, 0, 0, 0, + 0, 140, 6, 4, 243, 144, 229, 187, 0, 141, 6, 100, 174, 180, 190, 119, 0, 16, 2, 164, 99, 231, 0, 0, 0, 6, 62, 30, 216, 
164, 0, 244, + 0, 140, 21, 176, 32, 28, 168, 231, 0, 234, 22, 176, 197, 100, 229, 241, 0, 6, 22, 179, 5, 18, 31, 0, 0, 74, 110, 241, 73, 0, 116, 0, + 0, 172, 110, 176, 219, 218, 92, 231, 0, 47, 110, 30, 156, 180, 253, 244, 0, 2, 110, 49, 11, 50, 170, 174, 0, 172, 62, 176, 5, 194, 128, 108, + 238, 0, 0, 0, 0, 0, 0, 0, 159, 0, 0, 0, 0, 0, 0, 0, 172, 0, 0, 0, 0, 0, 0, 0, 107, 0, 0, 0, 0, 0, 0, 0, + 0, 246, 110, 200, 192, 128, 72, 116, 0, 47, 74, 30, 5, 144, 168, 46, 0, 2, 62, 40, 219, 144, 221, 24, 0, 110, 62, 176, 86, 232, 168, 163, + 0, 246, 110, 176, 122, 170, 218, 20, 0, 47, 134, 176, 5, 218, 192, 218, 0, 10, 22, 176, 5, 231, 38, 60, 0, 62, 110, 241, 5, 42, 167, 48, + 65, 238, 62, 176, 5, 42, 168, 0, 0, 6, 62, 75, 5, 92, 229, 24, 0, 74, 62, 176, 5, 72, 42, 209, 0, 110, 62, 176, 5, 12, 167, 65, + 224, 0, 0, 0, 0, 0, 0, 0, 82, 0, 0, 0, 0, 0, 0, 0, 16, 0, 0, 0, 0, 0, 0, 0, 110, 0, 0, 0, 0, 0, 0, 0, + 0, 140, 47, 81, 243, 128, 161, 28, 0, 246, 6, 179, 69, 129, 168, 126, 0, 234, 22, 179, 240, 0, 0, 0, 0, 62, 107, 3, 73, 99, 181, 24, + 0, 47, 74, 22, 122, 124, 99, 235, 0, 140, 62, 3, 113, 2, 0, 0, 0, 45, 57, 30, 167, 220, 38, 65, 0, 45, 254, 215, 148, 0, 76, 0, + 0, 19, 62, 176, 18, 23, 46, 1, 0, 74, 22, 176, 5, 111, 190, 46, 0, 62, 62, 75, 179, 0, 60, 20, 0, 110, 176, 176, 99, 23, 209, 157, + 82, 0, 0, 0, 0, 0, 0, 0, 82, 0, 0, 0, 0, 0, 0, 0, 103, 0, 0, 0, 0, 0, 0, 0, 172, 0, 0, 0, 0, 0, 0, 0, + 0, 173, 0, 0, 0, 0, 0, 0, 0, 16, 0, 0, 0, 0, 0, 0, 0, 182, 0, 0, 0, 0, 0, 0, 0, 47, 0, 0, 0, 0, 0, 0, + 0, 141, 0, 0, 0, 0, 0, 0, 0, 141, 0, 0, 0, 0, 0, 0, 0, 103, 0, 0, 0, 0, 0, 0, 0, 172, 0, 0, 0, 0, 0, 0, + 0, 238, 0, 0, 0, 0, 0, 0, 0, 246, 0, 0, 0, 0, 0, 0, 0, 52, 0, 0, 0, 0, 0, 0, 0, 74, 0, 0, 0, 0, 0, 0, + 238, 0, 0, 0, 0, 0, 0, 0, 172, 0, 0, 0, 0, 0, 0, 0, 173, 0, 0, 0, 0, 0, 0, 0, 173, 0, 0, 0, 0, 0, 0, 0, + 0, 82, 0, 0, 0, 0, 0, 0, 0, 67, 0, 0, 0, 0, 0, 0, 0, 103, 0, 0, 0, 0, 0, 0, 0, 238, 0, 0, 0, 0, 0, 0, + 0, 246, 0, 0, 0, 0, 0, 0, 0, 16, 0, 0, 0, 0, 0, 0, 0, 185, 0, 
0, 0, 0, 0, 0, 0, 47, 0, 0, 0, 0, 0, 0, + 0, 238, 0, 0, 0, 0, 0, 0, 0, 246, 0, 0, 0, 0, 0, 0, 0, 119, 0, 0, 0, 0, 0, 0, 0, 123, 0, 0, 0, 0, 0, 0, + 238, 0, 0, 0, 0, 0, 0, 0, 173, 0, 0, 0, 0, 0, 0, 0, 16, 0, 0, 0, 0, 0, 0, 0, 107, 0, 0, 0, 0, 0, 0, 0, + 0, 172, 0, 0, 0, 0, 0, 0, 0, 136, 0, 0, 0, 0, 0, 0, 0, 47, 0, 0, 0, 0, 0, 0, 0, 123, 0, 0, 0, 0, 0, 0, + 0, 172, 0, 0, 0, 0, 0, 0, 0, 16, 0, 0, 0, 0, 0, 0, 0, 10, 0, 0, 0, 0, 0, 0, 0, 255, 0, 0, 0, 0, 0, 0, + 0, 255, 0, 0, 0, 0, 0, 0, 0, 123, 0, 0, 0, 0, 0, 0, 0, 255, 0, 0, 0, 0, 0, 0, 0, 107, 0, 0, 0, 0, 0, 0, + 181, 0, 0, 0, 0, 0, 0, 0, 172, 0, 0, 0, 0, 0, 0, 0, 172, 0, 0, 0, 0, 0, 0, 0, 107, 0, 0, 0, 0, 0, 0, 0, + 0, 47, 74, 127, 176, 203, 12, 163, 0, 119, 74, 32, 153, 170, 65, 128, 0, 131, 62, 79, 211, 42, 35, 0, 0, 123, 74, 176, 5, 251, 209, 24, + 0, 54, 228, 115, 5, 109, 249, 41, 0, 21, 6, 91, 198, 49, 0, 209, 249, 62, 6, 75, 127, 170, 0, 38, 0, 54, 62, 89, 86, 18, 231, 120, + 0, 238, 110, 176, 5, 166, 229, 157, 0, 6, 62, 30, 208, 195, 53, 24, 0, 21, 56, 176, 156, 144, 128, 244, 0, 110, 62, 176, 5, 42, 111, 65, + 181, 0, 0, 0, 0, 0, 0, 0, 82, 0, 0, 0, 0, 0, 0, 0, 172, 0, 0, 0, 0, 0, 0, 0, 255, 0, 0, 0, 0, 0, 0, 0, + 0, 123, 0, 0, 0, 0, 0, 0, 0, 172, 0, 0, 0, 0, 0, 0, 0, 107, 0, 0, 0, 0, 0, 0, 0, 107, 0, 0, 0, 0, 0, 0, + 0, 107, 0, 0, 0, 0, 0, 0, 0, 6, 0, 0, 0, 0, 0, 0, 0, 62, 0, 0, 0, 0, 0, 0, 0, 172, 0, 0, 0, 0, 0, 0, + 0, 123, 0, 0, 0, 0, 0, 0, 0, 123, 0, 0, 0, 0, 0, 0, 0, 107, 0, 0, 0, 0, 0, 0, 0, 107, 0, 0, 0, 0, 0, 0, + 51, 0, 0, 0, 0, 0, 0, 0, 82, 0, 0, 0, 0, 0, 0, 0, 54, 0, 0, 0, 0, 0, 0, 0, 172, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 71, 107, 219, 151, 48, 203, 0, 0, 8, 164, 208, 96, 38, 128, 0, 0, 189, 220, 21, 144, 8, 0, 0, 0, 110, 40, 5, 72, 157, 231, + 0, 0, 62, 179, 248, 192, 65, 203, 0, 0, 134, 176, 192, 195, 0, 0, 0, 0, 62, 254, 198, 0, 157, 0, 0, 0, 62, 176, 180, 138, 116, 61, + 0, 0, 74, 107, 122, 109, 76, 228, 0, 0, 74, 127, 219, 198, 11, 0, 0, 0, 74, 75, 241, 23, 73, 38, 0, 0, 62, 176, 5, 144, 157, 17, 
+ 181, 0, 0, 0, 0, 0, 0, 0, 255, 0, 0, 0, 0, 0, 0, 0, 172, 0, 0, 0, 0, 0, 0, 0, 107, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 74, 56, 31, 166, 166, 76, 0, 0, 74, 176, 249, 237, 218, 231, 0, 0, 110, 176, 176, 5, 38, 32, 11, 0, 62, 176, 5, 11, 157, 65, + 0, 0, 54, 56, 161, 109, 229, 35, 0, 0, 110, 62, 208, 42, 65, 25, 0, 0, 6, 75, 177, 31, 20, 111, 0, 0, 110, 176, 5, 144, 170, 209, + 0, 0, 233, 56, 192, 166, 165, 17, 0, 0, 110, 176, 31, 195, 112, 65, 0, 0, 110, 176, 99, 170, 11, 183, 77, 0, 110, 176, 5, 218, 167, 190, + 63, 0, 0, 0, 0, 0, 0, 0, 150, 0, 0, 0, 0, 0, 0, 0, 150, 0, 0, 0, 0, 0, 0, 0, 250, 0, 0, 0, 0, 0, 0, 0, + 12, 159, 6, 75, 5, 11, 35, 116, 43, 159, 6, 176, 207, 11, 229, 221, 0, 140, 6, 176, 5, 42, 35, 137, 0, 54, 62, 176, 5, 42, 35, 221, + 0, 234, 6, 176, 5, 12, 157, 38, 0, 234, 6, 176, 5, 11, 120, 50, 0, 234, 6, 176, 5, 92, 231, 50, 126, 47, 62, 176, 5, 42, 38, 253, + 0, 54, 62, 176, 5, 198, 28, 0, 0, 110, 62, 176, 5, 218, 231, 108, 0, 74, 62, 176, 5, 12, 157, 46, 64, 110, 62, 176, 5, 218, 120, 190, + 63, 0, 0, 0, 0, 0, 0, 0, 63, 0, 0, 0, 0, 0, 0, 0, 63, 0, 0, 0, 0, 0, 0, 0, 63, 0, 0, 0, 0, 0, 0, 0, + 0, 159, 0, 0, 0, 0, 0, 0, 0, 82, 0, 0, 0, 0, 0, 0, 0, 172, 0, 0, 0, 0, 0, 0, 0, 238, 0, 0, 0, 0, 0, 0, + 0, 140, 0, 0, 0, 0, 0, 0, 0, 140, 0, 0, 0, 0, 0, 0, 0, 159, 0, 0, 0, 0, 0, 0, 0, 47, 0, 0, 0, 0, 0, 0, + 0, 107, 0, 0, 0, 0, 0, 0, 0, 172, 0, 0, 0, 0, 0, 0, 0, 123, 0, 0, 0, 0, 0, 0, 0, 110, 0, 0, 0, 0, 0, 0, + 63, 0, 0, 0, 0, 0, 0, 0, 150, 0, 0, 0, 0, 0, 0, 0, 150, 0, 0, 0, 0, 0, 0, 0, 150, 0, 0, 0, 0, 0, 0, 0, + 0, 140, 6, 176, 5, 53, 111, 187, 0, 140, 6, 176, 219, 170, 35, 50, 0, 140, 6, 176, 207, 11, 165, 111, 199, 246, 74, 176, 5, 218, 35, 209, + 0, 234, 62, 176, 99, 170, 11, 107, 0, 234, 6, 176, 5, 42, 168, 20, 0, 141, 6, 176, 5, 11, 20, 111, 231, 47, 6, 176, 5, 218, 35, 112, + 0, 74, 62, 176, 161, 166, 165, 111, 0, 74, 62, 176, 5, 198, 231, 111, 0, 74, 62, 176, 243, 92, 35, 11, 0, 110, 62, 176, 5, 218, 167, 50, + 63, 0, 0, 0, 0, 0, 0, 0, 150, 0, 0, 0, 0, 
0, 0, 0, 150, 0, 0, 0, 0, 0, 0, 0, 250, 0, 0, 0, 0, 0, 0, 0, + 166, 47, 74, 176, 5, 92, 165, 28, 98, 246, 74, 176, 5, 42, 35, 112, 232, 172, 74, 176, 99, 198, 187, 46, 221, 172, 62, 176, 5, 42, 168, 209, + 216, 74, 62, 176, 99, 42, 35, 46, 0, 74, 62, 176, 5, 198, 35, 112, 111, 47, 6, 176, 5, 198, 38, 28, 38, 74, 62, 176, 5, 42, 168, 253, + 42, 172, 110, 176, 161, 42, 165, 28, 180, 110, 110, 176, 5, 42, 35, 28, 0, 110, 62, 176, 5, 11, 157, 112, 244, 110, 110, 176, 5, 218, 167, 190, + 224, 0, 0, 0, 0, 0, 0, 0, 82, 0, 0, 0, 0, 0, 0, 0, 173, 0, 0, 0, 0, 0, 0, 0, 238, 0, 0, 0, 0, 0, 0, 0, + 0, 82, 0, 0, 0, 0, 0, 0, 0, 82, 0, 0, 0, 0, 0, 0, 0, 172, 0, 0, 0, 0, 0, 0, 0, 173, 0, 0, 0, 0, 0, 0, + 0, 173, 0, 0, 0, 0, 0, 0, 0, 140, 0, 0, 0, 0, 0, 0, 0, 172, 0, 0, 0, 0, 0, 0, 0, 172, 0, 0, 0, 0, 0, 0, + 0, 173, 0, 0, 0, 0, 0, 0, 0, 172, 0, 0, 0, 0, 0, 0, 0, 172, 0, 0, 0, 0, 0, 0, 0, 255, 0, 0, 0, 0, 0, 0, + 224, 0, 0, 0, 0, 0, 0, 0, 82, 0, 0, 0, 0, 0, 0, 0, 82, 0, 0, 0, 0, 0, 0, 0, 250, 0, 0, 0, 0, 0, 0, 0, + 0, 82, 0, 0, 0, 0, 0, 0, 0, 159, 0, 0, 0, 0, 0, 0, 0, 140, 0, 0, 0, 0, 0, 0, 0, 123, 0, 0, 0, 0, 0, 0, + 0, 172, 0, 0, 0, 0, 0, 0, 0, 140, 0, 0, 0, 0, 0, 0, 0, 234, 0, 0, 0, 0, 0, 0, 0, 54, 0, 0, 0, 0, 0, 0, + 0, 238, 0, 0, 0, 0, 0, 0, 0, 47, 0, 0, 0, 0, 0, 0, 0, 54, 0, 0, 0, 0, 0, 0, 0, 110, 0, 0, 0, 0, 0, 0, + 224, 0, 0, 0, 0, 0, 0, 0, 82, 0, 0, 0, 0, 0, 0, 0, 173, 0, 0, 0, 0, 0, 0, 0, 173, 0, 0, 0, 0, 0, 0, 0, + 0, 140, 6, 176, 99, 195, 126, 112, 0, 141, 6, 176, 5, 3, 112, 108, 0, 234, 6, 75, 156, 128, 38, 206, 0, 123, 62, 75, 31, 157, 0, 0, + 0, 140, 6, 176, 243, 249, 126, 161, 0, 234, 6, 174, 156, 42, 112, 76, 0, 234, 6, 75, 49, 72, 60, 203, 0, 62, 6, 91, 153, 204, 50, 243, + 0, 172, 62, 176, 99, 53, 0, 120, 0, 123, 6, 176, 5, 11, 12, 231, 186, 52, 62, 176, 211, 5, 214, 60, 0, 62, 62, 176, 5, 166, 198, 203, + 224, 0, 0, 0, 0, 0, 0, 0, 173, 0, 0, 0, 0, 0, 0, 0, 172, 0, 0, 0, 0, 0, 0, 0, 107, 0, 0, 0, 0, 0, 0, 0, + 210, 47, 74, 176, 99, 194, 76, 111, 0, 159, 62, 176, 
99, 198, 168, 0, 0, 54, 6, 75, 5, 211, 28, 190, 0, 110, 62, 176, 5, 218, 229, 221, + 0, 54, 62, 176, 5, 53, 0, 0, 0, 246, 62, 176, 156, 120, 221, 204, 0, 22, 6, 176, 208, 5, 50, 221, 0, 74, 110, 176, 219, 72, 198, 126, + 0, 123, 110, 176, 5, 42, 28, 209, 0, 123, 62, 176, 5, 128, 167, 0, 0, 110, 110, 176, 5, 11, 76, 65, 0, 110, 62, 176, 5, 218, 167, 163, + 51, 0, 0, 0, 0, 0, 0, 0, 173, 0, 0, 0, 0, 0, 0, 0, 173, 0, 0, 0, 0, 0, 0, 0, 172, 0, 0, 0, 0, 0, 0, 0, + 0, 234, 6, 176, 5, 116, 76, 46, 0, 140, 6, 69, 161, 176, 255, 1, 0, 234, 6, 30, 176, 238, 89, 24, 0, 47, 74, 176, 243, 35, 221, 206, + 0, 141, 6, 117, 219, 155, 0, 249, 0, 140, 6, 217, 177, 17, 194, 38, 0, 234, 6, 179, 122, 109, 72, 50, 0, 47, 62, 75, 18, 243, 35, 179, + 0, 74, 62, 176, 5, 42, 120, 111, 0, 54, 6, 75, 99, 157, 166, 128, 0, 110, 6, 176, 11, 170, 38, 195, 0, 107, 110, 176, 5, 180, 168, 35, + 51, 0, 0, 0, 0, 0, 0, 0, 173, 0, 0, 0, 0, 0, 0, 0, 173, 0, 0, 0, 0, 0, 0, 0, 173, 0, 0, 0, 0, 0, 0, 0, + 0, 246, 0, 0, 0, 0, 0, 0, 0, 159, 0, 0, 0, 0, 0, 0, 0, 140, 0, 0, 0, 0, 0, 0, 0, 172, 0, 0, 0, 0, 0, 0, + 0, 140, 0, 0, 0, 0, 0, 0, 0, 246, 0, 0, 0, 0, 0, 0, 0, 172, 0, 0, 0, 0, 0, 0, 0, 47, 0, 0, 0, 0, 0, 0, + 0, 172, 0, 0, 0, 0, 0, 0, 0, 172, 0, 0, 0, 0, 0, 0, 0, 54, 0, 0, 0, 0, 0, 0, 0, 123, 0, 0, 0, 0, 0, 0, + 15, 0, 0, 0, 0, 0, 0, 0, 173, 0, 0, 0, 0, 0, 0, 0, 15, 0, 0, 0, 0, 0, 0, 0, 173, 0, 0, 0, 0, 0, 0, 0, + 0, 67, 0, 0, 0, 0, 0, 0, 0, 67, 0, 0, 0, 0, 0, 0, 0, 82, 0, 0, 0, 0, 0, 0, 0, 173, 0, 0, 0, 0, 0, 0, + 0, 173, 0, 0, 0, 0, 0, 0, 0, 15, 0, 0, 0, 0, 0, 0, 0, 172, 0, 0, 0, 0, 0, 0, 0, 172, 0, 0, 0, 0, 0, 0, + 0, 173, 0, 0, 0, 0, 0, 0, 0, 173, 0, 0, 0, 0, 0, 0, 0, 172, 0, 0, 0, 0, 0, 0, 0, 172, 0, 0, 0, 0, 0, 0, + 51, 0, 0, 0, 0, 0, 0, 0, 173, 0, 0, 0, 0, 0, 0, 0, 173, 0, 0, 0, 0, 0, 0, 0, 255, 0, 0, 0, 0, 0, 0, 0, + 0, 172, 0, 0, 0, 0, 0, 0, 0, 172, 0, 0, 0, 0, 0, 0, 0, 173, 0, 0, 0, 0, 0, 0, 0, 172, 0, 0, 0, 0, 0, 0, + 0, 172, 0, 0, 0, 0, 0, 0, 0, 172, 0, 0, 0, 0, 0, 0, 0, 246, 0, 0, 0, 0, 0, 0, 
0, 172, 0, 0, 0, 0, 0, 0, + 0, 172, 0, 0, 0, 0, 0, 0, 0, 172, 0, 0, 0, 0, 0, 0, 0, 172, 0, 0, 0, 0, 0, 0, 0, 107, 0, 0, 0, 0, 0, 0, + 51, 0, 0, 0, 0, 0, 0, 0, 173, 0, 0, 0, 0, 0, 0, 0, 250, 0, 0, 0, 0, 0, 0, 0, 107, 0, 0, 0, 0, 0, 0, 0, + 0, 47, 62, 75, 122, 12, 157, 209, 0, 47, 6, 176, 99, 211, 28, 190, 0, 140, 62, 176, 56, 72, 53, 229, 153, 123, 62, 176, 5, 92, 38, 46, + 0, 62, 62, 176, 208, 5, 112, 213, 0, 6, 6, 91, 122, 203, 92, 28, 0, 214, 6, 176, 86, 53, 209, 42, 0, 74, 62, 176, 156, 11, 229, 163, + 0, 54, 62, 176, 5, 170, 38, 209, 0, 74, 62, 176, 5, 198, 229, 190, 0, 62, 62, 176, 5, 218, 28, 120, 2, 107, 62, 176, 5, 218, 120, 190, + 51, 0, 0, 0, 0, 0, 0, 0, 173, 0, 0, 0, 0, 0, 0, 0, 173, 0, 0, 0, 0, 0, 0, 0, 250, 0, 0, 0, 0, 0, 0, 0, + 0, 172, 0, 0, 0, 0, 0, 0, 0, 172, 0, 0, 0, 0, 0, 0, 0, 172, 0, 0, 0, 0, 0, 0, 0, 172, 0, 0, 0, 0, 0, 0, + 0, 172, 0, 0, 0, 0, 0, 0, 0, 123, 0, 0, 0, 0, 0, 0, 0, 47, 0, 0, 0, 0, 0, 0, 0, 110, 0, 0, 0, 0, 0, 0, + 0, 172, 0, 0, 0, 0, 0, 0, 0, 123, 0, 0, 0, 0, 0, 0, 0, 123, 0, 0, 0, 0, 0, 0, 0, 107, 0, 0, 0, 0, 0, 0, + 181, 0, 0, 0, 0, 0, 0, 0, 173, 0, 0, 0, 0, 0, 0, 0, 173, 0, 0, 0, 0, 0, 0, 0, 255, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 110, 74, 251, 126, 168, 0, 0, 0, 74, 69, 249, 248, 221, 30, 0, 0, 62, 176, 18, 42, 11, 77, 23, 0, 62, 30, 5, 72, 109, 46, + 0, 0, 6, 176, 217, 35, 198, 53, 0, 0, 6, 94, 208, 198, 231, 32, 0, 0, 6, 100, 210, 120, 127, 195, 0, 0, 62, 176, 5, 198, 38, 209, + 0, 0, 110, 176, 99, 128, 11, 163, 0, 0, 110, 179, 18, 192, 229, 168, 0, 0, 74, 176, 208, 11, 72, 76, 0, 0, 110, 176, 5, 218, 38, 244, + 51, 0, 0, 0, 0, 0, 0, 0, 173, 0, 0, 0, 0, 0, 0, 0, 250, 0, 0, 0, 0, 0, 0, 0, 255, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 74, 62, 99, 42, 92, 183, 0, 0, 110, 176, 99, 218, 38, 168, 0, 0, 74, 176, 5, 5, 12, 111, 0, 0, 110, 176, 5, 218, 168, 65, + 0, 0, 110, 176, 161, 11, 38, 163, 0, 0, 74, 176, 31, 198, 203, 111, 0, 0, 74, 176, 219, 218, 11, 231, 0, 0, 62, 176, 5, 42, 167, 41, + 190, 0, 110, 176, 192, 166, 165, 28, 0, 0, 110, 176, 5, 
42, 35, 0, 0, 0, 110, 176, 5, 11, 229, 137, 190, 0, 110, 176, 5, 218, 120, 190, + 224, 0, 0, 0, 0, 0, 0, 0, 67, 0, 0, 0, 0, 0, 0, 0, 103, 0, 0, 0, 0, 0, 0, 0, 255, 0, 0, 0, 0, 0, 0, 0, + 0, 159, 6, 176, 5, 218, 35, 221, 215, 159, 6, 75, 219, 11, 231, 60, 0, 234, 6, 176, 207, 198, 168, 244, 0, 54, 62, 176, 5, 42, 35, 108, + 0, 140, 6, 176, 99, 11, 111, 65, 157, 234, 6, 176, 124, 53, 76, 41, 0, 234, 6, 176, 73, 23, 120, 60, 0, 74, 62, 176, 5, 23, 60, 244, + 0, 110, 62, 176, 99, 218, 38, 112, 0, 74, 62, 176, 5, 218, 168, 65, 0, 110, 62, 176, 156, 42, 157, 183, 0, 110, 62, 176, 5, 72, 167, 65, + 150, 0, 0, 0, 0, 0, 0, 0, 150, 0, 0, 0, 0, 0, 0, 0, 15, 0, 0, 0, 0, 0, 0, 0, 51, 0, 0, 0, 0, 0, 0, 0, + 0, 159, 0, 0, 0, 0, 0, 0, 0, 82, 0, 0, 0, 0, 0, 0, 0, 172, 0, 0, 0, 0, 0, 0, 0, 172, 0, 0, 0, 0, 0, 0, + 0, 172, 0, 0, 0, 0, 0, 0, 0, 140, 0, 0, 0, 0, 0, 0, 0, 140, 0, 0, 0, 0, 0, 0, 0, 123, 0, 0, 0, 0, 0, 0, + 0, 172, 0, 0, 0, 0, 0, 0, 0, 107, 0, 0, 0, 0, 0, 0, 0, 110, 0, 0, 0, 0, 0, 0, 0, 110, 0, 0, 0, 0, 0, 0, + 15, 0, 0, 0, 0, 0, 0, 0, 15, 0, 0, 0, 0, 0, 0, 0, 173, 0, 0, 0, 0, 0, 0, 0, 173, 0, 0, 0, 0, 0, 0, 0, + 0, 140, 74, 75, 122, 161, 72, 210, 0, 141, 6, 176, 99, 166, 86, 209, 0, 140, 6, 75, 122, 195, 210, 174, 0, 172, 74, 176, 99, 128, 168, 209, + 0, 234, 6, 176, 204, 72, 11, 76, 0, 140, 6, 176, 124, 40, 111, 2, 0, 140, 6, 75, 122, 53, 5, 75, 0, 47, 62, 176, 156, 243, 138, 50, + 0, 54, 74, 176, 161, 161, 157, 168, 0, 6, 62, 176, 211, 109, 54, 98, 0, 110, 62, 176, 86, 192, 30, 20, 0, 62, 62, 176, 5, 128, 109, 168, + 51, 0, 0, 0, 0, 0, 0, 0, 173, 0, 0, 0, 0, 0, 0, 0, 173, 0, 0, 0, 0, 0, 0, 0, 250, 0, 0, 0, 0, 0, 0, 0, + 0, 159, 74, 176, 161, 170, 111, 111, 0, 159, 6, 176, 5, 11, 60, 167, 0, 54, 62, 176, 5, 144, 167, 0, 0, 110, 62, 176, 5, 218, 168, 50, + 0, 54, 110, 176, 5, 42, 35, 209, 0, 6, 62, 176, 5, 128, 76, 28, 0, 234, 6, 176, 241, 12, 12, 76, 0, 74, 62, 176, 5, 218, 167, 0, + 138, 238, 110, 176, 161, 42, 35, 17, 0, 74, 62, 176, 99, 42, 42, 76, 0, 110, 62, 176, 
5, 198, 23, 24, 81, 110, 110, 176, 5, 218, 167, 112, + 82, 0, 0, 0, 0, 0, 0, 0, 159, 0, 0, 0, 0, 0, 0, 0, 140, 0, 0, 0, 0, 0, 0, 0, 110, 0, 0, 0, 0, 0, 0, 0, + 0, 82, 0, 0, 0, 0, 0, 0, 0, 82, 0, 0, 0, 0, 0, 0, 0, 172, 0, 0, 0, 0, 0, 0, 0, 250, 0, 0, 0, 0, 0, 0, + 0, 82, 0, 0, 0, 0, 0, 0, 0, 172, 0, 0, 0, 0, 0, 0, 0, 172, 0, 0, 0, 0, 0, 0, 0, 172, 0, 0, 0, 0, 0, 0, + 0, 238, 0, 0, 0, 0, 0, 0, 0, 172, 0, 0, 0, 0, 0, 0, 0, 172, 0, 0, 0, 0, 0, 0, 0, 107, 0, 0, 0, 0, 0, 0, + 82, 0, 0, 0, 0, 0, 0, 0, 159, 0, 0, 0, 0, 0, 0, 0, 140, 0, 0, 0, 0, 0, 0, 0, 110, 0, 0, 0, 0, 0, 0, 0, + 0, 82, 0, 0, 0, 0, 0, 0, 0, 159, 0, 0, 0, 0, 0, 0, 0, 140, 0, 0, 0, 0, 0, 0, 0, 110, 0, 0, 0, 0, 0, 0, + 0, 172, 0, 0, 0, 0, 0, 0, 0, 159, 0, 0, 0, 0, 0, 0, 0, 234, 0, 0, 0, 0, 0, 0, 0, 110, 0, 0, 0, 0, 0, 0, + 0, 238, 0, 0, 0, 0, 0, 0, 0, 47, 0, 0, 0, 0, 0, 0, 0, 54, 0, 0, 0, 0, 0, 0, 0, 110, 0, 0, 0, 0, 0, 0, + 172, 0, 0, 0, 0, 0, 0, 0, 159, 0, 0, 0, 0, 0, 0, 0, 234, 0, 0, 0, 0, 0, 0, 0, 110, 0, 0, 0, 0, 0, 0, 0, + 125, 140, 6, 176, 124, 11, 168, 0, 0, 141, 6, 75, 99, 128, 187, 221, 0, 234, 6, 176, 5, 195, 231, 35, 0, 107, 62, 44, 5, 23, 180, 126, + 0, 140, 6, 75, 207, 12, 111, 0, 0, 141, 6, 75, 5, 195, 170, 38, 0, 234, 6, 176, 124, 170, 218, 76, 0, 54, 62, 176, 18, 41, 46, 53, + 24, 246, 74, 176, 5, 11, 168, 253, 0, 47, 6, 176, 5, 12, 35, 108, 0, 230, 62, 176, 5, 42, 38, 46, 0, 110, 62, 176, 5, 218, 168, 209, + 238, 0, 0, 0, 0, 0, 0, 0, 47, 0, 0, 0, 0, 0, 0, 0, 110, 0, 0, 0, 0, 0, 0, 0, 110, 0, 0, 0, 0, 0, 0, 0, + 0, 246, 74, 176, 5, 42, 38, 0, 0, 47, 6, 176, 5, 42, 38, 112, 129, 6, 62, 176, 99, 92, 42, 65, 0, 110, 62, 176, 5, 218, 76, 137, + 111, 54, 74, 176, 5, 195, 168, 112, 0, 47, 62, 176, 5, 12, 218, 41, 0, 6, 62, 176, 5, 42, 168, 209, 0, 62, 62, 176, 5, 42, 46, 0, + 231, 238, 110, 176, 5, 42, 168, 209, 0, 47, 62, 176, 5, 42, 168, 190, 168, 74, 62, 176, 5, 218, 168, 190, 0, 62, 62, 176, 5, 218, 167, 190, + 238, 0, 0, 0, 0, 0, 0, 0, 159, 0, 0, 0, 0, 0, 0, 0, 140, 0, 0, 0, 0, 0, 0, 0, 62, 
0, 0, 0, 0, 0, 0, 0, + 0, 140, 6, 176, 219, 109, 111, 111, 0, 234, 6, 4, 5, 180, 218, 163, 0, 234, 6, 75, 216, 76, 112, 168, 0, 47, 62, 176, 251, 99, 80, 34, + 0, 140, 6, 176, 99, 195, 20, 126, 0, 141, 6, 75, 153, 11, 219, 0, 0, 140, 6, 179, 156, 249, 112, 46, 0, 246, 6, 179, 30, 203, 65, 0, + 0, 47, 74, 176, 5, 128, 229, 46, 0, 47, 6, 176, 99, 198, 126, 167, 0, 74, 6, 176, 5, 12, 163, 190, 0, 110, 62, 176, 5, 92, 165, 253, + 172, 0, 0, 0, 0, 0, 0, 0, 140, 0, 0, 0, 0, 0, 0, 0, 140, 0, 0, 0, 0, 0, 0, 0, 172, 0, 0, 0, 0, 0, 0, 0, + 0, 140, 0, 0, 0, 0, 0, 0, 0, 140, 0, 0, 0, 0, 0, 0, 0, 140, 0, 0, 0, 0, 0, 0, 0, 172, 0, 0, 0, 0, 0, 0, + 0, 172, 0, 0, 0, 0, 0, 0, 0, 140, 0, 0, 0, 0, 0, 0, 0, 172, 0, 0, 0, 0, 0, 0, 0, 123, 0, 0, 0, 0, 0, 0, + 0, 172, 0, 0, 0, 0, 0, 0, 0, 172, 0, 0, 0, 0, 0, 0, 0, 47, 0, 0, 0, 0, 0, 0, 0, 172, 0, 0, 0, 0, 0, 0, + 172, 0, 0, 0, 0, 0, 0, 0, 159, 0, 0, 0, 0, 0, 0, 0, 140, 0, 0, 0, 0, 0, 0, 0, 107, 0, 0, 0, 0, 0, 0, 0, + 0, 15, 0, 0, 0, 0, 0, 0, 0, 67, 0, 0, 0, 0, 0, 0, 0, 173, 0, 0, 0, 0, 0, 0, 0, 172, 0, 0, 0, 0, 0, 0, + 0, 82, 0, 0, 0, 0, 0, 0, 0, 67, 0, 0, 0, 0, 0, 0, 0, 16, 0, 0, 0, 0, 0, 0, 0, 173, 0, 0, 0, 0, 0, 0, + 0, 173, 0, 0, 0, 0, 0, 0, 0, 82, 0, 0, 0, 0, 0, 0, 0, 172, 0, 0, 0, 0, 0, 0, 0, 255, 0, 0, 0, 0, 0, 0, + 238, 0, 0, 0, 0, 0, 0, 0, 47, 0, 0, 0, 0, 0, 0, 0, 54, 0, 0, 0, 0, 0, 0, 0, 110, 0, 0, 0, 0, 0, 0, 0, + 0, 173, 0, 0, 0, 0, 0, 0, 0, 67, 0, 0, 0, 0, 0, 0, 0, 172, 0, 0, 0, 0, 0, 0, 0, 255, 0, 0, 0, 0, 0, 0, + 0, 173, 0, 0, 0, 0, 0, 0, 0, 172, 0, 0, 0, 0, 0, 0, 0, 172, 0, 0, 0, 0, 0, 0, 0, 172, 0, 0, 0, 0, 0, 0, + 0, 238, 0, 0, 0, 0, 0, 0, 0, 172, 0, 0, 0, 0, 0, 0, 0, 172, 0, 0, 0, 0, 0, 0, 0, 107, 0, 0, 0, 0, 0, 0, + 238, 0, 0, 0, 0, 0, 0, 0, 47, 0, 0, 0, 0, 0, 0, 0, 54, 0, 0, 0, 0, 0, 0, 0, 110, 0, 0, 0, 0, 0, 0, 0, + 0, 246, 74, 176, 18, 166, 35, 24, 0, 159, 6, 176, 99, 72, 20, 116, 0, 6, 62, 176, 99, 42, 35, 231, 0, 107, 62, 176, 5, 198, 168, 253, + 0, 172, 62, 176, 99, 210, 168, 24, 0, 6, 6, 176, 5, 53, 187, 120, 
0, 6, 2, 217, 153, 232, 231, 137, 0, 6, 62, 176, 99, 42, 167, 163, + 73, 110, 110, 176, 5, 42, 35, 112, 0, 47, 62, 176, 5, 194, 168, 0, 0, 74, 62, 176, 5, 42, 120, 50, 0, 62, 110, 176, 5, 218, 120, 209, + 172, 0, 0, 0, 0, 0, 0, 0, 172, 0, 0, 0, 0, 0, 0, 0, 123, 0, 0, 0, 0, 0, 0, 0, 172, 0, 0, 0, 0, 0, 0, 0, + 0, 172, 0, 0, 0, 0, 0, 0, 0, 47, 0, 0, 0, 0, 0, 0, 0, 172, 0, 0, 0, 0, 0, 0, 0, 110, 0, 0, 0, 0, 0, 0, + 0, 172, 0, 0, 0, 0, 0, 0, 0, 47, 0, 0, 0, 0, 0, 0, 0, 54, 0, 0, 0, 0, 0, 0, 0, 107, 0, 0, 0, 0, 0, 0, + 0, 123, 0, 0, 0, 0, 0, 0, 0, 172, 0, 0, 0, 0, 0, 0, 0, 110, 0, 0, 0, 0, 0, 0, 0, 110, 0, 0, 0, 0, 0, 0, + 172, 0, 0, 0, 0, 0, 0, 0, 47, 0, 0, 0, 0, 0, 0, 0, 107, 0, 0, 0, 0, 0, 0, 0, 110, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 74, 176, 161, 38, 166, 0, 0, 0, 6, 176, 31, 24, 120, 123, 0, 0, 107, 241, 243, 17, 11, 231, 0, 0, 62, 176, 122, 180, 168, 35, + 0, 0, 74, 176, 192, 11, 65, 17, 0, 0, 6, 179, 31, 62, 20, 255, 0, 0, 62, 89, 3, 198, 85, 0, 0, 0, 62, 176, 5, 203, 92, 253, + 0, 0, 110, 176, 5, 42, 35, 167, 0, 0, 74, 176, 5, 53, 180, 28, 0, 0, 62, 176, 156, 42, 28, 244, 0, 0, 110, 176, 5, 42, 35, 65, + 238, 0, 0, 0, 0, 0, 0, 0, 123, 0, 0, 0, 0, 0, 0, 0, 123, 0, 0, 0, 0, 0, 0, 0, 107, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 110, 176, 31, 166, 170, 35, 0, 0, 6, 176, 192, 92, 165, 35, 0, 0, 62, 176, 5, 198, 198, 0, 0, 0, 110, 176, 5, 170, 38, 46, + 0, 0, 110, 176, 161, 42, 35, 229, 0, 0, 74, 176, 138, 128, 35, 157, 0, 0, 62, 176, 5, 218, 126, 163, 231, 0, 110, 176, 5, 12, 229, 244, + 155, 0, 110, 176, 161, 166, 35, 46, 60, 0, 110, 176, 5, 42, 187, 0, 0, 0, 110, 176, 5, 198, 38, 112, 229, 0, 110, 176, 5, 218, 167, 190, + 63, 0, 0, 0, 0, 0, 0, 0, 150, 0, 0, 0, 0, 0, 0, 0, 15, 0, 0, 0, 0, 0, 0, 0, 250, 0, 0, 0, 0, 0, 0, 0, + 4, 159, 6, 176, 5, 11, 229, 112, 253, 159, 6, 176, 5, 195, 35, 65, 38, 140, 6, 176, 5, 198, 229, 112, 42, 54, 62, 176, 5, 42, 35, 112, + 122, 140, 6, 176, 5, 11, 229, 126, 0, 234, 6, 176, 5, 42, 20, 0, 1, 234, 6, 176, 5, 11, 35, 137, 0, 74, 62, 176, 5, 218, 
231, 112, + 236, 110, 62, 176, 99, 42, 231, 65, 0, 110, 62, 176, 5, 42, 35, 60, 0, 110, 62, 176, 5, 12, 229, 0, 126, 110, 62, 176, 5, 218, 168, 190, + 150, 0, 0, 0, 0, 0, 0, 0, 150, 0, 0, 0, 0, 0, 0, 0, 150, 0, 0, 0, 0, 0, 0, 0, 51, 0, 0, 0, 0, 0, 0, 0, + 0, 159, 0, 0, 0, 0, 0, 0, 0, 82, 0, 0, 0, 0, 0, 0, 0, 172, 0, 0, 0, 0, 0, 0, 0, 172, 0, 0, 0, 0, 0, 0, + 0, 172, 0, 0, 0, 0, 0, 0, 0, 140, 0, 0, 0, 0, 0, 0, 0, 159, 0, 0, 0, 0, 0, 0, 0, 47, 0, 0, 0, 0, 0, 0, + 0, 172, 0, 0, 0, 0, 0, 0, 0, 123, 0, 0, 0, 0, 0, 0, 0, 123, 0, 0, 0, 0, 0, 0, 0, 110, 0, 0, 0, 0, 0, 0, + 150, 0, 0, 0, 0, 0, 0, 0, 15, 0, 0, 0, 0, 0, 0, 0, 15, 0, 0, 0, 0, 0, 0, 0, 15, 0, 0, 0, 0, 0, 0, 0, + 190, 141, 6, 176, 99, 195, 38, 60, 0, 140, 6, 176, 5, 11, 203, 17, 0, 140, 6, 176, 5, 195, 46, 111, 0, 47, 74, 176, 5, 42, 35, 253, + 0, 234, 6, 176, 122, 42, 72, 253, 0, 234, 6, 176, 5, 128, 168, 111, 0, 141, 6, 176, 5, 12, 0, 198, 91, 47, 6, 176, 5, 42, 231, 24, + 2, 74, 62, 176, 5, 203, 126, 187, 0, 74, 62, 176, 5, 42, 187, 120, 0, 110, 62, 176, 5, 11, 218, 187, 218, 110, 62, 176, 5, 218, 168, 190, + 51, 0, 0, 0, 0, 0, 0, 0, 15, 0, 0, 0, 0, 0, 0, 0, 173, 0, 0, 0, 0, 0, 0, 0, 250, 0, 0, 0, 0, 0, 0, 0, + 241, 47, 62, 176, 5, 42, 165, 65, 42, 172, 74, 176, 5, 42, 229, 60, 20, 54, 74, 176, 5, 42, 35, 221, 187, 172, 62, 176, 5, 218, 168, 209, + 0, 54, 62, 176, 5, 42, 35, 187, 0, 6, 6, 176, 5, 194, 231, 190, 209, 47, 6, 176, 5, 12, 168, 209, 253, 47, 62, 176, 5, 218, 168, 190, + 0, 110, 110, 176, 5, 42, 35, 28, 180, 110, 62, 176, 5, 198, 231, 112, 0, 110, 62, 176, 5, 218, 229, 112, 20, 110, 62, 176, 5, 218, 120, 190, + 82, 0, 0, 0, 0, 0, 0, 0, 159, 0, 0, 0, 0, 0, 0, 0, 172, 0, 0, 0, 0, 0, 0, 0, 172, 0, 0, 0, 0, 0, 0, 0, + 0, 15, 0, 0, 0, 0, 0, 0, 0, 82, 0, 0, 0, 0, 0, 0, 0, 173, 0, 0, 0, 0, 0, 0, 0, 173, 0, 0, 0, 0, 0, 0, + 0, 173, 0, 0, 0, 0, 0, 0, 0, 173, 0, 0, 0, 0, 0, 0, 0, 173, 0, 0, 0, 0, 0, 0, 0, 173, 0, 0, 0, 0, 0, 0, + 0, 173, 0, 0, 0, 0, 0, 0, 0, 173, 0, 0, 0, 0, 0, 0, 0, 173, 0, 0, 0, 0, 0, 0, 
0, 255, 0, 0, 0, 0, 0, 0, + 82, 0, 0, 0, 0, 0, 0, 0, 159, 0, 0, 0, 0, 0, 0, 0, 140, 0, 0, 0, 0, 0, 0, 0, 123, 0, 0, 0, 0, 0, 0, 0, + 0, 82, 0, 0, 0, 0, 0, 0, 0, 159, 0, 0, 0, 0, 0, 0, 0, 140, 0, 0, 0, 0, 0, 0, 0, 123, 0, 0, 0, 0, 0, 0, + 0, 172, 0, 0, 0, 0, 0, 0, 0, 159, 0, 0, 0, 0, 0, 0, 0, 140, 0, 0, 0, 0, 0, 0, 0, 123, 0, 0, 0, 0, 0, 0, + 0, 172, 0, 0, 0, 0, 0, 0, 0, 47, 0, 0, 0, 0, 0, 0, 0, 54, 0, 0, 0, 0, 0, 0, 0, 110, 0, 0, 0, 0, 0, 0, + 172, 0, 0, 0, 0, 0, 0, 0, 159, 0, 0, 0, 0, 0, 0, 0, 140, 0, 0, 0, 0, 0, 0, 0, 123, 0, 0, 0, 0, 0, 0, 0, + 0, 140, 6, 176, 99, 12, 28, 60, 0, 141, 6, 176, 99, 198, 42, 111, 0, 234, 6, 176, 5, 124, 92, 24, 0, 54, 62, 176, 5, 198, 42, 209, + 0, 140, 6, 176, 5, 11, 111, 111, 0, 141, 6, 176, 5, 203, 76, 28, 0, 234, 6, 176, 219, 218, 23, 28, 0, 54, 62, 176, 5, 198, 120, 180, + 0, 172, 74, 176, 5, 42, 231, 65, 43, 47, 6, 176, 5, 12, 168, 0, 0, 54, 62, 176, 5, 11, 229, 28, 0, 110, 62, 176, 5, 42, 120, 209, + 238, 0, 0, 0, 0, 0, 0, 0, 47, 0, 0, 0, 0, 0, 0, 0, 54, 0, 0, 0, 0, 0, 0, 0, 110, 0, 0, 0, 0, 0, 0, 0, + 94, 172, 74, 176, 5, 42, 35, 209, 148, 47, 6, 176, 5, 198, 231, 46, 0, 54, 6, 176, 5, 42, 157, 65, 227, 110, 110, 176, 5, 218, 168, 112, + 0, 172, 74, 176, 5, 12, 168, 253, 0, 47, 6, 176, 5, 198, 168, 209, 154, 6, 62, 176, 5, 198, 35, 190, 0, 110, 62, 176, 5, 218, 28, 0, + 167, 172, 110, 176, 5, 42, 168, 209, 93, 47, 62, 176, 5, 218, 168, 209, 120, 74, 62, 176, 5, 218, 120, 24, 0, 110, 62, 176, 5, 218, 120, 190, + 172, 0, 0, 0, 0, 0, 0, 0, 140, 0, 0, 0, 0, 0, 0, 0, 140, 0, 0, 0, 0, 0, 0, 0, 123, 0, 0, 0, 0, 0, 0, 0, + 0, 140, 6, 176, 18, 53, 20, 221, 0, 140, 6, 176, 5, 203, 232, 0, 0, 234, 6, 176, 208, 195, 46, 253, 0, 47, 62, 176, 5, 23, 46, 163, + 0, 140, 6, 176, 5, 195, 231, 111, 0, 141, 6, 176, 5, 198, 20, 111, 0, 140, 6, 176, 5, 72, 195, 112, 198, 172, 62, 176, 5, 42, 92, 111, + 0, 47, 62, 176, 5, 42, 229, 60, 0, 47, 74, 176, 5, 11, 35, 108, 0, 54, 62, 176, 5, 11, 187, 137, 42, 110, 110, 176, 5, 42, 167, 24, + 172, 0, 0, 
0, 0, 0, 0, 0, 172, 0, 0, 0, 0, 0, 0, 0, 172, 0, 0, 0, 0, 0, 0, 0, 172, 0, 0, 0, 0, 0, 0, 0, + 0, 172, 0, 0, 0, 0, 0, 0, 0, 172, 0, 0, 0, 0, 0, 0, 0, 172, 0, 0, 0, 0, 0, 0, 0, 172, 0, 0, 0, 0, 0, 0, + 0, 172, 0, 0, 0, 0, 0, 0, 0, 172, 0, 0, 0, 0, 0, 0, 0, 172, 0, 0, 0, 0, 0, 0, 0, 172, 0, 0, 0, 0, 0, 0, + 0, 172, 0, 0, 0, 0, 0, 0, 0, 172, 0, 0, 0, 0, 0, 0, 0, 172, 0, 0, 0, 0, 0, 0, 0, 172, 0, 0, 0, 0, 0, 0, + 172, 0, 0, 0, 0, 0, 0, 0, 172, 0, 0, 0, 0, 0, 0, 0, 172, 0, 0, 0, 0, 0, 0, 0, 172, 0, 0, 0, 0, 0, 0, 0, + 0, 173, 0, 0, 0, 0, 0, 0, 0, 15, 0, 0, 0, 0, 0, 0, 0, 173, 0, 0, 0, 0, 0, 0, 0, 173, 0, 0, 0, 0, 0, 0, + 0, 173, 0, 0, 0, 0, 0, 0, 0, 173, 0, 0, 0, 0, 0, 0, 0, 173, 0, 0, 0, 0, 0, 0, 0, 173, 0, 0, 0, 0, 0, 0, + 0, 173, 0, 0, 0, 0, 0, 0, 0, 173, 0, 0, 0, 0, 0, 0, 0, 173, 0, 0, 0, 0, 0, 0, 0, 173, 0, 0, 0, 0, 0, 0, + 172, 0, 0, 0, 0, 0, 0, 0, 172, 0, 0, 0, 0, 0, 0, 0, 172, 0, 0, 0, 0, 0, 0, 0, 172, 0, 0, 0, 0, 0, 0, 0, + 0, 173, 0, 0, 0, 0, 0, 0, 0, 173, 0, 0, 0, 0, 0, 0, 0, 173, 0, 0, 0, 0, 0, 0, 0, 173, 0, 0, 0, 0, 0, 0, + 0, 173, 0, 0, 0, 0, 0, 0, 0, 173, 0, 0, 0, 0, 0, 0, 0, 173, 0, 0, 0, 0, 0, 0, 0, 173, 0, 0, 0, 0, 0, 0, + 0, 173, 0, 0, 0, 0, 0, 0, 0, 173, 0, 0, 0, 0, 0, 0, 0, 173, 0, 0, 0, 0, 0, 0, 0, 255, 0, 0, 0, 0, 0, 0, + 238, 0, 0, 0, 0, 0, 0, 0, 172, 0, 0, 0, 0, 0, 0, 0, 123, 0, 0, 0, 0, 0, 0, 0, 110, 0, 0, 0, 0, 0, 0, 0, + 0, 172, 74, 176, 5, 92, 35, 112, 0, 47, 74, 176, 5, 194, 229, 157, 0, 110, 62, 176, 5, 72, 109, 231, 0, 110, 62, 176, 5, 218, 168, 46, + 0, 54, 62, 176, 99, 11, 229, 187, 0, 74, 62, 176, 5, 11, 221, 46, 0, 74, 62, 176, 99, 42, 72, 28, 0, 110, 62, 176, 5, 42, 38, 231, + 0, 123, 110, 176, 5, 42, 168, 112, 0, 74, 62, 176, 5, 218, 38, 112, 0, 110, 62, 176, 5, 11, 120, 163, 166, 110, 110, 176, 5, 218, 120, 190, + 172, 0, 0, 0, 0, 0, 0, 0, 172, 0, 0, 0, 0, 0, 0, 0, 172, 0, 0, 0, 0, 0, 0, 0, 107, 0, 0, 0, 0, 0, 0, 0, + 0, 172, 0, 0, 0, 0, 0, 0, 0, 172, 0, 0, 0, 0, 0, 0, 0, 172, 0, 0, 0, 0, 0, 0, 0, 123, 0, 0, 0, 0, 0, 0, + 0, 172, 
0, 0, 0, 0, 0, 0, 0, 172, 0, 0, 0, 0, 0, 0, 0, 172, 0, 0, 0, 0, 0, 0, 0, 123, 0, 0, 0, 0, 0, 0, + 0, 123, 0, 0, 0, 0, 0, 0, 0, 123, 0, 0, 0, 0, 0, 0, 0, 123, 0, 0, 0, 0, 0, 0, 0, 107, 0, 0, 0, 0, 0, 0, + 172, 0, 0, 0, 0, 0, 0, 0, 172, 0, 0, 0, 0, 0, 0, 0, 172, 0, 0, 0, 0, 0, 0, 0, 107, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 74, 176, 161, 72, 229, 157, 0, 0, 74, 176, 219, 170, 161, 187, 0, 0, 74, 176, 5, 166, 192, 7, 0, 0, 110, 176, 5, 218, 168, 35, + 0, 0, 74, 176, 99, 198, 42, 76, 0, 0, 74, 176, 99, 194, 243, 231, 0, 0, 6, 176, 219, 203, 28, 126, 112, 0, 110, 176, 5, 12, 168, 112, + 0, 0, 110, 176, 5, 42, 231, 111, 0, 0, 110, 176, 5, 42, 35, 46, 0, 0, 110, 176, 5, 92, 229, 60, 168, 0, 110, 176, 5, 42, 120, 190, + 172, 0, 0, 0, 0, 0, 0, 0, 172, 0, 0, 0, 0, 0, 0, 0, 123, 0, 0, 0, 0, 0, 0, 0, 107, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 110, 176, 5, 92, 38, 111, 0, 0, 74, 176, 5, 42, 229, 111, 0, 0, 110, 176, 5, 218, 120, 46, 12, 0, 110, 176, 5, 218, 168, 209, + 0, 0, 110, 176, 99, 42, 38, 38, 0, 0, 110, 176, 5, 218, 229, 111, 0, 0, 74, 176, 5, 11, 229, 46, 44, 0, 110, 176, 5, 218, 168, 209, + 120, 0, 110, 176, 5, 42, 168, 60, 231, 0, 110, 176, 5, 42, 168, 112, 112, 0, 110, 176, 5, 218, 168, 0, 28, 0, 110, 176, 5, 218, 120, 190, + 224, 0, 0, 0, 0, 0, 0, 0, 7, 0, 0, 0, 0, 0, 0, 0, 103, 0, 0, 0, 0, 0, 0, 0, 107, 0, 0, 0, 0, 0, 0, 0, + 207, 159, 6, 176, 5, 11, 35, 65, 0, 159, 6, 176, 31, 11, 28, 253, 0, 234, 22, 75, 5, 23, 194, 76, 0, 62, 62, 176, 216, 11, 244, 120, + 128, 140, 6, 176, 99, 180, 35, 35, 0, 234, 62, 176, 5, 203, 231, 24, 0, 234, 6, 75, 122, 99, 244, 190, 0, 74, 62, 30, 5, 42, 92, 0, + 28, 54, 62, 176, 5, 92, 35, 46, 0, 47, 62, 75, 5, 218, 120, 187, 0, 62, 62, 176, 5, 166, 120, 108, 0, 110, 110, 176, 5, 218, 35, 190, + 150, 0, 0, 0, 0, 0, 0, 0, 150, 0, 0, 0, 0, 0, 0, 0, 15, 0, 0, 0, 0, 0, 0, 0, 181, 0, 0, 0, 0, 0, 0, 0, + 0, 82, 0, 0, 0, 0, 0, 0, 0, 82, 0, 0, 0, 0, 0, 0, 0, 173, 0, 0, 0, 0, 0, 0, 0, 238, 0, 0, 0, 0, 0, 0, + 0, 82, 0, 0, 0, 0, 0, 0, 0, 16, 0, 0, 0, 0, 0, 0, 0, 
103, 0, 0, 0, 0, 0, 0, 0, 172, 0, 0, 0, 0, 0, 0, + 0, 238, 0, 0, 0, 0, 0, 0, 0, 47, 0, 0, 0, 0, 0, 0, 0, 107, 0, 0, 0, 0, 0, 0, 0, 107, 0, 0, 0, 0, 0, 0, + 15, 0, 0, 0, 0, 0, 0, 0, 213, 0, 0, 0, 0, 0, 0, 0, 67, 0, 0, 0, 0, 0, 0, 0, 250, 0, 0, 0, 0, 0, 0, 0, + 0, 16, 107, 222, 207, 224, 168, 0, 0, 159, 21, 89, 18, 180, 224, 203, 0, 141, 21, 30, 66, 122, 73, 229, 0, 172, 6, 176, 241, 23, 165, 17, + 0, 58, 10, 176, 18, 92, 230, 0, 0, 58, 6, 174, 1, 213, 167, 0, 0, 234, 2, 237, 5, 11, 165, 0, 0, 26, 9, 91, 177, 42, 157, 53, + 0, 123, 54, 254, 56, 72, 170, 23, 0, 134, 6, 62, 11, 128, 195, 218, 0, 9, 21, 91, 251, 187, 76, 70, 0, 123, 254, 30, 99, 92, 23, 108, + 51, 0, 0, 0, 0, 0, 0, 0, 173, 0, 0, 0, 0, 0, 0, 0, 173, 0, 0, 0, 0, 0, 0, 0, 250, 0, 0, 0, 0, 0, 0, 0, + 0, 246, 62, 79, 5, 207, 198, 16, 0, 159, 74, 89, 18, 128, 124, 124, 0, 123, 62, 241, 170, 98, 62, 223, 0, 172, 74, 176, 5, 12, 112, 50, + 0, 47, 56, 233, 127, 180, 174, 111, 0, 215, 21, 4, 122, 92, 253, 0, 0, 133, 62, 44, 251, 219, 19, 0, 0, 107, 110, 176, 5, 203, 203, 0, + 0, 238, 62, 56, 192, 166, 35, 46, 0, 110, 62, 241, 122, 166, 128, 209, 0, 74, 62, 30, 5, 198, 76, 38, 0, 62, 110, 176, 5, 23, 35, 163, + 82, 0, 0, 0, 0, 0, 0, 0, 159, 0, 0, 0, 0, 0, 0, 0, 140, 0, 0, 0, 0, 0, 0, 0, 123, 0, 0, 0, 0, 0, 0, 0, + 0, 82, 0, 0, 0, 0, 0, 0, 0, 159, 0, 0, 0, 0, 0, 0, 0, 234, 0, 0, 0, 0, 0, 0, 0, 62, 0, 0, 0, 0, 0, 0, + 0, 172, 0, 0, 0, 0, 0, 0, 0, 159, 0, 0, 0, 0, 0, 0, 0, 234, 0, 0, 0, 0, 0, 0, 0, 62, 0, 0, 0, 0, 0, 0, + 0, 238, 0, 0, 0, 0, 0, 0, 0, 47, 0, 0, 0, 0, 0, 0, 0, 74, 0, 0, 0, 0, 0, 0, 0, 110, 0, 0, 0, 0, 0, 0, + 82, 0, 0, 0, 0, 0, 0, 0, 82, 0, 0, 0, 0, 0, 0, 0, 103, 0, 0, 0, 0, 0, 0, 0, 173, 0, 0, 0, 0, 0, 0, 0, + 0, 82, 0, 0, 0, 0, 0, 0, 0, 67, 0, 0, 0, 0, 0, 0, 0, 16, 0, 0, 0, 0, 0, 0, 0, 238, 0, 0, 0, 0, 0, 0, + 0, 82, 0, 0, 0, 0, 0, 0, 0, 7, 0, 0, 0, 0, 0, 0, 0, 246, 0, 0, 0, 0, 0, 0, 0, 173, 0, 0, 0, 0, 0, 0, + 0, 238, 0, 0, 0, 0, 0, 0, 0, 172, 0, 0, 0, 0, 0, 0, 0, 172, 0, 0, 0, 0, 0, 0, 0, 107, 0, 
0, 0, 0, 0, 0, + 238, 0, 0, 0, 0, 0, 0, 0, 103, 0, 0, 0, 0, 0, 0, 0, 16, 0, 0, 0, 0, 0, 0, 0, 255, 0, 0, 0, 0, 0, 0, 0, + 0, 234, 6, 94, 177, 80, 202, 0, 0, 82, 6, 56, 32, 86, 0, 0, 0, 140, 134, 32, 232, 173, 0, 0, 0, 119, 74, 49, 219, 0, 0, 0, + 0, 234, 110, 142, 176, 124, 118, 190, 0, 234, 10, 164, 71, 153, 0, 0, 0, 140, 117, 174, 210, 83, 194, 70, 0, 62, 134, 30, 148, 107, 0, 88, + 0, 123, 123, 69, 18, 42, 110, 220, 0, 145, 74, 179, 75, 209, 72, 0, 0, 71, 223, 49, 5, 208, 0, 0, 0, 62, 110, 30, 218, 198, 38, 168, + 238, 0, 0, 0, 0, 0, 0, 0, 172, 0, 0, 0, 0, 0, 0, 0, 123, 0, 0, 0, 0, 0, 0, 0, 107, 0, 0, 0, 0, 0, 0, 0, + 0, 54, 110, 89, 170, 166, 86, 5, 0, 123, 6, 200, 243, 207, 32, 41, 0, 235, 110, 142, 5, 124, 3, 0, 0, 74, 62, 75, 156, 128, 218, 0, + 0, 118, 62, 30, 56, 153, 23, 151, 0, 62, 6, 100, 138, 218, 0, 0, 0, 21, 6, 240, 176, 32, 120, 198, 0, 6, 3, 176, 194, 99, 187, 126, + 35, 238, 110, 176, 18, 11, 231, 46, 126, 74, 62, 176, 122, 203, 244, 221, 0, 10, 74, 75, 251, 72, 76, 17, 0, 62, 62, 176, 5, 42, 120, 0, + 238, 0, 0, 0, 0, 0, 0, 0, 159, 0, 0, 0, 0, 0, 0, 0, 140, 0, 0, 0, 0, 0, 0, 0, 222, 0, 0, 0, 0, 0, 0, 0, + 0, 140, 6, 30, 18, 180, 35, 229, 0, 141, 6, 176, 207, 72, 38, 187, 0, 162, 2, 176, 73, 30, 114, 179, 0, 139, 110, 200, 31, 210, 46, 12, + 240, 140, 6, 91, 207, 128, 229, 76, 0, 141, 6, 179, 210, 85, 23, 209, 0, 214, 6, 30, 217, 218, 153, 209, 0, 123, 139, 177, 217, 28, 24, 0, + 0, 246, 74, 176, 18, 11, 28, 24, 0, 47, 62, 176, 99, 218, 99, 0, 0, 6, 62, 176, 3, 86, 167, 60, 0, 110, 74, 241, 56, 180, 168, 163, + 213, 0, 0, 0, 0, 0, 0, 0, 103, 0, 0, 0, 0, 0, 0, 0, 36, 0, 0, 0, 0, 0, 0, 0, 26, 0, 0, 0, 0, 0, 0, 0, + 0, 15, 0, 0, 0, 0, 0, 0, 0, 67, 0, 0, 0, 0, 0, 0, 0, 238, 0, 0, 0, 0, 0, 0, 0, 82, 0, 0, 0, 0, 0, 0, + 0, 213, 0, 0, 0, 0, 0, 0, 0, 15, 0, 0, 0, 0, 0, 0, 0, 51, 0, 0, 0, 0, 0, 0, 0, 173, 0, 0, 0, 0, 0, 0, + 0, 238, 0, 0, 0, 0, 0, 0, 0, 172, 0, 0, 0, 0, 0, 0, 0, 172, 0, 0, 0, 0, 0, 0, 0, 238, 0, 0, 0, 0, 0, 0, + 181, 0, 0, 0, 0, 0, 0, 0, 
103, 0, 0, 0, 0, 0, 0, 0, 213, 0, 0, 0, 0, 0, 0, 0, 123, 0, 0, 0, 0, 0, 0, 0, + 0, 140, 0, 0, 0, 0, 0, 0, 0, 140, 0, 0, 0, 0, 0, 0, 0, 136, 0, 0, 0, 0, 0, 0, 0, 255, 0, 0, 0, 0, 0, 0, + 0, 141, 0, 0, 0, 0, 0, 0, 0, 47, 0, 0, 0, 0, 0, 0, 0, 162, 0, 0, 0, 0, 0, 0, 0, 74, 0, 0, 0, 0, 0, 0, + 0, 172, 0, 0, 0, 0, 0, 0, 0, 47, 0, 0, 0, 0, 0, 0, 0, 110, 0, 0, 0, 0, 0, 0, 0, 110, 0, 0, 0, 0, 0, 0, + 51, 0, 0, 0, 0, 0, 0, 0, 123, 0, 0, 0, 0, 0, 0, 0, 250, 0, 0, 0, 0, 0, 0, 0, 123, 0, 0, 0, 0, 0, 0, 0, + 0, 255, 0, 0, 0, 0, 0, 0, 0, 159, 0, 0, 0, 0, 0, 0, 0, 6, 0, 0, 0, 0, 0, 0, 0, 110, 0, 0, 0, 0, 0, 0, + 0, 107, 0, 0, 0, 0, 0, 0, 0, 8, 0, 0, 0, 0, 0, 0, 0, 234, 0, 0, 0, 0, 0, 0, 0, 107, 0, 0, 0, 0, 0, 0, + 0, 172, 0, 0, 0, 0, 0, 0, 0, 54, 0, 0, 0, 0, 0, 0, 0, 110, 0, 0, 0, 0, 0, 0, 0, 110, 0, 0, 0, 0, 0, 0, + 238, 0, 0, 0, 0, 0, 0, 0, 159, 0, 0, 0, 0, 0, 0, 0, 172, 0, 0, 0, 0, 0, 0, 0, 222, 0, 0, 0, 0, 0, 0, 0, + 216, 246, 74, 75, 5, 11, 111, 112, 0, 47, 6, 176, 5, 166, 76, 35, 0, 6, 6, 176, 5, 180, 35, 187, 0, 74, 62, 176, 5, 194, 198, 50, + 8, 54, 62, 176, 161, 42, 195, 0, 76, 6, 6, 176, 122, 11, 203, 28, 0, 6, 6, 179, 243, 180, 38, 24, 42, 74, 62, 176, 204, 203, 111, 65, + 50, 54, 74, 176, 99, 42, 168, 209, 117, 74, 62, 176, 5, 92, 229, 65, 221, 74, 62, 176, 5, 42, 157, 0, 0, 110, 110, 176, 5, 128, 120, 41, + 181, 0, 0, 0, 0, 0, 0, 0, 82, 0, 0, 0, 0, 0, 0, 0, 172, 0, 0, 0, 0, 0, 0, 0, 123, 0, 0, 0, 0, 0, 0, 0, + 0, 173, 0, 0, 0, 0, 0, 0, 0, 172, 0, 0, 0, 0, 0, 0, 0, 172, 0, 0, 0, 0, 0, 0, 0, 250, 0, 0, 0, 0, 0, 0, + 0, 173, 0, 0, 0, 0, 0, 0, 0, 172, 0, 0, 0, 0, 0, 0, 0, 238, 0, 0, 0, 0, 0, 0, 0, 172, 0, 0, 0, 0, 0, 0, + 0, 238, 0, 0, 0, 0, 0, 0, 0, 172, 0, 0, 0, 0, 0, 0, 0, 255, 0, 0, 0, 0, 0, 0, 0, 107, 0, 0, 0, 0, 0, 0, + 238, 0, 0, 0, 0, 0, 0, 0, 172, 0, 0, 0, 0, 0, 0, 0, 246, 0, 0, 0, 0, 0, 0, 0, 172, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 26, 110, 204, 216, 233, 180, 0, 0, 10, 6, 31, 153, 11, 228, 0, 0, 62, 9, 156, 18, 64, 96, 0, 0, 110, 176, 200, 198, 145, 221, + 0, 0, 6, 
222, 169, 156, 170, 0, 0, 0, 2, 237, 75, 218, 53, 190, 0, 0, 6, 91, 30, 198, 10, 229, 0, 0, 62, 30, 156, 72, 203, 46, + 0, 0, 19, 176, 18, 165, 194, 42, 0, 0, 74, 30, 99, 62, 35, 218, 0, 0, 6, 217, 23, 218, 12, 190, 144, 0, 62, 176, 5, 144, 168, 221, + 1, 0, 0, 0, 0, 0, 0, 0, 255, 0, 0, 0, 0, 0, 0, 0, 172, 0, 0, 0, 0, 0, 0, 0, 107, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 62, 89, 192, 161, 166, 111, 0, 0, 74, 176, 216, 180, 198, 112, 121, 0, 62, 13, 31, 23, 11, 112, 21, 0, 62, 176, 243, 218, 231, 60, + 0, 0, 107, 13, 192, 166, 109, 187, 0, 0, 6, 74, 56, 75, 195, 144, 0, 0, 6, 176, 208, 99, 23, 231, 0, 0, 62, 176, 122, 23, 111, 163, + 236, 0, 233, 56, 161, 166, 165, 111, 0, 0, 62, 176, 5, 109, 35, 168, 0, 0, 110, 176, 18, 180, 38, 24, 0, 0, 110, 176, 5, 218, 168, 17, + 224, 0, 0, 0, 0, 0, 0, 0, 205, 0, 0, 0, 0, 0, 0, 0, 173, 0, 0, 0, 0, 0, 0, 0, 255, 0, 0, 0, 0, 0, 0, 0, + 0, 159, 6, 176, 207, 195, 229, 253, 0, 159, 6, 176, 5, 198, 28, 221, 0, 140, 6, 176, 5, 195, 187, 157, 0, 110, 62, 176, 5, 11, 28, 46, + 0, 140, 6, 176, 99, 53, 244, 187, 0, 234, 6, 176, 207, 126, 203, 120, 0, 234, 6, 176, 99, 12, 99, 209, 0, 62, 62, 176, 5, 86, 157, 50, + 0, 54, 62, 176, 5, 42, 11, 24, 0, 74, 62, 176, 99, 5, 38, 221, 0, 74, 62, 30, 5, 72, 28, 35, 229, 110, 62, 176, 5, 128, 46, 65, + 150, 0, 0, 0, 0, 0, 0, 0, 150, 0, 0, 0, 0, 0, 0, 0, 150, 0, 0, 0, 0, 0, 0, 0, 51, 0, 0, 0, 0, 0, 0, 0, + 0, 82, 0, 0, 0, 0, 0, 0, 0, 15, 0, 0, 0, 0, 0, 0, 0, 15, 0, 0, 0, 0, 0, 0, 0, 173, 0, 0, 0, 0, 0, 0, + 0, 172, 0, 0, 0, 0, 0, 0, 0, 103, 0, 0, 0, 0, 0, 0, 0, 82, 0, 0, 0, 0, 0, 0, 0, 173, 0, 0, 0, 0, 0, 0, + 0, 172, 0, 0, 0, 0, 0, 0, 0, 172, 0, 0, 0, 0, 0, 0, 0, 255, 0, 0, 0, 0, 0, 0, 0, 123, 0, 0, 0, 0, 0, 0, + 51, 0, 0, 0, 0, 0, 0, 0, 15, 0, 0, 0, 0, 0, 0, 0, 15, 0, 0, 0, 0, 0, 0, 0, 173, 0, 0, 0, 0, 0, 0, 0, + 0, 141, 6, 176, 18, 161, 248, 65, 0, 140, 62, 217, 5, 203, 235, 65, 0, 141, 6, 75, 243, 92, 195, 28, 0, 47, 62, 176, 5, 11, 35, 253, + 0, 140, 6, 3, 138, 170, 253, 31, 0, 234, 6, 179, 122, 53, 0, 27, 
0, 141, 6, 75, 219, 11, 35, 18, 66, 159, 62, 176, 5, 12, 24, 209, + 0, 123, 54, 176, 204, 210, 162, 12, 0, 62, 6, 69, 3, 92, 59, 46, 0, 54, 62, 91, 86, 42, 253, 182, 0, 74, 62, 176, 5, 72, 23, 108, + 51, 0, 0, 0, 0, 0, 0, 0, 173, 0, 0, 0, 0, 0, 0, 0, 173, 0, 0, 0, 0, 0, 0, 0, 250, 0, 0, 0, 0, 0, 0, 0, + 0, 74, 6, 176, 5, 92, 168, 231, 0, 47, 74, 176, 5, 156, 126, 41, 221, 74, 62, 176, 232, 5, 204, 11, 0, 74, 62, 176, 5, 42, 157, 190, + 0, 74, 62, 176, 156, 11, 92, 157, 0, 246, 6, 75, 251, 92, 248, 94, 0, 6, 110, 176, 138, 166, 35, 46, 0, 74, 62, 176, 5, 42, 35, 209, + 0, 62, 62, 176, 161, 92, 187, 190, 0, 62, 62, 176, 5, 11, 35, 108, 0, 110, 62, 176, 5, 42, 28, 116, 190, 62, 62, 176, 5, 218, 76, 190, + 224, 0, 0, 0, 0, 0, 0, 0, 67, 0, 0, 0, 0, 0, 0, 0, 103, 0, 0, 0, 0, 0, 0, 0, 255, 0, 0, 0, 0, 0, 0, 0, + 0, 82, 0, 0, 0, 0, 0, 0, 0, 159, 0, 0, 0, 0, 0, 0, 0, 140, 0, 0, 0, 0, 0, 0, 0, 74, 0, 0, 0, 0, 0, 0, + 0, 172, 0, 0, 0, 0, 0, 0, 0, 140, 0, 0, 0, 0, 0, 0, 0, 234, 0, 0, 0, 0, 0, 0, 0, 74, 0, 0, 0, 0, 0, 0, + 0, 238, 0, 0, 0, 0, 0, 0, 0, 47, 0, 0, 0, 0, 0, 0, 0, 110, 0, 0, 0, 0, 0, 0, 0, 110, 0, 0, 0, 0, 0, 0, + 15, 0, 0, 0, 0, 0, 0, 0, 82, 0, 0, 0, 0, 0, 0, 0, 173, 0, 0, 0, 0, 0, 0, 0, 250, 0, 0, 0, 0, 0, 0, 0, + 0, 15, 0, 0, 0, 0, 0, 0, 0, 82, 0, 0, 0, 0, 0, 0, 0, 173, 0, 0, 0, 0, 0, 0, 0, 172, 0, 0, 0, 0, 0, 0, + 0, 15, 0, 0, 0, 0, 0, 0, 0, 82, 0, 0, 0, 0, 0, 0, 0, 103, 0, 0, 0, 0, 0, 0, 0, 172, 0, 0, 0, 0, 0, 0, + 0, 51, 0, 0, 0, 0, 0, 0, 0, 173, 0, 0, 0, 0, 0, 0, 0, 173, 0, 0, 0, 0, 0, 0, 0, 255, 0, 0, 0, 0, 0, 0, + 51, 0, 0, 0, 0, 0, 0, 0, 173, 0, 0, 0, 0, 0, 0, 0, 103, 0, 0, 0, 0, 0, 0, 0, 172, 0, 0, 0, 0, 0, 0, 0, + 0, 45, 6, 100, 5, 11, 20, 60, 0, 141, 74, 197, 207, 76, 46, 167, 0, 140, 6, 91, 249, 153, 218, 27, 0, 6, 62, 237, 208, 122, 221, 79, + 0, 141, 74, 176, 241, 198, 120, 218, 0, 140, 6, 94, 166, 161, 157, 253, 0, 234, 6, 176, 3, 243, 26, 92, 0, 6, 74, 30, 208, 120, 226, 166, + 0, 123, 62, 75, 156, 218, 229, 50, 0, 74, 62, 176, 208, 42, 86, 229, 0, 
6, 62, 179, 156, 218, 253, 221, 0, 54, 62, 176, 5, 72, 112, 221, + 51, 0, 0, 0, 0, 0, 0, 0, 173, 0, 0, 0, 0, 0, 0, 0, 172, 0, 0, 0, 0, 0, 0, 0, 255, 0, 0, 0, 0, 0, 0, 0, + 14, 47, 74, 176, 99, 180, 50, 163, 0, 47, 62, 75, 207, 156, 187, 251, 0, 6, 110, 176, 124, 5, 180, 53, 0, 123, 62, 176, 5, 195, 167, 157, + 0, 140, 62, 75, 5, 53, 195, 180, 0, 62, 62, 75, 18, 12, 111, 190, 0, 234, 2, 176, 219, 120, 17, 209, 0, 74, 62, 176, 5, 12, 168, 190, + 0, 172, 62, 176, 5, 128, 168, 38, 0, 54, 110, 176, 99, 203, 157, 17, 0, 110, 62, 176, 31, 170, 167, 167, 243, 110, 62, 176, 5, 128, 167, 65, + 224, 0, 0, 0, 0, 0, 0, 0, 82, 0, 0, 0, 0, 0, 0, 0, 103, 0, 0, 0, 0, 0, 0, 0, 255, 0, 0, 0, 0, 0, 0, 0, + 0, 140, 6, 75, 18, 92, 23, 194, 0, 159, 6, 100, 156, 243, 229, 228, 0, 234, 6, 44, 243, 180, 0, 97, 0, 107, 62, 176, 5, 166, 109, 18, + 0, 140, 6, 75, 243, 128, 60, 221, 0, 234, 6, 237, 5, 170, 100, 2, 0, 140, 6, 176, 194, 12, 49, 229, 0, 74, 6, 49, 211, 3, 243, 12, + 0, 54, 62, 176, 5, 198, 38, 35, 0, 47, 62, 30, 5, 194, 204, 203, 0, 74, 62, 3, 30, 38, 231, 218, 0, 74, 62, 176, 99, 195, 231, 166, + 51, 0, 0, 0, 0, 0, 0, 0, 15, 0, 0, 0, 0, 0, 0, 0, 173, 0, 0, 0, 0, 0, 0, 0, 173, 0, 0, 0, 0, 0, 0, 0, + 0, 15, 0, 0, 0, 0, 0, 0, 0, 15, 0, 0, 0, 0, 0, 0, 0, 15, 0, 0, 0, 0, 0, 0, 0, 173, 0, 0, 0, 0, 0, 0, + 0, 15, 0, 0, 0, 0, 0, 0, 0, 15, 0, 0, 0, 0, 0, 0, 0, 82, 0, 0, 0, 0, 0, 0, 0, 173, 0, 0, 0, 0, 0, 0, + 0, 173, 0, 0, 0, 0, 0, 0, 0, 173, 0, 0, 0, 0, 0, 0, 0, 173, 0, 0, 0, 0, 0, 0, 0, 173, 0, 0, 0, 0, 0, 0, + 51, 0, 0, 0, 0, 0, 0, 0, 15, 0, 0, 0, 0, 0, 0, 0, 67, 0, 0, 0, 0, 0, 0, 0, 173, 0, 0, 0, 0, 0, 0, 0, + 0, 159, 0, 0, 0, 0, 0, 0, 0, 140, 0, 0, 0, 0, 0, 0, 0, 159, 0, 0, 0, 0, 0, 0, 0, 246, 0, 0, 0, 0, 0, 0, + 0, 172, 0, 0, 0, 0, 0, 0, 0, 140, 0, 0, 0, 0, 0, 0, 0, 140, 0, 0, 0, 0, 0, 0, 0, 172, 0, 0, 0, 0, 0, 0, + 0, 172, 0, 0, 0, 0, 0, 0, 0, 123, 0, 0, 0, 0, 0, 0, 0, 54, 0, 0, 0, 0, 0, 0, 0, 123, 0, 0, 0, 0, 0, 0, + 51, 0, 0, 0, 0, 0, 0, 0, 173, 0, 0, 0, 0, 0, 0, 0, 250, 0, 0, 0, 
0, 0, 0, 0, 255, 0, 0, 0, 0, 0, 0, 0, + 0, 172, 0, 0, 0, 0, 0, 0, 0, 54, 0, 0, 0, 0, 0, 0, 0, 74, 0, 0, 0, 0, 0, 0, 0, 123, 0, 0, 0, 0, 0, 0, + 0, 123, 0, 0, 0, 0, 0, 0, 0, 6, 0, 0, 0, 0, 0, 0, 0, 74, 0, 0, 0, 0, 0, 0, 0, 74, 0, 0, 0, 0, 0, 0, + 0, 110, 0, 0, 0, 0, 0, 0, 0, 110, 0, 0, 0, 0, 0, 0, 0, 123, 0, 0, 0, 0, 0, 0, 0, 110, 0, 0, 0, 0, 0, 0, + 224, 0, 0, 0, 0, 0, 0, 0, 82, 0, 0, 0, 0, 0, 0, 0, 172, 0, 0, 0, 0, 0, 0, 0, 107, 0, 0, 0, 0, 0, 0, 0, + 0, 47, 74, 176, 99, 23, 35, 17, 209, 47, 6, 75, 5, 11, 23, 60, 0, 47, 6, 176, 207, 156, 38, 166, 193, 110, 62, 176, 5, 109, 231, 28, + 0, 54, 74, 176, 99, 218, 38, 46, 0, 140, 6, 176, 156, 42, 229, 60, 132, 6, 62, 75, 5, 92, 23, 76, 0, 110, 62, 176, 5, 42, 229, 120, + 102, 110, 62, 176, 5, 42, 35, 0, 214, 110, 62, 176, 5, 218, 168, 209, 0, 62, 62, 176, 5, 92, 231, 221, 69, 62, 62, 176, 5, 218, 167, 65, + 51, 0, 0, 0, 0, 0, 0, 0, 173, 0, 0, 0, 0, 0, 0, 0, 173, 0, 0, 0, 0, 0, 0, 0, 238, 0, 0, 0, 0, 0, 0, 0, + 0, 173, 0, 0, 0, 0, 0, 0, 0, 173, 0, 0, 0, 0, 0, 0, 0, 173, 0, 0, 0, 0, 0, 0, 0, 250, 0, 0, 0, 0, 0, 0, + 0, 173, 0, 0, 0, 0, 0, 0, 0, 172, 0, 0, 0, 0, 0, 0, 0, 173, 0, 0, 0, 0, 0, 0, 0, 173, 0, 0, 0, 0, 0, 0, + 0, 250, 0, 0, 0, 0, 0, 0, 0, 255, 0, 0, 0, 0, 0, 0, 0, 255, 0, 0, 0, 0, 0, 0, 0, 255, 0, 0, 0, 0, 0, 0, + 51, 0, 0, 0, 0, 0, 0, 0, 173, 0, 0, 0, 0, 0, 0, 0, 172, 0, 0, 0, 0, 0, 0, 0, 255, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 74, 30, 124, 42, 23, 244, 0, 0, 74, 91, 161, 12, 229, 112, 0, 0, 6, 75, 5, 219, 46, 12, 38, 0, 110, 176, 5, 128, 76, 17, + 0, 0, 62, 176, 243, 208, 5, 253, 0, 0, 6, 75, 5, 72, 209, 253, 0, 0, 6, 75, 5, 76, 229, 0, 0, 0, 62, 176, 5, 170, 231, 163, + 0, 0, 110, 176, 99, 11, 35, 167, 0, 0, 110, 176, 5, 92, 180, 157, 0, 0, 110, 176, 5, 198, 203, 165, 229, 0, 62, 176, 5, 128, 20, 60, + 181, 0, 0, 0, 0, 0, 0, 0, 173, 0, 0, 0, 0, 0, 0, 0, 250, 0, 0, 0, 0, 0, 0, 0, 255, 0, 0, 0, 0, 0, 0, 0, + 143, 0, 74, 176, 99, 170, 166, 221, 214, 0, 74, 176, 99, 170, 111, 231, 0, 0, 74, 176, 99, 92, 195, 203, 171, 
0, 110, 176, 5, 42, 187, 24, + 0, 0, 62, 176, 161, 109, 20, 187, 0, 0, 74, 176, 5, 194, 74, 76, 0, 0, 62, 176, 30, 35, 231, 111, 46, 0, 62, 176, 5, 42, 76, 0, + 20, 0, 110, 176, 192, 166, 35, 46, 0, 0, 110, 176, 5, 92, 38, 111, 0, 0, 110, 176, 5, 12, 126, 46, 229, 0, 110, 176, 5, 218, 120, 190, + 224, 0, 0, 0, 0, 0, 0, 0, 7, 0, 0, 0, 0, 0, 0, 0, 140, 0, 0, 0, 0, 0, 0, 0, 107, 0, 0, 0, 0, 0, 0, 0, + 187, 159, 6, 75, 207, 11, 35, 221, 24, 159, 6, 75, 207, 195, 35, 209, 144, 234, 6, 176, 99, 23, 180, 60, 0, 110, 62, 176, 5, 42, 168, 209, + 112, 140, 6, 176, 5, 11, 35, 221, 86, 234, 6, 176, 5, 195, 28, 0, 69, 234, 6, 176, 5, 203, 76, 46, 0, 74, 62, 176, 156, 128, 35, 209, + 168, 54, 62, 176, 5, 42, 35, 209, 92, 47, 62, 176, 5, 42, 35, 65, 0, 74, 62, 176, 5, 11, 231, 190, 187, 110, 62, 176, 5, 218, 167, 190, + 82, 0, 0, 0, 0, 0, 0, 0, 82, 0, 0, 0, 0, 0, 0, 0, 172, 0, 0, 0, 0, 0, 0, 0, 238, 0, 0, 0, 0, 0, 0, 0, + 0, 15, 0, 0, 0, 0, 0, 0, 0, 82, 0, 0, 0, 0, 0, 0, 0, 173, 0, 0, 0, 0, 0, 0, 0, 173, 0, 0, 0, 0, 0, 0, + 0, 173, 0, 0, 0, 0, 0, 0, 0, 103, 0, 0, 0, 0, 0, 0, 0, 103, 0, 0, 0, 0, 0, 0, 0, 173, 0, 0, 0, 0, 0, 0, + 0, 238, 0, 0, 0, 0, 0, 0, 0, 172, 0, 0, 0, 0, 0, 0, 0, 172, 0, 0, 0, 0, 0, 0, 0, 255, 0, 0, 0, 0, 0, 0, + 82, 0, 0, 0, 0, 0, 0, 0, 16, 0, 0, 0, 0, 0, 0, 0, 140, 0, 0, 0, 0, 0, 0, 0, 172, 0, 0, 0, 0, 0, 0, 0, + 0, 140, 6, 100, 122, 92, 165, 112, 0, 234, 6, 69, 127, 16, 165, 194, 0, 234, 6, 254, 243, 38, 74, 20, 0, 47, 62, 176, 5, 53, 180, 11, + 0, 140, 62, 176, 99, 180, 251, 21, 0, 140, 62, 176, 91, 109, 111, 244, 0, 234, 6, 100, 31, 99, 12, 109, 0, 74, 62, 30, 243, 203, 46, 112, + 0, 238, 110, 176, 99, 99, 12, 28, 0, 47, 6, 176, 5, 195, 128, 76, 0, 47, 62, 179, 166, 12, 41, 108, 0, 74, 62, 176, 5, 198, 20, 0, + 238, 0, 0, 0, 0, 0, 0, 0, 172, 0, 0, 0, 0, 0, 0, 0, 255, 0, 0, 0, 0, 0, 0, 0, 255, 0, 0, 0, 0, 0, 0, 0, + 0, 54, 62, 3, 18, 42, 126, 108, 0, 47, 74, 176, 99, 170, 111, 28, 0, 110, 74, 176, 5, 128, 76, 244, 0, 74, 62, 176, 99, 128, 161, 187, + 0, 74, 
110, 176, 31, 192, 198, 187, 0, 214, 6, 30, 31, 144, 174, 0, 0, 134, 62, 217, 249, 180, 42, 72, 0, 74, 62, 176, 156, 194, 249, 229, + 0, 172, 110, 176, 192, 166, 35, 221, 0, 123, 62, 176, 243, 92, 76, 24, 0, 74, 62, 3, 99, 72, 168, 209, 0, 110, 62, 176, 5, 218, 157, 221, + 82, 0, 0, 0, 0, 0, 0, 0, 159, 0, 0, 0, 0, 0, 0, 0, 140, 0, 0, 0, 0, 0, 0, 0, 110, 0, 0, 0, 0, 0, 0, 0, + 0, 82, 0, 0, 0, 0, 0, 0, 0, 159, 0, 0, 0, 0, 0, 0, 0, 234, 0, 0, 0, 0, 0, 0, 0, 110, 0, 0, 0, 0, 0, 0, + 0, 238, 0, 0, 0, 0, 0, 0, 0, 159, 0, 0, 0, 0, 0, 0, 0, 234, 0, 0, 0, 0, 0, 0, 0, 110, 0, 0, 0, 0, 0, 0, + 0, 238, 0, 0, 0, 0, 0, 0, 0, 47, 0, 0, 0, 0, 0, 0, 0, 74, 0, 0, 0, 0, 0, 0, 0, 110, 0, 0, 0, 0, 0, 0, + 82, 0, 0, 0, 0, 0, 0, 0, 159, 0, 0, 0, 0, 0, 0, 0, 140, 0, 0, 0, 0, 0, 0, 0, 172, 0, 0, 0, 0, 0, 0, 0, + 0, 224, 0, 0, 0, 0, 0, 0, 0, 67, 0, 0, 0, 0, 0, 0, 0, 103, 0, 0, 0, 0, 0, 0, 0, 172, 0, 0, 0, 0, 0, 0, + 0, 224, 0, 0, 0, 0, 0, 0, 0, 82, 0, 0, 0, 0, 0, 0, 0, 172, 0, 0, 0, 0, 0, 0, 0, 250, 0, 0, 0, 0, 0, 0, + 0, 173, 0, 0, 0, 0, 0, 0, 0, 82, 0, 0, 0, 0, 0, 0, 0, 172, 0, 0, 0, 0, 0, 0, 0, 255, 0, 0, 0, 0, 0, 0, + 172, 0, 0, 0, 0, 0, 0, 0, 140, 0, 0, 0, 0, 0, 0, 0, 141, 0, 0, 0, 0, 0, 0, 0, 74, 0, 0, 0, 0, 0, 0, 0, + 0, 140, 6, 30, 177, 176, 180, 126, 169, 140, 6, 179, 124, 120, 99, 229, 0, 234, 6, 30, 13, 112, 34, 187, 0, 47, 74, 176, 243, 128, 0, 21, + 0, 140, 6, 176, 176, 11, 35, 35, 0, 141, 6, 179, 129, 23, 218, 3, 0, 140, 6, 174, 4, 192, 91, 60, 0, 6, 9, 30, 99, 23, 0, 157, + 0, 47, 74, 176, 156, 203, 38, 111, 0, 47, 6, 176, 99, 194, 229, 108, 0, 54, 6, 30, 5, 72, 72, 20, 0, 47, 62, 176, 243, 12, 218, 190, + 238, 0, 0, 0, 0, 0, 0, 0, 47, 0, 0, 0, 0, 0, 0, 0, 47, 0, 0, 0, 0, 0, 0, 0, 110, 0, 0, 0, 0, 0, 0, 0, + 0, 54, 74, 176, 31, 109, 167, 24, 0, 47, 62, 91, 5, 211, 99, 50, 0, 107, 62, 176, 156, 161, 218, 29, 0, 74, 62, 176, 156, 12, 20, 20, + 0, 54, 110, 176, 73, 180, 144, 28, 0, 47, 6, 197, 207, 35, 244, 76, 0, 123, 6, 176, 122, 5, 60, 218, 0, 6, 62, 75, 99, 23, 168, 209, + 
0, 172, 62, 176, 5, 42, 35, 209, 0, 47, 62, 75, 5, 198, 76, 209, 95, 74, 62, 176, 5, 198, 42, 120, 229, 110, 110, 176, 5, 42, 231, 50, + 238, 0, 0, 0, 0, 0, 0, 0, 159, 0, 0, 0, 0, 0, 0, 0, 234, 0, 0, 0, 0, 0, 0, 0, 110, 0, 0, 0, 0, 0, 0, 0, + 251, 140, 6, 176, 5, 11, 229, 111, 42, 141, 6, 176, 5, 11, 168, 65, 0, 234, 6, 217, 219, 210, 20, 11, 0, 74, 62, 176, 18, 5, 76, 108, + 194, 140, 6, 176, 5, 11, 35, 111, 0, 141, 6, 176, 219, 180, 229, 231, 209, 234, 6, 217, 249, 194, 109, 209, 0, 74, 62, 176, 99, 92, 144, 183, + 163, 246, 74, 176, 5, 42, 35, 209, 0, 47, 6, 176, 5, 42, 231, 112, 0, 74, 62, 176, 5, 12, 126, 112, 0, 110, 62, 176, 5, 12, 20, 65, + 82, 0, 0, 0, 0, 0, 0, 0, 140, 0, 0, 0, 0, 0, 0, 0, 140, 0, 0, 0, 0, 0, 0, 0, 123, 0, 0, 0, 0, 0, 0, 0, + 0, 15, 0, 0, 0, 0, 0, 0, 0, 173, 0, 0, 0, 0, 0, 0, 0, 82, 0, 0, 0, 0, 0, 0, 0, 173, 0, 0, 0, 0, 0, 0, + 0, 15, 0, 0, 0, 0, 0, 0, 0, 173, 0, 0, 0, 0, 0, 0, 0, 82, 0, 0, 0, 0, 0, 0, 0, 173, 0, 0, 0, 0, 0, 0, + 0, 173, 0, 0, 0, 0, 0, 0, 0, 82, 0, 0, 0, 0, 0, 0, 0, 173, 0, 0, 0, 0, 0, 0, 0, 250, 0, 0, 0, 0, 0, 0, + 172, 0, 0, 0, 0, 0, 0, 0, 140, 0, 0, 0, 0, 0, 0, 0, 140, 0, 0, 0, 0, 0, 0, 0, 47, 0, 0, 0, 0, 0, 0, 0, + 0, 140, 0, 0, 0, 0, 0, 0, 0, 140, 0, 0, 0, 0, 0, 0, 0, 140, 0, 0, 0, 0, 0, 0, 0, 47, 0, 0, 0, 0, 0, 0, + 0, 172, 0, 0, 0, 0, 0, 0, 0, 159, 0, 0, 0, 0, 0, 0, 0, 140, 0, 0, 0, 0, 0, 0, 0, 172, 0, 0, 0, 0, 0, 0, + 0, 172, 0, 0, 0, 0, 0, 0, 0, 47, 0, 0, 0, 0, 0, 0, 0, 54, 0, 0, 0, 0, 0, 0, 0, 123, 0, 0, 0, 0, 0, 0, + 172, 0, 0, 0, 0, 0, 0, 0, 172, 0, 0, 0, 0, 0, 0, 0, 47, 0, 0, 0, 0, 0, 0, 0, 110, 0, 0, 0, 0, 0, 0, 0, + 0, 172, 0, 0, 0, 0, 0, 0, 0, 47, 0, 0, 0, 0, 0, 0, 0, 172, 0, 0, 0, 0, 0, 0, 0, 107, 0, 0, 0, 0, 0, 0, + 0, 172, 0, 0, 0, 0, 0, 0, 0, 47, 0, 0, 0, 0, 0, 0, 0, 123, 0, 0, 0, 0, 0, 0, 0, 107, 0, 0, 0, 0, 0, 0, + 0, 172, 0, 0, 0, 0, 0, 0, 0, 107, 0, 0, 0, 0, 0, 0, 0, 74, 0, 0, 0, 0, 0, 0, 0, 110, 0, 0, 0, 0, 0, 0, + 238, 0, 0, 0, 0, 0, 0, 0, 47, 0, 0, 0, 0, 0, 0, 0, 74, 0, 0, 0, 0, 0, 0, 0, 110, 0, 
0, 0, 0, 0, 0, 0, + 137, 246, 74, 176, 5, 42, 35, 221, 189, 47, 6, 176, 5, 11, 35, 65, 24, 74, 6, 176, 5, 42, 38, 60, 0, 110, 62, 176, 5, 42, 120, 0, + 244, 54, 74, 176, 5, 42, 35, 46, 0, 47, 6, 176, 5, 42, 38, 24, 0, 6, 62, 176, 5, 42, 42, 163, 0, 74, 62, 176, 5, 218, 35, 209, + 190, 238, 62, 176, 5, 42, 168, 209, 168, 47, 62, 176, 5, 42, 168, 209, 180, 62, 62, 176, 5, 42, 168, 50, 198, 110, 62, 176, 5, 218, 167, 50, + 238, 0, 0, 0, 0, 0, 0, 0, 47, 0, 0, 0, 0, 0, 0, 0, 54, 0, 0, 0, 0, 0, 0, 0, 110, 0, 0, 0, 0, 0, 0, 0, + 0, 173, 0, 0, 0, 0, 0, 0, 0, 173, 0, 0, 0, 0, 0, 0, 0, 173, 0, 0, 0, 0, 0, 0, 0, 250, 0, 0, 0, 0, 0, 0, + 0, 173, 0, 0, 0, 0, 0, 0, 0, 173, 0, 0, 0, 0, 0, 0, 0, 173, 0, 0, 0, 0, 0, 0, 0, 250, 0, 0, 0, 0, 0, 0, + 0, 173, 0, 0, 0, 0, 0, 0, 0, 172, 0, 0, 0, 0, 0, 0, 0, 172, 0, 0, 0, 0, 0, 0, 0, 255, 0, 0, 0, 0, 0, 0, + 172, 0, 0, 0, 0, 0, 0, 0, 47, 0, 0, 0, 0, 0, 0, 0, 140, 0, 0, 0, 0, 0, 0, 0, 110, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 74, 176, 56, 203, 229, 38, 0, 0, 62, 176, 219, 42, 28, 7, 0, 0, 62, 179, 251, 42, 120, 163, 0, 0, 62, 176, 156, 53, 157, 38, + 0, 0, 74, 176, 18, 69, 46, 170, 0, 0, 74, 44, 5, 53, 11, 120, 0, 0, 6, 176, 156, 195, 120, 120, 195, 0, 74, 176, 156, 5, 218, 183, + 0, 0, 110, 176, 161, 42, 167, 209, 112, 0, 62, 176, 99, 42, 187, 187, 0, 0, 6, 176, 99, 198, 35, 24, 183, 0, 110, 176, 5, 218, 168, 190, + 238, 0, 0, 0, 0, 0, 0, 0, 123, 0, 0, 0, 0, 0, 0, 0, 54, 0, 0, 0, 0, 0, 0, 0, 110, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 74, 176, 192, 42, 35, 165, 0, 0, 110, 176, 5, 180, 229, 157, 0, 0, 62, 75, 156, 12, 120, 112, 0, 0, 110, 176, 5, 92, 187, 112, + 0, 0, 74, 176, 192, 198, 111, 126, 0, 0, 62, 30, 208, 180, 157, 218, 0, 0, 6, 176, 99, 92, 187, 41, 0, 0, 110, 176, 5, 11, 231, 221, + 0, 0, 110, 176, 161, 166, 165, 183, 244, 0, 110, 176, 5, 42, 157, 0, 0, 0, 62, 176, 99, 42, 165, 0, 167, 0, 110, 176, 5, 218, 120, 190, + 224, 0, 0, 0, 0, 0, 0, 0, 7, 0, 0, 0, 0, 0, 0, 0, 140, 0, 0, 0, 0, 0, 0, 0, 107, 0, 0, 0, 0, 0, 0, 0, + 126, 159, 6, 176, 207, 
195, 35, 221, 72, 159, 6, 176, 219, 11, 35, 0, 0, 234, 6, 176, 5, 12, 231, 112, 82, 110, 62, 176, 5, 218, 76, 111, + 0, 140, 6, 176, 5, 11, 231, 65, 244, 234, 6, 176, 5, 198, 168, 209, 183, 234, 6, 176, 5, 198, 157, 50, 0, 74, 62, 176, 99, 218, 76, 244, + 143, 54, 62, 176, 5, 42, 168, 50, 0, 74, 62, 176, 5, 42, 231, 65, 218, 74, 62, 176, 5, 218, 28, 209, 190, 110, 62, 176, 5, 218, 120, 190, + 82, 0, 0, 0, 0, 0, 0, 0, 82, 0, 0, 0, 0, 0, 0, 0, 173, 0, 0, 0, 0, 0, 0, 0, 173, 0, 0, 0, 0, 0, 0, 0, + 0, 150, 0, 0, 0, 0, 0, 0, 0, 150, 0, 0, 0, 0, 0, 0, 0, 51, 0, 0, 0, 0, 0, 0, 0, 51, 0, 0, 0, 0, 0, 0, + 0, 15, 0, 0, 0, 0, 0, 0, 0, 15, 0, 0, 0, 0, 0, 0, 0, 15, 0, 0, 0, 0, 0, 0, 0, 173, 0, 0, 0, 0, 0, 0, + 0, 173, 0, 0, 0, 0, 0, 0, 0, 173, 0, 0, 0, 0, 0, 0, 0, 173, 0, 0, 0, 0, 0, 0, 0, 250, 0, 0, 0, 0, 0, 0, + 173, 0, 0, 0, 0, 0, 0, 0, 172, 0, 0, 0, 0, 0, 0, 0, 67, 0, 0, 0, 0, 0, 0, 0, 172, 0, 0, 0, 0, 0, 0, 0, + 0, 140, 6, 176, 208, 23, 120, 111, 0, 140, 6, 176, 31, 42, 38, 190, 0, 141, 6, 176, 5, 12, 35, 111, 0, 47, 6, 176, 5, 218, 168, 24, + 0, 140, 6, 176, 99, 11, 251, 53, 0, 140, 6, 176, 156, 12, 38, 221, 0, 140, 6, 176, 99, 12, 35, 65, 0, 47, 62, 176, 5, 218, 111, 65, + 0, 47, 62, 176, 5, 42, 168, 209, 0, 47, 62, 176, 5, 194, 229, 0, 0, 54, 62, 176, 99, 218, 187, 28, 0, 110, 62, 176, 5, 218, 168, 112, + 173, 0, 0, 0, 0, 0, 0, 0, 172, 0, 0, 0, 0, 0, 0, 0, 172, 0, 0, 0, 0, 0, 0, 0, 255, 0, 0, 0, 0, 0, 0, 0, + 0, 47, 62, 176, 5, 218, 229, 183, 0, 47, 62, 176, 5, 195, 229, 41, 0, 74, 74, 176, 156, 11, 157, 46, 0, 110, 62, 176, 5, 42, 20, 65, + 207, 110, 62, 176, 5, 11, 229, 116, 1, 6, 62, 176, 5, 42, 229, 41, 0, 47, 62, 176, 99, 42, 229, 65, 168, 110, 62, 176, 5, 198, 76, 253, + 244, 123, 62, 176, 5, 42, 168, 46, 0, 110, 62, 176, 5, 42, 229, 168, 0, 110, 62, 176, 5, 12, 231, 0, 11, 110, 62, 176, 5, 218, 120, 190, + 82, 0, 0, 0, 0, 0, 0, 0, 159, 0, 0, 0, 0, 0, 0, 0, 140, 0, 0, 0, 0, 0, 0, 0, 107, 0, 0, 0, 0, 0, 0, 0, + 0, 82, 0, 0, 0, 0, 0, 0, 0, 159, 0, 0, 0, 0, 0, 0, 0, 
140, 0, 0, 0, 0, 0, 0, 0, 110, 0, 0, 0, 0, 0, 0, + 0, 172, 0, 0, 0, 0, 0, 0, 0, 159, 0, 0, 0, 0, 0, 0, 0, 140, 0, 0, 0, 0, 0, 0, 0, 110, 0, 0, 0, 0, 0, 0, + 0, 238, 0, 0, 0, 0, 0, 0, 0, 47, 0, 0, 0, 0, 0, 0, 0, 54, 0, 0, 0, 0, 0, 0, 0, 110, 0, 0, 0, 0, 0, 0, + 82, 0, 0, 0, 0, 0, 0, 0, 82, 0, 0, 0, 0, 0, 0, 0, 172, 0, 0, 0, 0, 0, 0, 0, 172, 0, 0, 0, 0, 0, 0, 0, + 0, 150, 0, 0, 0, 0, 0, 0, 0, 15, 0, 0, 0, 0, 0, 0, 0, 15, 0, 0, 0, 0, 0, 0, 0, 173, 0, 0, 0, 0, 0, 0, + 0, 51, 0, 0, 0, 0, 0, 0, 0, 15, 0, 0, 0, 0, 0, 0, 0, 173, 0, 0, 0, 0, 0, 0, 0, 173, 0, 0, 0, 0, 0, 0, + 0, 51, 0, 0, 0, 0, 0, 0, 0, 173, 0, 0, 0, 0, 0, 0, 0, 173, 0, 0, 0, 0, 0, 0, 0, 250, 0, 0, 0, 0, 0, 0, + 172, 0, 0, 0, 0, 0, 0, 0, 140, 0, 0, 0, 0, 0, 0, 0, 140, 0, 0, 0, 0, 0, 0, 0, 172, 0, 0, 0, 0, 0, 0, 0, + 0, 140, 6, 176, 156, 198, 198, 137, 0, 234, 6, 176, 208, 92, 5, 28, 0, 140, 6, 75, 5, 166, 168, 183, 0, 47, 62, 176, 5, 195, 166, 65, + 0, 140, 6, 176, 99, 128, 28, 108, 0, 140, 6, 176, 208, 128, 111, 221, 0, 140, 6, 100, 5, 232, 126, 209, 0, 54, 62, 176, 5, 42, 231, 120, + 0, 47, 74, 176, 5, 42, 229, 112, 0, 47, 6, 176, 5, 218, 229, 244, 166, 54, 62, 176, 5, 218, 35, 221, 254, 110, 62, 176, 5, 218, 120, 244, + 172, 0, 0, 0, 0, 0, 0, 0, 172, 0, 0, 0, 0, 0, 0, 0, 123, 0, 0, 0, 0, 0, 0, 0, 107, 0, 0, 0, 0, 0, 0, 0, + 0, 47, 62, 176, 99, 11, 231, 46, 0, 47, 62, 176, 5, 195, 168, 157, 110, 74, 62, 176, 5, 12, 157, 157, 0, 74, 110, 176, 5, 218, 38, 167, + 7, 54, 62, 176, 5, 218, 120, 28, 0, 74, 62, 176, 156, 92, 111, 0, 0, 74, 62, 176, 5, 218, 231, 60, 0, 74, 62, 176, 5, 198, 168, 137, + 0, 110, 62, 176, 5, 198, 35, 65, 37, 110, 62, 176, 5, 42, 187, 24, 0, 74, 62, 176, 5, 218, 120, 60, 0, 110, 62, 176, 5, 218, 120, 253, + 172, 0, 0, 0, 0, 0, 0, 0, 159, 0, 0, 0, 0, 0, 0, 0, 140, 0, 0, 0, 0, 0, 0, 0, 110, 0, 0, 0, 0, 0, 0, 0, + 0, 140, 6, 176, 5, 195, 229, 112, 0, 141, 6, 176, 5, 11, 229, 116, 0, 234, 6, 176, 86, 195, 120, 112, 0, 74, 62, 176, 5, 218, 198, 168, + 38, 140, 6, 176, 5, 42, 35, 137, 0, 141, 
6, 176, 207, 72, 231, 244, 0, 234, 6, 176, 5, 42, 229, 187, 0, 110, 62, 176, 5, 198, 198, 28, + 0, 246, 74, 176, 5, 42, 168, 221, 0, 47, 6, 176, 5, 42, 168, 209, 0, 74, 62, 176, 5, 42, 167, 209, 0, 110, 62, 176, 5, 198, 76, 112, + 172, 0, 0, 0, 0, 0, 0, 0, 172, 0, 0, 0, 0, 0, 0, 0, 172, 0, 0, 0, 0, 0, 0, 0, 172, 0, 0, 0, 0, 0, 0, 0, + 0, 15, 0, 0, 0, 0, 0, 0, 0, 15, 0, 0, 0, 0, 0, 0, 0, 15, 0, 0, 0, 0, 0, 0, 0, 173, 0, 0, 0, 0, 0, 0, + 0, 15, 0, 0, 0, 0, 0, 0, 0, 15, 0, 0, 0, 0, 0, 0, 0, 15, 0, 0, 0, 0, 0, 0, 0, 173, 0, 0, 0, 0, 0, 0, + 0, 173, 0, 0, 0, 0, 0, 0, 0, 173, 0, 0, 0, 0, 0, 0, 0, 173, 0, 0, 0, 0, 0, 0, 0, 250, 0, 0, 0, 0, 0, 0, + 172, 0, 0, 0, 0, 0, 0, 0, 172, 0, 0, 0, 0, 0, 0, 0, 159, 0, 0, 0, 0, 0, 0, 0, 172, 0, 0, 0, 0, 0, 0, 0, + 0, 140, 0, 0, 0, 0, 0, 0, 0, 140, 0, 0, 0, 0, 0, 0, 0, 140, 0, 0, 0, 0, 0, 0, 0, 47, 0, 0, 0, 0, 0, 0, + 0, 172, 0, 0, 0, 0, 0, 0, 0, 172, 0, 0, 0, 0, 0, 0, 0, 172, 0, 0, 0, 0, 0, 0, 0, 172, 0, 0, 0, 0, 0, 0, + 0, 172, 0, 0, 0, 0, 0, 0, 0, 172, 0, 0, 0, 0, 0, 0, 0, 172, 0, 0, 0, 0, 0, 0, 0, 172, 0, 0, 0, 0, 0, 0, + 172, 0, 0, 0, 0, 0, 0, 0, 172, 0, 0, 0, 0, 0, 0, 0, 172, 0, 0, 0, 0, 0, 0, 0, 123, 0, 0, 0, 0, 0, 0, 0, + 0, 172, 0, 0, 0, 0, 0, 0, 0, 172, 0, 0, 0, 0, 0, 0, 0, 172, 0, 0, 0, 0, 0, 0, 0, 123, 0, 0, 0, 0, 0, 0, + 0, 123, 0, 0, 0, 0, 0, 0, 0, 110, 0, 0, 0, 0, 0, 0, 0, 123, 0, 0, 0, 0, 0, 0, 0, 110, 0, 0, 0, 0, 0, 0, + 0, 123, 0, 0, 0, 0, 0, 0, 0, 110, 0, 0, 0, 0, 0, 0, 0, 123, 0, 0, 0, 0, 0, 0, 0, 110, 0, 0, 0, 0, 0, 0, + 238, 0, 0, 0, 0, 0, 0, 0, 47, 0, 0, 0, 0, 0, 0, 0, 54, 0, 0, 0, 0, 0, 0, 0, 110, 0, 0, 0, 0, 0, 0, 0, + 126, 246, 74, 176, 5, 42, 229, 221, 0, 47, 6, 176, 5, 11, 231, 112, 0, 54, 62, 176, 99, 42, 28, 46, 0, 110, 62, 176, 5, 218, 168, 112, + 0, 54, 74, 176, 5, 42, 35, 112, 0, 47, 6, 176, 5, 42, 35, 60, 3, 6, 6, 176, 5, 218, 231, 41, 0, 110, 62, 176, 5, 42, 168, 0, + 209, 172, 62, 176, 5, 42, 168, 209, 194, 47, 62, 176, 5, 218, 168, 244, 207, 74, 62, 176, 5, 42, 168, 209, 92, 110, 62, 176, 5, 218, 
168, 190, + 172, 0, 0, 0, 0, 0, 0, 0, 172, 0, 0, 0, 0, 0, 0, 0, 172, 0, 0, 0, 0, 0, 0, 0, 172, 0, 0, 0, 0, 0, 0, 0, + 0, 173, 0, 0, 0, 0, 0, 0, 0, 15, 0, 0, 0, 0, 0, 0, 0, 173, 0, 0, 0, 0, 0, 0, 0, 250, 0, 0, 0, 0, 0, 0, + 0, 173, 0, 0, 0, 0, 0, 0, 0, 173, 0, 0, 0, 0, 0, 0, 0, 173, 0, 0, 0, 0, 0, 0, 0, 173, 0, 0, 0, 0, 0, 0, + 0, 250, 0, 0, 0, 0, 0, 0, 0, 173, 0, 0, 0, 0, 0, 0, 0, 173, 0, 0, 0, 0, 0, 0, 0, 250, 0, 0, 0, 0, 0, 0, + 172, 0, 0, 0, 0, 0, 0, 0, 172, 0, 0, 0, 0, 0, 0, 0, 172, 0, 0, 0, 0, 0, 0, 0, 110, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 74, 176, 5, 194, 167, 209, 0, 0, 6, 176, 5, 109, 231, 111, 0, 0, 74, 176, 124, 92, 72, 65, 0, 0, 74, 176, 5, 42, 28, 209, + 0, 0, 74, 176, 5, 42, 28, 24, 0, 0, 74, 176, 219, 194, 35, 35, 0, 0, 74, 176, 5, 12, 120, 209, 86, 0, 74, 176, 5, 42, 231, 60, + 0, 0, 110, 176, 5, 42, 167, 46, 0, 0, 110, 176, 5, 11, 168, 253, 65, 0, 110, 176, 5, 42, 231, 116, 20, 0, 110, 176, 5, 218, 120, 190, + 172, 0, 0, 0, 0, 0, 0, 0, 172, 0, 0, 0, 0, 0, 0, 0, 107, 0, 0, 0, 0, 0, 0, 0, 107, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 110, 176, 5, 92, 166, 183, 0, 0, 110, 176, 5, 42, 229, 111, 0, 0, 110, 176, 5, 11, 38, 112, 23, 0, 110, 176, 5, 218, 168, 209, + 0, 0, 110, 176, 5, 42, 38, 46, 0, 0, 110, 176, 5, 11, 229, 111, 0, 0, 110, 176, 5, 11, 126, 116, 28, 0, 110, 176, 5, 218, 168, 112, + 48, 0, 110, 176, 5, 42, 165, 112, 0, 0, 110, 176, 5, 42, 229, 0, 229, 0, 110, 176, 5, 42, 38, 65, 120, 0, 110, 176, 5, 218, 120, 190, + 224, 0, 0, 0, 0, 0, 0, 0, 67, 0, 0, 0, 0, 0, 0, 0, 103, 0, 0, 0, 0, 0, 0, 0, 255, 0, 0, 0, 0, 0, 0, 0, + 179, 159, 6, 176, 5, 11, 229, 137, 0, 159, 6, 176, 207, 195, 187, 229, 166, 234, 6, 176, 156, 218, 46, 221, 0, 74, 62, 176, 99, 170, 35, 60, + 128, 140, 62, 75, 99, 180, 38, 244, 0, 234, 6, 176, 124, 218, 53, 229, 84, 234, 6, 176, 99, 218, 50, 0, 0, 74, 62, 176, 156, 86, 168, 0, + 201, 54, 74, 176, 99, 42, 168, 112, 0, 74, 62, 176, 5, 42, 229, 65, 0, 74, 62, 176, 5, 180, 28, 190, 8, 110, 62, 176, 5, 218, 187, 190, + 150, 0, 0, 0, 0, 0, 0, 0, 
150, 0, 0, 0, 0, 0, 0, 0, 15, 0, 0, 0, 0, 0, 0, 0, 51, 0, 0, 0, 0, 0, 0, 0, + 0, 159, 0, 0, 0, 0, 0, 0, 0, 82, 0, 0, 0, 0, 0, 0, 0, 172, 0, 0, 0, 0, 0, 0, 0, 238, 0, 0, 0, 0, 0, 0, + 0, 140, 0, 0, 0, 0, 0, 0, 0, 140, 0, 0, 0, 0, 0, 0, 0, 140, 0, 0, 0, 0, 0, 0, 0, 172, 0, 0, 0, 0, 0, 0, + 0, 172, 0, 0, 0, 0, 0, 0, 0, 62, 0, 0, 0, 0, 0, 0, 0, 110, 0, 0, 0, 0, 0, 0, 0, 110, 0, 0, 0, 0, 0, 0, + 15, 0, 0, 0, 0, 0, 0, 0, 15, 0, 0, 0, 0, 0, 0, 0, 15, 0, 0, 0, 0, 0, 0, 0, 173, 0, 0, 0, 0, 0, 0, 0, + 0, 140, 74, 176, 232, 12, 78, 156, 0, 141, 6, 30, 219, 192, 92, 190, 0, 141, 6, 49, 99, 92, 203, 165, 0, 47, 62, 30, 5, 218, 231, 76, + 0, 140, 74, 40, 232, 198, 76, 15, 0, 141, 6, 75, 207, 203, 23, 190, 0, 234, 6, 49, 138, 23, 215, 0, 0, 54, 62, 40, 192, 72, 92, 76, + 0, 54, 110, 176, 161, 11, 94, 60, 0, 47, 74, 237, 208, 12, 177, 151, 0, 123, 6, 32, 21, 12, 50, 166, 0, 110, 62, 176, 5, 92, 76, 244, + 51, 0, 0, 0, 0, 0, 0, 0, 173, 0, 0, 0, 0, 0, 0, 0, 173, 0, 0, 0, 0, 0, 0, 0, 250, 0, 0, 0, 0, 0, 0, 0, + 0, 47, 74, 176, 192, 166, 76, 163, 0, 47, 62, 176, 99, 12, 111, 190, 0, 74, 74, 176, 99, 166, 218, 0, 4, 54, 62, 176, 5, 42, 28, 209, + 198, 54, 62, 89, 192, 166, 46, 35, 0, 6, 62, 176, 207, 166, 165, 157, 0, 234, 6, 75, 156, 144, 12, 24, 0, 110, 62, 176, 5, 194, 28, 163, + 108, 110, 110, 89, 161, 166, 35, 111, 0, 74, 62, 176, 99, 92, 229, 163, 21, 6, 62, 176, 156, 53, 38, 65, 167, 110, 62, 176, 5, 218, 167, 253, + 224, 0, 0, 0, 0, 0, 0, 0, 67, 0, 0, 0, 0, 0, 0, 0, 103, 0, 0, 0, 0, 0, 0, 0, 255, 0, 0, 0, 0, 0, 0, 0, + 0, 82, 0, 0, 0, 0, 0, 0, 0, 159, 0, 0, 0, 0, 0, 0, 0, 140, 0, 0, 0, 0, 0, 0, 0, 110, 0, 0, 0, 0, 0, 0, + 0, 172, 0, 0, 0, 0, 0, 0, 0, 141, 0, 0, 0, 0, 0, 0, 0, 234, 0, 0, 0, 0, 0, 0, 0, 74, 0, 0, 0, 0, 0, 0, + 0, 238, 0, 0, 0, 0, 0, 0, 0, 47, 0, 0, 0, 0, 0, 0, 0, 74, 0, 0, 0, 0, 0, 0, 0, 110, 0, 0, 0, 0, 0, 0, + 15, 0, 0, 0, 0, 0, 0, 0, 82, 0, 0, 0, 0, 0, 0, 0, 103, 0, 0, 0, 0, 0, 0, 0, 250, 0, 0, 0, 0, 0, 0, 0, + 0, 82, 0, 0, 0, 0, 0, 0, 0, 159, 0, 0, 0, 0, 0, 
0, 0, 140, 0, 0, 0, 0, 0, 0, 0, 123, 0, 0, 0, 0, 0, 0, + 0, 172, 0, 0, 0, 0, 0, 0, 0, 141, 0, 0, 0, 0, 0, 0, 0, 140, 0, 0, 0, 0, 0, 0, 0, 47, 0, 0, 0, 0, 0, 0, + 0, 172, 0, 0, 0, 0, 0, 0, 0, 47, 0, 0, 0, 0, 0, 0, 0, 74, 0, 0, 0, 0, 0, 0, 0, 110, 0, 0, 0, 0, 0, 0, + 15, 0, 0, 0, 0, 0, 0, 0, 34, 0, 0, 0, 0, 0, 0, 0, 172, 0, 0, 0, 0, 0, 0, 0, 250, 0, 0, 0, 0, 0, 0, 0, + 0, 140, 6, 75, 99, 92, 11, 46, 0, 141, 6, 197, 5, 72, 187, 168, 0, 140, 6, 164, 232, 65, 190, 102, 0, 172, 74, 176, 153, 120, 244, 243, + 0, 141, 6, 30, 153, 161, 166, 221, 0, 140, 6, 30, 85, 180, 109, 76, 0, 140, 22, 75, 18, 166, 22, 0, 0, 74, 62, 3, 156, 219, 73, 157, + 0, 47, 62, 176, 99, 92, 167, 249, 0, 123, 62, 176, 243, 92, 11, 5, 0, 242, 22, 49, 100, 144, 168, 168, 144, 6, 62, 176, 5, 72, 120, 17, + 224, 0, 0, 0, 0, 0, 0, 0, 82, 0, 0, 0, 0, 0, 0, 0, 172, 0, 0, 0, 0, 0, 0, 0, 255, 0, 0, 0, 0, 0, 0, 0, + 0, 172, 74, 176, 5, 128, 203, 190, 0, 47, 6, 176, 207, 194, 229, 221, 0, 234, 62, 176, 18, 11, 187, 108, 0, 74, 62, 176, 156, 144, 38, 190, + 0, 54, 62, 3, 99, 198, 229, 50, 0, 6, 6, 30, 153, 11, 92, 65, 0, 234, 2, 176, 5, 203, 229, 46, 0, 110, 62, 176, 243, 198, 111, 120, + 0, 172, 62, 176, 5, 42, 231, 17, 0, 47, 62, 176, 5, 170, 187, 0, 0, 74, 62, 176, 156, 23, 76, 0, 0, 110, 62, 176, 5, 218, 120, 60, + 224, 0, 0, 0, 0, 0, 0, 0, 67, 0, 0, 0, 0, 0, 0, 0, 103, 0, 0, 0, 0, 0, 0, 0, 172, 0, 0, 0, 0, 0, 0, 0, + 0, 140, 6, 75, 249, 195, 60, 60, 0, 141, 6, 75, 211, 92, 166, 249, 0, 234, 6, 237, 32, 12, 209, 111, 0, 47, 74, 176, 243, 161, 5, 142, + 0, 140, 6, 3, 207, 53, 194, 126, 0, 141, 6, 176, 124, 207, 204, 137, 0, 234, 6, 179, 241, 194, 184, 65, 18, 6, 62, 176, 49, 60, 28, 12, + 0, 246, 74, 176, 99, 11, 35, 60, 0, 47, 62, 176, 31, 195, 231, 112, 0, 21, 62, 75, 31, 122, 195, 89, 0, 123, 62, 176, 5, 218, 111, 0, + 15, 0, 0, 0, 0, 0, 0, 0, 82, 0, 0, 0, 0, 0, 0, 0, 82, 0, 0, 0, 0, 0, 0, 0, 173, 0, 0, 0, 0, 0, 0, 0, + 0, 172, 0, 0, 0, 0, 0, 0, 0, 140, 0, 0, 0, 0, 0, 0, 0, 246, 0, 0, 0, 0, 0, 0, 0, 172, 
0, 0, 0, 0, 0, 0, + 0, 172, 0, 0, 0, 0, 0, 0, 0, 140, 0, 0, 0, 0, 0, 0, 0, 140, 0, 0, 0, 0, 0, 0, 0, 172, 0, 0, 0, 0, 0, 0, + 0, 172, 0, 0, 0, 0, 0, 0, 0, 172, 0, 0, 0, 0, 0, 0, 0, 172, 0, 0, 0, 0, 0, 0, 0, 123, 0, 0, 0, 0, 0, 0, + 15, 0, 0, 0, 0, 0, 0, 0, 15, 0, 0, 0, 0, 0, 0, 0, 15, 0, 0, 0, 0, 0, 0, 0, 173, 0, 0, 0, 0, 0, 0, 0, + 0, 172, 0, 0, 0, 0, 0, 0, 0, 141, 0, 0, 0, 0, 0, 0, 0, 140, 0, 0, 0, 0, 0, 0, 0, 123, 0, 0, 0, 0, 0, 0, + 0, 172, 0, 0, 0, 0, 0, 0, 0, 140, 0, 0, 0, 0, 0, 0, 0, 140, 0, 0, 0, 0, 0, 0, 0, 123, 0, 0, 0, 0, 0, 0, + 0, 54, 0, 0, 0, 0, 0, 0, 0, 172, 0, 0, 0, 0, 0, 0, 0, 123, 0, 0, 0, 0, 0, 0, 0, 110, 0, 0, 0, 0, 0, 0, + 181, 0, 0, 0, 0, 0, 0, 0, 15, 0, 0, 0, 0, 0, 0, 0, 173, 0, 0, 0, 0, 0, 0, 0, 255, 0, 0, 0, 0, 0, 0, 0, + 0, 172, 0, 0, 0, 0, 0, 0, 0, 172, 0, 0, 0, 0, 0, 0, 0, 172, 0, 0, 0, 0, 0, 0, 0, 123, 0, 0, 0, 0, 0, 0, + 0, 123, 0, 0, 0, 0, 0, 0, 0, 246, 0, 0, 0, 0, 0, 0, 0, 47, 0, 0, 0, 0, 0, 0, 0, 110, 0, 0, 0, 0, 0, 0, + 0, 172, 0, 0, 0, 0, 0, 0, 0, 110, 0, 0, 0, 0, 0, 0, 0, 74, 0, 0, 0, 0, 0, 0, 0, 110, 0, 0, 0, 0, 0, 0, + 224, 0, 0, 0, 0, 0, 0, 0, 82, 0, 0, 0, 0, 0, 0, 0, 172, 0, 0, 0, 0, 0, 0, 0, 107, 0, 0, 0, 0, 0, 0, 0, + 0, 159, 74, 176, 99, 23, 35, 60, 0, 47, 6, 75, 5, 12, 231, 0, 104, 74, 62, 176, 153, 11, 76, 111, 0, 74, 62, 176, 99, 194, 53, 120, + 0, 140, 74, 176, 99, 53, 195, 209, 0, 47, 6, 176, 5, 170, 229, 209, 0, 6, 6, 176, 156, 53, 157, 50, 0, 74, 62, 176, 5, 53, 170, 209, + 163, 172, 62, 176, 99, 42, 35, 112, 0, 74, 62, 176, 5, 218, 38, 0, 0, 110, 62, 176, 5, 42, 28, 24, 0, 110, 62, 176, 5, 218, 120, 190, + 15, 0, 0, 0, 0, 0, 0, 0, 173, 0, 0, 0, 0, 0, 0, 0, 173, 0, 0, 0, 0, 0, 0, 0, 250, 0, 0, 0, 0, 0, 0, 0, + 0, 172, 0, 0, 0, 0, 0, 0, 0, 172, 0, 0, 0, 0, 0, 0, 0, 54, 0, 0, 0, 0, 0, 0, 0, 107, 0, 0, 0, 0, 0, 0, + 0, 172, 0, 0, 0, 0, 0, 0, 0, 47, 0, 0, 0, 0, 0, 0, 0, 54, 0, 0, 0, 0, 0, 0, 0, 74, 0, 0, 0, 0, 0, 0, + 0, 172, 0, 0, 0, 0, 0, 0, 0, 107, 0, 0, 0, 0, 0, 0, 0, 74, 0, 0, 0, 0, 0, 0, 0, 107, 0, 0, 0, 0, 0, 0, 
+ 173, 0, 0, 0, 0, 0, 0, 0, 173, 0, 0, 0, 0, 0, 0, 0, 173, 0, 0, 0, 0, 0, 0, 0, 255, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 74, 200, 124, 195, 28, 229, 0, 0, 6, 176, 219, 60, 229, 0, 0, 0, 74, 75, 208, 56, 24, 255, 0, 0, 62, 176, 99, 109, 28, 0, + 0, 0, 74, 30, 5, 218, 120, 165, 0, 0, 74, 75, 153, 111, 207, 253, 0, 0, 6, 179, 243, 109, 22, 38, 0, 0, 62, 176, 5, 218, 167, 12, + 0, 0, 74, 176, 161, 109, 111, 50, 0, 0, 74, 75, 156, 92, 187, 170, 140, 0, 110, 91, 122, 194, 180, 35, 65, 0, 62, 176, 5, 23, 168, 0, + 181, 0, 0, 0, 0, 0, 0, 0, 173, 0, 0, 0, 0, 0, 0, 0, 172, 0, 0, 0, 0, 0, 0, 0, 255, 0, 0, 0, 0, 0, 0, 0, + 24, 0, 74, 176, 192, 92, 165, 60, 0, 0, 74, 176, 219, 195, 229, 111, 0, 0, 74, 176, 99, 53, 180, 24, 49, 0, 110, 176, 5, 42, 35, 112, + 0, 0, 62, 176, 99, 166, 111, 28, 0, 0, 62, 176, 5, 166, 180, 65, 0, 0, 6, 176, 5, 53, 229, 253, 2, 0, 110, 176, 5, 218, 168, 60, + 35, 0, 110, 176, 192, 166, 165, 183, 118, 0, 62, 176, 5, 42, 165, 46, 215, 0, 62, 176, 5, 42, 168, 116, 12, 0, 110, 176, 5, 218, 120, 190, + 63, 0, 0, 0, 0, 0, 0, 0, 150, 0, 0, 0, 0, 0, 0, 0, 15, 0, 0, 0, 0, 0, 0, 0, 250, 0, 0, 0, 0, 0, 0, 0, + 0, 159, 6, 176, 207, 195, 229, 60, 11, 159, 6, 75, 207, 11, 229, 112, 0, 140, 6, 176, 5, 218, 111, 60, 0, 54, 62, 176, 5, 42, 168, 221, + 0, 140, 6, 176, 99, 92, 53, 190, 0, 234, 6, 176, 5, 11, 35, 28, 0, 234, 6, 176, 5, 92, 203, 126, 23, 74, 62, 176, 5, 42, 76, 190, + 0, 54, 62, 176, 99, 198, 35, 111, 0, 74, 62, 176, 5, 180, 38, 112, 0, 110, 62, 176, 5, 218, 231, 112, 221, 110, 62, 176, 5, 218, 120, 190, + 150, 0, 0, 0, 0, 0, 0, 0, 150, 0, 0, 0, 0, 0, 0, 0, 150, 0, 0, 0, 0, 0, 0, 0, 51, 0, 0, 0, 0, 0, 0, 0, + 0, 159, 0, 0, 0, 0, 0, 0, 0, 82, 0, 0, 0, 0, 0, 0, 0, 172, 0, 0, 0, 0, 0, 0, 0, 238, 0, 0, 0, 0, 0, 0, + 0, 172, 0, 0, 0, 0, 0, 0, 0, 140, 0, 0, 0, 0, 0, 0, 0, 159, 0, 0, 0, 0, 0, 0, 0, 47, 0, 0, 0, 0, 0, 0, + 0, 172, 0, 0, 0, 0, 0, 0, 0, 123, 0, 0, 0, 0, 0, 0, 0, 123, 0, 0, 0, 0, 0, 0, 0, 110, 0, 0, 0, 0, 0, 0, + 150, 0, 0, 0, 0, 0, 0, 0, 15, 0, 0, 0, 0, 
0, 0, 0, 15, 0, 0, 0, 0, 0, 0, 0, 15, 0, 0, 0, 0, 0, 0, 0, + 0, 140, 6, 176, 5, 42, 46, 126, 50, 140, 6, 176, 5, 11, 42, 46, 0, 140, 6, 176, 5, 12, 229, 229, 253, 172, 110, 176, 5, 42, 35, 209, + 0, 234, 6, 176, 122, 211, 35, 210, 0, 140, 6, 176, 208, 42, 165, 112, 0, 141, 6, 176, 219, 198, 20, 253, 0, 47, 6, 176, 5, 42, 35, 209, + 0, 110, 110, 176, 5, 42, 35, 229, 0, 110, 110, 176, 5, 218, 229, 28, 0, 110, 110, 176, 5, 92, 92, 190, 0, 110, 110, 176, 5, 218, 168, 46, + 51, 0, 0, 0, 0, 0, 0, 0, 15, 0, 0, 0, 0, 0, 0, 0, 173, 0, 0, 0, 0, 0, 0, 0, 250, 0, 0, 0, 0, 0, 0, 0, + 120, 47, 110, 176, 5, 42, 35, 221, 237, 172, 110, 176, 5, 11, 35, 60, 0, 172, 110, 176, 5, 42, 229, 221, 190, 172, 110, 176, 5, 218, 168, 209, + 0, 54, 110, 176, 5, 198, 168, 28, 0, 110, 62, 176, 5, 42, 187, 209, 0, 47, 6, 176, 5, 42, 229, 46, 137, 47, 62, 176, 5, 218, 120, 209, + 38, 110, 110, 176, 5, 42, 165, 112, 0, 110, 110, 176, 5, 92, 229, 221, 16, 110, 110, 176, 5, 42, 38, 60, 65, 110, 110, 176, 5, 218, 120, 190, + 150, 0, 0, 0, 0, 0, 0, 0, 15, 0, 0, 0, 0, 0, 0, 0, 173, 0, 0, 0, 0, 0, 0, 0, 250, 0, 0, 0, 0, 0, 0, 0, + 0, 159, 0, 0, 0, 0, 0, 0, 0, 82, 0, 0, 0, 0, 0, 0, 0, 140, 0, 0, 0, 0, 0, 0, 0, 172, 0, 0, 0, 0, 0, 0, + 0, 172, 0, 0, 0, 0, 0, 0, 0, 140, 0, 0, 0, 0, 0, 0, 0, 140, 0, 0, 0, 0, 0, 0, 0, 110, 0, 0, 0, 0, 0, 0, + 0, 172, 0, 0, 0, 0, 0, 0, 0, 172, 0, 0, 0, 0, 0, 0, 0, 110, 0, 0, 0, 0, 0, 0, 0, 110, 0, 0, 0, 0, 0, 0, + 150, 0, 0, 0, 0, 0, 0, 0, 150, 0, 0, 0, 0, 0, 0, 0, 15, 0, 0, 0, 0, 0, 0, 0, 173, 0, 0, 0, 0, 0, 0, 0, + 0, 82, 0, 0, 0, 0, 0, 0, 0, 82, 0, 0, 0, 0, 0, 0, 0, 172, 0, 0, 0, 0, 0, 0, 0, 172, 0, 0, 0, 0, 0, 0, + 0, 172, 0, 0, 0, 0, 0, 0, 0, 140, 0, 0, 0, 0, 0, 0, 0, 140, 0, 0, 0, 0, 0, 0, 0, 172, 0, 0, 0, 0, 0, 0, + 0, 172, 0, 0, 0, 0, 0, 0, 0, 172, 0, 0, 0, 0, 0, 0, 0, 123, 0, 0, 0, 0, 0, 0, 0, 110, 0, 0, 0, 0, 0, 0, + 51, 0, 0, 0, 0, 0, 0, 0, 15, 0, 0, 0, 0, 0, 0, 0, 173, 0, 0, 0, 0, 0, 0, 0, 173, 0, 0, 0, 0, 0, 0, 0, + 0, 140, 6, 176, 5, 11, 168, 231, 0, 140, 6, 176, 
5, 12, 231, 38, 0, 140, 6, 176, 5, 12, 203, 157, 0, 47, 110, 176, 5, 42, 28, 112, + 0, 140, 6, 176, 5, 195, 12, 231, 0, 234, 6, 176, 176, 218, 111, 41, 0, 140, 6, 176, 5, 92, 229, 128, 0, 110, 110, 176, 5, 218, 126, 221, + 0, 172, 110, 176, 5, 5, 126, 221, 0, 172, 110, 176, 5, 72, 218, 221, 0, 123, 62, 176, 243, 11, 195, 0, 0, 110, 62, 176, 5, 218, 168, 112, + 51, 0, 0, 0, 0, 0, 0, 0, 173, 0, 0, 0, 0, 0, 0, 0, 173, 0, 0, 0, 0, 0, 0, 0, 250, 0, 0, 0, 0, 0, 0, 0, + 209, 47, 6, 176, 5, 42, 111, 120, 0, 159, 6, 176, 5, 42, 229, 209, 0, 54, 110, 176, 5, 198, 38, 0, 10, 54, 110, 176, 5, 218, 168, 209, + 0, 54, 110, 176, 5, 42, 35, 253, 0, 54, 6, 176, 5, 12, 35, 41, 0, 110, 6, 176, 5, 218, 35, 46, 0, 110, 110, 176, 5, 42, 20, 244, + 0, 110, 110, 176, 5, 218, 167, 209, 0, 110, 110, 176, 5, 198, 35, 190, 0, 110, 110, 176, 5, 218, 120, 209, 198, 110, 110, 176, 5, 218, 168, 163, + 51, 0, 0, 0, 0, 0, 0, 0, 15, 0, 0, 0, 0, 0, 0, 0, 173, 0, 0, 0, 0, 0, 0, 0, 173, 0, 0, 0, 0, 0, 0, 0, + 28, 140, 6, 176, 5, 11, 168, 198, 0, 140, 6, 176, 208, 198, 229, 187, 0, 140, 6, 176, 211, 12, 229, 229, 0, 47, 62, 176, 5, 12, 38, 65, + 0, 140, 6, 176, 219, 176, 194, 221, 0, 234, 6, 176, 5, 156, 76, 65, 0, 234, 6, 176, 122, 72, 229, 65, 0, 47, 62, 176, 5, 11, 28, 65, + 190, 172, 110, 176, 5, 195, 42, 209, 0, 47, 110, 176, 99, 218, 38, 60, 0, 74, 62, 176, 5, 11, 209, 0, 0, 110, 62, 176, 5, 42, 231, 76, + 15, 0, 0, 0, 0, 0, 0, 0, 15, 0, 0, 0, 0, 0, 0, 0, 15, 0, 0, 0, 0, 0, 0, 0, 173, 0, 0, 0, 0, 0, 0, 0, + 0, 172, 0, 0, 0, 0, 0, 0, 0, 172, 0, 0, 0, 0, 0, 0, 0, 172, 0, 0, 0, 0, 0, 0, 0, 172, 0, 0, 0, 0, 0, 0, + 0, 172, 0, 0, 0, 0, 0, 0, 0, 172, 0, 0, 0, 0, 0, 0, 0, 172, 0, 0, 0, 0, 0, 0, 0, 172, 0, 0, 0, 0, 0, 0, + 0, 172, 0, 0, 0, 0, 0, 0, 0, 172, 0, 0, 0, 0, 0, 0, 0, 172, 0, 0, 0, 0, 0, 0, 0, 172, 0, 0, 0, 0, 0, 0, + 15, 0, 0, 0, 0, 0, 0, 0, 15, 0, 0, 0, 0, 0, 0, 0, 15, 0, 0, 0, 0, 0, 0, 0, 173, 0, 0, 0, 0, 0, 0, 0, + 0, 140, 0, 0, 0, 0, 0, 0, 0, 172, 0, 0, 0, 0, 0, 0, 0, 172, 0, 0, 0, 0, 0, 0, 
0, 172, 0, 0, 0, 0, 0, 0, + 0, 172, 0, 0, 0, 0, 0, 0, 0, 140, 0, 0, 0, 0, 0, 0, 0, 140, 0, 0, 0, 0, 0, 0, 0, 172, 0, 0, 0, 0, 0, 0, + 0, 172, 0, 0, 0, 0, 0, 0, 0, 172, 0, 0, 0, 0, 0, 0, 0, 172, 0, 0, 0, 0, 0, 0, 0, 110, 0, 0, 0, 0, 0, 0, + 51, 0, 0, 0, 0, 0, 0, 0, 173, 0, 0, 0, 0, 0, 0, 0, 173, 0, 0, 0, 0, 0, 0, 0, 250, 0, 0, 0, 0, 0, 0, 0, + 0, 47, 0, 0, 0, 0, 0, 0, 0, 172, 0, 0, 0, 0, 0, 0, 0, 172, 0, 0, 0, 0, 0, 0, 0, 172, 0, 0, 0, 0, 0, 0, + 0, 172, 0, 0, 0, 0, 0, 0, 0, 54, 0, 0, 0, 0, 0, 0, 0, 47, 0, 0, 0, 0, 0, 0, 0, 110, 0, 0, 0, 0, 0, 0, + 0, 172, 0, 0, 0, 0, 0, 0, 0, 110, 0, 0, 0, 0, 0, 0, 0, 110, 0, 0, 0, 0, 0, 0, 0, 110, 0, 0, 0, 0, 0, 0, + 51, 0, 0, 0, 0, 0, 0, 0, 173, 0, 0, 0, 0, 0, 0, 0, 173, 0, 0, 0, 0, 0, 0, 0, 250, 0, 0, 0, 0, 0, 0, 0, + 212, 47, 6, 176, 5, 11, 157, 46, 209, 47, 6, 176, 5, 42, 229, 60, 0, 140, 6, 176, 5, 12, 76, 46, 0, 54, 110, 176, 5, 42, 168, 209, + 0, 54, 110, 176, 99, 42, 168, 0, 0, 110, 6, 176, 5, 42, 229, 0, 0, 234, 6, 176, 5, 12, 38, 46, 0, 110, 110, 176, 5, 218, 187, 46, + 0, 172, 110, 176, 5, 42, 28, 244, 0, 110, 110, 176, 5, 42, 231, 50, 0, 110, 62, 176, 5, 11, 38, 244, 245, 110, 110, 176, 5, 218, 120, 190, + 51, 0, 0, 0, 0, 0, 0, 0, 173, 0, 0, 0, 0, 0, 0, 0, 173, 0, 0, 0, 0, 0, 0, 0, 250, 0, 0, 0, 0, 0, 0, 0, + 0, 172, 0, 0, 0, 0, 0, 0, 0, 172, 0, 0, 0, 0, 0, 0, 0, 172, 0, 0, 0, 0, 0, 0, 0, 172, 0, 0, 0, 0, 0, 0, + 0, 172, 0, 0, 0, 0, 0, 0, 0, 172, 0, 0, 0, 0, 0, 0, 0, 172, 0, 0, 0, 0, 0, 0, 0, 172, 0, 0, 0, 0, 0, 0, + 0, 172, 0, 0, 0, 0, 0, 0, 0, 123, 0, 0, 0, 0, 0, 0, 0, 123, 0, 0, 0, 0, 0, 0, 0, 107, 0, 0, 0, 0, 0, 0, + 51, 0, 0, 0, 0, 0, 0, 0, 173, 0, 0, 0, 0, 0, 0, 0, 173, 0, 0, 0, 0, 0, 0, 0, 250, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 110, 176, 5, 11, 187, 53, 0, 0, 110, 176, 5, 195, 53, 126, 0, 0, 110, 176, 5, 11, 168, 253, 8, 0, 110, 176, 5, 42, 168, 46, + 46, 0, 110, 176, 99, 53, 42, 108, 0, 0, 110, 176, 99, 218, 168, 168, 0, 0, 6, 176, 5, 42, 180, 46, 0, 0, 110, 176, 5, 42, 231, 60, + 0, 0, 110, 176, 5, 42, 187, 0, 0, 
0, 110, 176, 5, 12, 229, 111, 0, 0, 110, 176, 5, 42, 231, 229, 224, 0, 110, 176, 5, 218, 168, 190, + 51, 0, 0, 0, 0, 0, 0, 0, 173, 0, 0, 0, 0, 0, 0, 0, 173, 0, 0, 0, 0, 0, 0, 0, 250, 0, 0, 0, 0, 0, 0, 0, + 203, 0, 110, 176, 99, 42, 231, 183, 208, 0, 110, 176, 5, 42, 229, 111, 192, 0, 110, 176, 5, 11, 229, 221, 76, 0, 110, 176, 5, 42, 168, 209, + 0, 0, 110, 176, 5, 198, 35, 60, 0, 0, 110, 176, 5, 11, 229, 112, 213, 0, 110, 176, 5, 12, 38, 46, 168, 0, 110, 176, 5, 218, 120, 190, + 168, 0, 110, 176, 5, 42, 35, 183, 28, 0, 110, 176, 5, 42, 157, 112, 75, 0, 110, 176, 5, 42, 229, 137, 20, 0, 110, 176, 5, 218, 120, 190, + 224, 0, 0, 0, 0, 0, 0, 0, 7, 0, 0, 0, 0, 0, 0, 0, 140, 0, 0, 0, 0, 0, 0, 0, 107, 0, 0, 0, 0, 0, 0, 0, + 35, 159, 6, 176, 5, 11, 35, 221, 25, 159, 6, 176, 5, 11, 35, 65, 0, 234, 6, 176, 5, 42, 28, 244, 0, 110, 62, 176, 5, 42, 28, 0, + 112, 140, 6, 176, 5, 42, 35, 46, 209, 234, 6, 176, 5, 11, 231, 112, 0, 234, 6, 176, 5, 198, 229, 46, 0, 110, 62, 176, 5, 42, 231, 24, + 163, 172, 62, 176, 5, 42, 168, 209, 198, 47, 62, 176, 5, 42, 168, 137, 0, 110, 62, 176, 5, 42, 165, 253, 0, 110, 110, 176, 5, 218, 168, 190, + 82, 0, 0, 0, 0, 0, 0, 0, 82, 0, 0, 0, 0, 0, 0, 0, 173, 0, 0, 0, 0, 0, 0, 0, 172, 0, 0, 0, 0, 0, 0, 0, + 0, 82, 0, 0, 0, 0, 0, 0, 0, 82, 0, 0, 0, 0, 0, 0, 0, 172, 0, 0, 0, 0, 0, 0, 0, 172, 0, 0, 0, 0, 0, 0, + 0, 172, 0, 0, 0, 0, 0, 0, 0, 140, 0, 0, 0, 0, 0, 0, 0, 140, 0, 0, 0, 0, 0, 0, 0, 172, 0, 0, 0, 0, 0, 0, + 0, 172, 0, 0, 0, 0, 0, 0, 0, 172, 0, 0, 0, 0, 0, 0, 0, 172, 0, 0, 0, 0, 0, 0, 0, 107, 0, 0, 0, 0, 0, 0, + 173, 0, 0, 0, 0, 0, 0, 0, 82, 0, 0, 0, 0, 0, 0, 0, 172, 0, 0, 0, 0, 0, 0, 0, 173, 0, 0, 0, 0, 0, 0, 0, + 0, 140, 6, 176, 5, 198, 168, 168, 0, 140, 6, 176, 18, 194, 28, 168, 0, 140, 6, 176, 207, 12, 231, 35, 120, 47, 6, 176, 5, 218, 120, 190, + 0, 140, 6, 176, 161, 198, 76, 112, 0, 140, 6, 176, 5, 138, 116, 35, 0, 234, 6, 176, 5, 194, 126, 50, 0, 47, 62, 176, 5, 12, 229, 38, + 0, 172, 110, 176, 5, 11, 35, 76, 0, 47, 62, 176, 5, 42, 157, 
163, 0, 110, 6, 176, 5, 218, 157, 35, 144, 110, 110, 176, 5, 218, 167, 17, + 173, 0, 0, 0, 0, 0, 0, 0, 172, 0, 0, 0, 0, 0, 0, 0, 172, 0, 0, 0, 0, 0, 0, 0, 255, 0, 0, 0, 0, 0, 0, 0, + 0, 47, 110, 176, 99, 42, 35, 209, 0, 47, 6, 176, 5, 42, 35, 60, 0, 54, 62, 176, 5, 194, 229, 65, 0, 110, 110, 176, 5, 218, 28, 209, + 0, 54, 110, 176, 5, 11, 35, 35, 144, 6, 6, 176, 5, 42, 168, 17, 0, 110, 110, 176, 5, 12, 167, 209, 194, 110, 110, 176, 5, 218, 168, 244, + 158, 172, 110, 176, 5, 42, 35, 209, 126, 110, 62, 176, 5, 42, 231, 65, 0, 110, 62, 176, 5, 42, 231, 65, 194, 110, 110, 176, 5, 218, 120, 190, + 82, 0, 0, 0, 0, 0, 0, 0, 159, 0, 0, 0, 0, 0, 0, 0, 140, 0, 0, 0, 0, 0, 0, 0, 172, 0, 0, 0, 0, 0, 0, 0, + 0, 82, 0, 0, 0, 0, 0, 0, 0, 159, 0, 0, 0, 0, 0, 0, 0, 140, 0, 0, 0, 0, 0, 0, 0, 110, 0, 0, 0, 0, 0, 0, + 0, 172, 0, 0, 0, 0, 0, 0, 0, 159, 0, 0, 0, 0, 0, 0, 0, 140, 0, 0, 0, 0, 0, 0, 0, 110, 0, 0, 0, 0, 0, 0, + 0, 172, 0, 0, 0, 0, 0, 0, 0, 47, 0, 0, 0, 0, 0, 0, 0, 54, 0, 0, 0, 0, 0, 0, 0, 110, 0, 0, 0, 0, 0, 0, + 82, 0, 0, 0, 0, 0, 0, 0, 159, 0, 0, 0, 0, 0, 0, 0, 140, 0, 0, 0, 0, 0, 0, 0, 123, 0, 0, 0, 0, 0, 0, 0, + 0, 82, 0, 0, 0, 0, 0, 0, 0, 159, 0, 0, 0, 0, 0, 0, 0, 140, 0, 0, 0, 0, 0, 0, 0, 172, 0, 0, 0, 0, 0, 0, + 0, 172, 0, 0, 0, 0, 0, 0, 0, 159, 0, 0, 0, 0, 0, 0, 0, 140, 0, 0, 0, 0, 0, 0, 0, 110, 0, 0, 0, 0, 0, 0, + 0, 172, 0, 0, 0, 0, 0, 0, 0, 47, 0, 0, 0, 0, 0, 0, 0, 54, 0, 0, 0, 0, 0, 0, 0, 110, 0, 0, 0, 0, 0, 0, + 172, 0, 0, 0, 0, 0, 0, 0, 159, 0, 0, 0, 0, 0, 0, 0, 140, 0, 0, 0, 0, 0, 0, 0, 172, 0, 0, 0, 0, 0, 0, 0, + 0, 140, 6, 176, 5, 42, 168, 41, 0, 141, 6, 176, 5, 12, 126, 28, 0, 234, 6, 176, 243, 198, 46, 108, 0, 110, 62, 176, 5, 42, 35, 163, + 31, 140, 6, 176, 5, 11, 35, 108, 0, 141, 6, 176, 99, 198, 187, 76, 0, 234, 6, 217, 5, 12, 199, 65, 0, 110, 62, 176, 5, 42, 209, 116, + 112, 172, 110, 176, 5, 42, 35, 108, 35, 47, 6, 176, 5, 42, 167, 0, 0, 54, 6, 176, 5, 218, 168, 112, 0, 110, 110, 176, 5, 218, 168, 209, + 172, 0, 0, 0, 0, 0, 0, 0, 47, 0, 0, 0, 0, 0, 
0, 0, 54, 0, 0, 0, 0, 0, 0, 0, 110, 0, 0, 0, 0, 0, 0, 0, + 0, 172, 110, 176, 5, 42, 35, 0, 112, 47, 6, 176, 5, 42, 168, 60, 0, 54, 62, 176, 5, 42, 231, 253, 16, 110, 62, 176, 5, 218, 35, 221, + 0, 172, 110, 176, 5, 42, 229, 0, 0, 47, 6, 176, 99, 218, 38, 112, 0, 6, 6, 176, 5, 42, 42, 221, 0, 110, 110, 176, 5, 218, 28, 65, + 120, 172, 110, 176, 5, 42, 168, 209, 46, 47, 62, 176, 5, 42, 168, 209, 104, 110, 62, 176, 5, 218, 168, 190, 190, 110, 110, 176, 5, 218, 120, 190, + 172, 0, 0, 0, 0, 0, 0, 0, 159, 0, 0, 0, 0, 0, 0, 0, 140, 0, 0, 0, 0, 0, 0, 0, 110, 0, 0, 0, 0, 0, 0, 0, + 46, 140, 6, 176, 5, 42, 126, 65, 0, 141, 6, 176, 5, 195, 157, 65, 0, 234, 6, 176, 5, 42, 23, 60, 0, 110, 62, 176, 5, 218, 20, 76, + 42, 140, 6, 176, 5, 42, 11, 116, 163, 141, 6, 176, 5, 12, 111, 53, 121, 234, 6, 176, 5, 31, 126, 244, 0, 74, 62, 176, 5, 42, 35, 168, + 244, 172, 110, 176, 5, 42, 168, 65, 157, 47, 6, 176, 5, 218, 231, 209, 0, 54, 6, 176, 5, 42, 231, 46, 0, 110, 110, 176, 5, 218, 168, 65, + 172, 0, 0, 0, 0, 0, 0, 0, 172, 0, 0, 0, 0, 0, 0, 0, 172, 0, 0, 0, 0, 0, 0, 0, 172, 0, 0, 0, 0, 0, 0, 0, + 0, 172, 0, 0, 0, 0, 0, 0, 0, 159, 0, 0, 0, 0, 0, 0, 0, 172, 0, 0, 0, 0, 0, 0, 0, 172, 0, 0, 0, 0, 0, 0, + 0, 172, 0, 0, 0, 0, 0, 0, 0, 172, 0, 0, 0, 0, 0, 0, 0, 172, 0, 0, 0, 0, 0, 0, 0, 172, 0, 0, 0, 0, 0, 0, + 0, 172, 0, 0, 0, 0, 0, 0, 0, 172, 0, 0, 0, 0, 0, 0, 0, 172, 0, 0, 0, 0, 0, 0, 0, 123, 0, 0, 0, 0, 0, 0, + 172, 0, 0, 0, 0, 0, 0, 0, 159, 0, 0, 0, 0, 0, 0, 0, 140, 0, 0, 0, 0, 0, 0, 0, 172, 0, 0, 0, 0, 0, 0, 0, + 0, 172, 0, 0, 0, 0, 0, 0, 0, 140, 0, 0, 0, 0, 0, 0, 0, 140, 0, 0, 0, 0, 0, 0, 0, 172, 0, 0, 0, 0, 0, 0, + 0, 172, 0, 0, 0, 0, 0, 0, 0, 159, 0, 0, 0, 0, 0, 0, 0, 140, 0, 0, 0, 0, 0, 0, 0, 172, 0, 0, 0, 0, 0, 0, + 0, 172, 0, 0, 0, 0, 0, 0, 0, 172, 0, 0, 0, 0, 0, 0, 0, 172, 0, 0, 0, 0, 0, 0, 0, 110, 0, 0, 0, 0, 0, 0, + 172, 0, 0, 0, 0, 0, 0, 0, 172, 0, 0, 0, 0, 0, 0, 0, 123, 0, 0, 0, 0, 0, 0, 0, 107, 0, 0, 0, 0, 0, 0, 0, + 0, 172, 0, 0, 0, 0, 0, 0, 0, 47, 0, 0, 0, 0, 0, 0, 0, 172, 
0, 0, 0, 0, 0, 0, 0, 110, 0, 0, 0, 0, 0, 0, + 0, 172, 0, 0, 0, 0, 0, 0, 0, 47, 0, 0, 0, 0, 0, 0, 0, 47, 0, 0, 0, 0, 0, 0, 0, 110, 0, 0, 0, 0, 0, 0, + 0, 172, 0, 0, 0, 0, 0, 0, 0, 110, 0, 0, 0, 0, 0, 0, 0, 110, 0, 0, 0, 0, 0, 0, 0, 110, 0, 0, 0, 0, 0, 0, + 238, 0, 0, 0, 0, 0, 0, 0, 47, 0, 0, 0, 0, 0, 0, 0, 54, 0, 0, 0, 0, 0, 0, 0, 110, 0, 0, 0, 0, 0, 0, 0, + 0, 172, 74, 176, 5, 42, 168, 65, 65, 47, 6, 176, 5, 42, 35, 209, 0, 110, 6, 176, 5, 198, 231, 50, 0, 110, 62, 176, 5, 218, 120, 209, + 126, 172, 110, 176, 5, 42, 168, 209, 0, 47, 6, 176, 5, 198, 120, 112, 0, 6, 6, 176, 5, 42, 168, 0, 196, 110, 62, 176, 5, 218, 28, 60, + 20, 172, 110, 176, 5, 42, 168, 209, 112, 47, 62, 176, 5, 218, 168, 209, 108, 110, 62, 176, 5, 218, 168, 209, 144, 110, 110, 176, 5, 218, 167, 24, + 172, 0, 0, 0, 0, 0, 0, 0, 172, 0, 0, 0, 0, 0, 0, 0, 172, 0, 0, 0, 0, 0, 0, 0, 107, 0, 0, 0, 0, 0, 0, 0, + 0, 172, 0, 0, 0, 0, 0, 0, 0, 172, 0, 0, 0, 0, 0, 0, 0, 172, 0, 0, 0, 0, 0, 0, 0, 123, 0, 0, 0, 0, 0, 0, + 0, 172, 0, 0, 0, 0, 0, 0, 0, 47, 0, 0, 0, 0, 0, 0, 0, 54, 0, 0, 0, 0, 0, 0, 0, 123, 0, 0, 0, 0, 0, 0, + 0, 172, 0, 0, 0, 0, 0, 0, 0, 172, 0, 0, 0, 0, 0, 0, 0, 110, 0, 0, 0, 0, 0, 0, 0, 107, 0, 0, 0, 0, 0, 0, + 172, 0, 0, 0, 0, 0, 0, 0, 172, 0, 0, 0, 0, 0, 0, 0, 172, 0, 0, 0, 0, 0, 0, 0, 107, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 110, 176, 5, 12, 35, 112, 0, 0, 6, 176, 5, 11, 12, 65, 0, 0, 6, 176, 5, 11, 111, 28, 0, 0, 110, 176, 5, 42, 38, 137, + 0, 0, 110, 176, 5, 11, 111, 111, 0, 0, 110, 176, 5, 12, 218, 38, 0, 0, 110, 176, 5, 11, 187, 111, 213, 0, 110, 176, 5, 42, 111, 60, + 120, 0, 110, 176, 5, 42, 35, 0, 0, 0, 110, 176, 5, 42, 168, 209, 0, 0, 110, 176, 5, 218, 35, 65, 43, 0, 110, 176, 5, 218, 168, 60, + 172, 0, 0, 0, 0, 0, 0, 0, 172, 0, 0, 0, 0, 0, 0, 0, 123, 0, 0, 0, 0, 0, 0, 0, 107, 0, 0, 0, 0, 0, 0, 0, + 128, 0, 110, 176, 99, 42, 35, 168, 0, 0, 110, 176, 5, 42, 229, 28, 0, 0, 110, 176, 5, 42, 168, 112, 25, 0, 110, 176, 5, 218, 168, 46, + 0, 0, 110, 176, 5, 42, 35, 183, 209, 0, 110, 176, 5, 42, 
35, 221, 194, 0, 110, 176, 5, 11, 168, 46, 77, 0, 110, 176, 5, 218, 168, 190, + 168, 0, 110, 176, 5, 42, 35, 209, 38, 0, 110, 176, 5, 42, 168, 209, 157, 0, 110, 176, 5, 218, 168, 24, 220, 0, 110, 176, 5, 218, 120, 190, + 224, 0, 0, 0, 0, 0, 0, 0, 67, 0, 0, 0, 0, 0, 0, 0, 173, 0, 0, 0, 0, 0, 0, 0, 255, 0, 0, 0, 0, 0, 0, 0, + 120, 159, 6, 176, 5, 11, 35, 221, 91, 159, 6, 176, 5, 11, 229, 111, 0, 140, 6, 176, 5, 42, 231, 112, 11, 172, 110, 176, 5, 218, 168, 112, + 108, 140, 6, 176, 5, 11, 229, 60, 0, 140, 6, 176, 5, 42, 231, 221, 231, 140, 6, 176, 5, 42, 35, 65, 197, 110, 62, 176, 5, 42, 168, 209, + 200, 172, 110, 176, 5, 42, 168, 112, 24, 47, 62, 176, 5, 42, 168, 112, 109, 110, 62, 176, 5, 218, 167, 137, 209, 110, 110, 176, 5, 218, 120, 190, + 15, 0, 0, 0, 0, 0, 0, 0, 15, 0, 0, 0, 0, 0, 0, 0, 173, 0, 0, 0, 0, 0, 0, 0, 173, 0, 0, 0, 0, 0, 0, 0, + 0, 82, 0, 0, 0, 0, 0, 0, 0, 82, 0, 0, 0, 0, 0, 0, 0, 172, 0, 0, 0, 0, 0, 0, 0, 172, 0, 0, 0, 0, 0, 0, + 0, 172, 0, 0, 0, 0, 0, 0, 0, 172, 0, 0, 0, 0, 0, 0, 0, 172, 0, 0, 0, 0, 0, 0, 0, 172, 0, 0, 0, 0, 0, 0, + 0, 172, 0, 0, 0, 0, 0, 0, 0, 172, 0, 0, 0, 0, 0, 0, 0, 123, 0, 0, 0, 0, 0, 0, 0, 107, 0, 0, 0, 0, 0, 0, + 173, 0, 0, 0, 0, 0, 0, 0, 173, 0, 0, 0, 0, 0, 0, 0, 173, 0, 0, 0, 0, 0, 0, 0, 173, 0, 0, 0, 0, 0, 0, 0, + 210, 140, 6, 176, 5, 42, 229, 221, 0, 140, 6, 176, 5, 11, 229, 111, 0, 140, 6, 176, 5, 12, 126, 126, 0, 172, 110, 176, 5, 42, 231, 112, + 0, 140, 6, 176, 5, 42, 28, 60, 0, 140, 6, 176, 5, 42, 35, 60, 0, 140, 6, 176, 5, 11, 229, 111, 0, 47, 110, 176, 5, 42, 168, 112, + 38, 172, 110, 176, 5, 42, 35, 221, 112, 172, 110, 176, 5, 42, 168, 0, 251, 123, 110, 176, 5, 42, 168, 221, 190, 110, 110, 176, 5, 218, 168, 60, + 173, 0, 0, 0, 0, 0, 0, 0, 173, 0, 0, 0, 0, 0, 0, 0, 173, 0, 0, 0, 0, 0, 0, 0, 255, 0, 0, 0, 0, 0, 0, 0, + 0, 172, 110, 176, 5, 42, 231, 112, 137, 172, 110, 176, 5, 12, 229, 46, 0, 172, 110, 176, 5, 12, 229, 46, 28, 172, 110, 176, 5, 218, 168, 190, + 187, 172, 110, 176, 5, 42, 35, 112, 0, 54, 110, 176, 5, 
218, 168, 0, 0, 47, 110, 176, 5, 12, 35, 116, 218, 110, 110, 176, 5, 218, 120, 209, + 78, 110, 110, 176, 5, 218, 168, 0, 12, 110, 110, 176, 5, 218, 168, 112, 216, 110, 110, 176, 5, 218, 168, 190, 111, 110, 110, 176, 5, 218, 120, 190, + 82, 0, 0, 0, 0, 0, 0, 0, 82, 0, 0, 0, 0, 0, 0, 0, 172, 0, 0, 0, 0, 0, 0, 0, 172, 0, 0, 0, 0, 0, 0, 0, + 0, 82, 0, 0, 0, 0, 0, 0, 0, 159, 0, 0, 0, 0, 0, 0, 0, 172, 0, 0, 0, 0, 0, 0, 0, 172, 0, 0, 0, 0, 0, 0, + 0, 172, 0, 0, 0, 0, 0, 0, 0, 172, 0, 0, 0, 0, 0, 0, 0, 140, 0, 0, 0, 0, 0, 0, 0, 123, 0, 0, 0, 0, 0, 0, + 0, 172, 0, 0, 0, 0, 0, 0, 0, 172, 0, 0, 0, 0, 0, 0, 0, 54, 0, 0, 0, 0, 0, 0, 0, 107, 0, 0, 0, 0, 0, 0, + 82, 0, 0, 0, 0, 0, 0, 0, 82, 0, 0, 0, 0, 0, 0, 0, 172, 0, 0, 0, 0, 0, 0, 0, 172, 0, 0, 0, 0, 0, 0, 0, + 0, 82, 0, 0, 0, 0, 0, 0, 0, 67, 0, 0, 0, 0, 0, 0, 0, 172, 0, 0, 0, 0, 0, 0, 0, 172, 0, 0, 0, 0, 0, 0, + 0, 172, 0, 0, 0, 0, 0, 0, 0, 172, 0, 0, 0, 0, 0, 0, 0, 172, 0, 0, 0, 0, 0, 0, 0, 172, 0, 0, 0, 0, 0, 0, + 0, 172, 0, 0, 0, 0, 0, 0, 0, 172, 0, 0, 0, 0, 0, 0, 0, 54, 0, 0, 0, 0, 0, 0, 0, 107, 0, 0, 0, 0, 0, 0, + 172, 0, 0, 0, 0, 0, 0, 0, 172, 0, 0, 0, 0, 0, 0, 0, 172, 0, 0, 0, 0, 0, 0, 0, 172, 0, 0, 0, 0, 0, 0, 0, + 0, 140, 6, 176, 5, 42, 126, 60, 0, 140, 6, 176, 5, 11, 231, 126, 0, 140, 6, 176, 5, 42, 111, 126, 0, 172, 110, 176, 5, 218, 229, 0, + 0, 140, 6, 176, 5, 11, 126, 111, 0, 140, 6, 176, 5, 12, 229, 111, 0, 140, 6, 176, 5, 11, 126, 126, 0, 123, 110, 176, 5, 12, 168, 221, + 0, 172, 110, 176, 5, 42, 231, 112, 72, 47, 110, 176, 5, 42, 229, 46, 0, 54, 110, 176, 5, 42, 229, 112, 76, 110, 110, 176, 5, 218, 120, 112, + 172, 0, 0, 0, 0, 0, 0, 0, 172, 0, 0, 0, 0, 0, 0, 0, 172, 0, 0, 0, 0, 0, 0, 0, 107, 0, 0, 0, 0, 0, 0, 0, + 122, 172, 74, 176, 5, 42, 229, 209, 168, 172, 110, 176, 5, 42, 229, 112, 126, 172, 110, 176, 5, 42, 229, 112, 112, 123, 110, 176, 5, 218, 168, 112, + 20, 172, 110, 176, 5, 42, 35, 112, 53, 172, 110, 176, 5, 218, 229, 209, 106, 54, 110, 176, 5, 12, 229, 108, 108, 110, 110, 176, 5, 218, 120, 209, + 
35, 172, 110, 176, 5, 218, 168, 190, 229, 123, 110, 176, 5, 218, 168, 190, 190, 54, 110, 176, 5, 218, 120, 190, 209, 110, 110, 176, 5, 218, 120, 190, + 172, 0, 0, 0, 0, 0, 0, 0, 172, 0, 0, 0, 0, 0, 0, 0, 172, 0, 0, 0, 0, 0, 0, 0, 123, 0, 0, 0, 0, 0, 0, 0, + 0, 140, 6, 176, 5, 42, 229, 221, 0, 140, 6, 176, 5, 218, 229, 221, 0, 140, 6, 176, 5, 12, 168, 65, 56, 123, 110, 176, 5, 218, 168, 190, + 143, 140, 6, 176, 5, 42, 231, 221, 0, 140, 6, 176, 5, 42, 229, 168, 0, 140, 6, 176, 5, 218, 35, 46, 0, 123, 110, 176, 5, 218, 168, 221, + 244, 172, 110, 176, 5, 42, 231, 112, 0, 47, 110, 176, 5, 42, 231, 112, 62, 54, 110, 176, 5, 42, 168, 112, 194, 110, 110, 176, 5, 218, 168, 112, + 172, 0, 0, 0, 0, 0, 0, 0, 172, 0, 0, 0, 0, 0, 0, 0, 172, 0, 0, 0, 0, 0, 0, 0, 172, 0, 0, 0, 0, 0, 0, 0, + 0, 172, 0, 0, 0, 0, 0, 0, 0, 172, 0, 0, 0, 0, 0, 0, 0, 172, 0, 0, 0, 0, 0, 0, 0, 172, 0, 0, 0, 0, 0, 0, + 0, 172, 0, 0, 0, 0, 0, 0, 0, 172, 0, 0, 0, 0, 0, 0, 0, 172, 0, 0, 0, 0, 0, 0, 0, 172, 0, 0, 0, 0, 0, 0, + 0, 172, 0, 0, 0, 0, 0, 0, 0, 172, 0, 0, 0, 0, 0, 0, 0, 172, 0, 0, 0, 0, 0, 0, 0, 123, 0, 0, 0, 0, 0, 0, + 172, 0, 0, 0, 0, 0, 0, 0, 172, 0, 0, 0, 0, 0, 0, 0, 172, 0, 0, 0, 0, 0, 0, 0, 172, 0, 0, 0, 0, 0, 0, 0, + 0, 172, 0, 0, 0, 0, 0, 0, 0, 172, 0, 0, 0, 0, 0, 0, 0, 172, 0, 0, 0, 0, 0, 0, 0, 172, 0, 0, 0, 0, 0, 0, + 0, 172, 0, 0, 0, 0, 0, 0, 0, 172, 0, 0, 0, 0, 0, 0, 0, 140, 0, 0, 0, 0, 0, 0, 0, 172, 0, 0, 0, 0, 0, 0, + 0, 172, 0, 0, 0, 0, 0, 0, 0, 172, 0, 0, 0, 0, 0, 0, 0, 172, 0, 0, 0, 0, 0, 0, 0, 123, 0, 0, 0, 0, 0, 0, + 172, 0, 0, 0, 0, 0, 0, 0, 172, 0, 0, 0, 0, 0, 0, 0, 172, 0, 0, 0, 0, 0, 0, 0, 123, 0, 0, 0, 0, 0, 0, 0, + 0, 172, 0, 0, 0, 0, 0, 0, 0, 172, 0, 0, 0, 0, 0, 0, 0, 172, 0, 0, 0, 0, 0, 0, 0, 123, 0, 0, 0, 0, 0, 0, + 0, 172, 0, 0, 0, 0, 0, 0, 0, 172, 0, 0, 0, 0, 0, 0, 0, 172, 0, 0, 0, 0, 0, 0, 0, 123, 0, 0, 0, 0, 0, 0, + 0, 123, 0, 0, 0, 0, 0, 0, 0, 123, 0, 0, 0, 0, 0, 0, 0, 54, 0, 0, 0, 0, 0, 0, 0, 107, 0, 0, 0, 0, 0, 0, + 172, 0, 0, 0, 0, 0, 0, 0, 172, 0, 0, 0, 0, 0, 0, 
0, 172, 0, 0, 0, 0, 0, 0, 0, 107, 0, 0, 0, 0, 0, 0, 0, + 3, 172, 110, 176, 5, 42, 231, 112, 157, 47, 110, 176, 5, 42, 229, 112, 190, 54, 62, 176, 5, 42, 231, 112, 168, 110, 110, 176, 5, 218, 168, 112, + 42, 172, 110, 176, 5, 42, 35, 60, 28, 47, 110, 176, 5, 12, 229, 108, 106, 54, 62, 176, 5, 42, 229, 46, 0, 110, 110, 176, 5, 218, 168, 209, + 209, 172, 110, 176, 5, 218, 168, 209, 163, 47, 110, 176, 5, 218, 168, 190, 253, 54, 110, 176, 5, 218, 168, 190, 76, 110, 110, 176, 5, 218, 120, 190, + 172, 0, 0, 0, 0, 0, 0, 0, 172, 0, 0, 0, 0, 0, 0, 0, 172, 0, 0, 0, 0, 0, 0, 0, 123, 0, 0, 0, 0, 0, 0, 0, + 0, 172, 0, 0, 0, 0, 0, 0, 0, 172, 0, 0, 0, 0, 0, 0, 0, 172, 0, 0, 0, 0, 0, 0, 0, 123, 0, 0, 0, 0, 0, 0, + 0, 172, 0, 0, 0, 0, 0, 0, 0, 172, 0, 0, 0, 0, 0, 0, 0, 172, 0, 0, 0, 0, 0, 0, 0, 123, 0, 0, 0, 0, 0, 0, + 0, 123, 0, 0, 0, 0, 0, 0, 0, 123, 0, 0, 0, 0, 0, 0, 0, 123, 0, 0, 0, 0, 0, 0, 0, 107, 0, 0, 0, 0, 0, 0, + 172, 0, 0, 0, 0, 0, 0, 0, 172, 0, 0, 0, 0, 0, 0, 0, 172, 0, 0, 0, 0, 0, 0, 0, 123, 0, 0, 0, 0, 0, 0, 0, + 42, 0, 110, 176, 5, 42, 229, 111, 0, 0, 110, 176, 5, 12, 229, 126, 107, 0, 110, 176, 5, 42, 229, 126, 231, 0, 110, 176, 5, 218, 168, 112, + 0, 0, 110, 176, 5, 42, 229, 35, 187, 0, 110, 176, 5, 12, 229, 116, 0, 0, 110, 176, 5, 12, 126, 116, 221, 0, 110, 176, 5, 42, 231, 112, + 218, 0, 110, 176, 5, 218, 168, 112, 3, 0, 110, 176, 5, 42, 168, 190, 190, 0, 110, 176, 5, 218, 231, 112, 167, 0, 110, 176, 5, 218, 120, 190, + 172, 0, 0, 0, 0, 0, 0, 0, 123, 0, 0, 0, 0, 0, 0, 0, 123, 0, 0, 0, 0, 0, 0, 0, 107, 0, 0, 0, 0, 0, 0, 0, + 169, 0, 110, 176, 5, 42, 229, 111, 120, 0, 110, 176, 5, 42, 229, 111, 16, 0, 110, 176, 5, 42, 229, 112, 168, 0, 110, 176, 5, 218, 120, 190, + 18, 0, 110, 176, 5, 42, 168, 112, 157, 0, 110, 176, 5, 42, 229, 111, 231, 0, 110, 176, 5, 12, 229, 116, 190, 0, 110, 176, 5, 218, 120, 190, + 28, 0, 110, 176, 5, 218, 168, 190, 187, 0, 110, 176, 5, 218, 168, 190, 190, 0, 110, 176, 5, 218, 120, 190, 190, 0, 110, 176, 5, 218, 120, 190, +}; + +static const 
unsigned char model_run_state_table[8192] = +{ + 80, 168, 115, 59, 51, 58, 99, 250, 80, 80, 225, 59, 63, 99, 250, 250, 233, 80, 225, 23, 51, 63, 207, 99, 233, 80, 225, 23, 23, 249, 207, 1, + 20, 80, 225, 23, 23, 98, 104, 58, 20, 80, 225, 23, 23, 160, 108, 167, 20, 80, 65, 23, 23, 160, 207, 148, 80, 80, 65, 112, 160, 160, 207, 4, + 114, 114, 219, 247, 223, 222, 108, 108, 61, 128, 216, 202, 59, 100, 108, 12, 61, 128, 245, 25, 13, 103, 226, 132, 61, 20, 245, 251, 247, 231, 84, 104, + 61, 20, 245, 159, 239, 89, 251, 84, 61, 20, 20, 251, 72, 250, 103, 135, 34, 20, 20, 232, 100, 104, 251, 58, 34, 61, 20, 237, 103, 72, 125, 104, + 114, 145, 219, 247, 123, 63, 58, 4, 128, 245, 142, 247, 103, 58, 58, 250, 20, 190, 142, 13, 103, 100, 104, 218, 61, 190, 245, 146, 239, 119, 226, 108, + 61, 20, 237, 25, 226, 160, 224, 183, 61, 20, 245, 14, 199, 4, 108, 108, 34, 20, 20, 251, 72, 82, 1, 108, 34, 20, 20, 125, 125, 47, 226, 104, + 128, 151, 219, 247, 119, 63, 12, 108, 85, 156, 219, 219, 231, 100, 58, 108, 94, 137, 151, 219, 7, 100, 89, 108, 94, 128, 190, 25, 72, 231, 63, 104, + 61, 73, 151, 219, 14, 89, 224, 192, 50, 61, 151, 56, 6, 232, 104, 183, 50, 61, 151, 237, 72, 227, 104, 104, 153, 61, 20, 237, 232, 72, 104, 108, + 114, 145, 115, 155, 100, 226, 108, 108, 128, 245, 219, 231, 231, 47, 89, 4, 61, 245, 142, 74, 231, 104, 104, 255, 61, 20, 245, 142, 231, 82, 177, 192, + 61, 20, 245, 146, 47, 14, 140, 222, 61, 20, 20, 146, 89, 125, 207, 250, 61, 20, 20, 146, 100, 104, 207, 207, 34, 20, 20, 125, 125, 160, 226, 108, + 128, 114, 219, 183, 164, 100, 226, 250, 252, 128, 184, 25, 103, 47, 89, 104, 94, 128, 190, 25, 14, 47, 125, 104, 94, 85, 190, 26, 14, 47, 104, 72, + 50, 85, 20, 147, 14, 18, 14, 192, 50, 61, 144, 33, 26, 45, 14, 125, 50, 61, 144, 26, 14, 100, 252, 192, 153, 61, 20, 26, 18, 72, 192, 192, + 114, 151, 219, 247, 199, 98, 158, 250, 85, 190, 151, 6, 231, 89, 168, 89, 94, 128, 190, 142, 103, 89, 89, 104, 94, 128, 190, 25, 14, 14, 125, 104, + 94, 20, 190, 25, 14, 232, 232, 47, 
50, 61, 190, 159, 14, 72, 25, 101, 50, 61, 20, 159, 232, 89, 125, 108, 153, 61, 20, 26, 125, 103, 192, 104, + 128, 151, 219, 247, 247, 63, 63, 108, 85, 156, 151, 219, 231, 100, 47, 108, 94, 137, 151, 219, 14, 14, 89, 89, 94, 85, 151, 33, 14, 89, 89, 232, + 50, 85, 190, 33, 14, 14, 89, 104, 50, 85, 190, 130, 14, 14, 135, 192, 50, 85, 73, 33, 14, 14, 104, 104, 153, 61, 20, 26, 232, 89, 192, 192, + 145, 219, 247, 155, 172, 110, 58, 162, 20, 245, 142, 59, 82, 99, 103, 58, 20, 245, 142, 25, 103, 89, 231, 207, 20, 245, 142, 142, 231, 160, 214, 108, + 20, 20, 142, 146, 125, 14, 167, 192, 20, 20, 142, 13, 103, 160, 207, 162, 20, 20, 20, 82, 125, 160, 207, 4, 34, 20, 20, 125, 160, 160, 226, 108, + 114, 151, 219, 247, 247, 63, 58, 250, 85, 156, 151, 219, 231, 227, 58, 108, 94, 128, 151, 219, 231, 47, 58, 192, 50, 128, 245, 244, 89, 47, 14, 108, + 50, 128, 190, 56, 103, 14, 14, 155, 61, 69, 190, 6, 103, 104, 135, 84, 61, 61, 20, 26, 14, 231, 104, 154, 153, 61, 20, 237, 232, 14, 89, 104, + 114, 219, 219, 247, 119, 79, 12, 4, 128, 151, 219, 219, 231, 51, 58, 4, 61, 190, 151, 219, 231, 100, 160, 84, 61, 128, 245, 25, 103, 232, 18, 80, + 61, 20, 245, 25, 103, 219, 99, 104, 61, 61, 245, 130, 146, 232, 89, 108, 61, 61, 190, 25, 89, 192, 104, 250, 34, 20, 20, 237, 232, 125, 226, 104, + 128, 151, 219, 247, 119, 79, 22, 4, 85, 151, 219, 247, 247, 63, 63, 226, 94, 137, 151, 219, 231, 100, 89, 226, 94, 137, 151, 219, 231, 47, 89, 89, + 50, 85, 151, 219, 103, 47, 58, 58, 50, 85, 151, 56, 232, 89, 89, 226, 50, 85, 151, 145, 14, 232, 47, 104, 153, 61, 20, 237, 232, 89, 11, 104, + 114, 145, 219, 155, 65, 47, 226, 158, 128, 114, 219, 13, 89, 125, 108, 179, 128, 20, 245, 251, 103, 89, 89, 14, 61, 20, 245, 142, 231, 231, 99, 20, + 61, 20, 245, 251, 14, 18, 58, 192, 61, 20, 20, 245, 45, 58, 4, 1, 61, 20, 20, 80, 231, 160, 207, 108, 34, 20, 20, 125, 125, 125, 99, 104, + 128, 151, 219, 13, 100, 100, 103, 108, 85, 128, 151, 237, 147, 103, 127, 192, 94, 128, 190, 33, 14, 47, 89, 227, 94, 85, 190, 
33, 135, 135, 14, 16, + 50, 85, 190, 144, 14, 232, 201, 103, 50, 85, 73, 26, 14, 45, 103, 115, 50, 61, 20, 20, 14, 45, 89, 11, 153, 61, 20, 26, 135, 135, 201, 192, + 156, 151, 219, 13, 247, 100, 98, 162, 85, 156, 151, 142, 231, 103, 159, 104, 94, 128, 190, 56, 14, 72, 47, 226, 94, 128, 190, 33, 14, 47, 125, 104, + 50, 128, 190, 26, 14, 14, 33, 72, 50, 61, 190, 26, 14, 18, 100, 249, 50, 61, 190, 26, 45, 103, 33, 72, 153, 61, 20, 26, 135, 125, 192, 192, + 156, 151, 219, 219, 100, 100, 226, 108, 85, 137, 151, 219, 14, 232, 89, 104, 85, 137, 151, 33, 14, 14, 89, 72, 60, 85, 190, 33, 14, 14, 45, 72, + 49, 85, 73, 33, 14, 14, 125, 192, 49, 85, 73, 33, 14, 14, 14, 192, 49, 85, 73, 33, 14, 14, 45, 192, 153, 55, 32, 154, 135, 135, 192, 192, + 27, 8, 142, 44, 103, 51, 63, 4, 186, 20, 216, 6, 3, 103, 58, 58, 34, 144, 216, 8, 123, 255, 58, 181, 61, 190, 142, 6, 23, 226, 72, 4, + 88, 20, 56, 159, 51, 115, 67, 4, 28, 8, 80, 44, 25, 33, 12, 131, 34, 20, 42, 216, 93, 41, 25, 63, 34, 80, 20, 125, 72, 255, 231, 250, + 193, 213, 219, 219, 247, 127, 59, 162, 252, 213, 216, 124, 146, 237, 242, 125, 50, 190, 190, 179, 14, 251, 238, 6, 50, 85, 245, 245, 42, 159, 144, 113, + 50, 228, 245, 6, 59, 11, 108, 168, 61, 161, 20, 42, 134, 138, 225, 181, 94, 20, 26, 184, 55, 213, 238, 244, 34, 61, 20, 80, 159, 82, 104, 108, + 27, 97, 216, 6, 103, 102, 226, 104, 105, 97, 190, 130, 159, 82, 0, 41, 50, 69, 190, 147, 146, 14, 231, 219, 50, 20, 245, 237, 237, 75, 126, 100, + 50, 15, 252, 21, 146, 61, 250, 192, 50, 36, 73, 190, 175, 11, 251, 0, 88, 61, 233, 5, 232, 127, 0, 71, 34, 61, 20, 26, 18, 45, 44, 41, + 128, 151, 219, 247, 247, 51, 58, 226, 85, 97, 151, 219, 247, 247, 231, 155, 94, 97, 9, 184, 146, 104, 6, 99, 50, 37, 190, 25, 89, 63, 108, 147, + 50, 190, 190, 184, 159, 250, 144, 170, 88, 69, 97, 189, 135, 104, 33, 135, 153, 194, 35, 80, 103, 40, 125, 25, 153, 61, 190, 237, 25, 103, 250, 104, + 27, 128, 142, 146, 146, 80, 145, 226, 252, 228, 190, 80, 103, 232, 15, 147, 94, 61, 8, 6, 7, 44, 162, 
139, 50, 61, 144, 179, 25, 191, 96, 12, + 61, 105, 235, 18, 146, 135, 111, 99, 50, 61, 20, 61, 183, 155, 231, 180, 88, 61, 197, 45, 240, 250, 219, 64, 34, 61, 20, 26, 89, 103, 125, 197, + 128, 151, 151, 219, 168, 47, 58, 158, 85, 128, 245, 151, 134, 232, 184, 192, 94, 128, 8, 235, 107, 47, 145, 72, 50, 194, 20, 20, 173, 26, 155, 144, + 50, 61, 36, 14, 82, 125, 237, 99, 50, 105, 20, 27, 14, 137, 167, 235, 50, 61, 175, 186, 190, 28, 108, 127, 153, 61, 54, 26, 129, 89, 26, 108, + 252, 97, 216, 25, 7, 11, 159, 226, 94, 252, 190, 245, 235, 135, 251, 18, 50, 252, 190, 8, 33, 125, 144, 226, 88, 94, 144, 42, 202, 13, 45, 253, + 88, 94, 97, 27, 14, 93, 153, 192, 88, 61, 61, 9, 103, 228, 250, 244, 88, 61, 144, 20, 14, 232, 18, 138, 153, 61, 61, 154, 26, 215, 18, 192, + 128, 156, 219, 219, 247, 100, 63, 108, 252, 97, 151, 151, 239, 103, 103, 231, 94, 85, 190, 244, 159, 141, 233, 18, 50, 85, 156, 8, 45, 14, 72, 133, + 50, 94, 190, 33, 14, 135, 159, 80, 50, 94, 73, 37, 20, 14, 155, 157, 153, 94, 37, 245, 26, 192, 153, 68, 153, 61, 32, 26, 129, 232, 18, 192, + 27, 235, 56, 56, 59, 249, 45, 226, 61, 190, 33, 25, 159, 47, 104, 231, 186, 54, 20, 26, 151, 80, 249, 146, 61, 61, 235, 6, 192, 72, 82, 4, + 34, 128, 73, 18, 192, 93, 232, 0, 50, 2, 237, 20, 230, 250, 52, 161, 19, 20, 20, 109, 231, 190, 49, 102, 34, 215, 20, 26, 244, 54, 183, 125, + 128, 151, 219, 247, 247, 249, 158, 108, 85, 137, 151, 121, 219, 232, 59, 80, 94, 190, 190, 142, 239, 232, 6, 76, 94, 73, 189, 26, 237, 247, 156, 90, + 50, 61, 149, 144, 5, 178, 49, 68, 36, 242, 221, 176, 159, 251, 10, 6, 50, 73, 26, 144, 190, 82, 0, 229, 34, 61, 20, 20, 103, 125, 47, 116, + 194, 213, 151, 219, 231, 231, 51, 104, 94, 161, 213, 151, 7, 146, 7, 249, 50, 35, 190, 245, 25, 18, 251, 165, 50, 69, 190, 245, 237, 146, 72, 120, + 50, 36, 73, 147, 25, 190, 34, 72, 153, 50, 73, 215, 89, 25, 120, 71, 153, 87, 144, 14, 189, 55, 36, 167, 153, 61, 20, 20, 129, 147, 57, 115, + 128, 151, 219, 247, 119, 119, 12, 250, 85, 156, 219, 219, 247, 63, 
63, 249, 94, 137, 151, 219, 13, 100, 47, 72, 50, 137, 151, 70, 14, 89, 103, 247, + 49, 85, 190, 25, 125, 247, 192, 138, 50, 69, 190, 130, 14, 255, 227, 100, 49, 85, 190, 233, 14, 18, 89, 247, 153, 61, 20, 26, 129, 72, 14, 192, + 27, 9, 130, 146, 145, 125, 125, 226, 186, 27, 8, 33, 56, 145, 14, 104, 94, 61, 245, 33, 80, 199, 91, 192, 50, 186, 144, 103, 232, 194, 247, 194, + 50, 61, 54, 235, 152, 132, 244, 228, 61, 61, 144, 190, 180, 21, 196, 188, 50, 61, 144, 125, 51, 74, 0, 114, 153, 61, 20, 26, 72, 232, 154, 214, + 156, 151, 219, 219, 159, 100, 7, 7, 85, 137, 151, 8, 56, 245, 159, 183, 94, 85, 190, 245, 142, 9, 22, 13, 50, 85, 190, 21, 244, 114, 201, 45, + 50, 94, 190, 33, 72, 165, 104, 24, 50, 61, 20, 19, 80, 92, 9, 90, 50, 94, 32, 235, 197, 113, 234, 192, 153, 61, 61, 26, 129, 135, 18, 47, + 252, 156, 151, 237, 29, 103, 103, 104, 94, 128, 190, 237, 14, 135, 72, 223, 94, 252, 190, 26, 14, 26, 26, 104, 50, 94, 69, 20, 33, 47, 135, 72, + 50, 94, 73, 54, 14, 135, 114, 249, 88, 50, 20, 2, 245, 197, 188, 58, 88, 61, 61, 186, 135, 153, 136, 185, 153, 61, 61, 197, 135, 135, 201, 192, + 156, 151, 219, 219, 232, 232, 220, 108, 85, 137, 151, 21, 232, 232, 89, 232, 60, 137, 151, 33, 14, 232, 232, 135, 60, 85, 73, 33, 14, 232, 18, 104, + 49, 85, 73, 27, 14, 14, 33, 192, 49, 94, 73, 73, 135, 135, 135, 249, 49, 60, 73, 154, 33, 45, 125, 15, 153, 55, 32, 154, 135, 135, 135, 192, + 0, 111, 247, 119, 79, 62, 167, 90, 0, 130, 247, 59, 51, 62, 181, 101, 0, 117, 142, 247, 113, 160, 224, 1, 0, 130, 6, 59, 63, 148, 99, 181, + 0, 205, 74, 146, 59, 118, 104, 84, 0, 121, 65, 119, 146, 140, 168, 1, 0, 190, 44, 30, 62, 222, 101, 121, 0, 74, 65, 132, 112, 22, 181, 101, + 0, 219, 247, 119, 79, 22, 238, 90, 0, 151, 219, 247, 119, 62, 22, 214, 0, 151, 219, 247, 222, 63, 99, 1, 0, 5, 219, 247, 132, 99, 108, 181, + 0, 33, 219, 119, 148, 62, 207, 108, 0, 151, 219, 132, 100, 140, 226, 181, 0, 18, 44, 132, 63, 160, 226, 104, 0, 151, 182, 123, 224, 12, 238, 101, + 244, 247, 247, 79, 224, 0, 0, 0, 
128, 219, 247, 199, 0, 0, 0, 0, 128, 48, 147, 148, 0, 0, 0, 0, 170, 48, 142, 124, 0, 0, 0, 0, + 128, 77, 159, 53, 0, 0, 0, 0, 186, 184, 183, 0, 0, 0, 0, 0, 32, 142, 83, 207, 0, 0, 0, 0, 20, 20, 247, 56, 0, 0, 0, 0, + 151, 247, 247, 162, 0, 0, 0, 0, 156, 219, 247, 255, 84, 0, 0, 0, 156, 151, 219, 224, 46, 0, 0, 0, 128, 151, 219, 115, 105, 0, 0, 0, + 128, 151, 219, 51, 0, 0, 0, 0, 85, 151, 219, 192, 0, 0, 0, 0, 128, 151, 111, 0, 0, 0, 0, 0, 61, 145, 219, 13, 0, 0, 0, 0, + 0, 219, 247, 119, 79, 62, 22, 214, 0, 5, 219, 247, 51, 148, 63, 250, 0, 137, 219, 239, 51, 58, 160, 63, 0, 216, 219, 247, 100, 158, 162, 181, + 0, 244, 179, 3, 51, 58, 62, 22, 0, 137, 219, 6, 119, 99, 62, 250, 0, 151, 130, 204, 100, 158, 158, 42, 0, 213, 202, 140, 132, 99, 4, 250, + 0, 219, 247, 119, 79, 22, 238, 90, 0, 219, 247, 247, 119, 79, 22, 214, 0, 151, 219, 247, 119, 79, 207, 4, 0, 151, 219, 247, 119, 12, 207, 4, + 0, 151, 219, 247, 113, 12, 12, 214, 0, 151, 219, 247, 53, 12, 207, 4, 0, 151, 234, 123, 79, 79, 4, 214, 0, 151, 247, 30, 222, 12, 4, 214, + 151, 247, 247, 62, 130, 0, 0, 0, 128, 151, 219, 53, 146, 0, 0, 0, 85, 151, 219, 224, 162, 0, 0, 0, 85, 151, 219, 253, 0, 0, 0, 0, + 94, 245, 41, 132, 0, 0, 0, 0, 94, 114, 142, 126, 0, 0, 0, 0, 61, 114, 219, 84, 0, 0, 0, 0, 61, 114, 219, 51, 0, 0, 0, 0, + 151, 247, 247, 22, 101, 0, 0, 0, 156, 219, 247, 158, 119, 0, 0, 0, 156, 219, 219, 224, 0, 0, 0, 0, 156, 151, 219, 255, 208, 0, 0, 0, + 85, 151, 219, 255, 160, 0, 0, 0, 85, 151, 219, 191, 0, 0, 0, 0, 85, 151, 219, 12, 0, 0, 0, 0, 85, 151, 247, 89, 0, 0, 0, 0, + 0, 219, 247, 119, 53, 22, 12, 181, 0, 219, 219, 247, 51, 118, 250, 4, 0, 5, 202, 155, 58, 231, 12, 99, 0, 77, 179, 6, 249, 115, 148, 18, + 0, 184, 42, 131, 127, 108, 4, 108, 0, 77, 145, 179, 63, 63, 4, 112, 0, 187, 74, 6, 89, 199, 99, 207, 0, 190, 145, 183, 249, 162, 113, 250, + 0, 219, 247, 119, 79, 22, 238, 90, 0, 151, 219, 247, 119, 98, 12, 250, 0, 151, 219, 247, 119, 224, 162, 1, 0, 151, 219, 247, 119, 226, 108, 108, + 0, 151, 219, 
39, 119, 255, 62, 181, 0, 128, 134, 155, 155, 58, 181, 58, 0, 9, 111, 247, 113, 181, 108, 250, 0, 151, 219, 31, 95, 162, 108, 84, + 151, 219, 247, 32, 0, 0, 0, 0, 128, 151, 219, 45, 158, 0, 0, 0, 128, 114, 109, 79, 0, 0, 0, 0, 54, 114, 247, 0, 0, 0, 0, 0, + 194, 245, 8, 146, 0, 0, 0, 0, 194, 20, 219, 139, 0, 0, 0, 0, 194, 56, 72, 0, 0, 0, 0, 0, 61, 20, 151, 114, 0, 0, 0, 0, + 151, 219, 247, 255, 161, 0, 0, 0, 156, 151, 219, 30, 0, 0, 0, 0, 128, 156, 219, 196, 211, 0, 0, 0, 85, 156, 219, 226, 0, 0, 0, 0, + 85, 156, 219, 121, 0, 0, 0, 0, 85, 190, 151, 116, 0, 0, 0, 0, 85, 128, 190, 66, 0, 0, 0, 0, 94, 128, 151, 89, 142, 0, 0, 0, + 0, 219, 247, 247, 79, 118, 22, 4, 0, 151, 219, 247, 119, 63, 63, 250, 0, 151, 219, 219, 199, 249, 160, 226, 0, 151, 184, 239, 223, 222, 224, 207, + 0, 190, 130, 7, 231, 58, 63, 1, 0, 20, 219, 115, 100, 118, 162, 214, 0, 73, 244, 74, 59, 58, 192, 90, 0, 117, 145, 146, 140, 58, 162, 1, + 0, 219, 247, 119, 79, 22, 238, 90, 0, 151, 219, 247, 119, 79, 12, 250, 0, 151, 219, 247, 119, 12, 162, 181, 0, 151, 219, 247, 51, 79, 162, 84, + 0, 151, 219, 247, 113, 148, 12, 84, 0, 151, 219, 247, 222, 12, 4, 250, 0, 151, 219, 183, 119, 62, 99, 1, 0, 151, 219, 31, 222, 12, 250, 84, + 151, 219, 247, 255, 0, 0, 0, 0, 156, 151, 219, 191, 0, 0, 0, 0, 128, 156, 151, 53, 0, 0, 0, 0, 85, 190, 219, 160, 0, 0, 0, 0, + 85, 128, 5, 49, 0, 0, 0, 0, 85, 137, 245, 169, 0, 0, 0, 0, 94, 128, 190, 0, 0, 0, 0, 0, 61, 128, 219, 133, 0, 0, 0, 0, + 151, 219, 247, 58, 255, 0, 0, 0, 156, 151, 219, 220, 30, 0, 0, 0, 156, 151, 219, 219, 0, 0, 0, 0, 156, 151, 219, 155, 48, 0, 0, 0, + 85, 156, 219, 89, 90, 0, 0, 0, 85, 156, 219, 108, 0, 0, 0, 0, 85, 156, 151, 222, 0, 0, 0, 0, 60, 156, 219, 192, 0, 0, 0, 0, + 0, 200, 179, 82, 51, 63, 99, 250, 0, 117, 142, 239, 103, 223, 103, 250, 0, 97, 111, 13, 227, 47, 142, 4, 0, 116, 72, 59, 129, 199, 223, 181, + 0, 56, 212, 213, 44, 207, 126, 0, 0, 10, 184, 126, 107, 30, 51, 214, 0, 152, 20, 66, 232, 110, 230, 181, 0, 117, 146, 146, 227, 53, 99, 
4, + 0, 219, 247, 119, 119, 62, 12, 214, 0, 151, 219, 247, 140, 119, 99, 108, 0, 187, 151, 247, 51, 63, 181, 199, 0, 212, 109, 219, 59, 158, 214, 207, + 0, 8, 52, 40, 118, 140, 238, 238, 0, 208, 219, 132, 100, 167, 133, 224, 0, 41, 211, 142, 25, 127, 100, 7, 0, 145, 96, 159, 110, 99, 51, 101, + 128, 244, 13, 59, 0, 0, 0, 0, 252, 75, 219, 232, 164, 0, 0, 0, 252, 245, 9, 57, 0, 0, 0, 0, 194, 128, 44, 220, 0, 0, 0, 0, + 87, 212, 125, 0, 0, 0, 0, 0, 85, 36, 20, 0, 0, 0, 0, 0, 50, 20, 228, 0, 0, 0, 0, 0, 186, 128, 80, 51, 0, 0, 0, 0, + 156, 219, 247, 220, 118, 0, 0, 0, 97, 151, 219, 168, 0, 0, 0, 0, 35, 190, 151, 100, 0, 0, 0, 0, 242, 170, 176, 202, 0, 0, 0, 0, + 94, 170, 151, 45, 0, 0, 0, 0, 85, 144, 52, 0, 0, 0, 0, 0, 85, 170, 145, 0, 113, 0, 0, 0, 34, 20, 244, 72, 0, 0, 0, 0, + 0, 213, 219, 82, 172, 249, 63, 250, 0, 97, 121, 202, 82, 51, 249, 58, 0, 97, 9, 145, 146, 159, 93, 192, 0, 97, 184, 6, 89, 20, 65, 0, + 0, 144, 130, 72, 82, 13, 45, 103, 0, 213, 26, 216, 231, 250, 58, 250, 0, 150, 20, 147, 89, 152, 13, 181, 0, 48, 9, 146, 103, 58, 104, 104, + 0, 219, 247, 119, 79, 62, 167, 214, 0, 151, 219, 247, 119, 79, 12, 250, 0, 43, 219, 247, 119, 53, 63, 1, 0, 151, 219, 183, 223, 63, 162, 238, + 0, 120, 219, 39, 172, 108, 158, 181, 0, 144, 229, 31, 123, 98, 249, 4, 0, 114, 109, 82, 58, 58, 160, 224, 0, 240, 219, 59, 51, 98, 99, 101, + 252, 216, 219, 219, 2, 0, 0, 0, 252, 190, 151, 156, 0, 0, 0, 0, 94, 128, 121, 13, 167, 0, 0, 0, 94, 54, 176, 86, 0, 0, 0, 0, + 88, 27, 8, 0, 0, 0, 0, 0, 87, 128, 134, 92, 0, 0, 0, 0, 153, 161, 117, 0, 0, 0, 0, 0, 50, 194, 245, 40, 0, 0, 0, 0, + 156, 219, 247, 62, 1, 89, 0, 0, 97, 219, 219, 249, 0, 0, 0, 0, 85, 151, 219, 230, 0, 0, 0, 0, 252, 189, 219, 70, 0, 0, 0, 0, + 252, 156, 77, 89, 0, 0, 0, 0, 94, 149, 120, 12, 0, 0, 0, 0, 55, 156, 124, 189, 0, 0, 0, 0, 94, 240, 219, 199, 0, 0, 0, 0, + 0, 70, 111, 82, 51, 51, 58, 250, 0, 9, 25, 239, 7, 82, 99, 14, 0, 9, 38, 7, 244, 28, 98, 58, 0, 235, 245, 121, 123, 181, 65, 1, + 0, 28, 245, 176, 146, 
205, 186, 1, 0, 85, 237, 120, 192, 176, 175, 7, 0, 205, 20, 2, 100, 51, 42, 75, 0, 156, 145, 18, 98, 59, 59, 192, + 0, 219, 247, 119, 79, 12, 4, 214, 0, 151, 219, 247, 199, 222, 99, 162, 0, 151, 219, 168, 119, 47, 214, 167, 0, 156, 120, 247, 100, 119, 62, 101, + 0, 187, 200, 100, 63, 7, 143, 115, 0, 149, 151, 247, 58, 227, 46, 148, 0, 64, 5, 219, 137, 160, 214, 0, 0, 187, 219, 247, 158, 98, 4, 250, + 128, 213, 219, 104, 0, 0, 0, 0, 194, 9, 109, 195, 63, 0, 0, 0, 85, 190, 244, 0, 0, 0, 0, 0, 94, 97, 184, 40, 0, 0, 0, 0, + 94, 27, 94, 0, 0, 0, 0, 0, 19, 85, 151, 0, 0, 0, 0, 0, 50, 61, 237, 0, 0, 0, 0, 0, 105, 28, 145, 144, 0, 0, 0, 0, + 151, 219, 219, 119, 190, 0, 0, 0, 156, 151, 219, 160, 0, 0, 0, 0, 85, 137, 219, 51, 0, 0, 0, 0, 85, 156, 190, 0, 0, 0, 0, 0, + 85, 156, 97, 0, 0, 0, 0, 0, 94, 156, 159, 0, 0, 0, 0, 0, 94, 20, 96, 243, 0, 0, 0, 0, 60, 20, 219, 154, 0, 0, 0, 0, + 0, 151, 219, 239, 172, 249, 108, 250, 0, 189, 184, 202, 146, 123, 158, 108, 0, 97, 213, 74, 6, 115, 232, 108, 0, 97, 213, 25, 202, 47, 224, 118, + 0, 73, 235, 25, 146, 148, 58, 104, 0, 128, 20, 147, 51, 126, 46, 162, 0, 128, 243, 74, 176, 58, 250, 46, 0, 128, 245, 146, 59, 223, 250, 101, + 0, 219, 247, 247, 119, 62, 4, 214, 0, 151, 219, 247, 247, 222, 12, 1, 0, 151, 219, 247, 100, 98, 58, 4, 0, 151, 151, 247, 199, 224, 162, 181, + 0, 156, 219, 247, 155, 123, 181, 84, 0, 156, 219, 247, 199, 98, 158, 255, 0, 137, 151, 109, 172, 51, 43, 223, 0, 156, 219, 168, 222, 12, 250, 84, + 97, 213, 151, 108, 160, 0, 0, 0, 85, 97, 151, 189, 0, 0, 0, 0, 252, 97, 190, 43, 0, 0, 0, 0, 94, 97, 245, 76, 0, 0, 0, 0, + 94, 186, 201, 0, 0, 0, 0, 0, 94, 85, 170, 216, 0, 0, 0, 0, 50, 61, 184, 171, 0, 0, 0, 0, 50, 128, 147, 104, 0, 0, 0, 0, + 151, 219, 219, 100, 18, 0, 0, 0, 156, 151, 219, 247, 0, 0, 0, 0, 85, 156, 151, 114, 0, 0, 0, 0, 85, 156, 151, 64, 0, 0, 0, 0, + 85, 156, 151, 197, 0, 0, 0, 0, 85, 156, 151, 90, 0, 0, 0, 0, 60, 156, 151, 100, 0, 0, 0, 0, 60, 156, 151, 108, 22, 0, 0, 0, + 80, 145, 115, 100, 79, 222, 
99, 101, 186, 8, 142, 159, 172, 58, 1, 108, 61, 20, 21, 59, 158, 47, 104, 108, 54, 144, 251, 59, 227, 103, 226, 214, + 34, 212, 20, 103, 113, 63, 250, 181, 20, 9, 142, 223, 72, 162, 84, 199, 20, 20, 184, 125, 112, 98, 111, 214, 34, 20, 142, 146, 125, 47, 158, 250, + 114, 114, 219, 44, 51, 99, 63, 4, 94, 128, 245, 70, 51, 47, 110, 201, 50, 61, 190, 74, 124, 199, 125, 8, 50, 27, 245, 235, 232, 121, 162, 226, + 50, 34, 20, 232, 124, 130, 19, 181, 61, 15, 130, 26, 231, 22, 120, 0, 61, 61, 144, 18, 54, 144, 166, 214, 34, 20, 20, 251, 89, 100, 232, 226, + 114, 145, 219, 247, 16, 98, 158, 84, 94, 190, 38, 145, 231, 103, 160, 104, 61, 20, 190, 25, 14, 103, 226, 104, 61, 161, 8, 74, 231, 51, 249, 160, + 61, 212, 184, 145, 7, 226, 123, 125, 50, 61, 235, 44, 168, 110, 215, 4, 34, 20, 20, 237, 24, 102, 125, 103, 34, 20, 20, 125, 103, 100, 104, 84, + 128, 151, 219, 247, 119, 158, 99, 63, 94, 161, 38, 234, 7, 103, 31, 72, 94, 128, 190, 25, 179, 126, 192, 89, 50, 242, 190, 25, 211, 192, 181, 72, + 50, 85, 190, 75, 72, 25, 118, 63, 50, 61, 73, 3, 18, 18, 166, 16, 50, 194, 245, 6, 89, 197, 203, 108, 153, 61, 20, 26, 72, 125, 66, 104, + 128, 145, 145, 247, 123, 148, 108, 207, 94, 144, 245, 42, 159, 103, 226, 158, 61, 20, 147, 159, 59, 146, 192, 72, 61, 61, 26, 216, 14, 109, 247, 249, + 61, 69, 120, 18, 219, 2, 7, 250, 61, 61, 190, 245, 175, 238, 207, 250, 61, 215, 25, 56, 125, 89, 238, 207, 34, 20, 20, 125, 7, 125, 100, 84, + 128, 114, 145, 247, 100, 249, 95, 84, 94, 128, 190, 25, 103, 25, 104, 250, 50, 252, 190, 33, 26, 103, 47, 47, 50, 61, 245, 237, 103, 125, 182, 192, + 88, 61, 245, 147, 172, 89, 198, 58, 50, 61, 190, 20, 103, 18, 7, 27, 153, 61, 20, 237, 144, 172, 135, 101, 153, 61, 20, 237, 18, 103, 103, 104, + 128, 151, 219, 247, 247, 63, 226, 4, 94, 128, 151, 142, 59, 232, 58, 108, 94, 144, 190, 6, 103, 249, 26, 155, 50, 94, 245, 134, 14, 82, 167, 250, + 50, 54, 20, 251, 42, 129, 63, 199, 50, 61, 190, 147, 125, 6, 125, 250, 50, 61, 117, 235, 115, 121, 192, 84, 153, 61, 20, 
125, 100, 251, 108, 72, + 128, 151, 219, 247, 247, 119, 12, 4, 94, 156, 151, 219, 13, 103, 125, 108, 50, 69, 151, 25, 72, 232, 103, 12, 50, 85, 190, 21, 14, 89, 26, 160, + 50, 85, 190, 33, 14, 14, 14, 104, 50, 94, 73, 33, 14, 47, 18, 104, 153, 61, 190, 26, 14, 47, 129, 181, 153, 61, 20, 237, 14, 89, 89, 84, + 114, 145, 13, 126, 58, 99, 158, 162, 186, 190, 145, 42, 47, 18, 249, 108, 94, 144, 142, 237, 231, 226, 249, 203, 50, 20, 147, 237, 103, 226, 176, 207, + 61, 20, 20, 18, 18, 146, 186, 90, 61, 20, 20, 7, 76, 126, 232, 1, 34, 20, 216, 125, 83, 18, 179, 108, 34, 20, 20, 125, 80, 20, 181, 192, + 128, 244, 219, 65, 249, 251, 51, 47, 94, 128, 190, 33, 44, 192, 47, 72, 94, 252, 190, 184, 237, 16, 200, 80, 50, 61, 27, 26, 166, 192, 226, 119, + 50, 61, 128, 18, 100, 192, 153, 90, 50, 186, 20, 215, 125, 195, 0, 244, 50, 61, 20, 143, 24, 191, 79, 104, 153, 61, 20, 20, 129, 125, 232, 67, + 114, 145, 145, 155, 249, 98, 108, 84, 94, 128, 245, 237, 103, 103, 103, 59, 94, 128, 190, 159, 14, 227, 255, 192, 50, 61, 190, 26, 14, 89, 99, 162, + 50, 61, 20, 8, 72, 47, 45, 4, 50, 61, 20, 201, 14, 125, 89, 84, 50, 61, 190, 18, 237, 113, 1, 250, 153, 61, 20, 18, 18, 103, 47, 84, + 128, 151, 219, 247, 100, 58, 108, 226, 94, 128, 190, 237, 72, 7, 227, 104, 94, 85, 190, 33, 14, 72, 135, 89, 50, 94, 128, 33, 26, 18, 125, 103, + 50, 94, 144, 26, 45, 72, 192, 104, 50, 94, 73, 147, 159, 72, 26, 84, 153, 61, 20, 28, 26, 14, 192, 235, 153, 61, 20, 26, 18, 89, 103, 104, + 128, 151, 219, 247, 247, 226, 226, 84, 186, 243, 8, 25, 232, 7, 63, 72, 94, 27, 190, 45, 7, 251, 249, 231, 50, 73, 20, 3, 14, 242, 110, 193, + 50, 105, 190, 34, 14, 72, 250, 18, 50, 61, 8, 142, 249, 108, 104, 47, 50, 50, 154, 251, 95, 29, 0, 250, 34, 61, 20, 26, 125, 20, 224, 108, + 128, 156, 151, 44, 136, 103, 160, 108, 94, 128, 190, 245, 29, 47, 250, 108, 94, 85, 190, 33, 14, 231, 151, 205, 50, 94, 190, 26, 18, 89, 135, 89, + 50, 94, 190, 144, 26, 125, 191, 231, 50, 61, 144, 26, 18, 89, 14, 248, 153, 50, 190, 190, 72, 104, 
9, 238, 153, 61, 20, 26, 18, 45, 192, 226, + 128, 151, 219, 247, 247, 226, 108, 4, 94, 128, 245, 48, 103, 47, 135, 47, 94, 85, 190, 147, 103, 103, 192, 192, 50, 85, 190, 26, 26, 14, 223, 108, + 50, 61, 20, 33, 14, 192, 7, 192, 50, 94, 20, 20, 14, 51, 23, 108, 50, 61, 73, 20, 47, 14, 181, 45, 153, 61, 20, 26, 18, 14, 125, 192, + 156, 151, 219, 247, 100, 100, 58, 250, 85, 137, 151, 21, 232, 232, 89, 104, 60, 137, 190, 21, 14, 232, 135, 104, 49, 85, 190, 33, 14, 14, 232, 135, + 49, 85, 73, 33, 14, 18, 89, 192, 49, 94, 73, 33, 18, 14, 192, 192, 49, 60, 73, 33, 14, 18, 232, 104, 153, 55, 32, 154, 135, 135, 135, 192, + 186, 144, 216, 159, 59, 58, 103, 250, 94, 54, 9, 146, 103, 255, 99, 99, 19, 85, 144, 146, 202, 13, 181, 47, 50, 161, 42, 115, 59, 63, 70, 83, + 88, 69, 25, 112, 231, 94, 219, 123, 2, 144, 3, 129, 58, 100, 177, 255, 61, 54, 25, 202, 160, 157, 0, 1, 34, 20, 20, 59, 160, 72, 47, 108, + 186, 8, 216, 239, 239, 7, 119, 155, 50, 27, 216, 245, 25, 82, 85, 109, 88, 61, 190, 147, 155, 242, 22, 192, 50, 150, 20, 5, 216, 144, 4, 143, + 88, 94, 144, 237, 103, 36, 209, 17, 163, 88, 36, 29, 232, 231, 148, 10, 163, 55, 28, 164, 15, 72, 74, 25, 34, 228, 235, 237, 129, 245, 101, 97, + 252, 97, 213, 237, 7, 103, 58, 104, 88, 252, 190, 56, 26, 146, 104, 108, 88, 94, 54, 33, 26, 14, 192, 104, 88, 94, 54, 54, 33, 45, 115, 202, + 88, 186, 27, 54, 14, 46, 18, 231, 88, 50, 20, 57, 18, 29, 100, 58, 10, 61, 20, 137, 120, 216, 104, 154, 163, 61, 54, 26, 26, 25, 104, 192, + 186, 97, 213, 121, 7, 102, 47, 250, 94, 85, 156, 25, 14, 72, 108, 104, 50, 105, 190, 235, 26, 245, 6, 101, 88, 19, 144, 130, 237, 192, 135, 67, + 50, 105, 61, 117, 117, 81, 195, 39, 88, 61, 144, 26, 241, 40, 166, 0, 88, 55, 34, 228, 146, 31, 75, 26, 153, 50, 54, 193, 103, 201, 146, 226, + 228, 235, 216, 6, 82, 103, 135, 104, 19, 186, 144, 25, 111, 235, 147, 103, 50, 50, 190, 251, 20, 47, 132, 232, 88, 242, 194, 235, 103, 154, 250, 0, + 50, 69, 67, 103, 162, 126, 191, 45, 50, 2, 147, 25, 44, 237, 159, 0, 153, 105, 
184, 191, 144, 237, 227, 67, 163, 61, 190, 26, 47, 71, 104, 0, + 128, 216, 134, 239, 82, 47, 249, 47, 94, 35, 190, 144, 144, 237, 41, 232, 94, 94, 20, 144, 237, 103, 31, 240, 88, 105, 144, 235, 159, 22, 51, 224, + 50, 61, 235, 237, 103, 103, 159, 67, 49, 2, 20, 144, 237, 237, 141, 178, 153, 19, 144, 144, 103, 103, 159, 7, 153, 61, 144, 144, 103, 103, 51, 18, + 186, 97, 216, 124, 72, 7, 7, 226, 50, 252, 9, 38, 202, 72, 192, 58, 88, 94, 161, 56, 26, 115, 80, 146, 88, 36, 73, 54, 45, 197, 250, 255, + 88, 61, 190, 20, 251, 103, 147, 181, 153, 87, 73, 114, 180, 182, 45, 110, 153, 61, 54, 251, 251, 120, 126, 138, 153, 61, 27, 26, 147, 68, 135, 26, + 85, 156, 219, 219, 247, 51, 108, 108, 94, 165, 151, 219, 159, 102, 89, 226, 49, 85, 156, 245, 14, 146, 45, 14, 50, 105, 190, 21, 232, 72, 89, 212, + 153, 61, 243, 33, 237, 245, 82, 59, 88, 61, 69, 156, 18, 83, 102, 64, 153, 50, 190, 8, 18, 251, 242, 0, 153, 61, 20, 26, 18, 72, 142, 197, + 186, 144, 245, 56, 89, 125, 56, 127, 19, 186, 8, 8, 89, 132, 232, 158, 50, 186, 212, 147, 231, 237, 247, 104, 50, 61, 190, 45, 14, 79, 15, 20, + 50, 88, 194, 129, 34, 27, 202, 62, 88, 61, 190, 50, 128, 151, 73, 162, 88, 105, 215, 26, 26, 87, 143, 92, 163, 61, 215, 20, 197, 124, 14, 192, + 252, 128, 184, 121, 146, 232, 185, 63, 94, 94, 161, 233, 17, 227, 246, 34, 88, 94, 221, 28, 232, 22, 37, 215, 88, 69, 20, 75, 109, 192, 211, 193, + 50, 61, 190, 28, 134, 233, 151, 224, 50, 94, 61, 232, 119, 138, 0, 0, 88, 50, 194, 194, 252, 92, 10, 0, 153, 61, 20, 144, 25, 252, 197, 86, + 252, 252, 190, 56, 29, 103, 159, 104, 88, 252, 54, 54, 26, 129, 201, 192, 88, 94, 54, 26, 14, 196, 103, 192, 88, 50, 54, 54, 18, 45, 26, 192, + 88, 50, 54, 54, 26, 135, 197, 104, 88, 50, 61, 197, 54, 14, 166, 192, 10, 50, 61, 197, 192, 18, 88, 10, 10, 88, 61, 197, 135, 135, 192, 192, + 252, 128, 213, 237, 103, 7, 72, 192, 94, 85, 190, 235, 14, 89, 147, 8, 88, 94, 69, 54, 33, 72, 103, 108, 88, 94, 85, 54, 135, 135, 114, 85, + 88, 50, 69, 144, 135, 175, 147, 163, 88, 50, 61, 
61, 26, 42, 155, 192, 153, 88, 94, 36, 56, 243, 158, 156, 10, 153, 61, 61, 210, 251, 135, 192, + 186, 240, 245, 235, 25, 154, 133, 78, 19, 186, 144, 20, 33, 103, 44, 192, 50, 252, 20, 80, 125, 134, 209, 30, 50, 50, 73, 26, 25, 125, 222, 125, + 50, 87, 194, 235, 50, 132, 151, 108, 153, 105, 212, 50, 201, 81, 0, 249, 153, 34, 69, 103, 61, 106, 0, 0, 153, 61, 186, 26, 20, 20, 7, 17, + 128, 156, 151, 202, 82, 104, 155, 145, 94, 85, 190, 216, 159, 144, 253, 108, 94, 94, 190, 147, 56, 146, 100, 103, 50, 94, 144, 212, 18, 177, 174, 200, + 50, 105, 144, 20, 100, 235, 122, 174, 88, 50, 144, 87, 216, 89, 159, 251, 153, 50, 144, 194, 20, 8, 181, 243, 153, 61, 61, 144, 144, 237, 18, 104, + 252, 97, 9, 237, 14, 72, 146, 104, 94, 252, 190, 54, 26, 45, 47, 104, 88, 94, 54, 26, 135, 26, 82, 192, 88, 94, 61, 154, 14, 72, 103, 103, + 88, 50, 61, 2, 26, 147, 88, 45, 88, 50, 61, 20, 14, 2, 254, 70, 153, 50, 50, 87, 125, 237, 137, 19, 10, 88, 61, 197, 26, 135, 26, 192, + 85, 156, 151, 219, 72, 72, 232, 108, 94, 242, 170, 21, 14, 18, 25, 103, 94, 85, 190, 33, 14, 45, 192, 104, 49, 94, 73, 33, 14, 232, 21, 47, + 49, 94, 69, 212, 14, 89, 14, 34, 49, 50, 194, 105, 135, 97, 45, 80, 153, 55, 61, 36, 33, 173, 21, 92, 153, 50, 61, 154, 135, 135, 135, 192, + 0, 44, 247, 119, 79, 22, 238, 250, 0, 216, 179, 59, 51, 12, 99, 250, 0, 184, 179, 59, 51, 148, 99, 250, 0, 97, 179, 100, 118, 181, 4, 162, + 0, 78, 191, 132, 62, 132, 238, 250, 0, 187, 179, 249, 53, 108, 214, 249, 0, 143, 7, 155, 224, 59, 207, 79, 0, 225, 65, 113, 99, 167, 214, 101, + 0, 219, 247, 119, 79, 22, 167, 214, 0, 151, 219, 247, 119, 148, 22, 214, 0, 77, 219, 247, 79, 12, 4, 4, 0, 151, 247, 126, 113, 148, 4, 118, + 0, 253, 168, 30, 79, 178, 101, 12, 0, 151, 191, 131, 222, 250, 70, 1, 0, 5, 131, 119, 62, 255, 111, 1, 0, 38, 247, 119, 224, 167, 181, 84, + 151, 247, 247, 98, 101, 0, 0, 0, 97, 216, 41, 118, 0, 0, 0, 0, 97, 38, 142, 98, 64, 0, 0, 0, 177, 244, 179, 224, 0, 0, 0, 0, + 128, 244, 179, 104, 0, 0, 0, 0, 61, 244, 65, 0, 0, 0, 
0, 0, 34, 219, 223, 0, 0, 0, 0, 0, 20, 7, 82, 250, 0, 0, 0, 0, + 151, 247, 247, 22, 0, 0, 0, 0, 156, 219, 219, 160, 0, 0, 0, 0, 97, 151, 219, 12, 0, 0, 0, 0, 85, 219, 202, 12, 0, 0, 0, 0, + 27, 109, 115, 148, 0, 0, 0, 0, 15, 151, 82, 227, 4, 0, 0, 0, 128, 244, 191, 0, 0, 0, 0, 0, 20, 145, 247, 158, 160, 0, 0, 0, + 0, 219, 247, 119, 79, 62, 22, 101, 0, 120, 219, 247, 119, 51, 62, 250, 0, 151, 219, 247, 119, 98, 12, 226, 0, 244, 44, 68, 51, 108, 101, 162, + 0, 48, 247, 59, 231, 255, 231, 207, 0, 235, 179, 129, 119, 118, 207, 101, 0, 75, 247, 30, 222, 99, 238, 207, 0, 5, 182, 119, 79, 99, 207, 4, + 0, 219, 247, 119, 79, 22, 238, 90, 0, 151, 247, 119, 79, 62, 167, 214, 0, 151, 247, 119, 79, 62, 238, 101, 0, 151, 247, 119, 178, 22, 207, 1, + 0, 187, 247, 119, 79, 22, 4, 214, 0, 139, 191, 119, 79, 167, 4, 90, 0, 219, 191, 119, 79, 62, 238, 214, 0, 120, 247, 119, 118, 167, 4, 214, + 151, 247, 247, 4, 191, 0, 0, 0, 170, 219, 219, 226, 0, 0, 0, 0, 128, 121, 219, 189, 0, 0, 0, 0, 252, 6, 13, 84, 0, 0, 0, 0, + 97, 245, 225, 62, 28, 0, 0, 0, 128, 151, 168, 250, 0, 0, 0, 0, 252, 56, 225, 62, 0, 0, 0, 0, 15, 145, 202, 214, 0, 0, 0, 0, + 151, 247, 247, 238, 226, 0, 0, 0, 156, 219, 247, 162, 249, 0, 0, 0, 156, 219, 247, 226, 0, 0, 0, 0, 156, 219, 247, 22, 0, 0, 0, 0, + 128, 219, 247, 53, 0, 0, 0, 0, 85, 219, 247, 118, 0, 0, 0, 0, 85, 219, 247, 226, 0, 0, 0, 0, 128, 219, 191, 162, 0, 0, 0, 0, + 0, 219, 247, 247, 110, 79, 108, 101, 0, 213, 216, 239, 51, 63, 162, 108, 0, 97, 216, 239, 51, 63, 22, 192, 0, 243, 77, 25, 249, 58, 42, 101, + 0, 97, 6, 179, 63, 181, 158, 250, 0, 177, 184, 124, 158, 162, 101, 167, 0, 33, 109, 82, 100, 238, 214, 45, 0, 244, 142, 59, 51, 226, 101, 4, + 0, 219, 247, 119, 79, 22, 4, 1, 0, 213, 219, 247, 119, 58, 4, 250, 0, 213, 219, 247, 63, 255, 108, 12, 0, 128, 78, 182, 164, 224, 100, 104, + 0, 156, 219, 172, 16, 162, 108, 98, 0, 245, 219, 247, 119, 99, 4, 90, 0, 151, 121, 222, 16, 222, 98, 62, 0, 245, 219, 59, 53, 58, 181, 1, + 244, 219, 247, 47, 0, 0, 0, 0, 
252, 97, 219, 120, 0, 0, 0, 0, 252, 128, 121, 230, 0, 0, 0, 0, 252, 97, 8, 193, 0, 0, 0, 0, + 252, 128, 124, 203, 0, 0, 0, 0, 252, 128, 3, 9, 0, 0, 0, 0, 94, 33, 103, 110, 0, 0, 0, 0, 61, 20, 41, 110, 0, 0, 0, 0, + 151, 219, 247, 226, 0, 0, 0, 0, 97, 213, 219, 33, 0, 0, 0, 0, 85, 156, 151, 61, 0, 0, 0, 0, 85, 156, 145, 55, 0, 0, 0, 0, + 252, 128, 219, 189, 0, 0, 0, 0, 94, 156, 142, 108, 0, 0, 0, 0, 252, 190, 219, 0, 0, 0, 0, 0, 61, 190, 52, 153, 125, 0, 0, 0, + 0, 219, 247, 247, 119, 22, 207, 1, 0, 151, 219, 219, 199, 119, 148, 250, 0, 213, 151, 229, 231, 239, 103, 51, 0, 151, 109, 202, 225, 23, 160, 111, + 0, 149, 142, 239, 59, 4, 33, 108, 0, 190, 142, 219, 23, 99, 34, 0, 0, 213, 219, 59, 13, 58, 70, 127, 0, 151, 145, 82, 158, 79, 53, 214, + 0, 219, 247, 119, 79, 62, 238, 90, 0, 219, 219, 247, 119, 79, 207, 4, 0, 151, 219, 247, 119, 58, 162, 250, 0, 151, 219, 247, 113, 160, 250, 250, + 0, 151, 219, 247, 110, 12, 1, 214, 0, 151, 219, 126, 58, 62, 214, 22, 0, 151, 219, 191, 255, 167, 63, 84, 0, 151, 219, 126, 222, 89, 90, 84, + 151, 219, 247, 68, 0, 0, 0, 0, 97, 213, 219, 102, 0, 0, 0, 0, 85, 213, 219, 109, 0, 0, 0, 0, 85, 128, 142, 226, 0, 0, 0, 0, + 252, 128, 159, 42, 0, 0, 0, 0, 252, 128, 134, 240, 0, 0, 0, 0, 61, 128, 142, 0, 0, 0, 0, 0, 61, 190, 120, 10, 0, 0, 0, 0, + 151, 219, 247, 224, 45, 0, 0, 0, 156, 151, 219, 227, 131, 0, 0, 0, 156, 151, 219, 111, 0, 0, 0, 0, 85, 156, 219, 218, 0, 0, 0, 0, + 85, 156, 219, 168, 0, 0, 0, 0, 85, 156, 219, 15, 0, 0, 0, 0, 85, 156, 219, 117, 0, 0, 0, 0, 85, 156, 219, 217, 0, 0, 0, 0, + 0, 213, 179, 59, 51, 63, 99, 250, 0, 213, 179, 59, 51, 51, 108, 1, 0, 97, 78, 202, 123, 63, 58, 84, 0, 5, 179, 202, 59, 58, 110, 250, + 0, 37, 134, 74, 112, 101, 101, 1, 0, 147, 145, 6, 103, 118, 82, 250, 0, 186, 179, 255, 4, 98, 158, 103, 0, 166, 74, 204, 51, 63, 4, 101, + 0, 121, 191, 191, 119, 62, 167, 214, 0, 176, 219, 191, 132, 79, 207, 108, 0, 221, 239, 239, 123, 224, 99, 162, 0, 221, 78, 239, 123, 167, 133, 255, + 0, 54, 13, 168, 119, 
158, 225, 159, 0, 121, 142, 239, 227, 100, 213, 58, 0, 36, 48, 191, 178, 53, 172, 118, 0, 77, 109, 40, 224, 222, 249, 250, + 97, 216, 216, 227, 192, 0, 0, 0, 252, 97, 216, 212, 197, 0, 0, 0, 94, 150, 216, 135, 106, 0, 0, 0, 50, 144, 233, 162, 0, 0, 0, 0, + 87, 54, 9, 118, 0, 0, 0, 0, 88, 54, 114, 234, 0, 0, 0, 0, 49, 37, 225, 99, 0, 0, 0, 0, 88, 243, 235, 158, 0, 0, 0, 0, + 97, 121, 219, 127, 61, 92, 0, 0, 97, 213, 219, 72, 203, 0, 0, 0, 252, 213, 234, 52, 0, 0, 0, 0, 94, 240, 211, 236, 0, 0, 0, 0, + 252, 114, 75, 58, 0, 0, 0, 0, 15, 137, 145, 0, 0, 0, 0, 0, 252, 170, 24, 0, 0, 0, 0, 0, 64, 245, 185, 200, 0, 0, 0, 0, + 0, 121, 219, 191, 51, 63, 99, 250, 0, 5, 121, 219, 172, 249, 158, 108, 0, 156, 216, 179, 172, 103, 7, 250, 0, 97, 142, 145, 103, 123, 27, 227, + 0, 221, 130, 121, 40, 123, 59, 104, 0, 143, 245, 82, 222, 23, 100, 163, 0, 241, 202, 74, 159, 19, 122, 67, 0, 176, 121, 82, 103, 249, 250, 108, + 0, 169, 191, 119, 79, 62, 167, 214, 0, 219, 247, 247, 119, 62, 12, 214, 0, 5, 219, 247, 119, 63, 12, 1, 0, 213, 219, 119, 119, 148, 99, 1, + 0, 187, 191, 119, 140, 224, 4, 238, 0, 38, 219, 199, 63, 238, 1, 4, 0, 24, 239, 40, 22, 178, 58, 84, 0, 169, 191, 119, 178, 22, 181, 214, + 97, 121, 219, 249, 250, 0, 0, 0, 97, 213, 216, 47, 0, 0, 0, 0, 64, 9, 107, 45, 0, 0, 0, 0, 85, 213, 70, 196, 0, 0, 0, 0, + 252, 97, 134, 124, 0, 0, 0, 0, 60, 213, 120, 88, 0, 0, 0, 0, 64, 37, 77, 108, 0, 0, 0, 0, 252, 245, 213, 241, 0, 0, 0, 0, + 133, 191, 191, 62, 214, 0, 0, 0, 156, 219, 219, 172, 118, 0, 0, 0, 128, 151, 219, 255, 226, 0, 0, 0, 85, 151, 219, 107, 0, 0, 0, 0, + 85, 151, 234, 214, 0, 0, 0, 0, 194, 134, 219, 64, 0, 0, 0, 0, 252, 56, 168, 175, 0, 0, 0, 0, 186, 145, 168, 146, 227, 0, 0, 0, + 0, 97, 216, 6, 102, 227, 226, 108, 0, 97, 213, 25, 13, 227, 104, 192, 0, 252, 221, 237, 146, 199, 219, 108, 0, 252, 213, 80, 202, 30, 41, 84, + 0, 165, 235, 142, 126, 232, 187, 0, 0, 97, 48, 42, 31, 166, 184, 192, 0, 97, 193, 8, 132, 51, 181, 181, 0, 27, 245, 159, 103, 47, 99, 1, + 0, 107, 
219, 191, 119, 12, 12, 101, 0, 213, 78, 219, 199, 224, 162, 250, 0, 213, 107, 44, 249, 232, 63, 250, 0, 213, 107, 13, 72, 118, 53, 167, + 0, 97, 70, 168, 16, 132, 101, 229, 0, 213, 142, 232, 222, 100, 198, 245, 0, 184, 244, 168, 47, 108, 0, 0, 0, 213, 145, 191, 199, 58, 249, 108, + 252, 97, 216, 45, 114, 8, 0, 0, 252, 252, 190, 166, 0, 0, 0, 0, 252, 252, 144, 113, 0, 0, 0, 0, 88, 252, 54, 166, 0, 0, 0, 0, + 88, 94, 190, 238, 0, 0, 0, 0, 88, 61, 245, 174, 10, 0, 0, 0, 88, 252, 54, 198, 0, 0, 0, 0, 88, 252, 54, 86, 202, 0, 0, 0, + 97, 213, 219, 39, 46, 0, 0, 0, 252, 97, 151, 17, 0, 0, 0, 0, 252, 97, 213, 52, 0, 0, 0, 0, 252, 97, 213, 60, 0, 0, 0, 0, + 94, 97, 156, 206, 0, 0, 0, 0, 94, 37, 142, 0, 0, 0, 0, 0, 94, 128, 133, 0, 0, 0, 0, 0, 94, 97, 190, 243, 0, 0, 0, 0, + 0, 213, 121, 219, 51, 63, 226, 108, 0, 97, 213, 121, 239, 231, 108, 104, 0, 97, 213, 25, 146, 47, 59, 25, 0, 128, 9, 139, 204, 232, 58, 177, + 0, 128, 216, 237, 103, 235, 145, 221, 0, 212, 245, 202, 45, 242, 108, 0, 0, 150, 33, 251, 153, 132, 0, 0, 0, 144, 216, 168, 65, 58, 82, 18, + 0, 219, 191, 191, 119, 62, 167, 214, 0, 151, 219, 247, 119, 63, 99, 4, 0, 151, 219, 247, 119, 227, 226, 226, 0, 156, 219, 39, 183, 58, 178, 148, + 0, 43, 151, 219, 98, 148, 140, 14, 0, 190, 219, 155, 112, 110, 22, 146, 0, 190, 78, 31, 100, 249, 6, 77, 0, 151, 219, 191, 223, 148, 4, 108, + 97, 213, 219, 13, 126, 0, 0, 0, 252, 97, 213, 171, 143, 0, 0, 0, 252, 252, 190, 49, 0, 0, 0, 0, 252, 85, 73, 74, 0, 0, 0, 0, + 94, 252, 9, 0, 0, 0, 0, 0, 94, 35, 159, 0, 0, 0, 0, 0, 94, 194, 70, 0, 0, 0, 0, 0, 94, 194, 184, 111, 0, 0, 0, 0, + 43, 219, 219, 127, 99, 0, 0, 0, 97, 151, 219, 195, 0, 0, 0, 0, 85, 156, 219, 182, 0, 0, 0, 0, 85, 156, 219, 146, 0, 0, 0, 0, + 85, 170, 219, 171, 0, 0, 0, 0, 94, 170, 219, 1, 0, 0, 0, 0, 94, 170, 219, 140, 0, 0, 0, 0, 94, 170, 219, 248, 0, 0, 0, 0, +}; + +static INLINE int bsc_stretch(const int p) +{ + return bsc_stretch_table[p]; +} + +static INLINE int bsc_squash(const int s) +{ + return 
bsc_squash_table[2048 + s]; +} + /* Returns the model_rank_state_table entry selected by the three arguments. The index is (contextRun << 11) | (contextRank4 << 3) | rankSizeHistory with no masking or clamping, so callers must pass fields that do not overlap and that stay inside the table. NOTE(review): the shift amounts suggest rankSizeHistory fits in 3 bits and contextRank4 in 8 bits - confirm against the callers. */ +static INLINE int model_rank_state(const int contextRank4, const int contextRun, const int rankSizeHistory) +{ + return model_rank_state_table[(contextRun << 11) | (contextRank4 << 3) | (rankSizeHistory)]; +} + /* Returns the model_run_state_table entry selected by the four arguments. The index is (contextRank0 << 10) | (contextRun << 6) | (min(rank, 7) << 3) | min(runSizeHistory, 7): rank and runSizeHistory are capped at 7 so that each stays within 3 bits, while contextRank0 and contextRun are used as passed. Negative arguments are not guarded against. */ +static INLINE int model_run_state(const int contextRank0, const int contextRun, const int rank, const int runSizeHistory) +{ + return model_run_state_table[(contextRank0 << 10) | (contextRun << 6) | ((rank < 7 ? rank : 7) << 3) | (runSizeHistory < 7 ? runSizeHistory : 7)]; +} + +#endif + +/*-----------------------------------------------------------*/ +/* End tables.h */ +/*-----------------------------------------------------------*/ diff --git a/libbsc/libbsc/coder/qlfc/qlfc.cpp b/libbsc/libbsc/coder/qlfc/qlfc.cpp new file mode 100644 index 00000000..eae05c52 --- /dev/null +++ b/libbsc/libbsc/coder/qlfc/qlfc.cpp @@ -0,0 +1,2208 @@ +/*-----------------------------------------------------------*/ +/* Block Sorting, Lossless Data Compression Library. */ +/* Quantized Local Frequency Coding functions */ +/*-----------------------------------------------------------*/ + +/*-- + +This file is a part of bsc and/or libbsc, a program and a library for +lossless, block-sorting data compression. + + Copyright (c) 2009-2021 Ilya Grebnov + + Licensed under the Apache License, Version 2.0 (the "License"); + you may not use this file except in compliance with the License. + You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + + Unless required by applicable law or agreed to in writing, software + distributed under the License is distributed on an "AS IS" BASIS, + WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + See the License for the specific language governing permissions and + limitations under the License. + +Please see the file LICENSE for full copyright information and file AUTHORS +for full list of contributors. 
+ +See also the bsc and libbsc web site: + http://libbsc.com/ for more information. + +--*/ + +#include +#include +#include + +#include "qlfc.h" + +#include "../../libbsc.h" +#include "../../platform/platform.h" + +#include "../common/rangecoder.h" +#include "../common/tables.h" +#include "../common/predictor.h" + +#include "qlfc_model.h" + +#if defined(LIBBSC_DYNAMIC_CPU_DISPATCH) + unsigned char * bsc_qlfc_transform(const unsigned char * RESTRICT input, unsigned char * RESTRICT buffer, int n, unsigned char * RESTRICT MTFTable); + unsigned char * bsc_qlfc_transform_avx2(const unsigned char * RESTRICT input, unsigned char * RESTRICT buffer, int n, unsigned char * RESTRICT MTFTable); + unsigned char * bsc_qlfc_transform_avx(const unsigned char * RESTRICT input, unsigned char * RESTRICT buffer, int n, unsigned char * RESTRICT MTFTable); + unsigned char * bsc_qlfc_transform_sse2(const unsigned char * RESTRICT input, unsigned char * RESTRICT buffer, int n, unsigned char * RESTRICT MTFTable); + + int bsc_qlfc_adaptive_encode(const unsigned char * input, unsigned char * output, unsigned char * buffer, int inputSize, int outputSize, QlfcStatisticalModel1 * model); + int bsc_qlfc_adaptive_encode_avx2(const unsigned char * input, unsigned char * output, unsigned char * buffer, int inputSize, int outputSize, QlfcStatisticalModel1 * model); + int bsc_qlfc_adaptive_encode_sse2(const unsigned char * input, unsigned char * output, unsigned char * buffer, int inputSize, int outputSize, QlfcStatisticalModel1 * model); + + int bsc_qlfc_static_encode(const unsigned char * input, unsigned char * output, unsigned char * buffer, int inputSize, int outputSize, QlfcStatisticalModel1 * model); + int bsc_qlfc_static_encode_avx2(const unsigned char * input, unsigned char * output, unsigned char * buffer, int inputSize, int outputSize, QlfcStatisticalModel1 * model); + int bsc_qlfc_static_encode_sse2(const unsigned char * input, unsigned char * output, unsigned char * buffer, int inputSize, 
int outputSize, QlfcStatisticalModel1 * model); + + int bsc_qlfc_fast_encode(const unsigned char * input, unsigned char * output, unsigned char * buffer, int inputSize, int outputSize, QlfcStatisticalModel2 * model); + int bsc_qlfc_fast_encode_avx2(const unsigned char * input, unsigned char * output, unsigned char * buffer, int inputSize, int outputSize, QlfcStatisticalModel2 * model); + int bsc_qlfc_fast_encode_sse2(const unsigned char * input, unsigned char * output, unsigned char * buffer, int inputSize, int outputSize, QlfcStatisticalModel2 * model); + + int bsc_qlfc_adaptive_decode(const unsigned char * input, unsigned char * output, QlfcStatisticalModel1 * model); + int bsc_qlfc_adaptive_decode_avx(const unsigned char * input, unsigned char * output, QlfcStatisticalModel1 * model); + int bsc_qlfc_adaptive_decode_sse41(const unsigned char * input, unsigned char * output, QlfcStatisticalModel1 * model); + int bsc_qlfc_adaptive_decode_sse2(const unsigned char * input, unsigned char * output, QlfcStatisticalModel1 * model); + + int bsc_qlfc_static_decode(const unsigned char * input, unsigned char * output, QlfcStatisticalModel1 * model); + int bsc_qlfc_static_decode_avx(const unsigned char * input, unsigned char * output, QlfcStatisticalModel1 * model); + int bsc_qlfc_static_decode_sse41(const unsigned char * input, unsigned char * output, QlfcStatisticalModel1 * model); + int bsc_qlfc_static_decode_sse2(const unsigned char * input, unsigned char * output, QlfcStatisticalModel1 * model); + + int bsc_qlfc_fast_decode(const unsigned char * input, unsigned char * output, QlfcStatisticalModel2 * model); + int bsc_qlfc_fast_decode_avx(const unsigned char * input, unsigned char * output, QlfcStatisticalModel2 * model); + int bsc_qlfc_fast_decode_sse41(const unsigned char * input, unsigned char * output, QlfcStatisticalModel2 * model); + int bsc_qlfc_fast_decode_sse2(const unsigned char * input, unsigned char * output, QlfcStatisticalModel2 * model); + + #if 
LIBBSC_CPU_FEATURE == LIBBSC_CPU_FEATURE_SSE2 + /* Runtime CPU dispatchers. This group is compiled only when LIBBSC_DYNAMIC_CPU_DISPATCH is defined and this translation unit is built with LIBBSC_CPU_FEATURE == LIBBSC_CPU_FEATURE_SSE2. Each function carries the unsuffixed public name, queries bsc_get_cpu_features() and forwards its arguments unchanged to a suffixed variant, falling back to the _sse2 one. The >= tests assume the LIBBSC_CPU_FEATURE_* constants are ordered by capability - TODO confirm in platform.h. bsc_qlfc_adaptive_encode: _avx2 variant when AVX2 is reported, otherwise _sse2. */ int bsc_qlfc_adaptive_encode(const unsigned char * input, unsigned char * output, unsigned char * buffer, int inputSize, int outputSize, QlfcStatisticalModel1 * model) + { + if (bsc_get_cpu_features() >= LIBBSC_CPU_FEATURE_AVX2) { return bsc_qlfc_adaptive_encode_avx2(input, output, buffer, inputSize, outputSize, model); } + + return bsc_qlfc_adaptive_encode_sse2(input, output, buffer, inputSize, outputSize, model); + } + + /* bsc_qlfc_static_encode: same selection as the adaptive encoder (_avx2 when AVX2 is reported, otherwise _sse2). */ int bsc_qlfc_static_encode(const unsigned char * input, unsigned char * output, unsigned char * buffer, int inputSize, int outputSize, QlfcStatisticalModel1 * model) + { + if (bsc_get_cpu_features() >= LIBBSC_CPU_FEATURE_AVX2) { return bsc_qlfc_static_encode_avx2(input, output, buffer, inputSize, outputSize, model); } + + return bsc_qlfc_static_encode_sse2(input, output, buffer, inputSize, outputSize, model); + } + + /* bsc_qlfc_fast_encode: _avx2 when AVX2 is reported, otherwise _sse2. Takes a QlfcStatisticalModel2 rather than a QlfcStatisticalModel1. */ int bsc_qlfc_fast_encode(const unsigned char * input, unsigned char * output, unsigned char * buffer, int inputSize, int outputSize, QlfcStatisticalModel2 * model) + { + if (bsc_get_cpu_features() >= LIBBSC_CPU_FEATURE_AVX2) { return bsc_qlfc_fast_encode_avx2(input, output, buffer, inputSize, outputSize, model); } + + return bsc_qlfc_fast_encode_sse2(input, output, buffer, inputSize, outputSize, model); + } + + /* bsc_qlfc_transform: three-way selection - _avx2, then _avx, otherwise _sse2. The parameters are written without RESTRICT here, unlike the forward declaration earlier in this file. */ unsigned char * bsc_qlfc_transform(const unsigned char * input, unsigned char * buffer, int n, unsigned char * MTFTable) + { + if (bsc_get_cpu_features() >= LIBBSC_CPU_FEATURE_AVX2) { return bsc_qlfc_transform_avx2(input, buffer, n, MTFTable); } + if (bsc_get_cpu_features() >= LIBBSC_CPU_FEATURE_AVX) { return bsc_qlfc_transform_avx (input, buffer, n, MTFTable); } + + return bsc_qlfc_transform_sse2(input, buffer, n, MTFTable); + } + + /* bsc_qlfc_adaptive_decode: the decoders have no _avx2 variant; selection is _avx, then _sse41, otherwise _sse2. */ int bsc_qlfc_adaptive_decode(const unsigned char * input, unsigned char * output, QlfcStatisticalModel1 * model) + { + if (bsc_get_cpu_features() >= LIBBSC_CPU_FEATURE_AVX) { return bsc_qlfc_adaptive_decode_avx (input, output, model); } + if 
(bsc_get_cpu_features() >= LIBBSC_CPU_FEATURE_SSE41) { return bsc_qlfc_adaptive_decode_sse41(input, output, model); } + + return bsc_qlfc_adaptive_decode_sse2(input, output, model); + } + + /* bsc_qlfc_static_decode: selection is _avx, then _sse41, otherwise _sse2. */ int bsc_qlfc_static_decode(const unsigned char * input, unsigned char * output, QlfcStatisticalModel1 * model) + { + if (bsc_get_cpu_features() >= LIBBSC_CPU_FEATURE_AVX) { return bsc_qlfc_static_decode_avx (input, output, model); } + if (bsc_get_cpu_features() >= LIBBSC_CPU_FEATURE_SSE41) { return bsc_qlfc_static_decode_sse41(input, output, model); } + + return bsc_qlfc_static_decode_sse2(input, output, model); + } + + /* bsc_qlfc_fast_decode: selection is _avx, then _sse41, otherwise _sse2. Takes a QlfcStatisticalModel2. */ int bsc_qlfc_fast_decode(const unsigned char * input, unsigned char * output, QlfcStatisticalModel2 * model) + { + if (bsc_get_cpu_features() >= LIBBSC_CPU_FEATURE_AVX) { return bsc_qlfc_fast_decode_avx (input, output, model); } + if (bsc_get_cpu_features() >= LIBBSC_CPU_FEATURE_SSE41) { return bsc_qlfc_fast_decode_sse41(input, output, model); } + + return bsc_qlfc_fast_decode_sse2(input, output, model); + } + #endif + + #if LIBBSC_CPU_FEATURE == LIBBSC_CPU_FEATURE_AVX2 + #define QLFC_TRANSFORM_FUNCTION_NAME bsc_qlfc_transform_avx2 + #define QLFC_TRANSFORM_SCAN_FUNCTION_NAME bsc_qlfc_transform_scan_avx2 + #define QLFC_ADAPTIVE_ENCODE_FUNCTION_NAME bsc_qlfc_adaptive_encode_avx2 + #define QLFC_STATIC_ENCODE_FUNCTION_NAME bsc_qlfc_static_encode_avx2 + #define QLFC_FAST_ENCODE_FUNCTION_NAME bsc_qlfc_fast_encode_avx2 + #elif LIBBSC_CPU_FEATURE == LIBBSC_CPU_FEATURE_AVX + #define QLFC_TRANSFORM_FUNCTION_NAME bsc_qlfc_transform_avx + #define QLFC_TRANSFORM_SCAN_FUNCTION_NAME bsc_qlfc_transform_scan_avx + #define QLFC_ADAPTIVE_DECODE_FUNCTION_NAME bsc_qlfc_adaptive_decode_avx + #define QLFC_STATIC_DECODE_FUNCTION_NAME bsc_qlfc_static_decode_avx + #define QLFC_FAST_DECODE_FUNCTION_NAME bsc_qlfc_fast_decode_avx + #elif LIBBSC_CPU_FEATURE == LIBBSC_CPU_FEATURE_SSE41 + #define QLFC_ADAPTIVE_DECODE_FUNCTION_NAME bsc_qlfc_adaptive_decode_sse41 + #define 
QLFC_STATIC_DECODE_FUNCTION_NAME bsc_qlfc_static_decode_sse41 + #define QLFC_FAST_DECODE_FUNCTION_NAME bsc_qlfc_fast_decode_sse41 + #elif LIBBSC_CPU_FEATURE == LIBBSC_CPU_FEATURE_SSE2 + #define QLFC_TRANSFORM_FUNCTION_NAME bsc_qlfc_transform_sse2 + #define QLFC_TRANSFORM_SCAN_FUNCTION_NAME bsc_qlfc_transform_scan_sse2 + #define QLFC_ADAPTIVE_ENCODE_FUNCTION_NAME bsc_qlfc_adaptive_encode_sse2 + #define QLFC_STATIC_ENCODE_FUNCTION_NAME bsc_qlfc_static_encode_sse2 + #define QLFC_FAST_ENCODE_FUNCTION_NAME bsc_qlfc_fast_encode_sse2 + #define QLFC_ADAPTIVE_DECODE_FUNCTION_NAME bsc_qlfc_adaptive_decode_sse2 + #define QLFC_STATIC_DECODE_FUNCTION_NAME bsc_qlfc_static_decode_sse2 + #define QLFC_FAST_DECODE_FUNCTION_NAME bsc_qlfc_fast_decode_sse2 + #endif +#else + #define QLFC_TRANSFORM_FUNCTION_NAME bsc_qlfc_transform + #define QLFC_TRANSFORM_SCAN_FUNCTION_NAME bsc_qlfc_transform_scan + #define QLFC_ADAPTIVE_ENCODE_FUNCTION_NAME bsc_qlfc_adaptive_encode + #define QLFC_STATIC_ENCODE_FUNCTION_NAME bsc_qlfc_static_encode + #define QLFC_FAST_ENCODE_FUNCTION_NAME bsc_qlfc_fast_encode + #define QLFC_ADAPTIVE_DECODE_FUNCTION_NAME bsc_qlfc_adaptive_decode + #define QLFC_STATIC_DECODE_FUNCTION_NAME bsc_qlfc_static_decode + #define QLFC_FAST_DECODE_FUNCTION_NAME bsc_qlfc_fast_decode +#endif + /* The QLFC_*_FUNCTION_NAME macros defined above decide which function names this translation unit provides; each section below is compiled only when its macro is defined. */ +#if defined(QLFC_TRANSFORM_FUNCTION_NAME) + +#if LIBBSC_CPU_FEATURE >= LIBBSC_CPU_FEATURE_SSE2 + /* Backward run scan, x86 SIMD build. Searches from index i towards the start of input for the nearest lower index whose byte differs from currentChar and returns it, or -1 when every byte below i equals currentChar. Bytes are compared 32 (AVX2) or 16 (SSE2) at a time using unaligned loads over the window [i - width, i - 1]; on a mismatch the highest set bit of the inverted equality mask picks the last differing byte of that window. The trailing scalar do-while covers the remaining low indices. */ +INLINE ptrdiff_t QLFC_TRANSFORM_SCAN_FUNCTION_NAME (const unsigned char * RESTRICT input, ptrdiff_t i, unsigned char currentChar) +{ +#if LIBBSC_CPU_FEATURE >= LIBBSC_CPU_FEATURE_AVX2 + __m256i v = _mm256_set1_epi8(currentChar); + + while (i >= 32) + { + i -= 32; int m = _mm256_movemask_epi8(_mm256_cmpeq_epi8(_mm256_loadu_si256((const __m256i *)(input + i)), v)); + if (m != (int)0xffffffff) { return i + bsc_bit_scan_reverse(((unsigned int)(~m))); } + } +#elif LIBBSC_CPU_FEATURE >= LIBBSC_CPU_FEATURE_SSE2 + __m128i v = _mm_set1_epi8(currentChar); + + while (i >= 16) + { + i -= 16; int m = 
_mm_movemask_epi8(_mm_cmpeq_epi8(_mm_loadu_si128((const __m128i *)(input + i)), v)); + if (m != 0xffff) { return i + bsc_bit_scan_reverse((unsigned int)(m ^ 0xffff)); } + } +#endif + + do {} while ((--i >= 0) && (input[i] == currentChar)); return i; +} + /* Reverse move-to-front rank transform, x86 SIMD build. Walks input from its last byte to its first, one run of equal bytes at a time, and writes one value per run into buffer from index n - 1 downwards; returns a pointer to the lowest value written. ranks[c] holds the current rank of byte c biased by -128 so that it fits a signed char, and flags[c] records whether c has been seen. The first time a byte is seen its output is the number of distinct bytes seen so far (nSymbols) instead of its rank. On return MTFTable lists bytes by final rank, with the first never-seen slot overwritten by a copy of its predecessor. Requires n >= 1 because buffer[n - 1] is written unconditionally. */ +unsigned char * QLFC_TRANSFORM_FUNCTION_NAME (const unsigned char * RESTRICT input, unsigned char * RESTRICT buffer, int n, unsigned char * RESTRICT MTFTable) +{ + signed char ALIGNED(64) ranks[ALPHABET_SIZE]; + signed char ALIGNED(64) flags[ALPHABET_SIZE]; + + for (ptrdiff_t i = 0; i < ALPHABET_SIZE; ++i) { ranks[i] = (signed char)(i - 128); } + for (ptrdiff_t i = 0; i < ALPHABET_SIZE; ++i) { flags[i] = 0; } + + ptrdiff_t i = (ptrdiff_t)n - 1, j = n; signed char nSymbols = 0; + + for (; i >= 0; ) + { + /* Two runs are consumed per iteration. If the first run reaches the start of input, i is reset to 0 and the block after the loop handles that single run. */ unsigned char currentChar1 = input[i]; i = QLFC_TRANSFORM_SCAN_FUNCTION_NAME(input, i, currentChar1); if (i < 0) { i = 0; break; } + unsigned char currentChar2 = input[i]; i = QLFC_TRANSFORM_SCAN_FUNCTION_NAME(input, i, currentChar2); + + /* rank2 is bumped by one when currentChar1 ranked behind currentChar2, because moving currentChar1 to the front first pushes currentChar2 back one place. */ signed char rank1 = ranks[currentChar1], rank2 = ranks[currentChar2]; rank2 += rank1 > rank2; + + buffer[--j] = rank1 + 128; if (flags[currentChar1] == 0) { flags[currentChar1] = 1; buffer[j] = nSymbols++; } + buffer[--j] = rank2 + 128; if (flags[currentChar2] == 0) { flags[currentChar2] = 1; buffer[j] = nSymbols++; } + + /* Each entry gains 1 for every one of rank1 and rank2 that is greater than it (subtracting -1 adds 1). The 256 entries are handled as eight fixed 32-entry loops, presumably so that the compiler can vectorize each one. */ for (int t = 0 * 32; t < 1 * 32; ++t) { ranks[t] -= (rank1 > ranks[t] ? (signed char)-1 : (signed char)0) + (rank2 > ranks[t] ? (signed char)-1 : (signed char)0); } + for (int t = 1 * 32; t < 2 * 32; ++t) { ranks[t] -= (rank1 > ranks[t] ? (signed char)-1 : (signed char)0) + (rank2 > ranks[t] ? (signed char)-1 : (signed char)0); } + for (int t = 2 * 32; t < 3 * 32; ++t) { ranks[t] -= (rank1 > ranks[t] ? (signed char)-1 : (signed char)0) + (rank2 > ranks[t] ? (signed char)-1 : (signed char)0); } + for (int t = 3 * 32; t < 4 * 32; ++t) { ranks[t] -= (rank1 > ranks[t] ? (signed char)-1 : (signed char)0) + (rank2 > ranks[t] ? 
(signed char)-1 : (signed char)0); } + for (int t = 4 * 32; t < 5 * 32; ++t) { ranks[t] -= (rank1 > ranks[t] ? (signed char)-1 : (signed char)0) + (rank2 > ranks[t] ? (signed char)-1 : (signed char)0); } + for (int t = 5 * 32; t < 6 * 32; ++t) { ranks[t] -= (rank1 > ranks[t] ? (signed char)-1 : (signed char)0) + (rank2 > ranks[t] ? (signed char)-1 : (signed char)0); } + for (int t = 6 * 32; t < 7 * 32; ++t) { ranks[t] -= (rank1 > ranks[t] ? (signed char)-1 : (signed char)0) + (rank2 > ranks[t] ? (signed char)-1 : (signed char)0); } + for (int t = 7 * 32; t < 8 * 32; ++t) { ranks[t] -= (rank1 > ranks[t] ? (signed char)-1 : (signed char)0) + (rank2 > ranks[t] ? (signed char)-1 : (signed char)0); } + + /* currentChar2 becomes rank 0 (stored as -128) and currentChar1 rank 1 (stored as -127). */ ranks[currentChar1] = -127; ranks[currentChar2] = -128; + } + + /* i is non-negative here only after the break above: one leftover run that extends to input[0]. */ if (i >= 0) + { + unsigned char currentChar = input[0]; signed char rank = ranks[currentChar]; + + buffer[--j] = rank + 128; if (flags[currentChar] == 0) { flags[currentChar] = 1; buffer[j] = nSymbols++; } + + for (int t = 0 * 32; t < 1 * 32; ++t) { ranks[t] -= (ranks[t] < rank ? -1 : 0); } + for (int t = 1 * 32; t < 2 * 32; ++t) { ranks[t] -= (ranks[t] < rank ? -1 : 0); } + for (int t = 2 * 32; t < 3 * 32; ++t) { ranks[t] -= (ranks[t] < rank ? -1 : 0); } + for (int t = 3 * 32; t < 4 * 32; ++t) { ranks[t] -= (ranks[t] < rank ? -1 : 0); } + for (int t = 4 * 32; t < 5 * 32; ++t) { ranks[t] -= (ranks[t] < rank ? -1 : 0); } + for (int t = 5 * 32; t < 6 * 32; ++t) { ranks[t] -= (ranks[t] < rank ? -1 : 0); } + for (int t = 6 * 32; t < 7 * 32; ++t) { ranks[t] -= (ranks[t] < rank ? -1 : 0); } + for (int t = 7 * 32; t < 8 * 32; ++t) { ranks[t] -= (ranks[t] < rank ? 
-1 : 0); } + ranks[currentChar] = -128; + } + + /* The value emitted for the run ending at input[n - 1] is replaced by 1. NOTE(review): before this store it is always 0, since the first byte processed is necessarily new; the reason for forcing 1 is not visible in this function. */ buffer[n - 1] = 1; + + /* Invert ranks into MTFTable (rank to byte), then mark the end of the used symbols by copying the previous entry into the first slot whose byte never occurred. */ for (ptrdiff_t i = 0; i < ALPHABET_SIZE; ++i) { MTFTable[ranks[i] + 128] = (unsigned char)i; } + for (ptrdiff_t i = 1; i < ALPHABET_SIZE; ++i) { if (flags[MTFTable[i]] == 0) { MTFTable[i] = MTFTable[i - 1]; break; } } + + return buffer + j; +} + +#elif LIBBSC_CPU_FEATURE == LIBBSC_CPU_FEATURE_A64 + /* Backward run scan, A64 build. Same contract as the x86 version: returns the nearest index below i whose byte differs from currentChar, or -1 when there is none. currentChar is replicated into all 8 bytes of a 64-bit word and XORed with 8 input bytes at a time; the highest set bit of the XOR divided by 8 is taken as the offset of the last differing byte, which is only correct for little-endian byte order. NOTE(review): the 64-bit load goes through a cast pointer at arbitrary alignment. */ +INLINE ptrdiff_t QLFC_TRANSFORM_SCAN_FUNCTION_NAME (const unsigned char * RESTRICT input, ptrdiff_t i, unsigned long long currentChar) +{ + unsigned long long v = currentChar; v |= (v << 8); v |= (v << 16); v |= (v << 32); + + while (i >= 8) + { + i -= 8; unsigned long long m = (*(unsigned long long const *)(input + i)) ^ v; + if (m != 0) { return i + (bsc_bit_scan_reverse64(m) / 8); } + } + + do {} while ((--i >= 0) && (input[i] == currentChar)); return i; +} + /* Reverse move-to-front rank transform, A64 build. Same bookkeeping as the x86 variant (biased signed-char ranks, seen flags, two runs per iteration, values written into buffer from index n - 1 downwards, pointer to the lowest written value returned), but the rank table is updated with NEON intrinsics, two 16-byte vectors at a time: vcgtq_s8 produces all-ones lanes where the moved rank is greater, and subtracting that mask adds 1 to those lanes. Requires n >= 1. */ +unsigned char * QLFC_TRANSFORM_FUNCTION_NAME (const unsigned char * RESTRICT input, unsigned char * RESTRICT buffer, int n, unsigned char * RESTRICT MTFTable) +{ + signed char ALIGNED(64) ranks[ALPHABET_SIZE]; + signed char ALIGNED(64) flags[ALPHABET_SIZE]; + + for (ptrdiff_t i = 0; i < ALPHABET_SIZE; ++i) { ranks[i] = (signed char)(i - 128); } + for (ptrdiff_t i = 0; i < ALPHABET_SIZE; ++i) { flags[i] = 0; } + + ptrdiff_t i = (ptrdiff_t)n - 1, j = n; signed char nSymbols = 0; + + for (; i >= 0;) + { + unsigned char currentChar1 = input[i]; i = QLFC_TRANSFORM_SCAN_FUNCTION_NAME(input, i, currentChar1); if (i < 0) { i = 0; break; } + unsigned char currentChar2 = input[i]; i = QLFC_TRANSFORM_SCAN_FUNCTION_NAME(input, i, currentChar2); + + signed char rank1 = ranks[currentChar1], rank2 = ranks[currentChar2]; rank2 += rank1 > rank2; + + buffer[--j] = rank1 + 128; if (flags[currentChar1] == 0) { flags[currentChar1] = 1; buffer[j] = nSymbols++; } + buffer[--j] = rank2 + 128; if (flags[currentChar2] == 0) { flags[currentChar2] = 1; buffer[j] = nSymbols++; } + + int8x16_t r1 = vdupq_n_s8(rank1), r2 = vdupq_n_s8(rank2), x, y; + + x = vld1q_s8((int8_t 
const *)(ranks + 16 * 0)); y = vld1q_s8((int8_t const *)(ranks + 16 * 1)); + x = vsubq_s8(vsubq_s8(x, vreinterpretq_s8_u8(vcgtq_s8(r1, x))), vreinterpretq_s8_u8(vcgtq_s8(r2, x))); + y = vsubq_s8(vsubq_s8(y, vreinterpretq_s8_u8(vcgtq_s8(r1, y))), vreinterpretq_s8_u8(vcgtq_s8(r2, y))); + vst1q_s8((int8_t *)(ranks + 16 * 0), x); vst1q_s8((int8_t *)(ranks + 16 * 1), y); + + x = vld1q_s8((int8_t const *)(ranks + 16 * 2)); y = vld1q_s8((int8_t const *)(ranks + 16 * 3)); + x = vsubq_s8(vsubq_s8(x, vreinterpretq_s8_u8(vcgtq_s8(r1, x))), vreinterpretq_s8_u8(vcgtq_s8(r2, x))); + y = vsubq_s8(vsubq_s8(y, vreinterpretq_s8_u8(vcgtq_s8(r1, y))), vreinterpretq_s8_u8(vcgtq_s8(r2, y))); + vst1q_s8((int8_t *)(ranks + 16 * 2), x); vst1q_s8((int8_t *)(ranks + 16 * 3), y); + + x = vld1q_s8((int8_t const *)(ranks + 16 * 4)); y = vld1q_s8((int8_t const *)(ranks + 16 * 5)); + x = vsubq_s8(vsubq_s8(x, vreinterpretq_s8_u8(vcgtq_s8(r1, x))), vreinterpretq_s8_u8(vcgtq_s8(r2, x))); + y = vsubq_s8(vsubq_s8(y, vreinterpretq_s8_u8(vcgtq_s8(r1, y))), vreinterpretq_s8_u8(vcgtq_s8(r2, y))); + vst1q_s8((int8_t *)(ranks + 16 * 4), x); vst1q_s8((int8_t *)(ranks + 16 * 5), y); + + x = vld1q_s8((int8_t const *)(ranks + 16 * 6)); y = vld1q_s8((int8_t const *)(ranks + 16 * 7)); + x = vsubq_s8(vsubq_s8(x, vreinterpretq_s8_u8(vcgtq_s8(r1, x))), vreinterpretq_s8_u8(vcgtq_s8(r2, x))); + y = vsubq_s8(vsubq_s8(y, vreinterpretq_s8_u8(vcgtq_s8(r1, y))), vreinterpretq_s8_u8(vcgtq_s8(r2, y))); + vst1q_s8((int8_t *)(ranks + 16 * 6), x); vst1q_s8((int8_t *)(ranks + 16 * 7), y); + + x = vld1q_s8((int8_t const *)(ranks + 16 * 8)); y = vld1q_s8((int8_t const *)(ranks + 16 * 9)); + x = vsubq_s8(vsubq_s8(x, vreinterpretq_s8_u8(vcgtq_s8(r1, x))), vreinterpretq_s8_u8(vcgtq_s8(r2, x))); + y = vsubq_s8(vsubq_s8(y, vreinterpretq_s8_u8(vcgtq_s8(r1, y))), vreinterpretq_s8_u8(vcgtq_s8(r2, y))); + vst1q_s8((int8_t *)(ranks + 16 * 8), x); vst1q_s8((int8_t *)(ranks + 16 * 9), y); + + x = vld1q_s8((int8_t const *)(ranks + 16 * 10)); y 
= vld1q_s8((int8_t const *)(ranks + 16 * 11)); + x = vsubq_s8(vsubq_s8(x, vreinterpretq_s8_u8(vcgtq_s8(r1, x))), vreinterpretq_s8_u8(vcgtq_s8(r2, x))); + y = vsubq_s8(vsubq_s8(y, vreinterpretq_s8_u8(vcgtq_s8(r1, y))), vreinterpretq_s8_u8(vcgtq_s8(r2, y))); + vst1q_s8((int8_t *)(ranks + 16 * 10), x); vst1q_s8((int8_t *)(ranks + 16 * 11), y); + + x = vld1q_s8((int8_t const *)(ranks + 16 * 12)); y = vld1q_s8((int8_t const *)(ranks + 16 * 13)); + x = vsubq_s8(vsubq_s8(x, vreinterpretq_s8_u8(vcgtq_s8(r1, x))), vreinterpretq_s8_u8(vcgtq_s8(r2, x))); + y = vsubq_s8(vsubq_s8(y, vreinterpretq_s8_u8(vcgtq_s8(r1, y))), vreinterpretq_s8_u8(vcgtq_s8(r2, y))); + vst1q_s8((int8_t *)(ranks + 16 * 12), x); vst1q_s8((int8_t *)(ranks + 16 * 13), y); + + x = vld1q_s8((int8_t const *)(ranks + 16 * 14)); y = vld1q_s8((int8_t const *)(ranks + 16 * 15)); + x = vsubq_s8(vsubq_s8(x, vreinterpretq_s8_u8(vcgtq_s8(r1, x))), vreinterpretq_s8_u8(vcgtq_s8(r2, x))); + y = vsubq_s8(vsubq_s8(y, vreinterpretq_s8_u8(vcgtq_s8(r1, y))), vreinterpretq_s8_u8(vcgtq_s8(r2, y))); + vst1q_s8((int8_t *)(ranks + 16 * 14), x); vst1q_s8((int8_t *)(ranks + 16 * 15), y); + + ranks[currentChar1] = -127; ranks[currentChar2] = -128; + } + + if (i >= 0) + { + unsigned char currentChar = input[0]; signed char rank = ranks[currentChar]; + + buffer[--j] = rank + 128; if (flags[currentChar] == 0) { flags[currentChar] = 1; buffer[j] = nSymbols++; } + + int8x16_t r = vdupq_n_s8(rank), x, y; + + x = vld1q_s8((int8_t const *)(ranks + 16 * 0)); y = vld1q_s8((int8_t const *)(ranks + 16 * 1)); + x = vsubq_s8(x, vreinterpretq_s8_u8(vcgtq_s8(r, x))); + y = vsubq_s8(y, vreinterpretq_s8_u8(vcgtq_s8(r, y))); + vst1q_s8((int8_t *)(ranks + 16 * 0), x); vst1q_s8((int8_t *)(ranks + 16 * 1), y); + + x = vld1q_s8((int8_t const *)(ranks + 16 * 2)); y = vld1q_s8((int8_t const *)(ranks + 16 * 3)); + x = vsubq_s8(x, vreinterpretq_s8_u8(vcgtq_s8(r, x))); + y = vsubq_s8(y, vreinterpretq_s8_u8(vcgtq_s8(r, y))); + vst1q_s8((int8_t *)(ranks + 16 * 
2), x); vst1q_s8((int8_t *)(ranks + 16 * 3), y); + + x = vld1q_s8((int8_t const *)(ranks + 16 * 4)); y = vld1q_s8((int8_t const *)(ranks + 16 * 5)); + x = vsubq_s8(x, vreinterpretq_s8_u8(vcgtq_s8(r, x))); + y = vsubq_s8(y, vreinterpretq_s8_u8(vcgtq_s8(r, y))); + vst1q_s8((int8_t *)(ranks + 16 * 4), x); vst1q_s8((int8_t *)(ranks + 16 * 5), y); + + x = vld1q_s8((int8_t const *)(ranks + 16 * 6)); y = vld1q_s8((int8_t const *)(ranks + 16 * 7)); + x = vsubq_s8(x, vreinterpretq_s8_u8(vcgtq_s8(r, x))); + y = vsubq_s8(y, vreinterpretq_s8_u8(vcgtq_s8(r, y))); + vst1q_s8((int8_t *)(ranks + 16 * 6), x); vst1q_s8((int8_t *)(ranks + 16 * 7), y); + + x = vld1q_s8((int8_t const *)(ranks + 16 * 8)); y = vld1q_s8((int8_t const *)(ranks + 16 * 9)); + x = vsubq_s8(x, vreinterpretq_s8_u8(vcgtq_s8(r, x))); + y = vsubq_s8(y, vreinterpretq_s8_u8(vcgtq_s8(r, y))); + vst1q_s8((int8_t *)(ranks + 16 * 8), x); vst1q_s8((int8_t *)(ranks + 16 * 9), y); + + x = vld1q_s8((int8_t const *)(ranks + 16 * 10)); y = vld1q_s8((int8_t const *)(ranks + 16 * 11)); + x = vsubq_s8(x, vreinterpretq_s8_u8(vcgtq_s8(r, x))); + y = vsubq_s8(y, vreinterpretq_s8_u8(vcgtq_s8(r, y))); + vst1q_s8((int8_t *)(ranks + 16 * 10), x); vst1q_s8((int8_t *)(ranks + 16 * 11), y); + + x = vld1q_s8((int8_t const *)(ranks + 16 * 12)); y = vld1q_s8((int8_t const *)(ranks + 16 * 13)); + x = vsubq_s8(x, vreinterpretq_s8_u8(vcgtq_s8(r, x))); + y = vsubq_s8(y, vreinterpretq_s8_u8(vcgtq_s8(r, y))); + vst1q_s8((int8_t *)(ranks + 16 * 12), x); vst1q_s8((int8_t *)(ranks + 16 * 13), y); + + x = vld1q_s8((int8_t const *)(ranks + 16 * 14)); y = vld1q_s8((int8_t const *)(ranks + 16 * 15)); + x = vsubq_s8(x, vreinterpretq_s8_u8(vcgtq_s8(r, x))); + y = vsubq_s8(y, vreinterpretq_s8_u8(vcgtq_s8(r, y))); + vst1q_s8((int8_t *)(ranks + 16 * 14), x); vst1q_s8((int8_t *)(ranks + 16 * 15), y); + + ranks[currentChar] = -128; + } + + buffer[n - 1] = 1; + + for (ptrdiff_t i = 0; i < ALPHABET_SIZE; ++i) { MTFTable[ranks[i] + 128] = (unsigned char)i; } + for 
(ptrdiff_t i = 1; i < ALPHABET_SIZE; ++i) { if (flags[MTFTable[i]] == 0) { MTFTable[i] = MTFTable[i - 1]; break; } } + + return buffer + j; +} + +#else + /* Portable variant of the reverse move-to-front rank transform, compiled when neither the SSE2 nor the A64 branch above is selected. Walks input backwards one run of equal bytes at a time, keeps MTFTable as an explicit move-to-front list, and writes one value per run into buffer from index n - 1 downwards; returns a pointer to the lowest value written. The first time a byte is seen its output is the number of distinct bytes seen so far rather than its list position. Requires n >= 1: input[n - 1] is read and buffer[n - 1] is written unconditionally. */ +unsigned char * QLFC_TRANSFORM_FUNCTION_NAME (const unsigned char * RESTRICT input, unsigned char * RESTRICT buffer, int n, unsigned char * RESTRICT MTFTable) +{ + unsigned char Flag[ALPHABET_SIZE]; + + for (int i = 0; i < ALPHABET_SIZE; ++i) Flag[i] = 0; + for (int i = 0; i < ALPHABET_SIZE; ++i) MTFTable[i] = i; + + /* The search loop below starts at position 1 and never inspects MTFTable[0], so the first byte processed must not already sit at the front; swapping entries 0 and 1 guarantees that when the last input byte is 0. */ if (input[n - 1] == 0) + { + MTFTable[0] = 1; MTFTable[1] = 0; + } + + int index = n, nSymbols = 0; + for (int i = n - 1; i >= 0;) + { + unsigned char currentChar = input[i--]; + for (; (i >= 0) && (input[i] == currentChar); --i) ; + + /* Find currentChar in the list while shifting every entry in front of it back by one, unrolled four positions per pass. The loop has no bound check and relies on currentChar being present at some position >= 1. */ unsigned char previousChar = MTFTable[0], rank = 1; MTFTable[0] = currentChar; + while (true) + { + unsigned char temporaryChar0 = MTFTable[rank + 0]; MTFTable[rank + 0] = previousChar; + if (temporaryChar0 == currentChar) { rank += 0; break; } + + unsigned char temporaryChar1 = MTFTable[rank + 1]; MTFTable[rank + 1] = temporaryChar0; + if (temporaryChar1 == currentChar) { rank += 1; break; } + + unsigned char temporaryChar2 = MTFTable[rank + 2]; MTFTable[rank + 2] = temporaryChar1; + if (temporaryChar2 == currentChar) { rank += 2; break; } + + unsigned char temporaryChar3 = MTFTable[rank + 3]; MTFTable[rank + 3] = temporaryChar2; + if (temporaryChar3 == currentChar) { rank += 3; break; } + + rank += 4; previousChar = temporaryChar3; + } + + /* First occurrence of this byte: emit the running symbol count instead of the list position. */ if (Flag[currentChar] == 0) + { + Flag[currentChar] = 1; + rank = nSymbols++; + } + + buffer[--index] = rank; + } + + /* The value emitted for the run ending at input[n - 1] is replaced by 1. NOTE(review): before this store it is always 0, since the first byte processed is necessarily new; the reason for forcing 1 is not visible in this function. */ buffer[n - 1] = 1; + + /* Mark the end of the used symbols: the first slot holding a never-seen byte is overwritten with a copy of the previous slot, and the scan stops there. */ for (int rank = 1; rank < ALPHABET_SIZE; ++rank) + { + if (Flag[MTFTable[rank]] == 0) + { + MTFTable[rank] = MTFTable[rank - 1]; + break; + } + } + + return buffer + index; +} + +#endif + +#endif + +#if defined(QLFC_ADAPTIVE_ENCODE_FUNCTION_NAME) + +int QLFC_ADAPTIVE_ENCODE_FUNCTION_NAME (const unsigned char * input, unsigned char * output, unsigned char * buffer, int inputSize, int 
outputSize, QlfcStatisticalModel1 * model) +{ + unsigned char MTFTable[ALPHABET_SIZE]; + + bsc_qlfc_init_model(model); + + int contextRank0 = 0; + int contextRank4 = 0; + int contextRun = 0; + int maxRank = 7; + int avgRank = 0; + + unsigned char rankHistory[ALPHABET_SIZE], runHistory[ALPHABET_SIZE]; + for (int i = 0; i < ALPHABET_SIZE; ++i) + { + rankHistory[i] = runHistory[i] = 0; + } + + unsigned char * rankArray = bsc_qlfc_transform(input, buffer, inputSize, MTFTable); + + RangeCoder coder; + + coder.InitEncoder(output, outputSize); + coder.EncodeWord((unsigned int)inputSize); + + unsigned char usedChar[ALPHABET_SIZE]; + for (int i = 0; i < ALPHABET_SIZE; ++i) usedChar[i] = 0; + + int prevChar = -1; + for (int rank = 0; rank < ALPHABET_SIZE; ++rank) + { + int currentChar = MTFTable[rank]; + + for (int bit = 7; bit >= 0; --bit) + { + bool bit0 = false, bit1 = false; + + for (int c = 0; c < ALPHABET_SIZE; ++c) + { + if (c == prevChar || usedChar[c] == 0) + { + if ((currentChar >> (bit + 1)) == (c >> (bit + 1))) + { + if (c & (1 << bit)) bit1 = true; else bit0 = true; + if (bit0 && bit1) break; + } + } + } + + if (bit0 && bit1) + { + coder.EncodeBit(currentChar & (1 << bit)); + } + } + + if (currentChar == prevChar) + { + maxRank = bsc_bit_scan_reverse(rank - 1); + break; + } + + prevChar = currentChar; usedChar[currentChar] = 1; + } + + const unsigned char * inputEnd = input + inputSize; + const unsigned char * rankArrayEnd = buffer + inputSize; + + for (; rankArray < rankArrayEnd; ) + { + if (coder.CheckEOB()) + { + return LIBBSC_NOT_COMPRESSIBLE; + } + + int currentChar = *input, runSize; + { + const unsigned char * inputStart = input++; + + if (rankArray >= rankArrayEnd - 16) + { + while ((input < inputEnd) && (*input == currentChar)) { input++; } + } + else + { +#if LIBBSC_CPU_FEATURE >= LIBBSC_CPU_FEATURE_SSE2 + __m128i v = _mm_set1_epi8(currentChar); + + while (true) + { + int m = _mm_movemask_epi8(_mm_cmpeq_epi8(_mm_loadu_si128((const __m128i *)input), 
v)); + if (m != 0xffff) + { + input += bsc_bit_scan_forward((unsigned int)(~m)); + break; + } + + input += 16; + } +#elif LIBBSC_CPU_FEATURE == LIBBSC_CPU_FEATURE_A64 + unsigned long long v = currentChar; v |= (v << 8); v |= (v << 16); v |= (v << 32); + + while (true) + { + unsigned long long m = (*(unsigned long long const *)input) ^ v; + if (m != 0) + { + input += bsc_bit_scan_forward64(m) / 8; + break; + } + + input += 8; + } +#else + while (*input == currentChar) { input++; } +#endif + } + + runSize = (int)(input - inputStart); + } + + int rank = *rankArray++; + int history = rankHistory[currentChar]; + int state = model_rank_state(contextRank4, contextRun, history); + + short * RESTRICT statePredictor = & model->Rank.StateModel[state]; + short * RESTRICT charPredictor = & model->Rank.CharModel[currentChar]; + short * RESTRICT staticPredictor = & model->Rank.StaticModel; + ProbabilityMixer * RESTRICT mixer = & model->mixerOfRank[currentChar]; + + if (avgRank < 32) + { + if (rank == 1) + { + rankHistory[currentChar] = 0; + + int probability0 = *charPredictor, probability1 = *statePredictor, probability2 = *staticPredictor; + + ProbabilityCounter::UpdateBit0(*statePredictor, M_RANK_TS_TH0, M_RANK_TS_AR0); + ProbabilityCounter::UpdateBit0(*charPredictor, M_RANK_TC_TH0, M_RANK_TC_AR0); + ProbabilityCounter::UpdateBit0(*staticPredictor, M_RANK_TP_TH0, M_RANK_TP_AR0); + + coder.EncodeBit0(mixer->MixupAndUpdateBit0(probability0, probability1, probability2, M_RANK_TM_LR0, M_RANK_TM_LR1, M_RANK_TM_LR2, M_RANK_TM_TH0, M_RANK_TM_AR0)); + } + else + { + { + int probability0 = *charPredictor, probability1 = *statePredictor, probability2 = *staticPredictor; + + ProbabilityCounter::UpdateBit1(*statePredictor, M_RANK_TS_TH1, M_RANK_TS_AR1); + ProbabilityCounter::UpdateBit1(*charPredictor, M_RANK_TC_TH1, M_RANK_TC_AR1); + ProbabilityCounter::UpdateBit1(*staticPredictor, M_RANK_TP_TH1, M_RANK_TP_AR1); + + coder.EncodeBit1(mixer->MixupAndUpdateBit1(probability0, probability1, 
probability2, M_RANK_TM_LR0, M_RANK_TM_LR1, M_RANK_TM_LR2, M_RANK_TM_TH1, M_RANK_TM_AR1)); + } + + int bitRankSize = bsc_bit_scan_reverse(rank); rankHistory[currentChar] = bitRankSize; + + statePredictor = & model->Rank.Exponent.StateModel[state][0]; + charPredictor = & model->Rank.Exponent.CharModel[currentChar][0]; + staticPredictor = & model->Rank.Exponent.StaticModel[0]; + mixer = & model->mixerOfRankExponent[history < 1 ? 1 : history][1]; + + for (int bit = 1; bit < bitRankSize; ++bit, ++statePredictor, ++charPredictor, ++staticPredictor) + { + int probability0 = *charPredictor, probability1 = *statePredictor, probability2 = *staticPredictor; + + ProbabilityCounter::UpdateBit1(*statePredictor, M_RANK_ES_TH1, M_RANK_ES_AR1); + ProbabilityCounter::UpdateBit1(*charPredictor, M_RANK_EC_TH1, M_RANK_EC_AR1); + ProbabilityCounter::UpdateBit1(*staticPredictor, M_RANK_EP_TH1, M_RANK_EP_AR1); + + coder.EncodeBit1(mixer->MixupAndUpdateBit1(probability0, probability1, probability2, M_RANK_EM_LR0, M_RANK_EM_LR1, M_RANK_EM_LR2, M_RANK_EM_TH1, M_RANK_EM_AR1)); + + mixer = & model->mixerOfRankExponent[history <= bit ? 
bit + 1 : history][bit + 1]; + } + if (bitRankSize < maxRank) + { + int probability0 = *charPredictor, probability1 = *statePredictor, probability2 = *staticPredictor; + + ProbabilityCounter::UpdateBit0(*statePredictor, M_RANK_ES_TH0, M_RANK_ES_AR0); + ProbabilityCounter::UpdateBit0(*charPredictor, M_RANK_EC_TH0, M_RANK_EC_AR0); + ProbabilityCounter::UpdateBit0(*staticPredictor, M_RANK_EP_TH0, M_RANK_EP_AR0); + + coder.EncodeBit0(mixer->MixupAndUpdateBit0(probability0, probability1, probability2, M_RANK_EM_LR0, M_RANK_EM_LR1, M_RANK_EM_LR2, M_RANK_EM_TH0, M_RANK_EM_AR0)); + } + + statePredictor = & model->Rank.Mantissa[bitRankSize].StateModel[state][0]; + charPredictor = & model->Rank.Mantissa[bitRankSize].CharModel[currentChar][0]; + staticPredictor = & model->Rank.Mantissa[bitRankSize].StaticModel[0]; + mixer = & model->mixerOfRankMantissa[bitRankSize]; + + for (int context = 1, bit = bitRankSize - 1; bit >= 0; --bit) + { + if (rank & (1 << bit)) + { + int probability0 = charPredictor[context], probability1 = statePredictor[context], probability2 = staticPredictor[context]; + + ProbabilityCounter::UpdateBit1(statePredictor[context], M_RANK_MS_TH1, M_RANK_MS_AR1); + ProbabilityCounter::UpdateBit1(charPredictor[context], M_RANK_MC_TH1, M_RANK_MC_AR1); + ProbabilityCounter::UpdateBit1(staticPredictor[context], M_RANK_MP_TH1, M_RANK_MP_AR1); + + coder.EncodeBit1(mixer->MixupAndUpdateBit1(probability0, probability1, probability2, M_RANK_MM_LR0, M_RANK_MM_LR1, M_RANK_MM_LR2, M_RANK_MM_TH1, M_RANK_MM_AR1)); + + context += context + 1; + } + else + { + int probability0 = charPredictor[context], probability1 = statePredictor[context], probability2 = staticPredictor[context]; + + ProbabilityCounter::UpdateBit0(statePredictor[context], M_RANK_MS_TH0, M_RANK_MS_AR0); + ProbabilityCounter::UpdateBit0(charPredictor[context], M_RANK_MC_TH0, M_RANK_MC_AR0); + ProbabilityCounter::UpdateBit0(staticPredictor[context], M_RANK_MP_TH0, M_RANK_MP_AR0); + + 
coder.EncodeBit0(mixer->MixupAndUpdateBit0(probability0, probability1, probability2, M_RANK_MM_LR0, M_RANK_MM_LR1, M_RANK_MM_LR2, M_RANK_MM_TH0, M_RANK_MM_AR0)); + + context += context; + } + } + } + } + else + { + rankHistory[currentChar] = (unsigned char)bsc_bit_scan_reverse(rank); + + statePredictor = & model->Rank.Escape.StateModel[state][0]; + charPredictor = & model->Rank.Escape.CharModel[currentChar][0]; + staticPredictor = & model->Rank.Escape.StaticModel[0]; + + for (int context = 1, bit = maxRank; bit >= 0; --bit) + { + mixer = & model->mixerOfRankEscape[context]; + + if (rank & (1 << bit)) + { + int probability0 = charPredictor[context], probability1 = statePredictor[context], probability2 = staticPredictor[context]; + + ProbabilityCounter::UpdateBit1(statePredictor[context], M_RANK_PS_TH1, M_RANK_PS_AR1); + ProbabilityCounter::UpdateBit1(charPredictor[context], M_RANK_PC_TH1, M_RANK_PC_AR1); + ProbabilityCounter::UpdateBit1(staticPredictor[context], M_RANK_PP_TH1, M_RANK_PP_AR1); + + coder.EncodeBit1(mixer->MixupAndUpdateBit1(probability0, probability1, probability2, M_RANK_PM_LR0, M_RANK_PM_LR1, M_RANK_PM_LR2, M_RANK_PM_TH1, M_RANK_PM_AR1)); + + context += context + 1; + } + else + { + int probability0 = charPredictor[context], probability1 = statePredictor[context], probability2 = staticPredictor[context]; + + ProbabilityCounter::UpdateBit0(statePredictor[context], M_RANK_PS_TH0, M_RANK_PS_AR0); + ProbabilityCounter::UpdateBit0(charPredictor[context], M_RANK_PC_TH0, M_RANK_PC_AR0); + ProbabilityCounter::UpdateBit0(staticPredictor[context], M_RANK_PP_TH0, M_RANK_PP_AR0); + + coder.EncodeBit0(mixer->MixupAndUpdateBit0(probability0, probability1, probability2, M_RANK_PM_LR0, M_RANK_PM_LR1, M_RANK_PM_LR2, M_RANK_PM_TH0, M_RANK_PM_AR0)); + + context += context; + } + } + } + + avgRank = (avgRank * 124 + rank * 4) >> 7; + rank = rank - 1; + history = runHistory[currentChar]; + state = model_run_state(contextRank0, contextRun, rank, history); + 
statePredictor = & model->Run.StateModel[state]; + charPredictor = & model->Run.CharModel[currentChar]; + staticPredictor = & model->Run.StaticModel; + mixer = & model->mixerOfRun[currentChar]; + + if (runSize == 1) + { + runHistory[currentChar] = (runHistory[currentChar] + 2) >> 2; + + int probability0 = *charPredictor, probability1 = *statePredictor, probability2 = *staticPredictor; + + ProbabilityCounter::UpdateBit0(*statePredictor, M_RUN_TS_TH0, M_RUN_TS_AR0); + ProbabilityCounter::UpdateBit0(*charPredictor, M_RUN_TC_TH0, M_RUN_TC_AR0); + ProbabilityCounter::UpdateBit0(*staticPredictor, M_RUN_TP_TH0, M_RUN_TP_AR0); + + coder.EncodeBit0(mixer->MixupAndUpdateBit0(probability0, probability1, probability2, M_RUN_TM_LR0, M_RUN_TM_LR1, M_RUN_TM_LR2, M_RUN_TM_TH0, M_RUN_TM_AR0)); + } + else + { + { + int probability0 = *charPredictor, probability1 = *statePredictor, probability2 = *staticPredictor; + + ProbabilityCounter::UpdateBit1(*statePredictor, M_RUN_TS_TH1, M_RUN_TS_AR1); + ProbabilityCounter::UpdateBit1(*charPredictor, M_RUN_TC_TH1, M_RUN_TC_AR1); + ProbabilityCounter::UpdateBit1(*staticPredictor, M_RUN_TP_TH1, M_RUN_TP_AR1); + + coder.EncodeBit1(mixer->MixupAndUpdateBit1(probability0, probability1, probability2, M_RUN_TM_LR0, M_RUN_TM_LR1, M_RUN_TM_LR2, M_RUN_TM_TH1, M_RUN_TM_AR1)); + } + + int bitRunSize = bsc_bit_scan_reverse(runSize); runHistory[currentChar] = (runHistory[currentChar] + 3 * bitRunSize + 3) >> 2; + + statePredictor = & model->Run.Exponent.StateModel[state][0]; + charPredictor = & model->Run.Exponent.CharModel[currentChar][0]; + staticPredictor = & model->Run.Exponent.StaticModel[0]; + mixer = & model->mixerOfRunExponent[history < 1 ? 
1 : history][1]; + + for (int bit = 1; bit < bitRunSize; ++bit, ++statePredictor, ++charPredictor, ++staticPredictor) + { + int probability0 = *charPredictor, probability1 = *statePredictor, probability2 = *staticPredictor; + + ProbabilityCounter::UpdateBit1(*statePredictor, M_RUN_ES_TH1, M_RUN_ES_AR1); + ProbabilityCounter::UpdateBit1(*charPredictor, M_RUN_EC_TH1, M_RUN_EC_AR1); + ProbabilityCounter::UpdateBit1(*staticPredictor, M_RUN_EP_TH1, M_RUN_EP_AR1); + + coder.EncodeBit1(mixer->MixupAndUpdateBit1(probability0, probability1, probability2, M_RUN_EM_LR0, M_RUN_EM_LR1, M_RUN_EM_LR2, M_RUN_EM_TH1, M_RUN_EM_AR1)); + + mixer = & model->mixerOfRunExponent[history <= bit ? bit + 1 : history][bit + 1]; + } + { + int probability0 = *charPredictor, probability1 = *statePredictor, probability2 = *staticPredictor; + + ProbabilityCounter::UpdateBit0(*statePredictor, M_RUN_ES_TH0, M_RUN_ES_AR0); + ProbabilityCounter::UpdateBit0(*charPredictor, M_RUN_EC_TH0, M_RUN_EC_AR0); + ProbabilityCounter::UpdateBit0(*staticPredictor, M_RUN_EP_TH0, M_RUN_EP_AR0); + + coder.EncodeBit0(mixer->MixupAndUpdateBit0(probability0, probability1, probability2, M_RUN_EM_LR0, M_RUN_EM_LR1, M_RUN_EM_LR2, M_RUN_EM_TH0, M_RUN_EM_AR0)); + } + + statePredictor = & model->Run.Mantissa[bitRunSize].StateModel[state][0]; + charPredictor = & model->Run.Mantissa[bitRunSize].CharModel[currentChar][0]; + staticPredictor = & model->Run.Mantissa[bitRunSize].StaticModel[0]; + mixer = & model->mixerOfRunMantissa[bitRunSize]; + + for (int context = 1, bit = bitRunSize - 1; bit >= 0; --bit) + { + if (runSize & (1 << bit)) + { + int probability0 = charPredictor[context], probability1 = statePredictor[context], probability2 = staticPredictor[context]; + + ProbabilityCounter::UpdateBit1(statePredictor[context], M_RUN_MS_TH1, M_RUN_MS_AR1); + ProbabilityCounter::UpdateBit1(charPredictor[context], M_RUN_MC_TH1, M_RUN_MC_AR1); + ProbabilityCounter::UpdateBit1(staticPredictor[context], M_RUN_MP_TH1, M_RUN_MP_AR1); + + 
coder.EncodeBit1(mixer->MixupAndUpdateBit1(probability0, probability1, probability2, M_RUN_MM_LR0, M_RUN_MM_LR1, M_RUN_MM_LR2, M_RUN_MM_TH1, M_RUN_MM_AR1)); + + if (bitRunSize <= 5) context += context + 1; else context++; + } + else + { + int probability0 = charPredictor[context], probability1 = statePredictor[context], probability2 = staticPredictor[context]; + + ProbabilityCounter::UpdateBit0(statePredictor[context], M_RUN_MS_TH0, M_RUN_MS_AR0); + ProbabilityCounter::UpdateBit0(charPredictor[context], M_RUN_MC_TH0, M_RUN_MC_AR0); + ProbabilityCounter::UpdateBit0(staticPredictor[context], M_RUN_MP_TH0, M_RUN_MP_AR0); + + coder.EncodeBit0(mixer->MixupAndUpdateBit0(probability0, probability1, probability2, M_RUN_MM_LR0, M_RUN_MM_LR1, M_RUN_MM_LR2, M_RUN_MM_TH0, M_RUN_MM_AR0)); + + if (bitRunSize <= 5) context += context + 0; else context++; + } + } + } + + contextRank0 = ((contextRank0 << 1) | (rank == 0 ? 1 : 0)) & 0x7; + contextRank4 = ((contextRank4 << 2) | (rank < 3 ? rank : 3)) & 0xff; + contextRun = ((contextRun << 1) | (runSize < 3 ? 
1 : 0)) & 0xf; + } + + return coder.FinishEncoder(); +} + +#endif + +#if defined(QLFC_STATIC_ENCODE_FUNCTION_NAME) + +int QLFC_STATIC_ENCODE_FUNCTION_NAME (const unsigned char * input, unsigned char * output, unsigned char * buffer, int inputSize, int outputSize, QlfcStatisticalModel1 * model) +{ + unsigned char MTFTable[ALPHABET_SIZE]; + + bsc_qlfc_init_model(model); + + int contextRank0 = 0; + int contextRank4 = 0; + int contextRun = 0; + int maxRank = 7; + int avgRank = 0; + + unsigned char rankHistory[ALPHABET_SIZE], runHistory[ALPHABET_SIZE]; + for (int i = 0; i < ALPHABET_SIZE; ++i) + { + rankHistory[i] = runHistory[i] = 0; + } + + unsigned char * rankArray = bsc_qlfc_transform(input, buffer, inputSize, MTFTable); + + RangeCoder coder; + + coder.InitEncoder(output, outputSize); + coder.EncodeWord((unsigned int)inputSize); + + unsigned char usedChar[ALPHABET_SIZE]; + for (int i = 0; i < ALPHABET_SIZE; ++i) usedChar[i] = 0; + + int prevChar = -1; + for (int rank = 0; rank < ALPHABET_SIZE; ++rank) + { + int currentChar = MTFTable[rank]; + + for (int bit = 7; bit >= 0; --bit) + { + bool bit0 = false, bit1 = false; + + for (int c = 0; c < ALPHABET_SIZE; ++c) + { + if (c == prevChar || usedChar[c] == 0) + { + if ((currentChar >> (bit + 1)) == (c >> (bit + 1))) + { + if (c & (1 << bit)) bit1 = true; else bit0 = true; + if (bit0 && bit1) break; + } + } + } + + if (bit0 && bit1) + { + coder.EncodeBit(currentChar & (1 << bit)); + } + } + + if (currentChar == prevChar) + { + maxRank = bsc_bit_scan_reverse(rank - 1); + break; + } + + prevChar = currentChar; usedChar[currentChar] = 1; + } + + const unsigned char * inputEnd = input + inputSize; + const unsigned char * rankArrayEnd = buffer + inputSize; + + for (; rankArray < rankArrayEnd; ) + { + if (coder.CheckEOB()) + { + return LIBBSC_NOT_COMPRESSIBLE; + } + + int currentChar = *input, runSize; + { + const unsigned char * inputStart = input++; + + if (rankArray >= rankArrayEnd - 16) + { + while ((input < inputEnd) && 
(*input == currentChar)) { input++; } + } + else + { +#if LIBBSC_CPU_FEATURE >= LIBBSC_CPU_FEATURE_SSE2 + __m128i v = _mm_set1_epi8(currentChar); + + while (true) + { + int m = _mm_movemask_epi8(_mm_cmpeq_epi8(_mm_loadu_si128((const __m128i *)input), v)); + if (m != 0xffff) + { + input += bsc_bit_scan_forward((unsigned int)(~m)); + break; + } + + input += 16; + } +#elif LIBBSC_CPU_FEATURE == LIBBSC_CPU_FEATURE_A64 + unsigned long long v = currentChar; v |= (v << 8); v |= (v << 16); v |= (v << 32); + + while (true) + { + unsigned long long m = (*(unsigned long long const *)input) ^ v; + if (m != 0) + { + input += bsc_bit_scan_forward64(m) / 8; + break; + } + + input += 8; + } +#else + while (*input == currentChar) { input++; } +#endif + } + + runSize = (int)(input - inputStart); + } + + int rank = *rankArray++; + int history = rankHistory[currentChar]; + int state = model_rank_state(contextRank4, contextRun, history); + + short * RESTRICT statePredictor = & model->Rank.StateModel[state]; + short * RESTRICT charPredictor = & model->Rank.CharModel[currentChar]; + short * RESTRICT staticPredictor = & model->Rank.StaticModel; + + if (avgRank < 32) + { + if (rank == 1) + { + rankHistory[currentChar] = 0; + + int probability = ((*charPredictor) * F_RANK_TM_LR0 + (*statePredictor) * F_RANK_TM_LR1 + (*staticPredictor) * F_RANK_TM_LR2) >> 5; + + ProbabilityCounter::UpdateBit0(*statePredictor, F_RANK_TS_TH0, F_RANK_TS_AR0); + ProbabilityCounter::UpdateBit0(*charPredictor, F_RANK_TC_TH0, F_RANK_TC_AR0); + ProbabilityCounter::UpdateBit0(*staticPredictor, F_RANK_TP_TH0, F_RANK_TP_AR0); + + coder.EncodeBit0(probability); + } + else + { + { + int probability = ((*charPredictor) * F_RANK_TM_LR0 + (*statePredictor) * F_RANK_TM_LR1 + (*staticPredictor) * F_RANK_TM_LR2) >> 5; + + ProbabilityCounter::UpdateBit1(*statePredictor, F_RANK_TS_TH1, F_RANK_TS_AR1); + ProbabilityCounter::UpdateBit1(*charPredictor, F_RANK_TC_TH1, F_RANK_TC_AR1); + 
ProbabilityCounter::UpdateBit1(*staticPredictor, F_RANK_TP_TH1, F_RANK_TP_AR1); + + coder.EncodeBit1(probability); + } + + int bitRankSize = bsc_bit_scan_reverse(rank); rankHistory[currentChar] = bitRankSize; + + statePredictor = & model->Rank.Exponent.StateModel[state][0]; + charPredictor = & model->Rank.Exponent.CharModel[currentChar][0]; + staticPredictor = & model->Rank.Exponent.StaticModel[0]; + + for (int bit = 1; bit < bitRankSize; ++bit, ++statePredictor, ++charPredictor, ++staticPredictor) + { + int probability = ((*charPredictor) * F_RANK_EM_LR0 + (*statePredictor) * F_RANK_EM_LR1 + (*staticPredictor) * F_RANK_EM_LR2) >> 5; + + ProbabilityCounter::UpdateBit1(*statePredictor, F_RANK_ES_TH1, F_RANK_ES_AR1); + ProbabilityCounter::UpdateBit1(*charPredictor, F_RANK_EC_TH1, F_RANK_EC_AR1); + ProbabilityCounter::UpdateBit1(*staticPredictor, F_RANK_EP_TH1, F_RANK_EP_AR1); + + coder.EncodeBit1(probability); + } + if (bitRankSize < maxRank) + { + int probability = ((*charPredictor) * F_RANK_EM_LR0 + (*statePredictor) * F_RANK_EM_LR1 + (*staticPredictor) * F_RANK_EM_LR2) >> 5; + + ProbabilityCounter::UpdateBit0(*statePredictor, F_RANK_ES_TH0, F_RANK_ES_AR0); + ProbabilityCounter::UpdateBit0(*charPredictor, F_RANK_EC_TH0, F_RANK_EC_AR0); + ProbabilityCounter::UpdateBit0(*staticPredictor, F_RANK_EP_TH0, F_RANK_EP_AR0); + + coder.EncodeBit0(probability); + } + + statePredictor = & model->Rank.Mantissa[bitRankSize].StateModel[state][0]; + charPredictor = & model->Rank.Mantissa[bitRankSize].CharModel[currentChar][0]; + staticPredictor = & model->Rank.Mantissa[bitRankSize].StaticModel[0]; + + for (int context = 1, bit = bitRankSize - 1; bit >= 0; --bit) + { + int probability = (charPredictor[context] * F_RANK_MM_LR0 + statePredictor[context] * F_RANK_MM_LR1 + staticPredictor[context] * F_RANK_MM_LR2) >> 5; + + unsigned int b = (rank >> bit) & 1; + ProbabilityCounter::UpdateBit(b, statePredictor[context], F_RANK_MS_TH0, F_RANK_MS_AR0, F_RANK_MS_TH1, F_RANK_MS_AR1); + 
ProbabilityCounter::UpdateBit(b, charPredictor[context], F_RANK_MC_TH0, F_RANK_MC_AR0, F_RANK_MC_TH1, F_RANK_MC_AR1); + ProbabilityCounter::UpdateBit(b, staticPredictor[context], F_RANK_MP_TH0, F_RANK_MP_AR0, F_RANK_MP_TH1, F_RANK_MP_AR1); + + context += context + b; coder.EncodeBit(b, probability); + } + } + } + else + { + rankHistory[currentChar] = (unsigned char)bsc_bit_scan_reverse(rank); + + statePredictor = & model->Rank.Escape.StateModel[state][0]; + charPredictor = & model->Rank.Escape.CharModel[currentChar][0]; + staticPredictor = & model->Rank.Escape.StaticModel[0]; + + for (int context = 1, bit = maxRank; bit >= 0; --bit) + { + int probability = (charPredictor[context] * F_RANK_PM_LR0 + statePredictor[context] * F_RANK_PM_LR1 + staticPredictor[context] * F_RANK_PM_LR2) >> 5; + + unsigned int b = (rank >> bit) & 1; + ProbabilityCounter::UpdateBit(b, statePredictor[context], F_RANK_PS_TH0, F_RANK_PS_AR0, F_RANK_PS_TH1, F_RANK_PS_AR1); + ProbabilityCounter::UpdateBit(b, charPredictor[context], F_RANK_PC_TH0, F_RANK_PC_AR0, F_RANK_PC_TH1, F_RANK_PC_AR1); + ProbabilityCounter::UpdateBit(b, staticPredictor[context], F_RANK_PP_TH0, F_RANK_PP_AR0, F_RANK_PP_TH1, F_RANK_PP_AR1); + + context += context + b; coder.EncodeBit(b, probability); + } + } + + avgRank = (avgRank * 124 + rank * 4) >> 7; + rank = rank - 1; + history = runHistory[currentChar]; + state = model_run_state(contextRank0, contextRun, rank, history); + statePredictor = & model->Run.StateModel[state]; + charPredictor = & model->Run.CharModel[currentChar]; + staticPredictor = & model->Run.StaticModel; + + if (runSize == 1) + { + runHistory[currentChar] = (runHistory[currentChar] + 2) >> 2; + + int probability = ((*charPredictor) * F_RUN_TM_LR0 + (*statePredictor) * F_RUN_TM_LR1 + (*staticPredictor) * F_RUN_TM_LR2) >> 5; + + ProbabilityCounter::UpdateBit0(*statePredictor, F_RUN_TS_TH0, F_RUN_TS_AR0); + ProbabilityCounter::UpdateBit0(*charPredictor, F_RUN_TC_TH0, F_RUN_TC_AR0); + 
ProbabilityCounter::UpdateBit0(*staticPredictor, F_RUN_TP_TH0, F_RUN_TP_AR0); + + coder.EncodeBit0(probability); + } + else + { + { + int probability = ((*charPredictor) * F_RUN_TM_LR0 + (*statePredictor) * F_RUN_TM_LR1 + (*staticPredictor) * F_RUN_TM_LR2) >> 5; + + ProbabilityCounter::UpdateBit1(*statePredictor, F_RUN_TS_TH1, F_RUN_TS_AR1); + ProbabilityCounter::UpdateBit1(*charPredictor, F_RUN_TC_TH1, F_RUN_TC_AR1); + ProbabilityCounter::UpdateBit1(*staticPredictor, F_RUN_TP_TH1, F_RUN_TP_AR1); + + coder.EncodeBit1(probability); + } + + int bitRunSize = bsc_bit_scan_reverse(runSize); runHistory[currentChar] = (runHistory[currentChar] + 3 * bitRunSize + 3) >> 2; + + statePredictor = & model->Run.Exponent.StateModel[state][0]; + charPredictor = & model->Run.Exponent.CharModel[currentChar][0]; + staticPredictor = & model->Run.Exponent.StaticModel[0]; + + for (int bit = 1; bit < bitRunSize; ++bit, ++statePredictor, ++charPredictor, ++staticPredictor) + { + int probability = ((*charPredictor) * F_RUN_EM_LR0 + (*statePredictor) * F_RUN_EM_LR1 + (*staticPredictor) * F_RUN_EM_LR2) >> 5; + + ProbabilityCounter::UpdateBit1(*statePredictor, F_RUN_ES_TH1, F_RUN_ES_AR1); + ProbabilityCounter::UpdateBit1(*charPredictor, F_RUN_EC_TH1, F_RUN_EC_AR1); + ProbabilityCounter::UpdateBit1(*staticPredictor, F_RUN_EP_TH1, F_RUN_EP_AR1); + + coder.EncodeBit1(probability); + } + { + int probability = ((*charPredictor) * F_RUN_EM_LR0 + (*statePredictor) * F_RUN_EM_LR1 + (*staticPredictor) * F_RUN_EM_LR2) >> 5; + + ProbabilityCounter::UpdateBit0(*statePredictor, F_RUN_ES_TH0, F_RUN_ES_AR0); + ProbabilityCounter::UpdateBit0(*charPredictor, F_RUN_EC_TH0, F_RUN_EC_AR0); + ProbabilityCounter::UpdateBit0(*staticPredictor, F_RUN_EP_TH0, F_RUN_EP_AR0); + + coder.EncodeBit0(probability); + } + + statePredictor = & model->Run.Mantissa[bitRunSize].StateModel[state][0]; + charPredictor = & model->Run.Mantissa[bitRunSize].CharModel[currentChar][0]; + staticPredictor = & 
model->Run.Mantissa[bitRunSize].StaticModel[0]; + + for (int context = 1, bit = bitRunSize - 1; bit >= 0; --bit) + { + int probability = (charPredictor[context] * F_RUN_MM_LR0 + statePredictor[context] * F_RUN_MM_LR1 + staticPredictor[context] * F_RUN_MM_LR2) >> 5; + + unsigned int b = (runSize >> bit) & 1; + ProbabilityCounter::UpdateBit(b, statePredictor[context], F_RUN_MS_TH0, F_RUN_MS_AR0, F_RUN_MS_TH1, F_RUN_MS_AR1); + ProbabilityCounter::UpdateBit(b, charPredictor[context], F_RUN_MC_TH0, F_RUN_MC_AR0, F_RUN_MC_TH1, F_RUN_MC_AR1); + ProbabilityCounter::UpdateBit(b, staticPredictor[context], F_RUN_MP_TH0, F_RUN_MP_AR0, F_RUN_MP_TH1, F_RUN_MP_AR1); + + int ctx = context + context + b; context++; if (bitRunSize <= 5) { context = ctx; } coder.EncodeBit(b, probability); + } + } + + contextRank0 = ((contextRank0 << 1) | (rank == 0 ? 1 : 0)) & 0x7; + contextRank4 = ((contextRank4 << 2) | (rank < 3 ? rank : 3)) & 0xff; + contextRun = ((contextRun << 1) | (runSize < 3 ? 1 : 0)) & 0xf; + } + + return coder.FinishEncoder(); +} + +#endif + +#if defined(QLFC_FAST_ENCODE_FUNCTION_NAME) + +int QLFC_FAST_ENCODE_FUNCTION_NAME (const unsigned char * RESTRICT input, unsigned char * RESTRICT output, unsigned char * RESTRICT buffer, int inputSize, int outputSize, QlfcStatisticalModel2 * model) +{ + unsigned char MTFTable[ALPHABET_SIZE]; + + bsc_qlfc_init_model(model); + + unsigned char * RESTRICT ranks = bsc_qlfc_transform(input, buffer, inputSize, MTFTable); + + RangeCoder coder; + + coder.InitEncoder(output, outputSize); + coder.EncodeWord((unsigned int)inputSize); + + unsigned char usedChar[ALPHABET_SIZE]; + for (int i = 0; i < ALPHABET_SIZE; ++i) usedChar[i] = 0; + + int prevChar = -1; + for (int rank = 0; rank < ALPHABET_SIZE; ++rank) + { + int currentChar = MTFTable[rank]; + + for (int bit = 7; bit >= 0; --bit) + { + bool bit0 = false, bit1 = false; + + for (int c = 0; c < ALPHABET_SIZE; ++c) + { + if (c == prevChar || usedChar[c] == 0) + { + if ((currentChar >> (bit + 1)) 
== (c >> (bit + 1))) + { + if (c & (1 << bit)) bit1 = true; else bit0 = true; + if (bit0 && bit1) break; + } + } + } + + if (bit0 && bit1) + { + coder.EncodeBit<1>(currentChar & (1 << bit), 1); + } + } + + if (currentChar == prevChar) + { + break; + } + + prevChar = currentChar; usedChar[currentChar] = 1; + } + + const unsigned char * inputEnd = input + inputSize; + const unsigned char * ranksEnd = buffer + inputSize; + + for (; ranks < ranksEnd; ) + { + if (coder.CheckEOB()) + { + return LIBBSC_NOT_COMPRESSIBLE; + } + + unsigned int currentRank = *ranks++; + unsigned int currentChar = *input; + unsigned int currentRun; + + { + const unsigned char * runStart = input++; + + if (ranks < ranksEnd - 16) + { +#if LIBBSC_CPU_FEATURE >= LIBBSC_CPU_FEATURE_SSE2 + __m128i v = _mm_set1_epi8(currentChar); + + while (true) + { + int m = _mm_movemask_epi8(_mm_cmpeq_epi8(_mm_loadu_si128((const __m128i *)input), v)); + if (m != 0xffff) + { + input += bsc_bit_scan_forward((unsigned int)(~m)); + break; + } + + input += 16; + } +#elif LIBBSC_CPU_FEATURE == LIBBSC_CPU_FEATURE_A64 + unsigned long long v = currentChar; v |= (v << 8); v |= (v << 16); v |= (v << 32); + + while (true) + { + unsigned long long m = (*(unsigned long long const *)input) ^ v; + if (m != 0) + { + input += bsc_bit_scan_forward64(m) / 8; + break; + } + + input += 8; + } +#else + while (*input == currentChar) { input++; } +#endif + } + else + { + while ((input < inputEnd) && (*input == currentChar)) { input++; } + } + + currentRun = (unsigned int)(input - runStart); + } + + { + short * RESTRICT predictor = &model->Rank.Exponent[currentChar][0]; + + if (currentRank == 1) + { + int p = predictor[0]; ProbabilityCounter::UpdateBit<4>(predictor[0], 8016); coder.EncodeBit0<13>(p); + } + else + { + { + int p = predictor[0]; ProbabilityCounter::UpdateBit<4>(predictor[0], 83); coder.EncodeBit1<13>(p); + } + + int bitRankSize = bsc_bit_scan_reverse(currentRank); + + for (int bit = 1; bit < bitRankSize; ++bit) + { + int p = 
predictor[bit]; ProbabilityCounter::UpdateBit<4>(predictor[bit], 122); coder.EncodeBit1<13>(p); + } + + if (bitRankSize < 7) + { + int p = predictor[bitRankSize]; ProbabilityCounter::UpdateBit<4>(predictor[bitRankSize], 8114); coder.EncodeBit0<13>(p); + } + + predictor = &model->Rank.Mantissa[currentChar][bitRankSize][0]; + + for (int context = 1, bit = bitRankSize - 1; bit >= 0; --bit) + { + unsigned int b = (currentRank >> bit) & 1; + + int p = predictor[context]; ProbabilityCounter::UpdateBit<7>(b, predictor[context], 7999, 235); coder.EncodeBit<13>(b, p); + + context += context + b; + } + } + } + + { + short * RESTRICT predictor = &model->Run.Exponent[currentChar][0]; + + if (currentRun == 1) + { + int p = predictor[0]; ProbabilityCounter::UpdateBit<5>(predictor[0], 2025); coder.EncodeBit0<11>(p); + } + else + { + { + int p = predictor[0]; ProbabilityCounter::UpdateBit<5>(predictor[0], 42); coder.EncodeBit1<11>(p); + } + + int bitRunSize = bsc_bit_scan_reverse(currentRun); + + for (int bit = 1; bit < bitRunSize; ++bit) + { + int p = predictor[bit]; ProbabilityCounter::UpdateBit<4>(predictor[bit], 142); coder.EncodeBit1<11>(p); + } + + { + int p = predictor[bitRunSize]; ProbabilityCounter::UpdateBit<4>(predictor[bitRunSize], 1962); coder.EncodeBit0<11>(p); + } + + predictor = &model->Run.Mantissa[currentChar][bitRunSize][0]; + + if (bitRunSize <= 5) + { + for (int context = 1, bit = bitRunSize - 1; bit >= 0; --bit) + { + unsigned int b = (currentRun >> bit) & 1; + + int p = predictor[context]; ProbabilityCounter::UpdateBit<6>(b, predictor[context], 1951, 147); coder.EncodeBit<11>(b, p); + + context += context + b; + } + } + else + { + for (int context = 1, bit = bitRunSize - 1; bit >= 0; --bit) + { + unsigned int b = (currentRun >> bit) & 1; + + int p = predictor[context]; ProbabilityCounter::UpdateBit<5>(b, predictor[context], 1987, 46); coder.EncodeBit<11>(b, p); + + context += 1; + } + } + } + } + } + + return coder.FinishEncoder(); +} + +#endif + +#if 
(defined(QLFC_ADAPTIVE_DECODE_FUNCTION_NAME) || defined(QLFC_STATIC_DECODE_FUNCTION_NAME) || defined(QLFC_FAST_DECODE_FUNCTION_NAME)) && (LIBBSC_CPU_FEATURE >= LIBBSC_CPU_FEATURE_SSE41) + +static const __m128i ALIGNED(64) rank16_shuffle[16] = +{ + _mm_setr_epi8(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15), + _mm_setr_epi8(1, 0, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15), + _mm_setr_epi8(1, 2, 0, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15), + _mm_setr_epi8(1, 2, 3, 0, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15), + _mm_setr_epi8(1, 2, 3, 4, 0, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15), + _mm_setr_epi8(1, 2, 3, 4, 5, 0, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15), + _mm_setr_epi8(1, 2, 3, 4, 5, 6, 0, 7, 8, 9, 10, 11, 12, 13, 14, 15), + _mm_setr_epi8(1, 2, 3, 4, 5, 6, 7, 0, 8, 9, 10, 11, 12, 13, 14, 15), + _mm_setr_epi8(1, 2, 3, 4, 5, 6, 7, 8, 0, 9, 10, 11, 12, 13, 14, 15), + _mm_setr_epi8(1, 2, 3, 4, 5, 6, 7, 8, 9, 0, 10, 11, 12, 13, 14, 15), + _mm_setr_epi8(1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 0, 11, 12, 13, 14, 15), + _mm_setr_epi8(1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 0, 12, 13, 14, 15), + _mm_setr_epi8(1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 0, 13, 14, 15), + _mm_setr_epi8(1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 0, 14, 15), + _mm_setr_epi8(1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 0, 15), + _mm_setr_epi8(1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 0), +}; + +#endif + +#if defined(QLFC_ADAPTIVE_DECODE_FUNCTION_NAME) + +int QLFC_ADAPTIVE_DECODE_FUNCTION_NAME (const unsigned char * input, unsigned char * output, QlfcStatisticalModel1 * model) +{ + RangeCoder coder; + + unsigned char ALIGNED(64) MTFTable[ALPHABET_SIZE]; + + bsc_qlfc_init_model(model); + + int contextRank0 = 0; + int contextRank4 = 0; + int contextRun = 0; + int maxRank = 7; + int avgRank = 0; + + unsigned char rankHistory[ALPHABET_SIZE], runHistory[ALPHABET_SIZE]; + for (int i = 0; i < ALPHABET_SIZE; ++i) + { + rankHistory[i] = runHistory[i] = 0; + } + + coder.InitDecoder(input); + int n = 
(int)coder.DecodeWord(); + + unsigned char usedChar[ALPHABET_SIZE]; + for (int i = 0; i < ALPHABET_SIZE; ++i) usedChar[i] = 0; + + int prevChar = -1; + for (int rank = 0; rank < ALPHABET_SIZE; ++rank) + { + int currentChar = 0; + + for (int bit = 7; bit >= 0; --bit) + { + bool bit0 = false, bit1 = false; + + for (int c = 0; c < ALPHABET_SIZE; ++c) + { + if (c == prevChar || usedChar[c] == 0) + { + if (currentChar == (c >> (bit + 1))) + { + if (c & (1 << bit)) bit1 = true; else bit0 = true; + if (bit0 && bit1) break; + } + } + } + + if (bit0 && bit1) + { + currentChar += currentChar + coder.DecodeBit(); + } + else + { + if (bit0) currentChar += currentChar + 0; + if (bit1) currentChar += currentChar + 1; + } + } + + MTFTable[rank] = currentChar; + + if (currentChar == prevChar) + { + maxRank = bsc_bit_scan_reverse(rank - 1); + break; + } + + prevChar = currentChar; usedChar[currentChar] = 1; + } + + for (int i = 0; i < n;) + { + int currentChar = MTFTable[0]; + int history = rankHistory[currentChar]; + int state = model_rank_state(contextRank4, contextRun, history); + + short * RESTRICT statePredictor = & model->Rank.StateModel[state]; + short * RESTRICT charPredictor = & model->Rank.CharModel[currentChar]; + short * RESTRICT staticPredictor = & model->Rank.StaticModel; + ProbabilityMixer * RESTRICT mixer = & model->mixerOfRank[currentChar]; + + int rank = 1; + if (avgRank < 32) + { + if (coder.DecodeBit(mixer->Mixup(*charPredictor, *statePredictor, *staticPredictor))) + { + ProbabilityCounter::UpdateBit1(*statePredictor, M_RANK_TS_TH1, M_RANK_TS_AR1); + ProbabilityCounter::UpdateBit1(*charPredictor, M_RANK_TC_TH1, M_RANK_TC_AR1); + ProbabilityCounter::UpdateBit1(*staticPredictor, M_RANK_TP_TH1, M_RANK_TP_AR1); + mixer->UpdateBit1(M_RANK_TM_LR0, M_RANK_TM_LR1, M_RANK_TM_LR2, M_RANK_TM_TH1, M_RANK_TM_AR1); + + statePredictor = & model->Rank.Exponent.StateModel[state][0]; + charPredictor = & model->Rank.Exponent.CharModel[currentChar][0]; + staticPredictor = & 
model->Rank.Exponent.StaticModel[0]; + mixer = & model->mixerOfRankExponent[history < 1 ? 1 : history][1]; + + int bitRankSize = 1; + while (true) + { + if (bitRankSize == maxRank) break; + if (coder.DecodeBit(mixer->Mixup(*charPredictor, *statePredictor, *staticPredictor))) + { + ProbabilityCounter::UpdateBit1(*statePredictor, M_RANK_ES_TH1, M_RANK_ES_AR1); statePredictor++; + ProbabilityCounter::UpdateBit1(*charPredictor, M_RANK_EC_TH1, M_RANK_EC_AR1); charPredictor++; + ProbabilityCounter::UpdateBit1(*staticPredictor, M_RANK_EP_TH1, M_RANK_EP_AR1); staticPredictor++; + mixer->UpdateBit1(M_RANK_EM_LR0, M_RANK_EM_LR1, M_RANK_EM_LR2, M_RANK_EM_TH1, M_RANK_EM_AR1); + bitRankSize++; + mixer = & model->mixerOfRankExponent[history < bitRankSize ? bitRankSize : history][bitRankSize]; + } + else + { + ProbabilityCounter::UpdateBit0(*statePredictor, M_RANK_ES_TH0, M_RANK_ES_AR0); + ProbabilityCounter::UpdateBit0(*charPredictor, M_RANK_EC_TH0, M_RANK_EC_AR0); + ProbabilityCounter::UpdateBit0(*staticPredictor, M_RANK_EP_TH0, M_RANK_EP_AR0); + mixer->UpdateBit0(M_RANK_EM_LR0, M_RANK_EM_LR1, M_RANK_EM_LR2, M_RANK_EM_TH0, M_RANK_EM_AR0); + break; + } + } + + rankHistory[currentChar] = bitRankSize; + + statePredictor = & model->Rank.Mantissa[bitRankSize].StateModel[state][0]; + charPredictor = & model->Rank.Mantissa[bitRankSize].CharModel[currentChar][0]; + staticPredictor = & model->Rank.Mantissa[bitRankSize].StaticModel[0]; + mixer = & model->mixerOfRankMantissa[bitRankSize]; + + for (int bit = bitRankSize - 1; bit >= 0; --bit) + { + if (coder.DecodeBit(mixer->Mixup(charPredictor[rank], statePredictor[rank], staticPredictor[rank]))) + { + ProbabilityCounter::UpdateBit1(statePredictor[rank], M_RANK_MS_TH1, M_RANK_MS_AR1); + ProbabilityCounter::UpdateBit1(charPredictor[rank], M_RANK_MC_TH1, M_RANK_MC_AR1); + ProbabilityCounter::UpdateBit1(staticPredictor[rank], M_RANK_MP_TH1, M_RANK_MP_AR1); + mixer->UpdateBit1(M_RANK_MM_LR0, M_RANK_MM_LR1, M_RANK_MM_LR2, M_RANK_MM_TH1, 
M_RANK_MM_AR1); + rank += rank + 1; + } + else + { + ProbabilityCounter::UpdateBit0(statePredictor[rank], M_RANK_MS_TH0, M_RANK_MS_AR0); + ProbabilityCounter::UpdateBit0(charPredictor[rank], M_RANK_MC_TH0, M_RANK_MC_AR0); + ProbabilityCounter::UpdateBit0(staticPredictor[rank], M_RANK_MP_TH0, M_RANK_MP_AR0); + mixer->UpdateBit0(M_RANK_MM_LR0, M_RANK_MM_LR1, M_RANK_MM_LR2, M_RANK_MM_TH0, M_RANK_MM_AR0); + rank += rank; + } + } + } + else + { + rankHistory[currentChar] = 0; + ProbabilityCounter::UpdateBit0(*statePredictor, M_RANK_TS_TH0, M_RANK_TS_AR0); + ProbabilityCounter::UpdateBit0(*charPredictor, M_RANK_TC_TH0, M_RANK_TC_AR0); + ProbabilityCounter::UpdateBit0(*staticPredictor, M_RANK_TP_TH0, M_RANK_TP_AR0); + mixer->UpdateBit0(M_RANK_TM_LR0, M_RANK_TM_LR1, M_RANK_TM_LR2, M_RANK_TM_TH0, M_RANK_TM_AR0); + } + } + else + { + statePredictor = & model->Rank.Escape.StateModel[state][0]; + charPredictor = & model->Rank.Escape.CharModel[currentChar][0]; + staticPredictor = & model->Rank.Escape.StaticModel[0]; + + rank = 0; + for (int context = 1, bit = maxRank; bit >= 0; --bit) + { + mixer = & model->mixerOfRankEscape[context]; + + if (coder.DecodeBit(mixer->Mixup(charPredictor[context], statePredictor[context], staticPredictor[context]))) + { + ProbabilityCounter::UpdateBit1(statePredictor[context], M_RANK_PS_TH1, M_RANK_PS_AR1); + ProbabilityCounter::UpdateBit1(charPredictor[context], M_RANK_PC_TH1, M_RANK_PC_AR1); + ProbabilityCounter::UpdateBit1(staticPredictor[context], M_RANK_PP_TH1, M_RANK_PP_AR1); + mixer->UpdateBit1(M_RANK_PM_LR0, M_RANK_PM_LR1, M_RANK_PM_LR2, M_RANK_PM_TH1, M_RANK_PM_AR1); + context += context + 1; rank += rank + 1; + } + else + { + ProbabilityCounter::UpdateBit0(statePredictor[context], M_RANK_PS_TH0, M_RANK_PS_AR0); + ProbabilityCounter::UpdateBit0(charPredictor[context], M_RANK_PC_TH0, M_RANK_PC_AR0); + ProbabilityCounter::UpdateBit0(staticPredictor[context], M_RANK_PP_TH0, M_RANK_PP_AR0); + mixer->UpdateBit0(M_RANK_PM_LR0, M_RANK_PM_LR1, 
M_RANK_PM_LR2, M_RANK_PM_TH0, M_RANK_PM_AR0); + context += context; rank += rank; + } + } + + rankHistory[currentChar] = (unsigned char)bsc_bit_scan_reverse(rank); + } + + { +#if LIBBSC_CPU_FEATURE >= LIBBSC_CPU_FEATURE_SSE41 + __m128i * MTFTable_p = (__m128i *)&MTFTable[rank & (-16)]; + __m128i r = _mm_load_si128(MTFTable_p); _mm_store_si128(MTFTable_p, _mm_shuffle_epi8(_mm_insert_epi8(r, currentChar, 0), rank16_shuffle[rank & 15])); + + while ((--MTFTable_p) >= (__m128i *)MTFTable) + { + __m128i t = _mm_load_si128(MTFTable_p); _mm_store_si128(MTFTable_p, _mm_alignr_epi8(r, t, 1)); r = t; + } +#else + for (int r = 0; r < rank; ++r) + { + MTFTable[r] = MTFTable[r + 1]; + } + MTFTable[rank] = currentChar; +#endif + } + + avgRank = (avgRank * 124 + rank * 4) >> 7; + rank = rank - 1; + history = runHistory[currentChar]; + state = model_run_state(contextRank0, contextRun, rank, history); + statePredictor = & model->Run.StateModel[state]; + charPredictor = & model->Run.CharModel[currentChar]; + staticPredictor = & model->Run.StaticModel; + mixer = & model->mixerOfRun[currentChar]; + + int runSize = 1; + if (coder.DecodeBit(mixer->Mixup(*charPredictor, *statePredictor, *staticPredictor))) + { + ProbabilityCounter::UpdateBit1(*statePredictor, M_RUN_TS_TH1, M_RUN_TS_AR1); + ProbabilityCounter::UpdateBit1(*charPredictor, M_RUN_TC_TH1, M_RUN_TC_AR1); + ProbabilityCounter::UpdateBit1(*staticPredictor, M_RUN_TP_TH1, M_RUN_TP_AR1); + mixer->UpdateBit1(M_RUN_TM_LR0, M_RUN_TM_LR1, M_RUN_TM_LR2, M_RUN_TM_TH1, M_RUN_TM_AR1); + + statePredictor = & model->Run.Exponent.StateModel[state][0]; + charPredictor = & model->Run.Exponent.CharModel[currentChar][0]; + staticPredictor = & model->Run.Exponent.StaticModel[0]; + mixer = & model->mixerOfRunExponent[history < 1 ? 
1 : history][1]; + + int bitRunSize = 1; + while (true) + { + if (coder.DecodeBit(mixer->Mixup(*charPredictor, *statePredictor, *staticPredictor))) + { + ProbabilityCounter::UpdateBit1(*statePredictor, M_RUN_ES_TH1, M_RUN_ES_AR1); statePredictor++; + ProbabilityCounter::UpdateBit1(*charPredictor, M_RUN_EC_TH1, M_RUN_EC_AR1); charPredictor++; + ProbabilityCounter::UpdateBit1(*staticPredictor, M_RUN_EP_TH1, M_RUN_EP_AR1); staticPredictor++; + mixer->UpdateBit1(M_RUN_EM_LR0, M_RUN_EM_LR1, M_RUN_EM_LR2, M_RUN_EM_TH1, M_RUN_EM_AR1); + bitRunSize++; mixer = & model->mixerOfRunExponent[history < bitRunSize ? bitRunSize : history][bitRunSize]; + } + else + { + ProbabilityCounter::UpdateBit0(*statePredictor, M_RUN_ES_TH0, M_RUN_ES_AR0); + ProbabilityCounter::UpdateBit0(*charPredictor, M_RUN_EC_TH0, M_RUN_EC_AR0); + ProbabilityCounter::UpdateBit0(*staticPredictor, M_RUN_EP_TH0, M_RUN_EP_AR0); + mixer->UpdateBit0(M_RUN_EM_LR0, M_RUN_EM_LR1, M_RUN_EM_LR2, M_RUN_EM_TH0, M_RUN_EM_AR0); + break; + } + } + + runHistory[currentChar] = (runHistory[currentChar] + 3 * bitRunSize + 3) >> 2; + + statePredictor = & model->Run.Mantissa[bitRunSize].StateModel[state][0]; + charPredictor = & model->Run.Mantissa[bitRunSize].CharModel[currentChar][0]; + staticPredictor = & model->Run.Mantissa[bitRunSize].StaticModel[0]; + mixer = & model->mixerOfRunMantissa[bitRunSize]; + + for (int context = 1, bit = bitRunSize - 1; bit >= 0; --bit) + { + if (coder.DecodeBit(mixer->Mixup(charPredictor[context], statePredictor[context], staticPredictor[context]))) + { + ProbabilityCounter::UpdateBit1(statePredictor[context], M_RUN_MS_TH1, M_RUN_MS_AR1); + ProbabilityCounter::UpdateBit1(charPredictor[context], M_RUN_MC_TH1, M_RUN_MC_AR1); + ProbabilityCounter::UpdateBit1(staticPredictor[context], M_RUN_MP_TH1, M_RUN_MP_AR1); + mixer->UpdateBit1(M_RUN_MM_LR0, M_RUN_MM_LR1, M_RUN_MM_LR2, M_RUN_MM_TH1, M_RUN_MM_AR1); + runSize += runSize + 1; if (bitRunSize <= 5) context += context + 1; else context++; + } + else 
+ { + ProbabilityCounter::UpdateBit0(statePredictor[context], M_RUN_MS_TH0, M_RUN_MS_AR0); + ProbabilityCounter::UpdateBit0(charPredictor[context], M_RUN_MC_TH0, M_RUN_MC_AR0); + ProbabilityCounter::UpdateBit0(staticPredictor[context], M_RUN_MP_TH0, M_RUN_MP_AR0); + mixer->UpdateBit0(M_RUN_MM_LR0, M_RUN_MM_LR1, M_RUN_MM_LR2, M_RUN_MM_TH0, M_RUN_MM_AR0); + runSize += runSize; if (bitRunSize <= 5) context += context; else context++; + } + } + + } + else + { + runHistory[currentChar] = (runHistory[currentChar] + 2) >> 2; + ProbabilityCounter::UpdateBit0(*statePredictor, M_RUN_TS_TH0, M_RUN_TS_AR0); + ProbabilityCounter::UpdateBit0(*charPredictor, M_RUN_TC_TH0, M_RUN_TC_AR0); + ProbabilityCounter::UpdateBit0(*staticPredictor, M_RUN_TP_TH0, M_RUN_TP_AR0); + mixer->UpdateBit0(M_RUN_TM_LR0, M_RUN_TM_LR1, M_RUN_TM_LR2, M_RUN_TM_TH0, M_RUN_TM_AR0); + } + + contextRank0 = ((contextRank0 << 1) | (rank == 0 ? 1 : 0)) & 0x7; + contextRank4 = ((contextRank4 << 2) | (rank < 3 ? rank : 3)) & 0xff; + contextRun = ((contextRun << 1) | (runSize < 3 ? 
1 : 0)) & 0xf; + + for (; runSize > 0; --runSize) output[i++] = currentChar; + } + + return n; +} + +#endif + +#if defined(QLFC_STATIC_DECODE_FUNCTION_NAME) + +int QLFC_STATIC_DECODE_FUNCTION_NAME (const unsigned char * input, unsigned char * output, QlfcStatisticalModel1 * model) +{ + RangeCoder coder; + + unsigned char ALIGNED(64) MTFTable[ALPHABET_SIZE]; + + bsc_qlfc_init_model(model); + + int contextRank0 = 0; + int contextRank4 = 0; + int contextRun = 0; + int maxRank = 7; + int avgRank = 0; + + unsigned char rankHistory[ALPHABET_SIZE], runHistory[ALPHABET_SIZE]; + for (int i = 0; i < ALPHABET_SIZE; ++i) + { + rankHistory[i] = runHistory[i] = 0; + } + + coder.InitDecoder(input); + int n = (int)coder.DecodeWord(); + + unsigned char usedChar[ALPHABET_SIZE]; + for (int i = 0; i < ALPHABET_SIZE; ++i) usedChar[i] = 0; + + int prevChar = -1; + for (int rank = 0; rank < ALPHABET_SIZE; ++rank) + { + int currentChar = 0; + + for (int bit = 7; bit >= 0; --bit) + { + bool bit0 = false, bit1 = false; + + for (int c = 0; c < ALPHABET_SIZE; ++c) + { + if (c == prevChar || usedChar[c] == 0) + { + if (currentChar == (c >> (bit + 1))) + { + if (c & (1 << bit)) bit1 = true; else bit0 = true; + if (bit0 && bit1) break; + } + } + } + + if (bit0 && bit1) + { + currentChar += currentChar + coder.DecodeBit(); + } + else + { + if (bit0) currentChar += currentChar + 0; + if (bit1) currentChar += currentChar + 1; + } + } + + MTFTable[rank] = currentChar; + + if (currentChar == prevChar) + { + maxRank = bsc_bit_scan_reverse(rank - 1); + break; + } + + prevChar = currentChar; usedChar[currentChar] = 1; + } + + for (int i = 0; i < n;) + { + int currentChar = MTFTable[0]; + int history = rankHistory[currentChar]; + int state = model_rank_state(contextRank4, contextRun, history); + + short * RESTRICT statePredictor = & model->Rank.StateModel[state]; + short * RESTRICT charPredictor = & model->Rank.CharModel[currentChar]; + short * RESTRICT staticPredictor = & model->Rank.StaticModel; + + 
int rank = 1; + if (avgRank < 32) + { + if (coder.DecodeBit((*charPredictor * F_RANK_TM_LR0 + *statePredictor * F_RANK_TM_LR1 + *staticPredictor * F_RANK_TM_LR2) >> 5)) + { + ProbabilityCounter::UpdateBit1(*statePredictor, F_RANK_TS_TH1, F_RANK_TS_AR1); + ProbabilityCounter::UpdateBit1(*charPredictor, F_RANK_TC_TH1, F_RANK_TC_AR1); + ProbabilityCounter::UpdateBit1(*staticPredictor, F_RANK_TP_TH1, F_RANK_TP_AR1); + + statePredictor = & model->Rank.Exponent.StateModel[state][0]; + charPredictor = & model->Rank.Exponent.CharModel[currentChar][0]; + staticPredictor = & model->Rank.Exponent.StaticModel[0]; + + int bitRankSize = 1; + while (true) + { + if (bitRankSize == maxRank) break; + if (coder.DecodeBit((*charPredictor * F_RANK_EM_LR0 + *statePredictor * F_RANK_EM_LR1 + *staticPredictor * F_RANK_EM_LR2) >> 5)) + { + ProbabilityCounter::UpdateBit1(*statePredictor, F_RANK_ES_TH1, F_RANK_ES_AR1); statePredictor++; + ProbabilityCounter::UpdateBit1(*charPredictor, F_RANK_EC_TH1, F_RANK_EC_AR1); charPredictor++; + ProbabilityCounter::UpdateBit1(*staticPredictor, F_RANK_EP_TH1, F_RANK_EP_AR1); staticPredictor++; + bitRankSize++; + } + else + { + ProbabilityCounter::UpdateBit0(*statePredictor, F_RANK_ES_TH0, F_RANK_ES_AR0); + ProbabilityCounter::UpdateBit0(*charPredictor, F_RANK_EC_TH0, F_RANK_EC_AR0); + ProbabilityCounter::UpdateBit0(*staticPredictor, F_RANK_EP_TH0, F_RANK_EP_AR0); + break; + } + } + + rankHistory[currentChar] = bitRankSize; + + statePredictor = & model->Rank.Mantissa[bitRankSize].StateModel[state][0]; + charPredictor = & model->Rank.Mantissa[bitRankSize].CharModel[currentChar][0]; + staticPredictor = & model->Rank.Mantissa[bitRankSize].StaticModel[0]; + + for (int bit = bitRankSize - 1; bit >= 0; --bit) + { + unsigned int b = (unsigned int)coder.DecodeBit((charPredictor[rank] * F_RANK_MM_LR0 + statePredictor[rank] * F_RANK_MM_LR1 + staticPredictor[rank] * F_RANK_MM_LR2) >> 5); + + ProbabilityCounter::UpdateBit(b, statePredictor[rank], F_RANK_MS_TH0, 
F_RANK_MS_AR0, F_RANK_MS_TH1, F_RANK_MS_AR1); + ProbabilityCounter::UpdateBit(b, charPredictor[rank], F_RANK_MC_TH0, F_RANK_MC_AR0, F_RANK_MC_TH1, F_RANK_MC_AR1); + ProbabilityCounter::UpdateBit(b, staticPredictor[rank], F_RANK_MP_TH0, F_RANK_MP_AR0, F_RANK_MP_TH1, F_RANK_MP_AR1); + + rank += rank + b; + } + } + else + { + rankHistory[currentChar] = 0; + ProbabilityCounter::UpdateBit0(*statePredictor, F_RANK_TS_TH0, F_RANK_TS_AR0); + ProbabilityCounter::UpdateBit0(*charPredictor, F_RANK_TC_TH0, F_RANK_TC_AR0); + ProbabilityCounter::UpdateBit0(*staticPredictor, F_RANK_TP_TH0, F_RANK_TP_AR0); + } + } + else + { + statePredictor = & model->Rank.Escape.StateModel[state][0]; + charPredictor = & model->Rank.Escape.CharModel[currentChar][0]; + staticPredictor = & model->Rank.Escape.StaticModel[0]; + + rank = 0; + for (int context = 1, bit = maxRank; bit >= 0; --bit) + { + unsigned int b = (unsigned int)coder.DecodeBit((charPredictor[context] * F_RANK_PM_LR0 + statePredictor[context] * F_RANK_PM_LR1 + staticPredictor[context] * F_RANK_PM_LR2) >> 5); + + ProbabilityCounter::UpdateBit(b, statePredictor[context], F_RANK_PS_TH0, F_RANK_PS_AR0, F_RANK_PS_TH1, F_RANK_PS_AR1); + ProbabilityCounter::UpdateBit(b, charPredictor[context], F_RANK_PC_TH0, F_RANK_PC_AR0, F_RANK_PC_TH1, F_RANK_PC_AR1); + ProbabilityCounter::UpdateBit(b, staticPredictor[context], F_RANK_PP_TH0, F_RANK_PP_AR0, F_RANK_PP_TH1, F_RANK_PP_AR1); + + context += context + b; rank += rank + b; + } + + rankHistory[currentChar] = (unsigned char)bsc_bit_scan_reverse(rank); + } + + { +#if LIBBSC_CPU_FEATURE >= LIBBSC_CPU_FEATURE_SSE41 + __m128i * MTFTable_p = (__m128i *)&MTFTable[rank & (-16)]; + __m128i r = _mm_load_si128(MTFTable_p); _mm_store_si128(MTFTable_p, _mm_shuffle_epi8(_mm_insert_epi8(r, currentChar, 0), rank16_shuffle[rank & 15])); + + while ((--MTFTable_p) >= (__m128i *)MTFTable) + { + __m128i t = _mm_load_si128(MTFTable_p); _mm_store_si128(MTFTable_p, _mm_alignr_epi8(r, t, 1)); r = t; + } +#else + for 
(int r = 0; r < rank; ++r) + { + MTFTable[r] = MTFTable[r + 1]; + } + MTFTable[rank] = currentChar; +#endif + } + + avgRank = (avgRank * 124 + rank * 4) >> 7; + rank = rank - 1; + history = runHistory[currentChar]; + state = model_run_state(contextRank0, contextRun, rank, history); + statePredictor = & model->Run.StateModel[state]; + charPredictor = & model->Run.CharModel[currentChar]; + staticPredictor = & model->Run.StaticModel; + + int runSize = 1; + if (coder.DecodeBit((*charPredictor * F_RUN_TM_LR0 + *statePredictor * F_RUN_TM_LR1 + *staticPredictor * F_RUN_TM_LR2) >> 5)) + { + ProbabilityCounter::UpdateBit1(*statePredictor, F_RUN_TS_TH1, F_RUN_TS_AR1); + ProbabilityCounter::UpdateBit1(*charPredictor, F_RUN_TC_TH1, F_RUN_TC_AR1); + ProbabilityCounter::UpdateBit1(*staticPredictor, F_RUN_TP_TH1, F_RUN_TP_AR1); + + statePredictor = & model->Run.Exponent.StateModel[state][0]; + charPredictor = & model->Run.Exponent.CharModel[currentChar][0]; + staticPredictor = & model->Run.Exponent.StaticModel[0]; + + int bitRunSize = 1; + while (true) + { + if (coder.DecodeBit((*charPredictor * F_RUN_EM_LR0 + *statePredictor * F_RUN_EM_LR1 + *staticPredictor * F_RUN_EM_LR2) >> 5)) + { + ProbabilityCounter::UpdateBit1(*statePredictor, F_RUN_ES_TH1, F_RUN_ES_AR1); statePredictor++; + ProbabilityCounter::UpdateBit1(*charPredictor, F_RUN_EC_TH1, F_RUN_EC_AR1); charPredictor++; + ProbabilityCounter::UpdateBit1(*staticPredictor, F_RUN_EP_TH1, F_RUN_EP_AR1); staticPredictor++; + bitRunSize++; + } + else + { + ProbabilityCounter::UpdateBit0(*statePredictor, F_RUN_ES_TH0, F_RUN_ES_AR0); + ProbabilityCounter::UpdateBit0(*charPredictor, F_RUN_EC_TH0, F_RUN_EC_AR0); + ProbabilityCounter::UpdateBit0(*staticPredictor, F_RUN_EP_TH0, F_RUN_EP_AR0); + break; + } + } + + runHistory[currentChar] = (runHistory[currentChar] + 3 * bitRunSize + 3) >> 2; + + statePredictor = & model->Run.Mantissa[bitRunSize].StateModel[state][0]; + charPredictor = & 
model->Run.Mantissa[bitRunSize].CharModel[currentChar][0]; + staticPredictor = & model->Run.Mantissa[bitRunSize].StaticModel[0]; + + for (int context = 1, bit = bitRunSize - 1; bit >= 0; --bit) + { + unsigned int b = (unsigned int)coder.DecodeBit((charPredictor[context] * F_RUN_MM_LR0 + statePredictor[context] * F_RUN_MM_LR1 + staticPredictor[context] * F_RUN_MM_LR2) >> 5); + + ProbabilityCounter::UpdateBit(b, statePredictor[context], F_RUN_MS_TH0, F_RUN_MS_AR0, F_RUN_MS_TH1, F_RUN_MS_AR1); + ProbabilityCounter::UpdateBit(b, charPredictor[context], F_RUN_MC_TH0, F_RUN_MC_AR0, F_RUN_MC_TH1, F_RUN_MC_AR1); + ProbabilityCounter::UpdateBit(b, staticPredictor[context], F_RUN_MP_TH0, F_RUN_MP_AR0, F_RUN_MP_TH1, F_RUN_MP_AR1); + + runSize += runSize + b; int ctx = context + context + b; context++; if (bitRunSize <= 5) { context = ctx; } + } + } + else + { + runHistory[currentChar] = (runHistory[currentChar] + 2) >> 2; + ProbabilityCounter::UpdateBit0(*statePredictor, F_RUN_TS_TH0, F_RUN_TS_AR0); + ProbabilityCounter::UpdateBit0(*charPredictor, F_RUN_TC_TH0, F_RUN_TC_AR0); + ProbabilityCounter::UpdateBit0(*staticPredictor, F_RUN_TP_TH0, F_RUN_TP_AR0); + } + + contextRank0 = ((contextRank0 << 1) | (rank == 0 ? 1 : 0)) & 0x7; + contextRank4 = ((contextRank4 << 2) | (rank < 3 ? rank : 3)) & 0xff; + contextRun = ((contextRun << 1) | (runSize < 3 ? 
1 : 0)) & 0xf; + + for (; runSize > 0; --runSize) output[i++] = currentChar; + } + + return n; +} + +#endif + +#if defined(QLFC_FAST_DECODE_FUNCTION_NAME) /* guard must test the same macro that names the function below */ + +int QLFC_FAST_DECODE_FUNCTION_NAME (const unsigned char * input, unsigned char * output, QlfcStatisticalModel2 * model) /* fast QLFC decoder: reads the length n and the initial MTF table from input, then decodes rank/run pairs until n bytes are written to output */ +{ + unsigned char ALIGNED(64) MTFTable[ALPHABET_SIZE]; + + bsc_qlfc_init_model(model); + + RangeCoder coder; + coder.InitDecoder(input); + + int n = (int)coder.DecodeWord(); + + unsigned char usedChar[ALPHABET_SIZE]; + for (int i = 0; i < ALPHABET_SIZE; ++i) usedChar[i] = 0; + + int prevChar = -1; + for (int rank = 0; rank < ALPHABET_SIZE; ++rank) + { + int currentChar = 0; + + for (int bit = 7; bit >= 0; --bit) + { + bool bit0 = false, bit1 = false; + + for (int c = 0; c < ALPHABET_SIZE; ++c) + { + if (c == prevChar || usedChar[c] == 0) + { + if (currentChar == (c >> (bit + 1))) + { + if (c & (1 << bit)) bit1 = true; else bit0 = true; + if (bit0 && bit1) break; + } + } + } + + if (bit0 && bit1) + { + currentChar += currentChar + coder.DecodeBit<1>(1); + } + else + { + if (bit0) currentChar += currentChar + 0; + if (bit1) currentChar += currentChar + 1; + } + } + + MTFTable[rank] = currentChar; + + if (currentChar == prevChar) + { + break; + } + + prevChar = currentChar; usedChar[currentChar] = 1; + } + + const unsigned char * outputEnd = output + n; + + for (; output < outputEnd; ) + { + unsigned int currentChar = MTFTable[0]; + + { + short * RESTRICT predictor = &model->Rank.Exponent[currentChar][0]; + + int p = predictor[0]; + if (coder.PeakBit<13>(p)) + { + ProbabilityCounter::UpdateBit<4>(predictor[0], 83); + coder.DecodeBit1<13>(p); + + int bitRankSize = 1; + while (bitRankSize < 7) + { + p = predictor[bitRankSize]; + if (coder.PeakBit<13>(p)) + { + ProbabilityCounter::UpdateBit<4>(predictor[bitRankSize], 122); + bitRankSize++; + coder.DecodeBit1<13>(p); + } + else + { + ProbabilityCounter::UpdateBit<4>(predictor[bitRankSize], 8114); + coder.DecodeBit0<13>(p); + break; + } + } + +
predictor = & model->Rank.Mantissa[currentChar][bitRankSize][0]; + + unsigned int rank = 1; + while (--bitRankSize >= 0) + { + unsigned int b = coder.DecodeBit<13>(predictor[rank]); + ProbabilityCounter::UpdateBit<7>(b, predictor[rank], 7999, 235); + rank += rank + b; + } + + { +#if LIBBSC_CPU_FEATURE >= LIBBSC_CPU_FEATURE_SSE41 + __m128i * MTFTable_p = (__m128i *)&MTFTable[rank & (-16)]; + __m128i r = _mm_load_si128(MTFTable_p); _mm_store_si128(MTFTable_p, _mm_shuffle_epi8(_mm_insert_epi8(r, currentChar, 0), rank16_shuffle[rank & 15])); + + while ((--MTFTable_p) >= (__m128i *)MTFTable) + { + __m128i t = _mm_load_si128(MTFTable_p); _mm_store_si128(MTFTable_p, _mm_alignr_epi8(r, t, 1)); r = t; + } +#else + for (unsigned int r = 0; r < rank; ++r) + { + MTFTable[r] = MTFTable[r + 1]; + } + + MTFTable[rank] = currentChar; +#endif + } + } + else + { + MTFTable[0] = MTFTable[1]; MTFTable[1] = currentChar; ProbabilityCounter::UpdateBit<4>(predictor[0], 8016); coder.DecodeBit0<13>(p); + } + } + + { + short * RESTRICT predictor = &model->Run.Exponent[currentChar][0]; + + int p = predictor[0]; + if (coder.PeakBit<11>(p)) + { + ProbabilityCounter::UpdateBit<5>(predictor[0], 42); + coder.DecodeBit1<11>(p); + + int bitRunSize = 1; + while (true) + { + p = predictor[bitRunSize]; + if (coder.PeakBit<11>(p)) + { + ProbabilityCounter::UpdateBit<4>(predictor[bitRunSize], 142); + bitRunSize++; + coder.DecodeBit1<11>(p); + } + else + { + ProbabilityCounter::UpdateBit<4>(predictor[bitRunSize], 1962); + coder.DecodeBit0<11>(p); + break; + } + } + + predictor = &model->Run.Mantissa[currentChar][bitRunSize][0]; + + if (bitRunSize <= 5) + { + unsigned int runSize = 1; + while (--bitRunSize >= 0) + { + unsigned int b = coder.DecodeBit<11>(predictor[runSize]); + ProbabilityCounter::UpdateBit<6>(b, predictor[runSize], 1951, 147); + runSize += runSize + b; + } + + for (; runSize > 0; --runSize) { *output++ = currentChar; } + } + else + { + unsigned int runSize = 1; + for (int context = 1; 
context <= bitRunSize; ++context) + { + unsigned int b = coder.DecodeBit<11>(predictor[context]); + ProbabilityCounter::UpdateBit<5>(b, predictor[context], 1987, 46); + runSize += runSize + b; + } + + for (; runSize > 0; --runSize) { *output++ = currentChar; } + } + } + else + { + *output++ = currentChar; ProbabilityCounter::UpdateBit<5>(predictor[0], 2025); coder.DecodeBit0<11>(p); + } + } + } + + return n; +} + +#endif + +#if !defined(LIBBSC_DYNAMIC_CPU_DISPATCH) || LIBBSC_CPU_FEATURE == LIBBSC_CPU_FEATURE_SSE2 + +int bsc_qlfc_init(int features) +{ + return bsc_qlfc_init_static_model(); +} + +int bsc_qlfc_static_encode_block(const unsigned char * input, unsigned char * output, int inputSize, int outputSize) +{ + if (QlfcStatisticalModel1 * model = (QlfcStatisticalModel1 *)bsc_malloc(sizeof(QlfcStatisticalModel1))) + { + if (unsigned char * buffer = (unsigned char *)bsc_malloc(inputSize * sizeof(unsigned char))) + { + int result = bsc_qlfc_static_encode(input, output, buffer, inputSize, outputSize, model); + + bsc_free(buffer); bsc_free(model); + + return result; + }; + bsc_free(model); + }; + return LIBBSC_NOT_ENOUGH_MEMORY; +} + +int bsc_qlfc_adaptive_encode_block(const unsigned char * input, unsigned char * output, int inputSize, int outputSize) +{ + if (QlfcStatisticalModel1 * model = (QlfcStatisticalModel1 *)bsc_malloc(sizeof(QlfcStatisticalModel1))) + { + if (unsigned char * buffer = (unsigned char *)bsc_malloc(inputSize * sizeof(unsigned char))) + { + int result = bsc_qlfc_adaptive_encode(input, output, buffer, inputSize, outputSize, model); + + bsc_free(buffer); bsc_free(model); + + return result; + }; + bsc_free(model); + }; + return LIBBSC_NOT_ENOUGH_MEMORY; +} + +int bsc_qlfc_fast_encode_block(const unsigned char * input, unsigned char * output, int inputSize, int outputSize) +{ + if (QlfcStatisticalModel2 * model = (QlfcStatisticalModel2 *)bsc_malloc(sizeof(QlfcStatisticalModel2))) + { + if (unsigned char * buffer = (unsigned char 
*)bsc_malloc(inputSize * sizeof(unsigned char))) + { + int result = bsc_qlfc_fast_encode(input, output, buffer, inputSize, outputSize, model); + + bsc_free(buffer); bsc_free(model); + + return result; + }; + bsc_free(model); + }; + return LIBBSC_NOT_ENOUGH_MEMORY; +} + +int bsc_qlfc_static_decode_block(const unsigned char * input, unsigned char * output) +{ + if (QlfcStatisticalModel1 * model = (QlfcStatisticalModel1 *)bsc_malloc(sizeof(QlfcStatisticalModel1))) + { + int result = bsc_qlfc_static_decode(input, output, model); + + bsc_free(model); + + return result; + }; + return LIBBSC_NOT_ENOUGH_MEMORY; +} + +int bsc_qlfc_adaptive_decode_block(const unsigned char * input, unsigned char * output) +{ + if (QlfcStatisticalModel1 * model = (QlfcStatisticalModel1 *)bsc_malloc(sizeof(QlfcStatisticalModel1))) + { + int result = bsc_qlfc_adaptive_decode(input, output, model); + + bsc_free(model); + + return result; + }; + return LIBBSC_NOT_ENOUGH_MEMORY; +} + +int bsc_qlfc_fast_decode_block(const unsigned char * input, unsigned char * output) +{ + if (QlfcStatisticalModel2 * model = (QlfcStatisticalModel2 *)bsc_malloc(sizeof(QlfcStatisticalModel2))) + { + int result = bsc_qlfc_fast_decode(input, output, model); + + bsc_free(model); + + return result; + }; + return LIBBSC_NOT_ENOUGH_MEMORY; +} + +#endif + +/*-----------------------------------------------------------*/ +/* End qlfc.cpp */ +/*-----------------------------------------------------------*/ diff --git a/libbsc/libbsc/coder/qlfc/qlfc.h b/libbsc/libbsc/coder/qlfc/qlfc.h new file mode 100644 index 00000000..b6f2714c --- /dev/null +++ b/libbsc/libbsc/coder/qlfc/qlfc.h @@ -0,0 +1,109 @@ +/*-----------------------------------------------------------*/ +/* Block Sorting, Lossless Data Compression Library. 
*/ +/* Interface to Quantized Local Frequency Coding functions */ +/*-----------------------------------------------------------*/ + +/*-- + +This file is a part of bsc and/or libbsc, a program and a library for +lossless, block-sorting data compression. + + Copyright (c) 2009-2021 Ilya Grebnov + + Licensed under the Apache License, Version 2.0 (the "License"); + you may not use this file except in compliance with the License. + You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + + Unless required by applicable law or agreed to in writing, software + distributed under the License is distributed on an "AS IS" BASIS, + WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + See the License for the specific language governing permissions and + limitations under the License. + +Please see the file LICENSE for full copyright information and file AUTHORS +for full list of contributors. + +See also the bsc and libbsc web site: + http://libbsc.com/ for more information. + +--*/ + +#ifndef _LIBBSC_QLFC_H +#define _LIBBSC_QLFC_H + +#ifdef __cplusplus +extern "C" { +#endif + + /** + * You should call this function before you call any of the other functions in qlfc. + * @param features - the set of additional features. + * @return LIBBSC_NO_ERROR if no error occurred, error code otherwise. + */ + int bsc_qlfc_init(int features); + + /** + * Compress a memory block using Quantized Local Frequency Coding algorithm. + * @param input - the input memory block of n bytes. + * @param output - the output memory block of n bytes. + * @param inputSize - the length of the input memory block. + * @param outputSize - the length of the output memory block. + * @return the length of compressed memory block if no error occurred, error code otherwise. 
+ */ + int bsc_qlfc_static_encode_block(const unsigned char * input, unsigned char * output, int inputSize, int outputSize); + + /** + * Compress a memory block using Quantized Local Frequency Coding algorithm. + * @param input - the input memory block of n bytes. + * @param output - the output memory block of n bytes. + * @param inputSize - the length of the input memory block. + * @param outputSize - the length of the output memory block. + * @return the length of compressed memory block if no error occurred, error code otherwise. + */ + int bsc_qlfc_adaptive_encode_block(const unsigned char * input, unsigned char * output, int inputSize, int outputSize); + + /** + * Compress a memory block using Quantized Local Frequency Coding algorithm. + * @param input - the input memory block of n bytes. + * @param output - the output memory block of n bytes. + * @param inputSize - the length of the input memory block. + * @param outputSize - the length of the output memory block. + * @return the length of compressed memory block if no error occurred, error code otherwise. + */ + int bsc_qlfc_fast_encode_block(const unsigned char * input, unsigned char * output, int inputSize, int outputSize); + + /** + * Decompress a memory block using Quantized Local Frequency Coding algorithm. + * @param input - the input memory block of n bytes. + * @param output - the output memory block of n bytes. + * @return the length of decompressed memory block if no error occurred, error code otherwise. + */ + int bsc_qlfc_static_decode_block(const unsigned char * input, unsigned char * output); + + /** + * Decompress a memory block using Quantized Local Frequency Coding algorithm. + * @param input - the input memory block of n bytes. + * @param output - the output memory block of n bytes. + * @return the length of decompressed memory block if no error occurred, error code otherwise.
+ */ + int bsc_qlfc_adaptive_decode_block(const unsigned char * input, unsigned char * output); + + /** + * Decompress a memory block using Quantized Local Frequency Coding algorithm. + * @param input - the input memory block of n bytes. + * @param output - the output memory block of n bytes. + * @return the length of decompressed memory block if no error occurred, error code otherwise. + */ + int bsc_qlfc_fast_decode_block(const unsigned char * input, unsigned char * output); + +#ifdef __cplusplus +} +#endif + +#endif + +/*-----------------------------------------------------------*/ +/* End qlfc.h */ +/*-----------------------------------------------------------*/ diff --git a/libbsc/libbsc/coder/qlfc/qlfc_model.cpp b/libbsc/libbsc/coder/qlfc/qlfc_model.cpp new file mode 100644 index 00000000..78fc09a7 --- /dev/null +++ b/libbsc/libbsc/coder/qlfc/qlfc_model.cpp @@ -0,0 +1,91 @@ +/*-----------------------------------------------------------*/ +/* Block Sorting, Lossless Data Compression Library. */ +/* Statistical data compression model for QLFC */ +/*-----------------------------------------------------------*/ + +/*-- + +This file is a part of bsc and/or libbsc, a program and a library for +lossless, block-sorting data compression. + + Copyright (c) 2009-2021 Ilya Grebnov + + Licensed under the Apache License, Version 2.0 (the "License"); + you may not use this file except in compliance with the License. + You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + + Unless required by applicable law or agreed to in writing, software + distributed under the License is distributed on an "AS IS" BASIS, + WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + See the License for the specific language governing permissions and + limitations under the License. + +Please see the file LICENSE for full copyright information and file AUTHORS +for full list of contributors. 
+ +See also the bsc and libbsc web site: + http://libbsc.com/ for more information. + +--*/ + +#include <stdlib.h> +#include <string.h> /* declares memcpy, used by bsc_qlfc_init_model */ + +#include "qlfc_model.h" + +#include "../../libbsc.h" +#include "../../platform/platform.h" + +QlfcStatisticalModel1 g_QlfcStatisticalModel1; +QlfcStatisticalModel2 g_QlfcStatisticalModel2; + +void bsc_qlfc_memset(void * dst, int size, short v) /* stores v into each of the size / 2 shorts starting at dst (callers pass size in bytes via sizeof) */ +{ + for (int i = 0; i < size / 2; ++i) ((short *)dst)[i] = v; +} + +int bsc_qlfc_init_static_model() /* initialises the two global models; always returns LIBBSC_NO_ERROR */ +{ + for (int mixer = 0; mixer < ALPHABET_SIZE; ++mixer) + { + g_QlfcStatisticalModel1.mixerOfRank[mixer].Init(); + g_QlfcStatisticalModel1.mixerOfRankEscape[mixer].Init(); + g_QlfcStatisticalModel1.mixerOfRun[mixer].Init(); + } + for (int bit = 0; bit < 8; ++bit) + { + g_QlfcStatisticalModel1.mixerOfRankMantissa[bit].Init(); + for (int context = 0; context < 8; ++context) + g_QlfcStatisticalModel1.mixerOfRankExponent[context][bit].Init(); + } + for (int bit = 0; bit < 32; ++bit) + { + g_QlfcStatisticalModel1.mixerOfRunMantissa[bit].Init(); + for (int context = 0; context < 32; ++context) + g_QlfcStatisticalModel1.mixerOfRunExponent[context][bit].Init(); + } + + bsc_qlfc_memset(&g_QlfcStatisticalModel1.Rank, sizeof(g_QlfcStatisticalModel1.Rank), 2048); + bsc_qlfc_memset(&g_QlfcStatisticalModel1.Run, sizeof(g_QlfcStatisticalModel1.Run), 2048); + + bsc_qlfc_memset(&g_QlfcStatisticalModel2.Rank, sizeof(g_QlfcStatisticalModel2.Rank), 4096); + bsc_qlfc_memset(&g_QlfcStatisticalModel2.Run, sizeof(g_QlfcStatisticalModel2.Run), 1024); + + return LIBBSC_NO_ERROR; +} + +void bsc_qlfc_init_model(QlfcStatisticalModel1 * model) /* copies the global g_QlfcStatisticalModel1 into *model */ +{ + memcpy(model, &g_QlfcStatisticalModel1, sizeof(QlfcStatisticalModel1)); +} + +void bsc_qlfc_init_model(QlfcStatisticalModel2 * model) /* copies the global g_QlfcStatisticalModel2 into *model */ +{ + memcpy(model, &g_QlfcStatisticalModel2, sizeof(QlfcStatisticalModel2)); +} + +/*-----------------------------------------------------------*/ +/* End qlfc_model.cpp */ +/*-----------------------------------------------------------*/ diff --git
a/libbsc/libbsc/coder/qlfc/qlfc_model.h b/libbsc/libbsc/coder/qlfc/qlfc_model.h new file mode 100644 index 00000000..110ae058 --- /dev/null +++ b/libbsc/libbsc/coder/qlfc/qlfc_model.h @@ -0,0 +1,269 @@ +/*-----------------------------------------------------------*/ +/* Block Sorting, Lossless Data Compression Library. */ +/* Statistical data compression model for QLFC */ +/*-----------------------------------------------------------*/ + +/*-- + +This file is a part of bsc and/or libbsc, a program and a library for +lossless, block-sorting data compression. + + Copyright (c) 2009-2021 Ilya Grebnov + + Licensed under the Apache License, Version 2.0 (the "License"); + you may not use this file except in compliance with the License. + You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + + Unless required by applicable law or agreed to in writing, software + distributed under the License is distributed on an "AS IS" BASIS, + WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + See the License for the specific language governing permissions and + limitations under the License. + +Please see the file LICENSE for full copyright information and file AUTHORS +for full list of contributors. + +See also the bsc and libbsc web site: + http://libbsc.com/ for more information. 
+ +--*/ + +#ifndef _LIBBSC_QLFC_MODEL_H +#define _LIBBSC_QLFC_MODEL_H + +#include "../common/predictor.h" + +const int M_RANK_TS_TH0 = 1; const int M_RANK_TS_AR0 = 57; +const int M_RANK_TS_TH1 = -111; const int M_RANK_TS_AR1 = 31; +const int M_RANK_TC_TH0 = 291; const int M_RANK_TC_AR0 = 250; +const int M_RANK_TC_TH1 = 154; const int M_RANK_TC_AR1 = 528; +const int M_RANK_TP_TH0 = 375; const int M_RANK_TP_AR0 = 163; +const int M_RANK_TP_TH1 = 313; const int M_RANK_TP_AR1 = 639; +const int M_RANK_TM_TH0 = -41; const int M_RANK_TM_AR0 = 96; +const int M_RANK_TM_TH1 = 53; const int M_RANK_TM_AR1 = 49; +const int M_RANK_TM_LR0 = 20; const int M_RANK_TM_LR1 = 47; +const int M_RANK_TM_LR2 = 27; + +const int M_RANK_ES_TH0 = -137; const int M_RANK_ES_AR0 = 17; +const int M_RANK_ES_TH1 = 482; const int M_RANK_ES_AR1 = 40; +const int M_RANK_EC_TH0 = 61; const int M_RANK_EC_AR0 = 192; +const int M_RANK_EC_TH1 = 200; const int M_RANK_EC_AR1 = 133; +const int M_RANK_EP_TH0 = 54; const int M_RANK_EP_AR0 = 1342; +const int M_RANK_EP_TH1 = 578; const int M_RANK_EP_AR1 = 1067; +const int M_RANK_EM_TH0 = -11; const int M_RANK_EM_AR0 = 318; +const int M_RANK_EM_TH1 = 144; const int M_RANK_EM_AR1 = 848; +const int M_RANK_EM_LR0 = 49; const int M_RANK_EM_LR1 = 41; +const int M_RANK_EM_LR2 = 40; + +const int M_RANK_MS_TH0 = -145; const int M_RANK_MS_AR0 = 18; +const int M_RANK_MS_TH1 = 114; const int M_RANK_MS_AR1 = 24; +const int M_RANK_MC_TH0 = -43; const int M_RANK_MC_AR0 = 69; +const int M_RANK_MC_TH1 = -36; const int M_RANK_MC_AR1 = 78; +const int M_RANK_MP_TH0 = -2; const int M_RANK_MP_AR0 = 1119; +const int M_RANK_MP_TH1 = 11; const int M_RANK_MP_AR1 = 1181; +const int M_RANK_MM_TH0 = -203; const int M_RANK_MM_AR0 = 20; +const int M_RANK_MM_TH1 = -271; const int M_RANK_MM_AR1 = 15; +const int M_RANK_MM_LR0 = 263; const int M_RANK_MM_LR1 = 175; +const int M_RANK_MM_LR2 = 17; + +const int M_RANK_PS_TH0 = -99; const int M_RANK_PS_AR0 = 32; +const int M_RANK_PS_TH1 = 318; const int 
M_RANK_PS_AR1 = 42; +const int M_RANK_PC_TH0 = 17; const int M_RANK_PC_AR0 = 101; +const int M_RANK_PC_TH1 = 1116; const int M_RANK_PC_AR1 = 246; +const int M_RANK_PP_TH0 = 22; const int M_RANK_PP_AR0 = 964; +const int M_RANK_PP_TH1 = -2; const int M_RANK_PP_AR1 = 1110; +const int M_RANK_PM_TH0 = -194; const int M_RANK_PM_AR0 = 21; +const int M_RANK_PM_TH1 = -129; const int M_RANK_PM_AR1 = 20; +const int M_RANK_PM_LR0 = 480; const int M_RANK_PM_LR1 = 202; +const int M_RANK_PM_LR2 = 17; + +const int M_RUN_TS_TH0 = -93; const int M_RUN_TS_AR0 = 34; +const int M_RUN_TS_TH1 = -4; const int M_RUN_TS_AR1 = 51; +const int M_RUN_TC_TH0 = 139; const int M_RUN_TC_AR0 = 423; +const int M_RUN_TC_TH1 = 244; const int M_RUN_TC_AR1 = 162; +const int M_RUN_TP_TH0 = 275; const int M_RUN_TP_AR0 = 450; +const int M_RUN_TP_TH1 = -6; const int M_RUN_TP_AR1 = 579; +const int M_RUN_TM_TH0 = -68; const int M_RUN_TM_AR0 = 25; +const int M_RUN_TM_TH1 = 1; const int M_RUN_TM_AR1 = 64; +const int M_RUN_TM_LR0 = 15; const int M_RUN_TM_LR1 = 50; +const int M_RUN_TM_LR2 = 78; + +const int M_RUN_ES_TH0 = -116; const int M_RUN_ES_AR0 = 31; +const int M_RUN_ES_TH1 = 43; const int M_RUN_ES_AR1 = 45; +const int M_RUN_EC_TH0 = 165; const int M_RUN_EC_AR0 = 222; +const int M_RUN_EC_TH1 = 30; const int M_RUN_EC_AR1 = 324; +const int M_RUN_EP_TH0 = 315; const int M_RUN_EP_AR0 = 857; +const int M_RUN_EP_TH1 = 109; const int M_RUN_EP_AR1 = 867; +const int M_RUN_EM_TH0 = -14; const int M_RUN_EM_AR0 = 215; +const int M_RUN_EM_TH1 = 61; const int M_RUN_EM_AR1 = 73; +const int M_RUN_EM_LR0 = 35; const int M_RUN_EM_LR1 = 37; +const int M_RUN_EM_LR2 = 42; + +const int M_RUN_MS_TH0 = -176; const int M_RUN_MS_AR0 = 14; +const int M_RUN_MS_TH1 = -141; const int M_RUN_MS_AR1 = 21; +const int M_RUN_MC_TH0 = 84; const int M_RUN_MC_AR0 = 172; +const int M_RUN_MC_TH1 = 37; const int M_RUN_MC_AR1 = 263; +const int M_RUN_MP_TH0 = 2; const int M_RUN_MP_AR0 = 15; +const int M_RUN_MP_TH1 = -197; const int M_RUN_MP_AR1 = 20; 
+const int M_RUN_MM_TH0 = -27; const int M_RUN_MM_AR0 = 142; +const int M_RUN_MM_TH1 = -146; const int M_RUN_MM_AR1 = 27; +const int M_RUN_MM_LR0 = 51; const int M_RUN_MM_LR1 = 44; +const int M_RUN_MM_LR2 = 80; + +const int F_RANK_TS_TH0 = -116; const int F_RANK_TS_AR0 = 33; +const int F_RANK_TS_TH1 = -78; const int F_RANK_TS_AR1 = 34; +const int F_RANK_TC_TH0 = -2; const int F_RANK_TC_AR0 = 282; +const int F_RANK_TC_TH1 = 12; const int F_RANK_TC_AR1 = 274; +const int F_RANK_TP_TH0 = 4; const int F_RANK_TP_AR0 = 697; +const int F_RANK_TP_TH1 = 55; const int F_RANK_TP_AR1 = 1185; +const int F_RANK_TM_LR0 = 17; const int F_RANK_TM_LR1 = 14; +const int F_RANK_TM_LR2 = 1; + +const int F_RANK_ES_TH0 = -177; const int F_RANK_ES_AR0 = 23; +const int F_RANK_ES_TH1 = -370; const int F_RANK_ES_AR1 = 11; +const int F_RANK_EC_TH0 = -14; const int F_RANK_EC_AR0 = 271; +const int F_RANK_EC_TH1 = 3; const int F_RANK_EC_AR1 = 308; +const int F_RANK_EP_TH0 = -3; const int F_RANK_EP_AR0 = 788; +const int F_RANK_EP_TH1 = 135; const int F_RANK_EP_AR1 = 1364; +const int F_RANK_EM_LR0 = 22; const int F_RANK_EM_LR1 = 6; +const int F_RANK_EM_LR2 = 4; + +const int F_RANK_MS_TH0 = -254; const int F_RANK_MS_AR0 = 16; +const int F_RANK_MS_TH1 = -177; const int F_RANK_MS_AR1 = 20; +const int F_RANK_MC_TH0 = -55; const int F_RANK_MC_AR0 = 73; +const int F_RANK_MC_TH1 = -54; const int F_RANK_MC_AR1 = 74; +const int F_RANK_MP_TH0 = -6; const int F_RANK_MP_AR0 = 575; +const int F_RANK_MP_TH1 = 1670; const int F_RANK_MP_AR1 = 1173; +const int F_RANK_MM_LR0 = 15; const int F_RANK_MM_LR1 = 10; +const int F_RANK_MM_LR2 = 7; + +const int F_RANK_PS_TH0 = -126; const int F_RANK_PS_AR0 = 32; +const int F_RANK_PS_TH1 = -126; const int F_RANK_PS_AR1 = 32; +const int F_RANK_PC_TH0 = -33; const int F_RANK_PC_AR0 = 120; +const int F_RANK_PC_TH1 = -25; const int F_RANK_PC_AR1 = 157; +const int F_RANK_PP_TH0 = -6; const int F_RANK_PP_AR0 = 585; +const int F_RANK_PP_TH1 = 150; const int F_RANK_PP_AR1 = 275; 
+const int F_RANK_PM_LR0 = 16; const int F_RANK_PM_LR1 = 11;
+const int F_RANK_PM_LR2 = 5;
+
+const int F_RUN_TS_TH0 = -68; const int F_RUN_TS_AR0 = 38;
+const int F_RUN_TS_TH1 = -112; const int F_RUN_TS_AR1 = 36;
+const int F_RUN_TC_TH0 = -4; const int F_RUN_TC_AR0 = 221;
+const int F_RUN_TC_TH1 = -13; const int F_RUN_TC_AR1 = 231;
+const int F_RUN_TP_TH0 = 0; const int F_RUN_TP_AR0 = 0;
+const int F_RUN_TP_TH1 = 0; const int F_RUN_TP_AR1 = 0;
+const int F_RUN_TM_LR0 = 14; const int F_RUN_TM_LR1 = 18;
+const int F_RUN_TM_LR2 = 0;
+
+const int F_RUN_ES_TH0 = -90; const int F_RUN_ES_AR0 = 45;
+const int F_RUN_ES_TH1 = -92; const int F_RUN_ES_AR1 = 44;
+const int F_RUN_EC_TH0 = -3; const int F_RUN_EC_AR0 = 325;
+const int F_RUN_EC_TH1 = -11; const int F_RUN_EC_AR1 = 341;
+const int F_RUN_EP_TH0 = 24; const int F_RUN_EP_AR0 = 887;
+const int F_RUN_EP_TH1 = -4; const int F_RUN_EP_AR1 = 765;
+const int F_RUN_EM_LR0 = 14; const int F_RUN_EM_LR1 = 15;
+const int F_RUN_EM_LR2 = 3;
+
+const int F_RUN_MS_TH0 = -275; const int F_RUN_MS_AR0 = 14;
+const int F_RUN_MS_TH1 = -185; const int F_RUN_MS_AR1 = 22;
+const int F_RUN_MC_TH0 = -18; const int F_RUN_MC_AR0 = 191;
+const int F_RUN_MC_TH1 = -15; const int F_RUN_MC_AR1 = 241;
+const int F_RUN_MP_TH0 = -73; const int F_RUN_MP_AR0 = 54;
+const int F_RUN_MP_TH1 = -214; const int F_RUN_MP_AR1 = 19;
+const int F_RUN_MM_LR0 = 7; const int F_RUN_MM_LR1 = 15;
+const int F_RUN_MM_LR2 = 10;
+
+/* Model state built from 16-bit counters (the Rank and Run groups) plus    */
+/* ProbabilityMixer instances. bsc_qlfc_init_static_model() presets every   */
+/* short inside Rank and Run to 2048 by size, and bsc_qlfc_init_model()     */
+/* copies the whole struct with memcpy, so Rank and Run must hold nothing   */
+/* but short fields.                                                        */
+/* NOTE(review): the counters look like adaptive bit probabilities consumed */
+/* by qlfc.cpp - confirm there before documenting individual fields.        */
+struct QlfcStatisticalModel1
+{
+
+public:
+
+    struct Rank
+    {
+        short StaticModel;
+        short StateModel[ALPHABET_SIZE];
+        short CharModel[ALPHABET_SIZE];
+
+        struct Exponent
+        {
+            short StaticModel[8];
+            short StateModel[ALPHABET_SIZE][8];
+            short CharModel[ALPHABET_SIZE][8];
+        } Exponent;
+
+        struct Mantissa
+        {
+            short StaticModel[ALPHABET_SIZE];
+            short StateModel[ALPHABET_SIZE][ALPHABET_SIZE];
+            short CharModel[ALPHABET_SIZE][ALPHABET_SIZE];
+        } Mantissa[8];
+
+        struct Escape
+        {
+            short StaticModel[ALPHABET_SIZE];
+            short StateModel[ALPHABET_SIZE][ALPHABET_SIZE];
+            short CharModel[ALPHABET_SIZE][ALPHABET_SIZE];
+        } Escape;
+
+    } Rank;
+
+    struct Run
+    {
+        short StaticModel;
+        short StateModel[ALPHABET_SIZE];
+        short CharModel[ALPHABET_SIZE];
+
+        struct Exponent
+        {
+            short StaticModel[32];
+            short StateModel[ALPHABET_SIZE][32];
+            short CharModel[ALPHABET_SIZE][32];
+        } Exponent;
+
+        struct Mantissa
+        {
+            short StaticModel[32];
+            short StateModel[ALPHABET_SIZE][32];
+            short CharModel[ALPHABET_SIZE][32];
+        } Mantissa[32];
+
+    } Run;
+
+    /* Every mixer below is reset with Init() by bsc_qlfc_init_static_model(); */
+    /* its loop bounds (ALPHABET_SIZE, 8 x 8, 32 x 32) mirror these extents.   */
+    ProbabilityMixer mixerOfRank[ALPHABET_SIZE];
+    ProbabilityMixer mixerOfRankExponent[8][8];
+    ProbabilityMixer mixerOfRankMantissa[8];
+    ProbabilityMixer mixerOfRankEscape[ALPHABET_SIZE];
+    ProbabilityMixer mixerOfRun[ALPHABET_SIZE];
+    ProbabilityMixer mixerOfRunExponent[32][32];
+    ProbabilityMixer mixerOfRunMantissa[32];
+};
+
+/* Counter-only model (no mixers). bsc_qlfc_init_static_model() presets     */
+/* every short of Rank to 4096 and every short of Run to 1024.              */
+struct QlfcStatisticalModel2
+{
+
+public:
+
+    struct Rank
+    {
+        short Exponent[ALPHABET_SIZE][8];
+        short Mantissa[ALPHABET_SIZE][8][ALPHABET_SIZE];
+    } Rank;
+
+    struct Run
+    {
+        short Exponent[ALPHABET_SIZE][32];
+        short Mantissa[ALPHABET_SIZE][32][32];
+    } Run;
+};
+
+/* Fills the global template models; returns LIBBSC_NO_ERROR. */
+int bsc_qlfc_init_static_model();
+/* Overwrite a caller-owned model with a copy of the matching global template. */
+void bsc_qlfc_init_model(QlfcStatisticalModel1 * model);
+void bsc_qlfc_init_model(QlfcStatisticalModel2 * model);
+
+#endif
+
+/*-----------------------------------------------------------*/
+/* End qlfc_model.h */
+/*-----------------------------------------------------------*/
diff --git a/libbsc/libbsc/filters.h b/libbsc/libbsc/filters.h
new file mode 100644
index 00000000..bf732dca
--- /dev/null
+++ b/libbsc/libbsc/filters.h
@@ -0,0 +1,109 @@
+/*-----------------------------------------------------------*/
+/* Block Sorting, Lossless Data Compression Library. */
+/* Interface to data preprocessing filters */
+/*-----------------------------------------------------------*/
+
+/*--
+
+This file is a part of bsc and/or libbsc, a program and a library for
+lossless, block-sorting data compression.
+ + Copyright (c) 2009-2021 Ilya Grebnov + + Licensed under the Apache License, Version 2.0 (the "License"); + you may not use this file except in compliance with the License. + You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + + Unless required by applicable law or agreed to in writing, software + distributed under the License is distributed on an "AS IS" BASIS, + WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + See the License for the specific language governing permissions and + limitations under the License. + +Please see the file LICENSE for full copyright information and file AUTHORS +for full list of contributors. + +See also the bsc and libbsc web site: + http://libbsc.com/ for more information. + +--*/ + +#ifndef _LIBBSC_FILTERS_H +#define _LIBBSC_FILTERS_H + +#define LIBBSC_CONTEXTS_FOLLOWING 1 +#define LIBBSC_CONTEXTS_PRECEDING 2 + +#ifdef __cplusplus +extern "C" { +#endif + + /** + * Autodetects segments for better compression of heterogeneous files. + * @param input - the input memory block of n bytes. + * @param n - the length of the input memory block. + * @param segments - the output array of segments of k elements size. + * @param k - the size of the output segments array. + * @param features - the set of additional features. + * @return The number of segments if no error occurred, error code otherwise. + */ + int bsc_detect_segments(const unsigned char * input, int n, int * segments, int k, int features); + + /** + * Autodetects order of contexts for better compression of binary files. + * @param input - the input memory block of n bytes. + * @param n - the length of the input memory block. + * @param features - the set of additional features. + * @return The detected contexts order if no error occurred, error code otherwise. + */ + int bsc_detect_contextsorder(const unsigned char * input, int n, int features); + + /** + * Reverses memory block to change order of contexts. 
+ * @param T - the input/output memory block of n bytes. + * @param n - the length of the memory block. + * @param features - the set of additional features. + * @return LIBBSC_NO_ERROR if no error occurred, error code otherwise. + */ + int bsc_reverse_block(unsigned char * T, int n, int features); + + /** + * Autodetects record size for better compression of multimedia files. + * @param input - the input memory block of n bytes. + * @param n - the length of the input memory block. + * @param features - the set of additional features. + * @return The size of record if no error occurred, error code otherwise. + */ + int bsc_detect_recordsize(const unsigned char * input, int n, int features); + + /** + * Reorders memory block for specific size of record (Forward transform). + * @param T - the input/output memory block of n bytes. + * @param n - the length of the memory block. + * @param recordSize - the size of record. + * @param features - the set of additional features. + * @return LIBBSC_NO_ERROR if no error occurred, error code otherwise. + */ + int bsc_reorder_forward(unsigned char * T, int n, int recordSize, int features); + + /** + * Reorders memory block for specific size of record (Reverse transform). + * @param T - the input/output memory block of n bytes. + * @param n - the length of the memory block. + * @param recordSize - the size of record. + * @param features - the set of additional features. + * @return LIBBSC_NO_ERROR if no error occurred, error code otherwise. 
+ */ + int bsc_reorder_reverse(unsigned char * T, int n, int recordSize, int features); + +#ifdef __cplusplus +} +#endif + +#endif + +/*-------------------------------------------------*/ +/* End filters.h */ +/*-------------------------------------------------*/ diff --git a/libbsc/libbsc/filters/detectors.cpp b/libbsc/libbsc/filters/detectors.cpp new file mode 100644 index 00000000..09c445c2 --- /dev/null +++ b/libbsc/libbsc/filters/detectors.cpp @@ -0,0 +1,585 @@ +/*-----------------------------------------------------------*/ +/* Block Sorting, Lossless Data Compression Library. */ +/* Detectors of blocksize, recordsize and contexts reorder. */ +/*-----------------------------------------------------------*/ + +/*-- + +This file is a part of bsc and/or libbsc, a program and a library for +lossless, block-sorting data compression. + + Copyright (c) 2009-2021 Ilya Grebnov + + Licensed under the Apache License, Version 2.0 (the "License"); + you may not use this file except in compliance with the License. + You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + + Unless required by applicable law or agreed to in writing, software + distributed under the License is distributed on an "AS IS" BASIS, + WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + See the License for the specific language governing permissions and + limitations under the License. + +Please see the file LICENSE for full copyright information and file AUTHORS +for full list of contributors. + +See also the bsc and libbsc web site: + http://libbsc.com/ for more information. 
+ +--*/ + +#include +#include + +#include "../filters.h" + +#include "../platform/platform.h" +#include "../libbsc.h" + +#include "tables.h" + +#define DETECTORS_MAX_RECORD_SIZE 4 +#define DETECTORS_NUM_BLOCKS 48 +#define DETECTORS_BLOCK_SIZE 24576 + +struct BscSegmentationModel +{ + struct + { + int left, right; + } contextsCount[ALPHABET_SIZE]; + struct + { + struct + { + int left, right; + } Frequencies[ALPHABET_SIZE]; + } contexts[ALPHABET_SIZE]; +}; + +struct BscReorderingModel +{ + struct + { + int frequencies[ALPHABET_SIZE]; + } contexts[DETECTORS_MAX_RECORD_SIZE][ALPHABET_SIZE]; +}; + +int bsc_detect_segments_serial(BscSegmentationModel * RESTRICT model, const unsigned char * RESTRICT input, int n) +{ + memset(model, 0, sizeof(BscSegmentationModel)); + + for (int context = 0, i = 0; i < n; ++i) + { + unsigned char symbol = input[i]; + model->contexts[context].Frequencies[symbol].right++; + context = (unsigned char)((context << 5) ^ symbol); + } + + long long entropy = 0; + for (int context = 0; context < ALPHABET_SIZE; ++context) + { + int count = 0; + for (int symbol = 0; symbol < ALPHABET_SIZE; ++symbol) + { + int frequency = model->contexts[context].Frequencies[symbol].right; + count += frequency; entropy -= bsc_entropy(frequency); + } + model->contextsCount[context].right = count; entropy += bsc_entropy(count); + } + + int blockSize = n; + + long long localEntropy = entropy, bestEntropy = entropy - (entropy >> 5) - (65536LL * 12 * 1024); + for (int context = 0, i = 0; i < n; ++i) + { + if (localEntropy < bestEntropy) + { + bestEntropy = localEntropy; + blockSize = i; + } + + unsigned char symbol = input[i]; + + localEntropy += bsc_delta(--model->contexts[context].Frequencies[symbol].right); + localEntropy -= bsc_delta(model->contexts[context].Frequencies[symbol].left++); + localEntropy -= bsc_delta(--model->contextsCount[context].right); + localEntropy += bsc_delta(model->contextsCount[context].left++); + + context = (unsigned char)((context << 5) ^ 
symbol); + } + + return blockSize; +} + +#ifdef LIBBSC_OPENMP + +int bsc_detect_segments_parallel(BscSegmentationModel * RESTRICT model0, BscSegmentationModel * RESTRICT model1, const unsigned char * RESTRICT input, int n) +{ + int globalBlockSize = n; long long globalEntropy, globalBestEntropy; + + #pragma omp parallel num_threads(2) + { + int nThreads = omp_get_num_threads(); + int threadId = omp_get_thread_num(); + + if (nThreads == 1) + { + globalBlockSize = bsc_detect_segments_serial(model0, input, n); + } + else + { + int median = n / 2; + + { + if (threadId == 0) + { + memset(model0, 0, sizeof(BscSegmentationModel)); + + int context = 0; + for (int i = 0; i < median; ++i) + { + unsigned char symbol = input[i]; + model0->contexts[context].Frequencies[symbol].right++; + context = (unsigned char)((context << 5) ^ symbol); + } + } + else + { + memset(model1, 0, sizeof(BscSegmentationModel)); + + int context = (unsigned char)((input[median - 2] << 5) ^ input[median - 1]); + for (int i = median; i < n; ++i) + { + unsigned char symbol = input[i]; + model1->contexts[context].Frequencies[symbol].left++; + context = (unsigned char)((context << 5) ^ symbol); + } + } + + #pragma omp barrier + } + + { + #pragma omp single + { + long long entropy = 0; + for (int context = 0; context < ALPHABET_SIZE; ++context) + { + int count = 0; + for (int symbol = 0; symbol < ALPHABET_SIZE; ++symbol) + { + int frequency = model0->contexts[context].Frequencies[symbol].right + model1->contexts[context].Frequencies[symbol].left; + model0->contexts[context].Frequencies[symbol].right = model1->contexts[context].Frequencies[symbol].left = frequency; + + count += frequency; entropy -= bsc_entropy(frequency); + } + model0->contextsCount[context].right = model1->contextsCount[context].left = count; entropy += bsc_entropy(count); + } + + globalEntropy = entropy; globalBestEntropy = entropy - (entropy >> 5) - (65536LL * 12 * 1024); + } + } + + { + int localBlockSize = n; long long 
localBestEntropy = globalEntropy - (globalEntropy >> 5) - (65536LL * 12 * 1024); + + if (threadId == 0) + { + long long localEntropy = globalEntropy; + for (int context = 0, i = 0; i < median; ++i) + { + if (localEntropy < localBestEntropy) + { + localBestEntropy = localEntropy; + localBlockSize = i; + } + + unsigned char symbol = input[i]; + + localEntropy += bsc_delta(--model0->contexts[context].Frequencies[symbol].right); + localEntropy -= bsc_delta(model0->contexts[context].Frequencies[symbol].left++); + localEntropy -= bsc_delta(--model0->contextsCount[context].right); + localEntropy += bsc_delta(model0->contextsCount[context].left++); + + context = (unsigned char)((context << 5) ^ symbol); + } + } + else + { + long long localEntropy = globalEntropy; + for (int i = n - 1; i >= median; --i) + { + unsigned char symbol = input[i]; + int context = (unsigned char)((input[i - 2] << 5) ^ input[i - 1]); + + localEntropy -= bsc_delta(model1->contexts[context].Frequencies[symbol].right++); + localEntropy += bsc_delta(--model1->contexts[context].Frequencies[symbol].left); + localEntropy += bsc_delta(model1->contextsCount[context].right++); + localEntropy -= bsc_delta(--model1->contextsCount[context].left); + + if (localEntropy <= localBestEntropy) + { + localBestEntropy = localEntropy; + localBlockSize = i; + } + } + } + + if (globalBestEntropy > localBestEntropy) + { + #pragma omp critical + { + if (globalBestEntropy > localBestEntropy) + { + globalBlockSize = localBlockSize; globalBestEntropy = localBestEntropy; + } + } + } + } + } + } + + return globalBlockSize; +} + + +#endif + +int bsc_detect_segments_recursive(BscSegmentationModel * model0, BscSegmentationModel * model1, const unsigned char * input, int n, int * segments, int k, int features) +{ + if (n < DETECTORS_BLOCK_SIZE || k == 1) + { + segments[0] = n; + return 1; + } + + int blockSize = n; + +#ifdef LIBBSC_OPENMP + + if (features & LIBBSC_FEATURE_MULTITHREADING) + { + blockSize = 
bsc_detect_segments_parallel(model0, model1, input, n); + } + else + +#endif + + { + blockSize = bsc_detect_segments_serial(model0, input, n); + } + + if (blockSize == n) + { + segments[0] = n; + return 1; + } + + int leftResult = bsc_detect_segments_recursive(model0, model1, input, blockSize, segments, k - 1, features); + if (leftResult < LIBBSC_NO_ERROR) return leftResult; + + int rightResult = bsc_detect_segments_recursive(model0, model1, input + blockSize, n - blockSize, segments + leftResult, k - leftResult, features); + if (rightResult < LIBBSC_NO_ERROR) return rightResult; + + return leftResult + rightResult; +} + +int bsc_detect_segments(const unsigned char * input, int n, int * segments, int k, int features) +{ + if (n < DETECTORS_BLOCK_SIZE || k == 1) + { + segments[0] = n; + return 1; + } + + if (BscSegmentationModel * model0 = (BscSegmentationModel *)bsc_malloc(sizeof(BscSegmentationModel))) + { + if (BscSegmentationModel * model1 = (BscSegmentationModel *)bsc_malloc(sizeof(BscSegmentationModel))) + { + int result = bsc_detect_segments_recursive(model0, model1, input, n, segments, k, features); + + bsc_free(model1); bsc_free(model0); + + return result; + } + bsc_free(model0); + }; + + return LIBBSC_NOT_ENOUGH_MEMORY; +} + +static long long bsc_estimate_contextsorder(const unsigned char * input, int n) +{ + int frequencies[ALPHABET_SIZE][3]; + + memset(frequencies, 0, sizeof(frequencies)); + + unsigned char MTF0 = 0; + unsigned char MTF1 = 1; + unsigned char MTFC = 0; + + for (int i = 0; i < n; ++i) + { + unsigned char C = input[i]; + if (C == MTF0) + { + frequencies[MTFC][0]++; MTFC = MTFC << 2; + } + else + { + if (C == MTF1) + { + frequencies[MTFC][1]++; MTFC = (MTFC << 2) | 1; + } + else + { + frequencies[MTFC][2]++; MTFC = (MTFC << 2) | 2; + } + MTF1 = MTF0; MTF0 = C; + } + } + + long long entropy = 0; + for (int context = 0; context < ALPHABET_SIZE; ++context) + { + int count = 0; + for (int rank = 0; rank < 3; ++rank) + { + count += 
frequencies[context][rank]; + entropy -= bsc_entropy(frequencies[context][rank]); + } + entropy += bsc_entropy(count); + } + + return entropy; +} + +int bsc_detect_contextsorder(const unsigned char * RESTRICT input, int n, int features) +{ + int sortingContexts = LIBBSC_NOT_ENOUGH_MEMORY; + + if ((n > DETECTORS_NUM_BLOCKS * DETECTORS_BLOCK_SIZE) && (features & LIBBSC_FEATURE_FASTMODE)) + { + if (unsigned char * buffer = (unsigned char *)bsc_malloc(DETECTORS_NUM_BLOCKS * DETECTORS_BLOCK_SIZE * sizeof(unsigned char))) + { + int blockStride = (((n - DETECTORS_NUM_BLOCKS * DETECTORS_BLOCK_SIZE) / DETECTORS_NUM_BLOCKS) / 48) * 48; + + for (int block = 0; block < DETECTORS_NUM_BLOCKS; ++block) + { + memcpy(buffer + block * DETECTORS_BLOCK_SIZE, input + block * (DETECTORS_BLOCK_SIZE + blockStride), DETECTORS_BLOCK_SIZE); + } + + sortingContexts = bsc_detect_contextsorder(buffer, DETECTORS_NUM_BLOCKS * DETECTORS_BLOCK_SIZE, features); + + bsc_free(buffer); + } + + return sortingContexts; + } + + if (unsigned char * RESTRICT buffer = (unsigned char *)bsc_malloc(n * sizeof(unsigned char))) + { + if (int * RESTRICT bucket0 = (int *)bsc_zero_malloc(ALPHABET_SIZE * ALPHABET_SIZE * sizeof(int))) + { + if (int * RESTRICT bucket1 = (int *)bsc_zero_malloc(ALPHABET_SIZE * ALPHABET_SIZE * sizeof(int))) + { + unsigned char C0 = input[n - 1]; + for (int i = 0; i < n; ++i) + { + unsigned char C1 = input[i]; + bucket0[(C0 << 8) | C1]++; + bucket1[(C1 << 8) | C0]++; + C0 = C1; + } + + for (int sum = 0, i = 0; i < ALPHABET_SIZE * ALPHABET_SIZE; ++i) + { + int tmp = sum; sum += bucket0[i]; bucket0[i] = tmp; + } + + unsigned char F0 = input[n - 2]; + unsigned char F1 = input[n - 1]; + for (int i = 0; i < n; ++i) + { + unsigned char F2 = input[i]; + buffer[bucket0[(F1 << 8) | F2]++] = F0; + F0 = F1; F1 = F2; + } + + long long following = bsc_estimate_contextsorder(buffer, n); + + for (int sum = 0, i = 0; i < ALPHABET_SIZE * ALPHABET_SIZE; ++i) + { + int tmp = sum; sum += bucket1[i]; 
bucket1[i] = tmp; + } + + unsigned char P0 = input[1]; + unsigned char P1 = input[0]; + for (int i = n - 1; i >= 0; --i) + { + unsigned char P2 = input[i]; + buffer[bucket1[(P1 << 8) | P2]++] = P0; + P0 = P1; P1 = P2; + } + + long long preceding = bsc_estimate_contextsorder(buffer, n); + + sortingContexts = (preceding < following) ? LIBBSC_CONTEXTS_PRECEDING : LIBBSC_CONTEXTS_FOLLOWING; + + bsc_free(bucket1); + } + bsc_free(bucket0); + }; + bsc_free(buffer); + } + + return sortingContexts; +} + +long long bsc_estimate_reordering(BscReorderingModel * model, int recordSize) +{ + long long entropy = 0; + for (int record = 0; record < recordSize; ++record) + { + for (int context = 0; context < ALPHABET_SIZE; ++context) + { + int count = 0; + for (int symbol = 0; symbol < ALPHABET_SIZE; ++symbol) + { + int frequency = model->contexts[record][context].frequencies[symbol]; + count += frequency; entropy -= bsc_entropy(frequency); + } + entropy += (65536LL * 8 * (count < 256 ? count : 256)) + bsc_entropy(count); + } + } + return entropy; +} + +int bsc_detect_recordsize(const unsigned char * RESTRICT input, int n, int features) +{ + int result = LIBBSC_NOT_ENOUGH_MEMORY; + + if ((n > DETECTORS_NUM_BLOCKS * DETECTORS_BLOCK_SIZE) && (features & LIBBSC_FEATURE_FASTMODE)) + { + if (unsigned char * buffer = (unsigned char *)bsc_malloc(DETECTORS_NUM_BLOCKS * DETECTORS_BLOCK_SIZE * sizeof(unsigned char))) + { + int blockStride = (((n - DETECTORS_NUM_BLOCKS * DETECTORS_BLOCK_SIZE) / DETECTORS_NUM_BLOCKS) / 48) * 48; + + for (int block = 0; block < DETECTORS_NUM_BLOCKS; ++block) + { + memcpy(buffer + block * DETECTORS_BLOCK_SIZE, input + block * (DETECTORS_BLOCK_SIZE + blockStride), DETECTORS_BLOCK_SIZE); + } + + result = bsc_detect_recordsize(buffer, DETECTORS_NUM_BLOCKS * DETECTORS_BLOCK_SIZE, features); + + bsc_free(buffer); + } + + return result; + } + + if (BscReorderingModel * RESTRICT model = (BscReorderingModel *)bsc_malloc(sizeof(BscReorderingModel))) + { + long long 
Entropy[DETECTORS_MAX_RECORD_SIZE]; + + if ((n % 48) != 0) n = n - (n % 48); + + for (int recordSize = 1; recordSize <= DETECTORS_MAX_RECORD_SIZE; ++recordSize) + { + memset(model, 0, sizeof(BscReorderingModel)); + + if (recordSize == 1) + { + int ctx0 = 0; + for (int i = 0; i < n; i += 8) + { + unsigned char c0 = input[i + 0]; model->contexts[0][ctx0].frequencies[c0]++; ctx0 = c0; + unsigned char c1 = input[i + 1]; model->contexts[0][ctx0].frequencies[c1]++; ctx0 = c1; + unsigned char c2 = input[i + 2]; model->contexts[0][ctx0].frequencies[c2]++; ctx0 = c2; + unsigned char c3 = input[i + 3]; model->contexts[0][ctx0].frequencies[c3]++; ctx0 = c3; + unsigned char c4 = input[i + 4]; model->contexts[0][ctx0].frequencies[c4]++; ctx0 = c4; + unsigned char c5 = input[i + 5]; model->contexts[0][ctx0].frequencies[c5]++; ctx0 = c5; + unsigned char c6 = input[i + 6]; model->contexts[0][ctx0].frequencies[c6]++; ctx0 = c6; + unsigned char c7 = input[i + 7]; model->contexts[0][ctx0].frequencies[c7]++; ctx0 = c7; + } + } + + if (recordSize == 2) + { + int ctx0 = 0, ctx1 = 0; + for (int i = 0; i < n; i += 8) + { + unsigned char c0 = input[i + 0]; model->contexts[0][ctx0].frequencies[c0]++; ctx0 = c0; + unsigned char c1 = input[i + 1]; model->contexts[1][ctx1].frequencies[c1]++; ctx1 = c1; + unsigned char c2 = input[i + 2]; model->contexts[0][ctx0].frequencies[c2]++; ctx0 = c2; + unsigned char c3 = input[i + 3]; model->contexts[1][ctx1].frequencies[c3]++; ctx1 = c3; + unsigned char c4 = input[i + 4]; model->contexts[0][ctx0].frequencies[c4]++; ctx0 = c4; + unsigned char c5 = input[i + 5]; model->contexts[1][ctx1].frequencies[c5]++; ctx1 = c5; + unsigned char c6 = input[i + 6]; model->contexts[0][ctx0].frequencies[c6]++; ctx0 = c6; + unsigned char c7 = input[i + 7]; model->contexts[1][ctx1].frequencies[c7]++; ctx1 = c7; + } + } + + if (recordSize == 3) + { + int ctx0 = 0, ctx1 = 0, ctx2 = 0; + for (int i = 0; i < n; i += 6) + { + unsigned char c0 = input[i + 0]; 
model->contexts[0][ctx0].frequencies[c0]++; ctx0 = c0; + unsigned char c1 = input[i + 1]; model->contexts[1][ctx1].frequencies[c1]++; ctx1 = c1; + unsigned char c2 = input[i + 2]; model->contexts[2][ctx2].frequencies[c2]++; ctx2 = c2; + unsigned char c3 = input[i + 3]; model->contexts[0][ctx0].frequencies[c3]++; ctx0 = c3; + unsigned char c4 = input[i + 4]; model->contexts[1][ctx1].frequencies[c4]++; ctx1 = c4; + unsigned char c5 = input[i + 5]; model->contexts[2][ctx2].frequencies[c5]++; ctx2 = c5; + } + } + + if (recordSize == 4) + { + int ctx0 = 0, ctx1 = 0, ctx2 = 0, ctx3 = 0; + for (int i = 0; i < n; i += 8) + { + unsigned char c0 = input[i + 0]; model->contexts[0][ctx0].frequencies[c0]++; ctx0 = c0; + unsigned char c1 = input[i + 1]; model->contexts[1][ctx1].frequencies[c1]++; ctx1 = c1; + unsigned char c2 = input[i + 2]; model->contexts[2][ctx2].frequencies[c2]++; ctx2 = c2; + unsigned char c3 = input[i + 3]; model->contexts[3][ctx3].frequencies[c3]++; ctx3 = c3; + unsigned char c4 = input[i + 4]; model->contexts[0][ctx0].frequencies[c4]++; ctx0 = c4; + unsigned char c5 = input[i + 5]; model->contexts[1][ctx1].frequencies[c5]++; ctx1 = c5; + unsigned char c6 = input[i + 6]; model->contexts[2][ctx2].frequencies[c6]++; ctx2 = c6; + unsigned char c7 = input[i + 7]; model->contexts[3][ctx3].frequencies[c7]++; ctx3 = c7; + } + } + + if (recordSize > 4) + { + int Context[DETECTORS_MAX_RECORD_SIZE] = { 0 }; + for (int record = 0, i = 0; i < n; ++i) + { + model->contexts[record][Context[record]].frequencies[input[i]]++; + Context[record] = input[i]; record++; if (record == recordSize) record = 0; + } + } + + Entropy[recordSize - 1] = bsc_estimate_reordering(model, recordSize); + } + + long long bestSize = Entropy[0] - (Entropy[0] >> 4) - (65536LL * 8 * 1024); + + result = 1; + for (int recordSize = 1; recordSize <= DETECTORS_MAX_RECORD_SIZE; ++recordSize) + { + if (bestSize > Entropy[recordSize - 1]) { bestSize = Entropy[recordSize - 1]; result = recordSize; } + } + 
+ bsc_free(model); + }; + + return result; +} + +/*-------------------------------------------------*/ +/* End detectors.cpp */ +/*-------------------------------------------------*/ diff --git a/libbsc/libbsc/filters/preprocessing.cpp b/libbsc/libbsc/filters/preprocessing.cpp new file mode 100644 index 00000000..7003ae5c --- /dev/null +++ b/libbsc/libbsc/filters/preprocessing.cpp @@ -0,0 +1,180 @@ +/*-----------------------------------------------------------*/ +/* Block Sorting, Lossless Data Compression Library. */ +/* Data preprocessing functions */ +/*-----------------------------------------------------------*/ + +/*-- + +This file is a part of bsc and/or libbsc, a program and a library for +lossless, block-sorting data compression. + + Copyright (c) 2009-2021 Ilya Grebnov + + Licensed under the Apache License, Version 2.0 (the "License"); + you may not use this file except in compliance with the License. + You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + + Unless required by applicable law or agreed to in writing, software + distributed under the License is distributed on an "AS IS" BASIS, + WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + See the License for the specific language governing permissions and + limitations under the License. + +Please see the file LICENSE for full copyright information and file AUTHORS +for full list of contributors. + +See also the bsc and libbsc web site: + http://libbsc.com/ for more information. 
+ +--*/ +#include <stdlib.h> +#include <string.h> /* memcpy */ + +#include "../filters.h" + +#include "../platform/platform.h" +#include "../libbsc.h" +/* Reverses the n bytes of T in place by swapping T[i] with T[n - 1 - i]. When built with LIBBSC_OPENMP and LIBBSC_FEATURE_MULTITHREADING is set in features, the swaps run in an OpenMP parallel loop; otherwise a two-index sequential loop is used. Always returns LIBBSC_NO_ERROR. */ +int bsc_reverse_block(unsigned char * T, int n, int features) +{ + +#ifdef LIBBSC_OPENMP + + if (features & LIBBSC_FEATURE_MULTITHREADING) + { + #pragma omp parallel for + for (int i = 0; i < n / 2; ++i) /* each iteration touches only T[i] and T[n - 1 - i], so iterations are independent */ + { + unsigned char tmp = T[i]; T[i] = T[n - 1 - i]; T[n - 1 - i] = tmp; + } + } + else + +#endif + + { + for (int i = 0, j = n - 1; i < j; ++i, --j) + { + unsigned char tmp = T[i]; T[i] = T[j]; T[j] = tmp; + } + } + + return LIBBSC_NO_ERROR; +} +/* Regroups T by byte position within fixed-size records: with chunk = n / recordSize, byte j of record i (old offset recordSize * i + j) moves to offset j * chunk + i. Returns LIBBSC_BAD_PARAMETER when recordSize <= 0, LIBBSC_NO_ERROR immediately when recordSize == 1, LIBBSC_NOT_ENOUGH_MEMORY when bsc_malloc(n) returns null, and LIBBSC_NO_ERROR otherwise. The OpenMP loops are used only when LIBBSC_OPENMP is defined and LIBBSC_FEATURE_MULTITHREADING is set in features. */ +int bsc_reorder_forward(unsigned char * T, int n, int recordSize, int features) +{ + if (recordSize <= 0) return LIBBSC_BAD_PARAMETER; + if (recordSize == 1) return LIBBSC_NO_ERROR; + + if (unsigned char * buffer = (unsigned char *)bsc_malloc(n)) + { + memcpy(buffer, T, n); /* snapshot the input: S reads the copy while D overwrites T */ + + unsigned char * RESTRICT S = buffer; + unsigned char * RESTRICT D = T; + + int chunk = (n / recordSize); /* whole records only; the trailing n % recordSize bytes of T are never written */ + +#ifdef LIBBSC_OPENMP + + if (features & LIBBSC_FEATURE_MULTITHREADING) + { + switch (recordSize) + { + case 2: + #pragma omp parallel for + for (int i = 0; i < chunk; ++i) { D[i] = S[2 * i]; D[chunk + i] = S[2 * i + 1]; } break; + case 3: + #pragma omp parallel for + for (int i = 0; i < chunk; ++i) { D[i] = S[3 * i]; D[chunk + i] = S[3 * i + 1]; D[chunk * 2 + i] = S[3 * i + 2]; } break; + case 4: + #pragma omp parallel for + for (int i = 0; i < chunk; ++i) { D[i] = S[4 * i]; D[chunk + i] = S[4 * i + 1]; D[chunk * 2 + i] = S[4 * i + 2]; D[chunk * 3 + i] = S[4 * i + 3]; } break; + default: + #pragma omp parallel for + for (int i = 0; i < chunk; ++i) { for (int j = 0; j < recordSize; ++j) D[j * chunk + i] = S[recordSize * i + j]; } + } + } + else + +#endif + + { + switch (recordSize) + { + case 2: for (int i = 0; i < chunk; ++i) { D[0] = S[0]; D[chunk] = S[1]; D++; S += 2; } break; + case 3: for (int i = 0; i < chunk; ++i) { D[0] = S[0]; D[chunk] = S[1]; D[chunk * 2] = S[2]; D++; S += 3; } break; + case 4: for (int i = 0; i < 
chunk; ++i) { D[0] = S[0]; D[chunk] = S[1]; D[chunk * 2] = S[2]; D[chunk * 3] = S[3]; D++; S += 4; } break; + default: + for (int i = 0; i < chunk; ++i) { for (int j = 0; j < recordSize; ++j) D[j * chunk] = S[j]; D++; S += recordSize; } + } + } + + bsc_free(buffer); return LIBBSC_NO_ERROR; + } + + return LIBBSC_NOT_ENOUGH_MEMORY; +} +/* Applies the opposite index mapping to bsc_reorder_forward: with chunk = n / recordSize, the byte at offset j * chunk + i moves to offset recordSize * i + j. Returns LIBBSC_BAD_PARAMETER when recordSize <= 0, LIBBSC_NO_ERROR immediately when recordSize == 1, LIBBSC_NOT_ENOUGH_MEMORY when bsc_malloc(n) returns null, and LIBBSC_NO_ERROR otherwise. The OpenMP loops are used only when LIBBSC_OPENMP is defined and LIBBSC_FEATURE_MULTITHREADING is set in features. */ +int bsc_reorder_reverse(unsigned char * T, int n, int recordSize, int features) +{ + if (recordSize <= 0) return LIBBSC_BAD_PARAMETER; + if (recordSize == 1) return LIBBSC_NO_ERROR; + + if (unsigned char * buffer = (unsigned char *)bsc_malloc(n)) + { + memcpy(buffer, T, n); /* snapshot the input: S reads the copy while D overwrites T */ + + unsigned char * RESTRICT S = buffer; + unsigned char * RESTRICT D = T; + + int chunk = (n / recordSize); /* whole records only; the trailing n % recordSize bytes of T are never written */ + +#ifdef LIBBSC_OPENMP + + if (features & LIBBSC_FEATURE_MULTITHREADING) + { + switch (recordSize) + { + case 2: + #pragma omp parallel for + for (int i = 0; i < chunk; ++i) { D[2 * i] = S[i]; D[2 * i + 1] = S[chunk + i]; } break; + case 3: + #pragma omp parallel for + for (int i = 0; i < chunk; ++i) { D[3 * i] = S[i]; D[3 * i + 1] = S[chunk + i]; D[3 * i + 2] = S[chunk * 2 + i]; } break; + case 4: + #pragma omp parallel for + for (int i = 0; i < chunk; ++i) { D[4 * i] = S[i]; D[4 * i + 1] = S[chunk + i]; D[4 * i + 2] = S[chunk * 2 + i]; D[4 * i + 3] = S[chunk * 3 + i]; } break; + default: + #pragma omp parallel for + for (int i = 0; i < chunk; ++i) { for (int j = 0; j < recordSize; ++j) D[recordSize * i + j] = S[j * chunk + i]; } + } + } + else + +#endif + + { + switch (recordSize) + { + case 2: for (int i = 0; i < chunk; ++i) { D[0] = S[0]; D[1] = S[chunk]; D += 2; S++; } break; + case 3: for (int i = 0; i < chunk; ++i) { D[0] = S[0]; D[1] = S[chunk]; D[2] = S[chunk * 2]; D += 3; S++; } break; + case 4: for (int i = 0; i < chunk; ++i) { D[0] = S[0]; D[1] = S[chunk]; D[2] = S[chunk * 2]; D[3] = S[chunk * 3]; D += 4; S++; } break; + default: + for (int i = 0; i < chunk; ++i) { for (int j = 0; j < recordSize; ++j) D[j] = S[j * chunk]; D += recordSize; S++; 
} + } + } + + bsc_free(buffer); return LIBBSC_NO_ERROR; + } + + return LIBBSC_NOT_ENOUGH_MEMORY; +} + +/*-------------------------------------------------*/ +/* End preprocessing.cpp */ +/*-------------------------------------------------*/ diff --git a/libbsc/libbsc/filters/tables.h b/libbsc/libbsc/filters/tables.h new file mode 100644 index 00000000..ebf9f089 --- /dev/null +++ b/libbsc/libbsc/filters/tables.h @@ -0,0 +1,754 @@ +/*-----------------------------------------------------------*/ +/* Block Sorting, Lossless Data Compression Library. */ +/* Static tables of constant values */ +/*-----------------------------------------------------------*/ + +/*-- + +This file is a part of bsc and/or libbsc, a program and a library for +lossless, block-sorting data compression. + + Copyright (c) 2009-2021 Ilya Grebnov + + Licensed under the Apache License, Version 2.0 (the "License"); + you may not use this file except in compliance with the License. + You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + + Unless required by applicable law or agreed to in writing, software + distributed under the License is distributed on an "AS IS" BASIS, + WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + See the License for the specific language governing permissions and + limitations under the License. + +Please see the file LICENSE for full copyright information and file AUTHORS +for full list of contributors. + +See also the bsc and libbsc web site: + http://libbsc.com/ for more information. 
+ +--*/ + +#ifndef _LIBBSC_FILTERS_TABLES_H +#define _LIBBSC_FILTERS_TABLES_H + +#include "../platform/platform.h" + +static const unsigned int bsc_code_table[4096] = +{ + 0, 0, 65536, 103872, 131072, 152169, 169408, 183982, 196608, 207744, 217705, 226717, + 234944, 242512, 249518, 256041, 262144, 267875, 273280, 278392, 283241, 287854, 292253, 296456, + 300480, 304339, 308048, 311616, 315054, 318372, 321577, 324678, 327680, 330589, 333411, 336152, + 338816, 341406, 343928, 346384, 348777, 351112, 353390, 355615, 357789, 359914, 361992, 364025, + 366016, 367965, 369875, 371748, 373584, 375384, 377152, 378887, 380590, 382264, 383908, 385524, + 387113, 388676, 390214, 391727, 393216, 394681, 396125, 397547, 398947, 400328, 401688, 403029, + 404352, 405656, 406942, 408211, 409464, 410700, 411920, 413124, 414313, 415488, 416648, 417794, + 418926, 420045, 421151, 422244, 423325, 424393, 425450, 426494, 427528, 428550, 429561, 430562, + 431552, 432531, 433501, 434461, 435411, 436352, 437284, 438206, 439120, 440024, 440920, 441808, + 442688, 443559, 444423, 445278, 446126, 446967, 447800, 448626, 449444, 450256, 451060, 451858, + 452649, 453434, 454212, 454984, 455750, 456509, 457263, 458010, 458752, 459487, 460217, 460942, + 461661, 462374, 463083, 463786, 464483, 465176, 465864, 466546, 467224, 467897, 468565, 469229, + 469888, 470542, 471192, 471837, 472478, 473115, 473747, 474376, 475000, 475620, 476236, 476848, + 477456, 478060, 478660, 479257, 479849, 480438, 481024, 481606, 482184, 482759, 483330, 483898, + 484462, 485024, 485581, 486136, 486687, 487235, 487780, 488322, 488861, 489396, 489929, 490459, + 490986, 491509, 492030, 492548, 493064, 493576, 494086, 494593, 495097, 495599, 496098, 496594, + 497088, 497579, 498067, 498553, 499037, 499518, 499997, 500473, 500947, 501419, 501888, 502355, + 502820, 503282, 503742, 504200, 504656, 505109, 505560, 506009, 506456, 506901, 507344, 507785, + 508224, 508661, 509095, 509528, 509959, 510387, 510814, 511239, 511662, 
512083, 512503, 512920, + 513336, 513750, 514162, 514572, 514980, 515387, 515792, 516195, 516596, 516996, 517394, 517791, + 518185, 518579, 518970, 519360, 519748, 520135, 520520, 520904, 521286, 521666, 522045, 522423, + 522799, 523173, 523546, 523917, 524288, 524656, 525023, 525389, 525753, 526116, 526478, 526838, + 527197, 527554, 527910, 528265, 528619, 528971, 529322, 529671, 530019, 530366, 530712, 531057, + 531400, 531742, 532082, 532422, 532760, 533097, 533433, 533768, 534101, 534434, 534765, 535095, + 535424, 535751, 536078, 536403, 536728, 537051, 537373, 537694, 538014, 538333, 538651, 538968, + 539283, 539598, 539912, 540224, 540536, 540846, 541156, 541464, 541772, 542078, 542384, 542688, + 542992, 543294, 543596, 543896, 544196, 544495, 544793, 545089, 545385, 545680, 545974, 546268, + 546560, 546851, 547142, 547431, 547720, 548008, 548295, 548581, 548866, 549150, 549434, 549717, + 549998, 550279, 550560, 550839, 551117, 551395, 551672, 551948, 552223, 552498, 552771, 553044, + 553316, 553588, 553858, 554128, 554397, 554665, 554932, 555199, 555465, 555730, 555995, 556259, + 556522, 556784, 557045, 557306, 557566, 557826, 558084, 558342, 558600, 558856, 559112, 559367, + 559622, 559876, 560129, 560381, 560633, 560884, 561135, 561384, 561634, 561882, 562130, 562377, + 562624, 562870, 563115, 563359, 563603, 563847, 564089, 564332, 564573, 564814, 565054, 565294, + 565533, 565771, 566009, 566247, 566483, 566719, 566955, 567190, 567424, 567658, 567891, 568124, + 568356, 568587, 568818, 569048, 569278, 569507, 569736, 569964, 570192, 570419, 570645, 570871, + 571096, 571321, 571545, 571769, 571992, 572215, 572437, 572659, 572880, 573101, 573321, 573541, + 573760, 573978, 574197, 574414, 574631, 574848, 575064, 575280, 575495, 575709, 575923, 576137, + 576350, 576563, 576775, 576987, 577198, 577409, 577619, 577829, 578039, 578248, 578456, 578664, + 578872, 579079, 579286, 579492, 579698, 579903, 580108, 580312, 580516, 580720, 580923, 581125, + 581328, 
581530, 581731, 581932, 582132, 582332, 582532, 582731, 582930, 583129, 583327, 583524, + 583721, 583918, 584115, 584311, 584506, 584701, 584896, 585090, 585284, 585478, 585671, 585864, + 586056, 586248, 586440, 586631, 586822, 587012, 587202, 587392, 587581, 587770, 587959, 588147, + 588335, 588522, 588709, 588896, 589082, 589268, 589453, 589639, 589824, 590008, 590192, 590376, + 590559, 590742, 590925, 591107, 591289, 591471, 591652, 591833, 592014, 592194, 592374, 592554, + 592733, 592912, 593090, 593269, 593446, 593624, 593801, 593978, 594155, 594331, 594507, 594682, + 594858, 595033, 595207, 595382, 595555, 595729, 595902, 596075, 596248, 596421, 596593, 596764, + 596936, 597107, 597278, 597448, 597618, 597788, 597958, 598127, 598296, 598465, 598633, 598801, + 598969, 599137, 599304, 599471, 599637, 599804, 599970, 600135, 600301, 600466, 600631, 600795, + 600960, 601124, 601287, 601451, 601614, 601777, 601939, 602102, 602264, 602426, 602587, 602748, + 602909, 603070, 603230, 603390, 603550, 603710, 603869, 604028, 604187, 604345, 604504, 604662, + 604819, 604977, 605134, 605291, 605448, 605604, 605760, 605916, 606072, 606227, 606382, 606537, + 606692, 606846, 607000, 607154, 607308, 607461, 607614, 607767, 607920, 608072, 608224, 608376, + 608528, 608679, 608830, 608981, 609132, 609282, 609432, 609582, 609732, 609882, 610031, 610180, + 610329, 610477, 610625, 610774, 610921, 611069, 611216, 611364, 611510, 611657, 611804, 611950, + 612096, 612242, 612387, 612533, 612678, 612823, 612967, 613112, 613256, 613400, 613544, 613687, + 613831, 613974, 614117, 614260, 614402, 614544, 614686, 614828, 614970, 615111, 615253, 615394, + 615534, 615675, 615815, 615956, 616096, 616235, 616375, 616514, 616653, 616792, 616931, 617070, + 617208, 617346, 617484, 617622, 617759, 617897, 618034, 618171, 618307, 618444, 618580, 618716, + 618852, 618988, 619124, 619259, 619394, 619529, 619664, 619798, 619933, 620067, 620201, 620335, + 620468, 620602, 620735, 620868, 621001, 621134, 
621266, 621399, 621531, 621663, 621795, 621926, + 622058, 622189, 622320, 622451, 622581, 622712, 622842, 622972, 623102, 623232, 623362, 623491, + 623620, 623749, 623878, 624007, 624136, 624264, 624392, 624520, 624648, 624776, 624903, 625031, + 625158, 625285, 625412, 625538, 625665, 625791, 625917, 626043, 626169, 626295, 626420, 626545, + 626671, 626796, 626920, 627045, 627170, 627294, 627418, 627542, 627666, 627790, 627913, 628036, + 628160, 628283, 628406, 628528, 628651, 628773, 628895, 629017, 629139, 629261, 629383, 629504, + 629625, 629747, 629868, 629988, 630109, 630230, 630350, 630470, 630590, 630710, 630830, 630950, + 631069, 631188, 631307, 631426, 631545, 631664, 631783, 631901, 632019, 632137, 632255, 632373, + 632491, 632608, 632726, 632843, 632960, 633077, 633194, 633310, 633427, 633543, 633660, 633776, + 633892, 634007, 634123, 634239, 634354, 634469, 634584, 634699, 634814, 634929, 635043, 635158, + 635272, 635386, 635500, 635614, 635728, 635841, 635955, 636068, 636181, 636294, 636407, 636520, + 636632, 636745, 636857, 636969, 637081, 637193, 637305, 637417, 637528, 637640, 637751, 637862, + 637973, 638084, 638195, 638306, 638416, 638527, 638637, 638747, 638857, 638967, 639077, 639186, + 639296, 639405, 639514, 639624, 639733, 639841, 639950, 640059, 640167, 640276, 640384, 640492, + 640600, 640708, 640816, 640923, 641031, 641138, 641245, 641352, 641459, 641566, 641673, 641780, + 641886, 641993, 642099, 642205, 642311, 642417, 642523, 642629, 642734, 642840, 642945, 643050, + 643155, 643260, 643365, 643470, 643575, 643679, 643784, 643888, 643992, 644096, 644200, 644304, + 644408, 644511, 644615, 644718, 644822, 644925, 645028, 645131, 645234, 645336, 645439, 645541, + 645644, 645746, 645848, 645950, 646052, 646154, 646256, 646357, 646459, 646560, 646661, 646763, + 646864, 646965, 647066, 647166, 647267, 647367, 647468, 647568, 647668, 647768, 647868, 647968, + 648068, 648168, 648267, 648367, 648466, 648566, 648665, 648764, 648863, 648962, 649060, 
649159, + 649257, 649356, 649454, 649552, 649651, 649749, 649847, 649944, 650042, 650140, 650237, 650335, + 650432, 650529, 650626, 650723, 650820, 650917, 651014, 651110, 651207, 651303, 651400, 651496, + 651592, 651688, 651784, 651880, 651976, 652071, 652167, 652262, 652358, 652453, 652548, 652643, + 652738, 652833, 652928, 653023, 653117, 653212, 653306, 653400, 653495, 653589, 653683, 653777, + 653871, 653964, 654058, 654151, 654245, 654338, 654432, 654525, 654618, 654711, 654804, 654897, + 654989, 655082, 655175, 655267, 655360, 655452, 655544, 655636, 655728, 655820, 655912, 656004, + 656095, 656187, 656278, 656370, 656461, 656552, 656643, 656734, 656825, 656916, 657007, 657098, + 657188, 657279, 657369, 657460, 657550, 657640, 657730, 657820, 657910, 658000, 658090, 658179, + 658269, 658358, 658448, 658537, 658626, 658716, 658805, 658894, 658982, 659071, 659160, 659249, + 659337, 659426, 659514, 659602, 659691, 659779, 659867, 659955, 660043, 660131, 660218, 660306, + 660394, 660481, 660569, 660656, 660743, 660830, 660918, 661005, 661091, 661178, 661265, 661352, + 661438, 661525, 661611, 661698, 661784, 661870, 661957, 662043, 662129, 662214, 662300, 662386, + 662472, 662557, 662643, 662728, 662814, 662899, 662984, 663069, 663154, 663239, 663324, 663409, + 663494, 663579, 663663, 663748, 663832, 663917, 664001, 664085, 664169, 664253, 664337, 664421, + 664505, 664589, 664673, 664756, 664840, 664923, 665007, 665090, 665173, 665257, 665340, 665423, + 665506, 665589, 665671, 665754, 665837, 665919, 666002, 666084, 666167, 666249, 666331, 666414, + 666496, 666578, 666660, 666742, 666823, 666905, 666987, 667068, 667150, 667231, 667313, 667394, + 667475, 667557, 667638, 667719, 667800, 667881, 667962, 668042, 668123, 668204, 668284, 668365, + 668445, 668526, 668606, 668686, 668766, 668846, 668926, 669006, 669086, 669166, 669246, 669325, + 669405, 669485, 669564, 669644, 669723, 669802, 669881, 669961, 670040, 670119, 670198, 670277, + 670355, 670434, 670513, 
670591, 670670, 670748, 670827, 670905, 670984, 671062, 671140, 671218, + 671296, 671374, 671452, 671530, 671608, 671685, 671763, 671841, 671918, 671996, 672073, 672150, + 672228, 672305, 672382, 672459, 672536, 672613, 672690, 672767, 672844, 672920, 672997, 673074, + 673150, 673227, 673303, 673379, 673456, 673532, 673608, 673684, 673760, 673836, 673912, 673988, + 674064, 674139, 674215, 674291, 674366, 674442, 674517, 674592, 674668, 674743, 674818, 674893, + 674968, 675043, 675118, 675193, 675268, 675343, 675418, 675492, 675567, 675641, 675716, 675790, + 675865, 675939, 676013, 676087, 676161, 676236, 676310, 676383, 676457, 676531, 676605, 676679, + 676752, 676826, 676900, 676973, 677046, 677120, 677193, 677266, 677340, 677413, 677486, 677559, + 677632, 677705, 677778, 677851, 677923, 677996, 678069, 678141, 678214, 678286, 678359, 678431, + 678503, 678576, 678648, 678720, 678792, 678864, 678936, 679008, 679080, 679152, 679223, 679295, + 679367, 679438, 679510, 679581, 679653, 679724, 679796, 679867, 679938, 680009, 680080, 680151, + 680222, 680293, 680364, 680435, 680506, 680577, 680647, 680718, 680789, 680859, 680930, 681000, + 681070, 681141, 681211, 681281, 681351, 681422, 681492, 681562, 681632, 681701, 681771, 681841, + 681911, 681981, 682050, 682120, 682189, 682259, 682328, 682398, 682467, 682536, 682606, 682675, + 682744, 682813, 682882, 682951, 683020, 683089, 683158, 683226, 683295, 683364, 683433, 683501, + 683570, 683638, 683707, 683775, 683843, 683912, 683980, 684048, 684116, 684184, 684252, 684320, + 684388, 684456, 684524, 684592, 684660, 684727, 684795, 684863, 684930, 684998, 685065, 685132, + 685200, 685267, 685334, 685402, 685469, 685536, 685603, 685670, 685737, 685804, 685871, 685938, + 686004, 686071, 686138, 686205, 686271, 686338, 686404, 686471, 686537, 686604, 686670, 686736, + 686802, 686869, 686935, 687001, 687067, 687133, 687199, 687265, 687331, 687396, 687462, 687528, + 687594, 687659, 687725, 687790, 687856, 687921, 687987, 688052, 
688117, 688183, 688248, 688313, + 688378, 688443, 688508, 688573, 688638, 688703, 688768, 688833, 688898, 688962, 689027, 689092, + 689156, 689221, 689285, 689350, 689414, 689479, 689543, 689607, 689672, 689736, 689800, 689864, + 689928, 689992, 690056, 690120, 690184, 690248, 690312, 690376, 690439, 690503, 690567, 690630, + 690694, 690757, 690821, 690884, 690948, 691011, 691074, 691138, 691201, 691264, 691327, 691390, + 691453, 691516, 691579, 691642, 691705, 691768, 691831, 691893, 691956, 692019, 692081, 692144, + 692207, 692269, 692332, 692394, 692456, 692519, 692581, 692643, 692706, 692768, 692830, 692892, + 692954, 693016, 693078, 693140, 693202, 693264, 693326, 693387, 693449, 693511, 693572, 693634, + 693696, 693757, 693819, 693880, 693942, 694003, 694064, 694126, 694187, 694248, 694309, 694370, + 694431, 694492, 694553, 694614, 694675, 694736, 694797, 694858, 694919, 694980, 695040, 695101, + 695161, 695222, 695283, 695343, 695404, 695464, 695524, 695585, 695645, 695705, 695766, 695826, + 695886, 695946, 696006, 696066, 696126, 696186, 696246, 696306, 696366, 696426, 696486, 696545, + 696605, 696665, 696724, 696784, 696843, 696903, 696962, 697022, 697081, 697141, 697200, 697259, + 697319, 697378, 697437, 697496, 697555, 697614, 697673, 697732, 697791, 697850, 697909, 697968, + 698027, 698086, 698144, 698203, 698262, 698320, 698379, 698438, 698496, 698555, 698613, 698671, + 698730, 698788, 698846, 698905, 698963, 699021, 699079, 699137, 699196, 699254, 699312, 699370, + 699428, 699485, 699543, 699601, 699659, 699717, 699775, 699832, 699890, 699948, 700005, 700063, + 700120, 700178, 700235, 700293, 700350, 700407, 700465, 700522, 700579, 700636, 700694, 700751, + 700808, 700865, 700922, 700979, 701036, 701093, 701150, 701207, 701264, 701320, 701377, 701434, + 701491, 701547, 701604, 701660, 701717, 701774, 701830, 701886, 701943, 701999, 702056, 702112, + 702168, 702225, 702281, 702337, 702393, 702449, 702505, 702561, 702617, 702673, 702729, 702785, + 
702841, 702897, 702953, 703009, 703064, 703120, 703176, 703232, 703287, 703343, 703398, 703454, + 703509, 703565, 703620, 703676, 703731, 703786, 703842, 703897, 703952, 704007, 704063, 704118, + 704173, 704228, 704283, 704338, 704393, 704448, 704503, 704558, 704613, 704668, 704722, 704777, + 704832, 704887, 704941, 704996, 705050, 705105, 705160, 705214, 705269, 705323, 705377, 705432, + 705486, 705540, 705595, 705649, 705703, 705757, 705812, 705866, 705920, 705974, 706028, 706082, + 706136, 706190, 706244, 706298, 706352, 706405, 706459, 706513, 706567, 706620, 706674, 706728, + 706781, 706835, 706888, 706942, 706995, 707049, 707102, 707156, 707209, 707262, 707316, 707369, + 707422, 707476, 707529, 707582, 707635, 707688, 707741, 707794, 707847, 707900, 707953, 708006, + 708059, 708112, 708165, 708218, 708270, 708323, 708376, 708428, 708481, 708534, 708586, 708639, + 708691, 708744, 708796, 708849, 708901, 708954, 709006, 709058, 709111, 709163, 709215, 709268, + 709320, 709372, 709424, 709476, 709528, 709580, 709632, 709684, 709736, 709788, 709840, 709892, + 709944, 709996, 710047, 710099, 710151, 710203, 710254, 710306, 710358, 710409, 710461, 710512, + 710564, 710615, 710667, 710718, 710770, 710821, 710872, 710924, 710975, 711026, 711077, 711129, + 711180, 711231, 711282, 711333, 711384, 711435, 711486, 711537, 711588, 711639, 711690, 711741, + 711792, 711843, 711893, 711944, 711995, 712046, 712096, 712147, 712197, 712248, 712299, 712349, + 712400, 712450, 712501, 712551, 712602, 712652, 712702, 712753, 712803, 712853, 712903, 712954, + 713004, 713054, 713104, 713154, 713204, 713254, 713304, 713355, 713404, 713454, 713504, 713554, + 713604, 713654, 713704, 713754, 713803, 713853, 713903, 713953, 714002, 714052, 714102, 714151, + 714201, 714250, 714300, 714349, 714399, 714448, 714498, 714547, 714596, 714646, 714695, 714744, + 714793, 714843, 714892, 714941, 714990, 715039, 715088, 715138, 715187, 715236, 715285, 715334, + 715383, 715431, 715480, 715529, 715578, 
715627, 715676, 715725, 715773, 715822, 715871, 715919, + 715968, 716017, 716065, 716114, 716162, 716211, 716259, 716308, 716356, 716405, 716453, 716502, + 716550, 716598, 716646, 716695, 716743, 716791, 716839, 716888, 716936, 716984, 717032, 717080, + 717128, 717176, 717224, 717272, 717320, 717368, 717416, 717464, 717512, 717560, 717607, 717655, + 717703, 717751, 717798, 717846, 717894, 717941, 717989, 718037, 718084, 718132, 718179, 718227, + 718274, 718322, 718369, 718416, 718464, 718511, 718559, 718606, 718653, 718700, 718748, 718795, + 718842, 718889, 718936, 718983, 719031, 719078, 719125, 719172, 719219, 719266, 719313, 719360, + 719407, 719453, 719500, 719547, 719594, 719641, 719687, 719734, 719781, 719828, 719874, 719921, + 719968, 720014, 720061, 720107, 720154, 720200, 720247, 720293, 720340, 720386, 720433, 720479, + 720525, 720572, 720618, 720664, 720711, 720757, 720803, 720849, 720896, 720942, 720988, 721034, + 721080, 721126, 721172, 721218, 721264, 721310, 721356, 721402, 721448, 721494, 721540, 721585, + 721631, 721677, 721723, 721769, 721814, 721860, 721906, 721951, 721997, 722043, 722088, 722134, + 722179, 722225, 722270, 722316, 722361, 722407, 722452, 722498, 722543, 722588, 722634, 722679, + 722724, 722770, 722815, 722860, 722905, 722950, 722996, 723041, 723086, 723131, 723176, 723221, + 723266, 723311, 723356, 723401, 723446, 723491, 723536, 723581, 723626, 723671, 723715, 723760, + 723805, 723850, 723894, 723939, 723984, 724028, 724073, 724118, 724162, 724207, 724252, 724296, + 724341, 724385, 724430, 724474, 724518, 724563, 724607, 724652, 724696, 724740, 724785, 724829, + 724873, 724918, 724962, 725006, 725050, 725094, 725138, 725183, 725227, 725271, 725315, 725359, + 725403, 725447, 725491, 725535, 725579, 725623, 725667, 725711, 725754, 725798, 725842, 725886, + 725930, 725973, 726017, 726061, 726105, 726148, 726192, 726236, 726279, 726323, 726366, 726410, + 726454, 726497, 726541, 726584, 726627, 726671, 726714, 726758, 726801, 726844, 
726888, 726931, + 726974, 727018, 727061, 727104, 727147, 727191, 727234, 727277, 727320, 727363, 727406, 727449, + 727493, 727536, 727579, 727622, 727665, 727708, 727750, 727793, 727836, 727879, 727922, 727965, + 728008, 728051, 728093, 728136, 728179, 728222, 728264, 728307, 728350, 728392, 728435, 728478, + 728520, 728563, 728605, 728648, 728690, 728733, 728775, 728818, 728860, 728903, 728945, 728988, + 729030, 729072, 729115, 729157, 729199, 729241, 729284, 729326, 729368, 729410, 729453, 729495, + 729537, 729579, 729621, 729663, 729705, 729747, 729789, 729831, 729873, 729915, 729957, 729999, + 730041, 730083, 730125, 730167, 730209, 730250, 730292, 730334, 730376, 730418, 730459, 730501, + 730543, 730584, 730626, 730668, 730709, 730751, 730793, 730834, 730876, 730917, 730959, 731000, + 731042, 731083, 731125, 731166, 731207, 731249, 731290, 731331, 731373, 731414, 731455, 731497, + 731538, 731579, 731620, 731662, 731703, 731744, 731785, 731826, 731867, 731909, 731950, 731991, + 732032, 732073, 732114, 732155, 732196, 732237, 732278, 732319, 732359, 732400, 732441, 732482, + 732523, 732564, 732604, 732645, 732686, 732727, 732767, 732808, 732849, 732890, 732930, 732971, + 733011, 733052, 733093, 733133, 733174, 733214, 733255, 733295, 733336, 733376, 733417, 733457, + 733498, 733538, 733578, 733619, 733659, 733699, 733740, 733780, 733820, 733861, 733901, 733941, + 733981, 734021, 734062, 734102, 734142, 734182, 734222, 734262, 734302, 734342, 734382, 734422, + 734462, 734502, 734542, 734582, 734622, 734662, 734702, 734742, 734782, 734822, 734861, 734901, + 734941, 734981, 735021, 735060, 735100, 735140, 735180, 735219, 735259, 735299, 735338, 735378, + 735417, 735457, 735497, 735536, 735576, 735615, 735655, 735694, 735734, 735773, 735813, 735852, + 735891, 735931, 735970, 736009, 736049, 736088, 736127, 736167, 736206, 736245, 736284, 736324, + 736363, 736402, 736441, 736480, 736520, 736559, 736598, 736637, 736676, 736715, 736754, 736793, + 736832, 736871, 
736910, 736949, 736988, 737027, 737066, 737105, 737144, 737183, 737221, 737260, + 737299, 737338, 737377, 737415, 737454, 737493, 737532, 737570, 737609, 737648, 737686, 737725, + 737764, 737802, 737841, 737879, 737918, 737957, 737995, 738034, 738072, 738111, 738149, 738188, + 738226, 738264, 738303, 738341, 738380, 738418, 738456, 738495, 738533, 738571, 738610, 738648, + 738686, 738724, 738763, 738801, 738839, 738877, 738915, 738953, 738992, 739030, 739068, 739106, + 739144, 739182, 739220, 739258, 739296, 739334, 739372, 739410, 739448, 739486, 739524, 739562, + 739600, 739637, 739675, 739713, 739751, 739789, 739827, 739864, 739902, 739940, 739978, 740015, + 740053, 740091, 740128, 740166, 740204, 740241, 740279, 740317, 740354, 740392, 740429, 740467, + 740504, 740542, 740579, 740617, 740654, 740692, 740729, 740767, 740804, 740841, 740879, 740916, + 740954, 740991, 741028, 741066, 741103, 741140, 741177, 741215, 741252, 741289, 741326, 741363, + 741401, 741438, 741475, 741512, 741549, 741586, 741623, 741660, 741697, 741734, 741772, 741809, + 741846, 741883, 741919, 741956, 741993, 742030, 742067, 742104, 742141, 742178, 742215, 742252, + 742288, 742325, 742362, 742399, 742436, 742472, 742509, 742546, 742582, 742619, 742656, 742693, + 742729, 742766, 742802, 742839, 742876, 742912, 742949, 742985, 743022, 743058, 743095, 743131, + 743168, 743204, 743241, 743277, 743314, 743350, 743387, 743423, 743459, 743496, 743532, 743568, + 743605, 743641, 743677, 743713, 743750, 743786, 743822, 743858, 743895, 743931, 743967, 744003, + 744039, 744075, 744112, 744148, 744184, 744220, 744256, 744292, 744328, 744364, 744400, 744436, + 744472, 744508, 744544, 744580, 744616, 744652, 744688, 744724, 744759, 744795, 744831, 744867, + 744903, 744939, 744974, 745010, 745046, 745082, 745117, 745153, 745189, 745225, 745260, 745296, + 745332, 745367, 745403, 745438, 745474, 745510, 745545, 745581, 745616, 745652, 745687, 745723, + 745758, 745794, 745829, 745865, 745900, 745936, 745971, 
746007, 746042, 746077, 746113, 746148, + 746183, 746219, 746254, 746289, 746325, 746360, 746395, 746430, 746466, 746501, 746536, 746571, + 746606, 746642, 746677, 746712, 746747, 746782, 746817, 746852, 746887, 746922, 746958, 746993, + 747028, 747063, 747098, 747133, 747168, 747202, 747237, 747272, 747307, 747342, 747377, 747412, + 747447, 747482, 747517, 747551, 747586, 747621, 747656, 747691, 747725, 747760, 747795, 747830, + 747864, 747899, 747934, 747968, 748003, 748038, 748072, 748107, 748142, 748176, 748211, 748245, + 748280, 748314, 748349, 748383, 748418, 748453, 748487, 748521, 748556, 748590, 748625, 748659, + 748694, 748728, 748762, 748797, 748831, 748866, 748900, 748934, 748969, 749003, 749037, 749071, + 749106, 749140, 749174, 749208, 749243, 749277, 749311, 749345, 749379, 749413, 749448, 749482, + 749516, 749550, 749584, 749618, 749652, 749686, 749720, 749754, 749788, 749822, 749856, 749890, + 749924, 749958, 749992, 750026, 750060, 750094, 750128, 750162, 750196, 750229, 750263, 750297, + 750331, 750365, 750399, 750432, 750466, 750500, 750534, 750567, 750601, 750635, 750668, 750702, + 750736, 750769, 750803, 750837, 750870, 750904, 750938, 750971, 751005, 751038, 751072, 751105, + 751139, 751173, 751206, 751240, 751273, 751307, 751340, 751373, 751407, 751440, 751474, 751507, + 751540, 751574, 751607, 751641, 751674, 751707, 751741, 751774, 751807, 751840, 751874, 751907, + 751940, 751974, 752007, 752040, 752073, 752106, 752140, 752173, 752206, 752239, 752272, 752305, + 752338, 752371, 752405, 752438, 752471, 752504, 752537, 752570, 752603, 752636, 752669, 752702, + 752735, 752768, 752801, 752834, 752867, 752899, 752932, 752965, 752998, 753031, 753064, 753097, + 753130, 753162, 753195, 753228, 753261, 753294, 753326, 753359, 753392, 753425, 753457, 753490, + 753523, 753555, 753588, 753621, 753653, 753686, 753719, 753751, 753784, 753816, 753849, 753882, + 753914, 753947, 753979, 754012, 754044, 754077, 754109, 754142, 754174, 754207, 754239, 754272, 
+ 754304, 754337, 754369, 754401, 754434, 754466, 754498, 754531, 754563, 754595, 754628, 754660, + 754692, 754725, 754757, 754789, 754821, 754854, 754886, 754918, 754950, 754983, 755015, 755047, + 755079, 755111, 755143, 755176, 755208, 755240, 755272, 755304, 755336, 755368, 755400, 755432, + 755464, 755496, 755528, 755560, 755592, 755624, 755656, 755688, 755720, 755752, 755784, 755816, + 755848, 755880, 755912, 755943, 755975, 756007, 756039, 756071, 756103, 756134, 756166, 756198, + 756230, 756262, 756293, 756325, 756357, 756389, 756420, 756452, 756484, 756515, 756547, 756579, + 756610, 756642, 756674, 756705, 756737, 756768, 756800, 756832, 756863, 756895, 756926, 756958, + 756989, 757021, 757052, 757084, 757115, 757147, 757178, 757210, 757241, 757272, 757304, 757335, + 757367, 757398, 757429, 757461, 757492, 757524, 757555, 757586, 757617, 757649, 757680, 757711, + 757743, 757774, 757805, 757836, 757868, 757899, 757930, 757961, 757992, 758024, 758055, 758086, + 758117, 758148, 758179, 758210, 758242, 758273, 758304, 758335, 758366, 758397, 758428, 758459, + 758490, 758521, 758552, 758583, 758614, 758645, 758676, 758707, 758738, 758769, 758800, 758831, + 758862, 758892, 758923, 758954, 758985, 759016, 759047, 759078, 759108, 759139, 759170, 759201, + 759232, 759262, 759293, 759324, 759355, 759385, 759416, 759447, 759478, 759508, 759539, 759570, + 759600, 759631, 759662, 759692, 759723, 759753, 759784, 759815, 759845, 759876, 759906, 759937, + 759967, 759998, 760028, 760059, 760089, 760120, 760150, 760181, 760211, 760242, 760272, 760303, + 760333, 760364, 760394, 760424, 760455, 760485, 760516, 760546, 760576, 760607, 760637, 760667, + 760697, 760728, 760758, 760788, 760819, 760849, 760879, 760909, 760940, 760970, 761000, 761030, + 761060, 761091, 761121, 761151, 761181, 761211, 761241, 761272, 761302, 761332, 761362, 761392, + 761422, 761452, 761482, 761512, 761542, 761572, 761602, 761632, 761662, 761692, 761722, 761752, + 761782, 761812, 761842, 761872, 
761902, 761932, 761962, 761992, 762022, 762051, 762081, 762111, + 762141, 762171, 762201, 762231, 762260, 762290, 762320, 762350, 762379, 762409, 762439, 762469, + 762498, 762528, 762558, 762588, 762617, 762647, 762677, 762706, 762736, 762766, 762795, 762825, + 762855, 762884, 762914, 762943, 762973, 763003, 763032, 763062, 763091, 763121, 763150, 763180, + 763209, 763239, 763268, 763298, 763327, 763357, 763386, 763416, 763445, 763475, 763504, 763533, + 763563, 763592, 763622, 763651, 763680, 763710, 763739, 763768, 763798, 763827, 763856, 763886, + 763915, 763944, 763974, 764003, 764032, 764061, 764091, 764120, 764149, 764178, 764207, 764237, + 764266, 764295, 764324, 764353, 764382, 764412, 764441, 764470, 764499, 764528, 764557, 764586, + 764615, 764644, 764673, 764703, 764732, 764761, 764790, 764819, 764848, 764877, 764906, 764935, + 764964, 764993, 765021, 765050, 765079, 765108, 765137, 765166, 765195, 765224, 765253, 765282, + 765311, 765339, 765368, 765397, 765426, 765455, 765484, 765512, 765541, 765570, 765599, 765627, + 765656, 765685, 765714, 765742, 765771, 765800, 765829, 765857, 765886, 765915, 765943, 765972, + 766001, 766029, 766058, 766087, 766115, 766144, 766172, 766201, 766230, 766258, 766287, 766315, + 766344, 766372, 766401, 766429, 766458, 766486, 766515, 766543, 766572, 766600, 766629, 766657, + 766686, 766714, 766743, 766771, 766800, 766828, 766856, 766885, 766913, 766941, 766970, 766998, + 767027, 767055, 767083, 767112, 767140, 767168, 767196, 767225, 767253, 767281, 767310, 767338, + 767366, 767394, 767422, 767451, 767479, 767507, 767535, 767563, 767592, 767620, 767648, 767676, + 767704, 767732, 767761, 767789, 767817, 767845, 767873, 767901, 767929, 767957, 767985, 768013, + 768041, 768069, 768097, 768125, 768153, 768181, 768209, 768237, 768265, 768293, 768321, 768349, + 768377, 768405, 768433, 768461, 768489, 768517, 768545, 768573, 768600, 768628, 768656, 768684, + 768712, 768740, 768768, 768795, 768823, 768851, 768879, 768907, 768934, 
768962, 768990, 769018, + 769045, 769073, 769101, 769129, 769156, 769184, 769212, 769239, 769267, 769295, 769322, 769350, + 769378, 769405, 769433, 769461, 769488, 769516, 769543, 769571, 769599, 769626, 769654, 769681, + 769709, 769736, 769764, 769792, 769819, 769847, 769874, 769902, 769929, 769957, 769984, 770011, + 770039, 770066, 770094, 770121, 770149, 770176, 770204, 770231, 770258, 770286, 770313, 770340, + 770368, 770395, 770423, 770450, 770477, 770504, 770532, 770559, 770586, 770614, 770641, 770668, + 770696, 770723, 770750, 770777, 770805, 770832, 770859, 770886, 770913, 770941, 770968, 770995, + 771022, 771049, 771076, 771104, 771131, 771158, 771185, 771212, 771239, 771266, 771293, 771321, + 771348, 771375, 771402, 771429, 771456, 771483, 771510, 771537, 771564, 771591, 771618, 771645, + 771672, 771699, 771726, 771753, 771780, 771807, 771834, 771861, 771888, 771914, 771941, 771968, + 771995, 772022, 772049, 772076, 772103, 772130, 772156, 772183, 772210, 772237, 772264, 772291, + 772317, 772344, 772371, 772398, 772424, 772451, 772478, 772505, 772531, 772558, 772585, 772612, + 772638, 772665, 772692, 772718, 772745, 772772, 772798, 772825, 772852, 772878, 772905, 772932, + 772958, 772985, 773012, 773038, 773065, 773091, 773118, 773144, 773171, 773198, 773224, 773251, + 773277, 773304, 773330, 773357, 773383, 773410, 773436, 773463, 773489, 773516, 773542, 773569, + 773595, 773621, 773648, 773674, 773701, 773727, 773754, 773780, 773806, 773833, 773859, 773885, + 773912, 773938, 773964, 773991, 774017, 774043, 774070, 774096, 774122, 774149, 774175, 774201, + 774227, 774254, 774280, 774306, 774332, 774359, 774385, 774411, 774437, 774464, 774490, 774516, + 774542, 774568, 774594, 774621, 774647, 774673, 774699, 774725, 774751, 774777, 774804, 774830, + 774856, 774882, 774908, 774934, 774960, 774986, 775012, 775038, 775064, 775090, 775116, 775142, + 775168, 775194, 775220, 775246, 775272, 775298, 775324, 775350, 775376, 775402, 775428, 775454, + 775480, 
775506, 775532, 775558, 775583, 775609, 775635, 775661, 775687, 775713, 775739, 775764, + 775790, 775816, 775842, 775868, 775894, 775919, 775945, 775971, 775997, 776022, 776048, 776074, + 776100, 776126, 776151, 776177, 776203, 776228, 776254, 776280, 776306, 776331, 776357, 776383, + 776408, 776434, 776460, 776485, 776511, 776536, 776562, 776588, 776613, 776639, 776665, 776690, + 776716, 776741, 776767, 776792, 776818, 776844, 776869, 776895, 776920, 776946, 776971, 776997, + 777022, 777048, 777073, 777099, 777124, 777150, 777175, 777201, 777226, 777251, 777277, 777302, + 777328, 777353, 777379, 777404, 777429, 777455, 777480, 777505, 777531, 777556, 777582, 777607, + 777632, 777658, 777683, 777708, 777733, 777759, 777784, 777809, 777835, 777860, 777885, 777910, + 777936, 777961, 777986, 778011, 778037, 778062, 778087, 778112, 778138, 778163, 778188, 778213, + 778238, 778263, 778289, 778314, 778339, 778364, 778389, 778414, 778439, 778465, 778490, 778515, + 778540, 778565, 778590, 778615, 778640, 778665, 778690, 778715, 778740, 778765, 778790, 778815, + 778840, 778866, 778891, 778916, 778940, 778965, 778990, 779015, 779040, 779065, 779090, 779115, + 779140, 779165, 779190, 779215, 779240, 779265, 779290, 779315, 779339, 779364, 779389, 779414, + 779439, 779464, 779489, 779513, 779538, 779563, 779588, 779613, 779638, 779662, 779687, 779712, + 779737, 779761, 779786, 779811, 779836, 779861, 779885, 779910, 779935, 779959, 779984, 780009, + 780034, 780058, 780083, 780108, 780132, 780157, 780182, 780206, 780231, 780256, 780280, 780305, + 780329, 780354, 780379, 780403, 780428, 780453, 780477, 780502, 780526, 780551, 780575, 780600, + 780624, 780649, 780674, 780698, 780723, 780747, 780772, 780796, 780821, 780845, 780870, 780894, + 780919, 780943, 780967, 780992, 781016, 781041, 781065, 781090, 781114, 781139, 781163, 781187, + 781212, 781236, 781261, 781285, 781309, 781334, 781358, 781382, 781407, 781431, 781455, 781480, + 781504, 781528, 781553, 781577, 781601, 781626, 
781650, 781674, 781698, 781723, 781747, 781771, + 781795, 781820, 781844, 781868, 781892, 781917, 781941, 781965, 781989, 782013, 782038, 782062, + 782086, 782110, 782134, 782158, 782182, 782207, 782231, 782255, 782279, 782303, 782327, 782351, + 782375, 782400, 782424, 782448, 782472, 782496, 782520, 782544, 782568, 782592, 782616, 782640, + 782664, 782688, 782712, 782736, 782760, 782784, 782808, 782832, 782856, 782880, 782904, 782928, + 782952, 782976, 783000, 783024, 783048, 783072, 783096, 783119, 783143, 783167, 783191, 783215, + 783239, 783263, 783287, 783310, 783334, 783358, 783382, 783406, 783430, 783454, 783477, 783501, + 783525, 783549, 783573, 783596, 783620, 783644, 783668, 783691, 783715, 783739, 783763, 783786, + 783810, 783834, 783858, 783881, 783905, 783929, 783952, 783976, 784000, 784024, 784047, 784071, + 784095, 784118, 784142, 784165, 784189, 784213, 784236, 784260, 784284, 784307, 784331, 784354, + 784378, 784402, 784425, 784449, 784472, 784496, 784519, 784543, 784567, 784590, 784614, 784637, + 784661, 784684, 784708, 784731, 784755, 784778, 784802, 784825, 784849, 784872, 784896, 784919, + 784943, 784966, 784989, 785013, 785036, 785060, 785083, 785107, 785130, 785153, 785177, 785200, + 785223, 785247, 785270, 785294, 785317, 785340, 785364, 785387, 785410, 785434, 785457, 785480, + 785504, 785527, 785550, 785574, 785597, 785620, 785643, 785667, 785690, 785713, 785736, 785760, + 785783, 785806, 785829, 785853, 785876, 785899, 785922, 785946, 785969, 785992, 786015, 786038, + 786061, 786085, 786108, 786131, 786154, 786177, 786200, 786224, 786247, 786270, 786293, 786316, + 786339, 786362, 786385, 786408 +}; + +static const unsigned int bsc_delta_table[4096] = +{ + 0, 131072, 180544, 212672, 236557, 255603, 271426, 284990, 296832, 307354, 316837, 325441, + 333328, 340596, 347363, 353689, 359571, 365165, 370408, 375372, 380114, 384632, 388922, 393032, + 396955, 400773, 404384, 407880, 411276, 414522, 417708, 420742, 423677, 426537, 429346, 432056, + 
434646, 437242, 439712, 442104, 444512, 446788, 449065, 451271, 453414, 455502, 457543, 459593, + 461517, 463465, 465398, 467220, 468984, 470856, 472577, 474255, 476008, 477616, 479252, 480864, + 482456, 484032, 485533, 487023, 488441, 489985, 491399, 492747, 494236, 495528, 496899, 498285, + 499544, 500820, 502117, 503439, 504636, 505860, 507036, 508244, 509488, 510608, 511766, 512882, + 514041, 515161, 516242, 517372, 518377, 519523, 520454, 521622, 522574, 523584, 524656, 525602, + 526515, 527591, 528541, 529461, 530452, 531416, 532250, 533262, 534040, 535000, 535936, 536848, + 537627, 538599, 539328, 540254, 541159, 541929, 542790, 543514, 544448, 545128, 546022, 546778, + 547634, 548350, 549168, 549968, 550625, 551513, 552132, 552986, 553567, 554387, 555192, 555850, + 556490, 557380, 557988, 558578, 559424, 560120, 560662, 561466, 562117, 562753, 563517, 564125, + 564718, 565442, 566007, 566705, 567391, 567915, 568726, 569224, 569860, 570484, 571096, 571696, + 572284, 572860, 573583, 573977, 574678, 575370, 575890, 576398, 577059, 577545, 578186, 578650, + 579440, 579714, 580486, 580908, 581491, 582065, 582630, 583186, 583556, 584270, 584799, 585319, + 585649, 586331, 586824, 587492, 587784, 588436, 588895, 589345, 589975, 590409, 590834, 591442, + 591851, 592251, 592837, 593417, 593794, 594360, 594721, 595273, 595819, 596157, 596689, 597215, + 597530, 598042, 598548, 599048, 599333, 599819, 600299, 600773, 601241, 601703, 602159, 602609, + 603053, 603273, 603922, 604348, 604547, 605181, 605589, 605991, 606387, 607003, 607162, 607768, + 608142, 608510, 608872, 609228, 609811, 610157, 610497, 610831, 611396, 611720, 612277, 612351, + 613139, 613201, 613740, 614032, 614563, 614845, 615368, 615640, 615906, 616416, 616923, 617175, + 617421, 617915, 618151, 618893, 618864, 619342, 619817, 620029, 620496, 620960, 621158, 621614, + 621802, 622250, 622695, 623137, 623307, 623741, 623901, 624327, 624750, 625170, 625587, 625725, + 626134, 626262, 626942, 627062, 627457, 
627849, 628238, 628340, 629006, 629100, 629475, 629847, + 629927, 630581, 630653, 631303, 631367, 631719, 632068, 632414, 632757, 633097, 633434, 633468, + 634098, 634426, 634448, 635072, 635086, 635706, 635712, 636328, 636326, 636938, 636928, 637536, + 637518, 638122, 638096, 638696, 638979, 639259, 639217, 639809, 640080, 640348, 640936, 640876, + 641135, 641717, 641645, 642223, 642472, 642718, 642961, 643201, 643438, 644006, 644239, 644133, + 644695, 645257, 645141, 645359, 645915, 646129, 646340, 646548, 647098, 646956, 647502, 647700, + 648244, 648088, 648628, 648816, 649001, 649183, 649717, 649895, 650070, 650600, 650771, 650939, + 651104, 651266, 651788, 651946, 652466, 652254, 652770, 653286, 653064, 653576, 653717, 654227, + 654364, 654498, 654629, 655133, 655260, 655762, 655506, 656384, 656122, 656618, 656731, 657225, + 657334, 657440, 657543, 658031, 658519, 658227, 659102, 658804, 659286, 659374, 659854, 659938, + 660019, 660495, 660971, 660647, 661119, 661591, 661660, 661726, 662194, 662256, 662722, 662780, + 662835, 663297, 663348, 663808, 663855, 664313, 664356, 664812, 664851, 664887, 665339, 665371, + 665821, 665849, 666297, 666321, 666767, 666787, 667231, 667247, 667689, 667701, 668141, 668149, + 668154, 669024, 668592, 669026, 669460, 669456, 669888, 669880, 669869, 670297, 670725, 670709, + 671135, 671115, 671539, 671515, 671937, 671909, 672329, 672749, 672716, 672680, 673096, 673512, + 673471, 673885, 673840, 674252, 674203, 674613, 674560, 674968, 675376, 675318, 675257, 676129, + 676066, 676000, 676402, 676332, 676732, 677132, 677057, 677455, 677853, 677773, 677690, 678084, + 678478, 678872, 678783, 678691, 679081, 679471, 679374, 679762, 680150, 680048, 680434, 680328, + 680712, 681096, 680985, 681367, 681252, 681632, 682012, 681892, 682270, 682648, 682523, 682899, + 682770, 683144, 683518, 683384, 683756, 683618, 684499, 684359, 684216, 684584, 684952, 684804, + 685170, 685536, 685383, 685747, 686111, 685953, 686315, 686677, 686514, 686874, 
687234, 687066, + 687424, 687252, 688139, 687433, 688320, 688142, 688496, 688850, 688667, 689019, 688832, 689722, + 689533, 689341, 690232, 689494, 690385, 690187, 690533, 690879, 691225, 691021, 690814, 691708, + 691499, 691841, 691628, 691968, 692308, 692648, 692429, 692767, 693105, 692881, 693217, 693553, + 693889, 693659, 693993, 693759, 694660, 694424, 694185, 695087, 694846, 695176, 694931, 695835, + 695588, 695338, 696243, 695991, 696317, 696061, 696968, 696710, 697034, 696772, 697094, 697416, + 697738, 697470, 697790, 698110, 698430, 698156, 698474, 698792, 698513, 699427, 699146, 698862, + 699777, 699491, 699805, 700119, 699828, 700140, 700452, 700764, 700467, 700777, 701087, 701397, + 701094, 701402, 701710, 702018, 701709, 702015, 702321, 702627, 702312, 702616, 702920, 703224, + 702903, 703205, 703507, 703809, 703482, 703782, 704082, 704382, 704682, 704348, 704646, 704944, + 704605, 704901, 705836, 704854, 705789, 705443, 706380, 705388, 706325, 706619, 706266, 706558, + 706850, 706492, 707433, 707073, 707363, 706999, 707942, 707576, 707864, 708152, 707781, 708727, + 708354, 708640, 708926, 708548, 708832, 709116, 709400, 709684, 709299, 710251, 709864, 709474, + 710427, 710035, 710990, 710596, 710199, 711155, 710756, 711034, 711312, 711590, 711868, 711462, + 711738, 712014, 712290, 711878, 712841, 712427, 712701, 712283, 713248, 712828, 713100, 713372, + 713644, 713916, 713489, 713759, 714029, 714299, 713866, 714838, 714403, 714671, 714939, 714499, + 715474, 715032, 715298, 715564, 715830, 715382, 716361, 715911, 716175, 716439, 715984, 716966, + 716509, 716771, 717033, 716571, 717556, 717092, 717352, 717612, 717872, 718132, 717661, 717919, + 718177, 718435, 718693, 718951, 718472, 718728, 718984, 719240, 719496, 719010, 720007, 719519, + 719773, 720027, 719534, 720534, 720039, 720291, 720543, 720795, 721047, 720545, 720795, 721801, + 721296, 720788, 721795, 722045, 721534, 721782, 722030, 722278, 722526, 722008, 722254, 723268, + 722747, 722993, 
722468, 723484, 722957, 723201, 723445, 723689, 723933, 724177, 723642, 723884, + 724907, 724369, 723828, 724852, 725094, 724550, 724790, 725030, 725270, 725510, 725750, 725198, + 725436, 725674, 725912, 726150, 726388, 726626, 726065, 726301, 726537, 726773, 727009, 727245, + 726676, 727716, 727145, 727379, 727613, 727847, 727270, 728314, 727735, 728781, 728200, 728432, + 727847, 728895, 729127, 728539, 728769, 728999, 729229, 729459, 729689, 729093, 730148, 729550, + 729778, 730006, 730234, 730462, 729857, 730917, 730310, 730536, 730762, 730988, 731214, 730600, + 731665, 731049, 731273, 731497, 731721, 731945, 732169, 731545, 732616, 731990, 732212, 732434, + 732656, 732878, 733100, 732466, 733543, 732907, 733127, 733347, 733567, 733787, 733144, 734226, + 733581, 733799, 734884, 734236, 733585, 734671, 734889, 734235, 735324, 734668, 734884, 735100, + 735316, 735532, 734869, 735963, 735298, 735512, 735726, 735940, 736154, 736368, 736582, 735908, + 737009, 736333, 736545, 736757, 736969, 737181, 737393, 736709, 737816, 737130, 737340, 737550, + 737760, 737970, 738180, 738390, 737695, 738809, 738112, 738320, 738528, 738736, 738944, 739152, + 738447, 739567, 738860, 739982, 739273, 739479, 739685, 739891, 739176, 740302, 739585, 740713, + 739994, 740198, 740402, 740606, 740810, 741014, 740287, 741421, 740692, 740894, 742031, 741299, + 741501, 741703, 740966, 742106, 741367, 742509, 741768, 741968, 742168, 742368, 742568, 742768, + 742968, 742218, 743367, 742615, 743766, 743012, 743210, 743408, 743606, 742846, 744001, 743239, + 744396, 743632, 743828, 744988, 744221, 744417, 743646, 744808, 745004, 744230, 745395, 744619, + 744813, 745007, 745201, 745395, 745589, 745783, 744998, 746170, 745383, 746557, 745768, 745960, + 746152, 746344, 746536, 746728, 745931, 747111, 746312, 747494, 746693, 746883, 747073, 747263, + 747453, 747643, 747833, 747023, 748212, 747400, 747588, 748780, 747965, 748153, 748341, 748529, + 747708, 748904, 748081, 749279, 748454, 749654, 748827, 
749013, 749199, 749385, 749571, 748737, + 749942, 750128, 749291, 750499, 749660, 749844, 750028, 750212, 750396, 750580, 750764, 749916, + 751131, 750281, 751498, 750646, 750828, 751010, 751192, 751374, 751556, 751738, 751920, 751058, + 752283, 751419, 752646, 751780, 751960, 752140, 752320, 752500, 752680, 752860, 751985, 753219, + 752342, 753578, 752699, 752877, 754116, 753234, 753412, 752526, 753767, 753945, 754123, 753233, + 754478, 753586, 753762, 755010, 754115, 754291, 754467, 754643, 754819, 753917, 755170, 755346, + 754441, 755697, 754790, 754964, 755138, 756398, 755487, 754573, 755834, 756008, 756182, 755264, + 756529, 755609, 756876, 755954, 756126, 757396, 756471, 756643, 755714, 756986, 757158, 757330, + 756397, 757673, 756738, 758016, 757079, 757249, 757419, 757589, 757759, 757929, 758099, 758269, + 758439, 757491, 758778, 757828, 759117, 758165, 758333, 758501, 758669, 758837, 759005, 759173, + 759341, 759509, 758546, 759844, 758879, 760179, 759212, 759378, 760681, 759711, 759877, 760043, + 760209, 759233, 760540, 760706, 759727, 761037, 760056, 761368, 760385, 760549, 761864, 760878, + 761042, 761206, 761370, 760378, 761697, 761861, 760866, 762188, 761191, 762515, 761516, 761678, + 763005, 762003, 762165, 762327, 762489, 762651, 761642, 762974, 763136, 762124, 763459, 762445, + 763782, 762766, 762926, 763086, 763246, 763406, 763566, 763726, 763886, 764046, 763019, 764365, + 764525, 763495, 764844, 763812, 763970, 764128, 765481, 764445, 764603, 764761, 764919, 763877, + 765234, 765392, 764347, 765707, 764660, 766022, 764973, 766337, 765286, 765442, 765598, 765754, + 765910, 766066, 766222, 766378, 765317, 766689, 766845, 765781, 767156, 766090, 766244, 767622, + 766553, 766707, 766861, 767015, 767169, 767323, 767477, 767631, 766552, 767938, 768092, 767010, + 768399, 767315, 767467, 768859, 767772, 767924, 768076, 768228, 768380, 768532, 768684, 768836, + 767739, 769139, 769291, 768191, 769594, 768492, 768642, 770048, 768943, 769093, 769243, 769393, 
+ 769543, 769693, 769843, 769993, 770143, 770293, 769176, 770592, 769473, 770891, 769770, 771190, + 770067, 770215, 770363, 770511, 771936, 770808, 769677, 771103, 771251, 771399, 771547, 770411, + 771842, 771990, 770851, 770997, 772432, 771290, 771436, 772874, 771729, 771875, 772021, 772167, + 772313, 772459, 772605, 771451, 772896, 773042, 771885, 773333, 772174, 773624, 772463, 772607, + 774060, 772896, 773040, 773184, 773328, 773472, 773616, 773760, 773904, 772730, 774191, 774335, + 773158, 774622, 773443, 774909, 773728, 775196, 774013, 774155, 774297, 774439, 774581, 774723, + 774865, 775007, 775149, 775291, 775433, 774237, 775716, 775858, 774659, 776141, 774940, 775080, + 776565, 775361, 775501, 775641, 777130, 775922, 776062, 776202, 774989, 776481, 776621, 776761, + 776901, 775683, 777180, 775960, 777459, 776237, 777738, 776514, 776652, 778156, 776929, 777067, + 777205, 777343, 777481, 777619, 777757, 777895, 776658, 778170, 778308, 778446, 777205, 778721, + 777478, 778996, 777751, 777887, 779408, 778160, 778296, 778432, 778568, 778704, 778840, 778976, + 779112, 779248, 779384, 779520, 778259, 779791, 779927, 778663, 780198, 778932, 779066, 780604, + 779335, 779469, 781010, 779738, 779872, 780006, 780140, 780274, 780408, 780542, 780676, 779394, + 780943, 781077, 781211, 779925, 781478, 780190, 781745, 780455, 782012, 780720, 780852, 780984, + 782545, 781249, 781381, 781513, 781645, 781777, 781909, 782041, 780736, 782304, 782436, 782568, + 781259, 782831, 781520, 783094, 781781, 783357, 782042, 782172, 783751, 782433, 782563, 782693, + 782823, 782953, 783083, 783213, 783343, 783473, 783603, 783733, 782402, 783992, 784122, 782788, + 784381, 783045, 784640, 783302, 784899, 783559, 783687, 785287, 783944, 784072, 784200, 784328, + 784456, 784584, 784712, 784840, 784968, 785096, 785224, 783868, 785479, 785607, 784248, 785862, + 784501, 786117, 784754, 786372, 785007, 785133, 786754, 785386, 785512, 785638, 785764, 785890, + 786016, 786142, 786268, 786394, 
786520, 786646, 785265, 786897, 787023, 785639, 787274, 787400, + 786013, 787651, 786262, 786386, 788027, 786635, 786759, 788403, 787008, 787132, 787256, 787380, + 787504, 787628, 787752, 787876, 788000, 788124, 786717, 788371, 788495, 787085, 788742, 788866, + 787453, 789113, 787698, 789360, 787943, 788065, 789730, 788310, 788432, 788554, 788676, 788798, + 788920, 789042, 789164, 789286, 789408, 789530, 789652, 789774, 789896, 788460, 790139, 788701, + 790382, 790504, 789063, 790747, 789304, 789424, 791111, 789665, 789785, 791475, 790026, 790146, + 790266, 790386, 790506, 790626, 790746, 790866, 790986, 791106, 791226, 791346, 789883, 791585, + 791705, 790239, 791944, 790476, 792183, 790713, 792422, 790950, 792661, 791187, 791305, 793019, + 791542, 791660, 791778, 791896, 792014, 792132, 792250, 792368, 792486, 792604, 792722, 792840, + 792958, 791466, 793193, 793311, 791816, 793546, 793664, 792166, 793899, 792399, 792515, 794251, + 792748, 792864, 794603, 793097, 793213, 793329, 793445, 795189, 793678, 793794, 793910, 794026, + 792509, 794257, 794373, 794489, 794605, 794721, 793198, 794952, 795068, 793542, 795299, 793771, + 795530, 794000, 795761, 794229, 794343, 796107, 794572, 794686, 794800, 796568, 795029, 795143, + 795257, 795371, 795485, 795599, 795713, 795827, 795941, 796055, 794504, 796282, 796396, 796510, + 794955, 796737, 795180, 796964, 797078, 795518, 795630, 797418, 795855, 797645, 796080, 796192, + 797985, 796417, 796529, 796641, 796753, 796865, 796977, 797089, 797201, 797313, 797425, 797537, + 797649, 797761, 797873, 796289, 798096, 798208, 798320, 796732, 798543, 796953, 798766, 797174, + 798989, 797395, 799212, 797616, 797726, 799546, 797947, 798057, 798167, 799991, 798388, 798498, + 798608, 798718, 798828, 798938, 799048, 799158, 799268, 799378, 799488, 797872, 799707, 799817, + 799927, 798307, 800146, 798524, 800365, 800475, 798850, 800694, 799067, 799175, 801022, 799392, + 799500, 801350, 799717, 799825, 799933, 801787, 800150, 800258, 800366, 
800474, 800582, 800690, + 800798, 800906, 801014, 801122, 799473, 801337, 801445, 801553, 799900, 801768, 801876, 800220, + 802091, 800433, 802306, 800646, 802521, 800859, 802736, 801072, 801178, 803058, 801391, 801497, + 803380, 801710, 801816, 801922, 802028, 802134, 802240, 802346, 802452, 802558, 802664, 802770, + 802876, 802982, 803088, 801402, 803299, 803405, 801716, 803616, 803722, 802030, 803933, 802239, + 804144, 802448, 804355, 802657, 804566, 802866, 802970, 804882, 803179, 803283, 805198, 803492, + 803596, 803700, 803804, 803908, 804012, 804116, 804220, 804324, 804428, 804532, 804636, 804740, + 804844, 803122, 805051, 805155, 805259, 803533, 805466, 805570, 803841, 805777, 804046, 805984, + 804251, 806191, 804456, 806398, 804661, 804763, 806708, 804968, 805070, 805172, 807121, 805377, + 805479, 805581, 805683, 805785, 805887, 805989, 806091, 806193, 806295, 806397, 806499, 806601, + 806703, 804943, 806906, 807008, 807110, 805346, 807313, 805547, 807516, 807618, 805849, 807821, + 806050, 808024, 806251, 808227, 806452, 806552, 808531, 806753, 806853, 806953, 808936, 807154, + 807254, 807354, 807454, 807554, 807654, 807754, 809745, 806063, 808054, 808154, 808254, 808354, + 808454, 808554, 808654, 806854, 808853, 808953, 809053, 807249, 809252, 809352, 807545, 809551, + 807742, 809750, 807939, 809949, 808136, 810148, 808333, 808431, 810446, 808628, 808726, 808824, + 810843, 809021, 809119, 809217, 809315, 809413, 811438, 809610, 809708, 809806, 809904, 810002, + 808167, 810197, 810295, 810393, 810491, 810589, 810687, 808845, 810882, 810980, 809135, 811175, + 811273, 809425, 811468, 809618, 811663, 809811, 811858, 810004, 812053, 810197, 812248, 810390, + 810486, 810582, 812637, 810775, 810871, 810967, 813026, 811160, 811256, 811352, 811448, 811544, + 811640, 811736, 811832, 811928, 812024, 812120, 812216, 812312, 812408, 810526, 812599, 812695, + 812791, 810905, 812982, 813078, 811189, 813269, 813365, 811473, 813556, 811662, 813747, 811851, + 813938, 
812040, 812134, 814224, 812323, 814415, 812512, 812606, 812700, 814796, 812889, 812983, + 813077, 813171, 813265, 815367, 813454, 813548, 813642, 813736, 813830, 813924, 814018, 814112, + 812189, 814299, 814393, 814487, 814581, 812653, 814768, 814862, 814956, 813024, 815143, 815237, + 813302, 815424, 813487, 815611, 813672, 815798, 813857, 815985, 814042, 816172, 814227, 814319, + 816452, 814504, 814596, 816732, 814781, 814873, 814965, 817105, 815150, 815242, 815334, 815426, + 815518, 815610, 815702, 815794, 815886, 815978, 816070, 816162, 816254, 816346, 814375, 816529, + 816621, 816713, 816805, 814829, 816988, 817080, 815101, 817263, 817355, 815373, 817538, 815554, + 817721, 815735, 817904, 815916, 818087, 816097, 818270, 816278, 816368, 818544, 816549, 816639, + 818818, 816820, 816910, 817000, 817090, 819274, 817271, 817361, 817451, 817541, 817631, 817721, + 817811, 817901, 817991, 818081, 818171, 818261, 818351, 818441, 818531, 816511, 818710, 818800, + 818890, 816866, 819069, 819159, 817132, 819338, 819428, 817398, 819607, 819697, 817664, 819876, + 817841, 820055, 818018, 818106, 820323, 818283, 820502, 818460, 818548, 820770, 818725, 818813, + 821038, 818990, 819078, 819166, 819254, 819342, 821573, 819519, 819607, 819695, 819783, 819871, + 819959, 820047, 820135, 820223, 820311, 820399, 820487, 818419, 820662, 820750, 820838, 820926, + 818853, 821101, 821189, 821277, 819200, 821452, 821540, 819460, 821715, 819633, 821890, 821978, + 819893, 822153, 820066, 820152, 822415, 820325, 822590, 820498, 820584, 822852, 820757, 820843, + 823114, 821016, 821102, 821188, 823463, 821361, 821447, 821533, 821619, 821705, 821791, 824073, + 821964, 822050, 822136, 822222, 822308, 820192, 822479, 822565, 822651, 822737, 822823, 822909, + 822995, 820871, 823166, 823252, 823338, 821210, 823509, 823595, 821464, 823766, 823852, 821718, + 824023, 821887, 824194, 822056, 824365, 822225, 824536, 822394, 824707, 822563, 824878, 822732, + 822816, 825134, 822985, 823069, 823153, 825475, 
823322, 823406, 823490, 825816, 823659, 823743, + 823827, 823911, 823995, 824079, 824163, 824247, 824331, 824415, 824499, 824583, 824667, 824751, + 824835, 824919, 825003, 825087, 822910, 825254, 825338, 825422, 825506, 823324, 825673, 825757, + 823572, 825924, 826008, 823820, 826175, 826259, 824068, 826426, 824233, 826593, 824398, 826760, + 824563, 826927, 824728, 824810, 827177, 824975, 825057, 827427, 825222, 825304, 827677, 825469, + 825551, 825633, 828010, 825798, 825880, 825962, 826044, 826126, 828509, 826291, 826373, 826455, + 826537, 826619, 826701, 826783, 826865, 826947, 827029, 824799, 827192, 827274, 827356, 827438, + 827520, 825284, 827683, 827765, 827847, 825607, 828010, 828092, 828174, 825930, 828337, 826091, + 828500, 828582, 826333, 828745, 826494, 828908, 826655, 829071, 826816, 829234, 826977, 829397, + 827138, 827218, 829641, 827379, 827459, 829885, 827620, 827700, 830129, 827861, 827941, 828021, + 828101, 830535, 828262, 828342, 828422, 828502, 828582, 828662, 828742, 828822, 828902, 828982, + 829062, 829142, 829222, 829302, 829382, 829462, 829542, 829622, 829702, 827408, 829861, 829941, + 830021, 830101, 827802, 830260, 830340, 830420, 828117, 830579, 830659, 828353, 830818, 828510, + 830977, 831057, 828746, 831216, 828903, 831375, 829060, 831534, 829217, 831693, 829374, 829452, + 831931, 829609, 829687, 832169, 829844, 829922, 832407, 830079, 830157, 830235, 832724, 830392, + 830470, 830548, 830626, 833120, 830783, 830861, 830939, 831017, 831095, 831173, 831251, 831329, + 831407, 831485, 831563, 831641, 831719, 831797, 831875, 831953, 832031, 829675, 832186, 832264, + 832342, 832420, 830059, 832575, 832653, 832731, 830366, 832886, 832964, 830596, 833119, 833197, + 830826, 833352, 830979, 833507, 833585, 831209, 833740, 831362, 833895, 831515, 834050, 831668, + 831744, 834282, 831897, 834437, 832050, 832126, 834669, 832279, 832355, 834901, 832508, 832584, + 832660, 835210, 832813, 832889, 832965, 833041, 833117, 835673, 833270, 833346, 833422, 
833498, + 833574, 833650, 833726, 833802, 833878, 833954, 834030, 834106, 834182, 834258, 834334, 834410, + 831989, 834561, 834637, 834713, 834789, 834865, 832438, 835016, 835092, 835168, 832737, 835319, + 835395, 832961, 835546, 835622, 833185, 835773, 835849, 833409, 836000, 833558, 836151, 833707, + 836302, 833856, 836453, 834005, 836604, 834154, 836755, 834303, 834377, 836981, 834526, 837132, + 834675, 834749, 837358, 834898, 834972, 835046, 837659, 835195, 835269, 835343, 835417, 838035, + 835566, 835640, 835714, 835788, 835862, 835936, 836010, 836084, 836158, 838786, 836307, 836381, + 836455, 833971, 836602, 836676, 836750, 836824, 836898, 836972, 837046, 837120, 837194, 834700, + 837341, 837415, 837489, 837563, 835064, 837710, 837784, 835282, 837931, 838005, 838079, 835573, + 838226, 835718, 838373, 838447, 835936, 838594, 836081, 838741, 836226, 838888, 836371, 839035, + 836516, 839182, 836661, 839329, 836806, 839476, 836951, 837023, 839696, 837168, 837240, 839916, + 837385, 837457, 837529, 840209, 837674, 837746, 837818, 840502, 837963, 838035, 838107, 838179, + 838251, 840941, 838396, 838468, 838540, 838612, 838684, 838756, 838828, 838900, 838972, 839044, + 839116, 839188, 839260, 839332, 839404, 839476, 839548, 836984, 839691, 839763, 839835, 839907, + 839979, 837409, 840122, 840194, 840266, 837692, 840409, 840481, 840553, 837975, 840696, 840768, + 838187, 840911, 838328, 841054, 841126, 838540, 841269, 838681, 841412, 838822, 841555, 838963, + 841698, 839104, 841841, 839245, 841984, 839386, 842127, 839527, 839597, 842341, 839738, 839808, + 842555, 839949, 840019, 842769, 840160, 840230, 840300, 843054, 840441, 840511, 840581, 840651, + 843410, 840792, 840862, 840932, 841002, 841072, 841142, 841212, 841282, 844050, 841423, 841493, + 841563, 841633, 841703, 841773, 839138, 841912, 841982, 842052, 842122, 842192, 842262, 842332, + 842402, 842472, 839827, 842611, 842681, 842751, 842821, 840171, 842960, 843030, 843100, 840446, + 843239, 843309, 840652, 
843448, 843518, 840858, 843657, 843727, 841064, 843866, 841201, 844005, + 841338, 844144, 841475, 844283, 844353, 841681, 841749, 844561, 841886, 844700, 842023, 844839, + 842160, 842228, 845047, 842365, 845186, 842502, 842570, 845394, 842707, 842775, 842843, 845671, + 842980, 843048, 843116, 845948, 843253, 843321, 843389, 843457, 843525, 846363, 843662, 843730, + 843798, 843866, 843934, 844002, 844070, 844138, 844206, 844274, 844342, 844410, 844478, 844546, + 844614, 844682, 844750, 844818, 844886, 844954, 845022, 845090, 842365, 845225, 845293, 845361, + 845429, 845497, 842766, 845632, 845700, 845768, 843033, 845903, 845971, 843233, 846106, 846174, + 843433, 846309, 846377, 843633, 846512, 846580, 843833, 846715, 843966, 846850, 844099, 846985, + 847053, 844299, 847188, 844432, 847323, 844565, 844631, 847525, 844764, 847660, 844897, 844963, + 847862, 845096, 847997, 845229, 845295, 848199, 845428, 845494, 845560, 848468, 845693, 845759, + 848670, 845892, 845958, 846024, 846090, 849006, 846223, 846289, 846355, 846421, 846487, 846553, + 846619, 849543, 846752, 846818, 846884, 846950, 847016, 847082, 847148, 847214, 847280, 847346, + 847412, 847478, 847544, 847610, 844803, 847741, 847807, 847873, 847939, 848005, 848071, 848137, + 845322, 848268, 848334, 848400, 848466, 845646, 848597, 848663, 848729, 845905, 848860, 848926, + 846099, 849057, 849123, 846293, 849254, 849320, 846487, 849451, 846616, 849582, 849648, 846810, + 849779, 846939, 849910, 847068, 850041, 847197, 850172, 847326, 850303, 847455, 850434, 847584, + 850565, 847713, 847777, 850761, 847906, 847970, 850957, 848099, 848163, 851153, 848292, 848356, + 851349, 848485, 848549, 848613, 851610, 848742, 848806, 848870, 851871, 848999, 849063, 849127, + 849191, 849255, 852262, 849384, 849448, 849512, 849576, 849640, 849704, 849768, 849832, 849896, + 849960, 850024, 850088, 850152, 850216, 850280, 850344, 850408, 850472, 850536, 850600, 850664, + 850728, 850792, 847889, 850919, 850983, 851047, 851111, 851175, 
848266, 851302, 851366, 851430, + 851494, 848580, 851621, 851685, 851749, 848831, 851876, 851940, 849019, 852067, 852131, 849207, + 852258, 852322, 849395, 852449, 849520, 852576, 852640, 849708, 852767, 849833, 852894, 849958, + 853021, 850083, 853148, 850208, 853275, 850333, 853402, 850458, 850520, 853592, 850645, 853719, + 850770, 850832, 853909, 850957, 854036, 851082, 851144, 851206, 854289, 851331, 851393, 854479, + 851518, 851580, 851642, 854732, 851767, 851829, 851891, 851953, 855048, 852078, 852140, 852202, + 852264, 852326, 852388, 855490, 852513, 852575, 852637, 852699, 852761, 852823, 852885, 852947, + 853009, 853071, 853133, 853195, 853257, 853319, 853381, 853443, 853505, 853567, 853629, 853691, + 850692, 853814, 853876, 853938, 854000, 854062, 854124, 851118, 854247, 854309, 854371, 854433, + 851422, 854556, 854618, 854680, 851665, 854803, 854865, 854927, 851908, 855050, 855112, 852090, + 855235, 855297, 852272, 855420, 852393, 855543, 855605, 852575, 855728, 852696, 855851, 852817, + 855974, 852938, 856097, 853059, 856220, 853180, 856343, 853301, 856466, 853422, 856589, 853543, + 856712, 853664, 853724, 856896, 853845, 857019, 853966, 854026, 857203, 854147, 854207, 854267, + 857448, 854388, 854448, 857632, 854569, 854629, 854689, 857877, 854810, 854870, 854930, 854990, + 858183, 855111, 855171, 855231, 855291, 855351, 858550, 855472, 855532, 855592, 855652, 855712, + 855772, 855832, 855892, 855952, 856012, 856072, 856132, 856192, 856252, 856312, 856372, 856432, + 856492, 856552, 856612, 856672, 856732, 856792, 856852, 856912, 853807, 857031, 857091, 857151, + 857211, 857271, 857331, 854219, 857450, 857510, 857570, 854454, 857689, 857749, 857809, 854689, + 857928, 857988, 858048, 854924, 858167, 858227, 855100, 858346, 858406, 855276, 858525, 858585, + 855452, 858704, 855569, 858823, 858883, 855745, 859002, 855862, 859121, 855979, 859240, 856096, + 859359, 856213, 859478, 856330, 859597, 856447, 859716, 856564, 859835, 856681, 856739, 860013, + 
856856, 860132, 856973, 857031, 860310, 857148, 857206, 860488, 857323, 857381, 860666, 857498, + 857556, 860844, 857673, 857731, 857789, 861081, 857906, 857964, 858022, 858080, 861377, 858197, + 858255, 858313, 858371, 858429, 861732, 858546, 858604, 858662, 858720, 858778, 858836, 858894, + 858952, 859010, 862323, 859127, 859185, 859243, 859301, 859359, 859417, 859475, 859533, 859591, + 859649, 856441, 859764, 859822, 859880, 859938, 859996, 860054, 860112, 860170, 860228, 860286, + 857067, 860401, 860459, 860517, 860575, 860633, 857408, 860748, 860806, 860864, 857635, 860979, + 861037, 861095, 857862, 861210, 861268, 861326, 858089, 861441, 861499, 858259, 861614, 861672, + 858429, 861787, 861845, 858599, 861960, 858712, 862075, 862133, 858882, 862248, 858995, 862363, + 859108, 862478, 859221, 862593, 859334, 862708, 859447, 862823, 859560, 862938, 859673, 863053, + 859786, 863168, 859899, 863283, 860012, 860068, 863455, 860181, 860237, 863627, 860350, 863742, + 860463, 860519, 863914, 860632, 860688, 860744, 864143, 860857, 860913, 864315, 861026, 861082, + 861138, 861194, 864601, 861307, 861363, 861419, 861475, 864887, 861588, 861644, 861700, 861756, + 861812, 865230, 861925, 861981, 862037, 862093, 862149, 862205, 862261, 862317, 862373, 862429, + 862485, 862541, 862597, 862653, 862709, 862765, 862821, 862877, 862933, 862989, 863045, 863101, + 863157, 863213, 863269, 863325, 863381, 863437, 863493, 860157, 863604, 863660, 863716, 863772, + 863828, 863884, 860541, 863995, 864051, 864107, 864163, 860815, 864274, 864330, 864386, 861034, + 864497, 864553, 864609, 861253, 864720, 864776, 861417, 864887, 864943, 861581, 865054, 865110, + 861745, 865221, 865277, 861909, 865388, 862018, 865499, 865555, 862182, 865666, 862291, 865777, + 862400, 865888, 865944, 862564, 866055, 862673, 866166, 862782, 866277, 862891, 862945, 866443, + 863054, 866554, 863163, 866665, 863272, 866776, 863381, 863435, 866942, 863544, 863598, 867108, + 863707, 867219, 863816, 863870, 863924, 
867440, 864033, 864087, 867606, 864196, 864250, 867772, + 864359, 864413, 864467, 867993, 864576, 864630, 864684, 864738, 868269, 864847, 864901, 864955, + 865009, 865063, 868600, 865172, 865226, 865280, 865334, 865388, 865442, 865496, 869041, 865605, + 865659, 865713, 865767, 865821, 865875, 865929, 865983, 866037, 866091, 866145, 866199, 866253, + 866307, 866361, 866415, 866469, 866523, 866577, 866631, 866685, 863226, 866792, 866846, 866900, + 866954, 867008, 867062, 867116, 867170, 863702, 867277, 867331, 867385, 867439, 867493, 864019, + 867600, 867654, 867708, 864230, 867815, 867869, 867923, 864441, 868030, 868084, 868138, 864652, + 868245, 868299, 864810, 868406, 868460, 864968, 868567, 868621, 865126, 868728, 868782, 865284, + 868889, 868943, 865442, 869050, 865547, 869157, 865652, 869264, 869318, 865810, 869425, 865915, + 869532, 866020, 869639, 866125, 869746, 866230, 869853, 866335, 869960, 866440, 870067, 866545, + 866597, 870227, 866702, 870334, 866807, 870441, 866912, 866964, 870601, 867069, 867121, 870761, + 867226, 867278, 870921, 867383, 867435, 871081, 867540, 867592, 871241, 867697, 867749, 867801, + 871454, 867906, 867958, 868010, 871667, 868115, 868167, 868219, 871880, 868324, 868376, 868428, + 868480, 868532, 872199, 868637, 868689, 868741, 868793, 868845, 868897, 872571, 869002, 869054, + 869106, 869158, 869210, 869262, 869314, 869366, 869418, 869470, 869522, 869574, 869626, 869678, + 869730, 869782, 869834, 869886, 869938, 869990, 870042, 870094, 870146, 870198, 870250, 870302, + 870354, 870406, 870458, 866858, 870561, 870613, 870665, 870717, 870769, 870821, 867214, 870924, + 870976, 871028, 871080, 871132, 867519, 871235, 871287, 871339, 867722, 871442, 871494, 871546, + 871598, 867976, 871701, 871753, 868128, 871856, 871908, 871960, 868331, 872063, 872115, 868483, + 872218, 872270, 868635, 872373, 868736, 872476, 872528, 868888, 872631, 872683, 869040, 872786, + 869141, 872889, 869242, 872992, 873044, 869394, 873147, 869495, 873250, 869596, 
873353, 869697, + 873456, 869798, 873559, 869899, 873662, 870000, 873765, 870101, 870151, 873919, 870252, 874022, + 870353, 874125, 870454, 870504, 874279, 870605, 870655, 874433, 870756, 874536, 870857, 870907, + 874690, 871008, 871058, 871108, 874895, 871209, 871259, 875049, 871360, 871410, 871460, 875254, + 871561, 871611, 871661, 875459, 871762, 871812, 871862, 875664, 871963, 872013, 872063, 872113, + 872163, 875971, 872264, 872314, 872364, 872414, 872464, 872514, 876329, 872615, 872665, 872715, + 872765, 872815, 872865, 872915, 872965, 873015, 873065, 873115, 873165, 873215, 873265, 873315, + 877146, 873416, 873466, 869732, 873565, 873615, 873665, 873715, 873765, 873815, 873865, 873915, + 873965, 874015, 874065, 874115, 874165, 874215, 874265, 870515, 874364, 874414, 874464, 874514, + 874564, 874614, 870857, 874713, 874763, 874813, 874863, 874913, 871150, 875012, 875062, 875112, + 871345, 875211, 875261, 875311, 875361, 871589, 875460, 875510, 871735, 875609, 875659, 875709, + 871930, 875808, 875858, 872076, 875957, 876007, 872222, 876106, 876156, 872368, 876255, 872465, + 876354, 876404, 872611, 876503, 876553, 872757, 876652, 872854, 876751, 872951, 876850, 873048, + 876949, 876999, 873194, 877098, 873291, 877197, 873388, 877296, 873485, 877395, 873582, 877494, + 873679, 873727, 877642, 873824, 877741, 873921, 877840, 874018, 877939, 874115, 874163, 878087, + 874260, 878186, 874357, 874405, 878334, 874502, 874550, 878482, 874647, 874695, 878630, 874792, + 874840, 878778, 874937, 874985, 878926, 875082, 875130, 875178, 879123, 875275, 875323, 875371, + 879320, 875468, 875516, 875564, 879517, 875661, 875709, 875757, 875805, 879763, 875902, 875950, + 875998, 876046, 876094, 876142, 880107, 876239, 876287, 876335, 876383, 876431, 876479, 876527, + 880500, 876624, 876672, 876720, 876768, 876816, 876864, 876912, 876960, 877008, 877056, 877104, + 877152, 877200, 877248, 877296, 877344, 877392, 877440, 877488, 877536, 877584, 877632, 877680, + 877728, 877776, 
877824, 877872, 877920, 877968, 874061, 878063, 878111, 878159, 878207, 878255, + 878303, 878351, 874436, 878446, 878494, 878542, 878590, 878638, 878686, 874764, 878781, 878829, + 878877, 878925, 874998, 879020, 879068, 879116, 875185, 879211, 879259, 879307, 875372, 879402, + 879450, 879498, 875559, 879593, 879641, 875699, 879736, 879784, 879832, 875886, 879927, 879975, + 876026, 880070, 876119, 880165, 880213, 876259, 880308, 880356, 876399, 880451, 876492, 880546, + 880594, 876632, 880689, 876725, 880784, 876818, 880879, 880927, 876958, 881022, 877051, 881117, + 877144, 881212, 877237, 881307, 877330, 881402, 877423, 881497, 877516, 881592, 877609, 881687, + 877702, 877748, 881829, 877841, 881924, 877934, 882019, 878027, 878073, 882161, 878166, 878212, + 882303, 878305, 882398, 878398, 878444, 882540, 878537, 878583, 882682, 878676, 878722, 882824, + 878815, 878861, 882966, 878954, 879000, 879046, 883155, 879139, 879185, 879231, 883344, 879324, + 879370, 879416, 883533, 879509, 879555, 879601, 883722, 879694, 879740, 879786, 879832, 879878, + 884005, 879971, 880017, 880063, 880109, 880155, 884288, 880248, 880294, 880340, 880386, 880432, + 880478, 880524, 880570, 884712 +}; + +static INLINE long long bsc_entropy(int n) +{ + if (n < 0x1000) return (long long) n * bsc_code_table[n]; + if (n < 0x100000) return (long long) n * (8 * 65536 + bsc_code_table[n >> 8]); + if (n < 0x10000000) return (long long) n * (16 * 65536 + bsc_code_table[n >> 16]); + return (long long) n * (20 * 65536 + bsc_code_table[n >> 20]); +} + +static INLINE long long bsc_delta(int n) +{ + if (n < 0x1000) return (long long) bsc_delta_table[n]; + if ((n & 0xff) != 0xff) + { + if (n < 0x100000) return (long long)(bsc_code_table[n >> 8] + 8 * 65536); + if (n < 0x10000000) return (long long)(bsc_code_table[n >> 16] + 16 * 65536); + return (long long)(bsc_code_table[n >> 20] + 20 * 65536); + } + return bsc_entropy(n + 1) - bsc_entropy(n); +} + +#endif + 
+/*-----------------------------------------------------------*/ +/* End tables.h */ +/*-----------------------------------------------------------*/ diff --git a/libbsc/libbsc/libbsc.h b/libbsc/libbsc/libbsc.h new file mode 100644 index 00000000..b9881b52 --- /dev/null +++ b/libbsc/libbsc/libbsc.h @@ -0,0 +1,157 @@ +/*-----------------------------------------------------------*/ +/* Block Sorting, Lossless Data Compression Library. */ +/* Interface to compression/decompression functions */ +/*-----------------------------------------------------------*/ + +/*-- + +This file is a part of bsc and/or libbsc, a program and a library for +lossless, block-sorting data compression. + + Copyright (c) 2009-2021 Ilya Grebnov + + Licensed under the Apache License, Version 2.0 (the "License"); + you may not use this file except in compliance with the License. + You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + + Unless required by applicable law or agreed to in writing, software + distributed under the License is distributed on an "AS IS" BASIS, + WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + See the License for the specific language governing permissions and + limitations under the License. + +Please see the file LICENSE for full copyright information and file AUTHORS +for full list of contributors. + +See also the bsc and libbsc web site: + http://libbsc.com/ for more information. 
+ +--*/ + +#ifndef _LIBBSC_LIBBSC_H +#define _LIBBSC_LIBBSC_H + +#define LIBBSC_NO_ERROR 0 +#define LIBBSC_BAD_PARAMETER -1 +#define LIBBSC_NOT_ENOUGH_MEMORY -2 +#define LIBBSC_NOT_COMPRESSIBLE -3 +#define LIBBSC_NOT_SUPPORTED -4 +#define LIBBSC_UNEXPECTED_EOB -5 +#define LIBBSC_DATA_CORRUPT -6 + +#define LIBBSC_GPU_ERROR -7 +#define LIBBSC_GPU_NOT_SUPPORTED -8 +#define LIBBSC_GPU_NOT_ENOUGH_MEMORY -9 + +#define LIBBSC_BLOCKSORTER_NONE 0 +#define LIBBSC_BLOCKSORTER_BWT 1 + +#ifdef LIBBSC_SORT_TRANSFORM_SUPPORT + + #define LIBBSC_BLOCKSORTER_ST3 3 + #define LIBBSC_BLOCKSORTER_ST4 4 + #define LIBBSC_BLOCKSORTER_ST5 5 + #define LIBBSC_BLOCKSORTER_ST6 6 + #define LIBBSC_BLOCKSORTER_ST7 7 + #define LIBBSC_BLOCKSORTER_ST8 8 + +#endif + +#define LIBBSC_CODER_NONE 0 +#define LIBBSC_CODER_QLFC_STATIC 1 +#define LIBBSC_CODER_QLFC_ADAPTIVE 2 +#define LIBBSC_CODER_QLFC_FAST 3 + +#define LIBBSC_FEATURE_NONE 0 +#define LIBBSC_FEATURE_FASTMODE 1 +#define LIBBSC_FEATURE_MULTITHREADING 2 +#define LIBBSC_FEATURE_LARGEPAGES 4 +#define LIBBSC_FEATURE_CUDA 8 + +#define LIBBSC_DEFAULT_LZPHASHSIZE 16 +#define LIBBSC_DEFAULT_LZPMINLEN 128 +#define LIBBSC_DEFAULT_BLOCKSORTER LIBBSC_BLOCKSORTER_BWT +#define LIBBSC_DEFAULT_CODER LIBBSC_CODER_QLFC_STATIC +#define LIBBSC_DEFAULT_FEATURES LIBBSC_FEATURE_FASTMODE | LIBBSC_FEATURE_MULTITHREADING + +#define LIBBSC_HEADER_SIZE 28 + +#ifdef __cplusplus +extern "C" { +#endif + + /** + * You should call this function (or @ref bsc_init_full) before you call any of the other functions in libbsc. + * @param features - the set of additional features. + * @return LIBBSC_NO_ERROR if no error occurred, error code otherwise. + */ + int bsc_init(int features); + + /** + * You should call this function (or @ref bsc_init) before you call any of the other functions in libbsc. + * @param features - the set of additional features. 
+ * @param malloc - function to use to allocate buffers + * @param zero_malloc - function to use to allocate zero-filled buffers + * @param free - function used to free buffers + * @return LIBBSC_NO_ERROR if no error occurred, error code otherwise. + */ + int bsc_init_full(int features, void* (* malloc)(size_t size), void* (* zero_malloc)(size_t size), void (* free)(void* address)); + + /** + * Compress a memory block. + * @param input - the input memory block of n bytes. + * @param output - the output memory block of n + LIBBSC_HEADER_SIZE bytes. + * @param n - the length of the input memory block. + * @param lzpHashSize[0, 10..28] - the hash table size if LZP enabled, 0 otherwise. + * @param lzpMinLen[0, 4..255] - the minimum match length if LZP enabled, 0 otherwise. + * @param blockSorter[ST3..ST8, BWT] - the block sorting algorithm. + * @param coder[MTF or QLFC] - the entropy coding algorithm. + * @param features - the set of additional features. + * @return the length of compressed memory block if no error occurred, error code otherwise. + */ + int bsc_compress(const unsigned char * input, unsigned char * output, int n, int lzpHashSize, int lzpMinLen, int blockSorter, int coder, int features); + + /** + * Store a memory block. + * @param input - the input memory block of n bytes. + * @param output - the output memory block of n + LIBBSC_HEADER_SIZE bytes. + * @param n - the length of the input memory block. + * @param features - the set of additional features. + * @return the length of stored memory block if no error occurred, error code otherwise. + */ + int bsc_store(const unsigned char * input, unsigned char * output, int n, int features); + + /** + * Determinate the sizes of input and output memory blocks for bsc_decompress function. + * @param blockHeader - the header of input(compressed) memory block of headerSize bytes. + * @param headerSize - the length of header, should be at least LIBBSC_HEADER_SIZE bytes. 
+ * @param pBlockSize[out] - the length of the input memory block for bsc_decompress function. + * @param pDataSize[out] - the length of the output memory block for bsc_decompress function. + * @param features - the set of additional features. + * @return LIBBSC_NO_ERROR if no error occurred, error code otherwise. + */ + int bsc_block_info(const unsigned char * blockHeader, int headerSize, int * pBlockSize, int * pDataSize, int features); + + /** + * Decompress a memory block. + * Note : You should call bsc_block_info function to determinate the sizes of input and output memory blocks. + * @param input - the input memory block of inputSize bytes. + * @param inputSize - the length of the input memory block. + * @param output - the output memory block of outputSize bytes. + * @param outputSize - the length of the output memory block. + * @param features - the set of additional features. + * @return LIBBSC_NO_ERROR if no error occurred, error code otherwise. + */ + int bsc_decompress(const unsigned char * input, int inputSize, unsigned char * output, int outputSize, int features); + +#ifdef __cplusplus +} +#endif + +#endif + +/*-------------------------------------------------*/ +/* End libbsc.h */ +/*-------------------------------------------------*/ diff --git a/libbsc/libbsc/libbsc/libbsc.cpp b/libbsc/libbsc/libbsc/libbsc.cpp new file mode 100644 index 00000000..8b6f54a6 --- /dev/null +++ b/libbsc/libbsc/libbsc/libbsc.cpp @@ -0,0 +1,620 @@ +/*-----------------------------------------------------------*/ +/* Block Sorting, Lossless Data Compression Library. */ +/* Compression/decompression functions */ +/*-----------------------------------------------------------*/ + +/*-- + +This file is a part of bsc and/or libbsc, a program and a library for +lossless, block-sorting data compression. + + Copyright (c) 2009-2021 Ilya Grebnov + + Licensed under the Apache License, Version 2.0 (the "License"); + you may not use this file except in compliance with the License. 
+ You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + + Unless required by applicable law or agreed to in writing, software + distributed under the License is distributed on an "AS IS" BASIS, + WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + See the License for the specific language governing permissions and + limitations under the License. + +Please see the file LICENSE for full copyright information and file AUTHORS +for full list of contributors. + +See also the bsc and libbsc web site: + http://libbsc.com/ for more information. + +--*/ + +#include +#include +#include + +#include "../platform/platform.h" +#include "../libbsc.h" + +#include "../adler32/adler32.h" +#include "../bwt/bwt.h" +#include "../lzp/lzp.h" +#include "../coder/coder.h" +#include "../st/st.h" + +int bsc_init_full(int features, void* (* malloc)(size_t size), void* (* zero_malloc)(size_t size), void (* free)(void* address)) +{ + int result = LIBBSC_NO_ERROR; + + if (result == LIBBSC_NO_ERROR) result = bsc_platform_init(features, malloc, zero_malloc, free); + if (result == LIBBSC_NO_ERROR) result = bsc_coder_init(features); + +#ifdef LIBBSC_SORT_TRANSFORM_SUPPORT + + if (result == LIBBSC_NO_ERROR) result = bsc_st_init(features); + +#endif + + return result; +} + +int bsc_init(int features) +{ + return bsc_init_full(features, NULL, NULL, NULL); +} + +int bsc_store(const unsigned char * input, unsigned char * output, int n, int features) +{ + unsigned int adler32_data = bsc_adler32(input, n, features); + + memmove(output + LIBBSC_HEADER_SIZE, input, n); + *(int *)(output + 0) = n + LIBBSC_HEADER_SIZE; + *(int *)(output + 4) = n; + *(int *)(output + 8) = 0; + *(int *)(output + 12) = 0; + *(int *)(output + 16) = adler32_data; + *(int *)(output + 20) = adler32_data; + *(int *)(output + 24) = bsc_adler32(output, 24, features); + return n + LIBBSC_HEADER_SIZE; +} + +int bsc_compress_inplace(unsigned char * data, int n, int lzpHashSize, int 
lzpMinLen, int blockSorter, int coder, int features) +{ + int indexes[256]; + unsigned char num_indexes; + + int mode = 0; + + switch (blockSorter) + { + case LIBBSC_BLOCKSORTER_BWT : mode = LIBBSC_BLOCKSORTER_BWT; break; + +#ifdef LIBBSC_SORT_TRANSFORM_SUPPORT + + case LIBBSC_BLOCKSORTER_ST3 : mode = LIBBSC_BLOCKSORTER_ST3; break; + case LIBBSC_BLOCKSORTER_ST4 : mode = LIBBSC_BLOCKSORTER_ST4; break; + case LIBBSC_BLOCKSORTER_ST5 : mode = LIBBSC_BLOCKSORTER_ST5; break; + case LIBBSC_BLOCKSORTER_ST6 : mode = LIBBSC_BLOCKSORTER_ST6; break; + case LIBBSC_BLOCKSORTER_ST7 : mode = LIBBSC_BLOCKSORTER_ST7; break; + case LIBBSC_BLOCKSORTER_ST8 : mode = LIBBSC_BLOCKSORTER_ST8; break; + +#endif + + default : return LIBBSC_BAD_PARAMETER; + } + + switch (coder) + { + case LIBBSC_CODER_QLFC_STATIC : mode += (LIBBSC_CODER_QLFC_STATIC << 5); break; + case LIBBSC_CODER_QLFC_ADAPTIVE : mode += (LIBBSC_CODER_QLFC_ADAPTIVE << 5); break; + case LIBBSC_CODER_QLFC_FAST : mode += (LIBBSC_CODER_QLFC_FAST << 5); break; + + default : return LIBBSC_BAD_PARAMETER; + } + + if (lzpMinLen != 0 || lzpHashSize != 0) + { + if (lzpMinLen < 4 || lzpMinLen > 255) return LIBBSC_BAD_PARAMETER; + if (lzpHashSize < 10 || lzpHashSize > 28) return LIBBSC_BAD_PARAMETER; + mode += (lzpMinLen << 8); + mode += (lzpHashSize << 16); + } + if (n < 0 || n > 2146435072) return LIBBSC_BAD_PARAMETER; + if (n <= LIBBSC_HEADER_SIZE) + { + return bsc_store(data, data, n, features); + } + + unsigned int adler32_data = bsc_adler32(data, n, features); + + int lzSize = n; + if (mode != (mode & 0xff)) + { + unsigned char * buffer = (unsigned char *)bsc_malloc(n); + if (buffer == NULL) return LIBBSC_NOT_ENOUGH_MEMORY; + + lzSize = bsc_lzp_compress(data, buffer, n, lzpHashSize, lzpMinLen, features); + if (lzSize < LIBBSC_NO_ERROR) + { + lzSize = n; mode &= 0xff; + } + else + { + memcpy(data, buffer, lzSize); + } + + bsc_free(buffer); + } + + if (lzSize <= LIBBSC_HEADER_SIZE) + { + blockSorter = LIBBSC_BLOCKSORTER_BWT; + mode = 
(mode & 0xffffffe0) | LIBBSC_BLOCKSORTER_BWT; + } + + int index = LIBBSC_BAD_PARAMETER; num_indexes = 0; + switch (blockSorter) + { + case LIBBSC_BLOCKSORTER_BWT : index = bsc_bwt_encode(data, lzSize, &num_indexes, indexes, features); break; + +#ifdef LIBBSC_SORT_TRANSFORM_SUPPORT + + case LIBBSC_BLOCKSORTER_ST3 : index = bsc_st_encode(data, lzSize, 3, features); break; + case LIBBSC_BLOCKSORTER_ST4 : index = bsc_st_encode(data, lzSize, 4, features); break; + case LIBBSC_BLOCKSORTER_ST5 : index = bsc_st_encode(data, lzSize, 5, features); break; + case LIBBSC_BLOCKSORTER_ST6 : index = bsc_st_encode(data, lzSize, 6, features); break; + case LIBBSC_BLOCKSORTER_ST7 : index = bsc_st_encode(data, lzSize, 7, features); break; + case LIBBSC_BLOCKSORTER_ST8 : index = bsc_st_encode(data, lzSize, 8, features); break; + +#endif + + default : return LIBBSC_BAD_PARAMETER; + } + + if (n < 64 * 1024) num_indexes = 0; + + if (index < LIBBSC_NO_ERROR) + { + return index; + } + + if (unsigned char * buffer = (unsigned char *)bsc_malloc(lzSize + 4096)) + { + int result = bsc_coder_compress(data, buffer, lzSize, coder, features); + if (result >= LIBBSC_NO_ERROR) memcpy(data + LIBBSC_HEADER_SIZE, buffer, result); + bsc_free(buffer); + if ((result < LIBBSC_NO_ERROR) || (result + 1 + 4 * num_indexes >= n)) + { + return LIBBSC_NOT_COMPRESSIBLE; + } + { + if (num_indexes > 0) + { + memcpy(data + LIBBSC_HEADER_SIZE + result, indexes, 4 * num_indexes); + } + data[LIBBSC_HEADER_SIZE + result + 4 * num_indexes] = num_indexes; + result += 1 + 4 * num_indexes; + } + *(int *)(data + 0) = result + LIBBSC_HEADER_SIZE; + *(int *)(data + 4) = n; + *(int *)(data + 8) = mode; + *(int *)(data + 12) = index; + *(int *)(data + 16) = adler32_data; + *(int *)(data + 20) = bsc_adler32(data + LIBBSC_HEADER_SIZE, result, features); + *(int *)(data + 24) = bsc_adler32(data, 24, features); + return result + LIBBSC_HEADER_SIZE; + } + + return LIBBSC_NOT_ENOUGH_MEMORY; +} + +int bsc_compress(const unsigned char * 
input, unsigned char * output, int n, int lzpHashSize, int lzpMinLen, int blockSorter, int coder, int features) +{ + if (input == output) + { + return bsc_compress_inplace(output, n, lzpHashSize, lzpMinLen, blockSorter, coder, features); + } + + int indexes[256]; + unsigned char num_indexes; + + int mode = 0; + + switch (blockSorter) + { + case LIBBSC_BLOCKSORTER_BWT : mode = LIBBSC_BLOCKSORTER_BWT; break; + +#ifdef LIBBSC_SORT_TRANSFORM_SUPPORT + + case LIBBSC_BLOCKSORTER_ST3 : mode = LIBBSC_BLOCKSORTER_ST3; break; + case LIBBSC_BLOCKSORTER_ST4 : mode = LIBBSC_BLOCKSORTER_ST4; break; + case LIBBSC_BLOCKSORTER_ST5 : mode = LIBBSC_BLOCKSORTER_ST5; break; + case LIBBSC_BLOCKSORTER_ST6 : mode = LIBBSC_BLOCKSORTER_ST6; break; + case LIBBSC_BLOCKSORTER_ST7 : mode = LIBBSC_BLOCKSORTER_ST7; break; + case LIBBSC_BLOCKSORTER_ST8 : mode = LIBBSC_BLOCKSORTER_ST8; break; + +#endif + + default : return LIBBSC_BAD_PARAMETER; + } + + switch (coder) + { + case LIBBSC_CODER_QLFC_STATIC : mode += (LIBBSC_CODER_QLFC_STATIC << 5); break; + case LIBBSC_CODER_QLFC_ADAPTIVE : mode += (LIBBSC_CODER_QLFC_ADAPTIVE << 5); break; + case LIBBSC_CODER_QLFC_FAST : mode += (LIBBSC_CODER_QLFC_FAST << 5); break; + + default : return LIBBSC_BAD_PARAMETER; + } + + if (lzpMinLen != 0 || lzpHashSize != 0) + { + if (lzpMinLen < 4 || lzpMinLen > 255) return LIBBSC_BAD_PARAMETER; + if (lzpHashSize < 10 || lzpHashSize > 28) return LIBBSC_BAD_PARAMETER; + mode += (lzpMinLen << 8); + mode += (lzpHashSize << 16); + } + if (n < 0 || n > 1073741824) return LIBBSC_BAD_PARAMETER; + if (n <= LIBBSC_HEADER_SIZE) + { + return bsc_store(input, output, n, features); + } + int lzSize = 0; + if (mode != (mode & 0xff)) + { + lzSize = bsc_lzp_compress(input, output, n, lzpHashSize, lzpMinLen, features); + if (lzSize < LIBBSC_NO_ERROR) + { + mode &= 0xff; + } + } + if (mode == (mode & 0xff)) + { + lzSize = n; memcpy(output, input, n); + } + + if (lzSize <= LIBBSC_HEADER_SIZE) + { + blockSorter = LIBBSC_BLOCKSORTER_BWT; + 
mode = (mode & 0xffffffe0) | LIBBSC_BLOCKSORTER_BWT; + } + + int index = LIBBSC_BAD_PARAMETER; num_indexes = 0; + switch (blockSorter) + { + case LIBBSC_BLOCKSORTER_BWT : index = bsc_bwt_encode(output, lzSize, &num_indexes, indexes, features); break; + +#ifdef LIBBSC_SORT_TRANSFORM_SUPPORT + + case LIBBSC_BLOCKSORTER_ST3 : index = bsc_st_encode(output, lzSize, 3, features); break; + case LIBBSC_BLOCKSORTER_ST4 : index = bsc_st_encode(output, lzSize, 4, features); break; + case LIBBSC_BLOCKSORTER_ST5 : index = bsc_st_encode(output, lzSize, 5, features); break; + case LIBBSC_BLOCKSORTER_ST6 : index = bsc_st_encode(output, lzSize, 6, features); break; + case LIBBSC_BLOCKSORTER_ST7 : index = bsc_st_encode(output, lzSize, 7, features); break; + case LIBBSC_BLOCKSORTER_ST8 : index = bsc_st_encode(output, lzSize, 8, features); break; + +#endif + + default : return LIBBSC_BAD_PARAMETER; + } + + if (n < 64 * 1024) num_indexes = 0; + + if (index < LIBBSC_NO_ERROR) + { + return index; + } + + if (unsigned char * buffer = (unsigned char *)bsc_malloc(lzSize + 4096)) + { + int result = bsc_coder_compress(output, buffer, lzSize, coder, features); + if (result >= LIBBSC_NO_ERROR) memcpy(output + LIBBSC_HEADER_SIZE, buffer, result); + bsc_free(buffer); + if ((result < LIBBSC_NO_ERROR) || (result + 1 + 4 * num_indexes >= n)) + { + return bsc_store(input, output, n, features); + } + { + if (num_indexes > 0) + { + memcpy(output + LIBBSC_HEADER_SIZE + result, indexes, 4 * num_indexes); + } + output[LIBBSC_HEADER_SIZE + result + 4 * num_indexes] = num_indexes; + result += 1 + 4 * num_indexes; + } + *(int *)(output + 0) = result + LIBBSC_HEADER_SIZE; + *(int *)(output + 4) = n; + *(int *)(output + 8) = mode; + *(int *)(output + 12) = index; + *(int *)(output + 16) = bsc_adler32(input, n, features); + *(int *)(output + 20) = bsc_adler32(output + LIBBSC_HEADER_SIZE, result, features); + *(int *)(output + 24) = bsc_adler32(output, 24, features); + return result + LIBBSC_HEADER_SIZE; + } + + 
return LIBBSC_NOT_ENOUGH_MEMORY; +} + +int bsc_block_info(const unsigned char * blockHeader, int headerSize, int * pBlockSize, int * pDataSize, int features) +{ + if (headerSize < LIBBSC_HEADER_SIZE) + { + return LIBBSC_UNEXPECTED_EOB; + } + + if (*(unsigned int *)(blockHeader + 24) != bsc_adler32(blockHeader, 24, features)) + { + return LIBBSC_DATA_CORRUPT; + } + + int blockSize = *(int *)(blockHeader + 0); + int dataSize = *(int *)(blockHeader + 4); + int mode = *(int *)(blockHeader + 8); + int index = *(int *)(blockHeader + 12); + + int lzpHashSize = (mode >> 16) & 0xff; + int lzpMinLen = (mode >> 8) & 0xff; + int coder = (mode >> 5) & 0x7; + int blockSorter = (mode >> 0) & 0x1f; + + int test_mode = 0; + + switch (blockSorter) + { + case LIBBSC_BLOCKSORTER_BWT : test_mode = LIBBSC_BLOCKSORTER_BWT; break; + +#ifdef LIBBSC_SORT_TRANSFORM_SUPPORT + + case LIBBSC_BLOCKSORTER_ST3 : test_mode = LIBBSC_BLOCKSORTER_ST3; break; + case LIBBSC_BLOCKSORTER_ST4 : test_mode = LIBBSC_BLOCKSORTER_ST4; break; + case LIBBSC_BLOCKSORTER_ST5 : test_mode = LIBBSC_BLOCKSORTER_ST5; break; + case LIBBSC_BLOCKSORTER_ST6 : test_mode = LIBBSC_BLOCKSORTER_ST6; break; + case LIBBSC_BLOCKSORTER_ST7 : test_mode = LIBBSC_BLOCKSORTER_ST7; break; + case LIBBSC_BLOCKSORTER_ST8 : test_mode = LIBBSC_BLOCKSORTER_ST8; break; + +#endif + + default : if (blockSorter > 0) return LIBBSC_DATA_CORRUPT; + } + + switch (coder) + { + case LIBBSC_CODER_QLFC_STATIC : test_mode += (LIBBSC_CODER_QLFC_STATIC << 5); break; + case LIBBSC_CODER_QLFC_ADAPTIVE : test_mode += (LIBBSC_CODER_QLFC_ADAPTIVE << 5); break; + case LIBBSC_CODER_QLFC_FAST : test_mode += (LIBBSC_CODER_QLFC_FAST << 5); break; + + default : if (coder > 0) return LIBBSC_DATA_CORRUPT; + } + + if (lzpMinLen != 0 || lzpHashSize != 0) + { + if (lzpMinLen < 4 || lzpMinLen > 255) return LIBBSC_DATA_CORRUPT; + if (lzpHashSize < 10 || lzpHashSize > 28) return LIBBSC_DATA_CORRUPT; + test_mode += (lzpMinLen << 8); + test_mode += (lzpHashSize << 16); + } + + 
if (test_mode != mode) + { + return LIBBSC_DATA_CORRUPT; + } + + if (blockSize < LIBBSC_HEADER_SIZE || blockSize > LIBBSC_HEADER_SIZE + dataSize) + { + return LIBBSC_DATA_CORRUPT; + } + + if (index < 0 || index > dataSize) + { + return LIBBSC_DATA_CORRUPT; + } + + if (pBlockSize != NULL) *pBlockSize = blockSize; + if (pDataSize != NULL) *pDataSize = dataSize; + + return LIBBSC_NO_ERROR; +} + +int bsc_decompress_inplace(unsigned char * data, int inputSize, int outputSize, int features) +{ + int indexes[256]; + unsigned char num_indexes; + + int blockSize = 0, dataSize = 0; + + int info = bsc_block_info(data, inputSize, &blockSize, &dataSize, features); + if (info != LIBBSC_NO_ERROR) + { + return info; + } + + if (inputSize < blockSize || outputSize < dataSize) + { + return LIBBSC_UNEXPECTED_EOB; + } + + if (*(unsigned int *)(data + 20) != bsc_adler32(data + LIBBSC_HEADER_SIZE, blockSize - LIBBSC_HEADER_SIZE, features)) + { + return LIBBSC_DATA_CORRUPT; + } + + int mode = *(int *)(data + 8); + if (mode == 0) + { + memmove(data, data + LIBBSC_HEADER_SIZE, dataSize); + return LIBBSC_NO_ERROR; + } + + int index = *(int *)(data + 12); + unsigned int adler32_data = *(int *)(data + 16); + + num_indexes = data[blockSize - 1]; + if (num_indexes > 0) + { + memcpy(indexes, data + blockSize - 1 - 4 * num_indexes, 4 * num_indexes); + } + + int lzpHashSize = (mode >> 16) & 0xff; + int lzpMinLen = (mode >> 8) & 0xff; + int coder = (mode >> 5) & 0x7; + int blockSorter = (mode >> 0) & 0x1f; + + int lzSize = LIBBSC_NO_ERROR; + { + unsigned char * buffer = (unsigned char *)bsc_malloc(blockSize); + if (buffer == NULL) return LIBBSC_NOT_ENOUGH_MEMORY; + + memcpy(buffer, data, blockSize); + + lzSize = bsc_coder_decompress(buffer + LIBBSC_HEADER_SIZE, data, coder, features); + + bsc_free(buffer); + } + if (lzSize < LIBBSC_NO_ERROR) + { + return lzSize; + } + + int result; + switch (blockSorter) + { + case LIBBSC_BLOCKSORTER_BWT : result = bsc_bwt_decode(data, lzSize, index, num_indexes, 
indexes, features); break; + +#ifdef LIBBSC_SORT_TRANSFORM_SUPPORT + + case LIBBSC_BLOCKSORTER_ST3 : result = bsc_st_decode(data, lzSize, 3, index, features); break; + case LIBBSC_BLOCKSORTER_ST4 : result = bsc_st_decode(data, lzSize, 4, index, features); break; + case LIBBSC_BLOCKSORTER_ST5 : result = bsc_st_decode(data, lzSize, 5, index, features); break; + case LIBBSC_BLOCKSORTER_ST6 : result = bsc_st_decode(data, lzSize, 6, index, features); break; + case LIBBSC_BLOCKSORTER_ST7 : result = bsc_st_decode(data, lzSize, 7, index, features); break; + case LIBBSC_BLOCKSORTER_ST8 : result = bsc_st_decode(data, lzSize, 8, index, features); break; + +#endif + + default : return LIBBSC_DATA_CORRUPT; + } + if (result < LIBBSC_NO_ERROR) + { + return result; + } + + if (mode != (mode & 0xff)) + { + if (unsigned char * buffer = (unsigned char *)bsc_malloc(lzSize)) + { + memcpy(buffer, data, lzSize); + result = bsc_lzp_decompress(buffer, data, lzSize, lzpHashSize, lzpMinLen, features); + bsc_free(buffer); + if (result < LIBBSC_NO_ERROR) + { + return result; + } + return result == dataSize ? (adler32_data == bsc_adler32(data, dataSize, features) ? LIBBSC_NO_ERROR : LIBBSC_DATA_CORRUPT) : LIBBSC_DATA_CORRUPT; + } + return LIBBSC_NOT_ENOUGH_MEMORY; + } + + return lzSize == dataSize ? (adler32_data == bsc_adler32(data, dataSize, features) ? 
LIBBSC_NO_ERROR : LIBBSC_DATA_CORRUPT) : LIBBSC_DATA_CORRUPT; +} + +int bsc_decompress(const unsigned char * input, int inputSize, unsigned char * output, int outputSize, int features) +{ + int indexes[256]; + unsigned char num_indexes; + + if (input == output) + { + return bsc_decompress_inplace(output, inputSize, outputSize, features); + } + + int blockSize = 0, dataSize = 0; + + int info = bsc_block_info(input, inputSize, &blockSize, &dataSize, features); + if (info != LIBBSC_NO_ERROR) + { + return info; + } + + if (inputSize < blockSize || outputSize < dataSize) + { + return LIBBSC_UNEXPECTED_EOB; + } + + if (*(unsigned int *)(input + 20) != bsc_adler32(input + LIBBSC_HEADER_SIZE, blockSize - LIBBSC_HEADER_SIZE, features)) + { + return LIBBSC_DATA_CORRUPT; + } + + int mode = *(int *)(input + 8); + if (mode == 0) + { + memcpy(output, input + LIBBSC_HEADER_SIZE, dataSize); + return LIBBSC_NO_ERROR; + } + + int index = *(int *)(input + 12); + unsigned int adler32_data = *(int *)(input + 16); + + num_indexes = input[blockSize - 1]; + if (num_indexes > 0) + { + memcpy(indexes, input + blockSize - 1 - 4 * num_indexes, 4 * num_indexes); + } + + int lzpHashSize = (mode >> 16) & 0xff; + int lzpMinLen = (mode >> 8) & 0xff; + int coder = (mode >> 5) & 0x7; + int blockSorter = (mode >> 0) & 0x1f; + + int lzSize = bsc_coder_decompress(input + LIBBSC_HEADER_SIZE, output, coder, features); + if (lzSize < LIBBSC_NO_ERROR) + { + return lzSize; + } + + int result; + switch (blockSorter) + { + case LIBBSC_BLOCKSORTER_BWT : result = bsc_bwt_decode(output, lzSize, index, num_indexes, indexes, features); break; + +#ifdef LIBBSC_SORT_TRANSFORM_SUPPORT + + case LIBBSC_BLOCKSORTER_ST3 : result = bsc_st_decode(output, lzSize, 3, index, features); break; + case LIBBSC_BLOCKSORTER_ST4 : result = bsc_st_decode(output, lzSize, 4, index, features); break; + case LIBBSC_BLOCKSORTER_ST5 : result = bsc_st_decode(output, lzSize, 5, index, features); break; + case LIBBSC_BLOCKSORTER_ST6 : result 
= bsc_st_decode(output, lzSize, 6, index, features); break; + case LIBBSC_BLOCKSORTER_ST7 : result = bsc_st_decode(output, lzSize, 7, index, features); break; + case LIBBSC_BLOCKSORTER_ST8 : result = bsc_st_decode(output, lzSize, 8, index, features); break; + +#endif + + default : return LIBBSC_DATA_CORRUPT; + } + if (result < LIBBSC_NO_ERROR) + { + return result; + } + + if (mode != (mode & 0xff)) + { + if (unsigned char * buffer = (unsigned char *)bsc_malloc(lzSize)) + { + memcpy(buffer, output, lzSize); + result = bsc_lzp_decompress(buffer, output, lzSize, lzpHashSize, lzpMinLen, features); + bsc_free(buffer); + if (result < LIBBSC_NO_ERROR) + { + return result; + } + return result == dataSize ? (adler32_data == bsc_adler32(output, dataSize, features) ? LIBBSC_NO_ERROR : LIBBSC_DATA_CORRUPT) : LIBBSC_DATA_CORRUPT; + } + return LIBBSC_NOT_ENOUGH_MEMORY; + } + + return lzSize == dataSize ? (adler32_data == bsc_adler32(output, dataSize, features) ? LIBBSC_NO_ERROR : LIBBSC_DATA_CORRUPT) : LIBBSC_DATA_CORRUPT; +} + +/*-------------------------------------------------*/ +/* End libbsc.cpp */ +/*-------------------------------------------------*/ diff --git a/libbsc/libbsc/lzp/lzp.cpp b/libbsc/libbsc/lzp/lzp.cpp new file mode 100644 index 00000000..07cc7e9d --- /dev/null +++ b/libbsc/libbsc/lzp/lzp.cpp @@ -0,0 +1,884 @@ +/*-----------------------------------------------------------*/ +/* Block Sorting, Lossless Data Compression Library. */ +/* Lempel Ziv Prediction */ +/*-----------------------------------------------------------*/ + +/*-- + +This file is a part of bsc and/or libbsc, a program and a library for +lossless, block-sorting data compression. + + Copyright (c) 2009-2021 Ilya Grebnov + + Licensed under the Apache License, Version 2.0 (the "License"); + you may not use this file except in compliance with the License. 
+ You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + + Unless required by applicable law or agreed to in writing, software + distributed under the License is distributed on an "AS IS" BASIS, + WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + See the License for the specific language governing permissions and + limitations under the License. + +Please see the file LICENSE for full copyright information and file AUTHORS +for full list of contributors. + +See also the bsc and libbsc web site: + http://libbsc.com/ for more information. + +--*/ + +#include +#include +#include + +#include "lzp.h" + +#include "../platform/platform.h" +#include "../libbsc.h" + +#define LIBBSC_LZP_MATCH_FLAG 0xf2 + +static INLINE int bsc_lzp_num_blocks(int n) +{ + if (n < 256 * 1024) return 1; + if (n < 4 * 1024 * 1024) return 2; + if (n < 16 * 1024 * 1024) return 4; + + return 8; +} + +#if defined(LIBBSC_ALLOW_UNALIGNED_ACCESS) && (defined(__x86_64__) || defined(__aarch64__)) + +template int bsc_lzp_encode_small(const unsigned char * RESTRICT input, const unsigned char * inputEnd, unsigned char * RESTRICT output, unsigned char * outputEnd, int * RESTRICT lookup, int mask) +{ + const unsigned char * inputStart = input; + const unsigned char * inputMinLenEnd = inputEnd - sizeof(T) - 32; + + const unsigned char * outputStart = output; + const unsigned char * outputEOB = outputEnd - 8; + + for (int i = 0; i < 4; ++i) { *output++ = *input++; } + + { + while ((input < inputMinLenEnd) && (output < outputEOB)) + { + unsigned long long next8 = *(unsigned long long *)(input - 4); *(unsigned int *)(output) = (unsigned int)(next8 >> 32); next8 = bsc_byteswap_uint64(next8); + + int value; + { + const unsigned int index0 = (((next8 >> (4 * 8)) >> 15) ^ (next8 >> (4 * 8)) ^ ((next8 >> (4 * 8)) >> 3)) & mask; value = lookup[index0]; lookup[index0] = (int)(input - inputStart + 0); + if (value > 0 && (*(T *)(input + 0) == *(T *)(inputStart + 
value))) goto LIBBSC_LZP_GOOD_MATCH_FOUND1; + if (value > 0 && ((unsigned char)(next8 >> 3 * 8) == LIBBSC_LZP_MATCH_FLAG)) goto LIBBSC_LZP_BAD_MATCH_FOUND1; + + const unsigned int index1 = (((next8 >> (3 * 8)) >> 15) ^ (next8 >> (3 * 8)) ^ ((next8 >> (3 * 8)) >> 3)) & mask; value = lookup[index1]; lookup[index1] = (int)(input - inputStart + 1); + if (value > 0 && (*(T *)(input + 1) == *(T *)(inputStart + value))) goto LIBBSC_LZP_GOOD_MATCH_FOUND2; + if (value > 0 && ((unsigned char)(next8 >> 2 * 8) == LIBBSC_LZP_MATCH_FLAG)) goto LIBBSC_LZP_BAD_MATCH_FOUND2; + + const unsigned int index2 = (((next8 >> (2 * 8)) >> 15) ^ (next8 >> (2 * 8)) ^ ((next8 >> (2 * 8)) >> 3)) & mask; value = lookup[index2]; lookup[index2] = (int)(input - inputStart + 2); + if (value > 0 && (*(T *)(input + 2) == *(T *)(inputStart + value))) goto LIBBSC_LZP_GOOD_MATCH_FOUND3; + if (value > 0 && ((unsigned char)(next8 >> 1 * 8) == LIBBSC_LZP_MATCH_FLAG)) goto LIBBSC_LZP_BAD_MATCH_FOUND3; + + const unsigned int index3 = (((next8 >> (1 * 8)) >> 15) ^ (next8 >> (1 * 8)) ^ ((next8 >> (1 * 8)) >> 3)) & mask; value = lookup[index3]; lookup[index3] = (int)(input - inputStart + 3); + if (value > 0 && (*(T *)(input + 3) == *(T *)(inputStart + value))) goto LIBBSC_LZP_GOOD_MATCH_FOUND4; + if (value > 0 && ((unsigned char)(next8 >> 0 * 8) == LIBBSC_LZP_MATCH_FLAG)) goto LIBBSC_LZP_BAD_MATCH_FOUND4; + + input += 4; output += 4; + + continue; + } + +LIBBSC_LZP_GOOD_MATCH_FOUND4: + input += 1; output += 1; +LIBBSC_LZP_GOOD_MATCH_FOUND3: + input += 1; output += 1; +LIBBSC_LZP_GOOD_MATCH_FOUND2: + input += 1; output += 1; +LIBBSC_LZP_GOOD_MATCH_FOUND1: + + { + const unsigned char * RESTRICT reference = inputStart + value; + + long long len = sizeof(T); + + for (; input + len < inputMinLenEnd; len += sizeof(unsigned long long)) + { + unsigned long long m; + if ((m = (*(unsigned long long *)(input + len)) ^ *(unsigned long long *)(reference + len)) != 0) + { + len += bsc_bit_scan_forward64(m) / 8; break; + } + } 
+ + input += len; len -= sizeof(T); + + *output++ = LIBBSC_LZP_MATCH_FLAG; while (len >= 254) { len -= 254; *output++ = 254; if (output >= outputEOB) break; } *output++ = (unsigned char)(len); + + continue; + } + +LIBBSC_LZP_BAD_MATCH_FOUND4: + input += 4; output += 4; *output++ = 255; continue; +LIBBSC_LZP_BAD_MATCH_FOUND3: + input += 3; output += 3; *output++ = 255; continue; +LIBBSC_LZP_BAD_MATCH_FOUND2: + input += 2; output += 2; *output++ = 255; continue; +LIBBSC_LZP_BAD_MATCH_FOUND1: + input += 1; output += 1; *output++ = 255; continue; + } + } + + { + unsigned int context = input[-1] | (input[-2] << 8) | (input[-3] << 16) | (input[-4] << 24); + + while ((input < inputEnd) && (output < outputEOB)) + { + unsigned int index = ((context >> 15) ^ context ^ (context >> 3)) & mask; + int value = lookup[index]; lookup[index] = (int)(input - inputStart); + + unsigned char next = *output++ = *input++; context = (context << 8) | next; + if (next == LIBBSC_LZP_MATCH_FLAG && value > 0) *output++ = 255; + } + } + + return (output >= outputEOB) ? 
LIBBSC_NOT_COMPRESSIBLE : (int)(output - outputStart);
+}
+
+/* LZP encoder for a minimum match length of exactly 2 * sizeof(T) bytes.      */
+/* A predicted match is accepted only when both T-sized words at its start     */
+/* equal the words at the current position.                                    */
+/* `lookup` is the hash table of previous positions and `mask` its index mask. */
+/* Returns the number of bytes written to `output`, or LIBBSC_NOT_COMPRESSIBLE */
+/* once the write position reaches `outputEnd - 8`.                            */
+template <class T> int bsc_lzp_encode_small2x(const unsigned char * RESTRICT input, const unsigned char * inputEnd, unsigned char * RESTRICT output, unsigned char * outputEnd, int * RESTRICT lookup, int mask)
+{
+    const unsigned char * inputStart = input;
+    const unsigned char * inputMinLenEnd = inputEnd - sizeof(T) - sizeof(T) - 32;
+
+    const unsigned char * outputStart = output;
+    const unsigned char * outputEOB = outputEnd - 8;
+
+    for (int i = 0; i < 4; ++i) { *output++ = *input++; }
+
+    {
+        while ((input < inputMinLenEnd) && (output < outputEOB))
+        {
+            unsigned long long next8 = *(unsigned long long *)(input - 4); *(unsigned int *)(output) = (unsigned int)(next8 >> 32); next8 = bsc_byteswap_uint64(next8);
+
+            int value;
+            {
+                const unsigned int index0 = (((next8 >> (4 * 8)) >> 15) ^ (next8 >> (4 * 8)) ^ ((next8 >> (4 * 8)) >> 3)) & mask; value = lookup[index0]; lookup[index0] = (int)(input - inputStart + 0);
+                if (value > 0 && (*(T *)(input + sizeof(T) + 0) == *(T *)(inputStart + value + sizeof(T))) && (*(T *)(input + 0) == *(T *)(inputStart + value))) goto LIBBSC_LZP_GOOD_MATCH_FOUND1;
+                if (value > 0 && ((unsigned char)(next8 >> 3 * 8) == LIBBSC_LZP_MATCH_FLAG)) goto LIBBSC_LZP_BAD_MATCH_FOUND1;
+
+                const unsigned int index1 = (((next8 >> (3 * 8)) >> 15) ^ (next8 >> (3 * 8)) ^ ((next8 >> (3 * 8)) >> 3)) & mask; value = lookup[index1]; lookup[index1] = (int)(input - inputStart + 1);
+                if (value > 0 && (*(T *)(input + sizeof(T) + 1) == *(T *)(inputStart + value + sizeof(T))) && (*(T *)(input + 1) == *(T *)(inputStart + value))) goto LIBBSC_LZP_GOOD_MATCH_FOUND2;
+                if (value > 0 && ((unsigned char)(next8 >> 2 * 8) == LIBBSC_LZP_MATCH_FLAG)) goto LIBBSC_LZP_BAD_MATCH_FOUND2;
+
+                const unsigned int index2 = (((next8 >> (2 * 8)) >> 15) ^ (next8 >> (2 * 8)) ^ ((next8 >> (2 * 8)) >> 3)) & mask; value = lookup[index2]; lookup[index2] = (int)(input - inputStart + 2);
+                if (value > 0 && (*(T *)(input + sizeof(T) + 2) 
== *(T *)(inputStart + value + sizeof(T))) && (*(T *)(input + 2) == *(T *)(inputStart + value))) goto LIBBSC_LZP_GOOD_MATCH_FOUND3; + if (value > 0 && ((unsigned char)(next8 >> 1 * 8) == LIBBSC_LZP_MATCH_FLAG)) goto LIBBSC_LZP_BAD_MATCH_FOUND3; + + const unsigned int index3 = (((next8 >> (1 * 8)) >> 15) ^ (next8 >> (1 * 8)) ^ ((next8 >> (1 * 8)) >> 3)) & mask; value = lookup[index3]; lookup[index3] = (int)(input - inputStart + 3); + if (value > 0 && (*(T *)(input + sizeof(T) + 3) == *(T *)(inputStart + value + sizeof(T))) && (*(T *)(input + 3) == *(T *)(inputStart + value))) goto LIBBSC_LZP_GOOD_MATCH_FOUND4; + if (value > 0 && ((unsigned char)(next8 >> 0 * 8) == LIBBSC_LZP_MATCH_FLAG)) goto LIBBSC_LZP_BAD_MATCH_FOUND4; + + input += 4; output += 4; + + continue; + } + +LIBBSC_LZP_GOOD_MATCH_FOUND4: + input += 1; output += 1; +LIBBSC_LZP_GOOD_MATCH_FOUND3: + input += 1; output += 1; +LIBBSC_LZP_GOOD_MATCH_FOUND2: + input += 1; output += 1; +LIBBSC_LZP_GOOD_MATCH_FOUND1: + + { + const unsigned char * RESTRICT reference = inputStart + value; + + long long len = sizeof(T) + sizeof(T); + + for (; input + len < inputMinLenEnd; len += sizeof(unsigned long long)) + { + unsigned long long m; + if ((m = (*(unsigned long long *)(input + len)) ^ *(unsigned long long *)(reference + len)) != 0) + { + len += bsc_bit_scan_forward64(m) / 8; break; + } + } + + input += len; len -= sizeof(T) + sizeof(T); + + *output++ = LIBBSC_LZP_MATCH_FLAG; while (len >= 254) { len -= 254; *output++ = 254; if (output >= outputEOB) break; } *output++ = (unsigned char)(len); + + continue; + } + +LIBBSC_LZP_BAD_MATCH_FOUND4: + input += 4; output += 4; *output++ = 255; continue; +LIBBSC_LZP_BAD_MATCH_FOUND3: + input += 3; output += 3; *output++ = 255; continue; +LIBBSC_LZP_BAD_MATCH_FOUND2: + input += 2; output += 2; *output++ = 255; continue; +LIBBSC_LZP_BAD_MATCH_FOUND1: + input += 1; output += 1; *output++ = 255; continue; + } + } + + { + unsigned int context = input[-1] | (input[-2] << 8) | 
(input[-3] << 16) | (input[-4] << 24);
+
+        while ((input < inputEnd) && (output < outputEOB))
+        {
+            unsigned int index = ((context >> 15) ^ context ^ (context >> 3)) & mask;
+            int value = lookup[index]; lookup[index] = (int)(input - inputStart);
+
+            unsigned char next = *output++ = *input++; context = (context << 8) | next;
+            if (next == LIBBSC_LZP_MATCH_FLAG && value > 0) *output++ = 255;
+        }
+    }
+
+    return (output >= outputEOB) ? LIBBSC_NOT_COMPRESSIBLE : (int)(output - outputStart);
+}
+
+/* LZP encoder for a run-time minimum match length `minLen`.                   */
+/* A predicted match is accepted when the first and the last T-sized word of   */
+/* its minLen-byte window equal those at the current position, so minLen is    */
+/* expected to be in [sizeof(T), 2 * sizeof(T)] (see bsc_lzp_encode_block).    */
+/* `lookup` is the hash table of previous positions and `mask` its index mask. */
+/* Returns the number of bytes written to `output`, or LIBBSC_NOT_COMPRESSIBLE */
+/* once the write position reaches `outputEnd - 8`.                            */
+template <class T> int bsc_lzp_encode_medium(const unsigned char * RESTRICT input, const unsigned char * inputEnd, unsigned char * RESTRICT output, unsigned char * outputEnd, int * RESTRICT lookup, int mask, int minLen)
+{
+    const unsigned char * inputStart = input;
+    const unsigned char * inputMinLenEnd = inputEnd - sizeof(T) - sizeof(T) - 32;
+
+    const unsigned char * outputStart = output;
+    const unsigned char * outputEOB = outputEnd - 8;
+
+    for (int i = 0; i < 4; ++i) { *output++ = *input++; }
+
+    {
+        while ((input < inputMinLenEnd) && (output < outputEOB))
+        {
+            unsigned long long next8 = *(unsigned long long *)(input - 4); *(unsigned int *)(output) = (unsigned int)(next8 >> 32); next8 = bsc_byteswap_uint64(next8);
+
+            int value;
+            {
+                const unsigned int index0 = (((next8 >> (4 * 8)) >> 15) ^ (next8 >> (4 * 8)) ^ ((next8 >> (4 * 8)) >> 3)) & mask; value = lookup[index0]; lookup[index0] = (int)(input - inputStart + 0);
+                if (value > 0 && (*(T *)(input + minLen - sizeof(T) + 0) == *(T *)(inputStart + value + minLen - sizeof(T))) && (*(T *)(input + 0) == *(T *)(inputStart + value))) goto LIBBSC_LZP_GOOD_MATCH_FOUND1;
+                if (value > 0 && ((unsigned char)(next8 >> 3 * 8) == LIBBSC_LZP_MATCH_FLAG)) goto LIBBSC_LZP_BAD_MATCH_FOUND1;
+
+                const unsigned int index1 = (((next8 >> (3 * 8)) >> 15) ^ (next8 >> (3 * 8)) ^ ((next8 >> (3 * 8)) >> 3)) & mask; value = lookup[index1]; lookup[index1] = (int)(input - inputStart + 1);
+                if (value > 0 && (*(T *)(input + minLen - sizeof(T) + 1) == *(T 
*)(inputStart + value + minLen - sizeof(T))) && (*(T *)(input + 1) == *(T *)(inputStart + value))) goto LIBBSC_LZP_GOOD_MATCH_FOUND2; + if (value > 0 && ((unsigned char)(next8 >> 2 * 8) == LIBBSC_LZP_MATCH_FLAG)) goto LIBBSC_LZP_BAD_MATCH_FOUND2; + + const unsigned int index2 = (((next8 >> (2 * 8)) >> 15) ^ (next8 >> (2 * 8)) ^ ((next8 >> (2 * 8)) >> 3)) & mask; value = lookup[index2]; lookup[index2] = (int)(input - inputStart + 2); + if (value > 0 && (*(T *)(input + minLen - sizeof(T) + 2) == *(T *)(inputStart + value + minLen - sizeof(T))) && (*(T *)(input + 2) == *(T *)(inputStart + value))) goto LIBBSC_LZP_GOOD_MATCH_FOUND3; + if (value > 0 && ((unsigned char)(next8 >> 1 * 8) == LIBBSC_LZP_MATCH_FLAG)) goto LIBBSC_LZP_BAD_MATCH_FOUND3; + + const unsigned int index3 = (((next8 >> (1 * 8)) >> 15) ^ (next8 >> (1 * 8)) ^ ((next8 >> (1 * 8)) >> 3)) & mask; value = lookup[index3]; lookup[index3] = (int)(input - inputStart + 3); + if (value > 0 && (*(T *)(input + minLen - sizeof(T) + 3) == *(T *)(inputStart + value + minLen - sizeof(T))) && (*(T *)(input + 3) == *(T *)(inputStart + value))) goto LIBBSC_LZP_GOOD_MATCH_FOUND4; + if (value > 0 && ((unsigned char)(next8 >> 0 * 8) == LIBBSC_LZP_MATCH_FLAG)) goto LIBBSC_LZP_BAD_MATCH_FOUND4; + + input += 4; output += 4; + + continue; + } + +LIBBSC_LZP_GOOD_MATCH_FOUND4: + input += 1; output += 1; +LIBBSC_LZP_GOOD_MATCH_FOUND3: + input += 1; output += 1; +LIBBSC_LZP_GOOD_MATCH_FOUND2: + input += 1; output += 1; +LIBBSC_LZP_GOOD_MATCH_FOUND1: + + { + const unsigned char * RESTRICT reference = inputStart + value; + + long long len = minLen; + + for (; input + len < inputMinLenEnd; len += sizeof(unsigned long long)) + { + unsigned long long m; + if ((m = (*(unsigned long long *)(input + len)) ^ *(unsigned long long *)(reference + len)) != 0) + { + len += bsc_bit_scan_forward64(m) / 8; break; + } + } + + input += len; len -= minLen; + + *output++ = LIBBSC_LZP_MATCH_FLAG; while (len >= 254) { len -= 254; *output++ = 254; if 
(output >= outputEOB) break; } *output++ = (unsigned char)(len); + + continue; + } + +LIBBSC_LZP_BAD_MATCH_FOUND4: + input += 4; output += 4; *output++ = 255; continue; +LIBBSC_LZP_BAD_MATCH_FOUND3: + input += 3; output += 3; *output++ = 255; continue; +LIBBSC_LZP_BAD_MATCH_FOUND2: + input += 2; output += 2; *output++ = 255; continue; +LIBBSC_LZP_BAD_MATCH_FOUND1: + input += 1; output += 1; *output++ = 255; continue; + } + } + + { + unsigned int context = input[-1] | (input[-2] << 8) | (input[-3] << 16) | (input[-4] << 24); + + while ((input < inputEnd) && (output < outputEOB)) + { + unsigned int index = ((context >> 15) ^ context ^ (context >> 3)) & mask; + int value = lookup[index]; lookup[index] = (int)(input - inputStart); + + unsigned char next = *output++ = *input++; context = (context << 8) | next; + if (next == LIBBSC_LZP_MATCH_FLAG && value > 0) *output++ = 255; + } + } + + return (output >= outputEOB) ? LIBBSC_NOT_COMPRESSIBLE : (int)(output - outputStart); +} + +template int bsc_lzp_encode_large(const unsigned char * RESTRICT input, const unsigned char * inputEnd, unsigned char * RESTRICT output, unsigned char * outputEnd, int * RESTRICT lookup, int mask, int minLen) +{ + const unsigned char * inputStart = input; + const unsigned char * outputStart = output; + const unsigned char * outputEOB = outputEnd - 8; + + const unsigned char * heuristic = input; + const unsigned char * inputMinLenEnd = inputEnd - minLen - 32; + + for (int i = 0; i < 4; ++i) { *output++ = *input++; } + + { + while ((input < inputMinLenEnd) && (output < outputEOB)) + { + unsigned long long next8 = *(unsigned long long *)(input - 4); *(unsigned int *)(output) = (unsigned int)(next8 >> 32); next8 = bsc_byteswap_uint64(next8); + + int value; + { + const unsigned int index0 = (((next8 >> (4 * 8)) >> 15) ^ (next8 >> (4 * 8)) ^ ((next8 >> (4 * 8)) >> 3)) & mask; value = lookup[index0]; lookup[index0] = (int)(input - inputStart + 0); + if (value > 0 && input > heuristic && (*(T *)(input + 
minLen - sizeof(T) + 0) == *(T *)(inputStart + value + minLen - sizeof(T))) && (*(T *)(input + 0) == *(T *)(inputStart + value))) goto LIBBSC_LZP_GOOD_MATCH_FOUND1; + if (value > 0 && ((unsigned char)(next8 >> 3 * 8) == LIBBSC_LZP_MATCH_FLAG)) goto LIBBSC_LZP_BAD_MATCH_FOUND1; + + const unsigned int index1 = (((next8 >> (3 * 8)) >> 15) ^ (next8 >> (3 * 8)) ^ ((next8 >> (3 * 8)) >> 3)) & mask; value = lookup[index1]; lookup[index1] = (int)(input - inputStart + 1); + if (value > 0 && input > heuristic && (*(T *)(input + minLen - sizeof(T) + 1) == *(T *)(inputStart + value + minLen - sizeof(T))) && (*(T *)(input + 1) == *(T *)(inputStart + value))) goto LIBBSC_LZP_GOOD_MATCH_FOUND2; + if (value > 0 && ((unsigned char)(next8 >> 2 * 8) == LIBBSC_LZP_MATCH_FLAG)) goto LIBBSC_LZP_BAD_MATCH_FOUND2; + + const unsigned int index2 = (((next8 >> (2 * 8)) >> 15) ^ (next8 >> (2 * 8)) ^ ((next8 >> (2 * 8)) >> 3)) & mask; value = lookup[index2]; lookup[index2] = (int)(input - inputStart + 2); + if (value > 0 && input > heuristic && (*(T *)(input + minLen - sizeof(T) + 2) == *(T *)(inputStart + value + minLen - sizeof(T))) && (*(T *)(input + 2) == *(T *)(inputStart + value))) goto LIBBSC_LZP_GOOD_MATCH_FOUND3; + if (value > 0 && ((unsigned char)(next8 >> 1 * 8) == LIBBSC_LZP_MATCH_FLAG)) goto LIBBSC_LZP_BAD_MATCH_FOUND3; + + const unsigned int index3 = (((next8 >> (1 * 8)) >> 15) ^ (next8 >> (1 * 8)) ^ ((next8 >> (1 * 8)) >> 3)) & mask; value = lookup[index3]; lookup[index3] = (int)(input - inputStart + 3); + if (value > 0 && input > heuristic && (*(T *)(input + minLen - sizeof(T) + 3) == *(T *)(inputStart + value + minLen - sizeof(T))) && (*(T *)(input + 3) == *(T *)(inputStart + value))) goto LIBBSC_LZP_GOOD_MATCH_FOUND4; + if (value > 0 && ((unsigned char)(next8 >> 0 * 8) == LIBBSC_LZP_MATCH_FLAG)) goto LIBBSC_LZP_BAD_MATCH_FOUND4; + + input += 4; output += 4; + + continue; + } + +LIBBSC_LZP_GOOD_MATCH_FOUND4: + input += 1; output += 1; +LIBBSC_LZP_GOOD_MATCH_FOUND3: + input += 
1; output += 1; +LIBBSC_LZP_GOOD_MATCH_FOUND2: + input += 1; output += 1; +LIBBSC_LZP_GOOD_MATCH_FOUND1: + + { + const unsigned char * RESTRICT reference = inputStart + value; + + long long len = sizeof(T); + + for (; input + len < inputMinLenEnd; len += sizeof(unsigned long long)) + { + unsigned long long m; + if ((m = (*(unsigned long long *)(input + len)) ^ *(unsigned long long *)(reference + len)) != 0) + { + len += bsc_bit_scan_forward64(m) / 8; break; + } + } + + if (len < minLen) { heuristic = input + len; goto LIBBSC_LZP_MATCH_NOT_FOUND; } + + input += len; len -= minLen; + + *output++ = LIBBSC_LZP_MATCH_FLAG; while (len >= 254) { len -= 254; *output++ = 254; if (output >= outputEOB) break; } *output++ = (unsigned char)(len); + + continue; + } + +LIBBSC_LZP_MATCH_NOT_FOUND: + if ((*output++ = *input++) == LIBBSC_LZP_MATCH_FLAG) { *output++ = 255; } + + continue; + +LIBBSC_LZP_BAD_MATCH_FOUND4: + input += 4; output += 4; *output++ = 255; continue; +LIBBSC_LZP_BAD_MATCH_FOUND3: + input += 3; output += 3; *output++ = 255; continue; +LIBBSC_LZP_BAD_MATCH_FOUND2: + input += 2; output += 2; *output++ = 255; continue; +LIBBSC_LZP_BAD_MATCH_FOUND1: + input += 1; output += 1; *output++ = 255; continue; + } + } + + { + unsigned int context = input[-1] | (input[-2] << 8) | (input[-3] << 16) | (input[-4] << 24); + + while ((input < inputEnd) && (output < outputEOB)) + { + unsigned int index = ((context >> 15) ^ context ^ (context >> 3)) & mask; + int value = lookup[index]; lookup[index] = (int)(input - inputStart); + + unsigned char next = *output++ = *input++; context = (context << 8) | next; + if (next == LIBBSC_LZP_MATCH_FLAG && value > 0) *output++ = 255; + } + } + + return (output >= outputEOB) ? 
LIBBSC_NOT_COMPRESSIBLE : (int)(output - outputStart); +} + +#endif + +int bsc_lzp_encode_generic(const unsigned char * RESTRICT input, const unsigned char * inputEnd, unsigned char * RESTRICT output, unsigned char * outputEnd, int * RESTRICT lookup, int mask, int minLen) +{ + const unsigned char * inputStart = input; + const unsigned char * outputStart = output; + const unsigned char * outputEOB = outputEnd - 8; + + const unsigned char * heuristic = input; + const unsigned char * inputMinLenEnd = inputEnd - minLen - 32; + + for (int i = 0; i < 4; ++i) { *output++ = *input++; } + + { + unsigned int context = input[-1] | (input[-2] << 8) | (input[-3] << 16) | (input[-4] << 24); + + while ((input < inputMinLenEnd) && (output < outputEOB)) + { + unsigned int index = ((context >> 15) ^ context ^ (context >> 3)) & mask; + int value = lookup[index]; lookup[index] = (int)(input - inputStart); + if (value > 0) + { + const unsigned char * RESTRICT reference = inputStart + value; +#if defined(LIBBSC_ALLOW_UNALIGNED_ACCESS) + if ((*(unsigned int *)(input + minLen - 4) == *(unsigned int *)(reference + minLen - 4)) && (*(unsigned int *)(input) == *(unsigned int *)(reference))) +#else + if ((memcmp(input + minLen - 4, reference + minLen - 4, sizeof(unsigned int)) == 0) && (memcmp(input, reference, sizeof(unsigned int)) == 0)) +#endif + { + if ((heuristic > input) && (*(unsigned int *)heuristic != *(unsigned int *)(reference + (heuristic - input)))) + { + goto LIBBSC_LZP_MATCH_NOT_FOUND; + } + + int len = 4; + for (; input + len < inputMinLenEnd; len += sizeof(unsigned int)) + { + if (*(unsigned int *)(input + len) != *(unsigned int *)(reference + len)) break; + } + + if (len < minLen) + { + if (heuristic < input + len) heuristic = input + len; + goto LIBBSC_LZP_MATCH_NOT_FOUND; + } + +#if defined(LIBBSC_ALLOW_UNALIGNED_ACCESS) + len += sizeof(unsigned short) * (*(unsigned short *)(input + len) == *(unsigned short *)(reference + len)); + len += sizeof(unsigned char ) * 
(*(unsigned char *)(input + len) == *(unsigned char *)(reference + len)); +#else + len += input[len] == reference[len]; + len += input[len] == reference[len]; + len += input[len] == reference[len]; +#endif + + input += len; context = input[-1] | (input[-2] << 8) | (input[-3] << 16) | (input[-4] << 24); + + *output++ = LIBBSC_LZP_MATCH_FLAG; + + len -= minLen; while (len >= 254) { len -= 254; *output++ = 254; if (output >= outputEOB) break; } + + *output++ = (unsigned char)(len); + } + else + { + +LIBBSC_LZP_MATCH_NOT_FOUND: + unsigned char next = *output++ = *input++; context = (context << 8) | next; + if (next == LIBBSC_LZP_MATCH_FLAG) *output++ = 255; + } + } + else + { + context = (context << 8) | (*output++ = *input++); + } + } + } + + { + unsigned int context = input[-1] | (input[-2] << 8) | (input[-3] << 16) | (input[-4] << 24); + + while ((input < inputEnd) && (output < outputEOB)) + { + unsigned int index = ((context >> 15) ^ context ^ (context >> 3)) & mask; + int value = lookup[index]; lookup[index] = (int)(input - inputStart); + + unsigned char next = *output++ = *input++; context = (context << 8) | next; + if (next == LIBBSC_LZP_MATCH_FLAG && value > 0) *output++ = 255; + } + } + + return (output >= outputEOB) ? LIBBSC_NOT_COMPRESSIBLE : (int)(output - outputStart); +} + +int bsc_lzp_encode_block(const unsigned char * input, const unsigned char * inputEnd, unsigned char * output, unsigned char * outputEnd, int hashSize, int minLen) +{ + if (inputEnd - input - minLen < 32) + { + return LIBBSC_NOT_COMPRESSIBLE; + } + + int result = LIBBSC_NOT_ENOUGH_MEMORY; + if (int * lookup = (int *)bsc_zero_malloc((int)(1 << hashSize) * sizeof(int))) + { +#if defined(LIBBSC_ALLOW_UNALIGNED_ACCESS) && (defined(__x86_64__) || defined(__aarch64__)) + result = (minLen == 1 * (int)sizeof(unsigned int ) && result == LIBBSC_NOT_ENOUGH_MEMORY) ? 
bsc_lzp_encode_small (input, inputEnd, output, outputEnd, lookup, (int)(1 << hashSize) - 1) : result; + result = (minLen == 1 * (int)sizeof(unsigned long long) && result == LIBBSC_NOT_ENOUGH_MEMORY) ? bsc_lzp_encode_small (input, inputEnd, output, outputEnd, lookup, (int)(1 << hashSize) - 1) : result; + result = (minLen == 2 * (int)sizeof(unsigned long long) && result == LIBBSC_NOT_ENOUGH_MEMORY) ? bsc_lzp_encode_small2x(input, inputEnd, output, outputEnd, lookup, (int)(1 << hashSize) - 1) : result; + result = (minLen <= 2 * (int)sizeof(unsigned int ) && result == LIBBSC_NOT_ENOUGH_MEMORY) ? bsc_lzp_encode_medium (input, inputEnd, output, outputEnd, lookup, (int)(1 << hashSize) - 1, minLen) : result; + result = (minLen <= 2 * (int)sizeof(unsigned long long) && result == LIBBSC_NOT_ENOUGH_MEMORY) ? bsc_lzp_encode_medium (input, inputEnd, output, outputEnd, lookup, (int)(1 << hashSize) - 1, minLen) : result; + + result = result == LIBBSC_NOT_ENOUGH_MEMORY ? bsc_lzp_encode_large(input, inputEnd, output, outputEnd, lookup, (int)(1 << hashSize) - 1, minLen) : result; +#endif + + result = result == LIBBSC_NOT_ENOUGH_MEMORY ? 
bsc_lzp_encode_generic(input, inputEnd, output, outputEnd, lookup, (int)(1 << hashSize) - 1, minLen) : result; + + bsc_free(lookup); + } + + return result; +} + +int bsc_lzp_decode_block(const unsigned char * RESTRICT input, const unsigned char * inputEnd, unsigned char * RESTRICT output, int hashSize, int minLen) +{ + if (inputEnd - input < 4) + { + return LIBBSC_UNEXPECTED_EOB; + } + + if (int * RESTRICT lookup = (int *)bsc_zero_malloc((int)(1 << hashSize) * sizeof(int))) + { + unsigned int mask = (int)(1 << hashSize) - 1; + const unsigned char * outputStart = output; + + for (int i = 0; i < 4; ++i) { *output++ = *input++; } + +#if defined(LIBBSC_ALLOW_UNALIGNED_ACCESS) && (defined(__x86_64__) || defined(__aarch64__)) + if (hashSize <= 17) + { + unsigned int prev4 = *(unsigned int *)(output - 4); + + while (input < inputEnd - 8) + { + unsigned int next4 = *(unsigned int *)(output) = *(unsigned int *)(input); + unsigned long long next8 = bsc_byteswap_uint64(((unsigned long long)next4 << 32) | prev4); + + int value; + { + const unsigned int index0 = (((next8 >> (4 * 8)) >> 15) ^ (next8 >> (4 * 8)) ^ ((next8 >> (4 * 8)) >> 3)) & mask; + value = lookup[index0]; lookup[index0] = (int)(output - outputStart + 0); if (((unsigned char)(next8 >> 3 * 8) == LIBBSC_LZP_MATCH_FLAG) && (value > 0)) goto LIBBSC_LZP_MATCH_FOUND1; + + const unsigned int index1 = (((next8 >> (3 * 8)) >> 15) ^ (next8 >> (3 * 8)) ^ ((next8 >> (3 * 8)) >> 3)) & mask; + value = lookup[index1]; lookup[index1] = (int)(output - outputStart + 1); if (((unsigned char)(next8 >> 2 * 8) == LIBBSC_LZP_MATCH_FLAG) && (value > 0)) goto LIBBSC_LZP_MATCH_FOUND2; + + const unsigned int index2 = (((next8 >> (2 * 8)) >> 15) ^ (next8 >> (2 * 8)) ^ ((next8 >> (2 * 8)) >> 3)) & mask; + value = lookup[index2]; lookup[index2] = (int)(output - outputStart + 2); if (((unsigned char)(next8 >> 1 * 8) == LIBBSC_LZP_MATCH_FLAG) && (value > 0)) goto LIBBSC_LZP_MATCH_FOUND3; + + const unsigned int index3 = (((next8 >> (1 * 8)) >> 
15) ^ (next8 >> (1 * 8)) ^ ((next8 >> (1 * 8)) >> 3)) & mask; + value = lookup[index3]; lookup[index3] = (int)(output - outputStart + 3); if (((unsigned char)(next8 >> 0 * 8) == LIBBSC_LZP_MATCH_FLAG) && (value > 0)) goto LIBBSC_LZP_MATCH_FOUND4; + + prev4 = next4; input += 4; output += 4; + + continue; + } + +LIBBSC_LZP_MATCH_FOUND4: + input += 1; output += 1; +LIBBSC_LZP_MATCH_FOUND3: + input += 1; output += 1; +LIBBSC_LZP_MATCH_FOUND2: + input += 1; output += 1; +LIBBSC_LZP_MATCH_FOUND1: + input += 1; + + if (*input != 255) + { + int len = minLen; while (true) { len += *input; if (*input++ != 254) break; } + + const unsigned char * reference = outputStart + value; + unsigned char * outputEnd = output + len; + + while (output < outputEnd) { *output++ = *reference++; } + + prev4 = *(unsigned int *)(output - 4); + } + else + { + input++; output++; prev4 = *(unsigned int *)(output - 4); + } + } + } +#endif + + { + unsigned int context = output[-1] | (output[-2] << 8) | (output[-3] << 16) | (output[-4] << 24); + + while (input < inputEnd) + { + unsigned int index = ((context >> 15) ^ context ^ (context >> 3)) & mask; + int value = lookup[index]; lookup[index] = (int)(output - outputStart); + if (*input == LIBBSC_LZP_MATCH_FLAG && value > 0) + { + input++; + if (*input != 255) + { + int len = minLen; while (true) { len += *input; if (*input++ != 254) break; } + + const unsigned char * reference = outputStart + value; + unsigned char * outputEnd = output + len; + + while (output < outputEnd) *output++ = *reference++; + + context = output[-1] | (output[-2] << 8) | (output[-3] << 16) | (output[-4] << 24); + } + else + { + input++; context = (context << 8) | (*output++ = LIBBSC_LZP_MATCH_FLAG); + } + } + else + { + context = (context << 8) | (*output++ = *input++); + } + } + } + + bsc_free(lookup); + + return (int)(output - outputStart); + } + + return LIBBSC_NOT_ENOUGH_MEMORY; +} + +int bsc_lzp_compress_serial(const unsigned char * input, unsigned char * output, int n, 
int hashSize, int minLen) +{ + if (bsc_lzp_num_blocks(n) == 1) + { + int result = bsc_lzp_encode_block(input, input + n, output + 1, output + n - 1, hashSize, minLen); + if (result >= LIBBSC_NO_ERROR) result = (output[0] = 1, result + 1); + + return result; + } + + int nBlocks = bsc_lzp_num_blocks(n); + int chunkSize = n / nBlocks; + int outputPtr = 1 + 8 * nBlocks; + + output[0] = nBlocks; + for (int blockId = 0; blockId < nBlocks; ++blockId) + { + int inputStart = blockId * chunkSize; + int inputSize = blockId != nBlocks - 1 ? chunkSize : n - inputStart; + int outputSize = inputSize; if (outputSize > n - outputPtr) outputSize = n - outputPtr; + + int result = bsc_lzp_encode_block(input + inputStart, input + inputStart + inputSize, output + outputPtr, output + outputPtr + outputSize, hashSize, minLen); + if (result < LIBBSC_NO_ERROR) + { + if (outputPtr + inputSize >= n) return LIBBSC_NOT_COMPRESSIBLE; + result = inputSize; memcpy(output + outputPtr, input + inputStart, inputSize); + } +#if defined(LIBBSC_ALLOW_UNALIGNED_ACCESS) + *(int *)(output + 1 + 8 * blockId + 0) = inputSize; + *(int *)(output + 1 + 8 * blockId + 4) = result; +#else + memcpy(output + 1 + 8 * blockId + 0, &inputSize, sizeof(int)); + memcpy(output + 1 + 8 * blockId + 4, &result, sizeof(int)); +#endif + + outputPtr += result; + } + + return outputPtr; +} + +#ifdef LIBBSC_OPENMP + +int bsc_lzp_compress_parallel(const unsigned char * input, unsigned char * output, int n, int hashSize, int minLen) +{ + if (unsigned char * buffer = (unsigned char *)bsc_malloc(n * sizeof(unsigned char))) + { + int compressionResult[ALPHABET_SIZE]; + + int nBlocks = bsc_lzp_num_blocks(n); + int result = LIBBSC_NO_ERROR; + int chunkSize = n / nBlocks; + + int numThreads = omp_get_max_threads(); + if (numThreads > nBlocks) numThreads = nBlocks; + + output[0] = nBlocks; + #pragma omp parallel num_threads(numThreads) if(numThreads > 1) + { + if (omp_get_num_threads() == 1) + { + result = bsc_lzp_compress_serial(input, 
output, n, hashSize, minLen); + } + else + { + #pragma omp for schedule(dynamic) + for (int blockId = 0; blockId < nBlocks; ++blockId) + { + int blockStart = blockId * chunkSize; + int blockSize = blockId != nBlocks - 1 ? chunkSize : n - blockStart; + + compressionResult[blockId] = bsc_lzp_encode_block(input + blockStart, input + blockStart + blockSize, buffer + blockStart, buffer + blockStart + blockSize, hashSize, minLen); + if (compressionResult[blockId] < LIBBSC_NO_ERROR) compressionResult[blockId] = blockSize; + + *(int *)(output + 1 + 8 * blockId + 0) = blockSize; + *(int *)(output + 1 + 8 * blockId + 4) = compressionResult[blockId]; + } + + #pragma omp single + { + result = 1 + 8 * nBlocks; + for (int blockId = 0; blockId < nBlocks; ++blockId) + { + result += compressionResult[blockId]; + } + + if (result >= n) result = LIBBSC_NOT_COMPRESSIBLE; + } + + if (result >= LIBBSC_NO_ERROR) + { + #pragma omp for schedule(dynamic) + for (int blockId = 0; blockId < nBlocks; ++blockId) + { + int blockStart = blockId * chunkSize; + int blockSize = blockId != nBlocks - 1 ? 
chunkSize : n - blockStart; + + int outputPtr = 1 + 8 * nBlocks; + for (int p = 0; p < blockId; ++p) outputPtr += compressionResult[p]; + + if (compressionResult[blockId] != blockSize) + { + memcpy(output + outputPtr, buffer + blockStart, compressionResult[blockId]); + } + else + { + memcpy(output + outputPtr, input + blockStart, compressionResult[blockId]); + } + } + } + } + } + + bsc_free(buffer); + + return result; + } + return LIBBSC_NOT_ENOUGH_MEMORY; +} + +#endif + +int bsc_lzp_compress(const unsigned char * input, unsigned char * output, int n, int hashSize, int minLen, int features) +{ + +#ifdef LIBBSC_OPENMP + + if ((bsc_lzp_num_blocks(n) != 1) && (features & LIBBSC_FEATURE_MULTITHREADING)) + { + return bsc_lzp_compress_parallel(input, output, n, hashSize, minLen); + } + +#endif + + return bsc_lzp_compress_serial(input, output, n, hashSize, minLen); +} + +int bsc_lzp_decompress(const unsigned char * input, unsigned char * output, int n, int hashSize, int minLen, int features) +{ + int nBlocks = input[0]; + + if (nBlocks == 1) + { + return bsc_lzp_decode_block(input + 1, input + n, output, hashSize, minLen); + } + + int decompressionResult[ALPHABET_SIZE]; + +#ifdef LIBBSC_OPENMP + + if (features & LIBBSC_FEATURE_MULTITHREADING) + { + #pragma omp parallel for schedule(dynamic) + for (int blockId = 0; blockId < nBlocks; ++blockId) + { + int inputPtr = 0; for (int p = 0; p < blockId; ++p) inputPtr += *(int *)(input + 1 + 8 * p + 4); + int outputPtr = 0; for (int p = 0; p < blockId; ++p) outputPtr += *(int *)(input + 1 + 8 * p + 0); + + inputPtr += 1 + 8 * nBlocks; + + int inputSize = *(int *)(input + 1 + 8 * blockId + 4); + int outputSize = *(int *)(input + 1 + 8 * blockId + 0); + + if (inputSize != outputSize) + { + decompressionResult[blockId] = bsc_lzp_decode_block(input + inputPtr, input + inputPtr + inputSize, output + outputPtr, hashSize, minLen); + } + else + { + decompressionResult[blockId] = inputSize; memcpy(output + outputPtr, input + inputPtr, 
inputSize); + } + } + } + else + +#endif + + { + for (int blockId = 0; blockId < nBlocks; ++blockId) + { + int inputPtr = 0; for (int p = 0; p < blockId; ++p) inputPtr += *(int *)(input + 1 + 8 * p + 4); + int outputPtr = 0; for (int p = 0; p < blockId; ++p) outputPtr += *(int *)(input + 1 + 8 * p + 0); + + inputPtr += 1 + 8 * nBlocks; + + int inputSize = *(int *)(input + 1 + 8 * blockId + 4); + int outputSize = *(int *)(input + 1 + 8 * blockId + 0); + + if (inputSize != outputSize) + { + decompressionResult[blockId] = bsc_lzp_decode_block(input + inputPtr, input + inputPtr + inputSize, output + outputPtr, hashSize, minLen); + } + else + { + decompressionResult[blockId] = inputSize; memcpy(output + outputPtr, input + inputPtr, inputSize); + } + } + } + + int dataSize = 0, result = LIBBSC_NO_ERROR; + for (int blockId = 0; blockId < nBlocks; ++blockId) + { + if (decompressionResult[blockId] < LIBBSC_NO_ERROR) result = decompressionResult[blockId]; + dataSize += decompressionResult[blockId]; + } + + return (result == LIBBSC_NO_ERROR) ? dataSize : result; +} + +/*-----------------------------------------------------------*/ +/* End lzp.cpp */ +/*-----------------------------------------------------------*/ diff --git a/libbsc/libbsc/lzp/lzp.h b/libbsc/libbsc/lzp/lzp.h new file mode 100644 index 00000000..e4814a49 --- /dev/null +++ b/libbsc/libbsc/lzp/lzp.h @@ -0,0 +1,72 @@ +/*-----------------------------------------------------------*/ +/* Block Sorting, Lossless Data Compression Library. */ +/* Interface to Lempel Ziv Prediction functions */ +/*-----------------------------------------------------------*/ + +/*-- + +This file is a part of bsc and/or libbsc, a program and a library for +lossless, block-sorting data compression. + + Copyright (c) 2009-2021 Ilya Grebnov + + Licensed under the Apache License, Version 2.0 (the "License"); + you may not use this file except in compliance with the License. 
+ You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + + Unless required by applicable law or agreed to in writing, software + distributed under the License is distributed on an "AS IS" BASIS, + WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + See the License for the specific language governing permissions and + limitations under the License. + +Please see the file LICENSE for full copyright information and file AUTHORS +for full list of contributors. + +See also the bsc and libbsc web site: + http://libbsc.com/ for more information. + +--*/ + +#ifndef _LIBBSC_LZP_H +#define _LIBBSC_LZP_H + +#ifdef __cplusplus +extern "C" { +#endif + + /** + * Preprocess a memory block by LZP algorithm. + * @param input - the input memory block of n bytes. + * @param output - the output memory block of n bytes. + * @param n - the length of the input/output memory blocks. + * @param hashSize - the hash table size. + * @param minLen - the minimum match length. + * @param features - the set of additional features. + * @return The length of preprocessed memory block if no error occurred, error code otherwise. + */ + int bsc_lzp_compress(const unsigned char * input, unsigned char * output, int n, int hashSize, int minLen, int features); + + /** + * Reconstructs the original memory block after LZP algorithm. + * @param input - the input memory block of n bytes. + * @param output - the output memory block. + * @param n - the length of the input memory block. + * @param hashSize - the hash table size. + * @param minLen - the minimum match length. + * @param features - the set of additional features. + * @return The length of original memory block if no error occurred, error code otherwise. 
+ */ + int bsc_lzp_decompress(const unsigned char * input, unsigned char * output, int n, int hashSize, int minLen, int features); + +#ifdef __cplusplus +} +#endif + +#endif + +/*-----------------------------------------------------------*/ +/* End lzp.h */ +/*-----------------------------------------------------------*/ diff --git a/libbsc/libbsc/platform/platform.cpp b/libbsc/libbsc/platform/platform.cpp new file mode 100644 index 00000000..6a2f9928 --- /dev/null +++ b/libbsc/libbsc/platform/platform.cpp @@ -0,0 +1,264 @@ +/*-----------------------------------------------------------*/ +/* Block Sorting, Lossless Data Compression Library. */ +/* Platform specific functions and constants */ +/*-----------------------------------------------------------*/ + +/*-- + +This file is a part of bsc and/or libbsc, a program and a library for +lossless, block-sorting data compression. + + Copyright (c) 2009-2021 Ilya Grebnov + + Licensed under the Apache License, Version 2.0 (the "License"); + you may not use this file except in compliance with the License. + You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + + Unless required by applicable law or agreed to in writing, software + distributed under the License is distributed on an "AS IS" BASIS, + WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + See the License for the specific language governing permissions and + limitations under the License. + +Please see the file LICENSE for full copyright information and file AUTHORS +for full list of contributors. + +See also the bsc and libbsc web site: + http://libbsc.com/ for more information. 
+ +--*/ + +#include +#include +#include + +#include "platform.h" + +#include "../libbsc.h" + +#if defined(_WIN32) + #include + SIZE_T g_LargePageSize = 0; +#endif + +#if (LIBBSC_CPU_FEATURE >= LIBBSC_CPU_FEATURE_SSE2) + +#if defined(_MSC_VER) + #include +#endif + +static void bsc_cpuid(unsigned int regs[4], unsigned int level) +{ +#if defined(_MSC_VER) + __cpuid((int *)regs, (int)level); +#else + __asm__ __volatile__ + ( + "xchg %%ebx, %%edi\n\t" + "cpuid\n\t" + "xchg %%ebx, %%edi" + : "=a"(regs[0]), "=D"(regs[1]), "=c"(regs[2]), "=d"(regs[3]) + : "a"(level), "c"(0) + ); +#endif +} + +static unsigned long long bsc_xgetbv() +{ +#if defined(_MSC_VER) + return _xgetbv(0); +#else + unsigned int eax = 0, edx = 0; + __asm__ __volatile__ + ( + "xgetbv" + : "=a"(eax), "=d"(edx) + : "c"(0) + ); + return ((unsigned long long)edx << 32) | eax; +#endif +} + +int bsc_get_cpu_features(void) +{ + static int g_cpu_features = -1; if (g_cpu_features >= 0) { return g_cpu_features; } + + unsigned int regs[4] = { 0, 0, 0, 0 }; + + bsc_cpuid(regs, 0); if (regs[0] < 1) { return g_cpu_features = LIBBSC_CPU_FEATURE_NONE; } + + bsc_cpuid(regs, 1); + if ((regs[3] & (1 << 26)) == 0) { return g_cpu_features = LIBBSC_CPU_FEATURE_NONE; } // no SSE2 + if ((regs[2] & (1 << 0)) == 0) { return g_cpu_features = LIBBSC_CPU_FEATURE_SSE2; } // no SSE3 + if ((regs[2] & (1 << 9)) == 0) { return g_cpu_features = LIBBSC_CPU_FEATURE_SSE3; } // no SSSE3 + if ((regs[2] & (1 << 19)) == 0) { return g_cpu_features = LIBBSC_CPU_FEATURE_SSSE3; } // no SSE4.1 + if ((regs[2] & (1 << 23)) == 0) { return g_cpu_features = LIBBSC_CPU_FEATURE_SSE41; } // no POPCNT + if ((regs[2] & (1 << 20)) == 0) { return g_cpu_features = LIBBSC_CPU_FEATURE_SSE41; } // no SSE4.2 + if ((regs[2] & (1 << 28)) == 0) { return g_cpu_features = LIBBSC_CPU_FEATURE_SSE42; } // no AVX + if ((regs[2] & (1 << 27)) == 0) { return g_cpu_features = LIBBSC_CPU_FEATURE_SSE42; } // no XSAVE + if ((bsc_xgetbv() & 0x6) != 0x6) { return g_cpu_features = 
LIBBSC_CPU_FEATURE_SSE42; } // AVX not enabled by OS + + bsc_cpuid(regs, 0); if (regs[0] < 7) { return g_cpu_features = LIBBSC_CPU_FEATURE_AVX; } + + bsc_cpuid(regs, 7); + if ((regs[1] & (1 << 5)) == 0) { return g_cpu_features = LIBBSC_CPU_FEATURE_AVX; } // no AVX2 + if ((regs[1] & (1 << 16)) == 0) { return g_cpu_features = LIBBSC_CPU_FEATURE_AVX2; } // no AVX512F + if ((regs[1] & (1 << 28)) == 0) { return g_cpu_features = LIBBSC_CPU_FEATURE_AVX2; } // no AVX512CD + if ((bsc_xgetbv() & 0xE0) != 0xE0) { return g_cpu_features = LIBBSC_CPU_FEATURE_AVX2; } // AVX512 not enabled by OS + if ((regs[1] & (1 << 17)) == 0) { return g_cpu_features = LIBBSC_CPU_FEATURE_AVX512F; } // no AVX512DQ + if ((regs[1] & (1 << 31)) == 0) { return g_cpu_features = LIBBSC_CPU_FEATURE_AVX512F; } // no AVX512VL + if ((regs[1] & (1 << 30)) == 0) { return g_cpu_features = LIBBSC_CPU_FEATURE_AVX512F; } // no AVX512BW + + return g_cpu_features = LIBBSC_CPU_FEATURE_AVX512BW; +} + +#else + +int bsc_get_cpu_features(void) +{ + return LIBBSC_CPU_FEATURE_NONE; +} + +#endif + +static void * bsc_default_malloc(size_t size) +{ +#if defined(_WIN32) + if ((g_LargePageSize != 0) && (size >= 256 * 1024)) + { + void * address = VirtualAlloc(0, (size + g_LargePageSize - 1) & (~(g_LargePageSize - 1)), MEM_COMMIT | MEM_LARGE_PAGES, PAGE_READWRITE); + if (address != NULL) return address; + } + return VirtualAlloc(0, size, MEM_COMMIT, PAGE_READWRITE); +#else + return malloc(size); +#endif +} + +static void * bsc_default_zero_malloc(size_t size) +{ +#if defined(_WIN32) + if ((g_LargePageSize != 0) && (size >= 256 * 1024)) + { + void * address = VirtualAlloc(0, (size + g_LargePageSize - 1) & (~(g_LargePageSize - 1)), MEM_COMMIT | MEM_LARGE_PAGES, PAGE_READWRITE); + if (address != NULL) return address; + } + return VirtualAlloc(0, size, MEM_COMMIT, PAGE_READWRITE); +#else + return calloc(1, size); +#endif +} + +static void * bsc_wrap_zero_malloc(size_t size) +{ + void *address = bsc_malloc(size); + if(address != 
NULL) + { + memset(address, 0, size); + } + return address; +} + +static void bsc_default_free(void * address) +{ +#if defined(_WIN32) + VirtualFree(address, 0, MEM_RELEASE); +#else + free(address); +#endif +} + +static void* (* bsc_malloc_fn)(size_t size) = bsc_default_malloc; +static void* (* bsc_zero_malloc_fn)(size_t size) = bsc_default_zero_malloc; +static void (* bsc_free_fn)(void* address) = bsc_default_free; + +void* bsc_malloc(size_t size) +{ + return bsc_malloc_fn(size); +} + +void* bsc_zero_malloc(size_t size) +{ + return bsc_zero_malloc_fn(size); +} + +void bsc_free(void* address) +{ + return bsc_free_fn(address); +} + +int bsc_platform_init(int features, void* (* malloc)(size_t size), void* (* zero_malloc)(size_t size), void (* free)(void* address)) +{ + /* If the caller provides a malloc function but not a zero_malloc + function, we want to use malloc to implement zero_malloc. + Otherwise we'll use the default function which may be slightly + faster on some platforms. */ + if (zero_malloc != NULL) + { + bsc_zero_malloc_fn = zero_malloc; + } + else if (malloc != NULL) + { + bsc_zero_malloc_fn = bsc_wrap_zero_malloc; + } + + if (malloc != NULL) + { + bsc_malloc_fn = malloc; + } + + if (free != NULL) + { + bsc_free_fn = free; + } + +#if defined(_WIN32) + + if (features & LIBBSC_FEATURE_LARGEPAGES) + { + HANDLE hToken = 0; + if (OpenProcessToken(GetCurrentProcess(), TOKEN_ADJUST_PRIVILEGES, &hToken)) + { + LUID luid; + if (LookupPrivilegeValue(NULL, TEXT("SeLockMemoryPrivilege"), &luid)) + { + TOKEN_PRIVILEGES tp; + + tp.PrivilegeCount = 1; + tp.Privileges[0].Luid = luid; + tp.Privileges[0].Attributes = SE_PRIVILEGE_ENABLED; + + AdjustTokenPrivileges(hToken, FALSE, &tp, sizeof(tp), 0, 0); + } + + CloseHandle(hToken); + } + + { + if (HMODULE hKernel = GetModuleHandle(TEXT("kernel32.dll"))) + { + typedef SIZE_T (WINAPI * GetLargePageMinimumProcT)(); + + GetLargePageMinimumProcT largePageMinimumProc = (GetLargePageMinimumProcT)GetProcAddress(hKernel, 
"GetLargePageMinimum"); + if (largePageMinimumProc != NULL) + { + SIZE_T largePageSize = largePageMinimumProc(); + + if ((largePageSize & (largePageSize - 1)) != 0) largePageSize = 0; + + g_LargePageSize = largePageSize; + } + } + } + } + +#endif + + return LIBBSC_NO_ERROR; +} + +/*-----------------------------------------------------------*/ +/* End platform.cpp */ +/*-----------------------------------------------------------*/ diff --git a/libbsc/libbsc/platform/platform.h b/libbsc/libbsc/platform/platform.h new file mode 100644 index 00000000..7be61d9f --- /dev/null +++ b/libbsc/libbsc/platform/platform.h @@ -0,0 +1,226 @@ +/*-----------------------------------------------------------*/ +/* Block Sorting, Lossless Data Compression Library. */ +/* Interface to platform specific functions and constants */ +/*-----------------------------------------------------------*/ + +/*-- + +This file is a part of bsc and/or libbsc, a program and a library for +lossless, block-sorting data compression. + + Copyright (c) 2009-2021 Ilya Grebnov + + Licensed under the Apache License, Version 2.0 (the "License"); + you may not use this file except in compliance with the License. + You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + + Unless required by applicable law or agreed to in writing, software + distributed under the License is distributed on an "AS IS" BASIS, + WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + See the License for the specific language governing permissions and + limitations under the License. + +Please see the file LICENSE for full copyright information and file AUTHORS +for full list of contributors. + +See also the bsc and libbsc web site: + http://libbsc.com/ for more information. 
+ +--*/ + +#ifndef _LIBBSC_PLATFORM_H +#define _LIBBSC_PLATFORM_H + +#define ALPHABET_SIZE (256) + +#define LIBBSC_CPU_FEATURE_NONE 0 +#define LIBBSC_CPU_FEATURE_A64 1 +#define LIBBSC_CPU_FEATURE_SSE2 2 +#define LIBBSC_CPU_FEATURE_SSE3 3 +#define LIBBSC_CPU_FEATURE_SSSE3 4 +#define LIBBSC_CPU_FEATURE_SSE41 5 +#define LIBBSC_CPU_FEATURE_SSE42 6 +#define LIBBSC_CPU_FEATURE_AVX 7 +#define LIBBSC_CPU_FEATURE_AVX2 8 +#define LIBBSC_CPU_FEATURE_AVX512F 9 +#define LIBBSC_CPU_FEATURE_AVX512BW 10 + +#if (defined(_M_AMD64) || defined(_M_X64) || defined(__amd64)) && !defined(__x86_64__) + #define __x86_64__ 1 +#endif + +#if defined(_M_ARM64) && !defined(__aarch64__) + #define __aarch64__ 1 +#endif + +#ifndef LIBBSC_CPU_FEATURE + #if defined(__AVX512VL__) && defined(__AVX512BW__) && defined(__AVX512DQ__) + #define LIBBSC_CPU_FEATURE LIBBSC_CPU_FEATURE_AVX512BW + #elif defined(__AVX512F__) || defined(__AVX512__) + #define LIBBSC_CPU_FEATURE LIBBSC_CPU_FEATURE_AVX512F + #elif defined(__AVX2__) + #define LIBBSC_CPU_FEATURE LIBBSC_CPU_FEATURE_AVX2 + #elif defined(__AVX__) + #define LIBBSC_CPU_FEATURE LIBBSC_CPU_FEATURE_AVX + #elif defined(__SSE4_2__) + #define LIBBSC_CPU_FEATURE LIBBSC_CPU_FEATURE_SSE42 + #elif defined(__SSE4_1__) + #define LIBBSC_CPU_FEATURE LIBBSC_CPU_FEATURE_SSE41 + #elif defined(__SSSE3__) + #define LIBBSC_CPU_FEATURE LIBBSC_CPU_FEATURE_SSSE3 + #elif defined(__SSE3__) + #define LIBBSC_CPU_FEATURE LIBBSC_CPU_FEATURE_SSE3 + #elif defined(__SSE2__) || defined(__x86_64__) || (defined(_M_IX86_FP) && _M_IX86_FP >= 2) + #define LIBBSC_CPU_FEATURE LIBBSC_CPU_FEATURE_SSE2 + #elif defined(__aarch64__) + #define LIBBSC_CPU_FEATURE LIBBSC_CPU_FEATURE_A64 + #else + #define LIBBSC_CPU_FEATURE LIBBSC_CPU_FEATURE_NONE + #endif +#endif + +#if defined(_OPENMP) && defined(LIBBSC_OPENMP_SUPPORT) + #include + #define LIBBSC_OPENMP +#endif + +#if LIBBSC_CPU_FEATURE >= LIBBSC_CPU_FEATURE_SSE2 + #if defined(_MSC_VER) + #include + #elif defined(__GNUC__) && (defined(__x86_64__) || 
defined(__i386__)) + #include + #endif +#elif LIBBSC_CPU_FEATURE == LIBBSC_CPU_FEATURE_A64 + #include +#endif + +#if defined(__GNUC__) + #define INLINE __inline__ +#elif defined(_MSC_VER) + #define INLINE __forceinline +#elif defined(__IBMC__) + #define INLINE _Inline +#elif defined(__cplusplus) + #define INLINE inline +#else + #define INLINE /* */ +#endif + +#if defined(_MSC_VER) + #define NOINLINE __declspec(noinline) +#elif defined(__GNUC__) + #define NOINLINE __attribute__ ((noinline)) +#else + #define NOINLINE /* */ +#endif + +#if defined(_MSC_VER) + #define ALIGNED(x) __declspec(align(x)) +#elif defined(__GNUC__) + #define ALIGNED(x) __attribute__ ((aligned(x))) +#endif + +#if defined(__GNUC__) || defined(__clang__) || defined(__CUDACC__) + #define RESTRICT __restrict__ +#elif defined(_MSC_VER) || defined(__INTEL_COMPILER) + #define RESTRICT __restrict +#else + #define RESTRICT /* */ +#endif + +#if defined(__GNUC__) || defined(__clang__) + #define bsc_byteswap_uint64(x) (__builtin_bswap64(x)) + #define bsc_bit_scan_reverse(x) (__builtin_clz(x) ^ 31) + #define bsc_bit_scan_reverse64(x) (__builtin_clzll(x) ^ 63) + #define bsc_bit_scan_forward(x) (__builtin_ctz(x)) + #define bsc_bit_scan_forward64(x) (__builtin_ctzll(x)) +#elif defined(_MSC_VER) + #define bsc_byteswap_uint64(x) (_byteswap_uint64(x)) + + #pragma intrinsic(_BitScanReverse) + #pragma intrinsic(_BitScanForward) + + static inline __forceinline unsigned long bsc_bit_scan_reverse(unsigned long x) + { + unsigned long index; + _BitScanReverse(&index, x); + return index; + } + + static inline __forceinline unsigned long bsc_bit_scan_forward(unsigned long x) + { + unsigned long index; + _BitScanForward(&index, x); + return index; + } + + #if defined(__x86_64__) || defined(__aarch64__) + static inline __forceinline unsigned long bsc_bit_scan_reverse64(unsigned long long x) + { + unsigned long index; + _BitScanReverse64(&index, x); + return index; + } + #endif + + #if defined(__x86_64__) || 
defined(__aarch64__) + static inline __forceinline unsigned long bsc_bit_scan_forward64(unsigned long long x) + { + unsigned long index; + _BitScanForward64(&index, x); + return index; + } + #endif +#endif + +#ifdef __cplusplus +extern "C" { +#endif + + /** + * You should call this function before you call any of the other platform specific functions. + * @param malloc - function to use to allocate buffers + * @param zero_malloc - function to use to allocate zero-filled buffers + * @param free - function used to free buffers + * @param features - the set of additional features. + * @return LIBBSC_NO_ERROR if no error occurred, error code otherwise. + */ + int bsc_platform_init(int features, void* (* malloc)(size_t size), void* (* zero_malloc)(size_t size), void (* free)(void* address)); + + /** + * Allocates memory blocks. + * @param size - bytes to allocate. + * @return a pointer to allocated space or NULL if there is insufficient memory available. + */ + void * bsc_malloc(size_t size); + + /** + * Allocates memory blocks and initializes all its bits to zero. + * @param size - bytes to allocate. + * @return a pointer to allocated space or NULL if there is insufficient memory available. + */ + void * bsc_zero_malloc(size_t size); + + /** + * Deallocates or frees a memory block. + * @param address - previously allocated memory block to be freed. + */ + void bsc_free(void * address); + + /** + * Detects supported CPU features (Streaming SIMD Extensions). + * @return highest supported CPU feature. 
+ */ + int bsc_get_cpu_features(void); + +#ifdef __cplusplus +} +#endif + +#endif + +/*-----------------------------------------------------------*/ +/* End platform.h */ +/*-----------------------------------------------------------*/ diff --git a/libbsc/libbsc/st/st.cpp b/libbsc/libbsc/st/st.cpp new file mode 100644 index 00000000..82123feb --- /dev/null +++ b/libbsc/libbsc/st/st.cpp @@ -0,0 +1,1533 @@ +/*-----------------------------------------------------------*/ +/* Block Sorting, Lossless Data Compression Library. */ +/* Sort Transform */ +/*-----------------------------------------------------------*/ + +/*-- + +This file is a part of bsc and/or libbsc, a program and a library for +lossless, block-sorting data compression. + + Copyright (c) 2009-2021 Ilya Grebnov + + Licensed under the Apache License, Version 2.0 (the "License"); + you may not use this file except in compliance with the License. + You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + + Unless required by applicable law or agreed to in writing, software + distributed under the License is distributed on an "AS IS" BASIS, + WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + See the License for the specific language governing permissions and + limitations under the License. + +Please see the file LICENSE for full copyright information and file AUTHORS +for full list of contributors. + +See also the bsc and libbsc web site: + http://libbsc.com/ for more information. 
+ +--*/ + +#ifdef LIBBSC_SORT_TRANSFORM_SUPPORT + +#include +#include + +#include "st.h" + +#include "../libbsc.h" +#include "../platform/platform.h" + +#include "st.cuh" + +#define ALPHABET_SQRT_SIZE (16) +/* bsc_st_init: with LIBBSC_CUDA_SUPPORT defined this forwards features to bsc_st_cuda_init() and returns its result; otherwise it returns LIBBSC_NO_ERROR. */ +int bsc_st_init(int features) +{ +#ifdef LIBBSC_CUDA_SUPPORT + return bsc_st_cuda_init(features); +#else + return LIBBSC_NO_ERROR; +#endif +} +/* bsc_st3_transform_serial: single-threaded order-3 pass over T[0..n). Writes a copy of the first LIBBSC_HEADER_SIZE bytes at T[n..], counts bytes in count[] and cyclic byte pairs in bucket[] (bucket is incremented without being cleared here), converts both to exclusive prefix sums, scatters the 16-bit code (previous byte << 8) | current byte into P at the slot of the two bytes that follow, then walks P in order writing each high byte to T at the count[] slot of its low byte. Returns the T position that receives the byte taken from P[pos]. */ +static int bsc_st3_transform_serial(unsigned char * RESTRICT T, unsigned short * RESTRICT P, int * RESTRICT bucket, int n) +{ + unsigned int count[ALPHABET_SIZE]; memset(count, 0, ALPHABET_SIZE * sizeof(unsigned int)); + + for (int i = 0; i < LIBBSC_HEADER_SIZE; ++i) T[n + i] = T[i]; + + unsigned char C0 = T[n - 1]; + for (int i = 0; i < n; ++i) + { + unsigned char C1 = T[i]; + count[C1]++; bucket[(C0 << 8) | C1]++; + C0 = C1; + } + + for (int sum = 0, i = 0; i < ALPHABET_SIZE * ALPHABET_SIZE; ++i) + { + int tmp = sum; sum += bucket[i]; bucket[i] = tmp; + } + + for (int sum = 0, i = 0; i < ALPHABET_SIZE; ++i) + { + int tmp = sum; sum += count[i]; count[i] = tmp; + } + + int pos = bucket[(T[1] << 8) | T[2]]; + + unsigned int W = (T[n - 1] << 16) | (T[0] << 8) | T[1]; + for (int i = 0; i < n; ++i) + { + W = (W << 8) | T[i + 2]; + P[bucket[W & 0x0000ffff]++] = W >> 16; + } + + for (int i = 0; i < pos; ++i) + { + T[count[P[i] & 0x00ff]++] = (unsigned char)(P[i] >> 8); + } + int index = count[P[pos] & 0x00ff]; + for (int i = pos; i < n; ++i) + { + T[count[P[i] & 0x00ff]++] = (unsigned char)(P[i] >> 8); + } + + return index; +} +/* bsc_st4_transform_serial: order-4 counterpart using 32-bit P entries. bucket[] (not cleared here) receives cyclic byte-pair counts that become exclusive prefix sums; each P entry keeps two bytes of the window W in its top 16 bits and the byte C that preceded them in its low 8 bits, and is scattered by the low 16 bits of W. The scatter leaves every bucket[] entry at the end of its group, so the output pass walks P backwards and pre-decrements bucket[P[i] >> 16] to place P[i] & 0xff into T. Returns the T position that received the byte of P[pos]. */ +static int bsc_st4_transform_serial(unsigned char * RESTRICT T, unsigned int * RESTRICT P, int * RESTRICT bucket, int n) +{ + for (int i = 0; i < LIBBSC_HEADER_SIZE; ++i) T[n + i] = T[i]; + + unsigned char C0 = T[n - 1]; + for (int i = 0; i < n; ++i) + { + unsigned char C1 = T[i]; + bucket[(C0 << 8) | C1]++; + C0 = C1; + } + + for (int sum = 0, i = 0; i < ALPHABET_SIZE * ALPHABET_SIZE; ++i) + { + int tmp = sum; sum += bucket[i]; bucket[i] = tmp; + } + + int pos = bucket[(T[2] << 8) | T[3]]; + 
unsigned int W = (T[n - 1] << 24) | (T[0] << 16) | (T[1] << 8) | T[2]; + for (int i = 0; i < n; ++i) + { + unsigned char C = (unsigned char)(W >> 24); + W = (W << 8) | T[i + 3]; + P[bucket[W & 0x0000ffff]++] = (W & 0xffff0000) | C; + } + + for (int i = n - 1; i >= pos; --i) + { + T[--bucket[P[i] >> 16]] = P[i] & 0xff; + } + int index = bucket[P[pos] >> 16]; + for (int i = pos - 1; i >= 0; --i) + { + T[--bucket[P[i] >> 16]] = P[i] & 0xff; + } + + return index; +} +/* bsc_st5_transform_serial: order-5 counterpart. The scatter key is 20 bits wide (low nibble of one byte followed by two full bytes), hence the ALPHABET_SQRT_SIZE * ALPHABET_SIZE * ALPHABET_SIZE table length. After the scatter, bucket[] is cleared and rebuilt as inclusive prefix sums over the key (P0 << 12) | (P1 << 4) | (P2 >> 4); P is then walked backwards placing P[i] & 0xff at --bucket[P[i] >> 12]. Returns the T position that received the byte of P[pos]. */ +static int bsc_st5_transform_serial(unsigned char * RESTRICT T, unsigned int * RESTRICT P, int * RESTRICT bucket, int n) +{ + for (int i = 0; i < LIBBSC_HEADER_SIZE; ++i) T[n + i] = T[i]; + + unsigned char C0 = T[n - 2] & 0xf; + unsigned char C1 = T[n - 1]; + for (int i = 0; i < n; ++i) + { + unsigned char C2 = T[i]; + bucket[(C0 << 16) | (C1 << 8) | C2]++; + C0 = C1 & 0xf; C1 = C2; + } + + for (int sum = 0, i = 0; i < ALPHABET_SQRT_SIZE * ALPHABET_SIZE * ALPHABET_SIZE; ++i) + { + int tmp = sum; sum += bucket[i]; bucket[i] = tmp; + } + + int pos = bucket[((T[2] & 0xf) << 16) | (T[3] << 8) | T[4]]; + + unsigned char L = T[n - 1]; + unsigned int W = (T[0] << 24) | (T[1] << 16) | (T[2] << 8) | T[3]; + for (int i = 0; i < n; ++i) + { + unsigned int V = (W & 0xfffff000) | L; + L = (unsigned char)(W >> 24); W = (W << 8) | T[i + 4]; + P[bucket[W & 0x000fffff]++] = V; + } + + memset(bucket, 0, ALPHABET_SQRT_SIZE * ALPHABET_SIZE * ALPHABET_SIZE * sizeof(int)); + + unsigned char P0 = T[n - 2]; + unsigned char P1 = T[n - 1]; + for (int i = 0; i < n; ++i) + { + unsigned char P2 = T[i]; + bucket[(P0 << 12) | (P1 << 4) | (P2 >> 4)]++; + P0 = P1; P1 = P2; + } + + for (int sum = 0, i = 0; i < ALPHABET_SQRT_SIZE * ALPHABET_SIZE * ALPHABET_SIZE; ++i) + { + sum += bucket[i]; bucket[i] = sum; + } + + for (int i = n - 1; i >= pos; --i) + { + T[--bucket[P[i] >> 12]] = P[i] & 0xff; + } + int index = bucket[P[pos] >> 12]; + for (int i = pos - 1; i >= 0; --i) + { + T[--bucket[P[i] >> 12]] = P[i] & 0xff; + } + + return index; +} 
+/* bsc_st6_transform_serial: order-6 counterpart. bucket[] (not cleared here) is indexed by 24-bit keys built from three consecutive bytes (W >> 8, W1 >> 8) and turned into exclusive prefix sums; each P entry packs the low three bytes of W0 in its top 24 bits and W0 >> 24 in its low 8 bits. P is then walked backwards placing P[i] & 0xff at --bucket[P[i] >> 8]. Returns the T position that received the byte of P[pos]. */ +static int bsc_st6_transform_serial(unsigned char * RESTRICT T, unsigned int * RESTRICT P, int * RESTRICT bucket, int n) +{ + for (int i = 0; i < LIBBSC_HEADER_SIZE; ++i) T[n + i] = T[i]; + + unsigned int W = (T[n - 2] << 16) | (T[n - 1] << 8) | T[0]; + for (int i = 0; i < n; ++i) + { + W = (W << 8) | T[i + 1]; bucket[W >> 8]++; + } + + for (int sum = 0, i = 0; i < ALPHABET_SIZE * ALPHABET_SIZE * ALPHABET_SIZE; ++i) + { + int tmp = sum; sum += bucket[i]; bucket[i] = tmp; + } + + int pos = bucket[(T[3] << 16) | (T[4] << 8) | T[5]]; + + unsigned int W0 = (T[n - 2] << 24) | (T[n - 1] << 16) | (T[0] << 8) | T[1]; + unsigned int W1 = (T[ 2] << 24) | (T[ 3] << 16) | (T[4] << 8) | T[5]; + for (int i = 0; i < n; ++i) + { + W0 = (W0 << 8) | T[i + 2]; W1 = (W1 << 8) | T[i + 6]; + P[bucket[W1 >> 8]++] = (W0 << 8) | (W0 >> 24); + } + + for (int i = n - 1; i >= pos; --i) + { + T[--bucket[P[i] >> 8]] = P[i] & 0xff; + } + int index = bucket[P[pos] >> 8]; + for (int i = pos - 1; i >= 0; --i) + { + T[--bucket[P[i] >> 8]] = P[i] & 0xff; + } + + return index; +} + +#ifdef LIBBSC_OPENMP +/* bsc_st3_transform_parallel: OpenMP variant of the order-3 pass, run with num_threads(2). Thread 0 handles [0, median) front-to-back with bucket0/count0 as advancing cursors; the other thread handles [median, n) and, in the scatter and output phases, walks back-to-front with bucket1/count1 set to last-slot positions (sum - 1) and post-decremented. If the parallel region ends up with one thread the serial routine is called instead. Returns the index, or LIBBSC_NOT_ENOUGH_MEMORY when bucket1 cannot be allocated. */ +static int bsc_st3_transform_parallel(unsigned char * RESTRICT T, unsigned short * RESTRICT P, int * RESTRICT bucket0, int n) +{ + unsigned int count0[ALPHABET_SIZE]; memset(count0, 0, ALPHABET_SIZE * sizeof(unsigned int)); + unsigned int count1[ALPHABET_SIZE]; memset(count1, 0, ALPHABET_SIZE * sizeof(unsigned int)); + + if (int * RESTRICT bucket1 = (int *)bsc_zero_malloc(ALPHABET_SIZE * ALPHABET_SIZE * sizeof(int))) + { + int pos, index = 0; + + for (int i = 0; i < LIBBSC_HEADER_SIZE; ++i) T[n + i] = T[i]; + + #pragma omp parallel num_threads(2) + { + int nThreads = omp_get_num_threads(); + int threadId = omp_get_thread_num(); + + if (nThreads == 1) + { + index = bsc_st3_transform_serial(T, P, bucket0, n); + } + else + { + int median = n / 2; + + { + if (threadId == 0) + { + unsigned char C0 = T[n - 1]; + for (int i = 0; i < median; ++i) + { + unsigned char C1 = T[i]; + count0[C1]++; bucket0[(C0 
<< 8) | C1]++; + C0 = C1; + } + } + else + { + unsigned char C0 = T[median - 1]; + for (int i = median; i < n; ++i) + { + unsigned char C1 = T[i]; + count1[C1]++; bucket1[(C0 << 8) | C1]++; + C0 = C1; + } + } + + #pragma omp barrier + } + + { + #pragma omp single + { + for (int sum = 0, i = 0; i < ALPHABET_SIZE * ALPHABET_SIZE; ++i) + { + int tmp = sum; sum += bucket0[i] + bucket1[i]; bucket0[i] = tmp; bucket1[i] = sum - 1; + } + + for (int sum = 0, i = 0; i < ALPHABET_SIZE; ++i) + { + int tmp = sum; sum += count0[i] + count1[i]; count0[i] = tmp; count1[i] = sum - 1; + } + + pos = bucket0[(T[1] << 8) | T[2]]; + } + } + + { + if (threadId == 0) + { + unsigned int W = (T[n - 2] << 24) | (T[n - 1] << 16) | (T[0] << 8) | T[1]; + for (int i = 0; i < median; ++i) + { + W = (W << 8) | T[i + 2]; + P[bucket0[W & 0x0000ffff]++] = W >> 16; + } + } + else + { + unsigned int W = (T[n - 2] << 24) | (T[n - 1] << 16) | (T[0] << 8) | T[1]; + for (int i = n - 1; i >= median; --i) + { + P[bucket1[W & 0x0000ffff]--] = W >> 16; + W = (W >> 8) | (T[i - 2] << 24); + } + } + + #pragma omp barrier + } + + { + if (threadId == 0) + { + if (pos < median) + { + for (int i = 0; i < pos; ++i) + { + T[count0[P[i] & 0x00ff]++] = (unsigned char)(P[i] >> 8); + } + index = count0[P[pos] & 0x00ff]; + for (int i = pos; i < median; ++i) + { + T[count0[P[i] & 0x00ff]++] = (unsigned char)(P[i] >> 8); + } + } + else + { + for (int i = 0; i < median; ++i) + { + T[count0[P[i] & 0x00ff]++] = (unsigned char)(P[i] >> 8); + } + } + } + else + { + if (pos >= median) + { + for (int i = n - 1; i > pos; --i) + { + T[count1[P[i] & 0x00ff]--] = (unsigned char)(P[i] >> 8); + } + index = count1[P[pos] & 0x00ff]; + for (int i = pos; i >= median; --i) + { + T[count1[P[i] & 0x00ff]--] = (unsigned char)(P[i] >> 8); + } + } + else + { + for (int i = n - 1; i >= median; --i) + { + T[count1[P[i] & 0x00ff]--] = (unsigned char)(P[i] >> 8); + } + } + } + } + } + } + + bsc_free(bucket1); + return index; + }; + return 
LIBBSC_NOT_ENOUGH_MEMORY; +} +/* bsc_st4_transform_parallel: two-thread OpenMP variant of the order-4 pass. Allocates zeroed per-thread tables bucket0 and bucket1; the caller's bucket keeps the merged exclusive prefix sums so that, before the output phase, thread 0 can reload bucket0 from bucket[i] and the other thread can reload bucket1 from bucket[i + 1] - 1 (last entry n - 1). Falls back to the serial routine when only one thread runs. Returns the index, or LIBBSC_NOT_ENOUGH_MEMORY on allocation failure. */ +static int bsc_st4_transform_parallel(unsigned char * RESTRICT T, unsigned int * RESTRICT P, int * RESTRICT bucket, int n) +{ + if (int * RESTRICT bucket0 = (int *)bsc_zero_malloc(ALPHABET_SIZE * ALPHABET_SIZE * sizeof(int))) + { + if (int * RESTRICT bucket1 = (int *)bsc_zero_malloc(ALPHABET_SIZE * ALPHABET_SIZE * sizeof(int))) + { + int pos, index = 0; + + for (int i = 0; i < LIBBSC_HEADER_SIZE; ++i) T[n + i] = T[i]; + + #pragma omp parallel num_threads(2) + { + int nThreads = omp_get_num_threads(); + int threadId = omp_get_thread_num(); + + if (nThreads == 1) + { + index = bsc_st4_transform_serial(T, P, bucket, n); + } + else + { + int median = n / 2; + + { + if (threadId == 0) + { + unsigned char C0 = T[n - 1]; + for (int i = 0; i < median; ++i) + { + unsigned char C1 = T[i]; + bucket0[(C0 << 8) | C1]++; + C0 = C1; + } + } + else + { + unsigned char C0 = T[median - 1]; + for (int i = median; i < n; ++i) + { + unsigned char C1 = T[i]; + bucket1[(C0 << 8) | C1]++; + C0 = C1; + } + } + + #pragma omp barrier + } + + { + #pragma omp single + { + for (int sum = 0, i = 0; i < ALPHABET_SIZE * ALPHABET_SIZE; ++i) + { + int tmp = sum; sum += bucket0[i] + bucket1[i]; bucket[i] = bucket0[i] = tmp; bucket1[i]= sum - 1; + } + + pos = bucket[(T[2] << 8) | T[3]]; + } + } + + { + if (threadId == 0) + { + unsigned int W = (T[n - 1] << 24) | (T[0] << 16) | (T[1] << 8) | T[2]; + for (int i = 0; i < median; ++i) + { + unsigned char C = (unsigned char)(W >> 24); + W = (W << 8) | T[i + 3]; + P[bucket0[W & 0x0000ffff]++] = (W & 0xffff0000) | C; + } + } + else + { + unsigned int W = (T[n - 1] << 24) | (T[0] << 16) | (T[1] << 8) | T[2]; + for (int i = n - 1; i >= median; --i) + { + unsigned char C = T[i - 1]; + P[bucket1[W & 0x0000ffff]--] = (W & 0xffff0000) | C; + W = (W >> 8) | (C << 24); + } + } + + #pragma omp barrier + } + + { + if (threadId == 0) + { + for (int i = 0; i < ALPHABET_SIZE * ALPHABET_SIZE; ++i) bucket0[i] = bucket[i]; + + if (pos < 
median) + { + for (int i = 0; i < pos; ++i) + { + T[bucket0[P[i] >> 16]++] = P[i] & 0xff; + } + index = bucket0[P[pos] >> 16]; + for (int i = pos; i < median; ++i) + { + T[bucket0[P[i] >> 16]++] = P[i] & 0xff; + } + } + else + { + for (int i = 0; i < median; ++i) + { + T[bucket0[P[i] >> 16]++] = P[i] & 0xff; + } + } + } + else + { + for (int i = 0; i < ALPHABET_SIZE * ALPHABET_SIZE - 1; ++i) bucket1[i] = bucket[i + 1] - 1; + bucket1[ALPHABET_SIZE * ALPHABET_SIZE - 1] = n - 1; + + if (pos >= median) + { + for (int i = n - 1; i > pos; --i) + { + T[bucket1[P[i] >> 16]--] = P[i] & 0xff; + } + index = bucket1[P[pos] >> 16]; + for (int i = pos; i >= median; --i) + { + T[bucket1[P[i] >> 16]--] = P[i] & 0xff; + } + } + else + { + for (int i = n - 1; i >= median; --i) + { + T[bucket1[P[i] >> 16]--] = P[i] & 0xff; + } + } + } + } + } + } + + bsc_free(bucket1); bsc_free(bucket0); + return index; + }; + bsc_free(bucket0); + }; + return LIBBSC_NOT_ENOUGH_MEMORY; +} +/* bsc_st5_transform_parallel: two-thread OpenMP variant of the order-5 pass. Each thread scatters its half of P, then clears its own table and recounts the output key (P0 << 12) | (P1 << 4) | (P2 >> 4) for its half; a single thread merges the two tables into front cursors (bucket0) and back cursors (bucket1, sum - 1) before the output phase. Falls back to the serial routine when only one thread runs. Returns the index, or LIBBSC_NOT_ENOUGH_MEMORY when bucket1 cannot be allocated. */ +static int bsc_st5_transform_parallel(unsigned char * RESTRICT T, unsigned int * RESTRICT P, int * RESTRICT bucket0, int n) +{ + if (int * RESTRICT bucket1 = (int *)bsc_zero_malloc(ALPHABET_SQRT_SIZE * ALPHABET_SIZE * ALPHABET_SIZE * sizeof(int))) + { + int pos, index = 0; + + for (int i = 0; i < LIBBSC_HEADER_SIZE; ++i) T[n + i] = T[i]; + + #pragma omp parallel num_threads(2) + { + int nThreads = omp_get_num_threads(); + int threadId = omp_get_thread_num(); + + if (nThreads == 1) + { + index = bsc_st5_transform_serial(T, P, bucket0, n); + } + else + { + int median = n / 2; + + { + if (threadId == 0) + { + unsigned char C0 = T[n - 2] & 0xf; + unsigned char C1 = T[n - 1]; + for (int i = 0; i < median; ++i) + { + unsigned char C2 = T[i]; + bucket0[(C0 << 16) | (C1 << 8) | C2]++; + C0 = C1 & 0xf; C1 = C2; + } + } + else + { + unsigned char C0 = T[median - 2] & 0xf; + unsigned char C1 = T[median - 1]; + for (int i = median; i < n; ++i) + { + unsigned char C2 = T[i]; + bucket1[(C0 << 16) | (C1 << 8) | C2]++; + C0 = C1 
& 0xf; C1 = C2; + } + } + + #pragma omp barrier + } + + { + #pragma omp single + { + for (int sum = 0, i = 0; i < ALPHABET_SQRT_SIZE * ALPHABET_SIZE * ALPHABET_SIZE; ++i) + { + int tmp = sum; sum += bucket0[i] + bucket1[i]; bucket0[i] = tmp; bucket1[i] = sum - 1; + } + + pos = bucket0[((T[2] & 0xf) << 16) | (T[3] << 8) | T[4]]; + } + } + + { + if (threadId == 0) + { + unsigned char L = T[n - 1]; + unsigned int W = (T[0] << 24) | (T[1] << 16) | (T[2] << 8) | T[3]; + for (int i = 0; i < median; ++i) + { + unsigned int V = (W & 0xfffff000) | L; + + L = (unsigned char)(W >> 24); W = (W << 8) | T[i + 4]; + P[bucket0[W & 0x000fffff]++] = V; + } + + memset(bucket0, 0, ALPHABET_SQRT_SIZE * ALPHABET_SIZE * ALPHABET_SIZE * sizeof(int)); + + unsigned char P0 = T[n - 2]; + unsigned char P1 = T[n - 1]; + for (int i = 0; i < median; ++i) + { + unsigned char P2 = T[i]; + bucket0[(P0 << 12) | (P1 << 4) | (P2 >> 4)]++; + P0 = P1; P1 = P2; + } + } + else + { + unsigned char L = T[n - 1]; + unsigned int W = (T[0] << 24) | (T[1] << 16) | (T[2] << 8) | T[3]; + for (int i = n - 1; i >= median; --i) + { + unsigned int S = W & 0x000fffff; + + W = (W >> 8) | (L << 24); L = T[i - 1]; + P[bucket1[S]--] = (W & 0xfffff000) | L; + } + + memset(bucket1, 0, ALPHABET_SQRT_SIZE * ALPHABET_SIZE * ALPHABET_SIZE * sizeof(int)); + + unsigned char P0 = T[median - 2]; + unsigned char P1 = T[median - 1]; + for (int i = median; i < n; ++i) + { + unsigned char P2 = T[i]; + bucket1[(P0 << 12) | (P1 << 4) | (P2 >> 4)]++; + P0 = P1; P1 = P2; + } + } + + #pragma omp barrier + } + + { + #pragma omp single + { + for (int sum = 0, i = 0; i < ALPHABET_SQRT_SIZE * ALPHABET_SIZE * ALPHABET_SIZE; ++i) + { + int tmp = sum; sum += bucket0[i] + bucket1[i]; bucket0[i] = tmp; bucket1[i] = sum - 1; + } + } + } + + { + if (threadId == 0) + { + if (pos < median) + { + for (int i = 0; i < pos; ++i) + { + T[bucket0[P[i] >> 12]++] = P[i] & 0xff; + } + index = bucket0[P[pos] >> 12]; + for (int i = pos; i < median; ++i) + { + 
T[bucket0[P[i] >> 12]++] = P[i] & 0xff; + } + } + else + { + for (int i = 0; i < median; ++i) + { + T[bucket0[P[i] >> 12]++] = P[i] & 0xff; + } + } + } + else + { + if (pos >= median) + { + for (int i = n - 1; i > pos; --i) + { + T[bucket1[P[i] >> 12]--] = P[i] & 0xff; + } + index = bucket1[P[pos] >> 12]; + for (int i = pos; i >= median; --i) + { + T[bucket1[P[i] >> 12]--] = P[i] & 0xff; + } + } + else + { + for (int i = n - 1; i >= median; --i) + { + T[bucket1[P[i] >> 12]--] = P[i] & 0xff; + } + } + } + } + } + } + + bsc_free(bucket1); + return index; + }; + return LIBBSC_NOT_ENOUGH_MEMORY; +} +/* bsc_st6_transform_parallel: two-thread OpenMP variant of the order-6 pass. The prefix-sum step is itself split: thread 0 sums the lower half of the 24-bit key space upwards from 0 while the other thread sums the upper half downwards from n. Before the output phase bucket1 is reloaded from bucket shifted by one entry (last entry set to n) so the second thread can pre-decrement from each group's end, while thread 0 advances the caller's bucket from each group's start. Falls back to the serial routine when only one thread runs. Returns the index, or LIBBSC_NOT_ENOUGH_MEMORY on allocation failure. */ +static int bsc_st6_transform_parallel(unsigned char * RESTRICT T, unsigned int * RESTRICT P, int * RESTRICT bucket, int n) +{ + if (int * RESTRICT bucket0 = (int *)bsc_zero_malloc(ALPHABET_SIZE * ALPHABET_SIZE * ALPHABET_SIZE * sizeof(int))) + { + if (int * RESTRICT bucket1 = (int *)bsc_zero_malloc(ALPHABET_SIZE * ALPHABET_SIZE * ALPHABET_SIZE * sizeof(int))) + { + int pos, index = 0; + + for (int i = 0; i < LIBBSC_HEADER_SIZE; ++i) T[n + i] = T[i]; + + #pragma omp parallel num_threads(2) + { + int nThreads = omp_get_num_threads(); + int threadId = omp_get_thread_num(); + + if (nThreads == 1) + { + index = bsc_st6_transform_serial(T, P, bucket, n); + } + else + { + int median = n / 2; + + { + if (threadId == 0) + { + unsigned int W = (T[n - 2] << 16) | (T[n - 1] << 8) | T[0]; + for (int i = 0; i < median; ++i) + { + W = (W << 8) | T[i + 1]; bucket0[W >> 8]++; + } + } + else + { + unsigned int W = (T[median - 2] << 16) | (T[median - 1] << 8) | T[median]; + for (int i = median; i < n; ++i) + { + W = (W << 8) | T[i + 1]; bucket1[W >> 8]++; + } + } + + #pragma omp barrier + } + + { + if (threadId == 0) + { + for (int sum = 0, i = 0; i < ALPHABET_SIZE * ALPHABET_SIZE * ALPHABET_SIZE / 2; ++i) + { + int tmp = sum; sum = sum + bucket0[i] + bucket1[i]; bucket[i] = bucket0[i] = tmp; bucket1[i] = sum - 1; + } + } + else + { + for (int sum = n, i = ALPHABET_SIZE * ALPHABET_SIZE * 
ALPHABET_SIZE - 1; i >= ALPHABET_SIZE * ALPHABET_SIZE * ALPHABET_SIZE / 2; --i) + { + int tmp = sum; sum = sum - bucket0[i] - bucket1[i]; bucket[i] = bucket0[i] = sum; bucket1[i] = tmp - 1; + } + } + + #pragma omp barrier + } + + { + if (threadId == 0) + { + pos = bucket0[(T[3] << 16) | (T[4] << 8) | T[5]]; + + unsigned int W0 = (T[n - 2] << 24) | (T[n - 1] << 16) | (T[0] << 8) | T[1]; + unsigned int W1 = (T[ 2] << 24) | (T[ 3] << 16) | (T[4] << 8) | T[5]; + for (int i = 0; i < median; ++i) + { + W0 = (W0 << 8) | T[i + 2]; W1 = (W1 << 8) | T[i + 6]; + P[bucket0[W1 >> 8]++] = (W0 << 8) | (W0 >> 24); + } + } + else + { + unsigned int W0 = (T[n - 1] << 24) | (T[0] << 16) | (T[1] << 8) | T[2]; + unsigned int W1 = (T[ 3] << 24) | (T[4] << 16) | (T[5] << 8) | T[6]; + for (int i = n - 1; i >= median; --i) + { + W0 = (W0 >> 8) | (T[i - 1] << 24); W1 = (W1 >> 8) | (T[i + 3] << 24); + P[bucket1[W1 >> 8]--] = (W0 << 8) | (W0 >> 24); + } + } + + #pragma omp barrier + } + + { + if (threadId == 0) + { + memcpy(bucket1, bucket + 1, (ALPHABET_SIZE * ALPHABET_SIZE * ALPHABET_SIZE / 2) * sizeof(int)); + } + else + { + memcpy(bucket1 + ALPHABET_SIZE * ALPHABET_SIZE * ALPHABET_SIZE / 2, bucket + ALPHABET_SIZE * ALPHABET_SIZE * ALPHABET_SIZE / 2 + 1, (ALPHABET_SIZE * ALPHABET_SIZE * ALPHABET_SIZE / 2- 1) * sizeof(int)); + bucket1[ALPHABET_SIZE * ALPHABET_SIZE * ALPHABET_SIZE - 1] = n; + } + + #pragma omp barrier + } + + { + if (threadId == 0) + { + if (pos < median) + { + for (int i = 0; i < pos; ++i) + { + T[bucket[P[i] >> 8]++] = P[i] & 0xff; + } + index = bucket[P[pos] >> 8]; + for (int i = pos; i < median; ++i) + { + T[bucket[P[i] >> 8]++] = P[i] & 0xff; + } + } + else + { + for (int i = 0; i < median; ++i) + { + T[bucket[P[i] >> 8]++] = P[i] & 0xff; + } + } + } + else + { + if (pos >= median) + { + for (int i = n - 1; i >= pos; --i) + { + T[--bucket1[P[i] >> 8]] = P[i] & 0xff; + } + index = bucket1[P[pos] >> 8]; + for (int i = pos - 1; i >= median; --i) + { + T[--bucket1[P[i] >> 
8]] = P[i] & 0xff; + } + } + else + { + for (int i = n - 1; i >= median; --i) + { + T[--bucket1[P[i] >> 8]] = P[i] & 0xff; + } + } + } + } + } + } + + bsc_free(bucket1); bsc_free(bucket0); + return index; + }; + bsc_free(bucket0); + }; + return LIBBSC_NOT_ENOUGH_MEMORY; +} + +#endif +/* bsc_st3_encode: allocates P (n unsigned shorts) and a zeroed ALPHABET_SIZE * ALPHABET_SIZE bucket table, runs the parallel transform when LIBBSC_OPENMP is compiled in, features has LIBBSC_FEATURE_MULTITHREADING and n >= 64 * 1024, otherwise the serial one, then frees both buffers and returns the transform result. Returns LIBBSC_NOT_ENOUGH_MEMORY if either allocation fails. */ +int bsc_st3_encode(unsigned char * T, int n, int features) +{ + if (unsigned short * P = (unsigned short *)bsc_malloc(n * sizeof(unsigned short))) + { + if (int * bucket = (int *)bsc_zero_malloc(ALPHABET_SIZE * ALPHABET_SIZE * sizeof(int))) + { + int index = LIBBSC_NO_ERROR; + +#ifdef LIBBSC_OPENMP + + if ((features & LIBBSC_FEATURE_MULTITHREADING) && (n >= 64 * 1024)) + { + index = bsc_st3_transform_parallel(T, P, bucket, n); + } + else + +#endif + + { + index = bsc_st3_transform_serial(T, P, bucket, n); + } + + bsc_free(bucket); bsc_free(P); + return index; + }; + bsc_free(P); + }; + return LIBBSC_NOT_ENOUGH_MEMORY; +} +/* bsc_st4_encode: order-4 wrapper. Allocates P (n unsigned ints) and a zeroed ALPHABET_SIZE * ALPHABET_SIZE bucket table, picks the parallel transform under the same conditions (LIBBSC_OPENMP, LIBBSC_FEATURE_MULTITHREADING, n >= 64 * 1024) or the serial one, frees the buffers and returns the result; LIBBSC_NOT_ENOUGH_MEMORY if an allocation fails. */ +int bsc_st4_encode(unsigned char * T, int n, int features) +{ + if (unsigned int * P = (unsigned int *)bsc_malloc(n * sizeof(unsigned int))) + { + if (int * bucket = (int *)bsc_zero_malloc(ALPHABET_SIZE * ALPHABET_SIZE * sizeof(int))) + { + int index = LIBBSC_NO_ERROR; + +#ifdef LIBBSC_OPENMP + + if ((features & LIBBSC_FEATURE_MULTITHREADING) && (n >= 64 * 1024)) + { + index = bsc_st4_transform_parallel(T, P, bucket, n); + } + else + +#endif + + { + index = bsc_st4_transform_serial(T, P, bucket, n); + } + + bsc_free(bucket); bsc_free(P); + return index; + }; + bsc_free(P); + }; + return LIBBSC_NOT_ENOUGH_MEMORY; +} +/* bsc_st5_encode: order-5 wrapper. Same allocate / dispatch / free shape, but the zeroed bucket table has ALPHABET_SQRT_SIZE * ALPHABET_SIZE * ALPHABET_SIZE entries; the parallel threshold is n >= 64 * 1024. Returns the transform result or LIBBSC_NOT_ENOUGH_MEMORY. */ +int bsc_st5_encode(unsigned char * T, int n, int features) +{ + if (unsigned int * P = (unsigned int *)bsc_malloc(n * sizeof(unsigned int))) + { + if (int * bucket = (int *)bsc_zero_malloc(ALPHABET_SQRT_SIZE * ALPHABET_SIZE * ALPHABET_SIZE * sizeof(int))) + { + int index = LIBBSC_NO_ERROR; + +#ifdef LIBBSC_OPENMP + + if ((features & LIBBSC_FEATURE_MULTITHREADING) && (n >= 64 * 1024)) + { + index = bsc_st5_transform_parallel(T, P, bucket, n); + } + 
else + +#endif + + { + index = bsc_st5_transform_serial(T, P, bucket, n); + } + + bsc_free(bucket); bsc_free(P); + return index; + }; + bsc_free(P); + }; + return LIBBSC_NOT_ENOUGH_MEMORY; +} +/* bsc_st6_encode: order-6 wrapper. The zeroed bucket table has ALPHABET_SIZE * ALPHABET_SIZE * ALPHABET_SIZE entries and the parallel transform is only chosen for n >= 6 * 1024 * 1024 (with LIBBSC_OPENMP and LIBBSC_FEATURE_MULTITHREADING). Returns the transform result or LIBBSC_NOT_ENOUGH_MEMORY. */ +int bsc_st6_encode(unsigned char * T, int n, int features) +{ + if (unsigned int * P = (unsigned int *)bsc_malloc(n * sizeof(unsigned int))) + { + if (int * bucket = (int *)bsc_zero_malloc(ALPHABET_SIZE * ALPHABET_SIZE * ALPHABET_SIZE * sizeof(int))) + { + int index = LIBBSC_NO_ERROR; + +#ifdef LIBBSC_OPENMP + + if ((features & LIBBSC_FEATURE_MULTITHREADING) && (n >= 6 * 1024 * 1024)) + { + index = bsc_st6_transform_parallel(T, P, bucket, n); + } + else + +#endif + + { + index = bsc_st6_transform_serial(T, P, bucket, n); + } + + bsc_free(bucket); bsc_free(P); + return index; + }; + bsc_free(P); + }; + return LIBBSC_NOT_ENOUGH_MEMORY; +} +/* bsc_st_encode: public entry point of the forward transform. Returns LIBBSC_BAD_PARAMETER for NULL T, negative n or k outside 3..8, and 0 for n <= 1. With LIBBSC_CUDA_SUPPORT compiled in and LIBBSC_FEATURE_CUDA set it first calls bsc_st_encode_cuda and returns that result when it is >= LIBBSC_NO_ERROR or when k >= 7; otherwise k = 3..6 are dispatched to the CPU encoders and anything left returns LIBBSC_NOT_SUPPORTED. */ +int bsc_st_encode(unsigned char * T, int n, int k, int features) +{ + if ((T == NULL) || (n < 0)) return LIBBSC_BAD_PARAMETER; + if ((k < 3) || (k > 8)) return LIBBSC_BAD_PARAMETER; + if (n <= 1) return 0; + +#ifdef LIBBSC_CUDA_SUPPORT + + if (features & LIBBSC_FEATURE_CUDA) + { + int index = bsc_st_encode_cuda(T, n, k, features); + if (index >= LIBBSC_NO_ERROR || k >= 7) return index; + } + +#endif + + if (k == 3) return bsc_st3_encode(T, n, features); + if (k == 4) return bsc_st4_encode(T, n, features); + if (k == 5) return bsc_st5_encode(T, n, features); + if (k == 6) return bsc_st6_encode(T, n, features); + + return LIBBSC_NOT_SUPPORTED; +} +/* bsc_unst_sort_serial: decoder preparation. Turns count[] into exclusive prefix sums of byte frequencies, fills and transposes the pair table bucket[], then marks group starts in P: for k == 3 it stores 1 at the first slot of every non-empty bucket and returns; for larger k it stores 0x80000000 at group starts and adds one lower mask bit (0x40000000, ...) per further round up to k. Returns true (failBack) when any single byte value occurs 0x800000 times or more. count[] and bucket[] are accumulated into, not cleared, here. */ +static bool bsc_unst_sort_serial(unsigned char * RESTRICT T, unsigned int * RESTRICT P, unsigned int * RESTRICT count, unsigned int * RESTRICT bucket, int n, int k) +{ + unsigned int index[ALPHABET_SIZE]; + int group[ALPHABET_SIZE]; + + bool failBack = false; + { + for (int i = 0; i < n; ++i) count[T[i]]++; + for (int sum = 0, c = 0; c < ALPHABET_SIZE; ++c) + { + if (count[c] >= 0x800000) failBack = true; + + int tmp = sum; sum += count[c]; count[c] = tmp; + if ((int)count[c] != sum) + { + 
unsigned int * RESTRICT bucket_p = &bucket[c << 8]; + for (int i = count[c]; i < sum; ++i) bucket_p[T[i]]++; + } + } + } + + for (int c = 0; c < ALPHABET_SIZE; ++c) + { + for (int d = 0; d < c; ++d) + { + int tmp = bucket[(d << 8) | c]; bucket[(d << 8) | c] = bucket[(c << 8) | d]; bucket[(c << 8) | d] = tmp; + } + } + + if (k == 3) + { + for (int sum = 0, w = 0; w < ALPHABET_SIZE * ALPHABET_SIZE; ++w) + { + if (bucket[w] > 0) + { + P[sum] = 1; sum += bucket[w]; + } + } + + return failBack; + } + + memcpy(index, count, ALPHABET_SIZE * sizeof(unsigned int)); + memset(group, 0xff, ALPHABET_SIZE * sizeof(int)); + + for (int sum = 0, w = 0; w < ALPHABET_SIZE * ALPHABET_SIZE; ++w) + { + int tmp = sum; sum += bucket[w]; bucket[w] = tmp; + for (int i = bucket[w]; i < sum; ++i) + { + unsigned char c = T[i]; + if (group[c] != w) + { + group[c] = w; P[index[c]] = 0x80000000; + } + index[c]++; + } + } + + unsigned int mask0 = 0x80000000, mask1 = 0x40000000; + for (int round = 4; round < k; ++round, mask0 >>= 1, mask1 >>= 1) + { + memcpy(index, count, ALPHABET_SIZE * sizeof(unsigned int)); + memset(group, 0xff, ALPHABET_SIZE * sizeof(int)); + + for (int g = 0, i = 0; i < n; ++i) + { + if (P[i] & mask0) g = i; + + unsigned char c = T[i]; + if (group[c] != g) + { + group[c] = g; P[index[c]] += mask1; + } + index[c]++; + } + } + + return failBack; +} +/* bsc_unst_reconstruct_case1_serial: rebuilds T in place, from T[n - 1] down to T[0], starting the walk at P slot start. The first loop rewrites every P entry as (byte << 24) | link, where an entry with bit 0x800000 set redirects to the slot held in its low 23 bits; the second loop follows those links. bsc_unst_reconstruct_serial selects this variant when n < 0x800000. */ +static void bsc_unst_reconstruct_case1_serial(unsigned char * RESTRICT T, unsigned int * RESTRICT P, unsigned int * RESTRICT count, int n, int start) +{ + unsigned int index[ALPHABET_SIZE]; + int group[ALPHABET_SIZE]; + + memcpy(index, count, ALPHABET_SIZE * sizeof(unsigned int)); + memset(group, 0xff, ALPHABET_SIZE * sizeof(int)); + + for (int g = 0, i = 0; i < n; ++i) + { + if (P[i] > 0) g = i; + + unsigned char c = T[i]; + if (group[c] < g) + { + group[c] = i; P[i] = (c << 24) | index[c]; + } + else + { + P[i] = (c << 24) | 0x800000 | group[c]; P[group[c]]++; + } + index[c]++; + } + + for (int p = start, i = n - 1; i >= 0; --i) 
+ { + unsigned int u = P[p]; + if (u & 0x800000) + { + p = u & 0x7fffff; + u = P[p]; + } + + T[i] = u >> 24; P[p]--; p = u & 0x7fffff; + } +} +/* bsc_unst_reconstruct_case2_serial: same walk as case 1 but with relative values in the 23-bit field (presumably so they still fit when n >= 0x800000): index[] starts at 0 and count[c] is added back while walking, and redirect entries hold the distance i - group[c]. Selected when n >= 0x800000 and failBack is false. */ +static void bsc_unst_reconstruct_case2_serial(unsigned char * RESTRICT T, unsigned int * RESTRICT P, unsigned int * RESTRICT count, int n, int start) +{ + unsigned int index[ALPHABET_SIZE]; + int group[ALPHABET_SIZE]; + + memset(index, 0, ALPHABET_SIZE * sizeof(unsigned int)); + memset(group, 0xff, ALPHABET_SIZE * sizeof(int)); + + for (int g = 0, i = 0; i < n; ++i) + { + if (P[i] > 0) g = i; + + unsigned char c = T[i]; + if (group[c] < g) + { + group[c] = i; P[i] = (c << 24) | index[c]; + } + else + { + P[i] = (c << 24) | 0x800000 | (i - group[c]); P[group[c]]++; + } + index[c]++; + } + + for (int p = start, i = n - 1; i >= 0; --i) + { + unsigned int u = P[p]; + if (u & 0x800000) + { + p = p - (u & 0x7fffff); + u = P[p]; + } + + unsigned char c = u >> 24; + T[i] = c; P[p]--; p = (u & 0x7fffff) + count[c]; + } +} +/* bsc_unst_search: advances index while p[index] <= v and returns the first index whose entry is greater than v; there is no bounds check. */ +static INLINE int bsc_unst_search(int index, unsigned int * p, unsigned int v) +{ + while (p[index] <= v) { index++; } return index; +} +/* ST_NUM_FASTBITS: log2 of the length of the fastbits[] lookup tables declared in the case 3 reconstruction routines. */ +#define ST_NUM_FASTBITS (10) +/* bsc_unst_reconstruct_case3_serial: variant selected when failBack is true. P entries carry no byte value, only a link or 0x80000000 | group slot; the byte for a slot p is recovered with bsc_unst_search over the per-byte end offsets in index[], starting from the guess fastbits[p >> shift]. T[0] is produced first, then T[n - 1] down to T[1]. */ +static void bsc_unst_reconstruct_case3_serial(unsigned char * RESTRICT T, unsigned int * RESTRICT P, unsigned int * RESTRICT count, int n, int start) +{ + unsigned char fastbits[1 << ST_NUM_FASTBITS]; + unsigned int index[ALPHABET_SIZE]; + int group[ALPHABET_SIZE]; + + memcpy(index, count, ALPHABET_SIZE * sizeof(unsigned int)); + memset(group, 0xff, ALPHABET_SIZE * sizeof(int)); + + for (int g = 0, i = 0; i < n; ++i) + { + if (P[i] > 0) g = i; + + unsigned char c = T[i]; + if (group[c] < g) + { + group[c] = i; P[i] = index[c]; + } + else + { + P[i] = 0x80000000 | group[c]; P[group[c]]++; + } + index[c]++; + } + + { + int shift = 0; while (((n - 1) >> shift) >= (1 << ST_NUM_FASTBITS)) shift++; + + { + for (int v = 0, c = 0; c < ALPHABET_SIZE; ++c) + { + index[c] = (c + 1 < ALPHABET_SIZE) ? 
count[c + 1] : n; + if (count[c] != index[c]) + { + for (; v <= (int)((index[c] - 1) >> shift); ++v) fastbits[v] = c; + } + } + } + + if (P[start] & 0x80000000) + { + start = P[start] & 0x7fffffff; + } + + T[0] = bsc_unst_search(fastbits[start >> shift], index, start); + P[start]--; start = P[start] + 1; + + for (int p = start, i = n - 1; i >= 1; --i) + { + unsigned int u = P[p]; + if (u & 0x80000000) + { + p = u & 0x7fffffff; + u = P[p]; + } + + T[i] = bsc_unst_search(fastbits[p >> shift], index, p); + P[p]--; p = u; + } + } +} +/* bsc_unst_reconstruct_serial: picks case 1 when n < 0x800000, otherwise case 2 when failBack is false and case 3 when it is true. */ +static void bsc_unst_reconstruct_serial(unsigned char * T, unsigned int * P, unsigned int * count, int n, int index, bool failBack) +{ + if (n < 0x800000) return bsc_unst_reconstruct_case1_serial(T, P, count, n, index); + if (!failBack) return bsc_unst_reconstruct_case2_serial(T, P, count, n, index); + if (failBack) return bsc_unst_reconstruct_case3_serial(T, P, count, n, index); +} + +#ifdef LIBBSC_OPENMP +/* bsc_unst_sort_parallel: OpenMP counterpart of the decoder preparation. Byte counts are gathered per thread and merged under a critical section, the pair table is filled by a parallel loop over byte values c, group starts are marked with 0x80000000 in P, and bucket[] is then rewritten to hold running start offsets that the marking rounds (and the parallel reconstruct routines) copy as their index[]. Rounds start at 3 here. Returns true (failBack) when any single byte value occurs 0x800000 times or more. */ +static bool bsc_unst_sort_parallel(unsigned char * RESTRICT T, unsigned int * RESTRICT P, unsigned int * RESTRICT count, unsigned int * RESTRICT bucket, int n, int k) +{ + bool failBack = false; + { + #pragma omp parallel + { + unsigned int count_local[ALPHABET_SIZE]; + + memset(count_local, 0, ALPHABET_SIZE * sizeof(unsigned int)); + + #pragma omp for schedule(static) nowait + for (int i = 0; i < n; ++i) count_local[T[i]]++; + + #pragma omp critical + for (int c = 0; c < ALPHABET_SIZE; ++c) count[c] += count_local[c]; + } + + for (int sum = 0, c = 0; c < ALPHABET_SIZE; ++c) + { + if (count[c] >= 0x800000) failBack = true; + int tmp = sum; sum += count[c]; count[c] = tmp; + } + + #pragma omp parallel for schedule(static, 1) + for (int c = 0; c < ALPHABET_SIZE; ++c) + { + int start = count[c], end = (c + 1 < ALPHABET_SIZE) ? 
count[c + 1] : n; + if (start != end) + { + unsigned int * RESTRICT bucket_p = &bucket[c << 8]; + for (int i = start; i < end; ++i) bucket_p[T[i]]++; + } + } + } + + for (int sum = 0, C0 = 0; C0 < ALPHABET_SIZE; ++C0) + { + for (int C1 = 0; C1 < ALPHABET_SIZE; ++C1) + { + if (bucket[(C1 << 8) | C0] > 0) + { + P[sum] = 0x80000000; sum += bucket[(C1 << 8) | C0]; + } + } + } + + { + unsigned int index[ALPHABET_SIZE]; + + memcpy(index, count, ALPHABET_SIZE * sizeof(unsigned int)); + for (int C0 = 0; C0 < ALPHABET_SIZE; ++C0) + { + unsigned int * RESTRICT bucket_p = &bucket[C0 << 8]; + for (int C1 = 0; C1 < ALPHABET_SIZE; ++C1) + { + int tmp = index[C1]; index[C1] += bucket_p[C1]; bucket_p[C1] = tmp; + } + } + } + + unsigned int mask0 = 0x80000000, mask1 = 0x40000000; + for (int round = 3; round < k; ++round, mask0 >>= 1, mask1 >>= 1) + { + #pragma omp parallel for schedule(static, 1) + for (int c = 0; c < ALPHABET_SIZE; ++c) + { + unsigned int index[ALPHABET_SIZE]; memcpy(index, &bucket[c << 8], ALPHABET_SIZE * sizeof(unsigned int)); + int group[ALPHABET_SIZE]; memset(group, 0xff, ALPHABET_SIZE * sizeof(int)); + + int start = count[c], end = (c + 1 < ALPHABET_SIZE) ? count[c + 1] : n; + for (int g = 0, i = start; i < end; ++i) + { + if (P[i] & mask0) g = i; + + unsigned char c = T[i]; + if (group[c] != g) + { + group[c] = g; P[index[c]] += mask1; + } + index[c]++; + } + } + } + + return failBack; +} +/* bsc_unst_reconstruct_case1_parallel: same P encoding and backward walk as the serial case 1, but the link-building pass runs as an OpenMP loop over byte values c, each iteration covering slots [count[c], count[c + 1]) (or up to n for the last value) with index[] seeded from the bucket row at c << 8. The final walk over P is sequential. */ +static void bsc_unst_reconstruct_case1_parallel(unsigned char * RESTRICT T, unsigned int * RESTRICT P, unsigned int * RESTRICT count, unsigned int * RESTRICT bucket, int n, int start) +{ + #pragma omp parallel for schedule(static, 1) + for (int c = 0; c < ALPHABET_SIZE; ++c) + { + unsigned int index[ALPHABET_SIZE]; memcpy(index, &bucket[c << 8], ALPHABET_SIZE * sizeof(unsigned int)); + int group[ALPHABET_SIZE]; memset(group, 0xff, ALPHABET_SIZE * sizeof(int)); + + int start = count[c], end = (c + 1 < ALPHABET_SIZE) ? 
count[c + 1] : n; + for (int g = 0, i = start; i < end; ++i) + { + if (P[i] > 0) g = i; + + unsigned char c = T[i]; + if (group[c] < g) + { + group[c] = i; P[i] = (c << 24) | index[c]; + } + else + { + P[i] = (c << 24) | 0x800000 | group[c]; P[group[c]]++; + } + index[c]++; + } + } + + for (int p = start, i = n - 1; i >= 0; --i) + { + unsigned int u = P[p]; + if (u & 0x800000) + { + p = u & 0x7fffff; + u = P[p]; + } + + T[i] = u >> 24; P[p]--; p = u & 0x7fffff; + } +} +/* bsc_unst_reconstruct_case2_parallel: parallel link-building for the relative encoding of case 2. index[] is seeded from the bucket row at c << 8 and then has count[] subtracted so links are relative to count[c]; redirect entries hold i - group[c]. The final walk over P is sequential and adds count[c] back. */ +static void bsc_unst_reconstruct_case2_parallel(unsigned char * RESTRICT T, unsigned int * RESTRICT P, unsigned int * RESTRICT count, unsigned int * RESTRICT bucket, int n, int start) +{ + #pragma omp parallel for schedule(static, 1) + for (int c = 0; c < ALPHABET_SIZE; ++c) + { + unsigned int index[ALPHABET_SIZE]; memcpy(index, &bucket[c << 8], ALPHABET_SIZE * sizeof(unsigned int)); + int group[ALPHABET_SIZE]; memset(group, 0xff, ALPHABET_SIZE * sizeof(int)); + + for (int i = 0; i < ALPHABET_SIZE; ++i) index[i] -= count[i]; + + int start = count[c], end = (c + 1 < ALPHABET_SIZE) ? 
count[c + 1] : n; + for (int g = 0, i = start; i < end; ++i) + { + if (P[i] > 0) g = i; + + unsigned char c = T[i]; + if (group[c] < g) + { + group[c] = i; P[i] = (c << 24) | index[c]; + } + else + { + P[i] = (c << 24) | 0x800000 | (i - group[c]); P[group[c]]++; + } + index[c]++; + } + } + + for (int p = start, i = n - 1; i >= 0; --i) + { + unsigned int u = P[p]; + if (u & 0x800000) + { + p = p - (u & 0x7fffff); + u = P[p]; + } + + unsigned char c = u >> 24; + T[i] = c; P[p]--; p = (u & 0x7fffff) + count[c]; + } +} +/* bsc_unst_reconstruct_case3_parallel: parallel link-building for case 3 (P entries hold only a link or 0x80000000 | group slot), followed by the sequential walk that recovers each byte with bsc_unst_search over the end offsets in index[], seeded from fastbits[p >> shift]. T[0] is produced first, then T[n - 1] down to T[1]. */ +static void bsc_unst_reconstruct_case3_parallel(unsigned char * RESTRICT T, unsigned int * RESTRICT P, unsigned int * RESTRICT count, unsigned int * RESTRICT bucket, int n, int start) +{ + #pragma omp parallel for schedule(static, 1) + for (int c = 0; c < ALPHABET_SIZE; ++c) + { + unsigned int index[ALPHABET_SIZE]; memcpy(index, &bucket[c << 8], ALPHABET_SIZE * sizeof(unsigned int)); + int group[ALPHABET_SIZE]; memset(group, 0xff, ALPHABET_SIZE * sizeof(int)); + + int start = count[c], end = (c + 1 < ALPHABET_SIZE) ? count[c + 1] : n; + for (int g = 0, i = start; i < end; ++i) + { + if (P[i] > 0) g = i; + + unsigned char c = T[i]; + if (group[c] < g) + { + group[c] = i; P[i] = index[c]; + } + else + { + P[i] = 0x80000000 | group[c]; P[group[c]]++; + } + index[c]++; + } + } + + unsigned char fastbits[1 << ST_NUM_FASTBITS]; + unsigned int index[ALPHABET_SIZE]; + + { + int shift = 0; while (((n - 1) >> shift) >= (1 << ST_NUM_FASTBITS)) shift++; + + { + for (int v = 0, c = 0; c < ALPHABET_SIZE; ++c) + { + index[c] = (c + 1 < ALPHABET_SIZE) ? 
count[c + 1] : n; + if (count[c] != index[c]) + { + for (; v <= (int)((index[c] - 1) >> shift); ++v) fastbits[v] = c; + } + } + } + + if (P[start] & 0x80000000) + { + start = P[start] & 0x7fffffff; + } + + T[0] = bsc_unst_search(fastbits[start >> shift], index, start); + P[start]--; start = P[start] + 1; + + for (int p = start, i = n - 1; i >= 1; --i) + { + unsigned int u = P[p]; + if (u & 0x80000000) + { + p = u & 0x7fffffff; + u = P[p]; + } + + T[i] = bsc_unst_search(fastbits[p >> shift], index, p); + P[p]--; p = u; + } + } +} +/* bsc_unst_reconstruct_parallel: picks the parallel case 1 when n < 0x800000, otherwise case 2 when failBack is false and case 3 when it is true. */ +static void bsc_unst_reconstruct_parallel(unsigned char * T, unsigned int * P, unsigned int * count, unsigned int * bucket, int n, int index, bool failBack) +{ + if (n < 0x800000) return bsc_unst_reconstruct_case1_parallel(T, P, count, bucket, n, index); + if (!failBack) return bsc_unst_reconstruct_case2_parallel(T, P, count, bucket, n, index); + if (failBack) return bsc_unst_reconstruct_case3_parallel(T, P, count, bucket, n, index); +} + +#endif +/* bsc_st_decode: public inverse transform, in place on T[0..n). Returns LIBBSC_BAD_PARAMETER for NULL T, negative n, index outside [0, n) (which also rejects n == 0) or k outside 3..8, and LIBBSC_NO_ERROR immediately for n <= 1. Otherwise allocates zeroed P (n entries) and a zeroed ALPHABET_SIZE * ALPHABET_SIZE bucket table, runs the sort and reconstruct steps (the parallel pair when LIBBSC_OPENMP is built in, LIBBSC_FEATURE_MULTITHREADING is set and n >= 64 * 1024), frees the buffers and returns LIBBSC_NO_ERROR; LIBBSC_NOT_ENOUGH_MEMORY if an allocation fails. */ +int bsc_st_decode(unsigned char * T, int n, int k, int index, int features) +{ + if ((T == NULL) || (n < 0)) return LIBBSC_BAD_PARAMETER; + if ((index < 0) || (index >= n)) return LIBBSC_BAD_PARAMETER; + if ((k < 3) || (k > 8)) return LIBBSC_BAD_PARAMETER; + if (n <= 1) return LIBBSC_NO_ERROR; + + if (unsigned int * P = (unsigned int *)bsc_zero_malloc(n * sizeof(unsigned int))) + { + if (unsigned int * bucket = (unsigned int *)bsc_zero_malloc(ALPHABET_SIZE * ALPHABET_SIZE * sizeof(unsigned int))) + { + unsigned int count[ALPHABET_SIZE]; memset(count, 0, ALPHABET_SIZE * sizeof(unsigned int)); + +#ifdef LIBBSC_OPENMP + + if ((features & LIBBSC_FEATURE_MULTITHREADING) && (n >= 64 * 1024)) + { + bool failBack = bsc_unst_sort_parallel(T, P, count, bucket, n, k); + bsc_unst_reconstruct_parallel(T, P, count, bucket, n, index, failBack); + } + else + +#endif + + { + bool failBack = bsc_unst_sort_serial(T, P, count, bucket, n, k); + bsc_unst_reconstruct_serial(T, P, count, n, index, failBack); + } + + 
bsc_free(bucket); bsc_free(P); + return LIBBSC_NO_ERROR; + }; + bsc_free(P); + }; + + return LIBBSC_NOT_ENOUGH_MEMORY; +} + +#endif + +/*-----------------------------------------------------------*/ +/* End st.cpp */ +/*-----------------------------------------------------------*/ diff --git a/libbsc/libbsc/st/st.cu b/libbsc/libbsc/st/st.cu new file mode 100644 index 00000000..3d19bde2 --- /dev/null +++ b/libbsc/libbsc/st/st.cu @@ -0,0 +1,416 @@ +/*-----------------------------------------------------------*/ +/* Block Sorting, Lossless Data Compression Library. */ +/* Sort Transform (GPU version) */ +/*-----------------------------------------------------------*/ + +/*-- + +This file is a part of bsc and/or libbsc, a program and a library for +lossless, block-sorting data compression. + + Copyright (c) 2009-2021 Ilya Grebnov + + Licensed under the Apache License, Version 2.0 (the "License"); + you may not use this file except in compliance with the License. + You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + + Unless required by applicable law or agreed to in writing, software + distributed under the License is distributed on an "AS IS" BASIS, + WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + See the License for the specific language governing permissions and + limitations under the License. + +Please see the file LICENSE for full copyright information and file AUTHORS +for full list of contributors. + +See also the bsc and libbsc web site: + http://libbsc.com/ for more information. 
+ +--*/ + +#if defined(LIBBSC_SORT_TRANSFORM_SUPPORT) && defined(LIBBSC_CUDA_SUPPORT) + +#if defined(_MSC_VER) + #pragma warning(disable : 4267) +#endif +/* NOTE(review): the angle-bracket #include directives in this span had lost their header names; the names below are restored from the identifiers this file uses (fprintf/fflush, cuda* runtime calls, threadIdx/__syncthreads, cub::) -- confirm against upstream libbsc. */ +#include <stdlib.h> +#include <memory.h> +#include <stdio.h> + +#include "st.cuh" + +#include "../libbsc.h" +#include "../platform/platform.h" + +#include <cuda_runtime_api.h> +#include <device_functions.h> +#include <device_launch_parameters.h> + +#include <cub/cub.cuh> + +#ifdef LIBBSC_OPENMP +/* With OpenMP, cuda_lock is initialised here; bsc_st_encode_cuda takes it around its device work. */ +omp_lock_t cuda_lock; +int bsc_st_cuda_init(int features) +{ + omp_init_lock(&cuda_lock); + return LIBBSC_NO_ERROR; +} + +#else +/* Without OpenMP there is nothing to initialise; features is unused. */ +int bsc_st_cuda_init(int features) +{ + return LIBBSC_NO_ERROR; +} + +#endif + +#ifndef __CUDA_ARCH__ + #define CUDA_DEVICE_ARCH 0 +#else + #define CUDA_DEVICE_ARCH __CUDA_ARCH__ +#endif + +#define CUDA_DEVICE_PADDING 1024 +#define CUDA_NUM_THREADS_IN_BLOCK 128 +#define CUDA_CTA_OCCUPANCY 8 +/* Logs a failing CUDA result to stderr with file and line; yields result when it failed, otherwise the carried-in status. */ +cudaError_t bsc_cuda_safe_call(const char * filename, int line, cudaError_t result, cudaError_t status = cudaSuccess) +{ + if (result != cudaSuccess) + { + fprintf(stderr, "\n%s(%d): bsc_cuda_safe_call failed %d: '%s'.", filename, line, result, cudaGetErrorString(result)); + fflush(stderr); + } + + return result != cudaSuccess ? 
result : status; +} +/* Builds one 64-bit sort key per position. Each block stages its bytes in shared memory (one byte before the block and 7 after it included), then every thread packs thread_staging[0..7] into K_device[index]. The loop is guarded by block_start < n only; there is no per-thread index < n check (presumably covered by the padded buffers -- confirm). */ +__global__ __launch_bounds__(CUDA_NUM_THREADS_IN_BLOCK, CUDA_CTA_OCCUPANCY) +void bsc_st567_encode_cuda_presort(unsigned char * RESTRICT T_device, unsigned long long * RESTRICT K_device, int n) +{ + __shared__ unsigned int staging[1 + CUDA_NUM_THREADS_IN_BLOCK + 7]; + + unsigned int * RESTRICT thread_staging = &staging[threadIdx.x]; + for (int grid_size = gridDim.x * CUDA_NUM_THREADS_IN_BLOCK, block_start = blockIdx.x * CUDA_NUM_THREADS_IN_BLOCK; block_start < n; block_start += grid_size) + { + int index = block_start + threadIdx.x; + /* threads 0..6 also fetch the 7 bytes after the block; thread 7 fetches the byte just before it into staging[0] */ + { + thread_staging[1 ] = T_device[index ]; + if (threadIdx.x < 7 ) thread_staging[1 + CUDA_NUM_THREADS_IN_BLOCK] = T_device[index + CUDA_NUM_THREADS_IN_BLOCK]; else + if (threadIdx.x == 7) thread_staging[-7 ] = T_device[index - 8 ]; + + __syncthreads(); + } + + { + #if CUDA_DEVICE_ARCH >= 200 + unsigned int lo = __byte_perm(thread_staging[4], thread_staging[5], 0x0411) | __byte_perm(thread_staging[6], thread_staging[7], 0x1104); + unsigned int hi = __byte_perm(thread_staging[0], thread_staging[1], 0x0411) | __byte_perm(thread_staging[2], thread_staging[3], 0x1104); + #else + unsigned int lo = (thread_staging[4] << 24) | (thread_staging[5] << 16) | (thread_staging[6] << 8) | thread_staging[7]; + unsigned int hi = (thread_staging[0] << 24) | (thread_staging[1] << 16) | (thread_staging[2] << 8) | thread_staging[3]; + #endif + /* key = T_device[index - 1 .. index + 6], most significant byte first */ + K_device[index] = (((unsigned long long)hi) << 32) | ((unsigned long long)lo); + + __syncthreads(); + } + } +} +/* Order-8 variant of the presort kernel: it additionally takes V_device and uses 8 trailing staging slots instead of 7. */ +__global__ __launch_bounds__(CUDA_NUM_THREADS_IN_BLOCK, CUDA_CTA_OCCUPANCY) +void bsc_st8_encode_cuda_presort(unsigned char * RESTRICT T_device, unsigned long long * RESTRICT K_device, unsigned char * RESTRICT V_device, int n) +{ + __shared__ unsigned int staging[1 + CUDA_NUM_THREADS_IN_BLOCK + 8]; + + unsigned int * RESTRICT thread_staging = &staging[threadIdx.x]; + for (int grid_size = gridDim.x * CUDA_NUM_THREADS_IN_BLOCK, block_start = blockIdx.x * 
CUDA_NUM_THREADS_IN_BLOCK; block_start < n; block_start += grid_size) + { + int index = block_start + threadIdx.x; + /* threads 0..7 also fetch the 8 bytes after the block; thread 8 fetches the byte just before it into staging[0] */ + { + thread_staging[1 ] = T_device[index ]; + if (threadIdx.x < 8 ) thread_staging[1 + CUDA_NUM_THREADS_IN_BLOCK] = T_device[index + CUDA_NUM_THREADS_IN_BLOCK]; else + if (threadIdx.x == 8) thread_staging[-8 ] = T_device[index - 9 ]; + + __syncthreads(); + } + + { + #if CUDA_DEVICE_ARCH >= 200 + unsigned int lo = __byte_perm(thread_staging[5], thread_staging[6], 0x0411) | __byte_perm(thread_staging[7], thread_staging[8], 0x1104); + unsigned int hi = __byte_perm(thread_staging[1], thread_staging[2], 0x0411) | __byte_perm(thread_staging[3], thread_staging[4], 0x1104); + #else + unsigned int lo = (thread_staging[5] << 24) | (thread_staging[6] << 16) | (thread_staging[7] << 8) | thread_staging[8]; + unsigned int hi = (thread_staging[1] << 24) | (thread_staging[2] << 16) | (thread_staging[3] << 8) | thread_staging[4]; + #endif + /* key = T_device[index .. index + 7], most significant byte first; value = the byte before it, T_device[index - 1] */ + K_device[index] = (((unsigned long long)hi) << 32) | ((unsigned long long)lo); V_device[index] = thread_staging[0]; + + __syncthreads(); + } + } +} +/* Runs on the sorted keys: writes the top byte of each key to T_device[index] and lowers *I_device (atomicMin) to the smallest index below n whose key equals lookup. A thread that found no match leaves *I_device untouched. */ +__global__ __launch_bounds__(CUDA_NUM_THREADS_IN_BLOCK, CUDA_CTA_OCCUPANCY) +void bsc_st567_encode_cuda_postsort(unsigned char * RESTRICT T_device, unsigned long long * RESTRICT K_device, int n, unsigned long long lookup, int * RESTRICT I_device) +{ + int min_index = n; + for (int grid_size = gridDim.x * CUDA_NUM_THREADS_IN_BLOCK, block_start = blockIdx.x * CUDA_NUM_THREADS_IN_BLOCK; block_start < n; block_start += grid_size) + { + int index = block_start + threadIdx.x; + { + unsigned long long value = K_device[index]; + { + if (value == lookup && index < min_index) min_index = index; + T_device[index] = (unsigned char)(value >> 56); + } + } + } + + __syncthreads(); if (min_index != n) atomicMin(I_device, min_index); +} +/* Order-8 counterpart: only searches the sorted keys for lookup; it writes no output bytes. */ +__global__ __launch_bounds__(CUDA_NUM_THREADS_IN_BLOCK, CUDA_CTA_OCCUPANCY) +void bsc_st8_encode_cuda_postsort(unsigned long long * RESTRICT K_device, int n, 
unsigned long long lookup, int * RESTRICT I_device) +{ + int min_index = n; + for (int grid_size = gridDim.x * CUDA_NUM_THREADS_IN_BLOCK, block_start = blockIdx.x * CUDA_NUM_THREADS_IN_BLOCK; block_start < n; block_start += grid_size) + { + int index = block_start + threadIdx.x; + { + if (K_device[index] == lookup && index < min_index) min_index = index; + } + } + + __syncthreads(); if (min_index != n) atomicMin(I_device, min_index); +} +/* Order 5..7 GPU encoder working on the device copy of T: packs one 64-bit key per position, radix-sorts the keys on bits [(7 - k) * 8, 56) and, in the steps that follow, writes the top key bytes back and fetches the primary index. Returns that index, or LIBBSC_GPU_NOT_ENOUGH_MEMORY / LIBBSC_GPU_ERROR. */ +int bsc_st567_encode_cuda(unsigned char * T, unsigned char * T_device, int n, int num_blocks, int k) +{ + int index = LIBBSC_GPU_NOT_ENOUGH_MEMORY; + { + unsigned long long * K_device = NULL; + unsigned long long * K_device_sorted = NULL; + /* the size is computed in size_t so that 2 * (n + padding) cannot overflow int for large n */ + if (bsc_cuda_safe_call(__FILE__, __LINE__, cudaMalloc((void **)&K_device, 2 * ((size_t)n + 2 * CUDA_DEVICE_PADDING) * sizeof(unsigned long long))) == cudaSuccess) + { + index = LIBBSC_GPU_ERROR; + cudaError_t status = cudaSuccess; + /* num_blocks blocks of CUDA_NUM_THREADS_IN_BLOCK threads: the block size the kernel's stride and shared staging array are written for */ + bsc_st567_encode_cuda_presort<<<num_blocks, CUDA_NUM_THREADS_IN_BLOCK>>>(T_device, K_device, n); + + if (bsc_cuda_safe_call(__FILE__, __LINE__, status) == cudaSuccess) + { + K_device_sorted = K_device + ((n + 2 * CUDA_DEVICE_PADDING) / CUDA_DEVICE_PADDING) * CUDA_DEVICE_PADDING; + + cub::DoubleBuffer<unsigned long long> d_keys(K_device, K_device_sorted); + /* the first SortKeys call runs with d_temp_storage == NULL and only reports temp_storage_bytes; the second call performs the sort */ + void * d_temp_storage = NULL; size_t temp_storage_bytes = 0; + + status = bsc_cuda_safe_call(__FILE__, __LINE__, cub::DeviceRadixSort::SortKeys(d_temp_storage, temp_storage_bytes, d_keys, n, (7 - k) * 8, 56), status); + if (bsc_cuda_safe_call(__FILE__, __LINE__, status) == cudaSuccess) + { + status = bsc_cuda_safe_call(__FILE__, __LINE__, cudaMalloc(&d_temp_storage, temp_storage_bytes), status); + if (bsc_cuda_safe_call(__FILE__, __LINE__, status) == cudaSuccess) + { + status = bsc_cuda_safe_call(__FILE__, __LINE__, cub::DeviceRadixSort::SortKeys(d_temp_storage, temp_storage_bytes, d_keys, n, (7 - k) * 8, 56), status); + status = bsc_cuda_safe_call(__FILE__, __LINE__, cudaFree(d_temp_storage), status); + } + } + + K_device_sorted = d_keys.Current(); + } + + if 
(bsc_cuda_safe_call(__FILE__, __LINE__, status) == cudaSuccess) + { + unsigned long long lookup; /* key of position 0: T[n - 1] followed by T[0] .. T[6] */ + { + unsigned int lo = (T[3 ] << 24) | (T[4] << 16) | (T[5] << 8) | T[6]; + unsigned int hi = (T[n - 1] << 24) | (T[0] << 16) | (T[1] << 8) | T[2]; + + lookup = (((unsigned long long)hi) << 32) | ((unsigned long long)lo); + /* seed the result slot just before T_device with n, the value the postsort kernel treats as "not found" */ + status = bsc_cuda_safe_call(__FILE__, __LINE__, cudaMemcpy(T_device - sizeof(int), &n, sizeof(int), cudaMemcpyHostToDevice), status); + } + + if (bsc_cuda_safe_call(__FILE__, __LINE__, status) == cudaSuccess) + { + bsc_st567_encode_cuda_postsort<<<num_blocks, CUDA_NUM_THREADS_IN_BLOCK>>>(T_device, K_device_sorted, n, lookup, (int *)(T_device - sizeof(int))); + + status = bsc_cuda_safe_call(__FILE__, __LINE__, cudaFree(K_device), status); + /* move the index behind the n output bytes so that one device-to-host copy returns both */ + if (bsc_cuda_safe_call(__FILE__, __LINE__, status) == cudaSuccess) + { + status = bsc_cuda_safe_call(__FILE__, __LINE__, cudaMemcpy(T_device + n, T_device - sizeof(int), sizeof(int), cudaMemcpyDeviceToDevice), status); + status = bsc_cuda_safe_call(__FILE__, __LINE__, cudaMemcpy(T, T_device, n + sizeof(int), cudaMemcpyDeviceToHost), status); + } + + if (bsc_cuda_safe_call(__FILE__, __LINE__, status) == cudaSuccess) + { + index = *(int *)(T + n); + } + + return index; + } + } + cudaFree(K_device); + } + } + + return index; +} +/* Order 8 GPU encoder: sorts 64-bit keys together with one value byte per position and copies the sorted value bytes plus the primary index back into T. Returns that index, or LIBBSC_GPU_NOT_ENOUGH_MEMORY / LIBBSC_GPU_ERROR. */ +int bsc_st8_encode_cuda(unsigned char * T, unsigned char * T_device, int n, int num_blocks) +{ + int index = LIBBSC_GPU_NOT_ENOUGH_MEMORY; + { + unsigned char * V_device = NULL; + unsigned char * V_device_sorted = NULL; + /* both sizes are computed in size_t so that 2 * (n + padding) cannot overflow int for large n */ + if (bsc_cuda_safe_call(__FILE__, __LINE__, cudaMalloc((void **)&V_device, 2 * ((size_t)n + 2 * CUDA_DEVICE_PADDING) * sizeof(unsigned char))) == cudaSuccess) + { + unsigned long long * K_device = NULL; + unsigned long long * K_device_sorted = NULL; + + if (bsc_cuda_safe_call(__FILE__, __LINE__, cudaMalloc((void **)&K_device, 2 * ((size_t)n + 2 * CUDA_DEVICE_PADDING) * sizeof(unsigned long long))) == cudaSuccess) + { + index = LIBBSC_GPU_ERROR; + cudaError_t status = cudaSuccess; + + 
bsc_st8_encode_cuda_presort<<<num_blocks, CUDA_NUM_THREADS_IN_BLOCK>>>(T_device, K_device, V_device, n); /* block size must be CUDA_NUM_THREADS_IN_BLOCK: the kernel's stride and staging array are written for it */ + + if (bsc_cuda_safe_call(__FILE__, __LINE__, status) == cudaSuccess) + { + K_device_sorted = K_device + ((n + 2 * CUDA_DEVICE_PADDING) / CUDA_DEVICE_PADDING) * CUDA_DEVICE_PADDING; + V_device_sorted = V_device + ((n + 2 * CUDA_DEVICE_PADDING) / CUDA_DEVICE_PADDING) * CUDA_DEVICE_PADDING; + /* each buffer pair is the two halves of one allocation; Current() tells afterwards which half holds the sorted data */ + cub::DoubleBuffer<unsigned long long> d_keys(K_device, K_device_sorted); + cub::DoubleBuffer<unsigned char> d_values(V_device, V_device_sorted); + /* the first SortPairs call runs with d_temp_storage == NULL and only reports temp_storage_bytes; the second call performs the sort */ + void * d_temp_storage = NULL; size_t temp_storage_bytes = 0; + + status = bsc_cuda_safe_call(__FILE__, __LINE__, cub::DeviceRadixSort::SortPairs(d_temp_storage, temp_storage_bytes, d_keys, d_values, n), status); + if (bsc_cuda_safe_call(__FILE__, __LINE__, status) == cudaSuccess) + { + status = bsc_cuda_safe_call(__FILE__, __LINE__, cudaMalloc(&d_temp_storage, temp_storage_bytes), status); + if (bsc_cuda_safe_call(__FILE__, __LINE__, status) == cudaSuccess) + { + status = bsc_cuda_safe_call(__FILE__, __LINE__, cub::DeviceRadixSort::SortPairs(d_temp_storage, temp_storage_bytes, d_keys, d_values, n), status); + status = bsc_cuda_safe_call(__FILE__, __LINE__, cudaFree(d_temp_storage), status); + } + } + + K_device_sorted = d_keys.Current(); + V_device_sorted = d_values.Current(); + } + + if (bsc_cuda_safe_call(__FILE__, __LINE__, status) == cudaSuccess) + { + unsigned long long lookup; /* key of position 0: T[0] .. T[7] */ + { + unsigned int lo = (T[4] << 24) | (T[5] << 16) | (T[6] << 8) | T[7]; + unsigned int hi = (T[0] << 24) | (T[1] << 16) | (T[2] << 8) | T[3]; + + lookup = (((unsigned long long)hi) << 32) | ((unsigned long long)lo); + /* the result slot sits behind the sorted values, at n rounded up to a multiple of sizeof(int); it is seeded with n, the kernel's "not found" value */ + status = bsc_cuda_safe_call(__FILE__, __LINE__, cudaMemcpy(V_device_sorted + ((n + sizeof(int) - 1) / sizeof(int)) * sizeof(int), &n, sizeof(int), cudaMemcpyHostToDevice), status); + } + + if (bsc_cuda_safe_call(__FILE__, __LINE__, status) == cudaSuccess) + { + bsc_st8_encode_cuda_postsort<<<num_blocks, CUDA_NUM_THREADS_IN_BLOCK>>>(K_device_sorted, n, lookup, (int *)(V_device_sorted + ((n + sizeof(int) - 1) / sizeof(int)) * sizeof(int))); + 
+ + if (bsc_cuda_safe_call(__FILE__, __LINE__, status) == cudaSuccess) + { + status = bsc_cuda_safe_call(__FILE__, __LINE__, cudaMemcpy(T, V_device_sorted, n + 2 * sizeof(int), cudaMemcpyDeviceToHost), status); + } + + status = bsc_cuda_safe_call(__FILE__, __LINE__, cudaFree(K_device), status); + status = bsc_cuda_safe_call(__FILE__, __LINE__, cudaFree(V_device), status); + + if (bsc_cuda_safe_call(__FILE__, __LINE__, status) == cudaSuccess) + { + index = *(int *)(T + ((n + sizeof(int) - 1) / sizeof(int)) * sizeof(int)); + } + + return index; + } + } + cudaFree(K_device); + } + cudaFree(V_device); + } + } + + return index; +} +/* GPU Sort Transform entry point. Returns LIBBSC_BAD_PARAMETER for NULL T, n < 0 or k outside [5, 8], and 0 for n <= 1. Returns LIBBSC_GPU_NOT_SUPPORTED when the device cannot be queried or its compute capability is below 3.5. Otherwise returns what the order-specific encoder returns, or LIBBSC_GPU_NOT_ENOUGH_MEMORY / LIBBSC_GPU_ERROR. features is not read in this function. */ +int bsc_st_encode_cuda(unsigned char * T, int n, int k, int features) +{ + if ((T == NULL) || (n < 0)) return LIBBSC_BAD_PARAMETER; + if ((k < 5) || (k > 8)) return LIBBSC_BAD_PARAMETER; + if (n <= 1) return 0; + /* num_blocks = CUDA_CTA_OCCUPANCY blocks per multiprocessor, capped at the number of CUDA_NUM_THREADS_IN_BLOCK-sized blocks needed for n, and at least 1 */ + int num_blocks = 1; + { + cudaDeviceProp deviceProperties; + { + int deviceId; if (cudaGetDevice(&deviceId) != cudaSuccess || cudaGetDeviceProperties(&deviceProperties, deviceId) != cudaSuccess) + { + return LIBBSC_GPU_NOT_SUPPORTED; + } + } + + if (deviceProperties.major * 10 + deviceProperties.minor < 35) return LIBBSC_GPU_NOT_SUPPORTED; + + num_blocks = CUDA_CTA_OCCUPANCY * deviceProperties.multiProcessorCount; + + if (num_blocks > ((n + CUDA_NUM_THREADS_IN_BLOCK - 1) / CUDA_NUM_THREADS_IN_BLOCK)) num_blocks = (n + CUDA_NUM_THREADS_IN_BLOCK - 1) / CUDA_NUM_THREADS_IN_BLOCK; + if (num_blocks <= 0) num_blocks = 1; + } + /* serialises the device work between OpenMP threads; every early return above happens before the lock is taken */ + #ifdef LIBBSC_OPENMP + omp_set_lock(&cuda_lock); + #endif + + int index = LIBBSC_GPU_NOT_ENOUGH_MEMORY; + { + unsigned char * T_device = NULL; + if (cudaMalloc((void **)&T_device, n + 2 * CUDA_DEVICE_PADDING) == cudaSuccess) + { + index = LIBBSC_GPU_ERROR; + /* device layout: [CUDA_DEVICE_PADDING bytes][n input bytes][CUDA_DEVICE_PADDING bytes]; the two device-to-device copies fill the paddings. NOTE(review): both always move CUDA_DEVICE_PADDING bytes, so for n < CUDA_DEVICE_PADDING their source and destination ranges overlap -- confirm callers never pass such n. */ + cudaError_t status = cudaSuccess; + status = bsc_cuda_safe_call(__FILE__, __LINE__, cudaMemcpy(T_device + CUDA_DEVICE_PADDING , T , n , cudaMemcpyHostToDevice ), status); + status = bsc_cuda_safe_call(__FILE__, __LINE__, cudaMemcpy(T_device + 
CUDA_DEVICE_PADDING + n, T_device + CUDA_DEVICE_PADDING, CUDA_DEVICE_PADDING, cudaMemcpyDeviceToDevice), status); + status = bsc_cuda_safe_call(__FILE__, __LINE__, cudaMemcpy(T_device , T_device + n , CUDA_DEVICE_PADDING, cudaMemcpyDeviceToDevice), status); + /* the encoders receive the pointer to the first input byte, i.e. past the leading padding */ + if (status == cudaSuccess) + { + if (k >= 5 && k <= 7) index = bsc_st567_encode_cuda(T, T_device + CUDA_DEVICE_PADDING, n, num_blocks, k); + if (k == 8) index = bsc_st8_encode_cuda (T, T_device + CUDA_DEVICE_PADDING, n, num_blocks ); + } + + cudaFree(T_device); + } + } + + #ifdef LIBBSC_OPENMP + omp_unset_lock(&cuda_lock); + #endif + + return index; +} + +#endif + +/*-----------------------------------------------------------*/ +/* End st.cu */ +/*-----------------------------------------------------------*/ diff --git a/libbsc/libbsc/st/st.cuh b/libbsc/libbsc/st/st.cuh new file mode 100644 index 00000000..b3c0e628 --- /dev/null +++ b/libbsc/libbsc/st/st.cuh @@ -0,0 +1,69 @@ +/*-----------------------------------------------------------*/ +/* Block Sorting, Lossless Data Compression Library. */ +/* Interface to Sort Transform (GPU version) */ +/*-----------------------------------------------------------*/ + +/*-- + +This file is a part of bsc and/or libbsc, a program and a library for +lossless, block-sorting data compression. + + Copyright (c) 2009-2021 Ilya Grebnov + + Licensed under the Apache License, Version 2.0 (the "License"); + you may not use this file except in compliance with the License. + You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + + Unless required by applicable law or agreed to in writing, software + distributed under the License is distributed on an "AS IS" BASIS, + WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + See the License for the specific language governing permissions and + limitations under the License. + +Please see the file LICENSE for full copyright information and file AUTHORS +for full list of contributors. 
+ +See also the bsc and libbsc web site: + http://libbsc.com/ for more information. + +--*/ + +#ifndef _LIBBSC_ST_CUH +#define _LIBBSC_ST_CUH + +#ifdef __cplusplus +extern "C" { +#endif + +#if defined(LIBBSC_SORT_TRANSFORM_SUPPORT) && defined(LIBBSC_CUDA_SUPPORT) + + /** + * You should call this function before you call any of the other functions in st. + * @param features - the set of additional features. + * @return LIBBSC_NO_ERROR if no error occurred, error code otherwise. + */ + int bsc_st_cuda_init(int features); + + /** + * Constructs the Sort Transform of order k transformed string of a given string. + * @param T - the input/output string of n chars. + * @param n - the length of the given string. + * @param k[5..8] - the order of Sort Transform. + * @param features - the set of additional features. + * @return the primary index if no error occurred, error code otherwise. + */ + int bsc_st_encode_cuda(unsigned char * T, int n, int k, int features); + +#endif + +#ifdef __cplusplus +} +#endif + +#endif + +/*-----------------------------------------------------------*/ +/* End st.cuh */ +/*-----------------------------------------------------------*/ diff --git a/libbsc/libbsc/st/st.h b/libbsc/libbsc/st/st.h new file mode 100644 index 00000000..f405e4c3 --- /dev/null +++ b/libbsc/libbsc/st/st.h @@ -0,0 +1,80 @@ +/*-----------------------------------------------------------*/ +/* Block Sorting, Lossless Data Compression Library. */ +/* Interface to Sort Transform */ +/*-----------------------------------------------------------*/ + +/*-- + +This file is a part of bsc and/or libbsc, a program and a library for +lossless, block-sorting data compression. + + Copyright (c) 2009-2021 Ilya Grebnov + + Licensed under the Apache License, Version 2.0 (the "License"); + you may not use this file except in compliance with the License. 
+ You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + + Unless required by applicable law or agreed to in writing, software + distributed under the License is distributed on an "AS IS" BASIS, + WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + See the License for the specific language governing permissions and + limitations under the License. + +Please see the file LICENSE for full copyright information and file AUTHORS +for full list of contributors. + +See also the bsc and libbsc web site: + http://libbsc.com/ for more information. + +--*/ + +#ifndef _LIBBSC_ST_H +#define _LIBBSC_ST_H + +#ifdef __cplusplus +extern "C" { +#endif + +#ifdef LIBBSC_SORT_TRANSFORM_SUPPORT + + /** + * You should call this function before you call any of the other functions in st. + * @param features - the set of additional features. + * @return LIBBSC_NO_ERROR if no error occurred, error code otherwise. + */ + int bsc_st_init(int features); + + /** + * Constructs the Sort Transform of order k transformed string of a given string. + * @param T - the input/output string of n chars. + * @param n - the length of the given string. + * @param k[3..8] - the order of Sort Transform. + * @param features - the set of additional features. + * @return the primary index if no error occurred, error code otherwise. + */ + int bsc_st_encode(unsigned char * T, int n, int k, int features); + + /** + * Reconstructs the original string from Sort Transform of order k transformed string. + * @param T - the input/output string of n chars. + * @param n - the length of the given string. + * @param k[3..8] - the order of Sort Transform. + * @param index - the primary index. + * @param features - the set of additional features. + * @return LIBBSC_NO_ERROR if no error occurred, error code otherwise. 
+ */ + int bsc_st_decode(unsigned char * T, int n, int k, int index, int features); + +#endif + +#ifdef __cplusplus +} +#endif + +#endif + +/*-----------------------------------------------------------*/ +/* End st.h */ +/*-----------------------------------------------------------*/ diff --git a/libbsc/makefile b/libbsc/makefile new file mode 100644 index 00000000..891ffa9b --- /dev/null +++ b/libbsc/makefile @@ -0,0 +1,131 @@ +SHELL = /bin/sh + +CC = g++ +AR = ar +RANLIB = ranlib +NVCC = nvcc + +CFLAGS = -g -Wall -std=c++11 +NVCCFLAGS = -g -std=c++11 + +# CUDA support +CFLAGS += -DLIBBSC_CUDA_SUPPORT +NVCCFLAGS += -DLIBBSC_CUDA_SUPPORT +# GPU architecture passed to nvcc and directory holding the CUDA libraries; override on the command line for other setups +CUDA_ARCH = sm_75 +CUDA_LIBDIR = /usr/local/cuda/lib64 + +# Comment out CFLAGS line below for compatibility mode for 32bit file sizes +# (less than 2GB) and systems that have compilers that treat int as 64bit +# natively (ie: modern AIX) +CFLAGS += -D_LARGEFILE64_SOURCE -D_FILE_OFFSET_BITS=64 +NVCCFLAGS += -D_LARGEFILE64_SOURCE -D_FILE_OFFSET_BITS=64 + +# Comment out CFLAGS line below to disable optimizations +CFLAGS += -O3 -fomit-frame-pointer -fstrict-aliasing -ffast-math +NVCCFLAGS += -O3 + +# Comment out CFLAGS line below to disable AVX2 instruction set (performance will suffer) +CFLAGS += -mavx2 + +# Comment out CFLAGS line below to disable OpenMP optimizations +CFLAGS += -fopenmp -DLIBBSC_OPENMP_SUPPORT + +# Comment out CFLAGS line below to enable debug output +CFLAGS += -DNDEBUG +NVCCFLAGS += -DNDEBUG + +# Comment out CFLAGS line below to disable Sort Transform +CFLAGS += -DLIBBSC_SORT_TRANSFORM_SUPPORT +NVCCFLAGS += -DLIBBSC_SORT_TRANSFORM_SUPPORT + +# Comment out CFLAGS line below to disable unaligned memory access +CFLAGS += -DLIBBSC_ALLOW_UNALIGNED_ACCESS +NVCCFLAGS += -DLIBBSC_ALLOW_UNALIGNED_ACCESS + +# Where you want bsc installed when you do 'make install' +PREFIX = /usr + +OBJS = \ + adler32.o \ + libsais.o \ + bwt.o \ + coder.o \ + qlfc.o \ + qlfc_model.o \ + detectors.o \ + preprocessing.o \ + libbsc.o \ + lzp.o \ + platform.o \ + st.o \ + st_cu.o + +# all, install and clean name commands rather than files +.PHONY: all install clean +all: libbsc.a bsc + +bsc: 
libbsc.a bsc.cpp + $(CC) $(CFLAGS) bsc.cpp -o $@ -L. -lbsc -L $(CUDA_LIBDIR) -l cudart -l cuda + +libbsc.a: $(OBJS) + rm -f libbsc.a + $(AR) cq libbsc.a $(OBJS) + @if ( test -f $(RANLIB) -o -f /usr/bin/ranlib -o \ + -f /bin/ranlib -o -f /usr/ccs/bin/ranlib ) ; then \ + echo $(RANLIB) libbsc.a ; \ + $(RANLIB) libbsc.a ; \ + fi + +install: libbsc.a bsc + if ( test ! -d $(DESTDIR)$(PREFIX)/bin ) ; then mkdir -p $(DESTDIR)$(PREFIX)/bin ; fi + if ( test ! -d $(DESTDIR)$(PREFIX)/lib ) ; then mkdir -p $(DESTDIR)$(PREFIX)/lib ; fi + if ( test ! -d $(DESTDIR)$(PREFIX)/include ) ; then mkdir -p $(DESTDIR)$(PREFIX)/include ; fi + cp -f bsc $(DESTDIR)$(PREFIX)/bin/bsc + chmod a+x $(DESTDIR)$(PREFIX)/bin/bsc + cp -f libbsc/libbsc.h $(DESTDIR)$(PREFIX)/include + chmod a+r $(DESTDIR)$(PREFIX)/include/libbsc.h + cp -f libbsc.a $(DESTDIR)$(PREFIX)/lib + chmod a+r $(DESTDIR)$(PREFIX)/lib/libbsc.a + +clean: + rm -f *.o libbsc.a bsc + +adler32.o: libbsc/adler32/adler32.cpp + $(CC) $(CFLAGS) -c $< -o $@ + +libsais.o: libbsc/bwt/libsais/libsais.c + $(CC) $(CFLAGS) -c $< -o $@ + +bwt.o: libbsc/bwt/bwt.cpp + $(CC) $(CFLAGS) -c $< -o $@ + +coder.o: libbsc/coder/coder.cpp + $(CC) $(CFLAGS) -c $< -o $@ + +qlfc.o: libbsc/coder/qlfc/qlfc.cpp + $(CC) $(CFLAGS) -c $< -o $@ + +qlfc_model.o: libbsc/coder/qlfc/qlfc_model.cpp + $(CC) $(CFLAGS) -c $< -o $@ + +detectors.o: libbsc/filters/detectors.cpp + $(CC) $(CFLAGS) -c $< -o $@ + +preprocessing.o: libbsc/filters/preprocessing.cpp + $(CC) $(CFLAGS) -c $< -o $@ + +libbsc.o: libbsc/libbsc/libbsc.cpp + $(CC) $(CFLAGS) -c $< -o $@ + +lzp.o: libbsc/lzp/lzp.cpp + $(CC) $(CFLAGS) -c $< -o $@ + +platform.o: libbsc/platform/platform.cpp + $(CC) $(CFLAGS) -c $< -o $@ + +st.o: libbsc/st/st.cpp + $(CC) $(CFLAGS) -c $< -o $@ + +st_cu.o: libbsc/st/st.cu + 
$(NVCC) $(NVCCFLAGS) -c -arch=$(CUDA_ARCH) $< -o $@