diff --git a/.github/workflows/CI.yml b/.github/workflows/CI.yml index 602f570..50c4405 100644 --- a/.github/workflows/CI.yml +++ b/.github/workflows/CI.yml @@ -22,8 +22,21 @@ jobs: - 6 clang: - 12 + cmake: + - 3.20.2 + ninja: + - 1.10.2 + CC: + - clang + CXX: + - clang++ + env: + CC: ${{ matrix.CC }} + CXX: ${{ matrix.CXX }} steps: - uses: actions/checkout@v2 + with: + submodules: 'true' # Cache - name: Cache @@ -38,17 +51,24 @@ jobs: ./.dub ./llvm C:/Program Files/LLVM - key: "cache-D:${{ matrix.d }}-OS:${{ matrix.os }}-Clang:${{ matrix.clang }}" + key: "cache-OS:${{ matrix.os }}-D:${{ matrix.d }}-Clang:${{ matrix.clang }}-dub:${{ hashFiles('./dub.selections.json')}}-pnpm:${{ hashFiles('./pnpm-lock.yaml') }}" + restore-keys: | + "cache-OS:${{ matrix.os }}-D:${{ matrix.d }}-Clang:${{ matrix.clang }}" # Setup compilers and tools - name: Setup LLVM - if: contains(matrix.os, 'ubuntu') && matrix.clang uses: KyleMayes/install-llvm-action@v1 with: version: ${{ matrix.clang }} cached: ${{ steps.cache.outputs.cache-hit }} + - name: Setup Cmake and Ninja + uses: aminya/install-cmake@new-versions-and-arch + with: + cmake: ${{ matrix.cmake }} + ninja: ${{ matrix.ninja }} + - name: Setup Node uses: actions/setup-node@v2 with: diff --git a/.gitmodules b/.gitmodules new file mode 100644 index 0000000..a35beda --- /dev/null +++ b/.gitmodules @@ -0,0 +1,4 @@ +[submodule "src/native/despacer"] + path = src/native/despacer + url = https://github.com/aminya/despacer + branch = minijson diff --git a/Readme.md b/Readme.md index 74230c7..aec360d 100644 --- a/Readme.md +++ b/Readme.md @@ -2,7 +2,7 @@ Minify JSON files **blazing fast**! Supports Comments. Written in D. -385 times faster than jsonminify! +4180 times faster than jsonminify! 
[![CI](https://github.com/aminya/minijson/actions/workflows/CI.yml/badge.svg)](https://github.com/aminya/minijson/actions/workflows/CI.yml) @@ -23,6 +23,7 @@ https://github.com/aminya/minijson/releases/tag/v0.5.1 - Dub ``` +git submodule update --init --recursive dub build --config=library --build=release-nobounds --compiler=ldc2 # or dub build --config=executable --build=release-nobounds --compiler=ldc2 @@ -90,12 +91,21 @@ minifyFiles(["file1.json", "file2.json"], true); On AMD Ryzen 7 4800H: +- minifyString: minijson is 4178 times faster than jsonMinify +- minifyFiles: minijson is 1198 times faster than jsonMinify. + ``` -❯ node .\benchmark\native-benchmark.mjs -0.152 seconds +❯ .\dist\minijson-benchmark.exe --benchmark-minifyString --benchmark-minifyFiles +Benchmark minifyFiles +49 ms +Benchmark minifyString +14 ms ❯ node .\benchmark\js-benchmark.mjs -58.818 seconds +Benchmark minifyString +58.502 seconds +Benchmark minifyFiles +58.703 seconds ``` ### Contributing diff --git a/benchmark/benchmark.d b/benchmark/benchmark.d index 38dca4a..8b4b0d8 100644 --- a/benchmark/benchmark.d +++ b/benchmark/benchmark.d @@ -9,8 +9,10 @@ void main(string[] args) { bool benchmarkMinifyFiles = false; bool benchmarkMinifyString = true; + bool benchmarkParallelMinifyString = false; - getopt(args, "benchmark-minifyFiles", &benchmarkMinifyFiles, "benchmark-minifyString", &benchmarkMinifyString); + getopt(args, "benchmark-minifyFiles", &benchmarkMinifyFiles, "benchmark-minifyString", + &benchmarkMinifyString, "benchmark-parallel-minifyString", &benchmarkParallelMinifyString); const string[] files = dirEntries("./test/fixtures/standard", SpanMode.shallow).map!(entry => entry.name).array(); @@ -36,7 +38,7 @@ void main(string[] args) if (benchmarkMinifyString) { - writeln("Benchmark minifyString"); + writeln("Benchmark minifyString single-threaded"); const repeat = 120; auto repeater = iota(repeat); string tmp; @@ -52,5 +54,23 @@ void main(string[] args) result = sw.peek(); 
writeln(result / repeat); + + if (benchmarkParallelMinifyString) + { + writeln("Benchmark minifyString multi-threaded"); + auto repeater2 = iota(repeat); + + sw.reset(); + foreach (_; repeater2) + { + foreach (fileContent; filesContent.parallel()) + { + tmp = minifyString(fileContent); + } + } + result = sw.peek(); + + writeln(result / repeat); + } } } diff --git a/benchmark/js-benchmark.mjs b/benchmark/js-benchmark.mjs index 01ac9f9..3b065a4 100644 --- a/benchmark/js-benchmark.mjs +++ b/benchmark/js-benchmark.mjs @@ -7,6 +7,25 @@ import { standardFiles } from "../test/fixtures.mjs" // warmup const tmp = await jsonMinify("{}") +console.log("Benchmark minifyString") + +const filesContents = await Promise.all( + standardFiles.map(async (jsonFile) => { + return readFile(jsonFile, "utf8") + }) +) + +const t11 = performance.now() + +for (const fileContent of filesContents) { + const data = jsonMinify(fileContent) +} + +const t22 = performance.now() +console.log(((t22 - t11) / 1000).toFixed(3), "seconds") + +console.log("Benchmark minifyFiles") + const t1 = performance.now() await Promise.all( diff --git a/dub.sdl b/dub.sdl index 9fcf435..d228f09 100644 --- a/dub.sdl +++ b/dub.sdl @@ -10,6 +10,8 @@ sourcePaths "./src/native" importPaths "./src/native" dependency "automem" version="~>0.6.6" +preGenerateCommands "git submodule update --init" # despacer download +dependency "despacer" path="./src/native/despacer/bindings/d" configuration "executable" { targetType "executable" diff --git a/package.json b/package.json index 330c546..bcfbf45 100644 --- a/package.json +++ b/package.json @@ -23,7 +23,7 @@ "build.release": "pnpm build -- --build release-nobounds --compiler=ldc2", "build.profile": "pnpm build -- --build profile --compiler=ldc2 && node ./src/node/build.js && npm run build.node.js", "build.benchmark": "dub build --config=benchmark --build release-nobounds --compiler=ldc2", - "start.profile": "shx rm -rf ./trace.* && npm run start.benchmark && profdump.exe --dot 
trace.log trace.dot && dot -Tsvg trace.dot -o trace.svg && ./trace.svg", + "start.profile": "shx rm -rf ./trace.* && npm run start.benchmark.node && profdump.exe --dot trace.log trace.dot && dot -Tsvg trace.dot -o trace.svg && ./trace.svg", "build.node": "npm run build.release && node ./src/node/build.js && npm run build.node.js", "build.node.js": "tsc -p ./src/node/tsconfig.json", "build.wasm": "ldc2 ./src/wasm/wasm.d ./src/native/lib.d --od ./dist --O3 --mtriple=wasm32-unknown-unknown-wasm", diff --git a/src/native/despacer b/src/native/despacer new file mode 160000 index 0000000..f1acbdb --- /dev/null +++ b/src/native/despacer @@ -0,0 +1 @@ +Subproject commit f1acbdb05dfe9b32d00a6a68c082923019def073 diff --git a/src/native/lib.d b/src/native/lib.d index 00611a4..c75d038 100644 --- a/src/native/lib.d +++ b/src/native/lib.d @@ -1,12 +1,12 @@ module minijson.lib; -import std : ctRegex, replaceAll, join, array, matchAll, matchFirst, RegexMatch; +import std : ctRegex, matchAll, matchFirst, toStringz; + +import despacer.simd_check : supports_sse4_1, supports_avx2; const tokenizerWithComment = ctRegex!(`"|(/\*)|(\*/)|(//)|\n|\r|\[|]`, "g"); const tokenizerNoComment = ctRegex!(`[\n\r"[]]`, "g"); -const spaceOrBreakRegex = ctRegex!(`\s`); - /** Minify the given JSON string @@ -49,7 +49,7 @@ string minifyString(in string jsonString, in bool hasComment = false) @trusted const noLeftContext = leftContextSubstr.length == 0; if (!in_string && !noLeftContext) { - leftContextSubstr = leftContextSubstr.replaceAll(spaceOrBreakRegex, ""); + leftContextSubstr = remove_spaces(leftContextSubstr); } if (!noLeftContext) { @@ -122,9 +122,45 @@ private bool hasNoSlashOrEvenNumberOfSlashes(in string leftContextSubstr) @safe return slashCount % 2 == 0; } -private bool notSlashAndNoSpaceOrBreak(in string matchFrontHit) @safe +private bool notSlashAndNoSpaceOrBreak(const ref string matchFrontHit) @safe +{ + return matchFrontHit != "\"" && hasNoSpace(matchFrontHit); +} + +/** Removes spaces 
from the original string */ +private string remove_spaces(string str) @trusted +{ + static if (supports_sse4_1()) + { + import despacer.despacer : sse4_despace_branchless_u4; + + // this wrapper reduces the overall time by 15 compared to d_sse4_despace_branchless_u4 because of no dup and toStringz + auto cstr = cast(char*) str; + const length = str.length; + return str[0 .. sse4_despace_branchless_u4(cstr, length)]; + } + else + { + import std : replaceAll; // scoped import: replaceAll is no longer in the module-level import list + return str.replaceAll(ctRegex!(`\s`), ""); // regex fallback may throw, so this helper is not nothrow + } +} + +/** Check that the given string has no space */ +private bool hasNoSpace(const ref string matchFrontHit) @trusted { - return matchFrontHit != "\"" && matchFrontHit.matchFirst(spaceOrBreakRegex).empty(); + static if (supports_avx2()) + { + import despacer.despacer : avx2_hasspace; + + // the algorithm never checks for zero termination so toStringz is not needed + return !avx2_hasspace(cast(const char*) matchFrontHit, matchFrontHit.length); + } + else + { + const spaceOrBreakRegex = ctRegex!(`\s`); + return matchFrontHit.matchFirst(spaceOrBreakRegex).empty(); + } } /**