Skip to content

Commit

Permalink
Merge pull request #17 from aminya/despacer
Browse files Browse the repository at this point in the history
  • Loading branch information
aminya authored Jul 11, 2021
2 parents 2b912a1 + 69a0660 commit 96cc75f
Show file tree
Hide file tree
Showing 9 changed files with 127 additions and 15 deletions.
24 changes: 22 additions & 2 deletions .github/workflows/CI.yml
Original file line number Diff line number Diff line change
Expand Up @@ -22,8 +22,21 @@ jobs:
- 6
clang:
- 12
cmake:
- 3.20.2
ninja:
- 1.10.2
CC:
- clang
CXX:
- clang++
env:
CC: ${{ matrix.CC }}
CXX: ${{ matrix.CXX }}
steps:
- uses: actions/checkout@v2
with:
submodules: 'true'

# Cache
- name: Cache
Expand All @@ -38,17 +51,24 @@ jobs:
./.dub
./llvm
C:/Program Files/LLVM
key: "cache-D:${{ matrix.d }}-OS:${{ matrix.os }}-Clang:${{ matrix.clang }}"
key: "cache-OS:${{ matrix.os }}-D:${{ matrix.d }}-Clang:${{ matrix.clang }}-dub:${{ hashFiles('./dub.selections.json')}}-pnpm:${{ hashFiles('./pnpm-lock.yaml') }}"
# Lines of a literal block scalar (`|`) are taken verbatim, so the fallback prefix must NOT be
# wrapped in quotes: a leading `"` would become part of the key and never match a saved cache key.
restore-keys: |
  cache-OS:${{ matrix.os }}-D:${{ matrix.d }}-Clang:${{ matrix.clang }}
# Setup compilers and tools

- name: Setup LLVM
if: contains(matrix.os, 'ubuntu') && matrix.clang
uses: KyleMayes/install-llvm-action@v1
with:
version: ${{ matrix.clang }}
cached: ${{ steps.cache.outputs.cache-hit }}

- name: Setup Cmake and Ninja
uses: aminya/install-cmake@new-versions-and-arch
with:
cmake: ${{ matrix.cmake }}
ninja: ${{ matrix.ninja }}

- name: Setup Node
uses: actions/setup-node@v2
with:
Expand Down
4 changes: 4 additions & 0 deletions .gitmodules
Original file line number Diff line number Diff line change
@@ -0,0 +1,4 @@
[submodule "src/native/despacer"]
path = src/native/despacer
url = https://github.com/aminya/despacer
branch = minijson
18 changes: 14 additions & 4 deletions Readme.md
Original file line number Diff line number Diff line change
Expand Up @@ -2,7 +2,7 @@

Minify JSON files **blazing fast**! Supports Comments. Written in D.

385 times faster than jsonminify!
4180 times faster than jsonminify!

[![CI](https://github.com/aminya/minijson/actions/workflows/CI.yml/badge.svg)](https://github.com/aminya/minijson/actions/workflows/CI.yml)

Expand All @@ -23,6 +23,7 @@ https://github.com/aminya/minijson/releases/tag/v0.5.1
- Dub

```
git submodule update --init --recursive
dub build --config=library --build=release-nobounds --compiler=ldc2
# or
dub build --config=executable --build=release-nobounds --compiler=ldc2
Expand Down Expand Up @@ -90,12 +91,21 @@ minifyFiles(["file1.json", "file2.json"], true);

On AMD Ryzen 7 4800H:

- minifyString: minijson is 4178 times faster than jsonMinify.
- minifyFiles: minijson is 1198 times faster than jsonMinify.

```
❯ node .\benchmark\native-benchmark.mjs
0.152 seconds
❯ .\dist\minijson-benchmark.exe --benchmark-minifyString --benchmark-minifyFiles
Benchmark minifyFiles
49 ms
Benchmark minifyString
14 ms
❯ node .\benchmark\js-benchmark.mjs
58.818 seconds
Benchmark minifyString
58.502 seconds
Benchmark minifyFiles
58.703 seconds
```

### Contributing
Expand Down
24 changes: 22 additions & 2 deletions benchmark/benchmark.d
Original file line number Diff line number Diff line change
Expand Up @@ -9,8 +9,10 @@ void main(string[] args)
{
bool benchmarkMinifyFiles = false;
bool benchmarkMinifyString = true;
bool benchmarkParallelMinifyString = false;

getopt(args, "benchmark-minifyFiles", &benchmarkMinifyFiles, "benchmark-minifyString", &benchmarkMinifyString);
getopt(args, "benchmark-minifyFiles", &benchmarkMinifyFiles, "benchmark-minifyString",
&benchmarkMinifyString, "benchmark-parallel-minifyString", &benchmarkParallelMinifyString);

const string[] files = dirEntries("./test/fixtures/standard", SpanMode.shallow).map!(entry => entry.name).array();

Expand All @@ -36,7 +38,7 @@ void main(string[] args)

if (benchmarkMinifyString)
{
writeln("Benchmark minifyString");
writeln("Benchmark minifyString single-threaded");
const repeat = 120;
auto repeater = iota(repeat);
string tmp;
Expand All @@ -52,5 +54,23 @@ void main(string[] args)
result = sw.peek();

writeln(result / repeat);

if (benchmarkParallelMinifyString)
{
writeln("Benchmark minifyString multi-threaded");
auto repeater2 = iota(repeat);

sw.reset();
foreach (_; repeater2)
{
foreach (fileContent; filesContent.parallel())
{
tmp = minifyString(fileContent);
}
}
result = sw.peek();

writeln(result / repeat);
}
}
}
19 changes: 19 additions & 0 deletions benchmark/js-benchmark.mjs
Original file line number Diff line number Diff line change
Expand Up @@ -7,6 +7,25 @@ import { standardFiles } from "../test/fixtures.mjs"
// warmup
const tmp = await jsonMinify("{}")

// Benchmark 1: minify JSON text that has already been loaded into memory.
console.log("Benchmark minifyString")

// Read every fixture up front (all reads are started together and awaited as a group),
// so the file I/O happens before the timed region below starts.
const filesContents = await Promise.all(
standardFiles.map(async (jsonFile) => {
return readFile(jsonFile, "utf8")
})
)

// Start of the timed region.
const t11 = performance.now()

// Minify each file's content one after another; `data` is not used afterwards.
// NOTE(review): the warmup call above awaits jsonMinify, while this loop does not.
// If jsonMinify returns a promise, only the call dispatch is being timed here — confirm.
for (const fileContent of filesContents) {
const data = jsonMinify(fileContent)
}

// End of the timed region. The difference is divided by 1000 and printed with three decimals as seconds.
const t22 = performance.now()
console.log(((t22 - t11) / 1000).toFixed(3), "seconds")

console.log("Benchmark minifyFiles")

const t1 = performance.now()

await Promise.all(
Expand Down
2 changes: 2 additions & 0 deletions dub.sdl
Original file line number Diff line number Diff line change
Expand Up @@ -10,6 +10,8 @@ sourcePaths "./src/native"
importPaths "./src/native"

dependency "automem" version="~>0.6.6"
preGenerateCommands "git submodule update --init" # despacer download
dependency "despacer" path="./src/native/despacer/bindings/d"

configuration "executable" {
targetType "executable"
Expand Down
2 changes: 1 addition & 1 deletion package.json
Original file line number Diff line number Diff line change
Expand Up @@ -23,7 +23,7 @@
"build.release": "pnpm build -- --build release-nobounds --compiler=ldc2",
"build.profile": "pnpm build -- --build profile --compiler=ldc2 && node ./src/node/build.js && npm run build.node.js",
"build.benchmark": "dub build --config=benchmark --build release-nobounds --compiler=ldc2",
"start.profile": "shx rm -rf ./trace.* && npm run start.benchmark && profdump.exe --dot trace.log trace.dot && dot -Tsvg trace.dot -o trace.svg && ./trace.svg",
"start.profile": "shx rm -rf ./trace.* && npm run start.benchmark.node && profdump.exe --dot trace.log trace.dot && dot -Tsvg trace.dot -o trace.svg && ./trace.svg",
"build.node": "npm run build.release && node ./src/node/build.js && npm run build.node.js",
"build.node.js": "tsc -p ./src/node/tsconfig.json",
"build.wasm": "ldc2 ./src/wasm/wasm.d ./src/native/lib.d --od ./dist --O3 --mtriple=wasm32-unknown-unknown-wasm",
Expand Down
1 change: 1 addition & 0 deletions src/native/despacer
Submodule despacer added at f1acbd
48 changes: 42 additions & 6 deletions src/native/lib.d
Original file line number Diff line number Diff line change
@@ -1,12 +1,12 @@
module minijson.lib;

import std : ctRegex, replaceAll, join, array, matchAll, matchFirst, RegexMatch;
import std : ctRegex, matchAll, matchFirst, toStringz;

import despacer.simd_check : supports_sse4_1, supports_avx2;

const tokenizerWithComment = ctRegex!(`"|(/\*)|(\*/)|(//)|\n|\r|\[|]`, "g");
const tokenizerNoComment = ctRegex!(`[\n\r"[]]`, "g");

const spaceOrBreakRegex = ctRegex!(`\s`);

/**
Minify the given JSON string
Expand Down Expand Up @@ -49,7 +49,7 @@ string minifyString(in string jsonString, in bool hasComment = false) @trusted
const noLeftContext = leftContextSubstr.length == 0;
if (!in_string && !noLeftContext)
{
leftContextSubstr = leftContextSubstr.replaceAll(spaceOrBreakRegex, "");
leftContextSubstr = remove_spaces(leftContextSubstr);
}
if (!noLeftContext)
{
Expand Down Expand Up @@ -122,9 +122,45 @@ private bool hasNoSlashOrEvenNumberOfSlashes(in string leftContextSubstr) @safe
return slashCount % 2 == 0;
}

private bool notSlashAndNoSpaceOrBreak(in string matchFrontHit) @safe
private bool notSlashAndNoSpaceOrBreak(const ref string matchFrontHit) @safe
{
return matchFrontHit != "\"" && hasNoSpace(matchFrontHit);
}

/**
Removes whitespace from the given string.

When the despacer SIMD check reports SSE4.1 support at compile time, the work is delegated to
despacer's `sse4_despace_branchless_u4`; otherwise a `\s` regex replacement is used.

Params:
  str = the text to strip whitespace from

Returns: `str` without its whitespace. On the SSE4.1 path this is a slice of the input buffer
  (its first N characters, where N is the length reported by despacer), not a new allocation.
*/
private string remove_spaces(string str) @trusted nothrow
{
  static if (supports_sse4_1())
  {
    import despacer.despacer : sse4_despace_branchless_u4;

    // this wrapper reduces the overall time by 15 compared to d_sse4_despace_branchless_u4 because of no dup and toStringz
    // NOTE(review): the cast drops `immutable`; presumably despacer compacts the buffer in place,
    // so the caller's string contents are modified as well — confirm this is acceptable for all callers.
    auto cstr = cast(char*) str;
    const length = str.length;
    return str[0 .. sse4_despace_branchless_u4(cstr, length)];
  }
  else
  {
    // `replaceAll` is no longer part of the module-level import, so bring it in locally.
    import std : replaceAll;

    const spaceOrBreakRegex = ctRegex!(`\s`);
    try
    {
      // Operate on the `str` parameter and actually return the stripped result.
      return str.replaceAll(spaceOrBreakRegex, "");
    }
    catch (Exception e)
    {
      // The function is declared `nothrow`, so a regex failure cannot propagate as an Exception.
      assert(false, "remove_spaces: regex replacement failed");
    }
  }
}

/** Check if the given string has space */
private bool hasNoSpace(const ref string matchFrontHit) @trusted
{
return matchFrontHit != "\"" && matchFrontHit.matchFirst(spaceOrBreakRegex).empty();
static if (supports_avx2())
{
import despacer.despacer : avx2_hasspace;

// the algorithm never checks for zero termination so toStringz is not needed
return !avx2_hasspace(cast(const char*) matchFrontHit, matchFrontHit.length);
}
else
{
const spaceOrBreakRegex = ctRegex!(`\s`);
return matchFrontHit.matchFirst(spaceOrBreakRegex).empty();
}
}

/**
Expand Down

0 comments on commit 96cc75f

Please sign in to comment.