diff --git a/.github/workflows/doc.yml b/.github/workflows/doc.yml new file mode 100644 index 0000000..04506b1 --- /dev/null +++ b/.github/workflows/doc.yml @@ -0,0 +1,27 @@ +name: documentation + +on: [push, workflow_dispatch] + +permissions: + contents: write + +jobs: + docs: + runs-on: ubuntu-latest + steps: + - uses: actions/checkout@v4 + - uses: actions/setup-python@v5 + - name: Install dependencies + run: | + pip install sphinx sphinx_rtd_theme myst_parser + - name: Sphinx build + run: | + sphinx-build doc _build + - name: Deploy to GitHub Pages + uses: peaceiris/actions-gh-pages@v3 + if: ${{ (github.event_name == 'push' || github.event_name == 'workflow_dispatch') && github.ref == 'refs/heads/doc' }} + with: + publish_branch: gh-pages + github_token: ${{ secrets.GITHUB_TOKEN }} + publish_dir: _build/ + force_orphan: true diff --git a/doc/Makefile b/doc/Makefile new file mode 100644 index 0000000..d4bb2cb --- /dev/null +++ b/doc/Makefile @@ -0,0 +1,20 @@ +# Minimal makefile for Sphinx documentation +# + +# You can set these variables from the command line, and also +# from the environment for the first two. +SPHINXOPTS ?= +SPHINXBUILD ?= sphinx-build +SOURCEDIR = . +BUILDDIR = _build + +# Put it first so that "make" without argument is like "make help". +help: + @$(SPHINXBUILD) -M help "$(SOURCEDIR)" "$(BUILDDIR)" $(SPHINXOPTS) $(O) + +.PHONY: help Makefile + +# Catch-all target: route all unknown targets to Sphinx using the new +# "make mode" option. $(O) is meant as a shortcut for $(SPHINXOPTS). +%: Makefile + @$(SPHINXBUILD) -M $@ "$(SOURCEDIR)" "$(BUILDDIR)" $(SPHINXOPTS) $(O) diff --git a/doc/conf.py b/doc/conf.py new file mode 100644 index 0000000..7bf72f5 --- /dev/null +++ b/doc/conf.py @@ -0,0 +1,28 @@ +# Configuration file for the Sphinx documentation builder. 
+# +# For the full list of built-in configuration values, see the documentation: +# https://www.sphinx-doc.org/en/master/usage/configuration.html + +# -- Project information ----------------------------------------------------- +# https://www.sphinx-doc.org/en/master/usage/configuration.html#project-information + +project = 'Example' +copyright = 'workshop participant' +author = 'workshop participant' +release = '0.1' + + +# -- General configuration --------------------------------------------------- +# https://www.sphinx-doc.org/en/master/usage/configuration.html#general-configuration + +extensions = ['myst_parser'] + +templates_path = ['_templates'] +exclude_patterns = ['_build', 'Thumbs.db', '.DS_Store'] + + +# -- Options for HTML output ------------------------------------------------- +# https://www.sphinx-doc.org/en/master/usage/configuration.html#options-for-html-output + +html_theme = 'sphinx_rtd_theme' +html_static_path = ['_static'] diff --git a/doc/index.rst b/doc/index.rst new file mode 100644 index 0000000..a8f18e7 --- /dev/null +++ b/doc/index.rst @@ -0,0 +1,14 @@ +.. Example documentation master file, created by + sphinx-quickstart on Sat Sep 23 20:35:12 2023. + You can adapt this file completely to your liking, but it should at least + contain the root `toctree` directive. + +Welcome to Example's documentation! +=================================== + +.. toctree:: + :maxdepth: 2 + :caption: Contents: + + markdown/overview.md + markdown/another-feature.md diff --git a/doc/make.bat b/doc/make.bat new file mode 100644 index 0000000..32bb245 --- /dev/null +++ b/doc/make.bat @@ -0,0 +1,35 @@ +@ECHO OFF + +pushd %~dp0 + +REM Command file for Sphinx documentation + +if "%SPHINXBUILD%" == "" ( + set SPHINXBUILD=sphinx-build +) +set SOURCEDIR=. +set BUILDDIR=_build + +%SPHINXBUILD% >NUL 2>NUL +if errorlevel 9009 ( + echo. + echo.The 'sphinx-build' command was not found. 
Make sure you have Sphinx + echo.installed, then set the SPHINXBUILD environment variable to point + echo.to the full path of the 'sphinx-build' executable. Alternatively you + echo.may add the Sphinx directory to PATH. + echo. + echo.If you don't have Sphinx installed, grab it from + echo.https://www.sphinx-doc.org/ + exit /b 1 +) + +if "%1" == "" goto help + +%SPHINXBUILD% -M %1 %SOURCEDIR% %BUILDDIR% %SPHINXOPTS% %O% +goto end + +:help +%SPHINXBUILD% -M help %SOURCEDIR% %BUILDDIR% %SPHINXOPTS% %O% + +:end +popd diff --git a/doc/markdown/another-feature.md b/doc/markdown/another-feature.md new file mode 100644 index 0000000..7a4c041 --- /dev/null +++ b/doc/markdown/another-feature.md @@ -0,0 +1,57 @@ +# More features here + + +## Some text + +Lorem ipsum dolor sit amet, consectetur adipiscing elit, sed do eiusmod +tempor incididunt ut labore et dolore magna aliqua. Ut enim ad minim veniam, +quis nostrud exercitation ullamco laboris nisi ut aliquip ex ea commodo +consequat. Duis aute irure dolor in reprehenderit in voluptate velit esse +cillum dolore eu fugiat nulla pariatur. Excepteur sint occaecat cupidatat non +proident, sunt in culpa qui officia deserunt mollit anim id est laborum. + + +## Table + +| No. | Prime | +| ---- | ------ | +| 1 | No | +| 2 | Yes | +| 3 | Yes | +| 4 | No | + + + +## Code blocks + +The following is a Python code block: +```python + def hello(): + print("Hello world") +``` + +And this is a C code block: +```c +#include <stdio.h> +int main() +{ + printf("Hello, World!"); + return 0; +} +``` + + +## Math + +This creates an equation: +```{math} +a^2 + b^2 = c^2 +``` + +This is an in-line equation, {math}`a^2 + b^2 = c^2`, embedded in text. 
+ + +```{image} media/vector_insertion_3000_vectors.png +:alt: Select Parameters +:align: center +``` \ No newline at end of file diff --git a/doc/markdown/media/bench_result_20000_vectors b/doc/markdown/media/bench_result_20000_vectors new file mode 100644 index 0000000..95df25a --- /dev/null +++ b/doc/markdown/media/bench_result_20000_vectors @@ -0,0 +1,81 @@ +Using local vectorlite: ../build/release/vectorlite/vectorlite.so +Benchmarking using 20000 randomly vectors. 100 10-neariest neighbor queries will be performed on each case. +┏━━━━━━━━━━┳━━━━━━━━━━━┳━━━━━━━━━━━━━━┳━━━━┳━━━━━━━━┳━━━━━━━━━━━━━┳━━━━━━━━━━━━━┳━━━━━━━━┓ +┃ distance ┃ vector ┃ ef ┃ ┃ ef ┃ insert_time ┃ search_time ┃ recall ┃ +┃ type ┃ dimension ┃ construction ┃ M ┃ search ┃ per vector ┃ per query ┃ rate ┃ +┡━━━━━━━━━━╇━━━━━━━━━━━╇━━━━━━━━━━━━━━╇━━━━╇━━━━━━━━╇━━━━━━━━━━━━━╇━━━━━━━━━━━━━╇━━━━━━━━┩ +│ l2 │ 128 │ 100 │ 30 │ 10 │ 187.41 us │ 46.58 us │ 29.10% │ +│ l2 │ 128 │ 100 │ 30 │ 50 │ 187.41 us │ 95.16 us │ 70.20% │ +│ l2 │ 128 │ 100 │ 30 │ 100 │ 187.41 us │ 179.51 us │ 85.70% │ +│ l2 │ 512 │ 100 │ 30 │ 10 │ 820.80 us │ 105.80 us │ 18.10% │ +│ l2 │ 512 │ 100 │ 30 │ 50 │ 820.80 us │ 361.83 us │ 50.40% │ +│ l2 │ 512 │ 100 │ 30 │ 100 │ 820.80 us │ 628.88 us │ 67.00% │ +│ l2 │ 1536 │ 100 │ 30 │ 10 │ 2665.31 us │ 292.39 us │ 13.70% │ +│ l2 │ 1536 │ 100 │ 30 │ 50 │ 2665.31 us │ 1069.47 us │ 42.40% │ +│ l2 │ 1536 │ 100 │ 30 │ 100 │ 2665.31 us │ 1744.79 us │ 59.50% │ +│ l2 │ 3000 │ 100 │ 30 │ 10 │ 5236.76 us │ 558.56 us │ 13.80% │ +│ l2 │ 3000 │ 100 │ 30 │ 50 │ 5236.76 us │ 1787.83 us │ 39.30% │ +│ l2 │ 3000 │ 100 │ 30 │ 100 │ 5236.76 us │ 3039.94 us │ 56.60% │ +│ cosine │ 128 │ 100 │ 30 │ 10 │ 164.31 us │ 25.35 us │ 34.70% │ +│ cosine │ 128 │ 100 │ 30 │ 50 │ 164.31 us │ 78.33 us │ 71.20% │ +│ cosine │ 128 │ 100 │ 30 │ 100 │ 164.31 us │ 133.75 us │ 87.60% │ +│ cosine │ 512 │ 100 │ 30 │ 10 │ 711.35 us │ 100.90 us │ 19.00% │ +│ cosine │ 512 │ 100 │ 30 │ 50 │ 711.35 us │ 406.08 us │ 51.10% │ +│ cosine │ 
512 │ 100 │ 30 │ 100 │ 711.35 us │ 582.51 us │ 71.50% │ +│ cosine │ 1536 │ 100 │ 30 │ 10 │ 2263.96 us │ 283.88 us │ 22.60% │ +│ cosine │ 1536 │ 100 │ 30 │ 50 │ 2263.96 us │ 919.98 us │ 54.50% │ +│ cosine │ 1536 │ 100 │ 30 │ 100 │ 2263.96 us │ 1674.77 us │ 72.40% │ +│ cosine │ 3000 │ 100 │ 30 │ 10 │ 4541.09 us │ 566.31 us │ 19.80% │ +│ cosine │ 3000 │ 100 │ 30 │ 50 │ 4541.09 us │ 1672.82 us │ 49.30% │ +│ cosine │ 3000 │ 100 │ 30 │ 100 │ 4541.09 us │ 2855.43 us │ 65.40% │ +└──────────┴───────────┴──────────────┴────┴────────┴─────────────┴─────────────┴────────┘ +Bencharmk hnswlib as comparison. +┏━━━━━━━━━━┳━━━━━━━━━━━┳━━━━━━━━━━━━━━┳━━━━┳━━━━━━━━┳━━━━━━━━━━━━━┳━━━━━━━━━━━━━┳━━━━━━━━┓ +┃ distance ┃ vector ┃ ef ┃ ┃ ef ┃ insert_time ┃ search_time ┃ recall ┃ +┃ type ┃ dimension ┃ construction ┃ M ┃ search ┃ per vector ┃ per query ┃ rate ┃ +┡━━━━━━━━━━╇━━━━━━━━━━━╇━━━━━━━━━━━━━━╇━━━━╇━━━━━━━━╇━━━━━━━━━━━━━╇━━━━━━━━━━━━━╇━━━━━━━━┩ +│ l2 │ 128 │ 100 │ 30 │ 10 │ 23.06 us │ 39.96 us │ 29.60% │ +│ l2 │ 128 │ 100 │ 30 │ 50 │ 23.06 us │ 75.02 us │ 69.80% │ +│ l2 │ 128 │ 100 │ 30 │ 100 │ 23.06 us │ 160.01 us │ 85.40% │ +│ l2 │ 512 │ 100 │ 30 │ 10 │ 146.58 us │ 167.31 us │ 18.10% │ +│ l2 │ 512 │ 100 │ 30 │ 50 │ 146.58 us │ 392.12 us │ 50.80% │ +│ l2 │ 512 │ 100 │ 30 │ 100 │ 146.58 us │ 781.50 us │ 67.20% │ +│ l2 │ 1536 │ 100 │ 30 │ 10 │ 657.41 us │ 298.71 us │ 12.70% │ +│ l2 │ 1536 │ 100 │ 30 │ 50 │ 657.41 us │ 1031.61 us │ 40.60% │ +│ l2 │ 1536 │ 100 │ 30 │ 100 │ 657.41 us │ 1764.34 us │ 57.90% │ +│ l2 │ 3000 │ 100 │ 30 │ 10 │ 1842.77 us │ 852.88 us │ 13.80% │ +│ l2 │ 3000 │ 100 │ 30 │ 50 │ 1842.77 us │ 2905.57 us │ 39.60% │ +│ l2 │ 3000 │ 100 │ 30 │ 100 │ 1842.77 us │ 4936.35 us │ 56.50% │ +│ cosine │ 128 │ 100 │ 30 │ 10 │ 19.25 us │ 23.27 us │ 34.20% │ +│ cosine │ 128 │ 100 │ 30 │ 50 │ 19.25 us │ 72.66 us │ 71.40% │ +│ cosine │ 128 │ 100 │ 30 │ 100 │ 19.25 us │ 134.11 us │ 87.60% │ +│ cosine │ 512 │ 100 │ 30 │ 10 │ 112.80 us │ 106.90 us │ 22.70% │ +│ cosine │ 512 │ 100 │ 30 │ 
50 │ 112.80 us │ 341.23 us │ 54.20% │ +│ cosine │ 512 │ 100 │ 30 │ 100 │ 112.80 us │ 609.93 us │ 72.40% │ +│ cosine │ 1536 │ 100 │ 30 │ 10 │ 615.04 us │ 268.00 us │ 22.50% │ +│ cosine │ 1536 │ 100 │ 30 │ 50 │ 615.04 us │ 898.82 us │ 54.00% │ +│ cosine │ 1536 │ 100 │ 30 │ 100 │ 615.04 us │ 1557.51 us │ 71.90% │ +│ cosine │ 3000 │ 100 │ 30 │ 10 │ 1425.49 us │ 546.18 us │ 20.60% │ +│ cosine │ 3000 │ 100 │ 30 │ 50 │ 1425.49 us │ 2008.53 us │ 49.20% │ +│ cosine │ 3000 │ 100 │ 30 │ 100 │ 1425.49 us │ 3106.51 us │ 65.00% │ +└──────────┴───────────┴──────────────┴────┴────────┴─────────────┴─────────────┴────────┘ +Bencharmk vectorlite brute force(select rowid from my_table order by vector_distance(query_vector, embedding, 'l2')) as comparison. +┏━━━━━━━━━━┳━━━━━━━━━━━┳━━━━━━━━━━━━━┳━━━━━━━━━━━━━┳━━━━━━━━━┓ +┃ distance ┃ vector ┃ insert_time ┃ search_time ┃ recall ┃ +┃ type ┃ dimension ┃ per vector ┃ per query ┃ rate ┃ +┡━━━━━━━━━━╇━━━━━━━━━━━╇━━━━━━━━━━━━━╇━━━━━━━━━━━━━╇━━━━━━━━━┩ +│ l2 │ 128 │ 0.93 us │ 2039.69 us │ 100.00% │ +│ l2 │ 512 │ 2.73 us │ 7177.23 us │ 100.00% │ +│ l2 │ 1536 │ 4.64 us │ 17163.25 us │ 100.00% │ +│ l2 │ 3000 │ 6.62 us │ 25378.79 us │ 100.00% │ +└──────────┴───────────┴─────────────┴─────────────┴─────────┘ +Bencharmk sqlite_vec as comparison. 
+┏━━━━━━━━━━┳━━━━━━━━━━━┳━━━━━━━━━━━━━┳━━━━━━━━━━━━━┳━━━━━━━━━┓ +┃ distance ┃ vector ┃ insert_time ┃ search_time ┃ recall ┃ +┃ type ┃ dimension ┃ per vector ┃ per query ┃ rate ┃ +┡━━━━━━━━━━╇━━━━━━━━━━━╇━━━━━━━━━━━━━╇━━━━━━━━━━━━━╇━━━━━━━━━┩ +│ l2 │ 128 │ 3.49 us │ 1560.17 us │ 100.00% │ +│ l2 │ 512 │ 6.73 us │ 7778.39 us │ 100.00% │ +│ l2 │ 1536 │ 17.13 us │ 26344.76 us │ 100.00% │ +│ l2 │ 3000 │ 35.30 us │ 60652.58 us │ 100.00% │ +└──────────┴───────────┴─────────────┴─────────────┴─────────┘ diff --git a/doc/markdown/media/bench_result_3000_vectors b/doc/markdown/media/bench_result_3000_vectors new file mode 100644 index 0000000..9f90b75 --- /dev/null +++ b/doc/markdown/media/bench_result_3000_vectors @@ -0,0 +1,91 @@ +Using local vectorlite: ../build/release/vectorlite/vectorlite.so +Benchmarking using 3000 randomly vectors. 100 10-nearest neighbor queries will be performed on each case. +┏━━━━━━━━━━┳━━━━━━━━━━━┳━━━━━━━━━━━━━━┳━━━━┳━━━━━━━━┳━━━━━━━━━━━━━┳━━━━━━━━━━━━━┳━━━━━━━━┓ +┃ distance ┃ vector ┃ ef ┃ ┃ ef ┃ insert_time ┃ search_time ┃ recall ┃ +┃ type ┃ dimension ┃ construction ┃ M ┃ search ┃ per vector ┃ per query ┃ rate ┃ +┡━━━━━━━━━━╇━━━━━━━━━━━╇━━━━━━━━━━━━━━╇━━━━╇━━━━━━━━╇━━━━━━━━━━━━━╇━━━━━━━━━━━━━╇━━━━━━━━┩ +│ l2 │ 128 │ 100 │ 30 │ 10 │ 62.41 us │ 12.96 us │ 56.40% │ +│ l2 │ 128 │ 100 │ 30 │ 50 │ 62.41 us │ 42.95 us │ 93.30% │ +│ l2 │ 128 │ 100 │ 30 │ 100 │ 62.41 us │ 62.06 us │ 99.40% │ +│ l2 │ 512 │ 100 │ 30 │ 10 │ 146.40 us │ 38.05 us │ 46.60% │ +│ l2 │ 512 │ 100 │ 30 │ 50 │ 146.40 us │ 95.96 us │ 86.50% │ +│ l2 │ 512 │ 100 │ 30 │ 100 │ 146.40 us │ 148.46 us │ 96.70% │ +│ l2 │ 1536 │ 100 │ 30 │ 10 │ 463.56 us │ 124.51 us │ 38.10% │ +│ l2 │ 1536 │ 100 │ 30 │ 50 │ 463.56 us │ 355.70 us │ 78.50% │ +│ l2 │ 1536 │ 100 │ 30 │ 100 │ 463.56 us │ 547.84 us │ 92.70% │ +│ l2 │ 3000 │ 100 │ 30 │ 10 │ 1323.25 us │ 391.57 us │ 36.60% │ +│ l2 │ 3000 │ 100 │ 30 │ 50 │ 1323.25 us │ 1041.37 us │ 78.60% │ +│ l2 │ 3000 │ 100 │ 30 │ 100 │ 1323.25 us │ 1443.10 us │ 
93.10% │ +│ cosine │ 128 │ 100 │ 30 │ 10 │ 59.75 us │ 15.27 us │ 58.30% │ +│ cosine │ 128 │ 100 │ 30 │ 50 │ 59.75 us │ 36.72 us │ 94.60% │ +│ cosine │ 128 │ 100 │ 30 │ 100 │ 59.75 us │ 63.67 us │ 99.30% │ +│ cosine │ 512 │ 100 │ 30 │ 10 │ 148.19 us │ 36.98 us │ 51.00% │ +│ cosine │ 512 │ 100 │ 30 │ 50 │ 148.19 us │ 102.46 us │ 88.10% │ +│ cosine │ 512 │ 100 │ 30 │ 100 │ 148.19 us │ 143.41 us │ 96.90% │ +│ cosine │ 1536 │ 100 │ 30 │ 10 │ 427.21 us │ 106.94 us │ 42.10% │ +│ cosine │ 1536 │ 100 │ 30 │ 50 │ 427.21 us │ 285.50 us │ 83.30% │ +│ cosine │ 1536 │ 100 │ 30 │ 100 │ 427.21 us │ 441.66 us │ 95.60% │ +│ cosine │ 3000 │ 100 │ 30 │ 10 │ 970.17 us │ 289.00 us │ 42.20% │ +│ cosine │ 3000 │ 100 │ 30 │ 50 │ 970.17 us │ 848.03 us │ 83.90% │ +│ cosine │ 3000 │ 100 │ 30 │ 100 │ 970.17 us │ 1250.29 us │ 95.60% │ +└──────────┴───────────┴──────────────┴────┴────────┴─────────────┴─────────────┴────────┘ +Bencharmk hnswlib as comparison. +┏━━━━━━━━━━┳━━━━━━━━━━━┳━━━━━━━━━━━━━━┳━━━━┳━━━━━━━━┳━━━━━━━━━━━━━┳━━━━━━━━━━━━━┳━━━━━━━━┓ +┃ distance ┃ vector ┃ ef ┃ ┃ ef ┃ insert_time ┃ search_time ┃ recall ┃ +┃ type ┃ dimension ┃ construction ┃ M ┃ search ┃ per vector ┃ per query ┃ rate ┃ +┡━━━━━━━━━━╇━━━━━━━━━━━╇━━━━━━━━━━━━━━╇━━━━╇━━━━━━━━╇━━━━━━━━━━━━━╇━━━━━━━━━━━━━╇━━━━━━━━┩ +│ l2 │ 128 │ 100 │ 30 │ 10 │ 12.84 us │ 12.83 us │ 56.90% │ +│ l2 │ 128 │ 100 │ 30 │ 50 │ 12.84 us │ 41.93 us │ 93.60% │ +│ l2 │ 128 │ 100 │ 30 │ 100 │ 12.84 us │ 65.84 us │ 99.40% │ +│ l2 │ 512 │ 100 │ 30 │ 10 │ 29.34 us │ 47.37 us │ 47.00% │ +│ l2 │ 512 │ 100 │ 30 │ 50 │ 29.34 us │ 126.29 us │ 86.40% │ +│ l2 │ 512 │ 100 │ 30 │ 100 │ 29.34 us │ 198.30 us │ 96.80% │ +│ l2 │ 1536 │ 100 │ 30 │ 10 │ 90.05 us │ 149.35 us │ 37.20% │ +│ l2 │ 1536 │ 100 │ 30 │ 50 │ 90.05 us │ 431.53 us │ 78.00% │ +│ l2 │ 1536 │ 100 │ 30 │ 100 │ 90.05 us │ 765.03 us │ 92.50% │ +│ l2 │ 3000 │ 100 │ 30 │ 10 │ 388.87 us │ 708.98 us │ 36.30% │ +│ l2 │ 3000 │ 100 │ 30 │ 50 │ 388.87 us │ 1666.87 us │ 78.90% │ +│ l2 │ 3000 │ 100 │ 30 │ 100 
│ 388.87 us │ 2489.98 us │ 93.40% │ +│ cosine │ 128 │ 100 │ 30 │ 10 │ 10.90 us │ 11.14 us │ 58.10% │ +│ cosine │ 128 │ 100 │ 30 │ 50 │ 10.90 us │ 37.39 us │ 94.30% │ +│ cosine │ 128 │ 100 │ 30 │ 100 │ 10.90 us │ 62.45 us │ 99.40% │ +│ cosine │ 512 │ 100 │ 30 │ 10 │ 25.46 us │ 38.92 us │ 50.70% │ +│ cosine │ 512 │ 100 │ 30 │ 50 │ 25.46 us │ 109.84 us │ 87.90% │ +│ cosine │ 512 │ 100 │ 30 │ 100 │ 25.46 us │ 151.00 us │ 97.10% │ +│ cosine │ 1536 │ 100 │ 30 │ 10 │ 77.53 us │ 119.48 us │ 42.00% │ +│ cosine │ 1536 │ 100 │ 30 │ 50 │ 77.53 us │ 340.78 us │ 84.00% │ +│ cosine │ 1536 │ 100 │ 30 │ 100 │ 77.53 us │ 510.02 us │ 95.50% │ +│ cosine │ 3000 │ 100 │ 30 │ 10 │ 234.79 us │ 453.12 us │ 43.20% │ +│ cosine │ 3000 │ 100 │ 30 │ 50 │ 234.79 us │ 1380.79 us │ 83.80% │ +│ cosine │ 3000 │ 100 │ 30 │ 100 │ 234.79 us │ 1520.92 us │ 95.70% │ +└──────────┴───────────┴──────────────┴────┴────────┴─────────────┴─────────────┴────────┘ +Bencharmk vectorlite brute force(select rowid from my_table order by vector_distance(query_vector, embedding, 'l2')) as comparison. +┏━━━━━━━━━━┳━━━━━━━━━━━┳━━━━━━━━━━━━━┳━━━━━━━━━━━━━┳━━━━━━━━━┓ +┃ distance ┃ vector ┃ insert_time ┃ search_time ┃ recall ┃ +┃ type ┃ dimension ┃ per vector ┃ per query ┃ rate ┃ +┡━━━━━━━━━━╇━━━━━━━━━━━╇━━━━━━━━━━━━━╇━━━━━━━━━━━━━╇━━━━━━━━━┩ +│ l2 │ 128 │ 2.38 us │ 299.14 us │ 100.00% │ +│ l2 │ 512 │ 3.69 us │ 571.19 us │ 100.00% │ +│ l2 │ 1536 │ 4.86 us │ 2237.64 us │ 100.00% │ +│ l2 │ 3000 │ 7.69 us │ 5135.63 us │ 100.00% │ +└──────────┴───────────┴─────────────┴─────────────┴─────────┘ +Bencharmk sqlite_vss as comparison. 
+┏━━━━━━━━━━┳━━━━━━━━━━━┳━━━━━━━━━━━━━┳━━━━━━━━━━━━━┳━━━━━━━━━┓ +┃ distance ┃ vector ┃ insert_time ┃ search_time ┃ recall ┃ +┃ type ┃ dimension ┃ per vector ┃ per query ┃ rate ┃ +┡━━━━━━━━━━╇━━━━━━━━━━━╇━━━━━━━━━━━━━╇━━━━━━━━━━━━━╇━━━━━━━━━┩ +│ l2 │ 128 │ 395.24 us │ 2508.52 us │ 99.90% │ +│ l2 │ 512 │ 2824.89 us │ 1530.77 us │ 100.00% │ +│ l2 │ 1536 │ 8931.72 us │ 1602.36 us │ 100.00% │ +│ l2 │ 3000 │ 17498.60 us │ 3142.38 us │ 100.00% │ +└──────────┴───────────┴─────────────┴─────────────┴─────────┘ +Bencharmk sqlite_vec as comparison. +┏━━━━━━━━━━┳━━━━━━━━━━━┳━━━━━━━━━━━━━┳━━━━━━━━━━━━━┳━━━━━━━━━┓ +┃ distance ┃ vector ┃ insert_time ┃ search_time ┃ recall ┃ +┃ type ┃ dimension ┃ per vector ┃ per query ┃ rate ┃ +┡━━━━━━━━━━╇━━━━━━━━━━━╇━━━━━━━━━━━━━╇━━━━━━━━━━━━━╇━━━━━━━━━┩ +│ l2 │ 128 │ 10.21 us │ 202.05 us │ 100.00% │ +│ l2 │ 512 │ 14.43 us │ 989.64 us │ 100.00% │ +│ l2 │ 1536 │ 31.68 us │ 3856.08 us │ 100.00% │ +│ l2 │ 3000 │ 59.94 us │ 9503.91 us │ 100.00% │ +└──────────┴───────────┴─────────────┴─────────────┴─────────┘ diff --git a/doc/markdown/media/vector_insertion_20000_vectors.png b/doc/markdown/media/vector_insertion_20000_vectors.png new file mode 100644 index 0000000..7380f1c Binary files /dev/null and b/doc/markdown/media/vector_insertion_20000_vectors.png differ diff --git a/doc/markdown/media/vector_insertion_3000_vectors.png b/doc/markdown/media/vector_insertion_3000_vectors.png new file mode 100644 index 0000000..27591c5 Binary files /dev/null and b/doc/markdown/media/vector_insertion_3000_vectors.png differ diff --git a/doc/markdown/media/vector_query_20000_vectors.png b/doc/markdown/media/vector_query_20000_vectors.png new file mode 100644 index 0000000..a0f92fb Binary files /dev/null and b/doc/markdown/media/vector_query_20000_vectors.png differ diff --git a/doc/markdown/media/vector_query_3000_vectors.png b/doc/markdown/media/vector_query_3000_vectors.png new file mode 100644 index 0000000..7106b9b Binary files /dev/null and 
b/doc/markdown/media/vector_query_3000_vectors.png differ diff --git a/doc/markdown/overview.md b/doc/markdown/overview.md new file mode 100644 index 0000000..88061bc --- /dev/null +++ b/doc/markdown/overview.md @@ -0,0 +1,50 @@ +# Overview of vectorlite +## Quick overview +Vectorlite is a [Runtime-loadable extension](https://www.sqlite.org/loadext.html) for SQLite that enables fast vector search based on [hnswlib](https://github.com/nmslib/hnswlib) and works on Windows, MacOS and Linux. It provides fast vector search capabilities with a SQL interface and runs on every language with a SQLite driver. + +For motivation and background of this project, please check [here](https://dev.to/yefuwang/introducing-vectorlite-a-fast-and-tunable-vector-search-extension-for-sqlite-4dcl). + +Below is an example of using it in sqlite CLI shell: + +```sql +-- Load vectorlite +.load path/to/vectorlite.[so|dll|dylib] +-- Shows vectorlite version and build info. +select vectorlite_info(); +-- Calculate vector l2(squared) distance +select vector_distance(vector_from_json('[1,2,3]'), vector_from_json('[3,4,5]'), 'l2'); +-- Create a virtual table named my_table with one vector column my_embedding with a dimension of 3 +create virtual table my_table using vectorlite(my_embedding float32[3], hnsw(max_elements=100)); +-- Insert vectors into my_table. rowid can be used to relate to a vector's metadata stored elsewhere, e.g. another table. +insert into my_table(rowid, my_embedding) values (0, vector_from_json('[1,2,3]')); +insert into my_table(rowid, my_embedding) values (1, vector_from_json('[2,3,4]')); +insert into my_table(rowid, my_embedding) values (2, vector_from_json('[7,7,7]')); +-- Find 2 approximate nearest neighbors of vector [3,4,5] with distances +select rowid, distance from my_table where knn_search(my_embedding, knn_param(vector_from_json('[3,4,5]'), 2)); +-- Find the nearest neighbor of vector [3,4,5] among vectors with rowid 0 and 1. 
(requires sqlite_version>=3.38) +-- It is called metadata filter in vectorlite, because you could get rowid set beforehand based on vectors' metadata and then perform vector search. +-- Metadata filter is pushed down to the underlying index when traversing the HNSW graph. +select rowid, distance from my_table where knn_search(my_embedding, knn_param(vector_from_json('[3,4,5]'), 1)) and rowid in (0, 1) ; + +``` + +Currently, vectorlite is pre-compiled for Windows-x64, Linux-x64, MacOS-x64, MacOS-arm64 and distributed as python wheels and npm packages. It can be installed simply by: +```shell +# For python +pip install vectorlite-py +# for nodejs +npm i vectorlite +``` +For other languages, `vectorlite.[so|dll|dylib]` can be extracted from the wheel for your platform, given that a `*.whl` file is actually a zip archive. + +Vectorlite is currently in beta. There could be breaking changes. + +## Highlights +1. Fast ANN(approximate nearest neighbors) search backed by [hnswlib](https://github.com/nmslib/hnswlib). Vector query is significantly faster than similar projects like [sqlite-vec](https://github.com/asg017/sqlite-vec) and [sqlite-vss](https://github.com/asg017/sqlite-vss). Please see benchmark [below](https://github.com/1yefuwang1/vectorlite?tab=readme-ov-file#benchmark). +2. Works on Windows, Linux and MacOS(x64 and ARM). +3. A fast and portable SIMD accelerated vector distance implementation using Google's [highway](https://github.com/google/highway) library. On my PC(i5-12600KF with AVX2 support), vectorlite's implementation is 1.5x-3x faster than hnswlib's when dealing with vectors with dimension >= 256. +4. Supports all vector distance types provided by hnswlib: l2(squared l2), cosine, ip(inner product. I do not recommend using it, though). For more info please check [hnswlib's doc](https://github.com/nmslib/hnswlib/tree/v0.8.0?tab=readme-ov-file#supported-distances). +5. 
Full control over [HNSW parameters](https://github.com/nmslib/hnswlib/blob/v0.8.0/ALGO_PARAMS.md) for performance tuning. Please check [this example](https://github.com/1yefuwang1/vectorlite/blob/main/examples/hnsw_param.py). +6. Predicate pushdown support for vector metadata(rowid) filter (requires sqlite version >= 3.38). Please check [this example](https://github.com/1yefuwang1/vectorlite/blob/main/examples/metadata_filter.py). +7. Index serde support. A vectorlite table can be saved to a file, and be reloaded from it. Index files created by hnswlib can also be loaded by vectorlite. Please check [this example](https://github.com/1yefuwang1/vectorlite/blob/main/examples/index_serde.py). +8. Vector json serde support using `vector_from_json()` and `vector_to_json()`. diff --git a/doc/markdown/some-feature.md b/doc/markdown/some-feature.md new file mode 100644 index 0000000..4d852c7 --- /dev/null +++ b/doc/markdown/some-feature.md @@ -0,0 +1,17 @@ +# Some feature + +## Subsection + +Exciting documentation in here. +Let's make a list (empty surrounding lines required): + +- item 1 + + - nested item 1 + - nested item 2 + +- item 2 +- item 3 + +![vector insertion](media/vector_insertion_3000_vectors.png) +![vector query](media/vector_query_3000_vectors.png) \ No newline at end of file