From 2dd32edd84e9b50e395cf92f51d5f26637e0fa25 Mon Sep 17 00:00:00 2001 From: Yefu Wang <1yefuwang1@gmail.com> Date: Mon, 26 Aug 2024 23:16:15 +0800 Subject: [PATCH] Add api reference --- README.md | 12 +++++++++ doc/conf.py | 8 +++--- doc/index.rst | 4 +-- doc/markdown/api.md | 62 +++++++++++++++++++++++++++++++++++++++++++++ 4 files changed, 80 insertions(+), 6 deletions(-) create mode 100644 doc/markdown/api.md diff --git a/README.md b/README.md index ab999fa..2388a1b 100644 --- a/README.md +++ b/README.md @@ -61,6 +61,18 @@ vector_from_json(json_string) -- converts a json array of type TEXT into BLOB(a vector_to_json(vector_blob) -- converts a vector of type BLOB(c-style float32 array) into a json array of type TEXT vector_distance(vector_blob1, vector_blob2, distance_type_str) -- calculate vector distance between two vectors, distance_type_str could be 'l2', 'cosine', 'ip' ``` +In fact, one can easily implement brute force searching using `vector_distance`: +```sql +-- use a normal sqlite table +create table my_table(rowid integer primary key, embedding blob); + +-- insert +insert into my_table(rowid, embedding) values (0, {your_embedding}); +-- search for 10 nearest neighbors using l2 squared distance +select rowid from my_table order by vector_distance({query_vector}, embedding, 'l2') asc limit 10 + +``` + ### Virtual Table The core of vectorlite is the [virtual table](https://www.sqlite.org/vtab.html) module, which is used to hold vector index. 
A vectorlite table can be created using: diff --git a/doc/conf.py b/doc/conf.py index 7bf72f5..118db85 100644 --- a/doc/conf.py +++ b/doc/conf.py @@ -6,10 +6,10 @@ # -- Project information ----------------------------------------------------- # https://www.sphinx-doc.org/en/master/usage/configuration.html#project-information -project = 'Example' -copyright = 'workshop participant' -author = 'workshop participant' -release = '0.1' +project = 'vectorlite' +copyright = 'vectorlite contributors' +author = '1yefuwang1@gmail.com' +release = '0.2.0' # -- General configuration --------------------------------------------------- diff --git a/doc/index.rst b/doc/index.rst index a8f18e7..8c97be8 100644 --- a/doc/index.rst +++ b/doc/index.rst @@ -3,7 +3,7 @@ You can adapt this file completely to your liking, but it should at least contain the root `toctree` directive. -Welcome to Example's documentation! +Welcome to vectorlite's documentation! =================================== .. toctree:: @@ -11,4 +11,4 @@ Welcome to Example's documentation! :caption: Contents: markdown/overview.md - markdown/another-feature.md + markdown/api.md diff --git a/doc/markdown/api.md b/doc/markdown/api.md new file mode 100644 index 0000000..f1ed030 --- /dev/null +++ b/doc/markdown/api.md @@ -0,0 +1,62 @@ +# API reference +Vectorlite provides the following APIs. +Please note vectorlite is currently in beta. There could be breaking changes. +## Free-standing Application Defined SQL functions +The following functions can be used in any context. +``` sql +vectorlite_info() -- prints version info and some compile time info. e.g. Is SSE, AVX enabled. 
+vector_from_json(json_string) -- converts a json array of type TEXT into BLOB(a c-style float32 array) +vector_to_json(vector_blob) -- converts a vector of type BLOB(c-style float32 array) into a json array of type TEXT +vector_distance(vector_blob1, vector_blob2, distance_type_str) -- calculates the distance between two vectors; distance_type_str can be 'l2', 'cosine' or 'ip' +``` + +In fact, one can easily implement brute force searching using `vector_distance`: +```sql +-- use a normal sqlite table +create table my_table(rowid integer primary key, embedding blob); + +-- insert +insert into my_table(rowid, embedding) values (0, {your_embedding}); +-- search for 10 nearest neighbors using l2 squared distance +select rowid from my_table order by vector_distance({query_vector}, embedding, 'l2') asc limit 10; + +``` +## Virtual Table +The core of vectorlite is the [virtual table](https://www.sqlite.org/vtab.html) module, which is used to hold the vector index and is way faster than the brute-force approach. +A vectorlite table can be created using: + +```sql +-- Required fields: table_name, vector_name, dimension, max_elements +-- Optional fields: +-- 1. distance_type: defaults to l2 +-- 2. ef_construction: defaults to 200 +-- 3. M: defaults to 16 +-- 4. random_seed: defaults to 100 +-- 5. allow_replace_deleted: defaults to true +-- 6. index_file_path: no default value. If not provided, the table will be memory-only. If provided, vectorlite will try to load the index from the file and save to it when the db connection is closed. +create virtual table {table_name} using vectorlite({vector_name} float32[{dimension}] {distance_type}, hnsw(max_elements={max_elements}, {ef_construction=200}, {M=16}, {random_seed=100}, {allow_replace_deleted=true}), {index_file_path}); +``` +You can insert into, update and delete from a vectorlite table as if it's a normal sqlite table. +```sql +-- rowid is required during insertion, because rowid is used to connect the vector to its metadata stored elsewhere. 
Auto-generating rowid doesn't make sense. +insert into my_vectorlite_table(rowid, vector_name) values ({your_rowid}, {vector_blob}); +-- Note: update and delete statements that use a rowid filter require sqlite_version >= 3.38 to run. +update my_vectorlite_table set vector_name = {new_vector_blob} where rowid = {your_rowid}; +delete from my_vectorlite_table where rowid = {your_rowid}; +``` +The following functions should only be used when querying a vectorlite table: +```sql +-- returns a knn_parameter that will be passed to knn_search(). +-- vector_blob: vector to search +-- k: how many nearest neighbors to search for +-- ef: optional. An HNSW parameter that controls the speed-accuracy trade-off. Defaults to 10 at first. If set to another value x, it will remain x if not specified again in another query within a single db connection. +knn_param(vector_blob, k, ef) +-- Should only be used in the `where` clause of a `select` statement to tell vectorlite to speed up the query using the HNSW index +-- vector_name should match the vectorlite table's definition +-- knn_parameter is usually constructed using knn_param() +knn_search(vector_name, knn_parameter) +-- An example of a vector search query. `distance` is an implicit column of a vectorlite table. +select rowid, distance from my_vectorlite_table where knn_search(vector_name, knn_param({vector_blob}, {k})); +-- An example of a vector search query with a pushed-down metadata(rowid) filter, requires sqlite_version >= 3.38 to run. +select rowid, distance from my_vectorlite_table where knn_search(vector_name, knn_param({vector_blob}, {k})) and rowid in (1,2,3,4,5); +```