-
Notifications
You must be signed in to change notification settings - Fork 6
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
- Loading branch information
1 parent
32cc9ff
commit c816503
Showing
8 changed files
with
70 additions
and
172 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,69 @@ | ||
# Getting started | ||
The quickest way to get started is to install vectorlite with pip, Python's package installer. | ||
```shell | ||
# Note: vectorlite-py not vectorlite. vectorlite is another project. | ||
pip install vectorlite-py apsw numpy | ||
``` | ||
Vectorlite's metadata filter feature requires sqlite>=3.38. Python's builtin `sqlite3` module is usually built against an older sqlite version, so `apsw` is used here as the sqlite driver because it provides bindings to the latest sqlite. Vectorlite still works with older sqlite versions if metadata filter support is not required. | ||
Below is a minimal example of using vectorlite. It can also be found in the examples folder. | ||
|
||
```python | ||
import vectorlite_py | ||
import apsw | ||
import numpy as np | ||
""" | ||
Quick start of using vectorlite extension. | ||
""" | ||
|
||
conn = apsw.Connection(':memory:') | ||
conn.enable_load_extension(True) # enable extension loading | ||
conn.load_extension(vectorlite_py.vectorlite_path()) # load vectorlite | ||
|
||
cursor = conn.cursor() | ||
# check if vectorlite is loaded | ||
print(cursor.execute('select vectorlite_info()').fetchall()) | ||
|
||
# Vector distance calculation | ||
for distance_type in ['l2', 'cosine', 'ip']: | ||
v1 = "[1, 2, 3]" | ||
v2 = "[4, 5, 6]" | ||
# Note vector_from_json can be used to convert a JSON string to a vector | ||
distance = cursor.execute(f'select vector_distance(vector_from_json(?), vector_from_json(?), "{distance_type}")', (v1, v2)).fetchone() | ||
print(f'{distance_type} distance between {v1} and {v2} is {distance[0]}') | ||
|
||
# generate some test data | ||
DIM = 32 # dimension of the vectors | ||
NUM_ELEMENTS = 10000 # number of vectors | ||
data = np.float32(np.random.random((NUM_ELEMENTS, DIM))) # Only float32 vectors are supported by vectorlite for now | ||
|
||
# Create a vectorlite virtual table using l2 distance (the default distance type) and default HNSW parameters | ||
cursor.execute(f'create virtual table my_table using vectorlite(my_embedding float32[{DIM}], hnsw(max_elements={NUM_ELEMENTS}))') | ||
# Vector distance type can be explicitly set to cosine using: | ||
# cursor.execute(f'create virtual table my_table using vectorlite(my_embedding float32[{DIM}] cosine, hnsw(max_elements={NUM_ELEMENTS}))') | ||
|
||
# Insert the test data into the virtual table. Note that the rowid MUST be explicitly set when inserting vectors and cannot be auto-generated. | ||
# The rowid is used to uniquely identify a vector and serve as a "foreign key" to relate to the vector's metadata. | ||
# Vectorlite takes vectors in raw bytes, so a numpy vector needs to be converted to bytes before inserting into the table. | ||
cursor.executemany('insert into my_table(rowid, my_embedding) values (?, ?)', [(i, data[i].tobytes()) for i in range(NUM_ELEMENTS)]) | ||
|
||
# Query the virtual table to get the vector at rowid 1234. Note the vector needs to be converted back to json using vector_to_json() to be human-readable. | ||
result = cursor.execute('select vector_to_json(my_embedding) from my_table where rowid = 1234').fetchone() | ||
print(f'vector at rowid 1234: {result[0]}') | ||
|
||
# Find 10 approximate nearest neighbors of data[0] and their distances from data[0]. | ||
# knn_search() is used to tell vectorlite to do a vector search. | ||
# knn_param(V, K, ef) is used to pass the query vector V, the number of nearest neighbors K to find and an optional ef parameter to tune the performance of the search. | ||
# If ef is not specified, ef defaults to 10. For more info on ef, please check https://github.com/nmslib/hnswlib/blob/v0.8.0/ALGO_PARAMS.md | ||
result = cursor.execute('select rowid, distance from my_table where knn_search(my_embedding, knn_param(?, 10))', [data[0].tobytes()]).fetchall() | ||
print(f'10 nearest neighbors of row 0 is {result}') | ||
|
||
# Find 10 approximate nearest neighbors of the first embedding in vectors with rowid within [1000, 2000) using metadata(rowid) filtering. | ||
rowids = ','.join([str(rowid) for rowid in range(1000, 2000)]) | ||
result = cursor.execute(f'select rowid, distance from my_table where knn_search(my_embedding, knn_param(?, 10)) and rowid in ({rowids})', [data[0].tobytes()]).fetchall() | ||
print(f'10 nearest neighbors of row 0 in vectors with rowid within [1000, 2000) is {result}') | ||
|
||
conn.close() | ||
|
||
``` | ||
|
||
More examples can be found in the examples and bindings/python/vectorlite_py/test folders. |
This file was deleted.
Oops, something went wrong.
Oops, something went wrong.