Skip to content

Commit

Permalink
Enable windows build
Browse files Browse the repository at this point in the history
  • Loading branch information
1yefuwang1 committed Jul 1, 2024
1 parent e851d14 commit 3de7a66
Show file tree
Hide file tree
Showing 9 changed files with 131 additions and 91 deletions.
29 changes: 13 additions & 16 deletions .github/workflows/build-wheel.yml
Original file line number Diff line number Diff line change
Expand Up @@ -8,37 +8,34 @@ jobs:
runs-on: ${{ matrix.os }}
strategy:
matrix:
# os: [ubuntu-latest, windows-latest, macos-13, macos-14]
os: [ubuntu-latest, macos-13, macos-14]
os: [ubuntu-latest, windows-latest, macos-13, macos-14]
# os: [ubuntu-latest, macos-13, macos-14]
# os: [windows-latest]

steps:
- uses: actions/checkout@v4

# Used to host cibuildwheel
- uses: actions/setup-python@v5

- name: setup devcmd
if: ${{ matrix.os == 'windows-latest' }}
# NOTE: this is necessary to correctly find and use cl.exe
uses: ilammy/[email protected]
- name: Install cibuildwheel
run: python -m pip install cibuildwheel==2.19.1 pipx pytest apsw numpy

- name: Install linux deps
if: ${{ matrix.os == 'ubuntu-latest' }}
run: |
sudo apt-get install zip -y
- uses: ilammy/msvc-dev-cmd@v1

- name: Setup cmake
uses: jwlawson/actions-setup-cmake@v2

- name: Setup Ninja
uses: seanmiddleditch/gha-setup-ninja@v5

- name: Bootstrap vcpkg
run: |
git submodule update --init --recursive
python bootstrap_vcpkg.py
- name: Install cibuildwheel
run: python -m pip install cibuildwheel==2.19.1

- name: Build wheels
run: python -m cibuildwheel --output-dir wheelhouse
env:
MACOSX_DEPLOYMENT_TARGET: '10.15' # 10.15 is the minimum version that fully supports c++17
run: pipx run cibuildwheel --output-dir wheelhouse

- uses: actions/upload-artifact@v4
with:
Expand Down
14 changes: 13 additions & 1 deletion CMakeLists.txt
Original file line number Diff line number Diff line change
@@ -1,5 +1,10 @@
cmake_minimum_required(VERSION 3.22 FATAL_ERROR)

# Choose the vcpkg triplet for Windows builds. This is done ahead of project()
# so the value is already in place when the toolchain is loaded.
# NOTE(review): assumes the configure preset supplies the vcpkg toolchain file.
# The default is applied only when no triplet has been chosen yet, so an
# explicit -DVCPKG_TARGET_TRIPLET=... (or a preset value) is honoured instead of
# being shadowed by the hard-coded one.
if(WIN32)
  if(NOT DEFINED VCPKG_TARGET_TRIPLET)
    set(VCPKG_TARGET_TRIPLET "x64-windows-static-md-release")
  endif()
  message(STATUS "VCPKG_TARGET_TRIPLET on windows: ${VCPKG_TARGET_TRIPLET}")
endif()

project(vectorlite VERSION 0.1.0 LANGUAGES CXX)

configure_file(src/version.h.in version.h)
Expand Down Expand Up @@ -31,12 +36,13 @@ if (CMAKE_SYSTEM_PROCESSOR MATCHES "(x86)|(X86)|(amd64)|(AMD64)")
endif ()

# The vectorlite extension itself, built as a shared library.
add_library(vectorlite SHARED
  src/vectorlite.cpp
  src/virtual_table.cpp
  src/vector.cpp
  src/util.cpp
  src/vector_space.cpp
  src/index_options.cpp
  src/sqlite_functions.cpp
  src/constraint.cpp
)
# Remove the "lib" prefix so the shared library has the same base name on all platforms.
set_target_properties(vectorlite PROPERTIES PREFIX "")
target_include_directories(vectorlite PUBLIC ${RAPIDJSON_INCLUDE_DIRS} ${HNSWLIB_INCLUDE_DIRS} ${PROJECT_BINARY_DIR})
target_link_libraries(vectorlite PRIVATE unofficial::sqlite3::sqlite3 absl::status absl::statusor absl::strings re2::re2)
# Copy the built library into the python package to make running integration tests easier.
# VERBATIM keeps argument escaping identical on every platform and generator.
add_custom_command(
  TARGET vectorlite POST_BUILD
  COMMAND ${CMAKE_COMMAND} -E copy
          $<TARGET_FILE:vectorlite>
          ${PROJECT_SOURCE_DIR}/vectorlite_py/$<TARGET_FILE_NAME:vectorlite>
  VERBATIM
)


include(GoogleTest)
enable_testing()
file(GLOB TEST_SOURCES src/*.cpp)
Expand All @@ -61,6 +67,12 @@ if(CMAKE_BUILD_TYPE STREQUAL "Debug")
target_link_libraries(unit-test PRIVATE absl::log)
endif()

# TODO: For reasons not yet understood, absl::log symbols are required even for
# release builds on MSVC. Only DLOG is used, which should be guarded by NDEBUG
# and therefore not included in a Release build.
if(MSVC)
  foreach(msvc_target IN ITEMS vectorlite unit-test)
    target_link_libraries(${msvc_target} PRIVATE absl::log)
  endforeach()
endif()

gtest_discover_tests(unit-test)

add_test(NAME unit-test COMMAND unit-test)
Expand Down
2 changes: 1 addition & 1 deletion examples/knn_search.py
Original file line number Diff line number Diff line change
Expand Up @@ -20,7 +20,7 @@ def create_connection():
# create connection to in-memory database
conn = apsw.Connection(':memory:') if use_apsw else sqlite3.connect(':memory:')
conn.enable_load_extension(True)
conn.load_extension('../build/release/libvectorlite.so')
conn.load_extension('../build/release/vectorlite.so')
return conn

conn = create_connection()
Expand Down
2 changes: 1 addition & 1 deletion integration_test/delete_test.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -48,7 +48,7 @@ int main(int argc, char* argv[]) {
const auto& vectors = GenerateRandomVectors();
rc = sqlite3_enable_load_extension(db, 1);
assert(rc == SQLITE_OK);
rc = sqlite3_load_extension(db, "build/dev/libvectorlite.so", "sqlite3_extension_init", &zErrMsg);
rc = sqlite3_load_extension(db, "build/dev/vectorlite.so", "sqlite3_extension_init", &zErrMsg);
if (rc != SQLITE_OK) {
std::cerr << "load extension failed: " << zErrMsg << std::endl;
sqlite3_free(zErrMsg);
Expand Down
122 changes: 67 additions & 55 deletions integration_test/python/test/vectorlite_test.py
Original file line number Diff line number Diff line change
Expand Up @@ -5,6 +5,7 @@
import json
import tempfile
import os
import platform

def get_connection():
conn = apsw.Connection(':memory:')
Expand Down Expand Up @@ -97,59 +98,70 @@ def test_vector_distance(conn):
assert np.isclose(math.sqrt(result), l2_distance)

def test_index_file(random_vectors):
conn = get_connection()
def remove_quote(s: str):
return s.strip('\'').strip('\"')
with tempfile.TemporaryDirectory() as tempdir:
index_file_path = os.path.join(tempdir, 'index.bin')
assert not os.path.exists(index_file_path)
cur = conn.cursor()
cur.execute(f'create virtual table my_table using vectorlite(my_embedding float32[{DIM}], hnsw(max_elements={NUM_ELEMENTS}), {index_file_path})')

for i in range(NUM_ELEMENTS):
cur.execute('insert into my_table (rowid, my_embedding) values (?, ?)', (i, random_vectors[i].tobytes()))

result = cur.execute('select rowid, distance from my_table where knn_search(my_embedding, knn_param(?, ?))', (random_vectors[0].tobytes(), 10)).fetchall()
assert len(result) == 10

conn.close()
# The index file should be created
index_file_size = os.path.getsize(index_file_path)
assert os.path.exists(index_file_path) and index_file_size > 0

# test if the index file could be loaded with the same parameters without inserting data again
conn = get_connection()
cur = conn.cursor()
cur.execute(f'create virtual table my_table using vectorlite(my_embedding float32[{DIM}], hnsw(max_elements={NUM_ELEMENTS}), {index_file_path})')
result = cur.execute('select rowid, distance from my_table where knn_search(my_embedding, knn_param(?, ?))', (random_vectors[0].tobytes(), 10)).fetchall()
assert len(result) == 10
conn.close()
# The index file should be created
assert os.path.exists(index_file_path) and os.path.getsize(index_file_path) == index_file_size

# test if the index file could be loaded with different hnsw parameters and distance type without inserting data again
# But hnsw parameters can't be changed even if different values are set; they will be overwritten by the values from the index file
# todo: test whether hnsw parameters are overwritten after more functions are introduced to provide runtime stats.
conn = get_connection()
cur = conn.cursor()
cur.execute(f'create virtual table my_table2 using vectorlite(my_embedding float32[{DIM}] cosine, hnsw(max_elements={NUM_ELEMENTS},ef_construction=32,M=32), {index_file_path})')
result = cur.execute('select rowid, distance from my_table2 where knn_search(my_embedding, knn_param(?, ?))', (random_vectors[0].tobytes(), 10)).fetchall()
assert len(result) == 10

# test searching with ef_search = 30, which defaults to 10
result = cur.execute('select rowid, distance from my_table2 where knn_search(my_embedding, knn_param(?, ?, ?))', (random_vectors[0].tobytes(), 10, 30)).fetchall()
assert len(result) == 10
conn.close()
assert os.path.exists(index_file_path) and os.path.getsize(index_file_path) == index_file_size


# test if `drop table` deletes the index file
conn = get_connection()
cur = conn.cursor()
cur.execute(f'create virtual table my_table2 using vectorlite(my_embedding float32[{DIM}] cosine, hnsw(max_elements={NUM_ELEMENTS},ef_construction=64,M=32), {index_file_path})')
result = cur.execute('select rowid, distance from my_table2 where knn_search(my_embedding, knn_param(?, ?))', (random_vectors[0].tobytes(), 10)).fetchall()
assert len(result) == 10

cur.execute(f'drop table my_table2')
assert not os.path.exists(index_file_path)
conn.close()


file_path = os.path.join(tempdir, 'index.bin')
file_paths = [f'\"{file_path}\"', f'\'{file_path}\'']

# Windows paths always contain ':', which must be quoted by double/single quotes
# Unix paths don't necessarily contain special characters that need to be quoted.
if platform.system().lower() != 'windows':
file_paths.append(file_path)

for index_file_path in file_paths:
assert not os.path.exists(remove_quote(index_file_path))

conn = get_connection()
cur = conn.cursor()
cur.execute(f'create virtual table my_table using vectorlite(my_embedding float32[{DIM}], hnsw(max_elements={NUM_ELEMENTS}), {index_file_path})')

for i in range(NUM_ELEMENTS):
cur.execute('insert into my_table (rowid, my_embedding) values (?, ?)', (i, random_vectors[i].tobytes()))

result = cur.execute('select rowid, distance from my_table where knn_search(my_embedding, knn_param(?, ?))', (random_vectors[0].tobytes(), 10)).fetchall()
assert len(result) == 10

conn.close()
# The index file should be created
index_file_size = os.path.getsize(remove_quote(index_file_path))
assert os.path.exists(remove_quote(index_file_path)) and index_file_size > 0

# test if the index file could be loaded with the same parameters without inserting data again
conn = get_connection()
cur = conn.cursor()
cur.execute(f'create virtual table my_table using vectorlite(my_embedding float32[{DIM}], hnsw(max_elements={NUM_ELEMENTS}), {index_file_path})')
result = cur.execute('select rowid, distance from my_table where knn_search(my_embedding, knn_param(?, ?))', (random_vectors[0].tobytes(), 10)).fetchall()
assert len(result) == 10
conn.close()
# The index file should be created
assert os.path.exists(remove_quote(index_file_path)) and os.path.getsize(remove_quote(index_file_path)) == index_file_size

# test if the index file could be loaded with different hnsw parameters and distance type without inserting data again
# But hnsw parameters can't be changed even if different values are set; they will be overwritten by the values from the index file
# todo: test whether hnsw parameters are overwritten after more functions are introduced to provide runtime stats.
conn = get_connection()
cur = conn.cursor()
cur.execute(f'create virtual table my_table2 using vectorlite(my_embedding float32[{DIM}] cosine, hnsw(max_elements={NUM_ELEMENTS},ef_construction=32,M=32), {index_file_path})')
result = cur.execute('select rowid, distance from my_table2 where knn_search(my_embedding, knn_param(?, ?))', (random_vectors[0].tobytes(), 10)).fetchall()
assert len(result) == 10

# test searching with ef_search = 30, which defaults to 10
result = cur.execute('select rowid, distance from my_table2 where knn_search(my_embedding, knn_param(?, ?, ?))', (random_vectors[0].tobytes(), 10, 30)).fetchall()
assert len(result) == 10
conn.close()
assert os.path.exists(remove_quote(index_file_path)) and os.path.getsize(remove_quote(index_file_path)) == index_file_size


# test if `drop table` deletes the index file
conn = get_connection()
cur = conn.cursor()
cur.execute(f'create virtual table my_table2 using vectorlite(my_embedding float32[{DIM}] cosine, hnsw(max_elements={NUM_ELEMENTS},ef_construction=64,M=32), {index_file_path})')
result = cur.execute('select rowid, distance from my_table2 where knn_search(my_embedding, knn_param(?, ?))', (random_vectors[0].tobytes(), 10)).fetchall()
assert len(result) == 10

cur.execute(f'drop table my_table2')
assert not os.path.exists(remove_quote(index_file_path))
conn.close()


11 changes: 9 additions & 2 deletions pyproject.toml
Original file line number Diff line number Diff line change
@@ -1,9 +1,16 @@
[build-system]
requires = ["setuptools>=59", "wheel", "cmake", "ninja"]
requires = ["setuptools>=59", "wheel"]

build-backend = "setuptools.build_meta"

[tool.cibuildwheel]
test-requires = ["pytest", "numpy", "apsw>=3.46"]
test-command = "pytest {project}/integration_test/python/test"
skip = ["*-win32", "*-manylinux_i686", "*musllinux*", "pp*", "cp36*", "cp37*", "cp38*"]
skip = ["*-win32", "*-win_arm64", "*-manylinux_i686", "*musllinux*", "pp*", "cp36*", "cp37*", "cp38*", "cp39*"]

[tool.cibuildwheel.macos]
environment = {MACOSX_DEPLOYMENT_TARGET = "10.15"} # 10.15 is the minimum version that fully supports c++17

# todo: support musllinux
# Install ninja before each Linux build. The table must be spelled
# `tool.cibuildwheel`; any other name is a different tool's table and the
# setting would be silently ignored.
[tool.cibuildwheel.linux]
before-build = "yum install -y ninja-build"
30 changes: 16 additions & 14 deletions setup.py
Original file line number Diff line number Diff line change
Expand Up @@ -8,9 +8,9 @@
from pathlib import Path

from setuptools import Extension, setup
import cmake
# import cmake
import subprocess
import ninja
# import ninja

VERSION = '0.1.0'
PACKAGE_NAME = 'vectorlite_py'
Expand All @@ -19,33 +19,35 @@
machine = platform.machine()

print(f'Current platfrom: {system}, {machine}')
print(f'cmake bin dir: {cmake.CMAKE_BIN_DIR}. cwd: {os.getcwd()}')
cmake_path = os.path.join(cmake.CMAKE_BIN_DIR, 'cmake')
ctest_path = os.path.join(cmake.CMAKE_BIN_DIR, 'ctest')
ninja_path = os.path.join(ninja.BIN_DIR, 'ninja')
cmake_version = subprocess.run(['cmake', '--version'], check=True)
cmake_version.check_returncode()
# print(f'cmake bin dir: {cmake.CMAKE_BIN_DIR}. cwd: {os.getcwd()}')
# cmake_path = os.path.join(cmake.CMAKE_BIN_DIR, 'cmake')
# ctest_path = os.path.join(cmake.CMAKE_BIN_DIR, 'ctest')
# ninja_path = os.path.join(ninja.BIN_DIR, 'ninja')
# cmake_version = subprocess.run([cmake_path, '--version'], check=True)
# cmake_version.check_returncode()

class CMakeExtension(Extension):
    """Extension that is registered by name only and lists no source files."""

    def __init__(self, name: str) -> None:
        # The source list is deliberately empty; only the name is passed through.
        no_sources = []
        super().__init__(name, sources=no_sources)

def get_lib_name():
if system.lower() == 'linux':
return 'libvectorlite.so'
return 'vectorlite.so'
if system.lower() == 'darwin':
return 'libvectorlite.dylib'
return 'vectorlite.dylib'
if system.lower() == 'windows':
return 'libvectorlite.dll'
return 'vectorlite.dll'
raise ValueError(f'Unsupported platform: {system}')

class CMakeBuild(build_ext):
def build_extension(self, ext: CMakeExtension) -> None:
cmake_path = 'cmake'
ctest_path = 'ctest'
print(f'Building extension for {self.plat_name} {self.compiler.compiler_type}')
extra_args = []
if system.lower() == 'windows':
extra_args = ['-DCMAKE_CXX_COMPILER=cl', '-DCMAKE_C_COMPILER=cl']
configure = subprocess.run([cmake_path, '--preset', 'release', f'-DCMAKE_MAKE_PROGRAM:FILEPATH={ninja_path}', *extra_args])
# if system.lower() == 'windows':
# extra_args = ['-DCMAKE_CXX_COMPILER=cl', '-DCMAKE_C_COMPILER=cl']
configure = subprocess.run([cmake_path, '--preset', 'release', *extra_args])
configure.check_returncode()

subprocess.run([cmake_path, '--build', os.path.join('build', 'release'), '-j8'], check=True)
Expand Down
10 changes: 10 additions & 0 deletions src/virtual_table.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -107,6 +107,16 @@ static int InitVirtualTable(bool load_from_file, sqlite3* db, void* pAux,
std::string_view index_file_path;
if (argc == 3 + kModuleParamOffset) {
index_file_path = argv[2 + kModuleParamOffset];
int size = index_file_path.size();
// Handle cases where the index_file_path is enclosed in double/single
// quotes. This is necessary for Windows paths because they contain ':',
// which must be quoted for sqlite to parse the argument correctly.
if (size > 2) {
if ((index_file_path[0] == '\"' && index_file_path[size - 1] == '\"') ||
(index_file_path[0] == '\'' && index_file_path[size - 1] == '\'')) {
index_file_path = index_file_path.substr(1, size - 2);
}
}
}

std::string sql = absl::StrFormat("CREATE TABLE X(%s, distance REAL hidden)",
Expand Down
2 changes: 1 addition & 1 deletion vectorlite_py/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,7 +3,7 @@
__version__ = '0.1.0'

def vectorlite_path():
loadable_path = os.path.join(os.path.dirname(__file__), 'libvectorlite')
loadable_path = os.path.join(os.path.dirname(__file__), 'vectorlite')
return os.path.normpath(loadable_path)


Expand Down

0 comments on commit 3de7a66

Please sign in to comment.