Skip to content

Commit

Permalink
Add bfloat16 quantization support (1yefuwang1#30)
Browse files Browse the repository at this point in the history
* Add quantization to float16/bfloat16

* make distance ops generic on float type

* Add f16/bf16 to f32 conversion

* Add normalization for bfloat16

* Add inner product distance for bfloat16

* Implement L2DistanceSquared for bf16

* Implement L2 for f32 * bf16

* Add benchmark for bf16 ops

* Add bfloat16 vector
  • Loading branch information
1yefuwang1 authored Sep 19, 2024
1 parent d9e55df commit 569b833
Show file tree
Hide file tree
Showing 26 changed files with 1,365 additions and 451 deletions.
113 changes: 57 additions & 56 deletions bindings/python/vectorlite_py/test/vectorlite_test.py
Original file line number Diff line number Diff line change
Expand Up @@ -121,59 +121,60 @@ def remove_quote(s: str):
file_path = os.path.join(tempdir, 'index.bin')
file_paths = [f'\"{file_path}\"', f'\'{file_path}\'']

for index_file_path in file_paths:
assert not os.path.exists(remove_quote(index_file_path))

conn = get_connection()
cur = conn.cursor()
cur.execute(f'create virtual table my_table using vectorlite(my_embedding float32[{DIM}], hnsw(max_elements={NUM_ELEMENTS}), {index_file_path})')

for i in range(NUM_ELEMENTS):
cur.execute('insert into my_table (rowid, my_embedding) values (?, ?)', (i, random_vectors[i].tobytes()))

result = cur.execute('select rowid, distance from my_table where knn_search(my_embedding, knn_param(?, ?))', (random_vectors[0].tobytes(), 10)).fetchall()
assert len(result) == 10

conn.close()
# The index file should be created
index_file_size = os.path.getsize(remove_quote(index_file_path))
assert os.path.exists(remove_quote(index_file_path)) and index_file_size > 0

# test if the index file could be loaded with the same parameters without inserting data again
conn = get_connection()
cur = conn.cursor()
cur.execute(f'create virtual table my_table using vectorlite(my_embedding float32[{DIM}], hnsw(max_elements={NUM_ELEMENTS}), {index_file_path})')
result = cur.execute('select rowid, distance from my_table where knn_search(my_embedding, knn_param(?, ?))', (random_vectors[0].tobytes(), 10)).fetchall()
assert len(result) == 10
conn.close()
# The index file should still exist and its size should be unchanged after reloading
assert os.path.exists(remove_quote(index_file_path)) and os.path.getsize(remove_quote(index_file_path)) == index_file_size

# test if the index file could be loaded with different hnsw parameters and distance type without inserting data again
# But hnsw parameters can't be changed even if different values are set; they will be overwritten by the values from the index file
# todo: test whether hnsw parameters are overwritten after more functions are introduced to provide runtime stats.
conn = get_connection()
cur = conn.cursor()
cur.execute(f'create virtual table my_table2 using vectorlite(my_embedding float32[{DIM}] cosine, hnsw(max_elements={NUM_ELEMENTS},ef_construction=32,M=32), {index_file_path})')
result = cur.execute('select rowid, distance from my_table2 where knn_search(my_embedding, knn_param(?, ?))', (random_vectors[0].tobytes(), 10)).fetchall()
assert len(result) == 10

# test searching with ef_search = 30, which defaults to 10
result = cur.execute('select rowid, distance from my_table2 where knn_search(my_embedding, knn_param(?, ?, ?))', (random_vectors[0].tobytes(), 10, 30)).fetchall()
assert len(result) == 10
conn.close()
assert os.path.exists(remove_quote(index_file_path)) and os.path.getsize(remove_quote(index_file_path)) == index_file_size


# test if `drop table` deletes the index file
conn = get_connection()
cur = conn.cursor()
cur.execute(f'create virtual table my_table2 using vectorlite(my_embedding float32[{DIM}] cosine, hnsw(max_elements={NUM_ELEMENTS},ef_construction=64,M=32), {index_file_path})')
result = cur.execute('select rowid, distance from my_table2 where knn_search(my_embedding, knn_param(?, ?))', (random_vectors[0].tobytes(), 10)).fetchall()
assert len(result) == 10

cur.execute(f'drop table my_table2')
assert not os.path.exists(remove_quote(index_file_path))
conn.close()


for vector_type in ['float32', 'bfloat16']:
for index_file_path in file_paths:
assert not os.path.exists(remove_quote(index_file_path))

conn = get_connection()
cur = conn.cursor()
cur.execute(f'create virtual table my_table using vectorlite(my_embedding {vector_type}[{DIM}], hnsw(max_elements={NUM_ELEMENTS}), {index_file_path})')

for i in range(NUM_ELEMENTS):
cur.execute('insert into my_table (rowid, my_embedding) values (?, ?)', (i, random_vectors[i].tobytes()))

result = cur.execute('select rowid, distance from my_table where knn_search(my_embedding, knn_param(?, ?))', (random_vectors[0].tobytes(), 10)).fetchall()
assert len(result) == 10

conn.close()
# The index file should be created
index_file_size = os.path.getsize(remove_quote(index_file_path))
assert os.path.exists(remove_quote(index_file_path)) and index_file_size > 0

# test if the index file could be loaded with the same parameters without inserting data again
conn = get_connection()
cur = conn.cursor()
cur.execute(f'create virtual table my_table using vectorlite(my_embedding {vector_type}[{DIM}], hnsw(max_elements={NUM_ELEMENTS}), {index_file_path})')
result = cur.execute('select rowid, distance from my_table where knn_search(my_embedding, knn_param(?, ?))', (random_vectors[0].tobytes(), 10)).fetchall()
assert len(result) == 10
conn.close()
# The index file should still exist and its size should be unchanged after reloading
assert os.path.exists(remove_quote(index_file_path)) and os.path.getsize(remove_quote(index_file_path)) == index_file_size

# test if the index file could be loaded with different hnsw parameters and distance type without inserting data again
# But hnsw parameters can't be changed even if different values are set; they will be overwritten by the values from the index file
# todo: test whether hnsw parameters are overwritten after more functions are introduced to provide runtime stats.
conn = get_connection()
cur = conn.cursor()
cur.execute(f'create virtual table my_table2 using vectorlite(my_embedding {vector_type}[{DIM}] cosine, hnsw(max_elements={NUM_ELEMENTS},ef_construction=32,M=32), {index_file_path})')
result = cur.execute('select rowid, distance from my_table2 where knn_search(my_embedding, knn_param(?, ?))', (random_vectors[0].tobytes(), 10)).fetchall()
assert len(result) == 10

# test searching with ef_search = 30, which defaults to 10
result = cur.execute('select rowid, distance from my_table2 where knn_search(my_embedding, knn_param(?, ?, ?))', (random_vectors[0].tobytes(), 10, 30)).fetchall()
assert len(result) == 10
conn.close()
assert os.path.exists(remove_quote(index_file_path)) and os.path.getsize(remove_quote(index_file_path)) == index_file_size


# test if `drop table` deletes the index file
conn = get_connection()
cur = conn.cursor()
cur.execute(f'create virtual table my_table2 using vectorlite(my_embedding {vector_type}[{DIM}] cosine, hnsw(max_elements={NUM_ELEMENTS},ef_construction=64,M=32), {index_file_path})')
result = cur.execute('select rowid, distance from my_table2 where knn_search(my_embedding, knn_param(?, ?))', (random_vectors[0].tobytes(), 10)).fetchall()
assert len(result) == 10

cur.execute(f'drop table my_table2')
assert not os.path.exists(remove_quote(index_file_path))
conn.close()


2 changes: 1 addition & 1 deletion format.sh
Original file line number Diff line number Diff line change
@@ -1 +1 @@
clang-format -style=file -i src/*.h src/*.cpp
clang-format -style=file -i vectorlite/*.h vectorlite/*.cpp
2 changes: 1 addition & 1 deletion vcpkg
Submodule vcpkg updated 552 files
2 changes: 1 addition & 1 deletion vectorlite/CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -10,7 +10,7 @@ message(STATUS "Compiling on ${CMAKE_SYSTEM_PROCESSOR}")

add_subdirectory(ops)

add_library(vectorlite SHARED vectorlite.cpp virtual_table.cpp vector.cpp vector_view.cpp util.cpp vector_space.cpp index_options.cpp sqlite_functions.cpp constraint.cpp)
add_library(vectorlite SHARED vectorlite.cpp virtual_table.cpp util.cpp vector_space.cpp index_options.cpp sqlite_functions.cpp constraint.cpp quantization.cpp)
# remove the lib prefix to make the shared library name consistent on all platforms.
set_target_properties(vectorlite PROPERTIES PREFIX "")
target_include_directories(vectorlite PUBLIC ${RAPIDJSON_INCLUDE_DIRS} ${HNSWLIB_INCLUDE_DIRS} ${PROJECT_BINARY_DIR})
Expand Down
44 changes: 32 additions & 12 deletions vectorlite/constraint.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -13,6 +13,7 @@
#include "absl/strings/str_join.h"
#include "hnswlib/hnswlib.h"
#include "macros.h"
#include "quantization.h"
#include "sqlite3ext.h"
#include "util.h"
#include "vector.h"
Expand Down Expand Up @@ -195,20 +196,39 @@ absl::StatusOr<QueryExecutor::QueryResult> QueryExecutor::Execute() const {
index_.setEf(*knn_param->ef_search);
}
try {
if (!space_.normalize) {
return index_.searchKnnCloserFirst(
knn_param->query_vector.data().data(), knn_param->k,
rowid_filter.get());
if (space_.vector_type == VectorType::Float32) {
if (!space_.normalize) {
return index_.searchKnnCloserFirst(
knn_param->query_vector.data().data(), knn_param->k,
rowid_filter.get());
}

VECTORLITE_ASSERT(space_.normalize);
// Copy the query vector and normalize it.
Vector normalized_vector = Vector::Normalize(knn_param->query_vector);

auto result = index_.searchKnnCloserFirst(
normalized_vector.data().data(), knn_param->k, rowid_filter.get());
return result;
} else if (space_.vector_type == VectorType::BFloat16) {
BF16Vector quantized_vector = Quantize(knn_param->query_vector);

if (!space_.normalize) {
return index_.searchKnnCloserFirst(quantized_vector.data().data(),
knn_param->k, rowid_filter.get());
}

VECTORLITE_ASSERT(space_.normalize);
BF16Vector normalized_vector = quantized_vector.Normalize();

auto result = index_.searchKnnCloserFirst(
normalized_vector.data().data(), knn_param->k, rowid_filter.get());
return result;
} else {
return absl::InternalError(
absl::StrFormat("Unknown vector type: %d", space_.vector_type));
}

VECTORLITE_ASSERT(space_.normalize);
// Copy the query vector and normalize it.
Vector normalized_vector = Vector::Normalize(knn_param->query_vector);

auto result = index_.searchKnnCloserFirst(
normalized_vector.data().data(), knn_param->k, rowid_filter.get());
return result;

} catch (const std::runtime_error& e) {
return absl::InternalError(e.what());
}
Expand Down
2 changes: 1 addition & 1 deletion vectorlite/constraint.h
Original file line number Diff line number Diff line change
Expand Up @@ -12,8 +12,8 @@
#include "hnswlib/hnswlib.h"
#include "macros.h"
#include "sqlite3.h"
#include "vector_view.h"
#include "vector_space.h"
#include "vector_view.h"

namespace vectorlite {

Expand Down
34 changes: 22 additions & 12 deletions vectorlite/distance.h
Original file line number Diff line number Diff line change
@@ -1,6 +1,8 @@
#pragma once

#include "hnswlib/hnswlib.h"
#include "hwy/base.h"
#include "macros.h"
#include "ops/ops.h"

// This file implements hnswlib::SpaceInterface<float> using vectorlite
Expand All @@ -9,12 +11,13 @@
// PC(i5-12600KF with AVX2 support)
namespace vectorlite {

class InnerProductSpace : public hnswlib::SpaceInterface<float> {
template <class T, VECTORLITE_IF_FLOAT_SUPPORTED(T)>
class GenericInnerProductSpace : public hnswlib::SpaceInterface<float> {
public:
explicit InnerProductSpace(size_t dim)
: dim_(dim), func_(InnerProductSpace::InnerProductDistanceFunc) {}
explicit GenericInnerProductSpace(size_t dim)
: dim_(dim), func_(GenericInnerProductSpace::InnerProductDistanceFunc) {}

size_t get_data_size() override { return dim_ * sizeof(float); }
size_t get_data_size() override { return dim_ * sizeof(T); }

void* get_dist_func_param() override { return &dim_; }

Expand All @@ -26,18 +29,22 @@ class InnerProductSpace : public hnswlib::SpaceInterface<float> {

static float InnerProductDistanceFunc(const void* v1, const void* v2,
const void* dim) {
return ops::InnerProductDistance(static_cast<const float*>(v1),
static_cast<const float*>(v2),
return ops::InnerProductDistance(static_cast<const T*>(v1),
static_cast<const T*>(v2),
*reinterpret_cast<const size_t*>(dim));
}
};

class L2Space : public hnswlib::SpaceInterface<float> {
using InnerProductSpace = GenericInnerProductSpace<float>;
using InnerProductSpaceBF16 = GenericInnerProductSpace<hwy::bfloat16_t>;

template <class T, VECTORLITE_IF_FLOAT_SUPPORTED(T)>
class GenericL2Space : public hnswlib::SpaceInterface<float> {
public:
explicit L2Space(size_t dim)
: dim_(dim), func_(L2Space::L2DistanceSquaredFunc) {}
explicit GenericL2Space(size_t dim)
: dim_(dim), func_(GenericL2Space::L2DistanceSquaredFunc) {}

size_t get_data_size() override { return dim_ * sizeof(float); }
size_t get_data_size() override { return dim_ * sizeof(T); }

void* get_dist_func_param() override { return &dim_; }

Expand All @@ -49,10 +56,13 @@ class L2Space : public hnswlib::SpaceInterface<float> {

static float L2DistanceSquaredFunc(const void* v1, const void* v2,
const void* dim) {
return ops::L2DistanceSquared(static_cast<const float*>(v1),
static_cast<const float*>(v2),
return ops::L2DistanceSquared(static_cast<const T*>(v1),
static_cast<const T*>(v2),
*reinterpret_cast<const size_t*>(dim));
}
};

using L2Space = GenericL2Space<float>;
using L2SpaceBF16 = GenericL2Space<hwy::bfloat16_t>;

} // namespace vectorlite
12 changes: 12 additions & 0 deletions vectorlite/macros.h
Original file line number Diff line number Diff line change
@@ -1,5 +1,9 @@
#pragma once

#include <type_traits>

#include "hwy/base.h"

#if defined(_WIN32) || defined(__WIN32__)
#define VECTORLITE_EXPORT __declspec(dllexport)
#else
Expand All @@ -11,3 +15,11 @@
#include <cassert>
#define VECTORLITE_ASSERT(x) assert(x)
#endif

// SFINAE constraint intended for use inside a template parameter list, e.g.
//   template <class T, VECTORLITE_IF_FLOAT_SUPPORTED(T)>
// Expands to an unnamed non-type template parameter of type
// std::enable_if_t<...>* defaulted to nullptr. The enable_if condition holds
// only when T is exactly `float` or `hwy::bfloat16_t`, so the template is
// unavailable for any other T.
#define VECTORLITE_IF_FLOAT_SUPPORTED(T) \
std::enable_if_t<std::is_same_v<T, float> || \
std::is_same_v<T, hwy::bfloat16_t>>* = nullptr

// Same constraint as VECTORLITE_IF_FLOAT_SUPPORTED(T) but without the
// `= nullptr` default argument.
// NOTE(review): judging by the name, this is presumably meant for forward
// declarations paired with a definition that supplies the default (a default
// template argument may only be specified once) - confirm against usages.
#define VECTORLITE_IF_FLOAT_SUPPORTED_FWD_DECL(T) \
std::enable_if_t<std::is_same_v<T, float> || \
std::is_same_v<T, hwy::bfloat16_t>>*
Loading

0 comments on commit 569b833

Please sign in to comment.