Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Add bfloat16 quantization support #30

Merged
merged 9 commits into from
Sep 19, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
113 changes: 57 additions & 56 deletions bindings/python/vectorlite_py/test/vectorlite_test.py
Original file line number Diff line number Diff line change
Expand Up @@ -121,59 +121,60 @@ def remove_quote(s: str):
file_path = os.path.join(tempdir, 'index.bin')
file_paths = [f'\"{file_path}\"', f'\'{file_path}\'']

for index_file_path in file_paths:
assert not os.path.exists(remove_quote(index_file_path))

conn = get_connection()
cur = conn.cursor()
cur.execute(f'create virtual table my_table using vectorlite(my_embedding float32[{DIM}], hnsw(max_elements={NUM_ELEMENTS}), {index_file_path})')

for i in range(NUM_ELEMENTS):
cur.execute('insert into my_table (rowid, my_embedding) values (?, ?)', (i, random_vectors[i].tobytes()))

result = cur.execute('select rowid, distance from my_table where knn_search(my_embedding, knn_param(?, ?))', (random_vectors[0].tobytes(), 10)).fetchall()
assert len(result) == 10

conn.close()
# The index file should be created
index_file_size = os.path.getsize(remove_quote(index_file_path))
assert os.path.exists(remove_quote(index_file_path)) and index_file_size > 0

# test if the index file could be loaded with the same parameters without inserting data again
conn = get_connection()
cur = conn.cursor()
cur.execute(f'create virtual table my_table using vectorlite(my_embedding float32[{DIM}], hnsw(max_elements={NUM_ELEMENTS}), {index_file_path})')
result = cur.execute('select rowid, distance from my_table where knn_search(my_embedding, knn_param(?, ?))', (random_vectors[0].tobytes(), 10)).fetchall()
assert len(result) == 10
conn.close()
# The index file should be created
assert os.path.exists(remove_quote(index_file_path)) and os.path.getsize(remove_quote(index_file_path)) == index_file_size

# test if the index file could be loaded with different hnsw parameters and distance type without inserting data again
# But hnsw parameters can't be changed even if different values are set; they will be overwritten by the values from the index file
# todo: test whether hnsw parameters are overwritten after more functions are introduced to provide runtime stats.
conn = get_connection()
cur = conn.cursor()
cur.execute(f'create virtual table my_table2 using vectorlite(my_embedding float32[{DIM}] cosine, hnsw(max_elements={NUM_ELEMENTS},ef_construction=32,M=32), {index_file_path})')
result = cur.execute('select rowid, distance from my_table2 where knn_search(my_embedding, knn_param(?, ?))', (random_vectors[0].tobytes(), 10)).fetchall()
assert len(result) == 10

# test searching with ef_search = 30, which defaults to 10
result = cur.execute('select rowid, distance from my_table2 where knn_search(my_embedding, knn_param(?, ?, ?))', (random_vectors[0].tobytes(), 10, 30)).fetchall()
assert len(result) == 10
conn.close()
assert os.path.exists(remove_quote(index_file_path)) and os.path.getsize(remove_quote(index_file_path)) == index_file_size


# test if `drop table` deletes the index file
conn = get_connection()
cur = conn.cursor()
cur.execute(f'create virtual table my_table2 using vectorlite(my_embedding float32[{DIM}] cosine, hnsw(max_elements={NUM_ELEMENTS},ef_construction=64,M=32), {index_file_path})')
result = cur.execute('select rowid, distance from my_table2 where knn_search(my_embedding, knn_param(?, ?))', (random_vectors[0].tobytes(), 10)).fetchall()
assert len(result) == 10

cur.execute(f'drop table my_table2')
assert not os.path.exists(remove_quote(index_file_path))
conn.close()


for vector_type in ['float32', 'bfloat16']:
for index_file_path in file_paths:
assert not os.path.exists(remove_quote(index_file_path))

conn = get_connection()
cur = conn.cursor()
cur.execute(f'create virtual table my_table using vectorlite(my_embedding {vector_type}[{DIM}], hnsw(max_elements={NUM_ELEMENTS}), {index_file_path})')

for i in range(NUM_ELEMENTS):
cur.execute('insert into my_table (rowid, my_embedding) values (?, ?)', (i, random_vectors[i].tobytes()))

result = cur.execute('select rowid, distance from my_table where knn_search(my_embedding, knn_param(?, ?))', (random_vectors[0].tobytes(), 10)).fetchall()
assert len(result) == 10

conn.close()
# The index file should be created
index_file_size = os.path.getsize(remove_quote(index_file_path))
assert os.path.exists(remove_quote(index_file_path)) and index_file_size > 0

# test if the index file could be loaded with the same parameters without inserting data again
conn = get_connection()
cur = conn.cursor()
cur.execute(f'create virtual table my_table using vectorlite(my_embedding {vector_type}[{DIM}], hnsw(max_elements={NUM_ELEMENTS}), {index_file_path})')
result = cur.execute('select rowid, distance from my_table where knn_search(my_embedding, knn_param(?, ?))', (random_vectors[0].tobytes(), 10)).fetchall()
assert len(result) == 10
conn.close()
# The index file should be created
assert os.path.exists(remove_quote(index_file_path)) and os.path.getsize(remove_quote(index_file_path)) == index_file_size

# test if the index file could be loaded with different hnsw parameters and distance type without inserting data again
# But hnsw parameters can't be changed even if different values are set; they will be overwritten by the values from the index file
# todo: test whether hnsw parameters are overwritten after more functions are introduced to provide runtime stats.
conn = get_connection()
cur = conn.cursor()
cur.execute(f'create virtual table my_table2 using vectorlite(my_embedding {vector_type}[{DIM}] cosine, hnsw(max_elements={NUM_ELEMENTS},ef_construction=32,M=32), {index_file_path})')
result = cur.execute('select rowid, distance from my_table2 where knn_search(my_embedding, knn_param(?, ?))', (random_vectors[0].tobytes(), 10)).fetchall()
assert len(result) == 10

# test searching with ef_search = 30, which defaults to 10
result = cur.execute('select rowid, distance from my_table2 where knn_search(my_embedding, knn_param(?, ?, ?))', (random_vectors[0].tobytes(), 10, 30)).fetchall()
assert len(result) == 10
conn.close()
assert os.path.exists(remove_quote(index_file_path)) and os.path.getsize(remove_quote(index_file_path)) == index_file_size


# test if `drop table` deletes the index file
conn = get_connection()
cur = conn.cursor()
cur.execute(f'create virtual table my_table2 using vectorlite(my_embedding {vector_type}[{DIM}] cosine, hnsw(max_elements={NUM_ELEMENTS},ef_construction=64,M=32), {index_file_path})')
result = cur.execute('select rowid, distance from my_table2 where knn_search(my_embedding, knn_param(?, ?))', (random_vectors[0].tobytes(), 10)).fetchall()
assert len(result) == 10

cur.execute(f'drop table my_table2')
assert not os.path.exists(remove_quote(index_file_path))
conn.close()


2 changes: 1 addition & 1 deletion format.sh
Original file line number Diff line number Diff line change
@@ -1 +1 @@
clang-format -style=file -i src/*.h src/*.cpp
clang-format -style=file -i vectorlite/*.h vectorlite/*.cpp
2 changes: 1 addition & 1 deletion vcpkg
Submodule vcpkg updated 552 files
2 changes: 1 addition & 1 deletion vectorlite/CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -10,7 +10,7 @@ message(STATUS "Compiling on ${CMAKE_SYSTEM_PROCESSOR}")

add_subdirectory(ops)

add_library(vectorlite SHARED vectorlite.cpp virtual_table.cpp vector.cpp vector_view.cpp util.cpp vector_space.cpp index_options.cpp sqlite_functions.cpp constraint.cpp)
add_library(vectorlite SHARED vectorlite.cpp virtual_table.cpp util.cpp vector_space.cpp index_options.cpp sqlite_functions.cpp constraint.cpp quantization.cpp)
# remove the lib prefix to make the shared library name consistent on all platforms.
set_target_properties(vectorlite PROPERTIES PREFIX "")
target_include_directories(vectorlite PUBLIC ${RAPIDJSON_INCLUDE_DIRS} ${HNSWLIB_INCLUDE_DIRS} ${PROJECT_BINARY_DIR})
Expand Down
44 changes: 32 additions & 12 deletions vectorlite/constraint.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -13,6 +13,7 @@
#include "absl/strings/str_join.h"
#include "hnswlib/hnswlib.h"
#include "macros.h"
#include "quantization.h"
#include "sqlite3ext.h"
#include "util.h"
#include "vector.h"
Expand Down Expand Up @@ -195,20 +196,39 @@ absl::StatusOr<QueryExecutor::QueryResult> QueryExecutor::Execute() const {
index_.setEf(*knn_param->ef_search);
}
try {
if (!space_.normalize) {
return index_.searchKnnCloserFirst(
knn_param->query_vector.data().data(), knn_param->k,
rowid_filter.get());
if (space_.vector_type == VectorType::Float32) {
if (!space_.normalize) {
return index_.searchKnnCloserFirst(
knn_param->query_vector.data().data(), knn_param->k,
rowid_filter.get());
}

VECTORLITE_ASSERT(space_.normalize);
// Copy the query vector and normalize it.
Vector normalized_vector = Vector::Normalize(knn_param->query_vector);

auto result = index_.searchKnnCloserFirst(
normalized_vector.data().data(), knn_param->k, rowid_filter.get());
return result;
} else if (space_.vector_type == VectorType::BFloat16) {
BF16Vector quantized_vector = Quantize(knn_param->query_vector);

if (!space_.normalize) {
return index_.searchKnnCloserFirst(quantized_vector.data().data(),
knn_param->k, rowid_filter.get());
}

VECTORLITE_ASSERT(space_.normalize);
BF16Vector normalized_vector = quantized_vector.Normalize();

auto result = index_.searchKnnCloserFirst(
normalized_vector.data().data(), knn_param->k, rowid_filter.get());
return result;
} else {
return absl::InternalError(
absl::StrFormat("Unknown vector type: %d", space_.vector_type));
}

VECTORLITE_ASSERT(space_.normalize);
// Copy the query vector and normalize it.
Vector normalized_vector = Vector::Normalize(knn_param->query_vector);

auto result = index_.searchKnnCloserFirst(
normalized_vector.data().data(), knn_param->k, rowid_filter.get());
return result;

} catch (const std::runtime_error& e) {
return absl::InternalError(e.what());
}
Expand Down
2 changes: 1 addition & 1 deletion vectorlite/constraint.h
Original file line number Diff line number Diff line change
Expand Up @@ -12,8 +12,8 @@
#include "hnswlib/hnswlib.h"
#include "macros.h"
#include "sqlite3.h"
#include "vector_view.h"
#include "vector_space.h"
#include "vector_view.h"

namespace vectorlite {

Expand Down
34 changes: 22 additions & 12 deletions vectorlite/distance.h
Original file line number Diff line number Diff line change
@@ -1,6 +1,8 @@
#pragma once

#include "hnswlib/hnswlib.h"
#include "hwy/base.h"
#include "macros.h"
#include "ops/ops.h"

// This file implements hnswlib::SpaceInterface<float> using vectorlite
Expand All @@ -9,12 +11,13 @@
// PC(i5-12600KF with AVX2 support)
namespace vectorlite {

class InnerProductSpace : public hnswlib::SpaceInterface<float> {
template <class T, VECTORLITE_IF_FLOAT_SUPPORTED(T)>
class GenericInnerProductSpace : public hnswlib::SpaceInterface<float> {
public:
explicit InnerProductSpace(size_t dim)
: dim_(dim), func_(InnerProductSpace::InnerProductDistanceFunc) {}
explicit GenericInnerProductSpace(size_t dim)
: dim_(dim), func_(GenericInnerProductSpace::InnerProductDistanceFunc) {}

size_t get_data_size() override { return dim_ * sizeof(float); }
size_t get_data_size() override { return dim_ * sizeof(T); }

void* get_dist_func_param() override { return &dim_; }

Expand All @@ -26,18 +29,22 @@ class InnerProductSpace : public hnswlib::SpaceInterface<float> {

static float InnerProductDistanceFunc(const void* v1, const void* v2,
const void* dim) {
return ops::InnerProductDistance(static_cast<const float*>(v1),
static_cast<const float*>(v2),
return ops::InnerProductDistance(static_cast<const T*>(v1),
static_cast<const T*>(v2),
*reinterpret_cast<const size_t*>(dim));
}
};

class L2Space : public hnswlib::SpaceInterface<float> {
using InnerProductSpace = GenericInnerProductSpace<float>;
using InnerProductSpaceBF16 = GenericInnerProductSpace<hwy::bfloat16_t>;

template <class T, VECTORLITE_IF_FLOAT_SUPPORTED(T)>
class GenericL2Space : public hnswlib::SpaceInterface<float> {
public:
explicit L2Space(size_t dim)
: dim_(dim), func_(L2Space::L2DistanceSquaredFunc) {}
explicit GenericL2Space(size_t dim)
: dim_(dim), func_(GenericL2Space::L2DistanceSquaredFunc) {}

size_t get_data_size() override { return dim_ * sizeof(float); }
size_t get_data_size() override { return dim_ * sizeof(T); }

void* get_dist_func_param() override { return &dim_; }

Expand All @@ -49,10 +56,13 @@ class L2Space : public hnswlib::SpaceInterface<float> {

static float L2DistanceSquaredFunc(const void* v1, const void* v2,
const void* dim) {
return ops::L2DistanceSquared(static_cast<const float*>(v1),
static_cast<const float*>(v2),
return ops::L2DistanceSquared(static_cast<const T*>(v1),
static_cast<const T*>(v2),
*reinterpret_cast<const size_t*>(dim));
}
};

using L2Space = GenericL2Space<float>;
using L2SpaceBF16 = GenericL2Space<hwy::bfloat16_t>;

} // namespace vectorlite
12 changes: 12 additions & 0 deletions vectorlite/macros.h
Original file line number Diff line number Diff line change
@@ -1,5 +1,9 @@
#pragma once

#include <type_traits>

#include "hwy/base.h"

#if defined(_WIN32) || defined(__WIN32__)
#define VECTORLITE_EXPORT __declspec(dllexport)
#else
Expand All @@ -11,3 +15,11 @@
#include <cassert>
#define VECTORLITE_ASSERT(x) assert(x)
#endif

// Expands to an unnamed non-type template parameter of type
// `std::enable_if_t<...>*`, defaulted to nullptr. The parameter type is only
// well-formed when T is `float` or `hwy::bfloat16_t`, so a template declared
// as `template <class T, VECTORLITE_IF_FLOAT_SUPPORTED(T)>` is rejected at
// compile time for any other element type.
#define VECTORLITE_IF_FLOAT_SUPPORTED(T) \
std::enable_if_t<std::is_same_v<T, float> || \
std::is_same_v<T, hwy::bfloat16_t>>* = nullptr

// Expands to an unnamed non-type template parameter of type
// `std::enable_if_t<...>*` with NO default argument. The parameter type is
// only well-formed when T is `float` or `hwy::bfloat16_t`.
// NOTE(review): judging by the name, this variant is presumably meant for
// declarations that must not repeat a default template argument already given
// elsewhere -- confirm against the call sites.
#define VECTORLITE_IF_FLOAT_SUPPORTED_FWD_DECL(T) \
std::enable_if_t<std::is_same_v<T, float> || \
std::is_same_v<T, hwy::bfloat16_t>>*
Loading
Loading