From da13d590fabe0f019f8648e863054fffbb9c7df3 Mon Sep 17 00:00:00 2001 From: FluxCapacitor2 <31071265+FluxCapacitor2@users.noreply.github.com> Date: Mon, 18 Nov 2024 01:51:09 -0500 Subject: [PATCH] Add a method to drop the vector embedding tables created for each source --- app/database/db.go | 1 + app/database/db_sqlite.go | 11 ++++++++++- app/database/db_sqlite_embedding.sql | 12 ++++++------ 3 files changed, 17 insertions(+), 7 deletions(-) diff --git a/app/database/db.go b/app/database/db.go index f2ebb11..58be835 100644 --- a/app/database/db.go +++ b/app/database/db.go @@ -4,6 +4,7 @@ type Database interface { // Create necessary tables Setup() error SetupVectorTables(sourceID string, dimension int) error + DropVectorTables(sourceID string) error // Set the status of items that have been Processing for over a minute to Pending and remove any Finished entries Cleanup() error diff --git a/app/database/db_sqlite.go b/app/database/db_sqlite.go index ca3f474..2ee82df 100644 --- a/app/database/db_sqlite.go +++ b/app/database/db_sqlite.go @@ -33,7 +33,16 @@ func (db *SQLiteDatabase) Setup() error { } func (db *SQLiteDatabase) SetupVectorTables(sourceID string, dimensions int) error { - _, err := db.conn.Exec(fmt.Sprintf(embedSetupCommands, sourceID, sourceID, sourceID, sourceID, sourceID, dimensions)) + _, err := db.conn.Exec(fmt.Sprintf(embedSetupCommands, sourceID, dimensions, sourceID, sourceID, sourceID, sourceID)) + return err +} + +func (db *SQLiteDatabase) DropVectorTables(sourceID string) error { + _, err := db.conn.Exec(fmt.Sprintf(` + DROP TABLE pages_vec_%s; + DROP TRIGGER pages_refresh_vector_embeddings_%s; + DROP TRIGGER delete_embedding_on_delete_chunk_%s; + `, sourceID, sourceID, sourceID)) return err } diff --git a/app/database/db_sqlite_embedding.sql b/app/database/db_sqlite_embedding.sql index 2610fef..cbd8acb 100644 --- a/app/database/db_sqlite_embedding.sql +++ b/app/database/db_sqlite_embedding.sql @@ -9,6 +9,11 @@ -- * More accurate `k` limit when there are many sources that aren't included in the query -- * In the future, different sources could use different embedding sources with different vector sizes +CREATE VIRTUAL TABLE IF NOT EXISTS pages_vec_%s USING vec0( + id INTEGER PRIMARY KEY, + embedding FLOAT[%d] distance_metric=cosine +); + CREATE TRIGGER IF NOT EXISTS pages_refresh_vector_embeddings_%s AFTER UPDATE ON pages WHEN old.url != new.url OR old.title != new.title OR old.description != new.description OR old.content != new.content BEGIN -- If the page has associated vector embeddings, they must be recomputed when the text changes @@ -17,9 +22,4 @@ END; CREATE TRIGGER IF NOT EXISTS delete_embedding_on_delete_chunk_%s AFTER DELETE ON vec_chunks BEGIN DELETE FROM pages_vec_%s WHERE id = old.id; -END; - -CREATE VIRTUAL TABLE IF NOT EXISTS pages_vec_%s USING vec0( - id INTEGER PRIMARY KEY, - embedding FLOAT[%d] distance_metric=cosine -); +END; \ No newline at end of file