Skip to content

Commit

Permalink
Add a method to drop the vector embedding tables created for each source
Browse files Browse the repository at this point in the history
  • Loading branch information
FluxCapacitor2 committed Nov 18, 2024
1 parent d453904 commit da13d59
Show file tree
Hide file tree
Showing 3 changed files with 17 additions and 7 deletions.
1 change: 1 addition & 0 deletions app/database/db.go
Original file line number Diff line number Diff line change
Expand Up @@ -4,6 +4,7 @@ type Database interface {
// Create necessary tables
Setup() error
SetupVectorTables(sourceID string, dimension int) error
DropVectorTables(sourceID string) error

// Set the status of items that have been Processing for over a minute to Pending and remove any Finished entries
Cleanup() error
Expand Down
11 changes: 10 additions & 1 deletion app/database/db_sqlite.go
Original file line number Diff line number Diff line change
Expand Up @@ -33,7 +33,16 @@ func (db *SQLiteDatabase) Setup() error {
}

// SetupVectorTables formats the embedSetupCommands SQL template with the given
// sourceID and dimensions and executes the result on the database connection.
// It returns whatever error Exec reports.
//
// The positional arguments passed to fmt.Sprintf must line up with the order
// of the placeholders in the template.
// NOTE(review): two Exec lines appear below with different argument orders;
// this looks like the before/after pair of a diff — confirm only one remains.
func (db *SQLiteDatabase) SetupVectorTables(sourceID string, dimensions int) error {
_, err := db.conn.Exec(fmt.Sprintf(embedSetupCommands, sourceID, sourceID, sourceID, sourceID, sourceID, dimensions))
_, err := db.conn.Exec(fmt.Sprintf(embedSetupCommands, sourceID, dimensions, sourceID, sourceID, sourceID, sourceID))
return err
}

// DropVectorTables removes the per-source vector embedding table and the two
// triggers whose names are suffixed with sourceID.
//
// Every statement uses IF EXISTS so the call is idempotent, mirroring the
// IF NOT EXISTS guards used when these objects are created. The triggers are
// dropped before the table so that a failure part-way through cannot leave
// behind a trigger that refers to a table that no longer exists.
//
// sourceID is interpolated directly into the SQL because identifiers cannot be
// bound as query parameters; callers must pass a trusted value.
//
// It returns whatever error Exec reports.
func (db *SQLiteDatabase) DropVectorTables(sourceID string) error {
	_, err := db.conn.Exec(fmt.Sprintf(`
DROP TRIGGER IF EXISTS pages_refresh_vector_embeddings_%s;
DROP TRIGGER IF EXISTS delete_embedding_on_delete_chunk_%s;
DROP TABLE IF EXISTS pages_vec_%s;
`, sourceID, sourceID, sourceID))
	return err
}

Expand Down
12 changes: 6 additions & 6 deletions app/database/db_sqlite_embedding.sql
Original file line number Diff line number Diff line change
Expand Up @@ -9,6 +9,11 @@
-- * More accurate `k` limit when there are many sources that aren't included in the query
-- * In the future, different sources could use different embedding sources with different vector sizes

-- Per-source vec0 virtual table: one row per id holding its embedding vector,
-- compared using the cosine distance metric. Created only if it does not exist.
CREATE VIRTUAL TABLE IF NOT EXISTS pages_vec_%s USING vec0(
id INTEGER PRIMARY KEY,
embedding FLOAT[%d] distance_metric=cosine
);

CREATE TRIGGER IF NOT EXISTS pages_refresh_vector_embeddings_%s AFTER UPDATE ON pages
WHEN old.url != new.url OR old.title != new.title OR old.description != new.description OR old.content != new.content BEGIN
-- If the page has associated vector embeddings, they must be recomputed when the text changes
Expand All @@ -17,9 +22,4 @@ END;

-- After a row is deleted from vec_chunks, remove the row with the same id
-- from this source's vector table so no orphaned embedding is left behind.
CREATE TRIGGER IF NOT EXISTS delete_embedding_on_delete_chunk_%s AFTER DELETE ON vec_chunks BEGIN
DELETE FROM pages_vec_%s WHERE id = old.id;
END;

CREATE VIRTUAL TABLE IF NOT EXISTS pages_vec_%s USING vec0(
id INTEGER PRIMARY KEY,
embedding FLOAT[%d] distance_metric=cosine
);
END;

0 comments on commit da13d59

Please sign in to comment.