Skip to content

Commit

Permalink
Replace SELECT + REPLACE INTO with one INSERT with an UPSERT clause
Browse files Browse the repository at this point in the history
This really shows you that fresh eyes are important! I thought I checked my triggers yesterday, but today, I had the idea to run the setup SQL in the `sqlite3` REPL and remove various parts until it worked. It turns out one of my triggers was broken, but I didn't catch it earlier because REPLACE INTO deletes and re-creates the entire row instead of updating it. The "4 values for 5 columns" error was indeed the problem, I just didn't look closely enough.
  • Loading branch information
FluxCapacitor2 committed Jan 8, 2025
1 parent 6e9c4a9 commit 8c8ce27
Show file tree
Hide file tree
Showing 2 changed files with 7 additions and 23 deletions.
28 changes: 6 additions & 22 deletions app/database/db_sqlite.go
Original file line number Diff line number Diff line change
Expand Up @@ -60,16 +60,12 @@ func (db *SQLiteDatabase) AddDocument(ctx context.Context, source string, depth
return id, err
}

oldID := int64(-1)
err = tx.QueryRowContext(ctx, "SELECT id FROM pages WHERE source = ? AND url = ?;", source, url).Scan(&oldID)
if err != nil && err != sql.ErrNoRows {
if err := tx.Rollback(); err != nil {
return id, err
}
return id, err
}

err = tx.QueryRowContext(ctx, "REPLACE INTO pages (source, depth, status, url, title, description, content, errorInfo) VALUES (?, ?, ?, ?, ?, ?, ?, ?) RETURNING (id);", source, depth, status, url, title, description, content, errorInfo).Scan(&id)
err = tx.QueryRowContext(ctx, `
INSERT INTO pages (source, depth, status, url, title, description, content, errorInfo)
VALUES (?, ?, ?, ?, ?, ?, ?, ?)
ON CONFLICT DO UPDATE SET depth = min(depth, excluded.depth), status = excluded.status, title = excluded.title, description = excluded.description, content = excluded.content, errorInfo = excluded.errorInfo, crawledAt = CURRENT_TIMESTAMP
RETURNING id;
`, source, depth, status, url, title, description, content, errorInfo).Scan(&id)
if err != nil {
if err := tx.Rollback(); err != nil {
return id, err
Expand All @@ -78,18 +74,6 @@ func (db *SQLiteDatabase) AddDocument(ctx context.Context, source string, depth
}

for _, ref := range referrers {
if oldID != -1 && id != oldID && ref == oldID {
// Special case: within the transaction, if the REPLACE INTO replaces an existing row,
// it's possible that one of the referrers now points to a page that no longer exists.
// If this happens, we should just ignore it because pages shouldn't point to themselves anyways.

// 2025/01/08: I tried for ~3 hours to just use a normal UPSERT-style statement to avoid the extra SELECT:
// "INSERT INTO pages ... ON CONFLICT DO UPDATE SET ...", but I kept getting errors like "4 values for 5 columns"
// and "sub-select returns 4 columns - expected 1" even though I'm not doing any sub-selects and every sub-select
// in a trigger uses the correct number of columns (and even for regular UPDATE statements too!).
// I've decided to give up for now. :/
continue
}
if ref == id {
// Pages don't need to reference themselves
continue
Expand Down
2 changes: 1 addition & 1 deletion app/database/db_sqlite_setup.sql
Original file line number Diff line number Diff line change
Expand Up @@ -119,7 +119,7 @@ END;

CREATE TRIGGER IF NOT EXISTS pages_auto_update AFTER UPDATE ON pages BEGIN
INSERT INTO pages_fts(pages_fts, rowid, url, title, description, content) VALUES('delete', old.rowid, old.url, old.title, old.description, old.content);
INSERT INTO pages_fts(rowid, url, title, description, content) VALUES (new.url, new.title, new.description, new.content);
INSERT INTO pages_fts(rowid, url, title, description, content) VALUES (new.rowid, new.url, new.title, new.description, new.content);
-- Remove crawl queue entry if it exists
DELETE FROM crawl_queue WHERE source = new.source AND url = new.url;
END;
Expand Down

0 comments on commit 8c8ce27

Please sign in to comment.