diff --git a/app/database/db.go b/app/database/db.go index a376735..967ec85 100644 --- a/app/database/db.go +++ b/app/database/db.go @@ -8,6 +8,8 @@ type Database interface { AddDocument(source string, depth int32, referrer string, url string, status QueueItemStatus, title string, description string, content string, errorInfo string) error // Returns whether the given URL (or the URL's canonical) is indexed HasDocument(source string, url string) (*bool, error) + // Fetch the document by URL (or the URL's canonical); the returned page is nil when no matching document is found + GetDocument(source string, url string) (*Page, error) // Run a fulltext search with the given query Search(sources []string, query string, page uint32, pageSize uint32) ([]Result, *uint32, error) @@ -28,16 +30,16 @@ type Database interface { } type Page struct { - Source string - Referrer string - URL string - Title string - Description string - Content string - Depth int32 - CrawledAt string - Status QueueItemStatus - ErrorInfo string + Source string `json:"source"` + Referrer string `json:"referrer"` + URL string `json:"url"` + Title string `json:"title"` + Description string `json:"description"` + Content string `json:"content"` + Depth int32 `json:"depth"` + CrawledAt string `json:"crawledAt"` + Status QueueItemStatus `json:"status"` + ErrorInfo string `json:"error"` } type Result struct { diff --git a/app/database/db_sqlite.go b/app/database/db_sqlite.go index c7baa23..39e05f5 100644 --- a/app/database/db_sqlite.go +++ b/app/database/db_sqlite.go @@ -49,6 +49,23 @@ func (db *SQLiteDatabase) HasDocument(source string, url string) (*bool, error) return &exists, nil } +func (db *SQLiteDatabase) GetDocument(source string, url string) (*Page, error) { + cursor := db.conn.QueryRow("SELECT source, referrer, url, title, description, content, depth, crawledAt, status, errorInfo FROM pages WHERE source = ? AND (url = ? 
OR url IN (SELECT canonical FROM canonicals WHERE url = ?));", source, url, url) + + page := Page{} + err := cursor.Scan(&page.Source, &page.Referrer, &page.URL, &page.Title, &page.Description, &page.Content, &page.Depth, &page.CrawledAt, &page.Status, &page.ErrorInfo) + + if err != nil { + if err == sql.ErrNoRows { + return nil, nil + } else { + return nil, err + } + } + + return &page, nil +} + type RawResult struct { Rank float64 URL string diff --git a/app/database/db_sqlite_test.go b/app/database/db_sqlite_test.go index 1828c44..6d5b172 100644 --- a/app/database/db_sqlite_test.go +++ b/app/database/db_sqlite_test.go @@ -145,7 +145,37 @@ func TestHasDocument(t *testing.T) { if !*res { t.Fatalf("document was not added to database: hasDocument returned false") } +} + +func TestGetDocument(t *testing.T) { + db := createDB(t) + page := Page{ + Source: "source1", + Referrer: "", + URL: "https://example.com/", + Title: "Example Domain", + Description: "", + Content: "This domain is for use in illustrative examples in documents. 
You may use this domain in literature without prior coordination or asking for permission.", + Depth: 1, + Status: Finished, + ErrorInfo: "", + } + + db.AddDocument(page.Source, page.Depth, page.Referrer, page.URL, page.Status, page.Title, page.Description, page.Content, page.ErrorInfo) + + doc, err := db.GetDocument("source1", "https://example.com/") + if err != nil { + t.Fatalf("error fetching document: %v", err) + } + if doc == nil { + t.Fatalf("document was not added to database: GetDocument returned nil") + } + doc.CrawledAt = "" // We don't want to compare CrawledAt because it's generated when the row is added + + if !reflect.DeepEqual(page, *doc) { + t.Fatalf("document was improperly added or retrieved from the database: expected %v, got %v", page, *doc) + } } func TestSearchQuery(t *testing.T) { diff --git a/app/server/server.go b/app/server/server.go index be0061e..ca39979 100644 --- a/app/server/server.go +++ b/app/server/server.go @@ -53,7 +53,7 @@ func Start(db database.Database, config *config.Config) { http.HandleFunc("/results", func(w http.ResponseWriter, req *http.Request) { // This endpoint returns results as HTML to be used on the index page (/). - // It is called by Alpine.js to show search results without a full page reload. + // It is called by HTMX to show search results without a full page reload. if req.Header.Get("HX-Request") != "" { // ^ This request was made with HTMX. Update the URL shown in the address bar to match the most recent query params.