Skip to content

Commit

Permalink
Add a method to get an indexed document's details
Browse files Browse the repository at this point in the history
  • Loading branch information
FluxCapacitor2 committed Oct 6, 2024
1 parent 0a35a4e commit d5acc4c
Show file tree
Hide file tree
Showing 4 changed files with 60 additions and 11 deletions.
22 changes: 12 additions & 10 deletions app/database/db.go
Original file line number Diff line number Diff line change
Expand Up @@ -8,6 +8,8 @@ type Database interface {
AddDocument(source string, depth int32, referrer string, url string, status QueueItemStatus, title string, description string, content string, errorInfo string) error
// Returns whether the given URL (or the URL's canonical) is indexed
HasDocument(source string, url string) (*bool, error)
// Fetch the document by URL (or the URL's canonical)
GetDocument(source string, url string) (*Page, error)

// Run a fulltext search with the given query
Search(sources []string, query string, page uint32, pageSize uint32) ([]Result, *uint32, error)
Expand All @@ -28,16 +30,16 @@ type Database interface {
}

// Page holds the columns of one stored document; GetDocument fills it by
// scanning a row selected from the pages table.
//
// NOTE(review): this listing is a rendered diff, so each field shows up twice.
// The untagged declarations appear to be the lines the commit removed and the
// JSON-tagged declarations their replacements — confirm against the real file.
type Page struct {
Source string
Referrer string
URL string
Title string
Description string
Content string
Depth int32
CrawledAt string
Status QueueItemStatus
ErrorInfo string
// The tagged declarations below give each field an explicit JSON key.
Source string `json:"source"`
Referrer string `json:"referrer"`
URL string `json:"url"`
Title string `json:"title"`
Description string `json:"description"`
Content string `json:"content"`
Depth int32 `json:"depth"`
CrawledAt string `json:"crawledAt"`
Status QueueItemStatus `json:"status"`
// Note that ErrorInfo is serialized under the shorter key "error".
ErrorInfo string `json:"error"`
}

type Result struct {
Expand Down
17 changes: 17 additions & 0 deletions app/database/db_sqlite.go
Original file line number Diff line number Diff line change
Expand Up @@ -49,6 +49,23 @@ func (db *SQLiteDatabase) HasDocument(source string, url string) (*bool, error)
return &exists, nil
}

// GetDocument loads the page stored for the given source whose URL is either
// the supplied url or a canonical recorded for that url in the canonicals
// table.
//
// It returns (nil, nil) when no row matches and (nil, err) when the query or
// the scan fails for any other reason.
func (db *SQLiteDatabase) GetDocument(source string, url string) (*Page, error) {
	const query = "SELECT source, referrer, url, title, description, content, depth, crawledAt, status, errorInfo FROM pages WHERE source = ? AND (url = ? OR url IN (SELECT canonical FROM canonicals WHERE url = ?));"

	var result Page
	row := db.conn.QueryRow(query, source, url, url)
	scanErr := row.Scan(
		&result.Source,
		&result.Referrer,
		&result.URL,
		&result.Title,
		&result.Description,
		&result.Content,
		&result.Depth,
		&result.CrawledAt,
		&result.Status,
		&result.ErrorInfo,
	)

	switch scanErr {
	case nil:
		return &result, nil
	case sql.ErrNoRows:
		// A missing document is reported as a nil page, not as an error.
		return nil, nil
	default:
		return nil, scanErr
	}
}

type RawResult struct {
Rank float64
URL string
Expand Down
30 changes: 30 additions & 0 deletions app/database/db_sqlite_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -145,7 +145,37 @@ func TestHasDocument(t *testing.T) {
if !*res {
t.Fatalf("document was not added to database: hasDocument returned false")
}
}

// TestGetDocument stores a single page with AddDocument and checks that
// GetDocument returns it with every field intact. CrawledAt is excluded from
// the comparison because it is generated when the row is added.
func TestGetDocument(t *testing.T) {
	db := createDB(t)

	page := Page{
		Source:      "source1",
		Referrer:    "",
		URL:         "https://example.com/",
		Title:       "Example Domain",
		Description: "",
		Content:     "This domain is for use in illustrative examples in documents. You may use this domain in literature without prior coordination or asking for permission.",
		Depth:       1,
		Status:      Finished,
		ErrorInfo:   "",
	}

	// Fail here if the insert itself breaks, rather than reporting a
	// misleading "missing document" failure from the lookup below.
	if err := db.AddDocument(page.Source, page.Depth, page.Referrer, page.URL, page.Status, page.Title, page.Description, page.Content, page.ErrorInfo); err != nil {
		t.Fatalf("error adding document: %v", err)
	}

	doc, err := db.GetDocument("source1", "https://example.com/")
	if err != nil {
		t.Fatalf("error fetching document: %v", err)
	}
	if doc == nil {
		t.Fatalf("document was not added to database: GetDocument returned nil")
	}
	doc.CrawledAt = "" // We don't want to compare CrawledAt because it's generated when the row is added

	if !reflect.DeepEqual(page, *doc) {
		// Print both sides as values with field names so the diff is readable.
		t.Fatalf("document was improperly added or retrieved from the database: expected %+v, got %+v", page, *doc)
	}
}

func TestSearchQuery(t *testing.T) {
Expand Down
2 changes: 1 addition & 1 deletion app/server/server.go
Original file line number Diff line number Diff line change
Expand Up @@ -53,7 +53,7 @@ func Start(db database.Database, config *config.Config) {

http.HandleFunc("/results", func(w http.ResponseWriter, req *http.Request) {
// This endpoint returns results as HTML to be used on the index page (/).
// It is called by Alpine.js to show search results without a full page reload.
// It is called by HTMX to show search results without a full page reload.

if req.Header.Get("HX-Request") != "" {
// ^ This request was made with HTMX. Update the URL shown in the address bar to match the most recent query params.
Expand Down

0 comments on commit d5acc4c

Please sign in to comment.