From a1ce4780cb678838ad1ecf0dd0388af33aff38ad Mon Sep 17 00:00:00 2001
From: FluxCapacitor2 <31071265+FluxCapacitor2@users.noreply.github.com>
Date: Fri, 18 Oct 2024 01:51:28 -0400
Subject: [PATCH] Record visits for all non-HTML pages, not just sitemaps and
 feeds

---
 app/crawler/crawler.go | 8 ++------
 1 file changed, 2 insertions(+), 6 deletions(-)

diff --git a/app/crawler/crawler.go b/app/crawler/crawler.go
index 850a4f6..7812077 100644
--- a/app/crawler/crawler.go
+++ b/app/crawler/crawler.go
@@ -156,14 +156,10 @@ func Crawl(source config.Source, currentDepth int32, referrer string, db databas
 					add(link)
 				}
 			}
-		} else {
-			return
 		}
 
-		if len(urls) > 0 { // <- This will be true if URLs were found *before* an HTML document was parsed, which only happens for sitemaps/feeds.
-			// This page is a sitemap. Insert an "unindexable" document, which records that this document has been crawled, but has no text content of its own.
-			db.AddDocument(source.ID, currentDepth, referrer, canonical, database.Unindexable, "", "", "", "")
-		}
+		// This page is not an HTML document. Insert an "unindexable" document, which records that this document has been crawled, but has no text content of its own.
+		db.AddDocument(source.ID, currentDepth, referrer, canonical, database.Unindexable, "", "", "", "")
 	})
 
 	collector.OnHTML("a[href]", func(element *colly.HTMLElement) {