From c530e65b11410968781e88acf0cfbc0c6391080f Mon Sep 17 00:00:00 2001 From: Talha SARI Date: Mon, 6 Nov 2023 17:49:37 +0300 Subject: [PATCH 1/2] update web_page_reader.py --- autollm/utils/web_page_reader.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/autollm/utils/web_page_reader.py b/autollm/utils/web_page_reader.py index 45abc6a9..32cedae6 100644 --- a/autollm/utils/web_page_reader.py +++ b/autollm/utils/web_page_reader.py @@ -68,7 +68,7 @@ def load_data(self, url: str) -> List[Document]: tag.decompose() content = " ".join(soup.stripped_strings) - document = Document(text=content, metadata={"url": url}) + document = Document(doc_id=url, text=content, metadata={"url": url}) return [document] From 913269401b7a430f2dc55a5d6b42221622cee216 Mon Sep 17 00:00:00 2001 From: Talha SARI Date: Mon, 6 Nov 2023 17:54:39 +0300 Subject: [PATCH 2/2] minor fix --- autollm/utils/web_page_reader.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/autollm/utils/web_page_reader.py b/autollm/utils/web_page_reader.py index 32cedae6..522d2fb9 100644 --- a/autollm/utils/web_page_reader.py +++ b/autollm/utils/web_page_reader.py @@ -68,7 +68,7 @@ def load_data(self, url: str) -> List[Document]: tag.decompose() content = " ".join(soup.stripped_strings) - document = Document(doc_id=url, text=content, metadata={"url": url}) + document = Document(id_=url, text=content, metadata={"url": url}) return [document]