From 0ee123d6a8fae58071b514b30c848cd882a33bfd Mon Sep 17 00:00:00 2001 From: Kamyab Nazari Date: Wed, 5 Jul 2023 21:24:15 +0200 Subject: [PATCH] Changing chunk sizes --- backend/ingest.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/backend/ingest.py b/backend/ingest.py index 41938b4..a614a79 100644 --- a/backend/ingest.py +++ b/backend/ingest.py @@ -160,9 +160,9 @@ def text_to_docs(text: List[str], metadata: Dict[str, str]) -> List[Document]: all_chunks = [] for page_num, page in text: text_splitter = RecursiveCharacterTextSplitter( - chunk_size=500, + chunk_size=3500, separators=["\n\n", "\n", ".", "!", "?", ",", " ", ""], - chunk_overlap=100, + chunk_overlap=500, ) chunks = text_splitter.split_text(page) all_chunks.extend([(page_num, i, chunk) for i, chunk in enumerate(chunks)])