Skip to content

Commit

Permalink
Added return value for indicating success of add
Browse files Browse the repository at this point in the history
  • Loading branch information
whitead committed Jun 16, 2023
1 parent c889ba6 commit ed13c50
Show file tree
Hide file tree
Showing 3 changed files with 32 additions and 10 deletions.
21 changes: 13 additions & 8 deletions paperqa/docs.py
Original file line number Diff line number Diff line change
Expand Up @@ -117,7 +117,7 @@ def add_file(
docname: Optional[str] = None,
dockey: Optional[DocKey] = None,
chunk_chars: int = 3000,
) -> str:
) -> Optional[str]:
"""Add a document to the collection."""
# just put in temp file and use existing method
suffix = ".txt"
Expand All @@ -144,7 +144,7 @@ def add_url(
docname: Optional[str] = None,
dockey: Optional[DocKey] = None,
chunk_chars: int = 3000,
) -> str:
) -> Optional[str]:
"""Add a document to the collection."""
import urllib.request

Expand All @@ -167,7 +167,7 @@ def add(
disable_check: bool = False,
dockey: Optional[DocKey] = None,
chunk_chars: int = 3000,
) -> str:
) -> Optional[str]:
"""Add a document to the collection."""
if dockey is None:
dockey = md5sum(path)
Expand Down Expand Up @@ -216,17 +216,21 @@ def add(
raise ValueError(
f"This does not look like a text document: {path}. Path disable_check to ignore this error."
)
self.add_texts(texts, doc)
return docname
if self.add_texts(texts, doc):
return docname
return None

def add_texts(
self,
texts: List[Text],
doc: Doc,
):
"""Add chunked texts to the collection. This is useful if you have already chunked the texts yourself."""
) -> bool:
"""Add chunked texts to the collection. This is useful if you have already chunked the texts yourself.
Returns True if the document was added, False if it was already in the collection.
"""
if doc.dockey in self.docs:
raise ValueError(f"Document {doc.dockey} already in collection.")
return False
if len(texts) == 0:
raise ValueError("No texts to add.")
if doc.docname in self.docnames:
Expand Down Expand Up @@ -255,6 +259,7 @@ def add_texts(
self.docs[doc.dockey] = doc
self.texts += texts
self.docnames.add(doc.docname)
return True

def delete(
self, name: Optional[str] = None, dockey: Optional[DocKey] = None
Expand Down
2 changes: 1 addition & 1 deletion paperqa/version.py
Original file line number Diff line number Diff line change
@@ -1 +1 @@
__version__ = "3.0.0"
__version__ = "3.1.0"
19 changes: 18 additions & 1 deletion tests/test_paperqa.py
Original file line number Diff line number Diff line change
Expand Up @@ -105,7 +105,7 @@ def test_evidence():
docs = Docs()
docs.add(doc_path, "WikiMedia Foundation, 2023, Accessed now")
evidence = docs.get_evidence(
Answer(question="For which state was he a governor"), k=1, max_sources=1
Answer(question="For which state was Bates a governor?"), k=1, max_sources=1
)
assert "Missouri" in evidence.context
os.remove(doc_path)
Expand All @@ -121,6 +121,23 @@ def test_query():
docs.query("What is Frederick Bates's greatest accomplishment?")


def test_duplicate():
    """Check that re-adding a document under an existing dockey is reported.

    The first ``add_url`` call must return a truthy value; a second call
    with the same URL, citation and dockey must return ``None``.
    """
    url = "https://en.wikipedia.org/wiki/Frederick_Bates_(politician)"
    citation = "WikiMedia Foundation, 2023, Accessed now"
    key = "test"

    collection = Docs()

    first_result = collection.add_url(url, citation=citation, dockey=key)
    assert first_result

    # Same dockey again: the call should signal "not added" with None.
    second_result = collection.add_url(url, citation=citation, dockey=key)
    assert second_result is None


class Test(IsolatedAsyncioTestCase):
async def test_aquery(self):
docs = Docs()
Expand Down

0 comments on commit ed13c50

Please sign in to comment.