Skip to content

Commit

Permalink
More flexible types (#139)
Browse files (browse the repository at this point in the history)
* Added future possible types

* Added back defaults to read_doc too

* Exported more types

* Added more typing hints
  • Loading branch information
whitead committed Jun 13, 2023
1 parent be22061 commit 72ef415
Show file tree
Hide file tree
Showing 6 changed files with 17 additions and 14 deletions.
4 changes: 2 additions & 2 deletions paperqa/__init__.py
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
from .docs import Answer, Docs, PromptCollection
from .docs import Answer, Docs, PromptCollection, Doc, Text
from .version import __version__

__all__ = ["Docs", "Answer", "PromptCollection", "__version__"]
__all__ = ["Docs", "Answer", "PromptCollection", "__version__", "Doc", "Text"]
7 changes: 2 additions & 5 deletions paperqa/docs.py
Original file line number Diff line number Diff line change
Expand Up @@ -202,7 +202,7 @@ def add_texts(
):
"""Add chunked texts to the collection. This is useful if you have already chunked the texts yourself."""
if doc.dockey in self.docs:
raise ValueError("Document already in collection.")
raise ValueError(f"Document {doc.dockey} already in collection.")
if len(texts) == 0:
raise ValueError("No texts to add.")
if doc.docname in self.docnames:
Expand Down Expand Up @@ -261,9 +261,7 @@ async def adoc_match(
query, k=k + len(self.deleted_dockeys)
)
matched_docs = [self.docs[m.metadata["dockey"]] for m in matches]
chain = make_chain(
self.prompts.select, cast(BaseLanguageModel, self.summary_llm)
)
chain = make_chain(self.prompts.select, cast(BaseLanguageModel, self.llm))
papers = [f"{d.docname}: {d.citation}" for d in matched_docs]
result = await chain.arun( # type: ignore
question=query, papers="\n".join(papers), callbacks=get_callbacks("filter")
Expand Down Expand Up @@ -507,7 +505,6 @@ async def aquery(
else:
callbacks = get_callbacks("answer")
qa_chain = make_chain(self.prompts.qa, self.llm)
print(self.prompts.qa)
answer_text = await qa_chain.arun(
context=answer.context,
answer_length=answer.answer_length,
Expand Down
7 changes: 4 additions & 3 deletions paperqa/prompts.py
Original file line number Diff line number Diff line change
Expand Up @@ -25,7 +25,7 @@
'reply "I cannot answer". '
"For each part of your answer, indicate which sources most support it "
"via valid citation markers at the end of sentences, like (Example2012). "
"Answer in an unbiased, comp rehensive, and scholarly tone. "
"Answer in an unbiased, comprehensive, and scholarly tone. "
"If the question is subjective, provide an opinionated answer in the concluding 1-2 sentences. \n\n"
"{context}\n"
"Question: {question}\n"
Expand All @@ -34,11 +34,12 @@

select_paper_prompt = PromptTemplate(
input_variables=["question", "papers"],
template="Select papers to help answer the question below. "
template="Select papers that may help answer the question below. "
"Papers are listed as $KEY: $PAPER_INFO. "
"Return a list of keys, separated by commas. "
'Return "None", if no papers are applicable. '
"Choose papers that are relevant, from reputable sources, and timely. \n\n"
"Choose papers that are relevant, from reputable sources, and timely "
"(if the question requires timely information). \n\n"
"Question: {question}\n\n"
"{papers}\n\n"
"Selected keys:",
Expand Down
4 changes: 2 additions & 2 deletions paperqa/readers.py
Original file line number Diff line number Diff line change
Expand Up @@ -128,8 +128,8 @@ def parse_code_txt(path: Path, doc: Doc, chunk_chars: int, overlap: int) -> List
def read_doc(
path: Path,
doc: Doc,
chunk_chars: int,
overlap: int,
chunk_chars: int = 3000,
overlap: int = 100,
force_pypdf: bool = False,
) -> List[Text]:
"""Parse a document into chunks."""
Expand Down
7 changes: 6 additions & 1 deletion paperqa/types.py
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
from pathlib import Path
from typing import Any, Callable, List, Optional, Set, Union
from typing import Any, Callable, Dict, List, Optional, Set, Union

from langchain.callbacks.base import BaseCallbackHandler
from langchain.callbacks.manager import (
Expand Down Expand Up @@ -106,6 +106,11 @@ class Answer(BaseModel):
dockey_filter: Optional[Set[DocKey]] = None
summary_length: str = "about 100 words"
answer_length: str = "about 100 words"
# The two fields below are provided for convenience.
# They are left unset (both default to None), but you
# can set them yourself if you want to use them.
cost: Optional[float] = None
token_counts: Optional[Dict[str, List[int]]] = None

def __str__(self) -> str:
"""Return the answer as a string."""
Expand Down
2 changes: 1 addition & 1 deletion paperqa/version.py
Original file line number Diff line number Diff line change
@@ -1 +1 @@
__version__ = "3.0.0.dev1"
__version__ = "3.0.0.dev2"

0 comments on commit 72ef415

Please sign in to comment.