# Reconstructed from the patch "Improved relevance score capturing (#159)"
# (commit a922ea406cd1fbc62fe85d34127a20d1f289a2df): the post-patch
# ``get_score`` from paperqa/chains.py and ``test_extract_score`` from
# tests/test_paperqa.py. The same patch bumps paperqa/version.py from
# "3.2.0" to "3.2.1". Unchanged diff context lines are omitted.
import re


def _normalize_score(raw: str) -> int:
    """Convert a captured digit string to an int, rescaling values above 10."""
    value = int(raw)
    if value > 10:
        # sometimes the score is given out of 100 instead of out of 10
        value //= 10
    return value


def get_score(text: str) -> int:
    """Extract a relevance score from free-form summary text.

    The lookups are tried in order and the first hit wins:

    1. A number following the word "score"/"Score" (e.g. ``Score: 3/10``).
    2. A parenthesised numerator followed by a slash (e.g. ``(0/10)``).
    3. The last run of digits found in the final 15 characters of ``text``.
    4. No number at all: 1 for texts shorter than 100 characters, else 5.

    Numbers above 10 found by steps 1-3 are divided by 10 (floor division).

    Args:
        text: The text to scan for a score.

    Returns:
        The extracted score, or the length-based default.
    """
    match = re.search(r"[sS]core[:is\s]+([0-9]+)", text)
    if not match:
        # Capture the whole numerator: a single-digit group would turn
        # "(10/10)" into 1 because the remaining digits fall into ``\w*``.
        match = re.search(r"\(([0-9]+)\w*/", text)
    if match:
        return _normalize_score(match.group(1))

    # Fall back to a bare number at the very end of the text.
    trailing_numbers = re.findall(r"[0-9]+", text[-15:])
    if trailing_numbers:
        return _normalize_score(trailing_numbers[-1])

    return 1 if len(text) < 100 else 5


def test_extract_score():
    sample = """
    The text describes an experiment where different cell subtypes,
    including colorectal cancer-associated fibroblasts, were treated with
    oxaliplatin for 12 days. The concentration of oxaliplatin used was the
    EC50 for each cell subtype, which was determined individually.
    The media were changed every 3 days to avoid complete cell death.
    The text does not provide information about the percentage of colorectal
    cancer-associated fibroblasts that typically survive at 2 weeks when cultured
    with oxaliplatin. (0/10)
    """
    assert get_score(sample) == 0

    sample = """
    COVID-19 vaccinations have been shown to be effective against hospitalization
    from the Omicron and Delta variants, though effectiveness may decrease over
    time. A study found that vaccine effectiveness against hospitalization peaked
    around 82-92% after a third dose but declined to 53-77% 15+ weeks after the third
    dose, depending on age group and hospitalization definition. Stricter
    definitions of hospitalization, like requiring oxygen use or ICU admission,
    showed higher and more sustained vaccine effectiveness. 8
    """

    assert get_score(sample) == 8

    sample = """
    Here is a 100-word summary of the text:
    The text discusses a phase 3 trial of a combined
    vector vaccine based on rAd26 and rAd5 vectors carrying the
    SARS-CoV-2 spike protein gene. The trial aimed to assess the efficacy,
    immunogenicity and safety of the vaccine against COVID-19 in adults.
    The study design was a randomized, double-blind, placebo-controlled trial
    done at 25 hospitals in Moscow, Russia. Eligible participants were 18 years
    or older with no history of COVID-19. The exclusion criteria ensured
    participants were healthy and had no contraindications for vaccination.
    The trial aimed to determine if the vaccine could safely and effectively
    provide protection against COVID-19. Relevance score: 8
    """

    assert get_score(sample) == 8

    sample = """
    Here is a 100-word summary of the provided text: The text details
    trial procedures for a COVID-19 vaccine, including screening
    visits, observation visits to assess vital signs, PCR testing, and
    telemedicine consultations. Participants who tested positive for
    COVID-19 during screening were excluded from the trial. During the trial
    , additional PCR tests were only done when COVID-19 symptoms were reported
    . An electronic health record platform was in place to record data from
    telemedicine consultations. The text details the screening and trial
    procedures but does not provide direct evidence regarding the
    effectiveness of COVID-19 vaccinations. Score: 3/10
    """

    assert get_score(sample) == 3

    sample = """
    Here is a 100-word summary of the text: The text discusses a
    phase 3 trial of a COVID-19 vaccine in Russia. The vaccine
    uses a heterologous prime-boost regimen, providing robust
    immune responses. The vaccine can be stored at -18°C and
    2-8°C. The study reports 91.6% efficacy against COVID-19 based on
    interim analysis of over 21,000 participants. The authors
    compare their results to other published COVID-19 vaccine
    efficacy data. They previously published safety and immunogenicity
    results from phase 1/2 trials of the same vaccine. Relevance score:
    8/10. The text provides details on the efficacy and immune response
    generated by one COVID-19 vaccine in a large phase 3 trial, which is
    relevant evidence to help answer the question regarding effectiveness
    of COVID-19 vaccinations.
    """

    assert get_score(sample) == 8

    sample = """
    Here is a 100-word summary of the text: The text discusses the safety and
    efficacy of the BNT162b2 mRNA Covid-19 vaccine. The study found that
    the vaccine was well tolerated with mostly mild to moderate side
    effects. The vaccine was found to be highly effective against Covid-19,
    with an observed vaccine efficacy of 90.5% after the second dose.
    Severe Covid-19 cases were also reduced among vaccine recipients.
    The vaccine showed an early protective effect after the first dose
    and reached full efficacy 7 days after the second dose. The favorable
    safety and efficacy results provide evidence that the BNT162b2 vaccine
    is effective against Covid-19. The text provides data on the efficacy
    and safety results from a clinical trial of the BNT162b2 Covid-19 vaccine,
    which is highly relevant to answering the question about the effectiveness
    of Covid-19 vaccinations.
    """

    assert get_score(sample) == 5

    # Regression: a two-digit parenthesised score must be captured whole.
    assert get_score("The text directly answers the question. (10/10)") == 10