From 5789f5cc7dc1bc74cf7248881d291a80fedafb85 Mon Sep 17 00:00:00 2001
From: James Braza <jamesbraza@gmail.com>
Date: Tue, 29 Oct 2024 11:56:41 -0700
Subject: [PATCH 1/2] Fixed flaky test_code via retrying

---
 tests/test_paperqa.py | 18 +++++++++++-------
 1 file changed, 11 insertions(+), 7 deletions(-)

diff --git a/tests/test_paperqa.py b/tests/test_paperqa.py
index f3c4629d2..fbaae73cb 100644
--- a/tests/test_paperqa.py
+++ b/tests/test_paperqa.py
@@ -1,5 +1,6 @@
 import contextlib
 import os
+import pathlib
 import pickle
 import textwrap
 from collections.abc import AsyncIterable
@@ -43,6 +44,8 @@
 )
 from tests.conftest import VCR_DEFAULT_MATCH_ON
 
+THIS_MODULE = pathlib.Path(__file__)
+
 
 @pytest.fixture
 def docs_fixture(stub_data_dir: Path) -> Docs:
@@ -991,17 +994,18 @@ def test_chunk_metadata_reader(stub_data_dir: Path) -> None:
     assert metadata.total_parsed_text_length // 3000 <= len(chunk_text)
 
 
+@pytest.mark.flaky(reruns=2, only_rerun=["AssertionError"])  # Rerun if could_not_answer
 def test_code() -> None:
-    # load this script
-    doc_path = Path(os.path.abspath(__file__))
     settings = Settings.from_name("fast")
     docs = Docs()
-    docs.add(doc_path, "test_paperqa.py", docname="test_paperqa.py", disable_check=True)
-    assert len(docs.docs) == 1
-    assert (
-        "test_paperqa.py"
-        in docs.query("What file is read in by test_code?", settings=settings).answer
+    # load this script
+    docs.add(
+        THIS_MODULE, "test_paperqa.py", docname="test_paperqa.py", disable_check=True
     )
+    assert len(docs.docs) == 1
+    answer = docs.query("What file is read in by test_code?", settings=settings)
+    assert not answer.could_not_answer, "Expected an answer"
+    assert "test_paperqa.py" in answer.answer
 
 
 def test_zotero() -> None:

From f7b41f9f52f54243c3f98538491389af00cdf433 Mon Sep 17 00:00:00 2001
From: James Braza <jamesbraza@gmail.com>
Date: Tue, 29 Oct 2024 11:59:32 -0700
Subject: [PATCH 2/2] Fixed flaky test_minimal_fields_filtering via accepting either DOI

---
 tests/test_clients.py | 16 ++++++++++++----
 1 file changed, 12 insertions(+), 4 deletions(-)

diff --git a/tests/test_clients.py b/tests/test_clients.py
index 02114f409..6cf90fd25 100644
--- a/tests/test_clients.py
+++ b/tests/test_clients.py
@@ -320,12 +320,20 @@ async def test_minimal_fields_filtering() -> None:
             "semantic_scholar",
             "crossref",
         }, "Should be from two sources"
-        assert details.citation == (
+        citation_boilerplate = (
             "Unknown author(s). Augmenting large language models with chemistry tools."
             " Unknown journal, Unknown year. URL:"
-            " https://doi.org/10.1038/s42256-024-00832-8,"
-            " doi:10.1038/s42256-024-00832-8."
-        ), "Citation should be populated"
+        )
+        assert details.citation in {
+            (  # Match in Nature Machine Intelligence
+                f"{citation_boilerplate} https://doi.org/10.1038/s42256-024-00832-8,"
+                " doi:10.1038/s42256-024-00832-8."
+            ),
+            (  # Match in arXiv
+                f"{citation_boilerplate} https://doi.org/10.48550/arxiv.2304.05376,"
+                " doi:10.48550/arxiv.2304.05376."
+            ),
+        }, "Citation should be populated"
         assert not details.source_quality, "No source quality data should exist"