From ddf0af8ec830f3f225af2c14037d9a3696509b8e Mon Sep 17 00:00:00 2001 From: Rafael Goncalves Date: Thu, 7 Mar 2024 16:35:22 -0500 Subject: [PATCH 1/7] Some tests relied on cache that might not exist unless the whole test suite is executed --- test/simple_tests.py | 27 +++++++++++++++++++-------- 1 file changed, 19 insertions(+), 8 deletions(-) diff --git a/test/simple_tests.py b/test/simple_tests.py index fc51d21..bbe8654 100644 --- a/test/simple_tests.py +++ b/test/simple_tests.py @@ -8,6 +8,7 @@ pd.set_option('display.max_columns', None) + class Text2TermTestSuite(unittest.TestCase): @classmethod @@ -51,6 +52,7 @@ def test_caching_ontology_set(self): assert len(caches) == nr_ontologies_in_registry def test_mapping_to_cached_ontology(self): + self.ensure_cache_exists("EFO", self.EFO_URL) # Test mapping a list of terms to EFO loaded from cache print("Test mapping a list of terms to EFO loaded from cache...") mappings_efo_cache = text2term.map_terms(["asthma", "disease location", "food allergy"], target_ontology="EFO", @@ -72,23 +74,25 @@ def test_mapping_to_cached_ontology(self): print(f"...{mappings_match}") assert mappings_match is True - def test_mapping_to_cached_efo_using_syntactic_mapper(self): + def test_mapping_to_cached_ontology_using_syntactic_mapper(self): + self.ensure_cache_exists("EFO", self.EFO_URL) # Test mapping a list of terms to cached EFO using Jaro-Winkler syntactic similarity metric - print("Test mapping a list of terms to cached EFO using Jaro-Winkler syntactic similarity metric...") + print("Test mapping a list of terms to cached ontology using Jaro-Winkler syntactic similarity metric...") df = text2term.map_terms(["asthma", "disease location", "food allergy"], "EFO", use_cache=True, mapper=text2term.Mapper.JARO_WINKLER, term_type=OntologyTermType.ANY) print(f"{df}\n") assert df.size > 0 - def test_mapping_to_efo_using_ontology_acronym(self): - # Test mapping a list of terms to EFO by specifying the ontology acronym, which gets resolved by bioregistry + def test_mapping_using_ontology_acronym(self): + # Test mapping a list of terms by specifying the target ontology acronym, which gets resolved by bioregistry print( - "Test mapping a list of terms to EFO by specifying the ontology acronym, which gets resolved by bioregistry") - df2 = text2term.map_terms(["contains", "asthma"], "EFO", term_type=OntologyTermType.CLASS) + "Test mapping a list of terms by specifying the ontology acronym, which gets resolved by bioregistry") + df2 = text2term.map_terms(["contains", "asthma"], "MONDO") print(f"{df2}\n") assert df2.size > 0 def test_mapping_tagged_terms(self): + self.ensure_cache_exists("EFO", self.EFO_URL) # Test mapping a dictionary of tagged terms to cached EFO, and include unmapped terms in the output print("Test mapping a dictionary of tagged terms to cached EFO, and include unmapped terms in the output...") df3 = text2term.map_terms( @@ -100,6 +104,7 @@ def test_mapping_tagged_terms(self): assert df3[self.TAGS_COLUMN].str.contains("measurement").any() def test_preprocessing_from_file(self): + self.ensure_cache_exists("EFO", self.EFO_URL) # Test processing tagged terms where the tags are provided in a file print("Test processing tagged terms where the tags are provided in a file...") tagged_terms = text2term.preprocess_tagged_terms("simple_preprocess.txt") @@ -119,8 +124,7 @@ def test_mapping_to_properties(self): # Test mapping a list of properties to EFO loaded from cache and restrict search to properties print("Test mapping a list of properties to EFO loaded from cache and restrict search to properties...") - if not text2term.cache_exists("EFO"): - text2term.cache_ontology(ontology_url=self.EFO_URL, ontology_acronym="EFO") + self.ensure_cache_exists("EFO", self.EFO_URL) df6 = text2term.map_terms(source_terms=["contains", "location"], target_ontology="EFO", use_cache=True, term_type=OntologyTermType.PROPERTY) print(f"{df6}\n") @@ -184,6 +188,7 @@ def test_term_collector_iri_limit_properties_only(self): assert len(terms) == expected_nr_properties_with_efo_iri def test_mapping_with_min_score_filter(self): + self.ensure_cache_exists("EFO", self.EFO_URL) min_score = 0.6 search_terms = ["asthma attack", "location"] @@ -203,11 +208,13 @@ def test_mapping_with_min_score_filter(self): assert (df_leven[self.MAPPING_SCORE_COLUMN] >= min_score).all() def test_include_unmapped_terms(self): + self.ensure_cache_exists("EFO", self.EFO_URL) df = text2term.map_terms(["asthma", "margarita"], target_ontology="EFO", use_cache=True, mapper=Mapper.TFIDF, incl_unmapped=True, min_score=0.8) assert df[self.TAGS_COLUMN].str.contains("unmapped").any() def test_include_unmapped_terms_when_mappings_df_is_empty(self): + self.ensure_cache_exists("EFO", self.EFO_URL) df = text2term.map_terms(["mojito", "margarita"], target_ontology="EFO", use_cache=True, mapper=Mapper.TFIDF, incl_unmapped=True, min_score=0.8) assert df[self.TAGS_COLUMN].str.contains("unmapped").any() @@ -222,6 +229,10 @@ def check_df_equals(self, df, expected_df): pd.testing.assert_frame_equal(df, expected_df, check_names=False, check_like=True) return True + def ensure_cache_exists(self, ontology_name, ontology_url): + if not text2term.cache_exists(ontology_name): + text2term.cache_ontology(ontology_url=ontology_url, ontology_acronym=ontology_name) + if __name__ == '__main__': unittest.main() From 6c4ff68cfe42b28d2952b0243f217192cdf80b3d Mon Sep 17 00:00:00 2001 From: Jason Payne Date: Mon, 11 Mar 2024 11:47:38 -0400 Subject: [PATCH 2/7] Fix Testing Action Fixes the GitHub Action that automatically tests the module. Also fixes a "bug" in the testing suite introduced by errors in EFO --- .github/workflows/python-app.yml | 3 ++- test/simple_tests.py | 4 ++-- 2 files changed, 4 insertions(+), 3 deletions(-) diff --git a/.github/workflows/python-app.yml b/.github/workflows/python-app.yml index 96f008a..cf5010f 100644 --- a/.github/workflows/python-app.yml +++ b/.github/workflows/python-app.yml @@ -23,6 +23,7 @@ jobs: uses: actions/setup-python@v3 with: python-version: "3.10" + cache: 'pip' - name: show python path run: | python -c "import sys; print('\n'.join(sys.path))" @@ -30,7 +31,7 @@ jobs: run: | python -m pip install --upgrade pip pip install -r requirements.txt - pip install flake8 pytest + pip install flake8 pytest wheel - name: Check package location run: | pip show pandas diff --git a/test/simple_tests.py b/test/simple_tests.py index bbe8654..305281e 100644 --- a/test/simple_tests.py +++ b/test/simple_tests.py @@ -86,8 +86,8 @@ def test_mapping_to_cached_ontology_using_syntactic_mapper(self): def test_mapping_using_ontology_acronym(self): # Test mapping a list of terms by specifying the target ontology acronym, which gets resolved by bioregistry print( - "Test mapping a list of terms by specifying the ontology acronym, which gets resolved by bioregistry") - df2 = text2term.map_terms(["contains", "asthma"], "MONDO") + "Test mapping a list of terms to EFO by specifying the ontology acronym, which gets resolved by bioregistry") + df2 = text2term.map_terms(["contains", "asthma"], "MONDO", term_type=OntologyTermType.CLASS) print(f"{df2}\n") assert df2.size > 0 From f55f581114a8ab70aa09735b25e482f7a7ee8dc0 Mon Sep 17 00:00:00 2001 From: Jason Payne Date: Wed, 20 Mar 2024 10:05:42 -0400 Subject: [PATCH 3/7] Clean printing Cleans up unnecessary print statements from the GitHub testing action --- .github/workflows/python-app.yml | 6 ------ 1 file changed, 6 deletions(-) diff --git a/.github/workflows/python-app.yml b/.github/workflows/python-app.yml index cf5010f..e99f354 100644 --- a/.github/workflows/python-app.yml +++ b/.github/workflows/python-app.yml @@ -24,17 +24,11 @@ jobs: with: python-version: "3.10" cache: 'pip' - - name: show python path - run: | - python -c "import sys; print('\n'.join(sys.path))" - name: Install dependencies run: | python -m pip install --upgrade pip pip install -r requirements.txt pip install flake8 pytest wheel - - name: Check package location - run: | - pip show pandas - name: Install text2term run: | pip install -e . From f0236d96b74f4e518462a9a43dad18b5dfdba7ca Mon Sep 17 00:00:00 2001 From: Jason Payne Date: Wed, 20 Mar 2024 13:30:59 -0400 Subject: [PATCH 4/7] Add actions to upload to PyPI Adds actions to upload to PyPI and test PyPI automatically upon release --- .../{python-app.yml => github_test.yml} | 3 +- .github/workflows/upload_pypi.yml | 45 ++++++++++++++++++ .github/workflows/upload_testpypi.yml | 47 +++++++++++++++++++ 3 files changed, 94 insertions(+), 1 deletion(-) rename .github/workflows/{python-app.yml => github_test.yml} (94%) create mode 100644 .github/workflows/upload_pypi.yml create mode 100644 .github/workflows/upload_testpypi.yml diff --git a/.github/workflows/python-app.yml b/.github/workflows/github_test.yml similarity index 94% rename from .github/workflows/python-app.yml rename to .github/workflows/github_test.yml index e99f354..947449f 100644 --- a/.github/workflows/python-app.yml +++ b/.github/workflows/github_test.yml @@ -1,7 +1,8 @@ # This workflow will install Python dependencies, run tests and lint with a single version of Python # For more information see: https://docs.github.com/en/actions/automating-builds-and-tests/building-and-testing-python -name: Python application +name: GitHub Unit Testing +run-name: Unit Testing on ${{ github.event_name }} on: push: diff --git a/.github/workflows/upload_pypi.yml b/.github/workflows/upload_pypi.yml new file mode 100644 index 0000000..eef2a4f --- /dev/null +++ b/.github/workflows/upload_pypi.yml @@ -0,0 +1,45 @@ +# This workflow will install Python dependencies, run tests and lint with a single version of Python +# For more information see: https://docs.github.com/en/actions/automating-builds-and-tests/building-and-testing-python + +name: Upload PyPI +run-name: Upload ${{ github.event.release.tag_name }} to PyPI + +on: + release: + types: [published] + +permissions: + contents: write + id-token: write + +jobs: + build: + + runs-on: ubuntu-latest + + steps: + - uses: actions/checkout@v3 + - name: Set up Python 3.10 + uses: actions/setup-python@v3 + with: + python-version: "3.10" + cache: 'pip' + - name: Install dependencies + run: | + python -m pip install --upgrade pip + pip install -r requirements.txt + pip install flake8 twine sdist wheel build + - name: Install text2term + run: | + pip install -e . + - name: Lint with flake8 + run: | + # stop the build if there are Python syntax errors or undefined names + flake8 . --count --select=E9,F63,F7,F82 --show-source --statistics + # exit-zero treats all errors as warnings. The GitHub editor is 127 chars wide + flake8 . --count --exit-zero --max-complexity=10 --max-line-length=127 --statistics + - name: Build dist/ + run: | + python -m build --sdist --wheel --no-isolation --outdir dist/ . + - name: Upload to pypi + uses: pypa/gh-action-pypi-publish@release/v1 diff --git a/.github/workflows/upload_testpypi.yml b/.github/workflows/upload_testpypi.yml new file mode 100644 index 0000000..82fec26 --- /dev/null +++ b/.github/workflows/upload_testpypi.yml @@ -0,0 +1,47 @@ +# This workflow will install Python dependencies, run tests and lint with a single version of Python +# For more information see: https://docs.github.com/en/actions/automating-builds-and-tests/building-and-testing-python + +name: Upload Test PyPI +run-name: Upload ${{ github.event.release.tag_name }} to Test PyPI + +on: + release: + types: [published] + +permissions: + contents: write + id-token: write + +jobs: + build: + + runs-on: ubuntu-latest + + steps: + - uses: actions/checkout@v3 + - name: Set up Python 3.10 + uses: actions/setup-python@v3 + with: + python-version: "3.10" + cache: 'pip' + - name: Install dependencies + run: | + python -m pip install --upgrade pip + pip install -r requirements.txt + pip install flake8 twine sdist wheel build + - name: Install text2term + run: | + pip install -e . + - name: Lint with flake8 + run: | + # stop the build if there are Python syntax errors or undefined names + flake8 . --count --select=E9,F63,F7,F82 --show-source --statistics + # exit-zero treats all errors as warnings. The GitHub editor is 127 chars wide + flake8 . --count --exit-zero --max-complexity=10 --max-line-length=127 --statistics + - name: Build dist/ + run: | + python -m build --sdist --wheel --no-isolation --outdir dist/ . + - name: Upload to pypi + uses: pypa/gh-action-pypi-publish@release/v1 + with: + repository-url: https://test.pypi.org/legacy/ From 77310cebf86c1d9848101eb96580c12a6da1f02d Mon Sep 17 00:00:00 2001 From: Jason Payne Date: Mon, 25 Mar 2024 14:58:47 -0400 Subject: [PATCH 5/7] Update github_test.yml Adds a better description, but removes testing on pull in main to stop bug from occurring --- .github/workflows/github_test.yml | 4 +--- 1 file changed, 1 insertion(+), 3 deletions(-) diff --git a/.github/workflows/github_test.yml b/.github/workflows/github_test.yml index 947449f..105b8a2 100644 --- a/.github/workflows/github_test.yml +++ b/.github/workflows/github_test.yml @@ -2,13 +2,11 @@ # For more information see: https://docs.github.com/en/actions/automating-builds-and-tests/building-and-testing-python name: GitHub Unit Testing -run-name: Unit Testing on ${{ github.event_name }} +run-name: Unit Testing on ${{ github.event.push.head_commit.message }} on: push: branches: [ "development" ] - pull_request: - branches: [ "main" ] permissions: contents: read From 4c1fc2e8b0eaec57027d925be362c2529e381a4b Mon Sep 17 00:00:00 2001 From: Jason Payne Date: Mon, 25 Mar 2024 15:10:24 -0400 Subject: [PATCH 6/7] Revert "Update github_test.yml" This reverts commit 77310cebf86c1d9848101eb96580c12a6da1f02d. --- .github/workflows/github_test.yml | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/.github/workflows/github_test.yml b/.github/workflows/github_test.yml index 105b8a2..947449f 100644 --- a/.github/workflows/github_test.yml +++ b/.github/workflows/github_test.yml @@ -2,11 +2,13 @@ # For more information see: https://docs.github.com/en/actions/automating-builds-and-tests/building-and-testing-python name: GitHub Unit Testing -run-name: Unit Testing on ${{ github.event.push.head_commit.message }} +run-name: Unit Testing on ${{ github.event_name }} on: push: branches: [ "development" ] + pull_request: + branches: [ "main" ] permissions: contents: read From 88a66807700be042f9ba7d5045cf11a23d6254e6 Mon Sep 17 00:00:00 2001 From: Jason Payne Date: Thu, 28 Mar 2024 10:43:12 -0400 Subject: [PATCH 7/7] Update requirements.txt Fixes typo in requirements --- requirements.txt | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/requirements.txt b/requirements.txt index 62e5bea..cf8334d 100644 --- a/requirements.txt +++ b/requirements.txt @@ -13,4 +13,4 @@ bioregistry~=0.10.6 nltk~=3.8.1 rapidfuzz~=2.13.7 shortuuid~=1.0.11 -myst-parser~=2.0.0 \ No newline at end of file +myst_parser~=2.0.0 \ No newline at end of file