diff --git a/.github/workflows/python-app.yml b/.github/workflows/github_test.yml similarity index 83% rename from .github/workflows/python-app.yml rename to .github/workflows/github_test.yml index 96f008a..947449f 100644 --- a/.github/workflows/python-app.yml +++ b/.github/workflows/github_test.yml @@ -1,7 +1,8 @@ # This workflow will install Python dependencies, run tests and lint with a single version of Python # For more information see: https://docs.github.com/en/actions/automating-builds-and-tests/building-and-testing-python -name: Python application +name: GitHub Unit Testing +run-name: Unit Testing on ${{ github.event_name }} on: push: @@ -23,17 +24,12 @@ jobs: uses: actions/setup-python@v3 with: python-version: "3.10" - - name: show python path - run: | - python -c "import sys; print('\n'.join(sys.path))" + cache: 'pip' - name: Install dependencies run: | python -m pip install --upgrade pip pip install -r requirements.txt - pip install flake8 pytest - - name: Check package location - run: | - pip show pandas + pip install flake8 pytest wheel - name: Install text2term run: | pip install -e . 
diff --git a/.github/workflows/upload_pypi.yml b/.github/workflows/upload_pypi.yml new file mode 100644 index 0000000..eef2a4f --- /dev/null +++ b/.github/workflows/upload_pypi.yml @@ -0,0 +1,45 @@ +# This workflow will install Python dependencies, run tests and lint with a single version of Python +# For more information see: https://docs.github.com/en/actions/automating-builds-and-tests/building-and-testing-python + +name: Upload PyPI +run-name: Upload ${{ github.event.release.tag_name }} to PyPI + +on: + release: + types: [published] + +permissions: + contents: write + id-token: write + +jobs: + build: + + runs-on: ubuntu-latest + + steps: + - uses: actions/checkout@v3 + - name: Set up Python 3.10 + uses: actions/setup-python@v3 + with: + python-version: "3.10" + cache: 'pip' + - name: Install dependencies + run: | + python -m pip install --upgrade pip + pip install -r requirements.txt + pip install flake8 twine wheel build + - name: Install text2term + run: | + pip install -e . + - name: Lint with flake8 + run: | + # stop the build if there are Python syntax errors or undefined names + flake8 . --count --select=E9,F63,F7,F82 --show-source --statistics + # exit-zero treats all errors as warnings. The GitHub editor is 127 chars wide + flake8 . --count --exit-zero --max-complexity=10 --max-line-length=127 --statistics + - name: Build dist/ + run: | + python -m build --sdist --wheel --no-isolation --outdir dist/ . 
+ - name: Upload to pypi + uses: pypa/gh-action-pypi-publish@release/v1 diff --git a/.github/workflows/upload_testpypi.yml b/.github/workflows/upload_testpypi.yml new file mode 100644 index 0000000..82fec26 --- /dev/null +++ b/.github/workflows/upload_testpypi.yml @@ -0,0 +1,47 @@ +# This workflow will install Python dependencies, run tests and lint with a single version of Python +# For more information see: https://docs.github.com/en/actions/automating-builds-and-tests/building-and-testing-python + +name: Upload Test PyPI +run-name: Upload ${{ github.event.release.tag_name }} to Test PyPI + +on: + release: + types: [published] + +permissions: + contents: write + id-token: write + +jobs: + build: + + runs-on: ubuntu-latest + + steps: + - uses: actions/checkout@v3 + - name: Set up Python 3.10 + uses: actions/setup-python@v3 + with: + python-version: "3.10" + cache: 'pip' + - name: Install dependencies + run: | + python -m pip install --upgrade pip + pip install -r requirements.txt + pip install flake8 twine wheel build + - name: Install text2term + run: | + pip install -e . + - name: Lint with flake8 + run: | + # stop the build if there are Python syntax errors or undefined names + flake8 . --count --select=E9,F63,F7,F82 --show-source --statistics + # exit-zero treats all errors as warnings. The GitHub editor is 127 chars wide + flake8 . --count --exit-zero --max-complexity=10 --max-line-length=127 --statistics + - name: Build dist/ + run: | + python -m build --sdist --wheel --no-isolation --outdir dist/ . 
+ - name: Upload to pypi + uses: pypa/gh-action-pypi-publish@release/v1 + with: + repository-url: https://test.pypi.org/legacy/ diff --git a/requirements.txt b/requirements.txt index 62e5bea..cf8334d 100644 --- a/requirements.txt +++ b/requirements.txt @@ -13,4 +13,4 @@ bioregistry~=0.10.6 nltk~=3.8.1 rapidfuzz~=2.13.7 shortuuid~=1.0.11 -myst-parser~=2.0.0 \ No newline at end of file +myst_parser~=2.0.0 \ No newline at end of file diff --git a/test/simple_tests.py b/test/simple_tests.py index fc51d21..305281e 100644 --- a/test/simple_tests.py +++ b/test/simple_tests.py @@ -8,6 +8,7 @@ pd.set_option('display.max_columns', None) + class Text2TermTestSuite(unittest.TestCase): @classmethod @@ -51,6 +52,7 @@ def test_caching_ontology_set(self): assert len(caches) == nr_ontologies_in_registry def test_mapping_to_cached_ontology(self): + self.ensure_cache_exists("EFO", self.EFO_URL) # Test mapping a list of terms to EFO loaded from cache print("Test mapping a list of terms to EFO loaded from cache...") mappings_efo_cache = text2term.map_terms(["asthma", "disease location", "food allergy"], target_ontology="EFO", @@ -72,23 +74,25 @@ def test_mapping_to_cached_ontology(self): print(f"...{mappings_match}") assert mappings_match is True - def test_mapping_to_cached_efo_using_syntactic_mapper(self): + def test_mapping_to_cached_ontology_using_syntactic_mapper(self): + self.ensure_cache_exists("EFO", self.EFO_URL) # Test mapping a list of terms to cached EFO using Jaro-Winkler syntactic similarity metric - print("Test mapping a list of terms to cached EFO using Jaro-Winkler syntactic similarity metric...") + print("Test mapping a list of terms to cached ontology using Jaro-Winkler syntactic similarity metric...") df = text2term.map_terms(["asthma", "disease location", "food allergy"], "EFO", use_cache=True, mapper=text2term.Mapper.JARO_WINKLER, term_type=OntologyTermType.ANY) print(f"{df}\n") assert df.size > 0 - def test_mapping_to_efo_using_ontology_acronym(self): - # Test 
mapping a list of terms to EFO by specifying the ontology acronym, which gets resolved by bioregistry + def test_mapping_using_ontology_acronym(self): + # Test mapping a list of terms by specifying the target ontology acronym, which gets resolved by bioregistry print( "Test mapping a list of terms to EFO by specifying the ontology acronym, which gets resolved by bioregistry") - df2 = text2term.map_terms(["contains", "asthma"], "EFO", term_type=OntologyTermType.CLASS) + df2 = text2term.map_terms(["contains", "asthma"], "MONDO", term_type=OntologyTermType.CLASS) print(f"{df2}\n") assert df2.size > 0 def test_mapping_tagged_terms(self): + self.ensure_cache_exists("EFO", self.EFO_URL) # Test mapping a dictionary of tagged terms to cached EFO, and include unmapped terms in the output print("Test mapping a dictionary of tagged terms to cached EFO, and include unmapped terms in the output...") df3 = text2term.map_terms( @@ -100,6 +104,7 @@ def test_mapping_tagged_terms(self): assert df3[self.TAGS_COLUMN].str.contains("measurement").any() def test_preprocessing_from_file(self): + self.ensure_cache_exists("EFO", self.EFO_URL) # Test processing tagged terms where the tags are provided in a file print("Test processing tagged terms where the tags are provided in a file...") tagged_terms = text2term.preprocess_tagged_terms("simple_preprocess.txt") @@ -119,8 +124,7 @@ def test_mapping_to_properties(self): # Test mapping a list of properties to EFO loaded from cache and restrict search to properties print("Test mapping a list of properties to EFO loaded from cache and restrict search to properties...") - if not text2term.cache_exists("EFO"): - text2term.cache_ontology(ontology_url=self.EFO_URL, ontology_acronym="EFO") + self.ensure_cache_exists("EFO", self.EFO_URL) df6 = text2term.map_terms(source_terms=["contains", "location"], target_ontology="EFO", use_cache=True, term_type=OntologyTermType.PROPERTY) print(f"{df6}\n") @@ -184,6 +188,7 @@ def 
test_term_collector_iri_limit_properties_only(self): assert len(terms) == expected_nr_properties_with_efo_iri def test_mapping_with_min_score_filter(self): + self.ensure_cache_exists("EFO", self.EFO_URL) min_score = 0.6 search_terms = ["asthma attack", "location"] @@ -203,11 +208,13 @@ def test_mapping_with_min_score_filter(self): assert (df_leven[self.MAPPING_SCORE_COLUMN] >= min_score).all() def test_include_unmapped_terms(self): + self.ensure_cache_exists("EFO", self.EFO_URL) df = text2term.map_terms(["asthma", "margarita"], target_ontology="EFO", use_cache=True, mapper=Mapper.TFIDF, incl_unmapped=True, min_score=0.8) assert df[self.TAGS_COLUMN].str.contains("unmapped").any() def test_include_unmapped_terms_when_mappings_df_is_empty(self): + self.ensure_cache_exists("EFO", self.EFO_URL) df = text2term.map_terms(["mojito", "margarita"], target_ontology="EFO", use_cache=True, mapper=Mapper.TFIDF, incl_unmapped=True, min_score=0.8) assert df[self.TAGS_COLUMN].str.contains("unmapped").any() @@ -222,6 +229,10 @@ def check_df_equals(self, df, expected_df): pd.testing.assert_frame_equal(df, expected_df, check_names=False, check_like=True) return True + def ensure_cache_exists(self, ontology_name, ontology_url): + if not text2term.cache_exists(ontology_name): + text2term.cache_ontology(ontology_url=ontology_url, ontology_acronym=ontology_name) + if __name__ == '__main__': unittest.main()