Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Fixing testing #48

Merged
merged 7 commits into from
Mar 28, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Original file line number Diff line number Diff line change
@@ -1,7 +1,8 @@
# This workflow will install Python dependencies, run tests and lint with a single version of Python
# For more information see: https://docs.github.com/en/actions/automating-builds-and-tests/building-and-testing-python

name: Python application
name: GitHub Unit Testing
run-name: Unit Testing on ${{ github.event_name }}

on:
push:
Expand All @@ -23,17 +24,12 @@ jobs:
uses: actions/setup-python@v3
with:
python-version: "3.10"
- name: show python path
run: |
python -c "import sys; print('\n'.join(sys.path))"
cache: 'pip'
- name: Install dependencies
run: |
python -m pip install --upgrade pip
pip install -r requirements.txt
pip install flake8 pytest
- name: Check package location
run: |
pip show pandas
pip install flake8 pytest wheel
- name: Install text2term
run: |
pip install -e .
Expand Down
45 changes: 45 additions & 0 deletions .github/workflows/upload_pypi.yml
Original file line number Diff line number Diff line change
@@ -0,0 +1,45 @@
# This workflow builds the package and uploads it to PyPI when a GitHub release is published.
# For more information see: https://docs.github.com/en/actions/automating-builds-and-tests/building-and-testing-python

name: Upload PyPI
run-name: Upload ${{ github.event.release.tag_name }} to PyPI

on:
  release:
    types: [published]

permissions:
  contents: write
  # id-token: write is required for PyPI "trusted publishing"
  # used by pypa/gh-action-pypi-publish below.
  id-token: write

jobs:
  build:

    runs-on: ubuntu-latest

    steps:
      - uses: actions/checkout@v3
      - name: Set up Python 3.10
        uses: actions/setup-python@v3
        with:
          python-version: "3.10"
          cache: 'pip'
      - name: Install dependencies
        run: |
          python -m pip install --upgrade pip
          pip install -r requirements.txt
          # NOTE: "sdist" removed — it is an unrelated PyPI package; source
          # distributions are produced by "python -m build --sdist" below.
          # "twine" removed — the upload is handled by pypa/gh-action-pypi-publish.
          pip install flake8 wheel build
      - name: Install text2term
        run: |
          pip install -e .
      - name: Lint with flake8
        run: |
          # stop the build if there are Python syntax errors or undefined names
          flake8 . --count --select=E9,F63,F7,F82 --show-source --statistics
          # exit-zero treats all errors as warnings. The GitHub editor is 127 chars wide
          flake8 . --count --exit-zero --max-complexity=10 --max-line-length=127 --statistics
      - name: Build dist/
        run: |
          python -m build --sdist --wheel --no-isolation --outdir dist/ .
      - name: Upload to pypi
        uses: pypa/gh-action-pypi-publish@release/v1
47 changes: 47 additions & 0 deletions .github/workflows/upload_testpypi.yml
Original file line number Diff line number Diff line change
@@ -0,0 +1,47 @@
# This workflow builds the package and uploads it to Test PyPI when a GitHub release is published.
# For more information see: https://docs.github.com/en/actions/automating-builds-and-tests/building-and-testing-python

name: Upload Test PyPI
run-name: Upload ${{ github.event.release.tag_name }} to Test PyPI

on:
  release:
    types: [published]

permissions:
  contents: write
  # id-token: write is required for PyPI "trusted publishing"
  # used by pypa/gh-action-pypi-publish below.
  id-token: write

jobs:
  build:

    runs-on: ubuntu-latest

    steps:
      - uses: actions/checkout@v3
      - name: Set up Python 3.10
        uses: actions/setup-python@v3
        with:
          python-version: "3.10"
          cache: 'pip'
      - name: Install dependencies
        run: |
          python -m pip install --upgrade pip
          pip install -r requirements.txt
          # NOTE: "sdist" removed — it is an unrelated PyPI package; source
          # distributions are produced by "python -m build --sdist" below.
          # "twine" removed — the upload is handled by pypa/gh-action-pypi-publish.
          pip install flake8 wheel build
      - name: Install text2term
        run: |
          pip install -e .
      - name: Lint with flake8
        run: |
          # stop the build if there are Python syntax errors or undefined names
          flake8 . --count --select=E9,F63,F7,F82 --show-source --statistics
          # exit-zero treats all errors as warnings. The GitHub editor is 127 chars wide
          flake8 . --count --exit-zero --max-complexity=10 --max-line-length=127 --statistics
      - name: Build dist/
        run: |
          python -m build --sdist --wheel --no-isolation --outdir dist/ .
      - name: Upload to pypi
        uses: pypa/gh-action-pypi-publish@release/v1
        with:
          # route the upload to the Test PyPI index instead of production PyPI
          repository-url: https://test.pypi.org/legacy/
2 changes: 1 addition & 1 deletion requirements.txt
Original file line number Diff line number Diff line change
Expand Up @@ -13,4 +13,4 @@ bioregistry~=0.10.6
nltk~=3.8.1
rapidfuzz~=2.13.7
shortuuid~=1.0.11
myst-parser~=2.0.0
myst_parser~=2.0.0
25 changes: 18 additions & 7 deletions test/simple_tests.py
Original file line number Diff line number Diff line change
Expand Up @@ -8,6 +8,7 @@

pd.set_option('display.max_columns', None)


class Text2TermTestSuite(unittest.TestCase):

@classmethod
Expand Down Expand Up @@ -51,6 +52,7 @@ def test_caching_ontology_set(self):
assert len(caches) == nr_ontologies_in_registry

def test_mapping_to_cached_ontology(self):
self.ensure_cache_exists("EFO", self.EFO_URL)
# Test mapping a list of terms to EFO loaded from cache
print("Test mapping a list of terms to EFO loaded from cache...")
mappings_efo_cache = text2term.map_terms(["asthma", "disease location", "food allergy"], target_ontology="EFO",
Expand All @@ -72,23 +74,25 @@ def test_mapping_to_cached_ontology(self):
print(f"...{mappings_match}")
assert mappings_match is True

def test_mapping_to_cached_efo_using_syntactic_mapper(self):
def test_mapping_to_cached_ontology_using_syntactic_mapper(self):
self.ensure_cache_exists("EFO", self.EFO_URL)
# Test mapping a list of terms to cached EFO using Jaro-Winkler syntactic similarity metric
print("Test mapping a list of terms to cached EFO using Jaro-Winkler syntactic similarity metric...")
print("Test mapping a list of terms to cached ontology using Jaro-Winkler syntactic similarity metric...")
df = text2term.map_terms(["asthma", "disease location", "food allergy"], "EFO", use_cache=True,
mapper=text2term.Mapper.JARO_WINKLER, term_type=OntologyTermType.ANY)
print(f"{df}\n")
assert df.size > 0

def test_mapping_to_efo_using_ontology_acronym(self):
# Test mapping a list of terms to EFO by specifying the ontology acronym, which gets resolved by bioregistry
def test_mapping_using_ontology_acronym(self):
# Test mapping a list of terms by specifying the target ontology acronym, which gets resolved by bioregistry
print(
"Test mapping a list of terms to EFO by specifying the ontology acronym, which gets resolved by bioregistry")
df2 = text2term.map_terms(["contains", "asthma"], "EFO", term_type=OntologyTermType.CLASS)
df2 = text2term.map_terms(["contains", "asthma"], "MONDO", term_type=OntologyTermType.CLASS)
print(f"{df2}\n")
assert df2.size > 0

def test_mapping_tagged_terms(self):
self.ensure_cache_exists("EFO", self.EFO_URL)
# Test mapping a dictionary of tagged terms to cached EFO, and include unmapped terms in the output
print("Test mapping a dictionary of tagged terms to cached EFO, and include unmapped terms in the output...")
df3 = text2term.map_terms(
Expand All @@ -100,6 +104,7 @@ def test_mapping_tagged_terms(self):
assert df3[self.TAGS_COLUMN].str.contains("measurement").any()

def test_preprocessing_from_file(self):
self.ensure_cache_exists("EFO", self.EFO_URL)
# Test processing tagged terms where the tags are provided in a file
print("Test processing tagged terms where the tags are provided in a file...")
tagged_terms = text2term.preprocess_tagged_terms("simple_preprocess.txt")
Expand All @@ -119,8 +124,7 @@ def test_mapping_to_properties(self):

# Test mapping a list of properties to EFO loaded from cache and restrict search to properties
print("Test mapping a list of properties to EFO loaded from cache and restrict search to properties...")
if not text2term.cache_exists("EFO"):
text2term.cache_ontology(ontology_url=self.EFO_URL, ontology_acronym="EFO")
self.ensure_cache_exists("EFO", self.EFO_URL)
df6 = text2term.map_terms(source_terms=["contains", "location"], target_ontology="EFO", use_cache=True,
term_type=OntologyTermType.PROPERTY)
print(f"{df6}\n")
Expand Down Expand Up @@ -184,6 +188,7 @@ def test_term_collector_iri_limit_properties_only(self):
assert len(terms) == expected_nr_properties_with_efo_iri

def test_mapping_with_min_score_filter(self):
self.ensure_cache_exists("EFO", self.EFO_URL)
min_score = 0.6
search_terms = ["asthma attack", "location"]

Expand All @@ -203,11 +208,13 @@ def test_mapping_with_min_score_filter(self):
assert (df_leven[self.MAPPING_SCORE_COLUMN] >= min_score).all()

def test_include_unmapped_terms(self):
self.ensure_cache_exists("EFO", self.EFO_URL)
df = text2term.map_terms(["asthma", "margarita"], target_ontology="EFO", use_cache=True, mapper=Mapper.TFIDF,
incl_unmapped=True, min_score=0.8)
assert df[self.TAGS_COLUMN].str.contains("unmapped").any()

def test_include_unmapped_terms_when_mappings_df_is_empty(self):
self.ensure_cache_exists("EFO", self.EFO_URL)
df = text2term.map_terms(["mojito", "margarita"], target_ontology="EFO", use_cache=True, mapper=Mapper.TFIDF,
incl_unmapped=True, min_score=0.8)
assert df[self.TAGS_COLUMN].str.contains("unmapped").any()
Expand All @@ -222,6 +229,10 @@ def check_df_equals(self, df, expected_df):
pd.testing.assert_frame_equal(df, expected_df, check_names=False, check_like=True)
return True

def ensure_cache_exists(self, ontology_name, ontology_url):
if not text2term.cache_exists(ontology_name):
text2term.cache_ontology(ontology_url=ontology_url, ontology_acronym=ontology_name)


if __name__ == '__main__':
unittest.main()
Loading