Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Fixing testing #48

Merged
merged 7 commits into from
Mar 28, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Original file line number Diff line number Diff line change
@@ -1,7 +1,8 @@
# This workflow will install Python dependencies, run tests and lint with a single version of Python
# For more information see: https://docs.github.com/en/actions/automating-builds-and-tests/building-and-testing-python

name: Python application
name: GitHub Unit Testing
run-name: Unit Testing on ${{ github.event_name }}

on:
push:
Expand All @@ -23,17 +24,12 @@ jobs:
uses: actions/setup-python@v3
with:
python-version: "3.10"
- name: show python path
run: |
python -c "import sys; print('\n'.join(sys.path))"
cache: 'pip'
- name: Install dependencies
run: |
python -m pip install --upgrade pip
pip install -r requirements.txt
pip install flake8 pytest
- name: Check package location
run: |
pip show pandas
pip install flake8 pytest wheel
- name: Install text2term
run: |
pip install -e .
Expand Down
45 changes: 45 additions & 0 deletions .github/workflows/upload_pypi.yml
Original file line number Diff line number Diff line change
@@ -0,0 +1,45 @@
# This workflow builds the package and uploads it to PyPI when a GitHub release is published.
# For more information see: https://docs.github.com/en/actions/automating-builds-and-tests/building-and-testing-python

name: Upload PyPI
run-name: Upload ${{ github.event.release.tag_name }} to PyPI

on:
  release:
    types: [published]

permissions:
  contents: write
  # id-token: write is required for PyPI "trusted publishing"
  # used by pypa/gh-action-pypi-publish below.
  id-token: write

jobs:
  build:

    runs-on: ubuntu-latest

    steps:
      - uses: actions/checkout@v3
      - name: Set up Python 3.10
        uses: actions/setup-python@v3
        with:
          python-version: "3.10"
          cache: 'pip'
      - name: Install dependencies
        run: |
          python -m pip install --upgrade pip
          pip install -r requirements.txt
          # NOTE: "sdist" removed — it is an unrelated PyPI package; source
          # distributions are produced by "python -m build --sdist" below.
          # "twine" removed — the upload is handled by pypa/gh-action-pypi-publish.
          pip install flake8 wheel build
      - name: Install text2term
        run: |
          pip install -e .
      - name: Lint with flake8
        run: |
          # stop the build if there are Python syntax errors or undefined names
          flake8 . --count --select=E9,F63,F7,F82 --show-source --statistics
          # exit-zero treats all errors as warnings. The GitHub editor is 127 chars wide
          flake8 . --count --exit-zero --max-complexity=10 --max-line-length=127 --statistics
      - name: Build dist/
        run: |
          python -m build --sdist --wheel --no-isolation --outdir dist/ .
      - name: Upload to pypi
        uses: pypa/gh-action-pypi-publish@release/v1
47 changes: 47 additions & 0 deletions .github/workflows/upload_testpypi.yml
Original file line number Diff line number Diff line change
@@ -0,0 +1,47 @@
# This workflow builds the package and uploads it to Test PyPI when a GitHub release is published.
# For more information see: https://docs.github.com/en/actions/automating-builds-and-tests/building-and-testing-python

name: Upload Test PyPI
run-name: Upload ${{ github.event.release.tag_name }} to Test PyPI

on:
  release:
    types: [published]

permissions:
  contents: write
  # id-token: write is required for PyPI "trusted publishing"
  # used by pypa/gh-action-pypi-publish below.
  id-token: write

jobs:
  build:

    runs-on: ubuntu-latest

    steps:
      - uses: actions/checkout@v3
      - name: Set up Python 3.10
        uses: actions/setup-python@v3
        with:
          python-version: "3.10"
          cache: 'pip'
      - name: Install dependencies
        run: |
          python -m pip install --upgrade pip
          pip install -r requirements.txt
          # NOTE: "sdist" removed — it is an unrelated PyPI package; source
          # distributions are produced by "python -m build --sdist" below.
          # "twine" removed — the upload is handled by pypa/gh-action-pypi-publish.
          pip install flake8 wheel build
      - name: Install text2term
        run: |
          pip install -e .
      - name: Lint with flake8
        run: |
          # stop the build if there are Python syntax errors or undefined names
          flake8 . --count --select=E9,F63,F7,F82 --show-source --statistics
          # exit-zero treats all errors as warnings. The GitHub editor is 127 chars wide
          flake8 . --count --exit-zero --max-complexity=10 --max-line-length=127 --statistics
      - name: Build dist/
        run: |
          python -m build --sdist --wheel --no-isolation --outdir dist/ .
      - name: Upload to pypi
        uses: pypa/gh-action-pypi-publish@release/v1
        with:
          # route the upload to the Test PyPI index instead of production PyPI
          repository-url: https://test.pypi.org/legacy/
2 changes: 1 addition & 1 deletion requirements.txt
Original file line number Diff line number Diff line change
Expand Up @@ -13,4 +13,4 @@ bioregistry~=0.10.6
nltk~=3.8.1
rapidfuzz~=2.13.7
shortuuid~=1.0.11
myst-parser~=2.0.0
myst_parser~=2.0.0
25 changes: 18 additions & 7 deletions test/simple_tests.py
Original file line number Diff line number Diff line change
Expand Up @@ -8,6 +8,7 @@

pd.set_option('display.max_columns', None)


class Text2TermTestSuite(unittest.TestCase):

@classmethod
Expand Down Expand Up @@ -51,6 +52,7 @@ def test_caching_ontology_set(self):
assert len(caches) == nr_ontologies_in_registry

def test_mapping_to_cached_ontology(self):
self.ensure_cache_exists("EFO", self.EFO_URL)
# Test mapping a list of terms to EFO loaded from cache
print("Test mapping a list of terms to EFO loaded from cache...")
mappings_efo_cache = text2term.map_terms(["asthma", "disease location", "food allergy"], target_ontology="EFO",
Expand All @@ -72,23 +74,25 @@ def test_mapping_to_cached_ontology(self):
print(f"...{mappings_match}")
assert mappings_match is True

def test_mapping_to_cached_efo_using_syntactic_mapper(self):
def test_mapping_to_cached_ontology_using_syntactic_mapper(self):
self.ensure_cache_exists("EFO", self.EFO_URL)
# Test mapping a list of terms to cached EFO using Jaro-Winkler syntactic similarity metric
print("Test mapping a list of terms to cached EFO using Jaro-Winkler syntactic similarity metric...")
print("Test mapping a list of terms to cached ontology using Jaro-Winkler syntactic similarity metric...")
df = text2term.map_terms(["asthma", "disease location", "food allergy"], "EFO", use_cache=True,
mapper=text2term.Mapper.JARO_WINKLER, term_type=OntologyTermType.ANY)
print(f"{df}\n")
assert df.size > 0

def test_mapping_to_efo_using_ontology_acronym(self):
# Test mapping a list of terms to EFO by specifying the ontology acronym, which gets resolved by bioregistry
def test_mapping_using_ontology_acronym(self):
# Test mapping a list of terms by specifying the target ontology acronym, which gets resolved by bioregistry
print(
"Test mapping a list of terms to EFO by specifying the ontology acronym, which gets resolved by bioregistry")
df2 = text2term.map_terms(["contains", "asthma"], "EFO", term_type=OntologyTermType.CLASS)
df2 = text2term.map_terms(["contains", "asthma"], "MONDO", term_type=OntologyTermType.CLASS)
print(f"{df2}\n")
assert df2.size > 0

def test_mapping_tagged_terms(self):
self.ensure_cache_exists("EFO", self.EFO_URL)
# Test mapping a dictionary of tagged terms to cached EFO, and include unmapped terms in the output
print("Test mapping a dictionary of tagged terms to cached EFO, and include unmapped terms in the output...")
df3 = text2term.map_terms(
Expand All @@ -100,6 +104,7 @@ def test_mapping_tagged_terms(self):
assert df3[self.TAGS_COLUMN].str.contains("measurement").any()

def test_preprocessing_from_file(self):
self.ensure_cache_exists("EFO", self.EFO_URL)
# Test processing tagged terms where the tags are provided in a file
print("Test processing tagged terms where the tags are provided in a file...")
tagged_terms = text2term.preprocess_tagged_terms("simple_preprocess.txt")
Expand All @@ -119,8 +124,7 @@ def test_mapping_to_properties(self):

# Test mapping a list of properties to EFO loaded from cache and restrict search to properties
print("Test mapping a list of properties to EFO loaded from cache and restrict search to properties...")
if not text2term.cache_exists("EFO"):
text2term.cache_ontology(ontology_url=self.EFO_URL, ontology_acronym="EFO")
self.ensure_cache_exists("EFO", self.EFO_URL)
df6 = text2term.map_terms(source_terms=["contains", "location"], target_ontology="EFO", use_cache=True,
term_type=OntologyTermType.PROPERTY)
print(f"{df6}\n")
Expand Down Expand Up @@ -184,6 +188,7 @@ def test_term_collector_iri_limit_properties_only(self):
assert len(terms) == expected_nr_properties_with_efo_iri

def test_mapping_with_min_score_filter(self):
self.ensure_cache_exists("EFO", self.EFO_URL)
min_score = 0.6
search_terms = ["asthma attack", "location"]

Expand All @@ -203,11 +208,13 @@ def test_mapping_with_min_score_filter(self):
assert (df_leven[self.MAPPING_SCORE_COLUMN] >= min_score).all()

def test_include_unmapped_terms(self):
self.ensure_cache_exists("EFO", self.EFO_URL)
df = text2term.map_terms(["asthma", "margarita"], target_ontology="EFO", use_cache=True, mapper=Mapper.TFIDF,
incl_unmapped=True, min_score=0.8)
assert df[self.TAGS_COLUMN].str.contains("unmapped").any()

def test_include_unmapped_terms_when_mappings_df_is_empty(self):
self.ensure_cache_exists("EFO", self.EFO_URL)
df = text2term.map_terms(["mojito", "margarita"], target_ontology="EFO", use_cache=True, mapper=Mapper.TFIDF,
incl_unmapped=True, min_score=0.8)
assert df[self.TAGS_COLUMN].str.contains("unmapped").any()
Expand All @@ -222,6 +229,10 @@ def check_df_equals(self, df, expected_df):
pd.testing.assert_frame_equal(df, expected_df, check_names=False, check_like=True)
return True

def ensure_cache_exists(self, ontology_name, ontology_url):
if not text2term.cache_exists(ontology_name):
text2term.cache_ontology(ontology_url=ontology_url, ontology_acronym=ontology_name)


if __name__ == '__main__':
unittest.main()
Loading