Skip to content

Commit

Permalink
Merge pull request #6 from monarch-initiative/use-oak
Browse files Browse the repository at this point in the history
Use Ontology Access Kit in OxoEndpoint methods
  • Loading branch information
cmungall authored Apr 18, 2022
2 parents 597b21f + 83cddc7 commit 952596f
Show file tree
Hide file tree
Showing 9 changed files with 619 additions and 127 deletions.
523 changes: 518 additions & 5 deletions poetry.lock

Large diffs are not rendered by default.

1 change: 1 addition & 0 deletions pyproject.toml
Original file line number Diff line number Diff line change
Expand Up @@ -8,6 +8,7 @@ authors = ["cmungall <cjm@berkeleybop.org>"]
python = "^3.9"
sssom = "^0.3.7"
bioregistry = "^0.4.76"
oaklib = "^0.1.6"

[tool.poetry.dev-dependencies]
pytest = "^5.2"
Expand Down
8 changes: 4 additions & 4 deletions src/mapping_walker/pipeline/pipeline.py
Original file line number Diff line number Diff line change
Expand Up @@ -137,7 +137,7 @@ def load_mappings_and_write_ptable(self):


def write_ptable(self, doc: MappingSetDocument, output: Union[Path, str, TextIO] = sys.stdout):
if not isinstance(output, TextIO):
if isinstance(output, Path) or isinstance(output, str):
output = open(str(output), 'w', encoding='utf-8')
for mapping in doc.mapping_set.mappings:
if not mapping.confidence:
Expand Down Expand Up @@ -172,11 +172,11 @@ def add_label(curie: str, label: str):
g.serialize(destination=output)

def write_prefixmap(self, doc: MappingSetDocument, output: Union[str, Path, TextIO] = sys.stdout):
    """
    Serialise the prefix map of a mapping set document as YAML.

    :param doc: document whose ``prefix_map`` is dumped
    :param output: either a filesystem path (``str`` or ``Path``), which is
        opened for writing as UTF-8 and closed again afterwards, or an
        already-open text stream, which is written to and left open for
        the caller
    """
    # Decide on "is it a path?" rather than "is it a stream?": typing.TextIO
    # is not a runtime base class of real file objects, so an isinstance
    # check against it does not recognise them.
    if isinstance(output, (str, Path)):
        with open(output, 'w', encoding='utf-8') as stream:
            yaml.dump(doc.prefix_map, stream=stream)
    else:
        yaml.dump(doc.prefix_map, stream=output)

@click.command()
@click.option("-v", "--verbose", count=True)
Expand Down
70 changes: 16 additions & 54 deletions src/mapping_walker/walkers/endpoints.py
Original file line number Diff line number Diff line change
@@ -1,23 +1,16 @@
import logging
from dataclasses import dataclass, field
from typing import Union, Dict, List
from typing import Dict, List, Union

import requests
import urllib
from mapping_walker.ext_schemas.oxo import Container, OntologyIdentifier, ScopeEnum
from mapping_walker.utils.oxo_utils import load_oxo_payload
from mapping_walker.utils.sssom_utils import all_curies_in_doc, get_iri_from_curie, curie_to_uri_map
from sssom.sssom_datamodel import MappingSet, Mapping, MatchTypeEnum

from mapping_walker.pipeline.pipeline_config import PipelineConfiguration, EndpointConfiguration
from oaklib.datamodels.vocabulary import IS_A
from oaklib.implementations.ols.ols_implementation import OlsImplementation
from sssom.sssom_datamodel import Mapping, MappingSet, MatchTypeEnum
from sssom.sssom_document import MappingSetDocument

oxo_pred_mappings = {
ScopeEnum.EXACT.text: 'skos:exactMatch',
ScopeEnum.BROADER.text: 'skos:broadMatch',
ScopeEnum.NARROWER.text: 'skos:narrowMatch',
ScopeEnum.RELATED.text: 'skos:closeMatch',
}
from mapping_walker.ext_schemas.oxo import OntologyIdentifier, ScopeEnum
from mapping_walker.pipeline.pipeline_config import EndpointConfiguration
from mapping_walker.utils.sssom_utils import curie_to_uri_map


@dataclass
class Endpoint:
Expand Down Expand Up @@ -47,52 +40,21 @@ def add_prefix(self, curie: str, uri: str):

@dataclass
class OxoEndpoint(Endpoint):
base_url = "https://www.ebi.ac.uk/spot/oxo/api/mappings"
ols_base_url = "https://www.ebi.ac.uk/ols/api/ontologies/"

def __post_init__(self):
    """
    Attach a freshly constructed ``OlsImplementation`` to the instance as ``self.ols``.
    """
    self.ols = OlsImplementation()

def get_direct_mappings(self, curie: Union[str, OntologyIdentifier]) -> MappingSetDocument:
    """
    Fetch the mappings recorded for a single term and wrap them in a document.

    The lookup is delegated to the OAK implementation held in ``self.ols``.

    :param curie: identifier of the term to look up
    :return: document holding one mapping set together with the prefix map
        reported by ``self.ols``
    """
    # Materialise the result so the mapping set holds a concrete list.
    direct_mappings = list(self.ols.get_sssom_mappings_by_curie(curie))
    # NOTE(review): the licence value looks like a placeholder URI - confirm.
    mapping_set = MappingSet(mapping_set_id=OlsImplementation.base_url,
                             license='http://example.org/mixed',
                             mappings=direct_mappings)
    return MappingSetDocument(mapping_set=mapping_set,
                              prefix_map=self.ols.get_prefix_map())

def get_ancestors(self, term_id: str, ontology: str = None) -> List[str]:
    """
    List the is-a ancestors of a term.

    :param term_id: identifier of the term whose ancestors are wanted
    :param ontology: ontology to restrict the lookup to; it is assigned to
        ``self.ols.focus_ontology`` before the query is made
    :return: the identifiers produced by ``self.ols.ancestors`` when restricted
        to the ``IS_A`` predicate
    """
    self.ols.focus_ontology = ontology
    # list(): honour the List[str] contract even if OAK hands back a one-shot
    # iterator, so callers can take len() or iterate more than once.
    return list(self.ols.ancestors(term_id, predicates=[IS_A]))

def fill_gaps(self, msdoc: MappingSetDocument, confidence: float = 1.0) -> int:
Expand Down
22 changes: 12 additions & 10 deletions tests/test_boomer.py
Original file line number Diff line number Diff line change
@@ -1,5 +1,6 @@
import json
from pathlib import Path
import unittest

from linkml_runtime.loaders import json_loader, yaml_loader
from mapping_walker.pipeline.pipeline import Pipeline
Expand All @@ -8,16 +9,17 @@

from tests import INPUT_DIR, OUTPUT_DIR

class TestBoomer(unittest.TestCase):
    """Smoke test for the output writers of ``Pipeline``."""

    def test_outputs(self):
        """
        Load a stored walk and write the ptable, ontology and prefix map outputs.

        The test passes when none of the writers raises.
        """
        out_dir = Path(OUTPUT_DIR)
        infile = str(Path(INPUT_DIR) / 'oxo-walk.yaml')
        mapping_set = yaml_loader.load(infile, target_class=MappingSet)
        doc = MappingSetDocument(mapping_set=mapping_set, prefix_map={})
        pipeline = Pipeline()

        with open(str(out_dir / 'oxo-walk.ptable.tsv'), 'w', encoding='utf-8') as stream:
            pipeline.write_ptable(doc, output=stream)

        pipeline.write_ontology(doc, str(out_dir / 'oxo-walk.ontology.ttl'))

        with open(str(out_dir / 'oxo-walk.prefixes.yaml'), 'w', encoding='utf-8') as stream:
            pipeline.write_prefixmap(doc, output=stream)


29 changes: 16 additions & 13 deletions tests/test_mapping_walker.py
Original file line number Diff line number Diff line change
@@ -1,4 +1,5 @@
from pathlib import Path
import unittest

from mapping_walker import __version__
from mapping_walker.pipeline.pipeline import Pipeline
Expand All @@ -9,17 +10,19 @@
WD = Path(OUTPUT_DIR) / 'tmp'
CURIE = 'UBERON:0013141'

class TestMappingWalker(unittest.TestCase):

    def test_pipeline(self):
        """
        test end to end pipeline, using OxO endpoint

        Expects the run for ``CURIE`` to report exactly two PNGs.
        """
        endpoint_conf = EndpointConfiguration(type=EndpointEnum.OxO)
        conf = PipelineConfiguration(working_directory=str(WD),
                                     stylesheet=str(Path(INPUT_DIR) / 'style.json'),
                                     endpoint_configurations=[endpoint_conf])
        result = Pipeline(configuration=conf).run(CURIE)
        print(f'PNGS={result.pngs}')
        # assertEqual reports both values on failure, unlike a bare assert
        self.assertEqual(len(result.pngs), 2)

22 changes: 14 additions & 8 deletions tests/test_oxo_endpoint.py
Original file line number Diff line number Diff line change
@@ -1,5 +1,6 @@
import json
from pathlib import Path
import unittest

from mapping_walker.utils.sssom_utils import save_mapping_set_doc, get_iri_from_curie

Expand All @@ -16,12 +17,17 @@

OUT_FILE = Path(OUTPUT_DIR) / 'uberon-oxo-result.yaml'

class TestOxOEndpoint(unittest.TestCase):

    def test_oxo_endpoint(self):
        """
        Fetch direct mappings for one UBERON term, save them, and check the result.

        Expects a mapping whose object is ``FMA:45632`` and a non-empty ``FMA``
        entry in the document's prefix map.
        """
        endpoint = OxoEndpoint()
        msdoc = endpoint.get_direct_mappings('UBERON:0013141')
        save_mapping_set_doc(msdoc, OUT_FILE)

        mappings = msdoc.mapping_set.mappings
        for m in mappings:
            uri = get_iri_from_curie(m.object_id, msdoc)
            print(f'{m.object_id} = {uri}')

        # Test the predicate itself rather than the truthiness of the
        # mapping objects that happen to satisfy it.
        self.assertTrue(any(m.object_id == 'FMA:45632' for m in mappings))
        self.assertIn('FMA', msdoc.prefix_map)
        self.assertTrue(msdoc.prefix_map['FMA'])

54 changes: 28 additions & 26 deletions tests/test_oxo_walker.py
Original file line number Diff line number Diff line change
@@ -1,5 +1,6 @@
import json
from pathlib import Path
import unittest

from mapping_walker.pipeline.pipeline_config import PipelineConfiguration
from mapping_walker.utils.sssom_utils import load_mapping_set_doc, save_mapping_set_doc
Expand All @@ -19,29 +20,30 @@
CURIE = 'UBERON:0013141'


class TestOxOWalker(unittest.TestCase):

    def test_oxo_walker(self):
        """
        Tests iterative walking over OxO endpoint

        Expects more than four mappings for ``CURIE``; the resulting mapping
        set is also written to the output directory as YAML.
        """
        walker = MappingWalker(endpoints=[OxoEndpoint()])
        config = PipelineConfiguration(max_clique_size=100, max_hops=3)
        msdoc = walker.walk(CURIE, config=config)
        self.assertGreater(len(msdoc.mapping_set.mappings), 4)
        with open(str(Path(OUTPUT_DIR) / 'oxo-walk.yaml'), 'w', encoding='utf-8') as stream:
            stream.write(yaml_dumper.dumps(msdoc.mapping_set))

    def test_oxo_fill_gaps(self):
        """
        uses a predefined mapping file to test gap-filling using OLS

        Expects ``fill_gaps`` to report a positive count.
        """
        msdoc = load_mapping_set_doc(str(Path(INPUT_DIR) / 'neoplasm.sssom.yaml'))
        n = OxoEndpoint().fill_gaps(msdoc)
        print(n)
        save_mapping_set_doc(msdoc, str(Path(OUTPUT_DIR) / 'neoplasm-gap-filled.sssom.yaml'))
        for m in msdoc.mapping_set.mappings:
            if m.predicate_id == 'rdfs:subClassOf':
                print(yaml_dumper.dumps(m))
        self.assertGreater(n, 0)
17 changes: 10 additions & 7 deletions tests/test_schema.py
Original file line number Diff line number Diff line change
@@ -1,5 +1,6 @@
import json
from pathlib import Path
import unittest

from linkml_runtime.loaders import json_loader
from mapping_walker.utils.oxo_utils import load_oxo_payload
Expand All @@ -9,11 +10,13 @@
from mapping_walker.ext_schemas.oxo import Container


class TestSchema(unittest.TestCase):

    def test_schema(self):
        """
        Load the example OxO JSON payload through ``load_oxo_payload``.
        """
        infile = str(Path(INPUT_DIR) / 'oxo-example-uberon.json')
        with open(infile, encoding='utf-8') as stream:
            obj = json.load(stream)
        print(obj)
        container = load_oxo_payload(obj)
        print(container)
        # Without an assertion the test could only fail by raising.
        self.assertIsNotNone(container)

0 comments on commit 952596f

Please sign in to comment.