Skip to content

Commit

Permalink
For #2273
Browse files Browse the repository at this point in the history
  • Loading branch information
mugitty committed Mar 28, 2024
1 parent 74d6c54 commit b64cb20
Show file tree
Hide file tree
Showing 6 changed files with 74 additions and 3 deletions.
5 changes: 4 additions & 1 deletion bin/ontobio-parse-assocs.py
Original file line number Diff line number Diff line change
Expand Up @@ -143,9 +143,11 @@ def main():
rule_set = assocparser.RuleSet.ALL

goref_metadata = None
goref_species = None
if args.metadata_dir:
absolute_metadata = os.path.abspath(args.metadata_dir)
goref_metadata = metadata.yamldown_lookup(os.path.join(absolute_metadata, "gorefs"))
goref_metadata = metadata.yamldown_lookup(os.path.join(absolute_metadata, "gorefs"))
goref_species = metadata.yaml_set(absolute_metadata, "go-reference-species.yaml", "taxon_id")

# set configuration
filtered_evidence_file = open(args.filtered_file, "w") if args.filtered_file else None
Expand All @@ -161,6 +163,7 @@ def main():
allow_unmapped_eco=args.allow_unmapped_eco,
gpi_authority_path=args.gpi,
goref_metadata=goref_metadata,
goref_species=goref_species,
rule_set=rule_set
)
p = None
Expand Down
8 changes: 7 additions & 1 deletion bin/validate.py
Original file line number Diff line number Diff line change
Expand Up @@ -210,7 +210,7 @@ def create_parser(config, group, dataset, format="gaf"):
"""

@tools.gzips
def produce_gaf(dataset, source_gaf, ontology_graph, gpipaths=None, paint=False, group="unknown", rule_metadata=None, goref_metadata=None, db_entities=None, group_idspace=None, format="gaf", suppress_rule_reporting_tags=[], annotation_inferences=None, group_metadata=None, extensions_constraints=None, rule_contexts=[], gaf_output_version="2.2", rule_set=assocparser.RuleSet.ALL):
def produce_gaf(dataset, source_gaf, ontology_graph, gpipaths=None, paint=False, group="unknown", rule_metadata=None, goref_metadata=None, goref_species=None, db_entities=None, group_idspace=None, format="gaf", suppress_rule_reporting_tags=[], annotation_inferences=None, group_metadata=None, extensions_constraints=None, rule_contexts=[], gaf_output_version="2.2", rule_set=assocparser.RuleSet.ALL):
filtered_associations = open(os.path.join(os.path.split(source_gaf)[0], "{}_noiea.gaf".format(dataset)), "w")
config = assocparser.AssocParserConfig(
ontology=ontology_graph,
Expand All @@ -220,6 +220,7 @@ def produce_gaf(dataset, source_gaf, ontology_graph, gpipaths=None, paint=False,
paint=paint,
rule_metadata=rule_metadata,
goref_metadata=goref_metadata,
goref_species=goref_species,
entity_idspaces=db_entities,
group_idspace=group_idspace,
suppress_rule_reporting_tags=suppress_rule_reporting_tags,
Expand Down Expand Up @@ -516,9 +517,11 @@ def produce(ctx, group, metadata_dir, gpad, ttl, target, ontology, exclude, base
# extract the titles for the go rules, this is a dictionary comprehension
rule_metadata = metadata.yamldown_lookup(os.path.join(absolute_metadata, "rules"))
goref_metadata = metadata.yamldown_lookup(os.path.join(absolute_metadata, "gorefs"))
goref_species = metadata.yaml_set(absolute_metadata, "go-reference-species", "taxon_id")

click.echo("Found {} GO Rules".format(len(rule_metadata.keys())))
click.echo("Found {} GO_REFs".format(len(goref_metadata.keys())))
click.echo("Found {} GO_REF Species".format(len(goref_species)))

paint_metadata = metadata.dataset_metadata_file(absolute_metadata, "paint")
noctua_metadata = metadata.dataset_metadata_file(absolute_metadata, "noctua")
Expand Down Expand Up @@ -546,6 +549,7 @@ def produce(ctx, group, metadata_dir, gpad, ttl, target, ontology, exclude, base
group=group,
rule_metadata=rule_metadata,
goref_metadata=goref_metadata,
goref_species=goref_species,
db_entities=db_entities,
group_idspace=group_ids,
suppress_rule_reporting_tags=suppress_rule_reporting_tag,
Expand Down Expand Up @@ -654,6 +658,7 @@ def rule(metadata_dir, out, ontology, gaferencer_file):

goref_metadata = metadata.yamldown_lookup(os.path.join(absolute_metadata, "gorefs"))
gorule_metadata = metadata.yamldown_lookup(os.path.join(absolute_metadata, "rules"))
goref_species = metadata.yaml_set(absolute_metadata, "go-reference-species", "taxon_id")

click.echo("Found {} GO Rules".format(len(gorule_metadata.keys())))

Expand All @@ -667,6 +672,7 @@ def rule(metadata_dir, out, ontology, gaferencer_file):
config = assocparser.AssocParserConfig(
ontology=ontology_graph,
goref_metadata=goref_metadata,
goref_species=goref_species,
entity_idspaces=db_entities,
group_idspace=group_ids,
annotation_inferences=gaferences,
Expand Down
2 changes: 2 additions & 0 deletions ontobio/io/assocparser.py
Original file line number Diff line number Diff line change
Expand Up @@ -231,6 +231,7 @@ def __init__(self,
paint=False,
rule_metadata=dict(),
goref_metadata=None,
goref_species=None,
group_metadata=None,
dbxrefs=None,
suppress_rule_reporting_tags=[],
Expand All @@ -255,6 +256,7 @@ def __init__(self,
self.paint = paint
self.rule_metadata = rule_metadata
self.goref_metadata = goref_metadata
self.goref_species = goref_species
self.group_metadata = group_metadata
self.suppress_rule_reporting_tags = suppress_rule_reporting_tags
self.annotation_inferences = annotation_inferences
Expand Down
17 changes: 17 additions & 0 deletions ontobio/io/qc.py
Original file line number Diff line number Diff line change
Expand Up @@ -925,6 +925,22 @@ def test(self, annotation: association.GoAssociation, config: assocparser.AssocP

return self._result(True)

class GoRule64(GoRule):
    """GORULE:0000064: TreeGrafter IEA annotations on GO reference species fail."""

    def __init__(self):
        super().__init__(
            "GORULE:0000064",
            "TreeGrafter ('GO_REF:0000118') IEAs should be filtered for GO reference species",
            FailMode.HARD,
        )

    def test(self, annotation: association.GoAssociation, config: assocparser.AssocParserConfig, group=None) -> TestResult:
        """Check one annotation against the TreeGrafter / reference-species rule.

        The annotation fails only when all three conditions hold: its evidence
        type equals ``iea_eco``, ``GO_REF:0000118`` is among its supporting
        references, and its subject taxon is listed in ``config.goref_species``.
        When ``config.goref_species`` is ``None`` the rule always passes.

        ``group`` is accepted but not used by this rule.
        """
        # Both values are computed up front, before any condition is examined.
        supporting_refs = [str(ref) for ref in annotation.evidence.has_supporting_reference]
        evidence_type = str(annotation.evidence.type)

        # Only IEA evidence is subject to this rule.
        if evidence_type != iea_eco:
            return self._result(True)

        # TreeGrafter reference is GO_REF:0000118
        if 'GO_REF:0000118' not in supporting_refs:
            return self._result(True)

        # Without a configured reference-species set there is nothing to filter on.
        reference_species = config.goref_species
        if reference_species is None:
            return self._result(True)

        is_reference_species = str(annotation.subject.taxon) in reference_species
        return self._result(not is_reference_species)

GoRules = enum.Enum("GoRules", {
"GoRule02": GoRule02(),
"GoRule05": GoRule05(),
Expand All @@ -951,6 +967,7 @@ def test(self, annotation: association.GoAssociation, config: assocparser.AssocP
"GoRule58": GoRule58(),
"GoRule61": GoRule61(),
"GoRule63": GoRule63(),
"GoRule64": GoRule64(),
# GoRule13 at the bottom in order to make all other rules clean up an annotation before reaching 13
"GoRule13": GoRule13()
})
Expand Down
15 changes: 15 additions & 0 deletions ontobio/validation/metadata.py
Original file line number Diff line number Diff line change
Expand Up @@ -171,3 +171,18 @@ def groups(metadata) -> Set[str]:
raise click.ClickException("Could not find or read {}: {}".format(groups_path, str(e)))

return set([group["shorthand"] for group in groups_list])


def yaml_set(metadata, yaml_file_name, field) -> Set[str]:
    """Collect the distinct values of ``field`` across the entries of a YAML file.

    Args:
        metadata: Path of the metadata directory; it is made absolute before use.
        yaml_file_name: Name of the YAML file inside ``metadata``. It is joined
            to the directory exactly as given -- no extension is appended.
        field: Key looked up in every entry of the loaded document.

    Returns:
        The set of ``entry[field]`` values. An empty set is returned when the
        document has no content.

    Raises:
        click.ClickException: If the file cannot be opened or parsed.
        KeyError: If an entry has no ``field`` key (presumably each entry is a
            mapping; verify against the metadata file).
    """
    yaml_path = os.path.join(os.path.abspath(metadata), yaml_file_name)
    try:
        with open(yaml_path, "r") as yaml_file:
            click.echo("Found yaml file at {path}".format(path=yaml_path))
            # NOTE(review): FullLoader accepts more than plain data tags;
            # yaml.safe_load is the stricter choice if this file could ever
            # come from an untrusted source.
            entries = yaml.load(yaml_file, Loader=yaml.FullLoader)
    except Exception as e:
        raise click.ClickException("Could not find or read {}: {}".format(yaml_path, str(e))) from e

    # An empty YAML document loads as None; iterating it would raise a
    # TypeError outside the try block above, so treat it as "no entries".
    if entries is None:
        return set()

    # The loop variable is deliberately not named `yaml`, which would shadow
    # the module inside the comprehension.
    return {entry[field] for entry in entries}



30 changes: 29 additions & 1 deletion tests/test_qc.py
Original file line number Diff line number Diff line change
Expand Up @@ -775,6 +775,34 @@ def test_go_rule_63():
assoc = make_annotation(evidence="ISO", withfrom="").associations[0]
test_result = qc.GoRule63().test(assoc, all_rules_config())
assert test_result.result_type == qc.ResultType.WARNING

def test_go_rule_64():
    """GoRule64 errors only for IEA + GO_REF:0000118 on a configured reference species."""
    config = assocparser.AssocParserConfig(
        goref_species={"NCBITaxon:7227", "NCBITaxon:123"},
        rule_set=assocparser.RuleSet.ALL,
    )

    # (evidence code, supporting reference, subject taxon, expected result type)
    scenarios = (
        # TreeGrafter IEA, but the taxon is not a reference species
        ("IEA", "GO_REF:0000118", "NCBITaxon:678", qc.ResultType.PASS),
        # Reference species and TreeGrafter reference, but evidence is not IEA
        ("IBA", "GO_REF:0000118", "NCBITaxon:123", qc.ResultType.PASS),
        # Reference species and IEA, but not the TreeGrafter reference
        ("IEA", "GO_REF:0000123", "NCBITaxon:123", qc.ResultType.PASS),
        # All three conditions met
        ("IEA", "GO_REF:0000118", "NCBITaxon:7227", qc.ResultType.ERROR),
    )

    for evidence_code, reference, taxon, expected in scenarios:
        assoc = make_annotation(evidence=evidence_code, references=reference).associations[0]
        assoc.subject.taxon = Curie.from_str(taxon)
        test_result = qc.GoRule64().test(assoc, config)
        assert test_result.result_type == expected

def test_all_rules():
# pass
Expand All @@ -791,7 +819,7 @@ def test_all_rules():
assoc = gafparser.to_association(a).associations[0]

test_results = qc.test_go_rules(assoc, config).all_results
assert len(test_results.keys()) == 25
assert len(test_results.keys()) == 26
assert test_results[qc.GoRules.GoRule26.value].result_type == qc.ResultType.PASS
assert test_results[qc.GoRules.GoRule29.value].result_type == qc.ResultType.PASS

Expand Down

0 comments on commit b64cb20

Please sign in to comment.