diff --git a/bin/ontobio-parse-assocs.py b/bin/ontobio-parse-assocs.py index 14c49a4c..a5313046 100755 --- a/bin/ontobio-parse-assocs.py +++ b/bin/ontobio-parse-assocs.py @@ -143,9 +143,11 @@ def main(): rule_set = assocparser.RuleSet.ALL goref_metadata = None + goref_species = None if args.metadata_dir: absolute_metadata = os.path.abspath(args.metadata_dir) - goref_metadata = metadata.yamldown_lookup(os.path.join(absolute_metadata, "gorefs")) + goref_metadata = metadata.yamldown_lookup(os.path.join(absolute_metadata, "gorefs")) + goref_species = metadata.yaml_set(absolute_metadata, "go-reference-species.yaml", "taxon_id") # set configuration filtered_evidence_file = open(args.filtered_file, "w") if args.filtered_file else None @@ -161,6 +163,7 @@ def main(): allow_unmapped_eco=args.allow_unmapped_eco, gpi_authority_path=args.gpi, goref_metadata=goref_metadata, + goref_species=goref_species, rule_set=rule_set ) p = None diff --git a/bin/validate.py b/bin/validate.py index 9dd3f35d..ac3e658c 100755 --- a/bin/validate.py +++ b/bin/validate.py @@ -210,7 +210,7 @@ def create_parser(config, group, dataset, format="gaf"): """ @tools.gzips -def produce_gaf(dataset, source_gaf, ontology_graph, gpipaths=None, paint=False, group="unknown", rule_metadata=None, goref_metadata=None, db_entities=None, group_idspace=None, format="gaf", suppress_rule_reporting_tags=[], annotation_inferences=None, group_metadata=None, extensions_constraints=None, rule_contexts=[], gaf_output_version="2.2", rule_set=assocparser.RuleSet.ALL): +def produce_gaf(dataset, source_gaf, ontology_graph, gpipaths=None, paint=False, group="unknown", rule_metadata=None, goref_metadata=None, goref_species=None, db_entities=None, group_idspace=None, format="gaf", suppress_rule_reporting_tags=[], annotation_inferences=None, group_metadata=None, extensions_constraints=None, rule_contexts=[], gaf_output_version="2.2", rule_set=assocparser.RuleSet.ALL): filtered_associations = 
open(os.path.join(os.path.split(source_gaf)[0], "{}_noiea.gaf".format(dataset)), "w") config = assocparser.AssocParserConfig( ontology=ontology_graph, @@ -220,6 +220,7 @@ def produce_gaf(dataset, source_gaf, ontology_graph, gpipaths=None, paint=False, paint=paint, rule_metadata=rule_metadata, goref_metadata=goref_metadata, + goref_species=goref_species, entity_idspaces=db_entities, group_idspace=group_idspace, suppress_rule_reporting_tags=suppress_rule_reporting_tags, @@ -516,9 +517,11 @@ def produce(ctx, group, metadata_dir, gpad, ttl, target, ontology, exclude, base # extract the titles for the go rules, this is a dictionary comprehension rule_metadata = metadata.yamldown_lookup(os.path.join(absolute_metadata, "rules")) goref_metadata = metadata.yamldown_lookup(os.path.join(absolute_metadata, "gorefs")) + goref_species = metadata.yaml_set(absolute_metadata, "go-reference-species", "taxon_id") click.echo("Found {} GO Rules".format(len(rule_metadata.keys()))) click.echo("Found {} GO_REFs".format(len(goref_metadata.keys()))) + click.echo("Found {} GO_REF Species".format(len(goref_species))) paint_metadata = metadata.dataset_metadata_file(absolute_metadata, "paint") noctua_metadata = metadata.dataset_metadata_file(absolute_metadata, "noctua") @@ -546,6 +549,7 @@ def produce(ctx, group, metadata_dir, gpad, ttl, target, ontology, exclude, base group=group, rule_metadata=rule_metadata, goref_metadata=goref_metadata, + goref_species=goref_species, db_entities=db_entities, group_idspace=group_ids, suppress_rule_reporting_tags=suppress_rule_reporting_tag, @@ -654,6 +658,7 @@ def rule(metadata_dir, out, ontology, gaferencer_file): goref_metadata = metadata.yamldown_lookup(os.path.join(absolute_metadata, "gorefs")) gorule_metadata = metadata.yamldown_lookup(os.path.join(absolute_metadata, "rules")) + goref_species = metadata.yaml_set(absolute_metadata, "go-reference-species", "taxon_id") click.echo("Found {} GO Rules".format(len(gorule_metadata.keys()))) @@ -667,6 +672,7 @@ 
class GoRule64(GoRule):
    """GORULE:0000064: TreeGrafter IEA annotations on GO reference species.

    An annotation fails this rule only when all of the following hold:

    * its evidence type, as a string, equals ``iea_eco``;
    * its supporting references include ``GO_REF:0000118`` (TreeGrafter);
    * ``config.goref_species`` is not None and contains the string form of
      the annotation's subject taxon.

    Every other annotation passes, including all annotations when
    ``config.goref_species`` is None.
    """

    def __init__(self):
        rule_id = "GORULE:0000064"
        rule_title = "TreeGrafter ('GO_REF:0000118') IEAs should be filtered for GO reference species"
        super().__init__(rule_id, rule_title, FailMode.HARD)

    def test(self, annotation: association.GoAssociation, config: assocparser.AssocParserConfig, group=None) -> TestResult:
        """Check one annotation against the rule.

        :param annotation: association whose evidence type, supporting
            references and subject taxon are inspected.
        :param config: parser configuration; only ``goref_species`` is read.
        :param group: unused by this rule.
        :return: ``self._result(False)`` when the annotation matches every
            condition listed on the class, ``self._result(True)`` otherwise.
        """
        # TreeGrafter reference is GO_REF:0000118
        treegrafter_ref = 'GO_REF:0000118'

        supporting_refs = list(map(str, annotation.evidence.has_supporting_reference))
        evidence_type = str(annotation.evidence.type)

        # Guard clauses: anything that is not a TreeGrafter IEA passes.
        if evidence_type != iea_eco:
            return self._result(True)
        if treegrafter_ref not in supporting_refs:
            return self._result(True)

        # Without a reference-species set there is nothing to filter against.
        reference_species = config.goref_species
        if reference_species is None:
            return self._result(True)

        # The rule is violated exactly when the taxon is a reference species.
        return self._result(str(annotation.subject.taxon) not in reference_species)
def yaml_set(metadata, yaml_file_name, field) -> Set[str]:
    """Collect the values of one field from a YAML list file in the metadata dir.

    The file is expected to hold a sequence of mappings; the value stored
    under ``field`` in each mapping is gathered into a set.

    :param metadata: path to the metadata directory (made absolute here).
    :param yaml_file_name: name of the YAML file inside ``metadata``. If no
        file with exactly this name exists but ``<yaml_file_name>.yaml``
        does, the latter is used, so callers may pass the name with or
        without the extension.
    :param field: key to read from every entry of the YAML list.
    :return: set of the ``field`` values; empty when the file has no content.
    :raises click.ClickException: if the file cannot be found or parsed, or
        if an entry does not provide ``field``.
    """
    yaml_path = os.path.join(os.path.abspath(metadata), yaml_file_name)
    # Tolerate callers that omit the extension (as the dataset-style helpers
    # do) instead of failing with "Could not find or read".
    if not os.path.isfile(yaml_path) and os.path.isfile(yaml_path + ".yaml"):
        yaml_path += ".yaml"

    try:
        with open(yaml_path, "r") as yaml_file:
            click.echo("Found yaml file at {path}".format(path=yaml_path))
            # NOTE(review): FullLoader can construct more than plain data;
            # if this metadata could ever be untrusted, use yaml.safe_load.
            entries = yaml.load(yaml_file, Loader=yaml.FullLoader)
    except Exception as e:
        raise click.ClickException("Could not find or read {}: {}".format(yaml_path, str(e))) from e

    # An empty YAML document loads as None; treat it as "no entries".
    if entries is None:
        return set()

    try:
        # "entry", not "yaml": do not shadow the yaml module name.
        return {entry[field] for entry in entries}
    except (KeyError, TypeError) as e:
        raise click.ClickException(
            "Could not read field '{}' from every entry of {}: {}".format(field, yaml_path, str(e))
        ) from e
references="GO_REF:0000118").associations[0] + assoc.subject.taxon = Curie.from_str("NCBITaxon:123") + test_result = qc.GoRule64().test(assoc, config) + assert test_result.result_type == qc.ResultType.PASS + + assoc = make_annotation(evidence="IEA", references="GO_REF:0000123").associations[0] + assoc.subject.taxon = Curie.from_str("NCBITaxon:123") + test_result = qc.GoRule64().test(assoc, config) + assert test_result.result_type == qc.ResultType.PASS + + assoc = make_annotation(evidence="IEA", references="GO_REF:0000118").associations[0] + assoc.subject.taxon = Curie.from_str("NCBITaxon:7227") + test_result = qc.GoRule64().test(assoc, config) + assert test_result.result_type == qc.ResultType.ERROR def test_all_rules(): # pass @@ -791,7 +819,7 @@ def test_all_rules(): assoc = gafparser.to_association(a).associations[0] test_results = qc.test_go_rules(assoc, config).all_results - assert len(test_results.keys()) == 25 + assert len(test_results.keys()) == 26 assert test_results[qc.GoRules.GoRule26.value].result_type == qc.ResultType.PASS assert test_results[qc.GoRules.GoRule29.value].result_type == qc.ResultType.PASS