Skip to content

Commit

Permalink
fix conflicts with master/main
Browse files Browse the repository at this point in the history
  • Loading branch information
sierra-moxon committed May 17, 2024
2 parents be86591 + 44ad77a commit dd89b63
Show file tree
Hide file tree
Showing 7 changed files with 115 additions and 10 deletions.
13 changes: 11 additions & 2 deletions bin/ontobio-parse-assocs.py
Original file line number Diff line number Diff line change
Expand Up @@ -81,7 +81,9 @@ def main():
parser.add_argument("-g", "--gpi", type=str, required=False, default=None,
help="GPI file")
parser.add_argument("-m", "--metadata_dir", type=dir_path, required=False,
help="Path to metadata directory")
help="Path to metadata directory")
parser.add_argument("--retracted_pub_set", type=argparse.FileType('r'), required=False,
help="Path to retracted publications file")
parser.add_argument("-l", "--rule", action="append", required=None, default=[], dest="rule_set",
help="Set of rules to be run. Default is no rules to be run, with the exception \
of gorule-0000027 and gorule-0000020. See command line documentation in the \
Expand Down Expand Up @@ -144,11 +146,17 @@ def main():
rule_set = assocparser.RuleSet.ALL

goref_metadata = None
ref_species_metadata = None
ref_species_metadata = None
if args.metadata_dir:
absolute_metadata = os.path.abspath(args.metadata_dir)
goref_metadata = metadata.yamldown_lookup(os.path.join(absolute_metadata, "gorefs"))
ref_species_metadata = metadata.yaml_set(absolute_metadata, "go-reference-species.yaml", "taxon_id")

retracted_pub_set = None
if args.retracted_pub_set:
retracted_pub_set = metadata.retracted_pub_set(args.retracted_pub_set.name)
elif args.metadata_dir:
retracted_pub_set = metadata.retracted_pub_set_from_meta(absolute_metadata)

# set configuration
filtered_evidence_file = open(args.filtered_file, "w") if args.filtered_file else None
Expand All @@ -165,6 +173,7 @@ def main():
gpi_authority_path=args.gpi,
goref_metadata=goref_metadata,
ref_species_metadata=ref_species_metadata,
retracted_pub_set=retracted_pub_set,
rule_set=rule_set
)
p = None
Expand Down
50 changes: 45 additions & 5 deletions bin/validate.py
Original file line number Diff line number Diff line change
Expand Up @@ -34,6 +34,7 @@
from ontobio.validation import tools
from ontobio.validation import rules


from typing import Dict, Set

# logging.basicConfig(format="%(asctime)s - %(name)s - %(levelname)s: %(message)s", level=logging.WARNING)
Expand Down Expand Up @@ -223,7 +224,7 @@ def create_parser(config, group, dataset, format="gaf"):

@tools.gzips
def produce_gaf(dataset, source_gaf, ontology_graph, gpipaths=None, paint=False, group="unknown", rule_metadata=None,
goref_metadata=None, ref_species_metadata=None, db_entities=None, group_idspace=None,
goref_metadata=None, ref_species_metadata=None, retracted_pub_set=None, db_entities=None, group_idspace=None,
format="gaf", suppress_rule_reporting_tags=[], annotation_inferences=None, group_metadata=None,
extensions_constraints=None, rule_contexts=[], gaf_output_version="2.2",
rule_set=assocparser.RuleSet.ALL) -> list[str]:
Expand All @@ -237,6 +238,7 @@ def produce_gaf(dataset, source_gaf, ontology_graph, gpipaths=None, paint=False,
rule_metadata=rule_metadata,
goref_metadata=goref_metadata,
ref_species_metadata=ref_species_metadata,
retracted_pub_set=retracted_pub_set,
entity_idspaces=db_entities,
group_idspace=group_idspace,
suppress_rule_reporting_tags=suppress_rule_reporting_tags,
Expand Down Expand Up @@ -610,9 +612,32 @@ def cli(ctx, verbose):
@click.option("--only-dataset", default=None)
@click.option("--gaf-output-version", default="2.2", type=click.Choice(["2.1", "2.2"]))
@click.option("--rule-set", "-l", "rule_set", default=[assocparser.RuleSet.ALL], multiple=True)
@click.option("--retracted_pub_set", type=click.Path(exists=True), default=None, required=False, help="Path to retracted publications file")
def produce(ctx, group, metadata_dir, gpad, gpad_gpi_output_version, ttl, target, ontology, exclude, base_download_url,
suppress_rule_reporting_tag, skip_existing_files, gaferencer_file, only_dataset, gaf_output_version,
rule_set):
rule_set, retracted_pub_set):
"""
Produce GAF, GPI, and TTL files for a group.
This command will download the GAF files for a group, validate them, and then produce GPI and TTL files.
:param ctx: Click context
:param group: The group to produce files for
:param metadata_dir: The directory containing the metadata files
:param gpad: Produce GPAD files
:param gpad_gpi_output_version: The version of the GPAD and GPI files to produce
:param ttl: Produce TTL files
:param target: The directory to put the files in
:param ontology: The ontology to use for validation
:param exclude: Datasets to exclude
:param base_download_url: The base URL to download files from
:param suppress_rule_reporting_tag: Tags to suppress in the rule reporting
:param skip_existing_files: Skip downloading files that already exist
:param gaferencer_file: The path to the Gaferencer output file
:param only_dataset: Only process a single dataset
:param gaf_output_version: The version of the GAF files to produce
:param rule_set: The rule set to use
:param retracted_pub_set: The path to the retracted publications file
"""
logger.info("Logging is verbose")
products = {
"gaf": True,
Expand Down Expand Up @@ -650,7 +675,7 @@ def produce(ctx, group, metadata_dir, gpad, gpad_gpi_output_version, ttl, target

db_entities = metadata.database_entities(absolute_metadata)
group_ids = metadata.groups(absolute_metadata)
extensions_constraints = metadata.extensions_constraints_file(absolute_metadata)
extensions_constraints = metadata.extensions_constraints_file(absolute_metadata)

gaferences = None
if gaferencer_file:
Expand All @@ -660,6 +685,12 @@ def produce(ctx, group, metadata_dir, gpad, gpad_gpi_output_version, ttl, target
if rule_set == (assocparser.RuleSet.ALL,):
rule_set = assocparser.RuleSet.ALL

retracted_pubs = None
if retracted_pub_set:
retracted_pubs = metadata.retracted_pub_set(retracted_pub_set)
else:
retracted_pubs = metadata.retracted_pub_set_from_meta(absolute_metadata)

for dataset_metadata, source_gaf in downloaded_gaf_sources:
dataset = dataset_metadata["dataset"]
# Set paint to True when the group is "paint".
Expand Down Expand Up @@ -772,13 +803,14 @@ def paint(group, dataset, metadata, target, ontology):
absolute_target = os.path.abspath(target)
os.makedirs(os.path.join(absolute_target, "groups"), exist_ok=True)
paint_metadata = metadata.dataset_metadata_file(absolute_metadata, "paint")

paint_src_gaf = check_and_download_mixin_source(paint_metadata, dataset, absolute_target)

click.echo("Loading ontology: {}...".format(ontology))
ontology_graph = OntologyFactory().create(ontology)

gpi_path = os.path.join(absolute_target, "groups", dataset, "{}.gpi".format(dataset))
click.echo("Using GPI at {}".format(gpi_path))
click.echo("Using GPI at {}".format(gpi_path))
paint_gaf = produce_gaf("paint_{}".format(dataset), paint_src_gaf, ontology_graph, gpipath=gpi_path)


Expand All @@ -788,7 +820,8 @@ def paint(group, dataset, metadata, target, ontology):
@click.option("--ontology", type=click.Path(), required=True)
@click.option("--gaferencer-file", "-I", type=click.Path(exists=True), default=None, required=False,
help="Path to Gaferencer output to be used for inferences")
def rule(metadata_dir, out, ontology, gaferencer_file):
@click.option("--retracted_pub_set", type=click.Path(exists=True), default=None, required=False, help="Path to retracted publications file")
def rule(metadata_dir, out, ontology, gaferencer_file, retracted_pub_set):
absolute_metadata = os.path.abspath(metadata_dir)

click.echo("Loading ontology: {}...".format(ontology))
Expand All @@ -797,6 +830,12 @@ def rule(metadata_dir, out, ontology, gaferencer_file):
goref_metadata = metadata.yamldown_lookup(os.path.join(absolute_metadata, "gorefs"))
gorule_metadata = metadata.yamldown_lookup(os.path.join(absolute_metadata, "rules"))
ref_species_metadata = metadata.yaml_set(absolute_metadata, "go-reference-species.yaml", "taxon_id")
retracted_pubs = None
if retracted_pub_set:
retracted_pubs = metadata.retracted_pub_set(retracted_pub_set)
else:
retracted_pubs = metadata.retracted_pub_set_from_meta(absolute_metadata)


click.echo("Found {} GO Rules".format(len(gorule_metadata.keys())))

Expand All @@ -811,6 +850,7 @@ def rule(metadata_dir, out, ontology, gaferencer_file):
ontology=ontology_graph,
goref_metadata=goref_metadata,
ref_species_metadata=ref_species_metadata,
retracted_pub_set=retracted_pubs,
entity_idspaces=db_entities,
group_idspace=group_ids,
annotation_inferences=gaferences,
Expand Down
2 changes: 1 addition & 1 deletion ontobio/__init__.py
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
from __future__ import absolute_import

__version__ = '2.8.24'
__version__ = '2.8.25'


from .ontol_factory import OntologyFactory
Expand Down
2 changes: 2 additions & 0 deletions ontobio/io/assocparser.py
Original file line number Diff line number Diff line change
Expand Up @@ -234,6 +234,7 @@ def __init__(self,
ref_species_metadata=None,
group_metadata=None,
dbxrefs=None,
retracted_pub_set=None,
suppress_rule_reporting_tags=[],
annotation_inferences=None,
extensions_constraints=None,
Expand All @@ -258,6 +259,7 @@ def __init__(self,
self.goref_metadata = goref_metadata
self.ref_species_metadata = ref_species_metadata
self.group_metadata = group_metadata
self.retracted_pub_set = retracted_pub_set
self.suppress_rule_reporting_tags = suppress_rule_reporting_tags
self.annotation_inferences = annotation_inferences
self.entity_idspaces = entity_idspaces
Expand Down
15 changes: 15 additions & 0 deletions ontobio/io/qc.py
Original file line number Diff line number Diff line change
Expand Up @@ -421,6 +421,20 @@ def test(self, annotation: association.GoAssociation, config: assocparser.AssocP
return self._result(bool(withfrom))
else:
return self._result(True)

class GoRule22(GoRule):
    """GORULE:0000022: flag annotations whose references are retracted publications."""

    def __init__(self):
        # Rule id, title and fail mode are handed to the GoRule base class.
        super().__init__(
            "GORULE:0000022",
            "Check for, and filter, annotations made to retracted publications",
            FailMode.HARD,
        )

    def test(self, annotation: association.GoAssociation, config: assocparser.AssocParserConfig, group=None) -> TestResult:
        """
        Check the annotation's supporting references against the retracted set.

        :param annotation: The association whose ``evidence.has_supporting_reference``
            entries are checked
        :param config: Parser configuration; ``config.retracted_pub_set`` holds the
            retracted publication ids, or None when no set was supplied
        :param group: Not used by this rule
        :return: ``self._result(False)`` when any reference, converted with ``str()``,
            is in the retracted set; ``self._result(True)`` otherwise, including
            when no retracted set is configured
        """
        retracted = config.retracted_pub_set
        if retracted is None:
            # Nothing to compare against, so the rule passes without reading references.
            return self._result(True)

        cites_retracted = any(
            str(reference) in retracted
            for reference in annotation.evidence.has_supporting_reference
        )
        return self._result(not cites_retracted)


class GoRule26(GoRule):
Expand Down Expand Up @@ -952,6 +966,7 @@ def test(self, annotation: association.GoAssociation, config: assocparser.AssocP
"GoRule16": GoRule16(),
"GoRule17": GoRule17(),
"GoRule18": GoRule18(),
"GoRule22": GoRule22(),
"GoRule26": GoRule26(),
"GoRule28": GoRule28(),
"GoRule29": GoRule29(),
Expand Down
27 changes: 26 additions & 1 deletion ontobio/validation/metadata.py
Original file line number Diff line number Diff line change
Expand Up @@ -182,7 +182,32 @@ def yaml_set(metadata, yaml_file_name, field) -> Set[str]:
except Exception as e:
raise click.ClickException("Could not find or read {}: {}".format(yaml_path, str(e)))

return set([yaml[field] for yaml in yaml_list])
return set([yaml[field] for yaml in yaml_list])

def retracted_pub_set_from_meta(metadata) -> Set:
    """
    Load the retracted publication set from a metadata directory.

    :param metadata: Path of the metadata directory; the file looked up inside it
        is ``retracted-publications.txt``
    :return: The set produced by ``retracted_pub_set_use_abspath`` for that file, or
        an empty set when the file is not readable (``os.access`` with ``os.R_OK``)
    """
    candidate = os.path.join(metadata, "retracted-publications.txt")
    if not os.access(candidate, os.R_OK):
        # Missing or unreadable file: fall back to an empty set rather than failing.
        return set()
    return retracted_pub_set_use_abspath(candidate)

def retracted_pub_set(abspath_retracted_file) -> Set:
    """
    Load the retracted publication set from an explicitly given file.

    :param abspath_retracted_file: Path to the retracted publications file; it is
        passed through ``os.path.abspath`` before being read
    :return: The set produced by ``retracted_pub_set_use_abspath`` for that path
    """
    resolved_path = os.path.abspath(abspath_retracted_file)
    return retracted_pub_set_use_abspath(resolved_path)

def retracted_pub_set_use_abspath(abspath_retracted_file) -> Set:
    """
    Read a retracted publications file and return the publication ids it lists.

    Each line is stripped of surrounding whitespace. Lines starting with "!" are
    skipped, and blank lines are ignored. When a line contains a comma, only the
    text before the first comma is kept, with surrounding whitespace removed.

    :param abspath_retracted_file: Path of the file to read
    :return: Set of publication id strings found in the file
    :raises click.ClickException: If the file cannot be opened or read
    """
    try:
        retracted_pubs = set()
        with open(abspath_retracted_file, "r") as f:
            for line in f:
                li = line.strip()
                # Skip "!" lines and blank lines; a blank line would otherwise
                # add the empty string to the set.
                if not li or li.startswith("!"):
                    continue
                # Keep only the first comma-separated field, and strip it again so
                # "PMID:1 , note" yields "PMID:1" rather than "PMID:1 ".
                pub_id = li.partition(",")[0].strip()
                if pub_id:
                    retracted_pubs.add(pub_id)
        return retracted_pubs
    except Exception as e:
        # Same exception type and message as before; the cause is now chained.
        raise click.ClickException("Could not find or read {}: {}".format(abspath_retracted_file, str(e))) from e



16 changes: 15 additions & 1 deletion tests/test_qc.py
Original file line number Diff line number Diff line change
Expand Up @@ -354,6 +354,20 @@ def test_go_rule_18():
test_result = qc.GoRule18().test(assoc, all_rules_config())
assert test_result.result_type == qc.ResultType.PASS

def test_go_rule22():
    """GoRule22 passes a reference outside the retracted set and errors on one inside it."""
    retracted_config = assocparser.AssocParserConfig(
        ontology=ontology,
        retracted_pub_set={"RETRACTED:1234","PMID:37772366"},
        rule_set=assocparser.RuleSet.ALL
    )

    # (reference on the annotation, expected rule outcome), checked in this order.
    expectations = [
        ("PMID:12345", qc.ResultType.PASS),
        ("PMID:37772366", qc.ResultType.ERROR),
    ]
    for reference, expected in expectations:
        assoc = make_annotation(goid="GO:1234567", evidence="IBA", references=reference).associations[0]
        outcome = qc.GoRule22().test(assoc, retracted_config)
        assert outcome.result_type == expected

def test_go_rule26():

config = assocparser.AssocParserConfig(
Expand Down Expand Up @@ -819,7 +833,7 @@ def test_all_rules():
assoc = gafparser.to_association(a).associations[0]

test_results = qc.test_go_rules(assoc, config).all_results
assert len(test_results.keys()) == 26
assert len(test_results.keys()) == 27
assert test_results[qc.GoRules.GoRule26.value].result_type == qc.ResultType.PASS
assert test_results[qc.GoRules.GoRule29.value].result_type == qc.ResultType.PASS

Expand Down

0 comments on commit dd89b63

Please sign in to comment.