Skip to content

Commit

Permalink
Counts of transcripts with metric oddities are now reported in the results folder (issue #28)
Browse files Browse the repository at this point in the history
  • Loading branch information
cschu committed Jun 9, 2020
1 parent bbaad27 commit 082d711
Show file tree
Hide file tree
Showing 2 changed files with 49 additions and 23 deletions.
42 changes: 29 additions & 13 deletions minos/scripts/metric_oddities.py
Original file line number Diff line number Diff line change
@@ -1,3 +1,4 @@
import sys
import csv
from collections import Counter

Expand All @@ -6,18 +7,33 @@


class MetricOddityParser:
    """Count, per transcript source, how many transcripts trigger each
    configured "metric oddity" expression in a mikado metrics table.

    An oddity is a format-template string such as "{score} > 100"; it is
    filled in with the values of one metrics row and evaluated, and every
    row for which it holds increments that oddity's counter for the row's
    "original_source".
    """

    def parse_metrics(self, metric_file, transcript_filter=None):
        """Scan the tab-separated metrics file and populate self.data.

        metric_file: path to a mikado metrics tsv; must provide at least the
            "tid" and "original_source" columns plus every column referenced
            by the oddity templates.
        transcript_filter: optional container of transcript ids; rows whose
            "tid" is not in it are skipped (None keeps every row).
        """
        # with-block ensures the file handle is closed (original leaked it)
        with open(metric_file) as metrics_in:
            for row in csv.DictReader(metrics_in, delimiter="\t"):
                if transcript_filter is None or row["tid"] in transcript_filter:
                    # NOTE(review): eval() of config-supplied templates — safe
                    # only while the oddity expressions come from a trusted
                    # configuration, never from untrusted input.
                    counts = [oddity for oddity in self.oddities if eval(oddity.format(**row))]
                    source = row["original_source"]
                    # one Counter per source, pre-seeded so every oddity
                    # appears in the output even with a zero count
                    self.data.setdefault(
                        source,
                        Counter({oddity: 0 for oddity in self.oddities})
                    ).update(counts)

    def __init__(self, metric_file, oddities, transcript_filter=None):
        # self.data: {original_source: Counter({oddity_template: count})}
        self.data = dict()
        self.oddities = oddities
        self.parse_metrics(metric_file, transcript_filter=transcript_filter)

    def write_table(self, collapse=True, stream=sys.stdout):
        """Write the oddity counts as a tab-separated table to stream.

        collapse=True emits a single "counts" column summed over all sources
        (used for loci); collapse=False emits one column per source (used
        for subloci and monoloci).
        """
        header = list(self.data.keys()) if not collapse else ["counts"]
        print("metric", *header, sep="\t", file=stream, flush=True)
        for oddity in self.oddities:
            total = 0
            row = list()
            for source, counts in self.data.items():
                total += counts[oddity]
                if not collapse:
                    row.append(counts[oddity])
            if collapse:
                row.append(total)
            # strip the format braces so the expression reads naturally
            print(oddity.replace("{", "").replace("}", ""), *row, sep="\t", file=stream, flush=True)
30 changes: 20 additions & 10 deletions minos/zzz/minos_run.smk
Original file line number Diff line number Diff line change
Expand Up @@ -167,6 +167,9 @@ rule all:
os.path.join(config["outdir"], "mikado.subloci.gff3"),
os.path.join(config["outdir"], "mikado.monoloci.gff3"),
os.path.join(config["outdir"], "mikado.loci.gff3"),
os.path.join(RESULTS_DIR, "mikado.subloci.metrics_oddities.tsv"),
os.path.join(RESULTS_DIR, "mikado.monoloci.metrics_oddities.tsv"),
os.path.join(RESULTS_DIR, "mikado.loci.metrics_oddities.tsv"),
[
os.path.join(config["outdir"], POST_PICK_PREFIX + suffix)
for suffix in {".gff", ".proteins.fasta", ".table.txt", ".cds.fasta", ".cds.fasta.lengths", ".cdna.fasta"}
Expand Down Expand Up @@ -876,21 +879,28 @@ rule minos_generate_final_table:

rule minos_collate_metric_oddities:
    # Report, per mikado stage, how many transcripts trip each configured
    # metric-oddity expression (issue #28). Loci counts are restricted to
    # released transcripts and collapsed into one column; sub-/monoloci
    # counts are broken down per original source.
    input:
        loci = os.path.join(config["outdir"], "mikado.loci.metrics.tsv"),
        subloci = os.path.join(config["outdir"], "mikado.subloci.metrics.tsv"),
        monoloci = os.path.join(config["outdir"], "mikado.monoloci.metrics.tsv"),
        old_new_rel = rules.minos_create_release_gffs.output[2],
        final_table = rules.minos_generate_final_table.output.final_table
    output:
        os.path.join(config["outdir"], "results", "mikado.loci.metrics_oddities.tsv"),
        os.path.join(config["outdir"], "results", "mikado.subloci.metrics_oddities.tsv"),
        os.path.join(config["outdir"], "results", "mikado.monoloci.metrics_oddities.tsv")
    run:
        import csv
        from minos.scripts.metric_oddities import MetricOddityParser
        # released transcript ids = first column of the final table (comment rows skipped)
        release_transcripts = {row[0] for row in csv.reader(open(input.final_table), delimiter="\t") if not row[0].startswith("#")}
        # map released ids back to their pre-release (mikado) ids via the
        # old/new relation table -- assumes col 1 holds the release id and
        # col 3 the mikado id; TODO confirm against minos_create_release_gffs
        transcript_filter = {row[3] for row in csv.reader(open(input.old_new_rel), delimiter="\t") if not row[0].startswith("#") and row[1] in release_transcripts}
        # loci: released transcripts only, single collapsed counts column
        with open(output[0], "w") as loci_oddities_out:
            MetricOddityParser(input[0], config["report_metric_oddities"], transcript_filter=transcript_filter).write_table(collapse=True, stream=loci_oddities_out)
        # sub-/monoloci: all transcripts, one column per original source
        with open(output[1], "w") as subloci_oddities_out:
            MetricOddityParser(input[1], config["report_metric_oddities"]).write_table(collapse=False, stream=subloci_oddities_out)
        with open(output[2], "w") as monoloci_oddities_out:
            MetricOddityParser(input[2], config["report_metric_oddities"]).write_table(collapse=False, stream=monoloci_oddities_out)

rule split_proteins_prepare:
input:
Expand Down

0 comments on commit 082d711

Please sign in to comment.