From 1c2b121cd7aa493af5c03609e03adfb54953d815 Mon Sep 17 00:00:00 2001 From: Luis Paulin Date: Thu, 4 Jul 2024 21:38:02 -0600 Subject: [PATCH] Bug fixing of phasing default values, now uses HP and PS --- src/sniffles/config.py | 4 ++-- src/sniffles/parallel.py | 2 +- src/sniffles/postprocessing.py | 4 ++-- src/sniffles/sv.py | 6 +++--- src/sniffles/vcf.py | 20 +++++++++++++++++--- 5 files changed, 25 insertions(+), 11 deletions(-) diff --git a/src/sniffles/config.py b/src/sniffles/config.py index f837908..09b0553 100644 --- a/src/sniffles/config.py +++ b/src/sniffles/config.py @@ -353,8 +353,8 @@ def __init__(self, *args, **kwargs): # Genotyping self.genotype_format = "GT:GQ:DR:DV" - self.genotype_none = (".", ".", 0, 0, 0, None) - self.genotype_null = (0, 0, 0, 0, 0, None) + self.genotype_none = (".", ".", 0, 0, 0, (None, None)) + self.genotype_null = (0, 0, 0, 0, 0, (None, None)) self.genotype_min_z_score = 5 if self.genotype_ploidy != 2: util.fatal_error("Currently only --genotype-ploidy 2 is supported") diff --git a/src/sniffles/parallel.py b/src/sniffles/parallel.py index ebe2160..a1b625b 100644 --- a/src/sniffles/parallel.py +++ b/src/sniffles/parallel.py @@ -249,7 +249,7 @@ def execute(self) -> Optional[GenotypeResult]: coverage = round(sum(coverage_list) / len(coverage_list)) svcall.genotypes = {} if coverage > 0: - svcall.genotypes[0] = (0, 0, 0, coverage, 0, None) + svcall.genotypes[0] = (0, 0, 0, coverage, 0, (None, None)) else: svcall.genotypes[0] = config.genotype_none diff --git a/src/sniffles/postprocessing.py b/src/sniffles/postprocessing.py index 6ec6bd7..9f3bfcf 100644 --- a/src/sniffles/postprocessing.py +++ b/src/sniffles/postprocessing.py @@ -488,5 +488,5 @@ def phase_sv(svcall, config): ps_filter = "PASS" svcall.set_info("PHASE", f"{hp},{ps},{hp_support},{ps_support},{hp_filter},{ps_filter}") - return (config.phase_identifiers.index(hp) if hp in config.phase_identifiers else None if hp_filter == "PASS" else \ - None, ps if "PASS" == ps_filter else None) + return (config.phase_identifiers.index(hp) if hp in config.phase_identifiers else None if hp_filter == "PASS" else + None, ps if "PASS" == ps_filter else None) diff --git a/src/sniffles/sv.py b/src/sniffles/sv.py index b688540..1d6dd76 100755 --- a/src/sniffles/sv.py +++ b/src/sniffles/sv.py @@ -212,7 +212,7 @@ def call(self, config, task) -> Optional[SVCall]: rnames.extend(cand.rnames) if 0 not in cand.genotypes: - cand.genotypes[0] = (".", ".", 0, 0, cand.support, None) + cand.genotypes[0] = (".", ".", 0, 0, cand.support, (None, None)) if cand.sample_internal_id in genotypes: # Intra-sample merging a, b, gt_qual, dr, dv, ps = cand.genotypes[0] @@ -231,9 +231,9 @@ def call(self, config, task) -> Optional[SVCall]: continue coverage = self.coverages_nonincluded[sample_internal_id] if coverage >= config.combine_null_min_coverage: - genotypes[sample_internal_id] = (0, 0, 0, coverage, 0, None, "NULL") + genotypes[sample_internal_id] = (0, 0, 0, coverage, 0, (None, None), "NULL") else: - genotypes[sample_internal_id] = (".", ".", 0, coverage, 0, None, "NULL") + genotypes[sample_internal_id] = (".", ".", 0, coverage, 0, (None, None), "NULL") if config.combine_consensus: genotypes_consensus = {} diff --git a/src/sniffles/vcf.py b/src/sniffles/vcf.py index 5ed27c2..2da70df 100755 --- a/src/sniffles/vcf.py +++ b/src/sniffles/vcf.py @@ -15,6 +15,7 @@ from sniffles import sv from sniffles import util +from sniffles.config import SnifflesConfig log = logging.getLogger(__name__) @@ -29,6 +30,19 @@ def format_info(k, v): return f"{k}={v}" +def unpack_phase(phase) -> tuple: + try: + hp_i, ps = phase + except TypeError: + if phase is None: + log.warning(f"Single 'None'-valued phase: {phase}") + hp_i, ps = None, None + else: + log.warning(f"Single not 'None'-valued phase: {phase}") + hp_i, ps = phase, phase + return hp_i, ps + + def format_genotype(gt): """ hp_i is the index of the haplotype in config.phase_identifiers: @@ -37,7 +51,7 @@ def format_genotype(gt): """ if len(gt) == 6: a, b, qual, dr, dv, phase = gt - hp_i, ps = phase + hp_i, ps = unpack_phase(phase) if hp_i is not None and (a, b) == (0, 1): gt_sep = "|" if hp_i == 0: @@ -47,7 +61,7 @@ def format_genotype(gt): return f"{a}{gt_sep}{b}:{qual}:{dr}:{dv}" if ps is None else f"{a}{gt_sep}{b}:{qual}:{dr}:{dv}:{ps}" else: a, b, qual, dr, dv, phase, svid = gt - hp_i, ps = phase + hp_i, ps = unpack_phase(phase) if hp_i is not None and (a, b) == (0, 1): gt_sep = "|" if hp_i == 0: @@ -59,7 +73,7 @@ def format_genotype(gt): class VCF: - def __init__(self, config, handle): + def __init__(self, config: SnifflesConfig, handle): self.config = config self.handle = handle self.call_count = 0