From af5631cceaa452c6ca6af4f3d6def912ce5f2d56 Mon Sep 17 00:00:00 2001 From: Tim Dunn Date: Wed, 13 Sep 2023 10:48:38 -0400 Subject: [PATCH] Minor updates to analysis and pipeline scripts. --- analysis/10_vcfdist_sv.sh | 20 ++- analysis/11_truvari_sv.sh | 86 +++++++---- analysis/13_truvari_compare.py | 263 +++++++++++++++++++++++++++++++++ pipeline/5_vcfeval.sh | 43 ++++-- pipeline/6_vcfdist.sh | 32 ++-- pipeline/includes.sh | 2 +- 6 files changed, 390 insertions(+), 56 deletions(-) create mode 100644 analysis/13_truvari_compare.py diff --git a/analysis/10_vcfdist_sv.sh b/analysis/10_vcfdist_sv.sh index 2c71775..d460c38 100755 --- a/analysis/10_vcfdist_sv.sh +++ b/analysis/10_vcfdist_sv.sh @@ -17,6 +17,20 @@ $timer -v ../src/vcfdist \ $data/zook/verkko.vcf.gz \ $data/giab-tr-v4.20/GIABTR.HG002.benchmark.vcf.gz \ $data/refs/GCA_000001405.15_GRCh38_no_alt_analysis_set.fasta \ - -b $data/giab-tr-v4.20/GIABTR.HG002.benchmark.small.bed \ - -l 1000 \ - -p ../out/small + -b $data/giab-tr-v4.20/GIABTR.HG002.benchmark.chr20.bed \ + -l 10000 \ + -p ../out/giabtr/vcfdist-chr20 + +# Benchmark on Truvari phab variants (chr20) +# file="../out/giabtr/phab-chr20/output.vcf.gz" +# for sample in `bcftools query -l $file`; do +# bcftools view -c1 -Oz -s $sample -o ${file/.vcf*/.$sample.vcf.gz} $file +# done +# $timer -v ../src/vcfdist \ +# ../out/giabtr/phab-chr20/norm.QUERY.vcf.gz \ +# ../out/giabtr/phab-chr20/norm.TRUTH.vcf.gz \ +# $data/refs/GCA_000001405.15_GRCh38_no_alt_analysis_set.fasta \ +# -b $data/giab-tr-v4.20/GIABTR.HG002.benchmark.chr20.bed \ +# -l 10000 \ +# -t -q \ +# -p ../out/giabtr/vcfdist-norm-phab-chr20/ diff --git a/analysis/11_truvari_sv.sh b/analysis/11_truvari_sv.sh index d85ff9d..4e0c929 100755 --- a/analysis/11_truvari_sv.sh +++ b/analysis/11_truvari_sv.sh @@ -9,43 +9,75 @@ source ../pipeline/includes.sh # tabix -p vcf verkko.vcf.gz # # TruVari without harmonization -# rm -r ../out/giabtr/truvari-initial -# $timer -v truvari bench \ +# rm -r ../out/giabtr/truvari-init +# truvari bench \ # -b $data/giab-tr-v4.20/GIABTR.HG002.benchmark.vcf.gz \ # -c $data/zook/verkko.vcf.gz \ # -f $data/refs/GCA_000001405.15_GRCh38_no_alt_analysis_set.fasta \ -# --includebed $data/giab-tr-v4.20/GIABTR.HG002.benchmark.regions.bed \ +# --includebed $data/giab-tr-v4.20/GIABTR.HG002.benchmark.lregions.bed \ # --no-ref a \ # --sizemin 1 --sizefilt 1 --sizemax 10000 \ # --pick multi \ # --typeignore \ # --dup-to-ins \ -# -o ../out/giabtr/truvari-initial/ +# -o ../out/giabtr/truvari-init/ + +# laytr tru2ga \ +# -i ../out/giabtr/truvari-init/ \ + # -o ../out/giabtr/truvari-init/result -# updated Truvari benchmarking -rm -r ../out/giabtr/truvari-phab +# Truvari benchmarking chr20 +giab_dir="../out/giabtr" +phab_dir="$giab_dir/phab-chr20" +truvari_phab_dir="$giab_dir/truvari-norm-phab-chr20" + +mkdir -p $phab_dir $timer -v truvari phab \ - -r $data/giab-tr-v4.20/GIABTR.HG002.benchmark.regions.bed \ + -r $data/giab-tr-v4.20/GIABTR.HG002.benchmark.chr20.bed \ -b $data/giab-tr-v4.20/GIABTR.HG002.benchmark.vcf.gz \ -c $data/zook/verkko.vcf.gz \ - --bSample HG002 \ - --cSample syndip \ - -f $data/refs/GCA_000001405.15_GRCh38_no_alt_analysis_set.fasta \ - -o ../out/giabtr/truvari-phab/output.vcf.gz - -rm -r ../out/giabtr/truvari-final -$timer -v truvari bench \ - -b ../out/giabtr/truvari-phab/output.vcf.gz \ - -c ../out/giabtr/truvari-phab/output.vcf.gz \ - --bSample HG002 \ - --cSample syndip \ + --bSamples HG002\ + --cSamples syndip\ + --align wfa \ + -t 56 \ -f $data/refs/GCA_000001405.15_GRCh38_no_alt_analysis_set.fasta \ - --includebed $data/giab-tr-v4.20/GIABTR.HG002.benchmark.regions.bed \ - --no-ref a \ - --sizemin 1 --sizefilt 1 --sizemax 10000 \ - --pick multi \ - --typeignore \ - --dup-to-ins \ - -o ../out/giabtr/truvari-final/ - -deactivate + -o $phab_dir/output.vcf.gz + +# file="../out/giabtr/phab-chr20/output.vcf.gz" +# for sample in `bcftools query -l $file`; do +# bcftools view -c1 -Oz -s $sample -o ${file/.vcf*/.$sample.vcf.gz} $file +# tabix -p vcf ${file/.vcf*/.$sample.vcf.gz} +# done +samples=( + "QUERY" + "TRUTH" +) +# for sample in ${samples[@]}; do +# bcftools norm \ +# -a \ +# -m-any \ +# --check-ref w \ +# -f $data/refs/GCA_000001405.15_GRCh38_no_alt_analysis_set.fasta \ +# $phab_dir/output.${sample}.vcf.gz \ +# -o $phab_dir/norm.${sample}.vcf.gz +# tabix -p vcf $phab_dir/norm.${sample}.vcf.gz +# done + +# $timer -v truvari bench \ +# -b $phab_dir/norm.TRUTH.vcf.gz \ +# -c $phab_dir/norm.QUERY.vcf.gz \ +# -f $data/refs/GCA_000001405.15_GRCh38_no_alt_analysis_set.fasta \ +# --includebed $data/giab-tr-v4.20/GIABTR.HG002.benchmark.chr20.bed \ +# --no-ref a \ +# --sizemin 1 --sizefilt 1 --sizemax 10000 \ +# --pick multi \ +# --typeignore \ +# --dup-to-ins \ +# -o $truvari_phab_dir + +# laytr tru2ga \ +# -i $truvari_phab_dir/ \ +# -o $truvari_phab_dir/result +# gunzip $truvari_phab_dir/result*.gz + +# deactivate diff --git a/analysis/13_truvari_compare.py b/analysis/13_truvari_compare.py new file mode 100644 index 0000000..1fcae53 --- /dev/null +++ b/analysis/13_truvari_compare.py @@ -0,0 +1,263 @@ +import vcf +import numpy as np +import matplotlib.pyplot as plt + +# full genome, no phab +# truvari_prefix = "../out/giabtr/truvari-init/result_" +# vcfdist_prefix = "../out/giabtr/vcfdist-keep/" +# chr20, with phab +truvari_prefix = "../out/giabtr/truvari-norm-phab-chr20/result_" +vcfdist_prefix = "../out/giabtr/vcfdist-norm-phab-chr20/" +do_print = True + +SIZE = 0 +SZ_SNP = 0 +SZ_INDEL_1_10 = 1 +SZ_INDEL_10_50 = 2 +SZ_INDEL_50_500 = 3 +SZ_INDEL_500PLUS = 4 +SZ_DIMS = 5 +sizes = ["SNP", "INDEL 1-10", "INDEL 10-50", "INDEL 50-500", "INDEL 500+"] + +VCF_DIST = 1 +VD_NONE = 0 +VD_FPN = 1 +VD_PP_0_25 = 2 +VD_PP_25_50 = 3 +VD_PP_50_75 = 4 +VD_PP_75_100 = 5 +VD_TP = 6 +VD_DIMS = 7 +vd_cats = ["None", "FP/FN", "PP (0, .25)", "PP (.25, .5)", "PP(.5, .75)", "PP(.75, 1)", "TP"] + +TRU_VARI = 2 +TV_NONE = 0 +TV_FPN_ANY = 1 +TV_FPN_SEQ = 2 +TV_FPN_SIZE = 3 +TV_FPN_OVLP = 4 +TV_TP = 5 +TV_DIMS = 6 +tv_cats = ["None", "FP/FN any", "FP/FN seq", "FP/FN size", "FP/FN overlap", "TP"] + +tv_min_seq_pct = 0.7 +tv_min_size_pct = 0.7 +tv_min_ovlp_pct = 0.0 + +counts = np.zeros((SZ_DIMS, VD_DIMS, TV_DIMS)) + +def get_size(ref : str, alt : str): + if len(ref) == 1 and len(alt) == 1: + return SZ_SNP + else: + size_diff = abs(len(ref) - len(alt)) + if size_diff == 0: + print("ERROR: size 0 INDEL") + exit(1) + if size_diff <= 10: + return SZ_INDEL_1_10 + elif size_diff <= 50: + return SZ_INDEL_10_50 + elif size_diff <= 500: + return SZ_INDEL_50_500 + else: + return SZ_INDEL_500PLUS + +def get_vd_type(credit : float): + if credit == 0: + return VD_FPN + elif credit > 0 and credit <= 0.25: + return VD_PP_0_25 + elif credit > 0 and credit <= 0.50: + return VD_PP_25_50 + elif credit > 0 and credit <= 0.75: + return VD_PP_50_75 + elif credit > 0 and credit < 1: + return VD_PP_75_100 + elif credit == 1: + return VD_TP + else: + print("ERROR: credit out of range") + exit(1) + + +def get_tv_type(tv_type : str): + if tv_type == "TP": + return TV_TP + elif tv_type == "FP" or tv_type == "FN": + return TV_FPN_ANY + + +# for callset in ["query", "truth"]: +for callset in ["query"]: + + # parse vcfdist and truvari summary VCFs + vcfdist_vcf = vcf.Reader(open(f"{vcfdist_prefix}summary.vcf", "r")) + truvari_vcf = vcf.Reader(open(f"{truvari_prefix}{callset}.vcf", "r")) + name = callset.upper() + print(name) + tv_used, vd_used = True, True + this_ctg = "chr1" + print(this_ctg) + + while True: + + # get next valid records for each + if tv_used: tv_rec = next(truvari_vcf, None) + if vd_used: vd_rec = next(vcfdist_vcf, None) + while vd_rec != None and vd_rec.genotype(name)['BD'] == None: # skip other callset + vd_rec = next(vcfdist_vcf, None) + while tv_rec != None and tv_rec.ALT[0] == "*": # skip nested var + tv_rec = next(truvari_vcf, None) + tv_used, vd_used = False, False + + if do_print: + print("============================================================") + print("Truvari:", tv_rec) + print("vcfdist:", vd_rec) + + # we've finished iterating through both VCFs + if tv_rec == None and vd_rec == None: break + + # we've finished this contig for both VCFs + if (tv_rec == None or tv_rec.CHROM != this_ctg) and \ + (vd_rec == None or vd_rec.CHROM != this_ctg): + if tv_rec == None: + this_ctg = vd_rec.CHROM + print(this_ctg) + elif vd_rec == None: + this_ctg = tv_rec.CHROM + print(this_ctg) + elif tv_rec.CHROM == vd_rec.CHROM: + this_ctg = tv_rec.CHROM + print(this_ctg) + else: + print("ERROR: different contigs up next") + + # if we've finished only one VCF, set high position + tv_pos = 2_000_000_000 if tv_rec == None else ( + 1_000_000_000 if tv_rec.CHROM != this_ctg else tv_rec.POS) + vd_pos = 2_000_000_000 if vd_rec == None else ( + 1_000_000_000 if vd_rec.CHROM != this_ctg else vd_rec.POS) + + if tv_pos == vd_pos: # position match + if do_print: print(f"TV VD {name} {tv_rec.genotype(name)['GT']} {vd_rec.genotype(name)['GT']} {tv_rec.CHROM}:{tv_rec.POS}\t{tv_rec.REF}\t{tv_rec.ALT[0]}") + if tv_rec.REF == vd_rec.REF and tv_rec.ALT[0] == vd_rec.ALT[0]: # full match + tv_used, vd_used = True, True + size = get_size(tv_rec.REF, tv_rec.ALT[0]) + vd_type = get_vd_type(float(vd_rec.genotype(name)['BC'])) + tv_type = get_tv_type(tv_rec.genotype(name)['BD']) + counts[size][vd_type][tv_type] += 1 + if vd_type == VD_TP and tv_type == TV_FPN_ANY: + if do_print: print("vcfdist TP, Truvari FP") + if vd_type == VD_FPN and tv_type == TV_TP: + if do_print: print("vcfdist FP, Truvari TP") + + # skip if info unavailable + if tv_rec.INFO['PctSeqSimilarity'] == None or \ + tv_rec.INFO['PctSizeSimilarity'] == None or \ + tv_rec.INFO['PctRecOverlap'] == None: + counts[size][vd_type][TV_FPN_OVLP] += 1 + continue + + # count truvari filter fails + if tv_rec.INFO['PctSeqSimilarity'] < tv_min_seq_pct: + counts[size][vd_type][TV_FPN_SEQ] += 1 + if tv_rec.INFO['PctSizeSimilarity'] < tv_min_size_pct: + counts[size][vd_type][TV_FPN_SIZE] += 1 + if tv_rec.INFO['PctRecOverlap'] < tv_min_ovlp_pct: + counts[size][vd_type][TV_FPN_OVLP] += 1 + + # vcfdist SNP adjacent to INS + elif len(vd_rec.REF) == 1 and len(vd_rec.ALT[0]) == 1: + vd_used = True + size = get_size(vd_rec.REF, vd_rec.ALT[0]) + vd_type = get_vd_type(float(vd_rec.genotype(name)['BC'])) + tv_type = TV_NONE + counts[size][vd_type][tv_type] += 1 + + else: # position but not allele match, likely different phasing order + + # get next record for each + tv_rec_next = next(truvari_vcf, None) + vd_rec_next = next(vcfdist_vcf, None) + while vd_rec_next != None and vd_rec_next.genotype(name)['BD'] == None: # skip other callset + vd_rec_next = next(vcfdist_vcf, None) + while tv_rec_next != None and tv_rec_next.ALT[0] == "*": # skip nested var + tv_rec_next = next(truvari_vcf, None) + + # test if swapping causes matches + if tv_rec.REF == vd_rec_next.REF and \ + tv_rec.ALT[0] == vd_rec_next.ALT[0] and \ + tv_rec_next.REF == vd_rec.REF and \ + tv_rec_next.ALT[0] == vd_rec.ALT[0]: + # 1: tv_rec, vd_rec_next + size1 = get_size(tv_rec.REF, tv_rec.ALT[0]) + vd_type1 = get_vd_type(float(vd_rec_next.genotype(name)['BC'])) + tv_type1 = get_tv_type(tv_rec.genotype(name)['BD']) + counts[size1][vd_type1][tv_type1] += 1 + # 2: tv_rec_next, vd_rec + size2 = get_size(tv_rec_next.REF, tv_rec_next.ALT[0]) + vd_type2 = get_vd_type(float(vd_rec.genotype(name)['BC'])) + tv_type2 = get_tv_type(tv_rec_next.genotype(name)['BD']) + counts[size2][vd_type2][tv_type2] += 1 + tv_used, vd_used = True, True + else: + if do_print: print("ERROR: failed to match") + + # discard current two, pretend we haven't looked at next two + counts[size][VD_NONE][TV_NONE] += 2 + tv_rec = tv_rec_next + vd_rec = vd_rec_next + + + elif vd_pos < tv_pos: # vcfdist only + vd_used = True + size = get_size(vd_rec.REF, vd_rec.ALT[0]) + if do_print: + print(f" VD {name} {vd_rec.genotype(name)['GT']} {vd_rec.CHROM}:{vd_rec.POS}\t{vd_rec.REF}\t{vd_rec.ALT[0]}\t") + if size != SZ_SNP: + print("WARN: vcfdist only, not SNP") + vd_type = get_vd_type(float(vd_rec.genotype(name)['BC'])) + tv_type = TV_NONE + counts[size][vd_type][tv_type] += 1 + + elif tv_pos < vd_pos: # truvari only + if do_print: + print(f"TV {name} {tv_rec.genotype(name)['GT']} {tv_rec.CHROM}:{tv_rec.POS}\t{tv_rec.REF}\t{tv_rec.ALT[0]}") + print("WARN: Truvari only") + tv_used = True + size = get_size(tv_rec.REF, tv_rec.ALT[0]) + vd_type = VD_NONE + tv_type = get_tv_type(tv_rec.genotype(name)['BD']) + counts[size][vd_type][tv_type] += 1 + + # skip if info unavailable + if tv_rec.INFO['PctSeqSimilarity'] == None or \ + tv_rec.INFO['PctSizeSimilarity'] == None or \ + tv_rec.INFO['PctRecOverlap'] == None: + counts[size][vd_type][TV_FPN_OVLP] += 1 + continue + + # count truvari filter fails + if tv_rec.INFO['PctSeqSimilarity'] < tv_min_seq_pct: + counts[size][vd_type][TV_FPN_SEQ] += 1 + if tv_rec.INFO['PctSizeSimilarity'] < tv_min_size_pct: + counts[size][vd_type][TV_FPN_SIZE] += 1 + if tv_rec.INFO['PctRecOverlap'] < tv_min_ovlp_pct: + counts[size][vd_type][TV_FPN_OVLP] += 1 + + for size_idx in range(SZ_DIMS): + fig, ax = plt.subplots(figsize=(12,12)) + ax.matshow(np.log(counts[size_idx] + 0.1), cmap="Blues") + plt.title(f"{name} {sizes[size_idx]} Confusion Matrix") + plt.ylabel("vcdist") + ax.set_yticks(list(range(VD_DIMS))) + ax.set_yticklabels(vd_cats) + plt.xlabel("TruVari") + ax.set_xticks(list(range(TV_DIMS))) + ax.set_xticklabels(tv_cats) + for (i,j), z in np.ndenumerate(counts[size_idx]): + ax.text(j, i, f"{int(z)}", ha='center', va='center', + bbox=dict(boxstyle='round', facecolor='white', edgecolor='0.3')) + plt.savefig(f"img/13_{callset}_{size_idx}_cm.png", dpi=200) diff --git a/pipeline/5_vcfeval.sh b/pipeline/5_vcfeval.sh index 6c4f612..45ab2a2 100755 --- a/pipeline/5_vcfeval.sh +++ b/pipeline/5_vcfeval.sh @@ -9,6 +9,19 @@ for truth in "${truth_ids[@]}"; do mkdir -p $data/pfda-v2/${truth}_vcfeval done +# rtgtools multi-threaded vcfeval timing evaluation +$parallel -j1 --joblog 5_vcfeval.log \ + "$timer -v -o 5_{4}_{2}.txt ~/software/rtg-tools-3.12.1/rtg vcfeval \ + -b $data/{3}/$ref_id/{5} \ + -c $data/pfda-v2/phased_vcfs/$ref_id/{1}_HG002_{2}.vcf.gz \ + -t $data/refs/$ref_name.sdf \ + -e $data/{3}/$ref_id/{6} \ + --threads 56 \ + --all-records \ + -o $data/pfda-v2/{4}_vcfeval/{1}_HG002_{2}" ::: \ + ${sub_ids[@]} ::: ${reps[@]} ::: \ + ${truth_names[@]} :::+ ${truth_ids[@]} :::+ ${truth_vcfs[@]} :::+ ${truth_beds[@]} + # # rtgtools vcfeval timing evaluation # $parallel -j1 --joblog 5_vcfeval.log \ # "$timer -v -o 5_{4}_{2}.txt ~/software/rtg-tools-3.12.1/rtg RTG_MEM=8G vcfeval \ @@ -21,21 +34,21 @@ done # ${sub_ids[@]} ::: ${reps[@]} ::: \ # ${truth_names[@]} :::+ ${truth_ids[@]} :::+ ${truth_vcfs[@]} :::+ ${truth_beds[@]} -# hap.py vcfeval timing evaluation -# "$timer -v -o 5_{4}_{2}.txt python ~/software/happy/install/bin/hap.py \ -$parallel -j5 --joblog 5_vcfeval.log \ - "$timer -v -o 5_{4}_{2}.txt taskset 1 python ~/software/happy/install/bin/hap.py \ - $data/{3}/$ref_id/{5} \ - $data/pfda-v2/phased_vcfs/$ref_id/{1}_HG002_{2}.vcf.gz \ - -r $data/refs/$ref_name \ - -T $data/{3}/$ref_id/{6} \ - --threads 1 \ - --roc QUAL \ - --write-counts \ - --engine vcfeval \ - -o $data/pfda-v2/{4}_vcfeval/{1}_HG002_{2}" ::: \ - ${sub_ids[@]} ::: ${reps[@]} ::: \ - ${truth_names[@]} :::+ ${truth_ids[@]} :::+ ${truth_vcfs[@]} :::+ ${truth_beds[@]} +# # hap.py vcfeval timing evaluation +# # "$timer -v -o 5_{4}_{2}.txt python ~/software/happy/install/bin/hap.py \ +# $parallel -j5 --joblog 5_vcfeval.log \ +# "$timer -v -o 5_{4}_{2}.txt taskset 1 python ~/software/happy/install/bin/hap.py \ +# $data/{3}/$ref_id/{5} \ +# $data/pfda-v2/phased_vcfs/$ref_id/{1}_HG002_{2}.vcf.gz \ +# -r $data/refs/$ref_name \ +# -T $data/{3}/$ref_id/{6} \ +# --threads 1 \ +# --roc QUAL \ +# --write-counts \ +# --engine vcfeval \ +# -o $data/pfda-v2/{4}_vcfeval/{1}_HG002_{2}" ::: \ +# ${sub_ids[@]} ::: ${reps[@]} ::: \ +# ${truth_names[@]} :::+ ${truth_ids[@]} :::+ ${truth_vcfs[@]} :::+ ${truth_beds[@]} # # standardized representation (vcfdist output) # $parallel -j2 --joblog 5_vcfevalstd.log \ diff --git a/pipeline/6_vcfdist.sh b/pipeline/6_vcfdist.sh index aaa50ff..a95cc22 100755 --- a/pipeline/6_vcfdist.sh +++ b/pipeline/6_vcfdist.sh @@ -6,6 +6,18 @@ for truth in "${truth_ids[@]}"; do mkdir -p $data/pfda-v2/${truth}_vcfdist done +# standard reps +$parallel -j1 --joblog 6_vcfdist.log \ +"$timer -v -o vcfdist_{1}_{4}_{2}.log $vcfdist \ + $data/pfda-v2/phased_vcfs/$ref_id/{1}_HG002_{2}.vcf.gz \ + $data/{3}/$ref_id/{5} \ + $data/refs/$ref_name \ + -t -q \ + -b $data/{3}/$ref_id/{6} \ + -p $data/pfda-v2/{4}_vcfdist/{1}_HG002_{2}." ::: \ + ${sub_ids[@]} ::: ${reps[@]} ::: \ + ${truth_names[@]} :::+ ${truth_ids[@]} :::+ ${truth_vcfs[@]} :::+ ${truth_beds[@]} + # # without standardization, use original representations # $parallel -j5 --joblog 6_vcfdist.log \ # "$vcfdist \ @@ -29,14 +41,14 @@ done # ${sub_ids[@]} ::: ${reps[@]} ::: \ # ${truth_names[@]} :::+ ${truth_ids[@]} :::+ ${truth_vcfs[@]} :::+ ${truth_beds[@]} -# standard reps -$parallel -j1 --joblog 6_vcfdist.log \ -"$timer -v -o vcfdist_{1}_{4}_{2}.log $vcfdist \ - $data/pfda-v2/phased_vcfs/$ref_id/{1}_HG002_{2}.vcf.gz \ - $data/{3}/$ref_id/{5} \ - $data/refs/$ref_name \ - -b $data/{3}/$ref_id/{6} \ - -p $data/pfda-v2/{4}_vcfdist/{1}_HG002_{2}." ::: \ - ${sub_ids[@]} ::: ${reps[@]} ::: \ - ${truth_names[@]} :::+ ${truth_ids[@]} :::+ ${truth_vcfs[@]} :::+ ${truth_beds[@]} +# # standard reps +# $parallel -j1 --joblog 6_vcfdist.log \ +# "$timer -v -o vcfdist_{1}_{4}_{2}.log $vcfdist \ +# $data/pfda-v2/phased_vcfs/$ref_id/{1}_HG002_{2}.vcf.gz \ +# $data/{3}/$ref_id/{5} \ +# $data/refs/$ref_name \ +# -b $data/{3}/$ref_id/{6} \ +# -p $data/pfda-v2/{4}_vcfdist/{1}_HG002_{2}." ::: \ +# ${sub_ids[@]} ::: ${reps[@]} ::: \ +# ${truth_names[@]} :::+ ${truth_ids[@]} :::+ ${truth_vcfs[@]} :::+ ${truth_beds[@]} diff --git a/pipeline/includes.sh b/pipeline/includes.sh index f45eed0..fa8f432 100755 --- a/pipeline/includes.sh +++ b/pipeline/includes.sh @@ -110,4 +110,4 @@ e=( # include orig rep for eval # reps=() reps=("O") -reps+=("${pts[@]}") +# reps+=("${pts[@]}")