"""Combine the predicted MHC-II alleles from different `groups`.

Each input file is read as tab-separated text: the first column of every
line is ignored and every following column is taken as an allele name.
The group an allele belongs to is derived from the input file name, which
must look like ``<group>_RNA...`` or ``<group>_DNA...``.

The output file contains one line per allele, sorted by allele name::

    <group1>,<group2>,...<TAB><allele>

Usage:

    python merge_predicted_mhcII.py '<infile1> <infile2> ...' <outfile>

The input files are passed as ONE whitespace-separated argument, so the
individual paths must not contain whitespace.
"""

import re
import sys
from pathlib import Path

# The group name is everything in the file stem before `_RNA` / `_DNA`.
_GROUP_PATTERN = re.compile(r'^(.+)_(RNA|DNA)')

# Column values that mark an allele that was not typed successfully.
_UNTYPED = ("-", "Not typed")

_USAGE = (
    "usage: python merge_predicted_mhcII.py "
    "'<infile1> <infile2> ...' <outfile>"
)


def _group_from_path(infile):
    """Return the group name encoded in the file name of *infile*.

    Raises:
        ValueError: if the file stem does not contain `_RNA` or `_DNA`
            preceded by at least one character.
    """
    stem = Path(infile).stem
    match = _GROUP_PATTERN.search(stem)
    if match is None:
        raise ValueError(
            f"cannot derive group from '{infile}': "
            "expected a file name like <group>_RNA* or <group>_DNA*"
        )
    return match.group(1)


def _collect_alleles(infiles):
    """Map each successfully typed allele to the groups it was found in.

    Groups are listed once per allele, in the order they were first seen.
    """
    alleles = {}
    for infile in infiles:
        group = _group_from_path(infile)
        with open(infile, "r") as fh:
            for line in fh:
                fields = line.strip().split("\t")
                # first column is skipped; the rest are allele calls
                for allele in fields[1:]:
                    # make sure the alleles were typed successfully
                    if allele in _UNTYPED:
                        continue
                    groups = alleles.setdefault(allele, [])
                    if group not in groups:
                        groups.append(group)
    return alleles


def main(argv=None):
    """Merge the alleles of all input files and write them to the output.

    Args:
        argv: optional argument list ``[infiles, outfile]`` where
            `infiles` is a single whitespace-separated string of paths.
            Defaults to ``sys.argv[1:]``. Extra arguments are ignored.

    Raises:
        SystemExit: if fewer than two arguments are given.
        ValueError: if an input file name does not encode a group.
    """
    args = sys.argv[1:] if argv is None else list(argv)
    if len(args) < 2:
        sys.exit(_USAGE)

    # split() (not split(" ")) so that repeated/trailing blanks or an empty
    # file list do not produce empty path strings
    infiles = args[0].split()
    alleles = _collect_alleles(infiles)

    with open(args[1], 'w') as out:
        for allele in sorted(alleles):
            groups = ",".join(alleles[allele])
            out.write(f'{groups}\t{allele}\n')


if __name__ == "__main__":
    main()