Skip to content

Commit

Permalink
outsource merging of predicted mhccII alleles
Browse files Browse the repository at this point in the history
  • Loading branch information
riasc committed Mar 1, 2024
1 parent 4985d42 commit 4cc4c99
Showing 1 changed file with 45 additions and 0 deletions.
45 changes: 45 additions & 0 deletions workflow/scripts/genotyping/merge_predicted_mhcII.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,45 @@
import sys
import re
from pathlib import Path


"""
This script combines the predicted MHC-II alleles from different `groups`.
Usage:
python merge_predicted_mhcII.py '<input>' <output>
"""


def main():
    """Merge predicted MHC-II alleles from several per-group result files.

    Command-line arguments:
        sys.argv[1]: a single string holding the input file paths,
            separated by whitespace.
        sys.argv[2]: path of the merged output file.

    The group name of each input is taken from its file name stem: the
    stem must contain ``_RNA`` or ``_DNA``, and everything before the last
    such marker is the group.

    Every input line is split on tabs; the first field is skipped and each
    remaining field is treated as an allele. Fields equal to ``-`` or
    ``Not typed`` are ignored.

    The output contains one line per allele, sorted by allele name:
    ``<group>[,<group>...]<TAB><allele>``, with groups listed in the order
    in which they were first seen.

    Raises:
        ValueError: if an input file name does not contain ``_RNA`` or
            ``_DNA`` after a non-empty group prefix.
    """
    group_pattern = re.compile(r'^(.+)_(RNA|DNA)')
    alleles = {}

    # split() without a separator collapses runs of whitespace, so doubled
    # or trailing spaces in the argument do not produce empty paths.
    for infile in sys.argv[1].split():
        match = group_pattern.search(Path(infile).stem)
        if match is None:
            raise ValueError(
                f"cannot derive group from file name '{infile}': "
                "expected '<group>_RNA...' or '<group>_DNA...'"
            )
        group = match.group(1)

        with open(infile, "r") as fh:
            for line in fh:
                fields = line.strip().split("\t")
                for allele in fields[1:]:
                    # make sure the alleles were typed successfully
                    if allele == "-" or allele == "Not typed":
                        continue
                    groups = alleles.setdefault(allele, [])
                    if group not in groups:
                        groups.append(group)

    with open(sys.argv[2], 'w') as out:
        for allele, groups in sorted(alleles.items()):
            out.write(f'{",".join(groups)}\t{allele}\n')

# Run only when executed as a script, so importing this module (e.g. for
# testing) does not trigger the merge.
if __name__ == "__main__":
    main()

0 comments on commit 4cc4c99

Please sign in to comment.