diff --git a/pdb_corner/chain_pdb_to_fasta.py b/pdb_corner/chain_pdb_to_fasta.py deleted file mode 100644 index 2139a50..0000000 --- a/pdb_corner/chain_pdb_to_fasta.py +++ /dev/null @@ -1,25 +0,0 @@ -# python3 -import argparse -from Bio.PDB import * -from Bio import SeqIO -from Bio.Seq import Seq -from Bio.SeqRecord import SeqRecord -# input parameters -ap = argparse.ArgumentParser() -ap.add_argument("-pdb", "--pdb", required=True, help="input pdb file") -ap.add_argument("-model", "--model",default=0, required=False, help="model from pdb file to select(integer). Default is 0(1 model only)") -ap.add_argument("-chain", "--chain", required=True, help="chain from pdb file to select") -args = vars(ap.parse_args()) -# main -# select chain -parser = PDBParser() -s = parser.get_structure("name", args['pdb']) -fill = s[int(args['model'])][args['chain']] -# retrieve the pdb id of the input file -pdb_id = str(args['pdb']).split(".")[0] -# export to fasta -ppb = PPBuilder() -for pp in ppb.build_peptides(fill): - record = SeqRecord(Seq(str(pp.get_sequence())),id="".join([str(pdb_id),"_",str(args['chain'])]),description="") - SeqIO.write(record, "".join([str(pdb_id),"_",str(args['chain']),".fasta"]), "fasta") - diff --git a/pdb_corner/subset_pdb_to_fasta.py b/pdb_corner/subset_pdb_to_fasta.py index 022b072..9f80dfe 100644 --- a/pdb_corner/subset_pdb_to_fasta.py +++ b/pdb_corner/subset_pdb_to_fasta.py @@ -1,4 +1,5 @@ # python3 +import os import argparse from Bio.PDB import * from Bio import SeqIO @@ -6,46 +7,63 @@ from Bio.SeqRecord import SeqRecord # input parameters ap = argparse.ArgumentParser() -ap.add_argument("-pdb", "--pdb", required=True, help="input pdb file") +ap.add_argument("-pdb", "--pdb", required=False, help="input pdb file") ap.add_argument("-model", "--model",default=0, required=False, help="model from pdb file to select(integer). Default is 0(1 model only)") -ap.add_argument("-chain", "--chain", required=True, help="chain from pdb file to select") +ap.add_argument("-chain", "--chain", required=False, default='A', help="chain from pdb file to select") ap.add_argument("-start", "--start", required=False,default=1, type=int, help="amino acid in chain to start writing the fasta file. Default is 1") ap.add_argument("-end", "--end", required=False, type=int, help="amino acid in chain to end writing the fasta file") -ap.add_argument("-pro", "--program", required=False,default=1, type=int, help="program to choose 1) add both start and end location 2) the end location with be that of the latest amino acid in the chain. Default is 1") +ap.add_argument("-pro", "--program", required=False,default=1, type=int, help="program to choose 1) add both start and end location 2) the end location will be that of the latest amino acid in the chain. Default is 1") +ap.add_argument("-type", "--type", required=False,default=1, type=int, help="type of input to choose. 1) 1 pdb file, 2) many pdb files. Default is 1") args = vars(ap.parse_args()) # main -# select chain -parser = PDBParser() -s = parser.get_structure("name", args['pdb']) -fill = s[int(args['model'])][args['chain']] -# retrieve the pdb id of the input file -pdb_id = str(args['pdb']).split(".")[0] -# retrieve chain amino acids -ppb = PPBuilder() -for pp in ppb.build_peptides(fill): - aa_chain = str(pp.get_sequence()) -# choose program -if args['program'] == 1: - # fix the index for start parameter - if args['start'] > 0: - aa_start = args['start'] -1 +# create function to trim + convert to fasta +def trim_to_fasta(fi): + # select chain + fill = s[int(args['model'])][args['chain']] + # select between importing 1 or many pdb files + if args['type'] == 1: + # retrieve the pdb id of the input file + pdb_id = str(fi).split(".")[0] else: - print("-start parameter must be a positive integer") - exit(1) - # add end parameter - aa_end = args['end'] -else: - # fix the index for start parameter - if args['start'] > 0: - aa_start = args['start'] -1 + pdb_id = fi.split(".")[0] + # retrieve chain amino acids + ppb = PPBuilder() + for pp in ppb.build_peptides(fill): + aa_chain = str(pp.get_sequence()) + # choose program + if args['program'] == 1: + # fix the index for start parameter + if args['start'] > 0: + aa_start = args['start'] -1 + else: + print("-start parameter must be a positive integer") + exit(1) + # add end parameter + aa_end = args['end'] else: - print("-start parameter must be a positive integer") - exit(1) - # add end parameter according to program 2 - args['end'] = len(aa_chain) - aa_end = args['end'] -# subset based on aa in chain -sub_seq = aa_chain[aa_start:aa_end] -# export to fasta -record = SeqRecord(Seq(sub_seq),id="".join([str(pdb_id),"_",str(args['chain']),"_",str(args['start']),"_",str(args['end'])]),description="") -SeqIO.write(record, "".join([str(pdb_id),"_",str(args['chain']),"_",str(args['start']),"_",str(args['end']),".fasta"]), "fasta") + # fix the index for start parameter + if args['start'] > 0: + aa_start = args['start'] -1 + else: + print("-start parameter must be a positive integer") + exit(1) + # add end parameter according to program 2 + args['end'] = len(aa_chain) + aa_end = args['end'] + # subset based on aa in chain + sub_seq = aa_chain[aa_start:aa_end] + # export to fasta + record = SeqRecord(Seq(sub_seq),id="".join([str(pdb_id),"_",str(args['chain']),"_",str(args['start']),"_",str(args['end'])]),description="") + return SeqIO.write(record, "".join([str(pdb_id),"_",str(args['chain']),"_",str(args['start']),"_",str(args['end']),".fasta"]), "fasta") +# select between importing 1 or many pdb files +if args['type'] == 1: + parser = PDBParser() + s = parser.get_structure("name", args['pdb']) + trim_to_fasta(args['pdb']) +else: + # import each fasta file from the working directory + for filename in sorted(os.listdir(str(os.getcwd()))): + if filename.endswith(".pdb"): + parser = PDBParser() + s = parser.get_structure("name", filename) + trim_to_fasta(filename)