support for many pdb files added

olgatsiouri1996 · Jan 5, 2022 · 60e054d · 60e054d
1 parent a151d50
commit 60e054d
Show file tree

Hide file tree

Showing 2 changed files with 54 additions and 61 deletions.
diff --git a/pdb_corner/chain_pdb_to_fasta.py b/pdb_corner/chain_pdb_to_fasta.py
diff --git a/pdb_corner/subset_pdb_to_fasta.py b/pdb_corner/subset_pdb_to_fasta.py
@@ -1,51 +1,69 @@
 # python3
+import os
 import argparse
 from Bio.PDB import *
 from Bio import SeqIO
 from Bio.Seq import Seq
 from Bio.SeqRecord import SeqRecord
 # input parameters
 ap = argparse.ArgumentParser()
-ap.add_argument("-pdb", "--pdb", required=True, help="input pdb file")
+ap.add_argument("-pdb", "--pdb", required=False, help="input pdb file")
 ap.add_argument("-model", "--model",default=0, required=False, help="model from pdb file to select(integer). Default is 0(1 model only)")
-ap.add_argument("-chain", "--chain", required=True, help="chain from pdb file to select")
+ap.add_argument("-chain", "--chain", required=False, default='A', help="chain from pdb file to select")
 ap.add_argument("-start", "--start", required=False,default=1, type=int, help="amino acid in chain to start writing the fasta file. Default is 1")
 ap.add_argument("-end", "--end", required=False, type=int, help="amino acid in chain to end writing the fasta file")
-ap.add_argument("-pro", "--program", required=False,default=1, type=int, help="program to choose 1) add both start and end location 2) the end location with be that of the latest amino acid in the chain. Default is 1")
+ap.add_argument("-pro", "--program", required=False,default=1, type=int, help="program to choose 1) add both start and end location 2) the end location will be that of the latest amino acid in the chain. Default is 1")
+ap.add_argument("-type", "--type", required=False,default=1, type=int, help="type of input to choose. 1) 1 pdb file, 2) many pdb files. Default is 1")
 args = vars(ap.parse_args())
 # main
-# select chain
-parser = PDBParser()
-s = parser.get_structure("name", args['pdb'])
-fill = s[int(args['model'])][args['chain']]
-# retrieve the pdb id of the input file
-pdb_id = str(args['pdb']).split(".")[0]
-# retrieve chain amino acids
-ppb = PPBuilder()
-for pp in ppb.build_peptides(fill):
-    aa_chain = str(pp.get_sequence())
-# choose program
-if args['program'] == 1:
-    # fix the index for start parameter
-    if args['start'] > 0:
-        aa_start = args['start'] -1
+# create function to trim + convert to fasta
+def trim_to_fasta(fi):
+    # select chain
+    fill = s[int(args['model'])][args['chain']]
+    # select between importing 1 or many pdb files
+    if args['type'] == 1:
+        # retrieve the pdb id of the input file
+        pdb_id = str(fi).split(".")[0]
     else:
-        print("-start parameter must be a positive integer")
-        exit(1)
-    # add end parameter
-    aa_end = args['end']
-else:
-    # fix the index for start parameter
-    if args['start'] > 0:
-        aa_start = args['start'] -1
+        pdb_id = fi.split(".")[0]
+    # retrieve chain amino acids
+    ppb = PPBuilder()
+    for pp in ppb.build_peptides(fill):
+        aa_chain = str(pp.get_sequence())
+    # choose program
+    if args['program'] == 1:
+        # fix the index for start parameter
+        if args['start'] > 0:
+            aa_start = args['start'] -1
+        else:
+            print("-start parameter must be a positive integer")
+            exit(1)
+        # add end parameter
+        aa_end = args['end']
     else:
-        print("-start parameter must be a positive integer")
-        exit(1)
-    # add end parameter according to program 2
-    args['end'] = len(aa_chain)
-    aa_end = args['end']
-# subset based on aa in chain
-sub_seq = aa_chain[aa_start:aa_end]
-# export to fasta
-record = SeqRecord(Seq(sub_seq),id="".join([str(pdb_id),"_",str(args['chain']),"_",str(args['start']),"_",str(args['end'])]),description="")
-SeqIO.write(record, "".join([str(pdb_id),"_",str(args['chain']),"_",str(args['start']),"_",str(args['end']),".fasta"]), "fasta")
+        # fix the index for start parameter
+        if args['start'] > 0:
+            aa_start = args['start'] -1
+        else:
+            print("-start parameter must be a positive integer")
+            exit(1)
+        # add end parameter according to program 2
+        args['end'] = len(aa_chain)
+        aa_end = args['end']
+    # subset based on aa in chain
+    sub_seq = aa_chain[aa_start:aa_end]
+    # export to fasta
+    record = SeqRecord(Seq(sub_seq),id="".join([str(pdb_id),"_",str(args['chain']),"_",str(args['start']),"_",str(args['end'])]),description="")
+    return SeqIO.write(record, "".join([str(pdb_id),"_",str(args['chain']),"_",str(args['start']),"_",str(args['end']),".fasta"]), "fasta")
+# select between importing 1 or many pdb files
+if args['type'] == 1:    
+    parser = PDBParser()
+    s = parser.get_structure("name", args['pdb'])
+    trim_to_fasta(args['pdb'])
+else:
+    # import each fasta file from the working directory
+    for filename in sorted(os.listdir(str(os.getcwd()))):
+        if filename.endswith(".pdb"):
+            parser = PDBParser()
+            s = parser.get_structure("name", filename)
+            trim_to_fasta(filename)