-
Notifications
You must be signed in to change notification settings - Fork 0
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
added scripts from http://munk.cis.unimelb.edu.au/~stivalaa/satabsear…
- Loading branch information
Showing
147 changed files
with
24,906 additions
and
0 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,14 @@ | ||
#!/bin/sh | ||
# | ||
# build the tableau + distance matrix for tsrchd_sparse for the | ||
# beta-grasp query (as the 4 largest strands and 1 alpha helix in | ||
# ubiquitin structure) | ||
# | ||
# $Id: build_betagrasp_query.sh 2908 2009-11-06 05:33:18Z astivala $ | ||
|
||
# tableaux+distmatrix db file named on the first line of the query
TABLEAUX_DB="${HOME}/tableauxdistmatrixdb.ascii"

# structure file (ubiquitin) the query tableau is built from
QUERY_PDB="${HOME}/pdb/d1ubia_.ent"

# The query is written to stdout: the database path, then the options
# line, then the output of pytableaucreate.py.
echo "${TABLEAUX_DB}"
echo "T T F"   # options: type,order,output
# The path is quoted so that a HOME containing whitespace is still passed
# to pytableaucreate.py as a single argument.
pytableaucreate.py -bf -35 -tdssp -p none -i BGRASP -s2,1,8,5,3 "${QUERY_PDB}"
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,65 @@ | ||
#!/bin/sh | ||
# | ||
# File: build_cops_db.sh | ||
# Author: Alex Stivala | ||
# Created: May 2010 | ||
# | ||
# | ||
# build_cops_db.sh - build tableaux database for COPS benchmark data set | ||
# | ||
# Usage: build_cops_db.sh querydir dbfile | ||
# | ||
# querydir is directory to put query input tableaux into | ||
# dbfile is basename of tableaux database to create, will create | ||
# dbfile.tableaux.pickle, dbfile.distmatrix.pickle and | ||
# dbfile.tableauxdb.ascii | ||
# | ||
# Builds tableaux for queries and database for the COPS benchmark data set | ||
# (Frank et al. 2010 "COPS Benchmark: interactive analysis of database | ||
# search methods" Bioinformatics 26(4):574-575) available from | ||
# http://benchmark.services.came.sbg.ac.at/ | ||
# | ||
# Requires the buildtableauxdb.py and pytableaucreate.py and convdb2.py | ||
# scripts in PATH. | ||
# | ||
# WARNING: dbfile and files in querydir are overwritten if they exist. | ||
# | ||
# $Id: build_cops_db.sh 3632 2010-05-12 02:07:26Z alexs $ | ||
|
||
# Location of the COPS benchmark data set: query and database PDB files
COPS_ROOT="${HOME}/cops-benchmark-2009-6-full"
COPS_PDB_QUERIES="${COPS_ROOT}/queries/pdb"
COPS_PDB_DB="${COPS_ROOT}/database/pdb"

if [ $# -ne 2 ]; then
    echo "Usage: $0 querydir dbfile" >&2
    exit 1
fi

QUERYDIR=$1
DBFILE=$2

# Options shared by pytableaucreate.py and buildtableauxdb.py.
# $OPTIONS is deliberately expanded UNQUOTED below so that it splits into
# separate command-line arguments.
OPTIONS="-p none -35 -t dssp"

if [ ! -d "$QUERYDIR" ]; then
    # -p also creates missing parent directories. Stop here if the
    # directory cannot be created: every query file write would fail.
    mkdir -p "$QUERYDIR" || exit 1
fi

tableaux_pickle="${DBFILE}.tableaux.pickle"
distmatrix_pickle="${DBFILE}.distmatrix.pickle"
tableauxdb="${DBFILE}.tableauxdb.ascii"

# One query input file per query structure: the database file name, the
# options line, then the output of pytableaucreate.py for that structure.
for query in "${COPS_PDB_QUERIES}"/*.pdb
do
    # When nothing matches, the shell leaves the glob as a literal
    # pattern; skip it rather than running on a nonexistent file.
    [ -e "$query" ] || continue
    qid=`basename "$query" .pdb`
    qfile="${QUERYDIR}/${qid}.input"
    echo "$tableauxdb" > "$qfile"
    echo "T T F" >> "$qfile"   # options: type, order, output
    pytableaucreate.py -f -b $OPTIONS "$query" >> "$qfile"
done

# Build the two pickles from the database structures (the second run, with
# -d, writes the distance matrix pickle), then convert the pair into the
# ASCII database file.
buildtableauxdb.py $OPTIONS "$COPS_PDB_DB" "$tableaux_pickle"
buildtableauxdb.py -d $OPTIONS "$COPS_PDB_DB" "$distmatrix_pickle"

convdb2.py "$tableaux_pickle" "$distmatrix_pickle" > "$tableauxdb"
|
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,138 @@ | ||
#!/usr/bin/env python | ||
############################################################################### | ||
# | ||
# build_fastscopdominfo_cache.py - build pickle file for cached SCOP info | ||
# | ||
# File: build_fastscopdominfo_cache.py | ||
# Author: Alex Stivala | ||
# Created: March 2010 | ||
# | ||
# $Id: scopdominfo.py 3009 2009-12-08 03:01:48Z alexs $ | ||
# | ||
############################################################################### | ||
|
||
""" | ||
Build cache (Python pickled dictionary) of information on the folds | ||
and superfamilies of SCOP domain identifiers (sids). | ||
See usage in docstring for main() | ||
SCOP and ASTRAL data is obtained using the Bio.SCOP library (Casbon et | ||
al 2006 'A high level interface to SCOP and ASTRAL implemented in | ||
Python' BMC Bioinformatics 7:10) and depends on having the data | ||
downloaded, in SCOP_DIR (defined below). | ||
Downloaded SCOP files from | ||
http://scop.mrc-lmb.cam.ac.uk/scop/parse/index.html | ||
and ASTRAL files (in scopseq-1.73) from | ||
http://astral.berkeley.edu/scopseq-1.73.html | ||
The files downloaded are: | ||
/local/charikar/SCOP/: | ||
dir.cla.scop.txt_1.73 | ||
dir.des.scop.txt_1.73 | ||
dir.hie.scop.txt_1.73 | ||
/local/charikar/SCOP/scopseq-1.73: | ||
astral-scopdom-seqres-all-1.73.fa | ||
astral-scopdom-seqres-sel-gs-bib-95-1.73.id | ||
Other files there are indices built by Bio.SCOP when first used. | ||
""" | ||
|
||
import sys,os | ||
import pickle | ||
|
||
from Bio.SCOP import * | ||
|
||
from pathdefs import SCOP_DIR,SCOP_VERSION | ||
|
||
#----------------------------------------------------------------------------- | ||
# | ||
# Function definitions | ||
# | ||
#----------------------------------------------------------------------------- | ||
|
||
|
||
def build_scopdominfo_dict(scop):
    """
    Build dictionary with information about the superfamily and fold
    of all SCOP domains.

    Parameters:
       scop - previously built Bio.SCOP Scop instance

    Return value:
       dict {sid: (superfamily_sccs, superfamily_description,
                   fold_sccs, fold_description)}
       where
         superfamily_sccs is SCOP sccs identifying the superfamily
                          for the domain
         superfamily_description is SCOP description of the superfamily
         fold_sccs is SCOP sccs identifying the fold the domain is in
         fold_description is the SCOP description of the fold the
                          domain is in
    """
    scopdominfo_dict = {}
    for scop_dom in scop.getDomains():
        # Only the superfamily and fold ancestors are stored, so those are
        # the only two levels looked up for each domain.
        scop_superfamily = scop_dom.getAscendent('superfamily')
        scop_fold = scop_dom.getAscendent('fold')
        scopdominfo_dict[scop_dom.sid] = (scop_superfamily.sccs,
                                          scop_superfamily.description,
                                          scop_fold.sccs,
                                          scop_fold.description)

    return scopdominfo_dict
|
||
|
||
#----------------------------------------------------------------------------- | ||
# | ||
# Main | ||
# | ||
#----------------------------------------------------------------------------- | ||
|
||
def usage(progname):
    """
    Write the command-line synopsis to stderr, then exit with status 1.

    Parameters:
       progname - program name to show in the synopsis
    """
    message = "".join(("Usage: ", progname, " cachefile\n"))
    sys.stderr.write(message)
    raise SystemExit(1)
|
||
|
||
def main():
    """
    main for build_fastscopdominfo_cache.py

    Usage: build_fastscopdominfo_cache.py cachefile

      cachefile is the file to create the pickled domain info dictionary as
      WARNING: overwritten if it exists
    """
    if len(sys.argv) != 2:
        usage(os.path.basename(sys.argv[0]))

    pickle_filename = sys.argv[1]

    sys.stderr.write("Reading SCOP Data...")
    scop = Scop(dir_path=SCOP_DIR,version=SCOP_VERSION)
    sys.stderr.write("done\n")

    sys.stderr.write("Building domain info cache...")
    scopdominfo_dict = build_scopdominfo_dict(scop)
    sys.stderr.write("done. Got %d domain descriptions\n" %
                     len(scopdominfo_dict))

    sys.stderr.write("Writing cache to file %s...\n" % pickle_filename)
    # Pickle output is a byte stream, so the file must be opened in binary
    # mode: text mode fails on Python 3 and can corrupt the data on
    # platforms that translate newlines. The with-block closes the file
    # even if pickle.dump() raises.
    with open(pickle_filename, "wb") as fh:
        pickle.dump(scopdominfo_dict, fh)
    sys.stderr.write("done\n")


# Run main() only when executed as a script, not when imported.
if __name__ == "__main__":
    main()
|
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,76 @@ | ||
#!/bin/sh | ||
# | ||
# File: build_fischer_cm.sh | ||
# Author: Alex Stivala | ||
# Created: September 2008 | ||
# | ||
# build_fischer_cm.sh - build contact maps for Fischer data set | ||
# | ||
# Usage: build_fischer_cm.sh outdir | ||
# | ||
# outdir is name of directory which is created, and each contact map | ||
# in ASCII format for use with MSVNS4MaxCMO (Pelta et al 2008) | ||
# or other program using this format of contact matrix is | ||
# created as a separate file in that directory, in format for input | ||
# for use with msvns4maxcmo_allall.py for example | ||
# | ||
# builds contact maps, using pconpy.py, | ||
# for the Fischer data set (Fischer et al 1996 Pac. Symp. Biocomput. 300-318). | ||
# This allows all-against-all (including redundant, so for n (=68) | ||
# there are n*n (=4624) total comparisons) with e.g. msvns4maxcmo_allall.py. | ||
# | ||
# | ||
|
||
# root of divided PDB hierarchy | ||
PDBROOT=/local/charikar/pdb/pdb | ||
|
||
|
||
# List of probe PDB ids from Fischer 1996 Table I | ||
# Note several PDB ids obsoleted, so change to the replacements | ||
FISCHER_S="1mdc 1mup 1npx 1cpc_l 1onc 2ak3_a 1osa 1atn_a 1pfc 1arb 2cmd 2pia 2pna 3rub_l 1bbh_a 2sar_a 1c2r_a 3cd4 1chr_a 1aep 1dxt_b 2mnr 2fbj_l 1lts_d 1gky 2gbp 1hip 1bbt_1 2sas 2mta_c 1fc1_a 1tah_a 2hpd_a 1rcb 1aba 1sac_a 1eaf 1dsb_a 2sga 1stf_i 2hhm_a 2afn_a 1aaj 1fxi_a 5fd1 1bge_b 1isu_a 3hla_b 1gal 3chy 1cau_b 2aza_a 1hom 1cew 1tlk 1cid 2omf 1crl 1lga_a 2sim 1mio_c 1ten 4sbv_a 1tie 8i1b 2snv 1hrh_a 1gp1_a" | ||
|
||
|
||
# List of target fold PDB ids from Fischer 1996 Table I | ||
# Note several PDB ids obsoleted, so change to the replacements | ||
# this list corresponds to FISCHER_S ie FISCHER_P[i] is the target fold | ||
# for probe FISCHER_S[i] for 0 <= i <= 67 | ||
FISCHER_P="1ifc 1rbp 3grs 1col_a 7rsa 1gky 4cpv 1atr 3hla_b 5ptp 6ldh 1fnb 1sha_a 6xia 2ccy_a 9rnt 1ycc 2rhe 2mnr 256b_a 1hbg 4enl 8fab_b 1bov_a 3adk 2liv 2hip_a 2plv1 2scp_a 1ycc 2fb4_h 1tca 2cpp 2gmf_a 1ego 2ayh 4cla 2trx_a 5ptp 1mol_a 1fbp_a 1aoz_a 1paz 1ubq 1iqz 2gmf_a 2hip_a 2rhe 3cox 2fox 1cau_a 1paz 1lfb 1mol_a 2rhe 2rhe 2por 1ede 2cyp 1nsb_a 2min_b 3hhr_b 2tbv_a 4fgf 4fgf 5ptp 1rnh 2trx_a" | ||
|
||
|
||
# List of 68 probe sequences from Fischer 1996 Table II | ||
# Note several PDB ids obsoleted, so change to the replacements | ||
FISCHER_LIST="1dxt_b 1cpc_l 1c2r_a 2mta_c 1bbh_a 1bge_b 1rcb 1aep 1osa 2sas 1hom 1lga_a 2hpd_a 1chr_a 2mnr 3rub_l 1crl 1tah_a 1aba 1dsb_a 1gpl_a 1atn_a 1hrh_a 3chy 2ak3_a 1gky 2cmd 1eaf 2gbp 1mio_c 2pia 1gal 1npx 2hhm_a 1hip 1isu_a 1fc1_a 2fbj_l 1cid 1pfc 1ten 1tlk 3cd4 3hla_b 1aaj 2afn_a 2aza_a 4sbv_a 1bbt_1 1sac_a 1lts_d 1tie 8i1b 1arb 2sga 2snv 1mdc 1mup 2sim 1cau_b 2omf 1fxi_a 1cew 1stf_i 2pna 2sar_a 1onc 5fd1" | ||
|
||
if [ $# -ne 1 ]; then
    # Usage message belongs on stderr, not stdout.
    echo "Usage: $0 outdir" >&2
    exit 1
fi
outdir=$1

if [ ! -d "${outdir}" ]; then
    # Stop if the output directory cannot be created: every contact map
    # write below would fail.
    mkdir -p "${outdir}" || exit 1
fi

# pconpy.py options
# ${pconpyopts} is deliberately expanded UNQUOTED below so that it splits
# into separate command-line arguments.
threshold=7.0
pconpyopts="--cmaplist --threshold=${threshold} --seq_separation=2"

# NOTE(review): FISCHER_LIST contains 1gpl_a where FISCHER_S has 1gp1_a;
# possibly a typo ("l" for "1") -- confirm against Fischer 1996 Table II.
for i in $FISCHER_LIST
do
    entry=`echo "$i" | tr 'A-Z' 'a-z'`
    # cut and case are used instead of "expr substr"/"expr index", which
    # are GNU extensions and not available in every /bin/sh environment.
    pdb=`echo "$entry" | cut -c1-4`
    case "$entry" in
        *_*)
            # get chainid from e.g. 1BYO_B
            chainid=`echo "$entry" | cut -c6`
            chainopt="--chains=$chainid"
            pdbid="${pdb}_${chainid}"
            ;;
        *)
            chainopt=""
            pdbid="$pdb"
            ;;
    esac
    # divided PDB hierarchy: subdirectory is the 2nd and 3rd characters
    # of the PDB id
    div=`echo "$pdb" | cut -c2-3`
    pdbfile="${PDBROOT}/${div}/pdb${pdb}.ent.gz"
    # output file names use the upper-case identifier
    pdbid=`echo "$pdbid" | tr '[a-z]' '[A-Z]'`
    pconpy.py ${pconpyopts} ${chainopt} "--pdb=${pdbfile}" "--output=${outdir}/${pdbid}.cm_a${threshold}"
done
|
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,85 @@ | ||
#!/bin/sh | ||
# | ||
# File: build_fischer_db.sh | ||
# Author: Alex Stivala | ||
# Created: September 2008 | ||
# | ||
# build_fischer_db.sh - build tableaux database for Fischer data set | ||
# | ||
# Usage: build_fischer_db.sh outdir | ||
# | ||
# outdir is name of directory which is created, and each tableau | ||
# in ASCII format for use with tsrchd_sparse etc. is | ||
# created as a separate file in that directory, in format for input | ||
# for use with qptabmatch_allpairs.py for example | ||
# | ||
# To stdout is written the ASCII format db of all the tableaux+dist matrices | ||
# (just all the ones written to outdir concatenated together with | ||
# blank line between each). | ||
# | ||
# builds database of tableaux, using pytableaucreate.py, | ||
# for the Fischer data set (Fischer et al 1996 Pac. Symp. Biocomput. 300-318). | ||
# This allows all-against-all (including redundant, so for n (=68) | ||
# there are n*n (=4624) total comparisons) with e.g. qptabmatch_allall.py. | ||
# | ||
# | ||
|
||
# root of divided PDB hierarchy | ||
PDBROOT=/local/charikar/pdb/pdb | ||
|
||
|
||
# List of probe PDB ids from Fischer 1996 Table I | ||
# Note several PDB ids obsoleted, so change to the replacements | ||
FISCHER_S="1mdc 1mup 1npx 1cpc_l 1onc 2ak3_a 1osa 1atn_a 1pfc 1arb 2cmd 2pia 2pna 3rub_l 1bbh_a 2sar_a 1c2r_a 3cd4 1chr_a 1aep 1dxt_b 2mnr 2fbj_l 1lts_d 1gky 2gbp 1hip 1bbt_1 2sas 2mta_c 1fc1_a 1tah_a 2hpd_a 1rcb 1aba 1sac_a 1eaf 1dsb_a 2sga 1stf_i 2hhm_a 2afn_a 1aaj 1fxi_a 5fd1 1bge_b 1isu_a 3hla_b 1gal 3chy 1cau_b 2aza_a 1hom 1cew 1tlk 1cid 2omf 1crl 1lga_a 2sim 1mio_c 1ten 4sbv_a 1tie 8i1b 2snv 1hrh_a 1gp1_a" | ||
|
||
|
||
# List of target fold PDB ids from Fischer 1996 Table I | ||
# Note several PDB ids obsoleted, so change to the replacements | ||
# this list corresponds to FISCHER_S ie FISCHER_P[i] is the target fold | ||
# for probe FISCHER_S[i] for 0 <= i <= 67 | ||
FISCHER_P="1ifc 1rbp 3grs 1col_a 7rsa 1gky 4cpv 1atr 3hla_b 5ptp 6ldh 1fnb 1sha_a 6xia 2ccy_a 9rnt 1ycc 2rhe 2mnr 256b_a 1hbg 4enl 8fab_b 1bov_a 3adk 2liv 2hip_a 2plv1 2scp_a 1ycc 2fb4_h 1tca 2cpp 2gmf_a 1ego 2ayh 4cla 2trx_a 5ptp 1mol_a 1fbp_a 1aoz_a 1paz 1ubq 1iqz 2gmf_a 2hip_a 2rhe 3cox 2fox 1cau_a 1paz 1lfb 1mol_a 2rhe 2rhe 2por 1ede 2cyp 1nsb_a 2min_b 3hhr_b 2tbv_a 4fgf 4fgf 5ptp 1rnh 2trx_a" | ||
|
||
|
||
# List of 68 probe sequences from Fischer 1996 Table II | ||
# Note several PDB ids obsoleted, so change to the replacements | ||
FISCHER_LIST="1dxt_b 1cpc_l 1c2r_a 2mta_c 1bbh_a 1bge_b 1rcb 1aep 1osa 2sas 1hom 1lga_a 2hpd_a 1chr_a 2mnr 3rub_l 1crl 1tah_a 1aba 1dsb_a 1gpl_a 1atn_a 1hrh_a 3chy 2ak3_a 1gky 2cmd 1eaf 2gbp 1mio_c 2pia 1gal 1npx 2hhm_a 1hip 1isu_a 1fc1_a 2fbj_l 1cid 1pfc 1ten 1tlk 3cd4 3hla_b 1aaj 2afn_a 2aza_a 4sbv_a 1bbt_1 1sac_a 1lts_d 1tie 8i1b 1arb 2sga 2snv 1mdc 1mup 2sim 1cau_b 2omf 1fxi_a 1cew 1stf_i 2pna 2sar_a 1onc 5fd1" | ||
|
||
if [ $# -ne 1 ]; then
    # Usage message must go to stderr: stdout carries the generated
    # tableaux database.
    echo "Usage: $0 outdir" >&2
    exit 1
fi
outdir=$1

if [ ! -d "${outdir}" ]; then
    # Stop if the output directory cannot be created: every per-structure
    # file write below would fail.
    mkdir -p "${outdir}" || exit 1
fi

# pytableaucreate.py options
# ${tabopts} and ${chainopt} are deliberately expanded UNQUOTED below so
# that they split into separate command-line arguments.
tabopts="-35 -f -t dssp -p none"

# NOTE(review): FISCHER_LIST contains 1gpl_a where FISCHER_S has 1gp1_a;
# possibly a typo ("l" for "1") -- confirm against Fischer 1996 Table II.
first=1
for i in $FISCHER_LIST
do
    entry=`echo "$i" | tr 'A-Z' 'a-z'`
    # cut and case are used instead of "expr substr"/"expr index", which
    # are GNU extensions and not available in every /bin/sh environment.
    pdb=`echo "$entry" | cut -c1-4`
    case "$entry" in
        *_*)
            # get chainid from e.g. 1BYO_B
            chainid=`echo "$entry" | cut -c6`
            chainopt="-c $chainid"
            pdbid="${pdb}_${chainid}"
            ;;
        *)
            chainopt=""
            pdbid="$pdb"
            ;;
    esac
    # divided PDB hierarchy: subdirectory is the 2nd and 3rd characters
    # of the PDB id
    div=`echo "$pdb" | cut -c2-3`
    pdbfile="${PDBROOT}/${div}/pdb${pdb}.ent.gz"
    # blank line between consecutive entries on stdout (not before the first)
    if [ $first -eq 0 ]; then
        echo
    else
        first=0
    fi
    outfile="${outdir}/${pdbid}.tableaudistmatrix"
    # tee: each entry goes both to its own file and to stdout
    pytableaucreate.py ${tabopts} ${chainopt} "${pdbfile}" | tee "${outfile}"
    # append distance matrix, removing identifier on first line
    pytableaucreate.py -d ${tabopts} ${chainopt} "${pdbfile}" | awk 'NR > 1' | tee -a "${outfile}"
done
|
Oops, something went wrong.