-
Notifications
You must be signed in to change notification settings - Fork 1
/
gene2ptbp1
executable file
·92 lines (81 loc) · 2.33 KB
/
gene2ptbp1
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
#!/bin/bash
#PBS -l nodes=1:ppn=4
# Default genome assembly label. Nothing in this script reads it after this
# assignment, and no command-line flag changes it.
GENOME="mm9"
# Default coordinate base; overridden by the -k flag during option parsing.
BASED=0
#### usage ####
#######################################
# Print the help text and terminate the script.
# Arguments: none
# Outputs:   the help text on stdout, framed by one blank line above and below
# Returns:   never returns; exits the shell with status 0
#######################################
usage() {
  # Each argument becomes exactly one output line; the two empty strings are
  # the blank lines that frame the text.
  printf '%s\n' \
    '' \
    'Program: gene2ptbp1 (retrieve nucleotide sequences corresponding to genomic coordinate(s))' \
    'Author: BRIC, University of Copenhagen, Denmark' \
    'Version: 1.0' \
    'Contact: pundhir@binf.ku.dk' \
    'Usage: gene2ptbp1 -i <file> -j <file> [OPTIONS]' \
    'Options:' \
    ' -i <coor> [input genomic coordinate file in BED format (can be stdin)]' \
    ' -j <file> [reference genome sequence as fasta file]' \
    '[OPTIONS]' \
    ' -k <int> [the coordinate is 0-based or 1-based (default: 0)]' \
    ' [0: BED and BAM]' \
    ' [1: GFF, GTF, SAM and VCF]' \
    ' [for details refer to https://www.biostars.org/p/84686/]' \
    ' -s [use strand information]' \
    ' -h [help]' \
    ''
  exit 0
}
#### parse options ####
#######################################
# Parse the command-line flags into the script's global settings.
# Globals:   COORFILE (-i), FASTAFILE (-j), BASED (-k), STRAND (-s) are written
# Arguments: the script's positional parameters ("$@")
# Outputs:   help text on stdout for -h; getopts diagnostic plus help text on
#            stderr for an unknown flag or a flag missing its argument
# Returns:   0 after a successful parse; does not return for -h (exit 0) or
#            for an invalid flag (exit 1)
#######################################
parse_options() {
  # OPTIND is made local so the parse always starts at the first argument,
  # even if the function is called more than once.
  local OPTIND=1 ARG
  while getopts i:j:k:sh ARG; do
    case "$ARG" in
      i) COORFILE=$OPTARG ;;
      j) FASTAFILE=$OPTARG ;;
      k) BASED=$OPTARG ;;
      s) STRAND=1 ;;
      # Show help right away. Merely recording a HELP flag had no effect,
      # because nothing that executes afterwards reads it.
      h) usage ;;
      # usage ends with "exit 0", so it runs in a subshell here: the text goes
      # to stderr and the script itself still exits with a failure status.
      *) ( usage ) >&2; exit 1 ;;
    esac
  done
}
parse_options "$@"
# DISABLED SECTION. Everything from the <<"COMMENT" line down to the closing
# COMMENT line is a here-document that is not attached to any command, so
# bash reads the text and throws it away; none of it is executed. The quoted
# delimiter also means nothing inside is expanded.
# Consequently the following are all inactive in this script:
#   - the check that calls usage when the -j file is missing, -i is empty, or
#     HELP is set;
#   - the wait_for_jobs_to_finish helper (it is never defined);
#   - copying stdin to a temporary file when -i is "stdin".
# NOTE(review): before re-enabling, check two things. The temporary file is
# named from /dev/urandom and created in the current directory rather than
# with mktemp. It is also removed immediately after being written, with no
# processing step in between.
<<"COMMENT"
## usage, if necessary file and directories are given/exist
if [ ! -f "$FASTAFILE" -o -z "$COORFILE" -o "$HELP" ]; then
usage
fi
###################
#helperfunction
function wait_for_jobs_to_finish {
for job in `jobs -p`
do
echo $job
wait $job
done
echo $1
}
###############
## create temporary BED file if input is from stdin
if [ "$COORFILE" == "stdin" ]; then
TMP=$(cat /dev/urandom | tr -dc 'a-zA-Z0-9' | fold -w 32 | head -n 1)
while read LINE; do
echo -e "${LINE}"
done > $TMP
COORFILE=$TMP
fi
## remove temporary file if exists
if [ ! -z "$TMP" ]; then
rm $TMP
fi
COMMENT
zless analysis/5UTRs.fasta | head -n 10 | perl -ane '
if($_=~/^>/) {
chomp($_);
$ID=$_;
$ID=~s/\>//g;
} else {
print "$ID";
#while($_=~m!(TCT|TTT|TTC|CTT|CTC|CTG|TGT|CCT|TTG|TCC|GTT|GTC|TGG|GCT|TGC|GTG|GGT|CCC|GCC|GGC[CT]{0,})!g) {
while($_=~m!(TCT|TTT[CT]{0,})!g) {
#$matchInfo=sprintf("%s\t%d\t%d", $1, length($`), length($&));
#$PTBP1{$ID}{'motif'}=$1;
#$PTBP1{$ID}{'motifStart'}=length($`);
#$PTBP1{$ID}{'motifLength'}=length($&);
printf("\t%s,%d,%d", $1, length($`), length($&));
}
print "\n";
}'