-
Notifications
You must be signed in to change notification settings - Fork 1
/
coor2fasta
executable file
·70 lines (64 loc) · 2.22 KB
/
coor2fasta
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
#!/bin/bash
#PBS -l nodes=1:ppn=4
GENOME="mm9"
BASED="1"
#### usage ####
usage() {
echo
echo Program: "coor2fasta (retrieve nucleotide sequences corresponding to genomic coordinate(s))"
echo Author: BRIC, University of Copenhagen, Denmark
echo Version: 1.0
echo Contact: pundhir@binf.ku.dk
echo "Usage: coor2fasta -i <file> -j <coor> [OPTIONS]"
echo "Options:"
echo " -i <file> [reference genome sequence as fasta file]"
echo " -j <coor> [genomic coordinate as chr:start-end]"
echo "[OPTIONS]"
echo " -k <int> [the coordinate is 0-based or 1-based (default: 1)]"
echo " [0: BED and BAM]"
echo " [1: GFF, GTF, SAM and VCF]"
echo " [for details refer to https://www.biostars.org/p/84686/]"
echo " -s <string> [genomic strand for the input coordinate (+ or -)]"
echo " -h [help]"
echo
exit 0
}
#### parse options ####
while getopts i:j:k:s:h ARG; do
case "$ARG" in
i) FASTAFILE=$OPTARG;;
j) COOR=$OPTARG;;
k) BASED=$OPTARG;;
s) STRAND=$OPTARG;;
h) HELP=1;;
esac
done
## usage, if necessary file and directories are given/exist
if [ ! -f "$FASTAFILE" -o -z "$COOR" -o "$HELP" ]; then
usage
fi
###################
#helperfunction
function wait_for_jobs_to_finish {
for job in `jobs -p`
do
echo $job
wait $job
done
echo $1
}
###############
## retrieve nucleotide sequence
if [ "$BASED" -eq 0 ]; then
if [ ! -z "$STRAND" ]; then
echo $COOR | perl -ane 'chomp($_); @t=split(/[\:\-]+/,$_); print "$t[0]\t$t[1]\t$t[2]\tCOOR\t1\t'$STRAND'\n";' | bedtools getfasta -fi $FASTAFILE -bed stdin -fo stdout -s
else
echo $COOR | perl -ane 'chomp($_); @t=split(/[\:\-]+/,$_); print "$t[0]\t$t[1]\t$t[2]\n";' | bedtools getfasta -fi $FASTAFILE -bed stdin -fo stdout
fi
else
if [ ! -z "$STRAND" ]; then
echo $COOR | perl -ane 'chomp($_); @t=split(/[\:\-]+/,$_); $t[1]=$t[1]-1; print "$t[0]\t$t[1]\t$t[2]\tCOOR\t1\t'$STRAND'\n";' | bedtools getfasta -fi $FASTAFILE -bed stdin -fo stdout -s
else
echo $COOR | perl -ane 'chomp($_); @t=split(/[\:\-]+/,$_); $t[1]=$t[1]-1; print "$t[0]\t$t[1]\t$t[2]\n";' | bedtools getfasta -fi $FASTAFILE -bed stdin -fo stdout
fi
fi