-
Notifications
You must be signed in to change notification settings - Fork 0
/
update_browser
executable file
·201 lines (175 loc) · 6.82 KB
/
update_browser
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
#!/usr/bin/env bash
# Seed /gbdb with the example data set when `ls -A /gbdb` prints nothing,
# i.e. the directory is empty (a missing /gbdb also prints nothing on stdout
# and therefore triggers the download as well).
if [ ! "$(ls -A /gbdb)" ]; then
  echo "no genome data found in /gbdb, downloading example data..."
  # -nH and --cut-dirs=3 strip the host name and the "~dvera/share/cruise"
  # path components, so wget recreates the tree as "gbdb/..." underneath its
  # directory prefix. The prefix defaults to the current directory; -P / pins
  # it to the filesystem root so the files land in /gbdb regardless of where
  # the script was started from.
  wget -r -np -e robots=off -nH --cut-dirs=3 -R 'html' -R '\?C=' -P / \
    'http://www.bio.fsu.edu/~dvera/share/cruise/gbdb'
fi
# Write the database connection settings to /root/.hg.conf from SQL_HOST,
# SQL_USER and SQL_PASSWORD.
# printf with quoted arguments stores each value byte-for-byte; an unquoted
# expansion passed to `echo -e` would collapse runs of whitespace, expand glob
# characters and interpret backslash sequences inside the password.
# The umask is changed only inside the subshell so that a newly created file
# is owner-only from the first byte; chmod still runs afterwards to tighten a
# file that already existed with wider permissions.
(
  umask 077
  printf 'db.host=%s\ndb.user=%s\ndb.password=%s\n' \
    "${SQL_HOST}" "${SQL_USER}" "${SQL_PASSWORD}" > /root/.hg.conf
)
chmod 600 /root/.hg.conf
# get dbDb spreadsheet

#######################################
# Replace every empty tab-separated field with the literal "NA".
# Later stages split rows with `read` and IFS=tab; tab is IFS whitespace, so
# a run of adjacent tabs would collapse into one delimiter and shift every
# following column. Rewriting field by field also covers several empty cells
# in a row, which a single `s/\t\t/\tNA\t/g` pass leaves half-filled because
# sed does not rescan overlapping matches.
# Arguments: none
# Inputs:    TSV on stdin
# Outputs:   TSV on stdout, each line newline-terminated
#######################################
fill_empty_fields() {
  awk -F '\t' -v OFS='\t' '{
    for (i = 1; i <= NF; i++) if ($i == "") $i = "NA"
    print
  }'
}

dbdb_url="https://spreadsheets.google.com/feeds/download/spreadsheets/Export?key=${DBDBID}&hl=en&exportFormat=tsv"
echo "downloading dbDb spreadsheet from ${dbdb_url}"
# Strip carriage returns, fill empty cells, then keep the header row plus
# every row whose "active" column equals 1. The column is located by its
# header name, so the spreadsheet's column order does not matter.
curl -sL "${dbdb_url}" \
  | tr -d "\r" \
  | fill_empty_fields \
  | awk -F '\t' '
      NR == 1 {
        for (i = 1; i <= NF; i++) cols[$i] = i
        print
        next
      }
      $cols["active"] == 1
    ' > /tmp/dbDb.tsv
# Count data rows by position (everything after the header) rather than by
# matching "^name", which only works while "name" is the first column.
echo "found $(awk 'NR > 1 { n++ } END { print n + 0 }' /tmp/dbDb.tsv) activated genomes"
# make dbDb table
# Project /tmp/dbDb.tsv onto the thirteen columns listed in `wanted`, in that
# order, writing the result (header row included) to /tmp/dbDb2.tsv. Columns
# are located through the header row, so their order in the input is free.
echo "preparing dbDb"
awk -F '\t' -v OFS='\t' \
  -v wanted='name,description,nibPath,organism,defaultPos,active,orderKey,genome,scientificName,htmlPath,hgNearOk,hgPbOk,sourceName' '
  BEGIN { total = split(wanted, field, ",") }
  # Header row: remember the position of every column name.
  NR == 1 { for (c = 1; c <= NF; c++) pos[$c] = c }
  # Every row, the header too: emit the wanted columns joined by tabs.
  {
    row = $pos[field[1]]
    for (k = 2; k <= total; k++) row = row OFS $pos[field[k]]
    print row
  }' /tmp/dbDb.tsv > /tmp/dbDb2.tsv
# make clade table
# Emit one name/label/priority row for each distinct value of the "clade"
# column of /tmp/dbDb.tsv, in order of first appearance. Name and label are
# both the clade value; priority is the input line number of that first
# appearance.
echo "preparing clade"
awk -F '\t' -v OFS='\t' '
  NR == 1 {
    for (c = 1; c <= NF; c++) pos[$c] = c
    print "name", "label", "priority"
    next
  }
  {
    group = $pos["clade"]
    if (group in listed) next
    listed[group] = 1
    print group, group, NR
  }' /tmp/dbDb.tsv > /tmp/clade.tsv
# make genomeClade table
# Emit one genome/clade/priority row per organism: the first row of
# /tmp/dbDb.tsv for that organism whose "active" column equals 1. Priority is
# the input line number of that row.
# The organism must be both unseen AND active. Writing the test as
# !(seen && active) instead lets every row with active != 1 through,
# duplicates included.
echo "preparing genomeClade"
awk -F '\t' -v OFS='\t' '
  NR == 1 {
    for (c = 1; c <= NF; c++) pos[$c] = c
    print "genome", "clade", "priority"
    next
  }
  {
    org = $pos["organism"]
    if (!(org in listed) && $pos["active"] == 1) {
      listed[org] = 1
      print org, $pos["clade"], NR
    }
  }' /tmp/dbDb.tsv > /tmp/genomeClade.tsv
# make defaultDb table
# For each organism in /tmp/dbDb.tsv, record the "name" of the first row that
# mentions it, producing genome/name pairs in /tmp/defaultDb.tsv.
echo "preparing defaultDb"
awk -F '\t' -v OFS='\t' '
  NR == 1 {
    for (c = 1; c <= NF; c++) pos[$c] = c
    print "genome", "name"
    next
  }
  {
    org = $pos["organism"]
    if (org in chosen) next
    chosen[org] = org
    print org, $pos["name"]
  }' /tmp/dbDb.tsv > /tmp/defaultDb.tsv
# drop all genome databases
# Safety check before anything is destroyed: the genome list must at least
# have a header row with a "name" column. When the spreadsheet download
# failed, /tmp/dbDb.tsv is empty or missing, and dropping every genome
# database would leave nothing to rebuild them from.
if ! awk -F '\t' '
  NR == 1 { for (i = 1; i <= NF; i++) if ($i == "name") ok = 1 }
  END { exit !ok }' /tmp/dbDb.tsv; then
  echo "ERROR: /tmp/dbDb.tsv has no \"name\" column; refusing to drop genome databases" >&2
  exit 1
fi
echo "deleting genome databases"
# List all databases, filter out the header line and the system/shared
# schemas, and feed one "drop database" statement per remaining name (each
# followed by a 0.1 s sleep) back into hgsql.
# NOTE(review): the grep patterns are unanchored substrings, so any database
# whose name merely contains e.g. "test" or "mysql" is kept as well - confirm
# that this is intended.
hgsql -e "show databases" \
  | grep -v "Database\|mysql\|information_schema\|customTrash\|hgFixed\|hgcentral\|performance_schema\|test" \
  | awk '{print "drop database " $1 ";select sleep(0.1);"}' \
  | hgsql
# clear hgcentral
echo "deleting disposable hgcentral tables"
# Empty each table with its own hgsql call, in this order. hubStatus is not
# in the list (its delete statement was commented out), so it keeps its rows.
for hgc_table in \
  dbDb defaultDb clade genomeClade dbDbArch hubPublic \
  blatServers liftOverChain targetDb; do
  hgsql -e "delete from hgcentral.${hgc_table}"
done
echo "recreating hgcentral tables"
# Each entry is "<tsv basename>:<hgcentral table>". The first line of every
# TSV is its header row and is skipped by "ignore 1 lines".
for hgc_load in dbDb2:dbDb clade:clade genomeClade:genomeClade defaultDb:defaultDb; do
  hgc_tsv="/tmp/${hgc_load%%:*}.tsv"
  hgc_target="hgcentral.${hgc_load##*:}"
  hgsql -e "load data local infile \"${hgc_tsv}\" into table ${hgc_target} ignore 1 lines;"
done
echo "processing genomes"
# The header row of /tmp/dbDb.tsv supplies the variable names that `read`
# fills for every genome row; the body below relies on name, nibPath, gid and
# active being among them. $cols (and $tcols further down) are intentionally
# left unquoted so that they split into one variable name per column.
cols=$(head -n 1 /tmp/dbDb.tsv)
tail -n+2 /tmp/dbDb.tsv | while IFS=$'\t' read -r $cols; do
  # Only genomes that carry a trackDb spreadsheet id and are active are built.
  # The -n test keeps a missing/empty gid a "skip" now that gid is quoted.
  if [ -n "$gid" ] && [ "$gid" != "NA" ] && [ "$active" -eq 1 ]; then
    echo "- $name"
    echo " - creating core genome tables"
    hgsql -e "create database $name"
    hgsql "$name" < /opt/kent/src/hg/lib/chromInfo.sql
    hgsql "$name" < /opt/kent/src/hg/lib/grp.sql
    hgsql "$name" < /opt/kent/src/hg/lib/gap.sql
    hgsql -e "load data local infile \"$nibPath/chromInfo.tab\" into table $name.chromInfo;"
    hgLoadSqlTab "$name" cytoBandIdeo /opt/kent/src/hg/lib/cytoBandIdeo.sql "$nibPath/cytoband.bed"
    #######################################
    ### PROCESS TRACKS ####################
    #######################################
    echo " - downloading trackDb spreadsheet"
    # Strip carriage returns and turn every empty cell into "NA". The rows are
    # split with `read` and IFS=tab below; tab is IFS whitespace, so adjacent
    # tabs would merge and shift the columns. Filling field by field in awk
    # also handles several empty cells in a row, which a single
    # s/\t\t/\tNA\t/g pass leaves half-filled. awk newline-terminates the last
    # row, so no extra newline has to be appended for `read` to see it.
    curl -sL "https://spreadsheets.google.com/feeds/download/spreadsheets/Export?key=${gid}&hl=en&exportFormat=tsv" \
      | tr -d "\r" \
      | awk -F '\t' -v OFS='\t' '{
          for (i = 1; i <= NF; i++) if ($i == "") $i = "NA"
          print
        }' > "$nibPath/trackDb.tsv"
    echo " - deleting html files"
    rm -fr -- "$nibPath/html"
    # loop through each track
    echo " - processing tracks"
    # As above: the sheet's header row names the per-track variables; the body
    # uses track, type, bigDataUrl, html, _on, _credits and _methods.
    tcols=$(head -n 1 "$nibPath/trackDb.tsv")
    tail -n+2 "$nibPath/trackDb.tsv" | while IFS=$'\t' read -r $tcols; do
      echo " - $track"
      if [[ $bigDataUrl != "NA" ]] && [[ ${_on} -eq 1 ]]; then
        # A value containing ":" is not checked on disk (presumably a URL);
        # anything else must be an existing local file.
        if [[ ! $bigDataUrl =~ ":" ]] && [[ ! -f $bigDataUrl ]]; then
          echo "WARNING: $bigDataUrl DOES NOT EXIST for track $track"
        fi
        # NOTE(review): only an exact type word matches here; a type cell
        # that carries extra settings after the word would fall through
        # without loading anything - confirm against the spreadsheets.
        case "$type" in
          bigWig|bigBed|bam|vcfTabix)
            hgBbiDbLink "$name" "$track" "$bigDataUrl"
            ;;
          bed)
            hgLoadBed "$name" "$track" "$bigDataUrl"
            ;;
          genePred)
            hgLoadGenePred "$name" "$track" "$bigDataUrl"
            ;;
        esac
      fi
      # NOTE(review): the directory is created under $nibPath while the page
      # is written under /gbdb/$name - this only lines up when the two are the
      # same location and $html starts with "html/"; confirm.
      mkdir -p "$nibPath/html"
      #if [ "$html" != "NA" ]; then
      echo "<h2>Credits</h2> $_credits <hr>" > "/gbdb/$name/${html}.html"
      echo "<h2>Methods</h2> $_methods <hr>" >> "/gbdb/$name/${html}.html"
      #fi
    done
    #######################################
    # create track database file from spreadsheet
    # For every row whose first column equals 1, print one "<column> <value>"
    # line per column followed by a blank line; "NA" values and columns whose
    # name starts with "_" are dropped from the .ra file.
    awk -F "\t" '{if(NR==1){split($0,xx,"\t"); nf=NF} ; if(NR>1 && $1==1){for (i = 1; i <= nf; i++) print xx[i]" "$i; print "" }}' \
      "$nibPath/trackDb.tsv" | grep -v " NA$" | grep -v "^_" > "$nibPath/trackDb.ra"
    hgFindSpec "$nibPath" "$name" hgFindSpec /opt/kent/src/hg/lib/hgFindSpec.sql "$nibPath"
    hgTrackDb "$nibPath" "$name" trackDb /opt/kent/src/hg/lib/trackDb.sql "$nibPath"
    hgsql -e "GRANT SELECT on $name.* TO readonly@'172.%.%.%' IDENTIFIED BY 'access';"
    # The password stays inside the double quotes so that whitespace or glob
    # characters in it cannot split the statement into several arguments.
    # NOTE(review): a single quote inside SQL_PASSWORD would still break the
    # SQL string.
    hgsql -e "GRANT ALL PRIVILEGES on *.* TO root@'172.%.%.%' IDENTIFIED BY '${SQL_PASSWORD}' WITH GRANT OPTION;" mysql
  fi
done
hgsql -e "FLUSH PRIVILEGES;"