-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathmerckx-init.sh
executable file
·61 lines (54 loc) · 1.77 KB
/
merckx-init.sh
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
#!/bin/bash
# merckx-init.sh - download the DBpedia dataset and initialize the MERCKX data files.

# Banner: blank line, title, size warning, blank line.
printf '\n%s\n%s\n\n' \
  "MERCKX init - Download DBpedia dataset and initialize MERCKX data files" \
  "WARNING: files are large so each step may take several minutes to complete..."

# Root URL of the DBpedia release that the downloads are taken from.
DOWNLOAD_PATH=http://downloads.dbpedia.org/3.9

# Working directories, relative to the current directory; each one is created
# only when it is not already present.
echo "*** Creating data directories..."
for workdir in dbpedia data; do
  [ -d "$workdir" ] || mkdir "$workdir"
done
# Extract the list of entities using rdf:type: fetch and unpack the DBpedia
# instance-types dump into dbpedia/.
#
# A missing archive is treated as a fresh start: leftover
# dbpedia/instance_types_en.* files and the data/*.lst files are removed
# before downloading (the .lst files are presumably generated from this dump
# by merckx-init.py - confirm there).
if [ ! -f dbpedia/instance_types_en.nt.bz2 ]; then
  echo "*** Downloading DBpedia dataset files..."
  echo "--- Downloading dbpedia/instance_types_en.nt.bz2 ..."
  rm -f dbpedia/instance_types_en.*
  rm -f data/*.lst
  # Download under a temporary name and rename only on success: the -f test
  # above must never mistake a failed or interrupted transfer for a complete
  # archive on the next run.
  if ! { wget -q -O dbpedia/instance_types_en.nt.bz2.part \
           "$DOWNLOAD_PATH/en/instance_types_en.nt.bz2" &&
         mv dbpedia/instance_types_en.nt.bz2.part \
            dbpedia/instance_types_en.nt.bz2; }; then
    rm -f dbpedia/instance_types_en.nt.bz2.part
    echo "ERROR: could not download $DOWNLOAD_PATH/en/instance_types_en.nt.bz2" >&2
    exit 1
  fi
fi

# Decompress next to the archive; -k keeps the .bz2 so the download step above
# is skipped on later runs.
if [ ! -f dbpedia/instance_types_en.nt ]; then
  echo "--- Decompressing dbpedia/instance_types_en.nt.bz2 ..."
  if ! bzip2 -dk dbpedia/instance_types_en.nt.bz2; then
    echo "ERROR: could not decompress dbpedia/instance_types_en.nt.bz2" >&2
    exit 1
  fi
fi
# rdfs:label in multiple languages (en nl fr ...)
#
# The language codes are the script's arguments; with no arguments the
# defaults en, nl and fr are used. Each argument is taken as exactly one
# language code (no word splitting or glob expansion).
if [ "$#" -gt 0 ]; then
  label_langs=("$@")
else
  label_langs=(en nl fr)
fi

for lang in "${label_langs[@]}"; do
  # English labels are published as labels_en; every other language uses the
  # labels_en_uris_<lang> dump from that language's directory.
  if [ "$lang" = "en" ]; then
    label_stem="labels_en"
  else
    label_stem="labels_en_uris_$lang"
  fi
  label_archive="dbpedia/$label_stem.nt.bz2"

  if [ ! -f "$label_archive" ]; then
    echo "--- Downloading $label_archive ..."
    # Download under a temporary name and rename only on success, so a failed
    # or interrupted transfer is retried on the next run instead of being
    # mistaken for a complete archive.
    if ! { wget -q -O "$label_archive.part" \
             "$DOWNLOAD_PATH/$lang/$label_stem.nt.bz2" &&
           mv "$label_archive.part" "$label_archive"; }; then
      rm -f "$label_archive.part"
      echo "ERROR: could not download $DOWNLOAD_PATH/$lang/$label_stem.nt.bz2" >&2
      exit 1
    fi
  fi

  # Decompress next to the archive; -k keeps the .bz2 so the download is
  # skipped on later runs.
  if [ ! -f "dbpedia/$label_stem.nt" ]; then
    echo "--- Decompressing $label_archive ..."
    if ! bzip2 -dk "$label_archive"; then
      echo "ERROR: could not decompress $label_archive" >&2
      exit 1
    fi
  fi
done
# initialize MERCKX data files
# The script's own arguments are forwarded as-is; "$@" is quoted so each
# argument reaches merckx-init.py as a single word, without word splitting or
# glob expansion.
python merckx-init.py "$@"