forked from hortonworks/hive-testbench
-
Notifications
You must be signed in to change notification settings - Fork 2
/
util_runtpcds.sh
117 lines (92 loc) · 2.97 KB
/
util_runtpcds.sh
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
#!/bin/bash
#######################################
# Print the current date/time in the America/Los_Angeles time zone,
# followed by one empty line.
# Outputs: two lines on stdout (timestamp, then a blank line).
#######################################
timedate() {
  TZ="America/Los_Angeles" date
  printf '\n'
}
#######################################
# Print the command-line usage summary and terminate the script.
# Outputs: three usage lines on stdout.
# Returns: never returns; exits the shell with status 1.
#######################################
usageExit() {
  printf '%s\n' \
    "Usage: sh util_runtpcds.sh SCALE FORMAT" \
    "SCALE must be greater than 1" \
    "FORMAT must be either 'orc' | 'parquet'"
  exit 1
}
#######################################
# Prepare the working directory and global configuration for a run.
# Globals read:    FORMAT, SCALE
# Globals written: ID, QUERY_BASE_NAME, QUERY_FILE_EXT, SETTINGS_PATH,
#                  REPORT_NAME, DATABASE, CLOCK_FILE, CURR_DIR
# Side effects (all relative to the current directory):
#   - removes any previous clock file, time report(s), llapio_summary*.csv
#     and the log_query/ directory
#   - writes a fresh report CSV containing only the header line
#   - recreates an empty log_query/ directory
#   - marks the helper scripts and PAT/ as executable
# Outputs: progress messages on stdout.
#######################################
setupRun() {
  # Timestamp that identifies this run; used later in artifact names.
  ID=$(TZ='America/Los_Angeles' date +"%m.%d.%Y-%H.%M.%S")

  # --- QUERY FILE NAME ---
  QUERY_BASE_NAME="sample-queries-tpcds/query"
  QUERY_FILE_EXT=".sql"

  # --- SETTINGS ---
  SETTINGS_PATH="settings.sql"

  # --- REPORT NAME ---
  REPORT_NAME="time_elapsed_tpcds"

  # --- DATABASE ---
  DATABASE="tpcds_bin_partitioned_${FORMAT}_${SCALE}"

  # --- CLOCK ---
  CLOCK_FILE="aaa_clocktime.txt"
  if [[ -f "$CLOCK_FILE" ]]; then
    rm -- "$CLOCK_FILE"
    echo "Old clock removed"
  fi
  # The clock file itself is not created here; only the message is printed.
  echo "Created new clock"

  # generate time report
  # -f: on a clean directory the glob matches nothing and a plain `rm`
  # would print a "No such file" error on every first run.
  rm -f -- "$REPORT_NAME"*".csv"
  echo "Old report removed"
  echo "query #", "secs elapsed", "status" > "${REPORT_NAME}.csv"
  echo "New report generated"

  # remove old llapio_summary (same -f rationale as above)
  rm -f -- "llapio_summary"*".csv"
  echo "Old llapio_summary*.csv removed"

  # clear and make new log directory
  if [[ -d log_query/ ]]; then
    rm -r log_query/
    echo "Old logs removed"
  fi
  mkdir log_query/
  echo "Log folder generated"

  # make executable
  chmod +x util_internalGetPAT.sh
  chmod +x util_internalRunQuery.sh
  chmod -R +x PAT/

  # absolute path of the working directory, with a trailing slash so it can
  # be prefixed directly onto relative paths
  CURR_DIR="$(pwd)/"
}
#######################################
# Run every query in the configured range through util_internalRunQuery.sh
# and record start/finish timestamps in the clock file.
# Globals read:    FORMAT, SCALE, CLOCK_FILE, QUERY_BASE_NAME, QUERY_FILE_EXT,
#                  DATABASE, CURR_DIR, SETTINGS_PATH, REPORT_NAME
# Globals written: START, END, REPEAT, QUERY_NUM, j, query_path, LOG_PATH
# Calls:           timedate, ./util_internalRunQuery.sh
#######################################
runBenchmark() {
  echo "Run queries for TPC-DS ${FORMAT} at scale ${SCALE}" > "$CLOCK_FILE"
  timedate >> "$CLOCK_FILE"

  # range of queries (inclusive) and number of repetitions per query
  START=1
  END=99
  REPEAT=1

  for (( QUERY_NUM = START; QUERY_NUM <= END; QUERY_NUM++ )); do
    for (( j = 0; j < REPEAT; j++ )); do
      # Plain quoted string: an unquoted array assignment here would
      # word-split/glob the path and silently keep only the first word.
      query_path="${QUERY_BASE_NAME}${QUERY_NUM}${QUERY_FILE_EXT}"
      LOG_PATH="log_query/logquery${QUERY_NUM}.${j}.txt"

      # Arguments, in order: database, settings file, query file, log file,
      # query number, report CSV (paths are made absolute via CURR_DIR).
      ./util_internalRunQuery.sh "$DATABASE" "$CURR_DIR$SETTINGS_PATH" "$CURR_DIR$query_path" "$CURR_DIR$LOG_PATH" "$QUERY_NUM" "$CURR_DIR$REPORT_NAME.csv"

      # Disabled variant that wraps the same call in util_internalGetPAT.sh.
      # NOTE(review): "$i" is not set anywhere in this function; it
      # presumably should be "$QUERY_NUM" - confirm before re-enabling.
      # ./util_internalGetPAT.sh /$CURR_DIR/util_internalRunQuery.sh "$DATABASE" "$CURR_DIR$SETTINGS_PATH" "$CURR_DIR$query_path" "$CURR_DIR$LOG_PATH" "$QUERY_NUM" "$CURR_DIR$REPORT_NAME.csv" tpcdsPAT"$ID"/query"$i"/
    done
  done

  echo "Finished" >> "$CLOCK_FILE"
  timedate >> "$CLOCK_FILE"
}
#######################################
# Post-process the run and bundle its artifacts into one zip archive.
# Globals read: REPORT_NAME, ID, SCALE
# Steps:
#   1. run parselog.py with python3
#   2. rename the report CSV so its name carries the run ID
#   3. zip the contents of log_query/ (paths junked) into log_query.zip
#   4. build tpcds-<SCALE>GB-<ID>.zip from log_query.zip, the PAT results
#      directory for this run ID, the renamed report and llapio_summary*.csv
#   5. delete the intermediate log_query.zip
#######################################
generateZipReport() {
  local stamped_report="${REPORT_NAME}${ID}.csv"
  local bundle_name="tpcds-${SCALE}GB-${ID}.zip"
  local logs_zip="log_query.zip"

  python3 parselog.py

  mv "${REPORT_NAME}.csv" "${stamped_report}"

  # -j stores just the file names, dropping the log_query/ prefix.
  zip -j "${logs_zip}" log_query/*

  # The two globs are intentionally left unquoted so the shell expands them.
  zip -r "${bundle_name}" \
    "${logs_zip}" \
    PAT/PAT-collecting-data/results/tpcdsPAT"${ID}"/* \
    "${stamped_report}" \
    llapio_summary*.csv

  rm "${logs_zip}"
}
# --- SCRIPT START ---
#######################################
# Entry point: validate the command-line arguments, then run the three
# phases (setupRun, runBenchmark, generateZipReport) in order.
# Arguments: $1 - SCALE, a decimal integer greater than 1
#            $2 - FORMAT, either "orc" or "parquet"
# Globals written: SCALE, FORMAT (read by the phase functions)
# Exits (via usageExit) with status 1 on any invalid argument.
#######################################
main() {
  SCALE=$1
  FORMAT=$2

  # Check that SCALE is all digits BEFORE any arithmetic comparison:
  # arithmetic evaluation of unvalidated input can raise syntax errors or
  # even run command substitutions embedded in the value.
  if [[ -z "$SCALE" ]]; then
    usageExit
  elif ! [[ "$SCALE" =~ ^[0-9]+$ ]]; then
    echo "'$SCALE' is not a number!"
    usageExit
  elif (( 10#$SCALE <= 1 )); then
    # 10# forces base 10 so values with leading zeros (e.g. 08) are not
    # misparsed as octal. Rejects 0 as well as 1, matching the usage text
    # "SCALE must be greater than 1".
    usageExit
  fi

  if [[ "$FORMAT" != "orc" && "$FORMAT" != "parquet" ]]; then
    usageExit
  fi

  setupRun
  runBenchmark
  generateZipReport
}

main "$@"