-
Notifications
You must be signed in to change notification settings - Fork 3
/
collect_job_data.sh
70 lines (55 loc) · 2.67 KB
/
collect_job_data.sh
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
#!/bin/bash
# This script fetches data from a Galaxy database and archives it in S3 bucket.
# The data is from the previous month and includes two tables: job and
# job_metric_numeric. The data from both tables is uploaded to an S3 bucket as
# two tar.gz files. On Main, the script runs about 30 minutes.
# To use, edit the hard coded paths and invoke via ./collect_job_data.sh
START_TIME=$(date +%s)
BUCKET_NAME=usegalaxy-historic-data
cd /home/afgane/jobs_history
PREVIOUS_MONTH=`date --date="$(date +%Y-%m-15) - 1 month" "+%Y-%m"`
PREVIOUS_MONTH_START=`date --date="$(date +%Y-%m-01) - 1 month" "+%Y-%m-%d"`
PREVIOUS_MONTH_END=`date --date="$(date +%Y-%m-01) - 1 day" "+%Y-%m-%d"`
printf '%s -- Creating SQL queries.\n' "$(date)";
# Create job query
echo "COPY (
SELECT
id, create_time, tool_id, tool_version, user_id, destination_id
FROM
job
WHERE
create_time >= '$PREVIOUS_MONTH_START'
AND create_time < '$PREVIOUS_MONTH_END') TO STDOUT;" > job_query.sql
# Create metrics query
echo "COPY (
SELECT
job.id,
job.destination_id,
job_metric_numeric.metric_name,
job_metric_numeric.metric_value
FROM
job
INNER JOIN job_metric_numeric
ON job_metric_numeric.job_id = job.id
WHERE
job.create_time >= '$PREVIOUS_MONTH_START' AND
job.create_time < '$PREVIOUS_MONTH_END' AND
(job_metric_numeric.metric_name = 'galaxy_slots' OR
job_metric_numeric.metric_name = 'memory.max_usage_in_bytes' OR
job_metric_numeric.metric_name = 'galaxy_memory_mb' OR
job_metric_numeric.metric_name = 'galaxy_slots' OR
job_metric_numeric.metric_name = 'cpuacct.usage' OR
job_metric_numeric.metric_name = 'runtime_seconds')) TO STDOUT;" > metrics_query.sql
printf '%s -- Fetching job table data for %s...\n' "$(date)" "$PREVIOUS_MONTH";
psql galaxy_main < job_query.sql > job_table.txt;
printf '%s -- Got the job table data; archiving...\n' "$(date)";
tar -czf job_table.tar.gz job_table.txt;
printf '%s -- Fetching job metrics table data for %s...\n' "$(date)" "$PREVIOUS_MONTH";
psql galaxy_main < metrics_query.sql > metrics_table.txt;
printf '%s -- Got the job metrics table data; archiving...\n' "$(date)";
tar -czf metrics_table.tar.gz metrics_table.txt
printf '%s -- Uploading %s data to S3 bucket usegalaxy-historic-data...\n' "$(date)" "$PREVIOUS_MONTH";
/home/afgane/aws_cli/bin/aws --profile cost-modeling s3 cp ./job_table.tar.gz s3://$BUCKET_NAME/$PREVIOUS_MONTH-job-table.tar.gz --acl public-read
/home/afgane/aws_cli/bin/aws --profile cost-modeling s3 cp ./metrics_table.tar.gz s3://$BUCKET_NAME/$PREVIOUS_MONTH-metrics-table.tar.gz --acl public-read
printf '%s -- Done.\n' "$(date)";
printf "Runtime: $(date -ud "@$(($(date +%s) - $START_TIME))" +%T) (HH:MM:SS)";