-
Notifications
You must be signed in to change notification settings - Fork 2
/
acquire_data.yml
48 lines (44 loc) · 2.61 KB
/
acquire_data.yml
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
---
# Acquires the data from an archive database.
# See also: https://github.com/Connexions/cnx-archive/issues/197
# Note: the host 'data_source' is specific to this playbook.
# This is on purpose, because you will be grabbing data from a live
# server.
# Please DO NOT use this during peak hours!
# Play: dump the archive database on the `data_source` host into a set of
# gzip'd files, bundle them into one dated .tar, and fetch that tarball to
# the control machine as `cnxarchive_dump.tar`.
#
# Every pipeline that feeds gzip runs under bash with `pipefail`, so a failing
# pg_dump/psql fails the task instead of being masked by gzip's exit status.
#
# NOTE(review): `nice_priority` is not defined in this play; it is presumably
# supplied by inventory/group vars or `-e` -- confirm before running.
- name: gather info
  hosts: data_source
  vars_prompt:
    - name: "db_name"
      prompt: "What database are we extracting?"
      default: "repository"
    - name: "db_user"
      prompt: "What database user should we use?"
      default: "postgres"
  vars:
    # Filenames whose rows in `files` are dumped with their real content;
    # every other file row is dumped with a placeholder instead.
    keep_filenames:
      - "index.cnxml"
      - "index.cnxml.html"
      - "index_auto_generated.cnxml"
      - "ruleset.css"
      - "featured-cover.png"
      - "collection.xml"
    # keep_filenames rendered as the body of a SQL IN (...) list: each name is
    # JSON-quoted, comma-joined, then double quotes are swapped for single
    # quotes so the list can sit inside the shell's double-quoted -c argument.
    keep_filename_str: "{{ keep_filenames|map('tojson')|join(',')|regex_replace('\"', \"'\") }}"
  tasks:
    # Schema plus all table data except the contents of `files` and
    # `module_files`, which are dumped separately below.
    - name: dump database without files
      shell: >-
        set -o pipefail &&
        nice -n {{ nice_priority }}
        pg_dump -U {{ db_user }}
        --exclude-table-data=files
        --exclude-table-data=module_files
        {{ db_name }}
        | gzip > cnxarchive_dump_without_files.sql.gz
      args:
        executable: /bin/bash

    # Rows of `files` referenced by a module_files entry whose filename is in
    # keep_filenames, including the real `file` column.
    - name: dump database files table for index.cnxml, index.cnxml.html, index_auto_generated.cnxml
      shell: >-
        set -o pipefail &&
        nice -n {{ nice_priority }}
        psql -U {{ db_user }} {{ db_name }}
        -c "copy (
        SELECT fileid, md5, sha1, file, media_type
        FROM files
        WHERE fileid IN
        (SELECT fileid FROM module_files
        WHERE filename IN ({{ keep_filename_str }}) )
        ) TO STDOUT"
        | gzip > cnxarchive_index_files.txt.gz
      args:
        executable: /bin/bash

    # All remaining rows of `files`; the `file` column is replaced by the
    # literal 'dummy file' so the real content is not exported.
    - name: dump database files table for other files
      shell: >-
        set -o pipefail &&
        nice -n {{ nice_priority }}
        psql -U {{ db_user }} {{ db_name }}
        -c "copy (
        SELECT fileid, md5, sha1, 'dummy file', media_type
        FROM files
        WHERE fileid NOT IN
        (SELECT fileid FROM module_files
        WHERE filename IN ({{ keep_filename_str }}) )
        ) TO STDOUT"
        | gzip > cnxarchive_other_files.txt.gz
      args:
        executable: /bin/bash

    # The query has no WHERE clause: the whole module_files table is copied.
    - name: dump database module files table
      shell: >-
        set -o pipefail &&
        nice -n {{ nice_priority }}
        psql -U {{ db_user }} {{ db_name }}
        -c "copy (
        SELECT module_ident, fileid, filename
        FROM module_files
        ) TO STDOUT"
        | gzip > cnxarchive_index_module_files.txt.gz
      args:
        executable: /bin/bash

    # Computes the dated tarball name on the remote host; this only echoes a
    # string, so it is never reported as a change.
    - name: produce dump filename
      shell: "echo \"cnxarchive_dump.$(date +'%Y-%m-%d').tar\""
      register: dump_filename
      changed_when: false

    # Bundles the four gzip'd dumps into an uncompressed tar;
    # --remove-files deletes each input after it has been archived.
    - name: create dump .tar file
      shell: >-
        nice -n {{ nice_priority }}
        tar cf {{ dump_filename.stdout }} --remove-files
        cnxarchive_dump_without_files.sql.gz
        cnxarchive_index_files.txt.gz
        cnxarchive_other_files.txt.gz
        cnxarchive_index_module_files.txt.gz

    # flat: true writes the file to `dest` as given rather than under a
    # per-host directory tree.
    - name: download dump .tar file
      fetch:
        src: "{{ dump_filename.stdout }}"
        dest: "cnxarchive_dump.tar"
        flat: true