-
Notifications
You must be signed in to change notification settings - Fork 3
/
import_zip.py
executable file
·41 lines (32 loc) · 1.23 KB
/
import_zip.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
#!/usr/bin/env python
from argparse import ArgumentParser
import logging
import sys
from ncd.athena import Athena
from ncd.data_zip import DataZip
logger = logging.getLogger(__name__)
logger.setLevel(logging.DEBUG)
ch = logging.StreamHandler()
ch.setLevel(logging.DEBUG)
formatter = logging.Formatter(
'%(asctime)s\t%(name)s\t%(levelname)s\t%(message)s')
ch.setFormatter(formatter)
logger.addHandler(ch)
parser = ArgumentParser(description='Load a file of National Caseload Data.')
parser.add_argument(
'--data-bucket', help='S3 bucket name for data files', required=True)
parser.add_argument(
'--results-bucket', help='S3 bucket name for query results', required=True)
parser.add_argument(
'--s3-prefix', help='Prefix for data file paths on S3', required=True)
parser.add_argument('--db-name', help='Database name on Athena', required=True)
parser.add_argument('zip_path', help='Path to a zip file from NCD')
def main(raw_args):
args = parser.parse_args(raw_args)
athena = Athena(
data_bucket=args.data_bucket, results_bucket=args.results_bucket,
s3_prefix=args.s3_prefix, db_name=args.db_name)
athena.create_db()
DataZip(args.zip_path, athena).load()
if __name__ == '__main__':
main(sys.argv[1:])