-
Notifications
You must be signed in to change notification settings - Fork 8
/
aws-audit.py
executable file
·634 lines (535 loc) · 20.3 KB
/
aws-audit.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
433
434
435
436
437
438
439
440
441
442
443
444
445
446
447
448
449
450
451
452
453
454
455
456
457
458
459
460
461
462
463
464
465
466
467
468
469
470
471
472
473
474
475
476
477
478
479
480
481
482
483
484
485
486
487
488
489
490
491
492
493
494
495
496
497
498
499
500
501
502
503
504
505
506
507
508
509
510
511
512
513
514
515
516
517
518
519
520
521
522
523
524
525
526
527
528
529
530
531
532
533
534
535
536
537
538
539
540
541
542
543
544
545
546
547
548
549
550
551
552
553
554
555
556
557
558
559
560
561
562
563
564
565
566
567
568
569
570
571
572
573
574
575
576
577
578
579
580
581
582
583
584
585
586
587
588
589
590
591
592
593
594
595
596
597
598
599
600
601
602
603
604
605
606
607
608
609
610
611
612
613
614
615
616
617
618
619
620
621
622
623
624
625
626
627
628
629
630
631
632
633
634
#! /usr/bin/env python3
#
# aws-audit.py
#
# download, parse and create an email report for general AWS spend
#
# this script is designed to run as a cron job, providing weekly (incremental)
# and end-of-month billing reports. the only difference between these reports
# is the formatting of the email subject and preamble.
#
import argparse
import collections
import csv
from email.mime.text import MIMEText
from email.mime.image import MIMEImage
from email.mime.multipart import MIMEMultipart
from io import StringIO
import locale
import os
import smtplib
import socket
import sys
# basic config import (namedtuple setup)
import config
# common aws utilities
import awslib
# support for N-ary tree data structure to support OUs
import tree
# email settings: user-defined content and server information
import emailsettings
# adopt the locale configured in the user's environment so that the
# locale-aware number formatting used for the spend totals (grouping=True)
# picks up that locale's thousands separator.
locale.setlocale(locale.LC_ALL, '') # for comma formatting
def parse_billing_data(billing_data):
    """
    walk the billing rows and collect the per-account totals.

    only rows whose fourth column (index 3) equals 'AccountTotal' are used;
    rows with fewer than four columns are skipped outright.

    args:
      billing_data: iterable of rows (sequences of strings), e.g. a csv reader

    returns:
      a 4-tuple of:
        user_dict: dict keyed by the account number (column 2), each value a
                   dict with 'name' (column 9), 'total' (column 24, as float)
                   and 'currency' (column 23)
        currency:  currency of the first 'AccountTotal' row (ie: USD), or ''
        month:     characters 5-6 of column 6 of the first 'AccountTotal' row
        year:      characters 0-3 of that same column
    """
    accounts = collections.defaultdict(dict)
    currency = month = year = ''

    account_total_rows = (
        fields for fields in billing_data
        if len(fields) >= 4 and fields[3] == 'AccountTotal'
    )

    for fields in account_total_rows:
        # the report-wide currency and billing period come from the first
        # usable row only
        currency = currency or fields[23]
        if not (month and year):
            stamp = fields[6]
            month, year = stamp[5:7], stamp[0:4]

        entry = accounts[fields[2]]
        entry['name'] = fields[9]
        entry['total'] = float(fields[24])
        entry['currency'] = fields[23]

    return accounts, currency, month, year
def init_tree(aws_id, default_currency):
    """
    build the root node of the OU tree.

    args:
      aws_id: the AWS ID of the root consolidated billing account; handed to
              awslib.get_root_ou_id() to look up the root OU
      default_currency: the currency assigned to the root node

    returns:
      a tree.Node carrying the root OU's id and name
    """
    org_root = awslib.get_root_ou_id(aws_id)
    return tree.Node(
        id=org_root.id,
        name=org_root.name,
        currency=default_currency,
    )
def populate_tree(tree, user_dict, default_currency):
    """
    recursively fill in the OU tree below the given node, attaching each
    account found in an OU together with its spend from the billing data.

    accounts that are in the OU but missing from user_dict are attached with
    a total of 0.0 in the default currency. the node is modified in place;
    nothing is returned.

    args:
      tree: the node to populate (the root node for the initial call)
      user_dict: dict created from parsing the billing file
      default_currency: the default currency pulled from the billing CSV
    """
    node = tree
    child_ous = awslib.get_ou_children(node.id)
    member_accounts = awslib.get_accounts_for_ou(node.id)

    for member in member_accounts or ():
        if member.id in user_dict:
            billed = user_dict[member.id]
            spend, spend_currency = billed['total'], billed['currency']
        else:
            # account has zero spend and is not showing up in the billing CSV
            spend, spend_currency = 0.0, default_currency

        node.add_account(config.AccountInfo(
            id=member.id,
            name=member.name,
            total=spend,
            currency=spend_currency,
        ))

    if child_ous is not None:
        for ou in child_ous:
            node.add_child(id=ou.id, name=ou.name, currency=default_currency)

    # descend into every child OU attached to this node
    for subtree in node.children:
        populate_tree(subtree, user_dict, default_currency)
def add_leavers(root, user_dict, default_currency):
    """
    collect accounts that appear in the billing CSV but are not reported by
    awslib.get_accounts_for_org(), and hang them (with their spend) off a
    single top-level 'leavers' node. the node is only created once the first
    such account is found.

    args:
      root: the root Node of the entire OU tree
      user_dict: the user dict generated from the billing CSV
      default_currency: the default currency
    """
    org_accounts = awslib.get_accounts_for_org()
    leavers_node = None

    for acct_id, billed in user_dict.items():
        if acct_id in org_accounts:
            continue

        if leavers_node is None:
            leavers_node = root.add_child(
                id='leavers',
                name='No Longer in AWS Organization',
                currency=default_currency,
            )

        leavers_node.add_account(config.AccountInfo(
            id=acct_id,
            name=billed['name'],
            total=billed['total'],
            currency=billed['currency'],
        ))
def generate_simple_csv(user_dict, outfile=None, limit=0.0,
                        month=None, year=None):
    """
    output account-based spends to a CSV. can create a new file, or append to
    an existing one.

    the CSV header is defined in CSV_HEADER and can be used to customize the
    field names you want to output. if you want to change the fields that are
    written, update the row list passed to writer.writerow() below.

    rows are written in descending order of spend, and the spend column is
    formatted as '$' followed by the locale-grouped amount with two decimals.

    args:
      user_dict: dict of all users and individual total spends
      outfile: name of the CSV to write to (required). if the file does not
               exist yet it is created and the header row is written first;
               otherwise rows are appended.
      limit: accounts whose total is below this value are not written
      month: month of the report (gleaned from the billing CSV)
      year: year of the report (gleaned from the billing CSV)

    raises:
      TypeError: if outfile is not supplied
    """
    CSV_HEADER = ['year', 'month', 'person', 'spend']

    if outfile is None:
        raise TypeError("generate_simple_csv() requires an 'outfile' path")

    limit = float(limit)
    locale.setlocale(locale.LC_ALL, '')

    ranked = sorted(
        ((u['name'], u['total']) for u in user_dict.values()),
        key=lambda acct: acct[1],
        reverse=True,
    )

    # decide about the header before opening: mode 'a' creates the file
    write_header = not os.path.isfile(outfile)

    # open the file once for the whole report instead of once per row
    with open(outfile, 'a', newline='') as csv_file:
        writer = csv.writer(csv_file, delimiter=',')
        if write_header:
            writer.writerow(CSV_HEADER)

        for acct_name, acct_total in ranked:
            if acct_total < limit:
                continue
            # locale.format() was removed in python 3.12; format_string() is
            # the supported equivalent
            acct_total_str = '$' + locale.format_string(
                '%.2f', acct_total, grouping=True)
            writer.writerow([year, month, acct_name, acct_total_str])
def generate_leaderboard(user_dict, display_ids, top, default_currency):
    """
    list the top N spenders, highest spend first.

    args:
      user_dict: dict of all users and individual total spends
      display_ids: display each user's AWS ID after their name
      top: limit output to the N top users. if 0 (or negative), list all.
      default_currency: default currency, shown in the header line

    returns:
      the leaderboard as a string: a header line with the combined spend of
      the listed accounts, one line per account, and a trailing blank line
    """
    ranked = sorted(
        ((u['name'], acct_id, u['total'], u['currency'])
         for acct_id, u in user_dict.items()),
        key=lambda acct: acct[2],
        reverse=True,
    )

    # a plain ranked[:top] would yield nothing for 0 and drop the tail for
    # negative values; the documented contract is "0 means everybody"
    top_spenders = ranked[:top] if top > 0 else ranked
    shown = top if top > 0 else len(top_spenders)

    total_spend = sum(acct[2] for acct in top_spenders)
    # locale.format() was removed in python 3.12; use format_string()
    sum_str = locale.format_string('%.2f', total_spend, grouping=True)

    lines = ["== AWS top %s leaderboard: $%s %s ==\n\n"
             % (shown, sum_str, default_currency)]

    for acct_name, acct_num, acct_total, acct_total_currency in top_spenders:
        acct_total_str = locale.format_string('%.2f', acct_total,
                                              grouping=True)
        if display_ids:
            # amount carries the same '$' prefix as the no-ID layout
            lines.append("{:<25}\t({})\t${} {}\n".format(
                acct_name, acct_num, acct_total_str, acct_total_currency))
        else:
            lines.append("{:<25}\t\t${} {}\n".format(
                acct_name, acct_total_str, acct_total_currency))

    lines.append("\n\n")
    return ''.join(lines)
def generate_simple_report(user_dict, limit, display_ids, default_currency):
    """
    generate the flat billing report: one line per account, highest spend
    first.

    the header total always covers every account in user_dict; the limit only
    controls which individual accounts are listed.

    args:
      user_dict: dict of all users and individual total spends
      limit: accounts whose total is below this value are left out of the
             listing
      display_ids: display each user's AWS ID after their name
      default_currency: default currency, shown in the header line

    returns:
      the report as a string (header line, blank line, one line per account)
    """
    account_details = [
        (u['name'], acct_id, u['total'], u['currency'])
        for acct_id, u in user_dict.items()
    ]
    total_spend = sum(acct[2] for acct in account_details)

    # locale.format() was removed in python 3.12; use format_string()
    sum_str = locale.format_string('%.2f', total_spend, grouping=True)
    lines = ["== Current AWS totals: $%s %s (only shown below: > $%s) ==\n\n"
             % (sum_str, default_currency, limit)]

    ranked = sorted(account_details, key=lambda acct: acct[2], reverse=True)
    for acct_name, acct_num, acct_total, acct_total_currency in ranked:
        if acct_total < limit:
            continue

        acct_total_str = locale.format_string('%.2f', acct_total,
                                              grouping=True)
        if display_ids:
            # amount carries the same '$' prefix as the no-ID layout
            lines.append("{:<25}\t({})\t${} {}\n".format(
                acct_name, acct_num, acct_total_str, acct_total_currency))
        else:
            lines.append("{:<25}\t\t${} {}\n".format(
                acct_name, acct_total_str, acct_total_currency))

    return ''.join(lines)
def create_plots(acctcsv=None, orgcsv=None):
    """
    render plots from existing CSV data via the plots module. each plot's
    output filename is the CSV's path with its extension removed.

    args:
      acctcsv: full path to the account-based spends CSV, or None to skip
      orgcsv: full path to the org-based spends CSV, or None to skip

    returns:
      (account_plot, org_plot): whatever plots.account_spend_plot() and
      plots.org_spend_plot() returned (send_email() treats these as file
      paths), or None for a plot that was not requested
    """
    # imported here rather than at module level because it is slow to load
    import plots

    def _without_extension(csv_path):
        return os.path.splitext(csv_path)[0]

    account_plot = None
    org_plot = None

    if acctcsv is not None:
        account_plot = plots.account_spend_plot(
            csvfile=acctcsv,
            outputfilename=_without_extension(acctcsv),
        )

    if orgcsv is not None:
        org_plot = plots.org_spend_plot(
            csvfile=orgcsv,
            outputfilename=_without_extension(orgcsv),
        )

    return account_plot, org_plot
def send_email(report, weekly, plots):
    """
    send the report as an email, with the to:, from:, subject: and preamble
    defined in emailsettings.py. the sending host's name is appended as a
    footer, and any plots are attached as images.

    args:
      report: the raw string containing the final report
      weekly: boolean, if true use weekly email formatting. if false, use
              monthly.
      plots: a 2-tuple (account_plot, org_plot) of plot file locations to
             attach to the email; falsy entries are skipped
    """
    if weekly:
        subject = emailsettings.EMAIL_SUBJECT_WEEKLY
        preamble = emailsettings.EMAIL_PREAMBLE_WEEKLY + \
            emailsettings.EMAIL_PREAMBLE
    else:
        subject = emailsettings.EMAIL_SUBJECT_MONTHLY
        preamble = emailsettings.EMAIL_PREAMBLE_MONTHLY + \
            emailsettings.EMAIL_PREAMBLE

    report = preamble + report + "\n\n---\nSent from %s.\n" % \
        (socket.gethostname())

    msg = MIMEMultipart()
    msg['Subject'] = subject
    msg['From'] = emailsettings.EMAIL_FROM_ADDR
    msg['To'] = emailsettings.EMAIL_TO_ADDR
    msg.attach(MIMEText(report))

    account_plot, org_plot = plots
    for plot_path in (account_plot, org_plot):
        if not plot_path:
            continue
        # close the image file as soon as it has been read
        with open(plot_path, 'rb') as img_file:
            image = MIMEImage(img_file.read(),
                              name=os.path.basename(plot_path))
        msg.attach(image)

    # the context manager issues QUIT and closes the connection, even if
    # sendmail() raises
    with smtplib.SMTP(emailsettings.MAIL_SERVER) as server:
        server.sendmail(emailsettings.EMAIL_FROM_ADDR,
                        [emailsettings.EMAIL_TO_ADDR],
                        msg.as_string())
def parse_args(argv=None):
    """
    define and parse the command line options.

    args:
      argv: optional list of argument strings to parse instead of the
            process's command line (None, the default, parses sys.argv[1:])

    returns:
      the argparse.Namespace holding the parsed options
    """
    desc = """
Download, parse and create reports for general AWS spend, optionally
sending the report as an e-mail and/or output CSV-based spending data.
"""
    epil = """
Please refer to README.md for more detailed usage instructions and examples.
"""
    parser = argparse.ArgumentParser(description=desc, epilog=epil)

    # AWS settings
    parser.add_argument("-i",
                        "--id",
                        help="""
AWS account ID for consolidated billing. Required unless using the --local
argument.
""",
                        type=str,
                        metavar="AWS_ID")
    parser.add_argument("-b",
                        "--bucket",
                        help="""
S3 billing bucket name. Required unless using the --local argument.
""",
                        type=str,
                        metavar="S3_BILLING_BUCKET")
    parser.add_argument("-L",
                        "--local",
                        help="""
Read a consolidated billing CSV from the filesystem and bypass
downloading from S3.
""",
                        type=str,
                        metavar="LOCAL_BILLING_CSV")
    parser.add_argument("-s",
                        "--save",
                        help="Save the billing CSV to the local directory.",
                        action="store_true")

    # output formatting
    parser.add_argument("-q",
                        "--quiet",
                        help="Do not print to STDOUT.",
                        action="store_true")
    parser.add_argument('-o',
                        "--ou",
                        help="""
Use AWS Organizational Units to group users. This option will greatly increase
the amount of time it takes the script to run. If this option is specified,
but no OUs have been defined for this consolidated billing group, the script
will still run successfully but will take much longer to complete.
""",
                        action="store_true")
    parser.add_argument("-l",
                        "--limit",
                        help="""
Do not display spends less than this value on the report. Any spends not
displayed will still be counted towards all totals. Default is 5.00.
""",
                        type=float,
                        default=5.0)
    parser.add_argument("-D",
                        "--display_ids",
                        help="Display AWS account IDs in the report.",
                        action="store_true")
    parser.add_argument("-f",
                        "--full",
                        help="""
Generate a full report. This option is only useful when using OUs in
a consolidated billing setting, and the --ou option is used. An additional
section is added at the end of the original report that lists all users sorted
by spend. If the --ou argument is not set, this option will be ignored.
""",
                        action="store_true")
    parser.add_argument("-e",
                        "--email",
                        help="""
Send the report as an email, using the settings defined in emailsettings.py.
""",
                        action="store_true")
    parser.add_argument("-O",
                        "--orgcsv",
                        help="""
Output org/project-based spends to a CSV. If FILENAME exists, the script
will append to the file instead of creating a new one.
""",
                        type=str,
                        metavar="FILENAME")
    parser.add_argument("-C",
                        "--csv",
                        help="""
Output account-based spends to a CSV. If FILENAME exists, the script
will append to the file instead of creating a new one.
""",
                        type=str,
                        metavar="FILENAME")
    parser.add_argument("-p",
                        "--plot",
                        help="""
Create plots of CSV data. Only useful if the --csv or --orgcsv arguments
are used. This will create PNG plots that are saved in the directory where
the CSV data lives, and will share the filename of the CSV file used to create
the plot. If this argument is specified with the --email argument, any images
will be attached to the resulting message.
""",
                        action="store_true")
    parser.add_argument("-T",
                        "--top",
                        help="""
Display the top N spenders at the beginning of the report. 0 (default) will
ignore this argument.
""",
                        type=int,
                        default=0)

    # monthly or weekly style email reports; the two flags cannot be combined
    frequency = parser.add_mutually_exclusive_group()
    frequency.add_argument("-w",
                           "--weekly",
                           help="""
Formats the email subject and body to denote a "weekly" report on spend,
from the start of the current month to the present day.
""",
                           action="store_true")
    frequency.add_argument("-m",
                           "--monthly",
                           help="""
Formats the email subject and body to denote an "end of month" report.
""",
                           action="store_true")

    return parser.parse_args(argv)
def main():
    """
    script entry point: validate the command line, fetch and parse the
    billing data, build the report (flat or OU-based), then optionally write
    CSVs, create plots, print the report and send it by email.

    exits with status -1 after printing a message when the combination of
    command line options is invalid.
    """
    args = parse_args()

    if args.id is None and args.local is None:
        print("Please specify an AWS account id with the --id argument, " +
              "unless reading in a local billing CSV with --local <filename>.")
        sys.exit(-1)

    if args.bucket is None and args.local is None:
        print("Please specify a S3 billing bucket name with the --bucket " +
              "argument, unless reading in a local billing CSV with --local " +
              "<filename>.")
        sys.exit(-1)

    # --ou is a store_true flag (always True or False, never None), so test
    # its truth value; an "is not None" test would reject every run that
    # omits --id, including plain --local runs
    if args.id is None and args.ou:
        print("You must supply an AWS account id with the --id argument when " +
              "using the --ou argument.")
        sys.exit(-1)

    if args.email and not (args.weekly or args.monthly):
        print("Please specify the frequency formatting of the email using " +
              "--weekly or --monthly")
        sys.exit(-1)

    if args.orgcsv and not args.ou:
        print("You must specify the --ou argument to use the --orgcsv option.")
        sys.exit(-1)

    if (args.csv or args.orgcsv) and args.csv == args.orgcsv:
        print("Please use different filenames for the --csv and --orgcsv options.")
        sys.exit(-1)

    # plotting needs at least one of the two CSVs, not both of them
    if args.plot and not (args.csv or args.orgcsv):
        print("You must specify at least one CSV file to plot with the --csv or " +
              " --orgcsv options.")
        sys.exit(-1)

    report = ''

    billing_data = awslib.get_latest_bill(
        args.id,
        args.bucket,
        args.local,
        args.save
    )
    user_dict, currency, month, year = parse_billing_data(billing_data)

    # leaderboard?
    if args.top != 0:
        report = generate_leaderboard(
            user_dict,
            args.display_ids,
            args.top,
            currency
        )

    # no OU tree, just spew out the report
    if not args.ou:
        report = report + generate_simple_report(
            user_dict,
            args.limit,
            args.display_ids,
            currency
        )
    # use the OU tree, more complex report
    else:
        root = init_tree(args.id, currency)
        populate_tree(root, user_dict, currency)

        # handle those who have left the org, but are in the billing CSV.
        add_leavers(root, user_dict, currency)

        # locale.format() was removed in python 3.12; use format_string()
        sum_str = locale.format_string('%.2f', root.node_spend, grouping=True)
        report = report + \
            '== Current AWS totals: $%s %s (only shown below: > $%s) ==\n\n' \
            % (sum_str, currency, args.limit)

        # print_tree() writes to stdout, so capture its output in a buffer.
        # always put the real stdout back, even if print_tree() raises.
        old_stdout = sys.stdout
        tree_output = StringIO()
        sys.stdout = tree_output
        try:
            root.print_tree(limit=args.limit, display_ids=args.display_ids)
        finally:
            sys.stdout = old_stdout

        report = report + tree_output.getvalue()

        # add the basic report to the end if desired
        if args.full:
            report = report + '\n\n'
            report = report + generate_simple_report(
                user_dict,
                args.limit,
                args.display_ids,
                currency
            )

    if args.csv:
        generate_simple_csv(user_dict, outfile=args.csv, month=month, year=year)

    if args.orgcsv:
        root.generate_project_csv(outfile=args.orgcsv, month=month, year=year)

    account_plot = org_plot = None
    if args.plot:
        account_plot, org_plot = create_plots(acctcsv=args.csv, orgcsv=args.orgcsv)

    if not args.quiet:
        print(report)

    if args.email:
        send_email(report, args.weekly, (account_plot, org_plot))
# only run the report when this file is executed as a script, so importing
# the module does not trigger it
if __name__ == "__main__":
    main()