-
Notifications
You must be signed in to change notification settings - Fork 100
/
Copy pathgraphios.py
executable file
·586 lines (530 loc) · 20.7 KB
/
graphios.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
433
434
435
436
437
438
439
440
441
442
443
444
445
446
447
448
449
450
451
452
453
454
455
456
457
458
459
460
461
462
463
464
465
466
467
468
469
470
471
472
473
474
475
476
477
478
479
480
481
482
483
484
485
486
487
488
489
490
491
492
493
494
495
496
497
498
499
500
501
502
503
504
505
506
507
508
509
510
511
512
513
514
515
516
517
518
519
520
521
522
523
524
525
526
527
528
529
530
531
532
533
534
535
536
537
538
539
540
541
542
543
544
545
546
547
548
549
550
551
552
553
554
555
556
557
558
559
560
561
562
563
564
565
566
567
568
569
570
571
572
573
574
575
576
577
578
579
580
581
582
583
584
585
586
#!/usr/bin/python -tt
# vim: set ts=4 sw=4 tw=79 et :
# Copyright (C) 2011 Shawn Sterling <shawn@systemtemplar.org>
#
# With contributions from:
#
# Juan Jose Presa <juanjop@gmail.com>
# Ranjib Dey <dey.ranjib@gmail.com>
# Ryan Davis <https://github.com/ryepup>
# Alexey Diyan <alexey.diyan@gmail.com>
# Steffen Zieger <me@saz.sh>
# Nathan Bird <ecthellion@gmail.com>
# Dave Josephsen <dave@skeptech.org>
# Emil Thelin <https://github.com/gummiboll>
#
# This program is free software; you can redistribute it and/or
# modify it under the terms of the GNU General Public License
# as published by the Free Software Foundation; either version 2
# of the License, or (at your option) any later version.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with this program; if not, write to the Free Software
# Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA
# 02110-1301, USA.
#
# graphios: this program will read nagios host and service perfdata, and
# send it to a carbon server.
#
# The latest version of this code will be found on my github page:
# https://github.com/shawn-sterling
from ConfigParser import SafeConfigParser
from optparse import OptionParser
import copy
import graphios_backends as backends
import logging
import logging.handlers
import os
import os.path
import re
import sys
import time
# ##########################################################
# ###  Do not edit this file, edit the graphios.cfg    #####

# nagios spool directory
spool_directory = '/var/spool/nagios/graphios'

# graphios log info
log_file = ''
log_max_size = 24

# by default we will check the current path for graphios.cfg, if config_file
# is passed as a command line argument we will use that instead.
config_file = ''

# This is overridden via config file
debug = False

# config dictionary
cfg = {}

# backend global (replaced by a dict in init_backends)
be = ""

# available loglevels for graphios.cfg; maps the config spelling to the
# numeric level of the logging module
loglevels = {
    'logging.DEBUG': logging.DEBUG,
    'logging.INFO': logging.INFO,
    'logging.WARNING': logging.WARNING,
    'logging.ERROR': logging.ERROR,
    'logging.CRITICAL': logging.CRITICAL,
}

# options parsing
parser = OptionParser("""usage: %prog [options]
sends nagios performance data to carbon.
""")
parser.add_option('-v', "--verbose", action="store_true", dest="verbose",
                  help="sets logging to DEBUG level")
parser.add_option('-q', "--quiet", action="store_true", dest="quiet",
                  help="sets logging to WARNING level")
parser.add_option("--spool-directory", dest="spool_directory",
                  default=spool_directory,
                  help="where to look for nagios performance data")
parser.add_option("--log-file", dest="log_file",
                  default=log_file,
                  help="file to log to")
parser.add_option("--backend", dest="backend", default="stdout",
                  help="sets which storage backend to use")
parser.add_option("--config_file", dest="config_file", default="",
                  help="set custom config file location")
# store_true flags default to an explicit False so the parsed value is always
# a real boolean (the old defaults were "" and None, both merely falsy).
parser.add_option("--test", action="store_true", dest="test", default=False,
                  help="Turns on test mode, which won't send to backends")
parser.add_option("--replace_char", dest="replace_char", default="_",
                  help="Replacement Character (default '_')")
# sleep values given on the command line arrive as strings; main() converts
# sleep_time with float() before using it.
parser.add_option("--sleep_time", dest="sleep_time", default=15,
                  help="How much time to sleep between checks")
parser.add_option("--sleep_max", dest="sleep_max", default=480,
                  help="Max time to sleep between runs")
parser.add_option("--server", dest="server", default="",
                  help="Server address (for backend)")
parser.add_option("--no_replace_hostname", action="store_false",
                  dest="replace_hostname", default=True,
                  help="Do not replace '.' in nagios hostnames "
                       "(replacement is on by default).")
parser.add_option("--reverse_hostname", action="store_true",
                  dest="reverse_hostname", default=False,
                  help="Reverse nagios hostname, default off.")

log = logging.getLogger('log')
class GraphiosMetric(object):
    """
    A single nagios measurement together with the nagios variables that
    describe where it came from.

    The upper-case attribute names match the keys found in the nagios
    perfdata line; get_mobj() fills them in with setattr().  All values are
    strings, VALID is the only boolean.
    """

    def __init__(self):
        self.LABEL = ''                 # The name in the perfdata from nagios
        self.VALUE = ''                 # The measured value of that metric
        self.UOM = ''                   # The unit of measure for the metric
        self.DATATYPE = ''              # HOSTPERFDATA|SERVICEPERFDATA
        self.METRICTYPE = 'gauge'       # gauge|counter|timer etc..
        self.TIMET = ''                 # Epoch time the measurement was taken
        self.HOSTNAME = ''              # name of the host measured
        self.SERVICEDESC = ''           # nagios configured service description
        self.PERFDATA = ''              # the space-delimited raw perfdata
        self.SERVICECHECKCOMMAND = ''   # literal check command syntax
        self.HOSTCHECKCOMMAND = ''      # literal check command syntax
        self.HOSTSTATE = ''             # current state afa nagios is concerned
        self.HOSTSTATETYPE = ''         # HARD|SOFT
        self.SERVICESTATE = ''          # current state afa nagios is concerned
        self.SERVICESTATETYPE = ''      # HARD|SOFT
        self.METRICBASEPATH = ''        # Establishes a root base path
        self.GRAPHITEPREFIX = ''        # graphios prefix
        self.GRAPHITEPOSTFIX = ''       # graphios suffix
        self.VALID = False              # if this metric is valid

        if 'metric_base_path' in cfg:
            self.METRICBASEPATH = cfg['metric_base_path']

    def validate(self):
        """
        Normalises the metric and sets self.VALID when it is usable.

        Quotes are removed from LABEL and VALUE and the hostname is adjusted
        according to the config.  TIMET, PERFDATA and HOSTNAME must all be
        non-empty; otherwise VALID is left untouched.  With use_service_desc
        enabled the metric additionally needs a service description or has
        to be host perfdata; without it, a graphite prefix or postfix is
        required.
        """
        # because we eliminated all whitespace, there shouldn't be any quotes
        # this happens more with windows nagios plugins.
        # Strings are immutable, so the cleaned value has to be assigned back.
        self.LABEL = self.LABEL.replace("'", "").replace('"', "")
        self.VALUE = self.VALUE.replace("'", "").replace('"', "")
        self.check_adjust_hostname()

        # compare by value; identity checks against '' only work by accident
        if self.TIMET == '' or self.PERFDATA == '' or self.HOSTNAME == '':
            return

        if cfg.get("use_service_desc") is True:
            if self.SERVICEDESC != '' or self.DATATYPE == 'HOSTPERFDATA':
                self.VALID = True
        else:
            # not using service descriptions.
            # We should keep this logic and not check for a base path here.
            # Just because there's a base path doesn't mean the metric
            # should be considered valid.
            self.VALID = not (self.GRAPHITEPREFIX == "" and
                              self.GRAPHITEPOSTFIX == "")

    def check_adjust_hostname(self):
        """
        Applies the reverse_hostname and replace_hostname config options to
        self.HOSTNAME (reversal first, then '.' replacement).
        """
        if cfg["reverse_hostname"]:
            self.HOSTNAME = '.'.join(reversed(self.HOSTNAME.split('.')))
        if cfg["replace_hostname"]:
            self.HOSTNAME = self.HOSTNAME.replace(
                ".", cfg["replacement_character"])
def chk_bool(value):
    """
    turns the strings "true" / "false" (any letter case) into the matching
    boolean; every other string is handed back unchanged
    """
    known_bools = {"true": True, "false": False}
    return known_bools.get(value.lower(), value)
def read_config(config_file):
    """
    reads the config file and returns its settings as a dict

    When config_file is '' the file graphios.cfg next to the script
    (sys.path[0]) is used if it exists, otherwise
    /etc/graphios/graphios.cfg.

    Every option of every section ends up in one flat dict; the values go
    through chk_bool(), and the key 'name' holds the section name.  If the
    file does not exist a help text is printed and the program exits with
    status 1.
    """
    if config_file == '':
        # check same dir as graphios binary
        my_file = "%s/graphios.cfg" % sys.path[0]
        if os.path.isfile(my_file):
            config_file = my_file
        else:
            # check /etc/graphios/graphios.cfg
            config_file = "/etc/graphios/graphios.cfg"

    if not os.path.isfile(config_file):
        # Always name the file we failed on: print_debug() is silent at this
        # point, so the user would otherwise never learn which path was tried.
        print("\nCan't open config file: %s\n"
              "Either modify the script at the config_file = '' line and\n"
              "specify where you want your config file to be, or create a "
              "config file\n"
              "in the above directory (which should be the same dir the "
              "graphios.py is in)\n"
              "or you can specify --config_file=myconfigfilelocation at the "
              "command line." % config_file)
        sys.exit(1)

    config = SafeConfigParser()
    config.read(config_file)
    config_dict = {}
    # The logger won't be initialized yet, so we use print_debug
    for section in config.sections():
        # there should only be 1 'graphios' section
        print_debug("section: %s" % section)
        config_dict['name'] = section
        for name, value in config.items(section):
            config_dict[name] = chk_bool(value)
            print_debug("config[%s]=%s" % (name, value))
    return config_dict
def verify_config(config_dict):
    """
    verifies the required config variables are found

    Prints the problem and exits with status 1 when a required key is
    missing or when log_level is not one of the keys of loglevels.  If the
    config carries a spool_directory it replaces the global of that name.
    """
    global spool_directory
    ensure_list = ['replacement_character', 'log_file', 'log_max_size',
                   'log_level', 'sleep_time', 'sleep_max', 'test_mode',
                   'reverse_hostname', 'replace_hostname']
    missing_values = [key for key in ensure_list if key not in config_dict]
    if missing_values:
        print("\nMust have value in config file for:\n")
        for value in missing_values:
            print("%s\n" % value)
        sys.exit(1)
    if config_dict['log_level'] not in loglevels:
        # %-formatting instead of '+': the value may have been turned into a
        # bool by chk_bool(), which string concatenation cannot handle.
        print("Unknown loglevel: %s\n" % config_dict['log_level'])
        print("Available loglevels:")
        print('\n'.join(sorted(loglevels)))
        sys.exit(1)
    if "spool_directory" in config_dict:
        spool_directory = config_dict['spool_directory']
def print_debug(msg):
    """
    writes msg to stdout, but only while the global debug flag is set
    """
    if not debug:
        return
    print(msg)
def verify_options(opts):
    """
    copies the parsed command line options into the global cfg dict and
    returns that dict

    Also points the global spool_directory at the requested directory and
    lets handle_backends() enable the selected backend.
    """
    global cfg
    global spool_directory

    # every option has a parser default, so the attributes always exist
    if "log_file" not in cfg:
        cfg["log_file"] = opts.log_file
    if cfg["log_file"] in ("''", ""):
        # no usable log file given: log next to the script
        cfg["log_file"] = "%s/graphios.log" % sys.path[0]
    cfg["log_max_size"] = 24

    # -v wins over -q; without either we log at INFO
    if opts.verbose:
        chosen_level = "logging.DEBUG"
    elif opts.quiet:
        chosen_level = "logging.WARNING"
    else:
        chosen_level = "logging.INFO"
    cfg["debug"] = bool(opts.verbose)
    cfg["log_level"] = chosen_level
    cfg["test_mode"] = bool(opts.test)

    cfg["replacement_character"] = opts.replace_char
    cfg["spool_directory"] = opts.spool_directory
    cfg["sleep_time"] = opts.sleep_time
    cfg["sleep_max"] = opts.sleep_max
    cfg["replace_hostname"] = opts.replace_hostname
    cfg["reverse_hostname"] = opts.reverse_hostname
    spool_directory = opts.spool_directory

    handle_backends(opts)
    return cfg
def handle_backends(opts):
    """
    enables the backend chosen with --backend in the global cfg

    carbon and statsd need --server; without it a message is printed and
    the program exits with status 1.  librato can only be set up through
    graphios.cfg, so selecting it here also exits with status 1.  stdout
    (the parser default) is switched on via enable_stdout, following the
    enable_<backend> naming that init_backends() looks for.
    """
    global cfg
    backend = opts.backend
    if backend in ("carbon", "statsd"):
        if not opts.server:
            print("Must also have --server for carbon or statsd.")
            sys.exit(1)
        if backend == "carbon":
            cfg["enable_carbon"] = True
            cfg["carbon_servers"] = opts.server
        else:
            cfg["enable_statsd"] = True
            cfg["statsd_server"] = opts.server
    elif backend == "librato":
        print("Use graphios.cfg for librato.")
        sys.exit(1)
    elif backend == "stdout":
        # Without this the default backend was never enabled and graphios
        # stopped with "At least one Back-end must be enabled".
        cfg["enable_stdout"] = True
def configure():
    """
    prepares logging from the values in the global cfg

    Attaches a rotating file handler to the 'log' logger, picks the log
    level, and keeps the global debug flag in step with it.  Exits with
    status 1 when log_max_size is not a number.
    """
    global debug

    try:
        size_setting = int(cfg["log_max_size"])
    except ValueError:
        print("log_max_size needs to be a integer")
        sys.exit(1)
    cfg["log_max_size"] = size_setting

    # Small numbers are taken as megabytes; anything from 1000000 upwards
    # is assumed to be a byte count already.
    if size_setting >= 1000000:
        log_max_bytes = size_setting
    else:
        log_max_bytes = size_setting * 1024 * 1024

    file_handler = logging.handlers.RotatingFileHandler(
        cfg["log_file"], maxBytes=log_max_bytes, backupCount=4)
    file_handler.setFormatter(logging.Formatter(
        "%(asctime)s %(filename)s %(levelname)s %(message)s",
        "%B %d %H:%M:%S"))
    log.addHandler(file_handler)

    debug_wanted = (cfg.get("debug") is True or
                    cfg['log_level'] == 'logging.DEBUG')
    if not debug_wanted:
        log.setLevel(loglevels[cfg['log_level']])
        debug = False
        return

    # debug runs additionally echo the log to the console
    log.debug("adding streamhandler")
    log.setLevel(logging.DEBUG)
    log.addHandler(logging.StreamHandler())
    debug = True
def process_log(file_name):
    """ process log lines into GraphiosMetric Objects.
    input is a tab delimited series of key/values each of which are delimited
    by '::' it looks like:
    DATATYPE::HOSTPERFDATA  TIMET::1399738074 etc..

    Lines that do not start with 'DATATYPE::' and lines get_mobj() rejects
    are skipped.  Every accepted line yields one copy of its metric object
    per 'label=value[;...]' entry in the perfdata, with LABEL, VALUE and UOM
    filled in.  Entries that cannot be split into label and value are logged
    and dropped.

    Returns the list of metric objects.  Exits with status 2 if the file
    cannot be read.
    """
    processed_objects = []  # the final list of metric objects we'll return
    try:
        # 'with' closes the file even when reading fails half way
        with open(file_name, "r") as host_data_file:
            file_array = host_data_file.readlines()
    except (IOError, OSError) as ex:
        log.critical("Can't open file:%s error: %s" % (file_name, ex))
        sys.exit(2)

    # parse each line into a metric object
    for line in file_array:
        if not line.startswith("DATATYPE::"):
            continue
        mobj = get_mobj(line.split('\t'))
        if not mobj:
            continue
        # break out the metric object into one object per perfdata metric
        for metric in mobj.PERFDATA.split():
            try:
                (label, data) = metric.split('=')
            except ValueError:
                # no '=' at all, or more than one
                log.critical("failed to parse label: '%s' part of perf "
                             "string '%s'" % (metric, mobj.PERFDATA))
                continue
            # only the first ';' separated field is the measurement
            measurement = data.split(';')[0]
            nobj = copy.copy(mobj)
            nobj.LABEL = label
            nobj.VALUE = re.sub("[a-zA-Z%]", "", measurement)
            nobj.UOM = re.sub("[^a-zA-Z]+", "", measurement)
            processed_objects.append(nobj)
    return processed_objects
def get_mobj(nag_array):
    """
    takes a split array of nagios variables and returns a mobj if it's
    valid. otherwise return False.

    Each element has to look like NAME::value.  '/' in the value is swapped
    for the configured replacement character.  Variables whose name contains
    PERFDATA are stored in mobj.PERFDATA as they are; values starting with
    '$_' are ignored; everything else is stored under its own name with all
    whitespace removed.
    """
    mobj = GraphiosMetric()
    for var in nag_array:
        # drop the metric if we can't split it for any reason
        try:
            (var_name, value) = var.split('::', 1)
        except ValueError:
            log.warning("could not split value %s, dropping metric" % var)
            return False

        # plain string replace: the replacement comes from the config and
        # must not be interpreted as a regex replacement template
        value = value.replace("/", cfg["replacement_character"])
        if "PERFDATA" in var_name:
            mobj.PERFDATA = value
        elif value.startswith("$_"):
            continue
        else:
            value = re.sub(r"\s", "", value)
            setattr(mobj, var_name, value)

    mobj.validate()
    if mobj.VALID is True:
        return mobj
    return False
def handle_file(file_name, graphite_lines):
    """
    deletes a processed input file; in test mode the file is kept and only
    the number of processed lines is written to the debug log
    """
    if cfg.get("test_mode") is True:
        log.debug("graphite_lines:%s" % graphite_lines)
        return
    try:
        os.remove(file_name)
    except (OSError, IOError) as ex:
        log.critical("couldn't remove file %s error:%s" % (file_name, ex))
        return
    log.debug("deleted %s" % file_name)
def process_spool_dir(directory):
    """
    processes the files in the spool directory

    Every file that check_skip_file() does not reject is parsed with
    process_log() and handed to send_backends().  A file is passed on to
    handle_file() only when each essential backend reports at least as many
    metrics as were parsed from it; otherwise it is kept.  Exits with
    status 1 when the directory cannot be listed.
    """
    log.debug("Processing spool directory %s", directory)
    num_files = 0
    total_metrics = 0   # summed over all files, for the summary line
    try:
        perfdata_files = os.listdir(directory)
    except (IOError, OSError) as e:
        print("Exception '%s' reading spool directory: %s" % (e, directory))
        print("Check if dir exists, or file permissions.")
        print("Exiting.")
        sys.exit(1)
    for perfdata_file in perfdata_files:
        file_dir = os.path.join(directory, perfdata_file)
        if check_skip_file(perfdata_file, file_dir):
            continue
        num_files += 1
        mobjs = process_log(file_dir)
        mobjs_len = len(mobjs)
        total_metrics += mobjs_len
        processed_dict = send_backends(mobjs)
        # process the output from the backends and decide the fate of the file
        all_done = True
        for backend in be["essential_backends"]:
            if processed_dict[backend] < mobjs_len:
                log.critical("keeping %s, insufficent metrics sent from %s. "
                             "Should be %s, got %s" % (
                                 file_dir, backend, mobjs_len,
                                 processed_dict[backend]))
                all_done = False
        if all_done:
            handle_file(file_dir, mobjs_len)
    log.info("Processed %s files (%s metrics) in %s" % (
        num_files, total_metrics, directory))
def check_skip_file(file_name, file_dir):
    """
    checks if file should be skipped

    file_name is the bare name, file_dir the full path of the same entry.
    Skipped are: the files host-perfdata and service-perfdata, names
    starting with '_', directories, and empty files.  Empty files are also
    passed to handle_file().  Returns True to skip, False to process.
    """
    if file_name in ("host-perfdata", "service-perfdata"):
        return True
    if file_name.startswith('_'):
        return True
    # Directories are ruled out before the size check so that a directory
    # reporting a size of 0 is never handed to handle_file() for removal.
    if os.path.isdir(file_dir):
        return True
    if os.stat(file_dir).st_size == 0:
        # file was 0 bytes
        handle_file(file_dir, 0)
        return True
    return False
def init_backends():
    """
    builds the global 'be' dict that describes the active backends

    be["enabled_backends"] maps a backend name to an instance of the class
    of the same name from the backends module; a backend is enabled when
    cfg holds enable_<name> set to True.  be["essential_backends"] lists the
    enabled backends whose nerf_<name> option is missing or False.

    The dict is kept global on purpose: it could later be changed at runtime
    to switch backends on and off without restarting graphios.
    """
    global be
    enabled = {}      # name -> instantiated backend object
    essential = []    # names of the backends we actually care about
    be = {"enabled_backends": enabled, "essential_backends": essential}

    # PLUGIN WRITERS! register your new backends by adding their obj name here
    avail_backends = ("carbon",
                      "statsd",
                      "librato",
                      "influxdb",
                      "influxdb09",
                      "stdout",
                      )

    # this relies on a backend class being named like the config option that
    # enables it (eg. carbon and enable_carbon)
    for backend in avail_backends:
        if cfg.get("enable_%s" % (backend)) is not True:
            continue
        backend_class = getattr(backends, backend)
        enabled[backend] = backend_class(cfg)
        # a backend is essential unless nerf_<name> is set to something
        # other than False
        if cfg.get("nerf_%s" % (backend), False) is False:
            essential.append(backend)

    log.info("Enabled backends: %s" % be["enabled_backends"].keys())
def send_backends(metrics):
    """
    hands the metrics to the send() method of every enabled backend and
    returns a dict of backend name -> whatever that send() returned

    Exits with status 1 when no backend is enabled.
    """
    active = be["enabled_backends"]
    if not len(active):
        log.critical("At least one Back-end must be enabled in graphios.cfg")
        sys.exit(1)
    sent_by_backend = {}  # who processed what
    for backend_name in active:
        sent_by_backend[backend_name] = active[backend_name].send(metrics)
    return sent_by_backend
def main():
    """
    endless work loop: process the spool directory, sleep for
    cfg["sleep_time"] seconds, repeat; ctrl-c ends the loop
    """
    def run_once():
        # globals are looked up on every pass, exactly like the plain loop
        process_spool_dir(spool_directory)
        log.debug("graphios sleeping.")
        time.sleep(float(cfg["sleep_time"]))

    log.info("graphios startup.")
    try:
        while True:
            run_once()
    except KeyboardInterrupt:
        log.info("ctrl-c pressed.  Exiting graphios.")
if __name__ == '__main__':
    # Without arguments the default config file is used.  With arguments,
    # --config_file takes precedence; otherwise the config is built from the
    # command line options alone.
    if len(sys.argv) <= 1:
        cfg = read_config(config_file)
    else:
        (options, args) = parser.parse_args()
        if not options.config_file:
            cfg = verify_options(options)
        else:
            cfg = read_config(options.config_file)
    verify_config(cfg)
    configure()
    init_backends()
    main()