Skip to content

Commit

Permalink
Merge pull request #338 from Deleh/fix/netdata_swap_chart
Browse files Browse the repository at this point in the history
Support New Netdata Swap Chart Name
  • Loading branch information
fmessmer authored Jan 22, 2024
2 parents d56b499 + 3f56ba0 commit aa1fde3
Showing 1 changed file with 32 additions and 4 deletions.
36 changes: 32 additions & 4 deletions cob_monitoring/src/cpu_monitor.py
Original file line number Diff line number Diff line change
Expand Up @@ -56,6 +56,8 @@ def __init__(self, hostname, diag_hostname):

self._num_cores = rospy.get_param('~num_cores', psutil.cpu_count())

self._netdata_version = None

# CPU stats
self._info_stat = DiagnosticStatus()
self._info_stat.name = '%s CPU Info' % diag_hostname
Expand Down Expand Up @@ -314,13 +316,34 @@ def check_free_memory(self, interval=1):
diag_vals.append(KeyValue(key = 'Mem Free', value = str(free_mem)))
diag_vals.append(KeyValue(key = 'Mem Buff/Cache', value = str(cache_mem)))

netdata_swp, error = self._netdata_interface.query_netdata('system.swap', interval)
# Netdata versions differ in chart names
netdata_swap_charts = ['mem.swap', 'system.swap']
error_count = 0
netdata_chart_err = ''
for chart in netdata_swap_charts:
try:
netdata_swp, error = self._netdata_interface.query_netdata(chart, interval)

# Count individual connection errors for multiple chart names
except requests.ConnectionError as err:
error_count += 1
netdata_chart_err += chart + ' '

netdata_swp = None
error = str(err)

if netdata_swp:
break

netdata_chart_err = "{} of {} failed: {}".format(error_count, len(netdata_swap_charts), netdata_chart_err)

if not netdata_swp:
diag_level = DiagnosticStatus.WARN
diag_msg = 'Swap Usage Error'
diag_vals = [ KeyValue(key = 'Swap Usage Error', value = 'Could not fetch data from netdata'),
KeyValue(key = 'Output', value = netdata_swp),
KeyValue(key = 'Error', value= error) ]
diag_vals = [ KeyValue(key='Swap Usage Error', value='Could not fetch data from netdata'),
KeyValue(key='Failed Chart Names', value=netdata_chart_err),
KeyValue(key='Output', value=netdata_swp),
KeyValue(key='Error', value=error) ]
return (diag_vals, diag_msg, diag_level)

del netdata_swp['time']
Expand Down Expand Up @@ -547,6 +570,11 @@ def check_info(self, event):
diag_msgs = []
diag_level = DiagnosticStatus.OK

if self._netdata_version is None:
self._netdata_version = self._netdata_interface.query_netdata_info()['version']

diag_vals.append(KeyValue(key='Netdata version', value=self._netdata_version))

if self._check_core_temps:
interval = math.ceil(self._usage_timer._period.to_sec())
core_vals, core_msgs, core_level = self.check_core_temps(interval=interval)
Expand Down

0 comments on commit aa1fde3

Please sign in to comment.