From 020eabab6fe1bf858810c5c948253a98f9c323df Mon Sep 17 00:00:00 2001 From: Denis Lehmann Date: Fri, 12 Jan 2024 18:51:10 +0100 Subject: [PATCH 1/2] support new netdata swap chart name --- cob_monitoring/src/cpu_monitor.py | 29 +++++++++++++++++++++++++---- 1 file changed, 25 insertions(+), 4 deletions(-) diff --git a/cob_monitoring/src/cpu_monitor.py b/cob_monitoring/src/cpu_monitor.py index 4efbb211..d9639ea2 100755 --- a/cob_monitoring/src/cpu_monitor.py +++ b/cob_monitoring/src/cpu_monitor.py @@ -314,13 +314,34 @@ def check_free_memory(self, interval=1): diag_vals.append(KeyValue(key = 'Mem Free', value = str(free_mem))) diag_vals.append(KeyValue(key = 'Mem Buff/Cache', value = str(cache_mem))) - netdata_swp, error = self._netdata_interface.query_netdata('system.swap', interval) + # Netdata versions differ in chart names + netdata_swap_charts = ['mem.swap', 'system.swap'] + error_count = 0 + netdata_chart_err = '' + for chart in netdata_swap_charts: + try: + netdata_swp, error = self._netdata_interface.query_netdata(chart, interval) + + # Count individual connection errors for multiple chart names + except requests.ConnectionError as err: + error_count += 1 + netdata_chart_err += chart + ' ' + + netdata_swp = None + error = str(err) + + if netdata_swp: + break + + netdata_chart_err = "{} of {} failed: {}".format(error_count, len(netdata_swap_charts), netdata_chart_err) + if not netdata_swp: diag_level = DiagnosticStatus.WARN diag_msg = 'Swap Usage Error' - diag_vals = [ KeyValue(key = 'Swap Usage Error', value = 'Could not fetch data from netdata'), - KeyValue(key = 'Output', value = netdata_swp), - KeyValue(key = 'Error', value= error) ] + diag_vals = [ KeyValue(key='Swap Usage Error', value='Could not fetch data from netdata'), + KeyValue(key='Failed Chart Names', value=netdata_chart_err), + KeyValue(key='Output', value=netdata_swp), + KeyValue(key='Error', value=error) ] return (diag_vals, diag_msg, diag_level) del netdata_swp['time'] From 
3f56ba05070d9401181a6c59ad68232c33db8674 Mon Sep 17 00:00:00 2001 From: Denis Lehmann Date: Thu, 18 Jan 2024 16:11:15 +0100 Subject: [PATCH 2/2] add netdata version to cpu info --- cob_monitoring/src/cpu_monitor.py | 7 +++++++ 1 file changed, 7 insertions(+) diff --git a/cob_monitoring/src/cpu_monitor.py b/cob_monitoring/src/cpu_monitor.py index d9639ea2..8571d72a 100755 --- a/cob_monitoring/src/cpu_monitor.py +++ b/cob_monitoring/src/cpu_monitor.py @@ -56,6 +56,8 @@ def __init__(self, hostname, diag_hostname): self._num_cores = rospy.get_param('~num_cores', psutil.cpu_count()) + self._netdata_version = None + # CPU stats self._info_stat = DiagnosticStatus() self._info_stat.name = '%s CPU Info' % diag_hostname @@ -568,6 +570,11 @@ def check_info(self, event): diag_msgs = [] diag_level = DiagnosticStatus.OK + if self._netdata_version is None: + self._netdata_version = self._netdata_interface.query_netdata_info()['version'] + + diag_vals.append(KeyValue(key='Netdata version', value=self._netdata_version)) + if self._check_core_temps: interval = math.ceil(self._usage_timer._period.to_sec()) core_vals, core_msgs, core_level = self.check_core_temps(interval=interval)