From 4bf4a47bf16abfad0bcfc27a47f86173d4de6068 Mon Sep 17 00:00:00 2001
From: kx1t <kx1t@amsat.org>
Date: Thu, 9 Nov 2023 11:28:58 -0500
Subject: [PATCH] updates

---
 README.md                                     |  5 +--
 rootfs/etc/s6-overlay/scripts/message-monitor | 34 +++++++++++++------
 rootfs/scripts/healthcheck.sh                 | 18 +++++-----
 3 files changed, 36 insertions(+), 21 deletions(-)

diff --git a/README.md b/README.md
index 6bcbd4d..5ddd176 100644
--- a/README.md
+++ b/README.md
@@ -275,8 +275,9 @@ You should now be feeding ADSB-ES & UAT to the "new" aggregators, FlightAware, a
 | Variable | Description                                                                                                                                 | Default |
 | -------- | ------------------------------------------------------------------------------------------------------------------------------------------- | ------- |
 | `TZ`     | Local timezone in ["TZ database name" format](https://en.wikipedia.org/wiki/List_of_tz_database_time_zones).                                | `UTC`   |
-| `LAT`    | Latitude of your receiver. Only required if you want range statistics for InfluxDB or Prometheus, or if you are using the autogain script.  | Unset   |
-| `LON`    | Longitude of your receiver. Only required if you want range statistics for InfluxDB or Prometheus, or if you are using the autogain script. | Unset   |
+| `LAT`    | Latitude of your receiver. Only required if you want range statistics for InfluxDB, Prometheus, or tar1090/ultrafeeder graphs. | Unset   |
+| `LON`    | Longitude of your receiver. Only required if you want range statistics for InfluxDB, Prometheus, or tar1090/ultrafeeder graphs. | Unset   |
+| `DUMP978_MSG_MONITORING_INTERVAL` | Interval between runs of the Message Monitor, which checks whether new messages are being received. The value can be anything accepted by the Linux `sleep` command (for example `30s`, `15m`, or `1h`). | Unset (15 minutes) |
 
 ### `dump978-fa` General Options
 
diff --git a/rootfs/etc/s6-overlay/scripts/message-monitor b/rootfs/etc/s6-overlay/scripts/message-monitor
index 3c83226..c66c9b6 100755
--- a/rootfs/etc/s6-overlay/scripts/message-monitor
+++ b/rootfs/etc/s6-overlay/scripts/message-monitor
@@ -1,34 +1,46 @@
 #!/command/with-contenv bash
-#shellcheck shell=bash
+#shellcheck shell=bash disable=SC1091
+
+source /scripts/common
+mkdir -p /run/stats
+s6wrap=(s6wrap --quiet --prepend="$(basename "$0")" --timestamps --args)
 
 while :
 do
     # Make sure we're receiving messages from the SDR
     # get the number of messages received since process start:
-    mkdir -p /run/stats
+
     if [[ -f /run/skyaware978/aircraft.json ]]; then
         read -r new_msg_count <<< "$(jq .messages /run/skyaware978/aircraft.json 2>/dev/null)"
     else
         new_msg_count="STARTING"
     fi
     # get the number of messages previously read, or 0 if there's no history:
-    if [[ -f /run/stats/msgs_since_last_healthcheck ]]; then
-        read -r old_msg_count < /run/stats/msgs_since_last_healthcheck
-        secs_since_last_check="$(( $(date +%s) - $(stat -c '%Y' /run/stats/msgs_since_last_healthcheck) ))"
+    if [[ -f /run/stats/msgs_since_last_monitor_run ]]; then
+        read -r old_msg_count < /run/stats/msgs_since_last_monitor_run
+        secs_since_last_check="$(( $(date +%s) - $(stat -c '%Y' /run/stats/msgs_since_last_monitor_run) ))"
     else
         old_msg_count=0
-        secs_since_last_check="$(( $(date +%s) - $(stat -c '%Y' /run/service/skyaware978) ))"    # use skyaware978 modify time as the creation time of the container
+        secs_since_last_check="$(( $(date +%s) - $(stat -c '%Y' /run/service/skyaware) ))"    # no history file yet: use the modify time of /run/service/skyaware as the creation time of the container
+    fi
+
+    # if new_msg_count < old_msg_count, dump978 must have restarted since the previous run of this script
+    # in that case, assume that old_msg_count=0
+    if (( new_msg_count < old_msg_count )); then
+        old_msg_count=0
     fi
 
     if [[ "$new_msg_count" == "STARTING" ]]; then
-        echo "[$(date)][STARTING] No messages have been received as the container is still starting"
+        "${s6wrap[@]}" echo "[STARTING] No messages have been received as the container is still starting"
         new_msg_count=0
     elif (( new_msg_count == old_msg_count )); then
-        echo "[$(date)][UNHEALTHY] No messages received since last HealthCheck ($secs_since_last_check secs ago)"
+        "${s6wrap[@]}" echo "[WARNING] No messages received since last run of the Messages Monitor ($secs_since_last_check secs ago)"
+    elif (( new_msg_count > old_msg_count )); then
+        "${s6wrap[@]}" echo "[OK] $(( new_msg_count - old_msg_count )) messages received since last run of the Messages Monitor ($secs_since_last_check secs ago)"
     else
-        echo "[$(date)][ERROR] This situation cannot occur; new_msg_count=$new_msg_count; old_msg_count=$old_msg_count"
+        "${s6wrap[@]}" echo "[ERROR] This situation cannot occur, please notify the software maintainers. new_msg_count=$new_msg_count; old_msg_count=$old_msg_count"
     fi
-    echo "$new_msg_count" > /run/stats/msgs_since_last_healthcheck
+    echo "$new_msg_count" > /run/stats/msgs_since_last_monitor_run
 
-    sleep 15m
+    sleep "${DUMP978_MSG_MONITORING_INTERVAL:-15m}" & wait $!    # background the sleep (default 15m), then wait on that job's PID ($!) before the next run
 done
diff --git a/rootfs/scripts/healthcheck.sh b/rootfs/scripts/healthcheck.sh
index 900458a..174c568 100755
--- a/rootfs/scripts/healthcheck.sh
+++ b/rootfs/scripts/healthcheck.sh
@@ -51,14 +51,16 @@ fi
 services=($(basename -a $(find /run/service/ -maxdepth 1 -type l)))
 # For each service...
 for service in "${services[@]}"; do
-    abnormal_deaths="$(s6-svdt -s "/run/service/$service" | awk '/exitcode/ && !/exitcode 0/' | wc -l)"
-    if (( abnormal_deaths > 0 )); then
-        echo "[$(date)][UNHEALTHY] abnormal death count for service $service is $abnormal_deaths"
-        EXITCODE=1
-        # Reset service death counts
-        s6-svdt-clear "/run/service/$service"
-    else
-        echo "[$(date)][HEALTHY] no abnormal death count for service $service"
+    if [[ "$service" != s6rc-* ]]; then
+        # Services whose name starts with "s6rc-" are skipped; for the rest, count s6-svdt lines reporting an exitcode other than "exitcode 0"
+        abnormal_deaths="$(s6-svdt -s "/run/service/$service" | awk '/exitcode/ && !/exitcode 0/' | wc -l)"
+        if (( abnormal_deaths == 0 )); then
+            echo "[$(date)][HEALTHY] no abnormal death count for service $service"
+        else
+            echo "[$(date)][UNHEALTHY] abnormal death count for service $service is $abnormal_deaths"
+            EXITCODE=1
+            s6-svdt-clear "/run/service/$service"    # Reset service death counts
+        fi
+    fi
 done