From 4bf4a47bf16abfad0bcfc27a47f86173d4de6068 Mon Sep 17 00:00:00 2001
From: kx1t
Date: Thu, 9 Nov 2023 11:28:58 -0500
Subject: [PATCH] updates

README.md:
  * LAT/LON are described as required for range statistics in InfluxDB,
    Prometheus, or tar1090/ultrafeeder graphs.
  * Document the new DUMP978_MSG_MONITORING_INTERVAL variable.

message-monitor:
  * Source /scripts/common and emit log lines through s6wrap.
  * Create /run/stats once, before the loop, and keep the previous message
    count in /run/stats/msgs_since_last_monitor_run.
  * Reset the previous count to 0 when the new count is lower than it.
  * Log an [OK] line with the number of newly received messages.
  * Make the interval between runs configurable through
    DUMP978_MSG_MONITORING_INTERVAL (default 15m); the sleep runs in the
    background and the loop waits on its PID.

healthcheck.sh:
  * Skip services whose name starts with "s6rc-" when checking abnormal
    death counts.

---
 README.md                                     |  5 +--
 rootfs/etc/s6-overlay/scripts/message-monitor | 34 +++++++++++++------
 rootfs/scripts/healthcheck.sh                 | 18 +++++-----
 3 files changed, 36 insertions(+), 21 deletions(-)

diff --git a/README.md b/README.md
index 6bcbd4d..5ddd176 100644
--- a/README.md
+++ b/README.md
@@ -275,8 +275,9 @@ You should now be feeding ADSB-ES & UAT to the "new" aggregators, FlightAware, a
 | Variable | Description | Default |
 | -------- | ------------------------------------------------------------------------------------------------------------------------------------------- | ------- |
 | `TZ` | Local timezone in ["TZ database name" format](https://en.wikipedia.org/wiki/List_of_tz_database_time_zones). | `UTC` |
-| `LAT` | Latitude of your receiver. Only required if you want range statistics for InfluxDB or Prometheus, or if you are using the autogain script. | Unset |
-| `LON` | Longitude of your receiver. Only required if you want range statistics for InfluxDB or Prometheus, or if you are using the autogain script. | Unset |
+| `LAT` | Latitude of your receiver. Only required if you want range statistics for InfluxDB, Prometheus, or tar1090/ultrafeeder graphs. | Unset |
+| `LON` | Longitude of your receiver. Only required if you want range statistics for InfluxDB, Prometheus, or tar1090/ultrafeeder graphs. | Unset |
+| `DUMP978_MSG_MONITORING_INTERVAL` | Interval between runs of the Message Monitor that checks if new messages are received. Format of value is anything that is accepted by the Linux `sleep` command | Unset (15 minutes) |
 
 ### `dump978-fa` General Options
 
diff --git a/rootfs/etc/s6-overlay/scripts/message-monitor b/rootfs/etc/s6-overlay/scripts/message-monitor
index 3c83226..c66c9b6 100755
--- a/rootfs/etc/s6-overlay/scripts/message-monitor
+++ b/rootfs/etc/s6-overlay/scripts/message-monitor
@@ -1,34 +1,46 @@
 #!/command/with-contenv bash
-#shellcheck shell=bash
+#shellcheck shell=bash disable=SC1091
+
+source /scripts/common
+mkdir -p /run/stats
+s6wrap=(s6wrap --quiet --prepend="$(basename "$0")" --timestamps --args)
 
 while :
 do
     # Make sure we're receiving messages from the SDR
     # get the number of messages received since process start:
-    mkdir -p /run/stats
+
     if [[ -f /run/skyaware978/aircraft.json ]]; then
         read -r new_msg_count <<< "$(jq .messages /run/skyaware978/aircraft.json 2>/dev/null)"
     else
         new_msg_count="STARTING"
     fi
     # get the number of messages previously read, or 0 if there's no history:
-    if [[ -f /run/stats/msgs_since_last_healthcheck ]]; then
-        read -r old_msg_count < /run/stats/msgs_since_last_healthcheck
-        secs_since_last_check="$(( $(date +%s) - $(stat -c '%Y' /run/stats/msgs_since_last_healthcheck) ))"
+    if [[ -f /run/stats/msgs_since_last_monitor_run ]]; then
+        read -r old_msg_count < /run/stats/msgs_since_last_monitor_run
+        secs_since_last_check="$(( $(date +%s) - $(stat -c '%Y' /run/stats/msgs_since_last_monitor_run) ))"
     else
         old_msg_count=0
-        secs_since_last_check="$(( $(date +%s) - $(stat -c '%Y' /run/service/skyaware978) ))" # use skyaware978 modify time as the creation time of the container
+        secs_since_last_check="$(( $(date +%s) - $(stat -c '%Y' /run/service/skyaware) ))" # use skyaware978 modify time as the creation time of the container
+    fi
+
+    # if new_msg_count < old_msg_count, dump978 must have restarted since the previous run of this script
+    # in that case, assume that old_msg_count=0
+    if (( new_msg_count < old_msg_count )); then
+        old_msg_count=0
     fi
 
     if [[ "$new_msg_count" == "STARTING" ]]; then
-        echo "[$(date)][STARTING] No messages have been received as the container is still starting"
+        "${s6wrap[@]}" echo "[STARTING] No messages have been received as the container is still starting"
         new_msg_count=0
     elif (( new_msg_count == old_msg_count )); then
-        echo "[$(date)][UNHEALTHY] No messages received since last HealthCheck ($secs_since_last_check secs ago)"
+        "${s6wrap[@]}" echo "[WARNING] No messages received since last run of the Messages Monitor ($secs_since_last_check secs ago)"
+    elif (( new_msg_count > old_msg_count )); then
+        "${s6wrap[@]}" echo "[OK] $(( new_msg_count - old_msg_count )) messages received since last run of the Messages Monitor ($secs_since_last_check secs ago)"
     else
-        echo "[$(date)][ERROR] This situation cannot occur; new_msg_count=$new_msg_count; old_msg_count=$old_msg_count"
+        "${s6wrap[@]}" echo "[ERROR] This situation cannot occur, please notify the software maintainers. new_msg_count=$new_msg_count; old_msg_count=$old_msg_count"
     fi
-    echo "$new_msg_count" > /run/stats/msgs_since_last_healthcheck
+    echo "$new_msg_count" > /run/stats/msgs_since_last_monitor_run
 
-    sleep 15m
+    sleep "${DUMP978_MSG_MONITORING_INTERVAL:-15m}" & wait $!
 done
diff --git a/rootfs/scripts/healthcheck.sh b/rootfs/scripts/healthcheck.sh
index 900458a..174c568 100755
--- a/rootfs/scripts/healthcheck.sh
+++ b/rootfs/scripts/healthcheck.sh
@@ -51,14 +51,16 @@ fi
 services=($(basename -a $(find /run/service/ -maxdepth 1 -type l)))
 # For each service...
 for service in "${services[@]}"; do
-    abnormal_deaths="$(s6-svdt -s "/run/service/$service" | awk '/exitcode/ && !/exitcode 0/' | wc -l)"
-    if (( abnormal_deaths > 0 )); then
-        echo "[$(date)][UNHEALTHY] abnormal death count for service $service is $abnormal_deaths"
-        EXITCODE=1
-        # Reset service death counts
-        s6-svdt-clear "/run/service/$service"
-    else
-        echo "[$(date)][HEALTHY] no abnormal death count for service $service"
+    if [[ "${service:0:5}" != "s6rc-" ]]; then
+        abnormal_deaths="$(s6-svdt -s "/run/service/$service" | awk '/exitcode/ && !/exitcode 0/' | wc -l)"
+        if (( abnormal_deaths > 0 )); then
+            echo "[$(date)][UNHEALTHY] abnormal death count for service $service is $abnormal_deaths"
+            EXITCODE=1
+            # Reset service death counts
+            s6-svdt-clear "/run/service/$service"
+        else
+            echo "[$(date)][HEALTHY] no abnormal death count for service $service"
+        fi
     fi
 done
 