forked from sushantohalder/SwarmStatusCheck
-
Notifications
You must be signed in to change notification settings - Fork 0
/
check_SwarmStatus.sh
79 lines (63 loc) · 4.5 KB
/
check_SwarmStatus.sh
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
#!/bin/bash
#-----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------
# Introduction
#-----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------
# This script is made for a specific use case: monitoring the Docker swarm quorum. A swarm has quorum when more than half of its managers are up and working.
# The script returns status 'OK' when all the managers are healthy and working, 'Warning' when some manager is unhealthy but quorum is kept, and 'Critical' when half or more of the managers are unhealthy (quorum is lost).
# The script uses the 'docker node ls' command to read the status of the managers. If it is run on a worker node it returns 'OK', because a worker node cannot report the status of any node.
#------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------
# License
#------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------
# This plugin monitors the docker swarm quorum.
# Copyright (C) 2018 Sushanto Halder
# This program is free software: you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation, either version 3 of the License, or
# (at your option) any later version.
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU General Public License for more details.
# You should have received a copy of the GNU General Public License
# along with this program. If not, see <https://www.gnu.org/licenses/>.
#-------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------
#-------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------
# Swarm quorum check. Exit codes follow the Nagios plugin convention:
#   0 = OK, 1 = WARNING, 2 = CRITICAL, 3 = UNKNOWN.

# Step 1: find out whether this node is a worker.
# The value is taken from the third whitespace-separated field of the
# "Is Manager" line printed by `docker info`.
# The quotes below must be plain ASCII quotes: typographic quotes are ordinary
# characters to the shell, so the pattern and the awk program would be split
# into several words and the worker check would never match.
is_manager=$(docker info | awk '/Is Manager/ {print $3}')
if [[ "$is_manager" == "false" ]]; then
  echo "OK - This is workernode."
  exit 0
fi

# Step 2: ask the swarm for the manager status of every node.
# A failing 'docker node ls' is reported as CRITICAL (swarm not in quorum).
if ! node_statuses=$(docker node ls --format '{{.ManagerStatus}}'); then
  echo "CRITICAL - Swarm is Unhealthy (not in quorum)"
  exit 2
fi

# Step 3: count the managers and how many of them are healthy.
# Empty lines are skipped, so only nodes that report a manager status are
# counted (presumably worker nodes print an empty status -- see the
# 'docker node ls' documentation).
manager_total=0
manager_healthy=0
while IFS= read -r status; do
  [[ -n "$status" ]] || continue
  manager_total=$((manager_total + 1))
  case "$status" in
    Leader | Reachable)
      manager_healthy=$((manager_healthy + 1))
      ;;
  esac
done <<<"$node_statuses"

# Step 4: translate the counts into a check result.
if ((manager_total == 0)); then
  # No manager status was listed at all: do not claim the swarm is healthy.
  echo "UNKNOWN - No manager status reported by 'docker node ls'"
  exit 3
elif ((manager_healthy == manager_total)); then
  echo "OK - Swarm is Healthy (in quorum)"
  exit 0
elif ((manager_healthy > manager_total / 2)); then
  # Some manager is down, but a strict majority is still healthy.
  echo "WARNING - Some manager is down"
  exit 1
else
  # Healthy managers no longer form a strict majority.
  echo "CRITICAL - Swarm is Unhealthy (not in quorum)"
  exit 2
fi