-
Notifications
You must be signed in to change notification settings - Fork 0
/
dnschecker.go
146 lines (132 loc) · 3.94 KB
/
dnschecker.go
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
package gopatrol
import (
"fmt"
"net"
"time"
"github.com/miekg/dns"
)
// DNSChecker implements a Checker for DNS servers. Each check can send an
// A-record query for Host to the server at URL and also opens a TCP
// connection to URL.
type DNSChecker struct {
// Slug is copied into the Slug field of every Result produced by Check.
// NOTE(review): presumably a short unique identifier for the endpoint — confirm.
Slug string `json:"slug" valid:"required"`
// Name is the name of the endpoint.
Name string `json:"name,omitempty" valid:"required"`
// URL is the address of the DNS server under test. It is handed unchanged
// to the DNS client and to net.DialTimeout("tcp", ...), so it is
// presumably expected in host:port form rather than as a full URL — confirm.
URL string `json:"url,omitempty" valid:"required"`
// Type is not read by any method of DNSChecker in this file.
// NOTE(review): presumably identifies the checker kind when (de)serializing — confirm.
Type string `json:"type,omitempty" valid:"required"`
// ThresholdRTT is the maximum round trip time to
// allow for a healthy endpoint. If non-zero and the
// median attempt takes longer than ThresholdRTT, the
// endpoint will be considered degraded. Note that
// this duration includes any in-between network
// latency.
ThresholdRTT time.Duration `json:"threshold_rtt,omitempty"`
// Attempts is how many requests the client will
// make to the endpoint in a single check. Values
// below 1 are treated as 1 by Check.
Attempts int `json:"attempts,omitempty"`
// Host is the FQDN of the target server to query the DNS server for.
// When empty, no DNS query is sent and only the TCP connection is tried.
Host string `json:"hostname_fqdn,omitempty"`
// Timeout is the maximum time to wait for a
// TCP connection to be established.
Timeout time.Duration `json:"timeout,omitempty"`
// LastChecked is not read by any method of DNSChecker in this file.
// NOTE(review): presumably the time of the previous check — confirm.
LastChecked time.Time `json:"last_checked"`
// LastChange is compared with the timestamp of a new result to decide
// whether a repeated "down" notification is due.
// NOTE(review): presumably the time LastStatus last changed — confirm.
LastChange time.Time `json:"last_change"`
// LastStatus is the previously recorded status. The notification logic
// compares it against "healthy", "down" and the empty string.
LastStatus string `json:"last_status"`
}
// Check runs the configured attempts against the endpoint, derives the
// overall status from them and decides whether the outcome warrants an
// event and/or a notification.
// The returned error is always nil in this implementation.
func (c DNSChecker) Check() (Result, error) {
	// c is a copy (value receiver), so clamping only affects this call.
	if c.Attempts <= 0 {
		c.Attempts = 1
	}

	res := Result{
		Name:      c.Name,
		URL:       c.URL,
		Timestamp: time.Now().UTC(),
		Slug:      c.Slug,
	}
	res.Times = c.doChecks()

	// First settle the status, then work out event/notification flags
	// from that status.
	res = c.checkEventAndNotif(c.conclude(res))
	return res, nil
}
// doChecks executes and returns each attempt.
//
// An attempt consists of up to two steps against the server at c.URL:
//
//  1. If c.Host is set, an A-record query for c.Host is sent to the server.
//     Only errors returned by the exchange count as a failure; the response
//     itself (including its response code) is not inspected.
//  2. A TCP connection to c.URL is opened and immediately closed.
//
// The first failing step ends the attempt and its error text is stored in
// the attempt's Error field. RTT is the time from the start of the attempt
// until it finished, whether it succeeded or failed.
//
// c.Attempts must not be negative; Check guarantees it is at least 1.
func (c DNSChecker) doChecks() Attempts {
	// Fall back to one second so that a zero Timeout does not mean
	// "wait indefinitely" for the TCP connection.
	timeout := c.Timeout
	if timeout == 0 {
		timeout = 1 * time.Second
	}

	checks := make(Attempts, c.Attempts)
	for i := range checks {
		start := time.Now()

		if c.Host != "" {
			// SetQuestion assigns a fresh message ID, requests recursion and
			// installs a single IN-class question. Fqdn adds the trailing dot
			// the wire format requires when the configured name lacks one.
			query := new(dns.Msg)
			query.SetQuestion(dns.Fqdn(c.Host), dns.TypeA)

			client := new(dns.Client)
			if _, _, err := client.Exchange(query, c.URL); err != nil {
				checks[i].RTT = time.Since(start)
				checks[i].Error = err.Error()
				continue
			}
		}

		// Every variable is scoped to the iteration, so a failure in one
		// attempt can never leak into the next one.
		conn, err := net.DialTimeout("tcp", c.URL, timeout)
		if err == nil {
			// Best effort: the connection was only opened to prove
			// reachability, so a close error is of no interest.
			_ = conn.Close()
		}
		checks[i].RTT = time.Since(start)
		if err != nil {
			checks[i].Error = err.Error()
		}
	}
	return checks
}
// conclude derives the final status of result from its recorded attempts.
//
// Exactly one of Down, Degraded or Healthy is set on the returned value:
//   - Down when any attempt carries an error,
//   - Degraded (with an explanatory Notice) when a threshold is configured
//     and the median round trip time exceeds it,
//   - Healthy otherwise.
//
// The configured threshold is always copied into the result.
func (c *DNSChecker) conclude(result Result) Result {
	result.ThresholdRTT = c.ThresholdRTT

	// A single failed attempt is enough to call the endpoint down.
	for _, attempt := range result.Times {
		if attempt.Error != "" {
			result.Down = true
			return result
		}
	}

	// Statistics are only computed when a threshold is configured.
	tooSlow := c.ThresholdRTT > 0 && result.ComputeStats().Median > c.ThresholdRTT
	if !tooSlow {
		result.Healthy = true
		return result
	}

	result.Notice = fmt.Sprintf("median round trip time exceeded threshold (%s)", c.ThresholdRTT)
	result.Degraded = true
	return result
}
// checkEventAndNotif sets the Event and Notification flags on result by
// comparing its status with the previously recorded c.LastStatus.
//
//   - Down after "healthy" or no previous status: event and notification.
//   - Down while already "down": notification only, and only when more than
//     five minutes lie between c.LastChange and the result's timestamp.
//   - Healthy after "down" or no previous status: event and notification.
//
// Every other combination leaves the flags untouched.
func (c DNSChecker) checkEventAndNotif(result Result) Result {
	previous := c.LastStatus

	if result.Down {
		switch previous {
		case "healthy", "":
			result.Notification = true
			result.Event = true
		case "down":
			// Still down: repeat the notification once the outage has
			// lasted longer than five minutes.
			if result.Timestamp.Sub(c.LastChange).Minutes() > 5.0 {
				result.Notification = true
			}
		}
		return result
	}

	if result.Healthy && (previous == "down" || previous == "") {
		result.Notification = true
		result.Event = true
	}
	return result
}