From 52cbf7efc767b43ebcdc8ad6489be40cd086e2d8 Mon Sep 17 00:00:00 2001 From: Lukas Schreiner Date: Sun, 14 Apr 2024 16:22:15 +0200 Subject: [PATCH] Uptime and ICMP metrics New: - Include uptime metrics - Include hosts ICMP metrics --- .github/workflows/release.yml | 4 +- CHANGES.md | 5 ++- README.md | 7 +-- monit_exporter.go | 80 +++++++++++++++++++++++++++++++---- 4 files changed, 82 insertions(+), 14 deletions(-) diff --git a/.github/workflows/release.yml b/.github/workflows/release.yml index 6d661ca..6a326ac 100644 --- a/.github/workflows/release.yml +++ b/.github/workflows/release.yml @@ -6,6 +6,8 @@ name: Release monit_exporter on: push: tags: [ "*" ] + release: + types: [created] jobs: @@ -22,7 +24,7 @@ jobs: - name: Set up Go uses: actions/setup-go@v3 with: - go-version: 1.21.3 + go-version: 1.21.9 - name: Run GoReleaser uses: goreleaser/goreleaser-action@master diff --git a/CHANGES.md b/CHANGES.md index 70f3b55..70a8133 100644 --- a/CHANGES.md +++ b/CHANGES.md @@ -10,7 +10,7 @@ The headers are: - Enhancements - Features -## 0.3.0 (2024-04-xx) +## 0.3.0 (2024-04-14) ### Bugs - Catching scrape errors @@ -35,12 +35,15 @@ The headers are: - Added extraction of: - port response times - unix socket response times + - ICMP response times (hosts) - CPU usage - Memory usage - Disk write metrics - Disk read metrics - I/O service times - Network link metrics + - Uptime metrics + - Monit version information - Added option in order to ignore TLS certificate validation (restricted and not recommended) ## 0.2.2 (2023-10-22) diff --git a/README.md b/README.md index f045af3..118e092 100644 --- a/README.md +++ b/README.md @@ -34,16 +34,17 @@ These metrics are exported by `monit_exporter`: | name | description | 
|----------------------------------------|----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------| -| monit_service_check | Monit service check info with following labels provided:
`check_name`
Name of monit check
`monitored`
Specifies, if the service is monitored or not, whereas `0` means no and `1` means yes.
`type`
Specifies the type of service.
+| monit_service_check | Monit service check info with following labels provided:
`check_name`
Name of monit check
`monitored`
Specifies whether the service is monitored: `0` means no, `1` means yes, `2` means init and `4` means waiting. The values can be combined, e.g. `5` (`1` + `4`) means that the service is monitored but currently waiting, for example for the right time range.
`type`
Specifies the type of service.

The value of this metric represents the error state: `0` means the service is fine, whereas `> 0` means there is an error. | monit_service_cpu_perc | Monit service CPU info with following labels:
`check_name`
Name of monit check
`type`
Specifies value type whereas value can be `percentage` or `percentage_total`
| monit_service_mem_bytes | Monit service mem info with following labels:
`check_name`
Name of monit check
`type`
Specifies value type whereas value can be `kilobyte` or `kilobyte_total`
| monit_service_network_link_state | Monit service link states
`check_name`
Name of monit check

Value can be either `-1` = Not available, `0` = down and `1` = up | monit_service_network_link_statistics | Monit service link statistics
`check_name`
Name of monit check
`direction`
Specifies link direction (upload / download)
`unit`
Spcifies unit of metrics (bytes, errors, packets)
`type`
Specifies the type with either now or total. Whereas now means "per second"
-| monit_service_port_response_times | Monit service port and unix socket checks response times
`check_name`
Name of monit check
`hostname`
Specifies hostname checked
`path`
Specifies a unix socket path
`port`
Specifies port to check
`protocol`
Specifies protocol used for checking service (e.g. POP, IMAP, REDIS, etc.). Default is a RAW check.
`type`
Specifies protocol type (e.g. TCP, UDP, UNIX)
`uri`
Gives full URI for the service check including type, host and port or path.
+| monit_service_port_response_times | Response times of Monit port, unix socket and ICMP checks
`check_name`
Name of monit check
`hostname`
Specifies hostname checked
`path`
Specifies a unix socket path
`port`
Specifies port to check
`protocol`
Specifies protocol used for checking service (e.g. POP, IMAP, REDIS, etc.). Default is a RAW check.
`type`
Specifies protocol type (e.g. TCP, UDP, UNIX, ICMP)
`uri`
Gives full URI for the service check including type, host and port or path.
| monit_service_read_bytes | Monit service Disk Read Bytes
`check_name`
Name of monit check
`type`
Specifies type of read / write. Possible values: read_count, read_count_total. Value is given in bytes.
+| monit_service_uptime | Service and server uptime in seconds
`check_name`
Name of monit check
`type`
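Origin of the uptime value: `server` for the Monit server itself, otherwise the service type of the check (uptime is exported for process and system services, e.g. `system`)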
| monit_service_write_bytes | Monit service Disk Writes Bytes
`check_name`
Name of monit check
`type`
Specifies type of read / write. Possible values: write_count, write_count_total. Value is given in bytes.
| monit_up | Monit status availability. `0` = not available and `1` = available - +| monit_version | Monit current version as label - the value is const 1. #### Service types diff --git a/monit_exporter.go b/monit_exporter.go index 8fcc0de..e617054 100644 --- a/monit_exporter.go +++ b/monit_exporter.go @@ -45,23 +45,32 @@ var serviceTypes = map[int]string{ } type monitXML struct { + MonitServer monitServer `xml:"server"` MonitServices []monitService `xml:"service"` } +type monitServer struct { + Hostname string `xml:"localhostname"` + Uptime int64 `xml:"uptime"` + Version string `xml:"version"` +} + // Simplified structure of monit check. type monitService struct { - Type int `xml:"type,attr"` - Name string `xml:"name"` - Status int `xml:"status"` - Monitored string `xml:"monitor"` - Memory monitServiceMem `xml:"memory"` CPU monitServiceCPU `xml:"cpu"` - DiskWrite monitServiceDisk `xml:"write"` DiskRead monitServiceDisk `xml:"read"` - ServiceTimes monitServiceTime `xml:"servicetime"` + DiskWrite monitServiceDisk `xml:"write"` + Link monitServiceLink `xml:"link"` + Memory monitServiceMem `xml:"memory"` + Monitored string `xml:"monitor"` + Name string `xml:"name"` Ports []monitServicePort `xml:"port"` + ServiceTimes monitServiceTime `xml:"servicetime"` + Status int `xml:"status"` + Type int `xml:"type,attr"` UnixSockets []monitServicePort `xml:"unix"` - Link monitServiceLink `xml:"link"` + Uptime int64 `xml:"uptime"` + Icmp monitServiceIcmp `xml:"icmp"` } type monitServiceMem struct { @@ -110,6 +119,11 @@ type monitServiceLinkDirection struct { Errors monitNetworkCount `xml:"errors"` } +type monitServiceIcmp struct { + Type string `xml:"type"` + Responsetime float64 `xml:"responsetime"` +} + type monitBytes struct { Count int `xml:"count"` Total int `xml:"total"` @@ -127,6 +141,8 @@ type Exporter struct { client *http.Client up prometheus.Gauge + version *prometheus.GaugeVec + checkUptime *prometheus.GaugeVec checkStatus *prometheus.GaugeVec checkMem 
*prometheus.GaugeVec checkCPU *prometheus.GaugeVec @@ -237,6 +253,20 @@ func NewExporter(c *Config) (*Exporter, error) { Name: "up", Help: "Monit status availability", }), + version: prometheus.NewGaugeVec(prometheus.GaugeOpts{ + Namespace: namespace, + Name: "version", + Help: "Monit version", + }, + []string{"version"}, + ), + checkUptime: prometheus.NewGaugeVec(prometheus.GaugeOpts{ + Namespace: namespace, + Name: "service_uptime", + Help: "Monit service and server uptime", + }, + []string{"check_name", "type"}, + ), checkStatus: prometheus.NewGaugeVec(prometheus.GaugeOpts{ Namespace: namespace, Name: "service_check", @@ -300,6 +330,8 @@ func NewExporter(c *Config) (*Exporter, error) { // implements prometheus.Collector. func (e *Exporter) Describe(ch chan<- *prometheus.Desc) { e.up.Describe(ch) + e.version.Describe(ch) + e.checkUptime.Describe(ch) e.checkStatus.Describe(ch) e.checkCPU.Describe(ch) e.checkMem.Describe(ch) @@ -326,6 +358,15 @@ func (e *Exporter) scrape() error { log.Errorf("Error parsing data from monit: %v\n%s", err, data) } else { e.up.Set(1) + e.checkUptime.With( + prometheus.Labels{ + "check_name": parsedData.MonitServer.Hostname, + "type": "server", + }).Set(float64(parsedData.MonitServer.Uptime)) + e.version.With( + prometheus.Labels{ + "version": parsedData.MonitServer.Version, + }).Set(1) // Constructing metrics for _, service := range parsedData.MonitServices { e.checkStatus.With(prometheus.Labels{"check_name": service.Name, "type": serviceTypes[service.Type], "monitored": service.Monitored}).Set(float64(service.Status)) @@ -336,7 +377,7 @@ func (e *Exporter) scrape() error { "monitored": service.Monitored, }).Set(float64(service.Status)) - // Memory + CPU only for specifiy status types (cf. monit/xml.c) + // Memory + CPU + Uptime only for specifiy status types (cf. 
monit/xml.c) if service.Type == SERVICE_TYPE_PROCESS || service.Type == SERVICE_TYPE_SYSTEM { e.checkMem.With( prometheus.Labels{ @@ -358,6 +399,11 @@ func (e *Exporter) scrape() error { "check_name": service.Name, "type": "percentage_total", }).Set(float64(service.CPU.PercentTotal)) + e.checkUptime.With( + prometheus.Labels{ + "check_name": service.Name, + "type": serviceTypes[service.Type], + }).Set(float64(service.Uptime)) } if service.Type == SERVICE_TYPE_PROCESS || service.Type == SERVICE_TYPE_FILESYSTEM { e.checkDiskWrite.With( @@ -392,6 +438,20 @@ func (e *Exporter) scrape() error { e.addNetLinkElement(&service, "upload", &service.Link.Upload) } + // ICMP checks + if service.Type == SERVICE_TYPE_HOST && service.Icmp.Type != "" { + e.checkPortRespTimes.With( + prometheus.Labels{ + "check_name": service.Name, + "type": "ICMP", + "hostname": "", + "path": "", + "port": "", + "protocol": strings.ToUpper(service.Icmp.Type), + "uri": "", + }).Set(float64(service.Icmp.Responsetime)) + } + // Port checks for _, port := range service.Ports { var uri = fmt.Sprintf("%s://%s:%s", strings.ToLower(port.Type), port.Hostname, port.Portnumber) @@ -458,6 +518,8 @@ func (e *Exporter) Collect(ch chan<- prometheus.Metric) { e.checkStatus.Reset() if err := e.scrape(); err == nil { e.up.Collect(ch) + e.version.Collect(ch) + e.checkUptime.Collect(ch) e.checkStatus.Collect(ch) e.checkMem.Collect(ch) e.checkCPU.Collect(ch)