Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Added determining device type and use it at scrape data #205

Merged
merged 3 commits into from
May 7, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions .gitignore
Original file line number Diff line number Diff line change
Expand Up @@ -3,6 +3,7 @@
/.release
/.tarballs
debug/
.idea/
NiceGuyIT marked this conversation as resolved.
Show resolved Hide resolved

Manifest
smartctl_exporter
Expand Down
51 changes: 39 additions & 12 deletions main.go
Original file line number Diff line number Diff line change
Expand Up @@ -16,6 +16,7 @@ package main
import (
"net/http"
"os"
"strings"
"sync"
"time"

Expand All @@ -32,11 +33,18 @@ import (
webflag "github.com/prometheus/exporter-toolkit/web/kingpinflag"
)

// Device describes one entry of the smartctl device scan that passed the
// include/exclude filter. The json tags mirror the keys read from the scan
// output ("name", "info_name", "type").
type Device struct {
// Name is the scan entry's "name" value; it is passed to smartctl as the
// device argument when reading S.M.A.R.T. data.
Name string `json:"name"`
// Info_Name is the short name produced by extractDiskName from the scan
// entry's "info_name" value (not the raw string); it is what log messages
// and filterDevices use to identify the device.
Info_Name string `json:"info_name"`
// Type is the scan entry's "type" value; it is passed to smartctl as
// --device=<Type>.
Type string `json:"type"`
}

// SMARTctlManagerCollector implements the Collector interface.
type SMARTctlManagerCollector struct {
CollectPeriod string
CollectPeriodDuration time.Duration
Devices []string
Devices []Device

logger log.Logger
mutex sync.Mutex
Expand Down Expand Up @@ -106,24 +114,43 @@ var (
)

// scanDevices uses smartctl to gather the list of available devices.
//
// It reads the scan result via readSMARTctlDevices and walks its "devices"
// array. For every entry the short device name is derived from the trimmed
// "info_name" value with extractDiskName; entries whose short name is
// rejected by the include/exclude device filter are logged and skipped.
// Each remaining entry is returned as a Device carrying the scan's "name",
// the derived short name (stored in Info_Name) and the scan's "type".
//
// NOTE(review): extractDiskName returns "" for info_name values it does not
// recognise, so such devices are filtered and logged under an empty name.
func scanDevices(logger log.Logger) []string {
func scanDevices(logger log.Logger) []Device {
filter := newDeviceFilter(*smartctlDeviceExclude, *smartctlDeviceInclude)

json := readSMARTctlDevices(logger)
scanDevices := json.Get("devices").Array()
var scanDeviceResult []string
var scanDeviceResult []Device
for _, d := range scanDevices {
deviceName := d.Get("name").String()
deviceName := extractDiskName(strings.TrimSpace(d.Get("info_name").String()))
if filter.ignored(deviceName) {
level.Info(logger).Log("msg", "Ignoring device", "name", deviceName)
} else {
level.Info(logger).Log("msg", "Found device", "name", deviceName)
scanDeviceResult = append(scanDeviceResult, deviceName)
device := Device{
Name: d.Get("name").String(),
Info_Name: deviceName,
Type: d.Get("type").String(),
}
scanDeviceResult = append(scanDeviceResult, device)
}
}
return scanDeviceResult
}

// filterDevices narrows devices down to those whose Info_Name contains at
// least one of the given filter strings (plain substring match).
//
// The input order is preserved and each device is returned at most once:
// checking stops at the first filter that matches. Every comparison that is
// actually performed is reported at debug level. The result is nil when no
// device matches, which includes the case of an empty filters slice.
func filterDevices(logger log.Logger, devices []Device, filters []string) []Device {
	var kept []Device
	for i := range devices {
		dev := devices[i]
		for j := 0; j < len(filters); j++ {
			pattern := filters[j]
			level.Debug(logger).Log("msg", "filterDevices", "device", dev.Info_Name, "filter", pattern)
			if !strings.Contains(dev.Info_Name, pattern) {
				continue
			}
			// First hit wins; do not test (or log) the remaining filters.
			kept = append(kept, dev)
			break
		}
	}
	return kept
}

func main() {
metricsPath := kingpin.Flag(
"web.telemetry-path", "Path under which to expose metrics",
Expand All @@ -140,21 +167,21 @@ func main() {
level.Info(logger).Log("msg", "Starting smartctl_exporter", "version", version.Info())
level.Info(logger).Log("msg", "Build context", "build_context", version.BuildContext())

var devices []string
var devices []Device
devices = scanDevices(logger)
zxzharmlesszxz marked this conversation as resolved.
Show resolved Hide resolved
level.Info(logger).Log("msg", "Number of devices found", "count", len(devices))
if len(*smartctlDevices) > 0 {
devices = *smartctlDevices
} else {
level.Info(logger).Log("msg", "No devices specified, trying to load them automatically")
devices = scanDevices(logger)
level.Info(logger).Log("msg", "Number of devices found", "count", len(devices))
level.Info(logger).Log("msg", "Devices specified", "devices", strings.Join(*smartctlDevices, ", "))
devices = filterDevices(logger, devices, *smartctlDevices)
level.Info(logger).Log("msg", "Devices filtered", "count", len(devices))
}

collector := SMARTctlManagerCollector{
Devices: devices,
logger: logger,
}

if *smartctlRescanInterval >= 1*time.Second && len(*smartctlDevices) == 0 {
if *smartctlRescanInterval >= 1*time.Second {
level.Info(logger).Log("msg", "Start background scan process")
level.Info(logger).Log("msg", "Rescanning for devices every", "rescanInterval", *smartctlRescanInterval)
go collector.RescanForDevices()
Expand Down
34 changes: 17 additions & 17 deletions readjson.go
Original file line number Diff line number Diff line change
Expand Up @@ -49,8 +49,8 @@ func parseJSON(data string) gjson.Result {
}

// Reading fake smartctl json
func readFakeSMARTctl(logger log.Logger, device string) gjson.Result {
s := strings.Split(device, "/")
func readFakeSMARTctl(logger log.Logger, device Device) gjson.Result {
s := strings.Split(device.Name, "/")
filename := fmt.Sprintf("debug/%s.json", s[len(s)-1])
level.Debug(logger).Log("msg", "Read fake S.M.A.R.T. data from json", "filename", filename)
jsonFile, err := os.ReadFile(filename)
Expand All @@ -62,16 +62,16 @@ func readFakeSMARTctl(logger log.Logger, device string) gjson.Result {
}

// readSMARTctl runs smartctl for one device and parses its JSON output.
//
// The binary at *smartctlPath is invoked with JSON output enabled, the
// device type passed as --device=<Type> and device.Name as the target. An
// error from running the command is only logged as a warning; whatever
// output was captured is still parsed. The returned bool is true only when
// both resultCodeIsOk (fed with the JSON's "smartctl.exit_status") and
// jsonIsOk accept the result. The elapsed time is logged at debug level.
func readSMARTctl(logger log.Logger, device string) (gjson.Result, bool) {
func readSMARTctl(logger log.Logger, device Device) (gjson.Result, bool) {
start := time.Now()
out, err := exec.Command(*smartctlPath, "--json", "--info", "--health", "--attributes", "--tolerance=verypermissive", "--nocheck=standby", "--format=brief", "--log=error", device).Output()
out, err := exec.Command(*smartctlPath, "--json", "--info", "--health", "--attributes", "--tolerance=verypermissive", "--nocheck=standby", "--format=brief", "--log=error", "--device="+device.Type, device.Name).Output()
if err != nil {
level.Warn(logger).Log("msg", "S.M.A.R.T. output reading", "err", err, "device", device)
level.Warn(logger).Log("msg", "S.M.A.R.T. output reading", "err", err, "device", device.Info_Name)
}
json := parseJSON(string(out))
rcOk := resultCodeIsOk(logger, device, json.Get("smartctl.exit_status").Int())
jsonOk := jsonIsOk(logger, json)
level.Debug(logger).Log("msg", "Collected S.M.A.R.T. json data", "device", device, "duration", time.Since(start))
level.Debug(logger).Log("msg", "Collected S.M.A.R.T. json data", "device", device.Info_Name, "duration", time.Since(start))
return json, rcOk && jsonOk
}

Expand All @@ -90,7 +90,7 @@ func readSMARTctlDevices(logger log.Logger) gjson.Result {
}

// Select json source and parse
func readData(logger log.Logger, device string) gjson.Result {
func readData(logger log.Logger, device Device) gjson.Result {
if *smartctlFakeData {
return readFakeSMARTctl(logger, device)
}
Expand All @@ -102,7 +102,7 @@ func readData(logger log.Logger, device string) gjson.Result {
jsonCache.Store(device, JSONCache{JSON: json, LastCollect: time.Now()})
j, found := jsonCache.Load(device)
if !found {
level.Warn(logger).Log("msg", "device not found", "device", device)
level.Warn(logger).Log("msg", "device not found", "device", device.Info_Name)
}
return j.(JSONCache).JSON
}
Expand All @@ -112,35 +112,35 @@ func readData(logger log.Logger, device string) gjson.Result {
}

// Parse smartctl return code
func resultCodeIsOk(logger log.Logger, device string, SMARTCtlResult int64) bool {
func resultCodeIsOk(logger log.Logger, device Device, SMARTCtlResult int64) bool {
result := true
if SMARTCtlResult > 0 {
b := SMARTCtlResult
if (b & 1) != 0 {
level.Error(logger).Log("msg", "Command line did not parse", "device", device)
level.Error(logger).Log("msg", "Command line did not parse", "device", device.Info_Name)
result = false
}
if (b & (1 << 1)) != 0 {
level.Error(logger).Log("msg", "Device open failed, device did not return an IDENTIFY DEVICE structure, or device is in a low-power mode", "device", device)
level.Error(logger).Log("msg", "Device open failed, device did not return an IDENTIFY DEVICE structure, or device is in a low-power mode", "device", device.Info_Name)
result = false
}
if (b & (1 << 2)) != 0 {
level.Warn(logger).Log("msg", "Some SMART or other ATA command to the disk failed, or there was a checksum error in a SMART data structure", "device", device)
level.Warn(logger).Log("msg", "Some SMART or other ATA command to the disk failed, or there was a checksum error in a SMART data structure", "device", device.Info_Name)
}
if (b & (1 << 3)) != 0 {
level.Warn(logger).Log("msg", "SMART status check returned 'DISK FAILING'", "device", device)
level.Warn(logger).Log("msg", "SMART status check returned 'DISK FAILING'", "device", device.Info_Name)
}
if (b & (1 << 4)) != 0 {
level.Warn(logger).Log("msg", "We found prefail Attributes <= threshold", "device", device)
level.Warn(logger).Log("msg", "We found prefail Attributes <= threshold", "device", device.Info_Name)
}
if (b & (1 << 5)) != 0 {
level.Warn(logger).Log("msg", "SMART status check returned 'DISK OK' but we found that some (usage or prefail) Attributes have been <= threshold at some time in the past", "device", device)
level.Warn(logger).Log("msg", "SMART status check returned 'DISK OK' but we found that some (usage or prefail) Attributes have been <= threshold at some time in the past", "device", device.Info_Name)
}
if (b & (1 << 6)) != 0 {
level.Warn(logger).Log("msg", "The device error log contains records of errors", "device", device)
level.Warn(logger).Log("msg", "The device error log contains records of errors", "device", device.Info_Name)
}
if (b & (1 << 7)) != 0 {
level.Warn(logger).Log("msg", "The device self-test log contains records of errors. [ATA only] Failed self-tests outdated by a newer successful extended self-test are ignored", "device", device)
level.Warn(logger).Log("msg", "The device self-test log contains records of errors. [ATA only] Failed self-tests outdated by a newer successful extended self-test are ignored", "device", device.Info_Name)
}
}
return result
Expand Down
27 changes: 26 additions & 1 deletion smartctl.go
Original file line number Diff line number Diff line change
Expand Up @@ -15,6 +15,7 @@ package main

import (
"fmt"
"regexp"
"strings"

"github.com/go-kit/log"
Expand Down Expand Up @@ -42,6 +43,30 @@ type SMARTctl struct {
device SMARTDevice
}

// diskNameRE recognises the device-name forms handled by extractDiskName:
//
//	/dev/<bus_name>/<bus_num> [<disk>...   e.g. "/dev/bus/0 [megaraid_disk_01]"
//	/dev/<disk>                            e.g. "/dev/sda"
//	[<disk>]...                            e.g. "[sda]"
//
// <disk> is limited to lower-case letters, digits and underscores. The
// pattern is compiled once at package scope so that callers do not pay for
// recompilation on every invocation.
var diskNameRE = regexp.MustCompile(`^(?:/dev/(?P<bus_name>\S+)/(?P<bus_num>\S+)\s\[|/dev/|\[)(?:\s\[|)(?P<disk>[a-z0-9_]+)(?:\].*|)$`)

// extractDiskName derives a short device name from a smartctl "info_name"
// style string.
//
// The non-empty captures bus_name, bus_num and disk (in that order) are
// joined with "_", so "/dev/sda" yields "sda" and
// "/dev/bus/0 [megaraid_disk_01]" yields "bus_0_megaraid_disk_01".
//
// It returns "" when input does not match any recognised form.
//
// NOTE(review): the empty-string result was questioned during review of
// this change ("falling back to a non-zero string is probably safer"); the
// behaviour is kept as is until the pattern has been verified against a
// larger corpus of real info_name values.
func extractDiskName(input string) string {
	match := diskNameRE.FindStringSubmatch(input)
	if match == nil {
		return ""
	}

	parts := make([]string, 0, 3)
	for _, group := range [...]string{"bus_name", "bus_num", "disk"} {
		// Groups belonging to an alternative that did not participate in
		// the match are reported as empty strings and are skipped.
		if idx := diskNameRE.SubexpIndex(group); idx != -1 && match[idx] != "" {
			parts = append(parts, match[idx])
		}
	}
	return strings.Join(parts, "_")
}

// NewSMARTctl is smartctl constructor
func NewSMARTctl(logger log.Logger, json gjson.Result, ch chan<- prometheus.Metric) SMARTctl {
var model_name string
Expand All @@ -60,7 +85,7 @@ func NewSMARTctl(logger log.Logger, json gjson.Result, ch chan<- prometheus.Metr
json: json,
logger: logger,
device: SMARTDevice{
device: strings.TrimPrefix(strings.TrimSpace(json.Get("device.name").String()), "/dev/"),
device: extractDiskName(strings.TrimSpace(json.Get("device.info_name").String())),
serial: strings.TrimSpace(json.Get("serial_number").String()),
family: strings.TrimSpace(GetStringIfExists(json, "model_family", "unknown")),
model: strings.TrimSpace(model_name),
Expand Down