diff --git a/.gitignore b/.gitignore index de1023d..b94c7e7 100755 --- a/.gitignore +++ b/.gitignore @@ -2,3 +2,4 @@ /bin /proxmox-exporter .vscode +.idea diff --git a/README.md b/README.md index b3bb3d8..72819e9 100755 --- a/README.md +++ b/README.md @@ -21,7 +21,7 @@ This exporter accepts one or multiple Proxmox manager API endpoints to disperse It also does API response caching. We cache the responses for up to 29 seconds, which should produce fresh metrics if scraped in 30 second intervals, or respond with cache at least half of the time if scraped in 15 second intervals. If you run highly available Prometheus instances that each scrape this exporter, it should only need to make the same set of requests to Proxmox's API one time per 30 second scrape interval. -When cache is _not_ used, this exporter makes `1 + (6 * <number of nodes>)` API requests against your cluster to display its metrics. One to list nodes in the PVE cluster, and 6 per-node to reach all of the node specific endpoints this exporter gets its data from. The number of API endpoints it uses may increase as additional types of metrics are added. One additional request is made on this exporter's start up, to retrieve the name of a Proxmox cluster for your timeseries labels, if it's a clustered PVE setup, but the exporter has no need to reload this endpoint over time. +When cache is _not_ used, this exporter makes `2 + (6 * <number of nodes>)` API requests against your cluster to display its metrics. One to list nodes in the PVE cluster, 6 per-node to reach all of the node specific endpoints this exporter gets its data from, and one additional request to the cluster resources endpoint if running in a cluster. The number of API endpoints it uses may increase as additional types of metrics are added. One additional request is made on this exporter's start up, to retrieve the name of a Proxmox cluster for your timeseries labels, if it's a clustered PVE setup, but the exporter has no need to reload this endpoint over time. 
The number of nodes in your cluster shouldn't significantly slow down this exporter's response time, because each set of requests for a node are made concurrently. diff --git a/go.mod b/go.mod index e2deb92..8965161 100755 --- a/go.mod +++ b/go.mod @@ -5,25 +5,25 @@ go 1.21 require ( github.com/gorilla/mux v1.8.1 github.com/patrickmn/go-cache v2.1.0+incompatible - github.com/prometheus/client_golang v1.19.0 + github.com/prometheus/client_golang v1.19.1 github.com/spf13/cobra v1.8.0 github.com/spf13/viper v1.18.2 - github.com/starttoaster/go-proxmox v0.0.1 + github.com/starttoaster/go-proxmox v0.0.2 ) require ( github.com/beorn7/perks v1.0.1 // indirect - github.com/cespare/xxhash/v2 v2.2.0 // indirect + github.com/cespare/xxhash/v2 v2.3.0 // indirect github.com/fsnotify/fsnotify v1.7.0 // indirect github.com/google/go-querystring v1.1.0 // indirect github.com/hashicorp/hcl v1.0.0 // indirect github.com/inconshreveable/mousetrap v1.1.0 // indirect github.com/magiconair/properties v1.8.7 // indirect github.com/mitchellh/mapstructure v1.5.0 // indirect - github.com/pelletier/go-toml/v2 v2.1.1 // indirect - github.com/prometheus/client_model v0.6.0 // indirect - github.com/prometheus/common v0.48.0 // indirect - github.com/prometheus/procfs v0.12.0 // indirect + github.com/pelletier/go-toml/v2 v2.2.2 // indirect + github.com/prometheus/client_model v0.6.1 // indirect + github.com/prometheus/common v0.53.0 // indirect + github.com/prometheus/procfs v0.14.0 // indirect github.com/sagikazarmark/locafero v0.4.0 // indirect github.com/sagikazarmark/slog-shim v0.1.0 // indirect github.com/sourcegraph/conc v0.3.0 // indirect @@ -32,10 +32,10 @@ require ( github.com/spf13/pflag v1.0.5 // indirect github.com/subosito/gotenv v1.6.0 // indirect go.uber.org/multierr v1.11.0 // indirect - golang.org/x/exp v0.0.0-20240222234643-814bf88cf225 // indirect - golang.org/x/sys v0.17.0 // indirect - golang.org/x/text v0.14.0 // indirect - google.golang.org/protobuf v1.32.0 // indirect + 
golang.org/x/exp v0.0.0-20240506185415-9bf2ced13842 // indirect + golang.org/x/sys v0.20.0 // indirect + golang.org/x/text v0.15.0 // indirect + google.golang.org/protobuf v1.34.1 // indirect gopkg.in/ini.v1 v1.67.0 // indirect gopkg.in/yaml.v3 v3.0.1 // indirect ) diff --git a/go.sum b/go.sum index 0edb8a5..27df68f 100755 --- a/go.sum +++ b/go.sum @@ -1,7 +1,7 @@ github.com/beorn7/perks v1.0.1 h1:VlbKKnNfV8bJzeqoa4cOKqO6bYr3WgKZxO8Z16+hsOM= github.com/beorn7/perks v1.0.1/go.mod h1:G2ZrVWU2WbWT9wwq4/hrbKbnv/1ERSJQ0ibhJ6rlkpw= -github.com/cespare/xxhash/v2 v2.2.0 h1:DC2CZ1Ep5Y4k3ZQ899DldepgrayRUGE6BBZ/cd9Cj44= -github.com/cespare/xxhash/v2 v2.2.0/go.mod h1:VGX0DQ3Q6kWi7AoAeZDth3/j3BFtOZR5XLFGgcrjCOs= +github.com/cespare/xxhash/v2 v2.3.0 h1:UL815xU9SqsFlibzuggzjXhog7bL6oX9BbNZnL2UFvs= +github.com/cespare/xxhash/v2 v2.3.0/go.mod h1:VGX0DQ3Q6kWi7AoAeZDth3/j3BFtOZR5XLFGgcrjCOs= github.com/cpuguy83/go-md2man/v2 v2.0.3/go.mod h1:tgQtvFlXSQOSOSIRvRPT7W67SCa46tRHOmNcaadrF8o= github.com/davecgh/go-spew v1.1.0/go.mod h1:J7Y8YcW2NihsgmVo/mv3lAwl/skON4iLHjSsI+c5H38= github.com/davecgh/go-spew v1.1.1/go.mod h1:J7Y8YcW2NihsgmVo/mv3lAwl/skON4iLHjSsI+c5H38= @@ -32,19 +32,19 @@ github.com/mitchellh/mapstructure v1.5.0 h1:jeMsZIYE/09sWLaz43PL7Gy6RuMjD2eJVyua github.com/mitchellh/mapstructure v1.5.0/go.mod h1:bFUtVrKA4DC2yAKiSyO/QUcy7e+RRV2QTWOzhPopBRo= github.com/patrickmn/go-cache v2.1.0+incompatible h1:HRMgzkcYKYpi3C8ajMPV8OFXaaRUnok+kx1WdO15EQc= github.com/patrickmn/go-cache v2.1.0+incompatible/go.mod h1:3Qf8kWWT7OJRJbdiICTKqZju1ZixQ/KpMGzzAfe6+WQ= -github.com/pelletier/go-toml/v2 v2.1.1 h1:LWAJwfNvjQZCFIDKWYQaM62NcYeYViCmWIwmOStowAI= -github.com/pelletier/go-toml/v2 v2.1.1/go.mod h1:tJU2Z3ZkXwnxa4DPO899bsyIoywizdUvyaeZurnPPDc= +github.com/pelletier/go-toml/v2 v2.2.2 h1:aYUidT7k73Pcl9nb2gScu7NSrKCSHIDE89b3+6Wq+LM= +github.com/pelletier/go-toml/v2 v2.2.2/go.mod h1:1t835xjRzz80PqgE6HHgN2JOsmgYu/h4qDAS4n929Rs= github.com/pmezard/go-difflib v1.0.0/go.mod 
h1:iKH77koFhYxTK1pcRnkKkqfTogsbg7gZNVY4sRDYZ/4= github.com/pmezard/go-difflib v1.0.1-0.20181226105442-5d4384ee4fb2 h1:Jamvg5psRIccs7FGNTlIRMkT8wgtp5eCXdBlqhYGL6U= github.com/pmezard/go-difflib v1.0.1-0.20181226105442-5d4384ee4fb2/go.mod h1:iKH77koFhYxTK1pcRnkKkqfTogsbg7gZNVY4sRDYZ/4= -github.com/prometheus/client_golang v1.19.0 h1:ygXvpU1AoN1MhdzckN+PyD9QJOSD4x7kmXYlnfbA6JU= -github.com/prometheus/client_golang v1.19.0/go.mod h1:ZRM9uEAypZakd+q/x7+gmsvXdURP+DABIEIjnmDdp+k= -github.com/prometheus/client_model v0.6.0 h1:k1v3CzpSRUTrKMppY35TLwPvxHqBu0bYgxZzqGIgaos= -github.com/prometheus/client_model v0.6.0/go.mod h1:NTQHnmxFpouOD0DpvP4XujX3CdOAGQPoaGhyTchlyt8= -github.com/prometheus/common v0.48.0 h1:QO8U2CdOzSn1BBsmXJXduaaW+dY/5QLjfB8svtSzKKE= -github.com/prometheus/common v0.48.0/go.mod h1:0/KsvlIEfPQCQ5I2iNSAWKPZziNCvRs5EC6ILDTlAPc= -github.com/prometheus/procfs v0.12.0 h1:jluTpSng7V9hY0O2R9DzzJHYb2xULk9VTR1V1R/k6Bo= -github.com/prometheus/procfs v0.12.0/go.mod h1:pcuDEFsWDnvcgNzo4EEweacyhjeA9Zk3cnaOZAZEfOo= +github.com/prometheus/client_golang v1.19.1 h1:wZWJDwK+NameRJuPGDhlnFgx8e8HN3XHQeLaYJFJBOE= +github.com/prometheus/client_golang v1.19.1/go.mod h1:mP78NwGzrVks5S2H6ab8+ZZGJLZUq1hoULYBAYBw1Ho= +github.com/prometheus/client_model v0.6.1 h1:ZKSh/rekM+n3CeS952MLRAdFwIKqeY8b62p8ais2e9E= +github.com/prometheus/client_model v0.6.1/go.mod h1:OrxVMOVHjw3lKMa8+x6HeMGkHMQyHDk9E3jmP2AmGiY= +github.com/prometheus/common v0.53.0 h1:U2pL9w9nmJwJDa4qqLQ3ZaePJ6ZTwt7cMD3AG3+aLCE= +github.com/prometheus/common v0.53.0/go.mod h1:BrxBKv3FWBIGXw89Mg1AeBq7FSyRzXWI3l3e7W3RN5U= +github.com/prometheus/procfs v0.14.0 h1:Lw4VdGGoKEZilJsayHf0B+9YgLGREba2C6xr+Fdfq6s= +github.com/prometheus/procfs v0.14.0/go.mod h1:XL+Iwz8k8ZabyZfMFHPiilCniixqQarAy5Mu67pHlNQ= github.com/rogpeppe/go-internal v1.10.0 h1:TMyTOH3F/DB16zRVcYyreMH6GnZZrwQVAoYjRBZyWFQ= github.com/rogpeppe/go-internal v1.10.0/go.mod h1:UQnix2H7Ngw/k4C5ijL5+65zddjncjaFoBhdsK/akog= github.com/russross/blackfriday/v2 v2.1.0/go.mod 
h1:+Rmxgy9KzJVeS9/2gXHxylqXiyQDYRxCVz55jmeOWTM= @@ -64,11 +64,12 @@ github.com/spf13/pflag v1.0.5 h1:iy+VFUOCP1a+8yFto/drg2CJ5u0yRoB7fZw3DKv/JXA= github.com/spf13/pflag v1.0.5/go.mod h1:McXfInJRrz4CZXVZOBLb0bTZqETkiAhM9Iw0y3An2Bg= github.com/spf13/viper v1.18.2 h1:LUXCnvUvSM6FXAsj6nnfc8Q2tp1dIgUfY9Kc8GsSOiQ= github.com/spf13/viper v1.18.2/go.mod h1:EKmWIqdnk5lOcmR72yw6hS+8OPYcwD0jteitLMVB+yk= -github.com/starttoaster/go-proxmox v0.0.1 h1:fdb/p6GgJ2rs06ubw872ba6r8r2yVjirx/duBrnFUlY= -github.com/starttoaster/go-proxmox v0.0.1/go.mod h1:eyvj8nnoKJfwSjXIisLkErTtE2ErEsUHExcjG2zf+mQ= +github.com/starttoaster/go-proxmox v0.0.2 h1:4BI2DlIfFTDxUE80DLhshfzDTe/IGXVeQW1HUSdvr+s= +github.com/starttoaster/go-proxmox v0.0.2/go.mod h1:eyvj8nnoKJfwSjXIisLkErTtE2ErEsUHExcjG2zf+mQ= github.com/stretchr/objx v0.1.0/go.mod h1:HFkY916IF+rwdDfMAkV7OtwuqBVzrE8GR6GFx+wExME= github.com/stretchr/objx v0.4.0/go.mod h1:YvHI0jy2hoMjB+UWwv71VJQ9isScKT/TqJzVSSt89Yw= github.com/stretchr/objx v0.5.0/go.mod h1:Yh+to48EsGEfYuaHDzXPcE3xhTkx73EhmCGUpEOglKo= +github.com/stretchr/objx v0.5.2/go.mod h1:FRsXN1f5AsAjCGJKqEizvkpNtU+EGNCLh3NxZ/8L+MA= github.com/stretchr/testify v1.7.1/go.mod h1:6Fq8oRcR53rry900zMqJjRRixrwX3KX962/h/Wwjteg= github.com/stretchr/testify v1.8.0/go.mod h1:yNjHg4UonilssWZ8iaSj1OCr/vHnekPRkoO+kdMU+MU= github.com/stretchr/testify v1.8.4/go.mod h1:sz/lmYIOXD/1dqDmKjjqLyZ2RngseejIcXlSw2iwfAo= @@ -78,15 +79,15 @@ github.com/subosito/gotenv v1.6.0 h1:9NlTDc1FTs4qu0DDq7AEtTPNw6SVm7uBMsUCUjABIf8 github.com/subosito/gotenv v1.6.0/go.mod h1:Dk4QP5c2W3ibzajGcXpNraDfq2IrhjMIvMSWPKKo0FU= go.uber.org/multierr v1.11.0 h1:blXXJkSxSSfBVBlC76pxqeO+LN3aDfLQo+309xJstO0= go.uber.org/multierr v1.11.0/go.mod h1:20+QtiLqy0Nd6FdQB9TLXag12DsQkrbs3htMFfDN80Y= -golang.org/x/exp v0.0.0-20240222234643-814bf88cf225 h1:LfspQV/FYTatPTr/3HzIcmiUFH7PGP+OQ6mgDYo3yuQ= -golang.org/x/exp v0.0.0-20240222234643-814bf88cf225/go.mod h1:CxmFvTBINI24O/j8iY7H1xHzx2i4OsyguNBmN/uPtqc= -golang.org/x/sys v0.17.0 
h1:25cE3gD+tdBA7lp7QfhuV+rJiE9YXTcS3VG1SqssI/Y= -golang.org/x/sys v0.17.0/go.mod h1:/VUhepiaJMQUp4+oa/7Zr1D23ma6VTLIYjOOTFZPUcA= -golang.org/x/text v0.14.0 h1:ScX5w1eTa3QqT8oi6+ziP7dTV1S2+ALU0bI+0zXKWiQ= -golang.org/x/text v0.14.0/go.mod h1:18ZOQIKpY8NJVqYksKHtTdi31H5itFRjB5/qKTNYzSU= +golang.org/x/exp v0.0.0-20240506185415-9bf2ced13842 h1:vr/HnozRka3pE4EsMEg1lgkXJkTFJCVUX+S/ZT6wYzM= +golang.org/x/exp v0.0.0-20240506185415-9bf2ced13842/go.mod h1:XtvwrStGgqGPLc4cjQfWqZHG1YFdYs6swckp8vpsjnc= +golang.org/x/sys v0.20.0 h1:Od9JTbYCk261bKm4M/mw7AklTlFYIa0bIp9BgSm1S8Y= +golang.org/x/sys v0.20.0/go.mod h1:/VUhepiaJMQUp4+oa/7Zr1D23ma6VTLIYjOOTFZPUcA= +golang.org/x/text v0.15.0 h1:h1V/4gjBv8v9cjcR6+AR5+/cIYK5N/WAgiv4xlsEtAk= +golang.org/x/text v0.15.0/go.mod h1:18ZOQIKpY8NJVqYksKHtTdi31H5itFRjB5/qKTNYzSU= golang.org/x/xerrors v0.0.0-20191204190536-9bdfabe68543/go.mod h1:I/5z698sn9Ka8TeJc9MKroUUfqBBauWjQqLJ2OPfmY0= -google.golang.org/protobuf v1.32.0 h1:pPC6BG5ex8PDFnkbrGU3EixyhKcQ2aDuBS36lqK/C7I= -google.golang.org/protobuf v1.32.0/go.mod h1:c6P6GXX6sHbq/GpV6MGZEdwhWPcYBgnhAHhKbcUYpos= +google.golang.org/protobuf v1.34.1 h1:9ddQBjfCyZPOHPUiPxpYESBLc+T8P3E+Vo4IbKZgFWg= +google.golang.org/protobuf v1.34.1/go.mod h1:c6P6GXX6sHbq/GpV6MGZEdwhWPcYBgnhAHhKbcUYpos= gopkg.in/check.v1 v0.0.0-20161208181325-20d25e280405/go.mod h1:Co6ibVJAznAaIkqp8huTwlJQCZ016jof/cbN4VW5Yz0= gopkg.in/check.v1 v1.0.0-20201130134442-10cb98267c6c h1:Hei/4ADfdWqJk1ZMxUNpqntNwaWcugrBjAiHlqqRiVk= gopkg.in/check.v1 v1.0.0-20201130134442-10cb98267c6c/go.mod h1:JHkPIbrfpd72SG/EVd6muEfDQjcINNoR0C8j2r3qZ4Q= diff --git a/internal/prometheus/node.go b/internal/prometheus/node.go index 8a70dff..e8bcdf1 100644 --- a/internal/prometheus/node.go +++ b/internal/prometheus/node.go @@ -19,7 +19,7 @@ type collectNodeResponse struct { clusterMemAlloc int } -func (c *Collector) collectNode(ch chan<- prometheus.Metric, node proxmox.GetNodesData, resultChan chan<- collectNodeResponse, wg *sync.WaitGroup) { +func (c *Collector) 
collectNode(ch chan<- prometheus.Metric, clusterResources *proxmox.GetClusterResourcesResponse, node proxmox.GetNodesData, resultChan chan<- collectNodeResponse, wg *sync.WaitGroup) { defer wg.Done() defer logger.Logger.Debug("finished requests for node data", "node", node.Node) var resp collectNodeResponse @@ -38,7 +38,7 @@ func (c *Collector) collectNode(ch chan<- prometheus.Metric, node proxmox.GetNod if err != nil { logger.Logger.Error("failed making request to get node VMs", "node", node.Node, "error", err.Error()) } else { - vmMetrics = c.collectVirtualMachineMetrics(ch, node, vms) + vmMetrics = c.collectVirtualMachineMetrics(ch, clusterResources, node, vms) } // Get lxc data on this node diff --git a/internal/prometheus/prometheus.go b/internal/prometheus/prometheus.go index 90e4cfb..d9ec10e 100755 --- a/internal/prometheus/prometheus.go +++ b/internal/prometheus/prometheus.go @@ -1,9 +1,11 @@ package prometheus import ( + "fmt" "sync" "github.com/prometheus/client_golang/prometheus" + "github.com/starttoaster/go-proxmox" "github.com/starttoaster/proxmox-exporter/internal/logger" wrappedProxmox "github.com/starttoaster/proxmox-exporter/internal/proxmox" ) @@ -176,6 +178,16 @@ func (c *Collector) Collect(ch chan<- prometheus.Metric) { return } + // Retrieve cluster resources -- only does this if a cluster name was detected, because it uses a cluster API endpoint + var clusterResources *proxmox.GetClusterResourcesResponse + if wrappedProxmox.ClusterName != "" { + var err error + clusterResources, err = wrappedProxmox.GetClusterResources() + if err != nil { + logger.Logger.Debug(fmt.Sprintf("ignoring error requesting cluster resources, this is probably not a clustered PVE node: %s", err.Error())) + } + } + // Cluster level metric variables (added to in each iteration of the loop below) clusterCPUs := 0 clusterCPUsAlloc := 0 @@ -189,7 +201,7 @@ func (c *Collector) Collect(ch chan<- prometheus.Metric) { // Collect node metrics from each of the nodes for _, node 
:= range nodes.Data { wg.Add(1) - go c.collectNode(ch, node, resultChan, &wg) + go c.collectNode(ch, clusterResources, node, resultChan, &wg) } // Close the result channel after all goroutines finish diff --git a/internal/prometheus/virtualmachine.go b/internal/prometheus/virtualmachine.go index d5585d2..2ac0a3b 100644 --- a/internal/prometheus/virtualmachine.go +++ b/internal/prometheus/virtualmachine.go @@ -1,6 +1,7 @@ package prometheus import ( + "github.com/starttoaster/proxmox-exporter/internal/logger" "strconv" "strings" @@ -16,9 +17,33 @@ type collectVirtualMachineMetricsResponse struct { } // collectLxcMetrics adds metrics to the registry that are per-VM and returns VM aggregate data for higher level metrics -func (c *Collector) collectVirtualMachineMetrics(ch chan<- prometheus.Metric, node proxmox.GetNodesData, vms *proxmox.GetNodeQemuResponse) *collectVirtualMachineMetricsResponse { +func (c *Collector) collectVirtualMachineMetrics(ch chan<- prometheus.Metric, clusterResources *proxmox.GetClusterResourcesResponse, node proxmox.GetNodesData, vms *proxmox.GetNodeQemuResponse) *collectVirtualMachineMetricsResponse { var res collectVirtualMachineMetricsResponse for _, vm := range vms.Data { + // Checks if cluster resources were provided. If they were, this will check if a VM is a template. 
+ var vmIsTemplate bool + if clusterResources != nil { + for _, res := range clusterResources.Data { + var name string + if res.Name != nil { + name = *res.Name + } + var template int + if res.Template != nil { + template = *res.Template + } + if vm.Name == name && template == 1 { + vmIsTemplate = true + } + } + } + + // Don't collect VM metrics on templates + if vmIsTemplate { + logger.Logger.Debug("excluding VM from collecting metrics because it is a template.", "name", vm.Name) + continue + } + // Add vm up metric status := 0.0 if strings.EqualFold(vm.Status, "running") { diff --git a/internal/proxmox/cluster.go b/internal/proxmox/cluster.go index 5505b0c..ba7c0fb 100644 --- a/internal/proxmox/cluster.go +++ b/internal/proxmox/cluster.go @@ -49,3 +49,45 @@ func GetClusterStatus() (*proxmox.GetClusterStatusResponse, error) { return cluster, nil } + +// GetClusterResources returns the /cluster/resources response, served from cache when a cached entry of the expected type exists; otherwise it tries each non-banned client in turn, passing any client whose request fails to banClient, and returns an error if no request succeeds. +func GetClusterResources() (*proxmox.GetClusterResourcesResponse, error) { + // Check cache + var resources *proxmox.GetClusterResourcesResponse + if x, found := cash.Get("GetClusterResources"); found { + var ok bool + resources, ok = x.(*proxmox.GetClusterResourcesResponse) + if ok { + log.Logger.Debug("proxmox request was found in cache for GetClusterResources") + return resources, nil + } + } + + // Make request if not found in cache + var err error + for clientName, c := range clients { + // Skip clients that are currently banned + if c.banned { + continue + } + + resources, _, err = c.client.Cluster.GetClusterResources() + if err == nil { + break + } else { + banClient(clientName, c) + } + } + if err != nil { + return nil, err + } + + if resources == nil { + return nil, fmt.Errorf("request to get cluster resources was not successful. It's possible all clients are banned") + } + + // Update cache. NOTE(review): stored with cache.NoExpiration, so this entry is presumably not refreshed on a TTL and later template changes may be missed -- confirm this is intended + cash.Set("GetClusterResources", resources, cache.NoExpiration) + + return resources, nil +}