Skip to content

Commit

Permalink
Add constlabel for network. Handle the case where network changes (#148)
Browse files Browse the repository at this point in the history
  • Loading branch information
cmmarslender authored Mar 17, 2024
1 parent 637f0c9 commit 0f55813
Show file tree
Hide file tree
Showing 11 changed files with 152 additions and 33 deletions.
6 changes: 3 additions & 3 deletions cmd/root.go
Original file line number Diff line number Diff line change
Expand Up @@ -13,13 +13,13 @@ import (
var cfgFile string
var (
gitVersion string
buildTime string
buildTime string
)

// rootCmd represents the base command when called without any subcommands
var rootCmd = &cobra.Command{
Use: "chia-exporter",
Short: "Prometheus metric exporter for Chia Blockchain",
Use: "chia-exporter",
Short: "Prometheus metric exporter for Chia Blockchain",
Version: fmt.Sprintf("%s (%s)", gitVersion, buildTime),
}

Expand Down
3 changes: 2 additions & 1 deletion go.mod
Original file line number Diff line number Diff line change
Expand Up @@ -3,7 +3,7 @@ module github.com/chia-network/chia-exporter
go 1.21

require (
github.com/chia-network/go-chia-libs v0.6.0
github.com/chia-network/go-chia-libs v0.7.0
github.com/chia-network/go-modules v0.0.4
github.com/go-sql-driver/mysql v1.8.0
github.com/oschwald/maxminddb-golang v1.12.0
Expand All @@ -19,6 +19,7 @@ require (
github.com/cespare/xxhash/v2 v2.2.0 // indirect
github.com/fsnotify/fsnotify v1.7.0 // indirect
github.com/google/go-querystring v1.1.0 // indirect
github.com/google/uuid v1.6.0 // indirect
github.com/gorilla/websocket v1.5.1 // indirect
github.com/hashicorp/hcl v1.0.0 // indirect
github.com/inconshreveable/mousetrap v1.1.0 // indirect
Expand Down
6 changes: 4 additions & 2 deletions go.sum
Original file line number Diff line number Diff line change
Expand Up @@ -4,8 +4,8 @@ github.com/beorn7/perks v1.0.1 h1:VlbKKnNfV8bJzeqoa4cOKqO6bYr3WgKZxO8Z16+hsOM=
github.com/beorn7/perks v1.0.1/go.mod h1:G2ZrVWU2WbWT9wwq4/hrbKbnv/1ERSJQ0ibhJ6rlkpw=
github.com/cespare/xxhash/v2 v2.2.0 h1:DC2CZ1Ep5Y4k3ZQ899DldepgrayRUGE6BBZ/cd9Cj44=
github.com/cespare/xxhash/v2 v2.2.0/go.mod h1:VGX0DQ3Q6kWi7AoAeZDth3/j3BFtOZR5XLFGgcrjCOs=
github.com/chia-network/go-chia-libs v0.6.0 h1:HF9OHb2oLRvsVkM5umH11m085UrMp5Pg/xMO43EHmBE=
github.com/chia-network/go-chia-libs v0.6.0/go.mod h1:uqlNmpIlkRd4vtE+cXoUIAEdbDgCKK0Gp9XzrPgpkEM=
github.com/chia-network/go-chia-libs v0.7.0 h1:mTTw1A9Vi9rrzoh08uBAzVRt5lwbIN1KNy/m2uWyL5o=
github.com/chia-network/go-chia-libs v0.7.0/go.mod h1:Y/ND/A4O1RTVfAy6jlK3ATS584VyiNqOKbHSMwrl21k=
github.com/chia-network/go-modules v0.0.4 h1:XlCcuT4j1krLvsFT1Y49Un5xORwcTc8jjE4SHih7OTI=
github.com/chia-network/go-modules v0.0.4/go.mod h1:JP8mG/9ieE76VcGIbzD5G3/4YDmvNhRryiQwp8GQr1U=
github.com/cpuguy83/go-md2man/v2 v2.0.3/go.mod h1:tgQtvFlXSQOSOSIRvRPT7W67SCa46tRHOmNcaadrF8o=
Expand All @@ -24,6 +24,8 @@ github.com/google/go-cmp v0.6.0 h1:ofyhxvXcZhMsU5ulbFiLKl/XBFqE1GSq7atu8tAmTRI=
github.com/google/go-cmp v0.6.0/go.mod h1:17dUlkBOakJ0+DkrSSNjCkIjxS6bF9zb3elmeNGIjoY=
github.com/google/go-querystring v1.1.0 h1:AnCroh3fv4ZBgVIf1Iwtovgjaw/GiKJo8M8yD/fhyJ8=
github.com/google/go-querystring v1.1.0/go.mod h1:Kcdr2DB4koayq7X8pmAG4sNG59So17icRSOU623lUBU=
github.com/google/uuid v1.6.0 h1:NIvaJDMOsjHA8n1jAhLSgzrAzy1Hgr+hNrb57e+94F0=
github.com/google/uuid v1.6.0/go.mod h1:TIyPZe4MgqvfeYDBFedMoGGpEw/LqOeaOT+nhxU+yHo=
github.com/gorilla/websocket v1.5.1 h1:gmztn0JnHVt9JZquRuzLw3g4wouNVzKL15iLr/zn/QY=
github.com/gorilla/websocket v1.5.1/go.mod h1:x3kM2JMyaluk02fnUJpQuwD2dCS5NDG2ZHL0uE0tcaY=
github.com/hashicorp/hcl v1.0.0 h1:0Anlzjpi4vEasTeNFn2mLJgTSwt0+6sfsiTG8qcWGx4=
Expand Down
23 changes: 8 additions & 15 deletions internal/metrics/crawler.go
Original file line number Diff line number Diff line change
Expand Up @@ -28,6 +28,9 @@ type CrawlerServiceMetrics struct {
// Holds a reference to the main metrics container this is a part of
metrics *Metrics

// Current network
network *string

// Interfaces with Maxmind
maxMindCountryDB *maxminddb.Reader
maxMindASNDB *maxminddb.Reader
Expand All @@ -42,13 +45,11 @@ type CrawlerServiceMetrics struct {

// Debug Metric
debug *prometheus.GaugeVec

// Current network
network *string
}

// InitMetrics sets all the metrics properties
func (s *CrawlerServiceMetrics) InitMetrics() {
func (s *CrawlerServiceMetrics) InitMetrics(network *string) {
s.network = network
// Crawler Metrics
s.totalNodes5Days = s.metrics.newGauge(chiaServiceCrawler, "total_nodes_5_days", "Total number of nodes that have been gossiped around the network with a timestamp in the last 5 days. The crawler did not necessarily connect to all of these peers itself.")
s.reliableNodes = s.metrics.newGauge(chiaServiceCrawler, "reliable_nodes", "reliable nodes are nodes that have port 8444 open and have available space for more peer connections")
Expand Down Expand Up @@ -252,18 +253,10 @@ func (s *CrawlerServiceMetrics) ProcessIPASNMapping(ips *rpc.GetIPsAfterTimestam
return
}
if s.network == nil {
netInfo, _, err := s.metrics.httpClient.CrawlerService.GetNetworkInfo(&rpc.GetNetworkInfoOptions{})
if err != nil {
log.Errorf("Could not get network information to store with ASN data: %s\n", err.Error())
return
}
if netInfo.NetworkName.IsAbsent() {
log.Error("network name was absent in get_network_info request. Can't store ASNs without network name")
return
}
netName := netInfo.NetworkName.MustGet()
s.network = &netName
log.Errorln("Network information missing. Can't store ASN data without network")
return
}

type countStruct struct {
ASN uint32
Organization string
Expand Down
2 changes: 1 addition & 1 deletion internal/metrics/farmer.go
Original file line number Diff line number Diff line change
Expand Up @@ -57,7 +57,7 @@ type FarmerServiceMetrics struct {
}

// InitMetrics sets all the metrics properties
func (s *FarmerServiceMetrics) InitMetrics() {
func (s *FarmerServiceMetrics) InitMetrics(network *string) {
s.totalPlotsValue = map[types.Bytes32]uint64{}
s.nodeIDToHostname = map[types.Bytes32]string{}

Expand Down
2 changes: 1 addition & 1 deletion internal/metrics/fullnode.go
Original file line number Diff line number Diff line change
Expand Up @@ -93,7 +93,7 @@ type FullNodeServiceMetrics struct {
}

// InitMetrics sets all the metrics properties
func (s *FullNodeServiceMetrics) InitMetrics() {
func (s *FullNodeServiceMetrics) InitMetrics(network *string) {
// BlockchainState Metrics
s.difficulty = s.metrics.newGauge(chiaServiceFullNode, "difficulty", "Current network difficulty")
s.mempoolCost = s.metrics.newGauge(chiaServiceFullNode, "mempool_cost", "Current mempool size in cost")
Expand Down
2 changes: 1 addition & 1 deletion internal/metrics/harvester.go
Original file line number Diff line number Diff line change
Expand Up @@ -47,7 +47,7 @@ type HarvesterServiceMetrics struct {
}

// InitMetrics sets all the metrics properties
func (s *HarvesterServiceMetrics) InitMetrics() {
func (s *HarvesterServiceMetrics) InitMetrics(network *string) {
// Connection Metrics
s.connectionCount = s.metrics.newGaugeVec(chiaServiceHarvester, "connection_count", "Number of active connections for each type of peer", []string{"node_type"})

Expand Down
115 changes: 108 additions & 7 deletions internal/metrics/metrics.go
Original file line number Diff line number Diff line change
Expand Up @@ -34,7 +34,7 @@ const (
// serviceMetrics defines methods that must be on all metrics services
type serviceMetrics interface {
// InitMetrics registers any metrics (gauges, counters, etc) on creation of the metrics object
InitMetrics()
InitMetrics(network *string)

// InitialData is called after the websocket connection is opened to allow each service
// to load any initial data that should be reported
Expand All @@ -58,14 +58,16 @@ type serviceMetrics interface {
type Metrics struct {
metricsPort uint16
client *rpc.Client
network *string

// httpClient is another instance of the rpc.Client in HTTP mode
// This is used rarely, to request data in response to a websocket event that is too large to fit on a single
// websocket connection or needs to be paginated
httpClient *rpc.Client

// This holds a custom prometheus registry so that only our metrics are exported, and not the default go metrics
registry *prometheus.Registry
registry *prometheus.Registry
dynamicPromHandler *dynamicPromHandler

// Holds a MySQL DB Instance if configured
mysqlClient *sql.DB
Expand Down Expand Up @@ -123,9 +125,13 @@ func NewMetrics(port uint16, logLevel log.Level) (*Metrics, error) {
metrics.serviceMetrics[chiaServiceHarvester] = &HarvesterServiceMetrics{metrics: metrics}
metrics.serviceMetrics[chiaServiceFarmer] = &FarmerServiceMetrics{metrics: metrics}

// See if we can get the network now
// If not, the reconnect handler will handle it later
_, _ = metrics.checkNetwork()

// Init each service's metrics
for _, service := range metrics.serviceMetrics {
service.InitMetrics()
service.InitMetrics(metrics.network)
}

return metrics, nil
Expand Down Expand Up @@ -154,6 +160,40 @@ func (m *Metrics) createDBClient() error {
return nil
}

// Returns boolean indicating if network changed from previously known value
func (m *Metrics) checkNetwork() (bool, error) {
var currentNetwork string
var newNetwork string
if m.network == nil {
currentNetwork = ""
} else {
currentNetwork = *m.network
}

m.client.SetSyncMode()
defer m.client.SetAsyncMode()

netInfo, _, err := m.client.DaemonService.GetNetworkInfo(&rpc.GetNetworkInfoOptions{})
if err != nil {
return false, fmt.Errorf("error checking network info: %w", err)
}

if netInfo != nil && netInfo.NetworkName.IsPresent() {
network := netInfo.NetworkName.MustGet()
m.network = &network
newNetwork = network
} else {
m.network = nil
newNetwork = ""
}

if currentNetwork != newNetwork {
return true, nil
}

return false, nil
}

// newGauge returns a lazy gauge that follows naming conventions
func (m *Metrics) newGauge(service chiaService, name string, help string) *wrappedPrometheus.LazyGauge {
opts := prometheus.GaugeOpts{
Expand All @@ -163,6 +203,12 @@ func (m *Metrics) newGauge(service chiaService, name string, help string) *wrapp
Help: help,
}

if m.network != nil {
opts.ConstLabels = map[string]string{
"network": *m.network,
}
}

gm := prometheus.NewGauge(opts)

lg := &wrappedPrometheus.LazyGauge{
Expand All @@ -183,6 +229,12 @@ func (m *Metrics) newGaugeVec(service chiaService, name string, help string, lab
Help: help,
}

if m.network != nil {
opts.ConstLabels = map[string]string{
"network": *m.network,
}
}

gm := prometheus.NewGaugeVec(opts, labels)

m.registry.MustRegister(gm)
Expand All @@ -199,6 +251,12 @@ func (m *Metrics) newCounter(service chiaService, name string, help string) *wra
Help: help,
}

if m.network != nil {
opts.ConstLabels = map[string]string{
"network": *m.network,
}
}

cm := prometheus.NewCounter(opts)

lc := &wrappedPrometheus.LazyCounter{
Expand All @@ -218,6 +276,12 @@ func (m *Metrics) newCounterVec(service chiaService, name string, help string, l
Help: help,
}

if m.network != nil {
opts.ConstLabels = map[string]string{
"network": *m.network,
}
}

gm := prometheus.NewCounterVec(opts, labels)

m.registry.MustRegister(gm)
Expand All @@ -237,14 +301,27 @@ func (m *Metrics) OpenWebsocket() error {
return err
}

err = m.client.AddHandler(m.websocketReceive)
_, err = m.client.AddHandler(m.websocketReceive)
if err != nil {
return err
}

m.client.AddDisconnectHandler(m.disconnectHandler)
m.client.AddReconnectHandler(m.reconnectHandler)

// First, we check the network and see if it changed
// If changed, we completely replace the prometheus registry with a new registry and re-init all metrics
// otherwise, we call the reconnected handlers
changed, _ := m.checkNetwork()
if changed {
m.registry = prometheus.NewRegistry()
m.dynamicPromHandler.updateHandler(promhttp.HandlerFor(m.registry, promhttp.HandlerOpts{}))
// Init each service's metrics
for _, service := range m.serviceMetrics {
service.InitMetrics(m.network)
}
}

for _, service := range m.serviceMetrics {
service.InitialData()
service.SetupPollingMetrics()
Expand All @@ -264,7 +341,9 @@ func (m *Metrics) CloseWebsocket() error {
func (m *Metrics) StartServer() error {
log.Printf("Starting metrics server on port %d", m.metricsPort)

http.Handle("/metrics", promhttp.HandlerFor(m.registry, promhttp.HandlerOpts{}))
m.dynamicPromHandler = &dynamicPromHandler{}
m.dynamicPromHandler.updateHandler(promhttp.HandlerFor(m.registry, promhttp.HandlerOpts{}))
http.Handle("/metrics", m.dynamicPromHandler)
http.HandleFunc("/healthz", healthcheckEndpoint)
return http.ListenAndServe(fmt.Sprintf(":%d", m.metricsPort), nil)
}
Expand Down Expand Up @@ -303,8 +382,30 @@ func (m *Metrics) disconnectHandler() {

func (m *Metrics) reconnectHandler() {
log.Debug("Calling reconnect handlers")
for _, service := range m.serviceMetrics {
service.Reconnected()

// First, we check the network and see if it changed
// If changed, we completely replace the prometheus registry with a new registry and re-init all metrics
// otherwise, we call the reconnected handlers
changed, err := m.checkNetwork()
if changed || err != nil {
if err != nil {
m.network = nil
log.Errorf("Error checking network. Assuming network changed and resetting metrics: %s\n", err.Error())
}

log.Info("Network Changed, resetting all metrics")
m.registry = prometheus.NewRegistry()
m.dynamicPromHandler.updateHandler(promhttp.HandlerFor(m.registry, promhttp.HandlerOpts{}))
// Init each service's metrics
for _, service := range m.serviceMetrics {
service.InitMetrics(m.network)
service.InitialData()
}
} else {
log.Debug("Network did not change")
for _, service := range m.serviceMetrics {
service.Reconnected()
}
}
}

Expand Down
22 changes: 22 additions & 0 deletions internal/metrics/promhandler.go
Original file line number Diff line number Diff line change
@@ -0,0 +1,22 @@
package metrics
// Everything in here is to support making a new prometheus registry and having the http endpoint pick it up seamlessly

import (
"net/http"
"sync/atomic"
)

type dynamicPromHandler struct {
handler atomic.Value // Stores the current http.Handler
}

func (d *dynamicPromHandler) ServeHTTP(w http.ResponseWriter, r *http.Request) {
// Retrieve the current handler and serve the request
handler := d.handler.Load().(http.Handler)
handler.ServeHTTP(w, r)
}

// Function to update the handler
func (d *dynamicPromHandler) updateHandler(newHandler http.Handler) {
d.handler.Store(newHandler)
}
2 changes: 1 addition & 1 deletion internal/metrics/timelord.go
Original file line number Diff line number Diff line change
Expand Up @@ -31,7 +31,7 @@ type TimelordServiceMetrics struct {
}

// InitMetrics sets all the metrics properties
func (s *TimelordServiceMetrics) InitMetrics() {
func (s *TimelordServiceMetrics) InitMetrics(network *string) {
s.fastestTimelord = s.metrics.newCounter(chiaServiceTimelord, "fastest_timelord", "Counter for how many times this timelord has been fastest since the exporter has been running")
s.slowTimelord = s.metrics.newCounter(chiaServiceTimelord, "slow_timelord", "Counter for how many times this timelord has NOT been the fastest since the exporter has been running")
s.estimatedIPS = s.metrics.newGauge(chiaServiceTimelord, "estimated_ips", "Current estimated IPS. Updated every time a new PoT Challenge is complete")
Expand Down
2 changes: 1 addition & 1 deletion internal/metrics/wallet.go
Original file line number Diff line number Diff line change
Expand Up @@ -39,7 +39,7 @@ type WalletServiceMetrics struct {
}

// InitMetrics sets all the metrics properties
func (s *WalletServiceMetrics) InitMetrics() {
func (s *WalletServiceMetrics) InitMetrics(network *string) {
// Connection Metrics
s.connectionCount = s.metrics.newGaugeVec(chiaServiceWallet, "connection_count", "Number of active connections for each type of peer", []string{"node_type"})

Expand Down

0 comments on commit 0f55813

Please sign in to comment.