From 666e0b39c597f10c6b7e2679e7bfed51ff58e3b3 Mon Sep 17 00:00:00 2001 From: Chris Marslender Date: Mon, 2 Dec 2024 12:17:11 -0600 Subject: [PATCH] Handle initial data cases where daemon was initially up but services were not (#202) * Return from refreshing file sizes if we can't get the config * if version isn't set when receiving responses, try to get it again * Check whether or not we know the version once we start receiving messages from services Fixes a case where daemon is up but service is not for the initial "Get Version" messages * Go chia libs 0.20.1 * Get initial plots/wallets if those didn't work right away --- go.mod | 2 +- go.sum | 7 ++++--- internal/metrics/crawler.go | 11 +++++++++++ internal/metrics/farmer.go | 11 +++++++++++ internal/metrics/fullnode.go | 14 +++++++++++++- internal/metrics/harvester.go | 18 ++++++++++++++++++ internal/metrics/timelord.go | 11 +++++++++++ internal/metrics/wallet.go | 18 ++++++++++++++++++ 8 files changed, 87 insertions(+), 5 deletions(-) diff --git a/go.mod b/go.mod index 87368ee..7ce5ba2 100644 --- a/go.mod +++ b/go.mod @@ -3,7 +3,7 @@ module github.com/chia-network/chia-exporter go 1.21 require ( - github.com/chia-network/go-chia-libs v0.20.0 + github.com/chia-network/go-chia-libs v0.20.1 github.com/chia-network/go-modules v0.0.8 github.com/go-sql-driver/mysql v1.8.1 github.com/oschwald/maxminddb-golang v1.13.1 diff --git a/go.sum b/go.sum index a33bc98..736c1b4 100644 --- a/go.sum +++ b/go.sum @@ -4,8 +4,8 @@ github.com/beorn7/perks v1.0.1 h1:VlbKKnNfV8bJzeqoa4cOKqO6bYr3WgKZxO8Z16+hsOM= github.com/beorn7/perks v1.0.1/go.mod h1:G2ZrVWU2WbWT9wwq4/hrbKbnv/1ERSJQ0ibhJ6rlkpw= github.com/cespare/xxhash/v2 v2.3.0 h1:UL815xU9SqsFlibzuggzjXhog7bL6oX9BbNZnL2UFvs= github.com/cespare/xxhash/v2 v2.3.0/go.mod h1:VGX0DQ3Q6kWi7AoAeZDth3/j3BFtOZR5XLFGgcrjCOs= -github.com/chia-network/go-chia-libs v0.20.0 h1:rdW1Ob/hb32Pl89w4/9SkoGWVhyC+tOkeRqvPr/xuvg= -github.com/chia-network/go-chia-libs v0.20.0/go.mod 
h1:npTqaFSjTdMxE7hc0LOmWJmWGqcs+IERarK5fDxXk/I= +github.com/chia-network/go-chia-libs v0.20.1 h1:uEPrjtxot0oipTuWT63Spw9t9kj56JdhdFBS4ZgYdjc= +github.com/chia-network/go-chia-libs v0.20.1/go.mod h1:+RMorskgxwYzPGf2gIyW0k7FGDdLrrH4X5ATrrMreb0= github.com/chia-network/go-modules v0.0.8 h1:VATMxehRISOhaRwPo/GL735IKWW0G7sUYH2OmBofsfE= github.com/chia-network/go-modules v0.0.8/go.mod h1:OdvlWftyJc3+i3QYv5cfQsiQASL7Em7fJnzdmPmj07M= github.com/cpuguy83/go-md2man/v2 v2.0.4/go.mod h1:tgQtvFlXSQOSOSIRvRPT7W67SCa46tRHOmNcaadrF8o= @@ -94,8 +94,9 @@ github.com/stretchr/testify v1.7.0/go.mod h1:6Fq8oRcR53rry900zMqJjRRixrwX3KX962/ github.com/stretchr/testify v1.7.1/go.mod h1:6Fq8oRcR53rry900zMqJjRRixrwX3KX962/h/Wwjteg= github.com/stretchr/testify v1.8.0/go.mod h1:yNjHg4UonilssWZ8iaSj1OCr/vHnekPRkoO+kdMU+MU= github.com/stretchr/testify v1.8.4/go.mod h1:sz/lmYIOXD/1dqDmKjjqLyZ2RngseejIcXlSw2iwfAo= -github.com/stretchr/testify v1.9.0 h1:HtqpIVDClZ4nwg75+f6Lvsy/wHu+3BoSGCbBAcpTsTg= github.com/stretchr/testify v1.9.0/go.mod h1:r2ic/lqez/lEtzL7wO/rwa5dbSLXVDPFyf8C91i36aY= +github.com/stretchr/testify v1.10.0 h1:Xv5erBjTwe/5IxqUQTdXv5kgmIvbHo3QQyRwhJsOfJA= +github.com/stretchr/testify v1.10.0/go.mod h1:r2ic/lqez/lEtzL7wO/rwa5dbSLXVDPFyf8C91i36aY= github.com/subosito/gotenv v1.6.0 h1:9NlTDc1FTs4qu0DDq7AEtTPNw6SVm7uBMsUCUjABIf8= github.com/subosito/gotenv v1.6.0/go.mod h1:Dk4QP5c2W3ibzajGcXpNraDfq2IrhjMIvMSWPKKo0FU= go.uber.org/multierr v1.11.0 h1:blXXJkSxSSfBVBlC76pxqeO+LN3aDfLQo+309xJstO0= diff --git a/internal/metrics/crawler.go b/internal/metrics/crawler.go index 780046c..2b8816f 100644 --- a/internal/metrics/crawler.go +++ b/internal/metrics/crawler.go @@ -30,6 +30,7 @@ type CrawlerServiceMetrics struct { metrics *Metrics // General Service Metrics + gotVersionResponse bool version *prometheus.GaugeVec // Current network @@ -128,6 +129,7 @@ func (s *CrawlerServiceMetrics) SetupPollingMetrics(ctx context.Context) {} // Disconnected clears/unregisters metrics when the connection drops 
func (s *CrawlerServiceMetrics) Disconnected() { s.version.Reset() + s.gotVersionResponse = false s.totalNodes5Days.Unregister() s.reliableNodes.Unregister() s.ipv4Nodes5Days.Unregister() @@ -143,9 +145,18 @@ func (s *CrawlerServiceMetrics) Reconnected() { // ReceiveResponse handles crawler responses that are returned over the websocket func (s *CrawlerServiceMetrics) ReceiveResponse(resp *types.WebsocketResponse) { + // Sometimes, when we reconnect, or start exporter before chia is running + // the daemon is up before the service, and the initial request for the version + // doesn't make it to the service + // daemon doesn't queue these messages for later, they just get dropped + if !s.gotVersionResponse { + utils.LogErr(s.metrics.client.CrawlerService.GetVersion(&rpc.GetVersionOptions{})) + } + switch resp.Command { case "get_version": versionHelper(resp, s.version) + s.gotVersionResponse = true case "get_peer_counts": fallthrough case "loaded_initial_peers": diff --git a/internal/metrics/farmer.go b/internal/metrics/farmer.go index 17ee97b..ed3a1e9 100644 --- a/internal/metrics/farmer.go +++ b/internal/metrics/farmer.go @@ -24,6 +24,7 @@ type FarmerServiceMetrics struct { metrics *Metrics // General Service Metrics + gotVersionResponse bool version *prometheus.GaugeVec // Connection Metrics @@ -108,6 +109,7 @@ func (s *FarmerServiceMetrics) SetupPollingMetrics(ctx context.Context) {} // Disconnected clears/unregisters metrics when the connection drops func (s *FarmerServiceMetrics) Disconnected() { s.version.Reset() + s.gotVersionResponse = false s.connectionCount.Reset() s.plotFilesize.Reset() s.plotCount.Reset() @@ -123,9 +125,18 @@ func (s *FarmerServiceMetrics) Reconnected() { // ReceiveResponse handles crawler responses that are returned over the websocket func (s *FarmerServiceMetrics) ReceiveResponse(resp *types.WebsocketResponse) { + // Sometimes, when we reconnect, or start exporter before chia is running + // the daemon is up before the service, and 
the initial request for the version + // doesn't make it to the service + // daemon doesn't queue these messages for later, they just get dropped + if !s.gotVersionResponse { + utils.LogErr(s.metrics.client.FarmerService.GetVersion(&rpc.GetVersionOptions{})) + } + switch resp.Command { case "get_version": versionHelper(resp, s.version) + s.gotVersionResponse = true case "get_connections": s.GetConnections(resp) case "new_farming_info": diff --git a/internal/metrics/fullnode.go b/internal/metrics/fullnode.go index 673b300..5d65d21 100644 --- a/internal/metrics/fullnode.go +++ b/internal/metrics/fullnode.go @@ -36,7 +36,8 @@ type FullNodeServiceMetrics struct { metrics *Metrics // General Service Metrics - version *prometheus.GaugeVec + gotVersionResponse bool + version *prometheus.GaugeVec // GetBlockchainState Metrics difficulty *wrappedPrometheus.LazyGauge @@ -187,6 +188,7 @@ func (s *FullNodeServiceMetrics) SetupPollingMetrics(ctx context.Context) { // Disconnected clears/unregisters metrics when the connection drops func (s *FullNodeServiceMetrics) Disconnected() { s.version.Reset() + s.gotVersionResponse = false s.difficulty.Unregister() s.mempoolCost.Unregister() s.mempoolMinFee.Reset() @@ -228,9 +230,18 @@ func (s *FullNodeServiceMetrics) Reconnected() { // ReceiveResponse handles full node related responses that are returned over the websocket func (s *FullNodeServiceMetrics) ReceiveResponse(resp *types.WebsocketResponse) { + // Sometimes, when we reconnect, or start exporter before chia is running + // the daemon is up before the service, and the initial request for the version + // doesn't make it to the service + // daemon doesn't queue these messages for later, they just get dropped + if !s.gotVersionResponse { + utils.LogErr(s.metrics.client.FullNodeService.GetVersion(&rpc.GetVersionOptions{})) + } + switch resp.Command { case "get_version": versionHelper(resp, s.version) + s.gotVersionResponse = true case "get_blockchain_state": 
s.GetBlockchainState(resp) // Ask for connection info when we get updated blockchain state @@ -424,6 +435,7 @@ func (s *FullNodeServiceMetrics) RefreshFileSizes() { cfg, err := config.GetChiaConfig() if err != nil { log.Errorf("Error getting chia config: %s\n", err.Error()) + return } database := cfg.GetFullPath(cfg.FullNode.DatabasePath) databaseWal := fmt.Sprintf("%s-wal", database) diff --git a/internal/metrics/harvester.go b/internal/metrics/harvester.go index 6595a4c..788d0e0 100644 --- a/internal/metrics/harvester.go +++ b/internal/metrics/harvester.go @@ -25,8 +25,11 @@ type HarvesterServiceMetrics struct { metrics *Metrics // General Service Metrics + gotVersionResponse bool version *prometheus.GaugeVec + gotPlotsResponse bool + // Connection Metrics connectionCount *prometheus.GaugeVec @@ -124,6 +127,8 @@ func (s *HarvesterServiceMetrics) httpGetPlots() { // Disconnected clears/unregisters metrics when the connection drops func (s *HarvesterServiceMetrics) Disconnected() { s.version.Reset() + s.gotVersionResponse = false + s.gotPlotsResponse = false s.connectionCount.Reset() s.totalPlots.Unregister() s.plotFilesize.Reset() @@ -140,15 +145,28 @@ func (s *HarvesterServiceMetrics) Reconnected() { // ReceiveResponse handles crawler responses that are returned over the websocket func (s *HarvesterServiceMetrics) ReceiveResponse(resp *types.WebsocketResponse) { + // Sometimes, when we reconnect, or start exporter before chia is running + // the daemon is up before the service, and the initial request for the version + // doesn't make it to the service + // daemon doesn't queue these messages for later, they just get dropped + if !s.gotVersionResponse { + utils.LogErr(s.metrics.client.HarvesterService.GetVersion(&rpc.GetVersionOptions{})) + } + if !s.gotPlotsResponse { + s.httpGetPlots() + } + switch resp.Command { case "get_version": versionHelper(resp, s.version) + s.gotVersionResponse = true case "get_connections": s.GetConnections(resp) case "farming_info": 
s.FarmingInfo(resp) case "get_plots": s.GetPlots(resp) + s.gotPlotsResponse = true case "debug": debugHelper(resp, s.debug) } diff --git a/internal/metrics/timelord.go b/internal/metrics/timelord.go index bb1b48a..2983752 100644 --- a/internal/metrics/timelord.go +++ b/internal/metrics/timelord.go @@ -23,6 +23,7 @@ type TimelordServiceMetrics struct { metrics *Metrics // General Service Metrics + gotVersionResponse bool version *prometheus.GaugeVec // Timelord Metrics @@ -62,6 +63,7 @@ func (s *TimelordServiceMetrics) SetupPollingMetrics(ctx context.Context) {} // Disconnected clears/unregisters metrics when the connection drops func (s *TimelordServiceMetrics) Disconnected() { s.version.Reset() + s.gotVersionResponse = false s.fastestTimelord.Unregister() s.slowTimelord.Unregister() s.estimatedIPS.Unregister() @@ -75,10 +77,19 @@ func (s *TimelordServiceMetrics) Reconnected() { // ReceiveResponse handles crawler responses that are returned over the websocket func (s *TimelordServiceMetrics) ReceiveResponse(resp *types.WebsocketResponse) { + // Sometimes, when we reconnect, or start exporter before chia is running + // the daemon is up before the service, and the initial request for the version + // doesn't make it to the service + // daemon doesn't queue these messages for later, they just get dropped + if !s.gotVersionResponse { + utils.LogErr(s.metrics.client.TimelordService.GetVersion(&rpc.GetVersionOptions{})) + } + //("finished_pot_challenge", "new_compact_proof", "skipping_peak", "new_peak") switch resp.Command { case "get_version": versionHelper(resp, s.version) + s.gotVersionResponse = true case "finished_pot": s.FinishedPoT(resp) case "new_compact_proof": diff --git a/internal/metrics/wallet.go b/internal/metrics/wallet.go index 4acaad3..76a8642 100644 --- a/internal/metrics/wallet.go +++ b/internal/metrics/wallet.go @@ -25,8 +25,11 @@ type WalletServiceMetrics struct { metrics *Metrics // General Service Metrics + gotVersionResponse bool version 
*prometheus.GaugeVec + gotWalletsResponse bool + // Connection Metrics connectionCount *prometheus.GaugeVec @@ -93,6 +96,8 @@ func (s *WalletServiceMetrics) SetupPollingMetrics(ctx context.Context) { // Disconnected clears/unregisters metrics when the connection drops func (s *WalletServiceMetrics) Disconnected() { s.version.Reset() + s.gotVersionResponse = false + s.gotWalletsResponse = false s.connectionCount.Reset() s.walletSynced.Unregister() s.confirmedBalance.Reset() @@ -109,9 +114,21 @@ func (s *WalletServiceMetrics) Reconnected() { // ReceiveResponse handles wallet responses that are returned over the websocket func (s *WalletServiceMetrics) ReceiveResponse(resp *types.WebsocketResponse) { + // Sometimes, when we reconnect, or start exporter before chia is running + // the daemon is up before the service, and the initial request for the version + // doesn't make it to the service + // daemon doesn't queue these messages for later, they just get dropped + if !s.gotVersionResponse { + utils.LogErr(s.metrics.client.WalletService.GetVersion(&rpc.GetVersionOptions{})) + } + if !s.gotWalletsResponse { + utils.LogErr(s.metrics.client.WalletService.GetWallets(&rpc.GetWalletsOptions{})) + } + switch resp.Command { case "get_version": versionHelper(resp, s.version) + s.gotVersionResponse = true case "get_connections": s.GetConnections(resp) case "coin_added": @@ -124,6 +141,7 @@ func (s *WalletServiceMetrics) ReceiveResponse(resp *types.WebsocketResponse) { s.GetWalletBalance(resp) case "get_wallets": s.GetWallets(resp) + s.gotWalletsResponse = true case "debug": debugHelper(resp, s.debug) }