Skip to content

Commit

Permalink
Handle initial data cases where daemon was initially up but services …
Browse files Browse the repository at this point in the history
…were not (#202)

* Return from refreshing file sizes if we can't get the config

* if version isn't set when receiving responses, try to get it again

* Check whether or not we know the version once we start receiving messages from services
Fixes a case where daemon is up but service is not for the initial "Get Version" messages

* Go chia libs 0.20.1

* Get initial plots/wallets if those didn't work right away
  • Loading branch information
cmmarslender authored Dec 2, 2024
1 parent d5767a1 commit 666e0b3
Show file tree
Hide file tree
Showing 8 changed files with 87 additions and 5 deletions.
2 changes: 1 addition & 1 deletion go.mod
Original file line number Diff line number Diff line change
Expand Up @@ -3,7 +3,7 @@ module github.com/chia-network/chia-exporter
go 1.21

require (
github.com/chia-network/go-chia-libs v0.20.0
github.com/chia-network/go-chia-libs v0.20.1
github.com/chia-network/go-modules v0.0.8
github.com/go-sql-driver/mysql v1.8.1
github.com/oschwald/maxminddb-golang v1.13.1
Expand Down
7 changes: 4 additions & 3 deletions go.sum
Original file line number Diff line number Diff line change
Expand Up @@ -4,8 +4,8 @@ github.com/beorn7/perks v1.0.1 h1:VlbKKnNfV8bJzeqoa4cOKqO6bYr3WgKZxO8Z16+hsOM=
github.com/beorn7/perks v1.0.1/go.mod h1:G2ZrVWU2WbWT9wwq4/hrbKbnv/1ERSJQ0ibhJ6rlkpw=
github.com/cespare/xxhash/v2 v2.3.0 h1:UL815xU9SqsFlibzuggzjXhog7bL6oX9BbNZnL2UFvs=
github.com/cespare/xxhash/v2 v2.3.0/go.mod h1:VGX0DQ3Q6kWi7AoAeZDth3/j3BFtOZR5XLFGgcrjCOs=
github.com/chia-network/go-chia-libs v0.20.0 h1:rdW1Ob/hb32Pl89w4/9SkoGWVhyC+tOkeRqvPr/xuvg=
github.com/chia-network/go-chia-libs v0.20.0/go.mod h1:npTqaFSjTdMxE7hc0LOmWJmWGqcs+IERarK5fDxXk/I=
github.com/chia-network/go-chia-libs v0.20.1 h1:uEPrjtxot0oipTuWT63Spw9t9kj56JdhdFBS4ZgYdjc=
github.com/chia-network/go-chia-libs v0.20.1/go.mod h1:+RMorskgxwYzPGf2gIyW0k7FGDdLrrH4X5ATrrMreb0=
github.com/chia-network/go-modules v0.0.8 h1:VATMxehRISOhaRwPo/GL735IKWW0G7sUYH2OmBofsfE=
github.com/chia-network/go-modules v0.0.8/go.mod h1:OdvlWftyJc3+i3QYv5cfQsiQASL7Em7fJnzdmPmj07M=
github.com/cpuguy83/go-md2man/v2 v2.0.4/go.mod h1:tgQtvFlXSQOSOSIRvRPT7W67SCa46tRHOmNcaadrF8o=
Expand Down Expand Up @@ -94,8 +94,9 @@ github.com/stretchr/testify v1.7.0/go.mod h1:6Fq8oRcR53rry900zMqJjRRixrwX3KX962/
github.com/stretchr/testify v1.7.1/go.mod h1:6Fq8oRcR53rry900zMqJjRRixrwX3KX962/h/Wwjteg=
github.com/stretchr/testify v1.8.0/go.mod h1:yNjHg4UonilssWZ8iaSj1OCr/vHnekPRkoO+kdMU+MU=
github.com/stretchr/testify v1.8.4/go.mod h1:sz/lmYIOXD/1dqDmKjjqLyZ2RngseejIcXlSw2iwfAo=
github.com/stretchr/testify v1.9.0 h1:HtqpIVDClZ4nwg75+f6Lvsy/wHu+3BoSGCbBAcpTsTg=
github.com/stretchr/testify v1.9.0/go.mod h1:r2ic/lqez/lEtzL7wO/rwa5dbSLXVDPFyf8C91i36aY=
github.com/stretchr/testify v1.10.0 h1:Xv5erBjTwe/5IxqUQTdXv5kgmIvbHo3QQyRwhJsOfJA=
github.com/stretchr/testify v1.10.0/go.mod h1:r2ic/lqez/lEtzL7wO/rwa5dbSLXVDPFyf8C91i36aY=
github.com/subosito/gotenv v1.6.0 h1:9NlTDc1FTs4qu0DDq7AEtTPNw6SVm7uBMsUCUjABIf8=
github.com/subosito/gotenv v1.6.0/go.mod h1:Dk4QP5c2W3ibzajGcXpNraDfq2IrhjMIvMSWPKKo0FU=
go.uber.org/multierr v1.11.0 h1:blXXJkSxSSfBVBlC76pxqeO+LN3aDfLQo+309xJstO0=
Expand Down
11 changes: 11 additions & 0 deletions internal/metrics/crawler.go
Original file line number Diff line number Diff line change
Expand Up @@ -30,6 +30,7 @@ type CrawlerServiceMetrics struct {
metrics *Metrics

// General Service Metrics
gotVersionResponse bool
version *prometheus.GaugeVec

// Current network
Expand Down Expand Up @@ -128,6 +129,7 @@ func (s *CrawlerServiceMetrics) SetupPollingMetrics(ctx context.Context) {}
// Disconnected clears/unregisters metrics when the connection drops
func (s *CrawlerServiceMetrics) Disconnected() {
s.version.Reset()
s.gotVersionResponse = false
s.totalNodes5Days.Unregister()
s.reliableNodes.Unregister()
s.ipv4Nodes5Days.Unregister()
Expand All @@ -143,9 +145,18 @@ func (s *CrawlerServiceMetrics) Reconnected() {

// ReceiveResponse handles crawler responses that are returned over the websocket
func (s *CrawlerServiceMetrics) ReceiveResponse(resp *types.WebsocketResponse) {
// Sometimes, when we reconnect or start the exporter before chia is running,
// the daemon is up before the service, so the initial request for the version
// never makes it to the service.
// The daemon doesn't queue these messages for later; they just get dropped.
if !s.gotVersionResponse {
utils.LogErr(s.metrics.client.FullNodeService.GetVersion(&rpc.GetVersionOptions{}))
}

switch resp.Command {
case "get_version":
versionHelper(resp, s.version)
s.gotVersionResponse = true
case "get_peer_counts":
fallthrough
case "loaded_initial_peers":
Expand Down
11 changes: 11 additions & 0 deletions internal/metrics/farmer.go
Original file line number Diff line number Diff line change
Expand Up @@ -24,6 +24,7 @@ type FarmerServiceMetrics struct {
metrics *Metrics

// General Service Metrics
gotVersionResponse bool
version *prometheus.GaugeVec

// Connection Metrics
Expand Down Expand Up @@ -108,6 +109,7 @@ func (s *FarmerServiceMetrics) SetupPollingMetrics(ctx context.Context) {}
// Disconnected clears/unregisters metrics when the connection drops
func (s *FarmerServiceMetrics) Disconnected() {
s.version.Reset()
s.gotVersionResponse = false
s.connectionCount.Reset()
s.plotFilesize.Reset()
s.plotCount.Reset()
Expand All @@ -123,9 +125,18 @@ func (s *FarmerServiceMetrics) Reconnected() {

// ReceiveResponse handles farmer responses that are returned over the websocket
func (s *FarmerServiceMetrics) ReceiveResponse(resp *types.WebsocketResponse) {
// Sometimes, when we reconnect or start the exporter before chia is running,
// the daemon is up before the service, so the initial request for the version
// never makes it to the service.
// The daemon doesn't queue these messages for later; they just get dropped.
if !s.gotVersionResponse {
utils.LogErr(s.metrics.client.FullNodeService.GetVersion(&rpc.GetVersionOptions{}))
}

switch resp.Command {
case "get_version":
versionHelper(resp, s.version)
s.gotVersionResponse = true
case "get_connections":
s.GetConnections(resp)
case "new_farming_info":
Expand Down
14 changes: 13 additions & 1 deletion internal/metrics/fullnode.go
Original file line number Diff line number Diff line change
Expand Up @@ -36,7 +36,8 @@ type FullNodeServiceMetrics struct {
metrics *Metrics

// General Service Metrics
version *prometheus.GaugeVec
gotVersionResponse bool
version *prometheus.GaugeVec

// GetBlockchainState Metrics
difficulty *wrappedPrometheus.LazyGauge
Expand Down Expand Up @@ -187,6 +188,7 @@ func (s *FullNodeServiceMetrics) SetupPollingMetrics(ctx context.Context) {
// Disconnected clears/unregisters metrics when the connection drops
func (s *FullNodeServiceMetrics) Disconnected() {
s.version.Reset()
s.gotVersionResponse = false
s.difficulty.Unregister()
s.mempoolCost.Unregister()
s.mempoolMinFee.Reset()
Expand Down Expand Up @@ -228,9 +230,18 @@ func (s *FullNodeServiceMetrics) Reconnected() {

// ReceiveResponse handles full node related responses that are returned over the websocket
func (s *FullNodeServiceMetrics) ReceiveResponse(resp *types.WebsocketResponse) {
// Sometimes, when we reconnect or start the exporter before chia is running,
// the daemon is up before the service, so the initial request for the version
// never makes it to the service.
// The daemon doesn't queue these messages for later; they just get dropped.
if !s.gotVersionResponse {
utils.LogErr(s.metrics.client.FullNodeService.GetVersion(&rpc.GetVersionOptions{}))
}

switch resp.Command {
case "get_version":
versionHelper(resp, s.version)
s.gotVersionResponse = true
case "get_blockchain_state":
s.GetBlockchainState(resp)
// Ask for connection info when we get updated blockchain state
Expand Down Expand Up @@ -424,6 +435,7 @@ func (s *FullNodeServiceMetrics) RefreshFileSizes() {
cfg, err := config.GetChiaConfig()
if err != nil {
log.Errorf("Error getting chia config: %s\n", err.Error())
return
}
database := cfg.GetFullPath(cfg.FullNode.DatabasePath)
databaseWal := fmt.Sprintf("%s-wal", database)
Expand Down
18 changes: 18 additions & 0 deletions internal/metrics/harvester.go
Original file line number Diff line number Diff line change
Expand Up @@ -25,8 +25,11 @@ type HarvesterServiceMetrics struct {
metrics *Metrics

// General Service Metrics
gotVersionResponse bool
version *prometheus.GaugeVec

gotPlotsResponse bool

// Connection Metrics
connectionCount *prometheus.GaugeVec

Expand Down Expand Up @@ -124,6 +127,8 @@ func (s *HarvesterServiceMetrics) httpGetPlots() {
// Disconnected clears/unregisters metrics when the connection drops
func (s *HarvesterServiceMetrics) Disconnected() {
s.version.Reset()
s.gotVersionResponse = false
s.gotPlotsResponse = false
s.connectionCount.Reset()
s.totalPlots.Unregister()
s.plotFilesize.Reset()
Expand All @@ -140,15 +145,28 @@ func (s *HarvesterServiceMetrics) Reconnected() {

// ReceiveResponse handles harvester responses that are returned over the websocket
func (s *HarvesterServiceMetrics) ReceiveResponse(resp *types.WebsocketResponse) {
// Sometimes, when we reconnect or start the exporter before chia is running,
// the daemon is up before the service, so the initial request for the version
// never makes it to the service.
// The daemon doesn't queue these messages for later; they just get dropped.
if !s.gotVersionResponse {
utils.LogErr(s.metrics.client.FullNodeService.GetVersion(&rpc.GetVersionOptions{}))
}
if !s.gotPlotsResponse {
s.httpGetPlots()
}

switch resp.Command {
case "get_version":
versionHelper(resp, s.version)
s.gotVersionResponse = true
case "get_connections":
s.GetConnections(resp)
case "farming_info":
s.FarmingInfo(resp)
case "get_plots":
s.GetPlots(resp)
s.gotPlotsResponse = true
case "debug":
debugHelper(resp, s.debug)
}
Expand Down
11 changes: 11 additions & 0 deletions internal/metrics/timelord.go
Original file line number Diff line number Diff line change
Expand Up @@ -23,6 +23,7 @@ type TimelordServiceMetrics struct {
metrics *Metrics

// General Service Metrics
gotVersionResponse bool
version *prometheus.GaugeVec

// Timelord Metrics
Expand Down Expand Up @@ -62,6 +63,7 @@ func (s *TimelordServiceMetrics) SetupPollingMetrics(ctx context.Context) {}
// Disconnected clears/unregisters metrics when the connection drops
func (s *TimelordServiceMetrics) Disconnected() {
s.version.Reset()
s.gotVersionResponse = false
s.fastestTimelord.Unregister()
s.slowTimelord.Unregister()
s.estimatedIPS.Unregister()
Expand All @@ -75,10 +77,19 @@ func (s *TimelordServiceMetrics) Reconnected() {

// ReceiveResponse handles timelord responses that are returned over the websocket
func (s *TimelordServiceMetrics) ReceiveResponse(resp *types.WebsocketResponse) {
// Sometimes, when we reconnect or start the exporter before chia is running,
// the daemon is up before the service, so the initial request for the version
// never makes it to the service.
// The daemon doesn't queue these messages for later; they just get dropped.
if !s.gotVersionResponse {
utils.LogErr(s.metrics.client.FullNodeService.GetVersion(&rpc.GetVersionOptions{}))
}

//("finished_pot_challenge", "new_compact_proof", "skipping_peak", "new_peak")
switch resp.Command {
case "get_version":
versionHelper(resp, s.version)
s.gotVersionResponse = true
case "finished_pot":
s.FinishedPoT(resp)
case "new_compact_proof":
Expand Down
18 changes: 18 additions & 0 deletions internal/metrics/wallet.go
Original file line number Diff line number Diff line change
Expand Up @@ -25,8 +25,11 @@ type WalletServiceMetrics struct {
metrics *Metrics

// General Service Metrics
gotVersionResponse bool
version *prometheus.GaugeVec

gotWalletsResponse bool

// Connection Metrics
connectionCount *prometheus.GaugeVec

Expand Down Expand Up @@ -93,6 +96,8 @@ func (s *WalletServiceMetrics) SetupPollingMetrics(ctx context.Context) {
// Disconnected clears/unregisters metrics when the connection drops
func (s *WalletServiceMetrics) Disconnected() {
s.version.Reset()
s.gotVersionResponse = false
s.gotWalletsResponse = false
s.connectionCount.Reset()
s.walletSynced.Unregister()
s.confirmedBalance.Reset()
Expand All @@ -109,9 +114,21 @@ func (s *WalletServiceMetrics) Reconnected() {

// ReceiveResponse handles wallet responses that are returned over the websocket
func (s *WalletServiceMetrics) ReceiveResponse(resp *types.WebsocketResponse) {
// Sometimes, when we reconnect or start the exporter before chia is running,
// the daemon is up before the service, so the initial request for the version
// never makes it to the service.
// The daemon doesn't queue these messages for later; they just get dropped.
if !s.gotVersionResponse {
utils.LogErr(s.metrics.client.FullNodeService.GetVersion(&rpc.GetVersionOptions{}))
}
if !s.gotWalletsResponse {
utils.LogErr(s.metrics.client.WalletService.GetWallets(&rpc.GetWalletsOptions{}))
}

switch resp.Command {
case "get_version":
versionHelper(resp, s.version)
s.gotVersionResponse = true
case "get_connections":
s.GetConnections(resp)
case "coin_added":
Expand All @@ -124,6 +141,7 @@ func (s *WalletServiceMetrics) ReceiveResponse(resp *types.WebsocketResponse) {
s.GetWalletBalance(resp)
case "get_wallets":
s.GetWallets(resp)
s.gotWalletsResponse = true
case "debug":
debugHelper(resp, s.debug)
}
Expand Down

0 comments on commit 666e0b3

Please sign in to comment.