Skip to content

Commit

Permalink
Add OSD num versions health indicator
Browse files Browse the repository at this point in the history
Signed-off-by: Igor Shishkin <me@teran.dev>
  • Loading branch information
teran committed May 10, 2024
1 parent a03240f commit 9fc0e9b
Show file tree
Hide file tree
Showing 5 changed files with 122 additions and 0 deletions.
13 changes: 13 additions & 0 deletions models/clusterhealth.go
Original file line number Diff line number Diff line change
Expand Up @@ -88,6 +88,19 @@ const (
// Dangerous: >10
ClusterHealthIndicatorTypeOSDsMetadataSize ClusterHealthIndicatorType = "OSD_METADATA_SIZE"

// ClusterHealthIndicatorTypeOSDsNumDaemonVersions indicates the number of
// distinct versions of OSD daemons running at the same time
//
// Description: running different versions of components is normal only
// while an upgrade procedure is in progress. In all other cases daemon
// versions should match, i.e. the number of distinct versions must be
// equal to 1.
//
// Good: 1
// AtRisk: 2
// Dangerous: >2
ClusterHealthIndicatorTypeOSDsNumDaemonVersions ClusterHealthIndicatorType = "OSD_NUM_DAEMON_VERSIONS"

// ClusterHealthIndicatorTypeQuorum reflects monitor quorum status
//
// Description: monitors in quorum which should be the same as total
Expand Down
19 changes: 19 additions & 0 deletions service/cluster_health/cluster_health.go
Original file line number Diff line number Diff line change
Expand Up @@ -119,6 +119,25 @@ func OSDsMetadataSize(ctx context.Context, cr models.ClusterReport) (models.Clus
}, nil
}

// OSDsNumDaemonVersions builds the health indicator describing how many
// distinct OSD daemon versions appear in the cluster report.
//
// The reported value is the number of keys in cr.NumOSDsByVersion, formatted
// as a decimal string. The status is Good for exactly one version, AtRisk for
// two, Dangerous for more than two, and Unknown when no versions are present.
// The returned error is always nil.
func OSDsNumDaemonVersions(ctx context.Context, cr models.ClusterReport) (models.ClusterHealthIndicator, error) {
	distinct := len(cr.NumOSDsByVersion)

	// Unknown stays in effect when the report carries no version data at all.
	status := models.ClusterHealthIndicatorStatusUnknown
	switch {
	case distinct == 1:
		status = models.ClusterHealthIndicatorStatusGood
	case distinct == 2:
		status = models.ClusterHealthIndicatorStatusAtRisk
	case distinct > 2:
		status = models.ClusterHealthIndicatorStatusDangerous
	}

	indicator := models.ClusterHealthIndicator{
		Indicator:          models.ClusterHealthIndicatorTypeOSDsNumDaemonVersions,
		CurrentValue:       strconv.Itoa(distinct),
		CurrentValueStatus: status,
	}
	return indicator, nil
}

func Quorum(ctx context.Context, cr models.ClusterReport) (models.ClusterHealthIndicator, error) {
st := models.ClusterHealthIndicatorStatusGood
if cr.NumMonsInQuorum < cr.NumMons {
Expand Down
84 changes: 84 additions & 0 deletions service/cluster_health/cluster_health_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -297,6 +297,90 @@ func TestOSDsMetadataSize(t *testing.T) {
}
}

func TestOSDsNumDaemonVersions(t *testing.T) {
tcs := []testCase{
{
name: "single version",
in: models.ClusterReport{
NumOSDsByVersion: map[string]uint16{
"18.2.2": 3,
},
},
expOut: models.ClusterHealthIndicator{
Indicator: models.ClusterHealthIndicatorTypeOSDsNumDaemonVersions,
CurrentValue: "1",
CurrentValueStatus: models.ClusterHealthIndicatorStatusGood,
},
},
{
name: "two versions",
in: models.ClusterReport{
NumOSDsByVersion: map[string]uint16{
"18.2.1": 1,
"18.2.2": 3,
},
},
expOut: models.ClusterHealthIndicator{
Indicator: models.ClusterHealthIndicatorTypeOSDsNumDaemonVersions,
CurrentValue: "2",
CurrentValueStatus: models.ClusterHealthIndicatorStatusAtRisk,
},
},
{
name: "three versions",
in: models.ClusterReport{
NumOSDsByVersion: map[string]uint16{
"18.2.0": 1,
"18.2.1": 2,
"18.2.2": 3,
},
},
expOut: models.ClusterHealthIndicator{
Indicator: models.ClusterHealthIndicatorTypeOSDsNumDaemonVersions,
CurrentValue: "3",
CurrentValueStatus: models.ClusterHealthIndicatorStatusDangerous,
},
},
{
name: "four versions",
in: models.ClusterReport{
NumOSDsByVersion: map[string]uint16{
"17.2.9": 4,
"18.2.0": 1,
"18.2.1": 2,
"18.2.2": 3,
},
},
expOut: models.ClusterHealthIndicator{
Indicator: models.ClusterHealthIndicatorTypeOSDsNumDaemonVersions,
CurrentValue: "4",
CurrentValueStatus: models.ClusterHealthIndicatorStatusDangerous,
},
},
{
name: "no versions",
in: models.ClusterReport{
NumOSDsByVersion: map[string]uint16{},
},
expOut: models.ClusterHealthIndicator{
Indicator: models.ClusterHealthIndicatorTypeOSDsNumDaemonVersions,
CurrentValue: "0",
CurrentValueStatus: models.ClusterHealthIndicatorStatusUnknown,
},
},
}

for _, tc := range tcs {
t.Run(tc.name, func(t *testing.T) {
r := require.New(t)

i, err := OSDsNumDaemonVersions(context.Background(), tc.in)
r.NoError(err)
r.Equal(tc.expOut, i)
})
}
}

func TestQuorum(t *testing.T) {
tcs := []testCase{
{
Expand Down
1 change: 1 addition & 0 deletions service/service.go
Original file line number Diff line number Diff line change
Expand Up @@ -22,6 +22,7 @@ var clusterHealthChecksList = []clusterHealth.ClusterHealthCheck{
clusterHealth.InactivePGs,
clusterHealth.AllowCrimson,
clusterHealth.OSDsMetadataSize,
clusterHealth.OSDsNumDaemonVersions,
}

type Service interface {
Expand Down
5 changes: 5 additions & 0 deletions service/service_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -113,6 +113,11 @@ func (s *serviceTestSuite) TestCheckClusterHealth() {
CurrentValue: "2.30",
CurrentValueStatus: models.ClusterHealthIndicatorStatusGood,
},
{
Indicator: models.ClusterHealthIndicatorTypeOSDsNumDaemonVersions,
CurrentValue: "1",
CurrentValueStatus: models.ClusterHealthIndicatorStatusGood,
},
}, chi)
}

Expand Down

0 comments on commit 9fc0e9b

Please sign in to comment.