Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

K8SPSMDB-934: add more logs to mongodb-healthcheck #1587

Merged
merged 27 commits into from
Aug 1, 2024
Merged
Show file tree
Hide file tree
Changes from 17 commits
Commits
Show all changes
27 commits
Select commit Hold shift + click to select a range
31b6128
delete unused code and move everything to cmd/mongodb-healthcheck
pooknull Jul 10, 2024
a04e9b8
use Dial from pkg/psmdb/mongo
pooknull Jul 10, 2024
b081438
delete unused code
pooknull Jul 10, 2024
48f209c
use structs from pkg/psmdb/mongo
pooknull Jul 10, 2024
c3b7604
simplify code
pooknull Jul 10, 2024
adadeaf
add more log messages
pooknull Jul 10, 2024
633c872
remove unused env vars
pooknull Jul 10, 2024
8168099
fix lint
pooknull Jul 11, 2024
64768d0
fixes and improvements
pooknull Jul 15, 2024
d1e403c
add LOGS_DIR env var
pooknull Jul 15, 2024
5ae0775
Merge branch 'main' into dev/K8SPSMDB-934
hors Jul 16, 2024
673bfda
rotate logs
pooknull Jul 17, 2024
0d5defe
fix fmt
pooknull Jul 17, 2024
9968a37
Merge branch 'main' into dev/K8SPSMDB-934
pooknull Jul 18, 2024
ca69b77
update liveness test
pooknull Jul 19, 2024
b5f686e
Merge branch 'main' into dev/K8SPSMDB-934
hors Jul 22, 2024
a343dea
fix pvc-resize
pooknull Jul 22, 2024
c9efc0c
return LOG_LEVEL env var
pooknull Jul 23, 2024
2ec97e4
Merge remote-tracking branch 'origin/main' into dev/K8SPSMDB-934
pooknull Jul 23, 2024
d8e7ba6
fmt
pooknull Jul 24, 2024
6010ce2
Merge branch 'main' into dev/K8SPSMDB-934
pooknull Jul 24, 2024
f19bda2
Merge branch 'main' into dev/K8SPSMDB-934
hors Jul 26, 2024
2bf3d16
Merge branch 'main' into dev/K8SPSMDB-934
hors Jul 26, 2024
e95cda3
check only pvcs
pooknull Jul 26, 2024
178484c
Merge branch 'main' into dev/K8SPSMDB-934
pooknull Jul 30, 2024
1c5a404
Merge branch 'main' into dev/K8SPSMDB-934
hors Jul 31, 2024
14d5dd9
Merge branch 'main' into dev/K8SPSMDB-934
hors Aug 1, 2024
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Original file line number Diff line number Diff line change
Expand Up @@ -14,13 +14,11 @@

package db

import "strconv"
import (
"github.com/percona/percona-server-mongodb-operator/pkg/psmdb/mongo"
)

type Addr struct {
Host string
Port int
}

func (a Addr) String() string {
return a.Host + ":" + strconv.Itoa(a.Port)
type Config struct {
mongo.Config
SSL *SSLConfig
}
41 changes: 7 additions & 34 deletions healthcheck/tools/db/db.go → cmd/mongodb-healthcheck/db/db.go
Original file line number Diff line number Diff line change
Expand Up @@ -16,11 +16,8 @@ package db

import (
"context"
"time"

"github.com/pkg/errors"
mgo "go.mongodb.org/mongo-driver/mongo"
"go.mongodb.org/mongo-driver/mongo/options"
logf "sigs.k8s.io/controller-runtime/pkg/log"

"github.com/percona/percona-server-mongodb-operator/pkg/psmdb/mongo"
Expand All @@ -39,44 +36,20 @@ func Dial(ctx context.Context, conf *Config) (mongo.Client, error) {
log := logf.FromContext(ctx)
log.V(1).Info("Connecting to mongodb", "hosts", conf.Hosts, "ssl", conf.SSL.Enabled, "ssl_insecure", conf.SSL.Insecure)

opts := options.Client().
SetHosts(conf.Hosts).
SetReplicaSet(conf.ReplSetName).
SetAuth(options.Credential{Password: conf.Password, Username: conf.Username}).
SetTLSConfig(conf.TLSConf).
SetConnectTimeout(10 * time.Second).
SetServerSelectionTimeout(10 * time.Second)

if conf.Username != "" && conf.Password != "" {
log.V(1).Info("Enabling authentication for session", "user", conf.Username)
}

client, err := mgo.Connect(ctx, opts)
cl, err := mongo.Dial(&conf.Config)
if err != nil {
return nil, errors.Wrap(err, "connect to mongo replica set")
}

if err := client.Ping(ctx, nil); err != nil {
if err := client.Disconnect(ctx); err != nil {
return nil, errors.Wrap(err, "disconnect client")
}

opts := options.Client().
SetHosts(conf.Hosts).
SetTLSConfig(conf.TLSConf).
SetConnectTimeout(10 * time.Second).
SetServerSelectionTimeout(10 * time.Second).
SetDirect(true)

client, err = mgo.Connect(ctx, opts)
cfg := conf.Config
cfg.Direct = true
cfg.ReplSetName = ""
cl, err = mongo.Dial(&cfg)
if err != nil {
return nil, errors.Wrap(err, "connect to mongo replica set with direct")
}

if err := client.Ping(ctx, nil); err != nil {
return nil, errors.Wrap(err, "ping mongo")
return nil, errors.Wrap(err, "filed to dial mongo")
}
}

return mongo.ToInterface(client), nil
return cl, nil
}
Original file line number Diff line number Diff line change
Expand Up @@ -23,8 +23,6 @@ import (
logf "sigs.k8s.io/controller-runtime/pkg/log"
)

var lastSSLErr error

type SSLConfig struct {
Enabled bool
PEMKeyFile string
Expand All @@ -42,11 +40,6 @@ func (sc *SSLConfig) loadCaCertificate() (*x509.CertPool, error) {
return certificates, nil
}

// LastSSLError returns the last error related to the DB connection SSL handshake.
// It reports the current value of the package-level lastSSLErr variable.
func LastSSLError() error {
return lastSSLErr
}

func (cnf *Config) configureTLS() error {
log := logf.Log

Expand All @@ -60,7 +53,7 @@ func (cnf *Config) configureTLS() error {

// Configure client cert
if len(cnf.SSL.PEMKeyFile) != 0 {
if err := isFileExists(cnf.SSL.PEMKeyFile); err != nil {
if _, err := os.Stat(cnf.SSL.PEMKeyFile); err != nil {
return errors.Wrapf(err, "check if file with name %s exists", cnf.SSL.PEMKeyFile)
}

Expand All @@ -75,7 +68,7 @@ func (cnf *Config) configureTLS() error {

// Configure CA cert
if len(cnf.SSL.CAFile) != 0 {
if err := isFileExists(cnf.SSL.CAFile); err != nil {
if _, err := os.Stat(cnf.SSL.CAFile); err != nil {
return errors.Wrapf(err, "check if file with name %s exists", cnf.SSL.CAFile)
}

Expand All @@ -91,8 +84,3 @@ func (cnf *Config) configureTLS() error {
cnf.TLSConf = config
return nil
}

// isFileExists stats the file at name and returns the resulting error.
// A nil result means the stat call succeeded.
func isFileExists(name string) error {
	if _, statErr := os.Stat(name); statErr != nil {
		return statErr
	}
	return nil
}
Original file line number Diff line number Diff line change
Expand Up @@ -22,61 +22,17 @@ import (
"github.com/pkg/errors"
"go.mongodb.org/mongo-driver/bson"
"go.mongodb.org/mongo-driver/bson/primitive"
logf "sigs.k8s.io/controller-runtime/pkg/log"

"github.com/percona/percona-server-mongodb-operator/healthcheck/tools/db"
"github.com/percona/percona-server-mongodb-operator/cmd/mongodb-healthcheck/db"
"github.com/percona/percona-server-mongodb-operator/pkg/psmdb/mongo"
)

// OkMemberStates is a slice of acceptable replication member states:
// primary, secondary, recovering, arbiter, startup2 and rollback.
var OkMemberStates = []mongo.MemberState{
mongo.MemberStatePrimary,
mongo.MemberStateSecondary,
mongo.MemberStateRecovering,
mongo.MemberStateArbiter,
mongo.MemberStateStartup2,
mongo.MemberStateRollback,
}

// ErrNoReplsetConfigStr is the error text that HealthCheckMongodLiveness compares
// against the replSetGetStatus error; on a match the member state is reported as
// unknown instead of failing the check.
var ErrNoReplsetConfigStr string = "(NotYetInitialized) no replset config has been received"

// getSelfMemberState returns the replication state of the local MongoDB member,
// or nil when the status has no self entry or that entry is not reported as up.
func getSelfMemberState(rsStatus *mongo.Status) *mongo.MemberState {
	self := rsStatus.GetSelf()
	if self == nil {
		return nil
	}
	if self.Health != mongo.MemberHealthUp {
		return nil
	}
	return &self.State
}

// isStateOk reports whether memberState matches one of the acceptable member
// states in okMemberStates. A nil memberState is never acceptable.
func isStateOk(memberState *mongo.MemberState, okMemberStates []mongo.MemberState) bool {
	// Guard against a nil pointer so the comparison below cannot panic.
	if memberState == nil {
		return false
	}
	for _, state := range okMemberStates {
		if *memberState == state {
			return true
		}
	}
	return false
}

// HealthCheck checks the replication member state of the local MongoDB member.
//
// It fetches the replica set status through client and looks up the entry for
// the local member. It returns StateOk together with the member state when that
// state is listed in okMemberStates. Otherwise it returns StateFailed and a
// non-nil error; the returned member state is nil when the status could not be
// fetched or when no self member that is reported as up was found.
func HealthCheck(client mongo.Client, okMemberStates []mongo.MemberState) (State, *mongo.MemberState, error) {
	rsStatus, err := client.RSStatus(context.TODO())
	if err != nil {
		return StateFailed, nil, errors.Wrap(err, "get replica set status")
	}

	state := getSelfMemberState(&rsStatus)
	if state == nil {
		return StateFailed, state, errors.New("found no member state for self in replica set status")
	}
	if isStateOk(state, okMemberStates) {
		return StateOk, state, nil
	}

	// Dereference the pointer so the message carries the numeric member state
	// rather than the pointer's address.
	return StateFailed, state, errors.Errorf("member has unhealthy replication state: %d", *state)
}

func HealthCheckMongosLiveness(ctx context.Context, cnf *db.Config) (err error) {
log := logf.FromContext(ctx).WithName("HealthCheckMongosLiveness")

client, err := db.Dial(ctx, cnf)
if err != nil {
return errors.Wrap(err, "connection error")
Expand All @@ -93,13 +49,16 @@ func HealthCheckMongosLiveness(ctx context.Context, cnf *db.Config) (err error)
}

if isMasterResp.Msg != "isdbgrid" {
log.V(1).Info("Wrong isMaster msg", "msg", isMasterResp.Msg)
return errors.New("wrong msg")
}

return nil
}

func HealthCheckMongodLiveness(ctx context.Context, cnf *db.Config, startupDelaySeconds int64) (_ *mongo.MemberState, err error) {
log := logf.FromContext(ctx).WithName("HealthCheckMongodLiveness")

client, err := db.Dial(ctx, cnf)
if err != nil {
return nil, errors.Wrap(err, "connection error")
Expand Down Expand Up @@ -134,6 +93,7 @@ func HealthCheckMongodLiveness(ctx context.Context, cnf *db.Config, startupDelay
// to die before they added to a replset
if res.Err().Error() == ErrNoReplsetConfigStr {
state := mongo.MemberStateUnknown
log.V(1).Info("replSetGetStatus failed", "err", res.Err().Error(), "state", state)
return &state, nil
}
return nil, errors.Wrap(res.Err(), "get replsetGetStatus response")
Expand Down Expand Up @@ -162,6 +122,7 @@ func HealthCheckMongodLiveness(ctx context.Context, cnf *db.Config, startupDelay

oplogRs := OplogRs{}
if !isMasterResp.IsArbiter {
log.V(1).Info("Getting \"oplog.rs\" info")
res := client.Database("local").RunCommand(ctx, bson.D{
{Key: "collStats", Value: "oplog.rs"},
{Key: "scale", Value: 1024 * 1024 * 1024}, // scale size to gigabytes
Expand All @@ -172,8 +133,8 @@ func HealthCheckMongodLiveness(ctx context.Context, cnf *db.Config, startupDelay
if err := res.Decode(&oplogRs); err != nil {
return nil, errors.Wrap(err, "decode oplog.rs info")
}
if oplogRs.Ok == 0 {
return nil, errors.New(oplogRs.Errmsg)
if oplogRs.OK == 0 {
return nil, errors.Wrap(errors.New("non-ok response from getting oplog.rs info"), oplogRs.Errmsg)
}
}

Expand All @@ -182,28 +143,22 @@ func HealthCheckMongodLiveness(ctx context.Context, cnf *db.Config, startupDelay
storageSize = oplogRs.StorageSize
}

log.V(1).Info("Checking state", "state", rsStatus.MyState, "storage size", storageSize)
if err := CheckState(rsStatus, startupDelaySeconds, storageSize); err != nil {
return &rsStatus.MyState, err
}

return &rsStatus.MyState, nil
}

// ServerStatus holds the status fields decoded from a MongoDB command response.
type ServerStatus struct {
// Ok is decoded from the response's "ok" field.
Ok int `bson:"ok" json:"ok"`
// Errmsg is decoded from the response's optional "errmsg" field.
Errmsg string `bson:"errmsg,omitempty" json:"errmsg,omitempty"`
}

type OplogRs struct {
StorageSize int64 `bson:"storageSize" json:"storageSize"`

Ok int `bson:"ok" json:"ok"`
Errmsg string `bson:"errmsg,omitempty" json:"errmsg,omitempty"`
mongo.OKResponse `bson:",inline"`
StorageSize int64 `bson:"storageSize" json:"storageSize"`
}

type ReplSetStatus struct {
mongo.Status `bson:",inline"`
InitialSyncStatus InitialSyncStatus `bson:"initialSyncStatus" json:"initialSyncStatus"`
mongo.Status `bson:",inline"`
}

type InitialSyncStatus interface{}
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -20,13 +20,19 @@ import (

"github.com/pkg/errors"
"go.mongodb.org/mongo-driver/bson"
logf "sigs.k8s.io/controller-runtime/pkg/log"

"github.com/percona/percona-server-mongodb-operator/healthcheck/tools/db"
"github.com/percona/percona-server-mongodb-operator/cmd/mongodb-healthcheck/db"
"github.com/percona/percona-server-mongodb-operator/pkg/psmdb/mongo"
)

// ReadinessCheck runs a ping on a pmgo.SessionManager to check server readiness
// MongodReadinessCheck checks mongod readiness by opening a TCP connection to addr
func MongodReadinessCheck(ctx context.Context, addr string) error {
log := logf.FromContext(ctx).WithName("MongodReadinessCheck")

var d net.Dialer

log.V(1).Info("Connecting to " + addr)
conn, err := d.DialContext(ctx, "tcp", addr)
if err != nil {
return errors.Wrap(err, "dial")
Expand All @@ -35,6 +41,8 @@ func MongodReadinessCheck(ctx context.Context, addr string) error {
}

func MongosReadinessCheck(ctx context.Context, cnf *db.Config) (err error) {
log := logf.FromContext(ctx).WithName("MongosReadinessCheck")

client, err := db.Dial(ctx, cnf)
if err != nil {
return errors.Wrap(err, "connection error")
Expand All @@ -45,21 +53,23 @@ func MongosReadinessCheck(ctx context.Context, cnf *db.Config) (err error) {
}
}()

ss := ServerStatus{}
log.V(1).Info("Running listDatabases")
resp := mongo.OKResponse{}
cur := client.Database("admin").RunCommand(ctx, bson.D{
{Key: "listDatabases", Value: 1},
{Key: "filter", Value: bson.D{{Key: "name", Value: "admin"}}},
{Key: "nameOnly", Value: true}})
{Key: "nameOnly", Value: true},
})
if cur.Err() != nil {
return errors.Wrap(cur.Err(), "run listDatabases")
}

if err := cur.Decode(&ss); err != nil {
if err := cur.Decode(&resp); err != nil {
return errors.Wrap(err, "decode listDatabases response")
}

if ss.Ok == 0 {
return errors.New(ss.Errmsg)
if resp.OK == 0 {
return errors.Wrap(errors.New("non-ok response from listDatabases"), resp.Errmsg)
}

return nil
Expand Down
Loading
Loading