Skip to content

Commit

Permalink
Merge pull request #10 from doyoubi/PostponeRemovePods
Browse files Browse the repository at this point in the history
Fix connection reset errors when scaling down
  • Loading branch information
doyoubi authored Oct 31, 2020
2 parents 603fa1d + cba22a9 commit a9c85ef
Show file tree
Hide file tree
Showing 14 changed files with 101 additions and 19 deletions.
2 changes: 1 addition & 1 deletion Makefile
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
# Current Operator version
VERSION ?= v0.1.0-alpha.0
VERSION ?= v0.1.0-alpha.1
# Default bundle image tag
BUNDLE_IMG ?= controller-bundle:$(VERSION)
# Options for 'bundle-build'
Expand Down
2 changes: 1 addition & 1 deletion Makefile.utils
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
OPERATOR_VERSION=$(VERSION)
OPERATOR_HELM_VERSION=0.1.1
OPERATOR_HELM_VERSION=0.1.2
CHECKER_HELM_VERSION=0.1.0

TEST_K8S_VER="v1.18.2"
Expand Down
11 changes: 11 additions & 0 deletions api/v1alpha1/undermoon_types.go
Original file line number Diff line number Diff line change
Expand Up @@ -70,7 +70,18 @@ type UndermoonStatus struct {

// Master broker address pointing to the master broker.
// +kubebuilder:validation:MinLength=1
// +optional
MasterBrokerAddress string `json:"masterBrokerAddress"`

// ScaleState is used to control scaling storage pods.
// +optional
ScaleState string `json:"scaleState"`

// ScaleDownWaitTimestamp is used to wait for some time
// before scaling down storage StatefulSet Pods
// to avoid connection reset.
// +optional
ScaleDownWaitTimestamp metav1.Time `json:"scaleDownWaitTimestamp"`
}

// +kubebuilder:object:root=true
Expand Down
3 changes: 2 additions & 1 deletion api/v1alpha1/zz_generated.deepcopy.go

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

10 changes: 8 additions & 2 deletions config/crd/bases/undermoon.doyoubi.mydomain_undermoons.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -197,8 +197,14 @@ spec:
description: Master broker address pointing to the master broker.
minLength: 1
type: string
required:
- masterBrokerAddress
scaleDownWaitTimestamp:
description: ScaleDownWaitTimestamp is used to wait for some time before
scaling down storage StatefulSet Pods to avoid connection reset.
format: date-time
type: string
scaleState:
description: ScaleState is used to control scaling storage pods.
type: string
type: object
type: object
version: v1alpha1
Expand Down
2 changes: 1 addition & 1 deletion config/manager/overlays/test/kustomization.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -7,7 +7,7 @@ resources:
images:
- name: controller
newName: localhost:5000/undermoon-operator
newTag: v0.0.2
newTag: v0.1.0-alpha.1

patchesJson6902:
- target:
Expand Down
14 changes: 8 additions & 6 deletions controllers/broker_controller.go
Original file line number Diff line number Diff line change
Expand Up @@ -2,6 +2,7 @@ package controllers

import (
"context"
"fmt"

undermoonv1alpha1 "github.com/doyoubi/undermoon-operator/api/v1alpha1"
"github.com/go-logr/logr"
Expand All @@ -10,6 +11,7 @@ import (
corev1 "k8s.io/api/core/v1"
"k8s.io/apimachinery/pkg/api/errors"
"k8s.io/apimachinery/pkg/types"
"sigs.k8s.io/controller-runtime/pkg/client"
"sigs.k8s.io/controller-runtime/pkg/controller/controllerutil"
)

Expand Down Expand Up @@ -169,13 +171,13 @@ func (con *memBrokerController) reconcileMaster(reqLogger logr.Logger, cr *under
}

func (con *memBrokerController) setMasterBrokerStatus(reqLogger logr.Logger, cr *undermoonv1alpha1.Undermoon, masterBrokerAddress string) error {
cr.Status.MasterBrokerAddress = masterBrokerAddress
err := con.r.client.Status().Update(context.TODO(), cr)
// `masterBrokerAddress` is only exposed to external for debugging.
// It does not need to be accurate so use PATCH here.
patchData := []byte(fmt.Sprintf(`{"status":{"masterBrokerAddress":"%s"}}`, masterBrokerAddress))
patch := client.RawPatch(types.MergePatchType, patchData)

err := con.r.client.Status().Patch(context.TODO(), cr, patch)
if err != nil {
if errors.IsConflict(err) {
reqLogger.Info("Conflict on master broker status. Try again.", "error", err)
return errRetryReconciliation
}
reqLogger.Error(err, "Failed to set master broker address")
return err
}
Expand Down
56 changes: 56 additions & 0 deletions controllers/storage_controller.go
Original file line number Diff line number Diff line change
Expand Up @@ -4,6 +4,7 @@ import (
"context"
"strconv"
"strings"
"time"

undermoonv1alpha1 "github.com/doyoubi/undermoon-operator/api/v1alpha1"
pkgerrors "github.com/pkg/errors"
Expand All @@ -12,10 +13,18 @@ import (
appsv1 "k8s.io/api/apps/v1"
corev1 "k8s.io/api/core/v1"
"k8s.io/apimachinery/pkg/api/errors"
metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
"k8s.io/apimachinery/pkg/types"
"sigs.k8s.io/controller-runtime/pkg/controller/controllerutil"
)

// Values written to the ScaleState field of the Undermoon status while the
// storage StatefulSet is being scaled down.
const (
	// scaleStateScaleDownWait marks that a scale-down has been requested and
	// the controller is waiting before shrinking the StatefulSet.
	scaleStateScaleDownWait = "ScaleDownWait"
	// scaleStateStable is set once the StatefulSet has been updated.
	scaleStateStable = "Stable"
)

// scaleDownWaitTime is how long the controller waits, measured from
// ScaleDownWaitTimestamp, before it actually scales down the storage
// StatefulSet, to avoid connection resets.
const scaleDownWaitTime = 30 * time.Second

// storageController carries the methods that reconcile the storage
// StatefulSet (for example scaling it down and tracking the scale state).
type storageController struct {
	// r is the reconciler this controller belongs to; its client is used to
	// talk to the Kubernetes API.
	r *UndermoonReconciler
}
Expand Down Expand Up @@ -142,10 +151,57 @@ func (con *storageController) scaleDownStorageStatefulSet(reqLogger logr.Logger,
return errRetryReconciliation
}

replicaNum := int32(int(cr.Spec.ChunkNumber) * halfChunkNodeNumber)
if *storage.Spec.Replicas == replicaNum {
return nil
}

// Postpone scaling down statefulset to avoid connection reset
if cr.Status.ScaleState != scaleStateScaleDownWait {
if err := con.setScaleState(reqLogger, cr, scaleStateScaleDownWait); err != nil {
return err
}
return errRetryReconciliation
}

if cr.Status.ScaleDownWaitTimestamp.Add(scaleDownWaitTime).After(time.Now()) {
reqLogger.Info("Wait extra time before scaling down")
return errRetryReconciliation
}

err := con.updateStorageStatefulSet(reqLogger, cr, storage)
if err != nil {
return err
}

if err := con.setScaleState(reqLogger, cr, scaleStateStable); err != nil {
return err
}

return nil
}

// setScaleState stores scaleState in cr.Status.ScaleState and persists the
// status of cr through the status client.
//
// When scaleState is scaleStateScaleDownWait, ScaleDownWaitTimestamp is set to
// the current time so that the scale-down wait can be measured from it; for
// any other state the timestamp is reset to the Unix epoch.
//
// It returns nil on success, errRetryReconciliation when the update hits a
// conflict, and the underlying error otherwise. cr.Status is modified in
// memory before the update is attempted and is not rolled back on failure.
func (con *storageController) setScaleState(reqLogger logr.Logger, cr *undermoonv1alpha1.Undermoon, scaleState string) error {
	// Capture the state before overwriting it so a failure log can report the
	// real transition instead of the new state twice.
	prevState := cr.Status.ScaleState

	cr.Status.ScaleState = scaleState
	if scaleState == scaleStateScaleDownWait {
		cr.Status.ScaleDownWaitTimestamp = metav1.Now()
	} else {
		cr.Status.ScaleDownWaitTimestamp = metav1.Unix(0, 0)
	}

	// This needs to be accurate so use UPDATE with ResourceVersion check
	// instead of PATCH here.
	err := con.r.client.Status().Update(context.TODO(), cr)
	if err != nil {
		if errors.IsConflict(err) {
			reqLogger.Info("Conflict on updating ScaleState. Try again.", "error", err)
			return errRetryReconciliation
		}
		reqLogger.Error(err, "Failed to change ScaleState",
			"currState", prevState,
			"newState", scaleState)
		return err
	}
	return nil
}

Expand Down
2 changes: 1 addition & 1 deletion controllers/undermoon_controller.go
Original file line number Diff line number Diff line change
Expand Up @@ -176,7 +176,7 @@ func (r *UndermoonReconciler) Reconcile(request ctrl.Request) (ctrl.Result, erro
err = r.storageCon.scaleDownStorageStatefulSet(reqLogger, instance, resource.storageStatefulSet, info)
if err != nil {
if err == errRetryReconciliation {
return reconcile.Result{Requeue: true, RequeueAfter: 3 * time.Second}, nil
return reconcile.Result{Requeue: true, RequeueAfter: 10 * time.Second}, nil
}
return reconcile.Result{}, err
}
Expand Down
1 change: 1 addition & 0 deletions go.sum
Original file line number Diff line number Diff line change
Expand Up @@ -516,6 +516,7 @@ honnef.co/go/tools v0.0.0-20190106161140-3f1c8253044a/go.mod h1:rf3lG4BRIbNafJWh
honnef.co/go/tools v0.0.0-20190523083050-ea95bdfd59fc/go.mod h1:rf3lG4BRIbNafJWhAfAdb/ePZxsR/4RtNHQocxwk9r4=
k8s.io/api v0.18.6 h1:osqrAXbOQjkKIWDTjrqxWQ3w0GkKb1KA1XkUGHHYpeE=
k8s.io/api v0.18.6/go.mod h1:eeyxr+cwCjMdLAmr2W3RyDI0VvTawSg/3RFFBEnmZGI=
k8s.io/api v0.19.3 h1:GN6ntFnv44Vptj/b+OnMW7FmzkpDoIDLZRvKX3XH9aU=
k8s.io/apiextensions-apiserver v0.18.6 h1:vDlk7cyFsDyfwn2rNAO2DbmUbvXy5yT5GE3rrqOzaMo=
k8s.io/apiextensions-apiserver v0.18.6/go.mod h1:lv89S7fUysXjLZO7ke783xOwVTm6lKizADfvUM/SS/M=
k8s.io/apimachinery v0.18.6 h1:RtFHnfGNfd1N0LeSrKCUznz5xtUP1elRGvHJbL3Ntag=
Expand Down
2 changes: 1 addition & 1 deletion helm/undermoon-cluster/Chart.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -15,5 +15,5 @@ type: application
# This is the chart version. This version number should be incremented each time you make changes
# to the chart and its templates, including the app version.
# Versions are expected to follow Semantic Versioning (https://semver.org/)
version: 0.1.1
version: 0.1.2

4 changes: 2 additions & 2 deletions helm/undermoon-operator/Chart.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -15,11 +15,11 @@ type: application
# This is the chart version. This version number should be incremented each time you make changes
# to the chart and its templates, including the app version.
# Versions are expected to follow Semantic Versioning (https://semver.org/)
version: 0.1.1
version: 0.1.2

# This is the version number of the application being deployed. This version number should be
# incremented each time you make changes to the application. Versions are not expected to
# follow Semantic Versioning. They should reflect the version the application is using.
#
# This should be the same as the version of undermoon-operator.
appVersion: v0.0.2
appVersion: v0.1.0-alpha.1
Original file line number Diff line number Diff line change
Expand Up @@ -172,8 +172,13 @@ spec:
description: Master broker address pointing to the master broker.
minLength: 1
type: string
required:
- masterBrokerAddress
scaleDownWaitTimestamp:
description: ScaleDownWaitTimestamp is used to wait for some time before scaling down storage StatefulSet Pods to avoid connection reset.
format: date-time
type: string
scaleState:
description: ScaleState is used to control scaling storage pods.
type: string
type: object
type: object
version: v1alpha1
Expand Down
2 changes: 1 addition & 1 deletion helm/undermoon-operator/values.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -4,7 +4,7 @@

image:
operatorImage: doyoubi/undermoon-operator
operatorImageTag: v0.1.0-alpha.0
operatorImageTag: v0.1.0-alpha.1
operatorImagePullPolicy: IfNotPresent

nameOverride: ""
Expand Down

0 comments on commit a9c85ef

Please sign in to comment.