From dc4e1c635cf018aea571f1273223a4866fbb6fff Mon Sep 17 00:00:00 2001 From: doyoubi Date: Sat, 31 Oct 2020 18:02:49 +0800 Subject: [PATCH 1/2] Fix connection reset errors when scaling down --- api/v1alpha1/undermoon_types.go | 11 ++++ api/v1alpha1/zz_generated.deepcopy.go | 3 +- ...undermoon.doyoubi.mydomain_undermoons.yaml | 10 +++- controllers/broker_controller.go | 14 +++-- controllers/storage_controller.go | 56 +++++++++++++++++++ controllers/undermoon_controller.go | 2 +- go.sum | 1 + ...undermoon.doyoubi.mydomain_undermoons.yaml | 9 ++- 8 files changed, 94 insertions(+), 12 deletions(-) diff --git a/api/v1alpha1/undermoon_types.go b/api/v1alpha1/undermoon_types.go index 2fd644d..08b994f 100644 --- a/api/v1alpha1/undermoon_types.go +++ b/api/v1alpha1/undermoon_types.go @@ -70,7 +70,18 @@ type UndermoonStatus struct { // Master broker address pointing to the master broker. // +kubebuilder:validation:MinLength=1 + // +optional MasterBrokerAddress string `json:"masterBrokerAddress"` + + // ScaleState is used to controll scaling storage pods. + // +optional + ScaleState string `json:"scaleState"` + + // ScaleDownWaitTimestamp is used to wait for some time + // before scaling down storage StatefulSet Pods + // to avoid connection reset. + // +optional + ScaleDownWaitTimestamp metav1.Time `json:"scaleDownWaitTimestamp"` } // +kubebuilder:object:root=true diff --git a/api/v1alpha1/zz_generated.deepcopy.go b/api/v1alpha1/zz_generated.deepcopy.go index 97c61cd..08398bf 100644 --- a/api/v1alpha1/zz_generated.deepcopy.go +++ b/api/v1alpha1/zz_generated.deepcopy.go @@ -30,7 +30,7 @@ func (in *Undermoon) DeepCopyInto(out *Undermoon) { out.TypeMeta = in.TypeMeta in.ObjectMeta.DeepCopyInto(&out.ObjectMeta) in.Spec.DeepCopyInto(&out.Spec) - out.Status = in.Status + in.Status.DeepCopyInto(&out.Status) } // DeepCopy is an autogenerated deepcopy function, copying the receiver, creating a new Undermoon. @@ -105,6 +105,7 @@ func (in *UndermoonSpec) DeepCopy() *UndermoonSpec { // DeepCopyInto is an autogenerated deepcopy function, copying the receiver, writing into out. in must be non-nil. func (in *UndermoonStatus) DeepCopyInto(out *UndermoonStatus) { *out = *in + in.ScaleDownWaitTimestamp.DeepCopyInto(&out.ScaleDownWaitTimestamp) } // DeepCopy is an autogenerated deepcopy function, copying the receiver, creating a new UndermoonStatus. diff --git a/config/crd/bases/undermoon.doyoubi.mydomain_undermoons.yaml b/config/crd/bases/undermoon.doyoubi.mydomain_undermoons.yaml index 1fb5e26..dd9e405 100644 --- a/config/crd/bases/undermoon.doyoubi.mydomain_undermoons.yaml +++ b/config/crd/bases/undermoon.doyoubi.mydomain_undermoons.yaml @@ -197,8 +197,14 @@ spec: description: Master broker address pointing to the master broker. minLength: 1 type: string - required: - - masterBrokerAddress + scaleDownWaitTimestamp: + description: ScaleDownWaitTimestamp is used to wait for some time before + scaling down storage StatefulSet Pods to avoid connection reset. + format: date-time + type: string + scaleState: + description: ScaleState is used to controll scaling storage pods. + type: string type: object type: object version: v1alpha1 diff --git a/controllers/broker_controller.go b/controllers/broker_controller.go index 1d5f0c5..26b23de 100644 --- a/controllers/broker_controller.go +++ b/controllers/broker_controller.go @@ -2,6 +2,7 @@ package controllers import ( "context" + "fmt" undermoonv1alpha1 "github.com/doyoubi/undermoon-operator/api/v1alpha1" "github.com/go-logr/logr" @@ -10,6 +11,7 @@ import ( corev1 "k8s.io/api/core/v1" "k8s.io/apimachinery/pkg/api/errors" "k8s.io/apimachinery/pkg/types" + "sigs.k8s.io/controller-runtime/pkg/client" "sigs.k8s.io/controller-runtime/pkg/controller/controllerutil" ) @@ -169,13 +171,13 @@ func (con *memBrokerController) reconcileMaster(reqLogger logr.Logger, cr *under } func (con *memBrokerController) setMasterBrokerStatus(reqLogger logr.Logger, cr *undermoonv1alpha1.Undermoon, masterBrokerAddress string) error { - cr.Status.MasterBrokerAddress = masterBrokerAddress - err := con.r.client.Status().Update(context.TODO(), cr) + // `masterBrokerAddress` is only exposed to external for debugging. + // It does not need to be accurate so use PATCH here. + patchData := []byte(fmt.Sprintf(`{"status":{"masterBrokerAddress":"%s"}}`, masterBrokerAddress)) + patch := client.RawPatch(types.MergePatchType, patchData) + + err := con.r.client.Status().Patch(context.TODO(), cr, patch) if err != nil { - if errors.IsConflict(err) { - reqLogger.Info("Conflict on master broker status. Try again.", "error", err) - return errRetryReconciliation - } reqLogger.Error(err, "Failed to set master broker address") return err } diff --git a/controllers/storage_controller.go b/controllers/storage_controller.go index 2005225..6f7bda5 100644 --- a/controllers/storage_controller.go +++ b/controllers/storage_controller.go @@ -4,6 +4,7 @@ import ( "context" "strconv" "strings" + "time" undermoonv1alpha1 "github.com/doyoubi/undermoon-operator/api/v1alpha1" pkgerrors "github.com/pkg/errors" @@ -12,10 +13,18 @@ import ( appsv1 "k8s.io/api/apps/v1" corev1 "k8s.io/api/core/v1" "k8s.io/apimachinery/pkg/api/errors" + metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" "k8s.io/apimachinery/pkg/types" "sigs.k8s.io/controller-runtime/pkg/controller/controllerutil" ) +const ( + scaleStateStable = "Stable" + scaleStateScaleDownWait = "ScaleDownWait" + + scaleDownWaitTime = time.Second * 30 +) + type storageController struct { r *UndermoonReconciler } @@ -142,10 +151,57 @@ func (con *storageController) scaleDownStorageStatefulSet(reqLogger logr.Logger, return errRetryReconciliation } + replicaNum := int32(int(cr.Spec.ChunkNumber) * halfChunkNodeNumber) + if *storage.Spec.Replicas == replicaNum { + return nil + } + + // Postpone scaling down statefulset to avoid connection reset + if cr.Status.ScaleState != scaleStateScaleDownWait { + if err := con.setScaleState(reqLogger, cr, scaleStateScaleDownWait); err != nil { + return err + } + return errRetryReconciliation + } + + if cr.Status.ScaleDownWaitTimestamp.Add(scaleDownWaitTime).After(time.Now()) { + reqLogger.Info("Wait extra time before scaling down") + return errRetryReconciliation + } + err := con.updateStorageStatefulSet(reqLogger, cr, storage) if err != nil { return err } + + if err := con.setScaleState(reqLogger, cr, scaleStateStable); err != nil { + return err + } + + return nil +} + +func (con *storageController) setScaleState(reqLogger logr.Logger, cr *undermoonv1alpha1.Undermoon, scaleState string) error { + cr.Status.ScaleState = scaleState + if scaleState == scaleStateScaleDownWait { + cr.Status.ScaleDownWaitTimestamp = metav1.Now() + } else { + cr.Status.ScaleDownWaitTimestamp = metav1.Unix(0, 0) + } + + // This needs to be accurate so use UPDATE with ResourceVersion check + // instead of PATCH here. + err := con.r.client.Status().Update(context.TODO(), cr) + if err != nil { + if errors.IsConflict(err) { + reqLogger.Info("Conflict on updating ScaleState. Try again.", "error", err) + return errRetryReconciliation + } + reqLogger.Error(err, "Failed to change ScaleState", + "currState", cr.Status.ScaleState, + "newState", scaleState) + return err + } return nil } diff --git a/controllers/undermoon_controller.go b/controllers/undermoon_controller.go index 4210ffb..a63e78a 100644 --- a/controllers/undermoon_controller.go +++ b/controllers/undermoon_controller.go @@ -176,7 +176,7 @@ func (r *UndermoonReconciler) Reconcile(request ctrl.Request) (ctrl.Result, erro err = r.storageCon.scaleDownStorageStatefulSet(reqLogger, instance, resource.storageStatefulSet, info) if err != nil { if err == errRetryReconciliation { - return reconcile.Result{Requeue: true, RequeueAfter: 3 * time.Second}, nil + return reconcile.Result{Requeue: true, RequeueAfter: 10 * time.Second}, nil } return reconcile.Result{}, err } diff --git a/go.sum b/go.sum index 20df9b9..227cbba 100644 --- a/go.sum +++ b/go.sum @@ -516,6 +516,7 @@ honnef.co/go/tools v0.0.0-20190106161140-3f1c8253044a/go.mod h1:rf3lG4BRIbNafJWh honnef.co/go/tools v0.0.0-20190523083050-ea95bdfd59fc/go.mod h1:rf3lG4BRIbNafJWhAfAdb/ePZxsR/4RtNHQocxwk9r4= k8s.io/api v0.18.6 h1:osqrAXbOQjkKIWDTjrqxWQ3w0GkKb1KA1XkUGHHYpeE= k8s.io/api v0.18.6/go.mod h1:eeyxr+cwCjMdLAmr2W3RyDI0VvTawSg/3RFFBEnmZGI= +k8s.io/api v0.19.3 h1:GN6ntFnv44Vptj/b+OnMW7FmzkpDoIDLZRvKX3XH9aU= k8s.io/apiextensions-apiserver v0.18.6 h1:vDlk7cyFsDyfwn2rNAO2DbmUbvXy5yT5GE3rrqOzaMo= k8s.io/apiextensions-apiserver v0.18.6/go.mod h1:lv89S7fUysXjLZO7ke783xOwVTm6lKizADfvUM/SS/M= k8s.io/apimachinery v0.18.6 h1:RtFHnfGNfd1N0LeSrKCUznz5xtUP1elRGvHJbL3Ntag= diff --git a/helm/undermoon-operator/templates/undermoon.doyoubi.mydomain_undermoons.yaml b/helm/undermoon-operator/templates/undermoon.doyoubi.mydomain_undermoons.yaml index 42e4ecd..44b3031 100644 --- a/helm/undermoon-operator/templates/undermoon.doyoubi.mydomain_undermoons.yaml +++ b/helm/undermoon-operator/templates/undermoon.doyoubi.mydomain_undermoons.yaml @@ -172,8 +172,13 @@ spec: description: Master broker address pointing to the master broker. minLength: 1 type: string - required: - - masterBrokerAddress + scaleDownWaitTimestamp: + description: ScaleDownWaitTimestamp is used to wait for some time before scaling down storage StatefulSet Pods to avoid connection reset. + format: date-time + type: string + scaleState: + description: ScaleState is used to controll scaling storage pods. + type: string type: object type: object version: v1alpha1 From cba22a9709123f3e3646fdb237f8391c164c9bc5 Mon Sep 17 00:00:00 2001 From: doyoubi Date: Sat, 31 Oct 2020 18:13:44 +0800 Subject: [PATCH 2/2] Bump version --- Makefile | 2 +- Makefile.utils | 2 +- config/manager/overlays/test/kustomization.yaml | 2 +- helm/undermoon-cluster/Chart.yaml | 2 +- helm/undermoon-operator/Chart.yaml | 4 ++-- helm/undermoon-operator/values.yaml | 2 +- 6 files changed, 7 insertions(+), 7 deletions(-) diff --git a/Makefile b/Makefile index 89567e9..0492b2c 100644 --- a/Makefile +++ b/Makefile @@ -1,5 +1,5 @@ # Current Operator version -VERSION ?= v0.1.0-alpha.0 +VERSION ?= v0.1.0-alpha.1 # Default bundle image tag BUNDLE_IMG ?= controller-bundle:$(VERSION) # Options for 'bundle-build' diff --git a/Makefile.utils b/Makefile.utils index 8c1bc0e..e05e6d9 100644 --- a/Makefile.utils +++ b/Makefile.utils @@ -1,5 +1,5 @@ OPERATOR_VERSION=$(VERSION) -OPERATOR_HELM_VERSION=0.1.1 +OPERATOR_HELM_VERSION=0.1.2 CHECKER_HELM_VERSION=0.1.0 TEST_K8S_VER="v1.18.2" diff --git a/config/manager/overlays/test/kustomization.yaml b/config/manager/overlays/test/kustomization.yaml index f5f6a4b..34fcd5e 100644 --- a/config/manager/overlays/test/kustomization.yaml +++ b/config/manager/overlays/test/kustomization.yaml @@ -7,7 +7,7 @@ resources: images: - name: controller newName: localhost:5000/undermoon-operator - newTag: v0.0.2 + newTag: v0.1.0-alpha.1 patchesJson6902: - target: diff --git a/helm/undermoon-cluster/Chart.yaml b/helm/undermoon-cluster/Chart.yaml index 27ce008..863f059 100644 --- a/helm/undermoon-cluster/Chart.yaml +++ b/helm/undermoon-cluster/Chart.yaml @@ -15,5 +15,5 @@ type: application # This is the chart version. This version number should be incremented each time you make changes # to the chart and its templates, including the app version. # Versions are expected to follow Semantic Versioning (https://semver.org/) -version: 0.1.1 +version: 0.1.2 diff --git a/helm/undermoon-operator/Chart.yaml b/helm/undermoon-operator/Chart.yaml index 2c87d26..1a6835c 100644 --- a/helm/undermoon-operator/Chart.yaml +++ b/helm/undermoon-operator/Chart.yaml @@ -15,11 +15,11 @@ type: application # This is the chart version. This version number should be incremented each time you make changes # to the chart and its templates, including the app version. # Versions are expected to follow Semantic Versioning (https://semver.org/) -version: 0.1.1 +version: 0.1.2 # This is the version number of the application being deployed. This version number should be # incremented each time you make changes to the application. Versions are not expected to # follow Semantic Versioning. They should reflect the version the application is using. # # This should be the same as the version of undermoon-operator. -appVersion: v0.0.2 +appVersion: v0.1.0-alpha.1 diff --git a/helm/undermoon-operator/values.yaml b/helm/undermoon-operator/values.yaml index 3b2910a..a61721c 100644 --- a/helm/undermoon-operator/values.yaml +++ b/helm/undermoon-operator/values.yaml @@ -4,7 +4,7 @@ image: operatorImage: doyoubi/undermoon-operator - operatorImageTag: v0.1.0-alpha.0 + operatorImageTag: v0.1.0-alpha.1 operatorImagePullPolicy: IfNotPresent nameOverride: ""