diff --git a/internal/controller/testdata/reconcile-automatic-emergency-mode-hpa-condition/after/deployment.yaml b/internal/controller/testdata/reconcile-automatic-emergency-mode-hpa-condition/after/deployment.yaml new file mode 100644 index 00000000..96d10022 --- /dev/null +++ b/internal/controller/testdata/reconcile-automatic-emergency-mode-hpa-condition/after/deployment.yaml @@ -0,0 +1,30 @@ +metadata: + name: mercari-app + namespace: default +spec: + selector: + matchLabels: + app: mercari + strategy: {} + template: + metadata: + annotations: + kubectl.kubernetes.io/restartedAt: "2023-01-01T00:00:00Z" + creationTimestamp: null + labels: + app: mercari + spec: + containers: + - image: awesome-mercari-app-image + name: app + resources: + requests: + cpu: "10" + memory: 10Gi + - image: awesome-istio-proxy-image + name: istio-proxy + resources: + requests: + cpu: "4" + memory: 4Gi +status: {} diff --git a/internal/controller/testdata/reconcile-automatic-emergency-mode-hpa-condition/after/hpa.yaml b/internal/controller/testdata/reconcile-automatic-emergency-mode-hpa-condition/after/hpa.yaml new file mode 100644 index 00000000..3fe64845 --- /dev/null +++ b/internal/controller/testdata/reconcile-automatic-emergency-mode-hpa-condition/after/hpa.yaml @@ -0,0 +1,51 @@ +metadata: + annotations: + tortoise.autoscaling.mercari.com/managed-by-tortoise: "true" + name: tortoise-hpa-mercari + namespace: default +spec: + behavior: + scaleDown: + policies: + - periodSeconds: 90 + type: Percent + value: 2 + selectPolicy: Max + scaleUp: + policies: + - periodSeconds: 60 + type: Percent + value: 100 + selectPolicy: Max + stabilizationWindowSeconds: 0 + maxReplicas: 15 + metrics: + - containerResource: + container: app + name: cpu + target: + averageUtilization: 30 + type: Utilization + type: ContainerResource + - containerResource: + container: istio-proxy + name: cpu + target: + averageUtilization: 30 + type: Utilization + type: ContainerResource + minReplicas: 15 + scaleTargetRef: + apiVersion: apps/v1 + kind: Deployment + name: mercari-app +status: + conditions: + - status: "True" + type: AbleToScale + message: "recommended size matches current size" + - status: "False" + type: ScalingActive + message: "the HPA was unable to compute the replica count: failed to get cpu utilization" + currentMetrics: null + desiredReplicas: 0 diff --git a/internal/controller/testdata/reconcile-automatic-emergency-mode-hpa-condition/after/tortoise.yaml b/internal/controller/testdata/reconcile-automatic-emergency-mode-hpa-condition/after/tortoise.yaml new file mode 100644 index 00000000..ec6a759f --- /dev/null +++ b/internal/controller/testdata/reconcile-automatic-emergency-mode-hpa-condition/after/tortoise.yaml @@ -0,0 +1,136 @@ +metadata: + finalizers: + - tortoise.autoscaling.mercari.com/finalizer + name: mercari + namespace: default +spec: + targetRefs: + scaleTargetRef: + apiVersion: apps/v1 + kind: Deployment + name: mercari-app + updateMode: Emergency +status: + autoscalingPolicy: + - containerName: app + policy: + cpu: Horizontal + memory: Vertical + - containerName: istio-proxy + policy: + cpu: Horizontal + memory: Vertical + conditions: + containerRecommendationFromVPA: + - containerName: app + maxRecommendation: + cpu: + quantity: "3" + updatedAt: "2023-01-01T00:00:00Z" + memory: + quantity: 3Gi + updatedAt: "2023-01-01T00:00:00Z" + recommendation: + cpu: + quantity: "3" + updatedAt: "2023-01-01T00:00:00Z" + memory: + quantity: 3Gi + updatedAt: "2023-01-01T00:00:00Z" + - containerName: istio-proxy + maxRecommendation: + cpu: + quantity: "3" + updatedAt: "2023-01-01T00:00:00Z" + memory: + quantity: 3Gi + updatedAt: "2023-01-01T00:00:00Z" + recommendation: + cpu: + quantity: "3" + updatedAt: "2023-01-01T00:00:00Z" + memory: + quantity: 3Gi + updatedAt: "2023-01-01T00:00:00Z" + containerResourceRequests: + - containerName: app + resource: + cpu: "6" + memory: 3Gi + - containerName: istio-proxy + resource: + cpu: "4" + memory: 3Gi + tortoiseConditions: + - lastTransitionTime: "2023-01-01T00:00:00Z" + lastUpdateTime: "2023-01-01T00:00:00Z" + message: HPA target utilization is updated + reason: HPATargetUtilizationUpdated + status: "True" + type: HPATargetUtilizationUpdated + - lastTransitionTime: "2023-01-01T00:00:00Z" + lastUpdateTime: "2023-01-01T00:00:00Z" + message: The recommendation is provided + status: "True" + type: VerticalRecommendationUpdated + - lastTransitionTime: "2023-01-01T00:00:00Z" + lastUpdateTime: "2023-01-01T00:00:00Z" + status: "False" + type: FailedToReconcile + containerResourcePhases: + - containerName: app + resourcePhases: + cpu: + lastTransitionTime: null + phase: Working + memory: + lastTransitionTime: "2023-01-01T00:00:00Z" + phase: Working + - containerName: istio-proxy + resourcePhases: + cpu: + lastTransitionTime: null + phase: Working + memory: + lastTransitionTime: "2023-01-01T00:00:00Z" + phase: Working + recommendations: + horizontal: + maxReplicas: + - from: 0 + timezone: Local + to: 24 + updatedAt: "2023-10-06T01:15:47Z" + value: 15 + minReplicas: + - from: 0 + timezone: Local + to: 24 + updatedAt: "2023-10-06T01:15:47Z" + value: 3 + targetUtilizations: + - containerName: app + targetUtilization: + cpu: 30 + - containerName: istio-proxy + targetUtilization: + cpu: 30 + vertical: + containerResourceRecommendation: + - RecommendedResource: + cpu: "6" + memory: 3Gi + containerName: app + - RecommendedResource: + cpu: "4" + memory: 3Gi + containerName: istio-proxy + targets: + horizontalPodAutoscaler: tortoise-hpa-mercari + scaleTargetRef: + kind: "" + name: "" + verticalPodAutoscalers: + - name: tortoise-monitor-mercari + role: Monitor + tortoisePhase: Emergency diff --git a/internal/controller/testdata/reconcile-automatic-emergency-mode-hpa-condition/after/vpa-Monitor.yaml b/internal/controller/testdata/reconcile-automatic-emergency-mode-hpa-condition/after/vpa-Monitor.yaml new file mode 100644 index 00000000..81846608 --- /dev/null +++ b/internal/controller/testdata/reconcile-automatic-emergency-mode-hpa-condition/after/vpa-Monitor.yaml @@ -0,0 +1,39 @@ +metadata: + annotations: + tortoise.autoscaling.mercari.com/managed-by-tortoise: "true" + name: tortoise-monitor-mercari + namespace: default +spec: + targetRef: + apiVersion: apps/v1 + kind: Deployment + name: mercari-app + updatePolicy: + updateMode: "Off" +status: + conditions: + - lastTransitionTime: null + status: "True" + type: RecommendationProvided + recommendation: + containerRecommendations: + - containerName: app + lowerBound: + cpu: "3" + memory: 3Gi + target: + cpu: "3" + memory: 3Gi + upperBound: + cpu: "5" + memory: 5Gi + - containerName: istio-proxy + lowerBound: + cpu: "3" + memory: 3Gi + target: + cpu: "3" + memory: 3Gi + upperBound: + cpu: "5" + memory: 5Gi diff --git a/internal/controller/testdata/reconcile-automatic-emergency-mode-hpa-condition/before/deployment.yaml b/internal/controller/testdata/reconcile-automatic-emergency-mode-hpa-condition/before/deployment.yaml new file mode 100644 index 00000000..693bb43d --- /dev/null +++ b/internal/controller/testdata/reconcile-automatic-emergency-mode-hpa-condition/before/deployment.yaml @@ -0,0 +1,27 @@ +metadata: + name: mercari-app + namespace: default +spec: + selector: + matchLabels: + app: mercari + strategy: {} + template: + metadata: + annotations: + sidecar.istio.io/inject: "true" + sidecar.istio.io/proxyCPU: "4" + sidecar.istio.io/proxyMemory: "4Gi" + labels: + app: mercari + spec: + containers: + - name: istio-proxy # will be ignored. + image: auto + - image: awesome-mercari-app-image + name: app + resources: + requests: + cpu: "10" + memory: 10Gi + replicas: 10 diff --git a/internal/controller/testdata/reconcile-automatic-emergency-mode-hpa-condition/before/hpa.yaml b/internal/controller/testdata/reconcile-automatic-emergency-mode-hpa-condition/before/hpa.yaml new file mode 100644 index 00000000..70641ed2 --- /dev/null +++ b/internal/controller/testdata/reconcile-automatic-emergency-mode-hpa-condition/before/hpa.yaml @@ -0,0 +1,50 @@ +metadata: + annotations: + tortoise.autoscaling.mercari.com/managed-by-tortoise: "true" + name: tortoise-hpa-mercari + namespace: default +status: + conditions: + - status: "True" + type: AbleToScale + message: "recommended size matches current size" + - status: "False" + type: ScalingActive + message: "the HPA was unable to compute the replica count: failed to get cpu utilization" +spec: + behavior: + scaleDown: + policies: + - periodSeconds: 90 + type: Percent + value: 2 + selectPolicy: Max + scaleUp: + policies: + - periodSeconds: 60 + type: Percent + value: 100 + selectPolicy: Max + stabilizationWindowSeconds: 0 + maxReplicas: 15 + metrics: + - containerResource: + container: app + name: cpu + target: + averageUtilization: 30 + type: Utilization + type: ContainerResource + - containerResource: + container: istio-proxy + name: cpu + target: + averageUtilization: 30 + type: Utilization + type: ContainerResource + minReplicas: 3 + scaleTargetRef: + apiVersion: apps/v1 + kind: Deployment + name: mercari-app + diff --git a/internal/controller/testdata/reconcile-automatic-emergency-mode-hpa-condition/before/tortoise.yaml b/internal/controller/testdata/reconcile-automatic-emergency-mode-hpa-condition/before/tortoise.yaml new file mode 100644 index 00000000..2757b8f3 --- /dev/null +++ b/internal/controller/testdata/reconcile-automatic-emergency-mode-hpa-condition/before/tortoise.yaml @@ -0,0 +1,116 @@ +metadata: + name: mercari + namespace: default +spec: + targetRefs: + scaleTargetRef: + apiVersion: apps/v1 + kind: Deployment + name: mercari-app + updateMode: Auto +status: + autoscalingPolicy: + - policy: + cpu: Horizontal + memory: Vertical + containerName: app + - policy: + cpu: Horizontal + memory: Vertical + containerName: istio-proxy + conditions: + containerResourceRequests: + - containerName: "app" + resource: + cpu: "10" + memory: 10Gi + - containerName: "istio-proxy" + resource: + cpu: "4" + memory: 4Gi + tortoiseConditions: + - message: "HPA target utilization is updated" + reason: HPATargetUtilizationUpdated + status: "True" + type: HPATargetUtilizationUpdated + containerRecommendationFromVPA: + - containerName: app + maxRecommendation: + cpu: + quantity: "3" + updatedAt: null + memory: + quantity: 3Gi + updatedAt: null + recommendation: + cpu: + quantity: "3" + updatedAt: null + memory: + quantity: 3Gi + updatedAt: null + - containerName: istio-proxy + maxRecommendation: + cpu: + quantity: "3" + updatedAt: null + memory: + quantity: 3Gi + updatedAt: null + recommendation: + cpu: + quantity: "3" + updatedAt: null + memory: + quantity: 3Gi + updatedAt: null + recommendations: + horizontal: + maxReplicas: + - from: 0 + timezone: Local + to: 24 + updatedAt: "2023-10-06T01:15:47Z" + value: 15 + minReplicas: + - from: 0 + timezone: Local + to: 24 + updatedAt: "2023-10-06T01:15:47Z" + value: 3 + targetUtilizations: + - containerName: app + targetUtilization: + cpu: 30 + - containerName: istio-proxy + targetUtilization: + cpu: 30 + vertical: + containerResourceRecommendation: + - RecommendedResource: + cpu: "6" + memory: 3Gi + containerName: app + - RecommendedResource: + cpu: "4" + memory: 3Gi + containerName: istio-proxy + targets: + horizontalPodAutoscaler: tortoise-hpa-mercari + verticalPodAutoscalers: + - name: tortoise-monitor-mercari + role: Monitor + tortoisePhase: Working + containerResourcePhases: + - containerName: "app" + resourcePhases: + cpu: + phase: Working + memory: + phase: Working + - containerName: "istio-proxy" + resourcePhases: + cpu: + phase: Working + memory: + phase: Working diff --git a/internal/controller/testdata/reconcile-automatic-emergency-mode-hpa-condition/before/vpa-Monitor.yaml b/internal/controller/testdata/reconcile-automatic-emergency-mode-hpa-condition/before/vpa-Monitor.yaml new file mode 100644 index 00000000..2b8063f1 --- /dev/null +++ b/internal/controller/testdata/reconcile-automatic-emergency-mode-hpa-condition/before/vpa-Monitor.yaml @@ -0,0 +1,43 @@ +metadata: + annotations: + tortoise.autoscaling.mercari.com/managed-by-tortoise: "true" + name: tortoise-monitor-mercari + namespace: default +spec: + autoscalingPolicy: + containerPolicies: + - containerName: app + - containerName: istio-proxy + targetRef: + apiVersion: apps/v1 + kind: Deployment + name: mercari-app + updatePolicy: + updateMode: "Off" +status: + conditions: + - lastTransitionTime: null + status: "True" + type: RecommendationProvided + recommendation: + containerRecommendations: + - containerName: app + lowerBound: + cpu: "3" + memory: 3Gi + target: + cpu: "3" + memory: 3Gi + upperBound: + cpu: "5" + memory: 5Gi + - containerName: istio-proxy + lowerBound: + cpu: "3" + memory: 3Gi + target: + cpu: "3" + memory: 3Gi + upperBound: + cpu: "5" + memory: 5Gi diff --git a/internal/controller/tortoise_controller.go b/internal/controller/tortoise_controller.go index 5b5753ec..2ace1d8d 100644 --- a/internal/controller/tortoise_controller.go +++ b/internal/controller/tortoise_controller.go @@ -185,16 +185,17 @@ func (r *TortoiseReconciler) Reconcile(ctx context.Context, req ctrl.Request) (_ return ctrl.Result{}, err } tortoise = tortoiseService.UpdateTortoiseAutoscalingPolicyInStatus(tortoise, hpa, now) - scalingActive := r.HpaService.CheckHpaMetricStatus(ctx, hpa) - if scalingActive == false && tortoise.Spec.UpdateMode == autoscalingv1beta3.UpdateModeAuto && tortoise.Status.TortoisePhase == autoscalingv1beta3.TortoisePhaseWorking { - //switch to emergency mode only when auto tortoise and already working - tortoise.Spec.UpdateMode = autoscalingv1beta3.UpdateModeEmergency - } - if scalingActive == true && tortoise.Spec.UpdateMode == autoscalingv1beta3.UpdateModeEmergency { - tortoise.Spec.UpdateMode = autoscalingv1beta3.UpdateModeAuto - } tortoise = r.TortoiseService.UpdateTortoisePhase(tortoise, now) + if tortoise.Status.TortoisePhase == autoscalingv1beta3.TortoisePhaseInitializing { + logger.Info("initializing tortoise", "tortoise", req.NamespacedName) + // need to initialize HPA and VPA. + if err := r.initializeVPAAndHPA(ctx, tortoise, currentDesiredReplicaNum, now); err != nil { + return ctrl.Result{}, fmt.Errorf("initialize VPA and HPA: %w", err) + } + + return ctrl.Result{RequeueAfter: r.Interval}, nil + } // Make sure finalizer is added to tortoise. tortoise, err = r.TortoiseService.AddFinalizer(ctx, tortoise) @@ -238,6 +239,13 @@ func (r *TortoiseReconciler) Reconcile(ctx context.Context, req ctrl.Request) (_ logger.Error(err, "failed to get HPA", "tortoise", req.NamespacedName) return ctrl.Result{}, err } + scalingActive := r.HpaService.CheckHpaMetricStatus(ctx, hpa) + if scalingActive == false && tortoise.Spec.UpdateMode == autoscalingv1beta3.UpdateModeAuto && tortoise.Status.TortoisePhase == autoscalingv1beta3.TortoisePhaseWorking { + //switch to emergency mode only when auto tortoise and already working + tortoise.Spec.UpdateMode = autoscalingv1beta3.UpdateModeEmergency + logger.Info("EMERGENCY MODE") + tortoise = r.TortoiseService.UpdateTortoisePhase(tortoise, now) + } tortoise = r.TortoiseService.UpdateContainerRecommendationFromVPA(tortoise, monitorvpa, now) diff --git a/internal/controller/tortoise_controller_test.go b/internal/controller/tortoise_controller_test.go index ef45be00..792f9987 100644 --- a/internal/controller/tortoise_controller_test.go +++ b/internal/controller/tortoise_controller_test.go @@ -181,25 +181,7 @@ func updateResourcesInTestCaseFile(path string, resource resources) error { } func removeUnnecessaryFieldsFromHPA(hpa *v2.HorizontalPodAutoscaler) *v2.HorizontalPodAutoscaler { - hpa.Status = v2.HorizontalPodAutoscalerStatus{ - Conditions: []v2.HorizontalPodAutoscalerCondition{ - { - Status: "True", - Type: v2.AbleToScale, - Message: "recommended size matches current size", - }, - { - Status: "True", - Type: v2.ScalingActive, - Message: "the HPA was able to successfully calculate a replica count from cpu resource utilization (percentage of request)", - }, - { - Status: "False", - Type: v2.ScalingLimited, - Message: "the desired count is within the acceptable range", - }, - }, - } + //hpa.Status = v2.HorizontalPodAutoscalerStatus{} return hpa } @@ -495,6 +477,11 @@ var _ = Describe("Test TortoiseController", func() { runTest(filepath.Join("testdata", "mutable-autoscalingpolicy-remove-horizontal-2")) }) }) + Context("automatic switch to emergency mode", func() { + It("HPA scalingactive condition false", func() { + runTest(filepath.Join("testdata", "reconcile-automatic-emergency-mode-hpa-condition")) + }) + }) Context("DeletionPolicy is handled correctly", func() { It("[DeletionPolicy = DeleteAll] delete HPA and VPA when Tortoise is deleted", func() { resource := initializeResourcesFromFiles(ctx, k8sClient, "testdata/deletion-policy-all/before") diff --git a/pkg/hpa/service.go b/pkg/hpa/service.go index b1bffad4..11976700 100644 --- a/pkg/hpa/service.go +++ b/pkg/hpa/service.go @@ -5,6 +5,7 @@ import ( "errors" "fmt" "math" + "reflect" "regexp" "sort" "time" @@ -767,20 +768,31 @@ func (c *Service) excludeExternalMetric(ctx context.Context, hpa *v2.HorizontalP } func (c *Service) CheckHpaMetricStatus(ctx context.Context, currenthpa *v2.HorizontalPodAutoscaler) bool { - currenthpa = currenthpa.DeepCopy() - conditions := []v2.HorizontalPodAutoscalerCondition{} + //currenthpa = currenthpa.DeepCopy() + logger := log.FromContext(ctx) + if currenthpa == nil { + logger.Info("empty HPA passed into status check, ignore") + return true + } + + if reflect.DeepEqual(currenthpa.Status, v2.HorizontalPodAutoscalerStatus{}) { + logger.Info("HPA empty status, switch to emergency mode") + return false + } + if currenthpa.Status.Conditions == nil { + logger.Info("HPA empty conditions, switch to emergency mode") return false } if currenthpa.Status.CurrentMetrics == nil { + logger.Info("HPA no metrics, switch to emergency mode") return false } - conditions = currenthpa.Status.Conditions + conditions := currenthpa.Status.Conditions currentMetrics := currenthpa.Status.CurrentMetrics if len(conditions) > 0 { - // do what you want for _, condition := range conditions { if condition.Type == "ScalingActive" && condition.Status == "False" && condition.Reason == "FailedGetResourceMetric" { //switch to Emergency mode since no metrics @@ -797,5 +809,7 @@ func (c *Service) CheckHpaMetricStatus(ctx context.Context, currenthpa *v2.Horiz } } + logger.Info("HPA status check passed") + return true }