Skip to content

Commit

Permalink
fix: unexpected downtime in rollouts controller
Browse files Browse the repository at this point in the history
  • Loading branch information
Abhishek Bansal committed Oct 8, 2024
1 parent 50300e5 commit 37ce630
Show file tree
Hide file tree
Showing 2 changed files with 90 additions and 0 deletions.
23 changes: 23 additions & 0 deletions rollout/trafficrouting.go
Original file line number Diff line number Diff line change
Expand Up @@ -27,6 +27,7 @@ import (
replicasetutil "github.com/argoproj/argo-rollouts/utils/replicaset"
rolloututil "github.com/argoproj/argo-rollouts/utils/rollout"
"github.com/argoproj/argo-rollouts/utils/weightutil"
appsv1 "k8s.io/api/apps/v1"
)

// NewTrafficRoutingReconciler identifies and returns the TrafficRouting plugin(s) that the rollout wants to modify
Expand Down Expand Up @@ -133,6 +134,23 @@ func (c *Controller) NewTrafficRoutingReconciler(roCtx *rolloutContext) ([]traff
return nil, nil
}

// checkReplicasAvailable reports whether rs has enough available replicas to
// carry desiredWeight percent of the rollout's replicas.
//
// The required count is desiredWeight * spec.replicas / 100 using integer
// division. A nil rs is reported as not available. When rs falls short, an
// info-level message is logged and false is returned.
func (c *rolloutContext) checkReplicasAvailable(rs *appsv1.ReplicaSet, desiredWeight int32) bool {
	if rs == nil {
		return false
	}

	// spec.replicas is an optional pointer; dereferencing it unconditionally
	// would panic when it is unset. Fall back to a single replica in that case
	// (assumed to match the Rollout default - confirm against the project's
	// defaults package).
	totalReplicas := int32(1)
	if c.rollout.Spec.Replicas != nil {
		totalReplicas = *c.rollout.Spec.Replicas
	}

	availableReplicas := rs.Status.AvailableReplicas
	// Multiply in int64 so weight * replicas cannot wrap around int32.
	desiredReplicas := int64(desiredWeight) * int64(totalReplicas) / 100
	if int64(availableReplicas) < desiredReplicas {
		c.log.Infof("ReplicaSet '%s' has %d available replicas, waiting for %d", rs.Name, availableReplicas, desiredReplicas)
		return false
	}

	return true
}

// reconcileTrafficRouting is currently only used by the canary strategy.
func (c *rolloutContext) reconcileTrafficRouting() error {
reconcilers, err := c.newTrafficRoutingReconciler(c)
Expand Down Expand Up @@ -234,6 +252,10 @@ func (c *rolloutContext) reconcileTrafficRouting() error {
desiredWeight = weightutil.MaxTrafficWeight(c.rollout)
}
}

if !c.checkReplicasAvailable(c.stableRS, 100-desiredWeight) {
return nil
}
// We need to check for revision > 1 because when we first install the rollout we run step 0; this check prevents that.
// A bigger fix is needed for the underlying reason we run step 0 on rollout install; that still needs to be explored.
revision, revisionFound := annotations.GetRevisionAnnotation(c.rollout)
Expand Down Expand Up @@ -388,3 +410,4 @@ func (c *rolloutContext) calculateWeightDestinationsFromExperiment() []v1alpha1.
}
return weightDestinations
}

67 changes: 67 additions & 0 deletions rollout/trafficrouting_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -1343,3 +1343,70 @@ func TestDontWeightToZeroWhenDynamicallyRollingBackToStable(t *testing.T) {
rs1Updated := f.getUpdatedReplicaSet(scaleUpIndex)
assert.Equal(t, int32(10), *rs1Updated.Spec.Replicas)
}

// TestCheckReplicaSetAvailable covers a rollback to the stable ReplicaSet while
// that ReplicaSet does not yet have available replicas proportional to the
// weight being shifted to it. In that situation traffic must not be switched to
// the stable ReplicaSet immediately.
func TestCheckReplicaSetAvailable(t *testing.T) {
	f := newFixture(t)
	defer f.Close()

	canarySteps := []v1alpha1.CanaryStep{
		{SetWeight: pointer.Int32(60)},
		{Pause: &v1alpha1.RolloutPause{}},
	}

	stableRollout := newCanaryRollout("foo", 10, nil, canarySteps, pointer.Int32(1), intstr.FromInt(1), intstr.FromInt(1))
	stableRollout.Spec.Strategy.Canary.DynamicStableScale = true
	stableRollout.Spec.Strategy.Canary.TrafficRouting = &v1alpha1.RolloutTrafficRouting{
		SMI: &v1alpha1.SMITrafficRouting{},
	}
	stableRollout.Spec.Strategy.Canary.CanaryService = "canary"
	stableRollout.Spec.Strategy.Canary.StableService = "stable"
	stableRollout.Status.ReadyReplicas = 10
	stableRollout.Status.AvailableReplicas = 10
	rolledBack := bumpVersion(stableRollout)

	// The stable ReplicaSet is scaled down to 1 while the canary holds 9.
	stableRS := newReplicaSetWithStatus(stableRollout, 1, 1)
	canaryRS := newReplicaSetWithStatus(rolledBack, 9, 9)

	stableHash := stableRS.Labels[v1alpha1.DefaultRolloutUniqueLabelKey]
	canaryHash := canaryRS.Labels[v1alpha1.DefaultRolloutUniqueLabelKey]
	canarySvc := newService("canary", 80, map[string]string{v1alpha1.DefaultRolloutUniqueLabelKey: canaryHash}, stableRollout)
	stableSvc := newService("stable", 80, map[string]string{v1alpha1.DefaultRolloutUniqueLabelKey: stableHash}, stableRollout)

	// Simulate a rollback to stable.
	rolledBack.Spec = stableRollout.Spec
	rolledBack.Status.StableRS = stableHash
	rolledBack.Status.CurrentPodHash = stableHash // will cause IsFullyPromoted() to be true
	rolledBack.Status.Canary.Weights = &v1alpha1.TrafficWeights{
		Canary: v1alpha1.WeightDestination{
			Weight:          10,
			ServiceName:     "canary",
			PodTemplateHash: canaryHash,
		},
		Stable: v1alpha1.WeightDestination{
			Weight:          90,
			ServiceName:     "stable",
			PodTemplateHash: stableHash,
		},
	}

	f.kubeobjects = append(f.kubeobjects, stableRS, canaryRS, canarySvc, stableSvc)
	f.replicaSetLister = append(f.replicaSetLister, stableRS, canaryRS)

	f.rolloutLister = append(f.rolloutLister, rolledBack)
	f.objects = append(f.objects, rolledBack)

	// Expected actions, in order:
	f.expectUpdateReplicaSetAction(stableRS) // revision annotation updated from 1 to 3 (from func isScalingEvent)
	f.expectUpdateRolloutAction(rolledBack)  // rollout revision updated from 1 to 3
	f.expectUpdateReplicaSetAction(stableRS) // ReplicaSet scaled from 1 to 10 (from func scaleReplicaSet)
	f.expectPatchRolloutAction(rolledBack)   // rollout status patched after the scale to 10

	f.fakeTrafficRouting = newUnmockedFakeTrafficRoutingReconciler()
	f.fakeTrafficRouting.On("RemoveManagedRoutes", mock.Anything, mock.Anything, mock.Anything).Return(nil)
	f.run(getKey(stableRollout, t))
}

0 comments on commit 37ce630

Please sign in to comment.