Skip to content

Commit

Permalink
Handle ExternalIPPool range changes in Egress controller (#6685)
Browse files Browse the repository at this point in the history
The validation webhook for ExternalIPPools prevents modifications to IP
address ranges. However, it is theoretically possible for an
ExternalIPPool to be deleted, then recreated immediately with different
IP address ranges, and for the Egress controller to only process the
Egress resources referencing the pool once, *after* the CREATE event has
been handled by the ExternalIPPool controller. This is because the
ExternalIPPool controller is in charge of notifying the Egress
controller through a callback (event handler) mechanism, and that
multiple events for the same ExternalIPPool name can be merged in the
workqueue.

With this change, we try to ensure the same "observable" behavior for
the Egress controller, regardless of whether the DELETE and CREATE
events have been merged or not. The stale Egress IP should be removed,
and a new Egress IP should be allocated from the new IP ranges
(regardless of whether or not the initial Egress IP was specified by the
user).

Prior to this change, the change of IP address ranges would be silently
ignored by the Egress controller.

Signed-off-by: Antonin Bas <antonin.bas@broadcom.com>
  • Loading branch information
antoninbas committed Sep 26, 2024
1 parent 48ce631 commit 903fd99
Show file tree
Hide file tree
Showing 3 changed files with 82 additions and 9 deletions.
30 changes: 21 additions & 9 deletions pkg/controller/egress/controller.go
Original file line number Diff line number Diff line change
Expand Up @@ -249,14 +249,23 @@ func (c *EgressController) setIPAllocation(egressName string, ip net.IP, poolNam
func (c *EgressController) syncEgressIP(egress *egressv1beta1.Egress) (net.IP, *egressv1beta1.Egress, error) {
prevIP, prevIPPool, exists := c.getIPAllocation(egress.Name)
if exists {
// The EgressIP and the ExternalIPPool don't change, do nothing.
if prevIP.String() == egress.Spec.EgressIP && prevIPPool == egress.Spec.ExternalIPPool && c.externalIPAllocator.IPPoolExists(egress.Spec.ExternalIPPool) {
return prevIP, egress, nil
// The EgressIP and the ExternalIPPool haven't changed.
if prevIP.String() == egress.Spec.EgressIP && prevIPPool == egress.Spec.ExternalIPPool {
// If the EgressIP is still valid for the ExternalIPPool, nothing needs to be done.
if c.externalIPAllocator.IPPoolHasIP(prevIPPool, prevIP) {
return prevIP, egress, nil
}
// The ExternalIPPool may no longer exist, or the IP is not in range.
// Reclaim the IP from the Egress API.
klog.InfoS("Allocated EgressIP is no longer part of ExternalIPPool, releasing it", "egress", klog.KObj(egress), "ip", egress.Spec.EgressIP, "pool", egress.Spec.ExternalIPPool)
if updatedEgress, err := c.updateEgressIP(egress, ""); err != nil {
return nil, egress, err
} else {
egress = updatedEgress
}
}
// Either EgressIP or ExternalIPPool changes, release the previous one first.
if err := c.releaseEgressIP(egress.Name, prevIP, prevIPPool); err != nil {
return nil, egress, err
}
c.releaseEgressIP(egress.Name, prevIP, prevIPPool)
}

// Skip allocating EgressIP if ExternalIPPool is not specified and return whatever user specifies.
Expand Down Expand Up @@ -326,20 +335,23 @@ func (c *EgressController) updateEgressIP(egress *egressv1beta1.Egress, ip strin
}

// releaseEgressIP removes the Egress's ipAllocation in the cache and releases the IP to the pool.
func (c *EgressController) releaseEgressIP(egressName string, egressIP net.IP, poolName string) error {
func (c *EgressController) releaseEgressIP(egressName string, egressIP net.IP, poolName string) {
if err := c.externalIPAllocator.ReleaseIP(poolName, egressIP); err != nil {
if err == externalippool.ErrExternalIPPoolNotFound {
// Ignore the error since the external IP Pool could be deleted.
klog.InfoS("Failed to release EgressIP because IP Pool does not exist", "egress", egressName, "ip", egressIP, "pool", poolName)
} else {
// It is possible for the external IP Pool to have been deleted and
// recreated immediately with a different range, which would trigger this
// case. Transient errors in ReleaseIP are not possible, so there is no
// point in retrying. We should still delete our own state by calling
// deleteIPAllocation.
klog.ErrorS(err, "Failed to release IP", "ip", egressIP, "pool", poolName)
return err
}
} else {
klog.InfoS("Released EgressIP", "egress", egressName, "ip", egressIP, "pool", poolName)
}
c.deleteIPAllocation(egressName)
return nil
}

func (c *EgressController) syncEgress(key string) error {
Expand Down
57 changes: 57 additions & 0 deletions pkg/controller/egress/controller_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -546,6 +546,63 @@ func TestUpdateEgress(t *testing.T) {
checkExternalIPPoolUsed(t, controller, eipFoo2.Name, 0)
}

// TestRecreateExternalIPPoolWithNewRange tests the case where an ExternalIPPool is deleted, then
// immediately recreated with a different IP range. Specifically we test the scenario where
// syncEgress / syncEgressIP are called only once because the DELETE and CREATE events are merged in
// the workqueue. Ideally, the behavior observed by the user should be the same irrespective of
// whether the events are merged or not.
// Note that in an actual cluster, it is very unlikely that both events would be merged.
func TestRecreateExternalIPPoolWithNewRange(t *testing.T) {
stopCh := make(chan struct{})
defer close(stopCh)

eipFoo1 := newExternalIPPool("pool1", "1.1.1.0/24", "", "")
egress := &v1beta1.Egress{
ObjectMeta: metav1.ObjectMeta{Name: "egressA", UID: "uidA"},
Spec: v1beta1.EgressSpec{
AppliedTo: v1beta1.AppliedTo{
PodSelector: &metav1.LabelSelector{
MatchLabels: map[string]string{"app": "foo"},
},
},
EgressIP: "",
ExternalIPPool: eipFoo1.Name,
},
}

controller := newController(nil, []runtime.Object{eipFoo1, egress})
controller.informerFactory.Start(stopCh)
controller.crdInformerFactory.Start(stopCh)
controller.informerFactory.WaitForCacheSync(stopCh)
controller.crdInformerFactory.WaitForCacheSync(stopCh)
go controller.externalIPAllocator.Run(stopCh)
require.True(t, cache.WaitForCacheSync(stopCh, controller.externalIPAllocator.HasSynced))
controller.restoreIPAllocations([]*v1beta1.Egress{egress})

require.True(t, controller.externalIPAllocator.IPPoolExists(eipFoo1.Name))
getEgressIP, egress, err := controller.syncEgressIP(egress)
require.NoError(t, err)
assert.Equal(t, net.ParseIP("1.1.1.1"), getEgressIP)

// Delete and recreate the ExternalIPPool immediately with a different IP range. We do not
// call syncEgressIP in-between, so the Egress controller doesn't have a chance to process
// both changes independently.
controller.crdClient.CrdV1beta1().ExternalIPPools().Delete(context.TODO(), eipFoo1.Name, metav1.DeleteOptions{})
require.EventuallyWithT(t, func(t *assert.CollectT) {
assert.False(t, controller.externalIPAllocator.IPPoolExists(eipFoo1.Name))
}, 1*time.Second, 10*time.Millisecond)

eipFoo1 = newExternalIPPool("pool1", "1.1.2.0/24", "", "")
controller.crdClient.CrdV1beta1().ExternalIPPools().Create(context.TODO(), eipFoo1, metav1.CreateOptions{})
require.EventuallyWithT(t, func(t *assert.CollectT) {
assert.True(t, controller.externalIPAllocator.IPPoolExists(eipFoo1.Name))
}, 1*time.Second, 10*time.Millisecond)

getEgressIP, _, err = controller.syncEgressIP(egress)
require.NoError(t, err)
assert.Equal(t, net.ParseIP("1.1.2.1"), getEgressIP)
}

func TestSyncEgressIP(t *testing.T) {
tests := []struct {
name string
Expand Down
4 changes: 4 additions & 0 deletions pkg/controller/externalippool/controller.go
Original file line number Diff line number Diff line change
Expand Up @@ -88,6 +88,10 @@ type ExternalIPAllocator interface {
// UpdateIPAllocation marks the IP in the specified ExternalIPPool as occupied.
UpdateIPAllocation(externalIPPool string, ip net.IP) error
// ReleaseIP releases the IP to the IP pool.
// It returns ErrExternalIPPoolNotFound if the externalIPPool does not exist.
// Any other error indicates that the IP was not allocated, or is not currently allocated.
// In case of an error, there is no reason to try again with the same arguments, as
// transient errors are not possible.
ReleaseIP(externalIPPool string, ip net.IP) error
// HasSynced indicates ExternalIPAllocator has finished syncing all ExternalIPPool resources.
HasSynced() bool
Expand Down

0 comments on commit 903fd99

Please sign in to comment.