diff --git a/pkg/controller/egress/controller.go b/pkg/controller/egress/controller.go index a6e6d814890..ea91873ddee 100644 --- a/pkg/controller/egress/controller.go +++ b/pkg/controller/egress/controller.go @@ -249,14 +249,23 @@ func (c *EgressController) setIPAllocation(egressName string, ip net.IP, poolNam func (c *EgressController) syncEgressIP(egress *egressv1beta1.Egress) (net.IP, *egressv1beta1.Egress, error) { prevIP, prevIPPool, exists := c.getIPAllocation(egress.Name) if exists { - // The EgressIP and the ExternalIPPool don't change, do nothing. - if prevIP.String() == egress.Spec.EgressIP && prevIPPool == egress.Spec.ExternalIPPool && c.externalIPAllocator.IPPoolExists(egress.Spec.ExternalIPPool) { - return prevIP, egress, nil + // The EgressIP and the ExternalIPPool haven't changed. + if prevIP.String() == egress.Spec.EgressIP && prevIPPool == egress.Spec.ExternalIPPool { + // If the EgressIP is still valid for the ExternalIPPool, nothing needs to be done. + if c.externalIPAllocator.IPPoolHasIP(prevIPPool, prevIP) { + return prevIP, egress, nil + } + // The ExternalIPPool may no longer exist, or the IP is not in range. + // Reclaim the IP from the Egress API. + klog.InfoS("Allocated EgressIP is no longer part of ExternalIPPool, releasing it", "egress", klog.KObj(egress), "ip", egress.Spec.EgressIP, "pool", egress.Spec.ExternalIPPool) + if updatedEgress, err := c.updateEgressIP(egress, ""); err != nil { + return nil, egress, err + } else { + egress = updatedEgress + } } // Either EgressIP or ExternalIPPool changes, release the previous one first. - if err := c.releaseEgressIP(egress.Name, prevIP, prevIPPool); err != nil { - return nil, egress, err - } + c.releaseEgressIP(egress.Name, prevIP, prevIPPool) } // Skip allocating EgressIP if ExternalIPPool is not specified and return whatever user specifies. @@ -326,20 +335,23 @@ func (c *EgressController) updateEgressIP(egress *egressv1beta1.Egress, ip strin } // releaseEgressIP removes the Egress's ipAllocation in the cache and releases the IP to the pool. -func (c *EgressController) releaseEgressIP(egressName string, egressIP net.IP, poolName string) error { +func (c *EgressController) releaseEgressIP(egressName string, egressIP net.IP, poolName string) { if err := c.externalIPAllocator.ReleaseIP(poolName, egressIP); err != nil { if err == externalippool.ErrExternalIPPoolNotFound { // Ignore the error since the external IP Pool could be deleted. klog.InfoS("Failed to release EgressIP because IP Pool does not exist", "egress", egressName, "ip", egressIP, "pool", poolName) } else { + // It is possible for the external IP Pool to have been deleted and + // recreated immediately with a different range, which would trigger this + // case. Transient errors in ReleaseIP are not possible, so there is no + // point in retrying. We should still delete our own state by calling + // deleteIPAllocation. klog.ErrorS(err, "Failed to release IP", "ip", egressIP, "pool", poolName) - return err } } else { klog.InfoS("Released EgressIP", "egress", egressName, "ip", egressIP, "pool", poolName) } c.deleteIPAllocation(egressName) - return nil } func (c *EgressController) syncEgress(key string) error { diff --git a/pkg/controller/egress/controller_test.go b/pkg/controller/egress/controller_test.go index 6c382280e63..c110b5d4a0a 100644 --- a/pkg/controller/egress/controller_test.go +++ b/pkg/controller/egress/controller_test.go @@ -546,6 +546,63 @@ func TestUpdateEgress(t *testing.T) { checkExternalIPPoolUsed(t, controller, eipFoo2.Name, 0) } +// TestRecreateExternalIPPoolWithNewRange tests the case where an ExternalIPPool is deleted, then +// immediately recreated with a different IP range. Specifically we test the scenario where +// syncEgress / syncEgressIP are called only once because the DELETE and CREATE events are merged in +// the workqueue. Ideally, the behavior observed by the user should be the same irrespective of +// whether the events are merged or not. +// Note that in an actual cluster, it is very unlikely that both events would be merged. +func TestRecreateExternalIPPoolWithNewRange(t *testing.T) { + stopCh := make(chan struct{}) + defer close(stopCh) + + eipFoo1 := newExternalIPPool("pool1", "1.1.1.0/24", "", "") + egress := &v1beta1.Egress{ + ObjectMeta: metav1.ObjectMeta{Name: "egressA", UID: "uidA"}, + Spec: v1beta1.EgressSpec{ + AppliedTo: v1beta1.AppliedTo{ + PodSelector: &metav1.LabelSelector{ + MatchLabels: map[string]string{"app": "foo"}, + }, + }, + EgressIP: "", + ExternalIPPool: eipFoo1.Name, + }, + } + + controller := newController(nil, []runtime.Object{eipFoo1, egress}) + controller.informerFactory.Start(stopCh) + controller.crdInformerFactory.Start(stopCh) + controller.informerFactory.WaitForCacheSync(stopCh) + controller.crdInformerFactory.WaitForCacheSync(stopCh) + go controller.externalIPAllocator.Run(stopCh) + require.True(t, cache.WaitForCacheSync(stopCh, controller.externalIPAllocator.HasSynced)) + controller.restoreIPAllocations([]*v1beta1.Egress{egress}) + + require.True(t, controller.externalIPAllocator.IPPoolExists(eipFoo1.Name)) + getEgressIP, egress, err := controller.syncEgressIP(egress) + require.NoError(t, err) + assert.Equal(t, net.ParseIP("1.1.1.1"), getEgressIP) + + // Delete and recreate the ExternalIPPool immediately with a different IP range. We do not + // call syncEgressIP in-between, so the Egress controller doesn't have a chance to process + // both changes independently. + controller.crdClient.CrdV1beta1().ExternalIPPools().Delete(context.TODO(), eipFoo1.Name, metav1.DeleteOptions{}) + require.EventuallyWithT(t, func(t *assert.CollectT) { + assert.False(t, controller.externalIPAllocator.IPPoolExists(eipFoo1.Name)) + }, 1*time.Second, 10*time.Millisecond) + + eipFoo1 = newExternalIPPool("pool1", "1.1.2.0/24", "", "") + controller.crdClient.CrdV1beta1().ExternalIPPools().Create(context.TODO(), eipFoo1, metav1.CreateOptions{}) + require.EventuallyWithT(t, func(t *assert.CollectT) { + assert.True(t, controller.externalIPAllocator.IPPoolExists(eipFoo1.Name)) + }, 1*time.Second, 10*time.Millisecond) + + getEgressIP, _, err = controller.syncEgressIP(egress) + require.NoError(t, err) + assert.Equal(t, net.ParseIP("1.1.2.1"), getEgressIP) +} + func TestSyncEgressIP(t *testing.T) { tests := []struct { name string diff --git a/pkg/controller/externalippool/controller.go b/pkg/controller/externalippool/controller.go index 2964fd53c23..feadec1761f 100644 --- a/pkg/controller/externalippool/controller.go +++ b/pkg/controller/externalippool/controller.go @@ -88,6 +88,10 @@ type ExternalIPAllocator interface { // UpdateIPAllocation marks the IP in the specified ExternalIPPool as occupied. UpdateIPAllocation(externalIPPool string, ip net.IP) error // ReleaseIP releases the IP to the IP pool. + // It returns ErrExternalIPPoolNotFound if the externalIPPool does not exist. + // Any other error indicates that the IP was not allocated, or is not currently allocated. + // In case of an error, there is no reason to try again with the same arguments, as + // transient errors are not possible. ReleaseIP(externalIPPool string, ip net.IP) error // HasSynced indicates ExternalIPAllocator has finished syncing all ExternalIPPool resources. HasSynced() bool