Skip to content

Commit

Permalink
Delete stale masquerade subnet resources if subnet gets changed at day 2
Browse files Browse the repository at this point in the history
This PR does following:

- Removes following linux resources if masquerade subnet gets changed
  (node side):

  * Removes old V4HostMasqueradeIP and V6HostMasqueradeIP from bridge.

  * Removes stale neighbour entries V4OVNMasqueradeIP, V6OVNMasqueradeIP,
V4DummyNextHopMasqueradeIP and V6DummyNextHopMasqueradeIP if exists.

  * Removes stale masquerade route added by addMasqueradeRoute() function
while starting up the gateway.

  * Removes stale iptables rules created for masquerade subnet based
on ipForwarding and Gateway mode.

- Removes following linux resources if masquerade subnet gets changed
  (ovnkube-controller to NBDB side):

  * Removes logical router static route used by gateway router and
referencing old masquerade subnet.

  * Removes static mac binding for gateway router's rtoe logical port
referencing old masquerade subnet.

Note, the node now sets an annotation to indicate its masquerade subnet
that it last configured. The node uses this at start up to determine if
there has been a change and cleanup is needed.

On the ovnkube-controller side, it also uses this annotation to
determine if the node has changed. However, it may be racy to rely on
this as the node thread may have already updated the annotation by the
time the ovnkube-controller side handles the cleanup. Therefore, in
addition to the annotation ovnkube-controller will additionally scan for
stale routes in NBDB and then derive the route and mac binding to remove
that way. In order to facilitate this, the masquerade route now has an
external_id present (same as the key used in the annotation) to
distinguish which routes are masquerade routes.

Failure to delete things is not usually an overall failure for OVNK.
Therefore upon failing to clean something up, the error is logged, but
startup continues.

Finally, kind.sh is updated to use a larger masquerade subnet by
default. OVN-Kubernetes defaults themselves remain unchanged. Helm
has also been updated to use a larger subnet.

Co-authored-by: Tim Rozet <trozet@redhat.com>
Signed-off-by: Arnab Ghosh <arnabghosh89@gmail.com>
  • Loading branch information
arghosh93 and trozet committed Aug 5, 2024
1 parent 22513fc commit 8462931
Show file tree
Hide file tree
Showing 22 changed files with 836 additions and 87 deletions.
10 changes: 7 additions & 3 deletions contrib/kind-helm.sh
Original file line number Diff line number Diff line change
Expand Up @@ -31,6 +31,10 @@ set_default_params() {
# Setup KUBECONFIG patch based on cluster-name
export KUBECONFIG=${KUBECONFIG:-${HOME}/${KIND_CLUSTER_NAME}.conf}

# Validated params that work
export MASQUERADE_SUBNET_IPV4=${MASQUERADE_SUBNET_IPV4:-169.254.0.0/16}
export MASQUERADE_SUBNET_IPV6=${MASQUERADE_SUBNET_IPV6:-fd69::/112}

# Input not currently validated. Modify outside script at your own risk.
# These are the same values defaulted to in KIND code (kind/default.go).
# NOTE: KIND NET_CIDR_IPV6 default use a /64 but OVN have a /64 per host
Expand All @@ -43,8 +47,6 @@ set_default_params() {
export SVC_CIDR_IPV6=${SVC_CIDR_IPV6:-fd00:10:96::/112}
export JOIN_SUBNET_IPV4=${JOIN_SUBNET_IPV4:-100.64.0.0/16}
export JOIN_SUBNET_IPV6=${JOIN_SUBNET_IPV6:-fd98::/64}
export MASQUERADE_SUBNET_IPV4=${MASQUERADE_SUBNET_IPV4:-169.254.169.0/29}
export MASQUERADE_SUBNET_IPV6=${MASQUERADE_SUBNET_IPV6:-fd69::/125}
export TRANSIT_SWITCH_SUBNET_IPV4=${TRANSIT_SWITCH_SUBNET_IPV4:-100.88.0.0/16}
export TRANSIT_SWITCH_SUBNET_IPV6=${TRANSIT_SWITCH_SUBNET_IPV6:-fd97::/64}
export METALLB_CLIENT_NET_SUBNET_IPV4=${METALLB_CLIENT_NET_SUBNET_IPV4:-172.22.0.0/16}
Expand Down Expand Up @@ -312,7 +314,9 @@ create_ovn_kubernetes() {
--set global.enableHybridOverlay=$(if [ "${OVN_HYBRID_OVERLAY_ENABLE}" == "true" ]; then echo "true"; else echo "false"; fi) \
--set global.emptyLbEvents=$(if [ "${OVN_EMPTY_LB_EVENTS}" == "true" ]; then echo "true"; else echo "false"; fi) \
--set tags.ovnkube-db-raft=$(if [ "${OVN_HA}" == "true" ]; then echo "true"; else echo "false"; fi) \
--set tags.ovnkube-db=$(if [ "${OVN_HA}" == "false" ]; then echo "true"; else echo "false"; fi)
--set tags.ovnkube-db=$(if [ "${OVN_HA}" == "false" ]; then echo "true"; else echo "false"; fi) \
--set global.v4MasqueradeSubnet=${MASQUERADE_SUBNET_IPV4} \
--set global.v6MasqueradeSubnet=${MASQUERADE_SUBNET_IPV6}
}

delete() {
Expand Down
4 changes: 2 additions & 2 deletions contrib/kind.sh
Original file line number Diff line number Diff line change
Expand Up @@ -543,8 +543,8 @@ set_default_params() {
SVC_CIDR_IPV6=${SVC_CIDR_IPV6:-fd00:10:96::/112}
JOIN_SUBNET_IPV4=${JOIN_SUBNET_IPV4:-100.64.0.0/16}
JOIN_SUBNET_IPV6=${JOIN_SUBNET_IPV6:-fd98::/64}
MASQUERADE_SUBNET_IPV4=${MASQUERADE_SUBNET_IPV4:-169.254.169.0/29}
MASQUERADE_SUBNET_IPV6=${MASQUERADE_SUBNET_IPV6:-fd69::/125}
MASQUERADE_SUBNET_IPV4=${MASQUERADE_SUBNET_IPV4:-169.254.0.0/16}
MASQUERADE_SUBNET_IPV6=${MASQUERADE_SUBNET_IPV6:-fd69::/112}
TRANSIT_SWITCH_SUBNET_IPV4=${TRANSIT_SWITCH_SUBNET_IPV4:-100.88.0.0/16}
TRANSIT_SWITCH_SUBNET_IPV6=${TRANSIT_SWITCH_SUBNET_IPV6:-fd97::/64}
METALLB_CLIENT_NET_SUBNET_IPV4=${METALLB_CLIENT_NET_SUBNET_IPV4:-172.22.0.0/16}
Expand Down
4 changes: 4 additions & 0 deletions dist/images/daemonset.sh
Original file line number Diff line number Diff line change
Expand Up @@ -820,6 +820,8 @@ ovn_image=${ovnkube_image} \
ovn_disable_pkt_mtu_check=${ovn_disable_pkt_mtu_check} \
ovn_v4_join_subnet=${ovn_v4_join_subnet} \
ovn_v6_join_subnet=${ovn_v6_join_subnet} \
ovn_v4_masquerade_subnet=${ovn_v4_masquerade_subnet} \
ovn_v6_masquerade_subnet=${ovn_v6_masquerade_subnet} \
ovn_multicast_enable=${ovn_multicast_enable} \
ovn_admin_network_policy_enable=${ovn_admin_network_policy_enable} \
ovn_egress_ip_enable=${ovn_egress_ip_enable} \
Expand Down Expand Up @@ -880,6 +882,8 @@ ovn_image=${ovnkube_image} \
ovn_disable_pkt_mtu_check=${ovn_disable_pkt_mtu_check} \
ovn_v4_join_subnet=${ovn_v4_join_subnet} \
ovn_v6_join_subnet=${ovn_v6_join_subnet} \
ovn_v4_masquerade_subnet=${ovn_v4_masquerade_subnet} \
ovn_v6_masquerade_subnet=${ovn_v6_masquerade_subnet} \
ovn_multicast_enable=${ovn_multicast_enable} \
ovn_admin_network_policy_enable=${ovn_admin_network_policy_enable} \
ovn_egress_ip_enable=${ovn_egress_ip_enable} \
Expand Down
12 changes: 12 additions & 0 deletions dist/images/ovnkube.sh
Original file line number Diff line number Diff line change
Expand Up @@ -2497,6 +2497,16 @@ ovn-node() {
fi
echo "ovn_conntrack_zone_flag=${ovn_conntrack_zone_flag}"

ovn_v4_masquerade_subnet_opt=
if [[ -n ${ovn_v4_masquerade_subnet} ]]; then
ovn_v4_masquerade_subnet_opt="--gateway-v4-masquerade-subnet=${ovn_v4_masquerade_subnet}"
fi

ovn_v6_masquerade_subnet_opt=
if [[ -n ${ovn_v6_masquerade_subnet} ]]; then
ovn_v6_masquerade_subnet_opt="--gateway-v6-masquerade-subnet=${ovn_v6_masquerade_subnet}"
fi

echo "=============== ovn-node --init-node"
/usr/bin/ovnkube --init-node ${K8S_NODE} \
${anp_enabled_flag} \
Expand Down Expand Up @@ -2526,6 +2536,8 @@ ovn-node() {
${ovn_conntrack_zone_flag} \
${ovnkube_enable_interconnect_flag} \
${ovnkube_enable_multi_external_gateway_flag} \
${ovn_v4_masquerade_subnet_opt} \
${ovn_v6_masquerade_subnet_opt} \
${ovnkube_metrics_tls_opts} \
${ovnkube_node_certs_flags} \
${ovnkube_node_mgmt_port_netdev_flag} \
Expand Down
4 changes: 4 additions & 0 deletions dist/templates/ovnkube-single-node-zone.yaml.j2
Original file line number Diff line number Diff line change
Expand Up @@ -399,6 +399,10 @@ spec:
value: "{{ ovn_v4_join_subnet }}"
- name: OVN_V6_JOIN_SUBNET
value: "{{ ovn_v6_join_subnet }}"
- name: OVN_V4_MASQUERADE_SUBNET
value: "{{ ovn_v4_masquerade_subnet }}"
- name: OVN_V6_MASQUERADE_SUBNET
value: "{{ ovn_v6_masquerade_subnet }}"
- name: OVN_MULTICAST_ENABLE
value: "{{ ovn_multicast_enable }}"
- name: OVN_UNPRIVILEGED_MODE
Expand Down
4 changes: 4 additions & 0 deletions dist/templates/ovnkube-zone-controller.yaml.j2
Original file line number Diff line number Diff line change
Expand Up @@ -357,6 +357,10 @@ spec:
value: "{{ ovn_v4_join_subnet }}"
- name: OVN_V6_JOIN_SUBNET
value: "{{ ovn_v6_join_subnet }}"
- name: OVN_V4_MASQUERADE_SUBNET
value: "{{ ovn_v4_masquerade_subnet }}"
- name: OVN_V6_MASQUERADE_SUBNET
value: "{{ ovn_v6_masquerade_subnet }}"
- name: OVN_SSL_ENABLE
value: "{{ ovn_ssl_en }}"
- name: OVN_GATEWAY_MODE
Expand Down
4 changes: 2 additions & 2 deletions go-controller/pkg/config/config.go
Original file line number Diff line number Diff line change
Expand Up @@ -1877,15 +1877,15 @@ func completeGatewayConfig(allSubnets *configSubnets, masqueradeIPs *MasqueradeI
if err != nil || utilnet.IsIPv6(v4MasqueradeCIDR.IP) {
return fmt.Errorf("invalid gateway v4 masquerade subnet specified, subnet: %s: error: %v", Gateway.V4MasqueradeSubnet, err)
}
if err = allocateV4MasqueradeIPs(v4MasqueradeIP, masqueradeIPs); err != nil {
if err = AllocateV4MasqueradeIPs(v4MasqueradeIP, masqueradeIPs); err != nil {
return fmt.Errorf("unable to allocate V4MasqueradeIPs: %s", err)
}

v6MasqueradeIP, v6MasqueradeCIDR, err := net.ParseCIDR(Gateway.V6MasqueradeSubnet)
if err != nil || !utilnet.IsIPv6(v6MasqueradeCIDR.IP) {
return fmt.Errorf("invalid gateway v6 masquerade subnet specified, subnet: %s: error: %v", Gateway.V6MasqueradeSubnet, err)
}
if err = allocateV6MasqueradeIPs(v6MasqueradeIP, masqueradeIPs); err != nil {
if err = AllocateV6MasqueradeIPs(v6MasqueradeIP, masqueradeIPs); err != nil {
return fmt.Errorf("unable to allocate V6MasqueradeIPs: %s", err)
}

Expand Down
4 changes: 2 additions & 2 deletions go-controller/pkg/config/utils.go
Original file line number Diff line number Diff line change
Expand Up @@ -292,7 +292,7 @@ type MasqueradeIPsConfig struct {
// allocateV4/6MasqueradeIPs allocates the masqueradeIPs based off of the passed in masqueradeSubnet (.0)
// it does this by cascading down from the initial ip down to the .5 currently (more masqueradeIps may be added in the future)

func allocateV4MasqueradeIPs(masqueradeSubnetNetworkAddress net.IP, masqueradeIPs *MasqueradeIPsConfig) error {
func AllocateV4MasqueradeIPs(masqueradeSubnetNetworkAddress net.IP, masqueradeIPs *MasqueradeIPsConfig) error {
masqueradeIPs.V4OVNMasqueradeIP = iputils.NextIP(masqueradeSubnetNetworkAddress)
if masqueradeIPs.V4OVNMasqueradeIP == nil {
return fmt.Errorf("error setting V4OVNMasqueradeIP: %s", masqueradeSubnetNetworkAddress)
Expand All @@ -316,7 +316,7 @@ func allocateV4MasqueradeIPs(masqueradeSubnetNetworkAddress net.IP, masqueradeIP
return nil
}

func allocateV6MasqueradeIPs(masqueradeSubnetNetworkAddress net.IP, masqueradeIPs *MasqueradeIPsConfig) error {
func AllocateV6MasqueradeIPs(masqueradeSubnetNetworkAddress net.IP, masqueradeIPs *MasqueradeIPsConfig) error {
masqueradeIPs.V6OVNMasqueradeIP = iputils.NextIP(masqueradeSubnetNetworkAddress)
if masqueradeIPs.V6OVNMasqueradeIP == nil {
return fmt.Errorf("error setting V6OVNMasqueradeIP: %s", masqueradeSubnetNetworkAddress)
Expand Down
20 changes: 20 additions & 0 deletions go-controller/pkg/libovsdb/ops/mac_binding.go
Original file line number Diff line number Diff line change
Expand Up @@ -39,3 +39,23 @@ func DeleteStaticMacBindings(nbClient libovsdbclient.Client, smbs ...*nbdb.Stati
m := newModelClient(nbClient)
return m.Delete(opModels...)
}

type staticMACBindingPredicate func(*nbdb.StaticMACBinding) bool

// DeleteStaticMACBindingWithPredicate deletes a Static MAC entry for a logical port from the cache
func DeleteStaticMACBindingWithPredicate(nbClient libovsdbclient.Client, p staticMACBindingPredicate) error {
found := []*nbdb.StaticMACBinding{}
opModel := operationModel{
ModelPredicate: p,
ExistingResult: &found,
ErrNotFound: false,
BulkOp: false,
}

m := newModelClient(nbClient)
err := m.Delete(opModel)
if err != nil {
return err
}
return nil
}
12 changes: 12 additions & 0 deletions go-controller/pkg/node/gateway_init.go
Original file line number Diff line number Diff line change
Expand Up @@ -457,6 +457,13 @@ func (nc *DefaultNodeNetworkController) initGatewayDPUHost(kubeNodeIP net.IP) er
return err
}

// Delete stale masquerade resources if there are any. This is to make sure that there
// are no Linux resources with IP from old masquerade subnet when masquerade subnet
// gets changed as part of day2 operation.
if err := deleteStaleMasqueradeResources(gwIntf, nc.name, nc.watchFactory); err != nil {
return fmt.Errorf("failed to remove stale masquerade resources: %w", err)
}

if err := setNodeMasqueradeIPOnExtBridge(gwIntf); err != nil {
return fmt.Errorf("failed to set the node masquerade IP on the ext bridge %s: %v", gwIntf, err)
}
Expand All @@ -465,6 +472,11 @@ func (nc *DefaultNodeNetworkController) initGatewayDPUHost(kubeNodeIP net.IP) er
return fmt.Errorf("failed to set the node masquerade route to OVN: %v", err)
}

// Masquerade config mostly done on node, update annotation
if err := updateMasqueradeAnnotation(nc.name, nc.Kube); err != nil {
return fmt.Errorf("failed to update masquerade subnet annotation on node: %s, error: %v", nc.name, err)
}

err = configureSvcRouteViaInterface(nc.routeManager, gatewayIntf, gatewayNextHops)
if err != nil {
return err
Expand Down
79 changes: 67 additions & 12 deletions go-controller/pkg/node/gateway_init_linux_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -8,6 +8,7 @@ import (
"context"
"errors"
"fmt"
utilnet "k8s.io/utils/net"
"net"
"runtime"
"strings"
Expand Down Expand Up @@ -88,15 +89,6 @@ func shareGatewayInterfaceTest(app *cli.App, testNS ns.NetNS,
Action: func() error {
return testNS.Do(func(ns.NetNS) error {
defer GinkgoRecover()

// Create breth0 as a dummy link
err := netlink.LinkAdd(&netlink.Dummy{
LinkAttrs: netlink.LinkAttrs{
Name: "br" + eth0Name,
HardwareAddr: ovntest.MustParseMAC(eth0MAC),
},
})
Expect(err).NotTo(HaveOccurred())
_, err = netlink.LinkByName("br" + eth0Name)
Expect(err).NotTo(HaveOccurred())
return nil
Expand Down Expand Up @@ -192,6 +184,10 @@ func shareGatewayInterfaceTest(app *cli.App, testNS ns.NetNS,
existingNode := v1.Node{
ObjectMeta: metav1.ObjectMeta{
Name: nodeName,
Annotations: map[string]string{
// add some fake previous subnets to force OVNK to try to clean it
util.OvnNodeMasqCIDR: "{\"ipv4\":\"170.254.0.0/16\",\"ipv6\":\"fa69::/112\"}",
},
},
}
if setNodeIP {
Expand Down Expand Up @@ -260,6 +256,39 @@ func shareGatewayInterfaceTest(app *cli.App, testNS ns.NetNS,
err = testNS.Do(func(ns.NetNS) error {
defer GinkgoRecover()

// setup stale masquerade
// Create breth0 as a dummy link
err := netlink.LinkAdd(&netlink.Dummy{
LinkAttrs: netlink.LinkAttrs{
Name: "br" + eth0Name,
HardwareAddr: ovntest.MustParseMAC(eth0MAC),
},
})
Expect(err).NotTo(HaveOccurred())
link, err := netlink.LinkByName("br" + eth0Name)
Expect(err).NotTo(HaveOccurred())
err = netlink.LinkSetUp(link)
Expect(err).NotTo(HaveOccurred())
staleAddr, err := netlink.ParseAddr("170.254.0.2/32")
Expect(err).NotTo(HaveOccurred())
err = netlink.AddrAdd(link, staleAddr)
Expect(err).NotTo(HaveOccurred())
_, gw, err := net.ParseCIDR("170.254.0.1/32")
Expect(err).NotTo(HaveOccurred())
staleRoute := &netlink.Route{
LinkIndex: link.Attrs().Index,
Dst: gw,
}
err = netlink.RouteAdd(staleRoute)
Expect(err).NotTo(HaveOccurred())
// ensure stale route is present
r, err := util.LinkRouteGetFilteredRoute(
staleRoute,
netlink.RT_FILTER_DST|netlink.RT_FILTER_OIF|netlink.RT_FILTER_SRC,
)
Expect(err).NotTo(HaveOccurred())
Expect(r).NotTo(BeNil())

gatewayNextHops, gatewayIntf, err := getGatewayNextHops()
Expect(err).NotTo(HaveOccurred())
ifAddrs := ovntest.MustParseIPNets(eth0CIDR)
Expand All @@ -284,16 +313,21 @@ func shareGatewayInterfaceTest(app *cli.App, testNS ns.NetNS,
Expect(err).NotTo(HaveOccurred())
addrs, err := netlink.AddrList(l, syscall.AF_INET)
Expect(err).NotTo(HaveOccurred())
var found bool
var found, staleFound bool
expectedAddr, err := netlink.ParseAddr(eth0CIDR)
Expect(err).NotTo(HaveOccurred())
for _, a := range addrs {
// ensure stale masquerade IP was removed from the bridge
if a.IP.Equal(staleAddr.IP) && bytes.Equal(a.Mask, staleAddr.Mask) {
staleFound = true
}
// ensure code moved correct IP to bridge
if a.IP.Equal(expectedAddr.IP) && bytes.Equal(a.Mask, expectedAddr.Mask) {
found = true
break
}
}
Expect(found).To(BeTrue())
Expect(staleFound).To(BeFalse())

Expect(l.Attrs().HardwareAddr.String()).To(Equal(eth0MAC))

Expand All @@ -316,6 +350,13 @@ func shareGatewayInterfaceTest(app *cli.App, testNS ns.NetNS,
}
return nil
}, 1*time.Second).ShouldNot(HaveOccurred())
// ensure stale masquerade route is no longer present
r, err = util.LinkRouteGetFilteredRoute(
staleRoute,
netlink.RT_FILTER_DST|netlink.RT_FILTER_OIF|netlink.RT_FILTER_SRC,
)
Expect(err).NotTo(HaveOccurred())
Expect(r).To(BeNil())
return nil
})
Expect(err).NotTo(HaveOccurred())
Expand Down Expand Up @@ -384,6 +425,20 @@ func shareGatewayInterfaceTest(app *cli.App, testNS ns.NetNS,
f6 := iptV6.(*util.FakeIPTables)
err = f6.MatchState(expectedTables, nil)
Expect(err).NotTo(HaveOccurred())

// check that masquerade subnet annotation got updated
node, err := wf.GetNode(nodeName)
Expect(err).NotTo(HaveOccurred())
subnets, err := util.ParseNodeMasqueradeSubnet(node)
Expect(err).NotTo(HaveOccurred())
for _, subnet := range subnets {
if utilnet.IsIPv4CIDR(subnet) {
Expect(subnet.String()).To(Equal(config.Gateway.V4MasqueradeSubnet))
} else if utilnet.IsIPv6CIDR(subnet) {
Expect(subnet.String()).To(Equal(config.Gateway.V6MasqueradeSubnet))
}
}

return nil
}

Expand Down Expand Up @@ -740,7 +795,7 @@ func shareGatewayInterfaceDPUHostTest(app *cli.App, testNS ns.NetNS, uplinkName,
ip, ipnet, err := net.ParseCIDR(hostIP + "/24")
Expect(err).NotTo(HaveOccurred())
ipnet.IP = ip
cnnci := NewCommonNodeNetworkControllerInfo(nil, fakeClient.AdminPolicyRouteClient, wf, nil, nodeName)
cnnci := NewCommonNodeNetworkControllerInfo(kubeFakeClient, fakeClient.AdminPolicyRouteClient, wf, nil, nodeName)
nc := newDefaultNodeNetworkController(cnnci, stop, wg)
// must run route manager manually which is usually started with nc.Start()
wg.Add(1)
Expand Down
Loading

0 comments on commit 8462931

Please sign in to comment.