Skip to content

Commit

Permalink
feat: Migration stateless cni (#2470)
Browse files Browse the repository at this point in the history
* 🌈 feat: adding flags for stateless cni (#2103)

feat: stateless cni

* feat: create stateless cni binary for swift (#2275)

* enabling CNS telemetry

* CNI Telemetry enabled on CNS

* Code changes for Statefull CNI Migration

* Making changes to the CNI state migration code.

* Make code changes for Stateless CNI migrations.

* Make changes to statless CNI migration branch.

* Stateless CNI migration code changes

* resolving migration issue

* remove cni changes

* Applying changes to CNIReonciler

* Addressing the comments.

* Addressing the comments

* addressing the latest comments

* Addressing Evan's comments

* Adding a MigrateSate() function to the cnireconciler

---------

Co-authored-by: Vipul Singh <vipul21sept@gmail.com>
  • Loading branch information
behzad-mir and vipul-21 authored Feb 8, 2024
1 parent e5b9e16 commit b22ac38
Show file tree
Hide file tree
Showing 11 changed files with 204 additions and 30 deletions.
3 changes: 3 additions & 0 deletions cni/linux.Dockerfile
Original file line number Diff line number Diff line change
Expand Up @@ -13,6 +13,9 @@ RUN GOOS=$OS CGO_ENABLED=0 go build -a -o /go/bin/azure-vnet-telemetry -trimpath
RUN GOOS=$OS CGO_ENABLED=0 go build -a -o /go/bin/azure-vnet-ipam -trimpath -ldflags "-X main.version="$VERSION"" -gcflags="-dwarflocationlists=true" cni/ipam/plugin/main.go
RUN GOOS=$OS CGO_ENABLED=0 go build -a -o /go/bin/azurecni-stateless -trimpath -ldflags "-X main.version="$VERSION"" -gcflags="-dwarflocationlists=true" cni/network/stateless/main.go

FROM scratch as bins
COPY --from=azure-vnet /go/bin/* /

FROM mcr.microsoft.com/cbl-mariner/base/core:2.0 AS compressor
ARG OS
WORKDIR /payload
Expand Down
57 changes: 57 additions & 0 deletions cns/cnireconciler/podinfoprovider.go
Original file line number Diff line number Diff line change
Expand Up @@ -2,10 +2,13 @@ package cnireconciler

import (
"fmt"
"net"
"strings"

"github.com/Azure/azure-container-networking/cni/api"
"github.com/Azure/azure-container-networking/cni/client"
"github.com/Azure/azure-container-networking/cns"
"github.com/Azure/azure-container-networking/cns/logger"
"github.com/Azure/azure-container-networking/cns/restserver"
"github.com/Azure/azure-container-networking/store"
"github.com/pkg/errors"
Expand Down Expand Up @@ -101,3 +104,57 @@ func endpointStateToPodInfoByIP(state map[string]*restserver.EndpointInfo) (map[
}
return podInfoByIP, nil
}

// MigrateCNISate returns an endpoint state of CNS by reading the CNI state file
func MigrateCNISate() (map[string]*restserver.EndpointInfo, error) {
return migrateCNISate(exec.New())
}

func migrateCNISate(exc exec.Interface) (map[string]*restserver.EndpointInfo, error) {
cli := client.New(exc)
state, err := cli.GetEndpointState()
if err != nil {
return nil, fmt.Errorf("failed to invoke CNI client.GetEndpointState(): %w", err)
}
endpointState := cniStateToCnsEndpointState(state)
return endpointState, nil
}

// cniStateToCnsEndpointState converts an AzureCNIState dumped from a CNI exec
// into a EndpointInfo map, using the containerID as keys in the map.
// The map then will be saved on CNS endpoint state
func cniStateToCnsEndpointState(state *api.AzureCNIState) map[string]*restserver.EndpointInfo {
logger.Printf("Generating CNS Endpoint State")
endpointState := map[string]*restserver.EndpointInfo{}
for epID, endpoint := range state.ContainerInterfaces {
endpointInfo := &restserver.EndpointInfo{PodName: endpoint.PodName, PodNamespace: endpoint.PodNamespace, IfnameToIPMap: make(map[string]*restserver.IPInfo)}
ipInfo := &restserver.IPInfo{}
for _, epIP := range endpoint.IPAddresses {
if epIP.IP.To4() == nil { // is an ipv6 address
ipconfig := net.IPNet{IP: epIP.IP, Mask: epIP.Mask}
ipInfo.IPv6 = append(ipInfo.IPv6, ipconfig)

} else {
ipconfig := net.IPNet{IP: epIP.IP, Mask: epIP.Mask}
ipInfo.IPv4 = append(ipInfo.IPv4, ipconfig)
}
}
endpointID, Ifname := extractEndpointInfo(epID, endpoint.ContainerID)
endpointInfo.IfnameToIPMap[Ifname] = ipInfo
endpointState[endpointID] = endpointInfo
logger.Printf("CNS endpoint state extracted from CNI: [%+v]", *endpointInfo)
}
return endpointState
}

// extractEndpointInfo extract Interface Name and endpointID for each endpoint based the CNI state
func extractEndpointInfo(epID, containerID string) (endpointID, interfaceName string) {
ifName := restserver.InterfaceName
if strings.Contains(epID, "-eth") {
ifName = epID[len(epID)-4:]
}
if containerID == "" {
return epID, ifName
}
return containerID, ifName
}
27 changes: 26 additions & 1 deletion cns/cnireconciler/podinfoprovider_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -31,14 +31,39 @@ func TestNewCNIPodInfoProvider(t *testing.T) {
{
name: "good",
exec: newCNIStateFakeExec(
`{"ContainerInterfaces":{"3f813b02-eth0":{"PodName":"metrics-server-77c8679d7d-6ksdh","PodNamespace":"kube-system","PodEndpointID":"3f813b02-eth0","ContainerID":"3f813b029429b4e41a09ab33b6f6d365d2ed704017524c78d1d0dece33cdaf46","IPAddresses":[{"IP":"10.241.0.17","Mask":"//8AAA=="}]},"6e688597-eth0":{"PodName":"tunnelfront-5d96f9b987-65xbn","PodNamespace":"kube-system","PodEndpointID":"6e688597-eth0","ContainerID":"6e688597eafb97c83c84e402cc72b299bfb8aeb02021e4c99307a037352c0bed","IPAddresses":[{"IP":"10.241.0.13","Mask":"//8AAA=="}]}}}`,
`{"ContainerInterfaces":{"3f813b02-eth0":{"PodName":"metrics-server-77c8679d7d-6ksdh","IfName":"eth0",
"PodNamespace":"kube-system","PodEndpointID":"3f813b02-eth0",
"ContainerID":"3f813b029429b4e41a09ab33b6f6d365d2ed704017524c78d1d0dece33cdaf46",
"IPAddresses":[{"IP":"10.241.0.17","Mask":"//8AAA=="}]},
"6e688597-eth0":{"PodName":"tunnelfront-5d96f9b987-65xbn","IfName":"eth0","PodNamespace":"kube-system",
"PodEndpointID":"6e688597-eth0","ContainerID":"6e688597eafb97c83c84e402cc72b299bfb8aeb02021e4c99307a037352c0bed",
"IPAddresses":[{"IP":"10.241.0.13","Mask":"//8AAA=="}]}}}`,
),
want: map[string]cns.PodInfo{
"10.241.0.13": cns.NewPodInfo("6e688597eafb97c83c84e402cc72b299bfb8aeb02021e4c99307a037352c0bed", "6e688597-eth0", "tunnelfront-5d96f9b987-65xbn", "kube-system"),
"10.241.0.17": cns.NewPodInfo("3f813b029429b4e41a09ab33b6f6d365d2ed704017524c78d1d0dece33cdaf46", "3f813b02-eth0", "metrics-server-77c8679d7d-6ksdh", "kube-system"),
},
wantErr: false,
},
{
name: "dualstack",
exec: newCNIStateFakeExec(
`{"ContainerInterfaces":{"3f813b02-eth0":{"PodName":"metrics-server-77c8679d7d-6ksdh","IfName":"eth0",
"PodNamespace":"kube-system","PodEndpointID":"3f813b02-eth0",
"ContainerID":"3f813b029429b4e41a09ab33b6f6d365d2ed704017524c78d1d0dece33cdaf46",
"IPAddresses":[{"IP":"10.241.0.17","Mask":"//8AAA=="},{"IP":"2001:0db8:abcd:0015::0","Mask":"//8AAA=="}]},
"6e688597-eth0":{"PodName":"tunnelfront-5d96f9b987-65xbn","IfName":"eth0","PodNamespace":"kube-system",
"PodEndpointID":"6e688597-eth0","ContainerID":"6e688597eafb97c83c84e402cc72b299bfb8aeb02021e4c99307a037352c0bed",
"IPAddresses":[{"IP":"10.241.0.13","Mask":"//8AAA=="},{"IP":"2001:0db8:abcd:0014::0","Mask":"//8AAA=="}]}}}`,
),
want: map[string]cns.PodInfo{
"2001:db8:abcd:15::": cns.NewPodInfo("3f813b029429b4e41a09ab33b6f6d365d2ed704017524c78d1d0dece33cdaf46", "3f813b02-eth0", "metrics-server-77c8679d7d-6ksdh", "kube-system"),
"2001:db8:abcd:14::": cns.NewPodInfo("6e688597eafb97c83c84e402cc72b299bfb8aeb02021e4c99307a037352c0bed", "6e688597-eth0", "tunnelfront-5d96f9b987-65xbn", "kube-system"),
"10.241.0.17": cns.NewPodInfo("3f813b029429b4e41a09ab33b6f6d365d2ed704017524c78d1d0dece33cdaf46", "3f813b02-eth0", "metrics-server-77c8679d7d-6ksdh", "kube-system"),
"10.241.0.13": cns.NewPodInfo("6e688597eafb97c83c84e402cc72b299bfb8aeb02021e4c99307a037352c0bed", "6e688597-eth0", "tunnelfront-5d96f9b987-65xbn", "kube-system"),
},
wantErr: false,
},
{
name: "empty CNI response",
exec: newCNIStateFakeExec(
Expand Down
1 change: 1 addition & 0 deletions cns/configuration/configuration.go
Original file line number Diff line number Diff line change
Expand Up @@ -35,6 +35,7 @@ type CNSConfig struct {
EnableCNIConflistGeneration bool
EnableIPAMv2 bool
EnablePprof bool
EnableStateMigration bool
EnableSubnetScarcity bool
EnableSwiftV2 bool
InitializeFromCNI bool
Expand Down
15 changes: 14 additions & 1 deletion cns/restserver/ipam.go
Original file line number Diff line number Diff line change
Expand Up @@ -29,6 +29,11 @@ var (
ErrEndpointStateNotFound = errors.New("endpoint state could not be found in the statefile")
)

const (
ContainerIDLength = 8
InterfaceName = "eth0"
)

// requestIPConfigHandlerHelper validates the request, assign IPs and return the IPConfigs
func (service *HTTPRestService) requestIPConfigHandlerHelper(ctx context.Context, ipconfigsRequest cns.IPConfigsRequest) (*cns.IPConfigsResponse, error) {
// For SWIFT v2 scenario, the validator function will also modify the ipconfigsRequest.
Expand Down Expand Up @@ -1008,9 +1013,9 @@ func (service *HTTPRestService) EndpointHandlerAPI(w http.ResponseWriter, r *htt
// GetEndpointHandler handles the incoming GetEndpoint requests with http Get method
func (service *HTTPRestService) GetEndpointHandler(w http.ResponseWriter, r *http.Request) {
logger.Printf("[GetEndpointState] GetEndpoint for %s", r.URL.Path)

endpointID := strings.TrimPrefix(r.URL.Path, cns.EndpointPath)
endpointInfo, err := service.GetEndpointHelper(endpointID)
// Check if the request is valid
if err != nil {
response := GetEndpointResponse{
Response: Response{
Expand Down Expand Up @@ -1068,6 +1073,14 @@ func (service *HTTPRestService) GetEndpointHelper(endpointID string) (*EndpointI
logger.Warnf("[GetEndpointState] Found existing endpoint state for container %s", endpointID)
return endpointInfo, nil
}
// This part is a temprory fix if we have endpoint states belong to CNI version 1.4.X on Windows since the states don't have the containerID
// In case there was no endpoint founded with ContainerID as the key,
// then [First 8 character of containerid]-eth0 will be tried
legacyEndpointID := endpointID[:ContainerIDLength] + "-" + InterfaceName
if endpointInfo, ok := service.EndpointState[legacyEndpointID]; ok {
logger.Warnf("[GetEndpointState] Found existing endpoint state for container %s", legacyEndpointID)
return endpointInfo, nil
}
return nil, ErrEndpointStateNotFound
}

Expand Down
21 changes: 21 additions & 0 deletions cns/service/main.go
Original file line number Diff line number Diff line change
Expand Up @@ -1230,6 +1230,13 @@ func InitializeCRDState(ctx context.Context, httpRestService cns.HTTPService, cn
}
}

// perform state migration from CNI in case CNS is set to manage the endpoint state and has emty state
if cnsconfig.EnableStateMigration && !httpRestServiceImplementation.EndpointStateStore.Exists() {
if err = PopulateCNSEndpointState(httpRestServiceImplementation.EndpointStateStore); err != nil {
return errors.Wrap(err, "failed to create CNS EndpointState From CNI")
}
}

var podInfoByIPProvider cns.PodInfoByIPProvider
switch {
case cnsconfig.ManageEndpointState:
Expand Down Expand Up @@ -1516,3 +1523,17 @@ func createOrUpdateNodeInfoCRD(ctx context.Context, restConfig *rest.Config, nod

return nil
}

// PopulateCNSEndpointState initilizes CNS Endpoint State by Migrating the CNI state.
func PopulateCNSEndpointState(endpointStateStore store.KeyValueStore) error {
logger.Printf("State Migration is enabled")
endpointState, err := cnireconciler.MigrateCNISate()
if err != nil {
return errors.Wrap(err, "failed to create CNS Endpoint state from CNI")
}
err = endpointStateStore.Write(restserver.EndpointStoreKey, endpointState)
if err != nil {
return fmt.Errorf("failed to write endpoint state to store: %w", err)
}
return nil
}
11 changes: 11 additions & 0 deletions network/endpoint.go
Original file line number Diff line number Diff line change
Expand Up @@ -92,6 +92,7 @@ type EndpointInfo struct {
NICType cns.NICType
SkipDefaultRoutes bool
HNSEndpointID string
HostIfName string
}

// RouteInfo contains information about an IP route.
Expand Down Expand Up @@ -267,6 +268,8 @@ func (ep *endpoint) getInfo() *EndpointInfo {
PODName: ep.PODName,
PODNameSpace: ep.PODNameSpace,
NetworkContainerID: ep.NetworkContainerID,
HNSEndpointID: ep.HnsId,
HostIfName: ep.HostIfName,
}

info.Routes = append(info.Routes, ep.Routes...)
Expand Down Expand Up @@ -350,3 +353,11 @@ func GetPodNameWithoutSuffix(podName string) string {
logger.Info("Pod name after splitting based on", zap.Any("nameSplit", nameSplit))
return strings.Join(nameSplit, "-")
}

// IsEndpointStateInComplete returns true if both HNSEndpointID and HostVethName are missing.
func (epInfo *EndpointInfo) IsEndpointStateIncomplete() bool {
if epInfo.HNSEndpointID == "" && epInfo.IfName == "" {
return true
}
return false
}
6 changes: 6 additions & 0 deletions network/endpoint_linux.go
Original file line number Diff line number Diff line change
Expand Up @@ -531,3 +531,9 @@ func getDefaultGateway(routes []RouteInfo) net.IP {

return nil
}

// GetEndpointInfoByIPImpl returns an endpointInfo that contains corresponding HostVethName.
// TODO: It needs to be tested to see if HostVethName is required for SingleTenancy, WorkItem: 26606939
func (epInfo *EndpointInfo) GetEndpointInfoByIPImpl(_ []net.IPNet, _ string) (*EndpointInfo, error) {
return epInfo, nil
}
26 changes: 26 additions & 0 deletions network/endpoint_windows.go
Original file line number Diff line number Diff line change
Expand Up @@ -15,6 +15,7 @@ import (
"github.com/Azure/azure-container-networking/platform"
"github.com/Microsoft/hcsshim"
"github.com/Microsoft/hcsshim/hcn"
"github.com/pkg/errors"
"go.uber.org/zap"
)

Expand Down Expand Up @@ -495,3 +496,28 @@ func (ep *endpoint) getInfoImpl(epInfo *EndpointInfo) {
func (nm *networkManager) updateEndpointImpl(nw *network, existingEpInfo *EndpointInfo, targetEpInfo *EndpointInfo) (*endpoint, error) {
return nil, nil
}

// GetEndpointInfoByIPImpl returns an endpointInfo with the corrsponding HNS Endpoint ID that matches an specific IP Address.
func (epInfo *EndpointInfo) GetEndpointInfoByIPImpl(ipAddresses []net.IPNet, networkID string) (*EndpointInfo, error) {
// check if network exists, only create the network does not exist
hnsResponse, err := Hnsv2.GetNetworkByName(networkID)
if err != nil {
return epInfo, errors.Wrapf(err, "HNS Network not found")
}
hcnEndpoints, err := Hnsv2.ListEndpointsOfNetwork(hnsResponse.Id)
if err != nil {
return epInfo, errors.Wrapf(err, "failed to fetch HNS endpoints for the given network")
}
for i := range hcnEndpoints {
for _, ipConfiguration := range hcnEndpoints[i].IpConfigurations {
for _, ipAddress := range ipAddresses {
prefixLength, _ := ipAddress.Mask.Size()
if ipConfiguration.IpAddress == ipAddress.IP.String() && ipConfiguration.PrefixLength == uint8(prefixLength) {
epInfo.HNSEndpointID = hcnEndpoints[i].Id
return epInfo, nil
}
}
}
}
return epInfo, errors.Wrapf(err, "No HNSEndpointID matches the IPAddress: "+ipAddresses[0].IP.String())
}
65 changes: 38 additions & 27 deletions network/manager.go
Original file line number Diff line number Diff line change
Expand Up @@ -420,6 +420,39 @@ func (nm *networkManager) UpdateEndpointState(ep *endpoint) error {
return nil
}

// GetEndpointState will make a call to CNS GetEndpointState API in the stateless CNI mode to fetch the endpointInfo
// TODO unit tests need to be added, WorkItem: 26606939
func (nm *networkManager) GetEndpointState(networkID, endpointID string) (*EndpointInfo, error) {
endpointResponse, err := nm.CnsClient.GetEndpoint(context.TODO(), endpointID)
if err != nil {
return nil, errors.Wrapf(err, "Get endpoint API returend with error")
}
epInfo := &EndpointInfo{
Id: endpointID,
IfIndex: EndpointIfIndex, // Azure CNI supports only one interface
IfName: endpointResponse.EndpointInfo.HostVethName,
ContainerID: endpointID,
PODName: endpointResponse.EndpointInfo.PodName,
PODNameSpace: endpointResponse.EndpointInfo.PodNamespace,
NetworkContainerID: endpointID,
HNSEndpointID: endpointResponse.EndpointInfo.HnsEndpointID,
}

for _, ip := range endpointResponse.EndpointInfo.IfnameToIPMap {
epInfo.IPAddresses = ip.IPv4
epInfo.IPAddresses = append(epInfo.IPAddresses, ip.IPv6...)

}
if epInfo.IsEndpointStateIncomplete() {
epInfo, err = epInfo.GetEndpointInfoByIPImpl(epInfo.IPAddresses, networkID)
if err != nil {
return nil, errors.Wrapf(err, "Get endpoint API returend with error")
}
}
logger.Info("returning getEndpoint API with", zap.String("Endpoint Info: ", epInfo.PrettyString()), zap.String("HNISID : ", epInfo.HNSEndpointID))
return epInfo, nil
}

// DeleteEndpoint deletes an existing container endpoint.
func (nm *networkManager) DeleteEndpoint(networkID, endpointID string, epInfo *EndpointInfo) error {
nm.Lock()
Expand Down Expand Up @@ -475,45 +508,23 @@ func (nm *networkManager) DeleteEndpointState(networkID string, epInfo *Endpoint
}

// GetEndpointInfo returns information about the given endpoint.
func (nm *networkManager) GetEndpointInfo(networkId string, endpointId string) (*EndpointInfo, error) {
func (nm *networkManager) GetEndpointInfo(networkID, endpointID string) (*EndpointInfo, error) {
nm.Lock()
defer nm.Unlock()

if nm.IsStatelessCNIMode() {
logger.Info("calling cns getEndpoint API")
endpointResponse, err := nm.CnsClient.GetEndpoint(context.TODO(), endpointId)
if err != nil {
return nil, errors.Wrapf(err, "Get endpoint API returend with error")
}
if endpointResponse.EndpointInfo.HnsEndpointID == "" && endpointResponse.EndpointInfo.HostVethName == "" {
return nil, errors.New("Get endpoint API returend with empty HNSEndpointID and HostVethName")
}
epInfo := &EndpointInfo{
Id: endpointId,
IfIndex: EndpointIfIndex, // Azure CNI supports only one interface
IfName: endpointResponse.EndpointInfo.HostVethName,
ContainerID: endpointId,
PODName: endpointResponse.EndpointInfo.PodName,
PODNameSpace: endpointResponse.EndpointInfo.PodNamespace,
NetworkContainerID: endpointId,
HNSEndpointID: endpointResponse.EndpointInfo.HnsEndpointID,
}
epInfo, err := nm.GetEndpointState(networkID, endpointID)

for _, ip := range endpointResponse.EndpointInfo.IfnameToIPMap {
epInfo.IPAddresses = ip.IPv4
epInfo.IPAddresses = append(epInfo.IPAddresses, ip.IPv6...)

}
logger.Info("returning getEndpoint API with", zap.String("Endpoint Info: ", epInfo.PrettyString()), zap.String("HNISID : ", epInfo.HNSEndpointID))
return epInfo, nil
return epInfo, err
}

nw, err := nm.getNetwork(networkId)
nw, err := nm.getNetwork(networkID)
if err != nil {
return nil, err
}

ep, err := nw.getEndpoint(endpointId)
ep, err := nw.getEndpoint(endpointID)
if err != nil {
return nil, err
}
Expand Down
2 changes: 1 addition & 1 deletion network/manager_mock.go
Original file line number Diff line number Diff line change
Expand Up @@ -99,7 +99,7 @@ func (nm *MockNetworkManager) GetAllEndpoints(networkID string) (map[string]*End
}

// GetEndpointInfo mock
func (nm *MockNetworkManager) GetEndpointInfo(networkID string, endpointID string) (*EndpointInfo, error) {
func (nm *MockNetworkManager) GetEndpointInfo(_, endpointID string) (*EndpointInfo, error) {
if info, exists := nm.TestEndpointInfoMap[endpointID]; exists {
return info, nil
}
Expand Down

0 comments on commit b22ac38

Please sign in to comment.