diff --git a/test/integration/datapath/datapath_windows_test.go b/test/integration/datapath/datapath_windows_test.go index b0057be9cf..feb7cfc16b 100644 --- a/test/integration/datapath/datapath_windows_test.go +++ b/test/integration/datapath/datapath_windows_test.go @@ -10,7 +10,6 @@ import ( "github.com/Azure/azure-container-networking/test/internal/datapath" "github.com/Azure/azure-container-networking/test/internal/kubernetes" - "github.com/Azure/azure-container-networking/test/validate" "github.com/stretchr/testify/require" apiv1 "k8s.io/api/core/v1" ) @@ -57,9 +56,15 @@ func setupWindowsEnvironment(t *testing.T) { clientset := kubernetes.MustGetClientset() if *restartKubeproxy { - validator, err := validate.CreateValidator(ctx, clientset, restConfig, *podNamespace, "cniv2", false, "windows") - require.NoError(t, err) - err = validator.RestartKubeProxyService(ctx) + privilegedDaemonSet := kubernetes.MustParseDaemonSet(kubernetes.PrivilegedDaemonSetPath) + daemonsetClient := clientset.AppsV1().DaemonSets(kubernetes.PrivilegedNamespace) + kubernetes.MustCreateDaemonset(ctx, daemonsetClient, privilegedDaemonSet) + + // Ensures that pods have been replaced if test is re-run after failure + if err := kubernetes.WaitForPodDaemonset(ctx, clientset, kubernetes.PrivilegedNamespace, privilegedDaemonSet.Name, kubernetes.PrivilegedLabelSelector); err != nil { + require.NoError(t, err) + } + err := kubernetes.RestartKubeProxyService(ctx, clientset, kubernetes.PrivilegedNamespace, kubernetes.PrivilegedLabelSelector, restConfig) require.NoError(t, err) } diff --git a/test/internal/kubernetes/utils.go b/test/internal/kubernetes/utils.go index 3a066d631d..194e5b5e60 100644 --- a/test/internal/kubernetes/utils.go +++ b/test/internal/kubernetes/utils.go @@ -34,10 +34,13 @@ const ( SubnetNameLabel = "kubernetes.azure.com/podnetwork-subnet" // RetryAttempts is the number of times to retry a test. - RetryAttempts = 90 - RetryDelay = 10 * time.Second - DeleteRetryAttempts = 12 - DeleteRetryDelay = 5 * time.Second + RetryAttempts = 90 + RetryDelay = 10 * time.Second + DeleteRetryAttempts = 12 + DeleteRetryDelay = 5 * time.Second + PrivilegedDaemonSetPath = "../manifests/load/privileged-daemonset-windows.yaml" + PrivilegedLabelSelector = "app=privileged-daemonset" + PrivilegedNamespace = "kube-system" ) var Kubeconfig = flag.String("test-kubeconfig", filepath.Join(homedir.HomeDir(), ".kube", "config"), "(optional) absolute path to the kubeconfig file") @@ -403,6 +406,9 @@ func ExecCmdOnPod(ctx context.Context, clientset *kubernetes.Clientset, namespac if err != nil { return []byte{}, errors.Wrapf(err, "error in executing command %s", cmd) } + if len(stdout.Bytes()) == 0 { + log.Printf("Warning: %v had 0 bytes returned from command - %v", podName, cmd) + } return stdout.Bytes(), nil } @@ -465,3 +471,36 @@ func MustRestartDaemonset(ctx context.Context, clientset *kubernetes.Clientset, _, err = clientset.AppsV1().DaemonSets(namespace).Update(ctx, ds, metav1.UpdateOptions{}) return errors.Wrapf(err, "failed to update ds %s", daemonsetName) } + +// Restarts kubeproxy on windows nodes from an existing privileged daemonset +func RestartKubeProxyService(ctx context.Context, clientset *kubernetes.Clientset, privilegedNamespace, privilegedLabelSelector string, config *rest.Config) error { + restartKubeProxyCmd := []string{"powershell", "Restart-service", "kubeproxy"} + + nodes, err := GetNodeList(ctx, clientset) + if err != nil { + return errors.Wrapf(err, "failed to get node list") + } + + for index := range nodes.Items { + node := nodes.Items[index] + if node.Status.NodeInfo.OperatingSystem != string(corev1.Windows) { + continue + } + // get the privileged pod + pod, err := GetPodsByNode(ctx, clientset, privilegedNamespace, privilegedLabelSelector, node.Name) + if err != nil { + return errors.Wrapf(err, "failed to get privileged pod on node %s", node.Name) + } + + if len(pod.Items) == 0 { + return errors.Errorf("there are no privileged pods on node - %v", node.Name) + } + privilegedPod := pod.Items[0] + // exec into the pod and restart kubeproxy + _, err = ExecCmdOnPod(ctx, clientset, privilegedNamespace, privilegedPod.Name, restartKubeProxyCmd, config) + if err != nil { + return errors.Wrapf(err, "failed to exec into privileged pod %s on node %s", privilegedPod.Name, node.Name) + } + } + return nil +} diff --git a/test/validate/linux_validate.go b/test/validate/linux_validate.go index a54ebed44e..ed9c919eb2 100644 --- a/test/validate/linux_validate.go +++ b/test/validate/linux_validate.go @@ -1,11 +1,14 @@ package validate import ( + "context" "encoding/json" "github.com/Azure/azure-container-networking/cns" restserver "github.com/Azure/azure-container-networking/cns/restserver" + acnk8s "github.com/Azure/azure-container-networking/test/internal/kubernetes" "github.com/pkg/errors" + corev1 "k8s.io/api/core/v1" ) const ( @@ -15,12 +18,12 @@ const ( ) var ( - restartNetworkCmd = []string{"bash", "-c", "chroot /host /bin/bash -c systemctl restart systemd-networkd"} - cnsManagedStateFileCmd = []string{"bash", "-c", "cat /var/run/azure-cns/azure-endpoints.json"} - azureVnetStateFileCmd = []string{"bash", "-c", "cat /var/run/azure-vnet.json"} - azureVnetIpamStateCmd = []string{"bash", "-c", "cat /var/run/azure-vnet-ipam.json"} - ciliumStateFileCmd = []string{"bash", "-c", "cilium endpoint list -o json"} - cnsLocalCacheCmd = []string{"curl", "localhost:10090/debug/ipaddresses", "-d", "{\"IPConfigStateFilter\":[\"Assigned\"]}"} + restartNetworkCmd = []string{"bash", "-c", "chroot /host /bin/bash -c systemctl restart systemd-networkd"} + cnsManagedStateFileCmd = []string{"bash", "-c", "cat /var/run/azure-cns/azure-endpoints.json"} + azureVnetStateFileCmd = []string{"bash", "-c", "cat /var/run/azure-vnet.json"} + azureVnetIpamStateCmd = []string{"bash", "-c", "cat /var/run/azure-vnet-ipam.json"} + ciliumStateFileCmd = []string{"bash", "-c", "cilium endpoint list -o json"} + cnsCachedAssignedIPStateCmd = []string{"curl", "localhost:10090/debug/ipaddresses", "-d", "{\"IPConfigStateFilter\":[\"Assigned\"]}"} ) type stateFileIpsFunc func([]byte) (map[string]string, error) @@ -29,18 +32,18 @@ var linuxChecksMap = map[string][]check{ "cilium": { {"cns", cnsManagedStateFileIps, cnsLabelSelector, privilegedNamespace, cnsManagedStateFileCmd}, // cns configmap "ManageEndpointState": true, | Endpoints managed in CNS State File {"cilium", ciliumStateFileIps, ciliumLabelSelector, privilegedNamespace, ciliumStateFileCmd}, - {"cns cache", cnsCacheStateFileIps, cnsLabelSelector, privilegedNamespace, cnsLocalCacheCmd}, + {"cns cache", cnsCacheStateFileIps, cnsLabelSelector, privilegedNamespace, cnsCachedAssignedIPStateCmd}, }, "cniv1": { {"azure-vnet", azureVnetStateIps, privilegedLabelSelector, privilegedNamespace, azureVnetStateFileCmd}, {"azure-vnet-ipam", azureVnetIpamStateIps, privilegedLabelSelector, privilegedNamespace, azureVnetIpamStateCmd}, }, "cniv2": { - {"cns cache", cnsCacheStateFileIps, cnsLabelSelector, privilegedNamespace, cnsLocalCacheCmd}, + {"cns cache", cnsCacheStateFileIps, cnsLabelSelector, privilegedNamespace, cnsCachedAssignedIPStateCmd}, {"azure-vnet", azureVnetStateIps, privilegedLabelSelector, privilegedNamespace, azureVnetStateFileCmd}, // cns configmap "ManageEndpointState": false, | Endpoints managed in CNI State File }, "dualstack": { - {"cns cache", cnsCacheStateFileIps, cnsLabelSelector, privilegedNamespace, cnsLocalCacheCmd}, + {"cns cache", cnsCacheStateFileIps, cnsLabelSelector, privilegedNamespace, cnsCachedAssignedIPStateCmd}, {"azure dualstackoverlay", azureVnetStateIps, privilegedLabelSelector, privilegedNamespace, azureVnetStateFileCmd}, }, } @@ -156,21 +159,6 @@ func ciliumStateFileIps(result []byte) (map[string]string, error) { return ciliumPodIps, nil } -func cnsCacheStateFileIps(result []byte) (map[string]string, error) { - var cnsLocalCache CNSLocalCache - - err := json.Unmarshal(result, &cnsLocalCache) - if err != nil { - return nil, errors.Wrapf(err, "failed to unmarshal cns local cache") - } - - cnsPodIps := make(map[string]string) - for index := range cnsLocalCache.IPConfigurationStatus { - cnsPodIps[cnsLocalCache.IPConfigurationStatus[index].IPAddress] = cnsLocalCache.IPConfigurationStatus[index].PodInfo.Name() - } - return cnsPodIps, nil -} - func azureVnetStateIps(result []byte) (map[string]string, error) { var azureVnetResult AzureCniState err := json.Unmarshal(result, &azureVnetResult) @@ -212,3 +200,37 @@ func azureVnetIpamStateIps(result []byte) (map[string]string, error) { } return azureVnetIpamPodIps, nil } + +// Linux only function +func (v *Validator) validateRestartNetwork(ctx context.Context) error { + nodes, err := acnk8s.GetNodeList(ctx, v.clientset) + if err != nil { + return errors.Wrapf(err, "failed to get node list") + } + + for index := range nodes.Items { + node := nodes.Items[index] + if node.Status.NodeInfo.OperatingSystem != string(corev1.Linux) { + continue + } + // get the privileged pod + pod, err := acnk8s.GetPodsByNode(ctx, v.clientset, privilegedNamespace, privilegedLabelSelector, node.Name) + if err != nil { + return errors.Wrapf(err, "failed to get privileged pod") + } + if len(pod.Items) == 0 { + return errors.Errorf("there are no privileged pods on node - %v", node.Name) + } + privilegedPod := pod.Items[0] + // exec into the pod to get the state file + _, err = acnk8s.ExecCmdOnPod(ctx, v.clientset, privilegedNamespace, privilegedPod.Name, restartNetworkCmd, v.config) + if err != nil { + return errors.Wrapf(err, "failed to exec into privileged pod %s on node %s", privilegedPod.Name, node.Name) + } + err = acnk8s.WaitForPodsRunning(ctx, v.clientset, "", "") + if err != nil { + return errors.Wrapf(err, "failed to wait for pods running") + } + } + return nil +} diff --git a/test/validate/utils.go b/test/validate/utils.go index cb3b5daa4c..b43a9161bc 100644 --- a/test/validate/utils.go +++ b/test/validate/utils.go @@ -11,14 +11,16 @@ import ( ) func compareIPs(expected map[string]string, actual []string) error { - if len(expected) != len(actual) { - return errors.Errorf("len of expected IPs != len of actual IPs, expected: %+v, actual: %+v", expected, actual) - } + expectedLen := len(expected) for _, ip := range actual { if _, ok := expected[ip]; !ok { return errors.Errorf("actual ip %s is unexpected, expected: %+v, actual: %+v", ip, expected, actual) } + delete(expected, ip) + } + if expectedLen != len(actual) { + return errors.Errorf("len of expected IPs != len of actual IPs, expected: %+v, actual: %+v | Remaining, potentially leaked, IP(s) on state file - %v", expectedLen, len(actual), expected) } return nil diff --git a/test/validate/validate.go b/test/validate/validate.go index d5db2815f4..5e5dc536f8 100644 --- a/test/validate/validate.go +++ b/test/validate/validate.go @@ -2,6 +2,7 @@ package validate import ( "context" + "encoding/json" "log" acnk8s "github.com/Azure/azure-container-networking/test/internal/kubernetes" @@ -72,6 +73,10 @@ func CreateValidator(ctx context.Context, clientset *kubernetes.Clientset, confi switch os { case "windows": checks = windowsChecksMap[cni] + err := acnk8s.RestartKubeProxyService(ctx, clientset, privilegedNamespace, privilegedLabelSelector, config) + if err != nil { + return nil, errors.Wrapf(err, "failed to restart kubeproxy") + } case "linux": checks = linuxChecksMap[cni] default: @@ -99,7 +104,7 @@ func (v *Validator) Validate(ctx context.Context) error { if v.os == "linux" { // We are restarting the systmemd network and checking that the connectivity works after the restart. For more details: https://github.com/cilium/cilium/issues/18706 log.Printf("Validating the restart network scenario") - err = v.ValidateRestartNetwork(ctx) + err = v.validateRestartNetwork(ctx) if err != nil { return errors.Wrapf(err, "failed to validate restart network scenario") } @@ -117,33 +122,6 @@ func (v *Validator) ValidateStateFile(ctx context.Context) error { return nil } -func (v *Validator) ValidateRestartNetwork(ctx context.Context) error { - nodes, err := acnk8s.GetNodeList(ctx, v.clientset) - if err != nil { - return errors.Wrapf(err, "failed to get node list") - } - - for index := range nodes.Items { - // get the privileged pod - pod, err := acnk8s.GetPodsByNode(ctx, v.clientset, privilegedNamespace, privilegedLabelSelector, nodes.Items[index].Name) - if err != nil { - return errors.Wrapf(err, "failed to get privileged pod") - } - - privelegedPod := pod.Items[0] - // exec into the pod to get the state file - _, err = acnk8s.ExecCmdOnPod(ctx, v.clientset, privilegedNamespace, privelegedPod.Name, restartNetworkCmd, v.config) - if err != nil { - return errors.Wrapf(err, "failed to exec into privileged pod - %s", privelegedPod.Name) - } - err = acnk8s.WaitForPodsRunning(ctx, v.clientset, "", "") - if err != nil { - return errors.Wrapf(err, "failed to wait for pods running") - } - } - return nil -} - func (v *Validator) validateIPs(ctx context.Context, stateFileIps stateFileIpsFunc, cmd []string, checkType, namespace, labelSelector string) error { log.Printf("Validating %s state file", checkType) nodes, err := acnk8s.GetNodeListByLabelSelector(ctx, v.clientset, nodeSelectorMap[v.os]) @@ -157,6 +135,9 @@ func (v *Validator) validateIPs(ctx context.Context, stateFileIps stateFileIpsFu if err != nil { return errors.Wrapf(err, "failed to get privileged pod") } + if len(pod.Items) == 0 { + return errors.Errorf("there are no privileged pods on node - %v", nodes.Items[index].Name) + } podName := pod.Items[0].Name // exec into the pod to get the state file result, err := acnk8s.ExecCmdOnPod(ctx, v.clientset, namespace, podName, cmd, v.config) @@ -165,7 +146,7 @@ func (v *Validator) validateIPs(ctx context.Context, stateFileIps stateFileIpsFu } filePodIps, err := stateFileIps(result) if err != nil { - return errors.Wrapf(err, "failed to get pod ips from state file") + return errors.Wrapf(err, "failed to get pod ips from state file on node %v", nodes.Items[index].Name) } if len(filePodIps) == 0 && v.restartCase { log.Printf("No pods found on node %s", nodes.Items[index].Name) @@ -175,7 +156,7 @@ func (v *Validator) validateIPs(ctx context.Context, stateFileIps stateFileIpsFu podIps := getPodIPsWithoutNodeIP(ctx, v.clientset, nodes.Items[index]) if err := compareIPs(filePodIps, podIps); err != nil { - return errors.Wrapf(errors.New("State file validation failed"), "for %s on node %s", checkType, nodes.Items[index].Name) + return errors.Wrapf(err, "State file validation failed for %s on node %s", checkType, nodes.Items[index].Name) } } log.Printf("State file validation for %s passed", checkType) @@ -257,36 +238,24 @@ func (v *Validator) ValidateDualStackControlPlane(ctx context.Context) error { return nil } -func (v *Validator) RestartKubeProxyService(ctx context.Context) error { - nodes, err := acnk8s.GetNodeList(ctx, v.clientset) - if err != nil { - return errors.Wrapf(err, "failed to get node list") - } - - for index := range nodes.Items { - node := nodes.Items[index] - if node.Status.NodeInfo.OperatingSystem != string(corev1.Windows) { - continue - } - // get the privileged pod - pod, err := acnk8s.GetPodsByNode(ctx, v.clientset, privilegedNamespace, privilegedLabelSelector, nodes.Items[index].Name) - if err != nil { - return errors.Wrapf(err, "failed to get privileged pod") - } - - privelegedPod := pod.Items[0] - // exec into the pod and restart kubeproxy - _, err = acnk8s.ExecCmdOnPod(ctx, v.clientset, privilegedNamespace, privelegedPod.Name, restartKubeProxyCmd, v.config) - if err != nil { - return errors.Wrapf(err, "failed to exec into privileged pod - %s", privelegedPod.Name) - } - } - return nil -} - func (v *Validator) Cleanup(ctx context.Context) { // deploy privileged pod privilegedDaemonSet := acnk8s.MustParseDaemonSet(privilegedDaemonSetPathMap[v.os]) daemonsetClient := v.clientset.AppsV1().DaemonSets(privilegedNamespace) acnk8s.MustDeleteDaemonset(ctx, daemonsetClient, privilegedDaemonSet) } + +func cnsCacheStateFileIps(result []byte) (map[string]string, error) { + var cnsLocalCache CNSLocalCache + + err := json.Unmarshal(result, &cnsLocalCache) + if err != nil { + return nil, errors.Wrapf(err, "failed to unmarshal cns local cache") + } + + cnsPodIps := make(map[string]string) + for index := range cnsLocalCache.IPConfigurationStatus { + cnsPodIps[cnsLocalCache.IPConfigurationStatus[index].IPAddress] = cnsLocalCache.IPConfigurationStatus[index].PodInfo.Name() + } + return cnsPodIps, nil +} diff --git a/test/validate/windows_validate.go b/test/validate/windows_validate.go index 6cb1182e1a..437bb0a1ad 100644 --- a/test/validate/windows_validate.go +++ b/test/validate/windows_validate.go @@ -1,6 +1,7 @@ package validate import ( + "bytes" "context" "encoding/json" "log" @@ -13,12 +14,21 @@ import ( "k8s.io/client-go/rest" ) +const ( + cnsWinLabelSelector = "k8s-app=azure-cns-win" +) + var ( - hnsEndPointCmd = []string{"powershell", "-c", "Get-HnsEndpoint | ConvertTo-Json"} - hnsNetworkCmd = []string{"powershell", "-c", "Get-HnsNetwork | ConvertTo-Json"} - azureVnetCmd = []string{"powershell", "-c", "cat ../../k/azure-vnet.json"} - azureVnetIpamCmd = []string{"powershell", "-c", "cat ../../k/azure-vnet-ipam.json"} - restartKubeProxyCmd = []string{"powershell", "Restart-service", "kubeproxy"} + hnsEndPointCmd = []string{"powershell", "-c", "Get-HnsEndpoint | ConvertTo-Json"} + hnsNetworkCmd = []string{"powershell", "-c", "Get-HnsNetwork | ConvertTo-Json"} + azureVnetCmd = []string{"powershell", "-c", "cat ../../k/azure-vnet.json"} + azureVnetIpamCmd = []string{"powershell", "-c", "cat ../../k/azure-vnet-ipam.json"} + cnsWinCachedAssignedIPStateCmd = []string{ + "powershell", "Invoke-WebRequest -Uri 127.0.0.1:10090/debug/ipaddresses", + "-Method Post -ContentType application/x-www-form-urlencoded", + "-Body \"{`\"IPConfigStateFilter`\":[`\"Assigned`\"]}\"", + "-UseBasicParsing | Select-Object -Expand Content", + } ) var windowsChecksMap = map[string][]check{ @@ -28,14 +38,16 @@ var windowsChecksMap = map[string][]check{ {"azure-vnet-ipam", azureVnetIpamIps, privilegedLabelSelector, privilegedNamespace, azureVnetIpamCmd}, }, "cniv2": { + {"hns", hnsStateFileIps, privilegedLabelSelector, privilegedNamespace, hnsEndPointCmd}, {"azure-vnet", azureVnetIps, privilegedLabelSelector, privilegedNamespace, azureVnetCmd}, + {"cns cache", cnsCacheStateFileIps, cnsWinLabelSelector, privilegedNamespace, cnsWinCachedAssignedIPStateCmd}, }, } type HNSEndpoint struct { MacAddress string `json:"MacAddress"` IPAddress net.IP `json:"IPAddress"` - IPv6Address net.IP `json:",omitempty"` + IPv6Address net.IP `json:"IPv6Address"` IsRemoteEndpoint bool `json:",omitempty"` } @@ -90,18 +102,45 @@ type AddressRecord struct { } func hnsStateFileIps(result []byte) (map[string]string, error) { - var hnsResult []HNSEndpoint - err := json.Unmarshal(result, &hnsResult) - if err != nil { - return nil, errors.Wrapf(err, "failed to unmarshal hns endpoint list") - } - + jsonType := bytes.TrimLeft(result, " \t\r\n") + isObject := jsonType[0] == '{' + isArray := jsonType[0] == '[' hnsPodIps := make(map[string]string) - for _, v := range hnsResult { - if !v.IsRemoteEndpoint { - hnsPodIps[v.IPAddress.String()] = v.MacAddress + + switch { + case isObject: + var hnsResult HNSEndpoint + err := json.Unmarshal(result, &hnsResult) + if err != nil { + return nil, errors.Wrapf(err, "failed to unmarshal hns endpoint list") + } + if !hnsResult.IsRemoteEndpoint { + hnsPodIps[hnsResult.IPAddress.String()] = hnsResult.MacAddress + if hnsResult.IPv6Address.String() != "" { + hnsPodIps[hnsResult.IPv6Address.String()] = hnsResult.MacAddress + } } + case isArray: + var hnsResult []HNSEndpoint + err := json.Unmarshal(result, &hnsResult) + if err != nil { + return nil, errors.Wrapf(err, "failed to unmarshal hns endpoint list") + } + + for _, v := range hnsResult { + if !v.IsRemoteEndpoint { + hnsPodIps[v.IPAddress.String()] = v.MacAddress + if v.IPv6Address.String() != "" { + hnsPodIps[v.IPv6Address.String()] = v.MacAddress + } + + } + } + default: + log.Printf("Leading character is - %v", jsonType[0]) + return nil, errors.New("json is malformed and does not have correct leading character") } + return hnsPodIps, nil } @@ -165,7 +204,9 @@ func validateHNSNetworkState(ctx context.Context, nodes *corev1.NodeList, client if err != nil { return errors.Wrap(err, "failed to get privileged pod") } - + if len(pod.Items) == 0 { + return errors.Errorf("there are no privileged pods on node - %v", nodes.Items[index].Name) + } podName := pod.Items[0].Name // exec into the pod to get the state file result, err := acnk8s.ExecCmdOnPod(ctx, clientset, privilegedNamespace, podName, hnsNetworkCmd, restConfig)