From 78a577c11b4577e76945318c4957a4e342a172b1 Mon Sep 17 00:00:00 2001 From: Paul Yu <129891899+paulyufan2@users.noreply.github.com> Date: Wed, 27 Sep 2023 19:36:48 -0400 Subject: [PATCH] v4overlay windows test cases (#2187) * feat: adding in v4overlay windows tests * chore: address feedback * fix: addressing feedback --------- Co-authored-by: Paul Johnston --- .../azure-cni-overlay-e2e-step-template.yaml | 38 ++++++ hack/aks/Makefile | 2 +- hack/aks/README.md | 1 + .../datapath/datapath_windows_test.go | 12 ++ test/integration/load/load_test.go | 96 ++++++++++++++ test/internal/kubernetes/utils.go | 34 +++++ test/internal/kubernetes/utils_create.go | 119 +++++++++++++----- test/validate/linux_validate.go | 6 - test/validate/utils.go | 9 +- test/validate/validate.go | 93 +++++++++++--- test/validate/windows_validate.go | 71 ++++++++++- 11 files changed, 419 insertions(+), 62 deletions(-) diff --git a/.pipelines/singletenancy/azure-cni-overlay/azure-cni-overlay-e2e-step-template.yaml b/.pipelines/singletenancy/azure-cni-overlay/azure-cni-overlay-e2e-step-template.yaml index 01501e9f83..1c4329d8a3 100644 --- a/.pipelines/singletenancy/azure-cni-overlay/azure-cni-overlay-e2e-step-template.yaml +++ b/.pipelines/singletenancy/azure-cni-overlay/azure-cni-overlay-e2e-step-template.yaml @@ -172,3 +172,41 @@ steps: name: "Cleanupartifactdir" displayName: "Cleanup artifact dir" condition: always() + + - task: AzureCLI@2 + inputs: + azureSubscription: $(BUILD_VALIDATIONS_SERVICE_CONNECTION) + scriptLocation: "inlineScript" + scriptType: "bash" + addSpnToEnvironment: true + inlineScript: | + set -e + make -C ./hack/aks windows-nodepool-up AZCLI=az SUB=$(SUB_AZURE_NETWORK_AGENT_BUILD_VALIDATIONS) CLUSTER=${{ parameters.clusterName }} + echo "Windows node are successfully added to v4 Overlay Cluster" + kubectl cluster-info + kubectl get node -owide + kubectl get po -owide -A + name: "Add_Windows_Node" + displayName: "Add windows node on v4 overlay cluster" + + - script: | + 
nodeList=`kubectl get node -owide | grep Windows | awk '{print $1}'` + for node in $nodeList; do + taint=`kubectl describe node $node | grep Taints | awk '{print $2}'` + if [ "$taint" == "node.cloudprovider.kubernetes.io/uninitialized=true:NoSchedule" ]; then + kubectl taint nodes $node node.cloudprovider.kubernetes.io/uninitialized=true:NoSchedule- + fi + done + CNS=$(make cns-version) + sudo -E env "PATH=$PATH" make test-load SCALE_UP=32 OS_TYPE=windows CNI_TYPE=cniv2 VALIDATE_STATEFILE=true INSTALL_CNS=true INSTALL_AZURE_CNI_OVERLAY=true VALIDATE_V4OVERLAY=true CNS_VERSION=${CNS} CNI_DROPGZ_VERSION=$(dropgzVersion) TEST_DROPGZ=${{ parameters.testDropgz }} CLEANUP=true + name: "WindowsOverlayControlPlaneScaleTests" + displayName: "Windows v4Overlay ControlPlane Scale Tests" + retryCountOnTaskFailure: 3 + + - script: | + echo "IPv4 Overlay DataPath Test" + cd test/integration/datapath + sudo -E env "PATH=$PATH" go test -count=1 datapath_windows_test.go -timeout 3m -tags connection -restartKubeproxy true -run ^TestDatapathWin$ + name: "WindowsV4OverlayDatapathTests" + displayName: "Windows v4Overlay Datapath Tests" + retryCountOnTaskFailure: 3 diff --git a/hack/aks/Makefile b/hack/aks/Makefile index 85f3ec2d05..45156412ae 100644 --- a/hack/aks/Makefile +++ b/hack/aks/Makefile @@ -94,7 +94,7 @@ overlay-byocni-up: rg-up overlay-net-up ## Brings up an Overlay BYO CNI cluster --node-os-upgrade-channel $(NODEUPGRADE) \ --node-count $(NODE_COUNT) \ --node-vm-size $(VM_SIZE) \ - --load-balancer-sku basic \ + --load-balancer-sku standard \ --network-plugin none \ --network-plugin-mode overlay \ --pod-cidr 192.168.0.0/16 \ diff --git a/hack/aks/README.md b/hack/aks/README.md index cfcb8154b9..e1884d05d4 100644 --- a/hack/aks/README.md +++ b/hack/aks/README.md @@ -35,6 +35,7 @@ AKS Clusters windows-cniv1-up Bring up a Windows AzCNIv1 cluster linux-cniv1-up Bring up a Linux AzCNIv1 cluster dualstack-overlay-byocni-up Bring up an dualstack overlay cluster without CNS and CNI 
installed + windows-nodepool-up Add windows node pool down Delete the cluster vmss-restart Restart the nodes of the cluster ``` diff --git a/test/integration/datapath/datapath_windows_test.go b/test/integration/datapath/datapath_windows_test.go index 8ce2fdf9b7..1b82a32b3b 100644 --- a/test/integration/datapath/datapath_windows_test.go +++ b/test/integration/datapath/datapath_windows_test.go @@ -10,6 +10,7 @@ import ( "github.com/Azure/azure-container-networking/test/internal/datapath" "github.com/Azure/azure-container-networking/test/internal/kubernetes" + "github.com/Azure/azure-container-networking/test/validate" "github.com/stretchr/testify/require" apiv1 "k8s.io/api/core/v1" ) @@ -25,6 +26,7 @@ var ( podPrefix = flag.String("podName", "datapod", "Prefix for test pods") podNamespace = flag.String("namespace", "windows-datapath-test", "Namespace for test pods") nodepoolSelector = flag.String("nodepoolSelector", "npwin", "Provides nodepool as a windows Node-Selector for pods") + restartKubeproxy = flag.Bool("restartKubeproxy", false, "restarts kubeproxy on the windows node") ) /* @@ -48,12 +50,22 @@ Timeout context is controled by the -timeout flag. 
func setupWindowsEnvironment(t *testing.T) { ctx := context.Background() + t.Log("Get REST config") + restConfig := kubernetes.MustGetRestConfig(t) + t.Log("Create Clientset") clientset, err := kubernetes.MustGetClientset() if err != nil { t.Fatal(err) } + if *restartKubeproxy { + validator, err := validate.CreateValidator(ctx, clientset, restConfig, *podNamespace, "cniv2", false, "windows") + require.NoError(t, err) + err = validator.RestartKubeProxyService(ctx) + require.NoError(t, err) + } + t.Log("Create Label Selectors") podLabelSelector := kubernetes.CreateLabelSelector(podLabelKey, podPrefix) nodeLabelSelector := kubernetes.CreateLabelSelector(nodepoolKey, nodepoolSelector) diff --git a/test/integration/load/load_test.go b/test/integration/load/load_test.go index 6ddbb68506..894420ba35 100644 --- a/test/integration/load/load_test.go +++ b/test/integration/load/load_test.go @@ -21,6 +21,7 @@ type TestConfig struct { Replicas int `env:"REPLICAS" default:"1"` ValidateStateFile bool `env:"VALIDATE_STATEFILE" default:"false"` ValidateDualStack bool `env:"VALIDATE_DUALSTACK" default:"false"` + ValidateV4Overlay bool `env:"VALIDATE_V4OVERLAY" default:"false"` SkipWait bool `env:"SKIP_WAIT" default:"false"` RestartCase bool `env:"RESTART_CASE" default:"false"` Cleanup bool `env:"CLEANUP" default:"false"` @@ -107,6 +108,10 @@ func TestLoad(t *testing.T) { t.Run("Validate state file", TestValidateState) } + if testConfig.ValidateV4Overlay { + t.Run("Validate v4overlay", TestV4OverlayProperties) + } + if testConfig.ValidateDualStack { t.Run("Validate dualstack overlay", TestDualStackProperties) } @@ -114,6 +119,8 @@ func TestLoad(t *testing.T) { if testConfig.Cleanup { err = kubernetes.MustDeleteDeployment(ctx, deploymentsClient, deployment) require.NoError(t, err, "error deleteing load deployment") + err = kubernetes.WaitForPodsDelete(ctx, clientset, namespace, podLabelSelector) + require.NoError(t, err, "error waiting for pods to delete") } } @@ -171,7 +178,96 @@ 
func TestScaleDeployment(t *testing.T) { if testConfig.Cleanup { err = kubernetes.MustDeleteDeployment(ctx, deploymentsClient, deployment) require.NoError(t, err, "error deleteing load deployment") + err = kubernetes.WaitForPodsDelete(ctx, clientset, namespace, podLabelSelector) + require.NoError(t, err, "error waiting for pods to delete") + } +} + +// TestValidCNSStateDuringScaleAndCNSRestartToTriggerDropgzInstall +// tests that dropgz install during a pod scaling event, does not crash cns +func TestValidCNSStateDuringScaleAndCNSRestartToTriggerDropgzInstall(t *testing.T) { + clientset, err := kubernetes.MustGetClientset() + require.NoError(t, err) + + config := kubernetes.MustGetRestConfig(t) + ctx, cancel := context.WithTimeout(context.Background(), 30*time.Minute) + defer cancel() + + validator, err := validate.CreateValidator(ctx, clientset, config, namespace, testConfig.CNIType, testConfig.RestartCase, testConfig.OSType) + require.NoError(t, err) + + err = validator.Validate(ctx) + require.NoError(t, err) + + deployment, err := kubernetes.MustParseDeployment(noopDeploymentMap[testConfig.OSType]) + require.NoError(t, err) + deploymentsClient := clientset.AppsV1().Deployments(namespace) + + if testConfig.Cleanup { + // Create a deployment + err = kubernetes.MustCreateDeployment(ctx, deploymentsClient, deployment) + require.NoError(t, err) } + + // Scale it up and "skipWait", so CNS restart can happen immediately after scale call is made (while pods are still creating) + skipWait := true + err = kubernetes.MustScaleDeployment(ctx, deploymentsClient, deployment, clientset, namespace, podLabelSelector, testConfig.ScaleUpReplicas, skipWait) + require.NoError(t, err) + + // restart linux CNS (linux, windows) + err = kubernetes.RestartCNSDaemonset(ctx, clientset) + require.NoError(t, err) + + // wait for pods to settle before checking cns state (otherwise, race between getting pods in creating state, and getting CNS state file) + err = 
kubernetes.WaitForPodDeployment(ctx, clientset, namespace, deployment.Name, podLabelSelector, testConfig.ScaleUpReplicas) + require.NoError(t, err) + + // Validate the CNS state + err = validator.Validate(ctx) + require.NoError(t, err) + + // Scale it down + err = kubernetes.MustScaleDeployment(ctx, deploymentsClient, deployment, clientset, namespace, podLabelSelector, testConfig.ScaleDownReplicas, skipWait) + require.NoError(t, err) + + // restart linux CNS (linux, windows) + err = kubernetes.RestartCNSDaemonset(ctx, clientset) + require.NoError(t, err) + + // wait for pods to settle before checking cns state (otherwise, race between getting pods in terminating state, and getting CNS state file) + err = kubernetes.WaitForPodDeployment(ctx, clientset, namespace, deployment.Name, podLabelSelector, testConfig.ScaleDownReplicas) + require.NoError(t, err) + + // Validate the CNS state + err = validator.Validate(ctx) + require.NoError(t, err) + + if testConfig.Cleanup { + err = kubernetes.MustDeleteDeployment(ctx, deploymentsClient, deployment) + require.NoError(t, err, "error deleteing load deployment") + err = kubernetes.WaitForPodsDelete(ctx, clientset, namespace, podLabelSelector) + require.NoError(t, err, "error waiting for pods to delete") + } +} + +func TestV4OverlayProperties(t *testing.T) { + if !testConfig.ValidateV4Overlay { + return + } + clientset, err := kubernetes.MustGetClientset() + require.NoError(t, err) + + config := kubernetes.MustGetRestConfig(t) + ctx, cancel := context.WithTimeout(context.Background(), 30*time.Minute) + defer cancel() + + validator, err := validate.CreateValidator(ctx, clientset, config, namespace, testConfig.CNIType, testConfig.RestartCase, testConfig.OSType) + require.NoError(t, err) + + // validate IPv4 overlay scenarios + t.Log("Validating v4Overlay node labels") + err = validator.ValidateV4OverlayControlPlane(ctx) + require.NoError(t, err) } func TestDualStackProperties(t *testing.T) { diff --git 
a/test/internal/kubernetes/utils.go b/test/internal/kubernetes/utils.go index 4d63043444..73a0cdcc7b 100644 --- a/test/internal/kubernetes/utils.go +++ b/test/internal/kubernetes/utils.go @@ -233,6 +233,24 @@ func WaitForPodsRunning(ctx context.Context, clientset *kubernetes.Clientset, na return errors.Wrap(retrier.Do(ctx, checkPodIPsFn), "failed to check if pods were running") } +func WaitForPodsDelete(ctx context.Context, clientset *kubernetes.Clientset, namespace, labelselector string) error { + podsClient := clientset.CoreV1().Pods(namespace) + + checkPodsDeleted := func() error { + podList, err := podsClient.List(ctx, metav1.ListOptions{LabelSelector: labelselector}) + if err != nil { + return errors.Wrapf(err, "could not list pods with label selector %s", labelselector) + } + if len(podList.Items) != 0 { + return errors.Errorf("%d pods still present", len(podList.Items)) + } + return nil + } + + retrier := retry.Retrier{Attempts: RetryAttempts, Delay: RetryDelay} + return errors.Wrap(retrier.Do(ctx, checkPodsDeleted), "failed to wait for pods to delete") +} + func WaitForPodDeployment(ctx context.Context, clientset *kubernetes.Clientset, namespace, deploymentName, podLabelSelector string, replicas int) error { podsClient := clientset.CoreV1().Pods(namespace) deploymentsClient := clientset.AppsV1().Deployments(namespace) @@ -431,3 +449,19 @@ func HasWindowsNodes(ctx context.Context, clientset *kubernetes.Clientset) (bool } return false, nil } + +func MustRestartDaemonset(ctx context.Context, clientset *kubernetes.Clientset, namespace, daemonsetName string) error { + ds, err := clientset.AppsV1().DaemonSets(namespace).Get(ctx, daemonsetName, metav1.GetOptions{}) + if err != nil { + return errors.Wrapf(err, "failed to get daemonset %s", daemonsetName) + } + + if ds.Spec.Template.ObjectMeta.Annotations == nil { + ds.Spec.Template.ObjectMeta.Annotations = make(map[string]string) + } + + ds.Spec.Template.ObjectMeta.Annotations["kubectl.kubernetes.io/restartedAt"] = 
time.Now().Format(time.RFC3339) + + _, err = clientset.AppsV1().DaemonSets(namespace).Update(ctx, ds, metav1.UpdateOptions{}) + return errors.Wrapf(err, "failed to update ds %s", daemonsetName) +} diff --git a/test/internal/kubernetes/utils_create.go b/test/internal/kubernetes/utils_create.go index 3f82fca174..a369667e8d 100644 --- a/test/internal/kubernetes/utils_create.go +++ b/test/internal/kubernetes/utils_create.go @@ -255,10 +255,50 @@ func InstallCNSDaemonset(ctx context.Context, clientset *kubernetes.Clientset, l return cleanupds, nil } -func loadCNSDaemonset(ctx context.Context, clientset *kubernetes.Clientset, cnsVersion, cniDropgzVersion string, nodeOS corev1.OSName) (appsv1.DaemonSet, cnsDetails, error) { +func RestartCNSDaemonset(ctx context.Context, clientset *kubernetes.Clientset) error { + cniDropgzVersion := os.Getenv(envCNIDropgzVersion) + cnsVersion := os.Getenv(envCNSVersion) + cnsScenarioMap, err := initCNSScenarioVars() + if err != nil { + return errors.Wrap(err, "failed to initialize cns scenario map") + } + + oses := []corev1.OSName{corev1.Linux} + hasWinNodes, err := HasWindowsNodes(ctx, clientset) + if err != nil { + return errors.Wrap(err, "failed to check if cluster has windows nodes") + } + + if hasWinNodes { + // prepend windows so it's first os to restart, if present + oses = append([]corev1.OSName{corev1.Windows}, oses...) 
+ } + + restartErrors := []error{} + for _, nodeOS := range oses { + cns, _, err := parseCNSDaemonset(cnsVersion, cniDropgzVersion, cnsScenarioMap, nodeOS) + if err != nil { + restartErrors = append(restartErrors, err) + } + + err = MustRestartDaemonset(ctx, clientset, cns.Namespace, cns.Name) + if err != nil { + restartErrors = append(restartErrors, err) + } + + } + + if len(restartErrors) > 0 { + log.Printf("Saw errors %+v", restartErrors) + return restartErrors[0] + } + return nil +} + +func initCNSScenarioVars() (map[CNSScenario]map[corev1.OSName]cnsDetails, error) { _, b, _, ok := runtime.Caller(0) if !ok { - return appsv1.DaemonSet{}, cnsDetails{}, errors.Wrap(ErrPathNotFound, "could not get path to caller") + return map[CNSScenario]map[corev1.OSName]cnsDetails{}, errors.Wrap(ErrPathNotFound, "could not get path to caller") } basepath := filepath.Dir(b) cnsManifestFolder := path.Join(basepath, "../../integration/manifests/cns") @@ -380,6 +420,14 @@ func loadCNSDaemonset(ctx context.Context, clientset *kubernetes.Clientset, cnsV }, } + return cnsScenarioMap, nil +} + +func loadCNSDaemonset(ctx context.Context, clientset *kubernetes.Clientset, cnsVersion, cniDropgzVersion string, nodeOS corev1.OSName) (appsv1.DaemonSet, cnsDetails, error) { + cnsScenarioMap, err := initCNSScenarioVars() + if err != nil { + return appsv1.DaemonSet{}, cnsDetails{}, errors.Wrap(err, "failed to initialize cns scenario map") + } cns, cnsScenarioDetails, err := setupCNSDaemonset(ctx, clientset, cnsVersion, cniDropgzVersion, cnsScenarioMap, nodeOS) if err != nil { return appsv1.DaemonSet{}, cnsDetails{}, errors.Wrap(err, "failed to setup cns daemonset") @@ -396,6 +444,44 @@ func setupCNSDaemonset( cnsVersion, cniDropgzVersion string, cnsScenarioMap map[CNSScenario]map[corev1.OSName]cnsDetails, nodeOS corev1.OSName, +) (appsv1.DaemonSet, cnsDetails, error) { + cns, cnsScenarioDetails, err := parseCNSDaemonset(cnsVersion, cniDropgzVersion, cnsScenarioMap, nodeOS) + if err != nil { + 
return appsv1.DaemonSet{}, cnsDetails{}, errors.Wrap(err, "failed to parse cns daemonset") + } + + log.Printf("Installing CNS with image %s", cns.Spec.Template.Spec.Containers[0].Image) + cnsDaemonsetClient := clientset.AppsV1().DaemonSets(cns.Namespace) + + // setup the CNS configmap + if err := MustSetupConfigMap(ctx, clientset, cnsScenarioDetails.configMapPath); err != nil { + return cns, cnsDetails{}, errors.Wrapf(err, "failed to setup CNS %s configMap", cnsScenarioDetails.configMapPath) + } + + // setup common RBAC, ClusteerRole, ClusterRoleBinding, ServiceAccount + if _, err := MustSetUpClusterRBAC(ctx, clientset, cnsScenarioDetails.clusterRolePath, cnsScenarioDetails.clusterRoleBindingPath, cnsScenarioDetails.serviceAccountPath); err != nil { + return appsv1.DaemonSet{}, cnsDetails{}, errors.Wrap(err, "failed to setup common RBAC, ClusteerRole, ClusterRoleBinding and ServiceAccount") + } + + // setup RBAC, Role, RoleBinding + if err := MustSetUpRBAC(ctx, clientset, cnsScenarioDetails.rolePath, cnsScenarioDetails.roleBindingPath); err != nil { + return appsv1.DaemonSet{}, cnsDetails{}, errors.Wrap(err, "failed to setup RBAC, Role and RoleBinding") + } + + if err := MustCreateDaemonset(ctx, cnsDaemonsetClient, cns); err != nil { + return appsv1.DaemonSet{}, cnsDetails{}, errors.Wrap(err, "failed to create daemonset") + } + + if err := WaitForPodDaemonset(ctx, clientset, cns.Namespace, cns.Name, cnsScenarioDetails.labelSelector); err != nil { + return appsv1.DaemonSet{}, cnsDetails{}, errors.Wrap(err, "failed to check daemonset running") + } + return cns, cnsScenarioDetails, nil +} + +// parseCNSDaemonset just parses the appropriate cns daemonset +func parseCNSDaemonset(cnsVersion, cniDropgzVersion string, + cnsScenarioMap map[CNSScenario]map[corev1.OSName]cnsDetails, + nodeOS corev1.OSName, ) (appsv1.DaemonSet, cnsDetails, error) { for scenario := range cnsScenarioMap { if ok, err := strconv.ParseBool(os.Getenv(string(scenario))); err != nil || !ok { @@ -403,7 
+489,7 @@ func setupCNSDaemonset( continue } - log.Printf("installing %s", scenario) + log.Printf("%s set to 'true'", scenario) cnsScenarioDetails, ok := cnsScenarioMap[scenario][nodeOS] if !ok { @@ -434,33 +520,6 @@ func setupCNSDaemonset( if len(cnsScenarioDetails.containerVolumeMounts) > 0 { cns.Spec.Template.Spec.Containers[0].VolumeMounts = cnsScenarioDetails.containerVolumeMounts } - - // setup the CNS configmap - if err := MustSetupConfigMap(ctx, clientset, cnsScenarioDetails.configMapPath); err != nil { - return cns, cnsDetails{}, errors.Wrapf(err, "failed to setup CNS %s configMap", cnsScenarioDetails.configMapPath) - } - - cnsDaemonsetClient := clientset.AppsV1().DaemonSets(cns.Namespace) - - log.Printf("Installing CNS with image %s", cns.Spec.Template.Spec.Containers[0].Image) - - // setup common RBAC, ClusteerRole, ClusterRoleBinding, ServiceAccount - if _, err := MustSetUpClusterRBAC(ctx, clientset, cnsScenarioDetails.clusterRolePath, cnsScenarioDetails.clusterRoleBindingPath, cnsScenarioDetails.serviceAccountPath); err != nil { - return appsv1.DaemonSet{}, cnsDetails{}, errors.Wrap(err, "failed to setup common RBAC, ClusteerRole, ClusterRoleBinding and ServiceAccount") - } - - // setup RBAC, Role, RoleBinding - if err := MustSetUpRBAC(ctx, clientset, cnsScenarioDetails.rolePath, cnsScenarioDetails.roleBindingPath); err != nil { - return appsv1.DaemonSet{}, cnsDetails{}, errors.Wrap(err, "failed to setup RBAC, Role and RoleBinding") - } - - if err := MustCreateDaemonset(ctx, cnsDaemonsetClient, cns); err != nil { - return appsv1.DaemonSet{}, cnsDetails{}, errors.Wrap(err, "failed to create daemonset") - } - - if err := WaitForPodDaemonset(ctx, clientset, cns.Namespace, cns.Name, cnsScenarioDetails.labelSelector); err != nil { - return appsv1.DaemonSet{}, cnsDetails{}, errors.Wrap(err, "failed to check daemonset running") - } return cns, cnsScenarioDetails, nil } return appsv1.DaemonSet{}, cnsDetails{}, errors.Wrap(ErrNoCNSScenarioDefined, "no 
CNSSCenario env vars set to true, must explicitly set one to true") diff --git a/test/validate/linux_validate.go b/test/validate/linux_validate.go index e3f2768adc..a54ebed44e 100644 --- a/test/validate/linux_validate.go +++ b/test/validate/linux_validate.go @@ -23,12 +23,6 @@ var ( cnsLocalCacheCmd = []string{"curl", "localhost:10090/debug/ipaddresses", "-d", "{\"IPConfigStateFilter\":[\"Assigned\"]}"} ) -// dualstack overlay Linux and windows nodes must have these labels -var dualstackOverlayNodeLabels = map[string]string{ - "kubernetes.azure.com/podnetwork-type": "overlay", - "kubernetes.azure.com/podv6network-type": "overlay", -} - type stateFileIpsFunc func([]byte) (map[string]string, error) var linuxChecksMap = map[string][]check{ diff --git a/test/validate/utils.go b/test/validate/utils.go index 70d4d699d9..cb3b5daa4c 100644 --- a/test/validate/utils.go +++ b/test/validate/utils.go @@ -5,22 +5,23 @@ import ( "reflect" acnk8s "github.com/Azure/azure-container-networking/test/internal/kubernetes" + "github.com/pkg/errors" corev1 "k8s.io/api/core/v1" "k8s.io/client-go/kubernetes" ) -func compareIPs(expected map[string]string, actual []string) bool { +func compareIPs(expected map[string]string, actual []string) error { if len(expected) != len(actual) { - return false + return errors.Errorf("len of expected IPs != len of actual IPs, expected: %+v, actual: %+v", expected, actual) } for _, ip := range actual { if _, ok := expected[ip]; !ok { - return false + return errors.Errorf("actual ip %s is unexpected, expected: %+v, actual: %+v", ip, expected, actual) } } - return true + return nil } // func to get the pods ip without the node ip (ie. 
host network as false) diff --git a/test/validate/validate.go b/test/validate/validate.go index 1adc50e2bc..944c5ba274 100644 --- a/test/validate/validate.go +++ b/test/validate/validate.go @@ -21,9 +21,22 @@ var nodeSelectorMap = map[string]string{ "linux": "kubernetes.io/os=linux", } +// IPv4 overlay Linux and windows nodes must have this label +var v4OverlayNodeLabels = map[string]string{ + "kubernetes.azure.com/podnetwork-type": "overlay", +} + +// dualstack overlay Linux and windows nodes must have these labels +var dualstackOverlayNodeLabels = map[string]string{ + "kubernetes.azure.com/podnetwork-type": "overlay", + "kubernetes.azure.com/podv6network-type": "overlay", +} + const ( - privilegedLabelSelector = "app=privileged-daemonset" - privilegedNamespace = "kube-system" + privilegedLabelSelector = "app=privileged-daemonset" + privilegedNamespace = "kube-system" + IPv4ExpectedIPCount = 1 + DualstackExpectedIPCount = 2 ) type Validator struct { @@ -165,9 +178,7 @@ func (v *Validator) validateIPs(ctx context.Context, stateFileIps stateFileIpsFu // get the pod ips podIps := getPodIPsWithoutNodeIP(ctx, v.clientset, nodes.Items[index]) - check := compareIPs(filePodIps, podIps) - - if !check { + if err := compareIPs(filePodIps, podIps); err != nil { return errors.Wrapf(errors.New("State file validation failed"), "for %s on node %s", checkType, nodes.Items[index].Name) } } @@ -175,12 +186,8 @@ func (v *Validator) validateIPs(ctx context.Context, stateFileIps stateFileIpsFu return nil } -func (v *Validator) validateDualStackNodeProperties(ctx context.Context) error { - log.Print("Validating Dualstack Overlay Node properties") - nodes, err := acnk8s.GetNodeListByLabelSelector(ctx, v.clientset, nodeSelectorMap[v.os]) - if err != nil { - return errors.Wrapf(err, "failed to get node list") - } +func validateNodeProperties(nodes *corev1.NodeList, labels map[string]string, expectedIPCount int) error { + log.Print("Validating Node properties") for index := range 
nodes.Items { nodeName := nodes.Items[index].ObjectMeta.Name @@ -188,29 +195,47 @@ func (v *Validator) validateDualStackNodeProperties(ctx context.Context) error { // nodes status should be ready after cluster is created nodeConditions := nodes.Items[index].Status.Conditions if nodeConditions[len(nodeConditions)-1].Type != corev1.NodeReady { - return errors.Wrapf(err, "node %s status is not ready", nodeName) + return errors.Errorf("node %s status is not ready", nodeName) } // get node labels nodeLabels := nodes.Items[index].ObjectMeta.GetLabels() for key := range nodeLabels { - if label, ok := dualstackOverlayNodeLabels[key]; ok { + if label, ok := labels[key]; ok { log.Printf("label %s is correctly shown on the node %+v", key, nodeName) if label != overlayClusterLabelName { - return errors.Wrapf(err, "node %s overlay label name is wrong; expected label:%s but actual label:%s", nodeName, overlayClusterLabelName, label) + return errors.Errorf("node %s overlay label name is wrong; expected label:%s but actual label:%s", nodeName, overlayClusterLabelName, label) } } } - // check if node has two internal IPs(one is IPv4 and another is IPv6) + // check if node has correct number of internal IPs internalIPCount := 0 for _, address := range nodes.Items[index].Status.Addresses { if address.Type == "InternalIP" { internalIPCount++ } } - if internalIPCount != 2 { //nolint - return errors.Wrap(err, "node does not have two internal IPs") + if internalIPCount != expectedIPCount { + return errors.Errorf("number of node internal IPs: %d does not match expected number of IPs %d", internalIPCount, expectedIPCount) + } + } + return nil +} + +func (v *Validator) ValidateV4OverlayControlPlane(ctx context.Context) error { + nodes, err := acnk8s.GetNodeListByLabelSelector(ctx, v.clientset, nodeSelectorMap[v.os]) + if err != nil { + return errors.Wrap(err, "failed to get node list") + } + + if err := validateNodeProperties(nodes, v4OverlayNodeLabels, IPv4ExpectedIPCount); err != nil { + 
return errors.Wrap(err, "failed to validate IPv4 overlay node properties") + } + + if v.os == "windows" { + if err := validateHNSNetworkState(ctx, nodes, v.clientset, v.config); err != nil { + return errors.Wrap(err, "failed to validate IPv4 overlay HNS network state") } } @@ -218,13 +243,45 @@ func (v *Validator) validateDualStackNodeProperties(ctx context.Context) error { } func (v *Validator) ValidateDualStackControlPlane(ctx context.Context) error { - if err := v.validateDualStackNodeProperties(ctx); err != nil { + nodes, err := acnk8s.GetNodeListByLabelSelector(ctx, v.clientset, nodeSelectorMap[v.os]) + if err != nil { + return errors.Wrap(err, "failed to get node list") + } + + if err := validateNodeProperties(nodes, dualstackOverlayNodeLabels, DualstackExpectedIPCount); err != nil { return errors.Wrap(err, "failed to validate dualstack overlay node properties") } return nil } +func (v *Validator) RestartKubeProxyService(ctx context.Context) error { + nodes, err := acnk8s.GetNodeList(ctx, v.clientset) + if err != nil { + return errors.Wrapf(err, "failed to get node list") + } + + for index := range nodes.Items { + node := nodes.Items[index] + if node.Status.NodeInfo.OperatingSystem != string(corev1.Windows) { + continue + } + // get the privileged pod + pod, err := acnk8s.GetPodsByNode(ctx, v.clientset, privilegedNamespace, privilegedLabelSelector, nodes.Items[index].Name) + if err != nil { + return errors.Wrapf(err, "failed to get privileged pod") + } + + privelegedPod := pod.Items[0] + // exec into the pod and restart kubeproxy + _, err = acnk8s.ExecCmdOnPod(ctx, v.clientset, privilegedNamespace, privelegedPod.Name, restartKubeProxyCmd, v.config) + if err != nil { + return errors.Wrapf(err, "failed to exec into privileged pod - %s", privelegedPod.Name) + } + } + return nil +} + func (v *Validator) Cleanup(ctx context.Context) error { // deploy privileged pod privilegedDaemonSet, err := acnk8s.MustParseDaemonSet(privilegedDaemonSetPathMap[v.os]) diff --git 
a/test/validate/windows_validate.go b/test/validate/windows_validate.go index 5ebc0f4834..dd36634fd4 100644 --- a/test/validate/windows_validate.go +++ b/test/validate/windows_validate.go @@ -1,16 +1,24 @@ package validate import ( + "context" "encoding/json" + "log" "net" + acnk8s "github.com/Azure/azure-container-networking/test/internal/kubernetes" "github.com/pkg/errors" + corev1 "k8s.io/api/core/v1" + "k8s.io/client-go/kubernetes" + "k8s.io/client-go/rest" ) var ( - hnsEndPointCmd = []string{"powershell", "-c", "Get-HnsEndpoint | ConvertTo-Json"} - azureVnetCmd = []string{"powershell", "-c", "cat ../../k/azure-vnet.json"} - azureVnetIpamCmd = []string{"powershell", "-c", "cat ../../k/azure-vnet-ipam.json"} + hnsEndPointCmd = []string{"powershell", "-c", "Get-HnsEndpoint | ConvertTo-Json"} + hnsNetworkCmd = []string{"powershell", "-c", "Get-HnsNetwork | ConvertTo-Json"} + azureVnetCmd = []string{"powershell", "-c", "cat ../../k/azure-vnet.json"} + azureVnetIpamCmd = []string{"powershell", "-c", "cat ../../k/azure-vnet-ipam.json"} + restartKubeProxyCmd = []string{"powershell", "Restart-service", "kubeproxy"} ) var windowsChecksMap = map[string][]check{ @@ -31,6 +39,14 @@ type HNSEndpoint struct { IsRemoteEndpoint bool `json:",omitempty"` } +type HNSNetwork struct { + Name string `json:"Name"` + IPv6 bool `json:"IPv6"` + ManagementIP string `json:"ManagementIP"` + ManagementIPv6 string `json:"ManagementIPv6"` + State int `json:"State"` +} + type AzureVnet struct { NetworkInfo NetworkInfo `json:"Network"` } @@ -89,6 +105,17 @@ func hnsStateFileIps(result []byte) (map[string]string, error) { return hnsPodIps, nil } +// return windows HNS network state +func hnsNetworkState(result []byte) ([]HNSNetwork, error) { + var hnsNetworkResult []HNSNetwork + err := json.Unmarshal(result, &hnsNetworkResult) + if err != nil { + return nil, errors.Wrap(err, "failed to unmarshal HNS network state file") + } + + return hnsNetworkResult, nil +} + func azureVnetIps(result []byte) 
(map[string]string, error) { var azureVnetResult AzureVnet err := json.Unmarshal(result, &azureVnetResult) @@ -130,3 +157,41 @@ func azureVnetIpamIps(result []byte) (map[string]string, error) { } return azureVnetIpamPodIps, nil } + +func validateHNSNetworkState(ctx context.Context, nodes *corev1.NodeList, clientset *kubernetes.Clientset, restConfig *rest.Config) error { + // check windows HNS network state + for index := range nodes.Items { + pod, err := acnk8s.GetPodsByNode(ctx, clientset, privilegedNamespace, privilegedLabelSelector, nodes.Items[index].Name) + if err != nil { + return errors.Wrap(err, "failed to get privileged pod") + } + + podName := pod.Items[0].Name + // exec into the pod to get the state file + result, err := acnk8s.ExecCmdOnPod(ctx, clientset, privilegedNamespace, podName, hnsNetworkCmd, restConfig) + if err != nil { + return errors.Wrap(err, "failed to exec into privileged pod") + } + + hnsNetwork, err := hnsNetworkState(result) + if err != nil { + return errors.Wrap(err, "failed to unmarshal HNS state file") + } + log.Printf("hnsNetwork: %+v", hnsNetwork) + + // check hns properties + if len(hnsNetwork) < 2 { + return errors.New("HNS default ext network or azure network does not exist") + } + + for _, network := range hnsNetwork { + if network.State != 1 { + return errors.New("windows HNS network state is not correct") + } + if network.ManagementIP == "" { + return errors.New("windows HNS network is missing ipv4 management IP") + } + } + } + return nil +}