v4overlay windows test cases (#2187)
* feat: adding in v4overlay windows tests

* chore: address feedback

* fix: addressing feedback

---------

Co-authored-by: Paul Johnston <johnstonpaul801@gmail.com>
paulyufan2 and pjohnst5 authored Sep 27, 2023
1 parent a3e6682 commit 78a577c
Showing 11 changed files with 419 additions and 62 deletions.
@@ -172,3 +172,41 @@ steps:
name: "Cleanupartifactdir"
displayName: "Cleanup artifact dir"
condition: always()
- task: AzureCLI@2
inputs:
azureSubscription: $(BUILD_VALIDATIONS_SERVICE_CONNECTION)
scriptLocation: "inlineScript"
scriptType: "bash"
addSpnToEnvironment: true
inlineScript: |
set -e
make -C ./hack/aks windows-nodepool-up AZCLI=az SUB=$(SUB_AZURE_NETWORK_AGENT_BUILD_VALIDATIONS) CLUSTER=${{ parameters.clusterName }}
echo "Windows node are successfully added to v4 Overlay Cluster"
kubectl cluster-info
kubectl get node -owide
kubectl get po -owide -A
name: "Add_Windows_Node"
displayName: "Add windows node on v4 overlay cluster"

- script: |
nodeList=`kubectl get node -owide | grep Windows | awk '{print $1}'`
for node in $nodeList; do
taint=`kubectl describe node $node | grep Taints | awk '{print $2}'`
if [ $taint == "node.cloudprovider.kubernetes.io/uninitialized=true:NoSchedule" ]; then
kubectl taint nodes $node node.cloudprovider.kubernetes.io/uninitialized=true:NoSchedule-
fi
done
CNS=$(make cns-version)
sudo -E env "PATH=$PATH" make test-load SCALE_UP=32 OS_TYPE=windows CNI_TYPE=cniv2 VALIDATE_STATEFILE=true INSTALL_CNS=true INSTALL_AZURE_CNI_OVERLAY=true VALIDATE_V4OVERLAY=true CNS_VERSION=${CNS} CNI_DROPGZ_VERSION=$(dropgzVersion) TEST_DROPGZ=${{ parameters.testDropgz }} CLEANUP=true
name: "WindowsOverlayControlPlaneScaleTests"
displayName: "Windows v4Overlay ControlPlane Scale Tests"
retryCountOnTaskFailure: 3
- script: |
echo "IPv4 Overlay DataPath Test"
cd test/integration/datapath
sudo -E env "PATH=$PATH" go test -count=1 datapath_windows_test.go -timeout 3m -tags connection -restartKubeproxy true -run ^TestDatapathWin$
name: "WindowsV4OverlayDatapathTests"
displayName: "Windows v4Overlay Datapath Tests"
retryCountOnTaskFailure: 3
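
For illustration, the taint-removal loop above could also be done in-process with client-go instead of shelling out to kubectl. A minimal sketch (not part of this PR), assuming Windows nodes carry the standard kubernetes.io/os=windows label rather than being grepped out of kubectl output:

package untaint

import (
	"context"

	"github.com/pkg/errors"
	apiv1 "k8s.io/api/core/v1"
	metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
	"k8s.io/client-go/kubernetes"
)

const uninitializedTaintKey = "node.cloudprovider.kubernetes.io/uninitialized"

// removeUninitializedTaint drops the cloud provider's "uninitialized" taint
// from every Windows node so test pods can schedule, mirroring the kubectl
// loop in the pipeline step above.
func removeUninitializedTaint(ctx context.Context, clientset *kubernetes.Clientset) error {
	nodes, err := clientset.CoreV1().Nodes().List(ctx, metav1.ListOptions{LabelSelector: "kubernetes.io/os=windows"})
	if err != nil {
		return errors.Wrap(err, "could not list windows nodes")
	}
	for i := range nodes.Items {
		node := &nodes.Items[i]
		kept := make([]apiv1.Taint, 0, len(node.Spec.Taints))
		for _, taint := range node.Spec.Taints {
			// Keep every taint except uninitialized:NoSchedule.
			if taint.Key == uninitializedTaintKey && taint.Effect == apiv1.TaintEffectNoSchedule {
				continue
			}
			kept = append(kept, taint)
		}
		if len(kept) == len(node.Spec.Taints) {
			continue // nothing to remove on this node
		}
		node.Spec.Taints = kept
		if _, err := clientset.CoreV1().Nodes().Update(ctx, node, metav1.UpdateOptions{}); err != nil {
			return errors.Wrapf(err, "failed to untaint node %s", node.Name)
		}
	}
	return nil
}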
2 changes: 1 addition & 1 deletion hack/aks/Makefile
@@ -94,7 +94,7 @@ overlay-byocni-up: rg-up overlay-net-up ## Brings up an Overlay BYO CNI cluster
 		--node-os-upgrade-channel $(NODEUPGRADE) \
 		--node-count $(NODE_COUNT) \
 		--node-vm-size $(VM_SIZE) \
-		--load-balancer-sku basic \
+		--load-balancer-sku standard \
 		--network-plugin none \
 		--network-plugin-mode overlay \
 		--pod-cidr 192.168.0.0/16 \
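
Note: the SKU bump supports the new Windows step above; AKS allows adding extra node pools only on clusters using the standard load-balancer SKU, so windows-nodepool-up would fail against a basic-SKU cluster.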
1 change: 1 addition & 0 deletions hack/aks/README.md
@@ -35,6 +35,7 @@ AKS Clusters
  windows-cniv1-up             Bring up a Windows AzCNIv1 cluster
  linux-cniv1-up               Bring up a Linux AzCNIv1 cluster
  dualstack-overlay-byocni-up  Bring up a dualstack overlay cluster without CNS and CNI installed
  windows-nodepool-up          Add a Windows node pool to an existing cluster
  down                         Delete the cluster
  vmss-restart                 Restart the nodes of the cluster
```
12 changes: 12 additions & 0 deletions test/integration/datapath/datapath_windows_test.go
@@ -10,6 +10,7 @@ import (

"github.com/Azure/azure-container-networking/test/internal/datapath"
"github.com/Azure/azure-container-networking/test/internal/kubernetes"
"github.com/Azure/azure-container-networking/test/validate"
"github.com/stretchr/testify/require"
apiv1 "k8s.io/api/core/v1"
)
@@ -25,6 +26,7 @@ var (
	podPrefix        = flag.String("podName", "datapod", "Prefix for test pods")
	podNamespace     = flag.String("namespace", "windows-datapath-test", "Namespace for test pods")
	nodepoolSelector = flag.String("nodepoolSelector", "npwin", "Provides nodepool as a windows Node-Selector for pods")
	restartKubeproxy = flag.Bool("restartKubeproxy", false, "restarts kubeproxy on the windows node")
)

/*
@@ -48,12 +50,22 @@ Timeout context is controlled by the -timeout flag.
func setupWindowsEnvironment(t *testing.T) {
	ctx := context.Background()

	t.Log("Get REST config")
	restConfig := kubernetes.MustGetRestConfig(t)

	t.Log("Create Clientset")
	clientset, err := kubernetes.MustGetClientset()
	if err != nil {
		t.Fatal(err)
	}

	if *restartKubeproxy {
		validator, err := validate.CreateValidator(ctx, clientset, restConfig, *podNamespace, "cniv2", false, "windows")
		require.NoError(t, err)
		err = validator.RestartKubeProxyService(ctx)
		require.NoError(t, err)
	}

	t.Log("Create Label Selectors")
	podLabelSelector := kubernetes.CreateLabelSelector(podLabelKey, podPrefix)
	nodeLabelSelector := kubernetes.CreateLabelSelector(nodepoolKey, nodepoolSelector)
96 changes: 96 additions & 0 deletions test/integration/load/load_test.go
@@ -21,6 +21,7 @@ type TestConfig struct {
	Replicas          int  `env:"REPLICAS" default:"1"`
	ValidateStateFile bool `env:"VALIDATE_STATEFILE" default:"false"`
	ValidateDualStack bool `env:"VALIDATE_DUALSTACK" default:"false"`
	ValidateV4Overlay bool `env:"VALIDATE_V4OVERLAY" default:"false"`
	SkipWait          bool `env:"SKIP_WAIT" default:"false"`
	RestartCase       bool `env:"RESTART_CASE" default:"false"`
	Cleanup           bool `env:"CLEANUP" default:"false"`
@@ -107,13 +108,19 @@ func TestLoad(t *testing.T) {
t.Run("Validate state file", TestValidateState)
}

if testConfig.ValidateV4Overlay {
t.Run("Validate v4overlay", TestV4OverlayProperties)
}

if testConfig.ValidateDualStack {
t.Run("Validate dualstack overlay", TestDualStackProperties)
}

if testConfig.Cleanup {
err = kubernetes.MustDeleteDeployment(ctx, deploymentsClient, deployment)
require.NoError(t, err, "error deleteing load deployment")
err = kubernetes.WaitForPodsDelete(ctx, clientset, namespace, podLabelSelector)
require.NoError(t, err, "error waiting for pods to delete")
}
}

@@ -171,7 +178,96 @@ func TestScaleDeployment(t *testing.T) {
	if testConfig.Cleanup {
		err = kubernetes.MustDeleteDeployment(ctx, deploymentsClient, deployment)
		require.NoError(t, err, "error deleting load deployment")
		err = kubernetes.WaitForPodsDelete(ctx, clientset, namespace, podLabelSelector)
		require.NoError(t, err, "error waiting for pods to delete")
	}
}

// TestValidCNSStateDuringScaleAndCNSRestartToTriggerDropgzInstall
// tests that a dropgz install during a pod scaling event does not crash CNS
func TestValidCNSStateDuringScaleAndCNSRestartToTriggerDropgzInstall(t *testing.T) {
	clientset, err := kubernetes.MustGetClientset()
	require.NoError(t, err)

	config := kubernetes.MustGetRestConfig(t)
	ctx, cancel := context.WithTimeout(context.Background(), 30*time.Minute)
	defer cancel()

	validator, err := validate.CreateValidator(ctx, clientset, config, namespace, testConfig.CNIType, testConfig.RestartCase, testConfig.OSType)
	require.NoError(t, err)

	err = validator.Validate(ctx)
	require.NoError(t, err)

	deployment, err := kubernetes.MustParseDeployment(noopDeploymentMap[testConfig.OSType])
	require.NoError(t, err)
	deploymentsClient := clientset.AppsV1().Deployments(namespace)

	if testConfig.Cleanup {
		// Create a deployment
		err = kubernetes.MustCreateDeployment(ctx, deploymentsClient, deployment)
		require.NoError(t, err)
	}

	// Scale it up and "skipWait", so the CNS restart can happen immediately after the scale call is made (while pods are still creating)
	skipWait := true
	err = kubernetes.MustScaleDeployment(ctx, deploymentsClient, deployment, clientset, namespace, podLabelSelector, testConfig.ScaleUpReplicas, skipWait)
	require.NoError(t, err)

	// restart the CNS daemonset (linux, windows)
	err = kubernetes.RestartCNSDaemonset(ctx, clientset)
	require.NoError(t, err)

	// wait for pods to settle before checking CNS state (otherwise there is a race between catching pods in a creating state and reading the CNS state file)
	err = kubernetes.WaitForPodDeployment(ctx, clientset, namespace, deployment.Name, podLabelSelector, testConfig.ScaleUpReplicas)
	require.NoError(t, err)

	// Validate the CNS state
	err = validator.Validate(ctx)
	require.NoError(t, err)

	// Scale it down
	err = kubernetes.MustScaleDeployment(ctx, deploymentsClient, deployment, clientset, namespace, podLabelSelector, testConfig.ScaleDownReplicas, skipWait)
	require.NoError(t, err)

	// restart the CNS daemonset (linux, windows)
	err = kubernetes.RestartCNSDaemonset(ctx, clientset)
	require.NoError(t, err)

	// wait for pods to settle before checking CNS state (otherwise there is a race between catching pods in a terminating state and reading the CNS state file)
	err = kubernetes.WaitForPodDeployment(ctx, clientset, namespace, deployment.Name, podLabelSelector, testConfig.ScaleDownReplicas)
	require.NoError(t, err)

	// Validate the CNS state
	err = validator.Validate(ctx)
	require.NoError(t, err)

	if testConfig.Cleanup {
		err = kubernetes.MustDeleteDeployment(ctx, deploymentsClient, deployment)
		require.NoError(t, err, "error deleting load deployment")
		err = kubernetes.WaitForPodsDelete(ctx, clientset, namespace, podLabelSelector)
		require.NoError(t, err, "error waiting for pods to delete")
	}
}

func TestV4OverlayProperties(t *testing.T) {
	if !testConfig.ValidateV4Overlay {
		return
	}
	clientset, err := kubernetes.MustGetClientset()
	require.NoError(t, err)

	config := kubernetes.MustGetRestConfig(t)
	ctx, cancel := context.WithTimeout(context.Background(), 30*time.Minute)
	defer cancel()

	validator, err := validate.CreateValidator(ctx, clientset, config, namespace, testConfig.CNIType, testConfig.RestartCase, testConfig.OSType)
	require.NoError(t, err)

	// validate IPv4 overlay scenarios
	t.Log("Validating v4Overlay node labels")
	err = validator.ValidateV4OverlayControlPlane(ctx)
	require.NoError(t, err)
}
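
ValidateV4OverlayControlPlane itself is not shown in this diff. Purely as an illustration of the kind of node-label check the log line suggests, a sketch follows; the label key and value it would be called with are placeholder assumptions, not the validator's actual logic (which lives in test/validate):

// Illustrative only; assumes the imports used by utils.go below
// (metav1, pkg/errors, client-go kubernetes).
func validateNodeLabel(ctx context.Context, clientset *kubernetes.Clientset, key, want string) error {
	nodes, err := clientset.CoreV1().Nodes().List(ctx, metav1.ListOptions{})
	if err != nil {
		return errors.Wrap(err, "could not list nodes")
	}
	for i := range nodes.Items {
		// Hypothetical check: every node must carry the expected label value.
		if got := nodes.Items[i].Labels[key]; got != want {
			return errors.Errorf("node %s: label %s=%q, want %q", nodes.Items[i].Name, key, got, want)
		}
	}
	return nil
}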

func TestDualStackProperties(t *testing.T) {
34 changes: 34 additions & 0 deletions test/internal/kubernetes/utils.go
@@ -233,6 +233,24 @@ func WaitForPodsRunning(ctx context.Context, clientset *kubernetes.Clientset, na
	return errors.Wrap(retrier.Do(ctx, checkPodIPsFn), "failed to check if pods were running")
}

func WaitForPodsDelete(ctx context.Context, clientset *kubernetes.Clientset, namespace, labelselector string) error {
	podsClient := clientset.CoreV1().Pods(namespace)

	checkPodsDeleted := func() error {
		podList, err := podsClient.List(ctx, metav1.ListOptions{LabelSelector: labelselector})
		if err != nil {
			return errors.Wrapf(err, "could not list pods with label selector %s", labelselector)
		}
		if len(podList.Items) != 0 {
			return errors.Errorf("%d pods still present", len(podList.Items))
		}
		return nil
	}

	retrier := retry.Retrier{Attempts: RetryAttempts, Delay: RetryDelay}
	return errors.Wrap(retrier.Do(ctx, checkPodsDeleted), "failed to wait for pods to delete")
}

func WaitForPodDeployment(ctx context.Context, clientset *kubernetes.Clientset, namespace, deploymentName, podLabelSelector string, replicas int) error {
	podsClient := clientset.CoreV1().Pods(namespace)
	deploymentsClient := clientset.AppsV1().Deployments(namespace)
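
Note that WaitForPodsDelete polls rather than watches: each retry lists pods by label and succeeds only once the list is empty, reusing the same retry.Retrier pattern as the surrounding helpers. A usage sketch from test code, mirroring the Cleanup branches in load_test.go above (the namespace and selector values are illustrative):

// Inside a test; "load-test" and "app=load" are placeholder values.
ctx, cancel := context.WithTimeout(context.Background(), 10*time.Minute)
defer cancel()
if err := kubernetes.WaitForPodsDelete(ctx, clientset, "load-test", "app=load"); err != nil {
	t.Fatal(err) // pods did not go away within the retry budget
}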
@@ -431,3 +449,19 @@ func HasWindowsNodes(ctx context.Context, clientset *kubernetes.Clientset) (bool
	}
	return false, nil
}

func MustRestartDaemonset(ctx context.Context, clientset *kubernetes.Clientset, namespace, daemonsetName string) error {
	ds, err := clientset.AppsV1().DaemonSets(namespace).Get(ctx, daemonsetName, metav1.GetOptions{})
	if err != nil {
		return errors.Wrapf(err, "failed to get daemonset %s", daemonsetName)
	}

	if ds.Spec.Template.ObjectMeta.Annotations == nil {
		ds.Spec.Template.ObjectMeta.Annotations = make(map[string]string)
	}

	ds.Spec.Template.ObjectMeta.Annotations["kubectl.kubernetes.io/restartedAt"] = time.Now().Format(time.RFC3339)

	_, err = clientset.AppsV1().DaemonSets(namespace).Update(ctx, ds, metav1.UpdateOptions{})
	return errors.Wrapf(err, "failed to update ds %s", daemonsetName)
}
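
MustRestartDaemonset uses the same mechanism as kubectl rollout restart: bumping the kubectl.kubernetes.io/restartedAt annotation on the pod template makes the DaemonSet controller roll new pods while respecting the configured update strategy, rather than deleting pods by hand. A hypothetical sketch of how a caller such as RestartCNSDaemonset (invoked from load_test.go above, but not shown in this diff) might wrap it; the namespace and DaemonSet name are assumptions, not taken from this diff:

// Hypothetical wrapper; "kube-system"/"azure-cns" are assumed coordinates.
func restartCNS(ctx context.Context, clientset *kubernetes.Clientset) error {
	return MustRestartDaemonset(ctx, clientset, "kube-system", "azure-cns")
}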