From ef2ae4be38fe8d582e10140f7064504e3cf3abbb Mon Sep 17 00:00:00 2001
From: Mike Zappa
Date: Thu, 25 Jan 2024 10:53:55 -0700
Subject: [PATCH] Enable Hubble in PR pipeline with additional stage (#2534)

enable hubble in pr with additional stage

Add a "Cilium on AKS Overlay with Hubble" stage to the PR pipeline, backed by
new job and step templates under
.pipelines/singletenancy/cilium-overlay-withhubble/. The step template
installs Cilium from the v1.14.4 manifests using cilium-config-hubble.yaml
and, when the testHubble parameter is true, restarts the cilium daemonset and
runs the networkobservability Go tests.

Pin the image tag in the v1.14.4 cilium-agent and cilium-operator templates
to 1.14.4 instead of $CILIUM_VERSION_TAG; the new install step passes only
${CILIUM_IMAGE_REGISTRY} to envsubst.
---
Review notes (text in this section is not part of the commit message):
- Line breaks and indentation were restored from a whitespace-collapsed copy
  of this patch. Indentation, in particular on the context lines of the
  pipeline.yaml and .tpl hunks, is reconstructed; verify it against the
  target tree or apply with `git apply --ignore-whitespace`.
- Step template fixes made in place (added-line counts unchanged): escaped
  the dots in the Cilium version regex, `mkdir -p` for the artifact dir,
  "namepsace" typo in a log line, removed the jq stage that was being fed
  non-JSON grep output, and reworded the comment above `cilium status --wait`.

 .pipelines/pipeline.yaml                      |  12 +
 .../cilium-overlay-e2e-job-template.yaml      |  86 ++++++++
 .../cilium-overlay-e2e-step-template.yaml     | 206 ++++++++++++++++++
 .../cilium-agent/templates/daemonset.tpl      |  14 +-
 .../cilium-operator/templates/deployment.tpl  |   2 +-
 5 files changed, 312 insertions(+), 8 deletions(-)
 create mode 100644 .pipelines/singletenancy/cilium-overlay-withhubble/cilium-overlay-e2e-job-template.yaml
 create mode 100644 .pipelines/singletenancy/cilium-overlay-withhubble/cilium-overlay-e2e-step-template.yaml

diff --git a/.pipelines/pipeline.yaml b/.pipelines/pipeline.yaml
index 1a9d8c3583..8603f749bb 100644
--- a/.pipelines/pipeline.yaml
+++ b/.pipelines/pipeline.yaml
@@ -420,6 +420,18 @@ stages:
       k8sVersion: ""
       dependsOn: "test"
 
+  # Cilium Overlay with hubble E2E tests
+  - template: singletenancy/cilium-overlay-withhubble/cilium-overlay-e2e-job-template.yaml
+    parameters:
+      name: "cilium_h_overlay_e2e"
+      displayName: Cilium on AKS Overlay with Hubble
+      clusterType: overlay-byocni-nokubeproxy-up
+      clusterName: "cilwhleovere2e"
+      vmSize: Standard_B2ms
+      k8sVersion: ""
+      dependsOn: "test"
+      testHubble: true
+
   # Azure Overlay E2E tests
   - template: singletenancy/azure-cni-overlay/azure-cni-overlay-e2e-job-template.yaml
     parameters:
diff --git a/.pipelines/singletenancy/cilium-overlay-withhubble/cilium-overlay-e2e-job-template.yaml b/.pipelines/singletenancy/cilium-overlay-withhubble/cilium-overlay-e2e-job-template.yaml
new file mode 100644
index 0000000000..01ab0823eb
--- /dev/null
+++ b/.pipelines/singletenancy/cilium-overlay-withhubble/cilium-overlay-e2e-job-template.yaml
@@ -0,0 +1,86 @@
+parameters:
+  name: ""
+  displayName: ""
+  clusterType: ""
+  clusterName: ""
+  vmSize: ""
+  k8sVersion: ""
+  dependsOn: ""
+  os: "linux"
+  testHubble: false
+
+stages:
+  - stage: ${{ parameters.clusterName }}
+    displayName: Create Cluster - ${{ parameters.displayName }}
+    dependsOn:
+      - ${{ parameters.dependsOn }}
+      - setup
+    pool:
+      name: $(BUILD_POOL_NAME_DEFAULT)
+    variables:
+      commitID: $[ stagedependencies.setup.env.outputs['EnvironmentalVariables.commitID'] ]
+    jobs:
+      - template: ../../templates/create-cluster.yaml
+        parameters:
+          name: ${{ parameters.name }}
+          displayName: ${{ parameters.displayName }}
+          clusterType: ${{ parameters.clusterType }}
+          clusterName: ${{ parameters.clusterName }}-$(commitID)
+          vmSize: ${{ parameters.vmSize }}
+          k8sVersion: ${{ parameters.k8sVersion }}
+          dependsOn: ${{ parameters.dependsOn }}
+          region: $(REGION_AKS_CLUSTER_TEST)
+
+  - stage: ${{ parameters.name }}
+    displayName: E2E - ${{ parameters.displayName }}
+    dependsOn:
+      - setup
+      - publish
+      - ${{ parameters.clusterName }}
+    variables:
+      commitID: $[ stagedependencies.setup.env.outputs['EnvironmentalVariables.commitID'] ]
+      GOPATH: "$(Agent.TempDirectory)/go" # Go workspace path
+      GOBIN: "$(GOPATH)/bin" # Go binaries path
+      modulePath: "$(GOPATH)/src/github.com/Azure/azure-container-networking"
+    pool:
+      name: $(BUILD_POOL_NAME_DEFAULT)
+    jobs:
+      - job: ${{ parameters.name }}
+        displayName: Cilium Overlay Test Suite - (${{ parameters.name }})
+        timeoutInMinutes: 120
+        pool:
+          name: $(BUILD_POOL_NAME_DEFAULT)
+          demands:
+            - agent.os -equals Linux
+            - Role -equals $(CUSTOM_E2E_ROLE)
+        steps:
+          - template: cilium-overlay-e2e-step-template.yaml
+            parameters:
+              name: ${{ parameters.name }}
+              clusterName: ${{ parameters.clusterName }}-$(commitID)
+              testHubble: ${{ parameters.testHubble }}
+
+      - template: ../../cni/k8s-e2e/k8s-e2e-job-template.yaml
+        parameters:
+          sub: $(BUILD_VALIDATIONS_SERVICE_CONNECTION)
+          clusterName: ${{ parameters.clusterName }}-$(commitID)
+          os: ${{ parameters.os }}
+          cni: cilium
+          dependsOn: ${{ parameters.name }}
+          datapath: true
+          dns: true
+          portforward: true
+          service: true
+
+      - job: failedE2ELogs
+        displayName: "Failure Logs"
+        dependsOn:
+          - ${{ parameters.name }}
+          - cni_${{ parameters.os }}
+        condition: failed()
+        steps:
+          - template: ../../templates/log-template.yaml
+            parameters:
+              clusterName: ${{ parameters.clusterName }}-$(commitID)
+              os: ${{ parameters.os }}
+              cni: cilium
diff --git a/.pipelines/singletenancy/cilium-overlay-withhubble/cilium-overlay-e2e-step-template.yaml b/.pipelines/singletenancy/cilium-overlay-withhubble/cilium-overlay-e2e-step-template.yaml
new file mode 100644
index 0000000000..0e59910c8b
--- /dev/null
+++ b/.pipelines/singletenancy/cilium-overlay-withhubble/cilium-overlay-e2e-step-template.yaml
@@ -0,0 +1,206 @@
+parameters:
+  name: ""
+  clusterName: ""
+  testHubble: false
+
+steps:
+  - bash: |
+      echo $UID
+      sudo rm -rf $(System.DefaultWorkingDirectory)/*
+    displayName: "Set up OS environment"
+
+  - checkout: self
+
+  - bash: |
+      go version
+      go env
+      mkdir -p '$(GOBIN)'
+      mkdir -p '$(GOPATH)/pkg'
+      mkdir -p '$(modulePath)'
+      echo '##vso[task.prependpath]$(GOBIN)'
+      echo '##vso[task.prependpath]$(GOROOT)/bin'
+    name: "GoEnv"
+    displayName: "Set up the Go environment"
+
+  - task: KubectlInstaller@0
+    inputs:
+      kubectlVersion: latest
+
+  - task: AzureCLI@1
+    inputs:
+      azureSubscription: $(BUILD_VALIDATIONS_SERVICE_CONNECTION)
+      scriptLocation: "inlineScript"
+      scriptType: "bash"
+      addSpnToEnvironment: true
+      inlineScript: |
+        set -e
+        make -C ./hack/aks set-kubeconf AZCLI=az CLUSTER=${{ parameters.clusterName }}
+        ls -lah
+        kubectl apply -f test/integration/manifests/cilium/v1.14.4/cilium-config/cilium-config-hubble.yaml
+        kubectl apply -f test/integration/manifests/cilium/v1.14.4/cilium-agent/files
+        kubectl apply -f test/integration/manifests/cilium/v1.14.4/cilium-operator/files
+        envsubst '${CILIUM_IMAGE_REGISTRY}' < test/integration/manifests/cilium/v1.14.4/cilium-agent/templates/daemonset.tpl | kubectl apply -f -
+        envsubst '${CILIUM_IMAGE_REGISTRY}' < test/integration/manifests/cilium/v1.14.4/cilium-operator/templates/deployment.tpl | kubectl apply -f -
+        # Use different file directories for nightly and current cilium version
+    name: "installCilium"
+    displayName: "Install Cilium on AKS Overlay"
+
+  - script: |
+      echo "install cilium CLI"
+      if [[ ${CILIUM_VERSION_TAG} =~ ^1\.1[1-3]\.[0-9]{1,2} ]]; then
+        echo "Cilium Agent Version ${BASH_REMATCH[0]}"
+        CILIUM_CLI_VERSION=$(curl -s https://raw.githubusercontent.com/cilium/cilium-cli/main/stable-v0.14.txt)
+      else
+        echo "Cilium Agent Version ${CILIUM_VERSION_TAG}"
+        CILIUM_CLI_VERSION=$(curl -s https://raw.githubusercontent.com/cilium/cilium-cli/master/stable.txt)
+      fi
+      CLI_ARCH=amd64
+      if [ "$(uname -m)" = "aarch64" ]; then CLI_ARCH=arm64; fi
+      curl -L --fail --remote-name-all https://github.com/cilium/cilium-cli/releases/download/${CILIUM_CLI_VERSION}/cilium-linux-${CLI_ARCH}.tar.gz{,.sha256sum}
+      sha256sum --check cilium-linux-${CLI_ARCH}.tar.gz.sha256sum
+      sudo tar xzvfC cilium-linux-${CLI_ARCH}.tar.gz /usr/local/bin
+      rm cilium-linux-${CLI_ARCH}.tar.gz{,.sha256sum}
+      cilium status
+      cilium version
+    name: "installCiliumCLI"
+    displayName: "Install Cilium CLI"
+
+  - script: |
+      echo "Start Azilium E2E Tests on Overlay Cluster"
+      if [ "$CILIUM_VERSION_TAG" = "cilium-nightly-pipeline" ]
+      then
+          CNS=$(CNS_VERSION) IPAM=$(AZURE_IPAM_VERSION) && echo "Running nightly"
+      else
+          CNS=$(make cns-version) IPAM=$(make azure-ipam-version)
+      fi
+      sudo -E env "PATH=$PATH" make test-integration AZURE_IPAM_VERSION=${IPAM} CNS_VERSION=${CNS} INSTALL_CNS=true INSTALL_OVERLAY=true
+    retryCountOnTaskFailure: 3
+    name: "aziliumTest"
+    displayName: "Run Azilium E2E on AKS Overlay"
+
+  - script: |
+      echo "Status of the nodes and pods after the test"
+      kubectl get nodes -o wide
+      kubectl get pods -A -o wide
+      echo "Logs will be available as a build artifact"
+      ARTIFACT_DIR=$(Build.ArtifactStagingDirectory)/test-output/
+      echo $ARTIFACT_DIR
+      sudo rm -rf $ARTIFACT_DIR
+      sudo mkdir -p $ARTIFACT_DIR
+      sudo cp test/integration/logs/* $ARTIFACT_DIR
+    name: "GetLogs"
+    displayName: "Get logs"
+    condition: always()
+
+  - task: PublishBuildArtifacts@1
+    inputs:
+      artifactName: test-output
+      pathtoPublish: "$(Build.ArtifactStagingDirectory)/test-output"
+    condition: always()
+
+  - script: |
+      kubectl get pods -A
+      echo "Waiting < 2 minutes for cilium to be ready"
+      # Block until Cilium reports ready; --wait-duration caps the wait (2m here)
+      cilium status --wait --wait-duration 2m
+    retryCountOnTaskFailure: 3
+    name: "CiliumStatus"
+    displayName: "Cilium Status"
+
+  - script: |
+      echo "Run Cilium Connectivity Tests"
+      cilium status
+      cilium connectivity test --connect-timeout 4s --request-timeout 30s --test '!pod-to-pod-encryption,!node-to-node-encryption'
+    retryCountOnTaskFailure: 3
+    name: "ciliumConnectivityTests"
+    displayName: "Run Cilium Connectivity Tests"
+
+  - ${{ if eq( parameters['testHubble'], true) }}:
+    - script: |
+        echo "enable Hubble metrics server"
+        kubectl apply -f test/integration/manifests/cilium/hubble/hubble-peer-svc.yaml
+        kubectl apply -f test/integration/manifests/cilium/v1.14.4/cilium-config/cilium-config-hubble.yaml
+        kubectl rollout restart ds cilium -n kube-system
+        echo "wait <3 minutes for pods to be ready after restart"
+        kubectl rollout status ds cilium -n kube-system --timeout=3m
+        kubectl get pods -Aowide
+        echo "verify Hubble metrics endpoint is usable"
+        go test ./test/integration/networkobservability -v -tags=networkobservability
+      retryCountOnTaskFailure: 3
+      name: "HubbleConnectivityTests"
+      displayName: "Run Hubble Connectivity Tests"
+
+  - script: |
+      echo "validate pod IP assignment and check systemd-networkd restart"
+      kubectl get pod -owide -A
+      # Deleting echo-external-node deployment until cilium version matches TODO. https://github.com/cilium/cilium-cli/issues/67 is addressing the change.
+      # Saves 17 minutes
+      kubectl delete deploy -n cilium-test echo-external-node
+      if [ "$CILIUM_VERSION_TAG" = "cilium-nightly-pipeline" ]; then
+        echo "Check cilium identities in cilium-test namespace during nightly run"
+        echo "expect the identities to be deleted when the namespace is deleted"
+        kubectl get ciliumidentity | grep cilium-test
+      fi
+      make test-validate-state
+      echo "delete cilium connectivity test resources and re-validate state"
+      kubectl delete ns cilium-test
+      kubectl get pod -owide -A
+      make test-validate-state
+    name: "validatePods"
+    displayName: "Validate Pods"
+
+  - script: |
+      if [ "$CILIUM_VERSION_TAG" = "cilium-nightly-pipeline" ]; then
+        kubectl get pod -owide -n cilium-test
+        echo "wait for pod and cilium identity deletion in cilium-test namespace"
+        ns="cilium-test"
+        while true; do
+          pods=$(kubectl get pods -n $ns --no-headers=true 2>/dev/null)
+          if [[ -z "$pods" ]]; then
+            echo "No pods found"
+            break
+          fi
+          sleep 2s
+        done
+        sleep 20s
+        echo "Verify cilium identities are deleted from cilium-test"
+        checkIdentity="$(kubectl get ciliumidentity -o json | grep cilium-test)"
+        if [[ -n $checkIdentity ]]; then
+          echo "##[error]Cilium Identities still present in cilium-test namespace"
+        else
+          printf -- "Identities deleted from cilium-test namespace\n"
+        fi
+      else
+        echo "skip cilium identities check for PR pipeline"
+      fi
+    name: "CiliumIdentities"
+    displayName: "Verify Cilium Identities Deletion"
+
+  - script: |
+      echo "validate pod IP assignment before CNS restart"
+      kubectl get pod -owide -A
+      make test-validate-state
+      echo "restart CNS"
+      kubectl rollout restart ds azure-cns -n kube-system
+      kubectl rollout status ds azure-cns -n kube-system
+      kubectl get pod -owide -A
+      echo "validate pods after CNS restart"
+      make test-validate-state
+    name: "restartCNS"
+    displayName: "Restart CNS and validate pods"
+
+  - script: |
+      echo "Run wireserver and metadata connectivity Tests"
+      bash test/network/wireserver_metadata_test.sh
+    retryCountOnTaskFailure: 3
+    name: "WireserverMetadataConnectivityTests"
+    displayName: "Run Wireserver and Metadata Connectivity Tests"
+
+  - script: |
+      ARTIFACT_DIR=$(Build.ArtifactStagingDirectory)/test-output/
+      echo $ARTIFACT_DIR
+      sudo rm -rf $ARTIFACT_DIR
+      sudo rm -rf test/integration/logs
+    name: "Cleanupartifactdir"
+    displayName: "Cleanup artifact dir"
+    condition: always()
diff --git a/test/integration/manifests/cilium/v1.14.4/cilium-agent/templates/daemonset.tpl b/test/integration/manifests/cilium/v1.14.4/cilium-agent/templates/daemonset.tpl
index a710c23360..964461d0a9 100644
--- a/test/integration/manifests/cilium/v1.14.4/cilium-agent/templates/daemonset.tpl
+++ b/test/integration/manifests/cilium/v1.14.4/cilium-agent/templates/daemonset.tpl
@@ -66,7 +66,7 @@ spec:
               fieldPath: metadata.namespace
         - name: CILIUM_CLUSTERMESH_CONFIG
           value: /var/lib/cilium/clustermesh/
-        image: $CILIUM_IMAGE_REGISTRY/cilium/cilium:$CILIUM_VERSION_TAG
+        image: $CILIUM_IMAGE_REGISTRY/cilium/cilium:1.14.4
         imagePullPolicy: IfNotPresent
         livenessProbe:
           failureThreshold: 10
@@ -163,7 +163,7 @@ spec:
       hostNetwork: true
       initContainers:
       - name: install-cni-binaries
-        image: $CILIUM_IMAGE_REGISTRY/cilium/cilium:$CILIUM_VERSION_TAG
+        image: $CILIUM_IMAGE_REGISTRY/cilium/cilium:1.14.4
         imagePullPolicy: IfNotPresent
         command:
           - "/install-plugin.sh"
@@ -192,7 +192,7 @@ spec:
           value: /run/cilium/cgroupv2
         - name: BIN_PATH
           value: /opt/cni/bin
-        image: $CILIUM_IMAGE_REGISTRY/cilium/cilium:$CILIUM_VERSION_TAG
+        image: $CILIUM_IMAGE_REGISTRY/cilium/cilium:1.14.4
         imagePullPolicy: IfNotPresent
         name: mount-cgroup
         resources: {}
@@ -224,7 +224,7 @@ spec:
         env:
         - name: BIN_PATH
           value: /opt/cni/bin
-        image: $CILIUM_IMAGE_REGISTRY/cilium/cilium:$CILIUM_VERSION_TAG
+        image: $CILIUM_IMAGE_REGISTRY/cilium/cilium:1.14.4
         imagePullPolicy: IfNotPresent
         name: apply-sysctl-overwrites
         resources: {}
@@ -252,7 +252,7 @@ spec:
         - /bin/bash
         - -c
         - --
-        image: $CILIUM_IMAGE_REGISTRY/cilium/cilium:$CILIUM_VERSION_TAG
+        image: $CILIUM_IMAGE_REGISTRY/cilium/cilium:1.14.4
         imagePullPolicy: IfNotPresent
         name: mount-bpf-fs
         resources: {}
@@ -279,7 +279,7 @@ spec:
               key: clean-cilium-bpf-state
               name: cilium-config
               optional: true
-        image: $CILIUM_IMAGE_REGISTRY/cilium/cilium:$CILIUM_VERSION_TAG
+        image: $CILIUM_IMAGE_REGISTRY/cilium/cilium:1.14.4
         imagePullPolicy: IfNotPresent
         name: clean-cilium-state
         resources:
@@ -338,7 +338,7 @@ spec:
           name: host-usr-lib
           readOnly: true
       - name: block-wireserver
-        image: $CILIUM_IMAGE_REGISTRY/cilium/cilium:$CILIUM_VERSION_TAG
+        image: $CILIUM_IMAGE_REGISTRY/cilium/cilium:1.14.4
         imagePullPolicy: IfNotPresent
         command:
         - /bin/bash
diff --git a/test/integration/manifests/cilium/v1.14.4/cilium-operator/templates/deployment.tpl b/test/integration/manifests/cilium/v1.14.4/cilium-operator/templates/deployment.tpl
index 2842221eee..f613016372 100644
--- a/test/integration/manifests/cilium/v1.14.4/cilium-operator/templates/deployment.tpl
+++ b/test/integration/manifests/cilium/v1.14.4/cilium-operator/templates/deployment.tpl
@@ -29,7 +29,7 @@ spec:
     spec:
       containers:
       - name: cilium-operator
-        image: $CILIUM_IMAGE_REGISTRY/cilium/operator-generic:$CILIUM_VERSION_TAG
+        image: $CILIUM_IMAGE_REGISTRY/cilium/operator-generic:1.14.4
         imagePullPolicy: IfNotPresent
         command:
         - cilium-operator-generic