Skip to content

Conformance EKS (ci-eks) #593

Conformance EKS (ci-eks)

Conformance EKS (ci-eks) #593

name: Conformance EKS (ci-eks)
# Any change in triggers needs to be reflected in the concurrency group.
on:
workflow_dispatch:
inputs:
PR-number:
description: "Pull request number."
required: true
context-ref:
description: "Context in which the workflow runs. If PR is from a fork, will be the PR target branch (general case). If PR is NOT from a fork, will be the PR branch itself (this allows committers to test changes to workflows directly from PRs)."
required: true
SHA:
description: "SHA under test (head of the PR branch)."
required: true
extra-args:
description: "[JSON object] Arbitrary arguments passed from the trigger comment via regex capture group. Parse with 'fromJson(inputs.extra-args).argName' in workflow."
required: false
default: '{}'
push:
branches:
- 'renovate/main-**'
# Run every 8 hours
schedule:
- cron: '0 1/8 * * *'
# By specifying the access of one of the scopes, all of those that are not
# specified are set to 'none'.
permissions:
# To read actions state with catchpoint/workflow-telemetry-action
actions: read
# To be able to access the repository with actions/checkout
contents: read
# To allow retrieving information from the PR API
pull-requests: read
# To be able to set commit status
statuses: write
# To be able to request the JWT from GitHub's OIDC provider
id-token: write
concurrency:
# Structure:
# - Workflow name
# - Event type
# - A unique identifier depending on event type:
# - schedule: SHA
# - workflow_dispatch: PR number
#
# This structure ensures a unique concurrency group name is generated for each
# type of testing, such that re-runs will cancel the previous run.
group: |
${{ github.workflow }}
${{ github.event_name }}
${{
(github.event_name == 'push' && github.sha) ||
(github.event_name == 'schedule' && github.sha) ||
(github.event_name == 'workflow_dispatch' && github.event.inputs.PR-number)
}}
cancel-in-progress: true
env:
clusterName: ${{ github.repository_owner }}-${{ github.event.repository.name }}-${{ github.run_id }}-${{ github.run_attempt }}
cilium_cli_ci_version:
# renovate: datasource=github-releases depName=eksctl-io/eksctl
eksctl_version: v0.190.0
# renovate: datasource=github-releases depName=kubernetes/kubernetes
kubectl_version: v1.31.1
jobs:
echo-inputs:
if: ${{ github.event_name == 'workflow_dispatch' }}
name: Echo Workflow Dispatch Inputs
runs-on: ubuntu-22.04
steps:
- name: Echo Workflow Dispatch Inputs
run: |
echo '${{ tojson(inputs) }}'
commit-status-start:
name: Commit Status Start
runs-on: ubuntu-latest
steps:
- name: Set initial commit status
uses: myrotvorets/set-commit-status-action@3730c0a348a2ace3c110851bed53331bc6406e9f # v2.0.1
with:
sha: ${{ inputs.SHA || github.sha }}
generate-matrix:
name: Generate Matrix
runs-on: ubuntu-latest
outputs:
matrix: ${{ steps.set-matrix.outputs.matrix }}
empty: ${{ steps.set-matrix.outputs.empty }}
steps:
- name: Checkout context ref (trusted)
uses: actions/checkout@692973e3d937129bcbf40652eb9f2f61becf3332 # v4.1.7
with:
ref: ${{ inputs.context-ref || github.sha }}
persist-credentials: false
- name: Convert YAML to JSON
run: |
work_dir=".github/actions/eks"
destination_directory="/tmp/generated/eks"
mkdir -p "${destination_directory}"
yq -o=json "${work_dir}/k8s-versions.yaml" | jq . > "${destination_directory}/eks.json"
- name: Generate Matrix
run: |
cd /tmp/generated/eks
# Use complete matrix in case of scheduled run
# main -> event_name = schedule
# other stable branches -> PR-number starting with v (e.g. v1.14)
if [[ "${{ github.event_name }}" == "schedule" || "${{ inputs.PR-number }}" == v* ]];then
cp eks.json /tmp/matrix.json
else
jq '{ "include": [ .include[] | select(.default) ] }' eks.json > /tmp/matrix.json
fi
echo "Generated matrix:"
cat /tmp/matrix.json
# We use latest eksctl just to fetch recent supported versions.
# We don't use that eksctl to create cluster.
# Eksctl has hardcoded list of supported versions in the binary.
# This is hack until https://github.com/aws/containers-roadmap/issues/982 is resolved.
- name: Install eksctl CLI
run: |
curl -LO "https://github.com/eksctl-io/eksctl/releases/latest/download/eksctl_$(uname -s)_amd64.tar.gz"
sudo tar xzvfC eksctl_$(uname -s)_amd64.tar.gz /usr/bin
rm eksctl_$(uname -s)_amd64.tar.gz
- name: Filter Matrix
id: set-matrix
run: |
cp /tmp/matrix.json /tmp/result.json
jq -c '.include[]' /tmp/matrix.json | while read i; do
VERSION=$(echo $i | jq -r '.version')
eksctl version -o json | jq -r '.EKSServerSupportedVersions[]' > /tmp/output
if grep -q -F $VERSION /tmp/output; then
echo "Version $VERSION is supported"
else
echo "::notice::Removing version $VERSION as it's not supported"
jq 'del(.include[] | select(.version == "'$VERSION'"))' /tmp/result.json > /tmp/result.json.tmp
mv /tmp/result.json.tmp /tmp/result.json
fi
done
echo "Filtered matrix:"
cat /tmp/result.json
echo "matrix=$(jq -c . < /tmp/result.json)" >> $GITHUB_OUTPUT
echo "empty=$(jq '(.include | length) == 0' /tmp/result.json)" >> $GITHUB_OUTPUT
installation-and-connectivity:
name: Installation and Connectivity Test
needs: generate-matrix
if: ${{ needs.generate-matrix.outputs.empty == 'false' }}
runs-on: ubuntu-latest
timeout-minutes: 90
env:
job_name: "Installation and Connectivity Test"
strategy:
fail-fast: false
matrix: ${{fromJson(needs.generate-matrix.outputs.matrix)}}
steps:
- name: Collect Workflow Telemetry
uses: catchpoint/workflow-telemetry-action@94c3c3d9567a0205de6da68a76c428ce4e769af1 # v2.0.0
with:
comment_on_pr: false
- name: Checkout context ref (trusted)
uses: actions/checkout@692973e3d937129bcbf40652eb9f2f61becf3332 # v4.1.7
with:
ref: ${{ inputs.context-ref || github.sha }}
persist-credentials: false
- name: Cleanup Disk space in runner
uses: ./.github/actions/disk-cleanup
- name: Set Environment Variables
uses: ./.github/actions/set-env-variables
- name: Get Cilium's default values
id: default_vars
uses: ./.github/actions/helm-default
with:
image-tag: ${{ inputs.SHA }}
chart-dir: ./untrusted/install/kubernetes/cilium
- name: Set up job variables
id: vars
run: |
if [ "${{ github.event_name }}" = "workflow_dispatch" ]; then
OWNER="${{ inputs.PR-number }}"
else
OWNER="${{ github.ref_name }}"
OWNER="${OWNER//[.\/]/-}"
fi
CILIUM_INSTALL_DEFAULTS="${{ steps.default_vars.outputs.cilium_install_defaults }} \
--helm-set=cluster.name=${{ env.clusterName }} \
--helm-set=hubble.relay.enabled=true \
--helm-set loadBalancer.l7.backend=envoy \
--helm-set tls.secretsBackend=k8s \
--helm-set=bpf.monitorAggregation=none \
--wait=false"
if [[ "${{ matrix.ipsec }}" == "true" ]]; then
CILIUM_INSTALL_DEFAULTS+=" --helm-set encryption.enabled=true --helm-set encryption.type=ipsec"
fi
if [[ "${{ matrix.kpr }}" == "true" ]]; then
CILIUM_INSTALL_DEFAULTS+=" --helm-set kubeProxyReplacement=true"
fi
CONNECTIVITY_TEST_DEFAULTS="--flow-validation=disabled --hubble=false --test-concurrency=3 \
--collect-sysdump-on-failure --external-target amazon.com."
echo cilium_install_defaults=${CILIUM_INSTALL_DEFAULTS} >> $GITHUB_OUTPUT
echo connectivity_test_defaults=${CONNECTIVITY_TEST_DEFAULTS} >> $GITHUB_OUTPUT
echo sha=${{ steps.default_vars.outputs.sha }} >> $GITHUB_OUTPUT
echo owner=${OWNER} >> $GITHUB_OUTPUT
- name: Install kubectl
run: |
curl -sLO "https://dl.k8s.io/release/${{ env.kubectl_version }}/bin/linux/amd64/kubectl"
curl -sLO "https://dl.k8s.io/${{ env.kubectl_version }}/bin/linux/amd64/kubectl.sha256"
echo "$(cat kubectl.sha256) kubectl" | sha256sum --check
sudo install -o root -g root -m 0755 kubectl /usr/local/bin/kubectl
kubectl version --client
- name: Install eksctl CLI
run: |
curl -LO "https://github.com/eksctl-io/eksctl/releases/download/${{ env.eksctl_version }}/eksctl_$(uname -s)_amd64.tar.gz"
sudo tar xzvfC eksctl_$(uname -s)_amd64.tar.gz /usr/bin
rm eksctl_$(uname -s)_amd64.tar.gz
- name: Set up AWS CLI credentials
uses: aws-actions/configure-aws-credentials@e3dd6a429d7300a6a4c196c26e071d42e0343502 # v4.0.2
with:
role-to-assume: ${{ secrets.AWS_PR_ASSUME_ROLE }}
aws-region: ${{ matrix.region }}
- name: Run aws configure
run: |
aws configure set aws_access_key_id ${{ env.AWS_ACCESS_KEY_ID }}
aws configure set aws_secret_access_key ${{ env.AWS_SECRET_ACCESS_KEY }}
aws configure set aws_session_token ${{ env.AWS_SESSION_TOKEN }}
aws configure set default.region ${{ env.AWS_REGION }}
- name: Create EKS cluster
uses: ./.github/actions/setup-eks-cluster
with:
cluster_name: ${{ env.clusterName }}
region: ${{ matrix.region }}
owner: "${{ steps.vars.outputs.owner }}"
version: ${{ matrix.version }}
spot: false
- name: Install Cilium CLI
uses: cilium/cilium-cli@fff38e882846c03f1720dad476e459323275ab9c # v0.16.17
with:
skip-build: ${{ env.CILIUM_CLI_SKIP_BUILD }}
image-repo: ${{ env.CILIUM_CLI_IMAGE_REPO }}
image-tag: ${{ steps.vars.outputs.sha }}
- name: Create IPsec key
if: ${{ matrix.ipsec == true }}
shell: bash
run: |
KEYID=15
kubectl create -n kube-system secret generic cilium-ipsec-keys --from-literal=keys="${KEYID} rfc4106(gcm(aes)) $(dd if=/dev/urandom count=20 bs=1 2> /dev/null | xxd -p -c 64) 128"
- name: Wait for images to be available
timeout-minutes: 30
shell: bash
run: |
for image in cilium-ci operator-aws-ci hubble-relay-ci ; do
until docker manifest inspect quay.io/${{ env.QUAY_ORGANIZATION_DEV }}/$image:${{ steps.vars.outputs.sha }} &> /dev/null; do sleep 45s; done
done
- name: Make sure images available from cluster
run: |
kubectl create -f - <<EOF
apiVersion: batch/v1
kind: Job
metadata:
name: wait-for-images
spec:
completions: 1
backoffLimit: 3
template:
spec:
containers:
- name: wait-for-images
image: quay.io/${{ env.QUAY_ORGANIZATION_DEV }}/cilium-ci:${{ steps.vars.outputs.sha }}
command: ["true"]
tolerations:
- key: "node.cilium.io/agent-not-ready"
operator: "Equal"
value: "true"
effect: "NoExecute"
restartPolicy: Never
EOF
kubectl wait --for=condition=complete --timeout=10m job/wait-for-images
# This is a workaround for flake #16938.
- name: Remove AWS-CNI
run: |
kubectl -n kube-system delete daemonset aws-node
# Warning: since this is a privileged workflow, subsequent workflow job
# steps must take care not to execute untrusted code.
- name: Checkout pull request branch (NOT TRUSTED)
uses: actions/checkout@692973e3d937129bcbf40652eb9f2f61becf3332 # v4.1.7
with:
ref: ${{ steps.vars.outputs.sha }}
persist-credentials: false
path: untrusted
sparse-checkout: |
install/kubernetes/cilium
- name: Install Cilium
id: install-cilium
run: |
cilium install ${{ steps.vars.outputs.cilium_install_defaults }}
- name: Wait for Cilium to be ready
run: |
cilium status --wait --wait-duration=10m
kubectl get pods -n kube-system
- name: Check that AWS leftover iptables chains have been removed
run: |
for pod in $(kubectl get po -n kube-system -l app.kubernetes.io/name=cilium-agent -o name); do
echo "Checking ${pod}"
if kubectl exec -n kube-system ${pod} -c cilium-agent -- iptables-save | grep --silent ':AWS'; then
echo "Unexpected AWS leftover iptables chains"
kubectl exec -n kube-system ds/cilium -- iptables-save | grep ':AWS'
exit 1
fi
done
- name: Make JUnit report directory
run: |
mkdir -p cilium-junits
- name: Run connectivity test (${{ join(matrix.*, ', ') }})
run: |
cilium connectivity test ${{ steps.vars.outputs.connectivity_test_defaults }} \
--junit-file "cilium-junits/${{ env.job_name }} (${{ join(matrix.*, ', ') }}) - 1.xml" \
--junit-property github_job_step="Run connectivity test (${{ join(matrix.*, ', ') }})"
- name: Setup conn-disrupt-test before rotating (${{ join(matrix.*, ', ') }})
if: ${{ matrix.ipsec == true }}
uses: ./.github/actions/conn-disrupt-test-setup
- name: Run IPsec key rotation tests (${{ join(matrix.*, ', ') }})
if: ${{ matrix.ipsec == true }}
uses: ./.github/actions/ipsec-key-rotate
with:
key-algo: "gcm(aes)"
key-type-one: ""
key-type-two: ""
- name: Check conn-disrupt-test after rotating (${{ join(matrix.*, ', ') }})
if: ${{ matrix.ipsec == true }}
uses: ./.github/actions/conn-disrupt-test-check
with:
full-test: 'true'
extra-connectivity-test-flags: ${{ steps.vars.outputs.connectivity_test_defaults }}
- name: Post-test information gathering
if: ${{ !success() && steps.install-cilium.outcome != 'skipped' }}
run: |
kubectl get pods --all-namespaces -o wide
cilium status
cilium sysdump --output-filename cilium-sysdump-final-${{ join(matrix.*, '-') }}
shell: bash {0} # Disable default fail-fast behaviour so that all commands run independently
- name: Upload artifacts
if: ${{ !success() }}
uses: actions/upload-artifact@50769540e7f4bd5e21e526ee35c689e35e0d6874 # v4.4.0
with:
name: cilium-sysdumps-${{ matrix.version }}
path: cilium-sysdump-*.zip
- name: Upload JUnits [junit]
if: ${{ always() }}
uses: actions/upload-artifact@50769540e7f4bd5e21e526ee35c689e35e0d6874 # v4.4.0
with:
name: cilium-junits-${{ matrix.version }}
path: cilium-junits/*.xml
- name: Publish Test Results As GitHub Summary
if: ${{ always() }}
uses: aanm/junit2md@332ebf0fddd34e91b03a832cfafaa826306558f9 # v0.0.3
with:
junit-directory: "cilium-junits"
merge-upload:
if: ${{ always() && needs.installation-and-connectivity.result != 'skipped' }}
name: Merge and Upload Artifacts
runs-on: ubuntu-latest
needs: installation-and-connectivity
steps:
- name: Merge Sysdumps
if: ${{ needs.installation-and-connectivity.result == 'failure' }}
uses: actions/upload-artifact/merge@50769540e7f4bd5e21e526ee35c689e35e0d6874 # v4.4.0
with:
name: cilium-sysdumps
pattern: cilium-sysdumps-*
retention-days: 5
delete-merged: true
continue-on-error: true
- name: Merge JUnits
uses: actions/upload-artifact/merge@50769540e7f4bd5e21e526ee35c689e35e0d6874 # v4.4.0
with:
name: cilium-junits
pattern: cilium-junits-*
retention-days: 5
delete-merged: true
commit-status-final:
if: ${{ always() }}
name: Commit Status Final
needs: installation-and-connectivity
runs-on: ubuntu-latest
steps:
- name: Set final commit status
if: ${{ needs.installation-and-connectivity.result != 'skipped' }}
uses: myrotvorets/set-commit-status-action@3730c0a348a2ace3c110851bed53331bc6406e9f # v2.0.1
with:
sha: ${{ inputs.SHA || github.sha }}
status: ${{ needs.installation-and-connectivity.result }}
- name: Set final commit status
if: ${{ needs.installation-and-connectivity.result == 'skipped' }}
uses: myrotvorets/set-commit-status-action@3730c0a348a2ace3c110851bed53331bc6406e9f # v2.0.1
with:
sha: ${{ inputs.SHA || github.sha }}
status: ${{ github.event_name != 'schedule' && 'success' || 'failure' }}
description: 'Skipped'
cleanup:
name: Cleanup EKS Clusters
if: ${{ always() && needs.generate-matrix.outputs.empty == 'false' }}
continue-on-error: true
needs: [generate-matrix, installation-and-connectivity]
runs-on: ubuntu-latest
timeout-minutes: 30
strategy:
fail-fast: false
matrix: ${{fromJson(needs.generate-matrix.outputs.matrix)}}
steps:
- name: Install eksctl CLI
run: |
curl -LO "https://github.com/eksctl-io/eksctl/releases/download/${{ env.eksctl_version }}/eksctl_$(uname -s)_amd64.tar.gz"
sudo tar xzvfC eksctl_$(uname -s)_amd64.tar.gz /usr/bin
rm eksctl_$(uname -s)_amd64.tar.gz
- name: Set up AWS CLI credentials
uses: aws-actions/configure-aws-credentials@e3dd6a429d7300a6a4c196c26e071d42e0343502 # v4.0.2
with:
role-to-assume: ${{ secrets.AWS_PR_ASSUME_ROLE }}
aws-region: ${{ matrix.region }}
- name: Clean up EKS
run: |
eksctl delete cluster --name ${{ env.clusterName }} --region ${{ matrix.region }}
shell: bash {0} # Disable default fail-fast behaviour so that all commands run independently