Skip to content

Node Throughput Test #164

Node Throughput Test

Node Throughput Test #164

name: Node Throughput Test
on:
schedule:
- cron: '39 0 * * *'
# For testing uncomment following lines:
# push:
# branches:
# - your_branch_name
permissions:
# To be able to access the repository with actions/checkout
contents: read
# To be able to request the JWT from GitHub's OIDC provider
id-token: write
concurrency:
# Structure:
# - Workflow name
# - Event type
# - A unique identifier depending on event type:
# - schedule: SHA
# - workflow_dispatch: PR number
#
# This structure ensures a unique concurrency group name is generated for each
# type of testing, such that re-runs will cancel the previous run.
group: |
${{ github.workflow }}
${{ github.event_name }}
${{
(github.event_name == 'schedule' && github.sha) ||
(github.event_name == 'workflow_dispatch' && github.event.inputs.PR-number)
}}
cancel-in-progress: true
env:
# renovate: datasource=golang-version depName=go
go_version: 1.23.1
# Adding k8s.local to the end makes kops happy-
# has stricter DNS naming requirements.
test_name: node-throughput
cluster_name: ${{ github.run_id }}-${{ github.run_attempt }}
GCP_PERF_RESULTS_BUCKET: gs://cilium-scale-results
# renovate: datasource=docker depName=google/cloud-sdk
gcloud_version: 494.0.0
jobs:
install-and-scaletest:
runs-on: ubuntu-latest
name: Install and Scale Test
timeout-minutes: 120
steps:
- name: Checkout context ref (trusted)
uses: actions/checkout@d632683dd7b4114ad314bca15554477dd762a938 # v4.2.0
with:
ref: ${{ inputs.context-ref || github.sha }}
persist-credentials: false
- name: Set Environment Variables
uses: ./.github/actions/set-env-variables
- name: Get Cilium's default values
id: default_vars
uses: ./.github/actions/helm-default
with:
image-tag: ${{ github.sha }}
- name: Set up job variables
id: vars
run: |
SHA="${{ github.sha }}"
# Setup Cilium install options
CILIUM_INSTALL_DEFAULTS="${{ steps.default_vars.outputs.cilium_install_defaults }} \
--helm-set=cluster.name=${{ env.cluster_name }} \
--wait=false"
# Adding k8s.local to the end makes kops happy
# has stricter DNS naming requirements.
CLUSTER_NAME="${{ env.test_name }}-${{ env.cluster_name }}.k8s.local"
echo SHA=${SHA} >> $GITHUB_OUTPUT
echo cilium_install_defaults=${CILIUM_INSTALL_DEFAULTS} >> $GITHUB_OUTPUT
echo CLUSTER_NAME=${CLUSTER_NAME} >> $GITHUB_OUTPUT
- name: Wait for images to be available
timeout-minutes: 30
shell: bash
run: |
for image in cilium-ci operator-generic-ci hubble-relay-ci ; do
until docker manifest inspect quay.io/${{ env.QUAY_ORGANIZATION_DEV }}/$image:${{ steps.vars.outputs.SHA }} &> /dev/null; do sleep 45s; done
done
- name: Install Go
uses: actions/setup-go@0a12ed9d6a96ab950c8f026ed9f722fe0da7ef32 # v5.0.2
with:
go-version: ${{ env.go_version }}
- name: Install Kops
uses: cilium/scale-tests-action/install-kops@d3ecfd83003f3e9c98ba125ca14933401d44918f # main
- name: Setup gcloud credentials
uses: google-github-actions/auth@62cf5bd3e4211a0a0b51f2c6d6a37129d828611d # v2.1.5
with:
workload_identity_provider: ${{ secrets.GCP_PERF_WORKLOAD_IDENTITY_PROVIDER }}
service_account: ${{ secrets.GCP_PERF_SA }}
create_credentials_file: true
export_environment_variables: true
- name: Setup gcloud CLI
uses: google-github-actions/setup-gcloud@f0990588f1e5b5af6827153b93673613abdc6ec7 # v2.1.1
with:
project_id: ${{ secrets.GCP_PERF_PROJECT_ID }}
version: ${{ env.gcloud_version }}
- name: Clone ClusterLoader2
uses: actions/checkout@d632683dd7b4114ad314bca15554477dd762a938 # v4.2.0
with:
repository: kubernetes/perf-tests
# Avoid using renovate to update this dependency because: (1)
# perf-tests does not tag or release, so renovate will pull
# all updates to the default branch and (2) continually
# updating CL2 may impact the stability of the scale test
# results.
ref: 920c39ef245a81bd8fb39d7fecf39eb35820d9ef
persist-credentials: false
sparse-checkout: clusterloader2
path: perf-tests
- name: Deploy cluster
id: deploy-cluster
uses: cilium/scale-tests-action/create-cluster@d3ecfd83003f3e9c98ba125ca14933401d44918f # main
timeout-minutes: 30
with:
cluster_name: ${{ steps.vars.outputs.cluster_name }}
control_plane_size: n2-standard-4
control_plane_count: 1
node_size: e2-standard-8
node_count: 1
kops_state: ${{ secrets.GCP_PERF_KOPS_STATE_STORE }}
project_id: ${{ secrets.GCP_PERF_PROJECT_ID }}
- name: Install Cilium CLI
uses: cilium/cilium-cli@c39ea5e50210fde2ccfe54d07122c48fd680ac8d # v0.16.18
with:
skip-build: ${{ env.CILIUM_CLI_SKIP_BUILD }}
image-repo: ${{ env.CILIUM_CLI_IMAGE_REPO }}
image-tag: ${{ steps.vars.outputs.SHA }}
- name: Display version info of installed tools
run: |
echo "--- go ---"
go version
echo "--- cilium-cli ---"
cilium version --client
echo "--- kops ---"
./kops version
echo "--- gcloud ---"
gcloud version
- name: Setup firewall rules
uses: cilium/scale-tests-action/setup-firewall@d3ecfd83003f3e9c98ba125ca14933401d44918f # main
with:
cluster_name: ${{ steps.vars.outputs.cluster_name }}
- name: Install Cilium
run: |
cilium install ${{ steps.vars.outputs.cilium_install_defaults }}
- name: Wait for cluster to be ready
uses: cilium/scale-tests-action/validate-cluster@d3ecfd83003f3e9c98ba125ca14933401d44918f # main
timeout-minutes: 20
with:
cluster_name: ${{ steps.vars.outputs.cluster_name }}
kops_state: ${{ secrets.GCP_PERF_KOPS_STATE_STORE }}
- name: Wait for Cilium status to be ready
run: |
cilium status --wait
- name: Run CL2
id: run-cl2
working-directory: ./perf-tests/clusterloader2
timeout-minutes: 30
shell: bash
# --enable-exec-service=false to reduce number of pods so 100 pods can fit in node
# POD_STARTUP_LATENCY_THRESHOLD=60s so the test doesn't fail, currently we have ~30s pods startup latency
run: |
mkdir ./report
echo POD_STARTUP_LATENCY_THRESHOLD: 60s >> ./testoverrides.yaml
echo POD_COUNT: 98 >> ./testoverrides.yaml
go run ./cmd/clusterloader.go \
-v=4 \
--testconfig=./testing/node-throughput/config.yaml \
--testoverrides=./testoverrides.yaml \
--enable-exec-service=false \
--provider=gce \
--enable-prometheus-server \
--testoverrides=./testing/prometheus/not-scrape-kube-proxy.yaml \
--tear-down-prometheus-server=false \
--report-dir=./report \
--kubeconfig=$HOME/.kube/config \
2>&1 | tee cl2-output.txt
- name: Get sysdump
if: ${{ always() && steps.run-cl2.outcome != 'skipped' }}
run: |
cilium status
cilium sysdump --output-filename cilium-sysdump-final
- name: Cleanup cluster
if: ${{ always() && steps.deploy-cluster.outcome != 'skipped' }}
uses: cilium/scale-tests-action/cleanup-cluster@d3ecfd83003f3e9c98ba125ca14933401d44918f # main
with:
cluster_name: ${{ steps.vars.outputs.cluster_name }}
kops_state: ${{ secrets.GCP_PERF_KOPS_STATE_STORE }}
- name: Export results and sysdump to GS bucket
if: ${{ always() && steps.run-cl2.outcome != 'skipped' }}
uses: cilium/scale-tests-action/export-results@d3ecfd83003f3e9c98ba125ca14933401d44918f # main
with:
test_name: ${{ env.test_name }}
results_bucket: ${{ env.GCP_PERF_RESULTS_BUCKET }}
artifacts: ./perf-tests/clusterloader2/report/*
other_files: cilium-sysdump-final.zip ./perf-tests/clusterloader2/cl2-output.txt