From ca7155faf587577d1e69d6d96c7cc5312b7a16ab Mon Sep 17 00:00:00 2001 From: Viet Nguyen Duc Date: Sat, 30 Mar 2024 09:24:35 +0000 Subject: [PATCH] fix(chart): node preStop - refresh node status in loop Signed-off-by: Viet Nguyen Duc --- README.md | 10 +++++++--- .../selenium-grid/configs/node/nodePreStop.sh | 18 +++++++++++++++--- tests/bootstrap.sh | 1 + tests/charts/ci/base-auth-ingress-values.yaml | 1 + 4 files changed, 24 insertions(+), 6 deletions(-) diff --git a/README.md b/README.md index b4b4f0145..87acf6ff5 100644 --- a/README.md +++ b/README.md @@ -1,6 +1,10 @@ -![Build & test](https://github.com/SeleniumHQ/docker-selenium/workflows/Build%20&%20test/badge.svg?branch=trunk) -![Deployments](https://github.com/SeleniumHQ/docker-selenium/workflows/Deploys/badge.svg) -![Helm Charts](https://github.com/SeleniumHQ/docker-selenium/workflows/Lint%20and%20Test%20Helm%20Charts/badge.svg) +[![Build & test](https://github.com/SeleniumHQ/docker-selenium/actions/workflows/build-test.yml/badge.svg)](https://github.com/SeleniumHQ/docker-selenium/actions/workflows/build-test.yml) +[![Test Docker Selenium](https://github.com/SeleniumHQ/docker-selenium/actions/workflows/test-video.yml/badge.svg)](https://github.com/SeleniumHQ/docker-selenium/actions/workflows/test-video.yml) +[![Test Helm Charts](https://github.com/SeleniumHQ/docker-selenium/actions/workflows/helm-chart-test.yml/badge.svg)](https://github.com/SeleniumHQ/docker-selenium/actions/workflows/helm-chart-test.yml) +[![Deploys](https://github.com/SeleniumHQ/docker-selenium/actions/workflows/deploy.yml/badge.svg)](https://github.com/SeleniumHQ/docker-selenium/actions/workflows/deploy.yml) +[![Release Charts](https://github.com/SeleniumHQ/docker-selenium/actions/workflows/helm-chart-release.yml/badge.svg)](https://github.com/SeleniumHQ/docker-selenium/actions/workflows/helm-chart-release.yml) +[![Nightly](https://github.com/SeleniumHQ/docker-selenium/actions/workflows/nightly.yaml/badge.svg)](https://github.com/SeleniumHQ/docker-selenium/actions/workflows/nightly.yaml) +[![Update Dev/Beta Browser Images](https://github.com/SeleniumHQ/docker-selenium/actions/workflows/update-dev-beta-browser-images.yml/badge.svg)](https://github.com/SeleniumHQ/docker-selenium/actions/workflows/update-dev-beta-browser-images.yml) # Docker images for the Selenium Grid Server diff --git a/charts/selenium-grid/configs/node/nodePreStop.sh b/charts/selenium-grid/configs/node/nodePreStop.sh index db4ccbdb2..43c04e0ee 100644 --- a/charts/selenium-grid/configs/node/nodePreStop.sh +++ b/charts/selenium-grid/configs/node/nodePreStop.sh @@ -3,12 +3,14 @@ probe_name="lifecycle.${1:-"preStop"}" max_time=3 +retry_time=5 ID=$(echo $RANDOM) tmp_node_file="/tmp/nodeProbe${ID}" function on_exit() { rm -rf ${tmp_node_file} + echo "$(date +%FT%T%Z) [${probe_name}] - Exiting Node preStop..." } trap on_exit EXIT @@ -89,8 +91,12 @@ if curl -m ${max_time} -sfk ${SE_SERVER_PROTOCOL}://127.0.0.1:${SE_NODE_PORT}/st fi signal_node_to_drain # Wait for the current session to be finished if any - while curl -m ${max_time} -sfk ${SE_SERVER_PROTOCOL}://127.0.0.1:${SE_NODE_PORT}/status -o ${tmp_node_file}; - do + while true; do + # Attempt the cURL request and capture the exit status + endpoint_http_code=$(curl --retry ${retry_time} -m ${max_time} -sfk ${SE_SERVER_PROTOCOL}://127.0.0.1:${SE_NODE_PORT}/status -o ${tmp_node_file} -w "%{http_code}") + endpoint_status=$? + echo "$(date +%FT%T%Z) [${probe_name}] - Fetch the Node status via cURL with exit status: ${endpoint_status}, HTTP code: ${endpoint_http_code}" + SLOT_HAS_SESSION=$(jq -e ".value.node.slots[]|select(.session != null).id.id" ${tmp_node_file} | tr -d '"' || "") if [ -z "${SLOT_HAS_SESSION}" ]; then echo "$(date +%FT%T%Z) [${probe_name}] - There is no session running. Node is ready to be terminated." @@ -99,7 +105,13 @@ if curl -m ${max_time} -sfk ${SE_SERVER_PROTOCOL}://127.0.0.1:${SE_NODE_PORT}/st exit 0 else echo "$(date +%FT%T%Z) [${probe_name}] - Node preStop is waiting for current session on slot ${SLOT_HAS_SESSION} to be finished. Node details: message: $(jq -r '.value.message' ${tmp_node_file} || "unknown"), availability: $(jq -r '.value.node.availability' ${tmp_node_file} || "unknown")" - sleep 1; + sleep 2; + fi + + # If the cURL command failed, break the loop + if [ ${endpoint_status} -ne 0 ] || [ "${endpoint_http_code}" != "200" ]; then + echo "$(date +%FT%T%Z) [${probe_name}] - Node endpoint returned status ${endpoint_http_code:-"exit ${endpoint_status}"}, probably Node draining complete!" + break fi done else diff --git a/tests/bootstrap.sh b/tests/bootstrap.sh index 3e36412e6..3391e0828 100755 --- a/tests/bootstrap.sh +++ b/tests/bootstrap.sh @@ -9,6 +9,7 @@ fi python -m pip install selenium==4.19.0 \ docker===7.0.0 \ + chardet \ | grep -v 'Requirement already satisfied' if [ "${SELENIUM_GRID_PROTOCOL}" = "https" ]; then diff --git a/tests/charts/ci/base-auth-ingress-values.yaml b/tests/charts/ci/base-auth-ingress-values.yaml index e55c649b0..8c17eafda 100644 --- a/tests/charts/ci/base-auth-ingress-values.yaml +++ b/tests/charts/ci/base-auth-ingress-values.yaml @@ -1,6 +1,7 @@ global: seleniumGrid: logLevel: INFO + stdoutProbeLog: true ingress: className: nginx