From 2eca4bbea12157928fdc3cd14decd2503456670b Mon Sep 17 00:00:00 2001 From: Viet Nguyen Duc Date: Wed, 27 Mar 2024 04:12:40 +0000 Subject: [PATCH] feat(chart): Configure fixed-sized thread pool for the Distributor in autoscaling Configure fixed-sized thread pool for the Distributor to create new sessions based on sum of maxReplicaCount of all enabled Nodes in autoscaling Signed-off-by: Viet Nguyen Duc --- .github/workflows/helm-chart-test.yml | 7 +++++++ charts/selenium-grid/README.md | 11 ++++++++++ charts/selenium-grid/templates/_helpers.tpl | 21 +++++++++++++++++++ .../templates/distributor-deployment.yaml | 7 +++++-- .../templates/hub-deployment.yaml | 7 +++++-- charts/selenium-grid/values.yaml | 6 +++--- .../ci/DeploymentAutoscaling-values.yaml | 2 +- tests/charts/ci/JobAutoscaling-values.yaml | 4 ++-- tests/charts/ci/base-resources-values.yaml | 16 +++++++------- tests/charts/make/chart_test.sh | 18 ++++++++-------- tests/charts/templates/render/dummy.yaml | 1 + .../templates/render/dummy_solution.yaml | 1 + tests/charts/templates/test.py | 12 +++++++++++ 13 files changed, 86 insertions(+), 27 deletions(-) diff --git a/.github/workflows/helm-chart-test.yml b/.github/workflows/helm-chart-test.yml index 8679ad07e..e84318917 100644 --- a/.github/workflows/helm-chart-test.yml +++ b/.github/workflows/helm-chart-test.yml @@ -15,6 +15,10 @@ on: description: 'Test parameter for different request timeout' required: false default: '370' + max-replicas-count: + description: 'Test parameter for autoscaling to set maxReplicaCount' + required: false + default: '30' log-level: description: 'Test parameter for different log level' required: false @@ -116,8 +120,11 @@ jobs: if: (matrix.test-strategy == 'job' || matrix.test-strategy == 'deployment') && (github.event_name == 'schedule' || github.event_name == 'workflow_dispatch') run: | echo "AUTOSCALING_POLL_INTERVAL=${AUTOSCALING_POLL_INTERVAL}" >> $GITHUB_ENV + echo "SET_MAX_REPLICAS=${SET_MAX_REPLICAS}" >> $GITHUB_ENV + 
echo "LOG_LEVEL=${LOG_LEVEL}" >> $GITHUB_ENV env: AUTOSCALING_POLL_INTERVAL: ${{ github.event.inputs.request-timeout || '370' }} + SET_MAX_REPLICAS: ${{ github.event.inputs.max-replicas-count || '30' }} LOG_LEVEL: ${{ github.event.inputs.log-level || 'FINE' }} - name: Test Selenium Grid on Kubernetes ${{ matrix.k8s-version }} with Autoscaling ${{ matrix.test-strategy }} uses: nick-invision/retry@master diff --git a/charts/selenium-grid/README.md b/charts/selenium-grid/README.md index 68f18a2c1..0cb3ccd57 100644 --- a/charts/selenium-grid/README.md +++ b/charts/selenium-grid/README.md @@ -14,6 +14,7 @@ This chart enables the creation of a Selenium Grid Server in Kubernetes. * [Settings common for both `job` and `deployment` scalingType](#settings-common-for-both-job-and-deployment-scalingtype) * [Settings when scalingType with `deployment`](#settings-when-scalingtype-with-deployment-) * [Settings when scalingType with `job`](#settings-when-scalingtype-with-job) + * [Settings fixed-sized thread pool for the Distributor to create new sessions](#settings-fixed-sized-thread-pool-for-the-distributor-to-create-new-sessions) * [Updating Selenium-Grid release](#updating-selenium-grid-release) * [Uninstalling Selenium Grid release](#uninstalling-selenium-grid-release) * [Ingress Configuration](#ingress-configuration) @@ -195,6 +196,16 @@ autoscaling: Settings that KEDA [ScaledJob spec](https://keda.sh/docs/latest/concepts/scaling-jobs/#scaledjob-spec) supports can be set via `autoscaling.scaledJobOptions`. +### Settings fixed-sized thread pool for the Distributor to create new sessions + +When autoscaling is enabled, the Distributor might be under a high workload when tests run in parallel, with many incoming requests and nodes scaling up simultaneously. (Refer to: [SeleniumHQ/selenium#13723](https://github.com/SeleniumHQ/selenium/issues/13723)). + +By default, the Distributor uses a fixed-sized thread pool whose default size is `no. of available processors * 3`. 
+ +In autoscaling, by default, the pool size is calculated as the sum of `maxReplicaCount` of all enabled node types, plus 1. For example, with `autoscaling.scaledOptions.maxReplicaCount=50` and 3 node types enabled (`Chrome, Firefox, Edge`), the value `50 * 3 + 1 = 151` is set to the environment variable `SE_NEW_SESSION_THREAD_POOL_SIZE` to adjust the Distributor config `--newsession-threadpool-size`. + +You can override the default calculation with another value via `components.distributor.newSessionThreadPoolSize` (in full distributed mode) or `hub.newSessionThreadPoolSize` (in basic mode). + ## Updating Selenium-Grid release Once you have a new chart version, you can update your selenium-grid running: diff --git a/charts/selenium-grid/templates/_helpers.tpl b/charts/selenium-grid/templates/_helpers.tpl index a3770985a..6b2cd9c63 100644 --- a/charts/selenium-grid/templates/_helpers.tpl +++ b/charts/selenium-grid/templates/_helpers.tpl @@ -127,6 +127,27 @@ Is tracing enabled {{- or .Values.tracing.enabled .Values.tracing.enabledWithExistingEndpoint | ternary "true" "" -}} {{- end -}} +{{/* +Configure fixed-sized thread pool for the Distributor to create new sessions +based on sum of maxReplicaCount of all enabled Nodes in autoscaling +*/}} +{{- define "seleniumGrid.autoscaling.distributor.threadPoolSize" -}} +{{- $threadPoolSize := 1 -}} +{{- if .Values.chromeNode.enabled -}} +{{- $maxReplicaCount := default .Values.autoscaling.scaledOptions.maxReplicaCount (.Values.chromeNode.scaledOptions).maxReplicaCount -}} +{{- $threadPoolSize = add $threadPoolSize $maxReplicaCount -}} +{{- end -}} +{{- if $.Values.firefoxNode.enabled -}} +{{- $maxReplicaCount := default .Values.autoscaling.scaledOptions.maxReplicaCount (.Values.firefoxNode.scaledOptions).maxReplicaCount -}} +{{- $threadPoolSize = add $threadPoolSize $maxReplicaCount -}} +{{- end -}} +{{- if $.Values.edgeNode.enabled -}} +{{- $maxReplicaCount := default .Values.autoscaling.scaledOptions.maxReplicaCount 
(.Values.edgeNode.scaledOptions).maxReplicaCount -}} +{{- $threadPoolSize = add $threadPoolSize $maxReplicaCount -}} +{{- end -}} +{{- $threadPoolSize -}} +{{- end -}} + {{/* Common autoscaling spec template */}} diff --git a/charts/selenium-grid/templates/distributor-deployment.yaml b/charts/selenium-grid/templates/distributor-deployment.yaml index c1816626f..ec0994fe4 100644 --- a/charts/selenium-grid/templates/distributor-deployment.yaml +++ b/charts/selenium-grid/templates/distributor-deployment.yaml @@ -52,9 +52,12 @@ spec: value: '{{ template "seleniumGrid.sessionQueue.fullname" . }}.{{ .Release.Namespace }}' - name: SE_SESSION_QUEUE_PORT value: {{ .Values.components.sessionQueue.port | quote }} - {{- with .Values.components.distributor.newSessionThreadPoolSize }} + {{- if .Values.components.distributor.newSessionThreadPoolSize }} - name: SE_NEW_SESSION_THREAD_POOL_SIZE - value: {{ . | quote }} + value: {{ .Values.components.distributor.newSessionThreadPoolSize | quote }} + {{- else if (eq (include "seleniumGrid.useKEDA" $) "true") }} + - name: SE_NEW_SESSION_THREAD_POOL_SIZE + value: '{{ template "seleniumGrid.autoscaling.distributor.threadPoolSize" $ }}' {{- end }} {{- with .Values.components.extraEnvironmentVariables }} {{- tpl (toYaml .) $ | nindent 12 }} diff --git a/charts/selenium-grid/templates/hub-deployment.yaml b/charts/selenium-grid/templates/hub-deployment.yaml index d746e7b3c..c611d62e4 100644 --- a/charts/selenium-grid/templates/hub-deployment.yaml +++ b/charts/selenium-grid/templates/hub-deployment.yaml @@ -109,9 +109,12 @@ spec: - name: SE_DISABLE_UI value: {{ .Values.hub.disableUI | quote }} {{- end }} - {{- with .Values.hub.newSessionThreadPoolSize }} + {{- if .Values.hub.newSessionThreadPoolSize }} - name: SE_NEW_SESSION_THREAD_POOL_SIZE - value: {{ . 
| quote }} + value: {{ .Values.hub.newSessionThreadPoolSize | quote }} + {{- else if (eq (include "seleniumGrid.useKEDA" $) "true") }} + - name: SE_NEW_SESSION_THREAD_POOL_SIZE + value: '{{ template "seleniumGrid.autoscaling.distributor.threadPoolSize" $ }}' {{- end }} {{- with .Values.hub.extraEnvironmentVariables }} {{- tpl (toYaml .) $ | nindent 12 }} diff --git a/charts/selenium-grid/values.yaml b/charts/selenium-grid/values.yaml index 6a4bc31f5..64db8a1e6 100644 --- a/charts/selenium-grid/values.yaml +++ b/charts/selenium-grid/values.yaml @@ -722,7 +722,7 @@ chromeNode: url: '{{ template "seleniumGrid.graphqlURL" . }}' browserName: 'chrome' sessionBrowserName: 'chrome' - platformName: 'Linux' + platformName: 'linux' # browserVersion: '91.0' # Optional. Only required when supporting multiple versions of browser in your Selenium Grid. unsafeSsl: '{{ template "seleniumGrid.graphqlURL.unsafeSsl" . }}' # Optional @@ -883,7 +883,7 @@ firefoxNode: url: '{{ template "seleniumGrid.graphqlURL" . }}' browserName: 'firefox' sessionBrowserName: 'firefox' - platformName: 'Linux' + platformName: 'linux' unsafeSsl: '{{ template "seleniumGrid.graphqlURL.unsafeSsl" . }}' # Optional # It is used to add initContainers in the same pod of the browser node. @@ -1042,7 +1042,7 @@ edgeNode: url: '{{ template "seleniumGrid.graphqlURL" . }}' browserName: 'MicrosoftEdge' sessionBrowserName: 'msedge' - platformName: 'Linux' + platformName: 'linux' unsafeSsl: '{{ template "seleniumGrid.graphqlURL.unsafeSsl" . }}' # Optional # It is used to add initContainers in the same pod of the browser node. 
diff --git a/tests/charts/ci/DeploymentAutoscaling-values.yaml b/tests/charts/ci/DeploymentAutoscaling-values.yaml index 26da41501..e2256a43a 100644 --- a/tests/charts/ci/DeploymentAutoscaling-values.yaml +++ b/tests/charts/ci/DeploymentAutoscaling-values.yaml @@ -3,7 +3,7 @@ autoscaling: scaledOptions: minReplicaCount: 0 maxReplicaCount: 3 - pollingInterval: 20 + pollingInterval: 10 scaledObjectOptions: cooldownPeriod: 30 terminationGracePeriodSeconds: 360 diff --git a/tests/charts/ci/JobAutoscaling-values.yaml b/tests/charts/ci/JobAutoscaling-values.yaml index 04c3dc6ff..5389fef50 100644 --- a/tests/charts/ci/JobAutoscaling-values.yaml +++ b/tests/charts/ci/JobAutoscaling-values.yaml @@ -7,8 +7,8 @@ autoscaling: strategy: default scaledOptions: minReplicaCount: 0 - maxReplicaCount: 30 - pollingInterval: 20 + maxReplicaCount: 7 + pollingInterval: 10 # Configuration for chrome nodes chromeNode: nameOverride: my-chrome-name diff --git a/tests/charts/ci/base-resources-values.yaml b/tests/charts/ci/base-resources-values.yaml index 4ec462921..fbeb383ff 100644 --- a/tests/charts/ci/base-resources-values.yaml +++ b/tests/charts/ci/base-resources-values.yaml @@ -10,11 +10,11 @@ components: distributor: resources: requests: - cpu: 100m - memory: 256Mi + cpu: "1" + memory: 1Gi limits: - cpu: 200m - memory: 2500Mi + cpu: "2" + memory: 2Gi eventBus: resources: requests: @@ -43,11 +43,11 @@ components: hub: resources: requests: - cpu: 100m - memory: 256Mi + cpu: "1" + memory: 1Gi limits: - cpu: 500m - memory: 2500Mi + cpu: "2" + memory: 2Gi chromeNode: resources: diff --git a/tests/charts/make/chart_test.sh b/tests/charts/make/chart_test.sh index 7737cf498..1a6db4cf2 100755 --- a/tests/charts/make/chart_test.sh +++ b/tests/charts/make/chart_test.sh @@ -118,6 +118,15 @@ if [ "${CHART_ENABLE_INGRESS_HOSTNAME}" = "true" ]; then if [[ ! $(cat /etc/hosts) == *"${HOSTNAME_ADDRESS}"* ]]; then sudo -- sh -c -e "echo \"$(hostname -i) ${HOSTNAME_ADDRESS}\" >> /etc/hosts" fi + if [[ ! 
$(cat /etc/hosts) == *"alertmanager.${HOSTNAME_ADDRESS}"* ]]; then + sudo -- sh -c -e "echo \"$(hostname -i) alertmanager.${HOSTNAME_ADDRESS}\" >> /etc/hosts" + fi + if [[ ! $(cat /etc/hosts) == *"grafana.${HOSTNAME_ADDRESS}"* ]]; then + sudo -- sh -c -e "echo \"$(hostname -i) grafana.${HOSTNAME_ADDRESS}\" >> /etc/hosts" + fi + if [[ ! $(cat /etc/hosts) == *"pts.${HOSTNAME_ADDRESS}"* ]]; then + sudo -- sh -c -e "echo \"$(hostname -i) pts.${HOSTNAME_ADDRESS}\" >> /etc/hosts" + fi ping -c 2 ${HOSTNAME_ADDRESS} HELM_COMMAND_SET_IMAGES="${HELM_COMMAND_SET_IMAGES} \ --set ingress.hostname=${HOSTNAME_ADDRESS} \ @@ -127,15 +136,6 @@ else HELM_COMMAND_SET_IMAGES="${HELM_COMMAND_SET_IMAGES} \ --set global.K8S_PUBLIC_IP=${SELENIUM_GRID_HOST} \ " - if [[ ! $(cat /etc/hosts) == *"alertmanager.selenium-grid.prod"* ]]; then - sudo -- sh -c -e "echo \"$(hostname -i) alertmanager.selenium-grid.prod\" >> /etc/hosts" - fi - if [[ ! $(cat /etc/hosts) == *"grafana.selenium-grid.prod"* ]]; then - sudo -- sh -c -e "echo \"$(hostname -i) grafana.selenium-grid.prod\" >> /etc/hosts" - fi - if [[ ! 
$(cat /etc/hosts) == *"pts.selenium-grid.prod"* ]]; then - sudo -- sh -c -e "echo \"$(hostname -i) pts.selenium-grid.prod\" >> /etc/hosts" - fi fi if [ "${CHART_ENABLE_BASIC_AUTH}" = "true" ]; then diff --git a/tests/charts/templates/render/dummy.yaml b/tests/charts/templates/render/dummy.yaml index b4a33567d..911c3d99a 100644 --- a/tests/charts/templates/render/dummy.yaml +++ b/tests/charts/templates/render/dummy.yaml @@ -68,6 +68,7 @@ components: "restartOnUpdate": "true" serviceType: NodePort distributor: + newSessionThreadPoolSize: 24 annotations: "restartOnUpdate": "true" serviceType: NodePort diff --git a/tests/charts/templates/render/dummy_solution.yaml b/tests/charts/templates/render/dummy_solution.yaml index 28e7d7b6d..97850b3e7 100644 --- a/tests/charts/templates/render/dummy_solution.yaml +++ b/tests/charts/templates/render/dummy_solution.yaml @@ -67,6 +67,7 @@ selenium-grid: disableUI: true serviceType: NodePort distributor: + newSessionThreadPoolSize: 24 serviceType: NodePort eventBus: serviceType: NodePort diff --git a/tests/charts/templates/test.py b/tests/charts/templates/test.py index ef8297380..9880a3e63 100644 --- a/tests/charts/templates/test.py +++ b/tests/charts/templates/test.py @@ -73,6 +73,18 @@ def test_sub_path_set_to_grid_env_var(self): is_present = True self.assertTrue(is_present, "ENV variable SE_SUB_PATH is not populated") + def test_distributor_new_session_thread_pool_size(self): + resources_name = ['{0}selenium-distributor'.format(RELEASE_NAME)] + is_present = False + for doc in LIST_OF_DOCUMENTS: + if doc['metadata']['name'] in resources_name and doc['kind'] == 'Deployment': + logger.info(f"Assert newSessionThreadPoolSize is set to Distributor env SE_NEW_SESSION_THREAD_POOL_SIZE") + list_env = doc['spec']['template']['spec']['containers'][0]['env'] + for env in list_env: + if env['name'] == 'SE_NEW_SESSION_THREAD_POOL_SIZE' and env['value'] == '24': + is_present = True + self.assertTrue(is_present, "ENV variable 
SE_NEW_SESSION_THREAD_POOL_SIZE is not populated") + def test_disable_ui_set_to_grid_env_var(self): resources_name = ['{0}selenium-router'.format(RELEASE_NAME)] is_present = False