Skip to content

Commit

Permalink
Add configuration setting for default pool num nodes
Browse files Browse the repository at this point in the history
  • Loading branch information
Obliviour committed Apr 16, 2024
1 parent eff0dd0 commit e37dcfd
Show file tree
Hide file tree
Showing 3 changed files with 16 additions and 5 deletions.
4 changes: 2 additions & 2 deletions .github/workflows/build_tests.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -45,8 +45,8 @@ jobs:
install_components: 'beta,gke-gcloud-auth-plugin'
- name: Verify gcp setup
run: gcloud info
- name: Create a Pathways-enabled XPK Cluster with 2x v4-8 nodepools.
run: python xpk.py cluster create --cluster $TPU_CLUSTER_NAME --enable-pathways --device-type=v4-8 --num-slices=2 --zone=us-central2-b --default-pool-cpu-machine-type=n1-standard-16 --reservation='${{ secrets.GCP_TPU_V4_RESERVATION }}'
- name: Create a Pathways-enabled XPK Cluster with 2x v4-8 nodepools. Larger num-nodes to avoid master resizing.
run: python xpk.py cluster create --cluster $TPU_CLUSTER_NAME --enable-pathways --device-type=v4-8 --num-slices=2 --zone=us-central2-b --default-pool-cpu-machine-type=n1-standard-16 --default-pool-cpu-num-nodes=16 --reservation='${{ secrets.GCP_TPU_V4_RESERVATION }}'
- name: Authenticate Docker
run: gcloud auth configure-docker --quiet
- name: Create test script to execute in workloads
Expand Down
4 changes: 2 additions & 2 deletions .github/workflows/nightly_tests.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -22,7 +22,7 @@ on:
env:
# Names must be unique in parallel running tests.
EMPTY_CLUSTER_NAME: nightly-xpk-zero-nodepools
TPU_CLUSTER_NAME: nightly-xpk-2-v4-8-nodepools
TPU_CLUSTER_NAME: nightly-xpk-2-v4-8-nodepools
WORKLOAD_NAME: xpktest-nightly-${{ github.run_attempt }}
PATHWAYS_TPU_CLUSTER_NAME: pw-nightly-test-2-v4-8-nodepools
PATHWAYS_WORKLOAD_NAME: xpkpw-nightly-${{ github.run_attempt }}
Expand Down Expand Up @@ -90,7 +90,7 @@ jobs:
version: '>= 363.0.0'
install_components: 'beta,gke-gcloud-auth-plugin'
- name: Create a Pathways-enabled XPK Cluster with 2 x v4-8 nodepools
run: python xpk.py cluster create --cluster $PATHWAYS_TPU_CLUSTER_NAME --device-type=v4-8 --num-slices=2 --zone=us-central2-b --default-pool-cpu-machine-type=n1-standard-16 --reservation='${{ secrets.GCP_TPU_V4_RESERVATION }}'
run: python xpk.py cluster create --cluster $PATHWAYS_TPU_CLUSTER_NAME --device-type=v4-8 --num-slices=2 --zone=us-central2-b --default-pool-cpu-machine-type=n1-standard-16 --default-pool-cpu-num-nodes=16 --reservation='${{ secrets.GCP_TPU_V4_RESERVATION }}'
- name: Create test script to execute in workloads
run: echo -e '#!/bin/bash \n echo "Hello world from a test script!"' > test.sh
- name: Run a Pathways workload on Ubuntu base image
Expand Down
13 changes: 12 additions & 1 deletion xpk.py
Original file line number Diff line number Diff line change
Expand Up @@ -1935,7 +1935,8 @@ def run_gke_cluster_create_command(args) -> int:
f' --cluster-version={args.gke_version}'
f' --machine-type={machine_type}'
' --enable-autoscaling'
' --total-min-nodes 1 --total-max-nodes 1000 --num-nodes 6'
' --total-min-nodes 1 --total-max-nodes 1000'
f' --num-nodes {args.default_pool_cpu_num_nodes}'
f' {args.custom_cluster_arguments}'
)

Expand Down Expand Up @@ -5216,6 +5217,16 @@ def directory_path_type(value):
' regional clusters, all zones must support the machine type.'
)
)
# Optional cluster-create flag: initial node count for the default CPU node
# pool. Parsed as an int and defaults to 6; the parsed value is read as
# `args.default_pool_cpu_num_nodes` and forwarded as `--num-nodes` when the
# GKE cluster create command is assembled.
cluster_create_optional_arguments.add_argument(
'--default-pool-cpu-num-nodes',
type=int,
default=6,
help=(
'Set the number of nodes within the default cpu node pool. This is'
' set to 6 by default. Autoscaling is enabled to scale this value over'
' time.'
)
)
cluster_create_optional_arguments.add_argument(
'--cluster-cpu-machine-type',
type=str,
Expand Down

0 comments on commit e37dcfd

Please sign in to comment.