Skip to content

Commit

Permalink
initial commit
Browse files: browse the repository at this point in the history
  • Loading branch information
michelle-yooh committed Dec 13, 2023
1 parent 4c93fa5 commit 348e43b
Showing 1 changed file with 24 additions and 18 deletions.
42 changes: 24 additions & 18 deletions xpk.py
Original file line number Diff line number Diff line change
Expand Up @@ -91,8 +91,9 @@
schedulerName: {args.scheduler}
restartPolicy: Never
nodeSelector:
cloud.google.com/gke-tpu-accelerator: {system.gke_accelerator}
cloud.google.com/gke-tpu-topology: {system.topology}
cloud.google.com/gke-accelerator: {system.gke_accelerator}
cloud.google.com/gke-accelerator: {system.gce_machine_type}
# cloud.google.com/gke-tpu-topology: {system.topology}
priorityClassName: {args.priority}
hostNetwork: true
dnsPolicy: ClusterFirstWithHostNet
Expand Down Expand Up @@ -257,6 +258,10 @@ class SystemCharacteristics:
MODIFICATIONS TO UserFacingNameToSystemCharacteristics IN MaxText/accelerator_to_spec_map.py !!!!! """
# vvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvv
UserFacingNameToSystemCharacteristics = {
# H100-80gb
'h100-80gb-8': SystemCharacteristics(
'N/A', 1, 'nvidia-h100-80gb', 'a3-highgpu-8g', 8
),
# v5p
'v5p-8': SystemCharacteristics(
'2x2x1', 1, 'tpu-v5p-slice', 'ct5p-hightpu-4t', 4
Expand Down Expand Up @@ -976,8 +981,9 @@ def zone_to_region(zone) -> str:
Returns:
The region name.
"""
zone_terms = zone.split('-')
return zone_terms[0] + '-' + zone_terms[1]
return zone
# zone_terms = zone.split('-')
# return zone_terms[0] + '-' + zone_terms[1]


def run_gke_cluster_create_command(args) -> int:
Expand Down Expand Up @@ -2142,13 +2148,13 @@ def directory_path_type(value):
),
required=True,
)
cluster_create_required_arguments.add_argument(
'--tpu-type',
type=str,
default='v5litepod-16',
help='The type of the TPU. v5litepod and v4 are the only supported types.',
required=True,
)
# cluster_create_required_arguments.add_argument(
# '--tpu-type',
# type=str,
# default='v5litepod-16',
# help='The type of the TPU. v5litepod and v4 are the only supported types.',
# required=True,
# )

# Capacity Arguments
cluster_create_capacity_arguments.add_argument(
Expand Down Expand Up @@ -2422,13 +2428,13 @@ def directory_path_type(value):
),
required=True,
)
workload_create_parser_required_arguments.add_argument(
'--tpu-type',
type=str,
default=None,
help='The tpu type to use, v5litepod-16, etc.',
required=True,
)
# workload_create_parser_required_arguments.add_argument(
# '--tpu-type',
# type=str,
# default=None,
# help='The tpu type to use, v5litepod-16, etc.',
# required=True,
# )
workload_create_parser_required_arguments.add_argument(
'--cluster',
type=str,
Expand Down

0 comments on commit 348e43b

Please sign in to comment.