From 348e43b6ee208c6a15813b599f4f792a57b86c44 Mon Sep 17 00:00:00 2001 From: michelle-yooh Date: Wed, 13 Dec 2023 00:24:23 +0000 Subject: [PATCH] initial commit --- xpk.py | 42 ++++++++++++++++++++++++------------------ 1 file changed, 24 insertions(+), 18 deletions(-) diff --git a/xpk.py b/xpk.py index 5339293b..693f7d29 100644 --- a/xpk.py +++ b/xpk.py @@ -91,8 +91,9 @@ schedulerName: {args.scheduler} restartPolicy: Never nodeSelector: - cloud.google.com/gke-tpu-accelerator: {system.gke_accelerator} - cloud.google.com/gke-tpu-topology: {system.topology} + cloud.google.com/gke-accelerator: {system.gke_accelerator} + cloud.google.com/gke-accelerator: {system.gce_machine_type} + # cloud.google.com/gke-tpu-topology: {system.topology} priorityClassName: {args.priority} hostNetwork: true dnsPolicy: ClusterFirstWithHostNet @@ -257,6 +258,10 @@ class SystemCharacteristics: MODIFICATIONS TO UserFacingNameToSystemCharacteristics IN MaxText/accelerator_to_spec_map.py !!!!! """ # vvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvv UserFacingNameToSystemCharacteristics = { + # H100-80gb + 'h100-80gb-8': SystemCharacteristics( + 'N/A', 1, 'nvidia-h100-80gb', 'a3-highgpu-8g', 8 + ), # v5p 'v5p-8': SystemCharacteristics( '2x2x1', 1, 'tpu-v5p-slice', 'ct5p-hightpu-4t', 4 @@ -976,8 +981,9 @@ def zone_to_region(zone) -> str: Returns: The region name. """ - zone_terms = zone.split('-') - return zone_terms[0] + '-' + zone_terms[1] + return zone + # zone_terms = zone.split('-') + # return zone_terms[0] + '-' + zone_terms[1] def run_gke_cluster_create_command(args) -> int: @@ -2142,13 +2148,13 @@ def directory_path_type(value): ), required=True, ) -cluster_create_required_arguments.add_argument( - '--tpu-type', - type=str, - default='v5litepod-16', - help='The type of the TPU. v5litepod and v4 are the only supported types.', - required=True, -) +# cluster_create_required_arguments.add_argument( +# '--tpu-type', +# type=str, +# default='v5litepod-16', +# help='The type of the TPU. v5litepod and v4 are the only supported types.', +# required=True, +# ) # Capacity Arguments cluster_create_capacity_arguments.add_argument( @@ -2422,13 +2428,13 @@ def directory_path_type(value): ), required=True, ) -workload_create_parser_required_arguments.add_argument( - '--tpu-type', - type=str, - default=None, - help='The tpu type to use, v5litepod-16, etc.', - required=True, -) +# workload_create_parser_required_arguments.add_argument( +# '--tpu-type', +# type=str, +# default=None, +# help='The tpu type to use, v5litepod-16, etc.', +# required=True, +# ) workload_create_parser_required_arguments.add_argument( '--cluster', type=str,