From 1cf4da2ecabc2815fbe6c1bf796a87f88e0f9d82 Mon Sep 17 00:00:00 2001
From: Phuc Nguyen
Date: Mon, 29 Jan 2024 11:37:47 +0000
Subject: [PATCH] remove searching port

---
 tests/helpers/utils.py | 29 +++++++++++++++--------------
 1 file changed, 15 insertions(+), 14 deletions(-)

diff --git a/tests/helpers/utils.py b/tests/helpers/utils.py
index bc3f2b78..a9a8aaaf 100644
--- a/tests/helpers/utils.py
+++ b/tests/helpers/utils.py
@@ -2,24 +2,24 @@ import os
 import uuid
 from typing import Any, Dict, List, Optional, Tuple
-import random
-import socket
+# import random
+# import socket
 
 import torch.cuda
 from nanotron.parallel import ParallelContext
 from torch.distributed.launcher import elastic_launch
 
 
-def find_free_port(min_port: int = 2000, max_port: int = 65000) -> int:
-    while True:
-        port = random.randint(min_port, max_port)
-        try:
-            with socket.socket() as sock:
-                sock.setsockopt(socket.SOL_SOCKET, socket.SO_REUSEADDR, 1)
-                sock.bind(("localhost", port))
-                return port
-        except OSError as e:
-            raise e
+# def find_free_port(min_port: int = 2000, max_port: int = 65000) -> int:
+#     while True:
+#         port = random.randint(min_port, max_port)
+#         try:
+#             with socket.socket() as sock:
+#                 sock.setsockopt(socket.SOL_SOCKET, socket.SO_REUSEADDR, 1)
+#                 sock.bind(("localhost", port))
+#                 return port
+#         except OSError as e:
+#             raise e
 
 
 def available_gpus():
     if not torch.cuda.is_available():
@@ -106,7 +106,7 @@ def _init_distributed(func):
         nb_gpus = tp * dp * pp
         run_id = uuid.uuid4()
 
-        port = find_free_port()
+        # port = find_free_port()
 
         config = torch.distributed.launcher.LaunchConfig(
             min_nodes=1,
@@ -116,7 +116,8 @@ def _init_distributed(func):
             rdzv_configs={"timeout": 60},
             # Setting port to `0` allows `torch` to randomly pick a port: https://pytorch.org/docs/stable/elastic/run.html#stacked-single-node-multi-worker
             # Works only for single node workload.
-            rdzv_endpoint=f"localhost:{port}",
+            # rdzv_endpoint=f"localhost:{port}",
+            rdzv_endpoint=f"localhost:0",
             run_id=str(run_id),
             max_restarts=0,
             # TODO @thomasw21: Tune as we increase the number of tests
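
Note (not part of the patch): a minimal, standalone sketch of the mechanism the diff relies on, i.e. passing `localhost:0` as the rendezvous endpoint so torchelastic binds a free port itself instead of the test helper probing for one. The worker function, the `nproc_per_node` value, and the `c10d` backend below are illustrative assumptions and are not taken from nanotron's test helpers.

import uuid

from torch.distributed.launcher import LaunchConfig, elastic_launch


def _echo(message: str) -> str:
    # Runs once per spawned worker; a real test would build a ParallelContext here.
    return message


if __name__ == "__main__":
    config = LaunchConfig(
        min_nodes=1,
        max_nodes=1,
        nproc_per_node=2,       # illustrative worker count, not taken from the patch
        rdzv_backend="c10d",    # assumed backend; the patch does not touch this field
        rdzv_configs={"timeout": 60},
        # Port 0 lets torch pick any free port; valid for single-node workloads only.
        rdzv_endpoint="localhost:0",
        run_id=str(uuid.uuid4()),
        max_restarts=0,
    )
    # elastic_launch returns a dict mapping local rank -> the entrypoint's return value.
    outputs = elastic_launch(config, _echo)("hello")
    print(outputs)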