Skip to content

Commit

Permalink
remove searching port
Browse files Browse the repository at this point in the history
  • Loading branch information
xrsrke committed Jan 29, 2024
1 parent 9a03a04 commit 1cf4da2
Showing 1 changed file with 15 additions and 14 deletions.
29 changes: 15 additions & 14 deletions tests/helpers/utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,24 +2,24 @@
import os
import uuid
from typing import Any, Dict, List, Optional, Tuple
import random
import socket
# import random
# import socket

import torch.cuda
from nanotron.parallel import ParallelContext
from torch.distributed.launcher import elastic_launch


def find_free_port(min_port: int = 2000, max_port: int = 65000, max_attempts: int = 100) -> int:
    """Return a random TCP port on localhost that could be bound at call time.

    A candidate port is drawn uniformly from ``[min_port, max_port]`` and
    probed by binding a throwaway socket to it. If the bind fails the port is
    considered busy and another candidate is drawn, up to ``max_attempts``
    times.

    NOTE: the probe socket is closed before returning, so another process may
    grab the port between this call and the caller's own bind. Treat the
    result as a best-effort hint, not a reservation.

    Args:
        min_port: Lowest port number that may be returned (inclusive).
        max_port: Highest port number that may be returned (inclusive).
        max_attempts: Maximum number of candidate ports to probe before
            giving up.

    Returns:
        A port number that was bindable on localhost when probed.

    Raises:
        ValueError: If ``min_port > max_port`` (raised by ``random.randint``).
        OSError: If no bindable port was found within ``max_attempts`` probes.
    """
    last_error: Optional[OSError] = None
    for _ in range(max_attempts):
        port = random.randint(min_port, max_port)
        try:
            with socket.socket() as sock:
                sock.setsockopt(socket.SOL_SOCKET, socket.SO_REUSEADDR, 1)
                sock.bind(("localhost", port))
        except OSError as err:
            # Port is busy (or otherwise not bindable): remember why and try
            # another candidate instead of aborting on the first failure.
            last_error = err
            continue
        return port

    raise OSError(
        f"Could not find a free port in [{min_port}, {max_port}] after {max_attempts} attempts"
    ) from last_error
# def find_free_port(min_port: int = 2000, max_port: int = 65000) -> int:
# while True:
# port = random.randint(min_port, max_port)
# try:
# with socket.socket() as sock:
# sock.setsockopt(socket.SOL_SOCKET, socket.SO_REUSEADDR, 1)
# sock.bind(("localhost", port))
# return port
# except OSError as e:
# raise e

def available_gpus():
if not torch.cuda.is_available():
Expand Down Expand Up @@ -106,7 +106,7 @@ def _init_distributed(func):
nb_gpus = tp * dp * pp
run_id = uuid.uuid4()

port = find_free_port()
# port = find_free_port()

config = torch.distributed.launcher.LaunchConfig(
min_nodes=1,
Expand All @@ -116,7 +116,8 @@ def _init_distributed(func):
rdzv_configs={"timeout": 60},
# Setting port to `0` allows `torch` to randomly pick a port: https://pytorch.org/docs/stable/elastic/run.html#stacked-single-node-multi-worker
# Works only for single node workload.
rdzv_endpoint=f"localhost:{port}",
# rdzv_endpoint=f"localhost:{port}",
rdzv_endpoint=f"localhost:0",
run_id=str(run_id),
max_restarts=0,
# TODO @thomasw21: Tune as we increase the number of tests
Expand Down

0 comments on commit 1cf4da2

Please sign in to comment.