Skip to content

Commit

Permalink
fix
Browse files Browse the repository at this point in the history
  • Loading branch information
IlyasMoutawwakil committed Sep 22, 2024
1 parent f6201e0 commit 7f53dd6
Show file tree
Hide file tree
Showing 6 changed files with 19 additions and 48 deletions.
2 changes: 1 addition & 1 deletion .github/workflows/test_cli_misc.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -58,7 +58,7 @@ jobs:
run: |
pip install --upgrade pip
pip install torch torchvision torchaudio --index-url https://download.pytorch.org/whl/cpu
pip install -e .[testing,timm,diffusers,codecarbon]
pip install -e .[testing]
- name: Run tests
run: pytest -s -k "cli and not (cpu or cuda or rocm or mps)"
10 changes: 10 additions & 0 deletions optimum_benchmark/launchers/base.py
Original file line number Diff line number Diff line change
Expand Up @@ -62,6 +62,12 @@ def device_isolation(self, pid: int, device_ids: Optional[str] = None):

@contextmanager
def numactl_executable(self):
self.logger.info("\t+ Warming up multiprocessing context")
dummy_process = Process(target=dummy_target, daemon=False)
dummy_process.start()
dummy_process.join()
dummy_process.close()

self.logger.info("\t+ Creating numactl wrapper executable for multiprocessing")
python_path = sys.executable
numactl_path = shutil.which("numactl")
Expand All @@ -84,3 +90,7 @@ def numactl_executable(self):
self.logger.info("\t+ Resetting default multiprocessing executable")
os.unlink(numa_executable.name)
set_executable(sys.executable)


def dummy_target() -> None:
    """No-op process target that terminates immediately with exit status 0.

    Spawned once to warm up the multiprocessing context (see the dummy
    ``Process`` started in ``numactl_executable`` above) before the
    executable is swapped out.
    """
    # `exit()` is injected by the `site` module and is not guaranteed to be
    # available in every runtime (e.g. with `python -S`); raising SystemExit
    # directly is the portable equivalent of `exit(0)`.
    raise SystemExit(0)
2 changes: 1 addition & 1 deletion optimum_benchmark/launchers/inline/launcher.py
Original file line number Diff line number Diff line change
Expand Up @@ -12,6 +12,6 @@ def __init__(self, config: InlineConfig):
super().__init__(config)

def launch(self, worker: Callable[..., BenchmarkReport], worker_args: List[Any]) -> BenchmarkReport:
self.logger.warn("The inline launcher is only recommended for debugging purposes and not for benchmarking")
self.logger.warning("The inline launcher is only recommended for debugging purposes and not for benchmarking")
report = worker(*worker_args)
return report
24 changes: 3 additions & 21 deletions optimum_benchmark/launchers/process/launcher.py
Original file line number Diff line number Diff line change
Expand Up @@ -23,12 +23,6 @@ def __init__(self, config: ProcessConfig):
if get_start_method(allow_none=True) != self.config.start_method:
self.logger.info(f"\t+ Setting multiprocessing start method to {self.config.start_method}")
set_start_method(self.config.start_method, force=True)
# creates the resource tracker with default executable
self.logger.info("\t+ Warming up multiprocessing context")
dummy_process = Process(target=dummy_target, daemon=False)
dummy_process.start()
dummy_process.join()
dummy_process.close()

def launch(self, worker: Callable[..., BenchmarkReport], worker_args: List[Any]) -> BenchmarkReport:
child_connection, parent_connection = Pipe()
Expand All @@ -48,7 +42,6 @@ def launch(self, worker: Callable[..., BenchmarkReport], worker_args: List[Any])
else:
raise RuntimeError("Could not synchronize with isolated process")

with ExitStack() as stack:
if self.config.device_isolation:
stack.enter_context(self.device_isolation(isolated_process.pid))

Expand Down Expand Up @@ -121,21 +114,10 @@ def target(


def sync_with_parent(child_connection: Connection) -> None:
if child_connection.poll():
response = child_connection.recv()
else:
raise RuntimeError("Received no response from main process")

if response == "SYNC":
return
else:
raise RuntimeError(f"Received an unexpected response from main process: {response}")
child_connection.recv()
child_connection.send(0)


def sync_with_child(parent_connection: Connection) -> None:
parent_connection.send("SYNC")
parent_connection.send(0)
parent_connection.recv()


def dummy_target() -> None:
    """No-op process target that terminates immediately with exit status 0.

    Used only to warm up the multiprocessing context (spawning it creates the
    resource tracker with the default executable, per the launcher comments).
    """
    # `exit()` is injected by the `site` module and is not guaranteed to be
    # available in every runtime (e.g. with `python -S`); raising SystemExit
    # directly is the portable equivalent of `exit(0)`.
    raise SystemExit(0)
24 changes: 3 additions & 21 deletions optimum_benchmark/launchers/torchrun/launcher.py
Original file line number Diff line number Diff line change
Expand Up @@ -25,12 +25,6 @@ def __init__(self, config: TorchrunConfig):
if get_start_method(allow_none=True) != self.config.start_method:
self.logger.info(f"\t+ Setting multiprocessing start method to {self.config.start_method}")
set_start_method(self.config.start_method, force=True)
self.logger.info("\t+ Warming up multiprocessing context")
# creates the resource tracker with default executable
dummy_process = Process(target=dummy_target, daemon=False)
dummy_process.start()
dummy_process.join()
dummy_process.close()

self.launch_config = LaunchConfig(
min_nodes=self.config.min_nodes,
Expand Down Expand Up @@ -68,7 +62,6 @@ def launch(self, worker: Callable[..., BenchmarkReport], worker_args: List[Any])
else:
raise RuntimeError("Could not synchronize with isolated process")

with ExitStack() as stack:
if self.config.device_isolation:
stack.enter_context(self.device_isolation(isolated_process.pid))

Expand Down Expand Up @@ -152,19 +145,12 @@ def target(


def sync_with_parent(child_connection: Connection) -> None:
if child_connection.poll():
response = child_connection.recv()
else:
raise RuntimeError("Received no response from main process")

if response == "SYNC":
return
else:
raise RuntimeError(f"Received an unexpected response from main process: {response}")
child_connection.recv()
child_connection.send(0)


def sync_with_child(parent_connection: Connection) -> None:
parent_connection.send("SYNC")
parent_connection.send(0)
parent_connection.recv()


Expand Down Expand Up @@ -200,7 +186,3 @@ def entrypoint(worker: Callable[..., BenchmarkReport], worker_args: List[Any], l
torch.distributed.destroy_process_group()
logger.info("\t+ Exiting rank process")
return output


def dummy_target() -> None:
    """No-op process target that terminates immediately with exit status 0.

    Used only to warm up the multiprocessing context (spawning it creates the
    resource tracker with the default executable, per the launcher comments).
    """
    # `exit()` is injected by the `site` module and is not guaranteed to be
    # available in every runtime (e.g. with `python -S`); raising SystemExit
    # directly is the portable equivalent of `exit(0)`.
    raise SystemExit(0)
5 changes: 1 addition & 4 deletions tests/test_cli.py
Original file line number Diff line number Diff line change
Expand Up @@ -72,9 +72,6 @@ def test_cli_exit_code_0(launcher):

@pytest.mark.parametrize("launcher", ["inline", "process", "torchrun"])
def test_cli_exit_code_1(launcher):
if launcher == "torchrun" and sys.platform != "linux":
pytest.skip("torchrun is only supported on Linux")

args_1 = [
"optimum-benchmark",
"--config-dir",
Expand All @@ -83,7 +80,7 @@ def test_cli_exit_code_1(launcher):
"_base_",
"name=test",
f"launcher={launcher}",
# incompatible task and model to trigger error
# incompatible task and model to trigger an error
"backend.task=image-classification",
"backend.model=bert-base-uncased",
"backend.device=cpu",
Expand Down

0 comments on commit 7f53dd6

Please sign in to comment.