Torchbench Dynamo Runner: Enable DDP for perf test and traces (#113332)
Summary:
- Removes an outdated assert that prevented perf tests from running under DDP: we now have a single-node --multiprocess mode, and the perf tests already wrap the model via `deepcopy_and_maybe_ddp` (see the single-node multi-process sketch after this list)
- Appends the rank to profiler trace file names so that all ranks no longer try to create the same file
- Renames `deepcopy_and_maybe_ddp` to `deepcopy_and_maybe_parallelize`, since the helper also covers FSDP
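
A minimal sketch of the single-node multi-process setup that --multiprocess implies; this launcher is illustrative only (it is not the benchmark runner's code), and the backend, port, world size, and stand-in model are assumptions:

import os

import torch
import torch.distributed as dist
import torch.multiprocessing as mp
from torch.nn.parallel import DistributedDataParallel as DDP


def _worker(rank: int, world_size: int) -> None:
    # Each spawned process becomes one rank of a single-node process group.
    os.environ["MASTER_ADDR"] = "127.0.0.1"
    os.environ["MASTER_PORT"] = "29500"
    dist.init_process_group("gloo", rank=rank, world_size=world_size)

    model = DDP(torch.nn.Linear(8, 8))  # stand-in for the benchmark's wrapped model
    loss = model(torch.randn(4, 8)).sum()
    loss.backward()  # gradients are all-reduced across ranks

    dist.destroy_process_group()


if __name__ == "__main__":
    mp.spawn(_worker, args=(2,), nprocs=2)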

X-link: pytorch/pytorch#113332
Approved by: https://github.com/H-Huang, https://github.com/wconstab

Reviewed By: huydhn

Differential Revision: D52754061

Pulled By: xmfan

fbshipit-source-id: d34b8dcd484c795b62a2bf8f1611d3cf7e4d489f
xmfan authored and facebook-github-bot committed Jan 15, 2024
1 parent 4ffb8a9 commit 6a8b941
Showing 1 changed file with 10 additions and 13 deletions.
userbenchmark/dynamo/dynamobench/common.py (10 additions, 13 deletions)
@@ -683,7 +683,10 @@ def maybe_mark_profile(*args, **kwargs):
             )

     if args.export_profiler_trace:
-        name = args.profiler_trace_name + "_" + model.name + ".json"
+        name = args.profiler_trace_name + "_" + model.name
+        if hasattr(args, "rank"):
+            name += f"_rank_{args.rank}"
+        name += ".json"
         name = os.path.join(torch._dynamo.config.base_dir, name)
         p.export_chrome_trace(name)
     median = np.median(timings, axis=0)
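
A minimal standalone sketch of the naming scheme introduced above; the prefix, model name, and base directory are made-up example values:

import os


def trace_file_name(prefix, model_name, rank=None, base_dir="."):
    # Mirror the new logic: append a rank suffix only when a rank is known.
    name = f"{prefix}_{model_name}"
    if rank is not None:
        name += f"_rank_{rank}"
    return os.path.join(base_dir, name + ".json")


# Two DDP ranks now export distinct traces instead of racing on one path:
print(trace_file_name("dynamo_trace", "hf_Bert", rank=0))  # ./dynamo_trace_hf_Bert_rank_0.json
print(trace_file_name("dynamo_trace", "hf_Bert", rank=1))  # ./dynamo_trace_hf_Bert_rank_1.json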
@@ -2233,7 +2236,7 @@ def get_fsdp_auto_wrap_policy(self, model_name: str) -> Optional[ModuleWrapPolic

         return ModuleWrapPolicy(MODEL_FSDP_WRAP[model_name])

-    def deepcopy_and_maybe_ddp(self, model):
+    def deepcopy_and_maybe_parallelize(self, model):
         model = self.deepcopy_model(model)
         if self.args.ddp:
             assert (
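
The hunk above is cut off by the diff view; purely as an illustration of the "deepcopy, then maybe wrap" shape the renamed helper covers (the function and argument names, the bare DDP/FSDP constructor calls, and the control flow are assumptions, and both wrappers require an initialized process group):

import copy

from torch.distributed.fsdp import FullyShardedDataParallel as FSDP
from torch.nn.parallel import DistributedDataParallel as DDP


def deepcopy_and_maybe_parallelize_sketch(model, use_ddp=False, use_fsdp=False):
    # Benchmark every configuration against a fresh copy of the eager model.
    model = copy.deepcopy(model)
    if use_ddp:
        # DDP replicates the module per rank and all-reduces gradients.
        model = DDP(model)
    elif use_fsdp:
        # FSDP shards parameters, gradients, and optimizer state across ranks.
        model = FSDP(model)
    return model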
@@ -2329,7 +2332,7 @@ def record_status(accuracy_status, dynamo_start_stats):
             inputs_fp64 = None
             try:
                 model_fp64, inputs_fp64 = cast_to_fp64(
-                    self.deepcopy_and_maybe_ddp(model),
+                    self.deepcopy_and_maybe_parallelize(model),
                     clone_inputs(example_inputs),
                 )
                 self.init_optimizer(name, current_device, model_fp64.parameters())
@@ -2363,7 +2366,7 @@ def record_status(accuracy_status, dynamo_start_stats):
             reset_rng_state()
             model_copy = None
             try:
-                model_copy = self.deepcopy_and_maybe_ddp(model)
+                model_copy = self.deepcopy_and_maybe_parallelize(model)
                 self.init_optimizer(name, current_device, model_copy.parameters())
                 correct_result = self.run_n_iterations(
                     model_copy, clone_inputs(example_inputs)
@@ -2384,7 +2387,7 @@ def record_status(accuracy_status, dynamo_start_stats):
             reset_rng_state()
             model_copy = None
             try:
-                model_copy = self.deepcopy_and_maybe_ddp(model)
+                model_copy = self.deepcopy_and_maybe_parallelize(model)
                 self.init_optimizer(name, current_device, model_copy.parameters())
                 correct_rerun_result = self.run_n_iterations(
                     model_copy, clone_inputs(example_inputs)
@@ -2431,7 +2434,7 @@ def record_status(accuracy_status, dynamo_start_stats):
             torch._dynamo.reset()
             model_copy = None
             try:
-                model_copy = self.deepcopy_and_maybe_ddp(model)
+                model_copy = self.deepcopy_and_maybe_parallelize(model)
                 self.init_optimizer(name, current_device, model_copy.parameters())
                 if self.args.export or self.args.export_aot_inductor:
                     # apply export on module directly
@@ -2615,7 +2618,7 @@ def warmup(fn, model, example_inputs, mode, niters=5):
         model, example_inputs = self.maybe_cast(model, example_inputs)

         # Use distributed wrapping as necessary
-        model = self.deepcopy_and_maybe_ddp(model)
+        model = self.deepcopy_and_maybe_parallelize(model)

         self.init_optimizer(name, current_device, model.parameters())
         with self.pick_grad(name, self.args.training):
@@ -3409,12 +3412,6 @@ def run(runner, args, original_dir=None):
             CI, args.backend, training=args.training, dynamic=args.dynamic_shapes
         )
     if args.ddp:
-        # TODO: we could also hook DDP bench up to --speedup bench, _not_ for mgpu e2e perf,
-        # but just to measure impact on singlenode of performing graph-breaks.
-        # Left it as a follow up to keep this PR isolated.
-        assert (
-            args.accuracy
-        ), "DDP benchmark is currently only hooked up to --accuracy bench"
         assert args.training, "DDP benchmark requires --training mode"
         if args.no_optimize_ddp:
             torch._dynamo.config.optimize_ddp = False
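
For reference, the surviving `args.no_optimize_ddp` branch simply flips off the compiler's DDP-aware graph splitting before compilation. A minimal sketch of that effect (the module and backend below are placeholders):

import torch

# Disable DDPOptimizer-style graph splitting so graph breaks behave as they
# would in a plain single-process compile.
torch._dynamo.config.optimize_ddp = False

compiled = torch.compile(torch.nn.Linear(8, 8), backend="inductor")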
