Skip to content

Commit

Permalink
Fix a bug
Browse files Browse the repository at this point in the history
Summary:
When testing with:
  buck2 run mode/opt //pytorch/benchmark:run -- mimo_cmf_30x -d cuda -t train --torchdynamo inductor --profile --shrink_model True --bs 10 --profile-detailed

 {F1104965994}
It failed with `AttributeError: 'NoneType' object has no attribute 'num_batch'`. This happens because `model` is not passed into `printResultSummaryTime` when it is called from `profile_one_step`.

In this diff, we pass `model` to `printResultSummaryTime` in `profile_one_step`.

Reviewed By: xuzhao9

Differential Revision: D49749228

fbshipit-source-id: d22f652723354966af6da68cd0bd48f2abfe637a
  • Loading branch information
Microve authored and facebook-github-bot committed Sep 28, 2023
1 parent d8b0cdd commit 9311e29
Showing 1 changed file with 10 additions and 6 deletions.
16 changes: 10 additions & 6 deletions run.py
Original file line number Diff line number Diff line change
Expand Up @@ -83,7 +83,8 @@ def run_one_step_with_cudastreams(func, streamcount):
print('{:<20} {:>20}'.format("GPU Time:", "%.3f milliseconds" % start_event.elapsed_time(end_event)), sep='')


def printResultSummaryTime(result_summary, metrics_needed=[], model=None, flops_model_analyzer=None, model_flops=None, cpu_peak_mem=None, mem_device_id=None, gpu_peak_mem=None):
def printResultSummaryTime(result_summary, model, metrics_needed=[], flops_model_analyzer=None, model_flops=None, cpu_peak_mem=None, mem_device_id=None, gpu_peak_mem=None):
assert (model is not None), "model can not be None."
if args.device == "cuda":
gpu_time = np.median(list(map(lambda x: x[0], result_summary)))
cpu_walltime = np.median(list(map(lambda x: x[1], result_summary)))
Expand Down Expand Up @@ -111,7 +112,7 @@ def printResultSummaryTime(result_summary, metrics_needed=[], model=None, flops_
print('{:<20} {:>20}'.format("CPU Peak Memory:", "%.4f GB" % cpu_peak_mem, sep=''))


def run_one_step(func, nwarmup=WARMUP_ROUNDS, num_iter=10, model=None, export_metrics_file=None, stress=0, metrics_needed=[], metrics_gpu_backend=None):
def run_one_step(func, model, nwarmup=WARMUP_ROUNDS, num_iter=10, export_metrics_file=None, stress=0, metrics_needed=[], metrics_gpu_backend=None):
# Warm-up `nwarmup` rounds
for _i in range(nwarmup):
func()
Expand Down Expand Up @@ -184,10 +185,10 @@ def run_one_step(func, nwarmup=WARMUP_ROUNDS, num_iter=10, model=None, export_me
cpu_peak_mem, mem_device_id, gpu_peak_mem = get_peak_memory(func, model.device, export_metrics_file=export_metrics_file, metrics_needed=metrics_needed, metrics_gpu_backend=metrics_gpu_backend)
if 'model_flops' in metrics_needed:
model_flops = get_model_flops(model)
printResultSummaryTime(result_summary, metrics_needed, model, flops_model_analyzer, model_flops, cpu_peak_mem, mem_device_id, gpu_peak_mem)
printResultSummaryTime(result_summary, model, metrics_needed, flops_model_analyzer, model_flops, cpu_peak_mem, mem_device_id, gpu_peak_mem)


def profile_one_step(func, nwarmup=WARMUP_ROUNDS):
def profile_one_step(func, model, nwarmup=WARMUP_ROUNDS):
activity_groups = []
result_summary = []
device_to_activity = {'cuda': profiler.ProfilerActivity.CUDA, 'cpu': profiler.ProfilerActivity.CPU}
Expand Down Expand Up @@ -259,7 +260,7 @@ def profile_one_step(func, nwarmup=WARMUP_ROUNDS):
print(prof.key_averages(group_by_input_shape=True).table(sort_by="cpu_time_total", row_limit=30))
print(f"Saved TensorBoard Profiler traces to {args.profile_folder}.")

printResultSummaryTime(result_summary)
printResultSummaryTime(result_summary, model=m)

def _validate_devices(devices: str):
devices_list = devices.split(",")
Expand Down Expand Up @@ -461,7 +462,10 @@ def _validate_profile_options(profile_options: str):
else:
export_metrics_file = None
if args.profile:
profile_one_step(test)
profile_one_step(
test,
model=m
)
elif args.cudastreams:
run_one_step_with_cudastreams(test, 10)
else:
Expand Down

0 comments on commit 9311e29

Please sign in to comment.