added debug flag for running benchmarks as function calls so breakpoints in them work

added traceable exit codes
SamRWest committed Feb 14, 2024
1 parent 552af5b commit 52ae6b5
Showing 2 changed files with 77 additions and 56 deletions.
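The exit-code change means each failure path now terminates with its own code, so a wrapper can tell which stage failed without parsing the log. A rough sketch of how that could be used (the wrapper itself and the YAML path are hypothetical; the code-to-stage mapping follows the diff below):

import subprocess
import sys

# Hypothetical wrapper around the benchmark runner. The codes map to the stages
# shown in the diff: 2 = unparseable run output, 5 = dd_to_csv failed,
# 7 = the xl2times tool itself failed.
FAILURE_STAGES = {2: "could not parse run output", 5: "dd_to_csv failed", 7: "xl2times failed"}

proc = subprocess.run([sys.executable, "utils/run_benchmarks.py", "benchmarks.yml"])
if proc.returncode != 0:
    stage = FAILURE_STAGES.get(proc.returncode, "unknown stage")
    print(f"benchmarks failed (exit {proc.returncode}): {stage}")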
47 changes: 29 additions & 18 deletions utils/run_benchmarks.py
@@ -26,7 +26,7 @@ def parse_result(lastline):
)
if not m:
print(f"ERROR: could not parse output of run:\n{lastline}")
- sys.exit(1)
+ sys.exit(2)
# return (accuracy, num_correct_rows, num_additional_rows)
return (float(m.groups()[0]), int(m.groups()[1]), int(m.groups()[3]))

@@ -62,7 +62,7 @@ def run_gams_gdxdiff(
print(res.stdout)
print(res.stderr if res.stderr is not None else "")
print(f"ERROR: GAMS failed on {benchmark['name']}")
- sys.exit(1)
+ sys.exit(3)
if "error" in res.stdout.lower():
print(res.stdout)
print(f"ERROR: GAMS errored on {benchmark['name']}")
@@ -93,7 +93,7 @@ def run_gams_gdxdiff(
print(res.stdout)
print(res.stderr if res.stderr is not None else "")
print(f"ERROR: GAMS failed on {benchmark['name']} ground truth")
- sys.exit(1)
+ sys.exit(4)
if "error" in res.stdout.lower():
print(res.stdout)
print(f"ERROR: GAMS errored on {benchmark['name']}")
@@ -129,6 +129,7 @@ def run_benchmark(
skip_csv: bool = False,
out_folder: str = "out",
verbose: bool = False,
+ debug: bool = False,
) -> Tuple[str, float, str, float, int, int]:
xl_folder = path.join(benchmarks_folder, "xlsx", benchmark["input_folder"])
dd_folder = path.join(benchmarks_folder, "dd", benchmark["dd_folder"])
@@ -149,15 +150,15 @@
stdout=subprocess.PIPE,
stderr=subprocess.STDOUT,
text=True,
- # windows needs this for subprocess to inherit venv:
+ # windows needs this for subprocess to inherit venv when calling python directly:
shell=True if os.name == "nt" else False,
)
if res.returncode != 0:
# Remove partial outputs
shutil.rmtree(csv_folder, ignore_errors=True)
print(res.stdout)
print(f"ERROR: dd_to_csv failed on {benchmark['name']}")
- sys.exit(1)
+ sys.exit(5)
else:
# subprocesses use too much RAM in windows, just use function call in current process instead
from utils.dd_to_csv import main
@@ -166,7 +167,7 @@

elif not path.exists(csv_folder):
print(f"ERROR: --skip_csv is true but {csv_folder} does not exist")
- sys.exit(1)
+ sys.exit(6)

# Then run the tool
args = [
@@ -184,21 +185,20 @@
args.append(xl_folder)
start = time.time()
res = None
- if os.name != "nt":
+ if not debug:
res = subprocess.run(
["xl2times"] + args,
stdout=subprocess.PIPE,
stderr=subprocess.STDOUT,
text=True,
)
else:
- # Subprocesses are heavyweight in windows use too much RAM, use function calls instead
- from xl2times.__main__ import main
+ # If debug option is set, run as a function call to allow stepping with a debugger.
+ from xl2times.__main__ import run, parse_args

- summary = main(args)
+ summary = run(parse_args(args))

# pack the results into a namedtuple pretending to be a return value from a subprocess call (as above).
- # TODO Replace subprocess calls above with function calls?
res = namedtuple("stdout", ["stdout", "stderr", "returncode"])(summary, "", 0)

runtime = time.time() - start
@@ -213,7 +213,7 @@
if res is not None and res.returncode != 0:
print(res.stdout)
print(f"ERROR: tool failed on {benchmark['name']}")
- sys.exit(1)
+ sys.exit(7)
with open(path.join(out_folder, "stdout"), "w") as f:
f.write(res.stdout)

@@ -236,6 +236,7 @@ def run_all_benchmarks(
skip_main=False,
skip_regression=False,
verbose=False,
+ debug: bool = False,
):
print("Running benchmarks", end="", flush=True)
headers = ["Benchmark", "Time (s)", "GDX Diff", "Accuracy", "Correct", "Additional"]
@@ -246,6 +247,7 @@
skip_csv=skip_csv,
run_gams=run_gams,
verbose=verbose,
+ debug=debug,
)

with ProcessPoolExecutor(max_workers=max_workers) as executor:
@@ -289,7 +291,7 @@ def run_all_benchmarks(
else:
if repo.is_dirty():
print("Your working directory is not clean. Skipping regression tests.")
- sys.exit(1)
+ sys.exit(8)

# Re-run benchmarks on main
repo.heads.main.checkout()
@@ -302,6 +304,7 @@
run_gams=run_gams,
out_folder="out-main",
verbose=verbose,
+ debug=debug,
)

with ProcessPoolExecutor(max_workers) as executor:
@@ -335,7 +338,7 @@ def run_all_benchmarks(
)
if df.isna().values.any():
print(f"ERROR: number of benchmarks changed:\n{df}")
- sys.exit(1)
+ sys.exit(9)
accu_regressions = df[df["Correct"] < df["M Correct"]]["Benchmark"]
addi_regressions = df[df["Additional"] > df["M Additional"]]["Benchmark"]
time_regressions = df[df["Time (s)"] > 2 * df["M Time (s)"]]["Benchmark"]
@@ -355,7 +358,7 @@
print(f"ERROR: additional rows regressed on: {', '.join(addi_regressions)}")
if not time_regressions.empty:
print(f"ERROR: runtime regressed on: {', '.join(time_regressions)}")
- sys.exit(1)
+ sys.exit(10)
# TODO also check if any new tables are missing?

print("No regressions. You're awesome!")
@@ -410,24 +413,30 @@ def run_all_benchmarks(
default=False,
help="Print output of run on each benchmark",
)
+ args_parser.add_argument(
+ "--debug",
+ action="store_true",
+ default=False,
+ help="Run each benchmark as a function call to allow a debugger to stop at breakpoints in benchmark runs.",
+ )
args = args_parser.parse_args()

spec = yaml.safe_load(open(args.benchmarks_yaml))
benchmarks_folder = spec["benchmarks_folder"]
benchmark_names = [b["name"] for b in spec["benchmarks"]]
if len(set(benchmark_names)) != len(benchmark_names):
print("ERROR: Found duplicate name in benchmarks YAML file")
- sys.exit(1)
+ sys.exit(11)

if args.dd and args.times_dir is None:
print("ERROR: --times_dir is required when using --dd")
- sys.exit(1)
+ sys.exit(12)

if args.run is not None:
benchmark = next((b for b in spec["benchmarks"] if b["name"] == args.run), None)
if benchmark is None:
print(f"ERROR: could not find {args.run} in {args.benchmarks_yaml}")
- sys.exit(1)
+ sys.exit(13)

_, runtime, gms, acc, cor, add = run_benchmark(
benchmark,
@@ -436,6 +445,7 @@
run_gams=args.dd,
skip_csv=args.skip_csv,
verbose=args.verbose,
+ debug=args.debug,
)
print(
f"Ran {args.run} in {runtime:.2f}s. {acc}% ({cor} correct, {add} additional).\n"
@@ -451,4 +461,5 @@
skip_main=args.skip_main,
skip_regression=args.skip_regression,
verbose=args.verbose,
+ debug=args.debug,
)
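The debug path above packs the in-process result into a namedtuple so downstream code can keep treating it like a subprocess result. A minimal standalone illustration of that trick (variable names here are illustrative; only the three fields the benchmark code reads are mimicked):

from collections import namedtuple

# Stand-in for subprocess.CompletedProcess, exposing just stdout, stderr and
# returncode, mirroring namedtuple("stdout", [...]) in the diff above.
FakeResult = namedtuple("stdout", ["stdout", "stderr", "returncode"])

res = FakeResult("benchmark summary text", "", 0)
if res.returncode == 0:
    print(res.stdout)  # handled exactly like captured subprocess output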
86 changes: 48 additions & 38 deletions xl2times/__main__.py
@@ -386,42 +386,7 @@ def dump_tables(tables: List, filename: str) -> List:
return tables


- def main(arg_list: None | list[str] = None) -> str:
- args_parser = argparse.ArgumentParser()
- args_parser.add_argument(
- "input",
- nargs="*",
- help="Either an input directory, or a list of input xlsx/xlsm files to process",
- )
- args_parser.add_argument(
- "--regions",
- type=str,
- default="",
- help="Comma-separated list of regions to include in the model",
- )
- args_parser.add_argument(
- "--output_dir", type=str, default="output", help="Output directory"
- )
- args_parser.add_argument(
- "--ground_truth_dir",
- type=str,
- help="Ground truth directory to compare with output",
- )
- args_parser.add_argument("--dd", action="store_true", help="Output DD files")
- args_parser.add_argument(
- "--only_read",
- action="store_true",
- help="Read xlsx/xlsm files and stop after outputting raw_tables.txt",
- )
- args_parser.add_argument("--use_pkl", action="store_true")
- args_parser.add_argument(
- "-v",
- "--verbose",
- action="store_true",
- help="Verbose mode: print tables after every transform",
- )
- args = args_parser.parse_args(arg_list)
-
+ def run(args) -> str | None:
config = datatypes.Config(
"times_mapping.txt",
"times-info.json",
@@ -434,7 +399,7 @@ def main(arg_list: None | list[str] = None) -> str:

if not isinstance(args.input, list) or len(args.input) < 1:
print(f"ERROR: expected at least 1 input. Got {args.input}")
- sys.exit(1)
+ sys.exit(-1)
elif len(args.input) == 1:
assert os.path.isdir(args.input[0])
input_files = [
@@ -471,8 +436,53 @@ def main(arg_list: None | list[str] = None) -> str:
ground_truth = read_csv_tables(args.ground_truth_dir)
comparison = compare(tables, ground_truth, args.output_dir)
return comparison
return ""
else:
return None


+ def parse_args(arg_list: None | list[str]) -> argparse.Namespace:
+ args_parser = argparse.ArgumentParser()
+ args_parser.add_argument(
+ "input",
+ nargs="*",
+ help="Either an input directory, or a list of input xlsx/xlsm files to process",
+ )
+ args_parser.add_argument(
+ "--regions",
+ type=str,
+ default="",
+ help="Comma-separated list of regions to include in the model",
+ )
+ args_parser.add_argument(
+ "--output_dir", type=str, default="output", help="Output directory"
+ )
+ args_parser.add_argument(
+ "--ground_truth_dir",
+ type=str,
+ help="Ground truth directory to compare with output",
+ )
+ args_parser.add_argument("--dd", action="store_true", help="Output DD files")
+ args_parser.add_argument(
+ "--only_read",
+ action="store_true",
+ help="Read xlsx/xlsm files and stop after outputting raw_tables.txt",
+ )
+ args_parser.add_argument("--use_pkl", action="store_true")
+ args_parser.add_argument(
+ "-v",
+ "--verbose",
+ action="store_true",
+ help="Verbose mode: print tables after every transform",
+ )
+ args = args_parser.parse_args(arg_list)
+ return args


+ def main(arg_list: None | list[str] = None):
+ args = parse_args(arg_list)
+ run(args)


if __name__ == "__main__":
main()
+ sys.exit(0)
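With main() now split into parse_args() and run(), callers such as utils/run_benchmarks.py can drive xl2times in-process, which is what lets debugger breakpoints inside the tool be hit. A hedged usage sketch (the input and output paths are placeholders):

from xl2times.__main__ import parse_args, run

# Placeholder paths; any input directory or list of xlsx/xlsm files accepted by the CLI works.
args = parse_args(["path/to/model_dir", "--output_dir", "out"])
summary = run(args)  # comparison summary string, or None when no --ground_truth_dir is given
print(summary)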
