From 5b03c2e0d17cd29622dfe10f2b28691801944d8d Mon Sep 17 00:00:00 2001 From: Kevin Gilpin Date: Mon, 3 Jun 2024 13:24:56 -0400 Subject: [PATCH 1/3] feat: Add --filter option to engine validation --- swebench/harness/engine_validation.py | 10 ++++++++++ 1 file changed, 10 insertions(+) diff --git a/swebench/harness/engine_validation.py b/swebench/harness/engine_validation.py index 4007b159..a3a9fa7f 100644 --- a/swebench/harness/engine_validation.py +++ b/swebench/harness/engine_validation.py @@ -121,6 +121,15 @@ def main(args): args.num_workers = cpu_count() task_instances = list(get_eval_refs(args.instances_path).values()) + + # filter by optional filter + if args.filter is not None: + task_instances = [ + task_instance + for task_instance in task_instances + if args.filter in task_instance["instance_id"] + ] + task_instances_groups = split_instances(task_instances, args.num_workers) data_groups = [ @@ -148,6 +157,7 @@ def main(args): if __name__ == "__main__": parser = argparse.ArgumentParser() parser.add_argument("--instances_path", type=str, help="Path to candidate task instances file", required=True) + parser.add_argument("--filter", type=str, help="(Optional) Filter for task instances") parser.add_argument("--log_dir", type=str, help="Path to log directory", required=True) parser.add_argument("--conda_link", type=str, default=None, help="(Optional) URL to conda installation to use") parser.add_argument("--log_suffix", type=str, default=None, help="(Optional) Suffix to append to log file names") From 23f8db55575c01efc429025ffa21d99d7d7c42d1 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Rafa=C5=82=20Rzepecki?= Date: Mon, 3 Jun 2024 17:16:22 +0000 Subject: [PATCH 2/3] Fix make_appmaps.py - Use appmap-python script to enable appmapping - Use subprocess.run directly to run the indexer --- appmap/make_appmaps.py | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/appmap/make_appmaps.py b/appmap/make_appmaps.py index 12bf2b94..88365108 100644 --- a/appmap/make_appmaps.py +++ b/appmap/make_appmaps.py @@ -1,4 +1,4 @@ -import argparse, glob, itertools, os, tarfile +import argparse, glob, itertools, os, tarfile, subprocess from multiprocessing import Pool, cpu_count from swebench.harness.constants import MAP_REPO_TO_TEST_FRAMEWORK, PatchType @@ -84,6 +84,7 @@ def make_appmaps(data: dict): task_instance["repo"] ]}""" tcm.log.write("Running tests with appmap") + task_instance["test_cmd"] = f"appmap-python {task_instance['test_cmd']}" tcm.run_tests_task(task_instance) tcm.log.write("Uninstalling appmap") tcm.exec(["bash", "-c", f"{tcm.cmd_activate} && pip uninstall -y appmap"]) @@ -97,7 +98,7 @@ def make_appmaps(data: dict): return # index appmaps tcm.log.write(f"Indexing {len(appmaps)} appmaps") - tcm.exec([appmap_bin, "index", "-d", data_dict.testbed]) + subprocess.run([appmap_bin, "index", "-d", data_dict.testbed], check=True) # archive appmaps tcm.log.write(f"Archiving {len(appmaps)} appmaps to {archive_name}") with tarfile.open(archive_name, "w:xz") as tar: From 8069645e68b81618e766dda2e3ac3d524ce2f94b Mon Sep 17 00:00:00 2001 From: Kevin Gilpin Date: Mon, 3 Jun 2024 14:28:11 -0400 Subject: [PATCH 3/3] fix: Write issue descriptions into individual directories Propagate variables that are needed by the solver. Remove global variable. --- appmap/solve.py | 63 +++++++++++++++------------ swebench/harness/context_manager.py | 9 ++++ swebench/harness/engine_validation.py | 6 +++ 3 files changed, 50 insertions(+), 28 deletions(-) diff --git a/appmap/solve.py b/appmap/solve.py index 9612f7c0..f4a93d7f 100644 --- a/appmap/solve.py +++ b/appmap/solve.py @@ -12,7 +12,6 @@ from filelock import FileLock datasets_dir = Path(__file__).parent / "datasets" -output_file = None def load_data(dataset_name, split) -> tuple[DatasetDict, str]: @@ -29,33 +28,41 @@ def load_data(dataset_name, split) -> tuple[DatasetDict, str]: def solve_instance(data): + # Check that this is defined + output_file = data["output_file"] + for instance in data["task_instances"]: + # Create a temporary directory to store the problem statement and the working files + issue_dir = Path(data["testbed"]) / instance["instance_id"] + issue_dir.mkdir(parents=True, exist_ok=True) + issue_file = issue_dir / "issue.txt" + with open(issue_file, "w") as f: + f.write(instance["problem_statement"]) + try: - with NamedTemporaryFile(mode="w", dir=data["testbed"], prefix="issue_", suffix=".txt") as f: - f.write(instance["problem_statement"]) - f.flush() - run( - [ - "python", - abspath(args.solver_path), - data["testbed"], - f.name, - "--appmap-command", - args.appmap_command, - ], - check=True, - cwd=data["testbed"], - ) - output = run(["git", "--no-pager", "diff"], check=True, cwd=data["testbed"], capture_output=True, text=True) - if output.stdout: - instance["model_patch"] = output.stdout - instance["model_name_or_path"] = "navie" - with FileLock(f"{output_file}.lock"): - with open(output_file, "a+") as f: - f.write(json.dumps(instance) + "\n") + run( + [ + "python", + abspath(data["solver_path"]), + data["testbed"], + str(issue_file), + "--appmap-command", + data["appmap_command"] + ], + check=True, + cwd=data["testbed"], + ) + output = run(["git", "--no-pager", "diff"], check=True, cwd=data["testbed"], capture_output=True, text=True) + if output.stdout: + instance["model_patch"] = output.stdout + instance["model_name_or_path"] = "navie" + with FileLock(f"{output_file}.lock"): + with open(output_file, "a+") as f: + f.write(json.dumps(instance) + "\n") except Exception as e: - print(f"Error: {e}") - + import traceback + print(f"Error processing {instance['instance_id']}") + traceback.print_exc() def solve_instances(instances, args): if args.filter is not None: @@ -68,6 +75,7 @@ def solve_instances(instances, args): { "task_instances": g, "func": solve_instance, + "output_file": args.output, **vars(args), } for g in instance_groups @@ -83,14 +91,13 @@ def solve_instances(instances, args): pool.join() def main(args): - dataset = load_data(args.instances, args.split) - global output_file - output_file = args.output + dataset = load_data(args.instances_path, args.split) solve_instances(dataset, args) if __name__ == "__main__": parser = argparse.ArgumentParser() parser.add_argument( + "--instances_path", "--instances", type=str, help="path or huggingface name of task instances dataset", diff --git a/swebench/harness/context_manager.py b/swebench/harness/context_manager.py index f267ee85..6e28c30f 100644 --- a/swebench/harness/context_manager.py +++ b/swebench/harness/context_manager.py @@ -111,6 +111,9 @@ def __init__( timeout: int = None, verbose: bool = False, keep: bool = False, + appmap_command: str = None, + solver_path: str = None, + output_file: str = None, ): """ Initialize testbed context. Creates temporary directories and groups task instances @@ -143,6 +146,9 @@ def __init__( "stderr": subprocess.STDOUT, }, ) + self.solver_path = solver_path + self.appmap_command = appmap_command + self.output_file = output_file # Create log, temp directories if they don't exist if not os.path.exists(self.log_dir): @@ -437,6 +443,9 @@ def get_distributed_tasks(self) -> list: "venv": env_name, "version": version, "verbose": self.verbose, + "solver_path": self.solver_path, + "appmap_command": self.appmap_command, + "output_file": self.output_file, } distributed_tasks.append(task_set) return distributed_tasks diff --git a/swebench/harness/engine_validation.py b/swebench/harness/engine_validation.py index a3a9fa7f..ea08f6b5 100644 --- a/swebench/harness/engine_validation.py +++ b/swebench/harness/engine_validation.py @@ -85,6 +85,9 @@ def setup_testbed(data: dict): temp_dir: Path to temporary directory for storing virtual envs timeout: Timeout (seconds) for testing script execution verbose: Verbose mode + appmap_command: Path to appmap command + solver_path: Path to solver + output_file: Path to output file """ data_dict = DotDict(data) with TestbedContextManager( @@ -96,6 +99,9 @@ def setup_testbed(data: dict): temp_dir=data_dict.temp_dir, timeout=data_dict.timeout, verbose=data_dict.verbose, + appmap_command=data_dict.appmap_command, + solver_path=data_dict.solver_path, + output_file=data_dict.output_file, ) as tcm: distributed_task_list = tcm.get_distributed_tasks() for task_list in distributed_task_list: