def solve_instance(data):
    """Run the solver on every task instance of one testbed and record patches.

    For each instance in ``data["task_instances"]`` the problem statement is
    written to ``<testbed>/<instance_id>/issue.txt``, the solver script is
    executed with the testbed as its working directory, and the output of
    ``git diff`` in the testbed (when non-empty) is appended to the
    predictions file as one JSON line.

    Args:
        data: Mapping providing ``output_file``, ``task_instances``,
            ``testbed``, ``solver_path`` and ``appmap_command``.

    Raises:
        KeyError: If ``data`` lacks ``output_file`` (checked before any work).

    A failure while solving one instance is reported on stdout/stderr and the
    loop continues with the next instance (deliberate best-effort).
    """
    # Look this up first so a missing key fails before any solver is run.
    output_file = data["output_file"]
    testbed = data["testbed"]

    for instance in data["task_instances"]:
        # Per-instance directory holding the problem statement and any
        # working files.
        issue_dir = Path(testbed) / instance["instance_id"]
        issue_dir.mkdir(parents=True, exist_ok=True)
        issue_file = issue_dir / "issue.txt"
        # Explicit UTF-8: problem statements may contain non-ASCII text and
        # the platform default encoding is not guaranteed to handle it.
        with open(issue_file, "w", encoding="utf-8") as f:
            f.write(instance["problem_statement"])

        try:
            run(
                [
                    "python",
                    abspath(data["solver_path"]),
                    testbed,
                    str(issue_file),
                    "--appmap-command",
                    data["appmap_command"],
                ],
                check=True,
                cwd=testbed,
            )
            output = run(
                ["git", "--no-pager", "diff"],
                check=True,
                cwd=testbed,
                capture_output=True,
                text=True,
            )
            # Only instances for which the solver changed something are
            # written to the predictions file.
            if output.stdout:
                instance["model_patch"] = output.stdout
                instance["model_name_or_path"] = "navie"
                # The lock file serialises appends to the shared output file.
                with FileLock(f"{output_file}.lock"):
                    with open(output_file, "a", encoding="utf-8") as f:
                        f.write(json.dumps(instance) + "\n")
        except Exception:
            # Best-effort: report the failure and move on to the next instance.
            import traceback

            print(f"Error processing {instance['instance_id']}")
            traceback.print_exc()


def solve_instances(instances, args):
    """Distribute task instances over workers and solve them.

    Args:
        instances: Iterable of task-instance mappings, each with an
            ``instance_id`` key.
        args: Parsed command-line namespace. ``filter``, ``num_workers`` and
            ``output`` are read here; every attribute is also forwarded to
            ``setup_testbed`` in the per-group dict.

    Returns:
        None.
    """
    instances = list(instances)
    if args.filter is not None:
        instances = [
            instance for instance in instances if args.filter in instance["instance_id"]
        ]

    if not instances:
        # Nothing matched: avoid setting up testbeds for empty groups.
        print("No task instances to solve")
        return

    # Never start more workers than there are instances; surplus workers
    # would only receive empty groups.
    num_workers = min(args.num_workers, len(instances))

    instance_groups = split_instances(instances, num_workers)
    data_groups = [
        {
            "task_instances": group,
            "func": solve_instance,
            "output_file": args.output,
            **vars(args),
        }
        for group in instance_groups
    ]

    if num_workers == 1:
        # Single worker: run in-process, no pool needed.
        setup_testbed(data_groups[0])
        return

    pool = Pool(processes=num_workers)
    try:
        pool.map(setup_testbed, data_groups)
    finally:
        # Release the worker processes even if a task raised.
        pool.close()
        pool.join()


def main(args):
    """Load the requested dataset split and solve its task instances."""
    dataset = load_data(args.instances_path, args.split)
    solve_instances(dataset, args)


if __name__ == "__main__":
    parser = argparse.ArgumentParser()
    parser.add_argument(
        "--instances_path",
        "--instances",
        type=str,
        help="path or huggingface name of task instances dataset",
        default="princeton-nlp/SWE-bench_Lite",
    )
    parser.add_argument(
        "--split", type=str, default="test", help="Dataset split to use"
    )
    parser.add_argument(
        "--log_dir", type=str, help="Path to log directory", default="logs"
    )
    parser.add_argument(
        "--conda_link",
        type=str,
        default=None,
        help="(Optional) URL to conda installation to use",
    )
    parser.add_argument(
        "--log_suffix",
        type=str,
        default=None,
        help="(Optional) Suffix to append to log file names",
    )
    parser.add_argument(
        "--path_conda",
        type=str,
        help="(Optional) Path to miniconda3 or anaconda installation",
    )
    parser.add_argument(
        "--testbed", type=str, help="(Optional) Path to testbed directory"
    )
    parser.add_argument(
        "--temp_dir",
        type=str,
        help="(Optional) Path to temporary directory for storing virtual envs",
    )
    parser.add_argument(
        "--timeout",
        type=int,
        default=None,
        help="(Optional) Timeout (seconds) for testing script execution",
    )
    parser.add_argument(
        "--verbose", action="store_true", help="(Optional) Verbose mode"
    )
    parser.add_argument(
        "--num_workers",
        type=int,
        default=cpu_count(),
        help="(Optional) Number of workers",
    )
    parser.add_argument(
        "--filter",
        type=str,
        default=None,
        help="(Optional) Filter to apply to task instances",
    )
    # Required, so no default is given.
    parser.add_argument(
        "--solver_path", type=str, help="Path to solver", required=True
    )
    parser.add_argument(
        "--appmap_command", type=str, default="appmap", help="Path to appmap command"
    )
    parser.add_argument(
        "--output",
        type=str,
        default="predictions.jsonl",
        help="Path to output predictions",
    )
    args = parser.parse_args()
    main(args)
Creates temporary directories and groups task instances @@ -143,6 +146,9 @@ def __init__( "stderr": subprocess.STDOUT, }, ) + self.solver_path = solver_path + self.appmap_command = appmap_command + self.output_file = output_file # Create log, temp directories if they don't exist if not os.path.exists(self.log_dir): @@ -164,7 +170,9 @@ def __init__( # Create test command from framework + directives test_type = MAP_REPO_TO_TEST_FRAMEWORK[instance["repo"]] instance["test_directives"] = get_test_directives(instance) - instance["test_cmd"] = f"{test_type} {' '.join(instance['test_directives'])}" + # KEG: I'm not sure where this is used from, because it's not called by + # make_appmaps.py. But I'm including it for completeness. + instance["test_cmd"] = f"appmap-python {test_type} {' '.join(instance['test_directives'])}" # Group task instances by repo, version repo = instance["repo"] @@ -435,6 +443,9 @@ def get_distributed_tasks(self) -> list: "venv": env_name, "version": version, "verbose": self.verbose, + "solver_path": self.solver_path, + "appmap_command": self.appmap_command, + "output_file": self.output_file, } distributed_tasks.append(task_set) return distributed_tasks diff --git a/swebench/harness/engine_validation.py b/swebench/harness/engine_validation.py index 4007b159..ea08f6b5 100644 --- a/swebench/harness/engine_validation.py +++ b/swebench/harness/engine_validation.py @@ -85,6 +85,9 @@ def setup_testbed(data: dict): temp_dir: Path to temporary directory for storing virtual envs timeout: Timeout (seconds) for testing script execution verbose: Verbose mode + appmap_command: Path to appmap command + solver_path: Path to solver + output_file: Path to output file """ data_dict = DotDict(data) with TestbedContextManager( @@ -96,6 +99,9 @@ def setup_testbed(data: dict): temp_dir=data_dict.temp_dir, timeout=data_dict.timeout, verbose=data_dict.verbose, + appmap_command=data_dict.appmap_command, + solver_path=data_dict.solver_path, + 
output_file=data_dict.output_file, ) as tcm: distributed_task_list = tcm.get_distributed_tasks() for task_list in distributed_task_list: @@ -121,6 +127,15 @@ def main(args): args.num_workers = cpu_count() task_instances = list(get_eval_refs(args.instances_path).values()) + + # filter by optional filter + if args.filter is not None: + task_instances = [ + task_instance + for task_instance in task_instances + if args.filter in task_instance["instance_id"] + ] + task_instances_groups = split_instances(task_instances, args.num_workers) data_groups = [ @@ -148,6 +163,7 @@ def main(args): if __name__ == "__main__": parser = argparse.ArgumentParser() parser.add_argument("--instances_path", type=str, help="Path to candidate task instances file", required=True) + parser.add_argument("--filter", type=str, help="(Optional) Filter for task instances") parser.add_argument("--log_dir", type=str, help="Path to log directory", required=True) parser.add_argument("--conda_link", type=str, default=None, help="(Optional) URL to conda installation to use") parser.add_argument("--log_suffix", type=str, default=None, help="(Optional) Suffix to append to log file names") diff --git a/viewer/script.js b/viewer/script.js index af700e07..4f005450 100644 --- a/viewer/script.js +++ b/viewer/script.js @@ -28,9 +28,31 @@ function addPreField(k, data) { const h1 = document.createElement("h2"); h1.textContent = k; div.appendChild(h1); - const p = document.createElement("pre"); - p.textContent = stringify(data[k]); - div.appendChild(p); + + if (k === "navie_context") { + data[k].forEach((item) => { + const d = document.createElement("div"); + d.style.background = "#eee"; + d.style.padding = "1rem"; + const h4 = document.createElement("h4"); + h4.style.marginBottom = "0"; + h4.textContent = item.directory; + const subtitle = document.createElement("h5"); + subtitle.style.marginBottom = "0"; + subtitle.style.marginTop = "0"; + subtitle.textContent = item.type; + const pre = 
document.createElement("pre"); + pre.textContent = item.content.replaceAll("\\n", "\n"); + d.appendChild(h4); + d.appendChild(subtitle); + d.appendChild(pre); + div.appendChild(d); + }); + } else { + const p = document.createElement("pre"); + p.textContent = stringify(data[k]); + div.appendChild(p); + } dataContainer.appendChild(div); } } @@ -52,7 +74,11 @@ function addMdField(k, data) { function stringify(value) { switch (typeof value) { case "object": - return JSON.stringify(value, undefined, 2); + return JSON.stringify(value, undefined, 2).replaceAll( + "\\n", + ` +` + ); default: return String(value); } diff --git a/viewer/viewer.py b/viewer/viewer.py index b34727c3..4627a0d7 100644 --- a/viewer/viewer.py +++ b/viewer/viewer.py @@ -5,7 +5,7 @@ import os import sys -PORT = 8080 +PORT = 8081 data_path = os.path.abspath(sys.argv[1])