diff --git a/.github/workflows/solve.yml b/.github/workflows/solve.yml new file mode 100644 index 00000000..8faca73d --- /dev/null +++ b/.github/workflows/solve.yml @@ -0,0 +1,139 @@ +on: + workflow_dispatch: + inputs: + filter: + description: "Instance filter" + required: true + default: marshmallow + dataset: + description: "Dataset name" + required: true + default: princeton-nlp/SWE-bench_Lite + split: + description: "Dataset split" + required: true + default: dev + retries: + description: "Number of retries to perform on each instance until a patch is found" + required: false + default: "3" + + pull_request: + +jobs: + solve: + if: ${{ contains(github.event.pull_request.labels.*.name, 'evaluate') || github.event_name == 'workflow_dispatch' }} + runs-on: swe-bench-ubuntu-latest + defaults: + run: + shell: bash -leo pipefail {0} + steps: + - name: Checkout + uses: actions/checkout@v3 + with: + submodules: true + + - name: Set up Python + uses: actions/setup-python@v4 + + # Cache the conda environment + - name: Cache conda environment + id: cache-conda + uses: actions/cache@v3 + with: + path: /usr/share/miniconda/envs/swe-bench + key: conda-${{ runner.os }}-${{ hashFiles('environment.yml') }} + + # Create conda env if cache miss happens + - name: Create conda env + if: steps.cache-conda.outputs.cache-hit != 'true' + run: | + conda init bash + conda env create -f environment.yml + pip install flake8 black + + # Cache the appmap-js build + - name: Cache appmap-js build + uses: actions/cache@v3 + id: cache-appmap-js + with: + path: | + submodules/appmap-js/node_modules + submodules/appmap-js/packages/*/built + key: appmap-js-${{ runner.os }}-${{ hashFiles('submodules/appmap-js/package.json') }} + + - name: Build submodules + # TODO: figure out why it doesn't work with cache + # if: steps.cache-appmap-js.outputs.cache-hit != 'true' + env: + PUPPETEER_SKIP_DOWNLOAD: true + run: | + cd submodules/appmap-js + git checkout -- . 
+ yarn + yarn build + chmod +x packages/cli/built/cli.js + + - name: Run benchmark + env: + OPENAI_API_KEY: ${{ secrets.OPENAI_API_KEY }} + SWE_DATASET: ${{ inputs.dataset }} + SWE_SPLIT: ${{ inputs.split }} + SWE_FILTER: ${{ inputs.filter }} + SWE_RETRIES: ${{ inputs.retries }} + run: | + source /usr/share/miniconda/etc/profile.d/conda.sh + conda activate swe-bench + export PYTHONPATH=$PYTHONPATH:$(pwd) + python appmap/solve.py \ + --instances ${SWE_DATASET:-princeton-nlp/SWE-bench_Lite} \ + --split ${SWE_SPLIT:-dev} \ + --filter ${SWE_FILTER:-marshmallow} \ + --retries ${SWE_RETRIES:-3} \ + --appmap_command $(pwd)/submodules/appmap-js/packages/cli/built/cli.js \ + --lint_command "flake8 --extend-ignore=BLK100,W293,E201,E202,E303,E501,E128,E231,C408,F401,C402,E402,C416,E261,E302,D" \ + --temp_dir ${{ runner.temp }} \ + --num_workers 6 \ + --path_conda $(conda info --base) \ + --verbose + + - name: Run evaluation + env: + SWE_DATASET: ${{ inputs.dataset }} + run: | + mkdir -p logs + source /usr/share/miniconda/etc/profile.d/conda.sh + conda activate swe-bench + export PYTHONPATH=$PYTHONPATH:$(pwd) + python swebench/harness/run_evaluation.py \ + --predictions_path predictions.jsonl \ + --swe_bench_tasks ${SWE_DATASET:-princeton-nlp/SWE-bench_Lite} \ + --log_dir logs \ + --testbed ${{ runner.temp }} \ + --skip_existing \ + --timeout 900 \ + --verbose \ + --num_processes 8 \ + --path_conda $(conda info --base) + + - name: Generate AppMap report + env: + SWE_DATASET: ${{ inputs.dataset }} + SWE_SPLIT: ${{ inputs.split }} + run: | + source /usr/share/miniconda/etc/profile.d/conda.sh + conda activate swe-bench + export PYTHONPATH=$PYTHONPATH:$(pwd) + conda info + python appmap/report.py \ + --instances ${SWE_DATASET:-princeton-nlp/SWE-bench_Lite} \ + --split ${SWE_SPLIT:-dev} + + - name: Archive predictions and logs + uses: actions/upload-artifact@v4 + with: + name: results + path: | + logs/ + predictions.jsonl + results.csv diff --git a/.gitignore b/.gitignore index 5fd289a5..a0489ac7 100644 --- a/.gitignore +++ b/.gitignore @@ -174,3 +174,9 @@ analysis/evaluation/*.csv analysis/evaluation/*.pdf data/repos/copies notebooks/ +*.csv +appmap.sh +work +appmap/datasets +logs + diff --git a/.gitmodules b/.gitmodules index e69de29b..37673848 100644 --- a/.gitmodules +++ b/.gitmodules @@ -0,0 +1,4 @@ +[submodule "submodules/appmap-js"] + path = submodules/appmap-js + url = https://github.com/getappmap/appmap-js + branch = feat/apply-command diff --git a/appmap/__init__.py b/appmap/__init__.py new file mode 100644 index 00000000..e69de29b diff --git a/appmap/data.py b/appmap/data.py new file mode 100644 index 00000000..8d7ce2f2 --- /dev/null +++ b/appmap/data.py @@ -0,0 +1,17 @@ +from datasets import Dataset, load_dataset, load_from_disk +from pathlib import Path + +datasets_dir = Path(__file__).parent / "datasets" + + +def load_data(dataset_name, split) -> Dataset: + dataset_dir = datasets_dir / dataset_name.replace("/", "__") + dataset = None + if Path(dataset_dir).exists(): + dataset = load_from_disk(str(dataset_dir)) + else: + dataset = load_dataset(dataset_name) + Path.mkdir(dataset_dir, parents=True) + dataset.save_to_disk(str(dataset_dir)) + + return dataset[split] diff --git a/appmap/make_appmaps.py b/appmap/make_appmaps.py index a1d09172..da48ba7e 100644 --- a/appmap/make_appmaps.py +++ b/appmap/make_appmaps.py @@ -1,12 +1,12 @@ import argparse, glob, itertools, os, tarfile, subprocess from multiprocessing import Pool, cpu_count -from swebench.harness.constants import
MAP_REPO_TO_TEST_FRAMEWORK, PatchType +from swebench.harness.constants import MAP_REPO_TO_TEST_FRAMEWORK from swebench.harness.context_manager import ( TaskEnvContextManager, TestbedContextManager, ) -from swebench.harness.utils import get_instances, split_instances, DotDict +from swebench.harness.utils import split_instances, DotDict from swebench.metrics.getters import get_eval_refs @@ -36,9 +36,9 @@ def validate_args(args): # If value is provided, check that it is valid if args.timeout is not None and args.timeout < 0: - raise ValueError(f"Timeout must be a positive integer") + raise ValueError("Timeout must be a positive integer") if args.num_workers is not None and args.num_workers < 1: - raise ValueError(f"Number of workers must be a positive integer") + raise ValueError("Number of workers must be a positive integer") if not os.path.exists(appmap_bin): raise ValueError(f"Could not find appmap binary at {args.appmap_bin}") @@ -252,7 +252,7 @@ def main(args): "--num_workers", type=int, default=None, help="(Optional) Number of workers" ) parser.add_argument( - "--appmap-bin", + "--appmap_bin", type=str, help="path to appmap binary", default="~/.appmap/bin/appmap", diff --git a/appmap/navie_issue.py b/appmap/navie_issue.py index 1877e3f5..b3dbd1f8 100755 --- a/appmap/navie_issue.py +++ b/appmap/navie_issue.py @@ -9,7 +9,6 @@ from datasets import DatasetDict, load_dataset, load_from_disk from swebench.harness.utils import clone_to -from swebench.metrics.getters import get_eval_refs from subprocess import PIPE, Popen import json from filelock import FileLock diff --git a/appmap/report.py b/appmap/report.py new file mode 100644 index 00000000..4b7145d9 --- /dev/null +++ b/appmap/report.py @@ -0,0 +1,96 @@ +import argparse +import csv +import os + +from swebench import get_model_report +from appmap.data import load_data + + +def main(predictions, instances, log_dir, model, split, save_results, verbose, output): + report = get_model_report( + model=model, + predictions_path=os.path.abspath(predictions), + swe_bench_tasks=instances, + log_dir=os.path.join(log_dir, model), + verbose=verbose, + ) + + for k, v in report.items(): + print(f"{k}: {len(v)}") + + if save_results: + dataset = load_data(instances, split) + write_csv_report( + report, + dataset, + split, + output, + ) + + +def write_csv_report(report_map, dataset, split, output_csv_path): + # Prepare CSV headers + headers = ["instance_id", "split"] + [ + key for key in report_map.keys() if key != "no_generation" + ] + + all_preds = set() + for ids in report_map.values(): + all_preds.update(ids) + + # Write to CSV + with open(output_csv_path, "w", newline="") as csv_file: + writer = csv.DictWriter(csv_file, fieldnames=headers) + writer.writeheader() + for instance in dataset.to_list(): + if instance["instance_id"] not in all_preds: + continue + row = {"instance_id": instance["instance_id"], "split": split} + for category in headers[len(row) :]: + row[category] = instance["instance_id"] in report_map.get(category, []) + writer.writerow(row) + + print(f"Wrote {len(all_preds)} predictions to {output_csv_path}") + + +if __name__ == "__main__": + parser = argparse.ArgumentParser() + parser.add_argument( + "--predictions", + type=str, + default="predictions.jsonl", + help="Path to predictions file", + ) + parser.add_argument( + "--instances", + type=str, + help="huggingface name of task instances dataset", + default="princeton-nlp/SWE-bench_Lite", + ) + parser.add_argument( + "--log_dir", type=str, help="Path to log directory", default="logs" + 
) + parser.add_argument( + "--model", + type=str, + default="navie", + help="Name of folder containing model evaluation results (e.g. '20240402_sweagent_gpt4')", + ) + parser.add_argument( + "--split", + type=str, + default="test", + help="Name of split to get evaluation results for (should be parent folder, e.g. 'test', 'dev')", + choices=["test", "dev"], + ) + parser.add_argument( + "--save_results", default=True, action="store_true", help="Save results to file" + ) + parser.add_argument( + "--verbose", action="store_true", help="Show intermediate messages" + ) + parser.add_argument( + "--output", type=str, default="results.csv", help="Path to output file" + ) + args = parser.parse_args() + main(**vars(args)) diff --git a/appmap/solve.py b/appmap/solve.py new file mode 100644 index 00000000..6ff903d1 --- /dev/null +++ b/appmap/solve.py @@ -0,0 +1,256 @@ +import argparse +import json +from pathlib import Path +from multiprocessing import Pool, current_process, cpu_count +from swebench.harness.context_manager import ( + TestbedContextManager, + TaskEnvContextManager, +) +from swebench.harness.utils import split_instances, DotDict +from subprocess import run +from os.path import abspath +from filelock import FileLock +from data import load_data + + +def output_results(instance, output_file, patch): + if patch is None: + return + instance["model_patch"] = patch + instance["model_name_or_path"] = "navie" + with FileLock(f"{output_file}.lock"): + with open(output_file, "a+") as f: + f.write(json.dumps(instance) + "\n") + + +def solve_instance(instance, log_dir, testbed, appmap_command, lint_command, retries): + issue_dir = Path(log_dir) / "solve" / instance["instance_id"] + issue_dir.mkdir(parents=True, exist_ok=True) + issue_file = issue_dir / "issue.txt" + with open(issue_file, "w") as f: + f.write(instance["problem_statement"]) + + solver_path = Path(__file__).parent / "solve" / "solver.py" + run_args = [ + "python", + str(solver_path), + str(issue_file), + "--retries", + str(retries), + "--log-dir", + log_dir, + "--appmap-command", + appmap_command, + ] + if lint_command is not None: + run_args.extend(["--lint-command", lint_command]) + + try: + # Run this as a separate process so that it can change the working directory.
+ run(run_args, check=True, cwd=testbed) + output = run( + ["git", "--no-pager", "diff"], + check=True, + cwd=testbed, + capture_output=True, + text=True, + ) + return output.stdout + except Exception: + print(f"Error processing {instance['instance_id']}") + import traceback + + traceback.print_exc() + + +def worker_init(data: dict): + """ + Args: + data: Dict containing task instances and other data + conda_link: URL to conda installation to use + task_instances: List of task instances + log_dir: Path to log directory + path_conda: Path to miniconda3 or anaconda installation + testbed: Path to testbed directory + temp_dir: Path to temporary directory for storing virtual envs + timeout: Timeout (seconds) for testing script execution + verbose: Verbose mode + output_file: Path to output file + """ + data_dict = DotDict(data) + + assert data_dict.output is not None + assert data_dict.appmap_command is not None + assert data_dict.path_conda is not None + assert data_dict.retries is not None + + output_file = abspath(data_dict.output) + + try: + with TestbedContextManager( + data_dict.task_instances, + data_dict.log_dir, + conda_link=data_dict.conda_link, + path_conda=data_dict.path_conda, + testbed=data_dict.testbed, + temp_dir=data_dict.temp_dir, + timeout=data_dict.timeout, + verbose=data_dict.verbose, + keep=data_dict.keep, + ) as tcm: + for instance in data_dict.task_instances: + repo_prefix = instance["repo"].replace("/", "__") + env_name = f"{repo_prefix}__{instance['version']}" + testbed = Path(tcm.testbed) / env_name + log_dir = abspath(data_dict.log_dir) + try: + with TaskEnvContextManager( + instance, + testbed.as_posix(), + env_name, + log_dir, + data_dict.path_conda, + timeout=data_dict.timeout, + verbose=data_dict.verbose, + log_suffix=data_dict.log_suffix, + ) as task_manager: + if not task_manager.reset_task_env(instance): + continue + patch = solve_instance( + instance, + log_dir, + testbed, + data_dict.appmap_command, + data_dict.lint_command, + data_dict.retries + ) + output_results(instance, output_file, patch) + except Exception: + print(f"Error processing {instance['instance_id']}") + import traceback + traceback.print_exc() + except Exception: + print("Error instantiating testbed") + import traceback + traceback.print_exc() + + +def solve_instances(instances, args): + if args.filter is not None: + instances = [ + instance for instance in instances if args.filter in instance["instance_id"] + ] + + instance_groups = split_instances(list(instances), args.num_workers) + data_groups = [ + { + "task_instances": g, + "func": solve_instance, + **vars(args), + } + for g in instance_groups + ] + + if args.num_workers == 1: + worker_init(data_groups[0]) + return + + pool = Pool(processes=args.num_workers) + pool.map(worker_init, data_groups) + pool.close() + pool.join() + + +def main(args): + dataset = load_data(args.instances_path, args.split) + solve_instances(dataset, args) + + +if __name__ == "__main__": + parser = argparse.ArgumentParser() + parser.add_argument( + "--instances_path", + "--instances", + type=str, + help="path or huggingface name of task instances dataset", + default="princeton-nlp/SWE-bench_Lite", + ) + parser.add_argument( + "--split", type=str, default="test", help="Dataset split to use" + ) + parser.add_argument( + "--log_dir", type=str, help="Path to log directory", default="logs" + ) + parser.add_argument( + "--conda_link", + type=str, + default=None, + help="(Optional) URL to conda installation to use", + ) + parser.add_argument( + "--log_suffix", + type=str, + default=None, + help="(Optional) Suffix to append to log file names", + ) + parser.add_argument( + "--path_conda", + type=str, + help="(Optional) Path to miniconda3 or anaconda installation", + ) + parser.add_argument( + "--testbed", type=str, help="(Optional) Path to testbed directory" + ) + parser.add_argument( + "--temp_dir", + type=str, + help="(Optional) Path to temporary directory for storing virtual envs", + ) + parser.add_argument( + "--timeout", + type=int, + default=None, + help="(Optional) Timeout (seconds) for testing script execution", + ) + parser.add_argument( + "--retries", + type=int, + default=3, + help="Number of times to try and create a code update for each test instance", + ) + parser.add_argument( + "--verbose", action="store_true", help="(Optional) Verbose mode" + ) + parser.add_argument( + "--num_workers", + type=int, + default=cpu_count(), + help="(Optional) Number of workers", + ) + parser.add_argument( + "--filter", + type=str, + default=None, + help="(Optional) Filter to apply to task instances", + ) + parser.add_argument( + "--appmap_command", type=str, default="appmap", help="Path to appmap command" + ) + parser.add_argument( + "--lint_command", + type=str, + help="Path to lint command. Example: flake8 --extend-ignore=BLK100,W293,E501,E302,D", + ) + parser.add_argument( + "--output", + type=str, + default="predictions.jsonl", + help="Path to output predictions", + ) + parser.add_argument( + "--keep", + action="store_true", + help="(Optional) Keep temporary directories after running", + ) + args = parser.parse_args() + main(args)
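For reference, `output_results` above serializes concurrent writers with a sidecar lock file so each worker appends whole JSON lines to the shared predictions file. A minimal standalone sketch of that pattern (file names and the example record are illustrative, not part of the patch; `filelock` is the same third-party package the patch already imports):

```python
# Lock-guarded JSONL append, as used by output_results in appmap/solve.py.
import json

from filelock import FileLock


def append_prediction(output_file, record):
    # Holding the lock keeps one worker's line from interleaving with another's.
    with FileLock(f"{output_file}.lock"):
        with open(output_file, "a+") as f:
            f.write(json.dumps(record) + "\n")


if __name__ == "__main__":
    append_prediction(
        "predictions.jsonl",
        {"instance_id": "example__example-1", "model_name_or_path": "navie"},
    )
```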
diff --git a/appmap/solve/__init__.py b/appmap/solve/__init__.py new file mode 100644 index 00000000..e69de29b diff --git a/appmap/solve/format_instructions.py b/appmap/solve/format_instructions.py new file mode 100644 index 00000000..d74055a3 --- /dev/null +++ b/appmap/solve/format_instructions.py @@ -0,0 +1,80 @@ +import textwrap + + +def format_instructions(): + return textwrap.dedent( + """ + For each change you want to make, generate a pair of tags called <original> and <modified>. + + Wrap these tags with a <change> tag that also includes a <file> tag with the file path. + + The <original> tag should contain the original code that you want to change. Do not abbreviate + existing code using ellipses or similar. + + Always include an attribute "no-ellipsis" with the value "true" in the <original> tag. + This should be a true statement about the tag. + + The <original> code should contain an attribute that indicates approximately how many lines of context + it contains. You should plan for this context to contain the code that should be modified, plus + three lines before and after it. + + Do not output the entire original code, or long functions, if you only want to change a part of it. + Plan to output only the part that you want to change. + + If you need to make multiple changes to the same file, output multiple <change> tags. + In the <change> tag, indicate the number of the change that this is, starting from 1. + + The <modified> tag should contain the modified code that you want to replace the original code with. + Do not abbreviate the modified code using ellipses or similar. You must place the exact modified code + in the <modified> tag. + + You do not need to output the entire modified code if you only want to change a part of it. Output + only the part that you want to change. + + Always include an attribute "no-ellipsis" with the value "true" in the <modified> tag. + This should be a true statement about the tag. + + Both the original code and the output code must contain the proper indentation and formatting. + For example, if the original code has 4 spaces of indentation, the output code must also have 4 + spaces of indentation. If the original code has 8 spaces of indentation, the output code must also have + 8 spaces of indentation. + + The <original> and <modified> content should be wrapped in a CDATA section to avoid XML parsing issues. + + ## Example output + + <change> + <file>src/main/java/org/springframework/samples/petclinic/vet/Vet.java</file> + <original no-ellipsis="true"><![CDATA[private Set<Specialty> specialties; + + protected Set<Specialty> getSpecialtiesInternal() { + if (this.specialties == null) { + this.specialties = new HashSet<>(); + } + return this.specialties; + }]]></original> + <modified no-ellipsis="true"><![CDATA[private Set<Specialty> specialties; + + private String address; + + protected Set<Specialty> getSpecialtiesInternal() { + if (this.specialties == null) { + this.specialties = new HashSet<>(); + } + return this.specialties; + }]]></modified> + </change> + """ + )
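The tag names in `format_instructions()` above (`<change>`, `<file>`, `<original>`, `<modified>`) are reconstructed from the surrounding prose, so treat the schema as an assumption rather than a confirmed contract. Under that assumption, a consumer of the format could parse one change block like this:

```python
# Sketch: parsing a single <change> block in the format described above.
# The <change>/<file>/<original>/<modified> schema is assumed, not confirmed.
import xml.etree.ElementTree as ET

EXAMPLE = """<change>
<file>src/example.py</file>
<original no-ellipsis="true"><![CDATA[def greet():
    print("hi")]]></original>
<modified no-ellipsis="true"><![CDATA[def greet(name):
    print(f"hi {name}")]]></modified>
</change>"""

change = ET.fromstring(EXAMPLE)
print(change.findtext("file"))      # -> src/example.py
print(change.findtext("original"))  # CDATA content is returned as plain text
print(change.findtext("modified"))
```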
diff --git a/appmap/solve/log.py b/appmap/solve/log.py new file mode 100644 index 00000000..a3189a61 --- /dev/null +++ b/appmap/solve/log.py @@ -0,0 +1,30 @@ +import os + +from filelock import FileLock + + +def log_command(dir, command): + command_lock_file = os.path.join(dir, "command.lock") + command_log_file = os.path.join(dir, "command.log") + + with FileLock(command_lock_file): + with open(command_log_file, "a+") as f: + f.write(command + "\n") + + +def log_lint(dir, file, lint_messages): + lint_lock_file = os.path.join(dir, "lint.lock") + lint_log_file = os.path.join(dir, "lint.log") + + with FileLock(lint_lock_file): + with open(lint_log_file, "a+") as f: + f.writelines("\n".join([file, "-" * len(file), lint_messages, "\n"])) + + +def log_diff(dir, file, diff): + diff_lock_file = os.path.join(dir, "diff.lock") + diff_log_file = os.path.join(dir, "diff.log") + + with FileLock(diff_lock_file): + with open(diff_log_file, "a+") as f: + f.writelines("\n".join([file, "-" * len(file), diff, "\n"])) diff --git a/appmap/solve/run_command.py b/appmap/solve/run_command.py new file mode 100644 index 00000000..56ec4b8f --- /dev/null +++ b/appmap/solve/run_command.py @@ -0,0 +1,13 @@ +import subprocess + +from .log import log_command + + +def run_command(log_dir, command, fail_on_error=True): + log_command(log_dir, command) + + result = subprocess.run(command, shell=True, capture_output=True) + if result.returncode != 0 and fail_on_error: + raise RuntimeError(f"Failed to execute command {command}") + + return result.stdout.decode() diff --git a/appmap/solve/run_navie_command.py b/appmap/solve/run_navie_command.py new file mode 100644 index 00000000..c1a7a677 --- /dev/null +++ b/appmap/solve/run_navie_command.py @@ -0,0 +1,45 @@ +import os + +from .log import log_command + + +def run_navie_command( + log_dir, + command, + output_path, + log_path, + context_path=None, + input_path=None, + additional_args=None, +): + """ + Execute the navie command with specified arguments. + + :param command: Command to execute (e.g., 'navie') + :param context_path: Path to the context file + :param input_path: Path to the input file + :param output_path: Path to the output file + :param log_path: Path to the log file + :param additional_args: Additional arguments for the command + :return: None + """ + # Build the command + cmd = f"{command} navie --log-navie" + # TODO: Add token limit option, e.g.
--ai-option tokenLimit=4000 + if input_path: + cmd += f" -i {input_path}" + if context_path: + cmd += f" -c {context_path}" + cmd += f" -o {output_path}" + if additional_args: + cmd += f" {additional_args}" + cmd += f" > {log_path} 2>&1" + + log_command(log_dir, cmd) + + result = os.system(cmd) + + if result != 0: + raise RuntimeError( + f"Failed to execute command {cmd}. See {log_path} for details." + ) diff --git a/appmap/solve/solver.py b/appmap/solve/solver.py new file mode 100644 index 00000000..7818a6ae --- /dev/null +++ b/appmap/solve/solver.py @@ -0,0 +1,217 @@ +import argparse +import json +import os +import sys + +SCRIPT_DIR = os.path.dirname(os.path.abspath(__file__)) +sys.path.append(os.path.join(SCRIPT_DIR, "..", "..")) + +from appmap.solve.steps.step_lint_repair import step_lint_repair +from appmap.solve.steps.step_apply import step_apply +from appmap.solve.steps.step_generate import step_generate +from appmap.solve.steps.step_list import step_list +from appmap.solve.steps.step_plan import step_plan + +DEFAULT_STEPS = {"plan": True, "list": True, "generate": True, "apply": True} + + +class Solver: + def __init__( + self, + issue_file, + log_dir, + format_command=None, + lint_command=None, + lint_error_pattern=None, + appmap_command="appmap", + steps=None, + ): + self.issue_file = issue_file + self.log_dir = log_dir + self.format_command = format_command + self.lint_command = lint_command + self.lint_error_pattern = lint_error_pattern + self.appmap_command = appmap_command + self.steps = steps or DEFAULT_STEPS + + if self.lint_command and not self.steps["apply"]: + print("WARN: Lint command will not be executed without apply step.") + + if not os.path.isfile(self.issue_file): + raise FileNotFoundError(f"File '{self.issue_file}' not found.") + + self.work_dir = os.path.dirname(os.path.abspath(self.issue_file)) + + self.plan_file = os.path.join(self.work_dir, "plan.md") + self.solution_file = os.path.join(self.work_dir, "solution.md") + self.apply_file = os.path.join(self.work_dir, "apply.md") + self.files = [] + + def solve(self): + if self.steps["plan"]: + self.plan() + + if self.steps["list"]: + self.list_files() + + if self.steps["generate"]: + self.generate_code() + + self.base_file_content = {} + self.files_changed = [] + if self.steps["apply"]: + self.base_file_content = self.load_file_content() + + self.apply_changes() + + self.updated_file_content = self.load_file_content() + for file in self.updated_file_content: + if self.updated_file_content[file] != self.base_file_content.get(file): + self.files_changed.append(file) + + if self.lint_command: + if len(self.files_changed) > 0: + self.lint_repair() + else: + print( + "WARN: No changes were applied. Lint repair step will be skipped."
+ ) + + def plan(self): + step_plan( + self.log_dir, + self, + self.issue_file, + self.work_dir, + self.appmap_command, + self.plan_file, + ) + + def list_files(self): + step_list(self.log_dir, self.work_dir, self.appmap_command, self.plan_file) + with open(os.path.join(self.work_dir, "files.json")) as f: + self.files = json.load(f) + + def generate_code(self): + step_generate( + self.log_dir, + self, + self.work_dir, + self.appmap_command, + self.plan_file, + self.solution_file, + self.files, + ) + + def load_file_content(self): + result = {} + for file in self.files: + if os.path.isfile(file): + with open(file, "r") as f: + result[file] = f.read() + return result + + def apply_changes(self): + step_apply( + self.log_dir, + self.work_dir, + self.appmap_command, + self.solution_file, + self.apply_file, + ) + + def lint_repair(self): + step_lint_repair( + self.log_dir, + self, + self.work_dir, + self.appmap_command, + self.base_file_content, + ) + + +def parse_arguments(): + parser = argparse.ArgumentParser( + description="Solve software issue described in a file." + ) + parser.add_argument( + "issue_file", type=str, help="File containing the issue description" + ) + + parser.add_argument( + "--retries", + type=int, + default=3, + help="Number of times to try and create a code update for each test instance", + ) + + parser.add_argument( + "--directory", + type=str, + help="Working directory of the project to modify", + default=None, + ) + parser.add_argument( + "--log-dir", type=str, help="Directory to store logs", default="logs" + ) + parser.add_argument( + "--format-command", type=str, help="Format command to use", default=None + ) + parser.add_argument( + "--lint-command", type=str, help="Lint command to use", default=None + ) + parser.add_argument( + "--lint-error-pattern", type=str, help="Lint error pattern to use", default=None + ) + parser.add_argument( + "--appmap-command", type=str, help="AppMap command to use", default="appmap" + ) + + parser.add_argument("--noplan", action="store_true", help="Do not generate a plan") + parser.add_argument( + "--nolist", action="store_true", help="Do not list files to be modified" + ) + parser.add_argument( + "--nogenerate", action="store_true", help="Do not generate code" + ) + parser.add_argument("--noapply", action="store_true", help="Do not apply changes") + + return parser.parse_args() + + +if __name__ == "__main__": + args = parse_arguments() + steps = { + "plan": not args.noplan, + "list": not args.nolist, + "generate": not args.nogenerate, + "apply": not args.noapply, + } + + if args.directory: + os.chdir(args.directory) + + if args.log_dir: + os.makedirs(args.log_dir, exist_ok=True) + + attempt_number = 0 + files_changed = [] + while len(files_changed) == 0 and attempt_number < args.retries: + solver = Solver( + issue_file=args.issue_file, + log_dir=args.log_dir, + format_command=args.format_command, + lint_command=args.lint_command, + lint_error_pattern=args.lint_error_pattern, + appmap_command=args.appmap_command, + steps=steps, + ) + solver.solve() + files_changed = solver.files_changed + if len(files_changed) == 0: + print("No files were changed.") + attempt_number += 1 + if attempt_number == args.retries: + print(f"Giving up after {attempt_number} attempts") + else: + print(f"Retrying (attempt number {attempt_number + 1} of {args.retries})") diff --git a/appmap/solve/steps/__init__.py b/appmap/solve/steps/__init__.py new file mode 100644 index 00000000..e69de29b
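The `__main__` block above keeps re-running the solver until a file changes or the retry budget is exhausted. The control flow, distilled into a self-contained sketch (`solve_once` is a stand-in for `Solver().solve()`, not a name from the patch):

```python
# Distilled retry loop from solver.py: re-attempt until something changes
# or the retry budget runs out.
def solve_with_retries(solve_once, retries=3):
    attempt_number = 0
    files_changed = []
    while len(files_changed) == 0 and attempt_number < retries:
        files_changed = solve_once()
        if len(files_changed) == 0:
            print("No files were changed.")
            attempt_number += 1
    if len(files_changed) == 0:
        print(f"Giving up after {attempt_number} attempts")
    return files_changed


if __name__ == "__main__":
    print(solve_with_retries(lambda: [], retries=2))  # exercises the give-up path
```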
diff --git a/appmap/solve/steps/step_apply.py b/appmap/solve/steps/step_apply.py new file mode 100644 index 00000000..5dc9b2c4 --- /dev/null +++ b/appmap/solve/steps/step_apply.py @@ -0,0 +1,23 @@ +from ..run_navie_command import run_navie_command + + +import os + + +def step_apply(log_dir, work_dir, appmap_command, solution_file, apply_file): + apply_prompt = os.path.join(work_dir, "apply.txt") + with open(apply_prompt, "w") as apply_f: + apply_f.write("@apply /all\n\n") + with open(solution_file, "r") as sol_f: + apply_f.write(sol_f.read()) + + print("Applying changes to source files") + run_navie_command( + log_dir, + command=appmap_command, + input_path=apply_prompt, + output_path=apply_file, + log_path=os.path.join(work_dir, "apply.log"), + ) + + print("Changes applied") diff --git a/appmap/solve/steps/step_generate.py b/appmap/solve/steps/step_generate.py new file mode 100644 index 00000000..76303516 --- /dev/null +++ b/appmap/solve/steps/step_generate.py @@ -0,0 +1,86 @@ +from ..run_command import run_command +from ..run_navie_command import run_navie_command +from ..format_instructions import format_instructions + + +import os +import sys + + +def step_generate( + log_dir, args, work_dir, appmap_command, plan_file, solution_file, files +): + context_file = os.path.join(work_dir, "context.txt") + with open(context_file, "w") as context_f: + for file in files: + context_f.write("<file>\n") + context_f.write(f"<path>{file}</path>\n") + context_f.write("<content>\n") + if os.path.isfile(file): + if args.format_command: + print(f"Auto-formatting file {file}") + format_command = args.format_command.split() + [file] + run_command(log_dir, " ".join(format_command)) + + with open(file, "r") as content_f: + file_content = content_f.read() + file_lines = file_content.split("\n") + any_line_starts_with_tabs = any( + line.startswith("\t") for line in file_lines + ) + if any_line_starts_with_tabs: + print( + f"Warning: File '{file}' contains tab-indented lines. Code generation is not likely to be reliable. Please replace indentation with spaces, or specify the --format-command option to have it done automatically.", + file=sys.stderr, + ) + + context_f.write(file_content) + else: + print( + f"Notice: File '{file}' does not exist. It will probably be created in the code generation step.", + file=sys.stderr, + ) + context_f.write("</content>\n") + context_f.write("</file>\n") + + generate_prompt = os.path.join(work_dir, "generate.txt") + with open(generate_prompt, "w") as generate_f: + generate_f.write( + f"""@generate /nocontext /noformat + +## Input format + +The plan is delineated by the <plan> XML tag. +The source files are delineated by <file> XML tags. Each file has a <path> tag with the file path and a <content> tag with the file content. +Do not treat the XML tags as part of the source code. They are only there to help you parse the context. + +## Guidelines + +Try to solve the problem with a minimal set of code changes. +Avoid refactorings that will affect multiple parts of the codebase. + +## Output format + +{format_instructions()} + +""" + ) + + generate_f.write("<plan>\n") + with open(plan_file, "r") as plan_content: + generate_f.write(plan_content.read()) + generate_f.write("</plan>\n") + with open(context_file, "r") as context_content: + generate_f.write(context_content.read()) + + print("Solving plan", plan_file, "using", generate_prompt) + + run_navie_command( + log_dir, + command=appmap_command, + input_path=generate_prompt, + output_path=solution_file, + log_path=os.path.join(work_dir, "generate.log"), + ) + + print(f"Code generated in {solution_file}")
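`step_generate` above wraps each source file in `<file>`/`<path>`/`<content>` tags when assembling the prompt context; those tag names are reconstructed here and should be read as assumptions. A standalone sketch of the same assembly:

```python
# Sketch of the context assembly in step_generate, using the assumed
# <file>/<path>/<content> tag names.
import os


def build_context(files):
    parts = []
    for file in files:
        parts.append("<file>")
        parts.append(f"<path>{file}</path>")
        parts.append("<content>")
        if os.path.isfile(file):
            with open(file, "r") as f:
                parts.append(f.read())
        parts.append("</content>")
        parts.append("</file>")
    return "\n".join(parts)


if __name__ == "__main__":
    print(build_context(["README.md"]))  # any existing file will do
```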
diff --git a/appmap/solve/steps/step_lint_repair.py b/appmap/solve/steps/step_lint_repair.py new file mode 100644 index 00000000..a2e9bf54 --- /dev/null +++ b/appmap/solve/steps/step_lint_repair.py @@ -0,0 +1,215 @@ +from ..log import log_diff, log_lint +from ..run_command import run_command +from ..run_navie_command import run_navie_command +from ..format_instructions import format_instructions + + +import os +import re +import subprocess + + +def step_lint_repair(log_dir, args, work_dir, appmap_command, base_file_content): + lint_command = args.lint_command + lint_error_pattern = args.lint_error_pattern + + print("Linting source files") + + work_dir_base_name = os.path.basename(work_dir) + + for file in base_file_content.keys(): + print(f"Linting {file}") + norm_file = file.replace("/", "_") + + lint_args = lint_command.split() + [file] + + lint_result = subprocess.run( + lint_args, + capture_output=True, + text=True, + ) + + lint_output = lint_result.stdout + lint_result.stderr + + log_lint(log_dir, os.path.join(work_dir_base_name, file), lint_output) + + # If lint_error_pattern starts and ends with '/', treat it as a regular expression. + # Otherwise, treat it as a string literal. + # + # Find all lint errors reported in the output. Then select just those errors that + # are reported on lines that we have modified. + lint_errors = [] + if lint_error_pattern: + if lint_error_pattern.startswith("/") and lint_error_pattern.endswith("/"): + lint_errors = re.findall(lint_error_pattern[1:-1], lint_output) + else: + lint_errors = [ + line for line in lint_output.split("\n") if lint_error_pattern in line + ] + else: + lint_errors = lint_output.split("\n") + + temp_dir = os.path.join(work_dir, "diff", norm_file) + os.makedirs(temp_dir, exist_ok=True) + # Write the base file content + with open(os.path.join(temp_dir, "base"), "w") as f: + f.write(base_file_content[file]) + with open(file, "r") as f: + with open(os.path.join(temp_dir, "updated"), "w") as f2: + f2.write(f.read()) + # Run the diff command + diff_command = f"diff -u {os.path.join(temp_dir, 'base')} {os.path.join(temp_dir, 'updated')}" + file_diff = run_command(log_dir, diff_command, fail_on_error=False) + + log_diff(log_dir, os.path.join(work_dir_base_name, file), file_diff) + + # Lint errors are formatted like this: + # bin/solve.py:257:80: E501 line too long (231 > 79 characters) + # Collect the line numbers of the lint errors. + lint_errors_by_line_number = {} + for error in lint_errors: + if error: + line_number = error.split(":")[1] + lint_errors_by_line_number[int(line_number)] = error + + # The file diff contains chunks like: + # @@ -147,15 +147,21 @@ + # Find the '+' number, which indicates the start line. Also find the number after the + # comma, which indicates the number of lines. Report these two numbers for each chunk.
+ diff_ranges = [ + [int(ch) for ch in chunk.split(" ")[2].split(",")] + for chunk in file_diff.split("\n") + if chunk.startswith("@@") + ] + + for diff_range in diff_ranges: + print( + f"The file has changes between lines {diff_range[0]} and {diff_range[0] + diff_range[1]}" + ) + + lint_error_line_numbers_within_diff_sections = [ + line_number + for line_number in lint_errors_by_line_number.keys() + for diff_range in diff_ranges + if diff_range[0] <= line_number <= diff_range[0] + diff_range[1] + ] + + if lint_error_line_numbers_within_diff_sections: + lint_errors = [ + lint_errors_by_line_number[line_number] + for line_number in lint_error_line_numbers_within_diff_sections + ] + + lint_error_message = "\n".join( + [ + "Lint errors within diff sections:", + *lint_errors, + ] + ) + + print(lint_error_message) + log_diff( + log_dir, os.path.join(work_dir_base_name, file), lint_error_message + ) + else: + print("There are no lint errors within diff sections") + log_diff( + log_dir, + os.path.join(work_dir_base_name, file), + "No lint errors within diff sections", + ) + + for line_number in lint_error_line_numbers_within_diff_sections: + lint_error = lint_errors_by_line_number[line_number] + print(f"Error reported on line {line_number}: {lint_error}") + + # Extract the chunk of code that contains the error + content_chunk_lines = [] + with open(file, "r") as f: + lines = f.readlines() + + range_min = max(0, line_number - 7) + range_max = min(len(lines), line_number + 7) + for chunk_line_number in range(range_min, range_max): + content_chunk_lines.append( + f"{chunk_line_number + 1}: {lines[chunk_line_number]}" + ) + + repair_dir = os.path.join(work_dir, "repair", norm_file, str(line_number)) + os.makedirs(repair_dir, exist_ok=True) + + repair_prompt, repair_output, repair_log = [ + os.path.join(repair_dir, f"generate.{ext}") + for ext in ["txt", "md", "log"] + ] + repair_apply_prompt, repair_apply_output, repair_apply_log = [ + os.path.join(repair_dir, f"apply.{ext}") for ext in ["txt", "md", "log"] + ] + + with open(repair_prompt, "w") as f: + f.write( + f"""@generate /nocontext /noformat + +Fix the linter errors indicated by the <lint-error> tag. + +## Output format + +{format_instructions()} + +In the <original> and <modified> tags, do not emit line numbers. The line numbers are +only present in the file/content to help you identify which line has the lint error. + +## Error report + +<lint-error> +""" + ) + f.write(lint_error) + f.write( + """ +</lint-error> +<file> +""" + ) + f.write(file) + f.write( + """ +</file> +<content> +""" + ) + f.write("".join(content_chunk_lines)) + f.write( + """ +</content> + +""" + ) + + # Generate the repair + print(f"Generating code to repair {file}") + run_navie_command( + log_dir, + command=appmap_command, + input_path=repair_prompt, + output_path=repair_output, + log_path=repair_log, + ) + + print(f"Code generated to repair source file in {repair_output}") + + with open(repair_apply_prompt, "w") as f: + f.write("@apply /all\n\n") + with open(repair_output, "r") as plan_fp: + f.write(plan_fp.read()) + + print("Applying changes to source files") + run_navie_command( + log_dir, + command=appmap_command, + input_path=repair_apply_prompt, + output_path=repair_apply_output, + log_path=repair_apply_log, + ) + + print("Changes applied")
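The range arithmetic above (parse the `@@` hunk headers, then keep only lint errors that land inside a changed range) is easy to check in isolation. A self-contained sketch with made-up inputs; note the regex also tolerates hunks that omit the line count, which the split-based parser above does not:

```python
# Intersect lint-error line numbers with the "+start,count" ranges parsed
# from unified-diff hunk headers. Inputs are made up for illustration.
import re

DIFF_HEADERS = ["@@ -10,6 +10,8 @@", "@@ -40,3 +42,4 @@"]
LINT_ERRORS = {
    11: "E501 line too long",
    30: "F401 unused import",
    43: "E302 expected 2 blank lines",
}

ranges = []
for header in DIFF_HEADERS:
    match = re.match(r"@@ -\d+(?:,\d+)? \+(\d+)(?:,(\d+))? @@", header)
    if match:
        start = int(match.group(1))
        count = int(match.group(2) or 1)
        ranges.append((start, start + count))

for number, message in sorted(LINT_ERRORS.items()):
    if any(low <= number <= high for low, high in ranges):
        print(f"line {number}: {message}")  # prints lines 11 and 43 only
```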
diff --git a/appmap/solve/steps/step_list.py b/appmap/solve/steps/step_list.py new file mode 100644 index 00000000..b3c2ce12 --- /dev/null +++ b/appmap/solve/steps/step_list.py @@ -0,0 +1,18 @@ +from ..run_navie_command import run_navie_command + + +import os + + +def step_list(log_dir, work_dir, appmap_command, plan_file): + print("Detecting files to be modified") + run_navie_command( + log_dir, + command=appmap_command, + context_path=plan_file, + output_path=os.path.join(work_dir, "files.json"), + log_path=os.path.join(work_dir, "list-files.log"), + additional_args="@list-files /format=json /nofence", + ) + + print(f"Files to be modified stored in {os.path.join(work_dir, 'files.json')}")
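`step_list` above asks Navie for a JSON list of files and writes it to files.json; `solver.list_files()` then loads it verbatim. A slightly defensive version of that consumer (the flat-list shape is what solver.py assumes, not something the patch enforces):

```python
# Defensive reader for the files.json produced by step_list: solver.py
# expects a flat JSON list of file paths.
import json
import os


def read_file_list(work_dir):
    files_path = os.path.join(work_dir, "files.json")
    with open(files_path) as f:
        files = json.load(f)
    if not isinstance(files, list):
        raise ValueError(f"Expected a JSON list in {files_path}")
    return files
```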
+ """ + ) + ) + + run_navie_command( + log_dir, + command=appmap_command, + context_path=issue_file, + input_path=plan_prompt, + output_path=plan_file, + log_path=os.path.join(work_dir, "plan.log"), + ) + + print(f"Plan stored in {plan_file}") + + # Load the plan file and strip code blocks that are delimited by ``` + with open(plan_file, "r") as f: + plan_content = f.read() + original_plan_content = plan_content + plan_content = re.sub(r"```.*?```", "", plan_content, flags=re.DOTALL) + # Diff the original and stripped content + if original_plan_content != plan_content: + with open(plan_file, "w") as f: + f.write(plan_content) diff --git a/submodules/appmap-js b/submodules/appmap-js new file mode 160000 index 00000000..dc69da4c --- /dev/null +++ b/submodules/appmap-js @@ -0,0 +1 @@ +Subproject commit dc69da4c418e0e58dc62af78d1491bc966f720b6 diff --git a/swebench/harness/context_manager.py b/swebench/harness/context_manager.py index e74211b8..b12fdb4c 100644 --- a/swebench/harness/context_manager.py +++ b/swebench/harness/context_manager.py @@ -305,7 +305,6 @@ def __enter__(self): self.path_conda = os.path.abspath(self.path_conda) path_activate = os.path.join(self.path_conda, "bin", "activate") exec_cmd = os.path.join(self.path_conda, "bin", "conda") - env_list = get_conda_env_names(exec_cmd) # Set up testbed (environment, github repo) for each repo for repo, version_to_setup_ref in self.setup_refs.items(): @@ -335,83 +334,102 @@ def __enter__(self): else: self.log.write(f"Repo for {repo_prefix} version {version} exists: {repo_path}; skipping") - # Skip if conda environment already exists - if env_name in env_list: - self.log.write(f"Environment {env_name} already exists; skipping") - continue + self.create_conda_env( + version, + path_activate, + exec_cmd, + version_to_setup_ref, + install, + env_name, + ) - # Get setup reference instance - setup_ref_instance = version_to_setup_ref[version] + return self - # Create conda environment according to install instructinos - pkgs = install["packages"] if "packages" in install else "" - if pkgs == "requirements.txt": - # Create environment - cmd = ( - f"{exec_cmd} create -n {env_name} python={install['python']} -y" + def create_conda_env( + self, version, path_activate, exec_cmd, version_to_setup_ref, install, env_name + ): + with FileLock(f"/tmp/conda-env-setup-{env_name}.lock"): + if env_name in get_conda_env_names(exec_cmd): + self.log.write(f"Environment {env_name} already exists; skipping") + + # Get setup reference instance + setup_ref_instance = version_to_setup_ref[version] + + # Create conda environment according to install instructinos + pkgs = install["packages"] if "packages" in install else "" + if pkgs == "requirements.txt": + # Create environment + cmd = f"{exec_cmd} create -n {env_name} python={install['python']} -y" + self.log.write(f"Creating environment {env_name}") + self.exec(cmd.split(" ")) + + # Install dependencies + path_to_reqs = get_requirements(setup_ref_instance, self.testbed) + cmd = f". 
{path_activate} {env_name} && echo 'activate successful' && pip install -r {path_to_reqs}" + self.log.write( + f"Installing dependencies for {env_name}; Command: {cmd}" + ) + self.exec(["bash", "-c", cmd]) + os.remove(path_to_reqs) + elif pkgs == "environment.yml": + if "no_use_env" in install and install["no_use_env"]: + # Create environment from yml + path_to_reqs = get_environment_yml( + setup_ref_instance, env_name, save_path=self.testbed ) + + # `conda create` based installation + cmd = f"{exec_cmd} create -c conda-forge -n {env_name} python={install['python']} -y" self.log.write(f"Creating environment {env_name}") self.exec(cmd.split(" ")) # Install dependencies - path_to_reqs = get_requirements(setup_ref_instance, self.testbed) - cmd = f". {path_activate} {env_name} && echo 'activate successful' && pip install -r {path_to_reqs}" - self.log.write(f"Installing dependencies for {env_name}; Command: {cmd}") - self.exec(['bash', '-c', cmd]) - os.remove(path_to_reqs) - elif pkgs == "environment.yml": - if "no_use_env" in install and install["no_use_env"]: - # Create environment from yml - path_to_reqs = get_environment_yml( - setup_ref_instance, env_name, - save_path=self.testbed - ) - - # `conda create` based installation - cmd = f"{exec_cmd} create -c conda-forge -n {env_name} python={install['python']} -y" - self.log.write(f"Creating environment {env_name}") - self.exec(cmd.split(" ")) - - # Install dependencies - cmd = f"{exec_cmd} env update -f {path_to_reqs}" - self.log.write(f"Installing dependencies for {env_name}; Command: {cmd}") - self.exec(cmd.split(" ")) - else: - # Create environment from yml - path_to_reqs = get_environment_yml( - setup_ref_instance, env_name, - save_path=self.testbed, - python_version=install["python"] - ) - - # `conda env create` based installation - cmd = f"{exec_cmd} env create --file {path_to_reqs}" - self.log.write(f"Creating environment {env_name}") - self.exec(cmd.split(" ")) - - # Remove environment.yml - os.remove(path_to_reqs) + cmd = f"{exec_cmd} env update -f {path_to_reqs}" + self.log.write( + f"Installing dependencies for {env_name}; Command: {cmd}" + ) + self.exec(cmd.split(" ")) else: - # Create environment + install dependencies - cmd = f"{exec_cmd} create -n {env_name} python={install['python']} {pkgs} -y" + # Create environment from yml + path_to_reqs = get_environment_yml( + setup_ref_instance, + env_name, + save_path=self.testbed, + python_version=install["python"], + ) + + # `conda env create` based installation + cmd = f"{exec_cmd} env create --file {path_to_reqs}" self.log.write(f"Creating environment {env_name}") self.exec(cmd.split(" ")) - arch = platform.machine() - arch_specific_packages = install.get("arch_specific_packages", {}).get(arch, "") - if arch_specific_packages: - cmd = f". {path_activate} {env_name} && conda install {arch_specific_packages} -y" - self.log.write(f"Installing arch-specific packages for {env_name}; Command: {cmd}") - self.exec(['bash', '-c', cmd]) - - # Install additional packages if specified - if "pip_packages" in install: - pip_packages = " ".join(install["pip_packages"]) - cmd = f". 
{path_activate} {env_name} && pip install {pip_packages}" - self.log.write(f"Installing pip packages for {env_name}; Command: {cmd}") - self.exec(['bash', '-c', cmd]) - - return self + # Remove environment.yml + os.remove(path_to_reqs) + else: + # Create environment + install dependencies + cmd = f"{exec_cmd} create -n {env_name} python={install['python']} {pkgs} -y" + self.log.write(f"Creating environment {env_name}") + self.exec(cmd.split(" ")) + + arch = platform.machine() + arch_specific_packages = install.get("arch_specific_packages", {}).get( + arch, "" + ) + if arch_specific_packages: + cmd = f". {path_activate} {env_name} && conda install {arch_specific_packages} -y" + self.log.write( + f"Installing arch-specific packages for {env_name}; Command: {cmd}" + ) + self.exec(["bash", "-c", cmd]) + + # Install additional packages if specified + if "pip_packages" in install: + pip_packages = " ".join(install["pip_packages"]) + cmd = f". {path_activate} {env_name} && pip install {pip_packages}" + self.log.write( + f"Installing pip packages for {env_name}; Command: {cmd}" + ) + self.exec(["bash", "-c", cmd]) def get_distributed_tasks(self) -> list: """ @@ -434,7 +452,7 @@ def get_distributed_tasks(self) -> list: "timeout": self.timeout, "venv": env_name, "version": version, - "verbose": self.verbose, + "verbose": self.verbose } distributed_tasks.append(task_set) return distributed_tasks diff --git a/swebench/harness/engine_validation.py b/swebench/harness/engine_validation.py index 4007b159..8ec9c147 100644 --- a/swebench/harness/engine_validation.py +++ b/swebench/harness/engine_validation.py @@ -95,7 +95,7 @@ def setup_testbed(data: dict): testbed=data_dict.testbed, temp_dir=data_dict.temp_dir, timeout=data_dict.timeout, - verbose=data_dict.verbose, + verbose=data_dict.verbose ) as tcm: distributed_task_list = tcm.get_distributed_tasks() for task_list in distributed_task_list: diff --git a/viewer/script.js b/viewer/script.js index af700e07..4f005450 100644 --- a/viewer/script.js +++ b/viewer/script.js @@ -28,9 +28,31 @@ function addPreField(k, data) { const h1 = document.createElement("h2"); h1.textContent = k; div.appendChild(h1); - const p = document.createElement("pre"); - p.textContent = stringify(data[k]); - div.appendChild(p); + + if (k === "navie_context") { + data[k].forEach((item) => { + const d = document.createElement("div"); + d.style.background = "#eee"; + d.style.padding = "1rem"; + const h4 = document.createElement("h4"); + h4.style.marginBottom = "0"; + h4.textContent = item.directory; + const subtitle = document.createElement("h5"); + subtitle.style.marginBottom = "0"; + subtitle.style.marginTop = "0"; + subtitle.textContent = item.type; + const pre = document.createElement("pre"); + pre.textContent = item.content.replaceAll("\\n", "\n"); + d.appendChild(h4); + d.appendChild(subtitle); + d.appendChild(pre); + div.appendChild(d); + }); + } else { + const p = document.createElement("pre"); + p.textContent = stringify(data[k]); + div.appendChild(p); + } dataContainer.appendChild(div); } } @@ -52,7 +74,11 @@ function addMdField(k, data) { function stringify(value) { switch (typeof value) { case "object": - return JSON.stringify(value, undefined, 2); + return JSON.stringify(value, undefined, 2).replaceAll( + "\\n", + ` +` + ); default: return String(value); } diff --git a/viewer/viewer.py b/viewer/viewer.py index b34727c3..4627a0d7 100644 --- a/viewer/viewer.py +++ b/viewer/viewer.py @@ -5,7 +5,7 @@ import os import sys -PORT = 8080 +PORT = 8081 data_path = 
os.path.abspath(sys.argv[1])
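A closing note on the context_manager.py change: moving environment creation into `create_conda_env` behind a `FileLock` lets parallel workers race to the lock instead of to conda itself; the first worker in creates the environment, and later ones observe that it exists and return. The pattern, reduced to stand-in callables (`env_exists` and `create_env` here are hypothetical substitutes for the conda queries the patch uses):

```python
# Distilled create-if-missing pattern from create_conda_env above.
from filelock import FileLock


def ensure_env(env_name, env_exists, create_env):
    # Only one process at a time may inspect and create this environment.
    with FileLock(f"/tmp/conda-env-setup-{env_name}.lock"):
        if env_exists(env_name):
            print(f"Environment {env_name} already exists; skipping")
            return
        create_env(env_name)
        print(f"Created environment {env_name}")


if __name__ == "__main__":
    created = set()
    ensure_env("demo", created.__contains__, created.add)
    ensure_env("demo", created.__contains__, created.add)  # second call skips
```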