diff --git a/appmap/navie/format_instructions.py b/appmap/navie/format_instructions.py
index 1595144f..cc98d034 100644
--- a/appmap/navie/format_instructions.py
+++ b/appmap/navie/format_instructions.py
@@ -46,37 +46,40 @@ def xml_format_instructions():
src/myproj/myfunc.py
-
+
"""
diff --git a/appmap/solve/solver.py b/appmap/solve/solver.py
index 2727ea52..225100a9 100644
--- a/appmap/solve/solver.py
+++ b/appmap/solve/solver.py
@@ -62,11 +62,6 @@ def __init__(
self.changed_files_limit = changed_files_limit
self.test_attempts = test_attempts
- if self.lint_command and not self.steps["apply"]:
- print(
- f"[solver] ({self.instance_id}) WARN: Lint command will not be executed without apply step."
- )
-
if not os.path.isfile(self.issue_file):
raise FileNotFoundError(f"File '{self.issue_file}' not found.")
@@ -168,12 +163,19 @@ def maketest(self):
self.task_manager,
self.issue_file,
self.work_dir,
+ self.lint_command,
self.test_attempts,
)
- maketest_files = [result["test_file"] for result in maketest_results]
- self.maketest_errors = [result["error_summary"] for result in maketest_results]
- self.extend_test_directives(maketest_files)
+ results_with_error_summary = [
+ result for result in maketest_results if "error_summary" in result
+ ]
+ self.maketest_errors = [
+ result["error_summary"] for result in results_with_error_summary
+ ]
+ self.extend_test_directives(
+ [result["test_directive"] for result in maketest_results]
+ )
def plan(self):
step_plan(
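
For context, a minimal sketch of the result shape solver.py now consumes from step_maketest: every well-formed result carries a test_directive, and error_summary may be present (possibly None). The file name and error text here are hypothetical.

maketest_results = [
    {"test_directive": "tests/test_widget.py", "verifies_issue": True,
     "error_summary": "AssertionError: Widget.render() returned None"},
]

maketest_errors = [
    r["error_summary"] for r in maketest_results if "error_summary" in r
]
test_directives = [r["test_directive"] for r in maketest_results]
assert test_directives == ["tests/test_widget.py"]
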
diff --git a/appmap/solve/steps/lint_repair.py b/appmap/solve/steps/lint_repair.py
new file mode 100644
index 00000000..405dd28d
--- /dev/null
+++ b/appmap/solve/steps/lint_repair.py
@@ -0,0 +1,35 @@
+import subprocess
+
+
+# Lint the file and return a dictionary of line numbers with lint errors.
+def lint(lint_command: list) -> dict:
+ lint_result = subprocess.run(
+ lint_command, capture_output=True, shell=False, text=True
+ )
+
+ lint_output = lint_result.stdout
+ lint_errors = lint_output.split("\n")
+
+ # Lint errors are formatted like this:
+ # bin/solve.py:257:80: E501 line too long (231 > 79 characters)
+ # Collect the line numbers of the lint errors.
+ lint_errors_by_line_number = {}
+ for error in lint_errors:
+ if error:
+ tokens = error.split(":")
+ if len(tokens) > 1:
+ line_number = tokens[1]
+ if line_number and line_number.isdigit():
+ lint_errors_by_line_number[int(line_number)] = error
+
+ return lint_errors_by_line_number
+
+
+def lint_in_conda(conda_path, conda_env, lint_command, file):
+ return lint(
+ [
+ "bash",
+ "-c",
+ f". {conda_path}/bin/activate {conda_env} && {lint_command} {file}",
+ ]
+ )
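
A usage sketch for the extracted helper, assuming a flake8-style linter on PATH whose output matches the format in the comment above; the file name and message are illustrative. Because line numbers are the dict keys, only the last reported error on any given line is kept.

errors = lint(["flake8", "bin/solve.py"])
# Given linter output such as:
#   bin/solve.py:257:80: E501 line too long (231 > 79 characters)
# the helper returns:
#   {257: "bin/solve.py:257:80: E501 line too long (231 > 79 characters)"}
for line_number, message in sorted(errors.items()):
    print(line_number, message)
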
diff --git a/appmap/solve/steps/step_lint_repair.py b/appmap/solve/steps/step_lint_repair.py
index 271664f3..64d7b3bb 100644
--- a/appmap/solve/steps/step_lint_repair.py
+++ b/appmap/solve/steps/step_lint_repair.py
@@ -1,13 +1,13 @@
from appmap.navie.editor import Editor
from appmap.navie.extract_changes import extract_changes
-from ..log import log_diff, log_lint, log_command
+from appmap.solve.steps.lint_repair import lint_in_conda
+from ..log import log_diff
from ..run_command import run_command
from ..run_navie_command import run_navie_command
from ...navie.format_instructions import xml_format_instructions
import os
-import subprocess
class LintRepairContext:
@@ -58,42 +58,7 @@ def diff_file(context, file, step):
return file_diff
-# Lint the file and return a dictionary of line numbers with lint errors
-def lint_file(context, file):
- lint_args = [
- "bash",
- "-c",
- f". {context.conda_path}/bin/activate {context.conda_env} && {context.lint_command} {file}",
- ]
- log_command(context.log_dir, " ".join(lint_args))
-
- lint_result = subprocess.run(lint_args, capture_output=True, shell=False, text=True)
-
- lint_output = lint_result.stdout
-
- log_lint(
- context.log_dir, os.path.join(context.work_dir_base_name, file), lint_output
- )
-
- lint_errors = lint_output.split("\n")
-
- # Lint errors are formatted like this:
- # bin/solve.py:257:80: E501 line too long (231 > 79 characters)
- # Collect the line numbers of the lint errors.
- lint_errors_by_line_number = {}
- for error in lint_errors:
- if error:
- tokens = error.split(":")
- if len(tokens) > 1:
- line_number = tokens[1]
- if line_number and line_number.isdigit():
- lint_errors_by_line_number[int(line_number)] = error
- return lint_errors_by_line_number
-
-
-def lint_error_line_numbers_within_diff_sections(
- file, lint_errors_by_line_number, file_diff
-):
+def lint_error_line_numbers_within_diff_sections(lint_errors_by_line_number, file_diff):
# The file diff contains chunks like:
# @@ -147,15 +147,21 @@
# Find the '+' number, which indicates the start line. Also find the number after the
@@ -143,18 +108,22 @@ def step_lint_repair(
print(f"[lint-repair] ({instance_id}) Linting {file}")
- lint_errors_by_line_number = lint_file(context, file)
+ lint_errors_by_line_number = lint_in_conda(
+ context.conda_path, context.conda_env, context.lint_command, file
+ )
if not len(lint_errors_by_line_number):
print(f"[lint-repair] ({instance_id}) No lint errors found in {file}")
continue
lint_errors = "\n".join(lint_errors_by_line_number.values())
- print(lint_errors)
+ print(
+ f"[lint-repair] ({instance_id}) Lint errors found in {file}: {lint_errors}"
+ )
file_diff = diff_file(context, file, "pre")
line_numbers = lint_error_line_numbers_within_diff_sections(
- file, lint_errors_by_line_number, file_diff
+ lint_errors_by_line_number, file_diff
)
if line_numbers:
@@ -320,13 +289,15 @@ def step_lint_repair(
)
repair_item += 1
- post_fix_lint_errors_by_line_number = lint_file(context, file)
+ post_fix_lint_errors_by_line_number = lint_in_conda(
+ context.conda_path, context.conda_env, context.lint_command, file
+ )
post_file_diff = diff_file(context, file, "post")
- print(post_file_diff)
+ print(f"[lint-repair] ({instance_id}) Diff after repair:\n{post_file_diff}")
post_line_numbers = lint_error_line_numbers_within_diff_sections(
- file, post_fix_lint_errors_by_line_number, post_file_diff
+ post_fix_lint_errors_by_line_number, post_file_diff
)
if post_line_numbers:
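
The unchanged body of lint_error_line_numbers_within_diff_sections parses hunk headers as described in the comment retained above. A rough sketch of that idea, assuming standard unified-diff headers; the repo's actual implementation may differ in detail.

import re
from typing import Dict, List

def line_numbers_in_diff(
    lint_errors_by_line_number: Dict[int, str], file_diff: str
) -> List[int]:
    # Collect the '+start,count' ranges from headers like '@@ -147,15 +147,21 @@'.
    ranges = []
    for match in re.finditer(
        r"^@@ -\d+(?:,\d+)? \+(\d+)(?:,(\d+))? @@", file_diff, re.MULTILINE
    ):
        start = int(match.group(1))
        count = int(match.group(2) or 1)
        ranges.append(range(start, start + count))
    # Keep only lint errors that fall inside a changed region of the diff.
    return sorted(n for n in lint_errors_by_line_number if any(n in r for r in ranges))
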
diff --git a/appmap/solve/steps/step_maketest.py b/appmap/solve/steps/step_maketest.py
index a11f148a..e40a0d60 100644
--- a/appmap/solve/steps/step_maketest.py
+++ b/appmap/solve/steps/step_maketest.py
@@ -1,19 +1,39 @@
import os
-from typing import TypedDict, Optional
+from typing import List, TypedDict, Optional, Union
import yaml
from appmap.navie.editor import Editor
+from appmap.navie.extract_changes import extract_changes
from appmap.navie.fences import extract_fenced_content
+from appmap.navie.format_instructions import xml_format_instructions
+from appmap.solve.steps.lint_repair import lint_in_conda
from appmap.solve.steps.run_test import run_test
+from appmap.solve.steps.test_files_to_modules import test_files_to_modules
+
+
+class TestError(TypedDict):
+ error: str
+
+
+class TestResult(TypedDict):
+ test_directive: str
+ verifies_issue: bool
+ error_summary: Optional[str]
def maketest(
tcm,
issue_file,
work_dir,
+ lint_command,
test_number,
-):
- print(f"[maketest] Generating a test case to verify the solution to {issue_file}")
+) -> Union[TestResult, TestError]:
+ instance = tcm.instance
+ instance_id = tcm.instance["instance_id"]
+
+ print(
+ f"[maketest] ({instance_id}) Generating a test case to verify the solution to {issue_file}"
+ )
with open(issue_file, "r") as f:
issue_content = f.read()
@@ -21,7 +41,7 @@ def maketest(
work_dir = os.path.join(work_dir, "maketest", str(test_number))
test_to_modify_str = Editor(os.path.join(work_dir, "choose")).search(
- f"""Identify a single test case that is most related to the following issue:
+ f"""/include=test Identify a single test case that is most related to the following issue:
{issue_content}
""",
@@ -39,59 +59,128 @@ def maketest(
# Expect exactly one file
if len(tests_to_modify) != 1:
- print(f"Expected exactly one file, got {test_to_modify_str}")
- return {"succeeded": False, "test_error": "Expected exactly one file"}
+ print(
+ f"[maketest] ({instance_id}) Expected exactly one file, got {test_to_modify_str}"
+ )
+ return {"error": "Expected exactly one file"}
- test_to_modify = tests_to_modify[0]
- test_to_modify = os.path.relpath(test_to_modify, os.getcwd())
+ test_file = tests_to_modify[0]
+ test_file = os.path.relpath(test_file, os.getcwd())
- print(f"[maketest] Modifying test case {test_to_modify}")
+ print(f"[maketest] ({instance_id}) Modifying test case {test_file}")
- with open(test_to_modify, "r") as f:
+ with open(test_file, "r") as f:
test_content = f.read()
+ original_test_content = test_content
navie = Editor(os.path.join(work_dir, "generate"))
- navie.context(issue_content, exclude_pattern="test")
test_prompt = f"""## Task
-Modify this test case to verify the solution to the described issue:
+Add a new test to the following test file.
+
+The new test should verify the solution to the issue that's described by the user.
+
+The test case MUST FAIL if the issue is NOT FIXED.
+
+If any new imports are needed, be sure to include them.
{test_content}
-## Output instructions
+## Output format
-The output should contain only a single test case.
+{xml_format_instructions()}
+"""
-Remove all test cases from the original file, except the one that you
-are modifying or creating.
+ test_output = navie.test(
+ f"""/exclude=test
+
+{issue_content}""",
+ prompt=test_prompt,
+ )
-The test case MUST FAIL if the issue is NOT FIXED.
+ test_changes_content = "\n\n".join(extract_fenced_content(test_output))
-Be sure to emit all needed imports.
+ changes = extract_changes(test_changes_content)
+ for change in changes:
+ if change.original:
+ print(
+ f"[maketest] ({instance_id}) Applying test change to file: {test_file}"
+ )
+            apply_dir = os.path.join(work_dir, "apply")
+            Editor(apply_dir).apply(
+ test_file,
+ change.modified,
+ search=change.original,
+ )
+ else:
+ print(
+                f"[maketest] ({instance_id}) Planned test change has no original section, so it will be appended to: {test_file}"
+ )
+ with open(test_file, "a") as f:
+ f.write("\n")
+ f.write(change.modified)
-Output only the code, and nothing else.
-"""
+ with open(test_file, "r") as f:
+ test_content = f.read()
- raw_code = navie.test(issue_content, prompt=test_prompt)
+ lint_errors_by_line_number = lint_in_conda(
+ tcm.conda_path,
+ tcm.venv,
+ lint_command,
+ test_file,
+ )
+ if lint_errors_by_line_number:
+ print(
+ f"[maketest] ({instance_id}) Lint errors found in test file {test_file}:\n{lint_errors_by_line_number}"
+ )
- codes = extract_fenced_content(raw_code)
- if not codes or len(codes) != 1:
- print(f"Expected exactly one code block, got {len(codes)}")
- return {"succeeded": False, "test_error": "Expected exactly one code block"}
+        lint_error_str = "\n".join(lint_errors_by_line_number.values())
+ lint_repair = Editor(os.path.join(work_dir, "lint_repair"))
+ lint_repair_content = lint_repair.generate(
+ lint_error_str,
+ prompt=f"""## Task
- raw_code = codes[0]
+Fix lint errors in the code.
- # Append a suffix to the test_to_modify file name.
- # Example: test_to_modify = "test.py", modified_file_name = "test_modified.py"
- test_file = test_to_modify.replace(".py", f"_maketest_{test_number}.py")
+
+{test_content}
+
- print(f"[maketest] Writing test case to {test_file}")
+## Output format
- with open(test_file, "w") as f:
- f.write(raw_code)
+{xml_format_instructions()}
+""",
+ )
+ lint_repair_changes = extract_changes(lint_repair_content)
+ for change in lint_repair_changes:
+ if change.original:
+ print(
+ f"[maketest] ({instance_id}) Applying lint repair change to file: {test_file}"
+ )
+                apply_dir = os.path.join(work_dir, "apply")
+                Editor(apply_dir).apply(
+ test_file,
+ change.modified,
+ search=change.original,
+ )
+ else:
+ print(
+                    f"[maketest] ({instance_id}) Planned lint repair change has no original section, so it will be appended to: {test_file}"
+ )
+ with open(test_file, "a") as f:
+ f.write("\n")
+ f.write(change.modified)
+
+ lint_errors_by_line_number_after_repair = lint_in_conda(
+ tcm.conda_path, tcm.venv, lint_command, test_file
+ )
+ if lint_errors_by_line_number_after_repair:
+ print(
+ f"[maketest] ({instance_id}) Lint errors found in test file {test_file} after lint repair:\n{lint_errors_by_line_number_after_repair}"
+ )
# TODO: Don't record appmap data of the test yet
# succeeded, test_error = run_test(tcm, test_file, appmap=True)
@@ -100,15 +189,17 @@ def maketest(
# instance_id = tcm.instance["instance_id"]
# index_appmaps(instance_id, log_dir, appmap_command)
- succeeded, test_error = run_test(tcm, test_file)
+ succeeded, test_error = run_test(tcm, test_file, appmap=False)
# Verify that the test_error indicates that the issue is being reproduced
fails_for_expected_reason = False
if succeeded:
- print(f"[maketest] Test case {test_file} succeeded. This is unexpected!")
+ print(
+ f"[maketest] ({instance_id}) Test case {test_file} succeeded. This is unexpected!"
+ )
else:
print(
- f"[maketest] Test case {test_file} failed. This is expected. Let's see if it failed for the right reason."
+ f"[maketest] ({instance_id}) Test case {test_file} failed. This is expected. Let's see if it failed for the right reason."
)
if "ERROR" in test_error:
@@ -147,19 +238,36 @@ def maketest(
Emit a single word that indicates whether the test error is consistent with the described issue.
- Emit "yes" if the test error is consistent with the described issue.
+- Emit "maybe" if the test error is possibly consistent with the described issue.
- Emit "no" if the test error is NOT consistent with the described issue.
""",
)
- if whyfailed != "yes":
+ if whyfailed == "no":
+ print(
+ f"[maketest] ({instance_id}) Test case {test_file} DID NOT fail for the expected reason"
+ )
print(
- f"[maketest] Test case {test_file} DID NOT fail for the unexpected reason"
+ f"[maketest] ({instance_id}) Reverting test changes to {test_file} and trying again"
)
+ with open(test_file, "w") as f:
+ f.write(original_test_content)
else:
fails_for_expected_reason = True
- print(f"[maketest] Test case {test_file} failed for the expected reason")
+ print(
+            f"[maketest] ({instance_id}) Test case {test_file} failed, likely for the expected reason"
+ )
+
+ if instance["repo"] == "django/django":
+ test_directive = test_files_to_modules([test_file])[0]
+ else:
+ test_directive = test_file
- error_summary = None
+ result = TestResult(
+ test_directive=test_directive,
+ verifies_issue=fails_for_expected_reason,
+ error_summary=None,
+ )
if fails_for_expected_reason:
error_summary = Editor(os.path.join(work_dir, "summarize")).ask(
f"""/nocontext A test case is failing.
@@ -184,53 +292,33 @@ def maketest(
""",
context=[],
)
-
- result = {
- "test_file": test_file,
- "succeeded": succeeded,
- "test_error": test_error,
- "fails_for_expected_reason": fails_for_expected_reason,
- }
- if error_summary:
result["error_summary"] = error_summary
return result
-class TestResult(TypedDict):
- test_file: str
- error_summary: str
-
-
def step_maketest(
tcm,
issue_file,
work_dir,
+ lint_command,
num_attempts,
-) -> Optional[TestResult]:
+) -> List[TestResult]:
# Try N times to generate a test that fails for the right reason
+ instance_id = tcm.instance["instance_id"]
test_results = []
for i in range(num_attempts):
- test_result = maketest(tcm, issue_file, work_dir, i + 1)
- if (
- "fails_for_expected_reason" in test_result
- and "test_file" in test_result
- and test_result["fails_for_expected_reason"]
- ):
- test_file = test_result["test_file"]
- error_summary = test_result["error_summary"]
- print(
- f"[maketest] Generated test case {test_file} that fails for the right reason"
- )
- test_results.append(
- TestResult(test_file=test_file, error_summary=error_summary)
- )
- # TODO: Allow it to generate more than one test, if they are diverse.
- break
-
- if len(test_results) == 0:
- print(
- "[maketest] Failed to generate a test case that fails for the right reason"
- )
-
- return test_results
+ test_result = maketest(tcm, issue_file, work_dir, lint_command, i + 1)
+ if "test_directive" in test_result:
+ if test_result["verifies_issue"]:
+ print(
+ f"[maketest] ({instance_id}) Test case {test_result['test_directive']} verifies the issue"
+ )
+ return [test_result]
+
+ test_results.append(test_result)
+
+ print(
+        f"[maketest] ({instance_id}) No test case was generated that verifies the issue. Returning the first well-formed test case, if any, for pass-to-pass purposes."
+ )
+ return test_results[0:1]
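
Taken together, the selection policy at the end of step_maketest reduces to the following self-contained sketch (illustrative, not the repo's code): return the first attempt that verifies the issue as soon as one appears, otherwise fall back to the first well-formed attempt, or an empty list if every attempt errored.

from typing import Dict, List

def first_verifying_or_first(attempts: List[Dict]) -> List[Dict]:
    kept = []
    for result in attempts:
        if "test_directive" in result:  # well-formed TestResult, not a TestError
            if result["verifies_issue"]:
                return [result]
            kept.append(result)
    return kept[0:1]
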