From 4815d123c4041801175a52ade47c34d6faddbd80 Mon Sep 17 00:00:00 2001
From: Kevin Gilpin
Date: Sat, 3 Aug 2024 13:08:26 -0400
Subject: [PATCH] feat: Perform lint repair on generated test code

---
 appmap/navie/format_instructions.py    |  49 ++---
 appmap/solve/solver.py                 |  18 +-
 appmap/solve/steps/lint_repair.py      |  35 ++++
 appmap/solve/steps/step_lint_repair.py |  59 ++----
 appmap/solve/steps/step_maketest.py    | 240 +++++++++++++++++--------
 5 files changed, 250 insertions(+), 151 deletions(-)
 create mode 100644 appmap/solve/steps/lint_repair.py

diff --git a/appmap/navie/format_instructions.py b/appmap/navie/format_instructions.py
index 1595144f..cc98d034 100644
--- a/appmap/navie/format_instructions.py
+++ b/appmap/navie/format_instructions.py
@@ -46,37 +46,40 @@ def xml_format_instructions():
 src/myproj/myfunc.py
 
-    
+    
 """
diff --git a/appmap/solve/solver.py b/appmap/solve/solver.py
index 2727ea52..225100a9 100644
--- a/appmap/solve/solver.py
+++ b/appmap/solve/solver.py
@@ -62,11 +62,6 @@ def __init__(
         self.changed_files_limit = changed_files_limit
         self.test_attempts = test_attempts
 
-        if self.lint_command and not self.steps["apply"]:
-            print(
-                f"[solver] ({self.instance_id}) WARN: Lint command will not be executed without apply step."
-            )
-
         if not os.path.isfile(self.issue_file):
             raise FileNotFoundError(f"File '{self.issue_file}' not found.")
 
@@ -168,12 +163,19 @@ def maketest(self):
             self.task_manager,
             self.issue_file,
             self.work_dir,
+            self.lint_command,
             self.test_attempts,
         )
-        maketest_files = [result["test_file"] for result in maketest_results]
-        self.maketest_errors = [result["error_summary"] for result in maketest_results]
-        self.extend_test_directives(maketest_files)
+        results_with_error_summary = [
+            result for result in maketest_results if "error_summary" in result
+        ]
+        self.maketest_errors = [
+            result["error_summary"] for result in results_with_error_summary
+        ]
+        self.extend_test_directives(
+            [result["test_directive"] for result in maketest_results]
+        )
 
     def plan(self):
         step_plan(
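Reviewer note: solver.maketest() now consumes a new result shape from step_maketest. A minimal sketch of that contract follows; the dictionary values are hypothetical, and only the keys and the comprehensions come from this patch:

    # Hypothetical results; only the keys mirror the patch.
    maketest_results = [
        {"test_directive": "tests/test_widget.py", "verifies_issue": True,
         "error_summary": "Fails with the AttributeError described in the issue"},
        {"test_directive": "tests/test_widget_edge.py", "verifies_issue": False},
    ]

    # As in solver.maketest(): only results carrying an "error_summary"
    # feed maketest_errors; every result contributes its test directive.
    maketest_errors = [
        r["error_summary"] for r in maketest_results if "error_summary" in r
    ]
    test_directives = [r["test_directive"] for r in maketest_results]
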
diff --git a/appmap/solve/steps/lint_repair.py b/appmap/solve/steps/lint_repair.py
new file mode 100644
index 00000000..405dd28d
--- /dev/null
+++ b/appmap/solve/steps/lint_repair.py
@@ -0,0 +1,35 @@
+import subprocess
+
+
+# Lint the file and return a dictionary of line numbers with lint errors.
+def lint(lint_command: list) -> dict:
+    lint_result = subprocess.run(
+        lint_command, capture_output=True, shell=False, text=True
+    )
+
+    lint_output = lint_result.stdout
+    lint_errors = lint_output.split("\n")
+
+    # Lint errors are formatted like this:
+    # bin/solve.py:257:80: E501 line too long (231 > 79 characters)
+    # Collect the line numbers of the lint errors.
+    lint_errors_by_line_number = {}
+    for error in lint_errors:
+        if error:
+            tokens = error.split(":")
+            if len(tokens) > 1:
+                line_number = tokens[1]
+                if line_number and line_number.isdigit():
+                    lint_errors_by_line_number[int(line_number)] = error
+
+    return lint_errors_by_line_number
+
+
+def lint_in_conda(conda_path, conda_env, lint_command, file):
+    return lint(
+        [
+            "bash",
+            "-c",
+            f". {conda_path}/bin/activate {conda_env} && {lint_command} {file}",
+        ]
+    )
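Reviewer note: the new lint() helper can be exercised without a real linter, since it only needs a command that prints flake8-style output. A minimal sketch, assuming the module path added above; the sample error line is made up:

    from appmap.solve.steps.lint_repair import lint

    # "echo" stands in for the linter; the error line below is a fabricated
    # example in the same "path:line:col: code message" format flake8 emits.
    line = "bin/solve.py:257:80: E501 line too long (231 > 79 characters)"
    assert lint(["echo", line]) == {257: line}
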
diff --git a/appmap/solve/steps/step_lint_repair.py b/appmap/solve/steps/step_lint_repair.py
index 271664f3..64d7b3bb 100644
--- a/appmap/solve/steps/step_lint_repair.py
+++ b/appmap/solve/steps/step_lint_repair.py
@@ -1,13 +1,13 @@
 from appmap.navie.editor import Editor
 from appmap.navie.extract_changes import extract_changes
 
-from ..log import log_diff, log_lint, log_command
+from appmap.solve.steps.lint_repair import lint_in_conda
+from ..log import log_diff
 from ..run_command import run_command
 from ..run_navie_command import run_navie_command
 from ...navie.format_instructions import xml_format_instructions
 
 import os
-import subprocess
 
 
 class LintRepairContext:
@@ -58,42 +58,7 @@ def diff_file(context, file, step):
     return file_diff
 
 
-# Lint the file and return a dictionary of line numbers with lint errors
-def lint_file(context, file):
-    lint_args = [
-        "bash",
-        "-c",
-        f". {context.conda_path}/bin/activate {context.conda_env} && {context.lint_command} {file}",
-    ]
-    log_command(context.log_dir, " ".join(lint_args))
-
-    lint_result = subprocess.run(lint_args, capture_output=True, shell=False, text=True)
-
-    lint_output = lint_result.stdout
-
-    log_lint(
-        context.log_dir, os.path.join(context.work_dir_base_name, file), lint_output
-    )
-
-    lint_errors = lint_output.split("\n")
-
-    # Lint errors are formatted like this:
-    # bin/solve.py:257:80: E501 line too long (231 > 79 characters)
-    # Collect the line numbers of the lint errors.
-    lint_errors_by_line_number = {}
-    for error in lint_errors:
-        if error:
-            tokens = error.split(":")
-            if len(tokens) > 1:
-                line_number = tokens[1]
-                if line_number and line_number.isdigit():
-                    lint_errors_by_line_number[int(line_number)] = error
-    return lint_errors_by_line_number
-
-
-def lint_error_line_numbers_within_diff_sections(
-    file, lint_errors_by_line_number, file_diff
-):
+def lint_error_line_numbers_within_diff_sections(lint_errors_by_line_number, file_diff):
     # The file diff contains chunks like:
     # @@ -147,15 +147,21 @@
     # Find the '+' number, which indicates the start line. Also find the number after the
@@ -143,18 +108,22 @@ def step_lint_repair(
         print(f"[lint-repair] ({instance_id}) Linting {file}")
 
-        lint_errors_by_line_number = lint_file(context, file)
+        lint_errors_by_line_number = lint_in_conda(
+            context.conda_path, context.conda_env, context.lint_command, file
+        )
 
         if not len(lint_errors_by_line_number):
             print(f"[lint-repair] ({instance_id}) No lint errors found in {file}")
             continue
 
         lint_errors = "\n".join(lint_errors_by_line_number.values())
-        print(lint_errors)
+        print(
+            f"[lint-repair] ({instance_id}) Lint errors found in {file}: {lint_errors}"
+        )
 
         file_diff = diff_file(context, file, "pre")
 
         line_numbers = lint_error_line_numbers_within_diff_sections(
-            file, lint_errors_by_line_number, file_diff
+            lint_errors_by_line_number, file_diff
         )
 
         if line_numbers:
@@ -320,13 +289,15 @@ def step_lint_repair(
             )
             repair_item += 1
 
-        post_fix_lint_errors_by_line_number = lint_file(context, file)
+        post_fix_lint_errors_by_line_number = lint_in_conda(
+            context.conda_path, context.conda_env, context.lint_command, file
+        )
 
         post_file_diff = diff_file(context, file, "post")
-        print(post_file_diff)
+        print(f"[lint-repair] ({instance_id}) Diff after repair:\n{post_file_diff}")
 
         post_line_numbers = lint_error_line_numbers_within_diff_sections(
-            file, post_fix_lint_errors_by_line_number, post_file_diff
+            post_fix_lint_errors_by_line_number, post_file_diff
         )
 
         if post_line_numbers:
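Reviewer note: lint_error_line_numbers_within_diff_sections() intersects lint-error line numbers with the regions the diff actually touched. Below is a standalone sketch of the hunk-header arithmetic the comments above describe; it is an illustration, not the repository's implementation:

    import re

    def changed_line_ranges(file_diff: str):
        # A hunk header looks like: @@ -147,15 +147,21 @@
        # The "+" pair gives the start line and line count in the new file.
        ranges = []
        for start, count in re.findall(
            r"^@@ -\d+(?:,\d+)? \+(\d+)(?:,(\d+))? @@", file_diff, re.MULTILINE
        ):
            first = int(start)
            ranges.append(range(first, first + (int(count) if count else 1)))
        return ranges

    # A lint error on line N is repairable when N falls inside one of these ranges.
    assert 150 in changed_line_ranges("@@ -147,15 +147,21 @@\n context\n")[0]
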
diff --git a/appmap/solve/steps/step_maketest.py b/appmap/solve/steps/step_maketest.py
index a11f148a..e40a0d60 100644
--- a/appmap/solve/steps/step_maketest.py
+++ b/appmap/solve/steps/step_maketest.py
@@ -1,19 +1,39 @@
 import os
-from typing import TypedDict, Optional
+from typing import List, TypedDict, Optional, Union
 
 import yaml
 
 from appmap.navie.editor import Editor
+from appmap.navie.extract_changes import extract_changes
 from appmap.navie.fences import extract_fenced_content
+from appmap.navie.format_instructions import xml_format_instructions
+from appmap.solve.steps.lint_repair import lint_in_conda
 from appmap.solve.steps.run_test import run_test
+from appmap.solve.steps.test_files_to_modules import test_files_to_modules
+
+
+class TestError(TypedDict):
+    error: str
+
+
+class TestResult(TypedDict):
+    test_directive: str
+    verifies_issue: bool
+    error_summary: Optional[str]
 
 
 def maketest(
     tcm,
     issue_file,
     work_dir,
+    lint_command,
     test_number,
-):
-    print(f"[maketest] Generating a test case to verify the solution to {issue_file}")
+) -> Union[TestResult, TestError]:
+    instance = tcm.instance
+    instance_id = tcm.instance["instance_id"]
+
+    print(
+        f"[maketest] ({instance_id}) Generating a test case to verify the solution to {issue_file}"
+    )
 
     with open(issue_file, "r") as f:
         issue_content = f.read()
@@ -21,7 +41,7 @@ def maketest(
     work_dir = os.path.join(work_dir, "maketest", str(test_number))
 
     test_to_modify_str = Editor(os.path.join(work_dir, "choose")).search(
-        f"""Identify a single test case that is most related to the following issue:
+        f"""/include=test Identify a single test case that is most related to the following issue:
 
 {issue_content}
 """,
@@ -39,59 +59,128 @@ def maketest(
 
     # Expect exactly one file
     if len(tests_to_modify) != 1:
-        print(f"Expected exactly one file, got {test_to_modify_str}")
-        return {"succeeded": False, "test_error": "Expected exactly one file"}
+        print(
+            f"[maketest] ({instance_id}) Expected exactly one file, got {test_to_modify_str}"
+        )
+        return {"error": "Expected exactly one file"}
 
-    test_to_modify = tests_to_modify[0]
-    test_to_modify = os.path.relpath(test_to_modify, os.getcwd())
+    test_file = tests_to_modify[0]
+    test_file = os.path.relpath(test_file, os.getcwd())
 
-    print(f"[maketest] Modifying test case {test_to_modify}")
+    print(f"[maketest] ({instance_id}) Modifying test case {test_file}")
 
-    with open(test_to_modify, "r") as f:
+    with open(test_file, "r") as f:
         test_content = f.read()
+    original_test_content = test_content
 
     navie = Editor(os.path.join(work_dir, "generate"))
-    navie.context(issue_content, exclude_pattern="test")
 
     test_prompt = f"""## Task
 
-Modify this test case to verify the solution to the described issue:
+Add a new test to the following test file.
+
+The new test should verify the solution to the issue that's described by the user.
+
+The test case MUST FAIL if the issue is NOT FIXED.
+
+If any new imports are needed, be sure to include them.
 
 {test_content}
 
-## Output instructions
+## Output format
 
-The output should contain only a single test case.
+{xml_format_instructions()}
+"""
 
-Remove all test cases from the original file, except the one that you
-are modifying or creating.
+    test_output = navie.test(
+        f"""/exclude=test
+
+{issue_content}""",
+        prompt=test_prompt,
+    )
 
-The test case MUST FAIL if the issue is NOT FIXED.
+    test_changes_content = "\n\n".join(extract_fenced_content(test_output))
 
-Be sure to emit all needed imports.
+    changes = extract_changes(test_changes_content)
+    for change in changes:
+        if change.original:
+            print(
+                f"[maketest] ({instance_id}) Applying test change to file: {test_file}"
+            )
+            work_dir = os.path.join(work_dir, "apply")
+            Editor(work_dir).apply(
+                test_file,
+                change.modified,
+                search=change.original,
+            )
+        else:
+            print(
+                f"[maketest] ({instance_id}) Planned test change has no original section, so it will be appended to: {test_file}"
+            )
+            with open(test_file, "a") as f:
+                f.write("\n")
+                f.write(change.modified)
 
-Output only the code, and nothing else.
-"""
+    with open(test_file, "r") as f:
+        test_content = f.read()
 
-    raw_code = navie.test(issue_content, prompt=test_prompt)
+    lint_errors_by_line_number = lint_in_conda(
+        tcm.conda_path,
+        tcm.venv,
+        lint_command,
+        test_file,
+    )
+    if lint_errors_by_line_number:
+        print(
+            f"[maketest] ({instance_id}) Lint errors found in test file {test_file}:\n{lint_errors_by_line_number}"
+        )
 
-    codes = extract_fenced_content(raw_code)
-    if not codes or len(codes) != 1:
-        print(f"Expected exactly one code block, got {len(codes)}")
-        return {"succeeded": False, "test_error": "Expected exactly one code block"}
+        lint_error_str = "\n".join(list(lint_errors_by_line_number.values()))
+        lint_repair = Editor(os.path.join(work_dir, "lint_repair"))
+        lint_repair_content = lint_repair.generate(
+            lint_error_str,
+            prompt=f"""## Task
 
-    raw_code = codes[0]
+Fix lint errors in the code.
 
-    # Append a suffix to the test_to_modify file name.
-    # Example: test_to_modify = "test.py", modified_file_name = "test_modified.py"
-    test_file = test_to_modify.replace(".py", f"_maketest_{test_number}.py")
+
+{test_content}
+
 
-    print(f"[maketest] Writing test case to {test_file}")
+## Output format
 
-    with open(test_file, "w") as f:
-        f.write(raw_code)
+{xml_format_instructions()}
+""",
+        )
+        lint_repair_changes = extract_changes(lint_repair_content)
+        for change in lint_repair_changes:
+            if change.original:
+                print(
+                    f"[maketest] ({instance_id}) Applying lint repair change to file: {test_file}"
+                )
+                work_dir = os.path.join(work_dir, "apply")
+                Editor(work_dir).apply(
+                    test_file,
+                    change.modified,
+                    search=change.original,
+                )
+            else:
+                print(
+                    f"[maketest] ({instance_id}) Planned lint repair change has no original section, so it will be appended to: {test_file}"
+                )
+                with open(test_file, "a") as f:
+                    f.write("\n")
+                    f.write(change.modified)
+
+        lint_errors_by_line_number_after_repair = lint_in_conda(
+            tcm.conda_path, tcm.venv, lint_command, test_file
+        )
+        if lint_errors_by_line_number_after_repair:
+            print(
+                f"[maketest] ({instance_id}) Lint errors found in test file {test_file} after lint repair:\n{lint_errors_by_line_number_after_repair}"
+            )
 
     # TODO: Don't record appmap data of the test yet
     # succeeded, test_error = run_test(tcm, test_file, appmap=True)
@@ -100,15 +189,17 @@ def maketest(
     #     instance_id = tcm.instance["instance_id"]
     #     index_appmaps(instance_id, log_dir, appmap_command)
 
-    succeeded, test_error = run_test(tcm, test_file)
+    succeeded, test_error = run_test(tcm, test_file, appmap=False)
 
     # Verify that the test_error indicates that the issue is being reproduced
     fails_for_expected_reason = False
     if succeeded:
-        print(f"[maketest] Test case {test_file} succeeded. This is unexpected!")
+        print(
+            f"[maketest] ({instance_id}) Test case {test_file} succeeded. This is unexpected!"
+        )
     else:
         print(
-            f"[maketest] Test case {test_file} failed. This is expected. Let's see if it failed for the right reason."
+            f"[maketest] ({instance_id}) Test case {test_file} failed. This is expected. Let's see if it failed for the right reason."
         )
 
     if "ERROR" in test_error:
@@ -147,19 +238,36 @@ def maketest(
 Emit a single word that indicates whether the test error is consistent with the described issue.
 
 - Emit "yes" if the test error is consistent with the described issue.
+- Emit "maybe" if the test error is possibly consistent with the described issue.
 - Emit "no" if the test error is NOT consistent with the described issue.
""", ) - if whyfailed != "yes": + if whyfailed == "no": + print( + f"[maketest] ({instance_id}) Test case {test_file} DID NOT fail for the expected reason" + ) print( - f"[maketest] Test case {test_file} DID NOT fail for the unexpected reason" + f"[maketest] ({instance_id}) Reverting test changes to {test_file} and trying again" ) + with open(test_file, "w") as f: + f.write(original_test_content) else: fails_for_expected_reason = True - print(f"[maketest] Test case {test_file} failed for the expected reason") + print( + f"[maketest] ({instance_id}) Test case {test_file} failed, possibly / probably for the expected reason" + ) + + if instance["repo"] == "django/django": + test_directive = test_files_to_modules([test_file])[0] + else: + test_directive = test_file - error_summary = None + result = TestResult( + test_directive=test_directive, + verifies_issue=fails_for_expected_reason, + error_summary=None, + ) if fails_for_expected_reason: error_summary = Editor(os.path.join(work_dir, "summarize")).ask( f"""/nocontext A test case is failing. @@ -184,53 +292,33 @@ def maketest( """, context=[], ) - - result = { - "test_file": test_file, - "succeeded": succeeded, - "test_error": test_error, - "fails_for_expected_reason": fails_for_expected_reason, - } - if error_summary: result["error_summary"] = error_summary return result -class TestResult(TypedDict): - test_file: str - error_summary: str - - def step_maketest( tcm, issue_file, work_dir, + lint_command, num_attempts, -) -> Optional[TestResult]: +) -> List[TestResult]: # Try N times to generate a test that fails for the right reason + instance_id = tcm.instance["instance_id"] test_results = [] for i in range(num_attempts): - test_result = maketest(tcm, issue_file, work_dir, i + 1) - if ( - "fails_for_expected_reason" in test_result - and "test_file" in test_result - and test_result["fails_for_expected_reason"] - ): - test_file = test_result["test_file"] - error_summary = test_result["error_summary"] - print( - f"[maketest] Generated test case {test_file} that fails for the right reason" - ) - test_results.append( - TestResult(test_file=test_file, error_summary=error_summary) - ) - # TODO: Allow it to generate more than one test, if they are diverse. - break - - if len(test_results) == 0: - print( - "[maketest] Failed to generate a test case that fails for the right reason" - ) - - return test_results + test_result = maketest(tcm, issue_file, work_dir, lint_command, i + 1) + if "test_directive" in test_result: + if test_result["verifies_issue"]: + print( + f"[maketest] ({instance_id}) Test case {test_result['test_directive']} verifies the issue" + ) + return [test_result] + + test_results.append(test_result) + + print( + f"[maketest] ({tcm.instance['instance_id']}) No test cases were generated that verify the issue. Returning the first test case for pass-to-pass purposes." + ) + return test_results[0:1]