Skip to content

Commit

Permalink
fix: Anticipate a test may fail without error
Browse files Browse the repository at this point in the history
This has been observed in cases of timeout, for example.
  • Loading branch information
kgilpin committed Aug 14, 2024
1 parent 6f50366 commit fbfc49d
Show file tree
Hide file tree
Showing 3 changed files with 48 additions and 37 deletions.
14 changes: 10 additions & 4 deletions solver/solve/steps/run_test.py
Original file line number Diff line number Diff line change
@@ -1,11 +1,17 @@
import subprocess
from swebench.harness.constants import MAP_REPO_TO_TEST_FRAMEWORK
from typing import Optional

from ..run_command import run_command
from swebench.harness.constants import MAP_REPO_TO_TEST_FRAMEWORK
from .test_files_to_modules import test_files_to_modules


def run_test(tcm, test_file, appmap=False, files_to_directives=True):
class TestResult:
    """Outcome of a single test run.

    Replaces the previous ``(succeeded, test_error)`` tuple return of
    ``run_test`` so callers can distinguish a failure that produced error
    output from one that produced none (observed e.g. on timeout).
    """

    def __init__(self, succeeded: bool, test_error: Optional[str]):
        """Record the result of one test execution.

        :param succeeded: True when the test passed.
        :param test_error: captured error text from the test log; may be
            None or empty when the test failed without reporting any
            error (e.g. a timeout), which callers must check for.
        """
        # Whether the test passed.
        self.succeeded = succeeded
        # Error output, if any; falsy on a silent failure.
        self.test_error = test_error


def run_test(tcm, test_file, appmap=False, files_to_directives=True) -> TestResult:
print(f"[run_test] Running test {test_file}")

instance = tcm.instance
Expand Down Expand Up @@ -74,4 +80,4 @@ def run_test(tcm, test_file, appmap=False, files_to_directives=True):
# Select log_lines after base_log_line_count
test_error = "\n".join(log_lines[base_log_line_count:])

return (succeeded, test_error)
return TestResult(succeeded, test_error)
55 changes: 32 additions & 23 deletions solver/solve/steps/step_maketest.py
Original file line number Diff line number Diff line change
Expand Up @@ -239,19 +239,29 @@ def maketest(
# instance_id = tcm.instance["instance_id"]
# index_appmaps(instance_id, log_dir, appmap_command)

succeeded, test_error = run_test(tcm, test_file, appmap=False)
test_result = run_test(tcm, test_file, appmap=False)

def revert_test_changes():
with open(test_file, "w") as f:
f.write(original_test_content)

# Verify that the test_error indicates that the issue is being reproduced
fails_for_expected_reason = False
if succeeded:
if test_result.succeeded:
print(
f"[maketest] ({instance_id}) Test case {test_file} succeeded. This is unexpected!"
)
elif not test_result.test_error:
print(
f"[maketest] ({instance_id}) Test case {test_file} failed without any reported error (timeout?). Reverting test changes."
)
revert_test_changes()
else:
print(
f"[maketest] ({instance_id}) Test case {test_file} failed. This is expected. Let's see if it failed for the planned reason."
)

test_error = test_result.test_error
if "ERROR" in test_error:
error_lines = test_error.split("\n")
# Find everything after the first line that includes "ERROR", "FAIL", or "activate successful"
Expand All @@ -262,10 +272,10 @@ def maketest(
)
test_error = "\n".join(error_lines[first_line_index_with_error:])

whyfailed = Editor(
os.path.join(maketest_work_dir, "check"), log_dir=work_dir
).ask(
f"""/nocontext
whyfailed = Editor(
os.path.join(maketest_work_dir, "check"), log_dir=work_dir
).ask(
f"""/nocontext
<error>
{test_error}
Expand All @@ -275,8 +285,8 @@ def maketest(
{issue_content}
</issue>
""",
context=[],
prompt="""## Task
context=[],
prompt="""## Task
A test case has been created that is currently expected to fail due to a known issue.
Expand All @@ -293,23 +303,22 @@ def maketest(
- Emit "maybe" if the test error is possibly consistent with the described issue.
- Emit "no" if the test error is NOT consistent with the described issue.
""",
)

if whyfailed == "no":
print(
f"[maketest] ({instance_id}) Test case {test_file} DID NOT fail for the planned reason"
)
print(
f"[maketest] ({instance_id}) Reverting test changes to {test_file} and trying again"
)
with open(test_file, "w") as f:
f.write(original_test_content)
else:
fails_for_expected_reason = True
print(
f"[maketest] ({instance_id}) It looks like it failed for the planned reason"
)

if whyfailed == "no":
print(
f"[maketest] ({instance_id}) Test case {test_file} DID NOT fail for the planned reason"
)
print(
f"[maketest] ({instance_id}) Reverting test changes to {test_file}."
)
revert_test_changes()
else:
fails_for_expected_reason = True
print(
f"[maketest] ({instance_id}) It looks like it failed for the planned reason"
)

if instance["repo"] == "django/django":
test_directive = test_files_to_modules([test_file])[0]
else:
Expand Down
16 changes: 6 additions & 10 deletions solver/solve/steps/step_verify.py
Original file line number Diff line number Diff line change
Expand Up @@ -135,11 +135,9 @@ def repair_test(

print(f"[verify/repair] ({instance_id}) Retesting: {test_directive}")

(succeeded, test_output) = run_test(
task_manager, test_directive, files_to_directives=False
)
test_result = run_test(task_manager, test_directive, files_to_directives=False)

if not succeeded:
if not test_result.succeeded:
print(f"[verify/repair] ({instance_id}) Test failed: {test_directive}")
print(
f"[verify/repair] ({instance_id}) Review {task_manager.log_file} for more information"
Expand Down Expand Up @@ -189,24 +187,22 @@ def step_verify(
test_directives_repaired = []
for test_directive in test_directives:
print(f"[verify] ({instance_id}) Running test: {test_directive}")
succeeded, test_output = run_test(
task_manager, test_directive, files_to_directives=False
)
test_result = run_test(task_manager, test_directive, files_to_directives=False)

if not succeeded:
if not test_result.succeeded and test_result.test_error:
repaired = repair_test(
task_manager,
verify_dir,
work_dir,
instance_id,
test_directive,
test_output,
test_result.test_error,
)
if repaired:
test_directives_repaired.append(test_directive)
succeeded = True

if succeeded:
if test_result.succeeded:
test_directives_succeeded.append(test_directive)

if test_directives_repaired:
Expand Down

0 comments on commit fbfc49d

Please sign in to comment.