From 3cfda327ba5ae8fdd3b0e57cb17b391a06456f42 Mon Sep 17 00:00:00 2001 From: Kevin Gilpin Date: Tue, 13 Aug 2024 12:19:54 -0400 Subject: [PATCH] fix: Initialize verify_succeeded Fixes https://github.com/getappmap/SWE-bench/issues/100 --- solver/solve.py | 2 +- solver/solve/solver.py | 92 ++++++++++++++---------------- test/solver/test_solution.py | 107 +++++++++++++++++++++++++++++++++++ 3 files changed, 149 insertions(+), 52 deletions(-) create mode 100644 test/solver/test_solution.py diff --git a/solver/solve.py b/solver/solve.py index 9de90cb..be527d3 100644 --- a/solver/solve.py +++ b/solver/solve.py @@ -176,7 +176,7 @@ def worker_init(data: dict): assert data_dict.maketest_retries is not None output_file = abspath(data_dict.output) - appmap_command = abspath(data_dict.appmap_command) + appmap_command = data_dict.appmap_command try: with TestbedContextManager( diff --git a/solver/solve/solver.py b/solver/solve/solver.py index 01bcf87..3d531ce 100644 --- a/solver/solve/solver.py +++ b/solver/solve/solver.py @@ -150,7 +150,6 @@ def __init__( self.verify = verify def solution_response(self) -> SolutionResponse: - patch_names = [] prepare_test_patch = None prepare_test_num_attempts = 0 test_directives = [] @@ -158,60 +157,51 @@ def solution_response(self) -> SolutionResponse: apply_patch = None lint_repair_patch = None verify_patch = None + verify_succeeded = False verify_test_directives_succeeded = [] patch_name = None patch = None - if solution.prepare_test_response: - patch_names.append("prepare_test") - prepare_test_patch = solution.prepare_test_response.patch - prepare_test_num_attempts = solution.prepare_test_response.num_attempts - is_issue_reproduced = solution.prepare_test_response.is_issue_reproduced() - test_directives = solution.prepare_test_response.test_directives() - - if solution.apply: - patch_names.append("apply") - apply_patch = solution.apply.patch - - if solution.lint_repair: - patch_names.append("lint_repair") - lint_repair_patch = solution.lint_repair.patch - - if solution.verify: - patch_names.append("verify") - verify_succeeded = solution.verify.succeeded - verify_patch = solution.verify.patch - verify_test_directives_succeeded = solution.verify.test_directives_succeeded - - if ( - self.prepare_test_response - and self.prepare_test_response.is_issue_reproduced() - and self.verify - and self.verify.test_directives_succeeded - ): - patch_name = "fail_to_pass" - patch = self.verify.patch - elif ( - self.prepare_test_response - and self.verify - and self.verify.test_directives_succeeded - ): - patch_name = "pass_to_pass" - patch = self.verify.patch - elif ( - self.prepare_test_response - and self.verify - and not self.verify.test_directives_succeeded - ): - patch_name = "pass_to_fail" - patch = self.verify.patch - elif self.lint_repair and self.lint_repair.patch: - patch_name = "lint_repair" - patch = self.lint_repair.patch - elif self.apply and self.apply.patch: - patch_name = "apply" - patch = self.apply.patch - + patch_names = [] + if self.prepare_test_response: + if self.prepare_test_response.patch: + patch = self.prepare_test_response.patch + prepare_test_patch = self.prepare_test_response.patch + prepare_test_num_attempts = self.prepare_test_response.num_attempts + is_issue_reproduced = self.prepare_test_response.is_issue_reproduced() + test_directives = self.prepare_test_response.test_directives() + + if self.apply: + if self.apply.patch: + patch_names.append("apply") + patch = self.apply.patch + apply_patch = self.apply.patch + + if self.lint_repair: + if self.lint_repair.patch: + patch_names.append("lint_repair") + patch = self.lint_repair.patch + lint_repair_patch = self.lint_repair.patch + + if self.verify: + if self.verify.patch: + patch = self.verify.patch + verify_patch = self.verify.patch + verify_succeeded = self.verify.succeeded + verify_test_directives_succeeded = self.verify.test_directives_succeeded + + if ( + self.prepare_test_response + and self.prepare_test_response.is_issue_reproduced() + and self.verify.test_directives_succeeded + ): + patch_names.append("fail_to_pass") + elif self.verify.test_directives_succeeded: + patch_names.append("pass_to_pass") + else: + patch_names.append("pass_to_fail") + + patch_name = patch_names[-1] if patch_names else None return SolutionResponse( patch_name, patch, diff --git a/test/solver/test_solution.py b/test/solver/test_solution.py new file mode 100644 index 0000000..72dec60 --- /dev/null +++ b/test/solver/test_solution.py @@ -0,0 +1,107 @@ +import os +import unittest + +from solver.solve.steps.step_apply import ApplyResponse +from solver.solve.steps.step_lint_repair import LintRepairResponse +from solver.solve.steps.step_maketest import PrepareTestResponse, MaketestResult +from solver.solve.steps.step_verify import VerifyResponse +from solver.solve.solver import ( + Solution, +) + +issue_file = os.path.join(os.path.dirname(__file__), "issue.txt") +log_dir = os.path.join(os.path.dirname(__file__), "..", "..", "logs") + + +class TestSolution(unittest.TestCase): + def test_solution_response_fail_to_pass( + self, + ): + prepare_test_response = PrepareTestResponse( + "prepare_test_patch", + [MaketestResult(test_directive="directive1", is_issue_reproduced=True)], + 1, + ) + apply_response = ApplyResponse(patch="apply_patch") + lint_repair_response = LintRepairResponse(patch="lint_repair_patch") + verify_response = VerifyResponse( + succeeded=True, + patch="verify_patch", + test_directives_succeeded=["directive1"], + ) + + solution = Solution( + prepare_test_response=prepare_test_response, + apply=apply_response, + lint_repair=lint_repair_response, + verify=verify_response, + ) + + solution_response = solution.solution_response() + self.assertEqual(solution_response.patch_name, "fail_to_pass") + self.assertEqual(solution_response.patch, "verify_patch") + self.assertEqual(solution_response.prepare_test_patch, "prepare_test_patch") + self.assertEqual(solution_response.prepare_test_num_attempts, 1) + self.assertEqual(solution_response.test_directives, ["directive1"]) + self.assertTrue(solution_response.is_issue_reproduced) + self.assertEqual(solution_response.apply_patch, "apply_patch") + self.assertEqual(solution_response.lint_repair_patch, "lint_repair_patch") + self.assertTrue(solution_response.verify_succeeded) + self.assertEqual(solution_response.verify_patch, "verify_patch") + self.assertEqual( + solution_response.verify_test_directives_succeeded, ["directive1"] + ) + + def test_solution_response_pass_to_pass( + self, + ): + prepare_test_response = PrepareTestResponse( + None, + [MaketestResult(test_directive="directive1", is_issue_reproduced=False)], + 1, + ) + apply_response = ApplyResponse(patch="apply_patch") + lint_repair_response = LintRepairResponse(patch=None) + verify_response = VerifyResponse( + succeeded=True, + patch=None, + test_directives_succeeded=['the-test'], + ) + + solution = Solution( + prepare_test_response, + apply_response, + lint_repair_response, + verify_response, + ) + solution_response = solution.solution_response() + self.assertEqual(solution_response.patch_name, "pass_to_pass") + self.assertEqual(solution_response.patch, "apply_patch") + + def test_solution_pass_to_fail(self): + prepare_test_response = PrepareTestResponse( + None, + [MaketestResult(test_directive="directive1", is_issue_reproduced=False)], + 1, + ) + apply_response = ApplyResponse(patch="apply_patch") + lint_repair_response = LintRepairResponse(patch=None) + verify_response = VerifyResponse( + succeeded=False, + patch=None, + test_directives_succeeded=[], + ) + + solution = Solution( + prepare_test_response, + apply_response, + lint_repair_response, + verify_response, + ) + solution_response = solution.solution_response() + self.assertEqual(solution_response.patch_name, "pass_to_fail") + self.assertEqual(solution_response.patch, "apply_patch") + + +if __name__ == "__main__": + unittest.main()