can_be_split

Codium-ai · Mar 10, 2024 · 8324e9a · 8324e9a
1 parent c7315c5
commit 8324e9a
Show file tree

Hide file tree

Showing 7 changed files with 71 additions and 14 deletions.
diff --git a/docs/REVIEW.md b/docs/REVIEW.md
@@ -41,7 +41,6 @@ To edit [configurations](./../pr_agent/settings/configuration.toml#L19)  related
 - `persistent_comment`: if set to true, the review comment will be persistent, meaning that every new review request will edit the previous one. Default is true.
 
 #### Enable\\disable features
-- `require_focused_review`: if set to true, the tool will add a section - 'is the PR a focused one'. Default is false.
 - `require_score_review`: if set to true, the tool will add a section that scores the PR. Default is false.
 - `require_tests_review`: if set to true, the tool will add a section that checks if the PR contains tests. Default is true.
 - `require_estimate_effort_to_review`: if set to true, the tool will add a section that estimates the effort needed to review the PR. Default is true.

diff --git a/pr_agent/algo/utils.py b/pr_agent/algo/utils.py
@@ -70,6 +70,7 @@ def convert_to_markdown(output_data: dict, gfm_supported: bool = True) -> str:
     """    
 
     emojis = {
+        "Can be split": "🔀",
         "Possible issues": "🔍",
         "Score": "🏅",
         "Relevant tests": "🧪",
@@ -90,7 +91,8 @@ def convert_to_markdown(output_data: dict, gfm_supported: bool = True) -> str:
 
     for key, value in output_data['review'].items():
         if value is None or value == '' or value == {} or value == []:
-            continue
+            if key.lower() != 'can_be_split':
+                continue
         key_nice = key.replace('_', ' ').capitalize()
         emoji = emojis.get(key_nice, "")
         if gfm_supported:
@@ -99,6 +101,8 @@ def convert_to_markdown(output_data: dict, gfm_supported: bool = True) -> str:
             if 'security concerns' in key_nice.lower():
                 value = emphasize_header(value.strip())
                 markdown_text += f"<tr><td> {emoji}&nbsp;<strong>{key_nice}</strong></td><td>\n\n{value}\n\n</td></tr>\n"
+            elif 'can be split' in key_nice.lower():
+                markdown_text += process_can_be_split(emoji, value)
             elif 'possible issues' in key_nice.lower():
                 value = value.strip()
                 issues = value.split('\n- ')
@@ -150,6 +154,37 @@ def convert_to_markdown(output_data: dict, gfm_supported: bool = True) -> str:
     return markdown_text
 
 
+def process_can_be_split(emoji, value):
+    key_nice = "Can this PR be split?"
+    markdown_text = ""
+    if not value or isinstance(value, list) and len(value) == 1:
+        value = "No"
+        markdown_text += f"<tr><td> {emoji}&nbsp;<strong>{key_nice}</strong></td><td>\n\n{value}\n\n</td></tr>\n"
+    else:
+        number_of_splits = len(value)
+        markdown_text += f"<tr><td rowspan={number_of_splits}> {emoji}&nbsp;<strong>{key_nice}</strong></td>\n"
+        for i, split in enumerate(value):
+            title = split.get('title', '')
+            relevant_files = split.get('relevant_files', [])
+            if i == 0:
+                markdown_text += f"<td><details><summary>\nSub PR theme: <strong>{title}</strong></summary>\n\n"
+                markdown_text += f"<hr>\n"
+                markdown_text += f"Relevant files:\n"
+                markdown_text += f"<ul>\n"
+                for file in relevant_files:
+                    markdown_text += f"<li>{file}</li>\n"
+                markdown_text += f"</ul>\n\n</details></td></tr>\n"
+            else:
+                markdown_text += f"<tr>\n<td><details><summary>\nSub PR theme: <strong>{title}</strong></summary>\n\n"
+                markdown_text += f"<hr>\n"
+                markdown_text += f"Relevant files:\n"
+                markdown_text += f"<ul>\n"
+                for file in relevant_files:
+                    markdown_text += f"<li>{file}</li>\n"
+                markdown_text += f"</ul>\n\n</details></td></tr>\n"
+    return markdown_text
+
+
 def parse_code_suggestion(code_suggestion: dict, i: int = 0, gfm_supported: bool = True) -> str:
     """
     Convert a dictionary of data into markdown format.

diff --git a/pr_agent/git_providers/git_provider.py b/pr_agent/git_providers/git_provider.py
@@ -197,6 +197,12 @@ def auto_approve(self) -> bool:
     def calc_pr_statistics(self, pull_request_data: dict):
         return {}
 
+    def get_num_of_files(self):
+        try:
+            return len(self.get_diff_files())
+        except Exception as e:
+            return -1
+
 
 def get_main_pr_language(languages, files) -> str:
     """
@@ -266,6 +272,7 @@ def get_main_pr_language(languages, files) -> str:
 
     return main_language_str
 
+
 class IncrementalPR:
     def __init__(self, is_incremental: bool = False):
         self.is_incremental = is_incremental

diff --git a/pr_agent/git_providers/github_provider.py b/pr_agent/git_providers/github_provider.py
@@ -112,6 +112,11 @@ def get_files(self):
                 self.git_files = self.pr.get_files()
             return self.git_files
 
+    def get_num_of_files(self):
+        if self.git_files:
+            return self.git_files.totalCount
+        else:
+            return -1
 
     @retry(exceptions=RateLimitExceeded,
            tries=get_settings().github.ratelimit_retries, delay=2, backoff=2, jitter=(1, 3))

diff --git a/pr_agent/settings/configuration.toml b/pr_agent/settings/configuration.toml
@@ -22,10 +22,10 @@ ai_disclaimer=""  # Pro feature, full text for the AI disclaimer
 
 [pr_reviewer] # /review #
 # enable/disable features
-require_focused_review=false
 require_score_review=false
 require_tests_review=true
 require_estimate_effort_to_review=true
+require_can_be_split_review=false
 # soc2
 require_soc2_ticket=false
 soc2_ticket_prompt="Does the PR description include a link to ticket in a project management system (e.g., Jira, Asana, Trello, etc.) ?"

diff --git a/pr_agent/settings/pr_reviewer_prompts.toml b/pr_agent/settings/pr_reviewer_prompts.toml
@@ -49,7 +49,13 @@ Extra instructions from the user:
 
 The output must be a YAML object equivalent to type $PRReview, according to the following Pydantic definitions:
 =====
-class Review(BaseModel)
+{%- if require_can_be_split_review %}
+class SubPR(BaseModel):
+    title: str = Field(description="short and concise title for a sub-PR composed only from the relevant files")
+    relevant_files: List[str] = Field(description="the relevant files of the sub-PR")
+{%- endif %}
+
+class Review(BaseModel):
 {%- if require_estimate_effort_to_review %}
     estimated_effort_to_review_[1-5]: str = Field(description="Estimate, on a scale of 1-5 (inclusive), the time and effort required to review this PR by an experienced and knowledgeable developer. 1 means short and easy review , 5 means long and hard review. Take into account the size, complexity, quality, and the needed changes of the PR code diff. Explain your answer in a short and concise manner.")
 {%- endif %}
@@ -61,23 +67,23 @@ class Review(BaseModel)
 {%- endif %}
 {%- if question_str %}
     insights_from_user_answers: str = Field(description="shortly summarize the insights you gained from the user's answers to the questions")
-{%- endif %}
-{%- if require_focused %}
-    focused_pr: str = Field(description="Is this a focused PR, in the sense that all the PR code diff changes are united under a single focused theme ? If the theme is too broad, or the PR code diff changes are too scattered, then the PR is not focused. Explain your answer shortly.")
 {%- endif %}
     possible_issues: str = Field(description="Does this PR code introduce clear issues, bugs, or major performance concerns? If there are no apparent issues, respond with 'No'. If there are any issues, describe them briefly. Use bullet points if more than one issue. Be specific, and provide examples if possible. Start each bullet point with a short specific header, such as: "- Possible Bug: ...", etc.")
     security_concerns: str = Field(description="does this PR code introduce possible vulnerabilities such as exposure of sensitive information (e.g., API keys, secrets, passwords), or security concerns like SQL injection, XSS, CSRF, and others ? Answer 'No' if there are no possible issues. If there are security concerns or issues, start your answer with a short header, such as: 'Sensitive information exposure: ...', 'SQL injection: ...' etc. Explain your answer. Be specific and give examples if possible")
+{%- if require_can_be_split_review %}
+    can_be_split: List[SubPR] = Field(description="Can this PR, with its {{ num_pr_files }} changed files, be clearly split into smaller sub-PRs, that can be reviewed and merged independently, regardless of the order ? If yes, provide a title and list the relevant files for each sub-PR. Make sure that the sub-PRs are indeed independent, without any code dependencies between them. Try not to create too many sub-PRs. For example, logic changes and corresponding documentation should be grouped together. Output empty list if the PR cannot be split.")
+{%- endif %}
 {%- if num_code_suggestions > 0 %}
 
-class CodeSuggestion(BaseModel)
+class CodeSuggestion(BaseModel):
     relevant_file: str = Field(description="the relevant file full path")
     language: str = Field(description="the language of the relevant file")
     suggestion: str = Field(description="a concrete suggestion for meaningfully improving the new PR code. Also describe how, specifically, the suggestion can be applied to new PR code. Add tags with importance measure that matches each suggestion ('important' or 'medium'). Do not make suggestions for updating or adding docstrings, renaming PR title and description, or linter like.")
     relevant_line: str = Field(description="a single code line taken from the relevant file, to which the suggestion applies. The code line should start with a '+'. Make sure to output the line exactly as it appears in the relevant file")
 {%- endif %}
 {%- if num_code_suggestions > 0 %}
 
-class PRReview(BaseModel)
+class PRReview(BaseModel):
     review: Review
     code_feedback: List[CodeSuggestion]
 {%- else %}
@@ -100,14 +106,18 @@ review:
 {%- endif %}
   relevant_tests: |
     No
-{%- if require_focused %}
-  focused_pr: |
-    no, because ...
-{%- endif %}
   possible_issues: |
     No
   security_concerns: |
     No
+{%- if require_can_be_split_review %}
+    can_be_split: |
+    title: |
+      ...
+    - relevant_files:
+      - ...
+      - ...
+{%- endif %}
 {%- if num_code_suggestions > 0 %}
 code_feedback
 - relevant_file: |

diff --git a/pr_agent/tools/pr_reviewer.py b/pr_agent/tools/pr_reviewer.py
@@ -56,10 +56,11 @@ def __init__(self, pr_url: str, is_answer: bool = False, is_auto: bool = False,
             "description": self.git_provider.get_pr_description(),
             "language": self.main_language,
             "diff": "",  # empty diff for initial calculation
+            "num_pr_files": self.git_provider.get_num_of_files(),
             "require_score": get_settings().pr_reviewer.require_score_review,
             "require_tests": get_settings().pr_reviewer.require_tests_review,
-            "require_focused": get_settings().pr_reviewer.require_focused_review,
             "require_estimate_effort_to_review": get_settings().pr_reviewer.require_estimate_effort_to_review,
+            'require_can_be_split_review': get_settings().pr_reviewer.require_can_be_split_review,
             'num_code_suggestions': get_settings().pr_reviewer.num_code_suggestions,
             'question_str': question_str,
             'answer_str': answer_str,