From 727eea2b62418672f6e2146eb2b7ca1a07636df3 Mon Sep 17 00:00:00 2001 From: mrT23 Date: Sun, 12 Nov 2023 15:00:06 +0200 Subject: [PATCH 01/17] s1 --- pr_agent/algo/utils.py | 11 ++- pr_agent/settings/pr_description_prompts.toml | 80 ++++++++----------- pr_agent/tools/pr_description.py | 37 +++++---- 3 files changed, 60 insertions(+), 68 deletions(-) diff --git a/pr_agent/algo/utils.py b/pr_agent/algo/utils.py index 58f0ccb91..6a7d62cff 100644 --- a/pr_agent/algo/utils.py +++ b/pr_agent/algo/utils.py @@ -336,11 +336,14 @@ def set_custom_labels(variables): labels_list = f" - {labels_list}" if labels_list else "" variables["custom_labels"] = labels_list return - final_labels = "" + #final_labels = "" + #for k, v in labels.items(): + # final_labels += f" - {k} ({v['description']})\n" + #variables["custom_labels"] = final_labels + #variables["custom_labels_examples"] = f" - {list(labels.keys())[0]}" + variables["custom_labels_class"] = "class Labels(Enum):" for k, v in labels.items(): - final_labels += f" - {k} ({v['description']})\n" - variables["custom_labels"] = final_labels - variables["custom_labels_examples"] = f" - {list(labels.keys())[0]}" + variables["custom_labels_class"] += f"\n {k.lower().replace(' ','_')} = '{k}' # {v['description']}" def get_user_labels(current_labels: List[str] = None): diff --git a/pr_agent/settings/pr_description_prompts.toml b/pr_agent/settings/pr_description_prompts.toml index cfb429484..9b2cf861f 100644 --- a/pr_agent/settings/pr_description_prompts.toml +++ b/pr_agent/settings/pr_description_prompts.toml @@ -13,67 +13,57 @@ Extra instructions from the user: ' {% endif %} -You must use the following YAML schema to format your answer: -```yaml -PR Title: - type: string - description: an informative title for the PR, describing its main theme -PR Type: - type: string - enum: - - Bug fix - - Tests - - Refactoring - - Enhancement - - Documentation - - Other + +The output must be a YAML object equivalent to type $PRDescription, according to the following Pydantic definitions: +' +Class PRType(Enum): + bug_fix = "Bug fix" + tests = "Tests" + refactoring = "Refactoring" + enhancement = "Enhancement" + documentation = "Documentation" + other = "Other" + {%- if enable_custom_labels %} -PR Labels: - type: array - description: One or more labels that describe the PR labels. Don't output the description in the parentheses. - items: - type: string - enum: -{{ custom_labels }} + +{{ custom_labels_class }} {%- endif %} -PR Description: - type: string - description: an informative and concise description of the PR. - {%- if use_bullet_points %} Use bullet points. {% endif %} -PR Main Files Walkthrough: - type: array - maxItems: 10 - description: |- - a walkthrough of the PR changes. Review main files, and shortly describe the changes in each file (up to 10 most important files). - items: - filename: - type: string - description: the relevant file full path - changes in file: - type: string - description: minimal and concise description of the changes in the relevant file -``` + +class FileWalkthrough(BaseModel): + filename: str = Field(description="the relevant file full path") + changes_in_file: str = Field(description="minimal and concise description of the changes in the relevant file") + +Class PRDescription(BaseModel): + title: str = Field(description="an informative title for the PR, describing its main theme") + type: List[PRType] = Field(description="one or more types that describe the PR type") + description: str = Field(description="an informative and concise description of the PR. {%- if use_bullet_points %} Use bullet points. {% endif %}") +{%- if enable_custom_labels %} + labels: List[Labels] = Field(description="one or more custom labels that describe the PR") +{%- endif %} + main_files_walkthrough: List[FileWalkthrough] = Field(max_items=10) +' Example output: ```yaml -PR Title: |- - ... -PR Type: +title: |- ... +type: +- ... +- ... {%- if enable_custom_labels %} -PR Labels: +labels: - ... - ... {%- endif %} -PR Description: |- +description: |- ... -PR Main Files Walkthrough: +main_files_walkthrough: - ... - ... ``` -Make sure to output a valid YAML. Don't repeat the prompt in the answer, and avoid outputting the 'type' and 'description' fields. +Answer should be a valid YAML, and nothing else. Each YAML output MUST be after a newline, with proper indent, and block scalar indicator ('|-') """ user="""PR Info: diff --git a/pr_agent/tools/pr_description.py b/pr_agent/tools/pr_description.py index 611523eaf..f17570332 100644 --- a/pr_agent/tools/pr_description.py +++ b/pr_agent/tools/pr_description.py @@ -44,8 +44,7 @@ def __init__(self, pr_url: str, args: list = None): "extra_instructions": get_settings().pr_description.extra_instructions, "commit_messages_str": self.git_provider.get_commit_messages(), "enable_custom_labels": get_settings().config.enable_custom_labels, - "custom_labels": "", - "custom_labels_examples": "", + "custom_labels_class": "", # will be filled if necessary in 'set_custom_labels' } self.user_description = self.git_provider.get_user_description() @@ -172,16 +171,16 @@ def _prepare_labels(self) -> List[str]: pr_types = [] # If the 'PR Type' key is present in the dictionary, split its value by comma and assign it to 'pr_types' - if 'PR Labels' in self.data: - if type(self.data['PR Labels']) == list: - pr_types = self.data['PR Labels'] - elif type(self.data['PR Labels']) == str: - pr_types = self.data['PR Labels'].split(',') - elif 'PR Type' in self.data: - if type(self.data['PR Type']) == list: - pr_types = self.data['PR Type'] - elif type(self.data['PR Type']) == str: - pr_types = self.data['PR Type'].split(',') + if 'labels' in self.data: + if type(self.data['labels']) == list: + pr_types = self.data['labels'] + elif type(self.data['labels']) == str: + pr_types = self.data['labels'].split(',') + elif 'type' in self.data: + if type(self.data['type']) == list: + pr_types = self.data['type'] + elif type(self.data['type']) == str: + pr_types = self.data['type'].split(',') return pr_types def _prepare_pr_answer_with_markers(self) -> Tuple[str, str]: @@ -193,12 +192,12 @@ def _prepare_pr_answer_with_markers(self) -> Tuple[str, str]: else: ai_header = "" - ai_type = self.data.get('PR Type') + ai_type = self.data.get('type') if ai_type and not re.search(r'', body): pr_type = f"{ai_header}{ai_type}" body = body.replace('pr_agent:type', pr_type) - ai_summary = self.data.get('PR Description') + ai_summary = self.data.get('description') if ai_summary and not re.search(r'', body): summary = f"{ai_header}{ai_summary}" body = body.replace('pr_agent:summary', summary) @@ -228,16 +227,16 @@ def _prepare_pr_answer(self) -> Tuple[str, str]: # Iterate over the dictionary items and append the key and value to 'markdown_text' in a markdown format markdown_text = "" # Don't display 'PR Labels' - if 'PR Labels' in self.data and self.git_provider.is_supported("get_labels"): - self.data.pop('PR Labels') + if 'labels' in self.data and self.git_provider.is_supported("get_labels"): + self.data.pop('labels') if not get_settings().pr_description.enable_pr_type: - self.data.pop('PR Type') + self.data.pop('type') for key, value in self.data.items(): markdown_text += f"## {key}\n\n" markdown_text += f"{value}\n\n" # Remove the 'PR Title' key from the dictionary - ai_title = self.data.pop('PR Title', self.vars["title"]) + ai_title = self.data.pop('title', self.vars["title"]) if get_settings().pr_description.keep_original_user_title: # Assign the original PR title to the 'title' variable title = self.vars["title"] @@ -256,7 +255,7 @@ def _prepare_pr_answer(self) -> Tuple[str, str]: pr_body += "
files:\n\n" for file in value: filename = file['filename'].replace("'", "`") - description = file['changes in file'] + description = file['changes_in_file'] pr_body += f'- `{filename}`: {description}\n' if self.git_provider.is_supported("gfm_markdown"): pr_body +="
\n" From e9891fc5301bbf94877ceccbac41ff8195e087ad Mon Sep 17 00:00:00 2001 From: mrT23 Date: Sun, 12 Nov 2023 16:37:53 +0200 Subject: [PATCH 02/17] s1 --- pr_agent/algo/utils.py | 2 +- pr_agent/cli.py | 16 ++++--- pr_agent/settings/pr_custom_labels.toml | 45 +++++++++---------- pr_agent/settings/pr_description_prompts.toml | 2 +- pr_agent/tools/pr_description.py | 2 +- pr_agent/tools/pr_generate_labels.py | 15 +++---- 6 files changed, 41 insertions(+), 41 deletions(-) diff --git a/pr_agent/algo/utils.py b/pr_agent/algo/utils.py index 6a7d62cff..b4ed2b396 100644 --- a/pr_agent/algo/utils.py +++ b/pr_agent/algo/utils.py @@ -341,7 +341,7 @@ def set_custom_labels(variables): # final_labels += f" - {k} ({v['description']})\n" #variables["custom_labels"] = final_labels #variables["custom_labels_examples"] = f" - {list(labels.keys())[0]}" - variables["custom_labels_class"] = "class Labels(Enum):" + variables["custom_labels_class"] = "class Label(Enum):" for k, v in labels.items(): variables["custom_labels_class"] += f"\n {k.lower().replace(' ','_')} = '{k}' # {v['description']}" diff --git a/pr_agent/cli.py b/pr_agent/cli.py index 6728db9f1..60948db5e 100644 --- a/pr_agent/cli.py +++ b/pr_agent/cli.py @@ -21,18 +21,22 @@ def run(inargs=None): - cli.py --issue_url=... similar_issue Supported commands: --review / review_pr - Add a review that includes a summary of the PR and specific suggestions for improvement. +- review / review_pr - Add a review that includes a summary of the PR and specific suggestions for improvement. --ask / ask_question [question] - Ask a question about the PR. +- ask / ask_question [question] - Ask a question about the PR. --describe / describe_pr - Modify the PR title and description based on the PR's contents. +- describe / describe_pr - Modify the PR title and description based on the PR's contents. --improve / improve_code - Suggest improvements to the code in the PR as pull request comments ready to commit. +- improve / improve_code - Suggest improvements to the code in the PR as pull request comments ready to commit. Extended mode ('improve --extended') employs several calls, and provides a more thorough feedback --reflect - Ask the PR author questions about the PR. +- reflect - Ask the PR author questions about the PR. --update_changelog - Update the changelog based on the PR's contents. +- update_changelog - Update the changelog based on the PR's contents. + +- add_docs + +- generate_labels Configuration: diff --git a/pr_agent/settings/pr_custom_labels.toml b/pr_agent/settings/pr_custom_labels.toml index f295798a4..bcc1c1ffb 100644 --- a/pr_agent/settings/pr_custom_labels.toml +++ b/pr_agent/settings/pr_custom_labels.toml @@ -11,38 +11,35 @@ Extra instructions from the user: ' {% endif %} -You must use the following YAML schema to format your answer: -```yaml -PR Type: - type: array -{%- if enable_custom_labels %} - description: One or more labels that describe the PR type. Don't output the description in the parentheses. -{%- endif %} - items: - type: string - enum: +The output must be a YAML object equivalent to type $Labels, according to the following Pydantic definitions: +' {%- if enable_custom_labels %} -{{ custom_labels }} + +{{ custom_labels_class }} + {%- else %} - - Bug fix - - Tests - - Refactoring - - Enhancement - - Documentation - - Other +class Label(Enum): + bug_fix = "Bug fix" + tests = "Tests" + refactoring = "Refactoring" + enhancement = "Enhancement" + documentation = "Documentation" + other = "Other" {%- endif %} +class Labels(BaseModel): + labels: List[Label] +' + + Example output: ```yaml -PR Type: -{%- if enable_custom_labels %} -{{ custom_labels_examples }} -{%- else %} - - Bug fix -{%- endif %} +labels: +- ... +- ... ``` -Make sure to output a valid YAML. Don't repeat the prompt in the answer, and avoid outputting the 'type' and 'description' fields. +Answer should be a valid YAML, and nothing else. """ user="""PR Info: diff --git a/pr_agent/settings/pr_description_prompts.toml b/pr_agent/settings/pr_description_prompts.toml index 9b2cf861f..bace848e7 100644 --- a/pr_agent/settings/pr_description_prompts.toml +++ b/pr_agent/settings/pr_description_prompts.toml @@ -38,7 +38,7 @@ Class PRDescription(BaseModel): type: List[PRType] = Field(description="one or more types that describe the PR type") description: str = Field(description="an informative and concise description of the PR. {%- if use_bullet_points %} Use bullet points. {% endif %}") {%- if enable_custom_labels %} - labels: List[Labels] = Field(description="one or more custom labels that describe the PR") + labels: List[Label] = Field(description="one or more custom labels that describe the PR") {%- endif %} main_files_walkthrough: List[FileWalkthrough] = Field(max_items=10) ' diff --git a/pr_agent/tools/pr_description.py b/pr_agent/tools/pr_description.py index f17570332..c0eb66068 100644 --- a/pr_agent/tools/pr_description.py +++ b/pr_agent/tools/pr_description.py @@ -44,7 +44,7 @@ def __init__(self, pr_url: str, args: list = None): "extra_instructions": get_settings().pr_description.extra_instructions, "commit_messages_str": self.git_provider.get_commit_messages(), "enable_custom_labels": get_settings().config.enable_custom_labels, - "custom_labels_class": "", # will be filled if necessary in 'set_custom_labels' + "custom_labels_class": "", # will be filled if necessary in 'set_custom_labels' function } self.user_description = self.git_provider.get_user_description() diff --git a/pr_agent/tools/pr_generate_labels.py b/pr_agent/tools/pr_generate_labels.py index e413e96fd..45c504b3d 100644 --- a/pr_agent/tools/pr_generate_labels.py +++ b/pr_agent/tools/pr_generate_labels.py @@ -43,9 +43,8 @@ def __init__(self, pr_url: str, args: list = None): "use_bullet_points": get_settings().pr_description.use_bullet_points, "extra_instructions": get_settings().pr_description.extra_instructions, "commit_messages_str": self.git_provider.get_commit_messages(), - "custom_labels": "", - "custom_labels_examples": "", "enable_custom_labels": get_settings().config.enable_custom_labels, + "custom_labels_class": "", # will be filled if necessary in 'set_custom_labels' function } # Initialize the token handler @@ -159,11 +158,11 @@ def _prepare_data(self): def _prepare_labels(self) -> List[str]: pr_types = [] - # If the 'PR Type' key is present in the dictionary, split its value by comma and assign it to 'pr_types' - if 'PR Type' in self.data: - if type(self.data['PR Type']) == list: - pr_types = self.data['PR Type'] - elif type(self.data['PR Type']) == str: - pr_types = self.data['PR Type'].split(',') + # If the 'labels' key is present in the dictionary, split its value by comma and assign it to 'pr_types' + if 'labels' in self.data: + if type(self.data['labels']) == list: + pr_types = self.data['labels'] + elif type(self.data['labels']) == str: + pr_types = self.data['labels'].split(',') return pr_types From 0e932af2e3202109c5b810efa197e67408e95c92 Mon Sep 17 00:00:00 2001 From: mrT23 Date: Mon, 13 Nov 2023 12:01:08 +0200 Subject: [PATCH 03/17] multi line --- pr_agent/algo/utils.py | 4 ++-- pr_agent/settings/pr_description_prompts.toml | 4 ++-- 2 files changed, 4 insertions(+), 4 deletions(-) diff --git a/pr_agent/algo/utils.py b/pr_agent/algo/utils.py index b4ed2b396..8d74f48d7 100644 --- a/pr_agent/algo/utils.py +++ b/pr_agent/algo/utils.py @@ -343,8 +343,8 @@ def set_custom_labels(variables): #variables["custom_labels_examples"] = f" - {list(labels.keys())[0]}" variables["custom_labels_class"] = "class Label(Enum):" for k, v in labels.items(): - variables["custom_labels_class"] += f"\n {k.lower().replace(' ','_')} = '{k}' # {v['description']}" - + description = v['description'].strip('\n').replace('\n', '\\n') + variables["custom_labels_class"] += f"\n {k.lower().replace(' ', '_')} = '{k}' # {description}" def get_user_labels(current_labels: List[str] = None): """ diff --git a/pr_agent/settings/pr_description_prompts.toml b/pr_agent/settings/pr_description_prompts.toml index bace848e7..b9b8a8cd7 100644 --- a/pr_agent/settings/pr_description_prompts.toml +++ b/pr_agent/settings/pr_description_prompts.toml @@ -59,8 +59,8 @@ labels: description: |- ... main_files_walkthrough: - - ... - - ... +- ... +- ... ``` Answer should be a valid YAML, and nothing else. Each YAML output MUST be after a newline, with proper indent, and block scalar indicator ('|-') From f5c6dd55b883261cffb1ecc89e98431cc11d91b5 Mon Sep 17 00:00:00 2001 From: mrT23 Date: Mon, 13 Nov 2023 12:04:58 +0200 Subject: [PATCH 04/17] triple quote --- pr_agent/settings/custom_labels.toml | 14 +++++++------- 1 file changed, 7 insertions(+), 7 deletions(-) diff --git a/pr_agent/settings/custom_labels.toml b/pr_agent/settings/custom_labels.toml index 43e14b0e3..9c751d0eb 100644 --- a/pr_agent/settings/custom_labels.toml +++ b/pr_agent/settings/custom_labels.toml @@ -3,16 +3,16 @@ enable_custom_labels=false ## template for custom labels #[custom_labels."Bug fix"] -#description = "Fixes a bug in the code" +#description = """Fixes a bug in the code""" #[custom_labels."Tests"] -#description = "Adds or modifies tests" +#description = """Adds or modifies tests""" #[custom_labels."Bug fix with tests"] -#description = "Fixes a bug in the code and adds or modifies tests" +#description = """Fixes a bug in the code and adds or modifies tests""" #[custom_labels."Refactoring"] -#description = "Code refactoring without changing functionality" +#description = """Code refactoring without changing functionality""" #[custom_labels."Enhancement"] -#description = "Adds new features or functionality" +#description = """Adds new features or functionality""" #[custom_labels."Documentation"] -#description = "Adds or modifies documentation" +#description = """Adds or modifies documentation""" #[custom_labels."Other"] -#description = "Other changes that do not fit in any of the above categories" \ No newline at end of file +#description = """Other changes that do not fit in any of the above categories""" \ No newline at end of file From a8dddd199991c30f94715c0b52f9e8b897e1ea40 Mon Sep 17 00:00:00 2001 From: mrT23 Date: Mon, 13 Nov 2023 12:14:18 +0200 Subject: [PATCH 05/17] prompt --- pr_agent/settings/pr_custom_labels.toml | 12 +++++++----- pr_agent/settings/pr_description_prompts.toml | 2 +- 2 files changed, 8 insertions(+), 6 deletions(-) diff --git a/pr_agent/settings/pr_custom_labels.toml b/pr_agent/settings/pr_custom_labels.toml index bcc1c1ffb..821ec8d25 100644 --- a/pr_agent/settings/pr_custom_labels.toml +++ b/pr_agent/settings/pr_custom_labels.toml @@ -1,8 +1,10 @@ [pr_custom_labels_prompt] -system="""You are CodiumAI-PR-Reviewer, a language model designed to review git pull requests. -Your task is to label the type of the PR content. -- Make sure not to focus the new PR code (the '+' lines). -- If needed, each YAML output should be in block scalar format ('|-') +system="""You are CodiumAI-PR-Reviewer, a language model designed to review git Pull Requests (PR). +Your task is to provide labels that describe the PR content. +{%- if enable_custom_labels %} +Thoroughly read the labels name and the provided description, and decide whether the label is relevant to the PR. +{%- endif %} + {%- if extra_instructions %} Extra instructions from the user: @@ -28,7 +30,7 @@ class Label(Enum): {%- endif %} class Labels(BaseModel): - labels: List[Label] + labels: List[Label] = Field(min_items=0, description="custom labels that describe the PR") ' diff --git a/pr_agent/settings/pr_description_prompts.toml b/pr_agent/settings/pr_description_prompts.toml index b9b8a8cd7..d59e585d4 100644 --- a/pr_agent/settings/pr_description_prompts.toml +++ b/pr_agent/settings/pr_description_prompts.toml @@ -38,7 +38,7 @@ Class PRDescription(BaseModel): type: List[PRType] = Field(description="one or more types that describe the PR type") description: str = Field(description="an informative and concise description of the PR. {%- if use_bullet_points %} Use bullet points. {% endif %}") {%- if enable_custom_labels %} - labels: List[Label] = Field(description="one or more custom labels that describe the PR") + labels: List[Label] = Field(min_items=0, description="custom labels that describe the PR") {%- endif %} main_files_walkthrough: List[FileWalkthrough] = Field(max_items=10) ' From 0df054295828c8b1395a1b084a89b99688200d0b Mon Sep 17 00:00:00 2001 From: mrT23 Date: Mon, 13 Nov 2023 15:55:35 +0200 Subject: [PATCH 06/17] prompt --- pr_agent/algo/utils.py | 2 +- pr_agent/settings/pr_custom_labels.toml | 4 ++-- pr_agent/settings/pr_description_prompts.toml | 6 +++--- 3 files changed, 6 insertions(+), 6 deletions(-) diff --git a/pr_agent/algo/utils.py b/pr_agent/algo/utils.py index 8d74f48d7..bd91477c3 100644 --- a/pr_agent/algo/utils.py +++ b/pr_agent/algo/utils.py @@ -341,7 +341,7 @@ def set_custom_labels(variables): # final_labels += f" - {k} ({v['description']})\n" #variables["custom_labels"] = final_labels #variables["custom_labels_examples"] = f" - {list(labels.keys())[0]}" - variables["custom_labels_class"] = "class Label(Enum):" + variables["custom_labels_class"] = "class Label(str, Enum):" for k, v in labels.items(): description = v['description'].strip('\n').replace('\n', '\\n') variables["custom_labels_class"] += f"\n {k.lower().replace(' ', '_')} = '{k}' # {description}" diff --git a/pr_agent/settings/pr_custom_labels.toml b/pr_agent/settings/pr_custom_labels.toml index 821ec8d25..f61a208c5 100644 --- a/pr_agent/settings/pr_custom_labels.toml +++ b/pr_agent/settings/pr_custom_labels.toml @@ -20,7 +20,7 @@ The output must be a YAML object equivalent to type $Labels, according to the fo {{ custom_labels_class }} {%- else %} -class Label(Enum): +class Label(str, Enum): bug_fix = "Bug fix" tests = "Tests" refactoring = "Refactoring" @@ -30,7 +30,7 @@ class Label(Enum): {%- endif %} class Labels(BaseModel): - labels: List[Label] = Field(min_items=0, description="custom labels that describe the PR") + labels: List[Label] = Field(min_items=0, description="custom labels that describe the PR. Return the label value, not the name.") ' diff --git a/pr_agent/settings/pr_description_prompts.toml b/pr_agent/settings/pr_description_prompts.toml index d59e585d4..514a19916 100644 --- a/pr_agent/settings/pr_description_prompts.toml +++ b/pr_agent/settings/pr_description_prompts.toml @@ -16,7 +16,7 @@ Extra instructions from the user: The output must be a YAML object equivalent to type $PRDescription, according to the following Pydantic definitions: ' -Class PRType(Enum): +class PRType(str, Enum): bug_fix = "Bug fix" tests = "Tests" refactoring = "Refactoring" @@ -35,10 +35,10 @@ class FileWalkthrough(BaseModel): Class PRDescription(BaseModel): title: str = Field(description="an informative title for the PR, describing its main theme") - type: List[PRType] = Field(description="one or more types that describe the PR type") + type: List[PRType] = Field(description="one or more types that describe the PR type. . Return the label value, not the name.") description: str = Field(description="an informative and concise description of the PR. {%- if use_bullet_points %} Use bullet points. {% endif %}") {%- if enable_custom_labels %} - labels: List[Label] = Field(min_items=0, description="custom labels that describe the PR") + labels: List[Label] = Field(min_items=0, description="custom labels that describe the PR. Return the label value, not the name.") {%- endif %} main_files_walkthrough: List[FileWalkthrough] = Field(max_items=10) ' From b3238e90f2d205c5920ea60ece5901e216f4d4eb Mon Sep 17 00:00:00 2001 From: mrT23 Date: Sun, 26 Nov 2023 08:10:01 +0200 Subject: [PATCH 07/17] s --- pr_agent/git_providers/git_provider.py | 54 ++++++++++++------- pr_agent/settings/pr_add_docs.toml | 2 +- pr_agent/settings/pr_description_prompts.toml | 9 ++-- 3 files changed, 41 insertions(+), 24 deletions(-) diff --git a/pr_agent/git_providers/git_provider.py b/pr_agent/git_providers/git_provider.py index 05122f9c3..d929ed379 100644 --- a/pr_agent/git_providers/git_provider.py +++ b/pr_agent/git_providers/git_provider.py @@ -1,5 +1,6 @@ from abc import ABC, abstractmethod from dataclasses import dataclass +from pr_agent.algo.language_handler import language_extension_map # enum EDIT_TYPE (ADDED, DELETED, MODIFIED, RENAMED) from enum import Enum @@ -173,26 +174,39 @@ def get_main_pr_language(languages, files) -> str: extension_list.append(file.filename.rsplit('.')[-1]) # get the most common extension - most_common_extension = max(set(extension_list), key=extension_list.count) - - # look for a match. TBD: add more languages, do this systematically - if most_common_extension == 'py' and top_language == 'python' or \ - most_common_extension == 'js' and top_language == 'javascript' or \ - most_common_extension == 'ts' and top_language == 'typescript' or \ - most_common_extension == 'go' and top_language == 'go' or \ - most_common_extension == 'java' and top_language == 'java' or \ - most_common_extension == 'c' and top_language == 'c' or \ - most_common_extension == 'cpp' and top_language == 'c++' or \ - most_common_extension == 'cs' and top_language == 'c#' or \ - most_common_extension == 'swift' and top_language == 'swift' or \ - most_common_extension == 'php' and top_language == 'php' or \ - most_common_extension == 'rb' and top_language == 'ruby' or \ - most_common_extension == 'rs' and top_language == 'rust' or \ - most_common_extension == 'scala' and top_language == 'scala' or \ - most_common_extension == 'kt' and top_language == 'kotlin' or \ - most_common_extension == 'pl' and top_language == 'perl' or \ - most_common_extension == top_language: - main_language_str = top_language + most_common_extension = '.' + max(set(extension_list), key=extension_list.count) + try: + if top_language in language_extension_map and most_common_extension in language_extension_map[top_language]: + main_language_str = top_language + else: + for language, extensions in language_extension_map.items(): + if most_common_extension in extensions: + main_language_str = language + break + except Exception as e: + get_logger().exception(f"Failed to get main language: {e}") + pass + + ## old approach: + # most_common_extension = max(set(extension_list), key=extension_list.count) + # if most_common_extension == 'py' and top_language == 'python' or \ + # most_common_extension == 'js' and top_language == 'javascript' or \ + # most_common_extension == 'ts' and top_language == 'typescript' or \ + # most_common_extension == 'tsx' and top_language == 'typescript' or \ + # most_common_extension == 'go' and top_language == 'go' or \ + # most_common_extension == 'java' and top_language == 'java' or \ + # most_common_extension == 'c' and top_language == 'c' or \ + # most_common_extension == 'cpp' and top_language == 'c++' or \ + # most_common_extension == 'cs' and top_language == 'c#' or \ + # most_common_extension == 'swift' and top_language == 'swift' or \ + # most_common_extension == 'php' and top_language == 'php' or \ + # most_common_extension == 'rb' and top_language == 'ruby' or \ + # most_common_extension == 'rs' and top_language == 'rust' or \ + # most_common_extension == 'scala' and top_language == 'scala' or \ + # most_common_extension == 'kt' and top_language == 'kotlin' or \ + # most_common_extension == 'pl' and top_language == 'perl' or \ + # most_common_extension == top_language: + # main_language_str = top_language except Exception as e: get_logger().exception(e) diff --git a/pr_agent/settings/pr_add_docs.toml b/pr_agent/settings/pr_add_docs.toml index 31b7195c6..ffb7d01d6 100644 --- a/pr_agent/settings/pr_add_docs.toml +++ b/pr_agent/settings/pr_add_docs.toml @@ -103,7 +103,7 @@ Description: '{{description}}' {%- if language %} -Main language: {{language}} +Main PR language: '{{language}}' {%- endif %} diff --git a/pr_agent/settings/pr_description_prompts.toml b/pr_agent/settings/pr_description_prompts.toml index 2a51b324b..f1633e467 100644 --- a/pr_agent/settings/pr_description_prompts.toml +++ b/pr_agent/settings/pr_description_prompts.toml @@ -2,8 +2,8 @@ system="""You are CodiumAI-PR-Reviewer, a language model designed to review git pull requests. Your task is to provide full description of a Pull Request (PR) content. - Make sure to focus on the new PR code (the '+' lines). -- Notice that the 'Previous title', 'Previous description' and 'Commit messages' sections may be partial, simplistic, non-informative or not up-to-date. Hence, compare them to the PR diff code, and use them only as a reference. -- Emphasize first the most important changes, and then the less important ones. +- Keep in mind that the 'Previous title', 'Previous description' and 'Commit messages' sections may be partial, simplistic, non-informative or out of date. Hence, compare them to the PR diff code, and use them only as a reference. +- Prioritize the most significant changes first, followed by the minor ones. - If needed, each YAML output should be in block scalar format ('|-') {%- if extra_instructions %} @@ -68,12 +68,15 @@ Answer should be a valid YAML, and nothing else. Each YAML output MUST be after """ user="""PR Info: + Previous title: '{{title}}' + Previous description: '{{description}}' + Branch: '{{branch}}' {%- if language %} -Main language: {{language}} +Main PR language: '{{ language }}' {%- endif %} {%- if commit_messages_str %} From d7df4287f8612e21a3ac63d1dbd4af21defc0ef8 Mon Sep 17 00:00:00 2001 From: mrT23 Date: Sun, 26 Nov 2023 08:17:16 +0200 Subject: [PATCH 08/17] feat: Update PR prompts in toml files to enhance readability and consistency --- pr_agent/settings/pr_code_suggestions_prompts.toml | 2 +- pr_agent/settings/pr_custom_labels.toml | 6 ++++-- pr_agent/settings/pr_description_prompts.toml | 4 +++- .../settings/pr_information_from_user_prompts.toml | 8 +++++++- pr_agent/settings/pr_questions_prompts.toml | 11 +++++++++-- pr_agent/settings/pr_reviewer_prompts.toml | 9 ++++++++- pr_agent/settings/pr_update_changelog_prompts.toml | 11 +++++++++-- 7 files changed, 41 insertions(+), 10 deletions(-) diff --git a/pr_agent/settings/pr_code_suggestions_prompts.toml b/pr_agent/settings/pr_code_suggestions_prompts.toml index 42ec74413..455d66124 100644 --- a/pr_agent/settings/pr_code_suggestions_prompts.toml +++ b/pr_agent/settings/pr_code_suggestions_prompts.toml @@ -120,7 +120,7 @@ Description: '{{description}}' {%- if language %} -Main language: {{language}} +Main PR language: '{{ language }}' {%- endif %} diff --git a/pr_agent/settings/pr_custom_labels.toml b/pr_agent/settings/pr_custom_labels.toml index 976258dc3..dada15180 100644 --- a/pr_agent/settings/pr_custom_labels.toml +++ b/pr_agent/settings/pr_custom_labels.toml @@ -51,12 +51,14 @@ Previous description: '{{description}}' Branch: '{{branch}}' {%- if language %} -Main language: {{language}} +Main PR language: '{{ language }}' {%- endif %} {%- if commit_messages_str %} Commit messages: -{{commit_messages_str}} +' +{{ commit_messages_str }} +' {%- endif %} diff --git a/pr_agent/settings/pr_description_prompts.toml b/pr_agent/settings/pr_description_prompts.toml index f1633e467..931f92c3a 100644 --- a/pr_agent/settings/pr_description_prompts.toml +++ b/pr_agent/settings/pr_description_prompts.toml @@ -81,7 +81,9 @@ Main PR language: '{{ language }}' {%- if commit_messages_str %} Commit messages: -{{commit_messages_str}} +' +{{ commit_messages_str }} +' {%- endif %} diff --git a/pr_agent/settings/pr_information_from_user_prompts.toml b/pr_agent/settings/pr_information_from_user_prompts.toml index 8d628f7a6..7b1b6fdbc 100644 --- a/pr_agent/settings/pr_information_from_user_prompts.toml +++ b/pr_agent/settings/pr_information_from_user_prompts.toml @@ -16,15 +16,21 @@ Questions to better understand the PR: user="""PR Info: Title: '{{title}}' + Branch: '{{branch}}' + Description: '{{description}}' + {%- if language %} -Main language: {{language}} + +Main PR language: '{{ language }}' {%- endif %} {%- if commit_messages_str %} Commit messages: +' {{commit_messages_str}} +' {%- endif %} diff --git a/pr_agent/settings/pr_questions_prompts.toml b/pr_agent/settings/pr_questions_prompts.toml index e306684d2..dc7a313d8 100644 --- a/pr_agent/settings/pr_questions_prompts.toml +++ b/pr_agent/settings/pr_questions_prompts.toml @@ -7,16 +7,23 @@ Make sure not to repeat modifications already implemented in the new PR code (th """ user="""PR Info: + Title: '{{title}}' + Branch: '{{branch}}' + Description: '{{description}}' + {%- if language %} -Main language: {{language}} + +Main PR language: '{{ language }}' {%- endif %} {%- if commit_messages_str %} Commit messages: -{{commit_messages_str}} +' +{{ commit_messages_str }} +' {%- endif %} diff --git a/pr_agent/settings/pr_reviewer_prompts.toml b/pr_agent/settings/pr_reviewer_prompts.toml index b75c296ab..cece22f96 100644 --- a/pr_agent/settings/pr_reviewer_prompts.toml +++ b/pr_agent/settings/pr_reviewer_prompts.toml @@ -179,16 +179,23 @@ Don't repeat the prompt in the answer, and avoid outputting the 'type' and 'desc """ user="""PR Info: + Title: '{{title}}' + Branch: '{{branch}}' + Description: '{{description}}' + {%- if language %} -Main language: {{language}} + +Main PR language: '{{ language }}' {%- endif %} {%- if commit_messages_str %} Commit messages: +' {{commit_messages_str}} +' {%- endif %} {%- if question_str %} diff --git a/pr_agent/settings/pr_update_changelog_prompts.toml b/pr_agent/settings/pr_update_changelog_prompts.toml index e9133e34f..9d00f2515 100644 --- a/pr_agent/settings/pr_update_changelog_prompts.toml +++ b/pr_agent/settings/pr_update_changelog_prompts.toml @@ -15,16 +15,23 @@ Extra instructions from the user: """ user="""PR Info: + Title: '{{title}}' + Branch: '{{branch}}' + Description: '{{description}}' + {%- if language %} -Main language: {{language}} + +Main PR language: '{{ language }}' {%- endif %} {%- if commit_messages_str %} Commit messages: -{{commit_messages_str}} +' +{{ commit_messages_str }} +' {%- endif %} From 9465b7b577e90f93210205449a93aaf83b15da8e Mon Sep 17 00:00:00 2001 From: mrT23 Date: Sun, 26 Nov 2023 08:29:47 +0200 Subject: [PATCH 09/17] refactor: Move clip_tokens function from pr_processing to utils module, and add tests --- pr_agent/algo/pr_processing.py | 31 +----------------- pr_agent/algo/utils.py | 32 +++++++++++++++++++ .../git_providers/azuredevops_provider.py | 3 +- pr_agent/git_providers/git_provider.py | 2 +- pr_agent/git_providers/github_provider.py | 4 +-- pr_agent/git_providers/gitlab_provider.py | 4 +-- tests/unittest/test_clip_tokens.py | 19 +++++++++++ 7 files changed, 58 insertions(+), 37 deletions(-) create mode 100644 tests/unittest/test_clip_tokens.py diff --git a/pr_agent/algo/pr_processing.py b/pr_agent/algo/pr_processing.py index 6063deced..4c1352f0e 100644 --- a/pr_agent/algo/pr_processing.py +++ b/pr_agent/algo/pr_processing.py @@ -10,7 +10,7 @@ from pr_agent.algo.git_patch_processing import convert_to_hunks_with_lines_numbers, extend_patch, handle_patch_deletions from pr_agent.algo.language_handler import sort_files_by_main_languages from pr_agent.algo.file_filter import filter_ignored -from pr_agent.algo.token_handler import TokenHandler, get_token_encoder +from pr_agent.algo.token_handler import TokenHandler from pr_agent.algo.utils import get_max_tokens from pr_agent.config_loader import get_settings from pr_agent.git_providers.git_provider import FilePatchInfo, GitProvider, EDIT_TYPE @@ -326,35 +326,6 @@ def find_line_number_of_relevant_line_in_file(diff_files: List[FilePatchInfo], return position, absolute_position -def clip_tokens(text: str, max_tokens: int) -> str: - """ - Clip the number of tokens in a string to a maximum number of tokens. - - Args: - text (str): The string to clip. - max_tokens (int): The maximum number of tokens allowed in the string. - - Returns: - str: The clipped string. - """ - if not text: - return text - - try: - encoder = get_token_encoder() - num_input_tokens = len(encoder.encode(text)) - if num_input_tokens <= max_tokens: - return text - num_chars = len(text) - chars_per_token = num_chars / num_input_tokens - num_output_chars = int(chars_per_token * max_tokens) - clipped_text = text[:num_output_chars] - return clipped_text - except Exception as e: - get_logger().warning(f"Failed to clip tokens: {e}") - return text - - def get_pr_multi_diffs(git_provider: GitProvider, token_handler: TokenHandler, model: str, diff --git a/pr_agent/algo/utils.py b/pr_agent/algo/utils.py index b9aaee945..730740981 100644 --- a/pr_agent/algo/utils.py +++ b/pr_agent/algo/utils.py @@ -11,6 +11,7 @@ from starlette_context import context from pr_agent.algo import MAX_TOKENS +from pr_agent.algo.token_handler import get_token_encoder from pr_agent.config_loader import get_settings, global_settings from pr_agent.log import get_logger @@ -378,3 +379,34 @@ def get_max_tokens(model): max_tokens_model = min(settings.config.max_model_tokens, max_tokens_model) # get_logger().debug(f"limiting max tokens to {max_tokens_model}") return max_tokens_model + + +def clip_tokens(text: str, max_tokens: int, add_three_dots=True) -> str: + """ + Clip the number of tokens in a string to a maximum number of tokens. + + Args: + text (str): The string to clip. + max_tokens (int): The maximum number of tokens allowed in the string. + add_three_dots (bool, optional): A boolean indicating whether to add three dots at the end of the clipped + Returns: + str: The clipped string. + """ + if not text: + return text + + try: + encoder = get_token_encoder() + num_input_tokens = len(encoder.encode(text)) + if num_input_tokens <= max_tokens: + return text + num_chars = len(text) + chars_per_token = num_chars / num_input_tokens + num_output_chars = int(chars_per_token * max_tokens) + clipped_text = text[:num_output_chars] + if add_three_dots: + clipped_text += "...(truncated)" + return clipped_text + except Exception as e: + get_logger().warning(f"Failed to clip tokens: {e}") + return text diff --git a/pr_agent/git_providers/azuredevops_provider.py b/pr_agent/git_providers/azuredevops_provider.py index 6a404532a..ca11b9d86 100644 --- a/pr_agent/git_providers/azuredevops_provider.py +++ b/pr_agent/git_providers/azuredevops_provider.py @@ -14,9 +14,8 @@ except ImportError: AZURE_DEVOPS_AVAILABLE = False -from ..algo.pr_processing import clip_tokens from ..config_loader import get_settings -from ..algo.utils import load_large_diff +from ..algo.utils import load_large_diff, clip_tokens from ..algo.language_handler import is_valid_file from .git_provider import EDIT_TYPE, FilePatchInfo diff --git a/pr_agent/git_providers/git_provider.py b/pr_agent/git_providers/git_provider.py index d929ed379..d0012b5e3 100644 --- a/pr_agent/git_providers/git_provider.py +++ b/pr_agent/git_providers/git_provider.py @@ -63,7 +63,7 @@ def get_pr_description_full(self) -> str: def get_pr_description(self, *, full: bool = True) -> str: from pr_agent.config_loader import get_settings - from pr_agent.algo.pr_processing import clip_tokens + from pr_agent.algo.utils import clip_tokens max_tokens_description = get_settings().get("CONFIG.MAX_DESCRIPTION_TOKENS", None) description = self.get_pr_description_full() if full else self.get_user_description() if max_tokens_description: diff --git a/pr_agent/git_providers/github_provider.py b/pr_agent/git_providers/github_provider.py index 634b86947..46afbad6c 100644 --- a/pr_agent/git_providers/github_provider.py +++ b/pr_agent/git_providers/github_provider.py @@ -8,8 +8,8 @@ from starlette_context import context from ..algo.language_handler import is_valid_file -from ..algo.pr_processing import clip_tokens, find_line_number_of_relevant_line_in_file -from ..algo.utils import load_large_diff +from ..algo.pr_processing import find_line_number_of_relevant_line_in_file +from ..algo.utils import load_large_diff, clip_tokens from ..config_loader import get_settings from ..log import get_logger from ..servers.utils import RateLimitExceeded diff --git a/pr_agent/git_providers/gitlab_provider.py b/pr_agent/git_providers/gitlab_provider.py index 078ca9dd8..2eb00ce1e 100644 --- a/pr_agent/git_providers/gitlab_provider.py +++ b/pr_agent/git_providers/gitlab_provider.py @@ -7,8 +7,8 @@ from gitlab import GitlabGetError from ..algo.language_handler import is_valid_file -from ..algo.pr_processing import clip_tokens, find_line_number_of_relevant_line_in_file -from ..algo.utils import load_large_diff +from ..algo.pr_processing import find_line_number_of_relevant_line_in_file +from ..algo.utils import load_large_diff, clip_tokens from ..config_loader import get_settings from .git_provider import EDIT_TYPE, FilePatchInfo, GitProvider from ..log import get_logger diff --git a/tests/unittest/test_clip_tokens.py b/tests/unittest/test_clip_tokens.py new file mode 100644 index 000000000..cc52ab7ef --- /dev/null +++ b/tests/unittest/test_clip_tokens.py @@ -0,0 +1,19 @@ + +# Generated by CodiumAI + +import pytest + +from pr_agent.algo.utils import clip_tokens + + +class TestClipTokens: + def test_clip(self): + text = "line1\nline2\nline3\nline4\nline5\nline6" + max_tokens = 25 + result = clip_tokens(text, max_tokens) + assert result == text + + max_tokens = 10 + result = clip_tokens(text, max_tokens) + expected_results = 'line1\nline2\nline3\nli...(truncated)' + assert result == expected_results From 8db2e3b2a0f76f654715c8c4d64c25fa335c9a8c Mon Sep 17 00:00:00 2001 From: mrT23 Date: Sun, 26 Nov 2023 08:42:04 +0200 Subject: [PATCH 10/17] feat: Enhance readability in toml files and add verbosity level logging in pr_generate_labels.py --- pr_agent/settings/pr_custom_labels.toml | 10 +++++++--- pr_agent/settings/pr_description_prompts.toml | 11 ++++++++--- pr_agent/settings/pr_reviewer_prompts.toml | 2 ++ pr_agent/tools/pr_generate_labels.py | 3 +++ 4 files changed, 20 insertions(+), 6 deletions(-) diff --git a/pr_agent/settings/pr_custom_labels.toml b/pr_agent/settings/pr_custom_labels.toml index dada15180..01efa5a50 100644 --- a/pr_agent/settings/pr_custom_labels.toml +++ b/pr_agent/settings/pr_custom_labels.toml @@ -1,5 +1,5 @@ [pr_custom_labels_prompt] -system="""You are CodiumAI-PR-Reviewer, a language model designed to review git Pull Requests (PR). +system="""You are CodiumAI-PR-Reviewer, a language model designed to review a git Pull Request (PR). Your task is to provide labels that describe the PR content. {%- if enable_custom_labels %} Thoroughly read the labels name and the provided description, and decide whether the label is relevant to the PR. @@ -46,9 +46,13 @@ Answer should be a valid YAML, and nothing else. """ user="""PR Info: + Previous title: '{{title}}' -Previous description: '{{description}}' -Branch: '{{branch}}' + +Branch: '{{ branch }}' + +Description: '{{ description }}' + {%- if language %} Main PR language: '{{ language }}' diff --git a/pr_agent/settings/pr_description_prompts.toml b/pr_agent/settings/pr_description_prompts.toml index 931f92c3a..96062ab2c 100644 --- a/pr_agent/settings/pr_description_prompts.toml +++ b/pr_agent/settings/pr_description_prompts.toml @@ -1,6 +1,6 @@ [pr_description_prompt] -system="""You are CodiumAI-PR-Reviewer, a language model designed to review git pull requests. -Your task is to provide full description of a Pull Request (PR) content. +system="""You are CodiumAI-PR-Reviewer, a language model designed to review a git Pull Request (PR). +Your task is to provide full description of a Pr content. - Make sure to focus on the new PR code (the '+' lines). - Keep in mind that the 'Previous title', 'Previous description' and 'Commit messages' sections may be partial, simplistic, non-informative or out of date. Hence, compare them to the PR diff code, and use them only as a reference. - Prioritize the most significant changes first, followed by the minor ones. @@ -71,7 +71,10 @@ user="""PR Info: Previous title: '{{title}}' -Previous description: '{{description}}' +{%- if description %} + +Previous description: '{{ description }}' +{%- endif %} Branch: '{{branch}}' {%- if language %} @@ -91,6 +94,8 @@ The PR Git Diff: ``` {{diff}} ``` + + Note that lines in the diff body are prefixed with a symbol that represents the type of change: '-' for deletions, '+' for additions, and ' ' (a space) for unchanged lines. Response (should be a valid YAML, and nothing else): diff --git a/pr_agent/settings/pr_reviewer_prompts.toml b/pr_agent/settings/pr_reviewer_prompts.toml index cece22f96..e87220802 100644 --- a/pr_agent/settings/pr_reviewer_prompts.toml +++ b/pr_agent/settings/pr_reviewer_prompts.toml @@ -215,6 +215,8 @@ The PR Git Diff: ``` {{diff}} ``` + + Note that lines in the diff body are prefixed with a symbol that represents the type of change: '-' for deletions, '+' for additions. Focus on the '+' lines. Response (should be a valid YAML, and nothing else): diff --git a/pr_agent/tools/pr_generate_labels.py b/pr_agent/tools/pr_generate_labels.py index 45c504b3d..6ea322a4e 100644 --- a/pr_agent/tools/pr_generate_labels.py +++ b/pr_agent/tools/pr_generate_labels.py @@ -147,6 +147,9 @@ async def _get_prediction(self, model: str) -> str: user=user_prompt ) + if get_settings().config.verbosity_level >= 2: + get_logger().info(f"\nAI response:\n{response}") + return response def _prepare_data(self): From d8ae32fc55817a98ca35789056d1474c22b78229 Mon Sep 17 00:00:00 2001 From: mrT23 Date: Sun, 26 Nov 2023 08:52:55 +0200 Subject: [PATCH 11/17] language_extension_map --- pr_agent/algo/language_handler.py | 5 +++-- pr_agent/git_providers/codecommit_provider.py | 4 +++- pr_agent/git_providers/git_provider.py | 5 ++++- 3 files changed, 10 insertions(+), 4 deletions(-) diff --git a/pr_agent/algo/language_handler.py b/pr_agent/algo/language_handler.py index 66e850256..b4c02bee4 100644 --- a/pr_agent/algo/language_handler.py +++ b/pr_agent/algo/language_handler.py @@ -3,8 +3,7 @@ from pr_agent.config_loader import get_settings -language_extension_map_org = get_settings().language_extension_map_org -language_extension_map = {k.lower(): v for k, v in language_extension_map_org.items()} + # Bad Extensions, source: https://github.com/EleutherAI/github-downloader/blob/345e7c4cbb9e0dc8a0615fd995a08bf9d73b3fe6/download_repo_text.py # noqa: E501 bad_extensions = get_settings().bad_extensions.default @@ -29,6 +28,8 @@ def sort_files_by_main_languages(languages: Dict, files: list): # languages_sorted = sorted(languages, key=lambda x: x[1], reverse=True) # get all extensions for the languages main_extensions = [] + language_extension_map_org = get_settings().language_extension_map_org + language_extension_map = {k.lower(): v for k, v in language_extension_map_org.items()} for language in languages_sorted_list: if language.lower() in language_extension_map: main_extensions.append(language_extension_map[language.lower()]) diff --git a/pr_agent/git_providers/codecommit_provider.py b/pr_agent/git_providers/codecommit_provider.py index a48368493..399f0a944 100644 --- a/pr_agent/git_providers/codecommit_provider.py +++ b/pr_agent/git_providers/codecommit_provider.py @@ -6,9 +6,9 @@ from pr_agent.git_providers.codecommit_client import CodeCommitClient -from ..algo.language_handler import is_valid_file, language_extension_map from ..algo.utils import load_large_diff from .git_provider import EDIT_TYPE, FilePatchInfo, GitProvider +from ..config_loader import get_settings from ..log import get_logger @@ -269,6 +269,8 @@ def get_languages(self): # where each dictionary item is a language name. # We build that language->extension dictionary here in main_extensions_flat. main_extensions_flat = {} + language_extension_map_org = get_settings().language_extension_map_org + language_extension_map = {k.lower(): v for k, v in language_extension_map_org.items()} for language, extensions in language_extension_map.items(): for ext in extensions: main_extensions_flat[ext] = language diff --git a/pr_agent/git_providers/git_provider.py b/pr_agent/git_providers/git_provider.py index d0012b5e3..a341f43a2 100644 --- a/pr_agent/git_providers/git_provider.py +++ b/pr_agent/git_providers/git_provider.py @@ -1,11 +1,11 @@ from abc import ABC, abstractmethod from dataclasses import dataclass -from pr_agent.algo.language_handler import language_extension_map # enum EDIT_TYPE (ADDED, DELETED, MODIFIED, RENAMED) from enum import Enum from typing import Optional +from pr_agent.config_loader import get_settings from pr_agent.log import get_logger @@ -176,6 +176,9 @@ def get_main_pr_language(languages, files) -> str: # get the most common extension most_common_extension = '.' + max(set(extension_list), key=extension_list.count) try: + language_extension_map_org = get_settings().language_extension_map_org + language_extension_map = {k.lower(): v for k, v in language_extension_map_org.items()} + if top_language in language_extension_map and most_common_extension in language_extension_map[top_language]: main_language_str = top_language else: From 0326b7e4acde31a9dc076622cc6559ffdd257014 Mon Sep 17 00:00:00 2001 From: mrT23 Date: Sun, 26 Nov 2023 09:05:45 +0200 Subject: [PATCH 12/17] refactor: Update PR prompts in toml files for clarity and consistency --- pr_agent/settings/pr_add_docs.toml | 2 +- pr_agent/settings/pr_code_suggestions_prompts.toml | 4 ++-- pr_agent/settings/pr_description_prompts.toml | 6 +++--- pr_agent/settings/pr_reviewer_prompts.toml | 2 +- pr_agent/settings/pr_sort_code_suggestions_prompts.toml | 6 +++--- 5 files changed, 10 insertions(+), 10 deletions(-) diff --git a/pr_agent/settings/pr_add_docs.toml b/pr_agent/settings/pr_add_docs.toml index ffb7d01d6..fbf4b4751 100644 --- a/pr_agent/settings/pr_add_docs.toml +++ b/pr_agent/settings/pr_add_docs.toml @@ -1,6 +1,6 @@ [pr_add_docs_prompt] system="""You are a language model called PR-Code-Documentation Agent, that specializes in generating documentation for code. -Your task is to generate meaningfull {{ docs_for_language }} to a PR (the '+' lines). +Your task is to generate meaningfull {{ docs_for_language }} to a PR (lines starting with '+'). Example for a PR Diff input: ' diff --git a/pr_agent/settings/pr_code_suggestions_prompts.toml b/pr_agent/settings/pr_code_suggestions_prompts.toml index 455d66124..db5f95fea 100644 --- a/pr_agent/settings/pr_code_suggestions_prompts.toml +++ b/pr_agent/settings/pr_code_suggestions_prompts.toml @@ -1,6 +1,6 @@ [pr_code_suggestions_prompt] -system="""You are a language model called PR-Code-Reviewer, that specializes in suggesting code improvements for Pull Request (PR). -Your task is to provide meaningful and actionable code suggestions, to improve the new code presented in a PR (the '+' lines in the diff). +system="""You are a language model called PR-Reviewer, that specializes in suggesting code improvements for a Pull Request (PR). +Your task is to provide meaningful and actionable code suggestions, to improve the new code presented in a PR diff (lines starting with '+'). Example for a PR Diff input: ' diff --git a/pr_agent/settings/pr_description_prompts.toml b/pr_agent/settings/pr_description_prompts.toml index 96062ab2c..e675575bf 100644 --- a/pr_agent/settings/pr_description_prompts.toml +++ b/pr_agent/settings/pr_description_prompts.toml @@ -1,9 +1,9 @@ [pr_description_prompt] system="""You are CodiumAI-PR-Reviewer, a language model designed to review a git Pull Request (PR). -Your task is to provide full description of a Pr content. -- Make sure to focus on the new PR code (the '+' lines). +Your task is to provide full description the PR content. +- Make sure to focus on the new PR code (lines starting with '+'). - Keep in mind that the 'Previous title', 'Previous description' and 'Commit messages' sections may be partial, simplistic, non-informative or out of date. Hence, compare them to the PR diff code, and use them only as a reference. -- Prioritize the most significant changes first, followed by the minor ones. +- Prioritize the most significant PR changes first, followed by the minor ones. - If needed, each YAML output should be in block scalar format ('|-') {%- if extra_instructions %} diff --git a/pr_agent/settings/pr_reviewer_prompts.toml b/pr_agent/settings/pr_reviewer_prompts.toml index e87220802..d631d3d1a 100644 --- a/pr_agent/settings/pr_reviewer_prompts.toml +++ b/pr_agent/settings/pr_reviewer_prompts.toml @@ -1,5 +1,5 @@ [pr_review_prompt] -system="""You are PR-Reviewer, a language model designed to review git pull requests. +system="""You are PR-Reviewer, a language model designed to review a git Pull Request (PR). Your task is to provide constructive and concise feedback for the PR, and also provide meaningful code suggestions. Example PR Diff input: diff --git a/pr_agent/settings/pr_sort_code_suggestions_prompts.toml b/pr_agent/settings/pr_sort_code_suggestions_prompts.toml index 16b6e8618..f4a3f5bff 100644 --- a/pr_agent/settings/pr_sort_code_suggestions_prompts.toml +++ b/pr_agent/settings/pr_sort_code_suggestions_prompts.toml @@ -2,10 +2,10 @@ system=""" """ -user="""You are given a list of code suggestions to improve a PR: - +user="""You are given a list of code suggestions to improve a git Pull Request (PR): +' {{ suggestion_str|trim }} - +' Your task is to sort the code suggestions by their order of importance, and return a list with sorting order. The sorting order is a list of pairs, where each pair contains the index of the suggestion in the original list. From 1a28c7778325bf2c996a6da6d6c02ef1025f8651 Mon Sep 17 00:00:00 2001 From: mrT23 Date: Sun, 26 Nov 2023 09:08:33 +0200 Subject: [PATCH 13/17] Previous description --- pr_agent/settings/pr_description_prompts.toml | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/pr_agent/settings/pr_description_prompts.toml b/pr_agent/settings/pr_description_prompts.toml index e675575bf..500f31875 100644 --- a/pr_agent/settings/pr_description_prompts.toml +++ b/pr_agent/settings/pr_description_prompts.toml @@ -73,7 +73,10 @@ Previous title: '{{title}}' {%- if description %} -Previous description: '{{ description }}' +Previous description: +' +{{ description }} +' {%- endif %} Branch: '{{branch}}' From 690c1136039d9af9987f3a8692f2e55ce397ed55 Mon Sep 17 00:00:00 2001 From: mrT23 Date: Sun, 26 Nov 2023 09:17:42 +0200 Subject: [PATCH 14/17] refactor: Improve clarity and consistency in pr_code_suggestions_prompts.toml and pr_reviewer_prompts.toml files --- .../settings/pr_code_suggestions_prompts.toml | 2 +- pr_agent/settings/pr_reviewer_prompts.toml | 15 +++++++++------ 2 files changed, 10 insertions(+), 7 deletions(-) diff --git a/pr_agent/settings/pr_code_suggestions_prompts.toml b/pr_agent/settings/pr_code_suggestions_prompts.toml index db5f95fea..4b7522721 100644 --- a/pr_agent/settings/pr_code_suggestions_prompts.toml +++ b/pr_agent/settings/pr_code_suggestions_prompts.toml @@ -1,5 +1,5 @@ [pr_code_suggestions_prompt] -system="""You are a language model called PR-Reviewer, that specializes in suggesting code improvements for a Pull Request (PR). +system="""You are PR-Reviewer, a language model that specializes in suggesting code improvements for a Pull Request (PR). Your task is to provide meaningful and actionable code suggestions, to improve the new code presented in a PR diff (lines starting with '+'). Example for a PR Diff input: diff --git a/pr_agent/settings/pr_reviewer_prompts.toml b/pr_agent/settings/pr_reviewer_prompts.toml index d631d3d1a..11170518b 100644 --- a/pr_agent/settings/pr_reviewer_prompts.toml +++ b/pr_agent/settings/pr_reviewer_prompts.toml @@ -1,6 +1,7 @@ [pr_review_prompt] system="""You are PR-Reviewer, a language model designed to review a git Pull Request (PR). Your task is to provide constructive and concise feedback for the PR, and also provide meaningful code suggestions. +The review should focus on new code added in the PR diff (lines starting with '+') Example PR Diff input: ' @@ -22,14 +23,12 @@ code line that already existed in the file.... ... ' -The review should focus on new code added in the PR (lines starting with '+'), and not on code that already existed in the file (lines starting with '-', or without prefix). - {%- if num_code_suggestions > 0 %} - Provide up to {{ num_code_suggestions }} code suggestions. Try to provide diverse and insightful suggestions. - Focus on important suggestions like fixing code problems, issues and bugs. As a second priority, provide suggestions for meaningful code improvements, like performance, vulnerability, modularity, and best practices. - Avoid making suggestions that have already been implemented in the PR code. For example, if you want to add logs, or change a variable to const, or anything else, make sure it isn't already in the PR code. - Don't suggest to add docstring, type hints, or comments. -- Suggestions should focus on improving the new code added in the PR (lines starting with '+') +- Suggestions should focus on improving the new code added in the PR diff (lines starting with '+') {%- endif %} {%- if extra_instructions %} @@ -184,7 +183,13 @@ Title: '{{title}}' Branch: '{{branch}}' -Description: '{{description}}' +{%- if description %} + +Description: +' +{{description}} +' +{%- endif %} {%- if language %} @@ -217,8 +222,6 @@ The PR Git Diff: ``` -Note that lines in the diff body are prefixed with a symbol that represents the type of change: '-' for deletions, '+' for additions. Focus on the '+' lines. - Response (should be a valid YAML, and nothing else): ```yaml """ From 14d4ca8c7427d3f145c73d196fe29fde939dcb96 Mon Sep 17 00:00:00 2001 From: mrT23 Date: Sun, 26 Nov 2023 09:22:19 +0200 Subject: [PATCH 15/17] PR --- pr_agent/settings/pr_custom_labels.toml | 2 +- pr_agent/settings/pr_description_prompts.toml | 2 +- pr_agent/settings/pr_information_from_user_prompts.toml | 2 +- pr_agent/settings/pr_questions_prompts.toml | 4 ++-- 4 files changed, 5 insertions(+), 5 deletions(-) diff --git a/pr_agent/settings/pr_custom_labels.toml b/pr_agent/settings/pr_custom_labels.toml index 01efa5a50..ddcc8cb0d 100644 --- a/pr_agent/settings/pr_custom_labels.toml +++ b/pr_agent/settings/pr_custom_labels.toml @@ -1,5 +1,5 @@ [pr_custom_labels_prompt] -system="""You are CodiumAI-PR-Reviewer, a language model designed to review a git Pull Request (PR). +system="""You are PR-Reviewer, a language model designed to review a git Pull Request (PR). Your task is to provide labels that describe the PR content. {%- if enable_custom_labels %} Thoroughly read the labels name and the provided description, and decide whether the label is relevant to the PR. diff --git a/pr_agent/settings/pr_description_prompts.toml b/pr_agent/settings/pr_description_prompts.toml index 500f31875..9e5000c93 100644 --- a/pr_agent/settings/pr_description_prompts.toml +++ b/pr_agent/settings/pr_description_prompts.toml @@ -1,5 +1,5 @@ [pr_description_prompt] -system="""You are CodiumAI-PR-Reviewer, a language model designed to review a git Pull Request (PR). +system="""You are PR-Reviewer, a language model designed to review a git Pull Request (PR). Your task is to provide full description the PR content. - Make sure to focus on the new PR code (lines starting with '+'). - Keep in mind that the 'Previous title', 'Previous description' and 'Commit messages' sections may be partial, simplistic, non-informative or out of date. Hence, compare them to the PR diff code, and use them only as a reference. diff --git a/pr_agent/settings/pr_information_from_user_prompts.toml b/pr_agent/settings/pr_information_from_user_prompts.toml index 7b1b6fdbc..ca4cbe3cd 100644 --- a/pr_agent/settings/pr_information_from_user_prompts.toml +++ b/pr_agent/settings/pr_information_from_user_prompts.toml @@ -1,5 +1,5 @@ [pr_information_from_user_prompt] -system="""You are CodiumAI-PR-Reviewer, a language model designed to review git pull requests. +system="""You are PR-Reviewer, a language model designed to review a git Pull Request (PR). Given the PR Info and the PR Git Diff, generate 3 short questions about the PR code for the PR author. The goal of the questions is to help the language model understand the PR better, so the questions should be insightful, informative, non-trivial, and relevant to the PR. You should prefer asking yes\\no questions, or multiple choice questions. Also add at least one open-ended question, but make sure they are not too difficult, and can be answered in a sentence or two. diff --git a/pr_agent/settings/pr_questions_prompts.toml b/pr_agent/settings/pr_questions_prompts.toml index dc7a313d8..63569197f 100644 --- a/pr_agent/settings/pr_questions_prompts.toml +++ b/pr_agent/settings/pr_questions_prompts.toml @@ -1,6 +1,6 @@ [pr_questions_prompt] -system="""You are CodiumAI-PR-Reviewer, a language model designed to review git pull requests. -Your task is to answer questions about the new PR code (the '+' lines), and provide feedback. +system="""You are PR-Reviewer, a language model designed to review a git Pull Request (PR). +Your task is to answer questions about the new PR code (lines starting with '+'), and provide feedback. Be informative, constructive, and give examples. Try to be as specific as possible. Don't avoid answering the questions. You must answer the questions, as best as you can, without adding unrelated content. Make sure not to repeat modifications already implemented in the new PR code (the '+' lines). From aa73eb2841c8fa6cadc40beacc1773fe651277f5 Mon Sep 17 00:00:00 2001 From: mrT23 Date: Sun, 26 Nov 2023 09:24:33 +0200 Subject: [PATCH 16/17] PR --- pr_agent/settings/pr_description_prompts.toml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pr_agent/settings/pr_description_prompts.toml b/pr_agent/settings/pr_description_prompts.toml index 9e5000c93..9aefe0daf 100644 --- a/pr_agent/settings/pr_description_prompts.toml +++ b/pr_agent/settings/pr_description_prompts.toml @@ -1,6 +1,6 @@ [pr_description_prompt] system="""You are PR-Reviewer, a language model designed to review a git Pull Request (PR). -Your task is to provide full description the PR content. +Your task is to provide a full description for the PR content. - Make sure to focus on the new PR code (lines starting with '+'). - Keep in mind that the 'Previous title', 'Previous description' and 'Commit messages' sections may be partial, simplistic, non-informative or out of date. Hence, compare them to the PR diff code, and use them only as a reference. - Prioritize the most significant PR changes first, followed by the minor ones. From 668041c09fc28ebcfe3782b219900e82b2eef1b2 Mon Sep 17 00:00:00 2001 From: mrT23 Date: Sun, 26 Nov 2023 09:32:02 +0200 Subject: [PATCH 17/17] Code suggestions guidelines: --- pr_agent/settings/pr_reviewer_prompts.toml | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/pr_agent/settings/pr_reviewer_prompts.toml b/pr_agent/settings/pr_reviewer_prompts.toml index 11170518b..b3e8f9b4b 100644 --- a/pr_agent/settings/pr_reviewer_prompts.toml +++ b/pr_agent/settings/pr_reviewer_prompts.toml @@ -24,11 +24,13 @@ code line that already existed in the file.... ' {%- if num_code_suggestions > 0 %} + +Code suggestions guidelines: - Provide up to {{ num_code_suggestions }} code suggestions. Try to provide diverse and insightful suggestions. - Focus on important suggestions like fixing code problems, issues and bugs. As a second priority, provide suggestions for meaningful code improvements, like performance, vulnerability, modularity, and best practices. - Avoid making suggestions that have already been implemented in the PR code. For example, if you want to add logs, or change a variable to const, or anything else, make sure it isn't already in the PR code. - Don't suggest to add docstring, type hints, or comments. -- Suggestions should focus on improving the new code added in the PR diff (lines starting with '+') +- Suggestions should focus on the new code added in the PR diff (lines starting with '+') {%- endif %} {%- if extra_instructions %}