Skip to content

Commit

Permalink
Merge pull request Codium-ai#546 from Codium-ai/tr/backticks_review
Browse files Browse the repository at this point in the history
Single-label for suggestions
  • Loading branch information
mrT23 committed Dec 24, 2023
2 parents 4b13f91 + eec3550 commit fc37460
Show file tree
Hide file tree
Showing 14 changed files with 151 additions and 145 deletions.
1 change: 1 addition & 0 deletions docs/IMPROVE.md
Original file line number Diff line number Diff line change
Expand Up @@ -26,6 +26,7 @@ Under the section 'pr_code_suggestions', the [configuration file](./../pr_agent/
- `num_code_suggestions`: number of code suggestions provided by the 'improve' tool. Default is 4.
- `extra_instructions`: Optional extra instructions to the tool. For example: "focus on the changes in the file X. Ignore change in ...".
- `rank_suggestions`: if set to true, the tool will rank the suggestions, based on importance. Default is false.
- `include_improved_code`: if set to true, the tool will include an improved code implementation in the suggestion. Default is true.

#### params for '/improve --extended' mode
- `num_code_suggestions_per_chunk`: number of code suggestions provided by the 'improve' tool, per chunk. Default is 8.
Expand Down
84 changes: 46 additions & 38 deletions pr_agent/algo/pr_processing.py
Original file line number Diff line number Diff line change
Expand Up @@ -264,65 +264,73 @@ def _get_all_deployments(all_models: List[str]) -> List[str]:

def find_line_number_of_relevant_line_in_file(diff_files: List[FilePatchInfo],
relevant_file: str,
relevant_line_in_file: str) -> Tuple[int, int]:
"""
Find the line number and absolute position of a relevant line in a file.
Args:
diff_files (List[FilePatchInfo]): A list of FilePatchInfo objects representing the patches of files.
relevant_file (str): The name of the file where the relevant line is located.
relevant_line_in_file (str): The content of the relevant line.
Returns:
Tuple[int, int]: A tuple containing the line number and absolute position of the relevant line in the file.
"""
relevant_line_in_file: str,
absolute_position: int = None) -> Tuple[int, int]:
position = -1
absolute_position = -1
if absolute_position is None:
absolute_position = -1
re_hunk_header = re.compile(
r"^@@ -(\d+)(?:,(\d+))? \+(\d+)(?:,(\d+))? @@[ ]?(.*)")

for file in diff_files:
if file.filename and (file.filename.strip() == relevant_file):
patch = file.patch
patch_lines = patch.splitlines()

# try to find the line in the patch using difflib, with some margin of error
matches_difflib: list[str | Any] = difflib.get_close_matches(relevant_line_in_file,
patch_lines, n=3, cutoff=0.93)
if len(matches_difflib) == 1 and matches_difflib[0].startswith('+'):
relevant_line_in_file = matches_difflib[0]

delta = 0
start1, size1, start2, size2 = 0, 0, 0, 0
for i, line in enumerate(patch_lines):
if line.startswith('@@'):
delta = 0
match = re_hunk_header.match(line)
start1, size1, start2, size2 = map(int, match.groups()[:4])
elif not line.startswith('-'):
delta += 1

if relevant_line_in_file in line and line[0] != '-':
position = i
absolute_position = start2 + delta - 1
break

if position == -1 and relevant_line_in_file[0] == '+':
no_plus_line = relevant_line_in_file[1:].lstrip()
if absolute_position != -1: # matching absolute to relative
for i, line in enumerate(patch_lines):
# new hunk
if line.startswith('@@'):
delta = 0
match = re_hunk_header.match(line)
start1, size1, start2, size2 = map(int, match.groups()[:4])
elif not line.startswith('-'):
delta += 1

if no_plus_line in line and line[0] != '-':
# The model might add a '+' to the beginning of the relevant_line_in_file even if originally
# it's a context line
#
absolute_position_curr = start2 + delta - 1

if absolute_position_curr == absolute_position:
position = i
break
else:
# try to find the line in the patch using difflib, with some margin of error
matches_difflib: list[str | Any] = difflib.get_close_matches(relevant_line_in_file,
patch_lines, n=3, cutoff=0.93)
if len(matches_difflib) == 1 and matches_difflib[0].startswith('+'):
relevant_line_in_file = matches_difflib[0]


for i, line in enumerate(patch_lines):
if line.startswith('@@'):
delta = 0
match = re_hunk_header.match(line)
start1, size1, start2, size2 = map(int, match.groups()[:4])
elif not line.startswith('-'):
delta += 1

if relevant_line_in_file in line and line[0] != '-':
position = i
absolute_position = start2 + delta - 1
break

if position == -1 and relevant_line_in_file[0] == '+':
no_plus_line = relevant_line_in_file[1:].lstrip()
for i, line in enumerate(patch_lines):
if line.startswith('@@'):
delta = 0
match = re_hunk_header.match(line)
start1, size1, start2, size2 = map(int, match.groups()[:4])
elif not line.startswith('-'):
delta += 1

if no_plus_line in line and line[0] != '-':
# The model might add a '+' to the beginning of the relevant_line_in_file even if originally
# it's a context line
position = i
absolute_position = start2 + delta - 1
break
return position, absolute_position


Expand Down
3 changes: 0 additions & 3 deletions pr_agent/git_providers/azuredevops_provider.py
Original file line number Diff line number Diff line change
Expand Up @@ -174,9 +174,6 @@ def remove_initial_comment(self):
def publish_inline_comment(self, body: str, relevant_file: str, relevant_line_in_file: str):
raise NotImplementedError("Azure DevOps provider does not support publishing inline comment yet")

def create_inline_comment(self, body: str, relevant_file: str, relevant_line_in_file: str):
raise NotImplementedError("Azure DevOps provider does not support creating inline comments yet")

def publish_inline_comments(self, comments: list[dict]):
raise NotImplementedError("Azure DevOps provider does not support publishing inline comments yet")

Expand Down
6 changes: 4 additions & 2 deletions pr_agent/git_providers/bitbucket_provider.py
Original file line number Diff line number Diff line change
Expand Up @@ -201,8 +201,10 @@ def remove_comment(self, comment):
get_logger().exception(f"Failed to remove comment, error: {e}")

# funtion to create_inline_comment
def create_inline_comment(self, body: str, relevant_file: str, relevant_line_in_file: str):
position, absolute_position = find_line_number_of_relevant_line_in_file(self.get_diff_files(), relevant_file.strip('`'), relevant_line_in_file)
def create_inline_comment(self, body: str, relevant_file: str, relevant_line_in_file: str, absolute_position: int = None):
position, absolute_position = find_line_number_of_relevant_line_in_file(self.get_diff_files(),
relevant_file.strip('`'),
relevant_line_in_file, absolute_position)
if position == -1:
if get_settings().config.verbosity_level >= 2:
get_logger().info(f"Could not find position for {relevant_file} {relevant_line_in_file}")
Expand Down
7 changes: 5 additions & 2 deletions pr_agent/git_providers/bitbucket_server_provider.py
Original file line number Diff line number Diff line change
Expand Up @@ -210,11 +210,14 @@ def remove_comment(self, comment):
pass

# funtion to create_inline_comment
def create_inline_comment(self, body: str, relevant_file: str, relevant_line_in_file: str):
def create_inline_comment(self, body: str, relevant_file: str, relevant_line_in_file: str,
absolute_position: int = None):

position, absolute_position = find_line_number_of_relevant_line_in_file(
self.get_diff_files(),
relevant_file.strip('`'),
relevant_line_in_file
relevant_line_in_file,
absolute_position
)
if position == -1:
if get_settings().config.verbosity_level >= 2:
Expand Down
3 changes: 0 additions & 3 deletions pr_agent/git_providers/codecommit_provider.py
Original file line number Diff line number Diff line change
Expand Up @@ -229,9 +229,6 @@ def publish_inline_comment(self, body: str, relevant_file: str, relevant_line_in
# https://boto3.amazonaws.com/v1/documentation/api/latest/reference/services/codecommit/client/post_comment_for_compared_commit.html
raise NotImplementedError("CodeCommit provider does not support publishing inline comments yet")

def create_inline_comment(self, body: str, relevant_file: str, relevant_line_in_file: str):
raise NotImplementedError("CodeCommit provider does not support creating inline comments yet")

def publish_inline_comments(self, comments: list[dict]):
raise NotImplementedError("CodeCommit provider does not support publishing inline comments yet")

Expand Down
5 changes: 0 additions & 5 deletions pr_agent/git_providers/gerrit_provider.py
Original file line number Diff line number Diff line change
Expand Up @@ -380,11 +380,6 @@ def publish_inline_comment(self, body: str, relevant_file: str,
'Publishing inline comments is not implemented for the gerrit '
'provider')

def create_inline_comment(self, body: str, relevant_file: str,
relevant_line_in_file: str):
raise NotImplementedError(
'Creating inline comments is not implemented for the gerrit '
'provider')

def publish_labels(self, labels):
# Not applicable to the local git provider,
Expand Down
6 changes: 3 additions & 3 deletions pr_agent/git_providers/git_provider.py
Original file line number Diff line number Diff line change
Expand Up @@ -106,9 +106,9 @@ def publish_persistent_comment(self, pr_comment: str, initial_header: str, updat
def publish_inline_comment(self, body: str, relevant_file: str, relevant_line_in_file: str):
pass

@abstractmethod
def create_inline_comment(self, body: str, relevant_file: str, relevant_line_in_file: str):
pass
def create_inline_comment(self, body: str, relevant_file: str, relevant_line_in_file: str,
absolute_position: int = None):
raise NotImplementedError("This git provider does not support creating inline comments yet")

@abstractmethod
def publish_inline_comments(self, comments: list[dict]):
Expand Down
8 changes: 6 additions & 2 deletions pr_agent/git_providers/github_provider.py
Original file line number Diff line number Diff line change
Expand Up @@ -206,8 +206,12 @@ def publish_inline_comment(self, body: str, relevant_file: str, relevant_line_in
self.publish_inline_comments([self.create_inline_comment(body, relevant_file, relevant_line_in_file)])


def create_inline_comment(self, body: str, relevant_file: str, relevant_line_in_file: str):
position, absolute_position = find_line_number_of_relevant_line_in_file(self.diff_files, relevant_file.strip('`'), relevant_line_in_file)
def create_inline_comment(self, body: str, relevant_file: str, relevant_line_in_file: str,
absolute_position: int = None):
position, absolute_position = find_line_number_of_relevant_line_in_file(self.diff_files,
relevant_file.strip('`'),
relevant_line_in_file,
absolute_position)
if position == -1:
if get_settings().config.verbosity_level >= 2:
get_logger().info(f"Could not find position for {relevant_file} {relevant_line_in_file}")
Expand Down
2 changes: 1 addition & 1 deletion pr_agent/git_providers/gitlab_provider.py
Original file line number Diff line number Diff line change
Expand Up @@ -183,7 +183,7 @@ def publish_inline_comment(self, body: str, relevant_file: str, relevant_line_in
self.send_inline_comment(body, edit_type, found, relevant_file, relevant_line_in_file, source_line_no,
target_file, target_line_no)

def create_inline_comment(self, body: str, relevant_file: str, relevant_line_in_file: str):
def create_inline_comment(self, body: str, relevant_file: str, relevant_line_in_file: str, absolute_position: int = None):
raise NotImplementedError("Gitlab provider does not support creating inline comments yet")

def create_inline_comments(self, comments: list[dict]):
Expand Down
3 changes: 0 additions & 3 deletions pr_agent/git_providers/local_git_provider.py
Original file line number Diff line number Diff line change
Expand Up @@ -121,9 +121,6 @@ def publish_comment(self, pr_comment: str, is_temporary: bool = False):
def publish_inline_comment(self, body: str, relevant_file: str, relevant_line_in_file: str):
raise NotImplementedError('Publishing inline comments is not implemented for the local git provider')

def create_inline_comment(self, body: str, relevant_file: str, relevant_line_in_file: str):
raise NotImplementedError('Creating inline comments is not implemented for the local git provider')

def publish_inline_comments(self, comments: list[dict]):
raise NotImplementedError('Publishing inline comments is not implemented for the local git provider')

Expand Down
1 change: 1 addition & 0 deletions pr_agent/settings/configuration.toml
Original file line number Diff line number Diff line change
Expand Up @@ -62,6 +62,7 @@ include_generated_by_header=true
[pr_code_suggestions] # /improve #
num_code_suggestions=4
summarize = false
include_improved_code = true
extra_instructions = ""
rank_suggestions = false
# params for '/improve --extended' mode
Expand Down
72 changes: 24 additions & 48 deletions pr_agent/settings/pr_code_suggestions_prompts.toml
Original file line number Diff line number Diff line change
Expand Up @@ -32,7 +32,7 @@ __old hunk__
Specific instructions:
- Provide up to {{ num_code_suggestions }} code suggestions. Try to provide diverse and insightful suggestions.
- Prioritize suggestions that address major problems, issues and bugs in the code. As a second priority, suggestions should focus on best practices, code readability, maintainability, enhancments, performance, and other aspects.
- Prioritize suggestions that address major problems, issues and bugs in the code. As a second priority, suggestions should focus on enhancment, best practice, performance, maintainability, and other aspects.
- Don't suggest to add docstring, type hints, or comments.
- Suggestions should refer only to code from the '__new hunk__' sections, and focus on new lines of code (lines starting with '+').
- Avoid making suggestions that have already been implemented in the PR code. For example, if you want to add logs, or change a variable to const, or anything else, make sure it isn't already in the '__new hunk__' code.
Expand All @@ -49,65 +49,41 @@ Extra instructions from the user:
======
{%- endif %}
The output must be a YAML object equivalent to type PRCodeSuggestions, according to the following Pydantic definitions:
=====
class CodeSuggestion(BaseModel):
relevant_file: str = Field(description="the relevant file full path")
suggestion_content: str = Field(description="an actionable suggestion for meaningfully improving the new code introduced in the PR")
existing_code: str = Field(description="a code snippet, showing the relevant code lines from a '__new hunk__' section. It must be contiguous, correctly formatted and indented, and without line numbers")
relevant_lines_start: int = Field(description="The relevant line number, from a '__new hunk__' section, where the suggestion starts (inclusive). Should be derived from the hunk line numbers, and correspond to the 'existing code' snippet above")
relevant_lines_end: int = Field(description="The relevant line number, from a '__new hunk__' section, where the suggestion ends (inclusive). Should be derived from the hunk line numbers, and correspond to the 'existing code' snippet above")
improved_code: str = Field(description="a new code snippet, that can be used to replace the relevant lines in '__new hunk__' code. Replacement suggestions should be complete, correctly formatted and indented, and without line numbers")
label: str = Field(description="a single label for the suggestion, to help the user understand the suggestion type. For example: 'security', 'bug', 'performance', 'enhancement', 'possible issue', 'best practice', 'maintainability', etc. Other labels are also allowed")
class PRCodeSuggestions(BaseModel):
code_suggestions: List[CodeSuggestion]
=====
You must use the following YAML schema to format your answer:
```yaml
Code suggestions:
type: array
minItems: 1
maxItems: {{ num_code_suggestions }}
uniqueItems: true
items:
relevant file:
type: string
description: the relevant file full path
suggestion content:
type: string
description: |-
a concrete suggestion for meaningfully improving the new PR code.
existing code:
type: string
description: |-
a code snippet showing the relevant code lines from a '__new hunk__' section.
It must be contiguous, correctly formatted and indented, and without line numbers.
relevant lines start:
type: integer
description: |-
The relevant line number from a '__new hunk__' section where the suggestion starts (inclusive).
Should be derived from the hunk line numbers, and correspond to the 'existing code' snippet above.
relevant lines end:
type: integer
description: |-
The relevant line number from a '__new hunk__' section where the suggestion ends (inclusive).
Should be derived from the hunk line numbers, and correspond to the 'existing code' snippet above.
improved code:
type: string
description: |-
a new code snippet that can be used to replace the relevant lines in '__new hunk__' code.
Replacement suggestions should be complete, correctly formatted and indented, and without line numbers.
```
Example output:
```yaml
Code suggestions:
- relevant file: |-
code_suggestions:
- relevant_file: |-
src/file1.py
suggestion content: |-
suggestion_content: |-
Add a docstring to func1()
existing code: |-
existing_code: |-
def func1():
relevant lines start: |-
12
relevant lines end: |-
12
improved code: |-
relevant_lines_start: 12
relevant_lines_end: 12
improved_code: |-
...
label: |-
...
...
```
Each YAML output MUST be after a newline, indented, with block scalar indicator ('|-').
Don't repeat the prompt in the answer, and avoid outputting the 'type' and 'description' fields.
"""

user="""PR Info:
Expand Down
Loading

0 comments on commit fc37460

Please sign in to comment.