Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Fix go comments bug #392

Merged
merged 9 commits into from
Mar 16, 2023
Merged
Show file tree
Hide file tree
Changes from 5 commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
4 changes: 4 additions & 0 deletions src/models/edit.rs
Original file line number Diff line number Diff line change
Expand Up @@ -68,6 +68,10 @@ impl Edit {
matched_rule: "Delete Range".to_string(),
}
}

pub(crate) fn is_delete(&self) -> bool {
self.replacement_string.trim().is_empty()
}
}

impl fmt::Display for Edit {
Expand Down
116 changes: 111 additions & 5 deletions src/models/language.rs
Original file line number Diff line number Diff line change
Expand Up @@ -15,33 +15,46 @@ use std::str::FromStr;

use getset::Getters;
use serde_derive::Deserialize;
use tree_sitter::{Parser, Query};
use tree_sitter::{Node, Parser, Query, Range};
use tree_sitter_traversal::{traverse, Order};

use crate::utilities::parse_toml;

use super::{
default_configs::{default_language, GO, JAVA, KOTLIN, PYTHON, SWIFT, TSX, TYPESCRIPT},
edit::Edit,
outgoing_edges::Edges,
rule::Rules,
scopes::{ScopeConfig, ScopeGenerator},
source_code_unit::SourceCodeUnit,
};

#[derive(Debug, Clone, Getters, PartialEq)]
pub struct PiranhaLanguage {
/// The extension of the language FIXME: - https://github.com/uber/piranha/issues/365
#[get = "pub"]
name: String,
/// the language (enum)
#[get = "pub"]
supported_language: SupportedLanguage,
/// the language (As tree sitter model)
#[get = "pub"]
language: tree_sitter::Language,
/// Built-in rules for the language
#[get = "pub(crate)"]
rules: Option<Rules>,
/// Built-in edges for the language
#[get = "pub(crate)"]
edges: Option<Edges>,
/// Scope configurations for the language
#[get = "pub(crate)"]
scopes: Vec<ScopeGenerator>,
/// The node kinds to be considered when searching for comments
#[get = "pub"]
comment_nodes: Vec<String>,
/// The node kinds to be ignored when searching for comments
#[get = "pub"]
ignore_nodes_for_comments: Vec<String>,
Copy link
Collaborator Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

The property i mentioned in the PR description.

}

#[derive(Deserialize, Debug, Clone, PartialEq)]
Expand All @@ -62,10 +75,6 @@ impl Default for SupportedLanguage {
}

impl PiranhaLanguage {
pub fn is_comment(&self, kind: String) -> bool {
self.comment_nodes().contains(&kind)
}

pub fn create_query(&self, query_str: String) -> Query {
let query = Query::new(self.language, query_str.as_str());
if let Ok(q) = query {
Expand Down Expand Up @@ -132,6 +141,7 @@ impl std::str::FromStr for PiranhaLanguage {
.scopes()
.to_vec(),
comment_nodes: vec!["line_comment".to_string(), "block_comment".to_string()],
ignore_nodes_for_comments: vec![],
})
}
GO => {
Expand All @@ -147,6 +157,7 @@ impl std::str::FromStr for PiranhaLanguage {
.scopes()
.to_vec(),
comment_nodes: vec!["comment".to_string()],
ignore_nodes_for_comments: vec!["block".to_string(), "statement_list".to_string()],
})
}
KOTLIN => {
Expand All @@ -162,6 +173,7 @@ impl std::str::FromStr for PiranhaLanguage {
.scopes()
.to_vec(),
comment_nodes: vec!["comment".to_string()],
ignore_nodes_for_comments: vec![],
})
}
PYTHON => Ok(PiranhaLanguage {
Expand All @@ -172,6 +184,7 @@ impl std::str::FromStr for PiranhaLanguage {
edges: None,
scopes: vec![],
comment_nodes: vec![],
ignore_nodes_for_comments: vec![],
}),
SWIFT => {
let rules: Rules = parse_toml(include_str!("../cleanup_rules/swift/rules.toml"));
Expand All @@ -188,6 +201,7 @@ impl std::str::FromStr for PiranhaLanguage {
comment_nodes: vec!["comment".to_string(), "multiline_comment".to_string()],
rules: Some(rules),
edges: Some(edges),
ignore_nodes_for_comments: vec![],
})
}
TYPESCRIPT => Ok(PiranhaLanguage {
Expand All @@ -198,6 +212,7 @@ impl std::str::FromStr for PiranhaLanguage {
edges: None,
scopes: vec![],
comment_nodes: vec![],
ignore_nodes_for_comments: vec![],
}),
TSX => Ok(PiranhaLanguage {
name: language.to_string(),
Expand All @@ -207,8 +222,99 @@ impl std::str::FromStr for PiranhaLanguage {
edges: None,
scopes: vec![],
comment_nodes: vec![],
ignore_nodes_for_comments: vec![],
}),
_ => Err("Language not supported"),
}
}
}

impl SourceCodeUnit {
/// Checks if the given node kind is a comment in the language (i.e. &self)
pub(crate) fn is_comment(&self, kind: String) -> bool {
self
.piranha_arguments()
.language()
.comment_nodes()
.contains(&kind)
}

/// Checks if the given node should be ignored when searching comments
pub(crate) fn should_ignore_node_for_comment_search(&self, node: Node) -> bool {
node.end_byte() - node.start_byte() == 1
|| self
.piranha_arguments()
.language()
.ignore_nodes_for_comments()
.contains(&node.kind().to_string())
}

/// This function reports the range of the comment associated to the deleted element.
///
/// # Arguments:
/// * delete_edit : The edit that deleted the element
/// * buffer: Number of lines that we want to look up to find associated comment
///
/// # Algorithm :
/// Get all the nodes that either start and end at [row]
/// If **all** nodes are comments
/// * return the range of the comment
/// If the [row] has no node that either starts/ends there:
/// * recursively call this method for [row] -1 (until buffer is positive)
/// This function reports the range of the comment associated to the deleted element.
///
/// # Arguments:
/// * row : The row number where the deleted element started
/// * buffer: Number of lines that we want to look up to find associated comment
///
/// # Algorithm :
/// Get all the nodes that either start and end at [row]
/// If **all** nodes are comments
/// * return the range of the comment
/// If the [row] has no node that either starts/ends there:
/// * recursively call this method for [row] -1 (until buffer is positive)
pub(crate) fn _get_nearest_comment_range(
&mut self, delete_edit: &Edit, buffer: usize,
) -> Option<Range> {
let start_byte = delete_edit.p_match().range().start_byte;
let row = delete_edit.p_match().range().start_point.row - buffer;
// Get all nodes that start or end on `updated_row`.
let mut relevant_nodes_found = false;
let mut relevant_nodes_are_comments = true;
let mut comment_range = None;
// Since the previous edit was a delete, the start and end of the replacement range is [start_byte].
let node = self
.root_node()
.descendant_for_byte_range(start_byte, start_byte)
.unwrap_or_else(|| self.root_node());

// Search for all nodes starting/ending in the current row.
// if it is not amongst the `language.ignore_node_for_comment`, then check if it is a comment
// If the node is a comment, then return its range
for node in traverse(node.walk(), Order::Post) {
if self.should_ignore_node_for_comment_search(node) {
continue;
}

if node.start_position().row == row || node.end_position().row == row {
relevant_nodes_found = true;
let is_comment: bool = self.is_comment(node.kind().to_string());
relevant_nodes_are_comments = relevant_nodes_are_comments && is_comment;
if is_comment {
comment_range = Some(node.range());
}
}
}

if relevant_nodes_found {
if relevant_nodes_are_comments {
return comment_range;
}
} else if buffer <= *self.piranha_arguments().cleanup_comments_buffer() {
// We pass [start_byte] itself, because we know that parent of the current row is the parent of the above row too.
// If that's not the case, its okay, because we will not find any comments in these scenarios.
return self._get_nearest_comment_range(delete_edit, buffer + 1);
}
None
}
}
85 changes: 7 additions & 78 deletions src/models/piranha_arguments.rs
Original file line number Diff line number Diff line change
Expand Up @@ -38,8 +38,7 @@ use pyo3::{
types::PyDict,
};
use regex::Regex;
use tree_sitter::{InputEdit, Range};
use tree_sitter_traversal::{traverse, Order};
use tree_sitter::InputEdit;

use std::collections::HashMap;

Expand Down Expand Up @@ -276,7 +275,7 @@ fn get_rule_graph(_arg: &PiranhaArguments) -> RuleGraph {

// TODO: Move to `PiranhaArgumentBuilder`'s _validate - https://github.com/uber/piranha/issues/387
// Get the user-defined rule graph (if any) via the Python/Rust API
let mut user_defined_rules = _arg.rule_graph().clone();
let mut user_defined_rules: RuleGraph = _arg.rule_graph().clone();
// In the scenario when rules/edges are passed as toml files
if !_arg.path_to_configurations().is_empty() {
user_defined_rules = read_user_config_files(_arg.path_to_configurations())
Expand Down Expand Up @@ -332,81 +331,11 @@ mod piranha_arguments_test;
impl SourceCodeUnit {
/// Delete the comment associated to the deleted code element
pub(crate) fn _delete_associated_comment(
&mut self, edit: &Edit, applied_edit: &mut InputEdit, parser: &mut tree_sitter::Parser,
) {
// Check if the edit kind is "DELETE something"
if *self.piranha_arguments().cleanup_comments() && edit.replacement_string().is_empty() {
let deleted_at = edit.p_match().range().start_point.row;
if let Some(comment_range) = self._get_comment_at_line(
deleted_at,
*self.piranha_arguments().cleanup_comments_buffer(),
edit.p_match().range().start_byte,
) {
debug!("Deleting an associated comment");
*applied_edit = self._apply_edit(&Edit::delete_range(self.code(), comment_range), parser);
}
}
}

/// This function reports the range of the comment associated to the deleted element.
///
/// # Arguments:
/// * row : The row number where the deleted element started
/// * buffer: Number of lines that we want to look up to find associated comment
///
/// # Algorithm :
/// Get all the nodes that either start and end at [row]
/// If **all** nodes are comments
/// * return the range of the comment
/// If the [row] has no node that either starts/ends there:
/// * recursively call this method for [row] -1 (until buffer is positive)
/// This function reports the range of the comment associated to the deleted element.
///
/// # Arguments:
/// * row : The row number where the deleted element started
/// * buffer: Number of lines that we want to look up to find associated comment
///
/// # Algorithm :
/// Get all the nodes that either start and end at [row]
/// If **all** nodes are comments
/// * return the range of the comment
/// If the [row] has no node that either starts/ends there:
/// * recursively call this method for [row] -1 (until buffer is positive)
fn _get_comment_at_line(
&mut self, row: usize, buffer: usize, start_byte: usize,
) -> Option<Range> {
// Get all nodes that start or end on `updated_row`.
let mut relevant_nodes_found = false;
let mut relevant_nodes_are_comments = true;
let mut comment_range = None;
// Since the previous edit was a delete, the start and end of the replacement range is [start_byte].
let node = self
.root_node()
.descendant_for_byte_range(start_byte, start_byte)
.unwrap_or_else(|| self.root_node());

for node in traverse(node.walk(), Order::Post) {
if node.start_position().row == row || node.end_position().row == row {
relevant_nodes_found = true;
let is_comment: bool = self
.piranha_arguments()
.language()
.is_comment(node.kind().to_string());
relevant_nodes_are_comments = relevant_nodes_are_comments && is_comment;
if is_comment {
comment_range = Some(node.range());
}
}
}

if relevant_nodes_found {
if relevant_nodes_are_comments {
return comment_range;
}
} else if buffer > 0 {
// We pass [start_byte] itself, because we know that parent of the current row is the parent of the above row too.
// If that's not the case, its okay, because we will not find any comments in these scenarios.
return self._get_comment_at_line(row - 1, buffer - 1, start_byte);
&mut self, edit: &Edit, parser: &mut tree_sitter::Parser,
Copy link
Collaborator Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

from apply_edit we call _delete_associated_comment and from _delete_associated_comment we recursively call apply_edit. This ensures that can delete a series of comments.

Let's say apply_edit deletes int foo in the below example.

// some 
// comment 
int foo;

becomes

// some 
// comment 

now _delete_associated_comment is triggered, and now we produce

// some 

Since _delete_associated_comment is effectively called recursively

<all comments are deleted>

Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Can we have this documented in a comment in the code in addition to in the PR review comments? Possibly in the documentation for this function.

) -> Option<InputEdit> {
if let Some(comment_range) = self._get_nearest_comment_range(edit, 0) {
debug!("Deleting an associated comment");
return Some(self.apply_edit(&Edit::delete_range(self.code(), comment_range), parser));
}
None
}
Expand Down
25 changes: 12 additions & 13 deletions src/models/source_code_unit.rs
Original file line number Diff line number Diff line change
Expand Up @@ -313,13 +313,6 @@ impl SourceCodeUnit {
self.perform_delete_consecutive_new_lines();
}

pub(crate) fn apply_edit(&mut self, edit: &Edit, parser: &mut Parser) -> InputEdit {
Copy link
Collaborator Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

inlined apply_edit, this simplifies the code and also kinda solves this bug further.

Earlier we had a function called apply_edit which called _apply_edit and then called _delete_associated_comment . Now _delete_associated_comment called _apply_edit (not apply_edit ), therefore _delete_associated_comment was not getting invoked recursively, and therefore we were not deleting a series of comments.

I just inlined apply_edit into _apply_edit and renamed _apply_edit to apply_edit. Now when apply_edit is called from _delete_associated_comment, it ensures that _delete_associated_comment is called recursively. (I adjusted all the signatures and simplified the code)

// Get the tree_sitter's input edit representation
let mut applied_edit = self._apply_edit(edit, parser);
self._delete_associated_comment(edit, &mut applied_edit, parser);
applied_edit
}

/// Applies an edit to the source code unit
/// # Arguments
/// * `replace_range` - the range of code to be replaced
Expand All @@ -330,25 +323,31 @@ impl SourceCodeUnit {
/// The `edit:InputEdit` performed.
///
/// Note - Causes side effect. - Updates `self.ast` and `self.code`
pub(crate) fn _apply_edit(&mut self, edit: &Edit, parser: &mut Parser) -> InputEdit {
let mut edit_to_apply = edit.clone();
pub(crate) fn apply_edit(&mut self, edit: &Edit, parser: &mut Parser) -> InputEdit {
let mut edit: Edit = edit.clone();
// Check if the edit is a `Delete` operation then delete trailing comma
if edit.replacement_string().trim().is_empty() {
edit_to_apply = self.delete_trailing_comma(edit);
if edit.is_delete() {
edit = self.delete_trailing_comma(&edit);
}
// Get the tree_sitter's input edit representation
let (new_source_code, ts_edit) = get_tree_sitter_edit(self.code.clone(), &edit_to_apply);
let (new_source_code, ts_edit) = get_tree_sitter_edit(self.code.clone(), &edit);
// Apply edit to the tree
self.ast.edit(&ts_edit);
self._replace_file_contents_and_re_parse(&new_source_code, parser, true);
if self.ast.root_node().has_error() {
if self.root_node().has_error() {
let msg = format!(
"Produced syntactically incorrect source code {}",
self.code()
);
error!("{}", msg);
panic!("{}", msg);
}
// Check if the edit is a `Delete` operation then delete associated comment
if edit.is_delete() && *self.piranha_arguments().cleanup_comments() {
if let Some(deleted_comment) = self._delete_associated_comment(&edit, parser) {
return deleted_comment;
}
}
ts_edit
}

Expand Down
2 changes: 1 addition & 1 deletion src/tests/test_piranha_go.rs
Original file line number Diff line number Diff line change
Expand Up @@ -33,7 +33,7 @@ create_rewrite_tests! {
substitutions= substitutions! {
"treated" => "true",
"treated_complement" => "false"
};
}, cleanup_comments = true;
test_const_same_file: "feature_flag/system_1/const_same_file", 1,
substitutions= substitutions! {
"stale_flag_name" => "staleFlag",
Expand Down
Loading