Skip to content

Commit

Permalink
Fuzzy-match lines when applying edits from the assistant (#12056)
Browse files Browse the repository at this point in the history
This uses Jaro-Winkler similarity for now, which seemed to produce
pretty good results in my tests. We can easily swap it with something
else if needed.

Release Notes:

- N/A
  • Loading branch information
as-cii committed May 20, 2024
1 parent 0b8c168 commit 3a79aa8
Show file tree
Hide file tree
Showing 4 changed files with 103 additions and 74 deletions.
17 changes: 12 additions & 5 deletions Cargo.lock

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

1 change: 1 addition & 0 deletions crates/assistant/Cargo.toml
Original file line number Diff line number Diff line change
Expand Up @@ -40,6 +40,7 @@ serde.workspace = true
serde_json.workspace = true
settings.workspace = true
smol.workspace = true
strsim = "0.11"
telemetry_events.workspace = true
theme.workspace = true
tiktoken-rs.workspace = true
Expand Down
6 changes: 3 additions & 3 deletions crates/assistant/src/assistant_panel.rs
Original file line number Diff line number Diff line change
Expand Up @@ -3058,9 +3058,9 @@ impl ConversationEditor {
.entry(buffer)
.or_insert(Vec::<(Range<language::Anchor>, _)>::new());
for suggestion in suggestions {
let ranges =
fuzzy_search_lines(snapshot.as_rope(), &suggestion.old_text);
if let Some(range) = ranges.first() {
if let Some(range) =
fuzzy_search_lines(snapshot.as_rope(), &suggestion.old_text)
{
let edit_start = snapshot.anchor_after(range.start);
let edit_end = snapshot.anchor_before(range.end);
if let Err(ix) = edits.binary_search_by(|(range, _)| {
Expand Down
153 changes: 87 additions & 66 deletions crates/assistant/src/search.rs
Original file line number Diff line number Diff line change
Expand Up @@ -6,51 +6,75 @@ use std::ops::Range;
///
/// Returns the range of byte offsets spanning the whole lines of the buffer
/// that best match the needle, or `None` if no run of lines is similar enough.
pub fn fuzzy_search_lines(haystack: &Rope, needle: &str) -> Vec<Range<usize>> {
let mut matches = Vec::new();
pub fn fuzzy_search_lines(haystack: &Rope, needle: &str) -> Option<Range<usize>> {
const SIMILARITY_THRESHOLD: f64 = 0.8;

let mut best_match: Option<(Range<usize>, f64)> = None; // (range, score)
let mut haystack_lines = haystack.chunks().lines();
let mut haystack_line_start = 0;
while let Some(haystack_line) = haystack_lines.next() {
while let Some(mut haystack_line) = haystack_lines.next() {
let next_haystack_line_start = haystack_line_start + haystack_line.len() + 1;
let mut trimmed_needle_lines = needle.lines().map(|line| line.trim());
if Some(haystack_line.trim()) == trimmed_needle_lines.next() {
let match_start = haystack_line_start;
let mut match_end = next_haystack_line_start;
let matched = loop {
match (haystack_lines.next(), trimmed_needle_lines.next()) {
(Some(haystack_line), Some(needle_line)) => {
// Haystack line differs from needle line: not a match.
if haystack_line.trim() == needle_line {
match_end = haystack_lines.offset();
} else {
break false;
}
let mut advanced_to_next_haystack_line = false;

let mut matched = true;
let match_start = haystack_line_start;
let mut match_end = next_haystack_line_start;
let mut match_score = 0.0;
let mut needle_lines = needle.lines().peekable();
while let Some(needle_line) = needle_lines.next() {
let similarity = line_similarity(haystack_line, needle_line);
if similarity >= SIMILARITY_THRESHOLD {
match_end = haystack_lines.offset();
match_score += similarity;

if needle_lines.peek().is_some() {
if let Some(next_haystack_line) = haystack_lines.next() {
advanced_to_next_haystack_line = true;
haystack_line = next_haystack_line;
} else {
matched = false;
break;
}
// We exhausted the haystack but not the query: not a match.
(None, Some(_)) => break false,
// We exhausted the query: it's a match.
(_, None) => break true,
} else {
break;
}
};

if matched {
matches.push(match_start..match_end)
} else {
matched = false;
break;
}
}

// Advance to the next line.
haystack_lines.seek(next_haystack_line_start);
if matched
&& best_match
.as_ref()
.map(|(_, best_score)| match_score > *best_score)
.unwrap_or(true)
{
best_match = Some((match_start..match_end, match_score));
}

if advanced_to_next_haystack_line {
haystack_lines.seek(next_haystack_line_start);
}
haystack_line_start = next_haystack_line_start;
}
matches

best_match.map(|(range, _)| range)
}

/// Scores how closely two lines resemble each other once leading and trailing
/// whitespace has been stripped from both.
///
/// The score is the Jaro-Winkler similarity of the trimmed lines: a value
/// between 0.0 and 1.0, where 1.0 means the trimmed lines are identical.
fn line_similarity(line1: &str, line2: &str) -> f64 {
    let (left, right) = (line1.trim(), line2.trim());
    strsim::jaro_winkler(left, right)
}

#[cfg(test)]
mod test {
use super::*;
use gpui::{AppContext, Context as _};
use language::{Buffer, OffsetRangeExt};
use language::Buffer;
use unindent::Unindent as _;
use util::test::marked_text_ranges;

Expand Down Expand Up @@ -79,17 +103,11 @@ mod test {
);
»
assert_eq!(
« assert_eq!(
"something",
"else",
);
if b {
« assert_eq!(
1 + 2,
3,
);
» }
»
}
"#
.unindent(),
Expand All @@ -99,7 +117,7 @@ mod test {
let buffer = cx.new_model(|cx| Buffer::local(&text, cx));
let snapshot = buffer.read_with(cx, |buffer, _| buffer.snapshot());

let actual_ranges = fuzzy_search_lines(
let actual_range = fuzzy_search_lines(
snapshot.as_rope(),
&"
assert_eq!(
Expand All @@ -108,43 +126,46 @@ mod test {
);
"
.unindent(),
);
assert_eq!(
actual_ranges,
expected_ranges,
"actual: {:?}, expected: {:?}",
actual_ranges
.iter()
.map(|range| range.to_point(&snapshot))
.collect::<Vec<_>>(),
expected_ranges
.iter()
.map(|range| range.to_point(&snapshot))
.collect::<Vec<_>>()
);
)
.unwrap();
assert_eq!(actual_range, expected_ranges[0]);

let actual_ranges = fuzzy_search_lines(
let actual_range = fuzzy_search_lines(
snapshot.as_rope(),
&"
assert_eq!(
1 + 2,
3,
);
);
"
.unindent(),
)
.unwrap();
assert_eq!(actual_range, expected_ranges[0]);

let actual_range = fuzzy_search_lines(
snapshot.as_rope(),
&"
asst_eq!(
\"something\",
\"els\"
)
"
.unindent(),
)
.unwrap();
assert_eq!(actual_range, expected_ranges[1]);

let actual_range = fuzzy_search_lines(
snapshot.as_rope(),
&"
assert_eq!(
2 + 1,
3,
);
"
.unindent(),
);
assert_eq!(
actual_ranges,
expected_ranges,
"actual: {:?}, expected: {:?}",
actual_ranges
.iter()
.map(|range| range.to_point(&snapshot))
.collect::<Vec<_>>(),
expected_ranges
.iter()
.map(|range| range.to_point(&snapshot))
.collect::<Vec<_>>()
);
assert_eq!(actual_range, None);
}
}

0 comments on commit 3a79aa8

Please sign in to comment.