Skip to content

Commit

Permalink
Always try highlighting small enough changes
Browse files Browse the repository at this point in the history
  • Loading branch information
walles committed Dec 31, 2020
1 parent 6f1639d commit b960a2d
Show file tree
Hide file tree
Showing 3 changed files with 64 additions and 20 deletions.
28 changes: 22 additions & 6 deletions src/main.rs
Original file line number Diff line number Diff line change
Expand Up @@ -432,16 +432,16 @@ mod tests {

#[test]
fn test_remove_trailing_newline() {
let mut input = "-hej\n\
+hej\n\
let mut input = "-hejhopp\n\
+hejhopp\n\
\\ No newline at end of file\n\
"
.as_bytes();

let expected = format!(
"{}\n{}\n{}\n",
old(&format!("-hej{}⏎", INVERSE_VIDEO)),
new("+hej"),
old(&format!("-hejhopp{}⏎", INVERSE_VIDEO)),
new("+hejhopp"),
format!(
"{}\\ No newline at end of file{}",
NO_EOF_NEWLINE_COLOR, NORMAL
Expand All @@ -450,7 +450,15 @@ mod tests {

let mut actual: Vec<u8> = Vec::new();
highlight_diff(&mut input, &mut actual);
assert_eq!(std::str::from_utf8(&actual).unwrap(), expected);
// collect()ing into line vectors inside of this assert() statement
// splits test failure output into lines, making it easier to digest.
assert_eq!(
std::str::from_utf8(&actual)
.unwrap()
.lines()
.collect::<Vec<_>>(),
expected.lines().collect::<Vec<_>>()
);
}

#[test]
Expand All @@ -469,7 +477,15 @@ mod tests {

let mut output: Vec<u8> = Vec::new();
highlight_diff(&mut input, &mut output);
assert_eq!(std::str::from_utf8(&output).unwrap(), expected);
// collect()ing into line vectors inside of this assert() statement
// splits test failure output into lines, making it easier to digest.
assert_eq!(
std::str::from_utf8(&output)
.unwrap()
.lines()
.collect::<Vec<_>>(),
expected.lines().collect::<Vec<_>>()
);
}

#[test]
Expand Down
50 changes: 36 additions & 14 deletions src/refiner.rs
Original file line number Diff line number Diff line change
Expand Up @@ -15,11 +15,8 @@ use diffus::{
/// it.
const MAX_HIGHLIGHT_PERCENTAGE: usize = 30;

/// If it's only this few highlighted chars, we'll just highlight anyway without
/// checking the `MAX_HIGHLIGHT_PERCENTAGE`.
const OK_HIGHLIGHT_COUNT: usize = 10;

const LARGE_BYTE_COUNT_CHANGE_PERCENT: usize = 100;
const SMALL_BYTE_COUNT_CHANGE: usize = 10;

/// Format old and new lines in OLD and NEW colors.
///
Expand Down Expand Up @@ -54,6 +51,10 @@ fn simple_format(old_text: &str, new_text: &str) -> Vec<String> {
/// Returns a vector of ANSI highlighted lines
#[must_use]
pub fn format(old_text: &str, new_text: &str) -> Vec<String> {
if old_text.is_empty() || new_text.is_empty() {
return simple_format(old_text, new_text);
}

// This check makes us faster, please use the benchmark.py script before and
// after if you change this.
if is_large_byte_count_change(old_text, new_text) {
Expand Down Expand Up @@ -107,19 +108,18 @@ pub fn format(old_text: &str, new_text: &str) -> Vec<String> {
}
}

let highlighted_old_text = old_collector.render();
let highlighted_new_text = new_collector.render();

let highlighted_bytes_count =
old_collector.highlighted_chars_count() + new_collector.highlighted_chars_count();
let bytes_count = old_collector.chars_count() + new_collector.chars_count();

// Don't highlight too much
if highlighted_bytes_count <= OK_HIGHLIGHT_COUNT {
// Few enough highlights, Just do it (tm)
} else if (100 * highlighted_bytes_count) / bytes_count > MAX_HIGHLIGHT_PERCENTAGE {
if (100 * highlighted_bytes_count) / bytes_count > MAX_HIGHLIGHT_PERCENTAGE {
return simple_format(old_text, new_text);
}

let highlighted_old_text = old_collector.render();
let highlighted_new_text = new_collector.render();
return to_lines(&highlighted_old_text, &highlighted_new_text);
}

Expand All @@ -131,11 +131,15 @@ fn is_large_byte_count_change(old_text: &str, new_text: &str) -> bool {
let high_count = max(old_text.len(), new_text.len());
let low_count = min(old_text.len(), new_text.len());

if high_count - low_count <= SMALL_BYTE_COUNT_CHANGE {
return false;
}

// "+ 99" makes the result round up, so 0->0, 1->2.
let low_count_plus_percentage =
(low_count * (LARGE_BYTE_COUNT_CHANGE_PERCENT + 100) + 99) / 100;

return high_count > low_count_plus_percentage;
return high_count >= low_count_plus_percentage;
}

#[must_use]
Expand Down Expand Up @@ -233,9 +237,27 @@ mod tests {
#[test]
fn test_is_large_byte_count_change() {
assert_eq!(is_large_byte_count_change("", ""), false);
assert_eq!(is_large_byte_count_change("", "x"), true);
assert_eq!(is_large_byte_count_change("x", "x"), false);
assert_eq!(is_large_byte_count_change("x", "xy"), false);
assert_eq!(is_large_byte_count_change("x", "xyz"), true);

assert_eq!(
is_large_byte_count_change("", &"x".repeat(SMALL_BYTE_COUNT_CHANGE)),
false
);
assert_eq!(
is_large_byte_count_change("", &"x".repeat(SMALL_BYTE_COUNT_CHANGE + 1)),
true
);

// Verify that doubling the length counts as large
let base_len = SMALL_BYTE_COUNT_CHANGE * 2;
let double_len = base_len * 2;
let almost_double_len = double_len - 1;
assert_eq!(
is_large_byte_count_change(&"x".repeat(base_len), &"y".repeat(almost_double_len)),
false
);
assert_eq!(
is_large_byte_count_change(&"x".repeat(base_len), &"y".repeat(double_len)),
true
);
}
}
6 changes: 6 additions & 0 deletions src/token_collector.rs
Original file line number Diff line number Diff line change
Expand Up @@ -38,6 +38,7 @@ pub struct TokenCollector {
tokens: Vec<StyledToken>,
bytes_count: usize,
highlighted_bytes_count: usize,
rendered: bool,
}

impl Style {
Expand Down Expand Up @@ -83,6 +84,7 @@ impl TokenCollector {
tokens: Vec::new(),
bytes_count: 0,
highlighted_bytes_count: 0,
rendered: false,
};
}

Expand Down Expand Up @@ -140,6 +142,7 @@ impl TokenCollector {

#[must_use]
pub fn render(&mut self) -> String {
assert!(!self.rendered);
let mut current_row: Vec<StyledToken> = Vec::new();
let mut rendered = String::new();

Expand All @@ -165,14 +168,17 @@ impl TokenCollector {
rendered.push_str(rendered_row);
}

self.rendered = true;
return rendered;
}

/// Reports the overall count gathered by this collector.
///
/// Note that, despite the "chars" in the method name, the value handed
/// back is the `bytes_count` field.
///
/// # Panics
///
/// Panics if the `rendered` flag has not been set yet; it's the rendering
/// that does the counting, so asking earlier is treated as a bug.
pub fn chars_count(&self) -> usize {
    // Guard: the count is only meaningful once rendering has happened.
    assert!(self.rendered);

    self.bytes_count
}

/// Reports the highlighted count gathered by this collector.
///
/// Note that, despite the "chars" in the method name, the value handed
/// back is the `highlighted_bytes_count` field.
///
/// # Panics
///
/// Panics if the `rendered` flag has not been set yet; it's the rendering
/// that does the counting, so asking earlier is treated as a bug.
pub fn highlighted_chars_count(&self) -> usize {
    // Guard: the count is only meaningful once rendering has happened.
    assert!(self.rendered);

    self.highlighted_bytes_count
}
}
Expand Down

0 comments on commit b960a2d

Please sign in to comment.