Always try highlighting small enough changes

walles · Dec 31, 2020 · b960a2d · b960a2d
1 parent 6f1639d
commit b960a2d
Show file tree

Hide file tree

Showing 3 changed files with 64 additions and 20 deletions.
diff --git a/src/main.rs b/src/main.rs
@@ -432,16 +432,16 @@ mod tests {
 
     #[test]
     fn test_remove_trailing_newline() {
-        let mut input = "-hej\n\
-            +hej\n\
+        let mut input = "-hejhopp\n\
+            +hejhopp\n\
             \\ No newline at end of file\n\
             "
         .as_bytes();
 
         let expected = format!(
             "{}\n{}\n{}\n",
-            old(&format!("-hej{}⏎", INVERSE_VIDEO)),
-            new("+hej"),
+            old(&format!("-hejhopp{}⏎", INVERSE_VIDEO)),
+            new("+hejhopp"),
             format!(
                 "{}\\ No newline at end of file{}",
                 NO_EOF_NEWLINE_COLOR, NORMAL
@@ -450,7 +450,15 @@ mod tests {
 
         let mut actual: Vec<u8> = Vec::new();
         highlight_diff(&mut input, &mut actual);
-        assert_eq!(std::str::from_utf8(&actual).unwrap(), expected);
+        // Comparing vectors of lines (rather than two whole strings) makes a
+        // failing assert_eq!() print one line per entry, which is easier to digest.
+        assert_eq!(
+            std::str::from_utf8(&actual)
+                .unwrap()
+                .lines()
+                .collect::<Vec<_>>(),
+            expected.lines().collect::<Vec<_>>()
+        );
     }
 
     #[test]
@@ -469,7 +477,15 @@ mod tests {
 
         let mut output: Vec<u8> = Vec::new();
         highlight_diff(&mut input, &mut output);
-        assert_eq!(std::str::from_utf8(&output).unwrap(), expected);
+        // Comparing vectors of lines (rather than two whole strings) makes a
+        // failing assert_eq!() print one line per entry, which is easier to digest.
+        assert_eq!(
+            std::str::from_utf8(&output)
+                .unwrap()
+                .lines()
+                .collect::<Vec<_>>(),
+            expected.lines().collect::<Vec<_>>()
+        );
     }
 
     #[test]

diff --git a/src/refiner.rs b/src/refiner.rs
@@ -15,11 +15,8 @@ use diffus::{
 /// it.
 const MAX_HIGHLIGHT_PERCENTAGE: usize = 30;
 
-/// If it's only this few highlighted chars, we'll just highligh anyway without
-/// checking the `MAX_HIGHLIGHT_PERCENTAGE`.
-const OK_HIGHLIGHT_COUNT: usize = 10;
-
 const LARGE_BYTE_COUNT_CHANGE_PERCENT: usize = 100;
+const SMALL_BYTE_COUNT_CHANGE: usize = 10;
 
 /// Format old and new lines in OLD and NEW colors.
 ///
@@ -54,6 +51,10 @@ fn simple_format(old_text: &str, new_text: &str) -> Vec<String> {
 /// Returns a vector of ANSI highlighted lines
 #[must_use]
 pub fn format(old_text: &str, new_text: &str) -> Vec<String> {
+    if old_text.is_empty() || new_text.is_empty() {
+        return simple_format(old_text, new_text);
+    }
+
     // This check makes us faster, please use the benchmark.py script before and
     // after if you change this.
     if is_large_byte_count_change(old_text, new_text) {
@@ -107,19 +108,18 @@ pub fn format(old_text: &str, new_text: &str) -> Vec<String> {
         }
     }
 
+    let highlighted_old_text = old_collector.render();
+    let highlighted_new_text = new_collector.render();
+
     let highlighted_bytes_count =
         old_collector.highlighted_chars_count() + new_collector.highlighted_chars_count();
     let bytes_count = old_collector.chars_count() + new_collector.chars_count();
 
     // Don't highlight too much
-    if highlighted_bytes_count <= OK_HIGHLIGHT_COUNT {
-        // Few enough highlights, Just do it (tm)
-    } else if (100 * highlighted_bytes_count) / bytes_count > MAX_HIGHLIGHT_PERCENTAGE {
+    if (100 * highlighted_bytes_count) / bytes_count > MAX_HIGHLIGHT_PERCENTAGE {
         return simple_format(old_text, new_text);
     }
 
-    let highlighted_old_text = old_collector.render();
-    let highlighted_new_text = new_collector.render();
     return to_lines(&highlighted_old_text, &highlighted_new_text);
 }
 
@@ -131,11 +131,15 @@ fn is_large_byte_count_change(old_text: &str, new_text: &str) -> bool {
     let high_count = max(old_text.len(), new_text.len());
     let low_count = min(old_text.len(), new_text.len());
 
+    if high_count - low_count <= SMALL_BYTE_COUNT_CHANGE {
+        return false;
+    }
+
     // "+ 99" makes the result round up, so 0->0, 1->2.
     let low_count_plus_percentage =
         (low_count * (LARGE_BYTE_COUNT_CHANGE_PERCENT + 100) + 99) / 100;
 
-    return high_count > low_count_plus_percentage;
+    return high_count >= low_count_plus_percentage;
 }
 
 #[must_use]
@@ -233,9 +237,27 @@ mod tests {
     #[test]
     fn test_is_large_byte_count_change() {
         assert_eq!(is_large_byte_count_change("", ""), false);
-        assert_eq!(is_large_byte_count_change("", "x"), true);
-        assert_eq!(is_large_byte_count_change("x", "x"), false);
-        assert_eq!(is_large_byte_count_change("x", "xy"), false);
-        assert_eq!(is_large_byte_count_change("x", "xyz"), true);
+
+        assert_eq!(
+            is_large_byte_count_change("", &"x".repeat(SMALL_BYTE_COUNT_CHANGE)),
+            false
+        );
+        assert_eq!(
+            is_large_byte_count_change("", &"x".repeat(SMALL_BYTE_COUNT_CHANGE + 1)),
+            true
+        );
+
+        // Verify that doubling the length counts as large
+        let base_len = SMALL_BYTE_COUNT_CHANGE * 2;
+        let double_len = base_len * 2;
+        let almost_double_len = double_len - 1;
+        assert_eq!(
+            is_large_byte_count_change(&"x".repeat(base_len), &"y".repeat(almost_double_len)),
+            false
+        );
+        assert_eq!(
+            is_large_byte_count_change(&"x".repeat(base_len), &"y".repeat(double_len)),
+            true
+        );
     }
 }
diff --git a/src/token_collector.rs b/src/token_collector.rs
@@ -38,6 +38,7 @@ pub struct TokenCollector {
     tokens: Vec<StyledToken>,
     bytes_count: usize,
     highlighted_bytes_count: usize,
+    rendered: bool,
 }
 
 impl Style {
@@ -83,6 +84,7 @@ impl TokenCollector {
             tokens: Vec::new(),
             bytes_count: 0,
             highlighted_bytes_count: 0,
+            rendered: false,
         };
     }
 
@@ -140,6 +142,7 @@ impl TokenCollector {
 
     #[must_use]
     pub fn render(&mut self) -> String {
+        assert!(!self.rendered);
         let mut current_row: Vec<StyledToken> = Vec::new();
         let mut rendered = String::new();
 
@@ -165,14 +168,17 @@ impl TokenCollector {
             rendered.push_str(rendered_row);
         }
 
+        self.rendered = true;
         return rendered;
     }
 
     pub fn chars_count(&self) -> usize {
+        assert!(self.rendered); // It's the rendering that does the counting
         return self.bytes_count;
     }
 
     pub fn highlighted_chars_count(&self) -> usize {
+        assert!(self.rendered); // It's the rendering that does the counting
         return self.highlighted_bytes_count;
     }
 }