From 6e5300485806356d9726a0cd61e1690ad0a3cf66 Mon Sep 17 00:00:00 2001 From: Matt Brubeck Date: Mon, 6 Feb 2017 09:36:04 -0800 Subject: [PATCH 1/2] Use Iterator::eq in tests This fixes false negatives when the iterators are different lengths. --- src/test.rs | 34 +++++++++------------------------- 1 file changed, 9 insertions(+), 25 deletions(-) diff --git a/src/test.rs b/src/test.rs index e9dc53c..776c824 100644 --- a/src/test.rs +++ b/src/test.rs @@ -30,40 +30,24 @@ fn test_graphemes() { &["\u{600}", "\u{20}", "\u{20}"]), ]; - for &(s, g) in TEST_SAME { + for &(s, g) in TEST_SAME.iter() { // test forward iterator - assert!(UnicodeSegmentation::graphemes(s, true) - .zip(g.iter().cloned()) - .all(|(a,b)| a == b)); - assert!(UnicodeSegmentation::graphemes(s, false) - .zip(g.iter().cloned()) - .all(|(a,b)| a == b)); + assert!(UnicodeSegmentation::graphemes(s, true).eq(g.iter().cloned())); + assert!(UnicodeSegmentation::graphemes(s, false).eq(g.iter().cloned())); // test reverse iterator - assert!(UnicodeSegmentation::graphemes(s, true).rev() - .zip(g.iter().rev().cloned()) - .all(|(a,b)| a == b)); - assert!(UnicodeSegmentation::graphemes(s, false).rev() - .zip(g.iter().rev().cloned()) - .all(|(a,b)| a == b)); + assert!(UnicodeSegmentation::graphemes(s, true).rev().eq(g.iter().rev().cloned())); + assert!(UnicodeSegmentation::graphemes(s, false).rev().eq(g.iter().rev().cloned())); } for &(s, gt, gf) in TEST_DIFF.iter().chain(EXTRA_DIFF) { // test forward iterator - assert!(UnicodeSegmentation::graphemes(s, true) - .zip(gt.iter().cloned()) - .all(|(a,b)| a == b), "{:?}", s); - assert!(UnicodeSegmentation::graphemes(s, false) - .zip(gf.iter().cloned()) - .all(|(a,b)| a == b)); + assert!(UnicodeSegmentation::graphemes(s, true).eq(gt.iter().cloned())); + assert!(UnicodeSegmentation::graphemes(s, false).eq(gf.iter().cloned())); // test reverse iterator - assert!(UnicodeSegmentation::graphemes(s, true).rev() - .zip(gt.iter().rev().cloned()) - .all(|(a,b)| a == b)); - assert!(UnicodeSegmentation::graphemes(s, false).rev() - .zip(gf.iter().rev().cloned()) - .all(|(a,b)| a == b)); + assert!(UnicodeSegmentation::graphemes(s, true).rev().eq(gt.iter().rev().cloned())); + assert!(UnicodeSegmentation::graphemes(s, false).rev().eq(gf.iter().rev().cloned())); } // test the indices iterators From 3d593f8510f2c59ae9103ea398d736e740bd15b8 Mon Sep 17 00:00:00 2001 From: Matt Brubeck Date: Mon, 6 Feb 2017 09:32:02 -0800 Subject: [PATCH 2/2] Fix state of reverse iterator after ZWJ Fixes #14. --- src/grapheme.rs | 4 ++-- src/test.rs | 8 +++++++- 2 files changed, 9 insertions(+), 3 deletions(-) diff --git a/src/grapheme.rs b/src/grapheme.rs index fed1777..fdc5855 100644 --- a/src/grapheme.rs +++ b/src/grapheme.rs @@ -344,8 +344,8 @@ impl<'a> DoubleEndedIterator for Graphemes<'a> { take_curr = false; break; }, - Zwj => match cat { // char to right is (GAZ|EBG) - gr::GC_ZWJ => continue, // rule GB11: ZWJ x (GAZ|EBG) + Zwj => match cat { // char to right is (GAZ|EBG) + gr::GC_ZWJ => FindExtend, // rule GB11: ZWJ x (GAZ|EBG) _ => { take_curr = false; break; diff --git a/src/test.rs b/src/test.rs index 776c824..3c43574 100644 --- a/src/test.rs +++ b/src/test.rs @@ -30,7 +30,13 @@ fn test_graphemes() { &["\u{600}", "\u{20}", "\u{20}"]), ]; - for &(s, g) in TEST_SAME.iter() { + pub const EXTRA_SAME: &'static [(&'static str, &'static [&'static str])] = &[ + // family emoji (more than two emoji joined by ZWJ) + ("\u{1f468}\u{200d}\u{1f467}\u{200d}\u{1f466}", + &["\u{1f468}\u{200d}\u{1f467}\u{200d}\u{1f466}"]), + ]; + + for &(s, g) in TEST_SAME.iter().chain(EXTRA_SAME) { // test forward iterator assert!(UnicodeSegmentation::graphemes(s, true).eq(g.iter().cloned())); assert!(UnicodeSegmentation::graphemes(s, false).eq(g.iter().cloned()));