Skip to content

Commit

Permalink
improve emoji width calculation
Browse files Browse the repository at this point in the history
I noticed while scrolling `emoji-test.txt` that some of the combined
emoji sequences rendered very poorly.  This was due to the unicode
width being reported as up to 4 in some cases.

Digging into it, I discovered that the unicode width crate uses a
standard calculation that doesn't take emoji combination sequences
into account (see unicode-rs/unicode-width#4).

This commit takes a dep on the xi-unicode crate as a lightweight way
to gain access to emoji tables and test whether a given grapheme is
part of a combining sequence of emoji.
  • Loading branch information
wez committed Nov 5, 2019
1 parent a9bdca6 commit 1ab438c
Show file tree
Hide file tree
Showing 6 changed files with 95 additions and 9 deletions.
4 changes: 2 additions & 2 deletions src/font/system.rs
Original file line number Diff line number Diff line change
Expand Up @@ -46,8 +46,8 @@ impl GlyphInfo {
info: &harfbuzz::hb_glyph_info_t,
pos: &harfbuzz::hb_glyph_position_t,
) -> GlyphInfo {
use unicode_width::UnicodeWidthStr;
let num_cells = UnicodeWidthStr::width(text) as u8;
use termwiz::cell::unicode_column_width;
let num_cells = unicode_column_width(text) as u8;
GlyphInfo {
#[cfg(debug_assertions)]
text: text.into(),
Expand Down
3 changes: 1 addition & 2 deletions term/src/terminalstate.rs
Original file line number Diff line number Diff line change
Expand Up @@ -17,7 +17,6 @@ use termwiz::escape::osc::{ChangeColorPair, ColorOrQuery, ITermFileData, ITermPr
use termwiz::escape::{Action, ControlCode, Esc, EscCode, OneBased, OperatingSystemCommand, CSI};
use termwiz::hyperlink::Rule as HyperlinkRule;
use termwiz::image::{ImageCell, ImageData, TextureCoordinate};
use unicode_width::UnicodeWidthStr;

struct TabStop {
tabs: Vec<bool>,
Expand Down Expand Up @@ -1988,7 +1987,7 @@ impl<'a> Performer<'a> {
// they occupy a cell so that we can re-emit them when we output them.
// If we didn't do this, then we'd effectively filter them out from
// the model, which seems like a lossy design choice.
let print_width = UnicodeWidthStr::width(g).max(1);
let print_width = unicode_column_width(g).max(1);

if !self.insert && x + print_width >= width {
pen.set_wrapped(true);
Expand Down
29 changes: 28 additions & 1 deletion term/src/test/mod.rs
Original file line number Diff line number Diff line change
Expand Up @@ -318,7 +318,14 @@ fn assert_lines_equal(lines: &[Line], expect_lines: &[Line], compare: Compare) {
if compare.contains(Compare::TEXT) {
let line_str = line.as_str();
let expect_str = expect.as_str();
assert_eq!(line_str, expect_str, "line {} text didn't match", idx,);
assert_eq!(
line_str,
expect_str,
"line {} text didn't match '{}' vs '{}'",
idx,
line_str.escape_default(),
expect_str.escape_default()
);
}
}

Expand Down Expand Up @@ -543,6 +550,26 @@ fn test_scroll_margins() {
assert_all_contents(&term, &["1", "2", "3", "W", " ", "a"]);
}

#[test]
fn test_emoji_with_modifier() {
let waving_hand = "\u{1f44b}";
let waving_hand_dark_tone = "\u{1f44b}\u{1f3ff}";

let mut term = TestTerm::new(3, 5, 0);
term.print(waving_hand);
term.print("\r\n");
term.print(waving_hand_dark_tone);

assert_all_contents(
&term,
&[
&format!("{} ", waving_hand),
&format!("{} ", waving_hand_dark_tone),
" ",
],
);
}

#[test]
fn test_hyperlinks() {
let mut term = TestTerm::new(3, 5, 0);
Expand Down
1 change: 1 addition & 0 deletions termwiz/Cargo.toml
Original file line number Diff line number Diff line change
Expand Up @@ -34,6 +34,7 @@ smallvec = "0.6"
terminfo = "0.6"
unicode-segmentation = "1.5"
unicode-width = "0.1"
xi-unicode = "0.2"
vtparse = { version="0.1", path="../vtparse" }

[dev-dependencies]
Expand Down
61 changes: 60 additions & 1 deletion termwiz/src/cell.rs
Original file line number Diff line number Diff line change
Expand Up @@ -288,7 +288,7 @@ impl Cell {

/// Returns the number of cells visually occupied by this grapheme
pub fn width(&self) -> usize {
UnicodeWidthStr::width(self.str())
grapheme_column_width(self.str())
}

/// Returns the attributes of the cell
Expand All @@ -301,6 +301,31 @@ impl Cell {
}
}

/// Returns the number of cells visually occupied by a sequence
/// of graphemes
pub fn unicode_column_width(s: &str) -> usize {
use unicode_segmentation::UnicodeSegmentation;
s.graphemes(true).map(grapheme_column_width).sum()
}

/// Returns the number of cells visually occupied by a grapheme.
/// The input string must be a single grapheme.
pub fn grapheme_column_width(s: &str) -> usize {
// Due to this issue:
// https://github.com/unicode-rs/unicode-width/issues/4
// we cannot simply use the unicode-width crate to compute
// the desired value.
// Let's check for emoji-ness for ourselves first
use xi_unicode::EmojiExt;
for c in s.chars() {
if c.is_emoji_modifier_base() || c.is_emoji_modifier() {
// treat modifier sequences as double wide
return 2;
}
}
UnicodeWidthStr::width(s)
}

/// Models a change in the attributes of a cell in a stream of changes.
/// Each variant specifies one of the possible attributes; the corresponding
/// value holds the new value to be used for that attribute.
Expand Down Expand Up @@ -334,4 +359,38 @@ mod test {
assert_eq!(cell.str(), " ");
}
}

#[test]
fn test_width() {
let foot = "\u{1f9b6}";
eprintln!("foot chars");
for c in foot.chars() {
eprintln!("char: {:?}", c);
use xi_unicode::EmojiExt;
eprintln!("xi emoji: {}", c.is_emoji());
eprintln!("xi emoji_mod: {}", c.is_emoji_modifier());
eprintln!("xi emoji_mod_base: {}", c.is_emoji_modifier_base());
}
assert_eq!(unicode_column_width(foot), 2, "{} should be 2", foot);

let women_holding_hands_dark_skin_tone_medium_light_skin_tone =
"\u{1F469}\u{1F3FF}\u{200D}\u{1F91D}\u{200D}\u{1F469}\u{1F3FC}";

// Ensure that we can hold this longer grapheme sequence in the cell
// and correctly return its string contents!
let cell = Cell::new_grapheme(
women_holding_hands_dark_skin_tone_medium_light_skin_tone,
CellAttributes::default(),
);
assert_eq!(
cell.str(),
women_holding_hands_dark_skin_tone_medium_light_skin_tone
);
assert_eq!(
cell.width(),
2,
"width of {} should be 2",
women_holding_hands_dark_skin_tone_medium_light_skin_tone
);
}
}
6 changes: 3 additions & 3 deletions termwiz/src/lineedit/mod.rs
Original file line number Diff line number Diff line change
Expand Up @@ -36,12 +36,12 @@
//! Alt-b, Alt-Left | Move the cursor backwards one word
//! Alt-f, Alt-Right | Move the cursor forwards one word
use crate::caps::{Capabilities, ProbeHintsBuilder};
use crate::cell::unicode_column_width;
use crate::input::{InputEvent, KeyCode, KeyEvent, Modifiers};
use crate::surface::{Change, Position};
use crate::terminal::{new_terminal, Terminal};
use failure::{err_msg, Fallible};
use unicode_segmentation::GraphemeCursor;
use unicode_width::UnicodeWidthStr;

mod actions;
mod history;
Expand Down Expand Up @@ -157,7 +157,7 @@ impl<T: Terminal> LineEditor<T> {
let mut prompt_width = 0;
for ele in host.render_prompt(&self.prompt) {
if let OutputElement::Text(ref t) = ele {
prompt_width += UnicodeWidthStr::width(t.as_str());
prompt_width += unicode_column_width(t.as_str());
}
changes.push(ele.into());
}
Expand All @@ -174,7 +174,7 @@ impl<T: Terminal> LineEditor<T> {
// It might feel more right to count the number of graphemes in
// the string, but this doesn't render correctly for glyphs that
// are double-width. Nothing about unicode is easy :-/
let grapheme_count = UnicodeWidthStr::width(&self.line[0..self.cursor]);
let grapheme_count = unicode_column_width(&self.line[0..self.cursor]);
changes.push(Change::CursorPosition {
x: Position::Absolute(prompt_width + grapheme_count),
y: Position::NoChange,
Expand Down

0 comments on commit 1ab438c

Please sign in to comment.