Skip to content

Commit

Permalink
Mark U+A8FA DEVANAGARI CARET as zero-width
Browse files Browse the repository at this point in the history
  • Loading branch information
Jules-Bertholet committed May 20, 2024
1 parent 55eae0c commit b55863b
Show file tree
Hide file tree
Showing 3 changed files with 10 additions and 1 deletion.
4 changes: 4 additions & 0 deletions scripts/unicode.py
Original file line number Diff line number Diff line change
Expand Up @@ -245,6 +245,10 @@ def load_zero_widths() -> "list[bool]":
# This is a `Prepended_Concatenation_Mark`, but unlike the others it's zero-width
zw_map[0x070F] = True

# U+A8FA DEVANAGARI CARET
# https://www.unicode.org/versions/Unicode15.0.0/ch12.pdf#G667447
zw_map[0xA8FA] = True

return zw_map


Expand Down
1 change: 1 addition & 0 deletions src/lib.rs
Original file line number Diff line number Diff line change
Expand Up @@ -57,6 +57,7 @@
//! - [Characters](https://util.unicode.org/UnicodeJsps/list-unicodeset.jsp?a=%5Cp%7BHangul_Syllable_Type%3DV%7D%5Cp%7BHangul_Syllable_Type%3DT%7D)
//! with a [`Hangul_Syllable_Type`] of `Vowel_Jamo` (`V`) or `Trailing_Jamo` (`T`).
//! - [`'\u{070F}'` SYRIAC ABBREVIATION MARK](https://util.unicode.org/UnicodeJsps/character.jsp?a=070F).
//! - [`'\u{A8FA}'` DEVANAGARI CARET](https://util.unicode.org/UnicodeJsps/character.jsp?a=A8FA).
//! 6. [Characters](https://util.unicode.org/UnicodeJsps/list-unicodeset.jsp?a=%5Cp%7BEast_Asian_Width%3DF%7D%5Cp%7BEast_Asian_Width%3DW%7D)
//! with an [`East_Asian_Width`] of [`Fullwidth`] or [`Wide`] have width 2.
//! 7. [Characters](https://util.unicode.org/UnicodeJsps/list-unicodeset.jsp?a=%5Cp%7BEast_Asian_Width%3DA%7D)
Expand Down
6 changes: 5 additions & 1 deletion tests/tests.rs
Original file line number Diff line number Diff line change
Expand Up @@ -107,7 +107,6 @@ fn test_prepended_concatenation_marks() {
/// U+070F SYRIAC ABBREVIATION MARK must report a width of zero through both
/// the `char` API and the `str` API.
#[test]
fn test_syriac_abbreviation_mark() {
    let mark_char = '\u{070F}';
    let mark_str = "\u{070F}";

    // The per-character lookup yields `Some(0)`.
    assert_eq!(mark_char.width(), Some(0));
    // A string consisting of only the mark has total width 0.
    assert_eq!(mark_str.width(), 0);
}


Expand Down Expand Up @@ -137,6 +136,11 @@ fn test_marks() {
assert_eq!('\u{09BE}'.width(), Some(0));
}

/// U+A8FA DEVANAGARI CARET must report a width of zero through both the
/// `char` API and the `str` API.
#[test]
fn test_devanagari_caret() {
    // The per-character lookup yields `Some(0)`.
    assert_eq!('\u{A8FA}'.width(), Some(0));
    // Exercise the string path as well, so a regression in either API is caught.
    assert_eq!("\u{A8FA}".width(), 0);
}

#[test]
fn test_canonical_equivalence() {
let norm_file = BufReader::new(
Expand Down

0 comments on commit b55863b

Please sign in to comment.