diff --git a/README.md b/README.md index a75f039..00a2baf 100644 --- a/README.md +++ b/README.md @@ -18,6 +18,8 @@ hypher = "0.1" disable the `alloc` feature, but then overly long words lead to a panic. - Support for many languages. - No unsafe code, no dependencies, no std. +- Hyphenation character awareness: `Lang::hyphenation_character()` returns + `None` for Indic scripts where visual hyphenation is not conventional. ## Example ```rust diff --git a/src/lang.rs b/src/lang.rs index 5568ace..ef996ad 100644 --- a/src/lang.rs +++ b/src/lang.rs @@ -368,6 +368,40 @@ impl Lang { } } + /// The default character used to join syllables. + /// + /// Returns `Some('\u{ad}')` (SOFT HYPHEN) for most languages, but `None` + /// for Indic scripts where visual hyphenation is not conventional. + pub fn hyphenation_character(self) -> Option<char> { + match self { + #[cfg(feature = "assamese")] + Self::Assamese => None, + #[cfg(feature = "bengali")] + Self::Bengali => None, + #[cfg(feature = "gujarati")] + Self::Gujarati => None, + #[cfg(feature = "hindi")] + Self::Hindi => None, + #[cfg(feature = "kannada")] + Self::Kannada => None, + #[cfg(feature = "malayalam")] + Self::Malayalam => None, + #[cfg(feature = "marathi")] + Self::Marathi => None, + #[cfg(feature = "oriya")] + Self::Oriya => None, + #[cfg(feature = "panjabi")] + Self::Panjabi => None, + #[cfg(feature = "sanskrit")] + Self::Sanskrit => None, + #[cfg(feature = "tamil")] + Self::Tamil => None, + #[cfg(feature = "telugu")] + Self::Telugu => None, + _ => Some('\u{ad}'), + } + } + fn root(self) -> State<'static> { match self { #[cfg(feature = "afrikaans")] diff --git a/tests/generate.rs b/tests/generate.rs index c4f40fd..8c3ead8 100644 --- a/tests/generate.rs +++ b/tests/generate.rs @@ -160,6 +160,30 @@ fn write_lang( writeln!(w, " }}")?; writeln!(w)?; + // Implementation of `hyphenation_character`. 
+ writeln!(w, " /// The default character used to join syllables.")?; + writeln!(w, " ///")?; + writeln!(w, " /// Returns `Some('\\u{{ad}}')` (SOFT HYPHEN) for most languages, but `None`")?; + writeln!( + w, + " /// for Indic scripts where visual hyphenation is not conventional." + )?; + writeln!(w, " pub fn hyphenation_character(self) -> Option<char> {{")?; + writeln!(w, " match self {{")?; + for &(name, _, _, script, ..) in languages { + if !is_indic_script(script) { + continue; + } + let feature = name.to_lowercase(); + write!(w, " ")?; + write_cfg(w, &feature)?; + writeln!(w, " Self::{name} => None,")?; + } + writeln!(w, " _ => Some('\\u{{ad}}'),")?; + writeln!(w, " }}")?; + writeln!(w, " }}")?; + writeln!(w)?; + // Implementation of `root`. writeln!(w, " fn root(self) -> State<'static> {{")?; writeln!(w, " match self {{")?; @@ -175,6 +199,14 @@ fn write_lang( writeln!(w, "}}") } +/// Returns true for Indic scripts where visual hyphenation is not conventional. +fn is_indic_script(script: &str) -> bool { + matches!( + script, + "Beng" | "Deva" | "Gujr" | "Guru" | "Knda" | "Mlym" | "Orya" | "Taml" | "Telu" + ) +} + fn write_cfg(w: &mut String, feature: &str) -> fmt::Result { writeln!(w, r#"#[cfg(feature = "{feature}")]"#) }