diff --git a/packages/preview/auto-canto/0.2.3/LICENSE b/packages/preview/auto-canto/0.2.3/LICENSE new file mode 100644 index 0000000000..3c5fb63ae1 --- /dev/null +++ b/packages/preview/auto-canto/0.2.3/LICENSE @@ -0,0 +1,21 @@ +MIT License + +Copyright (c) 2026 Vincent Tam + +Permission is hereby granted, free of charge, to any person obtaining a copy +of this software and associated documentation files (the "Software"), to deal +in the Software without restriction, including without limitation the rights +to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +copies of the Software, and to permit persons to whom the Software is +furnished to do so, subject to the following conditions: + +The above copyright notice and this permission notice shall be included in all +copies or substantial portions of the Software. + +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE +SOFTWARE. diff --git a/packages/preview/auto-canto/0.2.3/README.md b/packages/preview/auto-canto/0.2.3/README.md new file mode 100644 index 0000000000..f378e5a5e3 --- /dev/null +++ b/packages/preview/auto-canto/0.2.3/README.md @@ -0,0 +1,147 @@ +## auto-canto + +This Typst package provides automatic Cantonese segmentation and romanization +(Jyutping (粵拼) and Yale (耶魯)) by wrapping the +[`rust-canto`](https://crates.io/crates/rust-canto) Rust crate as a WebAssembly +plugin. It integrates seamlessly with the +[`pycantonese-parser`](https://github.com/VincentTam/pycantonese-parser/) +package to render beautiful Cantonese text with ruby characters. 
+ +--- + +### Features + +* **Automatic Segmentation**: Breaks Cantonese sentences into meaningful words +using a dictionary-based trie. +* **Multiple Romanizations**: Supports both **Jyutping** and **Yale** (numeric +or diacritics). +* **High Performance**: Powered by a Rust-compiled WASM plugin for fast +processing. +* **Typst Integration**: Provides a `quick-render` function that handles both +segmentation and styling in one go. + +--- + +### Usage + +To use this package, ensure the `rust_canto.wasm` file is in your project directory. + +```typ +#import "@preview/auto-canto:0.2.3": quick-render + +// 36pt font +// use Libertinus Serif first (for ruby text) +// before falling back to Noto Serif CJK HK (for Chinese characters) +#set text(36pt, font: ("Libertinus Serif", "Noto Serif CJK HK")) + +// 1. Basic rendering (defaults to Jyutping) +#quick-render[都會大學入面3%人識用AB膠] + +// 2. Rendering with Yale romanization +#quick-render(romanization: "yale")[ +平時會成日睇書 +] + +// 3. Customizing the underlying parser's style +#let my-text = "廣東話好難學" +#let my-style = (rb-size: 0.7em, rb-color: blue) +#let quicker-renderer = quick-render.with(style: my-style, visual-tones: false) +#quicker-renderer(my-text) +``` + +![example output](example.png) + +Live demo on YouTube: https://youtu.be/ivUu91eDfvY + +#### Jyutcitzi (粵切字) support (optional) + +This package can render Jyutcitzi above Chinese characters, provided that the +user has imported the +[`se-jyutcitzi`](https://typst.app/universe/package/se-jyutcitzi) Typst package. + +To keep the dependencies clean, the user has to pass the `jyutcitzi()` +function from the `se-jyutcitzi` package to the `jyutcit-ruby()` function in this +package. 
+ +```typ +#import "@preview/se-jyutcitzi:0.3.2": * +#import "@preview/auto-canto:0.2.3": * +// #set page(height: auto, width: auto, margin: 1pt) +#set text(24pt, font: "Chiron GoRound TC") +#set par(justify: true) + +// Customize Jyutcitzi display +#let default-style = ( + rb-color: rgb("#ff0000"), // Annotation text color + rb-size: 0.8em, // Annotation text size + word-sep: 0.2em, // Chinese words separation + char-jp-sep: 0.2em, // vertical space between words and Jyutping above +) + +#let mytxt = [ + 你識唔識講廣東話?就算你識講廣東話都好,都可以遇到啲好𠮩𠹌嘅字,就算係粵語母語者都好,都未必識得寫,最後要用abcd先得,就好似「bibu車」噉。 + + 所以,我呢個package一定幫到你。仲唔快啲下載? +] +#jyutcit-ruby(mytxt, jyutcitzi: jyutcitzi, style: default-style) +``` + +![example Jyutcitzi output](jyutcitzi.png) + +--- + +### API Reference + +#### `quick-render(it, ..args)` + +The primary high-level function. It fetches data from the WASM plugin and +forwards it to the parser. + +* `it`: The item containing the Cantonese string to process. +* `..args`: Named arguments forwarded to [`render-word-groups`](https://github.com/VincentTam/pycantonese-parser/blob/7ed67e5d/src/renderer.typ#L10-L15) +(e.g. `romanization`, `style`). + +#### `jyutcit-ruby(it, jyutcitzi: none, style: (:))` + +Renders Cantonese text with **Jyutcitzi** annotations above each word. + +* **Note**: Requires the +[`jyutcitzi`](https://github.com/VincentTam/jyutcitzi/blob/f0083491/lib.typ#L5) +function from the `se-jyutcitzi` package to be passed as the `jyutcitzi` argument. +* `it`: The item containing the Cantonese string to process. +* `jyutcitzi`: Named argument for the Jyutcitzi function. +* `style`: A dictionary that may override any of the following four keys (omitted keys keep their defaults) + - `rb-color`: ruby text color + - `rb-size`: ruby text size (in em) + - `word-sep`: horizontal separation between words (in em) + - `char-jp-sep`: vertical separation between ruby text and main text (in em) + +#### `annotate(txt)` + +Returns the raw segmented data as an array of dictionaries. + +* **Return format**: `array` of `{word: str, jyutping: str, yale: array}`. 
+ +#### `to-yale-numeric(jp-str)` / `to-yale-diacritics(jp-str)` + +Utility functions to convert space-delimited Jyutping strings into Yale format. + +* `numeric`: "gwong2 dung1 waa2" → "gwong2 dung1 wa2". +* `diacritics`: "gwong2 dung1 waa2" → "gwóngdūngwá". + +--- + +### Project Structure + +* `lib.typ`: The main entry point containing the Typst wrappers. +* `rust_canto.wasm`: The WebAssembly binary compiled from the `rust-canto` +crate. +* `typst.toml`: Package metadata and dependencies. + +### License + +MIT + +### Contributing + +Contributions are welcome! Please open an issue or submit a pull request. diff --git a/packages/preview/auto-canto/0.2.3/example.png b/packages/preview/auto-canto/0.2.3/example.png new file mode 100644 index 0000000000..ec4dccad2c Binary files /dev/null and b/packages/preview/auto-canto/0.2.3/example.png differ diff --git a/packages/preview/auto-canto/0.2.3/jyutcitzi.png b/packages/preview/auto-canto/0.2.3/jyutcitzi.png new file mode 100644 index 0000000000..b7e0ae53c5 Binary files /dev/null and b/packages/preview/auto-canto/0.2.3/jyutcitzi.png differ diff --git a/packages/preview/auto-canto/0.2.3/lib.typ b/packages/preview/auto-canto/0.2.3/lib.typ new file mode 100644 index 0000000000..85c33f5f36 --- /dev/null +++ b/packages/preview/auto-canto/0.2.3/lib.typ @@ -0,0 +1,102 @@ +#import "@preview/canto-parser:0.2.1": * + +// Load the plugin +#let canto = plugin("rust_canto.wasm") + +/// Internal helper: encodes a `str` as `bytes` for the WASM plugin; any other value is returned unchanged. +#let _to_bytes(txt) = { + if type(txt) == str { bytes(txt) } else { txt } +} + +/// Internal helper: recursively flattens a `str` or `content` value into a plain string ("" for anything unsupported). +#let _extract-text(it) = { + if type(it) == str { + it + } else if type(it) == content { + if it == parbreak() { + "\n\n" // double newline → WASM plugin's segmenter emits two \n tokens + } else if it == linebreak() { + "\n" // single newline → WASM plugin's segmenter emits one \n token + } else if it.has("text") { + it.text + } else if it.has("children") { +
 ("", ..it.children.map(_extract-text)).join("") // leading "" keeps the result a string even when there are no children (join on an empty array yields none) + } else if it.has("body") { + _extract-text(it.body) + } else { + "" + } + } else { + "" + } +} + +/// Annotates text into a list of dictionaries containing word, jyutping, and Yale. +/// Returns: array of {word: str, jyutping: str, yale: array} +#let annotate(txt) = { + json(canto.annotate(_to_bytes(txt))) +} + +/// Converts a space-delimited Jyutping string to Yale with tone numbers. +/// Example: "gwong2 dung1 waa2" → "gwong2 dung1 wa2" +#let to-yale-numeric(jp-str) = { + str(canto.to_yale_numeric(_to_bytes(jp-str))) +} + +/// Converts a space-delimited Jyutping string to Yale with diacritics. +/// Example: "gwong2 dung1 waa2" → "gwóngdūngwá" +#let to-yale-diacritics(jp-str) = { + str(canto.to_yale_diacritics(_to_bytes(jp-str))) +} + +/// A flexible wrapper that segments text and forwards all styling +/// parameters to the parser's rendering function. +/// - it: The item (string or content) containing the Cantonese text to process +/// - args: Captures named arguments like romanization: "yale" or "jyutping" +#let quick-render(it, ..args) = { + // 1. Extract text from item + let txt = _extract-text(it) + + // 2. Get the data from the WASM plugin (shared with the public `annotate` helper) + let data = annotate(txt) + + // 3. Forward the data and all extra arguments to the parser + render-word-groups(data, ..args) +} + +/// Render Cantonese text with jyutcitzi annotations above each word. +/// The caller must pass the `jyutcitzi` function from @preview/se-jyutcitzi; entries in `style` override the built-in defaults. 
+/// +/// Example: +/// #import "@preview/se-jyutcitzi:0.3.2": jyutcitzi +/// #import "@preview/auto-canto:0.2.3": jyutcit-ruby +/// #jyutcit-ruby(it, jyutcitzi: jyutcitzi) +#let jyutcit-ruby(it, jyutcitzi: none, style: (:)) = { + assert(jyutcitzi != none, message: "jyutcit-ruby requires the jyutcitzi function from @preview/se-jyutcitzi") + let default-style = ( + rb-color: rgb("#ff0000"), + rb-size: 0.8em, + word-sep: 0.2em, + char-jp-sep: 0.2em, + ) + let s = default-style + style + let data = annotate(_extract-text(it)) + [ + #for item in data { + if item.word == "\n" { text[\ ]; continue } + let ruby-txt = jyutcitzi(item.jyutping) + if ruby-txt != none { + box(stack( + dir: ttb, + spacing: s.char-jp-sep, + align(center, text(s.rb-size, s.rb-color, ruby-txt)), + align(bottom + center, box(height: 1em, text(1em, item.word))), + )) + } else { + text(1em, item.word) + } + h(s.word-sep) + } + #h(-s.word-sep) + ] +} diff --git a/packages/preview/auto-canto/0.2.3/rust_canto.wasm b/packages/preview/auto-canto/0.2.3/rust_canto.wasm new file mode 100644 index 0000000000..de599252f2 Binary files /dev/null and b/packages/preview/auto-canto/0.2.3/rust_canto.wasm differ diff --git a/packages/preview/auto-canto/0.2.3/typst.toml b/packages/preview/auto-canto/0.2.3/typst.toml new file mode 100644 index 0000000000..9c2253bb45 --- /dev/null +++ b/packages/preview/auto-canto/0.2.3/typst.toml @@ -0,0 +1,28 @@ +[package] +name = "auto-canto" +version = "0.2.3" +entrypoint = "lib.typ" +authors = ["Vincent Tam "] +license = "MIT" +description = "Automatic conversion to Cantonese romanizations from Chinese characters." +repository = "https://github.com/VincentTam/auto-canto" +keywords = [ + "Cantonese", + "jyutping", + "yale", + "Chinese", + "conversion", +] +categories = [ + "text", + "languages", + "utility", +] +disciplines = [ + "education", + "linguistics", +] +compiler = "0.14.0" +exclude = ["example.png"] + +[tool]