Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Support one byte characters in CMaps #32

Merged
merged 4 commits into from
Apr 17, 2024
Merged
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
68 changes: 61 additions & 7 deletions src/font.rs
Original file line number Diff line number Diff line change
@@ -1,3 +1,5 @@
use std::marker::PhantomData;

use super::*;

/// Writer for a _Type-1 font dictionary_.
Expand Down Expand Up @@ -653,13 +655,17 @@ impl<'a> Cmap<'a> {
deref!('a, Cmap<'a> => Stream<'a>, stream);

/// A builder for a `/ToUnicode` character map stream.
pub struct UnicodeCmap {
pub struct UnicodeCmap<G = u16> {
buf: Vec<u8>,
mappings: Vec<u8>,
count: i32,
glyph_id: PhantomData<G>,
}

impl UnicodeCmap {
impl<G> UnicodeCmap<G>
where
G: GlyphId,
{
/// Create a new, empty unicode character map.
pub fn new(name: Name, info: SystemInfo) -> Self {
// https://www.adobe.com/content/dam/acom/en/devnet/font/pdfs/5014.CIDFont_Spec.pdf
Expand Down Expand Up @@ -710,25 +716,34 @@ impl UnicodeCmap {

// We just cover the whole code space of the glyph ID type (`G::MIN` to `G::MAX`).
buf.extend(b"1 begincodespacerange\n");
buf.extend(b"<0000> <ffff>\n");
buf.push(b'<');
G::MIN.push(&mut buf);
buf.extend(b"> <");
G::MAX.push(&mut buf);
buf.extend(b">\n");
buf.extend(b"endcodespacerange\n");

Self { buf, mappings: vec![], count: 0 }
Self {
buf,
mappings: vec![],
count: 0,
glyph_id: PhantomData,
}
}

/// Add a mapping from a glyph ID to a codepoint.
pub fn pair(&mut self, glyph: u16, codepoint: char) {
pub fn pair(&mut self, glyph: G, codepoint: char) {
self.pair_with_multiple(glyph, [codepoint]);
}

/// Add a mapping from a glyph ID to multiple codepoints.
pub fn pair_with_multiple(
&mut self,
glyph: u16,
glyph: G,
codepoints: impl IntoIterator<Item = char>,
) {
self.mappings.push(b'<');
self.mappings.push_hex_u16(glyph);
glyph.push(&mut self.mappings);
self.mappings.extend(b"> <");

for c in codepoints {
Expand Down Expand Up @@ -775,6 +790,45 @@ impl UnicodeCmap {
}
}

/// Type3 fonts require (in Acrobat at least) IDs in CMaps to be encoded with
/// one byte only, whereas other font types use two bytes.
///
/// This trait provides an abstraction to support both.
///
/// It is implemented for `u8` (one-byte IDs) and `u16` (two-byte IDs). The
/// trait is sealed through its `private::Sealed` supertrait, which lives in a
/// private module, so it is not meant to be implemented for any other type.
pub trait GlyphId: private::Sealed {}

// One-byte glyph IDs.
impl GlyphId for u8 {}

// Two-byte glyph IDs.
impl GlyphId for u16 {}

/// Module to seal the `GlyphId` trait.
///
/// `Sealed` is the supertrait of `GlyphId`; because this module is private,
/// the set of glyph ID types is fixed to the impls below.
mod private {
    use crate::buf::BufExt;

    /// Operations every glyph ID type has to provide.
    pub trait Sealed {
        /// The smallest ID representable by this type.
        const MIN: Self;
        /// The largest ID representable by this type.
        const MAX: Self;
        /// Append this ID to `buf` using the `BufExt` writer that matches
        /// the width of the type.
        fn push(self, buf: &mut Vec<u8>);
    }

    // Generates a `Sealed` impl for each `type => BufExt method` pair. The
    // bounds are taken from the integer type itself and `push` forwards to
    // the named method.
    macro_rules! seal_glyph_id {
        ($($int:ty => $write_hex:ident),+ $(,)?) => {
            $(
                impl Sealed for $int {
                    const MIN: Self = <$int>::MIN;
                    const MAX: Self = <$int>::MAX;

                    fn push(self, sink: &mut Vec<u8>) {
                        sink.$write_hex(self);
                    }
                }
            )+
        };
    }

    seal_glyph_id! {
        u8 => push_hex,
        u16 => push_hex_u16,
    }
}

/// Specifics about a character collection.
#[derive(Debug, Copy, Clone, Eq, PartialEq, Hash)]
pub struct SystemInfo<'a> {
Expand Down
Loading