Skip to content

Commit

Permalink
Get subsetting working with MacRoman and cp1252 encodings
Browse files Browse the repository at this point in the history
  • Loading branch information
jamis committed Dec 30, 2008
1 parent 7362354 commit f695434
Show file tree
Hide file tree
Showing 8 changed files with 262 additions and 21 deletions.
88 changes: 88 additions & 0 deletions lib/ttfunk/encoding/mac_roman.rb
@@ -0,0 +1,88 @@
module TTFunk
  module Encoding
    # Conversion tables and helpers for the Mac OS Roman ("MacRoman")
    # single-byte encoding.
    class MacRoman
      # Maps a MacRoman byte value to its Unicode code point. Only the bytes
      # whose Unicode value differs from the byte value are listed; every other
      # key falls through to the default block and maps to itself.
      TO_UNICODE = Hash.new { |h,k| k }
      TO_UNICODE.update(
        0x80 => 0x00C4,
        0x81 => 0x00C5, 0x82 => 0x00C7, 0x83 => 0x00C9, 0x84 => 0x00D1, 0x85 => 0x00D6,
        0x86 => 0x00DC, 0x87 => 0x00E1, 0x88 => 0x00E0, 0x89 => 0x00E2, 0x8A => 0x00E4,
        0x8B => 0x00E3, 0x8C => 0x00E5, 0x8D => 0x00E7, 0x8E => 0x00E9, 0x8F => 0x00E8,
        0x90 => 0x00EA, 0x91 => 0x00EB, 0x92 => 0x00ED, 0x93 => 0x00EC, 0x94 => 0x00EE,
        0x95 => 0x00EF, 0x96 => 0x00F1, 0x97 => 0x00F3, 0x98 => 0x00F2, 0x99 => 0x00F4,
        0x9A => 0x00F6, 0x9B => 0x00F5, 0x9C => 0x00FA, 0x9D => 0x00F9, 0x9E => 0x00FB,
        0x9F => 0x00FC, 0xA0 => 0x2020, 0xA1 => 0x00B0, 0xA4 => 0x00A7, 0xA5 => 0x2022,
        0xA6 => 0x00B6, 0xA7 => 0x00DF, 0xA8 => 0x00AE, 0xAA => 0x2122, 0xAB => 0x00B4,
        0xAC => 0x00A8, 0xAD => 0x2260, 0xAE => 0x00C6, 0xAF => 0x00D8, 0xB0 => 0x221E,
        0xB2 => 0x2264, 0xB3 => 0x2265, 0xB4 => 0x00A5, 0xB6 => 0x2202, 0xB7 => 0x2211,
        0xB8 => 0x220F, 0xB9 => 0x03C0, 0xBA => 0x222B, 0xBB => 0x00AA, 0xBC => 0x00BA,
        0xBD => 0x03A9, 0xBE => 0x00E6, 0xBF => 0x00F8, 0xC0 => 0x00BF, 0xC1 => 0x00A1,
        0xC2 => 0x00AC, 0xC3 => 0x221A, 0xC4 => 0x0192, 0xC5 => 0x2248, 0xC6 => 0x2206,
        0xC7 => 0x00AB, 0xC8 => 0x00BB, 0xC9 => 0x2026, 0xCA => 0x00A0, 0xCB => 0x00C0,
        0xCC => 0x00C3, 0xCD => 0x00D5, 0xCE => 0x0152, 0xCF => 0x0153, 0xD0 => 0x2013,
        0xD1 => 0x2014, 0xD2 => 0x201C, 0xD3 => 0x201D, 0xD4 => 0x2018, 0xD5 => 0x2019,
        0xD6 => 0x00F7, 0xD7 => 0x25CA, 0xD8 => 0x00FF, 0xD9 => 0x0178, 0xDA => 0x2044,
        0xDB => 0x20AC, 0xDC => 0x2039, 0xDD => 0x203A, 0xDE => 0xFB01, 0xDF => 0xFB02,
        0xE0 => 0x2021, 0xE1 => 0x00B7, 0xE2 => 0x201A, 0xE3 => 0x201E, 0xE4 => 0x2030,
        0xE5 => 0x00C2, 0xE6 => 0x00CA, 0xE7 => 0x00C1, 0xE8 => 0x00CB, 0xE9 => 0x00C8,
        0xEA => 0x00CD, 0xEB => 0x00CE, 0xEC => 0x00CF, 0xED => 0x00CC, 0xEE => 0x00D3,
        0xEF => 0x00D4, 0xF0 => 0xF8FF, 0xF1 => 0x00D2, 0xF2 => 0x00DA, 0xF3 => 0x00DB,
        0xF4 => 0x00D9, 0xF5 => 0x0131, 0xF6 => 0x02C6, 0xF7 => 0x02DC, 0xF8 => 0x00AF,
        0xF9 => 0x02D8, 0xFA => 0x02D9, 0xFB => 0x02DA, 0xFC => 0x00B8, 0xFD => 0x02DD,
        0xFE => 0x02DB, 0xFF => 0x02C7
      )

      # Maps a Unicode code point to its MacRoman byte value, or nil when the
      # code point has no MacRoman representation. The explicit entries are the
      # inverse of TO_UNICODE. The default block only passes a code point
      # through unchanged when the same-valued byte is not remapped by
      # TO_UNICODE; otherwise that byte stands for a different character and
      # returning it would silently encode the wrong glyph.
      FROM_UNICODE = Hash.new do |h,k|
        (k <= 0xFF && !TO_UNICODE.key?(k)) ? k : nil
      end
      FROM_UNICODE.update(TO_UNICODE.invert)

      # Maps MacRoman codes to their corresponding index in the Postscript glyph
      # table (see TTFunk::Table::Post::Format10). If any entry in this array is a string,
      # it is a postscript glyph that is not in the standard list, and which should be
      # emitted specially in the TTF postscript table ('post', see format 2).
      POSTSCRIPT_GLYPH_MAPPING = [
        0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, # 0x0F
        0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, # 0x1F
        3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, # 0x2F
        19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33, 34, # 0x3F
        35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45, 46, 47, 48, 49, 50, # 0x4F
        51, 52, 53, 54, 55, 56, 57, 58, 59, 60, 61, 62, 63, 64, 65, 66, # 0x5F
        67, 68, 69, 70, 71, 72, 73, 74, 75, 76, 77, 78, 79, 80, 81, 82, # 0x6F
        83, 84, 85, 86, 87, 88, 89, 90, 91, 92, 93, 94, 95, 96, 97, 0, # 0x7F
        98, 99, 100, 101, 102, 103, 104, 105, 106, 107, 108, 109, 110, 111, 112, 113, # 0x8F
        114, 115, 116, 117, 118, 119, 120, 121, 122, 123, 124, 125, 126, 127, 128, 129, # 0x9F
        130, 131, 132, 133, 134, 135, 136, 137, 138, 139, 140, 141, 142, 143, 144, 145, # 0xAF
        146, 147, 148, 149, 150, 151, 152, 153, 154, 155, 156, 157, 158, 159, 160, 161, # 0xBF
        162, 163, 164, 165, 166, 167, 168, 169, 170, 171, 172, 173, 174, 175, 176, 177, # 0xCF
        178, 179, 180, 181, 182, 183, 184, 185, 186, 187, 188, "Euro", 190, 191, 192, 193, # 0xDF
        194, 195, 196, 197, 198, 199, 200, 201, 202, 203, 204, 205, 206, 207, 208, 209, # 0xEF
        210, 211, 212, 213, 214, 215, 216, 217, 218, 219, 220, 221, 222, 223, 224, 225 # 0xFF
      ]

      # Returns true when the given Unicode code point (an integer) can be
      # represented in MacRoman.
      def self.covers?(character)
        !FROM_UNICODE[character].nil?
      end

      # Converts a MacRoman byte string to a UTF-8 string.
      def self.to_utf8(string)
        to_unicode_codepoints(string.unpack("C*")).pack("U*")
      end

      # Converts a MacRoman byte string to a string of 16-bit big-endian
      # code units, one per input byte.
      def self.to_unicode(string)
        to_unicode_codepoints(string.unpack("C*")).pack("n*")
      end

      # Converts a UTF-8 string to a MacRoman byte string. Characters that
      # MacRoman cannot represent become byte 0.
      def self.from_utf8(string)
        from_unicode_codepoints(string.unpack("U*")).pack("C*")
      end

      # Converts a string of 16-bit big-endian code units to a MacRoman byte
      # string. Characters that MacRoman cannot represent become byte 0.
      def self.from_unicode(string)
        from_unicode_codepoints(string.unpack("n*")).pack("C*")
      end

      # Maps an array of MacRoman byte values to an array of Unicode code points.
      def self.to_unicode_codepoints(array)
        array.map { |code| TO_UNICODE[code] }
      end

      # Maps an array of Unicode code points to an array of MacRoman byte
      # values, substituting 0 for any code point without a MacRoman equivalent.
      def self.from_unicode_codepoints(array)
        array.map { |code| FROM_UNICODE[code] || 0 }
      end
    end
  end
end
69 changes: 69 additions & 0 deletions lib/ttfunk/encoding/windows_1252.rb
@@ -0,0 +1,69 @@
module TTFunk
  module Encoding
    # Conversion tables and helpers for the Windows-1252 (cp1252) single-byte
    # encoding.
    class Windows1252
      # Maps a Windows-1252 byte value to its Unicode code point. Only the
      # bytes whose Unicode value differs from the byte value are listed; every
      # other key falls through to the default block and maps to itself.
      TO_UNICODE = Hash.new { |h,k| k }
      TO_UNICODE.update(
        0x80 => 0x20AC, 0x82 => 0x201A, 0x83 => 0x0192, 0x84 => 0x201E, 0x85 => 0x2026,
        0x86 => 0x2020, 0x87 => 0x2021, 0x88 => 0x02C6, 0x89 => 0x2030, 0x8A => 0x0160,
        0x8B => 0x2039, 0x8C => 0x0152, 0x8E => 0x017D, 0x91 => 0x2018, 0x92 => 0x2019,
        0x93 => 0x201C, 0x94 => 0x201D, 0x95 => 0x2022, 0x96 => 0x2013, 0x97 => 0x2014,
        0x98 => 0x02DC, 0x99 => 0x2122, 0x9A => 0x0161, 0x9B => 0x203A, 0x9C => 0x0153,
        0x9E => 0x017E, 0x9F => 0x0178
      )

      # Maps a Unicode code point to its Windows-1252 byte value, or nil when
      # the code point has no Windows-1252 representation. The explicit entries
      # are the inverse of TO_UNICODE. The default block only passes a code
      # point through unchanged when the same-valued byte is not remapped by
      # TO_UNICODE; otherwise that byte stands for a different character (e.g.
      # byte 0x80 is the Euro sign, not U+0080).
      FROM_UNICODE = Hash.new do |h,k|
        (k <= 0xFF && !TO_UNICODE.key?(k)) ? k : nil
      end
      FROM_UNICODE.update(TO_UNICODE.invert)

      # Maps Windows-1252 codes to their corresponding index in the Postscript glyph
      # table (see TTFunk::Table::Post::Format10). If any entry in this array is a string,
      # it is a postscript glyph that is not in the standard list, and which should be
      # emitted specially in the TTF postscript table ('post', see format 2).
      POSTSCRIPT_GLYPH_MAPPING = [
        0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, # 0x0F
        0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, # 0x1F
        3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, # 0x2F
        19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33, 34, # 0x3F
        35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45, 46, 47, 48, 49, 50, # 0x4F
        51, 52, 53, 54, 55, 56, 57, 58, 59, 60, 61, 62, 63, 64, 65, 66, # 0x5F
        67, 68, 69, 70, 71, 72, 73, 74, 75, 76, 77, 78, 79, 80, 81, 82, # 0x6F
        83, 84, 85, 86, 87, 88, 89, 90, 91, 92, 93, 94, 95, 96, 97, 0, # 0x7F
        "Euro", 0, 196, 166, 197, 171, 130, 194, 216, 198, 228, 190, 176, 0, 230, 0, # 0x8F
        0, 182, 183, 180, 181, 135, 178, 179, 217, 140, 229, 191, 177, 0, 231, 187, # 0x9F
        3, 163, 132, 133, 189, 150, 232, 134, 142, 139, 157, 169, 164, 16, 138, 218, # 0xAF
        131, 147, 242, 243, 141, 151, 136, 195, 222, 241, 158, 170, 245, 244, 246, 162, # 0xBF
        173, 201, 199, 174, 98, 99, 144, 100, 203, 101, 200, 202, 207, 204, 205, 206, # 0xCF
        233, 102, 211, 208, 209, 175, 103, 240, 145, 214, 212, 213, 104, 235, 237, 137, # 0xDF
        106, 105, 107, 109, 108, 110, 160, 111, 113, 112, 114, 115, 117, 116, 118, 119, # 0xEF
        234, 120, 122, 121, 123, 125, 124, 184, 161, 127, 126, 128, 129, 236, 238, 186 # 0xFF
      ]

      # Returns true when the given Unicode code point (an integer) can be
      # represented in Windows-1252.
      def self.covers?(character)
        !FROM_UNICODE[character].nil?
      end

      # Converts a Windows-1252 byte string to a UTF-8 string.
      def self.to_utf8(string)
        to_unicode_codepoints(string.unpack("C*")).pack("U*")
      end

      # Converts a Windows-1252 byte string to a string of 16-bit big-endian
      # code units, one per input byte.
      def self.to_unicode(string)
        to_unicode_codepoints(string.unpack("C*")).pack("n*")
      end

      # Converts a UTF-8 string to a Windows-1252 byte string. Characters that
      # Windows-1252 cannot represent become byte 0.
      def self.from_utf8(string)
        from_unicode_codepoints(string.unpack("U*")).pack("C*")
      end

      # Converts a string of 16-bit big-endian code units to a Windows-1252
      # byte string. Characters that Windows-1252 cannot represent become byte 0.
      def self.from_unicode(string)
        from_unicode_codepoints(string.unpack("n*")).pack("C*")
      end

      # Maps an array of Windows-1252 byte values to an array of Unicode code points.
      def self.to_unicode_codepoints(array)
        array.map { |code| TO_UNICODE[code] }
      end

      # Maps an array of Unicode code points to an array of Windows-1252 byte
      # values, substituting 0 for any code point without an equivalent.
      def self.from_unicode_codepoints(array)
        array.map { |code| FROM_UNICODE[code] || 0 }
      end
    end
  end
end
48 changes: 39 additions & 9 deletions lib/ttfunk/subset.rb
Expand Up @@ -4,28 +4,51 @@
require 'ttfunk/table/hmtx'
require 'ttfunk/table/kern'
require 'ttfunk/table/loca'
require 'ttfunk/encoding/mac_roman'
require 'ttfunk/encoding/windows_1252'

module TTFunk
class Subset
attr_reader :original
attr_reader :encoding

def initialize(original)
def initialize(original, encoding)
@original = original
@subset = Set.new([0])
@encoding = encoding
@subset = Set.new
end

def use(characters)
@subset.merge(characters)
def use(character)
@subset << character
end

def encode
# Reports whether the given Unicode code point can be represented in this
# subset's encoding. The :unicode encoding covers everything; the single-byte
# encodings defer to their encoding class; any other encoding covers nothing.
def covers?(character)
  return true if @encoding == :unicode
  return Encoding::MacRoman.covers?(character) if @encoding == :mac_roman
  return Encoding::Windows1252.covers?(character) if @encoding == :windows_1252

  false
end

# Translates a Unicode code point into the code used by this subset's
# encoding. For :unicode the code point is returned unchanged; for the
# single-byte encodings the encoding's FROM_UNICODE table is consulted.
# Returns nil for any other encoding.
def from_unicode(character)
  if @encoding == :unicode
    character
  elsif @encoding == :mac_roman
    Encoding::MacRoman::FROM_UNICODE[character]
  elsif @encoding == :windows_1252
    Encoding::Windows1252::FROM_UNICODE[character]
  end
end

def encode(options={})
cmap = original.cmap.unicode.first

# map unicode -> corresponding glyph id in original font
charmap = @subset.inject({}) { |map, code| map[code] = cmap[code]; map }
cmap_table = TTFunk::Table::Cmap.encode(charmap)
cmap_table = TTFunk::Table::Cmap.encode(charmap, @encoding)

glyph_ids = @subset.map { |character| cmap[character] }
glyphs = collect_glyphs(glyph_ids)
glyph_ids = @subset.map { |character| cmap[character] } << 0
glyphs = collect_glyphs(glyph_ids.uniq)

old2new_glyph = cmap_table[:charmap].inject({}) { |map, (code, ids)| map[ids[:old]] = ids[:new]; map }
next_glyph_id = cmap_table[:max_glyph_id]
Expand All @@ -41,7 +64,6 @@ def encode

glyf_table = TTFunk::Table::Glyf.encode(glyphs, new2old_glyph, old2new_glyph)
loca_table = TTFunk::Table::Loca.encode(glyf_table[:offsets])
kern_table = TTFunk::Table::Kern.encode(original.kerning, old2new_glyph)
hmtx_table = TTFunk::Table::Hmtx.encode(original.horizontal_metrics, new2old_glyph)
hhea_table = TTFunk::Table::Hhea.encode(original.horizontal_header, hmtx_table)
maxp_table = TTFunk::Table::Maxp.encode(original.maximum_profile, old2new_glyph)
Expand All @@ -50,6 +72,14 @@ def encode
name_table = TTFunk::Table::Name.encode(original.name)
head_table = TTFunk::Table::Head.encode(original.header, loca_table)

# For PDFs, the kerning info is all included in the PDF as the text is
# drawn. Thus, PDF readers do not actually use the kerning info in
# embedded fonts. If the library is used for something else, the generated
# subfont may need a kerning table; in that case, you need to opt into it.
if options[:kerning]
kern_table = TTFunk::Table::Kern.encode(original.kerning, old2new_glyph)
end

tables = { 'cmap' => cmap_table[:table],
'glyf' => glyf_table[:table],
'loca' => loca_table[:table],
Expand Down
4 changes: 2 additions & 2 deletions lib/ttfunk/table/cmap.rb
Expand Up @@ -4,8 +4,8 @@ class Cmap < Table
attr_reader :version
attr_reader :tables

def self.encode(charmap)
result = Cmap::Subtable.encode(charmap)
def self.encode(charmap, encoding)
result = Cmap::Subtable.encode(charmap, encoding)

# pack 'version' and 'table-count'
result[:table] = [0, 1, result.delete(:subtable)].pack("nnA*")
Expand Down
32 changes: 32 additions & 0 deletions lib/ttfunk/table/cmap/format00.rb
@@ -1,3 +1,6 @@
require 'ttfunk/encoding/mac_roman'
require 'ttfunk/encoding/windows_1252'

module TTFunk
class Table
class Cmap
Expand All @@ -6,6 +9,35 @@ module Format00
attr_reader :language
attr_reader :code_map

# Builds a format 0 (single-byte) cmap subtable.
#
# +charmap+ is a hash mapping unicode character codes to glyph ids taken
# from the original font. +encoding+ selects the single-byte encoding used
# to index the table; only :mac_roman is accepted, anything else raises
# NotImplementedError.
#
# Returns a hash with three keys:
#   :charmap      - maps each character code in +charmap+ to another hash
#                   holding both the old (:old) and new (:new) glyph ids
#   :subtable     - the encoded subtable (format, length, language and the
#                   256 glyph indices)
#   :max_glyph_id - one more than the highest new glyph id assigned
def self.encode(charmap, encoding)
  unless encoding == :mac_roman
    raise NotImplementedError, "encoding #{encoding.inspect} is not supported"
  end
  translator = TTFunk::Encoding::MacRoman

  next_id = 0
  glyph_indexes = Array.new(256, 0)
  # old glyph id -> new glyph id; glyph 0 always stays glyph 0
  glyph_map = { 0 => 0 }
  new_map = {}

  # walk the codes in ascending order so new ids are assigned deterministically
  charmap.keys.sort.each do |code|
    old_id = charmap[code]
    glyph_map[old_id] ||= (next_id += 1)
    new_id = glyph_map[old_id]

    new_map[code] = { :old => old_id, :new => new_id }
    glyph_indexes[translator::FROM_UNICODE[code]] = new_id
  end

  # format, length, language, indices
  header = [0, 262, 0]
  subtable = (header + glyph_indexes).pack("nnnC*")

  { :charmap => new_map, :subtable => subtable, :max_glyph_id => next_id + 1 }
end

def [](code)
@code_map[code] || 0
end
Expand Down
16 changes: 10 additions & 6 deletions lib/ttfunk/table/cmap/format04.rb
Expand Up @@ -12,19 +12,23 @@ module Format04
# another hash containing both the old (:old) and new (:new) glyph
# ids. The returned hash also includes a :subtable key, which contains
# the encoded subtable for the given charmap.
def self.encode(charmap)
def self.encode(charmap, encoding)
end_codes = []
start_codes = []
next_id = 0
last = nil
last = difference = nil

glyph_map = { 0 => 0 }
new_map = charmap.keys.sort.inject({}) do |map, code|
map[code] = { :old => charmap[code], :new => next_id }
next_id += 1
old = charmap[code]
glyph_map[old] ||= next_id += 1
map[code] = { :old => old, :new => glyph_map[old] }

if last.nil? || code != last+1
delta = glyph_map[old] - code
if last.nil? || delta != difference
end_codes << last if last
start_codes << code
difference = delta
end
last = code

Expand Down Expand Up @@ -72,7 +76,7 @@ def self.encode(charmap)
subtable << end_codes.pack("n*") << "\0\0" << start_codes.pack("n*")
subtable << deltas.pack("n*") << range_offsets.pack("n*") << glyph_indices.pack("n*")

{ :charmap => new_map, :subtable => subtable, :max_glyph_id => next_id }
{ :charmap => new_map, :subtable => subtable, :max_glyph_id => next_id+1 }
end

def [](code)
Expand Down
24 changes: 21 additions & 3 deletions lib/ttfunk/table/cmap/subtable.rb
Expand Up @@ -10,10 +10,28 @@ class Subtable
attr_reader :encoding_id
attr_reader :format

def self.encode(charmap)
result = Format04.encode(charmap)
ENCODING_MAPPINGS = {
:windows_1252 => { :platform_id => 0, :encoding_id => 0 },
:mac_roman => { :platform_id => 1, :encoding_id => 0 },
:unicode => { :platform_id => 0, :encoding_id => 0 }
}

def self.encode(charmap, encoding)
case encoding
when :mac_roman
result = Format00.encode(charmap, encoding)
when :windows_1252, :unicode
result = Format04.encode(charmap, encoding)
else
raise NotImplementedError, "encoding #{encoding.inspect} is not supported"
end

mapping = ENCODING_MAPPINGS[encoding]

# platform-id, encoding-id, offset
result[:subtable] = [0, 0, 12, result[:subtable]].pack("nnNA*")
result[:subtable] = [mapping[:platform_id], mapping[:encoding_id],
12, result[:subtable]].pack("nnNA*")

return result
end

Expand Down
2 changes: 1 addition & 1 deletion lib/ttfunk/table/hmtx.rb
Expand Up @@ -9,7 +9,7 @@ class Hmtx < Table

def self.encode(hmtx, mapping)
metrics = mapping.keys.sort.map do |new_id|
metric = hmtx.for(new_id)
metric = hmtx.for(mapping[new_id])
[metric.advance_width, metric.left_side_bearing]
end

Expand Down

0 comments on commit f695434

Please sign in to comment.