Skip to content

HTTPS clone URL

Subversion checkout URL

You can clone with
or
.
Download ZIP
Browse files

Get subsetting working with MacRoman and cp1252 encodings

  • Loading branch information...
commit f6954342b5060ce48f6dd14a4ac84343a8fe1bb2 1 parent 7362354
@jamis jamis authored
View
88 lib/ttfunk/encoding/mac_roman.rb
@@ -0,0 +1,88 @@
module TTFunk
  module Encoding
    # Conversion tables and helpers for the single-byte Mac OS Roman encoding.
    class MacRoman
      # Maps MacRoman byte values to Unicode code points. Bytes without an
      # explicit entry (0x00-0x7F plus 0xA2, 0xA3, 0xA9, 0xB1 and 0xB5) decode
      # to the code point of the same value; the default block supplies that
      # identity mapping without storing anything in the hash.
      TO_UNICODE = Hash.new { |_hash, byte| byte }
      TO_UNICODE.update(
        0x80 => 0x00C4,
        0x81 => 0x00C5, 0x82 => 0x00C7, 0x83 => 0x00C9, 0x84 => 0x00D1, 0x85 => 0x00D6,
        0x86 => 0x00DC, 0x87 => 0x00E1, 0x88 => 0x00E0, 0x89 => 0x00E2, 0x8A => 0x00E4,
        0x8B => 0x00E3, 0x8C => 0x00E5, 0x8D => 0x00E7, 0x8E => 0x00E9, 0x8F => 0x00E8,
        0x90 => 0x00EA, 0x91 => 0x00EB, 0x92 => 0x00ED, 0x93 => 0x00EC, 0x94 => 0x00EE,
        0x95 => 0x00EF, 0x96 => 0x00F1, 0x97 => 0x00F3, 0x98 => 0x00F2, 0x99 => 0x00F4,
        0x9A => 0x00F6, 0x9B => 0x00F5, 0x9C => 0x00FA, 0x9D => 0x00F9, 0x9E => 0x00FB,
        0x9F => 0x00FC, 0xA0 => 0x2020, 0xA1 => 0x00B0, 0xA4 => 0x00A7, 0xA5 => 0x2022,
        0xA6 => 0x00B6, 0xA7 => 0x00DF, 0xA8 => 0x00AE, 0xAA => 0x2122, 0xAB => 0x00B4,
        0xAC => 0x00A8, 0xAD => 0x2260, 0xAE => 0x00C6, 0xAF => 0x00D8, 0xB0 => 0x221E,
        0xB2 => 0x2264, 0xB3 => 0x2265, 0xB4 => 0x00A5, 0xB6 => 0x2202, 0xB7 => 0x2211,
        0xB8 => 0x220F, 0xB9 => 0x03C0, 0xBA => 0x222B, 0xBB => 0x00AA, 0xBC => 0x00BA,
        0xBD => 0x03A9, 0xBE => 0x00E6, 0xBF => 0x00F8, 0xC0 => 0x00BF, 0xC1 => 0x00A1,
        0xC2 => 0x00AC, 0xC3 => 0x221A, 0xC4 => 0x0192, 0xC5 => 0x2248, 0xC6 => 0x2206,
        0xC7 => 0x00AB, 0xC8 => 0x00BB, 0xC9 => 0x2026, 0xCA => 0x00A0, 0xCB => 0x00C0,
        0xCC => 0x00C3, 0xCD => 0x00D5, 0xCE => 0x0152, 0xCF => 0x0153, 0xD0 => 0x2013,
        0xD1 => 0x2014, 0xD2 => 0x201C, 0xD3 => 0x201D, 0xD4 => 0x2018, 0xD5 => 0x2019,
        0xD6 => 0x00F7, 0xD7 => 0x25CA, 0xD8 => 0x00FF, 0xD9 => 0x0178, 0xDA => 0x2044,
        0xDB => 0x20AC, 0xDC => 0x2039, 0xDD => 0x203A, 0xDE => 0xFB01, 0xDF => 0xFB02,
        0xE0 => 0x2021, 0xE1 => 0x00B7, 0xE2 => 0x201A, 0xE3 => 0x201E, 0xE4 => 0x2030,
        0xE5 => 0x00C2, 0xE6 => 0x00CA, 0xE7 => 0x00C1, 0xE8 => 0x00CB, 0xE9 => 0x00C8,
        0xEA => 0x00CD, 0xEB => 0x00CE, 0xEC => 0x00CF, 0xED => 0x00CC, 0xEE => 0x00D3,
        0xEF => 0x00D4, 0xF0 => 0xF8FF, 0xF1 => 0x00D2, 0xF2 => 0x00DA, 0xF3 => 0x00DB,
        0xF4 => 0x00D9, 0xF5 => 0x0131, 0xF6 => 0x02C6, 0xF7 => 0x02DC, 0xF8 => 0x00AF,
        0xF9 => 0x02D8, 0xFA => 0x02D9, 0xFB => 0x02DA, 0xFC => 0x00B8, 0xFD => 0x02DD,
        0xFE => 0x02DB, 0xFF => 0x02C7
      )

      # Maps Unicode code points to MacRoman byte values; yields nil for code
      # points the encoding cannot represent. The identity fallback is only
      # taken when the byte of the same value really decodes to that code
      # point, i.e. when TO_UNICODE has no explicit (different) entry for it.
      # Otherwise e.g. U+00A4 would be reported as byte 0xA4, which is the
      # section sign in MacRoman.
      FROM_UNICODE = Hash.new do |_hash, code|
        code if (0x00..0xFF).include?(code) && !TO_UNICODE.key?(code)
      end
      FROM_UNICODE.update(TO_UNICODE.invert)

      TO_UNICODE.freeze
      FROM_UNICODE.freeze

      # Maps MacRoman codes to their corresponding index in the Postscript glyph
      # table (see TTFunk::Table::Post::Format10). If any entry in this array is a string,
      # it is a postscript glyph that is not in the standard list, and which should be
      # emitted specially in the TTF postscript table ('post', see format 2).
      POSTSCRIPT_GLYPH_MAPPING = [
        0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, # 0x0F
        0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, # 0x1F
        3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, # 0x2F
        19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33, 34, # 0x3F
        35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45, 46, 47, 48, 49, 50, # 0x4F
        51, 52, 53, 54, 55, 56, 57, 58, 59, 60, 61, 62, 63, 64, 65, 66, # 0x5F
        67, 68, 69, 70, 71, 72, 73, 74, 75, 76, 77, 78, 79, 80, 81, 82, # 0x6F
        83, 84, 85, 86, 87, 88, 89, 90, 91, 92, 93, 94, 95, 96, 97, 0, # 0x7F
        98, 99, 100, 101, 102, 103, 104, 105, 106, 107, 108, 109, 110, 111, 112, 113, # 0x8F
        114, 115, 116, 117, 118, 119, 120, 121, 122, 123, 124, 125, 126, 127, 128, 129, # 0x9F
        130, 131, 132, 133, 134, 135, 136, 137, 138, 139, 140, 141, 142, 143, 144, 145, # 0xAF
        146, 147, 148, 149, 150, 151, 152, 153, 154, 155, 156, 157, 158, 159, 160, 161, # 0xBF
        162, 163, 164, 165, 166, 167, 168, 169, 170, 171, 172, 173, 174, 175, 176, 177, # 0xCF
        178, 179, 180, 181, 182, 183, 184, 185, 186, 187, 188, "Euro", 190, 191, 192, 193, # 0xDF
        194, 195, 196, 197, 198, 199, 200, 201, 202, 203, 204, 205, 206, 207, 208, 209, # 0xEF
        210, 211, 212, 213, 214, 215, 216, 217, 218, 219, 220, 221, 222, 223, 224, 225 # 0xFF
      ].freeze

      # Returns true when the Unicode code point +character+ has a MacRoman
      # byte value.
      def self.covers?(character)
        !FROM_UNICODE[character].nil?
      end

      # Decodes a MacRoman byte string into a UTF-8 string.
      def self.to_utf8(string)
        to_unicode_codepoints(string.unpack("C*")).pack("U*")
      end

      # Decodes a MacRoman byte string into a string of 16-bit big-endian
      # code units (one per character).
      def self.to_unicode(string)
        to_unicode_codepoints(string.unpack("C*")).pack("n*")
      end

      # Encodes a UTF-8 string as MacRoman bytes; characters the encoding
      # cannot represent become byte 0.
      def self.from_utf8(string)
        from_unicode_codepoints(string.unpack("U*")).pack("C*")
      end

      # Encodes a string of 16-bit big-endian code units as MacRoman bytes;
      # characters the encoding cannot represent become byte 0.
      def self.from_unicode(string)
        from_unicode_codepoints(string.unpack("n*")).pack("C*")
      end

      # Converts an array of MacRoman byte values to Unicode code points.
      def self.to_unicode_codepoints(array)
        array.map { |code| TO_UNICODE[code] }
      end

      # Converts an array of Unicode code points to MacRoman byte values,
      # substituting 0 for anything that has no MacRoman equivalent.
      def self.from_unicode_codepoints(array)
        array.map { |code| FROM_UNICODE[code] || 0 }
      end
    end
  end
end
View
69 lib/ttfunk/encoding/windows_1252.rb
@@ -0,0 +1,69 @@
module TTFunk
  module Encoding
    # Conversion tables and helpers for the single-byte Windows-1252 encoding.
    class Windows1252
      # Maps Windows-1252 byte values to Unicode code points. Bytes without an
      # explicit entry decode to the code point of the same value; the default
      # block supplies that identity mapping without storing anything.
      TO_UNICODE = Hash.new { |_hash, byte| byte }
      TO_UNICODE.update(
        0x80 => 0x20AC, 0x82 => 0x201A, 0x83 => 0x0192, 0x84 => 0x201E, 0x85 => 0x2026,
        0x86 => 0x2020, 0x87 => 0x2021, 0x88 => 0x02C6, 0x89 => 0x2030, 0x8A => 0x0160,
        0x8B => 0x2039, 0x8C => 0x0152, 0x8E => 0x017D, 0x91 => 0x2018, 0x92 => 0x2019,
        0x93 => 0x201C, 0x94 => 0x201D, 0x95 => 0x2022, 0x96 => 0x2013, 0x97 => 0x2014,
        # 0x9C is the lowercase oe ligature (U+0153); 0x8C is the uppercase one.
        0x98 => 0x02DC, 0x99 => 0x2122, 0x9A => 0x0161, 0x9B => 0x203A, 0x9C => 0x0153,
        0x9E => 0x017E, 0x9F => 0x0178
      )

      # Maps Unicode code points to Windows-1252 byte values; yields nil for
      # code points the encoding cannot represent. The identity fallback is
      # only taken when the byte of the same value really decodes to that code
      # point, i.e. when TO_UNICODE has no explicit (different) entry for it.
      # Otherwise e.g. U+0080 would be reported as byte 0x80, which is the
      # euro sign in Windows-1252.
      FROM_UNICODE = Hash.new do |_hash, code|
        code if (0x00..0xFF).include?(code) && !TO_UNICODE.key?(code)
      end
      FROM_UNICODE.update(TO_UNICODE.invert)

      TO_UNICODE.freeze
      FROM_UNICODE.freeze

      # Maps Windows-1252 codes to their corresponding index in the Postscript glyph
      # table (see TTFunk::Table::Post::Format10). If any entry in this array is a string,
      # it is a postscript glyph that is not in the standard list, and which should be
      # emitted specially in the TTF postscript table ('post', see format 2).
      POSTSCRIPT_GLYPH_MAPPING = [
        0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, # 0x0F
        0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, # 0x1F
        3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, # 0x2F
        19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33, 34, # 0x3F
        35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45, 46, 47, 48, 49, 50, # 0x4F
        51, 52, 53, 54, 55, 56, 57, 58, 59, 60, 61, 62, 63, 64, 65, 66, # 0x5F
        67, 68, 69, 70, 71, 72, 73, 74, 75, 76, 77, 78, 79, 80, 81, 82, # 0x6F
        83, 84, 85, 86, 87, 88, 89, 90, 91, 92, 93, 94, 95, 96, 97, 0, # 0x7F
        "Euro", 0, 196, 166, 197, 171, 130, 194, 216, 198, 228, 190, 176, 0, 230, 0, # 0x8F
        # 0x9F is Ydieresis (187); ydieresis (186) belongs to 0xFF only.
        0, 182, 183, 180, 181, 135, 178, 179, 217, 140, 229, 191, 177, 0, 231, 187, # 0x9F
        3, 163, 132, 133, 189, 150, 232, 134, 142, 139, 157, 169, 164, 16, 138, 218, # 0xAF
        131, 147, 242, 243, 141, 151, 136, 195, 222, 241, 158, 170, 245, 244, 246, 162, # 0xBF
        173, 201, 199, 174, 98, 99, 144, 100, 203, 101, 200, 202, 207, 204, 205, 206, # 0xCF
        233, 102, 211, 208, 209, 175, 103, 240, 145, 214, 212, 213, 104, 235, 237, 137, # 0xDF
        106, 105, 107, 109, 108, 110, 160, 111, 113, 112, 114, 115, 117, 116, 118, 119, # 0xEF
        234, 120, 122, 121, 123, 125, 124, 184, 161, 127, 126, 128, 129, 236, 238, 186 # 0xFF
      ].freeze

      # Returns true when the Unicode code point +character+ has a
      # Windows-1252 byte value.
      def self.covers?(character)
        !FROM_UNICODE[character].nil?
      end

      # Decodes a Windows-1252 byte string into a UTF-8 string.
      def self.to_utf8(string)
        to_unicode_codepoints(string.unpack("C*")).pack("U*")
      end

      # Decodes a Windows-1252 byte string into a string of 16-bit big-endian
      # code units (one per character).
      def self.to_unicode(string)
        to_unicode_codepoints(string.unpack("C*")).pack("n*")
      end

      # Encodes a UTF-8 string as Windows-1252 bytes; characters the encoding
      # cannot represent become byte 0.
      def self.from_utf8(string)
        from_unicode_codepoints(string.unpack("U*")).pack("C*")
      end

      # Encodes a string of 16-bit big-endian code units as Windows-1252
      # bytes; characters the encoding cannot represent become byte 0.
      def self.from_unicode(string)
        from_unicode_codepoints(string.unpack("n*")).pack("C*")
      end

      # Converts an array of Windows-1252 byte values to Unicode code points.
      def self.to_unicode_codepoints(array)
        array.map { |code| TO_UNICODE[code] }
      end

      # Converts an array of Unicode code points to Windows-1252 byte values,
      # substituting 0 for anything that has no Windows-1252 equivalent.
      def self.from_unicode_codepoints(array)
        array.map { |code| FROM_UNICODE[code] || 0 }
      end
    end
  end
end
View
48 lib/ttfunk/subset.rb
@@ -4,28 +4,51 @@
require 'ttfunk/table/hmtx'
require 'ttfunk/table/kern'
require 'ttfunk/table/loca'
+require 'ttfunk/encoding/mac_roman'
+require 'ttfunk/encoding/windows_1252'
module TTFunk
class Subset
attr_reader :original
+ attr_reader :encoding
- def initialize(original)
+ def initialize(original, encoding)
@original = original
- @subset = Set.new([0])
+ @encoding = encoding
+ @subset = Set.new
end
- def use(characters)
- @subset.merge(characters)
+ def use(character)
+ @subset << character
end
- def encode
+ def covers?(character) # Reports whether `character` is representable in this subset's @encoding.
+ case @encoding
+ when :unicode then true # no lookup is done for :unicode; every character is accepted
+ when :mac_roman then Encoding::MacRoman.covers?(character) # delegate to the MacRoman tables
+ when :windows_1252 then Encoding::Windows1252.covers?(character) # delegate to the Windows-1252 tables
+ else false # any other encoding value covers nothing
+ end
+ end
+
+ def from_unicode(character) # Translates `character` into its code in this subset's @encoding.
+ case @encoding
+ when :unicode then character # returned unchanged
+ when :mac_roman then Encoding::MacRoman::FROM_UNICODE[character] # table lookup; may be nil
+ when :windows_1252 then Encoding::Windows1252::FROM_UNICODE[character] # table lookup; may be nil
+ else nil # any other encoding value has no translation
+ end
+ end
+
+ def encode(options={})
cmap = original.cmap.unicode.first
+ # map unicode -> corresponding glyph id in original font
charmap = @subset.inject({}) { |map, code| map[code] = cmap[code]; map }
- cmap_table = TTFunk::Table::Cmap.encode(charmap)
+ cmap_table = TTFunk::Table::Cmap.encode(charmap, @encoding)
- glyph_ids = @subset.map { |character| cmap[character] }
- glyphs = collect_glyphs(glyph_ids)
+ glyph_ids = @subset.map { |character| cmap[character] } << 0
+ glyphs = collect_glyphs(glyph_ids.uniq)
old2new_glyph = cmap_table[:charmap].inject({}) { |map, (code, ids)| map[ids[:old]] = ids[:new]; map }
next_glyph_id = cmap_table[:max_glyph_id]
@@ -41,7 +64,6 @@ def encode
glyf_table = TTFunk::Table::Glyf.encode(glyphs, new2old_glyph, old2new_glyph)
loca_table = TTFunk::Table::Loca.encode(glyf_table[:offsets])
- kern_table = TTFunk::Table::Kern.encode(original.kerning, old2new_glyph)
hmtx_table = TTFunk::Table::Hmtx.encode(original.horizontal_metrics, new2old_glyph)
hhea_table = TTFunk::Table::Hhea.encode(original.horizontal_header, hmtx_table)
maxp_table = TTFunk::Table::Maxp.encode(original.maximum_profile, old2new_glyph)
@@ -50,6 +72,14 @@ def encode
name_table = TTFunk::Table::Name.encode(original.name)
head_table = TTFunk::Table::Head.encode(original.header, loca_table)
+ # For PDFs, the kerning info is all included in the PDF as the text is
+ # drawn. Thus, PDF readers do not actually use the kerning info in
+ # embedded fonts. If the library is used for something else, the generated
+ # subfont may need a kerning table; in that case, you need to opt into it.
+ if options[:kerning]
+ kern_table = TTFunk::Table::Kern.encode(original.kerning, old2new_glyph)
+ end
+
tables = { 'cmap' => cmap_table[:table],
'glyf' => glyf_table[:table],
'loca' => loca_table[:table],
View
4 lib/ttfunk/table/cmap.rb
@@ -4,8 +4,8 @@ class Cmap < Table
attr_reader :version
attr_reader :tables
- def self.encode(charmap)
- result = Cmap::Subtable.encode(charmap)
+ def self.encode(charmap, encoding)
+ result = Cmap::Subtable.encode(charmap, encoding)
# pack 'version' and 'table-count'
result[:table] = [0, 1, result.delete(:subtable)].pack("nnA*")
View
32 lib/ttfunk/table/cmap/format00.rb
@@ -1,3 +1,6 @@
+require 'ttfunk/encoding/mac_roman'
+require 'ttfunk/encoding/windows_1252'
+
module TTFunk
class Table
class Cmap
@@ -6,6 +9,35 @@ module Format00
attr_reader :language
attr_reader :code_map
# Expects a hash mapping unicode character codes to glyph ids (where the
# glyph ids are from the original font), and the symbol naming the
# single-byte encoding the subtable is written in (only :mac_roman is
# supported; anything else raises NotImplementedError).
#
# Returns a hash including a new map (:charmap) that maps the characters
# in charmap to another hash containing both the old (:old) and new (:new)
# glyph ids. The returned hash also includes a :subtable key, which
# contains the encoded subtable for the given charmap, and :max_glyph_id,
# which is one more than the highest new glyph id handed out.
#
# Characters that have no code in the target encoding still receive a new
# glyph id in :charmap, but get no slot in the 256-entry index array
# (looking them up through this subtable yields glyph 0).
def self.encode(charmap, encoding)
  translator = case encoding
               when :mac_roman then TTFunk::Encoding::MacRoman
               else raise NotImplementedError, "encoding #{encoding.inspect} is not supported"
               end

  next_id = 0
  glyph_indexes = Array.new(256, 0)
  # old glyph id => new glyph id; glyph 0 always stays glyph 0
  glyph_map = { 0 => 0 }

  new_map = charmap.keys.sort.inject({}) do |map, code|
    old_id = charmap[code]
    # several characters may share one glyph; number each glyph only once
    new_id = (glyph_map[old_id] ||= next_id += 1)
    map[code] = { :old => old_id, :new => new_id }

    # FROM_UNICODE yields nil for characters the encoding cannot represent;
    # indexing the array with nil would raise TypeError, so skip those.
    slot = translator::FROM_UNICODE[code]
    glyph_indexes[slot] = new_id if slot
    map
  end

  # format, length, language, indices
  # (length 262 = three 16-bit header fields + 256 one-byte glyph indices)
  subtable = [0, 262, 0, *glyph_indexes].pack("nnnC*")

  { :charmap => new_map, :subtable => subtable, :max_glyph_id => next_id + 1 }
end
+
def [](code)
@code_map[code] || 0
end
View
16 lib/ttfunk/table/cmap/format04.rb
@@ -12,19 +12,23 @@ module Format04
# another hash containing both the old (:old) and new (:new) glyph
# ids. The returned hash also includes a :subtable key, which contains
# the encoded subtable for the given charmap.
- def self.encode(charmap)
+ def self.encode(charmap, encoding)
end_codes = []
start_codes = []
next_id = 0
- last = nil
+ last = difference = nil
+ glyph_map = { 0 => 0 }
new_map = charmap.keys.sort.inject({}) do |map, code|
- map[code] = { :old => charmap[code], :new => next_id }
- next_id += 1
+ old = charmap[code]
+ glyph_map[old] ||= next_id += 1
+ map[code] = { :old => old, :new => glyph_map[old] }
- if last.nil? || code != last+1
+ delta = glyph_map[old] - code
+ if last.nil? || delta != difference
end_codes << last if last
start_codes << code
+ difference = delta
end
last = code
@@ -72,7 +76,7 @@ def self.encode(charmap)
subtable << end_codes.pack("n*") << "\0\0" << start_codes.pack("n*")
subtable << deltas.pack("n*") << range_offsets.pack("n*") << glyph_indices.pack("n*")
- { :charmap => new_map, :subtable => subtable, :max_glyph_id => next_id }
+ { :charmap => new_map, :subtable => subtable, :max_glyph_id => next_id+1 }
end
def [](code)
View
24 lib/ttfunk/table/cmap/subtable.rb
@@ -10,10 +10,28 @@ class Subtable
attr_reader :encoding_id
attr_reader :format
- def self.encode(charmap)
- result = Format04.encode(charmap)
+ ENCODING_MAPPINGS = {
+ :windows_1252 => { :platform_id => 0, :encoding_id => 0 },
+ :mac_roman => { :platform_id => 1, :encoding_id => 0 },
+ :unicode => { :platform_id => 0, :encoding_id => 0 }
+ }
+
+ def self.encode(charmap, encoding)
+ case encoding
+ when :mac_roman
+ result = Format00.encode(charmap, encoding)
+ when :windows_1252, :unicode
+ result = Format04.encode(charmap, encoding)
+ else
+ raise NotImplementedError, "encoding #{encoding.inspect} is not supported"
+ end
+
+ mapping = ENCODING_MAPPINGS[encoding]
+
# platform-id, encoding-id, offset
- result[:subtable] = [0, 0, 12, result[:subtable]].pack("nnNA*")
+ result[:subtable] = [mapping[:platform_id], mapping[:encoding_id],
+ 12, result[:subtable]].pack("nnNA*")
+
return result
end
View
2  lib/ttfunk/table/hmtx.rb
@@ -9,7 +9,7 @@ class Hmtx < Table
def self.encode(hmtx, mapping)
metrics = mapping.keys.sort.map do |new_id|
- metric = hmtx.for(new_id)
+ metric = hmtx.for(mapping[new_id])
[metric.advance_width, metric.left_side_bearing]
end
Please sign in to comment.
Something went wrong with that request. Please try again.