Permalink
Browse files

Merge remote-tracking branch 'alexdowad/master'

Conflicts:
	lib/pdf/reader/encoding.rb
  • Loading branch information...
2 parents d853c40 + 23672a8 commit 9d8214ddf87d522869d9e16e2c2e99922b2e597c @yob committed Sep 9, 2012
Showing with 156 additions and 132 deletions.
  1. +6 −6 lib/pdf/reader/buffer.rb
  2. +16 −27 lib/pdf/reader/encoding.rb
  3. +110 −46 lib/pdf/reader/page_state.rb
  4. +1 −1 lib/pdf/reader/page_text_receiver.rb
  5. +23 −52 lib/pdf/reader/parser.rb
@@ -1,4 +1,4 @@
-# coding: utf-8
+# coding: ASCII-8BIT
################################################################################
#
@@ -236,7 +236,7 @@ def prepare_hex_token
if byte.nil?
finished = true # unbalanced params
elsif (48..57).include?(byte) || (65..90).include?(byte) || (97..122).include?(byte)
- str << byte.chr
+ str << byte
elsif byte <= 32
# ignore it
else
@@ -266,15 +266,15 @@ def prepare_literal_token
if byte.nil?
count = 0 # unbalanced params
elsif byte == 0x5C
- str << byte.chr << @io.getbyte.chr
+ str << byte << @io.getbyte
elsif byte == 0x28 # "("
str << "("
count += 1
elsif byte == 0x29 # ")"
count -= 1
str << ")" unless count == 0
else
- str << byte.chr unless count == 0
+ str << byte unless count == 0
end
end
@@ -327,7 +327,7 @@ def prepare_regular_token
@io.getbyte
@tokens << ">>"
else
- @tokens << byte.chr
+ @tokens << ">"
end
tok = ""
break
@@ -351,7 +351,7 @@ def prepare_regular_token
tok = ""
break
else
- tok << byte.chr
+ tok << byte
end
end
@@ -34,6 +34,9 @@ class Encoding # :nodoc:
attr_reader :unpack
def initialize(enc)
+ @mapping = {} # maps from character codes to Unicode codepoints
+ # also maps control and invalid chars to UNKNOWN_CHAR
+
if enc.kind_of?(Hash)
self.differences = enc[:Differences] if enc[:Differences]
enc = enc[:Encoding] || enc[:BaseEncoding]
@@ -46,7 +49,9 @@ def initialize(enc)
@enc_name = enc
@unpack = get_unpack(enc)
@map_file = get_mapping_file(enc)
+
load_mapping(@map_file) if @map_file
+ add_control_chars_to_mapping
end
# set the differences table for this encoding. should be an array in the following format:
@@ -68,13 +73,15 @@ def differences=(diff)
byte = val.to_i
else
@differences[byte] = val
+ @mapping[byte] = names_to_unicode[val]
byte += 1
end
end
@differences
end
def differences
+ # this method is only used by the spec tests
@differences ||= {}
end
@@ -111,28 +118,8 @@ def little_boxes(times)
end
def convert_to_utf8(str)
- ret = str.unpack(unpack).map { |c|
- differences[c] || c
- }.map { |c|
- mapping[c] || c
- }.map { |c|
- names_to_unicode[c] || c
- }.map { |c|
- if PDF::Reader::Encoding::CONTROL_CHARS.include?(c)
- PDF::Reader::Encoding::UNKNOWN_CHAR
- else
- c
- end
- }.map { |c|
- if c.nil? || !c.is_a?(Fixnum)
- PDF::Reader::Encoding::UNKNOWN_CHAR
- else
- c
- end
- }.pack("U*")
-
+ ret = str.unpack(unpack).map! { |c| @mapping[c] || c }.pack("U*")
ret.force_encoding("UTF-8") if ret.respond_to?(:force_encoding)
-
ret
end
@@ -166,12 +153,8 @@ def get_mapping_file(enc)
end
end
- def mapping
- @mapping ||= {}
- end
-
def has_mapping?
- mapping.size > 0
+ @mapping.size > 0
end
def names_to_unicode
@@ -185,10 +168,16 @@ def load_mapping(file)
File.open(file, mode) do |f|
f.each do |l|
m, single_byte, unicode = *l.match(/([0-9A-Za-z]+);([0-9A-F]{4})/)
- mapping["0x#{single_byte}".hex] = "0x#{unicode}".hex if single_byte
+ @mapping["0x#{single_byte}".hex] = "0x#{unicode}".hex if single_byte
end
end
end
+ def add_control_chars_to_mapping
+ PDF::Reader::Encoding::CONTROL_CHARS.each do |byte|
+ @mapping[byte] = PDF::Reader::Encoding::UNKNOWN_CHAR
+ end
+ @mapping[nil] = PDF::Reader::Encoding::UNKNOWN_CHAR
+ end
end
end
@@ -1,22 +1,19 @@
# coding: utf-8
-require 'matrix'
-
module PDF
class Reader
class PageState
DEFAULT_GRAPHICS_STATE = {
- :ctm => Matrix.identity(3),
- :char_spacing => 0,
- :word_spacing => 0,
- :h_scaling => 100,
- :text_leading => 0,
- :text_font => nil,
+ :char_spacing => 0,
+ :word_spacing => 0,
+ :h_scaling => 100,
+ :text_leading => 0,
+ :text_font => nil,
:text_font_size => nil,
- :text_mode => 0,
- :text_rise => 0,
- :text_knockout => 0
+ :text_mode => 0,
+ :text_rise => 0,
+ :text_knockout => 0
}
# starting a new page
@@ -28,6 +25,7 @@ def initialize(page)
@xobject_stack = [page.xobjects]
@cs_stack = [page.color_spaces]
@stack = [DEFAULT_GRAPHICS_STATE.dup]
+ state[:ctm] = identity_matrix
end
#####################################################
@@ -54,30 +52,24 @@ def restore_graphics_state
# with the new matrix to form the updated matrix.
#
def concatenate_matrix(a, b, c, d, e, f)
- transform = Matrix[
- [a, b, 0],
- [c, d, 0],
- [e, f, 1]
- ]
if state[:ctm]
- state[:ctm] = transform * state[:ctm]
+ multiply!(state[:ctm], a,b,0, c,d,0, e,f,1)
else
- state[:ctm] = transform
+ state[:ctm] = [a,b,0, c,d,0, e,f,1]
end
+ @text_rendering_matrix = nil # invalidate cached value
end
#####################################################
# Text Object Operators
#####################################################
def begin_text_object
- @text_matrix = Matrix.identity(3)
- @text_line_matrix = Matrix.identity(3)
+ @text_matrix = identity_matrix
end
def end_text_object
- @text_matrix = Matrix.identity(3)
- @text_line_matrix = Matrix.identity(3)
+ # don't need to do anything
end
#####################################################
@@ -98,7 +90,7 @@ def set_text_font_and_size(label, size)
end
def font_size
- state[:text_font_size] * @text_matrix[0,0]
+ state[:text_font_size] * @text_matrix[0]
end
def set_text_leading(leading)
@@ -122,12 +114,18 @@ def set_word_spacing(word_spacing)
#####################################################
def move_text_position(x, y) # Td
- temp_matrix = Matrix[
- [1, 0, 0],
- [0, 1, 0],
- [x, y, 1]
- ]
- @text_matrix = @text_line_matrix = temp_matrix * @text_line_matrix
+ # multiply the following matrix by @text_matrix,
+ # and store the result back into @text_matrix:
+ # 1 0 0
+ # 0 1 0
+ # x y 1
+ # (matrix multiplication code has been inlined for performance)
+
+ a2,b2,c2, d2,e2,f2, g2,h2,i2 = @text_matrix
+ @text_matrix[6] = (x * a2) + (y * d2) + g2
+ @text_matrix[7] = (x * b2) + (y * e2) + h2
+ @text_matrix[8] = (x * c2) + (y * f2) + i2
+ @text_rendering_matrix = nil # invalidate cached value
end
def move_text_position_and_set_leading(x, y) # TD
@@ -136,11 +134,12 @@ def move_text_position_and_set_leading(x, y) # TD
end
def set_text_matrix_and_text_line_matrix(a, b, c, d, e, f) # Tm
- @text_matrix = @text_line_matrix = Matrix[
- [a, b, 0],
- [c, d, 0],
- [e, f, 1]
+ @text_matrix = [
+ a, b, 0,
+ c, d, 0,
+ e, f, 1
]
+ @text_rendering_matrix = nil # invalidate cached value
end
def move_to_start_of_next_line # T*
@@ -199,8 +198,8 @@ def invoke_xobject(label)
#
def ctm_transform(x, y, z = 1)
[
- (ctm[0,0] * x) + (ctm[1,0] * y) + (ctm[2,0] * z),
- (ctm[0,1] * x) + (ctm[1,1] * y) + (ctm[2,1] * z)
+ (ctm[0] * x) + (ctm[3] * y) + (ctm[6] * z),
+ (ctm[1] * x) + (ctm[4] * y) + (ctm[7] * z)
]
end
@@ -210,8 +209,8 @@ def ctm_transform(x, y, z = 1)
def trm_transform(x, y, z = 1)
trm = text_rendering_matrix
[
- (trm[0,0] * x) + (trm[1,0] * y) + (trm[2,0] * z),
- (trm[0,1] * x) + (trm[1,1] * y) + (trm[2,1] * z)
+ (trm[0] * x) + (trm[2] * y) + (trm[4] * z),
+ (trm[1] * x) + (trm[3] * y) + (trm[5] * z)
]
end
@@ -240,18 +239,51 @@ def find_xobject(label)
dict ? dict[label] : nil
end
- private
-
def text_rendering_matrix
- state_matrix = Matrix[
- [font_size * state[:h_scaling], 0, 0],
- [0, font_size, 0],
- [0, state[:text_rise], 1]
- ]
-
- state_matrix * @text_matrix * ctm
+ @text_rendering_matrix ||= begin
+ # original code:
+ # state_matrix = [
+ # font_size * state[:h_scaling], 0, 0,
+ # 0, font_size, 0,
+ # 0, state[:text_rise], 1
+ # ]
+ # multiply!(state_matrix, *@text_matrix)
+ # multiply!(state_matrix, *ctm)
+
+ # (matrix multiplication has been inlined for performance)
+ # (we also take advantage of the fact that the top-right and middle-right
+ # elements of @text_matrix are always zero, the top-right and
+ # middle-right elements of ctm are always zero, and the bottom-right
+ # element of ctm is always one)
+ # (also, the right-hand column of state_matrix will never be used)
+
+ a1,b1,c1, d1,e1,f1, g1,h1,i1 = @text_matrix # c1 and f1 will always be 0
+ a2,b2,c2, d2,e2,f2, g2,h2,i2 = ctm # c2 and f2 will always be 0, i2 will always be 1
+
+ scaled_font_size = font_size * state[:h_scaling]
+ text_rise = state[:text_rise]
+ scaled_font_size_a1 = scaled_font_size * a1
+ scaled_font_size_b1 = scaled_font_size * b1
+ font_size_d1 = font_size * d1
+ font_size_e1 = font_size * e1
+ text_rise_d1 = (text_rise * d1) + g1
+ text_rise_e1 = (text_rise * e1) + h1
+
+ [
+ (scaled_font_size_a1 * a2) + (scaled_font_size_b1 * d2),
+ (scaled_font_size_a1 * b2) + (scaled_font_size_b1 * e2),
+ # 0, # omitted, next index represents middle-left
+ (font_size_d1 * a2) + (font_size_e1 * d2),
+ (font_size_d1 * b2) + (font_size_e1 * e2),
+ # 0, # omitted, next index represents bottom-left
+ (text_rise_d1 * a2) + (text_rise_e1 * d2) + (i1 * g2),
+ (text_rise_d1 * b2) + (text_rise_e1 * e2) + (i1 * h2)
+ ]
+ end
end
+ private
+
# return the current transformation matrix
#
def ctm
@@ -290,6 +322,38 @@ def clone_state
end
end
+ #####################################################
+ # Low-level Matrix Operations
+ #####################################################
+
+ # This class uses 3x3 matrices to represent geometric transformations.
+ # Each matrix is stored as a flat, row-major array of 9 elements, so
+ # the array [a,b,c,d,e,f,g,h,i] represents the matrix:
+ # a b c
+ # d e f
+ # g h i
+
+ def identity_matrix
+ [1,0,0, 0,1,0, 0,0,1]
+ end
+
+ # multiply two 3x3 matrices, computing m1 x m2 (m1 is the left operand)
+ # the second is represented by the last 9 scalar arguments
+ # store the results back into the first (to avoid allocating memory)
+ #
+ def multiply!(m1, a2,b2,c2, d2,e2,f2, g2,h2,i2)
+ a1,b1,c1, d1,e1,f1, g1,h1,i1 = m1
+ m1[0] = (a1 * a2) + (b1 * d2) + (c1 * g2)
+ m1[1] = (a1 * b2) + (b1 * e2) + (c1 * h2)
+ m1[2] = (a1 * c2) + (b1 * f2) + (c1 * i2)
+ m1[3] = (d1 * a2) + (e1 * d2) + (f1 * g2)
+ m1[4] = (d1 * b2) + (e1 * e2) + (f1 * h2)
+ m1[5] = (d1 * c2) + (e1 * f2) + (f1 * i2)
+ m1[6] = (g1 * a2) + (h1 * d2) + (i1 * g2)
+ m1[7] = (g1 * b2) + (h1 * e2) + (i1 * h2)
+ m1[8] = (g1 * c2) + (h1 * f2) + (i1 * i2)
+ m1
+ end
end
end
end
Oops, something went wrong. Retry.

0 comments on commit 9d8214d

Please sign in to comment.