Skip to content

Commit

Permalink
Merge remote-tracking branch 'alexdowad/master'
Browse files Browse the repository at this point in the history
Conflicts:
	lib/pdf/reader/encoding.rb
  • Loading branch information
yob committed Sep 9, 2012
2 parents d853c40 + 23672a8 commit 9d8214d
Show file tree
Hide file tree
Showing 5 changed files with 156 additions and 132 deletions.
12 changes: 6 additions & 6 deletions lib/pdf/reader/buffer.rb
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
# coding: utf-8
# coding: ASCII-8BIT

################################################################################
#
Expand Down Expand Up @@ -236,7 +236,7 @@ def prepare_hex_token
if byte.nil?
finished = true # unbalanced params
elsif (48..57).include?(byte) || (65..90).include?(byte) || (97..122).include?(byte)
str << byte.chr
str << byte
elsif byte <= 32
# ignore it
else
Expand Down Expand Up @@ -266,15 +266,15 @@ def prepare_literal_token
if byte.nil?
count = 0 # unbalanced params
elsif byte == 0x5C
str << byte.chr << @io.getbyte.chr
str << byte << @io.getbyte
elsif byte == 0x28 # "("
str << "("
count += 1
elsif byte == 0x29 # ")"
count -= 1
str << ")" unless count == 0
else
str << byte.chr unless count == 0
str << byte unless count == 0
end
end

Expand Down Expand Up @@ -327,7 +327,7 @@ def prepare_regular_token
@io.getbyte
@tokens << ">>"
else
@tokens << byte.chr
@tokens << ">"
end
tok = ""
break
Expand All @@ -351,7 +351,7 @@ def prepare_regular_token
tok = ""
break
else
tok << byte.chr
tok << byte
end
end

Expand Down
43 changes: 16 additions & 27 deletions lib/pdf/reader/encoding.rb
Original file line number Diff line number Diff line change
Expand Up @@ -34,6 +34,9 @@ class Encoding # :nodoc:
attr_reader :unpack

def initialize(enc)
@mapping = {} # maps from character codes to Unicode codepoints
# also maps control and invalid chars to UNKNOWN_CHAR

if enc.kind_of?(Hash)
self.differences = enc[:Differences] if enc[:Differences]
enc = enc[:Encoding] || enc[:BaseEncoding]
Expand All @@ -46,7 +49,9 @@ def initialize(enc)
@enc_name = enc
@unpack = get_unpack(enc)
@map_file = get_mapping_file(enc)

load_mapping(@map_file) if @map_file
add_control_chars_to_mapping
end

# set the differences table for this encoding. should be an array in the following format:
Expand All @@ -68,13 +73,15 @@ def differences=(diff)
byte = val.to_i
else
@differences[byte] = val
@mapping[byte] = names_to_unicode[val]
byte += 1
end
end
@differences
end

def differences
  # Reader kept for the spec tests only; hands back the differences table,
  # creating an empty one the first time it is asked for.
  @differences = {} unless @differences
  @differences
end

Expand Down Expand Up @@ -111,28 +118,8 @@ def little_boxes(times)
end

def convert_to_utf8(str)
ret = str.unpack(unpack).map { |c|
differences[c] || c
}.map { |c|
mapping[c] || c
}.map { |c|
names_to_unicode[c] || c
}.map { |c|
if PDF::Reader::Encoding::CONTROL_CHARS.include?(c)
PDF::Reader::Encoding::UNKNOWN_CHAR
else
c
end
}.map { |c|
if c.nil? || !c.is_a?(Fixnum)
PDF::Reader::Encoding::UNKNOWN_CHAR
else
c
end
}.pack("U*")

ret = str.unpack(unpack).map! { |c| @mapping[c] || c }.pack("U*")
ret.force_encoding("UTF-8") if ret.respond_to?(:force_encoding)

ret
end

Expand Down Expand Up @@ -166,12 +153,8 @@ def get_mapping_file(enc)
end
end

def mapping
@mapping ||= {}
end

def has_mapping?
mapping.size > 0
@mapping.size > 0
end

def names_to_unicode
Expand All @@ -185,10 +168,16 @@ def load_mapping(file)
File.open(file, mode) do |f|
f.each do |l|
m, single_byte, unicode = *l.match(/([0-9A-Za-z]+);([0-9A-F]{4})/)
mapping["0x#{single_byte}".hex] = "0x#{unicode}".hex if single_byte
@mapping["0x#{single_byte}".hex] = "0x#{unicode}".hex if single_byte
end
end
end

# Points every code listed in CONTROL_CHARS, plus nil, at UNKNOWN_CHAR in
# @mapping, overwriting any entry already stored under those keys.
def add_control_chars_to_mapping
  control_codes = PDF::Reader::Encoding::CONTROL_CHARS
  placeholder   = PDF::Reader::Encoding::UNKNOWN_CHAR

  control_codes.each { |code| @mapping[code] = placeholder }
  @mapping[nil] = placeholder
end
end
end
156 changes: 110 additions & 46 deletions lib/pdf/reader/page_state.rb
Original file line number Diff line number Diff line change
@@ -1,22 +1,19 @@
# coding: utf-8

require 'matrix'

module PDF
class Reader
class PageState

DEFAULT_GRAPHICS_STATE = {
:ctm => Matrix.identity(3),
:char_spacing => 0,
:word_spacing => 0,
:h_scaling => 100,
:text_leading => 0,
:text_font => nil,
:char_spacing => 0,
:word_spacing => 0,
:h_scaling => 100,
:text_leading => 0,
:text_font => nil,
:text_font_size => nil,
:text_mode => 0,
:text_rise => 0,
:text_knockout => 0
:text_mode => 0,
:text_rise => 0,
:text_knockout => 0
}

# starting a new page
Expand All @@ -28,6 +25,7 @@ def initialize(page)
@xobject_stack = [page.xobjects]
@cs_stack = [page.color_spaces]
@stack = [DEFAULT_GRAPHICS_STATE.dup]
state[:ctm] = identity_matrix
end

#####################################################
Expand All @@ -54,30 +52,24 @@ def restore_graphics_state
# with the new matrix to form the updated matrix.
#
# Combines the 3x3 matrix
#   a b 0
#   c d 0
#   e f 1
# with state[:ctm], or installs it as state[:ctm] when none is set yet,
# and then drops the cached text rendering matrix.
#
# NOTE(review): this hunk shows two generations of the body side by side:
# the Matrix-based statements and the flat 9-element array statements that
# replace them.
def concatenate_matrix(a, b, c, d, e, f)
# Matrix-based form of the incoming transform
transform = Matrix[
[a, b, 0],
[c, d, 0],
[e, f, 1]
]
if state[:ctm]
# Matrix form: the product is (transform x ctm)
state[:ctm] = transform * state[:ctm]
# NOTE(review): multiply!(m1, ...) stores (m1 x second matrix) back into m1,
# so this line yields (ctm x transform) -- the opposite operand order from
# the Matrix line above. Matrix multiplication is not commutative, so the two
# forms disagree whenever both matrices are non-trivial. The PDF reference
# appears to define cm as (new x current); confirm which order is intended.
multiply!(state[:ctm], a,b,0, c,d,0, e,f,1)
else
state[:ctm] = transform
# flat-array form: row-major 9-element array
state[:ctm] = [a,b,0, c,d,0, e,f,1]
end
@text_rendering_matrix = nil # invalidate cached value
end

#####################################################
# Text Object Operators
#####################################################

def begin_text_object
@text_matrix = Matrix.identity(3)
@text_line_matrix = Matrix.identity(3)
@text_matrix = identity_matrix
end

def end_text_object
@text_matrix = Matrix.identity(3)
@text_line_matrix = Matrix.identity(3)
# don't need to do anything
end

#####################################################
Expand All @@ -98,7 +90,7 @@ def set_text_font_and_size(label, size)
end

def font_size
state[:text_font_size] * @text_matrix[0,0]
state[:text_font_size] * @text_matrix[0]
end

def set_text_leading(leading)
Expand All @@ -122,12 +114,18 @@ def set_word_spacing(word_spacing)
#####################################################

def move_text_position(x, y) # Td
temp_matrix = Matrix[
[1, 0, 0],
[0, 1, 0],
[x, y, 1]
]
@text_matrix = @text_line_matrix = temp_matrix * @text_line_matrix
# multiply the following matrix by @text_matrix,
# and store the result back into @text_matrix:
# 1 0 0
# 0 1 0
# x y 1
# (matrix multiplication code has been inlined for performance)

a2,b2,c2, d2,e2,f2, g2,h2,i2 = @text_matrix
@text_matrix[6] = (x * a2) + (y * d2) + g2
@text_matrix[7] = (x * b2) + (y * e2) + h2
@text_matrix[8] = (x * c2) + (y * f2) + i2
@text_rendering_matrix = nil # invalidate cached value
end

def move_text_position_and_set_leading(x, y) # TD
Expand All @@ -136,11 +134,12 @@ def move_text_position_and_set_leading(x, y) # TD
end

def set_text_matrix_and_text_line_matrix(a, b, c, d, e, f) # Tm
@text_matrix = @text_line_matrix = Matrix[
[a, b, 0],
[c, d, 0],
[e, f, 1]
@text_matrix = [
a, b, 0,
c, d, 0,
e, f, 1
]
@text_rendering_matrix = nil # invalidate cached value
end

def move_to_start_of_next_line # T*
Expand Down Expand Up @@ -199,8 +198,8 @@ def invoke_xobject(label)
#
def ctm_transform(x, y, z = 1)
[
(ctm[0,0] * x) + (ctm[1,0] * y) + (ctm[2,0] * z),
(ctm[0,1] * x) + (ctm[1,1] * y) + (ctm[2,1] * z)
(ctm[0] * x) + (ctm[3] * y) + (ctm[6] * z),
(ctm[1] * x) + (ctm[4] * y) + (ctm[7] * z)
]
end

Expand All @@ -210,8 +209,8 @@ def ctm_transform(x, y, z = 1)
def trm_transform(x, y, z = 1)
trm = text_rendering_matrix
[
(trm[0,0] * x) + (trm[1,0] * y) + (trm[2,0] * z),
(trm[0,1] * x) + (trm[1,1] * y) + (trm[2,1] * z)
(trm[0] * x) + (trm[2] * y) + (trm[4] * z),
(trm[1] * x) + (trm[3] * y) + (trm[5] * z)
]
end

Expand Down Expand Up @@ -240,18 +239,51 @@ def find_xobject(label)
dict ? dict[label] : nil
end

private

def text_rendering_matrix
state_matrix = Matrix[
[font_size * state[:h_scaling], 0, 0],
[0, font_size, 0],
[0, state[:text_rise], 1]
]

state_matrix * @text_matrix * ctm
@text_rendering_matrix ||= begin
# original code:
# state_matrix = [
# font_size * state[:h_scaling], 0, 0,
# 0, font_size, 0,
# 0, state[:text_rise], 1
# ]
# multiply!(state_matrix, *@text_matrix)
# multiply!(state_matrix, *ctm)

# (matrix multiplication has been inlined for performance)
# (we also take advantage of the fact that the top-right and middle-right
# elements of @text_matrix are always zero, the top-right and
# middle-right elements of ctm are always zero, and the bottom-right
# element of ctm is always one)
# (also, the right-hand column of state_matrix will never be used)

a1,b1,c1, d1,e1,f1, g1,h1,i1 = @text_matrix # c1 and f1 will always be 0
a2,b2,c2, d2,e2,f2, g2,h2,i2 = ctm # c2 and f2 will always be 0, i2 will always be 1

scaled_font_size = font_size * state[:h_scaling]
text_rise = state[:text_rise]
scaled_font_size_a1 = scaled_font_size * a1
scaled_font_size_b1 = scaled_font_size * b1
font_size_d1 = font_size * d1
font_size_e1 = font_size * e1
text_rise_d1 = (text_rise * d1) + g1
text_rise_e1 = (text_rise * e1) + h1

[
(scaled_font_size_a1 * a2) + (scaled_font_size_b1 * d2),
(scaled_font_size_a1 * b2) + (scaled_font_size_b1 * e2),
# 0, # omitted, next index represents middle-left
(font_size_d1 * a2) + (font_size_e1 * d2),
(font_size_d1 * b2) + (font_size_e1 * e2),
# 0, # omitted, next index represents bottom-left
(text_rise_d1 * a2) + (text_rise_e1 * d2) + (i1 * g2),
(text_rise_d1 * b2) + (text_rise_e1 * e2) + (i1 * h2)
]
end
end

private

# return the current transformation matrix
#
def ctm
Expand Down Expand Up @@ -290,6 +322,38 @@ def clone_state
end
end

#####################################################
# Low-level Matrix Operations
#####################################################

# This class uses 3x3 matrices to represent geometric transformations
# These matrices are represented by arrays with 9 elements
# The array [a,b,c,d,e,f,g,h,i] would represent a matrix like:
# a b c
# d e f
# g h i

# Builds a new 3x3 identity matrix in the flat, row-major 9-element array
# form: ones on the diagonal, zeros elsewhere.
def identity_matrix
  # the diagonal of a row-major 3x3 sits at indices 0, 4 and 8
  Array.new(9) { |index| (index % 4).zero? ? 1 : 0 }
end

# In-place product of two 3x3 matrices.
# m1 is a 9-element row-major array; the right-hand matrix arrives as nine
# scalars (a2..i2) in the same row-major order.
# The result (m1 x right-hand matrix) is written back into m1, which is
# also the return value.
#
def multiply!(m1, a2,b2,c2, d2,e2,f2, g2,h2,i2)
  # walk the rows of m1 by the index of their first element: 0, 3, 6
  0.step(6, 3) do |row|
    # read the whole row before any of its slots are overwritten
    left, mid, right = m1[row], m1[row + 1], m1[row + 2]

    m1[row]     = (left * a2) + (mid * d2) + (right * g2)
    m1[row + 1] = (left * b2) + (mid * e2) + (right * h2)
    m1[row + 2] = (left * c2) + (mid * f2) + (right * i2)
  end
  m1
end
end
end
end
Expand Down
Loading

0 comments on commit 9d8214d

Please sign in to comment.