Skip to content
Browse files

update FileWriter to use XRef Streams in files that already use them

* Several PDF readers seem to cope fine when opening a PDF that was
  originally saved with an XRef stream and has an incremental update
  that uses a traditional xref table. Adobe is not one of them; it
  refuses to open such files.
* The only solution is to detect which xref style is in use and then
  write any incremental updates in the appropriate style.
  • Loading branch information...
1 parent e876b17 commit bef6986f776b83b71946d02c265b750871dca629 @yob committed Jan 24, 2013
Showing with 79 additions and 15 deletions.
  1. +4 −0 lib/pdf/reader/file_hash.rb
  2. +68 −14 lib/pdf/reader/file_writer.rb
  3. +1 −1 lib/pdf/reader/object_hash.rb
  4. +6 −0 lib/pdf/reader/xref.rb
View
4 lib/pdf/reader/file_hash.rb
@@ -264,6 +264,10 @@ def sec_handler?
!!sec_handler
end
+ def traditional_xref?
+ @xref.traditional?
+ end
+
private
def build_security_handler(opts = {})
View
82 lib/pdf/reader/file_writer.rb
@@ -33,34 +33,61 @@ def copy_to_io(writer)
writer.write @objects.io.read
end
- def add_updated_objects_and_xref(writer)
+ def add_updated_objects(writer)
# now write the updated objects
offsets = {}
@objects.each_updated do |key, value|
offsets[key] = writer.pos
writer.write "#{key.id} #{key.gen} obj\n"
writer.write PdfObject.dump(value)
- writer.write "\nendobj\n"
+ writer.write "\r\nendobj\r\n"
end
+ offsets
+ end
- updated_xref_pos = writer.pos
- writer.write "xref\n"
+ def add_traditional_xref(writer, offsets)
+ writer.write "xref\r\n"
each_offset_group(offsets) do |group|
starts_at = group.keys.sort.first.id
- writer.write("#{starts_at} #{group.size}\n")
+ writer.write("#{starts_at} #{group.size}\r\n")
group.each do |key, offset|
- writer.write("%010d 00000 n \n" % offset)
+ writer.write("%010d 00000 n\r\n" % offset)
end
end
- updated_xref_pos
end
- def add_new_trailer(writer, xref_offset)
- writer.write "trailer\n"
- writer.write PdfObject.dump(@objects.trailer) << "\n"
+ def add_traditional_trailer(writer, xref_offset)
+ writer.write "trailer\r\n"
+ writer.write PdfObject.dump(@objects.trailer) << "\r\n"
+ writer.write "startxref\r\n"
+ writer.write "#{xref_offset}\r\n"
+ writer.write "%%EOF\r\n"
+ end
+
+ def add_stream_trailer(writer, xref_offset)
writer.write "startxref\n"
writer.write "#{xref_offset}\n"
- writer.write "%%EOF"
+ writer.write "%%EOF\n"
+ end
+
+ def add_stream_xref(writer, offsets)
+ xref_offset = writer.pos
+ max_id = @objects.keys.sort.last.id
+ reference = PDF::Reader::Reference.new(max_id, 0)
+ offsets[reference] = xref_offset
+ stream_data, index = build_xref_stream_data(offsets)
+ dict = @objects.trailer.merge(
+ :Type => :XRef,
+ :Length => stream_data.size,
+ :Index => index,
+ :W => [1,4,1],
+ :Size => @objects.keys.sort.last.id+1)
+ writer.write "#{max_id} 0 obj\n"
+ writer.write PdfObject.dump(dict) << "\n"
+ writer.write "stream\n"
+ writer.write stream_data + "\n"
+ writer.write "endstream\n"
+ writer.write "endobj\n"
end
def incremental_save_to_io(writer)
@@ -69,11 +96,38 @@ def incremental_save_to_io(writer)
writer.write @objects.io.read
writer.write "\n"
- xref_offset = add_updated_objects_and_xref(writer)
- add_new_trailer(writer, xref_offset)
+ # write the updated and new objects
+ offsets = add_updated_objects(writer)
+
+ # write a new xref table (or stream) and trailer
+ xref_offset = writer.pos
+ if @objects.traditional_xref? # if traditional xref
+ add_traditional_xref(writer, offsets)
+ add_traditional_trailer(writer, xref_offset)
+ else
+ add_stream_xref(writer, offsets)
+ add_stream_trailer(writer, xref_offset)
+ end
end
- private
+ def build_xref_stream_data(offsets)
+ data = StringIO.new
+ index = []
+ each_offset_group(offsets) do |group|
+ starts_at = group.keys.sort.first.id
+ index << starts_at
+ index << group.size
+ group.each do |key, offset|
+ data.write [1, offset, 0].pack("CNC")
+ end
+ end
+
+ if "".respond_to?(:force_encoding)
+ return data.string.force_encoding("binary"), index
+ else
+ return data.string, index
+ end
+ end
def each_offset_group(offsets, &block)
keys = offsets.keys.sort
View
2 lib/pdf/reader/object_hash.rb
@@ -39,7 +39,7 @@ class ObjectHash
def_delegators :@objects, :size, :length, :has_key?, :include?, :key?, :empty?
def_delegators :@objects, :member?, :value?, :has_value?, :keys, :values
def_delegators :@objects, :values_at, :page_references
- def_delegators :encrypted?, :sec_handler?
+ def_delegators :@objects, :encrypted?, :sec_handler?, :traditional_xref?
def initialize(input, opts = {})
@io = extract_io_from(input)
View
6 lib/pdf/reader/xref.rb
@@ -57,6 +57,7 @@ def initialize (io)
@io = io
@junk_offset = calc_junk_offset(io) || 0
@xref = {}
+ @traditional = true
@trailer = load_offsets
end
@@ -85,6 +86,10 @@ def each(&block)
end
end
################################################################################
+ def traditional?
+ @traditional
+ end
+ ################################################################################
private
################################################################################
# Read a xref table from the underlying buffer.
@@ -160,6 +165,7 @@ def load_xref_stream(stream)
trailer = Hash[stream.hash.select { |key, value|
[:Size, :Prev, :Root, :Encrypt, :Info, :ID].include?(key)
}]
+ @traditional = false
widths = stream.hash[:W]
entry_length = widths.inject(0) { |s, w| s + w }

0 comments on commit bef6986

Please sign in to comment.
Something went wrong with that request. Please try again.