Skip to content

HTTPS clone URL

Subversion checkout URL

You can clone with HTTPS or Subversion.

Download ZIP
Browse files

update FileWriter to use XRef Streams in files that already use them

* Several PDF readers seem to cope fine when opening a PDF that was
  originally saved with an XRef stream and has an incremental update
  that uses a traditional xref table. Adobe is not one of them; it
  refuses to open such files.
* The only solution is to detect which xref style is in use and then
  write any incremental updates in the matching style.
  • Loading branch information...
commit bef6986f776b83b71946d02c265b750871dca629 1 parent e876b17
@yob authored
View
4 lib/pdf/reader/file_hash.rb
@@ -264,6 +264,10 @@ def sec_handler?
!!sec_handler
end
+ def traditional_xref?
+ @xref.traditional?
+ end
+
private
def build_security_handler(opts = {})
View
82 lib/pdf/reader/file_writer.rb
@@ -33,34 +33,61 @@ def copy_to_io(writer)
writer.write @objects.io.read
end
- def add_updated_objects_and_xref(writer)
+ def add_updated_objects(writer)
# now write the updated objects
offsets = {}
@objects.each_updated do |key, value|
offsets[key] = writer.pos
writer.write "#{key.id} #{key.gen} obj\n"
writer.write PdfObject.dump(value)
- writer.write "\nendobj\n"
+ writer.write "\r\nendobj\r\n"
end
+ offsets
+ end
- updated_xref_pos = writer.pos
- writer.write "xref\n"
+ def add_traditional_xref(writer, offsets)
+ writer.write "xref\r\n"
each_offset_group(offsets) do |group|
starts_at = group.keys.sort.first.id
- writer.write("#{starts_at} #{group.size}\n")
+ writer.write("#{starts_at} #{group.size}\r\n")
group.each do |key, offset|
- writer.write("%010d 00000 n \n" % offset)
+ writer.write("%010d 00000 n\r\n" % offset)
end
end
- updated_xref_pos
end
- def add_new_trailer(writer, xref_offset)
- writer.write "trailer\n"
- writer.write PdfObject.dump(@objects.trailer) << "\n"
+ def add_traditional_trailer(writer, xref_offset)
+ writer.write "trailer\r\n"
+ writer.write PdfObject.dump(@objects.trailer) << "\r\n"
+ writer.write "startxref\r\n"
+ writer.write "#{xref_offset}\r\n"
+ writer.write "%%EOF\r\n"
+ end
+
+ def add_stream_trailer(writer, xref_offset)
writer.write "startxref\n"
writer.write "#{xref_offset}\n"
- writer.write "%%EOF"
+ writer.write "%%EOF\n"
+ end
+
+ def add_stream_xref(writer, offsets)
+ xref_offset = writer.pos
+ max_id = @objects.keys.sort.last.id
+ reference = PDF::Reader::Reference.new(max_id, 0)
+ offsets[reference] = xref_offset
+ stream_data, index = build_xref_stream_data(offsets)
+ dict = @objects.trailer.merge(
+ :Type => :XRef,
+ :Length => stream_data.size,
+ :Index => index,
+ :W => [1,4,1],
+ :Size => @objects.keys.sort.last.id+1)
+ writer.write "#{max_id} 0 obj\n"
+ writer.write PdfObject.dump(dict) << "\n"
+ writer.write "stream\n"
+ writer.write stream_data + "\n"
+ writer.write "endstream\n"
+ writer.write "endobj\n"
end
def incremental_save_to_io(writer)
@@ -69,11 +96,38 @@ def incremental_save_to_io(writer)
writer.write @objects.io.read
writer.write "\n"
- xref_offset = add_updated_objects_and_xref(writer)
- add_new_trailer(writer, xref_offset)
+ # write the updated and new objects
+ offsets = add_updated_objects(writer)
+
+ # write a new xref table (or stream) and trailer
+ xref_offset = writer.pos
+ if @objects.traditional_xref? # if traditional xref
+ add_traditional_xref(writer, offsets)
+ add_traditional_trailer(writer, xref_offset)
+ else
+ add_stream_xref(writer, offsets)
+ add_stream_trailer(writer, xref_offset)
+ end
end
- private
+ def build_xref_stream_data(offsets)
+ data = StringIO.new
+ index = []
+ each_offset_group(offsets) do |group|
+ starts_at = group.keys.sort.first.id
+ index << starts_at
+ index << group.size
+ group.each do |key, offset|
+ data.write [1, offset, 0].pack("CNC")
+ end
+ end
+
+ if "".respond_to?(:force_encoding)
+ return data.string.force_encoding("binary"), index
+ else
+ return data.string, index
+ end
+ end
def each_offset_group(offsets, &block)
keys = offsets.keys.sort
View
2  lib/pdf/reader/object_hash.rb
@@ -39,7 +39,7 @@ class ObjectHash
def_delegators :@objects, :size, :length, :has_key?, :include?, :key?, :empty?
def_delegators :@objects, :member?, :value?, :has_value?, :keys, :values
def_delegators :@objects, :values_at, :page_references
- def_delegators :encrypted?, :sec_handler?
+ def_delegators :@objects, :encrypted?, :sec_handler?, :traditional_xref?
def initialize(input, opts = {})
@io = extract_io_from(input)
View
6 lib/pdf/reader/xref.rb
@@ -57,6 +57,7 @@ def initialize (io)
@io = io
@junk_offset = calc_junk_offset(io) || 0
@xref = {}
+ @traditional = true
@trailer = load_offsets
end
@@ -85,6 +86,10 @@ def each(&block)
end
end
################################################################################
+ def traditional?
+ @traditional
+ end
+ ################################################################################
private
################################################################################
# Read a xref table from the underlying buffer.
@@ -160,6 +165,7 @@ def load_xref_stream(stream)
trailer = Hash[stream.hash.select { |key, value|
[:Size, :Prev, :Root, :Encrypt, :Info, :ID].include?(key)
}]
+ @traditional = false
widths = stream.hash[:W]
entry_length = widths.inject(0) { |s, w| s + w }
Please sign in to comment.
Something went wrong with that request. Please try again.