Permalink
Browse files

Merge pull request #25 from elmatou/master

rules the binary content of damn pdfs
  • Loading branch information...
2 parents f783243 + 2cdde00 commit d7a2baf6e8bade7e7dd53a2e9e7427468e8ce742 @tcocca tcocca committed Dec 19, 2011
Showing with 39 additions and 40 deletions.
  1. +24 −37 spec/active_pdftk/wrapper_spec.rb
  2. +14 −2 spec/support/inputs_helper.rb
  3. +1 −1 spec/support/matchers/content_matcher.rb
@@ -10,7 +10,7 @@ def get_input(input_type, file_name = 'spec.fields.pdf')
when :hash
{path_to_pdf(file_name) => nil}
when :file
- File.new(path_to_pdf(file_name))
+ File.new(path_to_pdf(file_name), 'rb')
when :tempfile
t = Tempfile.new('input.spec')
t.write(File.read(path_to_pdf(file_name)))
@@ -25,7 +25,7 @@ def get_output(output_type)
when :path
path_to_pdf('output.spec')
when :file
- File.new(path_to_pdf('output.spec'), 'w+')
+ File.new(path_to_pdf('output.spec'), 'wb+')
when :tempfile
Tempfile.new('output.spec')
when :stringio
@@ -63,14 +63,17 @@ def map_output_type(output_specified)
end
end
- shared_examples "a working command" do
+ shared_examples "a command" do
it "should return a #{@output.nil? ? StringIO : @output.class}" do
@call_output.should be_kind_of(@output.nil? ? StringIO : @output.class)
end
it "should return expected data" do
if example.metadata[:genesis] && @output.is_a?(String)
FileUtils.copy_entry(@output, @example_expect.to_s, true, false, true)
+ elsif example.metadata[:cleanup]
+ #cleanup_file_content!(File.open(@output, 'r:binary').read).should == cleanup_file_content!(File.open(@example_expect, 'r:binary').read) if @output.is_a?(String) # let's keep this line for debugging purposes.
+ @call_output.should look_like_the_same_pdf_as(@example_expect)
else
@call_output.should have_the_content_of(@example_expect)
end
@@ -79,22 +82,6 @@ def map_output_type(output_specified)
after(:each) { remove_output(@call_output) }
end
- shared_examples "a combination command" do
- it "should return a #{@output.nil? ? StringIO : @output.class}" do
- @call_output.should be_kind_of(@output.nil? ? StringIO : @output.class)
- end
-
- it "should return expected data" do
- if example.metadata[:genesis] && @output.is_a?(String)
- FileUtils.copy_entry(@output, @example_expect.to_s, true, false, true)
- else
- @call_output.should look_like_the_same_pdf_as(@example_expect)
- end
- end
-
- after(:each) { remove_output(@call_output) }
- end
-
inputs.each do |input_type|
outputs.each do |output_type|
@@ -106,39 +93,39 @@ def map_output_type(output_specified)
end
describe "#dump_data_fields" do
- it_behaves_like "a working command" do
+ it_behaves_like "a command" do
before(:all) { @example_expect = fixtures_path('dump_data_fields/expect.data_fields') }
before(:each) { @call_output = @pdftk.dump_data_fields(@input, :output => @output) }
end
end
describe "#fill_form" do
- it_behaves_like "a working command" do
+ it_behaves_like "a command" do
before(:all) { @example_expect = fixtures_path('fill_form/expect.pdf') }
before(:each) { @call_output = @pdftk.fill_form(@input, path_to_pdf('fill_form/spec.fdf'), :output => @output) }
end
- it_behaves_like "a working command" do
+ it_behaves_like "a command" do
before(:all) { @example_expect = fixtures_path('fill_form/expect.pdf') }
before(:each) { @call_output = @pdftk.fill_form(@input, path_to_pdf('fill_form/spec.xfdf'), :output => @output) }
end
end
describe "#generate_fdf" do
- it_behaves_like "a working command" do
+ it_behaves_like "a command" do
before(:all) { @example_expect = fixtures_path('generate_fdf/expect.fdf') }
before(:each) { @call_output = @pdftk.generate_fdf(@input,:output => @output) }
end
end
describe "#dump_data" do
- it_behaves_like "a working command" do
+ it_behaves_like "a command" do
before(:all) { @example_expect = fixtures_path('dump_data/expect.data') }
before(:each) { @call_output = @pdftk.dump_data(@input,:output => @output) }
end
end
describe "#update_info" do
- it_behaves_like "a working command" do
+ it_behaves_like "a command" do
before(:all) { @example_expect = fixtures_path('update_info/expect.pdf') }
before(:each) { @call_output = @pdftk.update_info(@input, path_to_pdf('update_info/spec.data'), :output => @output) }
end
@@ -200,43 +187,43 @@ def map_output_type(output_specified)
end
end
- describe "#background" do
- it_behaves_like "a working command" do
+ describe "#background", :cleanup => true do
+ it_behaves_like "a command" do
before(:all) { @example_expect = fixtures_path('background/expect.pdf') }
before(:each) { @call_output = @pdftk.background(get_input(input_type, 'multi.pdf'), path_to_pdf('poly.pdf'), :output => @output) }
end
end
- describe "#multibackground" do
- it_behaves_like "a working command" do
+ describe "#multibackground", :cleanup => true do
+ it_behaves_like "a command" do
before(:all) { @example_expect = fixtures_path('multibackground/expect.pdf') }
before(:each) { @call_output = @pdftk.multibackground(get_input(input_type, 'multi.pdf'), path_to_pdf('poly.pdf'), :output => @output) }
end
end
- describe "#stamp" do
- it_behaves_like "a working command" do
+ describe "#stamp", :cleanup => true do
+ it_behaves_like "a command" do
before(:all) { @example_expect = fixtures_path('stamp/expect.pdf') }
before(:each) { @call_output = @pdftk.stamp(get_input(input_type, 'multi.pdf'), path_to_pdf('poly.pdf'), :output => @output) }
end
end
- describe "#multistamp" do
- it_behaves_like "a working command" do
+ describe "#multistamp", :cleanup => true do
+ it_behaves_like "a command" do
before(:all) { @example_expect = fixtures_path('multistamp/expect.pdf') }
before(:each) { @call_output = @pdftk.multistamp(get_input(input_type, 'multi.pdf'), path_to_pdf('poly.pdf'), :output => @output) }
end
end
- describe "#cat" do
- it_behaves_like "a combination command" do
+ describe "#cat", :cleanup => true do
+ it_behaves_like "a command" do
before(:all) { @example_expect = fixtures_path('cat/expect.pdf')}
before(:each) { @call_output = @pdftk.cat([{:pdf => path_to_pdf('multi.pdf')}, {:pdf => path_to_pdf('poly.pdf'), :start => 1, :end => 'end', :orientation => 'N', :pages => 'even'}], :output => @output) }
end
end
- describe "#shuffle" do
- it_behaves_like "a combination command" do
+ describe "#shuffle", :cleanup => true do
+ it_behaves_like "a command" do
before(:all) { @example_expect = fixtures_path('shuffle/expect.pdf')}
before(:each) { @call_output = @pdftk.shuffle([{:pdf => path_to_pdf('multi.pdf')}, {:pdf => path_to_pdf('poly.pdf'), :start => 1, :end => 'end', :orientation => 'N', :pages => 'even'}], :output => @output) }
end
@@ -59,9 +59,21 @@ def open_or_rewind(target)
end
def cleanup_file_content!(text)
+
+ unless @filter
+ @filter = {
+ :date => /\(D\:.*\)/, # Remove dates ex: /CreationDate (D:20111106104455-05'00')
+ :ids => /\/ID \[<\w*><\w*>\]/, # Remove ID values ex: /ID [<4ba02a4cf55b1fc842299e6f01eb838e><33bec7dc37839cadf7ab76f3be4d4306>]
+ :stream => /stream .* 9|10 0 obj /m, # Remove some binary stream
+ :content => /\/Contents \[.*\]/,
+ :xref => /^\d{10} \d{5} n|f $/ # Remove Cross-references dictionary
+ }
+ @filter.each {|k,reg| @filter[k] = Regexp.new(reg.source.encode('ASCII-8BIT'), reg.options) if reg.source.respond_to? :encode }
+ end
+
+
text.force_encoding('ASCII-8BIT') if text.respond_to? :force_encoding # PDF embed some binary data breaking gsub with ruby 1.9.2
- text.gsub!(/\(D\:.*\)/, '') # Remove dates ex: /CreationDate (D:20111106104455-05'00')
- text.gsub!(/\/ID \[<\w*><\w*>\]/, '') # Remove ID values ex: /ID [<4ba02a4cf55b1fc842299e6f01eb838e><33bec7dc37839cadf7ab76f3be4d4306>]
+ @filter.each {|k,reg| text.gsub!(reg, '')}
text
end
@@ -59,7 +59,7 @@ def sha256_hash_of_almost(entry)
if entry.directory?
sha256_hash_of_almost(Dir.new(entry))
elsif entry.file?
- sha256_hash_of_almost(File.read(entry))
+ sha256_hash_of_almost(File.open(entry, 'r:binary').read)
end
end
end

0 comments on commit d7a2baf

Please sign in to comment.