Skip to content

HTTPS clone URL

Subversion checkout URL

You can clone with HTTPS or Subversion.

Download ZIP
Browse files

rough experiments in parsing speed with treetop

  • Loading branch information...
commit 32671686b72582888ab98b9fbd7dadfb47e8763f 1 parent 0b64809
@yob authored
View
19 lib/pdf/reader/new_parser.rb
@@ -90,7 +90,7 @@ def to_ruby
class NewParser
Treetop.load(File.join(File.dirname(__FILE__), 'pdf.treetop'))
- attr_reader :pos
+ attr_reader :pos, :count
def initialize(data)
@data = data
@@ -99,15 +99,27 @@ def initialize(data)
@parser.root = :content_stream
@pos = 0
@tokens = []
+ @count = 0
end
def next_token
- 100.times { prepare_tokens } if @tokens.size <= 3
+ 100.times { prepare_tokens } if @tokens.size <= 3 && @pos < @data.bytesize
@tokens.shift
end
def all_tokens
- 100.times { prepare_tokens } if @tokens.size <= 3
+ #100.times { prepare_tokens } if @tokens.size <= 3 && @pos < @data.bytesize
+ @parser.consume_all_input = true
+ tree = @parser.parse(@data, index: @pos)
+ if tree
+ @tokens = tree.elements.select { |obj|
+ obj.respond_to?(:to_ruby)
+ }.map(&:to_ruby)
+ else
+ # If the AST is nil, an error occurred during parsing, so
+ # report a simple error message to help the user.
+ raise Exception, "Parse error at offset: #{@parser.index}"
+ end
@tokens
end
@@ -115,6 +127,7 @@ def all_tokens
def prepare_tokens
return if @pos >= @data.bytesize
+ @count += 1
token = @parser.parse(@data, index: @pos)
@pos = @parser.index
View
2  lib/pdf/reader/pdf.treetop
@@ -13,7 +13,7 @@ grammar Pdf
end
rule content_stream
- (comment / base_object / operator / separator)
+ (comment / base_object / operator / separator)*
end
#---------------------------------------------
View
22 tools/bench2.rb
@@ -0,0 +1,22 @@
+# coding: utf-8
+
+
+require 'pdf/reader'
+require 'benchmark'
+require 'stringio'
+
+Benchmark.bm(7) do |x|
+ x.report("Parser") do
+ 1000.times do
+ buf = PDF::Reader::Buffer.new(StringIO.new("1 q Q"))
+ PDF::Reader::Parser.new(buf).parse_token
+ end
+ end
+ x.report("NewParser") do
+ 1000.times do
+ parser = PDF::Reader::NewParser.new("1 q Q")
+ #parser.next_token
+ parser.all_tokens
+ end
+ end
+end
Please sign in to comment.
Something went wrong with that request. Please try again.