Skip to content

Commit

Permalink
rough experiments in parsing speed with treetop
Browse files (browse the repository at this point in the history)
  • Loading branch information
yob committed Jun 9, 2012
1 parent 0b64809 commit 3267168
Show file tree
Hide file tree
Showing 3 changed files with 39 additions and 4 deletions.
19 changes: 16 additions & 3 deletions lib/pdf/reader/new_parser.rb
Expand Up @@ -90,7 +90,7 @@ def to_ruby
class NewParser
Treetop.load(File.join(File.dirname(__FILE__), 'pdf.treetop'))

attr_reader :pos
attr_reader :pos, :count

def initialize(data)
@data = data
Expand All @@ -99,22 +99,35 @@ def initialize(data)
@parser.root = :content_stream
@pos = 0
@tokens = []
@count = 0
end

# Returns the next buffered token, or nil when the buffer is empty.
#
# The buffer is topped up (100 calls to prepare_tokens) only when it holds
# three or fewer tokens AND unparsed input remains. The refill must be
# guarded by the end-of-input check; an additional unguarded refill would
# invoke prepare_tokens needlessly once @pos has reached the end of @data.
def next_token
  100.times { prepare_tokens } if @tokens.size <= 3 && @pos < @data.bytesize
  @tokens.shift
end

# Parses everything from @pos to the end of @data in a single pass and
# returns the resulting tokens (also stored in @tokens).
#
# Only parse-tree elements that respond to #to_ruby contribute a token;
# everything else is dropped.
#
# Raises Exception with the parser's offset when the parse fails.
def all_tokens
  # Deliberately no prepare_tokens pre-fill here: it would move @pos forward,
  # and whatever it buffered would be thrown away by the assignment to
  # @tokens below.
  @parser.consume_all_input = true
  tree = @parser.parse(@data, index: @pos)

  # A nil tree means the parse failed; report a simple message with the
  # parser's offset to help the user.
  # NOTE(review): raising bare Exception bypasses a plain `rescue`; kept
  # as-is so existing callers see the same class -- consider a
  # StandardError subclass.
  raise Exception, "Parse error at offset: #{@parser.index}" unless tree

  convertible = tree.elements.select { |node| node.respond_to?(:to_ruby) }
  @tokens = convertible.map(&:to_ruby)
  @tokens
end

private

def prepare_tokens
return if @pos >= @data.bytesize
@count += 1

token = @parser.parse(@data, index: @pos)
@pos = @parser.index
Expand Down
2 changes: 1 addition & 1 deletion lib/pdf/reader/pdf.treetop
Expand Up @@ -13,7 +13,7 @@ grammar Pdf
end

# Root rule for a content stream: zero or more comments, base objects,
# operators or separators. The repetition lives directly on the group so the
# root node's elements are the matched items themselves; placing an extra
# unstarred group in front of it would turn the rule into a two-part sequence
# and reject empty input.
rule content_stream
  (comment / base_object / operator / separator)*
end

#---------------------------------------------
Expand Down
22 changes: 22 additions & 0 deletions tools/bench2.rb
@@ -0,0 +1,22 @@
# coding: utf-8


require 'pdf/reader'
require 'benchmark'
require 'stringio'

# Times 1000 runs of each parser over the content stream "1 q Q".
#
# Note the two reports are not doing the same amount of work: the "Parser"
# report reads a single token via parse_token, while the "NewParser" report
# reads every token via all_tokens.
Benchmark.bm(7) do |bench|
  bench.report("Parser") do
    1_000.times do
      source = StringIO.new("1 q Q")
      PDF::Reader::Parser.new(PDF::Reader::Buffer.new(source)).parse_token
    end
  end

  bench.report("NewParser") do
    1_000.times do
      # Swap in `next_token` here to time single-token reads instead.
      PDF::Reader::NewParser.new("1 q Q").all_tokens
    end
  end
end

0 comments on commit 3267168

Please sign in to comment.