Skip to content

HTTPS clone URL

Subversion checkout URL

You can clone with HTTPS or Subversion.
Download ZIP
Browse files

Added new profiling scripts

  • Loading branch information...
commit aac827afcafc8db378e2cc4993e9c84a6518e1d9 1 parent c5c1fe8
@alexdowad alexdowad authored
Showing with 82 additions and 15 deletions.
  1. +62 −15 tools/bench.rb
  2. +20 −0 tools/profile.rb
View
77 tools/bench.rb
@@ -1,23 +1,70 @@
# coding: utf-8
-# a quick script for profiling performance with perftools.
-#
-# USAGE
-#
-# ruby tools/bench.rb
-# evince bench.pdf
-
-$:.unshift "../lib"
-require 'pdf-reader'
-require 'perftools'
-
-PerfTools::CpuProfiler.start("/tmp/restart_profile") do
- PDF::Reader.open("restart.pdf") do |reader|
+# a script for measuring text extraction performance
+
+# TO BENCHMARK: ruby tools/bench.rb <runs>
+# TO PROFILE: ruby tools/bench.rb perftools
+# OR: ruby-prof tools/bench.rb <runs>
+# FOR OBJECT ALLOCATION STATS: ruby tools/bench.rb memprof
+# TO COUNT GC RUNS: ruby tools/bench.rb gc
+
+$project_root = File.expand_path(File.join(File.dirname(__FILE__), ".."))
+require 'rubygems' # for Ruby 1.8
+$:.unshift "#{$project_root}/lib"
+require 'pdf/reader'
+
+# Extract all the text from a large PDF
+
+def extract_text
+ PDF::Reader.open("#{$project_root}/spec/data/no_text_spaces.pdf") do |reader|
reader.pages.each do |page|
page.text
end
end
end
-`pprof.rb --text /tmp/restart_profile > bench.txt`
-`pprof.rb --pdf /tmp/restart_profile > bench.pdf`
+case ARGV[0]
+when "memprof"
+ # Measure object allocation with memprof
+ require 'memprof'
+ GC.disable
+ Memprof.track { extract_text }
+
+when "perftools"
+ # Profile with perftools.rb
+ # (The best thing about perftools.rb is that it shows you time spent on
+ # garbage collection)
+ require 'perftools'
+ PerfTools::CpuProfiler.start("/tmp/perftools_data") do
+ extract_text
+ end
+ `pprof.rb --text /tmp/perftools_data > #{$project_root}/tools/profiles/perftools.txt`
+ `pprof.rb --pdf /tmp/perftools_data > #{$project_root}/tools/profiles/perftools.pdf`
+
+when "gc"
+ before = GC.count
+ extract_text
+ puts "GC ran #{GC.count - before} times"
+
+else
+ # Benchmark
+ # Average the results over multiple runs
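+ # Throw out the best and worst results, and average what remains
+ # (as written below: the fastest fifth of the sorted runs is dropped and only the next third is kept)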
+ # With 10 runs, the results seem to fluctuate by as much as 6-7%
+ # I'd like that to be 1-2%, but that requires a VERY high number of runs
+
+ runs = (ARGV[0] || 10).to_i
+ times = []
+
+ runs.times do
+ start = Time.new
+ extract_text
+ times << (Time.new - start)
+ sleep(0.1) # results seem more consistent this way
+ end
+
+ times.sort!
+ times = times.drop(runs / 5).take(runs - (runs * 2 / 3))
+ average = times.reduce(0,&:+).to_f / times.size
+ puts "#{"%0.3f" % average} seconds"
+end
View
20 tools/profile.rb
@@ -0,0 +1,20 @@
+# Driver to run a bunch of profiling scripts in parallel,
+# leaving all the results in tools/profiles
+# Assumes "ruby" is Ruby 1.9, and "ruby1.8" is Ruby 1.8.7
+# Also assumes that all needed gems are installed
+# This script itself should be run under Ruby 1.9
+
+require 'fileutils'
+
+project_root = File.expand_path(File.join(File.dirname(__FILE__), ".."))
+dir = "#{project_root}/tools/profiles"
+FileUtils.mkdir(dir) unless File.exist?(dir)
+
+pids = []
+pids << fork { `ruby-prof #{project_root}/tools/bench.rb 1 --file=#{dir}/rubyprof.txt` }
+pids << fork { `ruby-prof #{project_root}/tools/bench.rb 1 --file=#{dir}/rubyprof-graph.htm --printer=graph_html` }
+pids << fork { `ruby-prof #{project_root}/tools/bench.rb 1 --file=#{dir}/rubyprof-stack.htm --printer=call_stack` }
+pids << fork { `ruby1.8 #{project_root}/tools/bench.rb memprof > #{dir}/memprof.txt` }
+pids << fork { `ruby #{project_root}/tools/bench.rb perftools` }
+
+pids.each { |pid| Process.wait(pid) }
Please sign in to comment.
Something went wrong with that request. Please try again.