Skip to content

Commit

Permalink
Experiment with different hostname-to-names algorithms
Browse files Browse the repository at this point in the history
➜  publicsuffix-ruby git:(thesis-hash) ✗ ruby benchmarks/bm_parts.rb
Warming up --------------------------------------
          tokenizer1    26.384k i/100ms
          tokenizer2    26.571k i/100ms
          tokenizer3    32.293k i/100ms
          tokenizer4    27.595k i/100ms
Calculating -------------------------------------
          tokenizer1    310.488k (± 6.6%) i/s -      1.557M in 5.035961s
          tokenizer2    308.801k (± 8.3%) i/s -      1.541M in 5.027643s
          tokenizer3    378.716k (± 5.3%) i/s -      1.905M in 5.045422s
          tokenizer4    305.493k (± 9.6%) i/s -      1.518M in 5.018550s

Comparison:
          tokenizer3:   378716.5 i/s
          tokenizer1:   310488.3 i/s - 1.22x  slower
          tokenizer2:   308800.6 i/s - 1.23x  slower
          tokenizer4:   305493.5 i/s - 1.24x  slower
  • Loading branch information
weppos committed Jan 24, 2017
1 parent 6f99ea0 commit f88e617
Showing 1 changed file with 91 additions and 0 deletions.
91 changes: 91 additions & 0 deletions benchmarks/bm_names.rb
Original file line number Diff line number Diff line change
@@ -0,0 +1,91 @@
require 'benchmark/ips'

# Sample hostname passed to every tokenizer in this benchmark.
# Frozen so no tokenizer can accidentally mutate the shared fixture.
STRING = "www.subdomain.example.com".freeze

# Names every tokenizer is expected to produce for STRING, ordered from
# the rightmost label alone up to the full hostname.
ARRAY = %w[
  com
  example.com
  subdomain.example.com
  www.subdomain.example.com
].freeze

# Expands a dotted hostname into its list of names, starting with the
# rightmost label and prepending one more label at each step.
#
# Strategy: split on ".", reverse the labels in place, then walk them
# front to back.
#
# @param string [String] dot-separated hostname
# @return [Array] names from shortest to longest; an empty input string
#   yields [nil] because there is no first label to start from
def tokenizer1(string)
  labels = string.split(".").reverse!
  current = labels.first
  result = [current]

  # Labels after the first are glued onto the front of the running name.
  1.upto(labels.size - 1) do |pos|
    current = labels[pos] + "." + current
    result << current
  end
  result
end

# Expands a dotted hostname into its list of names, starting with the
# rightmost label and prepending one more label at each step.
#
# Strategy: split on "." and walk the labels from the last index down
# to the first, without reversing the array.
#
# @param string [String] dot-separated hostname
# @return [Array] names from shortest to longest; an empty input string
#   yields [nil] because there is no last label to start from
def tokenizer2(string)
  labels = string.split(".")
  current = labels.last
  result = [current]

  # Remaining labels, right to left, are glued onto the running name.
  (labels.size - 2).downto(0) do |pos|
    current = labels[pos] + "." + current
    result << current
  end
  result
end

# Expands a dotted hostname into its list of names without splitting:
# scans for "." from the right and slices out everything after each
# dot found, using the (start, length) form of String#[].
#
# @param string [String] dot-separated hostname
# @return [Array<String>] names from shortest to longest
def tokenizer3(string)
  total = string.size
  boundary = total
  result = []

  loop do
    # Next dot strictly to the left of the previous one; -1 once none
    # remain, so that the final slice starts at index 0.
    boundary = string.rindex(".", boundary - 1) || -1
    from = boundary + 1
    result << string[from, total - from]

    # Stop when there was no dot left, or the dot was the first character.
    return result unless boundary > 0
  end
end

# Expands a dotted hostname into its list of names without splitting:
# scans for "." from the right and slices out everything after each
# dot found, using a Range of indices.
#
# @param string [String] dot-separated hostname
# @return [Array<String>] names from shortest to longest
def tokenizer4(string)
  final_index = string.size - 1
  boundary = string.size
  result = []

  loop do
    # Next dot strictly to the left of the previous one; -1 once none
    # remain, so that the final range starts at index 0.
    boundary = string.rindex(".", boundary - 1) || -1
    result << string.slice((boundary + 1)..final_index)

    # Stop when there was no dot left, or the dot was the first character.
    break result if boundary <= 0
  end
end

# Sanity check before benchmarking: every tokenizer must turn STRING
# into exactly ARRAY, otherwise the script aborts with the offending
# result in the error message.
%i[tokenizer1 tokenizer2 tokenizer3 tokenizer4].each do |tokenizer|
  actual = send(tokenizer, STRING)
  raise "#{tokenizer} failed: #{actual.inspect}" unless actual == ARRAY
end

# Measures iterations per second of each tokenizer on STRING and prints
# a relative comparison. Each report calls its tokenizer directly and
# checks that the result is an Array.
Benchmark.ips do |bench|
  bench.report("tokenizer1") { tokenizer1(STRING).is_a?(Array) }
  bench.report("tokenizer2") { tokenizer2(STRING).is_a?(Array) }
  bench.report("tokenizer3") { tokenizer3(STRING).is_a?(Array) }
  bench.report("tokenizer4") { tokenizer4(STRING).is_a?(Array) }

  bench.compare!
end

0 comments on commit f88e617

Please sign in to comment.