Permalink
Browse files

removed inverted list

  • Loading branch information...
1 parent 99d9135 commit 0576eb30ac6fb72d4ef11674df74e9f83571a7d0 Oleg Andreev committed Jun 8, 2008
View
1 lib/strokedb/data_structures.rb
@@ -1,5 +1,4 @@
require 'data_structures/simple_skiplist'
require 'data_structures/skiplist'
require 'data_structures/chunked_skiplist'
-require 'data_structures/inverted_list'
require 'data_structures/point_query'
View
297 lib/strokedb/data_structures/inverted_list.rb
@@ -1,297 +0,0 @@
-module StrokeDB
- class InvertedList
- include Enumerable
-
- SEPARATOR = "\x01"
- TERMINATOR = "\x02"
-
- attr_accessor :default, :head, :tail, :cut_level
-
- def initialize(cut_level = nil)
- @cut_level = cut_level
- @head = HeadNode.new
- @tail = TailNode.new
- @head.forward[0] = @tail
- end
-
- def insert(slots, data, __cheaters_level = nil)
- slots.each do |key, value|
- value = value.to_s
- key = key.to_s
- prefix = value + SEPARATOR + key + TERMINATOR
- insert_attribute(prefix, data, __cheaters_level)
- end
- end
-
- def insert_attribute(key, value, __cheaters_level = nil)
- @size_cache = nil
- update = Array.new(@head.level)
- x = @head
- @head.level.downto(1) do |i|
- x = x.forward[i-1] while x.forward[i-1] < key
- update[i-1] = x
- end
- x = x.forward[0]
- if x.key == key
- x.values.push value
- else
- newlevel = __cheaters_level || random_level
- newlevel = 1 if empty?
- if newlevel > @head.level
- (@head.level + 1).upto(newlevel) do |i|
- update[i-1] = @head
- end
- end
-
- x = Node.new(newlevel, key, value)
-
- if cut?(newlevel, update[0])
- return new_chunks!(x, update)
- else
- newlevel.times do |i|
- x.forward[i] = update[i].forward[i] || @tail
- update[i].forward[i] = x
- end
- end
- end
- return self
- end
-
-
- def delete(slots, data)
- slots.each do |key, value|
- value = value.to_s
- key = key.to_s
- prefix = value + SEPARATOR + key + TERMINATOR
- delete_attribute(prefix, data)
- end
- end
-
- def delete_attribute(key, value)
- @size_cache = nil
- update = Array.new(@head.level)
- x = @head
- @head.level.downto(1) do |i|
- x = x.forward[i-1] while x.forward[i-1] < key
- update[i-1] = x
- end
- x = x.forward[0]
- if x.key == key
- x.values.delete value
- value
- else
- nil
- end
- end
-
-
- # Finders
-
- def find(*args)
- q = PointQuery.new(*args)
- total = Set.new
- first_pass = true
- q.slots.each do |key, value|
- results = []
- key = key.to_s
- value = value.to_s
- prefix = value + SEPARATOR + key + TERMINATOR
- node = find_node(prefix)
- results = node.values if node
- total = (first_pass ? results.to_set : (total & results))
- first_pass = false
- end
- total
- end
-
- def find_node(key)
- x = @head
- @head.level.downto(1) do |i|
- x = x.forward[i-1] while x.forward[i-1] < key
- end
- x = x.forward[0]
- return (x.key && yield(x.key, key) ? x : nil) if block_given?
- return x if x.key == key
- nil
- end
-
- def first_node
- @head.forward[0]
- end
-
- def size
- @size_cache ||= inject(0){|c,k| c + 1}
- end
-
- def empty?
- @head.forward[0] == @tail
- end
-
- # Returns a string representation of the Skiplist.
- def to_s
- "#<#{self.class.name} " +
- [@head.to_s, map{|node| node.to_s }, @tail.to_s].flatten.join(', ') +
- ">"
- end
- def to_s_levels
- "#<#{self.class.name}:levels " +
- [@head.to_s, map{|node| node.level.to_s }, @tail.to_s].flatten.join(', ') +
- ">"
- end
-
- def debug_dump
- s = ""
- each do |n|
- s << "#{n.key.inspect}: #{n.values.inspect}\n"
- end
- s
- end
-
- def each
- n = @head.forward[0]
- until TailNode === n
- yield n
- n = n.forward[0]
- end
- end
-
- private
-
- # 1/E is a fastest search value
- PROBABILITY = 1/Math::E
- MAX_LEVEL = 32
-
- def random_level
- l = 1
- l += 1 while rand < PROBABILITY && l < MAX_LEVEL
- return l
- end
-
- def cut?(l, prev)
- @cut_level && !empty? && l >= @cut_level && prev != @head
- end
-
- def new_chunks!(newnode, update)
- # Transposed picture:
- #
- # head level 8: - - - - - - - -
- # update.size 8: - - - - - - - -
- # ...
- # newnode.level 5: - - - - -
- # cut level 3: - - -
- # regular node: -
- # regular node: - -
- # ...
- # tail node: T T T T T T T T
- # refs: A B C D E F G H
- #
- # How to cut?
- #
- # 0) tail1 = TailNode.new; list2 = Skiplist.new
- # 1) newnode.{A, B, C, D, E} := update{A,B,C,D,E}.forward
- # 2) update.{all} := tail1 (for current chunk)
- # 3) list2.head.{A, B, C, D, E} = new_node.{A, B, C, D, E}
- # 4) tail1.next_list = list2
-
- list2 = Skiplist.new({}, @default, @cut_level)
- tail1 = TailNode.new
-
- newnode.level.times do |i|
- # add '|| @tail' because update[i] may be head of a lower level
- # without forward ref to tail.
- newnode.forward[i] = update[i].forward[i] || @tail
- list2.head.forward[i] = newnode
- end
- @head.level.times do |i|
- update[i].forward[i] = tail1
- end
- tail1.next_list = list2
- # return the current chunk and the next chunk
- return self, list2
- end
-
- class Node
- attr_accessor :key, :values, :forward
- attr_accessor :_serialized_index
- def initialize(level, key, value)
- @key, @values = key, [value]
- @forward = Array.new(level)
- end
- # this is called when node is thrown out of the list
- # note, that node.value is called immediately after node.free
- def free(list)
- # do nothing
- end
- def level
- @forward.size
- end
- def <(key)
- @key < key
- end
- def <=(key)
- @key <= key
- end
- def next
- forward[0]
- end
- def to_s
- "[#{level}]#{@key}: #{@values.inspect}"
- end
- end
-
- class HeadNode < Node
- def initialize
- super 1, nil, nil
- end
- def <(key)
- true
- end
- def <=(key)
- true
- end
- def to_s
- "head(#{level})"
- end
- end
-
- # also proxy-to-next-chunk node
- class TailNode < Node
- attr_accessor :next_list
- def initialize
- super 1, nil, nil
- end
- def <(key)
- false
- end
- def <=(key)
- false
- end
- def to_s
- "tail(#{level})"
- end
- end
-
- def debug(msg)
- if block_given?
- begin
- out = []
- out << "\n\n---- Start of #{msg} -----"
- yield(out)
- return
- rescue => e
- puts out.join("\n")
- puts "---- End of #{msg}: exception! -----"
- puts e
- puts e.backtrace.join("\n") rescue nil
- puts "----"
- raise e
- end
- else
- puts "IL DEBUG: #{msg}" if ENV['DEBUG']
- end
- end
- def debug_header
- puts "\n==========================================\n" if ENV['DEBUG']
- end
- end
-end
View
172 spec/lib/strokedb/data_structures/inverted_list_spec.rb
@@ -1,172 +0,0 @@
-require File.dirname(__FILE__) + '/spec_helper'
-
-describe InvertedList, " with flat string attributes" do
-
- before(:all) do
- @il = InvertedList.new
- @oleg_profile = new_doc('Profile', :name => 'Oleg',
- :email => 'oleganza')
- @yrashk_profile = new_doc('Profile', :name => 'Yurii',
- :email => 'yrashk')
- @article1 = new_doc('Article', :title => 'StrokeDB kicks ass',
- :author => ('@#' + @yrashk_profile[:uuid]))
- @article2 = new_doc('Article', :title => 'StrokeDB strikes back',
- :date => '28 Jan 2008',
- :author => ('@#' + @yrashk_profile[:uuid]))
- @post1 = new_doc('Post', :title => 'Hello',
- :date => '28 Jan 2008',
- :author => ('@#' + @yrashk_profile[:uuid]))
-
- insert_doc(@il, @oleg_profile)
- insert_doc(@il, @yrashk_profile)
- insert_doc(@il, @article1)
- insert_doc(@il, @article2)
- insert_doc(@il, @post1)
- end
-
- it "should find objects by a single attribute" do
- @il.find(:name => 'Oleg').should == [@oleg_profile[:uuid]].to_set
- @il.find(:email => 'yrashk').should == [@yrashk_profile[:uuid]].to_set
- @il.find(:meta => 'Article').should == [@article1[:uuid], @article2[:uuid]].to_set
- @il.find(:version => @article1[:slots][:version]).should == [@article1[:uuid]].to_set
- end
-
- it "should not find object by a not matched attribute" do
- @il.find(:name => 'Nobody').should == [ ].to_set
- @il.find(:meta => 'NoMeta').should == [ ].to_set
- @il.find(:version => 'no-version').should == [ ].to_set
- end
-
- it "should find objects by a pair of attributes" do
- @il.find(:date => '28 Jan 2008', :title => 'Hello').should == [@post1[:uuid]].to_set
- @il.find(:date => '28 Jan 2008', :meta => 'Article').should == [@article2[:uuid]].to_set
- @il.find(:date => '28 Jan 2008', :author => ('@#' + @yrashk_profile[:uuid])).should == [@post1[:uuid], @article2[:uuid]].to_set
- end
-
- it "should not find objects by a not matched pair of attributes" do
- @il.find(:date => '28 Jan 2008', :title => 'StrokeDB kicks ass').should == [ ].to_set
- @il.find(:date => '28 Jan 2008', :meta => 'Profile').should == [ ].to_set
- @il.find(:date => '28 Jan 2008', :author => ('@#' + @oleg_profile[:uuid])).should == [ ].to_set
- end
-
- it "should find objects by three attributes" do
- @il.find(:date => '28 Jan 2008',
- :author => ('@#' + @yrashk_profile[:uuid]),
- :meta => 'Article'
- ).should == [ @article2[:uuid] ].to_set
- end
-
- it "should not find objects by not matched three attributes" do
- @il.find(:date => '28 Jan 2008',
- :author => ('@#' + @oleg_profile[:uuid]),
- :meta => 'Article'
- ).should == [ ].to_set
- @il.find(:date => '42 Jan 2008',
- :author => ('@#' + @yrashk_profile[:uuid]),
- :meta => 'Article'
- ).should == [ ].to_set
- @il.find(:date => '28 Jan 2008',
- :author => ('@#' + @yrashk_profile[:uuid]),
- :meta => 'Profile'
- ).should == [ ].to_set
- end
-
- it "should delete doc from index" do
- @il.find(:name => 'Oleg').should == [@oleg_profile[:uuid]].to_set
- delete_doc(@il, @oleg_profile)
- @il.find(:name => 'Oleg').should == [ ].to_set
- @il.find(:email => 'yrashk').should == [@yrashk_profile[:uuid]].to_set
- delete_doc(@il, @yrashk_profile)
- @il.find(:email => 'yrashk').should == [ ].to_set
- end
-
-end
-
-describe InvertedList, " with numeric attributes" do
-
- before(:all) do
- @il = InvertedList.new
- @ps = []
- @ps << new_doc('Point', :x => 0, :y => 0) # 0
- @ps << new_doc('Point', :x => 10, :y => 50) # 1
- @ps << new_doc('Point', :x => 50, :y => 50) # 2
- @ps << new_doc('Point', :x => 200, :y => 10) # 3
- @ps << new_doc('Point', :x => 500, :y => 10) # 4
- @ps << new_doc('Point', :x => -500, :y => 10) # 5
- @ps << new_doc('Point', :x => -20, :y => 10) # 6
- @ps << new_doc('Point', :x => -2.1, :y => 10) # 7
- @ps << new_doc('Point', :x => 20.6, :y => 10) # 8
-
- @ps.each {|p| insert_doc(@il, p) }
- end
-
- it "should find by positive value" do
- @il.find(:x => 10).should == [@ps[1][:uuid]].to_set
- @il.find(:x => 50).should == [@ps[2][:uuid]].to_set
- @il.find(:x => 200).should == [@ps[3][:uuid]].to_set
- end
-
- it "should find by negative value" do
- @il.find(:x => -500).should == [@ps[5][:uuid]].to_set
- @il.find(:x => -20).should == [@ps[6][:uuid]].to_set
- end
-
- it "should find by zero value" do
- @il.find(:x => 0).should == [@ps[0][:uuid]].to_set
- end
-
- # Dangerous: 2.1 may suddenly appear as 2.0999999999996235 or like that
- it "should find by float value" do
- @il.find(:x => -2.1).should == [@ps[7][:uuid]].to_set
- @il.find(:x => 20.6).should == [@ps[8][:uuid]].to_set
- end
-
-end
-=begin
-describe InvertedList, " with multivalue slots" do
- before(:all) do
- @il = InvertedList.new
- @ab = new_doc(%w[A B])
- @a = new_doc(%w[A])
- @b = new_doc(%w[B])
- @c = new_doc(%w[C])
- insert_doc(@il, @ab)
- insert_doc(@il, @a)
- insert_doc(@il, @b)
- insert_doc(@il, @c)
- end
-
- it "should find multivalue objects by a single value" do
- @il.find(:meta => proc{|v| v.include? 'A' }).should == [@a[:uuid], @ab[:uuid]].to_set
- @il.find(:meta => proc{|v| v.include? 'B' }).should == [@b[:uuid], @ab[:uuid]].to_set
- end
-
- it "should not find by scalar value" do
- @il.find(:meta => 'A').should == [ ].to_set
- @il.find(:meta => 'B').should == [ ].to_set
- end
-
- it "should find multivalue objects with a complex predicate" do
- @il.find(:meta => proc{|v| v.include?('A') && !v.include?('B') }).should == [@a[:uuid]].to_set
- @il.find(:meta => proc{|v| v.include?('A') || v.include?('B') }).should ==
- [@a[:uuid], @ab[:uuid], @b[:uuid]].to_set
- @il.find(:meta => proc{|v| v.include?('A') && v.include?('B') }).should == [@ab[:uuid]].to_set
- end
-end
-=end
-
-
-def new_doc(meta, slots = {})
- slots[:meta] = meta
- slots[:version] = 'v1' + rand(10000000).to_s
- {:uuid => meta.to_s + '-' + rand(1000000).to_s, :slots => slots}
-end
-
-def insert_doc(il, doc)
- il.insert(doc[:slots], doc[:uuid])
-end
-
-def delete_doc(il, doc)
- il.delete(doc[:slots], doc[:uuid])
-end
-

0 comments on commit 0576eb3

Please sign in to comment.