Skip to content

HTTPS clone URL

Subversion checkout URL

You can clone with HTTPS or Subversion.

Download ZIP
Browse files

removed skiplist & chunked skiplist

  • Loading branch information...
commit ff4c04e1f3e2f65f3984ce9d75166278c1391118 1 parent 70eeaff
Oleg Andreev authored
View
3  lib/strokedb/data_structures.rb
@@ -1,4 +1 @@
require 'data_structures/simple_skiplist'
-require 'data_structures/skiplist'
-require 'data_structures/chunked_skiplist'
-require 'data_structures/point_query'
View
123 lib/strokedb/data_structures/chunked_skiplist.rb
@@ -1,123 +0,0 @@
-require File.expand_path(File.dirname(__FILE__) + '/../util/class_optimization')
-module StrokeDB
- # ChunkedSkiplist (CS) implements a distributed, concurrently accessible
- # skiplist using SimpleSkiplist (SL) as building blocks.
- # Each instance contains a single instance of SimpleSkiplist.
- # Higher-level CS store references to lower-level SL as SL "data".
- # Lowest-level CS contains actual data.
- #
- # Regular state of the chunks (square brackets denote references):
- #
- # ______ ___________________
- # / \ / \
- # HEAD -> C1[ C2, C3 ], C2[ C4, C5 ], C3[ C6, C7 ], C4[data], ...
- # => \__________________/
- #
- # Initial state is a single lowest-level chunk:
- #
- # HEAD -> C1[data]
- #
- # When a higher-level node is inserted, a new skiplist is created.
- # The old skiplist is moved to a new chunk, and the current chunk raises its level.
- #
- # ASYNCHRONOUS CONCURRENT INSERT
- #
- # Skiplists, by their nature, allow you to concurrently insert and
- # delete nodes: only a very small number of nodes must be locked
- # during an update. In our implementation, we lock a whole chunk if it is
- # modified. Higher-level chunks are modified rarely, so they are not
- # locked most of the time. Different chunks could be updated concurrently.
- # Read-only concurrent access is always possible no matter what nodes are
- # locked for modification.
- #
- # ChunkedSkiplist has an API for asynchronous data access useful for
- # cooperative multitasking, but it is also thread-safe for preemptive
- # multitasking, which is kind of a nice feature, but is not to be evaluated
- # in real-world applications.
- #
- # Chunked #find
- #
- # Find may return actual data or a reference to a lower-level chunk.
- # It is the networking wrapper's business to interpret the result of #find.
- #
- # Insert is harder =) When new node level is higher than data chunk level
- # we have to insert into proxy chunk and create all the levels of proxy
- # chunks down to the data chunk. If node level is low, we just insert
- # node into appropriate data chunk.
- # The hard part is the locking issues during insertion.
- #
- #
- class ChunkedSkiplist
- attr_accessor :lo_level, :hi_level, :probability, :container
-
- DEFAULT_MAXLEVEL = 7
- DEFAULT_PROBABILITY = 1/Math::E
-
- def initialize(lo_level = nil, hi_level = nil, probability = nil, container = nil)
- @lo_level = lo_level || 0
- @hi_level = hi_level || DEFAULT_MAXLEVEL
- @probability = probability || DEFAULT_PROBABILITY
- @container = container || SimpleSkiplist.new(nil,
- :maxlevel => @hi_level + 1, :probability => @probability)
- end
-
- # If chunk is not a lowest-level list, then it
- # contains references to other chunks. Hence, it is a "proxy".
- #
- def proxy?
- @lo_level > 0
- end
-
- # Insertion cases:
- #
- # |
- # [ levels 16..23 ] | |
- # [ levels 08..15 ] | | |
- # [ levels 00..07 ] | | | |
- # A B C D
- #
- # A - insert in a lower-level chunk
- # B - insert in a 08..15-levels chunk, create new 0..7-level chunk
- # C - insert in a 16..23-levels chunk, create new chunks of levels
- # 0..7 and 8..15.
- # D - create new 24..31-levels chunk with reference to previous head.
- #
- def insert(key, value, __level = nil)
- @container.insert(key, value, __level)
- end
-
- # Create new chunk, move local skiplist there,
- # create new skiplist here and insert
- def promote_level(key, level, size)
-
- end
-
- def generate_chain(key, value, size, start_level)
-
- end
-
- # Finds reference to another chunk (if proxy) or an actual data.
- #
- def find(key)
- proxy? ? @container.find_nearest(key) : @container.find(key)
- end
-
- # Generates random level of arbitrary size.
- # In other words, it actually contains an infinite loop.
- def random_level
- p = @probability
- l = 1
- l += 1 while rand < p
- return l
- end
- end
-end
-
-if __FILE__ == $0
- require File.expand_path(File.dirname(__FILE__) + '/../data_structures/simple_skiplist.rb')
- require 'benchmark'
-
-
-
-
-end
View
25 lib/strokedb/data_structures/point_query.rb
@@ -1,25 +0,0 @@
-module StrokeDB
- # PointQuery is used to perform navigation to a single multidimensional point.
- # Initializer accepts a hash of slots. Slots may have the following value types:
- # "string" scalar string value
- # 3.1415 (numeric) numeric value
- # :L lowest value
- # :H highest value
- #
- # Example:
- # PointQuery.new(:meta => 'Article',
- # :author => 'Oleg Andreev',
- # :date => :last)
- #
- class PointQuery
- attr_reader :slots
-
- def initialize(slots)
- @slots = {}
- slots.each do |k, v|
- k = k.meta_uuid if k.is_a?(Module) # quick hack, but PointQuery will be thrown away as soon as we'll have new search system
- @slots[k.to_optimized_raw] = v.to_optimized_raw
- end
- end
- end
-end
View
302 lib/strokedb/data_structures/skiplist.rb
@@ -1,302 +0,0 @@
-module StrokeDB
- class Skiplist
- include Enumerable
-
- attr_accessor :default, :head, :tail, :cut_level, :unique_keys
-
- def initialize(data = {}, default = nil, cut_level = nil, unique_keys = true)
- @default, @cut_level, @unique_keys = default, cut_level, unique_keys
-
- @head = HeadNode.new
- @tail = TailNode.new
- @head.forward[0] = @tail
- data.each{|k, v| insert(k, v) }
- end
-
- def insert(key, value, __cheaters_level = nil, __timestamp = nil)
- @size_cache = nil
- update = Array.new(@head.level)
- x = @head
- # We have to choose between < and <= only,
- # but we go into different branches to keep things fast.
- if @unique_keys
- @head.level.downto(1) do |i|
- x = x.forward[i-1] while x.forward[i-1] < key
- update[i-1] = x
- end
- else
- @head.level.downto(1) do |i|
- x = x.forward[i-1] while x.forward[i-1] <= key
- update[i-1] = x
- end
- end
- x = x.forward[0]
- if x.key == key && @unique_keys
- x.value = value
- x.timestamp = __timestamp
- value.skiplist_node_container = x if value.respond_to? :skiplist_node_container=
- else
- newlevel = __cheaters_level || random_level
- newlevel = 1 if empty?
- if newlevel > @head.level
- (@head.level + 1).upto(newlevel) do |i|
- update[i-1] = @head
- end
- end
-
- x = Node.new(newlevel, key, value, __timestamp)
- value.skiplist_node_container = x if value.respond_to? :skiplist_node_container=
-
- if cut?(newlevel, update[0])
- return new_chunks!(x, update)
- else
- newlevel.times do |i|
- x.forward[i] = update[i].forward[i] || @tail
- update[i].forward[i] = x
- end
- end
- end
- return self
- end
-
- # Finders
-
- def find_node(key = nil)
- x = @head
- @head.level.downto(1) do |i|
- x = x.forward[i-1] while x.forward[i-1] < key
- end
- x = x.forward[0]
- return (x.key && yield(x.key, key) ? x : nil) if block_given?
- return x if x.key == key
- nil
- end
-
- def find(key, default = nil)
- (i = find_node(key)) && i.value || default || @default
- end
-
- def find_nearest_node(key)
- x = @head
- @head.level.downto(1) do |i|
- x = x.forward[i-1] while x.forward[i-1] < key
- end
- x = x.forward[0] if (x.forward[0].key == key || x == @head)
- x
- end
-
- def find_nearest(key, default = nil)
- find_nearest_node(key).value || default || @default
- end
-
- def find_all_with_prefix(key)
- results = []
- x = @head
- @head.level.downto(1) do |i|
- x = x.forward[i-1] while x.forward[i-1] < key
- end
- x = x.forward[0]
- # got first
- while x.key && x.key[0, key.size] == key
- results << x.value
- x = x.forward[0]
- end
- results
- end
-
-
- def delete(key, default = nil)
- @size_cache = nil
- default ||= @default
- update = Array.new(@head.level)
- x = @head
- @head.level.downto(1) do |i|
- x = x.forward[i-1] while x.forward[i-1] < key
- update[i-1] = x
- end
- x = x.forward[0]
- if x.key == key
- @head.level.times do |i|
- break if update[i].forward[i] != x
- update[i].forward[i] = x.forward[i]
- end
- true while (y = @head.forward.pop) == @tail
- @head.forward.push(y || @tail)
- x.free(self)
- x.value
- else
- default
- end
- end
-
- def first_node
- @head.forward[0]
- end
-
- def size
- @size_cache ||= inject(0){|c,k| c + 1}
- end
-
- def empty?
- @head.forward[0] == @tail
- end
-
- # Returns a string representation of the Skiplist.
- def to_s
- "#<#{self.class.name} " +
- [@head.to_s, map{|node| node.to_s }, @tail.to_s].flatten.join(', ') +
- ">"
- end
- def to_s_levels
- "#<#{self.class.name}:levels " +
- [@head.to_s, map{|node| node.level.to_s }, @tail.to_s].flatten.join(', ') +
- ">"
- end
-
- def eql?(skiplist)
- zip(skiplist) {|a, b| return false unless a.key == b.key && a.value == b.value }
- true
- end
-
- def each
- n = @head.forward[0]
- until n.is_a?(TailNode)
- yield n
- n = n.forward[0]
- end
- end
-
- # Only for empty list!
- def raw_insert(data)
- n = @head
- sn = nil
- update = []
- data.each do |item|
- key, value, level, timestamp = yield(item)
- sn = Node.new(level, key, value, timestamp)
- level.times do |i|
- update[i] ||= @head
- update[i].forward[i] = sn
- sn.forward[i] = @tail
- update[i] = sn
- end
- end
- end
-
- private
-
- # 1/E is the fastest search value
- PROBABILITY = 1/Math::E
- MAX_LEVEL = 32
-
- def random_level
- l = 1
- l += 1 while rand < PROBABILITY && l < MAX_LEVEL
- return l
- end
-
- def cut?(l, prev)
- @cut_level && !empty? && l >= @cut_level && prev != @head
- end
-
- def new_chunks!(newnode, update)
- # Transposed picture:
- #
- # head level 8: - - - - - - - -
- # update.size 8: - - - - - - - -
- # ...
- # newnode.level 5: - - - - -
- # cut level 3: - - -
- # regular node: -
- # regular node: - -
- # ...
- # tail node: T T T T T T T T
- # refs: A B C D E F G H
- #
- # How to cut?
- #
- # 0) tail1 = TailNode.new; list2 = Skiplist.new
- # 1) newnode.{A, B, C, D, E} := update{A,B,C,D,E}.forward
- # 2) update.{all} := tail1 (for current chunk)
- # 3) list2.head.{A, B, C, D, E} = new_node.{A, B, C, D, E}
- # 4) tail1.next_list = list2
-
- list2 = Skiplist.new({}, @default, @cut_level)
- tail1 = TailNode.new
-
- newnode.level.times do |i|
- # add '|| @tail' because update[i] may be head of a lower level
- # without forward ref to tail.
- newnode.forward[i] = update[i].forward[i] || @tail
- list2.head.forward[i] = newnode
- end
- @head.level.times do |i|
- update[i].forward[i] = tail1
- end
- tail1.next_list = list2
- # return the current chunk and the next chunk
- return self, list2
- end
-
- class Node
- attr_accessor :key, :value, :forward, :timestamp
- attr_accessor :_serialized_index
- def initialize(level, key, value,timestamp=nil)
- @key, @value, @timestamp = key, value, timestamp
- @forward = Array.new(level)
- end
- # this is called when node is thrown out of the list
- # note, that node.value is called immediately after node.free
- def free(list)
- # do nothing
- end
- def level
- @forward.size
- end
- def <(key)
- @key < key
- end
- def <=(key)
- @key <= key
- end
- def next
- forward[0]
- end
- def to_s
- "[#{level}]#{@key}: #{@value}"
- end
- end
-
- class HeadNode < Node
- def initialize
- super 1, nil, nil
- end
- def <(key)
- true
- end
- def <=(key)
- true
- end
- def to_s
- "head(#{level})"
- end
- end
-
- # also proxy-to-next-chunk node
- class TailNode < Node
- attr_accessor :next_list
- def initialize
- super 1, nil, nil
- end
- def <(key)
- false
- end
- def <=(key)
- false
- end
- def to_s
- "tail(#{level})"
- end
- end
- end
-end
View
144 spec/lib/strokedb/data_structures/chunked_skiplist_spec.rb
@@ -1,144 +0,0 @@
-require File.dirname(__FILE__) + '/spec_helper'
-
-describe "Empty chunked skiplist" do
- before(:each) do
- @cut_level = 4
- @list = Skiplist.new({}, nil, @cut_level)
- end
-
- it "should make first node level first-level only" do
- @list.head.level.should == 1
- @list.insert("k","v", @cut_level + 1)
- @list.head.level.should == 1
- @list.head.forward[0].level.should == 1
- end
-
-end
-
-
-describe "Chunked skiplist (variants)" do
-
- before(:each) do
- @cut_level = 16
- end
-
- it "should be valid with ascending level order" do
- @list = Skiplist.new({}, nil, @cut_level)
- (1..10).each do |i|
- @list.insert("K#{i*10}", "V", i)
- end
- @list.should have(10).items
- end
-
- it "should be valid with descending level order" do
- @list = Skiplist.new({}, nil, @cut_level)
- (1..10).each do |i|
- @list.insert("K#{i*10}", "V", 11 - i)
- end
- @list.should have(10).items
- end
-
- it "should be valid with fixed level order" do
- @list = Skiplist.new({}, nil, @cut_level)
- (1..10).each do |i|
- @list.insert("K#{i*10}", "V", 15)
- end
- @list.should have(10).items
- end
-
-end
-
-
-describe "Insertion into skiplist" do
-
- before(:each) do
- @cut_level = 4
- @list = Skiplist.new({}, nil, @cut_level)
- @levels_list = []
- (0..9).each do |i|
- @list.insert("K#{i*10}", "V", x = rand(@cut_level-1)+1)
- @levels_list << x
- end
- end
-
- it "should return [self, nil] if not cut" do
- a, b = @list.insert("K42", "L", @cut_level-1)
- a.should == @list
- b.should be_nil
- end
-
- it "should cut list by middle-entered value" do
- a, b = @list.insert("K42", "H", @cut_level)
- a.should == @list
- b.should be_a_kind_of(Skiplist)
- (0..4).each do |i|
- chunks_should_have_separate_values(a, b, "K#{i*10}", "V")
- end
- chunks_should_have_separate_values(b, a, "K42", "H")
- (5..9).each do |i|
- chunks_should_have_separate_values(b, a, "K#{i*10}", "V")
- end
- end
-
- def chunks_should_have_separate_values(a, b, a_key, a_value)
- a.find(a_key).should == a_value
- b.find(a_key).should == nil
- end
-end
-
-=begin
-random chunk writings, bad test. Come back to it in ChunkStore
-describe "Chunked skiplist process" do
-
- before(:all) do
- @cut_level = 4
- list = Skiplist.new({}, nil, @cut_level)
- @lists = {list => 1}
- n = ((1/Skiplist::PROBABILITY)**(@cut_level+2)).round
- srand 12345
- n.times do |i|
- a, b = @lists.keys[rand(@lists.size)].insert(rand(100_000).to_s, "V")
- @lists[a] = 1
- @lists[b] = 1 if b
- end
- @lists = @lists.keys
- end
-
- it "should produce several chunks after many insertions" do
- @lists.size.should > 1
- end
-
- # TODO: move to separate description with narrow assertions
- it "should keep all the nodes except the first one on a lower level in each chunk" do
- counted_first_node = false
- @lists.each do |list|
- cut_level = list.map{|node| node.level }.max
- counted_cut = false
- puts list.to_s_levels
- puts "------"
- if cut_level < @cut_level
- #puts list
- #puts "------- #{cut_level}"
- end
- if cut_level < @cut_level && !counted_first_node
- counted_first_node = true
- else
- #cut_level.should >= @cut_level
- end
-
- list.each do |node|
- unless counted_cut && node.level == cut_level
- counted_cut = true
- else
- if node.level >= @cut_level
- #puts list
- end
- #node.level.should < @cut_level
- end
- end
- end
- end
-
-end
-=end
-
View
253 spec/lib/strokedb/data_structures/skiplist_spec.rb
@@ -1,253 +0,0 @@
-require File.dirname(__FILE__) + '/spec_helper'
-
-describe "Non-empty skiplist" do
-
- before(:each) do
- @list = Skiplist.new("a" => "1",
- "aa" => "2",
- "aaa" => "3",
- "p" => "4",
- "123.1" => "v1",
- "123.2" => "v2",
- "123" => "v0")
- end
-
- it "should not be empty" do
- @list.should_not be_empty
- end
-
- it "should have size" do
- @list.should have(7).items
- end
-
- it "should find" do
- @list.find("a").should == "1"
- @list.find("aaa").should == "3"
- end
-
- it "should find entries with prefix" do
- @list.find_all_with_prefix("123").to_set.should == ["v1","v2","v0"].to_set
- end
-
- it "should find default value if search with prefix returns nothing" do
- @list.find_all_with_prefix("nothinglike123").should == []
- end
-
- it "should return default value if nothing found" do
- @list.find("404").should be_nil
- end
-
- it "should give local default value if nothing found" do
- @list.find("404", :default_value).should == :default_value
- end
-
- it "should insert data" do
- @list.insert("b", "3.5")
- @list.find("b").should == "3.5"
- end
-
- it "should replace data" do
- @list.insert("aaa", "3.5")
- @list.find("aaa").should == "3.5"
- end
-
- it "should delete node by key" do
- @list.delete("aaa").should == "3"
- @list.find("aaa").should be_nil
- end
-
- it "should not delete non-existent key" do
- @list.find("404").should be_nil
- @list.delete("404").should be_nil
- @list.find("404").should be_nil
- end
-
- it "should find the nearest key" do
- @list.find_nearest("0").should == "v0"
- @list.find_nearest("a").should == "1"
- @list.find_nearest("aa").should == "2"
- @list.find_nearest("aa0").should == "2"
- @list.find_nearest("aaa").should == "3"
- @list.find_nearest("ab").should == "3"
- @list.find_nearest("d").should == "3"
- @list.find_nearest("xxx").should == "4"
- end
-
-end
-
-
-describe "Skiplist with duplicate keys" do
-
- before(:all) do
- @list = Skiplist.new({}, nil, nil, false)
- @list.insert("a", "v1")
- @list.insert("a", "v2")
- @list.insert("a", "v3")
- end
-
- it "should find first value" do
- @list.find("a").should == 'v1'
- end
-
- it "should find node iterator" do
- @list.find_node("a").value.should == 'v1'
- @list.find_node("a").next.value.should == 'v2'
- @list.find_node("a").next.next.value.should == 'v3'
- end
-
-end
-
-describe "Skiplist (cut)" do
-
- before(:each) do
- @chunk = Skiplist.new({}, nil, 4)
- @chunk.insert('500', 'V', 2)
- end
-
- it "should find single value" do
- @chunk.find('500').should == 'V'
- @chunk.size.should == 1
- end
- [['low level',1],['cut level',1],['high level',6]].each do |t, l|
- it "should insert #{t} item into the start" do
- a, b = @chunk.insert('200', 'W', l)
- a.find('200').should == 'W'
- a.find('500').should == 'V'
- a.should == @chunk
- b.should be_nil
- @chunk.size.should == 2
- end
- end
-
- it "should cut when high level item inserted in the middle" do
- a, b = @chunk.insert('600', 'W', 6)
- a.find('500').should == 'V'
- a.find('600').should be_nil
- a.should == @chunk
- a.size.should == 1
- b.should be_kind_of(Skiplist)
- b.find('500').should be_nil
- b.find('600').should == 'W'
- b.size.should == 1
- end
-
- it "should cut when high level item inserted in the middle, but several hi-level items in the start" do
- a, b = @chunk.insert('300', 'X', 6)
- a, b = @chunk.insert('200', 'Y', 5)
- a, b = @chunk.insert('600', 'W', 6)
- a.find('500').should == 'V'
- a.find('300').should == 'X'
- a.find('200').should == 'Y'
- a.find('600').should be_nil
- a.should == @chunk
- a.size.should == 3
- b.should be_kind_of(Skiplist)
- b.find('500').should be_nil
- b.find('300').should be_nil
- b.find('200').should be_nil
- b.find('600').should == 'W'
- b.size.should == 1
- end
-
-
-end
-
-describe "Empty big skiplist" do
-
- before(:each) do
- @list = Skiplist.new
- end
-
- it "should be empty" do
- @list.should be_empty
- @list.should have(0).items
- end
-
- it "should be empty with #each iteratpr" do
- a = b = "each{ } did not yield"
- @list.each{|n| a = "each{ } did yield!" }
- a.should == b
- end
-
- it "should not find anything" do
- @list.find("a").should be_nil
- @list.find("").should be_nil
- @list.find("aaa").should be_nil
- @list.find(123).should be_nil
- @list.find(-1).should be_nil
- end
-
- it "should not delete anything" do
- @list.delete("a").should be_nil
- @list.delete("a").should be_nil
- @list.delete("").should be_nil
- @list.delete("aaa").should be_nil
- @list.delete(123).should be_nil
- @list.delete(-1).should be_nil
- end
-end
-
-
-
-describe "Non-empty big skiplist" do
-
- before(:each) do
- a = []
- 100.times { |i|
- a << "#{i}"
- a << "#{rand(100)}"
- }
- @list = Skiplist.new(Hash[*a])
- end
-
- it "should contain all the items" do
- #puts @list.to_s_levels
- end
-end
-
-
-
-describe "Skiplist search" do
- before(:each) do
- @times = 100
- @start = 128
- @ratio = 2
- @lists = [@start, @start*@ratio, @start*@ratio*@ratio].map do |len|
- list = Skiplist.new
- len.times do |i|
- list.insert(i, rand)
- end
- list
- end
- end
-
- it "should be O(log(n))" do
- t1 = time(@times, @lists[0])
- t2 = time(@times, @lists[1])
- t3 = time(@times, @lists[2])
-
- r1 = Math.log(t2/t1)
- r2 = Math.log(t3/t2)
-
- #p [t1, t2, t3]
- #p [r1, r2]
-
- # r1.should == r2
- end
-
- def time(n, list)
- GC.start
- t = Time.now
- s = list.size
- n.times { list.find(rand(s)) }
- Time.now - t
- ensure
- GC.start
- end
-end
-
-
-
-
-
-
Please sign in to comment.
Something went wrong with that request. Please try again.