Skip to content

Commit

Permalink
DataOriented: Improves the performance
Browse files Browse the repository at this point in the history
The pointer-based storage of nodes is replaced by node indices into a
vector containing all nodes.

This change removes all the reference-counting overhead and
theoretically improves cache utilization.

Both of these factors positively affect the performance.
  • Loading branch information
aserebryakov committed Jul 5, 2018
1 parent 8032921 commit 1542fd9
Show file tree
Hide file tree
Showing 3 changed files with 56 additions and 43 deletions.
2 changes: 1 addition & 1 deletion benches/basic_benchmark.rs
Original file line number Diff line number Diff line change
Expand Up @@ -2,8 +2,8 @@
extern crate gtrie;
extern crate test;

use test::Bencher;
use std::collections::HashMap;
use test::Bencher;

fn generate_keys() -> Vec<String> {
let mut keys = Vec::new();
Expand Down
74 changes: 48 additions & 26 deletions src/lib.rs
Original file line number Diff line number Diff line change
Expand Up @@ -61,16 +61,14 @@
//! ```

mod trie_node;
use std::rc::Rc;
use std::cell::RefCell;
use std::cmp::{Eq, Ord};
use std::clone::Clone;
use std::cmp::{Eq, Ord};
use trie_node::TrieNode;

/// Prefix tree object
pub struct Trie<T, U> {
/// Root of the prefix tree
root: Rc<RefCell<TrieNode<T>>>,
nodes: Vec<TrieNode<T>>,
values: Vec<U>,
}

Expand All @@ -86,7 +84,7 @@ impl<T: Eq + Ord + Clone, U: Clone> Trie<T, U> {
/// ```
pub fn new() -> Trie<T, U> {
Trie {
root: Rc::new(RefCell::new(TrieNode::new(None))),
nodes: Vec::<TrieNode<T>>::new(),
values: Vec::<U>::new(),
}
}
Expand All @@ -102,7 +100,7 @@ impl<T: Eq + Ord + Clone, U: Clone> Trie<T, U> {
/// assert_eq!(t.is_empty(), true);
/// ```
pub fn is_empty(&self) -> bool {
self.root.borrow().children.is_empty()
self.nodes.is_empty()
}

/// Adds a new key to the trie
Expand All @@ -119,15 +117,25 @@ impl<T: Eq + Ord + Clone, U: Clone> Trie<T, U> {
/// assert_eq!(t.is_empty(), false);
/// ```
pub fn insert<V: Iterator<Item = T>>(&mut self, key: V, value: U) {
self.values.push(value);
let mut node_id = 0usize;

if self.is_empty() {
node_id = self.create_new_node();
}

let mut node = self.root.clone();
for c in key {
let next_node = (*node).borrow_mut().insert(&c);
node = next_node;
if let Some(id) = self.nodes[node_id].find(&c) {
node_id = id;
} else {
let new_node_id = self.create_new_node();
self.nodes[node_id].insert(&c, new_node_id);
node_id = new_node_id;
}
}

(*node).borrow_mut().set_value(self.values.len() - 1);
// TODO: Check if node already contains value
self.values.push(value);
self.nodes[node_id].set_value(self.values.len() - 1);
}

/// Clears the trie
Expand All @@ -145,7 +153,8 @@ impl<T: Eq + Ord + Clone, U: Clone> Trie<T, U> {
/// assert_eq!(t.is_empty(), true);
/// ```
pub fn clear(&mut self) {
(*self.root).borrow_mut().children.clear();
self.nodes.clear();
self.values.clear();
}

/// Looks for the key in trie
Expand All @@ -166,9 +175,13 @@ impl<T: Eq + Ord + Clone, U: Clone> Trie<T, U> {
/// assert_eq!(t.contains_key(another_data), false);
/// ```
pub fn contains_key<V: Iterator<Item = T>>(&self, key: V) -> bool {
if self.values.is_empty() && self.nodes.is_empty() {
return false;
}

match self.find_node(key) {
Some(node) => {
if node.borrow().may_be_leaf() {
Some(node_id) => {
if self.nodes[node_id].may_be_leaf() {
true
} else {
false
Expand Down Expand Up @@ -197,7 +210,7 @@ impl<T: Eq + Ord + Clone, U: Clone> Trie<T, U> {
pub fn get_value<V: Iterator<Item = T>>(&self, key: V) -> Option<U> {
match self.find_node(key) {
// TODO: Properly handle the probable panic
Some(node) => Some(self.values[node.borrow().get_value().unwrap()].clone()),
Some(node_id) => Some(self.values[self.nodes[node_id].get_value().unwrap()].clone()),
None => None,
}
}
Expand All @@ -222,8 +235,9 @@ impl<T: Eq + Ord + Clone, U: Clone> Trie<T, U> {
/// ```
pub fn set_value<V: Iterator<Item = T>>(&mut self, key: V, value: U) -> Result<(), ()> {
match self.find_node(key) {
Some(node) => {
self.values[node.borrow().get_value().unwrap()] = value;
Some(node_id) => {
let value_id = self.nodes[node_id].get_value().unwrap();
self.values[value_id] = value;
Ok(())
}
None => Err(()),
Expand All @@ -233,20 +247,28 @@ impl<T: Eq + Ord + Clone, U: Clone> Trie<T, U> {
/// Finds the node in the trie by the key
///
/// Internal API
fn find_node<V: Iterator<Item = T>>(&self, key: V) -> Option<Rc<RefCell<TrieNode<T>>>> {
let mut node = self.root.clone();
fn find_node<V: Iterator<Item = T>>(&self, key: V) -> Option<usize> {
if self.nodes.is_empty() {
return None;
}

for c in key {
let mut _next_node = node.clone();
let mut node_id = 0usize;

match node.borrow().find(&c) {
Some(child) => _next_node = child,
for c in key {
match self.nodes[node_id].find(&c) {
Some(child_id) => node_id = child_id,
None => return None,
}

node = _next_node;
}

Some(node.clone())
Some(node_id)
}

/// Appends a fresh, value-less node to the node vector.
///
/// Returns the index of the newly pushed node within `self.nodes`.
///
/// Internal API
fn create_new_node(&mut self) -> usize {
    // The vector length before the push is the index the new node will occupy.
    let new_node_id = self.nodes.len();
    self.nodes.push(TrieNode::new(None));
    new_node_id
}
}
23 changes: 7 additions & 16 deletions src/trie_node.rs
Original file line number Diff line number Diff line change
Expand Up @@ -20,42 +20,33 @@
// OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
// SOFTWARE.

use std::rc::Rc;
use std::cell::RefCell;
use std::cmp::{Eq, Ord};
use std::clone::Clone;
use std::cmp::{Eq, Ord};

pub struct TrieNode<T> {
pub value: Option<usize>,
pub children: Vec<(T, Rc<RefCell<TrieNode<T>>>)>,
pub children: Vec<(T, usize)>,
}

impl<T: Eq + Ord + Clone> TrieNode<T> {
pub fn new(value: Option<usize>) -> TrieNode<T> {
TrieNode {
value,
children: Vec::<(T, Rc<RefCell<TrieNode<T>>>)>::new(),
children: Vec::<(T, usize)>::new(),
}
}

pub fn find(&self, key: &T) -> Option<Rc<RefCell<TrieNode<T>>>> {
pub fn find(&self, key: &T) -> Option<usize> {
if let Ok(idx) = self.children.binary_search_by(|x| x.0.cmp(key)) {
return Some(self.children[idx].1.clone());
}

None
}

pub fn insert(&mut self, key: &T) -> Rc<RefCell<TrieNode<T>>> {
match self.find(key) {
None => {
let new_node = Rc::new(RefCell::new(TrieNode::new(None)));
self.children.push((key.clone(), new_node.clone()));
self.children.sort_by(|a, b| a.0.cmp(&b.0));
new_node
}
Some(node) => node.clone(),
}
pub fn insert(&mut self, key: &T, child_id: usize) {
self.children.push((key.clone(), child_id));
self.children.sort_by(|a, b| a.0.cmp(&b.0));
}

pub fn set_value(&mut self, value: usize) {
Expand Down

0 comments on commit 1542fd9

Please sign in to comment.