In [1]:
import numpy as np
from itertools import chain

## Хеш-таблица, использующая метод цепочек

In [2]:
class Node:
    """A single key/value entry of a singly linked chain.

    Attributes are documented inline below; ``next`` starts out as ``None``
    and is filled in by whoever links the node into a list.
    """

    def __init__(self, key, value):
        # Payload of the entry.
        self.key, self.value = key, value
        # Reference to the following node; None marks the end of the chain.
        self.next = None

class Link_list:
    """Singly linked list of node objects.

    Nodes are expected to expose ``key`` and ``next`` attributes (``next`` is
    ``None`` for the last node). The list itself only stores a reference to
    the first node in ``head_node``.
    """

    def __init__(self, node=None):
        """Create a list, optionally starting with ``node`` as its head."""
        self.head_node = node

    def __iter__(self):
        """Yield the nodes from head to tail."""
        item = self.head_node
        while item is not None:
            yield item
            item = item.next

    def add(self, node):
        """Append ``node`` to the end of the list."""
        if self.head_node is None:
            self.head_node = node
            return
        last_node = self.head_node
        while last_node.next is not None:
            last_node = last_node.next
        last_node.next = node

    def search(self, key):
        """Return the first node whose ``key`` equals ``key``, or ``None``."""
        for node in self:
            if node.key == key:
                return node
        return None

    def remove(self, node_key):
        """Unlink the first node whose ``key`` equals ``node_key``.

        Does nothing when the list is empty or the key is not present
        (previously both cases raised ``AttributeError``).
        """
        previous = None
        for node in self:
            if node.key == node_key:
                if previous is None:
                    # The match is the head: advance the head pointer.
                    self.head_node = node.next
                else:
                    # Bypass the matched node.
                    previous.next = node.next
                return
            previous = node

class HashTable:
    """Hash table that resolves collisions by separate chaining.

    ``data`` is a list of ``size`` buckets; a bucket is either ``None`` or a
    ``Link_list`` of ``Node`` objects. ``hash`` maps a key to a bucket index
    and is either the user supplied ``hash_fun`` or ``my_hash``.
    """

    def __init__(self, size=10, hash_fun=None):
        """Create ``size`` empty buckets and pick the hash function."""
        self.size = size
        self.data = [None] * self.size
        # A custom hash function receives the key only and must return a
        # valid index into ``data``.
        self.hash = hash_fun if hash_fun else self.my_hash

    def my_hash(self, string):
        """Default hash: position-weighted sum of code points modulo ``size``.

        The argument is converted with ``str`` first, so any object works.
        """
        text = str(string)
        total = sum(ord(char) * position
                    for position, char in enumerate(text, start=1))
        return total % self.size

    def put(self, key, value):
        """Insert ``key`` with ``value`` or overwrite the existing value."""
        index = self.hash(key)
        bucket = self.data[index]
        if not bucket:
            # First entry for this bucket: start a new chain.
            self.data[index] = Link_list(Node(key, value))
            return
        existing = bucket.search(key)
        if existing:
            existing.value = value
        else:
            bucket.add(Node(key, value))

    def get(self, key):
        """Return ``(key, value)`` for ``key`` or ``None`` when it is absent."""
        bucket = self.data[self.hash(key)]
        if not bucket:
            return None
        found = bucket.search(key)
        if found:
            return (found.key, found.value)
        return None

    def remove(self, key):
        """Delete ``key`` from its bucket; drop the bucket once it is empty."""
        index = self.hash(key)
        bucket = self.data[index]
        if not bucket:
            return
        bucket.remove(key)
        if bucket.head_node is None:
            self.data[index] = None

    def __repr__(self):
        """Show the keys grouped by non-empty bucket."""
        grouped_keys = []
        for bucket in self.data:
            if bucket:
                grouped_keys.append([node.key for node in bucket])
        return str(grouped_keys)

    def __iter__(self):
        """Yield ``(key, value)`` pairs bucket by bucket."""
        # Snapshot the non-empty buckets first, then walk their chains lazily.
        buckets = [bucket for bucket in self.data if bucket]
        for node in chain.from_iterable(buckets):
            yield (node.key, node.value)
    

In [3]:
# Create a chained hash table with len_table buckets.
len_table = 10
ht = HashTable(len_table)

In [4]:
# Insert the single-character keys s[10], s[9], ..., s[1] with values 0..9,
# then display the raw bucket list.
s ='qwertyuiopa'
for i in range(10):
    ht.put( s[10-i], i)
ht.data

[None,
 <__main__.Link_list at 0x7f9937372e80>,
 <__main__.Link_list at 0x7f9937372f60>,
 None,
 <__main__.Link_list at 0x7f99373e22b0>,
 <__main__.Link_list at 0x7f9937372e48>,
 <__main__.Link_list at 0x7f9937372cf8>,
 <__main__.Link_list at 0x7f9937372dd8>,
 None,
 <__main__.Link_list at 0x7f99373e2518>]

In [5]:
ht

[['o', 'y', 'e'], ['p'], ['r'], ['i'], ['t'], ['a', 'u'], ['w']]

In [6]:
for i in ht:
    print(i)

('o', 2)
('y', 5)
('e', 8)
('p', 1)
('r', 7)
('i', 3)
('t', 6)
('a', 0)
('u', 4)
('w', 9)


In [7]:
ht.get('y')

('y', 5)

In [8]:
ht.remove('y')

In [9]:
ht.get('y')

## Хеш-таблица с открытой адресацией, "ленивым" удалением, двойным хешированием (вместо квадратичного пробинга)

In [51]:
class HashOpenTable:
    """Open-addressing hash table with double hashing and lazy deletion.

    ``data`` is a NumPy object array of shape ``(size, 3)``; every row is a
    slot ``[key, value, deleted]``:

    * ``key is None`` and ``deleted`` false - the slot was never used;
    * ``deleted`` true - tombstone: the entry was removed (or relocated) and
      the slot may be reused, but probing must continue past it;
    * otherwise the slot holds a live entry.

    ``None`` is reserved as the "empty" marker and therefore cannot be used
    as a key. Any other key, including falsy ones such as ``0`` or ``''``,
    is supported.
    """

    def __init__(self, size=10, hash_fun=None):
        """Create a table with ``size`` slots.

        ``hash_fun``, when given, is called as ``hash_fun(key, i)`` with the
        probe number ``i`` and must return a valid slot index.

        Raises:
            ValueError: if ``size`` is smaller than 1.
        """
        if size < 1:
            raise ValueError("size must be a positive integer")
        self.size = size
        self.data = self._empty_slots(size)
        self.hash = hash_fun if hash_fun else self.my_hash

    @staticmethod
    def _empty_slots(count):
        """Return a fresh ``(count, 3)`` object array of unused slots."""
        return np.array([[None, None, False]] * count, dtype='object')

    def _store(self, index, key, value):
        """Write a live entry into slot ``index``."""
        slot = self.data[index]  # a view: assignments write through
        slot[0] = key
        slot[1] = value
        slot[2] = False

    def _items(self):
        """Return the live ``(key, value)`` pairs in slot order."""
        return [(key, value) for key, value, deleted in self.data
                if key is not None and not deleted]

    def my_hash(self, string, i):
        """Default double hash: slot index for probe number ``i``.

        Both hashes derive from the position-weighted sum of the code points
        of ``str(string)``; the second one is always at least 1 so that the
        probe sequence actually moves.
        """
        text = str(string)
        summ = sum(ord(char) * position
                   for position, char in enumerate(text, start=1))
        hash_1 = summ % self.size
        # max(..., 1) keeps a one-slot table from dividing by zero.
        hash_2 = 1 + (summ % max(self.size - 1, 1))
        return (hash_1 + i * hash_2) % self.size

    def put(self, key, value):
        """Insert ``key`` or overwrite its value; grow the table when needed.

        The whole probe chain is examined before a tombstone is reused, so a
        key that is already stored further along the chain is updated in
        place instead of being inserted a second time.
        """
        target = None  # first reusable slot met on the probe chain
        for i in range(self.size):
            index = self.hash(key, i)
            slot = self.data[index]
            if slot[2]:
                # Tombstone: remember the first one, keep probing.
                if target is None:
                    target = index
                continue
            if slot[0] is None:
                # Never-used slot ends the chain: the key is not stored.
                if target is None:
                    target = index
                break
            if slot[0] == key:
                slot[1] = value
                return
        if target is None:
            # Every probed slot holds another live key.
            self.resize()
            self.put(key, value)
            return
        self._store(target, key, value)

    def get(self, key):
        """Return the value stored for ``key`` or ``None`` when it is absent.

        As a side effect a found entry is moved into the most recently seen
        tombstone on its probe chain, so repeated look-ups move the entry
        step by step towards the start of the chain.
        """
        tombstone = None
        for i in range(self.size):
            index = self.hash(key, i)
            slot = self.data[index]
            if slot[2]:
                tombstone = index
                continue
            if slot[0] is None:
                return None
            if slot[0] == key:
                value = slot[1]
                if tombstone is not None:
                    self._store(tombstone, slot[0], value)
                    slot[2] = True  # the old position becomes a tombstone
                return value
        return None

    def remove(self, key):
        """Lazily delete ``key``: the slot is flagged, its content is kept."""
        for i in range(self.size):
            slot = self.data[self.hash(key, i)]
            if slot[2]:
                continue
            if slot[0] is None:
                return
            if slot[0] == key:
                slot[2] = True
                return

    def resize(self):
        """Double the number of slots and re-insert all live entries."""
        live = self._items()
        self.size = self.size * 2
        self.data = self._empty_slots(self.size)
        for key, value in live:
            self.put(key, value)

    def __iter__(self):
        """Yield the live ``(key, value)`` pairs in slot order."""
        for pair in self._items():
            yield pair

    def __repr__(self):
        """Show the live ``(key, value)`` pairs in slot order."""
        return str(self._items())
    
  
        

In [52]:
hot = HashOpenTable()

In [53]:
# Insertion: the prefixes of s of length 10 down to 1 are stored with values 0..9.
s ='qwertyuiopa'
for i in range(10):
    hot.put( s[:10-i], i)
hot.data

array([['qwerty', 4, False],
       [None, None, False],
       [None, None, False],
       ['qwe', 7, False],
       [None, None, False],
       ['qwertyui', 2, False],
       [None, None, False],
       [None, None, False],
       [None, None, False],
       [None, None, False],
       ['qwert', 5, False],
       ['qw', 8, False],
       [None, None, False],
       ['q', 9, False],
       ['qwertyuio', 1, False],
       ['qwertyu', 3, False],
       ['qwertyuiop', 0, False],
       [None, None, False],
       [None, None, False],
       ['qwer', 6, False]], dtype=object)

In [23]:
# Look up an element by key.
hot.get('qwerty')

4

In [24]:
# Lazy deletion of an element: the slot is only flagged as deleted.
hot.remove('qwerty')
hot.data

array([['qwerty', 4, True],
       [None, None, False],
       [None, None, False],
       ['qwe', 7, False],
       [None, None, False],
       ['qwertyui', 2, False],
       [None, None, False],
       [None, None, False],
       [None, None, False],
       [None, None, False],
       ['qwert', 5, False],
       ['qw', 8, False],
       [None, None, False],
       ['q', 9, False],
       ['qwertyuio', 1, False],
       ['qwertyu', 3, False],
       ['qwertyuiop', 0, False],
       [None, None, False],
       [None, None, False],
       ['qwer', 6, False]], dtype=object)

In [25]:
hot.get('qwerty')

In [26]:
# A new entry is written into the slot of the deleted element.
hot.put('dd', 77)
hot.data

array([['dd', 77, False],
       [None, None, False],
       [None, None, False],
       ['qwe', 7, False],
       [None, None, False],
       ['qwertyui', 2, False],
       [None, None, False],
       [None, None, False],
       [None, None, False],
       [None, None, False],
       ['qwert', 5, False],
       ['qw', 8, False],
       [None, None, False],
       ['q', 9, False],
       ['qwertyuio', 1, False],
       ['qwertyu', 3, False],
       ['qwertyuiop', 0, False],
       [None, None, False],
       [None, None, False],
       ['qwer', 6, False]], dtype=object)

In [27]:
for i in hot:
    print(i)

('dd', 77)
('qwe', 7)
('qwertyui', 2)
('qwert', 5)
('qw', 8)
('q', 9)
('qwertyuio', 1)
('qwertyu', 3)
('qwertyuiop', 0)
('qwer', 6)


In [28]:
hot

[('dd', 77), ('qwe', 7), ('qwertyui', 2), ('qwert', 5), ('qw', 8), ('q', 9), ('qwertyuio', 1), ('qwertyu', 3), ('qwertyuiop', 0), ('qwer', 6)]

In [54]:
# "Bubbling up" of a value into the slots of deleted entries.
# Start from a fresh table filled with the single-character keys s[10]..s[1].
hot = HashOpenTable()
s ='qwertyuiopa'
for i in range(10):
    hot.put( s[10-i], i)
hot.data

array([['e', 8, False],
       ['o', 2, False],
       ['p', 1, False],
       ['w', 9, False],
       ['t', 6, False],
       ['i', 3, False],
       ['y', 5, False],
       ['a', 0, False],
       ['u', 4, False],
       ['r', 7, False]], dtype=object)

In [55]:
# Lazily remove the keys s[1]..s[9] (w, e, t, y, u, i, o, p, a); only 'r' stays live.
s ='qwetyuiopa'
for i in range(9):
    hot.remove(s[1 + i])
hot.data

array([['e', 8, True],
       ['o', 2, True],
       ['p', 1, True],
       ['w', 9, True],
       ['t', 6, True],
       ['i', 3, True],
       ['y', 5, True],
       ['a', 0, True],
       ['u', 4, True],
       ['r', 7, False]], dtype=object)

In [56]:
# 'r' moves into the slot of a deleted entry, closer to the start of the probe sequence.
hot.get('r')
hot.data

array([['e', 8, True],
       ['o', 2, True],
       ['r', 7, False],
       ['w', 9, True],
       ['t', 6, True],
       ['i', 3, True],
       ['y', 5, True],
       ['a', 0, True],
       ['u', 4, True],
       ['r', 7, True]], dtype=object)

In [57]:
hot.get('r')
hot.data

array([['e', 8, True],
       ['o', 2, True],
       ['r', 7, True],
       ['w', 9, True],
       ['t', 6, True],
       ['r', 7, False],
       ['y', 5, True],
       ['a', 0, True],
       ['u', 4, True],
       ['r', 7, True]], dtype=object)

In [58]:
hot.get('r')
hot.data

array([['e', 8, True],
       ['o', 2, True],
       ['r', 7, True],
       ['w', 9, True],
       ['t', 6, True],
       ['r', 7, True],
       ['y', 5, True],
       ['a', 0, True],
       ['r', 7, False],
       ['r', 7, True]], dtype=object)

In [59]:
hot.get('r')
hot.data

array([['e', 8, True],
       ['r', 7, False],
       ['r', 7, True],
       ['w', 9, True],
       ['t', 6, True],
       ['r', 7, True],
       ['y', 5, True],
       ['a', 0, True],
       ['r', 7, True],
       ['r', 7, True]], dtype=object)

In [60]:
hot.get('r')
hot.data

array([['e', 8, True],
       ['r', 7, True],
       ['r', 7, True],
       ['w', 9, True],
       ['r', 7, False],
       ['r', 7, True],
       ['y', 5, True],
       ['a', 0, True],
       ['r', 7, True],
       ['r', 7, True]], dtype=object)

In [61]:
# The key 'r' now occupies the slot that corresponds to i = 0 in the hash function.
hot.get('r')
hot.data

array([['e', 8, True],
       ['r', 7, True],
       ['r', 7, True],
       ['w', 9, True],
       ['r', 7, False],
       ['r', 7, True],
       ['y', 5, True],
       ['a', 0, True],
       ['r', 7, True],
       ['r', 7, True]], dtype=object)

In [64]:
i = 0
hot.hash('r', i)

4

In [65]:
hot.get('r')

7