-
Notifications
You must be signed in to change notification settings - Fork 81
/
Copy pathtwo_level_hash_map.cpp
135 lines (102 loc) · 3.54 KB
/
two_level_hash_map.cpp
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
#include <exception>
#include <iostream>
#include <iomanip>
#include <string>
#include <vector>
#include <unordered_map>
#include <sparsehash/dense_hash_map>
#include <sparsehash/sparse_hash_map>
//#define DBMS_HASH_MAP_DEBUG_RESIZES
#include <Common/Stopwatch.h>
#include <AggregateFunctions/UniquesHashSet.h>
#include <base/types.h>
#include <IO/ReadBufferFromFile.h>
#include <Compression/CompressedReadBuffer.h>
#include <Common/HashTable/TwoLevelHashTable.h>
#include <Common/HashTable/HashMap.h>
using Key = UInt64;
using Value = UInt64;
/** Manual benchmark / sanity check for TwoLevelHashTable.
  *
  * Usage: program n
  *
  * Stages:
  *  1. Read n keys (sizeof(Key) * n bytes) from stdin through a
  *     DB::CompressedReadBuffer into a vector and report the read throughput.
  *  2. Count occurrences of every key read in stage 1 with a two-level hash table,
  *     report the insert throughput, then iterate the table and print the
  *     sum of counters and the number of distinct elements.
  *  3. Insert the sequential keys 0 .. n-1 into a fresh two-level hash table,
  *     report the insert throughput, then verify by iteration that no key is
  *     outside [0, n) and that the counters sum to n.
  *
  * All diagnostics are written to stderr.
  * Returns 1 if the argument is missing or invalid, 0 otherwise.
  */
int main(int argc, char ** argv)
{
    if (argc < 2)
    {
        std::cerr << "Usage: program n\n";
        return 1;
    }

    /// Parse the element count defensively: std::stol throws on non-numeric or
    /// out-of-range input, and a negative value would wrap to a huge size_t.
    size_t n = 0;
    try
    {
        const long parsed = std::stol(argv[1]);
        if (parsed < 0)
        {
            std::cerr << "n must be non-negative, got " << parsed << "\n";
            return 1;
        }
        n = static_cast<size_t>(parsed);
    }
    catch (const std::exception & e)
    {
        std::cerr << "Cannot parse n from '" << argv[1] << "': " << e.what() << "\n";
        return 1;
    }

    std::vector<Key> data(n);

    std::cerr << "sizeof(Key) = " << sizeof(Key) << ", sizeof(Value) = " << sizeof(Value) << std::endl;

    /// Stage 1: load n keys from compressed stdin.
    {
        Stopwatch watch;
        DB::ReadBufferFromFileDescriptor in1(STDIN_FILENO);
        DB::CompressedReadBuffer in2(in1);

        in2.readStrict(reinterpret_cast<char*>(data.data()), sizeof(data[0]) * n);

        watch.stop();
        std::cerr << std::fixed << std::setprecision(2)
            << "Vector. Size: " << n
            << ", elapsed: " << watch.elapsedSeconds()
            << " (" << n / watch.elapsedSeconds() << " elem/sec.)"
            << std::endl;
    }

    /// Stage 2: count occurrences of the keys that were read from stdin.
    {
        Stopwatch watch;

        std::cerr << sizeof(HashMapCell<Key, Value, DefaultHash<Key>>) << std::endl;

        using Map = TwoLevelHashTable<Key, HashMapCell<Key, Value, DefaultHash<Key>>, DefaultHash<Key>, HashTableGrower<8>, HashTableAllocator>;

        Map map;
        Map::LookupResult it;
        bool inserted = false;

        for (size_t i = 0; i < n; ++i)
        {
            map.emplace(data[i], it, inserted);
            /// A freshly inserted cell gets its counter reset explicitly before counting.
            if (inserted)
                it->getMapped() = 0;
            ++it->getMapped();
        }

        watch.stop();
        std::cerr << std::fixed << std::setprecision(2)
            << "HashMap. Size: " << map.size()
            << ", elapsed: " << watch.elapsedSeconds()
            << " (" << n / watch.elapsedSeconds() << " elem/sec.)"
            << std::endl;

        /// Walk the whole table to exercise iteration and print aggregate numbers.
        size_t sum_counts = 0;
        size_t elems = 0;
        for (const auto & kv : map)
        {
            sum_counts += kv.getMapped();
            ++elems;
        }

        std::cerr << "sum_counts: " << sum_counts << ", elems: " << elems << std::endl;
    }

    /// Stage 3: insert the sequential keys 0 .. n-1 and verify the table contents.
    {
        Stopwatch watch;

        using Map = TwoLevelHashTable<Key, HashMapCell<Key, Value, DefaultHash<Key>>, DefaultHash<Key>, HashTableGrower<8>, HashTableAllocator>;
        //using Map = HashMap<Key, Value, UniquesHashSetDefaultHash>;

        Map map;
        Map::LookupResult it;
        bool inserted = false;

        for (size_t i = 0; i < n; ++i)
        {
            map.emplace(i, it, inserted);
            if (inserted)
                it->getMapped() = 0;
            ++it->getMapped();
        }

        watch.stop();
        std::cerr << std::fixed << std::setprecision(2)
            << "HashMap. Size: " << map.size()
            << ", elapsed: " << watch.elapsedSeconds()
            << " (" << n / watch.elapsedSeconds() << " elem/sec.)"
            << std::endl;

        size_t sum_counts = 0;
        size_t elems = 0;
        for (const auto & kv : map)
        {
            sum_counts += kv.getMapped();
            ++elems;

            /// Only keys 0 .. n-1 were inserted, so a key equal to n is already unexpected.
            if (kv.getKey() >= n)
                std::cerr << kv.getKey() << std::endl;
        }

        std::cerr << "sum_counts: " << sum_counts << ", elems: " << elems << std::endl;

        /// Each of the n insertions incremented exactly one counter.
        if (sum_counts != n)
            std::cerr << "Error!" << std::endl;
    }

    return 0;
}