Skip to content

Commit

Permalink
hash_map: adding a rehash() method
Browse files Browse the repository at this point in the history
This allows a highly fragmented hash_map to have tombstones removed as
the values are all rehashed.

It would be nice to call rehash() automatically, but that currently
presents a challenge. It does not work with adapted contexts, because
the keys are not preserved in the map for re-hashing and the hash value
is not currently stored. Non-adapted contexts would also require a bit of
additional book-keeping to check before calling rehash().
  • Loading branch information
mrjbq7 committed Nov 6, 2023
1 parent 1b0b46a commit 3c24425
Showing 1 changed file with 122 additions and 0 deletions.
122 changes: 122 additions & 0 deletions lib/std/hash_map.zig
Original file line number Diff line number Diff line change
Expand Up @@ -681,6 +681,11 @@ pub fn HashMap(
self.unmanaged = .{};
return result;
}

/// Rebuild the table in place, using the context stored in this map.
///
/// This is a thin forwarder to `self.unmanaged.rehash`, passing `self.ctx`
/// as the hashing context. It takes no allocator and cannot fail.
pub fn rehash(self: *Self) void {
    return self.unmanaged.rehash(self.ctx);
}
};
}

Expand Down Expand Up @@ -1322,6 +1327,7 @@ pub fn HashMapUnmanaged(
if (@TypeOf(hash) != Hash) {
@compileError("Context " ++ @typeName(@TypeOf(ctx)) ++ " has a generic hash function that returns the wrong type! " ++ @typeName(Hash) ++ " was expected, but found " ++ @typeName(@TypeOf(hash)));
}

const mask = self.capacity() - 1;
const fingerprint = Metadata.takeFingerprint(hash);
var limit = self.capacity();
Expand Down Expand Up @@ -1505,6 +1511,91 @@ pub fn HashMapUnmanaged(
return result;
}

/// Rehash the map, in-place.
///
/// Every tombstone is turned back into a free slot and every live entry is
/// re-placed according to `ctx.hash(key)`. Entries may be moved to other
/// slots, so key/value pointers obtained before the call must be treated
/// as invalid afterwards. No allocator is involved and the capacity is
/// left unchanged.
///
/// `ctx` must expose `hash(key)`; it is expected to produce the same hashes
/// that were used when the keys were inserted.
///
/// Calling this on a map that has no metadata allocated is a no-op.
pub fn rehash(self: *Self, ctx: anytype) void {
    // Nothing to walk if the table was never allocated. Bail out before
    // unwrapping the metadata pointer or computing `capacity() - 1`.
    const metadata = self.metadata orelse return;

    const cap = self.capacity();
    const mask = cap - 1;
    const keys_ptr = self.keys();
    const values_ptr = self.values();

    // Pass 1: wipe every fingerprint but leave the `used` bit alone.
    // Tombstones thereby become free slots, and each live slot is left as
    // "used + free fingerprint", meaning "still waiting to be rehashed".
    // During pass 2 the tombstone fingerprint marks slots *ahead* of the
    // cursor that already hold a re-placed entry. A real fingerprint cannot
    // serve as either marker: it is derived from the hash and nothing
    // guarantees it differs from the marker constants.
    var curr: Size = 0;
    while (curr < cap) : (curr += 1) {
        metadata[curr].fingerprint = Metadata.free;
    }

    // Pass 2: walk the table once, moving each entry to its final slot.
    curr = 0;
    while (curr < cap) {
        if (!metadata[curr].isUsed()) {
            assert(metadata[curr].isFree());
            curr += 1;
            continue;
        }

        const hash = ctx.hash(keys_ptr[curr]);
        const fingerprint = Metadata.takeFingerprint(hash);
        var idx = @as(usize, @truncate(hash & mask));

        // Probe from the home slot, stepping over
        //  - used slots behind the cursor (final entries), and
        //  - marked slots ahead of the cursor (entries already re-placed).
        // The probe always stops at the cursor itself at the latest.
        while ((idx < curr and metadata[idx].isUsed()) or
            (idx > curr and metadata[idx].fingerprint == Metadata.tombstone))
        {
            idx = (idx + 1) & mask;
        }

        if (idx < curr) {
            // Free slot behind the cursor: move the entry there for good.
            assert(metadata[idx].isFree());
            metadata[idx].fingerprint = fingerprint;
            metadata[idx].used = 1;
            keys_ptr[idx] = keys_ptr[curr];
            values_ptr[idx] = values_ptr[curr];

            // The cursor slot still carries the cleared fingerprint from
            // pass 1, so dropping the `used` bit makes it free.
            metadata[curr].used = 0;
            assert(metadata[curr].isFree());
            keys_ptr[curr] = undefined;
            values_ptr[curr] = undefined;

            curr += 1;
        } else if (idx == curr) {
            // Already where it belongs; just restore the real fingerprint.
            metadata[idx].fingerprint = fingerprint;
            curr += 1;
        } else {
            // Target is ahead of the cursor and not yet claimed. Mark it so
            // later probes step over it; the real fingerprint is restored
            // when the cursor reaches this slot.
            assert(metadata[idx].fingerprint != Metadata.tombstone);
            metadata[idx].fingerprint = Metadata.tombstone;

            if (metadata[idx].isUsed()) {
                // The target holds an entry that has not been rehashed yet:
                // swap it into the cursor slot and process it next, without
                // advancing the cursor.
                const displaced_key = keys_ptr[idx];
                const displaced_value = values_ptr[idx];

                keys_ptr[idx] = keys_ptr[curr];
                values_ptr[idx] = values_ptr[curr];

                keys_ptr[curr] = displaced_key;
                values_ptr[curr] = displaced_value;
            } else {
                // The target is empty: move the entry and free the cursor slot.
                metadata[idx].used = 1;
                keys_ptr[idx] = keys_ptr[curr];
                values_ptr[idx] = values_ptr[curr];

                metadata[curr].fingerprint = Metadata.free;
                metadata[curr].used = 0;
                keys_ptr[curr] = undefined;
                values_ptr[curr] = undefined;

                curr += 1;
            }
        }
    }

    // Recompute the remaining insert budget from the capacity, the maximum
    // load percentage and the number of live entries.
    self.available = @as(u32, @truncate((cap * max_load_percentage) / 100)) - self.size;
}

fn grow(self: *Self, allocator: Allocator, new_capacity: Size, ctx: Context) Allocator.Error!void {
@setCold(true);
const new_cap = @max(new_capacity, minimal_capacity);
Expand Down Expand Up @@ -2218,3 +2309,34 @@ test "std.hash_map repeat fetchRemove" {
try testing.expect(map.get(2) != null);
try testing.expect(map.get(3) != null);
}

test "std.hash_map rehash" {
    var map = AutoHashMap(u32, u32).init(std.testing.allocator);
    defer map.deinit();

    // Fixed seed: the element count is pseudo-random but the same on every run.
    var prng = std.rand.DefaultPrng.init(0);
    const random = prng.random();

    // A multiple of 6, so `count * 2 / 3` below is exact.
    const count = 6 * random.intRangeLessThan(u32, 10_000, 100_000);

    // Insert 0..count, removing every third key right after inserting it so
    // the table is interleaved with removed slots before rehashing.
    var i: u32 = 0;
    while (i < count) : (i += 1) {
        try map.put(i, i);
        if (i % 3 == 0) {
            try expectEqual(true, map.remove(i));
        }
    }

    map.rehash();

    // Two thirds of the keys must have survived the rehash.
    try expectEqual(count * 2 / 3, map.count());

    // Removed keys stay absent; surviving keys still map to themselves.
    i = 0;
    while (i < count) : (i += 1) {
        if (i % 3 == 0) {
            try expectEqual(@as(?u32, null), map.get(i));
        } else {
            try expectEqual(i, map.get(i).?);
        }
    }
}

0 comments on commit 3c24425

Please sign in to comment.