Skip to content
Snippets Groups Projects
Commit 599f3034 authored by Petr Chmel's avatar Petr Chmel
Browse files

Publish cuckoo hash

parent dd14ff8d
No related branches found
No related tags found
No related merge requests found
# Default target: build the test binary and run it.
test: cuckoo_hash_test
	./$<

# Directory searched for headers; may be overridden on the command line.
INCLUDE ?= .
CXXFLAGS=-std=c++23 -O2 -Wall -Wextra -g -Wno-sign-compare -Wno-array-bounds -I$(INCLUDE)

# The headers are prerequisites so that editing them triggers a rebuild,
# but only the non-header prerequisites are passed to the compiler.
cuckoo_hash_test: cuckoo_hash_test.cpp test_main.cpp cuckoo_hash.h hash_functions.h random.h
	$(CXX) $(CXXFLAGS) $(filter-out %.h,$^) -o $@

# Remove the built test binary.
clean:
	rm -f cuckoo_hash_test

.PHONY: clean test
#include <array>
#include <cstdint>
#include <iostream>
#include <utility>
#include <vector>
// Marker stored in every empty bucket; consequently it can never be used as a key.
const uint32_t UNUSED = 0xffffffff;

template<class Hash>
class CuckooTable {
    /*
     * Hash table with Cuckoo hashing.
     *
     * We have two hash functions, which map 32-bit keys to buckets of a common
     * hash table. Unused buckets contain 0xffffffff.
     *
     * Every stored key lives in one of the two buckets selected by its hash
     * functions. Inserting a key may evict the current resident of a bucket,
     * which is then moved to its alternative bucket, and so on. When such a
     * chain of evictions gets too long, new hash functions are generated and
     * the whole table is rebuilt.
     *
     * Hash must provide `uint32_t hash(uint32_t) const` returning a bucket
     * index below num_buckets, and `void regenerate()`.
     */

    // The array of buckets
    std::vector<uint32_t> table;
    uint32_t num_buckets;

    // The two hash functions. The array belongs to the caller and is held by
    // reference, so the caller sees the functions regenerated by rehash().
    std::array<Hash,2> &hashes;

    // Maximum number of evictions tried by a single insertion attempt:
    // 6 * (floor(log2(num_buckets)) + 1), i.e. logarithmic in the table size.
    uint32_t max_displacements() const {
        uint32_t bits = 1;
        for (uint32_t rest = num_buckets; rest > 1; rest >>= 1)
            bits++;
        return 6 * bits;
    }

    // Try to store `key` using the current hash functions.
    // Returns true on success. On failure (timeout) returns false and leaves
    // in `key` the one key that currently has no bucket; every other key,
    // possibly including the original argument, is stored in the table.
    bool try_insert(uint32_t &key) {
        uint32_t pos = hashes[0].hash(key);
        const uint32_t limit = max_displacements();
        for (uint32_t step = 0; step < limit; step++) {
            // Put the key in its bucket and pick up whatever was there.
            std::swap(key, table[pos]);
            if (key == UNUSED)
                return true;
            // The evicted key moves to its other bucket.
            const uint32_t first = hashes[0].hash(key);
            pos = (pos == first) ? hashes[1].hash(key) : first;
        }
        return false;
    }

public:
    CuckooTable(uint32_t num_buckets, std::array<Hash,2> &hashes) : num_buckets{num_buckets}, hashes{hashes}
    {
        // Initialize the table with the given number of buckets.
        // The number of buckets is expected to stay constant.
        table.resize(num_buckets, UNUSED);
    }

    // Read-only access to the raw bucket array (used by the tests).
    const std::vector<uint32_t>& get_table() const {
        return table;
    }

    bool lookup(uint32_t key) const {
        // Check if the table contains the given key. Returns True or False.
        uint32_t h0 = hashes[0].hash(key);
        uint32_t h1 = hashes[1].hash(key);
        return (table[h0] == key || table[h1] == key);
    }

    void insert(uint32_t key) {
        // Insert a new key to the table. Assumes that the key is not present yet.
        // Falls back to rehash() when the eviction chain exceeds the timeout.
        EXPECT(key != UNUSED, "Keys must differ from UNUSED.");
        if (!try_insert(key))
            rehash(key);  // `key` is now whichever key was left without a bucket
    }

    uint32_t rehash(uint32_t key) {
        // Relocate all items using new hash functions and insert a given key.
        // Passing UNUSED only relocates the stored items.
        // Repeats with freshly regenerated functions until every key fits.
        // Returns the key it was given.
        std::vector<uint32_t> keys;
        for (uint32_t stored : table)
            if (stored != UNUSED)
                keys.push_back(stored);
        if (key != UNUSED)
            keys.push_back(key);

        // The table never grows, so more keys than buckets could never fit
        // and the loop below would not terminate.
        EXPECT(keys.size() <= table.size(), "More keys than buckets.");

        bool all_placed = false;
        while (!all_placed) {
            for (int i=0; i<2; i++)
                hashes[i].regenerate();
            table.assign(num_buckets, UNUSED);

            all_placed = true;
            for (uint32_t pending : keys) {
                // `pending` is a copy, so a failed attempt leaves `keys` intact.
                if (!try_insert(pending)) {
                    all_placed = false;
                    break;
                }
            }
        }
        return key;
    }
};
#include <functional>
#include <cstdlib>
#include <vector>
#include <string>
#include <array>
#include <iostream>
#include "hash_functions.h"
#include "cuckoo_hash.h"
template<class Hash>
void inspect_table(const CuckooTable<Hash> &cuckoo, const array<Hash,2> &hashes, uint32_t n, uint32_t table_size, uint32_t step) {
const vector<uint32_t> &table = cuckoo.get_table();
EXPECT(table.size() == table_size, "The size of table is given and it is expected not to be changed.");
for (uint32_t i = 0; i < n; i++) {
uint32_t k = step*i;
uint32_t h0 = hashes[0].hash(k), h1 = hashes[1].hash(k);;
EXPECT(table[h0] == k || table[h1] == k, "Item should be stored on one of two positions given by hash functions.");
EXPECT(h0 == h1 || table[h0] != k || table[h1] != k, "Item should be stored only on one position.");
}
for (uint32_t t = 0; t < table_size; t++) {
uint32_t k = table[t];
if (k != UNUSED) {
EXPECT(k % step == 0 && k < step * n, "Only inserted items should be stored.");
EXPECT(hashes[0].hash(k) == t || hashes[1].hash(k) == t, "Item should be stored on one of two positions given by hash functions.");
}
}
}
// Insert the keys 0, 37, ..., 37*(n-1) into a fresh table and verify it.
//
// n                     - number of keys to insert
// table_size_percentage - number of buckets, given as a percentage of n
//
// The generator is seeded with a constant, so every run uses the same
// sequence of hash functions.
void simple_test(uint32_t n, uint32_t table_size_percentage) {
    // Multiply in 64 bits: the 32-bit product n * percentage would wrap
    // around for large n and silently produce a far too small table.
    const uint32_t table_size = static_cast<uint32_t>(static_cast<uint64_t>(n) * table_size_percentage / 100);

    RandomGen random_gen(42);
    array<TabulationHash,2> hashes{TabulationHash(table_size, random_gen), TabulationHash(table_size, random_gen)};
    CuckooTable cuckoo(table_size, hashes);

    for (uint32_t i=0; i < n; i++)
        cuckoo.insert(37*i);

    for (uint32_t i=0; i < n; i++) {
        EXPECT(cuckoo.lookup(37*i), "Item not present in table, but it should be.");
        // 37*i+1 is not a multiple of 37, so it was never inserted.
        EXPECT(!cuckoo.lookup(37*i+1), "Item present in table, even though it should not be.");
    }

    inspect_table(cuckoo, hashes, n, table_size, 37);
}
// Run simple_test for n = min_n, min_n + step_n, ... while n stays below
// max_n, printing each n before its run.
void multiple_test(uint32_t min_n, uint32_t max_n, uint32_t step_n, uint32_t table_size_percentage) {
    uint32_t n = min_n;
    while (n < max_n) {
        printf("\tn=%u\n", n);
        simple_test(n, table_size_percentage);
        n += step_n;
    }
}
// Insert all FixedHash keys using the predefined (non-random) hash
// functions and verify the resulting layout of the table.
void fixed_test() {
    const uint32_t bucket_count = FixedHash::table_size;
    const uint32_t key_count = FixedHash::keys;

    array<FixedHash,2> hashes{FixedHash(0), FixedHash(1)};
    CuckooTable cuckoo(bucket_count, hashes);

    uint32_t key = 0;
    while (key < key_count) {
        cuckoo.insert(key);
        key++;
    }

    inspect_table(cuckoo, hashes, key_count, bucket_count, 1);
}
/*** A list of all tests ***/
// Each entry pairs a test name with the function that runs it.
// The arguments of simple_test are (number of keys, table size as a
// percentage of the number of keys); multiple_test additionally takes
// the range and step of the number of keys.
vector<pair<string, function<void()>>> tests = {
{ "small", [] { simple_test(100, 400); } },
{ "middle", [] { simple_test(31415, 300); } },
{ "big", [] { simple_test(1000000, 300); } },
// Table only slightly larger than twice the number of keys.
{ "tight", [] { multiple_test(20000, 40000, 500, 205); } },
{ "fixed", fixed_test }
};
#include <cstdlib>
#include <string>
#include "random.h"
using namespace std;

// If the condition is not true, report an error and halt.
// The do { ... } while (0) wrapper makes the macro behave as a single
// statement, so it can be used safely inside unbraced if/else branches.
#define EXPECT(condition, message) do { if (!(condition)) expect_failed(message); } while (0)

// Called by EXPECT with the failure message; only declared here.
void expect_failed(const string& message);
class TabulationHash {
    /*
     * Tabulation hashing of 32-bit keys.
     *
     * A key is cut into four bytes. Each byte selects one entry of its own
     * table of 256 random values, and the four selected values are XORed
     * together. The result is reduced modulo the number of buckets.
     */
    size_t num_buckets;
    RandomGen &random_gen;
    uint32_t tables[4][256];

public:
    // Create a hash function into num_buckets buckets whose tables are
    // filled from random_gen. The generator is held by reference.
    TabulationHash(size_t num_buckets, RandomGen &random_gen) : num_buckets(num_buckets), random_gen(random_gen) {
        regenerate();
    }

    // Replace all table entries by fresh random values, table by table.
    void regenerate() {
        for (auto &row : tables)
            for (uint32_t &cell : row)
                cell = random_gen.next_u32();
    }

    // Return the bucket index of the given key.
    uint32_t hash(uint32_t key) const {
        uint32_t mixed = 0;
        for (int part = 0; part < 4; part++) {
            const uint32_t byte = (key >> (8 * part)) & 0xff;
            mixed ^= tables[part][byte];
        }
        return mixed % num_buckets;
    }
};
// Deterministic "hash function" for testing: the bucket of every key is read
// from a hard-coded table, and each regeneration advances to the next
// predefined pair of functions.
class FixedHash {
public:
    static constexpr uint32_t keys = 5;
    static constexpr uint32_t max_regenerations = 6;
    static constexpr uint32_t table_size = 16;

private:
    // hashes[r][f][k] is the bucket of key k under function f after r regenerations.
    static constexpr uint32_t hashes[max_regenerations][2][keys] {
        { // Two items hashed into the same bucket by both functions
            { 1, 7, 3, 7, 10 },
            { 2, 7, 4, 7, 11 }
        },
        { // Three items stored in two positions
            { 1, 7, 3, 8, 7 },
            { 2, 8, 4, 7, 8 }
        },
        { // Four items stored in three positions
            { 1, 7, 7, 8, 9 },
            { 2, 8, 9, 7, 8 }
        },
        { // Five should be possible to store in five positions, but the cuckoo's insert operation may not find the proper locations
            { 1, 2, 3, 4, 5 },
            { 2, 3, 4, 5, 1 }
        },
        { // Five should be possible to store in six positions, the timeout in the insert may not be sufficient
            { 1, 2, 3, 4, 5 },
            { 2, 3, 4, 5, 6 }
        },
        { // This should be easy
            { 8, 7, 7, 8, 12 },
            { 11, 6, 7, 9, 0 }
        }
    };

    size_t regenerations, id;

public:
    // id selects which of the two predefined functions this object represents.
    FixedHash(size_t id) : regenerations(0), id(id) {}

    // Look up the bucket of the key in the current table; the key must be below `keys`.
    uint32_t hash(uint32_t key) const {
        EXPECT(key < keys, "Invalid key");
        const auto &row = hashes[regenerations][id];
        return row[key];
    }

    // Advance to the next predefined function; fails once the list is exhausted.
    void regenerate() {
        ++regenerations;
        EXPECT(regenerations < max_regenerations, "Too many rehashes");
    }
};
#define DS1_RANDOM_H
#include <cstdint>
/*
* This is the xoroshiro128+ random generator, designed in 2016 by David Blackman
* and Sebastiano Vigna, distributed under the CC-0 license. For more details,
* see http://vigna.di.unimi.it/xorshift/.
*
* Rewritten to C++ by Martin Mares, also placed under CC-0.
*/
class RandomGen {
    uint64_t state[2];

    // Rotate x to the left by k bits; only used with 0 < k < 64.
    static uint64_t rotl(uint64_t x, int k)
    {
        const uint64_t upper = x << k;
        const uint64_t lower = x >> (64 - k);
        return upper | lower;
    }

public:
    // Set up the state from the seed and discard the first 100 outputs.
    RandomGen(unsigned int seed)
    {
        // Both expressions are evaluated in unsigned int before widening.
        state[0] = seed * 0xdeadbeef;
        state[1] = seed ^ 0xc0de1234;

        int warmup = 100;
        while (warmup-- > 0)
            next_u64();
    }

    // Produce the next 64-bit value and advance the state.
    uint64_t next_u64()
    {
        const uint64_t first = state[0];
        const uint64_t second = state[1];
        const uint64_t mixed = first ^ second;

        state[0] = rotl(first, 55) ^ mixed ^ (mixed << 14);
        state[1] = rotl(mixed, 36);

        // The output is the sum of the two state words before the update.
        return first + second;
    }

    // Produce a 32-bit value: the 64-bit output shifted right by 11 bits
    // and truncated to 32 bits.
    uint32_t next_u32()
    {
        return static_cast<uint32_t>(next_u64() >> 11);
    }

    // Produce a value in 0 .. range-1.
    unsigned int next_range(unsigned int range)
    {
        /*
         * The reduction by modulo is not perfectly uniform unless the range
         * is a power of two, but with 64-bit inputs and 32-bit ranges the
         * bias is insignificant.
         */
        return static_cast<unsigned int>(next_u64() % range);
    }
};
#include <cstdlib>
#include <functional>
#include <iostream>
#include <string>
#include <utility>
#include <vector>
using namespace std;
extern vector<pair<string, function<void()>>> tests;
// Report a failed expectation on the standard error stream and terminate
// the process with exit status 1.
void expect_failed(const std::string& message) {
    std::cerr << "Test error: " << message << std::endl;
    std::exit(1);
}
int main(int argc, char* argv[]) {
vector<string> required_tests;
if (argc > 1) {
required_tests.assign(argv + 1, argv + argc);
} else {
for (const auto& test : tests)
required_tests.push_back(test.first);
}
for (const auto& required_test : required_tests) {
bool found = false;
for (const auto& test : tests)
if (required_test == test.first) {
cerr << "Running test " << required_test << endl;
test.second();
found = true;
break;
}
if (!found) {
cerr << "Unknown test " << required_test << endl;
return 1;
}
}
return 0;
}
import math
class CuckooTable:
    """Hash table with Cuckoo hashing.

    We have two hash functions, which map 32-bit keys to buckets of a common
    hash table. Unused buckets contain None.

    Every stored key lives in one of the two buckets selected by its hash
    functions. Inserting a key may evict the current resident of a bucket,
    which is then moved to its alternative bucket, and so on. When such a
    chain of evictions gets too long, new hash functions are generated and
    the whole table is rebuilt.
    """

    def __init__(self, num_buckets, hashes):
        """Initialize the table with the given number of buckets.

        The number of buckets is expected to stay constant.
        `hashes` is a sequence of two objects providing `hash(key)` and
        `regenerate()`; it is shared with the caller, not copied.
        """
        # The array of buckets
        self.num_buckets = num_buckets
        self.table = [None] * num_buckets
        self.hashes = hashes

    def get_table(self):
        """Return the underlying list of buckets."""
        return self.table

    def lookup(self, key):
        """Check if the table contains the given key. Returns True or False."""
        b0 = self.hashes[0].hash(key)
        b1 = self.hashes[1].hash(key)
        # print("## Lookup key={} b0={} b1={}".format(key, b0, b1))
        return self.table[b0] == key or self.table[b1] == key

    def insert(self, key):
        """Insert a new key to the table. Assumes that the key is not present yet.

        Falls back to rehash() when the eviction chain exceeds the timeout.
        """
        assert key is not None, "Keys must differ from None."
        homeless = self._try_insert(key)
        if homeless is not None:
            self.rehash(homeless)

    def rehash(self, key):
        """ Relocate all items using new hash functions and insert a given key.

        Passing None only relocates the stored items. The rebuild is repeated
        with freshly regenerated functions until every key fits.
        """
        keys = [stored for stored in self.table if stored is not None]
        if key is not None:
            keys.append(key)

        # The table never grows, so more keys than buckets could never fit
        # and the loop below would not terminate.
        assert len(keys) <= self.num_buckets, "More keys than buckets."

        while True:
            # Obtain new hash functions
            for i in range(2):
                self.hashes[i].regenerate()
            # Clear in place so that lists returned by get_table() stay valid.
            self.table[:] = [None] * self.num_buckets
            if all(self._try_insert(pending) is None for pending in keys):
                return

    def _max_displacements(self):
        """Maximum number of evictions tried by one insertion attempt.

        Equals 6 * (floor(log2(num_buckets)) + 1) for a non-empty table.
        """
        return 6 * max(1, self.num_buckets.bit_length())

    def _try_insert(self, key):
        """Try to store `key` using the current hash functions.

        Returns None on success. On timeout returns the one key that is left
        without a bucket; every other key, possibly including the original
        argument, is stored in the table.
        """
        pos = self.hashes[0].hash(key)
        for _ in range(self._max_displacements()):
            # Put the key in its bucket and pick up whatever was there.
            key, self.table[pos] = self.table[pos], key
            if key is None:
                return None
            # The evicted key moves to its other bucket.
            first = self.hashes[0].hash(key)
            pos = self.hashes[1].hash(key) if pos == first else first
        return key
#!/usr/bin/env python3
import sys
import random
from cuckoo_hash import CuckooTable
from hash_functions import TabulationHash, FixedHash
def inspect_table(cuckoo, hashes, n, table_size, step):
    """Verify the layout of a table that should hold the keys 0, step, ..., (n-1)*step.

    Raises AssertionError when the table has an unexpected size, when an
    inserted key is missing or stored twice, or when a bucket holds a key
    that was not inserted or that does not belong there.
    """
    buckets = cuckoo.get_table()
    assert len(buckets) == table_size, "The size of table is given and it is expected not to be changed."

    # Every inserted key occupies exactly one of its two candidate buckets.
    for key in (step * index for index in range(n)):
        first = hashes[0].hash(key)
        second = hashes[1].hash(key)
        assert buckets[first] == key or buckets[second] == key, "Item should be stored on one of two positions given by hash functions."
        assert not (first != second and buckets[first] == key and buckets[second] == key), "Item should be stored only on one position."

    # Every occupied bucket holds an inserted key placed where a hash function puts it.
    for position in range(table_size):
        stored = buckets[position]
        if stored is None:
            continue
        assert stored % step == 0 and stored < step * n, "Only inserted items should be stored."
        assert hashes[0].hash(stored) == position or hashes[1].hash(stored) == position, "Item should be stored on one of two positions given by hash functions."
def simple_test(n, table_size_percentage):
    """Insert the keys 0, 37, ..., 37*(n-1) into a fresh table and verify it.

    The number of buckets is `table_size_percentage` percent of `n`. The
    random module is seeded with a constant, so every run uses the same
    sequence of hash functions.
    """
    random.seed(42)
    num_buckets = n * table_size_percentage // 100
    hashes = [TabulationHash(num_buckets) for _ in range(2)]
    table = CuckooTable(num_buckets, hashes)

    # The keys form an arithmetic progression with difference 37.
    for key in range(0, 37 * n, 37):
        table.insert(key)

    # Each inserted key is found; its successor was never inserted.
    for key in range(0, 37 * n, 37):
        assert table.lookup(key), "Item not present in table, but it should be."
        assert not table.lookup(key + 1), "Item present in table, even though it should not be."

    inspect_table(table, hashes, n, num_buckets, 37)
def multiple_test(min_n, max_n, step_n, table_size_percentage):
    """Run simple_test for every n in range(min_n, max_n, step_n), printing n first."""
    n = min_n
    for n in range(min_n, max_n, step_n):
        print(f"\tn={n}")
        simple_test(n, table_size_percentage)
def fixed_test():
    """Insert all FixedHash keys using the predefined hash functions and verify the table."""
    bucket_count = FixedHash.table_size
    key_count = FixedHash.keys

    hashes = [FixedHash(index) for index in (0, 1)]
    cuckoo = CuckooTable(bucket_count, hashes)

    key = 0
    while key < key_count:
        cuckoo.insert(key)
        key += 1

    inspect_table(cuckoo, hashes, key_count, bucket_count, 1)
# A list of all tests
# Each entry is a (name, callable) pair. The arguments of simple_test are
# (number of keys, table size as a percentage of the number of keys);
# multiple_test additionally takes the range and step of the number of keys.
tests = [
    ("small", lambda: simple_test(100, 400)),
    ("middle", lambda: simple_test(31415, 300)),
    ("big", lambda: simple_test(1000000, 300)),
    # Table only slightly larger than twice the number of keys.
    ("tight", lambda: multiple_test(20000, 40000, 500, 205)),
    ("fixed", fixed_test)
]
# Command-line driver: run the tests named in the arguments, or every
# registered test when no arguments are given.
if __name__ == "__main__":
    for required_test in sys.argv[1:] or [name for name, _ in tests]:
        for name, test in tests:
            if name == required_test:
                print("Running test {}".format(name), file=sys.stderr)
                test()
                break
        else:
            # No registered test matched. Report the requested name: after
            # the loop `name` holds the last registered test, not the
            # unknown one.
            raise ValueError("Unknown test {}".format(required_test))
import random
class TabulationHash:
    """Tabulation hashing of 32-bit keys.

    A key is cut into four bytes. Each byte selects one entry of its own
    table of 256 random values, and the four selected values are XORed
    together. The result is reduced modulo the number of buckets.
    """

    def __init__(self, num_buckets):
        """Create a hash function into `num_buckets` buckets with fresh random tables."""
        self.tables = [None] * 4
        self.num_buckets = num_buckets
        self.regenerate()

    def regenerate(self):
        """Replace every table by 256 new random 32-bit values, table by table."""
        for index in range(len(self.tables)):
            row = []
            for _ in range(256):
                row.append(random.randint(0, 0xffffffff))
            self.tables[index] = row

    def hash(self, key):
        """Return the bucket index of the given key."""
        mixed = 0
        for part, row in enumerate(self.tables):
            mixed ^= row[(key >> (8 * part)) & 0xff]
        return mixed % self.num_buckets
class FixedHash:
    """Deterministic "hash function" for testing.

    The bucket of every key is read from a hard-coded table, and each
    regeneration advances to the next predefined pair of functions.
    """

    keys = 5
    max_regenerations = 6
    table_size = 16

    # hashes[r][f][k] is the bucket of key k under function f after r regenerations.
    hashes = [
        [  # Two items hashed into the same bucket by both functions
            [1, 7, 3, 7, 10],
            [2, 7, 4, 7, 11],
        ],
        [  # Three items stored in two positions
            [1, 7, 3, 8, 7],
            [2, 8, 4, 7, 8],
        ],
        [  # Four items stored in three positions
            [1, 7, 7, 8, 9],
            [2, 8, 9, 7, 8],
        ],
        [  # Five should be possible to store in five positions, but the cuckoo's insert operation may not find the proper locations
            [1, 2, 3, 4, 5],
            [2, 3, 4, 5, 1],
        ],
        [  # Five should be possible to store in six positions, the timeout in the insert may not be sufficient
            [1, 2, 3, 4, 5],
            [2, 3, 4, 5, 6],
        ],
        [  # This should be easy
            [8, 7, 7, 8, 12],
            [11, 6, 7, 9, 0],
        ],
    ]

    def __init__(self, id):
        """`id` selects which of the two predefined functions this object represents."""
        self.id = id
        self.regenerations = 0

    def regenerate(self):
        """Advance to the next predefined function; fails once the list is exhausted."""
        self.regenerations = self.regenerations + 1
        assert self.regenerations < self.max_regenerations, "Too many rehashes"

    def hash(self, key):
        """Look up the bucket of `key` in the current table; `key` must be in 0 .. keys-1."""
        assert 0 <= key < self.keys, "Invalid key"
        current = self.hashes[self.regenerations]
        return current[self.id][key]
Implement a Cuckoo hash table with simple tabulation hashing.
You are given skeleton code that defines the table, implements
`lookup()`, and provides the hash functions. You have to add the `insert()`
method.
If too many elements are moved during a single insert, the table must
be rehashed with new hash functions. See the lecture notes for the particular
bounds.
The size of the table should stay constant
throughout the existence of the data structure.
Source code templates can be found in [git](https://gitlab.kam.mff.cuni.cz/datovky/assignments/-/tree/master).
\ No newline at end of file
0% Loading or .
You are about to add 0 people to the discussion. Proceed with caution.
Please register or to comment