Commit 925841cc authored by David Mareček's avatar David Mareček
Browse files

cuckoo hashing

parent 95a3a136
# Build the test binary (if needed) and run it; $< is the first prerequisite.
test: cuckoo_hash_test
./$<
# Directory searched for random.h; may be overridden from the environment or command line.
INCLUDE ?= .
# Compiler flags: C++11, optimised, with debug info; sign-compare warnings are silenced.
CXXFLAGS=-std=c++11 -O2 -Wall -Wextra -g -Wno-sign-compare -I$(INCLUDE)
# Link the test binary from all prerequisites ($^) into the target ($@).
cuckoo_hash_test: cuckoo_hash_test.cpp cuckoo_hash.h test_main.cpp $(INCLUDE)/random.h
$(CXX) $(CXXFLAGS) $^ -o $@
# Remove the built test binary.
clean:
rm -f cuckoo_hash_test
# These targets do not correspond to files.
.PHONY: clean test
#include <string>
#include <vector>
#include <cstdint>
#include <iostream>
#include "random.h"
using namespace std;
// If the condition is not true, report an error and halt.
// The do { ... } while (0) wrapper makes the macro expand to a single
// statement, so it can be used safely inside unbraced if/else bodies.
#define EXPECT(condition, message) do { if (!(condition)) expect_failed(message); } while (0)
// Called by EXPECT when a condition fails; only declared here, the
// definition must be supplied by the program that includes this header.
void expect_failed(const string& message);
class TabulationHash {
    /*
     * Simple tabulation hashing of 32-bit keys.
     *
     * A key is viewed as four bytes. Byte number i selects one entry of
     * the i-th table of 256 random 32-bit words; the four selected words
     * are XORed and the result is reduced modulo the number of buckets.
     */
    unsigned num_buckets;
    uint32_t tables[4][256];

public:
    // Draws 4 * 256 values from random_gen, table by table, entry by entry.
    TabulationHash(unsigned num_buckets, RandomGen *random_gen)
        : num_buckets(num_buckets)
    {
        for (auto &row : tables)
            for (auto &cell : row)
                cell = random_gen->next_u32();
    }

    // Returns the bucket index of the key, in the range [0, num_buckets).
    uint32_t hash(uint32_t key)
    {
        uint32_t mixed = 0;
        // Byte 0 is the least significant byte of the key.
        for (int part = 0; part < 4; part++)
            mixed ^= tables[part][(key >> (8 * part)) & 0xff];
        return mixed % num_buckets;
    }
};
class CuckooTable {
/*
* Hash table with Cuckoo hashing.
*
* We have two hash functions, which map 32-bit keys to buckets of a common
* hash table. Unused buckets contain 0xffffffff.
*/
const uint32_t UNUSED = 0xffffffff;
// The array of buckets
vector<uint32_t> table;
unsigned num_buckets;
// Hash functions and the random generator used to create them
TabulationHash *hashes[2];
RandomGen *random_gen;
public:
CuckooTable(unsigned num_buckets)
{
// Initialize the table with the given number of buckets.
// The number of buckets is expected to stay constant.
this->num_buckets = num_buckets;
table.resize(num_buckets, UNUSED);
// Obtain two fresh hash functions.
random_gen = new RandomGen(42);
for (int i=0; i<2; i++)
hashes[i] = new TabulationHash(num_buckets, random_gen);
}
~CuckooTable()
{
for (int i=0; i<2; i++)
delete hashes[i];
delete random_gen;
}
bool lookup(uint32_t key)
{
// Check if the table contains the given key. Returns True or False.
unsigned h0 = hashes[0]->hash(key);
unsigned h1 = hashes[1]->hash(key);
return (table[h0] == key || table[h1] == key);
}
void insert(uint32_t key)
{
// Insert a new key to the table. Assumes that the key is not present yet.
EXPECT(key != UNUSED, "Keys must differ from UNUSED.");
// TODO: Implement
}
};
#include <functional>
#include <cstdlib>
#include <vector>
#include "cuckoo_hash.h"
// Insert the n keys 0, 37, 74, ... into a table whose bucket count is
// table_size_percentage percent of n, then check that every inserted key is
// found and that its successor (key + 1), which was never inserted, is not.
void simple_test(unsigned n, unsigned table_size_percentage)
{
    const unsigned bucket_count = n * table_size_percentage / 100;
    CuckooTable table(bucket_count);

    unsigned idx = 0;
    while (idx < n) {
        table.insert(37*idx);
        idx++;
    }

    for (idx = 0; idx < n; idx++) {
        const unsigned stored_key = 37*idx;
        const unsigned absent_key = 37*idx+1;
        EXPECT(table.lookup(stored_key), "Item not present in table, but it should be.");
        EXPECT(!table.lookup(absent_key), "Item present in table, even though it should not be.");
    }
}
// Run simple_test() for n = min_n, min_n + step_n, ... while n < max_n,
// printing each n before its run.
void multiple_test(unsigned min_n, unsigned max_n, unsigned step_n, unsigned table_size_percentage)
{
    unsigned n = min_n;
    while (n < max_n) {
        printf("\tn=%u\n", n);
        simple_test(n, table_size_percentage);
        n += step_n;
    }
}
/*** A list of all tests ***/
// Each entry pairs a test name with a closure that runs the test.
// simple_test takes (n, table_size_percentage); multiple_test takes
// (min_n, max_n, step_n, table_size_percentage).
vector<pair<string, function<void()>>> tests = {
// 100 keys in a table with 4 times as many buckets.
{ "small", [] { simple_test(100, 400); } },
// 31415 keys, 3 times as many buckets.
{ "middle", [] { simple_test(31415, 300); } },
// One million keys, 3 times as many buckets.
{ "big", [] { simple_test(1000000, 300); } },
// Many sizes from 20000 up to (not including) 40000 in steps of 500,
// each with only 2.05 times as many buckets as keys.
{ "tight", [] { multiple_test(20000, 40000, 500, 205); } },
};
#define DS1_RANDOM_H
#include <cstdint>
/*
* This is the xoroshiro128+ random generator, designed in 2016 by David Blackman
* and Sebastiano Vigna, distributed under the CC-0 license. For more details,
* see http://vigna.di.unimi.it/xorshift/.
*
* Rewritten to C++ by Martin Mares, also placed under CC-0.
*/
class RandomGen {
    // The 128-bit generator state.
    uint64_t state[2];

    // Rotate x left by k bits (callers pass 0 < k < 64).
    uint64_t rotl(uint64_t x, int k)
    {
        const uint64_t shifted_up = x << k;
        const uint64_t wrapped = x >> (64 - k);
        return shifted_up | wrapped;
    }

public:
    // Set up the state from the seed and discard the first 100 outputs.
    RandomGen(unsigned int seed)
    {
        state[0] = seed * 0xdeadbeef;
        state[1] = seed ^ 0xc0de1234;
        int remaining = 100;
        while (remaining-- > 0)
            next_u64();
    }

    // Produce the next 64-bit value and advance the state.
    uint64_t next_u64(void)
    {
        const uint64_t first = state[0];
        const uint64_t second = state[1];
        const uint64_t mixed = first ^ second;

        state[0] = rotl(first, 55) ^ mixed ^ (mixed << 14);
        state[1] = rotl(mixed, 36);
        // The output is the sum of the two state words before the update.
        return first + second;
    }

    // Produce a 32-bit value: the 64-bit output shifted right by 11 bits,
    // truncated to its low 32 bits.
    uint32_t next_u32(void)
    {
        return static_cast<uint32_t>(next_u64() >> 11);
    }

    // Produce a number between 0 and range-1.
    unsigned int next_range(unsigned int range)
    {
        /*
         * Plain modulo reduction: only exactly uniform when the range is a
         * power of two, but with a 64-bit source and a 32-bit range the
         * bias is insignificant.
         */
        return static_cast<unsigned int>(next_u64() % range);
    }
};
#include <cstdlib>
#include <functional>
#include <iostream>
#include <string>
#include <utility>
#include <vector>
using namespace std;
extern vector<pair<string, function<void()>>> tests;
void expect_failed(const string& message) {
cerr << "Test error: " << message << endl;
exit(1);
}
int main(int argc, char* argv[]) {
vector<string> required_tests;
if (argc > 1) {
required_tests.assign(argv + 1, argv + argc);
} else {
for (const auto& test : tests)
required_tests.push_back(test.first);
}
for (const auto& required_test : required_tests) {
bool found = false;
for (const auto& test : tests)
if (required_test == test.first) {
cerr << "Running test " << required_test << endl;
test.second();
found = true;
break;
}
if (!found) {
cerr << "Unknown test " << required_test << endl;
return 1;
}
}
return 0;
}
import random
import math
class TabulationHash:
    """Simple tabulation hashing of 32-bit keys.

    A key is viewed as four bytes. Byte number i selects one entry of the
    i-th table of 256 random 32-bit values; the four selected values are
    XORed and the result is reduced modulo the number of buckets.
    """

    def __init__(self, num_buckets):
        # Tables are drawn one after another, each entry in index order.
        self.tables = [
            [random.randint(0, 0xffffffff) for _ in range(256)]
            for _ in range(4)
        ]
        self.num_buckets = num_buckets

    def hash(self, key):
        """Return the bucket index of the key, in range(num_buckets)."""
        mixed = 0
        # Byte 0 is the least significant byte of the key.
        for part, row in enumerate(self.tables):
            mixed ^= row[(key >> (8 * part)) & 0xff]
        return mixed % self.num_buckets
class CuckooTable:
    """Hash table with Cuckoo hashing.

    We have two hash functions, which map 32-bit keys to buckets of a common
    hash table. Unused buckets contain None.

    Every stored key lives in one of its two candidate buckets, so lookup()
    inspects at most two buckets. insert() may evict keys to their other
    candidate bucket; if an insertion needs too many evictions, all keys are
    re-inserted under two freshly created hash functions.
    """

    def __init__(self, num_buckets):
        """Initialize the table with the given number of buckets.

        The number of buckets is expected to stay constant."""
        # The array of buckets
        self.num_buckets = num_buckets
        self.table = [None] * num_buckets

        # Create two fresh hash functions
        self.hashes = [TabulationHash(num_buckets), TabulationHash(num_buckets)]

    def lookup(self, key):
        """Check if the table contains the given key. Returns True or False."""
        b0 = self.hashes[0].hash(key)
        b1 = self.hashes[1].hash(key)
        return self.table[b0] == key or self.table[b1] == key

    def insert(self, key):
        """Insert a new key to the table. Assumes that the key is not present yet.

        Raises ValueError if key is None (None marks unused buckets) and
        RuntimeError if there are more keys than buckets.
        """
        if key is None:
            raise ValueError("Keys must differ from None.")

        leftover = self._try_insert(key)
        if leftover is not None:
            # Some key ended up without a bucket: start over with new hash functions.
            self._rehash(leftover)

    def _max_moves(self):
        """Maximum number of evictions tried before a rehash.

        Six times the bit length of num_buckets.
        NOTE(review): check the constant against the bound in the lecture notes.
        """
        return 6 * self.num_buckets.bit_length()

    def _try_insert(self, key):
        """Place the key using a bounded number of evictions.

        Returns None on success; otherwise returns the one key that is
        currently not stored in the table (the last evicted key).
        """
        pos = self.hashes[0].hash(key)
        for _ in range(self._max_moves() + 1):
            # Put the key in hand into the bucket and pick up its occupant.
            key, self.table[pos] = self.table[pos], key
            if key is None:
                return None
            # The evicted key moves to its other candidate bucket.
            first = self.hashes[0].hash(key)
            pos = self.hashes[1].hash(key) if pos == first else first
        return key

    def _rehash(self, pending):
        """Re-insert all stored keys plus `pending` under new hash functions.

        Repeats with further fresh functions until every key has been placed.
        """
        keys = [stored for stored in self.table if stored is not None]
        keys.append(pending)
        # More keys than buckets can never be placed; fail instead of looping forever.
        if len(keys) > self.num_buckets:
            raise RuntimeError("Too many keys for the number of buckets.")

        while True:
            self.hashes = [TabulationHash(self.num_buckets),
                           TabulationHash(self.num_buckets)]
            self.table = [None] * self.num_buckets
            if all(self._try_insert(key) is None for key in keys):
                return
#!/usr/bin/env python3
import sys
import random
from cuckoo_hash import CuckooTable
def simple_test(n, table_size_percentage):
    """Fill a table with n multiples of 37 and verify its contents.

    The table gets n * table_size_percentage // 100 buckets. The random
    module is seeded with 42 before the table is created.
    """
    random.seed(42)
    bucket_count = n*table_size_percentage//100
    table = CuckooTable(bucket_count)

    # Insert an arithmetic progression 0, 37, 74, ...
    for key in range(0, 37*n, 37):
        table.insert(key)

    # Every inserted key must be found; its successor was never inserted.
    for key in range(0, 37*n, 37):
        assert table.lookup(key), "Item not present in table, but it should be."
        assert not table.lookup(key+1), "Item present in table, even though it should not be."
def multiple_test(min_n, max_n, step_n, table_size_percentage):
    """Run simple_test for every n in range(min_n, max_n, step_n),
    printing each n before its run."""
    sizes = range(min_n, max_n, step_n)
    for n in sizes:
        print("\tn=" + str(n))
        simple_test(n, table_size_percentage)
# A list of all tests
# Each entry is a (name, callable) pair; the callable takes no arguments.
# simple_test takes (n, table_size_percentage); multiple_test takes
# (min_n, max_n, step_n, table_size_percentage).
tests = [
# 100 keys in a table with 4 times as many buckets.
("small", lambda: simple_test(100, 400)),
# 31415 keys, 3 times as many buckets.
("middle", lambda: simple_test(31415, 300)),
# One million keys, 3 times as many buckets.
("big", lambda: simple_test(1000000, 300)),
# Sizes 20000, 20500, ... below 40000, each with only 2.05 times as many buckets as keys.
("tight", lambda: multiple_test(20000, 40000, 500, 205)),
]
# Run the tests named on the command line, or all tests when none are given.
if __name__ == "__main__":
    for required_test in sys.argv[1:] or [name for name, _ in tests]:
        for name, test in tests:
            if name == required_test:
                print("Running test {}".format(name), file=sys.stderr)
                test()
                break
        else:
            # The inner loop finished without a match. Report the requested
            # name: the loop variable `name` would hold the last registered
            # test (or be unbound if no tests are registered).
            raise ValueError("Unknown test {}".format(required_test))
Implement a Cuckoo hash table with simple tabulation hashing.
You are given skeleton code that defines the table, implements
`lookup()`, and provides the hash functions. You have to add an `insert()`
method.
If too many elements are moved during a single insert, the table must
be rehashed with new hash functions. See lecture notes for the particular
bounds.
The size of the table should stay constant
throughout the existence of the data structure.
Source code templates can be found in [git](https://gitlab.kam.mff.cuni.cz/datovky/assignments/-/tree/master).
Supports Markdown
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment