Commit e0d0e6be authored by Martin Mareš

Cuckoo hashing

parent 50660fce
# Build the test binary and run it. This is the first target, so a plain
# `make` runs the tests.
test: cuckoo_hash_test
	./$<

# Directory containing shared headers such as random.h; may be overridden
# from the command line or the environment.
INCLUDE ?= .
CXXFLAGS=-std=c++11 -O2 -Wall -Wextra -g -Wno-sign-compare -I$(INCLUDE)

# The headers are listed as prerequisites so that editing them triggers
# a rebuild, but only the .cpp files are handed to the compiler: passing
# headers on the command line together with -o makes the compiler try to
# precompile them, which some compiler versions reject outright.
cuckoo_hash_test: cuckoo_hash_test.cpp cuckoo_hash.h test_main.cpp $(INCLUDE)/random.h
	$(CXX) $(CXXFLAGS) $(filter %.cpp,$^) -o $@

clean:
	rm -f cuckoo_hash_test

.PHONY: clean test
#include <string>
#include <vector>
#include <cstdint>
#include <iostream>
#include "random.h"
using namespace std;
// If the condition is not true, report an error and halt.
// The check is wrapped in do { ... } while (0) so that the macro expands to
// a single statement and can be followed by a semicolon like a function call.
#define EXPECT(condition, message) do { if (!(condition)) expect_failed(message); } while (0)
// Called by EXPECT when a check fails; only declared here, the definition
// must be supplied by the program that includes this header.
void expect_failed(const string& message);
class TabulationHash {
    /*
     * Simple tabulation hashing of 32-bit keys.
     *
     * A key is cut into its four bytes. Every byte selects one entry from
     * its own table of 256 random 32-bit words; the four selected words are
     * XORed and the result is reduced modulo the number of buckets.
     */
    unsigned num_buckets;
    uint32_t tables[4][256];

public:
    // Create a hash function into the range [0, num_buckets), drawing all
    // table entries from the given random generator.
    TabulationHash(unsigned num_buckets, RandomGen *random_gen)
    {
        this->num_buckets = num_buckets;
        // Rows are filled one after another, each from index 0 to 255.
        for (auto &row : tables)
            for (uint32_t &cell : row)
                cell = random_gen->next_u32();
    }

    // Return the bucket index assigned to the key.
    uint32_t hash(uint32_t key)
    {
        uint32_t mixed = 0;
        // Consume the key byte by byte, starting with the lowest byte.
        for (int part = 0; part < 4; part++) {
            mixed ^= tables[part][key & 0xff];
            key >>= 8;
        }
        return mixed % num_buckets;
    }
};
class CuckooTable {
/*
* Hash table with Cuckoo hashing.
*
* We have two hash functions, which map 32-bit keys to buckets of a common
* hash table. Unused buckets contain 0xffffffff.
*/
const uint32_t UNUSED = 0xffffffff;
// The array of buckets
vector<uint32_t> table;
unsigned num_buckets;
// Hash functions and the random generator used to create them
TabulationHash *hashes[2];
RandomGen *random_gen;
public:
CuckooTable(unsigned num_buckets)
{
// Initialize the table with the given number of buckets.
this->num_buckets = num_buckets;
table.resize(num_buckets, UNUSED);
// Obtain two fresh hash functions.
random_gen = new RandomGen(42);
for (int i=0; i<2; i++)
hashes[i] = new TabulationHash(num_buckets, random_gen);
}
~CuckooTable()
{
for (int i=0; i<2; i++)
delete hashes[i];
delete random_gen;
}
bool lookup(uint32_t key)
{
// Check if the table contains the given key. Returns True or False.
unsigned h0 = hashes[0]->hash(key);
unsigned h1 = hashes[1]->hash(key);
return (table[h0] == key || table[h1] == key);
}
void insert(uint32_t key)
{
// Insert a new key to the table. Assumes that the key is not present yet.
EXPECT(key != UNUSED, "Keys must differ from UNUSED.");
// TODO: Implement
}
};
#include <functional>
#include <cstdlib>
#include <vector>
#include "cuckoo_hash.h"
// Insert the n keys 0, 37, 74, ... into a table whose bucket count is
// table_size_percentage percent of n, then verify that every inserted key
// is found and that its successor (key + 1) is not.
void simple_test(unsigned n, unsigned table_size_percentage)
{
    const unsigned bucket_count = n * table_size_percentage / 100;
    CuckooTable table(bucket_count);

    // Fill the table with an arithmetic progression of step 37.
    unsigned key = 0;
    for (unsigned done = 0; done < n; done++, key += 37)
        table.insert(key);

    // Check the contents.
    key = 0;
    for (unsigned done = 0; done < n; done++, key += 37) {
        EXPECT(table.lookup(key), "Item not present in table, but it should be.");
        EXPECT(!table.lookup(key + 1), "Item present in table, even though it should not be.");
    }
}
// Run simple_test for n = min_n, min_n + step_n, ... while n stays below
// max_n, printing each n before its run.
void multiple_test(unsigned min_n, unsigned max_n, unsigned step_n, unsigned table_size_percentage)
{
    unsigned n = min_n;
    while (n < max_n) {
        printf("\tn=%u\n", n);
        simple_test(n, table_size_percentage);
        n += step_n;
    }
}
/*** Registry of all tests: (name, function running the test) ***/
vector<pair<string, function<void()>>> tests = {
    // 100 keys, table four times as large
    { "small",  []() { simple_test(100, 400); } },
    // 31415 keys, table three times as large
    { "middle", []() { simple_test(31415, 300); } },
    // one million keys, table three times as large
    { "big",    []() { simple_test(1000000, 300); } },
    // many sizes with the table only 2.05 times as large as the key set
    { "tight",  []() { multiple_test(20000, 40000, 500, 205); } },
};
#define DS1_RANDOM_H
#include <cstdint>
/*
* This is the xoroshiro128+ random generator, designed in 2016 by David Blackman
* and Sebastiano Vigna, distributed under the CC-0 license. For more details,
* see http://vigna.di.unimi.it/xorshift/.
*
* Rewritten to C++ by Martin Mares, also placed under CC-0.
*/
class RandomGen {
    // The 128-bit generator state, kept as two 64-bit words.
    uint64_t state[2];

    // Rotate x to the left by k bits; k must lie strictly between 0 and 64.
    uint64_t rotl(uint64_t x, int k)
    {
        const uint64_t upper = x << k;
        const uint64_t lower = x >> (64 - k);
        return upper | lower;
    }

public:
    // Derive the initial state from the seed and discard the first
    // 100 outputs to warm the generator up.
    RandomGen(unsigned int seed)
    {
        state[0] = seed * 0xdeadbeef;
        state[1] = seed ^ 0xc0de1234;
        int warmup = 100;
        while (warmup-- > 0)
            next_u64();
    }

    // Produce the next 64-bit value and advance the state.
    uint64_t next_u64(void)
    {
        const uint64_t first = state[0];
        const uint64_t output = first + state[1];
        const uint64_t mixed = state[1] ^ first;
        state[0] = rotl(first, 55) ^ mixed ^ (mixed << 14);
        state[1] = rotl(mixed, 36);
        return output;
    }

    // Produce a 32-bit value: bits 11..42 of the next 64-bit output.
    uint32_t next_u32(void)
    {
        return static_cast<uint32_t>(next_u64() >> 11);
    }

    // Produce a number from 0 to range-1; range must be non-zero.
    unsigned int next_range(unsigned int range)
    {
        /*
         * Taking the remainder is exactly uniform only when the range is
         * a power of two. With 64-bit random values and 32-bit ranges
         * the resulting bias is insignificant.
         */
        return static_cast<unsigned int>(next_u64() % range);
    }
};
#include <cstdlib>
#include <functional>
#include <iostream>
#include <string>
#include <utility>
#include <vector>
using namespace std;
extern vector<pair<string, function<void()>>> tests;
// Report a failed expectation on standard error and terminate the
// program with exit status 1.
void expect_failed(const string& message) {
    const char* const prefix = "Test error: ";
    cerr << prefix << message << endl;
    exit(1);
}
int main(int argc, char* argv[]) {
vector<string> required_tests;
if (argc > 1) {
required_tests.assign(argv + 1, argv + argc);
} else {
for (const auto& test : tests)
required_tests.push_back(test.first);
}
for (const auto& required_test : required_tests) {
bool found = false;
for (const auto& test : tests)
if (required_test == test.first) {
cerr << "Running test " << required_test << endl;
test.second();
found = true;
break;
}
if (!found) {
cerr << "Unknown test " << required_test << endl;
return 1;
}
}
return 0;
}
import random
import math
class TabulationHash:
    """Simple tabulation hashing of 32-bit keys.

    A key is cut into its four bytes. Every byte selects one entry from
    its own table of 256 random 32-bit values; the four selected values
    are XORed and the result is reduced modulo the number of buckets.
    """

    def __init__(self, num_buckets):
        """Draw four random tables for hashing into `num_buckets` buckets."""
        self.tables = [
            [random.randint(0, 0xffffffff) for _ in range(256)]
            for _ in range(4)
        ]
        self.num_buckets = num_buckets

    def hash(self, key):
        """Return the bucket index assigned to `key`."""
        mixed = 0
        # Consume the key byte by byte, starting with the lowest byte.
        for table in self.tables:
            mixed ^= table[key & 0xff]
            key >>= 8
        return mixed % self.num_buckets
class CuckooTable:
    """Hash table with Cuckoo hashing.

    We have two hash functions, which map 32-bit keys to buckets of a common
    hash table. Unused buckets contain None.

    Every stored key lives in one of its two buckets. An insert puts the new
    key into its first bucket; if that bucket was occupied, the previous
    occupant is moved to its other bucket, possibly evicting another key,
    and so on. When such a chain of evictions gets too long, the whole table
    is rebuilt with fresh hash functions.
    """

    def __init__(self, num_buckets):
        """Initialize the table with the given number of buckets."""

        # The array of buckets
        self.num_buckets = num_buckets
        self.table = [None] * num_buckets

        # Number of keys currently stored in the table
        self.num_items = 0

        # Create two fresh hash functions
        self.hashes = [TabulationHash(num_buckets), TabulationHash(num_buckets)]

    def lookup(self, key):
        """Check if the table contains the given key. Returns True or False."""

        b0 = self.hashes[0].hash(key)
        b1 = self.hashes[1].hash(key)
        # print("## Lookup key={} b0={} b1={}".format(key, b0, b1))
        return self.table[b0] == key or self.table[b1] == key

    def insert(self, key):
        """Insert a new key to the table. Assumes that the key is not present yet.

        If too many keys have to be moved, the table is rehashed with new
        hash functions. Rehashing can only be expected to succeed quickly
        while the table is sufficiently sparse.

        Raises RuntimeError if every bucket is already occupied.
        """

        # Checked before anything is moved, so a failed insert leaves the
        # table untouched.
        if self.num_items >= self.num_buckets:
            raise RuntimeError("Cuckoo table is full.")
        self.num_items += 1

        homeless = self._try_place(key)
        if homeless is not None:
            self._rehash(homeless)

    def _step_limit(self):
        """Maximum number of placements tried before the table is rehashed.

        NOTE(review): 6 * bit_length(num_buckets) is a logarithmic timeout;
        the task text refers to lecture notes for the exact bound -- confirm.
        """
        return 6 * max(1, self.num_buckets.bit_length())

    def _try_place(self, key):
        """Try to store the key, relocating previous occupants as needed.

        Returns None on success. On timeout returns the key which ended up
        without a bucket (not necessarily the original one); all other keys
        are still stored in the table.
        """
        pos = self.hashes[0].hash(key)
        for _ in range(self._step_limit()):
            # Put the carried key into the bucket and pick up the occupant.
            key, self.table[pos] = self.table[pos], key
            if key is None:
                return None
            # The evicted key continues to its other bucket.
            b0 = self.hashes[0].hash(key)
            b1 = self.hashes[1].hash(key)
            pos = b1 if pos == b0 else b0
        return key

    def _rehash(self, homeless):
        """Rebuild the table with fresh hash functions.

        The rebuilt table contains all keys stored so far plus `homeless`.
        Repeats with new hash functions until every key has been placed.
        """
        keys = [homeless]
        keys.extend(k for k in self.table if k is not None)

        while True:
            self.hashes = [TabulationHash(self.num_buckets), TabulationHash(self.num_buckets)]
            self.table = [None] * self.num_buckets
            if all(self._try_place(k) is None for k in keys):
                return
#!/usr/bin/env python3
import sys
import random
from cuckoo_hash import CuckooTable
def simple_test(n, table_size_percentage):
    """Fill a table with n keys and verify its contents.

    The table gets n * table_size_percentage // 100 buckets. The keys are
    the arithmetic progression 0, 37, 74, ...; afterwards every key must be
    found and its successor (key + 1) must not be.
    """
    random.seed(42)
    num_buckets = n * table_size_percentage // 100
    table = CuckooTable(num_buckets)

    # The n keys 0, 37, ..., 37*(n-1)
    keys = range(0, 37 * n, 37)

    # Insert an arithmetic progression
    for key in keys:
        table.insert(key)

    # Verify contents of the table
    for key in keys:
        assert table.lookup(key), "Item not present in table, but it should be."
        assert not table.lookup(key + 1), "Item present in table, even though it should not be."
def multiple_test(min_n, max_n, step_n, table_size_percentage):
    """Run simple_test for every n in range(min_n, max_n, step_n).

    Each n is printed before its run.
    """
    sizes = range(min_n, max_n, step_n)
    for size in sizes:
        print("\tn={}".format(size))
        simple_test(size, table_size_percentage)
# Registry of all tests: (name, function running the test)
tests = [
    # 100 keys, table four times as large
    ("small", lambda: simple_test(n=100, table_size_percentage=400)),
    # 31415 keys, table three times as large
    ("middle", lambda: simple_test(n=31415, table_size_percentage=300)),
    # one million keys, table three times as large
    ("big", lambda: simple_test(n=1000000, table_size_percentage=300)),
    # many sizes with the table only 2.05 times as large as the key set
    ("tight", lambda: multiple_test(min_n=20000, max_n=40000, step_n=500,
                                    table_size_percentage=205)),
]
# Run the tests named on the command line, or all tests when no names are
# given. An unknown name aborts the run with ValueError.
if __name__ == "__main__":
    for required_test in sys.argv[1:] or [name for name, _ in tests]:
        for name, test in tests:
            if name == required_test:
                print("Running test {}".format(name), file=sys.stderr)
                test()
                break
        else:
            # Report the name that was asked for; `name` would be the last
            # registered test here (or undefined if there are no tests).
            raise ValueError("Unknown test {}".format(required_test))
Implement a Cuckoo hash table with simple tabulation hashing.
You are given skeleton code which defines the table, implements
`lookup()`, and provides the hash functions. You have to add an `insert()`
method.
If too many elements are moved during a single insert, the table must
be rehashed with new hash functions. See the lecture notes for the particular
bounds.
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment