Skip to content
GitLab
Menu
Projects
Groups
Snippets
Help
Help
Support
Community forum
Keyboard shortcuts
?
Submit feedback
Contribute to GitLab
Sign in
Toggle navigation
Menu
Open sidebar
datovky
assignments
Commits
43914683
Commit
43914683
authored
Apr 21, 2021
by
Martin Mareš
Browse files
Cuckoo hash and Hash experiment
parent
490de451
Changes
14
Hide whitespace changes
Inline
Side-by-side
07-cuckoo_hash/cpp/Makefile
0 → 100644
View file @
43914683
# Build and run the Cuckoo hash unit tests.
#
# `test` is the first rule in the file, so a bare `make` builds the test
# binary (if needed) and runs it. `$<` expands to the first prerequisite,
# i.e. the freshly built cuckoo_hash_test executable.
test: cuckoo_hash_test
	./$<

# Directory that contains random.h; may be overridden from the environment
# or the command line (`?=` only assigns when INCLUDE is not already set).
INCLUDE ?= .

# C++11, optimised, with debug info; signed/unsigned comparison warnings are
# silenced and $(INCLUDE) is added to the header search path.
CXXFLAGS=-std=c++11 -O2 -Wall -Wextra -g -Wno-sign-compare -I$(INCLUDE)

# The test binary is rebuilt whenever any source or header changes.
# NOTE(review): `$^` expands to ALL prerequisites, so the two headers are
# passed on the compiler command line together with the .cpp files --
# confirm this is intended rather than listing only the .cpp files.
cuckoo_hash_test: cuckoo_hash_test.cpp cuckoo_hash.h test_main.cpp $(INCLUDE)/random.h
	$(CXX) $(CXXFLAGS) $^ -o $@

# Remove the built test binary.
clean:
	rm -f cuckoo_hash_test

# These targets do not produce files named after themselves.
.PHONY: clean test
07-cuckoo_hash/cpp/cuckoo_hash.h
0 → 100644
View file @
43914683
#include <string>
#include <vector>
#include <cstdint>
#include <iostream>
#include "random.h"
using
namespace
std
;
// If the condition is not true, report an error and halt.
//
// Expands to a call of expect_failed(message) when `condition` evaluates to
// false. The do { ... } while (0) wrapper makes the macro behave like a single
// statement, so it can be followed by a semicolon and used inside unbraced
// if/else bodies.
#define EXPECT(condition, message) do { if (!(condition)) expect_failed(message); } while (0)

// Failure handler invoked by EXPECT. Only declared in this header; the
// definition has to be supplied by the program that includes it.
void expect_failed(const string& message);
class TabulationHash {
    /*
     * Simple tabulation hashing of 32-bit keys.
     *
     * A key is cut into its four bytes. Byte number i selects one entry
     * of the i-th table (256 random 32-bit words each); the four selected
     * words are XORed and the result is reduced modulo the number of
     * buckets.
     */
    unsigned num_buckets;
    uint32_t tables[4][256];

public:
    // Fill all four tables with values drawn from random_gen, table by
    // table and entry by entry. The generator is only used here; the
    // pointer is not stored.
    TabulationHash(unsigned num_buckets, RandomGen *random_gen)
        : num_buckets(num_buckets)
    {
        for (auto &row : tables)
            for (uint32_t &cell : row)
                cell = random_gen->next_u32();
    }

    // Map the key to a bucket index in the range [0, num_buckets).
    uint32_t hash(uint32_t key)
    {
        uint32_t mixed = 0;
        for (int part = 0; part < 4; part++) {
            // Byte `part` of the key, least significant byte first.
            const unsigned byte = (key >> (8 * part)) & 0xff;
            mixed ^= tables[part][byte];
        }
        return mixed % num_buckets;
    }
};
class CuckooTable {
    /*
     * Hash table with Cuckoo hashing.
     *
     * We have two hash functions, which map 32-bit keys to buckets of a common
     * hash table. Unused buckets contain 0xffffffff.
     */

    const uint32_t UNUSED = 0xffffffff;

    // The array of buckets
    vector<uint32_t> table;
    unsigned num_buckets;

    // Hash functions and the random generator used to create them.
    // All three objects are allocated in the constructor and released in
    // the destructor.
    TabulationHash *hashes[2];
    RandomGen *random_gen;

public:

    CuckooTable(unsigned num_buckets)
    {
        // Initialize the table with the given number of buckets.
        // The number of buckets is expected to stay constant.

        this->num_buckets = num_buckets;
        table.resize(num_buckets, UNUSED);

        // Obtain two fresh hash functions. The generator is seeded with a
        // fixed value, so the functions are the same on every run.
        random_gen = new RandomGen(42);
        for (int i = 0; i < 2; i++)
            hashes[i] = new TabulationHash(num_buckets, random_gen);
    }

    ~CuckooTable()
    {
        for (int i = 0; i < 2; i++)
            delete hashes[i];
        delete random_gen;
    }

    // The table owns raw heap pointers. A compiler-generated copy would
    // share them with the original and both destructors would delete the
    // same objects, so copying is disabled.
    CuckooTable(const CuckooTable &) = delete;
    CuckooTable &operator=(const CuckooTable &) = delete;

    bool lookup(uint32_t key)
    {
        // Check if the table contains the given key. Returns True or False.
        // A key can only live in one of its two candidate buckets, so it is
        // enough to inspect those.
        unsigned h0 = hashes[0]->hash(key);
        unsigned h1 = hashes[1]->hash(key);
        return (table[h0] == key || table[h1] == key);
    }

    void insert(uint32_t key)
    {
        // Insert a new key to the table. Assumes that the key is not present yet.
        // The value UNUSED marks empty buckets and therefore cannot be stored.
        EXPECT(key != UNUSED, "Keys must differ from UNUSED.");

        // TODO: Implement
    }

};
07-cuckoo_hash/cpp/cuckoo_hash_test.cpp
0 → 100644
View file @
43914683
#include <functional>
#include <cstdlib>
#include <vector>
#include "cuckoo_hash.h"
// Insert the keys 0, 37, 74, ... (n of them) into a table whose bucket count
// is table_size_percentage percent of n, then check that every inserted key
// is found and that the successor of every inserted key is not.
void simple_test(unsigned n, unsigned table_size_percentage)
{
    const unsigned bucket_count = n * table_size_percentage / 100;
    CuckooTable table(bucket_count);

    // Fill the table with an arithmetic progression.
    for (unsigned idx = 0; idx < n; idx++) {
        const unsigned key = 37 * idx;
        table.insert(key);
    }

    // Verify the contents: members must be present, their neighbours absent.
    for (unsigned idx = 0; idx < n; idx++) {
        const unsigned key = 37 * idx;
        EXPECT(table.lookup(key), "Item not present in table, but it should be.");
        EXPECT(!table.lookup(key + 1), "Item present in table, even though it should not be.");
    }
}
// Run simple_test for n = min_n, min_n + step_n, ... while n < max_n,
// printing the current n before each run.
void multiple_test(unsigned min_n, unsigned max_n, unsigned step_n, unsigned table_size_percentage)
{
    unsigned n = min_n;
    while (n < max_n) {
        printf("\tn=%u\n", n);
        simple_test(n, table_size_percentage);
        n += step_n;
    }
}
/*** A list of all tests ***/

// Each entry pairs a test name with a parameterless callable that runs it.
// The arguments of simple_test are (number of keys, table size as a percentage
// of the number of keys); multiple_test additionally takes the range and step
// of key counts to try.
vector<pair<string, function<void()>>> tests = {
    { "small",     [] { simple_test(100, 400); } },
    { "middle",    [] { simple_test(31415, 300); } },
    { "big",       [] { simple_test(1000000, 300); } },
    // Table only slightly more than twice as large as the number of keys.
    { "tight",     [] { multiple_test(20000, 40000, 500, 205); } },
};
07-cuckoo_hash/cpp/random.h
0 → 100644
View file @
43914683
#define DS1_RANDOM_H
#include <cstdint>
/*
* This is the xoroshiro128+ random generator, designed in 2016 by David Blackman
* and Sebastiano Vigna, distributed under the CC-0 license. For more details,
* see http://vigna.di.unimi.it/xorshift/.
*
* Rewritten to C++ by Martin Mares, also placed under CC-0.
*/
class RandomGen {
    // The two 64-bit words of generator state.
    uint64_t state[2];

    // Rotate x to the left by k bits. Only called with 0 < k < 64.
    uint64_t rotl(uint64_t x, int k)
    {
        const uint64_t upper = x << k;
        const uint64_t lower = x >> (64 - k);
        return upper | lower;
    }

public:
    // Derive the initial state from the seed and discard the first
    // 100 outputs to warm the generator up.
    RandomGen(unsigned int seed)
    {
        // Both expressions are evaluated in 32-bit unsigned arithmetic and
        // only then widened to 64 bits; this keeps the produced sequence
        // unchanged.
        const unsigned int scrambled = seed * 0xdeadbeef;
        const unsigned int flipped = seed ^ 0xc0de1234;
        state[0] = scrambled;
        state[1] = flipped;

        int remaining = 100;
        while (remaining-- > 0)
            next_u64();
    }

    // Produce the next 64-bit output and advance the state.
    uint64_t next_u64(void)
    {
        const uint64_t first = state[0];
        const uint64_t second = state[1];
        const uint64_t mixed = first ^ second;

        state[0] = rotl(first, 55) ^ mixed ^ (mixed << 14);
        state[1] = rotl(mixed, 36);

        // The output is the sum of the two state words before the update.
        return first + second;
    }

    // Produce a 32-bit value: the next 64-bit output shifted right by
    // 11 bits and truncated to 32 bits.
    uint32_t next_u32(void)
    {
        return static_cast<uint32_t>(next_u64() >> 11);
    }

    // Produce a value in the interval [0, range-1].
    unsigned int next_range(unsigned int range)
    {
        /*
         * Reduction by modulo is slightly biased unless the range is a
         * power of two; with a 64-bit source and a 32-bit range the bias
         * is insignificant.
         */
        const uint64_t raw = next_u64();
        return static_cast<unsigned int>(raw % range);
    }
};
07-cuckoo_hash/cpp/test_main.cpp
0 → 100644
View file @
43914683
#include <cstdlib>
#include <functional>
#include <iostream>
#include <string>
#include <utility>
#include <vector>
using
namespace
std
;
// Registry of available tests as (name, callable) pairs. This is only a
// declaration; the object itself must be defined in another translation unit
// that is linked with this file.
extern vector<pair<string, function<void()>>> tests;
void
expect_failed
(
const
string
&
message
)
{
cerr
<<
"Test error: "
<<
message
<<
endl
;
exit
(
1
);
}
int
main
(
int
argc
,
char
*
argv
[])
{
vector
<
string
>
required_tests
;
if
(
argc
>
1
)
{
required_tests
.
assign
(
argv
+
1
,
argv
+
argc
);
}
else
{
for
(
const
auto
&
test
:
tests
)
required_tests
.
push_back
(
test
.
first
);
}
for
(
const
auto
&
required_test
:
required_tests
)
{
bool
found
=
false
;
for
(
const
auto
&
test
:
tests
)
if
(
required_test
==
test
.
first
)
{
cerr
<<
"Running test "
<<
required_test
<<
endl
;
test
.
second
();
found
=
true
;
break
;
}
if
(
!
found
)
{
cerr
<<
"Unknown test "
<<
required_test
<<
endl
;
return
1
;
}
}
return
0
;
}
07-cuckoo_hash/python/cuckoo_hash.py
0 → 100644
View file @
43914683
import
random
import
math
class TabulationHash:
    """Simple tabulation hashing of 32-bit keys.

    A key is cut into its four bytes. Byte number i selects one entry of the
    i-th table (256 random 32-bit values each); the four selected values are
    XORed and the result is reduced modulo the number of buckets.
    """

    def __init__(self, num_buckets):
        """Draw four tables of 256 random 32-bit values from `random`."""
        self.num_buckets = num_buckets
        # Tables are generated one after another, entry by entry.
        self.tables = [
            [random.randint(0, 0xffffffff) for _ in range(256)]
            for _ in range(4)
        ]

    def hash(self, key):
        """Return the bucket index of the key, in range(num_buckets)."""
        mixed = 0
        for part in range(4):
            # Byte `part` of the key, least significant byte first.
            byte = (key >> (8 * part)) & 0xff
            mixed ^= self.tables[part][byte]
        return mixed % self.num_buckets
class CuckooTable:
    """Cuckoo hash table for 32-bit keys.

    Two hash functions map every key to buckets of one shared table.
    A bucket holding None is empty.
    """

    def __init__(self, num_buckets):
        """Create an empty table with a fixed number of buckets.

        The number of buckets is expected to stay constant."""
        self.num_buckets = num_buckets

        # All buckets start out empty.
        self.table = [None] * num_buckets

        # Two independently generated hash functions.
        self.hashes = [TabulationHash(num_buckets) for _ in range(2)]

    def lookup(self, key):
        """Tell whether the key is stored in the table (True or False)."""
        first = self.hashes[0].hash(key)
        second = self.hashes[1].hash(key)
        # A key can only sit in one of its two candidate buckets.
        if self.table[first] == key:
            return True
        return self.table[second] == key

    def insert(self, key):
        """Add a key that is not in the table yet."""
        # TODO: Implement
        raise NotImplementedError
07-cuckoo_hash/python/cuckoo_hash_test.py
0 → 100755
View file @
43914683
#!/usr/bin/env python3
import
sys
import
random
from
cuckoo_hash
import
CuckooTable
def simple_test(n, table_size_percentage):
    """Fill a table with n keys and verify lookups.

    The table gets n * table_size_percentage // 100 buckets. The keys
    0, 37, 74, ... are inserted; afterwards each of them must be found and
    the successor of each of them must not be found. The `random` module is
    re-seeded with 42 first, so every run is reproducible.
    """
    random.seed(42)
    bucket_count = n * table_size_percentage // 100
    table = CuckooTable(bucket_count)

    # Fill the table with an arithmetic progression.
    for key in range(0, 37 * n, 37):
        table.insert(key)

    # Members must be present, their immediate neighbours absent.
    for key in range(0, 37 * n, 37):
        assert table.lookup(key), "Item not present in table, but it should be."
        assert not table.lookup(key + 1), "Item present in table, even though it should not be."
def multiple_test(min_n, max_n, step_n, table_size_percentage):
    """Run simple_test for every n in range(min_n, max_n, step_n).

    The current n is printed before each run.
    """
    sizes = range(min_n, max_n, step_n)
    for size in sizes:
        label = "\tn={}".format(size)
        print(label)
        simple_test(size, table_size_percentage)
# A list of all tests
# Each entry is a (name, callable) pair; the callable takes no arguments.
# simple_test receives (number of keys, table size as a percentage of the
# number of keys); multiple_test additionally receives the range and step of
# key counts to try.
tests = [
    ("small",       lambda: simple_test(100, 400)),
    ("middle",      lambda: simple_test(31415, 300)),
    ("big",         lambda: simple_test(1000000, 300)),
    # Table only slightly more than twice as large as the number of keys.
    ("tight",       lambda: multiple_test(20000, 40000, 500, 205)),
]
if __name__ == "__main__":
    # Run the tests named on the command line, in the given order; without
    # arguments, run every registered test in registration order.
    for required_test in sys.argv[1:] or [name for name, _ in tests]:
        for name, test in tests:
            if name == required_test:
                print("Running test {}".format(name), file=sys.stderr)
                test()
                break
        else:
            # The inner loop finished without `break`, so no registered test
            # matched. Report the name that was requested: `name` would hold
            # the last registered test here, not the unknown one.
            raise ValueError("Unknown test {}".format(required_test))
07-cuckoo_hash/task.md
0 → 100644
View file @
43914683
Implement a Cuckoo hash table with simple tabulation hashing.
You are given skeleton code which defines the table, implements
`lookup()`
, and provides hash functions. You have to add an
`insert()`
method.
If too many elements are moved during a single insert, the table must
be rehashed with new hash functions. See lecture notes for the particular
bounds.
The size of the table should stay constant
throughout the existence of the data structure.
08-hash_experiment/cpp/Makefile
0 → 100644
View file @
43914683
# Build the hashing experiment and collect its measurements under out/.

# Directory that contains random.h; may be overridden from the environment
# or the command line (`?=` only assigns when the variable is not set yet).
INCLUDE ?= .

# C++11, optimised, with debug info; signed/unsigned comparison warnings are
# silenced and $(INCLUDE) is added to the header search path.
CXXFLAGS=-std=c++11 -O2 -Wall -Wextra -g -Wno-sign-compare -I$(INCLUDE)

# Passed to the experiment as its second argument. The placeholder is used
# when the caller does not provide a value.
STUDENT_ID ?= PLEASE_SET_STUDENT_ID

# Names of the hash functions to measure; each is combined with the two test
# kinds (grow, usage) below.
HASHFUNCS=ms-low ms-high poly-1 poly-2 tab

# `test` is the first rule in the file and therefore the default goal.
# It requires one output file per (test kind, hash function) pair.
.PHONY: test
test: $(addprefix out/t-grow-, $(HASHFUNCS)) $(addprefix out/t-usage-, $(HASHFUNCS))

# Produce one measurement file. `$*` is the part matched by `%`
# (e.g. grow-ms-low) and is passed to the program as its first argument;
# standard output is redirected to the target file.
out/t-%: hash_experiment
	@mkdir -p out
	./hash_experiment $* $(STUDENT_ID) >$@

# Only the .cpp file is compiled; random.h is listed so that a change to it
# triggers a rebuild.
hash_experiment: hash_experiment.cpp $(INCLUDE)/random.h
	$(CXX) $(CPPFLAGS) $(CXXFLAGS) hash_experiment.cpp -o $@

# Remove the binary and all collected measurements.
.PHONY: clean
clean:
	rm -f hash_experiment
	rm -rf out
08-hash_experiment/cpp/hash_experiment.cpp
0 → 100644
View file @
43914683
#include <vector>
#include <functional>
#include <algorithm>
#include <utility>
#include <stdexcept>
#include <stdio.h>
#include <stdint.h>
#include <math.h>
#include "random.h"
using
namespace
std
;
// Single generator shared by all hash-function constructors in this file.
// It is seeded with a fixed value, so repeated runs draw the same sequence.
RandomGen rng(42);

// Keys and bucket indices are 32-bit unsigned integers.
typedef uint32_t uint;

// A hash function maps a key to a bucket index.
typedef function<uint(uint)> HashFunction;
// A factory creates a hash function for a table with the given number of
// buckets.
typedef function<HashFunction(unsigned num_buckets)> HashFunctionFactory;
/*
 * Simple tabulation hashing.
 *
 * A 32-bit key is cut into its four bytes. Byte number i selects one of
 * 256 random values belonging to the i-th table; the four selected values
 * are XORed and reduced modulo the number of buckets. The four tables are
 * stored back to back in one vector of 1024 entries.
 */
class TabulationHash {
    unsigned num_buckets;
    vector<uint> tables;

    // Draw all 4 * 256 table entries from the shared generator, in index order.
    TabulationHash(unsigned num_buckets) : num_buckets(num_buckets), tables(4 * 256) {
        for (size_t slot = 0; slot < tables.size(); slot++)
            tables[slot] = rng.next_u32();
    }

  public:
    // Create a freshly randomized function for the given number of buckets.
    static HashFunction factory(unsigned num_buckets) {
        TabulationHash instance(num_buckets);
        return HashFunction(std::move(instance));
    }

    // Map the key to a bucket index in the range [0, num_buckets).
    uint operator()(uint key) {
        uint mixed = 0;
        for (unsigned part = 0; part < 4; part++) {
            // Byte `part` of the key, least significant byte first.
            const uint byte = (key >> (8 * part)) & 0xff;
            // Table `part` occupies entries [256 * part, 256 * part + 255].
            mixed ^= tables[256 * part + byte];
        }
        return mixed % num_buckets;
    }
};
// Polynomial hashing modulo a prime.
//
// The function holds degree + 1 random coefficients. A key is hashed by
// evaluating the polynomial at the key with Horner's rule (the first stored
// coefficient is the leading one), reducing modulo `prime` after every step,
// and finally reducing modulo the number of buckets.
template < int degree, uint prime = 2147483647 >
class PolynomialHash {
    unsigned num_buckets;
    vector<uint> coefs;

    // Draw the coefficients from the shared generator, in index order.
    PolynomialHash(unsigned num_buckets) : num_buckets(num_buckets), coefs(degree + 1) {
        for (size_t idx = 0; idx < coefs.size(); idx++)
            coefs[idx] = rng.next_u32();
    }

  public:
    // Create a freshly randomized function for the given number of buckets.
    static HashFunction factory(unsigned num_buckets) {
        PolynomialHash instance(num_buckets);
        return HashFunction(std::move(instance));
    }

    // Map the key to a bucket index in the range [0, num_buckets).
    uint operator()(uint key) {
        // 64-bit accumulator: value * key + coefficient has to fit before
        // the reduction modulo the prime.
        uint64_t value = 0;
        for (size_t idx = 0; idx < coefs.size(); idx++) {
            const uint64_t next = value * key + coefs[idx];
            value = next % prime;
        }
        return static_cast<uint>(value % num_buckets);
    }
};
// Polynomial hashing of degree 1 (two coefficients) with the default prime.
typedef PolynomialHash<1> LinearHash;
// Polynomial hashing of degree 2 (three coefficients) with the default prime.
typedef PolynomialHash<2> QuadraticHash;
// Multiply-shift hash function taking top bits of 32-bit word
//
// The key is multiplied by a random odd 32-bit multiplier in 32-bit
// arithmetic and the topmost log2(num_buckets) bits of the product form
// the bucket index. The number of buckets must be a power of two.
class MultiplyShiftLowHash {
    uint mult;
    uint mask;
    int shift = 0;

    // Throws runtime_error unless num_buckets is a non-zero power of two.
    MultiplyShiftLowHash(unsigned num_buckets) {
        // The multiplier is forced to be odd. It is drawn before validation
        // so that the shared generator advances exactly as it always did.
        mult = rng.next_u32() | 0x1;
        mask = num_buckets - 1;

        // Zero has to be rejected explicitly: its mask wraps around to
        // 0xffffffff, so the bit test below would accept it.
        if (num_buckets == 0 || (mask & num_buckets))
            throw runtime_error("MultiplyShiftLowHash: num_buckets must be power of 2");

        // shift = number of leading zero bits of the mask, i.e. how far the
        // product must be moved right so that its top bits land in the mask.
        // For a single bucket the mask is zero; the loop must not run then
        // (it would never terminate) and any shift is fine because the
        // result is masked to 0 anyway.
        unsigned tmp = mask;
        while (tmp != 0 && (0x80000000U & tmp) == 0) {
            tmp <<= 1;
            shift++;
        }
    }

  public:
    // Create a freshly randomized function for the given number of buckets.
    static HashFunction factory(unsigned num_buckets) {
        return HashFunction(MultiplyShiftLowHash(num_buckets));
    }

    // Map the key to a bucket index in the range [0, num_buckets).
    uint operator()(uint key) {
        return ((key * mult) >> shift) & mask;
    }
};
// Multiply-shift hash function taking low bits of upper half of 64-bit word
//
// The key is multiplied by a random odd 64-bit multiplier in 64-bit
// arithmetic; the low log2(num_buckets) bits of the upper 32-bit half of the
// product form the bucket index. The number of buckets must be a power of two.
class MultiplyShiftHighHash {
    uint mask;
    uint64_t mult;

    // Throws runtime_error unless num_buckets is a non-zero power of two.
    MultiplyShiftHighHash(unsigned num_buckets) {
        // The multiplier is forced to be odd. It is drawn before validation
        // so that the shared generator advances exactly as it always did.
        mult = rng.next_u64() | 0x1;
        mask = num_buckets - 1;

        // Zero has to be rejected explicitly: its mask wraps around to
        // 0xffffffff, so the bit test alone would accept it.
        if (num_buckets == 0 || (mask & num_buckets))
            throw runtime_error("MultiplyShiftHighHash: num_buckets must be power of 2");
    }

  public:
    // Create a freshly randomized function for the given number of buckets.
    static HashFunction factory(unsigned num_buckets) {
        return HashFunction(MultiplyShiftHighHash(num_buckets));
    }

    // Map the key to a bucket index in the range [0, num_buckets).
    uint operator()(uint key) {
        // key is widened to 64 bits by the multiplication with mult.
        return ((key * mult) >> 32) & mask;
    }
};
// Hash table with linear probing
//
// Besides storing keys, the table counts how many buckets each lookup and
// insert had to inspect; the statistics are available through report_avg()
// and report_max() and can be cleared with reset_counter().
class HashTable {
    HashFunction hash;
    vector<uint> table;
    unsigned size = 0;      // number of keys currently stored

    unsigned ops;           // operations since the last reset_counter()
    unsigned max_;          // largest number of probes of a single operation
    uint64_t steps;         // total number of probes over all operations

  public:
    // We reserve one integer to mark unused buckets. This integer
    // cannot be stored in the table.
    static constexpr uint UNUSED = ~((uint) 0);

    // Create an empty table with num_buckets buckets and a hash function
    // obtained from the factory.
    // The unary plus turns UNUSED into a temporary value: vector's
    // constructor takes its fill value by const reference, and binding the
    // constant itself would require an out-of-class definition of it.
    HashTable(const HashFunctionFactory& factory, unsigned num_buckets) :
        hash(factory(num_buckets)), table(num_buckets, +UNUSED) {
        reset_counter();
    }

    // Check whether key is present in the table.
    // Throws runtime_error when asked for the reserved value UNUSED.
    bool lookup(uint key) {
        if (key == UNUSED) throw runtime_error("Cannot lookup UNUSED");

        const unsigned capacity = static_cast<unsigned>(table.size());
        bool found = false;
        unsigned probes = 1;

        uint b = hash(key);
        while (table[b] != UNUSED) {
            if (table[b] == key) {
                found = true;
                break;
            }
            // Every bucket has been inspected: the table is completely full
            // and the key is absent. Without this check the probe sequence
            // would cycle forever, as there is no empty bucket to stop at.
            if (probes >= capacity) break;
            probes++;
            b = next_bucket(b);
        }

        update_counter(probes);
        return found;
    }

    // Add the key in the table. Inserting a key that is already present
    // leaves the table unchanged.
    // Throws runtime_error for the reserved value UNUSED and when every
    // bucket is already occupied.
    void insert(uint key) {
        if (key == UNUSED) throw runtime_error("Cannot insert UNUSED");
        if (size >= table.size()) throw runtime_error("Insert: Table is full");

        unsigned probes = 1;
        uint b = hash(key);

        // At least one bucket is empty (checked above), so the scan ends.
        while (table[b] != UNUSED && table[b] != key) {
            probes++;
            b = next_bucket(b);
        }

        if (table[b] == UNUSED) {
            table[b] = key;
            size++;
        }

        update_counter(probes);
    }

    // Forget all statistics collected so far.
    void reset_counter() { ops = steps = max_ = 0; }

    // Average number of probes per operation since the last reset
    // (0 when no operation was performed).
    double report_avg() { return ((double) steps) / max(1U, ops); }

    // Largest number of probes of a single operation since the last reset.
    double report_max() { return max_; }

  private:
    // Account for one finished operation that inspected `probes` buckets.
    void update_counter(unsigned probes) {
        ops++;
        steps += probes;
        max_ = max(probes, max_);
    }

    // Bucket following b in the probe sequence, wrapping around at the end.
    unsigned next_bucket(unsigned b) { return (b + 1) % table.size(); }
};
void
usage_test
(
HashFunctionFactory
factory
,
int
max_usage
=
90
,
int
retry
=
40
)
{
vector
<
double
>
avg
(
max_usage
,
0.0
);
vector
<
double
>
avg2
(
max_usage
,
0.0
);
unsigned
N
=
1
<<
20
;
unsigned
step_size
=
N
/
100
;
vector
<
uint
>
elements
(
N
);
for
(
unsigned
i
=
0
;
i
<
N
;
i
++
)
elements
[
i
]
=
i
;