Commit c87aafd8 authored by Ondřej Mička

Fibonacci experiment

parent f0636eef
.PHONY: test
test: fibonacci_experiment
@for exp in star random; do \
for impl in std naive ; do \
echo "t-$$exp-$$impl" ; \
time ./fibonacci_experiment $$exp 42 $$impl >t-$$exp-$$impl ; \
done ; \
done
@for b in biased-10 biased-40 biased-70 biased-100 ; do \
echo "t-$$b" ; \
time ./fibonacci_experiment $$b 42 std >t-$$b ; \
done
INCLUDE ?= .
CXXFLAGS=-std=c++11 -O2 -Wall -Wextra -g -Wno-sign-compare -I$(INCLUDE)
fibonacci_experiment: fibonacci.h fibonacci_experiment.cpp $(INCLUDE)/random.h
$(CXX) $(CPPFLAGS) $(CXXFLAGS) fibonacci_experiment.cpp -o $@
.PHONY: clean
clean:
rm -f fibonacci_experiment
#include <algorithm>
#include <functional>
#include <string>
#include <utility>
#include <vector>
#include <iostream>
#include <cmath>
#include <cstdio>
#include <cstdlib>
#include "fibonacci.h"
#include "random.h"
void expect_failed(const std::string& message) {
fprintf(stderr, "Test error: %s\n", message.c_str());
exit(1);
}
/*
* A modified Fibonacci heap for benchmarking.
*
* We inherit the implementation of operations from the FibonacciHeap class
* and extend it by keeping statistics on the number of operations
* and the total number of structural changes. Also, if naive mode is turned on,
* the decrease operation does not mark a parent that has just lost a child.
*/
class HeapTests : public FibonacciHeap {
int ops = 0;
int steps = 0;
bool naive;
size_t max_size = 0; // used to get an upper bound on the number of buckets in the naive version
public:
HeapTests(bool naive = false) : naive(naive) {}
void stats() {
printf(" %.3lf\n", (double)(steps * 1.0 / std::max(1, ops)));
}
Node *insert(priority_t prio, payload_t payload = payload_t()) {
ops++;
return FibonacciHeap::insert(prio, payload);
}
std::pair<payload_t, priority_t> extract_min() {
ops++;
return FibonacciHeap::extract_min();
}
void decrease(Node *node, priority_t new_prio) {
ops++;
if (naive) {
EXPECT(node, "Cannot decrease null pointer.");
EXPECT(node->prio >= new_prio, "Decrease: new priority larger than the old one.");
node->prio = new_prio;
if (is_root(node) || new_prio >= node->parent->prio) return;
add_child(&meta_root, remove(node));
} else FibonacciHeap::decrease(node, new_prio);
}
protected:
size_t max_rank() override {
max_size = std::max(max_size, size);
if (naive) return sqrt(2*max_size) + 2;
else return FibonacciHeap::max_rank();
}
void add_child(Node* parent, Node *node) override {
steps++;
FibonacciHeap::add_child(parent, node);
}
Node *remove(Node *n) override {
steps++;
return FibonacciHeap::remove(n);
}
};
using Heap = HeapTests;
RandomGen *rng; // Random generator object
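// Generate n test sizes spaced logarithmically between b and e.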
std::vector<int> log_range(int b, int e, int n) {
std::vector<int> ret;
for (int i = 0; i < n; i++)
ret.push_back(b * exp(log(e / b) / (n - 1) * i));
return ret;
}
// An auxiliary function for generating a random permutation.
template < typename Vector >
void random_permutation(Vector& v)
{
for (int i = 0; i < v.size(); i++)
std::swap(v[i], v[i + rng->next_range(v.size() - i)]);
}
// Remove given node from heap.
void remove(Heap* H, Node *n) {
H->decrease(n, -1000*1000*1000);
H->extract_min();
}
// Force consolidation of the heap.
void consolidate(Heap* H) {
remove(H, H->insert(0));
}
// Construct a star with n nodes and root index r.
void star(Heap *H, Node **node_map, int n, int r, bool consolidate_) {
if (n == 1) {
EXPECT(!node_map[r], "");
node_map[r] = H->insert(r);
if (consolidate_) consolidate(H);
} else {
star(H, node_map, n - 1, r, false);
star(H, node_map, n - 1, r + n - 1, true);
for (int i = r + n; i < r + 2*n - 2; i++) {
remove(H, node_map[i]);
node_map[i] = nullptr;
}
}
}
/* Generates a sequence on which non-cascading heaps need lots of time.
* Source: "Replacing Mark Bits with Randomness in Fibonacci Heaps" Jerry Li and John Peebles, MIT
* -> modified so that only a quadratic number of elements are needed in the star construction.
*/
void test_star(bool naive) {
for (int i = 1; i < 17; i++) {
int start = 3;
int N = start + i * (i+1) / 2;
Heap H(naive);
Node **node_map = new Node*[N];
for (int i = 0; i < N; i++) node_map[i] = nullptr;
for (int j = i; j > 0; j--) {
star(&H, node_map, j, start, false);
start += j;
}
for (int j = 0; j < (1 << i); j++) {
H.insert(1);
H.insert(1);
H.extract_min();
H.extract_min();
}
printf("%i", N);
H.stats();
delete[] node_map;
}
}
/* A random test.
*
* The test does 2N insertions first, and then N random operations insert / decrease /
* extract_min, each with probability proportional to its weight ins / dec / rem.
*/
void test_random(int ins, int dec, int rem, bool naive) {
for (int N : log_range(50, 80000, 30)) {
Heap H(naive);
std::vector<Node*> node_map;
int s = ins + dec + rem;
std::vector<int> ops;
for (int i = 0; i < (N*ins) / s; i++) ops.push_back(0);
for (int i = 0; i < (N*dec) / s; i++) ops.push_back(1);
for (int i = 0; i < (N*rem) / s; i++) ops.push_back(2);
random_permutation(ops);
# define INSERT() node_map.push_back(H.insert(rng->next_range(5*N), node_map.size()))
for (int i = 0; i < 2*N; i++) INSERT();
for (int op : ops) switch (op) {
case 0:
INSERT();
break;
case 1: {
Node *node = node_map[rng->next_range(node_map.size())];
int p = node->priority() - rng->next_range(N / 5) - 1;
H.decrease(node, p);
break;
}
default: {
auto ret = H.extract_min();
if (ret.first + 1 != node_map.size()) {
node_map[ret.first] = node_map.back();
node_map[ret.first]->payload = ret.first;
}
node_map.pop_back();
}
}
printf("%i", N);
H.stats();
# undef INSERT
}
}
std::vector<std::pair<std::string, std::function<void(bool)>>> tests = {
{ "star", test_star },
{ "random", [](bool n){ test_random(10, 10, 10, n); } },
{ "biased-10", [](bool n){ test_random(10, 10, 20, n); } },
{ "biased-40", [](bool n){ test_random(10, 40, 20, n); } },
{ "biased-70", [](bool n){ test_random(10, 70, 20, n); } },
{ "biased-100", [](bool n){ test_random(10, 100, 20, n); } },
};
int main(int argc, char **argv)
{
if (argc != 4) {
fprintf(stderr, "Usage: %s <test> <student-id> (std|naive)\n", argv[0]);
return 1;
}
std::string which_test = argv[1];
std::string id_str = argv[2];
std::string mode = argv[3];
try {
rng = new RandomGen(stoi(id_str));
} catch (...) {
fprintf(stderr, "Invalid student ID\n");
return 1;
}
bool naive;
if (mode == "std")
naive = false;
else if (mode == "naive")
naive = true;
else
{
fprintf(stderr, "Last argument must be either 'std' or 'naive'\n");
return 1;
}
for (const auto& test : tests) {
if (test.first == which_test) {
test.second(naive);
return 0;
}
}
fprintf(stderr, "Unknown test %s\n", which_test.c_str());
return 1;
}
#ifndef DS1_RANDOM_H
#define DS1_RANDOM_H
#include <cstdint>
/*
* This is the xoroshiro128+ random generator, designed in 2016 by David Blackman
* and Sebastiano Vigna, distributed under the CC-0 license. For more details,
* see http://vigna.di.unimi.it/xorshift/.
*
* Rewritten to C++ by Martin Mares, also placed under CC-0.
*/
class RandomGen {
uint64_t state[2];
uint64_t rotl(uint64_t x, int k)
{
return (x << k) | (x >> (64 - k));
}
public:
// Initialize the generator, set its seed and warm it up.
RandomGen(unsigned int seed)
{
state[0] = seed * 0xdeadbeef;
state[1] = seed ^ 0xc0de1234;
for (int i=0; i<100; i++)
next_u64();
}
// Generate a random 64-bit number.
uint64_t next_u64(void)
{
uint64_t s0 = state[0], s1 = state[1];
uint64_t result = s0 + s1;
s1 ^= s0;
state[0] = rotl(s0, 55) ^ s1 ^ (s1 << 14);
state[1] = rotl(s1, 36);
return result;
}
// Generate a random 32-bit number.
uint32_t next_u32(void)
{
return next_u64() >> 11;
}
// Generate a number between 0 and range-1.
unsigned int next_range(unsigned int range)
{
/*
* This is not perfectly uniform, unless the range is a power of two.
* However, for 64-bit random values and 32-bit ranges, the bias is
* insignificant.
*/
return next_u64() % range;
}
};
#endif
.PHONY: test
test:
@for exp in star random ; do \
for impl in std naive ; do \
echo "t-$$exp-$$impl" ; \
time ./fibonacci_experiment.py $$exp 42 $$impl >t-$$exp-$$impl ; \
done ; \
done
@for b in biased-10 biased-40 biased-70 biased-100 ; do \
echo "t-$$b" ; \
time ./fibonacci_experiment.py $$b 42 std >t-$$b ; \
done
#!/usr/bin/env python3
import sys, random
from math import sqrt, log, exp
from fibonacci import FibonacciHeap as Heap
class BenchmarkingHeap(Heap):
"""A modified Fibonacci heap for benchmarking.
We inherit the implementation of operations from the FibonacciHeap class
and extend it by keeping statistics on the number of operations
and the total number of structural changes. Also, if naive mode is turned on,
the decrease operation does not mark a parent that has just lost a child.
"""
def __init__(self, naive=False):
Heap.__init__(self)
self.naive = naive
self._ops = 0
self._steps = 0
self._max_size = 0 # used to get upper bound on number of buckets in naive version
def stats(self):
return " %.3f" % (self._steps / max(self._ops, 1))
def insert(self, *args):
self._ops += 1
return Heap.insert(self, *args)
def decrease(self, node, new_prio):
self._ops += 1
if self.naive:
assert node is not None, "Cannot decrease None."
assert node is not self._meta_root, "Cannot decrease meta root."
assert new_prio <= node._prio, \
"Decrease: new priority is bigger than old one."
node._prio = new_prio
if self._is_root(node) or node._prio >= node._parent._prio:
return
self._add_child(self._meta_root, self._remove(node))
else:
Heap.decrease(self, node, new_prio)
def extract_min(self):
self._ops += 1
return Heap.extract_min(self)
def _add_child(self, *args):
self._steps += 1
Heap._add_child(self, *args)
def _remove(self, *args):
self._steps += 1
return Heap._remove(self, *args)
def _max_rank(self):
self._max_size = max(self._max_size, self._size)
if self.naive: return int(sqrt(2 * self._max_size)) + 2
return Heap._max_rank(self)
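# Remove the given node from the heap.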
def remove(H, node):
H.decrease(node, -10**9)
H.extract_min()
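# Force consolidation of the heap.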
def consolidate(H):
remove(H, H.insert(0))
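# Generate n test sizes spaced logarithmically between b and e.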
def log_range(b, e, n):
assert 0 < b < e
for i in range(n):
yield int(b * exp(log(e / b) / (n - 1) * i))
def star(H, node_map, n, r, consolidate_):
"""Construct a star with n nodes and root index r.
"""
if n == 1:
assert node_map[r] is None
node_map[r] = H.insert(r)
if consolidate_:
consolidate(H)
else:
star(H, node_map, n - 1, r, False)
star(H, node_map, n - 1, r + n - 1, True)
for i in range(r + n, r + 2*n - 2):
remove(H, node_map[i])
node_map[i] = None
def test_star(naive):
"""Generates a sequence on which non-cascading heaps need lots of time.
Source: "Replacing Mark Bits with Randomness in Fibonacci Heaps" Jerry Li and John Peebles, MIT
-> modified so that only a quadratic number of elements are needed in the star construction.
"""
for i in range(1, 17): # was 26
start = 3
N = start + i * (i+1) // 2
H = BenchmarkingHeap(naive)
node_map = [None] * N
for j in range(i, 0, -1):
star(H, node_map, j, start, False)
start += j
for i in range(1 << i):
H.insert(1)
H.insert(1)
H.extract_min()
H.extract_min()
print(N, H.stats())
def test_random(ins, dec, rem, naive):
"""A random test.
The test does 2N insertions first, and then N random operations insert / decrease /
extract_min, each with probability proportional to its weight ins / dec / rem.
"""
for N in log_range(50, 80000, 30):
H = BenchmarkingHeap(naive)
node_map = []
s = ins + dec + rem
ops = [ 0 ] * ((N*ins) // s) + [ 1 ] * ((N*dec) // s) + [ 2 ] * ((N*rem) // s)
random.shuffle(ops)
def insert():
node_map.append(H.insert(random.randrange(5 * N), len(node_map)))
for _ in range(2*N): insert()
for op in ops:
if op == 0: insert()
elif op == 1:
node = random.choice(node_map)
p = node.priority() - random.randrange(N // 5) - 1
H.decrease(node, p)
else:
i, _ = H.extract_min()
if i + 1 == len(node_map):
node_map.pop()
else:
node_map[i] = node_map.pop()
node_map[i].payload = i
print(N, H.stats())
tests = {
"star": test_star,
"random": lambda n: test_random(10, 10, 10, n),
"biased-10": lambda n: test_random(10, 10, 20, n),
"biased-40": lambda n: test_random(10, 40, 20, n),
"biased-70": lambda n: test_random(10, 70, 20, n),
"biased-100": lambda n: test_random(10, 100, 20, n),
}
if len(sys.argv) == 4:
test, student_id = sys.argv[1], sys.argv[2]
if sys.argv[3] == "std":
naive = False
elif sys.argv[3] == "naive":
naive = True
else:
raise ValueError("Last argument must be either 'std' or 'naive'")
random.seed(int(student_id))
if test in tests:
tests[test](naive)
else:
raise ValueError("Unknown test {}".format(test))
else:
raise ValueError("Usage: {} <test> <student-id> (std|naive)".format(sys.argv[0]))
## Goal
The goal of this assignment is to evaluate your implementation of the Fibonacci heap
experimentally and to compare it with a "naive" implementation, which does not
mark a node when it loses a child.
You are given a test program (`fibonacci_experiment`) which calls your
implementation from the previous assignment to perform the following
experiments:
- _Star test:_ A specific sequence of operations on which the naive implementation
creates $Θ(\sqrt{n})$ stars of sizes ranging from $1$ up to $Θ(\sqrt{n})$.
- _Random test:_ A test of size $n$ first inserts $2n$ elements and then performs
$n$ operations in random order, of which one third are insertions, one third are
decreases, and one third are extract\_mins.
- _Biased tests:_ Like the random test, but with different operation weights.
First the test inserts $2n$ elements and then it performs $n$ random operations.
The number of operations of each type is proportional to its weight: the weight
of insert is 10, the weight of extract\_min is 20, and the weight of decrease is
the number in the name of the test (the implemented choices are 10, 40, 70, and 100).
The sketch after this list shows how the operation mix is built.
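For illustration, the following sketch shows how the test program chooses the operation
sequence for the random and biased tests; it mirrors the logic of `test_random` in the
supplied code (the helper name `operation_mix` is only illustrative). The codes 0, 1 and 2
stand for insert, decrease and extract\_min.

```python
import random

def operation_mix(n, ins, dec, rem):
    # Build roughly n operations; each type appears proportionally to its weight
    # (slightly fewer than n may be produced because of the integer divisions).
    s = ins + dec + rem
    ops = [0] * ((n * ins) // s) + [1] * ((n * dec) // s) + [2] * ((n * rem) // s)
    random.shuffle(ops)
    return ops

# Example: the operation mix of the biased-70 test for n = 1000.
mix = operation_mix(1000, ins=10, dec=70, rem=20)
```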
The program runs each experiment with several different values of $n$. For each
value, it prints the average number of structural changes (adding or removing a
child of a node) per operation (insert, decrease, and extract\_min).
You should perform these experiments and write a report that contains one
plot of the measured data for each test. The plots should be as follows:
- _Star test:_ One plot with two curves – one for the standard and one for the naive
version.
- _Random test:_ One plot with two curves – one for the standard and one for the naive
version.
- _Biased tests:_ One plot with four curves – one for each value of the bias, all of them
measured with the standard implementation. Note that the biased tests are not run with
the naive implementation.
Each plot should show the dependence
of the average number of structural changes on the test size $n$.
The report should discuss the experimental results and try to explain the observed
behavior using theory from the lectures. (If you want, you can carry out further
experiments to gain a better understanding of the data structure and include them
in the report. This is strictly optional.)
You should submit a PDF file with the report (and no source code).
You will get 1 temporary point upon submission if the file is syntactically correct;
proper points will be assigned later.
## Test program
The test program is given three arguments:
- The name of the test (`star`, `random`, `biased-10`, `biased-40`, `biased-70`, `biased-100`).
- The random seed: you should use the last 2 digits of your student ID (you can find
it in the Study Information System – just click on the Personal data icon). Please
include the random seed in your report.
- The implementation to test (`std` or `naive`).
The output of the program contains one line per experiment, which consists of
the test size and the average number of structural changes per operation.
## Your implementation
Please use your implementation from the previous exercise. If you used C++,
make sure that the `add_child()`, `remove()` and `max_rank()` methods are declared
`virtual` (they will be overridden by the test program). Also, if you perform any
structural changes without using the `add_child()` and `remove()` helper methods,
you need to adjust the benchmarking heap class (`HeapTests` in the C++ test program,
`BenchmarkingHeap` in the Python one) accordingly.
## Hints
The following tools can be useful for producing nice plots (a small example follows the list):
- [pandas](https://pandas.pydata.org/)
- [matplotlib](https://matplotlib.org/)
- [gnuplot](http://www.gnuplot.info/)
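For example, a plot for the random test could be produced with a short matplotlib
script along the following lines. This is only a sketch: the `t-*` file names come from
the supplied Makefile, each output line has the form `<n> <average>`, and the exact
choice of scales, labels and output format is up to you.

```python
import matplotlib.pyplot as plt

def load(path):
    # Each line of the experiment output: test size followed by the average
    # number of structural changes per operation.
    sizes, averages = [], []
    with open(path) as f:
        for line in f:
            n, avg = line.split()
            sizes.append(int(n))
            averages.append(float(avg))
    return sizes, averages

for name in ["t-random-std", "t-random-naive"]:
    sizes, averages = load(name)
    plt.plot(sizes, averages, marker="o", label=name)

plt.xscale("log")  # the random test sizes grow geometrically
plt.xlabel("test size n")
plt.ylabel("structural changes per operation")
plt.legend()
plt.savefig("random.pdf")
```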
A quick checklist for plots:
- Is there a caption explaining what is plotted?
- Are the axes clearly labelled? Do they have value ranges and units?
- Have you mentioned which axes use a logarithmic scale? (Logarithmic plots
are a better fit in some cases, but you should say so.)
- Is it clear which curve means what?
- Is it clear which points were actually measured and which parts of the curve
are interpolated between them?
- Are there any overlaps? (E.g., the most interesting part of the curve
hidden underneath a label?)
In your discussion, please distinguish the following kinds of claims.
It should always be clear which is which:
- Experimental results (i.e., the raw data you obtained from the experiments)
- Theoretical facts (i.e., claims we have proved mathematically)
- Your hypotheses (e.g., when the plot suggests that something is true,
but you are not able to prove rigorously that it always holds)