From c87aafd8744e58fdad2ed402700d80be8ffd0f50 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Ondra=20Mi=C4=8Dka=20=40=20miles-teg?= <mitch.ondra@gmail.com> Date: Mon, 30 Mar 2020 14:42:54 +0200 Subject: [PATCH] Fibonacci experiment --- 06-fibonacci_experiment/cpp/Makefile | 22 ++ .../cpp/fibonacci_experiment.cpp | 256 ++++++++++++++++++ 06-fibonacci_experiment/cpp/random.h | 61 +++++ 06-fibonacci_experiment/python/Makefile | 13 + .../python/fibonacci_experiment.py | 172 ++++++++++++ 06-fibonacci_experiment/task.md | 93 +++++++ 6 files changed, 617 insertions(+) create mode 100644 06-fibonacci_experiment/cpp/Makefile create mode 100644 06-fibonacci_experiment/cpp/fibonacci_experiment.cpp create mode 100644 06-fibonacci_experiment/cpp/random.h create mode 100644 06-fibonacci_experiment/python/Makefile create mode 100644 06-fibonacci_experiment/python/fibonacci_experiment.py create mode 100644 06-fibonacci_experiment/task.md diff --git a/06-fibonacci_experiment/cpp/Makefile b/06-fibonacci_experiment/cpp/Makefile new file mode 100644 index 0000000..589ab7c --- /dev/null +++ b/06-fibonacci_experiment/cpp/Makefile @@ -0,0 +1,22 @@ +.PHONY: test +test: fibonacci_experiment + @for exp in star random; do \ + for impl in std naive ; do \ + echo "t-$$exp-$$impl" ; \ + time ./fibonacci_experiment $$exp 42 $$impl >t-$$exp-$$impl ; \ + done ; \ + done + @for b in biased-10 biased-40 biased-70 biased-100 ; do \ + echo "t-$$b" ; \ + time ./fibonacci_experiment $$b 42 std >t-$$b ; \ + done + +INCLUDE ?= . +CXXFLAGS=-std=c++11 -O2 -Wall -Wextra -g -Wno-sign-compare -I$(INCLUDE) + +fibonacci_experiment: fibonacci.h fibonacci_experiment.cpp $(INCLUDE)/random.h + $(CXX) $(CPPFLAGS) $(CXXFLAGS) fibonacci_experiment.cpp -o $@ + +.PHONY: clean +clean: + rm -f fibonacci_experiment diff --git a/06-fibonacci_experiment/cpp/fibonacci_experiment.cpp b/06-fibonacci_experiment/cpp/fibonacci_experiment.cpp new file mode 100644 index 0000000..3e8d9b5 --- /dev/null +++ b/06-fibonacci_experiment/cpp/fibonacci_experiment.cpp @@ -0,0 +1,256 @@ +#include <algorithm> +#include <functional> +#include <string> +#include <utility> +#include <vector> +#include <iostream> +#include <cmath> + +#include "fibonacci.h" +#include "random.h" + +void expect_failed(const std::string& message) { + fprintf(stderr, "Test error: %s\n", message.c_str()); + exit(1); +} + + +/* + * A modified Fibonacci heap for benchmarking. + * + * We inherit the implementation of operations from the FibonacciHeap class + * and extend it by keeping statistics on the number of operations + * and the total number of structural changes. Also, if naive is turned on, + * the decrease does not mark parent which just lost a child. + */ + +class HeapTests : public FibonacciHeap { + int ops = 0; + int steps = 0; + bool naive; + size_t max_size = 0; + + public: + HeapTests(bool naive = false) : naive(naive) {} + + void stats() { + printf(" %.3lf\n", (double)(steps * 1.0 / std::max(1, ops))); + } + + Node *insert(priority_t prio, payload_t payload = payload_t()) { + ops++; + return FibonacciHeap::insert(prio, payload); + } + + std::pair<payload_t, priority_t> extract_min() { + ops++; + return FibonacciHeap::extract_min(); + } + + void decrease(Node *node, priority_t new_prio) { + ops++; + if (naive) { + EXPECT(node, "Cannot decrease null pointer."); + EXPECT(node->prio >= new_prio, "Decrase: new priority larger than old one."); + + node->prio = new_prio; + if (is_root(node) || new_prio >= node->parent->prio) return; + + add_child(&meta_root, remove(node)); + } else FibonacciHeap::decrease(node, new_prio); + } + + protected: + size_t max_rank() override { + max_size = std::max(max_size, size); + if (naive) return sqrt(2*max_size) + 2; + else return FibonacciHeap::max_rank(); + } + + void add_child(Node* parent, Node *node) override { + steps++; + FibonacciHeap::add_child(parent, node); + } + + Node *remove(Node *n) override { + steps++; + return FibonacciHeap::remove(n); + } +}; + +using Heap = HeapTests; + +RandomGen *rng; // Random generator object + + +std::vector<int> log_range(int b, int e, int n) { + std::vector<int> ret; + for (int i = 0; i < n; i++) + ret.push_back(b * exp(log(e / b) / (n - 1) * i)); + return ret; +} + +// An auxiliary function for generating a random permutation. +template < typename Vector > +void random_permutation(Vector& v) +{ + for (int i = 0; i < v.size(); i++) + std::swap(v[i], v[i + rng->next_range(v.size() - i)]); +} + +// Remove given node from heap. +void remove(Heap* H, Node *n) { + H->decrease(n, -1000*1000*1000); + H->extract_min(); +} + +// Force consolidation of the heap. +void consolidate(Heap* H) { + remove(H, H->insert(0)); +} + +// Construct a star with n nodes and root index r. +void star(Heap *H, Node **node_map, int n, int r, bool consolidate_) { + if (n == 1) { + EXPECT(!node_map[r], ""); + node_map[r] = H->insert(r); + if (consolidate_) consolidate(H); + } else { + star(H, node_map, n - 1, r, false); + star(H, node_map, n - 1, r + n - 1, true); + + for (int i = r + n; i < r + 2*n - 2; i++) { + remove(H, node_map[i]); + node_map[i] = nullptr; + } + } +} + +/* Generates a sequence on which non-cascading heaps need lots of time. + * Source: "Replacing Mark Bits with Randomness in Fibonacci Heaps" Jerry Li and John Peebles, MIT + * -> modified so that only a quadratic number of elements are needed in the star construction. + */ +void test_star(bool naive) { + for (int i = 1; i < 17; i++) { + int start = 3; + int N = start + i * (i+1) / 2; + Heap H(naive); + Node **node_map = new Node*[N]; + for (int i = 0; i < N; i++) node_map[i] = nullptr; + + for (int j = i; j > 0; j--) { + star(&H, node_map, j, start, false); + start += j; + } + + for (int j = 0; j < (1 << i); j++) { + H.insert(1); + H.insert(1); + H.extract_min(); + H.extract_min(); + } + + printf("%i", N); + H.stats(); + + delete[] node_map; + } +} + +/* A random test. + * + * The test does 2N insertions first, and then N random operations insert / decrease / + * extract_min, each with probability proprotional to its weight ins / dec / rem. + */ +void test_random(int ins, int dec, int rem, bool naive) { + for (int N : log_range(50, 80000, 30)) { + Heap H(naive); + std::vector<Node*> node_map; + int s = ins + dec + rem; + + std::vector<int> ops; + for (int i = 0; i < (N*ins) / s; i++) ops.push_back(0); + for (int i = 0; i < (N*dec) / s; i++) ops.push_back(1); + for (int i = 0; i < (N*rem) / s; i++) ops.push_back(2); + random_permutation(ops); + +# define INSERT() node_map.push_back(H.insert(rng->next_range(5*N), node_map.size())) + + for (int i = 0; i < 2*N; i++) INSERT(); + + for (int op : ops) switch (op) { + case 0: + INSERT(); + break; + case 1: { + Node *node = node_map[rng->next_range(node_map.size())]; + int p = node->priority() - rng->next_range(N / 5) - 1; + H.decrease(node, p); + break; + } + default: { + auto ret = H.extract_min(); + if (ret.first + 1 != node_map.size()) { + node_map[ret.first] = node_map.back(); + node_map[ret.first]->payload = ret.first; + } + node_map.pop_back(); + } + } + + printf("%i", N); + H.stats(); + +# undef INSERT + } +} + + +std::vector<std::pair<std::string, std::function<void(bool)>>> tests = { + { "star", test_star }, + { "random", [](bool n){ test_random(10, 10, 10, n); } }, + { "biased-10", [](bool n){ test_random(10, 10, 20, n); } }, + { "biased-40", [](bool n){ test_random(10, 40, 20, n); } }, + { "biased-70", [](bool n){ test_random(10, 70, 20, n); } }, + { "biased-100", [](bool n){ test_random(10, 100, 20, n); } }, +}; + +int main(int argc, char **argv) +{ + if (argc != 4) { + fprintf(stderr, "Usage: %s <test> <student-id> (std|naive)\n", argv[0]); + return 1; + } + + std::string which_test = argv[1]; + std::string id_str = argv[2]; + std::string mode = argv[3]; + + try { + rng = new RandomGen(stoi(id_str)); + } catch (...) { + fprintf(stderr, "Invalid student ID\n"); + return 1; + } + + bool naive; + if (mode == "std") + naive = false; + else if (mode == "naive") + naive = true; + else + { + fprintf(stderr, "Last argument must be either 'std' or 'naive'\n"); + return 1; + } + + for (const auto& test : tests) { + if (test.first == which_test) { + test.second(naive); + return 0; + } + } + + fprintf(stderr, "Unknown test %s\n", which_test.c_str()); + return 1; +} diff --git a/06-fibonacci_experiment/cpp/random.h b/06-fibonacci_experiment/cpp/random.h new file mode 100644 index 0000000..5ef10ae --- /dev/null +++ b/06-fibonacci_experiment/cpp/random.h @@ -0,0 +1,61 @@ +#ifndef DS1_RANDOM_H +#define DS1_RANDOM_H + +#include <cstdint> + +/* + * This is the xoroshiro128+ random generator, designed in 2016 by David Blackman + * and Sebastiano Vigna, distributed under the CC-0 license. For more details, + * see http://vigna.di.unimi.it/xorshift/. + * + * Rewritten to C++ by Martin Mares, also placed under CC-0. + */ + +class RandomGen { + uint64_t state[2]; + + uint64_t rotl(uint64_t x, int k) + { + return (x << k) | (x >> (64 - k)); + } + + public: + // Initialize the generator, set its seed and warm it up. + RandomGen(unsigned int seed) + { + state[0] = seed * 0xdeadbeef; + state[1] = seed ^ 0xc0de1234; + for (int i=0; i<100; i++) + next_u64(); + } + + // Generate a random 64-bit number. + uint64_t next_u64(void) + { + uint64_t s0 = state[0], s1 = state[1]; + uint64_t result = s0 + s1; + s1 ^= s0; + state[0] = rotl(s0, 55) ^ s1 ^ (s1 << 14); + state[1] = rotl(s1, 36); + return result; + } + + // Generate a random 32-bit number. + uint32_t next_u32(void) + { + return next_u64() >> 11; + } + + // Generate a number between 0 and range-1. + unsigned int next_range(unsigned int range) + { + /* + * This is not perfectly uniform, unless the range is a power of two. + * However, for 64-bit random values and 32-bit ranges, the bias is + * insignificant. + */ + return next_u64() % range; + } +}; + +#endif diff --git a/06-fibonacci_experiment/python/Makefile b/06-fibonacci_experiment/python/Makefile new file mode 100644 index 0000000..656f937 --- /dev/null +++ b/06-fibonacci_experiment/python/Makefile @@ -0,0 +1,13 @@ +.PHONY: plot +test: + @for exp in star random ; do \ + for impl in std naive ; do \ + echo "t-$$exp-$$impl" ; \ + time ./fibonacci_experiment.py $$exp 42 $$impl >t-$$exp-$$impl ; \ + done ; \ + done + @for b in biased-10 biased-40 biased-70 biased-100 ; do \ + echo "t-$$b" ; \ + time ./fibonacci_experiment.py $$b 42 std >t-$$b ; \ + done + diff --git a/06-fibonacci_experiment/python/fibonacci_experiment.py b/06-fibonacci_experiment/python/fibonacci_experiment.py new file mode 100644 index 0000000..eb7a091 --- /dev/null +++ b/06-fibonacci_experiment/python/fibonacci_experiment.py @@ -0,0 +1,172 @@ +#!/usr/bin/env python3 + +import sys, itertools, random +from math import sqrt, log, exp +from fibonacci import FibonacciHeap as Heap + +class BenchmarkingHeap(Heap): + """A modified Fibonacci heap for benchmarking. + + We inherit the implementation of operations from the FibonacciHeap class + and extend it by keeping statistics on the number of operations + and the total number of structural changes. Also, if naive is turned on, + the decrease does not mark parent which just lost a child. + """ + + def __init__(self, naive=False): + Heap.__init__(self) + self.naive = naive + self._ops = 0 + self._steps = 0 + self._max_size = 0 # used to get upper bound on number of buckets in naive version + + def stats(self): + return " %.3f" % (self._steps / max(self._ops, 1)) + + def insert(self, *args): + self._ops += 1 + return Heap.insert(self, *args) + + def decrease(self, node, new_prio): + self._ops += 1 + if self.naive: + assert node is not None, "Cannot decrease None." + assert node is not self._meta_root, "Cannot decrease meta root." + assert new_prio <= node._prio, \ + "Decrease: new priority is bigger than old one." + + node._prio = new_prio + if self._is_root(node) or node._prio >= node._parent._prio: + return + + self._add_child(self._meta_root, self._remove(node)) + else: + Heap.decrease(self, node, new_prio) + + def extract_min(self): + self._ops += 1 + return Heap.extract_min(self) + + def _add_child(self, *args): + self._steps += 1 + Heap._add_child(self, *args) + + def _remove(self, *args): + self._steps += 1 + return Heap._remove(self, *args) + + def _max_rank(self): + self._max_size = max(self._max_size, self._size) + if self.naive: return int(sqrt(2 * self._max_size)) + 2 + return Heap._max_rank(self) + +def remove(H, node): + H.decrease(node, -10**9) + H.extract_min() + +def consolidate(H): + remove(H, H.insert(0)) + +def log_range(b, e, n): + assert 0 < b < e + for i in range(n): + yield int(b * exp(log(e / b) / (n - 1) * i)) + +def star(H, node_map, n, r, consolidate_): + """Construct a star with n nodes and root index r. + """ + if n == 1: + assert node_map[r] is None + node_map[r] = H.insert(r) + if consolidate_: + consolidate(H) + else: + star(H, node_map, n - 1, r, False) + star(H, node_map, n - 1, r + n - 1, True) + + for i in range(r + n, r + 2*n - 2): + remove(H, node_map[i]) + node_map[i] = None + +def test_star(naive): + """Generates a sequence on which non-cascading heaps need lots of time. + Source: "Replacing Mark Bits with Randomness in Fibonacci Heaps" Jerry Li and John Peebles, MIT + -> modified so that only a quadratic number of elements are needed in the star construction. + """ + for i in range(1, 17): # was 26 + start = 3 + N = start + i * (i+1) // 2 + H = BenchmarkingHeap(naive) + node_map = [None] * N + + for j in range(i, 0, -1): + star(H, node_map, j, start, False) + start += j + + for i in range(1 << i): + H.insert(1) + H.insert(1) + H.extract_min() + H.extract_min() + + print(N, H.stats()) + +def test_random(ins, dec, rem, naive): + """A random test. + + The test does 2N insertions first, and then N random operations insert / decrease / + extract_min each with probability proprotional to its weight ins / dec / rem. + """ + for N in log_range(50, 80000, 30): + H = BenchmarkingHeap(naive) + node_map = [] + s = ins + dec + rem + ops = [ 0 ] * ((N*ins) // s) + [ 1 ] * ((N*dec) // s) + [ 2 ] * ((N*rem) // s) + random.shuffle(ops) + + def insert(): + node_map.append(H.insert(random.randrange(5 * N), len(node_map))) + + for _ in range(2*N): insert() + + for op in ops: + if op == 0: insert() + elif op == 1: + node = random.choice(node_map) + p = node.priority() - random.randrange(N // 5) - 1 + H.decrease(node, p) + else: + i, _ = H.extract_min() + if i + 1 == len(node_map): + node_map.pop() + else: + node_map[i] = node_map.pop() + node_map[i].payload = i + + print(N, H.stats()) + +tests = { + "star": test_star, + "random": lambda n: test_random(10, 10, 10, n), + "biased-10": lambda n: test_random(10, 10, 20, n), + "biased-40": lambda n: test_random(10, 40, 20, n), + "biased-70": lambda n: test_random(10, 70, 20, n), + "biased-100": lambda n: test_random(10, 100, 20, n), +} + +if len(sys.argv) == 4: + test, student_id = sys.argv[1], sys.argv[2] + if sys.argv[3] == "std": + naive = False + elif sys.argv[3] == "naive": + naive = True + else: + raise ValueError("Last argument must be either 'std' or 'naive'") + random.seed(int(student_id)) + if test in tests: + tests[test](naive) + else: + raise ValueError("Unknown test {}".format(test)) +else: + raise ValueError("Usage: {} <test> <student-id> (std|naive)".format(sys.argv[0])) + diff --git a/06-fibonacci_experiment/task.md b/06-fibonacci_experiment/task.md new file mode 100644 index 0000000..4e834ba --- /dev/null +++ b/06-fibonacci_experiment/task.md @@ -0,0 +1,93 @@ +## Goal + +The goal of this assignment is to evaluate your implementation of Fibonacci heap +experimentally and to compare it with a "naive" implementation which does not +mark vertices after cutting a child. + +You are given a test program (`fibonacci_experiment`) which calls your +implementation from the previous assignment to perform the following +experiments: + +- _Star test:_ Specific sequence of operations on which naive implementation + creates $Θ(\sqrt{n})$ stars of size 1 up to $Θ(\sqrt{n})$. +- _Random test:_ Test of size $n$ first inserts $2n$ elements and then it does + $n$ operations of which one third are insertions, one third are decreases + and one third are extract\_mins in random order. +- _Biased tests:_ Like random test but the weights of operations are different. + First the test inserts $2n$ elements and then it does random $n$ operations. + Number of operations of each type is proportional to their weight. + Weight of insert is 10, weight of extract\_min 20 and weight of decrease is + the number in the name of the test, and implemented possibilities are 10, 40, + 70 and 100. + +The program tries each experiment with different values of $n$. In each try, +it prints the average number of structural changes (adding and removing a child +of a node) per one operation (insert, decrease and extract\_min). + +You should perform these experiments and write a report, which contains one +plot of the measured data for each test. The plots should be following: + +- _Star test:_ One plot with two curves – one for standard and the other one for naive + version. +- _Random test:_ One plot with two curves – one for standard and the other one for naive + version. +- _Biased tests:_ One plot with 4 curves – one for each value of bias, all of them tested + with standard implementation. Note that we do not do biased test naive implementation. + +Each plot should show the dependence +of the average number of structural changes on the test size $n$. + +The report should discuss the experimental results and try to explain the observed +behavior using theory from the lectures. (If you want, you can carry out further +experiments to gain better understanding of the data structure and include these +in the report. This is strictly optional.) + +You should submit a PDF file with the report (and no source code). +You will get 1 temporary point upon submission if the file is syntactically correct; +proper points will be assigned later. + +## Test program + +The test program is given three arguments: +- The name of the test (`star`, `random`, `biased-10`, `biased-40`, `biased-70`, `biased-100`). +- The random seed: you should use the last 2 digits of your student ID (you can find + it in the Study Information System – just click on the Personal data icon). Please + include the random seed in your report. +- The implementation to test (`std` or `naive`). + +The output of the program contains one line per experiment, which consists of +the set size and the average number of structural changes. + +## Your implementation + +Please use your implementation from the previous exercise. If you used C++, +make sure that the `add_child()`, +`remove()` and `max_rank()` methods are declared virtual (they will +be overriden by the test program). Also, if you are performing any structural changes +without using the `add_child()` and `remove()` helper methods, you need to adjust +the `BenchmarkingHeap` class accordingly. + +## Hints + +The following tools can be useful for producing nice plots: +- [pandas](https://pandas.pydata.org/) +- [matplotlib](https://matplotlib.org/) +- [gnuplot](http://www.gnuplot.info/) + +A quick checklist for plots: +- Is there a caption explaining what is plotted? +- Are the axes clearly labelled? Do they have value ranges and units? +- Have you mentioned that this axis has logarithmic scale? (Logarithmic graphs + are more fitting in some cases, but you should tell.) +- Is it clear which curve means what? +- Is it clear what are the measured points and what is an interpolated + curve between them? +- Are there any overlaps? (E.g., the most interesting part of the curve + hidden underneath a label?) + +In your discussion, please distinguish the following kinds of claims. +It should be always clear which is which: +- Experimental results (i.e., the raw data you obtained from the experiments) +- Theoretical facts (i.e., claims we have proved mathematically) +- Your hypotheses (e.g., when you claim that the graph looks like something is true, + but you are not able to prove rigorously that it always holds) -- GitLab