Commit f15ed5f3 by Martin Mareš

Fibonacci heap experiment

parent d074d45a
 .PHONY: test test: fibonacci_experiment @for exp in star random; do \ for impl in std naive ; do \ echo "t-\$\$exp-\$\$impl" ; \ time ./fibonacci_experiment \$\$exp 42 \$\$impl >t-\$\$exp-\$\$impl ; \ done ; \ done @for b in biased-10 biased-40 biased-70 biased-100 ; do \ echo "t-\$\$b" ; \ time ./fibonacci_experiment \$\$b 42 std >t-\$\$b ; \ done INCLUDE ?= . CXXFLAGS=-std=c++11 -O2 -Wall -Wextra -g -Wno-sign-compare -I\$(INCLUDE) fibonacci_experiment: fibonacci.h fibonacci_experiment.cpp \$(INCLUDE)/random.h \$(CXX) \$(CPPFLAGS) \$(CXXFLAGS) fibonacci_experiment.cpp -o \$@ .PHONY: clean clean: rm -f fibonacci_experiment
 #include #include #include #include #include #include #include #include "fibonacci.h" #include "random.h" void expect_failed(const std::string& message) { fprintf(stderr, "Test error: %s\n", message.c_str()); exit(1); } /* * A modified Fibonacci heap for benchmarking. * * We inherit the implementation of operations from the FibonacciHeap class * and extend it by keeping statistics on the number of operations * and the total number of structural changes. Also, if naive is turned on, * the decrease does not mark parent which just lost a child. */ class HeapTests : public FibonacciHeap { int ops = 0; int steps = 0; bool naive; size_t max_size = 0; public: HeapTests(bool naive = false) : naive(naive) {} void stats() { printf(" %.3lf\n", (double)(steps * 1.0 / std::max(1, ops))); } Node *insert(priority_t prio, payload_t payload = payload_t()) { ops++; return FibonacciHeap::insert(prio, payload); } std::pair extract_min() { ops++; return FibonacciHeap::extract_min(); } void decrease(Node *node, priority_t new_prio) { ops++; if (naive) { EXPECT(node, "Cannot decrease null pointer."); EXPECT(node->prio >= new_prio, "Decrase: new priority larger than old one."); node->prio = new_prio; if (is_root(node) || new_prio >= node->parent->prio) return; add_child(&meta_root, remove(node)); } else FibonacciHeap::decrease(node, new_prio); } protected: size_t max_rank() override { max_size = std::max(max_size, size); if (naive) return sqrt(2*max_size) + 2; else return FibonacciHeap::max_rank(); } void add_child(Node* parent, Node *node) override { steps++; FibonacciHeap::add_child(parent, node); } Node *remove(Node *n) override { steps++; return FibonacciHeap::remove(n); } }; using Heap = HeapTests; RandomGen *rng; // Random generator object std::vector log_range(int b, int e, int n) { std::vector ret; for (int i = 0; i < n; i++) ret.push_back(b * exp(log(e / b) / (n - 1) * i)); return ret; } // An auxiliary function for generating a random permutation. 
template < typename Vector > void random_permutation(Vector& v) { for (int i = 0; i < v.size(); i++) std::swap(v[i], v[i + rng->next_range(v.size() - i)]); } // Remove given node from heap. void remove(Heap* H, Node *n) { H->decrease(n, -1000*1000*1000); H->extract_min(); } // Force consolidation of the heap. void consolidate(Heap* H) { remove(H, H->insert(0)); } // Construct a star with n nodes and root index r. void star(Heap *H, Node **node_map, int n, int r, bool consolidate_) { if (n == 1) { EXPECT(!node_map[r], ""); node_map[r] = H->insert(r); if (consolidate_) consolidate(H); } else { star(H, node_map, n - 1, r, false); star(H, node_map, n - 1, r + n - 1, true); for (int i = r + n; i < r + 2*n - 2; i++) { remove(H, node_map[i]); node_map[i] = nullptr; } } } /* Generates a sequence on which non-cascading heaps need lots of time. * Source: "Replacing Mark Bits with Randomness in Fibonacci Heaps" Jerry Li and John Peebles, MIT * -> modified so that only a quadratic number of elements are needed in the star construction. */ void test_star(bool naive) { for (int i = 1; i < 17; i++) { int start = 3; int N = start + i * (i+1) / 2; Heap H(naive); Node **node_map = new Node*[N]; for (int i = 0; i < N; i++) node_map[i] = nullptr; for (int j = i; j > 0; j--) { star(&H, node_map, j, start, false); start += j; } for (int j = 0; j < (1 << i); j++) { H.insert(1); H.insert(1); H.extract_min(); H.extract_min(); } printf("%i", N); H.stats(); delete[] node_map; } } /* A random test. * * The test does 2N insertions first, and then N random operations insert / decrease / * extract_min, each with probability proprotional to its weight ins / dec / rem. 
*/ void test_random(int ins, int dec, int rem, bool naive) { for (int N : log_range(50, 80000, 30)) { Heap H(naive); std::vector node_map; int s = ins + dec + rem; std::vector ops; for (int i = 0; i < (N*ins) / s; i++) ops.push_back(0); for (int i = 0; i < (N*dec) / s; i++) ops.push_back(1); for (int i = 0; i < (N*rem) / s; i++) ops.push_back(2); random_permutation(ops); # define INSERT() node_map.push_back(H.insert(rng->next_range(5*N), node_map.size())) for (int i = 0; i < 2*N; i++) INSERT(); for (int op : ops) switch (op) { case 0: INSERT(); break; case 1: { Node *node = node_map[rng->next_range(node_map.size())]; int p = node->priority() - rng->next_range(N / 5) - 1; H.decrease(node, p); break; } default: { auto ret = H.extract_min(); if (ret.first + 1 != node_map.size()) { node_map[ret.first] = node_map.back(); node_map[ret.first]->payload = ret.first; } node_map.pop_back(); } } printf("%i", N); H.stats(); # undef INSERT } } std::vector>> tests = { { "star", test_star }, { "random", [](bool n){ test_random(10, 10, 10, n); } }, { "biased-10", [](bool n){ test_random(10, 10, 20, n); } }, { "biased-40", [](bool n){ test_random(10, 40, 20, n); } }, { "biased-70", [](bool n){ test_random(10, 70, 20, n); } }, { "biased-100", [](bool n){ test_random(10, 100, 20, n); } }, }; int main(int argc, char **argv) { if (argc != 4) { fprintf(stderr, "Usage: %s (std|naive)\n", argv[0]); return 1; } std::string which_test = argv[1]; std::string id_str = argv[2]; std::string mode = argv[3]; try { rng = new RandomGen(stoi(id_str)); } catch (...) { fprintf(stderr, "Invalid student ID\n"); return 1; } bool naive; if (mode == "std") naive = false; else if (mode == "naive") naive = true; else { fprintf(stderr, "Last argument must be either 'std' or 'naive'\n"); return 1; } for (const auto& test : tests) { if (test.first == which_test) { test.second(naive); return 0; } } fprintf(stderr, "Unknown test %s\n", which_test.c_str()); return 1; }
 #ifndef DS1_RANDOM_H #define DS1_RANDOM_H #include /* * This is the xoroshiro128+ random generator, designed in 2016 by David Blackman * and Sebastiano Vigna, distributed under the CC-0 license. For more details, * see http://vigna.di.unimi.it/xorshift/. * * Rewritten to C++ by Martin Mares, also placed under CC-0. */ class RandomGen { uint64_t state[2]; uint64_t rotl(uint64_t x, int k) { return (x << k) | (x >> (64 - k)); } public: // Initialize the generator, set its seed and warm it up. RandomGen(unsigned int seed) { state[0] = seed * 0xdeadbeef; state[1] = seed ^ 0xc0de1234; for (int i=0; i<100; i++) next_u64(); } // Generate a random 64-bit number. uint64_t next_u64(void) { uint64_t s0 = state[0], s1 = state[1]; uint64_t result = s0 + s1; s1 ^= s0; state[0] = rotl(s0, 55) ^ s1 ^ (s1 << 14); state[1] = rotl(s1, 36); return result; } // Generate a random 32-bit number. uint32_t next_u32(void) { return next_u64() >> 11; } // Generate a number between 0 and range-1. unsigned int next_range(unsigned int range) { /* * This is not perfectly uniform, unless the range is a power of two. * However, for 64-bit random values and 32-bit ranges, the bias is * insignificant. */ return next_u64() % range; } }; #endif
 .PHONY: plot test: @for exp in star random ; do \ for impl in std naive ; do \ echo "t-\$\$exp-\$\$impl" ; \ time ./fibonacci_experiment.py \$\$exp 42 \$\$impl >t-\$\$exp-\$\$impl ; \ done ; \ done @for b in biased-10 biased-40 biased-70 biased-100 ; do \ echo "t-\$\$b" ; \ time ./fibonacci_experiment.py \$\$b 42 std >t-\$\$b ; \ done
 #!/usr/bin/env python3 import sys, itertools, random from math import sqrt, log, exp from fibonacci import FibonacciHeap as Heap class BenchmarkingHeap(Heap): """A modified Fibonacci heap for benchmarking. We inherit the implementation of operations from the FibonacciHeap class and extend it by keeping statistics on the number of operations and the total number of structural changes. Also, if naive is turned on, the decrease does not mark parent which just lost a child. """ def __init__(self, naive=False): Heap.__init__(self) self.naive = naive self._ops = 0 self._steps = 0 self._max_size = 0 # used to get upper bound on number of buckets in naive version def stats(self): return " %.3f" % (self._steps / max(self._ops, 1)) def insert(self, *args): self._ops += 1 return Heap.insert(self, *args) def decrease(self, node, new_prio): self._ops += 1 if self.naive: assert node is not None, "Cannot decrease None." assert node is not self._meta_root, "Cannot decrease meta root." assert new_prio <= node._prio, \ "Decrease: new priority is bigger than old one." node._prio = new_prio if self._is_root(node) or node._prio >= node._parent._prio: return self._add_child(self._meta_root, self._remove(node)) else: Heap.decrease(self, node, new_prio) def extract_min(self): self._ops += 1 return Heap.extract_min(self) def _add_child(self, *args): self._steps += 1 Heap._add_child(self, *args) def _remove(self, *args): self._steps += 1 return Heap._remove(self, *args) def _max_rank(self): self._max_size = max(self._max_size, self._size) if self.naive: return int(sqrt(2 * self._max_size)) + 2 return Heap._max_rank(self) def remove(H, node): H.decrease(node, -10**9) H.extract_min() def consolidate(H): remove(H, H.insert(0)) def log_range(b, e, n): assert 0 < b < e for i in range(n): yield int(b * exp(log(e / b) / (n - 1) * i)) def star(H, node_map, n, r, consolidate_): """Construct a star with n nodes and root index r. 
""" if n == 1: assert node_map[r] is None node_map[r] = H.insert(r) if consolidate_: consolidate(H) else: star(H, node_map, n - 1, r, False) star(H, node_map, n - 1, r + n - 1, True) for i in range(r + n, r + 2*n - 2): remove(H, node_map[i]) node_map[i] = None def test_star(naive): """Generates a sequence on which non-cascading heaps need lots of time. Source: "Replacing Mark Bits with Randomness in Fibonacci Heaps" Jerry Li and John Peebles, MIT -> modified so that only a quadratic number of elements are needed in the star construction. """ for i in range(1, 17): # was 26 start = 3 N = start + i * (i+1) // 2 H = BenchmarkingHeap(naive) node_map = [None] * N for j in range(i, 0, -1): star(H, node_map, j, start, False) start += j for i in range(1 << i): H.insert(1) H.insert(1) H.extract_min() H.extract_min() print(N, H.stats()) def test_random(ins, dec, rem, naive): """A random test. The test does 2N insertions first, and then N random operations insert / decrease / extract_min each with probability proprotional to its weight ins / dec / rem. 
""" for N in log_range(50, 80000, 30): H = BenchmarkingHeap(naive) node_map = [] s = ins + dec + rem ops = [ 0 ] * ((N*ins) // s) + [ 1 ] * ((N*dec) // s) + [ 2 ] * ((N*rem) // s) random.shuffle(ops) def insert(): node_map.append(H.insert(random.randrange(5 * N), len(node_map))) for _ in range(2*N): insert() for op in ops: if op == 0: insert() elif op == 1: node = random.choice(node_map) p = node.priority() - random.randrange(N // 5) - 1 H.decrease(node, p) else: i, _ = H.extract_min() if i + 1 == len(node_map): node_map.pop() else: node_map[i] = node_map.pop() node_map[i].payload = i print(N, H.stats()) tests = { "star": test_star, "random": lambda n: test_random(10, 10, 10, n), "biased-10": lambda n: test_random(10, 10, 20, n), "biased-40": lambda n: test_random(10, 40, 20, n), "biased-70": lambda n: test_random(10, 70, 20, n), "biased-100": lambda n: test_random(10, 100, 20, n), } if len(sys.argv) == 4: test, student_id = sys.argv[1], sys.argv[2] if sys.argv[3] == "std": naive = False elif sys.argv[3] == "naive": naive = True else: raise ValueError("Last argument must be either 'std' or 'naive'") random.seed(int(student_id)) if test in tests: tests[test](naive) else: raise ValueError("Unknown test {}".format(test)) else: raise ValueError("Usage: {} (std|naive)".format(sys.argv[0]))
## Goal

The goal of this assignment is to evaluate your implementation of Fibonacci heap experimentally and to compare it with a "naive" implementation which does not mark vertices after cutting a child.

You are given a test program (`fibonacci_experiment`) which calls your implementation from the previous assignment to perform the following experiments:

- _Star test:_ A specific sequence of operations on which the naive implementation creates $Θ(\sqrt{n})$ stars of size 1 up to $Θ(\sqrt{n})$.
- _Random test:_ A test of size $n$ first inserts $2n$ elements and then it does $n$ operations, of which one third are insertions, one third are decreases and one third are extract\_mins, in random order.
- _Biased tests:_ Like the random test, but the weights of the operations are different. First the test inserts $2n$ elements and then it does $n$ random operations. The number of operations of each type is proportional to its weight. The weight of insert is 10, the weight of extract\_min is 20 and the weight of decrease is the number in the name of the test; the implemented possibilities are 10, 40, 70 and 100.

The program tries each experiment with different values of $n$. In each try, it prints the average number of structural changes (adding and removing a child of a node) per one operation (insert, decrease and extract\_min).

You should perform these experiments and write a report, which contains one plot of the measured data for each test. The plots should be the following:

- _Star test:_ One plot with two curves – one for the standard and the other one for the naive version.
- _Random test:_ One plot with two curves – one for the standard and the other one for the naive version.
- _Biased tests:_ One plot with 4 curves – one for each value of bias, all of them tested with the standard implementation. Note that we do not run the biased tests with the naive implementation.

Each plot should show the dependence of the average number of structural changes on the test size $n$.
The report should discuss the experimental results and try to explain the observed behavior using theory from the lectures. (If you want, you can carry out further experiments to gain better understanding of the data structure and include these in the report. This is strictly optional.)

You should submit a PDF file with the report (and no source code). You will get 1 temporary point upon submission if the file is syntactically correct; proper points will be assigned later.

## Test program

The test program is given three arguments:

- The name of the test (`star`, `random`, `biased-10`, `biased-40`, `biased-70`, `biased-100`).
- The random seed: you should use the last 2 digits of your student ID (you can find it in the Study Information System – just click on the Personal data icon). Please include the random seed in your report.
- The implementation to test (`std` or `naive`).

The output of the program contains one line per experiment, which consists of the set size and the average number of structural changes.

## Your implementation

Please use your implementation from the previous exercise. If you used C++, make sure that the `add_child()`, `remove()` and `max_rank()` methods are declared virtual (they will be overridden by the test program). Also, if you are performing any structural changes without using the `add_child()` and `remove()` helper methods, you need to adjust the `BenchmarkingHeap` class accordingly.

## Hints

The following tools can be useful for producing nice plots:

- [pandas](https://pandas.pydata.org/)
- [matplotlib](https://matplotlib.org/)
- [gnuplot](http://www.gnuplot.info/)

A quick checklist for plots:

- Is there a caption explaining what is plotted?
- Are the axes clearly labelled? Do they have value ranges and units?
- Have you mentioned that this axis has logarithmic scale? (Logarithmic graphs are more fitting in some cases, but you should tell.)
- Is it clear which curve means what?
- Is it clear what are the measured points and what is an interpolated curve between them? - Are there any overlaps? (E.g., the most interesting part of the curve hidden underneath a label?) In your discussion, please distinguish the following kinds of claims. It should be always clear which is which: - Experimental results (i.e., the raw data you obtained from the experiments) - Theoretical facts (i.e., claims we have proved mathematically) - Your hypotheses (e.g., when you claim that the graph looks like something is true, but you are not able to prove rigorously that it always holds)