From c87aafd8744e58fdad2ed402700d80be8ffd0f50 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Ondra=20Mi=C4=8Dka=20=40=20miles-teg?=
 <mitch.ondra@gmail.com>
Date: Mon, 30 Mar 2020 14:42:54 +0200
Subject: [PATCH] Fibonacci experiment

---
 06-fibonacci_experiment/cpp/Makefile          |  22 ++
 .../cpp/fibonacci_experiment.cpp              | 256 ++++++++++++++++++
 06-fibonacci_experiment/cpp/random.h          |  61 +++++
 06-fibonacci_experiment/python/Makefile       |  13 +
 .../python/fibonacci_experiment.py            | 172 ++++++++++++
 06-fibonacci_experiment/task.md               |  93 +++++++
 6 files changed, 617 insertions(+)
 create mode 100644 06-fibonacci_experiment/cpp/Makefile
 create mode 100644 06-fibonacci_experiment/cpp/fibonacci_experiment.cpp
 create mode 100644 06-fibonacci_experiment/cpp/random.h
 create mode 100644 06-fibonacci_experiment/python/Makefile
 create mode 100644 06-fibonacci_experiment/python/fibonacci_experiment.py
 create mode 100644 06-fibonacci_experiment/task.md

diff --git a/06-fibonacci_experiment/cpp/Makefile b/06-fibonacci_experiment/cpp/Makefile
new file mode 100644
index 0000000..589ab7c
--- /dev/null
+++ b/06-fibonacci_experiment/cpp/Makefile
@@ -0,0 +1,22 @@
+.PHONY: test
+test: fibonacci_experiment
+	@for exp in star random; do \
+		for impl in std naive ; do \
+			echo "t-$$exp-$$impl" ; \
+			time ./fibonacci_experiment $$exp 42 $$impl >t-$$exp-$$impl ; \
+		done ; \
+	done
+	@for b in biased-10 biased-40 biased-70 biased-100 ; do \
+		echo "t-$$b" ; \
+		time ./fibonacci_experiment $$b 42 std >t-$$b ; \
+	done
+
+INCLUDE ?= .
+CXXFLAGS=-std=c++11 -O2 -Wall -Wextra -g -Wno-sign-compare -I$(INCLUDE)
+
+fibonacci_experiment: fibonacci.h fibonacci_experiment.cpp $(INCLUDE)/random.h
+	$(CXX) $(CPPFLAGS) $(CXXFLAGS) fibonacci_experiment.cpp -o $@
+
+.PHONY: clean
+clean:
+	rm -f fibonacci_experiment
diff --git a/06-fibonacci_experiment/cpp/fibonacci_experiment.cpp b/06-fibonacci_experiment/cpp/fibonacci_experiment.cpp
new file mode 100644
index 0000000..3e8d9b5
--- /dev/null
+++ b/06-fibonacci_experiment/cpp/fibonacci_experiment.cpp
@@ -0,0 +1,256 @@
+#include <algorithm>
+#include <functional>
+#include <string>
+#include <utility>
+#include <vector>
+#include <iostream>
+#include <cmath>
+
+#include "fibonacci.h"
+#include "random.h"
+
+void expect_failed(const std::string& message) {
+    fprintf(stderr, "Test error: %s\n", message.c_str());
+    exit(1);
+}
+
+
+/*
+ *  A modified Fibonacci heap for benchmarking.
+ *
+ *  We inherit the implementation of operations from the FibonacciHeap class
+ *  and extend it by keeping statistics on the number of operations
+ *  and the total number of structural changes. Also, if naive is turned on,
+ *  the decrease does not mark parent which just lost a child.
+ */
+
+class HeapTests : public FibonacciHeap {
+    int ops = 0;
+    int steps = 0;
+    bool naive;
+    size_t max_size = 0;
+
+  public:
+    HeapTests(bool naive = false) : naive(naive) {}
+
+    void stats() {
+        printf(" %.3lf\n", (double)(steps * 1.0 / std::max(1, ops)));
+    }
+
+    Node *insert(priority_t prio, payload_t payload = payload_t()) {
+        ops++;
+        return FibonacciHeap::insert(prio, payload);
+    }
+
+    std::pair<payload_t, priority_t> extract_min() {
+        ops++;
+        return FibonacciHeap::extract_min();
+    }
+
+    void decrease(Node *node, priority_t new_prio) {
+        ops++;
+        if (naive) {
+            EXPECT(node, "Cannot decrease null pointer.");
+            EXPECT(node->prio >= new_prio, "Decrase: new priority larger than old one.");
+
+            node->prio = new_prio;
+            if (is_root(node) || new_prio >= node->parent->prio) return;
+
+            add_child(&meta_root, remove(node));
+        } else FibonacciHeap::decrease(node, new_prio);
+    }
+
+  protected:
+    size_t max_rank() override {
+        max_size = std::max(max_size, size);
+        if (naive) return sqrt(2*max_size) + 2;
+        else return FibonacciHeap::max_rank();
+    }
+
+    void add_child(Node* parent, Node *node) override {
+        steps++;
+        FibonacciHeap::add_child(parent, node);
+    }
+
+    Node *remove(Node *n) override {
+        steps++;
+        return FibonacciHeap::remove(n);
+    }
+};
+
+using Heap = HeapTests;
+
+RandomGen *rng;         // Random generator object
+
+
+std::vector<int> log_range(int b, int e, int n) {
+    std::vector<int> ret;
+    for (int i = 0; i < n; i++)
+        ret.push_back(b * exp(log(e / b) / (n - 1) * i));
+    return ret;
+}
+
+// An auxiliary function for generating a random permutation.
+template < typename Vector >
+void random_permutation(Vector& v)
+{
+    for (int i = 0; i < v.size(); i++)
+        std::swap(v[i], v[i + rng->next_range(v.size() - i)]);
+}
+
+// Remove given node from heap.
+void remove(Heap* H, Node *n) {
+    H->decrease(n, -1000*1000*1000);
+    H->extract_min();
+}
+
+// Force consolidation of the heap.
+void consolidate(Heap* H) {
+    remove(H, H->insert(0));
+}
+
+// Construct a star with n nodes and root index r.
+void star(Heap *H, Node **node_map, int n, int r, bool consolidate_) {
+    if (n == 1) {
+        EXPECT(!node_map[r], "");
+        node_map[r] = H->insert(r);
+        if (consolidate_) consolidate(H);
+    } else {
+        star(H, node_map, n - 1, r, false);
+        star(H, node_map, n - 1, r + n - 1, true);
+
+        for (int i = r + n; i < r + 2*n - 2; i++) {
+            remove(H, node_map[i]);
+            node_map[i] = nullptr;
+        }
+    }
+}
+
+/* Generates a sequence on which non-cascading heaps need lots of time.
+ * Source: "Replacing Mark Bits with Randomness in Fibonacci Heaps" Jerry Li and John Peebles, MIT
+ * -> modified so that only a quadratic number of elements are needed in the star construction.
+ */
+void test_star(bool naive) {
+    for (int i = 1; i < 17; i++) {
+        int start = 3;
+        int N = start + i * (i+1) / 2;
+        Heap H(naive);
+        Node **node_map = new Node*[N];
+        for (int i = 0; i < N; i++) node_map[i] = nullptr;
+
+        for (int j = i; j > 0; j--) {
+            star(&H, node_map, j, start, false);
+            start += j;
+        }
+
+        for (int j = 0; j < (1 << i); j++) {
+            H.insert(1);
+            H.insert(1);
+            H.extract_min();
+            H.extract_min();
+        }
+
+        printf("%i", N);
+        H.stats();
+
+        delete[] node_map;
+    }
+}
+
+/* A random test.
+ *
+ * The test does 2N insertions first, and then N random operations insert / decrease /
+ * extract_min, each with probability proprotional to its weight ins / dec / rem.
+ */
+void test_random(int ins, int dec, int rem, bool naive) {
+    for (int N : log_range(50, 80000, 30)) {
+        Heap H(naive);
+        std::vector<Node*> node_map;
+        int s = ins + dec + rem;
+
+        std::vector<int> ops;
+        for (int i = 0; i < (N*ins) / s; i++) ops.push_back(0);
+        for (int i = 0; i < (N*dec) / s; i++) ops.push_back(1);
+        for (int i = 0; i < (N*rem) / s; i++) ops.push_back(2);
+        random_permutation(ops);
+
+#       define INSERT() node_map.push_back(H.insert(rng->next_range(5*N), node_map.size()))
+
+        for (int i = 0; i < 2*N; i++) INSERT();
+
+        for (int op : ops) switch (op) {
+          case 0:
+            INSERT();
+            break;
+          case 1: {
+            Node *node = node_map[rng->next_range(node_map.size())];
+            int p = node->priority() - rng->next_range(N / 5) - 1;
+            H.decrease(node, p);
+            break;
+          }
+          default: {
+            auto ret = H.extract_min();
+            if (ret.first + 1 != node_map.size()) {
+                node_map[ret.first] = node_map.back();
+                node_map[ret.first]->payload = ret.first;
+            }
+            node_map.pop_back();
+          }
+        }
+
+        printf("%i", N);
+        H.stats();
+
+#       undef INSERT
+    }
+}
+
+
+std::vector<std::pair<std::string, std::function<void(bool)>>> tests = {
+    { "star",       test_star },
+    { "random",     [](bool n){ test_random(10, 10, 10, n); } },
+    { "biased-10",  [](bool n){ test_random(10, 10, 20, n); } },
+    { "biased-40",  [](bool n){ test_random(10, 40, 20, n); } },
+    { "biased-70",  [](bool n){ test_random(10, 70, 20, n); } },
+    { "biased-100", [](bool n){ test_random(10, 100, 20, n); } },
+};
+
+int main(int argc, char **argv)
+{
+    if (argc != 4) {
+        fprintf(stderr, "Usage: %s <test> <student-id> (std|naive)\n", argv[0]);
+        return 1;
+    }
+
+    std::string which_test = argv[1];
+    std::string id_str = argv[2];
+    std::string mode = argv[3];
+
+    try {
+        rng = new RandomGen(stoi(id_str));
+    } catch (...) {
+        fprintf(stderr, "Invalid student ID\n");
+        return 1;
+    }
+
+    bool naive;
+    if (mode == "std")
+      naive = false;
+    else if (mode == "naive")
+      naive = true;
+    else
+      {
+        fprintf(stderr, "Last argument must be either 'std' or 'naive'\n");
+        return 1;
+      }
+
+    for (const auto& test : tests) {
+        if (test.first == which_test) {
+            test.second(naive);
+            return 0;
+        }
+    }
+
+    fprintf(stderr, "Unknown test %s\n", which_test.c_str());
+    return 1;
+}
diff --git a/06-fibonacci_experiment/cpp/random.h b/06-fibonacci_experiment/cpp/random.h
new file mode 100644
index 0000000..5ef10ae
--- /dev/null
+++ b/06-fibonacci_experiment/cpp/random.h
@@ -0,0 +1,61 @@
+#ifndef DS1_RANDOM_H
+#define DS1_RANDOM_H
+
+#include <cstdint>
+
+/*
+ * This is the xoroshiro128+ random generator, designed in 2016 by David Blackman
+ * and Sebastiano Vigna, distributed under the CC-0 license. For more details,
+ * see http://vigna.di.unimi.it/xorshift/.
+ *
+ * Rewritten to C++ by Martin Mares, also placed under CC-0.
+ */
+
+class RandomGen {
+    uint64_t state[2];
+
+    uint64_t rotl(uint64_t x, int k)
+    {
+        return (x << k) | (x >> (64 - k));
+    }
+
+  public:
+    // Initialize the generator, set its seed and warm it up.
+    RandomGen(unsigned int seed)
+    {
+        state[0] = seed * 0xdeadbeef;
+        state[1] = seed ^ 0xc0de1234;
+        for (int i=0; i<100; i++)
+            next_u64();
+    }
+
+    // Generate a random 64-bit number.
+    uint64_t next_u64(void)
+    {
+        uint64_t s0 = state[0], s1 = state[1];
+        uint64_t result = s0 + s1;
+        s1 ^= s0;
+        state[0] = rotl(s0, 55) ^ s1 ^ (s1 << 14);
+        state[1] = rotl(s1, 36);
+        return result;
+    }
+
+    // Generate a random 32-bit number.
+    uint32_t next_u32(void)
+    {
+      return next_u64() >> 11;
+    }
+
+    // Generate a number between 0 and range-1.
+    unsigned int next_range(unsigned int range)
+    {
+        /*
+         * This is not perfectly uniform, unless the range is a power of two.
+         * However, for 64-bit random values and 32-bit ranges, the bias is
+         * insignificant.
+         */
+        return next_u64() % range;
+    }
+};
+
+#endif
diff --git a/06-fibonacci_experiment/python/Makefile b/06-fibonacci_experiment/python/Makefile
new file mode 100644
index 0000000..656f937
--- /dev/null
+++ b/06-fibonacci_experiment/python/Makefile
@@ -0,0 +1,13 @@
+.PHONY: plot
+test:
+	@for exp in star random ; do \
+		for impl in std naive ; do \
+			echo "t-$$exp-$$impl" ; \
+			time ./fibonacci_experiment.py $$exp 42 $$impl >t-$$exp-$$impl ; \
+		done ; \
+	done
+	@for b in biased-10 biased-40 biased-70 biased-100 ; do \
+		echo "t-$$b" ; \
+		time ./fibonacci_experiment.py $$b 42 std >t-$$b ; \
+	done
+
diff --git a/06-fibonacci_experiment/python/fibonacci_experiment.py b/06-fibonacci_experiment/python/fibonacci_experiment.py
new file mode 100644
index 0000000..eb7a091
--- /dev/null
+++ b/06-fibonacci_experiment/python/fibonacci_experiment.py
@@ -0,0 +1,172 @@
+#!/usr/bin/env python3
+
+import sys, itertools, random
+from math import sqrt, log, exp
+from fibonacci import FibonacciHeap as Heap
+
+class BenchmarkingHeap(Heap):
+    """A modified Fibonacci heap for benchmarking.
+
+    We inherit the implementation of operations from the FibonacciHeap class
+    and extend it by keeping statistics on the number of operations
+    and the total number of structural changes. Also, if naive is turned on,
+    the decrease does not mark parent which just lost a child.
+    """
+
+    def __init__(self, naive=False):
+        Heap.__init__(self)
+        self.naive = naive
+        self._ops = 0
+        self._steps = 0
+        self._max_size = 0 # used to get upper bound on number of buckets in naive version
+
+    def stats(self):
+        return " %.3f" % (self._steps / max(self._ops, 1))
+
+    def insert(self, *args):
+        self._ops += 1
+        return Heap.insert(self, *args)
+
+    def decrease(self, node, new_prio):
+        self._ops += 1
+        if self.naive:
+            assert node is not None, "Cannot decrease None."
+            assert node is not self._meta_root, "Cannot decrease meta root."
+            assert new_prio <= node._prio, \
+                "Decrease: new priority is bigger than old one."
+
+            node._prio = new_prio
+            if self._is_root(node) or node._prio >= node._parent._prio:
+                return
+
+            self._add_child(self._meta_root, self._remove(node))
+        else:
+            Heap.decrease(self, node, new_prio)
+
+    def extract_min(self):
+        self._ops += 1
+        return Heap.extract_min(self)
+
+    def _add_child(self, *args):
+        self._steps += 1
+        Heap._add_child(self, *args)
+
+    def _remove(self, *args):
+        self._steps += 1
+        return Heap._remove(self, *args)
+
+    def _max_rank(self):
+        self._max_size = max(self._max_size, self._size)
+        if self.naive: return int(sqrt(2 * self._max_size)) + 2
+        return Heap._max_rank(self)
+
+def remove(H, node):
+    H.decrease(node, -10**9)
+    H.extract_min()
+
+def consolidate(H):
+    remove(H, H.insert(0))
+
+def log_range(b, e, n):
+    assert 0 < b < e
+    for i in range(n):
+        yield int(b * exp(log(e / b) / (n - 1) * i))
+
+def star(H, node_map, n, r, consolidate_):
+    """Construct a star with n nodes and root index r.
+    """
+    if n == 1:
+        assert node_map[r] is None
+        node_map[r] = H.insert(r)
+        if consolidate_:
+            consolidate(H)
+    else:
+        star(H, node_map, n - 1, r, False)
+        star(H, node_map, n - 1, r + n - 1, True)
+
+        for i in range(r + n, r + 2*n - 2):
+            remove(H, node_map[i])
+            node_map[i] = None
+
+def test_star(naive):
+    """Generates a sequence on which non-cascading heaps need lots of time.
+    Source: "Replacing Mark Bits with Randomness in Fibonacci Heaps" Jerry Li and John Peebles, MIT
+    -> modified so that only a quadratic number of elements are needed in the star construction.
+    """
+    for i in range(1, 17): # was 26
+        start = 3
+        N = start + i * (i+1) // 2
+        H = BenchmarkingHeap(naive)
+        node_map = [None] * N
+
+        for j in range(i, 0, -1):
+            star(H, node_map, j, start, False)
+            start += j
+
+        for i in range(1 << i):
+            H.insert(1)
+            H.insert(1)
+            H.extract_min()
+            H.extract_min()
+
+        print(N, H.stats())
+
+def test_random(ins, dec, rem, naive):
+    """A random test.
+
+    The test does 2N insertions first, and then N random operations insert / decrease /
+    extract_min each with probability proprotional to its weight ins / dec / rem.
+    """
+    for N in log_range(50, 80000, 30):
+        H = BenchmarkingHeap(naive)
+        node_map = []
+        s = ins + dec + rem
+        ops = [ 0 ] * ((N*ins) // s) + [ 1 ] * ((N*dec) // s) + [ 2 ] * ((N*rem) // s)
+        random.shuffle(ops)
+
+        def insert():
+            node_map.append(H.insert(random.randrange(5 * N), len(node_map)))
+
+        for _ in range(2*N): insert()
+
+        for op in ops:
+            if op == 0: insert()
+            elif op == 1:
+                node = random.choice(node_map)
+                p = node.priority() - random.randrange(N // 5) - 1
+                H.decrease(node, p)
+            else:
+                i, _ = H.extract_min()
+                if i + 1 == len(node_map):
+                    node_map.pop()
+                else:
+                    node_map[i] = node_map.pop()
+                    node_map[i].payload = i
+
+        print(N, H.stats())
+
+tests = {
+    "star": test_star,
+    "random": lambda n: test_random(10, 10, 10, n),
+    "biased-10": lambda n: test_random(10, 10, 20, n),
+    "biased-40": lambda n: test_random(10, 40, 20, n),
+    "biased-70": lambda n: test_random(10, 70, 20, n),
+    "biased-100": lambda n: test_random(10, 100, 20, n),
+}
+
+if len(sys.argv) == 4:
+    test, student_id = sys.argv[1], sys.argv[2]
+    if sys.argv[3] == "std":
+        naive = False
+    elif sys.argv[3] == "naive":
+        naive = True
+    else:
+        raise ValueError("Last argument must be either 'std' or 'naive'")
+    random.seed(int(student_id))
+    if test in tests:
+        tests[test](naive)
+    else:
+        raise ValueError("Unknown test {}".format(test))
+else:
+    raise ValueError("Usage: {} <test> <student-id> (std|naive)".format(sys.argv[0]))
+
diff --git a/06-fibonacci_experiment/task.md b/06-fibonacci_experiment/task.md
new file mode 100644
index 0000000..4e834ba
--- /dev/null
+++ b/06-fibonacci_experiment/task.md
@@ -0,0 +1,93 @@
+## Goal
+
+The goal of this assignment is to evaluate your implementation of Fibonacci heap
+experimentally and to compare it with a "naive" implementation which does not
+mark vertices after cutting a child.
+
+You are given a test program (`fibonacci_experiment`) which calls your
+implementation from the previous assignment to perform the following
+experiments:
+
+- _Star test:_ Specific sequence of operations on which naive implementation
+  creates $Θ(\sqrt{n})$ stars of size 1 up to $Θ(\sqrt{n})$.
+- _Random test:_ Test of size $n$ first inserts $2n$ elements and then it does
+  $n$ operations of which one third are insertions, one third are decreases
+  and one third are extract\_mins in random order.
+- _Biased tests:_ Like random test but the weights of operations are different.
+  First the test inserts $2n$ elements and then it does random $n$ operations.
+  Number of operations of each type is proportional to their weight.
+  Weight of insert is 10, weight of extract\_min 20 and weight of decrease is
+  the number in the name of the test, and implemented possibilities are 10, 40,
+  70 and 100.
+
+The program tries each experiment with different values of $n$. In each try,
+it prints the average number of structural changes (adding and removing a child
+of a node) per one operation (insert, decrease and extract\_min).
+
+You should perform these experiments and write a report, which contains one
+plot of the measured data for each test. The plots should be following:
+
+- _Star test:_ One plot with two curves – one for standard and the other one for naive
+  version.
+- _Random test:_ One plot with two curves – one for standard and the other one for naive
+  version.
+- _Biased tests:_ One plot with 4 curves – one for each value of bias, all of them tested
+  with standard implementation. Note that we do not do biased test naive implementation.
+
+Each plot should show the dependence
+of the average number of structural changes on the test size $n$.
+
+The report should discuss the experimental results and try to explain the observed
+behavior using theory from the lectures. (If you want, you can carry out further
+experiments to gain better understanding of the data structure and include these
+in the report. This is strictly optional.)
+
+You should submit a PDF file with the report (and no source code).
+You will get 1 temporary point upon submission if the file is syntactically correct;
+proper points will be assigned later.
+
+## Test program
+
+The test program is given three arguments:
+- The name of the test (`star`, `random`, `biased-10`, `biased-40`, `biased-70`, `biased-100`).
+- The random seed: you should use the last 2 digits of your student ID (you can find
+  it in the Study Information System – just click on the Personal data icon). Please
+  include the random seed in your report.
+- The implementation to test (`std` or `naive`).
+
+The output of the program contains one line per experiment, which consists of
+the set size and the average number of structural changes.
+
+## Your implementation
+
+Please use your implementation from the previous exercise. If you used C++,
+make sure that the `add_child()`,
+`remove()` and `max_rank()` methods are declared virtual (they will
+be overriden by the test program). Also, if you are performing any structural changes
+without using the `add_child()` and `remove()` helper methods, you need to adjust
+the `BenchmarkingHeap` class accordingly.
+
+## Hints
+
+The following tools can be useful for producing nice plots:
+- [pandas](https://pandas.pydata.org/)
+- [matplotlib](https://matplotlib.org/)
+- [gnuplot](http://www.gnuplot.info/)
+
+A quick checklist for plots:
+- Is there a caption explaining what is plotted?
+- Are the axes clearly labelled? Do they have value ranges and units?
+- Have you mentioned that this axis has logarithmic scale? (Logarithmic graphs
+  are more fitting in some cases, but you should tell.)
+- Is it clear which curve means what?
+- Is it clear what are the measured points and what is an interpolated
+  curve between them?
+- Are there any overlaps? (E.g., the most interesting part of the curve
+  hidden underneath a label?)
+
+In your discussion, please distinguish the following kinds of claims.
+It should be always clear which is which:
+- Experimental results (i.e., the raw data you obtained from the experiments)
+- Theoretical facts (i.e., claims we have proved mathematically)
+- Your hypotheses (e.g., when you claim that the graph looks like something is true,
+  but you are not able to prove rigorously that it always holds)
-- 
GitLab