Splay experiment

81ddad7f · Ondřej Mička · 99962c86 · 81ddad7f · 81ddad7f · 81ddad7f
Commit 81ddad7f authored Mar 2, 2020 by Ondřej Mička
--- a/03-splay_experiment/cpp/Makefile
+++ b/03-splay_experiment/cpp/Makefile
+# Seed for the random generator; override it on the command line, e.g.
+# `make test STUDENT_ID=42`. The placeholder default is not a number, so the
+# test program rejects it with "Invalid student ID".
+STUDENT_ID ?= PLEASE_SET_STUDENT_ID
+
+# Run every experiment in both splay modes, one output file per combination.
+# `|| exit 1` aborts the target on the first failing run; without it the shell
+# loop would carry on and make would report success.
+.PHONY: test
+test: splay_experiment
+	@rm -rf out && mkdir out
+	@for test in sequential random subset ; do \
+		for mode in std naive ; do \
+			echo t-$$test-$$mode ; \
+			./splay_experiment $$test $(STUDENT_ID) $$mode >out/t-$$test-$$mode || exit 1 ; \
+		done ; \
+	done
+
+# Directory that contains random.h; defaults to the current directory.
+INCLUDE ?= .
+CXXFLAGS=-std=c++11 -O2 -Wall -Wextra -g -Wno-sign-compare -I$(INCLUDE)
+
+# The headers stay listed as prerequisites so that editing them triggers a
+# rebuild, but only the .cpp file is handed to the compiler: with $^ the
+# headers would be passed as additional inputs alongside -o.
+splay_experiment: splay_operation.h splay_experiment.cpp $(INCLUDE)/random.h
+	$(CXX) $(CPPFLAGS) $(CXXFLAGS) splay_experiment.cpp -o $@
+
+# Remove the built binary and the directory with the test outputs.
+# $(RM) is make's predefined `rm -f`.
+.PHONY: clean
+clean:
+	$(RM) splay_experiment
+	$(RM) -r out
--- a/03-splay_experiment/cpp/random.h
+++ b/03-splay_experiment/cpp/random.h
+#ifndef DS1_RANDOM_H
+#define DS1_RANDOM_H
+
+#include <cstdint>
+
+/*
+ * This is the xoroshiro128+ random generator, designed in 2016 by David Blackman
+ * and Sebastiano Vigna, distributed under the CC-0 license. For more details,
+ * see http://vigna.di.unimi.it/xorshift/.
+ *
+ * Rewritten to C++ by Martin Mares, also placed under CC-0.
+ */
+
+class RandomGen {
+    uint64_t state[2];
+
+    // Rotate the 64-bit word left by the given number of bits.
+    uint64_t rotl(uint64_t word, int bits)
+    {
+        const uint64_t shifted_up = word << bits;
+        const uint64_t wrapped = word >> (64 - bits);
+        return shifted_up | wrapped;
+    }
+
+  public:
+    // Derive both state words from the seed, then discard the first 100
+    // outputs to warm the generator up.
+    RandomGen(unsigned int seed)
+    {
+        state[0] = seed * 0xdeadbeef;
+        state[1] = seed ^ 0xc0de1234;
+        int warmup = 100;
+        while (warmup-- > 0)
+            next_u64();
+    }
+
+    // Generate a random 64-bit number and advance the state.
+    uint64_t next_u64(void)
+    {
+        const uint64_t first = state[0];
+        uint64_t second = state[1];
+        const uint64_t sum = first + second;
+
+        second ^= first;
+        state[0] = rotl(first, 55) ^ second ^ (second << 14);
+        state[1] = rotl(second, 36);
+        return sum;
+    }
+
+    // Generate a random 32-bit number: a 64-bit output shifted right by 11
+    // bits and truncated to 32 bits.
+    uint32_t next_u32(void)
+    {
+        const uint64_t wide = next_u64();
+        return static_cast<uint32_t>(wide >> 11);
+    }
+
+    // Generate a number between 0 and range-1.
+    unsigned int next_range(unsigned int range)
+    {
+        /*
+         * Reducing modulo range is not perfectly uniform unless the range is
+         * a power of two. However, for 64-bit random values and 32-bit
+         * ranges, the bias is insignificant.
+         */
+        const uint64_t wide = next_u64();
+        return static_cast<unsigned int>(wide % range);
+    }
+};
+
+#endif
--- a/03-splay_experiment/cpp/splay_experiment.cpp
+++ b/03-splay_experiment/cpp/splay_experiment.cpp
+#include <algorithm>
+#include <functional>
+#include <string>
+#include <utility>
+#include <vector>
+#include <iostream>
+#include <cmath>
+
+#include "splay_operation.h"
+#include "random.h"
+
+using namespace std;
+
+/*
+ *  A modified Splay tree for benchmarking.
+ *
+ *  We inherit the implementation of operations from the Tree class
+ *  and extend it by keeping statistics on the number of splay operations
+ *  and the total number of rotations. Also, if naive is turned on,
+ *  splay uses only single rotations.
+ *
+ *  Please make sure that your Tree class defines the rotate() and splay()
+ *  methods as virtual.
+ */
+
+class BenchmarkingTree : public Tree {
+public:
+    int num_operations;     // splay() calls since the last reset()
+    int num_rotations;      // rotate() calls since the last reset()
+    bool do_naive;          // splay by single rotations only
+
+    BenchmarkingTree(bool naive=false)
+        : num_operations(0), num_rotations(0), do_naive(naive)
+    {
+    }
+
+    // Zero both counters.
+    void reset()
+    {
+        num_rotations = 0;
+        num_operations = 0;
+    }
+
+    // Count the rotation, then let the base class perform it.
+    void rotate(Node *node) override
+    {
+        ++num_rotations;
+        Tree::rotate(node);
+    }
+
+    // Count the operation. In naive mode the node is rotated until it has no
+    // parent; otherwise the base class splay is used.
+    void splay(Node *node) override
+    {
+        ++num_operations;
+        if (!do_naive) {
+            Tree::splay(node);
+            return;
+        }
+        while (node->parent)
+            rotate(node);
+    }
+
+    // Return the average number of rotations per operation
+    // (0 when no operation has been counted).
+    double rot_per_op()
+    {
+        if (num_operations <= 0)
+            return 0;
+        return static_cast<double>(num_rotations) / num_operations;
+    }
+};
+
+bool naive = false;         // Use of naive rotations requested (set in main)
+RandomGen *rng = nullptr;   // Random generator object (set in main)
+
+// Sequential test: for n = 100, 200, ..., 3000 insert the keys 0..n-1 in
+// increasing order, look all of them up in the same order five times, and
+// print n together with the tree's rot_per_op(). The counters are not reset
+// after the insertions.
+void test_sequential()
+{
+    for (int n=100; n<=3000; n+=100) {
+        // Direct initialization: no copy or move of the tree is involved, so
+        // this also compiles when Tree is not copyable.
+        BenchmarkingTree tree(naive);
+
+        for (int x=0; x<n; x++)
+            tree.insert(x);
+
+        for (int round=0; round<5; round++)
+            for (int x=0; x<n; x++)
+                tree.lookup(x);
+
+        cout << n << " " << tree.rot_per_op() << endl;
+    }
+}
+
+// Build a random permutation of 0..n-1 using the global generator rng.
+vector<int> random_permutation(int n)
+{
+    vector<int> perm;
+    for (int value=0; value<n; value++)
+        perm.push_back(value);
+
+    // For every position except the last, draw one of the elements from that
+    // position onwards (possibly the current one) and swap it into place.
+    for (int pos=0; pos<n-1; pos++) {
+        int offset = rng->next_range(n-pos);
+        swap(perm[pos], perm[pos + offset]);
+    }
+    return perm;
+}
+
+// Random test: for n = (int) 2^(e/4) with e = 32..64, insert the keys 0..n-1
+// in random order, look up 5n randomly chosen keys, and print n together with
+// the tree's rot_per_op(). The counters are not reset after the insertions.
+void test_random()
+{
+    for (int e=32; e<=64; e++) {
+        int n = (int) pow(2, e/4.);
+        // Direct initialization: no copy or move of the tree is involved, so
+        // this also compiles when Tree is not copyable.
+        BenchmarkingTree tree(naive);
+
+        vector<int> perm = random_permutation(n);
+        for (int x : perm)
+            tree.insert(x);
+
+        for (int i=0; i<5*n; i++)
+            tree.lookup(rng->next_range(n));
+
+        cout << n << " " << tree.rot_per_op() << endl;
+    }
+}
+
+/*
+ *  An auxiliary function for constructing arithmetic progressions.
+ *  Rearranges seq in place, using swaps only, so that every value v with
+ *  A <= v <= B ends up at index s + inc*(v - A). Values outside [A,B] are
+ *  moved only when they are displaced by such a swap.
+ */
+void make_progression(vector<int> &seq, int A, int B, int s, int inc)
+{
+    const int length = static_cast<int>(seq.size());
+    for (int i=0; i<length; i++) {
+        for (;;) {
+            const int value = seq[i];
+            if (value < A || value > B)
+                break;              // not a progression element
+            const int target = s + inc*(value - A);
+            if (target == i)
+                break;              // already in its place
+            // Send the value to its place and examine what came back.
+            swap(seq[i], seq[target]);
+        }
+    }
+}
+
+// Subset test for one subset size: for n = (int) 2^(e/4) with e = 32..64 and
+// n >= sub, insert a sequence of n keys, reset the counters, perform 10000
+// lookups of keys drawn at random from the first `sub` elements of the
+// sequence, and print sub, n and the tree's rot_per_op().
+void test_subset_s(int sub)
+{
+    for (int e=32; e<=64; e++) {
+        int n = (int) pow(2, e/4.);
+        if (n < sub)
+          continue;
+
+        // We will insert elements in order, which contain several
+        // arithmetic progressions interspersed with random elements.
+        vector<int> seq = random_permutation(n);
+        make_progression(seq, n/4, n/4 + n/20, n/10, 1);
+        make_progression(seq, n/2, n/2 + n/20, n/10, -1);
+        make_progression(seq, 3*n/4, 3*n/4 + n/20, n/2, -4);
+        make_progression(seq, 17*n/20, 17*n/20 + n/20, 2*n/5, 5);
+
+        // Direct initialization: no copy or move of the tree is involved, so
+        // this also compiles when Tree is not copyable.
+        BenchmarkingTree tree(naive);
+        for (int x : seq)
+            tree.insert(x);
+        tree.reset();   // the insertions above are not part of the statistics
+
+        for (int i=0; i<10000; i++)
+            tree.lookup(seq[rng->next_range(sub)]);
+
+        cout << sub << " " << n << " " << tree.rot_per_op() << endl;
+    }
+}
+
+// Run the subset test for subset sizes 10, 100 and 1000, in this order.
+void test_subset()
+{
+    for (int sub : {10, 100, 1000})
+        test_subset_s(sub);
+}
+
+// Dispatch table: test name accepted on the command line -> test function.
+vector<pair<string, function<void()>>> tests {
+    { "sequential", test_sequential },
+    { "random",     test_random },
+    { "subset",     test_subset },
+};
+
+/*
+ *  Entry point: splay_experiment <test> <student-id> (std|naive)
+ *
+ *  Seeds the global random generator with the student ID, selects standard
+ *  or naive splaying and runs the named test. Returns 0 after the test has
+ *  run and 1 on any error in the arguments.
+ */
+int main(int argc, char **argv)
+{
+    if (argc != 4) {
+        cerr << "Usage: " << argv[0] << " <test> <student-id> (std|naive)" << endl;
+        return 1;
+    }
+
+    string which_test = argv[1];
+    string id_str = argv[2];
+    string mode = argv[3];
+
+    // The whole argument has to be a number: stoi() on its own stops at the
+    // first non-digit, so "42abc" would silently be used as seed 42.
+    int seed = 0;
+    bool id_valid = false;
+    try {
+        size_t used = 0;
+        seed = stoi(id_str, &used);
+        id_valid = (used == id_str.size());
+    } catch (...) {
+        // stoi() throws when there is no number or it does not fit into int.
+    }
+    if (!id_valid) {
+        cerr << "Invalid student ID" << endl;
+        return 1;
+    }
+
+    // Static storage instead of a heap allocation that is never freed.
+    static RandomGen generator(seed);
+    rng = &generator;
+
+    if (mode == "std")
+      naive = false;
+    else if (mode == "naive")
+      naive = true;
+    else
+      {
+        cerr << "Last argument must be either 'std' or 'naive'" << endl;
+        return 1;
+      }
+
+    for (const auto& test : tests) {
+        if (test.first == which_test)
+          {
+            cout.precision(12);
+            test.second();
+            return 0;
+          }
+    }
+    cerr << "Unknown test " << which_test << endl;
+    return 1;
+}
--- a/03-splay_experiment/python/Makefile
+++ b/03-splay_experiment/python/Makefile
+# Seed for the random generator; override it on the command line, e.g.
+# `make test STUDENT_ID=42`.
+STUDENT_ID ?= PLEASE_SET_STUDENT_ID
+
+# Run every experiment in both splay modes, one output file per combination.
+# `|| exit 1` aborts the target on the first failing run; without it the shell
+# loop would carry on and make would report success.
+.PHONY: test
+test: splay_experiment.py
+	@rm -rf out && mkdir out
+	@for test in sequential random subset ; do \
+		for mode in std naive ; do \
+			echo t-$$test-$$mode ; \
+			./splay_experiment.py $$test $(STUDENT_ID) $$mode >out/t-$$test-$$mode || exit 1 ; \
+		done ; \
+	done
+
+# Remove the directory with the test outputs.
+# $(RM) is make's predefined `rm -f`.
+.PHONY: clean
+clean:
+	$(RM) -r out
--- a/03-splay_experiment/python/splay_experiment.py
+++ b/03-splay_experiment/python/splay_experiment.py
+#!/usr/bin/env python3
+
+import sys
+import random
+
+from splay_operation import Tree
+
+class BenchmarkingTree(Tree):
+    """ A modified Splay tree for benchmarking.
+
+    We inherit the implementation of operations from the Tree class
+    and extend it by keeping statistics on the number of splay operations
+    and the total number of rotations. Also, if naive is turned on,
+    splay uses only single rotations.
+    """
+
+    def __init__(self, naive=False):
+        Tree.__init__(self)
+        self.do_naive = naive
+        self.reset()
+
+    def reset(self):
+        """Reset statistics."""
+        self.num_rotations = 0;
+        self.num_operations = 0;
+
+    def rotate(self, node):
+        self.num_rotations += 1
+        Tree.rotate(self, node)
+
+    def splay(self, node):
+        self.num_operations += 1
+        if self.do_naive:
+            while node.parent is not None:
+                self.rotate(node)
+        else:
+            Tree.splay(self, node)
+
+    def rot_per_op(self):
+        """Return the average number of rotations per operation."""
+        if self.num_operations > 0:
+            return self.num_rotations / self.num_operations
+        else:
+            return 0
+
+def test_sequential():
+    for n in range(100, 3001, 100):
+        tree = BenchmarkingTree(naive)
+        for elem in range(n):
+            tree.insert(elem)
+
+        for _ in range(5):
+            for elem in range(n):
+                tree.lookup(elem)
+
+        print(n, tree.rot_per_op())
+
+def test_random():
+    for exp in range(32, 64):
+        n = int(2**(exp/4))
+        tree = BenchmarkingTree(naive)
+
+        for elem in random.sample(range(n), n):
+            tree.insert(elem)
+
+        for _ in range(5*n):
+            tree.lookup(random.randrange(n))
+
+        print(n, tree.rot_per_op())
+
+def make_progression(seq, A, B, s, inc):
+    """An auxiliary function for constructing arithmetic progressions.
+
+    The array seq will be modified to contain an arithmetic progression
+    of elements in interval [A,B] starting from position s with step inc.
+    """
+    for i in range(len(seq)):
+        while seq[i] >= A and seq[i] <= B and s + inc*(seq[i]-A) != i:
+            pos = s + inc*(seq[i]-A)
+            seq[i], seq[pos] = seq[pos], seq[i]
+
+def test_subset():
+    for sub in [10, 100, 1000]:
+        for exp in range(32,64):
+            n = int(2**(exp/4))
+            if n < sub:
+                continue
+
+            # We will insert elements in order, which contain several
+            # arithmetic progressions interspersed with random elements.
+            seq = random.sample(range(n), n)
+            make_progression(seq, n//4, n//4 + n//20, n//10, 1)
+            make_progression(seq, n//2, n//2 + n//20, n//10, -1)
+            make_progression(seq, 3*n//4, 3*n//4 + n//20, n//2, -4)
+            make_progression(seq, 17*n//20, 17*n//20 + n//20, 2*n//5, 5)
+
+            tree = BenchmarkingTree(naive)
+            for elem in seq:
+                tree.insert(elem)
+            tree.reset()
+
+            for _ in range(10000):
+                tree.lookup(seq[random.randrange(sub)])
+
+            print(sub, n, tree.rot_per_op())
+
+tests = {
+    "sequential": test_sequential,
+    "random": test_random,
+    "subset": test_subset,
+}
+
+if len(sys.argv) == 4:
+    test, student_id = sys.argv[1], sys.argv[2]
+    if sys.argv[3] == "std":
+        naive = False
+    elif sys.argv[3] == "naive":
+        naive = True
+    else:
+        raise ValueError("Last argument must be either 'std' or 'naive'")
+    random.seed(student_id)
+    if test in tests:
+        tests[test]()
+    else:
+        raise ValueError("Unknown test {}".format(test))
+else:
+    raise ValueError("Usage: {} <test> <student-id> (std|naive)".format(sys.argv[0]))
--- a/03-splay_experiment/task.md
+++ b/03-splay_experiment/task.md
+## Goal
+
+The goal of this assignment is to evaluate your implementation of Splay trees
+experimentally and to compare it with a "naive" implementation which splays
+using single rotations only.
+
+You are given a test program (`splay_experiment`) which calls your
+implementation from the previous assignment to perform the following
+experiments:
+
+- _Sequential test:_ Insert _n_ elements sequentially and then repeatedly
+  find them all in sequential order.
+- _Random test:_ Insert _n_ elements in random order and then find _5n_
+  random elements.
+- _Subset test:_ Insert a sequence of _n_ elements, which contains arithmetic
+  progressions interspersed with random elements. Then repeatedly access
+  a small subset of these elements in random order. Try this with subsets of
+  different cardinalities.
+
+The program tries each experiment with different values of _n_. In each try,
+it prints the average number of rotations per splay operation.
+
+You should perform these experiments and write a report that contains the following
+plots of the measured data. Each plot should show the dependence of the average
+number of rotations on the set size _n_.
+
+- The sequential test: one curve for the standard implementation, one for the naive one.
+- The random test: one curve for the standard implementation, one for the naive one.
+- The subset test: three curves for the standard implementation with different sizes
+  of the subset, three for the naive implementation with the same sizes.
+
+The report should discuss the experimental results and try to explain the observed
+behavior using theory from the lectures. (If you want, you can carry out further
+experiments to gain better understanding of the data structure and include these
+in the report. This is strictly optional.)
+
+You should submit a PDF file with the report (and no source code).
+You will get 1 temporary point upon submission if the file is syntactically correct;
+proper points will be assigned later.
+
+## Test program
+
+The test program is given three arguments:
+- The name of the test (`sequential`, `random`, `subset`).
+- The random seed: you should use the last 2 digits of your student ID (you can find
+  it in the Study Information System – just click on the Personal data icon). Please
+  include the random seed in your report.
+- The implementation to test (`std` or `naive`).
+
+The output of the program contains one line per experiment, which consists of:
+- For the sequential and random test: the set size and the average number of rotations.
+- For the subset test: the subset size, the set size, and the average number of rotations
+  per find. The initial insertions of the full set are not counted.
+
+## Your implementation
+
+Please use your implementation from the previous exercise. Methods `splay()`
+and `rotate()` will be augmented by the test program. If you are performing
+a double rotation directly instead of composing it from single rotations, you
+need to adjust the `BenchmarkingTree` class accordingly.
+
+## Hints
+
+The following tools can be useful for producing nice plots:
+- [pandas](https://pandas.pydata.org/)
+- [matplotlib](https://matplotlib.org/)
+- [gnuplot](http://www.gnuplot.info/)
+
+A quick checklist for plots:
+- Is there a caption explaining what is plotted?
+- Are the axes clearly labelled? Do they have value ranges and units?
+- If an axis has a logarithmic scale, have you said so? (Logarithmic graphs
+  are more fitting in some cases, but you should point it out.)
+- Is it clear which curve means what?
+- Is it clear which are the measured points and which is an interpolated
+  curve between them?
+- Are there any overlaps? (E.g., the most interesting part of the curve
+  hidden underneath a label?)
+
+In your discussion, please distinguish the following kinds of claims.
+It should be always clear which is which:
+- Experimental results (i.e., the raw data you obtained from the experiments)
+- Theoretical facts (i.e., claims we have proved mathematically)
+- Your hypotheses (e.g., when the graph suggests that something is true,
+  but you are not able to prove rigorously that it always holds)