#ifndef DS1_RANDOM_H
#define DS1_RANDOM_H

#include <cstdint>

/*
 * This is the xoroshiro128+ random generator, designed in 2016 by David Blackman
 * and Sebastiano Vigna, distributed under the CC-0 license. For more details,
 * see http://vigna.di.unimi.it/xorshift/.
 *
 * Rewritten to C++ by Martin Mares, also placed under CC-0.
 */

class RandomGen {
    // The 128 bits of generator state.
    uint64_t state[2];

    // Rotate x left by k bits. Both shift counts are reduced modulo 64, so
    // k == 0 no longer shifts by the full width of the type (which would be
    // undefined behaviour). For 0 < k < 64 the result is the usual rotation.
    static uint64_t rotl(uint64_t x, int k)
    {
        return (x << (k & 63)) | (x >> ((64 - k) & 63));
    }

  public:
    // Initialize the generator, set its seed and warm it up.
    //
    // Both expressions below are evaluated in `unsigned int` arithmetic and
    // only then widened to 64 bits. This is kept exactly as it was so that
    // every seed keeps producing the same sequence as before.
    RandomGen(unsigned int seed)
    {
        state[0] = seed * 0xdeadbeef;
        state[1] = seed ^ 0xc0de1234;
        // Discard the first 100 outputs.
        for (int i = 0; i < 100; i++)
            next_u64();
    }

    // Generate a random 64-bit number and advance the state.
    uint64_t next_u64()
    {
        const uint64_t s0 = state[0];
        uint64_t s1 = state[1];
        const uint64_t result = s0 + s1;
        s1 ^= s0;
        state[0] = rotl(s0, 55) ^ s1 ^ (s1 << 14);
        state[1] = rotl(s1, 36);
        return result;
    }

    // Generate a random 32-bit number: the 64-bit output shifted right by 11
    // bits and truncated to 32 bits.
    uint32_t next_u32()
    {
        return static_cast<uint32_t>(next_u64() >> 11);
    }

    // Generate a number between 0 and range-1.
    //
    // A range of 0 has no valid result; it returns 0 without consuming a
    // random value instead of dividing by zero.
    unsigned int next_range(unsigned int range)
    {
        if (range == 0)
            return 0;

        /*
         * This is not perfectly uniform, unless the range is a power of two.
         * However, for 64-bit random values and 32-bit ranges, the bias is
         * insignificant.
         */
        return static_cast<unsigned int>(next_u64() % range);
    }
};

#endif
+ */ + +class BenchmarkingTree : public Tree { +public: + int num_operations; + int num_rotations; + bool do_naive; + + BenchmarkingTree(bool naive=false) + { + do_naive = naive; + reset(); + } + + void reset() + { + num_operations = 0; + num_rotations = 0; + } + + void rotate(Node *node) override + { + num_rotations++; + Tree::rotate(node); + } + + void splay(Node *node) override + { + num_operations++; + if (do_naive) { + while (node->parent) + rotate(node); + } else { + Tree::splay(node); + } + } + + // Return the average number of rotations per operation. + double rot_per_op() + { + if (num_operations > 0) + return (double) num_rotations / num_operations; + else + return 0; + } +}; + +bool naive; // Use of naive rotations requested +RandomGen *rng; // Random generator object + +void test_sequential() +{ + for (int n=100; n<=3000; n+=100) { + BenchmarkingTree tree = BenchmarkingTree(naive); + + for (int x=0; x<n; x++) + tree.insert(x); + + for (int i=0; i<5; i++) + for (int x=0; x<n; x++) + tree.lookup(x); + + cout << n << " " << tree.rot_per_op() << endl; + } +} + +// An auxiliary function for generating a random permutation. +vector<int> random_permutation(int n) +{ + vector<int> perm; + for (int i=0; i<n; i++) + perm.push_back(i); + for (int i=0; i<n-1; i++) + swap(perm[i], perm[i + rng->next_range(n-i)]); + return perm; +} + +void test_random() +{ + for (int e=32; e<=64; e++) { + int n = (int) pow(2, e/4.); + BenchmarkingTree tree = BenchmarkingTree(naive); + + vector<int> perm = random_permutation(n); + for (int x : perm) + tree.insert(x); + + for (int i=0; i<5*n; i++) + tree.lookup(rng->next_range(n)); + + cout << n << " " << tree.rot_per_op() << endl; + } +} + +/* + * An auxiliary function for constructing arithmetic progressions. + * The vector seq will be modified to contain an arithmetic progression + * of elements in interval [A,B] starting from position s with step inc. 
+ */ +void make_progression(vector<int> &seq, int A, int B, int s, int inc) +{ + for (int i=0; i<seq.size(); i++) + while (seq[i] >= A && seq[i] <= B && s + inc*(seq[i]-A) != i) + swap(seq[i], seq[s + inc*(seq[i] - A)]); +} + +void test_subset_s(int sub) +{ + for (int e=32; e<=64; e++) { + int n = (int) pow(2, e/4.); + if (n < sub) + continue; + + // We will insert elements in order, which contain several + // arithmetic progressions interspersed with random elements. + vector<int> seq = random_permutation(n); + make_progression(seq, n/4, n/4 + n/20, n/10, 1); + make_progression(seq, n/2, n/2 + n/20, n/10, -1); + make_progression(seq, 3*n/4, 3*n/4 + n/20, n/2, -4); + make_progression(seq, 17*n/20, 17*n/20 + n/20, 2*n/5, 5); + + BenchmarkingTree tree = BenchmarkingTree(naive); + for (int x : seq) + tree.insert(x); + tree.reset(); + + for (int i=0; i<10000; i++) + tree.lookup(seq[rng->next_range(sub)]); + + cout << sub << " " << n << " " << tree.rot_per_op() << endl; + } +} + +void test_subset() +{ + test_subset_s(10); + test_subset_s(100); + test_subset_s(1000); +} + +vector<pair<string, function<void()>>> tests = { + { "sequential", test_sequential }, + { "random", test_random }, + { "subset", test_subset }, +}; + +int main(int argc, char **argv) +{ + if (argc != 4) { + cerr << "Usage: " << argv[0] << " <test> <student-id> (std|naive)" << endl; + return 1; + } + + string which_test = argv[1]; + string id_str = argv[2]; + string mode = argv[3]; + + try { + rng = new RandomGen(stoi(id_str)); + } catch (...) 
{ + cerr << "Invalid student ID" << endl; + return 1; + } + + if (mode == "std") + naive = false; + else if (mode == "naive") + naive = true; + else + { + cerr << "Last argument must be either 'std' or 'naive'" << endl; + return 1; + } + + for (const auto& test : tests) { + if (test.first == which_test) + { + cout.precision(12); + test.second(); + return 0; + } + } + cerr << "Unknown test " << which_test << endl; + return 1; +} diff --git a/03-splay_experiment/python/Makefile b/03-splay_experiment/python/Makefile new file mode 100644 index 0000000000000000000000000000000000000000..4a5efbfa04f2684c0cca17635b219a22b2a16fab --- /dev/null +++ b/03-splay_experiment/python/Makefile @@ -0,0 +1,15 @@ +STUDENT_ID ?= PLEASE_SET_STUDENT_ID + +.PHONY: test +test: splay_experiment.py splay_operation.py + @rm -rf out && mkdir out + @for test in sequential random subset ; do \ + for mode in std naive ; do \ + echo t-$$test-$$mode ; \ + ./splay_experiment.py $$test $(STUDENT_ID) $$mode >out/t-$$test-$$mode ; \ + done ; \ + done + +.PHONY: clean +clean:: + rm -rf out __pycache__ diff --git a/03-splay_experiment/python/splay_experiment.py b/03-splay_experiment/python/splay_experiment.py new file mode 100644 index 0000000000000000000000000000000000000000..8cf3d6d6af1564973bd17a43f2fa731cce083da7 --- /dev/null +++ b/03-splay_experiment/python/splay_experiment.py @@ -0,0 +1,127 @@ +#!/usr/bin/env python3 + +import sys +import random + +from splay_operation import Tree + +class BenchmarkingTree(Tree): + """ A modified Splay tree for benchmarking. + + We inherit the implementation of operations from the Tree class + and extend it by keeping statistics on the number of splay operations + and the total number of rotations. Also, if naive is turned on, + splay uses only single rotations. 
+ """ + + def __init__(self, naive=False): + Tree.__init__(self) + self.do_naive = naive + self.reset() + + def reset(self): + """Reset statistics.""" + self.num_rotations = 0; + self.num_operations = 0; + + def rotate(self, node): + self.num_rotations += 1 + Tree.rotate(self, node) + + def splay(self, node): + self.num_operations += 1 + if self.do_naive: + while node.parent is not None: + self.rotate(node) + else: + Tree.splay(self, node) + + def rot_per_op(self): + """Return the average number of rotations per operation.""" + if self.num_operations > 0: + return self.num_rotations / self.num_operations + else: + return 0 + +def test_sequential(): + for n in range(100, 3001, 100): + tree = BenchmarkingTree(naive) + for elem in range(n): + tree.insert(elem) + + for _ in range(5): + for elem in range(n): + tree.lookup(elem) + + print(n, tree.rot_per_op()) + +def test_random(): + for exp in range(32, 64): + n = int(2**(exp/4)) + tree = BenchmarkingTree(naive) + + for elem in random.sample(range(n), n): + tree.insert(elem) + + for _ in range(5*n): + tree.lookup(random.randrange(n)) + + print(n, tree.rot_per_op()) + +def make_progression(seq, A, B, s, inc): + """An auxiliary function for constructing arithmetic progressions. + + The array seq will be modified to contain an arithmetic progression + of elements in interval [A,B] starting from position s with step inc. + """ + for i in range(len(seq)): + while seq[i] >= A and seq[i] <= B and s + inc*(seq[i]-A) != i: + pos = s + inc*(seq[i]-A) + seq[i], seq[pos] = seq[pos], seq[i] + +def test_subset(): + for sub in [10, 100, 1000]: + for exp in range(32,64): + n = int(2**(exp/4)) + if n < sub: + continue + + # We will insert elements in order, which contain several + # arithmetic progressions interspersed with random elements. 
+ seq = random.sample(range(n), n) + make_progression(seq, n//4, n//4 + n//20, n//10, 1) + make_progression(seq, n//2, n//2 + n//20, n//10, -1) + make_progression(seq, 3*n//4, 3*n//4 + n//20, n//2, -4) + make_progression(seq, 17*n//20, 17*n//20 + n//20, 2*n//5, 5) + + tree = BenchmarkingTree(naive) + for elem in seq: + tree.insert(elem) + tree.reset() + + for _ in range(10000): + tree.lookup(seq[random.randrange(sub)]) + + print(sub, n, tree.rot_per_op()) + +tests = { + "sequential": test_sequential, + "random": test_random, + "subset": test_subset, +} + +if len(sys.argv) == 4: + test, student_id = sys.argv[1], sys.argv[2] + if sys.argv[3] == "std": + naive = False + elif sys.argv[3] == "naive": + naive = True + else: + raise ValueError("Last argument must be either 'std' or 'naive'") + random.seed(student_id) + if test in tests: + tests[test]() + else: + raise ValueError("Unknown test {}".format(test)) +else: + raise ValueError("Usage: {} <test> <student-id> (std|naive)".format(sys.argv[0])) diff --git a/03-splay_experiment/task.md b/03-splay_experiment/task.md new file mode 100644 index 0000000000000000000000000000000000000000..0d968113578f0038a6a442bc8daac400f250e42c --- /dev/null +++ b/03-splay_experiment/task.md @@ -0,0 +1,87 @@ +## Goal + +The goal of this assignment is to evaluate your implementation of Splay trees +experimentally and to compare it with a "naive" implementation which splays +using single rotations only. + +You are given a test program (`splay_experiment`) which calls your +implementation from the previous assignment to perform the following +experiments: + +- _Sequential test:_ Insert _n_ elements sequentially and then repeatedly + find them all in sequential order. +- _Random test:_ Insert _n_ elements in random order and then find _5n_ + random elements. +- _Subset test:_ Insert a sequence of _n_ elements, which contains arithmetic + progressions interspersed with random elements. 
Then repeatedly access + a small subset of these elements in random order. Try this with subsets of + different cardinalities. + +The program tries each experiment with different values of _n_. In each try, +it prints the average number of rotations per splay operation. + +You should perform these experiments and write a report, which contains the following +plots of the measured data. Each plot should show the dependence of the average +number of rotations on the set size _n_. + +- The sequential test: one curve for the standard implementation, one for the naive one. +- The random test: one curve for the standard implementation, one for the naive one. +- The subset test: three curves for the standard implementation with different sizes + of the subset, three for the naive implementation with the same sizes. + +The report should discuss the experimental results and try to explain the observed +behavior using theory from the lectures. (If you want, you can carry out further +experiments to gain better understanding of the data structure and include these +in the report. This is strictly optional.) + +You should submit a PDF file with the report (and no source code). +You will get 1 temporary point upon submission if the file is syntactically correct; +proper points will be assigned later. + +## Test program + +The test program is given three arguments: +- The name of the test (`sequential`, `random`, `subset`). +- The random seed: you should use the last 2 digits of your student ID (you can find + it in the Study Information System – just click on the Personal data icon). Please + include the random seed in your report. +- The implementation to test (`std` or `naive`). + +The output of the program contains one line per experiment, which consists of: +- For the sequential and random test: the set size and the average number of rotations. +- For the subset test: the subset size, the set size, and the average number of rotations + per find.
The initial insertions of the full set are not counted. + +## Your implementation + +Please use your implementation from the previous exercise. Methods `splay()` +and `rotate()` will be augmented by the test program. If you are performing +a double rotation directly instead of composing it from single rotations, you +need to adjust the `BenchmarkingTree` class accordingly. + +## Hints + +The following tools can be useful for producing nice plots: +- [pandas](https://pandas.pydata.org/) +- [matplotlib](https://matplotlib.org/) +- [gnuplot](http://www.gnuplot.info/) + +A quick checklist for plots: +- Is there a caption explaining what is plotted? +- Are the axes clearly labelled? Do they have value ranges and units? +- Have you mentioned that this axis has logarithmic scale? (Logarithmic graphs + are more fitting in some cases, but you should say so.) +- Is it clear which curve means what? +- Is it clear what are the measured points and what is an interpolated + curve between them? +- Are there any overlaps? (E.g., the most interesting part of the curve + hidden underneath a label?) + +In your discussion, please distinguish the following kinds of claims. +It should always be clear which is which: +- Experimental results (i.e., the raw data you obtained from the experiments) +- Theoretical facts (i.e., claims we have proved mathematically) +- Your hypotheses (e.g., when you claim that the graph looks like something is true, + but you are not able to prove rigorously that it always holds) + +Source code templates can be found in [git](https://gitlab.kam.mff.cuni.cz/datovky/assignments/-/tree/master).