From 199e3d360c472f938b420b090952d3886e1818d4 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Luk=C3=A1=C5=A1=20Ondr=C3=A1=C4=8Dek?= <ondracek.lukas@gmail.com> Date: Mon, 19 Oct 2020 17:54:44 +0200 Subject: [PATCH] Splay experiment, ab-tree --- 03-splay_experiment/cpp/Makefile | 22 ++ 03-splay_experiment/cpp/random.h | 61 +++++ 03-splay_experiment/cpp/splay_experiment.cpp | 208 ++++++++++++++++++ 03-splay_experiment/python/Makefile | 15 ++ .../python/splay_experiment.py | 127 +++++++++++ 03-splay_experiment/task.md | 85 +++++++ 04-ab_tree/cpp/Makefile | 12 + 04-ab_tree/cpp/ab_tree.h | 124 +++++++++++ 04-ab_tree/cpp/ab_tree_test.cpp | 148 +++++++++++++ 04-ab_tree/cpp/test_main.cpp | 43 ++++ 04-ab_tree/python/ab_tree.py | 53 +++++ 04-ab_tree/python/ab_tree_test.py | 108 +++++++++ 04-ab_tree/task.md | 7 + 13 files changed, 1013 insertions(+) create mode 100644 03-splay_experiment/cpp/Makefile create mode 100644 03-splay_experiment/cpp/random.h create mode 100644 03-splay_experiment/cpp/splay_experiment.cpp create mode 100644 03-splay_experiment/python/Makefile create mode 100755 03-splay_experiment/python/splay_experiment.py create mode 100644 03-splay_experiment/task.md create mode 100644 04-ab_tree/cpp/Makefile create mode 100644 04-ab_tree/cpp/ab_tree.h create mode 100644 04-ab_tree/cpp/ab_tree_test.cpp create mode 100644 04-ab_tree/cpp/test_main.cpp create mode 100644 04-ab_tree/python/ab_tree.py create mode 100644 04-ab_tree/python/ab_tree_test.py create mode 100644 04-ab_tree/task.md diff --git a/03-splay_experiment/cpp/Makefile b/03-splay_experiment/cpp/Makefile new file mode 100644 index 0000000..aa0b7d5 --- /dev/null +++ b/03-splay_experiment/cpp/Makefile @@ -0,0 +1,22 @@ +STUDENT_ID ?= PLEASE_SET_STUDENT_ID + +.PHONY: test +test: splay_experiment + @rm -rf out && mkdir out + @for test in sequential random subset ; do \ + for mode in std naive ; do \ + echo t-$$test-$$mode ; \ + ./splay_experiment $$test $(STUDENT_ID) $$mode >out/t-$$test-$$mode ; \ + done ; \ + 
done + +INCLUDE ?= . +CXXFLAGS=-std=c++11 -O2 -Wall -Wextra -g -Wno-sign-compare -I$(INCLUDE) + +splay_experiment: splay_operation.h splay_experiment.cpp $(INCLUDE)/random.h + $(CXX) $(CPPFLAGS) $(CXXFLAGS) $^ -o $@ + +.PHONY: clean +clean: + rm -f splay_experiment + rm -rf out diff --git a/03-splay_experiment/cpp/random.h b/03-splay_experiment/cpp/random.h new file mode 100644 index 0000000..5ef10ae --- /dev/null +++ b/03-splay_experiment/cpp/random.h @@ -0,0 +1,61 @@ +#ifndef DS1_RANDOM_H +#define DS1_RANDOM_H + +#include <cstdint> + +/* + * This is the xoroshiro128+ random generator, designed in 2016 by David Blackman + * and Sebastiano Vigna, distributed under the CC-0 license. For more details, + * see http://vigna.di.unimi.it/xorshift/. + * + * Rewritten to C++ by Martin Mares, also placed under CC-0. + */ + +class RandomGen { + uint64_t state[2]; + + uint64_t rotl(uint64_t x, int k) + { + return (x << k) | (x >> (64 - k)); + } + + public: + // Initialize the generator, set its seed and warm it up. + RandomGen(unsigned int seed) + { + state[0] = seed * 0xdeadbeef; + state[1] = seed ^ 0xc0de1234; + for (int i=0; i<100; i++) + next_u64(); + } + + // Generate a random 64-bit number. + uint64_t next_u64(void) + { + uint64_t s0 = state[0], s1 = state[1]; + uint64_t result = s0 + s1; + s1 ^= s0; + state[0] = rotl(s0, 55) ^ s1 ^ (s1 << 14); + state[1] = rotl(s1, 36); + return result; + } + + // Generate a random 32-bit number. + uint32_t next_u32(void) + { + return next_u64() >> 11; + } + + // Generate a number between 0 and range-1. + unsigned int next_range(unsigned int range) + { + /* + * This is not perfectly uniform, unless the range is a power of two. + * However, for 64-bit random values and 32-bit ranges, the bias is + * insignificant. 
+ */ + return next_u64() % range; + } +}; + +#endif diff --git a/03-splay_experiment/cpp/splay_experiment.cpp b/03-splay_experiment/cpp/splay_experiment.cpp new file mode 100644 index 0000000..a47da8b --- /dev/null +++ b/03-splay_experiment/cpp/splay_experiment.cpp @@ -0,0 +1,208 @@ +#include <algorithm> +#include <functional> +#include <string> +#include <utility> +#include <vector> +#include <iostream> +#include <cmath> + +#include "splay_operation.h" +#include "random.h" + +using namespace std; + +/* + * A modified Splay tree for benchmarking. + * + * We inherit the implementation of operations from the Tree class + * and extend it by keeping statistics on the number of splay operations + * and the total number of rotations. Also, if naive is turned on, + * splay uses only single rotations. + * + * Please make sure that your Tree class defines the rotate() and splay() + * methods as virtual. + */ + +class BenchmarkingTree : public Tree { +public: + int num_operations; + int num_rotations; + bool do_naive; + + BenchmarkingTree(bool naive=false) + { + do_naive = naive; + reset(); + } + + void reset() + { + num_operations = 0; + num_rotations = 0; + } + + void rotate(Node *node) override + { + num_rotations++; + Tree::rotate(node); + } + + void splay(Node *node) override + { + num_operations++; + if (do_naive) { + while (node->parent) + rotate(node); + } else { + Tree::splay(node); + } + } + + // Return the average number of rotations per operation. 
+ double rot_per_op() + { + if (num_operations > 0) + return (double) num_rotations / num_operations; + else + return 0; + } +}; + +bool naive; // Use of naive rotations requested +RandomGen *rng; // Random generator object + +void test_sequential() +{ + for (int n=100; n<=3000; n+=100) { + BenchmarkingTree tree = BenchmarkingTree(naive); + + for (int x=0; x<n; x++) + tree.insert(x); + + for (int i=0; i<5; i++) + for (int x=0; x<n; x++) + tree.lookup(x); + + cout << n << " " << tree.rot_per_op() << endl; + } +} + +// An auxiliary function for generating a random permutation. +vector<int> random_permutation(int n) +{ + vector<int> perm; + for (int i=0; i<n; i++) + perm.push_back(i); + for (int i=0; i<n-1; i++) + swap(perm[i], perm[i + rng->next_range(n-i)]); + return perm; +} + +void test_random() +{ + for (int e=32; e<=64; e++) { + int n = (int) pow(2, e/4.); + BenchmarkingTree tree = BenchmarkingTree(naive); + + vector<int> perm = random_permutation(n); + for (int x : perm) + tree.insert(x); + + for (int i=0; i<5*n; i++) + tree.lookup(rng->next_range(n)); + + cout << n << " " << tree.rot_per_op() << endl; + } +} + +/* + * An auxiliary function for constructing arithmetic progressions. + * The vector seq will be modified to contain an arithmetic progression + * of elements in interval [A,B] starting from position s with step inc. + */ +void make_progression(vector<int> &seq, int A, int B, int s, int inc) +{ + for (int i=0; i<seq.size(); i++) + while (seq[i] >= A && seq[i] <= B && s + inc*(seq[i]-A) != i) + swap(seq[i], seq[s + inc*(seq[i] - A)]); +} + +void test_subset_s(int sub) +{ + for (int e=32; e<=64; e++) { + int n = (int) pow(2, e/4.); + if (n < sub) + continue; + + // We will insert elements in order, which contain several + // arithmetic progressions interspersed with random elements. 
+ vector<int> seq = random_permutation(n); + make_progression(seq, n/4, n/4 + n/20, n/10, 1); + make_progression(seq, n/2, n/2 + n/20, n/10, -1); + make_progression(seq, 3*n/4, 3*n/4 + n/20, n/2, -4); + make_progression(seq, 17*n/20, 17*n/20 + n/20, 2*n/5, 5); + + BenchmarkingTree tree = BenchmarkingTree(naive); + for (int x : seq) + tree.insert(x); + tree.reset(); + + for (int i=0; i<10000; i++) + tree.lookup(seq[rng->next_range(sub)]); + + cout << sub << " " << n << " " << tree.rot_per_op() << endl; + } +} + +void test_subset() +{ + test_subset_s(10); + test_subset_s(100); + test_subset_s(1000); +} + +vector<pair<string, function<void()>>> tests = { + { "sequential", test_sequential }, + { "random", test_random }, + { "subset", test_subset }, +}; + +int main(int argc, char **argv) +{ + if (argc != 4) { + cerr << "Usage: " << argv[0] << " <test> <student-id> (std|naive)" << endl; + return 1; + } + + string which_test = argv[1]; + string id_str = argv[2]; + string mode = argv[3]; + + try { + rng = new RandomGen(stoi(id_str)); + } catch (...) 
{ + cerr << "Invalid student ID" << endl; + return 1; + } + + if (mode == "std") + naive = false; + else if (mode == "naive") + naive = true; + else + { + cerr << "Last argument must be either 'std' or 'naive'" << endl; + return 1; + } + + for (const auto& test : tests) { + if (test.first == which_test) + { + cout.precision(12); + test.second(); + return 0; + } + } + cerr << "Unknown test " << which_test << endl; + return 1; +} diff --git a/03-splay_experiment/python/Makefile b/03-splay_experiment/python/Makefile new file mode 100644 index 0000000..ac6c02d --- /dev/null +++ b/03-splay_experiment/python/Makefile @@ -0,0 +1,15 @@ +STUDENT_ID ?= PLEASE_SET_STUDENT_ID + +.PHONY: test +test: splay_experiment.py + @rm -rf out && mkdir out + @for test in sequential random subset ; do \ + for mode in std naive ; do \ + echo t-$$test-$$mode ; \ + ./splay_experiment.py $$test $(STUDENT_ID) $$mode >out/t-$$test-$$mode ; \ + done ; \ + done + +.PHONY: clean +clean: + rm -rf out diff --git a/03-splay_experiment/python/splay_experiment.py b/03-splay_experiment/python/splay_experiment.py new file mode 100755 index 0000000..8cf3d6d --- /dev/null +++ b/03-splay_experiment/python/splay_experiment.py @@ -0,0 +1,127 @@ +#!/usr/bin/env python3 + +import sys +import random + +from splay_operation import Tree + +class BenchmarkingTree(Tree): + """ A modified Splay tree for benchmarking. + + We inherit the implementation of operations from the Tree class + and extend it by keeping statistics on the number of splay operations + and the total number of rotations. Also, if naive is turned on, + splay uses only single rotations. 
+ """ + + def __init__(self, naive=False): + Tree.__init__(self) + self.do_naive = naive + self.reset() + + def reset(self): + """Reset statistics.""" + self.num_rotations = 0; + self.num_operations = 0; + + def rotate(self, node): + self.num_rotations += 1 + Tree.rotate(self, node) + + def splay(self, node): + self.num_operations += 1 + if self.do_naive: + while node.parent is not None: + self.rotate(node) + else: + Tree.splay(self, node) + + def rot_per_op(self): + """Return the average number of rotations per operation.""" + if self.num_operations > 0: + return self.num_rotations / self.num_operations + else: + return 0 + +def test_sequential(): + for n in range(100, 3001, 100): + tree = BenchmarkingTree(naive) + for elem in range(n): + tree.insert(elem) + + for _ in range(5): + for elem in range(n): + tree.lookup(elem) + + print(n, tree.rot_per_op()) + +def test_random(): + for exp in range(32, 64): + n = int(2**(exp/4)) + tree = BenchmarkingTree(naive) + + for elem in random.sample(range(n), n): + tree.insert(elem) + + for _ in range(5*n): + tree.lookup(random.randrange(n)) + + print(n, tree.rot_per_op()) + +def make_progression(seq, A, B, s, inc): + """An auxiliary function for constructing arithmetic progressions. + + The array seq will be modified to contain an arithmetic progression + of elements in interval [A,B] starting from position s with step inc. + """ + for i in range(len(seq)): + while seq[i] >= A and seq[i] <= B and s + inc*(seq[i]-A) != i: + pos = s + inc*(seq[i]-A) + seq[i], seq[pos] = seq[pos], seq[i] + +def test_subset(): + for sub in [10, 100, 1000]: + for exp in range(32,64): + n = int(2**(exp/4)) + if n < sub: + continue + + # We will insert elements in order, which contain several + # arithmetic progressions interspersed with random elements. 
+ seq = random.sample(range(n), n) + make_progression(seq, n//4, n//4 + n//20, n//10, 1) + make_progression(seq, n//2, n//2 + n//20, n//10, -1) + make_progression(seq, 3*n//4, 3*n//4 + n//20, n//2, -4) + make_progression(seq, 17*n//20, 17*n//20 + n//20, 2*n//5, 5) + + tree = BenchmarkingTree(naive) + for elem in seq: + tree.insert(elem) + tree.reset() + + for _ in range(10000): + tree.lookup(seq[random.randrange(sub)]) + + print(sub, n, tree.rot_per_op()) + +tests = { + "sequential": test_sequential, + "random": test_random, + "subset": test_subset, +} + +if len(sys.argv) == 4: + test, student_id = sys.argv[1], sys.argv[2] + if sys.argv[3] == "std": + naive = False + elif sys.argv[3] == "naive": + naive = True + else: + raise ValueError("Last argument must be either 'std' or 'naive'") + random.seed(student_id) + if test in tests: + tests[test]() + else: + raise ValueError("Unknown test {}".format(test)) +else: + raise ValueError("Usage: {} <test> <student-id> (std|naive)".format(sys.argv[0])) diff --git a/03-splay_experiment/task.md b/03-splay_experiment/task.md new file mode 100644 index 0000000..8120d9a --- /dev/null +++ b/03-splay_experiment/task.md @@ -0,0 +1,85 @@ +## Goal + +The goal of this assignment is to evaluate your implementation of Splay trees +experimentally and to compare it with a "naive" implementation which splays +using single rotations only. + +You are given a test program (`splay_experiment`) which calls your +implementation from the previous assignment to perform the following +experiments: + +- _Sequential test:_ Insert _n_ elements sequentially and then repeatedly + find them all in sequential order. +- _Random test:_ Insert _n_ elements in random order and then find _5n_ + random elements. +- _Subset test:_ Insert a sequence of _n_ elements, which contains arithmetic + progressions interspersed with random elements. Then repeatedly access + a small subset of these elements in random order. 
Try this with subsets of + different cardinalities. + +The program tries each experiment with different values of _n_. In each try, +it prints the average number of rotations per splay operation. + +You should perform these experiments and write a report, which contains the following +plots of the measured data. Each plot should show the dependence of the average +number of rotations on the set size _n_. + +- The sequential test: one curve for the standard implementation, one for the naive one. +- The random test: one curve for the standard implementation, one for the naive one. +- The subset test: three curves for the standard implementation with different sizes + of the subset, three for the naive implementation with the same sizes. + +The report should discuss the experimental results and try to explain the observed +behavior using theory from the lectures. (If you want, you can carry out further +experiments to gain a better understanding of the data structure and include these +in the report. This is strictly optional.) + +You should submit a PDF file with the report (and no source code). +You will get 1 temporary point upon submission if the file is syntactically correct; +proper points will be assigned later. + +## Test program + +The test program is given three arguments: +- The name of the test (`sequential`, `random`, `subset`). +- The random seed: you should use the last 2 digits of your student ID (you can find + it in the Study Information System – just click on the Personal data icon). Please + include the random seed in your report. +- The implementation to test (`std` or `naive`). + +The output of the program contains one line per experiment, which consists of: +- For the sequential and random test: the set size and the average number of rotations. +- For the subset test: the subset size, the set size, and the average number of rotations + per find. The initial insertions of the full set are not counted.
+ +## Your implementation + +Please use your implementation from the previous exercise. Methods `splay()` +and `rotate()` will be augmented by the test program. If you are performing +a double rotation directly instead of composing it from single rotations, you +need to adjust the `BenchmarkingTree` class accordingly. + +## Hints + +The following tools can be useful for producing nice plots: +- [pandas](https://pandas.pydata.org/) +- [matplotlib](https://matplotlib.org/) +- [gnuplot](http://www.gnuplot.info/) + +A quick checklist for plots: +- Is there a caption explaining what is plotted? +- Are the axes clearly labelled? Do they have value ranges and units? +- Have you mentioned when an axis has a logarithmic scale? (Logarithmic graphs + are more fitting in some cases, but you should say so.) +- Is it clear which curve means what? +- Is it clear which are the measured points and which is an interpolated + curve between them? +- Are there any overlaps? (E.g., the most interesting part of the curve + hidden underneath a label?) + +In your discussion, please distinguish the following kinds of claims.
+It should always be clear which is which: +- Experimental results (i.e., the raw data you obtained from the experiments) +- Theoretical facts (i.e., claims we have proved mathematically) +- Your hypotheses (e.g., when you claim that the graph looks like something is true, + but you are not able to prove rigorously that it always holds) diff --git a/04-ab_tree/cpp/Makefile b/04-ab_tree/cpp/Makefile new file mode 100644 index 0000000..e6ab228 --- /dev/null +++ b/04-ab_tree/cpp/Makefile @@ -0,0 +1,12 @@ +test: ab_tree_test + ./$< + +CXXFLAGS=-std=c++11 -O2 -Wall -Wextra -g -Wno-sign-compare + +ab_tree_test: ab_tree_test.cpp ab_tree.h test_main.cpp + $(CXX) $(CXXFLAGS) $^ -o $@ + +clean: + rm -f ab_tree_test + +.PHONY: clean test diff --git a/04-ab_tree/cpp/ab_tree.h b/04-ab_tree/cpp/ab_tree.h new file mode 100644 index 0000000..cec3391 --- /dev/null +++ b/04-ab_tree/cpp/ab_tree.h @@ -0,0 +1,124 @@ +#include <limits> +#include <vector> +#include <iostream> + +using namespace std; + +// If the condition is not true, report an error and halt. +#define EXPECT(condition, message) do { if (!(condition)) expect_failed(message); } while (0) + +void expect_failed(const string& message); + +/*** One node ***/ + +class ab_node { + public: + // Keys stored in this node and the corresponding children + // The vectors are large enough to accommodate one extra entry + // in overflowing nodes. + vector<ab_node *> children; + vector<int> keys; + + // If this node contains the given key, return true and set i to key's position. + // Otherwise return false and set i to the first key greater than the given one. + bool find_branch(int key, int &i) + { + i = 0; + while (i < keys.size() && keys[i] <= key) { + if (keys[i] == key) + return true; + i++; + } + return false; + } + + // Insert a new key at position i and add a new child between keys i and i+1.
+ void insert_branch(int i, int key, ab_node *child) + { + keys.insert(keys.begin() + i, key); + children.insert(children.begin() + i + 1, child); + } + + // An auxiliary function for displaying a sub-tree under this node. + void show(int indent); +}; + +/*** Tree ***/ + +class ab_tree { + public: + int a; // Minimum allowed number of children + int b; // Maximum allowed number of children + ab_node *root; // Root node (even a tree with no keys has a root) + int num_nodes; // We keep track of how many nodes the tree has + + // Create a new node and return a pointer to it. + ab_node *new_node() + { + ab_node *n = new ab_node; + n->keys.reserve(b); + n->children.reserve(b+1); + num_nodes++; + return n; + } + + // Delete a given node, assuming that its children have been already unlinked. + void delete_node(ab_node *n) + { + num_nodes--; + delete n; + } + + // Constructor: initialize an empty tree with just the root. + ab_tree(int a, int b) + { + EXPECT(a >= 2 && b >= 2*a - 1, "Invalid values of a,b"); + this->a = a; + this->b = b; + num_nodes = 0; + // The root has no keys and one null child pointer. + root = new_node(); + root->children.push_back(nullptr); + } + + // An auxiliary function for deleting a subtree recursively. + void delete_tree(ab_node *n) + { + for (int i=0; i < n->children.size(); i++) + if (n->children[i]) + delete_tree(n->children[i]); + delete_node(n); + } + + // Destructor: delete all nodes. + ~ab_tree() + { + delete_tree(root); + EXPECT(num_nodes == 0, "Memory leak detected: some nodes were not deleted"); + } + + // Find a key: returns true if it is present in the tree. + bool find(int key) + { + ab_node *n = root; + while (n) { + int i; + if (n->find_branch(key, i)) + return true; + n = n->children[i]; + } + return false; + } + + // Display the tree on standard output in human-readable form. + void show(); + + // Check that the data structure satisfies all invariants. 
+ void audit(); + + // Insert: add key to the tree (unless it was already present). + void insert(int key) + { + // FIXME: Implement + } +}; diff --git a/04-ab_tree/cpp/ab_tree_test.cpp b/04-ab_tree/cpp/ab_tree_test.cpp new file mode 100644 index 0000000..8b1651f --- /dev/null +++ b/04-ab_tree/cpp/ab_tree_test.cpp @@ -0,0 +1,148 @@ +#include <functional> +#include <cstdlib> +#include <vector> + +#include "ab_tree.h" + +// Debugging output: showing trees prettily on standard output. + +void ab_tree::show() +{ + root->show(0); + for (int i=0; i<70; i++) + cout << '='; + cout << endl; +} + +void ab_node::show(int indent) +{ + for (int i = children.size() - 1; i >= 0 ; i--) { + if (i < keys.size()) { + for (int j = 0; j < indent; j++) + cout << " "; + cout << keys[i] << endl; + } + if (children[i]) + children[i]->show(indent+1); + } +} + +// Invariant checks + +void audit_subtree(ab_tree *tree, ab_node *n, int key_min, int key_max, int depth, int &leaf_depth) +{ + if (!n) { + // Check that all leaves are on the same level. + if (leaf_depth < 0) + leaf_depth = depth; + else + EXPECT(depth == leaf_depth, "Leaves are not on the same level"); + return; + } + + // The number of children must be in the allowed range. + if (depth > 0) + EXPECT(n->children.size() >= tree->a, "Too few children"); + EXPECT(n->children.size() <= tree->b, "Too many children"); + + // We must have one more children than keys. + EXPECT(n->children.size() == n->keys.size() + 1, "Number of keys does not match number of children"); + + // Allow degenerate trees with 0 keys in the root. + if (n->children.size() == 1) + return; + + // Check order of keys: they must be increasing and bounded by the keys on the higher levels. + for (int i = 0; i < n->keys.size(); i++) { + EXPECT(n->keys[i] >= key_min && n->keys[i] <= key_max, "Wrong key order"); + EXPECT(i == 0 || n->keys[i-1] < n->keys[i], "Wrong key order"); + } + + // Call on children recursively. 
+ for (int i = 0; i < n->children.size(); i++) { + int tmin, tmax; + if (i == 0) + tmin = key_min; + else + tmin = n->keys[i-1] + 1; + if (i < n->keys.size()) + tmax = n->keys[i] - 1; + else + tmax = key_max; + audit_subtree(tree, n->children[i], tmin, tmax, depth+1, leaf_depth); + } +} + +void ab_tree::audit() +{ + EXPECT(root, "Tree has no root"); + int leaf_depth = -1; + audit_subtree(this, root, numeric_limits<int>::min(), numeric_limits<int>::max(), 0, leaf_depth); +} + +// A basic test: insert a couple of keys and show how the tree evolves. + +void test_basic() +{ + cout << "## Basic test" << endl; + + ab_tree t(2, 3); + vector<int> keys = { 3, 1, 4, 5, 9, 2, 6, 8, 7, 0 }; + for (int k : keys) { + t.insert(k); + t.show(); + t.audit(); + EXPECT(t.find(k), "Inserted key disappeared"); + } + + for (int k : keys) + EXPECT(t.find(k), "Some keys are missing at the end"); +} + +// The main test: inserting a lot of keys and checking that they are really there. +// We will insert num_items keys from the set {1,...,range-1}, where range is a prime. + +void test_main(int a, int b, int range, int num_items) +{ + // Create a new tree. + cout << "## Test: a=" << a << " b=" << b << " range=" << range << " num_items=" << num_items << endl; + ab_tree t(a, b); + + int key = 1; + int step = (int)(range * 1.618); + int audit_time = 1; + + // Insert keys. + for (int i=1; i <= num_items; i++) { + t.insert(key); + // Audit the tree occasionally. + if (i == audit_time || i == num_items) { + // cout << "== Audit at " << i << endl; + // t.show(); + t.audit(); + audit_time = (int)(audit_time * 1.33) + 1; + } + key = (key + step) % range; + } + + // Check that the tree contains exactly the items it should contain. 
+ key = 1; + for (int i=1; i < range; i++) { + bool found = t.find(key); + // cout << "Step #" << i << ": find(" << key << ") = " << found << endl; + EXPECT(found == (i <= num_items), "Tree contains wrong keys"); + key = (key + step) % range; + } +} + +/*** A list of all tests ***/ + +vector<pair<string, function<void()>>> tests = { + { "basic", [] { test_basic(); } }, + { "small-2,3", [] { test_main(2, 3, 997, 700); } }, + { "small-2,4", [] { test_main(2, 4, 997, 700); } }, + { "big-2,3", [] { test_main(2, 3, 999983, 700000); } }, + { "big-2,4", [] { test_main(2, 4, 999983, 700000); } }, + { "big-10,20", [] { test_main(10, 20, 999983, 700000); } }, + { "big-100,200", [] { test_main(100, 200, 999983, 700000); } }, +}; diff --git a/04-ab_tree/cpp/test_main.cpp b/04-ab_tree/cpp/test_main.cpp new file mode 100644 index 0000000..3f4aff0 --- /dev/null +++ b/04-ab_tree/cpp/test_main.cpp @@ -0,0 +1,43 @@ +#include <cstdlib> +#include <functional> +#include <iostream> +#include <string> +#include <utility> +#include <vector> + +using namespace std; + +extern vector<pair<string, function<void()>>> tests; + +void expect_failed(const string& message) { + cerr << "Test error: " << message << endl; + exit(1); +} + +int main(int argc, char* argv[]) { + vector<string> required_tests; + + if (argc > 1) { + required_tests.assign(argv + 1, argv + argc); + } else { + for (const auto& test : tests) + required_tests.push_back(test.first); + } + + for (const auto& required_test : required_tests) { + bool found = false; + for (const auto& test : tests) + if (required_test == test.first) { + cerr << "Running test " << required_test << endl; + test.second(); + found = true; + break; + } + if (!found) { + cerr << "Unknown test " << required_test << endl; + return 1; + } + } + + return 0; +} diff --git a/04-ab_tree/python/ab_tree.py b/04-ab_tree/python/ab_tree.py new file mode 100644 index 0000000..83c17c4 --- /dev/null +++ b/04-ab_tree/python/ab_tree.py @@ -0,0 +1,53 @@ +#!/usr/bin/env 
python3 + +class ABNode: + """Single node in an ABTree. + + Each node contains keys and children + (with one more child than there are keys). + """ + def __init__(self, keys = None, children = None): + self.keys = keys if keys is not None else [] + self.children = children if children is not None else [] + + def find_branch(self, key): + """ Try finding given key in this node. + + If this node contains the given key, returns (True, key_position). + If not, returns (False, first_position_with_key_greater_than_the_given). + """ + i = 0 + while (i < len(self.keys) and self.keys[i] < key): + i += 1 + + return (i < len(self.keys) and self.keys[i] == key, i) + + def insert_branch(self, i, key, child): + """ Insert a new key and a given child between keys i and i+1.""" + self.keys.insert(i, key) + self.children.insert(i + 1, child) + +class ABTree: + """A class representing the whole ABTree.""" + def __init__(self, a, b): + assert a >= 2 and b >= 2 * a - 1, "Invalid values of a, b: {}, {}".format(a, b) + self.a = a + self.b = b + self.root = ABNode(children=[None]) + + def find(self, key): + """Find a key in the tree. + + Returns True if the key is present, False otherwise.
+ """ + node = self.root + while node: + found, i = node.find_branch(key) + if found: return True + node = node.children[i] + return False + + def insert(self, key): + """Add a given key to the tree, unless already present.""" + # TODO: Implement + raise NotImplementedError diff --git a/04-ab_tree/python/ab_tree_test.py b/04-ab_tree/python/ab_tree_test.py new file mode 100644 index 0000000..5444dd9 --- /dev/null +++ b/04-ab_tree/python/ab_tree_test.py @@ -0,0 +1,108 @@ +#!/usr/bin/env python3 +import math +import sys + +from ab_tree import ABNode, ABTree + +def show(tree): + """Show a tree.""" + def show_node(node, indent): + for i in reversed(range(len(node.children))): + if i < len(node.keys): + print(" " * indent, node.keys[i], sep="") + if node.children[i]: + show_node(node.children[i], indent + 1) + + show_node(tree.root, 0) + print("=" * 70) + +def audit(tree): + """Invariant check for the given tree.""" + def audit_node(node, key_min, key_max, depth, leaf_depth): + if not node: + # Check that all leaves are on the same level. + if leaf_depth is None: + leaf_depth = depth + assert depth == leaf_depth, "Leaves are not on the same level" + + else: + # The number of children must be in the allowed range. + assert depth == 0 or len(node.children) >= tree.a, "Too few children" + assert len(node.children) <= tree.b, "Too many children" + + # We must have one more children than keys + assert len(node.children) == len(node.keys) + 1, "Number of keys does not match number of children" + + # Check that keys are increasing and in (key_min, key_max) range. 
+ for i in range(len(node.keys)): + assert node.keys[i] > key_min and node.keys[i] < key_max, "Wrong key order" + assert i == 0 or node.keys[i - 1] < node.keys[i], "Wrong key order" + + # Check children recursively + for i in range(len(node.children)): + child_min = node.keys[i - 1] if i > 0 else key_min + child_max = node.keys[i] if i < len(node.keys) else key_max + leaf_depth = audit_node(node.children[i], child_min, child_max, depth + 1, leaf_depth) + + return leaf_depth + + assert tree.root, "Tree has no root" + audit_node(tree.root, -math.inf, math.inf, 0, None) + +def test_basic(): + """Insert a couple of keys and show how the tree evolves.""" + print("## Basic test") + + tree = ABTree(2, 3) + keys = [3, 1, 4, 5, 9, 2, 6, 8, 7, 0] + for key in keys: + tree.insert(key) + show(tree) + audit(tree) + assert tree.find(key), "Inserted key disappeared" + + for key in keys: + assert tree.find(key), "Some keys are missing at the end" + +def test_main(a, b, limit, num_items): + print("## Test: a={} b={} range={} num_items={}".format(a, b, limit, num_items)) + + tree = ABTree(a, b) + + # Insert keys + step = int(limit * 1.618) + key, audit_time = 1, 1 + for i in range(num_items): + tree.insert(key) + key = (key + step) % limit + + # Audit the tree occasionally + if i == audit_time or i + 1 == num_items: + audit(tree) + audit_time = int(audit_time * 1.33) + 1 + + # Check the content of the tree + key = 1 + for i in range(limit): + assert tree.find(key) == (i < num_items), "Tree contains wrong keys" + key = (key + step) % limit + +tests = [ + ("basic", test_basic), + ("small-2,3", lambda: test_main(2, 3, 997, 700)), + ("small-2,4", lambda: test_main(2, 4, 997, 700)), + ("big-2,3", lambda: test_main(2, 3, 99991, 70000)), + ("big-2,4", lambda: test_main(2, 4, 99991, 70000)), + ("big-10,20", lambda: test_main(10, 20, 99991, 70000)), + ("big-100,200", lambda: test_main(100, 200, 99991, 70000)), +] + +if __name__ == "__main__": + for required_test in sys.argv[1:] or [name for 
name, _ in tests]: + for name, test in tests: + if name == required_test: + print("Running test {}".format(name), file=sys.stderr) + test() + break + else: + raise ValueError("Unknown test {}".format(name)) diff --git a/04-ab_tree/task.md b/04-ab_tree/task.md new file mode 100644 index 0000000..d204f3b --- /dev/null +++ b/04-ab_tree/task.md @@ -0,0 +1,7 @@ +You are given a representation of _(a, b)-tree_ with a `find` operation, +and a representation of an _(a, b)-tree node_. + +Your goal is to implement an `insert` operation, which inserts the given +key in the tree (or does nothing if the key is already present). + +You should submit the `ab_tree.*` file (but not `ab_tree_test.*` files). -- GitLab