From 2f73b8bf62e0385b767a9104db7f469385382174 Mon Sep 17 00:00:00 2001
From: Martin Mares <mj@ucw.cz>
Date: Wed, 24 Mar 2021 00:41:26 +0100
Subject: [PATCH] (a,b)-tree operation & experiment
---
04-ab_tree/cpp/Makefile | 12 +
04-ab_tree/cpp/ab_tree.h | 135 ++++++++
04-ab_tree/cpp/ab_tree_test.cpp | 150 +++++++++
04-ab_tree/cpp/test_main.cpp | 43 +++
04-ab_tree/python/ab_tree.py | 64 ++++
04-ab_tree/python/ab_tree_test.py | 111 +++++++
04-ab_tree/task.md | 9 +
05-ab_experiment/cpp/Makefile | 22 ++
05-ab_experiment/cpp/ab_experiment.cpp | 389 +++++++++++++++++++++++
05-ab_experiment/cpp/random.h | 59 ++++
05-ab_experiment/python/Makefile | 15 +
05-ab_experiment/python/ab_experiment.py | 259 +++++++++++++++
05-ab_experiment/task.md | 82 +++++
13 files changed, 1350 insertions(+)
create mode 100644 04-ab_tree/cpp/Makefile
create mode 100644 04-ab_tree/cpp/ab_tree.h
create mode 100644 04-ab_tree/cpp/ab_tree_test.cpp
create mode 100644 04-ab_tree/cpp/test_main.cpp
create mode 100644 04-ab_tree/python/ab_tree.py
create mode 100644 04-ab_tree/python/ab_tree_test.py
create mode 100644 04-ab_tree/task.md
create mode 100644 05-ab_experiment/cpp/Makefile
create mode 100644 05-ab_experiment/cpp/ab_experiment.cpp
create mode 100644 05-ab_experiment/cpp/random.h
create mode 100644 05-ab_experiment/python/Makefile
create mode 100755 05-ab_experiment/python/ab_experiment.py
create mode 100644 05-ab_experiment/task.md
diff --git a/04-ab_tree/cpp/Makefile b/04-ab_tree/cpp/Makefile
new file mode 100644
index 0000000..e6ab228
--- /dev/null
+++ b/04-ab_tree/cpp/Makefile
@@ -0,0 +1,12 @@
+test: ab_tree_test
+ ./$<
+
+CXXFLAGS=-std=c++11 -O2 -Wall -Wextra -g -Wno-sign-compare
+
+ab_tree_test: ab_tree_test.cpp ab_tree.h test_main.cpp
+	$(CXX) $(CXXFLAGS) $(filter %.cpp,$^) -o $@
+
+clean:
+ rm -f ab_tree_test
+
+.PHONY: clean test
diff --git a/04-ab_tree/cpp/ab_tree.h b/04-ab_tree/cpp/ab_tree.h
new file mode 100644
index 0000000..e77374b
--- /dev/null
+++ b/04-ab_tree/cpp/ab_tree.h
@@ -0,0 +1,135 @@
+#include <limits>
+#include <vector>
+#include <tuple>
+#include <iostream>
+
+using namespace std;
+
+// If the condition is not true, report an error and halt.
+#define EXPECT(condition, message) do { if (!(condition)) expect_failed(message); } while (0)
+
+void expect_failed(const string& message);
+
+/*** One node ***/
+
+class ab_node {
+ public:
+ // Keys stored in this node and the corresponding children
+    // The vectors are large enough to accommodate one extra entry
+ // in overflowing nodes.
+ vector<ab_node *> children;
+ vector<int> keys;
+ ab_node *parent;
+
+    // If this node contains the given key, return true and set i to the key's position.
+    // Otherwise return false and set i to the position of the first key greater than the given one (keys.size() if there is none).
+ bool find_branch(int key, int &i)
+ {
+ i = 0;
+ while (i < keys.size() && keys[i] <= key) {
+ if (keys[i] == key)
+ return true;
+ i++;
+ }
+ return false;
+ }
+
+    // Insert a new key at position i and add a new child between keys i and i+1.
+ void insert_branch(int i, int key, ab_node *child)
+ {
+ keys.insert(keys.begin() + i, key);
+ children.insert(children.begin() + i + 1, child);
+ }
+
+ // An auxiliary function for displaying a sub-tree under this node.
+ void show(int indent);
+};
+
+/*** Tree ***/
+
+class ab_tree {
+ public:
+ int a; // Minimum allowed number of children
+ int b; // Maximum allowed number of children
+ ab_node *root; // Root node (even a tree with no keys has a root)
+ int num_nodes; // We keep track of how many nodes the tree has
+
+ // Create a new node and return a pointer to it.
+ ab_node *new_node(ab_node* parent)
+ {
+ ab_node *n = new ab_node;
+ n->keys.reserve(b);
+ n->children.reserve(b+1);
+ n->parent = parent;
+ num_nodes++;
+ return n;
+ }
+
+ // Delete a given node, assuming that its children have been already unlinked.
+ void delete_node(ab_node *n)
+ {
+ num_nodes--;
+ delete n;
+ }
+
+ // Constructor: initialize an empty tree with just the root.
+ ab_tree(int a, int b)
+ {
+ EXPECT(a >= 2 && b >= 2*a - 1, "Invalid values of a,b");
+ this->a = a;
+ this->b = b;
+ num_nodes = 0;
+ // The root has no keys and one null child pointer.
+ root = new_node(nullptr);
+ root->children.push_back(nullptr);
+ }
+
+ // An auxiliary function for deleting a subtree recursively.
+ void delete_tree(ab_node *n)
+ {
+ for (int i=0; i < n->children.size(); i++)
+ if (n->children[i])
+ delete_tree(n->children[i]);
+ delete_node(n);
+ }
+
+    // Destructor: delete all nodes. Virtual, because the class has virtual methods and is subclassed.
+    virtual ~ab_tree()
+    {
+        delete_tree(root);
+        EXPECT(num_nodes == 0, "Memory leak detected: some nodes were not deleted");
+    }
+
+ // Find a key: returns true if it is present in the tree.
+ bool find(int key)
+ {
+ ab_node *n = root;
+ while (n) {
+ int i;
+ if (n->find_branch(key, i))
+ return true;
+ n = n->children[i];
+ }
+ return false;
+ }
+
+ // Display the tree on standard output in human-readable form.
+ void show();
+
+ // Check that the data structure satisfies all invariants.
+ void audit();
+
+    // Split the node into two: move some children of n into a newly created node such that
+    // n keeps exactly size children. Return the new node and the key separating n from it.
+    virtual pair<ab_node*, int> split_node(ab_node* n, int size)
+    {
+        EXPECT(false, "split_node is not implemented");  // FIXME: Implement
+        return pair<ab_node*, int>(nullptr, 0);           // a non-void function must not fall off its end (UB)
+    }
+
+ // Insert: add key to the tree (unless it was already present).
+ virtual void insert(int key)
+ {
+ // FIXME: Implement
+ }
+};
diff --git a/04-ab_tree/cpp/ab_tree_test.cpp b/04-ab_tree/cpp/ab_tree_test.cpp
new file mode 100644
index 0000000..d718d87
--- /dev/null
+++ b/04-ab_tree/cpp/ab_tree_test.cpp
@@ -0,0 +1,150 @@
+#include <functional>
+#include <cstdlib>
+#include <vector>
+
+#include "ab_tree.h"
+
+// Debugging output: showing trees prettily on standard output.
+
+void ab_tree::show()
+{
+ root->show(0);
+ for (int i=0; i<70; i++)
+ cout << '=';
+ cout << endl;
+}
+
+void ab_node::show(int indent)
+{
+ for (int i = children.size() - 1; i >= 0 ; i--) {
+ if (i < keys.size()) {
+ for (int j = 0; j < indent; j++)
+ cout << " ";
+ cout << keys[i] << endl;
+ }
+ if (children[i])
+ children[i]->show(indent+1);
+ }
+}
+
+// Invariant checks
+
+void audit_subtree(ab_tree *tree, ab_node *n, ab_node* parent, int key_min, int key_max, int depth, int &leaf_depth)
+{
+    // Recursively check the invariants of the subtree rooted at n, whose keys must lie in [key_min, key_max].
+    if (!n) {
+        // Null children hang below leaves: all must share one depth (leaf_depth < 0 means none seen yet).
+        if (leaf_depth < 0)
+            leaf_depth = depth;
+        EXPECT(depth == leaf_depth, "Leaves are not on the same level");
+        return;
+    }
+    // Check consistency of parent pointers
+    EXPECT(n->parent == parent, "Inconsistent parent pointers");
+
+    // The number of children must be in the allowed range.
+    if (depth > 0)
+        EXPECT(n->children.size() >= tree->a, "Too few children");
+    EXPECT(n->children.size() <= tree->b, "Too many children");
+
+    // We must have one more children than keys.
+    EXPECT(n->children.size() == n->keys.size() + 1, "Number of keys does not match number of children");
+
+    // Allow a degenerate root with 0 keys, but only for the empty tree (its single child is null).
+    if (n->children.size() == 1 && !n->children[0])
+        return;
+
+    // Check order of keys: they must be increasing and bounded by the keys on the higher levels.
+    for (int i = 0; i < n->keys.size(); i++) {
+        EXPECT(n->keys[i] >= key_min && n->keys[i] <= key_max, "Wrong key order");
+        EXPECT(i == 0 || n->keys[i-1] < n->keys[i], "Wrong key order");
+    }
+
+    // Call on children recursively; child i may only hold keys strictly between keys[i-1] and keys[i].
+    for (int i = 0; i < n->children.size(); i++) {
+        int tmin, tmax;
+        if (i == 0)
+            tmin = key_min;
+        else
+            tmin = n->keys[i-1] + 1;
+        if (i < n->keys.size())
+            tmax = n->keys[i] - 1;
+        else
+            tmax = key_max;
+        audit_subtree(tree, n->children[i], n, tmin, tmax, depth+1, leaf_depth);
+    }
+}
+
+void ab_tree::audit()
+{
+ EXPECT(root, "Tree has no root");
+ int leaf_depth = -1;
+ audit_subtree(this, root, nullptr, numeric_limits<int>::min(), numeric_limits<int>::max(), 0, leaf_depth);
+}
+
+// A basic test: insert a couple of keys and show how the tree evolves.
+
+void test_basic()
+{
+ cout << "## Basic test" << endl;
+
+ ab_tree t(2, 3);
+ vector<int> keys = { 3, 1, 4, 5, 9, 2, 6, 8, 7, 0 };
+ for (int k : keys) {
+ t.insert(k);
+ t.show();
+ t.audit();
+ EXPECT(t.find(k), "Inserted key disappeared");
+ }
+
+ for (int k : keys)
+ EXPECT(t.find(k), "Some keys are missing at the end");
+}
+
+// The main test: inserting a lot of keys and checking that they are really there.
+// We will insert num_items distinct keys from the set {0,...,range-1}, where range is a prime.
+
+void test_main(int a, int b, int range, int num_items)
+{
+ // Create a new tree.
+ cout << "## Test: a=" << a << " b=" << b << " range=" << range << " num_items=" << num_items << endl;
+ ab_tree t(a, b);
+
+ int key = 1;
+ int step = (int)(range * 1.618);
+ int audit_time = 1;
+
+ // Insert keys.
+ for (int i=1; i <= num_items; i++) {
+ t.insert(key);
+ // Audit the tree occasionally.
+ if (i == audit_time || i == num_items) {
+ // cout << "== Audit at " << i << endl;
+ // t.show();
+ t.audit();
+ audit_time = (int)(audit_time * 1.33) + 1;
+ }
+ key = (key + step) % range;
+ }
+
+ // Check that the tree contains exactly the items it should contain.
+ key = 1;
+ for (int i=1; i < range; i++) {
+ bool found = t.find(key);
+ // cout << "Step #" << i << ": find(" << key << ") = " << found << endl;
+ EXPECT(found == (i <= num_items), "Tree contains wrong keys");
+ key = (key + step) % range;
+ }
+}
+
+/*** A list of all tests ***/
+
+vector<pair<string, function<void()>>> tests = {
+ { "basic", [] { test_basic(); } },
+ { "small-2,3", [] { test_main(2, 3, 997, 700); } },
+ { "small-2,4", [] { test_main(2, 4, 997, 700); } },
+ { "big-2,3", [] { test_main(2, 3, 999983, 700000); } },
+ { "big-2,4", [] { test_main(2, 4, 999983, 700000); } },
+ { "big-10,20", [] { test_main(10, 20, 999983, 700000); } },
+ { "big-100,200", [] { test_main(100, 200, 999983, 700000); } },
+};
diff --git a/04-ab_tree/cpp/test_main.cpp b/04-ab_tree/cpp/test_main.cpp
new file mode 100644
index 0000000..3f4aff0
--- /dev/null
+++ b/04-ab_tree/cpp/test_main.cpp
@@ -0,0 +1,43 @@
+#include <cstdlib>
+#include <functional>
+#include <iostream>
+#include <string>
+#include <utility>
+#include <vector>
+
+using namespace std;
+
+extern vector<pair<string, function<void()>>> tests;
+
+void expect_failed(const string& message) {
+ cerr << "Test error: " << message << endl;
+ exit(1);
+}
+
+int main(int argc, char* argv[]) {
+ vector<string> required_tests;
+
+ if (argc > 1) {
+ required_tests.assign(argv + 1, argv + argc);
+ } else {
+ for (const auto& test : tests)
+ required_tests.push_back(test.first);
+ }
+
+ for (const auto& required_test : required_tests) {
+ bool found = false;
+ for (const auto& test : tests)
+ if (required_test == test.first) {
+ cerr << "Running test " << required_test << endl;
+ test.second();
+ found = true;
+ break;
+ }
+ if (!found) {
+ cerr << "Unknown test " << required_test << endl;
+ return 1;
+ }
+ }
+
+ return 0;
+}
diff --git a/04-ab_tree/python/ab_tree.py b/04-ab_tree/python/ab_tree.py
new file mode 100644
index 0000000..baae16f
--- /dev/null
+++ b/04-ab_tree/python/ab_tree.py
@@ -0,0 +1,64 @@
+#!/usr/bin/env python3
+
+class ABNode:
+ """Single node in an ABTree.
+
+ Each node contains keys and children
+ (with one more children than there are keys).
+ We also store a pointer to node's parent (None for root).
+ """
+ def __init__(self, keys = None, children = None, parent = None):
+ self.keys = keys if keys is not None else []
+ self.children = children if children is not None else []
+ self.parent = parent
+
+ def find_branch(self, key):
+ """ Try finding given key in this node.
+
+ If this node contains the given key, returns (True, key_position).
+ If not, returns (False, first_position_with_key_greater_than_the_given).
+ """
+ i = 0
+ while (i < len(self.keys) and self.keys[i] < key):
+ i += 1
+
+ return (i < len(self.keys) and self.keys[i] == key, i)
+
+ def insert_branch(self, i, key, child):
+ """ Insert a new key and a given child between keys i and i+1."""
+ self.keys.insert(i, key)
+ self.children.insert(i + 1, child)
+
+class ABTree:
+ """A class representing the whole ABTree."""
+ def __init__(self, a, b):
+ assert a >= 2 and b >= 2 * a - 1, "Invalid values of a, b: {}, {}".format(a, b)
+ self.a = a
+ self.b = b
+ self.root = ABNode(children=[None])
+
+ def find(self, key):
+ """Find a key in the tree.
+
+ Returns True if the key is present, False otherwise.
+ """
+ node = self.root
+ while node:
+ found, i = node.find_branch(key)
+ if found: return True
+ node = node.children[i]
+ return False
+
+ def split_node(self, node, size):
+ """Helper function for insert
+
+ Split node into two nodes such that original node contains first _size_ children.
+ Return new node and the key separating nodes.
+ """
+ # TODO: Implement and use in insert method
+ raise NotImplementedError
+
+ def insert(self, key):
+ """Add a given key to the tree, unless already present."""
+ # TODO: Implement
+ raise NotImplementedError
diff --git a/04-ab_tree/python/ab_tree_test.py b/04-ab_tree/python/ab_tree_test.py
new file mode 100644
index 0000000..110553c
--- /dev/null
+++ b/04-ab_tree/python/ab_tree_test.py
@@ -0,0 +1,111 @@
+#!/usr/bin/env python3
+import math
+import sys
+
+from ab_tree import ABNode, ABTree
+
+def show(tree):
+ """Show a tree."""
+ def show_node(node, indent):
+ for i in reversed(range(len(node.children))):
+ if i < len(node.keys):
+ print(" " * indent, node.keys[i], sep="")
+ if node.children[i]:
+ show_node(node.children[i], indent + 1)
+
+ show_node(tree.root, 0)
+ print("=" * 70)
+
+def audit(tree):
+ """Invariant check for the given tree."""
+ def audit_node(node, parent, key_min, key_max, depth, leaf_depth):
+ if not node:
+ # Check that all leaves are on the same level.
+ if leaf_depth is None:
+ leaf_depth = depth
+ assert depth == leaf_depth, "Leaves are not on the same level"
+
+ else:
+ # Check consistency of parent pointers
+ assert node.parent == parent, "Inconsistent parent pointers"
+
+ # The number of children must be in the allowed range.
+ assert depth == 0 or len(node.children) >= tree.a, "Too few children"
+ assert len(node.children) <= tree.b, "Too many children"
+
+ # We must have one more children than keys
+ assert len(node.children) == len(node.keys) + 1, "Number of keys does not match number of children"
+
+ # Check that keys are increasing and in (key_min, key_max) range.
+ for i in range(len(node.keys)):
+ assert node.keys[i] > key_min and node.keys[i] < key_max, "Wrong key order"
+ assert i == 0 or node.keys[i - 1] < node.keys[i], "Wrong key order"
+
+ # Check children recursively
+ for i in range(len(node.children)):
+ child_min = node.keys[i - 1] if i > 0 else key_min
+ child_max = node.keys[i] if i < len(node.keys) else key_max
+ leaf_depth = audit_node(node.children[i], node, child_min, child_max, depth + 1, leaf_depth)
+
+ return leaf_depth
+
+ assert tree.root, "Tree has no root"
+ audit_node(tree.root, None, -math.inf, math.inf, 0, None)
+
+def test_basic():
+ """Insert a couple of keys and show how the tree evolves."""
+ print("## Basic test")
+
+ tree = ABTree(2, 3)
+ keys = [3, 1, 4, 5, 9, 2, 6, 8, 7, 0]
+ for key in keys:
+ tree.insert(key)
+ show(tree)
+ audit(tree)
+ assert tree.find(key), "Inserted key disappeared"
+
+ for key in keys:
+ assert tree.find(key), "Some keys are missing at the end"
+
+def test_main(a, b, limit, num_items):
+ print("## Test: a={} b={} range={} num_items={}".format(a, b, limit, num_items))
+
+ tree = ABTree(a, b)
+
+ # Insert keys
+ step = int(limit * 1.618)
+ key, audit_time = 1, 1
+ for i in range(num_items):
+ tree.insert(key)
+ key = (key + step) % limit
+
+ # Audit the tree occasionally
+ if i == audit_time or i + 1 == num_items:
+ audit(tree)
+ audit_time = int(audit_time * 1.33) + 1
+
+ # Check the content of the tree
+ key = 1
+ for i in range(limit):
+ assert tree.find(key) == (i < num_items), "Tree contains wrong keys"
+ key = (key + step) % limit
+
+tests = [
+ ("basic", test_basic),
+ ("small-2,3", lambda: test_main(2, 3, 997, 700)),
+ ("small-2,4", lambda: test_main(2, 4, 997, 700)),
+ ("big-2,3", lambda: test_main(2, 3, 99991, 70000)),
+ ("big-2,4", lambda: test_main(2, 4, 99991, 70000)),
+ ("big-10,20", lambda: test_main(10, 20, 99991, 70000)),
+ ("big-100,200", lambda: test_main(100, 200, 99991, 70000)),
+]
+
+if __name__ == "__main__":
+ for required_test in sys.argv[1:] or [name for name, _ in tests]:
+ for name, test in tests:
+ if name == required_test:
+ print("Running test {}".format(name), file=sys.stderr)
+ test()
+ break
+ else:
+ raise ValueError("Unknown test {}".format(name))
diff --git a/04-ab_tree/task.md b/04-ab_tree/task.md
new file mode 100644
index 0000000..a0444d9
--- /dev/null
+++ b/04-ab_tree/task.md
@@ -0,0 +1,9 @@
+You are given a representation of _(a, b)-tree_ with a `find` operation,
+and a representation of an _(a, b)-tree node_.
+
+Your goal is to implement an `insert` operation, which inserts the given
+key in the tree (or does nothing if the key is already present). Preferably,
+you should also implement `split_node` method and use it properly in
+your `insert` implementation.
+
+You should submit the `ab_tree.*` file (but not `ab_tree_test.*` files).
diff --git a/05-ab_experiment/cpp/Makefile b/05-ab_experiment/cpp/Makefile
new file mode 100644
index 0000000..967fad4
--- /dev/null
+++ b/05-ab_experiment/cpp/Makefile
@@ -0,0 +1,22 @@
+STUDENT_ID ?= PLEASE_SET_STUDENT_ID
+
+.PHONY: test
+test: ab_experiment
+ @rm -rf out && mkdir out
+ @for test in insert min random ; do \
+ for mode in '2-3' '2-4' ; do \
+ echo t-$$test-$$mode ; \
+ ./ab_experiment $$test $(STUDENT_ID) $$mode >out/t-$$test-$$mode ; \
+ done ; \
+ done
+
+INCLUDE ?= .
+CXXFLAGS=-std=c++11 -O2 -Wall -Wextra -g -Wno-sign-compare -I$(INCLUDE)
+
+ab_experiment: ab_tree.h ab_experiment.cpp $(INCLUDE)/random.h
+	$(CXX) $(CPPFLAGS) $(CXXFLAGS) $(filter %.cpp,$^) -o $@
+
+.PHONY: clean
+clean::
+ rm -f ab_experiment
+ rm -rf out
diff --git a/05-ab_experiment/cpp/ab_experiment.cpp b/05-ab_experiment/cpp/ab_experiment.cpp
new file mode 100644
index 0000000..31b520e
--- /dev/null
+++ b/05-ab_experiment/cpp/ab_experiment.cpp
@@ -0,0 +1,389 @@
+#include <algorithm>
+#include <functional>
+#include <string>
+#include <utility>
+#include <vector>
+#include <iostream>
+#include <cmath>
+
+#include "ab_tree.h"
+#include "random.h"
+
+using namespace std;
+
+void expect_failed(const string& message) {
+ cerr << "Test error: " << message << endl;
+ exit(1);
+}
+
+/*
+ * A modified (a,b)-tree for benchmarking.
+ *
+ * We inherit the implementation of operations from the ab_tree class
+ * and extend it by a remove operation and by keeping statistics on the
+ * number of operations and the total number of structural changes
+ * (node splits and merges).
+ *
+ * Please make sure that your ab_tree class defines the insert() and
+ * split_node() methods as virtual.
+ */
+
+class BenchmarkingABTree : public ab_tree {
+public:
+ int num_operations;
+ int num_struct_changes;
+
+ BenchmarkingABTree(int a, int b) : ab_tree(a,b)
+ {
+ reset();
+ }
+
+ void reset()
+ {
+ num_operations = 0;
+ num_struct_changes = 0;
+ }
+
+ pair<ab_node*, int> split_node(ab_node *node, int size) override
+ {
+ num_struct_changes++;
+ return ab_tree::split_node(node, size);
+ }
+
+ void insert(int key) override
+ {
+ num_operations++;
+ ab_tree::insert(key);
+ }
+
+    // Return the average number of structural changes per operation.
+ double struct_changes_per_op()
+ {
+ if (num_operations > 0)
+ return (double) num_struct_changes / num_operations;
+ else
+ return 0;
+ }
+
+ // Delete key from the tree. Does nothing if the key is not in the tree.
+ void remove(int key){
+ num_operations += 1;
+
+ // Find the key to be deleted
+ ab_node *node = root;
+ int i;
+ bool found = node->find_branch(key, i);
+ while(!found){
+ node = node->children[i];
+ if (!node) return; // Key is not in the tree
+ found = node->find_branch(key, i);
+ }
+
+ // If node is not a leaf, we need to swap the key with its successor
+ if (node->children[0] != nullptr){ // Only leaves have nullptr as children
+ // Successor is leftmost key in the right subtree of key
+ ab_node *succ = min(node->children[i+1]);
+ swap(node->keys[i], succ->keys[0]);
+ node = succ;
+ }
+
+ // Now run the main part of the delete
+ remove_leaf(key, node);
+ }
+
+private:
+ // Main part of the remove
+ void remove_leaf(int key, ab_node* node)
+ {
+ EXPECT(node != nullptr, "Trying to delete key from nullptr");
+ EXPECT(node->children[0] == nullptr, "Leaf's child must be nullptr");
+
+ while(1){
+ // Find the key in the node
+ int key_position;
+ bool found = node->find_branch(key, key_position);
+ EXPECT(found, "Trying to delete key that is not in the node.");
+
+ // Start with the deleting itself
+ node->keys.erase(node->keys.cbegin() + key_position);
+ node->children.erase(node->children.cbegin() + key_position + 1);
+
+ // No underflow means we are done
+ if (node->children.size() >= a) return;
+
+ // Root may underflow, but cannot have just one child (unless tree is empty)
+ if (node == root){
+ if ((node->children.size() == 1) && (root->children[0] != nullptr)){
+ ab_node *old_root = root;
+ root = root->children[0];
+ root->parent = nullptr;
+ delete_node(old_root);
+ }
+ return;
+ }
+
+ ab_node *brother;
+ int separating_key_pos;
+ bool tmp;
+ tie(brother, separating_key_pos, tmp) = get_brother(node);
+ int separating_key = node->parent->keys[separating_key_pos];
+
+ // First check whether we can steal brother's child
+ if (brother->children.size() > a){
+ steal_child(node);
+ return;
+ }
+
+ // If the brother is too small, we merge with him and propagate the delete
+ node = merge_node(node);
+ node = node->parent;
+ key = separating_key;
+ key_position = separating_key_pos;
+ }
+ }
+
+ // Return the leftmost node of a subtree rooted at node.
+ ab_node* min(ab_node *node)
+ {
+ EXPECT(node != nullptr, "Trying to search for minimum of nullptr");
+ while (node->children[0]) {
+ node = node->children[0];
+ }
+ return node;
+ }
+
+ // Return the left brother if it exists, otherwise return right brother.
+ // Returns tuple (brother, key_position, is_left_brother), where
+ // key_position is a position of the key that separates node and brother in their parent.
+ tuple<ab_node*, int, bool> get_brother(ab_node* node)
+ {
+ ab_node *parent = node->parent;
+ EXPECT(parent != nullptr, "Node without parent has no brother");
+
+ // Find node in parent's child list
+ int i;
+ for(i = 0; i < parent->children.size(); ++i){
+ ab_node *c = parent->children[i];
+ if (c == node) break;
+ }
+ EXPECT(i < parent->children.size(), "Node is not inside its parent");
+
+ if (i == 0){
+ return make_tuple(parent->children[1], 0, false);
+ }
+ else{
+ return make_tuple(parent->children[i - 1], i - 1, true);
+ }
+ }
+
+ // Transfer one child from node's left brother to the node.
+ // If node has no left brother, use right brother instead.
+ void steal_child(ab_node* node)
+ {
+ ab_node *brother;
+ int separating_key_pos;
+ bool is_left_brother;
+ tie(brother, separating_key_pos, is_left_brother) = get_brother(node);
+ int separating_key = node->parent->keys[separating_key_pos];
+
+ EXPECT(brother->children.size() > a, "Stealing child causes underflow in brother!");
+ EXPECT(node->children.size() < b, "Stealing child causes overflow in the node!");
+
+ // We steal either from front or back
+ int steal_position, target_position;
+ if (is_left_brother){
+ steal_position = brother->children.size()-1;
+ target_position = 0;
+ }
+ else{
+ steal_position = 0;
+ target_position = node->children.size();
+ }
+ // Steal the child
+ ab_node *stolen_child = brother->children[steal_position];
+ if (stolen_child != nullptr){
+ stolen_child->parent = node;
+ }
+ node->children.insert(node->children.cbegin() + target_position, stolen_child);
+ brother->children.erase(brother->children.cbegin() + steal_position);
+
+ // List of keys is shorter than list of children
+ if (is_left_brother) steal_position -= 1;
+ else target_position -= 1;
+
+ // Update keys
+ node->keys.insert(node->keys.cbegin() + target_position, separating_key);
+ node->parent->keys[separating_key_pos] = brother->keys[steal_position];
+ brother->keys.erase(brother->keys.cbegin() + steal_position);
+ }
+
+public:
+ // Merge node with its left brother and destroy the node. Must not cause overflow!
+ // Returns result of the merge.
+ // If node has no left brother, use right brother instead.
+ ab_node* merge_node(ab_node* node){
+ num_struct_changes += 1;
+
+ ab_node *brother;
+ int separating_key_pos;
+ bool is_left_brother;
+ tie(brother, separating_key_pos, is_left_brother) = get_brother(node);
+ int separating_key = node->parent->keys[separating_key_pos];
+
+ // We swap brother and node if necessary so that the node is always on the right
+ if (!is_left_brother) swap(brother, node);
+
+ for (auto c: node->children)
+ brother->children.push_back(c);
+ brother->keys.push_back(separating_key);
+ for (auto k: node->keys)
+ brother->keys.push_back(k);
+
+ EXPECT(brother->children.size() <= b, "Merge caused overflow!");
+
+ // Update parent pointers in non-leaf
+ if (brother->children[0] != nullptr){
+ for (auto c : brother->children)
+ c->parent = brother;
+ }
+
+ delete_node(node);
+ return brother;
+ }
+};
+
+int a, b;
+RandomGen *rng; // Random generator object
+
+// An auxiliary function for generating a random permutation.
+vector<int> random_permutation(int n)
+{
+ vector<int> perm;
+ for (int i=0; i<n; i++)
+ perm.push_back(i);
+ for (int i=0; i<n-1; i++)
+ swap(perm[i], perm[i + rng->next_range(n-i)]);
+ return perm;
+}
+
+void test_insert()
+{
+    for (int e=32; e<=64; e++) {
+        int n = (int) pow(2, e/4.);     // tree sizes grow geometrically, n = 2^(e/4)
+        BenchmarkingABTree tree(a, b);  // direct-init: the tree owns raw node pointers and must not be copied
+        // Insert the keys 0..n-1 in random order.
+        vector<int> perm = random_permutation(n);
+        for (int x : perm)
+            tree.insert(x);
+        // Report n and the average number of structural changes per operation.
+        cout << n << " " << tree.struct_changes_per_op() << endl;
+    }
+}
+
+void test_random()
+{
+    for (int e=32; e<=64; e++) {
+        int n = (int) pow(2, e/4.);     // tree sizes grow geometrically, n = 2^(e/4)
+        BenchmarkingABTree tree(a, b);  // direct-init: the tree owns raw node pointers and must not be copied
+
+        // We keep track of elements present and not present in the tree
+        vector<int> elems;
+        vector<int> anti_elems;
+        elems.reserve(n);
+        anti_elems.reserve(n+2);        // n+1 odd values, plus one pushed before each erase below
+
+        for (int x = 0; x < 2*n; x+=2){
+            tree.insert(x);
+            elems.push_back(x);
+        }
+
+        for (int i = -1; i <2*n + 1; i+=2)
+            anti_elems.push_back(i);
+
+        for (int i=0; i<n; i++){
+            int r, x;
+            // Delete random element
+            r = rng->next_range(elems.size());
+            x = elems[r];
+            tree.remove(x);
+            elems.erase(elems.cbegin() + r);
+            anti_elems.push_back(x);
+
+            // Insert random "anti-element"
+            r = rng->next_range(anti_elems.size());
+            x = anti_elems[r];
+            tree.insert(x);
+            elems.push_back(x);
+            anti_elems.erase(anti_elems.cbegin() + r);
+        }
+        // Report n and the average number of structural changes per operation (initial inserts included).
+        cout << n << " " << tree.struct_changes_per_op() << endl;
+    }
+}
+
+void test_min()
+{
+    for (int e=32; e<=64; e++) {
+        int n = (int) pow(2, e/4.);     // tree sizes grow geometrically, n = 2^(e/4)
+        BenchmarkingABTree tree(a, b);  // direct-init: the tree owns raw node pointers and must not be copied
+        // Fill the tree with the keys 0..n-1 in increasing order.
+        for (int x = 0; x < n; x++)
+            tree.insert(x);
+        // Repeatedly delete and re-insert the minimum key.
+        for (int i=0; i<n; i++){
+            tree.remove(0);
+            tree.insert(0);
+        }
+        // Report n and the average number of structural changes per operation (initial inserts included).
+        cout << n << " " << tree.struct_changes_per_op() << endl;
+    }
+}
+
+vector<pair<string, function<void()>>> tests = {
+ { "insert", test_insert },
+ { "random", test_random },
+ { "min", test_min },
+};
+
+int main(int argc, char **argv)
+{
+ if (argc != 4) {
+ cerr << "Usage: " << argv[0] << " <test> <student-id> (2-3|2-4)" << endl;
+ return 1;
+ }
+
+ string which_test = argv[1];
+ string id_str = argv[2];
+ string mode = argv[3];
+
+ try {
+ rng = new RandomGen(stoi(id_str));
+ } catch (...) {
+ cerr << "Invalid student ID" << endl;
+ return 1;
+ }
+
+ a = 2;
+ if (mode == "2-3")
+ b = 3;
+ else if (mode == "2-4")
+ b = 4;
+ else
+ {
+ cerr << "Last argument must be either '2-3' or '2-4'" << endl;
+ return 1;
+ }
+
+ for (const auto& test : tests) {
+ if (test.first == which_test)
+ {
+ cout.precision(12);
+ test.second();
+ return 0;
+ }
+ }
+ cerr << "Unknown test " << which_test << endl;
+ return 1;
+
+ return 0;
+}
diff --git a/05-ab_experiment/cpp/random.h b/05-ab_experiment/cpp/random.h
new file mode 100644
index 0000000..7d18ab6
--- /dev/null
+++ b/05-ab_experiment/cpp/random.h
@@ -0,0 +1,59 @@
+#ifndef DS1_RANDOM_H
+#define DS1_RANDOM_H
+#include <cstdint>
+
+/*
+ * This is the xoroshiro128+ random generator, designed in 2016 by David Blackman
+ * and Sebastiano Vigna, distributed under the CC-0 license. For more details,
+ * see http://vigna.di.unimi.it/xorshift/.
+ *
+ * Rewritten to C++ by Martin Mares, also placed under CC-0.
+ */
+
+class RandomGen {
+    uint64_t state[2];
+
+    uint64_t rotl(uint64_t x, int k)
+    {
+        return (x << k) | (x >> (64 - k));
+    }
+
+    public:
+    // Initialize the generator, set its seed and warm it up.
+    RandomGen(unsigned int seed)
+    {
+        state[0] = seed * 0xdeadbeef;
+        state[1] = seed ^ 0xc0de1234;
+        for (int i=0; i<100; i++)
+            next_u64();
+    }
+
+    // Generate a random 64-bit number.
+    uint64_t next_u64(void)
+    {
+        uint64_t s0 = state[0], s1 = state[1];
+        uint64_t result = s0 + s1;
+        s1 ^= s0;
+        state[0] = rotl(s0, 55) ^ s1 ^ (s1 << 14);
+        state[1] = rotl(s1, 36);
+        return result;
+    }
+
+    // Generate a random 32-bit number.
+    uint32_t next_u32(void)
+    {
+        return next_u64() >> 11;
+    }
+
+    // Generate a number between 0 and range-1 (range must be nonzero).
+    unsigned int next_range(unsigned int range)
+    {
+        /*
+         * This is not perfectly uniform, unless the range is a power of two.
+         * However, for 64-bit random values and 32-bit ranges, the bias is
+         * insignificant.
+         */
+        return next_u64() % range;
+    }
+};
+#endif // DS1_RANDOM_H
diff --git a/05-ab_experiment/python/Makefile b/05-ab_experiment/python/Makefile
new file mode 100644
index 0000000..48e36ae
--- /dev/null
+++ b/05-ab_experiment/python/Makefile
@@ -0,0 +1,15 @@
+STUDENT_ID ?= PLEASE_SET_STUDENT_ID
+
+.PHONY: test
+test: ab_experiment.py ab_tree.py
+ @rm -rf out && mkdir out
+ @for test in insert min random ; do \
+ for mode in '2-3' '2-4' ; do \
+ echo t-$$test-$$mode ; \
+ ./ab_experiment.py $$test $(STUDENT_ID) $$mode >out/t-$$test-$$mode ; \
+ done ; \
+ done
+
+.PHONY: clean
+clean::
+ rm -rf out __pycache__
diff --git a/05-ab_experiment/python/ab_experiment.py b/05-ab_experiment/python/ab_experiment.py
new file mode 100755
index 0000000..bc116e3
--- /dev/null
+++ b/05-ab_experiment/python/ab_experiment.py
@@ -0,0 +1,259 @@
+#!/usr/bin/env python3
+
+import sys
+import random
+
+from ab_tree import ABTree
+
+class BenchmarkingABTree(ABTree):
+ """A modified ABTree for benchmarking.
+
+ We inherit the implementation of operations from the ABTree class
+ and extend it with a delete operation (remove) and with statistics on the number
+ of operations (inserts and removes) and the number of structural changes (splits and merges).
+ """
+ def __init__(self, a, b):
+ ABTree.__init__(self, a, b)
+ self.reset() # Start with zeroed statistics
+
+ def reset(self):
+ """ Reset both statistics counters to zero. """
+ self.num_operations = 0 # Incremented by insert() and remove()
+ self.num_struct_changes = 0 # Incremented by split_node() and merge_node()
+
+ def struct_changes_per_op(self):
+ """Return the average number of structural changes per operation (0 if no operation was counted)."""
+ if self.num_operations > 0:
+ return self.num_struct_changes / self.num_operations
+ else:
+ return 0
+
+ def insert(self, key):
+ self.num_operations += 1 # Count the operation, then delegate to the inherited insert
+ ABTree.insert(self, key)
+
+ def split_node(self, node, size):
+ self.num_struct_changes += 1 # Every split counts as one structural change
+ return ABTree.split_node(self, node, size)
+
+ def remove(self, key):
+ """ Delete key from the tree. Does nothing if the key is not in the tree. """
+ self.num_operations += 1 # Counted even when the key turns out to be absent
+
+ # Find the key to be deleted
+ node = self.root
+ found, i = node.find_branch(key) # find_branch comes from ABTree; presumably i is the key index or the child to descend into -- confirm
+ while not found:
+ node = node.children[i]
+ if not node: return # Key is not in the tree
+ found, i = node.find_branch(key)
+
+ # If node is not a leaf, we need to swap the key with its successor
+ if node.children[0] is not None: # Only leaves have None as children
+ # Successor is leftmost key in the right subtree of key
+ succ = self._min(node.children[i+1])
+ node.keys[i], succ.keys[0] = succ.keys[0], node.keys[i]
+ node = succ # The key to delete now sits at the front of this leaf
+
+ # Now run the main part of the delete
+ self._remove_leaf(key, node)
+
+ def _remove_leaf(self, key, node):
+ """ Main part of the delete: remove key from the leaf node, then repair underflow by stealing a child
+ from a brother or by merging with it; after a merge the deletion continues in the parent. """
+ assert node is not None, "Trying to delete key from None"
+ assert node.children[0] is None, "Leaf's child must be None"
+
+ while True:
+ # Find the key in the node
+ found, key_position = node.find_branch(key)
+ assert found, "Trying to delete key that is not in the node."
+
+ # Start with the deleting itself
+ del node.keys[key_position]
+ del node.children[key_position + 1] # The child to the right of the deleted key goes with it
+
+ # No underflow means we are done
+ if len(node.children) >= self.a: return
+
+ # Root may underflow, but cannot have just one child (unless tree is empty)
+ if node == self.root:
+ if (len(node.children) == 1) and (self.root.children[0] is not None):
+ self.root = self.root.children[0]
+ self.root.parent = None
+ return
+
+ brother, separating_key_pos, _ = self._get_brother(node)
+ separating_key = node.parent.keys[separating_key_pos]
+
+ # First check whether we can steal brother's child
+ if len(brother.children) > self.a:
+ self._steal_child(node)
+ return
+
+ # If the brother is too small, we merge with him and propagate the delete
+ node = self.merge_node(node)
+ node, key, key_position = node.parent, separating_key, separating_key_pos # Next round deletes the separating key from the parent
+
+ def _min(self, node):
+ """ Return the leftmost node of a subtree rooted at node."""
+ assert node is not None
+ while node.children[0] is not None: # Follow first children until a leaf is reached
+ node = node.children[0]
+ return node
+
+ def _get_brother(self, node):
+ """ Return the left brother if it exists, otherwise return right brother.
+ returns tuple (brother, key_position, is_left_brother), where
+ key_position is a position of the key that separates node and brother in their parent.
+ """
+ parent = node.parent
+ assert parent is not None, "Node without parent has no brother"
+
+ # Find node in parent's child list
+ i = 0
+ for c in parent.children:
+ if c is node: break
+ else: i += 1
+ assert i < len(parent.children), "Node is not inside its parent"
+
+ if i == 0:
+ return parent.children[1], 0, False # Leftmost child: only a right brother exists
+ else:
+ return parent.children[i - 1], i - 1, True
+
+ def _steal_child(self, node):
+ """ Transfer one child from node's left brother to the node.
+ If node has no left brother, use right brother instead. Not counted as a structural change.
+ """
+ brother, separating_key_pos, is_left_brother = self._get_brother(node)
+ separating_key = node.parent.keys[separating_key_pos]
+
+ assert len(brother.children) > self.a, "Stealing child causes underflow in brother!"
+ assert len(node.children) < self.b, "Stealing child causes overflow in the node!"
+
+ # We steal either from front or back
+ if is_left_brother:
+ steal_position = len(brother.children)-1
+ target_position = 0
+ else:
+ steal_position = 0
+ target_position = len(node.children)
+ # Steal the child
+ stolen_child = brother.children[steal_position]
+ if stolen_child is not None: # Children of leaves are None and have no parent pointer to update
+ stolen_child.parent = node
+ node.children.insert(target_position, stolen_child)
+ del brother.children[steal_position]
+
+ # List of keys is shorter than list of children
+ if is_left_brother:
+ steal_position -= 1
+ else:
+ target_position -= 1
+ # Update keys: the separating key moves down into node, brother's outermost key moves up into the parent
+ node.keys.insert(target_position, separating_key)
+ node.parent.keys[separating_key_pos] = brother.keys[steal_position]
+ del brother.keys[steal_position]
+
+ def merge_node(self, node):
+ """ Merge node with its left brother and destroy the node. Must not cause overflow!
+ The parent is left untouched: removing the separating key and the stale child is up to the caller.
+ Returns result of the merge.
+ If node has no left brother, use right brother instead.
+ """
+ self.num_struct_changes += 1 # Every merge counts as one structural change
+
+ brother, separating_key_pos, is_left_brother = self._get_brother(node)
+ separating_key = node.parent.keys[separating_key_pos]
+
+ # We swap brother and node if necessary so that the node is always on the right
+ if not is_left_brother:
+ brother, node = node, brother
+
+ brother.children.extend(node.children)
+ brother.keys.append(separating_key) # The separating key is copied down between the two key lists
+ brother.keys.extend(node.keys)
+
+ assert len(brother.children) <= self.b, "Merge caused overflow!"
+
+ # Update parent pointers in non-leaf
+ if brother.children[0] is not None:
+ for c in brother.children:
+ c.parent = brother
+ return brother
+
+def test_insert(): # Insert test: for each n, insert the keys 0..n-1 in random order into a fresh tree
+ for exp in range(32, 64):
+ n = int(2**(exp/4))
+ tree = BenchmarkingABTree(a, b) # a and b are module-level globals set under __main__
+
+ for elem in random.sample(range(n), n): # A random permutation of 0..n-1
+ tree.insert(elem)
+
+ print(n, tree.struct_changes_per_op()) # One output line per n
+
+def test_random():
+ for exp in range(32, 64):
+ n = int(2**(exp/4))
+ tree = BenchmarkingABTree(a, b)
+
+ for elem in range(0, 2*n, 2):
+ tree.insert(elem)
+
+ # We keep track of elements present and not present in the tree
+ elems = list(range(0, n, 2))
+ anti_elems = list(range(-1, 2*n+1, 2))
+
+ for _ in range(n):
+ # Delete random element
+ elem = random.choice(elems)
+ tree.remove(elem)
+ elems.remove(elem)
+ anti_elems.append(elem)
+
+ # Insert random "anti-element"
+ elem = random.choice(anti_elems)
+ tree.insert(elem)
+ elems.append(elem)
+ anti_elems.remove(elem)
+
+ print(n, tree.struct_changes_per_op())
+
+def test_min(): # Min test: insert 0..n-1 in increasing order, then n times remove key 0 (the minimum) and insert it back
+ for exp in range(32, 64):
+ n = int(2 ** (exp / 4))
+ tree = BenchmarkingABTree(a, b) # a and b are module-level globals set under __main__
+
+ for i in range(n):
+ tree.insert(i)
+
+ for _ in range(n):
+ tree.remove(0)
+ tree.insert(0)
+
+ print(n, tree.struct_changes_per_op()) # The average also covers the initial inserts: reset() is never called
+
+tests = { # Maps the test name given on the command line to the function that runs it
+ "min": test_min,
+ "insert": test_insert,
+ "random": test_random,
+}
+
+if __name__ == '__main__':
+ if len(sys.argv) == 4: # Expected arguments: <test> <student-id> (2-3|2-4)
+ test, student_id = sys.argv[1], sys.argv[2]
+ a = 2 # a and b become module-level globals that the test functions read
+ if sys.argv[3] == "2-3":
+ b = 3
+ elif sys.argv[3] == "2-4":
+ b = 4
+ else:
+ raise ValueError("Last argument must be either '2-3' or '2-4'")
+ random.seed(student_id) # The student ID string seeds the generator
+ if test in tests:
+ tests[test]()
+ else:
+ raise ValueError("Unknown test {}".format(test))
+ else:
+ raise ValueError("Usage: {} <test> <student-id> (2-3|2-4)".format(sys.argv[0]))
diff --git a/05-ab_experiment/task.md b/05-ab_experiment/task.md
new file mode 100644
index 0000000..52f634b
--- /dev/null
+++ b/05-ab_experiment/task.md
@@ -0,0 +1,82 @@
+## Goal
+
+The goal of this assignment is to evaluate your implementation of (a,b)-trees
+experimentally and compare performance of (2,3) and (2,4)-trees.
+
+You are given a test program (`ab_experiment`) which is used to evaluate your
+implementation of the previous assignment. The test program augments your implementation
+by implementing a `remove` method and it performs the following experiments:
+
+- _Insert test:_ Insert _n_ elements in random order (a random permutation of the
+ numbers 0, ..., _n_-1).
+- _Min test:_ Insert _n_ elements sequentially and then _n_ times repeat: remove the minimal
+ element in the tree and then insert it back.
+- _Random test:_ Insert _n_ elements sequentially and then _n_ times repeat: remove a random
+ element from the tree and then insert a random element into the tree. The removed element is
+ always present in the tree and the inserted element is always *not* present in the tree.
+
+
+The program tries each experiment with different values of _n_. In each try,
+it prints the average number of _structural changes_ per operation. A structural change is
+either a node split (in insert) or a merge of two nodes (in delete).
+
+You should perform these experiments and write a report, which contains the following
+plots of the measured data. Each plot should show the dependence of the average
+number of structural changes on the set size _n_.
+
+- The insert test: one curve for (2,3) tree, one for (2,4) tree.
+- The min test: one curve for (2,3) tree, one for (2,4) tree.
+- The random test: one curve for (2,3) tree, one for (2,4) tree.
+
+The report should discuss the experimental results and try to explain the observed
+behavior using theory from the lectures. (If you want, you can carry out further
+experiments to gain better understanding of the data structure and include these
+in the report. This is strictly optional.)
+
+You should submit a PDF file with the report (and no source code).
+You will get 1 temporary point upon submission if the file is syntactically correct;
+proper points will be assigned later.
+
+## Test program
+
+The test program is given three arguments:
+- The name of the test (`insert`, `min`, `random`).
+- The random seed: you should use the last 2 digits of your student ID (you can find
+ it in the Study Information System – just click on the Personal data icon). Please
+ include the random seed in your report.
+- The type of the tree to test (`2-3` or `2-4`).
+
+The output of the program contains one line per experiment, which consists of _n_ and the
+average number of structural changes.
+
+## Your implementation
+
+Please use your implementation from the previous exercise. Methods `split_node(...)`
+and `insert()` will be augmented by the test program. If you are performing
+node splits directly instead of using the `split_node(...)` method, you
+need to adjust the `BenchmarkingABTree` class accordingly.
+
+## Hints
+
+The following tools can be useful for producing nice plots:
+- [pandas](https://pandas.pydata.org/)
+- [matplotlib](https://matplotlib.org/)
+- [gnuplot](http://www.gnuplot.info/)
+
+A quick checklist for plots:
+- Is there a caption explaining what is plotted?
+- Are the axes clearly labelled? Do they have value ranges and units?
+- Have you mentioned that this axis has logarithmic scale? (Logarithmic graphs
+ are more fitting in some cases, but you should say so.)
+- Is it clear which curve means what?
+- Is it clear what are the measured points and what is an interpolated
+ curve between them?
+- Are there any overlaps? (E.g., the most interesting part of the curve
+ hidden underneath a label?)
+
+In your discussion, please distinguish the following kinds of claims.
+It should be always clear which is which:
+- Experimental results (i.e., the raw data you obtained from the experiments)
+- Theoretical facts (i.e., claims we have proved mathematically)
+- Your hypotheses (e.g., when you claim that the graph looks like something is true,
+ but you are not able to prove rigorously that it always holds)
--
GitLab