From c3d78d81f9682b2f7364bf903f85d467241983d7 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?David=20Mare=C4=8Dek?= <marecek@ufal.mff.cuni.cz> Date: Tue, 19 Mar 2024 09:07:13 +0100 Subject: [PATCH] ab-tree --- 04-ab_tree/cpp/Makefile | 12 ++ 04-ab_tree/cpp/ab_tree.h | 184 ++++++++++++++++++++++++++++++ 04-ab_tree/cpp/ab_tree_test.cpp | 177 ++++++++++++++++++++++++++++ 04-ab_tree/cpp/test_main.cpp | 43 +++++++ 04-ab_tree/python/ab_tree.py | 100 ++++++++++++++++ 04-ab_tree/python/ab_tree_test.py | 135 ++++++++++++++++++++++ 04-ab_tree/task.md | 15 +++ 7 files changed, 666 insertions(+) create mode 100644 04-ab_tree/cpp/Makefile create mode 100644 04-ab_tree/cpp/ab_tree.h create mode 100644 04-ab_tree/cpp/ab_tree_test.cpp create mode 100644 04-ab_tree/cpp/test_main.cpp create mode 100644 04-ab_tree/python/ab_tree.py create mode 100644 04-ab_tree/python/ab_tree_test.py create mode 100644 04-ab_tree/task.md diff --git a/04-ab_tree/cpp/Makefile b/04-ab_tree/cpp/Makefile new file mode 100644 index 0000000..e6ab228 --- /dev/null +++ b/04-ab_tree/cpp/Makefile @@ -0,0 +1,12 @@ +test: ab_tree_test + ./$< + +CXXFLAGS=-std=c++11 -O2 -Wall -Wextra -g -Wno-sign-compare + +ab_tree_test: ab_tree_test.cpp ab_tree.h test_main.cpp + $(CXX) $(CXXFLAGS) $^ -o $@ + +clean: + rm -f ab_tree_test + +.PHONY: clean test diff --git a/04-ab_tree/cpp/ab_tree.h b/04-ab_tree/cpp/ab_tree.h new file mode 100644 index 0000000..4668098 --- /dev/null +++ b/04-ab_tree/cpp/ab_tree.h @@ -0,0 +1,184 @@ +#include <limits> +#include <vector> +#include <tuple> +#include <iostream> + +using namespace std; + +// If the condition is not true, report an error and halt. +#define EXPECT(condition, message) do { if (!(condition)) expect_failed(message); } while (0) + +void expect_failed(const string& message); + +/*** One node ***/ + +class ab_node { + public: + // Keys stored in this node and the corresponding children + // The vectors are large enough to accommodate one extra entry + // in overflowing nodes. 
+ vector<ab_node *> children; + vector<int> keys; + ab_node *parent; + + // If this node contains the given key, return true and set i to key's position. + // Otherwise return false and set i to the first key greater than the given one. + bool find_branch(int key, int &i) + { + i = 0; + while (i < keys.size() && keys[i] <= key) { + if (keys[i] == key) + return true; + i++; + } + return false; + } + + // Insert a new key at position i and add a new child between keys i and i+1. + void insert_branch(int i, int key, ab_node *child) + { + keys.insert(keys.begin() + i, key); + children.insert(children.begin() + i + 1, child); + } + + // An auxiliary function for displaying a sub-tree under this node. + void show(int indent); +}; + +/*** Tree ***/ + +class ab_tree { + public: + int a; // Minimum allowed number of children + int b; // Maximum allowed number of children + ab_node *root; // Root node (even a tree with no keys has a root) + int num_nodes; // We keep track of how many nodes the tree has + + // Create a new node and return a pointer to it. + ab_node *new_node(ab_node* parent) + { + ab_node *n = new ab_node; + n->keys.reserve(b); + n->children.reserve(b+1); + n->parent = parent; + num_nodes++; + return n; + } + + // Delete a given node, assuming that its children have been already unlinked. + void delete_node(ab_node *n) + { + num_nodes--; + delete n; + } + + // Constructor: initialize an empty tree with just the root. + ab_tree(int a, int b) + { + EXPECT(a >= 2 && b >= 2*a - 1, "Invalid values of a,b"); + this->a = a; + this->b = b; + num_nodes = 0; + // The root has no keys and one null child pointer. + root = new_node(nullptr); + root->children.push_back(nullptr); + } + + // An auxiliary function for deleting a subtree recursively. + void delete_tree(ab_node *n) + { + for (int i=0; i < n->children.size(); i++) + if (n->children[i]) + delete_tree(n->children[i]); + delete_node(n); + } + + // Destructor: delete all nodes. 
+ ~ab_tree() + { + delete_tree(root); + EXPECT(num_nodes == 0, "Memory leak detected: some nodes were not deleted"); + } + + // Find a key: returns true if it is present in the tree. + bool find(int key) + { + ab_node *n = root; + while (n) { + int i; + if (n->find_branch(key, i)) + return true; + n = n->children[i]; + } + return false; + } + + // Delete the smallest element. + void delete_min() { + ab_node *node = root; + while (node->children[0]) + node = node->children[0]; + + node->children.erase(node->children.begin()); + node->keys.erase(node->keys.begin()); + + while (node->children.size() < a && node->parent) { + node = node->parent; + ab_node *first = node->children[0], *second = node->children[1]; + + // Merge the second to the first + if (second->children.size() == a) { + for (auto &c : second->children) { + first->children.push_back(c); + if (c) + c->parent = first; + } + node->children.erase(node->children.begin()+1); + + first->keys.push_back(node->keys[0]); + node->keys.erase(node->keys.begin()); + for (auto &k : second->keys) + first->keys.push_back(k); + + delete_node(second); + } + + // Move the leftest child of the second to the first + else { + second->children[0]->parent = first; + first->children.push_back(second->children[0]); + second->children.erase(second->children.begin()); + + first->keys.push_back(node->keys[0]); + node->keys[0] = second->keys[0]; + second->keys.erase(second->keys.begin()); + } + } + + if (node->children.size() == 1) { + node->children[0]->parent = nullptr; + root = node->children[0]; + delete_node(node); + } + } + + // Display the tree on standard output in human-readable form. + void show(); + + // Check that the data structure satisfies all invariants. + void audit(); + + // Split the node into two nodes: move some children of n into + // a newly created node such that n contains exactly size children in the end. + // Return the new node and the key separating n and the new node. 
+ virtual pair<ab_node*, int> split_node(ab_node* n, int size) + { + // FIXME: Implement + } + + // Insert: add key to the tree (unless it was already present). + virtual void insert(int key) + { + // FIXME: Implement + } +}; diff --git a/04-ab_tree/cpp/ab_tree_test.cpp b/04-ab_tree/cpp/ab_tree_test.cpp new file mode 100644 index 0000000..24d1001 --- /dev/null +++ b/04-ab_tree/cpp/ab_tree_test.cpp @@ -0,0 +1,177 @@ +#include <functional> +#include <cstdlib> +#include <vector> + +#include "ab_tree.h" + +// Debugging output: showing trees prettily on standard output. + +void ab_tree::show() +{ + root->show(0); + for (int i=0; i<70; i++) + cout << '='; + cout << endl; +} + +void ab_node::show(int indent) +{ + for (int i = children.size() - 1; i >= 0 ; i--) { + if (i < keys.size()) { + for (int j = 0; j < indent; j++) + cout << " "; + cout << keys[i] << endl; + } + if (children[i]) + children[i]->show(indent+1); + } +} + +// Invariant checks + +void audit_subtree(ab_tree *tree, ab_node *n, ab_node* parent, int key_min, int key_max, int depth, int &leaf_depth) +{ + if (!n) { + // Check that all leaves are on the same level. + if (leaf_depth < 0) + leaf_depth = depth; + else + EXPECT(depth == leaf_depth, "Leaves are not on the same level"); + return; + } + // Check consistency of parent pointers + EXPECT(n->parent == parent, "Inconsistent parent pointers"); + + // The number of children must be in the allowed range. + if (depth > 0) + EXPECT(n->children.size() >= tree->a, "Too few children"); + EXPECT(n->children.size() <= tree->b, "Too many children"); + + // We must have one more children than keys. + EXPECT(n->children.size() == n->keys.size() + 1, "Number of keys does not match number of children"); + + // Allow degenerate trees with 0 keys in the root. + if (n->children.size() == 1) + return; + + // Check order of keys: they must be increasing and bounded by the keys on the higher levels. 
+    for (int i = 0; i < n->keys.size(); i++) {
+        EXPECT(n->keys[i] >= key_min && n->keys[i] <= key_max, "Wrong key order");
+        EXPECT(i == 0 || n->keys[i-1] < n->keys[i], "Wrong key order");
+    }
+
+    // Recurse into the children: child i may only hold keys strictly between keys[i-1] and keys[i].
+    for (int i = 0; i < n->children.size(); i++) {
+        int tmin, tmax;
+        if (i == 0)
+            tmin = key_min;
+        else
+            tmin = n->keys[i-1] + 1;
+        if (i < n->keys.size())
+            tmax = n->keys[i] - 1;
+        else
+            tmax = key_max;
+        audit_subtree(tree, n->children[i], n, tmin, tmax, depth+1, leaf_depth);
+    }
+}
+
+void ab_tree::audit()
+{
+    EXPECT(root, "Tree has no root");
+    int leaf_depth = -1;
+    audit_subtree(this, root, nullptr, numeric_limits<int>::min(), numeric_limits<int>::max(), 0, leaf_depth);
+}
+
+// A basic test: insert a couple of keys and show how the tree evolves.
+
+void test_basic()
+{
+    cout << "## Basic test" << endl;
+
+    ab_tree t(2, 3);
+    vector<int> keys = { 3, 1, 4, 5, 9, 2, 6, 8, 7, 0 };
+    for (int k : keys) {
+        t.insert(k);
+        t.show();
+        t.audit();
+        EXPECT(t.find(k), "Inserted key disappeared");
+    }
+
+    for (int k : keys)
+        EXPECT(t.find(k), "Some keys are missing at the end");
+
+    // Insert the keys again; nothing should change.
+    for (int k : keys) {
+        t.insert(k);
+        t.audit();
+        EXPECT(t.find(k), "One of the inserted keys has disappeared after it was inserted again");
+    }
+}
+
+// The main test: inserting a lot of keys and checking that they are really there.
+// We will insert the first num_items keys of the sequence 1, 1+step, 1+2*step, ... taken modulo range, where range is a prime.
+
+void test_main(int a, int b, int range, int num_items)
+{
+    // Create a new tree.
+    cout << "## Test Main: a=" << a << " b=" << b << " range=" << range << " num_items=" << num_items << endl;
+    ab_tree t(a, b);
+
+    int key = 1;
+    int step = (int)(range * 1.618);
+    int audit_time = 1;
+
+    // Insert keys.
+    for (int i=1; i <= num_items; i++) {
+        t.insert(key);
+        // Audit the tree occasionally (and always after the last insert).
+        if (i == audit_time || i == num_items) {
+            // cout << "== Audit at " << i << endl;
+            // t.show();
+            t.audit();
+            audit_time = (int)(audit_time * 1.33) + 1;  // the gap to the next audit grows by about a third
+        }
+        key = (key + step) % range;
+    }
+
+    // Replay the same key sequence: exactly its first num_items keys must be found in the tree.
+    key = 1;
+    for (int i=1; i < range; i++) {
+        bool found = t.find(key);
+        // cout << "Step #" << i << ": find(" << key << ") = " << found << endl;
+        EXPECT(found == (i <= num_items), "Tree contains wrong keys");
+        key = (key + step) % range;
+    }
+}
+
+void test_min(int a, int b, int num_items, int repeat) {
+    cout << "## Test Min: a=" << a << " b=" << b << " num_items=" << num_items << " repeat=" << repeat << endl;
+    ab_tree t(a, b);
+
+    // Insert keys 3, 6, ..., 3*num_items
+    for (int i=1; i <= num_items; i++)
+        t.insert(3*i);
+    t.audit();
+
+    // Repeatedly delete the minimum and insert key 0; audit only during the first two rounds
+    for (int i=1; i <= repeat; i++) {
+        t.delete_min();
+        t.insert(0);
+        if (i < 3)
+            t.audit();
+    }
+    t.audit();
+}
+
+/*** A list of all tests ***/
+
+vector<pair<string, function<void()>>> tests = {
+    { "basic", [] { test_basic(); } },
+    { "small-2,3", [] { test_main(2, 3, 997, 700); } },
+    { "small-2,4", [] { test_main(2, 4, 997, 700); } },
+    { "big-2,3", [] { test_main(2, 3, 999983, 700000); } },
+    { "big-2,4", [] { test_main(2, 4, 999983, 700000); } },
+    { "big-10,20", [] { test_main(10, 20, 999983, 700000); } },
+    { "big-100,200", [] { test_main(100, 200, 999983, 700000); } },
+    { "min", [] { test_min(1000, 1999, 100000, 100000); } }
+};
diff --git a/04-ab_tree/cpp/test_main.cpp b/04-ab_tree/cpp/test_main.cpp
new file mode 100644
index 0000000..3f4aff0
--- /dev/null
+++ b/04-ab_tree/cpp/test_main.cpp
@@ -0,0 +1,43 @@
+#include <cstdlib>
+#include <functional>
+#include <iostream>
+#include <string>
+#include <utility>
+#include <vector>
+
+using namespace std;
+
+extern vector<pair<string, function<void()>>> tests;
+
+void expect_failed(const string& message) {
+    cerr << "Test error: " << message << endl;
+    exit(1);
+}
+
+int main(int argc, char*
argv[]) { + vector<string> required_tests; + + if (argc > 1) { + required_tests.assign(argv + 1, argv + argc); + } else { + for (const auto& test : tests) + required_tests.push_back(test.first); + } + + for (const auto& required_test : required_tests) { + bool found = false; + for (const auto& test : tests) + if (required_test == test.first) { + cerr << "Running test " << required_test << endl; + test.second(); + found = true; + break; + } + if (!found) { + cerr << "Unknown test " << required_test << endl; + return 1; + } + } + + return 0; +} diff --git a/04-ab_tree/python/ab_tree.py b/04-ab_tree/python/ab_tree.py new file mode 100644 index 0000000..2bcf5aa --- /dev/null +++ b/04-ab_tree/python/ab_tree.py @@ -0,0 +1,100 @@ +#!/usr/bin/env python3 + +class ABNode: + """Single node in an ABTree. + + Each node contains keys and children + (with one more children than there are keys). + We also store a pointer to node's parent (None for root). + """ + def __init__(self, keys = None, children = None, parent = None): + self.keys = keys if keys is not None else [] + self.children = children if children is not None else [] + self.parent = parent + + def find_branch(self, key): + """ Try finding given key in this node. + + If this node contains the given key, returns (True, key_position). + If not, returns (False, first_position_with_key_greater_than_the_given). + """ + i = 0 + while (i < len(self.keys) and self.keys[i] < key): + i += 1 + + return (i < len(self.keys) and self.keys[i] == key, i) + + def insert_branch(self, i, key, child): + """ Insert a new key and a given child between keys i and i+1.""" + self.keys.insert(i, key) + self.children.insert(i + 1, child) + +class ABTree: + """A class representing the whole ABTree.""" + def __init__(self, a, b): + assert a >= 2 and b >= 2 * a - 1, "Invalid values of a, b: {}, {}".format(a, b) + self.a = a + self.b = b + self.root = ABNode(children=[None]) + + def find(self, key): + """Find a key in the tree. 
+ + Returns True if the key is present, False otherwise. + """ + node = self.root + while node: + found, i = node.find_branch(key) + if found: return True + node = node.children[i] + return False + + def delete_min(self): + """ Delete the smallest element. """ + node = self.root + while node.children[0]: + node = node.children[0] + + node.children.pop(0) + node.keys.pop(0) + + while len(node.children) < self.a and node.parent: + node = node.parent + first = node.children[0] + second = node.children[1] + + # Merge the second to the first + if len(second.children) == self.a: + if second.children[0]: + for c in second.children: + c.parent = first + first.children.extend(second.children) + first.keys.append(node.keys.pop(0)) + first.keys.extend(second.keys) + node.children.pop(1) + + # Move the leftest child of the second to the first + else: + second.children[0].parent = first + first.children.append(second.children.pop(0)) + first.keys.append(node.keys[0]) + node.keys[0] = second.keys.pop(0) + + if len(node.children) == 1: + assert node == self.root + node.parent = None + self.root = node.children[0] + + def split_node(self, node, size): + """Helper function for insert + + Split node into two nodes such that original node contains first _size_ children. + Return new node and the key separating nodes. 
+ """ + # TODO: Implement and use in insert method + raise NotImplementedError + + def insert(self, key): + """Add a given key to the tree, unless already present.""" + # TODO: Implement + raise NotImplementedError diff --git a/04-ab_tree/python/ab_tree_test.py b/04-ab_tree/python/ab_tree_test.py new file mode 100644 index 0000000..c443ae7 --- /dev/null +++ b/04-ab_tree/python/ab_tree_test.py @@ -0,0 +1,135 @@ +#!/usr/bin/env python3 +import math +import sys + +from ab_tree import ABNode, ABTree + +def show(tree): + """Show a tree.""" + def show_node(node, indent): + for i in reversed(range(len(node.children))): + if i < len(node.keys): + print(" " * indent, node.keys[i], sep="") + if node.children[i]: + show_node(node.children[i], indent + 1) + + show_node(tree.root, 0) + print("=" * 70) + +def audit(tree): + """Invariant check for the given tree.""" + def audit_node(node, parent, key_min, key_max, depth, leaf_depth): + if not node: + # Check that all leaves are on the same level. + if leaf_depth is None: + leaf_depth = depth + assert depth == leaf_depth, "Leaves are not on the same level" + + else: + # Check consistency of parent pointers + assert node.parent == parent, "Inconsistent parent pointers" + + # The number of children must be in the allowed range. + assert depth == 0 or len(node.children) >= tree.a, "Too few children" + assert len(node.children) <= tree.b, "Too many children" + + # We must have one more children than keys + assert len(node.children) == len(node.keys) + 1, "Number of keys does not match number of children" + + # Check that keys are increasing and in (key_min, key_max) range. 
+            for i in range(len(node.keys)):
+                assert node.keys[i] > key_min and node.keys[i] < key_max, "Wrong key order"
+                assert i == 0 or node.keys[i - 1] < node.keys[i], "Wrong key order"
+
+            # Check children recursively; child i is bounded by the neighbouring keys keys[i-1] and keys[i]
+            for i in range(len(node.children)):
+                child_min = node.keys[i - 1] if i > 0 else key_min
+                child_max = node.keys[i] if i < len(node.keys) else key_max
+                leaf_depth = audit_node(node.children[i], node, child_min, child_max, depth + 1, leaf_depth)
+
+        return leaf_depth
+
+    assert tree.root, "Tree has no root"
+    audit_node(tree.root, None, -math.inf, math.inf, 0, None)
+
+def test_basic():
+    """Insert a couple of keys and show how the tree evolves."""
+    print("## Basic test")
+
+    tree = ABTree(2, 3)
+    keys = [3, 1, 4, 5, 9, 2, 6, 8, 7, 0]
+    for key in keys:
+        tree.insert(key)
+        show(tree)
+        audit(tree)
+        assert tree.find(key), "Inserted key disappeared"
+
+    for key in keys:
+        assert tree.find(key), "Some keys are missing at the end"
+    """Try insert all keys again (nothing should change)."""
+    for key in keys:
+        tree.insert(key)
+        audit(tree)
+        assert tree.find(key), f"Key {key} that was inserted again has disappeared"
+
+def test_main(a, b, limit, num_items):
+    print("## Test Main: a={} b={} range={} num_items={}".format(a, b, limit, num_items))
+
+    tree = ABTree(a, b)
+
+    # Insert the first num_items keys of the sequence 1, 1+step, 1+2*step, ... taken modulo limit
+    step = int(limit * 1.618)
+    key, audit_time = 1, 1
+    for i in range(num_items):
+        tree.insert(key)
+        key = (key + step) % limit
+
+        # Audit the tree occasionally (and always after the last insert)
+        if i == audit_time or i + 1 == num_items:
+            audit(tree)
+            audit_time = int(audit_time * 1.33) + 1
+
+    # Check the content of the tree: exactly the first num_items keys of the sequence must be present
+    key = 1
+    for i in range(limit):
+        assert tree.find(key) == (i < num_items), "Tree contains wrong keys"
+        key = (key + step) % limit
+
+def test_min(a, b, num_items, repeat):
+    print("## Test Min: a={} b={} num_items={} repeat={}".format(a, b, num_items, repeat))
+
+    tree = ABTree(a, b)
+
+    # Insert keys 0, 3, ..., 3*(num_items-1)
+    for i in range(num_items):
+        tree.insert(3*i)
+    audit(tree)
+
+    # 
Delete and insert min + for i in range(repeat): + tree.delete_min() + tree.insert(0) + if i < 3: + audit(tree) + audit(tree) + +tests = [ + ("basic", test_basic), + ("small-2,3", lambda: test_main(2, 3, 997, 700)), + ("small-2,4", lambda: test_main(2, 4, 997, 700)), + ("big-2,3", lambda: test_main(2, 3, 99991, 70000)), + ("big-2,4", lambda: test_main(2, 4, 99991, 70000)), + ("big-10,20", lambda: test_main(10, 20, 99991, 70000)), + ("big-100,200", lambda: test_main(100, 200, 99991, 70000)), + ("min", lambda: test_min(1000,1999,10000,10000)) +] + +if __name__ == "__main__": + for required_test in sys.argv[1:] or [name for name, _ in tests]: + for name, test in tests: + if name == required_test: + print("Running test {}".format(name), file=sys.stderr) + test() + break + else: + raise ValueError("Unknown test {}".format(name)) diff --git a/04-ab_tree/task.md b/04-ab_tree/task.md new file mode 100644 index 0000000..eb23ec5 --- /dev/null +++ b/04-ab_tree/task.md @@ -0,0 +1,15 @@ +You are given a representation of _(a, b)-tree_ with a `find` operation, +and a representation of an _(a, b)-tree node_. + +Your goal is to implement an `insert` operation, which inserts the given +key in the tree (or does nothing if the key is already present). Preferably, +you should also implement `split_node` method and use it properly in +your `insert` implementation. + +The implementation uses the variant of (a,b)-trees from lecture notes by [Martin Mares, +Chapter 3](http://mj.ucw.cz/vyuka/dsnotes/03-abtree.pdf) where the actual values are +stored also in the internal nodes of the tree and not only in leaves. + +You should submit the `ab_tree.*` file (but not `ab_tree_test.*` files). + +Source code templates can be found in [git](https://gitlab.kam.mff.cuni.cz/datovky/assignments/-/tree/master). -- GitLab