Skip to content
Snippets Groups Projects
Commit ebdf523e authored by Tung Anh Vu's avatar Tung Anh Vu
Browse files

(a,b)-tree experiment

parent 57acf53f
No related branches found
No related tags found
No related merge requests found
# Value passed to ab_experiment as its <student-id> argument (used there to
# seed the random generator). Override it, e.g.: make STUDENT_ID=42 test
STUDENT_ID ?= PLEASE_SET_STUDENT_ID
.PHONY: test
# Run every test (insert, min, random) in both tree modes (2-3, 2-4) and store
# the output of each run in out/t-<test>-<mode>. The out/ directory is
# recreated from scratch on every invocation.
test: ab_experiment
@rm -rf out && mkdir out
@for test in insert min random ; do \
for mode in '2-3' '2-4' ; do \
echo t-$$test-$$mode ; \
./ab_experiment $$test $(STUDENT_ID) $$mode >out/t-$$test-$$mode ; \
done ; \
done
# Directory added to the include path; random.h is expected to live there.
INCLUDE ?= .
CXXFLAGS=-std=c++11 -O2 -Wall -Wextra -g -Wno-sign-compare -I$(INCLUDE)
# Build the experiment binary; it is rebuilt whenever any listed file changes.
ab_experiment: ab_tree.h ab_experiment.cpp $(INCLUDE)/random.h
$(CXX) $(CPPFLAGS) $(CXXFLAGS) $^ -o $@
.PHONY: clean
# Remove the built binary and the results directory.
clean::
rm -f ab_experiment
rm -rf out
#include <algorithm>
#include <cmath>
#include <cstdlib>
#include <functional>
#include <iostream>
#include <string>
#include <tuple>
#include <utility>
#include <vector>

#include "ab_tree.h"
#include "random.h"
using namespace std;
// Report a failed expectation on standard error and terminate the program
// with exit status 1.
void expect_failed(const std::string& message)
{
    std::cerr << "Test error: ";
    std::cerr << message;
    std::cerr << std::endl;
    exit(1);
}
/*
 * A modified (a,b)-tree for benchmarking.
 *
 * We inherit the implementation of operations from the ab_tree class
 * and extend it by a remove operation and by keeping statistics on the
 * number of operations and the total number of structural changes
 * (node splits and node merges).
 *
 * Please make sure that your ab_tree class defines the split_node() and
 * insert() methods as virtual.
 */
class BenchmarkingABTree : public ab_tree {
public:
int num_operations;
int num_struct_changes;
BenchmarkingABTree(int a, int b) : ab_tree(a,b)
{
reset();
}
void reset()
{
num_operations = 0;
num_struct_changes = 0;
}
pair<ab_node*, int> split_node(ab_node *node, int size) override
{
num_struct_changes++;
return ab_tree::split_node(node, size);
}
void insert(int key) override
{
num_operations++;
ab_tree::insert(key);
}
// Return the average number of rotations per operation.
double struct_changes_per_op()
{
if (num_operations > 0)
return (double) num_struct_changes / num_operations;
else
return 0;
}
// Delete key from the tree. Does nothing if the key is not in the tree.
void remove(int key){
num_operations += 1;
// Find the key to be deleted
ab_node *node = root;
int i;
bool found = node->find_branch(key, i);
while(!found){
node = node->children[i];
if (!node) return; // Key is not in the tree
found = node->find_branch(key, i);
}
// If node is not a leaf, we need to swap the key with its successor
if (node->children[0] != nullptr){ // Only leaves have nullptr as children
// Successor is leftmost key in the right subtree of key
ab_node *succ = min(node->children[i+1]);
swap(node->keys[i], succ->keys[0]);
node = succ;
}
// Now run the main part of the delete
remove_leaf(key, node);
}
private:
// Main part of the remove
void remove_leaf(int key, ab_node* node)
{
EXPECT(node != nullptr, "Trying to delete key from nullptr");
EXPECT(node->children[0] == nullptr, "Leaf's child must be nullptr");
while(1){
// Find the key in the node
int key_position;
bool found = node->find_branch(key, key_position);
EXPECT(found, "Trying to delete key that is not in the node.");
// Start with the deleting itself
node->keys.erase(node->keys.cbegin() + key_position);
node->children.erase(node->children.cbegin() + key_position + 1);
// No underflow means we are done
if (node->children.size() >= a) return;
// Root may underflow, but cannot have just one child (unless tree is empty)
if (node == root){
if ((node->children.size() == 1) && (root->children[0] != nullptr)){
ab_node *old_root = root;
root = root->children[0];
root->parent = nullptr;
delete_node(old_root);
}
return;
}
ab_node *brother;
int separating_key_pos;
bool tmp;
tie(brother, separating_key_pos, tmp) = get_brother(node);
int separating_key = node->parent->keys[separating_key_pos];
// First check whether we can steal brother's child
if (brother->children.size() > a){
steal_child(node);
return;
}
// If the brother is too small, we merge with him and propagate the delete
node = merge_node(node);
node = node->parent;
key = separating_key;
key_position = separating_key_pos;
}
}
// Return the leftmost node of a subtree rooted at node.
ab_node* min(ab_node *node)
{
EXPECT(node != nullptr, "Trying to search for minimum of nullptr");
while (node->children[0]) {
node = node->children[0];
}
return node;
}
// Return the left brother if it exists, otherwise return right brother.
// Returns tuple (brother, key_position, is_left_brother), where
// key_position is a position of the key that separates node and brother in their parent.
tuple<ab_node*, int, bool> get_brother(ab_node* node)
{
ab_node *parent = node->parent;
EXPECT(parent != nullptr, "Node without parent has no brother");
// Find node in parent's child list
int i;
for(i = 0; i < parent->children.size(); ++i){
ab_node *c = parent->children[i];
if (c == node) break;
}
EXPECT(i < parent->children.size(), "Node is not inside its parent");
if (i == 0){
return make_tuple(parent->children[1], 0, false);
}
else{
return make_tuple(parent->children[i - 1], i - 1, true);
}
}
// Transfer one child from node's left brother to the node.
// If node has no left brother, use right brother instead.
void steal_child(ab_node* node)
{
ab_node *brother;
int separating_key_pos;
bool is_left_brother;
tie(brother, separating_key_pos, is_left_brother) = get_brother(node);
int separating_key = node->parent->keys[separating_key_pos];
EXPECT(brother->children.size() > a, "Stealing child causes underflow in brother!");
EXPECT(node->children.size() < b, "Stealing child causes overflow in the node!");
// We steal either from front or back
int steal_position, target_position;
if (is_left_brother){
steal_position = brother->children.size()-1;
target_position = 0;
}
else{
steal_position = 0;
target_position = node->children.size();
}
// Steal the child
ab_node *stolen_child = brother->children[steal_position];
if (stolen_child != nullptr){
stolen_child->parent = node;
}
node->children.insert(node->children.cbegin() + target_position, stolen_child);
brother->children.erase(brother->children.cbegin() + steal_position);
// List of keys is shorter than list of children
if (is_left_brother) steal_position -= 1;
else target_position -= 1;
// Update keys
node->keys.insert(node->keys.cbegin() + target_position, separating_key);
node->parent->keys[separating_key_pos] = brother->keys[steal_position];
brother->keys.erase(brother->keys.cbegin() + steal_position);
}
public:
// Merge node with its left brother and destroy the node. Must not cause overflow!
// Returns result of the merge.
// If node has no left brother, use right brother instead.
ab_node* merge_node(ab_node* node){
num_struct_changes += 1;
ab_node *brother;
int separating_key_pos;
bool is_left_brother;
tie(brother, separating_key_pos, is_left_brother) = get_brother(node);
int separating_key = node->parent->keys[separating_key_pos];
// We swap brother and node if necessary so that the node is always on the right
if (!is_left_brother) swap(brother, node);
for (auto c: node->children)
brother->children.push_back(c);
brother->keys.push_back(separating_key);
for (auto k: node->keys)
brother->keys.push_back(k);
EXPECT(brother->children.size() <= b, "Merge caused overflow!");
// Update parent pointers in non-leaf
if (brother->children[0] != nullptr){
for (auto c : brother->children)
c->parent = brother;
}
delete_node(node);
return brother;
}
};
// Parameters of the (a,b)-trees built by the tests; assigned in main()
// according to the mode argument.
int a, b;
RandomGen *rng; // Random generator object
// Build the sequence 0, 1, ..., n-1 and shuffle it: every position except
// the last is swapped with a position chosen by the global generator from
// the not yet fixed tail (the position itself included).
vector<int> random_permutation(int n)
{
    vector<int> shuffled;
    int value = 0;
    while (value < n) {
        shuffled.push_back(value);
        value++;
    }

    for (int pos = 0; pos + 1 < n; pos++) {
        const int remaining = n - pos;
        const int partner = pos + rng->next_range(remaining);
        swap(shuffled[pos], shuffled[partner]);
    }
    return shuffled;
}
void test_insert()
{
for (int e=32; e<=64; e++) {
int n = (int) pow(2, e/4.);
BenchmarkingABTree tree = BenchmarkingABTree(a,b);
vector<int> perm = random_permutation(n);
for (int x : perm)
tree.insert(x);
cout << n << " " << tree.struct_changes_per_op() << endl;
}
}
void test_random()
{
for (int e=32; e<=64; e++) {
int n = (int) pow(2, e/4.);
BenchmarkingABTree tree = BenchmarkingABTree(a,b);
// We keep track of elements present and not present in the tree
vector<int> elems;
vector<int> anti_elems;
elems.reserve(n);
anti_elems.reserve(n+1);
for (int x = 0; x < 2*n; x+=2){
tree.insert(x);
elems.push_back(x);
}
for (int i = -1; i <2*n + 1; i+=2)
anti_elems.push_back(i);
for (int i=0; i<n; i++){
int r, x;
// Delete random element
r = rng->next_range(elems.size());
x = elems[r];
tree.remove(x);
elems.erase(elems.cbegin() + r);
anti_elems.push_back(x);
// Insert random "anti-element"
r = rng->next_range(anti_elems.size());
x = anti_elems[r];
tree.insert(x);
elems.push_back(x);
anti_elems.erase(anti_elems.cbegin() + r);
}
cout << n << " " << tree.struct_changes_per_op() << endl;
}
}
// Min test: for each set size n = floor(2^(e/4)), e = 32..64, insert the keys
// 0..n-1 in increasing order into a fresh tree and then n times remove the
// minimum (key 0) and insert it back. Prints n followed by the average
// number of structural changes per operation.
void test_min()
{
    int e = 32;
    while (e <= 64) {
        const int n = (int) pow(2, e/4.);
        BenchmarkingABTree tree(a, b);

        for (int key = 0; key < n; ++key)
            tree.insert(key);

        for (int round = n; round > 0; --round) {
            tree.remove(0);
            tree.insert(0);
        }

        cout << n << " " << tree.struct_changes_per_op() << endl;
        e++;
    }
}
// Available tests: the name expected as the first command-line argument,
// paired with the function that runs the test. main() searches this list.
vector<pair<string, function<void()>>> tests = {
{ "insert", test_insert },
{ "random", test_random },
{ "min", test_min },
};
int main(int argc, char **argv)
{
if (argc != 4) {
cerr << "Usage: " << argv[0] << " <test> <student-id> (2-3|2-4)" << endl;
return 1;
}
string which_test = argv[1];
string id_str = argv[2];
string mode = argv[3];
try {
rng = new RandomGen(stoi(id_str));
} catch (...) {
cerr << "Invalid student ID" << endl;
return 1;
}
a = 2;
if (mode == "2-3")
b = 3;
else if (mode == "2-4")
b = 4;
else
{
cerr << "Last argument must be either '2-3' or '2-4'" << endl;
return 1;
}
for (const auto& test : tests) {
if (test.first == which_test)
{
cout.precision(12);
test.second();
return 0;
}
}
cerr << "Unknown test " << which_test << endl;
return 1;
return 0;
}
#define DS1_RANDOM_H
#include <cstdint>
/*
* This is the xoroshiro128+ random generator, designed in 2016 by David Blackman
* and Sebastiano Vigna, distributed under the CC-0 license. For more details,
* see http://vigna.di.unimi.it/xorshift/.
*
* Rewritten to C++ by Martin Mares, also placed under CC-0.
*/
class RandomGen {
    uint64_t state[2];

    // Rotate x left by k bits. Only meaningful for 0 < k < 64: k == 0 would
    // shift by the full width of the type. It is called with 55 and 36 only.
    uint64_t rotl(uint64_t x, int k)
    {
        return (x << k) | (x >> (64 - k));
    }

public:
    // Initialize the generator, set its seed and warm it up.
    RandomGen(unsigned int seed)
    {
        // NOTE(review): with a 32-bit unsigned int the product below wraps
        // before it is widened to 64 bits. It is kept as it is so that every
        // seed keeps producing the same sequence as before.
        state[0] = seed * 0xdeadbeef;
        state[1] = seed ^ 0xc0de1234;
        for (int i=0; i<100; i++)
            next_u64();
    }

    // Generate a random 64-bit number.
    uint64_t next_u64(void)
    {
        uint64_t s0 = state[0], s1 = state[1];
        uint64_t result = s0 + s1;
        s1 ^= s0;
        state[0] = rotl(s0, 55) ^ s1 ^ (s1 << 14);
        state[1] = rotl(s1, 36);
        return result;
    }

    // Generate a random 32-bit number.
    uint32_t next_u32(void)
    {
        // Drop the lowest 11 bits; the conversion keeps the low 32 bits of the rest.
        return next_u64() >> 11;
    }

    // Generate a number between 0 and range-1.
    // An empty range (range == 0) yields 0 without advancing the generator;
    // previously this case divided by zero.
    unsigned int next_range(unsigned int range)
    {
        if (range == 0)
            return 0;
        /*
         * This is not perfectly uniform, unless the range is a power of two.
         * However, for 64-bit random values and 32-bit ranges, the bias is
         * insignificant.
         */
        return next_u64() % range;
    }
};
# Value passed to ab_experiment.py as its <student-id> argument (used there to
# seed the random generator). Override it, e.g.: make STUDENT_ID=42 test
STUDENT_ID ?= PLEASE_SET_STUDENT_ID
.PHONY: test
# Run every test (insert, min, random) in both tree modes (2-3, 2-4) and store
# the output of each run in out/t-<test>-<mode>. The out/ directory is
# recreated from scratch on every invocation.
test: ab_experiment.py ab_tree.py
@rm -rf out && mkdir out
@for test in insert min random ; do \
for mode in '2-3' '2-4' ; do \
echo t-$$test-$$mode ; \
./ab_experiment.py $$test $(STUDENT_ID) $$mode >out/t-$$test-$$mode ; \
done ; \
done
.PHONY: clean
# Remove the results directory and the __pycache__ directory.
clean::
rm -rf out __pycache__
#!/usr/bin/env python3
import sys
import random
from ab_tree import ABTree
class BenchmarkingABTree(ABTree):
    """A modified ABTree for benchmarking.

    We inherit the implementation of operations from the ABTree class
    and extend it by delete operation and by keeping statistics on the number
    of operations and the total number of structural changes.
    """

    def __init__(self, a, b):
        ABTree.__init__(self, a, b)
        self.reset()

    def reset(self):
        """Reset statistics."""
        # Number of insert() and remove() calls since the last reset.
        self.num_operations = 0
        # Number of node splits and node merges since the last reset.
        self.num_struct_changes = 0

    def struct_changes_per_op(self):
        """Return the average number of struct. changes per operation.

        Returns 0 if no operation has been counted yet.
        """
        if self.num_operations > 0:
            return self.num_struct_changes / self.num_operations
        else:
            return 0

    def insert(self, key):
        """Count one operation and insert the key using ABTree.insert."""
        self.num_operations += 1
        ABTree.insert(self, key)

    def split_node(self, node, size):
        """Count one structural change and split using ABTree.split_node."""
        self.num_struct_changes += 1
        return ABTree.split_node(self, node, size)

    def remove(self, key):
        """Delete key from the tree. Does nothing if the key is not in the tree.

        The call is counted as one operation in either case.
        """
        self.num_operations += 1

        # Find the key to be deleted. As used here, find_branch() returns
        # whether the key is in the node together with either the key's
        # position (found) or the index of the child to descend into.
        node = self.root
        found, i = node.find_branch(key)
        while not found:
            node = node.children[i]
            # Test for a missing child explicitly instead of relying on the
            # truth value of a node object.
            if node is None:
                return  # Key is not in the tree
            found, i = node.find_branch(key)

        # If node is not a leaf, we need to swap the key with its successor
        if node.children[0] is not None:  # Only leaves have None as children
            # Successor is leftmost key in the right subtree of key
            succ = self._min(node.children[i+1])
            node.keys[i], succ.keys[0] = succ.keys[0], node.keys[i]
            node = succ

        # Now run the main part of the delete
        self._remove_leaf(key, node)

    def _remove_leaf(self, key, node):
        """Main part of the delete.

        Deletes key from the leaf `node` and repairs underflows on the way
        up, either by stealing a child from a brother or by merging with it
        and deleting the separating key from the parent.
        """
        assert node is not None, "Trying to delete key from None"
        assert node.children[0] is None, "Leaf's child must be None"

        while True:
            # Find the key in the node
            found, key_position = node.find_branch(key)
            assert found, "Trying to delete key that is not in the node."

            # Start with the deleting itself. The child to the right of the
            # key goes away with it: in a leaf that is a None, after a merge
            # it is the slot of the node that was merged into its brother.
            del node.keys[key_position]
            del node.children[key_position + 1]

            # No underflow means we are done
            if len(node.children) >= self.a:
                return

            # Root may underflow, but cannot have just one child (unless tree is empty)
            if node is self.root:
                if (len(node.children) == 1) and (self.root.children[0] is not None):
                    self.root = self.root.children[0]
                    self.root.parent = None
                return

            brother, separating_key_pos, _ = self._get_brother(node)
            separating_key = node.parent.keys[separating_key_pos]

            # First check whether we can steal brother's child
            if len(brother.children) > self.a:
                self._steal_child(node)
                return

            # If the brother is too small, we merge with him and propagate the delete:
            # the separating key must now be removed from the parent.
            node = self.merge_node(node).parent
            key = separating_key

    def _min(self, node):
        """Return the leftmost node of a subtree rooted at node."""
        assert node is not None
        while node.children[0] is not None:
            node = node.children[0]
        return node

    def _get_brother(self, node):
        """Return the left brother if it exists, otherwise return right brother.

        Returns tuple (brother, key_position, is_left_brother), where
        key_position is a position of the key that separates node and brother
        in their parent.
        """
        parent = node.parent
        assert parent is not None, "Node without parent has no brother"

        # Find node in parent's child list
        i = 0
        for c in parent.children:
            if c is node:
                break
            else:
                i += 1
        assert i < len(parent.children), "Node is not inside its parent"

        if i == 0:
            # Leftmost child: only a right brother exists.
            return parent.children[1], 0, False
        else:
            return parent.children[i - 1], i - 1, True

    def _steal_child(self, node):
        """Transfer one child from node's left brother to the node.

        If node has no left brother, use right brother instead.
        The separating key in the parent moves down into node and the
        brother's outermost key moves up to replace it.
        """
        brother, separating_key_pos, is_left_brother = self._get_brother(node)
        separating_key = node.parent.keys[separating_key_pos]

        assert len(brother.children) > self.a, "Stealing child causes underflow in brother!"
        assert len(node.children) < self.b, "Stealing child causes overflow in the node!"

        # We steal either from front or back
        if is_left_brother:
            steal_position = len(brother.children)-1
            target_position = 0
        else:
            steal_position = 0
            target_position = len(node.children)

        # Steal the child
        stolen_child = brother.children[steal_position]
        if stolen_child is not None:
            stolen_child.parent = node
        node.children.insert(target_position, stolen_child)
        del brother.children[steal_position]

        # List of keys is shorter than list of children
        if is_left_brother:
            steal_position -= 1
        else:
            target_position -= 1

        # Update keys
        node.keys.insert(target_position, separating_key)
        node.parent.keys[separating_key_pos] = brother.keys[steal_position]
        del brother.keys[steal_position]

    def merge_node(self, node):
        """Merge node with its left brother and destroy the node. Must not cause overflow!

        Returns result of the merge.
        If node has no left brother, use right brother instead.
        Counts as one structural change. The parent is left untouched: the
        caller still has to remove the separating key and the stale child
        reference from it.
        """
        self.num_struct_changes += 1

        brother, separating_key_pos, is_left_brother = self._get_brother(node)
        separating_key = node.parent.keys[separating_key_pos]

        # We swap brother and node if necessary so that the node is always on the right
        if not is_left_brother:
            brother, node = node, brother

        brother.children.extend(node.children)
        brother.keys.append(separating_key)
        brother.keys.extend(node.keys)

        assert len(brother.children) <= self.b, "Merge caused overflow!"

        # Update parent pointers in non-leaf
        if brother.children[0] is not None:
            for c in brother.children:
                c.parent = brother

        return brother
def test_insert():
    """Insert test.

    For every exponent in range(32, 64), insert the keys 0..n-1 with
    n = int(2**(exponent/4)) in random order into a fresh tree and print n
    followed by the average number of structural changes per operation.
    """
    for exponent in range(32, 64):
        n = int(2**(exponent/4))
        tree = BenchmarkingABTree(a, b)
        shuffled_keys = random.sample(range(n), n)
        for key in shuffled_keys:
            tree.insert(key)
        average = tree.struct_changes_per_op()
        print(n, average)
def test_random():
    """Random test.

    For every exponent in range(32, 64), fill a fresh tree with the even keys
    0, 2, ..., 2n-2 where n = int(2**(exp/4)), then n times remove a random
    key that is in the tree and insert a random key that is not. Prints n
    followed by the average number of structural changes per operation.
    """
    for exp in range(32, 64):
        n = int(2**(exp/4))
        tree = BenchmarkingABTree(a, b)

        for elem in range(0, 2*n, 2):
            tree.insert(elem)

        # We keep track of elements present and not present in the tree.
        # elems has to list every inserted key (0, 2, ..., 2n-2); listing
        # only range(0, n, 2) would keep half of the tree from ever being
        # chosen for removal.
        elems = list(range(0, 2*n, 2))
        anti_elems = list(range(-1, 2*n+1, 2))

        for _ in range(n):
            # Delete random element
            elem = random.choice(elems)
            tree.remove(elem)
            elems.remove(elem)
            anti_elems.append(elem)

            # Insert random "anti-element"
            elem = random.choice(anti_elems)
            tree.insert(elem)
            elems.append(elem)
            anti_elems.remove(elem)

        print(n, tree.struct_changes_per_op())
def test_min():
    """Min test.

    For every exponent in range(32, 64), insert the keys 0..n-1 with
    n = int(2 ** (exponent / 4)) in increasing order into a fresh tree, then
    n times remove the minimum (key 0) and insert it back. Prints n followed
    by the average number of structural changes per operation.
    """
    for exponent in range(32, 64):
        n = int(2 ** (exponent / 4))
        tree = BenchmarkingABTree(a, b)

        key = 0
        while key < n:
            tree.insert(key)
            key += 1

        rounds_left = n
        while rounds_left > 0:
            tree.remove(0)
            tree.insert(0)
            rounds_left -= 1

        average = tree.struct_changes_per_op()
        print(n, average)
# Available tests: the name expected as the first command-line argument
# mapped to the function that runs the test.
tests = {
"min": test_min,
"insert": test_insert,
"random": test_random,
}
if __name__ == '__main__':
if len(sys.argv) == 4:
test, student_id = sys.argv[1], sys.argv[2]
a = 2
if sys.argv[3] == "2-3":
b = 3
elif sys.argv[3] == "2-4":
b = 4
else:
raise ValueError("Last argument must be either '2-3' or '2-4'")
random.seed(student_id)
if test in tests:
tests[test]()
else:
raise ValueError("Unknown test {}".format(test))
else:
raise ValueError("Usage: {} <test> <student-id> (2-3|2-4)".format(sys.argv[0]))
## Goal
The goal of this assignment is to evaluate your implementation of (a,b)-trees
experimentally and compare performance of (2,3) and (2,4)-trees.
You are given a test program (`ab_experiment`) which is used to evaluate your
implementation of the previous assignment. The test program augments your implementation
by implementing a `remove` method and it performs the following experiments:
- _Insert test:_ Insert _n_ elements in random order.
- _Min test:_ Insert _n_ elements sequentially and then _n_ times repeat: remove the minimal
element in the tree and then insert it back.
- _Random test:_ Insert _n_ elements sequentially and then _n_ times repeat: remove a random
element from the tree and then insert a random element into the tree. The removed element is
always present in the tree and the inserted element is always *not* present in the tree.
The program tries each experiment with different values of _n_. In each try,
it prints the average number of _structural changes_ per operation. Structural change is
either a node split (in insert) or merging of two nodes (in delete).
You should perform these experiments and write a report, which contains the following
plots of the measured data. Each plot should show the dependence of the average
number of structural changes on the set size _n_.
- The insert test: one curve for (2,3) tree, one for (2,4) tree.
- The min test: one curve for (2,3) tree, one for (2,4) tree.
- The random test: one curve for (2,3) tree, one for (2,4) tree.
The report should discuss the experimental results and try to explain the observed
behavior using theory from the lectures. (If you want, you can carry out further
experiments to gain better understanding of the data structure and include these
in the report. This is strictly optional.)
You should submit a PDF file with the report (and no source code).
You will get 1 temporary point upon submission if the file is syntactically correct;
proper points will be assigned later.
## Test program
The test program is given three arguments:
- The name of the test (`insert`, `min`, `random`).
- The random seed: you should use the last 2 digits of your student ID (you can find
it in the Study Information System – just click on the Personal data icon). Please
include the random seed in your report.
- The type of the tree to test (`2-3` or `2-4`).
The output of the program contains one line per experiment, which consists of _n_ and the
average number of structural changes.
## Your implementation
Please use your implementation from the previous exercise. Methods `split_node(...)`
and `insert()` will be augmented by the test program. If you perform
node splits directly instead of using the `split_node(...)` method, you
need to adjust the `BenchmarkingABTree` class accordingly.
## Hints
The following tools can be useful for producing nice plots:
- [pandas](https://pandas.pydata.org/)
- [matplotlib](https://matplotlib.org/)
- [gnuplot](http://www.gnuplot.info/)
A quick checklist for plots:
- Is there a caption explaining what is plotted?
- Are the axes clearly labelled? Do they have value ranges and units?
- Have you mentioned that this axis has logarithmic scale? (Logarithmic graphs
are more fitting in some cases, but you should tell.)
- Is it clear which curve means what?
- Is it clear what are the measured points and what is an interpolated
curve between them?
- Are there any overlaps? (E.g., the most interesting part of the curve
hidden underneath a label?)
In your discussion, please distinguish the following kinds of claims.
It should be always clear which is which:
- Experimental results (i.e., the raw data you obtained from the experiments)
- Theoretical facts (i.e., claims we have proved mathematically)
- Your hypotheses (e.g., when you claim that the graph looks like something is true,
but you are not able to prove rigorously that it always holds)
0% Loading or .
You are about to add 0 people to the discussion. Proceed with caution.
Please register or to comment