Skip to content
Snippets Groups Projects
Commit 8289b455 authored by Martin Mareš's avatar Martin Mareš
Browse files

(a,b)-trees: Insert and Delete

parent 71d1c57a
No related branches found
No related tags found
No related merge requests found
TOP=.. TOP=..
PICS=ab-example PICS=ab-example ab-ins ab-del-borrow ab-del-merge
include ../Makerules include ../Makerules
import ads;
import trees;
/*
 * Figure: borrowing a key from the left sibling on deletion.
 * The left tree shows the state before the operation, the right tree the
 * state after it; an arrow between them marks the transition.
 */

/* Layout constants shared by both trees */
real level = 1;        // vertical distance between consecutive tree levels
real off_small = 0.1;  // smaller value passed as ab_edge's third argument
real off_big = 0.18;   // larger value passed as ab_edge's third argument

/* Labels of the leaves stored at indices 4..7 of both trees */
string[] leaf_names = {"a", "b", "c", "d"};

/* ---- Before ---- */
pair b_root = (0, 0);                        // parent
pair b_left = b_root + (-1.3, -level);       // sibling we borrow from
pair b_mid = b_root + (0, -level);           // the underflowing node
pair[] before = {
	b_root,
	b_left,
	b_mid,
	b_root + (1.3, -level),              // e
	b_left + (-0.5, -level),             // a
	b_left + (0, -level),                // b
	b_left + (0.5, -level),              // c
	b_mid + (0, -level)                  // d
};
tree_init(before);

// Edges first, nodes afterwards (same drawing order as before the rewrite).
ab_edge(0, 1, -off_big);
ab_edge(0, 2);
ab_edge(0, 3, off_big);
ab_edge(1, 4, -off_big);
ab_edge(1, 5);
ab_edge(1, 6, off_big);
ab_edge(2, 7);

tree_elliptic_node(0, "{\bf 4}\;7");
tree_elliptic_node(1, "2\;{\bf 3}");
tree_elliptic_node(2, mode=v_bold);
for (int i = 0; i < leaf_names.length; ++i)
	tree_node(4 + i, leaf_names[i]);
tree_node(3, "e");

label("$v$", b_mid, 3NNW);
label("$\ell$", b_left, (0, 3NNW.y));

// Small arrow marking the key p in the parent, coming from the west.
pair p_tail = b_root + 0.7W;
draw(p_tail -- b_root + 0.3W, e_smallarrow);
label("$p$", p_tail, 0.5W);

// Small arrow marking the key m in the sibling, coming from direction -30 degrees.
pair m_dir = dir(-30);
pair m_tail = b_left + 0.7*m_dir;
draw(m_tail -- b_left + 0.3*m_dir, e_smallarrow);
label("$m$", m_tail, 0.5*m_dir);

/* ---- After ---- */
pair a_root = (5, 0);                        // parent
pair a_left = a_root + (-1.3, -level);       // sibling we borrowed from
pair a_mid = a_root + (0, -level);           // the formerly underflowing node
pair[] after = {
	a_root,
	a_left,
	a_mid,
	a_root + (1.3, -level),              // e
	a_left + (-0.3, -level),             // a
	a_left + (0.3, -level),              // b
	a_mid + (-0.3, -level),              // c
	a_mid + (0.3, -level)                // d
};
tree_init(after);

ab_edge(0, 1, -off_big);
ab_edge(0, 2);
ab_edge(0, 3, off_big);
ab_edge(1, 4, -off_small);
ab_edge(1, 5, off_small);
ab_edge(2, 6, -off_small);
ab_edge(2, 7, off_small);

tree_elliptic_node(0, "{\bf 3}\;7");
tree_elliptic_node(1, "2");
tree_elliptic_node(2, "\bf 4");
for (int i = 0; i < leaf_names.length; ++i)
	tree_node(4 + i, leaf_names[i]);
tree_node(3, "e");

label("$v$", a_mid, 3NNW);
label("$\ell$", a_left, (0, 3NNW.y));

/* ---- Arrow between the two trees ---- */
draw((1.8,-0.3) -- (2.9,-0.3), e_arrow);
import ads;
import trees;
/*
 * Figure: merging an underflowing node with its left sibling on deletion.
 * The left tree shows the state before the merge, the right tree the state
 * after it; an arrow between them marks the transition.
 */

/* Layout constants shared by both trees */
real level = 1;        // vertical distance between consecutive tree levels
real off_small = 0.1;  // smaller value passed as ab_edge's third argument
real off_big = 0.18;   // larger value passed as ab_edge's third argument

/* Labels of the three leaves that end up under the merged node */
string[] leaf_names = {"a", "b", "c"};

/* ---- Before ---- */
pair b_root = (0, 0);                        // parent
pair b_left = b_root + (-1.3, -level);       // sibling we merge with
pair b_mid = b_root + (0, -level);           // the underflowing node
pair[] before = {
	b_root,
	b_left,
	b_mid,
	b_root + (1.3, -level),              // d
	b_left + (-0.3, -level),             // a
	b_left + (0.3, -level),              // b
	b_mid + (0, -level)                  // c
};
tree_init(before);

// Edges first, nodes afterwards (same drawing order as before the rewrite).
ab_edge(0, 1, -off_big);
ab_edge(0, 2);
ab_edge(0, 3, off_big);
ab_edge(1, 4, -off_small);
ab_edge(1, 5, off_small);
ab_edge(2, 6);

tree_elliptic_node(0, "{\bf 4}\;7");
tree_elliptic_node(1, "\bf 2");
tree_elliptic_node(2, mode=v_bold);
for (int i = 0; i < leaf_names.length; ++i)
	tree_node(4 + i, leaf_names[i]);
tree_node(3, "d");

label("$v$", b_mid, 3NNW);
label("$\ell$", b_left, (0, 3NNW.y));

// Small arrow marking the key p in the parent, coming from the west.
pair p_tail = b_root + 0.7W;
draw(p_tail -- b_root + 0.3W, e_smallarrow);
label("$p$", p_tail, 0.5W);

/* ---- After ---- */
pair a_root = (5, 0);                        // parent
pair a_merged = a_root + (-0.7, -level);     // the merged node
pair[] after = {
	a_root,
	a_merged,
	a_root + (0.7, -level),              // d
	a_merged + (-0.5, -level),           // a
	a_merged + (0, -level),              // b
	a_merged + (0.5, -level)             // c
};
tree_init(after);

ab_edge(0, 1, -off_small);
ab_edge(0, 2, off_small);
ab_edge(1, 3, -off_big);
ab_edge(1, 4);
ab_edge(1, 5, off_big);

tree_elliptic_node(0, "7");
tree_elliptic_node(1, "\bf 2\;4");
for (int i = 0; i < leaf_names.length; ++i)
	tree_node(3 + i, leaf_names[i]);
tree_node(2, "d");

/* ---- Arrow between the two trees ---- */
draw((2,-0.3) -- (3.1,-0.3), e_arrow);
import ads;
import trees;
/*
 * Figure: splitting an overfull node on insertion.
 * The left tree shows the state before the split, the right tree the state
 * after it; an arrow between them marks the transition.
 */

/* Layout constants shared by both trees */
real level = 1;        // vertical distance between consecutive tree levels
real off_small = 0.1;  // smaller value passed as ab_edge's third argument
real off_big = 0.18;   // larger value passed as ab_edge's third argument

/* Labels of the four leaves hanging under the node being split */
string[] inner_leaves = {"b", "c", "d", "e"};

/* ---- Before ---- */
pair b_root = (0, 0);                        // parent
pair b_full = b_root + (0, -level);          // the overfull node
pair[] before = {
	b_root,
	b_root + (-1.2, -level),             // a
	b_full,
	b_root + (1.2, -level),              // f
	b_full + (-0.9, -level),             // b
	b_full + (-0.3, -level),             // c
	b_full + (0.3, -level),              // d
	b_full + (0.9, -level)               // e
};
tree_init(before);

// Edges first, nodes afterwards (same drawing order as before the rewrite).
ab_edge(0, 1, -off_big);
ab_edge(0, 2);
ab_edge(0, 3, off_big);
ab_edge(2, 4, -off_big);
ab_edge(2, 5, -off_small);
ab_edge(2, 6, off_small);
ab_edge(2, 7, off_big);

tree_elliptic_node(0, "2\;8", elong=1.5);
tree_elliptic_node(2, "4\;{\bf 5}\;6", elong=1.5, mode=v_bold);
tree_node(1, "a");
for (int i = 0; i < inner_leaves.length; ++i)
	tree_node(4 + i, inner_leaves[i]);
tree_node(3, "f");

/* ---- After ---- */
pair a_root = (5, 0);                        // parent
pair a_lhalf = a_root + (-0.65, -level);     // left half of the split node
pair a_rhalf = a_root + (0.65, -level);      // right half of the split node
pair[] after = {
	a_root,
	a_root + (-1.7, -level),             // a
	a_lhalf,
	a_rhalf,
	a_root + (1.7, -level),              // f
	a_lhalf + (-0.3, -level),            // b
	a_lhalf + (0.3, -level),             // c
	a_rhalf + (-0.3, -level),            // d
	a_rhalf + (0.3, -level)              // e
};
tree_init(after);

ab_edge(0, 1, -1.5*off_big);
ab_edge(0, 2, -0.5*off_small);
ab_edge(0, 3, 0.5*off_small);
ab_edge(0, 4, 1.5*off_big);
ab_edge(2, 5, -off_small);
ab_edge(2, 6, off_small);
ab_edge(3, 7, -off_small);
ab_edge(3, 8, off_small);

tree_elliptic_node(0, "2\;{\bf 5}\;8", elong=1.5, mode=v_bold);
tree_elliptic_node(2, "4");
tree_elliptic_node(3, "6");
tree_node(1, "a");
for (int i = 0; i < inner_leaves.length; ++i)
	tree_node(5 + i, inner_leaves[i]);
tree_node(4, "f");

/* ---- Arrow between the two trees ---- */
draw((1.8,-0.3) -- (2.9,-0.3), e_arrow);
...@@ -115,15 +115,93 @@ at most 1~bit of information and we need to gather $\log n$ bits to determine th ...@@ -115,15 +115,93 @@ at most 1~bit of information and we need to gather $\log n$ bits to determine th
\subsection{Insertion} \subsection{Insertion}
TODO If we want to insert a~key, we try to find it first. If the key is not present
yet, the search ends in a~leaf (external node). However, we cannot simply turn this
An~\alg{Insert} takes $\Theta(b \cdot \log n / \log a)$ time. leaf into an~internal node with two external children --- this would break the
axiom that all leaves lie on the same level.
Instead, we insert the key to the parent of the external node --- that is, to a~node
on the lowest internal level. Adding a~key requires adding a~child, so we add a~leaf.
This is correct since all other children of that node are also leaves.
If the node still has at most $b-1$ keys, we are done. Otherwise, we split
the overfull node into two and distribute the keys approximately equally. In the
parent of the split node, we need to replace one child pointer by two, so we have
to add a~key to the parent. We solve this by moving the middle key of the
overfull node to the parent. Therefore, we are splitting the overfull node into three
parts: the middle key is moved to the parent, all smaller keys form one new node,
and all larger keys form the other one. Children will be distributed among the new
nodes in the only possible way.
\figure{ab-ins.pdf}{}{Splitting an overfull node on insertion to a~$(2,3)$-tree}
This way, we have reduced insertion of a~key to the current node to insertion
of a~key to its parent. Again, this can lead to the parent overflowing, so the
splitting can continue, possibly up to the root. If it happens that we split
the root, we create a~new root with a~single key and two children (this is correct,
since we allowed fewer than $a$~children in the root). This increases the height
of the tree by~1.
Let us calculate the time complexity of \alg{Insert}. In the worst case, we visit
$\Theta(1)$ nodes on each level and we spend $\Theta(b)$ time on each node. This
makes $\Theta(b \cdot \log n / \log a)$ time total.
It remains to show that nodes created by splitting are not undersized, meaning they
have at least~$a$ children. We split a~node~$v$ when it reached $b+1$ children,
so it had $b$~keys. We send one key to the parent, so the new nodes $v_1$ and~$v_2$
will take $\lfloor (b-1)/2\rfloor$ and $\lceil (b-1)/2\rceil$ keys. If any of them
were undersized, we would have $(b-1)/2 < a-1$ and thus $b < 2a-1$. Voilà, this explains
why we put the condition $b\ge 2a-1$ in the definition.
\subsection{Deletion} \subsection{Deletion}
TODO If we want to delete a~key, we find it first. If it is located on the last internal
level, we can delete it directly, together with one of the leaves under it.
A~\alg{Delete} takes $\Theta(b \cdot \log n / \log a)$ time. We still have to check for underflow, though.
Keys located on the higher levels cannot be removed directly --- the internal node
would lose one pointer and we would have a~subtree left in our hands with no place
to connect it to. This situation is similar to deletion of a~node with two children
in a~binary search tree, so we can solve it similarly: We replace the deleted key
by its successor (which is the leftmost key in the deleted key's right subtree).
The successor lies on the last internal level, so it can be deleted directly.
The only remaining problem is fixing an undersized node. For a~moment we will assume
that the node is not the root, so it has $a-2$ keys. It is tempting to solve the underflow
by merging the node with one of its siblings. However, this can be done only if
the sibling contains few keys; otherwise, the merged node could be overfull. But if the
sibling is large, we can fix our problem by borrowing a~key from it.
Let us be exact. Suppose that we have an undersized node~$v$ with
$a-2$ keys and this node has a~left sibling~$\ell$ separated by a~key~$p$ in their
common parent. If there is no left sibling, we use the right sibling and follow
a~mirror image of the procedure.
If the sibling has only~$a$ children, we merge nodes~$v$ and~$\ell$ into a~single
node and we also move the key~$p$ from the parent there. This creates a~node with
$(a-2) + (a-1) + 1 = 2a-2$ keys, which cannot exceed $b-1$. Therefore we reduced
deletion from~$v$ to deletion from its parent.
\figure{ab-del-merge.pdf}{}{Merging nodes on deletion from a~$(2,3)$-tree}
On the contrary, if the sibling has more than~$a$ children, we disconnect its
rightmost child~$c$ and its largest key~$m$. Then we move the key~$m$ to the parent
and the key~$p$ from the parent to~$v$. There, $p$~becomes the smallest key, before
which we connect the child~$c$. After this ``rotation of keys'', all nodes will
have numbers of children in the allowed range, so we can stop.
\figure{ab-del-borrow.pdf}{}{Borrowing a~key from a~sibling on deletion from a~$(2,3)$-tree}
In each step of the algorithm, we either produce a~node which is not undersized,
or we fix the underflow by borrowing from a~sibling, or we merge two nodes. In the
first two cases, we stop. In the third case, we continue by deleting a~key on the
next higher level, continuing possibly to the root. If the root becomes
undersized, it has no keys left, in which case we make its only child the new
root.
In the worst case, \alg{Delete} visits $\Theta(1)$ nodes on each level and it
spends $\Theta(b)$ time per node. Its time complexity is therefore $\Theta(b
\cdot \log n / \log a)$.
\subsection{The choice of parameters} \subsection{The choice of parameters}
...@@ -258,7 +336,7 @@ changes the potential by $\O(1)$. Then it performs a~sequence of splits, each wi ...@@ -258,7 +336,7 @@ changes the potential by $\O(1)$. Then it performs a~sequence of splits, each wi
zero amortized cost. zero amortized cost.
A~\alg{Delete} removes the key, which has $\O(1)$ cost both A~\alg{Delete} removes the key, which has $\O(1)$ cost both
real and amortized. If the node was underfull, it can perform a~sequence of merges real and amortized. If the node was undersized, it can perform a~sequence of merges
with zero amortized cost. Finally, it can borrow a~key from a~neighbor, which has with zero amortized cost. Finally, it can borrow a~key from a~neighbor, which has
$\O(1)$ real and amortized cost, but this happens at most once per \alg{Delete}. $\O(1)$ real and amortized cost, but this happens at most once per \alg{Delete}.
...@@ -273,6 +351,8 @@ TODO ...@@ -273,6 +351,8 @@ TODO
\section{Top-down (a,b)-trees and parallel access} \section{Top-down (a,b)-trees and parallel access}
TODO
\section{Red-black trees} \section{Red-black trees}
TODO TODO
......
0% Loading or .
You are about to add 0 people to the discussion. Proceed with caution.
Please register or to comment