diff --git a/04-heaps/Makefile b/04-heaps/Makefile
index 76a8061c07025766fa099c7d0388a5eb04eac511..813adc72b872bc04d025a444c432a7338d39ec14 100644
--- a/04-heaps/Makefile
+++ b/04-heaps/Makefile
@@ -1,4 +1,4 @@
 TOP=..
-PICS=
+PICS=bins-def bins-example bins-rec
 
 include ../Makerules
diff --git a/04-heaps/bins-def.asy b/04-heaps/bins-def.asy
new file mode 100644
index 0000000000000000000000000000000000000000..82d2e673909d385343f76976a393ac09f9fc47b7
--- /dev/null
+++ b/04-heaps/bins-def.asy
@@ -0,0 +1,32 @@
+import ads;
+import trees;
+unitsize(1.2cm);
+
+tree_node_size = vertex_size;
+tn_edge_len = 0.7;
+
+void t(int parent_id, real angle)
+{
+    tnode n = tn_add(parent_id, angle);
+    n.mode = v_black;
+}
+
+tn_init();
+t(-1, 0);    // 0 = root
+t(0, -67);   // 1
+t(0, -45);   // 2
+t(0, 45);    // 3
+t(0, 67);    // 4
+t(2, 0);     // 5
+tn_draw();
+
+draw(tn_pos[3] -- tn_pos[3] + (-0.25,-1) -- tn_pos[3] + (0.25,-1) -- cycle);
+draw(tn_pos[4] -- tn_pos[4] + (-0.4,-1.2) -- tn_pos[4] + (0.4,-1.2) -- cycle);
+
+pair low = (0, -1.5);
+label("$B_k$", tn_pos[0], 2N);
+label("\strut $B_0$", tn_pos[1] + low);
+label("\strut $B_1$", tn_pos[2] + low);
+label("\strut $B_{k-2}$", tn_pos[3] + low);
+label("\strut $B_{k-1}$", tn_pos[4] + low);
+label("$\ldots$", interp(tn_pos[2], tn_pos[3], 0.5) + 0.2*low);
diff --git a/04-heaps/bins-example.asy b/04-heaps/bins-example.asy
new file mode 100644
index 0000000000000000000000000000000000000000..24be3e4df0d12f2fd5bb234a26b5cdc847870750
--- /dev/null
+++ b/04-heaps/bins-example.asy
@@ -0,0 +1,31 @@
+import ads;
+
+pair v[];
+v[0] = (0,0);
+int p[];     // p[j] = parent of vertex j, -1 for the root
+p[0] = -1;
+
+real edge_len = 0.5;
+real dists[] = { 0, 1.5, 3, 5, 7.7 };
+real dirs[] = { -45, 45, 65, 78.5 };
+
+for (int i=0; i<5; ++i) {
+    pair w[];
+    int n = v.length;
+    for (int j=0; j<n; ++j)
+        w[j] = v[j] + (dists[i],0);
+    graph(w);
+    for (int j=1; j<n; ++j)
+        edge(j, p[j]);
+    label("\strut $B_{" + (string)i + "}$", (dists[i], 0.5));
+    if (i == 4)
+        break;
+
+    for (int j=0; j<n; ++j) {
+        pair d = (0,1) * dir(dirs[i]);
+        d = d * (-edge_len/d.y);
+        v[n+j] = v[j] + d;
+        p[n+j] = p[j] + n;
+    }
+    p[n] = 0;    // the copy's root becomes a child of the original root
+}
diff --git a/04-heaps/bins-rec.asy b/04-heaps/bins-rec.asy
new file mode 100644
index 0000000000000000000000000000000000000000..8cc04d91df124c5e2f977689ac6cc4c487abee3a
--- /dev/null
+++ b/04-heaps/bins-rec.asy
@@ -0,0 +1,18 @@
+import ads;
+
+pair v[];
+v[0] = (0,0);
+v[1] = (2.5,0);
+v[2] = v[1] + 1.2*dir(-30);
+
+graph(v);
+
+draw(v[0] -- v[0] + (-0.8,-2) -- v[0] + (0.8,-2) -- cycle);
+draw(v[1] -- v[1] + (-0.5,-1.4) -- v[1] + (0.5,-1.4) -- cycle);
+draw(v[2] -- v[2] + (-0.5,-1.4) -- v[2] + (0.5,-1.4) -- cycle);
+edge(1, 2);
+
+label("$B_k$", v[0] - (0,2.5));
+label("$B_{k-1}$", v[1] - (0,1.8));
+label("$B_{k-1}$", v[2] - (0,1.8));
+label("$=$", interp(v[0], v[1], 0.5) - (0,1));
diff --git a/04-heaps/heaps.tex b/04-heaps/heaps.tex
index a4b4fa33d8a3de9ce50d7821cdd4541463648d1f..34f96c5ea8abde7e38b4547a5a9eeb8f20fb9c5c 100644
--- a/04-heaps/heaps.tex
+++ b/04-heaps/heaps.tex
@@ -28,11 +28,11 @@
 $\opdf{Increase}(\<id>,p)$ & Increase priority of the item identified by~\<id>
     to~$p$. \cr
 $\opdf{Delete}(\<id>)$ & Remove the given item from the heap.
-    It is usually simulated by $\alg{Decrease}(\<id>,-\infty)$
-    followed by \alg{ExtractMin}. \cr
+    It is usually simulated by $\op{Decrease}(\<id>,-\infty)$
+    followed by \op{ExtractMin}. \cr
 $\opdf{Build}((p_1,v_1),\ldots)$ & Create a~new heap containing the given items.
-    It is equivalent to a~sequence of \alg{Inserts} on an empty
+    It is equivalent to a~sequence of \op{Inserts} on an empty
     heap, but it can be often implemented more efficiently. \cr
 }
 
 By reversing the order on priorities, we can obtain the a~maximal heap, which maintains
@@ -40,21 +40,217 @@
 the maximum instead of the minimum.
 
 \obs{
 We can sort a~sequence of $n$~items by inserting them to a~heap and then calling
-\alg{ExtractMin} $n$~times. The standard lower bound on comparison-based sorting implies
-that at least one of \alg{Insert} and \alg{ExtractMin} must take $\Omega(\log n)$ time
+\op{ExtractMin} $n$~times. The standard lower bound on comparison-based sorting implies
+that at least one of \op{Insert} and \op{ExtractMin} must take $\Omega(\log n)$ time
 amortized.
 }
 
 We can implement the heap interface using a~search tree, which yields $\Theta(\log n)$
-time complexity of all operations except \alg{Build}. However, specialized constructions
+time complexity of all operations except \op{Build}. However, specialized constructions
 presented in this chapter
-will achieve $\Theta(\log n)$ amortized time for \alg{ExtractMin}, $\Theta(n)$ for \alg{Build},
+will achieve $\Theta(\log n)$ amortized time for \op{ExtractMin}, $\Theta(n)$ for \op{Build},
 and $\Theta(1)$ amortized for all other operations.
+\subsection{Dijkstra's algorithm}
+
+Let us see one example where a~heap outperforms search trees: Dijkstra's famous
+algorithm for finding shortest paths in a~graph.
+
+TODO
+
+\lemma{
+Dijkstra's algorithm with a~heap runs in time
+$\O(n\cdot T_I(n) + n\cdot T_X(n) + m\cdot T_D(n))$.
+Here $n$ and~$m$ are the number of vertices and edges of the graph, respectively.
+$T_I(n)$ is the amortized time complexity of \op{Insert} on a~heap with at most~$n$
+items, and similarly $T_X(n)$ for \op{ExtractMin} and $T_D(n)$ for \op{Decrease}.
+}
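+
+To illustrate what the lemma buys us (using bounds that we only prove later in this
+chapter), consider the two implementations mentioned above. With a~search tree, or with
+a~regular heap, all three operations take $\O(\log n)$ time, so the total is
+$$\O(n\log n + n\log n + m\log n) = \O((n+m)\log n).$$
+With the bounds promised in the introduction --- $\O(\log n)$ amortized for
+\op{ExtractMin} and $\O(1)$ amortized for \op{Insert} and \op{Decrease} --- the same
+lemma gives $\O(n + n\log n + m) = \O(m + n\log n)$, which is better whenever the graph
+has many more edges than vertices.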
+
 \section{Regular heaps}
+TODO
+
 \section{Binomial heaps}
+The binomial heap performs similarly to regular heaps, but it has a~more
+flexible structure, which will serve us well in later constructions. It supports
+all the usual heap operations. It is also able to \opdf{Merge} two heaps into one
+efficiently.
+
+The binomial heap will be defined as a~collection of binomial trees,
+so let us introduce these first.
+
+\defn{
+The \em{binomial tree of rank~$k$} is a~rooted tree~$B_k$ with ordered children
+in each node such that:
+\tightlist{n.}
+\:$B_0$ contains only the root.
+\:$B_k$ for $k>0$ contains a~root with~$k$ children, which are the roots of subtrees $B_0$, $B_1$, \dots, $B_{k-1}$
+    in this order.\foot{In fact, this works even for $k=0$.}
+\endlist
+If we wanted to be strictly formal, we would define~$B_k$ as any member of an~isomorphism
+class of trees satisfying these properties.
+}
+
+\figure[bintree]{bins-def.pdf}{}{The binomial tree of rank~$k$}
+
+\figure[bintreesample]{bins-example.pdf}{}{Binomial trees of small rank}
+
+We can see this construction and several examples in figures \figref{bintree} and \figref{bintreesample}.
+Let us mention several important properties of binomial trees, which can be easily proved
+by induction on the rank:
+
+\obs{
+\tightlist{o}
+\:The binomial tree~$B_k$ has $2^k$ nodes on $k+1$ levels.
+\:$B_k$ can be obtained by linking the roots of two copies of~$B_{k-1}$ (see figure \figref{bintreerec}).
+\endlist
+}
+
+\figure[bintreerec]{bins-rec.pdf}{}{Joining binomial trees of the same rank}
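+
+To see why, note that the root of~$B_k$ together with its first $k-1$ children
+$B_0,\ldots,B_{k-2}$ forms a~copy of~$B_{k-1}$, and its last child is another~$B_{k-1}$;
+this is exactly the linking from the second property. The first property then follows
+by induction, because each linking doubles the number of nodes and adds one level:
+$$2\cdot 2^{k-1} = 2^k \hbox{ nodes on } k+1 \hbox{ levels.}$$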
+
+\defn{
+The \em{binomial heap} for a~given set of items is a~sequence ${\cal T} = T_1,\ldots,T_\ell$
+of binomial trees such that:
+\list{n.}
+\:The ranks of the trees are increasing: $\<rank>(T_i) < \<rank>(T_{i+1})$ for all~$i$.
+    In particular, this means that the ranks are distinct.
+\:Each node of a~tree contains a~single item. We will use $p(v)$ for the priority of the item
+    stored in a~node~$v$.
+\:The items obey the \em{heap order} --- if a~node~$u$ is a~parent of~$v$, then $p(u) \le p(v)$.
+\endlist
+}
+
+\note{
+To ensure that all operations have the required time complexity, we need to be careful
+with the representation of the heap in memory. For each node, we will store:
+\tightlist{o}
+\:the rank (the rank of a~non-root node is defined as the number of its children)
+\:the priority and other data of the item
+\:a~pointer to the first child
+\:a~pointer to the next sibling (the children of a~node form a~singly linked list)
+\:a~pointer to the parent (this is necessary only if we want to implement \op{Decrease})
+\endlist
+}
+Since the next sibling pointer plays no role in tree roots, we can use it to chain
+the trees of the heap. (In fact, it is often useful to encode the tree roots as children
+of a~``meta-root'', which makes the list of roots just another list of children and
+helps to unify all operations.)
+
+As the trees in the heap have distinct ranks, their sizes are distinct powers of two,
+which sum to the total number of items~$n$. This is exactly the binary representation
+of the number~$n$ --- the $k$-th digit (starting with~0) is~1 iff the tree~$B_k$ is
+present in the heap. Since each~$n$ has a~unique binary representation, the
+shape of the heap is completely determined by its size. Still, we have a~lot of
+freedom in the location of items in the heap.
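+
+For example, a~heap with $n=13$ items always consists of exactly three trees $B_0$,
+$B_2$ and~$B_3$, since
+$$13 = (1101)_2 = 2^3 + 2^2 + 2^0,$$
+but which of the 13 items lands in which node is not determined.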
+
+\corr{
+A~binomial heap with $n$~items contains $\O(\log n)$ trees, whose ranks and
+heights are $\O(\log n)$. Each node has $\O(\log n)$ children.
+}
+
+\subsection{Finding the minimum}
+
+Since the trees are heap-ordered, the minimum item of each tree is located in its root.
+To find the minimum of the whole heap, we have to examine the roots of all trees.
+This can be done in time linear in the number of trees, that is $\Theta(\log n)$.
+
+If the \op{Min} operation is called frequently, we can speed it up to $\Theta(1)$
+by caching a~pointer to the current minimum. The cache can be updated during all
+other operations at the cost of a~constant slow-down. We leave this modification
+as an~exercise.
+
+\subsection{Merging two heaps}
+
+The \op{Merge} operation might look advanced, but in fact it will serve as the basic
+building block of all other operations. \op{Merge} takes two heaps $H_1$ and~$H_2$
+and constructs a~new heap~$H$ containing all items of $H_1$ and~$H_2$. The original
+heaps will be destroyed.
+
+We will scan the lists of trees in each heap in the order of increasing ranks,
+as when merging two sorted lists. If a~given rank is present in only one of the
+lists, we move that tree to the output. If the same rank~$k$ is present in both lists,
+we cannot keep both trees, since that would violate the requirement that all ranks are
+distinct. In this case, we \em{link} the two trees to form a~$B_{k+1}$ as in figure
+\figref{bintreerec}: the tree whose root has the smaller priority stays as the
+root of~the new tree, while the other root becomes its child. Of course, it can happen
+that rank~$k+1$ is already present in one or both of the lists, but this is handled
+by further linking, as described below.
+
+The whole process is similar to addition of binary numbers. We process the
+trees in order of increasing rank~$k$. In each step, we have at most one tree of rank~$k$
+in each heap, and at most one tree of rank~$k$ carried over from the previous step.
+If we have two or more such trees, we link a~pair of them and send the result as a~carry
+to the next step. We are left with at most one tree, which we can send to the output.
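+
+As an example, consider merging a~heap with 13 items (trees $B_0$, $B_2$, $B_3$)
+and a~heap with 7 items (trees $B_0$, $B_1$, $B_2$). This mirrors the addition
+$$(1101)_2 + (0111)_2 = (10100)_2 = 20.$$
+At rank~0 we link the two $B_0$'s into a~$B_1$ and carry it; at rank~1 the carry is
+linked with the $B_1$ into a~$B_2$ and carried again; at rank~2 we have three trees
+($B_2$ from each heap and the carry), so we output one of them and link the other two
+into a~carried $B_3$; at rank~3 the carry is linked with the remaining $B_3$ into
+a~$B_4$, which is sent to the output. The resulting heap consists of $B_2$ and~$B_4$,
+matching the two ones in the binary representation of~20.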
+
+As a~minor improvement, if we exhaust one of the input lists and we have no
+carry, we can append the rest of the other list to the output in constant time.
+This will help in the \op{Build} operation later.
+
+Let us analyze the time complexity. The maximum rank in both lists is $\Theta(\log n)$,
+where $n$~is the total number of items in both heaps. For each rank, we spend $\Theta(1)$
+time. If we end up with a~carry after both lists are exhausted, we need one more step
+to process it. This means that \op{Merge} always finishes in $\Theta(\log n)$ time.
+
+\subsection{Inserting items}
+
+\op{Insert} is easy. We create a~new heap with a~single binomial tree of rank~0,
+whose root contains the new item. Then we merge this heap into the current heap.
+This obviously works in time $\Theta(\log n)$.
+
+\op{Build}ing a~heap of $n$~given items is done by repeating \op{Insert}. We are
+going to show that a~single \op{Insert} takes constant amortized time, so the whole
+\op{Build} runs in $\Theta(n)$ time. We observe that the \op{Merge} inside the
+\op{Insert} behaves as a~binary increment and runs in time linear in the number
+of bits changed during that increment. (To achieve this, we needed the improvement
+above: once one of the lists is exhausted and there is no carry, the rest is appended
+in constant time.) Thus we can apply the amortized analysis of binary
+counters we developed in section \secref{amortsec}.
+
+\subsection{Extracting the minimum}
+
+The \op{ExtractMin} operation will again be based on \op{Merge}. We start by locating
+the minimum as in \op{Min}; it lies in the root of one of the trees. We unlink
+this tree from the heap and remove its root, so the tree falls apart into binomial trees
+of all lower ranks (remember figure \figref{bintree}). As the ranks of these trees are
+strictly increasing, the trees form a~correct binomial heap, which can be merged back
+into the current heap.
+
+Finding the minimum takes $\Theta(\log n)$. Disassembling the tree at the root and
+collecting the subtrees in a~new heap takes $\Theta(\log n)$; actually, it can be done in
+constant time if we are using the representation with the meta-root. Merging these trees
+back is again $\Theta(\log n)$. We conclude that the whole \op{ExtractMin} runs in time
+$\Theta(\log n)$.
+
+\subsection{Decreasing and increasing}
+
+A~\op{Decrease} can be performed as in a~regular heap. We modify the priority of the
+node, which can break the heap order at the edge to the parent. If this happens, we swap
+the two items, which can break the order one level higher. In the worst case, the item
+bubbles up all the way to the root, which takes $\Theta(\log n)$ time.
+
+An~\op{Increase} can be done by bubbling the item down, but this is slow. As each node
+can have logarithmically many children, we spend $\Theta(\log n)$ time per step,
+which is $\Theta(\log^2 n)$ in total. It is faster to \op{Delete} the item by decreasing
+its priority to $-\infty$ and performing an~\op{ExtractMin}, and then inserting it back
+with the new priority. This is $\Theta(\log n)$.
+
+\subsection{Summary of operations}
+
+\displaytexfig{\vbox{\halign{#\hfil\quad&#\hfil\quad\enspace&#\hfil\cr
+\em{operation} & \em{complexity} & \em{description} \cr
+\noalign{\smallskip}
+\op{Insert} & $\Theta(\log n)$ & adds an item to the heap and returns its identifier \cr
+\op{Min} & $\Theta(1)$ & returns the item with minimum priority \cr
+\op{ExtractMin} & $\Theta(\log n)$ & finds and removes the minimum item \cr
+\op{Merge} & $\Theta(\log n)$ & merges two heaps into one \cr
+\op{Build} & $\Theta(n)$ & builds a~heap of~$n$ given items \cr
+\op{Decrease} & $\Theta(\log n)$ & decreases the priority of an item \cr
+\op{Increase} & $\Theta(\log n)$ & increases the priority of an item \cr
+\op{Delete} & $\Theta(\log n)$ & deletes an item \cr
+}}}
+
+Compared with the regular binary heap, the binomial heap achieves the same time bounds,
+but it additionally supports efficient merging.
+
 \section{Lazy binomial heaps}
 
 \section{Fibonacci heaps}
diff --git a/tex/adsmac.tex b/tex/adsmac.tex
index 0545fd533b4b818e2e14bdacba9ebc402c4e23a5..d2d81923db177dcd004b760a66e0ad4e70637f2a 100644
--- a/tex/adsmac.tex
+++ b/tex/adsmac.tex
@@ -363,6 +363,9 @@
 % Jmeno algoritmu v textu nebo ve formuli
 \protected\def\alg#1{\leavevmode\hbox{\csc #1}}
 
+% Jmena operaci datovych struktur sazime stejne jako jmena algoritmu
+\let\op=\alg
+
 %%% Konstrukce pouzivane v algoritmech %%%
 
 % Komentar
@@ -802,7 +805,7 @@
 \def\dfr#1#2{\rr{#2}\em{#1}}
 
 % Definice operace datové struktury
-\def\opdf#1{\rr{operace/#1=operace/\alg{#1}}\alg{#1}}
+\def\opdf#1{\rr{operace/#1=operace/\op{#1}}\op{#1}}
 
 % Zápis do souboru
 \newwrite\idxfile