Skip to content
Snippets Groups Projects
Commit 3ca32732 authored by Martin Mareš's avatar Martin Mareš
Browse files

Merge branch 'fs-succinct'

parents 1743b5ed 7d2c9375
No related branches found
No related tags found
No related merge requests found
@@ -17,7 +17,8 @@ CHAPTERS= \
 	06-hash \
 	07-geom \
 	08-string \
-	vk-dynamic
+	vk-dynamic \
+	fs-succinct

chapters:
	for ch in $(CHAPTERS) ; do $(MAKE) -C $$ch pics ; done
......
# Figure build for the fs-succinct chapter.  TOP and PICS are consumed
# by the shared ../Makerules (presumably it turns each name in PICS
# into a rendered PDF -- confirm against Makerules itself).
TOP=..
PICS=sole sole_boxes sole_hilevel mixer composition mixer_chain mixer_tree tree_shapes
include ../Makerules
# Extra prerequisites: these figures import succinct_common.asy, so
# they must be re-rendered whenever the shared helpers change.
sole.pdf:: succinct_common.asy
sole_boxes.pdf:: succinct_common.asy
sole_hilevel.pdf:: succinct_common.asy
mixer_chain.pdf:: succinct_common.asy
mixer_tree.pdf:: succinct_common.asy
tree_shapes.pdf:: succinct_common.asy
// composition.pdf -- figure "composition": three mappings g_1, g_2, g_3
// chained by arrows and enclosed in a rounded box representing their
// composition g.  Illustrates that redundancies of composed mappings
// add up as a telescopic sum.
//import ads;
//import flowchart;
//draw(roundrectangle("f", (0,0)));
//draw(roundrectangle("g", (1,-1)));
//draw(roundrectangle("h", (-1,-2)));
object f1 = draw("$g_1$", roundbox, (0,0), xmargin=0.5, ymargin=0.5);
object f2 = draw("$g_2$", roundbox, (1cm,-1cm), xmargin=0.5, ymargin=0.5);
object f3 = draw("$g_3$", roundbox, (-1cm,-1.5cm), xmargin=0.5, ymargin=0.5);
// XXX this does not work when setting unitsize
// Internal data-flow edges between the three mappings.
draw(point(f1, SE) -- point(f2, NW), Arrow);
draw(point(f1, SW) -- point(f3, NE), Arrow);
draw(point(f2, W) -- point(f3, E), Arrow);
// Rounded box around everything: the composed mapping g.
draw(roundbox(bbox(), xmargin=0.35cm));
// External outputs leave downwards; the external input enters f1 from above.
draw(point(f2, S) -- (xpart(point(f2, S)), -2.5cm), Arrow);
draw(point(f3, S) -- (xpart(point(f3, S)), -2.5cm), Arrow);
draw((xpart(point(f1, N)), 1cm) -- point(f1, N), Arrow);
// Label the enclosing box in its top-left corner.
label("$g$", (xpart(min(currentpicture)), ypart(max(currentpicture))) + (0.25cm, -0.25cm));
// mixer.pdf -- figure "mixer": a single mixer with its four ports --
// input (top), carry-in (left), carry-out (right), output (bottom) --
// and the internal alphabet $c\in[C]$ shown in the middle.
import succinct_common;
real r = 1.5;   // half-side of the mixer box
real dist=2;    // how far the port arrows extend from the centre
mixer(0,0,r);
// Arrows point towards the box for inputs and away from it for outputs.
draw((-dist,0)--(-r,0), e_arrow);
draw((r,0)--(dist,0), e_arrow);
draw((0,-r)--(0,-dist), e_arrow);
draw((0,dist)--(0,r), e_arrow);
// Port labels with their alphabets.
label((0, dist), "\vbox{\hbox{$x\in[X]$}\hbox{\eightrm (input)}}", N);
label((-dist, 0), "\vbox{\hbox{$y\in[Y]$}\hbox{\eightrm (carry in)}}", W);
label((dist, 0), "\vbox{\hbox{$s\in[S]$}\hbox{\eightrm (carry out)}}", E);
label((0, -dist), "\vbox{\hbox{$m\in[2^M]$}\hbox{\eightrm (output)}}", S);
label((0, 0), "$c\in [C]$");
// mixer_chain.pdf -- figure "mixer_chain": a chain of mixers encoding a
// string of blocks over alphabet [X]; each mixer emits a whole number of
// bits (2^{M_i}) and forwards the rest as carry Y_{i+1} to its neighbour.
import succinct_common;
real mixgrid = 2.5;   // horizontal spacing between mixers
int nmixers = 5;      // drawn positions; position 3 is the "..." gap
for (int i = 0; i < nmixers; ++i) {
real x = mixgrid * i;
if (i == 3) {
label((x,0), "$\cdots$");
} else {
draw((x, 1.25) -- (x, 0.5), e_arrow);
label((x, 1.25), "$X$", N);
mixer(x, 0);
draw((x, -0.5) -- (x, -1.25), e_arrow);
// The last drawn mixer stands for the n-th mixer of the chain.
label((x, -1.25), (i == 4) ? "$2^{M_n}$" : "$2^{M_"+((string)(i+1))+"}$", S);
}
}
// Carry arrows between consecutive mixers (alphas[0] is unused padding).
string[] alphas = {"", "Y_1", "Y_2", "Y_3", "Y_{N-1}", "Y_N"};
for (int i = 1; i < nmixers; ++i) {
carry_arrow((mixgrid * (i-1), 0), (mixgrid*i, 0), alphas[i]);
}
// The final carry leaves the chain and is output as extra bits at the end.
pair endb = (mixgrid * (nmixers-1), 0) + (0.5,0);
draw(endb -- endb + (0.5,0) {E} .. {S} endb + (1.5,-1) -- endb + (1.5,-1.25), e_arrow);
label(endb + (1.5,-1.25), "$2^{M_{n+1}}$", S);
// mixer_tree.pdf -- figure "mixer_tree": on the left a single five-port
// mixer (pentamixer) with two carry inputs; on the right three
// pentamixers wired into a small binary tree, where the carry-outs of
// the children feed the carry-ins of the parent.
import succinct_common;
pair C = (0,0);
PENTAMIXER_R = 0.75;
pentamixer(C);
// Ports of the stand-alone pentamixer, labelled with their alphabets.
pm_arrow(C, A_IN, 1, lbl="\vbox{\hbox{$x\in[X]$}\hbox{\eightrm (input)}}");
pm_arrow(C, A_CIN1, 1, lbl="\vbox{\hbox{$y\in[Y]$}\hbox{\eightrm (carry in 1)}}");
pm_arrow(C, A_CIN2, 1, lbl="\vbox{\hbox{$z\in[Z]$}\hbox{\eightrm (carry in 2)}}");
pm_arrow(C, A_OUT, 1, out=true, lbl="\vbox{\hbox{$m\in[2^M]$}\hbox{\eightrm (output)}}");
pm_arrow(C, A_COUT, 1, out=true, lbl="\vbox{\hbox{$s\in[S]$}\hbox{\eightrm (carry out)}}");
// The tree on the right: m3 is the parent of m1 and m2.
PENTAMIXER_R = 0.5;
pair m1 = (5, -1);
pair m2 = (9, -1);
pair m3 = (7, 1.5);
pair mix[] = {m1, m2, m3};
pentamixer(m1);
pentamixer(m2);
pentamixer(m3);
// Children's carry-outs become the parent's two carry-ins.
draw(pm_dir(m1, A_COUT)--pm_dir(m3, A_CIN1), e_arrow);
draw(pm_dir(m2, A_COUT)--pm_dir(m3, A_CIN2), e_arrow);
// Every vertex has its own input and output block.
for (int i = 0; i < 3; ++i) {
pm_arrow(mix[i], A_IN, 0.5, lbl="in");
pm_arrow(mix[i], A_OUT, 0.5, out=true, lbl="out");
}
// Dangling carries suggest the tree continues beyond the picture.
for (int i = 0; i < 2; ++i) {
pm_arrow(mix[i], A_CIN1, 0.5, "$\vdots$");
pm_arrow(mix[i], A_CIN2, 0.5, "$\vdots$");
}
pm_arrow(m3, A_COUT, 0.5, out=true, "$\vdots$");
// sole.pdf -- figure "sole": the SOLE alphabet re-encoding scheme.
// Row 0: input blocks [B] plus the EOF pseudo-block; row 1: after
// adding EOF everything is over [B+1]; row 2: after pass 1 the
// alphabets alternate around B (B+3i / B-3i); row 3: after pass 2 all
// blocks are back to alphabet [B].
import succinct_common;
blocks(0 ... concat(array(6,"B"), new string[] {"...", "B", "EOF"}));
thruarrows(0,0,6);
thruarrows(0,7,2);
blocks(1 ... concat(array(6, "B+1"), array(1, "..."), array(2, "B+1")) );
// Pass 1 pairs blocks starting at position 0; a padding pseudo-block
// "0" is appended so the number of blocks is even.
mixarrows(1,0,6);
mixarrow(1, 7);
block(1, 9, "0");
blocks(2, "B", "B+3", "B-3", "B+6", "B-6", "B+9", "...", "B-i","B+j", "B-j");
// Pass 2 is phase-shifted by one block: the first block passes through.
thruarrow(2, 0);
mixarrows(2, 1, 6);
mixarrow(2, 8);
thruarrow(1, 9);
blocks(3 ... concat(array(6,"B"), array(1, "..."), array(3, "B")));
passlabel(0, "Add EOF");
passlabel(1, "Pass 1");
passlabel(2, "Pass 2");
// sole_boxes.pdf -- figure "sole_boxes": SOLE re-interpreted as a chain
// of "mixer" boxes.  The grey polygons highlight how one box spans two
// blocks of pass 1 and two (phase-shifted) blocks of pass 2.
import succinct_common;
// Grey background polygon for the mixer box anchored at column `col`:
// it covers columns col..col+1 on the pass-1 row and columns
// col-1..col on the pass-2 row, joined by a step in the middle.
void fillbox(int col, pen pen) {
path p = (
(col*blockwidth, -rowheight-blockheight)
-- ((col+2)*blockwidth, -rowheight-blockheight)
-- ((col+2)*blockwidth, -2*rowheight)
-- ((col+1)*blockwidth, -2*rowheight)
-- ((col+1)*blockwidth, -3*rowheight)
-- ((col-1)*blockwidth, -3*rowheight)
-- ((col-1)*blockwidth, -2*rowheight-blockheight)
-- ((col)*blockwidth, -2*rowheight-blockheight)
-- cycle
);
fill(p, pen);
draw(p, thick);
}
// Alternate shades so neighbouring boxes are distinguishable.
fillbox(2, 0.5*white);
fillbox(4, 0.75*white);
fillbox(6, 0.5*white);
//blocks(0 ... concat(array(8,"B"), new string[] {"..."}));
//thruarrows(0,0,6);
blocks(1 ... concat(array(8, "B+1"), array(1, "...")) );
mixarrows(1,0,8);
blocks(2, "B", "B+3", "B-3", "B+6", "B-6", "B+9", "B-9", "...");
thruarrow(2, 0);
mixarrows(2, 1, 6);
blocks(3 ... concat(array(7,"B"), array(1, "...")));
// sole_hilevel.pdf -- figure "sole_hilevel": high-level view of SOLE as
// a chain of identical mixers with input alphabet (B+1)^2, output
// alphabet B^2 and carry alphabets growing as B+3i.
import succinct_common;
real mixgrid = 2.5;   // horizontal spacing between mixers
int nmixers = 3;
for (int i = 0; i < nmixers; ++i) {
real x = mixgrid * i;
draw((x, 1.25) -- (x, 0.5), e_arrow);
label((x, 1.25), "$(B+1)^2$", N);
mixer(x, 0);
draw((x, -0.5) -- (x, -1.25), e_arrow);
label((x, -1.25), "$B^2$", S);
}
// Carry arrows, including the carry entering the first drawn mixer
// (from off-picture on the left) and the one leaving the last.
string[] alphas = {"B+3", "B+6", "B+9", "B+12"};
for (int i = 0; i < nmixers+1; ++i) {
carry_arrow((mixgrid * (i-1), 0), (mixgrid*i, 0), alphas[i]);
}
\ifx\chapter\undefined
\input adsmac.tex
\singlechapter{50}
\fi
\input tabto.tex
\chapter[succinct]{Space-efficient data structures}
In this chapter, we will explore space-efficient data structures. This may
sound like a boring topic at first -- after all, many of the commonly-used data
structures have linear space complexity, which is asymptotically optimal.
However, in this chapter, we shall use a much more fine-grained notion of space
efficiency and measure space requirements in bits.
Imagine we have a data structure whose size is parametrized by some parameter
$n$ (e.g. number of elements). Let us define $X(n)$ as the universe of all possible
values that a size-$n$ data structure (as a whole) can hold. For example if we
have a data structure for storing strings from a fixed alphabet, $X(n)$ may be the
universe of all length-$n$ strings from this alphabet.
Let us denote $s(n)$ the number of bits needed to store a size-$n$ data structure.
The information-theoretical optimum is $OPT(n) := \lceil\log |X(n)|\rceil$
(which is essentially the entropy of a uniform distribution over $X(n)$).
Note: We will always ignore constant additive factors, so sometimes we will use the
definition $OPT(n) := \log |X(n)|$ (without rounding, differs by at most one from
the original definition) interchangeably.
\defn{{\I Redundancy} of a space-efficient data structure is $r(n) := s(n) - OPT(n)$.}
Now we can define three classes of data structures based on their fine-grained space
efficiency:
\defn{A data structure is
\tightlist{o}
\:{\I implicit} when $s(n) \le OPT(n) + \O(1)$,\tabto{7.6cm}i.e., $r(n) = \O(1)$,
\:{\I succinct} when $s(n) \le OPT(n) + {\rm o}(OPT(n))$,\tabto{7.6cm}i.e., $r(n) = {\rm o}(OPT(n))$,
\:{\I compact} when $s(n) \le \O(OPT(n))$.
\endlist
}
A typical implicit data structure contains just its elements in some order and nothing more.
Examples include sorted arrays and heaps.
Note that some linear-space data structures are not even compact -- because we
are counting bits now, not words. For example, a linked list representing a
length-$n$ sequence of numbers from range $[m]$ needs $\O(n (\log n + \log m))$
bits ($\log n$ bits are used to represent a next-pointer), whereas $OPT$ is $n
\log m$. For $n \gg m$, this does not satisfy the requirements for a compact
data structure.
And of course, as with any data structure, we want to be able to perform reasonably
fast operations on these space-efficient data structures.
\section{Representation of strings over arbitrary alphabet}
Let us consider the problem of representing a length-$n$ string over alphabet $[m]$,
for example a string of base-10 digits. The following two naive approaches immediately
come to mind:
\list{(a)}
\: Consider the whole string as one base-10 number and convert that number into binary.
This achieves the information-theoretically optimum size of $OPT(n) = \lceil n \log 10 \rceil
\approx 3.32n = \Theta(n+1)$. However, this representation does not support local decoding and
modification -- you must always decode and re-encode the whole string.
\: Store the string digit-by-digit. This uses space $n \lceil \log 10 \rceil = 4n = OPT(n) + \Theta(n)$.
For a fixed alphabet size, this is not succinct because $\Theta(n) > o(OPT(n)) = o(n + 1)$\foot{More
formally, if we consider $\O$ and $o$ to be sets of functions, $\Theta(n) \cap o(n + 1) = \emptyset$.}.
However, we get constant-time local decoding and modification for free.
\endlist
We would like to get the best of both worlds -- achieve close-to-optimum space
requirements while also supporting constant-time local decoding and modification.
A simple solution that may work in practice is to encode the digits in groups
(e.g. encode each 2 subsequent digits into one number from the range [100] and
convert that number to binary).
With groups of size $k$, we get $$s(n) = \lceil n/k \rceil \lceil k \log 10
\rceil \le (n/k + 1)(k \log 10 + 1) = \underbrace{n \log 10}_{OPT(n)} + n/k +
\underbrace{k\log 10 + 1}_{\O(1)}.$$ Thus we see that with increasing $k$,
redundancy goes down, approaching the optimum but never quite reaching it. For a
fixed $k$ it is still linear and thus our scheme is not succinct. Also, with
increasing $k$, local access time goes up. In practice, however, one could
choose a good compromise value for $k$ and happily use such a scheme.
We will develop a succinct encoding scheme later in this chapter.
\section{Intermezzo: Prefix-free encoding of bit strings}
Let us forget about arbitrary alphabets for a moment and consider a different
problem. We want to encode a binary string of arbitrary length in a way that
allows the decoder to determine when the string ends (it can be followed by
arbitrary other data). Furthermore, we want this to be a streaming encoding
-- i.e., encode the string piece by piece while it is being read from the input.
The length of the string is not known in advance -- it will only be determined
when the input reaches its end\foot{If the length were known in advance, we could
simply store the length using any simple variable-size number encoding, followed by the
string data itself. This would give us $\O(\log n)$ redundancy almost for free.}
A trivial solution might be to split the string into $b$-bit blocks and encode
each of them into a $(b+1)$-bit block with a simple padding scheme:
\tightlist{o}
\: For a complete block, output its $b$ data bits followed by a one.
\: For an incomplete final block, output its data bits, followed by a one
and then as many zeros as needed to reach $b+1$ bits.
\: If the final block is complete (input length is divisible by $b$), we must
add an extra padding-only block (one followed by $b$ zeros) to signal the
end of the string.
\endlist
The redundancy of such encoding is at most $n/b + b + 1$ (one bit per block,
$b+1$ for extra padding block). For a fixed $b$, this is $\Theta(n)$, so the
scheme is not succinct.
\subsection{SOLE (Short-Odd-Long-Even) Encoding}
In this section we will present a more advanced prefix-free string encoding
that will be succinct.
First, we split the input into $b$-bit blocks. We will add a padding in the
form of $10\cdots0$ at the end of the last block to make it $b$ bits long.
If the last block was complete, we must add an extra padding-only block to
make the padding scheme reversible.
Now we will consider each block as a single character from the alphabet $[B]$,
where $B:=2^b$. Then we shall extend this alphabet by adding a special EOF
character. We will add this character at the end of encoding. This gives us
a new string from the alphabet $[B+1]$ that has length at most $n/b + 2$
($+1$ for padding, $+1$ for added EOF character).
However, as $B+1$ is not a power of two, now we have a question of how to
encode this string. Note that this is a special case of the problem stated
above, i.e. encoding a string from an arbitrary alphabet. We will try to solve
this special case as a warm-up and then move on to a fully general solution.
First, we need to introduce a new concept: re-encoding character pairs into
different alphabets. Let's assume for example, that we have two characters from
alphabets [11] and [8], respectively. We can turn them into one character from
the alphabet [88] (by the simple transformation of $8x + y$). We can then split
that character again into two in a different way. For example into two characters
from alphabets [9] and [10]. This can be accomplished by simple division with
remainder: if the original character is $z\in [88]$, we transform it into
$\lfloor z / 10\rfloor$ and $(z \bmod 10)$. For example, if we start
with the characters 6 and 5, they first get combined to form $6\cdot 8 + 5 = 53$
and then split into 5 and 3.
We can think of these two steps as a single transformation that takes
two characters from alphabets [11] and [8] and transforms them into
two characters from alphabets [9] and [10]. More generally, we can
always transform a pair of characters from alphabets $[A]$ and $[B]$
into a pair from alphabets $[C]$ and $[D]$ as long as $C\cdot D
\ge A \cdot B$ (we need an output universe large enough to hold all
possible input combinations).
We will use this kind of alphabet re-encoding by pairs heavily in the SOLE
encoding. The best way to explain the exact scheme is with a diagram (fig. \figref{sole}).
\figure[sole]{sole.pdf}{}{SOLE alphabet re-encoding scheme}
There are two re-encoding phases. The first transforms blocks with alphabet
$[B+1]$ into blocks with variable alphabet sizes (of the form of alternating
$[B+3k]$, $[B-3k]$). This is the origin of the name: after this pass,
odd-numbered blocks have smaller alphabets than even-numbered ones. The second
pass runs phase shifted by one block and converts the variable-alphabet blocks
into blocks with alphabet $[B]$.
What is the redundancy of this scheme? Let us count how the number of blocks
increases throughout the encoding passes:
\tightlist{o}
\: If the original length was a multiple of $b$, we must add one block to complete padding.
\: We always add one block with EOF character.
\: Before the first pass, we may need to add an extra padding block to make number of blocks even
(not shown in fig. \figref{sole}).
\: Before the second pass, we always add an extra padding block to make number of blocks odd.
\endlist
In total, we add at most 4 blocks. Thus $r(n) \le 4b$.
For the scheme to work, we need to set $b \ge 2\log n + 2$ (so
$B \ge 4n^2$). This gives us redundancy $r(n) = \O(\log n)$. Thus
we have a succinct scheme. Also, one block fits into $\O(1)$ words
on a RAM, so we can do constant-time arithmetic on the blocks.
Note that this representation is locally decodable and modifiable -- each input
block affects at most 4 output blocks.
Now we must check that all the alphabet transformations are valid, i.e., the
output universe of each transformation is always at least as big as the input
universe.
For the first pass, we want:
$$\eqalign{
(B+1)^2 &\le (B-3i)(B+3i+3)\cr
B^2 + 2B + 1 &\le B^2 + 3B - 9i^2 - 9i\cr
B &\ge 9i^2 + 9i + 1\cr
}$$
We know $B \ge 4n^2$ and $i \le {n+1\over 2}$. By plugging $i = {n+1\over 2}$
and $B=4n^2$ and doing some algebraic manipulation, we can verify that the
inequality holds. For smaller $i$ the right-hand side decreases so it holds
for those too.
For the second pass, this is trivial, as $(B+i)(B-i) = B^2 - i^2 \le B^2$.
\section{Mixers as a building block for succinct structures}
\subsection{A reinterpretation of the SOLE encoding}
There is another way of looking at the SOLE encoding from the previous section.
We can group the alphabet translations into ``encoding boxes'' that take input
from the alphabet $(B+1)^2$, output the alphabet $B^2$ and the part of the
information that did not fit into the output is passed as a
``carry''\foot{Sometimes the alternative term {\it spill} is used instead.} to
the next encoding box (similarly to how carrying works when doing addition).
See fig. \figref{sole_boxes}. We will also call these boxes {\it mixers}.
\figure[sole_boxes]{sole_boxes.pdf}{}{SOLE interpreted as a chain of mixers}
The start and end of the encoding are irregular, but we will ignore that for now.
An important property of these boxes is that outgoing carry does not depend on incoming
carry (unlike in addition). This allows for local decoding and modification. Otherwise
a single input change could affect the whole output. Now we can describe this scheme
in a more abstract, high-level way (fig. \figref{sole_hilevel}).
\figure[sole_hilevel]{sole_hilevel.pdf}{}{SOLE high-level mixer diagram}
In our case, the input alphabet size is always $(B+1)^2$, the output alphabet size
is $B^2$ and the carry alphabet sizes form the sequence $B+3i$. Given that the output
alphabet is smaller than the input alphabet, it makes sense that the carry alphabet
has to increase in size to accomodate the accumulating information that did not fit
into the output. The final carry is then used to output some extra blocks at the end.
\subsection{Generalizing the mixer concept}
\figure[mixer]{mixer.pdf}{}{General structure of a mixer}
At a high level, a mixer can be thought of as a mapping $f: [X]\times[Y] \rightarrow [2^M]\times[S]$
with the property that when $(m,s) = f(x,y)$, $s$ depends only on $x$. This is the key property
that allows local decoding and modification because carry does not cascade.
Internally, a mixer is
always implemented as a composition of two mappings, $f_1$ that transforms $x \rightarrow
(c,s)$ and $f_2$
that transforms $(y,c) \rightarrow m$. See fig. \figref{mixer}. Both $f_1$ and $f_2$ must be injective
so that the encoding is reversible.
The mappings $f_1$ and $f_2$ themselves are trivial alphabet translations similar to what we
used in the SOLE encoding. You can for example use $f_1(x) = (\lceil x/S \rceil, x \bmod S)$
and $f_2(y,c) = c\cdot Y + y$.
Thus implementing the mixer is simple as long as the parameters allow its existence. A mixer
with parameters $X$, $Y$, $S$, $M$ can exist if and only if there exists $C$ such that
$S\cdot C \ge X$ and $C\cdot Y \le 2^M$ (once again, the alphabet translations need their
range to be as large as their domain in order to work).
\lemma{
A mixer $f$ has the following properties (as long as all inputs and outputs fit into a constant
number of words):
\tightlist{o}
\: $f$ can be computed on a RAM in constant time
\: $s$ depends only on $x$, not $y$
\: $x$ can be decoded given $m$, $s$ in constant time
\: $y$ can be decoded given $m$ in constant time
\endlist
}
All these properties should be evident from the construction.
\defn{The redundancy of a mixer is $$r(f) := \underbrace{M + \log S}_{\hbox{output entropy}} - \quad \underbrace{(\log X + \log Y)}_{\hbox{input entropy}}.$$
In general, the redundancy of a mapping (with possibly multiple inputs and multiple outputs) is the sum of the logs of the
output alphabet size, minus the sum of the logs of the input alphabet sizes. Note that there is no rounding (because the inputs and
outputs can be from arbitrary alphabets, not necessarily binary) and the redundancy can be non-integer. Compare this to the concept
of redundancy for space-efficient datastructures defined above.}
\subsection{On the existence of certain kinds of mixers}
Now we would like to show that mixers with certain parameters do exist.
\lemma{For $X,Y$ there exists a mixer $f: [X]\times[Y] \rightarrow [2^M]\times[S]$
such that:
\tightlist{o}
\: $S = \O(\sqrt{X})$, $2^M = \O(Y\cdot\sqrt{X})$
\: $r(f) = \O(1/\sqrt{X})$
\endlist
}
\proof{
First, let's assume we have chosen an $M$ (which we shall do later). Then we
want to set $C$ so that it satisfies the inequality $C \cdot Y \le 2^M$. Basically
we are asking the question how much information can we fit in $m$ in addition to
the whole of $y$. Clearly we want $C$ to be as high as possible, thus we set
$C := \lfloor 2^M / Y \rfloor$.
Now let us calculate the redundancy. First we shall note that we can compute redundancy
for $f_1$ and $f_2$ separately and add them up:
$$\eqalign{r(f) &= M + \log S - \log X - \log Y \cr
&= \left(M - \log C - \log Y\right) + \left(\log C + \log S - \log X\right)\cr
&= r(f_2) + r(f_1)}$$
}
This is just a telescopic sum. It works similarly for more complex mapping compositions:
as long as each intermediate result is used only once as an input to another mapping, you
can just sum the redundancies of all the mappings involved.
\figure[composition]{composition.pdf}{}{Mapping composition}
For example, if you have a mapping composition as in fig. \figref{composition},
you can easily see $r(g) = r(g_1) + r(g_2) + r(g_3)$. For every edge fully inside
the composition, the same number is added once and subtracted once.
First, we shall estimate $r(f_2)$:
$$\eqalign{r(f_2) &= M - \log(Y\cdot C)= M - \log(\overbrace{Y\cdot \lfloor 2^M / Y \rfloor}^{\ge 2^M - Y})\cr
r(f_2) &\le M - \log(2^M-Y)= \log{2^M\over 2^M-Y} = \log{1 \over 1-{Y \over 2^M}}}$$
Now we shall use a well-known inequality from analysis (logarithms here are
natural; a different base only changes the constant hidden in the $\O$):
$$\eqalign{
e^x &\ge 1+x\cr
\log(1+x) &\le x\cr}$$
Applying it with $x/(1-x)$ in place of $x$ we get:
$$\log{1 \over 1-x} = \log\left(1 + {x \over 1-x}\right) \le {x \over 1-x} \le 2x
\qquad\hbox{for $0 \le x \le 1/2$.}$$
Thus, since $Y/2^M \le 1/2$ (the choice of $M$ below guarantees $2^M \ge Y\sqrt{X} \ge 2Y$ whenever $X \ge 4$):
$$r(f_2) \le {2Y\over 2^M} = \O\left({1 \over C}\right)$$
Now to $r(f_1)$:
$$\eqalign{
r(f_1) &= \log C + \log S - \log X = \log C + \log \left\lceil {X\over C}\right\rceil - \log X
= \log\left({C\left\lceil{X \over C}\right\rceil \over X}\right)\cr
r(f_1) &\le \log\left({X+C \over X}\right) = \log\left(1 + {C\over X}\right) \le {C \over X}\qquad\hbox{(because $\log(x) \le x-1$)}
}$$
Putting this together:
$$r(f) = r(f_1) + r(f_2) \le \O\left({1 \over C} + {C \over X}\right)$$
In order to minimize this sum, we should set $C = \Theta\left(\sqrt{X}\right)$. Then
$r(f) = \O\left({1/\sqrt{X}}\right)$ and $S = \left\lceil{X \over \Theta(\sqrt{X})}\right\rceil = \Theta\left(\sqrt{X}\right)$
as promised. Note that this holds for any value of $Y$.
However, we cannot freely set $C$, as we have already decided that $C := \lfloor 2^M / Y \rfloor$.
Instead, we need to set a value for $M$ that gives us the right $C$.
The whole mixer parameter selection process could be as follows
(it may be useful to refer back to fig. \figref{mixer}):
\tightlist{n.}
\: We are given $X$, $Y$ as parameters.
\: Set $M := \left\lceil\log\left(Y\sqrt{X}\right)\right\rceil$.
\: Set $C := \left\lfloor 2^M / Y \right\rfloor$. This ensures that $2^M \ge C\cdot Y$ and gives us $C = \Theta\left(\sqrt{X}\right)$.
\: Set $S := \left\lceil X / C \right\rceil$. This ensures that $C\cdot S \ge X$ and gives us $S = \Theta\left(\sqrt{X}\right)$.
\endlist
All the inequalities required for mixer existence are satisfied and based on the analysis
above the parameters satisfy what our lemma promised.
\qed
\section{Succinct representation of arbitrary-alphabet strings}
\subsection{A naive first try}
We would like to use mixers to encode string from an arbitrary alphabet into
the binary alphabet. Let's assume we have a string $A \in [\Sigma]^n$.
We shall split it into some blocks of size $k$, which gives us a block alphabet
$[X] = [\Sigma^k]$. Then we could use a mixer chain as in fig. \figref{mixer_chain},
similar to what we did in the SOLE encoding.
\figure[mixer_chain]{mixer_chain.pdf}{}{Mixer chain for string encoding}
The intuition behind this is simple: whatever part of $X$ did not fit into a
whole number of bits is sent on as carry and whenever a whole extra bit of
information has accumulated in the chain, it can be output. The final carry
is output at the end using the necessary number of bits. Here we don't mind
rounding because it is an additive constant.
Everything is also locally decodable and modifiable -- to decode $i$-th input
block, you only need $i$-th and $(i+1)$-st output blocks. And vice versa, you
only need modify these two output blocks after changing the $i$-th input block.
Now we just need to set $k$ and calculate redundancy. It will be useful to
set $k \approx 2\log_\Sigma n$. Then $X \approx n^2$ and by previous lemmas,
$Y_i \in \O(n)$ and redundancy of the mixers is $\O(1/n)$. As there are fewer
than $n$ mixers, the total redundancy is $\O(1)$.
That all sounds wonderful. However, there is one serious problem. Each of the
mixers will have different parameters ($Y_i$, $M_i$, $S_i=Y_{i+1}$). In order
to compute the parameters for $i$-th mixer, we need to know the parameters for
the $(i - 1)$-st, namely the $Y_i=S_{i-1}$. For that, we need the $(i-2)$-nd and
so on...
If we did encoding / decoding in a streaming fashion, this would not matter --
we could compute the mixer parameters one by one as we go.
But if we wish for random access in constant time, we would need to store a table
of all the mixer parameters -- i.e., a table with $\Theta(n/\log_\Sigma n)$ rows.
That is impractical.
Note that this was not an issue for SOLE, as there the $Y_i$'s formed an arithmetic
sequence. They weren't even the optimal $Y_i$'s that would be created by the generic
mixer construction but a close enough approximation that still yielded good results,
up to an additive constant. That was a special case -- in general, we do not know
how to approximate the mixer parameters by something easier to compute locally.
\subsection{A tree encoding to the rescue}
To remedy the situation, instead of a chain, we will organize mixers into a
binary tree. Each vertex will contain one mixer whose carry output goes to its
parent (thus most vertices receive two carry inputs but it is trivial to
combine them into one). This is depicted in fig. \figref{mixer_tree}. Now we
need $Y\cdot Z \cdot C \le 2^M$.
\figure[mixer_tree]{mixer_tree.pdf}{}{A single mixer vertex and the organization of those into a tree}
Then you can create a linear order on the vertices (e.g. by layers
bottom-to-top), split the input string into blocks and feed the blocks through
the mixer vertices in this order and save the corresponding outputs in the same
order.
Note that this scheme still has all the nice properties, for example it is locally
decodable. To decode a vertex's input, you only need the output of that vertex and
its parent.
But how does a tree help us determine individual mixer parameters more easily?
The parameters of a mixer in a vertex are uniquely determined by the shape of the
subtree under that vertex. This is easily seen by induction: all leaves have the
same parameters (as they have dummy carry-in alphabets of size 1) and the parameters
of any vertex are determined by the parameters of its children.
We will use the same tree shape as for binary heaps: all the levels are full, except
for possibly the last and in the last level all the vertices in one contiguous segment
starting at the very left.
Now let us consider a level at height $h$ (from the bottom). There are at most
three vertex types by subtree shape and they appear on the level in a specific order:
\tightlist{n.}
\: a contiguous segment of vertices with full subtrees of height $h$ (type A)
\: one vertex with an irregular subtree (type B)
\: a contiguous segment of vertices with full subtrees of height $h-1$ (type C)
\endlist
See fig. \figref{tree_shapes}. If the last level happens to be full, there are only
type-A vertices.
\figure[tree_shapes]{tree_shapes.pdf}{}{Vertex types by subtree shape}
Thus, for each level and each vertex type, it is sufficient to remember:
\tightlist{o}
\: Number of vertices of this type on this level. From this, we can easily determine
vertex type from its index by simple comparison.
\: Mixer parameters.
\: Starting address of the output of first vertex of this type in the output stream.
From this, we can easily compute starting address of any vertex by simple addition
and multiplication as all vertices of a given type on a given level have the same
number of output bits (parameter $M$). This will be useful for local decoding.
\endlist
This gives us a precomputed table of $\O(\log n)$ words.
Block size and redundancy computation is exactly the same as in the chain case and
we still get $\O(1)$ redundancy. The chain can be thought of as a degenerate case
of the tree construction where the tree has the shape of a path (and thus all subtrees
have distinct shapes and distinct mixer parameters).
Local decoding of $i$-th input block could be done as follows:
\tightlist{o}
\: Convert block index into a position in the tree (level + index on level)
\: Determine the vertex type and mixer parameters, compute position in output stream and extract the
corresponding output $m \in 2^M$
\: Do the same for the parent vertex
\: Using the parent mixer, decode the carry going up from our vertex
\: Using our mixer, decode the original input block from our output and carry
\endlist
Local modification can be done in a similar fashion the other way around. Both take
$\O(1)$ time on RAM.
\theorem{
On a Word-RAM, we can represent a string $A \in [\Sigma]^n$ in space $\lceil n \log \Sigma \rceil + \O(1)$ bits,
with random-access element read and write operations in $\O(1)$ time, using a precomputed table of
$\O(\log n)$ constants dependent on $n$ and $\Sigma$.
}
\endchapter
import ads;
real blockwidth = 1;
real blockheight = 0.5;
real arrowheight = 1;
real rowheight = blockheight + arrowheight;
// Draw the pair of crossing "mix" arrows from the block pair starting
// at (row, col): the outputs of blocks col and col+1 fan out towards
// each other's columns in the next row, depicting one mixer step.
void mixarrow(int row, int col) {
real x = col * blockwidth;
real y = -row * rowheight - blockheight;
// S-shaped curve from the bottom centre of block `col` to the top
// centre of block `col+1` one row below.
path arr1 = (x + blockwidth/2, y) {S} .. {S} (x + blockwidth, y - arrowheight / 2) {S} .. {S} (x + 1.5*blockwidth, y - arrowheight);
// The second arrow is the mirror image about the vertical line
// between the two columns.
path arr2 = reflect((x + blockwidth, 0), (x+blockwidth, 1)) * arr1;
draw(arr1, Arrow);
draw(arr2, Arrow);
}
// Pass-through arrow: the block at (row, col) is copied unchanged to
// the next row, drawn as a vertical arrow from its bottom edge.
void thruarrow(int row, int col) {
	real cx = (col + 0.5) * blockwidth;
	real top = -row * rowheight - blockheight;
	draw((cx, top) -- (cx, top - arrowheight), Arrow);
}
// Draw one block at (row, col), labelled with its alphabet size.
// Special labels: "..." renders as a bare ellipsis, and "EOF"/"0" are
// drawn as text without the surrounding box (pseudo-blocks).
void block(int row, int col, string alphabet) {
real xbase = col * blockwidth;
real ybase = -row * rowheight;
if (alphabet == "...") {
label("$\cdots$", (xbase+blockwidth/2, ybase-blockheight/2), (0,0));
} else {
if (alphabet != "EOF" && alphabet != "0")
draw((xbase,ybase)--(xbase+blockwidth, ybase)--(xbase+blockwidth, ybase-blockheight)--(xbase, ybase-blockheight)--cycle);
label("$"+alphabet+"$", (xbase+blockwidth/2, ybase-blockheight/2), (0,0));
}
}
// Draw a whole row of blocks, one per label in `alphabets`, starting
// at column 0.
void blocks(int row ... string alphabets[]) {
	int col = 0;
	for (string a : alphabets) {
		block(row, col, a);
		col += 1;
	}
}
// Pass-through arrows for `cnt` consecutive columns beginning at `col`.
void thruarrows(int row, int col, int cnt) {
	int stop = col + cnt;
	for (int c = col; c < stop; ++c)
		thruarrow(row, c);
}
// Draw mix arrows for a run of blocks starting at `col`.  Each call to
// mixarrow covers a pair of columns, hence the step of 2: `cnt` counts
// columns, so cnt/2 mixers are drawn.
void mixarrows(int row, int col, int cnt) {
for (int i = 0; i < cnt; i += 2)
mixarrow(row, col+i);
}
// Write an italic caption for the arrow band below block-row `row`,
// placed to the left of the diagram.
void passlabel(int row, string lbl) {
	pair anchor = (-1, -row*rowheight-blockheight - arrowheight/2);
	label("{\it " + lbl + "}", anchor, W);
}
// Draw a four-port mixer as a square box with half-side r centred at
// (x, y); the grey curves inside hint at the internal data flow from
// input to output and carry.
void mixer(real x, real y, real r=0.5) {
draw((x-r,y-r) -- (x+r,y-r)--(x+r, y+r) -- (x-r, y+r) -- cycle, halfthick);
draw( (x,y+r) {S} .. {E} (x+r,y), 0.5*white);
draw( (x,y+r) -- (x,y-r), 0.5*white);
draw( (x-r,y) {E} .. {S} (x,y-r), 0.5*white);
}
real PENTAMIXER_R = 1;
// Point at distance r from centre c in direction `angle` (degrees);
// used to anchor arrows on a pentamixer's circle.
pair pm_dir(pair c, int angle, real r=PENTAMIXER_R) {
return shift(c)*scale(r)*dir(angle);
}
int A_IN = 180, A_OUT=0, A_COUT=90, A_CIN1=240, A_CIN2=300;
// Draw a five-port mixer ("pentamixer") as a circle of radius r
// centred at c, with grey guide lines connecting the input and both
// carry-in ports to the output and carry-out ports.
void pentamixer(pair c, real r=PENTAMIXER_R) {
path unitcircle=E..N..W..S..cycle;
// Shorthand: port position on this particular circle.
pair d(int angle) {
return pm_dir(c, angle, r);
}
draw(shift(c)*scale(r)*unitcircle);
draw( d(A_IN)--d(A_OUT), 0.5*white);
draw( d(A_IN){E}..{N}d(A_COUT), 0.5*white);
draw( d(A_CIN1){dir(60)}..{E}d(A_OUT), 0.5*white);
draw( d(A_CIN2){dir(120)}..{E}d(A_OUT), 0.5*white);
}
// Arrow on one port of the pentamixer at c: drawn radially at `angle`
// with the given length, pointing away from the mixer when out=true
// and towards it otherwise.  An optional label sits at the far end.
void pm_arrow(pair c, int angle, real length, bool out=false, real r=PENTAMIXER_R, string lbl="") {
	pair near = pm_dir(c, angle, r);
	pair far = pm_dir(c, angle, r+length);
	path outward = near -- far;
	draw(out ? outward : reverse(outward), e_arrow);
	if (lbl != "")
		label(lbl, far, dir(angle));
}
// Horizontal carry arrow between two neighbouring mixers, labelled
// with the carry alphabet at its midpoint.
void carry_arrow(pair mix1, pair mix2, string alphabet) {
	pair tail = mix1 + (0.5,0);
	pair head = mix2 - (0.5, 0);
	draw(tail -- head, e_arrow);
	label((mix1+mix2)/2, "$"+alphabet+"$", N);
}
// tree_shapes.pdf -- figure "tree_shapes": the three vertex types on
// one level of the heap-shaped mixer tree, classified by subtree
// shape: full of height h (A), one irregular subtree (B), full of
// height h-1 (C).
import succinct_common;
// The level under consideration, drawn as a light horizontal line.
draw((-3,-2) -- (3,-2), 0.3*white);
// Height markers on either side of the picture.
draw((-3.2,-4)--(-3.2,-2), Arrows);
label((-3.2, -3), "$h$", W);
draw((3.2,-3.5)--(3.2,-2), Arrows);
label((3.2, -2.75), "$h-1$", E);
// Grey polygon for one representative subtree.
void subtree(path p) {
filldraw(p, 0.65*white);
}
subtree((-1.75, -4)--(-0.75,-4)--(-1.25,-2)--cycle);
subtree((-0.5, -4)--(0,-4)--(0,-3.5)--(0.5,-3.5)--(0,-2)--cycle);
subtree((1.75, -3.5)--(0.75,-3.5)--(1.25,-2)--cycle);
label((-1.25, -3), "A");
label((-0, -3), "B");
label((1.25, -3), "C");
// Outline of the whole tree; the bottom step marks where the last
// (possibly incomplete) level ends.
draw((-3, -4) -- (0,-4) -- (0,-3.5) -- (3,-3.5) -- (0, 0) -- cycle, halfthick);
0% Loading or .
You are about to add 0 people to the discussion. Proceed with caution.
Please register or to comment