diff --git a/fs-succinct/Makefile b/fs-succinct/Makefile index 08ab41e0f244a1f93051c26a871e87acf61334c2..cbece957916d94d3227c910caa2aedf592e80c46 100644 --- a/fs-succinct/Makefile +++ b/fs-succinct/Makefile @@ -1,5 +1,5 @@ TOP=.. -PICS=sole sole_boxes sole_hilevel mixer composition +PICS=sole sole_boxes sole_hilevel mixer composition mixer_chain mixer_tree include ../Makerules @@ -7,3 +7,5 @@ include ../Makerules sole.pdf:: succinct_common.asy sole_boxes.pdf:: succinct_common.asy sole_hilevel.pdf:: succinct_common.asy +mixer_chain.pdf:: succinct_common.asy +mixer_tree.pdf:: succinct_common.asy diff --git a/fs-succinct/mixer_chain.asy b/fs-succinct/mixer_chain.asy new file mode 100644 index 0000000000000000000000000000000000000000..39327d52806c784701c94323f1fae27d6d9b8433 --- /dev/null +++ b/fs-succinct/mixer_chain.asy @@ -0,0 +1,27 @@ +import succinct_common; + +real mixgrid = 2.5; /* horizontal distance between neighbouring chain slots */ +int nmixers = 5; /* number of slots drawn, including the ellipsis slot */ + +for (int i = 0; i < nmixers; ++i) { + real x = mixgrid * i; + if (i == 3) { /* slot 3 shows an ellipsis in place of a mixer */ + label((x,0), "$\cdots$"); + } else { /* input arrow labelled X above, mixer at height 0, output arrow below */ + draw((x, 1.25) -- (x, 0.5), e_arrow); + label((x, 1.25), "$X$", N); + mixer(x, 0); + draw((x, -0.5) -- (x, -1.25), e_arrow); + label((x, -1.25), (i == 4) ? "$2^{M_n}$" : "$2^{M_"+((string)(i+1))+"}$", S); /* slot 4 gets the subscript n, every other slot the subscript i+1 */ + } +} + +string[] alphas = {"", "Y_1", "Y_2", "Y_3", "Y_{N-1}", "Y_N"}; /* alphas[i] labels the carry arrow arriving at slot i; entries 0 and 5 are not read by the loop below */ + +for (int i = 1; i < nmixers; ++i) { + carry_arrow((mixgrid * (i-1), 0), (mixgrid*i, 0), alphas[i]); +} + +pair endb = (mixgrid * (nmixers-1), 0) + (0.5,0); /* point 0.5 to the right of the last slot's centre; the final arrow starts here, heads east and then bends south */ +draw(endb -- endb + (0.5,0) {E} .. 
{S} endb + (1.5,-1) -- endb + (1.5,-1.25), e_arrow); +label(endb + (1.5,-1.25), "$2^{M_*}$", S); diff --git a/fs-succinct/mixer_tree.asy b/fs-succinct/mixer_tree.asy new file mode 100644 index 0000000000000000000000000000000000000000..1bf91def0ff9300c459cfd9879f7fb013f44d3fd --- /dev/null +++ b/fs-succinct/mixer_tree.asy @@ -0,0 +1,31 @@ +import succinct_common; + +pair C = (0,0); +PENTAMIXER_R = 0.75; /* radius for the single, fully annotated pentamixer at C */ +pentamixer(C); +pm_arrow(C, A_IN, 1, lbl="\vbox{\hbox{$x\in[X]$}\hbox{\eightrm (input)}}"); +pm_arrow(C, A_CIN1, 1, lbl="\vbox{\hbox{$y\in[Y]$}\hbox{\eightrm (carry in 1)}}"); +pm_arrow(C, A_CIN2, 1, lbl="\vbox{\hbox{$z\in[Z]$}\hbox{\eightrm (carry in 2)}}"); +pm_arrow(C, A_OUT, 1, out=true, lbl="\vbox{\hbox{$m\in[2^M]$}\hbox{\eightrm (output)}}"); +pm_arrow(C, A_COUT, 1, out=true, lbl="\vbox{\hbox{$s\in[S]$}\hbox{\eightrm (carry out)}}"); + +PENTAMIXER_R = 0.5; /* smaller radius for the three mixers of the tree fragment below */ + +pair m1 = (5, -1); +pair m2 = (9, -1); +pair m3 = (7, 1.5); +pair mix[] = {m1, m2, m3}; /* m1 and m2 send their carry to m3 (see the two draw calls below) */ +pentamixer(m1); +pentamixer(m2); +pentamixer(m3); +draw(pm_dir(m1, A_COUT)--pm_dir(m3, A_CIN1), e_arrow); /* carry out of m1 becomes carry-in 1 of m3 */ +draw(pm_dir(m2, A_COUT)--pm_dir(m3, A_CIN2), e_arrow); /* carry out of m2 becomes carry-in 2 of m3 */ +for (int i = 0; i < 3; ++i) { /* every mixer gets a short "in" and "out" arrow */ + pm_arrow(mix[i], A_IN, 0.5, lbl="in"); + pm_arrow(mix[i], A_OUT, 0.5, out=true, lbl="out"); +} +for (int i = 0; i < 2; ++i) { /* carry inputs of m1 and m2 are labelled with vertical dots */ + pm_arrow(mix[i], A_CIN1, 0.5, "$\vdots$"); + pm_arrow(mix[i], A_CIN2, 0.5, "$\vdots$"); +} +pm_arrow(m3, A_COUT, 0.5, out=true, "$\vdots$"); diff --git a/fs-succinct/succinct.tex b/fs-succinct/succinct.tex index 03c46d3145f5b47fa6657717e934f63a6955271d..58bcc52f4ca5c34cc6ef3875c99f02dd7d010949 100644 --- a/fs-succinct/succinct.tex +++ b/fs-succinct/succinct.tex @@ -345,7 +345,72 @@ The whole mixer parameter selection process could be as follows All the inequalities required for mixer existence are satisfied and based on the analysis above the parameters satisfy what our lemma promised. 
\qed - \section{Succinct representation of arbitrary-alphabet strings} +\subsection{A naive first try} + +We would like to use mixers to encode a string from an arbitrary alphabet into +the binary alphabet. Let's assume we have a string $A \in [\Sigma]^n$. +We shall split it into some blocks of size $k$, which gives us a block alphabet +$[X] = [\Sigma^k]$. Then we could use a mixer chain as in fig. \figref{mixer_chain}, +similar to what we did in the SOLE encoding. + +\figure[mixer_chain]{mixer_chain.pdf}{}{Mixer chain for string encoding} + +The intuition behind this is simple: whatever part of $X$ did not fit into a +whole number of bits is sent on as carry and whenever a whole extra bit of +information has accumulated in the chain, it can be output. The final carry +is output at the end using the necessary number of bits. Here we don't mind +rounding because it is an additive constant. + +Everything is also locally decodable and modifiable -- to decode the $i$-th input +block, you only need the $i$-th and $(i+1)$-st output blocks. And vice versa, you +only need to modify these two output blocks after changing the $i$-th input block. + +Now we just need to set $k$ and calculate redundancy. It will be useful to +set $k \approx 2\log_\Sigma n$. Then $X \approx n^2$ and by previous lemmas, +$Y_i \in \O(n)$ and redundancy of the mixers is $\O(1/n)$. As there are fewer +than $n$ mixers, the total redundancy is $\O(1)$. + +That all sounds wonderful. However, there is one serious problem. Each of the +mixers will have different parameters ($Y_i$, $M_i$, $S_i=Y_{i+1}$). In order +to compute the parameters for the $i$-th mixer, we need to know the parameters for +the $(i - 1)$-st, namely the $Y_i=S_{i-1}$. For that, we need the $(i-2)$-nd and +so on... + +If we did encoding / decoding in a streaming fashion, this would not matter -- +we could compute the mixer parameters one by one as we go. 
+But if we wish for random access in constant time, we would need to store a table +of all the mixer parameters -- i.e., a table with $\Theta(n/\log_\Sigma n)$ rows. +That is impractical. + +Note that this was not an issue for SOLE as there the $Y_i$'s formed an arithmetic +sequence. They weren't even the optimal $Y_i$'s that would be created by the generic +mixer construction but a close enough approximation that still yielded good results, +up to an additive constant. That was a special case -- in general, we do not know +how to approximate the mixer parameters by something easier to compute locally. + +\subsection{A tree encoding to the rescue} + +To remedy the situation, instead of a chain, we will organize mixers into a +binary tree. Each vertex will contain one mixer whose carry output goes to its +parent (thus most vertices receive two carry inputs but it is trivial to +combine them into one). This is depicted in fig. \figref{mixer_tree}. Now we +need $Y\cdot Z \cdot C \le 2^M$. + +\figure[mixer_tree]{mixer_tree.pdf}{}{Mixer tree for string encoding} + +Then you can create a linear order on the vertices (e.g. by layers +bottom-to-top), split the input string into blocks and feed the blocks through +the mixer vertices in this order and save the corresponding outputs in the same +order. + +Note that this scheme still has all the nice properties, for example it is locally +decodable. To decode a vertex's input, you only need the output of that vertex and +its parent. + +We will use the same tree shape as for binary heaps: all the levels are full, except +for possibly the last, and in the last level all the vertices are in one contiguous segment +starting at the very left. 
+ \endchapter diff --git a/fs-succinct/succinct_common.asy b/fs-succinct/succinct_common.asy index 17d410b9fdeb1e913fb94a677bc014fd779c136b..974355bbe55542f7385b968d8e9f2c8b8cc3d11d 100644 --- a/fs-succinct/succinct_common.asy +++ b/fs-succinct/succinct_common.asy @@ -62,6 +62,36 @@ void mixer(real x, real y, real r=0.5) { draw( (x-r,y) {E} .. {S} (x,y-r), 0.5*white); } +real PENTAMIXER_R = 1; /* default radius shared by pm_dir, pentamixer and pm_arrow */ + +pair pm_dir(pair c, int angle, real r=PENTAMIXER_R) { /* returns the point at distance r from c in the direction of `angle` (degrees) */ + return shift(c)*scale(r)*dir(angle); +} +int A_IN = 180, A_OUT=0, A_COUT=90, A_CIN1=240, A_CIN2=300; /* port angles in degrees: input, output, carry out, carry in 1, carry in 2 */ +void pentamixer(pair c, real r=PENTAMIXER_R) { /* draws a circle of radius r centred at c plus four inner curves joining the ports, drawn with the 0.5*white pen */ + path unitcircle=E..N..W..S..cycle; + pair d(int angle) { /* port position on this mixer's circle */ + return pm_dir(c, angle, r); + } + draw(shift(c)*scale(r)*unitcircle); + draw( d(A_IN)--d(A_OUT), 0.5*white); + draw( d(A_IN){E}..{N}d(A_COUT), 0.5*white); + draw( d(A_CIN1){dir(60)}..{E}d(A_OUT), 0.5*white); + draw( d(A_CIN2){dir(120)}..{E}d(A_OUT), 0.5*white); +} + + +void pm_arrow(pair c, int angle, real length, bool out=false, real r=PENTAMIXER_R, string lbl="") { /* draws an arrow of the given length along `angle`, spanning radius r to r+length around c; it points away from c when out is true and towards c otherwise; a non-empty lbl is placed at the outer end */ + pair p1 = pm_dir(c, angle, r); + pair p2 = pm_dir(c, angle, r+length); + if (out) + draw(p1--p2, e_arrow); + else + draw(p2--p1, e_arrow); + if (lbl != "") + label(lbl, p2, dir(angle)); +} + void carry_arrow(pair mix1, pair mix2, string alphabet) { draw(mix1 + (0.5,0) -- mix2 - (0.5, 0), e_arrow); label((mix1+mix2)/2, "$"+alphabet+"$", N);