diff --git a/fs-succinct/Makefile b/fs-succinct/Makefile index ba6c63ec5ded7d730418a9f14feceb7ebe02fa5e..0a96cb47914521b9d2e1689ee0a133021bb745cc 100644 --- a/fs-succinct/Makefile +++ b/fs-succinct/Makefile @@ -1,3 +1,4 @@ TOP=.. +PICS=sole sole_even include ../Makerules diff --git a/fs-succinct/sole.asy b/fs-succinct/sole.asy new file mode 100644 index 0000000000000000000000000000000000000000..94295808bd8b55700e8187a33552a0bd4e904b5d --- /dev/null +++ b/fs-succinct/sole.asy @@ -0,0 +1,18 @@ +import sole_common; +blocks(0 ... concat(array(6,"B"), new string[] {"...", "B", "EOF"})); +thruarrows(0,0,6); +thruarrows(0,7,2); +blocks(1 ... concat(array(6, "B+1"), array(1, "..."), array(2, "B+1")) ); +mixarrows(1,0,6); +mixarrow(1, 7); +block(1, 9, "0"); +blocks(2, "B", "B+3", "B-3", "B+6", "B-6", "B+9", "...", "B-i","B+j", "B-j"); +thruarrow(2, 0); +mixarrows(2, 1, 6); +mixarrow(2, 8); +thruarrow(1, 9); +blocks(3 ... concat(array(6,"B"), array(1, "..."), array(3, "B"))); + +passlabel(0, "Add EOF"); +passlabel(1, "Pass 1"); +passlabel(2, "Pass 2"); diff --git a/fs-succinct/sole_even.asy b/fs-succinct/sole_even.asy new file mode 100644 index 0000000000000000000000000000000000000000..69125af591e2b115931018a77641f64a982f5e2d --- /dev/null +++ b/fs-succinct/sole_even.asy @@ -0,0 +1,8 @@ +import sole_common; + +blocks(0, "...", "B", "B", "EOF"); +mixarrow(0, 1); +thruarrow(0,3); +blocks(1, "...", "B+i", "B-i", "B+j"); +mixarrows(1, 0, 4); +blocks(2, "...", "B", "B", "B"); diff --git a/fs-succinct/succinct.tex b/fs-succinct/succinct.tex index f856197852e33d578e63e9602e2bfbadcfa7df2f..15991bc94a14b304fb7ca98f962621b216afe2f2 100644 --- a/fs-succinct/succinct.tex +++ b/fs-succinct/succinct.tex @@ -156,6 +156,34 @@ possible input combinations). We will use this kind of alphabet re-encoding by pair heavily in the SOLE encoding. 
The best way to explain the exact scheme is with a diagram: +\figure[sole]{sole.pdf}{}{SOLE alphabet re-encoding scheme (odd number of blocks)} + +There are two re-encoding passes. The first transforms blocks with alphabet +$[B+1]$ into blocks with variable alphabet sizes (alternating between +$[B+3k]$ and $[B-3k]$). The second pass runs shifted by one block and converts +the variable-alphabet blocks into blocks with alphabet $[B]$. + +What is the redundancy of this scheme? That depends on whether the original +number of blocks (after padding) is even or odd. Figure \figref{sole} shows +the case for an odd number. For an even number, the ending is slightly different +(see figure \figref{sole_even}). + +You can easily check that this is reversible: after decoding pass 2, the last block +will always be either 0 (for the odd case) or EOF (for the even case). + +\figure[sole_even]{sole_even.pdf}{}{SOLE alphabet re-encoding scheme (even number of blocks)} + +Now we can finally analyze redundancy. Let us count how the number of blocks +increases throughout the encoding passes. +\tightlist{o} +\: If the original length was a multiple of $b$, we must add one block to complete padding. +\: We always add one block with the EOF character. +\: In the first pass, the number of blocks does not increase. +\: In the second pass, we may need to add an extra padding block to make the number of blocks odd. +\endlist +In total, we add at most 3 blocks. Thus $r(n) \le 3B$. That is a constant +and thus we have a succinct scheme. + \section{Succinct representation of arbitrary-alphabet strings}