From 472c5af51e47547085b1677edac9def3c639f870 Mon Sep 17 00:00:00 2001
From: Jiri Kalvoda <jirikalvoda@kam.mff.cuni.cz>
Date: Mon, 6 May 2024 20:07:01 +0200
Subject: [PATCH] =?UTF-8?q?prace:=20P=C5=99id=C3=A1n=C3=AD=20tabulek?=
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

---
 prace/bakalarka/index.md | 57 ++++++++++++++++++++++++++++++++++++++++
 1 file changed, 57 insertions(+)

diff --git a/prace/bakalarka/index.md b/prace/bakalarka/index.md
index 6208bce3e..05df0570c 100755
--- a/prace/bakalarka/index.md
+++ b/prace/bakalarka/index.md
@@ -1206,6 +1206,63 @@ Z toho pak můžeme vyslovit hypotézu, že $\delta_{\algo{sdp}}(n) \le 0.34$ pr
 
 Z naměřených dat také můžeme usuzovat, že $\algo{sdp}$ je lepší než libovolný z jiných představených algoritmů.
 
+```python {redefine=tmp}
+from bakalarka import data_lib, g
+data = g.load_main_test()
+import math
+
+def row(*args):  # One pf.TableRow with a pf.Plain cell per argument: a list is used as ready-made inlines, a float is rendered via f"{i:0.3}", anything else via str().
+    return pf.TableRow(*(pf.TableCell(pf.Plain(*(i if isinstance(i, list) else parse_string(f"{i:0.3}") if isinstance(i, float) else parse_string(str(i))))) for i in args))
+
+def gen_alg(pipeline_name, algo_name, name_suffix, floatpage, add_note=False):
+    pipeline = data.pipelines[pipeline_name]
+    by_n = data_lib.group_by_n(pipeline)
+    rows = []
+    percentils = [10,50,90]
+    print_errors = any(i.error or i.data.get("broken", False) for i in pipeline)
+    for n in sorted(by_n.keys()):
+        d = by_n[n]
+        scores = [ i.score/n for i in d]
+        scores.sort()
+        l = len(scores)
+        avg = sum(scores) / l
+        variance = sum((avg-i)**2 for i in scores)/(l-1)
+        errors = len([None for i in d if i.error or i.data.get("broken", False)])
+        rows.append(row(
+            n,
+            l,
+            *([errors] if print_errors else []),
+            avg,
+            math.sqrt(variance),
+            *(scores[int((l-0.001)*perc/100)] for perc in percentils),
+        ))
+
+    table = pf.Table(pf.TableBody(*rows), head=pf.TableHead(row(
+            [pf.Math("n", format='InlineMath')],
+            "testů",
+            *(["chyb"] if print_errors else []),
+            [pf.Math("\\overline{\\delta_{\\algo{"+algo_name+"}}(n)}", format='InlineMath')],
+            [pf.Math("\\sqrt{\widehat{\\delta_{\\algo{"+algo_name+"}}(n)^2}}", format='InlineMath')],
+            *([pf.Math(f"{i} \%", format='InlineMath')] for i in percentils)
+            )))
+    note = element.content if add_note else []
+    return processor.transform([pf.Figure(table, *note, caption=pf.Caption(pf.Plain(*parse_string("Statistika algoritmu "), pf.Math(f"\\algo{{{algo_name}}}", format="InlineMath"), *name_suffix, *parse_string("."))), attributes=dict(floatpage=floatpage))])
+return [  # NOTE(review): top-level return - presumably the document processor runs this block as a function body; confirm
+    *gen_alg("greedy", "g", [], "stat1", True),  # the only table that also embeds the note (add_note=True)
+    *gen_alg("rg", "rg", [], "stat1!"),
+    *gen_alg("rsg", "rsg", [], "stat2"),
+    *gen_alg("semidef_prog_sage.sage(10, CVXOPT)", "sdp", parse_string(" – Sage"), "stat2"),  # both sdp pipelines share the algo name; the caption suffix tells them apart
+    *gen_alg("semidef_prog(10)", "sdp", parse_string(" – SDPA-C"), "stat2!"),
+    ]
+```
+::: {c=tmp}
+Veškeré hodnoty jsou zaokrouhleny na 3 platné číslice.
+
+$\overline{\delta_{\alg}(n)}$ značí výběrový průměr, tedy $\frac{1}{m}\sum_{0\le i < m} r_i$, kde $m$ je počet testů a $r_i$ je relativní skóre $i$-tého z nich.
+
+$\widehat{\delta_{\alg}(n)^2}$ značí výběrový rozptyl, tedy $\frac{1}{m-1}\sum_{0\le i < m} \left(r_i - \overline{\delta_{\alg}(n)}\right)^2$.
+:::
+
 \vfil\eject
 
 Dolní odhad
-- 
GitLab