diff --git a/.gitignore b/.gitignore index e6c2cb5d8f4ef838544fe91065c6a4e7159566be..ffd9fcb890601a74ac7129d7ab321fb832920e21 100644 --- a/.gitignore +++ b/.gitignore @@ -1,9 +1,7 @@ __pycache__ .mypy_cache -/db/ruian -/db/skoly/html -/db/skoly/parsed /data +/extra /mo/config.py /osmo.egg-info /venv diff --git a/README.md b/README.md index 68049b282e993b06ac7949844f10a3ac8c4fd8d5..fe470b3439acdbd8317f592e9baa68e772ce6ecd 100644 --- a/README.md +++ b/README.md @@ -46,7 +46,7 @@ setfacl -m u:www-data:x /akce/mo/osmo-test /akce/mo/osmo-test/var # Inicializovat regiony v DB - # Obstarat si db/ruian/ a db/schools/parsed/ z jiné instance (nebo je znovu stáhnout) + # Obstarat si extra/ruian/ a extra/skoly/parsed/ z jiné instance (nebo je znovu stáhnout) . ../venv/bin/activate bin/test-init # případně podmnožinu diff --git a/bin/init-schools b/bin/init-schools index 656f3e8f7390fb2d48b67020c42f337fe4bcac3e..1abdbe86415d8fead853978105b2d1eab2cc1b09 100755 --- a/bin/init-schools +++ b/bin/init-schools @@ -1,6 +1,6 @@ #!/usr/bin/env python3 # Naplní databázi školami a obcemi, v nichž školy sídlí -# Používá db/skoly/parsed/*.tsv +# Používá extra/skoly/parsed/*.tsv # # Pozor, zrada: rejstřík škol je sice rozdělený do okresů dle NUTS/LAU, # ale školy tam řadí podle úřadu, u nějž je škole registrovaná, což vůbec @@ -182,14 +182,14 @@ ruian_obec_to_okres_nuts: DefaultDict[str, List[str]] = defaultdict(list) def load_ruian(): - ocols, okresy = load_ruian_csv('db/ruian/UI_OKRES.csv') + ocols, okresy = load_ruian_csv('extra/ruian/UI_OKRES.csv') okres_by_id: Dict[int, List[str]] = {} for o in okresy: id = int(o[ocols['KOD']]) assert id not in okres_by_id okres_by_id[id] = o - mcols, mesta = load_ruian_csv('db/ruian/UI_OBEC.csv') + mcols, mesta = load_ruian_csv('extra/ruian/UI_OBEC.csv') for m in mesta: jmeno = m[mcols['NAZEV']] oid = int(m[mcols['OKRES_KOD']]) @@ -204,7 +204,7 @@ args = parser.parse_args() load_ruian() -for path in 
Path('extra/skoly/parsed').glob('*.tsv'): m = re.fullmatch(r'^[A-Z]-(CZ\w+)\.tsv', path.name) assert m is not None nuts = m[1] diff --git a/db/garanti/mk b/db/garanti/mk index 0fe740212a3a747250899b9be5cb6f34106f4d80..4adf1ea988f30f79d95e6b741b3a92cc331aaef7 100755 --- a/db/garanti/mk +++ b/db/garanti/mk @@ -10,7 +10,7 @@ class Garant(mo.csv.Row): jmeno: str = "" email: str = "" -f = open('garanti.csv') +f = open('data/garanti/garanti.csv') rows = mo.csv.read(f, mo.csv.FileFormat.en_csv, Garant) for g in rows: diff --git a/db/skoly/parse-all b/db/skoly/parse-all index 016bb0d9eb560be4768c9b5cae2711b0bfa19d65..a6bf9efe54d97d07dc70844b06b6ccec6533aadf 100755 --- a/db/skoly/parse-all +++ b/db/skoly/parse-all @@ -1,11 +1,11 @@ #!/bin/bash set -e -rm -rf parsed -mkdir parsed +rm -rf extra/parsed +mkdir extra/parsed -for src in html/*.html ; do - dst=parsed/$(basename $src .html).tsv +for src in extra/html/*.html ; do + dst=extra/parsed/$(basename $src .html).tsv echo -n "$src -> " ./rejskol-parse <$src >$dst wc -l $dst diff --git a/db/skoly/rejskol-download b/db/skoly/rejskol-download index 1f882b9158a6e1d78c8844652ae35fbe844ef9c9..c5a057c8615d880aa8844ce8e9d7506636e41af9 100755 --- a/db/skoly/rejskol-download +++ b/db/skoly/rejskol-download @@ -8,7 +8,7 @@ my $mech = WWW::Mechanize->new(autocheck => 1, strict_forms => 1); $mech->get('https://rejstriky.msmt.cz/rejskol/VREJVerejne/VerejneRozhrani.aspx'); $mech->form_id('form1'); -mkdir 'html'; +mkdir 'extra/html'; download_type('B'); # Základní školy download_type('C'); # Střední školy exit 0; @@ -59,7 +59,7 @@ sub download_region { sleep 1; my $resp = $mech->click_button(id => 'btnVybrat'); - open my $f, '>:utf8', "html/$type-$nuts.html"; + open my $f, '>:utf8', "extra/html/$type-$nuts.html"; print $f $resp->decoded_content; close $f;