Skip to content
Snippets Groups Projects
Commit a0c22cb4 authored by Martin Mareš's avatar Martin Mareš
Browse files

Merge branch 'master' of gitlab.kam.mff.cuni.cz:mj/mo-submit

parents dac580fb 60395123
No related branches found
No related tags found
No related merge requests found
......@@ -138,7 +138,7 @@ def process_schools() -> None:
ps = ProtoSchool(
town=town,
town_id=town.place_id,
town_name=f'{town.name} ({town.get_code()})',
town_name=town.name,
unsure_region=unsure_region,
short_name=nazev,
official_name=nazev,
......@@ -275,7 +275,7 @@ def get_old_schools() -> DefaultDict[str, List[ProtoSchool]]:
ps = ProtoSchool(
town=town,
town_id=town.place_id,
town_name=f'{town.name} ({town.get_code()})',
town_name=town.name,
unsure_region=False,
short_name=s.place.name,
official_name=s.official_name,
......@@ -295,6 +295,7 @@ def simplify_name(name: str, town: str) -> str:
name = re.sub('základní škola', '', name, flags=re.IGNORECASE)
name = re.sub('mateřská škola', '', name, flags=re.IGNORECASE)
name = re.sub('střední škola', '', name, flags=re.IGNORECASE)
name = re.sub('střední průmyslová škola', 'SPŠ', name, flags=re.IGNORECASE)
name = re.sub('gymnázium', 'G', name, flags=re.IGNORECASE)
name = re.sub(r',?\s*s\.r\.o\.', "", name)
return name + ', ' + town
......@@ -356,7 +357,9 @@ def plan_single_change(old: Optional[ProtoSchool], new: Optional[ProtoSchool]) -
if getattr(old, field) != getattr(new, field):
changes.append((field, getattr(old, field), getattr(new, field)))
if changes:
if set(changes) <= {'is_zs', 'is_ss'}:
return None
act = create_action(old, ActionType.EDIT, 'změny')
for field, old_val, new_val in changes:
print(f'\t{field}: {new_val}')
......@@ -562,6 +565,8 @@ elif args.run:
if not args.dry_run:
session.commit()
else:
print('*** Změny neprovedeny***')
print(f"Zpracováno {processed_school_cnt} škol z rejstříku.")
print(f"Založeno {new_school_cnt} nových škol a {new_town_cnt} nových obcí.")
......
......@@ -24,7 +24,7 @@ Tu převést ssconvert-em na extra/skoly/SkolyAObory.csv.
### TODO ###
- neformální jména nových škol (přidat obec, používat běžné zkratky)
- před skrytím školy zkontrolovat, že se na ní nesoutěží
### Poznámky z prvního importu škol ###
......
#!/bin/bash
# Run db/skoly/rejskol-parse over every HTML file in extra/skoly/html and
# store the result as a .tsv file of the same base name in
# extra/skoly/parsed. The script stops at the first failing command.
set -e

# Recreate the output directory so that no files from an earlier run remain.
rm -rf extra/skoly/parsed
mkdir extra/skoly/parsed

for src in extra/skoly/html/*.html ; do
    # Every expansion is quoted so that a path containing whitespace or glob
    # characters is passed on as a single word.
    dst="extra/skoly/parsed/$(basename "$src" .html).tsv"
    echo -n "$src -> "
    db/skoly/rejskol-parse <"$src" >"$dst"
    # Completes the progress line with the line count and name of the output.
    wc -l "$dst"
done
#!/usr/bin/perl -CSA
use common::sense;
use WWW::Mechanize;
# Shared browser object used by the subroutines below. It loads the public
# search page and selects the form with id "form1" as the current form.
my $mech = WWW::Mechanize->new(strict_forms => 1, autocheck => 1);
$mech->get('https://rejstriky.msmt.cz/rejskol/VREJVerejne/VerejneRozhrani.aspx');
$mech->form_id('form1');

# Directory for the downloaded pages; the result of mkdir is not checked.
mkdir 'extra/skoly/html';

# School type codes to download, in this order.
for my $school_type (
    'B',    # primary schools
    'C',    # secondary schools
) {
    download_type($school_type);
}

exit 0;
# Download the result pages of one school type for every district.
#
# $type is passed through unchanged to download_region. Every input named
# 'ctl39' in the current form is inspected; those whose second value name
# starts with a code of the form CZ + three digits + one digit or capital
# letter, followed by a comma, are treated as districts.
sub download_type {
    my ($type) = @_;

    for my $input ($mech->find_all_inputs(name => 'ctl39')) {
        # Only the second possible value and the second value name are used.
        my (undef, $value) = $input->possible_values;
        my (undef, $label) = $input->value_names;

        # Skip everything that is not a district entry.
        next unless $label =~ m{^(CZ\d{3}[0-9A-Z]),};

        download_region($type, $1, $value);
    }
}
# Submit the search form for one school type and one region and save the
# resulting page to extra/skoly/html/<type>-<nuts>.html.
#
#   $type     value of the 'ctl38' input that should be selected
#   $nuts     region code, used in the progress message and the file name
#   $reg_val  value of the 'ctl39' input that should be selected
#
# Dies if the output file cannot be opened or closed. Afterwards the browser
# steps back one page so that the form can be submitted again.
sub download_region {
    my ($type, $nuts, $reg_val) = @_;
    print "Downloading type=$type region=$nuts ($reg_val)\n";

    # Select only the input whose second possible value equals $type; every
    # other 'ctl38' input is set to its first possible value.
    for my $in ($mech->find_all_inputs(name => 'ctl38')) {
        my @vals = $in->possible_values;
        $in->value($vals[1] eq $type ? $vals[1] : $vals[0]);
    }

    # Same for the region inputs. The comparison is an exact string match:
    # using $reg_val as a regex pattern would also select inputs whose value
    # merely contains $reg_val and would interpret metacharacters in it.
    for my $in ($mech->find_all_inputs(name => 'ctl39')) {
        my @vals = $in->possible_values;
        $in->value($vals[1] eq $reg_val ? $vals[1] : $vals[0]);
    }

    # NOTE(review): presumably the number of records requested per page -- confirm.
    $mech->field('txtPocetZaznamu', '1000');

    # Pause before each request (presumably to go easy on the server).
    sleep 1;
    my $resp = $mech->click_button(id => 'btnVybrat');

    # Write the decoded page as UTF-8. Failures are reported instead of
    # silently losing the download.
    my $path = "extra/skoly/html/$type-$nuts.html";
    open my $f, '>:utf8', $path or die "Cannot open $path for writing: $!\n";
    print $f $resp->decoded_content;
    close $f or die "Cannot close $path: $!\n";

    $mech->back;
}
#!/usr/bin/perl -CSA
use common::sense;
use HTML::TreeBuilder;
# Read one HTML page from standard input and print its table rows to
# standard output as tab-separated lines. Warnings go to standard error.
my $tree = HTML::TreeBuilder->new;
$tree->parse_file(\*STDIN);
# $tree->dump;

# If the page contains an element with id 'lblSkutecnyPocet', relay its text
# as a warning.
my $lbl = $tree->find_by_attribute('id', 'lblSkutecnyPocet');
if ($lbl) {
    print STDERR "WARNING: ", $lbl->as_trimmed_text, "\n";
}

# The tables are looked up inside the second <div> of the document.
my @divs = $tree->find_by_tag_name('div');
my $div_with_tables = $divs[1]
    // die "Cannot find the <div> with tables: the page has fewer than two <div> elements\n";

# Cells of the most recent row of a "main" table; they are prepended to every
# row of the detail tables that follow.
my @last_main = ();
my $tab_count = 0;

for my $table ($div_with_tables->find_by_tag_name('table')) {
    my $style = $table->attr('style') // "";

    # A table counts as a detail table when it is the second table seen or
    # when its style attribute mentions #f0f8ff; any other table is "main".
    my $is_detail = ($tab_count == 1 || $style =~ m{#f0f8ff});

    # Last line printed for this table, used to suppress repeats.
    my $last_out = "";

    for my $tr ($table->find_by_tag_name('tr')) {
        # Collect the cells of this row only. Searching from $table here
        # would return the cells of the whole table for every row, which
        # makes all rows look identical and the warning below unreachable.
        my @cols = ();
        for my $td ($tr->find_by_tag_name('td')) {
            my $text = $td->as_trimmed_text;
            # Drop leading no-break spaces, turn the remaining ones into
            # ordinary spaces.
            $text =~ s{^\xa0+}{};
            $text =~ s{\xa0}{ }g;
            push @cols, $text;
        }

        if (!$is_detail) {
            @last_main = @cols;
            next;
        }

        my $out = join("\t", @last_main, @cols);
        next if $out eq $last_out;

        # A second, different line within one detail table is still printed,
        # but reported.
        if ($last_out ne "") {
            print STDERR "WARNING: Multiple IZOs per RED_IZO: $out /// $last_out\n";
        }
        print $out, "\n";
        $last_out = $out;
    }

    $tab_count++;
}
0% Loading or .
You are about to add 0 people to the discussion. Proceed with caution.
Please register or sign in to comment