Skip to content
Snippets Groups Projects
Commit d0ae894c authored by Martin Mareš's avatar Martin Mareš
Browse files

init-schools: První části

... ovšem narážíme na to, že rejskol zařazuje školy do regionů podle
správních úřadů, což občas neodpovídá reálné adrese školy. Zejména
v Praze.
parent 0a0a3d80
No related branches found
No related tags found
No related merge requests found
#!/usr/bin/env python3
# Initialize schools from parsed school register
# Uses db/skoly/parsed/*.tsv
from typing import List, Dict
import sys
from pathlib import Path
import mo.db as db
import re
# Database session shared by all functions below; committed once at the very end of the script
session = db.get_session()
# Counters reported in the final summary line
new_town_cnt = 0  # incremented by lookup_town() whenever it creates a town
new_school_cnt = 0  # incremented by import_schools() for every processed row
def import_schools(path: Path, nuts: str):
    """Process one parsed school-register TSV file belonging to a single region.

    The first line of the file is a header naming the columns; every following
    non-empty line describes one school.  For each school the address of the
    legal entity is compared with the address of the school building (a
    warning goes to stderr when they differ), the town of the school building
    is looked up or created via lookup_town(), and the global counter
    new_school_cnt is incremented.

    path: the TSV file to read.
    nuts: NUTS code of the region the file belongs to; known-invalid codes
        used by the register are corrected before use.

    Raises KeyError when an expected column is missing from the header.
    """
    global new_school_cnt

    # XXX: The school register uses several invalid NUTS codes :( Fix them!
    nuts = re.sub('^CZ011', 'CZ010', nuts)
    nuts = re.sub('^CZ021', 'CZ020', nuts)
    nuts = re.sub('^CZ081', 'CZ080', nuts)

    # The column names looked up below contain Czech diacritics, so decode the
    # file explicitly instead of depending on the locale's default encoding.
    # NOTE(review): assumes the parsed files are UTF-8 -- confirm.
    with path.open('r', encoding='utf-8') as file:
        columns = parse_header(file.readline())
        print(columns)

        for line in file:
            # Drop the line terminator so it does not end up in the last field
            line = line.rstrip('\r\n')
            if not line:
                # Tolerate blank lines (e.g. at the end of the file)
                continue
            f = line.split('\t')

            # Identification of the school; read but not used any further yet
            red_izo = f[columns['Red IZO']]
            izo = f[columns['IZO']]
            druh = f[columns['Druh školy/zařízení']]

            # Address of legal entity
            nazev = f[columns['Název']]
            misto = f[columns['Místo']]
            ulice = f[columns['Ulice']]
            cp = f[columns['Č.p.']]
            co = f[columns['Č.o.']]

            # Address of school building (second occurrence of the same
            # column names, which parse_header() prefixes with '_')
            misto2 = f[columns['_Místo']]
            ulice2 = f[columns['_Ulice']]
            cp2 = f[columns['_Č.p.']]
            co2 = f[columns['_Č.o.']]

            addr = make_address(misto, ulice, cp, co)
            addr2 = make_address(misto2, ulice2, cp2, co2)
            if addr != addr2:
                print(f"WARNING: Address mismatch, check regions: <{addr}> != <{addr2}>", file=sys.stderr)

            town = lookup_town(misto2, nuts)
            print(town)

            new_school_cnt += 1
def parse_header(header: str) -> Dict[str, int]:
    """Map the column names of a TSV header line to their zero-based positions.

    A trailing colon is removed from each name.  When a name occurs more than
    once, every later occurrence gets one more leading underscore ("Místo",
    "_Místo", "__Místo", ...), so all columns stay addressable.

    header: the header line, with or without its line terminator.
    Returns a dictionary from (possibly prefixed) column name to column index.
    """
    columns: Dict[str, int] = {}
    # The header typically comes straight from readline(); strip the line
    # terminator, otherwise it sticks to the last column's name and also
    # hides that column's trailing colon.
    for i, col in enumerate(header.rstrip('\r\n').split('\t')):
        if col.endswith(':'):
            col = col[:-1]
        # Disambiguate repeated names by prefixing underscores
        while col in columns:
            col = '_' + col
        columns[col] = i
    return columns
def make_address(misto: str, ulice: str, cp: str, co: str) -> str:
    """Format an address as a single human-readable line.

    misto: town name.
    ulice: street name; may be empty.
    cp, co: the two house numbers (the register's "Č.p." and "Č.o." columns);
        either may be empty.  When both are present they are joined as
        "cp/co", otherwise whichever one is non-empty is used.

    Returns "<street> <number>, <town>", or "<street>, <town>" when there is
    no number.  Without a street only the town is returned and any house
    number is left out.
    """
    house_number = f"{cp}/{co}" if (cp and co) else (cp or co)

    if not ulice:
        return misto
    if house_number:
        return f"{ulice} {house_number}, {misto}"
    return f"{ulice}, {misto}"
def lookup_town(misto: str, region_nuts: str) -> db.Place:
    """Return the level-3 place named `misto`, creating it when it is missing.

    A newly created place becomes a child of the level-2 place whose NUTS code
    is `region_nuts`; it is added to the session and flushed, and the global
    counter new_town_cnt is incremented.

    Raises AssertionError when no level-2 place has the given NUTS code.
    """
    global new_town_cnt

    # NOTE(review): the lookup matches on the name only, so towns of the same
    # name in different regions resolve to a single record -- confirm intent.
    existing = session.query(db.Place).filter_by(level=3, name=misto).first()
    if existing is not None:
        return existing

    parent_region = session.query(db.Place).filter_by(level=2, nuts=region_nuts).first()
    assert parent_region is not None, f"Failed to find region with NUTS code {region_nuts}"

    # NOTE(review): the new level-3 place is created with PlaceType.region --
    # verify that this is the intended type for a town.
    created = db.Place(level=3, parent=parent_region.place_id, name=misto, type=db.PlaceType.region)
    session.add(created)
    # Flush so that the new row is sent to the database right away
    session.flush()
    new_town_cnt += 1
    return created
# Import all parsed register files.  Their names have the form
# "<letter>-<NUTS code>.tsv"; the NUTS code selects the region.
#
# The files are processed in sorted order: glob() yields them in arbitrary
# order, and lookup_town() attaches a newly created town to the region of the
# first file that mentions it, so a stable order keeps runs reproducible.
for path in sorted(Path('db/skoly/parsed').glob('*.tsv')):
    m = re.fullmatch(r'^[A-Z]-(CZ\w+)\.tsv', path.name)
    assert m is not None, f"Unexpected file name {path.name}"
    nuts = m[1]
    import_schools(path, nuts)

# Everything is committed at once, after all files were processed
session.commit()

print(f"Imported {new_school_cnt} schools, created {new_town_cnt} new towns.")
0% Loading or .
You are about to add 0 people to the discussion. Proceed with caution.
Please register or sign in to comment