Skip to content
Snippets Groups Projects
Commit 1d37fdc6 authored by Martin Mareš
Browse files

Skript na parsování seznamu všech okresních garantů

parent 549940a7
No related branches found
No related tags found
No related merge requests found
#!/usr/bin/env python3
# Skript na parsování seznamu okresních garantů pro kategorie Zn.
# ÚKMO seznam udržuje jako wordovský dokument s celkem pravidelnou strukturou,
# pokud se pomocí LibreOffice exportuje do txt, tento skript ho dokáže z velké
# části naparsovat. Ručně je potřeba v txt přidat pár chybějících prázdných řádků
# mezi záznamy.
from collections import defaultdict
from dataclasses import dataclass
from pprint import pprint
from typing import List
import re
import sys
# Region name -> region code.  'Praha' is pre-seeded with the code 'A';
# the remaining entries come from the regions file below.
regiony = { 'Praha': 'A' }
# (code, name) pairs of every level-1 and level-2 region, in file order.
reg_list = []

# Each line of the regions file is tab-separated: the number of leading tabs
# is the nesting level of the entry, and what follows must be exactly two
# fields, the region code and the region name.
with open('../regions') as reg:
    for line in reg:
        stripped = line.rstrip()
        if not stripped:
            # Blank or whitespace-only line: nothing to parse.  Without this
            # guard the level-counting below would run off the end of the
            # field list.
            continue

        # Count and drop the leading tabs; this is the same as counting the
        # empty fields at the front of a "\t"-split.
        entry = stripped.lstrip("\t")
        level = len(stripped) - len(entry)

        fields = entry.split("\t")
        # Raises ValueError if the entry does not have exactly two fields.
        code, name = fields

        if level == 0:
            # Level-0 entries are parsed (and validated) but not recorded.
            continue
        if level not in (1, 2):
            # Raised explicitly rather than via `assert False`, so that the
            # check also fires when Python runs with -O.
            raise AssertionError(
                f"unexpected nesting level {level} in regions file: {stripped!r}"
            )
        regiony[name] = code
        reg_list.append((code, name))
@dataclass
class Garant:
    """One person parsed from the guarantor list, with their contact details."""

    # Name exactly as captured from the input line (may include titles).
    name: str
    # The same name with leading academic titles removed.
    raw_name: str
    # Role description found in front of the name; empty string if none.
    role: str
    # E-mail addresses collected from the lines following the name.
    emails: List[str]
    # Phone numbers collected from the lines following the name.
    phones: List[str]
# --- Parser state, carried from one input line to the next -----------------
# Records of the region currently being read; None until the first region
# heading has been seen.
curr_g = None
# The Garant record that contact lines are currently being attached to.
curr = None
# 0: the next non-blank line starts a new record (role + name);
# 1: we are inside a record and the line carries e-mails / phone numbers.
state = 0
# Region code -> list of Garant records found under that region's heading.
garanti = {}

# --- Patterns, compiled once outside the per-line loop ---------------------
# All of them are raw strings; the title pattern used to be a plain string
# containing "\.", which is an invalid escape sequence in a non-raw literal.
# Role in front of the name, separated from the rest by some kind of dash.
role_dash_re = re.compile(r'^(?P<role>(městský|krajský|oblastní) (garant|koordinátor).*)[−–-] *(?P<rest>.*)')
# Role in front of the name, separated from the rest by a colon.
role_colon_re = re.compile(r'^(?P<role>((pouze )?[Kk]ategorie|Předseda)[^:]*): *(?P<rest>.*)')
# The name: everything up to the first comma (or the end of the line).
name_re = re.compile(r'^([^,]+)(,|$)')
# One or more abbreviated academic titles at the start of a name.
titles_re = re.compile(r'^((Mgr|Paed?Dr|Bc|doc|PhDr|RNDr)\. *)+')
# Runs of spaces, collapsed to a single space in the role text.
spaces_re = re.compile(r' +')
# Nine digits, either contiguous or in three space-separated groups of three.
phone_re = re.compile(r'\b(\d{9}|\d{3} +\d{3} +\d{3})\b')

for line in sys.stdin:
    # Trim the line, turn non-breaking spaces into ordinary ones and drop a
    # single trailing colon before trying to recognise the line.
    line = line.strip().replace('\xa0', ' ')
    if line.endswith(':'):
        line = line[:-1]

    if line in regiony:
        # Region heading: open a fresh list of records for this region.
        code = regiony[line]
        print(f"### Region {code} ({line}) ###", file=sys.stderr)
        assert code not in garanti, f"region {code} ({line}) appears twice"
        curr_g = []
        garanti[code] = curr_g
        state = 0
    elif curr_g is None:
        # Anything before the first region heading is only echoed.
        print(f'> {line}', file=sys.stderr)
    elif line == "":
        # A blank line ends the current record.
        state = 0
    else:
        print(f'\t{line}', file=sys.stderr)
        if state == 0:
            # First line of a record: optional role prefix, then the name.
            role = ""
            m = role_dash_re.match(line)
            if m:
                role = m['role'].strip()
                line = m['rest']
                print(f'\t==> {role}|{line}', file=sys.stderr)
            m = role_colon_re.match(line)
            if m:
                # The two kinds of role prefix are not expected together.
                assert role == "", f"two role prefixes on one line: {line!r}"
                role = m['role'].strip()
                line = m['rest']
                print(f'\t==> {role}|{line}', file=sys.stderr)

            m = name_re.match(line)
            assert m, f"cannot find a name in: {line!r}"
            # Group 1 is mandatory in name_re, so it is always a string here.
            name = m[1]
            print(f'\t==> {name}', file=sys.stderr)

            role = spaces_re.sub(' ', role)
            raw_name = titles_re.sub("", name)
            print(f': {name} -> {raw_name}', file=sys.stderr)

            curr = Garant(name=name, raw_name=raw_name, role=role, emails=[], phones=[])
            curr_g.append(curr)
            state = 1
        elif state == 1:
            # Continuation line: every whitespace-separated token containing
            # '@' is taken as an e-mail address ...
            for f in line.split():
                if '@' in f:
                    print(f'@ {f}', file=sys.stderr)
                    curr.emails.append(f)
            # ... and every 9-digit number as a phone, stored without spaces.
            for phone in phone_re.findall(line):
                curr.phones.append(phone.replace(" ", ""))
# Debugging aid: uncomment to dump everything that was parsed.
# pprint(garanti)

# Walk the regions in reg_list order and print one tab-separated row per
# garant: region code, region name, name, name without titles, role,
# phones joined by "/", then one column per e-mail address.
for reg_code, reg_name in reg_list:
    if reg_code not in garanti:
        # No heading for this region was seen in the input.
        print(f'!!! Chybí garant pro region {reg_code}')
        continue
    for garant in garanti[reg_code]:
        phones = "/".join(garant.phones)
        row = [reg_code, reg_name, garant.name, garant.raw_name, garant.role, phones]
        row.extend(garant.emails)
        print("\t".join(row))
0% Loading or .
You are about to add 0 people to the discussion. Proceed with caution.
Please register or sign in to comment