Skip to content
Snippets Groups Projects

WIP: Reforma importů

Closed Martin Mareš requested to merge mj/import into master
Files
21
+ 77
10
@@ -7,9 +7,67 @@
# - quoting pomocí uvozovek
import csv
import dataclasses
from enum import auto
from dataclasses import dataclass, fields
from typing import Type, List, IO, Sequence
import mo.db as db
class FileFormat(db.MOEnum):
    """File formats accepted by this module's ``read`` and ``write``.

    Every accessor below looks this member up in the module-level
    ``format_desc`` table and returns one field of the matching
    ``FileFormatDesc``.  A member without an entry in that table makes
    the accessors raise ``KeyError``.
    """

    en_csv = auto()
    cs_csv = auto()
    tsv = auto()

    def friendly_name(self) -> str:
        """Return the human-readable description of this format."""
        return format_desc[self].friendly_name

    def get_dialect(self) -> str:
        """Return the name of the ``csv`` dialect used for this format."""
        return format_desc[self].csv_dialect

    def get_charset(self) -> str:
        """Return the name of the character encoding of this format."""
        return format_desc[self].charset

    def get_content_type(self) -> str:
        """Return the content type string (with charset) of this format."""
        return format_desc[self].content_type

    def get_extension(self) -> str:
        """Return the file name extension of this format, without a dot."""
        return format_desc[self].extension
@dataclass
class FileFormatDesc:
    """Static properties of one file format (one entry of ``format_desc``)."""

    # Human-readable description of the format.
    friendly_name: str
    # Name of the csv dialect, as passed to csv.reader() / csv.writer().
    csv_dialect: str
    # Name of the character encoding.
    charset: str
    # Content type string; the entries in format_desc include a charset parameter.
    content_type: str
    # File name extension; the entries in format_desc omit the leading dot.
    extension: str
# Lookup table backing the FileFormat accessor methods: one FileFormatDesc
# per FileFormat member.  The csv_dialect values name either a dialect built
# into the csv module ('excel') or one registered further down in this file
# ('cs-excel', 'tsv').
format_desc = {
    # Comma-separated columns, UTF-8.
    FileFormat.en_csv: FileFormatDesc(
        friendly_name='sloupce oddělené čárkami v UTF-8 (anglický Excel)',
        csv_dialect='excel',
        charset='utf-8',
        content_type='text/csv; charset=utf-8',
        extension='csv',
    ),
    # Semicolon-separated columns, windows-1250.
    # NOTE(review): unlike en_csv this is served as text/plain rather than
    # text/csv -- presumably deliberate; confirm before changing.
    FileFormat.cs_csv: FileFormatDesc(
        friendly_name='sloupce oddělené středníky ve windows-1250 (český Excel)',
        csv_dialect='cs-excel',
        charset='windows-1250',
        content_type='text/plain; charset=windows-1250',
        extension='csv',
    ),
    # Tab-separated columns, UTF-8.
    FileFormat.tsv: FileFormatDesc(
        friendly_name='sloupce oddělené tabulátory v UTF-8',
        csv_dialect='tsv',
        charset='utf-8',
        content_type='text/tab-separated-values; charset=utf-8',
        extension='tsv',
    ),
}
class TSV(csv.Dialect):
delimiter = '\t'
@@ -19,7 +77,12 @@ class TSV(csv.Dialect):
quoting = csv.QUOTE_MINIMAL
class CzechExcel(csv.excel):
    """The ``csv.excel`` dialect with ``;`` as the column delimiter.

    All other formatting parameters are inherited unchanged from
    ``csv.excel``.
    """

    delimiter = ';'
# Make the custom dialects available by name, so that they can be passed as
# dialect='tsv' / dialect='cs-excel' to csv.reader() and csv.writer().
for _dialect_name, _dialect_class in (('tsv', TSV), ('cs-excel', CzechExcel)):
    csv.register_dialect(_dialect_name, _dialect_class)
class Row:
@@ -27,10 +90,10 @@ class Row:
odpovídají sloupečkům, musí být typu str a mít defaultní hodnotu."""
def write(file: IO, dialect: str, row_class: Type[Row], rows: Sequence[Row]):
writer = csv.writer(file, dialect=dialect)
def write(file: IO, fmt: FileFormat, row_class: Type[Row], rows: Sequence[Row]):
writer = csv.writer(file, dialect=fmt.get_dialect())
columns = [field.name for field in dataclasses.fields(row_class)]
columns = [field.name for field in fields(row_class)]
writer.writerow(columns)
for row in rows:
@@ -38,12 +101,12 @@ def write(file: IO, dialect: str, row_class: Type[Row], rows: Sequence[Row]):
writer.writerow(r)
def read(file: IO, dialect: str, row_class: Type[Row]):
reader = csv.reader(file, dialect=dialect, strict=True)
def read(file: IO, fmt: FileFormat, row_class: Type[Row]):
reader = csv.reader(file, dialect=fmt.get_dialect(), strict=True)
header: List[str] = []
rows: List[Row] = []
columns = set(field.name for field in dataclasses.fields(row_class))
columns = set(field.name for field in fields(row_class))
for r in reader:
if reader.line_num == 1:
@@ -53,11 +116,15 @@ def read(file: IO, dialect: str, row_class: Type[Row]):
header = r
else:
row = row_class()
not_empty = False
for i in range(min(len(r), len(header))):
f = header[i]
x = r[i].strip()
if f in columns and x != "" and x != '-':
setattr(row, f, x)
rows.append(row)
if x != "" and x != '-':
not_empty = True
if f in columns:
setattr(row, f, x)
if not_empty:
rows.append(row)
return rows
Loading