# Select Git revision
# org_users.py
# submit.py 4.28 KiB
import datetime
import multiprocessing
import os
import pikepdf
from typing import Any
import werkzeug.utils
import mo.db as db
import mo.util
from mo.util import logger
class SubmitException(RuntimeError):
    """Raised when an uploaded paper cannot be processed or is rejected."""
class Submitter:
    """Checks uploaded PDF files and files them under the submit directory.

    The PDF itself is inspected in a separate child process; the parent only
    receives a small result dictionary through a pipe.
    """

    # Directory (as returned by mo.util.data_dir('submits')) under which
    # accepted papers are stored.
    submit_dir: str

    # Seconds to wait for the child process to report a result.
    pdf_timeout: float = 10

    # Seconds the child gets to exit on its own after it has reported
    # (or closed the pipe) before it is terminated forcibly.
    exit_grace: float = 5

    def __init__(self):
        self.submit_dir = mo.util.data_dir('submits')

    def submit_paper(self, paper: db.Paper, tmpfile: str):
        """Process the uploaded file `tmpfile` and store it for `paper`.

        On success `paper` has been filled in by `_do_submit` (file name,
        size, and either the page count or the `broken` flag).

        Raises:
            SubmitException: the file was rejected or could not be checked.
                Before re-raising, the file is linked into the 'errors' data
                directory via mo.util.link_to_dir so it can be inspected.
        """
        logger.info(f'Submit: Zpracovávám file={tmpfile} for=#{paper.for_user_obj.user_id} by=#{paper.uploaded_by_obj.user_id} type={paper.type.name}')
        t_start = datetime.datetime.now()

        try:
            self._do_submit(paper, tmpfile)
        except SubmitException as e:
            duration = (datetime.datetime.now() - t_start).total_seconds()
            preserved_as = mo.util.link_to_dir(tmpfile, mo.util.data_dir('errors'), prefix='submit-')
            logger.info(f'Submit: Chyba: {e} (time={duration:.3f}), uloženo do {preserved_as}')
            raise
        else:
            duration = (datetime.datetime.now() - t_start).total_seconds()
            logger.info(f'Submit: Hotovo: file={paper.file_name} pages={paper.pages} bytes={paper.bytes} time={duration:.3f} broken={1 if paper.broken else 0}')

    def _file_paper(self, paper: db.Paper, tmpfile: str):
        """Place `tmpfile` into the per-round, per-user directory.

        Sets `paper.file_name` to the stored file's path relative to
        `self.submit_dir`.
        """
        # Named task_round so the builtin round() is not shadowed.
        task_round = paper.task.round
        secure_category = werkzeug.utils.secure_filename(task_round.category)
        top_level = f'{task_round.year}-{secure_category}-{task_round.seq}'
        user_dir = os.path.join(top_level, str(paper.for_user_obj.user_id))
        sub_user_dir = os.path.join(self.submit_dir, user_dir)
        os.makedirs(sub_user_dir, exist_ok=True)

        secure_task_code = werkzeug.utils.secure_filename(paper.task.code)
        # NOTE(review): link_to_dir presumably links the file under a unique
        # name built from prefix/suffix and returns the full path -- confirm
        # against mo.util.
        full_name = mo.util.link_to_dir(
            tmpfile,
            sub_user_dir,
            prefix=f'{secure_task_code}-{paper.type.name[:3]}-',
            suffix='.pdf',
        )
        paper.file_name = os.path.join(user_dir, os.path.basename(full_name))

    def _do_submit(self, paper: db.Paper, tmpfile: str):
        """Check `tmpfile` in a child process and file it if acceptable.

        Sets `paper.pages` for a readable PDF, or `paper.broken = True` for a
        file that could not be parsed but still looks like a PDF. In both
        cases `paper.bytes` is set and the file is stored via `_file_paper`.

        Raises:
            SubmitException: on timeout, on a child process failure, or when
                the file is not a PDF at all.
        """
        # PDF processing runs in a separate process so that it is
        # sufficiently isolated from this one.
        pipe_rx, pipe_tx = multiprocessing.Pipe(duplex=False)
        proc = multiprocessing.Process(name='submit', target=Submitter._process_pdf, args=(tmpfile, pipe_tx))
        proc.start()
        # Drop our copy of the sending end so that a dying child shows up
        # as EOF on the receiving end.
        pipe_tx.close()

        try:
            if not pipe_rx.poll(self.pdf_timeout):
                proc.terminate()
                proc.join()
                raise SubmitException('Timeout při zpracování PDF.')

            try:
                result = pipe_rx.recv()
            except EOFError:
                result = None

            # Let the child finish by itself first. Terminating right after
            # recv() can kill a child that has already reported success but
            # has not exited yet, which would turn a good upload into a
            # non-zero exit code below.
            proc.join(self.exit_grace)
            if proc.is_alive():
                proc.terminate()
                proc.join()
        finally:
            pipe_rx.close()

        assert proc.exitcode is not None
        if proc.exitcode != 0:
            raise SubmitException(f'Interní chyba při zpracování PDF: Exit code {proc.exitcode}.')

        if not result:
            raise SubmitException('Interní chyba při zpracování PDF: EOF.')

        if 'error' in result:
            logger.info('Submit: PDF error: %s', result['error'])
            if result['pdf-like']:
                logger.info('Submit: Soubor akceptován s varováním')
                paper.broken = True
            else:
                raise SubmitException('Soubor není korektní PDF.')
        else:
            paper.pages = result['pages']

        paper.bytes = os.path.getsize(tmpfile)
        self._file_paper(paper, tmpfile)

    # PDF processing runs in a separate process; the result is sent back
    # through the pipe as a dictionary.
    @staticmethod
    def _process_pdf(tmpfile: str, pipe):
        """Child-process entry point: inspect `tmpfile` and report via `pipe`.

        Sends ``{'pages': n}`` when the file opens cleanly, otherwise
        ``{'error': message, 'pdf-like': bool}``.
        """
        result: Any = {}
        try:
            with pikepdf.open(tmpfile, attempt_recovery=False) as pdf:
                result['pages'] = len(pdf.pages)
        except pikepdf.PdfError as e:
            result['error'] = str(e)
            result['pdf-like'] = Submitter._looks_like_pdf(tmpfile)
        pipe.send(result)

    @staticmethod
    def _looks_like_pdf(tmpfile: str) -> bool:
        """Return True if both the start and the end of the file look like PDF.

        PDFs that QPDF fails to open are still accepted with a warning when
        the file starts with ``%PDF-`` and its last 100 bytes contain both
        ``startxref`` and ``%%EOF``. Files shorter than 100 bytes are rejected.
        """
        tail_len = 100
        with open(tmpfile, 'rb') as f:
            if f.read(5) != b'%PDF-':
                return False

            size = f.seek(0, os.SEEK_END)
            if size < tail_len:
                return False

            f.seek(-tail_len, os.SEEK_END)
            trailer = f.read(tail_len)

        return b'startxref' in trailer and b'%%EOF' in trailer