Project 'mj/mo-submit' was moved to 'mo-p/osmo'. Please update any links and bookmarks that may still have the old path.
Select Git revision
submit.py 5.28 KiB
import datetime
import multiprocessing
import os
import pikepdf
from typing import Any
import werkzeug.utils
import mo.db as db
import mo.util
from mo.util import logger
class SubmitException(RuntimeError):
    """Raised when an uploaded file cannot be accepted as a submission.

    The message is written to the log by the submit code in this module
    before the exception is re-raised to the caller.
    """
class Submitter:
    """Validate uploaded PDF files and file them under the submit directory.

    The PDF itself is inspected in a separate worker process (see
    ``_process_pdf``); accepted files are linked into a per-round, per-user
    directory below ``submit_dir`` (see ``_file_paper``).
    """

    # Base directory for filed submissions, obtained from mo.util.data_dir('submits').
    submit_dir: str

    # Seconds to wait for the PDF worker to deliver its result, and afterwards
    # for it to exit on its own.
    _WORKER_TIMEOUT = 10

    # Number of bytes at the end of the file searched for the PDF trailer markers.
    _TRAILER_WINDOW = 100

    def __init__(self):
        self.submit_dir = mo.util.data_dir('submits')

    def submit_paper(self, paper: 'db.Paper', tmpfile: str):
        """Process a newly uploaded paper stored in ``tmpfile`` and file it.

        A file which cannot be parsed as PDF, but still looks like one, is
        accepted with a warning and recorded in ``paper.orig_file_name``
        instead of ``paper.file_name``.

        On success, ``paper.bytes``, ``paper.pages`` (for a valid PDF) and one
        of the file name attributes are filled in.

        Raises:
            SubmitException: the file was rejected. Before re-raising, the
                upload is preserved via ``mo.util.link_to_dir`` in the
                ``errors`` data directory and the failure is logged.
        """
        logger.info(f'Submit: Zpracovávám file={tmpfile} for=#{paper.for_user_obj.user_id} by=#{paper.uploaded_by_obj.user_id} type={paper.type.name}')
        t_start = datetime.datetime.now()
        try:
            # Broken-but-PDF-like files are allowed here: the returned flag
            # decides whether the file is filed as the original (broken) one.
            broken = self._process_pdf(paper, tmpfile, True)
            self._file_paper(paper, tmpfile, broken)
            duration = (datetime.datetime.now() - t_start).total_seconds()
            logger.info(f'Submit: Hotovo: file={paper.file_name or paper.orig_file_name} pages={paper.pages} bytes={paper.bytes} time={duration:.3f} broken={1 if paper.is_broken() else 0}')
        except SubmitException as e:
            duration = (datetime.datetime.now() - t_start).total_seconds()
            preserved_as = mo.util.link_to_dir(tmpfile, mo.util.data_dir('errors'), prefix='submit-')
            logger.info(f'Submit: Chyba: {e} (time={duration:.3f}), uloženo do {preserved_as}')
            raise

    def submit_fix(self, paper: 'db.Paper', tmpfile: str):
        """Process a fixed version of an existing paper and file it.

        The fix is always stored in ``paper.file_name``, so it has to be a
        PDF that can actually be parsed; a broken file is rejected.

        Raises:
            SubmitException: the file was rejected (logged, then re-raised).
        """
        logger.info(f'Submit fix: Zpracovávám file={tmpfile} fix_for=#{paper.paper_id}')
        t_start = datetime.datetime.now()
        try:
            # A fix must be a valid PDF, so broken files are not allowed.
            self._process_pdf(paper, tmpfile, False)
            self._file_paper(paper, tmpfile, False)
            duration = (datetime.datetime.now() - t_start).total_seconds()
            logger.info(f'Submit fix: Hotovo: file={paper.file_name} pages={paper.pages} bytes={paper.bytes} time={duration:.3f}')
        except SubmitException as e:
            duration = (datetime.datetime.now() - t_start).total_seconds()
            logger.info(f'Submit fix: {e} (time={duration:.3f})')
            raise

    def _file_paper(self, paper: 'db.Paper', tmpfile: str, broken: bool):
        """Link ``tmpfile`` into the user's directory and record its name.

        The target directory is ``<year>-<category>-<seq>/<user id>`` below
        ``submit_dir``; it is created if missing. The recorded name is
        relative to ``submit_dir`` and goes to ``paper.orig_file_name`` when
        ``broken`` is true, otherwise to ``paper.file_name``.
        """
        rnd = paper.task.round
        # Category and task code become parts of file system paths, so they
        # are sanitized first.
        secure_category = werkzeug.utils.secure_filename(rnd.category)
        top_level = f'{rnd.year}-{secure_category}-{rnd.seq}'
        user_dir = os.path.join(top_level, str(paper.for_user_obj.user_id))
        sub_user_dir = os.path.join(self.submit_dir, user_dir)
        os.makedirs(sub_user_dir, exist_ok=True)

        secure_task_code = werkzeug.utils.secure_filename(paper.task.code)
        # NOTE(review): link_to_dir presumably picks a unique name with the
        # given prefix/suffix and returns its full path -- confirm in mo.util.
        full_name = mo.util.link_to_dir(tmpfile, sub_user_dir, prefix=f'{secure_task_code}-{paper.type.name[:3]}-', suffix='.pdf')
        file_name = os.path.join(user_dir, os.path.basename(full_name))

        if broken:
            paper.orig_file_name = file_name
        else:
            paper.file_name = file_name

    def _process_pdf(self, paper: 'db.Paper', tmpfile: str, allow_broken: bool) -> bool:
        """Inspect ``tmpfile`` in a worker process and fill in paper metadata.

        Sets ``paper.pages`` when the PDF was parsed successfully and
        ``paper.bytes`` for every accepted file.

        Returns:
            True if the file could not be parsed, but was accepted anyway
            because it looks like a PDF and ``allow_broken`` is set;
            False for a correctly parsed PDF.

        Raises:
            SubmitException: on worker timeout, worker failure (non-zero exit
                code or no result), or when the file is not acceptable.
        """
        # The PDF is processed in a separate process to keep it sufficiently
        # isolated from the caller.
        pipe_rx, pipe_tx = multiprocessing.Pipe(duplex=False)
        proc = multiprocessing.Process(name='submit', target=Submitter._do_process_pdf, args=(tmpfile, pipe_tx))
        proc.start()
        # Drop our copy of the write end, so that recv() reports EOF if the
        # worker dies without sending anything.
        pipe_tx.close()

        try:
            if not pipe_rx.poll(self._WORKER_TIMEOUT):
                proc.terminate()
                proc.join()
                raise SubmitException('Timeout při zpracování PDF.')
            try:
                result = pipe_rx.recv()
            except EOFError:
                result = None
        finally:
            pipe_rx.close()

        # Let the worker finish by itself. Terminating it right after the
        # result arrives could kill it while it is still exiting, turning a
        # successful run into a non-zero exit code. Only a worker which fails
        # to exit within the grace period is terminated.
        proc.join(self._WORKER_TIMEOUT)
        if proc.is_alive():
            proc.terminate()
            proc.join()

        assert proc.exitcode is not None
        if proc.exitcode != 0:
            raise SubmitException(f'Interní chyba při zpracování PDF: Exit code {proc.exitcode}.')
        if not result:
            raise SubmitException('Interní chyba při zpracování PDF: EOF.')

        if 'error' in result:
            logger.info('Submit: PDF error: %s', result['error'])
            if result['pdf-like'] and allow_broken:
                logger.info('Submit: Soubor akceptován s varováním')
                broken = True
            else:
                raise SubmitException('Soubor není korektní PDF.')
        else:
            paper.pages = result['pages']
            broken = False

        paper.bytes = os.path.getsize(tmpfile)
        return broken

    # PDF processing runs in a separate process; the result is sent back
    # as a dictionary over the pipe.
    @staticmethod
    def _do_process_pdf(tmpfile: str, pipe):
        """Worker entry point: open the PDF and send a result dict to ``pipe``.

        The dict contains ``'pages'`` on success, or ``'error'`` (message of
        the pikepdf error) together with ``'pdf-like'`` (result of
        ``_looks_like_pdf``) on failure.
        """
        result: Any = {}
        try:
            with pikepdf.open(tmpfile, attempt_recovery=False) as pdf:
                result['pages'] = len(pdf.pages)
        except pikepdf.PdfError as e:
            result['error'] = str(e)
            result['pdf-like'] = Submitter._looks_like_pdf(tmpfile)
        pipe.send(result)

    @staticmethod
    def _looks_like_pdf(tmpfile: str) -> bool:
        """Tell whether both the start and the end of the file look like PDF.

        PDFs which QPDF fails to open are still acceptable with a warning if
        the file starts with ``%PDF-`` and its last 100 bytes contain both
        ``startxref`` and ``%%EOF``. Files shorter than 100 bytes are refused.
        """
        window = Submitter._TRAILER_WINDOW
        with open(tmpfile, 'rb') as f:
            header = f.read(5)
            if header != b'%PDF-':
                return False

            f.seek(0, os.SEEK_END)
            size = f.tell()
            if size < window:
                return False

            f.seek(-window, os.SEEK_END)
            trailer = f.read(window)
            return b'startxref' in trailer and b'%%EOF' in trailer