diff --git a/mo/submit.py b/mo/submit.py index 9dd9ca3d14ee5df28bedd7e392e49bc5480b6913..32bb6905607700f5ffaef817e681af0bdec640df 100644 --- a/mo/submit.py +++ b/mo/submit.py @@ -25,16 +25,31 @@ class Submitter: t_start = datetime.datetime.now() try: - self._do_submit(paper, tmpfile) + broken = self._process_pdf(paper, tmpfile, False) + self._file_paper(paper, tmpfile, broken) duration = (datetime.datetime.now() - t_start).total_seconds() - logger.info(f'Submit: Hotovo: file={paper.file_name} pages={paper.pages} bytes={paper.bytes} time={duration:.3f} broken={1 if paper.broken else 0}') + logger.info(f'Submit: Hotovo: file={paper.file_name or paper.orig_file_name} pages={paper.pages} bytes={paper.bytes} time={duration:.3f} broken={1 if paper.is_broken() else 0}') except SubmitException as e: duration = (datetime.datetime.now() - t_start).total_seconds() preserved_as = mo.util.link_to_dir(tmpfile, mo.util.data_dir('errors'), prefix='submit-') logger.info(f'Submit: Chyba: {e} (time={duration:.3f}), uloženo do {preserved_as}') raise - def _file_paper(self, paper: db.Paper, tmpfile: str): + def submit_fix(self, paper: db.Paper, tmpfile: str): + logger.info(f'Submit fix: Zpracovávám file={tmpfile} fix_for=#{paper.paper_id}') + t_start = datetime.datetime.now() + + try: + self._process_pdf(paper, tmpfile, True) + self._file_paper(paper, tmpfile, False) + duration = (datetime.datetime.now() - t_start).total_seconds() + logger.info(f'Submit fix: Hotovo: file={paper.file_name} pages={paper.pages} bytes={paper.bytes} time={duration:.3f}') + except SubmitException as e: + duration = (datetime.datetime.now() - t_start).total_seconds() + logger.info(f'Submit fix: {e} (time={duration:.3f})') + raise + + def _file_paper(self, paper: db.Paper, tmpfile: str, broken: bool): round = paper.task.round secure_category = werkzeug.utils.secure_filename(round.category) top_level = f'{round.year}-{secure_category}-{round.seq}' @@ -44,12 +59,16 @@ class Submitter: secure_task_code = werkzeug.utils.secure_filename(paper.task.code) full_name = mo.util.link_to_dir(tmpfile, sub_user_dir, prefix=f'{secure_task_code}-{paper.type.name[:3]}-', suffix='.pdf') - paper.file_name = os.path.join(user_dir, os.path.basename(full_name)) + file_name = os.path.join(user_dir, os.path.basename(full_name)) + if broken: + paper.orig_file_name = file_name + else: + paper.file_name = file_name - def _do_submit(self, paper: db.Paper, tmpfile: str): + def _process_pdf(self, paper: db.Paper, tmpfile: str, allow_broken: bool) -> bool: # Zpracování PDF spustíme v samostatném procesu, aby bylo dostatečně oddělené pipe_rx, pipe_tx = multiprocessing.Pipe(duplex=False) - proc = multiprocessing.Process(name='submit', target=Submitter._process_pdf, args=(tmpfile, pipe_tx)) + proc = multiprocessing.Process(name='submit', target=Submitter._do_process_pdf, args=(tmpfile, pipe_tx)) proc.start() pipe_tx.close() @@ -73,19 +92,21 @@ class Submitter: if 'error' in result: logger.info('Submit: PDF error: %s', result['error']) - if result['pdf-like']: + if result['pdf-like'] and allow_broken: logger.info('Submit: Soubor akceptován s varováním') - paper.broken = True + broken = True else: raise SubmitException('Soubor není korektní PDF.') else: paper.pages = result['pages'] + broken = False paper.bytes = os.path.getsize(tmpfile) - self._file_paper(paper, tmpfile) + return broken # Zpracování PDF běží v samostatném procesu, výsledek pošle jako slovník rourou. - def _process_pdf(tmpfile: str, pipe): + @staticmethod + def _do_process_pdf(tmpfile: str, pipe): result: Any = {} try: with pikepdf.open(tmpfile, attempt_recovery=False) as pdf: @@ -95,6 +116,7 @@ class Submitter: result['pdf-like'] = Submitter._looks_like_pdf(tmpfile) pipe.send(result) + @staticmethod def _looks_like_pdf(tmpfile: str) -> bool: """PDFka, která nezvládne otevřít QPDF, jsme ochotni akceptovat s warningem, pokud začátek i konec souboru vypadá jako PDF."""