Skip to content
Snippets Groups Projects
Select Git revision
  • ac0c098deeca6285a15d746d35cb93d196d314c3
  • devel default
  • master
  • fo
  • jirka/typing
  • fo-base
  • mj/submit-images
  • jk/issue-96
  • jk/issue-196
  • honza/add-contestant
  • honza/mr7
  • honza/mrf
  • honza/mrd
  • honza/mra
  • honza/mr6
  • honza/submit-images
  • honza/kolo-vs-soutez
  • jh-stress-test-wip
  • shorten-schools
19 results

org_users.py

Blame
  • submit.py 4.28 KiB
    import datetime
    import multiprocessing
    import os
    import pikepdf
    from typing import Any
    import werkzeug.utils
    
    import mo.db as db
    import mo.util
    from mo.util import logger
    
    
    class SubmitException(RuntimeError):
        """Raised when an uploaded paper cannot be processed or accepted."""
    
    
    class Submitter:
        """Validate uploaded PDF papers and file them under the submit directory.

        The PDF itself is inspected in a separate worker process; the parent only
        receives a small result dict through a pipe and then stores the file.
        """

        # Root directory for stored papers, taken from mo.util.data_dir('submits').
        submit_dir: str

        # Seconds to wait for the PDF worker to report before it is terminated.
        pdf_timeout: float = 10

        def __init__(self):
            self.submit_dir = mo.util.data_dir('submits')

        def submit_paper(self, paper: db.Paper, tmpfile: str):
            """Process the uploaded file `tmpfile` and attach it to `paper`.

            On success the outcome (file name, page count, size, duration and the
            broken flag) is logged.  On failure the temporary file is passed to
            mo.util.link_to_dir() targeting the 'errors' data directory
            (presumably to keep a copy for later inspection -- confirm against
            mo.util), the failure is logged and the exception is re-raised.

            Raises:
                SubmitException: if the file could not be processed or accepted.
            """
            logger.info(f'Submit: Zpracovávám file={tmpfile} for=#{paper.for_user_obj.user_id} by=#{paper.uploaded_by_obj.user_id} type={paper.type.name}')
            t_start = datetime.datetime.now()

            try:
                self._do_submit(paper, tmpfile)
                duration = (datetime.datetime.now() - t_start).total_seconds()
                logger.info(f'Submit: Hotovo: file={paper.file_name} pages={paper.pages} bytes={paper.bytes} time={duration:.3f} broken={1 if paper.broken else 0}')
            except SubmitException as e:
                duration = (datetime.datetime.now() - t_start).total_seconds()
                preserved_as = mo.util.link_to_dir(tmpfile, mo.util.data_dir('errors'), prefix='submit-')
                logger.info(f'Submit: Chyba: {e} (time={duration:.3f}), uloženo do {preserved_as}')
                raise

        def _file_paper(self, paper: db.Paper, tmpfile: str):
            """Store `tmpfile` in the per-round, per-user directory and record its name.

            The target directory is `<year>-<category>-<seq>/<user_id>` below
            `self.submit_dir`; it is created if missing.  `paper.file_name` is set
            to the stored file's path relative to `self.submit_dir`.
            """
            # Named contest_round so that the builtin round() is not shadowed.
            contest_round = paper.task.round
            secure_category = werkzeug.utils.secure_filename(contest_round.category)
            top_level = f'{contest_round.year}-{secure_category}-{contest_round.seq}'
            user_dir = os.path.join(top_level, str(paper.for_user_obj.user_id))
            sub_user_dir = os.path.join(self.submit_dir, user_dir)
            os.makedirs(sub_user_dir, exist_ok=True)

            secure_task_code = werkzeug.utils.secure_filename(paper.task.code)
            # Only the first three characters of the paper type name go into the file name.
            full_name = mo.util.link_to_dir(tmpfile, sub_user_dir, prefix=f'{secure_task_code}-{paper.type.name[:3]}-', suffix='.pdf')
            paper.file_name = os.path.join(user_dir, os.path.basename(full_name))

        def _do_submit(self, paper: db.Paper, tmpfile: str):
            """Check the PDF in `tmpfile`, fill in `paper` and file the paper.

            Sets `paper.pages` when the PDF was parsed, or `paper.broken = True`
            when parsing failed but the file still looks like a PDF.  In both
            cases `paper.bytes` is set and the file is stored via _file_paper().

            Raises:
                SubmitException: on worker timeout, worker failure, or when the
                    file is not an acceptable PDF.
            """
            result = self._run_pdf_worker(tmpfile)

            if 'error' in result:
                logger.info('Submit: PDF error: %s', result['error'])
                if result['pdf-like']:
                    logger.info('Submit: Soubor akceptován s varováním')
                    paper.broken = True
                else:
                    raise SubmitException('Soubor není korektní PDF.')
            else:
                paper.pages = result['pages']

            paper.bytes = os.path.getsize(tmpfile)
            self._file_paper(paper, tmpfile)

        def _run_pdf_worker(self, tmpfile: str) -> Any:
            """Run _process_pdf() in a child process and return the dict it sends.

            PDF processing runs in a separate process so that it is sufficiently
            isolated from the caller.

            Raises:
                SubmitException: if the worker does not answer within
                    `self.pdf_timeout` seconds, exits with a non-zero code, or
                    closes the pipe without sending a result.
            """
            pipe_rx, pipe_tx = multiprocessing.Pipe(duplex=False)
            try:
                proc = multiprocessing.Process(name='submit', target=Submitter._process_pdf, args=(tmpfile, pipe_tx))
                proc.start()
                # The parent does not write; drop its copy of the sending end so
                # that a worker dying without sending shows up as EOF on recv().
                pipe_tx.close()

                if not pipe_rx.poll(self.pdf_timeout):
                    proc.terminate()
                    proc.join()
                    raise SubmitException('Timeout při zpracování PDF.')

                try:
                    result = pipe_rx.recv()
                except EOFError:
                    result = None
                    proc.terminate()
                proc.join()
            finally:
                # Connection.close() may be called repeatedly, so this is safe on
                # every path and guarantees neither pipe end outlives the call.
                pipe_tx.close()
                pipe_rx.close()

            assert proc.exitcode is not None
            if proc.exitcode != 0:
                raise SubmitException(f'Interní chyba při zpracování PDF: Exit code {proc.exitcode}.')
            if not result:
                raise SubmitException('Interní chyba při zpracování PDF: EOF.')
            return result

        @staticmethod
        def _process_pdf(tmpfile: str, pipe):
            """Worker entry point: inspect the PDF and send the result dict to `pipe`.

            Runs in a separate process.  The dict contains `pages` on success;
            when pikepdf raises PdfError it contains `error` (the message) and
            `pdf-like` (the result of _looks_like_pdf()).
            """
            result: Any = {}
            try:
                with pikepdf.open(tmpfile, attempt_recovery=False) as pdf:
                    result['pages'] = len(pdf.pages)
            except pikepdf.PdfError as e:
                result['error'] = str(e)
                result['pdf-like'] = Submitter._looks_like_pdf(tmpfile)
            pipe.send(result)

        @staticmethod
        def _looks_like_pdf(tmpfile: str) -> bool:
            """Return True if both the start and the end of the file look like a PDF.

            PDFs that QPDF cannot open are still accepted with a warning as long
            as the file starts with `%PDF-`, is at least 100 bytes long and its
            last 100 bytes contain both `startxref` and `%%EOF`.
            """
            tail_len = 100

            with open(tmpfile, 'rb') as f:
                if f.read(5) != b'%PDF-':
                    return False

                # seek() returns the new absolute position, i.e. the file size.
                size = f.seek(0, os.SEEK_END)
                if size < tail_len:
                    return False

                f.seek(-tail_len, os.SEEK_END)
                trailer = f.read(tail_len)
                return b'startxref' in trailer and b'%%EOF' in trailer