Skip to content
Snippets Groups Projects

Změna reprezentace rozbitých submitů

Merged Martin Mareš requested to merge mj/broken into devel
All threads resolved!
1 file
+ 32
10
Compare changes
  • Side-by-side
  • Inline
+ 32
10
@@ -25,16 +25,31 @@ class Submitter:
@@ -25,16 +25,31 @@ class Submitter:
t_start = datetime.datetime.now()
t_start = datetime.datetime.now()
try:
try:
self._do_submit(paper, tmpfile)
broken = self._process_pdf(paper, tmpfile, False)
 
self._file_paper(paper, tmpfile, broken)
duration = (datetime.datetime.now() - t_start).total_seconds()
duration = (datetime.datetime.now() - t_start).total_seconds()
logger.info(f'Submit: Hotovo: file={paper.file_name} pages={paper.pages} bytes={paper.bytes} time={duration:.3f} broken={1 if paper.broken else 0}')
logger.info(f'Submit: Hotovo: file={paper.file_name or paper.orig_file_name} pages={paper.pages} bytes={paper.bytes} time={duration:.3f} broken={1 if paper.is_broken() else 0}')
except SubmitException as e:
except SubmitException as e:
duration = (datetime.datetime.now() - t_start).total_seconds()
duration = (datetime.datetime.now() - t_start).total_seconds()
preserved_as = mo.util.link_to_dir(tmpfile, mo.util.data_dir('errors'), prefix='submit-')
preserved_as = mo.util.link_to_dir(tmpfile, mo.util.data_dir('errors'), prefix='submit-')
logger.info(f'Submit: Chyba: {e} (time={duration:.3f}), uloženo do {preserved_as}')
logger.info(f'Submit: Chyba: {e} (time={duration:.3f}), uloženo do {preserved_as}')
raise
raise
def _file_paper(self, paper: db.Paper, tmpfile: str):
def submit_fix(self, paper: db.Paper, tmpfile: str):
    """Process a corrected PDF uploaded as a fix for an existing paper.

    The file is validated and then filed as the paper's regular
    ``file_name`` (never as ``orig_file_name``).  Because the result is
    always stored as a non-broken file, a broken PDF must not be accepted
    here: validation runs with ``allow_broken=False`` so that a file which
    cannot be parsed is rejected instead of silently replacing one broken
    file with another.

    Args:
        paper: The paper record being fixed; ``_process_pdf`` and
            ``_file_paper`` update its attributes in place.
        tmpfile: Path to the uploaded temporary file.

    Raises:
        SubmitException: Propagated from processing after being logged.
    """
    logger.info(f'Submit fix: Zpracovávám file={tmpfile} fix_for=#{paper.paper_id}')
    t_start = datetime.datetime.now()

    try:
        # Third argument is allow_broken: a fix has to be a valid PDF,
        # since it is unconditionally filed as non-broken below.
        self._process_pdf(paper, tmpfile, False)
        # Third argument is broken: store the result as paper.file_name.
        self._file_paper(paper, tmpfile, False)
        duration = (datetime.datetime.now() - t_start).total_seconds()
        logger.info(f'Submit fix: Hotovo: file={paper.file_name} pages={paper.pages} bytes={paper.bytes} time={duration:.3f}')
    except SubmitException as e:
        duration = (datetime.datetime.now() - t_start).total_seconds()
        logger.info(f'Submit fix: {e} (time={duration:.3f})')
        raise
 
 
def _file_paper(self, paper: db.Paper, tmpfile: str, broken: bool):
round = paper.task.round
round = paper.task.round
secure_category = werkzeug.utils.secure_filename(round.category)
secure_category = werkzeug.utils.secure_filename(round.category)
top_level = f'{round.year}-{secure_category}-{round.seq}'
top_level = f'{round.year}-{secure_category}-{round.seq}'
@@ -44,12 +59,16 @@ class Submitter:
@@ -44,12 +59,16 @@ class Submitter:
secure_task_code = werkzeug.utils.secure_filename(paper.task.code)
secure_task_code = werkzeug.utils.secure_filename(paper.task.code)
full_name = mo.util.link_to_dir(tmpfile, sub_user_dir, prefix=f'{secure_task_code}-{paper.type.name[:3]}-', suffix='.pdf')
full_name = mo.util.link_to_dir(tmpfile, sub_user_dir, prefix=f'{secure_task_code}-{paper.type.name[:3]}-', suffix='.pdf')
paper.file_name = os.path.join(user_dir, os.path.basename(full_name))
file_name = os.path.join(user_dir, os.path.basename(full_name))
 
if broken:
 
paper.orig_file_name = file_name
 
else:
 
paper.file_name = file_name
def _do_submit(self, paper: db.Paper, tmpfile: str):
def _process_pdf(self, paper: db.Paper, tmpfile: str, allow_broken: bool) -> bool:
# Zpracování PDF spustíme v samostatném procesu, aby bylo dostatečně oddělené
# Zpracování PDF spustíme v samostatném procesu, aby bylo dostatečně oddělené
pipe_rx, pipe_tx = multiprocessing.Pipe(duplex=False)
pipe_rx, pipe_tx = multiprocessing.Pipe(duplex=False)
proc = multiprocessing.Process(name='submit', target=Submitter._process_pdf, args=(tmpfile, pipe_tx))
proc = multiprocessing.Process(name='submit', target=Submitter._do_process_pdf, args=(tmpfile, pipe_tx))
proc.start()
proc.start()
pipe_tx.close()
pipe_tx.close()
@@ -73,19 +92,21 @@ class Submitter:
@@ -73,19 +92,21 @@ class Submitter:
if 'error' in result:
if 'error' in result:
logger.info('Submit: PDF error: %s', result['error'])
logger.info('Submit: PDF error: %s', result['error'])
if result['pdf-like']:
if result['pdf-like'] and allow_broken:
logger.info('Submit: Soubor akceptován s varováním')
logger.info('Submit: Soubor akceptován s varováním')
paper.broken = True
broken = True
else:
else:
raise SubmitException('Soubor není korektní PDF.')
raise SubmitException('Soubor není korektní PDF.')
else:
else:
paper.pages = result['pages']
paper.pages = result['pages']
 
broken = False
paper.bytes = os.path.getsize(tmpfile)
paper.bytes = os.path.getsize(tmpfile)
self._file_paper(paper, tmpfile)
return broken
# Zpracování PDF běží v samostatném procesu, výsledek pošle jako slovník rourou.
# Zpracování PDF běží v samostatném procesu, výsledek pošle jako slovník rourou.
def _process_pdf(tmpfile: str, pipe):
@staticmethod
 
def _do_process_pdf(tmpfile: str, pipe):
result: Any = {}
result: Any = {}
try:
try:
with pikepdf.open(tmpfile, attempt_recovery=False) as pdf:
with pikepdf.open(tmpfile, attempt_recovery=False) as pdf:
@@ -95,6 +116,7 @@ class Submitter:
@@ -95,6 +116,7 @@ class Submitter:
result['pdf-like'] = Submitter._looks_like_pdf(tmpfile)
result['pdf-like'] = Submitter._looks_like_pdf(tmpfile)
pipe.send(result)
pipe.send(result)
 
@staticmethod
def _looks_like_pdf(tmpfile: str) -> bool:
def _looks_like_pdf(tmpfile: str) -> bool:
"""PDFka, která nezvládne otevřít QPDF, jsme ochotni akceptovat s warningem,
"""PDFka, která nezvládne otevřít QPDF, jsme ochotni akceptovat s warningem,
pokud začátek i konec souboru vypadá jako PDF."""
pokud začátek i konec souboru vypadá jako PDF."""
Loading