Skip to content
Snippets Groups Projects
Commit 33f84e0a authored by Martin Mareš's avatar Martin Mareš
Browse files

Merge branch 'mj/upload' into 'devel'

Reforma uploadů

See merge request mj/mo-submit!21
parents 225a4b4d c8a45584
Branches
No related tags found
1 merge request!21Reforma uploadů
...@@ -23,7 +23,7 @@ else ...@@ -23,7 +23,7 @@ else
fi fi
echo "Zakládám adresáře" echo "Zakládám adresáře"
mkdir -p $DEST/{log,var,data/{imports,jobs,statements,submits,tmp}} mkdir -p $DEST/{log,var,data/{errors,imports,jobs,statements,submits,tmp}}
echo "Instaluji balíček" echo "Instaluji balíček"
pip install -c constraints.txt . pip install -c constraints.txt .
... ...
......
...@@ -23,7 +23,9 @@ WEB_ROOT = 'https://mo.mff.cuni.cz/osmo-test/' ...@@ -23,7 +23,9 @@ WEB_ROOT = 'https://mo.mff.cuni.cz/osmo-test/'
WEB_FLAVOR = 'devel' WEB_FLAVOR = 'devel'
# Maximální velikost uploadu. Pozor, je omezena i konfigurací Nginxu. # Maximální velikost uploadu. Pozor, je omezena i konfigurací Nginxu.
# První hodnota se použije pro běžný upload, druhá pro dávkový upload řešení.
MAX_CONTENT_LENGTH = 16777216 MAX_CONTENT_LENGTH = 16777216
MAX_BATCH_CONTENT_LENGTH = 1000000000
# Adresář, do kterého ukládáme data (pro vývoj relativní, pro instalaci absolutní) # Adresář, do kterého ukládáme data (pro vývoj relativní, pro instalaci absolutní)
DATA_DIR = 'data' DATA_DIR = 'data'
... ...
......
...@@ -73,6 +73,7 @@ class Import: ...@@ -73,6 +73,7 @@ class Import:
if self.line_number > 0: if self.line_number > 0:
msg = f"Řádek {self.line_number}: {msg}" msg = f"Řádek {self.line_number}: {msg}"
self.errors.append(msg) self.errors.append(msg)
logger.info('Import: >> %s', msg)
return None # Kdyby bylo otypováno správně jako -> None, při volání by si mypy stěžoval return None # Kdyby bylo otypováno správně jako -> None, při volání by si mypy stěžoval
def parse_email(self, email: str) -> Optional[str]: def parse_email(self, email: str) -> Optional[str]:
... ...
......
...@@ -2,7 +2,6 @@ ...@@ -2,7 +2,6 @@
from datetime import timedelta from datetime import timedelta
import os import os
import secrets
from sqlalchemy import or_ from sqlalchemy import or_
from typing import Optional, Dict, Callable, List from typing import Optional, Dict, Callable, List
...@@ -49,14 +48,8 @@ class TheJob: ...@@ -49,14 +48,8 @@ class TheJob:
def attach_file(self, tmp_name: str, suffix: str): def attach_file(self, tmp_name: str, suffix: str):
"""Vytvoří hardlink na daný pracovní soubor v adresáři jobů.""" """Vytvoří hardlink na daný pracovní soubor v adresáři jobů."""
while True: full_name = mo.util.link_to_dir(tmp_name, mo.util.data_dir('jobs'))
name = secrets.token_hex(16) + suffix name = os.path.basename(full_name)
try:
os.link(tmp_name, job_file_path(name))
break
except FileExistsError:
pass
logger.debug(f'Job: Příloha {tmp_name} -> {name}') logger.debug(f'Job: Příloha {tmp_name} -> {name}')
return name return name
...@@ -74,10 +67,10 @@ class TheJob: ...@@ -74,10 +67,10 @@ class TheJob:
job = self.job job = self.job
if job.in_file is not None: if job.in_file is not None:
os.unlink(job_file_path(job.in_file)) mo.util.unlink_if_exists(job_file_path(job.in_file))
if job.out_file is not None: if job.out_file is not None:
os.unlink(job_file_path(job.out_file)) mo.util.unlink_if_exists(job_file_path(job.out_file))
sess.delete(job) sess.delete(job)
sess.commit() sess.commit()
... ...
......
...@@ -2,10 +2,10 @@ import datetime ...@@ -2,10 +2,10 @@ import datetime
import multiprocessing import multiprocessing
import os import os
import pikepdf import pikepdf
import secrets
import werkzeug.utils import werkzeug.utils
import mo.db as db import mo.db as db
import mo.util
from mo.util import logger from mo.util import logger
...@@ -16,8 +16,8 @@ class SubmitException(RuntimeError): ...@@ -16,8 +16,8 @@ class SubmitException(RuntimeError):
class Submitter: class Submitter:
submit_dir: str submit_dir: str
def __init__(self, instance_path: str = 'data'): def __init__(self):
self.submit_dir = os.path.join(instance_path, 'submits') self.submit_dir = mo.util.data_dir('submits')
def submit_paper(self, paper: db.Paper, tmpfile: str): def submit_paper(self, paper: db.Paper, tmpfile: str):
logger.info(f'Submit: Zpracovávám file={tmpfile} for=#{paper.for_user_obj.user_id} by=#{paper.uploaded_by_obj.user_id} type={paper.type.name}') logger.info(f'Submit: Zpracovávám file={tmpfile} for=#{paper.for_user_obj.user_id} by=#{paper.uploaded_by_obj.user_id} type={paper.type.name}')
...@@ -29,10 +29,11 @@ class Submitter: ...@@ -29,10 +29,11 @@ class Submitter:
logger.info(f'Submit: Hotovo: file={paper.file_name} pages={paper.pages} bytes={paper.bytes} time={duration:.3f}') logger.info(f'Submit: Hotovo: file={paper.file_name} pages={paper.pages} bytes={paper.bytes} time={duration:.3f}')
except SubmitException as e: except SubmitException as e:
duration = (datetime.datetime.now() - t_start).total_seconds() duration = (datetime.datetime.now() - t_start).total_seconds()
logger.info(f'Submit: Chyba: {e} (time={duration:.3f})') preserved_as = mo.util.link_to_dir(tmpfile, mo.util.data_dir('errors'), prefix='submit-')
logger.info(f'Submit: Chyba: {e} (time={duration:.3f}), uloženo do {preserved_as}')
raise raise
def _create_file_name(self, paper: db.Paper) -> str: def _file_paper(self, paper: db.Paper, tmpfile: str):
round = paper.task.round round = paper.task.round
secure_category = werkzeug.utils.secure_filename(round.category) secure_category = werkzeug.utils.secure_filename(round.category)
top_level = f'{round.year}-{secure_category}-{round.seq}' top_level = f'{round.year}-{secure_category}-{round.seq}'
...@@ -41,18 +42,11 @@ class Submitter: ...@@ -41,18 +42,11 @@ class Submitter:
os.makedirs(sub_user_dir, exist_ok=True) os.makedirs(sub_user_dir, exist_ok=True)
secure_task_code = werkzeug.utils.secure_filename(paper.task.code) secure_task_code = werkzeug.utils.secure_filename(paper.task.code)
while True: full_name = mo.util.link_to_dir(tmpfile, sub_user_dir, prefix=f'{secure_task_code}-{paper.type.name[:3]}-', suffix='.pdf')
nonce = secrets.token_hex(8) paper.file_name = os.path.join(user_dir, os.path.basename(full_name))
file_name = f'{secure_task_code}-{paper.type.name[:3]}-{nonce}.pdf'
if not os.path.lexists(os.path.join(sub_user_dir, file_name)):
break
logger.warning(f'Retrying file creation for {sub_user_dir}/{file_name}')
return os.path.join(user_dir, file_name)
def _do_submit(self, paper: db.Paper, tmpfile: str): def _do_submit(self, paper: db.Paper, tmpfile: str):
# Zpracování PDF spustíme v samostatném procesu, aby bylo dostatečně oddělené # Zpracování PDF spustíme v samostatném procesu, aby bylo dostatečně oddělené
# FIXME: Omezit paměť apod.
pipe_rx, pipe_tx = multiprocessing.Pipe(duplex=False) pipe_rx, pipe_tx = multiprocessing.Pipe(duplex=False)
proc = multiprocessing.Process(name='submit', target=Submitter._process_pdf, args=(tmpfile, pipe_tx)) proc = multiprocessing.Process(name='submit', target=Submitter._process_pdf, args=(tmpfile, pipe_tx))
proc.start() proc.start()
...@@ -82,11 +76,7 @@ class Submitter: ...@@ -82,11 +76,7 @@ class Submitter:
paper.bytes = os.path.getsize(tmpfile) paper.bytes = os.path.getsize(tmpfile)
paper.pages = result['pages'] paper.pages = result['pages']
paper.file_name = self._create_file_name(paper) self._file_paper(paper, tmpfile)
# FIXME: fsync?
dest = os.path.join(self.submit_dir, paper.file_name)
os.rename(tmpfile, dest)
# Zpracování PDF běží v samostatném procesu, výsledek pošle jako slovník rourou. # Zpracování PDF běží v samostatném procesu, výsledek pošle jako slovník rourou.
def _process_pdf(tmpfile, pipe): def _process_pdf(tmpfile, pipe):
... ...
......
...@@ -9,6 +9,7 @@ import locale ...@@ -9,6 +9,7 @@ import locale
import logging import logging
import os import os
import re import re
import secrets
import subprocess import subprocess
import sys import sys
from typing import Any, Optional, NoReturn from typing import Any, Optional, NoReturn
...@@ -164,3 +165,22 @@ def get_round_by_code(code: RoundCode) -> Optional[db.Round]: ...@@ -164,3 +165,22 @@ def get_round_by_code(code: RoundCode) -> Optional[db.Round]:
def data_dir(name: str) -> str: def data_dir(name: str) -> str:
return os.path.join(config.DATA_DIR, name) return os.path.join(config.DATA_DIR, name)
def link_to_dir(src: str, dest_dir: str, prefix: str = "", suffix: str = "") -> str:
    """Hard-link the source file into the target directory under a unique name.

    The new name consists of `prefix`, 16 random hexadecimal digits and
    `suffix`. When a file of that name already exists, a warning is logged
    and another random name is tried.

    Returns the path of the newly created link (`dest_dir` joined with the
    generated name). Errors raised by `os.link` other than `FileExistsError`
    propagate to the caller.
    """
    while True:
        candidate = os.path.join(dest_dir, "".join((prefix, secrets.token_hex(8), suffix)))
        try:
            os.link(src, candidate)
        except FileExistsError:
            # Name collision: report it and draw a new random name.
            logger.warning('Iteruji link_to_dir: %s už existuje', candidate)
        else:
            return candidate
def unlink_if_exists(name: str):
    """Remove the file `name`; do nothing when no such file exists.

    Only `FileNotFoundError` is swallowed, any other error raised by the
    removal propagates to the caller.
    """
    try:
        os.remove(name)
    except FileNotFoundError:
        return
from flask import Flask, request, g, session from flask import Flask, request, g, session
import flask.logging import flask.logging
import flask.wrappers
from flask_bootstrap import Bootstrap from flask_bootstrap import Bootstrap
from flask_sqlalchemy import SQLAlchemy from flask_sqlalchemy import SQLAlchemy
import locale import locale
import logging import logging
import os import os
import tempfile
from typing import Optional
import werkzeug.exceptions import werkzeug.exceptions
import werkzeug.formparser
import mo import mo
import mo.config as config import mo.config as config
...@@ -15,13 +19,52 @@ import mo.rights ...@@ -15,13 +19,52 @@ import mo.rights
import mo.users import mo.users
import mo.util import mo.util
# We bend Flask so that uploaded files are stored in a directory of our choosing,
# which lets them be filed among the data files later by plain hard-linking. To that end
# we subclass Request to make it use a subclassed FormDataParser, which in turn uses
# our stream factory instead of the default one.
def mo_stream_factory(total_content_length, filename, content_type, content_length=None):
    """Create the file object that will hold one uploaded file.

    None of the arguments is used: every upload gets a named temporary file
    with the `upload-` prefix inside the `tmp` data directory.
    """
    upload_dir = mo.util.data_dir('tmp')
    return tempfile.NamedTemporaryFile(prefix='upload-', dir=upload_dir)
class FormDataParser(werkzeug.formparser.FormDataParser):
    """Form data parser that always stores uploaded files via `mo_stream_factory`."""

    def __init__(self,
                 stream_factory=None,
                 charset='utf-8',
                 errors='replace',
                 max_form_memory_size=None,
                 max_content_length=None,
                 cls=None,
                 silent=True):
        # The stream_factory supplied by the caller is deliberately ignored in
        # favour of ours; all other arguments are handed to the parent unchanged.
        super().__init__(
            stream_factory=mo_stream_factory,
            charset=charset,
            errors=errors,
            max_form_memory_size=max_form_memory_size,
            max_content_length=max_content_length,
            cls=cls,
            silent=silent,
        )
class Request(flask.wrappers.Request):
    """Flask request that parses forms with our FormDataParser and supports a per-request upload size limit."""

    # The maximum file size can be raised for batch uploads
    custom_max_content_length: Optional[int] = None

    def __init__(self, *args, **kwargs):
        super().__init__(*args, **kwargs)
        self.form_data_parser_class = FormDataParser

    # Originally an attribute in werkzeug.BaseRequest, redefined as a property in flask.Request
    @property
    def max_content_length(self):
        """Return the per-request limit when one is set (and non-zero), else the configured MAX_CONTENT_LENGTH."""
        override = self.custom_max_content_length
        if override:
            return override
        return mo.config.MAX_CONTENT_LENGTH
# Flask interpretuje relativní cesty všelijak, tak mu vyrobíme absolutní # Flask interpretuje relativní cesty všelijak, tak mu vyrobíme absolutní
mo.config.DATA_DIR = os.path.abspath(mo.config.DATA_DIR) mo.config.DATA_DIR = os.path.abspath(mo.config.DATA_DIR)
static_dir = os.path.abspath('static') static_dir = os.path.abspath('static')
# Aplikační objekt # Aplikační objekt
app = Flask(__name__, instance_path=mo.config.DATA_DIR, static_folder=static_dir) app = Flask(__name__, static_folder=static_dir)
app.config.from_object(config) app.config.from_object(config)
app.request_class = Request
db.flask_db = SQLAlchemy(app, metadata=db.metadata) db.flask_db = SQLAlchemy(app, metadata=db.metadata)
Bootstrap(app) # make bootstrap libs accessible for the app Bootstrap(app) # make bootstrap libs accessible for the app
... ...
......
...@@ -2,8 +2,6 @@ from dataclasses import dataclass ...@@ -2,8 +2,6 @@ from dataclasses import dataclass
from flask import render_template, g, redirect, url_for, flash, request from flask import render_template, g, redirect, url_for, flash, request
from flask_wtf import FlaskForm from flask_wtf import FlaskForm
import flask_wtf.file import flask_wtf.file
import os
import secrets
from sqlalchemy import func, and_ from sqlalchemy import func, and_
from sqlalchemy.orm import joinedload, aliased from sqlalchemy.orm import joinedload, aliased
from sqlalchemy.orm.query import Query from sqlalchemy.orm.query import Query
...@@ -305,11 +303,10 @@ def generic_import(round: db.Round, contest: Optional[db.Contest]): ...@@ -305,11 +303,10 @@ def generic_import(round: db.Round, contest: Optional[db.Contest]):
imp = create_import(user=g.user, type=form.typ.data, fmt=fmt, round=round, contest=contest) imp = create_import(user=g.user, type=form.typ.data, fmt=fmt, round=round, contest=contest)
if form.submit.data: if form.submit.data:
if form.file.data is not None: if form.file.data is not None:
tmp_name = secrets.token_hex(16) + '.' + fmt.get_extension() file = form.file.data.stream
tmp_path = os.path.join(app.instance_path, 'imports', tmp_name) import_tmp = mo.util.link_to_dir(file.name, mo.util.data_dir('imports'), suffix='.csv')
form.file.data.save(tmp_path)
if imp.run(tmp_path): if imp.run(import_tmp):
if imp.cnt_rows == 0: if imp.cnt_rows == 0:
flash('Soubor neobsahoval žádné řádky s daty', 'danger') flash('Soubor neobsahoval žádné řádky s daty', 'danger')
else: else:
...@@ -587,10 +584,7 @@ def org_submit_list(contest_id: int, user_id: int, task_id: int, site_id: Option ...@@ -587,10 +584,7 @@ def org_submit_list(contest_id: int, user_id: int, task_id: int, site_id: Option
del form.submit_fb del form.submit_fb
if form.validate_on_submit(): if form.validate_on_submit():
# FIXME: Viz komentář o efektivitě v user_contest_task file = form.file.data.stream
tmp_name = secrets.token_hex(16)
tmp_path = os.path.join(app.instance_path, 'tmp', tmp_name)
form.file.data.save(tmp_path)
if sc.allow_upload_solutions and form.submit_sol.data: if sc.allow_upload_solutions and form.submit_sol.data:
type = db.PaperType.solution type = db.PaperType.solution
...@@ -601,13 +595,13 @@ def org_submit_list(contest_id: int, user_id: int, task_id: int, site_id: Option ...@@ -601,13 +595,13 @@ def org_submit_list(contest_id: int, user_id: int, task_id: int, site_id: Option
assert sc.task is not None and sc.user is not None assert sc.task is not None and sc.user is not None
paper = db.Paper(task=sc.task, for_user_obj=sc.user, uploaded_by_obj=g.user, type=type, note=form.note.data) paper = db.Paper(task=sc.task, for_user_obj=sc.user, uploaded_by_obj=g.user, type=type, note=form.note.data)
submitter = mo.submit.Submitter(instance_path=app.instance_path) submitter = mo.submit.Submitter()
self_url = url_for('org_submit_list', contest_id=contest_id, user_id=user_id, task_id=task_id, site_id=site_id)
try: try:
submitter.submit_paper(paper, tmp_path) submitter.submit_paper(paper, file.name)
except mo.submit.SubmitException as e: except mo.submit.SubmitException as e:
flash(f'Chyba: {e}', 'danger') flash(f'Chyba: {e}', 'danger')
# FIXME: Tady nemažeme tmpfile, zatím si ho chceme nechat pro analýzu.
return redirect(self_url) return redirect(self_url)
sess.add(paper) sess.add(paper)
...@@ -1032,15 +1026,12 @@ def generic_batch_upload(round: db.Round, contest: Optional[db.Contest], site: O ...@@ -1032,15 +1026,12 @@ def generic_batch_upload(round: db.Round, contest: Optional[db.Contest], site: O
if not can_upload_feedback: if not can_upload_feedback:
raise werkzeug.exceptions.Forbidden() raise werkzeug.exceptions.Forbidden()
request.custom_max_content_length = mo.config.MAX_BATCH_CONTENT_LENGTH
form = UploadSubmitsForm() form = UploadSubmitsForm()
if form.validate_on_submit(): if form.validate_on_submit():
# FIXME: Viz komentář o efektivitě v user_contest_task file = form.file.data.stream
tmp_name = secrets.token_hex(16) mo.jobs.submit.schedule_upload_feedback(round, file.name, f'Nahrání opravených řešení {round.round_code()}',
tmp_path = os.path.join(app.instance_path, 'tmp', tmp_name)
form.file.data.save(tmp_path)
mo.jobs.submit.schedule_upload_feedback(round, tmp_path, f'Nahrání opravených řešení {round.round_code()}',
for_user=g.user, for_user=g.user,
only_contest=contest, only_site=site, only_task=task) only_contest=contest, only_site=site, only_task=task)
return redirect(url_for('org_jobs')) return redirect(url_for('org_jobs'))
...@@ -1048,6 +1039,7 @@ def generic_batch_upload(round: db.Round, contest: Optional[db.Contest], site: O ...@@ -1048,6 +1039,7 @@ def generic_batch_upload(round: db.Round, contest: Optional[db.Contest], site: O
return render_template( return render_template(
'org_generic_batch_upload.html', 'org_generic_batch_upload.html',
round=round, contest=contest, site=site, task=task, round=round, contest=contest, site=site, task=task,
max_size=mo.config.MAX_BATCH_CONTENT_LENGTH,
form=form, form=form,
) )
... ...
......
...@@ -12,7 +12,9 @@ ...@@ -12,7 +12,9 @@
</h2> </h2>
<p>Zde můžete najednou nahrát více opravených řešení zabalených do souboru typu ZIP. <p>Zde můžete najednou nahrát více opravených řešení zabalených do souboru typu ZIP.
Soubory opravených řešení se musí jmenovat stejně jako původní soubory účastnických řešení. Maximální možná velikost ZIPu je {{ max_size|data_size }}.
<p>Soubory opravených řešení se musí jmenovat stejně jako původní soubory účastnických řešení.
{{ wtf.quick_form(form, form_type='basic') }} {{ wtf.quick_form(form, form_type='basic') }}
... ...
......
from flask import render_template, request, g, redirect, url_for, flash from flask import render_template, request, g, redirect, url_for, flash
from flask_wtf import FlaskForm from flask_wtf import FlaskForm
import flask_wtf.file import flask_wtf.file
import os from sqlalchemy import and_
import secrets
from sqlalchemy import or_, and_
from sqlalchemy.orm import joinedload from sqlalchemy.orm import joinedload
from typing import Optional
import werkzeug.exceptions import werkzeug.exceptions
import wtforms import wtforms
import wtforms.validators as validators import wtforms.validators as validators
...@@ -119,21 +116,14 @@ def user_contest_task(contest_id: int, task_id: int): ...@@ -119,21 +116,14 @@ def user_contest_task(contest_id: int, task_id: int):
form = SubmitForm() form = SubmitForm()
if round.ct_can_submit() and form.validate_on_submit(): if round.ct_can_submit() and form.validate_on_submit():
# FIXME: Tohle je pomalé, dělá se tu zbytečná další kopie dat. file = form.file.data.stream
# Nicméně werkzeugu by měla jít podstrčit stream factory,
# která bude vyrábět streamy rovnou uložené v našem tmp.
tmp_name = secrets.token_hex(16)
tmp_path = os.path.join(app.instance_path, 'tmp', tmp_name)
form.file.data.save(tmp_path)
paper = db.Paper(task=task, for_user_obj=g.user, uploaded_by_obj=g.user, type=db.PaperType.solution, note=form.note.data) paper = db.Paper(task=task, for_user_obj=g.user, uploaded_by_obj=g.user, type=db.PaperType.solution, note=form.note.data)
submitter = mo.submit.Submitter(instance_path=app.instance_path) submitter = mo.submit.Submitter()
try: try:
submitter.submit_paper(paper, tmp_path) submitter.submit_paper(paper, file.name)
except mo.submit.SubmitException as e: except mo.submit.SubmitException as e:
flash(f'Chyba: {e}', 'danger') flash(f'Chyba: {e}', 'danger')
# FIXME: Tady nemažeme tmpfile, zatím si ho chceme nechat pro analýzu.
return redirect(url_for('user_contest_task', contest_id=contest_id, task_id=task_id)) return redirect(url_for('user_contest_task', contest_id=contest_id, task_id=task_id))
sess.add(paper) sess.add(paper)
... ...
......
...@@ -9,8 +9,8 @@ import wtforms ...@@ -9,8 +9,8 @@ import wtforms
import mo.db as db import mo.db as db
import mo.jobs import mo.jobs
import mo.util
from mo.util import logger from mo.util import logger
from mo.web import app
class PagerForm(FlaskForm): class PagerForm(FlaskForm):
...@@ -41,7 +41,7 @@ class PagerForm(FlaskForm): ...@@ -41,7 +41,7 @@ class PagerForm(FlaskForm):
def send_task_statement(round: db.Round) -> Response: def send_task_statement(round: db.Round) -> Response:
assert round.tasks_file is not None assert round.tasks_file is not None
file = os.path.join(app.instance_path, 'statements', round.tasks_file) file = os.path.join(mo.util.data_dir('statements'), round.tasks_file)
if os.path.isfile(file): if os.path.isfile(file):
return send_file(file, mimetype='application/pdf') return send_file(file, mimetype='application/pdf')
else: else:
...@@ -64,7 +64,7 @@ def task_paper_filename(paper: db.Paper) -> str: ...@@ -64,7 +64,7 @@ def task_paper_filename(paper: db.Paper) -> str:
def send_task_paper(paper: db.Paper) -> Response: def send_task_paper(paper: db.Paper) -> Response:
file = os.path.join(app.instance_path, 'submits', paper.file_name) file = os.path.join(mo.util.data_dir('submits'), paper.file_name)
if os.path.isfile(file): if os.path.isfile(file):
return send_file(file, mimetype='application/pdf') return send_file(file, mimetype='application/pdf')
... ...
......
0% Loading or .
You are about to add 0 people to the discussion. Proceed with caution.
Please register or sign in to comment