Skip to content
Snippets Groups Projects
Commit 33f84e0a authored by Martin Mareš
Browse files

Merge branch 'mj/upload' into 'devel'

Reforma uploadů

See merge request !21
parents 225a4b4d c8a45584
No related branches found
No related tags found
1 merge request: !21 Reforma uploadů
......@@ -23,7 +23,7 @@ else
fi
echo "Zakládám adresáře"
mkdir -p $DEST/{log,var,data/{imports,jobs,statements,submits,tmp}}
mkdir -p $DEST/{log,var,data/{errors,imports,jobs,statements,submits,tmp}}
echo "Instaluji balíček"
pip install -c constraints.txt .
......
......@@ -23,7 +23,9 @@ WEB_ROOT = 'https://mo.mff.cuni.cz/osmo-test/'
WEB_FLAVOR = 'devel'
# Maximální velikost uploadu. Pozor, je omezena i konfigurací Nginxu.
# První hodnota se použije pro běžný upload, druhá pro dávkový upload řešení.
MAX_CONTENT_LENGTH = 16777216
MAX_BATCH_CONTENT_LENGTH = 1000000000
# Adresář, do kterého ukládáme data (pro vývoj relativní, pro instalaci absolutní)
DATA_DIR = 'data'
......
......@@ -73,6 +73,7 @@ class Import:
if self.line_number > 0:
msg = f"Řádek {self.line_number}: {msg}"
self.errors.append(msg)
logger.info('Import: >> %s', msg)
return None # Kdyby bylo otypováno správně jako -> None, při volání by si mypy stěžoval
def parse_email(self, email: str) -> Optional[str]:
......
......@@ -2,7 +2,6 @@
from datetime import timedelta
import os
import secrets
from sqlalchemy import or_
from typing import Optional, Dict, Callable, List
......@@ -49,14 +48,8 @@ class TheJob:
def attach_file(self, tmp_name: str, suffix: str):
"""Vytvoří hardlink na daný pracovní soubor v adresáři jobů."""
while True:
name = secrets.token_hex(16) + suffix
try:
os.link(tmp_name, job_file_path(name))
break
except FileExistsError:
pass
full_name = mo.util.link_to_dir(tmp_name, mo.util.data_dir('jobs'))
name = os.path.basename(full_name)
logger.debug(f'Job: Příloha {tmp_name} -> {name}')
return name
......@@ -74,10 +67,10 @@ class TheJob:
job = self.job
if job.in_file is not None:
os.unlink(job_file_path(job.in_file))
mo.util.unlink_if_exists(job_file_path(job.in_file))
if job.out_file is not None:
os.unlink(job_file_path(job.out_file))
mo.util.unlink_if_exists(job_file_path(job.out_file))
sess.delete(job)
sess.commit()
......
......@@ -2,10 +2,10 @@ import datetime
import multiprocessing
import os
import pikepdf
import secrets
import werkzeug.utils
import mo.db as db
import mo.util
from mo.util import logger
......@@ -16,8 +16,8 @@ class SubmitException(RuntimeError):
class Submitter:
submit_dir: str
def __init__(self, instance_path: str = 'data'):
self.submit_dir = os.path.join(instance_path, 'submits')
def __init__(self):
self.submit_dir = mo.util.data_dir('submits')
def submit_paper(self, paper: db.Paper, tmpfile: str):
logger.info(f'Submit: Zpracovávám file={tmpfile} for=#{paper.for_user_obj.user_id} by=#{paper.uploaded_by_obj.user_id} type={paper.type.name}')
......@@ -29,10 +29,11 @@ class Submitter:
logger.info(f'Submit: Hotovo: file={paper.file_name} pages={paper.pages} bytes={paper.bytes} time={duration:.3f}')
except SubmitException as e:
duration = (datetime.datetime.now() - t_start).total_seconds()
logger.info(f'Submit: Chyba: {e} (time={duration:.3f})')
preserved_as = mo.util.link_to_dir(tmpfile, mo.util.data_dir('errors'), prefix='submit-')
logger.info(f'Submit: Chyba: {e} (time={duration:.3f}), uloženo do {preserved_as}')
raise
def _create_file_name(self, paper: db.Paper) -> str:
def _file_paper(self, paper: db.Paper, tmpfile: str):
round = paper.task.round
secure_category = werkzeug.utils.secure_filename(round.category)
top_level = f'{round.year}-{secure_category}-{round.seq}'
......@@ -41,18 +42,11 @@ class Submitter:
os.makedirs(sub_user_dir, exist_ok=True)
secure_task_code = werkzeug.utils.secure_filename(paper.task.code)
while True:
nonce = secrets.token_hex(8)
file_name = f'{secure_task_code}-{paper.type.name[:3]}-{nonce}.pdf'
if not os.path.lexists(os.path.join(sub_user_dir, file_name)):
break
logger.warning(f'Retrying file creation for {sub_user_dir}/{file_name}')
return os.path.join(user_dir, file_name)
full_name = mo.util.link_to_dir(tmpfile, sub_user_dir, prefix=f'{secure_task_code}-{paper.type.name[:3]}-', suffix='.pdf')
paper.file_name = os.path.join(user_dir, os.path.basename(full_name))
def _do_submit(self, paper: db.Paper, tmpfile: str):
# Zpracování PDF spustíme v samostatném procesu, aby bylo dostatečně oddělené
# FIXME: Omezit paměť apod.
pipe_rx, pipe_tx = multiprocessing.Pipe(duplex=False)
proc = multiprocessing.Process(name='submit', target=Submitter._process_pdf, args=(tmpfile, pipe_tx))
proc.start()
......@@ -82,11 +76,7 @@ class Submitter:
paper.bytes = os.path.getsize(tmpfile)
paper.pages = result['pages']
paper.file_name = self._create_file_name(paper)
# FIXME: fsync?
dest = os.path.join(self.submit_dir, paper.file_name)
os.rename(tmpfile, dest)
self._file_paper(paper, tmpfile)
# Zpracování PDF běží v samostatném procesu, výsledek pošle jako slovník rourou.
def _process_pdf(tmpfile, pipe):
......
......@@ -9,6 +9,7 @@ import locale
import logging
import os
import re
import secrets
import subprocess
import sys
from typing import Any, Optional, NoReturn
......@@ -164,3 +165,22 @@ def get_round_by_code(code: RoundCode) -> Optional[db.Round]:
def data_dir(name: str) -> str:
    """Return the path of the sub-directory `name` inside the configured data directory.

    The base directory is read from ``config.DATA_DIR`` at call time.
    """
    base = config.DATA_DIR
    return os.path.join(base, name)
def link_to_dir(src: str, dest_dir: str, prefix: str = "", suffix: str = "", token_bytes: int = 8) -> str:
    """Create a hard link to the source file under a unique name in the target directory.

    The link is named ``prefix + <random hex token> + suffix``. If a file of
    that name already exists, a fresh token is drawn and the link is retried.

    Args:
        src: Path of the existing file to link to.
        dest_dir: Directory in which the link is created.
        prefix: Text placed before the random token in the link name.
        suffix: Text placed after the random token (e.g. a file extension).
        token_bytes: Number of random bytes in the token; the name contains
            twice as many hex digits. Must be at least 1.

    Returns:
        The full path of the newly created link.

    Raises:
        ValueError: If ``token_bytes`` is smaller than 1.
        OSError: Any error from ``os.link`` other than ``FileExistsError``
            is propagated unchanged.
    """
    if token_bytes < 1:
        # An empty token would generate the same name forever and never terminate.
        raise ValueError('token_bytes must be at least 1')

    while True:
        dest = os.path.join(dest_dir, prefix + secrets.token_hex(token_bytes) + suffix)
        try:
            # os.link fails if dest exists, so creation and uniqueness check are one step.
            os.link(src, dest)
        except FileExistsError:
            logger.warning('Iteruji link_to_dir: %s už existuje', dest)
        else:
            return dest
def unlink_if_exists(name: str) -> None:
    """Remove the file `name`, doing nothing if it does not exist.

    Only ``FileNotFoundError`` is suppressed; any other error raised by
    ``os.unlink`` is propagated to the caller.
    """
    try:
        os.unlink(name)
    except FileNotFoundError:
        # A file that is already gone is the desired end state.
        pass
from flask import Flask, request, g, session
import flask.logging
import flask.wrappers
from flask_bootstrap import Bootstrap
from flask_sqlalchemy import SQLAlchemy
import locale
import logging
import os
import tempfile
from typing import Optional
import werkzeug.exceptions
import werkzeug.formparser
import mo
import mo.config as config
......@@ -15,13 +19,52 @@ import mo.rights
import mo.users
import mo.util
# We bend Flask so that it stores uploaded files in a directory of our choosing,
# so that they can then be filed among the data files by plain hard-linking. To that
# end we subclass Request to use a subclassed FormDataParser, which uses our
# stream factory instead of the default one.

def mo_stream_factory(total_content_length, filename, content_type, content_length=None):
    """Open a named temporary file in the data 'tmp' directory to hold an upload.

    None of the arguments is used; they are accepted only so that the function
    can be called as a stream factory.
    """
    upload_dir = mo.util.data_dir('tmp')
    return tempfile.NamedTemporaryFile(dir=upload_dir, prefix='upload-')
class FormDataParser(werkzeug.formparser.FormDataParser):
    """Form data parser that always stores uploaded files via `mo_stream_factory`."""

    def __init__(self,
                 stream_factory=None,
                 charset='utf-8',
                 errors='replace',
                 max_form_memory_size=None,
                 max_content_length=None,
                 cls=None,
                 silent=True):
        """Initialize the parser.

        The `stream_factory` argument is accepted but deliberately ignored:
        `mo_stream_factory` is always used instead. All other arguments are
        forwarded unchanged to the base class.
        """
        # Forward by keyword so that a change in the base class's parameter order
        # cannot silently bind a value to the wrong parameter.
        super().__init__(
            stream_factory=mo_stream_factory,
            charset=charset,
            errors=errors,
            max_form_memory_size=max_form_memory_size,
            max_content_length=max_content_length,
            cls=cls,
            silent=silent,
        )
class Request(flask.wrappers.Request):
    """Request that parses forms with our FormDataParser and has an adjustable size limit."""

    # Lets us raise the maximum file size for batch uploads
    custom_max_content_length: Optional[int] = None

    def __init__(self, *args, **kwargs):
        super().__init__(*args, **kwargs)
        self.form_data_parser_class = FormDataParser

    # Originally an attribute in werkzeug.BaseRequest, redefined as a property in flask.Request
    @property
    def max_content_length(self):
        """Return the per-request override if set, otherwise the configured default limit."""
        override = self.custom_max_content_length
        if override:
            return override
        return mo.config.MAX_CONTENT_LENGTH
# Flask interpretuje relativní cesty všelijak, tak mu vyrobíme absolutní
mo.config.DATA_DIR = os.path.abspath(mo.config.DATA_DIR)
static_dir = os.path.abspath('static')
# Aplikační objekt
app = Flask(__name__, instance_path=mo.config.DATA_DIR, static_folder=static_dir)
app = Flask(__name__, static_folder=static_dir)
app.config.from_object(config)
app.request_class = Request
db.flask_db = SQLAlchemy(app, metadata=db.metadata)
Bootstrap(app) # make bootstrap libs accessible for the app
......
......@@ -2,8 +2,6 @@ from dataclasses import dataclass
from flask import render_template, g, redirect, url_for, flash, request
from flask_wtf import FlaskForm
import flask_wtf.file
import os
import secrets
from sqlalchemy import func, and_
from sqlalchemy.orm import joinedload, aliased
from sqlalchemy.orm.query import Query
......@@ -305,11 +303,10 @@ def generic_import(round: db.Round, contest: Optional[db.Contest]):
imp = create_import(user=g.user, type=form.typ.data, fmt=fmt, round=round, contest=contest)
if form.submit.data:
if form.file.data is not None:
tmp_name = secrets.token_hex(16) + '.' + fmt.get_extension()
tmp_path = os.path.join(app.instance_path, 'imports', tmp_name)
form.file.data.save(tmp_path)
file = form.file.data.stream
import_tmp = mo.util.link_to_dir(file.name, mo.util.data_dir('imports'), suffix='.csv')
if imp.run(tmp_path):
if imp.run(import_tmp):
if imp.cnt_rows == 0:
flash('Soubor neobsahoval žádné řádky s daty', 'danger')
else:
......@@ -587,10 +584,7 @@ def org_submit_list(contest_id: int, user_id: int, task_id: int, site_id: Option
del form.submit_fb
if form.validate_on_submit():
# FIXME: Viz komentář o efektivitě v user_contest_task
tmp_name = secrets.token_hex(16)
tmp_path = os.path.join(app.instance_path, 'tmp', tmp_name)
form.file.data.save(tmp_path)
file = form.file.data.stream
if sc.allow_upload_solutions and form.submit_sol.data:
type = db.PaperType.solution
......@@ -601,13 +595,13 @@ def org_submit_list(contest_id: int, user_id: int, task_id: int, site_id: Option
assert sc.task is not None and sc.user is not None
paper = db.Paper(task=sc.task, for_user_obj=sc.user, uploaded_by_obj=g.user, type=type, note=form.note.data)
submitter = mo.submit.Submitter(instance_path=app.instance_path)
submitter = mo.submit.Submitter()
self_url = url_for('org_submit_list', contest_id=contest_id, user_id=user_id, task_id=task_id, site_id=site_id)
try:
submitter.submit_paper(paper, tmp_path)
submitter.submit_paper(paper, file.name)
except mo.submit.SubmitException as e:
flash(f'Chyba: {e}', 'danger')
# FIXME: Tady nemažeme tmpfile, zatím si ho chceme nechat pro analýzu.
return redirect(self_url)
sess.add(paper)
......@@ -1032,15 +1026,12 @@ def generic_batch_upload(round: db.Round, contest: Optional[db.Contest], site: O
if not can_upload_feedback:
raise werkzeug.exceptions.Forbidden()
request.custom_max_content_length = mo.config.MAX_BATCH_CONTENT_LENGTH
form = UploadSubmitsForm()
if form.validate_on_submit():
# FIXME: Viz komentář o efektivitě v user_contest_task
tmp_name = secrets.token_hex(16)
tmp_path = os.path.join(app.instance_path, 'tmp', tmp_name)
form.file.data.save(tmp_path)
mo.jobs.submit.schedule_upload_feedback(round, tmp_path, f'Nahrání opravených řešení {round.round_code()}',
file = form.file.data.stream
mo.jobs.submit.schedule_upload_feedback(round, file.name, f'Nahrání opravených řešení {round.round_code()}',
for_user=g.user,
only_contest=contest, only_site=site, only_task=task)
return redirect(url_for('org_jobs'))
......@@ -1048,6 +1039,7 @@ def generic_batch_upload(round: db.Round, contest: Optional[db.Contest], site: O
return render_template(
'org_generic_batch_upload.html',
round=round, contest=contest, site=site, task=task,
max_size=mo.config.MAX_BATCH_CONTENT_LENGTH,
form=form,
)
......
......@@ -12,7 +12,9 @@
</h2>
<p>Zde můžete najednou nahrát více opravených řešení zabalených do souboru typu ZIP.
Soubory opravených řešení se musí jmenovat stejně jako původní soubory účastnických řešení.
Maximální možná velikost ZIPu je {{ max_size|data_size }}.
<p>Soubory opravených řešení se musí jmenovat stejně jako původní soubory účastnických řešení.
{{ wtf.quick_form(form, form_type='basic') }}
......
from flask import render_template, request, g, redirect, url_for, flash
from flask_wtf import FlaskForm
import flask_wtf.file
import os
import secrets
from sqlalchemy import or_, and_
from sqlalchemy import and_
from sqlalchemy.orm import joinedload
from typing import Optional
import werkzeug.exceptions
import wtforms
import wtforms.validators as validators
......@@ -119,21 +116,14 @@ def user_contest_task(contest_id: int, task_id: int):
form = SubmitForm()
if round.ct_can_submit() and form.validate_on_submit():
# FIXME: Tohle je pomalé, dělá se tu zbytečná další kopie dat.
# Nicméně werkzeugu by měla jít podstrčit stream factory,
# která bude vyrábět streamy rovnou uložené v našem tmp.
tmp_name = secrets.token_hex(16)
tmp_path = os.path.join(app.instance_path, 'tmp', tmp_name)
form.file.data.save(tmp_path)
file = form.file.data.stream
paper = db.Paper(task=task, for_user_obj=g.user, uploaded_by_obj=g.user, type=db.PaperType.solution, note=form.note.data)
submitter = mo.submit.Submitter(instance_path=app.instance_path)
submitter = mo.submit.Submitter()
try:
submitter.submit_paper(paper, tmp_path)
submitter.submit_paper(paper, file.name)
except mo.submit.SubmitException as e:
flash(f'Chyba: {e}', 'danger')
# FIXME: Tady nemažeme tmpfile, zatím si ho chceme nechat pro analýzu.
return redirect(url_for('user_contest_task', contest_id=contest_id, task_id=task_id))
sess.add(paper)
......
......@@ -9,8 +9,8 @@ import wtforms
import mo.db as db
import mo.jobs
import mo.util
from mo.util import logger
from mo.web import app
class PagerForm(FlaskForm):
......@@ -41,7 +41,7 @@ class PagerForm(FlaskForm):
def send_task_statement(round: db.Round) -> Response:
assert round.tasks_file is not None
file = os.path.join(app.instance_path, 'statements', round.tasks_file)
file = os.path.join(mo.util.data_dir('statements'), round.tasks_file)
if os.path.isfile(file):
return send_file(file, mimetype='application/pdf')
else:
......@@ -64,7 +64,7 @@ def task_paper_filename(paper: db.Paper) -> str:
def send_task_paper(paper: db.Paper) -> Response:
file = os.path.join(app.instance_path, 'submits', paper.file_name)
file = os.path.join(mo.util.data_dir('submits'), paper.file_name)
if os.path.isfile(file):
return send_file(file, mimetype='application/pdf')
......
0% Loading or .
You are about to add 0 people to the discussion. Proceed with caution.
Please register or sign in to comment