diff --git a/bin/deploy b/bin/deploy
index 608752711202613956911406379409ede83f2d90..01b4d38edc6354b254c6f82d73665fbb326fead1 100755
--- a/bin/deploy
+++ b/bin/deploy
@@ -23,7 +23,7 @@ else
fi
echo "Zakládám adresáře"
-mkdir -p $DEST/{log,var,data/{imports,jobs,statements,submits,tmp}}
+mkdir -p $DEST/{log,var,data/{errors,imports,jobs,statements,submits,tmp}}
echo "Instaluji balíček"
pip install -c constraints.txt .
diff --git a/etc/config.py.example b/etc/config.py.example
index c2ab8536fe2fe169603bb672b8e99e28a4f68592..442f687040432ee42509af88a5ba4979aa3f8b41 100644
--- a/etc/config.py.example
+++ b/etc/config.py.example
@@ -23,7 +23,9 @@ WEB_ROOT = 'https://mo.mff.cuni.cz/osmo-test/'
WEB_FLAVOR = 'devel'
# Maximální velikost uploadu. Pozor, je omezena i konfigurací Nginxu.
+# První hodnota se použije pro běžný upload, druhá pro dávkový upload řešení.
MAX_CONTENT_LENGTH = 16777216
+MAX_BATCH_CONTENT_LENGTH = 1000000000
# Adresář, do kterého ukládáme data (pro vývoj relativní, pro instalaci absolutní)
DATA_DIR = 'data'
diff --git a/mo/imports.py b/mo/imports.py
index 263c3cbb416858897ec63c22c660d1c6dd5f723c..ffb5a91ee34bd6a5bacb153e49bff23dc1891f27 100644
--- a/mo/imports.py
+++ b/mo/imports.py
@@ -73,6 +73,7 @@ class Import:
if self.line_number > 0:
msg = f"Řádek {self.line_number}: {msg}"
self.errors.append(msg)
+ logger.info('Import: >> %s', msg)
return None # Kdyby bylo otypováno správně jako -> None, při volání by si mypy stěžoval
def parse_email(self, email: str) -> Optional[str]:
diff --git a/mo/jobs/__init__.py b/mo/jobs/__init__.py
index 31c9d4673a19f6e500a73db345298fc1846c6fce..f85f121698a25739a5fe014e25a18fadb4ba9978 100644
--- a/mo/jobs/__init__.py
+++ b/mo/jobs/__init__.py
@@ -2,7 +2,6 @@
from datetime import timedelta
import os
-import secrets
from sqlalchemy import or_
from typing import Optional, Dict, Callable, List
@@ -49,14 +48,8 @@ class TheJob:
def attach_file(self, tmp_name: str, suffix: str):
"""Vytvoří hardlink na daný pracovní soubor v adresáři jobů."""
- while True:
- name = secrets.token_hex(16) + suffix
- try:
- os.link(tmp_name, job_file_path(name))
- break
- except FileExistsError:
- pass
-
+ full_name = mo.util.link_to_dir(tmp_name, mo.util.data_dir('jobs'))
+ name = os.path.basename(full_name)
logger.debug(f'Job: Příloha {tmp_name} -> {name}')
return name
@@ -74,10 +67,10 @@ class TheJob:
job = self.job
if job.in_file is not None:
- os.unlink(job_file_path(job.in_file))
+ mo.util.unlink_if_exists(job_file_path(job.in_file))
if job.out_file is not None:
- os.unlink(job_file_path(job.out_file))
+ mo.util.unlink_if_exists(job_file_path(job.out_file))
sess.delete(job)
sess.commit()
diff --git a/mo/submit.py b/mo/submit.py
index d5ad24853ba496629d1090c7bbd9b7ba58510c25..9f896b00a8de954b9fb0ddded291216b8af7c182 100644
--- a/mo/submit.py
+++ b/mo/submit.py
@@ -2,10 +2,10 @@ import datetime
import multiprocessing
import os
import pikepdf
-import secrets
import werkzeug.utils
import mo.db as db
+import mo.util
from mo.util import logger
@@ -16,8 +16,8 @@ class SubmitException(RuntimeError):
class Submitter:
submit_dir: str
- def __init__(self, instance_path: str = 'data'):
- self.submit_dir = os.path.join(instance_path, 'submits')
+    def __init__(self) -> None:
+        self.submit_dir = mo.util.data_dir('submits')  # the 'submits' subdirectory of the configured data dir
def submit_paper(self, paper: db.Paper, tmpfile: str):
logger.info(f'Submit: Zpracovávám file={tmpfile} for=#{paper.for_user_obj.user_id} by=#{paper.uploaded_by_obj.user_id} type={paper.type.name}')
@@ -29,10 +29,11 @@ class Submitter:
logger.info(f'Submit: Hotovo: file={paper.file_name} pages={paper.pages} bytes={paper.bytes} time={duration:.3f}')
except SubmitException as e:
duration = (datetime.datetime.now() - t_start).total_seconds()
- logger.info(f'Submit: Chyba: {e} (time={duration:.3f})')
+ preserved_as = mo.util.link_to_dir(tmpfile, mo.util.data_dir('errors'), prefix='submit-')
+ logger.info(f'Submit: Chyba: {e} (time={duration:.3f}), uloženo do {preserved_as}')
raise
- def _create_file_name(self, paper: db.Paper) -> str:
+ def _file_paper(self, paper: db.Paper, tmpfile: str):
round = paper.task.round
secure_category = werkzeug.utils.secure_filename(round.category)
top_level = f'{round.year}-{secure_category}-{round.seq}'
@@ -41,18 +42,11 @@ class Submitter:
os.makedirs(sub_user_dir, exist_ok=True)
secure_task_code = werkzeug.utils.secure_filename(paper.task.code)
- while True:
- nonce = secrets.token_hex(8)
- file_name = f'{secure_task_code}-{paper.type.name[:3]}-{nonce}.pdf'
- if not os.path.lexists(os.path.join(sub_user_dir, file_name)):
- break
- logger.warning(f'Retrying file creation for {sub_user_dir}/{file_name}')
-
- return os.path.join(user_dir, file_name)
+ full_name = mo.util.link_to_dir(tmpfile, sub_user_dir, prefix=f'{secure_task_code}-{paper.type.name[:3]}-', suffix='.pdf')
+ paper.file_name = os.path.join(user_dir, os.path.basename(full_name))
def _do_submit(self, paper: db.Paper, tmpfile: str):
# Zpracování PDF spustíme v samostatném procesu, aby bylo dostatečně oddělené
- # FIXME: Omezit paměť apod.
pipe_rx, pipe_tx = multiprocessing.Pipe(duplex=False)
proc = multiprocessing.Process(name='submit', target=Submitter._process_pdf, args=(tmpfile, pipe_tx))
proc.start()
@@ -82,11 +76,7 @@ class Submitter:
paper.bytes = os.path.getsize(tmpfile)
paper.pages = result['pages']
- paper.file_name = self._create_file_name(paper)
-
- # FIXME: fsync?
- dest = os.path.join(self.submit_dir, paper.file_name)
- os.rename(tmpfile, dest)
+ self._file_paper(paper, tmpfile)
# Zpracování PDF běží v samostatném procesu, výsledek pošle jako slovník rourou.
def _process_pdf(tmpfile, pipe):
diff --git a/mo/util.py b/mo/util.py
index 587bfa99cceda2ca27a3f2d62189504fa4615408..7021284e21d63b3c613075e0dd375508a1859d06 100644
--- a/mo/util.py
+++ b/mo/util.py
@@ -9,6 +9,7 @@ import locale
import logging
import os
import re
+import secrets
import subprocess
import sys
from typing import Any, Optional, NoReturn
@@ -164,3 +165,22 @@ def get_round_by_code(code: RoundCode) -> Optional[db.Round]:
def data_dir(name: str) -> str:
return os.path.join(config.DATA_DIR, name)
+
+
+def link_to_dir(src: str, dest_dir: str, prefix: str = "", suffix: str = "") -> str:
+    """Create a hard link to `src` under a unique name in `dest_dir` and return the new path."""
+    # The name is prefix + 16 random hex digits + suffix; retry until os.link() finds a free one.
+    while True:
+        dest = os.path.join(dest_dir, prefix + secrets.token_hex(8) + suffix)
+        try:
+            os.link(src, dest)
+            return dest
+        except FileExistsError:  # name collision: log it and try again with a fresh token
+            logger.warning('Iteruji link_to_dir: %s už existuje', dest)
+
+
+def unlink_if_exists(name: str) -> None:  # Delete the file `name`; do nothing if it does not exist.
+    try:
+        os.unlink(name)
+    except FileNotFoundError:
+        pass  # only a missing file is tolerated; any other exception propagates
diff --git a/mo/web/__init__.py b/mo/web/__init__.py
index 3c7449c91bc6b4cf9b393e6ee3d07dc48f0b4335..b4b09f4dc41efa41333982ce6ae15f2a945ba140 100644
--- a/mo/web/__init__.py
+++ b/mo/web/__init__.py
@@ -1,11 +1,15 @@
from flask import Flask, request, g, session
import flask.logging
+import flask.wrappers
from flask_bootstrap import Bootstrap
from flask_sqlalchemy import SQLAlchemy
import locale
import logging
import os
+import tempfile
+from typing import Optional
import werkzeug.exceptions
+import werkzeug.formparser
import mo
import mo.config as config
@@ -15,13 +19,52 @@ import mo.rights
import mo.users
import mo.util
+
+# We bend Flask so that it stores uploaded files in a directory of our choosing,
+# so that they can then be filed among the data files by plain hard-linking. To this end
+# we subclass Request to make it use a subclassed FormDataParser, which uses our
+# stream factory instead of the default one.
+
+def mo_stream_factory(total_content_length, filename, content_type, content_length=None):
+    return tempfile.NamedTemporaryFile(dir=mo.util.data_dir('tmp'), prefix='upload-')  # all arguments are ignored
+
+
+class FormDataParser(werkzeug.formparser.FormDataParser):
+    """Form parser that always passes mo_stream_factory to the base class, whatever stream_factory it is given."""
+    def __init__(self,
+                 stream_factory=None,  # accepted, but not passed on to the base class
+                 charset='utf-8',
+                 errors='replace',
+                 max_form_memory_size=None,
+                 max_content_length=None,
+                 cls=None,
+                 silent=True):
+        super().__init__(mo_stream_factory, charset, errors, max_form_memory_size, max_content_length, cls, silent)
+
+
+class Request(flask.wrappers.Request):
+    """Flask request that parses forms with our FormDataParser and has an overridable upload size limit."""
+    def __init__(self, *args, **kwargs):
+        super().__init__(*args, **kwargs)
+        self.form_data_parser_class = FormDataParser
+
+    # We can raise the maximum file size for batch uploads
+    custom_max_content_length: Optional[int] = None
+
+    # Originally an attribute in werkzeug.BaseRequest, redefined as a property in flask.Request
+    @property
+    def max_content_length(self):
+        return self.custom_max_content_length or mo.config.MAX_CONTENT_LENGTH  # None/0 override -> configured default
+
+
# Flask interpretuje relativní cesty všelijak, tak mu vyrobíme absolutní
mo.config.DATA_DIR = os.path.abspath(mo.config.DATA_DIR)
static_dir = os.path.abspath('static')
# Aplikační objekt
-app = Flask(__name__, instance_path=mo.config.DATA_DIR, static_folder=static_dir)
+app = Flask(__name__, static_folder=static_dir)
app.config.from_object(config)
+app.request_class = Request
db.flask_db = SQLAlchemy(app, metadata=db.metadata)
Bootstrap(app) # make bootstrap libs accessible for the app
diff --git a/mo/web/org_contest.py b/mo/web/org_contest.py
index 9a7b48b44e0dacaff3f055d0157377e89cb47977..22b1210a85d984148ce36933e6fec44b1afa4035 100644
--- a/mo/web/org_contest.py
+++ b/mo/web/org_contest.py
@@ -2,8 +2,6 @@ from dataclasses import dataclass
from flask import render_template, g, redirect, url_for, flash, request
from flask_wtf import FlaskForm
import flask_wtf.file
-import os
-import secrets
from sqlalchemy import func, and_
from sqlalchemy.orm import joinedload, aliased
from sqlalchemy.orm.query import Query
@@ -305,11 +303,10 @@ def generic_import(round: db.Round, contest: Optional[db.Contest]):
imp = create_import(user=g.user, type=form.typ.data, fmt=fmt, round=round, contest=contest)
if form.submit.data:
if form.file.data is not None:
- tmp_name = secrets.token_hex(16) + '.' + fmt.get_extension()
- tmp_path = os.path.join(app.instance_path, 'imports', tmp_name)
- form.file.data.save(tmp_path)
+ file = form.file.data.stream
+ import_tmp = mo.util.link_to_dir(file.name, mo.util.data_dir('imports'), suffix='.csv')
- if imp.run(tmp_path):
+ if imp.run(import_tmp):
if imp.cnt_rows == 0:
flash('Soubor neobsahoval žádné řádky s daty', 'danger')
else:
@@ -587,10 +584,7 @@ def org_submit_list(contest_id: int, user_id: int, task_id: int, site_id: Option
del form.submit_fb
if form.validate_on_submit():
- # FIXME: Viz komentář o efektivitě v user_contest_task
- tmp_name = secrets.token_hex(16)
- tmp_path = os.path.join(app.instance_path, 'tmp', tmp_name)
- form.file.data.save(tmp_path)
+ file = form.file.data.stream
if sc.allow_upload_solutions and form.submit_sol.data:
type = db.PaperType.solution
@@ -601,13 +595,13 @@ def org_submit_list(contest_id: int, user_id: int, task_id: int, site_id: Option
assert sc.task is not None and sc.user is not None
paper = db.Paper(task=sc.task, for_user_obj=sc.user, uploaded_by_obj=g.user, type=type, note=form.note.data)
- submitter = mo.submit.Submitter(instance_path=app.instance_path)
+ submitter = mo.submit.Submitter()
+ self_url = url_for('org_submit_list', contest_id=contest_id, user_id=user_id, task_id=task_id, site_id=site_id)
try:
- submitter.submit_paper(paper, tmp_path)
+ submitter.submit_paper(paper, file.name)
except mo.submit.SubmitException as e:
flash(f'Chyba: {e}', 'danger')
- # FIXME: Tady nemažeme tmpfile, zatím si ho chceme nechat pro analýzu.
return redirect(self_url)
sess.add(paper)
@@ -1032,15 +1026,12 @@ def generic_batch_upload(round: db.Round, contest: Optional[db.Contest], site: O
if not can_upload_feedback:
raise werkzeug.exceptions.Forbidden()
+ request.custom_max_content_length = mo.config.MAX_BATCH_CONTENT_LENGTH
form = UploadSubmitsForm()
if form.validate_on_submit():
- # FIXME: Viz komentář o efektivitě v user_contest_task
- tmp_name = secrets.token_hex(16)
- tmp_path = os.path.join(app.instance_path, 'tmp', tmp_name)
- form.file.data.save(tmp_path)
-
- mo.jobs.submit.schedule_upload_feedback(round, tmp_path, f'Nahrání opravených řešení {round.round_code()}',
+ file = form.file.data.stream
+ mo.jobs.submit.schedule_upload_feedback(round, file.name, f'Nahrání opravených řešení {round.round_code()}',
for_user=g.user,
only_contest=contest, only_site=site, only_task=task)
return redirect(url_for('org_jobs'))
@@ -1048,6 +1039,7 @@ def generic_batch_upload(round: db.Round, contest: Optional[db.Contest], site: O
return render_template(
'org_generic_batch_upload.html',
round=round, contest=contest, site=site, task=task,
+ max_size=mo.config.MAX_BATCH_CONTENT_LENGTH,
form=form,
)
diff --git a/mo/web/templates/org_generic_batch_upload.html b/mo/web/templates/org_generic_batch_upload.html
index 26ce369d59781db6d4f3ad63c46b2a739768d4d5..10b57e8de866ea57855da5729c9f972f1d7eac02 100644
--- a/mo/web/templates/org_generic_batch_upload.html
+++ b/mo/web/templates/org_generic_batch_upload.html
@@ -12,7 +12,9 @@
</h2>
<p>Zde můžete najednou nahrát více opravených řešení zabalených do souboru typu ZIP.
-Soubory opravených řešení se musí jmenovat stejně jako původní soubory účastnických řešení.
+Maximální možná velikost ZIPu je {{ max_size|data_size }}.
+
+<p>Soubory opravených řešení se musí jmenovat stejně jako původní soubory účastnických řešení.
{{ wtf.quick_form(form, form_type='basic') }}
diff --git a/mo/web/user.py b/mo/web/user.py
index 42cd5634941626092504e7bca38a1565074c60ab..34c85c98b93d9a417e3f2b166fb1b2de7b77ff2a 100644
--- a/mo/web/user.py
+++ b/mo/web/user.py
@@ -1,11 +1,8 @@
from flask import render_template, request, g, redirect, url_for, flash
from flask_wtf import FlaskForm
import flask_wtf.file
-import os
-import secrets
-from sqlalchemy import or_, and_
+from sqlalchemy import and_
from sqlalchemy.orm import joinedload
-from typing import Optional
import werkzeug.exceptions
import wtforms
import wtforms.validators as validators
@@ -119,21 +116,14 @@ def user_contest_task(contest_id: int, task_id: int):
form = SubmitForm()
if round.ct_can_submit() and form.validate_on_submit():
- # FIXME: Tohle je pomalé, dělá se tu zbytečná další kopie dat.
- # Nicméně werkzeugu by měla jít podstrčit stream factory,
- # která bude vyrábět streamy rovnou uložené v našem tmp.
- tmp_name = secrets.token_hex(16)
- tmp_path = os.path.join(app.instance_path, 'tmp', tmp_name)
- form.file.data.save(tmp_path)
-
+ file = form.file.data.stream
paper = db.Paper(task=task, for_user_obj=g.user, uploaded_by_obj=g.user, type=db.PaperType.solution, note=form.note.data)
- submitter = mo.submit.Submitter(instance_path=app.instance_path)
+ submitter = mo.submit.Submitter()
try:
- submitter.submit_paper(paper, tmp_path)
+ submitter.submit_paper(paper, file.name)
except mo.submit.SubmitException as e:
flash(f'Chyba: {e}', 'danger')
- # FIXME: Tady nemažeme tmpfile, zatím si ho chceme nechat pro analýzu.
return redirect(url_for('user_contest_task', contest_id=contest_id, task_id=task_id))
sess.add(paper)
diff --git a/mo/web/util.py b/mo/web/util.py
index 98527f1ad78c06087c421879d5fab05186673532..f021d878e20400ca16c6a68628d396a4c2af3f6b 100644
--- a/mo/web/util.py
+++ b/mo/web/util.py
@@ -9,8 +9,8 @@ import wtforms
import mo.db as db
import mo.jobs
+import mo.util
from mo.util import logger
-from mo.web import app
class PagerForm(FlaskForm):
@@ -41,7 +41,7 @@ class PagerForm(FlaskForm):
def send_task_statement(round: db.Round) -> Response:
assert round.tasks_file is not None
- file = os.path.join(app.instance_path, 'statements', round.tasks_file)
+ file = os.path.join(mo.util.data_dir('statements'), round.tasks_file)
if os.path.isfile(file):
return send_file(file, mimetype='application/pdf')
else:
@@ -64,7 +64,7 @@ def task_paper_filename(paper: db.Paper) -> str:
def send_task_paper(paper: db.Paper) -> Response:
- file = os.path.join(app.instance_path, 'submits', paper.file_name)
+ file = os.path.join(mo.util.data_dir('submits'), paper.file_name)
if os.path.isfile(file):
return send_file(file, mimetype='application/pdf')