Skip to content
Snippets Groups Projects
Commit 74dfe259 authored by Jiří Setnička's avatar Jiří Setnička Committed by Martin Mareš
Browse files

Třídění skenů - samotné třídění pomocí PyPDF2

parent bf47dbb0
No related branches found
No related tags found
No related merge requests found
This commit is part of merge request !94. Comments created here will be created in the context of that merge request.
......@@ -11,7 +11,8 @@ from sqlalchemy import delete
from sqlalchemy.orm import joinedload
from sqlalchemy.orm.query import Query
import subprocess
from typing import List, Optional
from typing import Dict, List, Optional, Tuple
import PyPDF2
import mo
import mo.config as config
......@@ -391,6 +392,18 @@ def schedule_sort_scans(job_id: int, for_user: db.User) -> int:
return the_job.job_id
class SortScansPaper:
    """One output paper being assembled from sorted scan pages.

    Pairs a ``db.Paper`` record with the scan pages that make up its PDF.
    """

    # The paper record this output belongs to.
    paper: db.Paper
    # Scan pages assigned to this paper; starts empty and is filled by the caller.
    pages: List[db.ScanPage]

    def __init__(self, paper: db.Paper) -> None:
        """Wrap *paper* with an initially empty page list."""
        self.paper = paper
        self.pages = []

    def filename(self) -> str:
        """Return the output PDF file name, built from the task id and the user id."""
        return f"out_{self.paper.task.task_id}_{self.paper.for_user_obj.user_id}.pdf"
@job_handler(db.JobType.sort_scans)
def handle_sort_scans(the_job: TheJob):
job = the_job.job
......@@ -403,17 +416,106 @@ def handle_sort_scans(the_job: TheJob):
# Load everything needed from the database up front.
sess = db.get_session()
contest = sess.query(db.Contest).options(joinedload(db.Contest.round)).get(contest_id)
assert contest is not None
round = contest.round
round_code = round.round_code_short()

user_ids = set(u[0] for u in _get_user_id_query(contest, site_id).all())
users = sess.query(db.User).filter(db.User.user_id.in_(user_ids)).all()
users_by_id = {u.user_id: u for u in users}

tasks = sess.query(db.Task).filter(db.Task.task_id.in_(task_ids)).all()
tasks_by_code = {t.code: t for t in tasks}
tasks_by_id = {t.task_id: t for t in tasks}

pages = sess.query(db.ScanPage).filter_by(job_id=the_job.job_id).all()
sols = sess.query(db.Solution).filter(
    db.Solution.task_id.in_(task_ids),
    db.Solution.user_id.in_(user_ids),
).all()

# Since we are about to be busy for a long time, close the database session.
sess.commit()

# TODO: cut up and assemble the right PDFs in parallel

# First, plan everything: which solutions must be created and which pages
# belong to which (task_id, user_id) paper.
sols_map = {(sol.task_id, sol.user_id): sol for sol in sols}
sols_to_create: Dict[Tuple[int, int], db.Solution] = {}
papers: Dict[Tuple[int, int], SortScansPaper] = {}
for p in pages:
    if p.is_empty():
        continue

    task = tasks_by_id[p.task_id]
    user = users_by_id[p.user_id]
    index = (p.task_id, p.user_id)

    # Create a solution only when none exists yet and none is already planned.
    if index not in sols_map and index not in sols_to_create:
        sols_to_create[index] = db.Solution(task=task, user=user)

    if index not in papers:
        papers[index] = SortScansPaper(db.Paper(
            task=task,
            for_user_obj=user,
            uploaded_by_obj=job.user,
            type=db.PaperType.solution,
            note='Z hromadného skenování',
        ))
    papers[index].pages.append(p)

for paper in papers.values():
    paper.pages.sort(key=lambda p: p.seq_id)

# Then assemble the resulting PDF files.
# Readers are cached per input file number so each input is opened only once.
readers: Dict[int, PyPDF2.PdfFileReader] = {}
for paper in papers.values():
    writer = PyPDF2.PdfFileWriter()
    for p in paper.pages:
        if p.file_nr not in readers:
            readers[p.file_nr] = PyPDF2.PdfFileReader(job.file_path(in_files[p.file_nr]), strict=False)
        # Append the right page to the output
        writer.addPage(
            readers[p.file_nr].getPage(p.page_nr)
        )
    # Write everything to the right file
    with open(job.file_path(paper.filename()), 'wb') as f:
        writer.write(f)

# ... and store them in the right place
submitter = mo.submit.Submitter()
for paper in papers.values():
    try:
        submitter.submit_paper(paper.paper, job.file_path(paper.filename()))
    except mo.submit.SubmitException as e:
        # NOTE(review): a paper whose submission failed is still added to the
        # session and linked as final_submit_obj below — confirm this is intended.
        logger.error(f"Paper task:{paper.paper.for_task}, user:{paper.paper.for_user}: {e}")

# Finally, save everything to the database
for sol in sols_to_create.values():
    sess.add(sol)
    mo.util.log(
        type=db.LogType.participant,
        what=sol.user.user_id,
        details={
            'action': 'solution-created',
            'task': sol.task.task_id,
        },
    )

for index, paper in papers.items():
    sess.add(paper.paper)
    # Look the solution up in sols_map (keyed by (task_id, user_id)); the raw
    # `sols` list can never contain the tuple key, so pre-existing solutions
    # would otherwise never be linked to their new paper.
    if index in sols_map:
        sols_map[index].final_submit_obj = paper.paper
    elif index in sols_to_create:
        sols_to_create[index].final_submit_obj = paper.paper

# TODO: create the right solutions
sess.commit()

job.result = 'Celkem ' + mo.util_format.inflect_number(len(papers), 'roztříděné řešení', 'roztříděná řešení', 'roztříděných řešení')
the_job.expires_in_minutes = config.JOB_EXPIRATION_LONG
0% Loading or .
You are about to add 0 people to the discussion. Proceed with caution.
Please register or sign in to comment