diff --git a/mo/jobs/protocols.py b/mo/jobs/protocols.py
index f908fc81769c5edd6b9ba4d7146daaaf01c12572..2940757081b9bbb6e6933e00e46a252444ae5d5f 100644
--- a/mo/jobs/protocols.py
+++ b/mo/jobs/protocols.py
@@ -11,7 +11,8 @@ from sqlalchemy import delete
 from sqlalchemy.orm import joinedload
 from sqlalchemy.orm.query import Query
 import subprocess
-from typing import List, Optional
+from typing import Dict, List, Optional, Tuple
+import PyPDF2
 
 import mo
 import mo.config as config
@@ -391,6 +392,21 @@ def schedule_sort_scans(job_id: int, for_user: db.User) -> int:
     return the_job.job_id
 
 
+class SortScansPaper:
+    """One output paper assembled from scanned pages for a (task, user) pair."""
+
+    paper: db.Paper
+    pages: List[db.ScanPage]
+
+    def __init__(self, paper: db.Paper) -> None:
+        self.paper = paper
+        self.pages = []
+
+    def filename(self) -> str:
+        """Return the name of the output PDF, built from the task and user IDs."""
+        return f"out_{self.paper.task.task_id}_{self.paper.for_user_obj.user_id}.pdf"
+
+
 @job_handler(db.JobType.sort_scans)
 def handle_sort_scans(the_job: TheJob):
     job = the_job.job
@@ -403,17 +419,110 @@ def handle_sort_scans(the_job: TheJob):
     sess = db.get_session()
     contest = sess.query(db.Contest).options(joinedload(db.Contest.round)).get(contest_id)
     assert contest is not None
-    round = contest.round
-    round_code = round.round_code_short()
 
     user_ids = set(u[0] for u in _get_user_id_query(contest, site_id).all())
+    users = sess.query(db.User).filter(db.User.user_id.in_(user_ids)).all()
+    users_by_id = {u.user_id: u for u in users}
 
     tasks = sess.query(db.Task).filter(db.Task.task_id.in_(task_ids)).all()
-    tasks_by_code = {t.code: t for t in tasks}
+    tasks_by_id = {t.task_id: t for t in tasks}
+
+    pages = sess.query(db.ScanPage).filter_by(job_id=the_job.job_id).all()
+
+    sols = sess.query(db.Solution).filter(
+        db.Solution.task_id.in_(task_ids),
+        db.Solution.user_id.in_(user_ids),
+    ).all()
 
     # Jelikož se plánujeme zamyslet na dlouhou dobu, uzavřeme databázovou session.
     sess.commit()
 
-    # TODO: paralelně rozstříhat a sestavit správná PDFka
+    # First, plan everything: group the non-empty pages by (task_id, user_id)
+    # and note which solutions still have to be created.
+    sols_map = {(sol.task_id, sol.user_id): sol for sol in sols}
+    sols_to_create: Dict[Tuple[int, int], db.Solution] = {}
+    papers: Dict[Tuple[int, int], SortScansPaper] = {}
+
+    for p in pages:
+        if p.is_empty():
+            continue
+        task = tasks_by_id[p.task_id]
+        user = users_by_id[p.user_id]
+
+        index = (p.task_id, p.user_id)
+        if index in sols_map:
+            sol = sols_map[index]
+        elif index in sols_to_create:
+            sol = sols_to_create[index]
+        else:
+            sol = db.Solution(task=task, user=user)
+            sols_to_create[index] = sol
+
+        if index not in papers:
+            papers[index] = SortScansPaper(db.Paper(
+                task=task,
+                for_user_obj=user,
+                uploaded_by_obj=job.user,
+                type=db.PaperType.solution,
+                note='Z hromadného skenování',
+            ))
+
+        papers[index].pages.append(p)
+
+    for index in papers:
+        papers[index].pages.sort(key=lambda p: p.seq_id)
+
+    # Then assemble the resulting PDF files.
+    readers: Dict[int, PyPDF2.PdfFileReader] = {}
+    for index in papers:
+        paper = papers[index]
+        writer = PyPDF2.PdfFileWriter()
+        for p in paper.pages:
+            if p.file_nr not in readers:
+                readers[p.file_nr] = PyPDF2.PdfFileReader(job.file_path(in_files[p.file_nr]), strict=False)
+            # Add the right page to the output.
+            writer.addPage(
+                readers[p.file_nr].getPage(p.page_nr)
+            )
+        # Write everything to the right file.
+        with open(job.file_path(paper.filename()), 'wb') as f:
+            writer.write(f)
+
+    # ... and store them in the right place.
+    submitter = mo.submit.Submitter()
+
+    for index in papers:
+        paper = papers[index]
+        try:
+            submitter.submit_paper(paper.paper, job.file_path(paper.filename()))
+        except mo.submit.SubmitException as e:
+            # NOTE(review): a paper whose submission failed is still added to
+            # the session below -- confirm that this is intended.
+            logger.error(f"Paper task:{paper.paper.for_task}, user:{paper.paper.for_user}: {e}")
+
+    # Finally, save everything to the database.
+    for index in sols_to_create:
+        sol = sols_to_create[index]
+        sess.add(sol)
+        mo.util.log(
+            type=db.LogType.participant,
+            what=sol.user.user_id,
+            details={
+                'action': 'solution-created',
+                'task': sol.task.task_id,
+            },
+        )
 
-    # TODO: založit správná řešení
+    for index in papers:
+        paper = papers[index]
+        sess.add(paper.paper)
+        # Look the solution up in sols_map: `sols` is a plain list of Solution
+        # objects, so testing a (task_id, user_id) tuple against it never matches.
+        if index in sols_map:
+            sols_map[index].final_submit_obj = paper.paper
+        elif index in sols_to_create:
+            sols_to_create[index].final_submit_obj = paper.paper
+
+    sess.commit()
+    job.result = 'Celkem ' + mo.util_format.inflect_number(len(papers), 'roztříděné řešení', 'roztříděná řešení', 'roztříděných řešení')
+    the_job.expires_in_minutes = config.JOB_EXPIRATION_LONG