Skip to content
Snippets Groups Projects

Třídění skenů

Merged Jiří Setnička requested to merge jirka/protokoly-trideni into devel
All threads resolved!
1 file
+ 1
1
Compare changes
  • Side-by-side
  • Inline
+ 169
3
@@ -11,7 +11,8 @@ from sqlalchemy import delete
from sqlalchemy.orm import joinedload
from sqlalchemy.orm.query import Query
import subprocess
from typing import List, Optional
from typing import Dict, List, Optional, Tuple
import PyPDF2
import mo
import mo.config as config
@@ -174,7 +175,7 @@ def handle_create_protocols(the_job: TheJob):
#
def schedule_process_scans(contest: db.Contest, site: Optional[db.Place], for_user: db.User, tasks: List[db.Task], in_file_names: List[str]):
def schedule_process_scans(contest: db.Contest, site: Optional[db.Place], for_user: db.User, tasks: List[db.Task], in_file_names: List[str]) -> int:
place = site or contest.place
the_job = TheJob()
@@ -197,6 +198,7 @@ def schedule_process_scans(contest: db.Contest, site: Optional[db.Place], for_us
'in_files': in_files,
}
the_job.submit()
return the_job.job_id
@dataclass
@@ -270,7 +272,7 @@ def handle_process_scans(the_job: TheJob):
return 'Neznámý účastník'
sp.user_id = user_id
sp.task_id = tasks_by_code[fields[2]].task_id
sp.seq_id = 0
sp.seq_id = 1
return None
return 'Neznamý formát kódu'
@@ -352,3 +354,167 @@ def _process_scan_file(args: ScanJobArgs) -> List[ScanJobPage]:
logger.debug(f'Scan: Strana #{page_nr}: {qr}')
return output
#
# Job sort_scans: Roztřídí nascanované protokoly a založí jednotlivá řešení
#
# Je to recyklovaný process_scans job.
#
# Vstupní JSON (beze změny z process_scans):
# { 'contest_id': ID contestu,
# 'site_id': ID soutěžního místa nebo none,
# 'task_ids': [task_id, ...],
# 'in_files': [názvy vstupních souborů]
# }
#
# Výstupní JSON:
# null
def schedule_sort_scans(job_id: int, for_user: db.User) -> int:
    """Turn an existing job into a ``sort_scans`` job and submit it again.

    The job identified by ``job_id`` is reused: its type, creation time,
    expiration, owner and description are overwritten before it is
    re-submitted. Its input JSON is left as it was.

    Args:
        job_id: ID of the job to recycle.
        for_user: User who becomes the owner of the recycled job.

    Returns:
        The ``job_id`` of the re-submitted job.
    """
    # Recycle the old job; only its type (and bookkeeping fields) change.
    recycled = TheJob(job_id)
    job_row = recycled.load()
    assert job_row is not None

    # The contest (with its round) is needed only to build the description.
    contest_query = db.get_session().query(db.Contest).options(joinedload(db.Contest.round))
    contest = contest_query.get(job_row.in_json['contest_id'])
    assert contest is not None

    job_row.type = db.JobType.sort_scans
    job_row.created_at = mo.now
    job_row.expires_at = None
    job_row.user = for_user
    round_code = contest.round.round_code_short()
    job_row.description = f'Rozdělení již roztříděných scanů {round_code}'

    recycled.submit()
    return recycled.job_id
class SortScansPaper:
    """A paper being put together by the sort_scans job, plus its scanned pages."""

    # The paper record that will be submitted.
    paper: db.Paper
    # Scanned pages collected for this paper.
    pages: List[db.ScanPage]

    def __init__(self, paper: db.Paper) -> None:
        """Wrap ``paper`` and start with an empty page list."""
        self.pages = []
        self.paper = paper

    def filename(self) -> str:
        """Return the output PDF name built from the paper's task ID and user ID."""
        task_id = self.paper.task.task_id
        user_id = self.paper.for_user_obj.user_id
        return f"out_{task_id}_{user_id}.pdf"
@job_handler(db.JobType.sort_scans)
def handle_sort_scans(the_job: TheJob):
    """Handle a ``sort_scans`` job: build one PDF per (task, user) and store it as a paper.

    Steps performed:
      1. Read the job's input JSON and load the contest, users, tasks, this
         job's scan pages and the already existing solutions.
      2. Group the non-empty pages by ``(task_id, user_id)``; plan a new
         solution for every key that has none and a new paper for every key.
      3. Assemble one output PDF per paper from the input files.
      4. Hand every PDF to ``mo.submit.Submitter``.
      5. Add the new solutions and papers to the session, make each paper the
         ``final_submit_obj`` of its solution and commit.

    Finally the job result text and the job expiration are set.
    """
    job = the_job.job
    assert job.in_json is not None
    params = job.in_json
    contest_id = params['contest_id']  # type: ignore
    site_id = params['site_id']  # type: ignore
    task_ids = params['task_ids']  # type: ignore
    in_files: List[str] = params['in_files']  # type: ignore

    sess = db.get_session()
    contest = sess.query(db.Contest).options(joinedload(db.Contest.round)).get(contest_id)
    assert contest is not None

    user_ids = {row[0] for row in _get_user_id_query(contest, site_id).all()}
    users_by_id = {
        user.user_id: user
        for user in sess.query(db.User).filter(db.User.user_id.in_(user_ids)).all()
    }
    tasks_by_id = {
        task.task_id: task
        for task in sess.query(db.Task).filter(db.Task.task_id.in_(task_ids)).all()
    }
    pages = sess.query(db.ScanPage).filter_by(job_id=the_job.job_id).all()
    sols = sess.query(db.Solution).filter(
        db.Solution.task_id.in_(task_ids),
        db.Solution.user_id.in_(user_ids),
    ).all()

    # We are about to be busy for a long time, so finish with the database session first.
    sess.commit()

    # Phase 1: plan everything, keyed by (task_id, user_id).
    existing_sols = {(s.task_id, s.user_id): s for s in sols}
    new_sols: Dict[Tuple[int, int], db.Solution] = {}
    papers: Dict[Tuple[int, int], SortScansPaper] = {}

    for page in pages:
        if page.is_empty():
            continue

        task = tasks_by_id[page.task_id]
        user = users_by_id[page.user_id]
        key = (page.task_id, page.user_id)

        # A solution is created only when none exists and none is planned yet.
        if key not in existing_sols and key not in new_sols:
            new_sols[key] = db.Solution(task=task, user=user)

        entry = papers.get(key)
        if entry is None:
            entry = SortScansPaper(db.Paper(
                task=task,
                for_user_obj=user,
                uploaded_by_obj=job.user,
                type=db.PaperType.solution,
                note='Z hromadného skenování',
            ))
            papers[key] = entry
        entry.pages.append(page)

    # Pages of every paper go out ordered by their seq_id.
    for entry in papers.values():
        entry.pages.sort(key=lambda pg: pg.seq_id)

    # Phase 2: assemble the resulting PDF files.
    # Readers are cached per input file number so each input is opened once.
    readers: Dict[int, PyPDF2.PdfFileReader] = {}
    for entry in papers.values():
        writer = PyPDF2.PdfFileWriter()
        for page in entry.pages:
            if page.file_nr not in readers:
                readers[page.file_nr] = PyPDF2.PdfFileReader(job.file_path(in_files[page.file_nr]), strict=False)
            # Append the matching page to the output.
            source = readers[page.file_nr]
            writer.addPage(source.getPage(page.page_nr))

        # Write the whole paper into its own file.
        with open(job.file_path(entry.filename()), 'wb') as out:
            writer.write(out)

    # Phase 3: store the files in their proper place.
    # NOTE(review): a failed submission is only logged; the paper is still
    # added to the session below and counted in the result — confirm intended.
    submitter = mo.submit.Submitter()
    for entry in papers.values():
        paper = entry.paper
        try:
            submitter.submit_paper(paper, job.file_path(entry.filename()))
        except mo.submit.SubmitException as e:
            logger.error(f"Paper task:{paper.for_task}, user:{paper.for_user}: {e}")

    # Phase 4: save everything to the database.
    for sol in new_sols.values():
        sess.add(sol)
        mo.util.log(
            type=db.LogType.participant,
            what=sol.user.user_id,
            details={
                'action': 'solution-created',
                'task': sol.task.task_id,
            },
        )

    for key, entry in papers.items():
        sess.add(entry.paper)

        # Existing solutions take precedence over freshly planned ones.
        for sol_map in (existing_sols, new_sols):
            if key in sol_map:
                sol_map[key].final_submit_obj = entry.paper
                break

    sess.commit()

    job.result = 'Celkem ' + mo.util_format.inflect_number(len(papers), 'roztříděné řešení', 'roztříděná řešení', 'roztříděných řešení')
    the_job.expires_in_minutes = config.JOB_EXPIRATION_LONG
Loading