Skip to content
Snippets Groups Projects

Třídění skenů

Merged Jiří Setnička requested to merge jirka/protokoly-trideni into devel
2 files
+ 62
4
Compare changes
  • Side-by-side
  • Inline

Files

+ 169
3
@@ -11,7 +11,8 @@ from sqlalchemy import delete
@@ -11,7 +11,8 @@ from sqlalchemy import delete
from sqlalchemy.orm import joinedload
from sqlalchemy.orm import joinedload
from sqlalchemy.orm.query import Query
from sqlalchemy.orm.query import Query
import subprocess
import subprocess
from typing import List, Optional
from typing import Dict, List, Optional, Tuple
 
import PyPDF2
import mo
import mo
import mo.config as config
import mo.config as config
@@ -174,7 +175,7 @@ def handle_create_protocols(the_job: TheJob):
@@ -174,7 +175,7 @@ def handle_create_protocols(the_job: TheJob):
#
#
def schedule_process_scans(contest: db.Contest, site: Optional[db.Place], for_user: db.User, tasks: List[db.Task], in_file_names: List[str]):
def schedule_process_scans(contest: db.Contest, site: Optional[db.Place], for_user: db.User, tasks: List[db.Task], in_file_names: List[str]) -> int:
place = site or contest.place
place = site or contest.place
the_job = TheJob()
the_job = TheJob()
@@ -197,6 +198,7 @@ def schedule_process_scans(contest: db.Contest, site: Optional[db.Place], for_us
@@ -197,6 +198,7 @@ def schedule_process_scans(contest: db.Contest, site: Optional[db.Place], for_us
'in_files': in_files,
'in_files': in_files,
}
}
the_job.submit()
the_job.submit()
 
return the_job.job_id
@dataclass
@dataclass
@@ -270,7 +272,7 @@ def handle_process_scans(the_job: TheJob):
@@ -270,7 +272,7 @@ def handle_process_scans(the_job: TheJob):
return 'Neznámý účastník'
return 'Neznámý účastník'
sp.user_id = user_id
sp.user_id = user_id
sp.task_id = tasks_by_code[fields[2]].task_id
sp.task_id = tasks_by_code[fields[2]].task_id
sp.seq_id = 0
sp.seq_id = 1
return None
return None
return 'Neznamý formát kódu'
return 'Neznamý formát kódu'
@@ -352,3 +354,167 @@ def _process_scan_file(args: ScanJobArgs) -> List[ScanJobPage]:
@@ -352,3 +354,167 @@ def _process_scan_file(args: ScanJobArgs) -> List[ScanJobPage]:
logger.debug(f'Scan: Strana #{page_nr}: {qr}')
logger.debug(f'Scan: Strana #{page_nr}: {qr}')
return output
return output
 
 
 
#
 
# Job sort_scans: Roztřídí nascanované protokoly a založí jednotlivá řešení
 
#
 
# Je to recyklovaný process_scans job.
 
#
 
# Vstupní JSON (beze změny z process_scans):
 
# { 'contest_id': ID contestu,
 
#		  'site_id':	ID soutěžního místa nebo null,
 
# 'task_ids': [task_id, ...],
 
# 'in_files': [názvy vstupních souborů]
 
# }
 
#
 
# Výstupní JSON:
 
# null
 
 
 
def schedule_sort_scans(job_id: int, for_user: db.User) -> int:
    """Re-submit an existing job as a ``sort_scans`` job.

    The job identified by *job_id* is not replaced by a new one. It is loaded,
    its type, creation time, expiration, owner and description are
    overwritten, and it is submitted again. Its input JSON is left as it was.

    Args:
        job_id: ID of the job to recycle.
        for_user: User who becomes the owner of the recycled job.

    Returns:
        The ``job_id`` of the re-submitted job.

    Both the job and the contest named by the job's ``contest_id`` input are
    asserted to exist.
    """
    # Reuse the old job; only its type and bookkeeping fields change.
    recycled = TheJob(job_id)
    job = recycled.load()
    assert job is not None

    contest_query = db.get_session().query(db.Contest).options(joinedload(db.Contest.round))
    contest = contest_query.get(job.in_json['contest_id'])
    assert contest is not None

    job.type = db.JobType.sort_scans
    job.created_at = mo.now
    job.expires_at = None
    job.user = for_user
    round_code = contest.round.round_code_short()
    job.description = f'Rozdělení již roztříděných scanů {round_code}'

    recycled.submit()
    return recycled.job_id
 
 
 
class SortScansPaper:
    """A paper record bundled with the scanned pages collected for it."""

    # Paper record the collected pages belong to.
    paper: db.Paper
    # Scanned pages gathered for this paper; starts out empty.
    pages: List[db.ScanPage]

    def __init__(self, paper: db.Paper) -> None:
        """Wrap *paper* with an initially empty list of pages."""
        self.pages = []
        self.paper = paper

    def filename(self) -> str:
        """Return the output PDF name built from the paper's task and user IDs."""
        record = self.paper
        task_id = record.task.task_id
        user_id = record.for_user_obj.user_id
        return f"out_{task_id}_{user_id}.pdf"
 
 
 
@job_handler(db.JobType.sort_scans)
def handle_sort_scans(the_job: TheJob):
    """Split the job's scanned pages into per-solution PDF papers.

    Reads ``contest_id``, ``site_id``, ``task_ids`` and ``in_files`` from the
    job's input JSON, groups the job's non-empty scan pages by
    ``(task_id, user_id)``, writes one PDF per group, submits each PDF as a
    paper and finally stores the papers (and any solutions that did not exist
    yet) in the database, pointing each solution's ``final_submit_obj`` at
    its new paper.

    A paper whose submission raises ``mo.submit.SubmitException`` is logged
    and left out: it is neither stored nor attached to a solution, no new
    solution is created for it, and it is not counted in the job result.

    Raises:
        KeyError: if a non-empty page refers to a task or user that is not
            among those loaded for this job.
    """
    job = the_job.job
    assert job.in_json is not None
    contest_id = job.in_json['contest_id']  # type: ignore
    site_id = job.in_json['site_id']  # type: ignore
    task_ids = job.in_json['task_ids']  # type: ignore
    in_files: List[str] = job.in_json['in_files']  # type: ignore

    sess = db.get_session()
    contest = sess.query(db.Contest).options(joinedload(db.Contest.round)).get(contest_id)
    assert contest is not None

    user_ids = {u[0] for u in _get_user_id_query(contest, site_id).all()}
    users = sess.query(db.User).filter(db.User.user_id.in_(user_ids)).all()
    users_by_id = {u.user_id: u for u in users}

    tasks = sess.query(db.Task).filter(db.Task.task_id.in_(task_ids)).all()
    tasks_by_id = {t.task_id: t for t in tasks}

    pages = sess.query(db.ScanPage).filter_by(job_id=the_job.job_id).all()

    sols = sess.query(db.Solution).filter(
        db.Solution.task_id.in_(task_ids),
        db.Solution.user_id.in_(user_ids),
    ).all()

    # We are about to be busy for a long time, so finish the database
    # transaction first.
    sess.commit()

    # First, plan everything: which solutions exist, which must be created,
    # and which pages belong to which paper.
    sols_map = {(sol.task_id, sol.user_id): sol for sol in sols}
    sols_to_create: Dict[Tuple[int, int], db.Solution] = {}
    papers: Dict[Tuple[int, int], SortScansPaper] = {}

    for p in pages:
        if p.is_empty():
            continue
        task = tasks_by_id[p.task_id]
        user = users_by_id[p.user_id]

        index = (p.task_id, p.user_id)
        if index not in sols_map and index not in sols_to_create:
            sols_to_create[index] = db.Solution(task=task, user=user)

        if index not in papers:
            papers[index] = SortScansPaper(db.Paper(
                task=task,
                for_user_obj=user,
                uploaded_by_obj=job.user,
                type=db.PaperType.solution,
                note='Z hromadného skenování',
            ))

        papers[index].pages.append(p)

    for paper in papers.values():
        paper.pages.sort(key=lambda page: page.seq_id)

    # Then assemble the resulting PDF files.
    # Readers are cached per input file so that each file is opened only once.
    readers: Dict[int, PyPDF2.PdfFileReader] = {}
    for paper in papers.values():
        writer = PyPDF2.PdfFileWriter()
        for p in paper.pages:
            if p.file_nr not in readers:
                readers[p.file_nr] = PyPDF2.PdfFileReader(job.file_path(in_files[p.file_nr]), strict=False)
            # Append the right page to the output.
            writer.addPage(
                readers[p.file_nr].getPage(p.page_nr)
            )
        # Write everything to the right file.
        with open(job.file_path(paper.filename()), 'wb') as f:
            writer.write(f)

    # ... and store them in the right place.
    submitter = mo.submit.Submitter()

    # Only papers whose submission succeeded may be stored in the database.
    stored: Dict[Tuple[int, int], SortScansPaper] = {}
    for index, paper in papers.items():
        try:
            submitter.submit_paper(paper.paper, job.file_path(paper.filename()))
        except mo.submit.SubmitException as e:
            # Log the IDs from the grouping key: this function sets the paper
            # only through its relationship attributes, never for_task/for_user.
            task_id, user_id = index
            logger.error(f"Paper task:{task_id}, user:{user_id}: {e}")
        else:
            stored[index] = paper

    # Finally, save everything to the database.
    # NOTE(review): this assumes new Solution/Paper objects enter the session
    # only via sess.add(); if a relationship cascade adds them implicitly,
    # the skipped ones would also need sess.expunge() -- confirm against db.py.
    for index, sol in sols_to_create.items():
        if index not in stored:
            # The paper for this solution failed to submit; do not create
            # a solution for it.
            continue
        sess.add(sol)
        mo.util.log(
            type=db.LogType.participant,
            what=sol.user.user_id,
            details={
                'action': 'solution-created',
                'task': sol.task.task_id,
            },
        )

    for index, paper in stored.items():
        sess.add(paper.paper)
        if index in sols_map:
            sols_map[index].final_submit_obj = paper.paper
        elif index in sols_to_create:
            sols_to_create[index].final_submit_obj = paper.paper

    sess.commit()
    job.result = 'Celkem ' + mo.util_format.inflect_number(len(stored), 'roztříděné řešení', 'roztříděná řešení', 'roztříděných řešení')
    the_job.expires_in_minutes = config.JOB_EXPIRATION_LONG
Loading