Skip to content
GitLab
Explore
Sign in
Primary navigation
Search or go to…
Project
Odevzdávací Systém MO
Manage
Activity
Members
Labels
Plan
Issues
Issue boards
Milestones
Code
Merge requests
Repository
Branches
Commits
Tags
Repository graph
Compare revisions
Deploy
Model registry
Analyze
Model experiments
Help
Help
Support
GitLab documentation
Compare GitLab plans
GitLab community forum
Contribute to GitLab
Provide feedback
Keyboard shortcuts
?
Snippets
Groups
Projects
Show more breadcrumbs
MO-P
Odevzdávací Systém MO
Commits
74dfe259
Project 'mj/mo-submit' was moved to 'mo-p/osmo'. Please update any links and bookmarks that may still have the old path.
Commit
74dfe259
authored
Sep 8, 2021
by
Jiří Setnička
Committed by
Martin Mareš
Sep 25, 2021
Browse files
Options
Downloads
Patches
Plain Diff
Třídění skenů - samotné třídění pomocí PyPDF2
parent
bf47dbb0
No related branches found
No related tags found
No related merge requests found
Changes
1
Show whitespace changes
Inline
Side-by-side
Showing
1 changed file
mo/jobs/protocols.py
+108
-6
108 additions, 6 deletions
mo/jobs/protocols.py
with
108 additions
and
6 deletions
mo/jobs/protocols.py
+
108
−
6
View file @
74dfe259
...
...
@@ -11,7 +11,8 @@ from sqlalchemy import delete
from
sqlalchemy.orm
import
joinedload
from
sqlalchemy.orm.query
import
Query
import
subprocess
from
typing
import
List
,
Optional
from
typing
import
Dict
,
List
,
Optional
,
Tuple
import
PyPDF2
import
mo
import
mo.config
as
config
...
...
@@ -391,6 +392,18 @@ def schedule_sort_scans(job_id: int, for_user: db.User) -> int:
return
the_job
.
job_id
class SortScansPaper:
    """Bundle a paper record together with the scan pages assigned to it."""

    # The paper record wrapped by this bundle.
    paper: db.Paper
    # Scan pages gathered for the paper; empty right after construction.
    pages: List[db.ScanPage]

    def __init__(self, paper: db.Paper) -> None:
        """Store *paper* and start with an empty list of pages."""
        self.paper = paper
        self.pages = []

    def filename(self) -> str:
        """Return the output PDF file name for this paper.

        The name is built from the task id of the paper's task and the
        user id of the paper's ``for_user_obj``.
        """
        record = self.paper
        task_id = record.task.task_id
        user_id = record.for_user_obj.user_id
        return f"out_{task_id}_{user_id}.pdf"
@job_handler
(
db
.
JobType
.
sort_scans
)
def
handle_sort_scans
(
the_job
:
TheJob
):
job
=
the_job
.
job
...
...
@@ -403,17 +416,106 @@ def handle_sort_scans(the_job: TheJob):
sess
=
db
.
get_session
()
contest
=
sess
.
query
(
db
.
Contest
).
options
(
joinedload
(
db
.
Contest
.
round
)).
get
(
contest_id
)
assert
contest
is
not
None
round
=
contest
.
round
round_code
=
round
.
round_code_short
()
user_ids
=
set
(
u
[
0
]
for
u
in
_get_user_id_query
(
contest
,
site_id
).
all
())
users
=
sess
.
query
(
db
.
User
).
filter
(
db
.
User
.
user_id
.
in_
(
user_ids
)).
all
()
users_by_id
=
{
u
.
user_id
:
u
for
u
in
users
}
tasks
=
sess
.
query
(
db
.
Task
).
filter
(
db
.
Task
.
task_id
.
in_
(
task_ids
)).
all
()
tasks_by_code
=
{
t
.
code
:
t
for
t
in
tasks
}
tasks_by_id
=
{
t
.
task_id
:
t
for
t
in
tasks
}
pages
=
sess
.
query
(
db
.
ScanPage
).
filter_by
(
job_id
=
the_job
.
job_id
).
all
()
sols
=
sess
.
query
(
db
.
Solution
).
filter
(
db
.
Solution
.
task_id
.
in_
(
task_ids
),
db
.
Solution
.
user_id
.
in_
(
user_ids
),
).
all
()
# Jelikož se plánujeme zamyslet na dlouhou dobu, uzavřeme databázovou session.
sess
.
commit
()
# TODO: paralelně rozstříhat a sestavit správná PDFka
# Nejdříve si vše naplánujeme
sols_map
=
{(
sol
.
task_id
,
sol
.
user_id
):
sol
for
sol
in
sols
}
sols_to_create
:
Dict
[
Tuple
[
int
,
int
],
db
.
Solution
]
=
{}
papers
:
Dict
[
Tuple
[
int
,
int
],
SortScansPaper
]
=
{}
for
p
in
pages
:
if
p
.
is_empty
():
continue
task
=
tasks_by_id
[
p
.
task_id
]
user
=
users_by_id
[
p
.
user_id
]
index
=
(
p
.
task_id
,
p
.
user_id
)
if
index
in
sols_map
:
sol
=
sols_map
[
index
]
elif
index
in
sols_to_create
:
sol
=
sols_to_create
[
index
]
else
:
sol
=
db
.
Solution
(
task
=
task
,
user
=
user
)
sols_to_create
[
index
]
=
sol
if
index
not
in
papers
:
papers
[
index
]
=
SortScansPaper
(
db
.
Paper
(
task
=
task
,
for_user_obj
=
user
,
uploaded_by_obj
=
job
.
user
,
type
=
db
.
PaperType
.
solution
,
note
=
'
Z hromadného skenování
'
,
))
papers
[
index
].
pages
.
append
(
p
)
for
index
in
papers
:
papers
[
index
].
pages
.
sort
(
key
=
lambda
p
:
p
.
seq_id
)
# Poté poskládáme výsledné PDF soubory
readers
:
Dict
[
int
,
PyPDF2
.
PdfFileReader
]
=
{}
for
index
in
papers
:
paper
=
papers
[
index
]
writer
=
PyPDF2
.
PdfFileWriter
()
for
p
in
paper
.
pages
:
if
p
.
file_nr
not
in
readers
:
readers
[
p
.
file_nr
]
=
PyPDF2
.
PdfFileReader
(
job
.
file_path
(
in_files
[
p
.
file_nr
]),
strict
=
False
)
# Přihodíme správnou stránku na výstup
writer
.
addPage
(
readers
[
p
.
file_nr
].
getPage
(
p
.
page_nr
)
)
# Zapíšeme vše do správného souboru
with
open
(
job
.
file_path
(
paper
.
filename
()),
'
wb
'
)
as
f
:
writer
.
write
(
f
)
# ... a uložíme je na správné místo
submitter
=
mo
.
submit
.
Submitter
()
for
index
in
papers
:
paper
=
papers
[
index
]
try
:
print
(
paper
.
paper
)
submitter
.
submit_paper
(
paper
.
paper
,
job
.
file_path
(
paper
.
filename
()))
except
mo
.
submit
.
SubmitException
as
e
:
logger
.
error
(
f
"
Paper task:
{
paper
.
paper
.
for_task
}
, user:
{
paper
.
paper
.
for_user
}
:
{
e
}
"
)
# Nakonec vše uložíme do databáze
for
index
in
sols_to_create
:
sol
=
sols_to_create
[
index
]
sess
.
add
(
sol
)
mo
.
util
.
log
(
type
=
db
.
LogType
.
participant
,
what
=
sol
.
user
.
user_id
,
details
=
{
'
action
'
:
'
solution-created
'
,
'
task
'
:
sol
.
task
.
task_id
,
},
)
for
index
in
papers
:
paper
=
papers
[
index
]
sess
.
add
(
paper
.
paper
)
if
index
in
sols
:
sols
[
index
].
final_submit_obj
=
paper
.
paper
elif
index
in
sols_to_create
:
sols_to_create
[
index
].
final_submit_obj
=
paper
.
paper
# TODO: založit správná řešení
sess
.
commit
()
job
.
result
=
'
Celkem
'
+
mo
.
util_format
.
inflect_number
(
len
(
papers
),
'
roztříděné řešení
'
,
'
roztříděná řešení
'
,
'
roztříděných řešení
'
)
the_job
.
expires_in_minutes
=
config
.
JOB_EXPIRATION_LONG
This diff is collapsed.
Click to expand it.
Preview
0%
Loading
Try again
or
attach a new file
.
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Save comment
Cancel
Please
sign in
to comment