Skip to content
GitLab
Explore
Sign in
Primary navigation
Search or go to…
Project
Odevzdávací Systém MO
Manage
Activity
Members
Labels
Plan
Issues
Issue boards
Milestones
Code
Merge requests
Repository
Branches
Commits
Tags
Repository graph
Compare revisions
Deploy
Model registry
Analyze
Model experiments
Help
Help
Support
GitLab documentation
Compare GitLab plans
GitLab community forum
Contribute to GitLab
Provide feedback
Keyboard shortcuts
?
Snippets
Groups
Projects
Show more breadcrumbs
MO-P
Odevzdávací Systém MO
Commits
ab04f3f1
Commit
ab04f3f1
authored
Jan 27, 2024
by
Martin Mareš
Browse files
Options
Downloads
Patches
Plain Diff
Jobs: Soft errors a retry
Též úklid v logování.
parent
42ff29b7
No related branches found
No related tags found
No related merge requests found
Changes
2
Show whitespace changes
Inline
Side-by-side
Showing
2 changed files
etc/config.py.example
+10
-0
10 additions, 0 deletions
etc/config.py.example
mo/jobs/__init__.py
+30
-5
30 additions, 5 deletions
mo/jobs/__init__.py
with
40 additions
and
5 deletions
etc/config.py.example
+
10
−
0
View file @
ab04f3f1
...
...
@@ -55,6 +55,16 @@ JOB_EXPIRATION = 5
# Some jobs (scan analysis) have a longer expiration [min]
JOB_EXPIRATION_LONG = 1440
# Re-running jobs that failed with a temporary error:
# Maximum number of times we are willing to run a single job
JOB_MAX_TRIES = 8
# How long we wait after the first failure [min]
JOB_RETRY_AFTER = 5
# Factor by which the wait is multiplied on each subsequent attempt
JOB_RETRY_LONGER = 2
# Maximum wait time [min]
JOB_RETRY_CEILING = 60
# Maximum number of registrations allowed per minute
REG_MAX_PER_MINUTE = 10
...
...
This diff is collapsed.
Click to expand it.
mo/jobs/__init__.py
+
30
−
5
View file @
ab04f3f1
...
...
@@ -3,7 +3,7 @@
from
datetime
import
timedelta
import
os
import
shutil
from
sqlalchemy
import
or_
from
sqlalchemy
import
or_
,
and_
import
sys
from
typing
import
Optional
,
Dict
,
Callable
,
List
...
...
@@ -22,6 +22,11 @@ def send_notify(priority: int):
logger
.
debug
(
'
Job: Není komu poslat notifikaci
'
)
class SoftJobError(RuntimeError):
    """Temporary (soft) failure raised while processing a job.

    If this error occurs during job processing, the job will eventually be
    run again.
    """
class
TheJob
:
"""
Job z pohledu Pythonu.
"""
...
...
@@ -143,17 +148,22 @@ class TheJob:
return
job
=
self
.
job
logger
.
info
(
f
'
{
self
.
log_prefix
}
Spouštím job
{
job
.
type
}
uživatele #
{
job
.
user_id
}
'
)
job
.
state
=
db
.
JobState
.
running
job
.
started_at
=
mo
.
util
.
get_now
()
job
.
finished_at
=
None
job
.
expires_at
=
None
job
.
retry_at
=
None
job
.
num_tries
+=
1
job
.
result
=
""
job
.
out_json
=
None
job
.
out_file
=
None
sess
.
commit
()
logger
.
info
(
f
'
{
self
.
log_prefix
}
Spouštím job
{
job
.
type
.
name
}
uživatele #
{
job
.
user_id
}
'
+
(
f
'
(pokus
{
job
.
num_tries
}
)
'
if
job
.
num_tries
>
1
else
""
))
exc_info
:
ExceptionInfo
=
(
None
,
None
,
None
)
retry_in_min
:
Optional
[
float
]
=
None
try
:
self
.
gatekeeper
=
mo
.
rights
.
Gatekeeper
(
job
.
user
)
...
...
@@ -172,6 +182,16 @@ class TheJob:
else
:
logger
.
info
(
f
'
{
self
.
log_prefix
}
Úspěšně dokončen (
{
job
.
result
}
)
'
)
job
.
state
=
db
.
JobState
.
done
except
SoftJobError
as
e
:
logger
.
error
(
f
'
{
self
.
log_prefix
}
Dočasná chyba:
{
e
}
'
)
if
job
.
num_tries
>=
config
.
JOB_MAX_TRIES
:
logger
.
error
(
f
'
{
self
.
log_prefix
}
Příliš mnoho dočasných chyb
'
)
job
.
state
=
db
.
JobState
.
internal_error
job
.
result
=
'
Interní chyba, informujte prosím správce systému.
'
else
:
job
.
state
=
db
.
JobState
.
soft_error
job
.
result
=
'
Dočasná chyba, dávka bude později spuštěna znovu.
'
retry_in_min
=
min
(
config
.
JOB_RETRY_CEILING
,
config
.
JOB_RETRY_AFTER
*
config
.
JOB_RETRY_LONGER
**
(
job
.
num_tries
-
1
))
except
Exception
as
e
:
exc_info
=
sys
.
exc_info
()
logger
.
error
(
f
'
{
self
.
log_prefix
}
Chyba při zpracování: %s
'
,
e
,
exc_info
=
exc_info
)
...
...
@@ -179,13 +199,17 @@ class TheJob:
job
.
result
=
'
Interní chyba, informujte prosím správce systému.
'
job
.
finished_at
=
mo
.
util
.
get_now
()
if
job
.
state
!=
db
.
JobState
.
internal_error
:
if
retry_in_min
is
not
None
:
job
.
retry_at
=
job
.
finished_at
+
timedelta
(
minutes
=
retry_in_min
)
elif
job
.
state
!=
db
.
JobState
.
internal_error
:
job
.
expires_at
=
job
.
finished_at
+
timedelta
(
minutes
=
self
.
expires_in_minutes
)
sess
.
commit
()
wait_sec
=
(
job
.
started_at
-
job
.
created_at
).
total_seconds
()
run_sec
=
(
job
.
finished_at
-
job
.
started_at
).
total_seconds
()
logger
.
info
(
f
'
{
self
.
log_prefix
}
Doba běhu
{
run_sec
:
.
3
f
}
s, čekání ve frontě
{
wait_sec
:
.
3
f
}
s
'
)
logger
.
info
(
f
'
{
self
.
log_prefix
}
Doba běhu
{
run_sec
:
.
3
f
}
s, čekání ve frontě
{
wait_sec
:
.
3
f
}
s
'
+
(
f
'
, pokus
{
job
.
num_tries
}
'
if
job
.
num_tries
>
1
else
""
)
+
(
f
'
, retry za
{
retry_in_min
*
60
:
.
0
f
}
s
'
if
retry_in_min
is
not
None
else
""
))
# Maily o interních chybách posíláme mimo transakci
if
job
.
state
==
db
.
JobState
.
internal_error
:
...
...
@@ -218,7 +242,8 @@ def process_jobs(min_priority: int = 0):
# Probereme joby, které by měly běžet
ready
=
(
sess
.
query
(
db
.
Job
.
job_id
)
.
filter_by
(
state
=
db
.
JobState
.
ready
)
.
filter
(
or_
(
db
.
Job
.
state
==
db
.
JobState
.
ready
,
and_
(
db
.
Job
.
state
==
db
.
JobState
.
soft_error
,
db
.
Job
.
retry_at
<=
mo
.
now
)))
.
filter
(
db
.
Job
.
priority
>=
min_priority
)
.
order_by
(
db
.
Job
.
created_at
)
.
all
())
...
...
This diff is collapsed.
Click to expand it.
Preview
0%
Loading
Try again
or
attach a new file
.
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Save comment
Cancel
Please
sign in
to comment