diff --git a/data_lib.py b/data_lib.py index 37812aed72624343cb2f042a11fd0a670d5a8881..5ee16d641e013562292e4191d4130ee73769f6ca 100644 --- a/data_lib.py +++ b/data_lib.py @@ -68,7 +68,7 @@ class Run: return self.algo_version == self.algo.current_version class Data: - def __init__(self, logfile=None, validate_versions=True): + def __init__(self, logfile=None, validate_versions=True, remove_duplications=False): self.validate_versions = validate_versions logfile = logfile or os.environ.get('LOGFILE', 'log') raw_json = "[" + open(logfile,mode='r').read()[0:-2] + "]" @@ -78,10 +78,17 @@ class Data: self.pipelines = {} self.runs = [] + duplication_checker = {} + for it in raw_data: run = Run(self, it) self.runs.append(run) if not validate_versions or run.is_up_to_date(): + if remove_duplications: + key = (run.n, run.seed, run.print_pipeline()) + if key in duplication_checker: + continue + duplication_checker[key] = run self.pipelines.setdefault(run.print_pipeline(), []) self.pipelines[run.print_pipeline()].append(run) diff --git a/prace/bakalarka/g.py b/prace/bakalarka/g.py index 236a59f0db5df18cf6b40d93f928b83b749fd33e..3465c2842a6f9b039eeb0bd4d996cac3bc7d890e 100644 --- a/prace/bakalarka/g.py +++ b/prace/bakalarka/g.py @@ -9,7 +9,7 @@ import sys, os d = pathlib.Path("/".join(__file__.split("/")[:-1])) def load(name): - return data_lib.Data(d/name, validate_versions=False) + return data_lib.Data(d/name, validate_versions=False, remove_duplications=True) def load_main_test(): return load(d/"main_test/log")