import json
import os
import subprocess
import time

class Algorithm:
    """An executable algorithm referenced by one or more logged runs.

    Attributes are documented inline in ``__init__``.
    """

    def __init__(self, data_cls, name, path):
        """Record the algorithm and, if requested, query its current version.

        Args:
            data_cls: Owning container object; only its ``validate_versions``
                attribute is read here.
            name: Short name of the algorithm.
            path: Path of the executable; invoked as ``<path> -v`` when
                version validation is enabled.

        Raises:
            ValueError: If the executable runs but its standard output cannot
                be parsed as an integer.
        """
        self.path = path
        self.data_cls = data_cls
        self.name = name
        # Always define the attribute: previously it was left unset when
        # validation was disabled, so any later read raised AttributeError.
        self.current_version = None
        if data_cls.validate_versions:
            try:
                # The context manager closes the pipe and waits for the child.
                with subprocess.Popen([self.path, "-v"], stdout=subprocess.PIPE) as proc:
                    self.current_version = int(proc.stdout.read())
            except FileNotFoundError:
                # Executable is missing: the current version stays unknown.
                self.current_version = None
        # Starts empty; nothing in this class fills it.
        self.data = []


class Run:
    """One logged algorithm invocation, optionally chained to an upstream run."""

    def __init__(self, data_cls, json):
        """Populate the run from a single decoded log record.

        Args:
            data_cls: Owning container. Its ``algorithms`` dict is consulted
                (and extended when the algorithm is new); its
                ``validate_versions`` flag is read by ``is_up_to_date``.
            json: Decoded record. ``algo``, ``version`` and ``n`` are
                required; ``score`` is required unless ``error`` is truthy;
                ``seed`` is required unless a nested ``from`` record exists.
        """
        self.data_cls = data_cls

        # Algorithms are registered under the last path component.
        algo_path = json["algo"]
        short_name = algo_path.split("/")[-1]
        if short_name not in data_cls.algorithms:
            data_cls.algorithms[short_name] = Algorithm(data_cls, short_name, algo_path)
        self.algo = data_cls.algorithms[short_name]
        self.algo_version = json["version"]
        self.args = json.get("args", [])

        self.n = json["n"]
        self.error = json.get("error", None)
        # A failed run has no score of its own; ``n`` is used instead.
        if self.error:
            self.score = self.n
        else:
            self.score = json["score"]
        self.data = json.get("data", {})

        usage = json.get("resources", {})
        self.resources_memory_kb = usage.get("memory_kb", None)
        self.resources_cpu_time_s = usage.get("cpu_time_s", None)
        self.resources_wallclock_s = usage.get("wallclock_s", None)

        self.mistakes = json.get("mistakes")

        if "from" not in json:
            self._from = None
            self.seed = json["seed"]
        else:
            # The seed is inherited from the upstream run.
            self._from = Run(data_cls, json["from"])
            self.seed = self._from.seed

    def print_pipeline(self, versions=False):
        """Return (not print) the pipeline as text, upstream stages first.

        Stages are separated by ``|``. Each stage is the algorithm name,
        followed by ``:<version>`` when ``versions`` is true, followed by the
        arguments as ``(a, b, ...)`` when there are any.
        """
        pieces = []
        if self._from:
            pieces.append(self._from.print_pipeline(versions=versions) + "|")
        pieces.append(self.algo.name)
        if versions:
            pieces.append(f":{self.algo_version}")
        if len(self.args) > 0:
            rendered_args = ", ".join(f"{arg}" for arg in self.args)
            pieces.append("(" + rendered_args + ")")
        return "".join(pieces)

    def is_up_to_date(self):
        """Tell whether this run and everything upstream match current versions.

        Always true when the owner has version validation switched off.
        """
        if self.data_cls.validate_versions:
            if self._from and not self._from.is_up_to_date():
                return False
            return self.algo_version == self.algo.current_version
        return True

class Data:
    """All runs parsed from a log file, grouped by pipeline.

    Attributes are documented inline in ``__init__``.
    """

    def __init__(self, logfile=None, validate_versions=True):
        """Read and index the log.

        Args:
            logfile: Path of the log file. When falsy, the ``LOGFILE``
                environment variable is used, falling back to ``'log'``.
            validate_versions: When true, only runs whose ``is_up_to_date()``
                returns true are added to ``pipelines``; every run is still
                kept in ``runs``.

        Raises:
            OSError: If the log file cannot be opened.
            json.JSONDecodeError: If the log content is not a comma-separated
                sequence of JSON values.
        """
        self.validate_versions = validate_versions
        logfile = logfile or os.environ.get('LOGFILE', 'log')
        # Use a context manager so the handle is closed deterministically.
        with open(logfile, mode='r') as handle:
            text = handle.read()
        # The log is a sequence of JSON records, each followed by a comma.
        # Strip any trailing commas/whitespace instead of blindly cutting two
        # characters, so a missing final newline or an extra blank line no
        # longer corrupts the last record.
        body = text.rstrip(", \t\r\n")
        raw_data = json.loads("[" + body + "]")

        # Algorithm objects keyed by short name; filled in by Run.
        self.algorithms = {}
        # Pipeline text (without versions) -> list of runs in that pipeline.
        self.pipelines = {}
        # Every parsed run, in log order.
        self.runs = []

        for it in raw_data:
            run = Run(self, it)
            self.runs.append(run)
            if not validate_versions or run.is_up_to_date():
                # Compute the key once and append in a single lookup.
                self.pipelines.setdefault(run.print_pipeline(), []).append(run)

def group_by_n(arr):
    """Bucket the items of ``arr`` by their ``n`` attribute.

    Returns a dict mapping each distinct ``n`` to the list of items carrying
    it; both the keys and the items within each list keep the order in which
    they were first encountered.
    """
    buckets = {}
    for item in arr:
        buckets.setdefault(item.n, []).append(item)
    return buckets