Skip to content
Snippets Groups Projects
Select Git revision
  • 579fa1bd535c1f6a6866f5136dd7eb645017336e
  • master default
2 results

data_utils.py

Blame
  • data_utils.py 7.17 KiB
    import asyncio
    import gzip
    import json
    import math
    import pprint
    import time

    import gtfs
    
    
    def dist(a, b):
        """Return the approximate planar distance between two (lat, lon) points.

        Latitude differences are scaled by 111.2 and longitude differences by
        71.50 per degree before the Euclidean norm is taken (presumably
        kilometres per degree around 50 degrees north -- confirm).

        Both points must lie strictly inside latitude 40..60 and longitude
        5..25; anything else trips an assertion carrying the offending value.
        Only indices 0 and 1 of each point are read.
        """
        for point in (a, b):
            assert 40 < point[0] < 60, point[0]
            assert 5 < point[1] < 25, point[1]
        d_lat = (a[0]-b[0])*111.2
        d_lon = (a[1]-b[1])*71.50
        return math.sqrt(d_lat**2 + d_lon**2)
    
    def shape_indexer(shape, i: float):
        """Return the element of ``shape`` at the fractional index ``i``.

        The integer part of ``i`` selects an element. When the fractional part
        is at least 0.0001 the result is the linear blend of that element and
        the next one; otherwise the element itself is returned and the next
        one is never touched, so ``i`` may point at the last element.

        Blending multiplies elements by floats and adds them, so elements must
        support that arithmetic (presumably numeric arrays -- confirm).
        """
        base = int(i)
        frac = i - base
        if frac >= 0.0001:
            return shape[base]*(1-frac) + shape[base+1]*frac
        return shape[base]
    
    
    # Ratio of the per-degree longitude factor to the per-degree latitude factor
    # used by dist(); multiplying a longitude by it puts both axes on one scale.
    lon_muntiplicator = 71.50/111.2
    async def get_data_of_trip(trip_id, date_from, date_to):
        """Collect the realtime record of one trip from every stored snapshot.

        For each snapshot time reported by ``list_realtime_data(date_from,
        date_to)`` the gzip-compressed JSON payload is decompressed and its
        ``features`` list is searched for the entry whose
        ``properties.trip.gtfs.trip_id`` equals ``trip_id``.

        Returns a list of ``(snapshot_time, feature_or_None)`` tuples, one per
        snapshot, in the order the snapshots were listed. ``None`` means the
        trip was not present in that snapshot (or the snapshot had no files).

        Decompression uses the standard ``gzip`` module in the default executor
        rather than an external ``gunzip`` process, so no system binary is
        required and corrupt data raises from ``gzip`` instead of reaching
        ``json.loads`` as truncated output.
        """
        c = await get_communication()
        dts = await c.list_realtime_data(date_from, date_to)
        loop = asyncio.get_running_loop()
        out = []
        for dt in dts:
            tc = None
            print("GET", dt)
            for _fname, data in (await c.get_data(dt)).items():
                # Decompress off the event loop; payload size is not known here.
                raw = await loop.run_in_executor(None, gzip.decompress, data)
                for dato in json.loads(raw)["features"]:
                    # No early exit: if several features match, the last one wins.
                    if dato["properties"]["trip"]["gtfs"]["trip_id"] == trip_id:
                        tc = dato
                # NOTE(review): only the first file of a snapshot is inspected;
                # confirm that a snapshot never holds more than one relevant file.
                break
            out.append((dt, tc))
        return out
    
    class Trip:
        """A GTFS trip id paired with the date it is looked up for."""

        def __init__(self, trip_id, date):
            self.date = date
            self.trip_id = trip_id
    
    class HistoryPoint:
        """One captured position of a vehicle, unpacked from a snapshot feature.

        ``json`` is the feature dict; ``properties.last_position`` supplies
        ``state_position`` and ``shape_dist_traveled``, and
        ``geometry.coordinates`` supplies the position as a (lon, lat) pair.
        ``repeated`` starts at 0.
        """

        def __init__(self, json, capture_time):
            self.capture_time = capture_time
            self.json = json
            last_position = json['properties']['last_position']
            self.state_position = last_position['state_position']
            self.openapi_shape_dist_traveled = last_position['shape_dist_traveled']
            # Coordinates arrive longitude first.
            longitude, latitude = json["geometry"]["coordinates"]
            self.lon = longitude
            self.lat = latitude
            self.repeated = 0
    
    class TripHistory:
        """Realtime positions recorded for one trip, matched onto its GTFS shape.

        Attributes set by ``__init__``:
          trip       -- object whose ``trip_id`` and ``date`` are read.
          history    -- accepted ``HistoryPoint`` objects in insertion order.
          trip_json  -- ``properties.trip`` of the first feature that was added.

        ``stops`` and ``gtfs_shape`` exist only after the ``load_*`` coroutines
        ran. ``add_history_point`` needs ``gtfs_shape`` for points whose state
        is ``on_track`` or ``at_stop``.
        """

        def __init__(self, trip):
            self.trip = trip
            self.history = []
            self.trip_json = None

        async def load_stops(self):
            """Fetch the stops of the trip from the GTFS data of the trip's date."""
            self.stops = await gtfs.for_date(self.trip.date).get_stops_for_trip_id(self.trip.trip_id)

        async def load_gtfs_shape(self):
            """Fetch the stops and then the shape of the trip."""
            await self.load_stops()
            self.gtfs_shape = await gtfs.for_date(self.trip.date).get_shape_for_trip_id(self.trip.trip_id)

        async def load_history(self, dt_from, dt_to):
            """Add every snapshot of the trip between ``dt_from`` and ``dt_to``."""
            tps = await get_data_of_trip(self.trip.trip_id, dt_from, dt_to)

            for dt, tp in tps:
                self.add_history_point(dt, tp)

        def add_history_point(self, dt, json):
            """Record the feature ``json`` captured at ``dt``.

            ``json`` is ``None`` when the snapshot did not contain the trip;
            such snapshots are skipped without recording anything.

            A feature with the same coordinates, ``origin_timestamp`` and
            ``state_position`` as the last recorded point only increments that
            point's ``repeated`` counter. Any other feature becomes a new
            ``HistoryPoint``; when its state is ``on_track`` or ``at_stop`` it is
            matched to the shape and gets ``shape_point`` (fractional shape
            index) and ``shape_point_dist_traveled``; otherwise both are ``None``.
            """
            if json is None:
                # Snapshot without data for this trip: nothing to record.
                return

            if self.trip_json is None:
                self.trip_json = json["properties"]["trip"]
            # Later changes of the trip metadata are not acted upon.

            if self._repeats_last_point(json):
                self.history[-1].repeated += 1
                return

            hp = HistoryPoint(json, dt)
            if hp.state_position in ['on_track', 'at_stop']:
                hp.shape_point = self._match_to_shape(hp)
                hp.shape_point_dist_traveled = shape_indexer(self.gtfs_shape, hp.shape_point)[2]
            else:
                hp.shape_point = hp.shape_point_dist_traveled = None

            self.history.append(hp)

        def _repeats_last_point(self, json):
            """Tell whether ``json`` repeats the position of the last recorded point."""
            if not len(self.history):
                return False
            previous = self.history[-1].json
            return (
                json["geometry"]["coordinates"] == previous["geometry"]["coordinates"]
                and json["properties"]["last_position"]["origin_timestamp"] == previous["properties"]["last_position"]["origin_timestamp"]
                and json["properties"]["last_position"]["state_position"] == previous["properties"]["last_position"]["state_position"]
            )

        def _match_to_shape(self, hp):
            """Return the fractional index of the shape position best matching ``hp``.

            Each shape segment ``i .. i+1`` is scored by the distance from ``hp``
            to its nearest point on the segment plus 0.01 times the difference
            in element ``[2]`` of the shape points (the distance travelled along
            the shape, judging by how it is stored) relative to the previous
            history point. Without a previous point all segments are scanned;
            otherwise the scan walks outwards from the previous position and
            stops once the penalty alone cannot beat the best score so far.

            Shape points are read as ``[0]`` latitude, ``[1]`` longitude.
            The shape needs at least two points.
            """
            if len(self.history):
                last_shape_point_id = self.history[-1].shape_point
                if last_shape_point_id is None:
                    # The previous point was still before the track, so the
                    # vehicle is assumed to be at the beginning of the shape.
                    last_shape_point_id = 0
            else:
                last_shape_point_id = None  # No earlier point: position unknown.

            last_shape_point = shape_indexer(self.gtfs_shape, last_shape_point_id) if last_shape_point_id is not None else None

            dist_traveled_mutiplicator = 0.01

            def calc_key(i):
                # Project hp onto segment i..i+1 in coordinates where one unit of
                # latitude and one unit of scaled longitude have the same length.
                x1, y1 = self.gtfs_shape[i][0], self.gtfs_shape[i][1] * lon_muntiplicator
                x2, y2 = self.gtfs_shape[i+1][0], self.gtfs_shape[i+1][1] * lon_muntiplicator
                x3, y3 = hp.lat, hp.lon * lon_muntiplicator
                dx, dy = x2-x1, y2-y1
                det = dx*dx + dy*dy
                if det < 0.000000000001:
                    # Degenerate (zero-length) segment: use its first point.
                    a = 0
                else:
                    a = (dy*(y3-y1)+dx*(x3-x1))/det
                    a = min(max(a, 0), 1)  # Stay within the segment.
                near_pt = shape_indexer(self.gtfs_shape, i+a)
                return dist((hp.lat, hp.lon), (near_pt[0], near_pt[1])) + (dist_traveled_mutiplicator*abs(near_pt[2] - last_shape_point[2]) if last_shape_point is not None else 0), i+a

            if last_shape_point is None:
                return min(
                        calc_key(i) for i in range(len(self.gtfs_shape)-1)
                )[1]

            opt, opt_key = None, 10e9

            # Walk forward from the previous position.
            i = int(last_shape_point_id)
            while i < len(self.gtfs_shape)-1 and opt_key >= dist_traveled_mutiplicator*abs(self.gtfs_shape[i][2] - last_shape_point[2]):
                k, v = calc_key(i)
                if k < opt_key:
                    opt_key, opt = k, v
                i += 1

            # Walk backward from the previous position.
            i = int(last_shape_point_id) - 1
            while i >= 0 and opt_key >= dist_traveled_mutiplicator*abs(self.gtfs_shape[i+1][2] - last_shape_point[2]):
                k, v = calc_key(i)
                if k < opt_key:
                    opt_key, opt = k, v
                i -= 1
            return opt
    
    
    
    
    
    
    class TripPoint:
        """A snapshot feature together with the trip it refers to.

        The trip is built from ``properties.trip.gtfs.trip_id`` of the feature
        and the calendar date of ``capture_time``.
        """

        def __init__(self, json, capture_time):
            gtfs_trip_id = json["properties"]["trip"]["gtfs"]["trip_id"]
            self.json = json
            self.capture_time = capture_time
            self.trip = Trip(gtfs_trip_id, capture_time.date())