Select Git revision
data_utils.py
Jiří Kalvoda authored
data_utils.py 7.17 KiB
import asyncio
import json
import math
import pprint
import time
import gtfs
def dist(a, b):
    """Return the approximate planar distance between two (lat, lon) points.

    Both points are indexed as ``point[0]`` = latitude and ``point[1]`` =
    longitude, in degrees.  Latitude differences are scaled by 111.2 and
    longitude differences by 71.50 (presumably kilometres per degree for the
    region covered by the sanity checks below), then combined with the
    Pythagorean formula.

    Raises:
        AssertionError: if a latitude is outside (40, 60) or a longitude is
            outside (5, 25); the offending value is the assertion message.
    """
    # Sanity checks; they also catch swapped lat/lon arguments.
    for point in (a, b):
        assert 40 < point[0] < 60, point[0]
        assert 5 < point[1] < 25, point[1]

    lat_part = (a[0] - b[0]) * 111.2
    lon_part = (a[1] - b[1]) * 71.50
    return math.sqrt(lat_part ** 2 + lon_part ** 2)
def shape_indexer(shape, i: float):
    """Look up ``shape`` at a fractional index using linear interpolation.

    ``int(i)`` selects the base element and the remainder is the blend weight
    towards the following element.  A remainder below 0.0001 returns the base
    element unchanged, so an index that lands on the last element does not
    read past the end.

    Elements must support multiplication by a float and addition when
    interpolation is needed (e.g. plain numbers or array rows).
    """
    base = int(i)
    fraction = i - base
    if not fraction < 0.0001:
        return shape[base] * (1 - fraction) + shape[base + 1] * fraction
    return shape[base]
# Ratio of the longitude scale factor (71.50) to the latitude scale factor
# (111.2) used by dist().  Multiplying a longitude in degrees by this value
# puts it on the same scale as a latitude in degrees, so both can be mixed in
# planar geometry.  The misspelled name is kept because other code refers to it.
lon_muntiplicator = 71.50/111.2
async def get_data_of_trip(trip_id, date_from, date_to):
    """Fetch the realtime record of one trip for every capture in a range.

    For each capture time listed between ``date_from`` and ``date_to`` the
    stored files are downloaded, decompressed by piping them through an
    external ``gunzip`` process, and parsed as JSON.  Each file's
    ``"features"`` list is searched for the first entry whose GTFS trip id
    equals ``trip_id``.

    Returns:
        A list of ``(capture_time, feature)`` tuples, one per capture time.
        ``feature`` is ``None`` when no file of that capture contains the
        trip.  If several files contain it, the match from the file that was
        iterated last is kept.
    """
    comm = await get_communication()
    capture_times = await comm.list_realtime_data(date_from, date_to)

    results = []
    for capture_time in capture_times:
        match = None
        print("GET", capture_time)
        files = await comm.get_data(capture_time)
        for _name, compressed in files.items():
            gunzip = await asyncio.create_subprocess_exec(
                "gunzip",
                stdout=asyncio.subprocess.PIPE,
                stdin=asyncio.subprocess.PIPE,
            )
            # stderr is not piped, so the second item is always None.
            raw, _ = await gunzip.communicate(compressed)
            payload = json.loads(raw)
            for feature in payload["features"]:
                if feature["properties"]["trip"]["gtfs"]["trip_id"] == trip_id:
                    match = feature
                    break
        results.append((capture_time, match))
    return results
class Trip:
    """Identify one run of a trip by its trip id and the date it runs on."""

    def __init__(self, trip_id, date):
        self.trip_id, self.date = trip_id, date
class HistoryPoint:
    """One captured realtime record of a vehicle.

    Attributes:
        capture_time: the value passed in as ``capture_time``.
        json: the raw record the point was built from.
        state_position: ``properties.last_position.state_position``.
        openapi_shape_dist_traveled:
            ``properties.last_position.shape_dist_traveled`` as reported.
        lon, lat: the two items of ``geometry.coordinates``, in that order.
        repeated: counter starting at 0; callers increment it when an
            identical record is seen again.
    """

    def __init__(self, json, capture_time):
        self.capture_time = capture_time
        self.json = json

        last_position = json['properties']['last_position']
        self.state_position = last_position['state_position']
        self.openapi_shape_dist_traveled = last_position['shape_dist_traveled']

        coordinates = json["geometry"]["coordinates"]
        self.lon, self.lat = coordinates

        self.repeated = 0
class TripHistory:
    """Sequence of realtime records of one trip, matched onto its GTFS shape.

    Attributes:
        trip: the trip this history belongs to (needs ``trip_id`` and ``date``).
        history: list of ``HistoryPoint`` objects in insertion order.
        trip_json: ``properties.trip`` of the first record that was added,
            or ``None`` while no record has been added.
        stops: set by ``load_stops``.
        gtfs_shape: set by ``load_gtfs_shape``.  Items are indexed as
            ``[0]``, ``[1]``, ``[2]`` and compared with latitude, longitude
            and distance travelled respectively.  It must be loaded before
            adding a record whose state is ``on_track`` or ``at_stop``.
    """

    def __init__(self, trip):
        self.trip = trip
        self.history = []
        self.trip_json = None

    async def load_stops(self):
        """Load the stops of the trip for its date into ``self.stops``."""
        self.stops = await gtfs.for_date(self.trip.date).get_stops_for_trip_id(self.trip.trip_id)

    async def load_gtfs_shape(self):
        """Load the stops and then the shape of the trip into ``self.gtfs_shape``."""
        await self.load_stops()
        self.gtfs_shape = await gtfs.for_date(self.trip.date).get_shape_for_trip_id(self.trip.trip_id)

    async def load_history(self, dt_from, dt_to):
        """Fetch all captures between ``dt_from`` and ``dt_to`` and add them in order."""
        tps = await get_data_of_trip(self.trip.trip_id, dt_from, dt_to)
        for dt, tp in tps:
            self.add_history_point(dt, tp)

    def add_history_point(self, dt, json):
        """Add one captured record to the history.

        Args:
            dt: capture time of the record.
            json: the raw record, or ``None`` when the capture contained no
                data for this trip.

        A ``None`` record is ignored.  A record with the same coordinates,
        origin timestamp and state as the previous point only increments that
        point's ``repeated`` counter.  Any other record becomes a new
        ``HistoryPoint``; when its state is ``on_track`` or ``at_stop`` it is
        also matched onto the GTFS shape, otherwise its ``shape_point`` and
        ``shape_point_dist_traveled`` are ``None``.
        """
        if json is None:
            # Captures without data are skipped.  This branch used to read an
            # undefined name and raised NameError for every such capture.
            return

        trip_json = json["properties"]["trip"]
        if self.trip_json is None:
            self.trip_json = trip_json
        # Later records may carry different trip metadata; that is tolerated
        # and the first version seen is kept.

        if self.history and self._is_repeat(self.history[-1].json, json):
            self.history[-1].repeated += 1
            return

        hp = HistoryPoint(json, dt)
        if hp.state_position in ('on_track', 'at_stop'):
            hp.shape_point = self._locate_on_shape(hp)
            hp.shape_point_dist_traveled = shape_indexer(self.gtfs_shape, hp.shape_point)[2]
        else:
            hp.shape_point = hp.shape_point_dist_traveled = None
        self.history.append(hp)

    @staticmethod
    def _is_repeat(previous, current):
        """Tell whether ``current`` repeats ``previous`` (same position, timestamp, state)."""
        return (
            current["geometry"]["coordinates"] == previous["geometry"]["coordinates"]
            and current["properties"]["last_position"]["origin_timestamp"] == previous["properties"]["last_position"]["origin_timestamp"]
            and current["properties"]["last_position"]["state_position"] == previous["properties"]["last_position"]["state_position"]
        )

    def _locate_on_shape(self, hp):
        """Return the fractional index into ``gtfs_shape`` that best matches ``hp``.

        Every shape segment is scored by the geographic distance from ``hp``
        to its closest point on the segment, plus 0.01 times the absolute
        difference in distance travelled between that point and the previous
        match.  The lowest score wins.
        """
        shape = self.gtfs_shape

        if self.history:
            last_shape_point_id = self.history[-1].shape_point
            if last_shape_point_id is None:
                # The previous point was not on the track yet, so start from
                # the beginning of the shape.
                last_shape_point_id = 0
        else:
            # First point of the history: position on the shape is unknown.
            last_shape_point_id = None
        last_shape_point = (
            shape_indexer(shape, last_shape_point_id)
            if last_shape_point_id is not None
            else None
        )

        dist_traveled_weight = 0.01

        def calc_key(i):
            """Score segment ``i``..``i+1``; return ``(score, fractional index)``."""
            x1, y1 = shape[i][0], shape[i][1] * lon_muntiplicator
            x2, y2 = shape[i + 1][0], shape[i + 1][1] * lon_muntiplicator
            x3, y3 = hp.lat, hp.lon * lon_muntiplicator
            dx, dy = x2 - x1, y2 - y1
            det = dx * dx + dy * dy
            if det < 0.000000000001:
                # Degenerate (zero-length) segment: use its first end point.
                a = 0
            else:
                # Projection of the point onto the segment's line.
                a = (dy * (y3 - y1) + dx * (x3 - x1)) / det
            a = min(max(a, 0), 1)  # clamp onto the segment
            near_pt = shape_indexer(shape, i + a)
            score = dist((hp.lat, hp.lon), (near_pt[0], near_pt[1]))
            if last_shape_point is not None:
                score += dist_traveled_weight * abs(near_pt[2] - last_shape_point[2])
            return score, i + a

        if last_shape_point is None:
            # No previous position: scan the whole shape.
            return min(calc_key(i) for i in range(len(shape) - 1))[1]

        # Expand outwards from the previous position.  A direction is dropped
        # once the travelled-distance penalty at the next segment's boundary
        # alone exceeds the best score found so far.
        opt, opt_key = None, 10e9
        i = int(last_shape_point_id)
        while i < len(shape) - 1 and opt_key >= dist_traveled_weight * abs(shape[i][2] - last_shape_point[2]):
            k, v = calc_key(i)
            if k < opt_key:
                opt_key, opt = k, v
            i += 1
        i = int(last_shape_point_id) - 1
        while i >= 0 and opt_key >= dist_traveled_weight * abs(shape[i + 1][2] - last_shape_point[2]):
            k, v = calc_key(i)
            if k < opt_key:
                opt_key, opt = k, v
            i -= 1
        return opt
class TripPoint:
    """A raw realtime record together with the trip it belongs to.

    The trip is built from the record's GTFS trip id and the calendar date of
    ``capture_time`` (which must provide a ``date()`` method).
    """

    def __init__(self, json, capture_time):
        self.json = json
        gtfs_trip_id = json["properties"]["trip"]["gtfs"]["trip_id"]
        self.trip = Trip(gtfs_trip_id, capture_time.date())
        self.capture_time = capture_time