In [1]:
from datetime import datetime
import itertools
import json
import sys

import importlib
import matplotlib.animation as animation
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
import plotly.express as px
import seaborn as sns
from sklearn import metrics
from torch.utils.data import DataLoader

from utils import data_utils, data_loader, shape_utils
from models import basic_ff

# Reload the project modules so that edits made to them on disk are picked up
# without restarting the kernel.
for _module in (data_utils, basic_ff, data_loader, shape_utils):
    importlib.reload(_module)

# Run folder handed to data_utils.load_run_input_data when loading inputs.
RUN_FOLDER = "../results/debug/"

In [None]:
# Load the saved run inputs for each network from the run folder. The results
# are indexed by name further down (e.g. 'train_traces', 'gtfs_data').
kcm_inputs = data_utils.load_run_input_data(
    RUN_FOLDER,
    "kcm/",
)
atb_inputs = data_utils.load_run_input_data(
    RUN_FOLDER,
    "atb/",
)

In [None]:
# Re-import shape_utils so recent edits to the grid helpers take effect.
importlib.reload(shape_utils)

# Build the grid from the KCM training traces.
# NOTE(review): the meaning/units of resolution, timestep and bbox are defined
# by shape_utils.get_grid — confirm there before changing these values.
kcm_grid = shape_utils.get_grid(
    kcm_inputs['train_traces'],
    resolution=32,
    timestep=120,
    bbox=[47.420752,-122.440000,47.827248,-122.110950],
)
# shape_utils.save_grid_anim(kcm_grid, "grid_speeds_kcm.mp4")
print(kcm_grid.shape) # (xsteps, ysteps, tsteps, channels)

# Report the grid size in GB. sys.getsizeof only accounts for the Python object
# itself, which under-reports arrays that do not own their buffer (views) and
# tensors, so prefer the buffer size when the grid exposes one and fall back to
# the original measurement otherwise.
kcm_grid_bytes = getattr(kcm_grid, "nbytes", None)
if kcm_grid_bytes is None:
    kcm_grid_bytes = sys.getsizeof(kcm_grid)
print(kcm_grid_bytes*1e-9)

In [None]:
# Build the grid from the AtB training traces.
# NOTE(review): the meaning/units of resolution, timestep and bbox are defined
# by shape_utils.get_grid — confirm there before changing these values.
atb_grid = shape_utils.get_grid(
    atb_inputs['train_traces'],
    resolution=32,
    timestep=120,
    bbox=[63.333176,10.293082,63.464390,10.558090],
)
# shape_utils.save_grid_anim(atb_grid, "grid_speeds_atb.mp4")
print(atb_grid.shape) # (xsteps, ysteps, tsteps, channels)

# Report the grid size in GB. sys.getsizeof only accounts for the Python object
# itself, which under-reports arrays that do not own their buffer (views) and
# tensors, so prefer the buffer size when the grid exposes one and fall back to
# the original measurement otherwise.
atb_grid_bytes = getattr(atb_grid, "nbytes", None)
if atb_grid_bytes is None:
    atb_grid_bytes = sys.getsizeof(atb_grid)
print(atb_grid_bytes*1e-9)

In [None]:
# Looking at just kcm
# NOTE(review): this positional unpacking relies on kcm_inputs holding exactly
# six entries in this order — confirm against data_utils.load_run_input_data.
train_traces, test_traces, config, gtfs_data, tte_train_chunks, tte_test = kcm_inputs.values()

# Work with a single trip
SHINGLE_ID = 5885
shingle_data = test_traces[test_traces['shingle_id']==SHINGLE_ID]
if shingle_data.empty:
    # Fail with a clear message instead of an out-of-bounds .iloc[0] below.
    raise ValueError(f"No test traces found for shingle_id {SHINGLE_ID}")
# A bare expression in the middle of a cell is not rendered, so display the
# preview explicitly.
display(shingle_data.head())

# Plot overview of the shingle: the GTFS trip it belongs to, with the
# observed trace drawn on the same axes.
plot_data = shingle_data
fig, axes = plt.subplots(1,1)
shape_utils.plot_gtfs_trip(axes, plot_data['trip_id'].iloc[0], gtfs_data)
shape_utils.plot_gtfsrt_trip(axes, plot_data)

In [None]:
# Distribution of the scheduled_time_s column in the KCM training traces;
# the title carries the observed [min, max] of that column.
plot_data = kcm_inputs['train_traces']
scheduled_s = plot_data['scheduled_time_s']
sns.histplot(scheduled_s)
shortest_s = np.min(scheduled_s)
longest_s = np.max(scheduled_s)
plt.title(f"Scheduled Travel Time (s) (KCM) [{shortest_s}, {longest_s}]")
plt.xlabel("Travel Time (s)")
plt.savefig("../plots/kcm_scheduled_time_dist.png")

In [None]:
# Distribution of the scheduled_time_s column in the AtB training traces;
# the title carries the observed [min, max] of that column.
plot_data = atb_inputs['train_traces']
scheduled_s = plot_data['scheduled_time_s']
sns.histplot(scheduled_s)
shortest_s = np.min(scheduled_s)
longest_s = np.max(scheduled_s)
plt.title(f"Scheduled Travel Time (s) (AtB) [{shortest_s}, {longest_s}]")
plt.xlabel("Travel Time (s)")
plt.savefig("../plots/atb_scheduled_time_dist.png")

In [None]:
# Histogram of the gaps between consecutive scheduled arrivals within a trip
gtfs_data = kcm_inputs['gtfs_data']
x = gtfs_data[['trip_id','arrival_s']]
# y carries each row's predecessor (trip_id and arrival_s shifted down by one).
# NOTE(review): this presumes gtfs_data rows are ordered by trip and then by
# stop order, so the previous row is the previous stop — confirm upstream.
y = x.shift()
y.columns = [colname+"_shift" for colname in y.columns]
z = pd.concat([x,y], axis=1)
# Keep only rows whose predecessor belongs to the same trip. Take an explicit
# copy so the new column below is written to an independent frame rather than
# to a filtered slice (avoids pandas' SettingWithCopy hazard).
z = z[z['trip_id']==z['trip_id_shift']].copy()
z['tt'] = z['arrival_s'] - z['arrival_s_shift']
# Only gaps of at most 250 are plotted.
z = z[z['tt']<=250]
z = z.dropna()
sns.histplot(z.tt)
plt.title(f"Stop Arrival Gaps (KCM) [{np.min(z['tt'])}, {np.max(z['tt'])}]")
plt.xlabel("Travel Time (s)")
# Vertical reference line at x=30. axvline's ymin/ymax are fractions of the
# axes height (0..1), so the defaults already span the full plot.
plt.axvline(30, color="black")
plt.savefig("../plots/kcm_gtfs_arrival_gaps.png")

In [None]:
# Histogram of the gaps between consecutive scheduled arrivals within a trip
gtfs_data = atb_inputs['gtfs_data']

x = gtfs_data[['trip_id','arrival_s']]
# y carries each row's predecessor (trip_id and arrival_s shifted down by one).
# NOTE(review): this presumes gtfs_data rows are ordered by trip and then by
# stop order, so the previous row is the previous stop — confirm upstream.
y = x.shift()
y.columns = [colname+"_shift" for colname in y.columns]
z = pd.concat([x,y], axis=1)
# Keep only rows whose predecessor belongs to the same trip. Take an explicit
# copy so the new column below is written to an independent frame rather than
# to a filtered slice (avoids pandas' SettingWithCopy hazard).
z = z[z['trip_id']==z['trip_id_shift']].copy()
z['tt'] = z['arrival_s'] - z['arrival_s_shift']
# Only gaps of at most 250 are plotted.
z = z[z['tt']<=250]
z = z.dropna()
sns.histplot(z.tt)
plt.title(f"Stop Arrival Gaps (AtB) [{np.min(z['tt'])}, {np.max(z['tt'])}]")
plt.xlabel("Travel Time (s)")
# Vertical reference line at x=30. axvline's ymin/ymax are fractions of the
# axes height (0..1), so the defaults already span the full plot.
plt.axvline(30, color="black")
plt.savefig("../plots/atb_gtfs_arrival_gaps.png")

In [None]:
# Shingle distance: sum of dist_calc_m over the rows of each shingle.
# Select the column before aggregating so only it is summed, instead of
# summing every numeric column and discarding all but one.
metric = kcm_inputs['train_traces'].groupby('shingle_id')['dist_calc_m'].sum()
sns.histplot(metric)
plt.title(f"Shingle Distances (KCM) [{np.min(metric)}, {np.round(np.max(metric))}]")
plt.xlabel("Travel Dist (m)")
plt.savefig("../plots/kcm_shingle_dists.png")

In [None]:
# Shingle distance: sum of dist_calc_m over the rows of each shingle.
# Select the column before aggregating so only it is summed, instead of
# summing every numeric column and discarding all but one.
metric = atb_inputs['train_traces'].groupby('shingle_id')['dist_calc_m'].sum()
sns.histplot(metric)
plt.title(f"Shingle Distances (AtB) [{np.min(metric)}, {np.round(np.max(metric))}]")
plt.xlabel("Travel Dist (m)")
plt.savefig("../plots/atb_shingle_dists.png")

In [None]:
# Shingle travel time: the last non-null time_cumulative_s of each shingle.
# Select the column before aggregating so only it is reduced, instead of
# taking the last value of every column and discarding all but one.
metric = kcm_inputs['train_traces'].groupby('shingle_id')['time_cumulative_s'].last()
sns.histplot(metric)
plt.title(f"Shingle Travel Times (KCM) [{np.min(metric)}, {np.round(np.max(metric))}]")
plt.xlabel("Travel Time (s)")
plt.savefig("../plots/kcm_shingle_times.png")

In [None]:
# Shingle travel time: the last non-null time_cumulative_s of each shingle.
# Select the column before aggregating so only it is reduced, instead of
# taking the last value of every column and discarding all but one.
metric = atb_inputs['train_traces'].groupby('shingle_id')['time_cumulative_s'].last()
sns.histplot(metric)
plt.title(f"Shingle Travel Times (AtB) [{np.min(metric)}, {np.round(np.max(metric))}]")
plt.xlabel("Travel Time (s)")
plt.savefig("../plots/atb_shingle_times.png")

In [None]:
# Points per trajectory: number of non-null lat values in each shingle.
# Select the column before aggregating so only it is counted, instead of
# counting every column and discarding all but one.
metric = kcm_inputs['train_traces'].groupby('shingle_id')['lat'].count()
sns.histplot(metric)
plt.title(f"Observations per Shingle (KCM) [{np.min(metric)}, {np.round(np.max(metric))}]")
plt.xlabel("Observations (n)")
plt.savefig("../plots/kcm_shingle_n.png")

In [None]:
# Points per trajectory: number of non-null lat values in each shingle.
# Select the column before aggregating so only it is counted, instead of
# counting every column and discarding all but one.
metric = atb_inputs['train_traces'].groupby('shingle_id')['lat'].count()
sns.histplot(metric)
plt.title(f"Observations per Shingle (AtB) [{np.min(metric)}, {np.round(np.max(metric))}]")
plt.xlabel("Observations (n)")
plt.savefig("../plots/atb_shingle_n.png")