In [None]:
import os
import pickle
import sys
sys.path.append("../")

import contextily as cx
import importlib
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
import scipy.stats as stats
import seaborn as sns
from sklearn import metrics
import statsmodels.api as sm


from openbustools import plotting, standardfeeds
from openbustools.traveltime import data_loader, model_utils

In [None]:
# Load the (res, out) pair saved under each run's results directory.
(kcm_res, kcm_out), (atb_res, atb_out), (mix_res, mix_out) = map(
    model_utils.load_results,
    ("../results/kcm/", "../results/atb/", "../results/mix/"),
)

In [None]:
# Tag every frame with the run it came from so the rows stay distinguishable
# once the three runs are stacked into a single frame.
for run_label, res_df, out_df in (
    ('kcm', kcm_res, kcm_out),
    ('atb', atb_res, atb_out),
    ('mix', mix_res, mix_out),
):
    res_df['run'] = run_label
    out_df['run'] = run_label

all_res = pd.concat([kcm_res, atb_res, mix_res])
all_out = pd.concat([kcm_out, atb_out, mix_out])
all_res

### Baseline Results

In [None]:
# Untuned, same-city 'mape' rows for the AVGM / FF / GRU archetypes,
# drawn as one horizontal bar group per model with one bar per run.
baseline_mask = (
    all_res['metric'].eq('mape')
    & all_res['is_tuned'].eq(False)
    & all_res['experiment_name'].isin(['same_city'])
    & all_res['model_archetype'].isin(['AVGM', 'FF', 'GRU'])
)
subset = all_res[baseline_mask].copy()

fig, axes = plt.subplots(1, 1)
sns.barplot(ax=axes, data=subset, x='value', y='model', hue='run', palette=plotting.PALETTE)
axes.set_title('Baseline Models')
fig.tight_layout()
axes.set_xlim(0, .5)

### Tuning Results

In [None]:
# Tuned vs. untuned 'mape' for the models of the KCM run, one panel per experiment.
model_names = ['AVGM', 'FF', 'FF_TUNED', 'GRU', 'GRU_TUNED']
subset = all_res[
    (all_res['metric'] == 'mape')
    & (all_res['run'] == 'kcm')
    & all_res['model'].isin(model_names)
].copy()  # copy AFTER filtering so the relabelling below writes to an independent frame

# Relabel tuned variants with their base model name so each model occupies a
# single position on the y axis; the 'is_tuned' hue still separates the bars.
for tuned_name, base_name in (('FF_TUNED', 'FF'), ('GRU_TUNED', 'GRU')):
    subset.loc[subset['model'] == tuned_name, 'model'] = base_name

fig, axes = plt.subplots(2, 1)
axes = axes.flatten()
panels = (('same_city', 'Same City'), ('diff_city', 'Different City'))
for ax, (experiment, panel_title) in zip(axes, panels):
    sns.barplot(ax=ax, data=subset[subset['experiment_name'] == experiment], x='value', y='model', hue='is_tuned', palette=plotting.PALETTE)
    ax.set_title(panel_title)
fig.suptitle("Models Trained on KCM")
fig.tight_layout()
# Shared x range so the two panels are directly comparable.
for ax in axes:
    ax.set_xlim(0, 1)

In [None]:
# Tuned vs. untuned 'mape' for the models of the AtB run, one panel per experiment.
model_names = ['AVGM', 'FF', 'FF_TUNED', 'GRU', 'GRU_TUNED']
subset = all_res[
    (all_res['metric'] == 'mape')
    & (all_res['run'] == 'atb')
    & all_res['model'].isin(model_names)
].copy()  # copy AFTER filtering so the relabelling below writes to an independent frame

# Relabel tuned variants with their base model name so each model occupies a
# single position on the y axis; the 'is_tuned' hue still separates the bars.
for tuned_name, base_name in (('FF_TUNED', 'FF'), ('GRU_TUNED', 'GRU')):
    subset.loc[subset['model'] == tuned_name, 'model'] = base_name

fig, axes = plt.subplots(2, 1)
axes = axes.flatten()
panels = (('same_city', 'Same City'), ('diff_city', 'Different City'))
for ax, (experiment, panel_title) in zip(axes, panels):
    sns.barplot(ax=ax, data=subset[subset['experiment_name'] == experiment], x='value', y='model', hue='is_tuned', palette=plotting.PALETTE)
    ax.set_title(panel_title)
fig.suptitle("Models Trained on AtB")
fig.tight_layout()
# Shared x range so the two panels are directly comparable.
for ax in axes:
    ax.set_xlim(0, 1)

In [None]:
# Tuned vs. untuned 'mape' for the models of the mixed (KCM + AtB) run,
# one panel per experiment.
model_names = ['AVGM', 'FF', 'FF_TUNED', 'GRU', 'GRU_TUNED']
subset = all_res[
    (all_res['metric'] == 'mape')
    & (all_res['run'] == 'mix')
    & all_res['model'].isin(model_names)
].copy()  # copy AFTER filtering so the relabelling below writes to an independent frame

# Relabel tuned variants with their base model name so each model occupies a
# single position on the y axis; the 'is_tuned' hue still separates the bars.
for tuned_name, base_name in (('FF_TUNED', 'FF'), ('GRU_TUNED', 'GRU')):
    subset.loc[subset['model'] == tuned_name, 'model'] = base_name

fig, axes = plt.subplots(2, 1)
axes = axes.flatten()
panels = (('same_city', 'Same City'), ('diff_city', 'Different City'))
for ax, (experiment, panel_title) in zip(axes, panels):
    sns.barplot(ax=ax, data=subset[subset['experiment_name'] == experiment], x='value', y='model', hue='is_tuned', palette=plotting.PALETTE)
    ax.set_title(panel_title)
fig.suptitle("Models Trained on both KCM and AtB")
fig.tight_layout()
# Shared x range so the two panels are directly comparable.
for ax in axes:
    ax.set_xlim(0, 1)

### Holdout Results

In [None]:
# Left panel: KCM shapes (first service_id, direction 0) with the holdout
# routes drawn on top and a MapBox basemap underneath.
# Right panel: untuned 'mape' on the holdout vs. same-city experiments.
kcm_gtfs_dir = "../data/kcm_gtfs/2023_01_23/"
gtfs = standardfeeds.get_gtfs_shapes_lookup(kcm_gtfs_dir)
gtfs_shapes = standardfeeds.get_gtfs_shapes(kcm_gtfs_dir, epsg=32148)

fig, axes = plt.subplots(1, 2)
fig.set_size_inches(10, 5)
axes = axes.flatten()
map_ax, bar_ax = axes

first_service = gtfs_shapes['service_id'].iloc[0]
background_shapes = gtfs_shapes[(gtfs_shapes['service_id'] == first_service) & (gtfs_shapes['direction_id'] == 0)]
background_shapes.plot(ax=map_ax)
holdout_shapes = gtfs_shapes[gtfs_shapes['route_id'].isin(data_loader.HOLDOUT_ROUTES)]
holdout_shapes.plot(column='route_id', ax=map_ax)
# The MapBox access token is read from the environment, never hard-coded.
basemap_source = cx.providers.MapBox(accessToken=os.getenv(key="MAPBOX_TOKEN"))
cx.add_basemap(ax=map_ax, crs=gtfs_shapes.crs.to_string(), alpha=0.6, source=basemap_source)

holdout_mask = (
    all_res['metric'].eq('mape')
    & all_res['run'].eq('kcm')
    & all_res['is_tuned'].eq(False)
    & all_res['model_archetype'].isin(['AVGM', 'GRU'])
    & all_res['experiment_name'].isin(['same_city', 'holdout'])
)
subset = all_res[holdout_mask]
sns.barplot(subset, x='value', y='model', hue='experiment_name', palette=plotting.PALETTE, ax=bar_ax)

fig.suptitle("Holdout Routes KCM")
fig.tight_layout()
bar_ax.set_xlim(0, .5)

In [None]:
# Left panel: one shape per AtB route inside the bounding box below, with the
# holdout routes drawn on top and a MapBox basemap underneath.
# Right panel: untuned 'mape' of the AtB run on holdout vs. same-city experiments.
gtfs = standardfeeds.get_gtfs_shapes_lookup("../data/atb_gtfs/2023_03_12/")
gtfs_shapes = standardfeeds.get_gtfs_shapes("../data/atb_gtfs/2023_03_12/", epsg=32632, stop_dist_filter=10_000)

fig, axes = plt.subplots(1,2)
fig.set_figheight(5)
fig.set_figwidth(10)
axes = axes.flatten()

# Bounding box as [xmin, ymin, xmax, ymax]; used with the coordinate indexer
# (.cx[x-range, y-range]) on shapes loaded with epsg=32632.
grid_bounds=[550869,7012847,579944,7039521]
trondheim_routes = gtfs_shapes.cx[grid_bounds[0]:grid_bounds[2], grid_bounds[1]:grid_bounds[3]]
# Keep only the first shape of each route so overlapping variants are not redrawn.
trondheim_routes = trondheim_routes.groupby('route_id').nth(0)
trondheim_routes.plot(ax=axes[0])
holdouts = gtfs_shapes[gtfs_shapes['route_id'].isin(data_loader.HOLDOUT_ROUTES)]
holdouts = holdouts.groupby('route_id').nth(0)
holdouts.plot(column='route_id', ax=axes[0])
# The MapBox access token is read from the environment, never hard-coded.
cx.add_basemap(ax=axes[0], crs=gtfs_shapes.crs.to_string(), alpha=0.6, source=cx.providers.MapBox(accessToken=os.getenv(key="MAPBOX_TOKEN")))

subset = all_res[all_res['metric']=='mape']
# Fixed: this figure is about AtB, so select the 'atb' run. The previous
# 'kcm' filter reproduced the KCM holdout bars under an AtB title.
subset = subset[subset['run']=='atb']
subset = subset[subset['is_tuned']==False]
subset = subset[subset['model_archetype'].isin(['AVGM','GRU'])]
subset = subset[subset['experiment_name'].isin(['same_city','holdout'])]
sns.barplot(subset, x='value', y='model', hue='experiment_name', palette=plotting.PALETTE, ax=axes[1])

fig.suptitle("Holdout Routes AtB")
fig.tight_layout()
axes[1].set_xlim(0,.5)

### Residuals

In [None]:
# Value matched against the 'model' column of all_out to select the
# predictions inspected in the residual cells that follow.
residuals_model = "GRU"

In [None]:
# Same-city outputs of the selected model, reduced to a fixed-seed random
# sample of at most 10,000 rows.
subset = all_out[(all_out['model'] == residuals_model) & (all_out['experiment_name'] == 'same_city')]
# Cap n at the available row count (sample() raises when n exceeds it without
# replacement) and fix the seed so Restart & Run All reproduces the same figures.
subset = subset.sample(n=min(10000, len(subset)), random_state=0)
subset

In [None]:
# One row of diagnostics per run: seaborn residual plot of 'preds' against
# 'labels' with a lowess trend, Q-Q plot of the 'residuals' column against a
# t distribution, and a histogram of the 'residuals' column.
fig, axes = plt.subplots(3, 3)
fig.set_size_inches(12, 8)

for axes_row, run_name in zip(axes, ['kcm', 'atb', 'mix']):
    resid_ax, qq_ax, hist_ax = axes_row
    plot_df = subset[subset['run'] == run_name]
    sns.residplot(plot_df, ax=resid_ax, x='labels', y='preds', lowess=True, scatter_kws={'marker': '.'}, line_kws={'color': 'red'})
    # NOTE(review): statsmodels documents that with fit=True the distribution
    # parameters are estimated from the data, so distargs may be ignored — confirm.
    sm.qqplot(plot_df['residuals'], ax=qq_ax, dist=stats.t, distargs=(len(plot_df) - 1,), line='45', fit=True)
    sns.histplot(plot_df['residuals'], ax=hist_ax, bins=100)
    resid_ax.set_title(run_name)
    resid_ax.set_xlim(0, 3000)
    qq_ax.set_ylim(-10, 10)
    hist_ax.set_xlim(-500, 500)

fig.suptitle(f"Same City Residuals - {residuals_model}")
fig.tight_layout()

In [None]:
# Different-city outputs of the selected model, reduced to a fixed-seed random
# sample of at most 10,000 rows.
subset = all_out[(all_out['model'] == residuals_model) & (all_out['experiment_name'] == 'diff_city')]
# Cap n at the available row count (sample() raises when n exceeds it without
# replacement) and fix the seed so Restart & Run All reproduces the same figures.
subset = subset.sample(n=min(10000, len(subset)), random_state=0)
subset

In [None]:
# One row of diagnostics per run: seaborn residual plot of 'preds' against
# 'labels' with a lowess trend, Q-Q plot of the 'residuals' column against a
# t distribution, and a histogram of the 'residuals' column.
fig, axes = plt.subplots(3, 3)
fig.set_size_inches(12, 8)

for axes_row, run_name in zip(axes, ['kcm', 'atb', 'mix']):
    resid_ax, qq_ax, hist_ax = axes_row
    plot_df = subset[subset['run'] == run_name]
    sns.residplot(plot_df, ax=resid_ax, x='labels', y='preds', lowess=True, scatter_kws={'marker': '.'}, line_kws={'color': 'red'})
    # NOTE(review): statsmodels documents that with fit=True the distribution
    # parameters are estimated from the data, so distargs may be ignored — confirm.
    sm.qqplot(plot_df['residuals'], ax=qq_ax, dist=stats.t, distargs=(len(plot_df) - 1,), line='45', fit=True)
    sns.histplot(plot_df['residuals'], ax=hist_ax, bins=100)
    resid_ax.set_title(run_name)
    resid_ax.set_xlim(0, 3000)
    qq_ax.set_ylim(-10, 10)
    hist_ax.set_xlim(-500, 500)

fig.suptitle(f"Different City Residuals - {residuals_model}")
fig.tight_layout()

In [None]:
# Holdout outputs of the selected model, reduced to a fixed-seed random
# sample of at most 10,000 rows.
subset = all_out[(all_out['model'] == residuals_model) & (all_out['experiment_name'] == 'holdout')]
# Cap n at the available row count (sample() raises when n exceeds it without
# replacement) and fix the seed so Restart & Run All reproduces the same figures.
subset = subset.sample(n=min(10000, len(subset)), random_state=0)
subset

In [None]:
# One row of diagnostics per run: seaborn residual plot of 'preds' against
# 'labels' with a lowess trend, Q-Q plot of the 'residuals' column against a
# t distribution, and a histogram of the 'residuals' column.
fig, axes = plt.subplots(3, 3)
fig.set_size_inches(12, 8)

for axes_row, run_name in zip(axes, ['kcm', 'atb', 'mix']):
    resid_ax, qq_ax, hist_ax = axes_row
    plot_df = subset[subset['run'] == run_name]
    sns.residplot(plot_df, ax=resid_ax, x='labels', y='preds', lowess=True, scatter_kws={'marker': '.'}, line_kws={'color': 'red'})
    # NOTE(review): statsmodels documents that with fit=True the distribution
    # parameters are estimated from the data, so distargs may be ignored — confirm.
    sm.qqplot(plot_df['residuals'], ax=qq_ax, dist=stats.t, distargs=(len(plot_df) - 1,), line='45', fit=True)
    sns.histplot(plot_df['residuals'], ax=hist_ax, bins=100)
    resid_ax.set_title(run_name)
    resid_ax.set_xlim(0, 3000)
    qq_ax.set_ylim(-10, 10)
    hist_ax.set_xlim(-500, 500)

fig.suptitle(f"Holdout Residuals - {residuals_model}")
fig.tight_layout()