In [None]:
import os
import pickle
import sys
sys.path.append("../")

import contextily as cx
import importlib
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
import scipy.stats as stats
import seaborn as sns
from sklearn import metrics
import statsmodels.api as sm


from openbustools import plotting, standardfeeds
from openbustools.traveltime import data_loader, model_utils

In [None]:
# Load the saved results of the KCM run. Later cells use the first returned
# frame as the metric table ('metric' / 'value' columns) and the second as the
# per-sample outputs ('labels' / 'preds' / 'residuals' columns).
kcm_results_dir = "../results/kcm/"
kcm_res, kcm_out = model_utils.load_results(kcm_results_dir)
# The AtB and mixed runs are currently disabled:
# atb_res, atb_out = model_utils.load_results("../results/atb/")
# mix_res, mix_out = model_utils.load_results("../results/mix/")

In [None]:
# Spot-check: rows for the GRU model, same-city experiment, fold 0.
is_gru_same_city_fold0 = (
    kcm_res['model'].eq('GRU')
    & kcm_res['experiment_name'].eq('same_city')
    & kcm_res['fold'].eq(0)
)
kcm_res[is_gru_same_city_fold0]

In [None]:
# Plot configuration shared by the cells below.
plot_subset = [
    "AVG",
    "FF",
    "GRU",
    "CONV",
    "TRSF",
    "DEEPTTE",
]  # model names kept in the bar charts
plot_metric = "mape"      # value of the 'metric' column that gets plotted
plot_bounds = [0, 1.0]    # x-axis limits applied to every bar chart
residuals_model = 'GRU'   # model whose outputs feed the residual diagnostics

In [None]:
# DISABLED: multi-run variant of the next cell. It tags the KCM, AtB and mixed
# frames with their run name, concatenates them into `all_res` / `all_out`, and
# folds the "*_TUNED" model labels into their base names. It needs the
# `atb_*` / `mix_*` frames, whose loads are commented out in the loading cell,
# so it cannot be re-enabled on its own.
# kcm_res['run'] = 'kcm'
# atb_res['run'] = 'atb'
# mix_res['run'] = 'mix'
# all_res = pd.concat([kcm_res, atb_res, mix_res])
# kcm_out['run'] = 'kcm'
# atb_out['run'] = 'atb'
# mix_out['run'] = 'mix'
# all_out = pd.concat([kcm_out, atb_out, mix_out])

# all_res.loc[all_res['model']=='FF_TUNED', 'model'] = 'FF'
# all_res.loc[all_res['model']=='GRU_TUNED', 'model'] = 'GRU'
# all_res.loc[all_res['model']=='CONV_TUNED', 'model'] = 'CONV'
# all_res.loc[all_res['model']=='TRSF_TUNED', 'model'] = 'TRSF'
# all_res.loc[all_res['model']=='DEEPTTE_TUNED', 'model'] = 'DEEPTTE'

# all_res

In [None]:
# Tag the KCM frames with their run name. `all_res` / `all_out` are plain
# aliases of `kcm_res` / `kcm_out` (no copy is made), so every change made
# through one name is also visible through the other.
for run_frame in (kcm_res, kcm_out):
    run_frame['run'] = 'kcm'
all_res, all_out = kcm_res, kcm_out

# Fold each tuned model label into its base name so tuned and untuned variants
# share one 'model' value in the plots below.
tuned_to_base = {
    'FF_TUNED': 'FF',
    'GRU_TUNED': 'GRU',
    'CONV_TUNED': 'CONV',
    'TRSF_TUNED': 'TRSF',
    'DEEPTTE_TUNED': 'DEEPTTE',
}
for tuned_name, base_name in tuned_to_base.items():
    all_res.loc[all_res['model'] == tuned_name, 'model'] = base_name

all_res

### Baseline Results

In [None]:
# Baseline comparison: untuned models from `plot_subset`, same-city experiment
# only, one bar group per model coloured by run.
fig, axes = plt.subplots(1,1)
is_baseline_row = (
    (all_res['metric'] == plot_metric)
    & (all_res['is_tuned'] == False)
    & all_res['experiment_name'].isin(['same_city'])
    & all_res['model_archetype'].isin(plot_subset)
)
subset = all_res[is_baseline_row].copy()
baseline_ax = sns.barplot(ax=axes, data=subset, x='value', y='model', hue='run', palette=plotting.PALETTE)
baseline_ax.set_title('Baseline Models')
fig.tight_layout()
axes.set_xlim(plot_bounds)

### Tuning Results

In [None]:
# Tuned vs. untuned error for the KCM run: top panel evaluates on the same
# city, bottom panel on the different city.
fig, axes = plt.subplots(2,1)
axes = axes.flatten()
in_scope = (
    (all_res['metric'] == plot_metric)
    & (all_res['run'] == 'kcm')
    & all_res['model_archetype'].isin(plot_subset)
)
subset = all_res[in_scope].copy()
panels = [('same_city', 'Same City'), ('diff_city', 'Different City')]
for panel_ax, (experiment, panel_title) in zip(axes, panels):
    panel_data = subset[subset['experiment_name'] == experiment]
    sns.barplot(ax=panel_ax, data=panel_data, x='value', y='model', hue='is_tuned', palette=plotting.PALETTE).set_title(panel_title)
fig.suptitle("Models Trained on KCM")
fig.tight_layout()
axes[0].set_xlim(plot_bounds)
axes[1].set_xlim(plot_bounds)

In [None]:
# Tuned vs. untuned error for the AtB run: top panel evaluates on the same
# city, bottom panel on the different city.
# NOTE(review): this cell filters on 'model', while the KCM tuning cell filters
# on 'model_archetype' - confirm which column is intended.
# NOTE(review): while only the KCM results are loaded, no row has run == 'atb',
# so this selection is empty.
fig, axes = plt.subplots(2,1)
axes = axes.flatten()
in_scope = (
    (all_res['metric'] == plot_metric)
    & (all_res['run'] == 'atb')
    & all_res['model'].isin(plot_subset)
)
subset = all_res[in_scope].copy()
panels = [('same_city', 'Same City'), ('diff_city', 'Different City')]
for panel_ax, (experiment, panel_title) in zip(axes, panels):
    panel_data = subset[subset['experiment_name'] == experiment]
    sns.barplot(ax=panel_ax, data=panel_data, x='value', y='model', hue='is_tuned', palette=plotting.PALETTE).set_title(panel_title)
fig.suptitle("Models Trained on AtB")
fig.tight_layout()
axes[0].set_xlim(plot_bounds)
axes[1].set_xlim(plot_bounds)

In [None]:
# Tuned vs. untuned error for the mixed (KCM + AtB) run: top panel evaluates on
# the same city, bottom panel on the different city.
# NOTE(review): this cell filters on 'model', while the KCM tuning cell filters
# on 'model_archetype' - confirm which column is intended.
# NOTE(review): while only the KCM results are loaded, no row has run == 'mix',
# so this selection is empty.
fig, axes = plt.subplots(2,1)
axes = axes.flatten()
in_scope = (
    (all_res['metric'] == plot_metric)
    & (all_res['run'] == 'mix')
    & all_res['model'].isin(plot_subset)
)
subset = all_res[in_scope].copy()
panels = [('same_city', 'Same City'), ('diff_city', 'Different City')]
for panel_ax, (experiment, panel_title) in zip(axes, panels):
    panel_data = subset[subset['experiment_name'] == experiment]
    sns.barplot(ax=panel_ax, data=panel_data, x='value', y='model', hue='is_tuned', palette=plotting.PALETTE).set_title(panel_title)
fig.suptitle("Models Trained on both KCM and AtB")
fig.tight_layout()
axes[0].set_xlim(plot_bounds)
axes[1].set_xlim(plot_bounds)

### Holdout Results

In [None]:
# Holdout-route experiment for KCM: a map of the routes with the holdout routes
# highlighted (left) next to the same-city vs. holdout error of the untuned
# models (right).
gtfs_dir = "../data/kcm_gtfs/2023_01_23/"
# NOTE(review): `gtfs` is not used by any cell visible here; it is kept in case
# another cell relies on it.
gtfs = standardfeeds.get_gtfs_shapes_lookup(gtfs_dir)
gtfs_shapes = standardfeeds.get_gtfs_shapes(gtfs_dir, epsg=32148)

fig, axes = plt.subplots(1,2)
fig.set_figheight(5)
fig.set_figwidth(10)
axes = axes.flatten()

# Left panel: one shape per route as a backdrop, then the holdout routes drawn
# on top and coloured by route_id.
gtfs_shapes.drop_duplicates('route_id').plot(ax=axes[0])
gtfs_shapes[gtfs_shapes['route_id'].isin(data_loader.HOLDOUT_ROUTES)].plot(column='route_id', ax=axes[0])
# os.getenv returns None when the variable is unset; only request Mapbox tiles
# when a token is actually available instead of passing None to the provider.
mapbox_token = os.getenv("MAPBOX_TOKEN")
if mapbox_token:
    cx.add_basemap(ax=axes[0], crs=gtfs_shapes.crs.to_string(), alpha=0.6, source=cx.providers.MapBox(accessToken=mapbox_token))
else:
    print("MAPBOX_TOKEN is not set; drawing the route map without a basemap.")

# Right panel: untuned KCM models, same-city vs. holdout experiment.
subset = all_res[all_res['metric']==plot_metric]
subset = subset[subset['run']=='kcm']
subset = subset[subset['is_tuned']==False]
subset = subset[subset['model_archetype'].isin(plot_subset)]
subset = subset[subset['experiment_name'].isin(['same_city','holdout'])]
# Pass the frame by keyword, matching the other barplot calls in this notebook.
sns.barplot(data=subset, x='value', y='model', hue='experiment_name', palette=plotting.PALETTE, ax=axes[1])

fig.suptitle("Holdout Routes KCM")
fig.tight_layout()
axes[1].set_xlim(plot_bounds)

### Residuals

In [None]:
# Residual diagnostics for `residuals_model` on the same-city experiment: one
# row per run with (seaborn residplot of preds against labels, Q-Q plot of the
# residuals, residual histogram).
fig, axes = plt.subplots(3,3)
fig.set_figheight(8)
fig.set_figwidth(12)

subset = all_out[all_out['model']==residuals_model]
subset = subset[subset['experiment_name']=='same_city']
# Cap the sample at the number of available rows (DataFrame.sample raises when
# asked for more rows than exist without replacement) and fix the seed so the
# figure is reproducible across kernel restarts.
subset = subset.sample(n=min(10000, len(subset)), random_state=0)

for i, run_name in enumerate(['kcm','atb','mix']):
    plot_df = subset[subset['run']==run_name]
    if plot_df.empty:
        # Nothing to plot for this run (e.g. its results were not loaded):
        # label the row and leave its axes blank instead of fitting on no data.
        axes[i,0].set_title(f"{run_name} (no data)")
        continue
    sns.residplot(data=plot_df, ax=axes[i,0], x='labels', y='preds', lowess=True, scatter_kws={'marker': '.'}, line_kws={'color': 'red'})
    # NOTE(review): with fit=True statsmodels fits the distribution parameters
    # itself, so `distargs` appears to have no effect here - confirm.
    sm.qqplot(plot_df['residuals'], ax=axes[i,1], dist=stats.t, distargs=(len(plot_df)-1,), line='45', fit=True)
    sns.histplot(plot_df['residuals'], ax=axes[i,2], bins=100)
    axes[i,0].set_xlim(0,3000)
    axes[i,1].set_ylim(-10,10)
    axes[i,2].set_xlim(-500,500)
    axes[i,0].set_title(run_name)

fig.suptitle(f"Same City Residuals - {residuals_model}")
fig.tight_layout()

In [None]:
# Residual diagnostics for `residuals_model` on the different-city experiment:
# one row per run with (seaborn residplot of preds against labels, Q-Q plot of
# the residuals, residual histogram).
fig, axes = plt.subplots(3,3)
fig.set_figheight(8)
fig.set_figwidth(12)

subset = all_out[all_out['model']==residuals_model]
subset = subset[subset['experiment_name']=='diff_city']
# Cap the sample at the number of available rows (DataFrame.sample raises when
# asked for more rows than exist without replacement) and fix the seed so the
# figure is reproducible across kernel restarts.
subset = subset.sample(n=min(10000, len(subset)), random_state=0)

for i, run_name in enumerate(['kcm','atb','mix']):
    plot_df = subset[subset['run']==run_name]
    if plot_df.empty:
        # Nothing to plot for this run (e.g. its results were not loaded):
        # label the row and leave its axes blank instead of fitting on no data.
        axes[i,0].set_title(f"{run_name} (no data)")
        continue
    sns.residplot(data=plot_df, ax=axes[i,0], x='labels', y='preds', lowess=True, scatter_kws={'marker': '.'}, line_kws={'color': 'red'})
    # NOTE(review): with fit=True statsmodels fits the distribution parameters
    # itself, so `distargs` appears to have no effect here - confirm.
    sm.qqplot(plot_df['residuals'], ax=axes[i,1], dist=stats.t, distargs=(len(plot_df)-1,), line='45', fit=True)
    sns.histplot(plot_df['residuals'], ax=axes[i,2], bins=100)
    axes[i,0].set_xlim(0,3000)
    axes[i,1].set_ylim(-10,10)
    axes[i,2].set_xlim(-500,500)
    axes[i,0].set_title(run_name)

fig.suptitle(f"Different City Residuals - {residuals_model}")
fig.tight_layout()

In [None]:
# Residual diagnostics for `residuals_model` on the holdout experiment: one row
# per run with (seaborn residplot of preds against labels, Q-Q plot of the
# residuals, residual histogram).
fig, axes = plt.subplots(3,3)
fig.set_figheight(8)
fig.set_figwidth(12)

subset = all_out[all_out['model']==residuals_model]
subset = subset[subset['experiment_name']=='holdout']
# Cap the sample at the number of available rows (DataFrame.sample raises when
# asked for more rows than exist without replacement) and fix the seed so the
# figure is reproducible across kernel restarts.
subset = subset.sample(n=min(10000, len(subset)), random_state=0)

for i, run_name in enumerate(['kcm','atb','mix']):
    plot_df = subset[subset['run']==run_name]
    if plot_df.empty:
        # Nothing to plot for this run (e.g. its results were not loaded):
        # label the row and leave its axes blank instead of fitting on no data.
        axes[i,0].set_title(f"{run_name} (no data)")
        continue
    sns.residplot(data=plot_df, ax=axes[i,0], x='labels', y='preds', lowess=True, scatter_kws={'marker': '.'}, line_kws={'color': 'red'})
    # NOTE(review): with fit=True statsmodels fits the distribution parameters
    # itself, so `distargs` appears to have no effect here - confirm.
    sm.qqplot(plot_df['residuals'], ax=axes[i,1], dist=stats.t, distargs=(len(plot_df)-1,), line='45', fit=True)
    sns.histplot(plot_df['residuals'], ax=axes[i,2], bins=100)
    axes[i,0].set_xlim(0,3000)
    axes[i,1].set_ylim(-10,10)
    axes[i,2].set_xlim(-500,500)
    axes[i,0].set_title(run_name)

fig.suptitle(f"Holdout Residuals - {residuals_model}")
fig.tight_layout()