In [7]:
import importlib
import matplotlib.pyplot as plt
import pandas as pd
import seaborn as sns

from utils import data_utils

# Root folders of the parameter-search runs, one per network. The KCM folder is
# loaded under the city label "Seattle" and the AtB folder under "Trondheim".
# Both keep their trailing slash because sub-paths are appended to them directly.
RUN_FOLDER_KCM = "../results/param_search/kcm/"
RUN_FOLDER_ATB = "../results/param_search/atb/"

[{'n_param_samples': 0,
  'batch_size': 512,
  'hidden_size': 16,
  'num_layers': 2,
  'dropout_rate': 0.4}]

In [None]:
# Hyperparameter search results from 3 runs per model per network.
# Tuples are (batch_size, hidden_size, num_layers, dropout_rate):
# CONV (512, 64, 4, 0.4)
# FF (512, 32, 3, 0.2)
# GRU (512, 16, 2, 0.4)
# TRSF (512, 32, 4, 0.4)

# KCM
# 512, 16, 2, 0.4 (GRU, TRSF)
# 512, 16, 5, 0.1
# 512, 128, 4, 0.4 (CONV, FF)

# Model Epochs TestLoss
# CONV_0	10.000000	18.066327
# FF_0	11.333333	168.559759
# GRU_0	16.000000	18.540478
# TRSF_0	10.000000	22.919503

# CONV_0	10.000000	17.851481
# FF_0	11.333333	157.528422
# GRU_0	10.000000	22.917446
# TRSF_0	10.000000	22.915722

# CONV_0	12.0	15.852690
# FF_0	14.0	153.020960
# GRU_0	10.0	22.916604
# TRSF_0	10.0	22.912308

# AtB
# 512, 32, 3, 0.2 (FF)
# 512, 32, 5, 0.4 (CONV, TRSF)
# 512, 16, 2, 0.4 (GRU)

# CONV_0	12.666667	7.069618
# FF_0	19.333333	63.465285
# GRU_0	17.333333	9.175895
# TRSF_0	16.000000	9.075986

# CONV_0	12.000000	6.991179
# FF_0	18.000000	67.876277
# GRU_0	16.666667	9.161330
# TRSF_0	20.000000	9.001094

# CONV_0	14.000000	7.567914
# FF_0	23.333333	66.901993
# GRU_0	20.000000	9.058873
# TRSF_0	16.000000	9.404305

In [32]:
# Base results: parse the Lightning logs of each network's runs.
# The second argument is a city label; presumably it ends up in the "City"
# column that the later cells filter on (confirm in data_utils).
kcm_log_dir = f"{RUN_FOLDER_KCM}all_logs/logs/"
atb_log_dir = f"{RUN_FOLDER_ATB}all_logs/logs2/"  # note: AtB reads "logs2", KCM reads "logs"
kcm_model_results = data_utils.extract_lightning_results(kcm_log_dir, "Seattle")
atb_model_results = data_utils.extract_lightning_results(atb_log_dir, "Trondheim")
# kcm_model_results = data_utils.load_pkl(RUN_FOLDER_KCM + "model_results.pkl")
# atb_model_results = data_utils.load_pkl(RUN_FOLDER_ATB + "atb/" + "model_results.pkl")
# kcm_result_df, kcm_loss_df, kcm_time_df = data_utils.extract_results("Seattle", kcm_model_results)
# atb_result_df, atb_loss_df, atb_time_df = data_utils.extract_results("Trondheim", atb_model_results)

In [None]:
# Training and validation curves: keep only the "Train"/"Valid" rows of each
# network, stack both networks, and order the rows for plotting.
curve_loss_sets = ["Train", "Valid"]
kcm_model_curves = kcm_model_results.loc[kcm_model_results["Loss Set"].isin(curve_loss_sets)]
atb_model_curves = atb_model_results.loc[atb_model_results["Loss Set"].isin(curve_loss_sets)]
model_curves_df = (
    pd.concat([kcm_model_curves, atb_model_curves], axis=0)
    .sort_values(["City", "Fold", "Model", "Epoch", "Loss Set"])
)
model_curves_df.head()

In [None]:
# Final performance on the holdout test set of the training network:
# keep only the "Test" rows of each network, stack them, and sort for display.
kcm_model_performances = kcm_model_results.loc[kcm_model_results["Loss Set"].eq("Test")]
atb_model_performances = atb_model_results.loc[atb_model_results["Loss Set"].eq("Test")]
model_performances_df = (
    pd.concat([kcm_model_performances, atb_model_performances], axis=0)
    .sort_values(["City", "Fold", "Model"])
)
model_performances_df.head()

In [None]:
# # Include generalization results
# kcm_gen_results = data_utils.load_pkl(RUN_FOLDER_KCM + "kcm/" + "model_generalization_results.pkl")
# atb_gen_results = data_utils.load_pkl(RUN_FOLDER_ATB + "atb/" + "model_generalization_results.pkl")
# kcm_gen_df = data_utils.extract_gen_results("Seattle", kcm_gen_results)
# atb_gen_df = data_utils.extract_gen_results("Trondheim", atb_gen_results)

In [None]:
# # Do not include DeepTTE results:
# performances_df = pd.concat([kcm_result_df, atb_result_df], axis=0).sort_values(['Model','City','Fold'])
# model_curves_df = pd.concat([kcm_loss_df, atb_loss_df], axis=0)
# times_df = pd.concat([kcm_time_df, atb_time_df], axis=0)

# # Include DeepTTE results:
# kcm_deeptte_result_df, kcm_deeptte_loss_df, kcm_deeptte_time_df = data_utils.extract_deeptte_results("Seattle", RUN_FOLDER_KCM + "kcm/deeptte_results/result/")
# atb_deeptte_result_df, atb_deeptte_loss_df, atb_deeptte_time_df = data_utils.extract_deeptte_results("Trondheim", RUN_FOLDER_ATB + "atb/deeptte_results/result/")
# performances_df = pd.concat([kcm_result_df, atb_result_df, kcm_deeptte_result_df, atb_deeptte_result_df], axis=0).sort_values(['Model','City','Fold'])
# model_curves_df = pd.concat([kcm_loss_df, atb_loss_df, kcm_deeptte_loss_df, atb_deeptte_loss_df], axis=0)
# times_df = pd.concat([kcm_deeptte_time_df, atb_deeptte_time_df, kcm_time_df, atb_time_df], axis=0)

In [None]:
# # Do not include DeepTTE generalization results
# gens_df = pd.concat([kcm_gen_df, atb_gen_df], axis=0)

# # Include DeepTTE generalization results
# kcm_deeptte_gen_results = data_utils.extract_deeptte_gen_results("Seattle", RUN_FOLDER_KCM + "kcm/deeptte_results/generalization/")
# atb_deeptte_gen_results = data_utils.extract_deeptte_gen_results("Trondheim", RUN_FOLDER_ATB + "atb/deeptte_results/generalization/")
# gens_df = pd.concat([kcm_deeptte_gen_results, atb_deeptte_gen_results, kcm_gen_df, atb_gen_df]).sort_values(['Fold','Model'])

In [None]:
# # Baselines for comparing between original evaluation and generalization
# kcm_baseline_df = performances_df[performances_df['City']=="Seattle"].copy()
# kcm_baseline_df['Loss'] = "Network Baseline"
# kcm_baseline_df = kcm_baseline_df[['Model','City','Loss','Fold','MAPE','RMSE','MAE']]
# atb_baseline_df = performances_df[performances_df['City']=="Trondheim"].copy()
# atb_baseline_df['Loss'] = "Network Baseline"
# atb_baseline_df = atb_baseline_df[['Model','City','Loss','Fold','MAPE','RMSE','MAE']]

# # Deterioration for comparing change in losses in train network
# kcm_deterioration_df = gens_df[gens_df['City']=="Seattle"]
# kcm_deterioration_df = kcm_deterioration_df[kcm_deterioration_df['Loss'].isin(['Extract_Train_Losses'])]
# atb_deterioration_df = gens_df[gens_df['City']=="Trondheim"]
# atb_deterioration_df = atb_deterioration_df[atb_deterioration_df['Loss'].isin(['Extract_Train_Losses'])]

In [None]:
# print(f"All Unique Models: {pd.unique(performances_df['Model'])}")
# print(f"All NN Models: {pd.unique(model_curves_df['Model'])}")

### Model Performance

In [None]:
# plot_data = performances_df[performances_df['City']=="Seattle"]
# fig, axes = plt.subplots(3,1)
# fig.set_figheight(10)
# fig.set_figwidth(10)
# sns.barplot(plot_data, x="MAPE", y="Model", ax=axes[0])
# axes[0].set_xlim([0, 1.0])
# sns.barplot(plot_data, x="RMSE", y="Model", ax=axes[1])
# axes[1].set_xlim([0, 1200])
# sns.barplot(plot_data, x="MAE", y="Model", ax=axes[2])
# axes[2].set_xlim([0, 800])
# fig.suptitle('KCM Model Performance', fontsize=16)
# fig.tight_layout()
# plt.savefig("../plots/model_performances_kcm.png", dpi=1800, bbox_inches='tight')

# Horizontal bar chart of test loss per model for the KCM (Seattle) rows.
# seaborn's barplot aggregates the rows that share a Model into one bar.
plot_data = model_performances_df.loc[model_performances_df["City"] == "Seattle"]
fig, axes = plt.subplots(1, 1, figsize=(10, 10))
sns.barplot(data=plot_data, x="Loss", y="Model", ax=axes)
axes.set_xlim(0, 500)
fig.suptitle("KCM Model Performance", fontsize=16)
fig.tight_layout()
fig.savefig("../plots/model_performances_kcm.png", dpi=1800, bbox_inches="tight")

In [None]:
# plot_data = performances_df[performances_df['City']=="Trondheim"]
# fig, axes = plt.subplots(3,1)
# fig.set_figheight(10)
# fig.set_figwidth(10)
# sns.barplot(plot_data, x="MAPE", y="Model", ax=axes[0])
# axes[0].set_xlim([0, 1.0])
# sns.barplot(plot_data, x="RMSE", y="Model", ax=axes[1])
# axes[1].set_xlim([0, 1200])
# sns.barplot(plot_data, x="MAE", y="Model", ax=axes[2])
# axes[2].set_xlim([0, 800])
# fig.suptitle('AtB Model Performance', fontsize=16)
# fig.tight_layout()
# plt.savefig("../plots/model_performances_atb.png", dpi=1800, bbox_inches='tight')

# Horizontal bar chart of test loss per model for the AtB (Trondheim) rows.
# seaborn's barplot aggregates the rows that share a Model into one bar.
plot_data = model_performances_df.loc[model_performances_df["City"] == "Trondheim"]
fig, axes = plt.subplots(1, 1, figsize=(10, 10))
sns.barplot(data=plot_data, x="Loss", y="Model", ax=axes)
axes.set_xlim(0, 500)
fig.suptitle("AtB Model Performance", fontsize=16)
fig.tight_layout()
fig.savefig("../plots/model_performances_atb.png", dpi=1800, bbox_inches="tight")

### Training Time

In [None]:
# Per-epoch training time of each model on the KCM (Seattle) rows of times_df.
# NOTE(review): in the visible notebook `times_df` is only assigned inside
# commented-out code, so on a fresh kernel this cell raises NameError until that
# code is restored.
plot_data = times_df[times_df['City']=="Seattle"]
fig, axes = plt.subplots(1, 1, figsize=(12, 6))
# Draw on the axes created above explicitly (as the model-performance cells do)
# instead of relying on pyplot's "current axes" state.
sns.barplot(plot_data, x="Time", y="Model", hue="Model", dodge=False, ax=axes)
axes.set_ylabel("Model")
axes.set_xlabel("Training Time (s)")
fig.suptitle('Per-Epoch Model Training Time On KCM', fontsize=16)
fig.tight_layout()
plt.savefig("../plots/model_train_time_kcm.png", dpi=1800, bbox_inches='tight')

In [None]:
# Per-epoch training time of each model on the AtB (Trondheim) rows of times_df.
# NOTE(review): in the visible notebook `times_df` is only assigned inside
# commented-out code, so on a fresh kernel this cell raises NameError until that
# code is restored.
plot_data = times_df[times_df['City']=="Trondheim"]
fig, axes = plt.subplots(1, 1, figsize=(12, 6))
# Draw on the axes created above explicitly (as the model-performance cells do)
# instead of relying on pyplot's "current axes" state.
sns.barplot(plot_data, x="Time", y="Model", hue="Model", dodge=False, ax=axes)
axes.set_ylabel("Model")
axes.set_xlabel("Training Time (s)")
fig.suptitle('Per-Epoch Model Training Time On AtB', fontsize=16)
fig.tight_layout()
plt.savefig("../plots/model_train_time_atb.png", dpi=1800, bbox_inches='tight')

### Training Curves

In [None]:
# Train/validation loss curves for the first three models (in order of first
# appearance in model_curves_df): one row per model, one column per city.
model_list = model_curves_df["Model"].unique()
plot_data = model_curves_df.loc[model_curves_df["Model"].isin(model_list[:3])]
g = sns.FacetGrid(
    data=plot_data,
    row="Model",
    col="City",
    hue="Loss Set",
    hue_order=["Train", "Valid"],
    height=4,
    sharex=False,
    sharey=False,
)
g.map(sns.lineplot, "Epoch", "Loss")
g.add_legend()
# Save through the grid itself; its figure is the one pyplot would treat as current.
g.savefig("../plots/model_curves_1.png", dpi=1800, bbox_inches="tight")

In [None]:
# Train/validation loss curves for the 4th-6th models (in order of first
# appearance in model_curves_df): one row per model, one column per city.
model_list = pd.unique(model_curves_df['Model'])
plot_data = model_curves_df[model_curves_df['Model'].isin(model_list[3:6])]
if plot_data.empty:
    # With three or fewer models this slice selects nothing, and FacetGrid would
    # have no rows or columns to lay out.
    print("No models in positions 4-6; skipping model_curves_2.png")
else:
    g = sns.FacetGrid(data=plot_data, col="City", row="Model", hue="Loss Set", hue_order=["Train","Valid"], height=4, sharex=False, sharey=False)
    g.map(sns.lineplot, "Epoch", "Loss")
    g.add_legend()
    # Second group of models gets its own file; it previously overwrote model_curves_1.png.
    plt.savefig("../plots/model_curves_2.png", dpi=1800, bbox_inches='tight')

In [None]:
# Train/validation loss curves for every model after the first six (in order of
# first appearance in model_curves_df): one row per model, one column per city.
model_list = pd.unique(model_curves_df['Model'])
plot_data = model_curves_df[model_curves_df['Model'].isin(model_list[6:])]
if plot_data.empty:
    # With six or fewer models this tail slice selects nothing, and FacetGrid
    # would have no rows or columns to lay out.
    print("No models beyond position 6; skipping model_curves_3.png")
else:
    g = sns.FacetGrid(data=plot_data, col="City", row="Model", hue="Loss Set", hue_order=["Train","Valid"], height=4, sharex=False, sharey=False)
    g.map(sns.lineplot, "Epoch", "Loss")
    g.add_legend()
    # Third group of models gets its own file; it previously overwrote model_curves_1.png.
    plt.savefig("../plots/model_curves_3.png", dpi=1800, bbox_inches='tight')

### Generalization (Across Networks)

In [None]:
# Cross-network generalization on KCM: the 'Test_Losses' rows of gens_df with
# City == "Trondheim" (relabelled "Model Trained on AtB") next to the rows of
# kcm_baseline_df, compared by MAE.
# NOTE(review): in the visible notebook `gens_df` and `kcm_baseline_df` are only
# assigned inside commented-out cells, so on a fresh kernel this cell raises
# NameError until those cells are restored.
plot_data = gens_df[gens_df['City']=="Trondheim"]
plot_data = plot_data[plot_data['Loss'].isin(['Test_Losses'])]
plot_data = pd.concat([plot_data, kcm_baseline_df])
# Copy the filtered rows so the relabelling below assigns into an independent
# frame rather than a slice (avoids pandas' SettingWithCopyWarning).
plot_data = plot_data[plot_data['Model'].isin(['AVG','FF','GRU','TRSF'])].copy()
plot_data['Loss'] = plot_data['Loss'].replace(["Test_Losses"],["Model Trained on AtB"])
fig, axes = plt.subplots(1, 1, figsize=(7, 5))
# Draw on the axes created above explicitly instead of pyplot's current axes.
sns.barplot(plot_data, x="MAE", y="Model", hue="Loss", dodge=True, ax=axes)
axes.set_xlim([0, 500])
axes.set_ylabel("Model")
axes.set_xlabel("Model Performance (MAE)")
fig.suptitle('KCM Network Generalization Performance', fontsize=16)
fig.tight_layout()
plt.savefig("../plots/model_generalization_kcm.png", dpi=1800, bbox_inches='tight')

In [None]:
# Cross-network generalization on AtB: the 'Test_Losses' rows of gens_df with
# City == "Seattle" (relabelled "Model Trained on KCM") next to the rows of
# atb_baseline_df, compared by MAE.
# NOTE(review): in the visible notebook `gens_df` and `atb_baseline_df` are only
# assigned inside commented-out cells, so on a fresh kernel this cell raises
# NameError until those cells are restored.
plot_data = gens_df[gens_df['City']=="Seattle"]
plot_data = plot_data[plot_data['Loss'].isin(['Test_Losses'])]
plot_data = pd.concat([plot_data, atb_baseline_df])
# Copy the filtered rows so the relabelling below assigns into an independent
# frame rather than a slice (avoids pandas' SettingWithCopyWarning).
plot_data = plot_data[plot_data['Model'].isin(['AVG','FF','GRU','TRSF'])].copy()
plot_data['Loss'] = plot_data['Loss'].replace(["Test_Losses"],["Model Trained on KCM"])
fig, axes = plt.subplots(1, 1, figsize=(7, 5))
# Draw on the axes created above explicitly instead of pyplot's current axes.
sns.barplot(plot_data, x="MAE", y="Model", hue="Loss", dodge=True, ax=axes)
axes.set_xlim([0, 500])
axes.set_ylabel("Model")
axes.set_xlabel("Model Performance (MAE)")
fig.suptitle('AtB Network Generalization Performance', fontsize=16)
fig.tight_layout()
plt.savefig("../plots/model_generalization_atb.png", dpi=1800, bbox_inches='tight')

### Generalization (Within Network)

In [None]:
# Within-network generalization on KCM: the 'Holdout_Losses' rows of gens_df with
# City == "Seattle" (relabelled "Holdout Routes") next to the rows of
# kcm_baseline_df, compared by MAE.
# NOTE(review): in the visible notebook `gens_df` and `kcm_baseline_df` are only
# assigned inside commented-out cells, so on a fresh kernel this cell raises
# NameError until those cells are restored.
plot_data = gens_df[gens_df['City']=="Seattle"]
plot_data = plot_data[plot_data['Loss'].isin(['Holdout_Losses'])]
plot_data = pd.concat([plot_data, kcm_baseline_df])
# Copy the filtered rows so the relabelling below assigns into an independent
# frame rather than a slice (avoids pandas' SettingWithCopyWarning).
plot_data = plot_data[plot_data['Model'].isin(['AVG','FF','GRU','TRSF'])].copy()
plot_data['Loss'] = plot_data['Loss'].replace(["Holdout_Losses"],["Holdout Routes"])
fig, axes = plt.subplots(1, 1, figsize=(7, 5))
# Draw on the axes created above explicitly instead of pyplot's current axes.
sns.barplot(plot_data, x="MAE", y="Model", hue="Loss", dodge=True, ax=axes)
axes.set_xlim([0, 500])
axes.set_ylabel("Model")
axes.set_xlabel("Model Performance (MAE)")
fig.suptitle('KCM Network Holdout Route Performance', fontsize=16)
fig.tight_layout()
plt.savefig("../plots/model_holdout_route_kcm.png", dpi=1800, bbox_inches='tight')

In [None]:
# Within-network generalization on AtB: the 'Holdout_Losses' rows of gens_df with
# City == "Trondheim" (relabelled "Holdout Routes") next to the rows of
# atb_baseline_df, compared by MAE.
# NOTE(review): in the visible notebook `gens_df` and `atb_baseline_df` are only
# assigned inside commented-out cells, so on a fresh kernel this cell raises
# NameError until those cells are restored.
plot_data = gens_df[gens_df['City']=="Trondheim"]
plot_data = plot_data[plot_data['Loss'].isin(['Holdout_Losses'])]
plot_data = pd.concat([plot_data, atb_baseline_df])
# Copy the filtered rows so the relabelling below assigns into an independent
# frame rather than a slice (avoids pandas' SettingWithCopyWarning).
plot_data = plot_data[plot_data['Model'].isin(['AVG','FF','GRU','TRSF'])].copy()
plot_data['Loss'] = plot_data['Loss'].replace(["Holdout_Losses"],["Holdout Routes"])
fig, axes = plt.subplots(1, 1, figsize=(7, 5))
# Draw on the axes created above explicitly instead of pyplot's current axes.
sns.barplot(plot_data, x="MAE", y="Model", hue="Loss", dodge=True, ax=axes)
axes.set_xlim([0, 500])
axes.set_ylabel("Model")
axes.set_xlabel("Model Performance (MAE)")
# NOTE(review): the KCM counterpart of this figure is titled "... Network Holdout
# Route Performance"; this one says "Model" -- confirm which wording is intended.
fig.suptitle('AtB Model Holdout Route Performance', fontsize=16)
fig.tight_layout()
plt.savefig("../plots/model_holdout_route_atb.png", dpi=1800, bbox_inches='tight')

### Fine Tuning/Feature Extraction

In [None]:
# Fine-tuning on KCM: the 'Test_Losses' and 'Tune_Test_Losses' rows of gens_df
# with City == "Trondheim" (relabelled "Model Trained on AtB" and
# "Model Trained on AtB + Tuning") next to the rows of kcm_baseline_df, by MAE.
# NOTE(review): in the visible notebook `gens_df` and `kcm_baseline_df` are only
# assigned inside commented-out cells, so on a fresh kernel this cell raises
# NameError until those cells are restored.
plot_data = gens_df[gens_df['City']=="Trondheim"]
plot_data = plot_data[plot_data['Loss'].isin(['Tune_Test_Losses','Test_Losses'])]
plot_data = pd.concat([plot_data, kcm_baseline_df])
# Copy the filtered rows so the relabelling below assigns into an independent
# frame rather than a slice (avoids pandas' SettingWithCopyWarning).
plot_data = plot_data[plot_data['Model'].isin(['AVG','FF','GRU','TRSF'])].copy()
plot_data['Loss'] = plot_data['Loss'].replace(["Tune_Test_Losses","Test_Losses"],["Model Trained on AtB + Tuning", "Model Trained on AtB"])
fig, axes = plt.subplots(1, 1, figsize=(10, 10))
# Draw on the axes created above explicitly instead of pyplot's current axes.
sns.barplot(plot_data, x="MAE", y="Model", hue="Loss", dodge=True, ax=axes)
axes.set_xlim([0, 500])
axes.set_ylabel("Model")
axes.set_xlabel("Model Performance (MAE)")
fig.suptitle('KCM Tuned Network Generalization Performance', fontsize=16)
fig.tight_layout()
plt.savefig("../plots/model_tuning_kcm.png", dpi=1800, bbox_inches='tight')

In [None]:
# Fine-tuning on AtB: the 'Test_Losses' and 'Tune_Test_Losses' rows of gens_df
# with City == "Seattle" (relabelled "Model Trained on KCM" and
# "Model Trained on KCM + Tuning") next to the rows of atb_baseline_df, by MAE.
# NOTE(review): in the visible notebook `gens_df` and `atb_baseline_df` are only
# assigned inside commented-out cells, so on a fresh kernel this cell raises
# NameError until those cells are restored.
plot_data = gens_df[gens_df['City']=="Seattle"]
plot_data = plot_data[plot_data['Loss'].isin(['Tune_Test_Losses', 'Test_Losses'])]
plot_data = pd.concat([plot_data, atb_baseline_df])
# Copy the filtered rows so the relabelling below assigns into an independent
# frame rather than a slice (avoids pandas' SettingWithCopyWarning).
plot_data = plot_data[plot_data['Model'].isin(['AVG','FF','GRU','TRSF'])].copy()
plot_data['Loss'] = plot_data['Loss'].replace(["Tune_Test_Losses","Test_Losses"],["Model Trained on KCM + Tuning", "Model Trained on KCM"])
fig, axes = plt.subplots(1, 1, figsize=(10, 10))
# Draw on the axes created above explicitly instead of pyplot's current axes.
sns.barplot(plot_data, x="MAE", y="Model", hue="Loss", dodge=True, ax=axes)
axes.set_xlim([0, 500])
axes.set_ylabel("Model")
axes.set_xlabel("Model Performance (MAE)")
fig.suptitle('AtB Tuned Network Generalization Performance', fontsize=16)
fig.tight_layout()
plt.savefig("../plots/model_tuning_atb.png", dpi=1800, bbox_inches='tight')

In [None]:
# Deterioration on KCM: the 'Test_Losses' and 'Tune_Test_Losses' rows of gens_df
# with City == "Trondheim" (relabelled "Model Trained on AtB" and
# "Model Trained on AtB + Tuning") together with the rows of kcm_baseline_df and
# kcm_deterioration_df, compared by MAE.
# NOTE(review): in the visible notebook `gens_df`, `kcm_baseline_df` and
# `kcm_deterioration_df` are only assigned inside commented-out cells, so on a
# fresh kernel this cell raises NameError until those cells are restored.
plot_data = gens_df[gens_df['City']=="Trondheim"]
plot_data = plot_data[plot_data['Loss'].isin(['Tune_Test_Losses','Test_Losses'])]
plot_data = pd.concat([plot_data, kcm_baseline_df, kcm_deterioration_df])
# Copy the filtered rows so the relabelling below assigns into an independent
# frame rather than a slice (avoids pandas' SettingWithCopyWarning).
plot_data = plot_data[plot_data['Model'].isin(['AVG','FF','GRU','TRSF'])].copy()
# Only the two listed labels are renamed; rows from the baseline and
# deterioration frames keep whatever 'Loss' label they arrived with.
plot_data['Loss'] = plot_data['Loss'].replace(["Tune_Test_Losses","Test_Losses"],["Model Trained on AtB + Tuning", "Model Trained on AtB"])
fig, axes = plt.subplots(1, 1, figsize=(10, 10))
# Draw on the axes created above explicitly instead of pyplot's current axes.
sns.barplot(plot_data, x="MAE", y="Model", hue="Loss", dodge=True, ax=axes)
axes.set_xlim([0, 500])
axes.set_ylabel("Model")
axes.set_xlabel("Model Performance (MAE)")
fig.suptitle('KCM Network Deterioration', fontsize=16)
fig.tight_layout()
plt.savefig("../plots/model_deterioration_kcm.png", dpi=1800, bbox_inches='tight')

In [None]:
# Deterioration on AtB: the 'Test_Losses' and 'Tune_Test_Losses' rows of gens_df
# with City == "Seattle" (relabelled "Model Trained on KCM" and
# "Model Trained on KCM + Tuning") together with the rows of atb_baseline_df and
# atb_deterioration_df, compared by MAE.
# NOTE(review): in the visible notebook `gens_df`, `atb_baseline_df` and
# `atb_deterioration_df` are only assigned inside commented-out cells, so on a
# fresh kernel this cell raises NameError until those cells are restored.
plot_data = gens_df[gens_df['City']=="Seattle"]
plot_data = plot_data[plot_data['Loss'].isin(['Tune_Test_Losses','Test_Losses'])]
plot_data = pd.concat([plot_data, atb_baseline_df, atb_deterioration_df])
# Copy the filtered rows so the relabelling below assigns into an independent
# frame rather than a slice (avoids pandas' SettingWithCopyWarning).
plot_data = plot_data[plot_data['Model'].isin(['AVG','FF','GRU','TRSF'])].copy()
# Only the two listed labels are renamed; rows from the baseline and
# deterioration frames keep whatever 'Loss' label they arrived with.
plot_data['Loss'] = plot_data['Loss'].replace(["Tune_Test_Losses","Test_Losses"],["Model Trained on KCM + Tuning", "Model Trained on KCM"])
fig, axes = plt.subplots(1, 1, figsize=(10, 10))
# Draw on the axes created above explicitly instead of pyplot's current axes.
sns.barplot(plot_data, x="MAE", y="Model", hue="Loss", dodge=True, ax=axes)
axes.set_xlim([0, 500])
axes.set_ylabel("Model")
axes.set_xlabel("Model Performance (MAE)")
fig.suptitle('AtB Network Deterioration', fontsize=16)
fig.tight_layout()
plt.savefig("../plots/model_deterioration_atb.png", dpi=1800, bbox_inches='tight')