In [None]:
import os
import importlib
import json
import matplotlib.pyplot as plt
import geopandas as gpd
import pandas as pd
import plotly.express as px
import plotly.figure_factory as ff
import pyproj
import seaborn as sns
import lightning.pytorch as pl
import numpy as np
from shapely.geometry import Point
from sklearn import metrics
import torch
from torch.utils.data import DataLoader, SequentialSampler
from dotenv import load_dotenv
load_dotenv()

from models import grids
from utils import data_loader, data_utils, model_utils

# Folder of the run whose config, test data and model checkpoints are read below.
run_folder = "../results/debug/"
# Sub-folder for the AtB network. It is concatenated directly after run_folder
# when paths are built, so both values must keep their trailing slash.
atb_network_folder = "atb/"

In [None]:
# Run selection and data-loading settings used by the cells below.
model_type = "GRU"
fold_num = 4
grid_s_size = 500
skip_gtfs = False
num_workers = 0
pin_memory = False

# Summary config stored alongside the formatted data of this run.
config_path = f"{run_folder}{atb_network_folder}deeptte_formatted/train_summary_config.json"
with open(config_path, "r") as config_file:
    atb_config = json.load(config_file)

# Coordinate reference systems: the one recorded in the config, and WGS84
# (EPSG:4326) lat/lon used for mapping.
atb_crs = pyproj.crs.CRS.from_epsg(atb_config['epsg'][0])
default_crs = pyproj.crs.CRS.from_epsg(4326)

# The Mapbox token is read from the environment rather than hard-coded.
px.set_mapbox_access_token(os.getenv("MAPBOX_TOKEN"))

# Embedded (categorical) variables for the network models: vocabulary size and
# embedding width for each ID feature.
embed_dict = dict(
    timeID=dict(vocab_size=1440, embed_dims=27),
    weekID=dict(vocab_size=7, embed_dims=4),
)

# Hyperparameters keyed by model type; DEEPTTE only specifies a batch size.
hyperparameter_dict = dict(
    FF=dict(batch_size=1024, hidden_size=128, num_layers=2, dropout_rate=0.2),
    CONV=dict(batch_size=1024, hidden_size=64, num_layers=3, dropout_rate=0.1),
    GRU=dict(batch_size=1024, hidden_size=64, num_layers=2, dropout_rate=0.05),
    TRSF=dict(batch_size=1024, hidden_size=64, num_layers=3, dropout_rate=0.1),
    DEEPTTE=dict(batch_size=1024),
)

### Inference on Sample of Shingles

In [None]:
# Set up model: restore the trained weights of the selected model type and fold.
weight_folder = f"{run_folder}{atb_network_folder}models/{model_type}/logs/{model_type}/version_{fold_num}/checkpoints/"
atb_base_model_list, atb_nn_model = model_utils.make_one_model(
    model_type,
    hyperparameter_dict=hyperparameter_dict,
    embed_dict=embed_dict,
    config=atb_config,
    skip_gtfs=skip_gtfs,
    load_weights=True,
    weight_folder=weight_folder,
    # Pass the configured fold (previously a hard-coded 4) so it can never
    # disagree with the version_{fold_num} checkpoint folder above.
    fold_num=fold_num,
)
print(f"Evaluating: {atb_nn_model.model_name}")

# Set up dataset: test split plus a grid populated from its own samples.
atb_dataset = data_loader.LoadSliceDataset(f"{run_folder}{atb_network_folder}deeptte_formatted/test", atb_config, skip_gtfs=skip_gtfs)
atb_ngrid = grids.NGridBetter(atb_config['grid_bounds'][0], grid_s_size)
atb_ngrid.add_grid_content(
    atb_dataset.get_all_samples(keep_cols=['shingle_id', 'locationtime', 'x', 'y', 'speed_m_s', 'bearing']),
    trace_format=True,
)
atb_ngrid.build_cell_lookup()
atb_dataset.grid = atb_ngrid
# Grid features are only attached when the model declares that it needs them.
atb_dataset.add_grid_features = atb_nn_model.requires_grid

# Sequential sampling with drop_last=False: the next cell pairs the predictions
# with the samples obtained by iterating atb_dataset, so order and count matter.
loader = DataLoader(
    atb_dataset,
    sampler=SequentialSampler(atb_dataset),
    collate_fn=atb_nn_model.collate_fn,
    batch_size=atb_nn_model.batch_size,
    pin_memory=pin_memory,
    num_workers=num_workers,
    drop_last=False,
)
trainer = pl.Trainer(
    accelerator="cpu",
    logger=False
)
preds_and_labels = trainer.predict(model=atb_nn_model, dataloaders=loader)

In [None]:
# Extract predictions, keeping only the entries selected by each batch's mask
atb_preds = np.concatenate([x['out'][x['mask']] for x in preds_and_labels])
atb_labels = np.concatenate([x['y'][x['mask']] for x in preds_and_labels])
# Extract data points and connect to predictions
atb_data = [x for x in atb_dataset]
atb_feats = np.concatenate([x['samp'] for x in atb_data])
atb_ys = np.array([x['norm_label'] for x in atb_data])
atb_res = pd.DataFrame(atb_feats, columns=atb_dataset.col_names)
atb_res['preds'] = atb_preds
atb_res['labels'] = atb_labels
# Get geometries and other features for every prediction point
# (distance in km -> m, divided by the predicted/labelled value, gives the
# speeds that are plotted in m/s below)
atb_res['pred_speeds'] = atb_res['dist_calc_km']*1000 / atb_res['preds']
atb_res['label_speeds'] = atb_res['dist_calc_km']*1000 / atb_res['labels']
atb_res['absolute_error'] = abs(atb_res['preds'] - atb_res['labels'])
atb_res['hour'] = atb_res['timeID']//60
points = gpd.points_from_xy(atb_res['lon'], atb_res['lat'], crs="EPSG:4326")
atb_res = gpd.GeoDataFrame(atb_res, geometry=points)
# Downsample for plotting. Cap the sample size at the number of rows available
# (DataFrame.sample raises ValueError when n exceeds the population without
# replacement) and fix the seed so the figures are reproducible across runs.
max_plot_points = 100000
plot_sample_seed = 0
atb_res = atb_res.sample(n=min(len(atb_res), max_plot_points), random_state=plot_sample_seed)
# Overall accuracy on data points (computed on the full arrays, not the sample)
print(f"MAE: {metrics.mean_absolute_error(atb_labels,atb_preds)}")
print(f"MAPE: {metrics.mean_absolute_percentage_error(atb_labels,atb_preds)}")
atb_res

In [None]:
# NOTE(review): disabled KDE point heatmap. It relies on `geoplot`, which is not
# among the imports in the first cell, and the two blocks below differ only in
# the plot title while writing to the same file. TODO: re-enable or delete.
# axes = geoplot.pointplot(atb_res, projection=geoplot.crs.AlbersEqualArea(), s=0.1)
# geoplot.kdeplot(atb_res, fill=True, cmap='coolwarm', alpha=0.5, bw_adjust=0.5, ax=axes)
# axes.set_title(f"Point Heatmap(atb)")
# plt.savefig("../plots/model_spatial_performance_atb.png")

# axes = geoplot.pointplot(atb_res, projection=geoplot.crs.AlbersEqualArea(), s=0.1)
# geoplot.kdeplot(atb_res, fill=True, cmap='coolwarm', alpha=0.5, bw_adjust=0.5, ax=axes)
# axes.set_title(f"Point Heatmap (AtB)")
# plt.savefig("../plots/model_spatial_performance_atb.png")

In [None]:
# Hexbin map: standard deviation of the predicted speeds inside each hexagon.
sd_pred_map_opts = dict(
    lat="lat",
    lon="lon",
    nx_hexagon=30,
    opacity=0.7,
    labels={"color": "SD of Predictions"},
    color="pred_speeds",
    agg_func=np.std,
    color_continuous_scale="Icefire",
    range_color=[0, 10],
)
fig = ff.create_hexbin_mapbox(data_frame=atb_res, **sd_pred_map_opts)
fig.show()
fig.write_image("../plots/within_sd_preds_hexbin_atb.png")

In [None]:
# Hexbin map: standard deviation of the label speeds inside each hexagon.
sd_label_map_opts = dict(
    lat="lat",
    lon="lon",
    nx_hexagon=30,
    opacity=0.7,
    labels={"color": "SD of Labels"},
    color="label_speeds",
    agg_func=np.std,
    color_continuous_scale="Icefire",
    range_color=[0, 10],
)
fig = ff.create_hexbin_mapbox(data_frame=atb_res, **sd_label_map_opts)
fig.show()
fig.write_image("../plots/within_sd_labels_hexbin_atb.png")

In [None]:
# Hexbin map: mean absolute error of the predictions inside each hexagon.
mae_map_opts = dict(
    lat="lat",
    lon="lon",
    nx_hexagon=30,
    opacity=0.7,
    labels={"color": "MAE"},
    color="absolute_error",
    agg_func=np.mean,
    color_continuous_scale="Icefire",
    range_color=[0, 100],
)
fig = ff.create_hexbin_mapbox(data_frame=atb_res, **mae_map_opts)
fig.show()
fig.write_image("../plots/within_mean_mae_hexbin_atb.png")

In [None]:
# Distributions of predicted speed, label speed and absolute error.
# The figure size is set at creation (8 wide x 10 high, as before).
fig, axes = plt.subplots(3, 2, figsize=(8, 10))
axes = axes.flatten()
# Only the left column (flattened indices 0, 2, 4) is drawn; the right column
# stays empty — presumably reserved for a second network, TODO confirm.
distribution_panels = [
    (0, "pred_speeds", "Predicted Speeds(atb)", "Speed (m/s)"),
    (2, "label_speeds", "Label Speeds (atb)", "Speed (m/s)"),
    (4, "absolute_error", "Absolute Error (atb)", "Error (s)"),
]
for panel_idx, column, title, xlabel in distribution_panels:
    sns.histplot(atb_res, x=column, ax=axes[panel_idx])
    axes[panel_idx].set_title(title)
    axes[panel_idx].set_xlabel(xlabel)
# tight_layout is a one-shot adjustment, so it has to run after the titles and
# labels exist and the final size is known; calling it right after
# plt.subplots (as before) left titles and x-labels overlapping.
fig.tight_layout()
plt.savefig("../plots/model_prediction_distribution_comparison.png")

In [None]:
# Hourly profiles of predicted speed, label speed and absolute error.
# The figure size is set at creation (8 wide x 10 high, as before).
fig, axes = plt.subplots(3, 2, figsize=(8, 10))
axes = axes.flatten()
# Only the left column (flattened indices 0, 2, 4) is drawn; the right column
# stays empty — presumably reserved for a second network, TODO confirm.
hourly_panels = [
    (0, "pred_speeds", "Predicted Speeds (atb)", "Speed (m/s)"),
    (2, "label_speeds", "Label Speeds (atb)", "Speed (m/s)"),
    (4, "absolute_error", "Mean Absolute Error (atb)", "MAE (s)"),
]
for panel_idx, column, title, ylabel in hourly_panels:
    sns.lineplot(atb_res, x="hour", y=column, ax=axes[panel_idx])
    axes[panel_idx].set_title(title)
    axes[panel_idx].set_xlabel("Hour of Day")
    axes[panel_idx].set_ylabel(ylabel)
    axes[panel_idx].set_xlim(0, 24)
    axes[panel_idx].set_ylim(0, 50)
# tight_layout is a one-shot adjustment, so it has to run after the titles and
# labels exist and the final size is known; calling it right after
# plt.subplots (as before) left titles and x-labels overlapping.
fig.tight_layout()
plt.savefig("../plots/model_hourly_comparison.png")

### Inference on Entire Network

In [None]:
# Build the regularly spaced fake shingles that are fed to the model, using the
# grid bounds and coordinate reference centre stored in the network config.
mesh_bounds = atb_config['grid_bounds'][0]
mesh_ref_center = atb_config['coord_ref_center'][0]
inference_shingles = data_utils.create_grid_of_shingles(100, mesh_bounds, mesh_ref_center)

In [None]:
# Make predictions for the fake shingles
# NOTE: atb_dataset, loader, trainer and preds_and_labels are deliberately
# rebound here; from this point on they refer to the synthetic mesh, not to the
# test split used in the cells above.
atb_dataset = data_loader.ContentDataset(inference_shingles, atb_config, skip_gtfs=True)
# Sequential sampling with drop_last=False keeps the predictions in the same
# order and count as iteration over atb_dataset, which the next cell relies on.
loader = DataLoader(
    atb_dataset,
    sampler=SequentialSampler(atb_dataset),
    collate_fn=atb_nn_model.collate_fn,
    batch_size=atb_nn_model.batch_size,
    pin_memory=pin_memory,
    num_workers=num_workers,
    drop_last=False,
)
trainer = pl.Trainer(
    accelerator="cpu",
    logger=False
)
preds_and_labels = trainer.predict(model=atb_nn_model, dataloaders=loader)
# The masked predictions/labels are extracted at the top of the next cell; the
# identical extraction that used to be repeated here was redundant.

In [None]:
# Extract predictions, keeping only the entries selected by each batch's mask
atb_preds = np.concatenate([x['out'][x['mask']] for x in preds_and_labels])
atb_labels = np.concatenate([x['y'][x['mask']] for x in preds_and_labels])
# Extract data points and connect to predictions
atb_data = [x for x in atb_dataset]
atb_feats = np.concatenate([x['samp'] for x in atb_data])
atb_ys = np.array([x['norm_label'] for x in atb_data])
atb_res = pd.DataFrame(atb_feats, columns=atb_dataset.col_names)
atb_res['preds'] = atb_preds
atb_res['labels'] = atb_labels
# Get derived features for every prediction point
# NOTE(review): the shingles are synthetic, so the label-based columns are
# presumably placeholders here — confirm before interpreting them.
atb_res['pred_speeds'] = atb_res['dist_calc_km']*1000 / atb_res['preds']
atb_res['label_speeds'] = atb_res['dist_calc_km']*1000 / atb_res['labels']
atb_res['absolute_error'] = abs(atb_res['preds'] - atb_res['labels'])
atb_res['hour'] = atb_res['timeID']//60
# Transform x and y to replace dummy lat and lon for mapping.
# The geometry is built once, after this replacement; the earlier version first
# built it from the dummy lat/lon and then immediately overwrote it.
# Without always_xy, pyproj uses each CRS's own axis order, so the EPSG:4326
# output arrives as (lat, lon). This assumes atb_crs takes (x, y) as
# easting/northing in that order — TODO confirm for the configured EPSG code.
transformer = pyproj.Transformer.from_crs(atb_crs, default_crs)
atb_res['lat'], atb_res['lon'] = transformer.transform(atb_res['x'], atb_res['y'])
points = gpd.points_from_xy(atb_res['lon'], atb_res['lat'], crs=default_crs)
atb_res = gpd.GeoDataFrame(atb_res, geometry=points)
atb_res

In [None]:
# Hexbin map over the synthetic mesh: mean predicted speed inside each hexagon.
mesh_mean_map_opts = dict(
    lat="lat",
    lon="lon",
    nx_hexagon=50,
    opacity=0.4,
    labels={"color": "Mean Speed (m/s)"},
    color="pred_speeds",
    agg_func=np.mean,
    color_continuous_scale="Icefire_r",
)
fig = ff.create_hexbin_mapbox(data_frame=atb_res, **mesh_mean_map_opts)
fig.show()
fig.write_image("../plots/mesh_mean_pred_speeds_hexbin_atb.png")

In [None]:
# Hexbin map over the synthetic mesh: standard deviation of the predicted
# speeds inside each hexagon.
mesh_sd_map_opts = dict(
    lat="lat",
    lon="lon",
    nx_hexagon=50,
    opacity=0.4,
    labels={"color": "SD Speed (m/s)"},
    color="pred_speeds",
    agg_func=np.std,
    color_continuous_scale="Icefire_r",
)
fig = ff.create_hexbin_mapbox(data_frame=atb_res, **mesh_sd_map_opts)
fig.show()
fig.write_image("../plots/mesh_sd_pred_speeds_hexbin_atb.png")