In [None]:
import pickle
import sys
from zoneinfo import ZoneInfo
sys.path.append("../")
from datetime import datetime

from dotenv import load_dotenv
load_dotenv()
import geopandas as gpd
import importlib
import copy
import logging
import contextily as cx
import gtfs_kit as gk
import fastsim as fsim
import matplotlib.pyplot as plt
import numpy as np
import os
import pandas as pd
from pathlib import Path
import lightning.pytorch as pl
import rasterio as rio
import seaborn as sns
from rasterio.plot import show
import seaborn as sns
from sklearn.cluster import KMeans
import shapely
import statsmodels.api as sm
from torch.utils.data import DataLoader

from openbustools.traveltime.models import rnn
from openbustools import plotting, spatial, standardfeeds
from openbustools.traveltime import data_loader, model_utils
from openbustools.drivecycle import trajectory, busnetwork
from openbustools.drivecycle.physics import conditions, energy, vehicle

### Energy Use for KCM Network

In [None]:
# Network whose pre-computed energy results are analysed in this notebook
network_name = "kcm"
res_dir = Path("..","results","energy", network_name)
# EPSG code assigned to the point geometries built in the plotting cells
epsg = 32148


def _load_result(filename):
    """Unpickle one result file from ``res_dir``.

    The file is opened in a ``with`` block so the handle is closed even if
    unpickling raises.

    NOTE: ``pickle.load`` can execute arbitrary code; only load result files
    that were produced by this project's own pipeline.
    """
    with open(res_dir / filename, "rb") as result_file:
        return pickle.load(result_file)


trajectories_updated = _load_result("trajectories_updated.pkl")
depot_locations = _load_result("depot_locations.pkl")
cycles = _load_result("cycles.pkl")
network_energy = _load_result("network_energy.pkl")
network_charging = _load_result("network_charging.pkl")

In [None]:
# Histogram of the per-trip economy reported by the drive cycle results
trip_economy = network_energy['economy_kwh_mi']
avg_trip_economy = np.mean(trip_economy)

fig, axes = plt.subplots(1,1, figsize=(8,6))
sns.histplot(trip_economy, kde=True, ax=axes)
# Dashed red line marks the mean economy over all trips
axes.axvline(x=avg_trip_economy, linestyle='dashed', color='r')
axes.set_xlabel("Economy (kWh/mi)")
# NOTE(review): the `:.2` spec formats to 2 significant digits, not 2 decimals - confirm intent
fig.suptitle(f"BEB Trip Economy Distribution\n{len(network_energy)} Trips - (Avg. {avg_trip_economy:.2} kWh/mi)")
fig.tight_layout()
fig.savefig(Path("..", "plots", "kcm_economy_distribution.png"))
plt.show()

In [None]:
# One row per block, taking the first value of each column within the block
# (presumably the energy columns repeat a block-level total on every trip row - verify)
block_energy_df = network_energy.groupby('block_id').first()
block_energy_kwh = block_energy_df['Energy used estimate']
avg_block_energy_kwh = np.mean(block_energy_kwh)

# Empirical CDF of the energy estimate across blocks
fig, axes = plt.subplots(1,1, figsize=(8,6))
sns.ecdfplot(block_energy_kwh, ax=axes)
# Reference line labelled "New Flyer XE40" at 466 kWh
# (presumably that vehicle's battery capacity - TODO confirm the source of this value)
axes.axvline(x=466, linestyle='dashed', color='r')
axes.text(475, 0.4, "New Flyer XE40", color='black', fontsize=12)
axes.set_xlabel("Block Required Energy (kWh)")
axes.set_ylabel("Proportion of Total Blocks")
fig.suptitle(f"BEB Block Energy Distribution\n{len(block_energy_df)} Blocks - (Avg. {avg_block_energy_kwh:.0f} kWh)")
fig.tight_layout()
fig.savefig(Path("..", "plots", "kcm_energy_distribution.png"))
plt.show()

In [None]:
# Depot locations
# First row per block; its 'start_loc' point is plotted as the block's start location
block_starts_df = network_energy.groupby('block_id').first()
block_starts_df = gpd.GeoDataFrame(
    block_starts_df,
    geometry=gpd.points_from_xy(
        [loc.x for loc in block_starts_df['start_loc']],
        [loc.y for loc in block_starts_df['start_loc']],
    ),
).set_crs(epsg)
depot_df = gpd.GeoDataFrame(
    depot_locations,
    geometry=gpd.points_from_xy(depot_locations['depot_x'], depot_locations['depot_y']),
).set_crs(epsg)

fig, axes = plt.subplots(1,1, figsize=(10,10))
# Block start points coloured by their assigned depot; depots drawn as blue crosses
block_starts_df.plot(ax=axes, column='depot_id', markersize=10, cmap='tab20')
depot_df.plot(ax=axes, marker='x', markersize=100, color="blue", linewidth=3)
cx.add_basemap(ax=axes, crs=block_starts_df.crs.to_string(), source=cx.providers.CartoDB.Positron)
axes.set_xticks([])
axes.set_yticks([])
fig.suptitle("Clustered Depot Locations")
fig.tight_layout()
# Save before showing, the same order used by the distribution figures earlier in the notebook
fig.savefig(Path("..", "plots", "kcm_depot_locations.png"))
plt.show()

### Validate Block Energy w/KCM Report

In [None]:
# Load the daily summaries from KCM report
summary_data = standardfeeds.clean_parametrix("../data/bebdatafollowup/Viriciti_Energy_Reports-2023.csv")
# Keep records from 2023-12-01 onward; .copy() makes the filtered frame independent so the
# column assignment below does not write into a slice (SettingWithCopyWarning)
summary_data = summary_data[summary_data['DateTime'] >= datetime(2023, 12, 1)].copy()
# Day key formatted to match the date-named realtime files read below
summary_data['realtime_filename'] = summary_data['DateTime'].dt.strftime("%Y_%m_%d")
# Average each metric per (day, vehicle)
summary_data = (
    summary_data
    .groupby(['realtime_filename','vehicle_id','metric'])
    .agg({'value':'mean'})
    .reset_index()
    .sort_values(['realtime_filename', 'vehicle_id', 'metric'])
)

# Load the most recent static feed
static_path = Path("..","data","kcm_static","2023_09_27")
static = gk.read_feed(static_path, dist_units='km')

# Load realtime data from all BEB vehicle IDs in the KCM report
realtime_path = Path("..","data","kcm_realtime","processed", "analysis")
beb_ids = summary_data['vehicle_id'].unique()
beb_dates = summary_data['realtime_filename'].unique()
realtime_frames = []
for d in beb_dates:
    # One pickle per day, read from realtime_path instead of repeating the path as a string
    realtime_data = pd.read_pickle(realtime_path / f"{d}.pkl")
    # .copy() for the same reason as above: the next line assigns a column on a filtered frame
    realtime_data = realtime_data[realtime_data['vehicle_id'].isin(beb_ids)].copy()
    # Drop the last 4 characters (presumably the ".pkl" extension - verify) so the key matches summary_data
    realtime_data['realtime_filename'] = realtime_data['realtime_filename'].str[:-4]
    realtime_frames.append(realtime_data)
all_realtime_data = pd.concat(realtime_frames).sort_values(['realtime_filename','vehicle_id','trip_id','locationtime'])

# Map (day, vehicle_id) > trip_ids using the realtime data
trip_id_lookup = all_realtime_data[['realtime_filename','vehicle_id','trip_id']].drop_duplicates().copy()
# Map trip_id > (service_id, block_id) using the static data
block_id_lookup = static.get_trips()[['service_id','block_id','trip_id']].drop_duplicates().copy()
block_id_lookup = pd.merge(trip_id_lookup, block_id_lookup, on='trip_id')
block_id_lookup = block_id_lookup[['realtime_filename','vehicle_id','service_id','block_id']].drop_duplicates().copy()
# Join energy summaries to their block_ids; note there are days where the vehicle was tracked on multiple blocks in the realtime
summary_data = pd.merge(summary_data, block_id_lookup, on=['realtime_filename','vehicle_id'])
# Get comparison metrics for each block
# Means: one row per block, one column per metric
summary_data_means = summary_data.groupby(['block_id','metric'], as_index=False).agg({'value': 'mean'}).pivot(index='block_id', columns='metric', values='value')
# Stds: per-block standard deviation of each metric, then averaged over blocks (one value per metric)
summary_data_stds = summary_data.groupby(['block_id','metric'], as_index=False).agg({'value': 'std'}).pivot(index='block_id', columns='metric', values='value').mean()
summary_data_means

In [None]:
# Comparison data from modeled blocks
# Every energy column is reduced to its first value within each block
modeled_energy_columns = [
    'Energy charged',
    'Energy used',
    'Energy used estimate',
    'Energy used in service',
    'Energy used not in service',
    'Energy idled in service',
    'Energy idled not in service',
    'Energy driven',
    'Energy consumed driving',
    'Energy regenerated driving',
]
modeled_energy_data = network_energy.groupby('block_id', as_index=False).agg(
    {column: 'first' for column in modeled_energy_columns}
)
modeled_energy_data['Energy idled'] = modeled_energy_data['Energy idled in service'] + modeled_energy_data['Energy idled not in service']
# Inner join keeps only blocks present in both sources. Columns that exist on both sides get
# pandas' default suffixes: "_x" for the reported (left) frame and "_y" for the modeled (right) frame.
energy_comparison = pd.merge(summary_data_means, modeled_energy_data, on='block_id', how='inner')

metrics = ['Energy used', 'Energy consumed driving', 'Energy regenerated driving', 'Energy idled']
# Shared x/y limits per panel so the 1:1 line runs corner to corner
lims = [[0,800], [0, 800], [0, 150], [0, 150]]
fig, axes = plt.subplots(2, 2, figsize=(10,8))
axes = axes.flatten()
for ax, metric, lim in zip(axes, metrics, lims):
    reported_std = summary_data_stds[metric]
    sns.scatterplot(data=energy_comparison, x=f"{metric}_x", y=f"{metric}_y", ax=ax)
    ax.set_title(f"{metric}")
    ax.set_xlabel("Reported")
    ax.set_ylabel("Modeled")
    ax.set_xlim(lim)
    ax.set_ylim(lim)
    # Dashed 1:1 line, with a fainter band offset by +/- the mean per-block std of the reported values
    ax.axline([0,0], [1,1], color='red', linestyle='--', alpha=.8)
    ax.axline([0, 0+reported_std], [1, 1+reported_std], color='red', alpha=.3)
    ax.axline([0, 0-reported_std], [1, 1-reported_std], color='red', alpha=.3)
fig.tight_layout()
# Save before showing, the same order used by the distribution figures earlier in the notebook
fig.savefig(Path("..", "plots", "parametrix_compare.png"))
plt.show()

### Charging for KCM Network

In [None]:
# Minimum charging rate to cover block energy: 90th, 95th and 98th percentiles of the column
charge_rate_quantiles = np.quantile(network_charging['min_charge_rate'], [.90, .95, .98])
print(charge_rate_quantiles)

# Disabled: ECDF plot of the minimum charging rate across blocks
# fig, axes = plt.subplots(1,1, figsize=(12,6))
# sns.ecdfplot(network_charging['min_charge_rate'], ax=axes)
# axes.set_xlabel("Charging Rate (kW)")
# axes.set_ylabel("Proportion of Total Blocks")
# axes.set_xlim(0, 350)
# fig.suptitle(f"Block Minimum Charging Rate\n{len(block_energy_df)} Blocks")
# fig.tight_layout()
# plt.show()

In [None]:
# Display the charging table ordered by the 't_min_of_day' column
network_charging.sort_values(by='t_min_of_day')

In [None]:
# Evaluate fleet status at every minute of a three-day window (0 .. 3*1440-1)
t_mins = np.arange(0, 3 * 1440)

veh_active = []
veh_charging = []
veh_arriving = []
veh_departing = []
energy_departing = []
power_unmanaged = []
for minute in t_mins:
    # Rows whose [t_min_of_day, t_min_of_day_end] interval contains this minute
    active_rows = network_charging[
        (network_charging['t_min_of_day'] <= minute) & (network_charging['t_min_of_day_end'] >= minute)
    ]
    # Rows whose [t_charge_start_min, t_charge_end_min] interval contains this minute
    charging_rows = network_charging[
        (network_charging['t_charge_start_min'] <= minute) & (network_charging['t_charge_end_min'] >= minute)
    ]
    arriving_rows = network_charging[network_charging['t_min_of_day_end'] == minute]
    departing_rows = network_charging[network_charging['t_min_of_day'] == minute]

    veh_active.append(len(active_rows))
    veh_charging.append(len(charging_rows))
    veh_arriving.append(len(arriving_rows))
    veh_departing.append(len(departing_rows))
    energy_departing.append(departing_rows['Energy used estimate'].sum())
    power_unmanaged.append(charging_rows['plug_power_kw'].sum())

veh_status_df = pd.DataFrame({
    't_min_of_day': t_mins,
    'tot_veh_active': veh_active,
    'tot_veh_charging_unmanaged': veh_charging,
    'tot_veh_arriving': veh_arriving,
    'tot_veh_departing': veh_departing,
    'tot_energy_departing': energy_departing,
    'tot_power_unmanaged': power_unmanaged,
})
# Reset time to 0-1440: fold days two and three back onto the first day, then add up
# the rows that land on the same minute
veh_status_df['t_min_of_day'] = veh_status_df['t_min_of_day'] % 1440
veh_status_df = veh_status_df.groupby('t_min_of_day', as_index=False).sum()
veh_status_df.sort_values('t_min_of_day')

In [None]:
# Active vehicles (left axis) against unmanaged charging power (right axis) by minute of day
fig, axes = plt.subplots(1,1, figsize=(10,5))
# Create the secondary y-axis from `axes` itself rather than from pyplot's implicit current axes
axes2 = axes.twinx()

sns.lineplot(veh_status_df, x='t_min_of_day', y='tot_veh_active', ax=axes, color=sns.color_palette()[0], label="Active")
axes.set_ylim(0,1000)
axes.set_xlabel("Time of Day (minutes)")
axes.set_ylabel("Number of Vehicles")
axes.legend().remove()

# Peak of the unmanaged power curve, used for the reference line and its annotation
peak_power_kw = veh_status_df['tot_power_unmanaged'].max()
sns.lineplot(veh_status_df, x='t_min_of_day', y='tot_power_unmanaged', ax=axes2, color=sns.color_palette()[1], label="Power Unmanaged")
axes2.axhline(y=peak_power_kw, linestyle='dashed', color='red')
# Annotation sits 5% below the peak line; the value is divided by 1000 to report MW
axes2.text(160, peak_power_kw-.05*peak_power_kw, f"Peak Power Demand: {peak_power_kw/1000:.1f} MW", color='black', fontsize=12)
axes2.set_xlabel("Time of Day (minutes)")
axes2.set_ylabel("Power Usage (kW)")
axes2.legend().remove()

fig.suptitle("Vehicle Status by Time of Day")
fig.tight_layout()
# Save before showing, the same order used by the distribution figures earlier in the notebook
fig.savefig(Path("..","plots","kcm_veh_status.png"))
plt.show()

In [None]:
# Aggregate the per-minute status into 15-minute bins.
# np.digitize returns 1 for minutes 0-14, 2 for 15-29, ...; multiplying by 15 labels each bin
# with its closing minute (15, 30, ...).
min_bins = np.arange(0, 1440, 15)
veh_status_df['t_min_of_day_bin'] = np.digitize(veh_status_df['t_min_of_day'], min_bins) * 15
plot_df = veh_status_df.groupby('t_min_of_day_bin').sum()

fig, axes = plt.subplots(1,1, figsize=(10,5))
# Create the secondary y-axis from `axes` itself rather than from pyplot's implicit current axes
axes2 = axes.twinx()

sns.lineplot(plot_df, x='t_min_of_day_bin', y='tot_veh_arriving', ax=axes, color=sns.color_palette()[0], label="Vehicles Arriving (15min)")
sns.lineplot(plot_df, x='t_min_of_day_bin', y='tot_veh_departing', ax=axes, color=sns.color_palette()[1], label="Vehicles Departing (15min)")
axes.set_xlabel("Time of Day (minutes)")
axes.set_ylabel("Number of Vehicles")

# This line plots tot_energy_departing, so it is labelled as departing energy
# (it previously carried the label "Power Unmanaged" from the vehicle status figure)
sns.lineplot(plot_df, x='t_min_of_day_bin', y='tot_energy_departing', ax=axes2, color=sns.color_palette()[2], linestyle='dashed', label="Departing Energy Needs (15min)")
axes2.set_xlabel("Time of Day (minutes)")
axes2.set_ylabel("Departing Energy Needs (kWh)")
axes2.legend().remove()

fig.suptitle("Vehicle Availability and Needs by Time of Day")
fig.tight_layout()
# Save before showing, the same order used by the distribution figures earlier in the notebook
fig.savefig(Path("..","plots","kcm_block_pullout.png"))
plt.show()

### KCM Sensitivity Analysis and Performance Metrics

In [None]:
# Sensitivity results for the KCM network, read from its results directory
kcm_sensitivity_path = Path("..", "results", "energy", "kcm", "network_sensitivity.pkl")
kcm_sensitivity = pd.read_pickle(kcm_sensitivity_path)

In [None]:
# Compare metrics across sensitivity parameters
sensitivity_metrics = kcm_sensitivity['metric'].unique()
n_cols = 2
# Round the row count up so an odd number of metrics still gets a panel each
# (floor division dropped the last metric's axis and gave zero rows for a single metric)
n_rows = max(1, (len(sensitivity_metrics) + n_cols - 1) // n_cols)
# squeeze=False always returns a 2D array of axes, so flatten() works for any grid shape
fig, axes = plt.subplots(n_rows, n_cols, figsize=(20,15), squeeze=False)
axes = axes.flatten()
for i, metric in enumerate(sensitivity_metrics):
    sns.boxplot(kcm_sensitivity[kcm_sensitivity['metric']==metric], x='sensitivity_parameter', y='value', ax=axes[i])
    # The metric name is carried by the y-label; the panel title and x-label are left blank
    axes[i].set_xlabel("")
    axes[i].set_ylabel(metric)
    axes[i].set_title("")
    # Rotate x labels
    for tick in axes[i].get_xticklabels():
        tick.set_rotation(20)
# Hide any spare panel left over when the metric count does not fill the grid
for unused_ax in axes[len(sensitivity_metrics):]:
    unused_ax.set_visible(False)
fig.tight_layout()
# Save before showing, the same order used by the distribution figures earlier in the notebook
fig.savefig(Path("..", "plots", "kcm_sensitivity.png"))
plt.show()

### All Networks Sensitivity Analysis and Performance Metrics

In [None]:
# cleaned_sources = pd.read_csv("../data/cleaned_sources.csv")

# all_network_res = []
# for i,row in cleaned_sources.iloc[:1].iterrows():
#     try:
#         network_sensitivity = pd.read_pickle(Path("..","results","energy",row['uuid'],"network_sensitivity.pkl"))
#         all_network_res.append(network_sensitivity)
#     except Exception as e:
#         continue
# all_network_res = pd.concat(all_network_res)

In [None]:
# # Compare metrics across sensitivity parameters
# fig, axes = plt.subplots(len(all_network_res['metric'].unique()), 1, figsize=(10,30))
# for i, metric in enumerate(all_network_res['metric'].unique()):
#     sns.boxplot(all_network_res[all_network_res['metric']==metric], x='sensitivity_parameter', y='value', ax=axes[i])
#     axes[i].set_title(metric)
#     axes[i].set_xlabel("")
#     axes[i].set_ylabel(metric)
#     axes[i].set_title("")
#     # Rotate x labels
#     for tick in axes[i].get_xticklabels():
#         tick.set_rotation(20)
# fig.tight_layout()
# plt.show()