In [None]:
import pickle
import sys
from zoneinfo import ZoneInfo
sys.path.append("../")
from datetime import datetime

from dotenv import load_dotenv
load_dotenv()
import geopandas as gpd
import importlib
import copy
import logging
import contextily as cx
import gtfs_kit as gk
import fastsim as fsim
import matplotlib.pyplot as plt
import numpy as np
import os
import pandas as pd
from pathlib import Path
import lightning.pytorch as pl
import rasterio as rio
import seaborn as sns
from rasterio.plot import show
import seaborn as sns
import shapely
import statsmodels.api as sm
from torch.utils.data import DataLoader

from openbustools.traveltime.models import rnn
from openbustools import plotting, spatial, standardfeeds
from openbustools.traveltime import data_loader, model_utils
from openbustools.drivecycle import trajectory, busnetwork
from openbustools.drivecycle.physics import conditions, energy, vehicle

### Draw Energy Predictions for Full KCM Network

In [None]:
# Load the pickled energy results (trajectories and drive cycles) for the KCM network.
network_name = "kcm"

# Results are stored one directory above the notebook's working directory.
res_dir = Path("..","results","energy", network_name)

# NOTE(review): pickle.load can execute arbitrary code; only load result files
# that were produced by this project's own pipeline.
# Context managers close each handle even if unpickling raises.
with open(res_dir / "trajectories.pkl", "rb") as file:
    trajectories = pickle.load(file)

with open(res_dir / "cycles.pkl", "rb") as file:
    cycles = pickle.load(file)

In [None]:
# Invert each cycle's consumption (kWh/mi) to get an efficiency (mi/kWh).
efficiencies = np.array([1 / drive_cycle.electric_kwh_per_mi for drive_cycle in cycles])

# Only efficiencies below 5 mi/kWh are drawn; the title still reports the
# total number of cycles, including any that were filtered out of the plot.
plotted_efficiencies = efficiencies[efficiencies < 5]

fig, ax = plt.subplots()
sns.histplot(plotted_efficiencies, kde=True, ax=ax)
ax.set_title(f"BEB Shape Efficiency Distribution\n{len(efficiencies)} Routes")
ax.set_xlabel("Efficiency (mi/kWh)")
plt.show()

In [None]:
# Build the table of trips that run on the target day, with their start/end times.
static_dir = Path("..","data","kcm_static","2023_09_27")
target_day = "2024_01_03"

# datetime.weekday() returns 0 for Monday through 6 for Sunday.
t_day_of_week = datetime.strptime(target_day, "%Y_%m_%d").weekday()

# GTFS calendar.txt has one 0/1 column per weekday. Select it by name rather
# than by position so the lookup does not depend on the file's column order.
weekday_columns = ["monday", "tuesday", "wednesday", "thursday", "friday", "saturday", "sunday"]
t_day_column = weekday_columns[t_day_of_week]

# The feed is restricted to the target date, passed in YYYYMMDD form.
static_feed = gk.read_feed(static_dir, dist_units="km").restrict_to_dates([target_day.replace('_', '')])

# Service ids flagged as running on the target weekday.
active_service_ids = static_feed.calendar.loc[static_feed.calendar[t_day_column] == 1, 'service_id'].to_numpy()

# Keep only trips belonging to an active service, then attach the per-trip
# start/end times computed by the project helper.
trips = static_feed.get_trips()
trips = trips[trips['service_id'].isin(active_service_ids)]
trip_start_end = standardfeeds.get_trip_start_and_end_times(static_feed)
trips = pd.merge(trips, trip_start_end, on='trip_id')
trips

In [None]:
# Map each block_id to the DataFrame of its trips, ordered within the block
# by the t_min_of_day_first column.
blocks = {
    block_id: block_trips
    for block_id, block_trips in trips.sort_values(['block_id', 't_min_of_day_first']).groupby('block_id')
}

# Preview the trips of the first block in the mapping.
next(iter(blocks.values()))

In [None]:
# Look up the first trajectory that belongs to an example block.
# traj_block_ids stays a list of block ids, one per trajectory; the example
# lookup uses its own names instead of rebinding the list to an integer index.
traj_block_ids = [traj.traj_attr['block_id'] for traj in trajectories]

# Arbitrary example: the block at position 100 of the blocks mapping.
example_block_id = list(blocks)[100]

# list.index raises ValueError if no trajectory carries this block id.
example_traj_idx = traj_block_ids.index(example_block_id)
trajectories[example_traj_idx]

In [None]:
# Number of trips in each block, in the mapping's iteration order.
block_size = [len(block_trips) for block_trips in blocks.values()]
block_size

In [None]:
# Count the distinct block ids referenced by the loaded trajectories.
traj_block_ids = [trip_traj.traj_attr['block_id'] for trip_traj in trajectories]
unique_block_ids = np.unique(traj_block_ids)
unique_block_ids.shape
# len(trajectories)

### Draw Energy Predictions for All Networks

In [None]:
# Does a higher GTFS-RT sampling frequency improve energy estimates of the model?
# Which blocks are BEBs viable on, and what is the cost in infrastructure of electrifying them?

# How accurately can bus energy consumption be predicted using a combination of route
# geometry, elevation and models trained on standardized open bus data to inform system
# design for transit agencies transitioning to BEBs?
# 2. Given demand estimates from different sources (AVL, GTFS-RT, onboard logger)
# combined with a power consumption modeling framework:
# (a) Which blocks are BEBs viable on?
# (b) How do design parameters such as battery and charger sizing affect the decision
# of which blocks to electrify?
# (c) How does the availability of data sources (e.g., static only, static and realtime
# sample) impact energy predictions?

# Calculate operating parameters from GTFS + TT model
# Given GTFS energy, operating profile, design strategy, cost assumptions: Compare energy needs/electrification barriers across wide array of agencies
# Calculate drive cycle/energy use for full GTFS feed
# Growth in GTFS-RT, GTFS-Flex, API sampling resolutions, Simulate/propose change in GTFS-RT sampling frequency
# Where is GPS precision high/low in network? Energy implications?
# Energy models as constraint in blocking software
# BEB abatement curve?

In [None]:
# Load the pickled trajectories and drive cycles for every network listed in
# cleaned_sources.csv. Both lists are appended in the CSV's row order.
cleaned_sources = pd.read_csv(Path('..', 'data', 'cleaned_sources.csv'))
all_network_trajs = []
all_network_cycles = []
for i, row in cleaned_sources.iterrows():
    print(i)
    network_name = row['uuid']
    # Results live one level above the notebook directory, matching the
    # single-network loading cell and the CSV path above.
    res_dir = Path("..", "results", "energy", network_name)

    # NOTE(review): pickle.load can execute arbitrary code; only load result
    # files that were produced by this project's own pipeline.
    # Context managers close each handle even if unpickling raises.
    with open(res_dir / "trajectories.pkl", "rb") as file:
        trajectories = pickle.load(file)
    all_network_trajs.append(trajectories)

    with open(res_dir / "cycles.pkl", "rb") as file:
        cycles = pickle.load(file)
    all_network_cycles.append(cycles)

In [None]:
# One record per drive cycle: its efficiency (mi/kWh), the provider name of its
# network, and how many cycles that network has in total.
# all_network_cycles is paired positionally with the rows of cleaned_sources.
efficiency_records = [
    {"efficiency": 1/cycle.electric_kwh_per_mi, "network": provider, "num_shapes": len(network_cycles)}
    for provider, network_cycles in zip(cleaned_sources['provider'], all_network_cycles)
    for cycle in network_cycles
]
df = pd.DataFrame(efficiency_records)

# Per-network medians, sorted by ascending median efficiency.
network_medians = df.groupby("network").median()
df_sorted = network_medians.sort_values("efficiency")
df

In [None]:
# Box plot of efficiency per network, with networks ordered by the index of
# df_sorted. Only rows with efficiency below 1 mi/kWh are drawn.
fig, ax = plt.subplots(1, 1, figsize=(18, 5))
boxplot_data = df[df['efficiency'] < 1]
sns.boxplot(data=boxplot_data, x="network", y="efficiency", order=df_sorted.index, ax=ax)

# Rotate the network names so they remain legible along the x axis.
plt.xticks(rotation=90)
ax.set_xlabel("")
ax.set_ylabel("Efficiency (mi/kWh)")
plt.show()

In [None]:
# test_network = cleaned_sources[cleaned_sources['provider'] == 'Massachusetts Bay Transportation Authority (MBTA)']
# test_idx = test_network.index[0]
# static_dir = data_dir / "other_feeds" / f"{test_network['uuid'].values[0]}_static"
# latest_static_file = standardfeeds.latest_available_static("2024_02_06", static_dir)
# latest_static_file = static_dir / latest_static_file
# test_network

In [None]:
# test_trajs = all_network_trajs[test_idx]
# test_cycles = all_network_cycles[test_idx]
# test_static = gk.read_feed(latest_static_file, dist_units="km")

In [None]:
# shape_id = [traj.traj_attr['shape_id'] for traj in test_trajs]
# efficiency = [1/cycle.electric_kwh_per_mi for cycle in test_cycles]
# energy_consumed = [np.sum(cycle.ess_kw_out_ach)/3600 for cycle in test_cycles]
# test_df = pd.DataFrame({"shape_id": shape_id, "efficiency": efficiency, "energy_consumed": energy_consumed})
# test_df = pd.merge(test_static.trips, test_df, on="shape_id")[['service_id','route_id','direction_id','trip_id','shape_id','block_id','efficiency','energy_consumed']].dropna()

In [None]:
# block_energy = test_df.groupby('block_id').agg({'efficiency': 'mean', 'energy_consumed': 'sum'}).sort_values('energy_consumed')