In [1]:
import sys
sys.path.append("../")

from dotenv import load_dotenv
load_dotenv()
import geopandas as gpd
import importlib
import matplotlib.pyplot as plt
import numpy as np
import os
import pandas as pd
import lightning.pytorch as pl
import rasterio as rio
from rasterio.plot import show
import seaborn as sns
import shapely
import statsmodels.api as sm
from torch.utils.data import DataLoader

from openbustools import plotting, spatial, standardfeeds
from openbustools.traveltime import data_loader, model_utils
from openbustools.drivecycle import trajectory
from openbustools.drivecycle.physics import conditions, energy, vehicle

In [2]:
# Energy model reused by the drive-cycle cells below: a TransitBus vehicle and
# an AmbientConditions object, both constructed with no arguments.
energy_model = energy.DriveCycleEnergyModel(
    vehicle.TransitBus(),
    conditions.AmbientConditions(),
)

### Phone vs. Resampled-Predicted

In [75]:
# Re-import the trajectory module so the cell uses its current on-disk version.
importlib.reload(trajectory)

# Load the phone recording and the annotation saved alongside it.
data_phone = pd.read_csv("../data/kcm_sensor/8124-33-2022-10-17_20-52-26/Location.csv")
phone_annotation = pd.read_csv("../data/kcm_sensor/8124-33-2022-10-17_20-52-26/Annotation.csv")
# The first annotation's text is split on "-" into exactly two parts.
short_name, veh_id = str.split(phone_annotation.iloc[0]["text"], "-")

# Seconds between consecutive rows; the first row has no predecessor, so it is NaN.
data_phone['calc_time_s'] = data_phone['seconds_elapsed'].diff()

# Build the trajectory from the raw coordinates. The leading NaN interval is
# sliced off before the intervals are handed over as known_times.
fix_intervals = data_phone['calc_time_s'].to_numpy()[1:]
phone_traj = trajectory.Trajectory(
    data_phone.longitude,
    data_phone.latitude,
    300,
    1,
    [386910,69022],
    32148,
    known_times=fix_intervals,
)

# Convert to a drive cycle using the DEM raster, then tabulate it.
phone_cycle = phone_traj.to_drivecycle("../data/kcm_spatial/usgs10m_dem_32148.tif")
cycle_df = phone_cycle.to_df()

# Per-row force components and power components from the energy model.
(
    cycle_df['F_aero'],
    cycle_df['F_grav'],
    cycle_df['F_roll'],
    cycle_df['F_acc'],
    cycle_df['F_tot'],
) = energy_model.calcTotalLoad(phone_cycle, combine=False)
(
    cycle_df['P_motor'],
    cycle_df['P_regen'],
    cycle_df['P_aux'],
    cycle_df['P_tot'],
) = energy_model.calcTotalPower(phone_cycle, combine=False)

energy_model.printSummary(phone_cycle)
cycle_df

3.935688771017813 kWh
1.5150166882396825 mi
2.597785754816167 Avg. kWh/mi


Unnamed: 0,Distance,Time,Elevation,Velocity,Acceleration,Slope,Theta,F_aero,F_grav,F_roll,F_acc,F_tot,P_motor,P_regen,P_aux,P_tot
0,0.664845,1.000000,37.365097,0.664845,1.330057,0.0,0.0,1.612286,0.0,1187.283398,19332.380329,20521.276013,17418.480837,0.000000,5154.639175,22573.120012
1,1.994903,1.000000,37.365097,1.994902,0.007115,0.0,0.0,14.515927,0.0,1189.351439,103.411451,1307.278817,3329.473378,0.000000,5154.639175,8484.112553
2,2.002018,1.000001,37.365097,2.002017,0.493119,0.0,0.0,14.619652,0.0,1189.368063,7167.489585,8371.477300,21397.131071,0.000000,5154.639175,26551.770246
3,2.495136,1.000000,37.365097,2.495136,-1.184952,0.0,0.0,22.708586,0.0,1190.664460,-17223.277348,-16009.904303,-0.000000,-13867.878275,5154.639175,-8713.239100
4,1.310184,1.000000,37.365097,1.310184,-0.099262,0.0,0.0,6.261328,0.0,1188.028491,-1442.774178,-248.484360,-0.000000,-113.020838,5154.639175,5041.618337
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
585,0.959696,1.000028,8.068244,0.959669,-0.593228,0.0,0.0,3.359268,0.0,1187.563383,-8622.575422,-7431.652771,-0.000000,-2475.904871,5154.639175,2678.734304
586,0.366435,1.000025,8.068244,0.366426,0.843616,0.0,0.0,0.489750,0.0,1187.103491,12261.951476,13449.544717,6291.867954,0.000000,5154.639175,11446.507129
587,1.210095,1.000026,8.068244,1.210064,0.608402,0.0,0.0,5.340943,0.0,1187.880982,8843.121115,10036.343041,15504.915761,0.000000,5154.639175,20659.554936
588,1.818506,1.000017,8.068244,1.818476,-1.361013,0.0,0.0,12.061922,0.0,1188.958140,-19782.317814,-18581.297751,-0.000000,-11730.338558,5154.639175,-6575.699383


In [89]:
# Re-import the trajectory module so the cell uses its current on-disk version.
importlib.reload(trajectory)

# Create the resampled trajectory (resample_len=20); no known_times are given.
phone_traj = trajectory.Trajectory(
    data_phone.longitude,
    data_phone.latitude,
    322,
    0,
    [386910,69022],
    32148,
    resample_len=20,
)

# Load the saved GRU model and let it fill in the trajectory's times.
model = model_utils.load_model('../logs/', 'kcm', 'GRU', 0)
phone_traj.update_predicted_time(model)
# NOTE(review): the output saved with this cell shows the same Time value on
# every row and near-zero velocities, giving ~14 kWh/mi versus ~2.6 kWh/mi for
# the measured trajectory. Confirm whether update_predicted_time is assigning
# a cumulative time to each segment instead of a per-segment time.

# Convert the (already built) trajectory to a drive cycle and tabulate it.
phone_cycle = phone_traj.to_drivecycle("../data/kcm_spatial/usgs10m_dem_32148.tif")
cycle_df = phone_cycle.to_df()

# Per-row force components and power components from the energy model.
(
    cycle_df['F_aero'],
    cycle_df['F_grav'],
    cycle_df['F_roll'],
    cycle_df['F_acc'],
    cycle_df['F_tot'],
) = energy_model.calcTotalLoad(phone_cycle, combine=False)
(
    cycle_df['P_motor'],
    cycle_df['P_regen'],
    cycle_df['P_aux'],
    cycle_df['P_tot'],
) = energy_model.calcTotalPower(phone_cycle, combine=False)

energy_model.printSummary(phone_cycle)
cycle_df

GPU available: True (cuda), used: True
TPU available: False, using: 0 TPU cores
IPU available: False, using: 0 IPUs
HPU available: False, using: 0 HPUs
LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]
/home/zack/Desktop/open_bus_tools/obt_venv/lib/python3.9/site-packages/lightning/pytorch/trainer/connectors/data_connector.py:441: The 'predict_dataloader' does not have many workers which may be a bottleneck. Consider increasing the value of the `num_workers` argument` to `num_workers=11` in the `DataLoader` to improve performance.


Predicting DataLoader 0: 100%|██████████| 1/1 [00:00<00:00, 140.10it/s]
17.99593606683438 kWh
1.275571597182067 mi
14.10813482096196 Avg. kWh/mi


Unnamed: 0,Distance,Time,Elevation,Velocity,Acceleration,Slope,Theta,F_aero,F_grav,F_roll,F_acc,F_tot,P_motor,P_regen,P_aux,P_tot
0,15.344439,671.173584,37.796268,0.022862,-2.9e-05,0.0,0.0,0.001906,0.0,1187.025306,-0.422086,1186.605126,34.634435,0.0,5154.639175,5189.27361
1,2.262998,671.173584,37.796268,0.003372,0.00047,0.008597,0.008596,4.1e-05,1224.48417,1187.025007,6.82613,2418.335348,10.410022,0.0,5154.639175,5165.049197
2,213.820894,671.173584,39.634407,0.318578,-0.000217,0.021966,0.021962,0.370196,3128.112247,1187.084331,-3.158918,4312.407856,1753.964699,0.0,5154.639175,6908.603874
3,115.918572,671.173584,42.180645,0.17271,-0.000108,0.02191,0.021907,0.108802,3120.209953,1187.042438,-1.5654,4305.795793,949.417715,0.0,5154.639175,6104.05689
4,67.403132,671.173584,43.657463,0.100426,0.000106,0.02637,0.026364,0.036787,3754.869078,1187.030896,1.535324,4943.472085,633.815774,0.0,5154.639175,5788.454949
5,114.986442,671.173584,46.689617,0.171321,-5e-06,-0.013864,-0.013864,0.107059,-1974.710253,1187.042158,-0.074932,-787.635968,-0.0,-46.845121,5154.639175,5107.794054
6,112.664109,671.173584,45.127586,0.167861,0.000287,-0.018452,-0.01845,0.102779,-2627.960492,1187.041472,4.170054,-1436.646187,-0.0,-83.719688,5154.639175,5070.919488
7,241.903927,671.173584,40.663891,0.360419,-0.000295,-0.017919,-0.017917,0.473825,-2551.965545,1187.100939,-4.291679,-1368.682461,-0.0,-171.252852,5154.639175,4983.386324
8,108.894653,671.173584,38.712654,0.162245,0.000329,-0.010666,-0.010666,0.096016,-1519.280711,1187.040388,4.785528,-327.358779,-0.0,-18.438379,5154.639175,5136.200796
9,257.209457,671.173584,35.969131,0.383223,-0.000389,0.013449,0.013449,0.53568,1915.591965,1187.110852,-5.649637,3097.58886,1515.519699,0.0,5154.639175,6670.158874


In [36]:
# plotting.formatted_trajectory_lineplot(pred_df)

In [None]:
# GTFS shapes
# static_date is not assigned anywhere else in this notebook, so without this
# line the cell raises NameError on a fresh kernel. It is set to the feed date
# that the GTFS merge cell reads ("../data/kcm_gtfs/2023_05_14/").
static_date = "2023_05_14"
gtfs_dir = f"../data/kcm_gtfs/{static_date}/"

shape_lookup = standardfeeds.get_gtfs_shapes_lookup(gtfs_dir)
# Reproject the shapes to EPSG:32148, the CRS used for the other layers here.
shapes = standardfeeds.get_gtfs_shapes(gtfs_dir).to_crs("EPSG:32148")
shapes.plot()

In [None]:
# Route ids whose short name matches the phone annotation, direction 0 only.
# NOTE: data_gtfs is only assigned in the "Trip GPS Points" section further
# down, so that section has to be run before this cell.
on_route = (data_gtfs['route_short_name'] == short_name) & (data_gtfs['direction_id'] == 0)
route_ids = pd.unique(data_gtfs.loc[on_route, 'route_id'])

# Shapes for those routes, restricted to direction 0 and service_id 21133.
shape_mask = (
    shapes['route_id'].isin(route_ids)
    & (shapes['direction_id'] == 0)
    & (shapes['service_id'] == 21133)
)
phone_shape = shapes[shape_mask]

In [None]:
# Get one shape to work with
sample_service_id, sample_route_id, sample_direction_id = data_gtfsrt.groupby(['service_id','route_id','direction_id']).count().index[0]
print(sample_service_id, sample_route_id, sample_direction_id)

# GTFS-RT
sample_realtime = data_gtfsrt[(data_gtfsrt['service_id']==sample_service_id) & (data_gtfsrt['route_id']==sample_route_id) & (data_gtfsrt['direction_id']==sample_direction_id)].copy()

# Shape
sample_shape = shapes[(shapes['service_id']==sample_service_id) & (shapes['route_id']==sample_route_id) & (shapes['direction_id']==sample_direction_id)].copy()
sample_shape.plot()

In [None]:
# Get distance along shape
sample_realtime['dist_along_line'] = sample_realtime['geometry'].apply(lambda pt: shapely.line_locate_point(sample_shape.geometry, pt))
# sample_static['dist_along_line'] = sample_static['geometry'].apply(lambda pt: shapely.line_locate_point(sample_shape.geometry, pt))

# Also get a timestamp column on the samples
sample_realtime['t'] = pd.to_datetime(sample_realtime['locationtime'], unit='s')
sample_realtime = sample_realtime.set_index('t')

In [None]:
# ax = sns.lineplot(sample_static, x='dist_along_line', y='calc_speed_m_s', hue='trip_id', legend=False).set_ylim(0,25)

In [None]:
# Speed against distance along the shape, one line per trip_id, with the
# y-axis limited to 0-25.
speed_ax = sns.lineplot(sample_realtime, x='dist_along_line', y='calc_speed_m_s', hue='trip_id', legend=False)
speed_ax.set_ylim(0,25)

In [None]:
# Scatter the sampled GTFS-RT points on a basemap.
plotting.formatted_basemap_scatterplot(sample_realtime)

In [None]:
# Same basemap helper, applied to the sampled shape for visual comparison.
plotting.formatted_basemap_scatterplot(sample_shape)

In [None]:
# LOWESS fit of speed (endog) on distance along the shape (exog), using
# frac=0.01 and it=1. The result has x in column 0 and the fit in column 1.
filtered = sm.nonparametric.lowess(
    endog=sample_realtime['calc_speed_m_s'],
    exog=sample_realtime['dist_along_line'],
    frac=0.01,
    it=1,
)

# Per-trip speed lines, with the smoothed curve drawn in black on the same axes.
axes = sns.lineplot(sample_realtime, x='dist_along_line', y='calc_speed_m_s', hue='trip_id', legend=False)
smooth_x, smooth_y = filtered[:,0], filtered[:,1]
sns.lineplot(x=smooth_x, y=smooth_y, ax=axes, color='black')

In [None]:
# Smoothed speed with lower/upper confidence bounds, evaluated on an evenly
# spaced grid from 0 to the largest distance along the shape.
# (The original note calls this a 95% interval; the level is decided inside
# plotting.lowess_with_confidence_bounds - TODO confirm.)
dist_vals = sample_realtime['dist_along_line'].values
speed_vals = sample_realtime['calc_speed_m_s'].values
eval_x = np.linspace(0, max(dist_vals))
smoothed, bottom, top = plotting.lowess_with_confidence_bounds(dist_vals, speed_vals, eval_x, lowess_kw={"frac": 0.01})

# Raw per-trip lines first, then the fit (black) and both bounds (blue).
axes = sns.lineplot(sample_realtime, x='dist_along_line', y='calc_speed_m_s', hue='trip_id', legend=False)
# axes.set_xlim(0,5000)
# axes.set_ylim(0,20)
sns.lineplot(x=eval_x, y=smoothed, color='black', ax=axes)
sns.lineplot(x=eval_x, y=top, color='blue', ax=axes)
sns.lineplot(x=eval_x, y=bottom, color='blue', ax=axes)

In [None]:
# Distance of each phone fix along the matched GTFS shape.
# phone_shape.geometry is a GeoSeries, so passing it whole as the line makes
# each per-point call return one value per shape row rather than a single
# distance. Use the first matched line and locate all points in one
# vectorised call.
# NOTE: data_phone only gains its 'geometry' column in the "Trip GPS Points"
# section further down, so that section has to be run before this cell.
phone_line = phone_shape.geometry.iloc[0]
data_phone['dist_along_line'] = shapely.line_locate_point(phone_line, data_phone['geometry'].to_numpy())


In [None]:
# Scatter the phone fixes on a basemap; the helper's result is unpacked as a
# (figure, axes) pair so the matched shape can be drawn on the same axes.
fig, axes = plotting.formatted_basemap_scatterplot(data_phone)
phone_shape.plot(ax=axes, color='black')

In [None]:
# Phone speed against distance along the matched shape.
# NOTE: 'calc_speed_m_s' is added to data_phone in the "Trip GPS Points"
# section further down; 'dist_along_line' comes from the cell above.
sns.lineplot(data_phone, x='dist_along_line', y='calc_speed_m_s')

### Trip GPS Points

In [None]:
# Reload the phone recording and its annotation from disk.
data_phone = pd.read_csv("../data/kcm_sensor/8124-33-2022-10-17_20-52-26/Location.csv")
phone_annotation = pd.read_csv("../data/kcm_sensor/8124-33-2022-10-17_20-52-26/Annotation.csv")
# The first annotation's text is split on "-" into exactly two parts.
short_name, veh_id = str.split(phone_annotation.iloc[0]["text"], "-")

# Turn lon/lat into point geometry (EPSG:4326) and reproject to EPSG:32148.
phone_points = gpd.points_from_xy(data_phone.longitude, data_phone.latitude)
data_phone = gpd.GeoDataFrame(data_phone, geometry=phone_points, crs="EPSG:4326").to_crs("EPSG:32148")

# Only the third value returned by calculate_speed is kept, as the speed column.
_, _, data_phone['calc_speed_m_s'] = spatial.calculate_speed(data_phone, 'seconds_elapsed')
plotting.formatted_basemap_scatterplot(data_phone, "Phone Basemap")

In [None]:
# NOTE(review): `data_utils` is not imported in this notebook's import cell, so
# this cell raises NameError on a fresh kernel - confirm which module provides
# merge_gtfs_files / filter_gtfs_w_phone and import it at the top.
data_gtfs = data_utils.merge_gtfs_files(
    "../data/kcm_gtfs/2023_05_14/",
    epsg="32148",
    coord_ref_center=[386910,69022],
)
gtfs_calendar = pd.read_csv("../data/kcm_gtfs/2023_05_14/calendar.txt")

# Narrow the schedule using the phone recording, route short name and calendar;
# the second return value is reused by the GTFS-RT cell.
filtered_gtfs, remaining_trip_ids = data_utils.filter_gtfs_w_phone(data_phone, data_gtfs, short_name, gtfs_calendar)

# Stop lon/lat -> point geometry (EPSG:4326), reprojected to EPSG:32148.
stop_points = gpd.points_from_xy(filtered_gtfs.stop_lon, filtered_gtfs.stop_lat)
filtered_gtfs = gpd.GeoDataFrame(filtered_gtfs, geometry=stop_points, crs="EPSG:4326").to_crs("EPSG:32148")

# Elapsed seconds measured from the first row's arrival time.
first_arrival_s = filtered_gtfs['arrival_s'].iloc[0]
filtered_gtfs['seconds_elapsed'] = filtered_gtfs['arrival_s'] - first_arrival_s

# Only the third value returned by calculate_speed is kept, as the speed column.
_, _, filtered_gtfs['calc_speed_m_s'] = spatial.calculate_speed(filtered_gtfs, 'seconds_elapsed')
plotting.formatted_basemap_scatterplot(filtered_gtfs, "GTFS Basemap")

In [None]:
# Load one day of realtime data and make sure locationtime is numeric.
data_gtfsrt = pd.read_pickle("../data/kcm_realtime/2023_05_20.pkl")
data_gtfsrt['locationtime'] = pd.to_numeric(data_gtfsrt['locationtime'])

# All realtime rows for the remaining trip ids, then only the first of them.
in_remaining = data_gtfsrt['trip_id'].isin(remaining_trip_ids)
filtered_gtfsrt_daily = data_gtfsrt[in_remaining]
first_trip_id = remaining_trip_ids[0]
filtered_gtfsrt_one = filtered_gtfsrt_daily[filtered_gtfsrt_daily['trip_id']==first_trip_id]

# lon/lat -> point geometry (EPSG:4326), reprojected to EPSG:32148.
one_trip_points = gpd.points_from_xy(filtered_gtfsrt_one.lon, filtered_gtfsrt_one.lat)
filtered_gtfsrt_one = gpd.GeoDataFrame(filtered_gtfsrt_one, geometry=one_trip_points, crs="EPSG:4326").to_crs("EPSG:32148")

# Elapsed seconds measured from the first row's locationtime.
first_locationtime = filtered_gtfsrt_one['locationtime'].iloc[0]
filtered_gtfsrt_one['seconds_elapsed'] = filtered_gtfsrt_one['locationtime'] - first_locationtime

# Speed is computed from the raw locationtime column; only the third return
# value is kept.
_, _, filtered_gtfsrt_one['calc_speed_m_s'] = spatial.calculate_speed(filtered_gtfsrt_one, 'locationtime')
plotting.formatted_basemap_scatterplot(filtered_gtfsrt_one, "GTFS-RT Basemap One Trip")

In [None]:
# lon/lat -> point geometry (EPSG:4326), reprojected to EPSG:32148, for every
# realtime row that matched the remaining trip ids.
daily_points = gpd.points_from_xy(filtered_gtfsrt_daily.lon, filtered_gtfsrt_daily.lat)
filtered_gtfsrt_daily = gpd.GeoDataFrame(filtered_gtfsrt_daily, geometry=daily_points, crs="EPSG:4326").to_crs("EPSG:32148")

# Elapsed seconds measured from the first row's locationtime.
first_locationtime = filtered_gtfsrt_daily['locationtime'].iloc[0]
filtered_gtfsrt_daily['seconds_elapsed'] = filtered_gtfsrt_daily['locationtime'] - first_locationtime

# NOTE(review): this frame can hold several trips back to back - confirm that
# spatial.calculate_speed does not compute speeds across trip boundaries.
_, _, filtered_gtfsrt_daily['calc_speed_m_s'] = spatial.calculate_speed(filtered_gtfsrt_daily, 'locationtime')
plotting.formatted_basemap_scatterplot(filtered_gtfsrt_daily, "GTFS-RT Basemap Daily Trips")

### Drive Cycles

In [None]:
# Long format: one row per (seconds_elapsed, variable) so the three phone
# series can be drawn by a single relational line plot.
phone_series = ['speed','calc_speed_m_s','altitudeAboveMeanSeaLevel']
data_melt_phone = pd.melt(
    frame=data_phone,
    id_vars=['seconds_elapsed'],
    value_vars=phone_series,
)
plotting.formatted_rel_lineplot(
    data_melt_phone,
    x_var='seconds_elapsed',
    y_var='value',
    rel_var='variable',
    title_text='Phone Speed',
)

In [None]:
# Long format of the scheduled (GTFS) speed, plotted with fixed axis limits.
filtered_melt_gtfs = pd.melt(
    frame=filtered_gtfs,
    id_vars=['seconds_elapsed'],
    value_vars=['calc_speed_m_s'],
)
plotting.formatted_rel_lineplot(
    filtered_melt_gtfs,
    x_var='seconds_elapsed',
    y_var='value',
    rel_var='variable',
    title_text='GTFS Speed',
    xlim=(0,2000),
    ylim=(0,35),
)

In [None]:
# Long format of the single-trip GTFS-RT speed, plotted with fixed axis limits.
filtered_melt_gtfsrt_one = pd.melt(
    frame=filtered_gtfsrt_one,
    id_vars=['seconds_elapsed'],
    value_vars=['calc_speed_m_s'],
)
plotting.formatted_rel_lineplot(
    filtered_melt_gtfsrt_one,
    x_var='seconds_elapsed',
    y_var='value',
    rel_var='variable',
    title_text='GTFS-RT Drive Cycle One Trip',
    xlim=(0,2000),
    ylim=(0,35),
)

In [None]:
# Long format of the GTFS-RT speed for all matched trips of the day; no axis
# limits are passed for this plot.
filtered_melt_gtfsrt_daily = pd.melt(
    frame=filtered_gtfsrt_daily,
    id_vars=['seconds_elapsed'],
    value_vars=['calc_speed_m_s'],
)
plotting.formatted_rel_lineplot(
    filtered_melt_gtfsrt_daily,
    x_var='seconds_elapsed',
    y_var='value',
    rel_var='variable',
    title_text='GTFS-RT Drive Cycle Daily Trips',
)