## Scheduled vs. RT Total Trip Time

In [1]:
import _rt_scheduled_utils as rt_scheduled_utils
import _threshold_utils as threshold_utils
import altair as alt

import pandas as pd
from segment_speed_utils.project_vars import RT_SCHED_GCS, SEGMENT_GCS, analysis_date
from shared_utils import calitp_color_palette as cp



In [2]:
# Notebook display settings: show up to 100 columns, format floats with two
# decimals, and remove the caps on displayed rows and column width.
for option_name, option_value in (
    ("display.max_columns", 100),
    ("display.float_format", "{:.2f}".format),
    ("display.max_rows", None),
    ("display.max_colwidth", None),
):
    pd.set_option(option_name, option_value)

### Deliverable 1 - DataFrame

In [4]:
# Compute the metrics for the analysis date, then assemble the final dataframe.
# Per the original author's note the result is also saved to GCS (presumably
# inside `final_df` — confirm in _rt_scheduled_utils).
date_metrics = rt_scheduled_utils.find_metrics(analysis_date)
final = rt_scheduled_utils.final_df(date_metrics, analysis_date)

In [5]:
# Display the (rows, columns) dimensions of the final dataframe.
final.shape

(98105, 21)

In [6]:
# Sanity check: count the trips whose actual duration is negative.
# The expectation is that none remain.
is_negative_duration = final['Actual Trip Duration Minutes'] < 0
len(final.loc[is_negative_duration])

0

### Deliverable 2 - Charts 

In [7]:
# Render the operator-level charts from the final dataframe
# (chart definitions live in _rt_scheduled_utils).
rt_scheduled_utils.operator_level_visuals(final)

In [8]:
# Render the statewide charts from the final dataframe
# (chart definitions live in _rt_scheduled_utils).
rt_scheduled_utils.create_statewide_visuals(final)

#### Other Charts

In [20]:
# Keep Altair's 'default' data transformer but pass max_rows=None to lift its
# row limit; the charts below are built from a dataframe with ~98k rows.
alt.data_transformers.enable('default', max_rows=None)

DataTransformerRegistry.enable('default')

In [21]:
# Keep only the trips whose 'Gtfs Dataset Name' is not the literal string
# 'None', and renumber the remaining rows from 0.
has_dataset_name = final['Gtfs Dataset Name'] != 'None'
exclude_none = final.loc[has_dataset_name].reset_index(drop=True)

In [22]:
# Label every trip with a "Ping Category" by applying `pings` row by row.
# NOTE(review): `pings` is not defined or imported in any visible cell (the
# execution counts jump from 8 to 20) — confirm where it comes from before
# re-running this notebook top to bottom, otherwise this raises NameError.
exclude_none["Ping Category"] = exclude_none.apply(pings, axis=1)

In [23]:
# Dashed red reference line at the mean 'Rt Data Proportion Percentage'.
# With no x encoding, the mean is taken over every row passed to the chart.
rule = (
    alt.Chart(exclude_none[['Trip Category', 'Rt Data Proportion Percentage']])
    .mark_rule(color='red', strokeDash=[10, 7])
    .encode(y='mean(Rt Data Proportion Percentage):Q')
)

In [24]:
# Bar chart of the mean 'Rt Data Proportion Percentage' per 'Trip Category',
# with one bar colour per category.
bar = (
    alt.Chart(exclude_none[['Trip Category', 'Rt Data Proportion Percentage']])
    .mark_bar()
    .encode(
        x=alt.X('Trip Category:O', axis=alt.Axis(labelAngle=-45)),
        y='mean(Rt Data Proportion Percentage):Q',
        color=alt.Color(
            'Trip Category:O',
            scale=alt.Scale(range=cp.CALITP_CATEGORY_BRIGHT_COLORS),
        ),
    )
    .properties(title="Average % of Realtime vs. Scheduled Data")
)

In [25]:
# Layer the mean reference line on top of the bars and hand the layered chart
# to chart_size with 500 and 400 (presumably width and height — confirm in
# _threshold_utils).
threshold_utils.chart_size((bar+rule), 500,400)

In [26]:
# Count the non-null 'Trip Id' values for each
# ('Ping Category', 'Rt Category') pair and expose the count as 'Total Trips'.
trip_id_by_category = exclude_none.groupby(['Ping Category', 'Rt Category'])['Trip Id']
agg2 = trip_id_by_category.count().reset_index(name='Total Trips')

In [27]:
# Heatmap of trip counts: 'Ping Category' on x, 'Rt Category' on y, cell colour
# driven by 'Total Trips'. Every agg2 column is shown in the tooltip.
total_trips_heatmap = (
    alt.Chart(agg2)
    .mark_rect()
    .encode(
        x=alt.X('Ping Category:O', axis=alt.Axis(labelAngle=-45)),
        y='Rt Category:O',
        color=alt.Color(
            'Total Trips:Q',
            scale=alt.Scale(range=cp.CALITP_SEQUENTIAL_COLORS),
        ),
        tooltip=list(agg2.columns),
    )
    .properties(title="Total Trips by RT vs. Scheduled % and Pings per Minute")
)
threshold_utils.chart_size(total_trips_heatmap, 500, 400)

In [33]:
# Mean 'Rt Data Proportion Percentage' for each
# ('Ping Category', 'Trip Category') pair, exposed under an "Average ..." name.
rt_proportion_by_category = exclude_none.groupby(
    ['Ping Category', 'Trip Category']
)['Rt Data Proportion Percentage']
agg3 = rt_proportion_by_category.mean().reset_index(
    name='Average Rt Data Proportion Percentage'
)

In [38]:
# Heatmap of the average RT data proportion: 'Ping Category' on x,
# 'Trip Category' on y, cell colour driven by the averaged percentage.
# Every agg3 column is shown in the tooltip.
avg_rt_proportion_heatmap = (
    alt.Chart(agg3)
    .mark_rect()
    .encode(
        x=alt.X('Ping Category:O', axis=alt.Axis(labelAngle=-45)),
        y='Trip Category:O',
        color=alt.Color(
            'Average Rt Data Proportion Percentage:Q',
            scale=alt.Scale(range=cp.CALITP_SEQUENTIAL_COLORS),
        ),
        tooltip=list(agg3.columns),
    )
    .properties(title="Relationship between Trip Duration, Pings per Minute, and Average RT vs. Scheduled %")
)
threshold_utils.chart_size(avg_rt_proportion_heatmap, 500, 400)