# {name} 

## Stop-to-stop segments with speed and delay
* Explore how stop-level metrics look by operator
* Play with different plots to see what's most readable
* Explore this: https://github.com/justinbois/altair-catplot?

In [1]:
%%capture
import warnings
warnings.filterwarnings('ignore')

import altair as alt
import calitp_data_analysis.magics
import dask.dataframe as dd
import geopandas as gpd
import pandas as pd

from IPython.display import HTML, Markdown

from segment_speed_utils.project_vars import SEGMENT_GCS, analysis_date
from shared_utils import calitp_color_palette as cp

alt.data_transformers.enable('default', max_rows=None)

In [2]:
# Parameters cell: `name` is the operator (matched against `_gtfs_dataset_name`)
# that the rest of the notebook reports on.
# NOTE(review): presumably overridden at run time by a parameterizing tool
# such as papermill -- confirm.
name = "Big Blue Bus VehiclePositions"

In [3]:
%%capture_parameters
name

{"name": "Big Blue Bus VehiclePositions"}


In [None]:
#operators = pd.read_parquet(
#    f"./scripts/data/stop_metrics_by_hour_{analysis_date}.parquet",
#    columns = ["_gtfs_dataset_name"]
#).sort_values("_gtfs_dataset_name"
#             ).drop_duplicates()._gtfs_dataset_name.tolist()


#one_operator = "Big Blue Bus VehiclePositions"

In [4]:
def stop_avg_by_peak_off_peak(gdf): 
    """
    Aggregate stop-level speeds to peak / off peak.
    
    For every operator-route-direction-stop-peak combination, return
    the trip-weighted average speed (`avg_speed_mph`), the total 
    number of trips (`n_trips`) and the max `stop_sequence`.
    
    gdf needs the grouping columns listed below plus `speed_mph`, 
    `n_trips` and `stop_sequence`.
    
    Rows with a missing speed still count towards `n_trips`, but are
    left out of the weighted average. A group where no row has a speed
    gets a missing `avg_speed_mph` rather than 0.
    """
    group_cols = [
        "gtfs_dataset_key", "_gtfs_dataset_name", 
        "route_id", "direction_id", "route_short_name",
        "stop_id", "stop_name", "peak"
    ]
    
    # Calculate weighted average.
    # A row without a speed adds nothing to the numerator, so its trips
    # must not be added to the denominator either.
    gdf = gdf.assign(
        speed_multiplied_trips = gdf.speed_mph * gdf.n_trips,
        _speed_weight = gdf.n_trips.where(gdf.speed_mph.notna()),
    )
    
    agg_df = (
        gdf.groupby(group_cols)
        .agg({"speed_multiplied_trips": "sum",
              "_speed_weight": "sum",
              "n_trips": "sum",
              "stop_sequence": "max" # get max in case there are differences
             })
        .reset_index()
    )
    
    # 0 / 0 (no usable speeds in the group) comes out as NaN
    agg_df = agg_df.assign(
        avg_speed_mph = agg_df["speed_multiplied_trips"].divide(
            agg_df["_speed_weight"])
    ).drop(columns = ["speed_multiplied_trips", "_speed_weight"])
    
    return agg_df

In [5]:
def subset_for_operator(operator_name: str):
    """
    Read the hourly stop metrics for one operator.
    
    Only rows whose `_gtfs_dataset_name` equals operator_name are read
    from the parquet for `analysis_date`.
    
    Returns a tuple of
    * gdf: the hourly rows, with `speed_mph` rounded to 1 decimal and 
      a `peak` column ("peak" for AM Peak / PM Peak, else "off peak")
    * peak_df: gdf aggregated with stop_avg_by_peak_off_peak, with
      `avg_speed_mph` rounded to 1 decimal
    """
    gdf = gpd.read_parquet(
        f"./scripts/data/stop_metrics_by_hour_{analysis_date}.parquet",
        filters = [[("_gtfs_dataset_name", "==", operator_name)]]
    )
    
    # Vectorized instead of a row-wise apply. 
    # Anything that is not AM/PM Peak (missing values too) is off peak.
    is_peak = gdf.time_of_day.isin(["AM Peak", "PM Peak"])
    
    gdf = gdf.assign(
        speed_mph = gdf.speed_mph.round(1),
        peak = is_peak.map({True: "peak", False: "off peak"})
    )
    
    peak_df = stop_avg_by_peak_off_peak(gdf)
    peak_df = peak_df.assign(
        avg_speed_mph = peak_df.avg_speed_mph.round(1)
    )
    
    return gdf, peak_df

In [17]:
def get_operator_route_dropdown(df):
    """
    Get altair selection dropdown for routes.
    
    The options are the sorted, non-missing values of 
    `route_short_name` in df, and the first one is pre-selected.
    Missing route names are left out because they cannot be sorted 
    together with strings.
    If df has no routes at all, the dropdown is returned empty, 
    without an initial value.
    """
    # Sort once, reuse for both the options and the initial value
    route_list = sorted(df.route_short_name.dropna().unique().tolist())
    
    input_dropdown = alt.binding_select(
        options=route_list, name='Route ')

    selection_kwargs = dict(
        name="Route", fields=['route_short_name'],
        bind=input_dropdown
    )
    
    if route_list:
        selection_kwargs["init"] = {"route_short_name": route_list[0]}
    
    select_route = alt.selection_single(**selection_kwargs)
    
    return select_route

In [18]:
# Hourly stop metrics for the chosen operator, plus the same data
# aggregated to peak / off peak
operator_df, operator_peak_df = subset_for_operator(name)

In [12]:
def boxplot(df, filter_for_dropdown=None):
    """
    Horizontal boxplot of `speed_mph` for each `stop_name`.
    
    filter_for_dropdown: optional altair selection (or any other 
    filter accepted by transform_filter). If given, the chart is 
    filtered by it. If left out, the unfiltered chart is returned, 
    so the caller can filter it later.
    """
    chart = (
        alt.Chart(df)
        .mark_boxplot(ticks=True)
        .encode(
            x=alt.X('speed_mph:Q', title="speed"),
            y=alt.Y("stop_name:N", title="Stop"),
            tooltip=["speed_mph", "stop_name", 
                     "stop_id", "peak", 
                     "route_id", "route_short_name"
                    ],
        ).interactive()
    )
    
    if filter_for_dropdown is None:
        return chart
     
    #transform_filter won't work with boxplot - known issue    
    #https://github.com/altair-viz/altair/issues/2255
    return chart.transform_filter(filter_for_dropdown)

In [13]:
def boxplot_by_operator(operator_df):
    """
    Display speed boxplots by stop for direction 0 and direction 1
    side by side, with a route dropdown.
    
    NOTE(review): filtering a boxplot with a selection may not 
    render, see https://github.com/altair-viz/altair/issues/2255
    """
    select_operator_route = get_operator_route_dropdown(operator_df)
    
    # boxplot() takes the dropdown selection as its 2nd argument and
    # filters by it, so the charts are not filtered again below
    charts = [
        boxplot(
            operator_df[operator_df.direction_id==direction],
            select_operator_route)
        .add_selection(select_operator_route)
        .properties(height=700, width=200)
        for direction in (0, 1)
    ]
    
    chart = alt.hconcat(
        *charts
    ).properties(
        title = "Boxplot Speed Variation for ", 
    ).configure(padding={'top': 10})
    
    # Move the dropdown from below the chart to the top
    display(HTML("""
    <style>
    form.vega-bindings {
      position: absolute;
      left: 215px;
      top: 7px;
    }
    </style>
    """))

    display(chart)

In [None]:
#boxplot_by_operator(operator_df)

In [19]:
def strip_tick_plot_by_peak(df):
    """
    Interactive strip plot: one tick per row of df, 
    `speed_mph` on x and `stop_name` on y.
    
    Despite the name, `peak` is not encoded right now 
    (no column facet, no color split).
    """
    speed_axis = alt.X('speed_mph:Q', title="speed")
    stop_axis = alt.Y("stop_name:N", title="Stop")
    
    ticks = alt.Chart(df).mark_tick()
    
    return ticks.encode(x=speed_axis, y=stop_axis).interactive()

In [20]:
def strip_tick_plot_by_operator(operator_df):
    """
    Display strip tick plots of speed by stop for direction 0 and 
    direction 1 side by side, filtered by a route dropdown.
    """
    route_picker = get_operator_route_dropdown(operator_df)

    # Same chart for each direction: attach dropdown, size it, filter it
    direction_charts = [
        strip_tick_plot_by_peak(
            operator_df[operator_df.direction_id==direction])
        .add_selection(route_picker)
        .properties(height=700, width=200)
        .transform_filter(route_picker)
        for direction in (0, 1)
    ]
    
    combined = alt.hconcat(*direction_charts)
    combined = combined.properties(title = "Strip Tick Speed Variation for ")
    combined = combined.configure(padding={'top': 10})

    # CSS that repositions the dropdown form
    dropdown_css = """
    <style>
    form.vega-bindings {
      position: absolute;
      left: 225px;
      top: 7px;
    }
    </style>
    """
    
    display(HTML(dropdown_css))
    display(combined)

In [21]:
# Strip tick plot of the hourly stop speeds, one chart per direction
strip_tick_plot_by_operator(operator_df)

In [22]:
def stripplot_base(df: pd.DataFrame) -> alt.Chart:
    """
    Base chart for jittered strip plots.
    
    Only sets up the y channel: a calculated `jitter` field that 
    spreads the points out vertically so they don't overlap. 
    No mark and no x encoding yet, the caller adds those.
    """
    # Jitter values carry no meaning: just one tick at 0, 
    # no labels, no grid
    jitter_axis = alt.Axis(
        values=[0], ticks=True, grid=False, labels=False)
    
    jitter_y = alt.Y(
        'jitter:Q', title=None, 
        axis=jitter_axis, scale=alt.Scale()
    )
    
    # Gaussian jitter with a Box-Muller transform. 
    # -0.5 instead of the textbook -2 halves the spread.
    jitter_expr = 'sqrt(-0.5*log(random()))*cos(2*PI*random())'
    
    return (
        alt.Chart(df)
        .encode(y=jitter_y)
        .transform_calculate(jitter=jitter_expr)
        .interactive()
    )


def stripplot_by_time_of_day(
    df: pd.DataFrame, 
    x_col: str,
    grouping_col: str
) ->alt.Chart: 
    """
    Jittered strip plot of x_col, one row per grouping_col value,
    with points colored by `time_of_day`.
    
    df: needs x_col, grouping_col, `time_of_day` and `stop_sequence`
        (rows are sorted by ascending stop_sequence). 
        `geometry` and `actual_minus_scheduled_sec` are dropped if
        present; a df without them is fine.
    x_col: quantitative column for the x axis
    grouping_col: column used for the row facet
    """
    # altair chart can't take geometry.
    # errors="ignore": plain dataframes without these columns work too
    df = df.drop(
        columns = ["actual_minus_scheduled_sec", "geometry"], 
        errors = "ignore"
    )
    
    base = stripplot_base(df)

    chart = (
        base
        .mark_point(size=10, opacity=0.9, strokeWidth=1.1)
        .encode( 
            x=alt.X(f"{x_col}:Q"),
            color=alt.Color(
                "time_of_day:N", title="Time of Day",
                scale=alt.Scale(range=cp.CALITP_CATEGORY_BRIGHT_COLORS)
            ),
            row=alt.Row(
                f"{grouping_col}:O", 
                sort=alt.EncodingSortField(field="stop_sequence", 
                                           order="ascending"),
                header=alt.Header(labelAngle=0)
            ),
        )
    )
        
    return chart

In [23]:
def speed_stripplot_for_operator(operator_df):
    """
    Display jittered speed strip plots (one row per stop_sequence, 
    colored by time of day) for direction 0 and direction 1 side by 
    side, filtered by a route dropdown.
    """
    select_operator_route = get_operator_route_dropdown(operator_df)
    
    tooltip_cols = [
        "_gtfs_dataset_name",
        "stop_name", "speed_mph", 
        "route_id", "direction_id"
    ]

    # Same chart for each direction, only the subset and title differ
    speed_charts = [
        stripplot_by_time_of_day(
            operator_df[operator_df.direction_id==direction],
            x_col = "speed_mph",
            grouping_col = "stop_sequence" 
        ).encode(tooltip=tooltip_cols)
        .add_selection(select_operator_route)
        .properties(
            title=f"Speed Variation Direction {direction} ", 
            width=150, height=30)
        .transform_filter(select_operator_route)
        for direction in (0, 1)
    ]
    
    chart = (
        alt.hconcat(*speed_charts)
        # configure() swaps in a brand new config, so it goes first.
        # Called last, it would throw away the facet / view / axis
        # settings below.
        #https://github.com/altair-viz/altair/issues/1993
        .configure(padding={'top': 30})
        .configure_facet(spacing=0)
        .configure_view(stroke=None)
        .configure_axis(labelFontSize=12, titleFontSize=12)
    )
    
    # Move the dropdown to the top left
    display(
        HTML("""
            <style>
            form.vega-bindings {
              position: absolute;
              left: 10px;
              top: 4px;
            }
            </style>
            """
            )
    )
    display(chart)


In [24]:
def offpeak_peak_avg_chart(df): 
    """
    Ranged dot plot of `avg_speed_mph` by stop: one point per 
    stop-peak category, with a line joining the points of a stop.
    
    Based on https://altair-viz.github.io/gallery/ranged_dot_plot.html
    """
    connector_color = cp.CALITP_CATEGORY_BRIGHT_COLORS[4]
    peak_palette = [
        cp.CALITP_CATEGORY_BRIGHT_COLORS[4], 
        cp.CALITP_CATEGORY_BOLD_COLORS[0]
    ]
    
    # Line spanning the points that share a stop_name
    connectors = alt.Chart(df).mark_line(color=connector_color).encode(
        x=alt.X('avg_speed_mph:Q', title = "Speed (mph)"),
        y=alt.Y("stop_name:N", title="Stop"),
        detail="stop_name:N"
    )
        
    # Points colored by peak category, on top of the lines
    dots = alt.Chart(df).mark_point(size=100, opacity=1, filled=True).encode(
        x=alt.X('avg_speed_mph:Q', title="Speed (mph)"),
        y=alt.Y('stop_name:N', title="Stop"),
        color=alt.Color('peak:O', scale=alt.Scale(range=peak_palette)),
        tooltip=["route_short_name", "stop_name", 
                 "stop_id", "avg_speed_mph", "peak"]
    ).interactive()
        
    return connectors + dots

In [25]:
def avg_peak_off_peak_by_operator(operator_peak_df):
    """
    Display the peak vs off peak average speed charts for direction 0 
    and direction 1 side by side, filtered by a route dropdown.
    """
    route_picker = get_operator_route_dropdown(operator_peak_df)

    # Same chart for each direction: attach dropdown, size it, filter it
    direction_charts = [
        offpeak_peak_avg_chart(
            operator_peak_df[operator_peak_df.direction_id==direction])
        .add_selection(route_picker)
        .properties(height=700, width=200)
        .transform_filter(route_picker)
        for direction in (0, 1)
    ]
    
    combined = alt.hconcat(*direction_charts)
    combined = combined.properties(
        title = "Offpeak vs Peak Speed Variation for ")
    combined = combined.configure(padding={'top': 10})

    # CSS that repositions the dropdown form
    dropdown_css = """
    <style>
    form.vega-bindings {
      position: absolute;
      left: 225px;
      top: 7px;
    }
    </style>
    """
    
    display(HTML(dropdown_css))
    display(combined)

In [26]:
# Peak vs off peak average speed by stop, one chart per direction
avg_peak_off_peak_by_operator(operator_peak_df)

In [None]:
'''
import branca
ZERO_THIRTY_COLORSCALE = branca.colormap.step.RdYlGn_10.scale(vmin=0, vmax=30)

def make_map(gdf, y_col):
    if y_col == "speed_mph":
        cmap= ZERO_THIRTY_COLORSCALE
    elif y_col == "actual_minus_scheduled_min":
        cmap = "viridis"
    
    m = gdf.explore(
        f"{y_col}",
        tiles = "CartoDB Positron",
        cmap = cmap
    )
    
    return m
'''

In [None]:
#for operator in operators:
    #display(Markdown(f"## {operator}"))
    #test_operator = operators[28] # BBB
    #operator_df, operator_peak_df = subset_for_operator(operator)

    #speed_stripplot_for_operator(operator_df)
    #avg_peak_off_peak_by_operator(operator_peak_df)

In [None]:
# Jittered strip plot of hourly speeds by stop sequence, one chart per direction
speed_stripplot_for_operator(operator_df)

In [None]:
#make_map(operator_df, "actual_minus_scheduled_min")

In [None]:
#make_map(operator_df, "speed_mph")