# Altair Demo

Trying out the charts here before moving to streamlit

In [1]:
import altair as alt
import pandas as pd
from pathlib import Path

In [2]:
fp = Path("../data/tor_labelled.pkl")
# with open(fp, "rb") as f:
#     tor = pickle.load(f)
tor = pd.read_pickle(fp)

In [3]:
fp = Path("../data/det_labelled.pkl")
# with open(fp, "rb") as f:
#     det = pickle.load(f)
det = pd.read_pickle(fp)

## Scatterplot

Scatterplot will be a broad overview of the season's stats:

* FG2A vs FG2M, color=PTS
* FG3A vs FG3M, color=PTS
* FG3A vs FG2A, color=PFD
* AST vs TOV, 
* BLK vs STL, color=PF

In [4]:
tor.columns

Index(['PLAYER_NAME', 'TEAM_ABBREVIATION', 'AGE', 'GP_merge', 'MIN_merge',
       'FG3M_merge', 'FG3A_merge', 'FTM_merge', 'FTA_merge', 'OREB_merge',
       'DREB_merge', 'AST_merge', 'TOV_merge', 'STL_merge', 'BLK_merge',
       'BLKA_merge', 'PF_merge', 'PFD_merge', 'PTS_merge', 'PLUS_MINUS_merge',
       'FG2M_merge', 'FG2A_merge', 'GP_RANK', 'MIN_RANK', 'FG3M_RANK',
       'FG3A_RANK', 'FTM_RANK', 'FTA_RANK', 'OREB_RANK', 'DREB_RANK',
       'AST_RANK', 'TOV_RANK', 'STL_RANK', 'BLK_RANK', 'BLKA_RANK', 'PF_RANK',
       'PFD_RANK', 'PTS_RANK', 'PLUS_MINUS_RANK', 'FG2M_RANK', 'FG2A_RANK',
       'gametime_threshold', 'label_pred', 'label_names'],
      dtype='object')

In [12]:
tor.groupby(by="label_pred").agg("count")

Unnamed: 0_level_0,PLAYER_NAME,TEAM_ABBREVIATION,AGE,GP_merge,MIN_merge,FG3M_merge,FG3A_merge,FTM_merge,FTA_merge,OREB_merge,...,BLK_RANK,BLKA_RANK,PF_RANK,PFD_RANK,PTS_RANK,PLUS_MINUS_RANK,FG2M_RANK,FG2A_RANK,gametime_threshold,label_names
label_pred,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
0,121,121,121,121,121,121,121,121,121,121,...,121,121,121,121,121,121,121,121,121,121
1,12,12,12,12,12,12,12,12,12,12,...,12,12,12,12,12,12,12,12,12,12
2,397,397,397,397,397,397,397,397,397,397,...,397,397,397,397,397,397,397,397,397,397


In [13]:
# from example gallery
# https://altair-viz.github.io/gallery/scatter_linked_table.html

src = tor[tor["gametime_threshold"]][tor["label_pred"] == 2]
brush = alt.selection(type="interval")

# scatterplot
points = (
    alt.Chart(src)
    .mark_point()
    .encode(
        x="FG3A_merge:Q",
        y="FG2A_merge:Q",
        color=alt.condition(brush, "PTS_merge:Q", alt.value("grey")),
    )
    .add_selection(brush)
)
# points

# base chart for data tables
ranked_text = (
    alt.Chart(src)
    .mark_text()
    .encode(y=alt.Y("row_number:O", axis=None))
    .transform_window(row_number="row_number()")
    .transform_filter(brush)
    .transform_window(rank="rank(row_number)")
    .transform_filter(alt.datum.rank < 20)
)

# encoding our data table onto the base
player_name = ranked_text.encode(text="PLAYER_NAME:N").properties(title="Name")
team = ranked_text.encode(text="TEAM_ABBREVIATION:N").properties(title="Team")
pts = ranked_text.encode(text="PTS_merge:Q").properties(title="Points")
text = alt.hconcat(player_name, team, pts)

# build chart
alt.hconcat(
    points,
    text,
).resolve_legend(color="independent")

  src = tor[tor["gametime_threshold"]][tor["label_pred"] == 2]


The click and drag selection feature is kind of amazing. Perhaps I can further categorize by the cluster labels?

Use `st.checkbox('label_')` for user to select which labels to plot, and perhaps encode via shape, if multiple labels are selected

## Violinplot

Showcase the distribution of each stat for the two seasons in a shotgun array. Each subplot will have two violins, one for each season.

Let's plot FG2A and FG3A

In [19]:
tor["season"] = tor.apply(lambda x: "2018-19", axis=1)
det["season"] = det.apply(lambda x: "2004-05", axis=1)
src = pd.concat([tor, det], axis=0)
src.sample(10)

Unnamed: 0_level_0,PLAYER_NAME,TEAM_ABBREVIATION,AGE,GP_merge,MIN_merge,FG3M_merge,FG3A_merge,FTM_merge,FTA_merge,OREB_merge,...,PF_RANK,PFD_RANK,PTS_RANK,PLUS_MINUS_RANK,FG2M_RANK,FG2A_RANK,gametime_threshold,label_pred,label_names,season
PLAYER_ID,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
1629057,Robert Williams III,BOS,21.0,27.421053,240.181053,0.0,0.0,1.826316,2.5,3.305263,...,0.207547,0.611321,0.832075,0.74717,0.39434,0.650943,False,0,Derrick Favors-Clint Capela-Myles Turner,2018-19
1709,Michael Olowokandi,MIN,30.0,62.0,1217.636667,0.0,0.0,1.4,2.0,3.1,...,0.127155,0.068966,0.646552,0.517241,0.310345,0.301724,True,0,Derrick Favors-Clint Capela-Myles Turner,2004-05
201565,Derrick Rose,MIN,30.0,51.0,1392.161667,1.4,3.8,3.5,4.1,0.9,...,0.94717,0.34717,0.060377,0.328302,0.045283,0.026415,True,2,Brook Lopez-James Harden-Pascal Siakam,2018-19
1563,Kevin Ollie,PHI,32.0,26.0,160.086667,0.0,0.0,1.3,2.0,0.7,...,0.517241,0.18319,0.939655,0.269397,0.842672,0.760776,False,2,Brook Lopez-James Harden-Pascal Siakam,2004-05
201588,George Hill,MIL,33.0,45.0,999.705,1.466667,4.266667,2.1,2.533333,1.2,...,0.618868,0.49434,0.532075,0.107547,0.537736,0.660377,True,2,Brook Lopez-James Harden-Pascal Siakam,2018-19
468,Jon Barry,HOU,35.0,58.542169,1279.687711,1.76747,4.06747,1.483133,1.666265,0.53253,...,0.885776,0.849138,0.625,0.077586,0.898707,0.928879,True,2,Brook Lopez-James Harden-Pascal Siakam,2004-05
2873,Billy Thomas,NJN,29.0,21.827586,302.211839,1.465517,5.0,0.603448,0.775862,0.603448,...,0.836207,0.702586,0.857759,0.993534,0.952586,0.702586,False,1,Vincent Edwards-Okaro White-Gary Clark,2004-05
1629059,Elie Okobo,PHX,21.0,53.0,958.19,1.5,5.0,1.4,1.8,0.5,...,0.241509,0.756604,0.775472,0.867925,0.69434,0.7,True,2,Brook Lopez-James Harden-Pascal Siakam,2018-19
1628982,Melvin Frazier Jr.,ORL,22.0,7.375,33.233333,0.0,3.0625,1.4375,3.8375,2.4375,...,0.556604,0.4,0.681132,0.028302,0.232075,0.120755,False,0,Derrick Favors-Clint Capela-Myles Turner,2018-19
201163,Wilson Chandler,LAC,32.0,44.627119,1023.934887,1.650847,5.084746,0.884746,1.057627,1.29661,...,0.273585,0.918868,0.886792,0.173585,0.862264,0.89434,True,2,Brook Lopez-James Harden-Pascal Siakam,2018-19


In [24]:
def make_longform(df):
    """Altair prefers longform structures in its grammar"""
    player_bios = [
        "PLAYER_NAME",
        "TEAM_ABBREVIATION",
        "season",
        "label_names",
        "gametime_threshold",
    ]
    longform = df[df["gametime_threshold"]].melt(
        id_vars=player_bios, value_vars=df.drop(player_bios, axis="columns").columns
    )
    return longform


src_long = make_longform(src)
src_long.sample(5)

Unnamed: 0,PLAYER_NAME,TEAM_ABBREVIATION,season,label_names,gametime_threshold,variable,value
2646,Shandon Anderson,MIA,2004-05,Derrick Favors-Clint Capela-Myles Turner,True,FG3M_merge,0.160976
21271,Bob Sura,HOU,2004-05,Brook Lopez-James Harden-Pascal Siakam,True,BLK_RANK,0.883621
13147,Wesley Matthews,IND,2018-19,Brook Lopez-James Harden-Pascal Siakam,True,FG2A_merge,4.850649
1566,Malik Monk,CHA,2018-19,Brook Lopez-James Harden-Pascal Siakam,True,MIN_merge,1258.095
6264,Kyle Korver,UTA,2018-19,Brook Lopez-James Harden-Pascal Siakam,True,AST_merge,1.974359


In [26]:
src_long.shape

(26960, 7)

In [61]:
vars = ["FG2A_merge", "FG3A_merge", "PFD_merge"]
# src_a = tor[tor["gametime_threshold"]]
# src_b = det[det["gametime_threshold"]]


def make_violins(df, var, gametime_threshold: bool = True):
    # use .facet()
    if gametime_threshold:
        df = df.loc[df["gametime_threshold"]]

    hover = alt.selection_single(on="mouseover", nearest=True, empty="none")

    base = (
        alt.Chart(df)
        .transform_density(
            density=var,
            as_=[var, "density"],
            groupby=["season"],  # don't put :N here
        )
        .mark_area(orient="horizontal")
        .encode(
            y=f"{var}:Q",
            color="season:N",
            x=alt.X(
                "density:Q",
                stack="center",
                impute=None,
                title=None,
                axis=alt.Axis(labels=False, values=[0], grid=False, ticks=False),
            ),
            column=alt.Column(
                "season:N",
                header=alt.Header(
                    titleOrient="bottom",
                    labelAnchor="end",
                    labelOrient="bottom",
                    labelAngle=-30,
                    labelPadding=0,
                ),
            ),
        )
        .properties(width=80)
    )

    return base

In [63]:
# alt.data_transformers.disable_max_rows()
# alt.data_transformers.enable('json')
violin_fg3 = make_violins(src, "FG3A_merge")
violin_fg2 = make_violins(src, "FG2A_merge")
# violin_fg3 | violin_fg2
foo = alt.hconcat(violin_fg2, violin_fg3)
bar = foo
foo & bar

In [32]:
src_long[
    (src_long["variable"] == "FG2A_merge") | (src_long["variable"] == "FG3A_merge")
].shape

(1348, 7)