# Altair Demo

Trying out the charts here before moving to streamlit

In [2]:
import altair as alt
import pandas as pd
from pathlib import Path

In [3]:
fp = Path("../data/leaguedash_labelled_2018-19.pkl")
# with open(fp, "rb") as f:
#     tor = pickle.load(f)
tor = pd.read_pickle(fp)

In [4]:
fp = Path("../data/leaguedash_labelled_2004-05.pkl")
# with open(fp, "rb") as f:
#     det = pickle.load(f)
det = pd.read_pickle(fp)

## Scatterplot

Scatterplot will be a broad overview of the season's stats:

* FG2A vs FG2M, color=PTS
* FG3A vs FG3M, color=PTS
* FG3A vs FG2A, color=PFD
* AST vs TOV, 
* BLK vs STL, color=PF

In [4]:
tor.columns

Index(['PLAYER_NAME', 'TEAM_ABBREVIATION', 'AGE', 'GP_merge', 'MIN_merge',
       'FG3M_merge', 'FG3A_merge', 'FTM_merge', 'FTA_merge', 'OREB_merge',
       'DREB_merge', 'AST_merge', 'TOV_merge', 'STL_merge', 'BLK_merge',
       'BLKA_merge', 'PF_merge', 'PFD_merge', 'PTS_merge', 'PLUS_MINUS_merge',
       'FG2M_merge', 'FG2A_merge', 'GP_RANK', 'MIN_RANK', 'FG3M_RANK',
       'FG3A_RANK', 'FTM_RANK', 'FTA_RANK', 'OREB_RANK', 'DREB_RANK',
       'AST_RANK', 'TOV_RANK', 'STL_RANK', 'BLK_RANK', 'BLKA_RANK', 'PF_RANK',
       'PFD_RANK', 'PTS_RANK', 'PLUS_MINUS_RANK', 'FG2M_RANK', 'FG2A_RANK',
       'gametime_threshold', 'label_pred', 'label_names'],
      dtype='object')

In [5]:
tor.groupby(by="label_pred").agg("count")

Unnamed: 0_level_0,PLAYER_NAME,TEAM_ABBREVIATION,AGE,GP_merge,MIN_merge,FG3M_merge,FG3A_merge,FTM_merge,FTA_merge,OREB_merge,...,BLK_RANK,BLKA_RANK,PF_RANK,PFD_RANK,PTS_RANK,PLUS_MINUS_RANK,FG2M_RANK,FG2A_RANK,gametime_threshold,label_names
label_pred,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
0,135,135,135,135,135,135,135,135,135,135,...,135,135,135,135,135,135,135,135,135,135
1,253,253,253,253,253,253,253,253,253,253,...,253,253,253,253,253,253,253,253,253,253
2,142,142,142,142,142,142,142,142,142,142,...,142,142,142,142,142,142,142,142,142,142


In [5]:
# from example gallery
# https://altair-viz.github.io/gallery/scatter_linked_table.html

src = tor[tor["gametime_threshold"]][tor["label_pred"] == 2]
brush = alt.selection(type="interval")

# scatterplot
points = (
    alt.Chart(src)
    .mark_point()
    .encode(
        x="FG3A_merge:Q",
        y="FG2A_merge:Q",
        color=alt.condition(brush, "PTS_merge:Q", alt.value("grey")),
    )
    .add_selection(brush)
)
# points

# base chart for data tables
ranked_text = (
    alt.Chart(src)
    .mark_text()
    .encode(y=alt.Y("row_number:O", axis=None))
    .transform_window(row_number="row_number()")
    .transform_filter(brush)
    .transform_window(rank="rank(row_number)")
    .transform_filter(alt.datum.rank < 20)
)

# encoding our data table onto the base
player_name = ranked_text.encode(text="PLAYER_NAME:N").properties(title="Name")
team = ranked_text.encode(text="TEAM_ABBREVIATION:N").properties(title="Team")
pts = ranked_text.encode(text="PTS_merge:Q").properties(title="Points")
text = alt.hconcat(player_name, team, pts)

# build chart
alt.hconcat(
    points,
    text,
).resolve_legend(color="independent")

  src = tor[tor["gametime_threshold"]][tor["label_pred"] == 2]


The click and drag selection feature is kind of amazing. Perhaps I can further categorize by the cluster labels?

Use `st.checkbox('label_')` for user to select which labels to plot, and perhaps encode via shape, if multiple labels are selected

## Violinplot

Showcase the distribution of each stat for the two seasons in a shotgun array. Each subplot will have two violins, one for each season.

Let's plot FG2A and FG3A

In [6]:
tor["season"] = tor.apply(lambda x: "2018-19", axis=1)
det["season"] = det.apply(lambda x: "2004-05", axis=1)
src = pd.concat([tor, det], axis=0)
src.sample(10)

Unnamed: 0_level_0,PLAYER_NAME,TEAM_ABBREVIATION,AGE,GP_merge,MIN_merge,FG3M_merge,FG3A_merge,FTM_merge,FTA_merge,OREB_merge,...,PF_RANK,PFD_RANK,PTS_RANK,PLUS_MINUS_RANK,FG2M_RANK,FG2A_RANK,gametime_threshold,label_pred,label_names,season
PLAYER_ID,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
1629003,Shake Milton,PHI,22.0,20.0,268.225,1.9,5.9,0.7,0.9,1.2,...,0.281132,0.94717,0.741509,0.409434,0.720755,0.709434,False,1,Cory Joseph-Eric Gordon-Jae Crowder,2018-19
2839,James Thomas,ATL,24.0,11.0,113.181667,0.0,0.0,0.6,1.9,5.4,...,0.387931,0.793103,0.846983,0.971983,0.538793,0.80819,False,2,Udonis Haslem-Zydrunas Ilgauskas-Rasheed Wallace,2004-05
2038,Joel Przybilla,POR,25.0,76.0,1857.93,0.0,0.0,1.7,3.4,3.4,...,0.265086,0.857759,0.75431,0.68319,0.489224,0.799569,True,2,Udonis Haslem-Zydrunas Ilgauskas-Rasheed Wallace,2004-05
2544,LeBron James,LAL,34.0,55.0,1937.401667,2.1,6.1,5.2,7.8,1.1,...,0.933962,0.037736,0.015094,0.239623,0.033962,0.049057,True,0,Damian Lillard-Khris Middleton-Paul George,2018-19
445,Wesley Person,DEN,34.0,34.959184,569.989286,2.167347,4.897959,0.418367,0.585714,0.281633,...,0.989224,0.357759,0.37931,0.515086,0.670259,0.775862,False,1,Bruce Bowen-Jeff McInnis-Shane Battier,2004-05
467,Jason Kidd,NJN,32.0,59.297297,2204.594324,1.932432,5.364865,2.002703,2.735135,1.464865,...,0.971983,0.769397,0.346983,0.206897,0.715517,0.69181,True,0,Dirk Nowitzki-Kevin Garnett-Shawn Marion,2004-05
1626169,Stanley Johnson,NOP,23.0,66.0,1207.37,1.8,6.4,1.5,1.9,1.0,...,0.433962,0.70566,0.54717,0.803774,0.592453,0.611321,True,1,Cory Joseph-Eric Gordon-Jae Crowder,2018-19
1628415,Dillon Brooks,MEM,23.0,18.0,329.895,1.6,4.4,2.4,3.3,1.0,...,0.073585,0.503774,0.454717,0.530189,0.498113,0.377358,False,1,Cory Joseph-Eric Gordon-Jae Crowder,2018-19
2397,Yao Ming,HOU,24.0,69.12766,2126.235355,0.0,0.0,5.819149,7.538298,3.104255,...,0.295259,0.353448,0.032328,0.282328,0.012931,0.05819,True,2,Udonis Haslem-Zydrunas Ilgauskas-Rasheed Wallace,2004-05
2072,Michael Redd,MIL,25.0,75.0,2852.163333,1.3,3.7,4.7,5.5,0.9,...,0.956897,0.084052,0.034483,0.594828,0.0625,0.056034,True,0,Dirk Nowitzki-Kevin Garnett-Shawn Marion,2004-05


In [8]:
def make_longform(df):
    """Altair prefers longform structures in its grammar"""
    player_bios = [
        "PLAYER_NAME",
        "TEAM_ABBREVIATION",
        "season",
        "label_names",
        "gametime_threshold",
    ]
    longform = df[df["gametime_threshold"]].melt(
        id_vars=player_bios,
        value_vars=df.drop(player_bios, axis="columns").columns,
        var_name="variable",
        value_name="value",
        ignore_index=False,
    )
    longform = longform.set_index("season", append=True)
    # longform.set_index()
    return longform


src_long = make_longform(src)
src_long.sample(5)

# src_long.set_index('season', append=True).xs("2018-19", level="season").head()
# src_long.set_index('season', append=True).loc[(slice(None) ,"2018-19"),:].head()

Unnamed: 0_level_0,Unnamed: 1_level_0,PLAYER_NAME,TEAM_ABBREVIATION,label_names,gametime_threshold,variable,value
PLAYER_ID,season,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1
1938,2004-05,Manu Ginobili,SAS,Dirk Nowitzki-Kevin Garnett-Shawn Marion,True,PF_merge,3.138333
1629006,2018-19,Josh Okogie,MIN,Cory Joseph-Eric Gordon-Jae Crowder,True,PF_merge,3.4
2736,2004-05,Luol Deng,CHI,Dirk Nowitzki-Kevin Garnett-Shawn Marion,True,BLKA_RANK,0.247845
1628995,2018-19,Kevin Knox II,NYK,Damian Lillard-Khris Middleton-Paul George,True,TOV_RANK,0.422642
1905,2004-05,Andrei Kirilenko,UTA,Udonis Haslem-Zydrunas Ilgauskas-Rasheed Wallace,True,PF_RANK,0.814655


In [9]:
vars = ["FG2A_merge", "FG3A_merge", "PFD_merge"]
# src_a = tor[tor["gametime_threshold"]]
# src_b = det[det["gametime_threshold"]]


def make_violins(df, var, gametime_threshold: bool = True):
    """Make violin charts of var stat grouped by seasons

    Parameters:
    -----------

    df: dataframe, wide-format

    var: str
        column name for which the values will be density transformed

    Returns:
    ---------

    violin: alt.Chart() object
    """
    # use .facet()
    if gametime_threshold:
        df = df.loc[df["gametime_threshold"]]

    # hover = alt.selection_single(on="mouseover", nearest=True, empty="none")

    base = (
        alt.Chart(df)
        .transform_density(
            density=var,
            as_=[var, "density"],
            groupby=["season"],  # don't put :N here
        )
        .mark_area(orient="horizontal")
        .encode(
            y=f"{var}:Q",
            color="season:N",
            x=alt.X(
                "density:Q",
                stack="center",
                impute=None,
                title=None,
                axis=alt.Axis(labels=False, values=[0], grid=False, ticks=False),
            ),
            column=alt.Column(
                "season:N",
                header=alt.Header(
                    titleOrient="bottom",
                    labelAnchor="end",
                    labelOrient="bottom",
                    labelAngle=-30,
                    labelPadding=0,
                ),
            ),
        )
        .properties(width=80)
    )

    return base

In [19]:
foo = {1: "a", 2: "b", 3: "c"}
while foo:
    print(foo.popitem()[1])

c
b
a


In [10]:
alt.data_transformers.disable_max_rows()
alt.data_transformers.enable("json")

DataTransformerRegistry.enable('json')

In [21]:
merge_stats = [stat for stat in src.columns if "merge" in stat]
violins = {stat: make_violins(df=src, var=stat) for stat in merge_stats}
# violin_fg3 = make_violins(src, "FG3A_merge")
# violin_fg2 = make_violins(src, "FG2A_merge")
# violin_fg3 | violin_fg2
chart = alt.vconcat()
while violins:
    rows = alt.hconcat()
    for i in range(4):
        if violins:
            rows |= violins.popitem()[1]
    chart &= rows

chart

In [12]:
violins["FG2M_merge"]

NameError: name 'src' is not defined

In [52]:
src_long.head()

Unnamed: 0_level_0,PLAYER_NAME,TEAM_ABBREVIATION,season,label_names,gametime_threshold,variable,value
PLAYER_ID,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1
0,Aaron Gordon,ORL,2018-19,Damian Lillard-Khris Middleton-Paul George,True,AGE,23.0
1,Aaron Holiday,IND,2018-19,Cory Joseph-Eric Gordon-Jae Crowder,True,AGE,22.0
2,Abdel Nader,OKC,2018-19,Cory Joseph-Eric Gordon-Jae Crowder,True,AGE,25.0
3,Al Horford,BOS,2018-19,Damian Lillard-Khris Middleton-Paul George,True,AGE,33.0
4,Al-Farouq Aminu,POR,2018-19,Cory Joseph-Eric Gordon-Jae Crowder,True,AGE,28.0


In [49]:
violin_stat_filter = src_long.apply(lambda x: "merge" in x["variable"], axis=1)
src_long[violin_stat_filter]

Unnamed: 0_level_0,PLAYER_NAME,TEAM_ABBREVIATION,season,label_names,gametime_threshold,variable,value
PLAYER_ID,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1
674,Aaron Gordon,ORL,2018-19,Damian Lillard-Khris Middleton-Paul George,True,GP_merge,69.704545
675,Aaron Holiday,IND,2018-19,Cory Joseph-Eric Gordon-Jae Crowder,True,GP_merge,44.964286
676,Abdel Nader,OKC,2018-19,Cory Joseph-Eric Gordon-Jae Crowder,True,GP_merge,55.805970
677,Al Horford,BOS,2018-19,Damian Lillard-Khris Middleton-Paul George,True,GP_merge,55.651163
678,Al-Farouq Aminu,POR,2018-19,Cory Joseph-Eric Gordon-Jae Crowder,True,GP_merge,62.592920
...,...,...,...,...,...,...,...
13475,Zach Randolph,POR,2004-05,Udonis Haslem-Zydrunas Ilgauskas-Rasheed Wallace,True,FG2A_merge,16.500000
13476,Zarko Cabarkapa,GSW,2004-05,Udonis Haslem-Zydrunas Ilgauskas-Rasheed Wallace,True,FG2A_merge,10.700000
13477,Zaza Pachulia,MIL,2004-05,Udonis Haslem-Zydrunas Ilgauskas-Rasheed Wallace,True,FG2A_merge,9.100000
13478,Zeljko Rebraca,LAC,2004-05,Udonis Haslem-Zydrunas Ilgauskas-Rasheed Wallace,True,FG2A_merge,9.100000


In [51]:
violin_stat_filter.head(5)

PLAYER_ID
0    False
1    False
2    False
3    False
4    False
dtype: bool

In [34]:
alt.Chart(
    src_long[src_long["variable"] in set(vars)],
    width=100,
    height=100,
).transform_density(
    "value", groupby=["season", "variable"], as_=["value", "density"]
).mark_area(
    orient="horizontal"
).encode(
    y="value:Q",
    x=alt.X(
        "density:Q",
        stack="center",
        impute=None,
        title=None,
        axis=alt.Axis(labels=False, values=[0], grid=False, ticks=False),
    ),
).facet(
    "variable:N",
    columns=5,
)

TypeError: unhashable type: 'Series'