In [13]:
import pandas as pd
import altair as alt
# Lift Altair's row limit on the data transformer that is active at this point.
# NOTE(review): the 'data_server' transformer enabled below replaces the active
# transformer, so this call may be redundant — confirm before removing it.
alt.data_transformers.disable_max_rows()

# Save a vega-lite spec and a PNG blob for each plot in the notebook
alt.renderers.enable('mimetype')
# Handle large data sets without embedding them in the notebook
# NOTE(review): presumably requires the separate altair_data_server package — confirm.
alt.data_transformers.enable('data_server')

DataTransformerRegistry.enable('data_server')

In [3]:
# Load the per-player, per-season stats table from a CSV file in the working directory.
full_data = pd.read_csv("players_stats_by_season_full_details.csv")

In [9]:
# Keep only the rows whose League is "NBA".
NBA_data = full_data.loc[full_data["League"].eq("NBA")]

# Split the NBA rows by Stage: regular-season rows ...
NBA_reg = NBA_data.loc[NBA_data["Stage"].eq("Regular_Season")]
# ... and playoff rows.
NBA_playoff = NBA_data.loc[NBA_data["Stage"].eq("Playoffs")]

In [12]:
# Display the column labels available in the NBA subset.
NBA_data.columns

Index(['League', 'Season', 'Stage', 'Player', 'Team', 'GP', 'MIN', 'FGM',
       'FGA', '3PM', '3PA', 'FTM', 'FTA', 'TOV', 'PF', 'ORB', 'DRB', 'REB',
       'AST', 'STL', 'BLK', 'PTS', 'birth_year', 'birth_month', 'birth_date',
       'height', 'height_cm', 'weight', 'weight_kg', 'nationality',
       'high_school', 'draft_round', 'draft_pick', 'draft_team'],
      dtype='object')

## Overall NBA trends (not separated by playoffs vs regular season)

In [25]:
# Group by season and sum the numeric columns only.
# numeric_only=True is passed explicitly: without it, newer pandas versions try to
# sum the text columns (Player, Team, height, ...) as well, which either raises or
# concatenates strings instead of silently dropping them.
# Note that summed identifier-like columns (birth_year, draft_pick, ...) are kept
# in the result but are not meaningful as totals.
NBA_seasons_full = (
    NBA_data
    .groupby('Season')
    .sum(numeric_only=True)
    .reset_index()
)

# Share of made field goals (FGM) in each season that were made 3-pointers (3PM).
NBA_seasons_full['3PM_ratio'] = NBA_seasons_full['3PM'] / NBA_seasons_full['FGM']

NBA_seasons_full.head()

Unnamed: 0,Season,GP,MIN,FGM,FGA,3PM,3PA,FTM,FTA,TOV,...,STL,BLK,PTS,birth_year,height_cm,weight,weight_kg,draft_round,draft_pick,3PM_ratio
0,1999 - 2000,17279,468414.0,73403,161880,9840,27376,38595,51070,28745,...,15375,10175,195239,673978.0,68577.0,75075.0,34040.0,382.0,3451.0,0.134054
1,2000 - 2001,15869,445472.0,68913,153979,9496,26605,37087,49172,26655,...,14351,9821,184412,621228.0,63302.0,69446.0,31490.0,351.0,3268.0,0.137797
2,2001 - 2002,15766,443097.0,68398,151741,10228,28367,35052,46226,25160,...,14326,9964,182075,629544.0,64148.0,70838.0,32118.0,343.0,3580.0,0.149537
3,2002 - 2003,18176,492219.0,74278,166750,10953,31100,39433,51559,28726,...,16246,10069,198940,718602.0,73303.0,81094.0,36769.0,394.0,3895.0,0.14746
4,2003 - 2004,16577,459832.1,67904,153347,10242,29455,36009,47816,26980,...,15072,9984,182057,661898.0,67428.0,74750.0,33897.0,357.0,3686.0,0.150831


### Percentage of field goals made that were 3-pointers by season

In [26]:
# Line chart of the 3PM_ratio column by season, with the y-axis formatted as a percentage.
threes_ratio_chart = (
    alt.Chart(NBA_seasons_full)
    .mark_line()
    .encode(
        y=alt.Y('3PM_ratio', axis=alt.Axis(format='%')),
        x=alt.X('Season'),
    )
)

threes_ratio_chart

<VegaLite 4 object>

If you see this message, it means the renderer has not been properly enabled
for the frontend that you are using. For more information, see
https://altair-viz.github.io/user_guide/troubleshooting.html


### Points per game by season 

In [117]:
# Points per game for each season: points per player-minute, scaled by
# 5 players on the court and 48 minutes in a game.
NBA_seasons_full['Pts/Game'] = (
    NBA_seasons_full['PTS']
    .div(NBA_seasons_full['MIN'])
    .mul(5 * 48)
)

# The y-axis is not forced to start at zero.
ppg_chart = (
    alt.Chart(NBA_seasons_full)
    .mark_line()
    .encode(
        y=alt.Y('Pts/Game', scale=alt.Scale(zero=False)),
        x=alt.X('Season'),
    )
)

ppg_chart



<VegaLite 4 object>

If you see this message, it means the renderer has not been properly enabled
for the frontend that you are using. For more information, see
https://altair-viz.github.io/user_guide/troubleshooting.html


### Assists per game by season

In [127]:
# Assists per game for each season: assists per player-minute, scaled by
# 5 players on the court and 48 minutes in a game.
NBA_seasons_full['Ast/Game'] = (
    NBA_seasons_full['AST']
    .div(NBA_seasons_full['MIN'])
    .mul(5 * 48)
)

# The y-axis is not forced to start at zero.
apg_chart = (
    alt.Chart(NBA_seasons_full)
    .mark_line()
    .encode(
        y=alt.Y('Ast/Game', scale=alt.Scale(zero=False)),
        x=alt.X('Season'),
    )
)

apg_chart

<VegaLite 4 object>

If you see this message, it means the renderer has not been properly enabled
for the frontend that you are using. For more information, see
https://altair-viz.github.io/user_guide/troubleshooting.html


### Shots per game by season

In [130]:
# Field goal attempts per game for each season: attempts per player-minute,
# scaled by 5 players on the court and 48 minutes in a game.
NBA_seasons_full['FGA/Game'] = (
    NBA_seasons_full['FGA']
    .div(NBA_seasons_full['MIN'])
    .mul(5 * 48)
)

# The y-axis is not forced to start at zero.
fgpg_chart = (
    alt.Chart(NBA_seasons_full)
    .mark_line()
    .encode(
        y=alt.Y('FGA/Game', scale=alt.Scale(zero=False)),
        x=alt.X('Season'),
    )
)

fgpg_chart

<VegaLite 4 object>

If you see this message, it means the renderer has not been properly enabled
for the frontend that you are using. For more information, see
https://altair-viz.github.io/user_guide/troubleshooting.html


### Rebounds per game by season

It is interesting that total and defensive rebounding have increased, but offensive rebounding has decreased. 



In [133]:
# Create total, defensive and offensive rebounds-per-game columns for each season
# by multiplying rebounds/minute/player by 5 for 5 players on the court and by 48 for 48 minutes in game.
NBA_seasons_full['REB/Game'] = (NBA_seasons_full['REB']/NBA_seasons_full['MIN'])*(5*48)
NBA_seasons_full['DRB/Game'] = (NBA_seasons_full['DRB']/NBA_seasons_full['MIN'])*(5*48)
NBA_seasons_full['ORB/Game'] = (NBA_seasons_full['ORB']/NBA_seasons_full['MIN'])*(5*48)

# The three lines are layered into one chart. Each layer adds a constant
# 'rebound_type' label and maps it to colour so the layers share a legend and can
# be told apart; without it all three lines are drawn in the same default colour.
# A common y-axis title replaces the concatenated field names of the layers.
rpg_chart = (
    alt.Chart(NBA_seasons_full)
    .transform_calculate(rebound_type="'Total (REB)'")
    .mark_line()
    .encode(
        alt.Y('REB/Game', scale=alt.Scale(zero=False), title='Rebounds per game'),
        alt.X('Season'),
        alt.Color('rebound_type:N', title='Rebound type'),
    )
)

drpg_chart = (
    alt.Chart(NBA_seasons_full)
    .transform_calculate(rebound_type="'Defensive (DRB)'")
    .mark_line()
    .encode(
        alt.Y('DRB/Game', scale=alt.Scale(zero=False), title='Rebounds per game'),
        alt.X('Season'),
        alt.Color('rebound_type:N', title='Rebound type'),
    )
)

orpg_chart = (
    alt.Chart(NBA_seasons_full)
    .transform_calculate(rebound_type="'Offensive (ORB)'")
    .mark_line()
    .encode(
        alt.Y('ORB/Game', scale=alt.Scale(zero=False), title='Rebounds per game'),
        alt.X('Season'),
        alt.Color('rebound_type:N', title='Rebound type'),
    )
)

drpg_chart + rpg_chart + orpg_chart

<VegaLite 4 object>

If you see this message, it means the renderer has not been properly enabled
for the frontend that you are using. For more information, see
https://altair-viz.github.io/user_guide/troubleshooting.html


### Steals per game by season

The large drop in steals per game in 2004-2005 was due to the NBA cracking down on "hand checking" and calling it as a foul. 

In [126]:
# Steals per game for each season: steals per player-minute, scaled by
# 5 players on the court and 48 minutes in a game.
NBA_seasons_full['STL/Game'] = (
    NBA_seasons_full['STL']
    .div(NBA_seasons_full['MIN'])
    .mul(5 * 48)
)

# The y-axis is not forced to start at zero.
spg_chart = (
    alt.Chart(NBA_seasons_full)
    .mark_line()
    .encode(
        y=alt.Y('STL/Game', scale=alt.Scale(zero=False)),
        x=alt.X('Season'),
    )
)

spg_chart

<VegaLite 4 object>

If you see this message, it means the renderer has not been properly enabled
for the frontend that you are using. For more information, see
https://altair-viz.github.io/user_guide/troubleshooting.html


### Blocks per game by season

In [124]:
# Blocks per game for each season: blocks per player-minute, scaled by
# 5 players on the court and 48 minutes in a game.
NBA_seasons_full['BLK/Game'] = (
    NBA_seasons_full['BLK']
    .div(NBA_seasons_full['MIN'])
    .mul(5 * 48)
)

# NOTE(review): unlike the other per-game charts in this notebook, no
# zero=False scale is set here, so the default y-axis baseline applies — confirm
# whether that is intentional.
bpg_chart = (
    alt.Chart(NBA_seasons_full)
    .mark_line()
    .encode(
        y=alt.Y('BLK/Game'),
        x=alt.X('Season'),
    )
)

bpg_chart

<VegaLite 4 object>

If you see this message, it means the renderer has not been properly enabled
for the frontend that you are using. For more information, see
https://altair-viz.github.io/user_guide/troubleshooting.html


In [119]:
# Re-display the column labels of the NBA subset (same listing as shown earlier in the notebook).
NBA_data.columns

Index(['League', 'Season', 'Stage', 'Player', 'Team', 'GP', 'MIN', 'FGM',
       'FGA', '3PM', '3PA', 'FTM', 'FTA', 'TOV', 'PF', 'ORB', 'DRB', 'REB',
       'AST', 'STL', 'BLK', 'PTS', 'birth_year', 'birth_month', 'birth_date',
       'height', 'height_cm', 'weight', 'weight_kg', 'nationality',
       'high_school', 'draft_round', 'draft_pick', 'draft_team'],
      dtype='object')

# Ideas to keep working on

- player weight or BMI over time
- player stats according to age
- NBA stats according to playoffs vs regular season (right now all the stats combine regular-season and playoff games)
- Draft position vs offensive efficiency