In [1]:
import altair as alt
import numpy as np
import pandas as pd

import dash
import dash_bootstrap_components as dbc
import dash_core_components as dcc
import dash_html_components as html
from dash.dependencies import Input, Output, State

# Switch off Altair's max-rows check so charts can be built from the full data frames below.
alt.data_transformers.disable_max_rows()

DataTransformerRegistry.enable('default')

In [2]:
# Columns of the raw CSV that are never read after loading.
columns_to_skip = [
    'birth_year', 'birth_month', 'height', 'height_cm', 'weight', 'weight_kg',
    'nationality', 'high_school', 'draft_round', 'draft_pick', 'draft_team',
]

# Load the season-level stats, keep only the NBA rows, and convert the two
# text columns: birth_date becomes a datetime and Season becomes the number
# formed by the first whitespace-separated token of the season label.
player_data = (
    pd.read_csv(
        'players_stats_by_season_full_details.csv',
        usecols=lambda column: column not in columns_to_skip,
    )
    .loc[lambda frame: frame['League'] == 'NBA']
    .drop(columns='League')
    .assign(
        birth_date=lambda frame: pd.to_datetime(frame['birth_date'], format='%b %d, %Y'),
        Season=lambda frame: pd.to_numeric(frame['Season'].str.split(expand=True)[0]),
    )
)

player_data.info()

<class 'pandas.core.frame.DataFrame'>
Int64Index: 7633 entries, 0 to 52115
Data columns (total 22 columns):
 #   Column      Non-Null Count  Dtype         
---  ------      --------------  -----         
 0   Season      7633 non-null   int64         
 1   Stage       7633 non-null   object        
 2   Player      7633 non-null   object        
 3   Team        7633 non-null   object        
 4   GP          7633 non-null   int64         
 5   MIN         7633 non-null   float64       
 6   FGM         7633 non-null   int64         
 7   FGA         7633 non-null   int64         
 8   3PM         7633 non-null   int64         
 9   3PA         7633 non-null   int64         
 10  FTM         7633 non-null   int64         
 11  FTA         7633 non-null   int64         
 12  TOV         7633 non-null   int64         
 13  PF          7633 non-null   int64         
 14  ORB         7633 non-null   int64         
 15  DRB         7633 non-null   int64         
 16  REB         7633 non-nu

# Player Stats

## metrics

In [3]:
# Career summary per (Player, Stage): average the season rows, then derive the
# shooting percentages and minutes per game from those averages. A 0 / 0 ratio
# (for example no three-point attempts) is left as NaN.
metric_inputs = ['GP', 'MIN', 'FGM', 'FGA', 'FTM', 'FTA', '3PM', '3PA']

df_metrics = (
    player_data[['Player', 'Stage'] + metric_inputs]
    .groupby(['Player', 'Stage'])
    .mean()
    .reset_index()
    .assign(**{
        'career_FG_%': lambda avg: (avg['FGM'] / avg['FGA'] * 100).round(2),
        'career_FT_%': lambda avg: (avg['FTM'] / avg['FTA'] * 100).round(2),
        'career_3PT_%': lambda avg: (avg['3PM'] / avg['3PA'] * 100).round(2),
        'Minutes_per_game': lambda avg: (avg['MIN'] / avg['GP']).round(2),
    })
    .drop(columns=metric_inputs)
)
df_metrics.head()

Unnamed: 0,Player,Stage,career_FG_%,career_FT_%,career_3PT_%,Minutes_per_game
0,A.C. Green,Playoffs,41.11,69.57,,18.65
1,A.C. Green,Regular_Season,44.59,70.39,10.0,20.37
2,A.J. Price,Playoffs,37.14,90.0,43.75,16.0
3,Aaron Brooks,Playoffs,41.89,75.86,36.36,20.74
4,Aaron Brooks,Regular_Season,41.68,83.95,38.54,24.29


## chart 1

In [4]:
# Points per game split by scoring type, in long format (one row per
# player / season / stage / points type). Two-point scoring is whatever is
# left of the per-game points after three-pointers and free throws.
df_chart_1 = (
    player_data[['Player', 'Season', 'Stage', 'GP', 'PTS', 'FTM', '3PM']]
    .assign(**{
        'Free throws': lambda d: d['FTM'] / d['GP'],
        '3 Point': lambda d: (d['3PM'] * 3) / d['GP'],
        '2 Point': lambda d: d['PTS'] / d['GP'] - (d['3 Point'] + d['Free throws']),
    })
    .melt(
        id_vars=['Player', 'Season', 'Stage'],
        value_vars=['Free throws', '3 Point', '2 Point'],
        var_name='Points_type',
        value_name='Points_per_game',
    )
    .assign(Points_per_game=lambda d: d['Points_per_game'].round(2))
)
df_chart_1.head()

Unnamed: 0,Player,Season,Stage,Points_type,Points_per_game
0,Shaquille O'Neal,1999,Regular_Season,Free throws,5.47
1,Vince Carter,1999,Regular_Season,Free throws,5.32
2,Karl Malone,1999,Regular_Season,Free throws,7.18
3,Allen Iverson,1999,Regular_Season,Free throws,6.31
4,Gary Payton,1999,Regular_Season,Free throws,3.79


In [5]:
# Preview with a hard-coded sample player: per-season bars of points per game,
# coloured by how the points were scored.
chart_1_rows = df_chart_1[
    df_chart_1['Player'].eq('Kobe Bryant') & df_chart_1['Stage'].eq('Regular_Season')
]

(
    alt.Chart(chart_1_rows)
    .mark_bar()
    .encode(
        x=alt.X('Season:O'),
        y=alt.Y('sum(Points_per_game)', title='Points'),
        color=alt.Color('Points_type', legend=alt.Legend(orient='bottom', title="")),
        tooltip=['Player', 'Stage', 'Season', 'Points_type', 'Points_per_game'],
    )
    .properties(title='Average Points by Season', width=300, height=250)
)

## chart 2

In [6]:
# Assists per game for every player / season / stage row.
df_chart_2 = player_data[['Player', 'Season', 'Stage']].assign(
    Assists_per_game=(player_data['AST'] / player_data['GP']).round(2)
)
df_chart_2.head()

Unnamed: 0,Player,Season,Stage,Assists_per_game
0,Shaquille O'Neal,1999,Regular_Season,3.78
1,Vince Carter,1999,Regular_Season,3.93
2,Karl Malone,1999,Regular_Season,3.71
3,Allen Iverson,1999,Regular_Season,4.69
4,Gary Payton,1999,Regular_Season,8.93


In [7]:
# Preview with a hard-coded sample player: assists per game as a line over seasons.
chart_2_rows = df_chart_2[
    df_chart_2['Player'].eq('Kobe Bryant') & df_chart_2['Stage'].eq('Regular_Season')
]

(
    alt.Chart(chart_2_rows)
    .mark_line()
    .encode(
        x=alt.X('Season:O'),
        y=alt.Y('Assists_per_game', title='Assists', scale=alt.Scale(zero=False)),
        tooltip=['Player', 'Stage', 'Season', 'Assists_per_game'],
    )
    .properties(title='Average Assists by Season', width=300, height=250)
)

## chart 3

In [8]:
# Offensive and defensive rebounds per game, reshaped to long format so the
# rebound type can drive the chart colour.
df_chart_3 = (
    player_data[['Player', 'Season', 'Stage']]
    .assign(**{
        'Offensive Rebounds': (player_data['ORB'] / player_data['GP']).round(2),
        'Defensive Rebounds': (player_data['DRB'] / player_data['GP']).round(2),
    })
    .melt(
        id_vars=['Player', 'Season', 'Stage'],
        var_name='Rebound_type',
        value_name='Rebounds_per_game',
    )
)
df_chart_3.head()

Unnamed: 0,Player,Season,Stage,Rebound_type,Rebounds_per_game
0,Shaquille O'Neal,1999,Regular_Season,Offensive Rebounds,4.25
1,Vince Carter,1999,Regular_Season,Offensive Rebounds,1.83
2,Karl Malone,1999,Regular_Season,Offensive Rebounds,2.06
3,Allen Iverson,1999,Regular_Season,Offensive Rebounds,1.01
4,Gary Payton,1999,Regular_Season,Offensive Rebounds,1.22


In [9]:
# Preview with a hard-coded sample player: per-season bars of rebounds per game,
# coloured by rebound type.
chart_3_rows = df_chart_3[
    df_chart_3['Player'].eq('Kobe Bryant') & df_chart_3['Stage'].eq('Regular_Season')
]

(
    alt.Chart(chart_3_rows)
    .mark_bar()
    .encode(
        x=alt.X('Season:O'),
        y=alt.Y('sum(Rebounds_per_game)', title='Rebounds'),
        color=alt.Color('Rebound_type', legend=alt.Legend(orient='bottom', title="")),
        tooltip=['Player', 'Stage', 'Season', 'Rebound_type', 'Rebounds_per_game'],
    )
    .properties(title='Average Rebounds by Season', width=300, height=250)
)

## chart 4

In [10]:
# Blocks and steals per game in long format. The mapping gives the display
# label for each source column and fixes the order of the melted rows.
chart_4_sources = {'Blocks': 'BLK', 'Steals': 'STL'}

df_chart_4 = (
    player_data[['Player', 'Season', 'Stage']]
    .assign(**{
        label: (player_data[source] / player_data['GP']).round(2)
        for label, source in chart_4_sources.items()
    })
    .melt(
        id_vars=['Player', 'Season', 'Stage'],
        var_name='Blocks/Steals',
        value_name='per_game',
    )
)
df_chart_4.head()

Unnamed: 0,Player,Season,Stage,Blocks/Steals,per_game
0,Shaquille O'Neal,1999,Regular_Season,Blocks,3.03
1,Vince Carter,1999,Regular_Season,Blocks,1.12
2,Karl Malone,1999,Regular_Season,Blocks,0.87
3,Allen Iverson,1999,Regular_Season,Blocks,0.07
4,Gary Payton,1999,Regular_Season,Blocks,0.22


In [11]:
# Preview with a hard-coded sample player: blocks and steals per game, one line each.
chart_4_rows = df_chart_4[
    df_chart_4['Player'].eq('Kobe Bryant') & df_chart_4['Stage'].eq('Regular_Season')
]

(
    alt.Chart(chart_4_rows)
    .mark_line()
    .encode(
        x=alt.X('Season:O'),
        y=alt.Y('per_game', title='Count'),
        color=alt.Color('Blocks/Steals', legend=alt.Legend(orient='bottom', title="")),
        tooltip=['Player', 'Stage', 'Season', 'Blocks/Steals', 'per_game'],
    )
    .properties(title='Average Blocks & Steals by Season', width=450, height=250)
)

## chart 5

In [12]:
# Turnovers and personal fouls per game in long format. The mapping gives the
# display label for each source column and fixes the order of the melted rows.
chart_5_sources = {'Turnovers': 'TOV', 'Fouls': 'PF'}

df_chart_5 = (
    player_data[['Player', 'Season', 'Stage']]
    .assign(**{
        label: (player_data[source] / player_data['GP']).round(2)
        for label, source in chart_5_sources.items()
    })
    .melt(
        id_vars=['Player', 'Season', 'Stage'],
        var_name='Turnovers/Fouls',
        value_name='per_game',
    )
)
df_chart_5.head()

Unnamed: 0,Player,Season,Stage,Turnovers/Fouls,per_game
0,Shaquille O'Neal,1999,Regular_Season,Turnovers,2.82
1,Vince Carter,1999,Regular_Season,Turnovers,2.17
2,Karl Malone,1999,Regular_Season,Turnovers,2.82
3,Allen Iverson,1999,Regular_Season,Turnovers,3.29
4,Gary Payton,1999,Regular_Season,Turnovers,2.73


In [13]:
# Preview with a hard-coded sample player: turnovers and fouls per game, one line each.
chart_5_rows = df_chart_5[
    df_chart_5['Player'].eq('Kobe Bryant') & df_chart_5['Stage'].eq('Regular_Season')
]

(
    alt.Chart(chart_5_rows)
    .mark_line()
    .encode(
        x=alt.X('Season:O'),
        y=alt.Y('per_game', title='Count'),
        color=alt.Color('Turnovers/Fouls', legend=alt.Legend(orient='bottom', title="")),
        tooltip=['Player', 'Stage', 'Season', 'Turnovers/Fouls', 'per_game'],
    )
    .properties(title='Average Turnovers & Fouls by Season', width=450, height=250)
)

# Advanced Analytics

## chart 11 (chart 1 in second tab)

In [14]:
# Shooting percentages per (Player, Season, Stage) in long format.
# Two-point makes/attempts are field goals minus three-pointers; they are
# derived per row before the rows of each group are averaged.
chart_11_shots = player_data[['Player', 'Season', 'Stage', 'FGM', 'FGA', '3PM', '3PA']].assign(
    **{
        '2PA': lambda d: d['FGA'] - d['3PA'],
        '2PM': lambda d: d['FGM'] - d['3PM'],
    }
)
chart_11_avg = chart_11_shots.groupby(['Player', 'Season', 'Stage']).mean().reset_index()

# NOTE: the melted value column is named "per_game" although it holds
# percentages; the name is kept because the chart cell reads it.
df_chart_11 = (
    chart_11_avg
    .assign(**{
        '2PT_%': (chart_11_avg['2PM'] / chart_11_avg['2PA'] * 100).round(2),
        '3PT_%': (chart_11_avg['3PM'] / chart_11_avg['3PA'] * 100).round(2),
        # Effective FG% counts each made three-pointer as 1.5 field goals.
        'eFG_%': (
            ((chart_11_avg['FGM'] + (0.5 * chart_11_avg['3PM'])) / chart_11_avg['FGA']) * 100
        ).round(2),
    })
    .melt(
        id_vars=['Player', 'Season', 'Stage'],
        value_vars=['2PT_%', '3PT_%', 'eFG_%'],
        var_name='2PT_3PT_eFG',
        value_name='per_game',
    )
)
df_chart_11.head()

Unnamed: 0,Player,Season,Stage,2PT_3PT_eFG,per_game
0,A.C. Green,1999,Playoffs,2PT_%,41.11
1,A.C. Green,1999,Regular_Season,2PT_%,44.91
2,A.C. Green,2000,Regular_Season,2PT_%,45.28
3,A.J. Price,2010,Playoffs,2PT_%,31.58
4,Aaron Brooks,2008,Playoffs,2PT_%,47.17


In [15]:
# Preview with a hard-coded sample player: the three shooting percentages as
# separate lines over seasons.
chart_11_rows = df_chart_11[
    df_chart_11['Player'].eq('Stephen Curry') & df_chart_11['Stage'].eq('Regular_Season')
]

(
    alt.Chart(chart_11_rows)
    .mark_line()
    .encode(
        x=alt.X('Season:O'),
        y=alt.Y('per_game', title='Shooting Percentage', scale=alt.Scale(zero=False)),
        color=alt.Color('2PT_3PT_eFG', legend=alt.Legend(orient='bottom', title="")),
        tooltip=['Player', 'Stage', 'Season', '2PT_3PT_eFG', 'per_game'],
    )
    .properties(title='Average Shooting Percentages by Season', width=450, height=250)
)

## chart 12

TS% - True Shooting Percentage; the formula is PTS / (2 * TSA). True shooting percentage is a measure of shooting efficiency that takes into account field goals, 3-point field goals, and free throws.

TSA - True Shooting Attempts; the formula is FGA + 0.44 * FTA. 

In [16]:
# True shooting percentage per player / season / stage row:
# TS% = PTS / (2 * TSA) * 100, with TSA = FGA + 0.44 * FTA, all on a per-game basis.
chart_12_points = player_data['PTS'] / player_data['GP']
chart_12_attempts = (
    player_data['FGA'] / player_data['GP']
    + 0.44 * (player_data['FTA'] / player_data['GP'])
)

df_chart_12 = player_data[['Player', 'Season', 'Stage']].assign(**{
    'True shooting percentage': (chart_12_points / (2 * chart_12_attempts) * 100).round(2),
})
df_chart_12.head()

Unnamed: 0,Player,Season,Stage,True shooting percentage
0,Shaquille O'Neal,1999,Regular_Season,57.8
1,Vince Carter,1999,Regular_Season,54.35
2,Karl Malone,1999,Regular_Season,58.16
3,Allen Iverson,1999,Regular_Season,49.58
4,Gary Payton,1999,Regular_Season,53.51


In [17]:
# Preview with a hard-coded sample player: true shooting percentage per season.
chart_12_rows = df_chart_12[
    (df_chart_12['Player'] == 'Stephen Curry') & (df_chart_12['Stage'] == 'Regular_Season')
]

# The y scale keeps its default zero baseline: a bar's length encodes its
# value, so a truncated axis would exaggerate season-to-season differences.
(
    alt.Chart(chart_12_rows)
    .mark_bar()
    .encode(
        y=alt.Y('True shooting percentage', title='True Shooting Percentage'),
        x=alt.X('Season:O'),
        tooltip=['Player', 'Stage', 'Season', 'True shooting percentage'],
    )
    .properties(title='True Shooting Percentage by Season', width=450, height=250)
)

## chart 13

GmSc - Game Score; the formula is PTS + 0.4 * FG - 0.7 * FGA - 0.4*(FTA - FT) + 0.7 * ORB + 0.3 * DRB + STL + 0.7 * AST + 0.7 * BLK - 0.4 * PF - TOV. 

Game Score was created by John Hollinger to give a rough measure of a player's productivity for a single game. The scale is similar to that of points scored (40 is an outstanding performance, 10 is an average performance, etc.).

In [18]:
# Per-game Game Score (Hollinger) and minutes played for every
# player / season / stage row. Each counting stat is divided by games played
# in one vectorised step before the weighted sum is formed.
chart_13_stats = ['PTS', 'FGM', 'FGA', 'FTM', 'FTA', 'ORB', 'DRB', 'STL', 'AST', 'BLK', 'PF', 'TOV']
chart_13_per_game = player_data[chart_13_stats].div(player_data['GP'], axis=0)

chart_13_game_score = (
    chart_13_per_game['PTS']
    + 0.4 * chart_13_per_game['FGM']
    - 0.7 * chart_13_per_game['FGA']
    - 0.4 * (chart_13_per_game['FTA'] - chart_13_per_game['FTM'])  # missed free throws
    + 0.7 * chart_13_per_game['ORB']
    + 0.3 * chart_13_per_game['DRB']
    + chart_13_per_game['STL']
    + 0.7 * chart_13_per_game['AST']
    + 0.7 * chart_13_per_game['BLK']
    - 0.4 * chart_13_per_game['PF']
    - chart_13_per_game['TOV']
)

df_chart_13 = player_data[['Player', 'Season', 'Stage']].assign(**{
    'Minutes Played': (player_data['MIN'] / player_data['GP']).round(2),
    'Game Score': chart_13_game_score.round(2),
})

df_chart_13.head()

Unnamed: 0,Player,Season,Stage,Minutes Played,Game Score
0,Shaquille O'Neal,1999,Regular_Season,40.04,24.68
1,Vince Carter,1999,Regular_Season,38.12,18.4
2,Karl Malone,1999,Regular_Season,35.94,19.79
3,Allen Iverson,1999,Regular_Season,40.76,16.96
4,Gary Payton,1999,Regular_Season,41.77,20.14


In [19]:
# Preview with a hard-coded sample player: average Game Score per season.
chart_13_rows = df_chart_13[
    (df_chart_13['Player'] == 'Tim Duncan') & (df_chart_13['Stage'] == 'Regular_Season')
]

# The y scale keeps its default zero baseline: a bar's length encodes its
# value, so a truncated axis would exaggerate season-to-season differences.
(
    alt.Chart(chart_13_rows)
    .mark_bar()
    .encode(
        y=alt.Y('Game Score', title='Game Score'),
        x=alt.X('Season:O'),
        tooltip=['Player', 'Stage', 'Season', 'Game Score'],
    )
    .properties(title='Average Game Score by Season', width=450, height=250)
)

## chart 14

In [20]:
# Preview with a hard-coded sample player: minutes played against Game Score,
# one point per row of df_chart_13 (both stages), coloured by stage.
chart_14_rows = df_chart_13[df_chart_13['Player'] == 'Tim Duncan']

(
    alt.Chart(chart_14_rows)
    .mark_circle(size=60)
    .encode(
        x=alt.X('Minutes Played', scale=alt.Scale(zero=False)),
        y=alt.Y('Game Score', scale=alt.Scale(zero=False)),
        color=alt.Color('Stage', legend=alt.Legend(orient='bottom', title="")),
        # Season is included so a hovered point can be tied to a specific
        # season, matching the tooltips of the other charts.
        tooltip=['Player', 'Stage', 'Season', 'Minutes Played', 'Game Score'],
    )
    .properties(title='Minutes Played vs. Game Score', width=450, height=250)
)