In [1]:
import pandas as pd
import matplotlib.pyplot as plt
import numpy as np
from scipy.stats import linregress
from wordcloud import WordCloud
import seaborn as sns
import bar_chart_race as bcr
import datetime as dt

In [2]:
nba_top50 = pd.read_csv("Data/NBA_AllStats.csv", usecols=['player_name','Points','Position','game_date'])
nba_top50.head(10)

Unnamed: 0,player_name,Position,Points,game_date
0,LeBron James,F,19,2016-10-25T23:30:00+00:00
1,James Jones,G-F,5,2016-10-25T23:30:00+00:00
2,J.R. Smith,G-F,8,2016-10-25T23:30:00+00:00
3,Kay Felder,G,0,2016-10-25T23:30:00+00:00
4,Mike Dunleavy,G-F,4,2016-10-25T23:30:00+00:00
5,Mo Williams,G,0,2016-10-25T23:30:00+00:00
6,Kevin Love,F,23,2016-10-25T23:30:00+00:00
7,Richard Jefferson,F,13,2016-10-25T23:30:00+00:00
8,Kyrie Irving,G,29,2016-10-25T23:30:00+00:00
9,Tristan Thompson,C-F,0,2016-10-25T23:30:00+00:00


In [3]:
nba_top50['Date'] = pd.to_datetime(nba_top50['game_date'], errors='coerce')
nba_top50.dtypes

player_name                 object
Position                    object
Points                       int64
game_date                   object
Date           datetime64[ns, UTC]
dtype: object

In [4]:
nba_top50['Date'] = nba_top50['Date'].dt.normalize()

In [5]:
nba_top50['Date'] = nba_top50['Date'].dt.date

In [6]:
nba_top50.head()

Unnamed: 0,player_name,Position,Points,game_date,Date
0,LeBron James,F,19,2016-10-25T23:30:00+00:00,2016-10-25
1,James Jones,G-F,5,2016-10-25T23:30:00+00:00,2016-10-25
2,J.R. Smith,G-F,8,2016-10-25T23:30:00+00:00,2016-10-25
3,Kay Felder,G,0,2016-10-25T23:30:00+00:00,2016-10-25
4,Mike Dunleavy,G-F,4,2016-10-25T23:30:00+00:00,2016-10-25


In [7]:
nba_top10 = nba_top50.groupby('player_name').sum()['Points']
nba_top10 = nba_top10.sort_values(ascending=False)
nba_top10 = nba_top10.head(10)
nba_top10

player_name
Damian Lillard           9787
Russell Westbrook        9699
Giannis Antetokounmpo    9392
Anthony Davis            9300
LeBron James             9262
Bradley Beal             9017
DeMar DeRozan            8832
James Harden             8683
Kemba Walker             8524
Karl-Anthony Towns       8408
Name: Points, dtype: int64

In [8]:
nba_data = nba_top50.pivot_table(index='Date',columns='player_name',values='Points')
nba_data = nba_data[nba_top10.index.values]
nba_data

player_name,Damian Lillard,Russell Westbrook,Giannis Antetokounmpo,Anthony Davis,LeBron James,Bradley Beal,DeMar DeRozan,James Harden,Kemba Walker,Karl-Anthony Towns
Date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1
2016-10-25,,,,,19.0,,,,,
2016-10-26,39.0,,,,,,40.0,,,
2016-10-27,,32.0,31.0,50.0,,13.0,,34.0,17.0,21.0
2016-10-28,29.0,,,,21.0,,32.0,,,
2016-10-29,,51.0,,45.0,23.0,,,26.0,26.5,
...,...,...,...,...,...,...,...,...,...,...
2020-08-11,61.0,20.0,,27.0,29.0,,23.0,0.0,19.0,
2020-08-12,,0.0,12.0,,,,,45.0,,
2020-08-13,,,0.0,0.0,17.0,,0.0,,0.0,
2020-08-14,42.0,,,,,,,,,


In [9]:
nba_data = nba_data.fillna(0)
nba_data

player_name,Damian Lillard,Russell Westbrook,Giannis Antetokounmpo,Anthony Davis,LeBron James,Bradley Beal,DeMar DeRozan,James Harden,Kemba Walker,Karl-Anthony Towns
Date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1
2016-10-25,0.0,0.0,0.0,0.0,19.0,0.0,0.0,0.0,0.0,0.0
2016-10-26,39.0,0.0,0.0,0.0,0.0,0.0,40.0,0.0,0.0,0.0
2016-10-27,0.0,32.0,31.0,50.0,0.0,13.0,0.0,34.0,17.0,21.0
2016-10-28,29.0,0.0,0.0,0.0,21.0,0.0,32.0,0.0,0.0,0.0
2016-10-29,0.0,51.0,0.0,45.0,23.0,0.0,0.0,26.0,26.5,0.0
...,...,...,...,...,...,...,...,...,...,...
2020-08-11,61.0,20.0,0.0,27.0,29.0,0.0,23.0,0.0,19.0,0.0
2020-08-12,0.0,0.0,12.0,0.0,0.0,0.0,0.0,45.0,0.0,0.0
2020-08-13,0.0,0.0,0.0,0.0,17.0,0.0,0.0,0.0,0.0,0.0
2020-08-14,42.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0


In [10]:
nba_data = nba_data.cumsum()
nba_data

player_name,Damian Lillard,Russell Westbrook,Giannis Antetokounmpo,Anthony Davis,LeBron James,Bradley Beal,DeMar DeRozan,James Harden,Kemba Walker,Karl-Anthony Towns
Date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1
2016-10-25,0.0,0.0,0.0,0.0,19.0,0.0,0.0,0.0,0.0,0.0
2016-10-26,39.0,0.0,0.0,0.0,19.0,0.0,40.0,0.0,0.0,0.0
2016-10-27,39.0,32.0,31.0,50.0,19.0,13.0,40.0,34.0,17.0,21.0
2016-10-28,68.0,32.0,31.0,50.0,40.0,13.0,72.0,34.0,17.0,21.0
2016-10-29,68.0,83.0,31.0,95.0,63.0,13.0,72.0,60.0,43.5,21.0
...,...,...,...,...,...,...,...,...,...,...
2020-08-11,7692.0,7057.0,7478.0,7082.5,7076.0,7163.0,6760.0,6211.0,6640.0,6300.0
2020-08-12,7692.0,7057.0,7490.0,7082.5,7076.0,7163.0,6760.0,6256.0,6640.0,6300.0
2020-08-13,7692.0,7057.0,7490.0,7082.5,7093.0,7163.0,6760.0,6256.0,6640.0,6300.0
2020-08-14,7734.0,7057.0,7490.0,7082.5,7093.0,7163.0,6760.0,6256.0,6640.0,6300.0


In [11]:
bcr_html = bcr.bar_chart_race(df=nba_data, filename=None)

Exception: You do not have ffmpeg installed on your machine. Download
                            ffmpeg from here: https://www.ffmpeg.org/download.html.
                            
                            Matplotlib's original error message below:

                            Requested MovieWriter (ffmpeg) not available
                            

In [None]:
# colors = dict(zip(
#         ['Damian Lillard','Russell Westbrook','Giannis Antetokounmpo','Anthony Davis','LeBron James','Bradley Beal','DeMar DeRozan','James Harden','Kemba Walker','Karl-Anthony Towns'],
#         ['#adb0ff','#ffb3ff','#90d595','#e48381','#aafbff','#f7bb5f','#eafb50','#6060c0','#ff5668','#cee5ff']
# ))

# colors

In [None]:
# group_lk = nba_data.set_index("player_name")['Points'].to_dict()
# group_lk

In [None]:
# fig, ax = plt.subplots(figsize=(15,8))

# def racing_barchart(game_date):
#     dff =nba_top50[nba_top50['game_date'].eq(game_date)].sort_values(by='Points',ascending=True).tail(10)
#     ax.barh(dff['player_name'],dff['Points'], color=[colors[group_lk[x]] for x in dff['player_name']])
# plt.show()

# racing_barchart(2016-10-25)

In [None]:
# nba_top50.pivot_table(index='STATE',columns='PLAYERS',values='AST')