# NBA Player Movement data for Chord Chart plot

Aim to display "chord charts" for player movement (via trade) around the NBA.

### Methods

Data will be broken up by year, and then split into "in-season" and "off-season" trades.

Metrics tracked are usage (USG%), player efficiency rating (PER), and win-shares per 48 minutes (WS/48). Player must have played a minimum of 5 games at both of the teams (no minutes limit).

Note: once working, expand to include WORP, RPM, etc.

In [1]:
import pandas as pd

# load the created .csv into a Pandas dataframe
df = pd.read_csv("PlayerMovement_16-17_in-season.csv")

In [2]:
df.head()

Unnamed: 0,player_name,team_1,team_2,gp_1,gp_2,usg_1,usg_2,per_1,per_2,ws_per_48_1,ws_per_48_2
0,Nerlens Noel,PHI,DAL,29,22,17.8,17.3,20.8,19.8,0.184,0.178
1,Justin Anderson,DAL,PHI,51,24,23.6,17.2,14.7,12.8,0.067,0.084
2,Taj Gibson,CHI,OKC,55,23,19.2,19.5,15.4,13.8,0.104,0.076
3,Doug McDermott,CHI,OKC,44,22,17.6,13.6,11.3,9.3,0.084,0.08
4,Joffrey Lauvergne,OKC,CHI,50,20,17.8,20.7,12.9,11.6,0.091,0.025


In [3]:
df['usg_1_norm'] = df['usg_1'] * 5
df['usg_2_norm'] = df['usg_2'] * 5

df.head()

Unnamed: 0,player_name,team_1,team_2,gp_1,gp_2,usg_1,usg_2,per_1,per_2,ws_per_48_1,ws_per_48_2,usg_1_norm,usg_2_norm
0,Nerlens Noel,PHI,DAL,29,22,17.8,17.3,20.8,19.8,0.184,0.178,89.0,86.5
1,Justin Anderson,DAL,PHI,51,24,23.6,17.2,14.7,12.8,0.067,0.084,118.0,86.0
2,Taj Gibson,CHI,OKC,55,23,19.2,19.5,15.4,13.8,0.104,0.076,96.0,97.5
3,Doug McDermott,CHI,OKC,44,22,17.6,13.6,11.3,9.3,0.084,0.08,88.0,68.0
4,Joffrey Lauvergne,OKC,CHI,50,20,17.8,20.7,12.9,11.6,0.091,0.025,89.0,103.5


In [4]:
teams = pd.read_csv("team_abbrevs.csv")

teams.head()

Unnamed: 0,Franchise,Acronym
0,Atlanta Hawks,ATL
1,Brooklyn Nets,BKN
2,Boston Celtics,BOS
3,Charlotte Hornets,CHA
4,Chicago Bulls,CHI


In [5]:
df

Unnamed: 0,player_name,team_1,team_2,gp_1,gp_2,usg_1,usg_2,per_1,per_2,ws_per_48_1,ws_per_48_2,usg_1_norm,usg_2_norm
0,Nerlens Noel,PHI,DAL,29,22,17.8,17.3,20.8,19.8,0.184,0.178,89.0,86.5
1,Justin Anderson,DAL,PHI,51,24,23.6,17.2,14.7,12.8,0.067,0.084,118.0,86.0
2,Taj Gibson,CHI,OKC,55,23,19.2,19.5,15.4,13.8,0.104,0.076,96.0,97.5
3,Doug McDermott,CHI,OKC,44,22,17.6,13.6,11.3,9.3,0.084,0.08,88.0,68.0
4,Joffrey Lauvergne,OKC,CHI,50,20,17.8,20.7,12.9,11.6,0.091,0.025,89.0,103.5
5,Anthony Morrow,OKC,CHI,40,9,16.3,17.0,9.2,16.0,0.069,0.19,81.5,85.0
6,Cameron Payne,OKC,CHI,20,11,19.6,23.9,6.1,4.0,-0.012,-0.086,98.0,119.5
7,Tyler Ennis,HOU,LAL,31,22,19.3,19.0,4.0,14.3,-0.061,0.077,96.5,95.0
8,K.J. McDaniels,HOU,BKN,29,20,16.4,19.3,10.2,12.5,0.058,0.046,82.0,96.5
9,P.J. Tucker,PHX,TOR,57,24,11.3,10.9,10.6,10.4,0.069,0.099,56.5,54.5


In [6]:
df['traders'] = df['team_1'] + '_' + df['team_2']
df

Unnamed: 0,player_name,team_1,team_2,gp_1,gp_2,usg_1,usg_2,per_1,per_2,ws_per_48_1,ws_per_48_2,usg_1_norm,usg_2_norm,traders
0,Nerlens Noel,PHI,DAL,29,22,17.8,17.3,20.8,19.8,0.184,0.178,89.0,86.5,PHI_DAL
1,Justin Anderson,DAL,PHI,51,24,23.6,17.2,14.7,12.8,0.067,0.084,118.0,86.0,DAL_PHI
2,Taj Gibson,CHI,OKC,55,23,19.2,19.5,15.4,13.8,0.104,0.076,96.0,97.5,CHI_OKC
3,Doug McDermott,CHI,OKC,44,22,17.6,13.6,11.3,9.3,0.084,0.08,88.0,68.0,CHI_OKC
4,Joffrey Lauvergne,OKC,CHI,50,20,17.8,20.7,12.9,11.6,0.091,0.025,89.0,103.5,OKC_CHI
5,Anthony Morrow,OKC,CHI,40,9,16.3,17.0,9.2,16.0,0.069,0.19,81.5,85.0,OKC_CHI
6,Cameron Payne,OKC,CHI,20,11,19.6,23.9,6.1,4.0,-0.012,-0.086,98.0,119.5,OKC_CHI
7,Tyler Ennis,HOU,LAL,31,22,19.3,19.0,4.0,14.3,-0.061,0.077,96.5,95.0,HOU_LAL
8,K.J. McDaniels,HOU,BKN,29,20,16.4,19.3,10.2,12.5,0.058,0.046,82.0,96.5,HOU_BKN
9,P.J. Tucker,PHX,TOR,57,24,11.3,10.9,10.6,10.4,0.069,0.099,56.5,54.5,PHX_TOR


### Want to make a dictionary of possible trade partners
Can use this to see how often teams trade players/how many players they send. Maybe include directionality.

In [7]:
# import 'team name' csv
team_names = pd.read_csv('team_abbrevs.csv')
team_names.head()

Unnamed: 0,Franchise,Acronym
0,Atlanta Hawks,ATL
1,Brooklyn Nets,BKN
2,Boston Celtics,BOS
3,Charlotte Hornets,CHA
4,Chicago Bulls,CHI


In [8]:
def trading_pairs(dataframe, col_name='Acronym'):
    '''
    Want to achieve something like this with empty lists for each pair (& direction) of trade partners:
    atl_bos = []
    atl_bkn = []
    atl_cha = []
    atl_chi = []
    atl_cle = []
    atl_dal = []
    atl_den = []
    atl_det = []
    atl_gsw = []
    atl_hou = []
    atl_ind = []
    atl_lac = []
    atl_lal = []
    atl_mem = []
    atl_mia = []
    atl_nop = []
    etc...
    '''
    trading_partners = []
    for team1 in dataframe[col_name]:
        for team2 in dataframe[col_name]:
            if team1 == team2:
                continue
            else:
                trading_partners.append(team1 + "_" + team2)
    
    return trading_partners

In [9]:
list_of_trade_partners = trading_pairs(team_names)
list_of_trade_partners[:10]

['ATL_BKN',
 'ATL_BOS',
 'ATL_CHA',
 'ATL_CHI',
 'ATL_CLE',
 'ATL_DAL',
 'ATL_DEN',
 'ATL_DET',
 'ATL_GSW',
 'ATL_HOU']

In [10]:
# Count up all of the trades between teams and assign them to the strings in the
# 'list_of_trade_partners'.
# Note this doesn't count the NUMBER OF TRADES, but rather the number of PLAYERS TRADED.
# Directionality is included (for now).

trade_counter = {}

for pair in list_of_trade_partners:
    trade_counter[pair] = 0
    for traders in df['traders']:
        if traders == pair:
            trade_counter[pair] += 1

# check is collecting the trades properly with a list comprehension!
unique_trade_partners = [val for val in trade_counter.itervalues() if (val != 0)]
players_involved = sum(unique_trade_partners)

print "Number of trade partners: " + str(len(unique_trade_partners))
print "Number of players involved: " + str(players_involved)

Number of trade partners: 22
Number of players involved: 28


That seems to be working well! (note: is only for the 2016-17 season at the moment - hence only 28 players traded)

## Import full player stat dataset

In [11]:
# load the downloaded Players.csv into a Pandas dataframe
players = pd.read_csv("Players.csv")

In [12]:
players.head()

Unnamed: 0.1,Unnamed: 0,Player,height,weight,collage,born,birth_city,birth_state
0,0,Curly Armstrong,180.0,77.0,Indiana University,1918.0,,
1,1,Cliff Barker,188.0,83.0,University of Kentucky,1921.0,Yorktown,Indiana
2,2,Leo Barnhorst,193.0,86.0,University of Notre Dame,1924.0,,
3,3,Ed Bartels,196.0,88.0,North Carolina State University,1925.0,,
4,4,Ralph Beard,178.0,79.0,University of Kentucky,1927.0,Hardinsburg,Kentucky


In [14]:
# load the FULL stats file (downloaded from Kaggle)
full_stats = pd.read_csv('Seasons_Stats.csv')
full_stats.head()

Unnamed: 0.1,Unnamed: 0,Year,Player,Pos,Age,Tm,G,GS,MP,PER,...,FT%,ORB,DRB,TRB,AST,STL,BLK,TOV,PF,PTS
0,0,1950.0,Curly Armstrong,G-F,31.0,FTW,63.0,,,,...,0.705,,,,176.0,,,,217.0,458.0
1,1,1950.0,Cliff Barker,SG,29.0,INO,49.0,,,,...,0.708,,,,109.0,,,,99.0,279.0
2,2,1950.0,Leo Barnhorst,SF,25.0,CHS,67.0,,,,...,0.698,,,,140.0,,,,192.0,438.0
3,3,1950.0,Ed Bartels,F,24.0,TOT,15.0,,,,...,0.559,,,,20.0,,,,29.0,63.0
4,4,1950.0,Ed Bartels,F,24.0,DNN,13.0,,,,...,0.548,,,,20.0,,,,27.0,59.0


In [31]:
# check on the data types in the dataframe
type(full_stats)
full_stats.dtypes

Unnamed: 0      int64
Year          float64
Player         object
Pos            object
Age           float64
Tm             object
G             float64
GS            float64
MP            float64
PER           float64
TS%           float64
3PAr          float64
FTr           float64
ORB%          float64
DRB%          float64
TRB%          float64
AST%          float64
STL%          float64
BLK%          float64
TOV%          float64
USG%          float64
blanl         float64
OWS           float64
DWS           float64
WS            float64
WS/48         float64
blank2        float64
OBPM          float64
DBPM          float64
BPM           float64
VORP          float64
FG            float64
FGA           float64
FG%           float64
3P            float64
3PA           float64
3P%           float64
2P            float64
2PA           float64
2P%           float64
eFG%          float64
FT            float64
FTA           float64
FT%           float64
ORB           float64
DRB       

In [28]:
# look at 'modern NBA' (from 1985 on) only
modern_stats = full_stats.loc[lambda full_stats: full_stats.Year >= 1985.0, :]

In [30]:
modern_stats.tail()

Unnamed: 0.1,Unnamed: 0,Year,Player,Pos,Age,Tm,G,GS,MP,PER,...,FT%,ORB,DRB,TRB,AST,STL,BLK,TOV,PF,PTS
24686,24686,2017.0,Cody Zeller,PF,24.0,CHO,62.0,58.0,1725.0,16.7,...,0.679,135.0,270.0,405.0,99.0,62.0,58.0,65.0,189.0,639.0
24687,24687,2017.0,Tyler Zeller,C,27.0,BOS,51.0,5.0,525.0,13.0,...,0.564,43.0,81.0,124.0,42.0,7.0,21.0,20.0,61.0,178.0
24688,24688,2017.0,Stephen Zimmerman,C,20.0,ORL,19.0,0.0,108.0,7.3,...,0.6,11.0,24.0,35.0,4.0,2.0,5.0,3.0,17.0,23.0
24689,24689,2017.0,Paul Zipser,SF,22.0,CHI,44.0,18.0,843.0,6.9,...,0.775,15.0,110.0,125.0,36.0,15.0,16.0,40.0,78.0,240.0
24690,24690,2017.0,Ivica Zubac,C,19.0,LAL,38.0,11.0,609.0,17.0,...,0.653,41.0,118.0,159.0,30.0,14.0,33.0,30.0,66.0,284.0


### As this code was pulled from basketball-reference.com, can I write a function to actually FIND the traded players in the dataset?

i.e., "if player_name has two (or more) entries in the same year, then he must have changed teams that year"

In [39]:
# am trying to groupby the data into PLAYERS, then check to see if the data for each PLAYER 
# contains any double-ups in the YEAR column.

traded_players = []

# grouped_players = modern_stats.groupby(['Player'])
sorted_players = modern_stats.sort_values(by=['Player'])

In [41]:
sorted_players.head(20)

Unnamed: 0.1,Unnamed: 0,Year,Player,Pos,Age,Tm,G,GS,MP,PER,...,FT%,ORB,DRB,TRB,AST,STL,BLK,TOV,PF,PTS
13034,13034,1997.0,A.C. Green,PF,33.0,TOT,83.0,73.0,2492.0,12.4,...,0.65,222.0,434.0,656.0,69.0,70.0,16.0,74.0,145.0,597.0
8807,8807,1988.0,A.C. Green,PF,24.0,LAL,82.0,64.0,2636.0,14.5,...,0.773,245.0,465.0,710.0,93.0,87.0,45.0,120.0,204.0,937.0
11060,11060,1993.0,A.C. Green,PF,29.0,LAL,82.0,55.0,2819.0,16.3,...,0.739,287.0,424.0,711.0,116.0,88.0,39.0,116.0,149.0,1051.0
11529,11529,1994.0,A.C. Green,PF,30.0,PHO,82.0,55.0,2825.0,17.0,...,0.735,275.0,478.0,753.0,137.0,70.0,38.0,100.0,142.0,1204.0
14139,14139,1999.0,A.C. Green,PF,35.0,DAL,50.0,35.0,924.0,12.5,...,0.577,82.0,146.0,228.0,25.0,28.0,8.0,19.0,69.0,246.0
13035,13035,1997.0,A.C. Green,PF,33.0,PHO,27.0,19.0,548.0,11.6,...,0.646,33.0,105.0,138.0,17.0,18.0,1.0,20.0,34.0,153.0
9242,9242,1989.0,A.C. Green,PF,25.0,LAL,82.0,82.0,2510.0,17.8,...,0.786,258.0,481.0,739.0,103.0,94.0,55.0,119.0,172.0,1088.0
10617,10617,1992.0,A.C. Green,PF,28.0,LAL,82.0,53.0,2902.0,16.7,...,0.744,306.0,456.0,762.0,117.0,91.0,36.0,111.0,141.0,1116.0
10166,10166,1991.0,A.C. Green,PF,27.0,LAL,82.0,21.0,2164.0,13.8,...,0.738,201.0,315.0,516.0,71.0,59.0,23.0,99.0,117.0,750.0
13600,13600,1998.0,A.C. Green,PF,34.0,DAL,82.0,68.0,2649.0,12.8,...,0.716,219.0,449.0,668.0,123.0,78.0,27.0,68.0,157.0,600.0


## Prepare code for use in D3: Convert to JSON object

In [51]:
### Convert Pandas df to dict-of-dict to conform to JSON readability for use in D3.

# trade_dict = df.set_index('traders').T.to_dict()
trade_dict = df.T.to_dict() # '.T' is used to 'transpose' the df

trade_dict

{0: {'gp_1': 29,
  'gp_2': 22,
  'per_1': 20.8,
  'per_2': 19.8,
  'player_name': 'Nerlens Noel',
  'team_1': 'PHI',
  'team_2': 'DAL',
  'traders': 'PHI_DAL',
  'usg_1': 17.8,
  'usg_1_norm': 89.0,
  'usg_2': 17.3,
  'usg_2_norm': 86.5,
  'ws_per_48_1': 0.184,
  'ws_per_48_2': 0.17800000000000002},
 1: {'gp_1': 51,
  'gp_2': 24,
  'per_1': 14.7,
  'per_2': 12.8,
  'player_name': 'Justin Anderson',
  'team_1': 'DAL',
  'team_2': 'PHI',
  'traders': 'DAL_PHI',
  'usg_1': 23.6,
  'usg_1_norm': 118.0,
  'usg_2': 17.2,
  'usg_2_norm': 86.0,
  'ws_per_48_1': 0.067,
  'ws_per_48_2': 0.084},
 2: {'gp_1': 55,
  'gp_2': 23,
  'per_1': 15.4,
  'per_2': 13.8,
  'player_name': 'Taj Gibson',
  'team_1': 'CHI',
  'team_2': 'OKC',
  'traders': 'CHI_OKC',
  'usg_1': 19.2,
  'usg_1_norm': 96.0,
  'usg_2': 19.5,
  'usg_2_norm': 97.5,
  'ws_per_48_1': 0.10400000000000001,
  'ws_per_48_2': 0.076},
 3: {'gp_1': 44,
  'gp_2': 22,
  'per_1': 11.3,
  'per_2': 9.3,
  'player_name': 'Doug McDermott',
  'team_1'