In [1]:
import numpy as np
import pandas as pd 
from itertools import izip

Offensive Rating is defined as the team points scored per 100 possessions while the player is on the court.

Defensive Rating is defined as the number of points per 100 possessions that the team alows while that individual player is on the court. 

A possession is ended by
 -1 made field goal attempts
 -2 Made final free throw attempt
 -3 Missed final free throw attempt that results in a defensive reboud
 -4 Missed field goal attempt that resutls in a defensive rebound
 -5 turnover
 -6 end of time period

In [2]:
EventCodes = pd.read_csv('Event_Codes.txt', sep='\t')
#Strip excess whitespace that I noticed in Event Message Type Description
EventCodes['Event_Msg_Type_Description'] = EventCodes['Event_Msg_Type_Description'].map(str.strip)
#eventCodes['Action_Type_Description'] = eventCodes['Action_Type_Description'].map(str.strip)

GameLineup = pd.read_csv('Game_Lineup.txt', sep='\t')
PlayByPlay = pd.read_csv('Play_by_Play.txt', sep='\t')

In [3]:
from collections import defaultdict

In [102]:
EventScoringValues = defaultdict(dict) #the point value of a play, for the offensive team
EventPossChange= defaultdict(dict) #whether the team could constitute a change of possesion
EventSub = defaultdict(dict)

In [103]:
for row in EventCodes.iterrows():
    #initialize
    e_type, a_type = row[1]['Event_Msg_Type'], row[1]['Action_Type']
    EventScoringValues[e_type][a_type] = 0
    EventPossChange[e_type][a_type] = False
    EventSub[e_type][a_type] = False

In [104]:
_,idx1 = np.unique(EventCodes['Event_Msg_Type'], return_index=True)
_,idx2 = np.unique(EventCodes['Action_Type'], return_index=True)

In [105]:
idx = np.array(list(set(idx1).union(idx2)))
idx.sort()

In [106]:
for i in idx:
    print EventCodes['Event_Msg_Type'][i], EventCodes['Event_Msg_Type_Description'][i],EventCodes['Action_Type'][i], EventCodes['Action_Type_Description'][i]

1 Made Shot 0 No Shot
1 Made Shot 1 Jump Shot
1 Made Shot 2 Running Jump Shot
1 Made Shot 3 Hook Shot
1 Made Shot 4 Tip Shot
1 Made Shot 5 Layup Shot
1 Made Shot 6 Driving Layup Shot
1 Made Shot 7 Dunk Shot
1 Made Shot 8 Slam Dunk Shot
1 Made Shot 9 Driving Dunk Shot
1 Made Shot 40 Layup Shot
1 Made Shot 41 Running Layup Shot
1 Made Shot 42 Driving Layup Shot
1 Made Shot 43 Alley Oop Layup shot
1 Made Shot 44 Reverse Layup Shot
1 Made Shot 45 Jump Shot
1 Made Shot 46 Running Jump Shot
1 Made Shot 47 Turnaround Jump Shot
1 Made Shot 48 Dunk Shot
1 Made Shot 49 Driving Dunk Shot
1 Made Shot 50 Running Dunk Shot
1 Made Shot 51 Reverse Dunk Shot
1 Made Shot 52 Alley Oop Dunk Shot
1 Made Shot 53 Tip Shot
1 Made Shot 54 Running Tip Shot
1 Made Shot 55 Hook Shot
1 Made Shot 56 Running Hook Shot
1 Made Shot 57 Driving Hook Shot
1 Made Shot 58 Turnaround Hook Shot
1 Made Shot 59 Finger Roll Shot
1 Made Shot 60 Running Finger Roll Shot
1 Made Shot 61 Driving Finger Roll Shot
1 Made Shot 62 Turna

In [107]:
#Hardcore the scoring values of points
for action in EventScoringValues[1]:
    EventScoringValues[1][action] = 2 # i think some of these will be to be 3's
    
for action in EventScoringValues[3]:
    # are all free throw worth 1? technical?
    EventScoringValues[3][action] = 1
    
#all others 0

In [151]:
for action in EventPossChange[1]:
    EventPossChange[1][action] = True

for action in EventPossChange[2]:
    #Instead of checking who has next possession on rebounds, check for every missed shot 
    #(Play by play will have 'false rebounds', for example after a missed ft that isn't the last one)
    EventPossChange[2][action] = None

    
free_throws = EventCodes['Event_Msg_Type'] == 3
for action in EventPossChange[3]:
    if action in (10, 12, 15):
        #Only potential change of possession on last shot of normal fts (not techincal, clear path, or flagrant)
        #Even if ft is made, I believe using the next poss logic should work, but may have to split into made vs missed
        EventPossChange[3][action] = None
        
#for action in EventPossChange[4]:
#    EventPossChange[4][action] = None # all rebounds have to be checked
    
    
for action in EventPossChange[5]:
    EventPossChange[5][action] = True
    
#for action in EventPossChange[10]:
#    EventPossChange[10][action] = None # same with jump balls
    
#for action in EventPossChange[12]:
#    EventPossChange[12][action] = None # same with start and end of periods
           
#for action in EventPossChange[13]:
#    EventPossChange[13][action] = None 

In [109]:
for action in EventSub[8]:
    EventSub[8][action] = True
    
for action in EventPossChange[12]:
    EventSub[12][action] = True

### Order of operations
For Game in Games:

    For possession in Game:
        for player in possession:
            update scoring
    normalize by count

In [160]:
class Game(object):
    
    def __init__(self, game_id, game_lineup, play_by_play):
        
        self.game_id = game_id
        self.play_by_play = self.chunk_pbp(play_by_play[play_by_play['Game_id'] == game_id])
        
        # tuple of lists of each player on the team
        self.teams = self.make_teams(game_lineup[game_lineup['Game_id']==game_id])
        self.poss_team_0 = None
        
        self.lineup_by_period =\
        [game_lineup[np.logical_and(game_lineup['Game_id']==game_id, \
                                    game_lineup['Period'] == i)] for i in xrange(1,5)]
    
    def score_event(self, event):
        
        if self.poss_team_0 is None:
            pass # possession has not been established, no scoring plays will have occurred
        for play in event:
            play_e, play_a = play[2], play[6]
            print play_e
            play_value = EventScoringValues[play_e][play_a]
            
            if play_value == 0:
                continue
            play_value = play[7]
            if (play_e == 3) and (play_value != 1):
                #On free throws, only a made ft if play_value ('Option1' = 1), it is sometimes 2 or another non-zero number
                continue

            for team, offense_or_defense in zip(self.teams, (self.poss_team_0, not self.poss_team_0)):
                for player in team.itervalues():
                    if player.active:
                        if offense_or_defense: #team 0 on offense?
                            player.off_points+=play_value
                        else:
                            player.def_points+=play_value
                        
        
    def possession_change(self, event):
        
        for play in event:
            play_e, play_a = play[2], play[6]
            poss_change = EventPossChange[play_e][play_a]
            
            print ('poss change:', poss_change)
            if poss_change is None: #ambiguous, have to check
                #Don't actually thinks this works in all cases. 
                
                # TODO this assume's were using iter properly
                next_pos = self._get_next_poss()
                print ('next pos:', next_pos)
                if self.poss_team_0 != next_pos:
                    self._add_possessions()
                self.poss_team_0 = next_pos
            
            elif poss_change: #True
                print 'before poss change', self.poss_team_0
                
                #I think this was in wrong order? Should add possessions first before switching
                self._add_possessions()
                self.poss_team_0 = not self.poss_team_0
                
                print 'after poss change', self.poss_team_0
                
            if play_e == 13: # end of period
                # TODO how do periods starting affect possessions
                self._add_possessions()
                
                #don't know possession at start of next quarter so reset poss_team0
                self.poss_team_0 = None
            
    def _get_next_poss(self):
        #lift the doPossessionFix from winston's codce
        for event in self.play_by_play[self._idx+1:]:
            print ('idx is', self._idx)
            for play in event:
                # gonna have to check that this makes sense...
                play_e, play_a = play[2], play[6]
                print ('play_e next poss', play_e)
                print play
                #Only made shot, missed shot, free throw, turnover, and end of period establishes posession
                if play_e in (1, 2, 3, 5, 13):
                    #may need to check on free throw (event 3) that it doesn't correspond to No Shot (action type 0)
                    return play[11] in self.teams[0]
            
    def _add_possessions(self):
        
            for team, offense_or_defense in zip(self.teams, (self.poss_team_0, not self.poss_team_0)):
                for player in team.itervalues():
                    if player.active:
                        if offense_or_defense: #team 0 on offense?
                            player.off_poss+=1
                        else:
                            player.def_poss+=1
                        if player.player_id == '766802a8fda500d7945950de7398c9c6':
                            print self._idx
                            print offense_or_defense
                            print ('team0 poss:', self.poss_team_0)
                            pass
    def substitution(self, event):
        for play in event:
            play_e, play_a = play[2], play[6]
            sub = EventSub[play_e][play_a]
            
            if sub:
                if play_e == 8: #Garden variety substitution
                    outgoing_player, ingoing_player = play[11], play[12]
                    if outgoing_player in self.teams[0]:
                        self.teams[0][outgoing_player].active = False
                        self.teams[0][ingoing_player].active = True

                    else:
                        self.teams[1][outgoing_player].active = False
                        self.teams[1][ingoing_player].active = True
                        
                else: #start of period
                    period = play[3]-1
                    if period == 0:
                        continue # we handle this on startup
                        
                    for team in self.teams:
                        for player in team.itervalues():
                            player.active = False # turn everyone off
                    
                    period_lineup = self.lineup_by_period[period]
                    for _, row in period_lineup.iterrows():
                        player_id = row[2]
                        
                        active = row[4] == 'A'
                        if player_id in self.teams[0]:
                            self.teams[0][player_id].active = active
                        else:
                            self.teams[1][player_id].active = active

    
    def chunk_pbp(self, game_pbp):
        sorted_pbp = self.sort_pbp(game_pbp)
        real_time = sorted_pbp['PC_Time'].as_matrix()+  (4-sorted_pbp['Period'].as_matrix())*7200
        _, unique_idxs = np.unique(real_time, return_index=True)
        return np.split(game_pbp.as_matrix(), indices_or_sections=unique_idxs[::-1] )[1:] # remove the first one thats empty
        
    def sort_pbp(self, game_pbp):
        return game_pbp.sort_values(['Period', 'PC_Time', 'WC_Time', 'Event_Num'],\
                                    ascending=[True, False, True,True])
    
    def __iter__(self):
        self._idx =0
        return self.play_by_play.__iter__()
    
    def __next__(self):
        self._idx+=1
        return self.__next__()
    
    def make_teams(self, game_lineup):
        team1_id, team2_id = np.unique(game_lineup['Team_id'])
        team1, team2 = {},{}
        for team_id, team_arr in izip((team1_id, team2_id), (team1, team2)):
            players_on_team = game_lineup[game_lineup['Team_id']==team_id]['Person_id'].unique()
            for player in players_on_team:

                player_data = game_lineup[game_lineup['Person_id'] == player] #ignore possessions
                active_to_start = 1 in set(player_data['Period'])
                team_arr[player] = Player(player_data.iloc[0], active_to_start)
                
        return team1, team2
    
    def compute_ratings(self):
        for event in self:
            if self.poss_team_0 == None:
                #At start of periods, don't know who has possession so use get_next_poss method to establish
                self.poss_team_0 = self._get_next_poss()
            self.score_event(event)
            self.possession_change(event)
            self.substitution(event)
            
        for team in self.teams:
            for player in team.itervalues():
                player.finalize_ratings()
                #print player.off_points, player.def_points
                print player.player_id, player.off_rating, player.def_rating

In [120]:
class Player(object):
    
    def __init__(self, player_data, active_to_start):
        self.player_id = player_data['Person_id']
        self.team_id = player_data['Team_id']
        self.active = active_to_start
        
        self.off_points = 0
        self.def_points = 0
        
        self.off_poss = 0
        self.def_poss = 0
        
        self.off_rating = None
        self.def_rating = None
        
    def finalize_ratings(self):
        print self.off_points
        print self.off_poss
        
        print self.def_points
        print self.def_poss
        if self.off_poss>0:
            self.off_rating = self.off_points*100.0/self.off_poss
        if self.def_poss>0:
            self.def_rating = self.def_points*100.0/self.def_poss

In [128]:
game0_id = GameLineup.iloc[0]['Game_id']
game0 = Game(game0_id, GameLineup, PlayByPlay)



In [129]:
game0.compute_ratings()

('poss change:', False)
('poss change:', False)
('poss change:', True)
before poss change True
0
True
('team0 poss:', True)
after poss change False
('poss change:', False)
('poss change:', None)
0
False
('team0 poss:', False)
('poss change:', True)
before poss change True
0
True
('team0 poss:', True)
after poss change False
('poss change:', None)
0
False
('team0 poss:', False)
('poss change:', False)
('poss change:', None)
('poss change:', False)
('poss change:', False)
('poss change:', False)
('poss change:', None)
('poss change:', None)
('poss change:', True)
before poss change True
0
True
('team0 poss:', True)
after poss change False
('poss change:', False)
('poss change:', True)
before poss change False
0
False
('team0 poss:', False)
after poss change True
('poss change:', None)
('poss change:', False)
('poss change:', False)
('poss change:', True)
before poss change True
0
True
('team0 poss:', True)
after poss change False
('poss change:', None)
0
False
('team0 poss:', False)
('po

In [114]:
PlayByPlay.iloc[0]

Game_id           006728e4c10e957011e1f24878e6054a
Event_Num                                        2
Event_Msg_Type                                  12
Period                                           1
WC_Time                                     546410
PC_Time                                       7200
Action_Type                                      0
Option1                                          0
Option2                                          0
Option3                                          0
Team_id           1473d70e5646a26de3c52aa1abd85b1f
Person1           0370a0d090da0d0edc6319f120187e0e
Person2           0370a0d090da0d0edc6319f120187e0e
Person3           0370a0d090da0d0edc6319f120187e0e
Team_id_type                                     0
Person1_type                                     0
Person2_type                                     0
Person3_type                                     0
Name: 0, dtype: object

In [115]:
def sortedPlayByPlay(PlayByPlay):
    """Returns PlayByPlay with nested sorted given by hackathon pdf file.
    
    Input:
        PlayByPlay: Pandas Dataframe from reading Play_by_Play csv file"""
    return PlayByPlay.sort_values(['Period', 'PC_Time', 'WC_Time', 'Event_Num'], ascending=[True, False, True,True])

In [125]:
game0PlayByPlay = sortedPlayByPlay(PlayByPlay[PlayByPlay['Game_id'] == game0_id])

In [141]:
game0PlayByPlay[117:146]


Unnamed: 0,Game_id,Event_Num,Event_Msg_Type,Period,WC_Time,PC_Time,Action_Type,Option1,Option2,Option3,Team_id,Person1,Person2,Person3,Team_id_type,Person1_type,Person2_type,Person3_type
117,006728e4c10e957011e1f24878e6054a,158,12,2,564120,7200,0,0,0,0,45ba8fc87f55b1191c50c400dc7ed11c,0370a0d090da0d0edc6319f120187e0e,0370a0d090da0d0edc6319f120187e0e,0370a0d090da0d0edc6319f120187e0e,3,0,0,0
118,006728e4c10e957011e1f24878e6054a,159,1,2,564270,7040,1,2,0,0,45ba8fc87f55b1191c50c400dc7ed11c,4ef3dae16c436459ff05156abca5cebd,0370a0d090da0d0edc6319f120187e0e,0370a0d090da0d0edc6319f120187e0e,3,5,0,0
119,006728e4c10e957011e1f24878e6054a,160,1,2,564450,6860,79,2,0,0,01be0ad4af7aeb1f6d2cc2b6b6d6d811,2ad626904c8b28cceb8e12c624a84240,0370a0d090da0d0edc6319f120187e0e,0370a0d090da0d0edc6319f120187e0e,2,4,0,0
120,006728e4c10e957011e1f24878e6054a,161,2,2,564630,6690,101,2,0,0,45ba8fc87f55b1191c50c400dc7ed11c,44230324724c84f122ac62a5f0918314,0370a0d090da0d0edc6319f120187e0e,0370a0d090da0d0edc6319f120187e0e,3,5,0,0
121,006728e4c10e957011e1f24878e6054a,162,4,2,564670,6640,0,0,0,0,45ba8fc87f55b1191c50c400dc7ed11c,99104de2626f67c1fa2ce70504970c3f,0370a0d090da0d0edc6319f120187e0e,0370a0d090da0d0edc6319f120187e0e,3,5,0,0
122,006728e4c10e957011e1f24878e6054a,163,1,2,564670,6640,97,2,0,0,45ba8fc87f55b1191c50c400dc7ed11c,99104de2626f67c1fa2ce70504970c3f,0370a0d090da0d0edc6319f120187e0e,0370a0d090da0d0edc6319f120187e0e,3,5,0,0
123,006728e4c10e957011e1f24878e6054a,164,2,2,564740,6570,1,2,0,0,01be0ad4af7aeb1f6d2cc2b6b6d6d811,ae53f8ba6761b64a174051da817785bc,0370a0d090da0d0edc6319f120187e0e,0370a0d090da0d0edc6319f120187e0e,2,4,0,0
124,006728e4c10e957011e1f24878e6054a,166,4,2,564760,6540,0,0,0,0,01be0ad4af7aeb1f6d2cc2b6b6d6d811,99104de2626f67c1fa2ce70504970c3f,0370a0d090da0d0edc6319f120187e0e,0370a0d090da0d0edc6319f120187e0e,2,5,0,0
125,006728e4c10e957011e1f24878e6054a,167,1,2,564820,6480,52,2,0,0,45ba8fc87f55b1191c50c400dc7ed11c,99104de2626f67c1fa2ce70504970c3f,4dd3d6a51dc97c651d3a86eec4362a1f,0370a0d090da0d0edc6319f120187e0e,3,5,5,0
126,006728e4c10e957011e1f24878e6054a,169,2,2,565020,6280,80,3,0,0,01be0ad4af7aeb1f6d2cc2b6b6d6d811,c5dd5b2e3b975f0849d9b74e74125cb9,0370a0d090da0d0edc6319f120187e0e,0370a0d090da0d0edc6319f120187e0e,2,4,0,0


In [161]:
game0 = Game(game0_id, GameLineup, game0PlayByPlay[117:140])
game0.compute_ratings()

('idx is', 0)
('play_e next poss', 1L)
['006728e4c10e957011e1f24878e6054a' 159L 1L 2L 564270L 7040L 1L 2L 0L 0L
 '45ba8fc87f55b1191c50c400dc7ed11c' '4ef3dae16c436459ff05156abca5cebd'
 '0370a0d090da0d0edc6319f120187e0e' '0370a0d090da0d0edc6319f120187e0e' 3L
 5L 0L 0L]
12
('poss change:', False)
1
('poss change:', True)
before poss change False
after poss change True
1
('poss change:', True)
before poss change True
after poss change False
2
('poss change:', None)
('idx is', 0)
('play_e next poss', 1L)
['006728e4c10e957011e1f24878e6054a' 159L 1L 2L 564270L 7040L 1L 2L 0L 0L
 '45ba8fc87f55b1191c50c400dc7ed11c' '4ef3dae16c436459ff05156abca5cebd'
 '0370a0d090da0d0edc6319f120187e0e' '0370a0d090da0d0edc6319f120187e0e' 3L
 5L 0L 0L]
('next pos:', False)
4
1
('poss change:', False)
('poss change:', True)
before poss change False
after poss change True
2
('poss change:', None)
('idx is', 0)
('play_e next poss', 1L)
['006728e4c10e957011e1f24878e6054a' 159L 1L 2L 564270L 7040L 1L 2L 0L 0L
 '45ba8fc



In [149]:
1L

1L