In [76]:
import numpy as np
import pandas as pd 
from itertools import izip

Offensive Rating is defined as the team points scored per 100 possessions while the player is on the court.

Defensive Rating is defined as the number of points per 100 possessions that the team allows while that individual player is on the court.

A possession is ended by
 -1 Made field goal attempt
 -2 Made final free throw attempt
 -3 Missed final free throw attempt that results in a defensive rebound
 -4 Missed field goal attempt that results in a defensive rebound
 -5 Turnover
 -6 End of time period

In [77]:
# Load the three tab-separated input tables from the working directory.
EventCodes = pd.read_csv('Event_Codes.txt', sep='\t')
# Strip the excess whitespace noticed in Event_Msg_Type_Description. The vectorised
# .str accessor passes missing values through, where mapping str.strip over the
# column would raise on anything that is not a plain str.
EventCodes['Event_Msg_Type_Description'] = EventCodes['Event_Msg_Type_Description'].str.strip()

GameLineup = pd.read_csv('Game_Lineup.txt', sep='\t')
PlayByPlay = pd.read_csv('Play_by_Play.txt', sep='\t')

In [78]:
from collections import defaultdict

In [79]:
# Three independent two-level lookup tables, all starting out empty:
#   EventScoringValues - the point value of a play, for the offensive team
#   EventPossChange    - whether the play could constitute a change of possession
#   EventSub           - per-play flag table, filled in further down
EventScoringValues, EventPossChange, EventSub = (defaultdict(dict) for _ in range(3))

In [80]:
# Seed every (event type, action type) pair listed in EventCodes with neutral
# defaults: worth 0 points, not a possession change, not a substitution.
for _, code_row in EventCodes.iterrows():
    e_type = code_row['Event_Msg_Type']
    a_type = code_row['Action_Type']
    EventScoringValues[e_type][a_type] = 0
    EventPossChange[e_type][a_type] = False
    EventSub[e_type][a_type] = False

In [81]:
# Position of the first row carrying each distinct event type / action type.
idx1 = np.unique(EventCodes['Event_Msg_Type'], return_index=True)[1]
idx2 = np.unique(EventCodes['Action_Type'], return_index=True)[1]

In [82]:
# Merge both sets of first-occurrence positions into one ascending array.
idx = np.array(sorted(set(idx1).union(idx2)))

In [83]:
# Show the code/description pairs at the selected rows, one row per line.
# idx holds row positions (from np.unique's return_index), so select with .iloc
# rather than by label; the single-argument print form runs on Python 2 and 3.
code_columns = ['Event_Msg_Type', 'Event_Msg_Type_Description',
                'Action_Type', 'Action_Type_Description']
for code_values in EventCodes[code_columns].iloc[idx].values:
    print('%s %s %s %s' % tuple(code_values))

1 Made Shot 0 No Shot
1 Made Shot 1 Jump Shot
1 Made Shot 2 Running Jump Shot
1 Made Shot 3 Hook Shot
1 Made Shot 4 Tip Shot
1 Made Shot 5 Layup Shot
1 Made Shot 6 Driving Layup Shot
1 Made Shot 7 Dunk Shot
1 Made Shot 8 Slam Dunk Shot
1 Made Shot 9 Driving Dunk Shot
1 Made Shot 40 Layup Shot
1 Made Shot 41 Running Layup Shot
1 Made Shot 42 Driving Layup Shot
1 Made Shot 43 Alley Oop Layup shot
1 Made Shot 44 Reverse Layup Shot
1 Made Shot 45 Jump Shot
1 Made Shot 46 Running Jump Shot
1 Made Shot 47 Turnaround Jump Shot
1 Made Shot 48 Dunk Shot
1 Made Shot 49 Driving Dunk Shot
1 Made Shot 50 Running Dunk Shot
1 Made Shot 51 Reverse Dunk Shot
1 Made Shot 52 Alley Oop Dunk Shot
1 Made Shot 53 Tip Shot
1 Made Shot 54 Running Tip Shot
1 Made Shot 55 Hook Shot
1 Made Shot 56 Running Hook Shot
1 Made Shot 57 Driving Hook Shot
1 Made Shot 58 Turnaround Hook Shot
1 Made Shot 59 Finger Roll Shot
1 Made Shot 60 Running Finger Roll Shot
1 Made Shot 61 Driving Finger Roll Shot
1 Made Shot 62 Turna

In [84]:
# Hard-code the nominal point value per event type:
#   event type 1 -> 2 (I think some of these will have to be 3's)
#   event type 3 -> 1 (are all free throws worth 1? technical?)
# Every other event type keeps its initial value of 0.
for event_type, points in ((1, 2), (3, 1)):
    for action in EventScoringValues[event_type]:
        EventScoringValues[event_type][action] = points

In [85]:
# Event types for which every action is flagged as a potential possession change.
# For type 2: instead of checking who has next possession on rebounds, check for
# every missed shot (play by play will have 'false rebounds', for example after a
# missed ft that isn't the last one).
for event_type in (1, 2, 5):
    for action in EventPossChange[event_type]:
        EventPossChange[event_type][action] = True

free_throws = EventCodes['Event_Msg_Type'] == 3
# Free throws (type 3): only a potential change of possession on the last shot of
# normal fts (not technical, clear path, or flagrant). Even if the ft is made, I
# believe using the next-poss logic should work, but may have to split into made
# vs missed.
for action in (10, 12, 15):
    if action in EventPossChange[3]:
        EventPossChange[3][action] = True

# Left at their initial False for now (each would need an explicit check):
#   type 4  - all rebounds have to be checked
#   type 10 - same with jump balls
#   type 12 - same with start and end of periods
#   type 13 - likewise undecided

In [86]:
# Flag the plays that change who is on the floor: every action of event type 8
# and of event type 12. Each loop iterates the same table it updates; the
# original walked EventPossChange[12] while writing EventSub[12].
for sub_event_type in (8, 12):
    for action in EventSub[sub_event_type]:
        EventSub[sub_event_type][action] = True

### Order of operations
For Game in Games:

    For possession in Game:
        for player in possession:
            update scoring
    normalize by count

In [87]:
class Game(object):
    """Replay one game's play-by-play and accumulate per-player points and possessions.

    Depends on names defined elsewhere in the notebook: the ``EventScoringValues``,
    ``EventPossChange`` and ``EventSub`` lookup tables (indexed
    ``[event type][action type]``) and the ``Player`` class.

    Attributes:
        game_id: id of the game being replayed.
        play_by_play: list of 2-D arrays of play rows, one array per distinct
            game-clock instant, in sorted game order (see ``chunk_pbp``).
        teams: pair of dicts mapping player id -> Player, one dict per team.
        poss_team_0: True when ``teams[0]`` has the ball, False when ``teams[1]``
            does, None while possession has not been established.
        lineup_by_period: list of lineup frames; index ``p - 1`` holds period ``p``.
    """

    # Positional indices into a play row.
    # NOTE(review): assumes the Play_by_Play column order Game_id, Event_Num,
    # Event_Msg_Type, Period, WC_Time, PC_Time, Action_Type, Option1, Option2,
    # Option3, Team_id, Person1, Person2, ... -- confirm against the input file.
    _EVENT_TYPE = 2
    _PERIOD = 3
    _PC_TIME = 5
    _ACTION_TYPE = 6
    _OPTION1 = 7
    _PERSON1 = 11
    _PERSON2 = 12

    # Constants of the clock formula used to chunk plays. PC_Time reads 7200 at
    # the start of a period in the sample data; presumably one regulation period
    # in the clock's own units -- TODO confirm.
    _REGULATION_PERIODS = 4
    _PERIOD_CLOCK = 7200

    def __init__(self, game_id, game_lineup, play_by_play):
        """Select the rows of ``game_id`` from both frames and set up the replay state.

        Args:
            game_id: value matched against the 'Game_id' column of both frames.
            game_lineup: lineup frame with at least 'Game_id', 'Period',
                'Person_id' and 'Team_id' columns.
            play_by_play: play-by-play frame with at least 'Game_id', 'Period',
                'PC_Time', 'WC_Time' and 'Event_Num' columns.
        """
        self.game_id = game_id
        self.play_by_play = self.chunk_pbp(play_by_play[play_by_play['Game_id'] == game_id])

        game_rows = game_lineup[game_lineup['Game_id'] == game_id]
        # tuple of dicts of each player on the team
        self.teams = self.make_teams(game_rows)
        self.poss_team_0 = None
        self.lineup_by_period = self._lineups_by_period(game_rows)

    def _lineups_by_period(self, game_lineup):
        """Return one lineup frame per period, index ``p - 1`` holding period ``p``.

        Always covers periods 1-4 and extends to the highest period present in
        ``game_lineup`` so overtime periods can be looked up as well.
        """
        periods = game_lineup['Period']
        last_period = self._REGULATION_PERIODS
        if len(periods):
            last_period = max(last_period, int(periods.max()))
        return [game_lineup[periods == period] for period in range(1, last_period + 1)]

    def _active_players(self):
        """Yield ``(player, on_offense)`` for every active player of both teams.

        ``on_offense`` is ``poss_team_0`` for ``teams[0]`` and its negation for
        ``teams[1]``.
        """
        sides = (self.poss_team_0, not self.poss_team_0)
        for team, on_offense in zip(self.teams, sides):
            for player in team.values():
                if player.active:
                    yield player, on_offense

    def score_event(self, idx, event):
        """Credit the points scored in ``event`` to every active player.

        Offensive players get ``off_points``, defensive players ``def_points``.
        ``idx`` is accepted for symmetry with the other per-event methods and
        is not used.
        """
        if self.poss_team_0 is None:
            # Possession has not been established, so there is no offense to
            # credit; bail out instead of attributing points to an arbitrary side.
            return
        for play in event:
            play_e, play_a = play[self._EVENT_TYPE], play[self._ACTION_TYPE]
            if EventScoringValues[play_e][play_a] == 0:
                continue  # not a scoring play
            # The table only marks the play as a scoring type; the points
            # actually credited are read from the Option1 column.
            play_value = play[self._OPTION1]
            if play_e == 3 and play_value != 1:
                # On free throws, only a made ft if Option1 == 1; it is
                # sometimes 2 or another non-zero number.
                continue

            for player, on_offense in self._active_players():
                if on_offense:
                    player.off_points += play_value
                else:
                    player.def_points += play_value

    def possession_change(self, idx, event):
        """Update ``poss_team_0`` after ``event`` and count finished possessions.

        The first play flagged in ``EventPossChange`` triggers a look-ahead for
        the next team with the ball; a possession is counted only when that team
        differs from the current one. An end-of-period event resets possession
        to None.
        """
        # TODO only change possession on Made Shot, Missed Shot, FT, TO, EoP.
        # Check if the team has actually changed, not automatically flipping it.
        # Propagating possession backwards was considered, but substitutions
        # break that approach.
        for play in event:
            play_e, play_a = play[self._EVENT_TYPE], play[self._ACTION_TYPE]
            if EventPossChange[play_e][play_a]:  # ambiguous, have to check
                # TODO this assumes idx is the position of `event` in play_by_play;
                # it is not believed to work in all cases.
                next_poss_team_0 = self._get_next_poss(idx)
                if self.poss_team_0 != next_poss_team_0:
                    self._add_possessions()
                self.poss_team_0 = next_poss_team_0
                break

        if self._EoP_in_event(event):
            self.poss_team_0 = None

    def _EoP_in_event(self, event):
        """Return True when the first play of ``event`` has a period clock of 0."""
        return event[0][self._PC_TIME] == 0

    def _get_next_poss(self, idx):
        """Return whether ``teams[0]`` has the ball after the event at ``idx``.

        At an end-of-period event the current value is simply negated. Otherwise
        the following events are scanned for the first play of event type 1, 2,
        3, 5 or 13 and the result is whether its Person1 belongs to ``teams[0]``.
        Returns None when no such play remains.
        """
        if self._EoP_in_event(self.play_by_play[idx]):
            return not self.poss_team_0

        for event in self.play_by_play[idx + 1:]:
            for play in event:
                # Only made shot, missed shot, free throw, turnover, and end of
                # period establish possession.
                if play[self._EVENT_TYPE] in (1, 2, 3, 5, 13):
                    # May need to check on free throws (event 3) that it doesn't
                    # correspond to No Shot (action type 0).
                    return play[self._PERSON1] in self.teams[0]
        return None

    def _add_possessions(self):
        """Add one possession to every active player, offensive or defensive."""
        for player, on_offense in self._active_players():
            if on_offense:
                player.off_poss += 1
            else:
                player.def_poss += 1

    def substitution(self, idx, event):
        """Apply the on-court changes of ``event`` to the ``active`` player flags.

        Event type 8 swaps Person1 (out) for Person2 (in). Any other play flagged
        in ``EventSub`` is treated as a start of period: everyone is switched off
        and the period's lineup rows are applied. ``idx`` is not used.
        """
        for play in event:
            play_e, play_a = play[self._EVENT_TYPE], play[self._ACTION_TYPE]
            if not EventSub[play_e][play_a]:
                continue

            if play_e == 8:  # garden variety substitution
                outgoing_player, ingoing_player = play[self._PERSON1], play[self._PERSON2]
                team = self.teams[0] if outgoing_player in self.teams[0] else self.teams[1]
                team[outgoing_player].active = False
                team[ingoing_player].active = True
                continue

            # start of period
            period = play[self._PERIOD] - 1
            if period == 0:
                continue  # the opening lineup is handled on startup

            for team in self.teams:
                for player in team.values():
                    player.active = False  # turn everyone off

            for _, row in self.lineup_by_period[period].iterrows():
                # NOTE(review): positions 2 and 4 are presumably the player id and
                # a status column where 'A' means on court -- confirm the layout.
                player_id = row.iloc[2]
                active = row.iloc[4] == 'A'
                if player_id in self.teams[0]:
                    self.teams[0][player_id].active = active
                else:
                    self.teams[1][player_id].active = active

    def chunk_pbp(self, game_pbp):
        """Sort the plays and group them into one array per distinct clock value.

        Returns a list of 2-D arrays (rows in the frame's column order), in
        sorted game order; an empty frame gives an empty list.
        """
        sorted_pbp = self.sort_pbp(game_pbp)
        # A single clock value that decreases across the whole game. The end of
        # one period and the start of the next map to the same value, so those
        # plays share a chunk.
        real_time = (sorted_pbp['PC_Time'].values +
                     (self._REGULATION_PERIODS - sorted_pbp['Period'].values) * self._PERIOD_CLOCK)
        _, first_idxs = np.unique(real_time, return_index=True)
        # Split the SORTED rows: the indices were computed on the sorted frame,
        # so applying them to the unsorted input would mis-group plays.
        # np.sort gives the ascending split points np.split expects.
        chunks = np.split(sorted_pbp.values, np.sort(first_idxs))
        return chunks[1:]  # the first split point is 0, so the first chunk is empty

    def sort_pbp(self, game_pbp):
        """Return ``game_pbp`` ordered by period, descending period clock, wall clock, event number."""
        return game_pbp.sort_values(['Period', 'PC_Time', 'WC_Time', 'Event_Num'],
                                    ascending=[True, False, True, True])

    def make_teams(self, game_lineup):
        """Build one ``{player id: Player}`` dict per team from this game's lineup rows.

        Expects exactly two distinct 'Team_id' values. A player starts active
        when one of their rows is for period 1.
        """
        team1_id, team2_id = np.unique(game_lineup['Team_id'])
        team1, team2 = {}, {}
        for team_id, roster in zip((team1_id, team2_id), (team1, team2)):
            players_on_team = game_lineup[game_lineup['Team_id'] == team_id]['Person_id'].unique()
            for player in players_on_team:
                player_data = game_lineup[game_lineup['Person_id'] == player]
                active_to_start = 1 in set(player_data['Period'])
                roster[player] = Player(player_data.iloc[0], active_to_start)

        return team1, team2

    def compute_ratings(self):
        """Replay every event, finalize each player's ratings, and print them.

        Prints one ``player_id off_rating def_rating`` line per player.
        """
        for idx, event in enumerate(self.play_by_play):
            if self.poss_team_0 is None:
                # At the start of periods we don't know who has possession, so
                # look ahead to establish it.
                self.poss_team_0 = self._get_next_poss(idx)
            self.score_event(idx, event)
            self.possession_change(idx, event)
            self.substitution(idx, event)

        for team in self.teams:
            for player in team.values():
                player.finalize_ratings()
                print('%s %s %s' % (player.player_id, player.off_rating, player.def_rating))

In [96]:
class Player(object):
    """Per-game accumulator of one player's points, possessions and ratings.

    Attributes:
        player_id: the 'Person_id' value of the player.
        team_id: the 'Team_id' value of the player.
        active: whether the player is currently counted as on the court.
        off_points / def_points: running point totals, both starting at 0.
        off_poss / def_poss: running possession counts, both starting at 0.
        off_rating / def_rating: points per 100 possessions, None until
            ``finalize_ratings`` runs with a positive possession count.
    """

    def __init__(self, player_data, active_to_start):
        """Create a player with zeroed counters.

        Args:
            player_data: mapping-like row providing 'Person_id' and 'Team_id'.
            active_to_start: initial value of ``active``.
        """
        self.player_id = player_data['Person_id']
        self.team_id = player_data['Team_id']
        self.active = active_to_start

        self.off_points = 0
        self.def_points = 0

        self.off_poss = 0
        self.def_poss = 0

        self.off_rating = None
        self.def_rating = None

    def finalize_ratings(self, verbose=True):
        """Turn the accumulated totals into per-100-possession ratings.

        A rating stays None when the matching possession count is 0.

        Args:
            verbose: when True (the default, matching the previous behaviour),
                print the raw totals first: offensive points and possessions,
                a blank line, then defensive points and possessions.
        """
        if verbose:
            # Single-argument print calls behave the same on Python 2 and 3.
            print(self.off_points)
            print(self.off_poss)
            print('')
            print(self.def_points)
            print(self.def_poss)
        if self.off_poss > 0:
            self.off_rating = self.off_points * 100.0 / self.off_poss
        if self.def_poss > 0:
            self.def_rating = self.def_points * 100.0 / self.def_poss

In [97]:
# Use the game of the first lineup row as the working example.
game0_id = GameLineup['Game_id'].iloc[0]
game0 = Game(game0_id, GameLineup, PlayByPlay)

In [98]:
# Display the id of the game selected for the walkthrough.
game0_id

'006728e4c10e957011e1f24878e6054a'

In [99]:
# Replay the whole game; compute_ratings prints each player's id and ratings.
game0.compute_ratings()

0
0

0
0
ed95dff5440fadf3042b5acacea81eed None None
81
74

80
71
766802a8fda500d7945950de7398c9c6 109.459459459 112.676056338
34
35

41
32
2ad626904c8b28cceb8e12c624a84240 97.1428571429 128.125
80
69

76
66
42e0d7167f04a4ff958c6442da0e6851 115.942028986 115.151515152
0
0

0
0
e59b921ab3da55f632bc748beb12805a None None
66
57

70
57
8d2127290c94bd41b82a2938734bc750 115.789473684 122.807017544
28
29

30
28
f4a5ca938177c407a9dab5412e39498f 96.5517241379 107.142857143
0
0

0
0
bd45fe7dba52aa2cd00ba80ff107d05b None None
0
0

0
0
dd1da128c27db468d95b99b583f8a57d None None
14
15

14
14
ae53f8ba6761b64a174051da817785bc 93.3333333333 100.0
61
48

56
47
618f6d58ab2881152607c2a6e057bc51 127.083333333 119.14893617
0
0

0
0
e816ff284dc3f965b8f3d605a3b91bae None None
86
79

91
78
c5dd5b2e3b975f0849d9b74e74125cb9 108.860759494 116.666666667
0
0

0
0
c10b49616a2f4a23607dc1a8be4fde9f None None
30
34

32
32
5db9c1c8184510fee8161e7fafdc9c49 88.2352941176 100.0
0
0

0
0
7027df5d9c51192f3527ac8c74b4d08d Non

In [91]:
def sortedPlayByPlay(PlayByPlay):
    """Return a copy of PlayByPlay in the nested sort order given by the hackathon pdf.

    Rows are ordered by Period (ascending), then PC_Time (descending), then
    WC_Time and Event_Num (both ascending).

    Input:
        PlayByPlay: Pandas Dataframe from reading Play_by_Play csv file
    """
    sort_keys = ['Period', 'PC_Time', 'WC_Time', 'Event_Num']
    ascending_flags = [True, False, True, True]
    return PlayByPlay.sort_values(by=sort_keys, ascending=ascending_flags)

In [92]:
# Keep only game0's plays, then put them in sorted play order.
is_game0 = PlayByPlay['Game_id'] == game0_id
game0PlayByPlay = sortedPlayByPlay(PlayByPlay[is_game0])

In [93]:
# Spot check: replay only the 117:140 row slice of the sorted play-by-play.
# NOTE(review): this rebinds game0, replacing the full-game object built earlier.
game0 = Game(game0_id, GameLineup, game0PlayByPlay[117:140])
game0.compute_ratings()

ed95dff5440fadf3042b5acacea81eed None None
766802a8fda500d7945950de7398c9c6 None None
2ad626904c8b28cceb8e12c624a84240 40.0 133.333333333
42e0d7167f04a4ff958c6442da0e6851 None None
e59b921ab3da55f632bc748beb12805a None None
8d2127290c94bd41b82a2938734bc750 None None
f4a5ca938177c407a9dab5412e39498f 40.0 133.333333333
bd45fe7dba52aa2cd00ba80ff107d05b None None
dd1da128c27db468d95b99b583f8a57d None None
ae53f8ba6761b64a174051da817785bc 40.0 133.333333333
618f6d58ab2881152607c2a6e057bc51 None None
e816ff284dc3f965b8f3d605a3b91bae None None
c5dd5b2e3b975f0849d9b74e74125cb9 40.0 133.333333333
c10b49616a2f4a23607dc1a8be4fde9f None None
5db9c1c8184510fee8161e7fafdc9c49 40.0 133.333333333
7027df5d9c51192f3527ac8c74b4d08d None None
e814950408915f43de2b079dce7c21c5 None None
4ef3dae16c436459ff05156abca5cebd 133.333333333 40.0
4dd3d6a51dc97c651d3a86eec4362a1f 133.333333333 40.0
d00d552bc4e50f63f9397a74514519ba None None
616281dee946056b071699476fdee9ec None None
5cce6ffa455e6372d9de0de400482ab6 1

In [94]:
# Preview the first rows only; displaying the whole play-by-play frame bloats the notebook.
PlayByPlay.head(10)

Unnamed: 0,Game_id,Event_Num,Event_Msg_Type,Period,WC_Time,PC_Time,Action_Type,Option1,Option2,Option3,Team_id,Person1,Person2,Person3,Team_id_type,Person1_type,Person2_type,Person3_type
0,006728e4c10e957011e1f24878e6054a,2,12,1,546410,7200,0,0,0,0,1473d70e5646a26de3c52aa1abd85b1f,0370a0d090da0d0edc6319f120187e0e,0370a0d090da0d0edc6319f120187e0e,0370a0d090da0d0edc6319f120187e0e,0,0,0,0
1,006728e4c10e957011e1f24878e6054a,4,10,1,546420,7200,0,0,0,0,01be0ad4af7aeb1f6d2cc2b6b6d6d811,8d2127290c94bd41b82a2938734bc750,99104de2626f67c1fa2ce70504970c3f,766802a8fda500d7945950de7398c9c6,2,4,5,4
2,006728e4c10e957011e1f24878e6054a,7,1,1,546590,7020,79,2,0,0,01be0ad4af7aeb1f6d2cc2b6b6d6d811,42e0d7167f04a4ff958c6442da0e6851,0370a0d090da0d0edc6319f120187e0e,0370a0d090da0d0edc6319f120187e0e,2,4,0,0
3,006728e4c10e957011e1f24878e6054a,8,6,1,546620,7020,2,0,0,1,01be0ad4af7aeb1f6d2cc2b6b6d6d811,616281dee946056b071699476fdee9ec,42e0d7167f04a4ff958c6442da0e6851,0b1c631effab25d4af2d16fc123c3d31,2,5,4,1
4,006728e4c10e957011e1f24878e6054a,10,3,1,546810,7020,10,1,0,0,01be0ad4af7aeb1f6d2cc2b6b6d6d811,42e0d7167f04a4ff958c6442da0e6851,0370a0d090da0d0edc6319f120187e0e,0370a0d090da0d0edc6319f120187e0e,2,4,0,0
5,006728e4c10e957011e1f24878e6054a,11,1,1,546970,6900,79,2,0,0,45ba8fc87f55b1191c50c400dc7ed11c,e814950408915f43de2b079dce7c21c5,0370a0d090da0d0edc6319f120187e0e,0370a0d090da0d0edc6319f120187e0e,3,5,0,0
6,006728e4c10e957011e1f24878e6054a,12,2,1,547190,6680,63,2,0,0,01be0ad4af7aeb1f6d2cc2b6b6d6d811,42e0d7167f04a4ff958c6442da0e6851,0370a0d090da0d0edc6319f120187e0e,0370a0d090da0d0edc6319f120187e0e,2,4,0,0
7,006728e4c10e957011e1f24878e6054a,13,4,1,547200,6660,0,0,0,0,01be0ad4af7aeb1f6d2cc2b6b6d6d811,616281dee946056b071699476fdee9ec,0370a0d090da0d0edc6319f120187e0e,0370a0d090da0d0edc6319f120187e0e,2,5,0,0
8,006728e4c10e957011e1f24878e6054a,14,2,1,547260,6610,41,2,1,0,45ba8fc87f55b1191c50c400dc7ed11c,48ec4e6c52f418d5ca4ef510ba473ea0,0370a0d090da0d0edc6319f120187e0e,8d2127290c94bd41b82a2938734bc750,3,5,0,4
9,006728e4c10e957011e1f24878e6054a,15,4,1,547280,6610,0,0,0,0,45ba8fc87f55b1191c50c400dc7ed11c,42e0d7167f04a4ff958c6442da0e6851,0370a0d090da0d0edc6319f120187e0e,0370a0d090da0d0edc6319f120187e0e,3,4,0,0
