In [1]:
import sqlite3
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
%matplotlib inline

# Open the SQLite database file that holds the event_codes, game_lineup and
# play_by_play tables. `connection` is the handle the query helper below
# passes to pandas.
connection = sqlite3.connect('analytics_track.db')
# Raw DB-API cursor for ad-hoc statements.
# NOTE(review): not referenced anywhere else in the visible notebook — confirm it is still needed.
cursor = connection.cursor()

In [2]:
def run_query(qry, params=None):
    """Run a SQL query against the notebook's SQLite connection.

    Parameters
    ----------
    qry : str
        SQL text to execute. May contain DB-API placeholders (``?`` or
        ``:name`` for sqlite3) when ``params`` is supplied.
    params : sequence or dict, optional
        Values bound to the placeholders in ``qry``. Binding values this way
        avoids building SQL with string formatting. Defaults to ``None``
        (no parameters), which keeps the original one-argument call working.

    Returns
    -------
    pandas.DataFrame
        The query result, one row per result row.
    """
    return pd.read_sql_query(qry, connection, params=params)

Tables:
* event_codes (Event_Msg_Type, Action_Type, Event_Msg_Type_Description, Action_Type_Description)
* game_lineup (Game_id, Period, Person_id, Team_id, status)
* play_by_play (Game_id, Event_Num, Event_Msg_Type, Period, WC_Time, PC_Time, Action_Type, Option1, Option2, Option3, Team_id, Person1, Person2, Team_id_type)

## Explore data from first game: 
Game_id = 021fd159b55773fba8157e2090fe0fe2

In [3]:
# Load every lineup row recorded for the first game.
game1_lineup = run_query(
    "SELECT * from game_lineup WHERE Game_id = '021fd159b55773fba8157e2090fe0fe2'"
)

# Preview the lineup grouped by team and period, hiding the stored 'index' column.
(
    game1_lineup
    .sort_values(by=['Team_id', 'Period'])
    .drop(columns='index')
    .head()
)

Unnamed: 0,Game_id,Period,Person_id,Team_id,status
0,021fd159b55773fba8157e2090fe0fe2,1,881f83d2dee3f18c7d1751659406144e,012059d397c0b7e5a30a5bb89c0b075e,A
3,021fd159b55773fba8157e2090fe0fe2,1,cec898a1d355dbfbad8c760615fde1af,012059d397c0b7e5a30a5bb89c0b075e,A
5,021fd159b55773fba8157e2090fe0fe2,1,a99f44bbff39e352191a870e17f04537,012059d397c0b7e5a30a5bb89c0b075e,A
7,021fd159b55773fba8157e2090fe0fe2,1,89706b99ddd00dc05d37ef5cafc04276,012059d397c0b7e5a30a5bb89c0b075e,A
8,021fd159b55773fba8157e2090fe0fe2,1,2b313e2bcef0268bc8e9415132ba9997,012059d397c0b7e5a30a5bb89c0b075e,A


In [57]:
# Play-by-play for the first game, joined to event_codes so each event carries
# its human-readable type/action descriptions. Rows are ordered by period,
# then by PC_Time descending, with WC_Time and Event_Num as tie-breakers.
qry = """
SELECT Period, PC_Time, WC_Time, Event_Num, p.Event_Msg_Type, p.Action_Type, Event_Msg_Type_Description,
       Action_Type_Description, Option1, Option2, Team_id, Person1, Person2
FROM play_by_play p 
INNER JOIN event_codes e ON p.Event_Msg_Type = e.Event_Msg_Type AND p.Action_Type = e.Action_Type
WHERE p.Game_id = '021fd159b55773fba8157e2090fe0fe2'
ORDER BY Period asc, PC_Time desc, WC_Time asc, Event_Num asc
"""
game1_plays = run_query(qry)
# Display the resulting frame as the cell output.
game1_plays

Unnamed: 0,Period,PC_Time,WC_Time,Event_Num,Event_Msg_Type,Action_Type,Event_Msg_Type_Description,Action_Type_Description,Option1,Option2,Team_id,Person1,Person2
0,1,7200,546427,0,12,0,Start Period,,0,0,1473d70e5646a26de3c52aa1abd85b1f,6bcf6c1f8c373d25fca1579bc4464a91,6bcf6c1f8c373d25fca1579bc4464a91
1,1,7200,546495,1,10,0,Jump Ball,,0,0,012059d397c0b7e5a30a5bb89c0b075e,89706b99ddd00dc05d37ef5cafc04276,307beab25b1021a548b4a47550bc4b25
2,1,7050,546665,2,2,1,Missed Shot,Jump Shot,3,0,012059d397c0b7e5a30a5bb89c0b075e,cec898a1d355dbfbad8c760615fde1af,6bcf6c1f8c373d25fca1579bc4464a91
3,1,6960,546714,3,4,0,Rebound,Unknown,0,0,012059d397c0b7e5a30a5bb89c0b075e,307beab25b1021a548b4a47550bc4b25,6bcf6c1f8c373d25fca1579bc4464a91
4,1,6920,546886,6,6,4,Foul,Offensive,0,0,cff694c8186a4bd377de400e4f60fe47,c00264c3114d23bac482e9de50fb7d28,89706b99ddd00dc05d37ef5cafc04276
5,1,6920,546887,7,5,5,Turnover,Foul,0,0,cff694c8186a4bd377de400e4f60fe47,c00264c3114d23bac482e9de50fb7d28,6bcf6c1f8c373d25fca1579bc4464a91
6,1,6820,547110,8,6,1,Foul,Personal,0,0,012059d397c0b7e5a30a5bb89c0b075e,57bbd7e30bc694aeee9ee40c583e6811,89706b99ddd00dc05d37ef5cafc04276
7,1,6740,547220,9,1,49,Made Shot,Driving Dunk Shot,2,0,012059d397c0b7e5a30a5bb89c0b075e,a99f44bbff39e352191a870e17f04537,881f83d2dee3f18c7d1751659406144e
8,1,6580,547395,10,1,1,Made Shot,Jump Shot,2,0,cff694c8186a4bd377de400e4f60fe47,57bbd7e30bc694aeee9ee40c583e6811,c00264c3114d23bac482e9de50fb7d28
9,1,6450,547524,12,2,1,Missed Shot,Jump Shot,3,0,012059d397c0b7e5a30a5bb89c0b075e,881f83d2dee3f18c7d1751659406144e,6bcf6c1f8c373d25fca1579bc4464a91


## Notes about Data

* Event associated with neither team at the start of every game:
    * Event_Msg_Type = 12 (Start Period), which marks the start of the game (and of each later period).
    * Team_id_type = 0 & Team_id = 1473d70e5646a26de3c52aa1abd85b1f
    
* Placeholder for missing (None) values in the player columns (Person1, Person2):
    * Person_id = 6bcf6c1f8c373d25fca1579bc4464a91

In [5]:
# first = run_query("SELECT * from play_by_play WHERE Team_id_type = 0")
# first = run_query("SELECT * from play_by_play WHERE Team_id = '1473d70e5646a26de3c52aa1abd85b1f'")
# first

# Game Class & Game Simulator:

In [12]:
from collections import deque

In [3]:
class Game:
    """Running state of one game: score, on-court players and plus/minus.

    The object is driven event by event: periods are started and ended,
    substitutions toggle the active flags, and every scoring event updates
    both the score and the plus/minus of all players flagged active.
    """

    def __init__(self, game_id, team1_id, team2_id):
        """Create an empty game state.

        Parameters
        ----------
        game_id : str
            Identifier used to look up lineups in ``game_lineup``.
        team1_id, team2_id : str
            Identifiers of the two teams; "team 1" is the reference team
            for the sign convention used in ``update_score``.
        """
        self.game = game_id
        self.team1 = team1_id
        self.team2 = team2_id
        self.team1_score = 0
        self.team2_score = 0

        # Player_id -> True while the player is on the court, False otherwise.
        self.team1_active = {}
        self.team2_active = {}

        # Player_id -> accumulated plus/minus, from that player's own team's
        # point of view. A player only appears here once a scoring event has
        # happened while they were active.
        self.team1_pm = {}
        self.team2_pm = {}

    ### Update Plus/Minus stats and score:  ###

    def update_score(self, team_id, points):
        """Add ``points`` to ``team_id``'s score and update plus/minus.

        Every active player on the scoring team gains ``points``; every
        active player on the other team loses ``points``.

        Raises
        ------
        ValueError
            If ``team_id`` is neither of the two teams in this game. The
            check happens before any state is modified, so a bad event
            leaves the score and plus/minus untouched.
        """
        # `pm` is the swing relative to TEAM 1.
        if team_id == self.team1:
            self.team1_score += points
            pm = points
        elif team_id == self.team2:
            self.team2_score += points
            pm = -points
        else:
            # Previously this fell through with `pm` unbound and failed later
            # with an opaque UnboundLocalError.
            raise ValueError(
                "Unknown team id {!r} for game {!r}; expected {!r} or {!r}".format(
                    team_id, self.game, self.team1, self.team2
                )
            )

        for player, active in self.team1_active.items():
            if active:
                self.team1_pm[player] = self.team1_pm.get(player, 0) + pm

        # Team 2 sees the same swing with the opposite sign.
        for player, active in self.team2_active.items():
            if active:
                self.team2_pm[player] = self.team2_pm.get(player, 0) - pm

    ### Updating active players ###

    def get_starter(self, team_id, period):
        """Return the Person_id values listed in ``game_lineup`` for this game,
        the given team and the given period (an iterable pandas column).

        Relies on the notebook-level ``run_query`` helper.
        """
        df = run_query("SELECT Person_id FROM game_lineup WHERE Game_id='{}' AND Team_id='{}' AND Period={}".format(self.game,team_id,period))
        return df.Person_id

    def start_period(self, period):
        """Flag the lineup players of both teams for ``period`` as active."""
        tm1_starters = self.get_starter(self.team1, period)
        tm2_starters = self.get_starter(self.team2, period)
        for player in tm1_starters:
            self.team1_active[player] = True
        for player in tm2_starters:
            self.team2_active[player] = True

    def end_period(self):
        """Flag every tracked player on both teams as inactive."""
        for player in self.team1_active:
            self.team1_active[player] = False
        for player in self.team2_active:
            self.team2_active[player] = False

    def make_substitution(self, leaving, entering):
        """Swap ``leaving`` (set inactive) for ``entering`` (set active).

        The team is inferred from which active-player dict already tracks
        ``leaving`` rather than from the event's Team_id, because the
        original dataset is inconsistent for that field. If ``leaving`` is
        tracked by neither team, an error line is printed and nothing changes.
        """
        if leaving in self.team1_active:
            self.team1_active[leaving] = False
            self.team1_active[entering] = True
        elif leaving in self.team2_active:
            self.team2_active[leaving] = False
            self.team2_active[entering] = True
        else:
            print('ERROR: Player', leaving, "not found on either team found.")

    def get_plusminus(self):
        """Return ``[team1_pm, team2_pm]`` (the live dicts, not copies)."""
        return [self.team1_pm, self.team2_pm]

### Query to get play-by-play for a specific GameID:

In [4]:
# SQL template for one game's play-by-play joined to its event descriptions.
# The '{}' placeholder is filled with a Game_id via str.format before running.
# Ordering: period ascending, PC_Time descending, then WC_Time and Event_Num
# ascending as tie-breakers.
plays_query = """
SELECT Period, PC_Time, WC_Time, Event_Num, p.Event_Msg_Type, p.Action_Type, Event_Msg_Type_Description,
       Action_Type_Description, Option1, Option2, Team_id, Person1, Person2
FROM play_by_play p 
INNER JOIN event_codes e 
    ON p.Event_Msg_Type = e.Event_Msg_Type 
    AND p.Action_Type = e.Action_Type
WHERE p.Game_id = '{}'
ORDER BY Period asc, PC_Time desc, WC_Time asc, Event_Num asc;
"""

### Game simulator that reads the table above and updates the stats:

In [61]:
from collections import deque

def game_simulation(game_id):
    """Replay a game's play-by-play and return the resulting ``Game`` state.

    Events are read in the order defined by ``plays_query`` and applied to a
    ``Game`` object: period starts/ends, made shots, free throws and
    substitutions. Progress is printed at every period start and end.

    Special case: substitutions that follow a foul are queued instead of
    applied immediately, so that free throws resulting from the foul are
    credited to the players who were on the court when it happened. The queue
    is applied once play resumes (an event that is not a free throw,
    substitution or code 9, at a different PC_Time than the foul) or when the
    period ends, whichever comes first.

    Parameters
    ----------
    game_id : str
        Game_id to simulate.

    Returns
    -------
    Game
        The game object after all events have been applied.

    Raises
    ------
    ValueError
        If ``game_lineup`` lists fewer than two teams for ``game_id``.
    """
    teams_list = run_query(
        "SELECT distinct Team_id from game_lineup WHERE Game_id = '{}'".format(game_id)
    ).Team_id.tolist()
    if len(teams_list) < 2:
        # Previously an unknown game id surfaced as an opaque lookup error.
        raise ValueError(
            "Expected two teams in game_lineup for game {!r}, found {}".format(
                game_id, len(teams_list)
            )
        )
    game = Game(game_id, teams_list[0], teams_list[1])
    play_by_play = run_query(plays_query.format(game_id))

    # To keep track of fouls/and subs for special case:
    foul_state = False
    foul_time = 0
    sub_queue = deque()

    for play in play_by_play.itertuples():
        event_type = play.Event_Msg_Type

        if foul_state:
            # Play has moved on: not a free throw (3), substitution (8) or
            # code 9 (presumably timeout — confirm against event_codes), and
            # the game clock differs from the foul's.
            play_resumed = event_type not in [3, 8, 9] and play.PC_Time != foul_time
            # End Period (13) must also settle the queue even when it shares
            # the foul's PC_Time; otherwise the pending substitutions would be
            # applied at the next Start Period and leak into its lineup.
            if play_resumed or event_type == 13:
                while sub_queue:   # make the substitutions (after PM was updated)
                    leaving, entering = sub_queue.popleft()
                    game.make_substitution(leaving, entering)
                foul_state = False
            elif event_type == 8:   # Substitution during the foul sequence: defer it
                sub_queue.append((play.Person1, play.Person2))
                continue

        if event_type in [2, 4, 5, 7, 9, 10, 11]:
            # Ignore Missed Shot, Rebound, Turnover, Violation, Timeout, JumpBall
            # NOTE(review): code 11 is also skipped; its meaning is not shown here.
            continue

        elif event_type == 12:  # Start Period
            game.start_period(play.Period)
            print('Start period', play.Period)

        elif event_type == 13:  # End Period
            game.end_period()
            print('End of Period', play.Period)
            print('.......... Score:', game.team1_score, '-', game.team2_score)

        elif event_type == 1:  # Made Shot: Option1 is passed as the points scored
            game.update_score(play.Team_id, play.Option1)

        elif event_type == 3:  # Free Throw
            # Only Option1 == 1 scores (presumably a made free throw — confirm).
            if play.Option1 == 1:
                game.update_score(play.Team_id, play.Option1)

        elif event_type == 6:  # Foul: start deferring substitutions
            foul_state = True
            foul_time = play.PC_Time

        elif event_type == 8:  # Substitution outside a foul sequence
            game.make_substitution(play.Person1, play.Person2)

    return game

In [62]:
# Replay the first game; the simulator prints a line at each period start/end
# and returns the final Game object.
g1 = game_simulation('021fd159b55773fba8157e2090fe0fe2')

Start period 1
End of Period 1
.......... Score: 29 - 16
Start period 2
End of Period 2
.......... Score: 57 - 42
Start period 3
End of Period 3
.......... Score: 79 - 68
Start period 4
End of Period 4
.......... Score: 93 - 91


## Tests to make sure functions are working:

In [63]:
# Final score accumulated by the simulation (team 1, then team 2).
print(g1.team1_score, g1.team2_score)

93 91


In [64]:
# Active flags per player for each team; end_period() sets every tracked
# player to False, so all values should be False after the last period ends.
print(g1.team1_active, g1.team2_active)

{'881f83d2dee3f18c7d1751659406144e': False, 'cec898a1d355dbfbad8c760615fde1af': False, 'a99f44bbff39e352191a870e17f04537': False, '89706b99ddd00dc05d37ef5cafc04276': False, '2b313e2bcef0268bc8e9415132ba9997': False, 'd6bc5eb6ed54c734628cb4d237f99031': False, '1362d9ad805e7cc4ea3a9467c91142d6': False, 'b2fada91a0c8a0f400623c425286ae2e': False, '2d9594e4d250679e7a3383ad5ccd2f44': False, '314c71574689f752c17b83cd6a4d770e': False} {'27ea17a8685c4919f157e83fe9cb2d9e': False, '57bbd7e30bc694aeee9ee40c583e6811': False, '33963fe856a1523ff46438ba07d1d99f': False, 'c00264c3114d23bac482e9de50fb7d28': False, '307beab25b1021a548b4a47550bc4b25': False, '603daefb2fa321f3abab289805fdcc25': False, '60b5f9f68ccbbf7ba3fa13a2189e2775': False, 'c1fe6ed491d04e3cdf50755dff23b263': False, '8bb18fb1d0cc10c985afa823ecec6d18': False}


## Research to see if this game data matches any real scores:
Sixers vs Timberwolves 1/3/2017

https://www.google.com/search?q=sixers+timberwolves+1%2F3%2F2017&oq=sixers+timberwolves+1%2F3%2F2017&aqs=chrome..69i57.7643j0j7&sourceid=chrome&ie=UTF-8

