In [1]:
import numpy as np
import pandas as pd
from matplotlib import pyplot as plt
import glob
import tqdm
import statsapi
import warnings
import pybaseball
warnings.filterwarnings('ignore')
pd.set_option('display.max_columns', None)

  from pandas.core.computation.check import NUMEXPR_INSTALLED


In [2]:
#define bo_states. The main thing this affects is the index. We want to be consistent, meaning index 4 should 
#always be '1___-0'
bo_states = ['___-0', '__3-0', '_2_-0', '_23-0', '1__-0', '1_3-0', '12_-0',
       '123-0', '___-1', '__3-1', '_2_-1', '_23-1', '1__-1', '1_3-1',
       '12_-1', '123-1', '___-2', '__3-2', '_2_-2', '_23-2', '1__-2',
       '1_3-2', '12_-2', '123-2']

In [3]:
#get the game_pks scraped by Caleb/Tristan
game_files = glob.glob('../baseball-scraping/games/*.csv')
gamepks = [int(game.split('_')[1]) for game in game_files]

states = pd.read_csv('start_nodes.csv')

In [4]:
# events = set()
# for gamepk in gamepks:
#     file = pd.read_csv(f'baseball-scraping/games/game_{gamepk}_decisions.csv').sort_values(by = 'At_Bat')
#     events = events | set(file.Event_Type.unique())
# events

In [5]:
def get_batting_order_spot(bo):
    if bo >= 900:
        return 8
    if bo >= 800:
        return 7
    if bo >= 700:
        return 6 
    if bo >= 600:
        return 5
    if bo >= 500:
        return 4
    if bo >= 400:
        return 3
    if bo >= 300:
        return 2
    if bo >= 200:
        return 1
    return 0

In [6]:
sc23 = pybaseball.statcast('2023-01-01', '2023-12-01')

This is a large query, it may take a moment to complete


  0%|                                                                                          | 0/246 [00:00<?, ?it/s]

Skipping offseason dates
Skipping offseason dates


100%|████████████████████████████████████████████████████████████████████████████████| 246/246 [03:03<00:00,  1.34it/s]


In [7]:
def define_all_nodes(gamepk):
    game_p = pd.read_csv(f'../baseball-scraping/games/game_{gamepk}_decisions.csv').sort_values(by = 'At_Bat')
    if game_p.shape[0] == 0:
        return pd.DataFrame()
    game_p = game_p.loc[game_p.Event_Type.isin(['Balk', 'Catcher Interference', 'Caught Stealing 2B',
     'Caught Stealing 3B',
     'Caught Stealing Home',
     'Defensive Indiff', 'Disengagement Violation', 'Ejection', 'Game Advisory', 'Injury',
                                               'Other Advance',
     'Passed Ball',
     'Pickoff 1B',
     'Pickoff 2B',
     'Pickoff 3B',
     'Pickoff Caught Stealing 2B',
     'Pickoff Caught Stealing 3B',
     'Pickoff Caught Stealing Home',
     'Pickoff Error 1B',
     'Pickoff Error 2B',
     'Pickoff Error 3B','Runner Out',
     'Runner Placed On Base',
     'Stolen Base 2B',
     'Stolen Base 3B',
     'Stolen Base Home','Umpire Substitution','Wild Pitch']) == False]
    #sometimes gameday says there are three outs, so fix that using statcast
    if game_p.Outs.max() > 2:
        scg = sc23.loc[sc23.game_pk == gamepk].sort_values(by = 'at_bat_number')
        problem_abs = game_p.loc[game_p.Outs > 2, 'At_Bat'].values
        corrected_outs = []
        for ab in problem_abs:
            corrected_outs.append(scg.loc[scg.at_bat_number == ab, 'outs_when_up'].values[0])
        game_p.loc[game_p.Outs > 2, 'Outs'] = corrected_outs
    
    
    #define the baserunner out state in the format I use 
    game_p['thirdbase'] = '_'
    game_p.loc[np.isnan(game_p.Third_Base) == False, 'thirdbase'] = '3'
    game_p['secondbase'] = '_'
    game_p.loc[np.isnan(game_p.Second_Base) == False, 'secondbase'] = '2'
    game_p['firstbase'] = '_'
    game_p.loc[np.isnan(game_p.First_Base) == False, 'firstbase'] = '1'
    game_p['bo_state'] = game_p.firstbase + game_p.secondbase + game_p.thirdbase + '-' + game_p.Outs.astype(str)
    
    #define is_top_of_inning
    game_p['is_top_of_inning'] = 0
    game_p.loc[game_p.Half == 'Top', 'is_top_of_inning'] = 1

    gameplays = states.loc[states.game_pk == gamepk]
    subsp = game_p.loc[(game_p.Is_Decision) & (game_p.Event_Type != 'Intent Walk')]
    pinch_hits = subsp.loc[subsp.Event_Type == 'Offensive Substitution']
    def_sub = subsp.loc[subsp.Event_Type.isin(['Defensive Sub', 'Defensive Switch'])]
    pit_change = subsp.loc[subsp.Event_Type == 'Pitching Substitution']
    game_p = game_p.loc[(game_p.Is_Decision == False) | (game_p.Event_Type == 'Intent Walk')]
    

    #for the substitutions, we need to know who came in for whom, which we can get from the statsapi
    sapi = statsapi.get('game', {'gamePk': gamepk})['liveData']['plays']['allPlays']
    for i in range(1,game_p.shape[0]): #we start with the second play of the game
        #initialize the nodes and newplay dataframe
        nodes = pd.DataFrame()
        newplay = gameplays[['game_date', 'game_pk']].iloc[[0]]
        newplay['batter_per_game'] = game_p.At_Bat.values[i]
        newplay[['home_team_name', 'away_team_name', 'home_team_won']] = gameplays[['home_team_name', 'away_team_name', 'home_team_won']].values[0]
        newplay[['is_top_of_inning', 'inning']] = game_p[['is_top_of_inning', 'Inning']].values[i]

        #define the new bo_state_index. 
        newplay['bo_state_index'] = bo_states.index(game_p.bo_state.values[i])

        #define score diff
        newplay['home_score_diff'] = game_p.Score_Deficit.values[i]

        #initialize the batter indices, lineups, pitchers, benches, and bullpens to the previous play's values
        newplay[['home_batter_index', 'away_batter_index',
        'home_batting_order_0', 'home_batting_order_1', 'home_batting_order_2',
        'home_batting_order_3', 'home_batting_order_4', 'home_batting_order_5',
        'home_batting_order_6', 'home_batting_order_7', 'home_batting_order_8',
        'home_dh', 'home_lost_dh', 'home_pitcher', 'home_bullpen_0',
        'home_bullpen_1', 'home_bullpen_2', 'home_bullpen_3', 'home_bullpen_4',
        'home_bullpen_5', 'home_bullpen_6', 'home_bullpen_7', 'home_bullpen_8',
        'home_bench_0', 'home_bench_1', 'home_bench_2', 'home_bench_3',
        'away_batting_order_0', 'away_batting_order_1', 'away_batting_order_2',
        'away_batting_order_3', 'away_batting_order_4', 'away_batting_order_5',
        'away_batting_order_6', 'away_batting_order_7', 'away_batting_order_8',
        'away_dh', 'away_lost_dh', 'away_pitcher', 'away_bullpen_0',
        'away_bullpen_1', 'away_bullpen_2', 'away_bullpen_3', 'away_bullpen_4',
        'away_bullpen_5', 'away_bullpen_6', 'away_bullpen_7', 'away_bullpen_8',
        'away_bench_0', 'away_bench_1', 'away_bench_2', 'away_bench_3', 
        'home_pitcher_batters_faced', 'home_pitcher_can_be_subbed',
        'away_pitcher_batters_faced', 'away_pitcher_can_be_subbed', 'node_type', 'home_bullpen_9', 'home_bullpen_10',
           'home_bullpen_11', 'home_bullpen_12', 'home_bullpen_13',
           'home_bullpen_14', 'home_bench_4', 'home_bench_5', 'away_bullpen_9', 'away_bullpen_10', 'away_bullpen_11',
           'away_bullpen_12', 'away_bullpen_13', 'away_bullpen_14',
        'away_bench_4', 'away_bench_5', 'terminal_value', 'action_taken']] = gameplays[['home_batter_index', 'away_batter_index',
        'home_batting_order_0', 'home_batting_order_1', 'home_batting_order_2',
        'home_batting_order_3', 'home_batting_order_4', 'home_batting_order_5',
        'home_batting_order_6', 'home_batting_order_7', 'home_batting_order_8',
        'home_dh', 'home_lost_dh', 'home_pitcher', 'home_bullpen_0',
        'home_bullpen_1', 'home_bullpen_2', 'home_bullpen_3', 'home_bullpen_4',
        'home_bullpen_5', 'home_bullpen_6', 'home_bullpen_7', 'home_bullpen_8',
        'home_bench_0', 'home_bench_1', 'home_bench_2', 'home_bench_3',
        'away_batting_order_0', 'away_batting_order_1', 'away_batting_order_2',
        'away_batting_order_3', 'away_batting_order_4', 'away_batting_order_5',
        'away_batting_order_6', 'away_batting_order_7', 'away_batting_order_8',
        'away_dh', 'away_lost_dh', 'away_pitcher', 'away_bullpen_0',
        'away_bullpen_1', 'away_bullpen_2', 'away_bullpen_3', 'away_bullpen_4',
        'away_bullpen_5', 'away_bullpen_6', 'away_bullpen_7', 'away_bullpen_8',
        'away_bench_0', 'away_bench_1', 'away_bench_2', 'away_bench_3', 
        'home_pitcher_batters_faced', 'home_pitcher_can_be_subbed',
        'away_pitcher_batters_faced', 'away_pitcher_can_be_subbed','node_type', 'home_bullpen_9', 'home_bullpen_10',
           'home_bullpen_11', 'home_bullpen_12', 'home_bullpen_13',
           'home_bullpen_14', 'home_bench_4', 'home_bench_5', 'away_bullpen_9', 'away_bullpen_10', 'away_bullpen_11',
           'away_bullpen_12', 'away_bullpen_13', 'away_bullpen_14',
        'away_bench_4', 'away_bench_5', 'terminal_value', 'action_taken']].values[-1]
        
        is_top = game_p.is_top_of_inning.values[i]
        was_top = game_p.is_top_of_inning.values[i-1]
        ############################## Top of Inning
        if is_top:
            #it is the top of the inning
            #update batter index and pitcher batters faced based on who batted previously. If the away team batted
            #previously, update away batter index and home batters faced. Otherwise update home batter index and 
            #away batters faced
            if was_top:
                #the last batter was the away team
                bi = (newplay['away_batter_index'].values[0] + 1)%9 
                newplay['away_batter_index'] = bi
                newplay['home_pitcher_batters_faced'] += 1
                #check if the home pitcher can now be subbed
                if newplay.home_pitcher_batters_faced.values[0] >= 3:
                    newplay['home_pitcher_can_be_subbed'] = True
                else:
                    newplay['home_pitcher_can_be_subbed'] = False
            else:
                #the last batter was on the home team, so we just switched half innings
                bi = (newplay['home_batter_index'].values[0] + 1)%9 
                newplay['home_batter_index'] = bi
                newplay['away_pitcher_batters_faced'] += 1
                #check if the away pitcher can now be subbed
                if newplay.away_pitcher_batters_faced.values[0] >= 3:
                    newplay['away_pitcher_can_be_subbed'] = True
                else:
                    newplay['away_pitcher_can_be_subbed'] = False
                #since we just switched from the bottom to the top of an inning, the home pitcher can be subbed
                newplay['home_pitcher_can_be_subbed'] = True
                
            #define the batting order spot more carefully in case I skipped a play or something using the statsapi
            batter = sapi[game_p.At_Bat.values[i] - 1]['matchup']['batter']['id']
            lup = list(game_p[['Away_Lineup_1', 'Away_Lineup_2', 'Away_Lineup_3', 'Away_Lineup_4', 'Away_Lineup_5', 'Away_Lineup_6',
                        'Away_Lineup_7', 'Away_Lineup_8', 'Away_Lineup_9']].values[i])
            bi = lup.index(batter)
            newplay['away_batter_index'] = bi
                

            #first thing we do is define defensive sub nodes if it is the start of an inning or if a team actually made a def sub in the middle of an inning.
            #in my version of the game you only do def subs at the start of an inning, but you can really do them at any time, so if a team in reality 
            #made a def sub in the middle of an inning, I need a node for that.
            if game_p.is_top_of_inning.values[i] != game_p.is_top_of_inning.values[i-1] or game_p.At_Bat.values[i] in def_sub.At_Bat.unique():
                #we just switched half innings or we made a defensive sub, so we need a def sub node
                newnode = newplay.copy()
                newnode['node_type'] = 'def sub node'
                action = ''
                if game_p.At_Bat.values[i] in def_sub.At_Bat.unique():
                    #there were substitutions. Since we have already copied newplay for the def sub node, update newplay's
                    #lineup with the substitutions. Then when we copy newplay to define the pitcher, batter, and chance nodes, it will 
                    #have the right defensive lineup
                    events = sapi[game_p.At_Bat.values[i] - 1]['playEvents']
                    subs_on_play = []
                    for s in events:
                        if 'event' in s['details']:
                            if s['details']['event'] in {'Defensive Sub', 'Defensive Switch'}:
                                subs_on_play.append(s)
                    for j, s in enumerate(subs_on_play):
                        player = s['player']['id']
                        spot = get_batting_order_spot(int(s['battingOrder']))
                        if player == newplay.home_dh.values[0] and s['position']['abbreviation'] != 'DH':
                            #the DH came in to play defense, which means the pitcher has to bat, so set the pitcher
                            #spot in the batting order to 10 for DH and set home_lost_dh to true
                            new_lineup = game_p[['Home_Lineup_1', 'Home_Lineup_2', 'Home_Lineup_3', 'Home_Lineup_4', 'Home_Lineup_5', 'Home_Lineup_6',
                        'Home_Lineup_7', 'Home_Lineup_8', 'Home_Lineup_9']].values[i]
                            pitspot = list(new_lineup).index(game_p.Home_Pitcher.values[i])
                            newplay['home_batting_order_' + str(pitspot)] = 10
                            newplay['home_lost_dh'] = True
                            newplay['home_dh'] = -1
                            newplay['home_batting_order_' + str(spot)] = player
                            action = action + s['details']['description']
                        else:
                            #the DH didn't come in, so just update the batting order spot 
                            newplay['home_batting_order_' + str(spot)] = player
                            action = action + s['details']['description']
                            
                        #remove player from the bench
                        bench = list(newplay[['home_bench_0', 'home_bench_1', 'home_bench_2', 'home_bench_3', 'home_bench_4', 'home_bench_5']].values[0])
                        if player in bench:
                            bench[bench.index(player)] = -1
                        newplay[['home_bench_0', 'home_bench_1', 'home_bench_2', 'home_bench_3', 'home_bench_4', 'home_bench_5']] = bench
                        
                #finish defining the def sub node and append to nodes
                newnode['action_taken'] = action
                nodes = pd.concat((nodes, newnode))

            #next define pitcher nodes and batter nodes. I have it ordered as batter then pitcher then at bat, but if there
            #is a pitcher sub we have to give the batter another chance to sub. So I'll start with a batter sub node, and 
            #if there are subs on the play and the first sub is a pitcher sub we'll go batter play at bat then pitcher 
            #sub then just go in order of the subs
            if game_p.At_Bat.values[i] in pit_change.At_Bat.unique() or game_p.At_Bat.values[i] in pinch_hits.At_Bat.unique():
                #there were pitcher/batter subs. Get them in the right order 
                events = sapi[game_p.At_Bat.values[i] - 1]['playEvents']
                subs_on_play = []
                for s in events:
                    if 'event' in s['details']:
                        if s['details']['event'] in {'Pitching Substitution', 'Offensive Substitution'}:
                            subs_on_play.append(s)

                for j, s in enumerate(subs_on_play):
                    if j == 0:
                        #the first play should be a batter node, regardless of if the pitcher subbed or the batter subbed 
                        if s['details']['event'] == 'Pitching Substitution':
                            #the first action was a pitcher sub, so we need to define a batter sub node with action play at bat
                            newnode = newplay.copy()
                            newnode['action_taken'] = 'play at bat'
                            newnode['node_type'] = 'batter'
                            nodes = pd.concat((nodes, newnode))
                            
                            #now we can define the pitcher sub node
                            newnode = newplay.copy()
                            new_pit = s['player']['id']
                            newplay['home_pitcher'] = new_pit
                            newplay['home_pitcher_can_be_subbed'] = False
                            newplay['home_pitcher_batters_faced'] = 0
                            #remove the pitcher from the bullpen
                            bullpen = list(newplay[['home_bullpen_0',
            'home_bullpen_1', 'home_bullpen_2', 'home_bullpen_3', 'home_bullpen_4',
            'home_bullpen_5', 'home_bullpen_6', 'home_bullpen_7', 'home_bullpen_8', 'home_bullpen_9', 'home_bullpen_10',
                                                   'home_bullpen_11', 'home_bullpen_12', 'home_bullpen_13', 'home_bullpen_14']].values[0])
                            if new_pit in bullpen:
                                bullpen[bullpen.index(new_pit)] = -1
                            newplay[['home_bullpen_0',
            'home_bullpen_1', 'home_bullpen_2', 'home_bullpen_3', 'home_bullpen_4',
            'home_bullpen_5', 'home_bullpen_6', 'home_bullpen_7', 'home_bullpen_8', 'home_bullpen_9', 'home_bullpen_10',
                                                   'home_bullpen_11', 'home_bullpen_12', 'home_bullpen_13', 'home_bullpen_14']] = bullpen

                            #define newnode's action. Note, when the DH comes into field, the pitcher spot being added to the lineup
                            #is represented as a pitcher substitution in the data, even if they use the same pitcher they had before,
                            #so in that case I need to put in an action of play at bat instead of relief pitcher
                            if new_pit == newnode.home_pitcher.values[0]:
                                newnode['action_taken'] = 'play at bat'
                            else:
                                newnode['action_taken'] = 'relief pitcher ' + str(new_pit)
                            #also define the node type as pitcher
                            newnode['node_type'] = 'pitcher'
                            nodes = pd.concat((nodes, newnode))
                        elif s['details']['event'] == 'Offensive Substitution':
                            #the first sub was a batter sub, so we just define the batter node with a sub
                            newnode = newplay.copy()
                            #there was a pinch hit (or run), so update the batting order with the new batter. 
                            new_batter = s['player']['id']
                            old_batter = s['replacedPlayer']['id']
                            spot = get_batting_order_spot(int(s['battingOrder']))
                            #remove new batter from bench
                            bench = list(newplay[['away_bench_0', 'away_bench_1', 'away_bench_2', 'away_bench_3', 'away_bench_4', 'away_bench_5']].values[0])
                            if new_batter in bench:
                                bench[bench.index(new_batter)] = -1
                            newplay[['away_bench_0', 'away_bench_1', 'away_bench_2', 'away_bench_3', 'away_bench_4', 'away_bench_5']] = bench
                            #add them to the lineup
                            if newplay[['away_batting_order_0', 'away_batting_order_1', 'away_batting_order_2',
                    'away_batting_order_3', 'away_batting_order_4', 'away_batting_order_5',
                    'away_batting_order_6', 'away_batting_order_7', 'away_batting_order_8']].values[0,spot] == 10:
                                #it's the DH so replace the DH
                                newplay['away_dh'] = new_batter
                            else:
                                #replace the spot in the order
                                newplay['away_batting_order_' + str(spot)] = new_batter

                            #note, if the substitution was a pinch runner, then we don't actually want to define 
                            #a node for that since we're not considering pinch runner actions. We just want to 
                            #update the lineup without a node
                            if s['position']['abbreviation'] != 'PR':
                                newnode['action_taken'] = 'pinch hit ' + str(new_batter)
                                newnode['node_type'] = 'batter'
                                nodes = pd.concat((nodes, newnode))

                    else:
                        #otherwise, we just define a node for each sub
                        if s['details']['event'] == 'Pitching Substitution':
                            #the action was a pitcher sub
                            #update newplay. Again this works because I have already copied newplay so when I copy it again it will have the new pitcher
                            #but the pitcher node that resulted in the pitching change has the old pitcher
                            newnode = newplay.copy()
                            new_pit = s['player']['id']
                            newplay['home_pitcher'] = new_pit
                            newplay['home_pitcher_can_be_subbed'] = False
                            newplay['home_pitcher_batters_faced'] = 0
                            #remove the pitcher from the bullpen
                            bullpen = list(newplay[['home_bullpen_0',
            'home_bullpen_1', 'home_bullpen_2', 'home_bullpen_3', 'home_bullpen_4',
            'home_bullpen_5', 'home_bullpen_6', 'home_bullpen_7', 'home_bullpen_8', 'home_bullpen_9', 'home_bullpen_10',
                                                   'home_bullpen_11', 'home_bullpen_12', 'home_bullpen_13', 'home_bullpen_14']].values[0])
                            if new_pit in bullpen:
                                bullpen[bullpen.index(new_pit)] = -1
                            newplay[['home_bullpen_0',
            'home_bullpen_1', 'home_bullpen_2', 'home_bullpen_3', 'home_bullpen_4',
            'home_bullpen_5', 'home_bullpen_6', 'home_bullpen_7', 'home_bullpen_8', 'home_bullpen_9', 'home_bullpen_10',
                                                   'home_bullpen_11', 'home_bullpen_12', 'home_bullpen_13', 'home_bullpen_14']] = bullpen

                            #define newnode's action. Note, when the DH comes into field, the pitcher spot being added to the lineup
                            #is represented as a pitcher substitution in the data, even if they use the same pitcher they had before,
                            #so in that case I need to put in an action of play at bat instead of relief pitcher
                            if new_pit == newnode.home_pitcher.values[0]:
                                newnode['action_taken'] = 'play at bat'
                            else:
                                newnode['action_taken'] = 'relief pitcher ' + str(new_pit)
                            #also define the node type as pitcher
                            newnode['node_type'] = 'pitcher'
                            nodes = pd.concat((nodes, newnode))
                            
                        elif s['details']['event'] == 'Offensive Substitution':
                            #define the batter node
                            newnode = newplay.copy()
                            #there was a pinch hit (or run), so update the batting order with the new batter. 
                            new_batter = s['player']['id']
                            old_batter = s['replacedPlayer']['id']
                            spot = get_batting_order_spot(int(s['battingOrder']))
                            #remove new batter from bench
                            bench = list(newplay[['away_bench_0', 'away_bench_1', 'away_bench_2', 'away_bench_3', 'away_bench_4', 'away_bench_5']].values[0])
                            if new_batter in bench:
                                bench[bench.index(new_batter)] = -1
                            newplay[['away_bench_0', 'away_bench_1', 'away_bench_2', 'away_bench_3', 'away_bench_4', 'away_bench_5']] = bench
                            #add them to the lineup
                            if newplay[['away_batting_order_0', 'away_batting_order_1', 'away_batting_order_2',
                    'away_batting_order_3', 'away_batting_order_4', 'away_batting_order_5',
                    'away_batting_order_6', 'away_batting_order_7', 'away_batting_order_8']].values[0,spot] == 10:
                                #it's the DH so replace the DH
                                newplay['away_dh'] = new_batter
                            else:
                                #replace the spot in the order
                                newplay['away_batting_order_' + str(spot)] = new_batter

                            #note, if the substitution was a pinch runner, then we don't actually want to define 
                            #a node for that since we're not considering pinch runner actions. We just want to 
                            #update the lineup without a node
                            if s['position']['abbreviation'] != 'PR':
                                newnode['action_taken'] = 'pinch hit ' + str(new_batter)
                                newnode['node_type'] = 'batter'
                                nodes = pd.concat((nodes, newnode))

            #now after any number of substitutions, we have one more batter node, if the last substitution was a pitcher or
            #if there were no subs,and one more pitcher node where they can choose an intentional walk or not. If the last 
            #sub was a batter node, then we just go straight to a pitcher node where they can choose to play at bat or ibb 
            #or possibly relieve
            if nodes.shape[0] == 0:
                #we need a batter node
                newnode = newplay.copy()
                newnode['node_type'] = 'batter'
                newnode['action_taken'] = 'play at bat'
                nodes = pd.concat((nodes, newnode))
            elif nodes.node_type.values[-1] in {'pitcher', 'def sub node'}:
                #we need a batter node
                newnode = newplay.copy()
                newnode['node_type'] = 'batter'
                newnode['action_taken'] = 'play at bat'
                nodes = pd.concat((nodes, newnode))
            #now define the last pitcher node
            newnode = newplay.copy()
            newnode['node_type'] = 'pitcher'
            if game_p.Event_Type.values[i] == 'Intent Walk':
                #it was an intentional walk
                newnode['action_taken'] = 'intentional walk'
                nodes = pd.concat((nodes, newnode))

            else:
                #it wasn't an intentional walk
                newnode['action_taken'] = 'play at bat'
                nodes = pd.concat((nodes, newnode))

                #lastly we need a chance node
                newnode = newplay.copy()
                if i == game_p.shape[0]-1:
                    newnode['action_taken'] = 'end of game'
                    newnode['node_type'] = 'terminal'
                else:
                    newnode['node_type'] = 'chance'
                    newnode['action_taken'] = game_p.bo_state.values[i+1]
                nodes = pd.concat((nodes, newnode))
                
                
    ############################## Bottom of Inning
        else:
            #it is the bottom of the inning
            #update batter index and pitcher batters faced based on who batted previously. If the away team batted
            #previously, update away batter index and home batters faced. Otherwise update home batter index and 
            #away batters faced
            if was_top:
                #the last batter was the away team
                bi = (newplay['away_batter_index'].values[0] + 1)%9 
                newplay['away_batter_index'] = bi
                newplay['home_pitcher_batters_faced'] += 1
                #since we just switched half innings, the away pitcher can be subbed
                newplay['away_pitcher_can_be_subbed'] = True
                #see if the home pitcher can now be subbed
                if newplay.home_pitcher_batters_faced.values[0] >= 3:
                    newplay['home_pitcher_can_be_subbed'] = True
                else:
                    newplay['home_pitcher_can_be_subbed'] = False
                    
            else:
                #the last batter was on the home team
                bi = (newplay['home_batter_index'].values[0] + 1)%9 
                newplay['home_batter_index'] = bi
                newplay['away_pitcher_batters_faced'] += 1
                #check if the away pitcher can now be subbed
                if newplay.away_pitcher_batters_faced.values[0] >= 3:
                    newplay['away_pitcher_can_be_subbed'] = True
                else:
                    newplay['away_pitcher_can_be_subbed'] = False
                
            #define the batting order spot more carefully in case I skipped a play or something using the statsapi
            batter = sapi[game_p.At_Bat.values[i] - 1]['matchup']['batter']['id']
            lup = list(game_p[['Home_Lineup_1', 'Home_Lineup_2', 'Home_Lineup_3', 'Home_Lineup_4', 'Home_Lineup_5', 'Home_Lineup_6',
                        'Home_Lineup_7', 'Home_Lineup_8', 'Home_Lineup_9']].values[i])
            bi = lup.index(batter)
            newplay['home_batter_index'] = bi
                

            #first thing we do is define defensive sub nodes if it is the start of an inning or if a team actually made a def sub in the middle of an inning.
            #in my version of the game you only do def subs at the start of an inning, but you can really do them at any time, so if a team in reality 
            #made a def sub in the middle of an inning, I need a node for that.
            if game_p.is_top_of_inning.values[i] != game_p.is_top_of_inning.values[i-1] or game_p.At_Bat.values[i] in def_sub.At_Bat.unique():
                #we just switched half innings or we made a defensive sub, so we need a def sub node
                newnode = newplay.copy()
                newnode['node_type'] = 'def sub node'
                action = ''
                if game_p.At_Bat.values[i] in def_sub.At_Bat.unique():
                    #there were substitutions. Since we have already copied newplay for the def sub node, update newplay's
                    #lineup with the substitutions. Then when we copy newplay to define the pitcher, batter, and chance nodes, it will 
                    #have the right defensive lineup
                    events = sapi[game_p.At_Bat.values[i] - 1]['playEvents']
                    subs_on_play = []
                    for s in events:
                        if 'event' in s['details']:
                            if s['details']['event'] in {'Defensive Sub', 'Defensive Switch'}:
                                subs_on_play.append(s)
                    for j, s in enumerate(subs_on_play):
                        player = s['player']['id']
                        spot = get_batting_order_spot(int(s['battingOrder']))
                        if player == newplay.away_dh.values[0] and s['position']['abbreviation'] != 'DH':
                            #the DH came in to play defense, which means the pitcher has to bat, so set the pitcher
                            #spot in the batting order to 10 for DH and set home_lost_dh to true
                            new_lineup = game_p[['Away_Lineup_1', 'Away_Lineup_2', 'Away_Lineup_3', 'Away_Lineup_4', 'Away_Lineup_5', 'Away_Lineup_6',
                        'Away_Lineup_7', 'Away_Lineup_8', 'Away_Lineup_9']].values[i]
                            pitspot = list(new_lineup).index(game_p.Away_Pitcher.values[i])
                            newplay['away_batting_order_' + str(pitspot)] = 10
                            newplay['away_lost_dh'] = True
                            newplay['away_dh'] = -1
                            newplay['away_batting_order_' + str(spot)] = player
                            action = action + s['details']['description']
                        else:
                            #the DH didn't come in, so just update the batting order spot 
                            newplay['away_batting_order_' + str(spot)] = player
                            action = action + s['details']['description']
                            
                        #remove player from the bench
                        bench = list(newplay[['away_bench_0', 'away_bench_1', 'away_bench_2', 'away_bench_3', 'away_bench_4', 'away_bench_5']].values[0])
                        if player in bench:
                            bench[bench.index(player)] = -1
                        newplay[['away_bench_0', 'away_bench_1', 'away_bench_2', 'away_bench_3', 'away_bench_4', 'away_bench_5']] = bench
                        
                #finish defining the def sub node and append to nodes
                newnode['action_taken'] = action
                nodes = pd.concat((nodes, newnode))

            #next define pitcher nodes and batter nodes. I have it ordered as batter then pitcher then at bat, but if there
            #is a pitcher sub we have to give the batter another chance to sub. So I'll start with a batter sub node, and 
            #if there are subs on the play and the first sub is a pitcher sub we'll go batter play at bat then pitcher 
            #sub then just go in order of the subs
            if game_p.At_Bat.values[i] in pit_change.At_Bat.unique() or game_p.At_Bat.values[i] in pinch_hits.At_Bat.unique():
                #there were pitcher/batter subs. Get them in the right order 
                events = sapi[game_p.At_Bat.values[i] - 1]['playEvents']
                subs_on_play = []
                for s in events:
                    if 'event' in s['details']:
                        if s['details']['event'] in {'Pitching Substitution', 'Offensive Substitution'}:
                            subs_on_play.append(s)

                for j, s in enumerate(subs_on_play):
                    if j == 0:
                        #the first play should be a batter node, regardless of if the pitcher subbed or the batter subbed 
                        if s['details']['event'] == 'Pitching Substitution':
                            #the first action was a pitcher sub, so we need to define a batter sub node with action play at bat
                            newnode = newplay.copy()
                            newnode['action_taken'] = 'play at bat'
                            newnode['node_type'] = 'batter'
                            nodes = pd.concat((nodes, newnode))
                            
                            #now we can define the pitcher sub node
                            newnode = newplay.copy()
                            new_pit = s['player']['id']
                            newplay['away_pitcher'] = new_pit
                            newplay['away_pitcher_can_be_subbed'] = False
                            newplay['away_pitcher_batters_faced'] = 0
                            #remove the pitcher from the bullpen
                            bullpen = list(newplay[['away_bullpen_0',
            'away_bullpen_1', 'away_bullpen_2', 'away_bullpen_3', 'away_bullpen_4',
            'away_bullpen_5', 'away_bullpen_6', 'away_bullpen_7', 'away_bullpen_8', 'away_bullpen_9', 'away_bullpen_10',
                                                   'away_bullpen_11', 'away_bullpen_12', 'away_bullpen_13', 'away_bullpen_14']].values[0])
                            if new_pit in bullpen:
                                bullpen[bullpen.index(new_pit)] = -1
                            newplay[['away_bullpen_0',
            'away_bullpen_1', 'away_bullpen_2', 'away_bullpen_3', 'away_bullpen_4',
            'away_bullpen_5', 'away_bullpen_6', 'away_bullpen_7', 'away_bullpen_8', 'away_bullpen_9', 'away_bullpen_10',
                                                   'away_bullpen_11', 'away_bullpen_12', 'away_bullpen_13', 'away_bullpen_14']] = bullpen

                            #define newnode's action. Note, when the DH comes into field, the pitcher spot being added to the lineup
                            #is represented as a pitcher substitution in the data, even if they use the same pitcher they had before,
                            #so in that case I need to put in an action of play at bat instead of relief pitcher
                            if new_pit == newnode.home_pitcher.values[0]:
                                newnode['action_taken'] = 'play at bat'
                            else:
                                newnode['action_taken'] = 'relief pitcher ' + str(new_pit)
                            #also define the node type as pitcher
                            newnode['node_type'] = 'pitcher'
                            nodes = pd.concat((nodes, newnode))
                        elif s['details']['event'] == 'Offensive Substitution':
                            #the first sub was a batter sub, so we just define the batter node with a sub
                            newnode = newplay.copy()
                            #there was a pinch hit (or run), so update the batting order with the new batter. 
                            new_batter = s['player']['id']
                            old_batter = s['replacedPlayer']['id']
                            spot = get_batting_order_spot(int(s['battingOrder']))
                            #remove new batter from bench
                            bench = list(newplay[['home_bench_0', 'home_bench_1', 'home_bench_2', 'home_bench_3', 'home_bench_4', 'home_bench_5']].values[0])
                            if new_batter in bench:
                                bench[bench.index(new_batter)] = -1
                            newplay[['home_bench_0', 'home_bench_1', 'home_bench_2', 'home_bench_3', 'home_bench_4', 'home_bench_5']] = bench
                            #add them to the lineup
                            if newplay[['home_batting_order_0', 'home_batting_order_1', 'home_batting_order_2',
                    'home_batting_order_3', 'home_batting_order_4', 'home_batting_order_5',
                    'home_batting_order_6', 'home_batting_order_7', 'home_batting_order_8']].values[0,spot] == 10:
                                #it's the DH so replace the DH
                                newplay['home_dh'] = new_batter
                            else:
                                #replace the spot in the order
                                newplay['home_batting_order_' + str(spot)] = new_batter

                            #define the action and node type
                            #note, if the substitution was a pinch runner, then we don't actually want to define 
                            #a node for that since we're not considering pinch runner actions. We just want to 
                            #update the lineup without a node
                            if s['position']['abbreviation'] != 'PR':
                                newnode['action_taken'] = 'pinch hit ' + str(new_batter)
                                newnode['node_type'] = 'batter'
                                nodes = pd.concat((nodes, newnode))

                    else:
                        #otherwise, we just define a node for each sub
                        if s['details']['event'] == 'Pitching Substitution':
                            #the action was a pitcher sub
                            #update newplay. Again this works because I have already copied newplay so when I copy it again it will have the new pitcher
                            #but the pitcher node that resulted in the pitching change has the old pitcher
                            newnode = newplay.copy()
                            new_pit = s['player']['id']
                            newplay['away_pitcher'] = new_pit
                            newplay['away_pitcher_can_be_subbed'] = False
                            newplay['away_pitcher_batters_faced'] = 0
                            #remove the pitcher from the bullpen
                            bullpen = list(newplay[['away_bullpen_0',
            'away_bullpen_1', 'away_bullpen_2', 'away_bullpen_3', 'away_bullpen_4',
            'away_bullpen_5', 'away_bullpen_6', 'away_bullpen_7', 'away_bullpen_8', 'away_bullpen_9', 'away_bullpen_10',
                                                   'away_bullpen_11', 'away_bullpen_12', 'away_bullpen_13', 'away_bullpen_14']].values[0])
                            if new_pit in bullpen:
                                bullpen[bullpen.index(new_pit)] = -1
                            newplay[['away_bullpen_0',
            'away_bullpen_1', 'away_bullpen_2', 'away_bullpen_3', 'away_bullpen_4',
            'away_bullpen_5', 'away_bullpen_6', 'away_bullpen_7', 'away_bullpen_8', 'away_bullpen_9', 'away_bullpen_10',
                                                   'away_bullpen_11', 'away_bullpen_12', 'away_bullpen_13', 'away_bullpen_14']] = bullpen

                            #define newnode's action. Note, when the DH comes into field, the pitcher spot being added to the lineup
                            #is represented as a pitcher substitution in the data, even if they use the same pitcher they had before,
                            #so in that case I need to put in an action of play at bat instead of relief pitcher
                            if new_pit == newnode.home_pitcher.values[0]:
                                newnode['action_taken'] = 'play at bat'
                            else:
                                newnode['action_taken'] = 'relief pitcher ' + str(new_pit)
                            #also define the node type as pitcher
                            newnode['node_type'] = 'pitcher'
                            nodes = pd.concat((nodes, newnode))
                            
                        elif s['details']['event'] == 'Offensive Substitution':
                            #define the batter node
                            newnode = newplay.copy()
                            #there was a pinch hit (or run), so update the batting order with the new batter. 
                            new_batter = s['player']['id']
                            old_batter = s['replacedPlayer']['id']
                            spot = get_batting_order_spot(int(s['battingOrder']))
                            #remove new batter from bench
                            bench = list(newplay[['home_bench_0', 'home_bench_1', 'home_bench_2', 'home_bench_3', 'home_bench_4', 'home_bench_5']].values[0])
                            if new_batter in bench:
                                bench[bench.index(new_batter)] = -1
                            newplay[['home_bench_0', 'home_bench_1', 'home_bench_2', 'home_bench_3', 'home_bench_4', 'home_bench_5']] = bench
                            #add them to the lineup
                            if newplay[['home_batting_order_0', 'home_batting_order_1', 'home_batting_order_2',
                    'home_batting_order_3', 'home_batting_order_4', 'home_batting_order_5',
                    'home_batting_order_6', 'home_batting_order_7', 'home_batting_order_8']].values[0,spot] == 10:
                                #it's the DH so replace the DH
                                newplay['home_dh'] = new_batter
                            else:
                                #replace the spot in the order
                                newplay['home_batting_order_' + str(spot)] = new_batter

                            #note, if the substitution was a pinch runner, then we don't actually want to define 
                            #a node for that since we're not considering pinch runner actions. We just want to 
                            #update the lineup without a node
                            if s['position']['abbreviation'] != 'PR':
                                newnode['action_taken'] = 'pinch hit ' + str(new_batter)
                                newnode['node_type'] = 'batter'
                                nodes = pd.concat((nodes, newnode))

            #now after any number of substitutions, we have one more batter node, if the last substitution was a pitcher or
            #if there were no subs,and one more pitcher node where they can choose an intentional walk or not. If the last 
            #sub was a batter node, then we just go straight to a pitcher node where they can choose to play at bat or ibb 
            #or possibly relieve
            if nodes.shape[0] == 0:
                #we need a batter node
                newnode = newplay.copy()
                newnode['node_type'] = 'batter'
                newnode['action_taken'] = 'play at bat'
                nodes = pd.concat((nodes, newnode))
            elif nodes.node_type.values[-1] in {'pitcher', 'def sub node'}:
                #we need a batter node
                newnode = newplay.copy()
                newnode['node_type'] = 'batter'
                newnode['action_taken'] = 'play at bat'
                nodes = pd.concat((nodes, newnode))
            #now define the last pitcher node
            newnode = newplay.copy()
            newnode['node_type'] = 'pitcher'
            if game_p.Event_Type.values[i] == 'Intent Walk':
                #it was an intentional walk
                newnode['action_taken'] = 'intentional walk'
                nodes = pd.concat((nodes, newnode))

            else:
                #it wasn't an intentional walk
                newnode['action_taken'] = 'play at bat'
                nodes = pd.concat((nodes, newnode))

                #lastly we need a chance node
                newnode = newplay.copy()
                if i == game_p.shape[0]-1:
                    newnode['action_taken'] = 'end of game'
                    newnode['node_type'] = 'terminal'
                else:
                    newnode['node_type'] = 'chance'
                    newnode['action_taken'] = game_p.bo_state.values[i+1]
                nodes = pd.concat((nodes, newnode))
                
        #lastly we concatenate onto gameplays
        gameplays = pd.concat((gameplays, nodes))

    #the last node needs a terminal value
    tv = gameplays.terminal_value.values
    tv[-1] = gameplays.home_team_won.values[-1]
    gameplays['terminal_value'] = tv
    return gameplays

In [8]:
nodes = pd.DataFrame()
for gamepk in tqdm.tqdm(gamepks):
    try:
        n = define_all_nodes(gamepk)
        nodes = pd.concat((nodes, n), ignore_index = True)
    except:
        print('failed for ' + str(gamepk))

 19%|██████████████▋                                                              | 462/2425 [23:23<2:00:23,  3.68s/it]

failed for 716816


 39%|██████████████████████████████▏                                              | 951/2425 [45:28<1:10:15,  2.86s/it]

failed for 717305


 45%|██████████████████████████████████▌                                         | 1102/2425 [52:35<1:01:59,  2.81s/it]

failed for 717457


 49%|██████████████████████████████████████                                        | 1183/2425 [56:16<56:11,  2.71s/it]

failed for 717538


 51%|███████████████████████████████████████▉                                      | 1240/2425 [58:50<40:15,  2.04s/it]

failed for 717595


100%|████████████████████████████████████████████████████████████████████████████| 2425/2425 [1:54:39<00:00,  2.84s/it]


In [9]:
nodes.to_csv('nodes_for_testing.csv', index = False)

In [10]:
nodes

Unnamed: 0,game_date,game_pk,batter_per_game,home_team_name,away_team_name,home_team_won,is_top_of_inning,inning,bo_state_index,home_score_diff,home_batter_index,away_batter_index,home_batting_order_0,home_batting_order_1,home_batting_order_2,home_batting_order_3,home_batting_order_4,home_batting_order_5,home_batting_order_6,home_batting_order_7,home_batting_order_8,home_dh,home_lost_dh,home_pitcher,home_bullpen_0,home_bullpen_1,home_bullpen_2,home_bullpen_3,home_bullpen_4,home_bullpen_5,home_bullpen_6,home_bullpen_7,home_bullpen_8,home_bench_0,home_bench_1,home_bench_2,home_bench_3,away_batting_order_0,away_batting_order_1,away_batting_order_2,away_batting_order_3,away_batting_order_4,away_batting_order_5,away_batting_order_6,away_batting_order_7,away_batting_order_8,away_dh,away_lost_dh,away_pitcher,away_bullpen_0,away_bullpen_1,away_bullpen_2,away_bullpen_3,away_bullpen_4,away_bullpen_5,away_bullpen_6,away_bullpen_7,away_bullpen_8,away_bench_0,away_bench_1,away_bench_2,away_bench_3,home_pitcher_batters_faced,home_pitcher_can_be_subbed,away_pitcher_batters_faced,away_pitcher_can_be_subbed,node_type,home_bullpen_9,home_bullpen_10,home_bullpen_11,home_bullpen_12,home_bullpen_13,home_bullpen_14,home_bench_4,home_bench_5,away_bullpen_9,away_bullpen_10,away_bullpen_11,away_bullpen_12,away_bullpen_13,away_bullpen_14,away_bench_4,away_bench_5,terminal_value,action_taken
0,2023-10-01,716352,1,KC,NYY,1,1,1,0,0,0,0,672580,677951,521692,669004,658668,686681,680118,668472,10,622110.0,False,425844.0,666142,676092,680742,663804,663704,679525,672578,664199,685107,679845,671221,682515,664728,518934,683011,669224,10,643396,665828,672724,677592,664314,543309.0,False,650633.0,666745,607074,621199,669711,596133,657376,502083,641656,593423,666163,641343,592450,650402,0,False,0,False,batter,656353,672582,543475,674444,-1,-1,676369,-1,605280,641871,543037,598286,-1,-1,519317,-1,,play at bat
1,2023-10-01,716352,1,KC,NYY,1,1,1,0,0,0,0,672580,677951,521692,669004,658668,686681,680118,668472,10,622110.0,False,425844.0,666142,676092,680742,663804,663704,679525,672578,664199,685107,679845,671221,682515,664728,518934,683011,669224,10,643396,665828,672724,677592,664314,543309.0,False,650633.0,666745,607074,621199,669711,596133,657376,502083,641656,593423,666163,641343,592450,650402,0,False,0,False,pitcher,656353,672582,543475,674444,-1,-1,676369,-1,605280,641871,543037,598286,-1,-1,519317,-1,,play at bat
2,2023-10-01,716352,1,KC,NYY,1,1,1,0,0,0,0,672580,677951,521692,669004,658668,686681,680118,668472,10,622110.0,False,425844.0,666142,676092,680742,663804,663704,679525,672578,664199,685107,679845,671221,682515,664728,518934,683011,669224,10,643396,665828,672724,677592,664314,543309.0,False,650633.0,666745,607074,621199,669711,596133,657376,502083,641656,593423,666163,641343,592450,650402,0,False,0,False,chance,656353,672582,543475,674444,-1,-1,676369,-1,605280,641871,543037,598286,-1,-1,519317,-1,,1__-0
3,2023-10-01,716352,2,KC,NYY,1,1,1,4,0,0,1,672580,677951,521692,669004,658668,686681,680118,668472,10,622110.0,False,425844.0,666142,676092,680742,663804,663704,679525,672578,664199,685107,679845,671221,682515,664728,518934,683011,669224,10,643396,665828,672724,677592,664314,543309.0,False,650633.0,666745,607074,621199,669711,596133,657376,502083,641656,593423,666163,641343,592450,650402,1,False,0,False,batter,656353,672582,543475,674444,-1,-1,676369,-1,605280,641871,543037,598286,-1,-1,519317,-1,,play at bat
4,2023-10-01,716352,2,KC,NYY,1,1,1,4,0,0,1,672580,677951,521692,669004,658668,686681,680118,668472,10,622110.0,False,425844.0,666142,676092,680742,663804,663704,679525,672578,664199,685107,679845,671221,682515,664728,518934,683011,669224,10,643396,665828,672724,677592,664314,543309.0,False,650633.0,666745,607074,621199,669711,596133,657376,502083,641656,593423,666163,641343,592450,650402,1,False,0,False,pitcher,656353,672582,543475,674444,-1,-1,676369,-1,605280,641871,543037,598286,-1,-1,519317,-1,,play at bat
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
615990,2023-03-30,718782,89,BOS,BAL,0,0,9,14,-2,3,4,657077,646240,10,807799,594807,671213,624414,657136,606132,608701,False,455119,-1,-1,519242,445276,-1,676710,601713,-1,656557,644374,-1,-1,-1,656775,668939,663630,663624,10,602104,669720,624428,622761,683002.0,False,642585,665152,-1,669330,657508,680694,608344,608723,-1,-1,621532,-1,669065,679631,5,True,4,True,pitcher,-1,670167,-1,-1,-1,-1,-1,-1,-1,-1,543056,-1,-1,-1,-1,-1,,play at bat
615991,2023-03-30,718782,89,BOS,BAL,0,0,9,14,-2,3,4,657077,646240,10,807799,594807,671213,624414,657136,606132,608701,False,455119,-1,-1,519242,445276,-1,676710,601713,-1,656557,644374,-1,-1,-1,656775,668939,663630,663624,10,602104,669720,624428,622761,683002.0,False,642585,665152,-1,669330,657508,680694,608344,608723,-1,-1,621532,-1,669065,679631,5,True,4,True,chance,-1,670167,-1,-1,-1,-1,-1,-1,-1,-1,543056,-1,-1,-1,-1,-1,,_2_-2
615992,2023-03-30,718782,90,BOS,BAL,0,0,9,18,-1,4,4,657077,646240,10,807799,594807,671213,624414,657136,606132,608701,False,455119,-1,-1,519242,445276,-1,676710,601713,-1,656557,644374,-1,-1,-1,656775,668939,663630,663624,10,602104,669720,624428,622761,683002.0,False,642585,665152,-1,669330,657508,680694,608344,608723,-1,-1,621532,-1,669065,679631,5,True,5,True,batter,-1,670167,-1,-1,-1,-1,-1,-1,-1,-1,543056,-1,-1,-1,-1,-1,,play at bat
615993,2023-03-30,718782,90,BOS,BAL,0,0,9,18,-1,4,4,657077,646240,10,807799,594807,671213,624414,657136,606132,608701,False,455119,-1,-1,519242,445276,-1,676710,601713,-1,656557,644374,-1,-1,-1,656775,668939,663630,663624,10,602104,669720,624428,622761,683002.0,False,642585,665152,-1,669330,657508,680694,608344,608723,-1,-1,621532,-1,669065,679631,5,True,5,True,pitcher,-1,670167,-1,-1,-1,-1,-1,-1,-1,-1,543056,-1,-1,-1,-1,-1,,play at bat


In [90]:
import random
gamepk = 716435
t=define_all_nodes(gamepk)

In [100]:
t[['batter_per_game', 'is_top_of_inning', 'inning', 'bo_state_index', 'home_score_diff', 'home_batter_index', 
   'away_batter_index', 'home_pitcher_batters_faced', 'home_pitcher_can_be_subbed',
   'away_pitcher_batters_faced', 'away_pitcher_can_be_subbed', 'node_type', 'away_lost_dh', 'action_taken']].iloc[278:288]

Unnamed: 0,batter_per_game,is_top_of_inning,inning,bo_state_index,home_score_diff,home_batter_index,away_batter_index,home_pitcher_batters_faced,home_pitcher_can_be_subbed,away_pitcher_batters_faced,away_pitcher_can_be_subbed,node_type,away_lost_dh,action_taken
243,83,0,9,20,-4,2,8,12,True,3,True,batter,True,play at bat
243,83,0,9,20,-4,2,8,12,True,3,True,pitcher,True,play at bat
243,83,0,9,20,-4,2,8,12,True,3,True,terminal,True,end of game


In [101]:
bo_states[20]

'1__-2'

In [97]:
t[['away_batting_order_' + str(i) for i in range(8)] +['away_dh'] + ['away_bench_' + str(i) for i in range(6)]].iloc[259:269]

Unnamed: 0,away_batting_order_0,away_batting_order_1,away_batting_order_2,away_batting_order_3,away_batting_order_4,away_batting_order_5,away_batting_order_6,away_batting_order_7,away_dh,away_bench_0,away_bench_1,away_bench_2,away_bench_3,away_bench_4,away_bench_5
243,663697,670770,668715,687952,10,682622,682829,666181,656413,641584,669222,663886,605361,-1,-1
243,663697,670770,668715,687952,10,682622,682829,666181,656413,641584,669222,663886,605361,-1,-1
243,663697,670770,668715,687952,10,682622,682829,666181,656413,641584,669222,663886,605361,-1,-1
243,663697,670770,668715,687952,10,682622,682829,666181,656413,641584,669222,663886,605361,-1,-1
243,663697,670770,668715,687952,10,682622,682829,666181,656413,641584,669222,663886,605361,-1,-1
243,663697,670770,668715,687952,10,682622,682829,666181,656413,641584,669222,663886,605361,-1,-1
243,663697,670770,668715,687952,10,682622,682829,666181,656413,641584,669222,663886,605361,-1,-1
243,663697,670770,668715,687952,10,682622,682829,666181,656413,641584,669222,663886,605361,-1,-1
243,663697,670770,668715,10,656413,682622,682829,666181,-1,641584,669222,663886,605361,-1,-1
243,663697,670770,668715,10,656413,682622,682829,666181,-1,641584,669222,663886,605361,-1,-1


In [92]:
t[['home_batting_order_' + str(i) for i in range(8)] + ['home_bench_' + str(i) for i in range(6)]].iloc[226:236]

Unnamed: 0,home_batting_order_0,home_batting_order_1,home_batting_order_2,home_batting_order_3,home_batting_order_4,home_batting_order_5,home_batting_order_6,home_batting_order_7,home_bench_0,home_bench_1,home_bench_2,home_bench_3,home_bench_4,home_bench_5
243,680757,608070,647304,10,665926,671289,686823,666310,595956,657656,677588,-1,681807,-1
243,680757,608070,647304,10,665926,671289,686823,666310,595956,657656,677588,-1,681807,-1
243,680757,608070,647304,10,665926,671289,686823,666310,595956,657656,677588,-1,681807,-1
243,680757,608070,647304,10,677588,671289,686823,666310,595956,657656,-1,-1,681807,-1
243,680757,608070,647304,10,677588,671289,686823,666310,595956,657656,-1,-1,681807,-1
243,680757,608070,647304,10,677588,671289,686823,666310,595956,657656,-1,-1,681807,-1
243,680757,608070,647304,10,677588,671289,686823,666310,595956,657656,-1,-1,681807,-1
243,680757,608070,647304,10,677588,671289,686823,666310,595956,657656,-1,-1,681807,-1
243,680757,608070,647304,10,677588,671289,686823,666310,595956,657656,-1,-1,681807,-1
243,680757,608070,647304,10,677588,671289,686823,666310,595956,657656,-1,-1,681807,-1


In [99]:
t[['away_bullpen_' + str(i) for i in range(15)] + ['away_pitcher']].iloc[268:278]

Unnamed: 0,away_bullpen_0,away_bullpen_1,away_bullpen_2,away_bullpen_3,away_bullpen_4,away_bullpen_5,away_bullpen_6,away_bullpen_7,away_bullpen_8,away_bullpen_9,away_bullpen_10,away_bullpen_11,away_bullpen_12,away_bullpen_13,away_bullpen_14,away_pitcher
243,594902,682227,622065,650960,683175,-1,664139,-1,571656,571882,-1,664747,671096,-1,-1,608371
243,594902,682227,622065,650960,683175,-1,664139,-1,571656,571882,-1,-1,671096,-1,-1,664747
243,594902,682227,622065,650960,683175,-1,664139,-1,571656,571882,-1,-1,671096,-1,-1,664747
243,594902,682227,622065,650960,683175,-1,664139,-1,571656,571882,-1,-1,671096,-1,-1,664747
243,594902,682227,622065,650960,683175,-1,664139,-1,571656,571882,-1,-1,671096,-1,-1,664747
243,594902,682227,622065,650960,683175,-1,664139,-1,571656,571882,-1,-1,671096,-1,-1,664747
243,594902,682227,622065,650960,683175,-1,664139,-1,571656,571882,-1,-1,671096,-1,-1,664747
243,594902,682227,622065,650960,683175,-1,664139,-1,571656,571882,-1,-1,671096,-1,-1,664747
243,594902,682227,622065,650960,683175,-1,664139,-1,571656,571882,-1,-1,671096,-1,-1,664747
243,594902,682227,622065,650960,683175,-1,664139,-1,571656,571882,-1,-1,671096,-1,-1,664747


In [68]:
t[['home_bullpen_' + str(i) for i in range(15)] + ['home_pitcher']].iloc[199:209]

Unnamed: 0,home_bullpen_0,home_bullpen_1,home_bullpen_2,home_bullpen_3,home_bullpen_4,home_bullpen_5,home_bullpen_6,home_bullpen_7,home_bullpen_8,home_bullpen_9,home_bullpen_10,home_bullpen_11,home_bullpen_12,home_bullpen_13,home_bullpen_14,home_pitcher
243,669456,-1,-1,-1,663474,661403,660853,615698,675540,656529,625643,663986,668909,-1,-1,680704
243,669456,-1,-1,-1,663474,661403,660853,615698,675540,656529,625643,663986,668909,-1,-1,680704
243,669456,-1,-1,-1,663474,661403,660853,615698,675540,656529,625643,663986,668909,-1,-1,680704
243,669456,-1,-1,-1,663474,661403,660853,615698,675540,656529,625643,663986,668909,-1,-1,680704
243,669456,-1,-1,-1,663474,661403,660853,615698,675540,656529,625643,663986,668909,-1,-1,680704
243,669456,-1,-1,-1,663474,661403,660853,615698,675540,656529,625643,663986,668909,-1,-1,680704
243,669456,-1,-1,-1,663474,661403,660853,615698,675540,656529,625643,663986,668909,-1,-1,680704
243,669456,-1,-1,-1,663474,661403,660853,615698,-1,656529,625643,663986,668909,-1,-1,675540
243,669456,-1,-1,-1,663474,661403,660853,615698,-1,656529,625643,663986,668909,-1,-1,675540
243,669456,-1,-1,-1,663474,661403,660853,615698,-1,656529,625643,663986,668909,-1,-1,675540
