- http://www.baseballprospectus.com/article.php?articleid=15562
- http://www.baseballprospectus.com/article.php?articleid=15532
- https://baseball.physics.illinois.edu/FastPFXGuide.pdf

In [13]:
import pandas as pd
import tqdm
import os

# INFO: https://www.retrosheet.org/boxfile.txt


In [57]:
import pandas as pd
import os
import tqdm

def parse_baseball_game(file_path):
    game_infos = []
    players = []
    plays = []
    comments = []
    substitutions = []
    game_data = []
    batting_lines = []
    pinch_hitting_lines = []
    pinch_running_lines = []
    defensive_lines = []
    pitching_lines = []
    team_lines = []
    team_stats = []
    event_lines = []

    current_game_info = {}
    current_game_id = None

    with open(file_path, 'r') as file:
        lines = file.readlines()

    for line in lines:
        parts = line.strip().split(',')
        key = parts[0]

        if key == 'id':
            if current_game_info:
                game_infos.append(current_game_info)
                current_game_info = {}
            current_game_id = parts[1]
            current_game_info['id'] = current_game_id
        elif key == 'version':
            current_game_info['version'] = parts[1]
        elif key == 'info':
            current_game_info[parts[1]] = parts[2]
        elif key == 'start':
            players.append([current_game_id] + parts[1:])
        elif key == 'play':
            plays.append([current_game_id] + parts[1:])
        elif key == 'com':
            comments.append([current_game_id, parts[1]])
        elif key == 'sub':
            substitutions.append([current_game_id] + parts[1:])
        elif key == 'data':
            game_data.append([current_game_id] + parts[1:])
        elif key == 'stat':
            if parts[1] == 'bline':
                batting_lines.append([current_game_id] + parts[2:])
            elif parts[1] == 'phline':
                pinch_hitting_lines.append([current_game_id] + parts[2:])
            elif parts[1] == 'prline':
                pinch_running_lines.append([current_game_id] + parts[2:])
            elif parts[1] == 'dline':
                defensive_lines.append([current_game_id] + parts[2:])
            elif parts[1] == 'pline':
                pitching_lines.append([current_game_id] + parts[2:])
            elif parts[1] == 'tline':
                team_stats.append([current_game_id] + parts[2:])
        elif key == 'line':
            team_lines.append([current_game_id] + parts[1:])
        elif key.startswith('event'):
            event_lines.append([current_game_id] + parts[1:])

    if current_game_info:
        game_infos.append(current_game_info)

    game_info_df = pd.DataFrame(game_infos)
    game_info_df['file_path'] = file_path
    players_df = pd.DataFrame(players, columns=['game_id', 'player_id', 'player_name', 'team', 'batting_order', 'fielding_position'])
    plays_df = pd.DataFrame(plays, columns=['game_id', 'inning', 'team', 'player_id', 'count', 'pitches', 'event'])
    comments_df = pd.DataFrame(comments, columns=['game_id', 'comment'])
    substitutions_df = pd.DataFrame(substitutions, columns=['game_id', 'player_id', 'player_name', 'team', 'batting_order', 'fielding_position'])
    game_data_df = pd.DataFrame(game_data, columns=['game_id', 'stat', 'player_id', 'value'])
    batting_lines_df = pd.DataFrame(batting_lines, columns=['game_id', 'player_id', 'side', 'pos', 'seq', 'ab', 'r', 'h', '2b', '3b', 'hr', 'rbi', 'sh', 'sf', 'hbp', 'bb', 'ibb', 'k', 'sb', 'cs', 'gidp', 'int'])
    pinch_hitting_lines_df = pd.DataFrame(pinch_hitting_lines, columns=['game_id', 'player_id', 'inning', 'side', 'ab', 'r', 'h', '2b', '3b', 'hr', 'rbi', 'sh', 'sf', 'hbp', 'bb', 'ibb', 'k', 'sb', 'cs', 'gidp', 'int'])
    pinch_running_lines_df = pd.DataFrame(pinch_running_lines, columns=['game_id', 'player_id', 'inning', 'side', 'r', 'sb', 'cs'])
    defensive_lines_df = pd.DataFrame(defensive_lines, columns=['game_id', 'player_id', 'side', 'seq', 'pos', 'if', 'po', 'a', 'e', 'dp', 'tp', 'pb'])
    pitching_lines_df = pd.DataFrame(pitching_lines, columns=['game_id', 'player_id', 'side', 'seq', 'ip', 'no_out', 'bfp', 'h', '2b', '3b', 'hr', 'r', 'er', 'bb', 'ibb', 'k', 'hbp', 'wp', 'balk', 'sh', 'sf'])
    team_lines_df = pd.DataFrame(team_lines, columns=['game_id', 'side', 'inn_1', 'inn_2', 'inn_3', 'inn_4', 'inn_5', 'inn_6', 'inn_7', 'inn_8', 'inn_9', 'extras'])
    team_stats_df = pd.DataFrame(team_stats, columns=['game_id', 'side', 'lob', 'er', 'dp', 'tp'])
    event_lines_df = pd.DataFrame(event_lines, columns=['game_id', 'event_type', 'side', 'player_id_1', 'player_id_2', 'player_id_3', 'player_id_4', 'player_id_5'])

    return game_info_df, players_df, plays_df, comments_df, substitutions_df, game_data_df, batting_lines_df, pinch_hitting_lines_df, pinch_running_lines_df, defensive_lines_df, pitching_lines_df, team_lines_df, team_stats_df, event_lines_df

def track_positions(players_df, plays_df, substitutions_df):
    positions = {}

    # Initialize positions with starting players
    for _, player in players_df.iterrows():
        game_id = player['game_id']
        team = player['team']
        position = player['fielding_position']
        player_id = player['player_id']
        
        if game_id not in positions:
            positions[game_id] = {}
        
        if team not in positions[game_id]:
            positions[game_id][team] = {pos: None for pos in range(1, 10)}
        
        positions[game_id][team][int(position)] = player_id

    # Update positions with substitutions
    for _, sub in substitutions_df.iterrows():
        game_id = sub['game_id']
        team = sub['team']
        position = sub['fielding_position']
        player_id = sub['player_id']
        
        if game_id not in positions:
            positions[game_id] = {}
        
        if team not in positions[game_id]:
            positions[game_id][team] = {pos: None for pos in range(1, 10)}
        
        positions[game_id][team][int(position)] = player_id

    return positions

def assign_pitchers_and_positions_to_plays(plays_df, positions):
    plays_with_positions = []

    for _, play in plays_df.iterrows():
        game_id = play['game_id']
        team = play['team']
        inning = play['inning']
        player_id = play['player_id']
        
        # Assign the pitcher's player_id for this play
        pitcher_id = positions[game_id][team].get(1, None)
        play_with_position = play.to_dict()
        play_with_position['pitcher_id'] = pitcher_id
        
        plays_with_positions.append(play_with_position)

    return pd.DataFrame(plays_with_positions)

def main(directory_path):
    all_game_info_df = pd.DataFrame()
    all_players_df = pd.DataFrame()
    all_plays_df = pd.DataFrame()
    all_comments_df = pd.DataFrame()
    all_substitutions_df = pd.DataFrame()
    all_game_data_df = pd.DataFrame()
    all_batting_lines_df = pd.DataFrame()
    all_pinch_hitting_lines_df = pd.DataFrame()
    all_pinch_running_lines_df = pd.DataFrame()
    all_defensive_lines_df = pd.DataFrame()
    all_pitching_lines_df = pd.DataFrame()
    all_team_lines_df = pd.DataFrame()
    all_team_stats_df = pd.DataFrame()
    all_event_lines_df = pd.DataFrame()

    for filename in tqdm.tqdm(os.listdir(directory_path)):
        if filename.endswith('.EVA') or filename.endswith('.EVN'):
            if '2023' in filename:
                file_path = os.path.join(directory_path, filename)
                result = parse_baseball_game(file_path)
                game_info_df, players_df, plays_df, comments_df, substitutions_df, game_data_df, batting_lines_df, pinch_hitting_lines_df, pinch_running_lines_df, defensive_lines_df, pitching_lines_df, team_lines_df, team_stats_df, event_lines_df = result

                all_game_info_df = pd.concat([all_game_info_df, game_info_df], ignore_index=True)
                all_players_df = pd.concat([all_players_df, players_df], ignore_index=True)
                all_plays_df = pd.concat([all_plays_df, plays_df], ignore_index=True)
                all_comments_df = pd.concat([all_comments_df, comments_df], ignore_index=True)
                all_substitutions_df = pd.concat([all_substitutions_df, substitutions_df], ignore_index=True)
                all_game_data_df = pd.concat([all_game_data_df, game_data_df], ignore_index=True)
                all_batting_lines_df = pd.concat([all_batting_lines_df, batting_lines_df], ignore_index=True)
                all_pinch_hitting_lines_df = pd.concat([all_pinch_hitting_lines_df, pinch_hitting_lines_df], ignore_index=True)
                all_pinch_running_lines_df = pd.concat([all_pinch_running_lines_df, pinch_running_lines_df], ignore_index=True)
                all_defensive_lines_df = pd.concat([all_defensive_lines_df, defensive_lines_df], ignore_index=True)
                all_pitching_lines_df = pd.concat([all_pitching_lines_df, pitching_lines_df], ignore_index=True)
                all_team_lines_df = pd.concat([all_team_lines_df, team_lines_df], ignore_index=True)
                all_team_stats_df = pd.concat([all_team_stats_df, team_stats_df], ignore_index=True)
                all_event_lines_df = pd.concat([all_event_lines_df, event_lines_df], ignore_index=True)

    positions = track_positions(all_players_df, all_plays_df, all_substitutions_df)
    all_plays_with_positions_df = assign_pitchers_and_positions_to_plays(all_plays_df, positions)

    return (all_game_info_df, all_players_df, all_plays_with_positions_df, all_comments_df, all_substitutions_df, 
            all_game_data_df, all_batting_lines_df, all_pinch_hitting_lines_df, all_pinch_running_lines_df, 
            all_defensive_lines_df, all_pitching_lines_df, all_team_lines_df, all_team_stats_df, all_event_lines_df)

# Run the main function
directory_path = '../alldata/events/'
result = main(directory_path)
(all_game_info_df, all_players_df, all_plays_with_positions_df, all_comments_df, all_substitutions_df, 
 all_game_data_df, all_batting_lines_df, all_pinch_hitting_lines_df, all_pinch_running_lines_df, 
 all_defensive_lines_df, all_pitching_lines_df, all_team_lines_df, all_team_stats_df, all_event_lines_df) = result

# Print DataFrames to verify
print("All Game Info:")
display(all_game_info_df.head())
print("\nAll Players:")
display(all_players_df.head())
print("\nAll Plays with Positions:")
display(all_plays_with_positions_df.head())
print("\nAll Comments:")
display(all_comments_df.head())
print("\nAll Substitutions:")
display(all_substitutions_df.head())
print("\nAll Game Data:")
display(all_game_data_df.head())
print("\nAll Batting Lines:")
display(all_batting_lines_df.head())
print("\nAll Pinch Hitting Lines:")
display(all_pinch_hitting_lines_df.head())
print("\nAll Pinch Running Lines:")
display(all_pinch_running_lines_df.head())
print("\nAll Defensive Lines:")
display(all_defensive_lines_df.head())
print("\nAll Pitching Lines:")
display(all_pitching_lines_df.head())
print("\nAll Team Lines:")
display(all_team_lines_df.head())
print("\nAll Team Stats:")
display(all_team_stats_df.head())
print("\nAll Event Lines:")
display(all_event_lines_df.head())


100%|██████████| 2578/2578 [00:02<00:00, 1031.73it/s]


All Game Info:


Unnamed: 0,id,version,visteam,hometeam,site,date,number,gametype,starttime,daynight,...,windspeed,fieldcond,precip,sky,timeofgame,attendance,wp,lp,save,file_path
0,MIL202304030,2,NYN,MIL,MIL06,2023/04/03,0,regular,1:10PM,day,...,0,unknown,none,dome,157,42017,peraf001,carrc003,wilsb003,../alldata/events/2023MIL.EVN
1,MIL202304040,2,NYN,MIL,MIL06,2023/04/04,0,regular,6:40PM,night,...,0,unknown,none,dome,137,19412,milew001,schem001,,../alldata/events/2023MIL.EVN
2,MIL202304050,2,NYN,MIL,MIL06,2023/04/05,0,regular,12:40PM,day,...,0,unknown,none,dome,165,18387,willd004,ottaa001,,../alldata/events/2023MIL.EVN
3,MIL202304070,2,SLN,MIL,MIL06,2023/04/07,0,regular,7:10PM,night,...,0,unknown,none,dome,165,28459,woodb005,flahj002,,../alldata/events/2023MIL.EVN
4,MIL202304080,2,SLN,MIL,MIL06,2023/04/08,0,regular,6:10PM,night,...,0,unknown,none,dome,152,43077,montj004,lauee001,,../alldata/events/2023MIL.EVN



All Players:


Unnamed: 0,game_id,player_id,player_name,team,batting_order,fielding_position
0,MIL202304030,nimmb001,"""Brandon Nimmo""",0,1,8
1,MIL202304030,marts002,"""Starling Marte""",0,2,9
2,MIL202304030,lindf001,"""Francisco Lindor""",0,3,6
3,MIL202304030,alonp001,"""Pete Alonso""",0,4,3
4,MIL202304030,mcnej002,"""Jeff McNeil""",0,5,4



All Plays with Positions:


Unnamed: 0,game_id,inning,team,player_id,count,pitches,event,pitcher_id
0,MIL202304030,1,0,nimmb001,30,BBBB,W,guill001
1,MIL202304030,1,0,marts002,2,CF1X,8/F8XD,guill001
2,MIL202304030,1,0,lindf001,32,BBBCF>B,W.1-2,guill001
3,MIL202304030,1,0,alonp001,10,BX,63/G6MS.2-3;1-2,guill001
4,MIL202304030,1,0,mcnej002,0,X,5/P5DF/FL,guill001



All Comments:


Unnamed: 0,game_id,comment
0,MIL202304070,"""$Cardinals challenged (force play)"
1,MIL202304240,"""$Tigers challenged (tag play)"
2,MIL202304280,"""$Brewers challenged (force play)"
3,MIL202304290,"""ej"
4,MIL202304290,"""$Brewers manager Craig Counsell ejected by HP..."



All Substitutions:


Unnamed: 0,game_id,player_id,player_name,team,batting_order,fielding_position
0,MIL202304030,huntt002,"""Tommy Hunter""",0,0,1
1,MIL202304030,wilsb003,"""Bryse Wilson""",1,0,1
2,MIL202304030,escoe001,"""Eduardo Escobar""",0,3,5
3,MIL202304030,guill001,"""Luis Guillorme""",0,9,6
4,MIL202304030,santd003,"""Dennis Santana""",0,0,1



All Game Data:


Unnamed: 0,game_id,stat,player_id,value
0,MIL202304030,er,guill001,0
1,MIL202304030,er,carrc003,5
2,MIL202304030,er,huntt002,5
3,MIL202304030,er,santd003,0
4,MIL202304030,er,peraf001,0



All Batting Lines:


Unnamed: 0,game_id,player_id,side,pos,seq,ab,r,h,2b,3b,...,sh,sf,hbp,bb,ibb,k,sb,cs,gidp,int



All Pinch Hitting Lines:


Unnamed: 0,game_id,player_id,inning,side,ab,r,h,2b,3b,hr,...,sh,sf,hbp,bb,ibb,k,sb,cs,gidp,int



All Pinch Running Lines:


Unnamed: 0,game_id,player_id,inning,side,r,sb,cs



All Defensive Lines:


Unnamed: 0,game_id,player_id,side,seq,pos,if,po,a,e,dp,tp,pb



All Pitching Lines:


Unnamed: 0,game_id,player_id,side,seq,ip,no_out,bfp,h,2b,3b,...,r,er,bb,ibb,k,hbp,wp,balk,sh,sf



All Team Lines:


Unnamed: 0,game_id,side,inn_1,inn_2,inn_3,inn_4,inn_5,inn_6,inn_7,inn_8,inn_9,extras



All Team Stats:


Unnamed: 0,game_id,side,lob,er,dp,tp



All Event Lines:


Unnamed: 0,game_id,event_type,side,player_id_1,player_id_2,player_id_3,player_id_4,player_id_5


In [59]:
# Define the game data
game_data = """
id,PIT202304070
version,2
info,visteam,CHA
info,hometeam,PIT
info,site,PIT08
info,date,2023/04/07
info,number,0
info,gametype,regular
info,starttime,4:12PM
info,daynight,day
info,innings,9
info,tiebreaker,2
info,usedh,true
info,umphome,willr901
info,ump1b,estam901
info,ump2b,diazl901
info,ump3b,fleta901
info,umplf,(none)
info,umprf,(none)
info,inputtime,2023/07/28 12:23:55
info,howscored,unknown
info,pitches,pitches
info,oscorer,patte701
info,temp,53
info,winddir,tocf
info,windspeed,7
info,fieldcond,unknown
info,precip,none
info,sky,cloudy
info,timeofgame,168
info,attendance,39167
info,wp,mored002
info,lp,diekj001
info,save,croww001
start,andet001,"Tim Anderson",0,1,6
start,robel002,"Luis Robert Jr.",0,2,8
start,vauga001,"Andrew Vaughn",0,3,3
start,moncy001,"Yoan Moncada",0,4,5
start,burgj001,"Jake Burger",0,5,10
start,grany001,"Yasmani Grandal",0,6,2
start,andre001,"Elvis Andrus",0,7,4
start,gonzr003,"Romy Gonzalez",0,8,7
start,colao001,"Oscar Colas",0,9,9
start,gioll001,"Lucas Giolito",0,0,1
start,cruzo001,"Oneil Cruz",1,1,6
start,reynb001,"Bryan Reynolds",1,2,7
start,mccua001,"Andrew McCutchen",1,3,10
start,santc002,"Carlos Santana",1,4,3
start,hayek001,"Ke'Bryan Hayes",1,5,5
start,suwij001,"Jack Suwinski",1,6,8
start,joe-c001,"Connor Joe",1,7,9
start,bae-j001,"Ji Hwan Bae",1,8,4
start,delaj004,"Jason Delay",1,9,2
start,hillr001,"Rich Hill",1,0,1
play,1,0,andet001,10,BX,D8/L78XD+
play,1,0,robel002,02,FFX,4/L4MD
play,1,0,vauga001,00,>B,SB3
play,1,0,vauga001,10,>B.X,63/G6.3-H
play,1,0,moncy001,31,BBCBX,53/G5
play,1,1,cruzo001,00,X,8/F8
play,1,1,reynb001,21,CBBX,8/F8XD+
play,1,1,mccua001,00,X,S9/G4
play,1,1,santc002,11,CB1,POCS2(136)
play,2,0,burgj001,02,CSX,D9/F9D+
play,2,0,grany001,32,FBFBBT,K
play,2,0,andre001,12,CBFX,4/P4D
play,2,0,gonzr003,01,CX,8/F8XD+
play,2,1,santc002,22,BCCBFX,HR/F9D
play,2,1,hayek001,21,BCBX,8/F89XD
play,2,1,suwij001,32,CCBBBC,K
play,2,1,joe-c001,32,BBCCBX,D9/L89D
play,2,1,bae-j001,20,B2*BX,D48/G4.2-H
play,2,1,delaj004,12,SSBX,S2/G2-.2-3
play,2,1,cruzo001,02,CCS,K
play,3,0,colao001,12,BFSX,3/G3
play,3,0,andet001,22,BCFBFX,S8/G6M
play,3,0,robel002,01,C1X,HR/F8XD.1-H
play,3,0,vauga001,11,BFX,53/G56
play,3,0,moncy001,01,CX,S9/L89
play,3,0,burgj001,11,BSX,HR/F7D.1-H
play,3,0,grany001,00,X,9/F9S
play,3,1,reynb001,12,BCFFX,S8/L89D+
play,3,1,mccua001,12,*BSCFX,S8/L78D.1-2
play,3,1,santc002,22,SFB*BX,3/L3
play,3,1,hayek001,01,CX,D8/L78XD+.2-H;1-3
play,3,1,suwij001,12,CBTS,K
play,3,1,joe-c001,00,X,63/G6
play,4,0,andre001,12,CBFX,53/G56
play,4,0,gonzr003,00,X,3/P3D
play,4,0,colao001,10,BX,5/P5F/FL
play,4,1,bae-j001,01,CX,S1/BG23
com,"$Pirates challenged (play at 1st), call on the field was overturned."
play,4,1,delaj004,00,X,S9/L9L.1-3
play,4,1,cruzo001,11,S*BX,S6/P8S.3-H;1-2
play,4,1,reynb001,10,BX,HR/F8XD.2-H;1-H
play,4,1,mccua001,10,BX,3/G3
play,4,1,santc002,11,BCX,9/F9
play,4,1,hayek001,01,CX,4/P9LS
play,5,0,andet001,12,SBFX,S7/G56
play,5,0,robel002,20,B11>B1,SB2
play,5,0,robel002,32,B11>B1.BCC.X,HR/F9D.2-H
play,5,0,vauga001,00,,NP
sub,mored002,"Dauri Moreta",1,0,1
play,5,0,vauga001,31,.BCBBX,7/F78D
play,5,0,moncy001,22,CSBFFFBFT,K
play,5,0,burgj001,32,SBBBC.S,K
play,5,1,suwij001,00,,NP
sub,diekj001,"Jake Diekman",0,0,1
play,5,1,suwij001,32,.BFCBBX,43/G4
play,5,1,joe-c001,32,CBFFBBX,D8/L78D
play,5,1,bae-j001,32,BCFBBX,S7/G56.2-3
play,5,1,delaj004,01,CX,S7/G56.3-H;1-2
play,5,1,cruzo001,30,*B*BBB,W.2-3;1-2
play,5,1,reynb001,00,,NP
sub,lambj003,"Jimmy Lambert",0,0,1
play,5,1,reynb001,01,.CX,T9/L9L.3-H;2-H;1-H;B-H(E4/TH)(NR)(UR)
play,5,1,mccua001,32,BCBSBX,63/G6
play,5,1,santc002,12,BCFS,K
play,6,0,grany001,31,BCBBB,W
play,6,0,andre001,20,*BBX,7/F78D
play,6,0,gonzr003,00,X,64(1)/FO/G6.B-1
play,6,0,colao001,10,*BX,7/F7D
play,6,1,hayek001,00,,NP
sub,schoj002,"Jesse Scholtens",0,0,1
play,6,1,hayek001,00,.X,9/F9LS
play,6,1,suwij001,22,BCSBC,K
play,6,1,joe-c001,01,CX,S8/L89
play,6,1,bae-j001,20,BBX,9/F89D
play,7,0,andet001,00,,NP
sub,croww001,"Wil Crowe",1,0,1
play,7,0,andet001,21,.CBBX,8/L8XD+
play,7,0,robel002,02,CSF*S,K
play,7,0,vauga001,31,BBBC*B,W
play,7,0,moncy001,12,CBSX,7/L78D
play,7,1,delaj004,22,CBSBFX,3/G34
play,7,1,cruzo001,31,BTB.BB,W
play,7,1,reynb001,11,BCX,36(1)/FO/G3.B-1
play,7,1,mccua001,22,*BFF*BS,K
play,8,0,burgj001,00,X,13/G1
play,8,0,grany001,32,BFSBBFC,K
play,8,0,andre001,32,CCBFFFBFBX,7/F7D
play,8,1,santc002,32,FBBBFFFB,W
play,8,1,hayek001,02,CCX,S8/L8.1-3
play,8,1,suwij001,22,SBBF>FS,K
play,8,1,joe-c001,21,CB*BX,D9/F89XD+.3-H;1-3
play,8,1,bae-j001,00,,NP
sub,castr006,"Rodolfo Castro",1,7,12
play,8,1,bae-j001,22,.FFBBS,K
play,8,1,delaj004,01,FX,3/G34S-
play,9,0,gonzr003,00,,NP
sub,suwij001,"Jack Suwinski",1,6,9
play,9,0,gonzr003,00,.,NP
sub,castr006,"Rodolfo Castro",1,7,4
play,9,0,gonzr003,00,..,NP
sub,bae-j001,"Ji Hwan Bae",1,8,8
play,9,0,gonzr003,22,...BBCFX,5/P5S-
play,9,0,colao001,10,BX,HR/F78XD
play,9,0,andet001,11,BSX,D7/L7S+
play,9,0,robel002,02,SSX,S8/L8.2-H
play,9,0,vauga001,12,FBCFS,K
play,9,0,moncy001,32,BSBF*B>C,K
data,er,diekj001,4
data,er,schoj002,1
data,er,lambj003,0
data,er,gioll001,7
data,er,mored002,0
data,er,hillr001,7
data,er,croww001,2
"""

# Parse the data
lines = game_data.strip().split('\n')
pitchers = {}

# Store initial starting pitchers
for line in lines:
    if line.startswith('start,'):
        parts = line.split(',')
        player_id = parts[1]
        team = parts[3]
        pos = parts[5]
        if pos == '1':  # Pitcher position
            if team == '0':
                team_name = 'CHA'
            else:
                team_name = 'PIT'
            pitchers[(team_name, 1)] = player_id

# Parse substitutions and play events to find inning changes
current_inning = 1
for line in lines:
    if line.startswith('play,'):
        parts = line.split(',')
        inning = int(parts[1])
        if inning > current_inning:
            current_inning = inning

    if line.startswith('sub,'):
        parts = line.split(',')
        player_id = parts[1]
        team = parts[4]
        if team == '0':
            team_name = 'CHA'
        else:
            team_name = 'PIT'
        
        # Find the next available inning
        for inning in range(current_inning, 10):
            if (team_name, inning) not in pitchers:
                pitchers[(team_name, inning)] = player_id
                break

# Get the pitchers for each inning
def get_pitchers_for_innings(pitchers, innings=9):
    cha_pitchers = []
    pit_pitchers = []

    cha_current_pitcher = pitchers.get(('CHA', 1))
    pit_current_pitcher = pitchers.get(('PIT', 1))

    for inning in range(1, innings + 1):
        cha_pitchers.append(cha_current_pitcher)
        pit_pitchers.append(pit_current_pitcher)

        cha_next_pitcher = pitchers.get(('CHA', inning + 1))
        if cha_next_pitcher:
            cha_current_pitcher = cha_next_pitcher

        pit_next_pitcher = pitchers.get(('PIT', inning + 1))
        if pit_next_pitcher:
            pit_current_pitcher = pit_next_pitcher

    return cha_pitchers, pit_pitchers

# Get the pitchers for each inning
cha_pitchers, pit_pitchers = get_pitchers_for_innings(pitchers)

# Print the results
print("Inning\tWhite Sox Pitcher\tPirates Pitcher")
for inning in range(1, 10):
    print(f"{inning}\t{cha_pitchers[inning-1]}\t{pit_pitchers[inning-1]}")


Inning	White Sox Pitcher	Pirates Pitcher
1	gioll001	hillr001
2	gioll001	hillr001
3	gioll001	hillr001
4	gioll001	hillr001
5	mored002	hillr001
6	diekj001	hillr001
7	lambj003	hillr001
8	schoj002	castr006
9	croww001	suwij001
