In [3]:
import pandas as pd
import requests
import numpy as np
import statsapi

In [4]:
url = 'http://statsapi.mlb.com/api/v1/schedule/games/?sportId=1'
r = requests.get(url=url).json()
dates = r['dates'][0]['games']
df = pd.DataFrame(dates)
# df

In [5]:
HOST = 'https://statsapi.mlb.com'
url = HOST + df.loc[0,'link']
r = requests.get(url=url).json()
player_df = pd.DataFrame(r['gameData']['players'])
player_df_all = pd.DataFrame(['players'])
player_df = player_df.T
gameteams_df = pd.DataFrame(r['gameData']['teams'])
# player_df.T

In [6]:
# The cell below holds all of the functions needed to get the final 'dayScore' of each player in the subsequent cell

In [31]:
prevPitchingData = pd.read_csv("22-23 Pitching Data.csv")
prevBattingData = pd.read_csv("22-23 Batting Data.csv")

# Returns 'hits per 9 innings' stat for given pitcher, accounting for the last 3 seasons
def get_L3HP9(player_id):
    player = statsapi.player_stat_data(int(player_id), group= "pitching", type="season", sportId=1)
    num_seasons = 2
    penult_ult_hp9 = 0
    ip = 0
    pitch_df = prevPitchingData.loc[prevPitchingData['player_id'] == player_id]
    if not pitch_df.empty:
        for index, row in pitch_df.iterrows():
            penult_ult_hp9 += float(pitch_df['hit'].iloc[0])
            ip += float(pitch_df['p_formatted_ip'].iloc[0])
            num_seasons += 1
    if ip == 0:
        penult_ult_hp9 = 0
    else:
        penult_ult_hp9 = penult_ult_hp9 / ip * 9
    current = float(player['stats'][0]['stats']['hitsPer9Inn'])
    # current season HP9 multiplied by 2 in order to give greater weight to more recent performance
    return (penult_ult_hp9 + current * 2) / num_seasons


# Fills list of away and home players with the players of each team
def get_player_teams(player_df, away_id, away_players, home_players):
    away_roster = statsapi.roster(away_id)
    for player in player_df['fullName']:
        if away_roster.find(player) != -1:
            away_players.append(player)
        else:
            home_players.append(player)

# Returns the batScore of the given player, accounting for the last 3 seasons (using avg and atBats)
def get_batScore(player_id):
    player = statsapi.player_stat_data(int(player_id), group= "batting", type="season", sportId=1)
    if player['position'] == 'P' or player['stats'] == []:
        return 0
    num_seasons = 2
    penult_ult_avg = 0
    penult_ult_atBats = 0
    penult_ult_games = 0
    bat_df = prevBattingData.loc[prevBattingData['player_id'] == player_id]
    if not bat_df.empty:
        for index, row in bat_df.iterrows():
            penult_ult_avg += float(bat_df['batting_avg'].iloc[0])
            penult_ult_atBats += float(bat_df['ab'].iloc[0])
            penult_ult_games += float(bat_df['b_game'].iloc[0])
            num_seasons += 1
    
    currentSAVG = float(player['stats'][0]['stats']['avg'])
    # current season avg multiplied by 2 in order to give greater weight to more recent performance
    average = (penult_ult_avg + currentSAVG * 2) / num_seasons
    
    currentSAB = float(player['stats'][0]['stats']['atBats'])
    # current season atBats multiplied by 2 in order to give greater weight to more recent performance
    atBats = (penult_ult_atBats + currentSAB * 2) / num_seasons

    currentG = float(player['stats'][0]['stats']['gamesPlayed'])
    # current season atBats multiplied by 2 in order to give greater weight to more recent performance
    games = (penult_ult_games + currentG * 2) / num_seasons

    atBatsPG = atBats / games

    return average * atBatsPG / .956

In [18]:
# The cell below traverses through each game being played today

In [32]:
# Empty list to hold all game dfs
all_player_list = []

# Traverses through each game being played today
for game in range (0, len(df)):
    # Gets the necessary general info for each game (teams, players, etc.)
    url = HOST + df.loc[game,'link']
    r = requests.get(url=url).json()
    s = HOST + '/api/v1/schedule?sportId=1&hydrate=probablePitcher&startDate=2024-07-23&endDate=2024-07-23'
    t = requests.get(url=s).json()
    t_df = pd.DataFrame(t['dates'][0]['games'])
    gameteams_df = pd.DataFrame(r['gameData']['teams'])
    # player_df set equal to all the players for the given game
    player_df = pd.DataFrame(r['gameData']['players'])
    player_df = player_df.T

    # Assigns 'bat score' stat to every player in player_df
    bScore = player_df.id.apply(get_batScore)
    player_df = player_df.assign(batScore=bScore.values)
    # Assigns HP9 stat of opposing pitcher to every player in player_df (initially set to 0 here)
    oppHP9 = player_df.id * 0
    player_df = player_df.assign(oppHP9=oppHP9.values)
    

    # Gets team id of away team
    away_id = int(df.T[game]['teams']['away']['team']['id'])

    # Creates empty list to hold (the full names of) the players of each team
    away_players = []
    home_players = []

    # Fills the player list of away and home teams
    get_player_teams(player_df, away_id, away_players, home_players)

    # Gets the HP9 of the away team starting pitcher
    away_pitcher_id = t_df['teams'][game]['away']['probablePitcher']['id']
    away_HP9 = float(get_L3HP9(away_pitcher_id)) / 8.344
    # Gets the HP9 of the home team starting pitcher
    home_pitcher_id = t_df['teams'][game]['home']['probablePitcher']['id']
    home_HP9 = float(get_L3HP9(home_pitcher_id)) / 8.344

    # Assigns HP9 stat of opposing pitcher to every player in player_df
    for player_name in away_players:
        player_df.loc[player_df['fullName'] == player_name, 'oppHP9'] = home_HP9
    for player_name in home_players:
        player_df.loc[player_df['fullName'] == player_name, 'oppHP9'] = away_HP9

    # Adds player_df to the list of all game dfs
    all_player_list.append(player_df)


# Combines all of the game dfs into one df with every player playing today
all_player_df = pd.concat(all_player_list)

# all_player_df.T


In [33]:
# Gets the 'batScore' (hitter's matchup score for today) stat and assigns it to every player
dayScore = all_player_df.batScore + all_player_df.oppHP9 * 2
all_player_df = all_player_df.assign(dayScore=dayScore.values)

all_player_df

Unnamed: 0,id,fullName,link,firstName,lastName,primaryNumber,birthDate,currentAge,birthCity,birthStateProvince,...,fullLFMName,strikeZoneTop,strikeZoneBottom,pronunciation,nameMatrilineal,batScore,oppHP9,nameTitle,nameSuffix,dayScore
ID542888,542888,Shawn Armstrong,/api/v1/people/542888,Shawn,Armstrong,30,1990-09-11,33,New Bern,NC,...,"Armstrong, Shawn Michael",3.467,1.589,,,0.000000,1.133917,,,2.267834
ID575929,575929,Willson Contreras,/api/v1/people/575929,Willson,Contreras,40,1992-05-13,32,Puerto Cabello,,...,"Contreras, Willson Eduardo",3.51,1.59,,,0.966044,1.133917,,,3.233878
ID571448,571448,Nolan Arenado,/api/v1/people/571448,Nolan,Arenado,28,1991-04-16,33,Newport Beach,CA,...,"Arenado, Nolan James",3.44,1.66,R-en-NAH-do,,1.099927,1.133917,,,3.367761
ID621550,621550,Patrick Wisdom,/api/v1/people/621550,Patrick,Wisdom,16,1991-08-27,32,Murrieta,CA,...,"Wisdom, Patrick Ian-Cashel",3.43,1.57,,,0.630857,0.733461,,,2.097779
ID684007,684007,Shota Imanaga,/api/v1/people/684007,Shota,Imanaga,18,1993-09-01,30,Kitakyushu,,...,"Imanaga, Shota",3.301,1.504,,,0.000000,0.733461,,,1.466922
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
ID593871,593871,Jorge Polanco,/api/v1/people/593871,Jorge,Polanco,7,1993-07-05,31,San Pedro de Macoris,,...,"Polanco, Jorge Luis",3.4,1.65,,Pacheco,0.773342,0.776104,,,2.325551
ID607208,607208,Trea Turner,/api/v1/people/607208,Trea,Turner,7,1993-06-30,31,Boynton Beach,FL,...,"Turner, Trea Vance",3.52,1.73,,,1.325442,0.8262,,,2.977843
ID614179,614179,José Ruiz,/api/v1/people/614179,José,Ruiz,66,1994-10-21,29,Guacara,,...,"Ruiz, José Rafael",3.411,1.565,,Aparicio,0.000000,0.8262,,,1.652401
ID613564,613564,Jason Vosler,/api/v1/people/613564,Jason,Vosler,35,1993-09-06,30,West Nyack,NY,...,"Vosler, Jason Glenn",3.3,1.52,voss-LURR,,0.745965,0.776104,,,2.298174


In [34]:
# Gets the top players for today's batting matchups
valid_picks = all_player_df['dayScore'] > 14.5
top_picks = all_player_df.loc[valid_picks]
# Displays top 10 matchups, with best at the top
all_player_df.sort_values(by = ['dayScore'], ascending = False).head(10)

Unnamed: 0,id,fullName,link,firstName,lastName,primaryNumber,birthDate,currentAge,birthCity,birthStateProvince,...,fullLFMName,strikeZoneTop,strikeZoneBottom,pronunciation,nameMatrilineal,batScore,oppHP9,nameTitle,nameSuffix,dayScore
ID660766,660766,Juan Yepez,/api/v1/people/660766,Juan,Yepez,18,1998-02-19,26,Caracas,,...,"Yepez, Juan David",3.43,1.66,,Alvarez,1.378138,1.397411,,,4.172961
ID683734,683734,Andrew Vaughn,/api/v1/people/683734,Andrew,Vaughn,25,1998-04-03,26,Santa Rosa,CA,...,"Vaughn, Andrew Clayton",3.2,1.49,,,1.000806,1.559204,,,4.119214
ID650859,650859,Luis Rengifo,/api/v1/people/650859,Luis,Rengifo,2,1997-02-26,27,Naguanagua,,...,"Rengifo, Luis Jose",3.18,1.45,ren-HE-foe,Canate,1.131975,1.478907,,,4.089789
ID643217,643217,Andrew Benintendi,/api/v1/people/643217,Andrew,Benintendi,23,1994-07-06,30,Cincinnati,OH,...,"Benintendi, Andrew Sebastian",3.3,1.52,,,0.95039,1.559204,,,4.068798
ID673357,673357,Luis Robert Jr.,/api/v1/people/673357,Luis,Robert,88,1997-08-03,26,Guantanamo,,...,"Robert Jr., Luis",3.87,1.83,RAH-bert,Moirant,0.929363,1.559204,Jr.,Jr.,4.047771
ID681351,681351,Logan O'Hoppe,/api/v1/people/681351,Logan,O'Hoppe,14,2000-02-09,24,West Islip,NY,...,"O'Hoppe, Logan",3.34,1.5,oh-HOP-ee,,1.036127,1.478907,,,3.993941
ID672820,672820,Lenyn Sosa,/api/v1/people/672820,Lenyn,Sosa,50,2000-01-25,24,Puerto Ordaz,,...,"Sosa, Lenyn Jose",3.33,1.63,,Salazar,0.832016,1.559204,,,3.950425
ID670032,670032,Nicky Lopez,/api/v1/people/670032,Nicholas,Lopez,8,1995-03-13,29,Naperville,IL,...,"Lopez, Nicholas",3.01,1.35,,,0.79337,1.559204,,,3.911778
ID621493,621493,Taylor Ward,/api/v1/people/621493,Joseph,Ward,3,1993-12-14,30,Dayton,OH,...,"Ward, Joseph Taylor",3.34,1.58,,,0.943506,1.478907,,,3.90132
ID694384,694384,Nolan Schanuel,/api/v1/people/694384,Nolan,Schanuel,18,2002-02-14,22,Boca Raton,FL,...,"Schanuel, Nolan Ryan",3.4,1.65,,,0.933862,1.478907,,,3.891676


In [None]:
# dayStatus = all_player_df.fullName + " "
# all_player_df = all_player_df.assign(dayStatus=dayStatus.values)

# for index in range (0, len(all_player_df.T)):
#     if all_player_df['dayScore'].iloc[index] > 16:
#         all_player_df.loc[index, 'dayStatus'] = 'amazing'
#     elif all_player_df['dayScore'].iloc[index] > 15:
#         all_player_df.loc[index, 'dayStatus'] = 'great'
#     elif all_player_df['dayScore'].iloc[index] > 14:
#         all_player_df.loc[index, 'dayStatus'] = 'good'
#     else:
#         all_player_df.loc[index, 'dayStatus'] = 'average'

# all_player_df.T