In [113]:
import pandas as pd
import requests
import numpy as np
import statsapi

In [114]:
# Fetch the MLB schedule (sportId=1). No date is sent, so the API chooses its default day
# (presumably today — the later cells treat these games as "today's" games).
url = 'https://statsapi.mlb.com/api/v1/schedule/games/?sportId=1'
# A timeout keeps the cell from hanging indefinitely if the API does not answer,
# and raise_for_status surfaces HTTP errors instead of parsing an error body.
response = requests.get(url=url, timeout=30)
response.raise_for_status()
r = response.json()
if not r.get('dates'):
    # Without this guard, r['dates'][0] fails with an opaque IndexError/KeyError
    raise ValueError('The schedule response contains no dates/games')
# NOTE: despite its name, `dates` holds the list of games of the first date returned
dates = r['dates'][0]['games']
# One row per game; later cells read the 'link' and 'teams' columns
df = pd.DataFrame(dates)
# df

In [115]:
HOST = 'https://statsapi.mlb.com'
# Preview: download the feed of the first game listed in the schedule frame `df`
url = HOST + df.loc[0,'link']
# A timeout keeps the cell from hanging indefinitely if the API does not answer,
# and raise_for_status surfaces HTTP errors instead of parsing an error body.
response = requests.get(url=url, timeout=30)
response.raise_for_status()
r = response.json()
# Transposed so that each player is a row (later cells read the 'fullName' and 'id' columns)
player_df = pd.DataFrame(r['gameData']['players']).T
# NOTE(review): this builds a 1x1 frame holding the literal string 'players'; it is not
# used in the visible cells and looks like a leftover — confirm before removing.
player_df_all = pd.DataFrame(['players'])
gameteams_df = pd.DataFrame(r['gameData']['teams'])
# player_df.T

In [116]:
# The next cell defines the helper functions used to compute each player's final 'dayScore' in the cells that follow

In [117]:
# Prior-season stats loaded from local CSV files (paths are relative to the working directory).
# The helper functions below filter these frames by their 'player_id' column.
prevPitchingData = pd.read_csv("22-23 Pitching Data.csv")
prevBattingData = pd.read_csv("22-23 Batting Data.csv")

# Returns 'hits per 9 innings' stat for given pitcher, accounting for the last 3 seasons
def get_L3HP9(player_id):
    """Return a recency-weighted hits-per-9-innings (HP9) figure for a pitcher.

    Every row of ``prevPitchingData`` whose 'player_id' matches is treated as one
    prior season. Their hits ('hit') and innings ('p_formatted_ip') are pooled into
    a single prior HP9, which is weighted by the number of prior rows. The
    current-season 'hitsPer9Inn' from ``statsapi.player_stat_data`` is weighted 2
    so that recent performance counts more.

    Args:
        player_id: id of the pitcher (compared against 'player_id' in the CSV data
            and passed to the stats API as an int).

    Returns:
        float: the weighted average HP9; 0.0 when neither usable prior data nor a
        usable current-season value exists.
    """
    # current season HP9 weighted 2 in order to give greater weight to more recent performance
    current_weight = 2
    player = statsapi.player_stat_data(int(player_id), group="pitching", type="season", sportId=1)

    prior_rows = prevPitchingData.loc[prevPitchingData['player_id'] == player_id]
    # Sum over every matching row (the earlier loop re-read the first row each time).
    # NOTE(review): 'p_formatted_ip' is converted with float() as-is; if the CSV encodes
    # thirds of an inning as .1/.2 this slightly misstates innings — confirm.
    prior_hits = sum(float(hits) for hits in prior_rows['hit'])
    prior_ip = sum(float(innings) for innings in prior_rows['p_formatted_ip'])

    weighted_total = 0.0
    total_weight = 0
    if prior_ip > 0:
        # The pooled rate stands for len(prior_rows) seasons, so it must carry that
        # weight; otherwise the divisor counts seasons the numerator never added.
        weighted_total += prior_hits / prior_ip * 9 * len(prior_rows)
        total_weight += len(prior_rows)

    season_stats = player['stats']
    if season_stats:
        try:
            current = float(season_stats[0]['stats']['hitsPer9Inn'])
        except (TypeError, ValueError):
            # presumably the API can report a non-numeric placeholder when the pitcher
            # has no innings yet; treat that as "no current data" — TODO confirm
            current = None
        if current is not None:
            weighted_total += current * current_weight
            total_weight += current_weight

    if total_weight == 0:
        return 0.0
    return weighted_total / total_weight


# Splits the players of a game into the away and home lists
def get_player_teams(player_df, away_id, away_players, home_players):
    """Append every 'fullName' in ``player_df`` to one of the two given lists.

    The roster text returned by ``statsapi.roster(away_id)`` is searched for each
    name: names found in it are appended to ``away_players``, all other names to
    ``home_players``. Both lists are modified in place; nothing is returned.
    """
    roster_text = statsapi.roster(away_id)
    for full_name in player_df['fullName']:
        # Any name not present in the away roster text is treated as a home player
        bucket = away_players if full_name in roster_text else home_players
        bucket.append(full_name)

# Returns the batting average of the given player, accounting for the last 3 seasons
def get_last3avg(player_id):
    """Return a recency-weighted batting average for a player.

    Each row of ``prevBattingData`` whose 'player_id' matches contributes its
    'batting_avg' with weight 1; the current-season 'avg' from
    ``statsapi.player_stat_data`` contributes with weight 2.

    Args:
        player_id: id of the player (compared against 'player_id' in the CSV data
            and passed to the stats API as an int).

    Returns:
        The weighted average as a float, or 0 when the player's position is 'P'
        or the API returns no current-season batting stats.
    """
    player = statsapi.player_stat_data(int(player_id), group= "batting", type="season", sportId=1)
    if player['position'] == 'P' or player['stats'] == []:
        return 0
    # current season avg weighted 2 in order to give greater weight to more recent performance
    current_weight = 2
    prior_avgs = prevBattingData.loc[prevBattingData['player_id'] == player_id, 'batting_avg']
    # Add up every matching row (the earlier loop re-read the first row each time)
    prior_total = sum(float(season_avg) for season_avg in prior_avgs)
    current = float(player['stats'][0]['stats']['avg'])
    return (prior_total + current * current_weight) / (len(prior_avgs) + current_weight)

# Returns the atBats of the given player, accounting for the last 3 seasons
def get_last3AB(player_id):
    """Return a recency-weighted at-bats figure for a player.

    Each row of ``prevBattingData`` whose 'player_id' matches contributes its 'ab'
    with weight 1; the current-season 'atBats' from ``statsapi.player_stat_data``
    contributes with weight 2.

    Args:
        player_id: id of the player (compared against 'player_id' in the CSV data
            and passed to the stats API as an int).

    Returns:
        The weighted average as a float, or 0 when the player's position is 'P'
        or the API returns no current-season batting stats.
    """
    player = statsapi.player_stat_data(int(player_id), group= "batting", type="season", sportId=1)
    if player['position'] == 'P' or player['stats'] == []:
        return 0
    # current season at-bats weighted 2 in order to give greater weight to more recent performance
    current_weight = 2
    prior_at_bats = prevBattingData.loc[prevBattingData['player_id'] == player_id, 'ab']
    # Add up every matching row (the earlier loop re-read the first row each time)
    prior_total = sum(float(season_ab) for season_ab in prior_at_bats)
    current = float(player['stats'][0]['stats']['atBats'])
    return (prior_total + current * current_weight) / (len(prior_at_bats) + current_weight)



In [118]:
# The cell below loops over every game being played today

In [None]:
# Empty list to hold all game dfs
all_player_list = []


def _probable_pitcher_hp9(team_entry):
    """Return get_L3HP9 for a team's probable pitcher, or NaN when none is listed."""
    pitcher = team_entry.get('probablePitcher')
    if not pitcher:
        # presumably the key is absent until a starter is announced — TODO confirm
        return float('nan')
    return float(get_L3HP9(pitcher['id']))


# The probable-pitcher schedule is the same for every game, so it is fetched once here
# rather than once per loop iteration. No start/end date is sent (this used to be
# hard-coded to 2024-07-23), mirroring the date-less request that produced `df`.
s = HOST + '/api/v1/schedule?sportId=1&hydrate=probablePitcher'
t = requests.get(url=s, timeout=30).json()
t_df = pd.DataFrame(t['dates'][0]['games'])
# Games are looked up by 'gamePk' instead of by row position, so the two schedule
# responses do not have to list the games in the same order.
# NOTE(review): assumes both schedule responses expose a 'gamePk' column — confirm
teams_by_game = dict(zip(t_df['gamePk'], t_df['teams']))

# Traverses through each game being played today
for game in range(len(df)):
    # Gets the feed of the game and builds player_df with one row per player
    url = HOST + df.loc[game, 'link']
    r = requests.get(url=url, timeout=30).json()
    player_df = pd.DataFrame(r['gameData']['players']).T

    # Assigns the weighted 'batting avg' and 'at bats' stats to every player;
    # oppHP9 starts at 0 and is filled in below
    player_df = player_df.assign(
        avg=player_df['id'].apply(get_last3avg),
        atBats=player_df['id'].apply(get_last3AB),
        oppHP9=0.0,
    )

    # Gets team id of away team
    away_id = int(df.loc[game, 'teams']['away']['team']['id'])

    # Fills the lists holding the full names of the players of each team
    away_players = []
    home_players = []
    get_player_teams(player_df, away_id, away_players, home_players)

    # Gets the HP9 of each team's probable starting pitcher (NaN when none is listed,
    # which also makes the dayScore of the opposing hitters NaN)
    matchup = teams_by_game[df.loc[game, 'gamePk']]
    away_HP9 = _probable_pitcher_hp9(matchup['away'])
    home_HP9 = _probable_pitcher_hp9(matchup['home'])

    # Every hitter faces the other team's pitcher. get_player_teams puts each name in
    # exactly one of the two lists, so "not away" is the same as "home".
    is_away = player_df['fullName'].isin(away_players)
    player_df.loc[is_away, 'oppHP9'] = home_HP9
    player_df.loc[~is_away, 'oppHP9'] = away_HP9

    # Adds player_df to the list of all game dfs
    all_player_list.append(player_df)


# Combines all of the game dfs into one df with every player playing today
all_player_df = pd.concat(all_player_list)


In [None]:
# batScore: the hitter's own hitting score (avg * atBats / 30)
# dayScore: the hitter's matchup score for today (batScore + opposing pitcher's HP9)
all_player_df = (
    all_player_df
    .assign(batScore=lambda players: (players['avg'] * players['atBats'] / 30).values)
    .assign(dayScore=lambda players: (players['batScore'] + players['oppHP9']).values)
)

all_player_df

In [None]:
# Flags the players whose dayScore is above the 14.5 cutoff and keeps them in top_picks
valid_picks = all_player_df['dayScore'].gt(14.5)
top_picks = all_player_df.loc[valid_picks]
# Shows the 10 highest dayScores among all players (not only top_picks), best first
all_player_df.sort_values(by='dayScore', ascending=False).head(10)

In [None]:
# dayStatus = all_player_df.fullName + " "
# all_player_df = all_player_df.assign(dayStatus=dayStatus.values)

# for index in range (0, len(all_player_df.T)):
#     if all_player_df['dayScore'].iloc[index] > 16:
#         all_player_df.loc[index, 'dayStatus'] = 'amazing'
#     elif all_player_df['dayScore'].iloc[index] > 15:
#         all_player_df.loc[index, 'dayStatus'] = 'great'
#     elif all_player_df['dayScore'].iloc[index] > 14:
#         all_player_df.loc[index, 'dayStatus'] = 'good'
#     else:
#         all_player_df.loc[index, 'dayStatus'] = 'average'

# all_player_df.T