# Functions to select best pitch to throw in given baseball situation

In [1]:
# Import statements
import pandas as pd

### Function to combine pitcher and batter outcome histories 

In [2]:
def combine_histories(pitcher_history: pd.DataFrame, batter_history: pd.DataFrame,
                      method: str = 'product', extra_pitches: bool = False) -> pd.DataFrame:
    '''
    Combines pitcher and batter outcome histories into outcome metrics for pitch selection.

    Both frames are matched on their index labels and column labels. The first column of each
    frame is treated as a non-outcome column and is never combined; the 'Count' column of the
    result is the sum of both players' counts. Neither input frame is modified.

    Args:
        pitcher_history: Pitcher outcome table. Must have 'Count' and 'Ball' columns.
        batter_history: Batter outcome table with the same layout.
        method: How matching cells are combined: 'product' (default), 'min', or 'max'.
        extra_pitches: If False (default), rows containing any missing value after combining
            (e.g. a pitch in the pitcher's history but not the batter's) are dropped.
            If True, those rows are kept, with NaN wherever no combined value exists.

    Returns:
        New pandas dataframe indexed like the pitcher's history (minus dropped rows).

    Raises:
        ValueError: If method is not 'product', 'min', or 'max'.
    '''
    if method not in ('product', 'min', 'max'):
        raise ValueError("method must be 'product', 'min', or 'max'; got {!r}".format(method))

    # Object dtype means the table has not been converted to numbers yet.
    # perc_to_dec lives in bb_webscraper (not visible here); presumably it turns
    # percent strings into decimals -- confirm against that module.
    # Compare with ==, not `is`: identity of string literals is not guaranteed.
    if pitcher_history.Ball.dtype.kind == 'O':
        pitcher_history = bb_webscraper.perc_to_dec(pitcher_history, ignore_cols='Count')
    if batter_history.Ball.dtype.kind == 'O':
        batter_history = bb_webscraper.perc_to_dec(batter_history, ignore_cols='Count')

    # Work on a copy so the caller's pitcher table is left untouched.
    combined_history = pitcher_history.copy()
    rate_columns = combined_history.columns[1:]

    # Line the batter's outcome columns up with the pitcher's rows/columns (left join);
    # anything the batter lacks becomes NaN.
    pitcher_rates = pitcher_history.iloc[:, 1:]
    batter_rates = batter_history.iloc[:, 1:].reindex(index=pitcher_rates.index,
                                                      columns=pitcher_rates.columns)

    if method == 'product':
        combined_rates = pitcher_rates * batter_rates
    else:
        if method == 'min':
            keep_pitcher = pitcher_rates <= batter_rates
        else:
            keep_pitcher = pitcher_rates >= batter_rates
        combined_rates = pitcher_rates.where(keep_pitcher, batter_rates)
        # Comparisons with NaN are False, so re-mask to keep NaN wherever either side is missing
        # (same missing-data behaviour as the product method).
        combined_rates = combined_rates.where(pitcher_rates.notna() & batter_rates.notna())

    combined_history[rate_columns] = combined_rates

    # Total pitch counts; NaN where the batter has no row for that pitch.
    combined_history['Count'] = (pitcher_history['Count']
                                 + batter_history['Count'].reindex(combined_history.index))

    # dropna returns a new frame, so the result must be assigned.
    if not extra_pitches:
        combined_history = combined_history.dropna()

    return combined_history

### Pitch Selector Function

In [3]:
def pitch_selector(pitcher: str, batter: str, balls: int, strikes: int, outs: int, 
                   first: int, second: int, third: int, inning: int = None, season: int = None,
                   runs_scored:int = 0, runs_allowed:int = 0) -> dict:
    '''
    Takes in game state info, calculates best pitch to throw given game state, pitcher history,
    and batter history, then returns dict of best pitch, location, and desired outcome.

    Args:
        pitcher, batter: Player names passed to the Brooks Baseball scraper.
        balls, strikes, outs: Current count and outs.
        first, second, third: 1 if the base is occupied, 0 if empty.
        inning, season: Optional inning number and season year.
        runs_scored, runs_allowed: Stored in the game state as 'R' and 'RA' (default 0).

    Returns:
        Dict with keys 'Pitch', 'Location', and 'Desired Outcome'.

    Raises:
        ValueError: If no pitch has a usable value for the desired outcome.

    Note: Need to think of how to pass extra parameters to internal functions.
    Note: Uses the bb_webscraper and outcomes modules, which must already be available
    in the enclosing namespace (they are not imported in the visible import cell).

    >>> pitch_selector('Dallas Keuchel', 'Albert Almora', balls=1, strikes=2, outs=1,
    ...                first=1, second=0, third=0, inning=3, season=2019)['Pitch']
    'Changeup'
    '''
    # Scrape pitcher and batter histories in given game state.
    # Both scrapes use the same count/base-runner filters.
    # NOTE(review): handedness is hard-coded to 'R' for both scrapes.
    situation = {'balls':balls,'strikes':strikes,'1b':first,'2b':second,'3b':third}
    pitcher_history = bb_webscraper.scrape_brooksbb(player=pitcher, batter_hand='R', season=season,
                                                   pitcher_or_batter='pitcher',table_type='po',
                                                   params_dict=dict(situation))
    batter_history = bb_webscraper.scrape_brooksbb(player=batter, pitcher_hand='R', season=season,
                                                   pitcher_or_batter='batter',table_type='po',
                                                   params_dict=dict(situation))

    # Create one-row game state df and decide desired outcome.
    # The score arguments are passed through instead of hard-coded zeros.
    game_state = pd.DataFrame(data={"Inning":inning, "Outs":outs, "Strikes":strikes, "Balls":balls,
                                    "First":bool(first), "Second":bool(second), "Third":bool(third),
                                    "R":runs_scored, "RA":runs_allowed}, index=[0])
    game_state['Desired Outcome'] = outcomes.desired_outcomes(game_state)
    desired_outcome = game_state.loc[0,'Desired Outcome']

    # Combine pitcher and batter histories for outcome metrics
    combined_histories = combine_histories(pitcher_history,batter_history)

    # Choose pitch with greatest chance of producing desired result based on combined histories.
    # Fail with a clear message when there is nothing to rank.
    outcome_scores = combined_histories[desired_outcome]
    if outcome_scores.dropna().empty:
        raise ValueError("No pitch has combined data for desired outcome {!r}".format(desired_outcome))
    best_pitch = outcome_scores.idxmax()

    # Choose best pitch location !!!WORK IN PROGRESS!!!
    location = outcomes.pitch_for_outcome().loc[desired_outcome,'Location']

    # Fill result dict and return it
    results = {
        'Pitch': best_pitch,
        'Location': location,
        'Desired Outcome': desired_outcome,
    }
    return results

In [4]:
# Given game state, i.e. 1 row of info...
# 1. Decide desired outcome
# 2. Retrieve pitcher and batter outcome histories (maybe add pitcher/batter names to df)
# 3. Left join data so only pitches in pitcher's repertoire are considered (pd.merge)
#     a. Make sure data is converted from string percents to decimal values for numerical work
#     b. Maybe multiply dfs elementwise upon merging to combine batter/pitcher probabilities
#         -> e.g. 'Fourseam, Whiffs' is 0.25 for pitcher, 0.4 for batter, 0.1 resulting
#         -> Weighting the multiplication by pitches thrown might be good, but requires 
#            exponentiation and some basic probability concepts. Start without weights for now.
#         -> Maybe taking the min or max is smarter... try prod,weights,min,max and compare? :)
#     c. Note: If we already know the outcome we want, might just need 1 column from each outcome df
# 4. Choose pitch with maximum chances of desired result.