# Functions for simulating data to begin pitch predictions

In [1]:
# Import statements
import numpy as np
import pandas as pd
import random as rd

### Dataframe simulator function

In [2]:
def sim_df(options: dict, n: int, weights: dict = None) -> pd.DataFrame:
    '''
    Generates pandas dataframe with n observations from the given options dictionary.
    Columns are the keys of options. Each column is sampled (with replacement) from that key's values.

    If weights is given, it maps column names to a sequence of relative weights, one per option value
    (e.g. a tuple). Columns that have no entry in weights are sampled uniformly, so weights may cover
    only some of the columns.
    '''
    # A missing or empty weights dictionary means no column is weighted
    weights = weights or {}

    # Sample each column independently; weights=None makes rd.choices sample uniformly
    sim_dict = {
        column: rd.choices(values, weights=weights.get(column), k=n)
        for column, values in options.items()
    }

    return pd.DataFrame(data=sim_dict)

### Simulating game state dataframe (outs, runs, count, men on base)

In [3]:
def sim_game_state(n: int) -> pd.DataFrame:
    '''
    Build a pandas dataframe holding n randomly simulated game states.
    Columns: Inning, Outs, Strikes, Balls, First/Second/Third (baserunner flags), RS and RA (runs scored/allowed).
    '''
    # Every value each game state feature is allowed to take
    features = {
        "Inning": list(range(1, 10)),
        "Outs": list(range(3)),
        "Strikes": list(range(3)),
        "Balls": list(range(4)),
        # Baserunners
        "First": [True, False],
        "Second": [True, False],
        "Third": [True, False],
        # Runs Scored/Allowed
        "RS": list(range(11)),
        "RA": list(range(11)),
    }

    # One independent draw per feature for each of the n observations
    return sim_df(features, n)

### Simulating pitch choices (type and location)

In [4]:
def sim_pitch_choices(n: int, 
                     pitch_types: list = None) -> pd.DataFrame:
    '''
    Return pandas dataframe with n observations of pitch type ("Pitch") and pitch location ("Location").
    pitch_types lists the pitch types to draw from. When omitted (None), the default mix of
    Fastball, Slider, Changeup and Curveball is used.
    '''
    # Resolve the default here rather than in the signature so that no list object
    # is shared between calls (mutable default argument pitfall).
    if pitch_types is None:
        pitch_types = ["Fastball","Slider","Changeup","Curveball"]

    # Define possible values of pitch type and location
    # NOTE: "Away" and "In" w.r.t right-handed batter. Only considering righty-righty matchups for now.
    # NOTE: Pitch locations are 8 of the 9 strike zone sections (omits middle-middle). No chase pitches yet.
    pitch_options = {"Pitch": pitch_types, 
                    "Location": ["Up","Up and Away","Up and In","In","Away","Low","Low and Away","Low and In"]}

    # Generate and return pitch choice dataframe
    return sim_df(pitch_options, n)

In [5]:
#test_pitch = sim_pitch_choices(10)
#test_game_state = sim_game_state(10)

In [6]:
#test_pitch

In [7]:
#test_game_state

In [8]:
def sim_batter_reaction(pitch_choice: pd.DataFrame, swing_chances: dict = None) -> pd.DataFrame:
    '''
    Return pandas dataframe with a single boolean "Swing" column: one row per row of pitch_choice,
    True if the batter swings at that pitch and False if the batter takes it.

    swing_chances optionally maps a pitch type (a value of the "Pitch" column of pitch_choice) to a
    pair of relative weights (swing, take), e.g. likely to swing at a fastball, take a curveball:
        sim_batter_reaction(pitch_choice, {"Fastball": (85, 15), "Curveball": (30, 70)})
    A "Swing" entry, if present, supplies the weights for every pitch type that has no entry of its own.
    Pitches with no applicable weights are a 50/50 choice.
    '''
    # Possible batter reactions; weight pairs follow this order (swing, take)
    swing_options = [True, False]
    n = pitch_choice.shape[0]

    # No weights at all: every pitch is an unweighted choice
    if not swing_chances:
        return pd.DataFrame(data={"Swing": rd.choices(swing_options, k=n)})

    # Weights used for pitch types that are not listed explicitly (None means uniform)
    fallback = swing_chances.get("Swing")

    # Look up the weights that apply to each individual pitch
    if "Pitch" in pitch_choice.columns:
        row_weights = [swing_chances.get(pitch, fallback) for pitch in pitch_choice["Pitch"]]
    else:
        # Without pitch types only the fallback weights can apply
        row_weights = [fallback] * n

    # Draw one reaction per pitch using that pitch's own weights
    swings = [rd.choices(swing_options, weights=w, k=1)[0] for w in row_weights]

    return pd.DataFrame(data={"Swing": swings})

### Set decision rules for outcomes and record results (miss, groundball, flyball)

In [9]:
def sim_batter_result(game_state: pd.DataFrame, pitch_choice: pd.DataFrame) -> pd.DataFrame:
    '''
    Return pandas dataframe with n results of a batter's reaction to pitch given the situation.
    NOTE: This function only decides type of contact (swing and miss, groundball, flyball, etc.).
    Another function will take care of calculating the resulting game state.
    '''
    