In [108]:
import numpy as np
import pandas as pd

In [109]:
# Load the pitches CSV into the module-level frame `df`; selectPitcher() filters it by its "Pitcher" column.
df = pd.read_csv('pitches.csv')

  has_raised = await self.run_ast_nodes(code_ast.body, cell_name,


In [110]:
def selectPitcher():
    """
    Prompt for a pitcher's last name and return that pitcher's rows.

    Reads the module-level DataFrame `df` and keeps only the rows whose
    "Pitcher" value equals the entered text exactly. The result is empty
    when nothing matches.
    """
    requestedName = input("Enter pitcher last name: ")
    isRequestedPitcher = df["Pitcher"] == requestedName
    return df[isRequestedPitcher]

In [111]:
def selectModel(pitcher):
    """
    Prompt for n and build the matching n-gram model for `pitcher`.

    n == 1 selects the unigram model, n == 2 the bigram model and n == 3 the
    trigram model.

    Parameters:
        pitcher: the pitcher's data, passed straight through to the model builder.

    Returns:
        (model, n): the built model and the n that was entered.

    Raises:
        ValueError: if the entered text is not an integer (raised by int()),
            or if n is not 1, 2 or 3.
    """
    n = int(input("Enter n (between 1 and 3): "))
    if n == 1:
        model = unigram(pitcher)
    elif n == 2:
        model = bigram(pitcher)
    elif n == 3:
        model = trigram(pitcher)
    else:
        print("Model does not exist")
        # Raising a plain string is itself a TypeError; raise a real exception
        # so the caller sees the actual problem.
        raise ValueError("n must be 1, 2 or 3, got " + str(n))

    return model, n

In [112]:
def unigram(pitcher):
    """
    Build the unigram model for a pitcher.

    The unigram model is completely naive: it counts how often the pitcher
    throws a regular fastball, a moving fastball or an offspeed pitch and
    returns the most frequent category. Using it therefore yields the same
    prediction every time.

    Parameters:
        pitcher: object whose pitcher['Type'] iterates over raw pitch-type labels.

    Returns:
        'regular_fastball', 'moving_fastball' or 'offspeed'. On a tie the
        regular fastball wins over the moving fastball, and either fastball
        wins over offspeed.

    Raises:
        ValueError: if there is no pitch left to count after skipping '--' entries.
    """
    reg_fastballs = ["Fastball", "Four-seam FB"]
    moving_fastballs = ["Two-seam FB", "Cutter", "Sinker"]

    pitchCounts = {'regular_fastball': 0,
                   'moving_fastball': 0,
                   'offspeed': 0}

    for pitchType in pitcher['Type']:
        # '--' is a placeholder label that bigram() and trigram() skip as well;
        # it must not be counted as an offspeed pitch.
        if pitchType == '--':
            continue

        if pitchType in reg_fastballs:
            pitchCounts['regular_fastball'] += 1
        elif pitchType in moving_fastballs:
            pitchCounts['moving_fastball'] += 1
        else:
            pitchCounts['offspeed'] += 1

    totalPitches = sum(pitchCounts.values())
    if totalPitches == 0:
        raise ValueError("No classified pitches available to build a unigram model")

    pitchFrequency = {category: count / totalPitches
                      for category, count in pitchCounts.items()}

    # Two-step argmax; `>=` keeps the documented tie-breaking order.
    p1 = 'regular_fastball' if pitchFrequency['regular_fastball'] >= pitchFrequency['moving_fastball'] else 'moving_fastball'
    p2 = p1 if pitchFrequency[p1] >= pitchFrequency['offspeed'] else 'offspeed'

    return p2

In [113]:
def bigram(pitcher):
    """
    Build the bigram model for a pitcher.

    Every entry of pitcher["Type"] except '--' is mapped to one of
    'regular_fastball', 'moving_fastball' or 'offspeed'. For each category the
    model stores the category that most often came directly after it.

    Returns a dict keyed by the previous pitch category whose value is the
    predicted next category. A category that was never followed by another
    pitch maps to ''. When two followers are equally frequent, the pair that
    appeared first in the data wins.
    """
    straightNames = ("Fastball", "Four-seam FB")
    movingNames = ("Two-seam FB", "Cutter", "Sinker")

    categoryTotals = dict.fromkeys(('regular_fastball', 'moving_fastball', 'offspeed'), 0)
    pairTotals = {}
    previous = None

    for rawType in pitcher["Type"]:
        if rawType == '--':
            continue

        if rawType in straightNames:
            current = 'regular_fastball'
        elif rawType in movingNames:
            current = 'moving_fastball'
        else:
            current = 'offspeed'
        categoryTotals[current] += 1

        # The very first usable pitch has no predecessor, so it starts no pair.
        if previous is not None:
            pair = previous + ' ' + current
            pairTotals[pair] = pairTotals.get(pair, 0) + 1
        previous = current

    # Pair count relative to how often the leading category was thrown.
    pairShare = {pair: seen / categoryTotals[pair.split(' ')[0]]
                 for pair, seen in pairTotals.items()}

    # The keys are the previous pitch, the values are the prediction.
    predictions = {}
    for context in ('moving_fastball', 'regular_fastball', 'offspeed'):
        bestShare, bestFollower = 0, ''
        for pair, share in pairShare.items():
            lead, follower = pair.split(' ')
            if lead == context and share > bestShare:
                bestShare, bestFollower = share, follower
        predictions[context] = bestFollower

    return predictions

In [114]:
def trigram(pitcher):
    """
    Build the trigram model for a pitcher.

    Every entry of pitcher["Type"] except '--' is mapped to one of
    'regular_fastball', 'moving_fastball' or 'offspeed'. For each ordered pair
    of consecutive categories (the context) the model stores the category that
    most often came next.

    Parameters:
        pitcher: object whose pitcher["Type"] iterates over raw pitch-type labels.

    Returns:
        dict keyed by 'first second' (the two previous pitch categories,
        oldest first) whose value is the predicted next category. A context
        that never occurred maps to ''. When two followers are equally common,
        the one that appeared first in the data wins.
    """
    reg_fastballs = ("Fastball", "Four-seam FB")
    moving_fastballs = ("Two-seam FB", "Cutter", "Sinker")
    categories = ('moving_fastball', 'regular_fastball', 'offspeed')

    # followerCounts[context][nextPitch] = times nextPitch directly followed context.
    # Counting followers per context means every context that owns a trigram is
    # guaranteed to exist; a separate bigram table misses the opening pair.
    followerCounts = {}
    prevPitch = ""
    prevPrevPitch = ""

    for pitchType in pitcher["Type"]:

        if pitchType == '--':
            continue
        elif pitchType in reg_fastballs:
            pitchType = 'regular_fastball'
        elif pitchType in moving_fastballs:
            pitchType = 'moving_fastball'
        else:
            pitchType = 'offspeed'

        # A trigram exists only once two earlier pitches have been seen.
        if prevPitch != "" and prevPrevPitch != "":
            context = prevPrevPitch + ' ' + prevPitch
            followers = followerCounts.setdefault(context, {})
            followers[pitchType] = followers.get(pitchType, 0) + 1

        prevPrevPitch = prevPitch
        prevPitch = pitchType

    # The keys are the two previous pitches, the values are the prediction.
    finalModel = {}
    for first in categories:
        for second in categories:
            context = first + ' ' + second
            followers = followerCounts.get(context, {})
            # All followers of one context share the same denominator, so the
            # highest count is also the highest relative frequency. max()
            # returns the first maximal key, i.e. the earliest-seen follower.
            finalModel[context] = max(followers, key=followers.get, default='')

    return finalModel

In [115]:
def useModel(model, n):
    """
    Interactively query a model until the user types 'quit'.

    Parameters:
        model: the unigram prediction (a string) when n == 1, the bigram dict
            when n == 2, or the trigram dict when n == 3.
        n: 1 for unigram, 2 for bigram, 3 for trigram. Any other value makes
            the function return immediately.

    Each round prompts for the previous pitch and prints the prediction.
    Surrounding whitespace in the typed text is ignored. For n == 2 and n == 3
    an unrecognised pitch label is reported and ignored instead of crashing
    the session. The trigram model prints nothing until two valid pitches
    have been entered.
    """
    validPitches = ('regular_fastball', 'moving_fastball', 'offspeed')
    prompt = "Previous pitch (regular_fastball | moving_fastball | offspeed): "

    if n == 1:
        while True:
            prevPitch = input(prompt).strip()
            if prevPitch == 'quit':
                print("Quitting model")
                break
            # The unigram prediction does not depend on what was entered.
            print('\n')
            print("Prediction: " + model + '\n')

    elif n == 2:
        while True:
            prevPitch = input(prompt).strip()
            if prevPitch == 'quit':
                print("Quitting model")
                break
            if prevPitch not in validPitches:
                print("Unknown pitch type: " + prevPitch)
                continue
            print('\n')
            print("Prediction: " + model[prevPitch] + '\n')

    elif n == 3:
        prevPitch = ''
        prevPrevPitch = ''
        while True:
            entered = input(prompt).strip()
            if entered == 'quit':
                print("Quitting model")
                break
            if entered not in validPitches:
                # Leave the history untouched so one typo does not poison the
                # next two lookups.
                print("Unknown pitch type: " + entered)
                continue

            prevPrevPitch = prevPitch
            prevPitch = entered
            if prevPrevPitch == '':
                continue
            print('\n')
            print('Prediction: ' + model[prevPrevPitch + ' ' + prevPitch] + '\n')

In [116]:
"""
testModel runs makes predictions for all of a pitcher's pitches and returns the accuracy, which we've defined as number of correct
predictions over number of predictions
"""
def testModel(model, n, pitcher):
    reg_fastballs = ["Fastball", "Four-seam FB"]
    moving_fastballs = ["Two-seam FB", "Cutter", "Sinker"]
    
    numPredsCorrect = 0
    totalPitches = 0
    
    if n == 1:
        
        for pitchType in pitcher["Type"]:
            
            if pitchType == '--':
                continue
            elif pitchType in reg_fastballs:
                pitchType = 'regular_fastball'
            elif pitchType in moving_fastballs:
                pitchType = 'moving_fastball'
            else:
                pitchType = 'offspeed'
                
            totalPitches += 1
            
            pred = model
            
            if pred == pitchType:
                numPredsCorrect += 1
                
        accuracy = numPredsCorrect / totalPitches
        return accuracy
    
    elif n == 2:
        prevPitch = ''
        for pitchType in pitcher["Type"]:
            
            if pitchType == '--':
                continue
            elif pitchType in reg_fastballs:
                pitchType = 'regular_fastball'
            elif pitchType in moving_fastballs:
                pitchType = 'moving_fastball'
            else:
                pitchType = 'offspeed'
                
            if prevPitch == '':
                prevPitch = pitchType
                continue
                
            totalPitches += 1
            
            pred = model[prevPitch]
            
            if pred == pitchType:
                numPredsCorrect += 1
                
            prevPitch = pitchType
                
        accuracy = numPredsCorrect / totalPitches
        return accuracy
    
    else:
        prevPitch = ''
        prevPrevPitch = ''
        
        for pitchType in pitcher["Type"]:
        
            if pitchType == '--':
                continue
            elif pitchType in reg_fastballs:
                pitchType = 'regular_fastball'
            elif pitchType in moving_fastballs:
                pitchType = 'moving_fastball'
            else:
                pitchType = 'offspeed'

            if prevPitch == '':
                prevPitch = pitchType
                continue
            elif prevPrevPitch == '':
                prevPrevPitch = prevPitch
                prevPitch = pitchType
                continue

            totalPitches += 1

            pred = model[prevPrevPitch + ' ' + prevPitch]

            if pred == pitchType:
                numPredsCorrect += 1

            prevPrevPitch = prevPitch
            prevPitch = pitchType
        
        accuracy = numPredsCorrect / totalPitches
        return accuracy

In [118]:
def demo():
    """
    Run the interactive demo: choose a pitcher, choose a model, then query it.

    Raises:
        ValueError: if the selected pitcher has no rows. selectModel() may
            also raise it for an invalid n.
    """
    pitcher = selectPitcher()
    if pitcher.empty:
        print("No data found for pitcher")
        # Raising a plain string is itself a TypeError; raise a real exception.
        raise ValueError('Pitcher not found')

    model, n = selectModel(pitcher)

    useModel(model, n)

# Start the interactive session; it reads from standard input until 'quit' is entered at the pitch prompt.
demo()

Enter pitcher last name:  Darvish
Enter n (between 1 and 3):  3
Previous pitch (regular_fastball | moving_fastball | offspeed):  offspeed
Previous pitch (regular_fastball | moving_fastball | offspeed):  offspeed




Prediction: regular_fastball



Previous pitch (regular_fastball | moving_fastball | offspeed):  moving_fastball




Prediction: moving_fastball



Previous pitch (regular_fastball | moving_fastball | offspeed):  quit


Quitting model
