In [1]:
import itertools
import math
import random

import numpy as np
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.neural_network import MLPClassifier

from preprocessing import *

In [2]:
# The ten pitchers evaluated in this notebook (one name per line for easy editing)
pitchers = [
    'Darvish',
    'deGrom',
    'Keuchel',
    'Porcello',
    'Scherzer',
    'Lester',
    'Verlander',
    'Kimbrel',
    'Jansen',
    'Eovaldi',
]


In [3]:
#Track each pitcher's test-set results from the tuned Neural Network, keyed by pitcher name
scores = {}

#Fixed seeds so the train/validation/test splits (and therefore the reported scores)
#are reproducible from run to run. Previously every split drew a fresh
#random.randint(0, 50) seed from an unseeded RNG, so no two runs were comparable.
TEST_SPLIT_SEED = 42
VALID_SPLIT_SEED = 7

for pitcher in pitchers:
    #Load features and labels for this pitcher
    #(presumably provided by the `preprocessing` star-import -- confirm)
    #Alternative loader, left disabled:
    #x, y = get_x_y_fastballs_offspeed(pitcher)
    x, y = get_x_y_categorical(pitcher)

    #Hold out 20% of the data as the final test set
    X_train, X_test, y_train, y_test = train_test_split(
        x, y, test_size=0.20, random_state=TEST_SPLIT_SEED
    )

    #Carve a validation set out of the remaining 80% (again 80/20),
    #leaving roughly 64% train / 16% validation / 20% test overall
    X_train, X_valid, y_train, y_valid = train_test_split(
        X_train, y_train, test_size=0.20, random_state=VALID_SPLIT_SEED
    )

    #Flatten the label containers into 1-D arrays, the shape the classifier expects for y
    y_train = y_train.values.ravel()
    y_test = y_test.values.ravel()
    y_valid = y_valid.values.ravel()
    
    
    
    #Helper that builds the grid of hidden-layer sizes used in the fine-tuning below
    #e.g. layers = 3, count = 3 returns every 3-tuple of node counts: [(1,1,1), (1,1,2), (1,1,3), ..., (3,3,3)]
    
    def testmatrix(layers, count):
        # layers is the number of total layers in the Neural Network, count is the total number of possible nodes in each layer

        # list of options
        testvector = []

        #Create options based upon different base representations 
        for i in range(count ** layers):
            base_temp = np.base_repr(i, base=count)

            padding = layers - len(base_temp)
            base = np.base_repr(i, base=count, padding=padding)

            if i == 0:
                base = '0' + base

            option = ()
            for i in base:
                value = (int(i) + 1)
                option = option + (value,)
            testvector.append(option)

        return testvector
    
    #Neural Network Fine-Tuning
    #Grid-search solver x activation x hidden-layer sizes, pick the model with the
    #best validation accuracy, then report that model's accuracy on the test set.

    #Set Neural Network Fine-Tuning Parameters
    solvers = ['sgd', 'adam', 'lbfgs']
    activations = ['identity', 'logistic', 'tanh', 'relu']

    #test all combinations of 3 layers and up to 5 nodes for neural network
    hidden_layers = testmatrix(3, 5)

    #Convert the feature frames to plain arrays once. The model is fitted on arrays,
    #so scoring/predicting on arrays too avoids sklearn's
    #"X has feature names, but MLPClassifier was fitted without feature names" warning.
    X_train_arr = X_train.values
    X_valid_arr = X_valid.values
    X_test_arr = X_test.values

    best_model = None
    #Start below any reachable accuracy so the first candidate is always accepted,
    #even if every model scores 0 on the validation set
    best_score = float('-inf')
    best_solv, best_activ, best_layers = None, None, None

    for solv in solvers:
        for activ in activations:
            for layers in hidden_layers:
                #TODO: investigate adding alpha
                #Note: per the sklearn docs, learning_rate only takes effect for solver='sgd'
                clf = MLPClassifier(solver=solv, activation=activ,
                                    hidden_layer_sizes=layers, learning_rate='invscaling')
                clf.fit(X_train_arr, y_train)

                #Validation accuracy decides whether this candidate becomes the best model
                score = clf.score(X_valid_arr, y_valid)

                #print(solv + ' and ' + activ + ' with ' + str(layers) + ' scored ' + str(score))
                if best_model is None or score > best_score:
                    best_model = clf
                    best_score = score
                    best_solv, best_activ, best_layers = solv, activ, layers

    #make predictions with best model and count how many match the held-out labels
    preds = best_model.predict(X_test_arr)
    tot = len(preds)
    corr = int(sum(pred == truth for pred, truth in zip(preds, y_test)))

    #print out scores for each pitcher and respective fine-tuning parameters
    print(pitcher, corr/tot, best_solv, best_activ, best_layers)
    scores[pitcher] = (corr/tot, corr, tot, best_solv, best_activ, best_layers)

  exec(code_obj, self.user_global_ns, self.user_ns)


Darvish 0.425531914893617 lbfgs tanh 5


FileNotFoundError: File b'pitches.csv' does not exist

In [None]:
# Display the collected results. Each pitcher processed by the loop above maps to
# (accuracy, correct, total, best solver, best activation, best hidden layers).
# Left as a bare expression so the notebook renders it as the cell's output.
scores