In [109]:
import sys
import copy
import matplotlib.pyplot as plt
import pandas as pd
import tensorflow as tf
import seaborn as sns
from tensorflow import keras
from tensorflow.keras import layers
from sklearn.model_selection import train_test_split

In [266]:
def formatPlayerName(name):
    """Return the player's name with any trailing marker text removed.

    Everything from the first "*" or the first backslash onward (whichever
    comes earlier in the string) is discarded. A name containing neither
    character is returned unchanged.
    """
    # Cutting at the first "*" and then at the first backslash of what remains
    # is equivalent to cutting at the earlier of the two characters.
    beforeStar = name.partition("*")[0]
    return beforeStar.partition("\\")[0]
def preprocess(data, position):
    """Reduce one season's raw stats sheet to the feature table for a position.

    The frame is expected to carry two header rows inside its data: the row
    labelled 0 holds stat-category names (e.g. "Passing") and the row labelled
    1 holds the per-stat column names (e.g. "FantPos", "G", "Player").

    Steps, in order:
      1. drop every column whose category is irrelevant for ``position``;
      2. promote the stat-name row to column labels and discard both header rows;
      3. keep only rows whose ``FantPos`` equals ``position``;
      4. drop bookkeeping / derived columns;
      5. keep only players with at least 12 games and no missing values;
      6. clean the player names and use them as the index.

    Parameters
    ----------
    data : pandas.DataFrame
        Raw season sheet. It is not modified; the work is done on a copy.
    position : str
        One of 'WR', 'TE', 'QB' or 'RB'.

    Returns
    -------
    pandas.DataFrame
        Indexed by cleaned player name. For 'RB' the columns are renamed so
        that the rushing and receiving stats have distinct labels.

    Raises
    ------
    Exception
        If ``position`` is not one of the supported positions.
    """
    # Validate before touching the data so a bad call has no side effects.
    if position in ('WR', 'TE'):
        categoriesToDrop = ["Passing", "Rushing", "Scoring", "Fumbles"]
        columnsToDrop = ["Rk", "GS", "FantPos", "Y/R", "PPR", "DKPt", "VBD", "FDPt", "PosRank", "OvRank", "Tm"]
    elif position == 'QB':
        categoriesToDrop = ["Receiving", "Rushing", "Scoring", "Fumbles"]
        columnsToDrop = ["Rk", "GS", "FantPos", "PPR", "DKPt", "VBD", "FDPt", "PosRank", "OvRank", "Tm"]
    elif position == 'RB':
        categoriesToDrop = ["Passing", "Scoring", "Fumbles"]
        columnsToDrop = ["Rk", "GS", "FantPos", "Y/R", "Y/A", "PPR", "DKPt", "VBD", "FDPt", "PosRank", "OvRank", "Tm"]
    else:
        raise Exception("Incorrect position passed in as an argument to preprocess Function")

    # Work on a copy: relabelling the columns below would otherwise rewrite
    # the caller's frame in place.
    data = data.copy()

    # First header row: category names become the labels so whole categories
    # can be dropped by name.
    data.columns = data.iloc[0]
    data = data.drop(columns=categoriesToDrop)
    data = data.drop([0])

    # Second header row (now the first remaining row): the real stat names.
    data.columns = data.iloc[0]
    data = data.drop([1])

    data = data[data["FantPos"] == position]
    data = data.drop(columns=columnsToDrop)
    data = data[data["G"] >= 12]
    data = data.dropna()

    data["Player"] = data["Player"].map(formatPlayerName)
    data = data.set_index("Player")

    if position == 'RB':
        # Rushing and receiving both contribute "Yds" and "TD"; relabel so the
        # two groups can be told apart.
        data.columns = ['Age', 'G', 'Att', 'Rush Yds', 'Rush TD', 'Tgt', 'Rec', 'Rec Yds', 'Rec TD',
                        'FantPt']
    return data
def createDataset(years, position):
    """Build the training table and the prediction inputs for one position.

    For every season in ``years`` the file ``"<year>NFLData.xlsx"`` (relative
    path) is read and passed through ``preprocess``. The first season yielded
    by ``years`` is treated as the current season: it has no following season
    to supply labels, so it is kept aside as the prediction input. Every later
    season in the iteration contributes training rows whose ``FantPt`` is
    replaced by the same player's ``FantPt`` from season ``year + 1``; players
    absent from season ``year + 1`` are dropped.

    Parameters
    ----------
    years : iterable of int
        Seasons to load, newest first and consecutive, because each season
        looks up ``year + 1`` among the seasons already processed (a
        ``KeyError`` is raised if it is missing).
    position : str
        One of 'WR', 'TE', 'QB' or 'RB'.

    Returns
    -------
    tuple(pandas.DataFrame, pandas.DataFrame)
        ``(training_rows, current_season_features)``. Training rows are cast to
        ``int`` and indexed by ``"<player name><year>"``; the current-season
        frame has its ``FantPt`` column removed.

    Raises
    ------
    Exception
        If ``position`` is not supported.
    ValueError
        If ``years`` yields no season at all.
    """
    # Validate once, before any file is read.
    if position not in ('WR', 'TE', 'QB', 'RB'):
        raise Exception("Incorrect position passed in as an argument to createDataset Function")

    yearToOriginalData = {}
    trainingFrames = []
    currentYear = None

    for index, year in enumerate(years):
        data = preprocess(pd.read_excel(str(year) + "NFLData.xlsx"), position)
        # Keep an untouched copy: the next (older) season reads its labels
        # from this one.
        yearToOriginalData[year] = data.copy()

        if index == 0:
            # Newest season: nothing to learn from yet, only to predict for.
            currentYear = year
            continue

        nextSeasonPoints = yearToOriginalData[year + 1]["FantPt"]
        # Keep only players who also appear in the following season.
        data = data[data.index.isin(nextSeasonPoints.index)].copy()
        for name in data.index:
            # .loc writes into the frame itself; chained indexing
            # (data["FantPt"][name] = ...) may write to a temporary copy.
            data.loc[name, "FantPt"] = nextSeasonPoints[name]

        # Make row labels unique across seasons.
        data.index = data.index.map(lambda name: name + str(year))
        if position == 'TE':
            # Limit the number of tight ends (to an average of one per team) in
            # order to eliminate most blocking tight ends, who are not relevant
            # to fantasy and play a different role than pass-catching tight ends.
            data = data.head(32)
        trainingFrames.append(data)

    if currentYear is None:
        raise ValueError("createDataset needs at least one year to load")

    # Concatenate once instead of growing a frame inside the loop.
    finalData = pd.concat(trainingFrames) if trainingFrames else pd.DataFrame()
    finalData = finalData.astype(int)

    currentYearData = yearToOriginalData[currentYear]
    currentYearData.pop("FantPt")
    return (finalData, currentYearData)
def normalizeData(data, train_data_stats):
    """Standardize ``data`` using the supplied "mean" and "std" entries.

    Each value has ``train_data_stats["mean"]`` subtracted and is then divided
    by ``train_data_stats["std"]``; the result is returned.
    """
    center = train_data_stats["mean"]
    spread = train_data_stats["std"]
    centered = data - center
    return centered / spread
def generate_model(shape):
    """Create the (uncompiled) regression network.

    The network is a Keras ``Sequential`` stack of four 256-unit ReLU dense
    layers followed by a single linear output unit. ``shape`` is passed as the
    ``input_shape`` of the first layer.
    """
    hidden_units = 256
    network = keras.Sequential()
    # Only the first layer declares the input shape.
    network.add(layers.Dense(hidden_units, activation=tf.nn.relu, input_shape=shape))
    for _ in range(3):
        network.add(layers.Dense(hidden_units, activation=tf.nn.relu))
    network.add(layers.Dense(1, activation='linear'))
    return network
def save_to_excel(players, predictions, filename):
    """Write the players and their projections to an Excel file.

    Rows are sorted by projected points, highest first, and the player name is
    used as the sheet's index column. ``filename`` is passed straight to
    ``DataFrame.to_excel``.
    """
    ranking = (
        pd.DataFrame({'Player': players, 'Projected Fantasy Points': predictions})
        .sort_values(by='Projected Fantasy Points', ascending=False)
        .set_index("Player")
    )
    ranking.to_excel(filename)
def train_and_predict(final_data, final_labels, current_year_data, filename, position, random_state=None):
    """Train a model for one position, report its test error and save projections.

    The data is split 80/20 into train and test sets. All inputs are
    standardized with the mean/std of the training split only, the network
    from ``generate_model`` is fitted with early stopping on validation loss,
    the mean absolute error on the test split is printed, and the predictions
    for ``current_year_data`` are written to ``filename`` via ``save_to_excel``.

    Parameters
    ----------
    final_data : pandas.DataFrame
        Feature rows used for training and testing.
    final_labels : pandas.Series
        Target values aligned with ``final_data``.
    current_year_data : pandas.DataFrame
        Feature rows to predict for; its index supplies the player names in
        the output file.
    filename : str
        Destination Excel file.
    position : str
        Label used in the printed error message.
    random_state : int or None, optional
        Forwarded to ``train_test_split`` so the split can be reproduced.
        The default (``None``) keeps the split random. This does not seed the
        network's weight initialization.

    Returns
    -------
    None
    """
    train_data, test_data, train_labels, test_labels = train_test_split(
        final_data, final_labels, test_size=0.2, random_state=random_state)

    # Statistics come from the training split only and are reused for every
    # input, so compute them once.
    train_data_stats = train_data.astype(int).describe().transpose()
    normalized_train_data = normalizeData(train_data, train_data_stats)
    normalized_test_data = normalizeData(test_data, train_data_stats)

    model = generate_model([len(train_data.keys())])
    model.compile(loss='mean_squared_error',
                  optimizer='adam',
                  metrics=['mean_absolute_error', 'mean_squared_error'])
    early_stop = keras.callbacks.EarlyStopping(monitor='val_loss', patience=10)
    # The History object returned by fit() is not needed here. The previous
    # code referenced an unassigned `history` name, which fails on a fresh
    # kernel.
    model.fit(normalized_train_data, train_labels, epochs=1000,
              validation_split=0.2, verbose=0, batch_size=32, callbacks=[early_stop])

    _, mae, _ = model.evaluate(normalized_test_data, test_labels, verbose=0)
    print(f'{position} Test Data Mean Absolute Error: {mae} Fantasy Points')

    # NOTE(review): current_year_data is not cast to a numeric dtype upstream
    # (only the training table is), so force float here before handing it to
    # Keras - confirm against the dtype produced by createDataset.
    normalized_current_data = normalizeData(current_year_data, train_data_stats).astype(float)
    currentSeasonPredictions = model.predict(normalized_current_data).flatten()
    save_to_excel(current_year_data.index, currentSeasonPredictions, filename)

### Generate Datasets

In [267]:
# Build one dataset per position. Seasons are listed newest first because
# createDataset labels each season with the following season's fantasy points.
# The label column ("FantPt") is split off from the features straight away.
final_data_qb, current_year_data_qb = createDataset(range(2018, 2007, -1), 'QB')
final_labels_qb = final_data_qb.pop("FantPt")

final_data_rb, current_year_data_rb = createDataset(range(2018, 2012, -1), 'RB')
final_labels_rb = final_data_rb.pop("FantPt")

final_data_wr, current_year_data_wr = createDataset(range(2018, 2012, -1), 'WR')
final_labels_wr = final_data_wr.pop("FantPt")

final_data_te, current_year_data_te = createDataset(range(2018, 2007, -1), 'TE')
final_labels_te = final_data_te.pop("FantPt")

### Train models and generate/save predictions

In [269]:
# One training run per position, in the order QB, RB, WR, TE; each run prints
# its test error and writes its own predictions workbook.
for features, labels, current_season, workbook, position in (
    (final_data_qb, final_labels_qb, current_year_data_qb, "VTQBFantasyPredictions2019.xlsx", "QB"),
    (final_data_rb, final_labels_rb, current_year_data_rb, "VTRBFantasyPredictions2019.xlsx", "RB"),
    (final_data_wr, final_labels_wr, current_year_data_wr, "VTWRFantasyPredictions2019.xlsx", "WR"),
    (final_data_te, final_labels_te, current_year_data_te, "VTTEFantasyPredictions2019.xlsx", "TE"),
):
    train_and_predict(features, labels, current_season, workbook, position)

QB Test Data Mean Absolute Error: 58.55894470214844 Fantasy Points
RB Test Data Mean Absolute Error: 32.076663970947266 Fantasy Points
WR Test Data Mean Absolute Error: 33.7569694519043 Fantasy Points
TE Test Data Mean Absolute Error: 27.81584930419922 Fantasy Points
