In [270]:
import sys
import copy
import matplotlib.pyplot as plt
import pandas as pd
import tensorflow as tf
import seaborn as sns
from tensorflow import keras
from tensorflow.keras import layers
from sklearn.model_selection import train_test_split

In [284]:
def formatPlayerName(name):
    """Return ``name`` truncated at its first ``*`` or backslash.

    Everything from the earliest of those two marker characters onward is
    discarded; a name containing neither marker is returned unchanged.
    """
    markerPositions = [name.find(marker) for marker in ("*", "\\")]
    # str.find reports a missing marker as -1, so keep only real hits.
    hits = [position for position in markerPositions if position >= 0]
    if not hits:
        return name
    return name[:min(hits)]
def preprocessWR(data):
    """Clean one season's raw stats sheet down to qualifying wide receivers.

    The sheet is expected to carry two header rows as its first two data rows:
    a group row (containing the labels "Passing", "Rushing", "Scoring",
    "Fumbles") followed by the per-column names. Rows are addressed by the
    labels 0 and 1, so the frame is assumed to have the default integer index
    (as produced by ``pd.read_excel`` without ``index_col``) -- TODO confirm
    for other callers.

    Args:
        data: Raw DataFrame for a single season. It is not modified.

    Returns:
        A new DataFrame indexed by cleaned player name, containing only rows
        with ``FantPos == "WR"``, ``G >= 12`` and no missing values, with the
        bookkeeping/ranking columns removed.
    """
    # Work on a copy: assigning to .columns below would otherwise rewrite the
    # header of the caller's frame.
    data = data.copy()

    # First header row: stat-group labels. Use them to discard whole groups.
    data.columns = data.iloc[0]
    data = data.drop(columns=["Passing", "Rushing", "Scoring", "Fumbles"])
    data = data.drop([0])

    # Second header row: the real column names.
    data.columns = data.iloc[0]
    data = data.drop([1])

    data = data[data.FantPos == "WR"]
    data = data.drop(columns=["Rk", "GS", "FantPos", "Y/R", "PPR", "DKPt", "VBD", "FDPt", "PosRank", "OvRank", "Tm"])
    data = data[data.G >= 12]
    data = data.dropna()

    # assign() returns a new frame, avoiding a write into a filtered slice
    # (SettingWithCopyWarning) while keeping the column order.
    data = data.assign(Player=data["Player"].map(formatPlayerName))
    return data.set_index("Player")
def createDataset(years):
    """Build the training table and return it with the newest season's stats.

    Each season's spreadsheet ``<year>NFLData.xlsx`` is read and cleaned with
    ``preprocessWR``. For every season except the first one listed, each
    player's ``FantPt`` is replaced by that player's ``FantPt`` from the
    following season (``year + 1``), so a row pairs one season's stats with the
    next season's fantasy points. Players with no row in the following season
    are dropped. Row labels get the season appended (e.g. ``"Name2017"``) so
    the same player can appear once per season.

    Player names are assumed to be unique within a season; a duplicated name
    in the following season makes the label lookup ambiguous and raises.

    Args:
        years: Iterable of seasons in descending, consecutive order
            (e.g. 2018, 2017, ...). May be a one-shot iterator.

    Returns:
        Tuple ``(finalData, currentYearData)``: the integer-typed training
        table, and the cleaned, unmodified frame for the first season in
        ``years``.

    Raises:
        ValueError: If ``years`` is empty.
        KeyError: If a season is reached before ``year + 1`` has been loaded.
    """
    # Materialise so one-shot iterators work and the first season is known.
    years = list(years)
    if not years:
        raise ValueError("createDataset needs at least one season")

    yearToOriginalData = {}
    labeledSeasons = []
    for index, year in enumerate(years):
        data = preprocessWR(pd.read_excel(str(year) + "NFLData.xlsx"))
        # Kept untouched: later (earlier-season) iterations read labels from it.
        yearToOriginalData[year] = data
        if index == 0:
            # The newest season has no following season to supply labels.
            continue
        if year + 1 not in yearToOriginalData:
            raise KeyError(
                f"season {year + 1} must be listed before {year} to supply its labels"
            )

        nextSeasonPoints = yearToOriginalData[year + 1]["FantPt"]
        hasNextSeason = data.index.isin(nextSeasonPoints.index)
        # Explicit copy + whole-column assignment replaces the chained
        # data["FantPt"][name] = ... write, which does not reliably update
        # the frame (and is a no-op under pandas Copy-on-Write).
        labeled = data.loc[hasNextSeason].copy()
        labeled["FantPt"] = nextSeasonPoints.loc[labeled.index].to_numpy()
        labeled.index = labeled.index.map(lambda name: name + str(year))
        labeledSeasons.append(labeled)

    # One concat instead of DataFrame.append in the loop (removed in pandas 2.0).
    finalData = pd.concat(labeledSeasons) if labeledSeasons else pd.DataFrame()
    finalData = finalData.astype(int)
    return (finalData, yearToOriginalData[years[0]])
def normalizeData(data, train_data_stats):
    """Standardise ``data`` using the supplied per-feature statistics.

    ``train_data_stats`` is indexed with the keys ``'mean'`` and ``'std'``;
    the result is ``(data - mean) / std``.
    """
    center = train_data_stats['mean']
    spread = train_data_stats['std']
    shifted = data - center
    return shifted / spread

In [291]:
# Seasons 2018 down to 2013, newest first. createDataset looks up year + 1
# while processing each later entry, so the order must be descending.
years = reversed(range(2013,2019))
# final_data: training rows; currentYearData: the 2018 season's cleaned stats.
final_data, currentYearData = createDataset(years)
# Remove the label column in place (the popped Series is discarded) so the
# current-season frame has the same feature columns the model is trained on.
currentYearData.pop("FantPt")
print(final_data)
print(currentYearData)
# Summary statistics of the training table (features and label).
print(final_data.astype(int).describe())

1                    Age   G  Tgt  Rec   Yds  TD  FantPt
Player                                                  
DeAndre Hopkins2017   25  15  174   96  1378  13     219
Antonio Brown2017     29  14  163  101  1533   9     220
Keenan Allen2017      25  16  159  102  1393   6     163
Tyreek Hill2017       23  15  105   75  1183   7     247
Julio Jones2017       28  16  148   88  1444   3     213
...                  ...  ..  ...  ...   ...  ..     ...
Markus Wheaton2013    22  12   13    6    64   0      78
Denard Robinson2013   23  16    1    0     0   0      91
Brandon Tate2013      26  16    2    1     6   0      25
Eric Weems2013        28  16    2    1     8   0      22
Damaris Johnson2013   24  13    3    2    14   0      41

[397 rows x 7 columns]
1               Age   G  Tgt  Rec   Yds  TD
Player                                     
Tyreek Hill      24  16  137   87  1479  12
Antonio Brown    30  15  168  104  1297  15
Davante Adams    26  15  169  111  1386  13
DeAndre Hopkins

In [292]:
# Split the label column off the feature matrix. pop() removes "FantPt" from
# final_data in place, so this cell needs a freshly built final_data.
final_labels = final_data.pop("FantPt")
# Fixed random_state: the train/test partition, and so the reported test
# error, is the same on every Restart & Run All. (Model weight initialisation
# is still unseeded, so training itself is not fully deterministic.)
train_data, test_data, train_labels, test_labels = train_test_split(
    final_data, final_labels, test_size=0.2, random_state=42)
# Per-feature mean/std are taken from the training split only and reused for
# the test split, so no test-set information leaks into the scaling.
train_data_stats = train_data.astype(int).describe().transpose()
normalized_train_data = normalizeData(train_data, train_data_stats)
normalized_test_data = normalizeData(test_data, train_data_stats)

In [293]:
# Fully connected regressor: a 128-unit input layer sized to the number of
# feature columns, three 256-unit ReLU hidden layers, and one linear output.
model = keras.Sequential(
    [layers.Dense(128, activation=tf.nn.relu, input_shape=[len(train_data.columns)])]
    + [layers.Dense(256, activation=tf.nn.relu) for _ in range(3)]
    + [layers.Dense(1, activation='linear')]
)

# Optimise MSE; also report MAE and MSE as metrics (in that order).
model.compile(
    optimizer='adam',
    loss='mean_squared_error',
    metrics=['mean_absolute_error', 'mean_squared_error'],
)
model.summary()

Model: "sequential_31"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
dense_160 (Dense)            (None, 128)               896       
_________________________________________________________________
dense_161 (Dense)            (None, 256)               33024     
_________________________________________________________________
dense_162 (Dense)            (None, 256)               65792     
_________________________________________________________________
dense_163 (Dense)            (None, 256)               65792     
_________________________________________________________________
dense_164 (Dense)            (None, 1)                 257       
Total params: 165,761
Trainable params: 165,761
Non-trainable params: 0
_________________________________________________________________


In [294]:
# Stop training once validation loss has gone 10 epochs without improving;
# epochs=1000 below is only an upper bound.
# NOTE(review): restore_best_weights is not set, so the model presumably keeps
# the weights of the last epoch run rather than the best one -- confirm intent.
early_stop = keras.callbacks.EarlyStopping(monitor='val_loss', patience=10)
# 20% of the training rows are held out as the validation set that drives
# early stopping; verbose=0 suppresses the per-epoch log.
history = model.fit(normalized_train_data, train_labels, epochs=1000,
                    validation_split = 0.2, verbose=0, batch_size = 32, callbacks=[early_stop])
# One row per epoch: training and validation loss/metrics.
hist = pd.DataFrame(history.history)
hist.tail()

Unnamed: 0,loss,mean_absolute_error,mean_squared_error,val_loss,val_mean_absolute_error,val_mean_squared_error
27,1333.139703,29.280092,1333.139648,1783.563049,34.364735,1783.562988
28,1315.631075,29.237823,1315.631104,1768.181091,33.68037,1768.181152
29,1462.647794,30.147305,1462.647827,1910.614075,36.057884,1910.614014
30,1409.554164,30.523472,1409.554077,1786.631165,33.575336,1786.631104
31,1352.111563,29.296896,1352.111572,1815.119995,35.172989,1815.119995


In [295]:
# Score the held-out test split. The three values unpack as the loss followed
# by the two metrics passed to model.compile (MAE, then MSE).
loss, mae, mse = model.evaluate(normalized_test_data, test_labels, verbose=0)
print(f'Mean Absolute Error: {mae} Fantasy Points')

Mean Absolute Error: 30.947368621826172 Fantasy Points


In [296]:
# Scale the current-season features with the training-split statistics, then
# predict; flatten() turns the (n, 1) model output into a 1-D array.
currentSeasonPredictions = model.predict(
    normalizeData(currentYearData, train_data.astype(int).describe().transpose())
).flatten()

# One row per player, highest projection first, written out as a spreadsheet.
playerToPrediction = pd.DataFrame({
    'Player': currentYearData.index,
    'Projected Fantasy Points': currentSeasonPredictions,
})
playerToPrediction = (
    playerToPrediction
    .sort_values(by='Projected Fantasy Points', ascending=False)
    .set_index("Player")
)
playerToPrediction.to_excel("WRFantasyPredictions2019v2.xlsx")