In [51]:
# Set the seed value for the notebook so the results are reproducible

from numpy.random import seed
seed(1)

In [52]:
# import necessary libraries

import warnings
warnings.simplefilter('ignore')

import numpy as np

import pandas as pd

from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense
from tensorflow.keras.utils import to_categorical
from tensorflow.keras.models import load_model

from sklearn.model_selection import train_test_split
from sklearn.model_selection import KFold
from sklearn.preprocessing import StandardScaler
from sklearn.preprocessing import LabelEncoder
from sklearn.datasets import make_classification

In [53]:
# load pandas DataFrame

pd.options.display.max_columns = None    #shows all columns in dataframe

ncaaDF = pd.read_csv("NCAA_data.csv")
ncaaDF.head()

Unnamed: 0,player_id,name,class,college,assists,blocks,effective_field_goal_percentage,field_goal_percentage,free_throw_attempt_rate,free_throw_percentage,games_played,height,minutes_played,points,steals,three_point_percentage,three_pointers,total_rebounds,true_shooting_percentage,turnover_percentage,turnovers,win_shares_per_40_minutes,assists_per_40,blocks_per_40,points_per_40,steals_per_40,three_pointers_per_40,total_rebounds_per_40,turnovers_per_40
0,kenyon-martin-1,Kenyon Martin,Player,cincinnati,142,292,0.587,0.586,0.494,0.581,116,81,2042.0,1279,124,0.222,2.0,869,0.592,14.3,139.0,0.346,2.781587,5.719882,25.053869,2.428991,0.039177,17.022527,2.722821
1,aj-guyton-1,A.J. Guyton,Player,indiana,403,52,0.541,0.455,0.249,0.79,128,73,3260.0,2100,129,0.414,283.0,429,0.572,13.6,218.0,0.145,4.944785,0.638037,25.766871,1.582822,3.472393,5.263804,2.674847
2,jake-voskuhl-1,Jake Voskuhl,Player,uconn,124,193,0.542,0.542,0.555,0.656,138,83,2184.0,863,86,0.329163,0.0,880,0.573,23.0,160.0,0.237,2.271062,3.534799,15.805861,1.575092,0.0,16.117216,2.930403
3,khalid-el-amin-1,Khalid El-Amin,Bust,uconn,479,10,0.486,0.416,0.257,0.822,108,70,2149.0,1650,186,0.355,195.0,319,0.527,15.6,188.0,0.158,8.915775,0.186133,30.711959,3.462075,3.629595,5.937645,3.499302
4,mike-smith-1,Mike Smith,Bust,louisiana monroe,147,42,0.507,0.428,0.357,0.769,55,80,1938.0,1062,90,0.318,130.0,449,0.551,20.0,241.0,0.165,3.034056,0.866873,21.919505,1.857585,2.683179,9.267286,4.9742


In [54]:
# Shuffle the NCAA dataframe

ncaaDF2 = ncaaDF.sample(frac=1).reset_index(drop=True)
ncaaDF2

Unnamed: 0,player_id,name,class,college,assists,blocks,effective_field_goal_percentage,field_goal_percentage,free_throw_attempt_rate,free_throw_percentage,games_played,height,minutes_played,points,steals,three_point_percentage,three_pointers,total_rebounds,true_shooting_percentage,turnover_percentage,turnovers,win_shares_per_40_minutes,assists_per_40,blocks_per_40,points_per_40,steals_per_40,three_pointers_per_40,total_rebounds_per_40,turnovers_per_40
0,cheick-diallo-1,Cheick Diallo,Player,kansas,1,23,0.569,0.569,0.466,0.556,27,81,202.0,81,7,0.329163,0.0,68,0.572,19.4,17.0,0.159,0.198020,4.554455,16.039604,1.386139,0.000000,13.465347,3.366337
1,hassan-adams-1,Hassan Adams,Bust,arizona,259,85,0.524,0.499,0.230,0.704,130,76,3609.0,1818,238,0.311000,75.0,706,0.545,13.3,255.0,0.168,2.870601,0.942089,20.149626,2.637850,0.831255,7.824882,2.826268
2,tyler-honeycutt-1,Tyler Honeycutt,Bust,ucla,163,98,0.497,0.431,0.346,0.684,59,81,1873.0,608,68,0.359000,65.0,407,0.528,22.1,163.0,0.111,3.481046,2.092899,12.984517,1.452216,1.388147,8.691938,3.481046
3,trevor-booker-1,Trevor Booker,Player,clemson,224,249,0.569,0.559,0.428,0.624,134,79,3806.0,1725,144,0.324000,23.0,1060,0.584,15.0,260.0,0.250,2.354178,2.616921,18.129270,1.513400,0.241724,11.140305,2.732528
4,jason-smith-1,Jason Smith,Player,colorado state,173,149,0.553,0.548,0.578,0.751,87,84,2429.0,1281,47,0.308000,8.0,683,0.604,20.1,266.0,0.172,2.848909,2.453685,21.095101,0.773981,0.131741,11.247427,4.380403
5,noah-vonleh-1,Noah Vonleh,Player,indiana,18,41,0.560,0.523,0.620,0.716,30,82,794.0,338,26,0.485000,16.0,269,0.604,18.6,64.0,0.182,0.906801,2.065491,17.027708,1.309824,0.806045,13.551637,3.224181
6,josh-okogie-1,Josh Okogie,Player,georgia tech,120,50,0.481,0.437,0.539,0.777,61,76,2014.0,1033,90,0.382000,66.0,351,0.550,12.8,138.0,0.157,2.383317,0.993049,20.516385,1.787488,1.310824,6.971202,2.740814
7,jordan-farmar-1,Jordan Farmar,Player,ucla,342,12,0.478,0.410,0.369,0.760,66,74,2122.0,881,82,0.333000,97.0,196,0.526,22.9,248.0,0.122,6.446748,0.226202,16.606975,1.545712,1.828464,3.694628,4.674835
8,pascal-siakam-1,Pascal Siakam,Player,new mexico state,103,136,0.553,0.551,0.424,0.711,68,81,2224.0,1126,62,0.176000,3.0,657,0.586,12.0,131.0,0.246,1.852518,2.446043,20.251799,1.115108,0.053957,11.816547,2.356115
9,meyers-leonard-1,Meyers Leonard,Player,illinois,48,73,0.569,0.567,0.413,0.729,65,85,1290.0,502,20,0.083000,1.0,303,0.601,18.4,94.0,0.149,1.488372,2.263566,15.565891,0.620155,0.031008,9.395349,2.914729


In [55]:
# Split the x variables (WITHOUT Player, Team, and College Columns) from the y variable

X = ncaaDF2.drop(["player_id",
                 "name",
                 "class", 
                 "college", 
                 "assists", 
                 "blocks", 
                 "effective_field_goal_percentage",
                 "field_goal_percentage",
                 "free_throw_attempt_rate",
                 "free_throw_percentage",
                 "height", 
                 #"points", 
                 "steals", 
                 "three_point_percentage",
                 "three_pointers",
                 "total_rebounds", 
                 "turnover_percentage",
                 "turnovers"
                ], axis=1)
y = ncaaDF2["class"]
print(f"Number of Rows and Independent(X) Variable: {X.shape}, \n Number of Rows (One Dependent(Y) variable): {y.shape}")

Number of Rows and Independent(X) Variable: (853, 12), 
 Number of Rows (One Dependent(Y) variable): (853,)


In [56]:
# # scikit-learn k-fold cross-validation (making multiple training datasets)

# kfold = KFold(5, True, 1)


# # This works for pandas dataframe giving one train-test split
# result = next(kfold.split(X), None)

# X_train = X.iloc[result[0]]
# X_test = X.iloc[result[1]]

In [57]:
# Label encoding for Dependent(Y) Variable

label_encoder = LabelEncoder()
label_encoder.fit(y)
encoded_y = label_encoder.transform(y)

for label, original_class in zip(encoded_y, y):
    print('Original Class: ' + str(original_class))
    print('Encoded Label: ' + str(label))
    print('-' * 12)

Original Class: Player
Encoded Label: 1
------------
Original Class: Bust
Encoded Label: 0
------------
Original Class: Bust
Encoded Label: 0
------------
Original Class: Player
Encoded Label: 1
------------
Original Class: Player
Encoded Label: 1
------------
Original Class: Player
Encoded Label: 1
------------
Original Class: Player
Encoded Label: 1
------------
Original Class: Player
Encoded Label: 1
------------
Original Class: Player
Encoded Label: 1
------------
Original Class: Player
Encoded Label: 1
------------
Original Class: Bust
Encoded Label: 0
------------
Original Class: Player
Encoded Label: 1
------------
Original Class: Player
Encoded Label: 1
------------
Original Class: Bust
Encoded Label: 0
------------
Original Class: Player
Encoded Label: 1
------------
Original Class: Bust
Encoded Label: 0
------------
Original Class: Player
Encoded Label: 1
------------
Original Class: Player
Encoded Label: 1
------------
Original Class: Bust
Encoded Label: 0
------------
Origi

------------
Original Class: Player
Encoded Label: 1
------------
Original Class: Player
Encoded Label: 1
------------
Original Class: Player
Encoded Label: 1
------------
Original Class: Bust
Encoded Label: 0
------------
Original Class: Player
Encoded Label: 1
------------
Original Class: Bust
Encoded Label: 0
------------
Original Class: Player
Encoded Label: 1
------------
Original Class: Player
Encoded Label: 1
------------
Original Class: Player
Encoded Label: 1
------------
Original Class: Player
Encoded Label: 1
------------
Original Class: Player
Encoded Label: 1
------------
Original Class: Player
Encoded Label: 1
------------
Original Class: Player
Encoded Label: 1
------------
Original Class: Player
Encoded Label: 1
------------
Original Class: Player
Encoded Label: 1
------------
Original Class: Player
Encoded Label: 1
------------
Original Class: Player
Encoded Label: 1
------------
Original Class: Player
Encoded Label: 1
------------
Original Class: Player
Encoded Label:

In [58]:
# Use train_test_split to create training and testing data

X_train, X_test, y_train, y_test = train_test_split(X, encoded_y, test_size=float(0.2), random_state=1)

In [59]:
X_scaler = StandardScaler().fit(X_train)

X_train_scaled = X_scaler.transform(X_train)
X_test_scaled = X_scaler.transform(X_test)

In [60]:
#### Create a normal neural network with 12 inputs (12 independent variables), 6 hidden nodes, and 4 outputs
# input_dim refers to number of independent(X) variables
# units is output shape i.e. number of categories in the Dependent Variable Column
# In this example, final units=4 because Four Rankings: Star, Above Average, Below Average, Bust

model = Sequential()
model.add(Dense(units=30, activation='relu', input_dim=X_train.shape[1]))
model.add(Dense(units=25, activation='relu'))
model.add(Dense(units=20, activation='relu'))
model.add(Dense(units=len(y.unique()), activation='softmax'))
model.summary()

_________________________________________________________________
Layer (type)                 Output Shape              Param #   
dense_4 (Dense)              (None, 30)                390       
_________________________________________________________________
dense_5 (Dense)              (None, 25)                775       
_________________________________________________________________
dense_6 (Dense)              (None, 20)                520       
_________________________________________________________________
dense_7 (Dense)              (None, 2)                 42        
Total params: 1,727
Trainable params: 1,727
Non-trainable params: 0
_________________________________________________________________


In [61]:
# Compile the model

model.compile(optimizer='adam',
              loss='sparse_categorical_crossentropy',
              metrics=['accuracy']
             )

In [62]:
# Fit the model

model.fit(
    X_train_scaled,
    y_train,
    epochs=100,
    batch_size=10,
    shuffle=True,
    verbose=2
)

Epoch 1/100
 - 0s - loss: 0.6145 - acc: 0.7155
Epoch 2/100
 - 0s - loss: 0.5611 - acc: 0.7258
Epoch 3/100
 - 0s - loss: 0.5459 - acc: 0.7302
Epoch 4/100
 - 0s - loss: 0.5348 - acc: 0.7346
Epoch 5/100
 - 0s - loss: 0.5266 - acc: 0.7331
Epoch 6/100
 - 0s - loss: 0.5200 - acc: 0.7375
Epoch 7/100
 - 0s - loss: 0.5129 - acc: 0.7390
Epoch 8/100
 - 0s - loss: 0.5064 - acc: 0.7434
Epoch 9/100
 - 0s - loss: 0.5024 - acc: 0.7493
Epoch 10/100
 - 0s - loss: 0.4941 - acc: 0.7551
Epoch 11/100
 - 0s - loss: 0.4898 - acc: 0.7537
Epoch 12/100
 - 0s - loss: 0.4822 - acc: 0.7581
Epoch 13/100
 - 0s - loss: 0.4787 - acc: 0.7507
Epoch 14/100
 - 0s - loss: 0.4701 - acc: 0.7625
Epoch 15/100
 - 0s - loss: 0.4644 - acc: 0.7669
Epoch 16/100
 - 0s - loss: 0.4573 - acc: 0.7742
Epoch 17/100
 - 0s - loss: 0.4543 - acc: 0.7683
Epoch 18/100
 - 0s - loss: 0.4421 - acc: 0.7903
Epoch 19/100
 - 0s - loss: 0.4340 - acc: 0.7977
Epoch 20/100
 - 0s - loss: 0.4296 - acc: 0.8050
Epoch 21/100
 - 0s - loss: 0.4187 - acc: 0.8079
E

<tensorflow.python.keras.callbacks.History at 0x22d1f6836d8>

In [63]:
# Save the model

model.save("Draft_Machine_NeuralNetwork.h5")

In [64]:
# Evaluate the model using the testing data

model = load_model("Draft_Machine_NeuralNetwork.h5")
model_loss, model_accuracy = model.evaluate(
    X_test_scaled, y_test, verbose=2)
print(f"Loss: {model_loss}, Accuracy: {model_accuracy}")

 - 0s - loss: 2.1173 - acc: 0.6257
Loss: 2.1173416477197793, Accuracy: 0.6257309913635254
