In [69]:
# Set the seed value for the notebook so the results are reproducible

from numpy.random import seed
seed(1)

In [70]:
# import necessary libraries

from tqdm import trange, tqdm_notebook
from time import sleep

import numpy as np

import pandas as pd

from keras.models import Sequential
from keras.layers import Dense
from tensorflow.keras.utils import to_categorical

from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
from sklearn.preprocessing import LabelEncoder
from sklearn.datasets import make_classification

In [71]:
# load pandas DataFrame
nbaDF = pd.read_csv("nba_data.csv")
nbaDF.head()

Unnamed: 0,Win Shares,Mins Played (per game),Player,Games,Year,Rank,Points(per game),WS/48,Box +/-,Value Overall Replacement Player,Team,College,Years,Mins Played (Total),Points (Total),Class,player_id
0,48.0,30.6,Kenyon Martin,757,2000,1,12.3,0.1,0.7,15.5,NJN,Cincinnati,15,23134,9325,Above Avg,kenyon-martin-1
1,0.3,15.6,A.J. Guyton,80,2000,32,5.5,0.01,-5.4,-1.1,CHI,Indiana,3,1246,442,Below Avg,aj-guyton-1
2,11.6,14.3,Jake Voskuhl,450,2000,33,4.0,0.087,-2.2,-0.3,CHI,UConn,9,6443,1814,Below Avg,jake-voskuhl-1
3,0.6,18.7,Khalid El-Amin,50,2000,34,6.3,0.03,-3.8,-0.4,CHI,UConn,1,936,314,Bust,khalid-el-amin-1
4,-0.1,10.6,Mike Smith,17,2000,35,3.0,-0.033,-5.5,-0.2,WAS,University of Louisiana at Monroe,1,180,51,Bust,mike-smith-1


In [72]:
# Split the x variables (WITHOUT Player, Team, and College Columns) from the y variable

X = nbaDF.drop(["Player", "Team", "College", "player_id", "Class"], axis=1)
y = nbaDF["Class"]
print(f"Number of Rows and Independent(X) Variable: {X.shape}, \n Number of Rows (One y variable): {y.shape}")

Number of Rows and Independent(X) Variable: (829, 12), 
 Number of Rows (One y variable): (829,)


In [73]:
X.head()

Unnamed: 0,Win Shares,Mins Played (per game),Games,Year,Rank,Points(per game),WS/48,Box +/-,Value Overall Replacement Player,Years,Mins Played (Total),Points (Total)
0,48.0,30.6,757,2000,1,12.3,0.1,0.7,15.5,15,23134,9325
1,0.3,15.6,80,2000,32,5.5,0.01,-5.4,-1.1,3,1246,442
2,11.6,14.3,450,2000,33,4.0,0.087,-2.2,-0.3,9,6443,1814
3,0.6,18.7,50,2000,34,6.3,0.03,-3.8,-0.4,1,936,314
4,-0.1,10.6,17,2000,35,3.0,-0.033,-5.5,-0.2,1,180,51


In [74]:
# Label encoding for Dependent(Y) Variable

label_encoder = LabelEncoder()
label_encoder.fit(y)
encoded_y = label_encoder.transform(y)

for label, original_class in zip(encoded_y, y):
    print('Original Class: ' + str(original_class))
    print('Encoded Label: ' + str(label))
    print('-' * 12)

Original Class: Above Avg
Encoded Label: 0
------------
Original Class: Below Avg
Encoded Label: 1
------------
Original Class: Below Avg
Encoded Label: 1
------------
Original Class: Bust
Encoded Label: 2
------------
Original Class: Bust
Encoded Label: 2
------------
Original Class: Above Avg
Encoded Label: 0
------------
Original Class: Below Avg
Encoded Label: 1
------------
Original Class: Bust
Encoded Label: 2
------------
Original Class: Star
Encoded Label: 3
------------
Original Class: Below Avg
Encoded Label: 1
------------
Original Class: Below Avg
Encoded Label: 1
------------
Original Class: Bust
Encoded Label: 2
------------
Original Class: Above Avg
Encoded Label: 0
------------
Original Class: Below Avg
Encoded Label: 1
------------
Original Class: Bust
Encoded Label: 2
------------
Original Class: Bust
Encoded Label: 2
------------
Original Class: Bust
Encoded Label: 2
------------
Original Class: Bust
Encoded Label: 2
------------
Original Class: Bust
Encoded Label: 2

Original Class: Below Avg
Encoded Label: 1
------------
Original Class: Above Avg
Encoded Label: 0
------------
Original Class: Below Avg
Encoded Label: 1
------------
Original Class: Below Avg
Encoded Label: 1
------------
Original Class: Star
Encoded Label: 3
------------
Original Class: Bust
Encoded Label: 2
------------
Original Class: Below Avg
Encoded Label: 1
------------
Original Class: Above Avg
Encoded Label: 0
------------
Original Class: Bust
Encoded Label: 2
------------
Original Class: Bust
Encoded Label: 2
------------
Original Class: Bust
Encoded Label: 2
------------
Original Class: Above Avg
Encoded Label: 0
------------
Original Class: Below Avg
Encoded Label: 1
------------
Original Class: Bust
Encoded Label: 2
------------
Original Class: Bust
Encoded Label: 2
------------
Original Class: Below Avg
Encoded Label: 1
------------
Original Class: Bust
Encoded Label: 2
------------
Original Class: Bust
Encoded Label: 2
------------
Original Class: Bust
Encoded Label: 2

Encoded Label: 1
------------
Original Class: Above Avg
Encoded Label: 0
------------
Original Class: Below Avg
Encoded Label: 1
------------
Original Class: Below Avg
Encoded Label: 1
------------
Original Class: Below Avg
Encoded Label: 1
------------
Original Class: Above Avg
Encoded Label: 0
------------
Original Class: Above Avg
Encoded Label: 0
------------
Original Class: Above Avg
Encoded Label: 0
------------
Original Class: Above Avg
Encoded Label: 0
------------
Original Class: Below Avg
Encoded Label: 1
------------
Original Class: Below Avg
Encoded Label: 1
------------
Original Class: Below Avg
Encoded Label: 1
------------
Original Class: Bust
Encoded Label: 2
------------
Original Class: Below Avg
Encoded Label: 1
------------
Original Class: Above Avg
Encoded Label: 0
------------
Original Class: Bust
Encoded Label: 2
------------
Original Class: Bust
Encoded Label: 2
------------
Original Class: Below Avg
Encoded Label: 1
------------
Original Class: Below Avg
Encoded

Encoded Label: 0
------------
Original Class: Below Avg
Encoded Label: 1
------------
Original Class: Above Avg
Encoded Label: 0
------------
Original Class: Below Avg
Encoded Label: 1
------------
Original Class: Above Avg
Encoded Label: 0
------------
Original Class: Below Avg
Encoded Label: 1
------------
Original Class: Bust
Encoded Label: 2
------------
Original Class: Below Avg
Encoded Label: 1
------------
Original Class: Below Avg
Encoded Label: 1
------------
Original Class: Above Avg
Encoded Label: 0
------------
Original Class: Below Avg
Encoded Label: 1
------------
Original Class: Above Avg
Encoded Label: 0
------------
Original Class: Below Avg
Encoded Label: 1
------------
Original Class: Below Avg
Encoded Label: 1
------------
Original Class: Below Avg
Encoded Label: 1
------------
Original Class: Bust
Encoded Label: 2
------------
Original Class: Bust
Encoded Label: 2
------------
Original Class: Bust
Encoded Label: 2
------------
Original Class: Below Avg
Encoded Labe

IOPub message rate exceeded.
The notebook server will temporarily stop sending output
to the client in order to avoid crashing it.
To change this limit, set the config variable
`--NotebookApp.iopub_msg_rate_limit`.

Current values:
NotebookApp.iopub_msg_rate_limit=1000.0 (msgs/sec)
NotebookApp.rate_limit_window=3.0 (secs)



Encoded Label: 1
------------
Original Class: Below Avg
Encoded Label: 1
------------
Original Class: Below Avg
Encoded Label: 1
------------
Original Class: Bust
Encoded Label: 2
------------
Original Class: Below Avg
Encoded Label: 1
------------
Original Class: Bust
Encoded Label: 2
------------
Original Class: Below Avg
Encoded Label: 1
------------
Original Class: Bust
Encoded Label: 2
------------
Original Class: Bust
Encoded Label: 2
------------
Original Class: Bust
Encoded Label: 2
------------
Original Class: Bust
Encoded Label: 2
------------
Original Class: Below Avg
Encoded Label: 1
------------
Original Class: Bust
Encoded Label: 2
------------
Original Class: Below Avg
Encoded Label: 1
------------
Original Class: Below Avg
Encoded Label: 1
------------
Original Class: Bust
Encoded Label: 2
------------
Original Class: Above Avg
Encoded Label: 0
------------
Original Class: Star
Encoded Label: 3
------------
Original Class: Below Avg
Encoded Label: 1
------------
Origina

In [75]:
# One-hot encoding the Dependent(Y) Variable

onehot_y = to_categorical(encoded_y)
onehot_y

array([[1., 0., 0., 0.],
       [0., 1., 0., 0.],
       [0., 1., 0., 0.],
       ...,
       [1., 0., 0., 0.],
       [0., 1., 0., 0.],
       [0., 1., 0., 0.]], dtype=float32)

In [84]:
# Use train_test_split to create training and testing data

X_train, X_test, y_train, y_test = train_test_split(X, onehot_y, test_size=float(0.2), random_state=1)

In [85]:
# Make sure the dependent variable categories are set up

y_train_categorical = to_categorical(y_train)
y_test_categorical = to_categorical(y_test)

In [86]:
# Create a StandardScaler model to fit to the training data

X_train_scaled = StandardScaler().fit(X_train)

  return self.partial_fit(X, y)


In [87]:
# Create a normal neural network with 12 inputs (12 independent variables), 6 hidden nodes, and 4 outputs
# input_dim refers to number of independent(X) variables
# units is output shape i.e. number of categories in the Dependent Variable Column
# In this example, final units=4 because Four Rankings: Star, Above Average, Below Average, Bust

model = Sequential()
model.add(Dense(units=6, activation='relu', input_dim=12))
model.add(Dense(units=6, activation='relu'))
model.add(Dense(units=4, activation='softmax'))
model.summary()

Model: "sequential_5"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
dense_13 (Dense)             (None, 6)                 78        
_________________________________________________________________
dense_14 (Dense)             (None, 6)                 42        
_________________________________________________________________
dense_15 (Dense)             (None, 4)                 28        
Total params: 148
Trainable params: 148
Non-trainable params: 0
_________________________________________________________________


In [90]:
# Compile the model

model.compile(optimizer='adam',
              loss='categorical_crossentropy',
              metrics=['accuracy'])

model.fit(
    X_train_scaled,
    y_train_categorical,
    epochs=100,
    shuffle=True,
    verbose=2
)

AttributeError: 'StandardScaler' object has no attribute 'ndim'

In [None]:
# Evaluate the model using the testing data

model_loss, model_accuracy = model.evaluate(
    X_test_scaled, y_test_categorical, verbose=2)
print(f"Loss: {model_loss}, Accuracy: {model_accuracy}")