# Initial Data Setup

In [2]:
# Initial imports
import pandas as pd
from pathlib import Path
from sklearn import tree
from sklearn.preprocessing import StandardScaler
from sklearn.model_selection import train_test_split
from sklearn.metrics import confusion_matrix, accuracy_score, classification_report
import tensorflow as tf
import keras_tuner as kt

In [3]:
# Files to Import
all_nba_file = Path('Resources/End of Season Teams.csv')
all_star_file = Path('Resources/All-Star Selections.csv')
stats_file = Path('Resources/Player Per Game.csv')

# Set up DataFrames
all_nba_df = pd.read_csv(all_nba_file)
all_star_df = pd.read_csv(all_star_file)
stats_df = pd.read_csv(stats_file)

In [4]:
# Review DataFrames
all_nba_df.head()

Unnamed: 0,season,lg,type,number_tm,player,position,seas_id,player_id,birth_year,tm,age
0,2022,NBA,All-Defense,1st,Giannis Antetokounmpo,,29918,4164,,MIL,27
1,2022,NBA,All-Defense,1st,Jaren Jackson Jr.,,29991,4632,,MEM,22
2,2022,NBA,All-Defense,1st,Marcus Smart,,30183,4275,,BOS,27
3,2022,NBA,All-Defense,1st,Mikal Bridges,,30205,4658,,PHO,25
4,2022,NBA,All-Defense,1st,Rudy Gobert,,30313,4199,,UTA,29


In [5]:
all_star_df.head()

Unnamed: 0,player,team,lg,season,replaced
0,Bam Adebayo,East,NBA,2024,False
1,Giannis Antetokounmpo,East,NBA,2024,False
2,Paolo Banchero,East,NBA,2024,False
3,Scottie Barnes,East,NBA,2024,False
4,Jaylen Brown,East,NBA,2024,False


In [6]:
stats_df.head()

Unnamed: 0,seas_id,season,player_id,player,birth_year,pos,age,experience,lg,tm,...,ft_percent,orb_per_game,drb_per_game,trb_per_game,ast_per_game,stl_per_game,blk_per_game,tov_per_game,pf_per_game,pts_per_game
0,31136,2024,5025,A.J. Green,,SG,24.0,2,NBA,MIL,...,1.0,0.2,0.9,1.0,0.5,0.1,0.1,0.1,0.9,4.3
1,31137,2024,5026,A.J. Lawson,,SG,23.0,2,NBA,DAL,...,0.632,0.4,0.8,1.2,0.5,0.3,0.1,0.4,0.7,3.8
2,31138,2024,5027,AJ Griffin,,SF,20.0,2,NBA,ATL,...,1.0,0.1,0.7,0.8,0.2,0.1,0.1,0.3,0.3,2.1
3,31139,2024,4219,Aaron Gordon,,PF,28.0,10,NBA,DEN,...,0.652,2.4,4.1,6.5,3.2,0.9,0.7,1.5,1.9,13.9
4,31140,2024,4582,Aaron Holiday,,PG,27.0,6,NBA,HOU,...,0.889,0.3,1.4,1.7,1.8,0.5,0.1,0.8,1.6,7.0


# Data Cleaning

In [7]:
# Dropping unneeded columns
all_nba_clean = all_nba_df.drop(columns=['lg','number_tm','position','seas_id','player_id','birth_year','tm','age'])

# Getting dummy columns and combining the players seasons so that their multiple awards are shown
all_nba_pivot = pd.pivot_table(all_nba_clean, index=['season','player'], columns='type', aggfunc=lambda x:'Yes').fillna('No')

# Dropping the old awards for defunct leagues
all_nba_pivot_clean = all_nba_pivot.drop(columns=['All-ABA','All-BAA'])
all_nba_pivot_clean.tail(25)

Unnamed: 0_level_0,type,All-Defense,All-NBA,All-Rookie
season,player,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
2022,Evan Mobley,No,No,Yes
2022,Franz Wagner,No,No,Yes
2022,Giannis Antetokounmpo,Yes,Yes,No
2022,Herbert Jones,No,No,Yes
2022,Ja Morant,No,Yes,No
2022,Jalen Green,No,No,Yes
2022,Jaren Jackson Jr.,Yes,No,No
2022,Jayson Tatum,No,Yes,No
2022,Joel Embiid,No,Yes,No
2022,Josh Giddey,No,No,Yes


In [8]:
# Dropping unneeded columns
all_star_clean = all_star_df.drop(columns=['team','lg','replaced'])
all_star_clean['All Star'] = 'Yes'
all_star_clean.head()

Unnamed: 0,player,season,All Star
0,Bam Adebayo,2024,Yes
1,Giannis Antetokounmpo,2024,Yes
2,Paolo Banchero,2024,Yes
3,Scottie Barnes,2024,Yes
4,Jaylen Brown,2024,Yes


In [9]:
# Merging
first_merged_df = pd.merge(stats_df, all_nba_pivot_clean, on=['season','player'], how='left')
merged_df = pd.merge(first_merged_df, all_star_clean, on=['season','player'], how='left')

merged_df = merged_df[merged_df['lg'] == 'NBA']
merged_df.drop(columns=['season','player','pos','seas_id','player_id','birth_year','lg','tm','fg_per_game','fga_per_game','x3p_per_game','x2p_per_game',
'x2pa_per_game','x2p_percent','ft_per_game'], inplace=True)
merged_df.head()

Unnamed: 0,age,experience,g,gs,mp_per_game,fg_percent,x3pa_per_game,x3p_percent,e_fg_percent,fta_per_game,...,ast_per_game,stl_per_game,blk_per_game,tov_per_game,pf_per_game,pts_per_game,All-Defense,All-NBA,All-Rookie,All Star
0,24.0,2,39,0.0,9.2,0.438,2.8,0.423,0.621,0.2,...,0.5,0.1,0.1,0.1,0.9,4.3,,,,
1,23.0,2,28,0.0,8.3,0.471,1.4,0.325,0.547,0.7,...,0.5,0.3,0.1,0.4,0.7,3.8,,,,
2,20.0,2,18,0.0,7.3,0.289,1.8,0.273,0.389,0.1,...,0.2,0.1,0.1,0.3,0.3,2.1,,,,
3,28.0,10,54,54.0,31.5,0.557,1.8,0.293,0.585,3.7,...,3.2,0.9,0.7,1.5,1.9,13.9,,,,
4,27.0,6,56,1.0,17.3,0.455,3.0,0.41,0.565,0.8,...,1.8,0.5,0.1,0.8,1.6,7.0,,,,


# Creating the All Star Prediction Model

In [10]:
# Setting up the data sets for each awards models
all_star_data = merged_df.copy().drop(columns=['All-Defense','All-NBA','All-Rookie'])
all_star_data['All Star'].fillna('No', inplace=True)
y = all_star_data['All Star']
X = all_star_data.drop(columns=['All Star'])

# Splitting into training & testing
X_train, X_test, y_train, y_test = train_test_split(X, y, random_state=20)

# Scaling the x data
X_scaler = StandardScaler()
X_scaler.fit(X_train)
X_train_scaled = X_scaler.transform(X_train)
X_test_scaled = X_scaler.transform(X_test)


In [11]:
# Create a method that creates a new Sequential model with hyperparameter options
def create_model(hp):
    nn_model = tf.keras.models.Sequential()

    # Allow kerastuner to decide which activation function to use in hidden layers
    activation = hp.Choice('activation',['relu','tanh','sigmoid'])
    
    # Allow kerastuner to decide number of neurons in first layer
    nn_model.add(tf.keras.layers.Dense(units=hp.Int('first_units',
        min_value=1,
        max_value=10,
        step=2), activation=activation, input_dim=2))

    # Allow kerastuner to decide number of hidden layers and neurons in hidden layers
    for i in range(hp.Int('num_layers', 1, 6)):
        nn_model.add(tf.keras.layers.Dense(units=hp.Int('units_' + str(i),
            min_value=1,
            max_value=10,
            step=2),
            activation=activation))
    
    nn_model.add(tf.keras.layers.Dense(units=1, activation="sigmoid"))

    # Compile the model
    nn_model.compile(loss="binary_crossentropy", optimizer='adam', metrics=["accuracy"])
    
    return nn_model

In [15]:
# Define the second model - deep neural net, i.e., the number of input features and hidden nodes for each layer.
number_input_features = 20
hidden_nodes_layer1 =  8
hidden_nodes_layer2 = 5
hidden_nodes_layer3 = 3

nn = tf.keras.models.Sequential()

# First hidden layer
nn.add(
    tf.keras.layers.Dense(units=hidden_nodes_layer1, input_dim=number_input_features, activation="relu")
)

# Second hidden layer
nn.add(tf.keras.layers.Dense(units=hidden_nodes_layer2, activation="relu"))

# Third hidden layer
nn.add(tf.keras.layers.Dense(units=hidden_nodes_layer2, activation="relu"))

# Output layer
nn.add(tf.keras.layers.Dense(units=1, activation="sigmoid"))

# Check the structure of the model
nn.summary()

Model: "sequential_1"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 dense_4 (Dense)             (None, 8)                 168       
                                                                 
 dense_5 (Dense)             (None, 5)                 45        
                                                                 
 dense_6 (Dense)             (None, 5)                 30        
                                                                 
 dense_7 (Dense)             (None, 1)                 6         
                                                                 
Total params: 249
Trainable params: 249
Non-trainable params: 0
_________________________________________________________________


In [16]:
# Compile the model
nn.compile(loss="binary_crossentropy", optimizer="adam", metrics=["accuracy"])

In [17]:
fit_model = nn.fit(X_train_scaled,y_train,epochs=100)

Epoch 1/100


2024-03-24 16:36:38.371517: W tensorflow/core/framework/op_kernel.cc:1757] OP_REQUIRES failed at cast_op.cc:121 : UNIMPLEMENTED: Cast string to float is not supported


UnimplementedError: Graph execution error:

Detected at node 'binary_crossentropy/Cast' defined at (most recent call last):
    File "/Users/will/anaconda3/envs/dev/lib/python3.10/runpy.py", line 196, in _run_module_as_main
      return _run_code(code, main_globals, None,
    File "/Users/will/anaconda3/envs/dev/lib/python3.10/runpy.py", line 86, in _run_code
      exec(code, run_globals)
    File "/Users/will/anaconda3/envs/dev/lib/python3.10/site-packages/ipykernel_launcher.py", line 17, in <module>
      app.launch_new_instance()
    File "/Users/will/anaconda3/envs/dev/lib/python3.10/site-packages/traitlets/config/application.py", line 992, in launch_instance
      app.start()
    File "/Users/will/anaconda3/envs/dev/lib/python3.10/site-packages/ipykernel/kernelapp.py", line 736, in start
      self.io_loop.start()
    File "/Users/will/anaconda3/envs/dev/lib/python3.10/site-packages/tornado/platform/asyncio.py", line 195, in start
      self.asyncio_loop.run_forever()
    File "/Users/will/anaconda3/envs/dev/lib/python3.10/asyncio/base_events.py", line 603, in run_forever
      self._run_once()
    File "/Users/will/anaconda3/envs/dev/lib/python3.10/asyncio/base_events.py", line 1909, in _run_once
      handle._run()
    File "/Users/will/anaconda3/envs/dev/lib/python3.10/asyncio/events.py", line 80, in _run
      self._context.run(self._callback, *self._args)
    File "/Users/will/anaconda3/envs/dev/lib/python3.10/site-packages/ipykernel/kernelbase.py", line 516, in dispatch_queue
      await self.process_one()
    File "/Users/will/anaconda3/envs/dev/lib/python3.10/site-packages/ipykernel/kernelbase.py", line 505, in process_one
      await dispatch(*args)
    File "/Users/will/anaconda3/envs/dev/lib/python3.10/site-packages/ipykernel/kernelbase.py", line 412, in dispatch_shell
      await result
    File "/Users/will/anaconda3/envs/dev/lib/python3.10/site-packages/ipykernel/kernelbase.py", line 740, in execute_request
      reply_content = await reply_content
    File "/Users/will/anaconda3/envs/dev/lib/python3.10/site-packages/ipykernel/ipkernel.py", line 422, in do_execute
      res = shell.run_cell(
    File "/Users/will/anaconda3/envs/dev/lib/python3.10/site-packages/ipykernel/zmqshell.py", line 546, in run_cell
      return super().run_cell(*args, **kwargs)
    File "/Users/will/anaconda3/envs/dev/lib/python3.10/site-packages/IPython/core/interactiveshell.py", line 3024, in run_cell
      result = self._run_cell(
    File "/Users/will/anaconda3/envs/dev/lib/python3.10/site-packages/IPython/core/interactiveshell.py", line 3079, in _run_cell
      result = runner(coro)
    File "/Users/will/anaconda3/envs/dev/lib/python3.10/site-packages/IPython/core/async_helpers.py", line 129, in _pseudo_sync_runner
      coro.send(None)
    File "/Users/will/anaconda3/envs/dev/lib/python3.10/site-packages/IPython/core/interactiveshell.py", line 3284, in run_cell_async
      has_raised = await self.run_ast_nodes(code_ast.body, cell_name,
    File "/Users/will/anaconda3/envs/dev/lib/python3.10/site-packages/IPython/core/interactiveshell.py", line 3466, in run_ast_nodes
      if await self.run_code(code, result, async_=asy):
    File "/Users/will/anaconda3/envs/dev/lib/python3.10/site-packages/IPython/core/interactiveshell.py", line 3526, in run_code
      exec(code_obj, self.user_global_ns, self.user_ns)
    File "/var/folders/7t/vhwsvxrn0ls139l8xv43n7240000gn/T/ipykernel_13545/1205479617.py", line 1, in <module>
      fit_model = nn.fit(X_train_scaled,y_train,epochs=100)
    File "/Users/will/anaconda3/envs/dev/lib/python3.10/site-packages/keras/utils/traceback_utils.py", line 65, in error_handler
      return fn(*args, **kwargs)
    File "/Users/will/anaconda3/envs/dev/lib/python3.10/site-packages/keras/engine/training.py", line 1564, in fit
      tmp_logs = self.train_function(iterator)
    File "/Users/will/anaconda3/envs/dev/lib/python3.10/site-packages/keras/engine/training.py", line 1160, in train_function
      return step_function(self, iterator)
    File "/Users/will/anaconda3/envs/dev/lib/python3.10/site-packages/keras/engine/training.py", line 1146, in step_function
      outputs = model.distribute_strategy.run(run_step, args=(data,))
    File "/Users/will/anaconda3/envs/dev/lib/python3.10/site-packages/keras/engine/training.py", line 1135, in run_step
      outputs = model.train_step(data)
    File "/Users/will/anaconda3/envs/dev/lib/python3.10/site-packages/keras/engine/training.py", line 994, in train_step
      loss = self.compute_loss(x, y, y_pred, sample_weight)
    File "/Users/will/anaconda3/envs/dev/lib/python3.10/site-packages/keras/engine/training.py", line 1052, in compute_loss
      return self.compiled_loss(
    File "/Users/will/anaconda3/envs/dev/lib/python3.10/site-packages/keras/engine/compile_utils.py", line 265, in __call__
      loss_value = loss_obj(y_t, y_p, sample_weight=sw)
    File "/Users/will/anaconda3/envs/dev/lib/python3.10/site-packages/keras/losses.py", line 152, in __call__
      losses = call_fn(y_true, y_pred)
    File "/Users/will/anaconda3/envs/dev/lib/python3.10/site-packages/keras/losses.py", line 272, in call
      return ag_fn(y_true, y_pred, **self._fn_kwargs)
    File "/Users/will/anaconda3/envs/dev/lib/python3.10/site-packages/keras/losses.py", line 2151, in binary_crossentropy
      y_true = tf.cast(y_true, y_pred.dtype)
Node: 'binary_crossentropy/Cast'
Cast string to float is not supported
	 [[{{node binary_crossentropy/Cast}}]] [Op:__inference_train_function_1074]

# Setting up All-NBA Prediction Model

In [16]:
# Setting up the data sets for each awards models
all_nba_data = merged_df.copy().drop(columns=['All-Defense','All Star','All-Rookie'])
all_nba_data['All-NBA'].fillna('No', inplace=True)
y = all_nba_data['All-NBA']
X = all_nba_data.drop(columns=['All-NBA'])

# Splitting into training & testing
X_train, X_test, y_train, y_test = train_test_split(X, y, random_state=20)

# Scaling the x data
X_scaler = StandardScaler()
X_scaler.fit(X_train)
X_train_scaled = X_scaler.transform(X_train)
X_test_scaled = X_scaler.transform(X_test)

# Setting up All-Defense Prediction Model

In [17]:
# Setting up the data sets for each awards models
all_d_data = merged_df.copy().drop(columns=['All Star','All-NBA','All-Rookie'])
all_d_data['All-Defense'].fillna('No', inplace=True)
y = all_d_data['All-Defense']
X = all_d_data.drop(columns=['All-Defense'])

# Splitting into training & testing
X_train, X_test, y_train, y_test = train_test_split(X, y, random_state=20)

# Scaling the x data
X_scaler = StandardScaler()
X_scaler.fit(X_train)
X_train_scaled = X_scaler.transform(X_train)
X_test_scaled = X_scaler.transform(X_test)

# Setting up All-Rookie Prediction Model

In [18]:
# Setting up the data sets for each awards models
all_rookie_data = merged_df.copy().drop(columns=['All-Defense','All-NBA','All Star'])
all_rookie_data['All-Rookie'].fillna('No', inplace=True)
y = all_rookie_data['All-Rookie']
X = all_rookie_data.drop(columns=['All-Rookie'])

# Splitting into training & testing
X_train, X_test, y_train, y_test = train_test_split(X, y, random_state=20)

# Scaling the x data
X_scaler = StandardScaler()
X_scaler.fit(X_train)
X_train_scaled = X_scaler.transform(X_train)
X_test_scaled = X_scaler.transform(X_test)