# Initial Data Setup

In [3]:
# Initial imports
import pandas as pd
from pathlib import Path
from sklearn import tree
from sklearn.preprocessing import StandardScaler
from sklearn.model_selection import train_test_split
from sklearn.metrics import confusion_matrix, accuracy_score, classification_report
# import tensorflow as tf
# import keras_tuner as kt

In [4]:
# Files to Import
all_nba_file = Path('Resources/End of Season Teams.csv')
all_star_file = Path('Resources/All-Star Selections.csv')
stats_file = Path('Resources/Player Per Game.csv')

# Set up DataFrames
all_nba_df = pd.read_csv(all_nba_file)
all_star_df = pd.read_csv(all_star_file)
stats_df = pd.read_csv(stats_file)

In [5]:
# Review DataFrames
all_nba_df.head()

Unnamed: 0,season,lg,type,number_tm,player,position,seas_id,player_id,birth_year,tm,age
0,2022,NBA,All-Defense,1st,Giannis Antetokounmpo,,29918,4164,,MIL,27
1,2022,NBA,All-Defense,1st,Jaren Jackson Jr.,,29991,4632,,MEM,22
2,2022,NBA,All-Defense,1st,Marcus Smart,,30183,4275,,BOS,27
3,2022,NBA,All-Defense,1st,Mikal Bridges,,30205,4658,,PHO,25
4,2022,NBA,All-Defense,1st,Rudy Gobert,,30313,4199,,UTA,29


In [6]:
all_star_df.head()

Unnamed: 0,player,team,lg,season,replaced
0,Bam Adebayo,East,NBA,2024,False
1,Giannis Antetokounmpo,East,NBA,2024,False
2,Paolo Banchero,East,NBA,2024,False
3,Scottie Barnes,East,NBA,2024,False
4,Jaylen Brown,East,NBA,2024,False


In [7]:
stats_df.head()

Unnamed: 0,seas_id,season,player_id,player,birth_year,pos,age,experience,lg,tm,...,ft_percent,orb_per_game,drb_per_game,trb_per_game,ast_per_game,stl_per_game,blk_per_game,tov_per_game,pf_per_game,pts_per_game
0,31136,2024,5025,A.J. Green,,SG,24.0,2,NBA,MIL,...,1.0,0.2,0.9,1.0,0.5,0.1,0.1,0.1,0.9,4.3
1,31137,2024,5026,A.J. Lawson,,SG,23.0,2,NBA,DAL,...,0.632,0.4,0.8,1.2,0.5,0.3,0.1,0.4,0.7,3.8
2,31138,2024,5027,AJ Griffin,,SF,20.0,2,NBA,ATL,...,1.0,0.1,0.7,0.8,0.2,0.1,0.1,0.3,0.3,2.1
3,31139,2024,4219,Aaron Gordon,,PF,28.0,10,NBA,DEN,...,0.652,2.4,4.1,6.5,3.2,0.9,0.7,1.5,1.9,13.9
4,31140,2024,4582,Aaron Holiday,,PG,27.0,6,NBA,HOU,...,0.889,0.3,1.4,1.7,1.8,0.5,0.1,0.8,1.6,7.0


# Data Cleaning

In [8]:
# Dropping unneeded columns
all_nba_clean = all_nba_df.drop(columns=['lg','number_tm','position','seas_id','player_id','birth_year','tm','age'])

# Getting dummy columns and combining the players seasons so that their multiple awards are shown
all_nba_pivot = pd.pivot_table(all_nba_clean, index=['season','player'], columns='type', aggfunc=lambda x:'Yes').fillna('No')

# Dropping the old awards for defunct leagues
all_nba_pivot_clean = all_nba_pivot.drop(columns=['All-ABA','All-BAA'])
all_nba_pivot_clean.tail(25)

Unnamed: 0_level_0,type,All-Defense,All-NBA,All-Rookie
season,player,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
2022,Evan Mobley,No,No,Yes
2022,Franz Wagner,No,No,Yes
2022,Giannis Antetokounmpo,Yes,Yes,No
2022,Herbert Jones,No,No,Yes
2022,Ja Morant,No,Yes,No
2022,Jalen Green,No,No,Yes
2022,Jaren Jackson Jr.,Yes,No,No
2022,Jayson Tatum,No,Yes,No
2022,Joel Embiid,No,Yes,No
2022,Josh Giddey,No,No,Yes


In [9]:
# Dropping unneeded columns
all_star_clean = all_star_df.drop(columns=['team','lg','replaced'])
all_star_clean['All Star'] = 'Yes'
all_star_clean.head()

Unnamed: 0,player,season,All Star
0,Bam Adebayo,2024,Yes
1,Giannis Antetokounmpo,2024,Yes
2,Paolo Banchero,2024,Yes
3,Scottie Barnes,2024,Yes
4,Jaylen Brown,2024,Yes


In [10]:
# Merging
first_merged_df = pd.merge(stats_df, all_nba_pivot_clean, on=['season','player'], how='left')
merged_df = pd.merge(first_merged_df, all_star_clean, on=['season','player'], how='left')

merged_df = merged_df[merged_df['lg'] == 'NBA']
merged_df.drop(columns=['season','player','pos','seas_id','player_id','birth_year','lg','tm','fg_per_game','fga_per_game','x3p_per_game','x2p_per_game',
'x2pa_per_game','x2p_percent','ft_per_game'], inplace=True)
merged_df.head()

Unnamed: 0,age,experience,g,gs,mp_per_game,fg_percent,x3pa_per_game,x3p_percent,e_fg_percent,fta_per_game,...,ast_per_game,stl_per_game,blk_per_game,tov_per_game,pf_per_game,pts_per_game,All-Defense,All-NBA,All-Rookie,All Star
0,24.0,2,39,0.0,9.2,0.438,2.8,0.423,0.621,0.2,...,0.5,0.1,0.1,0.1,0.9,4.3,,,,
1,23.0,2,28,0.0,8.3,0.471,1.4,0.325,0.547,0.7,...,0.5,0.3,0.1,0.4,0.7,3.8,,,,
2,20.0,2,18,0.0,7.3,0.289,1.8,0.273,0.389,0.1,...,0.2,0.1,0.1,0.3,0.3,2.1,,,,
3,28.0,10,54,54.0,31.5,0.557,1.8,0.293,0.585,3.7,...,3.2,0.9,0.7,1.5,1.9,13.9,,,,
4,27.0,6,56,1.0,17.3,0.455,3.0,0.41,0.565,0.8,...,1.8,0.5,0.1,0.8,1.6,7.0,,,,


# Creating the All Star Prediction Model

In [11]:
# Setting up the data sets for each awards models
all_star_data = merged_df.copy().drop(columns=['All-Defense','All-NBA','All-Rookie'])
all_star_data['All Star'].fillna('No', inplace=True)
y = all_star_data['All Star']
X = all_star_data.drop(columns=['All Star'])

# Splitting into training & testing
X_train, X_test, y_train, y_test = train_test_split(X, y, random_state=20)

# Scaling the x data
X_scaler = StandardScaler()
X_scaler.fit(X_train)
X_train_scaled = X_scaler.transform(X_train)
X_test_scaled = X_scaler.transform(X_test)


In [12]:
# Create a method that creates a new Sequential model with hyperparameter options
def create_model(hp):
    nn_model = tf.keras.models.Sequential()

    # Allow kerastuner to decide which activation function to use in hidden layers
    activation = hp.Choice('activation',['relu','tanh','sigmoid'])
    
    # Allow kerastuner to decide number of neurons in first layer
    nn_model.add(tf.keras.layers.Dense(units=hp.Int('first_units',
        min_value=1,
        max_value=10,
        step=2), activation=activation, input_dim=2))

    # Allow kerastuner to decide number of hidden layers and neurons in hidden layers
    for i in range(hp.Int('num_layers', 1, 6)):
        nn_model.add(tf.keras.layers.Dense(units=hp.Int('units_' + str(i),
            min_value=1,
            max_value=10,
            step=2),
            activation=activation))
    
    nn_model.add(tf.keras.layers.Dense(units=1, activation="sigmoid"))

    # Compile the model
    nn_model.compile(loss="binary_crossentropy", optimizer='adam', metrics=["accuracy"])
    
    return nn_model

In [2]:
# import keras_tuner as kt

tuner = keras_tuner.Hyperband(
    create_model,
    objective="val_accuracy",
    max_epochs=20,
    hyperband_iterations=2)

NameError: name 'keras_tuner' is not defined