# Initial Data Setup

In [1]:
# Initial imports
import pandas as pd
from pathlib import Path
from sklearn import tree
from sklearn.preprocessing import StandardScaler
from sklearn.model_selection import train_test_split
from sklearn.metrics import confusion_matrix, accuracy_score, classification_report
import tensorflow as tf
import keras_tuner as kt

In [None]:
# Locate the three source CSVs inside the Resources folder
resources_dir = Path('Resources')
all_nba_file = resources_dir / 'End of Season Teams.csv'
all_star_file = resources_dir / 'All-Star Selections.csv'
stats_file = resources_dir / 'Player Per Game.csv'

# Read each CSV into its own DataFrame
all_nba_df, all_star_df, stats_df = (
    pd.read_csv(csv_path)
    for csv_path in (all_nba_file, all_star_file, stats_file)
)

In [None]:
# Review DataFrames
# Preview the first rows of the end-of-season teams data
all_nba_df.head()

In [None]:
# Preview the first rows of the All-Star selections data
all_star_df.head()

In [None]:
# Preview the first rows of the per-game player stats data
stats_df.head()

# Data Cleaning

In [None]:
# Columns of the end-of-season teams data that are not needed downstream
award_cols_to_drop = [
    'lg', 'number_tm', 'position', 'seas_id',
    'player_id', 'birth_year', 'tm', 'age',
]
all_nba_clean = all_nba_df.drop(columns=award_cols_to_drop)

# Pivot to one row per (season, player) with one column per award type, so a
# player's multiple awards in a season sit side by side: 'Yes' where an award
# row exists, 'No' where the pivot left a gap
all_nba_pivot = pd.pivot_table(
    all_nba_clean,
    index=['season', 'player'],
    columns='type',
    aggfunc=lambda x: 'Yes',
)
all_nba_pivot = all_nba_pivot.fillna('No')

# Remove the award columns that belong to defunct leagues
all_nba_pivot_clean = all_nba_pivot.drop(columns=['All-ABA', 'All-BAA'])
all_nba_pivot_clean.tail(25)

In [None]:
# Drop the columns that are not needed and flag every remaining row as an
# All-Star selection
all_star_clean = (
    all_star_df
    .drop(columns=['team', 'lg', 'replaced'])
    .assign(**{'All Star': 'Yes'})
)
all_star_clean.head()

In [None]:
# Left-join the award flags, then the All-Star flag, onto every stats row
merge_keys = ['season', 'player']
first_merged_df = stats_df.merge(all_nba_pivot_clean, how='left', on=merge_keys)
merged_df = first_merged_df.merge(all_star_clean, how='left', on=merge_keys)

# Keep NBA rows only, then discard the identifier and stat columns that are
# not used as model inputs
is_nba = merged_df['lg'] == 'NBA'
unused_cols = [
    'season', 'player', 'pos', 'seas_id', 'player_id', 'birth_year', 'lg', 'tm',
    'fg_per_game', 'fga_per_game', 'x3p_per_game', 'x2p_per_game',
    'x2pa_per_game', 'x2p_percent', 'ft_per_game',
]
merged_df = merged_df.loc[is_nba].drop(columns=unused_cols)
merged_df.head()

# Creating the All Star Prediction Model

In [88]:
# Setting up the data set for the All-Star model: the other award columns are
# removed so that 'All Star' is the only award column left
all_star_data = merged_df.copy().drop(columns=['All-Defense','All-NBA','All-Rookie'])

# Rows with no matching All-Star selection are NaN after the left merge; label
# them 'No'. The result is assigned back to the column instead of calling
# fillna(inplace=True) on the column selection: that form is chained
# assignment, which recent pandas warns about and which does not update the
# parent DataFrame under Copy-on-Write.
all_star_data['All Star'] = all_star_data['All Star'].fillna('No')

# Target is the All-Star flag; every remaining column is a feature
# NOTE(review): y holds 'Yes'/'No' strings here - presumably it needs to be
# encoded as 0/1 before it is passed to a Keras model; confirm downstream.
y = all_star_data['All Star']
X = all_star_data.drop(columns=['All Star'])

# Splitting into training & testing (fixed seed for reproducibility)
X_train, X_test, y_train, y_test = train_test_split(X, y, random_state=20)

# Scaling the x data: fit on the training split only, then apply the same
# transformation to both splits
X_scaler = StandardScaler()
X_scaler.fit(X_train)
X_train_scaled = X_scaler.transform(X_train)
X_test_scaled = X_scaler.transform(X_test)


In [None]:
# Create a function that builds a new Sequential model with hyperparameter options
def create_model(hp):
    """Build and compile a binary-classification network for keras-tuner.

    The tuner chooses the hidden-layer activation, the width of the first
    layer, the number of additional hidden layers and the width of each of
    them. The output layer is a single sigmoid unit and the model is compiled
    with binary cross-entropy loss, the Adam optimizer and accuracy as metric.

    The input width is read from the notebook-level ``X_train_scaled`` array,
    so that array must exist before the tuner calls this function.

    Args:
        hp: keras-tuner hyperparameter container used to declare the search
            space (``hp.Choice`` / ``hp.Int``).

    Returns:
        The compiled ``tf.keras`` Sequential model.
    """
    nn_model = tf.keras.models.Sequential()

    # Allow kerastuner to decide which activation function to use in hidden layers
    activation = hp.Choice('activation',['relu','tanh','sigmoid'])

    # The first layer must accept one input per feature column; take the width
    # from the scaled training data instead of a hard-coded value
    n_features = X_train_scaled.shape[1]

    # Allow kerastuner to decide number of neurons in first layer
    nn_model.add(tf.keras.layers.Dense(units=hp.Int('first_units',
        min_value=1,
        max_value=10,
        step=2), activation=activation, input_dim=n_features))

    # Allow kerastuner to decide number of hidden layers and neurons in hidden layers
    for i in range(hp.Int('num_layers', 1, 6)):
        nn_model.add(tf.keras.layers.Dense(units=hp.Int('units_' + str(i),
            min_value=1,
            max_value=10,
            step=2),
            activation=activation))

    # Single sigmoid unit for the binary output
    nn_model.add(tf.keras.layers.Dense(units=1, activation="sigmoid"))

    # Compile the model
    nn_model.compile(loss="binary_crossentropy", optimizer='adam', metrics=["accuracy"])

    return nn_model