In [None]:
# Import our dependencies
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
from sklearn.neighbors import KNeighborsClassifier
import matplotlib.pyplot as plt
import sklearn as skl
import tensorflow as tf
import pandas as pd

In [None]:
# Read the cleaned CSV from the Resources folder into a DataFrame and preview it
csv_path = "../Resources/cleaned_drug_data.csv"
df = pd.read_csv(csv_path)
df.head()

In [None]:
# Columns removed before modelling; everything left over becomes the model
# input, plus the `illegal_use` target that is split off in a later cell.
# NOTE: re-running this cell on the already-trimmed frame raises KeyError
# (DataFrame.drop defaults to errors="raise"); re-run the load cell first.
columns_to_drop = [
    'VSA', 'Nicotine', 'Mushrooms', 'Meth', 'LSD', 'Legalh', 'Ketamine',
    'Heroin', 'Ecstasy', 'Crack', 'Coke', 'Choc', 'Cannabis', 'Caff',
    'illegal_score_sum', 'legal_score_sum', 'legal_use',
    'Amyl', 'Amphet', 'Alcohol', 'ID', 'Benzos',
]
df = df.drop(columns=columns_to_drop)
df

In [None]:
# One-hot encode the remaining categorical columns so every feature is numeric
numerical_data = pd.get_dummies(data=df)
numerical_data

In [None]:
# Separate the target column from the feature matrix
target_column = "illegal_use"
Y = numerical_data[target_column]
X = numerical_data.drop(columns=[target_column])

In [None]:
# Split features and target into train/test parts with a fixed seed
split_parts = train_test_split(X, Y, random_state=1)
X_train, X_test, Y_train, Y_test = split_parts

In [None]:
# Fit a StandardScaler on the training features only, so that the test split
# does not influence the statistics the scaler learns.
# (StandardScaler is already imported in the notebook's import cell, so it is
# not re-imported here.)
X_scaler = StandardScaler()
X_scaler.fit(X_train)

In [None]:
# Apply the fitted X_scaler to both the training and the testing features.
# Only the features are scaled; the targets are left as they are.
X_train_scaled, X_test_scaled = (
    X_scaler.transform(features) for features in (X_train, X_test)
)

In [None]:
def create_model(hp, input_dim=None):
    """Build and compile a Sequential binary classifier from tuner hyperparameters.

    This is the model-building callable handed to the KerasTuner tuner, which
    calls it as ``create_model(hp)``.

    Hyperparameters requested from ``hp``:
      * ``activation``  - one of 'relu', 'tanh', 'sigmoid'; shared by the first
        layer and every hidden layer.
      * ``first_units`` - neurons in the first layer (1 to 150, step 2).
      * ``num_layers``  - number of additional hidden layers (1 to 6).
      * ``units_<i>``   - neurons in hidden layer ``i`` (1 to 10, step 2).

    Args:
        hp: Hyperparameter container exposing ``Choice`` and ``Int``.
        input_dim: Number of input features. When omitted, it is taken from
            the notebook-level ``X_train_scaled`` (its second dimension), so
            the network always matches the current feature matrix instead of
            relying on a hard-coded column count.

    Returns:
        The Sequential model, compiled with binary cross-entropy loss, the
        Adam optimizer and an accuracy metric.
    """
    # The number of one-hot encoded columns depends on the data, so derive the
    # input width from the training matrix unless the caller supplies one.
    if input_dim is None:
        input_dim = X_train_scaled.shape[1]

    # Instantiate a Sequential model
    nn_model = tf.keras.models.Sequential()

    # Allow kerastuner to decide which activation function to use in hidden layers.
    activation = hp.Choice('activation', ['relu', 'tanh', 'sigmoid'])

    # First layer: tunable width, tunable activation, fixed to the input width.
    nn_model.add(tf.keras.layers.Dense(
        units=hp.Int('first_units', min_value=1, max_value=150, step=2),
        activation=activation,
        input_dim=input_dim))

    # Allow kerastuner to decide the number of hidden layers and the number of
    # neurons in each one.
    for i in range(hp.Int('num_layers', 1, 6)):
        nn_model.add(tf.keras.layers.Dense(
            units=hp.Int('units_' + str(i), min_value=1, max_value=10, step=2),
            activation=activation))

    # Single sigmoid unit for the binary target.
    nn_model.add(tf.keras.layers.Dense(units=1, activation="sigmoid"))

    # Compile the model
    nn_model.compile(loss="binary_crossentropy", optimizer='adam', metrics=["accuracy"])

    return nn_model

In [None]:
# Import the kerastuner library
import keras_tuner as kt

# Hyperband tuner: builds candidates with create_model and ranks them by
# validation accuracy.
hyperband_settings = dict(
    objective="val_accuracy",
    max_epochs=20,
    hyperband_iterations=2,
)
tuner = kt.Hyperband(create_model, **hyperband_settings)

In [None]:
# Run the kerastuner search for best hyperparameters.
# Model selection must not see the test split: the best model is scored on
# (X_test_scaled, Y_test) later, so tuning against that same data would leak
# test information into the hyperparameter choice and inflate the final score.
# Instead, hold out 20% of the *training* data as the tuner's validation set.
X_tune, X_val, Y_tune, Y_val = train_test_split(
    X_train_scaled, Y_train, test_size=0.2, random_state=1)
tuner.search(X_tune, Y_tune, epochs=50, validation_data=(X_val, Y_val))

In [None]:
# Take the top-ranked hyperparameter set from the finished search and show it
top_hyperparameters = tuner.get_best_hyperparameters(num_trials=1)
best_hyper = top_hyperparameters[0]
best_hyper.values

In [None]:
# Rebuild a fresh model from the winning hyperparameters and train it for
# 50 epochs, tracking metrics on the test split after every epoch.
nn_model = tuner.hypermodel.build(best_hyper)
history = nn_model.fit(
    X_train_scaled,
    Y_train,
    epochs=50,
    validation_data=(X_test_scaled, Y_test),
)

# Report the (1-based) epoch at which validation accuracy first peaked.
val_acc_per_epoch = history.history['val_accuracy']
highest_val_acc = max(val_acc_per_epoch)
best_epoch = 1 + val_acc_per_epoch.index(highest_val_acc)
print('Best epoch: %d' % (best_epoch,))

In [None]:
# Plot training vs. test loss for every epoch recorded in `history`.
loss_train = history.history['loss']
loss_val = history.history['val_loss']
# Derive the x-axis from the recorded history instead of hard-coding 50 epochs,
# so the plot keeps working if the training cell's epoch count changes.
epochs = range(1, len(loss_train) + 1)
plt.plot(epochs, loss_train, 'g', label='Training loss')
plt.plot(epochs, loss_val, 'b', label='Test loss')
plt.title('Training and Test Loss')
plt.xlabel('Epochs')
plt.ylabel('Loss')
plt.grid(True)
plt.legend()
# Save before show(): the figure must be written while it is still current.
plt.savefig("../images/loss_plot.png", dpi=300, bbox_inches='tight',facecolor = "w",edgecolor="w")
plt.show()

In [None]:
# Plot training vs. test accuracy for every epoch recorded in `history`.
# Dedicated names are used so the loss series from the previous cell are not
# overwritten with accuracy values.
acc_train = history.history['accuracy']
acc_val = history.history['val_accuracy']
# Derive the x-axis from the recorded history instead of hard-coding 50 epochs,
# so the plot keeps working if the training cell's epoch count changes.
epochs = range(1, len(acc_train) + 1)
plt.plot(epochs, acc_train, 'g', label='Training accuracy')
plt.plot(epochs, acc_val, 'b', label='Test accuracy')
plt.title('Training and Test Accuracy')
plt.xlabel('Epochs')
plt.ylabel('Accuracy')
plt.grid(True)
plt.legend()
# Save before show(): the figure must be written while it is still current.
plt.savefig("../images/accuracy_plot.png", dpi=300, bbox_inches='tight',facecolor = "w",edgecolor="w")
plt.show()

In [None]:
# Score the tuner's top-ranked model on the full test split
best_model = tuner.get_best_models(num_models=1)[0]
evaluation = best_model.evaluate(X_test_scaled, Y_test, verbose=2)
model_loss, model_accuracy = evaluation
print(f"Loss: {model_loss}, Accuracy: {model_accuracy}")

In [None]:
# Print the Keras summary of `best_model`, the top-ranked model retrieved
# from the tuner in the evaluation cell.
best_model.summary()