In [159]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import MinMaxScaler
from sklearn.preprocessing import LabelEncoder
from keras.utils import to_categorical
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense
from sklearn.model_selection import GridSearchCV
import joblib

## Preprocess the Data
* Separate the data into training and testing data.
* Use MinMaxScaler to scale the numerical data.

In [2]:
# Load the cleaned data set from disk and preview its first five rows
training_df = pd.read_csv(filepath_or_buffer="Data/Cleaned_Data.csv")
training_df.head(n=5)

Unnamed: 0,koi_fpflag_nt,koi_fpflag_ss,koi_fpflag_co,koi_fpflag_ec,koi_time0bk,koi_duration,ra,dec,koi_disposition
0,0,0,0,0,170.53875,2.9575,291.93423,48.141651,CONFIRMED
1,0,0,0,0,162.51384,4.507,291.93423,48.141651,CONFIRMED
2,0,1,0,0,175.850252,1.7822,297.00482,48.134129,FALSE POSITIVE
3,0,1,0,0,170.307565,2.40641,285.53461,48.28521,FALSE POSITIVE
4,0,0,0,0,171.59555,1.6545,288.75488,48.2262,CONFIRMED


In [3]:
# Name of the column holding the class label; it is removed from the
# predictors and used as the prediction target when the data is split.
target_feature = "koi_disposition"

In [4]:
# Separate the target labels (flattened to a 1-D array) from the predictors
y = training_df[[target_feature]].values.ravel()
X = training_df.drop(labels=[target_feature], axis="columns")
print(X.shape, y.shape)

(9564, 8) (9564,)


In [5]:
# Hold out 25% of the rows for testing. Stratifying on y keeps the class
# proportions equal in both splits; the fixed seed makes the split repeatable.
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.25,
    stratify=y,
    random_state=42,
)

In [6]:
# Scale the features to a common range so they train on a comparable footing
def scale_data(X, X_train, X_test):
    """Min-max scale the train/test feature sets using training statistics only.

    The scaler is fitted on ``X_train`` alone, so the minimum and maximum of
    the held-out rows never influence preprocessing. Fitting on the full data
    set would leak test-set information into training and make the test score
    optimistic.

    Parameters
    ----------
    X : array-like
        Full feature set. It is no longer used for fitting; the parameter is
        kept so existing ``scale_data(X, X_train, X_test)`` calls keep working.
    X_train : array-like of shape (n_train, n_features)
        Training features; the scaler is fitted on these rows.
    X_test : array-like of shape (n_test, n_features)
        Test features; transformed with the training-set scaler.

    Returns
    -------
    tuple
        ``(X_train_scaled, X_test_scaled, X_scaler)``. The training array lies
        in [0, 1]; test values may fall slightly outside that range when a
        test row exceeds the training minimum or maximum.
    """
    X_scaler = MinMaxScaler().fit(X_train)
    return X_scaler.transform(X_train), X_scaler.transform(X_test), X_scaler


# NOTE: this rebinds X_train / X_test to the scaled arrays, so re-run the
# train/test split cell before re-running this one.
X_train, X_test, X_scaler = scale_data(X, X_train, X_test)

In [7]:
# Encode the target: class names -> integer ids -> one-hot vectors
def label_data(y, y_train, y_test):
    """Label-encode and one-hot encode the train/test target arrays.

    The encoder is fitted on the full label array ``y`` so that every class
    name gets an id even if it is missing from one of the splits. The one-hot
    width is taken from the fitted encoder rather than inferred per split, so
    both outputs always have the same number of columns.

    Parameters
    ----------
    y : array-like of shape (n_samples,)
        All target labels; defines the set of classes.
    y_train : array-like of shape (n_train,)
        Training labels.
    y_test : array-like of shape (n_test,)
        Test labels.

    Returns
    -------
    tuple
        ``(y_train_labeled, y_test_labeled, label_encoder)``. The first two
        are one-hot matrices with one column per class; the fitted encoder
        maps predicted ids back to class names.
    """
    label_encoder = LabelEncoder().fit(y)
    # Without num_classes, to_categorical sizes each matrix from the largest
    # id it sees, so a split lacking the last class would come out narrower.
    n_classes = len(label_encoder.classes_)
    y_train_labeled = to_categorical(
        label_encoder.transform(y_train), num_classes=n_classes
    )
    y_test_labeled = to_categorical(
        label_encoder.transform(y_test), num_classes=n_classes
    )
    return y_train_labeled, y_test_labeled, label_encoder


# NOTE: this rebinds y_train / y_test to the one-hot matrices, so re-run the
# train/test split cell before re-running this one.
y_train, y_test, y_label_encoder = label_data(y, y_train, y_test)

## Tune Model Parameters

In [8]:
# Sanity check: the scaled features and the one-hot labels should have the
# same number of rows before they are fed to the network.
print(X_train.shape, y_train.shape)

(7173, 8) (7173, 3)


In [149]:
# Assemble the network as an ordered stack of fully connected layers
model = Sequential([
    # First layer: its input width is the number of feature columns
    Dense(units=50, activation='relu', input_dim=X_train.shape[1]),
    # Hidden layers
    Dense(units=100, activation='selu'),
    Dense(units=200, activation='elu'),
    Dense(units=100, activation='selu'),
    # Output layer: one softmax probability per one-hot label column
    Dense(units=y_train.shape[1], activation='softmax'),
])

In [150]:
# Configure training: Adam optimizer, categorical cross-entropy on the
# one-hot labels, and accuracy as the reported metric
model.compile(loss='categorical_crossentropy', optimizer='adam', metrics=['accuracy'])
# Show the layer-by-layer architecture and parameter counts
model.summary()

Model: "sequential_13"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
dense_63 (Dense)             (None, 50)                450       
_________________________________________________________________
dense_64 (Dense)             (None, 100)               5100      
_________________________________________________________________
dense_65 (Dense)             (None, 200)               20200     
_________________________________________________________________
dense_66 (Dense)             (None, 100)               20100     
_________________________________________________________________
dense_67 (Dense)             (None, 3)                 303       
Total params: 46,153
Trainable params: 46,153
Non-trainable params: 0
_________________________________________________________________


In [155]:
# Fit the model to the training data.
#
# Only arguments that affect this run are passed:
# * validation_freq=0.001 was dropped: it is expected to be an integer (or a
#   collection of epoch numbers) and has no effect without validation data.
# * max_queue_size / workers / use_multiprocessing were dropped: they apply
#   only to generator or Sequence inputs, not to in-memory arrays, and newer
#   Keras releases no longer accept them.
# * Arguments that merely restated their defaults (batch_size=None,
#   callbacks=None, class_weight=None, ...) were dropped as noise.
#
# NOTE(review): calling fit() again on the same model object continues from
# its current weights. Re-run the model-creation and compile cells first if a
# fresh training run is intended.
model.fit(
    x=X_train,
    y=y_train,
    epochs=500,
    shuffle=True,
    verbose=2,
)

Train on 7173 samples
Epoch 1/500
7173/7173 - 0s - loss: 0.3815 - accuracy: 0.8045
Epoch 2/500
7173/7173 - 0s - loss: 0.3801 - accuracy: 0.8100
Epoch 3/500
7173/7173 - 0s - loss: 0.3779 - accuracy: 0.8108
Epoch 4/500
7173/7173 - 0s - loss: 0.3801 - accuracy: 0.8124
Epoch 5/500
7173/7173 - 0s - loss: 0.3779 - accuracy: 0.8118
Epoch 6/500
7173/7173 - 0s - loss: 0.3788 - accuracy: 0.8094
Epoch 7/500
7173/7173 - 0s - loss: 0.3784 - accuracy: 0.8110
Epoch 8/500
7173/7173 - 0s - loss: 0.3761 - accuracy: 0.8153
Epoch 9/500
7173/7173 - 0s - loss: 0.3794 - accuracy: 0.8087
Epoch 10/500
7173/7173 - 0s - loss: 0.3798 - accuracy: 0.8075
Epoch 11/500
7173/7173 - 0s - loss: 0.3778 - accuracy: 0.8142
Epoch 12/500
7173/7173 - 0s - loss: 0.3781 - accuracy: 0.8090
Epoch 13/500
7173/7173 - 0s - loss: 0.3758 - accuracy: 0.8062
Epoch 14/500
7173/7173 - 0s - loss: 0.3771 - accuracy: 0.8087
Epoch 15/500
7173/7173 - 0s - loss: 0.3793 - accuracy: 0.8101
Epoch 16/500
7173/7173 - 0s - loss: 0.3766 - accuracy: 0.

Epoch 133/500
7173/7173 - 0s - loss: 0.3596 - accuracy: 0.8171
Epoch 134/500
7173/7173 - 0s - loss: 0.3600 - accuracy: 0.8143
Epoch 135/500
7173/7173 - 0s - loss: 0.3605 - accuracy: 0.8196
Epoch 136/500
7173/7173 - 0s - loss: 0.3611 - accuracy: 0.8185
Epoch 137/500
7173/7173 - 0s - loss: 0.3592 - accuracy: 0.8204
Epoch 138/500
7173/7173 - 0s - loss: 0.3576 - accuracy: 0.8168
Epoch 139/500
7173/7173 - 0s - loss: 0.3591 - accuracy: 0.8199
Epoch 140/500
7173/7173 - 0s - loss: 0.3555 - accuracy: 0.8200
Epoch 141/500
7173/7173 - 0s - loss: 0.3575 - accuracy: 0.8183
Epoch 142/500
7173/7173 - 0s - loss: 0.3633 - accuracy: 0.8126
Epoch 143/500
7173/7173 - 0s - loss: 0.3578 - accuracy: 0.8203
Epoch 144/500
7173/7173 - 0s - loss: 0.3583 - accuracy: 0.8174
Epoch 145/500
7173/7173 - 0s - loss: 0.3572 - accuracy: 0.8183
Epoch 146/500
7173/7173 - 0s - loss: 0.3583 - accuracy: 0.8221
Epoch 147/500
7173/7173 - 0s - loss: 0.3569 - accuracy: 0.8196
Epoch 148/500
7173/7173 - 0s - loss: 0.3572 - accuracy:

7173/7173 - 0s - loss: 0.3403 - accuracy: 0.8288
Epoch 264/500
7173/7173 - 0s - loss: 0.3393 - accuracy: 0.8259
Epoch 265/500
7173/7173 - 0s - loss: 0.3425 - accuracy: 0.8243
Epoch 266/500
7173/7173 - 0s - loss: 0.3410 - accuracy: 0.8267
Epoch 267/500
7173/7173 - 0s - loss: 0.3387 - accuracy: 0.8257
Epoch 268/500
7173/7173 - 0s - loss: 0.3406 - accuracy: 0.8294
Epoch 269/500
7173/7173 - 0s - loss: 0.3416 - accuracy: 0.8248
Epoch 270/500
7173/7173 - 0s - loss: 0.3418 - accuracy: 0.8253
Epoch 271/500
7173/7173 - 0s - loss: 0.3401 - accuracy: 0.8263
Epoch 272/500
7173/7173 - 0s - loss: 0.3392 - accuracy: 0.8248
Epoch 273/500
7173/7173 - 0s - loss: 0.3394 - accuracy: 0.8282
Epoch 274/500
7173/7173 - 0s - loss: 0.3418 - accuracy: 0.8253
Epoch 275/500
7173/7173 - 0s - loss: 0.3427 - accuracy: 0.8312
Epoch 276/500
7173/7173 - 0s - loss: 0.3407 - accuracy: 0.8269
Epoch 277/500
7173/7173 - 0s - loss: 0.3384 - accuracy: 0.8239
Epoch 278/500
7173/7173 - 0s - loss: 0.3388 - accuracy: 0.8269
Epoch 

Epoch 394/500
7173/7173 - 0s - loss: 0.3206 - accuracy: 0.8365
Epoch 395/500
7173/7173 - 0s - loss: 0.3195 - accuracy: 0.8308
Epoch 396/500
7173/7173 - 0s - loss: 0.3215 - accuracy: 0.8309
Epoch 397/500
7173/7173 - 0s - loss: 0.3191 - accuracy: 0.8363
Epoch 398/500
7173/7173 - 0s - loss: 0.3233 - accuracy: 0.8347
Epoch 399/500
7173/7173 - 0s - loss: 0.3206 - accuracy: 0.8337
Epoch 400/500
7173/7173 - 0s - loss: 0.3237 - accuracy: 0.8308
Epoch 401/500
7173/7173 - 0s - loss: 0.3211 - accuracy: 0.8362
Epoch 402/500
7173/7173 - 0s - loss: 0.3197 - accuracy: 0.8374
Epoch 403/500
7173/7173 - 0s - loss: 0.3172 - accuracy: 0.8355
Epoch 404/500
7173/7173 - 0s - loss: 0.3166 - accuracy: 0.8335
Epoch 405/500
7173/7173 - 0s - loss: 0.3303 - accuracy: 0.8264
Epoch 406/500
7173/7173 - 0s - loss: 0.3193 - accuracy: 0.8373
Epoch 407/500
7173/7173 - 0s - loss: 0.3249 - accuracy: 0.8379
Epoch 408/500
7173/7173 - 0s - loss: 0.3202 - accuracy: 0.8354
Epoch 409/500
7173/7173 - 0s - loss: 0.3186 - accuracy:

<tensorflow.python.keras.callbacks.History at 0x2157c42a7b8>

In [156]:
# Score the trained model on the held-out test set. verbose=2 prints the
# loss/accuracy line, and both values are also captured for later use.
model_loss, model_accuracy = model.evaluate(x=X_test, y=y_test, verbose=2)

2391/2391 - 0s - loss: 0.5781 - accuracy: 0.7892
