In [16]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import MinMaxScaler
from sklearn.preprocessing import LabelEncoder
from keras.utils import to_categorical
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense

## Preprocess the Data
* Separate the data into training and testing data.
* Use MinMaxScaler to scale the numerical data.

In [4]:
# Load the cleaned data set from disk and preview its first rows
training_df = pd.read_csv(filepath_or_buffer="Data/Cleaned_Data.csv")
training_df.head()

Unnamed: 0,koi_fpflag_nt,koi_fpflag_ss,koi_fpflag_co,koi_fpflag_ec,koi_time0bk,koi_duration,ra,dec,koi_disposition
0,0,0,0,0,170.53875,2.9575,291.93423,48.141651,CONFIRMED
1,0,0,0,0,162.51384,4.507,291.93423,48.141651,CONFIRMED
2,0,1,0,0,175.850252,1.7822,297.00482,48.134129,FALSE POSITIVE
3,0,1,0,0,170.307565,2.40641,285.53461,48.28521,FALSE POSITIVE
4,0,0,0,0,171.59555,1.6545,288.75488,48.2262,CONFIRMED


In [5]:
# Name of the column used as the prediction target; it is dropped from the
# feature matrix and extracted as the label vector in the split step.
target_feature = "koi_disposition"

In [6]:
# Separate the label vector (1-D array) from the feature columns
y = training_df[target_feature].values
X = training_df.drop(target_feature, axis=1)
print(X.shape, y.shape)

(9564, 8) (9564,)


In [7]:
# Hold out a test split, stratified on the label so class proportions
# match in both parts; the fixed seed keeps the split repeatable.
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    stratify=y,
    random_state=42,
)

In [8]:
# Scale the features so every column lies on a comparable range for training
def scale_data(X, X_train, X_test):
    """Min-max scale the training and testing feature sets.

    The scaler is fitted on ``X_train`` only. Fitting it on the full data
    set would let the test rows influence the per-column minimum/maximum,
    leaking information about the held-out data into preprocessing.

    Parameters
    ----------
    X : array-like
        Full feature set. Accepted so existing callers keep working; it is
        not used to fit the scaler.
    X_train : array-like
        Training features; the scaler is fitted on these rows.
    X_test : array-like
        Testing features; transformed with the scaler fitted on ``X_train``,
        so values may fall slightly outside [0, 1].

    Returns
    -------
    tuple
        ``(X_train_scaled, X_test_scaled, X_scaler)`` - the two scaled
        arrays and the fitted ``MinMaxScaler``.
    """
    # Learn the column ranges from the training rows only (no test leakage)
    X_scaler = MinMaxScaler().fit(X_train)
    # Apply the same fitted transformation to both splits
    X_train_scaled = X_scaler.transform(X_train)
    X_test_scaled = X_scaler.transform(X_test)
    return X_train_scaled, X_test_scaled, X_scaler

# Replace X_train / X_test with their scaled versions and keep the scaler.
# NOTE(review): this rebinds X_train and X_test, so re-running this cell
# without re-running the split cell would pass already-scaled arrays back in.
X_train, X_test, X_scaler = scale_data(X, X_train, X_test)

In [9]:
# Convert the text class labels to integer ids, then to one-hot vectors
def label_data(y, y_train, y_test):
    """Label-encode and one-hot encode the training and testing targets.

    Parameters
    ----------
    y : array-like
        All target labels; used to fit the encoder so every class is known.
    y_train : array-like
        Training labels to encode.
    y_test : array-like
        Testing labels to encode.

    Returns
    -------
    tuple
        ``(y_train_labeled, y_test_labeled, label_encoder)`` - the two
        one-hot encoded arrays and the fitted ``LabelEncoder``.
    """
    label_encoder = LabelEncoder()
    label_encoder.fit(y)
    # Pin the one-hot width to the number of known classes. Without this,
    # to_categorical infers the width from the largest id present, so a
    # split that lacks the highest class would produce a narrower matrix.
    num_classes = len(label_encoder.classes_)
    # Text labels -> integer ids
    y_train_encoded = label_encoder.transform(y_train)
    y_test_encoded = label_encoder.transform(y_test)
    # Integer ids -> one-hot rows
    y_train_labeled = to_categorical(y_train_encoded, num_classes=num_classes)
    y_test_labeled = to_categorical(y_test_encoded, num_classes=num_classes)
    return y_train_labeled, y_test_labeled, label_encoder
    
# Replace y_train / y_test with their one-hot versions and keep the encoder.
# NOTE(review): this rebinds y_train and y_test, so re-running this cell
# without re-running the split cell would pass one-hot arrays back in.
y_train, y_test, y_label_encoder = label_data(y, y_train, y_test)

## Tune Model Parameters

In [10]:
print(X_train.shape, y_train.shape)

(7173, 8) (7173, 3)


In [11]:
# Build the classifier: two 100-unit ReLU hidden layers followed by a
# softmax output with one unit per one-hot target column.
model = Sequential([
    Dense(100, activation="relu", input_dim=X_train.shape[1]),
    Dense(100, activation="relu"),
    Dense(y_train.shape[1], activation="softmax"),
])

In [12]:
# Configure training (Adam optimizer, categorical cross-entropy loss,
# accuracy metric), then show the layer/parameter summary.
model.compile(loss="categorical_crossentropy", optimizer="adam", metrics=["accuracy"])
model.summary()

Model: "sequential"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
dense (Dense)                (None, 100)               900       
_________________________________________________________________
dense_1 (Dense)              (None, 100)               10100     
_________________________________________________________________
dense_2 (Dense)              (None, 3)                 303       
Total params: 11,303
Trainable params: 11,303
Non-trainable params: 0
_________________________________________________________________


In [13]:
# Train on the scaled features and one-hot targets for 500 epochs,
# reshuffling the training rows every epoch.
# NOTE(review): no validation data is passed, so the per-epoch log reports
# training loss/accuracy only.
model.fit(X_train, y_train, epochs=500, verbose=2, shuffle=True)

Train on 7173 samples
Epoch 1/500
7173/7173 - 1s - loss: 0.5219 - accuracy: 0.7198
Epoch 2/500
7173/7173 - 0s - loss: 0.4223 - accuracy: 0.7700
Epoch 3/500
7173/7173 - 0s - loss: 0.4184 - accuracy: 0.7753
Epoch 4/500
7173/7173 - 0s - loss: 0.4173 - accuracy: 0.7740
Epoch 5/500
7173/7173 - 0s - loss: 0.4156 - accuracy: 0.7769
Epoch 6/500
7173/7173 - 0s - loss: 0.4138 - accuracy: 0.7779
Epoch 7/500
7173/7173 - 0s - loss: 0.4141 - accuracy: 0.7739
Epoch 8/500
7173/7173 - 0s - loss: 0.4120 - accuracy: 0.7836
Epoch 9/500
7173/7173 - 0s - loss: 0.4116 - accuracy: 0.7817
Epoch 10/500
7173/7173 - 0s - loss: 0.4116 - accuracy: 0.7839
Epoch 11/500
7173/7173 - 0s - loss: 0.4111 - accuracy: 0.7828
Epoch 12/500
7173/7173 - 0s - loss: 0.4100 - accuracy: 0.7811
Epoch 13/500
7173/7173 - 0s - loss: 0.4105 - accuracy: 0.7808
Epoch 14/500
7173/7173 - 0s - loss: 0.4104 - accuracy: 0.7827
Epoch 15/500
7173/7173 - 0s - loss: 0.4108 - accuracy: 0.7849
Epoch 16/500
7173/7173 - 0s - loss: 0.4103 - accuracy: 0.

Epoch 133/500
7173/7173 - 0s - loss: 0.3823 - accuracy: 0.8094
Epoch 134/500
7173/7173 - 0s - loss: 0.3814 - accuracy: 0.8087
Epoch 135/500
7173/7173 - 0s - loss: 0.3812 - accuracy: 0.8087
Epoch 136/500
7173/7173 - 0s - loss: 0.3805 - accuracy: 0.8090
Epoch 137/500
7173/7173 - 0s - loss: 0.3804 - accuracy: 0.8114
Epoch 138/500
7173/7173 - 0s - loss: 0.3789 - accuracy: 0.8107
Epoch 139/500
7173/7173 - 0s - loss: 0.3800 - accuracy: 0.8128
Epoch 140/500
7173/7173 - 0s - loss: 0.3795 - accuracy: 0.8093
Epoch 141/500
7173/7173 - 0s - loss: 0.3793 - accuracy: 0.8107
Epoch 142/500
7173/7173 - 0s - loss: 0.3810 - accuracy: 0.8086
Epoch 143/500
7173/7173 - 0s - loss: 0.3787 - accuracy: 0.8105
Epoch 144/500
7173/7173 - 0s - loss: 0.3803 - accuracy: 0.8079
Epoch 145/500
7173/7173 - 0s - loss: 0.3783 - accuracy: 0.8090
Epoch 146/500
7173/7173 - 0s - loss: 0.3792 - accuracy: 0.8089
Epoch 147/500
7173/7173 - 0s - loss: 0.3783 - accuracy: 0.8115
Epoch 148/500
7173/7173 - 0s - loss: 0.3786 - accuracy:

7173/7173 - 0s - loss: 0.3579 - accuracy: 0.8217
Epoch 264/500
7173/7173 - 0s - loss: 0.3582 - accuracy: 0.8192
Epoch 265/500
7173/7173 - 0s - loss: 0.3579 - accuracy: 0.8209
Epoch 266/500
7173/7173 - 0s - loss: 0.3585 - accuracy: 0.8206
Epoch 267/500
7173/7173 - 0s - loss: 0.3576 - accuracy: 0.8217
Epoch 268/500
7173/7173 - 0s - loss: 0.3584 - accuracy: 0.8188
Epoch 269/500
7173/7173 - 0s - loss: 0.3580 - accuracy: 0.8243
Epoch 270/500
7173/7173 - 0s - loss: 0.3589 - accuracy: 0.8218
Epoch 271/500
7173/7173 - 0s - loss: 0.3574 - accuracy: 0.8190
Epoch 272/500
7173/7173 - 0s - loss: 0.3559 - accuracy: 0.8259
Epoch 273/500
7173/7173 - 0s - loss: 0.3558 - accuracy: 0.8235
Epoch 274/500
7173/7173 - 0s - loss: 0.3554 - accuracy: 0.8232
Epoch 275/500
7173/7173 - 0s - loss: 0.3560 - accuracy: 0.8211
Epoch 276/500
7173/7173 - 0s - loss: 0.3554 - accuracy: 0.8235
Epoch 277/500
7173/7173 - 0s - loss: 0.3559 - accuracy: 0.8256
Epoch 278/500
7173/7173 - 0s - loss: 0.3554 - accuracy: 0.8234
Epoch 

Epoch 394/500
7173/7173 - 0s - loss: 0.3377 - accuracy: 0.8285
Epoch 395/500
7173/7173 - 0s - loss: 0.3366 - accuracy: 0.8299
Epoch 396/500
7173/7173 - 0s - loss: 0.3358 - accuracy: 0.8328
Epoch 397/500
7173/7173 - 0s - loss: 0.3370 - accuracy: 0.8295
Epoch 398/500
7173/7173 - 0s - loss: 0.3365 - accuracy: 0.8287
Epoch 399/500
7173/7173 - 0s - loss: 0.3344 - accuracy: 0.8356
Epoch 400/500
7173/7173 - 0s - loss: 0.3355 - accuracy: 0.8302
Epoch 401/500
7173/7173 - 0s - loss: 0.3357 - accuracy: 0.8331
Epoch 402/500
7173/7173 - 0s - loss: 0.3346 - accuracy: 0.8315
Epoch 403/500
7173/7173 - 0s - loss: 0.3371 - accuracy: 0.8303
Epoch 404/500
7173/7173 - 0s - loss: 0.3338 - accuracy: 0.8319
Epoch 405/500
7173/7173 - 0s - loss: 0.3353 - accuracy: 0.8305
Epoch 406/500
7173/7173 - 0s - loss: 0.3345 - accuracy: 0.8340
Epoch 407/500
7173/7173 - 0s - loss: 0.3349 - accuracy: 0.8347
Epoch 408/500
7173/7173 - 0s - loss: 0.3347 - accuracy: 0.8327
Epoch 409/500
7173/7173 - 0s - loss: 0.3331 - accuracy:

<tensorflow.python.keras.callbacks.History at 0x1cf0b51b978>

In [14]:
# Score the trained model on the held-out test split; with verbose=2 the
# loss and accuracy are printed as a single line.
model_loss, model_accuracy = model.evaluate(X_test, y_test, verbose=2)

2391/2391 - 0s - loss: 0.4566 - accuracy: 0.7834
