In [1]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import MinMaxScaler
from sklearn.preprocessing import LabelEncoder
from keras.utils import to_categorical
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense
from sklearn.model_selection import GridSearchCV
import pickle

Using TensorFlow backend.


## Preprocess the Data
* Separate the data into training and testing data.
* Use MinMaxScaler to scale the numerical data.

In [2]:
# Read the training data
# The path is relative, so it resolves against the notebook's working directory.
training_df = pd.read_csv("Data/Cleaned_Data.csv") 
# Bare last expression: the notebook renders the first rows as this cell's output.
training_df.head()

Unnamed: 0,koi_fpflag_nt,koi_fpflag_ss,koi_fpflag_co,koi_fpflag_ec,koi_period,koi_time0bk,koi_impact,koi_duration,koi_depth,koi_prad,koi_teq,koi_insol,koi_model_snr,koi_steff,koi_slogg,koi_srad,ra,dec,koi_disposition
0,0,0,0,0,9.488036,170.53875,0.146,2.9575,615.8,2.26,793.0,93.59,35.8,5455.0,4.467,0.927,291.93423,48.141651,CONFIRMED
1,0,0,0,0,54.418383,162.51384,0.586,4.507,874.8,2.83,443.0,9.11,25.8,5455.0,4.467,0.927,291.93423,48.141651,CONFIRMED
2,0,1,0,0,19.89914,175.850252,0.969,1.7822,10829.0,14.6,638.0,39.3,76.3,5853.0,4.544,0.868,297.00482,48.134129,FALSE POSITIVE
3,0,1,0,0,1.736952,170.307565,1.276,2.40641,8079.2,33.46,1395.0,891.96,505.6,5805.0,4.564,0.791,285.53461,48.28521,FALSE POSITIVE
4,0,0,0,0,2.525592,171.59555,0.701,1.6545,603.3,2.75,1406.0,926.16,40.9,6031.0,4.438,1.046,288.75488,48.2262,CONFIRMED


In [3]:
# Name of the label column; the split cell below drops it from the inputs and uses it as y.
target_feature = "koi_disposition"

In [4]:
# Separate the inputs (every column except the target) from the flat label vector
X = training_df.drop(target_feature, axis=1)
y = training_df[[target_feature]].values.reshape(-1)
print(X.shape, y.shape)

(9201, 18) (9201,)


In [5]:
# Hold out a test set; stratifying on y keeps the class mix the same in both splits
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    stratify=y,
    random_state=42,
)

In [6]:
# Scale the features so every column sits on a comparable range for training
def scale_data(X, X_train, X_test):
    """Min-max scale the training and testing feature sets.

    The scaler is fitted on ``X_train`` only, so that the minimum/maximum of
    the held-out rows never influence preprocessing (fitting on the full data
    set would leak test-set statistics into the training pipeline). As a
    consequence, scaled test values may fall slightly outside ``[0, 1]``.

    Parameters
    ----------
    X : array-like
        Full feature set. No longer used for fitting; the parameter is kept
        so existing calls ``scale_data(X, X_train, X_test)`` keep working.
    X_train : array-like
        Training features; the scaler is fitted on these rows.
    X_test : array-like
        Testing features; transformed with the scaler fitted on ``X_train``.

    Returns
    -------
    tuple
        ``(X_train_scaled, X_test_scaled, X_scaler)`` - the two transformed
        feature sets and the fitted scaler.
    """
    # Fit on the training rows only to avoid leaking test information
    X_scaler = MinMaxScaler().fit(X_train)
    # Apply the same fitted scale to both splits
    X_train_scaled = X_scaler.transform(X_train)
    X_test_scaled = X_scaler.transform(X_test)
    return X_train_scaled, X_test_scaled, X_scaler

# Rebind X_train / X_test to their scaled versions and keep a handle on the fitted scaler.
# Re-running this cell on its own would feed the already-scaled arrays back into scale_data.
X_train, X_test, X_scaler = scale_data(X, X_train, X_test)

In [7]:
# Label-encode the target feature from human-readable class names to one-hot vectors
def label_data(y, y_train, y_test):
    """Convert string class labels into one-hot encoded matrices.

    The encoder is fitted on ``y`` so it knows every class name, and both
    splits are one-hot encoded with the same, explicit number of columns.

    Parameters
    ----------
    y : array-like
        All labels; used to fit the label encoder.
    y_train : array-like
        Training labels to encode.
    y_test : array-like
        Testing labels to encode.

    Returns
    -------
    tuple
        ``(y_train_labeled, y_test_labeled, label_encoder)`` - the two
        one-hot matrices and the fitted encoder.
    """
    label_encoder = LabelEncoder()
    label_encoder.fit(y)
    # One column per class known to the encoder, regardless of which
    # classes happen to appear in a given split
    num_classes = len(label_encoder.classes_)
    # Map class names to integer ids
    y_train_encoded = label_encoder.transform(y_train)
    y_test_encoded = label_encoder.transform(y_test)
    # Pin the one-hot width; otherwise it is inferred separately for each
    # split from the largest id present, and the two matrices can disagree
    y_train_labeled = to_categorical(y_train_encoded, num_classes=num_classes)
    y_test_labeled = to_categorical(y_test_encoded, num_classes=num_classes)
    return y_train_labeled, y_test_labeled, label_encoder
    
# Rebind y_train / y_test to their one-hot matrices and keep the fitted encoder.
# After this cell y_train / y_test no longer hold the raw string labels.
y_train, y_test, y_label_encoder = label_data(y, y_train, y_test)

## Build, Train, and Evaluate the Model

In [8]:
# Sanity check: both arrays should have the same number of rows before training
print(X_train.shape, y_train.shape)

(6900, 18) (6900, 3)


In [9]:
# Build the network: four hidden Dense layers feeding a softmax output layer
n_features = X_train.shape[1]  # width of the input layer
n_classes = y_train.shape[1]   # one output unit per one-hot column
model = Sequential([
    Dense(units=50, activation="relu", input_dim=n_features),
    Dense(units=100, activation="selu"),
    Dense(units=200, activation="elu"),
    Dense(units=100, activation="selu"),
    Dense(units=n_classes, activation="softmax"),
])

In [10]:
# Configure training (Adam optimizer, categorical cross-entropy loss, accuracy metric),
# then print the layer-by-layer summary
model.compile(loss="categorical_crossentropy", optimizer="adam", metrics=["accuracy"])
model.summary()

Model: "sequential"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
dense (Dense)                (None, 50)                950       
_________________________________________________________________
dense_1 (Dense)              (None, 100)               5100      
_________________________________________________________________
dense_2 (Dense)              (None, 200)               20200     
_________________________________________________________________
dense_3 (Dense)              (None, 100)               20100     
_________________________________________________________________
dense_4 (Dense)              (None, 3)                 303       
Total params: 46,653
Trainable params: 46,653
Non-trainable params: 0
_________________________________________________________________


In [11]:
# Fit the model to the training data.
# Only the arguments that differ from the defaults (or are worth spelling out) are passed:
#   * validation_freq was dropped: 0.001 is not a valid value (an integer or a container
#     of epoch numbers is expected) and no validation data is supplied here anyway.
#   * max_queue_size / workers / use_multiprocessing were dropped: they only apply to
#     generator or Sequence input, not to in-memory arrays, and newer Keras releases
#     no longer accept them in fit().
#   * the remaining arguments were explicit defaults and added no information.
# The call is left as a bare expression so the returned History object is still
# shown as the cell output.
model.fit(
    x=X_train,
    y=y_train,
    epochs=500,
    verbose=2,
    shuffle=True,
)

Train on 6900 samples
Epoch 1/500
6900/6900 - 1s - loss: 0.4438 - accuracy: 0.7558
Epoch 2/500
6900/6900 - 0s - loss: 0.4048 - accuracy: 0.7790
Epoch 3/500
6900/6900 - 0s - loss: 0.4034 - accuracy: 0.7836
Epoch 4/500
6900/6900 - 0s - loss: 0.4050 - accuracy: 0.7739
Epoch 5/500
6900/6900 - 0s - loss: 0.3974 - accuracy: 0.7868
Epoch 6/500
6900/6900 - 0s - loss: 0.3961 - accuracy: 0.7836
Epoch 7/500
6900/6900 - 0s - loss: 0.3925 - accuracy: 0.7958
Epoch 8/500
6900/6900 - 0s - loss: 0.3915 - accuracy: 0.7943
Epoch 9/500
6900/6900 - 0s - loss: 0.3948 - accuracy: 0.7835
Epoch 10/500
6900/6900 - 0s - loss: 0.3891 - accuracy: 0.7952
Epoch 11/500
6900/6900 - 0s - loss: 0.3886 - accuracy: 0.7962
Epoch 12/500
6900/6900 - 0s - loss: 0.3867 - accuracy: 0.7965
Epoch 13/500
6900/6900 - 0s - loss: 0.3859 - accuracy: 0.7986
Epoch 14/500
6900/6900 - 0s - loss: 0.3847 - accuracy: 0.8003
Epoch 15/500
6900/6900 - 0s - loss: 0.3825 - accuracy: 0.8001
Epoch 16/500
6900/6900 - 0s - loss: 0.3862 - accuracy: 0.

Epoch 133/500
6900/6900 - 0s - loss: 0.2466 - accuracy: 0.8954
Epoch 134/500
6900/6900 - 0s - loss: 0.2510 - accuracy: 0.8886
Epoch 135/500
6900/6900 - 0s - loss: 0.2423 - accuracy: 0.8975
Epoch 136/500
6900/6900 - 0s - loss: 0.2474 - accuracy: 0.8904
Epoch 137/500
6900/6900 - 0s - loss: 0.2445 - accuracy: 0.8958
Epoch 138/500
6900/6900 - 0s - loss: 0.2499 - accuracy: 0.8920
Epoch 139/500
6900/6900 - 0s - loss: 0.2433 - accuracy: 0.8948
Epoch 140/500
6900/6900 - 0s - loss: 0.2426 - accuracy: 0.8955
Epoch 141/500
6900/6900 - 0s - loss: 0.2415 - accuracy: 0.8928
Epoch 142/500
6900/6900 - 0s - loss: 0.2445 - accuracy: 0.8925
Epoch 143/500
6900/6900 - 0s - loss: 0.2505 - accuracy: 0.8886
Epoch 144/500
6900/6900 - 0s - loss: 0.2405 - accuracy: 0.8957
Epoch 145/500
6900/6900 - 0s - loss: 0.2457 - accuracy: 0.8923
Epoch 146/500
6900/6900 - 0s - loss: 0.2388 - accuracy: 0.8971
Epoch 147/500
6900/6900 - 0s - loss: 0.2443 - accuracy: 0.8901
Epoch 148/500
6900/6900 - 0s - loss: 0.2433 - accuracy:

6900/6900 - 0s - loss: 0.2043 - accuracy: 0.9086
Epoch 264/500
6900/6900 - 0s - loss: 0.2027 - accuracy: 0.9125
Epoch 265/500
6900/6900 - 0s - loss: 0.2031 - accuracy: 0.9135
Epoch 266/500
6900/6900 - 0s - loss: 0.2114 - accuracy: 0.9052
Epoch 267/500
6900/6900 - 0s - loss: 0.2236 - accuracy: 0.9043
Epoch 268/500
6900/6900 - 0s - loss: 0.2050 - accuracy: 0.9119
Epoch 269/500
6900/6900 - 0s - loss: 0.2070 - accuracy: 0.9080
Epoch 270/500
6900/6900 - 0s - loss: 0.2020 - accuracy: 0.9120
Epoch 271/500
6900/6900 - 0s - loss: 0.2021 - accuracy: 0.9083
Epoch 272/500
6900/6900 - 0s - loss: 0.2094 - accuracy: 0.9075
Epoch 273/500
6900/6900 - 0s - loss: 0.2057 - accuracy: 0.9113
Epoch 274/500
6900/6900 - 0s - loss: 0.2034 - accuracy: 0.9135
Epoch 275/500
6900/6900 - 0s - loss: 0.2181 - accuracy: 0.9077
Epoch 276/500
6900/6900 - 0s - loss: 0.2132 - accuracy: 0.9075
Epoch 277/500
6900/6900 - 0s - loss: 0.2074 - accuracy: 0.9094
Epoch 278/500
6900/6900 - 0s - loss: 0.2066 - accuracy: 0.9087
Epoch 

Epoch 394/500
6900/6900 - 0s - loss: 0.1704 - accuracy: 0.9251
Epoch 395/500
6900/6900 - 0s - loss: 0.1826 - accuracy: 0.9213
Epoch 396/500
6900/6900 - 0s - loss: 0.1891 - accuracy: 0.9187
Epoch 397/500
6900/6900 - 0s - loss: 0.1808 - accuracy: 0.9191
Epoch 398/500
6900/6900 - 0s - loss: 0.1693 - accuracy: 0.9264
Epoch 399/500
6900/6900 - 0s - loss: 0.1777 - accuracy: 0.9241
Epoch 400/500
6900/6900 - 0s - loss: 0.1715 - accuracy: 0.9230
Epoch 401/500
6900/6900 - 0s - loss: 0.1699 - accuracy: 0.9270
Epoch 402/500
6900/6900 - 0s - loss: 0.1720 - accuracy: 0.9246
Epoch 403/500
6900/6900 - 0s - loss: 0.1665 - accuracy: 0.9287
Epoch 404/500
6900/6900 - 0s - loss: 0.1771 - accuracy: 0.9251
Epoch 405/500
6900/6900 - 0s - loss: 0.1892 - accuracy: 0.9170
Epoch 406/500
6900/6900 - 0s - loss: 0.1796 - accuracy: 0.9203
Epoch 407/500
6900/6900 - 0s - loss: 0.1790 - accuracy: 0.9235
Epoch 408/500
6900/6900 - 0s - loss: 0.1677 - accuracy: 0.9287
Epoch 409/500
6900/6900 - 0s - loss: 0.1797 - accuracy:

<tensorflow.python.keras.callbacks.History at 0x2146ec72cf8>

In [12]:
# Score the trained model on the held-out test split; verbose=2 prints the loss/accuracy line
model_loss, model_accuracy = model.evaluate(X_test, y_test, verbose=2)

2301/2301 - 0s - loss: 0.6855 - accuracy: 0.8653


In [17]:
# save the model to disk
# The relative path means the file is written to the notebook's working directory.
# NOTE(review): X_scaler and y_label_encoder are not saved here - confirm whether they
# should be persisted too, since new inputs would need the same scaling and label mapping.
model.save("Deep_Learning_Model.h5")