In [14]:
import numpy as np
import pandas as pd
import matplotlib as plt
%matplotlib inline

from sklearn.model_selection import train_test_split

In [15]:
from keras.models import Sequential
from keras.layers import Dense
from keras import optimizers
from keras import initializers

### Load & Preprocessing data

In [16]:
from scipy.io import arff

data = arff.loadarff('HTRU_2.arff')
df = pd.DataFrame(data[0])
X = df.drop('class', axis=1)
y = df["class"]

In [17]:
print(f"X shape: {X.shape};") 
print(f"y shape: {y.shape};")

X shape: (17898, 8);
y shape: (17898,);


In [18]:
df.describe()

Unnamed: 0,Profile_mean,Profile_stdev,Profile_skewness,Profile_kurtosis,DM_mean,DM_stdev,DM_skewness,DM_kurtosis
count,17898.0,17898.0,17898.0,17898.0,17898.0,17898.0,17898.0,17898.0
mean,111.079968,46.549532,0.477857,1.770279,12.6144,26.326515,8.303556,104.857709
std,25.652935,6.843189,1.06404,6.167913,29.472897,19.470572,4.506092,106.51454
min,5.8125,24.772042,-1.876011,-1.791886,0.213211,7.370432,-3.13927,-1.976976
25%,100.929688,42.376018,0.027098,-0.188572,1.923077,14.437332,5.781506,34.960504
50%,115.078125,46.947479,0.22324,0.19871,2.801839,18.461316,8.433515,83.064556
75%,127.085938,51.023202,0.473325,0.927783,5.464256,28.428104,10.702959,139.30933
max,192.617188,98.778911,8.069522,68.101622,223.392141,110.642211,34.539844,1191.000837


In [19]:
df.head()

Unnamed: 0,Profile_mean,Profile_stdev,Profile_skewness,Profile_kurtosis,DM_mean,DM_stdev,DM_skewness,DM_kurtosis,class
0,140.5625,55.683782,-0.234571,-0.699648,3.199833,19.110426,7.975532,74.242225,b'0'
1,102.507812,58.88243,0.465318,-0.515088,1.677258,14.860146,10.576487,127.39358,b'0'
2,103.015625,39.341649,0.323328,1.051164,3.121237,21.744669,7.735822,63.171909,b'0'
3,136.75,57.178449,-0.068415,-0.636238,3.642977,20.95928,6.896499,53.593661,b'0'
4,88.726562,40.672225,0.600866,1.123492,1.17893,11.46872,14.269573,252.567306,b'0'


In [20]:
df.columns

Index(['Profile_mean', 'Profile_stdev', 'Profile_skewness', 'Profile_kurtosis',
       'DM_mean', 'DM_stdev', 'DM_skewness', 'DM_kurtosis', 'class'],
      dtype='object')

### Split data on train test set

In [21]:
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42, stratify=y)

In [22]:
print(f"X_train shape: {X.shape}; y_train shape: {y.shape};")
print(f"X_test shape: {X.shape};  y_test shape: {y.shape};")

X_train shape: (17898, 8); y_train shape: (17898,);
X_test shape: (17898, 8);  y_test shape: (17898,);


### Function for creating model for specific hyperparameters

In [23]:
def build_model(var_activation='relu', var_optimizer='adam'):
    """ Uses arguments to build Keras model. """
    kernel_init = initializers.TruncatedNormal(mean=0, stddev=0.01, seed=42)
    bias_init = initializers.Constant(value=1e-3)

    model = Sequential()
    model.add(Dense(24, input_dim=8, activation=var_activation, kernel_initializer=kernel_init, bias_initializer=bias_init))
    model.add(Dense(8, activation=var_activation, kernel_initializer=kernel_init, bias_initializer=bias_init))
    model.add(Dense(1, activation='sigmoid'))
    model.compile(loss='mean_squared_error', optimizer=var_optimizer, metrics=['accuracy'])
    return model

### Layers summary

In [24]:
model_ = build_model()
model_.summary()

Model: "sequential_2"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
dense_4 (Dense)              (None, 24)                216       
_________________________________________________________________
dense_5 (Dense)              (None, 8)                 200       
_________________________________________________________________
dense_6 (Dense)              (None, 1)                 9         
Total params: 425
Trainable params: 425
Non-trainable params: 0
_________________________________________________________________


### List of hyperparameters

In [25]:
_activations=['sigmoid', 'tanh','relu']
_optimizers=['sgd','adam']
_batch_size=[128, 64, 32]
_epochs=[10, 20]

params=dict(var_activation=_activations,
            var_optimizer=_optimizers,
            batch_size=_batch_size,
            epochs=_epochs)
print(params)

{'var_activation': ['sigmoid', 'tanh', 'relu'], 'var_optimizer': ['sgd', 'adam'], 'batch_size': [128, 64, 32], 'epochs': [10, 20]}


### Hyperparameters tunning with GridSearchCV

In [26]:
from tensorflow.keras.wrappers.scikit_learn import KerasClassifier
from sklearn.model_selection import GridSearchCV

model_search = KerasClassifier(build_fn=build_model, verbose=0)
grid = GridSearchCV(model_search, param_grid=params, cv=2, n_jobs=1)

In [27]:
%%time
grid.fit(X_train, y_train)

Wall time: 2min 24s


GridSearchCV(cv=2, error_score=nan,
             estimator=<tensorflow.python.keras.wrappers.scikit_learn.KerasClassifier object at 0x00000172D973A248>,
             iid='deprecated', n_jobs=1,
             param_grid={'batch_size': [128, 64, 32], 'epochs': [10, 20],
                         'var_activation': ['sigmoid', 'tanh', 'relu'],
                         'var_optimizer': ['sgd', 'adam']},
             pre_dispatch='2*n_jobs', refit=True, return_train_score=False,
             scoring=None, verbose=0)

In [28]:
params = grid.best_params_
res = [print(f"{key}: {val}") for key, val in params.items()]

batch_size: 128
epochs: 20
var_activation: tanh
var_optimizer: adam


### Training Neural Network on the best hyperparameters

In [29]:
model = build_model(var_activation=params["var_activation"], 
                     var_optimizer=params["var_optimizer"])
model.fit(X_train,
          y_train, 
          batch_size=params["batch_size"],
          epochs=params["epochs"])

Epoch 1/20
Epoch 2/20
Epoch 3/20
Epoch 4/20
Epoch 5/20
Epoch 6/20
Epoch 7/20
Epoch 8/20
Epoch 9/20
Epoch 10/20
Epoch 11/20
Epoch 12/20
Epoch 13/20
Epoch 14/20
Epoch 15/20
Epoch 16/20
Epoch 17/20
Epoch 18/20
Epoch 19/20
Epoch 20/20


<keras.callbacks.callbacks.History at 0x172eadbc4c8>

In [30]:
cost, accuracy = model.evaluate(X_test, y_test)
print("Cost: ", cost)
print('Accuracy: %.2f' % (accuracy*100))

Cost:  0.017952666598288914
Accuracy: 97.88


### Make a few predictions

In [31]:
predictions = model.predict(X_test)
rounded = [round(x[0], 3) for x in predictions]
print(rounded[0:10])

[0.011, 0.012, 0.011, 0.012, 0.012, 0.011, 0.011, 0.295, 0.985, 0.013]
