In [14]:
# standard library
import os

# numerics and deep learning
import numpy as np
import tensorflow as tf
from tensorflow import keras
from tensorflow.keras.layers import Dense,Conv2D,Input,LeakyReLU,BatchNormalization
from tensorflow.keras.preprocessing import image
from tensorflow.keras.utils import plot_model
from tensorflow.keras.models import Model
from tensorflow.keras.models import load_model

# synthetic classification dataset, preprocessing, and baseline classifier
from sklearn.datasets import make_classification
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import MinMaxScaler
from sklearn.preprocessing import LabelEncoder
from sklearn.linear_model import LogisticRegression
from sklearn.metrics import accuracy_score

# visualization
import matplotlib.pyplot as plt

In [64]:
# build a synthetic classification problem with 10 informative and
# 90 redundant features (100 columns in total)
n_features = 100
X, y = make_classification(
    n_samples=1000,
    n_features=n_features,
    n_informative=10,
    n_redundant=90,
    random_state=1,
)
# report the shapes of the feature matrix and the label vector
print(X.shape, y.shape)

(1000, 100) (1000,)


In [65]:
# hold out 33% of the rows for testing; the fixed seed keeps the split repeatable
X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.33, random_state=1
)
# fit the min-max scaler on the training rows only, then apply it to both splits
t = MinMaxScaler().fit(X_train)
X_train, X_test = t.transform(X_train), t.transform(X_test)

In [66]:

# Seed TensorFlow's global random generator before any layer is created so the
# randomly initialised weights start from the same state on every run.
# NOTE(review): some hardware kernels may still be non-deterministic.
tf.random.set_seed(1)

# the autoencoder's input width equals the number of dataset features
n_inputs = n_features
# define encoder: symbolic input tensor with one slot per feature
visible = Input(shape=(n_inputs,))
# encoder level 1: widen to twice the input size
e = Dense(n_inputs*2)(visible)
e = BatchNormalization()(e)
e = LeakyReLU()(e)
# encoder level 2: back down to the input size
e = Dense(n_inputs)(e)
e = BatchNormalization()(e)
e = LeakyReLU()(e)
# bottleneck: compress to one fifth of the input width (no activation applied)
n_bottleneck = round(n_inputs / 5)
bottleneck = Dense(n_bottleneck)(e)

In [67]:
# decoder: starts from the bottleneck and widens over two levels
d = bottleneck
for units in (n_inputs, n_inputs * 2):
    # each decoder level is Dense -> BatchNormalization -> LeakyReLU
    d = Dense(units)(d)
    d = BatchNormalization()(d)
    d = LeakyReLU()(d)
# linear output layer with one unit per input feature
output = Dense(n_inputs, activation='linear')(d)
# the full autoencoder maps the input tensor to its reconstruction
model = Model(inputs=visible, outputs=output)

In [68]:
# configure training: Adam optimizer with mean squared error as the loss
model.compile(loss='mse', optimizer='adam')

In [69]:
# print the autoencoder's layers with their output shapes and parameter counts
model.summary()

Model: "model_11"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
input_5 (InputLayer)         [(None, 100)]             0         
_________________________________________________________________
dense_24 (Dense)             (None, 200)               20200     
_________________________________________________________________
batch_normalization_16 (Batc (None, 200)               800       
_________________________________________________________________
leaky_re_lu_16 (LeakyReLU)   (None, 200)               0         
_________________________________________________________________
dense_25 (Dense)             (None, 100)               20100     
_________________________________________________________________
batch_normalization_17 (Batc (None, 100)               400       
_________________________________________________________________
leaky_re_lu_17 (LeakyReLU)   (None, 100)               0  

In [None]:
# train the autoencoder with the input as its own target; the held-out split is
# passed as validation data so a val_loss is reported for every epoch
history = model.fit(
    X_train,
    X_train,
    validation_data=(X_test, X_test),
    epochs=200,
    batch_size=16,
    verbose=2,
)

Epoch 1/200
42/42 - 1s - loss: 0.2190 - val_loss: 0.1980
Epoch 2/200
42/42 - 0s - loss: 0.0366 - val_loss: 0.1291
Epoch 3/200
42/42 - 0s - loss: 0.0250 - val_loss: 0.0694
Epoch 4/200
42/42 - 0s - loss: 0.0199 - val_loss: 0.0370
Epoch 5/200
42/42 - 0s - loss: 0.0180 - val_loss: 0.0238
Epoch 6/200
42/42 - 0s - loss: 0.0157 - val_loss: 0.0160
Epoch 7/200
42/42 - 0s - loss: 0.0144 - val_loss: 0.0122
Epoch 8/200
42/42 - 0s - loss: 0.0131 - val_loss: 0.0099
Epoch 9/200
42/42 - 0s - loss: 0.0128 - val_loss: 0.0087
Epoch 10/200
42/42 - 0s - loss: 0.0113 - val_loss: 0.0070
Epoch 11/200
42/42 - 0s - loss: 0.0107 - val_loss: 0.0069
Epoch 12/200
42/42 - 0s - loss: 0.0103 - val_loss: 0.0064
Epoch 13/200
42/42 - 0s - loss: 0.0105 - val_loss: 0.0058
Epoch 14/200
42/42 - 0s - loss: 0.0092 - val_loss: 0.0063
Epoch 15/200
42/42 - 0s - loss: 0.0090 - val_loss: 0.0059
Epoch 16/200
42/42 - 0s - loss: 0.0094 - val_loss: 0.0055
Epoch 17/200
42/42 - 0s - loss: 0.0089 - val_loss: 0.0054
Epoch 18/200
42/42 - 0s

Epoch 142/200
42/42 - 0s - loss: 0.0039 - val_loss: 0.0016
Epoch 143/200
42/42 - 0s - loss: 0.0035 - val_loss: 0.0017
Epoch 144/200
42/42 - 0s - loss: 0.0036 - val_loss: 0.0021
Epoch 145/200
42/42 - 0s - loss: 0.0037 - val_loss: 0.0018
Epoch 146/200
42/42 - 0s - loss: 0.0036 - val_loss: 0.0013
Epoch 147/200
42/42 - 0s - loss: 0.0035 - val_loss: 0.0016
Epoch 148/200
42/42 - 0s - loss: 0.0036 - val_loss: 0.0021
Epoch 149/200
42/42 - 0s - loss: 0.0033 - val_loss: 0.0011
Epoch 150/200
42/42 - 0s - loss: 0.0037 - val_loss: 0.0016
Epoch 151/200
42/42 - 0s - loss: 0.0034 - val_loss: 0.0013
Epoch 152/200
42/42 - 0s - loss: 0.0039 - val_loss: 0.0016
Epoch 153/200
42/42 - 0s - loss: 0.0035 - val_loss: 0.0015
Epoch 154/200
42/42 - 0s - loss: 0.0033 - val_loss: 0.0013
Epoch 155/200
42/42 - 0s - loss: 0.0034 - val_loss: 0.0018
Epoch 156/200
42/42 - 0s - loss: 0.0035 - val_loss: 0.0016
Epoch 157/200
42/42 - 0s - loss: 0.0035 - val_loss: 0.0013
Epoch 158/200
42/42 - 0s - loss: 0.0037 - val_loss: 9.90

In [None]:
# plot the per-epoch loss recorded by model.fit for the training data and for
# the validation data, on labelled axes so the figure stands on its own
fig, ax = plt.subplots()
ax.plot(history.history['loss'], label='train')
ax.plot(history.history['val_loss'], label='test')
ax.set_xlabel('epoch')
ax.set_ylabel('MSE loss')
ax.set_title('Autoencoder reconstruction loss')
ax.legend()
plt.show()

In [None]:
# define an encoder model (without the decoder): input tensor -> bottleneck
encoder = Model(inputs=visible, outputs=bottleneck)
# plot_model(encoder, 'encoder_no_compress.png', show_shapes=True)
# create the target directory first; saving fails if 'models/' does not exist
os.makedirs('models', exist_ok=True)
# save the encoder to file
encoder.save('models/encoder.h5')

In [None]:
# baseline in performance: logistic regression on the scaled, un-encoded features
# define dataset (same parameters and seed as the dataset cell above)
X, y = make_classification(n_samples=1000, n_features=n_features, n_informative=10, n_redundant=90, random_state=1)
# split into train test sets
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.33, random_state=1)
# scale data: fit on the training rows only, then transform both splits
t = MinMaxScaler()
t.fit(X_train)
X_train = t.transform(X_train)
X_test = t.transform(X_test)
# define model under its own name so the Keras autoencoder bound to `model`
# is not overwritten and the earlier summary/fit cells remain re-runnable
baseline_model = LogisticRegression()
# fit model on training set
baseline_model.fit(X_train, y_train)
# make prediction on test set
yhat = baseline_model.predict(X_test)
# calculate accuracy
acc = accuracy_score(y_test, yhat)
print(acc)

In [None]:
# load the encoder from file; it was saved without being compiled and is only
# used for prediction, so skip restoring a training configuration
encoder = load_model('models/encoder.h5', compile=False)

In [None]:
# run both splits through the encoder (train first, then test) to get their
# bottleneck representations
X_train_encode, X_test_encode = encoder.predict(X_train), encoder.predict(X_test)

In [None]:
# logistic regression trained on the encoder's output for the training split
model = LogisticRegression().fit(X_train_encode, y_train)
# predict on the encoded test split and score against the true labels
yhat = model.predict(X_test_encode)
acc = accuracy_score(y_test, yhat)
print(acc)