In [38]:
import tensorflow as tf
import numpy as np
from sklearn.model_selection import train_test_split
from sklearn.metrics import accuracy_score, confusion_matrix, classification_report
from tensorflow.keras.models import Model
from tensorflow.keras.layers import Dense, Input
import matplotlib.pyplot as plt

In [39]:
#CREATING SYNTHETIC DATA:

In [40]:
normal_data = np.random.normal(0,1,(1000,10))

In [41]:
print(f"Normal samples: {len(normal_data)}")

Normal samples: 1000


In [42]:
anomaly_data = np.random.normal(5,1,(250,10))

In [43]:
print(f"Anomaly samples: {len(anomaly_data)}")

Anomaly samples: 250


In [44]:
##COMBINE ALL DATA:

In [45]:
data = np.vstack([normal_data, anomaly_data])

In [46]:
#CREATE LABELS (to tell our model which are normal and which are anomalies) for every datapoint in x_train there will exists an label in y_train

In [47]:
labels = np.hstack([np.zeros(1000), np.ones(250)])

In [48]:
print(f"  Total samples: {len(data)}")

  Total samples: 1250


In [49]:
#SPLITTING DATA:

In [50]:
x_train, x_test, y_train, y_test = train_test_split(data, labels, test_size=0.3, random_state=42)

In [51]:
print(f"Training samples: {len(x_train)}")

Training samples: 875


In [52]:
print(f"Test sample: {len(x_test)}")

Test sample: 375


In [53]:
#BUILD AUTOENCODER MODEL:

In [54]:
input_dim = x_train.shape[1]

In [55]:
encoding_dim = 3

In [60]:
print(f"Input dimension: {input_dim}")

Input dimension: 10


In [61]:
print(f"Bottleneck dimension: {encoding_dim}")

Bottleneck dimension: 3


In [64]:
## INPUT LAYER:

In [65]:
input_layer = Input(shape=(input_dim,))

In [66]:
#ENCODER LAYER:

In [67]:
encoder_layer = Dense(encoding_dim, activation='relu')(input_layer)

In [68]:
#DECODER LAYER:

In [69]:
decoder_layer = Dense(input_dim, activation='linear')(encoder_layer)

In [70]:
#COMBINE EVERYTHING IN MODEL:

In [71]:
autoencoder = Model(input_layer, decoder_layer)

In [72]:
#COMPILE MODEL:

In [73]:
autoencoder.compile(optimizer='adam', loss='mse')

In [74]:
#TRAIN MODEL:

In [75]:
x_train_normal = x_train[y_train == 0]

In [76]:
autoencoder.fit(x_train_normal, x_train_normal, epochs=10, batch_size=32, verbose=1)

Epoch 1/10
[1m22/22[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2s[0m 5ms/step - loss: 1.1722
Epoch 2/10
[1m22/22[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 5ms/step - loss: 1.1170
Epoch 3/10
[1m22/22[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 4ms/step - loss: 1.0736
Epoch 4/10
[1m22/22[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 4ms/step - loss: 1.0382 
Epoch 5/10
[1m22/22[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 5ms/step - loss: 1.0097
Epoch 6/10
[1m22/22[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 4ms/step - loss: 0.9855
Epoch 7/10
[1m22/22[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 4ms/step - loss: 0.9661 
Epoch 8/10
[1m22/22[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 4ms/step - loss: 0.9487 
Epoch 9/10
[1m22/22[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 6ms/step - loss: 0.9336
Epoch 10/10
[1m22/22[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 5ms/step - loss: 0.9202


<keras.src.callbacks.history.History at 0x26fbe6a57f0>

In [77]:
x_pred = autoencoder.predict(x_test, verbose=0)

In [78]:
mse = ((x_test - x_pred) ** 2).mean(axis=1)

In [80]:
x_train_pred = autoencoder.predict(x_train_normal, verbose=0)

In [81]:
mse_train = ((x_train_normal - x_train_pred) ** 2).mean(axis=1)

In [83]:
threshold = np.mean(mse_train) + 3*np.std(mse_train)

In [84]:
print(threshold)

2.200445857983697


In [85]:
print(f"Mean training error: {np.mean(mse_train)}")

Mean training error: 0.9135595353487306


In [86]:
print(f"Std Deviation: {np.std(mse_train)}")

Std Deviation: 0.4289621075449889


In [87]:
print(f"Calculated threshold: {threshold:.4f}")

Calculated threshold: 2.2004


In [88]:
#CLASSIFY TEST DATA AS NORMAL OR ANOMALY

In [89]:
y_pred = (mse > threshold).astype(int)

In [90]:
print(f"True labels: {y_test[:20]}")

True labels: [0. 1. 0. 0. 0. 0. 0. 0. 0. 0. 1. 0. 0. 0. 1. 1. 1. 0. 1. 1.]


In [91]:
print(f"Predicted labels: {y_pred[:20]}")

Predicted labels: [1 1 0 0 0 0 0 0 0 0 1 0 0 0 1 1 1 0 1 1]


In [92]:
accuracy = accuracy_score(y_test, y_pred)

In [93]:
print(accuracy)

0.992


In [94]:
print(classification_report(y_test, y_pred, target_names=['Normal', 'Anomaly']))

              precision    recall  f1-score   support

      Normal       1.00      0.99      1.00       306
     Anomaly       0.96      1.00      0.98        69

    accuracy                           0.99       375
   macro avg       0.98      1.00      0.99       375
weighted avg       0.99      0.99      0.99       375



In [95]:
cm = confusion_matrix(y_test, y_pred)

In [96]:
print(cm)

[[303   3]
 [  0  69]]
