In [13]:
# Import Required Libraries
import numpy as np
from sklearn.model_selection import train_test_split
from tensorflow.keras.models import Model
from tensorflow.keras.layers import Input, Dense


In [14]:
# Step 1: Create Synthetic Data
# A seeded generator makes the dataset identical on every Restart & Run All;
# the original unseeded np.random calls produced different data on each run.
rng = np.random.default_rng(42)

# Sample counts are named once so the data and label arrays cannot drift apart.
n_normal, n_anomaly, n_features = 1000, 250, 10

# Normal data (mean=0, std=1)
normal_data = rng.normal(0, 1, (n_normal, n_features))

# Anomaly data (mean=5, std=1)
anomaly_data = rng.normal(5, 1, (n_anomaly, n_features))

# Combine both into one dataset: normal rows first, anomaly rows after
data = np.vstack([normal_data, anomaly_data])
labels = np.hstack([np.zeros(n_normal), np.ones(n_anomaly)])   # 0 = normal, 1 = anomaly


In [15]:
# Step 2: Split into Training and Testing Sets
# 30% of the rows are held out for testing; the fixed random_state keeps the
# partition repeatable for a given input.
split_parts = train_test_split(data, labels, test_size=0.3, random_state=42)
x_train, x_test, y_train, y_test = split_parts


In [16]:
# Step 3: Define Autoencoder Architecture
input_dim = x_train.shape[1]   # one input unit per feature column
encoding_dim = 3               # width of the bottleneck (latent) layer

# Network: input -> Dense(relu) bottleneck -> Dense(linear) reconstruction
input_layer = Input(shape=(input_dim,))
encoder_layer = Dense(units=encoding_dim, activation="relu")(input_layer)
decoder_layer = Dense(units=input_dim, activation="linear")(encoder_layer)

# Wire the layers into a model and train it to minimise reconstruction MSE
autoencoder = Model(inputs=input_layer, outputs=decoder_layer)
autoencoder.compile(loss="mse", optimizer="adam")


In [17]:
# Step 4: Train Autoencoder (Only on Normal Data)
# Select the normal (label 0) training rows once and reuse them as both the
# input and the reconstruction target.
x_train_normal = x_train[y_train == 0]

# Keep the returned History object: it holds the per-epoch loss for later
# inspection, and assigning it stops the bare object repr from being echoed
# as the cell's output.
history = autoencoder.fit(
    x_train_normal,
    x_train_normal,
    epochs=10,
)


Epoch 1/10
[1m22/22[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 4ms/step - loss: 1.3103
Epoch 2/10
[1m22/22[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 4ms/step - loss: 1.2315
Epoch 3/10
[1m22/22[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 4ms/step - loss: 1.1714
Epoch 4/10
[1m22/22[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 4ms/step - loss: 1.1202 
Epoch 5/10
[1m22/22[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 4ms/step - loss: 1.0822 
Epoch 6/10
[1m22/22[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 4ms/step - loss: 1.0493 
Epoch 7/10
[1m22/22[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 4ms/step - loss: 1.0229 
Epoch 8/10
[1m22/22[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 4ms/step - loss: 1.0004 
Epoch 9/10
[1m22/22[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 4ms/step - loss: 0.9813
Epoch 10/10
[1m22/22[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 4ms/step - loss: 0.9644


<keras.src.callbacks.history.History at 0x218e16f8bd0>

In [18]:
# Step 5: Predict and Detect Anomalies
# Reconstruct every test sample, then score it by its mean squared
# reconstruction error across the feature axis (one score per row).
x_pred = autoencoder.predict(x_test)
squared_error = np.square(x_test - x_pred)
mse = squared_error.mean(axis=1)


[1m12/12[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 12ms/step


In [19]:
# Step 6: Calculate dynamic threshold using training normal data
# Per-sample reconstruction error on the normal (label 0) training rows.
normal_train = x_train[y_train == 0]
x_train_pred = autoencoder.predict(normal_train)
mse_train = np.square(normal_train - x_train_pred).mean(axis=1)

# Dynamic threshold: three standard deviations above the mean training error
threshold = mse_train.mean() + 3 * mse_train.std()


[1m22/22[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 5ms/step


In [20]:
# Step 7: Predict Anomalies Using Dynamic Threshold
# A sample is flagged when its reconstruction error exceeds the threshold.
exceeds_threshold = mse > threshold
y_pred = exceeds_threshold.astype(int)   # 1 = anomaly, 0 = normal


In [21]:
# Step 8: Display results
# Print the threshold, then a 20-sample preview of the true labels,
# predicted labels and reconstruction errors.
head = slice(20)
print("Calculated Threshold:", threshold)
print("True labels (first 20):", y_test[head])
print("Predicted labels (first 20):", y_pred[head])
print("MSE (first 20 samples):", mse[head])


Calculated Threshold: 2.3406554469208687
True labels (first 20): [0. 1. 0. 0. 0. 0. 0. 0. 0. 0. 1. 0. 0. 0. 1. 1. 1. 0. 1. 1.]
Predicted labels (first 20): [0 1 0 0 0 0 0 0 0 0 1 0 0 0 1 1 1 0 1 1]
MSE (first 20 samples): [ 1.39153521 47.69406879  0.93959884  0.49208056  1.29968583  0.39762181
  0.67706072  1.24636946  1.55907355  0.82153786 33.59149585  0.49936903
  0.56807116  0.61349535 55.28422118 50.88831203 46.55837723  0.53809539
 46.79130652 36.93160743]
