In [1]:
import pandas as pd
import numpy as np
from tensorflow.keras.layers import Dense, Input
from tensorflow.keras.models import Model
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
from tensorflow.keras.optimizers import Adam

In [2]:
data = pd.read_csv("ecg_autoencoder_dataset.csv")

x = data.iloc[:,:-1].values
y = data.iloc[:,-1].values
x_train,x_test,y_train,y_test = train_test_split(x,y,test_size=0.3,random_state=42)

scaler = StandardScaler()
x_train = scaler.fit_transform(x_train)
x_test = scaler.transform(x_test)

x_train_normal = x_train[y_train==0]

In [3]:
input_layer = Input(shape = (x_train_normal.shape[-1],))
encoder = Dense(128,activation="relu")(input_layer)
encoder = Dense(64,activation="relu")(encoder)
latent = Dense(16,activation="relu")(encoder)

In [4]:
decoder = Dense(64,activation="relu")(latent)
decoder = Dense(128,activation = "relu")(decoder)
ouput_layer = Dense(x_train_normal.shape[1],activation="linear")(decoder)

In [5]:
autoencoder = Model(input_layer,ouput_layer)

In [6]:
autoencoder.compile(optimizer=Adam(learning_rate=0.001), loss='mse', metrics=['accuracy'])
autoencoder.summary()

In [7]:
history = autoencoder.fit(x_train_normal,x_train_normal,batch_size=256,epochs=20,shuffle=True)

Epoch 1/20
[1m6/6[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m3s[0m 25ms/step - accuracy: 0.0056 - loss: 1.2999  
Epoch 2/20
[1m6/6[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 21ms/step - accuracy: 0.0278 - loss: 1.1450
Epoch 3/20
[1m6/6[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 17ms/step - accuracy: 0.0303 - loss: 0.9188 
Epoch 4/20
[1m6/6[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 18ms/step - accuracy: 0.0243 - loss: 0.7397
Epoch 5/20
[1m6/6[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 29ms/step - accuracy: 0.0252 - loss: 0.5556
Epoch 6/20
[1m6/6[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 15ms/step - accuracy: 0.0276 - loss: 0.5473
Epoch 7/20
[1m6/6[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 17ms/step - accuracy: 0.0446 - loss: 0.4943
Epoch 8/20
[1m6/6[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 12ms/step - accuracy: 0.0446 - loss: 0.4912
Epoch 9/20
[1m6/6[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m

In [8]:
# 1. Predict and calculate MSE for the NORMAL TRAINING DATA
x_train_normal_pred = autoencoder.predict(x_train_normal)
mse_train_normal = np.mean(np.square(x_train_normal_pred - x_train_normal), axis=1)

# 2. Set the threshold based on the distribution of normal training MSE
# Using the 95th percentile as an example. Adjust this value!
# A lower percentile will increase 'recall' but decrease 'precision'.
threshold = np.percentile(mse_train_normal, 95) 
print(f"Threshold for anomaly detection: {threshold:.4f}")

# --- Predict Anomalies on Test Set --
x_test_pred = autoencoder.predict(x_test)
mse_test = np.mean(np.square(x_test_pred - x_test), axis=1)

# 3. Use the new threshold to predict anomalies on the test set
y_pred = (mse_test > threshold).astype(int)
# ... rest of your evaluation code ...

[1m46/46[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 5ms/step
Threshold for anomaly detection: 0.4332
[1m47/47[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 3ms/step


In [9]:
from sklearn.metrics import confusion_matrix, classification_report

print("Confusion Matrix:")
print(confusion_matrix(y_test, y_pred))
print("\nClassification Report:")
print(classification_report(y_test, y_pred))

Confusion Matrix:
[[581  42]
 [447 430]]

Classification Report:
              precision    recall  f1-score   support

           0       0.57      0.93      0.70       623
           1       0.91      0.49      0.64       877

    accuracy                           0.67      1500
   macro avg       0.74      0.71      0.67      1500
weighted avg       0.77      0.67      0.67      1500

