In [1]:
# ====== 1. Import Required Libraries ======
import pandas as pd
import numpy as np
from sklearn.preprocessing import StandardScaler
from sklearn.model_selection import train_test_split
from tensorflow.keras.models import Model
from tensorflow.keras.layers import Input, Dense
from tensorflow.keras.optimizers import Adam
from sklearn.metrics import confusion_matrix, classification_report
import matplotlib.pyplot as plt

# ====== 2. Load Dataset ======
# Read the transactions table; 'Class' is the label column, everything else
# is used as a model feature.
data = pd.read_csv("creditcard.csv")
X = data.drop(columns=['Class'])
y = data['Class']

# Split into train and test sets BEFORE any scaling so that no statistic of
# the held-out rows can influence preprocessing. The split is stratified on
# the label so both partitions keep the same class ratio (presumably the
# positive class is rare here -- confirm against the data).
X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.3, random_state=42, stratify=y
)

# Take explicit copies so the column assignments below modify independent
# frames rather than slices of X (X itself is left unscaled).
X_train = X_train.copy()
X_test = X_test.copy()

# Scale 'Time' and 'Amount' features: fit on the training rows only, then
# apply the same fitted transform to the test rows.
scaled_columns = ['Time', 'Amount']
scaler = StandardScaler()
X_train[scaled_columns] = scaler.fit_transform(X_train[scaled_columns])
X_test[scaled_columns] = scaler.transform(X_test[scaled_columns])

# Isolate normal transactions (label 0) for training the autoencoder
X_train_normal = X_train[y_train == 0]

# ====== 3. Build Autoencoder ======
# The network maps each feature vector back onto itself through a narrow
# bottleneck: input_dim -> 14 -> 7 -> 3 -> 7 -> 14 -> input_dim.
input_dim = X_train_normal.shape[1]
input_layer = Input(shape=(input_dim,))

# Encoder: progressively narrower ReLU layers down to the 3-unit latent code
encoder = input_layer
for width in (14, 7):
    encoder = Dense(width, activation='relu')(encoder)
latent = Dense(3, activation='relu')(encoder)

# Decoder: mirror of the encoder, finishing with a linear layer so the
# reconstruction is not restricted to non-negative values
decoder = latent
for width in (7, 14):
    decoder = Dense(width, activation='relu')(decoder)
output_layer = Dense(input_dim, activation='linear')(decoder)

# Autoencoder model: trained on mean squared error, with mean absolute error
# reported as an additional metric
autoencoder = Model(inputs=input_layer, outputs=output_layer)
autoencoder.compile(
    optimizer=Adam(learning_rate=0.001),
    loss='mse',
    metrics=['mae'],
)
autoencoder.summary()

# ====== 4. Train Autoencoder ======
# Inputs and targets are the same array: the model learns to reproduce the
# normal transactions it is shown.
history = autoencoder.fit(
    x=X_train_normal,
    y=X_train_normal,
    validation_split=0.2,   # fraction of the rows held out for val_loss / val_mae
    batch_size=256,
    epochs=50,              # Increased epochs for better convergence
    shuffle=True,
    verbose=1,
)

# ====== 5. Determine Anomaly Threshold ======
# Reconstruct the normal training rows and score each one by the mean of its
# squared per-feature reconstruction errors.
X_train_normal_pred = autoencoder.predict(X_train_normal)
mse_train_normal = np.square(X_train_normal_pred - X_train_normal).mean(axis=1)

# Threshold: the 95th percentile of those scores, so about 5% of the normal
# training rows lie above it
threshold = np.percentile(mse_train_normal, q=95)
print(f"\nAnomaly Detection Threshold: {threshold:.6f}")

# ====== 6. Predict Anomalies on Test Set ======
# Score every held-out row by its mean squared reconstruction error
X_test_pred = autoencoder.predict(X_test)
mse_test = np.square(X_test_pred - X_test).mean(axis=1)

# Classify anomalies: 1 when the error is strictly above the threshold,
# otherwise 0
y_pred = (mse_test > threshold).astype(int)

# ====== 7. Evaluate Performance ======
# Compare the thresholded predictions with the true test labels. Each print
# emits a heading line followed by the metric on the next line.
print("\nConfusion Matrix:", confusion_matrix(y_test, y_pred), sep="\n")

print("\nClassification Report:", classification_report(y_test, y_pred), sep="\n")


Epoch 1/50
622/622 ━━━━━━━━━━━━━━━━━━━━ 3s 2ms/step - loss: 0.9731 - mae: 0.6366 - val_loss: 0.7300 - val_mae: 0.5692
Epoch 2/50
622/622 ━━━━━━━━━━━━━━━━━━━━ 1s 2ms/step - loss: 0.7072 - mae: 0.5572 - val_loss: 0.6705 - val_mae: 0.5413
Epoch 3/50
622/622 ━━━━━━━━━━━━━━━━━━━━ 1s 2ms/step - loss: 0.6660 - mae: 0.5370 - val_loss: 0.6388 - val_mae: 0.5306
Epoch 4/50
622/622 ━━━━━━━━━━━━━━━━━━━━ 1s 2ms/step - loss: 0.6275 - mae: 0.5263 - val_loss: 0.6152 - val_mae: 0.5245
Epoch 5/50
622/622 ━━━━━━━━━━━━━━━━━━━━ 1s 2ms/step - loss: 0.6101 - mae: 0.5208 - val_loss: 0.5907 - val_mae: 0.5131
Epoch 6/50
622/622 ━━━━━━━━━━━━━━━━━━━━ 1s 2ms/step - loss: 0.5882 - mae: 0.5115 - val_loss: 0.5760 - val_mae: 0.5091
Epoch 7/50
[1m622/622[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 2ms/step - 