In [19]:
import pandas as pd
import numpy as np
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
from sklearn.metrics import mean_squared_error, classification_report, confusion_matrix
import tensorflow as tf
from tensorflow.keras.models import Model
from tensorflow.keras.layers import Input, Dense

# Loading the dataset
data = pd.read_csv("creditcard.csv")




In [20]:
# Standardize 'Time' and 'Amount' columns
scaler = StandardScaler()
data['scaled_time'] = scaler.fit_transform(data['Time'].values.reshape(-1, 1))
data['scaled_amount'] = scaler.fit_transform(data['Amount'].values.reshape(-1, 1))



In [21]:
# Drop the original 'Time' and 'Amount' columns
data = data.drop(['Time', 'Amount'], axis=1)

# Spliting data into features and labels
X = data.drop('Class', axis=1)
y = data['Class']

# Spliting the data into train and test sets
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, stratify=y, random_state=42)



In [22]:
# Defining the autoencoder architecture
def create_autoencoder(input_dim):
    input_layer = Input(shape=(input_dim,))
    
    # Encoder
    encoded = Dense(64, activation='relu')(input_layer)
    encoded = Dense(32, activation='relu')(encoded)
    
    # Decoder
    decoded = Dense(32, activation='relu')(encoded)
    decoded = Dense(64, activation='relu')(decoded)
    decoded = Dense(input_dim, activation='linear')(decoded)
    
    autoencoder = Model(inputs=input_layer, outputs=decoded)
    autoencoder.compile(optimizer='adam', loss='mse')
    
    return autoencoder



In [23]:
# Filtering out the normal transactions from the training set
normal_train = X_train[y_train == 0]

# Training the autoencoder
autoencoder = create_autoencoder(normal_train.shape[1])
autoencoder.fit(normal_train, normal_train, epochs=50, batch_size=256, validation_split=0.2, shuffle=True)



Epoch 1/50
Epoch 2/50
Epoch 3/50
Epoch 4/50
Epoch 5/50
Epoch 6/50
Epoch 7/50
Epoch 8/50
Epoch 9/50
Epoch 10/50
Epoch 11/50
Epoch 12/50
Epoch 13/50
Epoch 14/50
Epoch 15/50
Epoch 16/50
Epoch 17/50
Epoch 18/50
Epoch 19/50
Epoch 20/50
Epoch 21/50
Epoch 22/50
Epoch 23/50
Epoch 24/50
Epoch 25/50
Epoch 26/50
Epoch 27/50
Epoch 28/50
Epoch 29/50
Epoch 30/50
Epoch 31/50
Epoch 32/50
Epoch 33/50
Epoch 34/50
Epoch 35/50
Epoch 36/50
Epoch 37/50
Epoch 38/50
Epoch 39/50
Epoch 40/50
Epoch 41/50
Epoch 42/50
Epoch 43/50
Epoch 44/50
Epoch 45/50
Epoch 46/50
Epoch 47/50
Epoch 48/50
Epoch 49/50
Epoch 50/50


<keras.callbacks.History at 0x7f5e6ec5e8b0>

In [24]:
# Predicting and calculate mean squared error for the test set
y_pred = autoencoder.predict(X_test)
mse = np.mean(np.power(X_test - y_pred, 2), axis=1)

# Determining the threshold for anomaly detection
normal_mse = mse[y_test == 0]
threshold = np.quantile(normal_mse, 0.995)

# Classifying transactions based on the threshold
y_pred = (mse > threshold).astype(int)

# Evaluating the model performance
print(confusion_matrix(y_test, y_pred))
print(classification_report(y_test, y_pred))



[[56579   285]
 [   17    81]]
              precision    recall  f1-score   support

           0       1.00      0.99      1.00     56864
           1       0.22      0.83      0.35        98

    accuracy                           0.99     56962
   macro avg       0.61      0.91      0.67     56962
weighted avg       1.00      0.99      1.00     56962



In [25]:
# Saving the trained model
autoencoder.save("fraud_detection_autoencoder.h5")


Following the evaluation of our anomaly detection model, we found that it is successful in identifying a majority of fraudulent transactions. 

# **The key takeaways are as follows:**

 - The model has an overall accuracy of 99%. However, due to the imbalanced nature of the dataset, accuracy is not the most suitable metric. Instead, we should focus on precision, recall, and F1-score for the minority class (fraudulent transactions).

 - The precision of our model is 0.22. This means that out of all transactions classified as fraudulent, only 22% are actually fraudulent. As a result, a significant number of normal transactions are misclassified as fraudulent, leading to potential false alarms.

 - The recall of our model is 0.83. This means that our model is able to correctly identify 83% of actual fraudulent transactions. A high recall is essential for a fraud detection system to capture as many fraudulent transactions as possible.

 - The F1-score, which is the harmonic mean of precision and recall, is 0.35. It is important to balance both precision and recall in order to minimize the number of false alarms while also detecting a high percentage of fraudulent transactions.

 - To improve the model's performance, we can consider experimenting with different neural network architectures or hyperparameters for the autoencoder. Additionally, addressing the class imbalance problem using techniques like oversampling, undersampling, or SMOTE may yield better results. Finally, comparing the performance of our model with other anomaly detection algorithms such as Isolation Forest or One-Class SVM can help us identify the most suitable approach for our fraud detection system.