# Import the necessary libraries

In [1]:
import pandas as pd
import sys
import os
import matplotlib.pyplot as plt
import seaborn as sns
import logging
import pandas as pd
 
# Configure logging
logging.basicConfig(filename='../logs/',
                    level=logging.INFO,
                    format='%(asctime)s:%(levelname)s:%(message)s')
  

sys.path.append(os.path.abspath(os.path.join(os.getcwd(), '../../scripts')))
from load_csv_data import Load_CSV_Data


# Load data

In [2]:
credit_df = Load_CSV_Data('../../data/normalized_scaled_credit_data.csv')
credit_df.load_csv_data()
credit_df = credit_df.get_data()

Data successfully loaded from ../../data/normalized_scaled_credit_data.csv


# Feature and Target separation

In [3]:
# Feature and Target separation for credit card data
X_credit = credit_df.drop(columns=['Class'])  # Features (remove target column)
y_credit = credit_df['Class']                # Target (Class column)


# Check the shape of the datasets to ensure separation is correct
print(f"Credit card data - Features: {X_credit.shape}, Target: {y_credit.shape}")


Credit card data - Features: (283726, 30), Target: (283726,)


# Logistic Regression 

In [4]:
from sklearn.model_selection import train_test_split
from sklearn.linear_model import LogisticRegression
from sklearn.metrics import accuracy_score, precision_score, recall_score, f1_score, confusion_matrix, classification_report

# Credit Card Data - Logistic Regression
print("Logistic Regression for Credit Card Data")

# Split the credit card data into training and testing sets (80% train, 20% test)
X_train_credit, X_test_credit, y_train_credit, y_test_credit = train_test_split(X_credit, y_credit, test_size=0.2, random_state=42, stratify=y_credit)

# Initialize the Logistic Regression model
log_reg_credit = LogisticRegression(max_iter=1000)

# Train the model on the training data
log_reg_credit.fit(X_train_credit, y_train_credit)

# Make predictions on the test data
y_pred_credit = log_reg_credit.predict(X_test_credit)

# Evaluate the model performance
accuracy_credit = accuracy_score(y_test_credit, y_pred_credit)
precision_credit = precision_score(y_test_credit, y_pred_credit)
recall_credit = recall_score(y_test_credit, y_pred_credit)
f1_credit = f1_score(y_test_credit, y_pred_credit)

# Print evaluation metrics
print(f"Credit Card Data - Accuracy: {accuracy_credit:.4f}")
print(f"Credit Card Data - Precision: {precision_credit:.4f}")
print(f"Credit Card Data - Recall: {recall_credit:.4f}")
print(f"Credit Card Data - F1-Score: {f1_credit:.4f}")
print("\nClassification Report for Credit Card Data:\n", classification_report(y_test_credit, y_pred_credit))
print("\nConfusion Matrix for Credit Card Data:\n", confusion_matrix(y_test_credit, y_pred_credit))


Logistic Regression for Credit Card Data
Credit Card Data - Accuracy: 0.9991
Credit Card Data - Precision: 0.8485
Credit Card Data - Recall: 0.5895
Credit Card Data - F1-Score: 0.6957

Classification Report for Credit Card Data:
               precision    recall  f1-score   support

           0       1.00      1.00      1.00     56651
           1       0.85      0.59      0.70        95

    accuracy                           1.00     56746
   macro avg       0.92      0.79      0.85     56746
weighted avg       1.00      1.00      1.00     56746


Confusion Matrix for Credit Card Data:
 [[56641    10]
 [   39    56]]


# Decision Tree

In [5]:
from sklearn.tree import DecisionTreeClassifier
from sklearn.metrics import accuracy_score, precision_score, recall_score, f1_score
from sklearn.model_selection import train_test_split

# Assuming X_credit and y_credit are already defined (features and target)
# Train-test split
X_train_credit, X_test_credit, y_train_credit, y_test_credit = train_test_split(X_credit, y_credit, test_size=0.2, random_state=42)

# Initialize the Decision Tree Classifier
dt_credit = DecisionTreeClassifier(random_state=42)

# Train the model
dt_credit.fit(X_train_credit, y_train_credit)

# Make predictions on the test set
y_pred_credit = dt_credit.predict(X_test_credit)

# Evaluate the model
accuracy_credit = accuracy_score(y_test_credit, y_pred_credit)
precision_credit = precision_score(y_test_credit, y_pred_credit)
recall_credit = recall_score(y_test_credit, y_pred_credit)
f1_credit = f1_score(y_test_credit, y_pred_credit)

# Print the evaluation results
print(f"Credit Data - Decision Tree Results:")
print(f"Accuracy: {accuracy_credit:.4f}")
print(f"Precision: {precision_credit:.4f}")
print(f"Recall: {recall_credit:.4f}")
print(f"F1-Score: {f1_credit:.4f}")


Credit Data - Decision Tree Results:
Accuracy: 0.9990
Precision: 0.6667
Recall: 0.7111
F1-Score: 0.6882


# Random Forest 

In [6]:
from sklearn.ensemble import RandomForestClassifier
from sklearn.metrics import accuracy_score, precision_score, recall_score, f1_score
from sklearn.model_selection import train_test_split

# Assuming X_credit and y_credit are already defined (features and target)
# Train-test split
X_train_credit, X_test_credit, y_train_credit, y_test_credit = train_test_split(X_credit, y_credit, test_size=0.2, random_state=42)

# Initialize the Random Forest Classifier
rf_credit = RandomForestClassifier(random_state=42, n_estimators=100)

# Train the model
rf_credit.fit(X_train_credit, y_train_credit)

# Make predictions on the test set
y_pred_credit = rf_credit.predict(X_test_credit)

# Evaluate the model
accuracy_credit = accuracy_score(y_test_credit, y_pred_credit)
precision_credit = precision_score(y_test_credit, y_pred_credit)
recall_credit = recall_score(y_test_credit, y_pred_credit)
f1_credit = f1_score(y_test_credit, y_pred_credit)

# Print the evaluation results
print(f"Credit Data - Random Forest Results:")
print(f"Accuracy: {accuracy_credit:.4f}")
print(f"Precision: {precision_credit:.4f}")
print(f"Recall: {recall_credit:.4f}")
print(f"F1-Score: {f1_credit:.4f}")


Credit Data - Random Forest Results:
Accuracy: 0.9995
Precision: 0.9706
Recall: 0.7333
F1-Score: 0.8354


# Gradient Boosting

In [7]:
from sklearn.ensemble import GradientBoostingClassifier
from sklearn.metrics import accuracy_score, precision_score, recall_score, f1_score
from sklearn.model_selection import train_test_split

# Assuming X_credit and y_credit are already defined (features and target)
# Train-test split
X_train_credit, X_test_credit, y_train_credit, y_test_credit = train_test_split(X_credit, y_credit, test_size=0.2, random_state=42)

# Initialize the Gradient Boosting Classifier
gb_credit = GradientBoostingClassifier(random_state=42, n_estimators=100, learning_rate=0.1)

# Train the model
gb_credit.fit(X_train_credit, y_train_credit)

# Make predictions on the test set
y_pred_credit = gb_credit.predict(X_test_credit)

# Evaluate the model
accuracy_credit = accuracy_score(y_test_credit, y_pred_credit)
precision_credit = precision_score(y_test_credit, y_pred_credit)
recall_credit = recall_score(y_test_credit, y_pred_credit)
f1_credit = f1_score(y_test_credit, y_pred_credit)

# Print the evaluation results
print(f"Credit Data - Gradient Boosting Results:")
print(f"Accuracy: {accuracy_credit:.4f}")
print(f"Precision: {precision_credit:.4f}")
print(f"Recall: {recall_credit:.4f}")
print(f"F1-Score: {f1_credit:.4f}")


Credit Data - Gradient Boosting Results:
Accuracy: 0.9993
Precision: 0.8906
Recall: 0.6333
F1-Score: 0.7403


# Multi-Layer Perceptron (MLP)

In [8]:
from sklearn.neural_network import MLPClassifier
from sklearn.metrics import accuracy_score, precision_score, recall_score, f1_score
from sklearn.model_selection import train_test_split

# Train-test split for credit card data
X_train_credit, X_test_credit, y_train_credit, y_test_credit = train_test_split(X_credit, y_credit, test_size=0.2, random_state=42)

# Initialize the MLP Classifier
mlp_credit = MLPClassifier(hidden_layer_sizes=(100, 50), max_iter=300, random_state=42)

# Train the model
mlp_credit.fit(X_train_credit, y_train_credit)

# Make predictions on the test set
y_pred_credit = mlp_credit.predict(X_test_credit)

# Evaluate the model
accuracy_credit = accuracy_score(y_test_credit, y_pred_credit)
precision_credit = precision_score(y_test_credit, y_pred_credit)
recall_credit = recall_score(y_test_credit, y_pred_credit)
f1_credit = f1_score(y_test_credit, y_pred_credit)

# Print the evaluation results
print(f"Credit Data - MLP Results:")
print(f"Accuracy: {accuracy_credit:.4f}")
print(f"Precision: {precision_credit:.4f}")
print(f"Recall: {recall_credit:.4f}")
print(f"F1-Score: {f1_credit:.4f}")


Credit Data - MLP Results:
Accuracy: 0.9995
Precision: 0.9041
Recall: 0.7333
F1-Score: 0.8098


# Convolutional Neural Network (CNN)

In [9]:
import numpy as np
import tensorflow as tf
from sklearn.model_selection import train_test_split
from sklearn.metrics import accuracy_score, precision_score, recall_score, f1_score

# Reshape data for CNN
X_credit_reshaped = np.expand_dims(X_credit, axis=2)  # Adding a channel dimension (like a grayscale image)

# Train-test split
X_train_credit, X_test_credit, y_train_credit, y_test_credit = train_test_split(X_credit_reshaped, y_credit, test_size=0.2, random_state=42)

# Define the CNN model
def create_cnn_model(input_shape):
    model = tf.keras.models.Sequential()
    
    # First Conv1D layer
    model.add(tf.keras.layers.Conv1D(filters=32, kernel_size=3, activation='relu', input_shape=input_shape))
    model.add(tf.keras.layers.MaxPooling1D(pool_size=2))
    
    # Second Conv1D layer
    model.add(tf.keras.layers.Conv1D(filters=64, kernel_size=3, activation='relu'))
    model.add(tf.keras.layers.MaxPooling1D(pool_size=2))
    
    # Flatten and add dense layers
    model.add(tf.keras.layers.Flatten())
    model.add(tf.keras.layers.Dense(64, activation='relu'))
    model.add(tf.keras.layers.Dropout(0.5))
    model.add(tf.keras.layers.Dense(1, activation='sigmoid'))  # Binary classification

    # Compile the model
    model.compile(optimizer='adam', loss='binary_crossentropy', metrics=['accuracy'])
    
    return model

# Create CNN model
cnn_model_credit = create_cnn_model(input_shape=(X_train_credit.shape[1], 1))

# Train the model
cnn_model_credit.fit(X_train_credit, y_train_credit, epochs=10, batch_size=32, validation_split=0.2)

# Make predictions on the test set
y_pred_credit = (cnn_model_credit.predict(X_test_credit) > 0.5).astype("int32")

# Evaluate the model
accuracy_credit = accuracy_score(y_test_credit, y_pred_credit)
precision_credit = precision_score(y_test_credit, y_pred_credit)
recall_credit = recall_score(y_test_credit, y_pred_credit)
f1_credit = f1_score(y_test_credit, y_pred_credit)

# Print the evaluation results
print(f"Credit Data - CNN Results:")
print(f"Accuracy: {accuracy_credit:.4f}")
print(f"Precision: {precision_credit:.4f}")
print(f"Recall: {recall_credit:.4f}")
print(f"F1-Score: {f1_credit:.4f}")


Epoch 1/10


  super().__init__(activity_regularizer=activity_regularizer, **kwargs)


[1m5675/5675[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m16s[0m 3ms/step - accuracy: 0.9976 - loss: 0.0186 - val_accuracy: 0.9992 - val_loss: 0.0037
Epoch 2/10
[1m5675/5675[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m13s[0m 2ms/step - accuracy: 0.9994 - loss: 0.0038 - val_accuracy: 0.9991 - val_loss: 0.0046
Epoch 3/10
[1m5675/5675[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m13s[0m 2ms/step - accuracy: 0.9993 - loss: 0.0037 - val_accuracy: 0.9993 - val_loss: 0.0036
Epoch 4/10
[1m5675/5675[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m13s[0m 2ms/step - accuracy: 0.9994 - loss: 0.0038 - val_accuracy: 0.9993 - val_loss: 0.0035
Epoch 5/10
[1m5675/5675[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m13s[0m 2ms/step - accuracy: 0.9993 - loss: 0.0035 - val_accuracy: 0.9994 - val_loss: 0.0038
Epoch 6/10
[1m5675/5675[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m13s[0m 2ms/step - accuracy: 0.9994 - loss: 0.0037 - val_accuracy: 0.9992 - val_loss: 0.0036
Epoch 7/10
[1m5675/5

# Recurrent Neural Network (RNN)


In [10]:
import numpy as np
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense, SimpleRNN, Dropout
from tensorflow.keras.optimizers import Adam
from sklearn.model_selection import train_test_split
from sklearn.metrics import classification_report

# Assuming X_credit and y_credit are already prepared

# Create RNN Model
def create_rnn_model(input_shape):
    model = Sequential()
    
    # RNN layer with 64 units
    model.add(SimpleRNN(units=64, activation='tanh', input_shape=input_shape))
    
    # Dropout to prevent overfitting
    model.add(Dropout(0.5))
    
    # Fully connected layers
    model.add(Dense(32, activation='relu'))
    model.add(Dense(1, activation='sigmoid'))  # Binary classification output
    
    # Compile the model
    model.compile(optimizer=Adam(), loss='binary_crossentropy', metrics=['accuracy'])
    
    return model

# Train-test split for credit data
X_credit_train, X_credit_test, y_credit_train, y_credit_test = train_test_split(X_credit, y_credit, test_size=0.2, random_state=42)

# Reshape data to (samples, timesteps, features) for RNN input
X_credit_train_rnn = np.expand_dims(X_credit_train, axis=1)
X_credit_test_rnn = np.expand_dims(X_credit_test, axis=1)

# Build and train the RNN model for credit data
rnn_model_credit = create_rnn_model(X_credit_train_rnn.shape[1:])
rnn_model_credit.fit(X_credit_train_rnn, y_credit_train, epochs=10, batch_size=64, validation_split=0.2)

# Evaluate the RNN model for credit data
credit_pred_rnn = rnn_model_credit.predict(X_credit_test_rnn)
credit_pred_rnn = np.round(credit_pred_rnn)

print("Credit Data - RNN Model")
print(classification_report(y_credit_test, credit_pred_rnn))


Epoch 1/10


  super().__init__(**kwargs)


[1m2838/2838[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m5s[0m 1ms/step - accuracy: 0.9765 - loss: 0.0723 - val_accuracy: 0.9992 - val_loss: 0.0039
Epoch 2/10
[1m2838/2838[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m3s[0m 1ms/step - accuracy: 0.9993 - loss: 0.0031 - val_accuracy: 0.9992 - val_loss: 0.0036
Epoch 3/10
[1m2838/2838[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m3s[0m 1ms/step - accuracy: 0.9993 - loss: 0.0031 - val_accuracy: 0.9992 - val_loss: 0.0039
Epoch 4/10
[1m2838/2838[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m4s[0m 1ms/step - accuracy: 0.9993 - loss: 0.0030 - val_accuracy: 0.9993 - val_loss: 0.0033
Epoch 5/10
[1m2838/2838[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m4s[0m 1ms/step - accuracy: 0.9994 - loss: 0.0032 - val_accuracy: 0.9992 - val_loss: 0.0035
Epoch 6/10
[1m2838/2838[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m4s[0m 1ms/step - accuracy: 0.9993 - loss: 0.0033 - val_accuracy: 0.9992 - val_loss: 0.0036
Epoch 7/10
[1m2838/2838[0

# Long Short-Term Memory (LSTM)


In [11]:
import numpy as np
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense, LSTM, Dropout
from tensorflow.keras.optimizers import Adam
from sklearn.model_selection import train_test_split
from sklearn.metrics import classification_report

# Assuming X_credit and y_credit are already prepared

# Create LSTM Model
def create_lstm_model(input_shape):
    model = Sequential()
    
    # LSTM layer with 64 units
    model.add(LSTM(units=64, activation='tanh', input_shape=input_shape))
    
    # Dropout to prevent overfitting
    model.add(Dropout(0.5))
    
    # Fully connected layers
    model.add(Dense(32, activation='relu'))
    model.add(Dense(1, activation='sigmoid'))  # Binary classification output
    
    # Compile the model
    model.compile(optimizer=Adam(), loss='binary_crossentropy', metrics=['accuracy'])
    
    return model

# Train-test split for credit data
X_credit_train, X_credit_test, y_credit_train, y_credit_test = train_test_split(X_credit, y_credit, test_size=0.2, random_state=42)

# Reshape data to (samples, timesteps, features) for LSTM input
X_credit_train_lstm = np.expand_dims(X_credit_train, axis=1)
X_credit_test_lstm = np.expand_dims(X_credit_test, axis=1)

# Build and train the LSTM model for credit data
lstm_model_credit = create_lstm_model(X_credit_train_lstm.shape[1:])
lstm_model_credit.fit(X_credit_train_lstm, y_credit_train, epochs=10, batch_size=64, validation_split=0.2)

# Evaluate the LSTM model for credit data
credit_pred_lstm = lstm_model_credit.predict(X_credit_test_lstm)
credit_pred_lstm = np.round(credit_pred_lstm)

print("Credit Data - LSTM Model")
print(classification_report(y_credit_test, credit_pred_lstm))


Epoch 1/10


  super().__init__(**kwargs)


[1m2838/2838[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m6s[0m 1ms/step - accuracy: 0.9932 - loss: 0.0717 - val_accuracy: 0.9993 - val_loss: 0.0041
Epoch 2/10
[1m2838/2838[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m4s[0m 1ms/step - accuracy: 0.9994 - loss: 0.0029 - val_accuracy: 0.9993 - val_loss: 0.0036
Epoch 3/10
[1m2838/2838[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m4s[0m 1ms/step - accuracy: 0.9994 - loss: 0.0031 - val_accuracy: 0.9992 - val_loss: 0.0036
Epoch 4/10
[1m2838/2838[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m4s[0m 1ms/step - accuracy: 0.9995 - loss: 0.0025 - val_accuracy: 0.9993 - val_loss: 0.0033
Epoch 5/10
[1m2838/2838[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m5s[0m 2ms/step - accuracy: 0.9994 - loss: 0.0024 - val_accuracy: 0.9993 - val_loss: 0.0034
Epoch 6/10
[1m2838/2838[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m5s[0m 2ms/step - accuracy: 0.9994 - loss: 0.0025 - val_accuracy: 0.9992 - val_loss: 0.0036
Epoch 7/10
[1m2838/2838[0