# Import the necessary libraries

In [1]:
import pandas as pd
import sys
import os
import matplotlib.pyplot as plt
import seaborn as sns
import logging
import pandas as pd
import joblib

 
# Configure logging
logging.basicConfig(filename='../logs/',
                    level=logging.INFO,
                    format='%(asctime)s:%(levelname)s:%(message)s')
  

sys.path.append(os.path.abspath(os.path.join(os.getcwd(), '../../scripts')))
from load_csv_data import Load_CSV_Data


# Credit_Fraud_Detection_Experiment

In [2]:
import mlflow
import mlflow.sklearn

# Set experiment name (same as credit data to log in the same experiment)
mlflow.set_experiment("Credit_Fraud_Detection_Experiment")

# Function to run and log model experiments for fraud data
def log_model_with_mlflow_fraud(model, X_train, X_test, y_train, y_test, model_name):
    with mlflow.start_run(run_name=model_name):
        # Train the model
        model.fit(X_train, y_train)
        
        # Predict
        y_pred = model.predict(X_test)

        # Log parameters and metrics
        accuracy = accuracy_score(y_test, y_pred)
        precision = precision_score(y_test, y_pred)
        recall = recall_score(y_test, y_pred)
        f1 = f1_score(y_test, y_pred)

        # Log metrics in MLflow
        mlflow.log_metric("accuracy", accuracy)
        mlflow.log_metric("precision", precision)
        mlflow.log_metric("recall", recall)
        mlflow.log_metric("f1_score", f1)

        # Log the model itself
        mlflow.sklearn.log_model(model, model_name)
        
        print(f"{model_name} logged in MLflow with accuracy: {accuracy}")


# Load data

In [5]:
fraud_df = Load_CSV_Data('../../data/encoded_fraud_data.csv')
fraud_df.load_csv_data()
fraud_df = fraud_df.get_data()

Data successfully loaded from ../../data/encoded_fraud_data.csv


In [8]:
# Display all columns with the first two rows
pd.set_option('display.max_columns', None)  # Ensure all columns are visible
print(fraud_df.head(2))  # Print the first two rows

   user_id  purchase_value  age    ip_address  class  lower_bound_ip_address  \
0   247547              47   30  1.677886e+07      0              16778240.0   
1   220737              15   34  1.684205e+07      0              16809984.0   

   upper_bound_ip_address  transaction_velocity  purchase_hour  \
0              16779263.0             3632215.0              3   
1              16842751.0             1231637.0             20   

   purchase_day_of_week  country_encoded  browser_encoded  source_encoded  \
0                     6             1844            21074           51960   
1                     2              291            52560           51960   

   sex_M  device_id_target_encoded  signup_year  signup_month  signup_day  \
0  False                  0.082634         2015             6          28   
1  False                  0.082634         2015             1          28   

   signup_hour  purchase_year  purchase_month  purchase_day  time_diff_hours  
0            3   

# Feature and target separation

In [10]:


# Select important features for the fraud detection model
important_features = [
    'purchase_value',
    'age',
    'transaction_velocity',
    'purchase_hour',
    'purchase_day_of_week',
    'country_encoded',
    'browser_encoded',
    'source_encoded',
    'sex_M',
    'signup_year',
    'signup_month',
    'signup_hour',
    'time_diff_hours'
]

# Feature and Target separation
X_fraud = fraud_df[important_features]  # Features
y_fraud = fraud_df['class']              # Target (Class column)

# Check the shape of the datasets to ensure separation is correct
print(f"Credit card data - Features: {X_fraud.shape}, Target: {y_fraud.shape}")

# Save X_fraud and y_fraud as CSV files for future use
X_fraud.to_csv('../../data/splittedData/X_fraud.csv', index=False)
y_fraud.to_csv('../../data/splittedData/y_fraud.csv', index=False)


Credit card data - Features: (129146, 13), Target: (129146,)


# Logistic regression

In [13]:
from sklearn.model_selection import train_test_split
from sklearn.linear_model import LogisticRegression
from sklearn.metrics import accuracy_score, precision_score, recall_score, f1_score, confusion_matrix, classification_report

# Fraud Data - Logistic Regression
print("\n\nLogistic Regression for Fraud Data")

# Split the fraud data into training and testing sets (80% train, 20% test)
X_train_fraud, X_test_fraud, y_train_fraud, y_test_fraud = train_test_split(X_fraud, y_fraud, test_size=0.2, random_state=42, stratify=y_fraud)

# Initialize the Logistic Regression model
log_reg_fraud = LogisticRegression(max_iter=1000)

log_model_with_mlflow_fraud(log_reg_fraud, X_train_fraud, X_test_fraud, y_train_fraud, y_test_fraud, "Logistic_Regression_Fraud")


# Train the model on the training data
log_reg_fraud.fit(X_train_fraud, y_train_fraud)

# Save the trained Logistic Regression model
joblib.dump(log_reg_fraud, '../../saved_models/logistic_regression_fraud_model.pkl')

# Make predictions on the test data
y_pred_fraud = log_reg_fraud.predict(X_test_fraud)

# Evaluate the model performance
accuracy_fraud = accuracy_score(y_test_fraud, y_pred_fraud)
precision_fraud = precision_score(y_test_fraud, y_pred_fraud)
recall_fraud = recall_score(y_test_fraud, y_pred_fraud)
f1_fraud = f1_score(y_test_fraud, y_pred_fraud)

# Print evaluation metrics
print(f"Fraud Data - Accuracy: {accuracy_fraud:.4f}")
print(f"Fraud Data - Precision: {precision_fraud:.4f}")
print(f"Fraud Data - Recall: {recall_fraud:.4f}")
print(f"Fraud Data - F1-Score: {f1_fraud:.4f}")
print("\nClassification Report for Fraud Data:\n", classification_report(y_test_fraud, y_pred_fraud))
print("\nConfusion Matrix for Fraud Data:\n", confusion_matrix(y_test_fraud, y_pred_fraud))



Logistic Regression for Fraud Data


  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))


Logistic_Regression_Fraud logged in MLflow with accuracy: 0.9049941927990709
Fraud Data - Accuracy: 0.9050
Fraud Data - Precision: 0.0000
Fraud Data - Recall: 0.0000
Fraud Data - F1-Score: 0.0000

Classification Report for Fraud Data:
               precision    recall  f1-score   support

           0       0.90      1.00      0.95     23376
           1       0.00      0.00      0.00      2454

    accuracy                           0.90     25830
   macro avg       0.45      0.50      0.48     25830
weighted avg       0.82      0.90      0.86     25830


Confusion Matrix for Fraud Data:
 [[23376     0]
 [ 2454     0]]


  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))


# Decision tree

In [14]:
from sklearn.tree import DecisionTreeClassifier
from sklearn.metrics import accuracy_score, precision_score, recall_score, f1_score
from sklearn.model_selection import train_test_split

# Assuming X_fraud and y_fraud are already defined (features and target)
# Train-test split
X_train_fraud, X_test_fraud, y_train_fraud, y_test_fraud = train_test_split(X_fraud, y_fraud, test_size=0.2, random_state=42)

# Initialize the Decision Tree Classifier
dt_fraud = DecisionTreeClassifier(random_state=42)

log_model_with_mlflow_fraud(dt_fraud, X_train_fraud, X_test_fraud, y_train_fraud, y_test_fraud, "Decision_Tree_Fraud")


# Train the model
dt_fraud.fit(X_train_fraud, y_train_fraud)

# Save the trained  model
joblib.dump(dt_fraud, '../../saved_models/decision_tree_fraud_model.pkl')

# Make predictions on the test set
y_pred_fraud = dt_fraud.predict(X_test_fraud)

# Evaluate the model
accuracy_fraud = accuracy_score(y_test_fraud, y_pred_fraud)
precision_fraud = precision_score(y_test_fraud, y_pred_fraud)
recall_fraud = recall_score(y_test_fraud, y_pred_fraud)
f1_fraud = f1_score(y_test_fraud, y_pred_fraud)

# Print the evaluation results
print(f"Fraud Data - Decision Tree Results:")
print(f"Accuracy: {accuracy_fraud:.4f}")
print(f"Precision: {precision_fraud:.4f}")
print(f"Recall: {recall_fraud:.4f}")
print(f"F1-Score: {f1_fraud:.4f}")




Decision_Tree_Fraud logged in MLflow with accuracy: 0.902826171118854
Fraud Data - Decision Tree Results:
Accuracy: 0.9028
Precision: 0.4826
Recall: 0.5682
F1-Score: 0.5219


# Random Forest

In [15]:
from sklearn.ensemble import RandomForestClassifier
from sklearn.metrics import accuracy_score, precision_score, recall_score, f1_score
from sklearn.model_selection import train_test_split

# Assuming X_fraud and y_fraud are already defined (features and target)
# Train-test split
X_train_fraud, X_test_fraud, y_train_fraud, y_test_fraud = train_test_split(X_fraud, y_fraud, test_size=0.2, random_state=42)

# Initialize the Random Forest Classifier
rf_fraud = RandomForestClassifier(random_state=42, n_estimators=100)

log_model_with_mlflow_fraud(rf_fraud, X_train_fraud, X_test_fraud, y_train_fraud, y_test_fraud, "Random_Forest_Fraud")


# Train the model
rf_fraud.fit(X_train_fraud, y_train_fraud)

# Save the trained  model
joblib.dump(rf_fraud, '../../saved_models/random_forest_fraud_model.pkl')

# Make predictions on the test set
y_pred_fraud = rf_fraud.predict(X_test_fraud)

# Evaluate the model
accuracy_fraud = accuracy_score(y_test_fraud, y_pred_fraud)
precision_fraud = precision_score(y_test_fraud, y_pred_fraud)
recall_fraud = recall_score(y_test_fraud, y_pred_fraud)
f1_fraud = f1_score(y_test_fraud, y_pred_fraud)

# Print the evaluation results
print(f"Fraud Data - Random Forest Results:")
print(f"Accuracy: {accuracy_fraud:.4f}")
print(f"Precision: {precision_fraud:.4f}")
print(f"Recall: {recall_fraud:.4f}")
print(f"F1-Score: {f1_fraud:.4f}")




Random_Forest_Fraud logged in MLflow with accuracy: 0.9569879984514131
Fraud Data - Random Forest Results:
Accuracy: 0.9570
Precision: 1.0000
Recall: 0.5392
F1-Score: 0.7006


# Gradient Boosting

In [16]:
from sklearn.ensemble import GradientBoostingClassifier
from sklearn.metrics import accuracy_score, precision_score, recall_score, f1_score
from sklearn.model_selection import train_test_split

# Assuming X_fraud and y_fraud are already defined (features and target)
# Train-test split
X_train_fraud, X_test_fraud, y_train_fraud, y_test_fraud = train_test_split(X_fraud, y_fraud, test_size=0.2, random_state=42)

# Initialize the Gradient Boosting Classifier
gb_fraud = GradientBoostingClassifier(random_state=42, n_estimators=100, learning_rate=0.1)

log_model_with_mlflow_fraud(gb_fraud, X_train_fraud, X_test_fraud, y_train_fraud, y_test_fraud, "Gradient_Boosting_Fraud")


# Train the model
gb_fraud.fit(X_train_fraud, y_train_fraud)

# Save the trained  model
joblib.dump(gb_fraud, '../../saved_models/gradient_boosting_fraud_model.pkl')

# Make predictions on the test set
y_pred_fraud = gb_fraud.predict(X_test_fraud)

# Evaluate the model
accuracy_fraud = accuracy_score(y_test_fraud, y_pred_fraud)
precision_fraud = precision_score(y_test_fraud, y_pred_fraud)
recall_fraud = recall_score(y_test_fraud, y_pred_fraud)
f1_fraud = f1_score(y_test_fraud, y_pred_fraud)

# Print the evaluation results
print(f"Fraud Data - Gradient Boosting Results:")
print(f"Accuracy: {accuracy_fraud:.4f}")
print(f"Precision: {precision_fraud:.4f}")
print(f"Recall: {recall_fraud:.4f}")
print(f"F1-Score: {f1_fraud:.4f}")




Gradient_Boosting_Fraud logged in MLflow with accuracy: 0.9569879984514131
Fraud Data - Gradient Boosting Results:
Accuracy: 0.9570
Precision: 1.0000
Recall: 0.5392
F1-Score: 0.7006


# Multi-Layer Perceptron (MLP)

In [17]:
from sklearn.neural_network import MLPClassifier
from sklearn.metrics import accuracy_score, precision_score, recall_score, f1_score
from sklearn.model_selection import train_test_split

# Train-test split for fraud data
X_train_fraud, X_test_fraud, y_train_fraud, y_test_fraud = train_test_split(X_fraud, y_fraud, test_size=0.2, random_state=42)

# Initialize the MLP Classifier
mlp_fraud = MLPClassifier(hidden_layer_sizes=(100, 50), max_iter=300, random_state=42)

log_model_with_mlflow_fraud(mlp_fraud, X_train_fraud, X_test_fraud, y_train_fraud, y_test_fraud, "MLP_Fraud")


# Train the model
mlp_fraud.fit(X_train_fraud, y_train_fraud)

# Save the trained  model
joblib.dump(mlp_fraud, '../../saved_models/MLP_fraud_model.pkl')

# Make predictions on the test set
y_pred_fraud = mlp_fraud.predict(X_test_fraud)

# Evaluate the model
accuracy_fraud = accuracy_score(y_test_fraud, y_pred_fraud)
precision_fraud = precision_score(y_test_fraud, y_pred_fraud)
recall_fraud = recall_score(y_test_fraud, y_pred_fraud)
f1_fraud = f1_score(y_test_fraud, y_pred_fraud)

# Print the evaluation results
print(f"Fraud Data - MLP Results:")
print(f"Accuracy: {accuracy_fraud:.4f}")
print(f"Precision: {precision_fraud:.4f}")
print(f"Recall: {recall_fraud:.4f}")
print(f"F1-Score: {f1_fraud:.4f}")




MLP_Fraud logged in MLflow with accuracy: 0.9569105691056911
Fraud Data - MLP Results:
Accuracy: 0.9569
Precision: 0.9977
Recall: 0.5396
F1-Score: 0.7004


# Convolutional Neural Network (CNN)

In [18]:
import pandas as pd
import numpy as np
from sklearn.preprocessing import LabelEncoder
import tensorflow as tf
from sklearn.model_selection import train_test_split
from sklearn.metrics import accuracy_score, precision_score, recall_score, f1_score
import mlflow
import mlflow.keras

# Assuming fraud_df is your DataFrame
# fraud_df = pd.read_csv('your_data.csv')  # Load your dataset here

# Step 1: Check for non-numeric columns and convert them
for column in fraud_df.columns:
    if fraud_df[column].dtype == 'object':
        # Convert object columns to numeric using LabelEncoder
        label_encoder = LabelEncoder()
        fraud_df[column] = label_encoder.fit_transform(fraud_df[column])
    elif fraud_df[column].dtype == 'bool':
        # Convert boolean columns to int (0, 1)
        fraud_df[column] = fraud_df[column].astype(int)

# Step 2: Convert all integer columns to float to ensure compatibility with TensorFlow
fraud_df = fraud_df.astype(float)

# Step 3: Feature and target separation
X_fraud = fraud_df.drop(columns=['class'])  # Assuming 'class' is the target
y_fraud = fraud_df['class']

# Step 4: Reshape the data for CNN (add a channel dimension for 1D CNN)
X_fraud_reshaped = np.expand_dims(X_fraud.values, axis=2)

# Step 5: Train-test split
X_train_fraud, X_test_fraud, y_train_fraud, y_test_fraud = train_test_split(X_fraud_reshaped, y_fraud, test_size=0.2, random_state=42)

# Step 6: CNN model definition
def create_cnn_model(input_shape):
    model = tf.keras.models.Sequential()
    model.add(tf.keras.layers.Conv1D(filters=32, kernel_size=3, activation='relu', input_shape=input_shape))
    model.add(tf.keras.layers.MaxPooling1D(pool_size=2))
    model.add(tf.keras.layers.Conv1D(filters=64, kernel_size=3, activation='relu'))
    model.add(tf.keras.layers.MaxPooling1D(pool_size=2))
    model.add(tf.keras.layers.Flatten())
    model.add(tf.keras.layers.Dense(64, activation='relu'))
    model.add(tf.keras.layers.Dropout(0.5))
    model.add(tf.keras.layers.Dense(1, activation='sigmoid'))  # Binary classification
    model.compile(optimizer='adam', loss='binary_crossentropy', metrics=['accuracy'])
    return model

# Step 7: Create and train the CNN model with MLflow logging
with mlflow.start_run(run_name="CNN_Fraud_Detection"):
    cnn_model_fraud = create_cnn_model(input_shape=(X_train_fraud.shape[1], 1))
    
    # Fit the model
    history = cnn_model_fraud.fit(X_train_fraud, y_train_fraud, epochs=10, batch_size=32, validation_split=0.2)
    
    # Log the model and metrics
    mlflow.keras.log_model(cnn_model_fraud, "CNN_Fraud_Model")
    mlflow.log_param("epochs", 10)
    mlflow.log_param("batch_size", 32)
    
    # Evaluate the model
    y_pred_fraud = (cnn_model_fraud.predict(X_test_fraud) > 0.5).astype("int32")
    
    # Calculate and log evaluation metrics
    accuracy_fraud = accuracy_score(y_test_fraud, y_pred_fraud)
    precision_fraud = precision_score(y_test_fraud, y_pred_fraud)
    recall_fraud = recall_score(y_test_fraud, y_pred_fraud)
    f1_fraud = f1_score(y_test_fraud, y_pred_fraud)

    mlflow.log_metric("accuracy", accuracy_fraud)
    mlflow.log_metric("precision", precision_fraud)
    mlflow.log_metric("recall", recall_fraud)
    mlflow.log_metric("f1_score", f1_fraud)

    # Save the trained model using TensorFlow's save method
    cnn_model_fraud.save('../../saved_models/CNN_fraud_model.h5')

# Print evaluation results
print(f"Fraud Data - CNN Results:")
print(f"Accuracy: {accuracy_fraud:.4f}")
print(f"Precision: {precision_fraud:.4f}")
print(f"Recall: {recall_fraud:.4f}")
print(f"F1-Score: {f1_fraud:.4f}")


Epoch 1/10


  super().__init__(activity_regularizer=activity_regularizer, **kwargs)


[1m2583/2583[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m10s[0m 3ms/step - accuracy: 0.8792 - loss: 1423826.6250 - val_accuracy: 0.9038 - val_loss: 1.3671
Epoch 2/10
[1m2583/2583[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m8s[0m 3ms/step - accuracy: 0.9045 - loss: 0.3362 - val_accuracy: 0.9029 - val_loss: 2.4110
Epoch 3/10
[1m2583/2583[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m7s[0m 3ms/step - accuracy: 0.9032 - loss: 0.6091 - val_accuracy: 0.9038 - val_loss: 1.3875
Epoch 4/10
[1m2583/2583[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m7s[0m 3ms/step - accuracy: 0.9061 - loss: 0.4762 - val_accuracy: 0.9038 - val_loss: 0.3166
Epoch 5/10
[1m2583/2583[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m7s[0m 3ms/step - accuracy: 0.9054 - loss: 0.3130 - val_accuracy: 0.9038 - val_loss: 0.3166
Epoch 6/10
[1m2583/2583[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m7s[0m 3ms/step - accuracy: 0.9046 - loss: 0.3149 - val_accuracy: 0.9038 - val_loss: 0.3166
Epoch 7/10
[1m2583/



[1m808/808[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 2ms/step
Fraud Data - CNN Results:
Accuracy: 0.9067
Precision: 0.0000
Recall: 0.0000
F1-Score: 0.0000


  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))


# Recurrent Neural Network (RNN)


In [19]:
import numpy as np
import pandas as pd
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense, SimpleRNN, Dropout
from tensorflow.keras.optimizers import Adam
from sklearn.model_selection import train_test_split
from sklearn.metrics import classification_report
import mlflow
import mlflow.keras
from sklearn.preprocessing import LabelEncoder

# Assuming fraud_df is your DataFrame
# fraud_df = pd.read_csv('your_data.csv')  # Load your dataset here

# Step 1: Check for non-numeric columns and convert them
for column in fraud_df.columns:
    if fraud_df[column].dtype == 'object':
        # Convert object columns to numeric using LabelEncoder
        label_encoder = LabelEncoder()
        fraud_df[column] = label_encoder.fit_transform(fraud_df[column])
    elif fraud_df[column].dtype == 'bool':
        # Convert boolean columns to int (0, 1)
        fraud_df[column] = fraud_df[column].astype(int)

# Step 2: Convert all integer columns to float to ensure compatibility with TensorFlow
fraud_df = fraud_df.astype(float)

# Step 3: Feature and target separation
X_fraud = fraud_df.drop(columns=['class'])  # Assuming 'class' is the target
y_fraud = fraud_df['class']

# Step 4: Train-test split
X_fraud_train, X_fraud_test, y_fraud_train, y_fraud_test = train_test_split(X_fraud, y_fraud, test_size=0.2, random_state=42)

# Step 5: Reshape data to (samples, timesteps, features) for RNN input
X_fraud_train_rnn = np.expand_dims(X_fraud_train.values, axis=1)
X_fraud_test_rnn = np.expand_dims(X_fraud_test.values, axis=1)

# Step 6: Create RNN Model
def create_rnn_model(input_shape):
    model = Sequential()
    
    # RNN layer with 64 units
    model.add(SimpleRNN(units=64, activation='tanh', input_shape=input_shape))
    
    # Dropout to prevent overfitting
    model.add(Dropout(0.5))
    
    # Fully connected layers
    model.add(Dense(32, activation='relu'))
    model.add(Dense(1, activation='sigmoid'))  # Binary classification output
    
    # Compile the model
    model.compile(optimizer=Adam(), loss='binary_crossentropy', metrics=['accuracy'])
    
    return model

# Step 7: Create and train the RNN model with MLflow logging
with mlflow.start_run(run_name="RNN_Fraud_Detection"):
    rnn_model_fraud = create_rnn_model(X_fraud_train_rnn.shape[1:])
    
    # Fit the model
    history = rnn_model_fraud.fit(X_fraud_train_rnn, y_fraud_train, epochs=10, batch_size=64, validation_split=0.2)
    
    # Log the model and metrics
    mlflow.keras.log_model(rnn_model_fraud, "RNN_Fraud_Model")
    mlflow.log_param("epochs", 10)
    mlflow.log_param("batch_size", 64)
    
    # Evaluate the model
    fraud_pred_rnn = rnn_model_fraud.predict(X_fraud_test_rnn)
    fraud_pred_rnn = np.round(fraud_pred_rnn).astype(int).flatten()

    # Calculate classification report
    report = classification_report(y_fraud_test, fraud_pred_rnn, output_dict=True)

    # Safe extraction of metrics
    accuracy_fraud = report.get('accuracy', 0.0)
    precision_fraud = report.get('1', {}).get('precision', 0.0)
    recall_fraud = report.get('1', {}).get('recall', 0.0)
    f1_fraud = report.get('1', {}).get('f1-score', 0.0)

    # Log metrics
    mlflow.log_metric("accuracy", accuracy_fraud)
    mlflow.log_metric("precision", precision_fraud)
    mlflow.log_metric("recall", recall_fraud)
    mlflow.log_metric("f1_score", f1_fraud)

    # Save the trained model using TensorFlow's save method
    rnn_model_fraud.save('../../saved_models/RNN_fraud_model.h5')

# Print evaluation results
print("Fraud Data - RNN Model")
print(classification_report(y_fraud_test, fraud_pred_rnn))


Epoch 1/10


  super().__init__(**kwargs)


[1m1292/1292[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m4s[0m 2ms/step - accuracy: 0.8979 - loss: 0.3408 - val_accuracy: 0.9038 - val_loss: 0.3259
Epoch 2/10
[1m1292/1292[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2s[0m 1ms/step - accuracy: 0.9044 - loss: 0.3185 - val_accuracy: 0.9038 - val_loss: 0.3171
Epoch 3/10
[1m1292/1292[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2s[0m 2ms/step - accuracy: 0.9054 - loss: 0.3148 - val_accuracy: 0.9038 - val_loss: 0.3169
Epoch 4/10
[1m1292/1292[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2s[0m 2ms/step - accuracy: 0.9061 - loss: 0.3132 - val_accuracy: 0.9038 - val_loss: 0.3158
Epoch 5/10
[1m1292/1292[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2s[0m 2ms/step - accuracy: 0.9055 - loss: 0.3141 - val_accuracy: 0.9038 - val_loss: 0.3188
Epoch 6/10
[1m1292/1292[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2s[0m 1ms/step - accuracy: 0.9037 - loss: 0.3172 - val_accuracy: 0.9038 - val_loss: 0.3164
Epoch 7/10
[1m1292/1292[0



[1m808/808[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 1ms/step
Fraud Data - RNN Model
              precision    recall  f1-score   support

         0.0       0.91      1.00      0.95     23419
         1.0       0.00      0.00      0.00      2411

    accuracy                           0.91     25830
   macro avg       0.45      0.50      0.48     25830
weighted avg       0.82      0.91      0.86     25830



  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))


# Long Short-Term Memory (LSTM)


In [20]:
import numpy as np
from tensorflow.keras.models import Sequential, load_model
from tensorflow.keras.layers import Dense, LSTM, Dropout
from tensorflow.keras.optimizers import Adam
from sklearn.model_selection import train_test_split
from sklearn.metrics import classification_report

# Assuming X_fraud and y_fraud are already prepared

# Create LSTM Model
def create_lstm_model(input_shape):
    model = Sequential()
    
    # LSTM layer with 64 units
    model.add(LSTM(units=64, activation='tanh', input_shape=input_shape))
    
    # Dropout to prevent overfitting
    model.add(Dropout(0.5))
    
    # Fully connected layers
    model.add(Dense(32, activation='relu'))
    model.add(Dense(1, activation='sigmoid'))  # Binary classification output
    
    # Compile the model
    model.compile(optimizer=Adam(), loss='binary_crossentropy', metrics=['accuracy'])
    
    return model

# Train-test split for fraud data
X_fraud_train, X_fraud_test, y_fraud_train, y_fraud_test = train_test_split(X_fraud, y_fraud, test_size=0.2, random_state=42)

# Reshape data to (samples, timesteps, features) for LSTM input
X_fraud_train_lstm = np.expand_dims(X_fraud_train, axis=1)
X_fraud_test_lstm = np.expand_dims(X_fraud_test, axis=1)

# Build and train the LSTM model for fraud data
lstm_model_fraud = create_lstm_model(X_fraud_train_lstm.shape[1:])
lstm_model_fraud.fit(X_fraud_train_lstm, y_fraud_train, epochs=10, batch_size=64, validation_split=0.2)

# Evaluate the LSTM model for fraud data
fraud_pred_lstm = lstm_model_fraud.predict(X_fraud_test_lstm)
fraud_pred_lstm = np.round(fraud_pred_lstm)

# Print evaluation results
print("Fraud Data - LSTM Model")
print(classification_report(y_fraud_test, fraud_pred_lstm))

# Save the trained model using TensorFlow's save method
lstm_model_fraud.save('../../saved_models/LSTM_fraud_model.h5')


Epoch 1/10


  super().__init__(**kwargs)


[1m1292/1292[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m5s[0m 2ms/step - accuracy: 0.8921 - loss: 0.3466 - val_accuracy: 0.9038 - val_loss: 0.3240
Epoch 2/10
[1m1292/1292[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2s[0m 2ms/step - accuracy: 0.9051 - loss: 0.3165 - val_accuracy: 0.9038 - val_loss: 0.3488
Epoch 3/10
[1m1292/1292[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2s[0m 2ms/step - accuracy: 0.9042 - loss: 0.3176 - val_accuracy: 0.9038 - val_loss: 0.3330
Epoch 4/10
[1m1292/1292[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2s[0m 2ms/step - accuracy: 0.9039 - loss: 0.3177 - val_accuracy: 0.9038 - val_loss: 0.3216
Epoch 5/10
[1m1292/1292[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2s[0m 2ms/step - accuracy: 0.9058 - loss: 0.3137 - val_accuracy: 0.9038 - val_loss: 0.3218
Epoch 6/10
[1m1292/1292[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2s[0m 2ms/step - accuracy: 0.9055 - loss: 0.3138 - val_accuracy: 0.9038 - val_loss: 0.3219
Epoch 7/10
[1m1292/1292[0

  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
