In [2]:
# Import necessary PyTorch libraries
import torch
import torch.nn as nn
import torch.optim as optim
from torch.utils.data import DataLoader, TensorDataset
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
import mlflow
from sklearn.metrics import accuracy_score

In [3]:
# Load the cleaned datasets (from Task 1)
fraud_data = pd.read_csv('C:/Users/Administrator/Documents/kifiya/Week_8/clean_data/merged_data.csv')
creditcard_data = pd.read_csv('C:/Users/Administrator/Documents/kifiya/Week_8/clean_data/Preprocessed_Creditcard_Data.csv')

# dropping unessasary columns for training
fraud_data = fraud_data.drop(columns=['signup_time', 'purchase_time','user_id','device_id',
                                      'ip_address', 'lower_bound_ip_address', 'upper_bound_ip_address'], errors='ignore')

display(fraud_data.head())

Unnamed: 0,purchase_value,sex,age,class,transaction_count,hour_of_day,day_of_week,purchase_value_scaled,source_Direct,source_SEO,...,country_United States,country_Uruguay,country_Uzbekistan,country_Vanuatu,country_Venezuela,country_Viet Nam,country_Virgin Islands (U.S.),country_Yemen,country_Zambia,country_Zimbabwe
0,47.0,0,30.0,0.0,1,3,6,0.549607,False,True,...,False,False,False,False,False,False,False,False,False,False
1,15.0,0,34.0,0.0,1,20,2,-1.197335,False,True,...,False,False,False,False,False,False,False,False,False,False
2,44.0,1,29.0,0.0,1,23,5,0.385831,False,False,...,False,False,False,False,False,False,False,False,False,False
3,55.0,0,30.0,0.0,1,16,5,0.986342,True,False,...,False,False,False,False,False,False,False,False,False,False
4,51.0,0,37.0,0.0,1,4,1,0.767974,False,True,...,False,False,False,False,False,False,False,False,False,False


In [4]:
# Separate features and targets for Fraud_Data
X_fraud = fraud_data.drop(columns=['class'])  # Feature set
y_fraud = fraud_data['class']  # Target

# Separate features and targets for CreditCard Data
X_credit = creditcard_data.drop(columns=['Class'])  # Feature set
y_credit = creditcard_data['Class']  # Target

# Train-Test Split for both datasets
X_fraud_train, X_fraud_test, y_fraud_train, y_fraud_test = train_test_split(X_fraud, y_fraud, test_size=0.2, random_state=42)
X_credit_train, X_credit_test, y_credit_train, y_credit_test = train_test_split(X_credit, y_credit, test_size=0.2, random_state=42)

# Normalize the data (Standard Scaling)
scaler = StandardScaler()
X_fraud_train = scaler.fit_transform(X_fraud_train)
X_fraud_test = scaler.transform(X_fraud_test)
X_credit_train = scaler.fit_transform(X_credit_train)
X_credit_test = scaler.transform(X_credit_test)

# Convert the datasets into PyTorch tensors
X_fraud_train_tensor = torch.tensor(X_fraud_train, dtype=torch.float32)
y_fraud_train_tensor = torch.tensor(y_fraud_train.values, dtype=torch.float32)
X_fraud_test_tensor = torch.tensor(X_fraud_test, dtype=torch.float32)
y_fraud_test_tensor = torch.tensor(y_fraud_test.values, dtype=torch.float32)

X_credit_train_tensor = torch.tensor(X_credit_train, dtype=torch.float32)
y_credit_train_tensor = torch.tensor(y_credit_train.values, dtype=torch.float32)
X_credit_test_tensor = torch.tensor(X_credit_test, dtype=torch.float32)
y_credit_test_tensor = torch.tensor(y_credit_test.values, dtype=torch.float32)

# DataLoader for batching
batch_size = 8
train_loader_fraud = torch.utils.data.DataLoader(torch.utils.data.TensorDataset(X_fraud_train_tensor, y_fraud_train_tensor), batch_size=batch_size, shuffle=True)
test_loader_fraud = torch.utils.data.DataLoader(torch.utils.data.TensorDataset(X_fraud_test_tensor, y_fraud_test_tensor), batch_size=batch_size)

train_loader_credit = torch.utils.data.DataLoader(torch.utils.data.TensorDataset(X_credit_train_tensor, y_credit_train_tensor), batch_size=batch_size, shuffle=True)
test_loader_credit = torch.utils.data.DataLoader(torch.utils.data.TensorDataset(X_credit_test_tensor, y_credit_test_tensor), batch_size=batch_size)


## 2. Model Selection - Using PyTorch

#### function to train models

In [5]:
def train_model(model, train_loader, optimizer, criterion, num_epochs=10):
    model.train()
    for epoch in range(num_epochs):
        total_loss = 0
        for X_batch, y_batch in train_loader:
            optimizer.zero_grad()  # Clear gradients
            y_pred = model(X_batch).squeeze()  # Forward pass
            loss = criterion(y_pred, y_batch)  # Compute loss
            loss.backward()  # Backward pass (gradient computation)
            optimizer.step()  # Update weights
            total_loss += loss.item()
        print(f'Epoch [{epoch+1}/{num_epochs}], Loss: {total_loss/len(train_loader):.4f}')

#### Multi-Layer Perceptron (MLP)

In [6]:
class MLPModel(nn.Module):
    def __init__(self, input_size):
        super(MLPModel, self).__init__()
        self.fc1 = nn.Linear(input_size, 64)
        self.fc2 = nn.Linear(64, 32)
        self.fc3 = nn.Linear(32, 1)

    def forward(self, x):
        x = torch.relu(self.fc1(x))
        x = torch.relu(self.fc2(x))
        x = torch.sigmoid(self.fc3(x))
        return x

# Initialize MLP models for both fraud and credit datasets
mlp_model_fraud = MLPModel(X_fraud_train.shape[1])
mlp_model_credit = MLPModel(X_credit_train.shape[1])

# Loss function and optimizer
criterion = nn.BCELoss()
optimizer_fraud = torch.optim.Adam(mlp_model_fraud.parameters(), lr=0.001)
optimizer_credit = torch.optim.Adam(mlp_model_credit.parameters(), lr=0.001)

In [7]:
# Train the MLP model on Fraud Data
train_model(mlp_model_fraud, train_loader_fraud, optimizer_fraud, criterion)

Epoch [1/10], Loss: 0.3175
Epoch [2/10], Loss: 0.3131
Epoch [3/10], Loss: 0.3109
Epoch [4/10], Loss: 0.3083
Epoch [5/10], Loss: 0.3055
Epoch [6/10], Loss: 0.3026
Epoch [7/10], Loss: 0.2995
Epoch [8/10], Loss: 0.2964
Epoch [9/10], Loss: 0.2935
Epoch [10/10], Loss: 0.2905


In [8]:
# Train the MLP model on Credit Card Data
train_model(mlp_model_credit, train_loader_credit, optimizer_credit, criterion)

Epoch [1/10], Loss: 0.0054
Epoch [2/10], Loss: 0.0032
Epoch [3/10], Loss: 0.0030
Epoch [4/10], Loss: 0.0027
Epoch [5/10], Loss: 0.0028
Epoch [6/10], Loss: 0.0026
Epoch [7/10], Loss: 0.0026
Epoch [8/10], Loss: 0.0024
Epoch [9/10], Loss: 0.0024
Epoch [10/10], Loss: 0.0024


#### Convolutional Neural Network (CNN)

In [9]:
class CNNModel(nn.Module):
    def __init__(self, input_size):
        super(CNNModel, self).__init__()
        self.conv1 = nn.Conv1d(1, 64, kernel_size=3)
        self.fc1 = nn.Linear(64 * (input_size - 2), 32)
        self.fc2 = nn.Linear(32, 1)

    def forward(self, x):
        x = x.unsqueeze(1)  # Add channel dimension for Conv1D
        x = torch.relu(self.conv1(x))
        x = x.view(x.size(0), -1)  # Flatten
        x = torch.relu(self.fc1(x))
        x = torch.sigmoid(self.fc2(x))
        return x

# Initialize CNN models
cnn_model_fraud = CNNModel(X_fraud_train.shape[1])
cnn_model_credit = CNNModel(X_credit_train.shape[1])

# Optimizers for CNN models
optimizer_cnn_fraud = torch.optim.Adam(cnn_model_fraud.parameters(), lr=0.001)
optimizer_cnn_credit = torch.optim.Adam(cnn_model_credit.parameters(), lr=0.001)

In [None]:
# Train the CNN model on Fraud Data
train_model(cnn_model_fraud, train_loader_fraud, optimizer_cnn_fraud, criterion)

Epoch [1/10], Loss: 0.3178
Epoch [2/10], Loss: 0.3145


In [None]:
# Train the CNN model on Credit Card Data
train_model(cnn_model_credit, train_loader_credit, optimizer_cnn_credit, criterion)

#### Recurrent Neural Network (RNN)

In [None]:
class RNNModel(nn.Module):
    def __init__(self, input_size):
        super(RNNModel, self).__init__()
        self.rnn = nn.RNN(input_size, 64, batch_first=True)
        self.fc1 = nn.Linear(64, 32)
        self.fc2 = nn.Linear(32, 1)

    def forward(self, x):
        x, _ = self.rnn(x)
        x = x[:, -1, :]  # Use the last output of the sequence
        x = torch.relu(self.fc1(x))
        x = torch.sigmoid(self.fc2(x))
        return x

# Reshape fraud data for RNN input
X_fraud_train_rnn = X_fraud_train_tensor.unsqueeze(-1)
X_fraud_test_rnn = X_fraud_test_tensor.unsqueeze(-1)

X_credit_train_rnn = X_credit_train_tensor.unsqueeze(-1)
X_credit_test_rnn = X_credit_test_tensor.unsqueeze(-1)

# Initialize and train RNN model
rnn_model_fraud = RNNModel(X_fraud_train_rnn.shape[2])
rnn_model_credit= RNNModel(X_credit_train_rnn.shape[2])

In [None]:
# Train the RNN model on fraud Data
train_model(rnn_model_fraud, torch.utils.data.DataLoader(TensorDataset(X_fraud_train_rnn, y_fraud_train_tensor), batch_size=batch_size), optimizer_fraud, criterion)

In [None]:
# Train the RNN model on Credit Card Data
train_model(rnn_model_credit, torch.utils.data.DataLoader(TensorDataset(X_credit_train_rnn, y_credit_train_tensor), batch_size=batch_size), optimizer_fraud, criterion)

#### Long short-term memory Model

In [None]:
class LSTMModel(nn.Module):
    def __init__(self, input_size):
        super(LSTMModel, self).__init__()
        self.lstm = nn.LSTM(input_size, 64, batch_first=True)
        self.fc1 = nn.Linear(64, 32)
        self.fc2 = nn.Linear(32, 1)

    def forward(self, x):
        x, _ = self.lstm(x)
        x = x[:, -1, :]  # Use the last output of the sequence
        x = torch.relu(self.fc1(x))
        x = torch.sigmoid(self.fc2(x))
        return x

# Initialize and train LSTM model
lstm_model_fraud = LSTMModel(X_fraud_train_rnn.shape[2])
lstm_model_credit = LSTMModel(X_credit_train_rnn.shape[2])


In [None]:
# # Train the LSTM model on fraud data
train_model(lstm_model_fraud, torch.utils.data.DataLoader(TensorDataset(X_fraud_train_rnn, y_fraud_train_tensor), batch_size=batch_size), optimizer_fraud, criterion)

In [None]:
# Train the LSTM model on Credit Card Data
train_model(lstm_model_credit, torch.utils.data.DataLoader(TensorDataset(X_credit_train_rnn, y_credit_train_tensor), batch_size=batch_size), optimizer_fraud, criterion)

## 3. model evaluation

In [None]:


def evaluate_model(model, test_loader):
    model.eval()  # Set to evaluation mode
    all_preds = []
    all_labels = []
    with torch.no_grad():
        for X_batch, y_batch in test_loader:
            y_pred = model(X_batch).squeeze()
            preds = (y_pred > 0.5).float()  # Convert probabilities to 0/1
            all_preds.extend(preds.numpy())
            all_labels.extend(y_batch.numpy())
    
    accuracy = accuracy_score(all_labels, all_preds)
    print(f'Accuracy: {accuracy:.4f}')

# Evaluate models
evaluate_model(mlp_model_fraud, test_loader_fraud)
evaluate_model(mlp_model_credit, test_loader_credit)
evaluate_model(cnn_model_fraud, test_loader_fraud)
evaluate_model(cnn_model_credit, test_loader_credit)
evaluate_model(rnn_model_fraud, torch.utils.data.DataLoader(TensorDataset(X_fraud_test_rnn, y_fraud_test_tensor), batch_size=batch_size))
evaluate_model(rnn_model_credit, torch.utils.data.DataLoader(TensorDataset(X_credit_test_rnn, y_credit_test_tensor), batch_size=batch_size))
evaluate_model(lstm_model_fraud, torch.utils.data.DataLoader(TensorDataset(X_fraud_test_rnn, y_fraud_test_tensor), batch_size=batch_size))
evaluate_model(lstm_model_credit, torch.utils.data.DataLoader(TensorDataset(X_credit_test_rnn, y_credit_test_tensor), batch_size=batch_size))


## 4. MLOps with MLflow

In [None]:
mlflow.start_run()
# Train and log models
mlflow.pytorch.log_model(mlp_model_fraud, "MLP_Fraud")
mlflow.pytorch.log_model(cnn_model_fraud, "CNN_Fraud")
mlflow.pytorch.log_model(rnn_model_fraud, "RNN_Fraud")
mlflow.pytorch.log_model(lstm_model_fraud, "LSTM_Fraud")

mlflow.pytorch.log_model(mlp_model_credit, "MLP_credit")
mlflow.pytorch.log_model(cnn_model_credit, "CNN_credit")
mlflow.pytorch.log_model(rnn_model_credit, "RNN_credit")
mlflow.pytorch.log_model(lstm_model_credit, "LSTM_credit")

mlflow.end_run()
