### Imports

In [1]:
import pandas as pd
import numpy as np
import torch
import torch.nn as nn
import torch.optim as optim
from torch.utils.data import Dataset, DataLoader, TensorDataset
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import LabelEncoder, StandardScaler
from sklearn.metrics import accuracy_score, precision_score, recall_score, f1_score
from sklearn.impute import SimpleImputer
import joblib
from sklearn.base import BaseEstimator, ClassifierMixin



### Load dataset

In [2]:
df = pd.read_csv("synthetic_mobile_money_transaction_dataset.csv")

### Data preprocessing

In [3]:
df.dropna(subset=['isFraud'], inplace=True)
df['transactionType'] = df['transactionType'].astype('category').cat.codes
df['initiator'] = df['initiator'].astype('category').cat.codes
df['recipient'] = df['recipient'].astype('category').cat.codes

for col in ['amount', 'oldBalRecipient', 'newBalRecipient']:
    df[col] = np.log1p(df[col])

features = ["step", "initiator", "recipient", "transactionType", "amount", "oldBalInitiator", "newBalInitiator", "oldBalRecipient", "newBalRecipient"]
target = "isFraud"

X = df[features]
y = df[target]

imputer = SimpleImputer(strategy='median')
X = pd.DataFrame(imputer.fit_transform(X), columns=X.columns)

X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

scaler = StandardScaler()
X_train_scaled = scaler.fit_transform(X_train)
X_test_scaled = scaler.transform(X_test)

X_train_tensor = torch.tensor(X_train_scaled, dtype=torch.float32)
X_test_tensor = torch.tensor(X_test_scaled, dtype=torch.float32)
y_train_tensor = torch.tensor(y_train.values, dtype=torch.float32).view(-1, 1)
y_test_tensor = torch.tensor(y_test.values, dtype=torch.float32).view(-1, 1)

  result = getattr(ufunc, method)(*inputs, **kwargs)


### Define DNN Model

In [4]:
class FraudDetector(nn.Module):
    def __init__(self, input_dim):
        super(FraudDetector, self).__init__()
        self.model = nn.Sequential(
            nn.Linear(input_dim, 128),
            nn.BatchNorm1d(128),
            nn.LeakyReLU(),
            nn.Dropout(0.3),
            nn.Linear(128, 64),
            nn.BatchNorm1d(64),
            nn.LeakyReLU(),
            nn.Dropout(0.3),
            nn.Linear(64, 32),
            nn.BatchNorm1d(32),
            nn.LeakyReLU(),
            nn.Dropout(0.2),
            nn.Linear(32, 16),
            nn.LeakyReLU(),
            nn.Linear(16, 1),
            nn.Sigmoid()
        )

    def forward(self, x):
        return self.model(x)

### Initialize models

In [5]:
input_dim = X_train.shape[1]
dnn_model = FraudDetector(input_dim)

### Define Loss & Optimizer for DNN

In [6]:
criterion_dnn = nn.BCELoss()
optimizer_dnn = optim.Adam(dnn_model.parameters(), lr=0.001)

### Train DNN Model

In [7]:
num_epochs = 20
for epoch in range(num_epochs):
    dnn_model.train()
    optimizer_dnn.zero_grad()
    y_pred = dnn_model(X_train_tensor).squeeze()
    loss = criterion_dnn(y_pred, y_train_tensor.squeeze())
    loss.backward()
    optimizer_dnn.step()
    if (epoch+1) % 5 == 0:
        print(f"DNN Epoch [{epoch+1}/{num_epochs}], Loss: {loss.item():.4f}")

DNN Epoch [5/20], Loss: 0.6668
DNN Epoch [10/20], Loss: 0.6169
DNN Epoch [15/20], Loss: 0.5735
DNN Epoch [20/20], Loss: 0.5336


### Evaluate models

In [8]:
def evaluate_model(model, X_test_tensor, y_test_tensor, is_dnn=False):
    model.eval()
    with torch.no_grad():
        outputs = model(X_test_tensor)
        if is_dnn:
            predictions = (outputs.squeeze() > 0.5).float().cpu().numpy()
        else:
            predictions = torch.argmax(outputs, dim=1).cpu().numpy()
        labels = y_test_tensor.cpu().numpy()
    
    accuracy = accuracy_score(labels, predictions)
    precision = precision_score(labels, predictions)
    recall = recall_score(labels, predictions)
    f1 = f1_score(labels, predictions)
    
    return accuracy, precision, recall, f1

### Evaluate DNN

In [9]:
dnn_metrics = evaluate_model(dnn_model, X_test_tensor, y_test_tensor, is_dnn=True)
print(f"DNN Model Performance:\n Accuracy: {dnn_metrics[0]:.4f}, Precision: {dnn_metrics[1]:.4f}, Recall: {dnn_metrics[2]:.4f}, F1-score: {dnn_metrics[3]:.4f}")

DNN Model Performance:
 Accuracy: 0.8317, Precision: 0.2946, Recall: 0.4680, F1-score: 0.3616


### Save the model

In [10]:
class PyTorchModelWrapper(BaseEstimator, ClassifierMixin):
    def __init__(self, model):
        self.model = model

    def fit(self, X, y):
        pass  # No fitting needed, as the model is pre-trained

    def predict(self, X):
        with torch.no_grad():
            X_tensor = torch.tensor(X, dtype=torch.float32)
            outputs = self.model(X_tensor)
            return (outputs > 0.5).numpy()

joblib.dump(PyTorchModelWrapper(dnn_model), "DNN.joblib")

['DNN.joblib']