### Imports

In [1]:
import pandas as pd
import numpy as np
import torch
import torch.nn as nn
import torch.optim as optim
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
from sklearn.linear_model import LogisticRegression
from sklearn.metrics import accuracy_score, f1_score
from sklearn.impute import SimpleImputer
import joblib



### Load dataset

In [2]:
# Read the transaction CSV (path is relative to the notebook's working directory).
DATA_PATH = "synthetic_mobile_money_transaction_dataset.csv"
df = pd.read_csv(DATA_PATH)

### Data preprocessing

In [3]:
# Build the modelling frame as a *copy* of the raw data. The raw `df` is left
# untouched, so re-running this cell never re-encodes or re-applies log1p to
# columns that were already transformed.
df_clean = df.dropna(subset=['isFraud']).copy()

# Encode the categorical / identifier columns as integer category codes
# (pandas assigns -1 to missing values).
for col in ['transactionType', 'initiator', 'recipient']:
    df_clean[col] = df_clean[col].astype('category').cat.codes

# log1p is only finite for values > -1. Mask anything else to NaN explicitly so it
# is filled by the imputer below instead of emitting a NumPy RuntimeWarning
# (or producing -inf, which the imputer would reject).
for col in ['amount', 'oldBalRecipient', 'newBalRecipient']:
    df_clean[col] = np.log1p(df_clean[col].where(df_clean[col] > -1))

features = ["step", "initiator", "recipient", "transactionType", "amount", "oldBalInitiator", "newBalInitiator", "oldBalRecipient", "newBalRecipient"]
target = "isFraud"

X = df_clean[features]
y = df_clean[target]

# Split BEFORE fitting any statistics-based transformer: fitting the imputer on the
# full matrix would leak test-set medians into the training data.
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

# Median imputation learned on the training rows only, then applied to both splits.
# The original index is kept so the feature frames stay aligned with y_train / y_test.
imputer = SimpleImputer(strategy='median')
X_train = pd.DataFrame(imputer.fit_transform(X_train), columns=features, index=X_train.index)
X_test = pd.DataFrame(imputer.transform(X_test), columns=features, index=X_test.index)

# Standardise with training-set mean / variance; the test split reuses those statistics.
scaler = StandardScaler()
X_train_scaled = scaler.fit_transform(X_train)
X_test_scaled = scaler.transform(X_test)

  result = getattr(ufunc, method)(*inputs, **kwargs)


### Convert data to PyTorch tensors

In [4]:
def _as_float_tensor(array_like):
    """Copy `array_like` into a new float32 torch tensor."""
    return torch.tensor(array_like, dtype=torch.float32)


# Scaled feature matrices become 2-D float tensors.
X_train_tensor = _as_float_tensor(X_train_scaled)
X_test_tensor = _as_float_tensor(X_test_scaled)

# Labels are 1-D; add a trailing axis so each row holds a single target value.
y_train_tensor = _as_float_tensor(y_train.values).unsqueeze(1)
y_test_tensor = _as_float_tensor(y_test.values).unsqueeze(1)

### Define the Teacher Model (DNN)

In [5]:
class FraudDetector(nn.Module):
    """Feed-forward binary classifier used as the teacher network.

    Architecture: three ``Linear -> BatchNorm1d -> LeakyReLU -> Dropout`` stages
    (128, 64 and 32 units with dropout 0.3, 0.3 and 0.2), then a 16-unit
    ``Linear -> LeakyReLU`` layer and a single ``Linear -> Sigmoid`` output unit.

    Args:
        input_dim: Number of input features per sample.
    """

    def __init__(self, input_dim):
        super().__init__()

        layers = []
        width_in = input_dim
        # (output width, dropout probability) for each regularised hidden stage.
        for width_out, drop_p in ((128, 0.3), (64, 0.3), (32, 0.2)):
            layers += [
                nn.Linear(width_in, width_out),
                nn.BatchNorm1d(width_out),
                nn.LeakyReLU(),
                nn.Dropout(drop_p),
            ]
            width_in = width_out

        # Final un-normalised hidden layer and the sigmoid output head.
        layers += [
            nn.Linear(width_in, 16),
            nn.LeakyReLU(),
            nn.Linear(16, 1),
            nn.Sigmoid(),
        ]

        # Stored under `model` so parameter names remain `model.<index>.*`.
        self.model = nn.Sequential(*layers)

    def forward(self, x):
        """Pass ``x`` through the layer stack and return the sigmoid outputs."""
        return self.model(x)

# Seed PyTorch before the layers are created so the teacher's initial weights (and
# any random draws made from the global generator afterwards, e.g. dropout masks)
# are the same on every fresh run. 42 matches the random_state used for the split.
torch.manual_seed(42)

# One input unit per feature column of the training matrix.
input_dim = X_train.shape[1]
teacher_model = FraudDetector(input_dim)

### Define Loss & Optimizer for Teacher

In [None]:
# Binary cross-entropy loss for the teacher, optimised with Adam.
LEARNING_RATE = 0.001
criterion = nn.BCELoss()
optimizer = optim.Adam(params=teacher_model.parameters(), lr=LEARNING_RATE)

: 

### Train the Teacher Model

In [None]:
# Full-batch training: each epoch is a single optimiser step over the whole
# training tensor. The loss is reported every LOG_EVERY epochs.
num_epochs = 20
LOG_EVERY = 5

# The flattened targets do not change between epochs, so compute them once.
train_targets = y_train_tensor.squeeze()

teacher_model.train()
for epoch_number in range(1, num_epochs + 1):
    optimizer.zero_grad()
    predictions = teacher_model(X_train_tensor).squeeze()
    epoch_loss = criterion(predictions, train_targets)
    epoch_loss.backward()
    optimizer.step()

    if epoch_number % LOG_EVERY != 0:
        continue
    print(f"Teacher Epoch [{epoch_number}/{num_epochs}], Loss: {epoch_loss.item():.4f}")

### Get Teacher Predictions (Soft Labels)

In [None]:
# Put the teacher in evaluation mode before producing the soft labels.
teacher_model.eval()


def _teacher_probabilities(inputs):
    """Run the teacher without gradient tracking; return the squeezed output as a NumPy array."""
    with torch.no_grad():
        return teacher_model(inputs).squeeze().numpy()


teacher_train_preds = _teacher_probabilities(X_train_tensor)
teacher_test_preds = _teacher_probabilities(X_test_tensor)

### Train the Student Model (Logistic Regression) using Teacher's Soft Labels

In [None]:
# LogisticRegression is a classifier: passing the teacher's continuous probabilities
# directly as `y` raises "Unknown label type: continuous". To train on the soft labels
# anyway, every training row is used twice -- once as class 1 weighted by the teacher
# probability p and once as class 0 weighted by (1 - p). The weighted log-loss of that
# expanded set equals the cross-entropy against the soft targets.
# Note: this stacks two copies of the training matrix in memory.
soft_targets = np.clip(np.asarray(teacher_train_preds, dtype=np.float64).ravel(), 0.0, 1.0)
n_train = soft_targets.shape[0]

X_distill = np.vstack([X_train_scaled, X_train_scaled])
y_distill = np.concatenate([np.ones(n_train, dtype=int), np.zeros(n_train, dtype=int)])
distill_weights = np.concatenate([soft_targets, 1.0 - soft_targets])

student_model = LogisticRegression(max_iter=1000, solver='lbfgs')
student_model.fit(X_distill, y_distill, sample_weight=distill_weights)

### Evaluate the Student Model

In [None]:
# Score the student's hard class predictions on the held-out test split.
y_pred_student = student_model.predict(X_test_scaled)
accuracy, f1 = (metric(y_test, y_pred_student) for metric in (accuracy_score, f1_score))

print("Student Model (Logistic Regression) Evaluation:")
print("Accuracy: {:.4f}, F1 Score: {:.4f}".format(accuracy, f1))

### Iterative Refinement of the Student Model (Optional)

In [None]:
# Use the F1 score as the selection signal to refine the student model.
#
# Refitting the same lbfgs LogisticRegression on the same data is deterministic, so
# it can never change the score. Each of the 10 iterations therefore trains a *new*
# student with a different inverse regularisation strength C, and the best-scoring
# one is kept as a separate object (not an alias of `student_model`).
#
# NOTE(review): candidates are selected on the test split, so `best_f1` is an
# optimistic estimate; use a separate validation split before reporting it as a
# generalisation score.

# Soft-label training set: each row appears once as class 1 with weight p and once
# as class 0 with weight (1 - p), because LogisticRegression rejects continuous targets.
soft_targets = np.clip(np.asarray(teacher_train_preds, dtype=np.float64).ravel(), 0.0, 1.0)
n_train = soft_targets.shape[0]
X_distill = np.vstack([X_train_scaled, X_train_scaled])
y_distill = np.concatenate([np.ones(n_train, dtype=int), np.zeros(n_train, dtype=int)])
distill_weights = np.concatenate([soft_targets, 1.0 - soft_targets])

# Start from the already-evaluated student as the baseline to beat.
best_f1 = f1
best_student_model = student_model

for reg_strength in np.logspace(-2, 2, 10):  # 10 refinement iterations
    candidate = LogisticRegression(max_iter=1000, solver='lbfgs', C=reg_strength)
    candidate.fit(X_distill, y_distill, sample_weight=distill_weights)
    current_f1 = f1_score(y_test, candidate.predict(X_test_scaled))

    if current_f1 > best_f1:
        best_f1 = current_f1
        best_student_model = candidate

print(f"Best Student Model F1 Score after Reinforcement: {best_f1:.4f}")

### Save the Best Student Model

In [None]:
# Serialise the selected student with joblib.
model_path = "best_student_model.joblib"
joblib.dump(best_student_model, model_path)
print(f"Best Student Model saved as '{model_path}'")