In [1]:
import torch
import torch.nn as nn
import torch.optim as optim
from torch.utils.data import Dataset, DataLoader
import pandas as pd
import numpy as np
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler, OneHotEncoder
from sklearn.compose import ColumnTransformer
import optuna

In [2]:
class LoanDataset(Dataset):
    """In-memory dataset that pairs a float32 feature tensor with float32 labels.

    Args:
        features: Array-like of per-sample features. Accepts anything NumPy can
            convert to a numeric array (nested lists, ndarrays, pandas
            DataFrames), objects exposing ``.toarray()`` (e.g. sparse matrices),
            or an existing ``torch.Tensor``.
        labels: Array-like of per-sample targets, same accepted types as
            ``features`` (including a pandas Series with an arbitrary index).

    Raises:
        ValueError: If ``features`` and ``labels`` disagree on the number of
            samples (size of their first dimension).
    """

    def __init__(self, features, labels):
        self.features = self._to_float_tensor(features)
        self.labels = self._to_float_tensor(labels)
        # Compare only the leading dimension; slicing keeps this safe for 0-d inputs.
        if self.features.shape[:1] != self.labels.shape[:1]:
            raise ValueError(
                f"features and labels must have the same number of samples, "
                f"got {tuple(self.features.shape)} and {tuple(self.labels.shape)}"
            )

    @staticmethod
    def _to_float_tensor(data):
        """Convert ``data`` to an independent float32 tensor."""
        if isinstance(data, torch.Tensor):
            # Copy so the dataset never aliases the caller's tensor.
            return data.detach().clone().to(torch.float32)
        if hasattr(data, "toarray"):
            # Sparse matrices must be densified before NumPy/torch can read them.
            data = data.toarray()
        # Going through NumPy avoids torch indexing pandas objects by label,
        # which breaks when the index is not 0..n-1 (e.g. after a shuffled split).
        return torch.tensor(np.asarray(data, dtype=np.float32))

    def __len__(self):
        """Number of samples, taken from the labels tensor."""
        return len(self.labels)

    def __getitem__(self, idx):
        """Return the ``(features, label)`` pair at position ``idx``."""
        return self.features[idx], self.labels[idx]

In [3]:
class LoanPreprocessor:
    """Column-wise preprocessing for the loan table.

    Builds a ``ColumnTransformer`` that applies ``StandardScaler`` to the
    columns listed in ``numerical_features`` and
    ``OneHotEncoder(handle_unknown='ignore')`` to those listed in
    ``categorical_features``. ``fit_transform`` and ``transform`` simply
    delegate to that transformer.
    """

    def __init__(self):
        # Columns routed to the one-hot encoder.
        self.categorical_features = [
            'person_home_ownership',
            'loan_intent',
            'loan_grade',
            'cb_person_default_on_file',
        ]

        # Columns routed to the standard scaler.
        self.numerical_features = [
            'person_age',
            'person_income',
            'person_emp_length',
            'loan_amnt',
            'loan_int_rate',
            'loan_percent_income',
            'cb_person_cred_hist_length',
        ]

        numeric_step = ('num', StandardScaler(), self.numerical_features)
        categorical_step = (
            'cat',
            OneHotEncoder(handle_unknown='ignore'),
            self.categorical_features,
        )
        self.preprocessor = ColumnTransformer(
            transformers=[numeric_step, categorical_step]
        )

    def fit_transform(self, X, y=None):
        """Fit the wrapped transformer on ``X`` and return the transformed data.

        ``y`` is accepted for signature compatibility but is not forwarded.
        """
        transformed = self.preprocessor.fit_transform(X)
        return transformed

    def transform(self, X):
        """Apply the wrapped (already fitted) transformer to ``X``."""
        transformed = self.preprocessor.transform(X)
        return transformed

In [4]:
class LoanNeuralNetwork(nn.Module):
    """Feed-forward binary classifier with two hidden layers.

    The stack is ``Linear -> ReLU -> Dropout`` twice, followed by a
    single-unit ``Linear`` and a ``Sigmoid``, so ``forward`` returns one value
    in (0, 1) per input row.

    Args:
        input_size: Number of input features.
        hidden_size1: Width of the first hidden layer.
        hidden_size2: Width of the second hidden layer.
        dropout_rate: Dropout probability applied after each hidden layer.
    """

    def __init__(self, input_size, hidden_size1, hidden_size2, dropout_rate):
        super().__init__()
        widths = (input_size, hidden_size1, hidden_size2)
        layers = []
        # One Linear/ReLU/Dropout triple per consecutive pair of widths.
        for fan_in, fan_out in zip(widths, widths[1:]):
            layers.append(nn.Linear(fan_in, fan_out))
            layers.append(nn.ReLU())
            layers.append(nn.Dropout(dropout_rate))
        # Output head: a single unit squashed to (0, 1).
        layers.append(nn.Linear(hidden_size2, 1))
        layers.append(nn.Sigmoid())
        self.network = nn.Sequential(*layers)

    def forward(self, x):
        """Run ``x`` through the sequential stack and return its output."""
        return self.network(x)

In [9]:
class EarlyStopping:
    """Signal when a monitored score has stopped improving.

    Lower scores count as better (e.g. a validation loss). Each call compares
    the new score with the best one seen so far; once ``patience`` calls have
    accumulated without a strictly lower score since the last improvement,
    ``early_stop`` is set to ``True``.

    Args:
        patience: Number of non-improving calls tolerated before stopping.
        verbose: If true, print a message when stopping is triggered.
    """

    def __init__(self, patience=5, verbose=False):
        self.patience = patience
        self.verbose = verbose
        self.counter = 0
        self.best_score = None
        self.early_stop = False

    def __call__(self, score):
        """Record ``score`` and update ``counter`` / ``early_stop``."""
        # First observation just establishes the baseline.
        if self.best_score is None:
            self.best_score = score
            return

        # Strict improvement: remember it and restart the patience window.
        if score < self.best_score:
            self.best_score = score
            self.counter = 0
            return

        # No improvement (ties included): spend one unit of patience.
        self.counter += 1
        if self.counter >= self.patience:
            self.early_stop = True
            if self.verbose:
                print("Early stopping triggered.")

In [6]:
def validate_model(model, val_loader, criterion, device):
    """Compute the mean per-batch loss of ``model`` over ``val_loader``.

    The model is evaluated in eval mode with gradients disabled. Afterwards
    every sub-module's ``training`` flag is put back exactly as it was, so a
    caller that is in the middle of training keeps dropout (and similar
    layers) active for the next epoch.

    Args:
        model: The ``nn.Module`` to evaluate.
        val_loader: Iterable yielding ``(features, labels)`` batches.
        criterion: Loss callable applied to ``(outputs.view(-1), labels)``.
        device: Device the batches are moved to before the forward pass.

    Returns:
        The average of the per-batch loss values (each batch weighted equally).

    Raises:
        ValueError: If ``val_loader`` yields no batches.
    """
    # Remember each module's mode so eval() does not leak out to the caller.
    previous_modes = [(module, module.training) for module in model.modules()]
    model.eval()

    total_loss = 0.0
    num_batches = 0
    try:
        with torch.no_grad():
            for features, labels in val_loader:
                features, labels = features.to(device), labels.to(device)
                outputs = model(features)
                # Flatten (batch, 1)-style outputs so they line up with the labels.
                loss = criterion(outputs.view(-1), labels)
                total_loss += loss.item()
                num_batches += 1
    finally:
        for module, was_training in previous_modes:
            module.training = was_training

    # Counting batches ourselves also supports loaders that do not define len().
    if num_batches == 0:
        raise ValueError("val_loader yielded no batches; cannot compute a validation loss")
    return total_loss / num_batches

In [7]:
def _snapshot_state_dict(model):
    """Return ``model.state_dict()`` with every tensor replaced by an independent copy."""
    snapshot = model.state_dict()
    # state_dict() tensors share storage with the live parameters, so they must
    # be cloned or later optimizer steps would overwrite the snapshot in place.
    for name in list(snapshot):
        snapshot[name] = snapshot[name].detach().clone()
    return snapshot


def train_model(model, train_loader, val_loader, criterion, optimizer, device, epochs=50, patience=5):
    """Train ``model`` with early stopping and return the best weights seen.

    Each epoch runs one pass over ``train_loader``, then scores the model with
    ``validate_model`` on ``val_loader``. The weights from the epoch with the
    lowest validation loss are snapshotted; training stops early once
    ``EarlyStopping`` reports that ``patience`` has been exhausted.

    Args:
        model: The ``nn.Module`` to optimise (updated in place).
        train_loader: Iterable yielding ``(features, labels)`` training batches.
        val_loader: Iterable of validation batches, passed to ``validate_model``.
        criterion: Loss callable applied to ``(outputs.view(-1), labels)``.
        optimizer: Optimizer already bound to ``model``'s parameters.
        device: Device the batches are moved to.
        epochs: Maximum number of epochs.
        patience: Non-improving epochs tolerated before stopping early.

    Returns:
        A state_dict copy taken at the best validation loss, or ``None`` if no
        epoch produced a validation loss below infinity (e.g. ``epochs=0``).
        The model itself is left with its last-epoch weights.

    Raises:
        ValueError: If ``train_loader`` yields no batches.
    """
    early_stopping = EarlyStopping(patience=patience, verbose=True)
    best_val_loss = float('inf')  # Lowest validation loss seen so far
    best_model_weights = None  # Snapshot of the weights at that loss

    for epoch in range(epochs):
        # Validation switches the model to eval mode, so training mode has to
        # be re-enabled every epoch or dropout is off from epoch 2 onwards.
        model.train()

        total_loss = 0.0
        num_batches = 0
        for features, labels in train_loader:
            features, labels = features.to(device), labels.to(device)
            optimizer.zero_grad()
            outputs = model(features)
            loss = criterion(outputs.view(-1), labels)
            loss.backward()
            optimizer.step()
            total_loss += loss.item()
            num_batches += 1

        if num_batches == 0:
            raise ValueError("train_loader yielded no batches; nothing to train on")

        avg_loss = total_loss / num_batches
        val_loss = validate_model(model, val_loader, criterion, device)
        print(f'Epoch [{epoch+1}/{epochs}], Training Loss: {avg_loss:.4f}, Validation Loss: {val_loss:.4f}')

        # Snapshot before the early-stopping check so a qualifying epoch is
        # never dropped by the break below.
        if val_loss < best_val_loss:
            best_val_loss = val_loss
            best_model_weights = _snapshot_state_dict(model)
            print("Best model weights saved.")

        early_stopping(val_loss)
        if early_stopping.early_stop:
            print("Training stopped early.")
            break

    return best_model_weights

In [8]:
def objective(trial):
    """Optuna objective: train one sampled configuration and return validation accuracy.

    Samples layer widths, dropout rate, learning rate and batch size from
    ``trial``, trains a ``LoanNeuralNetwork`` with ``train_model``, evaluates
    the best checkpoint on the validation set at a 0.5 threshold, and writes
    that checkpoint to ``best_model_trial_<trial.number>.pth``.

    Relies on ``train_dataset``, ``val_dataset``, ``X_train`` and ``device``
    being defined elsewhere in the notebook before this is called.

    Args:
        trial: The Optuna trial used to sample hyperparameters.

    Returns:
        Fraction of validation samples classified correctly.
    """
    hidden_size1 = trial.suggest_int('hidden_size1', 32, 128)
    hidden_size2 = trial.suggest_int('hidden_size2', 16, 64)
    dropout_rate = trial.suggest_float('dropout_rate', 0.1, 0.5)
    learning_rate = trial.suggest_float('learning_rate', 1e-5, 1e-1, log=True)
    batch_size = trial.suggest_int('batch_size', 16, 64)

    train_loader = DataLoader(train_dataset, batch_size=batch_size, shuffle=True)
    val_loader = DataLoader(val_dataset, batch_size=batch_size, shuffle=False)

    model = LoanNeuralNetwork(input_size=X_train.shape[1],
                               hidden_size1=hidden_size1,
                               hidden_size2=hidden_size2,
                               dropout_rate=dropout_rate).to(device)

    criterion = nn.BCELoss()
    optimizer = optim.Adam(model.parameters(), lr=learning_rate)

    # Train the model and get the best model weights
    best_model_weights = train_model(model, train_loader, val_loader, criterion, optimizer, device, epochs=50)

    # Score the same weights that get saved below; otherwise the reported
    # accuracy would describe the last epoch rather than the saved checkpoint.
    if best_model_weights is not None:
        model.load_state_dict(best_model_weights)

    # Evaluate the model
    model.eval()
    correct = 0
    total = 0
    with torch.no_grad():
        for features, labels in val_loader:
            features, labels = features.to(device), labels.to(device)
            outputs = model(features)
            predicted = (outputs.view(-1) > 0.5).float()  # Ensure outputs are flattened
            total += labels.size(0)
            correct += (predicted == labels).sum().item()

    accuracy = correct / total

    # Save the best model weights to a file
    model_save_path = f"best_model_trial_{trial.number}.pth"
    torch.save(best_model_weights, model_save_path)
    print(f"Best model weights saved to {model_save_path}")

    return accuracy