In [2]:
import torch
import torch.nn as nn
import torch.optim as optim
from torch.utils.data import Dataset, DataLoader
import pandas as pd
import numpy as np
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler, LabelEncoder, OneHotEncoder
from sklearn.compose import ColumnTransformer
from sklearn.pipeline import Pipeline

class LoanDataset(Dataset):
    """Map-style dataset pairing a feature matrix with one label per row.

    Both inputs are copied into ``float32`` tensors when the dataset is
    constructed, so indexing afterwards is a plain tensor lookup.

    Args:
        features: Row-major feature data (nested sequence, NumPy array, or
            any object exposing ``toarray()`` such as a scipy sparse matrix).
        labels: One label per feature row (sequence or NumPy array).

    Raises:
        ValueError: If ``features`` and ``labels`` disagree on row count.
    """

    def __init__(self, features, labels):
        # scipy sparse matrices (a possible ColumnTransformer/OneHotEncoder
        # output) cannot be handed to torch.tensor directly; densify first.
        if hasattr(features, 'toarray'):
            features = features.toarray()

        self.features = torch.tensor(features, dtype=torch.float32)
        self.labels = torch.tensor(labels, dtype=torch.float32)

        # Compare only the leading dimension; slicing the shape keeps the
        # check safe even for 0-dim tensors.
        if self.features.shape[:1] != self.labels.shape[:1]:
            raise ValueError(
                'features and labels must have the same number of rows, got '
                f'{tuple(self.features.shape)} and {tuple(self.labels.shape)}'
            )

    def __len__(self):
        """Return the number of samples (one per label)."""
        return len(self.labels)

    def __getitem__(self, idx):
        """Return the ``(features, label)`` pair stored at ``idx``."""
        return self.features[idx], self.labels[idx]

class LoanPreprocessor:
    """Wrap a ColumnTransformer that scales numeric and one-hot encodes categorical loan columns.

    The column names are fixed at construction time; input passed to
    ``fit_transform`` / ``transform`` must expose those columns.
    """

    def __init__(self):
        # Columns routed through the one-hot encoder
        self.categorical_features = [
            'person_home_ownership',
            'loan_intent',
            'loan_grade',
            'cb_person_default_on_file',
        ]

        # Columns routed through the standard scaler
        self.numerical_features = [
            'person_age',
            'person_income',
            'person_emp_length',
            'loan_amnt',
            'loan_int_rate',
            'loan_percent_income',
            'cb_person_cred_hist_length',
        ]

        # Name each step up front so the transformer definition reads flat.
        # Categories not seen during fitting are ignored rather than raising.
        scaling_step = ('num', StandardScaler(), self.numerical_features)
        encoding_step = (
            'cat',
            OneHotEncoder(handle_unknown='ignore'),
            self.categorical_features,
        )
        self.preprocessor = ColumnTransformer(
            transformers=[scaling_step, encoding_step]
        )

    def fit_transform(self, X, y=None):
        """Fit the transformer on ``X`` and return the transformed features.

        ``y`` is accepted but not used.
        """
        return self.preprocessor.fit_transform(X)

    def transform(self, X):
        """Apply the already-fitted transformer to ``X`` and return the result."""
        return self.preprocessor.transform(X)

class LoanNeuralNetwork(nn.Module):
    """Feed-forward network: input_size -> 64 -> 32 -> 1 with a sigmoid output.

    Each hidden layer is followed by ReLU and dropout (p=0.3); the final
    sigmoid keeps every output strictly between 0 and 1.
    """

    def __init__(self, input_size):
        super().__init__()
        # Assemble the layers in order, then wrap them in a single Sequential
        # stored under the attribute name `network`.
        stack = [
            nn.Linear(input_size, 64),
            nn.ReLU(),
            nn.Dropout(0.3),
            nn.Linear(64, 32),
            nn.ReLU(),
            nn.Dropout(0.3),
            nn.Linear(32, 1),
            nn.Sigmoid(),
        ]
        self.network = nn.Sequential(*stack)

    def forward(self, x):
        """Run ``x`` through the layer stack and return the sigmoid output."""
        prediction = self.network(x)
        return prediction

def train_model(model, train_loader, criterion, optimizer, device, epochs=50):
    """Train ``model`` in place for ``epochs`` passes over ``train_loader``.

    Args:
        model: Module to optimise; it is switched to training mode.
        train_loader: Iterable yielding ``(features, labels)`` tensor batches.
        criterion: Loss callable invoked as ``criterion(outputs, labels)``.
        optimizer: Optimizer already bound to ``model``'s parameters.
        device: Device each batch is moved to before the forward pass.
        epochs: Number of passes over ``train_loader``.

    The mean batch loss is printed on every 10th epoch, starting with the
    first. Nothing is returned.
    """
    model.train()
    for epoch in range(epochs):
        total_loss = 0.0
        num_batches = 0
        for features, labels in train_loader:
            features, labels = features.to(device), labels.to(device)

            # Zero the parameter gradients
            optimizer.zero_grad()

            # Forward pass
            outputs = model(features)
            # A bare squeeze() turns a (1, 1) output from a final batch of
            # size 1 into a 0-dim tensor that no longer matches labels of
            # shape (1,). Align to the label shape instead; the element-count
            # guard leaves genuine shape errors for the criterion to report.
            if outputs.shape != labels.shape and outputs.numel() == labels.numel():
                outputs = outputs.reshape(labels.shape)
            loss = criterion(outputs, labels)

            # Backward pass and optimize
            loss.backward()
            optimizer.step()

            total_loss += loss.item()
            num_batches += 1

        if epoch % 10 == 0:
            # Batches are counted in the loop so loaders without __len__ work
            # and an empty loader reports nan instead of dividing by zero.
            mean_loss = total_loss / num_batches if num_batches else float('nan')
            print(f'Epoch [{epoch+1}/{epochs}], Loss: {mean_loss:.4f}')

def main():
    """Load the loan CSV, split it, preprocess it, and build the data loaders.

    Reads ``./data/train.csv``, separates the ``loan_status`` column as the
    target, holds out 20% of the rows for testing, and wraps both partitions
    in ``DataLoader`` objects. Prints the total row count and the training
    row count. The training/evaluation steps are kept below as commented-out
    code and are not executed.
    """
    # Load the data
    data = pd.read_csv('./data/train.csv')  # Assume data is saved to CSV
    print(len(data))

    # Separate features and target
    X = data.drop('loan_status', axis=1)
    y = data['loan_status']

    # Split the raw rows BEFORE fitting the preprocessor: fitting the scaler
    # and encoder on the full dataset would leak test-set statistics and
    # categories into the training features.
    X_train_raw, X_test_raw, y_train, y_test = train_test_split(
        X, y, test_size=0.2, random_state=42
    )

    # Fit on the training partition only, then reuse the fitted transform
    preprocessor = LoanPreprocessor()
    X_train = preprocessor.fit_transform(X_train_raw)
    X_test = preprocessor.transform(X_test_raw)
    # shape[0] rather than len(): also valid if the transformer returns a
    # sparse matrix.
    print(X_train.shape[0])

    # Create PyTorch datasets
    train_dataset = LoanDataset(X_train, y_train.values)
    test_dataset = LoanDataset(X_test, y_test.values)

    # Create data loaders
    train_loader = DataLoader(train_dataset, batch_size=32, shuffle=True)
    test_loader = DataLoader(test_dataset, batch_size=32, shuffle=False)

    # NOTE: the training and evaluation steps below are disabled in the
    # original and are kept disabled here.

    # # Set up device
    # device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')

    # # Initialize the model
    # model = LoanNeuralNetwork(input_size=X_train.shape[1]).to(device)

    # # Loss and optimizer
    # criterion = nn.BCELoss()
    # optimizer = optim.Adam(model.parameters(), lr=0.001)

    # # Train the model
    # train_model(model, train_loader, criterion, optimizer, device)

    # # Evaluate the model
    # model.eval()
    # correct = 0
    # total = 0
    # with torch.no_grad():
    #     for features, labels in test_loader:
    #         features, labels = features.to(device), labels.to(device)
    #         outputs = model(features)
    #         predicted = (outputs.squeeze() > 0.5).float()
    #         total += labels.size(0)
    #         correct += (predicted == labels).sum().item()

    # print(f'Accuracy on test set: {100 * correct / total:.2f}%')

# Run the pipeline only when this file is executed directly, not on import.
if __name__ == '__main__':
    main()

58645
46916
