In [1]:
import torch
import torch.nn as nn
import torch.optim as optim
from torch.utils.data import Dataset, DataLoader
import pandas as pd
import numpy as np
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler, LabelEncoder, OneHotEncoder
from sklearn.compose import ColumnTransformer
from sklearn.pipeline import Pipeline

In [4]:
# Execute functions.ipynb with -i, i.e. in this notebook's interactive namespace rather
# than an empty one, so the names it defines are available to the cells below.
# The helpers used later but not defined in this notebook (LoanPreprocessor, LoanDataset,
# LoanNeuralNetwork, train_model) presumably come from it — confirm against functions.ipynb.
%run -i functions.ipynb

In [2]:
# Read the training table from disk, then separate the label column from the inputs
data = pd.read_csv('./data/train.csv')  # the dataset is expected to exist as a CSV file
print(data.shape[0])  # number of rows loaded
# 'loan_status' is the prediction target; every remaining column is used as a feature
y = data['loan_status']
X = data.drop(columns='loan_status')

58645


In [5]:
# Preprocess the data
# Instantiate the project's LoanPreprocessor (not defined in this notebook) and run its
# fit_transform on the complete feature table X.
# NOTE(review): this happens before the train/test split performed in a later cell, so if
# LoanPreprocessor learns statistics during fit (e.g. scaling or encoding parameters) they
# are also learned from rows that end up in the test set — confirm against its definition
# and consider fitting on the training rows only.
preprocessor = LoanPreprocessor()
X_processed = preprocessor.fit_transform(X)

In [8]:
# Hold out 20% of the rows for evaluation; the fixed seed keeps the partition repeatable
split_parts = train_test_split(X_processed, y, random_state=42, test_size=0.2)
X_train, X_test, y_train, y_test = split_parts
print(len(X_train))  # number of training rows

46916


In [9]:
# Wrap each split in the project's LoanDataset (training split first, then test split).
# The labels are handed over as the raw array behind the pandas Series (.values).
train_dataset, test_dataset = (
    LoanDataset(split_features, split_labels.values)
    for split_features, split_labels in ((X_train, y_train), (X_test, y_test))
)

In [11]:
# Batch both splits with the same batch size; only the training batches are shuffled
loader_options = {'batch_size': 32}
train_loader = DataLoader(train_dataset, shuffle=True, **loader_options)
test_loader = DataLoader(test_dataset, shuffle=False, **loader_options)

In [12]:
# Pick the compute device: use the GPU when CUDA is usable, otherwise stay on the CPU
if torch.cuda.is_available():
    device = torch.device('cuda')
else:
    device = torch.device('cpu')

In [14]:
# Initialize the model
# Seed PyTorch's global RNG first. Without a seed the initial weights — and later draws
# from the global generator, such as the shuffling order of the training loader — differ
# on every run, so the reported loss and accuracy cannot be reproduced. (Some GPU kernels
# may still be nondeterministic; see the PyTorch reproducibility notes.)
torch.manual_seed(42)
# The network gets one input unit per column of the preprocessed training features and is
# moved to the selected device.
model = LoanNeuralNetwork(input_size=X_train.shape[1]).to(device)

In [15]:
# Optimisation setup: Adam over every model parameter (learning rate 1e-3), scored with
# binary cross-entropy (nn.BCELoss)
optimizer = optim.Adam(params=model.parameters(), lr=1e-3)
criterion = nn.BCELoss()

In [16]:
# Train the model
# train_model is not defined in this notebook (presumably provided by functions.ipynb).
# It receives the network, the training loader, the loss, the optimizer and the target
# device. The recorded output below shows 50 epochs with the loss reported every 10th
# epoch; no epoch count is passed here, so it is presumably a default inside train_model.
train_model(model, train_loader, criterion, optimizer, device)

Epoch [1/50], Loss: 0.2478
Epoch [11/50], Loss: 0.1859
Epoch [21/50], Loss: 0.1814
Epoch [31/50], Loss: 0.1813
Epoch [41/50], Loss: 0.1786


In [18]:
# Evaluate the model
# Computes plain classification accuracy over the test loader: each model output is
# thresholded at 0.5 and compared with its label.
model.eval()  # evaluation mode (matters for layers such as dropout/batch norm, if any)
correct = 0
total = 0
with torch.no_grad():  # gradients are not needed for scoring
    for features, labels in test_loader:
        features, labels = features.to(device), labels.to(device)
        # Flatten predictions AND labels to 1-D before comparing. A bare squeeze() on the
        # outputs only works while the labels happen to be 1-D; with (B, 1) labels the
        # `==` below would broadcast to (B, B) and silently miscount.
        scores = model(features).reshape(-1)
        targets = labels.reshape(-1)
        predicted = (scores > 0.5).float()
        total += targets.numel()
        correct += (predicted == targets).sum().item()

print(f'Accuracy on test set: {100 * correct / total:.2f}%')

Accuracy on test set: 95.04%


In [17]:
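# NOTE: everything below is a disabled, script-style copy of the cells above (the same
# steps wrapped in main()); it is fully commented out and never executed.
# def main():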
#     # Load the data
#     data = pd.read_csv('./data/train.csv')  # Assume data is saved to CSV
#     print(len(data))
#     # Separate features and target
#     X = data.drop('loan_status', axis=1)
#     y = data['loan_status']
    
    # # Preprocess the data
    # preprocessor = LoanPreprocessor()
    # X_processed = preprocessor.fit_transform(X)
    
    # # Split the data
    # X_train, X_test, y_train, y_test = train_test_split(
    #     X_processed, y, test_size=0.2, random_state=42
    # )
    # print(len(X_train))
    # # Create PyTorch datasets
    # train_dataset = LoanDataset(X_train, y_train.values)
    # test_dataset = LoanDataset(X_test, y_test.values)
    
    # # Create data loaders
    # train_loader = DataLoader(train_dataset, batch_size=32, shuffle=True)
    # test_loader = DataLoader(test_dataset, batch_size=32, shuffle=False)
    
    # # Set up device
    # device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
    
    # # Initialize the model
    # model = LoanNeuralNetwork(input_size=X_train.shape[1]).to(device)
    
    # # Loss and optimizer
    # criterion = nn.BCELoss()
    # optimizer = optim.Adam(model.parameters(), lr=0.001)
    
    # # Train the model
    # train_model(model, train_loader, criterion, optimizer, device)
    
    # # Evaluate the model
    # model.eval()
    # correct = 0
    # total = 0
    # with torch.no_grad():
    #     for features, labels in test_loader:
    #         features, labels = features.to(device), labels.to(device)
    #         outputs = model(features)
    #         predicted = (outputs.squeeze() > 0.5).float()
    #         total += labels.size(0)
    #         correct += (predicted == labels).sum().item()
    
    # print(f'Accuracy on test set: {100 * correct / total:.2f}%')
# if __name__ == '__main__':
#     main()