## 5305 Final Project - Model

### This file executes second

In [2]:
import torch 
import pandas as pd
import torch.nn as nn
from torch.utils.data import Dataset, DataLoader
import seaborn as sns 
import matplotlib.pyplot as plt
import torch.optim as optim

## Defining a custom `InsuranceDataset` class that inherits the `Dataset` class.

In [3]:
class InsuranceDataset(Dataset):
    """Tabular dataset that serves (features, target) pairs from a CSV file.

    The CSV is read once and every column except the target is treated as a
    feature. Features and targets are converted to ``float32`` tensors a
    single time in ``__init__`` so that ``__getitem__`` is a cheap tensor
    index instead of a per-row pandas lookup and tensor construction.

    Args:
        csv_path: Path of the CSV file to load.
        target_column: Name of the column used as the regression target.

    Attributes:
        data: The full DataFrame as read from ``csv_path``.
        X: Feature DataFrame (``data`` without ``target_column``).
        y: Target Series (``data[target_column]``).

    Note:
        The tensors are a snapshot taken at construction time; mutating
        ``X`` or ``y`` afterwards does not change what ``__getitem__`` returns.
    """

    def __init__(self, csv_path='./insurance_data_imputed.csv', target_column='premium_amount'):
        self.data = pd.read_csv(csv_path)
        self.X = self.data.drop(target_column, axis=1)
        self.y = self.data[target_column]

        # Convert once, column-wise. Converting the whole frame also copes with
        # mixed bool/int/float columns, which a single object-dtype row would not.
        self._features = torch.tensor(self.X.to_numpy(dtype='float32'), dtype=torch.float32)
        self._targets = torch.tensor(self.y.to_numpy(dtype='float32'), dtype=torch.float32)

    def __len__(self):
        """Return the number of rows in the dataset."""
        return len(self.data)

    def __getitem__(self, idx):
        """Return ``(features, target)`` for the row at position ``idx``.

        ``features`` is a 1-D float32 tensor with one entry per feature
        column; ``target`` is a 0-D float32 tensor.
        """
        return self._features[idx], self._targets[idx]

# Load the CSV into a single dataset object; it is split into train/test/validation subsets further down.
dataset = InsuranceDataset()

In [4]:
# Show the feature columns (everything except the 'premium_amount' target).
# The recorded output lists 17 columns, matching the network's input width.
dataset.X.columns


Index(['age', 'gender', 'annual_income', 'marital_status',
       'number_of_dependents', 'education_level', 'health_score', 'location',
       'policy_type', 'previous_claims', 'credit_score', 'insurance_duration',
       'smoking_status', 'exercise_frequency', 'occupation_employed',
       'occupation_self_employed', 'occupation_unemployed'],
      dtype='object')

In [None]:
# Feedforward Neural Network with 2 hidden layers, with 32 and 16 neurons respectively (by default)

class FeedForwardNN(nn.Module):
    """Fully connected regression network: input -> hidden1 -> hidden2 -> 1.

    Each hidden layer is followed by ReLU and dropout; the output layer is
    linear and produces one value per sample.

    Args:
        input_dim: Number of input features (17 for the insurance dataset).
        hidden1: Width of the first hidden layer.
        hidden2: Width of the second hidden layer.
        dropout: Dropout probability applied after each hidden activation.
    """

    def __init__(self, input_dim=17, hidden1=32, hidden2=16, dropout=0.5):
        super().__init__()
        self.fc1 = nn.Linear(input_dim, hidden1)
        self.fc2 = nn.Linear(hidden1, hidden2)
        self.output = nn.Linear(hidden2, 1)

        self.relu = nn.ReLU()
        self.dropout = nn.Dropout(p=dropout)  # Dropout rate

    def forward(self, x):
        """Map a ``(batch, input_dim)`` float tensor to ``(batch, 1)`` predictions."""
        x = self.dropout(self.relu(self.fc1(x)))
        x = self.dropout(self.relu(self.fc2(x)))

        return self.output(x)

model = FeedForwardNN()

In [40]:
# training, testing and validation data (70% / 15% / remaining ~15%)
train_size = int(0.7 * len(dataset))
test_size = int(0.15 * len(dataset))
val_size = len(dataset) - train_size - test_size  # remainder, so the three sizes always sum to len(dataset)

# Seed the split so the same rows land in each subset on every
# Restart & Run All; otherwise the reported metrics are not reproducible.
split_generator = torch.Generator().manual_seed(42)
train_dataset, test_dataset, val_dataset = torch.utils.data.random_split(
    dataset, [train_size, test_size, val_size], generator=split_generator
)

# Only the training loader needs shuffling; evaluation order does not matter,
# so the test/validation loaders iterate deterministically.
train_loader = DataLoader(train_dataset, batch_size=32, shuffle=True)
test_loader = DataLoader(test_dataset, batch_size=32, shuffle=False)
val_loader = DataLoader(val_dataset, batch_size=32, shuffle=False)

In [41]:
from torch.optim.lr_scheduler import ReduceLROnPlateau

In [None]:
# Loss and optimizer. SmoothL1 is quadratic for small errors and linear for
# large ones, so it penalises outliers less heavily than plain MSE.
criteria = nn.SmoothL1Loss()
optimizer = torch.optim.Adam(model.parameters(), lr=0.01)

# Training the model
num_epochs = 100

# Multiply the learning rate by 0.4 when the validation loss has not improved
# for 4 consecutive epochs. The `verbose` flag is deprecated in recent PyTorch,
# so the current learning rate is printed explicitly at the end of each epoch.
scheduler = ReduceLROnPlateau(optimizer, mode='min', factor=0.4, patience=4)

# NOTE(review): the recorded losses stay in the hundreds, which suggests the
# target (and possibly the features) are fed in unscaled - confirm against the
# preprocessing notebook.
for epoch in range(num_epochs):
    model.train()
    epoch_loss = 0.0
    for i, (inputs, labels) in enumerate(train_loader):
        inputs = inputs.float()  # Convert inputs to float

        # Forward pass. squeeze(-1) drops only the trailing size-1 output
        # dimension, so a final batch holding one sample keeps shape (1,)
        # instead of collapsing to a 0-d tensor that mismatches the labels.
        outputs = model(inputs)
        loss = criteria(outputs.squeeze(-1), labels.float())  # Convert labels to float

        # Backward and optimize
        optimizer.zero_grad()
        loss.backward()
        optimizer.step()

        epoch_loss += loss.item()

        if (i+1) % 100 == 0:
            print(f'Epoch [{epoch+1}/{num_epochs}], Step [{i+1}/{len(train_loader)}], Loss: {loss.item():.4f}')

    # Mean training loss over the batches of this epoch
    epoch_loss /= max(len(train_loader), 1)

    # Calculate validation loss
    val_loss = 0.0
    model.eval()
    with torch.no_grad():
        for inputs, labels in val_loader:
            outputs = model(inputs.float())
            val_loss += criteria(outputs.squeeze(-1), labels.float()).item()
    val_loss /= len(val_loader)

    # Step the scheduler
    scheduler.step(val_loss)

    current_lr = optimizer.param_groups[0]['lr']
    print(f'Epoch [{epoch+1}/{num_epochs}] finished - train loss: {epoch_loss:.4f}, val loss: {val_loss:.4f}, lr: {current_lr:.6f}')

Epoch [1/100], Step [100/4091], Loss: 617.9430
Epoch [1/100], Step [200/4091], Loss: 800.2217
Epoch [1/100], Step [300/4091], Loss: 672.3403
Epoch [1/100], Step [400/4091], Loss: 815.2747
Epoch [1/100], Step [500/4091], Loss: 600.0973
Epoch [1/100], Step [600/4091], Loss: 589.1080
Epoch [1/100], Step [700/4091], Loss: 541.2977
Epoch [1/100], Step [800/4091], Loss: 565.2228
Epoch [1/100], Step [900/4091], Loss: 670.4905
Epoch [1/100], Step [1000/4091], Loss: 735.6033
Epoch [1/100], Step [1100/4091], Loss: 775.3809
Epoch [1/100], Step [1200/4091], Loss: 867.5320
Epoch [1/100], Step [1300/4091], Loss: 920.0582
Epoch [1/100], Step [1400/4091], Loss: 647.6493
Epoch [1/100], Step [1500/4091], Loss: 583.6855
Epoch [1/100], Step [1600/4091], Loss: 733.9309
Epoch [1/100], Step [1700/4091], Loss: 862.0131
Epoch [1/100], Step [1800/4091], Loss: 590.0995
Epoch [1/100], Step [1900/4091], Loss: 613.2690
Epoch [1/100], Step [2000/4091], Loss: 477.7712
Epoch [1/100], Step [2100/4091], Loss: 666.1180
E

In [46]:
from sklearn.metrics import mean_squared_error, mean_absolute_error, r2_score


# Function to evaluate metrics
def evaluate_metrics(model, data_loader):
    """Compute regression metrics for ``model`` over every batch in ``data_loader``.

    Args:
        model: A ``torch.nn.Module`` that maps a float input batch to one
            prediction per sample.
        data_loader: Iterable yielding ``(inputs, labels)`` tensor pairs.

    Returns:
        Tuple ``(mse, mae, r2)`` as returned by scikit-learn's
        ``mean_squared_error``, ``mean_absolute_error`` and ``r2_score``.

    Raises:
        ValueError: If ``data_loader`` yields no batches.
    """
    model.eval()  # Set model to evaluation mode (disables dropout)
    batch_predictions = []
    batch_labels = []

    with torch.no_grad():  # Disable gradient computation
        for inputs, labels in data_loader:
            # reshape(-1) always yields a 1-D tensor, even when the batch holds
            # a single sample; a bare squeeze() would collapse that case to a
            # 0-d tensor, which cannot be concatenated or iterated.
            outputs = model(inputs.float()).reshape(-1)
            batch_predictions.append(outputs.cpu())
            batch_labels.append(labels.float().reshape(-1).cpu())

    if not batch_predictions:
        raise ValueError("data_loader yielded no batches; cannot compute metrics")

    # Concatenate once and convert to NumPy for scikit-learn
    y_true = torch.cat(batch_labels).numpy()
    y_pred = torch.cat(batch_predictions).numpy()

    # Calculate metrics
    mse = mean_squared_error(y_true, y_pred)
    mae = mean_absolute_error(y_true, y_pred)
    r2 = r2_score(y_true, y_pred)

    return mse, mae, r2

# Score the trained model on the held-out test split and report all three
# metrics. An R² below zero means the predictions fit worse than always
# predicting the mean of the targets.
mse, mae, r2 = evaluate_metrics(model, test_loader)
print(
    f"Mean Squared Error (MSE): {mse:.4f}\n"
    f"Mean Absolute Error (MAE): {mae:.4f}\n"
    f"R-squared (R²): {r2:.4f}"
)

Mean Squared Error (MSE): 907331.1875
Mean Absolute Error (MAE): 659.8949
R-squared (R²): -0.1118
