In [1]:
import torch
import torch.nn as nn
import torch.optim as optim
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
from sklearn.metrics import mean_squared_error
import pandas as pd
import numpy as np

# Load the CSV file
file_path = "combined_customers_balanced_large.csv"
data = pd.read_csv(file_path)

# Remove the "Total" row
data = data[data['Day'] != 'Total']

# Define input features (X) and output target (Y)
X = data[['NumDeliveries', 'TotalDistance']].values  # Add more features if available
Y = data['TotalDistance'].values  # Example: Predicting TotalDistance

In [2]:
# Split into train and validation sets
X_train, X_val, Y_train, Y_val = train_test_split(X, Y, test_size=0.2, random_state=42)

# Normalize features
scaler = StandardScaler()
X_train = scaler.fit_transform(X_train)
X_val = scaler.transform(X_val)

# Convert to PyTorch tensors
X_train = torch.tensor(X_train, dtype=torch.float32)
Y_train = torch.tensor(Y_train, dtype=torch.float32)
X_val = torch.tensor(X_val, dtype=torch.float32)
Y_val = torch.tensor(Y_val, dtype=torch.float32)

# Define Neural Network
class NeuralNet(nn.Module):
    def __init__(self, input_size, hidden_size):
        super(NeuralNet, self).__init__()
        self.fc1 = nn.Linear(input_size, hidden_size)
        self.fc2 = nn.Linear(hidden_size, 1)

    def forward(self, x):
        x = torch.relu(self.fc1(x))
        x = self.fc2(x)
        return x

# Hyperparameter tuning: hidden sizes and random seeds
hidden_sizes = list(range(200, 301 , 10))  # Range of hidden layer sizes to test
seed_range = range(1, 50)  # Range of random seeds to test

val_mses = []  # To store validation MSE for each configuration
for seed in seed_range:
    for hidden_size in hidden_sizes:
        # Set random seeds for reproducibility
        torch.manual_seed(seed)
        np.random.seed(seed)
        
        # Initialize model
        input_size = X_train.shape[1]  # Number of features
        model = NeuralNet(input_size, hidden_size)
        criterion = nn.MSELoss()
        optimizer = optim.Adam(model.parameters(), lr=0.01)

        # Early stopping
        best_val_loss = float('inf')
        patience = 10
        early_stop_counter = 0

        # Training loop
        for epoch in range(2000):  # Train for a maximum of 2000 epochs
            # Forward propagation
            outputs = model(X_train)
            loss = criterion(outputs.squeeze(), Y_train)

            # Backpropagation
            optimizer.zero_grad()
            loss.backward()
            optimizer.step()

            # Validation loss
            with torch.no_grad():
                val_outputs = model(X_val)
                val_loss = criterion(val_outputs.squeeze(), Y_val)

            # Early stopping logic
            if val_loss < best_val_loss:
                best_val_loss = val_loss
                early_stop_counter = 0
            else:
                early_stop_counter += 1
                if early_stop_counter >= patience:  # Stop if no improvement
                    break

        # Log the validation MSE for this configuration
        val_mses.append((seed, hidden_size, best_val_loss.item()))

# Find the best configuration
best_seed, best_hidden_size, best_val_mse = min(val_mses, key=lambda x: x[2])
print(f"Optimal Random Seed: {best_seed}, Optimal Hidden Layer Size: {best_hidden_size}, Validation MSE: {best_val_mse}")

Optimal Random Seed: 49, Optimal Hidden Layer Size: 290, Validation MSE: 0.00015556269499938935


In [3]:
# Split into training and validation sets
X_train, X_val, Y_train, Y_val = train_test_split(X, Y, test_size=0.2, random_state=42)

# Normalize features
scaler = StandardScaler()
X_train = scaler.fit_transform(X_train)
X_val = scaler.transform(X_val)

# Convert to PyTorch tensors
X_train = torch.tensor(X_train, dtype=torch.float32)
Y_train = torch.tensor(Y_train, dtype=torch.float32)
X_val = torch.tensor(X_val, dtype=torch.float32)
Y_val = torch.tensor(Y_val, dtype=torch.float32)

# Main Neural Network
class NeuralNet(nn.Module):
    def __init__(self, input_size, hidden_size):
        super(NeuralNet, self).__init__()
        self.fc1 = nn.Linear(input_size, hidden_size)
        self.fc2 = nn.Linear(hidden_size, 1)

    def forward(self, x):
        x = torch.relu(self.fc1(x))
        x = self.fc2(x)
        return x

# Random seeding
torch.manual_seed(49)
np.random.seed(49)

# Initialize model, loss, optimizer
input_size = X_train.shape[1]
hidden_size = 290  # Adjust as needed
model = NeuralNet(input_size, hidden_size)

criterion = nn.MSELoss()
optimizer = optim.Adam(model.parameters(), lr=0.01)

# Epoch tuning variables
best_val_loss = float('inf')
optimal_epoch = None

# Training loop with epoch tuning
for epoch in range(4000):
    # Forward pass
    outputs = model(X_train)
    loss = criterion(outputs.squeeze(), Y_train)

    # Backpropagation and optimization
    optimizer.zero_grad()
    loss.backward()
    optimizer.step()

    # Validation
    with torch.no_grad():
        val_outputs = model(X_val)
        val_loss = mean_squared_error(val_outputs.squeeze().numpy(), Y_val.numpy())

    # Update best validation loss and epoch
    if val_loss < best_val_loss:
        best_val_loss = val_loss
        optimal_epoch = epoch + 1

    # Print progress every 100 epochs
    if (epoch + 1) % 100 == 0:
        print(f'Epoch [{epoch+1}/4000], Loss: {loss.item():.4f}, Validation Loss: {val_loss:.4f}')

# Final results
print(f'Optimal Epoch: {optimal_epoch}, Validation MSE: {best_val_loss:.8f}')

Epoch [100/4000], Loss: 0.3887, Validation Loss: 0.3560
Epoch [200/4000], Loss: 0.1056, Validation Loss: 0.1177
Epoch [300/4000], Loss: 0.0372, Validation Loss: 0.0490
Epoch [400/4000], Loss: 0.0154, Validation Loss: 0.0216
Epoch [500/4000], Loss: 0.0075, Validation Loss: 0.0090
Epoch [600/4000], Loss: 0.0041, Validation Loss: 0.0043
Epoch [700/4000], Loss: 0.0024, Validation Loss: 0.0022
Epoch [800/4000], Loss: 0.0014, Validation Loss: 0.0012
Epoch [900/4000], Loss: 0.0009, Validation Loss: 0.0007
Epoch [1000/4000], Loss: 0.0006, Validation Loss: 0.0005
Epoch [1100/4000], Loss: 0.0004, Validation Loss: 0.0003
Epoch [1200/4000], Loss: 0.0003, Validation Loss: 0.0002
Epoch [1300/4000], Loss: 0.0002, Validation Loss: 0.0002
Epoch [1400/4000], Loss: 0.0002, Validation Loss: 0.0002
Epoch [1500/4000], Loss: 0.0002, Validation Loss: 0.0002
Epoch [1600/4000], Loss: 0.0001, Validation Loss: 0.0002
Epoch [1700/4000], Loss: 0.0001, Validation Loss: 0.0002
Epoch [1800/4000], Loss: 0.0001, Validat