In [1]:
import glob
import copy
import numpy as np
from PIL import Image
from sklearn.model_selection import train_test_split

import torch
import torch.nn as nn
import torch.nn.init as init
import torch.backends.cudnn as cudnn
from torch.utils.checkpoint import checkpoint
from torch.utils.data import Dataset, DataLoader
from torch.cuda.amp import autocast

import warnings
warnings.filterwarnings("ignore")


# Part 1: Preparing the Dataset

In [2]:
class DepthDataset(Dataset):
    def __init__(self, rgb, depth):
        self.input = rgb
        self.output = depth

    def __len__(self):
        return len(self.input)

    def __getitem__(self, index):
        input_path = self.input[index]
        output_path = self.output[index]

        with Image.open(input_path) as input_image, Image.open(output_path) as output_image:
            rgb = torch.tensor(np.array(input_image) / 255, dtype=torch.float).reshape(3,480,640)
            depth = torch.tensor(np.array(output_image), dtype=torch.float)

        return rgb, depth


In [3]:
rgb = glob.glob('./sync/**/rgb*', recursive = True)
depth = glob.glob('./sync/**/sync*', recursive = True)
rgb.sort()
depth.sort()

In [4]:
X_train, X_test, y_train, y_test  = train_test_split(rgb, depth, test_size=0.2, random_state=1)
X_train, X_val, y_train, y_val = train_test_split(X_train, y_train, test_size=0.25, random_state=1)

# Part 2: Creating the Model

In [5]:
# Enable cudnn benchmark mode
cudnn.benchmark = True

class ConvNeuralNet(nn.Module):
    def __init__(self):
        super(ConvNeuralNet, self).__init__()
        self.conv1 = nn.Conv2d(in_channels=3, out_channels=64, kernel_size=3, stride=1, padding=1)
        self.relu1 = nn.ReLU()
        self.conv2 = nn.Conv2d(in_channels=64, out_channels=128, kernel_size=3, stride=1, padding=1)
        self.relu2 = nn.ReLU()
        self.maxpool = nn.MaxPool2d(kernel_size=2, stride=2)
        self.conv3 = nn.Conv2d(in_channels=128, out_channels=256, kernel_size=3, stride=1, padding=1)
        self.relu3 = nn.ReLU()
        self.upsample = nn.Upsample(scale_factor=2, mode='bilinear', align_corners=True)
        self.conv4 = nn.Conv2d(in_channels=256, out_channels=1, kernel_size=3, stride=1, padding=1)

        # Initialize the weights of conv layers
        init.xavier_uniform_(self.conv1.weight.data)
        init.xavier_uniform_(self.conv2.weight.data)
        init.xavier_uniform_(self.conv3.weight.data)
        init.xavier_uniform_(self.conv4.weight.data)

    def forward(self, x):
        out = self.conv1(x)
        out = self.relu1(out)
        out = self.conv2(out)
        out = self.relu2(out)
        out = self.maxpool(out)
        out = self.conv3(out)
        out = self.relu3(out)
        out = self.upsample(out)
        out = self.conv4(out)
        return out



In [6]:
learning_rate = 0.0005
num_epochs = 5
batch_size = 8
wd = 0.001

params = {'batch_size': batch_size, 'shuffle': True, 'num_workers': 0}
train_loader = DataLoader(DepthDataset(X_train, y_train), **params)
val_loader = DataLoader(DepthDataset(X_val, y_val), **params)
test_loader = DataLoader(DepthDataset(X_test, y_test), **params)

device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')

model = ConvNeuralNet().to(device)

criterion = nn.MSELoss()

optimizer = torch.optim.Adam(model.parameters(), lr=learning_rate, weight_decay=wd)

scheduler = torch.optim.lr_scheduler.ReduceLROnPlateau(optimizer, factor=0.01, patience=3)

print(device)

cuda


# Running the Model

In [7]:
# We use the pre-defined number of epochs to determine how many iterations to train the network on

scaler = torch.cuda.amp.GradScaler()

# Initialize early stopping variables
best_val_loss = float('inf')
patience = 3  # Number of epochs to wait for improvement
threshold = 1000
counter = 0  # Counter to track the number of epochs without improvement
best_weights = None
history = []

for epoch in range(1,num_epochs+1):
    running_loss = 0.0

    # Training
    for i, (local_batch, local_labels) in enumerate(train_loader, 1):
        # Move tensors to the configured device
        local_batch, local_labels = local_batch.to(device), local_labels.to(device)

        optimizer.zero_grad()
        # Forward pass
        with autocast():
            outputs = model(local_batch)
            loss = criterion(outputs, local_labels)

        # Backward and optimize
        scaler.scale(loss).backward()
        torch.nn.utils.clip_grad_norm_(model.parameters(), max_norm=5)
        scaler.step(optimizer)
        scaler.update()

        running_loss += loss.item()

        # Free up GPU Memory cache
        torch.cuda.empty_cache()

    train_loss = running_loss / len(train_loader)
    print('Epoch [{}/{}], Training Loss: {:.4f}'.format(epoch, num_epochs, train_loss))

    # Validation
    with torch.no_grad():
        loss_sum = 0
        correct = 0
        
        for images, labels in val_loader:
            images = images.to(device)
            labels = labels.to(device)
            
            outputs = model(images)
            loss_sum += criterion(outputs, labels).item()
        
        val_loss = loss_sum / len(val_loader)
        print('Epoch [{}/{}], Validation Loss: {:.4f}'.format(epoch, num_epochs, val_loss))
        history.append(("Epoch " + str(epoch), val_loss))
        
        # Early stopping
        if val_loss < best_val_loss - threshold:
            print("New best validation loss at epoch", epoch)
            best_val_loss = val_loss
            counter = 0  # Reset the counter when there is improvement
            best_weights = copy.deepcopy(model.state_dict())
        else:
            counter += 1
            if counter >= patience:
                print("Early stopping at epoch", epoch)
                break
        scheduler.step(val_loss)

Epoch [1/10], Training Loss: 2897115.8999
New best validation loss at epoch 1
Epoch [1/10], Validation Loss: 2792465.7228
Accuracy on the validation set: 32.533%
Epoch [2/10], Training Loss: 2734593.1695
New best validation loss at epoch 2
Epoch [2/10], Validation Loss: 2754384.7781
Accuracy on the validation set: 32.533%
Epoch [3/10], Training Loss: 2678832.0774
New best validation loss at epoch 3
Epoch [3/10], Validation Loss: 2718544.9523
Accuracy on the validation set: 32.533%
Epoch [4/10], Training Loss: 2643485.5892
New best validation loss at epoch 4
Epoch [4/10], Validation Loss: 2671863.4641
Accuracy on the validation set: 32.533%
Epoch [5/10], Training Loss: 2622481.3601
New best validation loss at epoch 5
Epoch [5/10], Validation Loss: 2634731.2530
Accuracy on the validation set: 32.533%
Epoch [6/10], Training Loss: 2603454.5298
New best validation loss at epoch 6
Epoch [6/10], Validation Loss: 2621532.0092
Accuracy on the validation set: 32.533%
Epoch [7/10], Training Loss:

In [9]:
torch.save(best_weights, './model.pth')

[('Epoch 1', 2792465.722802198), ('Epoch 2', 2754384.7780906595), ('Epoch 3', 2718544.9522664836), ('Epoch 4', 2671863.4641483515), ('Epoch 5', 2634731.253021978), ('Epoch 6', 2621532.0092032966), ('Epoch 7', 2646670.7837912086), ('Epoch 8', 2601600.3776785713), ('Epoch 9', 2602133.744024725), ('Epoch 10', 2626383.282554945)]


In [10]:
model.load_state_dict(best_weights)

with torch.no_grad():
    mse = 0.0
    for images, labels in test_loader:
        images = images.to(device)
        labels = labels.to(device)
        outputs = model(images)
        mse += criterion(outputs, labels).item() * images.size(0)
    
    mse /= len(X_test)
    print('Mean Squared Error on the test set: {:.3f}'.format(mse))


Mean Squared Error on the test set: 2547449.192
