In [1]:
import glob
import os
import shutil
import torch
import csv
import random

import pandas as pd
import numpy as np
from torch.utils.data import Dataset, DataLoader
from torchvision.transforms.functional import to_tensor
from torch import nn

from sklearn.model_selection import train_test_split
import albumentations as A
import cv2
import matplotlib.pyplot as plt

# Seed the Python, NumPy and PyTorch random number generators up front so that
# repeated runs draw the same random numbers from each of them.
RANDOM_SEED = 42
random.seed(RANDOM_SEED)
np.random.seed(RANDOM_SEED)  # NumPy's global RNG was previously left unseeded
torch.manual_seed(RANDOM_SEED)

# Prefer the GPU when one is visible to PyTorch, otherwise fall back to the CPU.
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
print("Using:", device)


Using: cuda


  check_for_updates()


In [2]:
!pip install onnx

Collecting onnx
  Downloading onnx-1.18.0-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl.metadata (6.9 kB)
Downloading onnx-1.18.0-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl (17.6 MB)
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m17.6/17.6 MB[0m [31m122.4 MB/s[0m eta [36m0:00:00[0m
[?25hInstalling collected packages: onnx
Successfully installed onnx-1.18.0


In [None]:
# working on Google Colab: unpack the uploaded dataset archive
import os
import zipfile

zip_path = '/content/dataset.zip'
extract_dir = '/'

# Create the destination first (a no-op when it already exists).
os.makedirs(extract_dir, exist_ok=True)

# ZipFile opens in read mode by default; the context manager closes it again.
with zipfile.ZipFile(zip_path) as archive:
    archive.extractall(path=extract_dir)

print(f'Files extracted to: {extract_dir}')


Files extracted to: /


In [None]:
# The dataset consists of:
#   - original images (the ones with a negative forward signal were removed)
#   - horizontally flipped copies of those images with a changed left signal
#
# Every CSV is paired with a directory of the same name minus ".csv". Column 0
# of a row is the frame number (the image is "<dir>/<number, 4 digits>.jpg");
# the remaining columns are kept as that image's target values.
#
# "*.csv" (rather than "*csv") matches the pattern TrackDataset uses, and
# sorting makes the listing order independent of the filesystem.
paths = sorted(glob.glob("/dataset/*.csv"))

im_list = []
for path in paths:
    target_df = pd.read_csv(path, header=None)
    im_dir = path.removesuffix('.csv')
    # One plain Python list per CSV row, without per-row iloc lookups.
    for row in target_df.to_numpy().tolist():
        im_path = f"{im_dir}/{int(row[0]):04d}.jpg"
        im_list.append((im_path, row[1:]))

print(len(im_list))


10912


In [11]:
class TrackDataset(Dataset):
    """Grayscale track images paired with their target values.

    Samples are discovered by reading every ``*.csv`` file under
    ``/<dataset_dir>/``. Column 0 of each CSV row is a frame number and the
    remaining columns are the targets; the matching image is looked up at
    ``<csv path without ".csv">/<frame number zero-padded to 4 digits>.jpg``.
    The full sample list is split with ``train_test_split`` (seeded with
    ``RANDOM_SEED``) and only the requested part is kept.

    Args:
        dataset_dir: Directory name; a leading ``/`` is always prepended
            when the CSV files are searched.
        split: ``'train'`` or ``'val'``. Augmentation is applied to
            ``'train'`` samples only.
        val_size: Forwarded to ``train_test_split`` as ``test_size``.

    Raises:
        ValueError: If ``split`` is neither ``'train'`` nor ``'val'``, or if
            no samples were found.
    """

    def __init__(self, dataset_dir="dataset", split='train', val_size=0.2):
        # Reject a bad split before doing any file I/O.
        if split not in ('train', 'val'):
            raise ValueError("split must be either 'train' or 'val'")

        self.dataset_dir = dataset_dir
        self.split = split
        self.val_size = val_size

        samples = self._load_samples()
        if not samples:
            # Fail with a message that names the searched location instead of
            # letting train_test_split complain about an empty input.
            raise ValueError(f"no samples found under /{self.dataset_dir}/*.csv")

        train_files, val_files = train_test_split(
            samples, test_size=self.val_size, random_state=RANDOM_SEED
        )
        self.image_list = train_files if self.split == 'train' else val_files

        self.im_transform = A.Compose([
            # slight rotation
            A.Rotate(limit=(-5,5), p =0.3),
            # brightness, contrast set slightly higher than default
            A.RandomBrightnessContrast(brightness_limit=[-0.3,0.3],contrast_limit=[-0.3,0.3], p=0.5),
            #blur
            A.Blur(p=0.25)
        ])

    def _load_samples(self):
        """Return ``(image_path, targets)`` pairs for every row of every CSV."""
        samples = []
        # glob order depends on the filesystem; sorting keeps the seeded
        # train/val split identical across machines and runs.
        for csv_path in sorted(glob.glob(f"/{self.dataset_dir}/*.csv")):
            target_df = pd.read_csv(csv_path, header=None)
            im_dir = csv_path.removesuffix('.csv')
            for row in target_df.to_numpy().tolist():
                samples.append((f"{im_dir}/{int(row[0]):04d}.jpg", row[1:]))
        return samples

    def __len__(self):
        """Number of samples in the selected split."""
        return len(self.image_list)

    def __getitem__(self, idx):
        """Load sample ``idx`` and return ``(image, target)`` tensors on ``device``.

        Raises:
            FileNotFoundError: If the image file cannot be read.
        """
        im_path, target_values = self.image_list[idx]

        image = cv2.imread(im_path) # H W C (BGR)
        if image is None:
            # cv2.imread reports a missing or unreadable file by returning None
            raise FileNotFoundError(f"could not read image: {im_path}")
        image = cv2.cvtColor(image, cv2.COLOR_BGR2GRAY) # H W (single channel, 2-D)
        if self.split == 'train':
            image = self.im_transform(image=image)['image']
        # to_tensor adds the channel axis -> (1, H, W), scales to [0.0, 1.0], float32
        image = to_tensor(image)
        # Samples are returned already on `device`, so they can be fed to a
        # model on the same device without a further transfer.
        image = image.to(device)

        target = torch.tensor(np.array(target_values), dtype=torch.float32).to(device)

        return image, target


In [12]:
# Build both dataset splits and wrap each one in a batched loader.
batch_size = 64
train_dataset = TrackDataset(split='train')
val_dataset = TrackDataset(split='val')
train_loader = DataLoader(dataset=train_dataset, batch_size=batch_size, shuffle=True)
val_loader = DataLoader(dataset=val_dataset, batch_size=batch_size, shuffle=False)

# check if the dataloader is working: inspect the first training batch only
first_batch = next(iter(train_loader), None)
if first_batch is not None:
    images, targets = first_batch
    print(images.shape)
    print(targets.shape)
    min_val, max_val = images.min().item(), images.max().item()
    print(f"Min value: {min_val}, Max value: {max_val}")

# number of batches per epoch in each loader
print(len(train_loader))
print(len(val_loader))

torch.Size([64, 1, 224, 224])
torch.Size([64, 2])
Min value: 0.0, Max value: 1.0
137
35


In [13]:
class NeuralRegressor(nn.Module):
    """Small CNN that maps a 1-channel image batch to two bounded outputs.

    Three convolution stages are pooled down to a 32 x 7 x 7 feature map
    (1568 values), which a three-layer fully connected head reduces to two
    numbers. Column 0 of the result is passed through a sigmoid and column 1
    through tanh.
    """

    def __init__(self) -> None:
        super().__init__()

        feature_layers = [
            nn.Conv2d(1, 8, kernel_size=3, stride=1, padding=1),
            nn.ReLU(),
            nn.MaxPool2d(kernel_size=2, stride=2),
            nn.Conv2d(8, 16, kernel_size=3, stride=1, padding=1),
            nn.ReLU(),
            nn.MaxPool2d(kernel_size=2, stride=2),
            nn.Conv2d(16, 32, kernel_size=3, stride=1, padding=1),
            nn.ReLU(),
            nn.AdaptiveAvgPool2d((7, 7)),
            nn.Dropout(0.25),
        ]
        self.conv = nn.Sequential(*feature_layers)

        self.flat = nn.Flatten()

        head_layers = [
            nn.Linear(1568, 256),
            nn.ReLU(),
            nn.Linear(256, 64),
            nn.ReLU(),
            nn.Dropout(0.5),
            nn.Linear(64, 2),
        ]
        self.fc = nn.Sequential(*head_layers)

    def forward(self, x):
        """Return a ``(batch, 2)`` tensor: ``[sigmoid(out0), tanh(out1)]`` per sample."""
        features = self.flat(self.conv(x))
        raw = self.fc(features)
        forward_signal = torch.sigmoid(raw[:, 0])  # forward signal in range 0.0 to 1.0
        left_signal = torch.tanh(raw[:, 1])  # left signal in range -1.0 to 1.0
        return torch.stack((forward_signal, left_signal), dim=1)

In [14]:
# Instantiate the network and add up the element counts of all its parameters.
model = NeuralRegressor()
pytorch_total_params = 0
for param in model.parameters():
    pytorch_total_params += param.numel()
print("Total number of parameters:", pytorch_total_params)

Total number of parameters: 424130


In [16]:
# Train the regressor with an MSE loss, validate after every epoch, export the
# model to ONNX whenever the validation loss improves, and stop early once it
# has not improved for more than `patience` epochs.
model = NeuralRegressor()
model.to(device)

loss_fn = nn.MSELoss()
optimizer = torch.optim.Adam(model.parameters(), lr=1e-3)

best_val_loss = float('inf')
onnx_export_path = "best_model.onnx"

epochs = 200
patience = 30  # epochs without validation improvement tolerated before stopping
since_improvement = 0

# The export only needs a single example input, so fetch it once instead of
# loading a whole validation batch again on every export.
dummy_input = next(iter(val_loader))[0][:1].to(device)

for epoch in range(epochs):
    model.train()
    train_loss = 0.0
    for i, (image, target_y) in enumerate(train_loader):
        image = image.to(device)
        target_y = target_y.to(device)

        optimizer.zero_grad()
        pred_y = model(image)
        loss = loss_fn(pred_y, target_y)
        loss.backward()
        optimizer.step()

        train_loss += loss.item()

        if i % 10 == 0:
            print(f"Epoch [{epoch+1}/{epochs}], Iteration [{i}], Train Loss: {loss.item():.4f}")

    # Mean of the per-batch losses.
    train_loss /= len(train_loader)
    print(f"Epoch [{epoch+1}/{epochs}] - Average Train Loss: {train_loss:.4f}")

    model.eval()
    val_loss = 0.0
    with torch.no_grad():
        for image, target_y in val_loader:
            # Move the batch explicitly, exactly like the training loop does,
            # so validation does not depend on where the dataset put it.
            image = image.to(device)
            target_y = target_y.to(device)

            pred_y = model(image)
            loss = loss_fn(pred_y, target_y)
            val_loss += loss.item()

    val_loss /= len(val_loader)
    print(f"Epoch [{epoch+1}/{epochs}] - Validation Loss: {val_loss:.4f}")

    if val_loss < best_val_loss:
        since_improvement = 0
        best_val_loss = val_loss
        print(f"New best model found at epoch {epoch+1}, saving to {onnx_export_path}...")

        print(dummy_input.shape)
        # The model is still in eval mode here (set before validation).
        torch.onnx.export(
            model,
            dummy_input,
            onnx_export_path,
            opset_version=11,
        )
    else:
        since_improvement += 1
    if since_improvement > patience:
        print("Early stopping")
        break
# This is the lowest validation loss seen, not a summed loss.
print(f"Best validation loss: {best_val_loss}")


Epoch [1/200], Iteration [0], Train Loss: 0.3521
Epoch [1/200], Iteration [10], Train Loss: 0.3183
Epoch [1/200], Iteration [20], Train Loss: 0.3076
Epoch [1/200], Iteration [30], Train Loss: 0.2989
Epoch [1/200], Iteration [40], Train Loss: 0.3185
Epoch [1/200], Iteration [50], Train Loss: 0.3758
Epoch [1/200], Iteration [60], Train Loss: 0.3276
Epoch [1/200], Iteration [70], Train Loss: 0.3012
Epoch [1/200], Iteration [80], Train Loss: 0.3434
Epoch [1/200], Iteration [90], Train Loss: 0.2626
Epoch [1/200], Iteration [100], Train Loss: 0.3064
Epoch [1/200], Iteration [110], Train Loss: 0.3063
Epoch [1/200], Iteration [120], Train Loss: 0.3056
Epoch [1/200], Iteration [130], Train Loss: 0.2778
Epoch [1/200] - Average Train Loss: 0.3154
Epoch [1/200] - Validation Loss: 0.3034
New best model found at epoch 1, saving to best_model.onnx...
torch.Size([1, 1, 224, 224])
Epoch [2/200], Iteration [0], Train Loss: 0.2736
Epoch [2/200], Iteration [10], Train Loss: 0.2982
Epoch [2/200], Iteration