In [1]:
import torch
import tensorflow.compat.v1 as tf
tf.disable_v2_behavior()
import cv2
import torch.nn as nn
import torch.optim as optim
import torchvision.models as models
import torch.nn.functional as F
import torchvision.transforms as transforms
from torch.utils.data import DataLoader, Dataset,random_split
from PIL import Image
import numpy as np
import os
import warnings
from tqdm import tqdm
warnings.filterwarnings('ignore')
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

2024-05-20 01:52:23.212288: E external/local_xla/xla/stream_executor/cuda/cuda_dnn.cc:9261] Unable to register cuDNN factory: Attempting to register factory for plugin cuDNN when one has already been registered
2024-05-20 01:52:23.212375: E external/local_xla/xla/stream_executor/cuda/cuda_fft.cc:607] Unable to register cuFFT factory: Attempting to register factory for plugin cuFFT when one has already been registered
2024-05-20 01:52:23.336427: E external/local_xla/xla/stream_executor/cuda/cuda_blas.cc:1515] Unable to register cuBLAS factory: Attempting to register factory for plugin cuBLAS when one has already been registered


In [2]:
class SpatialPyramidPooling(nn.Module):
    def __init__(self, pool_list):
        super(SpatialPyramidPooling, self).__init__()
        self.pool_list = pool_list

    def forward(self, x):
        # Get the batch size and number of channels
        batch_size, num_channels, _, _ = x.size()

        # Initialize the output list
        pooled_outputs = []

        # Iterate over each pooling size in the pool_list
        for pool_size in self.pool_list:
            # Perform max pooling with the specified pool size
            pooled = F.adaptive_max_pool2d(x, output_size=(pool_size, pool_size))
            # Reshape the pooled output to have shape (batch_size, num_channels, 1, 1)
            pooled = pooled.view(batch_size, num_channels, -1)
            # Append the pooled output to the output list
            pooled_outputs.append(pooled)

        # Concatenate the pooled outputs along the channel dimension
        output = torch.cat(pooled_outputs, dim=-1)

        # Reshape the output to have shape (batch_size, num_channels * sum(pool_list))
        output = output.view(batch_size, -1)

        return output

In [3]:
# Define the backbone model (ResNet or MobileNet)
backbone = models.resnet18(pretrained=True)  # You can change to other models like MobileNet
#backbone = models.mobilenet_v2(pretrained=True)

# Remove the fully connected layer at the end
backbone = nn.Sequential(*list(backbone.children())[:-1])
input_channels = 512

backbone = backbone.to(device)

Downloading: "https://download.pytorch.org/models/resnet18-f37072fd.pth" to /root/.cache/torch/hub/checkpoints/resnet18-f37072fd.pth
100%|██████████| 44.7M/44.7M [00:00<00:00, 138MB/s]


In [4]:
# Define the basic CNN architecture to predict steering angle
class SteeringCNN(nn.Module):
    def __init__(self ,backbone,input_shape, hidden_size, output_size):
        super(SteeringCNN, self).__init__()
        self.backbone = backbone
        self.spp = SpatialPyramidPooling(pool_list=[1, 2, 4])
        self.fc1 = nn.Linear(input_shape* (1 + 4 + 16), hidden_size)
        self.fc2 = nn.Linear(hidden_size, output_size)

    def forward(self, x):
        x = self.backbone(x)
        x = self.spp(x)
        x = torch.relu(self.fc1(x))
        x = self.fc2(x)
        return x

In [5]:
class SteeringDataset(Dataset):
    def __init__(self, root_dir, transform=None):
        self.root_dir = root_dir
        self.transform = transform
        self.data = self.read_data_file()

    def __len__(self):
        return len(self.data)

    def __getitem__(self, idx):
        img_name, angle = self.data[idx]
        img_path = os.path.join(self.root_dir, img_name)
        image = Image.open(img_path)
        angle = angle * 3.14159265 / 180
        if self.transform:
            image = self.transform(image)
        return image, angle

    def read_data_file(self):
        data_file = os.path.join(self.root_dir, 'data.txt')
        with open(data_file, 'r') as file:
            lines = file.readlines()
            data = [(os.path.join(self.root_dir,line.split()[0]), float(line.split()[1])) for line in lines]
        return data

In [6]:
# Define data transformations
transform = transforms.Compose([
    transforms.Resize((224, 224)),  # Resize input images to match backbone input size
    transforms.ToTensor(),
    transforms.Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225])  # Normalize images
])

In [7]:
# Define hyperparameters
input_size = 512  # Output feature size of ResNet18
hidden_size = 256
output_size = 1
learning_rate = 0.001
batch_size = 32
num_epochs = 20

In [8]:
# Create dataset and dataloaders
dataset = SteeringDataset(root_dir = r"/kaggle/input/driving-dataset/driving_dataset", transform=transform)
train_size = int(0.8*len(dataset))
val_size = int(0.1*len(dataset))
test_size = len(dataset) - train_size - val_size
train_ds,val_ds,test_ds = random_split(dataset,[train_size,val_size,test_size])
train_dataloader = DataLoader(train_ds, batch_size=batch_size, shuffle=True)
val_dataloader = DataLoader(val_ds, batch_size=batch_size, shuffle=False)
test_dataloader = DataLoader(test_ds, batch_size=batch_size, shuffle=False)

In [9]:
# Create models
model = SteeringCNN(backbone,input_channels, hidden_size, output_size).to(device)
criterion = nn.MSELoss()
optimizer = optim.Adam(model.parameters(), lr=learning_rate)

In [10]:
# Training loop
training_losses = []
for epoch in range(num_epochs):
    model.train()
    train_loss = 0.0
    for images, angles in tqdm(train_dataloader, desc=f'Epoch {epoch+1}', leave=False):
        images = images.to(device)
        angles = angles.to(device)
        # Forward pass
        outputs = model(images)
        loss = criterion(outputs, angles.float())
        train_loss += loss.item()

        # Backward pass and optimization
        optimizer.zero_grad()
        loss.backward()
        optimizer.step()

    # Validation
    model.eval()
    val_loss = 0.0
    with torch.no_grad():
        for images, angles in tqdm(val_dataloader,desc=f'Epoch {epoch+1}', leave=False):
            images = images.to(device)
            angles = angles.to(device)
            # Forward pass
            outputs = model(images)
            loss = criterion(outputs, angles.unsqueeze(1).float())

            val_loss += loss.item()

    # Average losses
    train_loss = train_loss / len(train_dataloader)
    training_losses.append(train_loss)
    val_loss = val_loss / len(val_dataloader)


    print(f'Epoch [{epoch+1}],Train Loss: {train_loss:.4f},Val Loss: {val_loss:.4f}')

                                                          

Epoch [1],Train Loss: 0.8502,Val Loss: 0.3449


                                                          

Epoch [2],Train Loss: 0.3061,Val Loss: 0.4170


                                                          

Epoch [3],Train Loss: 0.3033,Val Loss: 0.3188


                                                          

Epoch [4],Train Loss: 0.2930,Val Loss: 0.3187


                                                          

Epoch [5],Train Loss: 0.2884,Val Loss: 0.3197


                                                          

Epoch [6],Train Loss: 0.2881,Val Loss: 0.3190


                                                          

Epoch [7],Train Loss: 0.2879,Val Loss: 0.3187


                                                          

Epoch [8],Train Loss: 0.2879,Val Loss: 0.3186


                                                          

Epoch [9],Train Loss: 0.2879,Val Loss: 0.3186


                                                           

Epoch [10],Train Loss: 0.2879,Val Loss: 0.3186


                                                           

Epoch [11],Train Loss: 0.2879,Val Loss: 0.3186


                                                           

Epoch [12],Train Loss: 0.2879,Val Loss: 0.3187


                                                           

Epoch [13],Train Loss: 0.2883,Val Loss: 0.3187


                                                           

Epoch [14],Train Loss: 0.2879,Val Loss: 0.3187


                                                           

Epoch [15],Train Loss: 0.2882,Val Loss: 0.3187


                                                           

Epoch [16],Train Loss: 0.2880,Val Loss: 0.3188


                                                           

Epoch [17],Train Loss: 0.2879,Val Loss: 0.3187


                                                           

Epoch [18],Train Loss: 0.2879,Val Loss: 0.3187


                                                           

Epoch [19],Train Loss: 0.2879,Val Loss: 0.3186


                                                           

Epoch [20],Train Loss: 0.2879,Val Loss: 0.3186




In [11]:
torch.save(model.state_dict(), 'steering_model.pth')

In [12]:
# Testing
model.eval()
test_loss = 0.0
with torch.no_grad():
    for images, angles in tqdm(test_dataloader,desc=f'Epoch {epoch+1}', leave=False):
        images = images.to(device)
        angles = angles.to(device)

        # Forward pass
        outputs = model(images)
        loss = criterion(outputs, angles.unsqueeze(1).float())

        test_loss += loss.item()
print(f"TestLoss: {test_loss:.4f}")

                                                           

TestLoss: 40.9581


