<a href="https://colab.research.google.com/github/vivorima/YOLOv5-CarDetection/blob/main/Car_Detection_.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

## **Pre-requisites:**

1.   having a google account (sorry about that...)
2.   having the dataset.zip in drive



In [None]:
# Mount Google Drive under /content/drive so the dataset stored there can be read.
# force_remount=True asks Colab to mount again even if a mount already exists.
from google.colab import drive
drive.mount('/content/drive', force_remount=True)

# initially I had a zip file but now I use the folder directly, change the paths accordingly
# !unzip "path/to/dataset.zip" -d "path/to/extract"

# Creating the dataset class

First Attempt: *we need to create a class derived from pytorch's Dataset class able to understand our data format to later on create a dataloader and feed it to our model*

After choosing YOLO, I realized we don't really need a class, since YOLO takes direct paths to the data. The class is therefore not needed here, but it could be useful for other models.

In [None]:
import cv2
import matplotlib.pyplot as plt
import torch
import torchvision
from torch.utils.data import Dataset, DataLoader
import os
from sklearn.model_selection import train_test_split
# this is for the progress bar
from tqdm.notebook import tqdm
from torchvision.transforms.functional import to_pil_image

# Base directory on the mounted Drive; the dataset folder, the YAML file and the
# YOLOv5 folder used by the Python cells are all built from it.
# NOTE(review): the shell commands further down spell this directory
# ".../VMI/cars/" (lower-case) — confirm which spelling is correct.
folder = "/content/drive/MyDrive/Bureau/VMI/Cars/"

In [None]:
class CarDataset(Dataset):
    """In-memory car dataset that also exports train/val/test folders for YOLOv5.

    On construction, every ``.png`` in ``<root_dir>/images/`` is read with
    OpenCV, optionally transformed, and paired with the ``Car`` boxes listed in
    ``<root_dir>/labels/<same name>.txt``. Each annotation line is expected to
    be ``<label> <x_min> <y_min> <x_max> <y_max>`` (assumed to be pixel
    coordinates in the *original* image — TODO confirm). Boxes are converted to
    the normalized ``(class, x_center, y_center, width, height)`` layout.

    The samples are then split roughly 70/20/10 and each split is written to
    ``<root_dir>/transformed/<split>/`` as ``<idx>.png`` plus ``<idx>.txt``.

    All images are kept in memory in ``self.images``.

    NOTE(review): ``cv2.imread`` yields BGR data while ``to_pil_image`` is used
    to save it without any channel conversion — confirm the saved colours are
    what the training step expects.
    """

    def __init__(self, root_dir, transform=None):
        """Load, transform, split and export the dataset.

        Args:
            root_dir: Directory containing the ``images/`` and ``labels/``
                sub-folders.
            transform: Optional callable applied to each image as returned by
                ``cv2.imread``. ``None`` (the default) keeps the raw array.
                The previous default ``True`` was not callable and crashed.

        Raises:
            ValueError: If an image cannot be decoded or an annotation line
                does not have exactly five fields.
            FileNotFoundError: If an image has no matching label file.
        """
        self.root_dir = root_dir
        self.transform = transform
        self.images = []
        self.annotations = []

        images_dir = os.path.join(root_dir, 'images/')
        labels_dir = os.path.join(root_dir, 'labels/')

        # Sorted so that the seeded split below is reproducible across runs
        # (os.listdir gives no ordering guarantee).
        for filename in tqdm(sorted(os.listdir(images_dir))):
            if not filename.endswith(".png"):
                continue

            image_filename = os.path.join(images_dir, filename)
            # Swap only the file extension; a blanket str.replace could also
            # hit ".png" appearing elsewhere in the path.
            annotation_filename = os.path.join(
                labels_dir, os.path.splitext(filename)[0] + ".txt"
            )

            image = cv2.imread(image_filename)
            if image is None:
                # cv2.imread signals failure by returning None.
                raise ValueError(f"Could not read image: {image_filename}")

            # Remember the size the annotations refer to *before* any resize;
            # normalized coordinates are unaffected by resizing.
            original_height, original_width = image.shape[:2]

            if self.transform is not None:
                image = self.transform(image)

            with open(annotation_filename, 'r') as f:
                annos = []
                for line in f:
                    anno = self._to_yolo_annotation(
                        line, original_width, original_height
                    )
                    if anno is not None:
                        annos.append(anno)

            self.images.append(image)
            self.annotations.append(annos)

        # Random 70-20-10 split: 30% is held out first, then one third of that
        # hold-out (10% of the total) becomes the test set.
        (self.train_images, rest_images,
         self.train_annotations, rest_annotations) = train_test_split(
            self.images, self.annotations, test_size=0.3, random_state=1
        )
        (self.val_images, self.test_images,
         self.val_annotations, self.test_annotations) = train_test_split(
            rest_images, rest_annotations, test_size=1 / 3, random_state=1
        )

        # Save datasets into YOLOv5 format
        self.save_dataset_yolov5('train', self.train_images, self.train_annotations)
        self.save_dataset_yolov5('val', self.val_images, self.val_annotations)
        self.save_dataset_yolov5('test', self.test_images, self.test_annotations)

    @staticmethod
    def _to_yolo_annotation(line, image_width, image_height):
        """Convert one annotation line into a normalized YOLO tuple.

        Args:
            line: Text of the form ``<label> <x_min> <y_min> <x_max> <y_max>``.
            image_width: Width of the image the coordinates refer to.
            image_height: Height of the image the coordinates refer to.

        Returns:
            ``(0, x_center, y_center, width, height)`` with every value clamped
            to ``[0, 1]``, or ``None`` for blank lines and labels other than
            ``"Car"``.

        Raises:
            ValueError: If a non-blank line does not have exactly five fields
                or a coordinate is not a number.
        """
        fields = line.split()
        if not fields:
            return None  # blank line (e.g. trailing newline at end of file)
        if len(fields) != 5:
            raise ValueError(
                f"Expected 5 fields in annotation line, got {len(fields)}: {line!r}"
            )

        class_label = fields[0]
        x_min, y_min, x_max, y_max = (float(value) for value in fields[1:])

        # Just checking, in case there are some infiltrations
        if class_label != "Car":
            return None

        def clamp(value):
            return max(0.0, min(1.0, value))

        x_center = clamp((x_min + x_max) / 2.0 / image_width)
        y_center = clamp((y_min + y_max) / 2.0 / image_height)
        width = clamp((x_max - x_min) / image_width)
        height = clamp((y_max - y_min) / image_height)

        # since we only have one class, 'Car' we put the idx 0
        return (0, x_center, y_center, width, height)

    def save_dataset_yolov5(self, split, images, annotations):
        """Write one split to ``<root_dir>/transformed/<split>/``.

        Each sample ``idx`` becomes ``<idx>.png`` and ``<idx>.txt``; the text
        file holds one ``class x_center y_center width height`` line per box.

        Args:
            split: Sub-folder name, e.g. ``'train'``, ``'val'`` or ``'test'``.
            images: Images accepted by ``to_pil_image``.
            annotations: Per-image lists of 5-element annotation tuples.
        """
        save_dir = os.path.join(self.root_dir, f'transformed/{split}/')
        os.makedirs(save_dir, exist_ok=True)

        for idx, (image, annotations_for_image) in enumerate(zip(images, annotations)):
            # Save image
            image_path = os.path.join(save_dir, f'{idx}.png')
            pil_image = to_pil_image(image)
            pil_image.save(image_path)

            # Save YOLOv5 format annotation
            annotation_path = os.path.join(save_dir, f'{idx}.txt')
            with open(annotation_path, 'w') as f:
                for anno in annotations_for_image:
                    f.write(f"{anno[0]} {anno[1]} {anno[2]} {anno[3]} {anno[4]}\n")

    def __len__(self):
        """Return the number of loaded images."""
        return len(self.images)

    def __getitem__(self, idx):
        """Return ``(image, annotations)`` for sample ``idx``.

        The annotations are a float32 tensor of shape ``(num_boxes, 5)``;
        an image without boxes yields an empty ``(0, 5)`` tensor.
        """
        image = self.images[idx]
        annotation = self.annotations[idx]
        if annotation:
            annotation = torch.as_tensor(annotation, dtype=torch.float32)
        else:
            # Keep a consistent 2-D shape even when there is nothing to detect.
            annotation = torch.zeros((0, 5), dtype=torch.float32)

        return image, annotation

The images in the dataset have slightly different resolutions and sizes. Since a model expects a consistent input resolution within a dataset, we resize all of them to one standard resolution.

In [None]:
# My root folder, CHANGE PATH
# Dataset root below the shared Drive folder -- CHANGE PATH to match your setup
data_folder = f"{folder}dataset/"

# Common size every image is resized to (picked large so images get upscaled)
target_size = (376, 1242)

# Preprocessing applied to each image, in this order:
#   1. ToTensor  - turn the loaded image into a tensor
#   2. Normalize - (original_value - mean) / std, using the mean/std values
#                  commonly precalculated for the ImageNet dataset
#   3. Resize    - bring the image to target_size
# NOTE(review): CarDataset writes these normalized tensors to disk through
# to_pil_image — confirm that saving normalized images is intended.
transform = torchvision.transforms.Compose(
    [
        torchvision.transforms.ToTensor(),
        torchvision.transforms.Normalize(
            mean=[0.485, 0.456, 0.406],
            std=[0.229, 0.224, 0.225],
        ),
        torchvision.transforms.Resize(target_size),
    ]
)

print("Transforming the dataset and creating the folders necessary for Yolov5, takes a few minutes.. ")
dataset = CarDataset(root_dir=data_folder, transform=transform)

# Let's create our YAML file

So Yolo needs a YAML file that contains some infos about our dataset.

In [None]:
import yaml
import os

# Dataset information written to the YAML file below
num_classes = 1
class_names = ['Car']
# original images size after uniform, yolov5 usually works better with squared images
#  but here the aspect ratio is too big so I wont square them
image_size = [376, 1242]

# Create YAML dictionary.
# Paths are joined with os.path.join so that the trailing slash of data_folder
# does not produce a double slash, and the class count/names come from the
# variables declared above instead of separate hard-coded copies.
yaml_data = {
    'train': os.path.join(data_folder, "transformed", "train"),
    'val': os.path.join(data_folder, "transformed", "val"),
    'img_size': image_size,
    "nc": num_classes,      # number of classes
    "names": class_names,   # list of class names
}

# Save YAML file next to the dataset folder
yaml_path = folder + 'cars.yaml'
with open(yaml_path, 'w') as yaml_file:
    yaml.dump(yaml_data, yaml_file, default_flow_style=False)

print(f'YAML file saved at {yaml_path}')


# Yolov5
I've cloned YOLOv5 and put the folder in my Drive to make access easier.

In [None]:
import sys
# Add the yolov5 folder located under `folder` to Python's import path.
sys.path.append(folder + '/yolov5/')
# Install requirements, all libraries used are here with their versions listed
# PATH MUST BE FORCED HERE
# NOTE(review): this requirements path sits under ".../VMI/yolov5/", whereas
# `folder` is ".../VMI/Cars/" — confirm which location holds the clone.
!pip install -U -r /content/drive/MyDrive/Bureau/VMI/yolov5/requirements.txt

# Using Yolo's Plotting utilities for our metrics

In [None]:
import sys
# Put the yolov5 utils directory on the import path so that `plots` can be
# imported as a top-level module.
# NOTE(review): this points at /content/yolov5, not at the Drive copy used by
# the other cells — confirm the path.
sys.path.append('/content/yolov5/utils')
from plots import plot_results

**LETS TRAIN**

* For the number of epochs, I started by training for 10, then attempted 100, but it timed out for some Google-y reason, so I ran it for 30 epochs and the results were good enough.
* The most stable batch size was 8; this way the memory doesn't crash.
* The parameters of the training can be found in the cfg file.
* Since we have a pretty limited dataset, it is recommended to use pre-trained weights to initialize the training.








In [None]:
# Run YOLOv5 training: 20 epochs, batch size 8, the yolov5s model config,
# the cars dataset YAML, and the weights file given by --weights as the start.
# NOTE(review): --data points at ".../VMI/cars.yaml" while the YAML cell writes
# to folder + 'cars.yaml' (".../VMI/Cars/cars.yaml") — confirm the path.
!python yolov5/train.py --epochs 20 --batch-size 8 --data /content/drive/MyDrive/Bureau/VMI/cars.yaml --cfg yolov5/models/yolov5s.yaml --weights /content/drive/MyDrive/Bureau/VMI/cars/yolov5/runs/train/exp/weights/best.pt

In [None]:
# Location of the YOLOv5 folder below the shared Drive folder
Yolo_Folder = f"{folder}yolov5/"

# Hand the CSV log of the "exp" training run to YOLOv5's plotting helper
plot_results(f"{Yolo_Folder}runs/train/exp/results.csv")

# Show results.png from the same run folder inline
# (presumably produced by plot_results — verify).
fig, ax = plt.subplots(1)
image = cv2.imread(f"{Yolo_Folder}runs/train/exp/results.png")
# OpenCV loads images as BGR; convert so matplotlib shows the right colours
rgb_view = cv2.cvtColor(image, cv2.COLOR_BGR2RGB)
ax.imshow(rgb_view)
plt.show()

In [None]:
# Run YOLOv5's val.py with the weights saved by the "exp" training run.
# NOTE(review): --data here is ".../VMI/cars/cars.yaml", which differs from the
# path passed to train.py (".../VMI/cars.yaml") — confirm which one exists.
!python yolov5/val.py --data /content/drive/MyDrive/Bureau/VMI/cars/cars.yaml --weights /content/drive/MyDrive/Bureau/VMI/cars/yolov5/runs/train/exp/weights/best.pt


# EVALUATION PIPELINE

The source here is the test images, and the weights is our trained model:

In [None]:
# Run YOLOv5's detect.py on the exported test split using the trained weights.
# NOTE(review): the source uses ".../VMI/cars/dataset/..." (lower-case "cars")
# while `folder` is ".../VMI/Cars/" — confirm the spelling.
!python yolov5/detect.py --source /content/drive/MyDrive/Bureau/VMI/cars/dataset/transformed/test --weights /content/drive/MyDrive/Bureau/VMI/cars/yolov5/runs/train/exp/weights/best.pt


# Displaying bounding boxes directly on our test images

In [None]:
# Specify the image filename and annotation filename
import cv2
import numpy as np
import torchvision.transforms as transforms
from torchvision import datasets

# Mean and standard deviation that were used for normalization; needed here to
# undo it before display.
mean = [0.485, 0.456, 0.406]
std = [0.229, 0.224, 0.225]

# Folder holding the images written by the detection run
detect_dir = "/content/drive/MyDrive/Bureau/VMI/cars/yolov5/runs/detect/exp"

# Inverse of Normalize(mean, std): applying Normalize(-m/s, 1/s) gives x*s + m.
# Built once, since it does not depend on the image being processed.
reverse_normalize = transforms.Compose([
    transforms.Normalize(mean=[-m/s for m, s in zip(mean, std)], std=[1/s for s in std]),
])

# NOTE(review): cv2.imread returns channels in BGR order while mean/std above
# are applied by channel index — confirm the channel order matches the one used
# when the images were normalized.
for filename in tqdm(os.listdir(detect_dir)):
    if not filename.endswith(".png"):
        continue

    image = cv2.imread(os.path.join(detect_dir, filename))
    if image is None:
        # cv2.imread returns None for unreadable files; skip instead of crashing
        print(f"Skipping unreadable image: {filename}")
        continue
    image = image.astype(np.float32) / 255.0

    # Apply the reverse normalization: HWC array -> CHW tensor -> HWC array
    original_image = reverse_normalize(
        torch.from_numpy(image).permute(2, 0, 1)
    ).permute(1, 2, 0).numpy()

    # Clip values to ensure they are in the valid range [0, 1]
    original_image = np.clip(original_image, 0, 1)

    # Convert back to uint8 for display
    original_image = (original_image * 255).astype(np.uint8)

    # Create a new figure and axis for each image
    fig, ax = plt.subplots()

    # The image was read with cv2 (BGR), so convert to RGB before plotting
    ax.imshow(cv2.cvtColor(original_image, cv2.COLOR_BGR2RGB))

    # Display the image and close the figure
    plt.show()
    plt.close(fig)