# YOLOv8 training
This part of the project explores the capabilities of the YOLOv8 model.

In [None]:
! pip install ultralytics

In [None]:
import os
from ultralytics import YOLO

In [None]:
# Switch between the two experiments below:
#   False -> single-stage detection + classification
#   True  -> two-stage pipeline (detector, then classifier)
DOUBLE_STEP = False

# Machine-specific root folder holding the YOLOv8 data.
BASE_PATH = r"C:\Users\tlust\Downloads\mtsd\yolov8"

# Dataset description file handed to the detection training runs.
detect_path = os.path.join(BASE_PATH, "detect", "dataset.yaml")
# Folder handed to the classification training run.
classify_path = os.path.join(BASE_PATH, "classify")

## One-step full-taxonomy detection + classification into 313 classes
Initial experiment to assess the model's default performance across the entire taxonomy. Anticipated to yield suboptimal results due to the extensive number of classes.

In [None]:
# Single-stage experiment: runs only when the two-stage pipeline is disabled.
if not DOUBLE_STEP:
    # Alternative ways to create the model, kept for reference:
    #model = YOLO('yolov8n.yaml')  # build a new model from YAML
    #model = YOLO('yolov8n.yaml').load('yolov8n.pt')  # build from YAML and transfer weights
    model = YOLO("yolov8m.pt")  # load a pretrained model
    # NOTE(review): fliplr=0 presumably disables the horizontal-flip
    # augmentation (mirroring would alter direction-dependent signs) —
    # confirm against the Ultralytics training-argument docs.
    results = model.train(data=detect_path, epochs=100, imgsz=640, batch=16, fliplr=0)
    print(results)

## 2-stage pipeline
Expected to yield better results because detection and classification are decoupled into separate models.

### 1. train binary sign detector

In [None]:
# Stage 1 of the two-stage pipeline; skipped unless DOUBLE_STEP is set.
if DOUBLE_STEP:
    # Other ways to obtain a model, left here for reference:
    #   YOLO('yolov8n.yaml')                      build a new model from YAML
    #   YOLO('yolov8n.yaml').load('yolov8n.pt')   build from YAML and transfer weights
    model = YOLO('yolov8n.pt')  # start from the pretrained checkpoint
    results = model.train(
        data=detect_path,
        epochs=10,
        imgsz=640,
    )
    print(results)

### 2. train sign classifier

In [None]:
# Stage 2 of the two-stage pipeline; skipped unless DOUBLE_STEP is set.
if DOUBLE_STEP:
    # Other ways to obtain a model, left here for reference:
    #   YOLO('yolov8n-cls.yaml')                          build a new model from YAML
    #   YOLO('yolov8n-cls.yaml').load('yolov8n-cls.pt')   build from YAML and transfer weights
    model = YOLO('yolov8x-cls.pt')  # start from the pretrained checkpoint
    results = model.train(
        data=classify_path,
        epochs=100,
        imgsz=224,
        batch=128,
    )
    print(results)

# DETR

In [None]:
import os
import torchvision

class MappilaryDataset(torchvision.datasets.CocoDetection):
    """COCO-format detection dataset whose samples are run through a processor.

    Images are read from ``<dir>/images`` and annotations from
    ``<dir>/annotations/train.json`` or ``<dir>/annotations/val.json``.

    Args:
        dir: Dataset root folder. The name shadows the ``dir`` builtin but is
            kept because callers pass it as a keyword argument.
        processor: Callable invoked as
            ``processor(images=..., annotations=..., return_tensors="pt")``;
            its result must provide ``"pixel_values"`` and ``"labels"``.
        train: Selects ``train.json`` when true, ``val.json`` otherwise.
    """

    def __init__(self, dir, processor, train=True):
        annotation_file = "train.json" if train else "val.json"
        super().__init__(
            os.path.join(dir, "images"),
            os.path.join(dir, "annotations", annotation_file),
        )
        self.processor = processor

    def __getitem__(self, idx):
        """Return ``(pixel_values, target)`` for the sample at position ``idx``."""
        img, annotations = super().__getitem__(idx)
        # Wrap the raw annotation list together with its image id before
        # handing it to the processor.
        target = {
            'image_id': self.ids[idx],
            'annotations': annotations,
        }
        encoding = self.processor(
            images=img,
            annotations=target,
            return_tensors="pt",
        )
        # A single image was encoded, so only the leading batch axis should be
        # removed. squeeze(0) does exactly that, whereas an argument-less
        # squeeze() would also drop any other axis that happens to have size 1.
        pixel_values = encoding["pixel_values"].squeeze(0)
        target = encoding["labels"][0]
        return pixel_values, target

In [None]:
from transformers import DetrImageProcessor

# Machine-specific root of the COCO-format data; both splits are read from it.
COCO_ROOT = 'C:/Users/tlust/Downloads/mtsd/coco'

# Processor loaded from the pretrained "facebook/detr-resnet-50" checkpoint;
# it is shared by both datasets.
processor = DetrImageProcessor.from_pretrained("facebook/detr-resnet-50")

train_dataset = MappilaryDataset(dir=COCO_ROOT, processor=processor, train=True)
val_dataset = MappilaryDataset(dir=COCO_ROOT, processor=processor, train=False)

In [None]:
from torch.utils.data import DataLoader

def collate_fn(batch):
    """Combine ``(pixel_values, target)`` samples into one batch dictionary.

    The image tensors are passed to the module-level ``processor.pad``; the
    per-sample targets are kept as a plain list.

    Returns:
        dict with keys ``'pixel_values'``, ``'pixel_mask'`` and ``'labels'``.
    """
    images = []
    targets = []
    for sample in batch:
        images.append(sample[0])
        targets.append(sample[1])

    padded = processor.pad(images, return_tensors="pt")
    return {
        'pixel_values': padded['pixel_values'],
        'pixel_mask': padded['pixel_mask'],
        'labels': targets,
    }

# Training loader: shuffled, four samples per batch.
train_dataloader = DataLoader(
    dataset=train_dataset,
    batch_size=4,
    shuffle=True,
    collate_fn=collate_fn,
)

# Validation loader: default ordering, two samples per batch.
val_dataloader = DataLoader(
    dataset=val_dataset,
    batch_size=2,
    collate_fn=collate_fn,
)

# Fetch one training batch up front so the data pipeline is exercised once.
batch = next(iter(train_dataloader))

In [None]:
import pytorch_lightning as pl
from transformers import DetrConfig, DetrForObjectDetection
import torch

class Detr(pl.LightningModule):
    """LightningModule that fine-tunes ``facebook/detr-resnet-50``.

    Args:
        lr: Learning rate for parameters whose name does not contain
            ``"backbone"``.
        lr_backbone: Learning rate for parameters whose name contains
            ``"backbone"``.
        weight_decay: Weight decay passed to AdamW.
        num_labels: Number of labels requested from the pretrained model.
            Defaults to 257, the value that used to be hard-coded, so
            existing callers are unaffected.
    """

    def __init__(self, lr, lr_backbone, weight_decay, num_labels=257):
        super().__init__()
        # NOTE(review): ignore_mismatched_sizes presumably lets the checkpoint
        # load although its head was trained for a different label count —
        # confirm against the transformers documentation.
        self.model = DetrForObjectDetection.from_pretrained(
            "facebook/detr-resnet-50",
            num_labels=num_labels,
            ignore_mismatched_sizes=True,
        )
        self.lr = lr
        self.lr_backbone = lr_backbone
        self.weight_decay = weight_decay

    def forward(self, pixel_values, pixel_mask):
        """Run the wrapped model without labels and return its outputs."""
        return self.model(pixel_values=pixel_values, pixel_mask=pixel_mask)

    def common_step(self, batch, batch_idx):
        """Run the model with labels and return ``(loss, loss_dict)``."""
        # Each label is a dict of tensors; move every tensor to the module's device.
        labels = [
            {key: value.to(self.device) for key, value in target.items()}
            for target in batch["labels"]
        ]
        outputs = self.model(
            pixel_values=batch["pixel_values"],
            pixel_mask=batch["pixel_mask"],
            labels=labels,
        )
        return outputs.loss, outputs.loss_dict

    def _log_losses(self, total_name, component_prefix, loss, loss_dict):
        """Log the total loss and every entry of ``loss_dict`` under a prefix."""
        self.log(total_name, loss)
        for name, value in loss_dict.items():
            self.log(component_prefix + name, value.item())

    def training_step(self, batch, batch_idx):
        """Compute, log and return the training loss for one batch."""
        loss, loss_dict = self.common_step(batch, batch_idx)
        self._log_losses("training_loss", "train_", loss, loss_dict)
        return loss

    def validation_step(self, batch, batch_idx):
        """Compute, log and return the validation loss for one batch."""
        loss, loss_dict = self.common_step(batch, batch_idx)
        self._log_losses("validation_loss", "validation_", loss, loss_dict)
        return loss

    def configure_optimizers(self):
        """Build AdamW with a separate learning rate for backbone parameters."""
        backbone_params = []
        other_params = []
        for name, param in self.named_parameters():
            if not param.requires_grad:
                continue
            if "backbone" in name:
                backbone_params.append(param)
            else:
                other_params.append(param)

        # The first group uses the optimizer-wide lr; the second overrides it.
        param_groups = [
            {"params": other_params},
            {"params": backbone_params, "lr": self.lr_backbone},
        ]
        return torch.optim.AdamW(
            param_groups,
            lr=self.lr,
            weight_decay=self.weight_decay,
        )

    def train_dataloader(self):
        """Return the module-level ``train_dataloader`` object."""
        return train_dataloader

    def val_dataloader(self):
        """Return the module-level ``val_dataloader`` object."""
        return val_dataloader

In [6]:
from pytorch_lightning import Trainer

MAX_EPOCHS = 50

# Equivalent call for pytorch_lightning < 2.0.0, kept for reference:
# trainer = Trainer(gpus=1, max_epochs=MAX_EPOCHS, gradient_clip_val=0.1, accumulate_grad_batches=8, log_every_n_steps=5)

# Call for pytorch_lightning >= 2.0.0: one GPU device.
trainer = Trainer(
    accelerator="gpu",
    devices=1,
    max_epochs=MAX_EPOCHS,
    accumulate_grad_batches=8,
    gradient_clip_val=0.1,
    log_every_n_steps=5,
)

# The backbone learning rate is ten times smaller than the main one.
model = Detr(lr=1e-4, lr_backbone=1e-5, weight_decay=1e-4)

trainer.fit(model)