#Laborator 5

In cadrul acestui laborator vom implementa o solutie de detectie. Datasetul folosit se numeste [American Sign Language Letters Dataset](https://public.roboflow.com/object-detection/american-sign-language-letters/1/download/coco).

In [None]:
# Download the dataset archive to roboflow.zip (following redirects), extract it
# quietly into the current directory, then delete the archive.
# NOTE: the commands are chained with ';', so unzip/rm still run if curl fails.
!curl -L "https://public.roboflow.com/ds/KamceLFGGS?key=dqp8HADMki" > roboflow.zip; unzip -q roboflow.zip; rm roboflow.zip

In [None]:
# List the current directory (the archive above is extracted here).
! ls

In [None]:
# Show the entries of train/ whose name contains "json"; the dataset cell below
# reads train/_annotations.coco.json.
!ls train | grep "json"

## Crearea Dataloader-ului

In continuare, pentru a incarca date, folosim un obiect de tipul torch.utils.data.Dataset. Acesta are 3 metode importante:

```
__init__()
__len__()
__getitem__()
```





In [None]:
import matplotlib.patches as patches
import matplotlib.pyplot as plt
import torch as t
import torch
from PIL import Image
import numpy as np
from torch.utils.data import Dataset, DataLoader
from torchvision.transforms.functional import to_tensor, normalize
import random
from pycocotools.coco import COCO
import os

from torchvision.transforms.functional import to_tensor

random.seed(42)

class HandsDataset(Dataset):
  """COCO-annotated image dataset that yields one bounding box per image.

  Each item is a tuple ``(image, coordinates, category_id)``:
    * ``image``: the resized RGB image converted with ``to_tensor``;
    * ``coordinates``: float32 array ``[x1, y1, x2, y2]`` of the image's first
      annotation, each value divided by the original image width/height;
    * ``category_id``: the ``category_id`` of that same annotation.
  """

  def __init__(self, coco_root, coco_annos, coco_imgs, img_size=(320, 320)):
    """
    Args:
        coco_root (str): base directory onto which the two paths below are joined.
        coco_annos (str): path of the COCO annotation JSON file, relative to `coco_root`.
        coco_imgs (str): path of the image directory, relative to `coco_root`.
        img_size (tuple): target size handed to PIL's `Image.resize`,
            i.e. (width, height).
    """
    self.coco_root = coco_root
    self.coco_annos = coco_annos
    self.coco_imgs = coco_imgs

    self.coco_anno_file = os.path.join(coco_root, coco_annos)
    self.coco_imgs_dir = os.path.join(coco_root, coco_imgs)

    self.coco = COCO(self.coco_anno_file)

    self.img_size = img_size

    self.init_dataset()

  def init_dataset(self):
    """Cache the category, image and annotation ids and print the image count."""
    self.cat_ids = self.coco.getCatIds()

    self.img_ids = self.coco.getImgIds()
    self.ann_ids = self.coco.getAnnIds(self.img_ids)

    print("Dataset size {}".format(len(self.img_ids)))

  def __len__(self):
    """Return the number of images in the annotation file."""
    return len(self.img_ids)

  def __getitem__(self, idx):
    """Load image `idx` and the normalized box / class of its first annotation.

    Only the first annotation of the image is used; any further annotations
    are ignored.
    """
    img_id = self.img_ids[idx]
    img_meta = self.coco.loadImgs(img_id)[0]
    img_path = os.path.join(self.coco_imgs_dir, img_meta['file_name'])

    ann_id = self.coco.getAnnIds(img_id)
    annos = self.coco.loadAnns(ann_id)[0]

    # The context manager closes the underlying file once the pixels are copied.
    with Image.open(img_path) as raw_img:
      original_width, original_height = raw_img.size
      # Force exactly 3 channels: grayscale is replicated, while alpha / palette /
      # CMYK images are converted instead of producing a wrong channel count.
      img = raw_img.convert("RGB").resize(self.img_size)

    img = to_tensor(np.array(img))

    bbox = annos['bbox']  # box is xywh
    cat_id = annos['category_id']

    x1, y1, w, h = bbox
    x2, y2 = x1 + w, y1 + h

    # Normalize by the ORIGINAL image size so the box does not depend on img_size.
    coordinates = np.array(
        [
            x1 / original_width,
            y1 / original_height,
            x2 / original_width,
            y2 / original_height,
        ],
        dtype=np.float32,
    )

    return img, coordinates, cat_id

Construire Dataset si vizualizare date.



In [None]:
from IPython.display import clear_output
import time

# Base directory of the extracted dataset; "" means the current working directory.
coco_root = ""

# These paths are RELATIVE to coco_root: HandsDataset joins them onto coco_root
# itself, so pre-joining them here would duplicate a non-empty relative root.
coco_anno_train = "train/_annotations.coco.json"
coco_imgs_train = "train"

coco_anno_valid = "valid/_annotations.coco.json"
coco_imgs_valid = "valid"

dataset_train = HandsDataset(coco_root, coco_anno_train, coco_imgs_train, img_size=(224, 224))
train_loader = DataLoader(dataset_train, batch_size=16, shuffle=True, num_workers=1)

dataset_valid = HandsDataset(coco_root, coco_anno_valid, coco_imgs_valid, img_size=(224, 224))
valid_loader = DataLoader(dataset_valid, batch_size=1, shuffle=False, num_workers=1) # keep batch_size=1

In [None]:
# Show the first image of a few training batches with its ground-truth box.
see_examples = 10
for i, (imgs, coordinates, cat_id) in enumerate(train_loader):
    clear_output(wait=True)
    # Move the channel axis last so matplotlib can draw the image
    # (the batch is presumably (N, C, H, W) as produced by to_tensor).
    imgs = imgs.permute(0, 2, 3, 1)
    print(imgs.shape)

    plt.imshow(imgs[0].numpy())

    # img_size is handed to PIL's resize as (width, height): x scales with the
    # width (index 0) and y with the height (index 1).
    img_w, img_h = dataset_train.img_size
    x1, y1, x2, y2 = coordinates[0].tolist()
    x1, x2 = x1 * img_w, x2 * img_w
    y1, y2 = y1 * img_h, y2 * img_h

    # Coordinates are continuous, so the side lengths are plain differences.
    rect = patches.Rectangle((x1, y1), x2 - x1, y2 - y1, linewidth=1, edgecolor='r', facecolor='none')
    plt.gca().add_patch(rect)
    plt.show()

    if i >= see_examples - 1:
      break
    time.sleep(1)

# Crearea unei retele neurale convolutionale

### Cerinte
* Creati o arhitectura de retea neuronala convolutionala pentru regresie pe cele 4 coordonate ale bounding box-ului din imaginile din dataset.
* Punctaj: 7 puncte pentru o retea cu rezultate *bune*.

#### Hint


  


*   Numarul de exemple de antrenare este prea mic pentru a antrena o retea "from scratch". Folositi o retea pre-antrenata pe ImageNet, care a invatat deja sa recunoasca trasaturi utile pentru detectia de obiecte. Arhitectura recomandata este ResNet18 din PyTorch. Alte arhitecturi pot fi incercate.
*   La final trebuie utilizata o functie de activare care acopera [0,1]



### Definirea obiectelor folosite in timpul antrenarii
  * Numarul de epoci
  * Retea
  * Optimizator
  * Functie de loss

Experimentati cu valorile hiper-parametrilor de mai sus astfel incat reteaua sa invete *bine*.

In [None]:
import torch.optim as optim

# Exercise placeholders: each value below is None until filled in, and all four
# are passed to train_fn in a later cell.

# Set the number of epochs
epochs = None

# Define the network
network = None

# Define the optimizer
optimizer = None
# After the optimizer is defined, and after every iteration, zero_grad() must be called.
# It sets all gradients to zero.
# Add the code that zeroes the gradients here


# Define the loss function for regression
loss_fn = None

## Definirea functiei de antrenare

In [None]:
def bb_intersection_over_union(boxA, boxB, offset=0.0):
  """Return the intersection-over-union of two axis-aligned boxes.

  Args:
      boxA: indexable ``[x1, y1, x2, y2]`` (list, NumPy array or tensor row).
      boxB: second box, same layout as `boxA`.
      offset: amount added to every side length. The default 0 is correct for
          continuous coordinates such as boxes normalized to [0, 1]; pass 1
          for inclusive integer pixel coordinates.

  Returns:
      float: IoU in [0, 1]; 0.0 when the union has no area.
  """
  # Convert to plain floats so mixed tensor / NumPy scalars cannot leak into
  # the result.
  ax1, ay1, ax2, ay2 = float(boxA[0]), float(boxA[1]), float(boxA[2]), float(boxA[3])
  bx1, by1, bx2, by2 = float(boxB[0]), float(boxB[1]), float(boxB[2]), float(boxB[3])

  # Sides of the intersection rectangle, clamped at 0 for disjoint boxes.
  inter_w = max(0.0, min(ax2, bx2) - max(ax1, bx1) + offset)
  inter_h = max(0.0, min(ay2, by2) - max(ay1, by1) + offset)
  inter_area = inter_w * inter_h

  # Clamp each side so an inverted box (x2 < x1) counts as empty instead of
  # getting a positive area from two negative sides.
  area_a = max(0.0, ax2 - ax1 + offset) * max(0.0, ay2 - ay1 + offset)
  area_b = max(0.0, bx2 - bx1 + offset) * max(0.0, by2 - by1 + offset)

  union_area = area_a + area_b - inter_area
  if union_area <= 0.0:
    # Both boxes are degenerate; avoid dividing by zero.
    return 0.0
  return inter_area / union_area

def train_fn(epochs: int, train_loader: DataLoader, test_loader: DataLoader, 
             net: torch.nn.Module, loss_fn: torch.nn.Module, optimizer: optim.Optimizer):
  """Train `net` for box regression and report loss and IoU accuracy per epoch.

  Args:
      epochs: number of passes over `train_loader`.
      train_loader: yields ``(images, boxes, class_id)`` batches; the class id
          is ignored here.
      test_loader: same batch layout, evaluated after every epoch.
      net: model mapping an image batch to one ``[x1, y1, x2, y2]`` row per image.
      loss_fn: loss applied to ``(predictions, target_boxes)``. Batch losses are
          weighted by batch size, which assumes a mean-reduced loss.
      optimizer: optimizer that updates the parameters of `net`.
  """
  # Run on the device the network already lives on; a network without
  # parameters falls back to CUDA, matching the previous hard-coded behaviour.
  first_param = next(net.parameters(), None)
  device = first_param.device if first_param is not None else torch.device("cuda")

  # Iterate over the epochs
  for e in range(epochs):
    train_loss = 0.0
    valid_loss = 0.0
    train_seen = 0
    valid_seen = 0

    # Iterate over the training batches
    net.train()
    for images, labels, _ in train_loader:
      images = images.to(device)
      labels = labels.to(device)

      # Clear gradients left over from the previous step (or from before the call)
      optimizer.zero_grad()
      # Forward pass
      out = net(images)
      # Loss between the network output and the annotated boxes
      loss = loss_fn(out, labels)
      # Back-propagation
      loss.backward()
      # Apply the gradients to the network parameters
      optimizer.step()

      train_loss += loss.item() * images.size(0)
      train_seen += images.size(0)

    # Accuracy computation
    iou_values = []
    net.eval()

    # No gradients are needed for evaluation; skipping them saves memory.
    with torch.no_grad():
      for test_image, box_gt, _ in test_loader:
        test_image = test_image.to(device)
        bbox_pred = net(test_image)

        loss = loss_fn(bbox_pred, box_gt.to(device))
        valid_loss += loss.item() * test_image.size(0)
        valid_seen += test_image.size(0)

        # Score every sample of the batch, not only the first one.
        pred_boxes = bbox_pred.cpu().numpy()
        gt_boxes = box_gt.cpu().numpy()
        for pred_box, gt_box in zip(pred_boxes, gt_boxes):
          iou_values.append(float(bb_intersection_over_union(pred_box, gt_box)))

    # Average over the samples actually seen; max(..., 1) guards an empty loader.
    train_loss = train_loss / max(train_seen, 1)
    valid_loss = valid_loss / max(valid_seen, 1)

    print(f"Average train loss : {train_loss}")
    print(f"Average valid loss : {valid_loss}")

    iou_values = np.array(iou_values)
    count = max(len(iou_values), 1)
    # Multiply by 100 because the messages below print the value followed by '%'.
    acc_50 = 100.0 * (iou_values > 0.5).sum() / count
    acc_75 = 100.0 * (iou_values > 0.75).sum() / count
    acc_90 = 100.0 * (iou_values > 0.90).sum() / count

    print("Acuratetea IOU 50% la finalul epocii {} este {:.2f}%".format(e, acc_50))
    print("Acuratetea IOU 75% la finalul epocii {} este {:.2f}%".format(e, acc_75))
    print("Acuratetea IOU 90% la finalul epocii {} este {:.2f}%".format(e, acc_90))


In [None]:
# Run training with the objects defined in the cells above; valid_loader is
# passed as train_fn's test_loader argument.
train_fn(epochs, train_loader, valid_loader, network, loss_fn, optimizer)

## Augmentare date

O metoda de imbunatatire a performantei modelului este augmentarea setului de date. Aceste augmentari trebuie sa tina cont de natura taskului si de natura etichetelor.

### Cerinta

(3p) Modificati Dataset-ul precedent astfel incat sa augmenteze datele de antrenare cu o probabilitate aleasa de voi. Cum afecteaza acest lucru performanta modelului?

### Bonus
(2p) Dataloaderul intoarce si clasa fiecarui obiect de detectat (litera corespunzatoare fiecarui semn din American Sign Language). Creati o arhitectura care prezice si aceasta clasa, pe langa regresia pe box. Antrenati reteaua.