# FASTER R-CNN

paper:https://arxiv.org/abs/1506.01497

for training:
- input Channel(C) x Height(H) x Width(W) images as torch.Tensor[C, H, W] object
- N boxes coordinates : xmin, ymin, xmax, ymax (in this order) as torch.FloatTensor[N, 4]
- labels for each N boxes: "photoreceptor" as torch.Int64

In [1]:
import torch
import torchvision

from torch.utils.data import Dataset

from torchvision import datasets 
from torchvision import transforms 
from torchvision.io import read_image

from pathlib import Path 
import pandas as pd

  from .autonotebook import tqdm as notebook_tqdm


In [19]:
FAKE_DATA_DIR="../../res/fake_vignette/"

definition du modele

In [24]:
from torchvision.models.detection.faster_rcnn import FastRCNNPredictor
from torchvision.models.detection import fasterrcnn_resnet50_fpn, FasterRCNN_ResNet50_FPN_Weights


def make_model():
    model = fasterrcnn_resnet50_fpn(weights=FasterRCNN_ResNet50_FPN_Weights.DEFAULT)

    num_classes = 2 
    in_features = model.roi_heads.box_predictor.cls_score.in_features

    model.roi_heads.box_predictor = FastRCNNPredictor(in_features, num_classes)
    return model

Definition de notre classe dataset

In [25]:
class FakeImgDataset(Dataset): 
    def __init__(self, root):
        self.root = Path(root)
        self.path_imgs = list(self.root.glob("imgs/*.png"))
        self.df = pd.read_csv(self.root/"annotations.csv")

    def __len__(self):
        return len(self.path_imgs)

    def __getitem__(self, idx): 
        #lazy eval

        # get image to Tensor
        image = read_image(self.path_imgs[idx].as_posix(), torchvision.io.ImageReadMode.GRAY)/255
        
        #get boxes
        image_name = self.path_imgs[idx].name
        np_boxes = self.df[self.df.filename == image_name].iloc[:, 2:].values
        boxes = torch.FloatTensor(np_boxes)

        #get labels, just 1, we have only 1 class
        labels = torch.ones(boxes.shape[0], dtype=torch.int64)

        targets = {
            "boxes" : boxes,
            "labels" : labels
        }

        return image, targets

def my_collate_fun(batch): # wrap the outpur of __getitem__() for DataLoader
    im_lst, tgt_lst = [], []
    for im, tgt in batch:
        im_lst.append(im)
        tgt_lst.append(tgt)
    return im_lst, tgt_lst

In [26]:
device = torch.device('cuda') if torch.cuda.is_available() else torch.device('cpu')

On instencie note classe dataset

In [20]:
fake_train_dataset = FakeImgDataset(FAKE_DATA_DIR)

fake_train_loader = torch.utils.data.DataLoader(    
    fake_train_dataset, batch_size=10, shuffle=True, collate_fn=my_collate_fun, drop_last=True
)

On instancie notre model et on définit l'optimiseur à utiliser (classique avec un SDG -> stochastic descent gradient)
learning rate à 0.005 au début puis diminura toutes les 3 epochs 

In [27]:
model = make_model()
params = [p for p in model.parameters() if p.requires_grad]
optimizer = torch.optim.SGD(params, lr=0.005, momentum=0.9)#, weight_decay=0.0005)
lr_scheduler = torch.optim.lr_scheduler.StepLR(optimizer, step_size=3, gamma=0.1)
num_epochs = 30

In [22]:
for epoch in range(num_epochs):

    for itr, (images, targets) in enumerate(fake_train_loader):
        
        images = [im.to(device) for im in images]
        targets = [{k: v.to(device) for k, v in tgt.items()} for tgt in targets]

        loss_dict = model(images, targets)
        losses = sum(loss for loss in loss_dict.values())
        loss_value = losses.item()

        optimizer.zero_grad()
        losses.backward()
        optimizer.step()

        print(f"\titer:{itr}, loss:{loss_value}")

        lr_scheduler.step()
    
    print(f"epoch:{epoch}/{num_epochs}, loss:{loss_value}")

	iter:0, loss:1.263409972190857
	iter:1, loss:nan
	iter:2, loss:nan
	iter:3, loss:nan
	iter:4, loss:nan


KeyboardInterrupt: 

In [200]:

# criterion = nn.CrossEntropyLoss()
# model1 = make_model()
# params = [p for p in model1.parameters() if p.requires_grad]
# optimizer = optim.SGD(params, lr=0.001, momentum=0.9)

# num_epochs = 10

# for epoch in range(num_epochs):
#     # train for one epoch, printing every 10 iterations
#     train_one_epoch(model, optimizer, data_loader, device, epoch, print_freq=10)
#     # update the learning rate
#     lr_scheduler.step()
#     # evaluate on the test dataset
#     evaluate(model, data_loader_test, device=device)

In [None]:
def train(train_data_loader, model):
    print('Training')
    global train_itr
    global train_loss_list
    
     # initialize tqdm progress bar
    prog_bar = tqdm(train_data_loader, total=len(train_data_loader))
    
    for i, data in enumerate(prog_bar):
        optimizer.zero_grad()
        images, targets = data
        
        images = list(image.to(DEVICE) for image in images)
        targets = [{k: v.to(DEVICE) for k, v in t.items()} for t in targets]
        loss_dict = model(images, targets)
        losses = sum(loss for loss in loss_dict.values())
        loss_value = losses.item()
        train_loss_list.append(loss_value)
        train_loss_hist.send(loss_value)
        losses.backward()
        optimizer.step()
        train_itr += 1
    
        # update the loss value beside the progress bar for each iteration
        prog_bar.set_description(desc=f"Loss: {loss_value:.4f}")
    return train_loss_list