In [1]:
from torchvision.models.detection import fasterrcnn_resnet50_fpn, FasterRCNN_ResNet50_FPN_Weights
import torchvision
import torchvision.transforms as transforms
from torchvision.models.detection.faster_rcnn import FastRCNNPredictor
import torch
import torch.utils
from torch.utils.data import (
    Dataset,
    DataLoader
)
import numpy as np
from PIL import Image
import math
import sys
import os
import xml.etree.ElementTree as ET
import engine
import train
#import detection.detectionutils  # Get this folder into the project directory from https://github.com/pytorch/vision/tree/main/references/detection
#import detection.engine  # In order to work in Colab, you need to change rows 7-9 of engine.py to:
                                                                  # import detection.utils
                                                                  # from detection.coco_eval import CocoEvaluator
                                                                  # from detection.coco_utils import get_coco_api_from_dataset
                                                                  # Also in coco_utils.py you need to change 7th row to 'import torchvision.transforms as T'

In [2]:
import datetime
import errno
import os
import time
from collections import defaultdict, deque

import torch
import torch.distributed as dist


class SmoothedValue:
    """Track a series of values and provide access to smoothed values over a
    window or the global series average.
    """

    def __init__(self, window_size=20, fmt=None):
        if fmt is None:
            fmt = "{median:.4f} ({global_avg:.4f})"
        self.deque = deque(maxlen=window_size)
        self.total = 0.0
        self.count = 0
        self.fmt = fmt

    def update(self, value, n=1):
        self.deque.append(value)
        self.count += n
        self.total += value * n

    def synchronize_between_processes(self):
        """
        Warning: does not synchronize the deque!
        """
        if not is_dist_avail_and_initialized():
            return
        t = torch.tensor([self.count, self.total], dtype=torch.float64, device="cuda")
        dist.barrier()
        dist.all_reduce(t)
        t = t.tolist()
        self.count = int(t[0])
        self.total = t[1]

    @property
    def median(self):
        d = torch.tensor(list(self.deque))
        return d.median().item()

    @property
    def avg(self):
        d = torch.tensor(list(self.deque), dtype=torch.float32)
        return d.mean().item()

    @property
    def global_avg(self):
        return self.total / self.count

    @property
    def max(self):
        return max(self.deque)

    @property
    def value(self):
        return self.deque[-1]

    def __str__(self):
        return self.fmt.format(
            median=self.median, avg=self.avg, global_avg=self.global_avg, max=self.max, value=self.value
        )


def all_gather(data):
    """
    Run all_gather on arbitrary picklable data (not necessarily tensors)
    Args:
        data: any picklable object
    Returns:
        list[data]: list of data gathered from each rank
    """
    world_size = get_world_size()
    if world_size == 1:
        return [data]
    data_list = [None] * world_size
    dist.all_gather_object(data_list, data)
    return data_list


def reduce_dict(input_dict, average=True):
    """
    Args:
        input_dict (dict): all the values will be reduced
        average (bool): whether to do average or sum
    Reduce the values in the dictionary from all processes so that all processes
    have the averaged results. Returns a dict with the same fields as
    input_dict, after reduction.
    """
    world_size = get_world_size()
    if world_size < 2:
        return input_dict
    with torch.inference_mode():
        names = []
        values = []
        # sort the keys so that they are consistent across processes
        for k in sorted(input_dict.keys()):
            names.append(k)
            values.append(input_dict[k])
        values = torch.stack(values, dim=0)
        dist.all_reduce(values)
        if average:
            values /= world_size
        reduced_dict = {k: v for k, v in zip(names, values)}
    return reduced_dict


class MetricLogger:
    def __init__(self, delimiter="\t"):
        self.meters = defaultdict(SmoothedValue)
        self.delimiter = delimiter

    def update(self, **kwargs):
        for k, v in kwargs.items():
            if isinstance(v, torch.Tensor):
                v = v.item()
            assert isinstance(v, (float, int))
            self.meters[k].update(v)

    def __getattr__(self, attr):
        if attr in self.meters:
            return self.meters[attr]
        if attr in self.__dict__:
            return self.__dict__[attr]
        raise AttributeError(f"'{type(self).__name__}' object has no attribute '{attr}'")

    def __str__(self):
        loss_str = []
        for name, meter in self.meters.items():
            loss_str.append(f"{name}: {str(meter)}")
        return self.delimiter.join(loss_str)

    def synchronize_between_processes(self):
        for meter in self.meters.values():
            meter.synchronize_between_processes()

    def add_meter(self, name, meter):
        self.meters[name] = meter

    def log_every(self, iterable, print_freq, header=None):
        i = 0
        if not header:
            header = ""
        start_time = time.time()
        end = time.time()
        iter_time = SmoothedValue(fmt="{avg:.4f}")
        data_time = SmoothedValue(fmt="{avg:.4f}")
        space_fmt = ":" + str(len(str(len(iterable)))) + "d"
        if torch.cuda.is_available():
            log_msg = self.delimiter.join(
                [
                    header,
                    "[{0" + space_fmt + "}/{1}]",
                    "eta: {eta}",
                    "{meters}",
                    "time: {time}",
                    "data: {data}",
                    "max mem: {memory:.0f}",
                ]
            )
        else:
            log_msg = self.delimiter.join(
                [header, "[{0" + space_fmt + "}/{1}]", "eta: {eta}", "{meters}", "time: {time}", "data: {data}"]
            )
        MB = 1024.0 * 1024.0
        for obj in iterable:
            data_time.update(time.time() - end)
            yield obj
            iter_time.update(time.time() - end)
            if i % print_freq == 0 or i == len(iterable) - 1:
                eta_seconds = iter_time.global_avg * (len(iterable) - i)
                eta_string = str(datetime.timedelta(seconds=int(eta_seconds)))
                if torch.cuda.is_available():
                    print(
                        log_msg.format(
                            i,
                            len(iterable),
                            eta=eta_string,
                            meters=str(self),
                            time=str(iter_time),
                            data=str(data_time),
                            memory=torch.cuda.max_memory_allocated() / MB,
                        )
                    )
                else:
                    print(
                        log_msg.format(
                            i, len(iterable), eta=eta_string, meters=str(self), time=str(iter_time), data=str(data_time)
                        )
                    )
            i += 1
            end = time.time()
        total_time = time.time() - start_time
        total_time_str = str(datetime.timedelta(seconds=int(total_time)))
        print(f"{header} Total time: {total_time_str} ({total_time / len(iterable):.4f} s / it)")


def collate_fn(batch):
    return tuple(zip(*batch))


def mkdir(path):
    try:
        os.makedirs(path)
    except OSError as e:
        if e.errno != errno.EEXIST:
            raise


def setup_for_distributed(is_master):
    """
    This function disables printing when not in master process
    """
    import builtins as __builtin__

    builtin_print = __builtin__.print

    def print(*args, **kwargs):
        force = kwargs.pop("force", False)
        if is_master or force:
            builtin_print(*args, **kwargs)

    __builtin__.print = print


def is_dist_avail_and_initialized():
    if not dist.is_available():
        return False
    if not dist.is_initialized():
        return False
    return True


def get_world_size():
    if not is_dist_avail_and_initialized():
        return 1
    return dist.get_world_size()


def get_rank():
    if not is_dist_avail_and_initialized():
        return 0
    return dist.get_rank()


def is_main_process():
    return get_rank() == 0


def save_on_master(*args, **kwargs):
    if is_main_process():
        torch.save(*args, **kwargs)


def init_distributed_mode(args):
    if "RANK" in os.environ and "WORLD_SIZE" in os.environ:
        args.rank = int(os.environ["RANK"])
        args.world_size = int(os.environ["WORLD_SIZE"])
        args.gpu = int(os.environ["LOCAL_RANK"])
    elif "SLURM_PROCID" in os.environ:
        args.rank = int(os.environ["SLURM_PROCID"])
        args.gpu = args.rank % torch.cuda.device_count()
    else:
        print("Not using distributed mode")
        args.distributed = False
        return

    args.distributed = True

    torch.cuda.set_device(args.gpu)
    args.dist_backend = "nccl"
    print(f"| distributed init (rank {args.rank}): {args.dist_url}", flush=True)
    torch.distributed.init_process_group(
        backend=args.dist_backend, init_method=args.dist_url, world_size=args.world_size, rank=args.rank
    )
    torch.distributed.barrier()
    setup_for_distributed(args.rank == 0)


Defining function, which will return pretrained model with a replaced head

In [3]:
def get_model_detection(num_classes):
    # load an instance segmentation model pre-trained on COCO
    model = torchvision.models.detection.fasterrcnn_resnet50_fpn(weights="DEFAULT")

    # get number of input features for the classifier
    in_features = model.roi_heads.box_predictor.cls_score.in_features
    
    # replace the pre-trained head with a new one
    model.roi_heads.box_predictor = FastRCNNPredictor(in_features, num_classes)

    return model

Defining transforms for train and test sets

In [4]:
train_transform = FasterRCNN_ResNet50_FPN_Weights.COCO_V1.transforms
test_transform = FasterRCNN_ResNet50_FPN_Weights.COCO_V1.transforms

In [5]:
def get_transform(train):
    transform = []
    transform.append(transforms.PILToTensor())
    transform.append(transforms.ConvertImageDtype(torch.float))
    if train:
        transform.append(transforms.RandomHorizontalFlip(0.5))
    return transforms.Compose(transform)

Creating a Dataset class 

In [6]:
class MasksDataset(Dataset):
  def __init__(self, root_dir, transform = None):
    self.root_dir = root_dir
    self.transform = transform
    self.images = list(sorted(os.listdir(os.path.join(self.root_dir, "images"))))
    self.annotations = list(sorted(os.listdir(os.path.join(self.root_dir, "annotations"))))

  def __len__(self):
    return len(self.images)

  def __getitem__(self, index):
    img_path = os.path.join(self.root_dir, "images", self.images[index])
    img = Image.open(img_path).convert("RGB")

    annotation_path = os.path.join(self.root_dir, "annotations", self.annotations[index])
    tree = ET.parse(annotation_path)
    root = tree.getroot()


    box = []
    label = [1,]
    for j in range(4):
        box.append(int(root[5][4][j].text))

    box = torch.as_tensor([box,], dtype=torch.float32)
    label = torch.as_tensor(label, dtype=torch.int64)

    if self.transform is not None:
      img = self.transform(img)  
    target = {'boxes': box, 'labels': label}
    return img, target



In [7]:
dataset_path = r"/content/drive/MyDrive/passport_dataset"

train_set = MasksDataset(root_dir= dataset_path, transform=get_transform(train=True))
test_set = MasksDataset(root_dir= dataset_path, transform=get_transform(train=False))

In [8]:
model = get_model_detection(2)

Downloading: "https://download.pytorch.org/models/fasterrcnn_resnet50_fpn_coco-258fb6c6.pth" to /root/.cache/torch/hub/checkpoints/fasterrcnn_resnet50_fpn_coco-258fb6c6.pth


  0%|          | 0.00/160M [00:00<?, ?B/s]

In [9]:
dataloader = DataLoader(
 train_set, batch_size=2, shuffle=True, num_workers=2,
 collate_fn=collate_fn)

In [None]:
# Cell for testing outputs
images,targets = next(iter(dataloader))
images = list(image for image in images)
targets = [{k: v for k, v in t.items()} for t in targets]
output = model(images,targets)
model.eval()
x = [torch.rand(3, 300, 400), torch.rand(3, 500, 400)]
predictions = model(x)
predictions

In [10]:
def train_one_epoch(model, optimizer, data_loader, device, epoch, print_freq, scaler=None):
    model.train()
    metric_logger = MetricLogger(delimiter="  ")
    metric_logger.add_meter("lr", SmoothedValue(window_size=1, fmt="{value:.6f}"))
    header = f"Epoch: [{epoch}]"

    lr_scheduler = None
    if epoch == 0:
        warmup_factor = 1.0 / 1000
        warmup_iters = min(1000, len(data_loader) - 1)

        lr_scheduler = torch.optim.lr_scheduler.LinearLR(
            optimizer, start_factor=warmup_factor, total_iters=warmup_iters
        )

    for images, targets in metric_logger.log_every(data_loader, print_freq, header):
        images = list(image.to(device) for image in images)
        targets = [{k: v.to(device) for k, v in t.items()} for t in targets]
        with torch.cuda.amp.autocast(enabled=scaler is not None):
            loss_dict = model(images, targets)
            losses = sum(loss for loss in loss_dict.values())

        # reduce losses over all GPUs for logging purposes
        loss_dict_reduced = reduce_dict(loss_dict)
        losses_reduced = sum(loss for loss in loss_dict_reduced.values())

        loss_value = losses_reduced.item()

        if not math.isfinite(loss_value):
            print(f"Loss is {loss_value}, stopping training")
            print(loss_dict_reduced)
            sys.exit(1)

        optimizer.zero_grad()
        if scaler is not None:
            scaler.scale(losses).backward()
            scaler.step(optimizer)
            scaler.update()
        else:
            losses.backward()
            optimizer.step()

        if lr_scheduler is not None:
            lr_scheduler.step()

        metric_logger.update(loss=losses_reduced, **loss_dict_reduced)
        metric_logger.update(lr=optimizer.param_groups[0]["lr"])

    return metric_logger

Training

In [11]:
# train on the GPU or on the CPU, if a GPU is not available
device = torch.device('cuda') if torch.cuda.is_available() else torch.device('cpu')

# split the dataset in train and test set
indices = torch.randperm(len(train_set)).tolist()
dataset = torch.utils.data.Subset(train_set, indices[:-50])
dataset_test = torch.utils.data.Subset(test_set, indices[-50:])

# define training and validation data loaders
data_loader = dataloader

#data_loader_test = torch.utils.data.DataLoader(
#    dataset_test, batch_size=1, shuffle=False, num_workers=2,
#    collate_fn=detection.utils.collate_fn)

# move model to the right device
model.to(device)

# construct an optimizer
params = [p for p in model.parameters() if p.requires_grad]
optimizer = torch.optim.SGD(params, lr=0.005,
                                momentum=0.9, weight_decay=0.0005)
# and a learning rate scheduler
lr_scheduler = torch.optim.lr_scheduler.StepLR(optimizer,
                                                step_size=3,
                                                gamma=0.1)

# let's train it for 10 epochs
num_epochs = 8

for epoch in range(num_epochs):
    # train for one epoch, printing every 10 iterations
    train_one_epoch(model, optimizer, data_loader, device, epoch, print_freq=10)
    # update the learning rate
    lr_scheduler.step()
    # evaluate on the test dataset
    #detection.engine.evaluate(model, data_loader_test, device=device)

torch.save(model.state_dict(), '/content/fasterrcnn2.h5')
print("Training is over")

Epoch: [0]  [0/5]  eta: 0:00:52  lr: 0.001254  loss: 0.8967 (0.8967)  loss_classifier: 0.7275 (0.7275)  loss_box_reg: 0.0015 (0.0015)  loss_objectness: 0.1641 (0.1641)  loss_rpn_box_reg: 0.0036 (0.0036)  time: 10.4301  data: 1.7274  max mem: 2363
Epoch: [0]  [4/5]  eta: 0:00:02  lr: 0.005000  loss: 0.4492 (0.5088)  loss_classifier: 0.3175 (0.3696)  loss_box_reg: 0.0015 (0.0091)  loss_objectness: 0.1276 (0.1204)  loss_rpn_box_reg: 0.0049 (0.0097)  time: 2.5839  data: 0.3504  max mem: 3774
Epoch: [0] Total time: 0:00:12 (2.5926 s / it)
Epoch: [1]  [0/5]  eta: 0:00:03  lr: 0.005000  loss: 0.1568 (0.1568)  loss_classifier: 0.0929 (0.0929)  loss_box_reg: 0.0341 (0.0341)  loss_objectness: 0.0255 (0.0255)  loss_rpn_box_reg: 0.0043 (0.0043)  time: 0.7538  data: 0.1494  max mem: 3774
Epoch: [1]  [4/5]  eta: 0:00:00  lr: 0.005000  loss: 0.1801 (0.2517)  loss_classifier: 0.1014 (0.1516)  loss_box_reg: 0.0341 (0.0551)  loss_objectness: 0.0337 (0.0361)  loss_rpn_box_reg: 0.0058 (0.0088)  time: 0.63

In [12]:
model

FasterRCNN(
  (transform): GeneralizedRCNNTransform(
      Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225])
      Resize(min_size=(800,), max_size=1333, mode='bilinear')
  )
  (backbone): BackboneWithFPN(
    (body): IntermediateLayerGetter(
      (conv1): Conv2d(3, 64, kernel_size=(7, 7), stride=(2, 2), padding=(3, 3), bias=False)
      (bn1): FrozenBatchNorm2d(64, eps=0.0)
      (relu): ReLU(inplace=True)
      (maxpool): MaxPool2d(kernel_size=3, stride=2, padding=1, dilation=1, ceil_mode=False)
      (layer1): Sequential(
        (0): Bottleneck(
          (conv1): Conv2d(64, 64, kernel_size=(1, 1), stride=(1, 1), bias=False)
          (bn1): FrozenBatchNorm2d(64, eps=0.0)
          (conv2): Conv2d(64, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
          (bn2): FrozenBatchNorm2d(64, eps=0.0)
          (conv3): Conv2d(64, 256, kernel_size=(1, 1), stride=(1, 1), bias=False)
          (bn3): FrozenBatchNorm2d(256, eps=0.0)
          (relu): ReLU(

In [13]:
transform = get_transform(train=False)

In [17]:
img = Image.open('/content/1.jpeg').convert("RGB")
img = transform(img)
img = img.cuda()
model.eval()
prediction = model((img,))
prediction

[{'boxes': tensor([[239.8196, 440.7705, 339.3933, 539.4921],
          [166.4177, 439.4568, 386.4370, 544.6243],
          [197.6655, 440.0789, 296.4489, 540.0864],
          [242.7691, 447.7349, 381.3595, 512.0982],
          [219.7668, 454.1010, 318.7171, 563.2122],
          [278.4780, 431.9431, 343.4335, 567.3729],
          [210.3877, 437.1707, 348.8278, 502.6105],
          [188.0201, 465.8103, 283.3354, 576.2057],
          [240.8364, 377.2519, 342.1846, 608.4230],
          [257.0763, 405.6508, 320.1521, 536.2310],
          [191.6536, 378.7079, 294.7998, 609.5652],
          [235.2463, 406.9960, 298.0741, 536.4868],
          [192.0762, 420.9538, 254.2723, 544.6440],
          [197.6287, 506.1664, 293.4708, 607.8303],
          [285.6390, 534.9506, 425.8648, 599.7220],
          [ 73.9169, 113.6971, 169.7932, 218.6502],
          [307.6358, 448.8411, 408.0442, 549.4703],
          [114.0092,  83.5613, 174.4879, 221.2652],
          [288.0507, 156.0025, 423.5787, 222.5877],
   

In [None]:
from object_detection.core import keypoint_ops
from object_detection.core import standard_fields as fields
from object_detection.utils import shape_utils


In [None]:

box_tensor = prediction[0]["boxes"][i]
caption = label(prediction[0]["labels"][i].item())
score = str(round(prediction[0]["scores"][i].item(), 2))
vis_utils.draw_bounding_box_on_image(image=img, xmin=box_tensor[0].item(), ymin=box_tensor[1].item(),
                                             xmax=box_tensor[2].item(),
                                             ymax=box_tensor[3].item(), use_normalized_coordinates=False,
                                             display_str_list=[caption, score], thickness=3)

img_name = ''.join(random.SystemRandom().choice(string.ascii_uppercase + string.digits))
img.save(os.path.join('/content/', img_name))
print("Done.")