# instance segmentation

In [1]:
import torch 
import os
import json
import numpy as np
from PIL import Image
from pycocotools.coco import COCO
from torch.utils.data import Dataset
from torchvision.io import read_image
import matplotlib.pyplot as plt
from PIL import Image
import torch
import torchvision
from torchvision.models.detection.mask_rcnn import MaskRCNNPredictor
import pytorch_lightning as pl
import mlflow.pytorch
import mlflow
from pytorch_lightning.loggers import MLFlowLogger
import torchvision.transforms as T
from torchvision.transforms import v2
from torchvision.models import ResNet50_Weights

from pathlib import Path
os.chdir("..")
print(os.getcwd())
BASE_DIR = Path(os.getcwd()).resolve()  # <--- Changed to work in Jupyter

/home/wladyka/Swin-Transformer


In [2]:
category_mapping = {
    16: 0,  # bird
    17: 1,  # cat
    18: 2,  # dog
    19: 3,  # horse
    20: 4,  # sheep
    21: 5,  # cow
    22: 6,  # elephant
    23: 7,  # bear
    24: 8,  # zebra
    25: 9   # giraffe
}

### Build Dataloader

In [3]:
class CustomCOCODataset(Dataset):
    def __init__(self, image_dir, annotation, transforms=None):
        self.image_dir = str(image_dir)
        self.coco = COCO(annotation)
        self.ids = list(sorted(self.coco.imgs.keys()))
        self.transforms = transforms
        self.normalization=v2.Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225])

    def __getitem__(self, index):
        img_id = self.ids[index]
        ann_ids = self.coco.getAnnIds(imgIds=img_id)
        anns = self.coco.loadAnns(ann_ids)
        path = self.coco.loadImgs(img_id)[0]['file_name']
        img = Image.open(os.path.join(self.image_dir, path)).convert("RGB")

        masks = []
        boxes = []
        labels = []
        for ann in anns:
            masks.append(self.coco.annToMask(ann))
            x, y, w, h = ann['bbox']
            boxes.append([x, y, x + w, y + h])
            labels.append(category_mapping[ann['category_id']])

        # If no annotations, create dummy
        if len(boxes) == 0:
            boxes = np.zeros((0, 4), dtype=np.float32)
            labels = np.zeros((0,), dtype=np.int64)
            masks = np.zeros((0, img.height, img.width), dtype=np.uint8)
        else:
            boxes = np.array(boxes, dtype=np.float32)
            labels = np.array(labels, dtype=np.int64)
            masks = np.stack(masks, axis=0).astype(np.uint8)

        target = {}
        target["boxes"] = torch.as_tensor(boxes, dtype=torch.float32)
        target["labels"] = torch.as_tensor(labels, dtype=torch.int64)
        target["masks"] = torch.as_tensor(masks, dtype=torch.uint8)

        if self.transforms is not None:
            img = self.transforms(img)
        img = self.normalization(img)
        return img, target

    def __len__(self):
        return len(self.ids)

In [4]:
def build_model(num_classes):

    model = torchvision.models.detection.maskrcnn_resnet50_fpn(weights=None, weights_backbone=ResNet50_Weights.IMAGENET1K_V1 )
    in_features = model.roi_heads.box_predictor.cls_score.in_features
    model.roi_heads.box_predictor = torchvision.models.detection.faster_rcnn.FastRCNNPredictor(in_features, num_classes)

    in_features_mask = model.roi_heads.mask_predictor.conv5_mask.in_channels
    hidden_layer = 256
    model.roi_heads.mask_predictor = torchvision.models.detection.mask_rcnn.MaskRCNNPredictor(in_features_mask, hidden_layer, num_classes)
    return model

### Create a Lightning Module

In [5]:
class MaskRCNNLightningModule(pl.LightningModule):
    def __init__(self, model):
        super().__init__()
        self.model = model

    def forward(self, images, targets=None):
        return self.model(images, targets)

    def training_step(self, batch, batch_idx):
        images, targets = batch
        images = list(img.to(self.device) for img in images)
        targets = [{k: v.to(self.device) for k, v in t.items()} for t in targets]
        # self.model.train()
        loss_dict = self.model(images, targets)
        losses = sum(loss for loss in loss_dict.values())

        self.log('train_loss', losses)
        return losses

    def validation_step(self, batch, batch_idx):
        images, targets = batch
        images = list(img.to(self.device) for img in images)
        targets = [{k: v.to(self.device) for k, v in t.items()} for t in targets]
        self.model.train()
        loss_dict = self.model(images, targets)

        if not isinstance(loss_dict, dict):
            raise TypeError(f"Unexpected type for loss_dict: {type(loss_dict)}")

        losses = sum(loss for loss in loss_dict.values())
        self.log('val_loss', losses, prog_bar=True, logger=True)
        return losses

    def configure_optimizers(self):
        optimizer = torch.optim.SGD(self.model.parameters(), lr=0.005, momentum=0.9, weight_decay=0.0005)
        lr_scheduler = torch.optim.lr_scheduler.StepLR(optimizer, step_size=3, gamma=0.1)
        return [optimizer], [lr_scheduler]

### Extension of the callback class (Lightning)

In [6]:
class MetricTracker(pl.Callback):
    def __init__(self):
        self.collection = []

    def on_validation_batch_end(self, trainer, pl_module, outputs, batch, batch_idx, dataloader_idx):
        if "val_loss" in outputs:
            vacc = outputs["val_loss"]
            self.collection.append(vacc)

    def on_validation_epoch_end(self, trainer, pl_module):
        if "val_loss" in trainer.logged_metrics:
            elogs = trainer.logged_metrics["val_loss"]
            self.collection.append(elogs)

### Training (with Pytorch Lightning Trainer)

In [8]:
num_classes = 11  # Example: 1 class (background) + 1 class (object)
train_images_dir = BASE_DIR / "dataset/coco10/train2017_subset/images"
train_ann_dir = BASE_DIR / "dataset/coco10/train2017_subset/coco10_train_annotations.json"
val_images_dir = BASE_DIR / "dataset/coco10/val2017_subset/images"
val_ann_dir = BASE_DIR / "dataset/coco10/val2017_subset/coco10_val_annotations.json"
train_dataset = CustomCOCODataset(image_dir=train_images_dir, annotation=train_ann_dir, transforms=T.ToTensor())
val_dataset = CustomCOCODataset(image_dir=val_images_dir, annotation=val_ann_dir, transforms=T.ToTensor())

train_loader = torch.utils.data.DataLoader(train_dataset, batch_size=4, shuffle=True, num_workers=4, collate_fn=lambda x: tuple(zip(*x)))
val_loader = torch.utils.data.DataLoader(val_dataset, batch_size=4, shuffle=True, num_workers=4, collate_fn=lambda x: tuple(zip(*x)))

model = build_model(num_classes=num_classes)
module = MaskRCNNLightningModule(model=model)

mlf_logger = MLFlowLogger(
    experiment_name=f"maskrcnn_resnet",
    tracking_uri="http://localhost:5000",
    log_model=True,
)

# metric_tracker = MetricTracker()

trainer = pl.Trainer(
    max_epochs=10,
    logger=mlf_logger,
    # callbacks=[metric_tracker]
)

with mlflow.start_run():
    trainer.fit(module, train_dataloaders=train_loader, val_dataloaders=val_loader)
    mlflow.pytorch.log_model(module.model, "model")

loading annotations into memory...
Done (t=0.92s)
creating index...
index created!
loading annotations into memory...
Done (t=0.05s)
creating index...
index created!


GPU available: True (cuda), used: True
TPU available: False, using: 0 TPU cores
HPU available: False, using: 0 HPUs
LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]

  | Name  | Type     | Params | Mode 
-------------------------------------------
0 | model | MaskRCNN | 44.0 M | train
-------------------------------------------
43.7 M    Trainable params
222 K     Non-trainable params
44.0 M    Total params
175.883   Total estimated model params size (MB)
207       Modules in train mode
0         Modules in eval mode


Epoch 0:   0%|          | 0/23989 [00:00<?, ?it/s]                         

OutOfMemoryError: CUDA out of memory. Tried to allocate 60.00 MiB. GPU 0 has a total capacity of 3.80 GiB of which 62.56 MiB is free. Process 1941137 has 104.00 MiB memory in use. Including non-PyTorch memory, this process has 3.09 GiB memory in use. Of the allocated memory 2.98 GiB is allocated by PyTorch, and 39.43 MiB is reserved by PyTorch but unallocated. If reserved but unallocated memory is large try setting PYTORCH_CUDA_ALLOC_CONF=expandable_segments:True to avoid fragmentation.  See documentation for Memory Management  (https://pytorch.org/docs/stable/notes/cuda.html#environment-variables)

In [None]:
from torchvision.models.detection.mask_rcnn import MaskRCNNPredictor
# Wartości testowe
in_channels = 256
hidden_layer = 256
num_classes = 3

# Test inicjalizacji
predictor = MaskRCNNPredictor(in_channels, hidden_layer, num_classes)
print(predictor)


MaskRCNNPredictor(
  (conv5_mask): ConvTranspose2d(256, 256, kernel_size=(2, 2), stride=(2, 2))
  (relu): ReLU(inplace=True)
  (mask_fcn_logits): Conv2d(256, 3, kernel_size=(1, 1), stride=(1, 1))
)
