In [1]:
import os
import numpy as np
import cv2
from detectron2.utils.logger import setup_logger
from detectron2.engine import DefaultTrainer
from detectron2.config import get_cfg
from detectron2.structures import BoxMode
from detectron2.data import DatasetCatalog, MetadataCatalog
from detectron2.model_zoo import get_config_file, get_checkpoint_url
setup_logger()

<Logger detectron2 (DEBUG)>

In [2]:
# Function to convert mask to Detectron2's polygon format
def mask_to_polygons(mask):
    contours, _ = cv2.findContours(mask, cv2.RETR_EXTERNAL, cv2.CHAIN_APPROX_SIMPLE)
    polygons = [contour.flatten().tolist() for contour in contours if len(contour.flatten()) > 4]
    return polygons

In [3]:
# Dataset loading function
def get_custom_dataset_dicts(dataset_dir, is_train=True):
    dataset_dicts = []
    for filename in os.listdir(os.path.join(dataset_dir, "image")):
        if filename.endswith(".jpg"):  # Assuming image files are JPG
            image_path = os.path.join(dataset_dir, "image", filename)
            mask_path = os.path.join(dataset_dir, "mask", filename)  # Adjust if needed

            height, width = cv2.imread(image_path).shape[:2]
            record = {"file_name": image_path, "image_id": filename, "height": height, "width": width}

            mask = cv2.imread(mask_path, cv2.IMREAD_GRAYSCALE)
            if mask is None:
                continue

            polygons = mask_to_polygons(mask)
            bbox_array = np.asarray(cv2.boundingRect(mask)).reshape(1, -1)
            bbox = BoxMode.convert(bbox_array, BoxMode.XYWH_ABS, BoxMode.XYXY_ABS)

            objs = [{"segmentation": polygons, "bbox": bbox, "bbox_mode": BoxMode.XYXY_ABS, "category_id": 0}]
            record["annotations"] = objs
            dataset_dicts.append(record)
    print("Dataset length: ", len(dataset_dicts))
    return dataset_dicts

In [4]:
# Register the dataset
for d in ["train", "val"]:
    DatasetCatalog.register("my_dataset_" + d, lambda d=d: get_custom_dataset_dicts("training_dataset", is_train=(d == "train")))
    MetadataCatalog.get("my_dataset_" + d).set(thing_classes=["object"])

In [7]:
# Visualize Annotations

import random
import cv2
from detectron2.utils.visualizer import Visualizer
from detectron2.data import MetadataCatalog

dataset_dicts = get_custom_dataset_dicts("training_dataset", is_train=False)

for d in random.sample(dataset_dicts, 1):  # Visualize 1 random images
    img = cv2.imread(d["file_name"])
    visualizer = Visualizer(img[:, :, ::-1], metadata=MetadataCatalog.get("my_dataset_val"), scale=0.5)

    # Debugging: Print bbox and other annotation details
    for obj in d["annotations"]:
        print("BBox:", obj["bbox"])
        print("Segmentation:", obj["segmentation"])
        print("BBox type and shape:", type(obj["bbox"]), np.asarray(obj["bbox"]).shape)

    # Try drawing only the boxes
    out = visualizer.draw_dataset_dict({"file_name": d["file_name"], "annotations": [{"segmentation": obj["segmentation"], "bbox": obj["bbox"][0], "bbox_mode": obj["bbox_mode"], "category_id": obj["category_id"]} for obj in d["annotations"]]})
    cv2.imwrite(f'val_image_{d["file_name"]}.jpg', out.get_image()[:, :, ::-1])
    cv2.imshow(f'val_image_{d["file_name"]}.jpg', out.get_image()[:, :, ::-1])
    # vis = visualizer.draw_dataset_dict(d)
    # cv2.imshow('Validation Image', vis.get_image()[:, :, ::-1])
    # cv2.waitKey(0)
    # cv2.destroyAllWindows()




Dataset length:  60
BBox: [[  0   0 640 512]]
Segmentation: [[0, 0, 0, 511, 639, 511, 639, 0]]
BBox type and shape: <class 'numpy.ndarray'> (1, 4)
BBox: [[   0  369 1024  768]]
Segmentation: [[1, 369, 1, 401, 0, 402, 0, 767, 101, 767, 102, 766, 1022, 766, 1022, 440, 1023, 439, 1023, 404, 1019, 404, 1018, 403, 932, 403, 931, 402, 810, 402, 809, 403, 803, 403, 802, 402, 771, 402, 770, 401, 739, 401, 738, 400, 700, 400, 699, 399, 684, 399, 683, 398, 668, 398, 667, 397, 620, 397, 619, 398, 612, 398, 611, 397, 564, 397, 563, 396, 517, 396, 516, 395, 465, 395, 464, 394, 380, 394, 379, 393, 325, 393, 324, 392, 312, 392, 311, 391, 294, 391, 293, 390, 293, 389, 295, 387, 295, 386, 301, 380, 302, 380, 304, 378, 312, 378, 313, 379, 314, 378, 324, 378, 325, 379, 327, 379, 328, 380, 330, 380, 331, 381, 333, 381, 334, 380, 356, 380, 357, 379, 384, 379, 385, 378, 463, 378, 465, 376, 466, 376, 467, 375, 468, 375, 469, 374, 470, 374, 445, 374, 444, 373, 429, 373, 428, 372, 412, 372, 411, 371, 385, 371,

In [5]:
# Configuration
cfg = get_cfg()
cfg.merge_from_file(get_config_file("COCO-InstanceSegmentation/mask_rcnn_R_50_FPN_3x.yaml"))  # Default configuration
cfg.DATASETS.TRAIN = ("my_dataset_train",)
cfg.DATASETS.TEST = ()
cfg.DATALOADER.NUM_WORKERS = 2
cfg.MODEL.WEIGHTS = get_checkpoint_url("COCO-InstanceSegmentation/mask_rcnn_R_50_FPN_3x.yaml")  # Default pretrained weights
cfg.SOLVER.IMS_PER_BATCH = 2
cfg.SOLVER.BASE_LR = 0.002
cfg.SOLVER.MAX_ITER = 1000   # Adjust according to your dataset size
cfg.MODEL.ROI_HEADS.BATCH_SIZE_PER_IMAGE = 512
cfg.MODEL.ROI_HEADS.NUM_CLASSES = 1  # Number of classes
cfg.SOLVER.ITER_DISPLAY = 1  # Display metrics every iteration
cfg.SOLVER.CHECKPOINT_PERIOD = 1000  # Save model every 1000 iterations

RuntimeError: COCO-InstanceSegmentation/mask_rcnn_R_50_FPN_3x.yaml not available in Model Zoo!

In [6]:
import torch
torch.cuda.is_available()

False

In [11]:
cfg.MODEL.DEVICE='cpu'

In [12]:
cfg.OUTPUT_DIR = "./output"

In [13]:
# Training
os.makedirs(cfg.OUTPUT_DIR, exist_ok=True)
trainer = DefaultTrainer(cfg)
trainer.resume_or_load(resume=False)
trainer.train()

# Save the model
model_path = os.path.join(cfg.OUTPUT_DIR, "model_final.pth")

[32m[01/05 04:16:08 d2.engine.defaults]: [0mModel:
GeneralizedRCNN(
  (backbone): FPN(
    (fpn_lateral2): Conv2d(256, 256, kernel_size=(1, 1), stride=(1, 1))
    (fpn_output2): Conv2d(256, 256, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (fpn_lateral3): Conv2d(512, 256, kernel_size=(1, 1), stride=(1, 1))
    (fpn_output3): Conv2d(256, 256, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (fpn_lateral4): Conv2d(1024, 256, kernel_size=(1, 1), stride=(1, 1))
    (fpn_output4): Conv2d(256, 256, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (fpn_lateral5): Conv2d(2048, 256, kernel_size=(1, 1), stride=(1, 1))
    (fpn_output5): Conv2d(256, 256, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (top_block): LastLevelMaxPool()
    (bottom_up): ResNet(
      (stem): BasicStem(
        (conv1): Conv2d(
          3, 64, kernel_size=(7, 7), stride=(2, 2), padding=(3, 3), bias=False
          (norm): FrozenBatchNorm2d(num_features=64, eps=1e-05)
        )
      )
 

Skip loading parameter 'roi_heads.box_predictor.cls_score.weight' to the model due to incompatible shapes: (81, 1024) in the checkpoint but (2, 1024) in the model! You might want to double check if this is expected.
Skip loading parameter 'roi_heads.box_predictor.cls_score.bias' to the model due to incompatible shapes: (81,) in the checkpoint but (2,) in the model! You might want to double check if this is expected.
Skip loading parameter 'roi_heads.box_predictor.bbox_pred.weight' to the model due to incompatible shapes: (320, 1024) in the checkpoint but (4, 1024) in the model! You might want to double check if this is expected.
Skip loading parameter 'roi_heads.box_predictor.bbox_pred.bias' to the model due to incompatible shapes: (320,) in the checkpoint but (4,) in the model! You might want to double check if this is expected.
Skip loading parameter 'roi_heads.mask_head.predictor.weight' to the model due to incompatible shapes: (80, 256, 1, 1) in the checkpoint but (1, 256, 1, 1) in

[32m[01/05 04:16:09 d2.engine.train_loop]: [0mStarting training from iteration 0


In [None]:
# Configure the Model for Evaluation
cfg.MODEL.WEIGHTS = os.path.join(cfg.OUTPUT_DIR, "model_final.pth")  # Path to the model weights
cfg.DATASETS.TEST = ("my_dataset_val",)  # Validation dataset
cfg.MODEL.ROI_HEADS.SCORE_THRESH_TEST = 0.05  # Set a threshold for this model

# Import Evaluation Modules
from detectron2.evaluation import COCOEvaluator, inference_on_dataset
from detectron2.data import build_detection_test_loader

# Perform Evaluation
evaluator = COCOEvaluator("my_dataset_val", cfg, False, output_dir=cfg.OUTPUT_DIR)
val_loader = build_detection_test_loader(cfg, "my_dataset_val")
inference_on_dataset(trainer.model, val_loader, evaluator)

