# Image segmentation code for Detectron2

## 1. Install Detectron2

The first step is to install Detectron2.
Full installation documentation here - https://detectron2.readthedocs.io/en/latest/tutorials/install.html

In [None]:
python -m pip install 'git+https://github.com/facebookresearch/detectron2.git'

# # On macOS, you may need to prepend the above commands with a few environment variables:
# CC=clang CXX=clang++ ARCHFLAGS="-arch x86_64" python -m pip install 'git+https://github.com/facebookresearch/detectron2.git'

# 2. Create annotations for your image files

For detectron2 to work, you will need to annotate your images. We have done this using https://www.makesense.ai/ to create object detection annotations in COCO JSON format.

You should generate one JSON file for each set: a JSON file for the training set and a JSON file for the internal validation set.

## 3. Set your directories to the training and internal validation images and accompanying json annotation files

In [None]:
# --- Training set ---
# Folder that holds the training images
training_set_images = "path_to_training_images"
# COCO JSON annotation file that belongs to the training images
training_set_annotations = "path_to_training_json_file"

# --- Internal validation set ---
# Folder that holds the internal validation images
validation_set_images = "path_to_validation_images"
# COCO JSON annotation file that belongs to the internal validation images
validation_set_annotations = "path_to_validation_json_file"

## 4. Import libraries

In [None]:
import numpy as np
import os, json, cv2, random
import torch, detectron2
!nvcc --version
TORCH_VERSION = ".".join(torch.__version__.split(".")[:2])
CUDA_VERSION = torch.__version__.split("+")[-1]
print("torch: ", TORCH_VERSION, "; cuda: ", CUDA_VERSION)
print("detectron2:", detectron2.__version__)
import detectron2
from detectron2.utils.logger import setup_logger
setup_logger()

# import some common detectron2 utilities
from detectron2 import model_zoo
from detectron2.engine import DefaultPredictor
from detectron2.config import get_cfg
from detectron2.utils.visualizer import Visualizer
from detectron2.data import MetadataCatalog, DatasetCatalog

In [None]:
# Specify graphic device if you have one, cpu works for all devices but is much slower
# NOTE(review): despite its name this is a CPU device, and no other cell visible in this
# notebook reads it; the training config sets cfg.MODEL.DEVICE separately. Confirm whether
# this variable is still needed.
mps_device = torch.device("cpu")

# 5. Train on your custom dataset

In [None]:
from detectron2.data.datasets import register_coco_instances
# Register both COCO-format datasets with detectron2.
# Registering a name that is already registered fails, so any stale entry is dropped
# first. This keeps the cell safe to re-run, e.g. after changing the paths above.
for _name, _annotations, _images in (
    ("my_dataset_train", training_set_annotations, training_set_images),
    ("my_dataset_val", validation_set_annotations, validation_set_images),
):
    if _name in DatasetCatalog.list():
        DatasetCatalog.remove(_name)
    if _name in MetadataCatalog.list():
        MetadataCatalog.remove(_name)
    register_coco_instances(_name, {}, _annotations, _images)

# Metadata and per-image annotation dicts of the training set
train_metadata = MetadataCatalog.get("my_dataset_train")
train_dataset_dicts = DatasetCatalog.get("my_dataset_train")

# Metadata and per-image annotation dicts of the internal validation set
val_metadata = MetadataCatalog.get("my_dataset_val")
val_dataset_dicts = DatasetCatalog.get("my_dataset_val")

Visualize some random samples

In [None]:
from matplotlib import pyplot as plt

# Draw the ground-truth annotations on one randomly chosen training image.
for d in random.sample(train_dataset_dicts, 1):
    img = cv2.imread(d["file_name"])  # OpenCV loads images in BGR channel order
    if img is None:
        # cv2.imread returns None instead of raising when the file cannot be read
        print(f"Failed to load image: {d['file_name']}")
        continue
    # Visualizer expects RGB input, hence the channel flip
    visualizer = Visualizer(img[:, :, ::-1], metadata=train_metadata, scale=0.5)
    vis = visualizer.draw_dataset_dict(d)
    # get_image() already returns RGB, which is what matplotlib expects;
    # flipping the channels again would swap red and blue in the plot.
    plt.imshow(vis.get_image())
    plt.show()

## Train

Set your output directory

In [None]:
# Directory used as cfg.OUTPUT_DIR; later cells read metrics.json and model_final.pth from it
output_dir = "path_to_output_directory"

In [None]:
from detectron2.engine import DefaultTrainer
from detectron2.evaluation import COCOEvaluator
from detectron2.data import build_detection_test_loader
from detectron2.data import DatasetMapper
from detectron2.LossEvalHook import LossEvalHook

class MyTrainer(DefaultTrainer):
    """DefaultTrainer variant with a COCO evaluator and an extra LossEvalHook.

    The hook is run on the first dataset listed in ``cfg.DATASETS.TEST``.
    """

    @classmethod
    def build_evaluator(cls, cfg, dataset_name, output_folder=None):
        """Return a COCOEvaluator for ``dataset_name``.

        Args:
            cfg: The detectron2 config node.
            dataset_name: Name of the registered dataset to evaluate on.
            output_folder: Where evaluation results are written. Defaults to
                ``<cfg.OUTPUT_DIR>/inference``.
        """
        if output_folder is None:
            output_folder = os.path.join(cfg.OUTPUT_DIR, "inference")
        return COCOEvaluator(dataset_name, cfg, True, output_folder)

    def build_hooks(self):
        """Return the default hooks with a LossEvalHook inserted before the last one."""
        hooks = super().build_hooks()
        # Read every setting from the trainer's own config (self.cfg). The previous
        # code used the notebook-global ``cfg`` for the period, which only worked
        # while a global of that name happened to exist and match.
        # DatasetMapper gets True as its second argument (presumably is_train, so
        # that annotations are kept for the loss computation; confirm).
        validation_loader = build_detection_test_loader(
            self.cfg,
            self.cfg.DATASETS.TEST[0],
            DatasetMapper(self.cfg, True),
        )
        loss_hook = LossEvalHook(
            self.cfg.TEST.EVAL_PERIOD,
            self.model,
            validation_loader,
        )
        # insert(-1, ...) places the new hook just before the final default hook
        hooks.insert(-1, loss_hook)
        return hooks
    

from detectron2.engine import DefaultTrainer
from detectron2.config import get_cfg
from detectron2 import model_zoo
import os

# Build the training configuration: start from detectron2's defaults, then layer the
# Mask R-CNN R50-FPN 3x model-zoo config and the notebook-specific overrides on top.
cfg = get_cfg()
cfg.OUTPUT_DIR = output_dir
cfg.merge_from_file(model_zoo.get_config_file("COCO-InstanceSegmentation/mask_rcnn_R_50_FPN_3x.yaml"))
# Dataset names registered earlier in this notebook
cfg.DATASETS.TRAIN = ("my_dataset_train",)
cfg.DATASETS.TEST = ("my_dataset_val",)
cfg.DATALOADER.NUM_WORKERS = 2
# Evaluation period; MyTrainer.build_hooks passes this value to LossEvalHook
cfg.TEST.EVAL_PERIOD = 150

## Increase input image size for better resolution
#cfg.INPUT.MIN_SIZE_TRAIN = (640, 672, 704, 736, 768, 800)
#cfg.INPUT.MAX_SIZE_TRAIN = 1333

# Start from the pretrained model-zoo weights and run the model on the CPU
cfg.MODEL.WEIGHTS = model_zoo.get_checkpoint_url("COCO-InstanceSegmentation/mask_rcnn_R_50_FPN_3x.yaml")
cfg.MODEL.DEVICE = "cpu"  # Set model to graphic device, CPU works for all but will be slower

cfg.SOLVER.IMS_PER_BATCH = 2  # This is the real "batch size" commonly known to deep learning people
cfg.SOLVER.BASE_LR = 0.00025  # pick a good LR
# NOTE(review): the original note read "1500 default"; presumably the authors' usual
# setting rather than a library default. Confirm.
cfg.SOLVER.MAX_ITER = 300
cfg.SOLVER.STEPS = []        # do not decay learning rate

# Adjust anchor sizes and aspect ratios
#cfg.MODEL.ANCHOR_GENERATOR.SIZES = [[32, 64, 128, 256, 512]]
#cfg.MODEL.ANCHOR_GENERATOR.ASPECT_RATIOS = [[0.5, 1.0, 2.0]]

# NOTE(review): the original comment said 256 was used for this dataset, but the value
# set here is 512. Confirm which one is intended.
cfg.MODEL.ROI_HEADS.BATCH_SIZE_PER_IMAGE = 512
cfg.MODEL.ROI_HEADS.NUM_CLASSES = 1  # State number of classes, do not include bg
# NOTE: this config means the number of classes, without the background. Do not use num_classes+1 here.

# Increase the mask loss weight if segmentation is not accurate
#cfg.MODEL.ROI_HEADS.MASK_LOSS_WEIGHT = 2.0

# Make sure the output directory exists before the trainer is created
os.makedirs(cfg.OUTPUT_DIR, exist_ok=True)

# trainer = DefaultTrainer(cfg)
trainer = MyTrainer(cfg)
# resume=False: do not resume from a previous run in OUTPUT_DIR
trainer.resume_or_load(resume=False)

Start the training process

In [None]:
# Run training with the trainer built in the previous cell
trainer.train() 

Save training results

Change the YAML file path below to the location where you want the configuration to be saved.

In [None]:
import yaml
# Save the configuration to a config.yaml file
config_yaml_path = "path_to_directory/config.yaml" # adjust as needed
# cfg.dump() serialises the config node as plain YAML text. Passing the node straight
# to yaml.dump() writes Python-object tags instead, which produces a file that cannot
# be merged back into a config later.
with open(config_yaml_path, 'w') as file:
    file.write(cfg.dump())

Generate the loss plots

In [None]:
import json
import matplotlib.pyplot as plt
import numpy as np

# Folder the metrics file is read from and the loss plot is written to
experiment_folder = output_dir

def load_json_arr(json_path):
    """Load a JSON-lines file into a list.

    Args:
        json_path: Path to a text file that holds one JSON document per line.

    Returns:
        A list with the decoded value of every non-blank line, in file order.

    Raises:
        json.JSONDecodeError: If a non-blank line is not valid JSON.
    """
    with open(json_path, 'r', encoding='utf-8') as f:
        # Blank lines (e.g. a trailing empty line) are skipped instead of
        # aborting the whole load with a decode error.
        return [json.loads(line) for line in f if line.strip()]

# os.path.join inserts the path separator, so this works whether or not
# experiment_folder ends with a slash (plain "+" concatenation glued the folder
# name and the file name together).
experiment_metrics = load_json_arr(os.path.join(experiment_folder, 'metrics.json'))

# Keep only the records that actually carry the metric being plotted; filtering on
# the plotted key itself avoids a KeyError when a record lacks it.
training_records = [x for x in experiment_metrics if 'total_loss' in x]
validation_records = [x for x in experiment_metrics if 'validation_loss' in x]

# Creating a figure and axis with high resolution for publication
plt.figure(figsize=(10, 6), dpi=300)

# Training loss curve
plt.plot(
    [x['iteration'] for x in training_records],
    [x['total_loss'] for x in training_records],
    label='Training Loss', linewidth=1.5, marker='o', markersize=3, linestyle='-', color='blue'
)

# Validation loss curve
plt.plot(
    [x['iteration'] for x in validation_records],
    [x['validation_loss'] for x in validation_records],
    label='Validation Loss', linewidth=1.5, marker='s', markersize=3, linestyle='--', color='red'
)

# Enhancing the plot with grid, labels, title, and legend
plt.grid(True, which='both', linestyle='--', linewidth=0.5)
plt.xlabel('Iteration', fontsize=14, fontweight='bold')
plt.ylabel('Loss', fontsize=14, fontweight='bold')
plt.title('Training and Validation Loss over Iterations', fontsize=16, fontweight='bold')
plt.legend(fontsize=12, loc='upper right')

# Saving the figure with high quality (must happen before plt.show())
plt.savefig(os.path.join(experiment_folder, 'loss_plot_high_quality.png'), format='png', dpi=600)

# Show the plot
plt.show()


# 6. Inference and evaluation using the trained model

Make sure `model_final.pth` exists in your output directory: the next cell loads the trained weights from that file.

In [None]:
# Reuse the training config for inference so the model is built with the same
# parameters; only the two settings below differ from training.
cfg.MODEL.ROI_HEADS.SCORE_THRESH_TEST = 0.5  # custom score threshold used at test time
cfg.MODEL.WEIGHTS = os.path.join(cfg.OUTPUT_DIR, "model_final.pth")  # weights of the model we just trained
predictor = DefaultPredictor(cfg)

In [None]:
from detectron2.utils.visualizer import ColorMode, Visualizer
import cv2
import random
from IPython.display import display, Image
from io import BytesIO

# Function to display an image in Jupyter Notebook
def show_image(image):
    """Show ``image`` inline in the notebook by encoding it as PNG."""
    png_buffer = cv2.imencode('.png', image)[1]
    png_bytes = png_buffer.tobytes()
    display(Image(data=png_bytes))

# Assuming val_dataset_dicts and val_metadata are defined elsewhere
# Show predictions for up to 5 randomly chosen validation images. random.sample raises
# ValueError when asked for more items than exist, so the count is capped.
num_preview_images = min(5, len(val_dataset_dicts))
for d in random.sample(val_dataset_dicts, num_preview_images):
    im = cv2.imread(d["file_name"])
    if im is None:
        # cv2.imread returns None instead of raising when the file cannot be read
        print(f"Failed to load image: {d['file_name']}")
        continue
    outputs = predictor(im)
    v = Visualizer(im[:, :, ::-1],
                   metadata=val_metadata,
                   scale=0.8,
                   instance_mode=ColorMode.IMAGE_BW)  # Remove the colors of unsegmented pixels
    out = v.draw_instance_predictions(outputs["instances"].to("cpu"))
    # Flip the drawn image back to BGR, the order cv2.imencode (used by show_image) expects
    show_image(out.get_image()[:, :, ::-1])  # Display the image

In [None]:
from detectron2.evaluation import COCOEvaluator, inference_on_dataset
from detectron2.data import build_detection_test_loader
# Score the trained model on the validation set with the COCO evaluator
evaluator = COCOEvaluator("my_dataset_val", output_dir=output_dir)
val_loader = build_detection_test_loader(cfg, "my_dataset_val")
eval_results = inference_on_dataset(predictor.model, val_loader, evaluator)
print(eval_results)
# another equivalent way to evaluate the model is to use `trainer.test`

## Load a new image and segment it

In [None]:
from detectron2.utils.visualizer import Visualizer
import cv2
from IPython.display import display, Image
from io import BytesIO

# Function to display an image in Jupyter Notebook
def show_image(image):
    """PNG-encode ``image`` and display the result inline in the notebook."""
    encode_result = cv2.imencode('.png', image)
    encoded_png = encode_result[1]
    display(Image(data=encoded_png.tobytes()))

# Load an image
new_image_path = "path_to_image"
new_im = cv2.imread(new_image_path)
if new_im is None:
    # cv2.imread returns None instead of raising when the file is missing or unreadable;
    # fail here with a clear message rather than deep inside the predictor.
    raise FileNotFoundError(f"Failed to load image: {new_image_path}")
outputs = predictor(new_im)

# Use `Visualizer` to draw the predictions on the image (it expects RGB input).
v = Visualizer(new_im[:, :, ::-1], metadata=train_metadata)
out = v.draw_instance_predictions(outputs["instances"].to("cpu"))

# Display the image with predictions (flipped back to BGR for cv2.imencode)
show_image(out.get_image()[:, :, ::-1])

## Process multiple images in a directory and save the results in an output directory

In [None]:
# Directory path to the input images folder
input_images_directory = "path"

# Output directory where the segmented images will be saved
output_directory = "path"

# cv2.imwrite does not create missing folders (it just reports failure),
# so make sure the destination exists up front.
os.makedirs(output_directory, exist_ok=True)

# Loop over the images in the input folder
for image_filename in os.listdir(input_images_directory):
    image_path = os.path.join(input_images_directory, image_filename)
    new_im = cv2.imread(image_path)

    # cv2.imread returns None for sub-folders, non-image files and unreadable images;
    # skip those instead of crashing inside the predictor.
    if new_im is None:
        print(f"Failed to load image: {image_path}")
        continue

    # Perform prediction on the new image
    outputs = predictor(new_im)  # Format is documented at https://detectron2.readthedocs.io/tutorials/models.html#model-output-format

    # We can use `Visualizer` to draw the predictions on the image.
    v = Visualizer(new_im[:, :, ::-1], metadata=train_metadata)
    out = v.draw_instance_predictions(outputs["instances"].to("cpu"))

    # Create the output filename with _result extension
    result_filename = os.path.splitext(image_filename)[0] + "_result.png"
    output_path = os.path.join(output_directory, result_filename)

    # Save the segmented image (flipped back to BGR, the order cv2.imwrite expects)
    if not cv2.imwrite(output_path, out.get_image()[:, :, ::-1]):
        print(f"Failed to save image: {output_path}")

print("Segmentation of all images completed.")

## Saving a label mask for each class (each detected object gets its own integer label)

In [None]:
import os
import cv2
import numpy as np
import torch
from detectron2.utils.visualizer import Visualizer

# Directory path to the input images folder
input_images_directory = "path"

# Output directory where the mask images will be saved
output_directory = "path"

# cv2.imwrite does not create missing folders (it just reports failure),
# so make sure the destination exists up front.
os.makedirs(output_directory, exist_ok=True)

# Loop over the images in the input folder
for image_filename in os.listdir(input_images_directory):
    if not image_filename.lower().endswith(('.png', '.jpg', '.jpeg')):
        continue  # Skip non-image files

    image_path = os.path.join(input_images_directory, image_filename)
    new_im = cv2.imread(image_path)

    if new_im is None:
        print(f"Failed to load image: {image_path}")
        continue

    # Perform prediction on the new image
    outputs = predictor(new_im)  # Format is documented at https://detectron2.readthedocs.io/tutorials/models.html#model-output-format

    # Move the predictions to the CPU once instead of once per mask
    instances = outputs["instances"].to("cpu")
    pred_masks = instances.pred_masks

    # Check if predictions are available
    if len(pred_masks) == 0:
        print(f"No objects detected in image: {image_filename}")
        continue  # Skip to the next image if no objects are detected

    # One label image per class, initialised to 0 and sized like a single predicted mask.
    # NOTE(review): labels are stored as uint8, so more than 255 objects in one image
    # cannot be represented; confirm this limit is acceptable.
    class_masks = {class_name: torch.zeros_like(pred_masks[0], dtype=torch.uint8)
                   for class_name in train_metadata.thing_classes}

    # Assign a unique integer label (detection index + 1) to each object in its class mask.
    # tolist() yields plain ints, so the class list is indexed with an int, not a tensor.
    # Where masks overlap, the later detection overwrites the earlier one.
    for i, pred_class in enumerate(instances.pred_classes.tolist()):
        class_name = train_metadata.thing_classes[pred_class]
        class_masks[class_name] = torch.where(pred_masks[i],
                                              i + 1,
                                              class_masks[class_name])

    # Save the masks for each class with unique integer labels
    for class_name, class_mask in class_masks.items():
        # Convert the (CPU) tensor to a NumPy array
        class_mask_np = class_mask.numpy()

        # Create the output filename with _class_name_result.png extension
        class_filename = os.path.splitext(image_filename)[0] + f"_{class_name}_result.png"
        class_output_path = os.path.join(output_directory, class_filename)

        # Save the image with unique integer labels
        if not cv2.imwrite(class_output_path, class_mask_np.astype(np.uint8)):
            print(f"Failed to save image: {class_output_path}")

print("Segmentation of all images completed.")