

# Leaf Segmentation with <img src="https://dl.fbaipublicfiles.com/detectron2/Detectron2-Logo-Horz.png" width="200">

Trains a detectron2 model on the Superfund leaf dataset.



# Setup

### Install requirements

In [1]:
%%capture

# requirements
!pip install pyyaml==5.1
!pip install opencv-python
!pip install matplotlib
!pip install openpyxl

# cocosplit requirements
!pip install sklearn funcy argparse scikit-multilearn

### Import Detectron2 and other Python Libraries

In [2]:
# Report the installed torch version: TORCH_VERSION keeps only "major.minor",
# CUDA_VERSION is whatever follows the "+" in the version string (e.g. "cu117").
# If the version string contains no "+", CUDA_VERSION is the full version string.
import torch
TORCH_VERSION = ".".join(torch.__version__.split(".")[:2])
CUDA_VERSION = torch.__version__.split("+")[-1]
print("torch: ", TORCH_VERSION, "; cuda: ", CUDA_VERSION)

# Some basic setup:
# Setup detectron2 logger
import detectron2
from detectron2.utils.logger import setup_logger
setup_logger()

# import some common libraries
import numpy as np
import os, json, cv2, random

# import some common detectron2 utilities
from detectron2 import model_zoo
from detectron2.engine import DefaultPredictor
from detectron2.config import get_cfg
from detectron2.utils.visualizer import Visualizer
from detectron2.data import MetadataCatalog, DatasetCatalog

# matplotlib is used to display the visualizations inline
from matplotlib import pyplot as plt

# NOTE(review): presumably set so CUDA errors are reported at the failing call
# (synchronous kernel launches); this is expected to slow training — confirm it
# is still needed.
os.environ['CUDA_LAUNCH_BLOCKING'] = "1"

torch:  2.0 ; cuda:  cu117


# Prepare the Data

### Split dataset into train and test sets for k-fold cross validation

To make better use of our small dataset (~176 images), we use k-fold cross validation.

First, we add some variables to specify where output from this training run should be stored, including the split k-fold test/train annotations and final model weights.  Set `today` to today's date and set `suffix` to specify how you'd like your output folder named (defaults to `kfold_train`).

In [3]:
# Date stamp and free-form label that together name this run's output folder.
today = "2023-09-11"
suffix = "kfold_train_2"

# Annotation file that is handed to the k-fold split script.
annoation_file = 'data/2023-09-11_leaves.json'

# Output folder name has the form "<today>_<suffix>".
output_folder = "_".join((today, suffix))

Next set the number of folds for k-fold cross validation (defaults to k=5, which is pretty standard).

In [4]:
# Number of folds for k-fold cross validation; passed to the split script
# through its -k option.
k = 5

Next we run the `cocosplit_kfold` script to split the images into k folds and get the training and test sets for each iteration of k-fold cross validation. You can add the `-p` flag if you'd like to see the train/test indices for each k-fold iteration to verify this is working as expected.

In [5]:
# Shuffle and randomly split the annotations into k folds.
# The trailing backslashes continue the string literal, so `run` is one shell
# command line: the script, `-k <k>`, the annotation file, then the folder the
# split files are written to.
run = f"python ./cocosplit_kfold.py\
         -k {k}\
         {annoation_file}\
         /home/jovyan/work/{output_folder}"

# IPython shell escape: execute the command string built above.
!{run}

Traceback (most recent call last):
  File "/home/jovyan/work/./cocosplit_kfold.py", line 7, in <module>
    import funcy
ModuleNotFoundError: No module named 'funcy'


Now, in the output folder, you should see JSON files containing the annotations for the training & test set for each iteration of k-fold cross validation.  For example `train_0.json` and `test_0.json` respectively contain the training and test annotations for the first training iteration.

### Register the dataset with Detectron2

Register the leaf dataset to detectron2, following the [detectron2 custom dataset tutorial](https://detectron2.readthedocs.io/tutorials/datasets.html).


In [6]:
# Register every fold's train/test annotation file with detectron2 under the
# dataset names "train_<fold>" and "val_<fold>".
from detectron2.data.datasets import register_coco_instances

for fold in range(k):
    # (registered dataset name, prefix of the split JSON file in the output folder)
    for dataset_name, json_prefix in ((f"train_{fold}", "train"), (f"val_{fold}", "test")):
        register_coco_instances(
            dataset_name,
            {},
            f"/home/jovyan/work/{output_folder}/{json_prefix}_{fold}.json",
            "/home/jovyan/work/data/",
        )



To verify the data loading is correct, let's visualize the annotations of randomly selected samples in the training set:



In [1]:
# Sanity-check data loading: draw the ground-truth annotations of a few randomly
# chosen images from the first training fold.
leaf_metadata = MetadataCatalog.get("train_0")
dataset_dicts = DatasetCatalog.get("train_0")

print(leaf_metadata)

# Never request more samples than the fold contains; random.sample raises
# ValueError when the sample size exceeds the population.
for d in random.sample(dataset_dicts, min(3, len(dataset_dicts))):
    img = cv2.imread(d["file_name"])
    # cv2 loads images as BGR; reversing the channel axis gives the RGB image
    # that Visualizer expects.
    visualizer = Visualizer(img[:, :, ::-1], metadata=leaf_metadata, scale=0.5)
    out = visualizer.draw_dataset_dict(d)
    # get_image() already returns RGB, which is what plt.imshow expects. A second
    # channel flip (needed only for BGR viewers such as cv2.imshow) would swap
    # red and blue in the displayed figure.
    plt.imshow(out.get_image())
    plt.show()


NameError: name 'MetadataCatalog' is not defined

# Time to Train!

Now, let's fine-tune a COCO-pretrained R50-FPN Mask R-CNN model on the leaf dataset.


In [8]:
import os

import detectron2.data.transforms as T
from detectron2.checkpoint import DetectionCheckpointer
from detectron2.config import get_cfg
from detectron2.data import DatasetMapper, MetadataCatalog, build_detection_train_loader
from detectron2.engine import DefaultTrainer, default_argument_parser, default_setup, launch
from detectron2.evaluation import CityscapesSemSegEvaluator, DatasetEvaluators, SemSegEvaluator
from detectron2.projects.deeplab import add_deeplab_config, build_lr_scheduler


def build_sem_seg_train_aug(cfg):
    """Build the list of training-time augmentations.

    Args:
        cfg: detectron2 config. ``INPUT.MIN_SIZE_TRAIN``, ``INPUT.MAX_SIZE_TRAIN``
            and ``INPUT.MIN_SIZE_TRAIN_SAMPLING`` are read to configure the
            resize step.

    Returns:
        list: ``T.ResizeShortestEdge`` built from the config values,
        ``T.RandomFlip()`` with its default arguments, and
        ``T.RandomBrightness(0.8, 1.2)``, in that order.
    """
    input_cfg = cfg.INPUT
    resize = T.ResizeShortestEdge(
        input_cfg.MIN_SIZE_TRAIN,
        input_cfg.MAX_SIZE_TRAIN,
        input_cfg.MIN_SIZE_TRAIN_SAMPLING,
    )
    return [resize, T.RandomFlip(), T.RandomBrightness(0.8, 1.2)]


class AugmentedTrainer(DefaultTrainer):
    """DefaultTrainer variant whose training data loader applies custom augmentations.

    Only ``build_train_loader`` is overridden; everything else is inherited
    from ``DefaultTrainer``.
    """

    @classmethod
    def build_train_loader(cls, cfg):
        """Return a train loader whose mapper uses ``build_sem_seg_train_aug(cfg)``."""
        print('Building custom train loader...')
        train_augs = build_sem_seg_train_aug(cfg)
        return build_detection_train_loader(
            cfg,
            mapper=DatasetMapper(cfg, is_train=True, augmentations=train_augs),
        )



In [None]:
from detectron2.engine import DefaultTrainer

# Train one model per cross-validation fold. Iterate over all k folds
# (0 .. k-1) instead of a hard-coded range, so fold 0 is trained too and the
# loop stays in sync with the number of folds that were split and registered.
# After the loop, `cfg` and `fold` keep the values of the last fold; the
# tensorboard and inference cells below rely on them.
for fold in range(k):
    cfg = get_cfg()
    cfg.merge_from_file(model_zoo.get_config_file("COCO-InstanceSegmentation/mask_rcnn_R_50_FPN_3x.yaml"))

    # Train on this fold's training split; no dataset is evaluated during training.
    cfg.DATASETS.TRAIN = (f"train_{fold}",)
    cfg.DATASETS.TEST = ()

    cfg.DATALOADER.NUM_WORKERS = 2

    cfg.MODEL.WEIGHTS = model_zoo.get_checkpoint_url("COCO-InstanceSegmentation/mask_rcnn_R_50_FPN_3x.yaml")  # Let training initialize from model zoo

    cfg.SOLVER.IMS_PER_BATCH = 2
    cfg.SOLVER.BASE_LR = 0.00025  # pick a good LR
    cfg.SOLVER.MAX_ITER = 5000    # training iterations per fold
    cfg.SOLVER.STEPS = []         # do not decay learning rate

    cfg.MODEL.ROI_HEADS.BATCH_SIZE_PER_IMAGE = 512   # same as the default (512); lower it for faster training
    cfg.MODEL.ROI_HEADS.NUM_CLASSES = 3  # leaf dataset has 3 classes (leaf, qr, red-square)

    # Each fold writes its checkpoints and logs to its own sub-folder.
    cfg.OUTPUT_DIR = f'./{output_folder}/fold_{fold}'
    # cfg.INPUT.CROP.ENABLED = True

    os.makedirs(cfg.OUTPUT_DIR, exist_ok=True)
    trainer = AugmentedTrainer(cfg)
    # resume=False: start from cfg.MODEL.WEIGHTS rather than a checkpoint in OUTPUT_DIR.
    trainer.resume_or_load(resume=False)
    trainer.train()


[32m[09/11 23:18:39 d2.engine.defaults]: [0mModel:
GeneralizedRCNN(
  (backbone): FPN(
    (fpn_lateral2): Conv2d(256, 256, kernel_size=(1, 1), stride=(1, 1))
    (fpn_output2): Conv2d(256, 256, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (fpn_lateral3): Conv2d(512, 256, kernel_size=(1, 1), stride=(1, 1))
    (fpn_output3): Conv2d(256, 256, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (fpn_lateral4): Conv2d(1024, 256, kernel_size=(1, 1), stride=(1, 1))
    (fpn_output4): Conv2d(256, 256, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (fpn_lateral5): Conv2d(2048, 256, kernel_size=(1, 1), stride=(1, 1))
    (fpn_output5): Conv2d(256, 256, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (top_block): LastLevelMaxPool()
    (bottom_up): ResNet(
      (stem): BasicStem(
        (conv1): Conv2d(
          3, 64, kernel_size=(7, 7), stride=(2, 2), padding=(3, 3), bias=False
          (norm): FrozenBatchNorm2d(num_features=64, eps=1e-05)
        )
      )
 

Skip loading parameter 'roi_heads.box_predictor.cls_score.weight' to the model due to incompatible shapes: (81, 1024) in the checkpoint but (4, 1024) in the model! You might want to double check if this is expected.
Skip loading parameter 'roi_heads.box_predictor.cls_score.bias' to the model due to incompatible shapes: (81,) in the checkpoint but (4,) in the model! You might want to double check if this is expected.
Skip loading parameter 'roi_heads.box_predictor.bbox_pred.weight' to the model due to incompatible shapes: (320, 1024) in the checkpoint but (12, 1024) in the model! You might want to double check if this is expected.
Skip loading parameter 'roi_heads.box_predictor.bbox_pred.bias' to the model due to incompatible shapes: (320,) in the checkpoint but (12,) in the model! You might want to double check if this is expected.
Skip loading parameter 'roi_heads.mask_head.predictor.weight' to the model due to incompatible shapes: (80, 256, 1, 1) in the checkpoint but (3, 256, 1, 1) 

[32m[09/11 23:18:39 d2.engine.train_loop]: [0mStarting training from iteration 0


  return _VF.meshgrid(tensors, **kwargs)  # type: ignore[attr-defined]


[32m[09/11 23:19:03 d2.utils.events]: [0m eta: 1:30:34  iter: 19  total_loss: 3.622  loss_cls: 1.48  loss_box_reg: 0.1922  loss_mask: 0.6907  loss_rpn_cls: 1.135  loss_rpn_loc: 0.1465    time: 1.1655  last_time: 1.1077  data_time: 0.9124  last_data_time: 0.8965   lr: 4.9953e-06  max_mem: 2345M
[32m[09/11 23:19:25 d2.utils.events]: [0m eta: 1:29:12  iter: 39  total_loss: 3.533  loss_cls: 1.33  loss_box_reg: 0.2528  loss_mask: 0.6863  loss_rpn_cls: 1.143  loss_rpn_loc: 0.1867    time: 1.0870  last_time: 1.6437  data_time: 0.8085  last_data_time: 1.4538   lr: 9.9902e-06  max_mem: 2345M
[32m[09/11 23:19:46 d2.utils.events]: [0m eta: 1:28:56  iter: 59  total_loss: 2.878  loss_cls: 1.075  loss_box_reg: 0.3617  loss_mask: 0.6809  loss_rpn_cls: 0.583  loss_rpn_loc: 0.1809    time: 1.0788  last_time: 1.0728  data_time: 0.8656  last_data_time: 0.8800   lr: 1.4985e-05  max_mem: 2345M
[32m[09/11 23:20:09 d2.utils.events]: [0m eta: 1:28:33  iter: 79  total_loss: 2.234  loss_cls: 0.8188  los

Skip loading parameter 'roi_heads.box_predictor.cls_score.weight' to the model due to incompatible shapes: (81, 1024) in the checkpoint but (4, 1024) in the model! You might want to double check if this is expected.
Skip loading parameter 'roi_heads.box_predictor.cls_score.bias' to the model due to incompatible shapes: (81,) in the checkpoint but (4,) in the model! You might want to double check if this is expected.
Skip loading parameter 'roi_heads.box_predictor.bbox_pred.weight' to the model due to incompatible shapes: (320, 1024) in the checkpoint but (12, 1024) in the model! You might want to double check if this is expected.
Skip loading parameter 'roi_heads.box_predictor.bbox_pred.bias' to the model due to incompatible shapes: (320,) in the checkpoint but (12,) in the model! You might want to double check if this is expected.
Skip loading parameter 'roi_heads.mask_head.predictor.weight' to the model due to incompatible shapes: (80, 256, 1, 1) in the checkpoint but (3, 256, 1, 1) 

[32m[09/12 00:50:53 d2.engine.train_loop]: [0mStarting training from iteration 0
[32m[09/12 00:51:16 d2.utils.events]: [0m eta: 1:31:37  iter: 19  total_loss: 4.055  loss_cls: 1.418  loss_box_reg: 0.1982  loss_mask: 0.6921  loss_rpn_cls: 1.547  loss_rpn_loc: 0.1961    time: 1.1232  last_time: 0.9250  data_time: 0.9737  last_data_time: 0.7691   lr: 4.9953e-06  max_mem: 2757M
[32m[09/12 00:51:38 d2.utils.events]: [0m eta: 1:31:51  iter: 39  total_loss: 3.277  loss_cls: 1.279  loss_box_reg: 0.2255  loss_mask: 0.6885  loss_rpn_cls: 0.955  loss_rpn_loc: 0.1791    time: 1.1165  last_time: 1.1235  data_time: 0.9299  last_data_time: 0.9475   lr: 9.9902e-06  max_mem: 2757M
[32m[09/12 00:52:02 d2.utils.events]: [0m eta: 1:31:45  iter: 59  total_loss: 2.848  loss_cls: 1.054  loss_box_reg: 0.345  loss_mask: 0.6817  loss_rpn_cls: 0.5133  loss_rpn_loc: 0.129    time: 1.1412  last_time: 1.1568  data_time: 0.9961  last_data_time: 0.9432   lr: 1.4985e-05  max_mem: 2776M
[32m[09/12 00:52:26 d2.

Skip loading parameter 'roi_heads.box_predictor.cls_score.weight' to the model due to incompatible shapes: (81, 1024) in the checkpoint but (4, 1024) in the model! You might want to double check if this is expected.
Skip loading parameter 'roi_heads.box_predictor.cls_score.bias' to the model due to incompatible shapes: (81,) in the checkpoint but (4,) in the model! You might want to double check if this is expected.
Skip loading parameter 'roi_heads.box_predictor.bbox_pred.weight' to the model due to incompatible shapes: (320, 1024) in the checkpoint but (12, 1024) in the model! You might want to double check if this is expected.
Skip loading parameter 'roi_heads.box_predictor.bbox_pred.bias' to the model due to incompatible shapes: (320,) in the checkpoint but (12,) in the model! You might want to double check if this is expected.
Skip loading parameter 'roi_heads.mask_head.predictor.weight' to the model due to incompatible shapes: (80, 256, 1, 1) in the checkpoint but (3, 256, 1, 1) 

[32m[09/12 02:23:48 d2.engine.train_loop]: [0mStarting training from iteration 0
[32m[09/12 02:24:11 d2.utils.events]: [0m eta: 1:31:22  iter: 19  total_loss: 3.933  loss_cls: 1.39  loss_box_reg: 0.2039  loss_mask: 0.6966  loss_rpn_cls: 1.314  loss_rpn_loc: 0.151    time: 1.1320  last_time: 1.1912  data_time: 0.9696  last_data_time: 0.9739   lr: 4.9953e-06  max_mem: 3095M
[32m[09/12 02:24:33 d2.utils.events]: [0m eta: 1:30:45  iter: 39  total_loss: 3.12  loss_cls: 1.249  loss_box_reg: 0.2536  loss_mask: 0.6928  loss_rpn_cls: 0.8199  loss_rpn_loc: 0.1305    time: 1.1130  last_time: 1.0775  data_time: 0.9079  last_data_time: 0.8777   lr: 9.9902e-06  max_mem: 3095M
[32m[09/12 02:24:55 d2.utils.events]: [0m eta: 1:30:26  iter: 59  total_loss: 2.653  loss_cls: 1.025  loss_box_reg: 0.311  loss_mask: 0.6858  loss_rpn_cls: 0.5209  loss_rpn_loc: 0.1283    time: 1.1065  last_time: 1.1505  data_time: 0.9056  last_data_time: 0.9399   lr: 1.4985e-05  max_mem: 3095M
[32m[09/12 02:25:17 d2.u

In [None]:
# Install the jupyter-tensorboard package with pip.
# NOTE(review): the `%load_ext tensorboard` magic used below is presumably
# provided by the `tensorboard` package itself — confirm this extra package is needed.
!pip install jupyter-tensorboard

In [None]:


# Look at training curves in tensorboard.
# `{output_folder}` and `{fold}` are filled in from the notebook's variables, so
# this shows the logs of whichever fold `fold` currently refers to (after the
# training loop, that is the last fold trained).
%load_ext tensorboard
%tensorboard --logdir ./{output_folder}/fold_{fold} --port 6007

In [None]:
# Build a predictor from the training config. `cfg` still holds the settings
# of the most recent training run; only two values are changed for inference.

# Keep only detections scoring at least 0.7.
cfg.MODEL.ROI_HEADS.SCORE_THRESH_TEST = 0.7
# Load the final weights written to this run's output directory.
cfg.MODEL.WEIGHTS = os.path.join(cfg.OUTPUT_DIR, "model_final.pth")

predictor = DefaultPredictor(cfg)

Then, we randomly select several samples to visualize the prediction results.

In [None]:
from detectron2.utils.visualizer import ColorMode

# The datasets are registered per fold as "val_<fold>"; there is no dataset
# named plain "val". `fold` still holds the last fold from the training loop,
# which is presumably the fold whose weights `predictor` loaded — verify if
# cells were run out of order.
val_name = f"val_{fold}"
dataset_dicts = DatasetCatalog.get(val_name)
leaf_metadata = MetadataCatalog.get(val_name)

# Never request more samples than the split contains; random.sample raises
# ValueError when the sample size exceeds the population.
for d in random.sample(dataset_dicts, min(10, len(dataset_dicts))):
    im = cv2.imread(d["file_name"])
    outputs = predictor(im)  # format is documented at https://detectron2.readthedocs.io/tutorials/models.html#model-output-format
    # cv2 loads images as BGR; reversing the channel axis gives the RGB image
    # that Visualizer expects.
    v = Visualizer(im[:, :, ::-1],
                   metadata=leaf_metadata,
                   # scale=0.5,
                   instance_mode=ColorMode.IMAGE_BW   # remove the colors of unsegmented pixels. This option is only available for segmentation models
    )
    out = v.draw_instance_predictions(outputs["instances"].to("cpu"))
    # get_image() already returns RGB, which is what plt.imshow expects. A second
    # channel flip would swap red and blue in the displayed figure.
    plt.imshow(out.get_image())
    plt.show()