# Evaluating the Model

### Install Requirements

In [1]:
%%capture

!pip install fiftyone
!pip install pyzbar
!pip install opencv-python
!pip install seaborn
!pip install openpyxl

### Import Required Libraries

In [2]:
import detectron2
from detectron2.utils.logger import setup_logger
setup_logger()

# import some common detectron2 utilities
from detectron2 import model_zoo
from detectron2.engine import DefaultPredictor
from detectron2.data.catalog import Metadata
from detectron2.config import get_cfg
from detectron2.utils.visualizer import Visualizer, ColorMode

import os
import sys
import numpy as np
import pandas as pd
import fiftyone as fo
from PIL import Image, ImageOps
from PIL.ExifTags import TAGS
from pathlib import Path
from pyzbar.pyzbar import decode
from pyzbar.pyzbar import ZBarSymbol
from torchvision import transforms
from IPython.display import display


In [None]:
import torch
from detectron2.data import detection_utils as utils
from detectron2.data import transforms as T

class AspectRatioResizeMapper:
    """Custom mapper to resize images while keeping the aspect ratio."""
    
    def __init__(self, short_side_length=1000):
        self.transform = T.ResizeShortestEdge(short_side_length, short_side_length * 2, "range") 

    def __call__(self, dataset_dict):
        dataset_dict = dataset_dict.copy()  # Don't modify the original dataset
        image = utils.read_image(dataset_dict["file_name"], format="BGR")
        
        # Apply aspect ratio-preserving resize
        aug_input = T.AugInput(image)
        transforms = self.transform(aug_input)
        image = aug_input.image  # Resized image (NumPy array)

        # ✅ Convert NumPy array to a PyTorch tensor
        image = torch.as_tensor(image.astype("float32").transpose(2, 0, 1))  # (C, H, W) format

        # Update dataset dictionary
        dataset_dict["image"] = image
        return dataset_dict

In [37]:
from detectron2.data.datasets import load_coco_json
from detectron2.data import MetadataCatalog

def get_leaf_only_dataset(json_path, image_dir, dataset_name):
    """Loads COCO dataset but keeps only the 'leaf' class annotations."""

    dataset_dicts = load_coco_json(json_path, image_dir, dataset_name)

    # Dynamically get the correct index for "leaf"
    metadata = MetadataCatalog.get(dataset_name)
    leaf_class_index = metadata.thing_classes.index("leaf")
    
    print('leaf_class_index', leaf_class_index)

    filtered_dataset = []
    for image_dict in dataset_dicts:
        # Keep only annotations that belong to the "leaf" class
        leaf_annotations = [ann for ann in image_dict["annotations"] if ann["category_id"] == leaf_class_index]

        if leaf_annotations:  # Only keep images with at least one leaf
            image_dict = image_dict.copy()  # Avoid modifying the original dataset
            image_dict["annotations"] = leaf_annotations
            image_dict["categories"] = [{"id": leaf_class_index, "name": "leaf"}]  # Update category list
            filtered_dataset.append(image_dict)

    return filtered_dataset

In [3]:
# set this to today's date
today = "2024-03-15"

# modify output folder suffix if needed
suffix = "kfold_train"

# name of output folder
output_folder_name = today + "_" + suffix

images_folder = '/home/jovyan/work/data/2024-03-14_leaves'
output_folder = f'/home/jovyan/work/mask_rcnn/{output_folder_name}'

k=5

namespace(thing_classes=['leaf', 'qr', 'red-square'])

### Specify Model

In [45]:
from detectron2.evaluation import COCOEvaluator
from pycocotools.cocoeval import COCOeval
import copy

import numpy as np
import itertools
from tabulate import tabulate
from detectron2.utils.logger import create_small_table

class CustomCOCOEvaluator(COCOEvaluator):
    """Custom COCO Evaluator with modified small/medium/large thresholds."""
    
    def _evaluate_predictions_on_coco(self, coco_gt, coco_results, iou_type):
        """Override the function to modify small/medium/large thresholds."""
        
        # Call the original COCO evaluation
        coco_eval = super()._evaluate_predictions_on_coco(coco_gt, coco_results, iou_type)

        if coco_eval is None:
            return None

        # Modify thresholds (change here as needed)
        small_threshold = 10000   # Instead of 1024
        medium_threshold = 50000  # Instead of 9216

        # Create a copy of the evaluation results to modify thresholds
        coco_eval_copy = copy.deepcopy(coco_eval)

        # Adjust area thresholds in COCO evaluation
        coco_eval_copy.params.areaRng = [
            [0, small_threshold],      # Small
            [small_threshold, medium_threshold],  # Medium
            [medium_threshold, 1e10]   # Large
        ]
        coco_eval_copy.params.areaRngLbl = ["small", "medium", "large"]

        # Run evaluation again with new thresholds
        coco_eval_copy.evaluate()
        coco_eval_copy.accumulate()
        coco_eval_copy.summarize()

        return coco_eval_copy
    
    def _derive_coco_results(self, coco_eval, iou_type, class_names=None):
        """
        Derive detailed COCO evaluation metrics, including AP (0.5:0.95), AP50, AP75, APs, APm, and APl
        per class.

        Args:
            coco_eval (COCOEval): COCO evaluation object containing results.
            iou_type (str): Type of IoU metric (e.g., 'bbox', 'segm').
            class_names (list[str]): List of class names.

        Returns:
            dict: Dictionary containing AP metrics.
        """

        # Define the standard COCO metric names
        metrics = {
            "bbox": ["AP", "AP50", "AP75", "APs", "APm", "APl"],
            "segm": ["AP", "AP50", "AP75", "APs", "APm", "APl"],
            "keypoints": ["AP", "AP50", "AP75", "APm", "APl"],
        }[iou_type]

        if coco_eval is None:
            self._logger.warn("No predictions from the model!")
            return {metric: float("nan") for metric in metrics}

        # Extract the default COCO AP metrics
        results = {
            metric: float(coco_eval.stats[idx] * 100 if coco_eval.stats[idx] >= 0 else "nan")
            for idx, metric in enumerate(metrics)
        }

        self._logger.info(
            "Evaluation results for {}: \n".format(iou_type) + create_small_table(results)
        )

        if not np.isfinite(sum(results.values())):
            self._logger.info("Some metrics cannot be computed and are shown as NaN.")

        if class_names is None or len(class_names) <= 1:
            return results  # Skip per-class AP computation if no class names

        # Extract per-class AP values using precision results
        precisions = coco_eval.eval["precision"]  # Shape: (IoU, Recall, Class, Area, Max Dets)

        assert len(class_names) == precisions.shape[2]
        assert coco_eval.params.iouThrs[0] == 0.5  # Ensure IoU=0.5 is first in the list

        results_per_category = []
        for idx, name in enumerate(class_names):
            # Compute AP (mean over IoUs 0.5:0.95)
            precision_all = precisions[:, :, idx, 0, -1]  # IoU=[0.5:0.95], all object sizes
            precision_all = precision_all[precision_all > -1]
            ap = np.mean(precision_all) * 100 if precision_all.size else float("nan")

            # Compute AP for IoU=0.5 (AP50)
            precision_50 = precisions[0, :, idx, 0, -1]  # IoU=0.5, all object sizes
            precision_50 = precision_50[precision_50 > -1]
            ap50 = np.mean(precision_50) * 100 if precision_50.size else float("nan")

            # Compute AP for IoU=0.75 (AP75)
            precision_75 = precisions[5, :, idx, 0, -1]  # IoU=0.75, all object sizes
            precision_75 = precision_75[precision_75 > -1]
            ap75 = np.mean(precision_75) * 100 if precision_75.size else float("nan")

            # Compute AP for small, medium, and large objects
            precision_s = precisions[0, :, idx, 1, -1]  # Small objects
            precision_m = precisions[0, :, idx, 2, -1]  # Medium objects
            precision_l = precisions[0, :, idx, 3, -1]  # Large objects

            ap_s = np.mean(precision_s[precision_s > -1]) * 100 if precision_s.size else float("nan")
            ap_m = np.mean(precision_m[precision_m > -1]) * 100 if precision_m.size else float("nan")
            ap_l = np.mean(precision_l[precision_l > -1]) * 100 if precision_l.size else float("nan")

            results_per_category.append((name, ap, ap50, ap75, ap_s, ap_m, ap_l))

        # Tabulate per-category results
        headers = ["category", "AP", "AP50", "AP75", "APs", "APm", "APl"]
        results_flatten = list(itertools.chain(*results_per_category))
        results_2d = itertools.zip_longest(*[results_flatten[i::7] for i in range(7)])
        table = tabulate(results_2d, tablefmt="pipe", floatfmt=".3f", headers=headers, numalign="left")

        self._logger.info("Per-category {} AP (AP, AP50, AP75, APs, APm, APl): \n".format(iou_type) + table)

        # Store per-class AP in the results dictionary
        for name, ap, ap50, ap75, ap_s, ap_m, ap_l in results_per_category:
            results.update({
                f"AP-{name}": ap,
                f"AP50-{name}": ap50,
                f"AP75-{name}": ap75,
                f"APs-{name}": ap_s,
                f"APm-{name}": ap_m,
                f"APl-{name}": ap_l,
            })

        return results




In [48]:
import numpy as np
from PIL import Image, ImageOps


from detectron2.data.datasets import register_coco_instances
from detectron2.data import DatasetCatalog, MetadataCatalog, build_detection_test_loader
from detectron2.evaluation import COCOEvaluator, inference_on_dataset
from detectron2.data.catalog import Metadata


# Assuming leaf_predictor is defined elsewhere and dataset is an iterable of samples
datasets = {}

def maybe_unregister(name):
    if name in DatasetCatalog.list():
        DatasetCatalog.remove(name)
    if name in MetadataCatalog.list():
        MetadataCatalog.remove(name)
    
        
segm_ap_list = []
segm_ap_50_list = []
segm_ap_75_list = []
segm_ap_small_list = []
segm_ap_medium_list = []
segm_ap_large_list = []


for fold in range(0, 5):
    
    # path to model to evaluate
    model_path = f"{output_folder}/fold_{fold}/model_final.pth"
    dataset_name = f"test_{fold}"
    coco_json_path = f"{output_folder}/test_{fold}.json"
    maybe_unregister(dataset_name)

    register_coco_instances(dataset_name, {}, coco_json_path, images_folder)
    
    
    leaf_cfg = get_cfg()
    leaf_cfg.MODEL.DEVICE='cpu'
    leaf_cfg.merge_from_file(model_zoo.get_config_file("COCO-InstanceSegmentation/mask_rcnn_R_50_FPN_3x.yaml"))
    leaf_cfg.MODEL.ROI_HEADS.NUM_CLASSES = 3
    leaf_cfg.MODEL.WEIGHTS = model_path # path to trained weights
    leaf_cfg.MODEL.ROI_HEADS.SCORE_THRESH_TEST = 0.5  # set a custom testing threshold

    leaf_predictor = DefaultPredictor(leaf_cfg)
    val_loader = build_detection_test_loader(leaf_cfg, dataset_name)
    
    evaluator = CustomCOCOEvaluator(dataset_name, ("segm","bbox"), False, output_dir="./output")
    results = inference_on_dataset(leaf_predictor.model, val_loader, evaluator)
    print(results)
    
    segm_results = results["segm"]
    
    # Extract segmentation AP for "leaf"
    segm_ap_list.append(results["segm"]["AP-leaf"])
    segm_ap_50_list.append(results["segm"]["AP50-leaf"])
    segm_ap_75_list.append(results["segm"]["AP75-leaf"])
    segm_ap_small_list.append(results["segm"]["APs-leaf"])
    segm_ap_medium_list.append(results["segm"]["APm-leaf"])
    segm_ap_large_list.append(results["segm"]["APl-leaf"])

avg_segm_ap = np.mean(segm_ap_list)
avg_segm_ap_50 = np.mean(segm_ap_50_list)
avg_segm_ap_75 = np.mean(segm_ap_75_list)
avg_segm_ap_small = np.mean(segm_ap_small_list)
avg_segm_ap_medium = np.mean(segm_ap_medium_list)
avg_segm_ap_large = np.mean(segm_ap_large_list)

print("\n===== Average Segmentation AP Across 5 Folds =====")
print(f"Avg AP@0.5:0.95: {avg_segm_ap:.2f}")
print(f"Avg AP@0.5: {avg_segm_ap_50:.2f}")
print(f"Avg AP@0.75: {avg_segm_ap_75:.2f}")
print(f"Avg AP (Small): {avg_segm_ap_small:.2f}")
print(f"Avg AP (Medium): {avg_segm_ap_medium:.2f}")
print(f"Avg AP (Large): {avg_segm_ap_large:.2f}")
    
        

    
    
    
    


    

[32m[02/01 02:32:29 d2.checkpoint.detection_checkpoint]: [0m[DetectionCheckpointer] Loading from /home/jovyan/work/mask_rcnn/2024-03-15_kfold_train/fold_0/model_final.pth ...
Category ids in annotations are not in [1, #categories]! We'll apply a mapping for you.

[32m[02/01 02:32:29 d2.data.datasets.coco]: [0mLoaded 36 images in COCO format from /home/jovyan/work/mask_rcnn/2024-03-15_kfold_train/test_0.json
[32m[02/01 02:32:29 d2.data.dataset_mapper]: [0m[DatasetMapper] Augmentations used in inference: [ResizeShortestEdge(short_edge_length=(800, 800), max_size=1333, sample_style='choice')]
[32m[02/01 02:32:29 d2.data.common]: [0mSerializing the dataset using: <class 'detectron2.data.common._TorchSerializedList'>
[32m[02/01 02:32:29 d2.data.common]: [0mSerializing 36 elements to byte tensors and concatenating them all ...
[32m[02/01 02:32:29 d2.data.common]: [0mSerialized dataset takes 0.20 MiB
[32m[02/01 02:32:29 d2.evaluation.evaluator]: [0mStart inference on 36 batches


  return _methods._mean(a, axis=axis, dtype=dtype,
  ret = ret.dtype.type(ret / rcount)


[32m[02/01 02:33:48 d2.checkpoint.detection_checkpoint]: [0m[DetectionCheckpointer] Loading from /home/jovyan/work/mask_rcnn/2024-03-15_kfold_train/fold_1/model_final.pth ...
Category ids in annotations are not in [1, #categories]! We'll apply a mapping for you.

[32m[02/01 02:33:48 d2.data.datasets.coco]: [0mLoaded 35 images in COCO format from /home/jovyan/work/mask_rcnn/2024-03-15_kfold_train/test_1.json
[32m[02/01 02:33:48 d2.data.dataset_mapper]: [0m[DatasetMapper] Augmentations used in inference: [ResizeShortestEdge(short_edge_length=(800, 800), max_size=1333, sample_style='choice')]
[32m[02/01 02:33:48 d2.data.common]: [0mSerializing the dataset using: <class 'detectron2.data.common._TorchSerializedList'>
[32m[02/01 02:33:48 d2.data.common]: [0mSerializing 35 elements to byte tensors and concatenating them all ...
[32m[02/01 02:33:48 d2.data.common]: [0mSerialized dataset takes 0.12 MiB
[32m[02/01 02:33:48 d2.evaluation.evaluator]: [0mStart inference on 35 batches


  return _methods._mean(a, axis=axis, dtype=dtype,
  ret = ret.dtype.type(ret / rcount)


[32m[02/01 02:35:13 d2.checkpoint.detection_checkpoint]: [0m[DetectionCheckpointer] Loading from /home/jovyan/work/mask_rcnn/2024-03-15_kfold_train/fold_2/model_final.pth ...
Category ids in annotations are not in [1, #categories]! We'll apply a mapping for you.

[32m[02/01 02:35:13 d2.data.datasets.coco]: [0mLoaded 35 images in COCO format from /home/jovyan/work/mask_rcnn/2024-03-15_kfold_train/test_2.json
[32m[02/01 02:35:13 d2.data.dataset_mapper]: [0m[DatasetMapper] Augmentations used in inference: [ResizeShortestEdge(short_edge_length=(800, 800), max_size=1333, sample_style='choice')]
[32m[02/01 02:35:13 d2.data.common]: [0mSerializing the dataset using: <class 'detectron2.data.common._TorchSerializedList'>
[32m[02/01 02:35:13 d2.data.common]: [0mSerializing 35 elements to byte tensors and concatenating them all ...
[32m[02/01 02:35:13 d2.data.common]: [0mSerialized dataset takes 0.16 MiB
[32m[02/01 02:35:13 d2.evaluation.evaluator]: [0mStart inference on 35 batches


  return _methods._mean(a, axis=axis, dtype=dtype,
  ret = ret.dtype.type(ret / rcount)


[32m[02/01 02:36:25 d2.checkpoint.detection_checkpoint]: [0m[DetectionCheckpointer] Loading from /home/jovyan/work/mask_rcnn/2024-03-15_kfold_train/fold_3/model_final.pth ...
Category ids in annotations are not in [1, #categories]! We'll apply a mapping for you.

[32m[02/01 02:36:25 d2.data.datasets.coco]: [0mLoaded 35 images in COCO format from /home/jovyan/work/mask_rcnn/2024-03-15_kfold_train/test_3.json
[32m[02/01 02:36:25 d2.data.dataset_mapper]: [0m[DatasetMapper] Augmentations used in inference: [ResizeShortestEdge(short_edge_length=(800, 800), max_size=1333, sample_style='choice')]
[32m[02/01 02:36:25 d2.data.common]: [0mSerializing the dataset using: <class 'detectron2.data.common._TorchSerializedList'>
[32m[02/01 02:36:25 d2.data.common]: [0mSerializing 35 elements to byte tensors and concatenating them all ...
[32m[02/01 02:36:25 d2.data.common]: [0mSerialized dataset takes 0.15 MiB
[32m[02/01 02:36:25 d2.evaluation.evaluator]: [0mStart inference on 35 batches


  return _methods._mean(a, axis=axis, dtype=dtype,
  ret = ret.dtype.type(ret / rcount)


[32m[02/01 02:37:40 d2.checkpoint.detection_checkpoint]: [0m[DetectionCheckpointer] Loading from /home/jovyan/work/mask_rcnn/2024-03-15_kfold_train/fold_4/model_final.pth ...
Category ids in annotations are not in [1, #categories]! We'll apply a mapping for you.

[32m[02/01 02:37:41 d2.data.datasets.coco]: [0mLoaded 35 images in COCO format from /home/jovyan/work/mask_rcnn/2024-03-15_kfold_train/test_4.json
[32m[02/01 02:37:41 d2.data.dataset_mapper]: [0m[DatasetMapper] Augmentations used in inference: [ResizeShortestEdge(short_edge_length=(800, 800), max_size=1333, sample_style='choice')]
[32m[02/01 02:37:41 d2.data.common]: [0mSerializing the dataset using: <class 'detectron2.data.common._TorchSerializedList'>
[32m[02/01 02:37:41 d2.data.common]: [0mSerializing 35 elements to byte tensors and concatenating them all ...
[32m[02/01 02:37:41 d2.data.common]: [0mSerialized dataset takes 0.16 MiB
[32m[02/01 02:37:41 d2.evaluation.evaluator]: [0mStart inference on 35 batches


  return _methods._mean(a, axis=axis, dtype=dtype,
  ret = ret.dtype.type(ret / rcount)


There are several output folders from training multiple models.  

In [50]:
import pandas as pd

# Create DataFrame
df = pd.DataFrame({
    "Fold": list(range(1, 6)) + ["Average"],
    "Segm AP@0.5:0.95": segm_ap_list + [avg_segm_ap],
    "Segm AP@0.5": segm_ap_50_list + [avg_segm_ap_50],
    "Segm AP@0.75": segm_ap_75_list + [avg_segm_ap_75],
    "Segm AP (Small)": segm_ap_small_list + [avg_segm_ap_small],
    "Segm AP (Medium)": segm_ap_medium_list + [avg_segm_ap_medium],
    "Segm AP (Large)": segm_ap_large_list + [avg_segm_ap_large],
})

# Save to CSV
csv_path = f"{output_folder}/leaf_mAP_results.csv"
df.to_csv(csv_path, index=False)

print(f"\n===== Average Segmentation AP for Leaf Across 5 Folds =====")
print(df)
print(f"\nResults saved to {csv_path}")




===== Average Segmentation AP for Leaf Across 5 Folds =====
      Fold  Segm AP@0.5:0.95  Segm AP@0.5  Segm AP@0.75  Segm AP (Small)  \
0        1         50.699781    71.556017     58.240435              0.0   
1        2         50.362510    80.443395     54.372890              0.0   
2        3         54.437614    77.499942     61.451514              0.0   
3        4         47.301129    72.175564     52.612351              0.0   
4        5         42.204802    68.695043     47.479718              0.0   
5  Average         49.001167    74.073992     54.831381              0.0   

   Segm AP (Medium)  Segm AP (Large)  
0         10.396040        90.442620  
1         50.060192        93.972560  
2         43.889723        90.360239  
3         16.390424        90.477845  
4         27.906394        83.339819  
5         29.728555        89.718617  

Results saved to /home/jovyan/work/mask_rcnn/2024-03-15_kfold_train/leaf_mAP_results.csv
