In [11]:
import os
from PIL import Image, ImageEnhance
import torchvision.transforms.functional as TF
import torch
from torchvision.ops import nms
from inference_sdk import InferenceHTTPClient
from pycocotools.coco import COCO
from pycocotools.cocoeval import COCOeval
from coco_eval import CocoEvaluator
import numpy as np
import random


In [3]:
# Dummy paths - replace these
BASE_DIR = "./Road-Damage-Indonesia-4/test"
ANNOTATION_FILE = "./Road-Damage-Indonesia-4/test/_annotations.coco.json"
IMAGE_DIR = BASE_DIR

# Replace with your Roboflow API info
API_KEY = "pt6GF9eJQFZMOFNFL7sc"
MODEL_ID = "road-damage-indonesia-fou2m/2"

CLIENT = InferenceHTTPClient(
    api_url = "https://detect.roboflow.com",
    api_key = API_KEY
)

In [4]:
coco_gt = COCO(ANNOTATION_FILE)
image_ids = coco_gt.getImgIds()
image_paths = [os.path.join(IMAGE_DIR, coco_gt.loadImgs(i)[0]['file_name']) for i in image_ids[:10]]
image_paths

loading annotations into memory...
Done (t=0.00s)
creating index...
index created!


['./Road-Damage-Indonesia-4/test\\X_27330_jpeg.rf.b1af74b216d0542841dd23ca817fdc71.jpg',
 './Road-Damage-Indonesia-4/test\\X_27340_jpeg.rf.4af9e108a84c2761e5fb60b44f0bbdf2.jpg',
 './Road-Damage-Indonesia-4/test\\X_9320_jpeg.rf.bae4afd615668c06c2c174cc14d8de6f.jpg',
 './Road-Damage-Indonesia-4/test\\X_2810_jpeg.rf.3e8d39d3e615feef9df9ed3f3da06473.jpg',
 './Road-Damage-Indonesia-4/test\\Y_17270_jpeg.rf.10b9aae5082f769ca285bea4e1985f92.jpg',
 './Road-Damage-Indonesia-4/test\\Y_31920_jpeg.rf.6de7e47078672dac0d47dc8a8e4e8c48.jpg',
 './Road-Damage-Indonesia-4/test\\X_6210_jpeg.rf.75e90db30e3482f51d92509585eb2035.jpg',
 './Road-Damage-Indonesia-4/test\\Y_27220_jpeg.rf.fac59348b40dc5052e7931ef5b9c7272.jpg',
 './Road-Damage-Indonesia-4/test\\Y_31940_jpeg.rf.d467a1dec0c41c989650f91336e343c7.jpg',
 './Road-Damage-Indonesia-4/test\\Y_29650_jpeg.rf.a3f36abccfaa32ba8b6d870c5c372349.jpg']

In [5]:
# Build class name to ID map
categories = coco_gt.loadCats(coco_gt.getCatIds())
CLASS_NAME_TO_ID = {cat['name']: cat['id'] for cat in categories}

In [13]:
import torch
import torchvision.transforms.functional as TF
from PIL import Image, ImageEnhance
from torchvision.ops import nms
from pycocotools.coco import COCO
from pycocotools.cocoeval import COCOeval
import numpy as np

# ----------------------------
# AUGMENTATION FUNCTIONS
# ----------------------------
def get_augmented_images(img):
    img = img.convert("RGB")
    img_tensor = TF.to_tensor(img)
    augmentations = []

    augmentations.append(("original", img_tensor))
    augmentations.append(("hflip", torch.flip(img_tensor, dims=[2])))

    small = TF.resize(img_tensor, [int(img_tensor.shape[1] * 0.75), int(img_tensor.shape[2] * 0.75)])
    small = TF.resize(small, [img_tensor.shape[1], img_tensor.shape[2]])
    augmentations.append(("scale_down", small))

    large = TF.resize(img_tensor, [int(img_tensor.shape[1] * 1.25), int(img_tensor.shape[2] * 1.25)])
    large = TF.resize(large, [img_tensor.shape[1], img_tensor.shape[2]])
    augmentations.append(("scale_up", large))

    bright = TF.to_tensor(ImageEnhance.Brightness(TF.to_pil_image(img_tensor)).enhance(1.5))
    augmentations.append(("bright", bright))

    contrast = TF.to_tensor(ImageEnhance.Contrast(TF.to_pil_image(img_tensor)).enhance(1.3))
    augmentations.append(("contrast", contrast))

    rotate_p = TF.to_tensor(
        TF.rotate(TF.to_pil_image(img_tensor), 15, expand=True).resize((img_tensor.shape[2], img_tensor.shape[1]))
    )
    augmentations.append(("rotate_15", rotate_p))

    rotate_m = TF.to_tensor(
        TF.rotate(TF.to_pil_image(img_tensor), -15, expand=True).resize((img_tensor.shape[2], img_tensor.shape[1]))
    )
    augmentations.append(("rotate_-15", rotate_m))

    return augmentations

# ----------------------------
# PREDICTION UTILITIES
# ----------------------------
def undo_horizontal_flip(preds, width):
    for pred in preds.get("predictions", []):
        pred["x"] = width - pred["x"]
    return preds

def aggregate_predictions(preds_list, iou_thresh=0.5):
    all_boxes, all_scores, all_labels = [], [], []
    for preds in preds_list:
        for pred in preds.get("predictions", []):
            x1 = pred["x"] - pred["width"] / 2
            y1 = pred["y"] - pred["height"] / 2
            x2 = pred["x"] + pred["width"] / 2
            y2 = pred["y"] + pred["height"] / 2
            box = [x1, y1, x2, y2]
            all_boxes.append(box)
            all_scores.append(pred["confidence"])
            all_labels.append(pred["class"])

    if not all_boxes:
        return []

    boxes = torch.tensor(all_boxes)
    scores = torch.tensor(all_scores)
    labels = list(all_labels)
    keep = nms(boxes, scores, iou_thresh)

    return [{
        "bbox": boxes[i].tolist(),
        "score": scores[i].item(),
        "label": labels[i]
    } for i in keep]

def coco_xyxy_to_xywh(bbox):
    x1, y1, x2, y2 = bbox
    return [x1, y1, x2 - x1, y2 - y1]

# ----------------------------
# MAIN INFERENCE + EVALUATION
# ----------------------------

# TODO: Replace these with your actual setup
# coco_gt = COCO("path_to_groundtruth.json")
# image_paths = [...]  # list of image file paths
# image_ids = [...]    # corresponding COCO image IDs
# MODEL_ID = "your-yolov12-model-id"
# CLASS_NAME_TO_ID = {"cat": 1, "dog": 2, ...}  # your label map
# CLIENT = your inference client (must have .infer(image, model_id) -> dict)

all_coco_formatted = []

for idx, image_path in enumerate(image_paths):
    image_id = image_ids[idx]

    try:
        original_img = Image.open(image_path)
    except Exception as e:
        print(f"Error opening {image_path}: {e}")
        continue

    tta_images = get_augmented_images(original_img)
    img_width = original_img.width
    all_predictions = []

    for aug_type, aug_tensor in tta_images:
        aug_pil = TF.to_pil_image(aug_tensor)
        result = CLIENT.infer(aug_pil, model_id=MODEL_ID)

        if aug_type == "hflip":
            result = undo_horizontal_flip(result, img_width)

        all_predictions.append(result)

    merged = aggregate_predictions(all_predictions)

    for obj in merged:
        label_name = obj["label"]
        if label_name not in CLASS_NAME_TO_ID:
            continue
        all_coco_formatted.append({
            "image_id": image_id,
            "category_id": CLASS_NAME_TO_ID[label_name],
            "bbox": coco_xyxy_to_xywh(obj["bbox"]),
            "score": obj["score"]
        })

# ----------------------------
# COCO Evaluation
# ----------------------------
if len(all_coco_formatted) > 0:
    coco_dt = coco_gt.loadRes(all_coco_formatted)
    coco_eval = COCOeval(coco_gt, coco_dt, iouType='bbox')
    coco_eval.params.imgIds = image_ids[:len(image_paths)]
    coco_eval.evaluate()
    coco_eval.accumulate()
    coco_eval.summarize()

    precision = coco_eval.eval['precision']
    precision_50 = precision[0, :, -1, 0, 2]
    valid = precision_50[precision_50 > -1]
    mean_precision_50 = np.mean(valid)
    print(f"Precision @ IoU=0.50: {mean_precision_50:.3f}")
else:
    print("No valid predictions to evaluate.")


Loading and preparing results...
DONE (t=0.00s)
creating index...
index created!
Running per image evaluation...
Evaluate annotation type *bbox*
DONE (t=0.00s).
Accumulating evaluation results...
DONE (t=0.00s).
 Average Precision  (AP) @[ IoU=0.50:0.95 | area=   all | maxDets=100 ] = 0.129
 Average Precision  (AP) @[ IoU=0.50      | area=   all | maxDets=100 ] = 0.265
 Average Precision  (AP) @[ IoU=0.75      | area=   all | maxDets=100 ] = 0.080
 Average Precision  (AP) @[ IoU=0.50:0.95 | area= small | maxDets=100 ] = 0.300
 Average Precision  (AP) @[ IoU=0.50:0.95 | area=medium | maxDets=100 ] = 0.094
 Average Precision  (AP) @[ IoU=0.50:0.95 | area= large | maxDets=100 ] = 0.385
 Average Recall     (AR) @[ IoU=0.50:0.95 | area=   all | maxDets=  1 ] = 0.144
 Average Recall     (AR) @[ IoU=0.50:0.95 | area=   all | maxDets= 10 ] = 0.231
 Average Recall     (AR) @[ IoU=0.50:0.95 | area=   all | maxDets=100 ] = 0.231
 Average Recall     (AR) @[ IoU=0.50:0.95 | area= small | maxDets=10