In [1]:
import os
import sys
import time
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
from pathlib import Path
import json
import warnings
warnings.filterwarnings('ignore')

In [2]:
# Install required packages
!pip install -q ultralytics
!pip install -q torch torchvision
!pip install -q pycocotools
!pip install -q opencv-python

[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m62.0/62.0 kB[0m [31m3.8 MB/s[0m eta [36m0:00:00[0m
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m1.1/1.1 MB[0m [31m22.6 MB/s[0m eta [36m0:00:00[0m
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m16.8/16.8 MB[0m [31m88.5 MB/s[0m eta [36m0:00:00[0m
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m363.4/363.4 MB[0m [31m4.8 MB/s[0m eta [36m0:00:00[0m
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m13.8/13.8 MB[0m [31m87.6 MB/s[0m eta [36m0:00:00[0m
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m24.6/24.6 MB[0m [31m66.1 MB/s[0m eta [36m0:00:00[0m
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m883.7/883.7 kB[0m [31m51.1 MB/s[0m eta [36m0:00:00[0m
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m664.8/664.8 MB[0m [31m2.6 MB/s[0m eta [36m0:00:00[0m
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━

In [3]:
import torch
import torchvision
from torchvision.models.detection import fasterrcnn_resnet50_fpn, ssd300_vgg16
from torchvision.models.detection.faster_rcnn import FastRCNNPredictor
from ultralytics import YOLO
import cv2
from pycocotools.coco import COCO
from pycocotools.cocoeval import COCOeval

print(f"PyTorch version: {torch.__version__}")
print(f"Torchvision version: {torchvision.__version__}")
print(f"CUDA available: {torch.cuda.is_available()}")
if torch.cuda.is_available():
    print(f"CUDA device: {torch.cuda.get_device_name(0)}")


Creating new Ultralytics Settings v0.0.6 file ✅ 
View Ultralytics Settings with 'yolo settings' or at '/root/.config/Ultralytics/settings.json'
Update Settings with 'yolo settings key=value', i.e. 'yolo settings runs_dir=path/to/dir'. For help see https://docs.ultralytics.com/quickstart/#ultralytics-settings.
PyTorch version: 2.6.0+cu124
Torchvision version: 0.21.0+cu124
CUDA available: True
CUDA device: Tesla T4


In [4]:
# Download COCO128 dataset
!wget -q https://ultralytics.com/assets/coco128.zip
!unzip -q coco128.zip
!rm coco128.zip

# Set paths
COCO128_PATH = Path('coco128')
IMAGES_PATH = COCO128_PATH / 'images' / 'train2017'
LABELS_PATH = COCO128_PATH / 'labels' / 'train2017'

print(f"Images path: {IMAGES_PATH}")
print(f"Labels path: {LABELS_PATH}")
print(f"Number of images: {len(list(IMAGES_PATH.glob('*.jpg')))}")

Images path: coco128/images/train2017
Labels path: coco128/labels/train2017
Number of images: 128


In [5]:
# Create COCO format annotations for evaluation
def create_coco_annotations():
    """Convert YOLO format to COCO format for evaluation"""
    images = []
    annotations = []
    ann_id = 1
    
    for idx, img_path in enumerate(sorted(IMAGES_PATH.glob('*.jpg')), 1):
        img = cv2.imread(str(img_path))
        h, w = img.shape[:2]
        
        images.append({
            'id': idx,
            'file_name': img_path.name,
            'height': h,
            'width': w
        })
        
        # Read YOLO format labels
        label_path = LABELS_PATH / f"{img_path.stem}.txt"
        if label_path.exists():
            with open(label_path, 'r') as f:
                for line in f:
                    class_id, x_center, y_center, width, height = map(float, line.strip().split())
                    
                    # Convert YOLO format to COCO format (x, y, width, height)
                    x = (x_center - width / 2) * w
                    y = (y_center - height / 2) * h
                    box_w = width * w
                    box_h = height * h
                    
                    annotations.append({
                        'id': ann_id,
                        'image_id': idx,
                        'category_id': int(class_id) + 1,  # COCO categories start from 1
                        'bbox': [x, y, box_w, box_h],
                        'area': box_w * box_h,
                        'iscrowd': 0
                    })
                    ann_id += 1
    
    # COCO categories (80 classes)
    categories = [{'id': i, 'name': f'class_{i}'} for i in range(1, 81)]
    
    # Complete COCO format with required fields
    coco_format = {
        'info': {
            'description': 'COCO128 Dataset',
            'url': 'https://github.com/ultralytics/coco128',
            'version': '1.0',
            'year': 2024,
            'contributor': 'Ultralytics',
            'date_created': '2024/01/01'
        },
        'licenses': [{
            'id': 1,
            'name': 'Attribution-NonCommercial-ShareAlike License',
            'url': 'http://creativecommons.org/licenses/by-nc-sa/2.0/'
        }],
        'images': images,
        'annotations': annotations,
        'categories': categories
    }
    
    # Save annotations
    ann_file = COCO128_PATH / 'annotations.json'
    with open(ann_file, 'w') as f:
        json.dump(coco_format, f)
    
    return str(ann_file)

annotations_file = create_coco_annotations()
print(f"Created COCO annotations: {annotations_file}")

Created COCO annotations: coco128/annotations.json


In [6]:
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')

# Load YOLOv11 models
print("\nLoading YOLOv11 models...")
yolov11m = YOLO('yolo11m.pt')
yolov11l = YOLO('yolo11l.pt')

# Load Faster R-CNN
print("Loading Faster R-CNN...")
faster_rcnn = fasterrcnn_resnet50_fpn(pretrained=True)
faster_rcnn.to(device)
faster_rcnn.eval()

# Load SSD
print("Loading SSD...")
ssd = ssd300_vgg16(pretrained=True)
ssd.to(device)
ssd.eval()

print("\nAll models loaded successfully!")


Loading YOLOv11 models...
[KDownloading https://github.com/ultralytics/assets/releases/download/v8.3.0/yolo11m.pt to 'yolo11m.pt': 100% ━━━━━━━━━━━━ 38.8MB 143.7MB/s 0.3s
[KDownloading https://github.com/ultralytics/assets/releases/download/v8.3.0/yolo11l.pt to 'yolo11l.pt': 100% ━━━━━━━━━━━━ 49.0MB 192.6MB/s 0.3s
Loading Faster R-CNN...


Downloading: "https://download.pytorch.org/models/fasterrcnn_resnet50_fpn_coco-258fb6c6.pth" to /root/.cache/torch/hub/checkpoints/fasterrcnn_resnet50_fpn_coco-258fb6c6.pth
100%|██████████| 160M/160M [00:01<00:00, 166MB/s]


Loading SSD...


Downloading: "https://download.pytorch.org/models/ssd300_vgg16_coco-b556d3b4.pth" to /root/.cache/torch/hub/checkpoints/ssd300_vgg16_coco-b556d3b4.pth
100%|██████████| 136M/136M [00:00<00:00, 212MB/s]



All models loaded successfully!


In [7]:
def warm_up_model(model, model_type, num_iterations=10):
    """Warm up model for accurate timing"""
    dummy_input = torch.randn(1, 3, 640, 640).to(device)
    
    print(f"Warming up {model_type}...")
    for _ in range(num_iterations):
        if model_type.startswith('yolo'):
            model.predict(dummy_input, verbose=False)
        else:
            with torch.no_grad():
                model(dummy_input)
    
    if torch.cuda.is_available():
        torch.cuda.synchronize()

def measure_inference_time(model, model_type, image_paths, num_runs=3):
    """Measure inference time and FPS"""
    times = []
    
    for run in range(num_runs):
        start_time = time.time()
        
        for img_path in image_paths:
            img = cv2.imread(str(img_path))
            
            if model_type.startswith('yolo'):
                results = model.predict(img, verbose=False, conf=0.25)
            else:
                img_tensor = torchvision.transforms.functional.to_tensor(img).unsqueeze(0).to(device)
                with torch.no_grad():
                    predictions = model(img_tensor)
        
        if torch.cuda.is_available():
            torch.cuda.synchronize()
        
        elapsed = time.time() - start_time
        times.append(elapsed)
    
    avg_time = np.mean(times)
    avg_time_per_image = avg_time / len(image_paths)
    fps = 1.0 / avg_time_per_image
    
    return avg_time_per_image, fps

In [8]:
def convert_yolo_to_coco_results(results, img_id):
    """Convert YOLO results to COCO format"""
    coco_results = []
    
    for result in results:
        boxes = result.boxes
        for box in boxes:
            x1, y1, x2, y2 = box.xyxy[0].cpu().numpy()
            conf = float(box.conf[0])
            cls = int(box.cls[0])
            
            coco_results.append({
                'image_id': img_id,
                'category_id': cls + 1,
                'bbox': [float(x1), float(y1), float(x2 - x1), float(y2 - y1)],
                'score': conf
            })
    
    return coco_results

def convert_torchvision_to_coco_results(predictions, img_id):
    """Convert torchvision model results to COCO format"""
    coco_results = []
    
    boxes = predictions[0]['boxes'].cpu().numpy()
    scores = predictions[0]['scores'].cpu().numpy()
    labels = predictions[0]['labels'].cpu().numpy()
    
    for box, score, label in zip(boxes, scores, labels):
        x1, y1, x2, y2 = box
        coco_results.append({
            'image_id': img_id,
            'category_id': int(label),
            'bbox': [float(x1), float(y1), float(x2 - x1), float(y2 - y1)],
            'score': float(score)
        })
    
    return coco_results

def calculate_map50(model, model_type, annotations_file):
    """Calculate mAP@0.5 for a model"""
    print(f"\nCalculating mAP@0.5 for {model_type}...")
    
    coco_gt = COCO(annotations_file)
    image_ids = sorted(coco_gt.getImgIds())
    
    all_results = []
    
    for img_id in image_ids:
        img_info = coco_gt.loadImgs(img_id)[0]
        img_path = IMAGES_PATH / img_info['file_name']
        img = cv2.imread(str(img_path))
        
        if model_type.startswith('yolo'):
            results = model.predict(img, verbose=False, conf=0.001)
            coco_results = convert_yolo_to_coco_results(results, img_id)
        else:
            img_tensor = torchvision.transforms.functional.to_tensor(img).unsqueeze(0).to(device)
            with torch.no_grad():
                predictions = model(img_tensor)
            coco_results = convert_torchvision_to_coco_results(predictions, img_id)
        
        all_results.extend(coco_results)
    
    if not all_results:
        return 0.0
    
    # Save results
    results_file = f'results_{model_type}.json'
    with open(results_file, 'w') as f:
        json.dump(all_results, f)
    
    # Evaluate
    coco_dt = coco_gt.loadRes(results_file)
    coco_eval = COCOeval(coco_gt, coco_dt, 'bbox')
    coco_eval.params.iouThrs = [0.5]  # mAP@0.5
    coco_eval.evaluate()
    coco_eval.accumulate()
    coco_eval.summarize()
    
    map50 = coco_eval.stats[0]  # AP at IoU=0.5
    
    return map50

In [9]:
# Get list of images for timing
image_paths = sorted(list(IMAGES_PATH.glob('*.jpg')))
print(f"\nEvaluating on {len(image_paths)} images")

# Store results
results_data = {
    'Model': [],
    'mAP@0.5': [],
    'Inference Time (ms)': [],
    'FPS': []
}

models_to_evaluate = [
    (yolov11m, 'YOLOv11m'),
    (yolov11l, 'YOLOv11l'),
    (faster_rcnn, 'Faster R-CNN'),
    (ssd, 'SSD300')
]

for model, model_name in models_to_evaluate:
    print(f"\n{'='*60}")
    print(f"Evaluating {model_name}")
    print(f"{'='*60}")
    
    # Warm up
    warm_up_model(model, model_name.lower().replace(' ', '_').replace('-', '_'))
    
    # Measure inference time
    print(f"\nMeasuring inference time...")
    avg_time, fps = measure_inference_time(
        model, 
        model_name.lower().replace(' ', '_').replace('-', '_'),
        image_paths
    )
    
    # Calculate mAP@0.5
    map50 = calculate_map50(
        model,
        model_name.lower().replace(' ', '_').replace('-', '_'),
        annotations_file
    )
    
    # Store results
    results_data['Model'].append(model_name)
    results_data['mAP@0.5'].append(map50 * 100)  # Convert to percentage
    results_data['Inference Time (ms)'].append(avg_time * 1000)  # Convert to ms
    results_data['FPS'].append(fps)
    
    print(f"\nResults for {model_name}:")
    print(f"  mAP@0.5: {map50*100:.2f}%")
    print(f"  Inference Time: {avg_time*1000:.2f} ms")
    print(f"  FPS: {fps:.2f}")


Evaluating on 128 images

Evaluating YOLOv11m
Warming up yolov11m...

Measuring inference time...

Calculating mAP@0.5 for yolov11m...
loading annotations into memory...
Done (t=0.00s)
creating index...
index created!
Loading and preparing results...
DONE (t=0.04s)
creating index...
index created!
Running per image evaluation...
Evaluate annotation type *bbox*
DONE (t=0.29s).
Accumulating evaluation results...
DONE (t=0.11s).
 Average Precision  (AP) @[ IoU=0.50:0.50 | area=   all | maxDets=100 ] = 0.780
 Average Precision  (AP) @[ IoU=0.50      | area=   all | maxDets=100 ] = -1.000
 Average Precision  (AP) @[ IoU=0.75      | area=   all | maxDets=100 ] = -1.000
 Average Precision  (AP) @[ IoU=0.50:0.50 | area= small | maxDets=100 ] = 0.490
 Average Precision  (AP) @[ IoU=0.50:0.50 | area=medium | maxDets=100 ] = 0.807
 Average Precision  (AP) @[ IoU=0.50:0.50 | area= large | maxDets=100 ] = 0.943
 Average Recall     (AR) @[ IoU=0.50:0.50 | area=   all | maxDets=  1 ] = 0.535
 Averag

In [10]:
# Create results DataFrame
df_results = pd.DataFrame(results_data)
print("\n" + "="*60)
print("FINAL RESULTS SUMMARY")
print("="*60)
print(df_results.to_string(index=False))

# Save results to CSV
df_results.to_csv('model_comparison_results.csv', index=False)
print("\nResults saved to 'model_comparison_results.csv'")


FINAL RESULTS SUMMARY
       Model   mAP@0.5  Inference Time (ms)       FPS
    YOLOv11m 78.012838            21.299501 46.949457
    YOLOv11l 76.862296            24.552766 40.728608
Faster R-CNN  9.830760           113.412481  8.817372
      SSD300  7.920735            37.825399 26.437262

Results saved to 'model_comparison_results.csv'


In [11]:
print("\n" + "="*60)
print("DETAILED ANALYSIS")
print("="*60)

# Find best model for each metric
best_map = df_results.loc[df_results['mAP@0.5'].idxmax()]
best_fps = df_results.loc[df_results['FPS'].idxmax()]
best_time = df_results.loc[df_results['Inference Time (ms)'].idxmin()]

print(f"\nBest Accuracy (mAP@0.5): {best_map['Model']} with {best_map['mAP@0.5']:.2f}%")
print(f"Fastest (FPS): {best_fps['Model']} with {best_fps['FPS']:.2f} FPS")
print(f"Lowest Latency: {best_time['Model']} with {best_time['Inference Time (ms)']:.2f} ms")

# Calculate efficiency score (mAP / inference_time)
df_results['Efficiency Score'] = df_results['mAP@0.5'] / df_results['Inference Time (ms)']
best_efficiency = df_results.loc[df_results['Efficiency Score'].idxmax()]
print(f"\nBest Overall Efficiency: {best_efficiency['Model']}")



DETAILED ANALYSIS

Best Accuracy (mAP@0.5): YOLOv11m with 78.01%
Fastest (FPS): YOLOv11m with 46.95 FPS
Lowest Latency: YOLOv11m with 21.30 ms

Best Overall Efficiency: YOLOv11m
