# Step 5: Train + Evaluate

Train a detector on your labeled data and evaluate it properly. Understanding **where** the model fails tells you what to label next.

> **Note:** We train on `human_labels` and also evaluate against `human_labels` on the val set. For this tutorial, we copy `ground_truth` into `human_labels` for the val samples (filtered to the schema classes) so that we have labels to evaluate against. In production, you would label the val set too.

In [None]:
!pip install -q ultralytics

In [None]:
import fiftyone as fo
from fiftyone import ViewField as F
import os

# Name of the sample field that holds the human-provided detections
LABEL_FIELD = "human_labels"

dataset = fo.load_dataset("annotation_tutorial")

# Use the schema stored in the dataset info when present; otherwise fall
# back to the built-in class list below.
SCHEMA_CLASSES = (
    set(dataset.info["annotation_schema"]["classes"])
    if "annotation_schema" in dataset.info
    else {
        "person", "car", "truck", "bus", "motorcycle", "bicycle",
        "dog", "cat", "bird", "horse",
        "chair", "couch", "dining table", "tv",
        "bottle", "cup", "bowl", "other",
    }
)

print(f"Schema classes: {len(SCHEMA_CLASSES)}")

## Get Training Data

**Important:** We only train on samples that actually have labels, not just samples tagged as "annotated".

In [None]:
# Samples carrying the "annotated:v0" tag (the tag alone does not guarantee labels)
annotated = dataset.match_tags("annotated:v0")

# Keep only the tagged samples whose label field holds at least one detection
has_detections = F(f"{LABEL_FIELD}.detections").length() > 0
train_view = annotated.match(has_detections)

# Validation samples come from the saved view "val_set"
val_view = dataset.load_saved_view("val_set")

num_train = len(train_view)
print(f"Annotated samples: {len(annotated)}")
print(f"Training samples (with labels): {num_train}")
print(f"Validation samples: {len(val_view)}")

# Warn (without raising) when there is nothing to train on yet
if not num_train:
    print("\n>>> No training samples with labels. Complete Step 4 first.")

In [None]:
# For evaluation, we need human_labels on the val set.
# In production, you'd label these. For the tutorial, we copy ground_truth,
# FILTERED to the schema classes for consistency with the training labels.
# Samples that already have a non-empty label field are left untouched.

copied_count = 0
skipped_count = 0

# Reading a field that is not in the dataset schema raises KeyError, so check
# once up front; if the field is missing, no sample can have labels in it yet.
label_field_exists = dataset.has_sample_field(LABEL_FIELD)

# autosave=True batches the database writes instead of issuing one
# sample.save() call per sample.
for sample in val_view.iter_samples(autosave=True, progress=True):
    ground_truth = sample.ground_truth
    already_labeled = label_field_exists and sample[LABEL_FIELD]
    if not ground_truth or already_labeled:
        continue

    # Copy only label + box so the new detections are independent objects
    kept = [
        fo.Detection(label=d.label, bounding_box=d.bounding_box)
        for d in ground_truth.detections
        if d.label in SCHEMA_CLASSES
    ]
    copied_count += len(kept)
    skipped_count += len(ground_truth.detections) - len(kept)

    sample[LABEL_FIELD] = fo.Detections(detections=kept)

print(f"Val set prepared: {copied_count} detections copied, {skipped_count} skipped (not in schema)")

## Export for Training

In [None]:
# Nothing can be exported or trained without labeled samples
if not len(train_view):
    raise ValueError("No training samples. Complete Step 4 first.")

# Class list = the distinct detection labels present in the training view
label_path = f"{LABEL_FIELD}.detections.label"
classes = train_view.distinct(label_path)
print(f"Classes in training data: {classes}")

# Root directory for the YOLO-format export (created if missing)
export_dir = "/tmp/annotation_tutorial_yolo"
os.makedirs(export_dir, exist_ok=True)

In [None]:
# Export the training and validation views to <export_dir>/train and
# <export_dir>/val in YOLOv5 format, using the same class list for both so
# that class indices agree between the two splits.
#
# overwrite=True deletes any previous export first. Without it, re-running
# this cell merges into the existing directories, so images/labels from an
# earlier run (or an earlier class list) can leak into training.
for split_name, split_view in (("train", train_view), ("val", val_view)):
    split_view.export(
        export_dir=os.path.join(export_dir, split_name),
        dataset_type=fo.types.YOLOv5Dataset,
        label_field=LABEL_FIELD,
        classes=classes,
        overwrite=True,
    )

print(f"Exported {len(train_view)} train, {len(val_view)} val samples to {export_dir}")

In [None]:
# Create the dataset YAML config consumed by the trainer.
import json

# Class names are written as double-quoted scalars (a JSON string is a valid
# YAML double-quoted string). Unquoted, labels such as "no", "on", "null",
# digits, or anything containing ": " would be parsed as a non-string value
# or break the file.
name_lines = [
    f"  {i}: {json.dumps(cls, ensure_ascii=False)}"
    for i, cls in enumerate(classes)
]

yaml_content = "\n".join(
    [
        f"path: {export_dir}",
        "train: train/images",
        "val: val/images",
        "",
        "names:",
        *name_lines,
    ]
) + "\n"

yaml_path = os.path.join(export_dir, "dataset.yaml")
with open(yaml_path, "w", encoding="utf-8") as f:
    f.write(yaml_content)

print(f"Created {yaml_path}")

## Train YOLOv8

In [None]:
from ultralytics import YOLO

# Where the training run is written: <TRAIN_PROJECT>/<TRAIN_RUN_NAME>/
TRAIN_PROJECT = '/tmp/yolo_tutorial'
TRAIN_RUN_NAME = 'tutorial_v0'

# Train (small model, few epochs for demo)
model = YOLO('yolov8n.pt')
results = model.train(
    data=yaml_path,
    epochs=10,
    imgsz=640,
    batch=8,
    name=TRAIN_RUN_NAME,
    project=TRAIN_PROJECT,
    # Reuse the same run directory when this cell is re-run. Without this,
    # a second run is written to an auto-incremented directory and the path
    # below would silently keep pointing at the first run's stale weights.
    exist_ok=True,
)

# Built from the same project/name values passed to train() so the two
# cannot drift apart.
model_path = os.path.join(TRAIN_PROJECT, TRAIN_RUN_NAME, 'weights', 'best.pt')
print(f"Model saved: {model_path}")

## Run Inference on Validation

In [None]:
# Load the weights produced by the training run
model = YOLO(model_path)

# Run inference on the val set and store the output in the "predictions" field.
# autosave=True batches the database writes instead of one sample.save() per
# sample; progress=True shows a progress bar for the loop.
for sample in val_view.iter_samples(autosave=True, progress=True):
    # The model returns a list with one result per input image
    prediction = model(sample.filepath, verbose=False)[0]

    detections = []
    if prediction.boxes is not None:
        for box in prediction.boxes:
            # Normalized corner coordinates (x1, y1, x2, y2)
            x1, y1, x2, y2 = box.xyxyn[0].tolist()
            conf = box.conf[0].item()
            cls_idx = int(box.cls[0].item())
            # Map the class index back to its label; fall back to a
            # placeholder name if the index is outside the exported class list
            label = classes[cls_idx] if cls_idx < len(classes) else f"class_{cls_idx}"

            detections.append(fo.Detection(
                label=label,
                # Convert corners to [top-left x, top-left y, width, height]
                bounding_box=[x1, y1, x2 - x1, y2 - y1],
                confidence=conf,
            ))

    sample["predictions"] = fo.Detections(detections=detections)

print(f"Added predictions to {len(val_view)} val samples")

## Evaluate

In [None]:
# Compare the "predictions" field against the human labels on the val view.
# The results are stored under the evaluation key "eval_v0".
eval_options = dict(
    gt_field=LABEL_FIELD,
    eval_key="eval_v0",
    compute_mAP=True,
)
results = val_view.evaluate_detections("predictions", **eval_options)

# Headline number first, then the full report
mean_ap = results.mAP()
print(f"mAP: {mean_ap:.3f}")
results.print_report()

## Analyze Failures

Understanding failures is more important than the mAP number.

In [None]:
# Clear failure tags left over from a previous run of this cell so the tags
# always reflect the current evaluation.
val_view.untag_samples(["failure:high_fn", "failure:high_fp"])

# Find high-FN samples (model missed objects): the (up to) 10 samples with
# the most false negatives. Samples with zero false negatives are excluded,
# so a sample is never tagged as a failure just to fill the top 10.
high_fn = (
    val_view
    .match(F("eval_v0_fn") > 0)
    .sort_by("eval_v0_fn", reverse=True)
    .limit(10)
)
high_fn.tag_samples("failure:high_fn")

# Find high-FP samples (model hallucinated): same selection on false positives
high_fp = (
    val_view
    .match(F("eval_v0_fp") > 0)
    .sort_by("eval_v0_fp", reverse=True)
    .limit(10)
)
high_fp.tag_samples("failure:high_fp")

print(f"Tagged {len(high_fn)} high-FN samples")
print(f"Tagged {len(high_fp)} high-FP samples")

In [None]:
# View failures in the App: open the FiftyOne App on the validation view.
# The returned session handle is kept in `session`.
session = fo.launch_app(val_view)

In the App:
1. Filter by the `failure:high_fn` sample tag to see samples where the model missed objects
2. Filter by the `failure:high_fp` sample tag to see samples where the model hallucinated objects
3. Look for patterns: specific classes? lighting conditions? object sizes?

These patterns tell you what to label next.

In [None]:
# Record a summary of this evaluation round in the dataset info
dataset.info["eval_v0"] = {
    # Stored as a plain Python float so the value is a simple serializable number
    "mAP": float(results.mAP()),
    "train_samples": len(train_view),
    "val_samples": len(val_view),
    "model_path": model_path,
}
dataset.save()

# Save a view of every val sample that carries at least one failure tag.
# overwrite=True lets this cell be re-run: without it, saving a view under a
# name that already exists fails.
failures = val_view.match_tags(["failure:high_fn", "failure:high_fp"])
dataset.save_view("eval_v0_failures", failures, overwrite=True)

print(f"Saved {len(failures)} failure samples to view 'eval_v0_failures'")

## Summary

You trained and evaluated a model:
- Filtered to samples with actual labels (not just tagged as annotated)
- Exported in YOLO format
- Trained YOLOv8n for 10 epochs
- Evaluated with FiftyOne: mAP + per-sample FP/FN
- Tagged failure cases for next iteration

**Key insight:** The failure tags tell you what to label next.

**Next:** Step 6 - Iteration Loop