# Elk Detection and Counting Analysis

This notebook analyzes the `grassland.mp4` video file using YOLO object detection to identify and count elk.

## Objectives:
- Load and analyze the grassland video
- Use YOLO to detect animals in each frame
- Filter detections to focus on elk/deer-like animals
- Count elk detections frame by frame throughout the video (no cross-frame identity tracking is performed)
- Visualize results and generate summary statistics

## Setup and Imports

In [None]:
import sys
import os
import cv2
import numpy as np
import matplotlib.pyplot as plt
import pandas as pd
from pathlib import Path
from collections import defaultdict, Counter
import seaborn as sns
from tqdm import tqdm

# Add project root to path
# The parent of the current working directory is treated as the project root
# (assumes the notebook is run from a direct subdirectory of the project —
# TODO confirm). Appending, rather than prepending, means already-importable
# modules of the same name keep priority.
project_root = Path().absolute().parent
sys.path.append(str(project_root))

# Import our YOLO detector
# Must come after the sys.path change above; presumably the `src` package
# lives directly under project_root.
from src.detection.yolo_detector import YOLODetector

# Set up plotting
# Reset matplotlib to its default style, then use seaborn's "husl" palette
# for the colour cycle.
plt.style.use('default')
sns.set_palette("husl")
%matplotlib inline

## Configuration and Constants

In [None]:
# File paths (relative to the notebook's working directory)
VIDEO_PATH = "../data/raw/grassland.mp4"
MODEL_PATH = "../yolov8n.pt"
OUTPUT_DIR = "../results/elk_analysis/"

# Create output directory (no error if it already exists)
os.makedirs(OUTPUT_DIR, exist_ok=True)

# Detection parameters
CONFIDENCE_THRESHOLD = 0.3  # Lower threshold to catch more potential elk
IOU_THRESHOLD = 0.5

# COCO classes that might represent elk/deer/animals.
# We focus on larger mammals that could be mistaken for (or stand in for) elk.
# The ids assume the standard 80-class COCO label set used by the stock
# YOLOv8 weights — verify against `detector.class_names` if a custom model
# is loaded.
#
# A single id -> name mapping is the source of truth so that the id list and
# the name list below can never fall out of sync.
ELK_PROXY_CLASSES = {
    17: 'horse',
    18: 'sheep',
    19: 'cow',
    20: 'elephant',
    21: 'bear',
    22: 'zebra',
    23: 'giraffe',
}
POTENTIAL_ELK_CLASSES = list(ELK_PROXY_CLASSES)
ANIMAL_CLASS_NAMES = list(ELK_PROXY_CLASSES.values())

print(f"Video path: {VIDEO_PATH}")
print(f"Model path: {MODEL_PATH}")
print(f"Output directory: {OUTPUT_DIR}")
print(f"Target animal classes: {ANIMAL_CLASS_NAMES}")

## Initialize YOLO Detector

In [None]:
# Build the detector from the notebook-level configuration.
# target_classes is left as None: class filtering is done manually in a
# later cell so the selection criteria stay flexible.
detector_settings = dict(
    model_path=MODEL_PATH,
    device="auto",
    confidence_threshold=CONFIDENCE_THRESHOLD,
    iou_threshold=IOU_THRESHOLD,
    target_classes=None,
)
detector = YOLODetector(**detector_settings)

# Report whatever metadata the detector exposes about the loaded model
model_info = detector.get_model_info()
print("Model Information:")
for field, setting in model_info.items():
    print(f"  {field}: {setting}")

# List every class label the model can emit
all_class_names = list(detector.class_names.values())
print(f"\nAll available classes: {all_class_names}")

## Video Analysis Setup

In [None]:
# Open video file
cap = cv2.VideoCapture(VIDEO_PATH)

if not cap.isOpened():
    raise ValueError(f"Could not open video file: {VIDEO_PATH}")

# Get video properties
fps = cap.get(cv2.CAP_PROP_FPS)
frame_count = int(cap.get(cv2.CAP_PROP_FRAME_COUNT))
width = int(cap.get(cv2.CAP_PROP_FRAME_WIDTH))
height = int(cap.get(cv2.CAP_PROP_FRAME_HEIGHT))

# fps is used as a divisor for the duration here and for per-frame timestamps
# later. Some backends/containers report 0 for properties they cannot
# determine, so fail early with a clear message instead of a
# ZeroDivisionError. `not (fps > 0)` also rejects NaN.
if not (fps > 0):
    cap.release()
    raise ValueError(
        f"Video reports an invalid frame rate ({fps}); "
        f"cannot compute duration or timestamps: {VIDEO_PATH}"
    )

duration = frame_count / fps

print(f"Video Properties:")
print(f"  Resolution: {width}x{height}")
print(f"  FPS: {fps:.2f}")
print(f"  Total frames: {frame_count}")
print(f"  Duration: {duration:.2f} seconds")

# Sample every N frames to speed up analysis
FRAME_SKIP = max(1, int(fps // 2))  # Sample ~2 frames per second
print(f"  Analyzing every {FRAME_SKIP} frames")

## Frame-by-Frame Analysis

In [None]:
# Storage for results
detection_results = []  # one dict per animal detection, across all frames
frame_data = []         # one dict per analyzed frame
sample_frames = []      # (frame copy, detections, frame index) for plotting

frame_idx = 0
analyzed_frames = 0

# Substrings that mark a class name as animal-like; checked in addition to
# the numeric class-id list. Defined once instead of once per detection.
ANIMAL_NAME_KEYWORDS = ('horse', 'cow', 'sheep', 'deer', 'elk', 'animal')
MAX_SAMPLE_FRAMES = 10  # cap on full-frame copies kept in memory

print("Starting video analysis...")

# Frames 0, FRAME_SKIP, 2*FRAME_SKIP, ... are analyzed, i.e.
# ceil(frame_count / FRAME_SKIP) frames. Floor division undercounts by one
# whenever frame_count is not a multiple of FRAME_SKIP.
expected_samples = -(-frame_count // FRAME_SKIP)
pbar = tqdm(
    # Fall back to an open-ended bar if the container gave no usable count
    total=expected_samples if expected_samples > 0 else None,
    desc="Processing frames",
)

try:
    while True:
        ret, frame = cap.read()
        if not ret:
            break

        # Skip frames for faster processing
        if frame_idx % FRAME_SKIP != 0:
            frame_idx += 1
            continue

        # Run YOLO detection
        detections = detector.detect(frame)
        timestamp = frame_idx / fps

        # Filter for potential elk/animal detections
        animal_detections = []
        for i, class_id in enumerate(detections['class_ids']):
            class_name = detections['class_names'][i]
            lowered_name = class_name.lower()

            # Check if this could be an elk (large mammal)
            if (class_id in POTENTIAL_ELK_CLASSES or
                    any(keyword in lowered_name for keyword in ANIMAL_NAME_KEYWORDS)):

                # assumes boxes are (x1, y1, x2, y2) corner coordinates —
                # TODO confirm against YOLODetector.detect
                box = detections['boxes'][i]
                animal_detections.append({
                    'frame_idx': frame_idx,
                    'timestamp': timestamp,
                    'bbox': box,
                    'confidence': detections['scores'][i],
                    'class_id': class_id,
                    'class_name': class_name,
                    'area': (box[2] - box[0]) * (box[3] - box[1])
                })

        # Store frame data
        frame_info = {
            'frame_idx': frame_idx,
            'timestamp': timestamp,
            'total_detections': len(detections['boxes']),
            'animal_detections': len(animal_detections),
            'elk_candidates': len(animal_detections)  # For now, treat all animals as potential elk
        }

        frame_data.append(frame_info)
        detection_results.extend(animal_detections)

        # Save some sample frames with detections for visualization
        if len(animal_detections) > 0 and len(sample_frames) < MAX_SAMPLE_FRAMES:
            sample_frames.append((frame.copy(), animal_detections, frame_idx))

        analyzed_frames += 1
        frame_idx += 1
        pbar.update(1)
finally:
    # Always release the progress bar and the capture handle, even if
    # detection raises part-way through. The capture is closed after this
    # cell, so it must be reopened before the loop can be run again.
    pbar.close()
    cap.release()

print(f"\nAnalysis complete!")
print(f"Analyzed {analyzed_frames} frames")
print(f"Found {len(detection_results)} potential elk detections")
print(f"Saved {len(sample_frames)} sample frames")

## Results Analysis

In [None]:
# Convert results to DataFrames for analysis.
# The frame table is built with explicit columns so that it still has the
# expected schema when no frames were analyzed; a DataFrame built from an
# empty list has no columns, and column lookups would raise KeyError.
FRAME_COLUMNS = [
    'frame_idx',
    'timestamp',
    'total_detections',
    'animal_detections',
    'elk_candidates',
]
df_detections = pd.DataFrame(detection_results)
df_frames = pd.DataFrame(frame_data, columns=FRAME_COLUMNS)

print("Detection Summary:")
if len(df_detections) > 0:
    print(f"Total detections: {len(df_detections)}")
    print(f"Unique classes detected: {df_detections['class_name'].unique()}")
    print(f"Average confidence: {df_detections['confidence'].mean():.3f}")
    print(f"Confidence range: {df_detections['confidence'].min():.3f} - {df_detections['confidence'].max():.3f}")

    # Class distribution
    print("\nClass distribution:")
    class_counts = df_detections['class_name'].value_counts()
    for class_name, count in class_counts.items():
        print(f"  {class_name}: {count}")
else:
    print("No animal detections found in the video.")

print(f"\nFrame Summary:")
if df_frames.empty:
    # max()/mean() of an empty column are NaN; say what happened instead
    print("No frames were analyzed.")
else:
    print(f"Frames with detections: {(df_frames['animal_detections'] > 0).sum()}")
    print(f"Max detections in single frame: {df_frames['animal_detections'].max()}")
    print(f"Average detections per frame: {df_frames['animal_detections'].mean():.2f}")

## Elk Counting Analysis

In [None]:
# Estimate elk count using different methods.
# Methods 1-3 are per-frame figures; Methods 4-5 are totals summed over all
# analyzed frames (the same animal is counted once per frame it appears in),
# so they indicate detection quality rather than the number of individuals.

HIGH_CONFIDENCE_MIN = 0.5   # detections scoring above this count as "high confidence"
LARGE_AREA_MIN_PX = 1000    # bounding boxes larger than this (px^2) count as "large"

if len(df_detections) > 0:
    # Method 1: Maximum detections in any single frame
    max_elk_single_frame = df_frames['animal_detections'].max()

    # Method 2: Average detections across frames with detections
    frames_with_detections = df_frames[df_frames['animal_detections'] > 0]
    avg_elk_per_frame = frames_with_detections['animal_detections'].mean() if len(frames_with_detections) > 0 else 0

    # Method 3: Median detections in frames with detections
    median_elk_per_frame = frames_with_detections['animal_detections'].median() if len(frames_with_detections) > 0 else 0

    # Method 4: Detections above the high-confidence threshold
    high_conf_detections = df_detections[df_detections['confidence'] > HIGH_CONFIDENCE_MIN]

    # Method 5: Detections with larger bounding boxes
    large_detections = df_detections[df_detections['area'] > LARGE_AREA_MIN_PX]

    print("Elk Count Estimates:")
    print(f"Method 1 - Maximum in single frame: {max_elk_single_frame}")
    print(f"Method 2 - Average per frame: {avg_elk_per_frame:.1f}")
    print(f"Method 3 - Median per frame: {median_elk_per_frame:.1f}")
    print(f"Method 4 - High confidence detections: {len(high_conf_detections)}")
    print(f"Method 5 - Large detections: {len(large_detections)}")

    # Best estimate (using maximum as it's likely the most accurate for counting individuals).
    # Converted to a plain int: pandas returns a NumPy integer here, which
    # json cannot serialise when the summary report is written.
    estimated_elk_count = int(max_elk_single_frame)
    print(f"\n*** ESTIMATED ELK COUNT: {estimated_elk_count} ***")

else:
    print("No elk detected in the video.")
    estimated_elk_count = 0

## Visualization

In [None]:
# Four-panel overview: detections over time, confidence histogram,
# bounding-box area histogram, and per-class counts.
fig, axes = plt.subplots(2, 2, figsize=(15, 12))
(ax_timeline, ax_confidence), (ax_area, ax_classes) = axes

# Panel 1: number of animal detections at each analyzed timestamp
ax_timeline.plot(df_frames['timestamp'], df_frames['animal_detections'], 'b-', alpha=0.7)
ax_timeline.set_xlabel('Time (seconds)')
ax_timeline.set_ylabel('Number of Detections')
ax_timeline.set_title('Elk Detections Over Time')
ax_timeline.grid(True, alpha=0.3)

# The remaining panels describe individual detections, so they are only
# filled in when at least one detection exists; otherwise they stay blank.
if len(df_detections) > 0:
    # Panel 2: confidence histogram with the mean marked
    mean_confidence = df_detections['confidence'].mean()
    ax_confidence.hist(df_detections['confidence'], bins=20, alpha=0.7, color='green')
    ax_confidence.axvline(mean_confidence, color='red', linestyle='--',
                          label=f'Mean: {mean_confidence:.3f}')
    ax_confidence.set_xlabel('Confidence Score')
    ax_confidence.set_ylabel('Frequency')
    ax_confidence.set_title('Detection Confidence Distribution')
    ax_confidence.legend()
    ax_confidence.grid(True, alpha=0.3)

    # Panel 3: bounding-box area histogram
    ax_area.hist(df_detections['area'], bins=20, alpha=0.7, color='orange')
    ax_area.set_xlabel('Bounding Box Area (pixels²)')
    ax_area.set_ylabel('Frequency')
    ax_area.set_title('Detection Size Distribution')
    ax_area.grid(True, alpha=0.3)

    # Panel 4: how many detections fell into each class
    class_counts = df_detections['class_name'].value_counts()
    bar_positions = range(len(class_counts))
    ax_classes.bar(bar_positions, class_counts.values, alpha=0.7, color='purple')
    ax_classes.set_xticks(bar_positions)
    ax_classes.set_xticklabels(class_counts.index, rotation=45)
    ax_classes.set_ylabel('Count')
    ax_classes.set_title('Detected Classes')
    ax_classes.grid(True, alpha=0.3)

plt.tight_layout()
plt.savefig(f"{OUTPUT_DIR}/elk_analysis_plots.png", dpi=300, bbox_inches='tight')
plt.show()

## Sample Frame Visualization

In [None]:
# Visualize sample frames with detections
def draw_detections(image, detections):
    """Return a copy of *image* with a labelled box drawn for each detection.

    Args:
        image: Image array to annotate; it is copied, never modified.
        detections: Iterable of dicts with keys ``'bbox'`` (four values
            unpacked as x1, y1, x2, y2), ``'class_name'`` and
            ``'confidence'``.

    Returns:
        The annotated copy. With no detections this is an unmodified copy.
    """
    img_copy = image.copy()

    for det in detections:
        # int() truncates toward zero like ndarray.astype(int), but also
        # accepts plain lists/tuples of coordinates.
        x1, y1, x2, y2 = (int(coord) for coord in det['bbox'])

        # Draw bounding box
        cv2.rectangle(img_copy, (x1, y1), (x2, y2), (0, 255, 0), 2)

        # Draw label
        label = f"{det['class_name']}: {det['confidence']:.2f}"
        label_size = cv2.getTextSize(label, cv2.FONT_HERSHEY_SIMPLEX, 0.5, 2)[0]
        label_height = label_size[1] + 10

        # The label normally sits just above the box. When the box is too
        # close to the top of the image for that, place it just inside the
        # box's top edge so the text is not drawn off-image.
        label_bottom = y1 if y1 >= label_height else y1 + label_height

        cv2.rectangle(img_copy, (x1, label_bottom - label_height),
                      (x1 + label_size[0], label_bottom), (0, 255, 0), -1)
        cv2.putText(img_copy, label, (x1, label_bottom - 5),
                    cv2.FONT_HERSHEY_SIMPLEX, 0.5, (0, 0, 0), 2)

    return img_copy

# Show up to six of the stored sample frames in a 2x3 grid
if not sample_frames:
    print("No sample frames with detections to display.")
else:
    n_samples = min(6, len(sample_frames))
    fig, axes = plt.subplots(2, 3, figsize=(18, 12))
    axes = axes.flatten()

    # Pair each stored sample with its own panel
    for panel, (frame, detections, frame_idx) in zip(axes, sample_frames[:n_samples]):
        # Overlay boxes and labels on a copy of the frame
        annotated_frame = draw_detections(frame, detections)

        # OpenCV frames are BGR; matplotlib expects RGB
        annotated_frame_rgb = cv2.cvtColor(annotated_frame, cv2.COLOR_BGR2RGB)

        panel.imshow(annotated_frame_rgb)
        panel.set_title(f'Frame {frame_idx} - {len(detections)} detections')
        panel.axis('off')

    # Blank out any panels that received no image
    for panel in axes[n_samples:]:
        panel.axis('off')

    plt.tight_layout()
    plt.savefig(f"{OUTPUT_DIR}/sample_detections.png", dpi=300, bbox_inches='tight')
    plt.show()

## Export Results

In [None]:
import json

# Save detailed results to CSV
if len(df_detections) > 0:
    df_detections.to_csv(f"{OUTPUT_DIR}/elk_detections.csv", index=False)
    print(f"Saved detailed detections to: {OUTPUT_DIR}/elk_detections.csv")

df_frames.to_csv(f"{OUTPUT_DIR}/frame_analysis.csv", index=False)
print(f"Saved frame analysis to: {OUTPUT_DIR}/frame_analysis.csv")

# Number of analyzed frames containing at least one animal detection.
# Guarded because a frame table built from zero frames has no columns.
if 'animal_detections' in df_frames.columns:
    frames_with_animals = int((df_frames['animal_detections'] > 0).sum())
else:
    frames_with_animals = 0

# Create summary report.
# Numeric values are converted to built-in int/float: pandas reductions
# return NumPy scalars (e.g. int64), which json refuses to serialise with
# "TypeError: Object of type int64 is not JSON serializable".
summary_report = {
    'video_file': VIDEO_PATH,
    'analysis_date': pd.Timestamp.now().strftime('%Y-%m-%d %H:%M:%S'),
    'video_duration_seconds': float(duration),
    'total_frames': int(frame_count),
    'analyzed_frames': int(analyzed_frames),
    'frames_with_detections': frames_with_animals,
    'total_detections': len(df_detections),
    'estimated_elk_count': int(estimated_elk_count),
    'confidence_threshold': float(CONFIDENCE_THRESHOLD),
    'model_used': MODEL_PATH
}

# Save summary as JSON.
# Serialise before opening the file so that a serialisation error cannot
# leave a truncated file on disk.
summary_json = json.dumps(summary_report, indent=2)
with open(f"{OUTPUT_DIR}/analysis_summary.json", 'w') as f:
    f.write(summary_json)

print(f"Saved analysis summary to: {OUTPUT_DIR}/analysis_summary.json")

# Print final summary
print("\n" + "="*50)
print("FINAL ANALYSIS SUMMARY")
print("="*50)
for key, value in summary_report.items():
    print(f"{key.replace('_', ' ').title()}: {value}")
print("="*50)