# Object Detection Pipeline - Детекция объектов

Пайплайны для:
- Faster R-CNN
- RetinaNet
- SSD
- DETR

In [None]:
!pip install torch torchvision pycocotools -q

In [None]:
import torch
import torchvision
from torchvision.models.detection import fasterrcnn_resnet50_fpn
from torchvision.models.detection.faster_rcnn import FastRCNNPredictor
from torch.utils.data import Dataset, DataLoader
import cv2
import numpy as np
import pandas as pd
from pathlib import Path
from tqdm import tqdm
import json

# Run on the GPU when CUDA is available, otherwise fall back to the CPU.
if torch.cuda.is_available():
    device = torch.device("cuda")
else:
    device = torch.device("cpu")
print(f"Device: {device}")

## 1. Настройки

In [None]:
# === YOUR DATA ===
# Annotations are expected in COCO format: a JSON file with bbox coordinates.
IMAGES_DIR = "./images"                       # images used for training
TRAIN_ANNOTATIONS = "train_annotations.json"  # COCO annotations for training
TEST_DIR = "./test_images"                    # images used for prediction

# === TRAINING HYPERPARAMETERS ===
NUM_CLASSES = 2  # background + 1 object class
BATCH_SIZE = 4
EPOCHS = 10
LR = 5e-3

## 2. Dataset

In [None]:
class DetectionDataset(Dataset):
    """Object-detection dataset backed by a directory of images.

    The mode is selected by ``annotations_file``:

    * With a COCO-style JSON file, ``dataset[i]`` returns ``(image, target)``
      for the i-th entry of the file's ``images`` list. ``target`` is a dict
      with ``'boxes'`` (float32, one ``[x1, y1, x2, y2]`` row per box, shape
      ``(N, 4)`` even when ``N == 0``) and ``'labels'`` (int64 category ids).
    * Without one, every ``*.jpg`` file in ``images_dir`` is served (sorted
      by path so the order is reproducible) and ``dataset[i]`` returns only
      the image tensor.

    Images are returned as float32 RGB tensors in channel-first layout,
    scaled to ``[0, 1]``.

    Args:
        images_dir: Directory containing the image files.
        annotations_file: Optional path to a COCO-style JSON file with
            ``images`` and ``annotations`` lists.
        transforms: Optional callable applied to the RGB numpy image of an
            annotated sample before tensor conversion. It receives only the
            image; the boxes are not passed through it.

    Raises:
        OSError: From ``__getitem__`` when an image cannot be read.
        IndexError: From ``__getitem__`` when the index is out of range
            (this is also what terminates plain ``for x in dataset`` loops).
    """

    def __init__(self, images_dir, annotations_file=None, transforms=None):
        self.images_dir = Path(images_dir)
        self.transforms = transforms
        self._annos_by_image = {}
        self._image_paths = []

        if annotations_file:
            with open(annotations_file, encoding='utf-8') as f:
                self.coco = json.load(f)
            self.image_ids = [img['id'] for img in self.coco['images']]
            # Group annotations by image once, so __getitem__ does not have
            # to rescan the whole annotation list for every sample.
            for anno in self.coco['annotations']:
                self._annos_by_image.setdefault(anno['image_id'], []).append(anno)
        else:
            # Keep the actual paths: re-globbing by stem later could pick up
            # an unrelated file that merely shares the stem.
            self._image_paths = sorted(self.images_dir.glob('*.jpg'))
            self.image_ids = [p.stem for p in self._image_paths]
            self.coco = None

    def __len__(self):
        return len(self.image_ids)

    def __getitem__(self, idx):
        if self.coco is None:
            # Un-annotated (test) mode: only the image is returned.
            return self._to_tensor(self._load_rgb(self._image_paths[idx]))

        img_info = self.coco['images'][idx]
        image = self._load_rgb(self.images_dir / img_info['file_name'])
        target = self._build_target(self._annos_by_image.get(img_info['id'], ()))

        if self.transforms:
            image = self.transforms(image)

        return self._to_tensor(image), target

    @staticmethod
    def _load_rgb(img_path):
        """Read an image from disk and return it as an RGB numpy array."""
        image = cv2.imread(str(img_path))
        if image is None:
            # cv2.imread signals failure by returning None instead of raising.
            raise OSError(f"Cannot read image (missing or unreadable): {img_path}")
        return cv2.cvtColor(image, cv2.COLOR_BGR2RGB)

    @staticmethod
    def _to_tensor(image):
        """Convert an (H, W, C) array with 0-255 values to a float (C, H, W) tensor in [0, 1]."""
        # from_numpy rejects negative strides (e.g. after a slicing flip),
        # so make the array contiguous first.
        image = np.ascontiguousarray(image)
        return torch.from_numpy(image).permute(2, 0, 1).float() / 255.0

    @staticmethod
    def _build_target(annos):
        """Turn COCO annotations (``bbox`` = x, y, w, h) into a target dict."""
        boxes = []
        labels = []
        for anno in annos:
            x, y, w, h = anno['bbox']
            if w <= 0 or h <= 0:
                # Boxes without positive width and height are rejected by
                # torchvision detection models during training.
                continue
            boxes.append([x, y, x + w, y + h])
            labels.append(anno['category_id'])

        if boxes:
            boxes_tensor = torch.as_tensor(boxes, dtype=torch.float32)
        else:
            # An empty list would become shape (0,); keep the (N, 4) layout.
            boxes_tensor = torch.zeros((0, 4), dtype=torch.float32)

        return {
            'boxes': boxes_tensor,
            'labels': torch.as_tensor(labels, dtype=torch.int64),
        }

In [None]:
# Annotated training set and un-annotated test set.
train_dataset = DetectionDataset(
    images_dir=IMAGES_DIR,
    annotations_file=TRAIN_ANNOTATIONS,
)
test_dataset = DetectionDataset(images_dir=TEST_DIR)

def collate_fn(batch):
    """Regroup a list of per-sample tuples into per-field tuples.

    Turns ``[(a1, b1), (a2, b2), ...]`` into ``((a1, a2, ...), (b1, b2, ...))``
    without stacking anything into a single batch tensor. An empty batch
    yields an empty tuple.
    """
    per_field = zip(*batch)
    return tuple(per_field)

# Shuffled training loader; batches are assembled by collate_fn.
train_loader = DataLoader(
    train_dataset,
    batch_size=BATCH_SIZE,
    shuffle=True,
    num_workers=4,
    collate_fn=collate_fn,
)

print(f"Train: {len(train_dataset)}, Test: {len(test_dataset)}")

## 3. Модель Faster R-CNN

In [None]:
# Load a pre-trained Faster R-CNN.
# `pretrained=True` is the deprecated torchvision spelling; the `weights`
# argument is the supported way to request the default pre-trained weights.
model = fasterrcnn_resnet50_fpn(weights="DEFAULT")

# Replace the box classification head so it predicts NUM_CLASSES classes.
in_features = model.roi_heads.box_predictor.cls_score.in_features
model.roi_heads.box_predictor = FastRCNNPredictor(in_features, NUM_CLASSES)

model = model.to(device)

# Optimise only the parameters that require gradients.
params = [p for p in model.parameters() if p.requires_grad]
optimizer = torch.optim.SGD(params, lr=LR, momentum=0.9, weight_decay=0.0005)
# Multiply the learning rate by 0.1 every 3 scheduler steps.
lr_scheduler = torch.optim.lr_scheduler.StepLR(optimizer, step_size=3, gamma=0.1)

print("✓ Модель готова!")

## 4. Обучение

In [None]:
# Train for EPOCHS epochs, then save the weights to disk.
for epoch in range(EPOCHS):
    model.train()
    total_loss = 0

    for images, targets in tqdm(train_loader, desc=f"Epoch {epoch+1}/{EPOCHS}"):
        # Move every image and every tensor inside each target dict to the device.
        images = [img.to(device) for img in images]
        targets = [
            {key: value.to(device) for key, value in target.items()}
            for target in targets
        ]

        # The model output is treated as a dict of loss terms; their sum is
        # the quantity that gets back-propagated.
        loss_dict = model(images, targets)
        losses = sum(loss_dict.values())

        optimizer.zero_grad()
        losses.backward()
        optimizer.step()

        total_loss += losses.item()

    # The scheduler is stepped once per epoch, after all batches.
    lr_scheduler.step()
    print(f"Epoch {epoch+1} - Loss: {total_loss/len(train_loader):.4f}")

torch.save(model.state_dict(), 'detection_model.pth')
print("✓ Обучение завершено!")

## 5. Предсказания

In [None]:
# Run the model over the test set, one image at a time, without gradients.
model.eval()
predictions = []

with torch.no_grad():
    for image in tqdm(test_dataset, desc="Predicting"):
        image = image.to(device)
        # The model takes a list of images; take the result for our single one.
        prediction = model([image])[0]

        # Keep only detections whose confidence score exceeds 0.5.
        keep = prediction['scores'] > 0.5
        boxes, labels, scores = (
            prediction[field][keep].cpu().numpy()
            for field in ('boxes', 'labels', 'scores')
        )

        predictions.append({'boxes': boxes, 'labels': labels, 'scores': scores})

print("✓ Предсказания готовы!")