In [1]:
from pathlib import Path

import torch
from torch.utils.data import Dataset, DataLoader
from torch.nn import BCEWithLogitsLoss
from torch.optim import Adam
from torchvision import transforms as t
import numpy as np
import matplotlib.pyplot as plt
from tqdm.notebook import tqdm
from sklearn import metrics
from sklearn.manifold import TSNE

from hse_dd.src.utils import Dataset, download_dataset, process_dataset
from hse_dd.src.nn import YoloDataset

In [6]:
# Folder that holds the raw, per-source dataset downloads.
datasets_dir = Path("./share/raw_data")

# Layout of the merged dataset: one folder for images, one for annotations.
OUTPUT_DIR = Path("./share/combined_dataset")
ANNOTATIONS_DIR = OUTPUT_DIR / "annotations"
IMAGES_DIR = OUTPUT_DIR / "images"
for output_subdir in (ANNOTATIONS_DIR, IMAGES_DIR):
    # Safe to re-run: existing folders are left untouched.
    output_subdir.mkdir(parents=True, exist_ok=True)

# Both "dasmehdixtr" entries live under the same raw-data root; only the second
# one overrides `meta_type` (the others rely on the `Dataset` default).
dasmehdixtr_root = datasets_dir / "dasmehdixtr"

datasets = [
    Dataset(
        name="dasmehdixtr",
        base_path=dasmehdixtr_root / "drone_dataset_yolo" / "dataset_txt",
    ),
    Dataset(
        name="dasmehdixtr",
        meta_type="xml",
        base_path=dasmehdixtr_root / "dataset_xml_format" / "dataset_xml_format",
    ),
    Dataset(
        name="mcagriaksoy",
        base_path=datasets_dir / "mcagriaksoy" / "Database1" / "Database1",
    ),
]

In [7]:
# Download every configured source and merge it into the combined
# images/annotations folders.
for dataset in datasets:
    # Announce the dataset *before* the work starts so the message lines up
    # with the progress output that follows it.
    print(f"Processing Dataset {dataset.name}_{dataset.meta_type}...")
    download_dataset(dataset, datasets_dir)
    process_dataset(dataset, IMAGES_DIR, ANNOTATIONS_DIR)

# Check the results: count what actually landed in the combined folders.
image_files = list(IMAGES_DIR.iterdir())
annotation_files = list(ANNOTATIONS_DIR.iterdir())

print(f"\nВсего изображений: {len(image_files)}")
print(f"Всего аннотаций: {len(annotation_files)}")


Processing Dataset dasmehdixtr: 2719it [00:01, 2191.04it/s]


Processing Dataset dasmehdixtr_yolo...


Processing Dataset dasmehdixtr: 3291it [00:00, 3845.11it/s]


Processing Dataset dasmehdixtr_xml...


Processing Dataset mcagriaksoy: 8025it [00:01, 4452.94it/s]


Processing Dataset mcagriaksoy_yolo...

Всего изображений: 5673
Всего аннотаций: 6467


# Baseline

In [9]:
import torch
from torchvision.models.detection import fasterrcnn_resnet50_fpn
from torch.utils.data import DataLoader
from torchvision.models.detection.faster_rcnn import FastRCNNPredictor
from torchvision import transforms
import json
from sklearn.metrics import average_precision_score


In [12]:
images_dir = str(IMAGES_DIR)
labels_dir = str(ANNOTATIONS_DIR)

# ToTensor is the only preprocessing step applied to the images.
transform = transforms.Compose([
    transforms.ToTensor()
])

# 80/20 train/validation split. The generator is seeded so the split (and
# therefore the reported validation metric) is reproducible across runs.
dataset = YoloDataset(images_dir, labels_dir, transform=transform)
train_size = int(0.8 * len(dataset))
val_size = len(dataset) - train_size
split_generator = torch.Generator().manual_seed(42)
train_dataset, val_dataset = torch.utils.data.random_split(
    dataset, [train_size, val_size], generator=split_generator
)

# Detection samples have a variable number of boxes per image, so batches are
# kept as tuples (images, targets) instead of being stacked into one tensor.
train_loader = DataLoader(train_dataset, batch_size=4, shuffle=True, collate_fn=lambda x: tuple(zip(*x)))
val_loader = DataLoader(val_dataset, batch_size=4, shuffle=False, collate_fn=lambda x: tuple(zip(*x)))

device = torch.device('cuda') if torch.cuda.is_available() else torch.device('cpu')
# NOTE(review): `pretrained=True` is deprecated in recent torchvision releases
# in favour of `weights=...`; kept as-is for compatibility with older versions.
model = fasterrcnn_resnet50_fpn(pretrained=True)

# Replace the classification head. torchvision counts the background as a
# class, so 2 = background + one object class (presumably "drone" - confirm).
num_classes = 2
in_features = model.roi_heads.box_predictor.cls_score.in_features
model.roi_heads.box_predictor = FastRCNNPredictor(in_features, num_classes)

# Move the model only AFTER the head has been swapped: a freshly constructed
# predictor lives on the CPU, so moving earlier leaves it on the wrong device
# and the first forward pass on CUDA fails with a device mismatch.
model.to(device)

optimizer = torch.optim.SGD(model.parameters(), lr=0.005, momentum=0.9, weight_decay=0.0005)
lr_scheduler = torch.optim.lr_scheduler.StepLR(optimizer, step_size=3, gamma=0.1)

def train(model, optimizer, data_loader, device):
    """Run one training epoch and return the mean batch loss.

    Args:
        model: Detection model that, in training mode, returns a dict of loss
            tensors when called as ``model(images, targets)``.
        optimizer: Optimizer updating ``model``'s parameters.
        data_loader: Iterable yielding ``(images, targets)`` batches, where
            ``images`` is a sequence of tensors and ``targets`` a sequence of
            dicts of tensors.
        device: Device every image and target tensor is moved to.

    Returns:
        float: Average of the summed loss over all batches of the epoch, or
        ``nan`` when ``data_loader`` yields no batches.
    """
    model.train()
    total_loss = 0.0
    num_batches = 0
    for images, targets in data_loader:
        images = [img.to(device) for img in images]
        targets = [{k: v.to(device) for k, v in target.items()} for target in targets]

        # The model returns one loss per component; optimise their sum.
        loss_dict = model(images, targets)
        losses = sum(loss_dict.values())

        optimizer.zero_grad()
        losses.backward()
        optimizer.step()

        # Accumulate so the reported value reflects the whole epoch rather
        # than just whichever batch happened to come last.
        total_loss += losses.item()
        num_batches += 1

    if num_batches == 0:
        # Nothing was trained on; report NaN instead of crashing on an
        # undefined loss variable.
        return float("nan")
    return total_loss / num_batches

def _pairwise_iou(boxes_a, boxes_b):
    """Return the (len(boxes_a), len(boxes_b)) IoU matrix of (x1, y1, x2, y2) boxes."""
    area_a = (boxes_a[:, 2] - boxes_a[:, 0]) * (boxes_a[:, 3] - boxes_a[:, 1])
    area_b = (boxes_b[:, 2] - boxes_b[:, 0]) * (boxes_b[:, 3] - boxes_b[:, 1])
    top_left = torch.max(boxes_a[:, None, :2], boxes_b[None, :, :2])
    bottom_right = torch.min(boxes_a[:, None, 2:], boxes_b[None, :, 2:])
    intersection = (bottom_right - top_left).clamp(min=0).prod(dim=2)
    union = area_a[:, None] + area_b[None, :] - intersection
    # Guard against degenerate zero-area boxes producing a division by zero.
    return intersection / union.clamp(min=1e-12)


def _flag_true_positives(gt_boxes, pred_boxes, iou_threshold):
    """Greedily match score-sorted predictions to ground truth; one bool per prediction."""
    hits = torch.zeros(len(pred_boxes), dtype=torch.bool)
    if len(pred_boxes) == 0 or len(gt_boxes) == 0:
        return hits
    ious = _pairwise_iou(pred_boxes, gt_boxes)
    claimed = torch.zeros(len(gt_boxes), dtype=torch.bool)
    for rank in range(len(pred_boxes)):
        # A ground-truth box can satisfy only one prediction; later
        # predictions on the same object count as false positives.
        candidate_ious = ious[rank].masked_fill(claimed, -1.0)
        best_iou, best_gt = candidate_ious.max(dim=0)
        if best_iou.item() >= iou_threshold:
            claimed[best_gt] = True
            hits[rank] = True
    return hits


def evaluate(model, data_loader, device, iou_threshold=0.5):
    """Compute detection average precision over ``data_loader``.

    Predictions are matched to ground-truth boxes by IoU (highest score
    first), pooled over all images, and AP is the area under the
    monotonically-interpolated precision/recall curve. All detections are
    treated as a single class.

    The previous implementation passed raw box coordinates to
    ``sklearn.metrics.average_precision_score``, which is a classification
    metric expecting ``(y_true, y_score)`` and cannot score detections.

    Args:
        model: Detection model that, in eval mode, returns one dict per image
            with ``"boxes"`` and ``"scores"`` tensors.
        data_loader: Iterable yielding ``(images, targets)`` batches; each
            target is a dict with a ``"boxes"`` tensor.
        device: Device the images are moved to before inference.
        iou_threshold: Minimum IoU for a prediction to count as a match.

    Returns:
        float: AP in [0, 1]; ``nan`` when there are no ground-truth boxes at
        all (including an empty ``data_loader``).

    Note:
        Assumes both predictions and targets use (x1, y1, x2, y2) corner
        format, as torchvision's Faster R-CNN does - TODO confirm that the
        dataset emits targets in that format.
    """
    model.eval()
    score_chunks = []
    hit_chunks = []
    total_gt = 0
    with torch.no_grad():
        for images, targets in data_loader:
            images = [img.to(device) for img in images]
            outputs = model(images)

            for target, output in zip(targets, outputs):
                # Matching is cheap bookkeeping; do it on the CPU in float64.
                gt_boxes = target['boxes'].detach().cpu().to(torch.float64).reshape(-1, 4)
                pred_boxes = output['boxes'].detach().cpu().to(torch.float64).reshape(-1, 4)
                pred_scores = output['scores'].detach().cpu().to(torch.float64)

                order = torch.argsort(pred_scores, descending=True)
                hit_chunks.append(_flag_true_positives(gt_boxes, pred_boxes[order], iou_threshold))
                score_chunks.append(pred_scores[order])
                total_gt += len(gt_boxes)

    if total_gt == 0:
        # Recall is undefined without any ground truth.
        return float("nan")

    scores = torch.cat(score_chunks)
    if scores.numel() == 0:
        # Ground truth exists but nothing was detected.
        return 0.0

    # Rank every detection in the dataset by confidence.
    hits = torch.cat(hit_chunks)[torch.argsort(scores, descending=True)].to(torch.float64)
    true_positives = torch.cumsum(hits, dim=0)
    ranks = torch.arange(1, len(hits) + 1, dtype=torch.float64)
    precision = true_positives / ranks
    recall = true_positives / total_gt

    # Precision envelope: at each recall level use the best precision
    # achievable at that recall or higher.
    precision = torch.flip(torch.cummax(torch.flip(precision, dims=[0]), dim=0).values, dims=[0])
    previous_recall = torch.cat([recall.new_zeros(1), recall[:-1]])
    return float(((recall - previous_recall) * precision).sum())

num_epochs = 10
checkpoint_path = "faster_rcnn_yolo.pth"
for epoch in range(num_epochs):
    train_loss = train(model, optimizer, train_loader, device)
    # Step the LR schedule once per epoch, after the optimizer updates.
    lr_scheduler.step()
    val_ap = evaluate(model, val_loader, device)
    print(f"Epoch {epoch+1}, Loss: {train_loss}, Val mAP: {val_ap:.4f}")
    # Save after every epoch (overwriting the same file) so an interrupted
    # run still leaves the most recent weights on disk.
    torch.save(model.state_dict(), checkpoint_path)




KeyboardInterrupt: 