# Segmentation Pipeline - Сегментация изображений

Пайплайны для следующих задач:
- Семантическая сегментация (U-Net, DeepLab)
- Instance-сегментация (Mask R-CNN)
- Паноптическая сегментация

In [None]:
!pip install torch torchvision segmentation-models-pytorch albumentations opencv-python -q

In [None]:
import torch
import torch.nn as nn
from torch.utils.data import Dataset, DataLoader
import segmentation_models_pytorch as smp
import albumentations as A
from albumentations.pytorch import ToTensorV2
import cv2
import numpy as np
import pandas as pd
from pathlib import Path
from tqdm import tqdm

# Prefer the GPU when CUDA is available; otherwise fall back to the CPU.
if torch.cuda.is_available():
    device = torch.device("cuda")
else:
    device = torch.device("cpu")
print(f"Device: {device}")

## 1. Настройки

In [None]:
# === YOUR DATA ===
# Directories that the paths stored in the CSV files are resolved against.
IMAGES_DIR = './images'
MASKS_DIR = './masks'
TRAIN_CSV = 'train.csv'  # columns: image_path, mask_path
TEST_CSV = 'test.csv'  # only the image_path column is read for test samples

# === SETTINGS ===
ENCODER = 'resnet50'  # alternatives: 'efficientnet-b0', 'mobilenet_v2'
ENCODER_WEIGHTS = 'imagenet'
NUM_CLASSES = 1  # binary segmentation (background / object)
IMG_SIZE = 256  # images and masks are resized to IMG_SIZE x IMG_SIZE
BATCH_SIZE = 8
EPOCHS = 20  # also used as T_max of the cosine LR schedule
LR = 1e-4  # learning rate passed to Adam

## 2. Dataset

In [None]:
# Training pipeline: resize, light augmentation, normalisation with the
# library-default statistics, then conversion to a tensor.
_train_steps = [
    A.Resize(height=IMG_SIZE, width=IMG_SIZE),
    A.HorizontalFlip(p=0.5),
    A.RandomBrightnessContrast(p=0.2),
    A.Normalize(),
    ToTensorV2(),
]
train_transform = A.Compose(_train_steps)

# Inference pipeline: the same preprocessing without any random augmentation.
_test_steps = [
    A.Resize(height=IMG_SIZE, width=IMG_SIZE),
    A.Normalize(),
    ToTensorV2(),
]
test_transform = A.Compose(_test_steps)

class SegmentationDataset(Dataset):
    """Images, optionally paired with binary masks, listed in a DataFrame.

    Every row of ``df`` must provide an ``image_path`` column and, unless
    ``is_test`` is true, a ``mask_path`` column. The paths are resolved
    relative to ``images_dir`` and ``masks_dir`` respectively.

    Args:
        df: Table with one row per sample.
        images_dir: Directory that ``image_path`` values are relative to.
        masks_dir: Directory that ``mask_path`` values are relative to.
            Required unless ``is_test`` is true.
        transform: Optional callable invoked as ``transform(image=...)`` or
            ``transform(image=..., mask=...)`` that returns a mapping with
            the same keys.
        is_test: When true, items are images only (no mask is read).

    Raises:
        ValueError: If ``masks_dir`` is not given while ``is_test`` is false.
    """

    def __init__(self, df, images_dir, masks_dir=None, transform=None, is_test=False):
        if not is_test and not masks_dir:
            # Fail at construction time instead of with ``None / str`` on first access.
            raise ValueError("masks_dir is required when is_test is False")
        self.df = df
        self.images_dir = Path(images_dir)
        self.masks_dir = Path(masks_dir) if masks_dir else None
        self.transform = transform
        self.is_test = is_test

    def __len__(self):
        """Return the number of rows (samples) in the table."""
        return len(self.df)

    @staticmethod
    def _read(path, flags):
        """Read ``path`` with OpenCV, raising instead of returning ``None``."""
        data = cv2.imread(str(path), flags)
        if data is None:
            # cv2.imread reports a missing or undecodable file by returning None.
            raise FileNotFoundError(f"Cannot read image file: {path}")
        return data

    def __getitem__(self, idx):
        """Return the image (test mode) or an ``(image, mask)`` pair.

        The image is converted from OpenCV's BGR order to RGB. The mask is
        binarised (any non-zero pixel becomes 1.0) and gets a leading
        channel axis.

        Raises:
            FileNotFoundError: If the image or mask file cannot be read.
        """
        row = self.df.iloc[idx]
        image = self._read(self.images_dir / row['image_path'], cv2.IMREAD_COLOR)
        image = cv2.cvtColor(image, cv2.COLOR_BGR2RGB)

        if self.is_test:
            if self.transform:
                image = self.transform(image=image)['image']
            return image

        mask = self._read(self.masks_dir / row['mask_path'], cv2.IMREAD_GRAYSCALE)
        mask = (mask > 0).astype(np.float32)

        if self.transform:
            transformed = self.transform(image=image, mask=mask)
            image = transformed['image']
            mask = transformed['mask']

        # Without a tensor-producing transform the mask is still a NumPy array,
        # which has no ``unsqueeze``; add the channel axis the NumPy way.
        if isinstance(mask, np.ndarray):
            mask = mask[np.newaxis, ...]
        else:
            mask = mask.unsqueeze(0)
        return image, mask

In [None]:
# Load the sample tables.
train_df = pd.read_csv(TRAIN_CSV)
test_df = pd.read_csv(TEST_CSV)

# The training set yields (image, mask) pairs; the test set yields images only.
train_dataset = SegmentationDataset(
    df=train_df,
    images_dir=IMAGES_DIR,
    masks_dir=MASKS_DIR,
    transform=train_transform,
)
test_dataset = SegmentationDataset(
    df=test_df,
    images_dir=IMAGES_DIR,
    transform=test_transform,
    is_test=True,
)

# Shuffle only the training data; keep test order stable so predictions
# line up with the rows of test_df.
train_loader = DataLoader(
    dataset=train_dataset,
    batch_size=BATCH_SIZE,
    shuffle=True,
    num_workers=4,
)
test_loader = DataLoader(
    dataset=test_dataset,
    batch_size=BATCH_SIZE,
    shuffle=False,
    num_workers=4,
)

print(f"Train: {len(train_dataset)}, Test: {len(test_dataset)}")

## 3. Модель U-Net

In [None]:
# U-Net on top of a pre-trained encoder.
# activation=None keeps the raw logits: the sigmoid is applied by the loss
# during training and explicitly at prediction time.
model = smp.Unet(
    encoder_name=ENCODER,
    encoder_weights=ENCODER_WEIGHTS,
    classes=NUM_CLASSES,
    activation=None,
)
model = model.to(device)

# Alternatives:
# model = smp.FPN(...)       # Feature Pyramid Network
# model = smp.DeepLabV3Plus(...) # DeepLab
# model = smp.PSPNet(...)    # PSPNet

# Loss and optimizer
loss_fn = smp.losses.DiceLoss(mode='binary')
# loss_fn = smp.losses.JaccardLoss(mode='binary')
# loss_fn = nn.BCEWithLogitsLoss()

optimizer = torch.optim.Adam(params=model.parameters(), lr=LR)
# Cosine decay of the learning rate over the whole training run.
scheduler = torch.optim.lr_scheduler.CosineAnnealingLR(optimizer=optimizer, T_max=EPOCHS)

print("✓ Модель создана!")

## 4. Обучение

In [None]:
def train_epoch(model, loader, loss_fn, optimizer, device):
    """Run one training pass over ``loader`` and return the mean batch loss.

    Args:
        model: Module to optimise; it is switched to training mode.
        loader: Sized iterable yielding ``(images, masks)`` batches.
        loss_fn: Callable ``loss_fn(outputs, masks)`` returning a scalar tensor.
        optimizer: Optimizer that updates the model's parameters.
        device: Device each batch is moved to before the forward pass.

    Returns:
        float: Sum of the per-batch losses divided by the number of batches.

    Raises:
        ValueError: If ``loader`` contains no batches.
    """
    num_batches = len(loader)
    if num_batches == 0:
        # Without this guard the final division fails with a bare ZeroDivisionError.
        raise ValueError("loader is empty: cannot train on zero batches")

    model.train()
    total_loss = 0.0

    for images, masks in tqdm(loader, desc="Training"):
        images, masks = images.to(device), masks.to(device)

        optimizer.zero_grad()
        outputs = model(images)
        loss = loss_fn(outputs, masks)
        loss.backward()
        optimizer.step()

        # .item() extracts a Python float so the graph is not kept alive.
        total_loss += loss.item()

    return total_loss / num_batches

# One optimisation pass per epoch, followed by one step of the LR schedule.
for epoch in range(EPOCHS):
    loss = train_epoch(
        model=model,
        loader=train_loader,
        loss_fn=loss_fn,
        optimizer=optimizer,
        device=device,
    )
    scheduler.step()
    print(f"Epoch {epoch+1}/{EPOCHS} - Loss: {loss:.4f}")

# Persist only the weights (state dict) once all epochs have finished.
_final_state = model.state_dict()
torch.save(_final_state, 'segmentation_model.pth')
print("✓ Модель обучена!")

## 5. Предсказания

In [None]:
# Inference: evaluation mode, no gradient tracking.
model.eval()
predictions = []

with torch.no_grad():
    for images in tqdm(test_loader, desc="Predicting"):
        images = images.to(device)
        outputs = model(images)
        # Logits -> probabilities -> boolean masks at a 0.5 threshold.
        masks = torch.sigmoid(outputs).gt(0.5)
        # One array per image is appended, in loader order.
        predictions += list(masks.cpu().numpy())

# Saving the masks
# for i, mask in enumerate(predictions):
#     cv2.imwrite(f'predictions/mask_{i}.png', (mask[0] * 255).astype(np.uint8))

print("✓ Предсказания готовы!")