# PyTorch dataloaders with darwin-py

In [None]:
import torch
import torchvision
from torchvision.models.detection.faster_rcnn import FastRCNNPredictor
from torchvision.models.detection.mask_rcnn import MaskRCNNPredictor

from darwin.torch import get_dataset
import darwin.torch.transforms as T

### 🐥 Get dataset from Darwin - [Bird Species](https://www.v7labs.com/open-datasets/bird-species)

In [None]:
# Pull the bird-species dataset locally with the darwin CLI.
# NOTE(review): the ":birds" suffix presumably selects the release named "birds" - confirm against the darwin-py CLI docs.
!darwin dataset pull v7-open-datasets/bird-species:birds

In [None]:
# Load the dataset for instance segmentation; no partition is requested here.
dataset_id = "v7-open-datasets/bird-species"
dataset = get_dataset(
    dataset_id,
    dataset_type="instance-segmentation",
)

# First line printed: dataset statistics. Second line: the dataset's classes.
print(dataset, dataset.classes, sep="\n")

# Fetch the image at index 0. Kept as the final expression of the cell so the
# notebook shows its value.
dataset.get_image(0)

### 🪓 Create train-val-test splits - random and stratified!

In [None]:
# Create validation and test splits for the dataset with the darwin CLI.
# NOTE(review): 0.10 / 0.20 are presumably fractions (10% validation, 20% test) - confirm
# against the installed darwin-py version, since the flag names say "percentage".
!darwin dataset split v7-open-datasets/bird-species --val-percentage 0.10 --test-percentage 0.20

### 🚀 Get dataset partitions for training and validation

In [None]:
# The dataset identifier is specified again here for completeness.
dataset_id = "v7-open-datasets/bird-species"

# Training partition: random horizontal flip followed by tensor conversion,
# chained together with Compose.
trfs_train = T.Compose(
    [
        T.RandomHorizontalFlip(),
        T.ToTensor(),
    ]
)
dataset_train = get_dataset(
    dataset_id,
    dataset_type="instance-segmentation",
    partition="train",
    split_type="stratified",
    transform=trfs_train,
)

# Validation partition: tensor conversion only.
trfs_val = T.ToTensor()
dataset_val = get_dataset(
    dataset_id,
    dataset_type="instance-segmentation",
    partition="val",
    split_type="stratified",
    transform=trfs_val,
)

# Show the full dataset next to its two partitions, one per line.
print(dataset, dataset_train, dataset_val, sep="\n")

### 🔥 Let's see the datasets in action!

In [None]:
# Get the dataloader for the training set

def collate_fn(batch):
    """Transpose a batch of samples into one tuple per sample field.

    A batch such as ``[(img0, tgt0), (img1, tgt1)]`` becomes
    ``((img0, img1), (tgt0, tgt1))``. The fields are grouped as-is; nothing
    is stacked or converted.

    Args:
        batch: An iterable of equally-structured sample tuples.

    Returns:
        A tuple of tuples, one per field; ``()`` for an empty batch.
    """
    per_field = zip(*batch)
    return tuple(per_field)

# Shuffled loader over the training partition, two samples per batch, loaded
# in the main process (num_workers=0) and grouped by collate_fn.
data_loader = torch.utils.data.DataLoader(
    dataset_train,
    batch_size=2,
    shuffle=True,
    num_workers=0,
    collate_fn=collate_fn,
)

In [None]:
# Instantiate a Mask-RCNN model using Torchvision's API!

def get_instance_segmentation_model(num_classes):
    """Build a pre-trained Mask R-CNN with fresh box and mask predictors.

    Args:
        num_classes: Number of output classes for both replacement
            predictors. The caller decides whether this count includes a
            background class.

    Returns:
        The torchvision Mask R-CNN (ResNet-50 FPN) model with its
        ``roi_heads.box_predictor`` and ``roi_heads.mask_predictor`` replaced.
    """
    mask_hidden_channels = 256

    # Start from an instance segmentation model pre-trained on COCO.
    net = torchvision.models.detection.maskrcnn_resnet50_fpn(pretrained=True)
    heads = net.roi_heads

    # New bounding-box predictor, taking the same number of input features as
    # the classification layer it replaces.
    heads.box_predictor = FastRCNNPredictor(
        heads.box_predictor.cls_score.in_features,
        num_classes,
    )

    # New mask predictor, taking the same number of input channels as the
    # mask head it replaces.
    heads.mask_predictor = MaskRCNNPredictor(
        heads.mask_predictor.conv5_mask.in_channels,
        mask_hidden_channels,
        num_classes,
    )
    return net

In [None]:
# Optimization setup: device, model, optimizer and learning-rate schedule.

# Run on the GPU when CUDA is available, otherwise on the CPU.
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

# Build the model with the helper; one extra class is added for background.
num_classes = dataset.num_classes + 1
model = get_instance_segmentation_model(num_classes)
model.to(device)

# Optimize only the parameters that require gradients.
params = list(filter(lambda p: p.requires_grad, model.parameters()))
optimizer = torch.optim.Adam(
    params,
    lr=0.0001,
    weight_decay=0.0005,
)

# Multiply the learning rate by 0.1 every 3 scheduler steps.
lr_scheduler = torch.optim.lr_scheduler.StepLR(
    optimizer,
    step_size=3,
    gamma=0.1,
)

In [None]:
# Train for 10 epochs, reporting the mean loss at iterations 0, 10, 20, ...

# torchvision detection models return the loss dict only in training mode.
# Set it explicitly so the cell still works if it is re-run after the model
# has been switched to eval mode.
model.train()

for epoch in range(10):
    print(f"Starting epoch {epoch}...")
    running_loss = 0.0
    steps_since_report = 0
    for i, (images, targets) in enumerate(data_loader):
        images = [image.to(device) for image in images]
        # Keep only the tensor-valued entries of each target dict and move
        # them to the training device; non-tensor entries are dropped.
        targets = [{k: v.to(device) for k, v in t.items() if isinstance(v, torch.Tensor)} for t in targets]

        # The model returns a dict of losses; the training objective is their sum.
        loss_dict = model(images, targets)
        losses = sum(loss_dict.values())

        optimizer.zero_grad()  # zero the gradients
        losses.backward()  # backpropagate
        optimizer.step()  # update the weights

        running_loss += losses.item()
        steps_since_report += 1
        if i % 10 == 0:
            # Average over the iterations actually accumulated: the report at
            # i == 0 covers a single iteration, later reports cover ten.
            # Dividing by a fixed 10 would understate the first one tenfold.
            print(f"({i}/{len(data_loader)}) Loss: {running_loss / steps_since_report}")
            running_loss = 0.0
            steps_since_report = 0

    # Advance the learning-rate schedule once per epoch.
    lr_scheduler.step()