In [23]:
import pandas as pd
import os
from torch.utils.data import Dataset, DataLoader
from torchvision.io import read_image
from torchvision.models.detection.faster_rcnn import FastRCNNPredictor
import torch.nn as nn
import torchvision
import ast
import torch
from tqdm import tqdm

# Dataset locations to use when the notebook runs on Kaggle.
KAGGLE_PATH_ANNOTATIONS = '/kaggle/input/tensorflow-great-barrier-reef/train.csv'
KAGGLE_PATH_IMG_DIR = '/kaggle/input/tensorflow-great-barrier-reef/train_images/'
# Dataset locations to use when the notebook runs locally (relative to the working directory).
LOCAL_PATH_ANNOTATIONS = 'data/train.csv'
LOCAL_PATH_IMG_DIR = 'data/train_images/'


# TODO: we could probably turn the directory paths into global variables, so that we swap in the Kaggle paths when running on Kaggle and our own paths when running locally
# *alternatively, set up the local directory layout to mirror the Kaggle one

In [24]:
class StarfishDataset(Dataset):
    """Annotated video frames with their starfish bounding boxes.

    Only rows of the annotations CSV whose ``annotations`` value differs from
    the string ``'[]'`` are exposed, i.e. frames with at least one box.

    Each item is ``(image, [boxes, labels])``:

    * ``image``  - float32 tensor as returned by ``read_image``, min-max
      rescaled to the range [0, 1];
    * ``boxes``  - float32 tensor of shape (N, 4) in ``[x1, y1, x2, y2]`` form;
    * ``labels`` - int64 tensor of shape (N,) filled with ``STARFISH_LABEL``.

    Args:
        annotations_file: path of the CSV file. The first column is used to
            build the ``video_<id>`` directory name, the third column is the
            frame file name (without ``.jpg``), and the ``annotations`` column
            holds a Python-literal list of dicts with the keys ``x``, ``y``,
            ``width`` and ``height``.
        img_dir: directory containing the ``video_<id>`` sub-directories.
    """

    # torchvision detection models reserve class index 0 for the background,
    # so the single foreground class (starfish) has to be encoded as 1.
    # The model consuming these targets must therefore have num_classes >= 2.
    STARFISH_LABEL = 1

    def __init__(self,
                 annotations_file=LOCAL_PATH_ANNOTATIONS,
                 img_dir=LOCAL_PATH_IMG_DIR
                 ):
        self.img_labels = pd.read_csv(annotations_file)
        # Keep only the frames that carry at least one annotation.
        self.annotated = self.img_labels[self.img_labels['annotations'] != '[]']
        self.img_dir = img_dir

    def __len__(self):
        """Return the number of annotated frames."""
        return len(self.annotated)

    def __getitem__(self, idx):
        """Load the ``idx``-th annotated frame and its detection target."""
        # Fetch the row once; explicit .iloc avoids the deprecated positional
        # fallback of Series[int] on a string-labelled index.
        row = self.annotated.iloc[idx]
        image_path = os.path.join(self.img_dir,
                                  'video_{}'.format(row.iloc[0]),
                                  '{}.jpg'.format(row.iloc[2]))
        image = self._normalize(read_image(image_path))

        coords = []
        for parsed_label in ast.literal_eval(row['annotations']):
            x1, y1 = parsed_label['x'], parsed_label['y']
            x2, y2 = x1 + parsed_label['width'], y1 + parsed_label['height']
            coords.append([x1, y1, x2, y2])

        # reshape keeps the (N, 4) layout even when the list is empty.
        boxes = torch.tensor(coords, dtype=torch.float32).reshape(-1, 4)
        labels = torch.full((len(coords),), self.STARFISH_LABEL, dtype=torch.int64)

        return image, [boxes, labels]

    @staticmethod
    def _normalize(image):
        """Min-max rescale ``image`` to float32 values in [0, 1]."""
        image = image.to(torch.float32)
        lowest, highest = image.min(), image.max()
        value_range = highest - lowest
        if value_range > 0:
            # Divide by the range (not by the un-shifted maximum) so that the
            # brightest pixel really maps to 1.
            return (image - lowest) / value_range
        # Constant image: avoid 0/0 and return all zeros instead of NaNs.
        return torch.zeros_like(image)

# Smoke test: build the dataset with its default paths and display the first sample.
dataset = StarfishDataset()
dataset[0]


(tensor([[[0.1451, 0.1020, 0.0980,  ..., 0.0000, 0.0000, 0.0000],
          [0.1686, 0.1255, 0.1059,  ..., 0.0000, 0.0000, 0.0000],
          [0.1373, 0.1176, 0.1098,  ..., 0.0000, 0.0000, 0.0000],
          ...,
          [0.2706, 0.3294, 0.2275,  ..., 0.0627, 0.0627, 0.0627],
          [0.2588, 0.2549, 0.2706,  ..., 0.1137, 0.1216, 0.0980],
          [0.2353, 0.2745, 0.3020,  ..., 0.1373, 0.1804, 0.2039]],
 
         [[0.5686, 0.5569, 0.5686,  ..., 0.5922, 0.5922, 0.5882],
          [0.5961, 0.5804, 0.5765,  ..., 0.5922, 0.5922, 0.5882],
          [0.5647, 0.5725, 0.5843,  ..., 0.5922, 0.5922, 0.5882],
          ...,
          [0.6078, 0.7686, 0.8000,  ..., 0.5059, 0.5137, 0.5255],
          [0.6118, 0.6275, 0.7137,  ..., 0.5373, 0.5608, 0.5451],
          [0.5961, 0.6510, 0.7098,  ..., 0.5608, 0.6275, 0.6706]],
 
         [[0.8118, 0.7882, 0.8039,  ..., 0.9961, 0.9961, 0.9922],
          [0.8275, 0.8118, 0.8118,  ..., 0.9961, 0.9961, 0.9922],
          [0.7882, 0.7961, 0.8078,  ...,

In [25]:
# Fixed seed so that the random train/test split below is reproducible.
torch.manual_seed(1)
dataset = StarfishDataset()

# 80/20 train/test split.
train_size = int(0.8 * len(dataset))
test_size = len(dataset) - train_size
train_dataset, test_dataset = torch.utils.data.random_split(dataset, [train_size, test_size])
print('Train dataset: {} instances, test dataset: {}'.format(len(train_dataset), len(test_dataset)))

# batch_size stays at 1: presumably because frames carry different numbers of
# boxes, which default collation could not stack - TODO confirm.
# The training loader is shuffled so every epoch visits the frames in a new order.
train_dataloader = DataLoader(
    train_dataset, batch_size=1, shuffle=True, num_workers=1)
test_dataloader = DataLoader(
    test_dataset, batch_size=1, shuffle=False, num_workers=1)

# Manual switch to CPU (the author's GPU is too weak); set to False to use
# CUDA whenever it is available.
FORCE_CPU = True
use_cuda = torch.cuda.is_available() and not FORCE_CPU
device = torch.device('cuda') if use_cuda else torch.device('cpu')
cpu = torch.device('cpu')

# torchvision counts the background as a class, so a single-object detector
# needs two outputs: 0 = background, 1 = starfish.
num_classes = 2
model = torchvision.models.detection.fasterrcnn_resnet50_fpn(pretrained=True)
# Swap the pretrained box-predictor head for one sized to our classes.
in_features = model.roi_heads.box_predictor.cls_score.in_features
model.roi_heads.box_predictor = FastRCNNPredictor(in_features, num_classes)
model.to(device)

# Optimise only the parameters that require gradients.
params = [p for p in model.parameters() if p.requires_grad]
optimizer = torch.optim.SGD(params, lr=0.005, momentum=0.9, weight_decay=0.0005)
# Multiply the learning rate by 0.1 every 3 scheduler steps.
lr_scheduler = torch.optim.lr_scheduler.StepLR(optimizer, step_size=3, gamma=0.1)

Train dataset: 3935 instances, test dataset: 984


In [None]:
# TODO: finish eval loop (collect ground truth and compute a metric)
# https://pytorch.org/vision/stable/models.html#runtime-characteristics see Faster R-CNN for the details of this model, what it requires, returns, etc
NUM_EPOCHS = 1
for epoch in tqdm(range(NUM_EPOCHS)):
    # ---- training ----
    model.train()
    for images, targets in tqdm(train_dataloader):
        # The loader yields targets as [boxes, labels], each with a leading
        # batch dimension; the model expects one {'boxes', 'labels'} dict per image.
        batch_boxes, batch_labels = targets[0], targets[1]
        target = [
            {'boxes': batch_boxes[i].to(device), 'labels': batch_labels[i].to(device)}
            for i in range(len(images))
        ]

        images = images.to(device)

        # In training mode the model returns a dict of losses; optimise their sum.
        loss_dict = model(images, target)
        loss = sum(loss_dict.values())

        optimizer.zero_grad()
        loss.backward()
        optimizer.step()

    # ---- evaluation ----
    model.eval()
    print('Reached eval')
    # Release cached GPU memory once before evaluating instead of on every batch.
    torch.cuda.empty_cache()
    with torch.no_grad():
        groundtruth, predictions = None, None
        for images, targets in tqdm(test_dataloader):
            # Inputs must live on the same device as the model.
            predictions = model(images.to(device))
            print(predictions)

    # Advance the learning-rate schedule once per epoch. Calling
    # optimizer.step() here would re-apply the stale gradients of the last
    # training batch and leave the scheduler unused.
    lr_scheduler.step()


  0%|          | 0/1 [00:00<?, ?it/s]
  0%|          | 0/3935 [00:00<?, ?it/s][A
  0%|          | 1/3935 [00:25<28:19:16, 25.92s/it][A
  0%|          | 2/3935 [00:51<28:19:53, 25.93s/it][A
  0%|          | 3/3935 [01:17<28:19:50, 25.94s/it][A
  0%|          | 4/3935 [01:41<27:16:42, 24.98s/it][A
  0%|          | 5/3935 [02:05<26:48:34, 24.56s/it][A
  0%|          | 6/3935 [02:30<26:59:10, 24.73s/it][A
  0%|          | 7/3935 [02:54<26:43:52, 24.50s/it][A
  0%|          | 8/3935 [03:17<26:25:09, 24.22s/it][A
  0%|          | 9/3935 [03:42<26:42:13, 24.49s/it][A