This preliminary notebook uses some code from https://www.kaggle.com/mrinath/efficientdet-train-pytorch

In [None]:
# List the contents of the helper directory that the next cell copies files from.
!ls /kaggle/input/k/ulricharmel/baseline-predict-pytorch

In [None]:
# Copy the helper modules into /kaggle/working. `transforms.py` is imported
# further down as `import transforms as T`; the others are only referenced by
# commented-out imports in this notebook.
# NOTE(review): assumes /kaggle/working is on the import path (e.g. it is the
# kernel's working directory) -- confirm.
!cp /kaggle/input/k/ulricharmel/baseline-predict-pytorch/utils.py /kaggle/working/
!cp /kaggle/input/k/ulricharmel/baseline-predict-pytorch/transforms.py /kaggle/working/
!cp /kaggle/input/k/ulricharmel/baseline-predict-pytorch/coco_eval.py /kaggle/working/
!cp /kaggle/input/k/ulricharmel/baseline-predict-pytorch/engine.py /kaggle/working/
!cp /kaggle/input/k/ulricharmel/baseline-predict-pytorch/coco_utils.py /kaggle/working/

In [None]:
# plot some of the images
import matplotlib.pyplot as plt
from matplotlib import patches

def get_rectangle_edges_from_pascal_bbox(bbox):
    """Convert a Pascal VOC box into the anchor/size triple used for drawing.

    Args:
        bbox: Iterable of exactly four numbers ``(x_min, y_min, x_max, y_max)``.

    Returns:
        ``(anchor, width, height)`` where ``anchor`` is ``(x_min, y_max)``,
        ``width`` is ``x_max - x_min`` and ``height`` is ``y_min - y_max``.
        The height is negative whenever ``y_max > y_min``: the rectangle is
        described from its ``y_max`` edge back towards ``y_min``.
    """
    x_lo, y_lo, x_hi, y_hi = bbox
    anchor = (x_lo, y_hi)
    return anchor, x_hi - x_lo, y_lo - y_hi

def draw_pascal_voc_bboxes(
    plot_ax,
    bboxes,
    get_rectangle_corners_fn=get_rectangle_edges_from_pascal_bbox,
):
    """Draw every bounding box onto ``plot_ax`` as an outlined red rectangle.

    Each box is rendered as two unfilled rectangles with identical geometry:
    a wider black one underneath and a thinner red one on top, so the red edge
    keeps a dark outline and stays visible on bright backgrounds.

    Args:
        plot_ax: Axes-like object exposing ``add_patch``.
        bboxes: Iterable of boxes; each one is passed unchanged to
            ``get_rectangle_corners_fn``.
        get_rectangle_corners_fn: Callable mapping one box to
            ``(anchor, width, height)`` as accepted by ``patches.Rectangle``.
    """
    # (linewidth, edgecolor) in drawing order. The black underlay has to be
    # wider than the red line: with equal widths it is completely painted over
    # and contributes nothing to the picture.
    layers = ((4, "black"), (2, "red"))

    for bbox in bboxes:
        anchor, width, height = get_rectangle_corners_fn(bbox)

        for linewidth, edgecolor in layers:
            # Add the patch to the Axes
            plot_ax.add_patch(
                patches.Rectangle(
                    anchor,
                    width,
                    height,
                    linewidth=linewidth,
                    edgecolor=edgecolor,
                    fill=False,
                )
            )

def draw_image(
    image, bboxes=None, draw_bboxes_fn=draw_pascal_voc_bboxes, figsize=(10, 10)
):
    """Show ``image`` in a new figure, optionally overlaying bounding boxes.

    Args:
        image: Anything accepted by ``Axes.imshow``.
        bboxes: Boxes to overlay; when ``None`` only the image is shown.
        draw_bboxes_fn: Callable invoked as ``draw_bboxes_fn(ax, bboxes)`` to
            render the boxes on the figure's axes.
        figsize: Figure size forwarded to ``plt.subplots``.
    """
    has_boxes = bboxes is not None

    _figure, canvas = plt.subplots(1, figsize=figsize)
    canvas.imshow(image)

    if has_boxes:
        draw_bboxes_fn(canvas, bboxes)

    plt.show()

In [None]:
class DataAdaptor:
    """Thin wrapper around an annotation DataFrame for loading/previewing samples.

    The frame must provide a ``path`` column (image file location) and an
    ``annotations`` column (per-image boxes with ``x``, ``y``, ``width`` and
    ``height`` fields); those are the only fields read here.

    NOTE(review): the methods rely on ``pd``, ``np`` and ``cv2`` being available
    as module-level names; this cell does not import them itself.
    """

    def __init__(self, df):
        self.df = df

    def __len__(self):
        return len(self.df)

    def get_boxes(self, row, img_width=1280, img_height=720):
        """Return the boxes of ``row`` as an ``(N, 4)`` float array.

        Args:
            row: Mapping/Series whose ``'annotations'`` entry holds the boxes
                with ``x``, ``y``, ``width`` and ``height`` fields.
            img_width: Upper clip bound for ``x_max`` (defaults to 1280).
            img_height: Upper clip bound for ``y_max`` (defaults to 720).

        Returns:
            2-D array with one ``[x_min, y_min, x_max, y_max]`` row per box;
            shape ``(0, 4)`` when there are no annotations.
        """
        frame = pd.DataFrame(row['annotations'], columns=['x', 'y', 'width', 'height'])
        # Work on an explicit copy: `.values` can hand back a read-only view
        # (e.g. with pandas copy-on-write), which would break the in-place
        # conversion below.
        boxes = frame.astype(float).values.copy()

        # Change from [x_min, y_min, w, h] to [x_min, y_min, x_max, y_max]
        boxes[:, 2] = np.clip(boxes[:, 0] + boxes[:, 2], 0, img_width)
        boxes[:, 3] = np.clip(boxes[:, 1] + boxes[:, 3], 0, img_height)

        return boxes

    def get_image_bb(self, idx):
        """Load the image and boxes of the sample at position ``idx``.

        Returns:
            ``(image, bboxes, class_labels, idx)`` where ``image`` is the RGB
            pixel array, ``bboxes`` comes from :meth:`get_boxes` and
            ``class_labels`` is an array of ones (one per box).

        Raises:
            FileNotFoundError: if the image cannot be read.
        """
        # Read path and annotations from the same positional row so they can
        # never refer to different samples when the index is not 0..N-1.
        row = self.df.iloc[idx]
        img_src = row['path']

        image = cv2.imread(img_src)
        if image is None:
            # cv2.imread signals failure by returning None instead of raising.
            raise FileNotFoundError(f"Could not read image: {img_src}")
        image = cv2.cvtColor(image, cv2.COLOR_BGR2RGB)

        bboxes = self.get_boxes(row)
        class_labels = np.ones(len(bboxes))
        return image, bboxes, class_labels, idx

    def show_image(self, index):
        """Print the sample id, plot the image with its boxes and return the image."""
        image, bboxes, class_labels, image_id = self.get_image_bb(index)
        print(f"image_id: {image_id}")
        draw_image(image, bboxes.tolist())
        return image

In [None]:
import os
import numpy as np
import torch
import torch.utils.data
from PIL import Image

import torch
import torch.nn as nn
import torch.nn.functional as F
from torch.utils.data import Dataset,DataLoader
from torch import optim
from torchvision import transforms


class CotsData(torch.utils.data.Dataset):
    """Detection dataset backed by an annotation DataFrame.

    Each item is an ``(image, target)`` pair where ``target`` is a dict with
    the keys ``boxes``, ``labels``, ``image_id``, ``area`` and ``iscrowd``.

    The frame must provide a ``path`` column and an ``annotations`` column
    (boxes with ``x``, ``y``, ``width``, ``height`` fields).

    NOTE(review): relies on ``pd``, ``np`` and ``Image`` being available as
    module-level names; ``pd`` is not imported in this cell.
    """

    def __init__(self, df, transforms=None):
        self.ds = df
        self.transforms = transforms

    def get_boxes(self, row, img_width=1280, img_height=720):
        """Return the boxes of ``row`` as an ``(N, 4)`` float array.

        Args:
            row: Mapping/Series whose ``'annotations'`` entry holds the boxes
                with ``x``, ``y``, ``width`` and ``height`` fields.
            img_width: Upper clip bound for ``x_max`` (defaults to 1280).
            img_height: Upper clip bound for ``y_max`` (defaults to 720).

        Returns:
            2-D array with one ``[x_min, y_min, x_max, y_max]`` row per box;
            shape ``(0, 4)`` when there are no annotations.
        """
        frame = pd.DataFrame(row['annotations'], columns=['x', 'y', 'width', 'height'])
        # Work on an explicit copy: `.values` can hand back a read-only view
        # (e.g. with pandas copy-on-write), which would break the in-place
        # conversion below.
        boxes = frame.astype(float).values.copy()

        # Change from [x_min, y_min, w, h] to [x_min, y_min, x_max, y_max]
        boxes[:, 2] = np.clip(boxes[:, 0] + boxes[:, 2], 0, img_width)
        boxes[:, 3] = np.clip(boxes[:, 1] + boxes[:, 3], 0, img_height)

        return boxes

    def __getitem__(self, idx):
        """Return ``(image, target)`` for the sample at position ``idx``.

        ``idx`` is treated as a position (0 .. len-1), so the dataset also
        works on frames whose index is not a fresh ``RangeIndex`` (for example
        after filtering or a train/validation split).
        """
        # Read every field from the same positional row; mixing label-based
        # and positional lookups can pair an image with another row's boxes.
        row = self.ds.iloc[idx]

        # load image
        img = Image.open(row['path']).convert("RGB")

        boxes = torch.as_tensor(self.get_boxes(row), dtype=torch.float32)
        # Derive the count from the boxes themselves so labels/iscrowd always
        # match `boxes` in length.
        num_objs = boxes.shape[0]

        # there is only one class
        labels = torch.ones((num_objs,), dtype=torch.int64)

        image_id = torch.tensor([idx])
        area = (boxes[:, 3] - boxes[:, 1]) * (boxes[:, 2] - boxes[:, 0])
        # suppose all instances are not crowd
        # TODO(original author): check this, may have to be set to true
        iscrowd = torch.zeros((num_objs,), dtype=torch.int64)

        target = {
            "boxes": boxes,
            "labels": labels,
            "image_id": image_id,
            "area": area,
            "iscrowd": iscrowd,
        }

        if self.transforms is not None:
            img, target = self.transforms(img, target)

        return img, target

    def __len__(self):
        return len(self.ds)

In [None]:
import torchvision
from torchvision.models.detection.faster_rcnn import FastRCNNPredictor
from torchvision.models.detection.mask_rcnn import MaskRCNNPredictor

      
def get_instance_segmentation_model(
    num_classes,
    weights_path='/kaggle/input/resnet/fasterrcnn_resnet50_fpn_coco-258fb6c6.pth',
):
    """Build a Faster R-CNN (ResNet-50 FPN) detector with a fresh box head.

    Despite its name, this function builds a detection model only: no mask
    head is attached.

    Args:
        num_classes: Number of output classes of the new box predictor.
        weights_path: File holding the state dict loaded into the freshly
            constructed model before its head is replaced. Defaults to the
            checkpoint path used by this notebook.

    Returns:
        The model with ``roi_heads.box_predictor`` replaced by a
        ``FastRCNNPredictor`` sized for ``num_classes``.
    """
    # Build the architecture without downloading anything, then restore the
    # weights from the local file.
    model = torchvision.models.detection.fasterrcnn_resnet50_fpn(pretrained=False, pretrained_backbone=False)
    # map_location='cpu' keeps loading independent of the device the checkpoint
    # was saved from; the caller moves the model to its target device later.
    model.load_state_dict(torch.load(weights_path, map_location='cpu'))

    # get the number of input features for the classifier
    in_features = model.roi_heads.box_predictor.cls_score.in_features
    # replace the pre-trained head with a new one
    model.roi_heads.box_predictor = FastRCNNPredictor(in_features, num_classes)

    return model

In [None]:
# from engine import train_one_epoch, evaluate
# import utils
import transforms as T


def get_transform(train):
    """Compose the transform pipeline from the local ``transforms`` helper (``T``).

    Args:
        train: When truthy, a random horizontal flip (probability 0.5) is
            appended for data augmentation.

    Returns:
        ``T.Compose`` wrapping ``T.ToTensor()`` and, for training, the flip.
    """
    # ToTensor converts the image, a PIL image, into a PyTorch Tensor
    steps = [T.ToTensor()]
    if train:
        # during training, randomly flip the training images
        # and ground-truth for data augmentation
        steps.append(T.RandomHorizontalFlip(0.5))
    return T.Compose(steps)

In [None]:
# Build the model: use the GPU when one is available, otherwise fall back to CPU.
device = torch.device('cuda') if torch.cuda.is_available() else torch.device('cpu')

# Two classes only: background plus a single foreground class (every box is
# given label 1 in CotsData).
num_classes = 2

# get the model using our helper function
model = get_instance_segmentation_model(num_classes)
# move model to the right device
model.to(device)


In [None]:

# Restore the weights stored in the checkpoint file and switch the model to
# inference mode.
model_path = '/kaggle/input/checkvideo2/checkpoint-video2.pth'
# map_location=device lets the checkpoint load on whichever device was selected
# above; without it, tensors saved from a GPU cannot be deserialized on a
# CPU-only machine.
state_dict = torch.load(model_path, map_location=device)
# print(state_dict.keys())
model.load_state_dict(state_dict)
model.eval()

In [None]:
def apply_nms(orig_prediction, iou_thresh=0.3, score_thresh=0.35):
    """Drop low-score detections, then apply non-maximum suppression.

    Args:
        orig_prediction: Dict with ``'boxes'``, ``'scores'`` and ``'labels'``
            entries that support boolean-mask and index selection.
        iou_thresh: IoU threshold handed to ``torchvision.ops.nms``.
        score_thresh: Detections scoring below this value are removed before
            suppression.

    Returns:
        New dict with the ``'boxes'``, ``'scores'`` and ``'labels'`` of the
        surviving detections; the input dict is not modified.
    """
    fields = ('boxes', 'scores', 'labels')

    # trim low scores first
    confident = orig_prediction['scores'] >= score_thresh
    thresholded = {name: orig_prediction[name][confident] for name in fields}

    # torchvision returns the indices of the bboxes to keep
    survivors = torchvision.ops.nms(thresholded['boxes'], thresholded['scores'], iou_thresh)

    return {name: thresholded[name][survivors] for name in fields}

def return_predict_string(predictions):
    """Serialize detections as a space-separated ``score x y width height`` string.

    Args:
        predictions: Dict with ``'scores'`` (iterable of scalar tensors) and
            ``'boxes'`` (indexable, rows of ``[x_min, y_min, x_max, y_max]``).

    Returns:
        One ``"<score> <x> <y> <w> <h>"`` group per detection, joined by single
        spaces. The score is printed with three decimals, the box values are
        rounded to integers. Returns ``''`` when there are no detections.
    """
    chunks = []
    for position, raw_score in enumerate(predictions['scores']):
        corners = predictions['boxes'][position].cpu().detach().numpy()
        confidence = raw_score.cpu().detach().numpy()

        # [x_min, y_min, x_max, y_max] -> [x_min, y_min, width, height]
        fields = (
            corners[0],
            corners[1],
            corners[2] - corners[0],
            corners[3] - corners[1],
        )
        rounded = ' '.join(str(int(np.round(value))) for value in fields)
        chunks.append(f'{confidence:.3f} {rounded}')

    return ' '.join(chunks)

def preprocess_img(img):
    """Scale an ``(H, W, C)`` pixel array by 1/255 and return it as a ``(C, H, W)`` tensor.

    Args:
        img: 3-D NumPy array in height, width, channel order.

    Returns:
        ``torch.Tensor`` created with ``torch.from_numpy`` from the scaled,
        channel-first array.
    """
    scaled = img / 255.
    # Unpacking fails for anything that is not 3-D, as in the original code.
    _rows, _cols, _channels = scaled.shape
    return torch.from_numpy(scaled.transpose(2, 0, 1))

In [None]:
import greatbarrierreef
# Run the detector over every test frame served by the competition API and
# submit one prediction string per frame.
rows = []  # [frame counter, prediction string] pairs kept for inspection
ii = 0
env = greatbarrierreef.make_env()   # initialize the environment
iter_test = env.iter_test()    # an iterator which loops over the test set and sample submission

# Pure inference: disable autograd so no graph is recorded for each forward
# pass (saves memory and time; the predictions themselves are unchanged).
with torch.no_grad():
    for (pixel_array, sample_prediction_df) in iter_test:
        pixel_p = preprocess_img(pixel_array)
        prediction = model([pixel_p.to(device, dtype=torch.float)])[0]
        # NOTE(review): the positional (0, 0) overrides apply_nms' defaults, i.e.
        # iou_thresh=0 and score_thresh=0 -- confirm this is intended.
        anno = return_predict_string(apply_nms(prediction, 0, 0))
        sample_prediction_df['annotations'] = anno  # make your predictions here
        rows.append([ii, anno])
        env.predict(sample_prediction_df)
        ii += 1



In [None]:
# Display the [frame counter, prediction string] pairs collected by the loop above.
rows
# model([pixel_p.to(device=device, dtype=torch.float)])[0]

In [None]:
# !ls /kaggle/input/tensorflow-great-barrier-reef/

In [None]:
# data = np.load("/kaggle/input/tensorflow-great-barrier-reef/example_test.npy")

In [None]:
# pixel_p = preprocess_img(data[0])
# prediction = model([pixel_p.to(device, dtype=torch.float)])[0]

In [None]:
# prediction

In [None]:
# img_path = '/kaggle/input/tensorflow-great-barrier-reef/train_images/video_0/12142.jpg'
# img = Image.open(img_path).convert("RGB")

In [None]:
# img2 = convert_tensor(img)

In [None]:
# img2[0].shape

In [None]:
# prediction = model([img2[0].to(device)])[0]

In [None]:
# prediction

In [None]:
# pixel_p.shape

In [None]:
# pixel_p*255.

In [None]:
# plt.imshow(pixel_p.reshape(720,1280,3))