In [1]:
import wandb
import os
#os.environ['WANDB_MODE'] = 'dryrun'
wandb.init("sky-eye-full")

W&B Run: https://app.wandb.ai/xvr-hlt/uncategorized/runs/5tu8pc0o

In [2]:
import detectron2
from detectron2.utils.logger import setup_logger
setup_logger()

# import some common libraries
import numpy as np
import cv2

# import some common detectron2 utilities
from detectron2.engine import DefaultPredictor
from detectron2.config import get_cfg
from detectron2.utils.visualizer import Visualizer
from detectron2.data import MetadataCatalog

In [3]:
TRAIN_DIR = '/home/jupyter/datasets/xview/train'

In [4]:
from glob import glob
import itertools
from tqdm import tqdm_notebook as tqdm
import numpy as np
import json
from detectron2.structures import BoxMode
import shapely.wkt

In [5]:
damage_classes = ('no-damage', 'minor-damage', 'major-damage', 'destroyed')
damage_cat_ids = {i:ix for ix, i in enumerate(damage_classes)}

def get_instances(files, bbox_mode=detectron2.structures.boxes.BoxMode.XYXY_ABS, thresh=1000):
    dataset_dicts = []
    for file in tqdm(files):
        with open(file) as f:
            i = json.load(f)
        if len(i['features']['xy']) > thresh:
            continue

        h,w = i['metadata']['height'], i['metadata']['width']
        objs = []
        
        for feat in i['features']['xy']:
            prop = feat['properties']
            if prop.get('subtype') == 'un-classified':
                continue
            poly = shapely.wkt.loads(feat['wkt'])
            points = list(poly.exterior.coords)
            px, py = zip(*points)
            segm = [p for xy in points for p in xy]
            
            objs.append({
                "bbox": [np.min(px), np.min(py), np.max(px), np.max(py)],
                "bbox_mode": bbox_mode,
                "segmentation": [segm],
                "category_id": damage_cat_ids[prop['subtype']] if 'subtype' in prop else 0,
                "iscrowd": 0
            })

        dataset_dicts.append({
            'height': h,
            'width': w,
            'file_name': file.replace('/labels/', '/images/').replace('json', 'png'),
            'annotations': objs
        })
    return dataset_dicts

In [6]:
import random
from detectron2.data import DatasetCatalog, MetadataCatalog


for typ, thing_classes in (('pre', ('building',)), ('post', damage_classes)):
    random.seed(0)
    all_files = glob(f'{TRAIN_DIR}/labels/*{typ}_disaster.json')
    dev_ix = int(len(all_files)*.20)
    dev_instances = all_files[:dev_ix]
    train_instances = all_files[dev_ix:]
    
    DatasetCatalog.register(f"{typ}/train", lambda i=train_instances: get_instances(i))
    MetadataCatalog.get(f"{typ}/train").set(thing_classes=thing_classes)
    
    DatasetCatalog.register(f"{typ}/dev", lambda i=dev_instances: get_instances(i))
    MetadataCatalog.get(f"{typ}/dev").set(thing_classes=thing_classes)

In [7]:
from collections import Counter

In [8]:
dataset = 'post'
dataset_train = f'{dataset}/train'
dataset_dev = f'{dataset}/dev'
dev_dicts = DatasetCatalog.get(dataset_dev)
metadata = MetadataCatalog.get(dataset_train)

HBox(children=(IntProgress(value=0, max=559), HTML(value='')))




In [9]:
import torch
import detectron2.data.transforms as T
from detectron2.structures.masks import polygons_to_bitmask
from random import sample
from detectron2.utils.visualizer import ColorMode
from PIL import Image

#dev_dicts = get_instances(dev_instances)

def sample_vis(model, cfg, dicts, n=5):
    model = model.eval()
    transform_gen = T.ResizeShortestEdge([cfg.INPUT.MIN_SIZE_TEST, cfg.INPUT.MIN_SIZE_TEST], cfg.INPUT.MAX_SIZE_TEST)
    input_format = cfg.INPUT.FORMAT
    images = []
    for d in sample(dicts, n):
        with torch.no_grad():
            im = cv2.imread(d["file_name"])
            inputs = prepare_inputs(im, input_format, transform_gen)
            outputs = model([inputs])[0]

    
            vis = Visualizer(im[:, :, ::-1],
                           metadata=metadata, 
                           scale=0.5, 
                           #instance_mode=ColorMode.IMAGE_BW
            )
            pred = vis.draw_instance_predictions(outputs["instances"].to("cpu"))
            images.append(Image.fromarray(pred.get_image()))
    model.train()
    wandb.log({'examples': [wandb.Image(i) for i in images]})
    return {'metric': 1}

def prepare_inputs(original_image, input_format, transform_gen):
    if input_format == "RGB":
        original_image = original_image[:, :, ::-1]
    height, width = original_image.shape[:2]
    image = transform_gen.get_transform(original_image).apply_image(original_image)
    image = torch.as_tensor(image.astype("float32").transpose(2, 0, 1))
    return {"image": image, "height": height, "width": width}

def eval_model(eval_dicts, cfg, model):
    model = model.eval()
    transform_gen = T.ResizeShortestEdge([cfg.INPUT.MIN_SIZE_TEST, cfg.INPUT.MIN_SIZE_TEST], cfg.INPUT.MAX_SIZE_TEST)
    input_format = cfg.INPUT.FORMAT
    tps, fns, fps = 0., 0., 0.
    
    with torch.no_grad():
        for e in tqdm(eval_dicts):
            im = cv2.imread(e["file_name"])
            inputs = prepare_inputs(im, input_format, transform_gen)
            outputs = model([inputs])[0]
            outputs_bool = np.array(outputs['instances'].pred_masks.cpu()).sum(0) > 0
            polygons = [np.array(a['segmentation'][0]) for a in e['annotations']]
            if len(polygons) > 0:
                targets_bool = polygons_to_bitmask(polygons, e['height'], e['width'])
            else:
                targets_bool = np.zeros((1024,1024)).astype(np.bool)

            tps += outputs_bool[targets_bool].sum() if targets_bool.sum() > 0 else 0.
            fns += targets_bool[~outputs_bool].sum() if (~outputs_bool).sum() > 0 else 0.
            fps += (~targets_bool[outputs_bool]).sum() if outputs_bool.sum() > 0 else 0.
    
    rec = tps/(tps+fns)
    pre = tps/(tps+fps)
    metrics = {
        'building:recall': rec,
        'building:precision': pre,
        'building:f1': 2*rec*pre/(rec+pre)
    }
    wandb.log(metrics)
    model = model.train()
    return metrics

In [10]:
batch_size = 2
nbatches = len(train_instances) // batch_size
epochs = 80

In [11]:
from detectron2.engine import DefaultTrainer
from detectron2.config import get_cfg
import os

cfg = get_cfg()
cfg.merge_from_file("detectron2/configs/COCO-InstanceSegmentation/mask_rcnn_R_101_FPN_3x.yaml")
cfg.DATASETS.TRAIN = (dataset_train,)
cfg.DATASETS.TEST = ()

cfg.DATALOADER.FILTER_EMPTY_ANNOTATIONS = False
cfg.DATALOADER.NUM_WORKERS = 8

cfg.SOLVER.CHECKPOINT_PERIOD = nbatches*10
cfg.SOLVER.MAX_ITER = nbatches*epochs
cfg.STEPS = cfg.SOLVER.MAX_ITER*.85, cfg.SOLVER.MAX_ITER*.15

cfg.INPUT.CROP.ENABLED = True
cfg.MODEL.WEIGHTS = "detectron2://COCO-InstanceSegmentation/mask_rcnn_R_101_FPN_3x/138205316/model_final_a3ec72.pkl"  # initialize from model zoo
cfg.SOLVER.IMS_PER_BATCH = batch_size
cfg.SOLVER.BASE_LR = 0.00025

cfg.MODEL.ROI_HEADS.NUM_CLASSES = len(metadata.thing_classes)
cfg.MODEL.ANCHOR_GENERATOR.SIZES = [[8], [16], [32], [64], [128]]

cfg.INPUT.MIN_SIZE_TRAIN = (512, 768, 1024)
cfg.MIN_SIZE_TRAIN_SAMPLING = "choice"

cfg.TEST.AUG.ENABLED = True
cfg.TEST.AUG.MIN_SIZES = (512, 768, 1024)
cfg.TEST.AUG.FLIP = True
cfg.MODEL.ROI_HEADS.SCORE_THRESH_TEST = 0.7



os.makedirs(cfg.OUTPUT_DIR, exist_ok=True)
trainer = DefaultTrainer(cfg)
trainer.resume_or_load(resume=False)

[32m[10/30 11:38:28 d2.engine.defaults]: [0mModel:
GeneralizedRCNN(
  (backbone): FPN(
    (fpn_lateral2): Conv2d(256, 256, kernel_size=(1, 1), stride=(1, 1))
    (fpn_output2): Conv2d(256, 256, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (fpn_lateral3): Conv2d(512, 256, kernel_size=(1, 1), stride=(1, 1))
    (fpn_output3): Conv2d(256, 256, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (fpn_lateral4): Conv2d(1024, 256, kernel_size=(1, 1), stride=(1, 1))
    (fpn_output4): Conv2d(256, 256, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (fpn_lateral5): Conv2d(2048, 256, kernel_size=(1, 1), stride=(1, 1))
    (fpn_output5): Conv2d(256, 256, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (top_block): LastLevelMaxPool()
    (bottom_up): ResNet(
      (stem): BasicStem(
        (conv1): Conv2d(
          3, 64, kernel_size=(7, 7), stride=(2, 2), padding=(3, 3), bias=False
          (norm): FrozenBatchNorm2d(num_features=64, eps=1e-05)
        )
      )
 

HBox(children=(IntProgress(value=0, max=2240), HTML(value='')))


[32m[10/30 11:38:40 d2.data.build]: [0mDistribution of training instances among all 4 categories:
[36m|  category  | #instances   |   category   | #instances   |   category   | #instances   |
|:----------:|:-------------|:------------:|:-------------|:------------:|:-------------|
| no-damage  | 88355        | minor-damage | 12350        | major-damage | 11841        |
| destroyed  | 8595         |              |              |              |              |
|   total    | 121141       |              |              |              |              |[0m
[32m[10/30 11:38:40 d2.data.detection_utils]: [0mTransformGens used in training: [ResizeShortestEdge(short_edge_length=(512, 768, 1024), max_size=1333, sample_style='choice'), RandomFlip()]
[32m[10/30 11:38:40 d2.data.build]: [0mUsing training sampler TrainingSampler


'roi_heads.box_predictor.cls_score.weight' has shape (81, 1024) in the checkpoint but (5, 1024) in the model! Skipped.
'roi_heads.box_predictor.cls_score.bias' has shape (81,) in the checkpoint but (5,) in the model! Skipped.
'roi_heads.box_predictor.bbox_pred.weight' has shape (320, 1024) in the checkpoint but (16, 1024) in the model! Skipped.
'roi_heads.box_predictor.bbox_pred.bias' has shape (320,) in the checkpoint but (16,) in the model! Skipped.
'roi_heads.mask_head.predictor.weight' has shape (80, 256, 1, 1) in the checkpoint but (4, 256, 1, 1) in the model! Skipped.
'roi_heads.mask_head.predictor.bias' has shape (80,) in the checkpoint but (4,) in the model! Skipped.


In [12]:
train_model=True
if train_model:
    from detectron2.engine.hooks import EvalHook

    # evaluate every 2 epochs

    #eval_hook = EvalHook(1*nbatches, lambda: eval_model(dev_dicts, cfg, trainer.model))
    vis_hook = EvalHook(nbatches, lambda: sample_vis(trainer.model, cfg, dev_dicts))
    trainer.register_hooks([vis_hook])
    trainer.train()

[32m[10/30 11:38:41 d2.engine.train_loop]: [0mStarting training from iteration 0
[32m[10/30 11:38:51 d2.utils.events]: [0meta: 11:37:18  iter: 19  total_loss: 5.070  loss_cls: 1.756  loss_box_reg: 0.168  loss_mask: 0.693  loss_rpn_cls: 2.135  loss_rpn_loc: 0.338  time: 0.4598  data_time: 0.0114  lr: 0.000005  max_mem: 4836M
[32m[10/30 11:39:00 d2.utils.events]: [0meta: 11:37:09  iter: 39  total_loss: 4.289  loss_cls: 1.584  loss_box_reg: 0.203  loss_mask: 0.692  loss_rpn_cls: 1.507  loss_rpn_loc: 0.312  time: 0.4555  data_time: 0.0035  lr: 0.000010  max_mem: 6898M
[32m[10/30 11:39:09 d2.utils.events]: [0meta: 11:24:26  iter: 59  total_loss: 2.880  loss_cls: 1.300  loss_box_reg: 0.167  loss_mask: 0.690  loss_rpn_cls: 0.503  loss_rpn_loc: 0.323  time: 0.4493  data_time: 0.0040  lr: 0.000015  max_mem: 6898M
[32m[10/30 11:39:18 d2.utils.events]: [0meta: 11:38:08  iter: 79  total_loss: 2.357  loss_cls: 0.986  loss_box_reg: 0.109  loss_mask: 0.684  loss_rpn_cls: 0.318  loss_rpn_loc

AssertionError: Eval function must return a dict. Got [<PIL.Image.Image image mode=RGB size=512x512 at 0x7FBE0AE85E50>, <PIL.Image.Image image mode=RGB size=512x512 at 0x7FBD38301D10>, <PIL.Image.Image image mode=RGB size=512x512 at 0x7FBD382C3890>, <PIL.Image.Image image mode=RGB size=512x512 at 0x7FBD38297310>, <PIL.Image.Image image mode=RGB size=512x512 at 0x7FBD382B0410>] instead.

In [None]:
cfg.MODEL.WEIGHTS = os.path.join(cfg.OUTPUT_DIR, "model_final.pth")
cfg.MODEL.ROI_HEADS.SCORE_THRESH_TEST = 0.3
cfg.DATASETS.TEST = ("buildings/dev", )
predictor = DefaultPredictor(cfg)

In [None]:
from detectron2.structures.masks import polygons_to_bitmask

In [None]:
for d in random.sample(dev_data, 3):    
    im = cv2.imread(d["file_name"])
    outputs = predictor(im)
    
    vis = Visualizer(im[:, :, ::-1],
                   metadata=building_metadata, 
                   scale=0.5, 
                   #instance_mode=ColorMode.IMAGE_BW   # remove the colors of unsegmented pixels
    )
    display("Pred")
    pred = vis.draw_instance_predictions(outputs["instances"].to("cpu"))
    display(Image.fromarray(pred.get_image()))
    
    vis = Visualizer(im[:, :, ::-1],
                   metadata=building_metadata, 
                   scale=0.5, 
                   #instance_mode=ColorMode.IMAGE_BW   # remove the colors of unsegmented pixels
    )
    
    display("Target")
    target = vis.draw_dataset_dict(d)
    display(Image.fromarray(target.get_image()))

In [None]:
dataset_dicts = dataset_dicts = get_pre_dataset(train_instances)
import random
from PIL import Image
from IPython.display import display

for d in random.sample(dataset_dicts, 3):
    img = cv2.imread(d["file_name"])
    visualizer = Visualizer(img[:, :, ::-1], metadata=building_metadata, scale=.5)
    vis = visualizer.draw_dataset_dict(d)
    display(Image.fromarray(vis.get_image()))(train_instances)
import random
from PIL import Image
from IPython.display import display

for d in random.sample(dataset_dicts, 3):
    img = cv2.imread(d["file_name"])
    visualizer = Visualizer(img[:, :, ::-1], metadata=building_metadata, scale=.5)
    vis = visualizer.draw_dataset_dict(d)
    display(Image.fromarray(vis.get_image()))

In [None]:
def eval_model(eval_dicts, cfg, model):
    model = model.eval()
    transform_gen = T.ResizeShortestEdge([cfg.INPUT.MIN_SIZE_TEST, cfg.INPUT.MIN_SIZE_TEST], cfg.INPUT.MAX_SIZE_TEST)
    input_format = cfg.INPUT.FORMAT
    tps, fns, fps = 0., 0., 0.
    
    with torch.no_grad():
        for e in tqdm(eval_dicts):
            im = cv2.imread(e["file_name"])
            inputs = prepare_inputs(im, input_format, transform_gen)
            outputs = model([inputs])[0]
            outputs_bool = np.array(outputs['instances'].pred_masks.cpu()).sum(0) > 0
            polygons = [np.array(a['segmentation'][0]) for a in e['annotations']]
            if len(polygons) > 0:
                targets_bool = polygons_to_bitmask(polygons, e['height'], e['width'])
            else:
                targets_bool = np.zeros((1024,1024)).astype(np.bool)

            tps += outputs_bool[targets_bool].sum() if targets_bool.sum() > 0 else 0.
            fns += targets_bool[~outputs_bool].sum() if (~outputs_bool).sum() > 0 else 0.
            fps += (~targets_bool[outputs_bool]).sum() if outputs_bool.sum() > 0 else 0.
    
    rec = tps/(tps+fns)
    pre = tps/(tps+fps)
    metrics = {
        'building:recall': rec,
        'building:precision': pre,
        'building:f1': 2*rec*pre/(rec+pre)
    }
    #wandb.log(metrics)
    model = model.train()
    return metrics

In [None]:
eval_model(dev_data, cfg, predictor.model)

In [None]:
import random
from PIL import Image
from IPython.display import display

for d in random.sample(data_dicts, 3):
    img = cv2.imread(d["file_name"])
    visualizer = Visualizer(img[:, :, ::-1], metadata=metadata, scale=1.)
    vis = visualizer.draw_dataset_dict(d)
    display(Image.fromarray(vis.get_image()))