### Training + Prediction

1. Data Augmentation
2. Training
3. Outputting validation scores such as mIoU or the Lovasz loss
4. Outputting predicted masks or predicted GeoJSON polygons directly

In [4]:
import os
import sys
import argparse
import collections
from contextlib import contextmanager

from PIL import Image

import torch
import torch.backends.cudnn
import torch.nn as nn
from torch.nn import DataParallel
from torch.optim import Adam
from torch.utils.data import DataLoader
from torchvision.transforms import Resize, CenterCrop, Normalize

from tqdm import tqdm

from robosat.transforms import (
    JointCompose,
    JointTransform,
    JointRandomHorizontalFlip,
    JointRandomRotation,
    ConvertImageMode,
    ImageToTensor,
    MaskToTensor,
)

from robosat.datasets import SlippyMapTilesConcatenation
from robosat.metrics import Metrics
from robosat.losses import CrossEntropyLoss2d, mIoULoss2d, FocalLoss2d, LovaszLoss2d
from robosat.unet import UNet
from robosat.utils import plot
from robosat.config import load_config
from robosat.log import Log

In [5]:
def _str_to_bool(value):
    """Parse a command line string into a real boolean.

    argparse's ``type=bool`` simply calls ``bool()`` on the raw string, so every
    non-empty value (including "False" and "0") would be read as True.

    Raises:
        argparse.ArgumentTypeError: if the text is not a recognised boolean spelling.
    """
    if isinstance(value, bool):
        return value

    text = value.strip().lower()
    if text in ("1", "true", "t", "yes", "y", "on"):
        return True
    # the empty string stays falsy, as it was with bool("")
    if text in ("", "0", "false", "f", "no", "n", "off"):
        return False

    raise argparse.ArgumentTypeError("expected a boolean value, got {!r}".format(value))


def add_parser(subparser):
    """Register the ``train`` sub-command and its options on ``subparser``.

    Args:
        subparser: the object returned by ``ArgumentParser.add_subparsers()``.

    The parsed namespace gets ``func`` set to ``main``.
    NOTE(review): ``main`` is not defined in the visible notebook cells; it has to
    exist in the module namespace before this function is called.
    """
    parser = subparser.add_parser(
        "train", help="trains model on dataset", formatter_class=argparse.ArgumentDefaultsHelpFormatter
    )

    parser.add_argument("--model", type=str, required=True, help="path to model configuration file")
    parser.add_argument("--dataset", type=str, required=True, help="path to dataset configuration file")
    parser.add_argument("--checkpoint", type=str, required=False, help="path to a model checkpoint (to retrain)")
    # Accepts "--resume", "--resume true" and "--resume false"; a bare flag means True.
    parser.add_argument(
        "--resume",
        type=_str_to_bool,
        nargs="?",
        const=True,
        default=False,
        help="resume training or fine-tuning (if checkpoint)",
    )
    parser.add_argument("--workers", type=int, default=0, help="number of workers pre-processing images")

    parser.set_defaults(func=main)

In [6]:
def get_dataset_loaders(target_size, batch_size, dataset_path, workers=0):
    """Build the training and validation data loaders.

    Tiles are read from ``<dataset_path>/training/{images,labels}`` and
    ``<dataset_path>/validation/{images,labels}``.

    Args:
        target_size: edge length in pixels; images and masks are resized to a square of this size.
        batch_size: number of tiles per batch for both loaders.
        dataset_path: root directory of the dataset.
        workers: number of DataLoader worker processes (0 loads in the main process).

    Returns:
        A ``(train_loader, val_loader)`` tuple.

    Only the training set is randomly augmented. Validation tiles go through the
    deterministic steps only, so validation scores are comparable between epochs.
    """
    size = (target_size, target_size)

    # using imagenet mean and std for Normalization
    mean, std = [0.485, 0.456, 0.406], [0.229, 0.224, 0.225]

    def build_transform(augment):
        """Compose the joint image/mask pipeline, optionally with random augmentation."""
        steps = [
            JointTransform(ConvertImageMode("RGB"), ConvertImageMode("P")),
            # nearest-neighbour for masks so resizing never invents new class indices
            JointTransform(Resize(size, Image.BILINEAR), Resize(size, Image.NEAREST)),
            JointTransform(CenterCrop(size), CenterCrop(size)),
        ]

        if augment:
            # The rotation is stacked three times exactly as before; the arguments are
            # presumably (probability, degrees) -- TODO confirm against robosat.transforms.
            steps += [
                JointRandomHorizontalFlip(0.5),
                JointRandomRotation(0.5, 90),
                JointRandomRotation(0.5, 90),
                JointRandomRotation(0.5, 90),
            ]

        steps += [
            JointTransform(ImageToTensor(), MaskToTensor()),
            # only the image is normalized, the mask is passed through untouched
            JointTransform(Normalize(mean=mean, std=std), None),
        ]

        return JointCompose(steps)

    train_dataset = SlippyMapTilesConcatenation(
        [os.path.join(dataset_path, "training", "images")],
        os.path.join(dataset_path, "training", "labels"),
        build_transform(augment=True),
    )

    val_dataset = SlippyMapTilesConcatenation(
        [os.path.join(dataset_path, "validation", "images")],
        os.path.join(dataset_path, "validation", "labels"),
        build_transform(augment=False),
    )

    assert len(train_dataset) > 0, "at least one tile in training dataset"
    assert len(val_dataset) > 0, "at least one tile in validation dataset"

    # Training keeps drop_last=True so every optimisation step sees a full batch.
    train_loader = DataLoader(
        train_dataset, batch_size=batch_size, shuffle=True, drop_last=True, num_workers=workers
    )
    # Validation must score every tile, so the trailing partial batch is kept.
    val_loader = DataLoader(
        val_dataset, batch_size=batch_size, shuffle=False, drop_last=False, num_workers=workers
    )

    return train_loader, val_loader

In [7]:
# Build both loaders up front: 256 pixel tiles, batches of 8, read from the "dataset" directory.
train_loader, val_loader = get_dataset_loaders(target_size=256, batch_size=8, dataset_path="dataset")

In [8]:
def train(loader, num_classes, device, net, optimizer, criterion):
    """Run one training epoch and return the epoch's loss and metrics.

    Args:
        loader: iterable yielding ``(images, masks, tiles)`` batches; the spatial size of
            ``images`` (dims 2+) must equal that of ``masks`` (dims 1+).
        num_classes: number of segmentation classes tracked by the metrics.
        device: torch device the batches are moved to.
        net: the model; it is switched to training mode.
        optimizer: optimizer stepped once per batch.
        criterion: loss called as ``criterion(outputs, masks)``.

    Returns:
        dict with ``loss`` (sum of the per-batch loss values divided by the number of
        samples seen), ``miou``, ``fg_iou`` and ``mcc``.

    Raises:
        AssertionError: if the loader yields no samples or a batch has mismatched sizes.
    """
    # always two classes in our case
    metrics = Metrics(range(num_classes))
    net.train()

    num_samples = 0
    running_loss = 0

    # the tile ids are part of each batch but are not needed for training
    for images, masks, _tiles in tqdm(loader, desc="Train", unit="batch", ascii=True):
        images = images.to(device)
        masks = masks.to(device)

        assert images.size()[2:] == masks.size()[1:], "resolutions for images and masks are in sync"

        num_samples += int(images.size(0))

        optimizer.zero_grad()
        outputs = net(images)
        loss = criterion(outputs, masks)
        loss.backward()
        optimizer.step()

        running_loss += loss.item()

        # detach so accumulating metrics does not keep the autograd graph alive
        for mask, output in zip(masks, outputs):
            metrics.add(mask, output.detach())

    # An empty loader (e.g. fewer tiles than one batch with drop_last=True) would otherwise
    # surface as a bare ZeroDivisionError below.
    assert num_samples > 0, "dataset contains training images and labels"

    return {
        "loss": running_loss / num_samples,
        "miou": metrics.get_miou(),
        "fg_iou": metrics.get_fg_iou(),
        "mcc": metrics.get_mcc(),
    }

In [9]:
def validate(loader, num_classes, device, net, criterion):
    """Evaluate the model on a loader without updating it and return loss and metrics.

    Args:
        loader: iterable yielding ``(images, masks, tiles)`` batches; the spatial size of
            ``images`` (dims 2+) must equal that of ``masks`` (dims 1+).
        num_classes: number of segmentation classes tracked by the metrics.
        device: torch device the batches are moved to.
        net: the model; it is switched to evaluation mode.
        criterion: loss called as ``criterion(outputs, masks)``.

    Returns:
        dict with ``loss`` (sum of the per-batch loss values divided by the number of
        samples seen), ``miou``, ``fg_iou`` and ``mcc``.

    Raises:
        AssertionError: if the loader yields no samples or a batch has mismatched sizes.
    """
    metrics = Metrics(range(num_classes))

    num_samples = 0
    running_loss = 0

    # no gradients are needed for scoring
    with torch.no_grad():
        net.eval()

        # the tile ids are part of each batch but are not needed for validation
        for images, masks, _tiles in tqdm(loader, desc="Validate", unit="batch", ascii=True):
            images = images.to(device)
            masks = masks.to(device)

            assert images.size()[2:] == masks.size()[1:], "resolutions for images and masks are in sync"

            num_samples += int(images.size(0))

            outputs = net(images)
            running_loss += criterion(outputs, masks).item()

            for mask, output in zip(masks, outputs):
                metrics.add(mask, output)

    # An empty loader would otherwise surface as a bare ZeroDivisionError below.
    assert num_samples > 0, "dataset contains validation images and labels"

    return {
        "loss": running_loss / num_samples,
        "miou": metrics.get_miou(),
        "fg_iou": metrics.get_fg_iou(),
        "mcc": metrics.get_mcc(),
    }

In [15]:
# NOTE: this cell reads num_classes, lr, loss_func, num_epochs, target_size, batch_size,
# dataset_path, checkpoint_path and device, which are assigned in the configuration cell
# under the "Predict" heading further down -- run that cell first.

# Per-class weights handed to the weighted losses (CrossEntropy, mIoU, Focal).
# TODO: add a helper to return weights seamlessly.
weight = torch.Tensor([1.513212, 10.147043])

# TODO: add in resume training if possible

# loading Model
net = UNet(num_classes)
net = DataParallel(net)
net = net.to(device)

# define optimizer
optimizer = Adam(net.parameters(), lr=lr)

# select loss function, just set a default, or try to experiment
# (Lovasz is the only one built without the class weights)
if loss_func == "CrossEntropy":
    criterion = CrossEntropyLoss2d(weight=weight).to(device)
elif loss_func == "mIoU":
    criterion = mIoULoss2d(weight=weight).to(device)
elif loss_func == "Focal":
    criterion = FocalLoss2d(weight=weight).to(device)
elif loss_func == "Lovasz":
    criterion = LovaszLoss2d().to(device)
else:
    # raise instead of sys.exit so the notebook shows a normal traceback
    raise ValueError("Error: Unknown Loss Function value: {!r}".format(loss_func))

# loading data
train_loader, val_loader = get_dataset_loaders(target_size, batch_size, dataset_path)

# TODO: re-enable file logging through robosat.log.Log (hyper parameters up front, then the
# train/validation loss, mIoU, foreground IoU and MCC of every epoch).

# how often (in epochs) the history plot and the model checkpoint are written
plot_every = 5
checkpoint_every = 1

# maps "train <metric>" / "val <metric>" to the list of per-epoch values
history = collections.defaultdict(list)

# training loop
for epoch in range(num_epochs):
    finished = epoch + 1

    train_hist = train(train_loader, num_classes, device, net, optimizer, criterion)
    for key, value in train_hist.items():
        history["train " + key].append(value)

    # validate for each epoch
    val_hist = validate(val_loader, num_classes, device, net, criterion)
    for key, value in val_hist.items():
        history["val " + key].append(value)

    if finished % plot_every == 0:
        # plotter use history values, no need for log
        visual = "history-{:05d}-of-{:05d}.png".format(finished, num_epochs)
        plot(os.path.join(checkpoint_path, visual), history)

    if finished % checkpoint_every == 0:
        checkpoint = "checkpoint-{:05d}-of-{:05d}.pth".format(finished, num_epochs)
        # net is wrapped in DataParallel, so this is the wrapped model's state_dict
        states = {"epoch": finished, "state_dict": net.state_dict(), "optimizer": optimizer.state_dict()}
        torch.save(states, os.path.join(checkpoint_path, checkpoint))

Train: 100%|##########| 84/84 [00:31<00:00,  2.64batch/s]
Validate: 100%|##########| 4/4 [00:00<00:00,  6.72batch/s]
Train: 100%|##########| 84/84 [00:29<00:00,  2.82batch/s]
Validate: 100%|##########| 4/4 [00:00<00:00,  6.64batch/s]
Train:  18%|#7        | 15/84 [00:05<00:25,  2.71batch/s]


KeyboardInterrupt: 

### Predict

In [12]:
# Settings are hard-coded in this cell for now. The argparse/config-file route is parked:
# model = load_config(args.model)
# dataset = load_config(args.dataset)

device = torch.device("cuda")

cuda_ready = torch.cuda.is_available()
if not cuda_ready:
    sys.exit("Error: CUDA requested but not available")

# --- global settings (move into a config file if experiments need it) ---

# data
dataset_path = "dataset"
target_size = 256
batch_size = 8
num_classes = 2
target_type = "Solar"

# optimisation
loss_func = "Lovasz"
lr = 0.0005
num_epochs = 10

# output location for checkpoints; created up front if it does not exist yet
checkpoint_path = "checkpoint"
os.makedirs(checkpoint_path, exist_ok=True)

In [11]:
import numpy as np

import torch
import torch.nn as nn
import torch.backends.cudnn
from torch.utils.data import DataLoader
from torchvision.transforms import Compose, Normalize

from tqdm import tqdm
from PIL import Image

from robosat.datasets import BufferedSlippyMapDirectory
from robosat.unet import UNet
from robosat.config import load_config
from robosat.colors import continuous_palette_for_color
from robosat.transforms import ConvertImageMode, ImageToTensor
from robosat.colors import make_palette

In [13]:
# Extra settings used by the prediction cells.
tiles_dir = "Melbourne"  # slippy-map directory with the tiles to predict on
mask_dir = "predicted_masks"  # root directory the predicted mask tiles are written to
tile_size = 256
checkpoint_name = "Solar_Best.pth"  # file name looked up inside checkpoint_path
weights = [1.513212, 10.147043]

In [15]:
# load checkpoint
# NOTE: torch.load unpickles the file -- only load checkpoints from a trusted source.
chkpt = torch.load(os.path.join(checkpoint_path, checkpoint_name), map_location=device)

# Rebuild the network the same way the training cell does (wrapped in DataParallel),
# since the checkpoint stores the wrapped model's state_dict.
net = UNet(num_classes).to(device)
net = nn.DataParallel(net)
net.load_state_dict(chkpt["state_dict"])
net.eval()

# preprocess and load (same imagenet mean/std as used for training)
mean, std = [0.485, 0.456, 0.406], [0.229, 0.224, 0.225]
transform = Compose([ConvertImageMode(mode="RGB"), ImageToTensor(), Normalize(mean=mean, std=std)])

# tiles file, need to get it again, or do we really need it? why not just predict
directory = BufferedSlippyMapDirectory(tiles_dir, transform=transform, size=tile_size)
assert len(directory) > 0, "at least one tile in dataset"

# loading data
loader = DataLoader(directory, batch_size=1)

# The palette is the same for every tile, so build it once instead of once per tile.
palette = make_palette("dark", "light")

# don't track tensors with autograd during prediction
with torch.no_grad():
    for images, tiles in tqdm(loader, desc="Eval", unit="batch", ascii=True):
        images = images.to(device)
        outputs = net(images)

        # manually compute segmentation mask class probabilities per pixel
        probs = nn.functional.softmax(outputs, dim=1).cpu().numpy()

        for tile, prob in zip(tiles, probs):
            x, y, z = list(map(int, tile))

            prob = directory.unbuffer(prob)

            # most likely class per pixel, scaled by 200 before it is stored as uint8
            mask = (np.argmax(prob, axis=0) * 200).astype(np.uint8)

            out = Image.fromarray(mask, mode="P")
            out.putpalette(palette)

            # masks are laid out as <mask_dir>/<z>/<x>/<y>.png
            tile_folder = os.path.join(mask_dir, str(z), str(x))
            os.makedirs(tile_folder, exist_ok=True)
            path = os.path.join(tile_folder, str(y) + ".png")
            out.save(path, optimize=True)

Eval:  51%|#####     | 3840/7535 [02:41<02:35, 23.71batch/s]


KeyboardInterrupt: 

### Post-Processing

1. From predicted masks to predicted geojson polygons for either Solar or Green

2. Loading Building Polygons

3. Comparing the Solar/Green predictions with the building polygons

4. Outputting highlighted building polygons


In [21]:
from robosat.tiles import tiles_from_slippy_map

from robosat.features.parking import ParkingHandler

import toml

In [34]:
# Parse config.toml (path is relative to the notebook's working directory) into a dict.
config = toml.load('config.toml')

In [35]:
# Display the parsed configuration.
# NOTE(review): the output lists a 'commo1n' table next to 'common' -- this looks like a
# typo in config.toml; confirm which section name is intended.
config

{'mask_dir': 'predicted_masks',
 'common': {'dataset': '/tmp/slippy-map-dir/',
  'classes': ['background', 'parking'],
  'colors': ['denim', 'orange']},
 'weights': {'values': [1.6248, 5.762827]},
 'commo1n': {'cuda': True,
  'batch_size': 2,
  'image_size': 512,
  'checkpoint': '/tmp/pth/'},
 'opt': {'epochs': 10, 'lr': 0.0001, 'loss': 'Lovasz'}}

In [36]:
# Look up the top-level `mask_dir` entry of the parsed configuration.
config['mask_dir']

'predicted_masks'

In [None]:


# Feed every predicted mask tile to the feature handler and save the result as GeoJSON.
# NOTE(review): `output_path` is not assigned in any cell visible in this notebook; define it
# (the target directory) before running this cell.
handler = ParkingHandler()

tiles = list(tiles_from_slippy_map(mask_dir))

for tile, path in tqdm(tiles, ascii=True, unit="mask"):
    image = np.array(Image.open(path).convert("P"), dtype=np.uint8)
    # Treat every non-zero palette index as foreground: the prediction cell scales the class
    # index by 200 before saving, so testing for exactly 1 depends on how the file stored it.
    mask = (image > 0).astype(np.uint8)
    handler.apply(tile, mask)

# output feature collection
handler.save(os.path.join(output_path, "feature.geojson"))

In [17]:
def mask_to_feature(output_folder):
    """Convert the predicted mask tiles into features and save them as GeoJSON.

    Reads every mask tile below the module-level ``mask_dir``, hands a binary mask per tile
    to a ``ParkingHandler`` and writes ``feature.geojson`` into ``output_folder``.

    Args:
        output_folder: directory that receives ``feature.geojson``; created if missing.
    """
    handler = ParkingHandler()

    tiles = list(tiles_from_slippy_map(mask_dir))

    for tile, path in tqdm(tiles, ascii=True, unit="mask"):
        image = np.array(Image.open(path).convert("P"), dtype=np.uint8)
        # Treat every non-zero palette index as foreground: the prediction cell scales the
        # class index by 200 before saving, so testing for exactly 1 depends on how the
        # file stored it.
        mask = (image > 0).astype(np.uint8)
        handler.apply(tile, mask)

    # output feature collection (into the folder passed in, which was previously ignored)
    os.makedirs(output_folder, exist_ok=True)
    handler.save(os.path.join(output_folder, "feature.geojson"))