<a href="https://colab.research.google.com/github/veda-sunkara/StreetToCloud/blob/master/FCNN.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [4]:
# Run-level settings. None of these names is referenced in the visible cells,
# so the meanings below are inferred from the names only.
LR = 1e-3  # presumably the optimizer learning rate — TODO confirm
EPOCHS = 1000  # presumably the number of training epochs — TODO confirm
EPOCHS_PER_UPDATE = 1  # presumably how often (in epochs) progress is reported — TODO confirm
RUNNAME = "1e3_flood_0"  # presumably a label for this run's artifacts — TODO confirm

In [None]:
# Authenticate this Colab session (interactive prompt).
from google.colab import auth
auth.authenticate_user()

# Download the Google Cloud SDK installer script and pipe it into bash.
# NOTE(review): this executes a remote script without inspecting it first.
!curl https://sdk.cloud.google.com | bash

# Interactive gcloud configuration.
!gcloud init

In [None]:
# NOTE(review): each `!` command runs in its own subshell, so this `cd` does not
# change the notebook's working directory; `%cd /home` would be needed for that.
!cd /home
# Print the directory the notebook is actually running in.
!pwd

In [7]:
import torch
from torchvision import transforms
import torchvision.transforms.functional as F
import random

class InMemoryDataset(torch.utils.data.Dataset):
  """Map-style dataset over an in-memory collection, transformed on access.

  Args:
    data_list: indexable collection of raw samples.
    preprocess_func: callable applied to a raw sample every time it is fetched.
  """

  def __init__(self, data_list, preprocess_func):
    self.data_list, self.preprocess_func = data_list, preprocess_func

  def __len__(self):
    """Number of raw samples held."""
    return len(self.data_list)

  def __getitem__(self, i):
    """Return the preprocessed version of sample ``i``."""
    raw_sample = self.data_list[i]
    return self.preprocess_func(raw_sample)


def processAndAugment(data):
  """Turn one raw ``(x, y)`` sample into an augmented training pair.

  ``x[0]`` and ``x[1]`` are converted to PIL images alongside the squeezed
  ``y``; all three receive the same random 256x256 crop and the same random
  horizontal / vertical flips.

  Args:
    data: ``(x, y)`` pair of arrays; both are copied, never modified.

  Returns:
    ``(im, label)``: ``im`` stacks the two normalised channels, ``label`` is
    the rounded mask tensor.
  """
  raw_im, raw_label = data
  raw_im, raw_label = raw_im.copy(), raw_label.copy()

  # PIL layers: channel 1, channel 2, mask — kept together so every geometric
  # transform is applied identically to all of them.
  layers = [Image.fromarray(raw_im[0]),
            Image.fromarray(raw_im[1]),
            Image.fromarray(raw_label.squeeze())]

  # A single set of crop parameters shared by all layers.
  top, left, height, width = transforms.RandomCrop.get_params(layers[0], (256, 256))
  layers = [F.crop(layer, top, left, height, width) for layer in layers]

  if random.random() > 0.5:
    layers = [F.hflip(layer) for layer in layers]
  if random.random() > 0.5:
    layers = [F.vflip(layer) for layer in layers]

  to_tensor = transforms.ToTensor()
  chan1, chan2, label = [to_tensor(layer).squeeze() for layer in layers]

  normalize = transforms.Normalize([0.6851, 0.5235], [0.0820, 0.1102])
  im = normalize(torch.stack([chan1, chan2]))

  # Values strictly between .003 and .004 trigger a rescale by 255
  # (presumably undoing a 1/255 scaling applied by ToTensor — TODO confirm).
  if torch.sum(label.gt(.003) * label.lt(.004)):
    label *= 255
  label = label.round()

  return im, label


def processTestIm(data):
  """Turn one raw ``(x, y)`` sample into four deterministic 256x256 test tiles.

  ``x[0]``, ``x[1]`` and the squeezed ``y`` are each resized to 512x512 and cut
  into the four quadrants (top-left, top-right, bottom-left, bottom-right).

  Args:
    data: ``(x, y)`` pair of arrays; both are copied, never modified.

  Returns:
    ``(ims, labels)``: ``ims`` stacks the four normalised 2-channel tiles,
    ``labels`` stacks the four rounded mask tiles.
  """
  raw_im, raw_label = data
  raw_im, raw_label = raw_im.copy(), raw_label.copy()

  normalize = transforms.Normalize([0.6851, 0.5235], [0.0820, 0.1102])
  to_tensor = transforms.ToTensor()

  # (top, left) corner of each quadrant, in the order the tiles are emitted.
  corners = [(0, 0), (0, 256), (256, 0), (256, 256)]

  def quadrants(pil_image):
    return [F.crop(pil_image, top, left, 256, 256) for (top, left) in corners]

  chan1 = Image.fromarray(raw_im[0]).resize((512,512))
  chan2 = Image.fromarray(raw_im[1]).resize((512,512))
  mask = Image.fromarray(raw_label.squeeze()).resize((512,512))

  tiles = []
  for tile1, tile2 in zip(quadrants(chan1), quadrants(chan2)):
    pair = torch.stack((to_tensor(tile1).squeeze(), to_tensor(tile2).squeeze()))
    tiles.append(normalize(pair))
  ims = torch.stack(tiles)

  labels = torch.stack([to_tensor(tile).squeeze() for tile in quadrants(mask)])
  # Values strictly between .003 and .004 trigger a rescale by 255
  # (presumably undoing a 1/255 scaling applied by ToTensor — TODO confirm).
  if torch.sum(labels.gt(.003) * labels.lt(.004)):
    labels *= 255
  labels = labels.round()
  return ims, labels

In [None]:
# Install rasterio, which a later cell imports to read raster files.
!pip install rasterio

# Copy the test / train / validation CSV listings from the cnn_chips bucket
# into the current directory.
!gsutil cp gs://cnn_chips/flood_test_data.csv .
!gsutil cp gs://cnn_chips/flood_train_data.csv .
!gsutil cp gs://cnn_chips/flood_valid_data.csv .

In [9]:
import csv
from PIL import Image
import rasterio
import numpy as np
import os
# String prefix prepended to the CSV file names by the download_perm_*_data
# helpers; empty, so the CSVs are opened relative to the working directory.
BASEDIR = ''

def getArr(fname):
  """Read the raster at ``'/' + fname`` and return its contents as an array.

  Args:
    fname: path without the leading slash; it is resolved from the filesystem
      root.

  Returns:
    Whatever ``read()`` yields for the opened dataset.
  """
  # The context manager closes the dataset handle once the data is read; the
  # previous one-liner left every opened file dangling.
  with rasterio.open('/' + fname) as src:
    return src.read()


def download_perm_water_data_from_file(fname):
  """Load every (image, label) raster pair listed in a two-column CSV.

  Each CSV row names an image file and a label file, both read with
  ``getArr``. Pairs whose image contains NaN are skipped. In the label array
  every ``-1`` is rewritten to ``255``.

  Args:
    fname: path of the CSV listing; blank rows are ignored.

  Returns:
    List of ``(image_array, label_array)`` tuples in CSV order.
  """
  with open(fname) as f:
    # csv.reader yields [] for blank lines, which would break the 2-tuple
    # unpacking below, so drop them here.
    data_fnames = [tuple(line) for line in csv.reader(f) if line]

  data = []
  for (x, y) in data_fnames:
    print(x, y)
    arr_x, arr_y = getArr(x), getArr(y)
    if np.isnan(arr_x).any():
      print("skipping nan")
      continue
    # Direct masked assignment replaces the former multiply-by-(-255) trick,
    # whose `np.uint8(mask) * -1` step overflows under NumPy 2 promotion rules.
    arr_y[arr_y == -1] = 255
    data.append((arr_x, arr_y))
    print(len(data))
  return data

def download_perm_train_data():
  """Load the raster pairs listed in ``BASEDIR + 'flood_train_data.csv'``."""
  listing_path = BASEDIR + 'flood_train_data.csv'
  pairs = download_perm_water_data_from_file(listing_path)
  return pairs

def download_perm_valid_data():
  """Load the raster pairs listed in ``BASEDIR + 'flood_valid_data.csv'``."""
  listing_path = BASEDIR + 'flood_valid_data.csv'
  pairs = download_perm_water_data_from_file(listing_path)
  return pairs

def download_perm_test_data():
  """Load the raster pairs listed in ``BASEDIR + 'flood_test_data.csv'``."""
  listing_path = BASEDIR + 'flood_test_data.csv'
  pairs = download_perm_water_data_from_file(listing_path)
  return pairs

In [None]:
# Register the gcsfuse apt repository and its signing key, refresh the package
# index, then install gcsfuse.
!echo "deb http://packages.cloud.google.com/apt gcsfuse-bionic main" > /etc/apt/sources.list.d/gcsfuse.list
!curl https://packages.cloud.google.com/apt/doc/apt-key.gpg | apt-key add -
!apt -qq update
!apt -qq install gcsfuse

# Disabled: mount the cnn_chips bucket at ./files4 through gcsfuse.
# !sudo mkdir files4
# !gcsfuse --implicit-dirs cnn_chips files4

In [13]:
from time import time

def getArrFlood(fname):
  """Read the raster at ``fname`` and return its contents as an array.

  Args:
    fname: path handed to ``rasterio.open`` unchanged.

  Returns:
    Whatever ``read()`` yields for the opened dataset.
  """
  # Close the dataset handle as soon as the data is in memory; the previous
  # one-liner never released it.
  with rasterio.open(fname) as src:
    return src.read()

def download_flood_water_data_from_list(l, data_root="files3/"):
  """Load and rescale the (image, mask) raster pairs named in ``l``.

  For every pair whose image file exists under ``data_root``:
    * the image is read, NaNs are replaced via ``np.nan_to_num``, values are
      clipped to [-50, 1] and mapped linearly onto [0, 1];
    * the mask is read and cast to ``uint8`` (for integer masks a ``-1``
      wraps to ``255``).
  Pairs whose image file is missing are reported by printing the path and
  skipped.

  Args:
    l: iterable of ``(image_fname, mask_fname)`` paths relative to ``data_root``.
    data_root: directory the file names are joined onto. Defaults to the
      previously hard-coded ``"files3/"``.

  Returns:
    List of ``(image_array, mask_array)`` tuples, in input order.
  """
  flood_data = []
  for (im_fname, mask_fname) in l:
    print(im_fname)
    im_path = os.path.join(data_root, im_fname)
    if not os.path.exists(im_path):
      print(im_path)
      continue

    arr_x = np.nan_to_num(getArrFlood(im_path))
    # The mask used to be read twice, with an unused `ignore` array computed in
    # between; that dead computation (`np.uint8(mask) * -1`) overflows under
    # NumPy 2, so only the read that was actually used is kept.
    arr_y = getArrFlood(os.path.join(data_root, mask_fname)).astype(np.uint8)
    # The old `arr_y != arr_y` NaN test could never fire on a uint8 array, so
    # its "skipping nan" branch was unreachable and has been removed.

    arr_x = np.clip(arr_x, -50, 1)
    arr_x = (arr_x + 50) / 51

    loaded_so_far = len(flood_data)
    if loaded_so_far % 100 == 0:
      print(loaded_so_far)
      print(im_fname, mask_fname)
    flood_data.append((arr_x, arr_y))

  print(len(flood_data))
  return flood_data

def load_flood_train_data():
  """Load the chips listed in ``flood_train_data.csv`` (current directory).

  Returns:
    The result of ``download_flood_water_data_from_list`` for the CSV's rows,
    each row converted to a tuple.
  """
  # The unused `basedir` local is gone, and the file name is no longer rebound
  # to the parsed row list.
  with open("flood_train_data.csv") as f:
    pairs = [tuple(line) for line in csv.reader(f)]
  return download_flood_water_data_from_list(pairs)

def load_weak_flood_train_data():
  """Load image/label chips paired from ``files3/S1_NoQC`` and ``files3/NoQC``.

  Both directories are listed and sorted; the k-th image name is paired with
  the k-th label name. Pairs whose image path contains ``"Bolivia"`` are
  dropped. The first ten remaining pairs are printed.

  Returns:
    The result of ``download_flood_water_data_from_list`` for those pairs.
  """
  # NOTE(review): pairing relies purely on sort order, and zip() silently
  # truncates if the two directories hold different numbers of files — confirm
  # the listings always line up.
  label_names = sorted(os.listdir("files3/NoQC"))
  image_names = sorted(os.listdir("files3/S1_NoQC"))
  files = [(os.path.join("S1_NoQC", image_name), os.path.join("NoQC", label_name))
           for label_name, image_name in zip(label_names, image_names)]
  files = [pair for pair in files if "Bolivia" not in pair[0]]
  print(files[0:10])
  return download_flood_water_data_from_list(files)

def load_flood_test_perm_data():
  """Load the test chips with each mask path redirected to its ``Perm`` variant.

  Rows come from ``flood_test_data.csv``; in the second column ``"QC_v2"`` and
  then ``"QC"`` are replaced by ``"Perm"`` before loading.
  """
  with open("flood_test_data.csv") as f:
    rows = [tuple(line) for line in csv.reader(f)]

  perm_pairs = []
  for row in rows:
    perm_mask_name = row[1].replace("QC_v2", "Perm").replace("QC", "Perm")
    perm_pairs.append((row[0], perm_mask_name))
  return download_flood_water_data_from_list(perm_pairs)

def load_flood_valid_data():
  """Load the chips listed in ``flood_valid_data.csv`` (current directory).

  The parsed row list is printed before loading.

  Returns:
    The result of ``download_flood_water_data_from_list`` for the CSV's rows,
    each row converted to a tuple.
  """
  # The unused `basedir` local is gone, and the file name is no longer rebound
  # to the parsed row list.
  with open("flood_valid_data.csv") as f:
    pairs = [tuple(line) for line in csv.reader(f)]
  print(pairs, "files!")
  return download_flood_water_data_from_list(pairs)

def load_flood_test_data():
  """Load the chips listed in ``flood_test_data.csv`` (current directory).

  Returns:
    The result of ``download_flood_water_data_from_list`` for the CSV's rows,
    each row converted to a tuple.
  """
  # The unused `basedir` local is gone, and the file name is no longer rebound
  # to the parsed row list.
  with open("flood_test_data.csv") as f:
    pairs = [tuple(line) for line in csv.reader(f)]
  return download_flood_water_data_from_list(pairs)

def load_flood_bolivia_test_data():
  """Load the chips listed in ``flood_bolivia_data.csv`` (current directory).

  Returns:
    The result of ``download_flood_water_data_from_list`` for the CSV's rows,
    each row converted to a tuple.
  """
  # The unused `basedir` local is gone, and the file name is no longer rebound
  # to the parsed row list.
  with open("flood_bolivia_data.csv") as f:
    pairs = [tuple(line) for line in csv.reader(f)]
  return download_flood_water_data_from_list(pairs)

In [None]:
# Training pipeline: raw pairs held in memory, augmented on access, served in
# shuffled batches of 16.
train_data = download_perm_train_data()
train_dataset = InMemoryDataset(train_data, processAndAugment)
# sampler / batch_sampler / collate_fn / worker_init_fn are left at their
# None defaults instead of being spelled out.
train_loader = torch.utils.data.DataLoader(
    train_dataset,
    batch_size=16,
    shuffle=True,
    num_workers=0,
    pin_memory=True,
    drop_last=False,
    timeout=0,
)
train_iter = iter(train_loader)

In [None]:
# Create the checkpoint directory. Unlike the bare `mkdir checkpoints` automagic
# this is plain Python and is a no-op when the directory already exists, so the
# cell can be re-run safely.
os.makedirs("checkpoints", exist_ok=True)

In [None]:
# Create the local chip directories and mirror the S1 and QC_v2 folders of the
# cnn_chips bucket into them.
!sudo mkdir files3
!sudo mkdir files3/S1
!sudo mkdir files3/QC_v2
!gsutil -m rsync -r gs://cnn_chips/S1 files3/S1
!gsutil -m rsync -r gs://cnn_chips/QC_v2 files3/QC_v2

# NOTE(review): each `!` command runs in its own subshell, so this `cd` does not
# change the notebook's working directory.
!cd /home

# List the synced S1 files.
!ls files3/S1

In [None]:
# Show the working directory; the loaders below open their CSV and files3/
# paths relative to it.
!pwd

# The test listing is loaded twice: once with mask paths rewritten from
# QC/QC_v2 to Perm, once with the mask paths as listed.
flood_test_perm_data = load_flood_test_perm_data()
flood_test_all_data = load_flood_test_data()

In [19]:
def ignore_perm_water(all_water, perm_water):
  """Return a copy of ``all_water`` with selected pixels set to 255.

  A pixel is set to 255 where ``all_water == 1`` and ``perm_water == 1``.
  Neither input is modified.
  """
  masked = all_water.copy()
  in_both = (masked == 1) * (perm_water == 1)
  masked[in_both] = 255
  return masked

def ignore_flood_water(all_water, perm_water):
  """Return a copy of ``all_water`` with selected pixels set to 255.

  A pixel is set to 255 where ``all_water == 1`` and ``perm_water == 0``.
  Neither input is modified.
  """
  masked = all_water.copy()
  water_only_in_all = (masked == 1) * (perm_water == 0)
  masked[water_only_in_all] = 255
  return masked

def get_flood_flood_test_data(all_water_data, perm_water_data):
  """Build a test set whose masks come from ``ignore_perm_water``.

  The two lists are walked in lockstep. Each kept sample pairs the image from
  ``all_water_data`` with the re-masked label. A sample is dropped when its
  original mask contains a 1 but the re-masked label no longer does.
  """
  kept = []
  for (image, all_mask), (_, perm_mask) in zip(all_water_data, perm_water_data):
    remasked = ignore_perm_water(all_mask, perm_mask)
    had_water = np.sum(all_mask == 1) > 0
    water_left = np.sum(remasked == 1) != 0
    if not had_water or water_left:
      kept.append((image, remasked))
  return kept
  

def get_perm_flood_test_data(all_water_data, perm_water_data):
  """Build a test set whose masks come from ``ignore_flood_water``.

  The two lists are walked in lockstep. Each kept sample pairs the image from
  ``all_water_data`` with the re-masked label. A sample is dropped when its
  original mask contains a 1 but the re-masked label no longer does.
  """
  kept = []
  for (image, all_mask), (_, perm_mask) in zip(all_water_data, perm_water_data):
    remasked = ignore_flood_water(all_mask, perm_mask)
    had_water = np.sum(all_mask == 1) > 0
    water_left = np.sum(remasked == 1) != 0
    if not had_water or water_left:
      kept.append((image, remasked))
  return kept

In [20]:
# Derive two extra test sets from the same chips: one re-masked by
# ignore_perm_water, one re-masked by ignore_flood_water.
flood_flood_test_data = get_flood_flood_test_data(flood_test_all_data, flood_test_perm_data)
perm_flood_test_data = get_perm_flood_test_data(flood_test_all_data, flood_test_perm_data)

In [21]:
import torch
from torchvision import transforms
import torchvision.transforms.functional as F
import random

class InMemoryDataset(torch.utils.data.Dataset):
  """Map-style dataset over an in-memory collection, transformed on access.

  This class is also defined, identically, in an earlier cell of this notebook;
  whichever cell runs last provides the binding.

  Args:
    data_list: indexable collection of raw samples.
    preprocess_func: callable applied to a raw sample every time it is fetched.
  """

  def __init__(self, data_list, preprocess_func):
    self.data_list, self.preprocess_func = data_list, preprocess_func

  def __len__(self):
    """Number of raw samples held."""
    return len(self.data_list)

  def __getitem__(self, i):
    """Return the preprocessed version of sample ``i``."""
    raw_sample = self.data_list[i]
    return self.preprocess_func(raw_sample)


def processAndAugment(data):
  """Turn one raw ``(x, y)`` sample into an augmented training pair.

  This function is also defined, identically, in an earlier cell of this
  notebook; whichever cell runs last provides the binding.

  ``x[0]`` and ``x[1]`` are converted to PIL images alongside the squeezed
  ``y``; all three receive the same random 256x256 crop and the same random
  horizontal / vertical flips.

  Args:
    data: ``(x, y)`` pair of arrays; both are copied, never modified.

  Returns:
    ``(im, label)``: ``im`` stacks the two normalised channels, ``label`` is
    the rounded mask tensor.
  """
  raw_im, raw_label = data
  raw_im, raw_label = raw_im.copy(), raw_label.copy()

  # PIL layers: channel 1, channel 2, mask — kept together so every geometric
  # transform is applied identically to all of them.
  layers = [Image.fromarray(raw_im[0]),
            Image.fromarray(raw_im[1]),
            Image.fromarray(raw_label.squeeze())]

  # A single set of crop parameters shared by all layers.
  top, left, height, width = transforms.RandomCrop.get_params(layers[0], (256, 256))
  layers = [F.crop(layer, top, left, height, width) for layer in layers]

  if random.random() > 0.5:
    layers = [F.hflip(layer) for layer in layers]
  if random.random() > 0.5:
    layers = [F.vflip(layer) for layer in layers]

  to_tensor = transforms.ToTensor()
  chan1, chan2, label = [to_tensor(layer).squeeze() for layer in layers]

  normalize = transforms.Normalize([0.6851, 0.5235], [0.0820, 0.1102])
  im = normalize(torch.stack([chan1, chan2]))

  # Values strictly between .003 and .004 trigger a rescale by 255
  # (presumably undoing a 1/255 scaling applied by ToTensor — TODO confirm).
  if torch.sum(label.gt(.003) * label.lt(.004)):
    label *= 255
  label = label.round()

  return im, label


def processTestIm(data):
  """Turn one raw ``(x, y)`` sample into four deterministic 256x256 test tiles.

  This function is also defined in an earlier cell of this notebook; whichever
  cell runs last provides the binding.

  ``x[0]``, ``x[1]`` and the squeezed ``y`` are each resized to 512x512 and cut
  into the four quadrants (top-left, top-right, bottom-left, bottom-right).

  Args:
    data: ``(x, y)`` pair of arrays; both are copied, never modified.

  Returns:
    ``(ims, labels)``: ``ims`` stacks the four normalised 2-channel tiles,
    ``labels`` stacks the four rounded mask tiles.
  """
  raw_im, raw_label = data
  raw_im, raw_label = raw_im.copy(), raw_label.copy()

  normalize = transforms.Normalize([0.6851, 0.5235], [0.0820, 0.1102])
  to_tensor = transforms.ToTensor()

  # (top, left) corner of each quadrant, in the order the tiles are emitted.
  corners = [(0, 0), (0, 256), (256, 0), (256, 256)]

  def quadrants(pil_image):
    return [F.crop(pil_image, top, left, 256, 256) for (top, left) in corners]

  chan1 = Image.fromarray(raw_im[0]).resize((512,512))
  chan2 = Image.fromarray(raw_im[1]).resize((512,512))
  mask = Image.fromarray(raw_label.squeeze()).resize((512,512))

  tiles = []
  for tile1, tile2 in zip(quadrants(chan1), quadrants(chan2)):
    pair = torch.stack((to_tensor(tile1).squeeze(), to_tensor(tile2).squeeze()))
    tiles.append(normalize(pair))
  ims = torch.stack(tiles)

  labels = torch.stack([to_tensor(tile).squeeze() for tile in quadrants(mask)])
  # Values strictly between .003 and .004 trigger a rescale by 255
  # (presumably undoing a 1/255 scaling applied by ToTensor — TODO confirm).
  if torch.sum(labels.gt(.003) * labels.lt(.004)):
    labels *= 255
  labels = labels.round()
  return ims, labels

In [22]:
def _unwrap_single_sample(batch):
  """Collate function for batch_size=1: hand back the lone sample as-is.

  processTestIm already returns a stack of four tiles per sample, so no extra
  batch axis is added.
  """
  return batch[0]


def _make_test_loader(data):
  """Wrap ``data`` in an InMemoryDataset and a single-sample DataLoader.

  Returns:
    ``(dataset, loader)``.
  """
  dataset = InMemoryDataset(data, processTestIm)
  loader = torch.utils.data.DataLoader(
      dataset, batch_size=1, shuffle=True, num_workers=0,
      collate_fn=_unwrap_single_sample, pin_memory=True,
      drop_last=False, timeout=0)
  return dataset, loader


# The three test splits share one construction recipe instead of three
# copy-pasted DataLoader calls; every original top-level name is still bound.
test_all_data = flood_test_all_data
test_all_dataset, test_all_loader = _make_test_loader(test_all_data)
test_all_iter = iter(test_all_loader)

test_flood_data = flood_flood_test_data
test_flood_dataset, test_flood_loader = _make_test_loader(test_flood_data)
test_flood_iter = iter(test_flood_loader)

test_perm_data = perm_flood_test_data
test_perm_dataset, test_perm_loader = _make_test_loader(test_perm_data)
test_perm_iter = iter(test_perm_loader)

In [23]:
import torchvision.models as models
import torch.nn as nn
import torch

# FCN-ResNet50 segmentation network with a 2-class output; no pretrained
# weights are requested for either the head or the backbone.
net = models.segmentation.fcn_resnet50(pretrained=False, num_classes=2, pretrained_backbone=False)
# Replace the backbone's first convolution with one that takes 2 input channels
# (processAndAugment / processTestIm stack two channels per image).
net.backbone.conv1 = nn.Conv2d(2, 64, kernel_size=7, stride=2, padding=3,
                               bias=False)
def convertBNtoGN(module, num_groups=16):
  """Recursively replace every ``BatchNorm2d`` in ``module`` with ``GroupNorm``.

  Args:
    module: a module (or a bare ``BatchNorm2d``) to convert. Container
      modules are modified in place.
    num_groups: number of groups for each ``GroupNorm``; every converted
      layer's channel count must be divisible by it.

  Returns:
    A new ``GroupNorm`` when ``module`` itself is a ``BatchNorm2d``,
    otherwise ``module`` with its children converted.
  """
  if isinstance(module, torch.nn.modules.batchnorm.BatchNorm2d):
    mod = nn.GroupNorm(num_groups, module.num_features,
                       eps=module.eps, affine=module.affine)
    if module.affine:
      # Carry the learned scale/shift over. This copy used to sit after the
      # `return` (and referenced an undefined name), so it never ran.
      mod = mod.to(device=module.weight.device, dtype=module.weight.dtype)
      with torch.no_grad():
        mod.weight.copy_(module.weight)
        mod.bias.copy_(module.bias)
    return mod

  # Snapshot the children first because they are re-registered while looping.
  for name, child in list(module.named_children()):
    module.add_module(name, convertBNtoGN(child, num_groups=num_groups))

  return module

# Swap every BatchNorm2d layer of the network for GroupNorm (default 16 groups).
net = convertBNtoGN(net)

In [24]:
from tqdm import tqdm_notebook as tqdm
from IPython.display import clear_output

def _valid_predictions(output, target, ignore_value=255):
  """Flatten predictions and targets, dropping pixels labelled ``ignore_value``.

  Args:
    output: network scores; the predicted class is the argmax over dim 1.
    target: label tensor with as many elements as there are predictions.
    ignore_value: label value excluded from every metric.

  Returns:
    ``(pred, target)``: 1-D tensors restricted to the non-ignored pixels,
    both on the device of ``output``.
  """
  pred = torch.argmax(output, dim=1).flatten()
  # Follow the prediction's device instead of forcing CUDA, so the metrics
  # also work on CPU-only machines.
  target = target.flatten().to(pred.device)
  keep = target.ne(ignore_value)
  return pred.masked_select(keep), target.masked_select(keep)


def computeIOU(output, target):
  """Intersection-over-union of class 1 over the non-ignored pixels.

  A small epsilon is added to numerator and denominator, so an empty union
  gives 1. A NaN result is reported and replaced by 0.
  """
  output, target = _valid_predictions(output, target)
  intersection = torch.sum(output * target)
  union = torch.sum(target) + torch.sum(output) - intersection
  iou = (intersection + .0000001) / (union + .0000001)
  if torch.isnan(iou):
    print("failed, replacing with 0")
    iou = torch.zeros((), device=iou.device)
  return iou


def computeAccuracy(output, target):
  """Fraction of non-ignored pixels whose predicted class equals the label."""
  output, target = _valid_predictions(output, target)
  correct = torch.sum(output.eq(target))
  return correct.float() / len(target)


def truePositives(output, target):
  """Sum of ``prediction * label`` over the non-ignored pixels."""
  output, target = _valid_predictions(output, target)
  return torch.sum(output * target)


def trueNegatives(output, target):
  """Count of non-ignored pixels predicted 0 and labelled 0."""
  output, target = _valid_predictions(output, target)
  return torch.sum((output == 0) * (target == 0))


def falsePositives(output, target):
  """Count of non-ignored pixels predicted 1 but labelled 0."""
  output, target = _valid_predictions(output, target)
  return torch.sum((output == 1) * (target == 0))


def falseNegatives(output, target):
  """Count of non-ignored pixels predicted 0 but labelled 1."""
  output, target = _valid_predictions(output, target)
  return torch.sum((output == 0) * (target == 1))


def test_loop(test_data_loader, net):
  net = net.eval()
  net = net.cuda()
  count = 0
  iou = 0
  loss = 0
  accuracy = 0
  tp = 0
  fp = 0
  tn = 0
  fn = 0
  with torch.no_grad():
      for (images, labels) in tqdm(test_data_loader):
          net = net.cuda()
          outputs = net(images.cuda())
          valid_iou = computeIOU(outputs["out"], labels.cuda())
          iou += valid_iou
          accuracy += computeAccuracy(outputs["out"], labels.cuda())
          tp +=  truePositives(outputs["out"], labels.cuda())
          fp +=  falsePositives(outputs["out"], labels.cuda())
          tn +=  trueNegatives(outputs["out"], labels.cuda())
          fn +=  falseNegatives(outputs["out"], labels.cuda())
          count += 1

  iou = iou / count
  print("Test Mean IOU:", iou)
  print("Total IOU:", (tp.float() / (fn + fp + tp)))
  print("OMISSON:", fn.float() / (fn + tp))
  print("COMMISSON:", fp.float() / (tn + fp))
  print("Test Accuracy:", accuracy / count)

In [25]:
import torch 

# Evaluate the network on the three test splits; each call prints its own
# metric summary.
# NOTE(review): no checkpoint is loaded in the visible cells, so as shown this
# scores the freshly constructed network — confirm that is intended.
test_loop(test_all_loader, net)
test_loop(test_flood_loader, net)
test_loop(test_perm_loader, net)

Please use `tqdm.notebook.tqdm` instead of `tqdm.tqdm_notebook`


HBox(children=(FloatProgress(value=0.0, max=90.0), HTML(value='')))

  img = torch.from_numpy(np.array(pic, np.float32, copy=False))



Test Mean IOU: tensor(0.0569, device='cuda:0')
Total IOU: tensor(0.0923, device='cuda:0')
OMISSON: tensor(0.8363, device='cuda:0')
COMMISSON: tensor(0.1175, device='cuda:0')
Test Accuracy: tensor(0.7855, device='cuda:0')


HBox(children=(FloatProgress(value=0.0, max=90.0), HTML(value='')))


Test Mean IOU: tensor(0.0482, device='cuda:0')
Total IOU: tensor(0.0766, device='cuda:0')
OMISSON: tensor(0.8392, device='cuda:0')
COMMISSON: tensor(0.1175, device='cuda:0')
Test Accuracy: tensor(0.8101, device='cuda:0')


HBox(children=(FloatProgress(value=0.0, max=55.0), HTML(value='')))


Test Mean IOU: tensor(0.0524, device='cuda:0')
Total IOU: tensor(0.0832, device='cuda:0')
OMISSON: tensor(0.8329, device='cuda:0')
COMMISSON: tensor(0.1163, device='cuda:0')
Test Accuracy: tensor(0.8010, device='cuda:0')
