In [1]:
import numpy as np
import os
import copy
import itertools
import warnings
warnings.simplefilter(action='ignore', category=FutureWarning)
import matplotlib.pyplot as plt

import torch
import torch.nn as nn
import torch.nn.functional as F
from torch.utils.data import DataLoader

from main.head import ASPPHeadNode
from data.nyuv2_dataloader_adashare import NYU_v2
from data.pixel2pixel_loss import NYUCriterions
from data.pixel2pixel_metrics import NYUMetrics

# Deeplab Resnet - Independent Models

In [2]:
# Passed as `affine=` to the BatchNorm2d layers built in this notebook;
# True gives those layers a learnable weight and bias.
affine_par = True

def conv3x3(in_channels, out_channels, stride=1, dilation=1):
    """Create a bias-free 3x3 ``nn.Conv2d`` with "full" padding.

    The padding is derived from the dilated kernel extent so that, for
    stride 1, the output keeps the spatial size of the input.

    Args:
        in_channels: number of input channels.
        out_channels: number of output channels.
        stride: convolution stride.
        dilation: dilation rate of the 3x3 kernel.

    Returns:
        The configured ``nn.Conv2d`` module.
    """
    base_kernel = np.asarray((3, 3))

    # Extent of the kernel once (dilation - 1) gaps are inserted between taps.
    effective_kernel = base_kernel + (base_kernel - 1) * (dilation - 1)

    # Half of that extent (minus the centre tap) is padded on each side.
    same_padding = (effective_kernel - 1) // 2

    # Conv2d wants tuples, not numpy arrays.
    return nn.Conv2d(
        in_channels,
        out_channels,
        kernel_size=tuple(base_kernel),
        stride=stride,
        padding=tuple(same_padding),
        dilation=dilation,
        bias=False,
    )

# Residual branch only; the shortcut is added by ResidualBlock, not here
# conv -> bn -> relu -> conv -> bn
class BasicBlock(nn.Module):
    """Two stacked 3x3 convolutions: conv -> bn -> relu -> conv -> bn.

    The block returns the output of the second batch norm as is; it does not
    add its input back and applies no final activation.

    Args:
        inplanes: channels of the incoming feature map.
        planes: channels produced by both convolutions.
        stride: stride of the first convolution (the second uses 1).
        dilation: dilation shared by both convolutions.
    """

    # Output channels are `planes * expansion`.
    expansion = 1

    def __init__(self, inplanes, planes, stride=1, dilation=1):
        super().__init__()
        norm_kwargs = dict(affine=affine_par)
        self.conv1 = conv3x3(inplanes, planes, stride, dilation=dilation)
        self.bn1 = nn.BatchNorm2d(planes, **norm_kwargs)
        self.relu = nn.ReLU(inplace=True)
        self.conv2 = conv3x3(planes, planes, dilation=dilation)
        self.bn2 = nn.BatchNorm2d(planes, **norm_kwargs)

    def forward(self, x):
        """Run ``x`` through both conv/bn stages and return the result."""
        hidden = self.relu(self.bn1(self.conv1(x)))
        return self.bn2(self.conv2(hidden))
    
# Add the shortcut (identity, or the projection `ds`) to a block's output, then apply ReLU
class ResidualBlock(nn.Module):
    """Combine a block with its shortcut and apply the closing ReLU.

    Args:
        block: module computing the residual branch.
        ds: module applied to the input to form the shortcut, or ``None``
            to use the input unchanged.
    """

    def __init__(self, block, ds):
        super().__init__()
        self.block = block
        self.ds = ds

    def forward(self, x):
        """Return ``relu(shortcut(x) + block(x))``."""
        # The shortcut is evaluated before the block, as in the original order.
        if self.ds is None:
            shortcut = x
        else:
            shortcut = self.ds(x)
        return F.relu(shortcut + self.block(x))

In [3]:
class Deeplab_ResNet_Backbone(nn.Module):
    """Dilated ResNet feature extractor stored as a flat list of blocks.

    ``self.blocks`` holds the stem ("seed") followed by one ``ResidualBlock``
    per residual unit, so ``forward`` is a plain loop over that list.

    Args:
        block: residual-branch class; it is called as
            ``block(inplanes, planes, stride, dilation=...)`` and must expose
            an ``expansion`` attribute.
        layers: number of residual units per stage. Stage ``i`` uses
            ``filt_sizes[i]``, ``strides[i]`` and ``dilations[i]`` below, so
            at most four entries are supported.
    """

    def __init__(self, block, layers):
        super(Deeplab_ResNet_Backbone, self).__init__()

        # Channel count fed to the next unit; updated as stages are built.
        self.inplanes = 64

        strides = [1, 2, 1, 1]
        dilations = [1, 1, 2, 4]
        filt_sizes = [64, 128, 256, 512]
        self.blocks = []
        self.layer_config = layers

        self.__add_to_blocks(self._make_seed())

        for segment, num_blocks in enumerate(self.layer_config):
            filt_size = filt_sizes[segment]
            stride = strides[segment]
            dilation = dilations[segment]
            for b_idx in range(num_blocks):
                blocklayer = self._make_blocklayer(b_idx, block, filt_size, stride=stride, dilation=dilation)
                self.__add_to_blocks(blocklayer)

        # Register the collected blocks as sub-modules.
        self.blocks = nn.ModuleList(self.blocks)

        for m in self.modules():
            if isinstance(m, nn.Conv2d):
                nn.init.normal_(m.weight, mean=0, std=0.01)
            elif isinstance(m, nn.BatchNorm2d) and m.weight is not None:
                # weight/bias are None when the layer was built with
                # affine=False (i.e. affine_par is False); nothing to reset then.
                nn.init.ones_(m.weight)
                nn.init.zeros_(m.bias)

    def _make_seed(self):
        """Build the stem: 7x7 stride-2 conv -> BN -> ReLU -> 3x3 stride-2 max pool."""
        seed = nn.Sequential(nn.Conv2d(3, 64, kernel_size=7, stride=2, padding=3, bias=False),
                             nn.BatchNorm2d(64, affine=affine_par),
                             nn.ReLU(inplace=True),
                             nn.MaxPool2d(kernel_size=3, stride=2, padding=1, ceil_mode=True))
        return seed

    def _make_downsample(self, block, inplanes, planes, stride=1, dilation=1):
        """Return a 1x1 conv + BN projection for the shortcut, or ``None``.

        A projection is created when the stride is not 1, when the channel
        count changes, or when the dilation is 2 or 4.
        """
        downsample = None
        if stride != 1 or inplanes != planes * block.expansion or dilation == 2 or dilation == 4:
            downsample = nn.Sequential(nn.Conv2d(inplanes, planes * block.expansion, kernel_size=1, stride=stride, bias=False),
                                       nn.BatchNorm2d(planes * block.expansion, affine = affine_par))
        return downsample

    def _make_blocklayer(self, block_idx, block, planes, stride=1, dilation=1):
        """Build one residual unit of a stage.

        Only the first unit of a stage (``block_idx == 0``) receives the
        stage stride and a possible shortcut projection; later units use the
        block's default stride and an identity shortcut.
        """
        ds = None
        if block_idx == 0:
            # Create the block before the projection to keep module creation order.
            basic_block = block(self.inplanes, planes, stride, dilation=dilation)
            ds = self._make_downsample(block, self.inplanes, planes, stride=stride, dilation=dilation)
            self.inplanes = planes * block.expansion
        else:
            basic_block = block(self.inplanes, planes, dilation=dilation)

        return ResidualBlock(basic_block, ds)

    def __add_to_blocks(self, block):
        """Append ``block`` to the list that later becomes the ModuleList."""
        self.blocks.append(block)
        return

    def forward(self, x):
        """Apply the stem and every residual unit in order."""
        for block in self.blocks:
            x = block(x)
        return x

In [4]:
class Deeplab_ASPP(nn.Module):
    """Backbone with a [3, 4, 6, 3] stage layout followed by an ASPP head.

    Args:
        cls_num: forwarded to ``ASPPHeadNode`` as its second argument
            (presumably the number of output channels; confirm in main.head).
    """

    def __init__(self, cls_num):
        super().__init__()
        self.branch = 100
        self.backbone = Deeplab_ResNet_Backbone(BasicBlock, [3, 4, 6, 3])
        # 512 is the channel count of the backbone's last stage.
        self.heads = ASPPHeadNode(512, cls_num)

    def forward(self, x):
        """Return the head's output for the backbone features of ``x``."""
        return self.heads(self.backbone(x))

# Train on NYUv2 for one task

In [5]:
class Trainer():
    """Iteration-based training loop for a single task.

    Batches are expected to be dicts holding the network input under
    ``'input'``, the target under ``task`` and, optionally, a mask under
    ``task + '_mask'``. Inputs, targets and masks are moved with ``.cuda()``.

    Args:
        model: network to optimise.
        task: key of the target inside each batch; also used to name the
            checkpoint file.
        train_dataloader: iterable of training batches; restarted whenever
            it is exhausted.
        val_dataloader: iterable of validation batches.
        criterion: callable ``criterion(output, target[, mask])`` returning
            a scalar loss tensor.
        metric: callable ``metric(output, target[, mask])`` that also
            provides ``val_metrics()``.
        lr: Adam learning rate.
        decay_lr_freq: ``step_size`` of the StepLR scheduler (in iterations).
        decay_lr_rate: ``gamma`` of the StepLR scheduler.
        print_iters: print the mean training loss every this many iterations.
        val_iters: run validation every this many iterations.
        save_iters: write a checkpoint every this many iterations.
    """

    def __init__(self, model, task, train_dataloader, val_dataloader, criterion, metric,
                 lr=0.001, decay_lr_freq=4000, decay_lr_rate=0.5,
                 print_iters=50, val_iters=200, save_iters=200):
        super(Trainer, self).__init__()
        self.model = model
        self.task = task
        self.startIter = 0
        self.optimizer = torch.optim.Adam(filter(lambda p: p.requires_grad, self.model.parameters()), lr=lr, betas=(0.5, 0.999), weight_decay=0.0001)
        self.scheduler = torch.optim.lr_scheduler.StepLR(self.optimizer, step_size=decay_lr_freq, gamma=decay_lr_rate)

        self.train_dataloader = train_dataloader
        self.train_iter = iter(self.train_dataloader)
        self.val_dataloader = val_dataloader
        self.criterion = criterion
        self.metric = metric

        # Losses collected since the last print; starts as an empty list.
        self.set_train_loss()

        self.print_iters = print_iters
        self.val_iters = val_iters
        self.save_iters = save_iters

    def train(self, iters, savePath=None, reload=None):
        """Train until iteration ``iters`` is reached.

        Args:
            iters: total iteration count to stop at (not a number of extra
                steps); the loop starts from ``self.startIter``.
            savePath: prefix for checkpoint files; no checkpoint is written
                when it is ``None``.
            reload: checkpoint file name appended to ``savePath`` to resume
                from; ignored unless ``savePath`` is also given.
        """
        self.model.train()
        if reload is not None and savePath is not None:
            self.load_model(savePath, reload)

        for i in range(self.startIter, iters):
            self.train_step()

            if (i+1) % self.print_iters == 0:
                self.print_train_loss(i)
                self.set_train_loss()
            if (i+1) % self.val_iters == 0:
                self.validate(i)
            if (i+1) % self.save_iters == 0:
                if savePath is not None:
                    self.save_model(i, savePath)

        # Reset loss list and the data iters
        self.set_train_loss()
        return

    def train_step(self):
        """Run one optimisation step on the next training batch."""
        # validate() leaves the model in eval mode, so switch back every step.
        self.model.train()
        try:
            data = next(self.train_iter)
        except StopIteration:
            # Loader exhausted: start another pass over the training data.
            self.train_iter = iter(self.train_dataloader)
            data = next(self.train_iter)

        x = data['input'].cuda()
        self.optimizer.zero_grad()
        output = self.model(x)

        y = data[self.task].cuda()
        if self.task + '_mask' in data:
            loss = self.criterion(output, y, data[self.task + '_mask'].cuda())
        else:
            loss = self.criterion(output, y)

        self.loss_list.append(loss.item())

        loss.backward()
        self.optimizer.step()

        if self.scheduler is not None:
            self.scheduler.step()
        return

    def validate(self, it):
        """Evaluate on the validation loader and print loss and metrics.

        Args:
            it: zero-based index of the training iteration that triggered
                the validation; printed as ``it + 1``.
        """
        self.model.eval()
        loss_list = []

        # No gradients are needed here; skipping autograd bookkeeping avoids
        # building a graph for every validation batch.
        with torch.no_grad():
            for i, data in enumerate(self.val_dataloader):
                x = data['input'].cuda()
                output = self.model(x)

                y = data[self.task].cuda()
                if self.task + '_mask' in data:
                    tloss = self.criterion(output, y, data[self.task + '_mask'].cuda())
                    self.metric(output, y, data[self.task + '_mask'].cuda())
                else:
                    tloss = self.criterion(output, y)
                    self.metric(output, y)
                loss_list.append(tloss.item())

        val_results = self.metric.val_metrics()
        print('[Iter {} Task {}] Val Loss: {:.4f}'.format((it+1), self.task[:4], np.mean(loss_list)), flush=True)
        print(val_results, flush=True)
        print('======================================================================', flush=True)
        return

    # helper functions
    def set_train_loss(self):
        """Clear the running list of training losses."""
        self.loss_list = []
        return

    def load_model(self, savePath, reload):
        """Restore model, optimizer, scheduler and start iteration.

        The checkpoint is read from ``savePath + reload`` (plain string
        concatenation) and training resumes at the saved iteration + 1.
        """
        state = torch.load(savePath + reload)
        self.startIter = state['iter'] + 1
        self.model.load_state_dict(state['state_dict'])
        self.optimizer.load_state_dict(state['optimizer'])
        self.scheduler.load_state_dict(state['scheduler'])
        return

    def save_model(self, it, savePath):
        """Write a checkpoint to ``savePath + task + '.model'``.

        The same file is overwritten on every call.
        """
        state = {'iter': it,
                'state_dict': self.model.state_dict(),
                'optimizer': self.optimizer.state_dict(),
                'scheduler': self.scheduler.state_dict()}
        torch.save(state, savePath + self.task + '.model')
        return

    def print_train_loss(self, it):
        """Print the mean of the collected training losses, if there are any."""
        if not self.loss_list:
            return
        avg_loss = np.mean(self.loss_list)
        print('[Iter {} Task {}] Train Loss: {:.4f}'.format((it+1), self.task[:4], avg_loss), flush=True)
        return

In [6]:
# Dataset location and the per-task value passed to the model as `cls_num`.
dataroot = '/mnt/nfs/work1/huiguan/lijunzhang/policymtl/data/NYUv2/'
tasks = ('segment_semantic','normal','depth_zbuffer')
task_cls_num = {'segment_semantic': 40, 'normal':3, 'depth_zbuffer': 1}


# Not read by the cells shown in this notebook; kept in case other code uses them.
criterionDict = {}
metricDict = {}

# Single task trained in this notebook.
task = ['segment_semantic']
dataset = NYU_v2(dataroot, 'train', crop_h=321, crop_w=321)
trainDataloader = DataLoader(dataset, 16, shuffle=True)

dataset = NYU_v2(dataroot, 'test', crop_h=321, crop_w=321)
# Evaluation does not need a random order; keep validation batches fixed.
valDataloader = DataLoader(dataset, 16, shuffle=False)
criterion = NYUCriterions(task[0])
metric = NYUMetrics(task[0])
clsNum = task_cls_num[task[0]]

In [7]:
# Build the network for the selected task and move it to the GPU.
model = Deeplab_ASPP(clsNum).cuda()

In [None]:
# Ind. Model, Adam, task = Seg
# Train up to iteration 20000; checkpoints go to checkpoint + '<task>.model'.
checkpoint = '/mnt/nfs/work1/huiguan/lijunzhang/multibranch/checkpoint/NYUv2/exp/'

trainer = Trainer(model=model, task=task[0],
                  train_dataloader=trainDataloader, val_dataloader=valDataloader,
                  criterion=criterion, metric=metric)
trainer.train(iters=20000, savePath=checkpoint)

[Iter 50 Task segm] Train Loss: 3.4750
[Iter 100 Task segm] Train Loss: 2.8563
[Iter 150 Task segm] Train Loss: 2.7555
[Iter 200 Task segm] Train Loss: 2.7052
[Iter 200 Task segm] Val Loss: 2.7961
{'mIoU': 0.1649, 'Pixel Acc': 0.3115}
[Iter 250 Task segm] Train Loss: 2.6486
[Iter 300 Task segm] Train Loss: 2.6110
[Iter 350 Task segm] Train Loss: 2.6516
[Iter 400 Task segm] Train Loss: 2.6310
[Iter 400 Task segm] Val Loss: 2.6000
{'mIoU': 0.1114, 'Pixel Acc': 0.3518}
[Iter 450 Task segm] Train Loss: 2.5912
[Iter 500 Task segm] Train Loss: 2.5425
[Iter 550 Task segm] Train Loss: 2.5495
[Iter 600 Task segm] Train Loss: 2.5162
[Iter 600 Task segm] Val Loss: 2.6940
{'mIoU': 0.1576, 'Pixel Acc': 0.3201}
[Iter 650 Task segm] Train Loss: 2.5236
[Iter 700 Task segm] Train Loss: 2.4928
[Iter 750 Task segm] Train Loss: 2.5000
[Iter 800 Task segm] Train Loss: 2.4932
[Iter 800 Task segm] Val Loss: 2.4027
{'mIoU': 0.1577, 'Pixel Acc': 0.3636}
[Iter 850 Task segm] Train Loss: 2.4692
[Iter 900 Task se

[Iter 5400 Task segm] Train Loss: 1.2827
[Iter 5400 Task segm] Val Loss: 1.7155
{'mIoU': 0.1765, 'Pixel Acc': 0.5047}
[Iter 5450 Task segm] Train Loss: 1.2524
[Iter 5500 Task segm] Train Loss: 1.2277
[Iter 5550 Task segm] Train Loss: 1.2514
[Iter 5600 Task segm] Train Loss: 1.2607
[Iter 5600 Task segm] Val Loss: 1.6373
{'mIoU': 0.21, 'Pixel Acc': 0.5268}
[Iter 5650 Task segm] Train Loss: 1.2171
[Iter 5700 Task segm] Train Loss: 1.2621
[Iter 5750 Task segm] Train Loss: 1.2171
[Iter 5800 Task segm] Train Loss: 1.2169
[Iter 5800 Task segm] Val Loss: 1.6236
{'mIoU': 0.2091, 'Pixel Acc': 0.5292}
[Iter 5850 Task segm] Train Loss: 1.2013
[Iter 5900 Task segm] Train Loss: 1.2036
[Iter 5950 Task segm] Train Loss: 1.2018
[Iter 6000 Task segm] Train Loss: 1.1582
[Iter 6000 Task segm] Val Loss: 1.6139
{'mIoU': 0.2037, 'Pixel Acc': 0.528}
[Iter 6050 Task segm] Train Loss: 1.1488
[Iter 6100 Task segm] Train Loss: 1.1792
[Iter 6150 Task segm] Train Loss: 1.1426
[Iter 6200 Task segm] Train Loss: 1.136

In [8]:
# Ind. Model, Adam, task = Seg, reload
# Resume from the saved checkpoint and continue up to iteration 20000.
checkpoint = '/mnt/nfs/work1/huiguan/lijunzhang/multibranch/checkpoint/NYUv2/exp/'

trainer = Trainer(model=model, task=task[0],
                  train_dataloader=trainDataloader, val_dataloader=valDataloader,
                  criterion=criterion, metric=metric)
trainer.train(iters=20000, savePath=checkpoint, reload='segment_semantic.model')

[Iter 10450 Task segm] Train Loss: 0.6542
[Iter 10500 Task segm] Train Loss: 0.6175
[Iter 10550 Task segm] Train Loss: 0.6675
[Iter 10600 Task segm] Train Loss: 0.6615
[Iter 10600 Task segm] Val Loss: 1.5958
{'mIoU': 0.2216, 'Pixel Acc': 0.5542}
[Iter 10650 Task segm] Train Loss: 0.6344
[Iter 10700 Task segm] Train Loss: 0.6211
[Iter 10750 Task segm] Train Loss: 0.6460
[Iter 10800 Task segm] Train Loss: 0.6333
[Iter 10800 Task segm] Val Loss: 1.5543
{'mIoU': 0.2439, 'Pixel Acc': 0.5665}
[Iter 10850 Task segm] Train Loss: 0.6476
[Iter 10900 Task segm] Train Loss: 0.6531
[Iter 10950 Task segm] Train Loss: 0.6705
[Iter 11000 Task segm] Train Loss: 0.6323
[Iter 11000 Task segm] Val Loss: 1.5706
{'mIoU': 0.2416, 'Pixel Acc': 0.5617}
[Iter 11050 Task segm] Train Loss: 0.6205
[Iter 11100 Task segm] Train Loss: 0.6342
[Iter 11150 Task segm] Train Loss: 0.6079
[Iter 11200 Task segm] Train Loss: 0.6386
[Iter 11200 Task segm] Val Loss: 1.5519
{'mIoU': 0.2357, 'Pixel Acc': 0.5626}
[Iter 11250 Task

[Iter 15650 Task segm] Train Loss: 0.4414
[Iter 15700 Task segm] Train Loss: 0.4217
[Iter 15750 Task segm] Train Loss: 0.4452
[Iter 15800 Task segm] Train Loss: 0.4212
[Iter 15800 Task segm] Val Loss: 1.6450
{'mIoU': 0.2452, 'Pixel Acc': 0.5702}


KeyboardInterrupt: 

In [8]:
# Ind. Model, Adam, task = Seg, reload
# Resume again from the latest checkpoint after the interrupted run.
checkpoint = '/mnt/nfs/work1/huiguan/lijunzhang/multibranch/checkpoint/NYUv2/exp/'

trainer = Trainer(model=model, task=task[0],
                  train_dataloader=trainDataloader, val_dataloader=valDataloader,
                  criterion=criterion, metric=metric)
trainer.train(iters=20000, savePath=checkpoint, reload='segment_semantic.model')

[Iter 15850 Task segm] Train Loss: 0.4104
[Iter 15900 Task segm] Train Loss: 0.4212
[Iter 15950 Task segm] Train Loss: 0.4096
[Iter 16000 Task segm] Train Loss: 0.4025
[Iter 16000 Task segm] Val Loss: 1.6363
{'mIoU': 0.2494, 'Pixel Acc': 0.5732}
[Iter 16050 Task segm] Train Loss: 0.3969
[Iter 16100 Task segm] Train Loss: 0.3784
[Iter 16150 Task segm] Train Loss: 0.3863
[Iter 16200 Task segm] Train Loss: 0.3849
[Iter 16200 Task segm] Val Loss: 1.6353
{'mIoU': 0.251, 'Pixel Acc': 0.5779}
[Iter 16250 Task segm] Train Loss: 0.4020
[Iter 16300 Task segm] Train Loss: 0.3770
[Iter 16350 Task segm] Train Loss: 0.3888
[Iter 16400 Task segm] Train Loss: 0.3816
[Iter 16400 Task segm] Val Loss: 1.6435
{'mIoU': 0.2529, 'Pixel Acc': 0.5773}
[Iter 16450 Task segm] Train Loss: 0.3742
[Iter 16500 Task segm] Train Loss: 0.3705
[Iter 16550 Task segm] Train Loss: 0.3665
[Iter 16600 Task segm] Train Loss: 0.3666
[Iter 16600 Task segm] Val Loss: 1.6503
{'mIoU': 0.2513, 'Pixel Acc': 0.5781}
[Iter 16650 Task 

In [8]:
# Bare expression: displays the module tree of the network.
model

Deeplab_ASPP(
  (backbone): Deeplab_ResNet_Backbone(
    (blocks): ModuleList(
      (0): Sequential(
        (0): Conv2d(3, 64, kernel_size=(7, 7), stride=(2, 2), padding=(3, 3), bias=False)
        (1): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
        (2): ReLU(inplace=True)
        (3): MaxPool2d(kernel_size=3, stride=2, padding=1, dilation=1, ceil_mode=True)
      )
      (1): ResidualBlock(
        (block): BasicBlock(
          (conv1): Conv2d(64, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
          (bn1): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
          (relu): ReLU(inplace=True)
          (conv2): Conv2d(64, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
          (bn2): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
        )
      )
      (2): ResidualBlock(
        (block): BasicBlock(
          (conv1): Conv2d(64, 64, k

# Plot Backbone

In [14]:
from graphviz import Digraph
from torch.autograd import Variable

In [15]:
def make_dot(var, params=None, verbose=True):
    """Build a graphviz ``Digraph`` of the autograd graph behind ``var``.

    Tensors are drawn as orange boxes labelled with their size, leaf
    variables as light-blue boxes labelled with their name and size, and
    every other autograd node with its type name.

    Args:
        var: tensor (or autograd node) from which the walk starts.
        params: optional ``{name: tensor}`` mapping used to name leaf
            variables. Leaves that are not in the mapping get an empty name.
        verbose: when true (the default), print the type name of every
            non-leaf autograd node as it is visited.

    Returns:
        The populated ``Digraph``.
    """
    param_map = {}
    if params is not None:
        # dict views cannot be indexed in Python 3, so check by iteration.
        assert all(isinstance(p, torch.Tensor) for p in params.values())
        param_map = {id(v): k for k, v in params.items()}

    node_attr = dict(style="filled", shape="box", align="left", fontsize="12", ranksep="0.1", height="0.2")
    dot = Digraph(node_attr=node_attr, graph_attr=dict(size="12,12"))
    # id -> object. Keeping the object referenced stops its id() from being
    # reused by another node while the graph is being built.
    seen = {}

    def size_to_str(size):
        return "(" + ", ".join("%d" % v for v in size) + ")"

    def add_nodes(node):
        if id(node) in seen:
            return
        seen[id(node)] = node

        if torch.is_tensor(node):
            dot.node(str(id(node)), size_to_str(node.size()), fillcolor="orange")
            fn = node.grad_fn
            if fn is None:
                # Tensor without autograd history: nothing further to draw.
                return
            dot.edge(str(id(fn)), str(id(node)))
            if id(fn) in seen:
                return
            seen[id(fn)] = fn
            node = fn

        if hasattr(node, "variable"):
            # Leaf of the graph: the node wraps a variable.
            leaf = node.variable
            name = param_map.get(id(leaf), "")
            node_name = "%s\n %s" % (name, size_to_str(leaf.size()))
            dot.node(str(id(node)), node_name, fillcolor="lightblue")
        else:
            if verbose:
                print(type(node).__name__)
            dot.node(str(id(node)), str(type(node).__name__))

        if hasattr(node, "next_functions"):
            for entry in node.next_functions:
                parent = entry[0]
                if parent is not None:
                    dot.edge(str(id(parent)), str(id(node)))
                    add_nodes(parent)
        if hasattr(node, "saved_tensors"):
            for t in node.saved_tensors:
                dot.edge(str(id(t)), str(id(node)))
                add_nodes(t)

    add_nodes(var)
    return dot

In [16]:
# Build a CPU backbone here so the cell runs on a fresh kernel; the layout
# matches the one used inside Deeplab_ASPP.
backbone = Deeplab_ResNet_Backbone(BasicBlock, [3, 4, 6, 3])
inputs = torch.randn(1, 3, 224, 224)
y = backbone(inputs)
# Trace the autograd graph of the backbone output and render it.
g = make_dot(y)
g.view()

ReluBackward0
AddBackward0
ReluBackward0
AddBackward0
ReluBackward0
AddBackward0
NativeBatchNormBackward
MkldnnConvolutionBackward
ReluBackward0
AddBackward0
ReluBackward0
AddBackward0
ReluBackward0
AddBackward0
ReluBackward0
AddBackward0
ReluBackward0
AddBackward0
ReluBackward0
AddBackward0
NativeBatchNormBackward
MkldnnConvolutionBackward
ReluBackward0
AddBackward0
ReluBackward0
AddBackward0
ReluBackward0
AddBackward0
ReluBackward0
AddBackward0
NativeBatchNormBackward
MkldnnConvolutionBackward
ReluBackward0
AddBackward0
ReluBackward0
AddBackward0
ReluBackward0
AddBackward0
MaxPool2DWithIndicesBackward
ReluBackward1
NativeBatchNormBackward
MkldnnConvolutionBackward
NativeBatchNormBackward
MkldnnConvolutionBackward
ReluBackward1
NativeBatchNormBackward
MkldnnConvolutionBackward
NativeBatchNormBackward
MkldnnConvolutionBackward
ReluBackward1
NativeBatchNormBackward
MkldnnConvolutionBackward
NativeBatchNormBackward
MkldnnConvolutionBackward
ReluBackward1
NativeBatchNormBackward
MkldnnCon

'Digraph.gv.pdf'