In [1]:
import random

import numpy as np
import pandas as pd

import os
from os import listdir
from os.path import join, splitext

import cv2
from PIL import Image

import torch
from torch import nn
from torch.autograd import Variable
import torch.optim as optim
from torch.utils.data import Dataset, DataLoader
from torchvision.transforms import Compose, ToTensor, Resize, ToPILImage

import matplotlib.pyplot as plt

SRGAN Model Libraries

In [2]:
from srgan.model import Generator, Discriminator
from srgan.loss import GeneratorLoss

Iterative Crowd Counting Model Libraries

In [3]:
from icc.data_loaderB import ImageDataLoader
from icc.model_ic_CNN import modelicCNN, retrain_icCNN
from icc.evaluate_icCNN import evaluate_model
from icc import network

Configurations and Data Loader

In [4]:
# Select the GPU to run on: order devices by PCI bus id and expose only device "0"
os.environ.update({
    "CUDA_DEVICE_ORDER": "PCI_BUS_ID",
    "CUDA_VISIBLE_DEVICES": "0",
})

In [5]:
# Fixing the random seed of every RNG this notebook draws from
rand_seed = 26700

# Python's built-in `random` module picks the crop offsets in RandomCrop, so it
# has to be seeded as well or the training crops differ from run to run.
random.seed(rand_seed)
np.random.seed(rand_seed)
torch.manual_seed(rand_seed)
# Seeds every visible CUDA device (silently ignored when CUDA is unavailable).
torch.cuda.manual_seed_all(rand_seed)

In [6]:
# (images, ground-truth CSV) directories for the training and validation splits
train_path, train_gt_path = 'data/train/images', 'data/train/ground_truth_csv'
val_path, val_gt_path = 'data/val/images', 'data/val/ground_truth_csv'

# Directory checkpoints are written to, and the tags that make up the checkpoint file name
output_dir = './logs/model_icCNN/'
method, dataset_name = 'mcnn', 'shtechA'

In [7]:
# Both splits are loaded with identical options; only the directories differ.
_LOADER_KWARGS = dict(shuffle=True, gt_downsample=False, pre_load=True, sr_mode=True)

train_data_loader = ImageDataLoader(train_path, train_gt_path, **_LOADER_KWARGS)
val_data_loader = ImageDataLoader(val_path, val_gt_path, **_LOADER_KWARGS)

Pre-loading the data. This may take a while...


  den = pd.read_csv(os.path.join(self.gt_path,os.path.splitext(fname)[0] + '.csv'), sep=',',header=None).as_matrix()


Loaded  100 / 300 files
Loaded  200 / 300 files
Loaded  300 / 300 files
Completed Loading  300 files
Pre-loading the data. This may take a while...
Loaded  100 / 182 files
Completed Loading  182 files


Hyperparameters to tune

In [8]:
MAX_EPOCH = 1    # number of passes over the training loader
BATCH_SIZE = 5   # random crops drawn from each training image to form one batch
alpha = 2        # integer factor by which LowerResolution shrinks an image before scaling it back up

Utils to read and transform data

In [9]:
def RandomCrop(Input, Density, h, w, th, tw):
    """Cut the same randomly placed ``th`` x ``tw`` window out of two arrays.

    Args:
        Input: array indexed as ``[row, col, ...]``.
        Density: array indexed the same way; cropped with the same window.
        h, w: number of rows / columns available for placing the window.
        th, tw: number of rows / columns of the window.

    Returns:
        ``(Input_crop, Density_crop)`` sliced with the identical window.

    Raises:
        ValueError: from ``random.randint`` when ``th > h`` or ``tw > w``.
    """
    # Offsets are inclusive on both ends, so the window always fits inside (h, w).
    top = random.randint(0, h - th)
    left = random.randint(0, w - tw)

    window = (slice(top, top + th), slice(left, left + tw))
    return Input[window], Density[window]

In [10]:
def LowerResolution(img):
    """Degrade an image by shrinking it by ``alpha`` and scaling it back up.

    Both resizes use bicubic interpolation, and the result has the same
    height and width as ``img``.

    Args:
        img: image array whose first two axes are (rows, columns).

    Returns:
        The degraded image, resized back to the original (rows, columns).
    """
    height, width = img.shape[:2]

    # cv2.resize takes the target size as (width, height).
    reduced_size = (int(width // alpha), int(height // alpha))
    shrunk = cv2.resize(img, reduced_size, interpolation=cv2.INTER_CUBIC)

    return cv2.resize(shrunk, (width, height), interpolation=cv2.INTER_CUBIC)

Initializing SRGAN parameters

In [11]:
# Initializing model
netG = Generator()
netD = Discriminator()

# Initializing loss
gen_criterion = GeneratorLoss()

# Move the modules to the GPU *before* constructing the optimizers: the
# torch.optim documentation asks for this order so that the optimizers are
# built over the parameters that are actually trained.
if torch.cuda.is_available():
    netG.cuda()
    netD.cuda()
    gen_criterion.cuda()

# Initializing optimizer (Adam with its default hyperparameters)
optimizerG = optim.Adam(netG.parameters())
optimizerD = optim.Adam(netD.parameters())

# Put both networks in training mode; the last expression echoes netD's layout.
netG.train()
netD.train()

Discriminator(
  (net): Sequential(
    (0): Conv2d(3, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (1): LeakyReLU(negative_slope=0.2)
    (2): Conv2d(64, 64, kernel_size=(3, 3), stride=(2, 2), padding=(1, 1))
    (3): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
    (4): LeakyReLU(negative_slope=0.2)
    (5): Conv2d(64, 128, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (6): BatchNorm2d(128, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
    (7): LeakyReLU(negative_slope=0.2)
    (8): Conv2d(128, 128, kernel_size=(3, 3), stride=(2, 2), padding=(1, 1))
    (9): BatchNorm2d(128, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
    (10): LeakyReLU(negative_slope=0.2)
    (11): Conv2d(128, 256, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (12): BatchNorm2d(256, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
    (13): LeakyReLU(negative_slope=0.2)
    (14): Conv2d(256, 256, 

Initializing ICC parameters

In [12]:
# Initializing model
net = retrain_icCNN()

# Initializing loss
net_loss_fn = nn.MSELoss()

# Move the network to the GPU *before* constructing the optimizer, as the
# torch.optim documentation recommends.
if torch.cuda.is_available():
    net.cuda()

# Initializing optimizer
optimizerN = torch.optim.SGD(net.parameters(), lr=0.00001, weight_decay=0.00001, momentum=0.9)

# Training mode; as the last expression this also echoes the network layout.
net.train()

retrain_icCNN(
  (netDME): DME2(
    (branch3): Sequential(
      (0): Conv2d(
        (conv): Conv2d(1, 16, kernel_size=(7, 7), stride=(1, 1), padding=(3, 3))
        (relu): ReLU(inplace)
      )
      (1): MaxPool2d(kernel_size=2, stride=2, padding=0, dilation=1, ceil_mode=False)
      (2): Conv2d(
        (conv): Conv2d(16, 24, kernel_size=(5, 5), stride=(1, 1), padding=(2, 2))
        (relu): ReLU(inplace)
      )
      (3): MaxPool2d(kernel_size=2, stride=2, padding=0, dilation=1, ceil_mode=False)
      (4): Conv2d(
        (conv): Conv2d(24, 48, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
        (relu): ReLU(inplace)
      )
      (5): Conv2d(
        (conv): Conv2d(48, 48, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
        (relu): ReLU(inplace)
      )
      (6): Conv2d(
        (conv): Conv2d(48, 24, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
        (relu): ReLU(inplace)
      )
    )
  )
  (netFCNN): FCNN304(
    (branch1): Sequential(
      (0): Conv

In [22]:
def get_training_batch(blob):
    """Build one training batch of random crops from a single loader sample.

    ``BATCH_SIZE`` windows are cropped at random from ``blob['data']`` and
    ``blob['gt_density']``. Each image crop is also degraded with
    ``LowerResolution``, and each density crop is additionally resized to a
    quarter of its height and width.

    Args:
        blob: mapping with keys ``'data'`` (image) and ``'gt_density'``
            (density map). Assumes the image is an H x W x C array accepted
            by ``ToTensor`` -- TODO confirm against the data loader.

    Returns:
        ``(Input_HR, Input_LR, GT_Density, GT_Density_Small)`` with shapes
        ``(BATCH_SIZE, 3, th, tw)``, ``(BATCH_SIZE, 3, th, tw)``,
        ``(BATCH_SIZE, 1, th, tw)`` and ``(BATCH_SIZE, 1, th // 4, tw // 4)``.
    """
    image, density = blob['data'], blob['gt_density']
    h, w = image.shape[0], image.shape[1]

    # Crop size: one third of each dimension, rounded down to a multiple of 4
    # so the quarter-size density map has integral dimensions.
    th = int(h / 3.0 - ((h / 3.0) % 4))
    tw = int(w / 3.0 - ((w / 3.0) % 4))
    th_small, tw_small = th // 4, tw // 4

    hr_batch = torch.zeros(BATCH_SIZE, 3, th, tw)
    lr_batch = torch.zeros(BATCH_SIZE, 3, th, tw)
    density_batch = torch.zeros(BATCH_SIZE, 1, th, tw)
    density_small_batch = torch.zeros(BATCH_SIZE, 1, th_small, tw_small)

    to_tensor = ToTensor()

    for idx in range(BATCH_SIZE):
        crop, density_crop = RandomCrop(image, density, h, w, th, tw)
        degraded = LowerResolution(crop)

        # Shrink the density map, then scale its values by the area ratio
        # (presumably to keep the map's total roughly unchanged -- TODO confirm).
        density_crop_small = cv2.resize(density_crop, (tw_small, th_small))
        density_crop_small = density_crop_small * ((tw * th) / (tw_small * th_small))

        hr_batch[idx] = to_tensor(crop)
        lr_batch[idx] = to_tensor(degraded)
        density_batch[idx] = torch.from_numpy(density_crop)
        density_small_batch[idx] = torch.from_numpy(density_crop_small)

    return hr_batch, lr_batch, density_batch, density_small_batch

Training Networks

In [23]:
# Best validation scores seen so far (lower is better). Starting at +inf means
# any finite MAE from the first evaluation replaces them.
# The MSE trackers are initialised here too: the training loop prints them
# after every evaluation but only assigns them when the MAE improves.
best_maeHR = float('inf')
best_mseHR = float('inf')
best_epochHR = 1
best_maeLR = float('inf')
best_mseLR = float('inf')
best_epochLR = 1

In [25]:
# Joint training: every loader sample yields one batch of random crops that
# updates, in order, the crowd-counting net, the discriminator and the generator.
for epoch in range(1, MAX_EPOCH+1):

    for count, blob in enumerate(train_data_loader, 1):

        Input_HR, Input_LR, GT_Density, GT_Density_Small = get_training_batch(blob)

        if torch.cuda.is_available():
            Input_HR = Input_HR.cuda()
            Input_LR = Input_LR.cuda()
            GT_Density = GT_Density.cuda()
            GT_Density_Small = GT_Density_Small.cuda()

        Input_SR = netG(Input_LR)

        # Weighted RGB -> single-channel conversion. Deriving the tensor from
        # Input_SR (instead of filling a fresh torch.zeros buffer) keeps it on
        # the same device and dtype as the generator output.
        Input_SR_Gray = (0.2126 * Input_SR[:, 0]
                         + 0.7152 * Input_SR[:, 1]
                         + 0.0722 * Input_SR[:, 2]).unsqueeze(1)

        Density_Small, Density = net(Input_SR_Gray)

        # Optimizing loss for icc net.
        # Gradients must be cleared first: besides this backward pass, the
        # generator loss below also back-propagates into `net` (through
        # Density), and without zeroing both would pile up across iterations.
        optimizerN.zero_grad()
        # MSELoss is documented as loss(input, target); the value is the same
        # either way because the squared error is symmetric.
        n_loss = 1000.0 * net_loss_fn(Density, GT_Density) + 0.01 * net_loss_fn(Density_Small, GT_Density_Small)
        n_loss.backward(retain_graph=True)
        optimizerN.step()

        # Optimizing loss for Discriminator
        netD.zero_grad()
        real_out = netD(Input_HR).mean()
        fake_out = netD(Input_SR).mean()
        d_loss = 1 - real_out + fake_out
        d_loss.backward(retain_graph=True)
        optimizerD.step()

        # Optimizing loss for Generator.
        # zero_grad drops what the two backward passes above left in netG.
        # NOTE(review): on recent PyTorch releases, stepping optimizerN /
        # optimizerD before this backward may trip autograd's in-place
        # modification check -- confirm against the installed version.
        netG.zero_grad()
        g_loss = gen_criterion(fake_out, Input_SR, Input_HR, GT_Density, Density)
        g_loss.backward()
        optimizerG.step()

        print("Training epoch {}, Batch_Num {}/{}, G_Loss {}, D_Loss {}, N_Loss {}".format(
            epoch, count, train_data_loader.get_num_samples(), g_loss.item(), d_loss.item(), n_loss.item()))

    # Checkpoint and validate every 5th epoch.
    if epoch % 5 == 0:
        # Make sure the checkpoint directory exists before writing into it.
        if not os.path.isdir(output_dir):
            os.makedirs(output_dir)
        save_name = os.path.join(output_dir, '{}_{}_icCNN.h5'.format(method, dataset_name))
        network.save_net(save_name, net)
        maeLR, mseLR, maeHR, mseHR = evaluate_model(save_name, val_data_loader)
        # The best-so-far record is keyed on the HR MAE; the other three
        # metrics are stored from that same epoch.
        if maeHR < best_maeHR:
            best_maeHR = maeHR
            best_mseHR = mseHR
            best_maeLR = maeLR
            best_mseLR = mseLR
            best_epochHR = epoch
        print("EPOCH: %d, MAE_LR: %.1f, MSE_LR: %0.1f, MAE_HR: %.1f, MSE_HR: %0.1f" % (epoch, maeLR, mseLR, maeHR, mseHR))
        print("BEST MAE_LR: %0.1f, BEST MSE_LR: %0.1f" % (best_maeLR, best_mseLR))
        print("BEST MAE_HR: %0.1f, BEST MSE_HR: %0.1f, BEST Epoch: %4.2f" % (best_maeHR, best_mseHR, best_epochHR))



Training epoch 1, Batch_Num 1/300, G_Loss 1423.5343017578125, D_Loss 1.0001497268676758, N_Loss 985.6886596679688
Training epoch 1, Batch_Num 2/300, G_Loss 1582.117431640625, D_Loss 1.004669427871704, N_Loss 611.9422607421875


KeyboardInterrupt: 

Questions?

requires_grad = True