In [None]:
%matplotlib inline

import matplotlib
import matplotlib.animation as animation
import matplotlib.pyplot as plt
import random
import math
import logging
import os
import numpy as np
import PIL
import cv2
import json
import pickle

from io import BytesIO
from PIL import Image
from IPython.display import HTML

import torch
import torch.utils.data as data
import torch.nn.functional as F
import torchvision.transforms as transforms
import torchvision.utils as vutils
from torch.optim.optimizer import Optimizer, required
from torch.autograd import Variable
from torch import Tensor
from torch import nn
from torch.nn import Parameter

from nets.model_main import ft_net
from utils import model_utils


# Load the run configuration. The context manager closes the file handle
# instead of leaving it to the garbage collector.
with open("params.json", "r") as config_file:
    config = json.load(config_file)

# config["parallels"] holds the GPU ids to use (its len() is used later to scale
# the batch size). Build the documented comma-separated device list explicitly
# rather than slicing the repr of a Python list.
visible_gpus = ",".join(str(gpu_id) for gpu_id in config["parallels"])
os.environ["CUDA_VISIBLE_DEVICES"] = visible_gpus
print("Currently using GPU %s" % visible_gpus)

## SpectralNorm
https://github.com/christiancosgrove/pytorch-spectral-normalization-gan

In [None]:
def l2normalize(v, eps=1e-12):
    """Scale tensor ``v`` by the inverse of its norm.

    ``eps`` is added to the norm so an all-zero tensor does not trigger a
    division by zero.
    """
    magnitude = v.norm()
    return v / (magnitude + eps)

class SpectralNorm(nn.Module):
    """Wrap a module and rescale one of its weights by a spectral-norm estimate.

    On construction the wrapped module's parameter ``name`` is replaced by three
    parameters: ``<name>_bar`` (the trainable raw weight) and ``<name>_u`` /
    ``<name>_v`` (non-trainable power-iteration vectors). Every forward pass
    refreshes ``u``/``v`` and stores ``<name>_bar / sigma`` on the wrapped module
    under ``name`` before calling the wrapped module's ``forward``.

    Args:
        module: the module whose weight is normalised.
        name: attribute name of the weight on ``module``.
        power_iterations: number of power-iteration steps per forward pass.
    """

    def __init__(self, module, name='weight', power_iterations=1):
        super(SpectralNorm, self).__init__()
        self.module = module
        self.name = name
        self.power_iterations = power_iterations
        if not self._made_params():
            self._make_params()

    def _update_u_v(self):
        """Run the power iteration and write the normalised weight to the module."""
        left = getattr(self.module, self.name + "_u")
        right = getattr(self.module, self.name + "_v")
        weight_bar = getattr(self.module, self.name + "_bar")

        rows = weight_bar.data.shape[0]
        for _ in range(self.power_iterations):
            # The iteration works on .data, so it is not recorded by autograd.
            flat = weight_bar.view(rows, -1).data
            right.data = l2normalize(torch.mv(torch.t(flat), left.data))
            left.data = l2normalize(torch.mv(flat, right.data))

        # sigma = u^T W v, computed on the parameters themselves so the division
        # below is differentiable with respect to <name>_bar.
        sigma = left.dot(weight_bar.view(rows, -1).mv(right))
        setattr(self.module, self.name, weight_bar / sigma.expand_as(weight_bar))

    def _made_params(self):
        """Return True when the module already carries the u / v / bar parameters."""
        for suffix in ("_u", "_v", "_bar"):
            try:
                getattr(self.module, self.name + suffix)
            except AttributeError:
                return False
        return True

    def _make_params(self):
        """Replace the module's weight with the ``_bar``, ``_u`` and ``_v`` parameters."""
        weight = getattr(self.module, self.name)

        rows = weight.data.shape[0]
        cols = weight.view(rows, -1).data.shape[1]

        # u is drawn before v, both from a standard normal, then normalised.
        left = Parameter(weight.data.new(rows).normal_(0, 1), requires_grad=False)
        right = Parameter(weight.data.new(cols).normal_(0, 1), requires_grad=False)
        left.data = l2normalize(left.data)
        right.data = l2normalize(right.data)
        weight_bar = Parameter(weight.data)

        # Drop the original parameter; forward() re-creates the attribute as
        # the normalised (non-parameter) tensor.
        del self.module._parameters[self.name]

        for suffix, param in (("_u", left), ("_v", right), ("_bar", weight_bar)):
            self.module.register_parameter(self.name + suffix, param)

    def forward(self, *args):
        """Refresh the normalised weight, then delegate to the wrapped module."""
        self._update_u_v()
        return self.module.forward(*args)

## Build dataloader for GAN
- Each iteration of the data loader returns a batch in the format: **{img, label, feature, mask}**

- You need to extract the features with the provided model (the extracted features are saved to disk for later use).

- You need to provide the file containing all the masks

In [None]:
# Dataset root: each sub-directory is one identity (label).

config["batches_dir"] = "/world/data-gpu-94/sysu-reid/market_dataset"

# Collect the names of the sub-directories; plain files in the root are ignored.
label_dirs = []
for entry in os.listdir(config["batches_dir"]):
    if os.path.isdir(os.path.join(config["batches_dir"], entry)):
        label_dirs.append(entry)

config["num_labels"] = len(label_dirs)

def pil_loader(path):
    """Read the image file at ``path`` with PIL and return it converted to RGB."""
    # The file is opened explicitly so the handle is closed when the block exits,
    # avoiding a ResourceWarning (https://github.com/python-pillow/Pillow/issues/835).
    with open(path, 'rb') as handle:
        rgb_image = Image.open(handle).convert('RGB')
    return rgb_image

def accimage_loader(path):
    """Load ``path`` with accimage, falling back to ``pil_loader`` on IOError."""
    import accimage
    try:
        image = accimage.Image(path)
    except IOError:
        # Possibly a decoding problem: retry with PIL.Image instead.
        image = pil_loader(path)
    return image

def default_loader(path):
    """Load ``path`` with the loader matching torchvision's current image backend."""
    from torchvision import get_image_backend
    use_accimage = get_image_backend() == 'accimage'
    loader = accimage_loader if use_accimage else pil_loader
    return loader(path)
    
def has_file_allowed_extension(filename, extensions):
    """Check whether a file name ends with one of the allowed extensions.

    The comparison is case-insensitive on both sides: the file name and every
    entry of ``extensions`` are lower-cased first, so ``'.JPG'`` in
    ``extensions`` matches ``'photo.jpg'`` and vice versa.

    Args:
        filename (string): path to a file
        extensions (iterable of string): allowed extensions, e.g. ``['.jpg', '.png']``

    Returns:
        bool: True if the filename ends with one of ``extensions``; False when
        ``extensions`` is empty
    """
    filename_lower = filename.lower()
    # str.endswith accepts a tuple of suffixes and returns False for an empty tuple.
    return filename_lower.endswith(tuple(ext.lower() for ext in extensions))

## Functions to build the PyTorch network and extract features

In [None]:
def build_net(config, model_path):
    """Create the feature-extraction network on the GPU in inference mode.

    Args:
        config: run configuration; ``config["model_params"]`` supplies ``model``
            and, optionally, ``feature_dim`` (default 256) and ``pcb_n_parts``
            (default 0).
        model_path: checkpoint path handed to ``model_utils.restore_model``.

    Returns:
        The network returned by ``ft_net``, moved to CUDA and switched to eval mode.
    """
    model_params = config["model_params"]
    net = ft_net(
        config,
        model_name=model_params["model"],
        feature_dim=model_params.get("feature_dim", 256),
        pcb_n_parts=model_params.get("pcb_n_parts", 0),
        is_training=False,
    )

    # Inference only: disable training mode before and after the move to the GPU.
    net.train(False)
    net = net.cuda()
    net.eval()

    # Restore the pretrained weights from model_path.
    model_utils.restore_model(model_path, net)
    return net

def make_dataset(config, loader, extensions, model_path):
    """Extract an L2-normalised feature vector for every image under the dataset root.

    Walks every class folder below ``config["batches_dir"]``, runs all images of
    one directory through the network built by ``build_net`` as a single batch
    and records the resulting feature per image path.

    Args:
        config: run configuration; reads ``batches_dir``, ``img_w``, ``img_h``
            (plus whatever ``build_net`` needs).
        loader: unused here; kept so existing callers keep working.
        extensions: iterable of allowed image file extensions.
        model_path: checkpoint path forwarded to ``build_net``.

    Returns:
        dict mapping image path -> ``{"feature": numpy array}`` where each
        feature row has been divided by its L2 norm.

    Raises:
        IOError: if OpenCV cannot read one of the image files.
    """
    net = build_net(config, model_path)
    path_feature = {}
    batches_dir = config["batches_dir"]
    input_size = (config["img_w"], config["img_h"])  # cv2.resize takes (width, height)

    for target in sorted(os.listdir(batches_dir)):
        # target is the class folder name (i.e. 999, 2, 1000)
        class_dir = os.path.join(batches_dir, target)
        if not os.path.isdir(class_dir):
            continue

        for root, _, fnames in sorted(os.walk(class_dir)):
            # Keep only image files. The SAME list drives both loading and the
            # path -> feature assignment, so a stray non-image file can no
            # longer shift features onto the wrong paths.
            paths = [os.path.join(root, fname) for fname in sorted(fnames)
                     if has_file_allowed_extension(fname, extensions)]
            if not paths:
                # Nothing to extract here (an empty batch cannot be normalised).
                continue

            images = []
            for path in paths:
                image = cv2.imread(path)
                if image is None:
                    # cv2.imread signals failure by returning None.
                    raise IOError("Could not read image file: %s" % path)
                image = cv2.resize(image, input_size)
                image = cv2.cvtColor(image, cv2.COLOR_BGR2RGB)
                images.append(image)

            # Scale to [0, 1], then standardise per channel and go NHWC -> NCHW.
            images = np.asarray(images, dtype=np.float32)
            images = ((images / 255.0) - [0.485, 0.456, 0.406]) \
                    / [0.229, 0.224, 0.225]
            images = np.transpose(images, (0, 3, 1, 2))
            images = images.astype(np.float32)
            images = torch.from_numpy(images).cuda()

            with torch.no_grad():
                f = net(images)
                f = f.cpu().numpy()
                # L2-normalise every feature row.
                f = f / np.linalg.norm(f, axis=1, keepdims=True)

            # make path_feature pairs (features are L2-normalised)
            for idx, path in enumerate(paths):
                path_feature[path] = {"feature": f[idx]}

    # Drop the reference so the network can be garbage collected.
    net = None
    return path_feature

In [None]:
# prepare the features and mask to build dataloader

IMG_EXTENSIONS = ['.jpg', '.jpeg', '.png', '.ppm', '.bmp', '.pgm', '.tif']

model_path = "/world/data-gpu-57/xulie/pytorch-output/market_pcb_1536_test_kaimin/model_best.pth"
path_feature_file = "/world/data-gpu-94/sysu-reid/market_dataset/path_feature_1536.pkl"
path_mask_file = "/world/data-gpu-94/sysu-reid/market_dataset/path_mask.pkl"

# Pickle streams are binary data: open them with 'rb' / 'wb'. Text mode fails on
# Python 3 and can corrupt the stream on platforms that translate newlines.
# NOTE(review): pickle.load can execute arbitrary code -- only load trusted files.
if os.path.exists(path_feature_file):
    # Reuse the cached features from a previous run.
    with open(path_feature_file, 'rb') as f:
        path_feature = pickle.load(f)
else:
    # First run: extract the features and cache them for later runs.
    path_feature = make_dataset(config, default_loader, IMG_EXTENSIONS, model_path)
    with open(path_feature_file, 'wb') as f:
        pickle.dump(path_feature, f)

with open(path_mask_file, 'rb') as f:
    path_mask = pickle.load(f)
    

# !!!!
# this img size and batch_size is used for GAN, which is different from the reID model
# (the feature extraction above must already have run with the reID sizes)
config["img_w"] = 128
config["img_h"] = 128
config["batch_size"] = config["batch_sampling_params"]["batch_size"] * len(config["parallels"])

# Attach to every image entry its mask, resized to the GAN resolution and given
# a leading channel axis. dict.items() works on both Python 2 and Python 3
# (iteritems() exists only on Python 2); only the nested dicts are modified, so
# iterating while assigning is safe.
mask_size = (config["img_w"], config["img_h"])
for key, value in path_feature.items():
    mask = np.array(cv2.resize(path_mask[key]['mask'], mask_size), dtype=np.float32)
    mask = np.expand_dims(mask, 0)
    value['mask'] = mask

from input_pipeline.image_data_reader import init_data_loader
data_loader = init_data_loader(config, 4, path_feature)


# Fetch one batch and preview it: element 0 is shown as the training images,
# element 3 as the masks (at most 64 samples each).
real_batch = next(iter(data_loader))

image_grid = vutils.make_grid(real_batch[0][:64], padding=2, normalize=True)
plt.figure(figsize=(8,8))
plt.axis("off")
plt.title("Training Images")
plt.imshow(np.transpose(image_grid.cpu(), (1,2,0)))

mask_grid = vutils.make_grid(real_batch[3][:64], padding=2, normalize=True)
plt.figure(figsize=(8,8))
plt.axis("off")
plt.title("Mask")
plt.imshow(np.transpose(mask_grid.cpu(), (1,2,0)))


## GAN training
Please refer to https://pytorch.org/tutorials/beginner/dcgan_faces_tutorial.html for more information.

In [None]:
# --- Data ---
workers = 4
batch_size = config["batch_size"]
image_size = config["img_w"]

# --- Network sizes ---
# Channels of the training images (3 for colour images)
nc = 3
# Length of the generator input vector: the dimension of the extracted features
nz = config["model_params"]["feature_dim"]
# Base width (number of feature maps) of the generator
ngf = 64
# Base width (number of feature maps) of the discriminator
ndf = 32

# --- Optimisation ---
num_epochs = 50
lr = 2e-4
# Beta1 hyperparameter of the Adam optimizers
beta1 = 0.5

# Number of GPUs available. Use 0 for CPU mode.
ngpu = 1

# custom weights initialization called on netG and netD
def weights_init(m):
    """Custom weight initialisation, meant to be passed to ``Module.apply``.

    Layers whose class name contains ``Conv`` get weights drawn from N(0, 0.02);
    layers whose class name contains ``BatchNorm`` get weights from N(1, 0.02)
    and a zero bias. Every other layer is left untouched.
    """
    layer_type = type(m).__name__
    if 'Conv' in layer_type:
        nn.init.normal_(m.weight.data, 0.0, 0.02)
        return
    if 'BatchNorm' in layer_type:
        nn.init.normal_(m.weight.data, 1.0, 0.02)
        nn.init.constant_(m.bias.data, 0)

In [None]:
# Generator Code
# Generator Code
class Generator(nn.Module):
    """DCGAN-style generator that upsamples a feature vector into an image.

    ``forward`` takes a ``(batch, nz)`` tensor, appends two singleton spatial
    axes and pushes it through six transposed convolutions. From a 1 x 1 input
    the spatial size goes 4 -> 8 -> 16 -> 32 -> 64 -> 128, and the output has
    ``nc`` channels squashed into [-1, 1] by ``Tanh``.
    """

    def __init__(self):
        super(Generator, self).__init__()

        # Stem: (nz) x 1 x 1 -> (ngf*8) x 4 x 4
        layers = [
            nn.ConvTranspose2d(nz, ngf * 8, 4, 1, 0, bias=False),
            nn.BatchNorm2d(ngf * 8),
            nn.ReLU(True),
        ]

        # Each stage doubles the spatial size (kernel 4, stride 2, padding 1):
        #   (ngf*8) x 4 x 4   -> (ngf*8) x 8 x 8
        #   (ngf*8) x 8 x 8   -> (ngf*4) x 16 x 16
        #   (ngf*4) x 16 x 16 -> (ngf*2) x 32 x 32
        #   (ngf*2) x 32 x 32 -> (ngf)   x 64 x 64
        for in_mult, out_mult in ((8, 8), (8, 4), (4, 2), (2, 1)):
            layers.append(nn.ConvTranspose2d(ngf * in_mult, ngf * out_mult, 4, 2, 1, bias=False))
            layers.append(nn.BatchNorm2d(ngf * out_mult))
            layers.append(nn.ReLU(True))

        # Head: (ngf) x 64 x 64 -> (nc) x 128 x 128
        layers.append(nn.ConvTranspose2d(ngf, nc, 4, 2, 1, bias=False))
        layers.append(nn.Tanh())

        self.main = nn.Sequential(*layers)

    def forward(self, input):
        # (batch, nz) -> (batch, nz, 1, 1) so the vector can feed a convolution
        latent = input.unsqueeze(-1).unsqueeze(-1)
        return self.main(latent)

# Build the generator on the GPU, wrap it for multi-GPU use and apply the
# custom weight initialisation to all of its sub-modules.
netG = Generator()
netG = netG.cuda()
netG = nn.DataParallel(netG)
netG.apply(weights_init)

# Sanity check: feature batch shape in, generated image batch shape out.
# print(...) with one argument behaves the same on Python 2 and 3, and
# no_grad avoids building an autograd graph for a throwaway forward pass.
print(real_batch[2].shape)
with torch.no_grad():
    print(netG(real_batch[2].cuda()).shape)


In [None]:
class Discriminator(nn.Module):
    """DCGAN-style discriminator over an image concatenated with its mask.

    The input has ``nc + 1`` channels. Five spectrally-normalised stride-2
    convolutions (each followed by BatchNorm and LeakyReLU) halve the spatial
    size every time; for a 128 x 128 input that is 64 -> 32 -> 16 -> 8 -> 4.
    A final 4 x 4 convolution and a sigmoid reduce this to one score per sample.
    """

    def __init__(self):
        super(Discriminator, self).__init__()

        # (in_channels, out_channels) of the five downsampling stages
        channel_plan = (
            (nc + 1, ndf),        # (nc+1) x 128 x 128 -> (ndf) x 64 x 64
            (ndf, ndf),           # -> (ndf) x 32 x 32
            (ndf, ndf * 2),       # -> (ndf*2) x 16 x 16
            (ndf * 2, ndf * 4),   # -> (ndf*4) x 8 x 8
            (ndf * 4, ndf * 8),   # -> (ndf*8) x 4 x 4
        )

        layers = []
        for in_ch, out_ch in channel_plan:
            layers.append(SpectralNorm(nn.Conv2d(in_ch, out_ch, 4, 2, 1, bias=False)))
            layers.append(nn.BatchNorm2d(out_ch))
            layers.append(nn.LeakyReLU(0.2, inplace=True))

        # (ndf*8) x 4 x 4 -> 1 x 1 x 1, squashed to (0, 1)
        layers.append(nn.Conv2d(ndf * 8, 1, 4, 1, 0, bias=False))
        layers.append(nn.Sigmoid())

        self.main = nn.Sequential(*layers)

    def forward(self, input):
        return self.main(input)

    
# Build the discriminator on the GPU and wrap it for multi-GPU use.
netD = Discriminator().cuda()
netD = nn.DataParallel(netD)

# Sanity check: image and mask batches are concatenated along the channel axis
# and passed through D once. print(...) with one argument behaves the same on
# Python 2 and 3; no_grad avoids building an autograd graph for this check.
print(real_batch[0].shape)
print(real_batch[3].shape)
real = torch.cat([real_batch[0], real_batch[3]], 1)
print(real.shape)
with torch.no_grad():
    print(netD(real.cuda()).shape)


In [None]:
# Loss functions: binary cross-entropy for the adversarial game, L1 for the
# masked reconstruction error.
criterion = nn.BCELoss()
l1_loss = nn.L1Loss()

# Fixed generator input used to visualise progress: the features of the
# preview batch.
fixed_noise = real_batch[2].cuda()
print(fixed_noise.shape)

# Establish convention for real and fake labels during training.
# Floats on purpose: recent PyTorch versions infer an integer tensor from
# torch.full(..., 1), and BCELoss requires floating-point targets.
real_label = 1.0
fake_label = 0.0

# Setup Adam optimizers for both G and D. Parameters with requires_grad=False
# (e.g. the SpectralNorm u / v vectors) are excluded.
optimizerD = torch.optim.Adam(filter(lambda p: p.requires_grad, netD.parameters()), lr=lr, betas=(beta1,0.999))
optimizerG = torch.optim.Adam(filter(lambda p: p.requires_grad, netG.parameters()), lr=lr, betas=(beta1,0.999))

## Main training loop
The L1 loss (as printed, i.e. after the 10× scaling applied in the loop) should drop below 1.5 for the generator to produce good-looking images.

In [None]:
# Training Loop
save_path = "img_list_test.pkl"
total_epochs = 1000  # number of passes over data_loader (also shown in the log line)
torch.cuda.empty_cache()

# Lists to keep track of progress
img_list = []
G_losses = []
D_losses = []
iters = 0
print("Starting Training Loop...")

# For each epoch
for epoch in range(total_epochs):
    # The loop variable is called `batch`, not `data`, so it does not shadow the
    # `torch.utils.data as data` import for the rest of the notebook.
    for i, batch in enumerate(data_loader, 0):
        # control the D's update frequency
        # (with a threshold of 0.0, D is updated on practically every iteration)
        update_d = (random.random() > 0.0)

        # Generator input: the features perturbed by a little Gaussian noise
        noise = (batch[2] + torch.randn_like(batch[2]) * 0.01).cuda()

        # Move images and masks to the GPU once and reuse them below
        real_img = batch[0].cuda()
        mask = batch[3].cuda()

        # concatenate the mask with the training images along the channel axis
        real_img_m = torch.cat([real_img, mask], 1)

        b_size = real_img_m.size(0)
        # float(...) keeps the target floating-point on every PyTorch version
        # (BCELoss rejects integer targets)
        label = torch.full((b_size,), float(real_label)).cuda()

        ############################
        # (1) Update D
        ############################
        netD.zero_grad()
        # Forward pass real batch through D
        output = netD(real_img_m).view(-1)
        # Calculate loss on all-real batch
        errD_real = criterion(output, label)
        D_x = output.mean().item()
        if update_d:
            errD_real.backward()

        ## Train with all-fake batch
        # Generate fake image batch with G
        fake = netG(noise)
        fake_m = torch.cat([fake, mask], 1)

        label.fill_(fake_label)
        # Classify all fake batch with D (detached: no gradient flows into G here)
        output = netD(fake_m.detach()).view(-1)
        # Calculate D's loss on the all-fake batch
        errD_fake = criterion(output, label)
        errD = errD_real + errD_fake
        D_G_z1 = output.mean().item()
        if update_d:
            errD_fake.backward()
            optimizerD.step()

        ############################
        # (2) Update G
        ############################
        netG.zero_grad()
        label.fill_(real_label)  # fake labels are real for generator cost
        # Since we just updated D, perform another forward pass of all-fake batch through D
        output = netD(fake_m).view(-1)
        # Calculate G's loss based on this output
        errG = criterion(output, label)
        # Masked L1 distance (scaled by 10). It is only reported below; it is
        # NOT back-propagated.
        err_l1 = l1_loss(fake * mask, real_img * mask) * 10
        # retain_graph=True keeps the graph alive so an additional
        # err_l1.backward() could be added here.
        errG.backward(retain_graph=True)

        D_G_z2 = output.mean().item()
        # Update G
        optimizerG.step()

        # Output training stats
        if i % 30 == 0:
            print('[%d/%d][%d/%d]\tLoss_D: %.4f\tLoss_G: %.4f\tD(x): %.4f\tD(G(z)): %.4f / %.4f L1: %.4f'
                  % (epoch, total_epochs, i, len(data_loader),
                     errD.item(), errG.item(), D_x, D_G_z1, D_G_z2, err_l1.item()))

        # Save Losses for plotting later
        G_losses.append(errG.item())
        D_losses.append(errD.item())

        # Check how the generator is doing by saving G's output on fixed_noise
        if (i == len(data_loader)-1) or (iters % 200 == 0 and iters > 0):
            with torch.no_grad():
                fixed_fake = netG(fixed_noise).detach().cpu()
            img_list.append(vutils.make_grid(fixed_fake, padding=2, normalize=True).numpy())
            # Keep only the three most recent snapshots in memory and on disk
            img_list = img_list[-3:]
            # Pickle streams are binary: 'wb' works on both Python 2 and 3
            with open(save_path, 'wb') as f:
                pickle.dump(img_list, f)

        iters += 1

In [None]:
# Plot the per-iteration generator and discriminator losses collected during training.
loss_fig, loss_ax = plt.subplots(figsize=(10,5))
loss_ax.set_title("Generator and Discriminator Loss During Training")
loss_ax.plot(G_losses, label="G")
loss_ax.plot(D_losses, label="D")
loss_ax.set_xlabel("iterations")
loss_ax.set_ylabel("Loss")
loss_ax.legend()
plt.show()

## Show results
You can put this code in a separate file so that you don't need to stop training to plot the results.

In [None]:
# Animate the saved generator snapshots (at most the last 20), one frame per second.
fig = plt.figure(figsize=(20,20))
plt.axis("off")

ims = []
for grid in img_list[-20:]:
    # Snapshots are stored channel-first; imshow expects channel-last.
    frame = plt.imshow(np.transpose(grid, (1,2,0)), animated=True)
    ims.append([frame])

ani = animation.ArtistAnimation(fig, ims, interval=1000, repeat_delay=1000, blit=True)
HTML(ani.to_jshtml())

In [None]:
# Save the generator and discriminator weights.
# Both networks are wrapped in nn.DataParallel, so the state-dict keys carry the
# "module." prefix of the wrapper.
save_path = "256_1536_with_mask"

netD.eval()
netG.eval()

torch.save(netG.state_dict(), save_path + "_g.pth")
torch.save(netD.state_dict(), save_path + "_d.pth")

# Switch both networks back to training mode so training can continue.
# (The original called `net.train()`, but no `net` exists at notebook level --
# it is only a local inside build_net / make_dataset -- so the cell raised
# NameError after saving.)
netG.train()
netD.train()
torch.cuda.empty_cache()