In [1]:
import torch
from torch.optim.optimizer import Optimizer, required

from torch.autograd import Variable
import torch.nn.functional as F
from torch import nn
from torch import Tensor
from torch.nn import Parameter

def l2normalize(v, eps=1e-12):
    """Scale ``v`` by the inverse of its norm; ``eps`` keeps the divisor non-zero."""
    divisor = v.norm() + eps
    return v / divisor


class SpectralNorm(nn.Module):
    """Wrapper that rescales one weight of ``module`` before every forward pass.

    The wrapped parameter ``<name>`` is replaced by three parameters on the
    inner module: ``<name>_bar`` (the trainable raw weight) and the vectors
    ``<name>_u`` / ``<name>_v`` (``requires_grad=False``) that are refined by
    power iteration. On each forward call the attribute ``<name>`` is set to
    ``<name>_bar / sigma`` where ``sigma = u^T W v`` and ``W`` is the raw weight
    flattened to ``(shape[0], -1)``.
    """

    def __init__(self, module, name='weight', power_iterations=1):
        super(SpectralNorm, self).__init__()
        self.module = module
        self.name = name
        self.power_iterations = power_iterations
        # Only split the weight once, even if the module was wrapped before.
        if not self._made_params():
            self._make_params()

    def _update_u_v(self):
        """Run the power iteration and write the rescaled weight onto the module."""
        u = getattr(self.module, self.name + "_u")
        v = getattr(self.module, self.name + "_v")
        w = getattr(self.module, self.name + "_bar")

        rows = w.data.shape[0]
        # Detached 2-D view of the raw weight; the u/v updates stay out of autograd.
        w_mat = w.view(rows, -1).data
        for _ in range(self.power_iterations):
            v.data = l2normalize(torch.mv(torch.t(w_mat), u.data))
            u.data = l2normalize(torch.mv(w_mat, v.data))

        # sigma is computed on the non-detached weight so gradients reach w_bar.
        sigma = u.dot(w.view(rows, -1).mv(v))
        setattr(self.module, self.name, w / sigma.expand_as(w))

    def _made_params(self):
        """Return True when the inner module already carries the u, v and bar attributes."""
        return all(
            hasattr(self.module, self.name + suffix)
            for suffix in ("_u", "_v", "_bar")
        )

    def _make_params(self):
        """Replace the original parameter with its ``_u``, ``_v`` and ``_bar`` triple."""
        weight = getattr(self.module, self.name)

        rows = weight.data.shape[0]
        cols = weight.view(rows, -1).data.shape[1]

        # Random unit vectors created on the same device/dtype as the weight.
        u = Parameter(weight.data.new(rows).normal_(0, 1), requires_grad=False)
        v = Parameter(weight.data.new(cols).normal_(0, 1), requires_grad=False)
        u.data = l2normalize(u.data)
        v.data = l2normalize(v.data)
        w_bar = Parameter(weight.data)

        # The plain attribute of this name is assigned in _update_u_v instead.
        del self.module._parameters[self.name]

        for suffix, param in (("_u", u), ("_v", v), ("_bar", w_bar)):
            self.module.register_parameter(self.name + suffix, param)

    def forward(self, *args):
        """Refresh the rescaled weight, then delegate to the wrapped module."""
        self._update_u_v()
        return self.module.forward(*args)

In [2]:
import torch
import torch.nn as nn
import torch.nn.functional as F
from torch.autograd import Variable
import numpy as np

class Self_Attn(nn.Module):
    """Self-attention over the spatial positions of a feature map.

    1x1 convolutions produce the query and key projections (``in_dim // 2``
    channels each) and the value projection (``in_dim`` channels). The attended
    values are scaled by the learnable scalar ``gamma`` (initialised to zero)
    and added back to the input.
    """

    def __init__(self, in_dim):
        super().__init__()

        # 1x1 projection layers
        self.query_conv = nn.Conv2d(in_dim, in_dim // 2, kernel_size=1)
        self.key_conv = nn.Conv2d(in_dim, in_dim // 2, kernel_size=1)
        self.value_conv = nn.Conv2d(in_dim, in_dim, kernel_size=1)

        self.gamma = nn.Parameter(torch.zeros(1))
        self.softmax = nn.Softmax(dim=-1)

    def forward(self, x):
        """
            inputs :
                x : input feature maps (B * C * W * H)
            returns :
                out : gamma * attended values + input feature, same shape as x
                attention : B * N * N (N is Width*Height), each row sums to 1
        """
        batch, channels, width, height = x.size()
        n_pos = width * height

        queries = self.query_conv(x).view(batch, -1, n_pos).transpose(1, 2)  # B * N * C'
        keys = self.key_conv(x).view(batch, -1, n_pos)  # B * C' * N
        attention = self.softmax(torch.bmm(queries, keys))  # B * N * N

        values = self.value_conv(x).view(batch, -1, n_pos)  # B * C * N
        attended = torch.bmm(values, attention.transpose(1, 2))  # B * C * N
        attended = attended.view(batch, channels, width, height)  # B * C * W * H

        return self.gamma * attended + x, attention

class Generator(nn.Module):
    """
    Generator
    input:
        z: latent matrix with shape of (batch_size, z_dim)
    output:
        out: generated images with shape (batch_size, 3, 64, 64), squashed by Tanh

    ``batch_size`` and ``image_size`` are accepted but not read by this class.
    Both attention layers are always constructed; ``attn`` only decides whether
    ``forward`` applies them.
    """
    def __init__(self, batch_size=64, attn=True, image_size=64, z_dim=100, conv_dim=64):
        super().__init__()
        self.attn = attn

        # Layer 1: z_dim -> conv_dim*8 channels, size 1 -> 4
        self.l1 = self._up_block(z_dim, conv_dim * 8)

        # Layer 2: conv_dim*8 -> conv_dim*4 channels, size 4 -> 8
        self.l2 = self._up_block(conv_dim * 8, conv_dim * 4, stride=2, padding=1)

        # Layer 3: conv_dim*4 -> conv_dim*2 channels, size 8 -> 16
        self.l3 = self._up_block(conv_dim * 4, conv_dim * 2, stride=2, padding=1)

        # Attn1 keeps conv_dim*2 channels
        self.attn1 = Self_Attn(conv_dim * 2)

        # Layer 4: conv_dim*2 -> conv_dim channels, size 16 -> 32
        self.l4 = self._up_block(conv_dim * 2, conv_dim, stride=2, padding=1)

        # Attn2 keeps conv_dim channels
        self.attn2 = Self_Attn(conv_dim)

        # Layer 5: conv_dim -> 3 channels, size 32 -> 64 (no spectral norm here)
        self.l5 = nn.Sequential(
            nn.ConvTranspose2d(conv_dim, 3, 4, 2, 1),
            nn.Tanh(),
        )

    @staticmethod
    def _up_block(in_channels, out_channels, **conv_kwargs):
        """Spectral-normalised 4x4 transposed conv followed by BatchNorm and ReLU."""
        return nn.Sequential(
            SpectralNorm(nn.ConvTranspose2d(in_channels=in_channels, out_channels=out_channels,
                                            kernel_size=4, **conv_kwargs)),
            nn.BatchNorm2d(out_channels),
            nn.ReLU(),
        )

    def forward(self, z):
        """Map a batch of latent vectors to images; attention maps are discarded."""
        use_attn = self.attn == True
        # Treat each latent vector as a 1x1 feature map with z.size(1) channels.
        out = z.view(z.size(0), z.size(1), 1, 1)
        out = self.l3(self.l2(self.l1(out)))
        if use_attn:
            out, _ = self.attn1(out)
        out = self.l4(out)
        if use_attn:
            out, _ = self.attn2(out)
        return self.l5(out)


class Discriminator(nn.Module):
    """
    Discriminator
    input:
        x: one batch of images with shape of (batch_size, 3, 64, 64)
    output:
        out.squeeze(): the final conv output with all size-1 dims removed;
        for 64x64 inputs and batch_size > 1 this is one score per image

    ``batch_size`` and ``image_size`` are accepted but not read by this class.
    Both attention layers are always constructed; ``attn`` only decides whether
    ``forward`` applies them.
    """
    def __init__(self, batch_size=64, attn=True, image_size=64, conv_dim=64):
        super().__init__()
        self.attn = attn

        # Layer 1: 3 -> conv_dim channels, size 64 -> 32
        self.l1 = self._down_block(3, conv_dim)

        # Layer 2: conv_dim -> conv_dim*2 channels, size 32 -> 16
        self.l2 = self._down_block(conv_dim, conv_dim * 2)

        # Layer 3: conv_dim*2 -> conv_dim*4 channels, size 16 -> 8
        self.l3 = self._down_block(conv_dim * 2, conv_dim * 4)

        # Attn1 keeps channel count and size
        self.attn1 = Self_Attn(conv_dim * 4)

        # Layer 4: conv_dim*4 -> conv_dim*8 channels, size 8 -> 4
        self.l4 = self._down_block(conv_dim * 4, conv_dim * 8)

        # Attn2 keeps channel count and size
        self.attn2 = Self_Attn(conv_dim * 8)

        # Layer 5: conv_dim*8 -> 1 channel, size 4 -> 1 (no spectral norm here)
        self.l5 = nn.Sequential(nn.Conv2d(conv_dim * 8, 1, 4, 1, 0))

    @staticmethod
    def _down_block(in_channels, out_channels):
        """Spectral-normalised 4x4 stride-2 conv followed by LeakyReLU(0.1)."""
        return nn.Sequential(
            SpectralNorm(nn.Conv2d(in_channels, out_channels, 4, 2, 1)),
            nn.LeakyReLU(0.1),
        )

    def forward(self, x):
        """Score a batch of images; attention maps are discarded."""
        use_attn = self.attn == True
        out = self.l3(self.l2(self.l1(x)))
        if use_attn:
            out, _ = self.attn1(out)
        out = self.l4(out)
        if use_attn:
            out, _ = self.attn2(out)
        out = self.l5(out)

        # NOTE(review): squeeze() removes every size-1 dim, so a batch of one
        # image yields a 0-dim tensor rather than shape (1,).
        return out.squeeze()

In [3]:
import torch
import torch.nn as nn
from torch.utils.data import DataLoader
from torchvision import transforms
from torchvision.utils import save_image
from torchvision.datasets import ImageFolder
from torchvision.datasets import CIFAR10
from IPython.display import clear_output
import datetime
import time
import os

In [3]:
# Colab-only: mount Google Drive so that the dataset, font and output paths
# under /content/drive used by the later cells resolve.
from google.colab import drive
drive.mount('/content/drive')

Mounted at /content/drive


In [5]:
# Mini-batch size for the real-image DataLoader built later in this cell.
# train() takes its own batch_size argument for the fake batches, so it does
# not read this module-level value.
batch_size = 64

# Utility functions
def cuda(data):
    """Return ``data.cuda()`` when CUDA is available, otherwise ``data`` unchanged."""
    return data.cuda() if torch.cuda.is_available() else data

def denorm(x):
    """Map values from [-1, 1] to [0, 1] and clamp; the input itself is not modified."""
    rescaled = (x + 1) / 2
    # In-place clamp is safe: `rescaled` is a fresh tensor, not a view of x.
    rescaled.clamp_(0, 1)
    return rescaled

# Preprocessing: resize and centre-crop to 64x64, convert to a tensor, then
# normalise each of the 3 channels with mean 0.5 / std 0.5. That puts pixels in
# [-1, 1], the range of the generator's Tanh output and the inverse of denorm().
transform = transforms.Compose([transforms.Resize(64),
                                transforms.CenterCrop(64),
                                transforms.ToTensor(),
                                transforms.Normalize([0.5]*3,[0.5]*3)])

# Real images are read from Google Drive; ImageFolder also yields a class label,
# which train() discards.
train_data = ImageFolder('/content/drive/My Drive/Colab Notebooks2/SAGAN/gifs_celeba4000', transform=transform)

# Shuffled loader over the real images; train() re-creates its iterator when exhausted.
dataloader = DataLoader(train_data, shuffle=True, batch_size=batch_size, num_workers=4)

# Fix a random latent input for samples: 64 vectors of size 100 (the Generator's
# default z_dim). No seed is set in this cell, so the values differ per kernel session.
fixed_z = cuda(torch.randn(64, 100))

  cpuset_checked))


In [1]:
def train(steps = 100000, batch_size = 64, z_dim = 100, attn = True,
          sample_dir = '/content/drive/My Drive/Colab Notebooks2/SAGAN/samples_celeba4000',
          log_every = 10, sample_every = 200):
    """Train a Generator/Discriminator pair with the hinge GAN loss.

    Real batches come from the module-level ``dataloader``; periodic sample
    grids are generated from the module-level ``fixed_z``.

    Args:
        steps: number of iterations; each one does a D update then a G update.
        batch_size: number of latent vectors drawn for every fake batch (also
            passed to the model constructors).
        z_dim: size of each latent vector; forwarded to the Generator.
        attn: forwarded to both models to switch their attention layers on/off.
        sample_dir: directory that receives ``<step>_fake.png``; created if missing.
        log_every: print progress every this many steps (0 disables logging).
        sample_every: save a sample grid every this many steps (0 disables sampling).
    """
    # Initialize model. z_dim is forwarded so the generator's first layer
    # matches the latent vectors drawn below.
    G = cuda(Generator(batch_size, attn, z_dim=z_dim))
    D = cuda(Discriminator(batch_size, attn))

    # Make sure the directory the samples are actually written to exists, so a
    # missing folder does not abort the run at the first sampling step.
    os.makedirs(sample_dir, exist_ok=True)

    # Latent batch used for every sample grid. fixed_z is reused when its width
    # matches z_dim; otherwise an equally sized batch is drawn once.
    sample_z = fixed_z
    if sample_z.size(1) != z_dim:
        sample_z = cuda(torch.randn(sample_z.size(0), z_dim))

    # Initialize optimizer with filter, lr and coefficients
    g_optimizer = torch.optim.Adam(filter(lambda p: p.requires_grad, G.parameters()), 0.0001, [0.0,0.9])
    d_optimizer = torch.optim.Adam(filter(lambda p: p.requires_grad, D.parameters()), 0.0004, [0.0,0.9])

    # Load data
    data_iter = iter(dataloader)

    # Start timer
    start_time = time.time()

    for step in range(steps):
        # ================== Train D ================== #
        D.train(); G.train()
        try:
            real_images, _ = next(data_iter)
        except StopIteration:
            # Dataset exhausted: start a new pass. Any other error propagates.
            data_iter = iter(dataloader)
            real_images, _ = next(data_iter)

        # Compute loss with real images
        d_out_real = D(cuda(real_images))
        d_loss_real = torch.relu(1.0 - d_out_real).mean()

        # Compute loss with fake images. The fakes are detached because only D
        # is updated in this phase, so backprop through G would be wasted work.
        z = cuda(torch.randn(batch_size, z_dim))
        fake_images = G(z).detach()
        d_out_fake = D(fake_images)
        d_loss_fake = torch.relu(1.0 + d_out_fake).mean()

        # Backward + Optimize
        d_loss = d_loss_real + d_loss_fake
        d_optimizer.zero_grad(); g_optimizer.zero_grad()
        d_loss.backward()
        d_optimizer.step()

        # ================== Train G ================== #
        # Create random noise
        z = cuda(torch.randn(batch_size, z_dim))
        fake_images = G(z)
        g_out_fake = D(fake_images)
        g_loss_fake = - g_out_fake.mean()
        d_optimizer.zero_grad(); g_optimizer.zero_grad()
        g_loss_fake.backward()
        g_optimizer.step()

        # Print out log info
        if log_every and (step + 1) % log_every == 0:
            elapsed = time.time() - start_time
            # Linear extrapolation of the remaining time from the average step time.
            expect = elapsed/(step + 1)*(steps-step-1)
            elapsed = str(datetime.timedelta(seconds=elapsed))
            expect = str(datetime.timedelta(seconds=expect))
            clear_output(wait=True)
            print("Elapsed [{}], Expect [{}], step [{}/{}], D_real_loss: {:.4f}, "
                  " ave_generator_gamma1: {:.4f}, ave_generator_gamma2: {:.4f}".
                  format(elapsed,expect,step + 1,steps,d_loss_real.item(),
                         G.attn1.gamma.mean().item(),
                         G.attn2.gamma.mean().item()))

        # Sample images. No autograd graph is needed for saving a picture; G
        # stays in train mode exactly as during the optimisation steps.
        if sample_every and (step + 1) % sample_every == 0:
            with torch.no_grad():
                fake_images = G(sample_z)
            save_image(denorm(fake_images), os.path.join(sample_dir, '{}_fake.png'.format(step + 1)))

        # Save models
        #if (step+1) % (100) == 0:
            #torch.save(G.state_dict(),os.path.join('./models', '{}_G.pth'.format(step + 1)))
            #torch.save(D.state_dict(),os.path.join('./models', '{}_D.pth'.format(step + 1)))

In [None]:
# Run 20000 training steps with the attention layers enabled.
train(steps = 20000, attn = True)
print('Done training part 1')
# Second run with attention disabled is currently switched off.
#train(steps = 20000, attn = False)
#print('Done training part 2')

Elapsed [1:38:55.119023], Expect [0:01:33.442707], step [19690/20000], D_real_loss: 0.0317,  ave_generator_gamma1: 0.1627, ave_generator_gamma2: -0.1456


### Generate a GIF from the saved sample images

In [4]:
from PIL import Image, ImageDraw, ImageFont

# Arial at size 18, loaded from Google Drive; create_image_with_text reads this
# module-level font when drawing captions.
font = ImageFont.truetype("/content/drive/My Drive/Colab Notebooks2/SAGAN/arial.ttf", 18)
def create_image_with_text(img, wh, text):
    """Draw ``text`` in white on ``img`` (in place) and return ``img``.

    ``wh`` is a 2-tuple used as the drawing position of the text; the
    module-level ``font`` is used for rendering.
    """
    x_pos, y_pos = wh
    ImageDraw.Draw(img).text((x_pos, y_pos), text, font = font, fill="white")
    return img

# Collect one frame per saved sample grid (steps 200, 400, ..., 19600).
# NOTE(review): the range stops at 19600 although train() above is launched for
# 20000 steps - presumably this cell was run before training finished; confirm.
frames = []

for i in range(200, 19601, 200):
    # The context manager closes the PNG's file handle once it has been pasted,
    # instead of leaving every sample file open.
    with Image.open('/content/drive/My Drive/Colab Notebooks2/SAGAN/samples_celeba4000/{}_fake.png'.format(str(i))) as img1:
        width, height = img1.size
        # Paste onto a fresh black canvas of the same mode and size.
        expand = Image.new(img1.mode, (width, height), "black")
        expand.paste(img1, (0, 0))
    frames.append(expand)

# Write all frames as a looping GIF, 80 ms per frame.
frames[0].save('/content/drive/My Drive/Colab Notebooks2/SAGAN/gifs_celeba4000/sagan_celebA4000.gif', format='GIF',
               append_images=frames[1:],
               save_all=True,
               duration=80, loop=0)