## Imports

In [1]:
import os
import random
import torch
import torch.nn as nn
import torch.nn.parallel
import torch.backends.cudnn as cudnn
import torch.optim as optim
import torch.utils.data
import torchvision.datasets as dset
import torchvision.transforms as transforms
import torchvision.utils as vutils
from torch.autograd import Variable

## Configuration and hyperparameters

In [3]:
# Run on the GPU whenever CUDA is available.
use_gpu = torch.cuda.is_available()

# Seed every random number generator up front so that Restart & Run All
# draws the same noise and the same weight initialisation on each run.
manual_seed = 999
random.seed(manual_seed)
torch.manual_seed(manual_seed)
if use_gpu:
    torch.cuda.manual_seed_all(manual_seed)

nz = 100            # channels of the latent noise tensor fed to the generator
ngf = 32            # base number of feature maps in the generator
ndf = 64            # base number of feature maps in the discriminator
nc = 3              # image channels (generator output / discriminator input)
batch_size = 64     # mini-batch size passed to the DataLoader
beta1 = 0.5         # first beta coefficient of both Adam optimizers
image_size = 64     # size the input images are resized to
lr = 0.0002         # learning rate of both Adam optimizers
epochs = 25         # number of passes over the dataset
# Dataset root directory (an alternative root used previously was "./svhn").
dataroot = "./cifar10"
workers = 1         # DataLoader worker processes
out_dir = "./dcgan_model"   # sample images and checkpoints are written here

## Dataset and data loader

In [4]:
# Per-image preprocessing: resize to image_size, convert to a tensor, then
# normalise each of the three channels with mean 0.5 and std 0.5.
preprocess = transforms.Compose([
    transforms.Resize(image_size),
    transforms.ToTensor(),
    transforms.Normalize((0.5, 0.5, 0.5), (0.5, 0.5, 0.5)),
])

# CIFAR-10 stored under dataroot; download=True fetches it when needed.
dataset = dset.CIFAR10(root=dataroot, transform=preprocess, download=True)

Downloading https://www.cs.toronto.edu/~kriz/cifar-10-python.tar.gz to ./cifar10/cifar-10-python.tar.gz


In [5]:
# Shuffled mini-batch iterator over the dataset, fed by `workers` loader processes.
dataloader = torch.utils.data.DataLoader(
    dataset=dataset,
    batch_size=batch_size,
    num_workers=workers,
    shuffle=True,
)

## Model

### Helpers

In [6]:
# custom weights initialization called on netG and netD
def weights_init(m):
    """Initialise a single module in place; intended for ``net.apply(weights_init)``.

    Modules are selected by class name:

    * names containing ``Conv``: weights drawn from N(0.0, 0.02);
    * names containing ``BatchNorm``: weights drawn from N(1.0, 0.02) and
      biases filled with 0.

    Any other module is left untouched.  A matched module whose ``weight`` or
    ``bias`` is missing or ``None`` (for example ``BatchNorm2d(affine=False)``,
    or a container class whose name merely contains ``Conv``) is skipped for
    that parameter instead of raising ``AttributeError``.

    Args:
        m: the module to initialise.
    """
    classname = m.__class__.__name__
    # getattr with a default covers both "attribute absent" and "registered as None".
    weight = getattr(m, 'weight', None)
    bias = getattr(m, 'bias', None)
    if classname.find('Conv') != -1:
        if weight is not None:
            weight.data.normal_(0.0, 0.02)
    elif classname.find('BatchNorm') != -1:
        if weight is not None:
            weight.data.normal_(1.0, 0.02)
        if bias is not None:
            bias.data.fill_(0)

### Generator

In [7]:
class _netG(nn.Module):
    def __init__(self):
        super(_netG, self).__init__()
        self.main = nn.Sequential(
            # input is Z, going into a convolution
            nn.ConvTranspose2d(     nz, ngf * 8, 4, 1, 0, bias=False),
            nn.BatchNorm2d(ngf * 8),
            nn.ReLU(True),
            # state size. (ngf*8) x 4 x 4
            nn.ConvTranspose2d(ngf * 8, ngf * 4, 4, 2, 1, bias=False),
            nn.BatchNorm2d(ngf * 4),
            nn.ReLU(True),
            # state size. (ngf*4) x 8 x 8
            nn.ConvTranspose2d(ngf * 4, ngf * 2, 4, 2, 1, bias=False),
            nn.BatchNorm2d(ngf * 2),
            nn.ReLU(True),
            # state size. (ngf*2) x 16 x 16
            nn.ConvTranspose2d(ngf * 2,     ngf, 4, 2, 1, bias=False),
            nn.BatchNorm2d(ngf),
            nn.ReLU(True),
            # state size. (ngf) x 32 x 32
            nn.ConvTranspose2d(    ngf,      nc, 4, 2, 1, bias=False),
            nn.Tanh()
            # state size. (nc) x 64 x 64
        )

    def forward(self, inputs):
        if isinstance(inputs.data, torch.cuda.FloatTensor) and use_gpu:
            output = nn.parallel.data_parallel(self.main, inputs, range(1))
        else:
            output = self.main(inputs)
        return output

In [8]:
# Build the generator, run weights_init over every sub-module, then show its layout.
netG = _netG().apply(weights_init)
print(netG)

_netG(
  (main): Sequential(
    (0): ConvTranspose2d (100, 256, kernel_size=(4, 4), stride=(1, 1), bias=False)
    (1): BatchNorm2d(256, eps=1e-05, momentum=0.1, affine=True)
    (2): ReLU(inplace)
    (3): ConvTranspose2d (256, 128, kernel_size=(4, 4), stride=(2, 2), padding=(1, 1), bias=False)
    (4): BatchNorm2d(128, eps=1e-05, momentum=0.1, affine=True)
    (5): ReLU(inplace)
    (6): ConvTranspose2d (128, 64, kernel_size=(4, 4), stride=(2, 2), padding=(1, 1), bias=False)
    (7): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True)
    (8): ReLU(inplace)
    (9): ConvTranspose2d (64, 32, kernel_size=(4, 4), stride=(2, 2), padding=(1, 1), bias=False)
    (10): BatchNorm2d(32, eps=1e-05, momentum=0.1, affine=True)
    (11): ReLU(inplace)
    (12): ConvTranspose2d (32, 3, kernel_size=(4, 4), stride=(2, 2), padding=(1, 1), bias=False)
    (13): Tanh()
  )
)


### Discriminator

In [9]:
class _netD(nn.Module):
    def __init__(self):
        super(_netD, self).__init__()
        self.main = nn.Sequential(
            # input is (nc) x 64 x 64
            nn.Conv2d(nc, ndf, 4, 2, 1, bias=False),
            nn.LeakyReLU(0.2, inplace=True),
            # state size. (ndf) x 32 x 32
            nn.Conv2d(ndf, ndf * 2, 4, 2, 1, bias=False),
            nn.BatchNorm2d(ndf * 2),
            nn.LeakyReLU(0.2, inplace=True),
            # state size. (ndf*2) x 16 x 16
            nn.Conv2d(ndf * 2, ndf * 4, 4, 2, 1, bias=False),
            nn.BatchNorm2d(ndf * 4),
            nn.LeakyReLU(0.2, inplace=True),
            # state size. (ndf*4) x 8 x 8
            nn.Conv2d(ndf * 4, ndf * 8, 4, 2, 1, bias=False),
            nn.BatchNorm2d(ndf * 8),
            nn.LeakyReLU(0.2, inplace=True),
            # state size. (ndf*8) x 4 x 4
            nn.Conv2d(ndf * 8, 1, 4, 1, 0, bias=False),
            nn.Sigmoid()
        )

    def forward(self, inputs):
        if isinstance(inputs.data, torch.cuda.FloatTensor) and use_gpu:
            output = nn.parallel.data_parallel(self.main, inputs, range(1))
        else:
            output = self.main(inputs)

        return output.view(-1, 1).squeeze(1)


In [10]:
# Build the discriminator, run weights_init over every sub-module, then show its layout.
netD = _netD().apply(weights_init)
print(netD)

_netD(
  (main): Sequential(
    (0): Conv2d (3, 64, kernel_size=(4, 4), stride=(2, 2), padding=(1, 1), bias=False)
    (1): LeakyReLU(0.2, inplace)
    (2): Conv2d (64, 128, kernel_size=(4, 4), stride=(2, 2), padding=(1, 1), bias=False)
    (3): BatchNorm2d(128, eps=1e-05, momentum=0.1, affine=True)
    (4): LeakyReLU(0.2, inplace)
    (5): Conv2d (128, 256, kernel_size=(4, 4), stride=(2, 2), padding=(1, 1), bias=False)
    (6): BatchNorm2d(256, eps=1e-05, momentum=0.1, affine=True)
    (7): LeakyReLU(0.2, inplace)
    (8): Conv2d (256, 512, kernel_size=(4, 4), stride=(2, 2), padding=(1, 1), bias=False)
    (9): BatchNorm2d(512, eps=1e-05, momentum=0.1, affine=True)
    (10): LeakyReLU(0.2, inplace)
    (11): Conv2d (512, 1, kernel_size=(4, 4), stride=(1, 1), bias=False)
    (12): Sigmoid()
  )
)


### Criterion, labels, inputs, etc.

In [11]:
# Binary cross-entropy between the discriminator output and the real/fake labels.
criterion = nn.BCELoss()

# Buffers that the training loop resizes and refills on every iteration.
# The image buffer uses `nc` (not a literal 3) so it follows the configured
# channel count, like the generator and discriminator do.
inputs = torch.FloatTensor(batch_size, nc, image_size, image_size)
noise = torch.FloatTensor(batch_size, nz, 1, 1)
# Latent batch drawn once from N(0, 1); the training loop feeds it to the
# generator when saving sample images.
fixed_noise = torch.FloatTensor(batch_size, nz, 1, 1).normal_(0, 1)
label = torch.FloatTensor(batch_size)
# Target values written into `label` for real and generated batches.
real_label = 1
fake_label = 0

# Move the networks, the loss and every buffer to the GPU when one is available.
if use_gpu:
    netD.cuda()
    netG.cuda()
    criterion.cuda()
    inputs, label = inputs.cuda(), label.cuda()
    noise, fixed_noise = noise.cuda(), fixed_noise.cuda()

fixed_noise = Variable(fixed_noise)

# setup optimizer: one Adam instance per network, same learning rate and betas
optimizerD = optim.Adam(netD.parameters(), lr=lr, betas=(beta1, 0.999))
optimizerG = optim.Adam(netG.parameters(), lr=lr, betas=(beta1, 0.999))


## Training

In [12]:
# Make sure the output directory exists. exist_ok=True replaces the
# exists()-then-makedirs() pair, which fails if the directory appears in between.
os.makedirs(out_dir, exist_ok=True)

for epoch in range(epochs):
    for i, data in enumerate(dataloader, 0):
        ############################
        # (1) Update D network: maximize log(D(x)) + log(1 - D(G(z)))
        ###########################
        # train with real
        netD.zero_grad()
        real_cpu, _ = data
        # Size of this particular batch; it may differ from the configured
        # value (e.g. a short final batch). A local name is used so the global
        # `batch_size` hyperparameter is not overwritten, which would silently
        # change the result of re-running earlier cells.
        cur_batch_size = real_cpu.size(0)
        if use_gpu:
            real_cpu = real_cpu.cuda()
        inputs.resize_as_(real_cpu).copy_(real_cpu)
        label.resize_(cur_batch_size).fill_(real_label)
        inputv = Variable(inputs)
        labelv = Variable(label)

        output = netD(inputv)
        errD_real = criterion(output, labelv)
        errD_real.backward()
        D_x = output.data.mean()

        # train with fake
        noise.resize_(cur_batch_size, nz, 1, 1).normal_(0, 1)
        noisev = Variable(noise)
        fake = netG(noisev)
        labelv = Variable(label.fill_(fake_label))
        # detach() keeps this backward pass from reaching the generator.
        output = netD(fake.detach())
        errD_fake = criterion(output, labelv)
        errD_fake.backward()
        D_G_z1 = output.data.mean()
        errD = errD_real + errD_fake
        optimizerD.step()

        ############################
        # (2) Update G network: maximize log(D(G(z)))
        ###########################
        netG.zero_grad()
        labelv = Variable(label.fill_(real_label))  # fake labels are real for generator cost
        output = netD(fake)
        errG = criterion(output, labelv)
        errG.backward()
        D_G_z2 = output.data.mean()
        optimizerG.step()

        # NOTE(review): `.data[0]` is the pre-0.4 way of reading a scalar loss;
        # on PyTorch >= 0.4 this needs to become `.item()`.
        print('[%d/%d][%d/%d] Loss_D: %.4f Loss_G: %.4f D(x): %.4f D(G(z)): %.4f / %.4f'
              % (epoch, epochs, i, len(dataloader),
                 errD.data[0], errG.data[0], D_x, D_G_z1, D_G_z2))
        # Every 100 iterations, save the current real batch and the generator's
        # output for the fixed noise (the per-epoch file is overwritten each time).
        if i % 100 == 0:
            vutils.save_image(real_cpu,
                    '%s/real_samples.png' % out_dir,
                    normalize=True)
            fake = netG(fixed_noise)
            vutils.save_image(fake.data,
                    '%s/fake_samples_epoch_%03d.png' % (out_dir, epoch),
                    normalize=True)

    # do checkpointing: one state dict per network at the end of every epoch
    torch.save(netG.state_dict(), '%s/netG_epoch_%d.pth' % (out_dir, epoch))
    torch.save(netD.state_dict(), '%s/netD_epoch_%d.pth' % (out_dir, epoch))

[0/25][0/782] Loss_D: 1.9807 Loss_G: 6.7601 D(x): 0.6410 D(G(z)): 0.7007 / 0.0017
[0/25][1/782] Loss_D: 1.0047 Loss_G: 4.3817 D(x): 0.4793 D(G(z)): 0.0653 / 0.0198
[0/25][2/782] Loss_D: 0.4207 Loss_G: 4.1543 D(x): 0.8543 D(G(z)): 0.1974 / 0.0191
[0/25][3/782] Loss_D: 0.5109 Loss_G: 5.1706 D(x): 0.8762 D(G(z)): 0.2661 / 0.0082
[0/25][4/782] Loss_D: 0.3649 Loss_G: 5.9571 D(x): 0.8978 D(G(z)): 0.1970 / 0.0034
[0/25][5/782] Loss_D: 0.4023 Loss_G: 5.0804 D(x): 0.8228 D(G(z)): 0.0954 / 0.0086
[0/25][6/782] Loss_D: 0.4272 Loss_G: 6.4189 D(x): 0.9355 D(G(z)): 0.2682 / 0.0023
[0/25][7/782] Loss_D: 0.3424 Loss_G: 6.1785 D(x): 0.8636 D(G(z)): 0.1243 / 0.0029
[0/25][8/782] Loss_D: 0.3768 Loss_G: 6.0307 D(x): 0.8447 D(G(z)): 0.1485 / 0.0032
[0/25][9/782] Loss_D: 0.3379 Loss_G: 6.7913 D(x): 0.8889 D(G(z)): 0.1788 / 0.0014
[0/25][10/782] Loss_D: 0.2853 Loss_G: 6.6505 D(x): 0.8863 D(G(z)): 0.1096 / 0.0018
[0/25][11/782] Loss_D: 0.3104 Loss_G: 7.3809 D(x): 0.9113 D(G(z)): 0.1512 / 0.0009
[0/25][12/782]

[0/25][99/782] Loss_D: 0.0001 Loss_G: 27.6306 D(x): 0.9999 D(G(z)): 0.0000 / 0.0000
[0/25][100/782] Loss_D: 0.0003 Loss_G: 27.6309 D(x): 0.9997 D(G(z)): 0.0000 / 0.0000
[0/25][101/782] Loss_D: 0.0003 Loss_G: 27.6309 D(x): 0.9997 D(G(z)): 0.0000 / 0.0000
[0/25][102/782] Loss_D: 0.0003 Loss_G: 27.6309 D(x): 0.9997 D(G(z)): 0.0000 / 0.0000
[0/25][103/782] Loss_D: 0.0003 Loss_G: 27.6309 D(x): 0.9997 D(G(z)): 0.0000 / 0.0000
[0/25][104/782] Loss_D: 0.0002 Loss_G: 27.6309 D(x): 0.9998 D(G(z)): 0.0000 / 0.0000
[0/25][105/782] Loss_D: 0.0002 Loss_G: 27.6309 D(x): 0.9998 D(G(z)): 0.0000 / 0.0000
[0/25][106/782] Loss_D: 0.0002 Loss_G: 27.6309 D(x): 0.9998 D(G(z)): 0.0000 / 0.0000
[0/25][107/782] Loss_D: 0.0002 Loss_G: 27.6309 D(x): 0.9998 D(G(z)): 0.0000 / 0.0000
[0/25][108/782] Loss_D: 0.0003 Loss_G: 27.6309 D(x): 0.9997 D(G(z)): 0.0000 / 0.0000
[0/25][109/782] Loss_D: 0.0001 Loss_G: 27.6309 D(x): 0.9999 D(G(z)): 0.0000 / 0.0000
[0/25][110/782] Loss_D: 0.0001 Loss_G: 27.6309 D(x): 0.9999 D(G(z)

Process Process-1:
Traceback (most recent call last):
  File "/Users/d068545/anaconda3/envs/aind2/lib/python3.5/multiprocessing/process.py", line 252, in _bootstrap
    self.run()
  File "/Users/d068545/anaconda3/envs/aind2/lib/python3.5/multiprocessing/process.py", line 93, in run
    self._target(*self._args, **self._kwargs)
  File "/Users/d068545/anaconda3/envs/aind2/lib/python3.5/site-packages/torch/utils/data/dataloader.py", line 36, in _worker_loop
    r = index_queue.get()
  File "/Users/d068545/anaconda3/envs/aind2/lib/python3.5/multiprocessing/queues.py", line 335, in get
    res = self._reader.recv_bytes()
  File "/Users/d068545/anaconda3/envs/aind2/lib/python3.5/multiprocessing/connection.py", line 216, in recv_bytes
    buf = self._recv_bytes(maxlength)
  File "/Users/d068545/anaconda3/envs/aind2/lib/python3.5/multiprocessing/connection.py", line 407, in _recv_bytes
    buf = self._recv(4)
  File "/Users/d068545/anaconda3/envs/aind2/lib/python3.5/multiprocessing/connection.

KeyboardInterrupt: 