In [1]:
import os
import numpy as np
import torch
import torch.nn as nn
import torchvision as tv
import torchvision.transforms as transforms
import torch.nn.functional as F
import torch.optim as optim

%matplotlib inline
from matplotlib import pyplot as plt

from DARTS_model import *
from models import *

In [2]:
# ---- Hyper-parameters and run configuration ----

# Geometry of the conditioning tensors built from these values.
image_size = 32                  # spatial side length of the label "fill" planes
label_dim = 10                   # number of class labels (width of the one-hot code)

# Generator / discriminator interface sizes.
G_in_dim, G_out_dim = 100, 3     # latent vector length; third argument handed to Generator(...)
D_in_dim, D_out_dim = 3, 1       # not referenced by the cells visible in this notebook section
num_channels = [512, 256, 128]   # channel list handed to Generator(...)

# Optimisation settings.
learning_rate = 2e-4             # base learning rate (generator uses it as-is, discriminator uses half)
betas = (0.5, 0.999)             # Adam betas for the generator optimiser
batch_size = 16
num_epochs = 150

# NOTE(review): absolute path at the filesystem root and unused in the visible cells -- confirm intent.
save_dir = '/model'

In [3]:
# Convert PIL images to tensors, then normalise each RGB channel with mean 0.5 / std 0.5.
transform = transforms.Compose([
    transforms.ToTensor(),
    transforms.Normalize((0.5, 0.5, 0.5), (0.5, 0.5, 0.5)),
])

# Load the CIFAR-10 training data once under its own name, then split it into two
# halves: `trainset` feeds the GAN / classifier updates and `valset` feeds the
# architecture update in the training loop.
full_trainset = tv.datasets.CIFAR10(root='./data', train=True, download=True, transform=transform)

# random_split requires the lengths to sum to len(dataset); give the remainder to the
# validation half so an odd-sized dataset does not raise.
num_train = len(full_trainset) // 2
num_val = len(full_trainset) - num_train
trainset, valset = torch.utils.data.random_split(full_trainset, [num_train, num_val])

# Settings shared by every loader below.
loader_kwargs = dict(batch_size=batch_size, num_workers=4, pin_memory=True)

trainloader = torch.utils.data.DataLoader(trainset, shuffle=True, **loader_kwargs)
valloader = torch.utils.data.DataLoader(valset, shuffle=True, **loader_kwargs)

# Held-out test split; iterated in a fixed order.
testset = tv.datasets.CIFAR10(root='./data', train=False, download=True, transform=transform)
testloader = torch.utils.data.DataLoader(testset, shuffle=False, **loader_kwargs)

# Human-readable class names, indexed by CIFAR-10 label id.
classes = ('plane', 'car', 'bird', 'cat',
           'deer', 'dog', 'frog', 'horse', 'ship', 'truck')

Files already downloaded and verified
Files already downloaded and verified


In [4]:
# Lookup table of one-hot label codes: onehot[k] has shape (label_dim, 1, 1)
# with a single 1 in channel k. Indexed with a label tensor to condition the generator.
onehot = torch.eye(label_dim, device = torch.device('cuda')).view(label_dim, label_dim, 1, 1)

# Lookup table of label planes: fill[k] has shape (label_dim, image_size, image_size)
# where plane k is all ones and every other plane is all zeros. Built by tiling the
# one-hot codes over the spatial dimensions (repeat returns a fresh, independent tensor).
fill = onehot.repeat(1, 1, image_size, image_size)

    Found GPU0 GeForce GT 750M which is of cuda capability 3.0.
    PyTorch no longer supports this GPU because it is too old.
    The minimum cuda capability that we support is 3.5.
    


In [5]:
# Instantiate the three networks used by the training loop.
G = Generator(G_in_dim, label_dim, G_out_dim, num_channels)
# NOTE(review): the positional constants below are defined by the project's
# Discriminator class, which is not visible here -- confirm their meaning.
D = Discriminator(16, 10, 9)
clf = resnet_transfer()

# Move every network onto the GPU when one is available.
if torch.cuda.is_available():
    G = G.cuda()
    D = D.cuda()
    clf = clf.cuda()

# One optimiser per parameter group:
#   - discriminator architecture parameters (Adam)
#   - classifier weights (SGD with momentum)
#   - discriminator weights (RMSprop at half the base learning rate)
#   - generator weights (Adam with the configured betas)
optim_arch = optim.Adam(D.arch_parameters(), lr = 0.01)
optim_clf = optim.SGD(clf.parameters(), lr = 0.01, momentum = 0.9)
optim_D = optim.RMSprop(D.parameters(), lr = learning_rate/2)
optim_G = optim.Adam(G.parameters(), lr = learning_rate, betas = betas)

In [6]:
def _sample_generator_inputs(batch):
    """Draw one batch of generator inputs.

    Args:
        batch: number of samples to draw.

    Returns:
        (z, c) where z is a CUDA tensor of shape (batch, G_in_dim, 1, 1) drawn from a
        standard normal, and c is a CPU LongTensor of shape (batch,) holding random
        class labels obtained by truncating uniform samples scaled by label_dim.
    """
    z = torch.randn(batch, G_in_dim, device = torch.device('cuda')).view(-1, G_in_dim, 1, 1)
    # squeeze(1) instead of squeeze(): the result stays 1-D even when batch == 1,
    # so indexing onehot / fill with it always keeps the batch dimension.
    c = (torch.rand(batch, 1) * label_dim).type(torch.LongTensor).squeeze(1)
    return z, c


# Put all three networks into training mode.
G.train()
D.train()
clf.train()

epoch = 0

while epoch < num_epochs:
    # Per-epoch loss accumulators, reported once the epoch finishes.
    G_running_loss = 0.0
    D_running_real_loss = 0.0
    D_running_fake_loss = 0.0
    num_batches = 0

    # Walk the training and validation halves in lock-step; zip stops at the shorter loader.
    for i, ((images, labels), (val_images, val_labels)) in enumerate(zip(trainloader, valloader)):
        print(epoch, i)
        mini_batch = images.size()[0]
        x_ = images.cuda(non_blocking = True)
        val_images, val_labels = val_images.cuda(non_blocking = True), val_labels.cuda(non_blocking = True)

        # Targets for the adversarial losses: 1 for "real", 0 for "fake".
        y_real_ = torch.ones(mini_batch, device = torch.device('cuda'))
        y_fake_ = torch.zeros(mini_batch, device = torch.device('cuda'))
        c_fill_ = fill[labels]

        # ---- Train discriminator: real batch + freshly generated batch ----
        optim_D.zero_grad()
        D_real_decision = D(x_, c_fill_).squeeze()
        D_real_loss = D.loss(D_real_decision, y_real_)

        z_, c_ = _sample_generator_inputs(mini_batch)
        c_onehot_ = onehot[c_]
        gen_image = G(z_, c_onehot_)

        c_fill_ = fill[c_]
        D_fake_decision = D(gen_image, c_fill_).squeeze()
        D_fake_loss = D.loss(D_fake_decision, y_fake_)

        D_loss = D_real_loss + D_fake_loss
        D_running_real_loss += D_real_loss.item()
        D_running_fake_loss += D_fake_loss.item()
        D_loss.backward()
        optim_D.step()

        # ---- Train generator: make the discriminator label fakes as real ----
        z_, c_ = _sample_generator_inputs(mini_batch)
        c_onehot_ = onehot[c_]

        optim_G.zero_grad()
        optim_arch.zero_grad()
        gen_image = G(z_, c_onehot_)

        c_fill_ = fill[c_]
        D_fake_decision = D(gen_image, c_fill_).squeeze()
        G_loss = G.loss(D_fake_decision, y_real_)
        G_running_loss += G_loss.item()

        #grad = torch.autograd.grad(G_loss, D.arch_parameters(), create_graph = True)
        G_loss.backward(create_graph = True)
        optim_G.step()

        # ---- Train classifier on generated images (sampled labels) and real images ----
        z_, c_ = _sample_generator_inputs(mini_batch)
        c_onehot_ = onehot[c_]

        c_ = c_.cuda(non_blocking = True)
        labels = labels.cuda(non_blocking = True)

        gen_image = G(z_, c_onehot_)

        optim_clf.zero_grad()
        clf_fake_decision = clf(gen_image)
        clf_fake_loss = clf.loss(clf_fake_decision, c_)
        clf_real_decision = clf(x_)
        clf_real_loss = clf.loss(clf_real_decision, labels)

        clf_loss = clf_fake_loss + clf_real_loss
        clf_loss.backward(create_graph = True)
        optim_clf.step()

        # ---- Train architecture using the validation batch ----
        # NOTE(review): optim_arch.step() applies whatever gradients currently sit on
        # D.arch_parameters(). The validation loss below is computed through clf only,
        # so those gradients appear to originate from G_loss.backward() above rather
        # than from this loss -- confirm this is the intended update.
        y = clf(val_images)
        loss = clf.loss(y, val_labels)
        loss.backward()
        optim_arch.step()

        print(loss)
        print(D.alphas_normal[0])

        # Drop every gradient so nothing accumulated in this iteration carries over.
        for params in (G.parameters(), D.parameters(), clf.parameters(), D.arch_parameters()):
            for param in params:
                param.grad = None

        num_batches += 1

    # Report the mean losses for the epoch (skipped if the loaders yielded nothing).
    if num_batches:
        print('epoch %d: G_loss %.4f, D_real_loss %.4f, D_fake_loss %.4f' % (
            epoch,
            G_running_loss / num_batches,
            D_running_real_loss / num_batches,
            D_running_fake_loss / num_batches))

    # Advance the epoch counter; without this the while-condition never becomes false.
    epoch += 1

0 0
tensor(2.3221, device='cuda:0', grad_fn=<NllLossBackward>)
tensor([-0.0103,  0.0092, -0.0098, -0.0096,  0.0105,  0.0112, -0.0091, -0.0097],
       device='cuda:0', grad_fn=<SelectBackward>)
0 1
tensor(2.5134, device='cuda:0', grad_fn=<NllLossBackward>)
tensor([-0.0194,  0.0173, -0.0116, -0.0132,  0.0152,  0.0206, -0.0189, -0.0171],
       device='cuda:0', grad_fn=<SelectBackward>)
0 2
tensor(3.0918, device='cuda:0', grad_fn=<NllLossBackward>)
tensor([-0.0286,  0.0262, -0.0135, -0.0185,  0.0174,  0.0209, -0.0163, -0.0213],
       device='cuda:0', grad_fn=<SelectBackward>)
0 3
tensor(2.8659, device='cuda:0', grad_fn=<NllLossBackward>)
tensor([-0.0364,  0.0312, -0.0148, -0.0220,  0.0210,  0.0239, -0.0196, -0.0250],
       device='cuda:0', grad_fn=<SelectBackward>)
0 4
tensor(3.2459, device='cuda:0', grad_fn=<NllLossBackward>)
tensor([-0.0404,  0.0330, -0.0190, -0.0276,  0.0254,  0.0259, -0.0181, -0.0288],
       device='cuda:0', grad_fn=<SelectBackward>)
0 5
tensor(1.8115, device='cud

tensor(4.1423, device='cuda:0', grad_fn=<NllLossBackward>)
tensor([-0.0527,  0.0628, -0.0509, -0.0699,  0.0747, -0.0009, -0.0274, -0.0503],
       device='cuda:0', grad_fn=<SelectBackward>)
0 43
tensor(4.5220, device='cuda:0', grad_fn=<NllLossBackward>)
tensor([-0.0528,  0.0634, -0.0504, -0.0694,  0.0753, -0.0020, -0.0286, -0.0504],
       device='cuda:0', grad_fn=<SelectBackward>)
0 44
tensor(5.1960, device='cuda:0', grad_fn=<NllLossBackward>)
tensor([-0.0528,  0.0639, -0.0500, -0.0687,  0.0757, -0.0029, -0.0299, -0.0502],
       device='cuda:0', grad_fn=<SelectBackward>)
0 45
tensor(2.6456, device='cuda:0', grad_fn=<NllLossBackward>)
tensor([-0.0527,  0.0643, -0.0497, -0.0682,  0.0761, -0.0035, -0.0310, -0.0501],
       device='cuda:0', grad_fn=<SelectBackward>)
0 46
tensor(3.3418, device='cuda:0', grad_fn=<NllLossBackward>)
tensor([-0.0526,  0.0646, -0.0493, -0.0675,  0.0763, -0.0041, -0.0320, -0.0498],
       device='cuda:0', grad_fn=<SelectBackward>)
0 47
tensor(6.0009, device='cu

Traceback (most recent call last):
  File "/home/qzf/anaconda3/envs/torch/lib/python3.7/multiprocessing/queues.py", line 242, in _feed
    send_bytes(obj)
  File "/home/qzf/anaconda3/envs/torch/lib/python3.7/multiprocessing/connection.py", line 200, in send_bytes
    self._send_bytes(m[offset:offset + size])
  File "/home/qzf/anaconda3/envs/torch/lib/python3.7/multiprocessing/connection.py", line 404, in _send_bytes
    self._send(header + buf)
  File "/home/qzf/anaconda3/envs/torch/lib/python3.7/multiprocessing/connection.py", line 368, in _send
    n = write(self._handle, buf)
BrokenPipeError: [Errno 32] Broken pipe


KeyboardInterrupt: 