In [0]:
from __future__ import print_function

import argparse
import os
import random
from collections import namedtuple

import numpy as np
import torch
import torch.backends.cudnn as cudnn
import torch.nn as nn
import torch.nn.functional as F
import torch.optim as optim
import torchvision.datasets as datas
import torchvision.models as models
import torchvision.models.vgg as VGG
import torchvision.transforms as tf
import torchvision.utils as tutils
from PIL import Image
from torch.autograd import Variable

In [0]:
class VGG(nn.Module):
    """Convolutional feature extractor with a VGG-19-style layer layout.

    The network has five stages of 3x3 convolutions (2, 2, 4, 4 and 4 layers),
    each stage followed by a 2x2 / stride-2 pooling layer.  Sub-modules are
    registered as ``conv<stage>_<index>`` and ``p<stage>``; these names are the
    keys of ``state_dict()`` and therefore must not change if existing
    checkpoints are to keep loading.

    Args:
        pooling: callable used to build every pooling layer; it is invoked as
            ``pooling(kernel_size=2, stride=2)`` (e.g. ``nn.AvgPool2d`` or
            ``nn.MaxPool2d``).
    """

    # (stage number, input channels, output channels, number of conv layers)
    _STAGES = (
        (1, 3, 64, 2),
        (2, 64, 128, 2),
        (3, 128, 256, 4),
        (4, 256, 512, 4),
        (5, 512, 512, 4),
    )

    # Result type of forward() when no explicit layer list is requested.
    # Built once here instead of on every call.
    _DefaultOutputs = namedtuple(
        "VggOutputs", ['relu1_2', 'relu2_2', 'relu3_3', 'relu4_3'])

    def __init__(self, pooling):
        super(VGG, self).__init__()
        for stage, in_channels, out_channels, n_convs in self._STAGES:
            channels = in_channels
            for index in range(1, n_convs + 1):
                self.add_module(
                    'conv%d_%d' % (stage, index),
                    nn.Conv2d(channels, out_channels, kernel_size=3, padding=1))
                # Only the first conv of a stage changes the channel count.
                channels = out_channels
            self.add_module('p%d' % stage, pooling(kernel_size=2, stride=2))

    def forward(self, x, out_params=None):
        """Run the full stack and return selected intermediate activations.

        Args:
            x: input batch with 3 channels (the first conv expects 3 input
                channels).
            out_params: optional iterable of activation names.  ReLU outputs
                are named ``'re<stage><index>'`` (``'re11'`` ... ``'re54'``)
                and pooling outputs ``'p<stage>'`` (``'p1'`` ... ``'p5'``).

        Returns:
            If ``out_params`` is given, a list of tensors in the same order as
            ``out_params``.  Otherwise a ``VggOutputs`` namedtuple with fields
            ``relu1_2``, ``relu2_2``, ``relu3_3`` and ``relu4_3``.

        Raises:
            KeyError: if ``out_params`` contains an unknown activation name.
        """
        out = {}
        h = x
        for stage, _, _, n_convs in self._STAGES:
            for index in range(1, n_convs + 1):
                conv = getattr(self, 'conv%d_%d' % (stage, index))
                h = F.relu(conv(h))
                out['re%d%d' % (stage, index)] = h
            h = getattr(self, 'p%d' % stage)(h)
            out['p%d' % stage] = h

        if out_params is not None:
            return [out[param] for param in out_params]
        return self._DefaultOutputs(
            out['re12'], out['re22'], out['re33'], out['re43'])



In [0]:
class GramMatrix(nn.Module):
    """Channel-by-channel Gram matrix of a 4-D feature map."""

    def forward(self, input):
        """Return the Gram matrix of ``input`` divided by its spatial size.

        ``input`` is unpacked as (batch, channels, height, width); the result
        has shape (batch, channels, channels).
        """
        batch, channels, height, width = input.size()
        n_positions = height * width
        # Flatten the spatial grid: (batch, channels, height*width).
        flat = input.view(batch, channels, n_positions)
        # Batched product of (b, c, hw) with (b, hw, c) gives (b, c, c).
        gram = torch.bmm(flat, flat.transpose(1, 2))
        return gram.div(n_positions)

class styleLoss(nn.Module):
    """Mean-squared error between the Gram matrix of a feature map and a target."""

    def forward(self, input, target):
        """Compare ``GramMatrix()(input)`` against ``target``.

        ``target`` is used as-is, so it must already be a Gram matrix.
        """
        gram_of_input = GramMatrix()(input)
        # Same computation as nn.MSELoss() with its default 'mean' reduction.
        return F.mse_loss(gram_of_input, target)

In [0]:
#Dataset Processing
class Normalization(nn.Module):
    """Per-channel ``(img - mean) / std`` as a module.

    ``mean`` and ``std`` are stored as non-persistent buffers so that they
    follow the module through ``.to(device)``, ``.cuda()`` and dtype casts
    (plain tensor attributes would be left behind and cause device
    mismatches), while ``state_dict()`` stays empty as before.

    Args:
        mean: per-channel means (sequence of numbers or a tensor).
        std: per-channel standard deviations (sequence of numbers or a tensor).
    """

    def __init__(self, mean, std):
        super(Normalization, self).__init__()
        self.register_buffer('mean', self._channel_stat(mean), persistent=False)
        self.register_buffer('std', self._channel_stat(std), persistent=False)

    @staticmethod
    def _channel_stat(values):
        """Copy ``values`` into a [C x 1 x 1] tensor.

        That shape broadcasts directly against image tensors of shape
        [B x C x H x W] (B batch size, C channels, H height, W width).
        The copy avoids aliasing a tensor passed in by the caller.
        """
        return torch.as_tensor(values).detach().clone().view(-1, 1, 1)

    def forward(self, img):
        """Return ``img`` normalized channel-wise."""
        return (img - self.mean) / self.std

# Preprocessing applied to every PIL image before it is fed to the network.
_preprocess_steps = [
    # Resize to the default image size of 512.
    tf.Resize(512),
    # PIL image -> torch tensor.
    tf.ToTensor(),
    # Reorder the channels from RGB to BGR.
    tf.Lambda(lambda t: t[torch.LongTensor([2, 1, 0])]),
    # Shift and scale every channel with a fixed mean of 0.333 and std of 0.5.
    # NOTE(review): these are not per-channel dataset statistics -- confirm
    # they match what the loaded VGG weights expect.
    tf.Normalize(mean=[0.333] * 3, std=[0.5] * 3),
    # Scale the values by 255 (in place).
    tf.Lambda(lambda t: t.mul_(255)),
]
transform = tf.Compose(_preprocess_steps)

def load_img(path):
    """Load an image file and preprocess it into a single-image batch.

    The image is converted to 3-channel RGB before preprocessing, so
    grayscale, palette and RGBA files are handled the same way as RGB ones
    (the preprocessing pipeline indexes channels 0-2 and normalizes exactly
    three channels).

    Args:
        path: location of the image file, passed to ``PIL.Image.open``.

    Returns:
        The tensor produced by the module-level ``transform`` with a leading
        batch dimension of size 1 added.
    """
    # The context manager closes the underlying file once the pixel data
    # has been copied out by convert().
    with Image.open(path) as handle:
        rgb = handle.convert('RGB')
    return transform(rgb).unsqueeze(0)

def save_img(img, out_path='./images/transfer_LBFGS_30_5cl_2.png'):
    """Undo the preprocessing of ``transform`` and write the image to disk.

    The module-level ``transform`` maps an RGB image ``p`` in [0, 1] to
    ``(p_bgr - 0.333) / 0.5 * 255``.  This function applies the exact inverse
    (``x / 255 * 0.5 + 0.333``, then BGR -> RGB), clamps to [0, 1] and saves.

    Args:
        img: 3-channel tensor in the value range produced by ``transform``.
            It is not modified; the work is done on a copy.
        out_path: destination file.  Defaults to the location this notebook
            has always written to.

    Returns:
        None.
    """
    # Work on a copy so the caller's tensor is left untouched.
    img = img.detach().clone()
    post = tf.Compose([
        # Undo the final scaling by 255.
        tf.Lambda(lambda x: x.mul(1. / 255)),
        # Undo Normalize(mean=0.333, std=0.5):
        # (x - (-0.666)) / 2  ==  x * 0.5 + 0.333
        tf.Normalize(mean=[-0.666, -0.666, -0.666], std=[2., 2., 2.]),
        # Back from BGR to RGB.
        tf.Lambda(lambda x: x[torch.LongTensor([2, 1, 0])]),
    ])
    img = post(img).clamp_(0, 1)

    # Make sure the destination directory exists before writing.
    out_dir = os.path.dirname(out_path)
    if out_dir and not os.path.isdir(out_dir):
        os.makedirs(out_dir)
    tutils.save_image(img, out_path, normalize=True)
    return


In [0]:
def run_style_transfer(pooling, vgg_directory,
                       content_image, style_image, num_steps=20,
                       style_weight=1, content_weight=9):
    """Run L-BFGS neural style transfer and save the resulting image.

    Starting from a copy of the content image, the pixels are optimized so
    that selected VGG activations match the content image (MSE on features)
    and the style image (MSE on Gram matrices).  The result is written via
    ``save_img``.

    Args:
        pooling: pooling layer class handed to ``VGG`` (e.g. ``nn.AvgPool2d``).
        vgg_directory: path of the saved VGG ``state_dict`` file.
        content_image: path of the content image.
        style_image: path of the style image.
        num_steps: number of outer ``optimizer.step`` calls.  Each call may
            evaluate the loss up to 20 times (``max_iter=20``).
        style_weight: multiplier applied to every style-layer loss.
        content_weight: multiplier applied to every content-layer loss.

    Returns:
        None.
    """
    device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

    vgg = VGG(pooling)
    # Load onto the CPU first so that checkpoints saved from a GPU also load
    # on CPU-only machines; the whole model is moved to `device` below.
    vgg.load_state_dict(torch.load(vgg_directory, map_location="cpu"))
    for param in vgg.parameters():
        param.requires_grad = False
    vgg.to(device)

    styleImage = load_img(style_image).to(device)
    contentImage = load_img(content_image).to(device)

    content_layers = ['re12', 're22', 're32', 're42', 're52']
    style_layers = ['re11', 're21', 're31', 're41', 're51']
    # The loss modules hold no parameters or buffers, so sharing a single
    # instance per kind is safe and they need no explicit device placement.
    style_Losses = [styleLoss()] * len(style_layers)
    content_Losses = [nn.MSELoss()] * len(content_layers)

    # Fixed optimization targets: Gram matrices for style, raw features for
    # content.
    styleTargets = [GramMatrix()(t.detach())
                    for t in vgg(styleImage, style_layers)]
    contentTargets = [t.detach() for t in vgg(contentImage, content_layers)]

    losses = style_Losses + content_Losses
    targets = styleTargets + contentTargets
    loss_layers = style_layers + content_layers
    weights = ([style_weight] * len(style_layers)
               + [content_weight] * len(content_layers))

    # Setting seed for pytorch.
    torch.cuda.manual_seed_all(1218)
    if not os.path.exists("images/"):
        os.makedirs("images/")
    # The below flag allows you to enable the cudnn auto-tuner
    # to find the best algorithm for your hardware.
    cudnn.benchmark = True

    # The image being optimized: a leaf tensor initialised from the content
    # image.  contentImage already lives on `device`, so the clone does too.
    optimImage = contentImage.detach().clone().requires_grad_(True)
    optimizer = optim.LBFGS([optimImage], lr=1, max_iter=20)

    def closure():
        # L-BFGS re-evaluates the objective several times per step, so the
        # loss and its gradient are computed inside this callback.
        optimizer.zero_grad()
        out = vgg(optimImage, loss_layers)
        totalLoss = sum(
            loss_fn(layer_output, target) * weight
            for layer_output, loss_fn, target, weight
            in zip(out, losses, targets, weights))
        totalLoss.backward()
        print('Loss: %f' % totalLoss.item())
        return totalLoss

    # Training
    for step in range(num_steps):
        print('Epoch: {}/{}'.format(step + 1, num_steps))
        optimizer.step(closure)
    print('Style transfer is completed')
    outImage = optimImage.detach()[0].to('cpu')
    save_img(outImage.squeeze())

In [45]:
# Arguments for this run, collected in one place so they are easy to tweak.
style_transfer_kwargs = dict(
    pooling=nn.AvgPool2d,
    vgg_directory="/content/drive/My Drive/UPenn_Courses/Fall2019/MachineLearning/Project/Images/vgg_conv.pth",
    content_image="/content/drive/My Drive/UPenn_Courses/Fall2019/MachineLearning/Project/Images/paris2.jpg",
    style_image="/content/drive/My Drive/UPenn_Courses/Fall2019/MachineLearning/Project/Images/xingkong.jpg",
    num_steps=20,
    style_weight=1,
    content_weight=9,
)
run_style_transfer(**style_transfer_kwargs)

Epoch: 1/20
Loss: 124752519168.000000
Loss: 124752519168.000000
Epoch: 2/20
Loss: 124752519168.000000
Loss: 19108456559584215040.000000
Loss: 124392374272.000000
Loss: 124020826112.000000
Loss: 123641192448.000000
Loss: 123254923264.000000
Loss: 122864164864.000000
Loss: 122470342656.000000
Loss: 122075021312.000000
Loss: 121679446016.000000
Loss: 65378324480.000000
Loss: 32826925056.000000
Loss: 18385330176.000000
Loss: 13689076736.000000
Loss: 7454119424.000000
Loss: 5329922560.000000
Loss: 3958648832.000000
Loss: 3468248064.000000
Loss: 2785124352.000000
Loss: 2219844096.000000
Epoch: 3/20
Loss: 2025869056.000000
Loss: 1672670592.000000
Loss: 1557288320.000000
Loss: 1423690880.000000
Loss: 1243010048.000000
Loss: 1048098560.000000
Loss: 935187584.000000
Loss: 819880320.000000
Loss: 770093376.000000
Loss: 711027968.000000
Loss: 671191872.000000
Loss: 629932800.000000
Loss: 561109696.000000
Loss: 531008256.000000
Loss: 483393056.000000
Loss: 458380800.000000
Loss: 425766912.000000
Los