<a href="https://colab.research.google.com/github/vyragosa/Deep-Learning-with-Pytorch/blob/main/Lesson3/VanGoghStyleTransfer.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

# Модель

In [1]:
!pip install gradio
import torch
import torch.nn as nn
import torch.nn.functional as F
import torch.optim as optim

from PIL import Image
import requests
from io import BytesIO
import matplotlib.pyplot as plt


import torchvision.transforms as transforms
import torchvision.models as models

from tqdm.auto import tqdm

import copy

import gradio as gr

Looking in indexes: https://pypi.org/simple, https://us-python.pkg.dev/colab-wheels/public/simple/


In [2]:
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
device

device(type='cuda')

Будем использовать алгоритм, предложенный Леоном Гатисом.

Основаная идея:

Взять исходное изображение и рассматривать его пиксели, как настраиваемые параметры в алгоритме градиентного спуска. Критерий качества должен быть выбран так, чтобы он уменьшался по мере приближения исходного изображения к желаемому стилизованному.

В качестве оценки качеаства используются 2 критерия:
- степень соответствия преобразованного изображения исходному;
- степень стилизации преобразованного изображения. 

В решении используется сверточная нейросеть VGG19, обученая на базе изображений ImageNet.

Вот как выглядит VGG19 внутри

![VGG Layers](https://www.researchgate.net/profile/Clifford_Yang/publication/325137356/figure/fig2/AS:670371271413777@1536840374533/llustration-of-the-network-architecture-of-VGG-19-model-conv-means-convolution-FC-means.jpg)

In [3]:
class ContentLoss(nn.Module):
    def __init__(self, target, ):
        super(ContentLoss, self).__init__()
        self.target = target.detach()

    def forward(self, input):
        self.loss = F.mse_loss(input, self.target)
        return input


class StyleLoss(nn.Module):
    def __init__(self, target_feature):
        super(StyleLoss, self).__init__()
        self.target = StyleLoss.gram_matrix(target_feature).detach()

    def forward(self, input):
        self.loss = F.mse_loss(StyleLoss.gram_matrix(input), self.target)
        return input

    @staticmethod
    def gram_matrix(input):
        a, b, c, d = input.size()
        features = input.view(a * b, c * d)
        return torch.mm(features, features.t()).div(a * b * c * d)


class Normalization(nn.Module):
    def __init__(self, mean, std):
        super(Normalization, self).__init__()
        self.mean = torch.tensor(mean).view(-1, 1, 1)
        self.std = torch.tensor(std).view(-1, 1, 1)

    def forward(self, img):
        return (img - self.mean) / self.std


In [4]:
loader = transforms.Compose([transforms.Resize(512), transforms.ToTensor()])
unloader = transforms.ToPILImage()
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")


def open_image(url):
    """
    With requests, receives and returns an image
    :param url: the address of the image to upload should, if possible, start with http
    :return: PIL type image
    """
    print(type(url))
    if isinstance(url, str) and url.startswith("http"):
        response = requests.get(url)
        return Image.open(BytesIO(response.content))
    return Image.open(url)


def save_image(tensor, filename):
    """
    Saves the image to the file specified in the parameter
    :param tensor: Image in the form of a tensor
    :param filename: The name of the file to save the image
    :return: None
    """
    image = tensor.cpu().clone()
    image = image.squeeze(0)
    image = unloader(image)
    image.save(filename)


def image_loader(image_name, im_size):
    """
    Converts the image to RGB format and leads to the size specified in the parameters
    :param im_size: Image size tuple (width, height)
    :param image_name: Name of the image to convert
    :return: A picture in the form of a tensor with Float data
    """
    image = loader(image_name.resize(im_size).convert('RGB')).unsqueeze(0)
    return image.to(device, torch.float)


def image_unloader(image_name):
    """
    Converts the image to RGB format and leads to the size specified in the parameters
    :param image_name: Name of the image to convert
    :return: A picture in the form of a tensor with Float data
    """
    image_name = image_name.squeeze(0)
    image_name = unloader(image_name)
    return image_name


def get_style_model_and_losses(cnn, normalization_mean, normalization_std, style_img, content_img,
                               content_layers='conv_5',
                               style_layers=('conv_1', 'conv_2', 'conv_3', 'conv_4', 'conv_5')):
    """

    :param cnn:
    :param normalization_mean:
    :param normalization_std:
    :param style_img:
    :param content_img:
    :param content_layers:
    :param style_layers:
    :return:
    """
    normalization = Normalization(normalization_mean, normalization_std).to(device)

    content_losses = []
    style_losses = []

    model = nn.Sequential(normalization)

    i = 0
    for layer in cnn.children():
        if isinstance(layer, nn.Conv2d):
            i += 1
            name = 'conv_{}'.format(i)
        elif isinstance(layer, nn.ReLU):
            name = 'relu_{}'.format(i)

            layer = nn.ReLU(inplace=False)
        elif isinstance(layer, nn.MaxPool2d):
            name = 'pool_{}'.format(i)
        elif isinstance(layer, nn.BatchNorm2d):
            name = 'bn_{}'.format(i)
        else:
            raise RuntimeError('Unrecognized layer: {}'.format(layer.__class__.__name__))

        model.add_module(name, layer)

        if name in content_layers:
            target = model(content_img).detach()
            content_loss = ContentLoss(target)
            model.add_module("content_loss_{}".format(i), content_loss)
            content_losses.append(content_loss)

        if name in style_layers:
            target_feature = model(style_img).detach()
            style_loss = StyleLoss(target_feature)
            model.add_module("style_loss_{}".format(i), style_loss)
            style_losses.append(style_loss)

    for i in range(len(model) - 1, -1, -1):
        if isinstance(model[i], ContentLoss) or isinstance(model[i], StyleLoss):
            break

    model = model[:(i + 1)]

    return model, style_losses, content_losses


def get_input_optimizer(input_img):
    optimizer = optim.LBFGS([input_img])
    return optimizer


def run_style_transfer(cnn, content_img, style_img, input_img,
                       normalization_mean=torch.tensor([0.485, 0.456, 0.406]),
                       normalization_std=torch.tensor([0.229, 0.224, 0.225]), num_steps=300,
                       style_weight=1000000, content_weight=1):
    """

    :param cnn:
    :param content_img:
    :param style_img:
    :param input_img:
    :param size:
    :param normalization_mean:
    :param normalization_std:
    :param num_steps:
    :param style_weight:
    :param content_weight:
    :return:
    """
    print('Building the style transfer model..')
    model, style_losses, content_losses = get_style_model_and_losses(cnn, normalization_mean.to(device),
                                                                     normalization_std.to(device), style_img,
                                                                     content_img)
    input_img.requires_grad_(True)
    model.requires_grad_(False)

    optimizer = get_input_optimizer(input_img)

    i = [0]
    progress_bar = tqdm(total=num_steps)
    while i[0] <= num_steps:
        def closure():
            with torch.no_grad():
                input_img.clamp_(0, 1)
            optimizer.zero_grad()
            model(input_img)
            style_score = sum(sl.loss for sl in style_losses) * style_weight
            content_score = sum(cl.loss for cl in content_losses) * content_weight
            loss = style_score + content_score
            loss.backward()
            progress_bar.update(1)
            i[0] += 1
            return style_score + content_score
        optimizer.step(closure)
    with torch.no_grad():
        input_img.clamp_(0, 1)
    return input_img


def interfence(style_img, content_img, size, numb_step):
    """

    :param numb_step:number of steps in training
    :param size: size of image
    :param style_img:
    :param content_img:
    :return:
    """
    size = int(size)
    style = image_loader(style_img, (size, size))
    content = image_loader(content_img, (size, size))
    image = content.clone()
    output = run_style_transfer(models.vgg19(pretrained=True).features.to(device).eval(), content, style, image,
                                num_steps=numb_step)

    output = image_unloader(output)
    return output 


In [None]:
castle = "https://raw.githubusercontent.com/vyragosa/Deep-Learning-with-Pytorch/main/Lesson3/content.jpg"


night = "https://raw.githubusercontent.com/vyragosa/Deep-Learning-with-Pytorch/main/Lesson3/style.jpg"


#open image from url and save it. Get string of image path
content = Image.open(requests.get(castle, stream=True).raw).convert('RGB').save('content.jpg')
style= Image.open(requests.get(night, stream=True).raw).convert('RGB').save('style.jpg')

ex = ["style.jpg", "content.jpg"]

app = gr.Interface(interfence, inputs=[gr.inputs.Image(type="pil", label="Style Image"),
                                       gr.inputs.Image(type="pil", label="Content Image"),
                                       gr.Radio(["128", "256", "512", "1024"], label="Size"),
                                       gr.Slider(50, 500, step=50, label="Steps")],
                   outputs=gr.outputs.Image(type="pil"), title="Style Transfer",
                   description="Upload your style and content image and get the output image",
                   examples=[ex])
app.launch(share=True, debug=True)





Colab notebook detected. This cell will run indefinitely so that you can see errors and logs. To turn off, set debug=False in launch().
Running on public URL: https://58e91fb5-b3e9-406f.gradio.live

This share link expires in 72 hours. For free permanent hosting and GPU upgrades (NEW!), check out Spaces: https://huggingface.co/spaces




Building the style transfer model..


  self.mean = torch.tensor(mean).view(-1, 1, 1)
  self.std = torch.tensor(std).view(-1, 1, 1)


  0%|          | 0/300 [00:00<?, ?it/s]