In [52]:
# Import dependencies

import torch
import torch.nn as nn
import torchvision
from torchvision import transforms
from torchvision import models
from PIL import Image
from torchvision.utils import save_image
from tqdm import tqdm

In [53]:
# Define model

class VGG(nn.Module):
    def __init__(self):
        super().__init__()

        self.chosen_features = [0, 5, 10, 19, 28]
        self.model = models.vgg19(pretrained=True).features[:29]

    def forward(self, x):
        features = []

        for layer_num, layer in enumerate(self.model):
            x = layer(x)
            if layer_num in self.chosen_features:
                features.append(x)

        return features

In [54]:
# Initialize model

def load_image(image_name):
    img = Image.open(image_name)
    img = loader(img).unsqueeze(0).to(device)
    return img

img_size = 356
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')

loader = transforms.Compose([
    transforms.Resize((img_size, img_size)),
    transforms.ToTensor(),
    # transforms.Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225])
])

original_img = load_image(r"C:\Users\yniti\Downloads\myphotos\20241025_164745.jpg")
style_img = load_image(r"C:\Users\yniti\Downloads\myphotos\images.jpg")
generated_img = original_img.clone().requires_grad_(True)
model = VGG().to(device).eval()



In [55]:
# training

total_steps = 10000
lr = 0.001
alpha = 1
beta = 0.01
optimizer = torch.optim.Adam([generated_img], lr=lr)
i=1

for step in tqdm(range(total_steps)):

    original_features = model(original_img)
    style_features = model(style_img)
    generated_features = model(generated_img)

    style_loss = original_loss = 0

    for original_feature, style_feature, generated_feature in zip(original_features, style_features, generated_features):
        
        batch_size, img_channels, height, width = generated_feature.shape

        original_loss += torch.mean((generated_feature - original_feature)**2)

        # compute the Gram Matrix of generated image
        G = generated_feature.view(img_channels, height*width).mm(
            generated_feature.view(img_channels, height*width).t()
        )

        # compute the Gram Matrix for style image

        S = style_feature.view(img_channels, height*width).mm(
            style_feature.view(img_channels, height*width).t()
        )

        style_loss += torch.mean((G-S)**2)

    total_loss = original_loss * alpha + style_loss*beta
        
    optimizer.zero_grad()
    total_loss.backward()
    optimizer.step()

    if (step+1) % 2000 == 0:
        print(total_loss)
        save_image(generated_img, f'./NST/Run3/generated_{i}.png')
        i+=1

 20%|██        | 2003/10000 [02:54<11:04, 12.03it/s]

tensor(6340.5425, device='cuda:0', grad_fn=<AddBackward0>)


 40%|████      | 4003/10000 [05:45<08:52, 11.25it/s]

tensor(2286.2439, device='cuda:0', grad_fn=<AddBackward0>)


 60%|██████    | 6003/10000 [08:38<05:52, 11.33it/s]

tensor(987.9391, device='cuda:0', grad_fn=<AddBackward0>)


 80%|████████  | 8003/10000 [11:32<02:57, 11.27it/s]

tensor(623.9799, device='cuda:0', grad_fn=<AddBackward0>)


100%|██████████| 10000/10000 [14:26<00:00, 11.54it/s]

tensor(513.7217, device='cuda:0', grad_fn=<AddBackward0>)



