**This notebook calculates the Fréchet Inception Distance (FID) of 50000 generated images using pytorch-fid, the PyTorch port of the official FID implementation [2].**

References: <br>
[1] Kaggle dataset: https://www.kaggle.com/datasets/yuanhaowang486/chinese-calligraphy-styles-by-calligraphers <br>
[2] Github repository: https://github.com/mseitzer/pytorch-fid

Install the pytorch-fid package, the PyTorch port of the official FID implementation [2]

In [None]:
! pip install pytorch-fid

Commands for downloading the dataset [1] from Kaggle in Google Colab. Note: the Kaggle API token file 'kaggle.json' must be present in the working directory

In [None]:
! mkdir ~/.kaggle

In [None]:
# Copy the Kaggle API token from the working directory into ~/.kaggle/.
# Assumes kaggle.json has already been uploaded next to this notebook.
! cp kaggle.json ~/.kaggle/

In [None]:
# Restrict the token file to owner read/write only.
! chmod 600 ~/.kaggle/kaggle.json

In [None]:
# Download the dataset archive [1] with the Kaggle command line tool.
! kaggle datasets download -d yuanhaowang486/chinese-calligraphy-styles-by-calligraphers

In [None]:
! unzip /content/chinese-calligraphy-styles-by-calligraphers.zip

In [None]:
import os
from tqdm.auto import tqdm
import torchvision.transforms as transforms
from torchvision.utils import save_image
from PIL import Image
import shutil

Copy both the training and test sets of the original data into a separate folder for calculating FID

In [None]:
# Create the flat folder that will hold the real Liu Gongquan images.
# exist_ok=True replaces the check-then-create pattern, keeps the cell re-runnable,
# and raises if the path exists but is not a directory instead of silently continuing.
os.makedirs('dataset_liu_gongquan', exist_ok=True)

In [None]:
# Copy every file of the Liu Gongquan training split into the flat reference folder.
lgq_train_dir = os.path.join('data', 'data', 'train', 'lgq')
for filename in tqdm(os.listdir(lgq_train_dir)):
    shutil.copy(os.path.join(lgq_train_dir, filename), 'dataset_liu_gongquan')

  0%|          | 0/5410 [00:00<?, ?it/s]

In [None]:
# Copy every file of the Liu Gongquan test split into the same reference folder.
lgq_test_dir = os.path.join('data', 'data', 'test', 'lgq')
for filename in tqdm(os.listdir(lgq_test_dir)):
    shutil.copy(os.path.join(lgq_test_dir, filename), 'dataset_liu_gongquan')

  0%|          | 0/1353 [00:00<?, ?it/s]

In [None]:
# Create the flat folder that will hold the real Mi Fu images.
# exist_ok=True replaces the check-then-create pattern, keeps the cell re-runnable,
# and raises if the path exists but is not a directory instead of silently continuing.
os.makedirs('dataset_mi_fu', exist_ok=True)

In [None]:
# Copy every file of the Mi Fu training split into the flat reference folder.
mf_train_dir = os.path.join('data', 'data', 'train', 'mf')
for filename in tqdm(os.listdir(mf_train_dir)):
    shutil.copy(os.path.join(mf_train_dir, filename), 'dataset_mi_fu')

  0%|          | 0/5410 [00:00<?, ?it/s]

In [None]:
# Copy every file of the Mi Fu test split into the same reference folder.
mf_test_dir = os.path.join('data', 'data', 'test', 'mf')
for filename in tqdm(os.listdir(mf_test_dir)):
    shutil.copy(os.path.join(mf_test_dir, filename), 'dataset_mi_fu')

  0%|          | 0/1353 [00:00<?, ?it/s]

In [None]:
# Report how many files ended up in each reference folder (train and test copies combined).
liu_gongquan_count = len(os.listdir('dataset_liu_gongquan'))
print('Total number of Liu Gongquan style images used for training: {}'.format(liu_gongquan_count))
mi_fu_count = len(os.listdir('dataset_mi_fu'))
print('Total number of Mi Fu style images used for training: {}'.format(mi_fu_count))

Total number of Liu Gongquan style images used for training: 6763
Total number of Mi Fu style images used for training: 6763


In [None]:
from math import log2
import random
import numpy as np
import torch
import torch.nn as nn
import torch.nn.functional as F
import torchvision.datasets as datasets
from torchvision.utils import make_grid
import torch.optim as optim
from torch.utils.data import DataLoader
import matplotlib.pyplot as plt
import shutil

In [None]:
def show_tensor_images(image_tensor, num_images=10, nrow=5, figsize=(12,12)):
    """Display the first ``num_images`` images of a batch as a single grid.

    Args:
        image_tensor: batch of images; values are clamped to [-1, 1] before display.
        num_images: how many images from the start of the batch to show.
        nrow: ``nrow`` argument forwarded to ``make_grid``.
        figsize: size of the matplotlib figure.
    """
    # Clamp to [-1, 1], then map linearly to [0, 1] for imshow.
    displayable = (torch.clamp(image_tensor, min=-1.0, max=1.0) + 1) / 2
    subset = displayable[:num_images].detach().cpu()
    grid = make_grid(subset, nrow=nrow)

    plt.figure(figsize=figsize)
    # make_grid returns channels first; imshow wants channels last.
    plt.imshow(grid.permute(1, 2, 0).squeeze())
    plt.show()

The model <br>
Note: the weights for the model are stored in my personal Google Drive (the loading cells below read them from `drive/MyDrive/Caligraphy/`)

In [None]:
# Configuration shared by the model definition and the generation cells.

# Not referenced in the visible part of this notebook; kept for compatibility.
DATASET                 = '/content/dataset'
# Use the GPU when one is available and fall back to the CPU otherwise, so the
# notebook does not crash on a runtime without CUDA.
DEVICE                  = "cuda" if torch.cuda.is_available() else "cpu"
# Not referenced in the visible part of this notebook; kept for compatibility.
image_size              = 64
# Number of channels of the generated images.
CHANNELS_IMG            = 3
# Dimensionality of the latent noise vector fed to the generator.
Z_DIM                   = 512
# Channel width of the generator's first blocks.
IN_CHANNELS             = 512

In [None]:
class ELConv2d(nn.Module):
    """2D convolution whose weight is rescaled by ``sqrt(2 / fan_in)`` on every call.

    The stored weight is drawn from a standard normal distribution and the bias
    starts at zero; the scaling factor is applied in ``forward`` rather than
    baked into the parameter.
    """

    def __init__(self, in_channels, out_channels, kernel_size=3, stride=1, padding=1):
        super().__init__()
        weight_shape = (out_channels, in_channels, kernel_size, kernel_size)
        self.weight = nn.Parameter(torch.zeros(*weight_shape))
        self.bias = nn.Parameter(torch.zeros(out_channels))
        self.stride = stride
        self.padding = padding

        # Number of inputs feeding each output value.
        self.fan_in = in_channels * kernel_size * kernel_size
        self.scale = (2 / self.fan_in) ** (0.5)

        nn.init.normal_(self.weight)
        nn.init.zeros_(self.bias)

    def forward(self, x):
        """Convolve ``x`` with the rescaled weight."""
        scaled_weight = self.weight * self.scale
        return F.conv2d(x, scaled_weight, self.bias, stride=self.stride, padding=self.padding)

class ELConvTranspose2d(nn.Module):
    """Transposed 2D convolution whose weight is rescaled by ``sqrt(2 / fan_in)``.

    ``fan_in`` is taken to be ``in_channels``. The stored weight is drawn from a
    standard normal distribution and the bias starts at zero.
    """

    def __init__(self, in_channels, out_channels, kernel_size=4, stride=1, padding=0):
        super().__init__()
        # conv_transpose2d expects the weight as (in, out, kH, kW).
        weight_shape = (in_channels, out_channels, kernel_size, kernel_size)
        self.weight = nn.Parameter(torch.zeros(*weight_shape))
        self.bias = nn.Parameter(torch.zeros(out_channels))
        self.stride = stride
        self.padding = padding

        self.fan_in = in_channels
        self.scale = (2 / self.fan_in) ** (0.5)

        nn.init.normal_(self.weight)
        nn.init.zeros_(self.bias)

    def forward(self, x):
        """Apply the transposed convolution with the rescaled weight."""
        scaled_weight = self.weight * self.scale
        return F.conv_transpose2d(x, scaled_weight, self.bias, stride=self.stride, padding=self.padding)

class ELLinear(nn.Module):
    """Fully connected layer whose weight is rescaled by ``sqrt(2 / fan_in)`` on every call.

    Like ``ELConv2d`` and ``ELConvTranspose2d``, the stored weight is drawn from a
    standard normal distribution and the bias starts at zero. Without this
    initialisation the weight stays all-zero and a freshly constructed layer
    outputs only its bias, whatever the input.

    Args:
        in_channels: number of input features.
        out_channels: number of output features.
    """

    def __init__(self, in_channels, out_channels):
        super(ELLinear, self).__init__()
        self.weight = nn.Parameter(torch.zeros(out_channels, in_channels))
        self.bias = nn.Parameter(torch.zeros(out_channels))
        self.fan_in = in_channels
        self.scale = (2 / self.fan_in) ** (0.5)

        # Same initialisation as the sibling equalized layers; values loaded
        # later through load_state_dict overwrite it.
        nn.init.normal_(self.weight)
        nn.init.zeros_(self.bias)

    def forward(self, x):
        """Apply the linear map with the rescaled weight."""
        return F.linear(input=x,
                        weight=self.weight * self.scale,
                        bias=self.bias)

class PixelNorm(nn.Module):
    """Divide the input by the root mean square taken over dimension 1."""

    def __init__(self):
        super().__init__()
        # Added under the square root so that an all-zero input does not divide by zero.
        self.epsilon = 1e-8

    def forward(self, x):
        """Return ``x`` normalised along dimension 1."""
        mean_square = torch.mean(x ** 2, dim=1, keepdim=True)
        denominator = torch.sqrt(mean_square + self.epsilon)
        return x / denominator

class MiniBatchSTD(nn.Module):
    """Append one channel holding the mean standard deviation across the batch."""

    def __init__(self):
        super().__init__()
        # Added under the square root for numerical stability.
        self.alpha = 1e-8

    def forward(self, x):
        """Return ``x`` with one extra, constant-valued channel concatenated on dim 1."""
        n_samples, _, h, w = x.shape

        # Standard deviation of every position over the batch dimension.
        centered = x - x.mean(dim=0, keepdim=True)
        per_position_std = torch.sqrt(centered.pow(2).mean(dim=0, keepdim=False) + self.alpha)

        # Collapse to a single scalar and tile it into a (N, 1, H, W) feature map.
        stat_map = per_position_std.mean().repeat(n_samples, 1, h, w)
        return torch.cat([x, stat_map], 1)

class ConvBlock(nn.Module):
    """Two ``ELConv2d`` layers, each followed by LeakyReLU(0.2) and optional PixelNorm."""

    def __init__(self, in_channels, out_channels, use_pixelnorm=True):
        super().__init__()
        self.use_pn = use_pixelnorm
        self.conv1 = ELConv2d(in_channels, out_channels)
        self.conv2 = ELConv2d(out_channels, out_channels)
        self.leaky = nn.LeakyReLU(0.2)
        self.pn = PixelNorm()

    def forward(self, x):
        """Run ``x`` through conv -> activation -> (pixel norm), twice."""
        for conv in (self.conv1, self.conv2):
            x = self.leaky(conv(x))
            if self.use_pn:
                x = self.pn(x)
        return x

In [None]:
class Generator(nn.Module):
    """Progressively grown generator with a 4x4 stem and four upsampling blocks.

    Args:
        z_dim: size of the latent vector.
        in_channels: channel width of the stem and of the first three blocks;
            the last block outputs ``in_channels // 2`` channels.
        img_channels: number of channels of the produced image.
    """

    def __init__(self, z_dim, in_channels, img_channels=3):
        super().__init__()

        # Stem: latent vector -> (in_channels, 4, 4) feature map.
        self.initial_block = nn.Sequential(
            PixelNorm(),
            ELLinear(z_dim, in_channels * 4 * 4),
            nn.Unflatten(1, (in_channels, 4, 4)),
            nn.LeakyReLU(0.2),
            PixelNorm(),
            ELConv2d(in_channels, in_channels, kernel_size=3, stride=1, padding=1),
            nn.LeakyReLU(0.2),
            PixelNorm(),
        )

        # (input, output) channels of every block, with the resolution it produces.
        # A further (in_channels // 2, in_channels // 4) entry would extend this to 128.
        block_channels = [
            (in_channels, in_channels),        # 8
            (in_channels, in_channels),        # 16
            (in_channels, in_channels),        # 32
            (in_channels, in_channels // 2),   # 64
        ]
        self.prog_blocks = nn.ModuleList(
            [ConvBlock(block_in, block_out) for block_in, block_out in block_channels]
        )

        # One 1x1 to-RGB head per resolution: the stem (4) plus every block output.
        rgb_input_channels = [in_channels] + [block_out for _, block_out in block_channels]
        self.to_rgb_layers = nn.ModuleList(
            [ELConv2d(channels, img_channels, kernel_size=1, stride=1, padding=0)
             for channels in rgb_input_channels]
        )

    def forward(self, x, alpha, steps):
        """Generate images after ``steps`` upsampling blocks.

        Args:
            x: latent input passed to the stem.
            alpha: blend factor in [0, 1]; below 1 the output mixes the new
                block's image with the upsampled features of the previous
                resolution.
            steps: number of progressive blocks to apply (0 returns the 4x4 image).
        """
        assert 0 <= alpha <= 1, "Value of alpha out of range"

        out = self.initial_block(x)
        if steps == 0:
            return self.to_rgb_layers[0](out)

        for step in range(steps):
            # Nearest-neighbour upsampling doubles the resolution before each block.
            upscaled = F.interpolate(out, scale_factor=2, mode="nearest")
            out = self.prog_blocks[step](upscaled)

        new_rgb = self.to_rgb_layers[steps](out)
        if alpha < 1:
            # Fade-in: `upscaled` is the input of the last block, so it still
            # has the channel count of the previous resolution's RGB head.
            previous_rgb = self.to_rgb_layers[steps - 1](upscaled)
            return alpha * new_rgb + (1 - alpha) * previous_rgb
        return new_rgb

Function for generating a batch of images

In [None]:
def generate_by_batch(gen_ema, alpha, steps, batch_size=100, n=200, name='name'):
    """Sample ``n`` images from ``gen_ema`` and save each one as a PNG file.

    Files are written to ``<name>_generated/img_<batch>_<index>.png``. Images
    already present in that folder are left in place, so clear it first if the
    folder should contain only this run's output.

    Args:
        gen_ema: generator called as ``gen_ema(noise, alpha, steps)``; it is
            switched to evaluation mode here.
        alpha: blend factor forwarded to the generator.
        steps: number of progressive blocks forwarded to the generator.
        batch_size: number of images sampled per forward pass.
        n: total number of images to write. When ``n`` is not a multiple of
            ``batch_size`` the last batch is smaller, so exactly ``n`` images
            are produced instead of silently dropping the remainder.
        name: prefix of the output folder.
    """
    gen_ema.eval()

    # Ceiling division so that a trailing partial batch is not lost.
    num_batches = -(-n // batch_size)

    # Create the output folder once instead of checking on every iteration.
    out_dir = name + '_generated'
    os.makedirs(out_dir, exist_ok=True)

    for i in tqdm(range(num_batches)):
        current_batch = min(batch_size, n - i * batch_size)
        with torch.no_grad():
            noise = torch.randn(current_batch, Z_DIM).to(DEVICE)
            img = gen_ema(noise, alpha, steps)
            img = torch.clamp(img, min=-1.0, max=1.0)
        for j in range(current_batch):
            # Map from [-1, 1] to [0, 1] before writing the file.
            save_image(img[j,:,:,:]*0.5+0.5, os.path.join(out_dir, "img_{}_{}.png".format(i,j)))

Load weights for the model that generates Liu Gongquan style images

In [None]:
# Build a fresh generator and restore the weights stored under the "gen_ema" key.
gen_ema = Generator(Z_DIM, IN_CHANNELS, img_channels=CHANNELS_IMG).to(DEVICE)
# map_location places the checkpoint tensors on DEVICE, so loading does not
# depend on the device the checkpoint was saved from.
# NOTE(review): torch.load unpickles the file; only load checkpoints from a trusted source.
loaded_state = torch.load('drive/MyDrive/Caligraphy/Liu_Gongquan_style_weights.pt', map_location=DEVICE)
gen_ema.load_state_dict(loaded_state["gen_ema"]);

In [None]:
# Switch the generator to evaluation mode; the trailing ';' suppresses the cell output.
gen_ema.eval();

Generating 50000 Liu Gongquan style images

In [None]:
# Fully faded-in generator (alpha=1) at the last progressive step (steps=4).
print('Generating 50000 Liu Gongquan style images for calculateing FID ...')
generate_by_batch(
    gen_ema,
    alpha=1,
    steps=4,
    n=50000,
    batch_size=100,
    name='liu_gongquan_style',
)
print('Image generation finished')

Generating 50000 Liu Gongquan style images for calculateing FID ...


  0%|          | 0/500 [00:00<?, ?it/s]

Image generation finished


Calculate FID for the generated Liu Gongquan style images against the original Liu Gongquan images (training and test sets combined)

In [None]:
# Run pytorch-fid on two image folders: the copied original Liu Gongquan images
# and the images written by the generation cell.
! python -m pytorch_fid 'dataset_liu_gongquan' 'liu_gongquan_style_generated'

Downloading: "https://github.com/mseitzer/pytorch-fid/releases/download/fid_weights/pt_inception-2015-12-05-6726825d.pth" to /root/.cache/torch/hub/checkpoints/pt_inception-2015-12-05-6726825d.pth
100% 91.2M/91.2M [00:05<00:00, 16.6MB/s]
100% 136/136 [00:25<00:00,  5.31it/s]
100% 1000/1000 [03:10<00:00,  5.26it/s]
FID:  8.607782680691628


Load weights for the model that generates Mi Fu style images

In [None]:
# Build a fresh generator and restore the weights stored under the "gen_ema" key.
gen_ema = Generator(Z_DIM, IN_CHANNELS, img_channels=CHANNELS_IMG).to(DEVICE)
# map_location places the checkpoint tensors on DEVICE, so loading does not
# depend on the device the checkpoint was saved from.
# NOTE(review): torch.load unpickles the file; only load checkpoints from a trusted source.
loaded_state = torch.load('drive/MyDrive/Caligraphy/Mi_Fu_style_weights.pt', map_location=DEVICE)
gen_ema.load_state_dict(loaded_state["gen_ema"]);

Generating 50000 Mi Fu style images

In [None]:
# Fully faded-in generator (alpha=1) at the last progressive step (steps=4).
print('Generating 50000 Mi Fu style images for calculateing FID ...')
generate_by_batch(
    gen_ema,
    alpha=1,
    steps=4,
    n=50000,
    batch_size=100,
    name='mi_fu_style',
)
print('Image generation finished')

Generating 50000 Mi Fu style images for calculateing FID ...


  0%|          | 0/500 [00:00<?, ?it/s]

Image generation finished


Calculate FID for the generated Mi Fu style images against the original Mi Fu images (training and test sets combined)

In [None]:
# Run pytorch-fid on two image folders: the copied original Mi Fu images
# and the images written by the generation cell.
! python -m pytorch_fid 'dataset_mi_fu' 'mi_fu_style_generated'

100% 136/136 [00:25<00:00,  5.26it/s]
100% 1000/1000 [03:12<00:00,  5.20it/s]
FID:  11.888844291941695
