In [1]:
import torch
from torch import nn
from torch.utils.data.dataset import Dataset
from torch.utils.data import DataLoader

from torchvision import transforms

from PIL import Image
import numpy as np
from scipy import linalg

import os
import os.path as osp

In [2]:
class InceptionV3Dataset(Dataset):
    """Dataset that yields one preprocessed image tensor per file in a directory.

    Each item is resized so its shorter side is 299, center-cropped to
    299x299, converted to a tensor and normalized with the mean/std values
    given in ``self.transform``, then moved to ``device``.

    Args:
        images_path: Directory that contains the image files.
        device: Device the returned tensors are moved to (default ``'cuda'``).

    Attributes are plain instance fields: ``images_path``, ``device``,
    ``image_files`` (sorted file names) and ``transform``.
    """

    # Lower-case file extensions that are treated as images when scanning.
    IMAGE_EXTENSIONS = ('.jpg', '.jpeg', '.png', '.bmp', '.gif',
                        '.tif', '.tiff', '.webp')

    def __init__(self, images_path, device='cuda'):
        super().__init__()

        self.images_path = images_path
        self.device = device

        self.image_files = self._list_image_files(images_path)

        self.transform = transforms.Compose([
            transforms.Resize(299),
            transforms.CenterCrop(299),
            transforms.ToTensor(),
            transforms.Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225])
        ])

    @classmethod
    def _list_image_files(cls, images_path):
        """Return the sorted names of the regular image files in ``images_path``.

        ``os.listdir`` returns entries in arbitrary order and also reports
        sub-directories and non-image files (notebook checkpoints, OS metadata
        files, ...). Filtering and sorting keeps indexing deterministic and
        prevents ``Image.open`` from failing on such entries.
        """
        return sorted(
            name for name in os.listdir(images_path)
            if name.lower().endswith(cls.IMAGE_EXTENSIONS)
            and osp.isfile(osp.join(images_path, name))
        )

    def __len__(self):
        """Return the number of image files found in the directory."""
        return len(self.image_files)

    def __getitem__(self, idx):
        """Load, preprocess and return the image at position ``idx``.

        Returns:
            The transformed image tensor, moved to ``self.device``.
        """
        image_path = osp.join(self.images_path, self.image_files[idx])

        # The context manager closes the underlying file once the pixels are
        # read. Converting to RGB guarantees three channels, which the
        # 3-value mean/std in Normalize requires (grayscale or RGBA files
        # would otherwise fail).
        with Image.open(image_path) as image:
            image_tensor = self.transform(image.convert('RGB'))

        # NOTE(review): because the tensor is moved to the device here, the
        # DataLoader should stay single-process (num_workers=0) when the
        # device is CUDA -- confirm before enabling workers.
        return image_tensor.to(self.device)

In [3]:
# Load Inception v3 through torch.hub with the 'IMAGENET1K_V1' weights, then
# place it on the GPU and switch it to evaluation mode in one chained call
# (both `to` and `eval` return the module itself).
inception_v3 = torch.hub.load(
    'pytorch/vision:v0.10.0',
    'inception_v3',
    weights='IMAGENET1K_V1',
)
inception_v3 = inception_v3.to('cuda').eval()

print('inception v3 model loaded')

Using cache found in /home/jun/.cache/torch/hub/pytorch_vision_v0.10.0


inception v3 model loaded


In [4]:
# replace last two layers of inception_v3
class ID(nn.Module):
    """Pass-through module whose ``forward`` returns its input unchanged."""

    def forward(self, x):
        # No parameters and no computation: hand the very same object back.
        return x
    
# Swap the `dropout` and `fc` attributes for pass-through ID modules.
# NOTE(review): presumably this makes the model emit the pooled features
# instead of class logits -- confirm against the model's forward().
for head_layer in ('dropout', 'fc'):
    setattr(inception_v3, head_layer, ID())

In [5]:
# Directories holding the generated images and the ground-truth images.
output_dir = '../output/2024_02_22_17:33:26_pair/'
gt_dir = '../DATA/VITON-HD/test/image/'

# Build both datasets the same way, one per directory.
output_dataset, gt_dataset = (
    InceptionV3Dataset(directory) for directory in (output_dir, gt_dir)
)

# Report how many files each dataset picked up.
print('# of output images:', len(output_dataset))
print('# of gt images:', len(gt_dataset))

# of output images: 2032
# of gt images: 2032


In [6]:
def extract_activations(model, dataloader):
    """Run ``model`` on every batch of ``dataloader`` and stack the results.

    Args:
        model: Callable module applied to each batch.
        dataloader: Iterable that yields input batches for ``model``.

    Returns:
        A NumPy array made by vertically stacking the per-batch outputs
        (one row per input sample when each output is 2-D).
    """
    batch_activations = []
    # Gradients are not needed for feature extraction.
    with torch.no_grad():
        for batch in dataloader:
            batch_activations.append(model(batch).detach().cpu().numpy())
    return np.vstack(batch_activations)


output_dataloader = DataLoader(dataset=output_dataset, batch_size=512)
gt_dataloader = DataLoader(dataset=gt_dataset, batch_size=512)

# Same extraction routine for both image sets; each name is bound once,
# directly to the final stacked array.
output_activation = extract_activations(inception_v3, output_dataloader)
gt_activation = extract_activations(inception_v3, gt_dataloader)

In [8]:
# reference: https://github.com/mseitzer/pytorch-fid

# define frechet_distance
def frechet_distance(mu1, sigma1, mu2, sigma2, eps=1e-6):
    """Compute the Frechet distance between two Gaussians.

    The returned value is
    ``||mu1 - mu2||^2 + Tr(sigma1) + Tr(sigma2) - 2 * Tr(sqrt(sigma1 @ sigma2))``.

    Args:
        mu1: Mean of the first distribution (scalar or 1-D array-like).
        sigma1: Covariance of the first distribution (scalar or 2-D array-like).
        mu2: Mean of the second distribution, same shape as ``mu1``.
        sigma2: Covariance of the second distribution, same shape as ``sigma1``.
        eps: Value added to the diagonals of both covariances when the matrix
            square root of their product contains non-finite entries.

    Returns:
        The Frechet distance as a scalar.

    Raises:
        AssertionError: If the means or the covariances differ in shape.
        ValueError: If the matrix square root has a diagonal imaginary part
            larger than the 1e-3 tolerance.
    """
    mean_a, mean_b = np.atleast_1d(mu1), np.atleast_1d(mu2)
    cov_a, cov_b = np.atleast_2d(sigma1), np.atleast_2d(sigma2)

    assert mean_a.shape == mean_b.shape, \
        'Training and test mean vectors have different lengths'
    assert cov_a.shape == cov_b.shape, \
        'Training and test covariances have different dimensions'

    mean_gap = mean_a - mean_b

    # The product of the covariances can be close to singular, in which case
    # sqrtm may return non-finite entries; retry with a small diagonal offset.
    sqrt_prod, _ = linalg.sqrtm(cov_a.dot(cov_b), disp=False)
    if not np.isfinite(sqrt_prod).all():
        msg = ('fid calculation produces singular product; '
               'adding %s to diagonal of cov estimates') % eps
        print(msg)
        jitter = np.eye(cov_a.shape[0]) * eps
        sqrt_prod = linalg.sqrtm((cov_a + jitter).dot(cov_b + jitter))

    # Round-off can leave a tiny imaginary part; drop it when it is negligible
    # on the diagonal, otherwise report the largest imaginary magnitude.
    if np.iscomplexobj(sqrt_prod):
        diag_is_real = np.allclose(np.diagonal(sqrt_prod).imag, 0, atol=1e-3)
        if not diag_is_real:
            worst = np.max(np.abs(sqrt_prod.imag))
            raise ValueError('Imaginary component {}'.format(worst))
        sqrt_prod = sqrt_prod.real

    return (mean_gap.dot(mean_gap) + np.trace(cov_a)
            + np.trace(cov_b) - 2 * np.trace(sqrt_prod))

In [9]:
# Per-feature mean and covariance (columns are the variables) of the
# activations of the generated images ...
mu1, sigma1 = output_activation.mean(axis=0), np.cov(output_activation, rowvar=False)

# ... and of the ground-truth images.
mu2, sigma2 = gt_activation.mean(axis=0), np.cov(gt_activation, rowvar=False)

FID = frechet_distance(mu1=mu1, sigma1=sigma1, mu2=mu2, sigma2=sigma2)
print('[FID]')
print('%.3f' % FID)

[FID]
7.177
