In [1]:
# !pip install torch numpy pillow scikit-image pytorch-ignite ipykernel matplotlib pandas tqdm pytorch-msssim

In [2]:
import os
import time
import numpy as np
from tqdm import tqdm

print(f"Importing Pandas...")
begin = time.time()
import pandas as pd
print(f"Pandas import time: {time.time()-begin:.4f} s")

print(f"Importing Pillow...")
begin = time.time()
from PIL import Image
print(f"Pillow import time: {time.time()-begin:.4f} s")

print(f"Importing Scikit-Image...")
begin = time.time()
from skimage.metrics import structural_similarity
print(f"Scikit-Image import time: {time.time()-begin:.4f} s")

print(f"Importing PyTorch...")
begin = time.time()
from pytorch_msssim import ssim, ms_ssim
import torch
print(f"PyTorch import time: {time.time()-begin:.4f} s")

print(f"Importing Ignite...")
begin = time.time()
from ignite.metrics import SSIM
print(f"Ignite import time: {time.time()-begin:.4f} s")

Importing Pandas...
Pandas import time: 1.3809 s
Importing Pillow...
Pillow import time: 0.0842 s
Importing Scikit-Image...
Scikit-Image import time: 0.9846 s
Importing PyTorch...
PyTorch import time: 11.1569 s
Importing Ignite...
Ignite import time: 6.5652 s


In [3]:
# x = 0
# for i in tqdm(range(10_000_000)):
#     x += i
# print(x)

In [4]:
# Device used by every "*_cuda" benchmark in this notebook.
# NOTE(review): hard-coded with no CPU fallback -- presumably the notebook is
# only meant to run on a machine with a CUDA GPU; confirm.
DEVICE = 'cuda:0'
# Make it PyTorch's default device for tensors created without an explicit `device=`.
# NOTE(review): this is a global setting, so tensors that libraries allocate
# internally may also land on the GPU during the CPU benchmarks -- verify.
torch.set_default_device(DEVICE)

In [5]:
# Both Ignite metrics use the same SSIM configuration (11x11 Gaussian window
# with sigma 1.5, data range 255, k1/k2 = 0.01/0.03); only the device differs.
_IGNITE_SSIM_KWARGS = dict(
    data_range=255,
    gaussian=True,
    sigma=(1.5, 1.5),
    kernel_size=(11, 11),
    k1=0.01,
    k2=0.03,
)

ignite_ssim_cpu = SSIM(device="cpu", **_IGNITE_SSIM_KWARGS)
ignite_ssim_cuda = SSIM(device=DEVICE, **_IGNITE_SSIM_KWARGS)

def ssim_skimage(clean_image, noisy_image):
    """Compute SSIM between two images with scikit-image.

    The settings mirror the other implementations benchmarked in this
    notebook: an 11-pixel Gaussian-weighted window with sigma 1.5, a data
    range of 255, population (non-sample) covariance and K1/K2 = 0.01/0.03.

    Args:
        clean_image: Reference image array whose axis 2 holds the channels.
        noisy_image: Distorted image array with the same layout.

    Returns:
        The value returned by ``structural_similarity`` for these settings.
    """
    return structural_similarity(
        clean_image,
        noisy_image,
        win_size=11,
        gaussian_weights=True,
        sigma=1.5,
        use_sample_covariance=False,
        data_range=255,
        # `channel_axis` is the supported way to mark the channel dimension;
        # the deprecated `multichannel=True` flag it replaced is no longer
        # passed alongside it.
        channel_axis=2,
        K1=0.01,
        K2=0.03,
    )
    
def ssim_torch(clean_image, noisy_image):
    """Compute SSIM with pytorch-msssim's ``ssim``.

    Uses an 11-tap window with sigma 1.5, a data range of 255,
    K = (0.01, 0.03), and requests a non-negative result.

    Args:
        clean_image: Reference image tensor.
        noisy_image: Distorted image tensor.

    Returns:
        Whatever ``ssim`` returns for the pair.
    """
    settings = {
        "data_range": 255,
        "win_size": 11,
        "win_sigma": 1.5,
        "win": None,  # let the library build the window itself
        "K": (0.01, 0.03),
        "nonnegative_ssim": True,
    }
    return ssim(clean_image, noisy_image, **settings)
    
def _evaluate_ignite_metric(metric, clean_image, noisy_image):
    """Score a single image pair with `metric`, discarding any earlier state."""
    metric.reset()
    metric.update((clean_image, noisy_image))
    return metric.compute()


def ssim_ignite_cpu(clean_image, noisy_image):
    """Compute SSIM for one image pair with the CPU Ignite metric."""
    return _evaluate_ignite_metric(ignite_ssim_cpu, clean_image, noisy_image)


def ssim_ignite_cuda(clean_image, noisy_image):
    """Compute SSIM for one image pair with the CUDA Ignite metric."""
    return _evaluate_ignite_metric(ignite_ssim_cuda, clean_image, noisy_image)

In [6]:
# Noise strengths swept by the benchmark: 0, 10, ..., 100.
# `sigma_range` is an exclusive upper bound, hence 101 rather than 100.
sigma_range = 101
sigmas = [level for level in range(0, sigma_range, 10)]

In [7]:
# One column per benchmarked implementation/device pair. The timing table
# uses the same names with a " (ms)" suffix.
_METRIC_COLUMNS = [
    'ski',
    'pm_ssim_cpu', 'pm_ssim_cuda',
    'ignite_cpu', 'ignite_cuda',
    'pm_msssim_cpu', 'pm_msssim_cuda',
]

df_result = pd.DataFrame(columns=['sigma', *_METRIC_COLUMNS])
df_time = pd.DataFrame(
    columns=['sigma', *(f"{name} (ms)" for name in _METRIC_COLUMNS)]
)

# Assigning `sigma` creates one row per noise level; the remaining cells stay
# empty until a benchmark run fills them in.
for _table in (df_result, df_time):
    _table['sigma'] = sigmas

In [8]:
# A bare `import urllib` does not load the `request` submodule; import it
# explicitly so the download does not depend on another library having
# imported it first.
import urllib.request

# Fetch the Kodak test image once and reuse the local copy on later runs.
test_image = 'kodim10.png'
if not os.path.isfile(test_image):
    print(f"Downloading test image {test_image}...")
    urllib.request.urlretrieve(
        "http://r0k.us/graphics/kodak/kodak/kodim10.png", test_image)

# Read the pixels inside a context manager so the file handle is closed as
# soon as the data has been copied into the array.
with Image.open(test_image) as pil_image:
    img = np.array(pil_image).astype(np.float32)

# Channel-last array -> batched channel-first tensor, on CPU and on the GPU.
img_torch_cpu = torch.from_numpy(img).to(device="cpu").unsqueeze(0).permute(0, 3, 1, 2)  # 1, C, H, W
img_torch_cuda = img_torch_cpu.to(device=DEVICE)

In [9]:
# Per-sigma noisy images in the three formats the benchmarked functions take:
# NumPy arrays, CPU tensors and CUDA tensors.
noisy_images_np = []
noisy_images_torch_cpu = []
noisy_images_torch_cuda = []

# Parallel clean/noisy lists plus (currently unfilled) per-image score lists.
img_batch_cpu = []
img_noise_batch_cpu = []
single_image_ssim_cpu = []

img_batch_cuda = []
img_noise_batch_cuda = []
single_image_ssim_cuda = []

# Fixed seed so every run produces the same noise.
np.random.seed(42)

for sigma in sigmas:
    # NOTE(review): `np.random.rand` is uniform on [0, 1), so the noise is
    # non-negative and `sigma` scales its upper bound rather than a standard
    # deviation -- confirm that Gaussian noise (`randn`) was not intended.
    noise = sigma * np.random.rand(*img.shape)
    img_noise = (img + noise).astype(np.float32).clip(0,255)

    # Same layout conversion as for the clean image: 1, C, H, W
    img_noise_torch_cpu = torch.from_numpy(img_noise).to(device="cpu").unsqueeze(0).permute(0, 3, 1, 2)
    img_noise_torch_cuda = img_noise_torch_cpu.to(device=DEVICE)

    noisy_images_np.append(img_noise)
    noisy_images_torch_cpu.append(img_noise_torch_cpu)
    noisy_images_torch_cuda.append(img_noise_torch_cuda)

# The batch lists pair the (repeated) clean image with each noisy image.
img_batch_cpu.extend(img_torch_cpu for _ in sigmas)
img_noise_batch_cpu.extend(noisy_images_torch_cpu)

img_batch_cuda.extend(img_torch_cuda for _ in sigmas)
img_noise_batch_cuda.extend(noisy_images_torch_cuda)

In [10]:
def _cuda_sync(*values):
    """Wait for pending GPU work on the device of every CUDA tensor in `values`."""
    for value in values:
        if isinstance(value, torch.Tensor) and value.is_cuda:
            torch.cuda.synchronize(value.device)


def process(clean_image, noisy_images, func, col, n_repeat=1):
    """Benchmark `func` on every noise level and record score and timing.

    For each entry of the module-level `sigmas`, `func(clean_image, noisy)`
    is called `n_repeat` times. The last score is written to column `col`
    of `df_result`, and the mean wall-clock time per call (in milliseconds)
    to column ``f"{col} (ms)"`` of `df_time`.

    CUDA kernels are launched asynchronously, so when the inputs live on a
    GPU the device is synchronized before the timer starts and again before
    it stops. Without that, the measurement can end while work is still
    queued on the device.

    Args:
        clean_image: Reference image passed as the first argument of `func`.
        noisy_images: Sequence of noisy images, aligned index-by-index with
            `sigmas`.
        func: Callable ``func(clean_image, noisy_image) -> score``.
        col: Name of the result column to fill.
        n_repeat: Number of timed calls to average over; must be >= 1.

    Raises:
        ValueError: If `n_repeat` is smaller than 1.
    """
    if n_repeat < 1:
        raise ValueError(f"n_repeat must be at least 1, got {n_repeat}")

    for i, sigma in enumerate(tqdm(sigmas)):
        img_noise = noisy_images[i]

        # Drain work queued by earlier iterations so it is not billed here.
        _cuda_sync(clean_image, img_noise)
        begin = time.perf_counter()
        for _ in range(n_repeat):
            score = func(clean_image, img_noise)
        # Wait for the GPU to actually finish before stopping the clock.
        _cuda_sync(clean_image, img_noise)
        time_ms = (time.perf_counter() - begin) / n_repeat * 1000

        # Store tensors as plain numbers (or arrays, if not scalar).
        if isinstance(score, torch.Tensor):
            score = score.detach()
            score = score.item() if score.numel() == 1 else score.cpu().numpy()
        df_result.loc[df_result['sigma']==sigma, col] = score
        df_time.loc[df_time['sigma']==sigma, f"{col} (ms)"] = time_ms

In [11]:
# Warm up the GPU
process(img_torch_cuda, noisy_images_torch_cuda, ssim_torch, 'pm_ssim_cuda', n_repeat=10)
process(img_torch_cuda, noisy_images_torch_cuda, ssim_ignite_cuda, 'ignite_cuda', n_repeat=10)
process(img_torch_cuda, noisy_images_torch_cuda, ms_ssim, 'pm_msssim_cuda', n_repeat=10)

100%|██████████| 11/11 [00:01<00:00,  6.34it/s]
100%|██████████| 11/11 [00:00<00:00, 53.95it/s]
100%|██████████| 11/11 [00:01<00:00,  9.73it/s]


In [12]:
# Number of timed calls averaged per (method, sigma) measurement in the benchmark runs.
N_REPEAT = 200

In [13]:
# Actual GPU benchmark
process(img_torch_cuda, noisy_images_torch_cuda, ssim_torch, 'pm_ssim_cuda', n_repeat=N_REPEAT)
process(img_torch_cuda, noisy_images_torch_cuda, ssim_ignite_cuda, 'ignite_cuda', n_repeat=N_REPEAT)
process(img_torch_cuda, noisy_images_torch_cuda, ms_ssim, 'pm_msssim_cuda', n_repeat=N_REPEAT)

100%|██████████| 11/11 [00:05<00:00,  2.17it/s]
100%|██████████| 11/11 [00:02<00:00,  5.06it/s]
100%|██████████| 11/11 [00:13<00:00,  1.22s/it]


In [14]:
# Intermediate view: only the CUDA columns have been filled in at this point.
df_result

Unnamed: 0,sigma,ski,pm_ssim_cpu,pm_ssim_cuda,ignite_cpu,ignite_cuda,pm_msssim_cpu,pm_msssim_cuda
0,0,,,1.0,,1.0,,1.0
1,10,,,0.925051,,0.93338,,0.989004
2,20,,,0.773435,,0.788487,,0.963898
3,30,,,0.62326,,0.641529,,0.929061
4,40,,,0.502339,,0.521008,,0.890179
5,50,,,0.410533,,0.428362,,0.850126
6,60,,,0.340449,,0.357037,,0.811729
7,70,,,0.286809,,0.301982,,0.772992
8,80,,,0.245654,,0.259704,,0.736515
9,90,,,0.212196,,0.225221,,0.703794


In [15]:
# Intermediate view: mean time per call (ms) for the CUDA runs only.
df_time

Unnamed: 0,sigma,ski (ms),pm_ssim_cpu (ms),pm_ssim_cuda (ms),ignite_cpu (ms),ignite_cuda (ms),pm_msssim_cpu (ms),pm_msssim_cuda (ms)
0,0,,,1.770556,,0.976009,,6.143879
1,10,,,1.78089,,0.983979,,6.077118
2,20,,,1.761029,,0.97937,,6.157712
3,30,,,1.758968,,0.98076,,6.232932
4,40,,,1.758389,,0.980421,,6.118175
5,50,,,1.76852,,0.976683,,6.093109
6,60,,,1.770424,,0.979468,,6.080821
7,70,,,1.762365,,0.981872,,6.087167
8,80,,,1.754644,,0.98296,,6.080809
9,90,,,1.774627,,0.982845,,6.099255


In [16]:
# CPU benchmark of scikit-image, fed the NumPy versions of the images.
process(img, noisy_images_np, ssim_skimage, 'ski', n_repeat=N_REPEAT)

100%|██████████| 11/11 [02:32<00:00, 13.82s/it]


In [17]:
# CPU benchmark of pytorch-msssim's SSIM on the CPU tensors.
process(img_torch_cpu, noisy_images_torch_cpu, ssim_torch, 'pm_ssim_cpu', n_repeat=N_REPEAT)

100%|██████████| 11/11 [00:39<00:00,  3.56s/it]


In [18]:
# CPU benchmark of the Ignite SSIM metric on the CPU tensors.
process(img_torch_cpu, noisy_images_torch_cpu, ssim_ignite_cpu, 'ignite_cpu', n_repeat=N_REPEAT)

100%|██████████| 11/11 [01:07<00:00,  6.09s/it]


In [19]:
# CPU benchmark of pytorch-msssim's multi-scale SSIM, called with its default arguments.
process(img_torch_cpu, noisy_images_torch_cpu, ms_ssim, 'pm_msssim_cpu', n_repeat=N_REPEAT)

100%|██████████| 11/11 [00:54<00:00,  4.93s/it]


In [20]:
# Final scores for every implementation, CPU and CUDA, per noise level.
df_result

Unnamed: 0,sigma,ski,pm_ssim_cpu,pm_ssim_cuda,ignite_cpu,ignite_cuda,pm_msssim_cpu,pm_msssim_cuda
0,0,1.0,1.0,1.0,1.0,1.0,1.0,1.0
1,10,0.932475,0.932476,0.925051,0.93338,0.93338,0.991116,0.989004
2,20,0.785769,0.78577,0.773435,0.788488,0.788487,0.967452,0.963898
3,30,0.637239,0.637241,0.62326,0.64153,0.641529,0.934444,0.929061
4,40,0.515662,0.515664,0.502339,0.521009,0.521008,0.897658,0.890179
5,50,0.422473,0.422474,0.410533,0.428363,0.428362,0.858975,0.850126
6,60,0.350911,0.350912,0.340449,0.357038,0.357037,0.821748,0.811729
7,70,0.295807,0.295808,0.286809,0.301982,0.301982,0.784397,0.772992
8,80,0.253616,0.253617,0.245654,0.259705,0.259704,0.748474,0.736515
9,90,0.219286,0.219287,0.212196,0.225221,0.225221,0.716415,0.703794


In [21]:
# Final mean time per call (ms) for every implementation, per noise level.
df_time

Unnamed: 0,sigma,ski (ms),pm_ssim_cpu (ms),pm_ssim_cuda (ms),ignite_cpu (ms),ignite_cuda (ms),pm_msssim_cpu (ms),pm_msssim_cuda (ms)
0,0,70.169756,18.487931,1.770556,30.798527,0.976009,24.499149,6.143879
1,10,70.015085,17.736758,1.78089,29.635338,0.983979,25.055882,6.077118
2,20,68.490763,18.464452,1.761029,29.015168,0.97937,24.244493,6.157712
3,30,69.294481,17.223086,1.758968,30.276556,0.98076,24.893183,6.232932
4,40,68.621014,16.835908,1.758389,27.313126,0.980421,24.674724,6.118175
5,50,68.212535,18.334048,1.76852,32.100331,0.976683,24.643792,6.093109
6,60,69.911158,17.54367,1.770424,30.590572,0.979468,24.570827,6.080821
7,70,69.408867,18.364719,1.762365,30.008297,0.981872,24.332108,6.087167
8,80,68.718978,17.624662,1.754644,31.902542,0.98296,24.42374,6.080809
9,90,67.992766,17.99126,1.774627,31.246952,0.982845,25.032125,6.099255
