In [1]:
import ot
import numpy as np
import matplotlib.pyplot as plt
import pandas as pd
import os
import cv2
import glob
import itertools
import ot
from utils.image import Image
from utils.Visualizations import *
from utils.utils import *
import time
from tqdm import tqdm

We will be looking into three types of distances:

The Euclidean distance ($L_2$):
$$L_2(\mu, \nu)=\sum{|\mu-\nu|^2}$$

The Wasserstein distance ($W_p$):
$$W_p(\mu,\nu)=\inf_{\pi\in\Pi(\mu,\nu)} \bigg({ \sum |x-y|^p \cdot \pi(x,y)}\bigg)^\frac{1}{p}$$

The Fourier distance ($f_{1,2}$):
$$f_{1,2}(\mu,\nu)=\bigg( \frac{1}{|T|^2}\int_{[0,T]^2} \frac{|\hat{\mu}(k) - \hat{\nu}(k)|^2}{|k|^2} dk \bigg)^\frac{1}{2}$$

## Images dataset - v1

This dataset consists of pairs of synthetic images, each containing a single square zone of white pixels at a different location.
Every image is created by setting a zone of ones in an array of zeros.
Only the Wasserstein and L2 distances will be calculated.

We will denote the images by im1=p, im2=q:
* $\tilde{p} = p_+ + q_-$
* $\tilde{q} = q_+ + p_-$

The Wasserstein distance will be calculated between the normalized and processed images $\tilde{p}, \tilde{q}$. 

The L2 distance will simply be calculated on the noised images.

In [None]:
# Experiment configuration for the synthetic two-square images.

# Side lengths of the square test images: five evenly spaced sizes from 10 to 50.
im_size_values = list(map(int, np.linspace(10, 50, 5)))

# 31 log-spaced SNR levels, descending from 1e3 to 1e-2.
SNR_values = np.logspace(3, -2, 31)

# Metric name forwarded to run_experiment_and_append_images; either 'L1' or 'L2'.
distance_metric = 'L1'

# Forwarded to run_experiment_and_append_images as n_samples.
n_samples = 30

# Empty frame that the experiment loop reassigns on every iteration.
df_im_l1 = pd.DataFrame()

In [None]:
# Sweep every (image size, SNR) combination on the synthetic two-square images.
# `tqdm` is imported as the progress-bar callable itself (`from tqdm import tqdm`),
# so it is called directly; `tqdm.tqdm(...)` would raise AttributeError.
for im_size in tqdm(im_size_values):
    # Index bounds of the two squares, as fractions of the image side length.
    lo1, hi1 = int(0.1 * im_size), int(0.3 * im_size)
    lo2, hi2 = int(0.7 * im_size), int(0.9 * im_size)

    for SNR in SNR_values:
        # Fresh arrays are built on every iteration so each call receives untouched images.
        im1 = np.zeros((im_size, im_size))
        im1[lo1:hi1, lo1:hi1] = 1  # square of ones at 10%-30% of each axis
        im2 = np.zeros((im_size, im_size))
        im2[lo2:hi2, lo2:hi2] = 1  # square of ones at 70%-90% of each axis

        # The helper returns a frame, which replaces df_im_l1 for the next iteration.
        df_im_l1 = run_experiment_and_append_images(df=df_im_l1, im1=im1, im2=im2, SNR=SNR,
                                                    distance_metric=distance_metric,
                                                    n_samples=n_samples)

## Images dataset - v2

This image dataset contains the distances between different images in the DOTmark dataset.

We will denote the images by im1=p, im2=q:
* $\tilde{p} = p_+ + q_-$
* $\tilde{q} = q_+ + p_-$

The Wasserstein and Fourier distances will be calculated between the normalized and processed images $\tilde{p}, \tilde{q}$. 

The L2 distance will simply be calculated on the noised images.

In [None]:
# Locate the DOTmark picture folders.
# The relative path is assembled with os.path.join so it resolves on every OS; the
# trailing "" keeps the final separator that the previous hard-coded string carried.
dotmark_pictures_path = os.path.join("..", "DOTmark_1.0", "Pictures", "")
full_path = os.path.join(os.getcwd(), dotmark_pictures_path)

# Image resolutions and per-category image ids used by the experiments.
resolutions = [32, 64, 128, 256, 512]
image_numbers = ['01','02','03','04','05','06','07','08','09','10']

# Every sub-directory of the pictures folder is treated as one category.
categories_pattern = os.path.join(dotmark_pictures_path, "*")
# glob returns matches in arbitrary order; sorting makes category_names[0] reproducible.
category_dirs = sorted(path for path in glob.glob(categories_pattern) if os.path.isdir(path))
category_names = [os.path.basename(category) for category in category_dirs]

In [None]:
# Experiment configuration: run on the first discovered category.
if not category_names:
    # Without this check an empty directory listing surfaces as a bare IndexError below.
    raise FileNotFoundError(
        f"No category directories found under {dotmark_pictures_path!r}")
category = category_names[0]
res = 32
num_samples = 5
category_dir = os.path.join(full_path, category)

# 31 log-spaced SNR levels, descending from 1e5 to 1e1.
SNR_values = np.logspace(start=5, stop=1, num=31)
# 31 log-spaced values from 1e-5 to 1e-1.
# NOTE(review): not referenced by the experiment loop that follows — confirm it is still needed.
noise_values = np.logspace(start=-5, stop=-1, num=31)

# All unordered pairs of image ids.
pairs = list(itertools.combinations(image_numbers, 2))
# Built by calculate_costs for a (res, res) shape and passed to the Wasserstein helpers.
cost_matrix = calculate_costs((res, res))
# Empty frame that the experiment loop fills with one row per (SNR, pair).
df_im_l1 = pd.DataFrame()

In [None]:
# Run the experiment for every SNR level and image pair.
# Rows are collected in a plain list and turned into a DataFrame once after the loop,
# instead of growing the frame row by row through the private DataFrame._append.
records = []
for SNR in tqdm(SNR_values):
    noise_param = noise_from_SNR(SNR, 1, res)
    for image_pair in pairs:
        image1 = Image(res, category, image_pair[0], full_path)
        image2 = Image(res, category, image_pair[1], full_path)

        # Calculate original distances without noise
        w1_dist_original, w1_time_original = calculate_and_time_wasserstein(image1.image, image2.image, cost_matrix)
        f_dist_original, f_time_original = calculate_and_time_fourier1(image1.image, image2.image)
        l2_dist_original, l2_time_original = calculate_and_time_l2(image1.image, image2.image)

        # Distances and timings for the noised pair, as returned by analyze_image_pair.
        results = Image.analyze_image_pair(image1, image2, cost_matrix, num_samples, noise_param)
        w1_dist_noised, f_dist_noised, l2_dist_noised, time_w1, time_f, time_l2 = results

        # Each ratio is original / noised for the same metric.
        new_row = {
            'Category': category,
            'image1_index': image_pair[0],
            'image2_index': image_pair[1],
            'Noise': noise_param,
            'SNR': SNR,
            'Resolution': res,
            'Wasserstein Original': w1_dist_original,
            'Wasserstein Noised': w1_dist_noised,
            'Wasserstein Ratio': w1_dist_original / w1_dist_noised,
            'Wasserstein Time': time_w1,
            'Fourier Original': f_dist_original,
            'Fourier Noised': f_dist_noised,
            'Fourier Ratio': f_dist_original / f_dist_noised,
            'Fourier Time': time_f,
            'L2 Original': l2_dist_original,
            'L2 Noised': l2_dist_noised,
            'L2 Ratio': l2_dist_original / l2_dist_noised,
            'L2 Time': time_l2}
        records.append(new_row)

new_rows = pd.DataFrame(records)
# Keep the append semantics: rows already present in df_im_l1 stay in front of the new ones.
df_im_l1 = new_rows if df_im_l1.empty else pd.concat([df_im_l1, new_rows], ignore_index=True)
# NOTE(review): the v3 experiment cell further down also writes 'results.csv', so running
# both cells overwrites this output — confirm whether a distinct file name is wanted.
df_im_l1.to_csv('results.csv', index=False)


## Images dataset - v3

This image dataset contains the distances between different images in the DOTmark dataset.

We will denote the images by im1=p, im2=q:
* $\tilde{p} = p_+ + q_-$
* $\tilde{q} = q_+ + p_-$

The Wasserstein distance will be calculated between the normalized and processed images $\tilde{p}, \tilde{q}$. 

The L2 and Fourier distances will simply be calculated on the noised images.

In [None]:
# Locate the DOTmark picture folders.
# The relative path is assembled with os.path.join so it resolves on every OS; the
# trailing "" keeps the final separator that the previous hard-coded string carried.
dotmark_pictures_path = os.path.join("..", "DOTmark_1.0", "Pictures", "")
full_path = os.path.join(os.getcwd(), dotmark_pictures_path)

# Image resolutions and per-category image ids used by the experiments.
resolutions = [32, 64, 128, 256, 512]
image_numbers = ['01','02','03','04','05','06','07','08','09','10']

# Every sub-directory of the pictures folder is treated as one category.
categories_pattern = os.path.join(dotmark_pictures_path, "*")
# glob returns matches in arbitrary order; sorting makes category_names[0] reproducible.
category_dirs = sorted(path for path in glob.glob(categories_pattern) if os.path.isdir(path))
category_names = [os.path.basename(category) for category in category_dirs]

In [None]:
# Experiment configuration: run on the first discovered category.
if not category_names:
    # Without this check an empty directory listing surfaces as a bare IndexError below.
    raise FileNotFoundError(
        f"No category directories found under {dotmark_pictures_path!r}")
category = category_names[0]
res = 32
num_samples = 5
category_dir = os.path.join(full_path, category)

# 31 log-spaced SNR levels, descending from 1e5 to 1e1.
SNR_values = np.logspace(start=5, stop=1, num=31)
# 31 log-spaced values from 1e-5 to 1e-1.
# NOTE(review): not referenced by the experiment loop that follows — confirm it is still needed.
noise_values = np.logspace(start=-5, stop=-1, num=31)

# All unordered pairs of image ids.
pairs = list(itertools.combinations(image_numbers, 2))
# Built by calculate_costs for a (res, res) shape and passed to the Wasserstein helpers.
cost_matrix = calculate_costs((res, res))
# Empty frame that the experiment loop fills with one row per (SNR, pair).
df_im_l1 = pd.DataFrame()

In [None]:
# Run the experiment for every SNR level and image pair.
# Rows are collected in a plain list and turned into a DataFrame once after the loop,
# instead of growing the frame row by row through the private DataFrame._append.
records = []
for SNR in tqdm(SNR_values):
    noise_param = noise_from_SNR(SNR, 1, res)
    for image_pair in pairs:
        image1 = Image(res, category, image_pair[0], full_path)
        image2 = Image(res, category, image_pair[1], full_path)

        # Calculate original distances without noise
        w1_dist_original, w1_time_original = calculate_and_time_wasserstein(image1.image, image2.image, cost_matrix)
        f_dist_original, f_time_original = calculate_and_time_fourier(image1.image, image2.image)
        l2_dist_original, l2_time_original = calculate_and_time_l2(image1.image, image2.image)

        # Distances and timings for the noised pair, as returned by analyze_image_pair.
        results = Image.analyze_image_pair(image1, image2, cost_matrix, num_samples, noise_param)
        w1_dist_noised, f_dist_noised, l2_dist_noised, time_w1, time_f, time_l2 = results

        # Each ratio is original / noised for the same metric.
        new_row = {
            'Category': category,
            'image1_index': image_pair[0],
            'image2_index': image_pair[1],
            'Noise': noise_param,
            'SNR': SNR,
            'Resolution': res,
            'Wasserstein Original': w1_dist_original,
            'Wasserstein Noised': w1_dist_noised,
            'Wasserstein Ratio': w1_dist_original / w1_dist_noised,
            'Wasserstein Time': time_w1,
            'Fourier Original': f_dist_original,
            'Fourier Noised': f_dist_noised,
            'Fourier Ratio': f_dist_original / f_dist_noised,
            'Fourier Time': time_f,
            'L2 Original': l2_dist_original,
            'L2 Noised': l2_dist_noised,
            'L2 Ratio': l2_dist_original / l2_dist_noised,
            'L2 Time': time_l2}
        records.append(new_row)

new_rows = pd.DataFrame(records)
# Keep the append semantics: rows already present in df_im_l1 stay in front of the new ones.
df_im_l1 = new_rows if df_im_l1.empty else pd.concat([df_im_l1, new_rows], ignore_index=True)
# NOTE(review): the v2 experiment cell earlier in the notebook writes to the same
# 'results.csv', so this call overwrites its output — confirm whether that is intended.
df_im_l1.to_csv('results.csv', index=False)


## Images dataset v4