In [1]:
import os
import glob
import random
import itertools
import pandas as pd
import numpy as np
from PIL import Image
from sklearn.decomposition import PCA
from sklearn.manifold import TSNE

In [2]:
# t-SNE hyperparameter grid; generate_tsne runs every combination of these lists.
iteration_list = [250, 500]
perplexity_list = [3, 10, 30]
pca_dim_list = [25, 50]
learning_rate_list = [10, 50]

# Global seed; generate_tsne passes it to TSNE as random_state.
SEED = 2020

# Dataset label -> image root directory (the GTA entry contains a glob wildcard).
datasets_path = dict(
    BDD='./bdd100k/seg/images',
    cityscape='./cityscape/leftImg8bit',
    GTA='./GTA/*_images',
)

def set_seed(SEED):
    """Seed Python's ``random`` module and NumPy's global RNG with ``SEED``."""
    for seed_fn in (random.seed, np.random.seed):
        seed_fn(SEED)

## Sample images

In [7]:
def sample_images(datasets_path, SEED=2020, n_sample=800):
    """Draw a seeded random sample of image files from every dataset.

    Parameters
    ----------
    datasets_path : dict
        Maps a dataset label to its root directory (glob wildcards are allowed
        in the directory). Each root is searched recursively for entries
        matching ``*.*``.
    SEED : int, default 2020
        Seed handed to ``set_seed`` before sampling and shuffling.
    n_sample : int, default 800
        Number of files drawn from each dataset.

    Returns
    -------
    images_path : list
        Sampled file paths from all datasets, shuffled together.
    images_label : list
        The dataset label (dict key) of each path, in the same order.

    Raises
    ------
    ValueError
        If a dataset contains fewer than ``n_sample`` matching files.
    """
    set_seed(SEED)

    samples = []
    for key, dataset_path in datasets_path.items():
        pattern = os.path.join(dataset_path, '**', '*.*')
        # glob yields entries in arbitrary, filesystem-dependent order, so sort
        # them to make the seeded sample reproducible. Directories whose names
        # contain a dot also match '*.*' and are dropped here.
        candidates = sorted(
            path for path in glob.glob(pattern, recursive=True)
            if os.path.isfile(path)
        )
        if len(candidates) < n_sample:
            raise ValueError(
                f'dataset {key!r} has only {len(candidates)} files under '
                f'{dataset_path!r}; cannot sample {n_sample}'
            )
        samples.extend((path, key) for path in random.sample(candidates, n_sample))

    # Shuffle paths and labels as pairs so they stay aligned.
    random.shuffle(samples)
    images_path = [path for path, _ in samples]
    images_label = [label for _, label in samples]
    return images_path, images_label

In [8]:
# Draw the image sample once, using sample_images' default seed and per-dataset sample size.
images_path, images_label = sample_images(datasets_path)

# Generate t-SNE

In [15]:
def _load_flat_images(images_path, mode, image_size):
    """Load each image, resize it to ``image_size``, convert it to ``mode`` and flatten it to one row."""
    rows = []
    for image_path in images_path:
        # The context manager releases the file handle even if decoding fails.
        with Image.open(image_path) as raw:
            converted = raw.resize(image_size).convert(mode)
            rows.append(np.array(converted).reshape(-1))
    return np.stack(rows)


def generate_tsne(images_path, images_label, iteration_list, perplexitay_list, pca_dim_list, learning_rate_list, output_path='./csv_data', image_size=(512, 512)):
    """Run a PCA + t-SNE hyperparameter sweep over the images and save each embedding as CSV.

    For each colour mode ('L' and 'RGB') the images are loaded, resized,
    flattened and reduced with PCA, then embedded into 3 dimensions with t-SNE
    for every combination of the given hyperparameters. Each run is written to
    ``tsne_{iteration}_{perplexity}_{pca_dim}_{learning_rate}_{mode}.csv`` with
    columns ``x``, ``y``, ``z``, ``img_path`` and ``img_label``.

    Parameters
    ----------
    images_path : list
        Paths of the images to embed.
    images_label : list
        One label per image, stored alongside the embedding.
    iteration_list : iterable
        Values for the t-SNE iteration count.
    perplexitay_list : iterable
        Values for the t-SNE perplexity. The misspelled name is kept so that
        existing keyword callers keep working.
    pca_dim_list : iterable
        Values for the number of PCA components; each is capped at the
        data's sample and feature counts.
    learning_rate_list : iterable
        Values for the t-SNE learning rate.
    output_path : str, default './csv_data'
        Directory the CSV files are written to; created if missing.
    image_size : tuple, default (512, 512)
        Size every image is resized to before flattening.

    Notes
    -----
    Reads the module-level ``SEED`` and uses it as ``random_state`` for both
    PCA and t-SNE.
    """
    import inspect

    os.makedirs(output_path, exist_ok=True)

    # scikit-learn 1.5 renamed TSNE's ``n_iter`` to ``max_iter`` and later
    # removed the old name; use whichever keyword the installed version accepts.
    tsne_params = inspect.signature(TSNE).parameters
    iter_kwarg = 'max_iter' if 'max_iter' in tsne_params else 'n_iter'

    # Materialise the grid once so it can be reused for both colour modes.
    hparam_list = list(itertools.product(iteration_list, perplexitay_list, pca_dim_list, learning_rate_list))

    for mode in ['L', 'RGB']:
        images = _load_flat_images(images_path, mode, image_size)

        # The PCA projection depends only on pca_dim, so fit it once per
        # value instead of once per hyperparameter combination.
        pca_cache = {}
        for iteration, perplexity, pca_dim, learning_rate in hparam_list:
            if pca_dim not in pca_cache:
                # Seeded so the randomized SVD solver gives repeatable output.
                pca = PCA(n_components=min(*images.shape, pca_dim), random_state=SEED)
                pca_cache[pca_dim] = pca.fit_transform(images)
            images_pca = pca_cache[pca_dim]

            tsne = TSNE(
                n_components=3,
                perplexity=perplexity,
                learning_rate=learning_rate,
                random_state=SEED,
                **{iter_kwarg: iteration}
            )

            emb = tsne.fit_transform(images_pca)
            emb_df = pd.DataFrame(emb, columns=['x', 'y', 'z'])
            emb_df['img_path'] = images_path
            emb_df['img_label'] = images_label
            emb_df_path = os.path.join(output_path, f'tsne_{iteration}_{perplexity}_{pca_dim}_{learning_rate}_{mode}.csv')
            emb_df.to_csv(emb_df_path, index=False)

In [None]:
# Run the full hyperparameter sweep; CSVs go to generate_tsne's default output_path.
generate_tsne(images_path, images_label, iteration_list, perplexity_list, pca_dim_list, learning_rate_list)