# Libraries

In [None]:
import os

import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
import seaborn as sn
from keras.models import load_model
from PIL import Image
from sklearn.manifold import TSNE
from sklearn.utils import shuffle 

from resources.data_utils import split_dataset, DataGenerator
from resources.model_utils import evaluate_model
from resources.utils import label_str_to_dec,binary_label_to_decimal,aggregate_generator_labels

In [None]:
def inv_softmax(x, C):
    """Return ``log(x) + C`` element-wise.

    Softmax determines its inputs only up to an additive constant, so the
    logarithm of the probabilities shifted by ``C`` is one valid pre-image.

    Args:
        x: Value(s) passed to ``np.log``.
        C: Additive constant applied after the logarithm.

    Returns:
        ``np.log(x) + C``.
    """
    log_values = np.log(x)
    return log_values + C

# Load data

In [None]:
# Dataset locations.
dataset = 'dataset_name'
path = "/home/datasets/"+dataset+'/'
path_plaintext = path
path_final = path + "preprocessed_80/"

# Output directory; exist_ok makes this a no-op when it is already present
# and avoids the check-then-create race of a separate os.path.exists() test.
path_out = '/home/tsne/'
os.makedirs(path_out, exist_ok=True)

# Dataset geometry. These are defined before the class vocabulary below,
# because char_list is built from num_classes.
filename        = path_plaintext + 'labels_spot_binary.csv'
num_classes     = 15
num_replicates  = 1000
dataset_size    = num_classes * num_replicates
image_dimension = (80, 80)

# Class vocabulary: the strings "1".."num_classes" mapped to indices 0..num_classes-1.
char_list = [str(i) for i in range(1, num_classes+1)]
char2int = {char: i for i, char in enumerate(char_list)}

In [None]:
# Labels: one label string per line of the CSV file.
with open(filename, "r") as csvFile:
    # Strip only the line terminator. Slicing off the last character
    # unconditionally would truncate the final label whenever the file does
    # not end with a newline.
    labels = np.asarray([row.rstrip("\n") for row in csvFile])
# Only the first dataset_size labels are converted with the char2int mapping.
main_labels = label_str_to_dec(labels[0:dataset_size], char2int)

# Collect the paths to all images in the set; image ids run from 1 to dataset_size.
image_prefix = "FImg_ID_"
image_suffix = ".jpg"
images_str = [
    f"{path_final}{image_prefix}{img_idx}{image_suffix}"
    for img_idx in range(1, dataset_size + 1)
]
# One row per image: its file path and its converted label.
main_dataset = pd.DataFrame({"img_path": images_str, "label": main_labels})

# Keyword arguments shared by every DataGenerator built below.
generation_params = dict(
    dim=image_dimension,
    nb_classes=num_classes,
    column_img="img_path",
    column_label="label",
)

# Split the full data frame into train / validation / test frames.
# NOTE(review): the meaning of rank=1 is defined by split_dataset — confirm there.
df_train, df_valid, df_test = split_dataset(
    data_frame=main_dataset,
    rank=1,
    column_label="label",
    random_state=25,
)

# Only the training generator shuffles; validation and test keep their order.
train_generator = DataGenerator(
    data_frame=df_train, batch_size=25, shuffle=True, **generation_params
)
valid_generator = DataGenerator(
    data_frame=df_valid, batch_size=100, shuffle=False, **generation_params
)
test_generator = DataGenerator(
    data_frame=df_test, batch_size=100, shuffle=False, **generation_params
)

# Load trained model

In [None]:
# Location of the saved Keras model (HDF5 file).
# NOTE(review): 'trianed_model' looks like a misspelling of 'trained_model' —
# confirm the directory name on disk before changing it.
path_model = "/home/trianed_model/model.h5"
# Restore the trained network from disk.
model = load_model(path_model)

# Get raw predictions

In [None]:
def _raw_outputs(generator):
    """Run the model on ``generator`` and derive its log-space outputs.

    Returns a 4-tuple ``(predictions, true_labels, filtered, inv_softmax)``:
    the raw output of ``evaluate_model``, the decimal labels aggregated from
    the generator, the predictions with tiny values floored, and
    ``inv_softmax`` of the floored predictions with ``C = 0``.
    """
    # NOTE(review): the meaning of the third argument (6) is defined by
    # evaluate_model — confirm there.
    predictions = evaluate_model(model, generator, 6)
    # NOTE(review): for a shuffling generator, confirm that the aggregated
    # labels come back in the same order as the predictions above.
    true_labels = binary_label_to_decimal(
        aggregate_generator_labels(data_generator=generator)
    )
    # Anything not above 10**-31 is replaced by 10**-32 so that the logarithm
    # inside inv_softmax never receives zero.
    floored = np.where(predictions > (10**-31), predictions, (10**-32))
    return predictions, true_labels, floored, inv_softmax(floored, 0)


# TRAIN
(
    train_predictions,
    train_true_labels,
    train_predictions_filtered,
    train_inv_softmax_prediction,
) = _raw_outputs(train_generator)

# TEST
(
    test_predictions,
    test_true_labels,
    test_predictions_filtered,
    test_inv_softmax_prediction,
) = _raw_outputs(test_generator)

# tSNE

In [None]:
# Fit a 2-D t-SNE embedding on the training-set inverse-softmax outputs.
# No random_state is given, so the embedding can differ between runs.
# NOTE(review): newer scikit-learn releases rename n_iter to max_iter —
# confirm against the installed version.
tsne = TSNE(
    n_components=2,
    perplexity=50,
    n_iter=5000,
    verbose=1,
)
output_tsne = tsne.fit_transform(train_inv_softmax_prediction)
print('DONE')

In [None]:
# Visualize results: the two t-SNE coordinates of every embedded sample.
X = output_tsne[:,0]
Y = output_tsne[:,1]

# 12 "Paired" colours followed by 3 "Set2" colours give 15 entries in total;
# cmap is indexed by class below, so more than 15 classes raises IndexError.
cmap1 = sn.color_palette("Paired", 12)
cmap2 = sn.color_palette("Set2", 3)
cmap = cmap1+cmap2
sn.set_style("white")
fig = plt.figure(figsize=(10,10))

# Plot by class: only samples start..end-1 of each class are drawn.
start = 0
end = start+100
window = slice(start, end)
for class_idx in range(num_classes):
    members = np.where(train_true_labels == class_idx)
    plt.scatter(
        x=X[members][window],
        y=Y[members][window],
        label=class_idx+1,  # legend shows 1-based class numbers
        color=cmap[class_idx],
    )

plt.legend(loc='right',bbox_to_anchor=(1.3,0.5),fontsize=20)
plt.title('tSNE: latent variables',fontsize = 20,fontname='Calibri',color='black')

# Hide ticks and tick labels on both axes.
hidden_ticks = (
    ('x', dict(bottom=False, top=False, labelbottom=False)),
    ('y', dict(left=False, right=False, labelleft=False)),
)
for axis_name, tick_flags in hidden_ticks:
    plt.tick_params(axis=axis_name, which='both', **tick_flags)
plt.show()