In [1]:
import sys
import time
import gensim
import re
import nltk
import os
import gzip

import pandas as pd
import numpy as np

from PIL import Image
from gensim.models import word2vec

from sklearn import datasets
from sklearn.model_selection import train_test_split
from sklearn.manifold import TSNE
from sklearn.metrics.pairwise import cosine_similarity

%matplotlib widget
import matplotlib.pyplot as plt
from matplotlib import offsetbox
from mpl_toolkits.mplot3d import Axes3D


import datafold.pcfold as pfold
from datafold.dynfold import DiffusionMaps
from datafold.utils.plot import plot_pairwise_eigenvector

from utils.utils import Loaders, Plotters

In [2]:
# Read the Fashion-MNIST training and test ('t10k') splits from data/fashion,
# then divide both image arrays by 255.0.
# NOTE(review): presumably the loader returns 8-bit pixel intensities, so this
# rescales them to [0, 1] — confirm against Loaders.load_mnist.
X_train, y_train = Loaders.load_mnist('data/fashion', kind='train')
X_test, y_test = Loaders.load_mnist('data/fashion', kind='t10k')
X_train, X_test = X_train / 255.0, X_test / 255.0

In [3]:
# Fit a diffusion map on the leading subset of the training images and show
# pairwise eigenvector scatter plots coloured by class label.
number_of_elements_train = 1000

X_pcm = pfold.PCManifold(X_train[0:number_of_elements_train])
# After this call the kernel epsilon and the cut-off are read from X_pcm below.
X_pcm.optimize_parameters(result_scaling=2)

print(f'epsilon={X_pcm.kernel.epsilon}, cut-off={X_pcm.cut_off}')

t0 = time.time()
dmap = DiffusionMaps(kernel=pfold.GaussianKernel(epsilon=X_pcm.kernel.epsilon),
                     n_eigenpairs=6, dist_kwargs=dict(cut_off=X_pcm.cut_off))

# The fit result is bound to `dmap_` because the following cell calls
# set_coords() on that name.
dmap_ = dmap.fit(X_pcm)
# Report the wall time measured from t0 (estimator construction + fit).
print(f'diffusion map fit took {time.time() - t0:.2f} s')

# The first eigenvector column is dropped before plotting; the labels are
# sliced to the same subset that was embedded so colours match points.
plot_pairwise_eigenvector(eigenvectors=dmap.eigenvectors_[:, 1:],
                          n=1,
                          idx_start=1,
                          fig_params=dict(figsize=(10,10)),
                          scatter_params=dict(c=y_train[0:number_of_elements_train]))



epsilon=30.760421952779076, cut-off=23.803947410910805


Canvas(toolbar=Toolbar(toolitems=[('Home', 'Reset original view', 'home', 'home'), ('Back', 'Back to previous …

In [4]:
# Select eigenvector coordinates 1 and 2 on the fitted map and embed the
# point cloud in two dimensions.
dmap = dmap_.set_coords([1, 2])
X_dmap_train = dmap.transform(X_pcm)

# X_pcm holds only the leading subset of X_train, so the embedding has fewer
# rows than the full training arrays. Pass labels and images trimmed to the
# embedded rows so every argument describes the same samples.
n_embedded = X_dmap_train.shape[0]
Plotters.plot_embedding_fashion_mnist(X_dmap_train,
                                      y_train[:n_embedded],
                                      X_train[:n_embedded])

Canvas(toolbar=Toolbar(toolitems=[('Home', 'Reset original view', 'home', 'home'), ('Back', 'Back to previous …

In [7]:
# Refit the diffusion map on the same training subset with a larger
# result_scaling, then embed the points using three eigenvector coordinates.
number_of_elements_train = 1000

X_pcm = pfold.PCManifold(X_train[0:number_of_elements_train])
# After this call the kernel epsilon and the cut-off are read from X_pcm below.
X_pcm.optimize_parameters(result_scaling=3)

print(f'epsilon={X_pcm.kernel.epsilon}, cut-off={X_pcm.cut_off}')

t0 = time.time()
dmap = DiffusionMaps(kernel=pfold.GaussianKernel(epsilon=X_pcm.kernel.epsilon),
                     n_eigenpairs=6, dist_kwargs=dict(cut_off=X_pcm.cut_off))

dmap_ = dmap.fit(X_pcm)
# Report the wall time measured from t0 (estimator construction + fit).
print(f'diffusion map fit took {time.time() - t0:.2f} s')

# Keep eigenvector coordinates 2, 3 and 4 for the 3-D embedding.
dmap = dmap_.set_coords([2, 3, 4])
X_dmap_train = dmap.transform(X_pcm)


epsilon=69.21094939375291, cut-off=35.705921116366206


In [8]:
# 3-D scatter of the three-coordinate diffusion-map embedding.
X = X_dmap_train
# The embedding was computed on the leading subset of the training set only,
# so trim the labels to the same number of rows to keep them aligned.
y = y_train[:X.shape[0]]
title = None  # not consumed by the call below
Plotters.plot_embedding_fashion_mnist_3d(X, y)

Canvas(toolbar=Toolbar(toolitems=[('Home', 'Reset original view', 'home', 'home'), ('Back', 'Back to previous …