# Manifold embedding
___
Trying different manifold embedding techniques on simple datasets.

In [1]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
from mpl_toolkits.mplot3d import Axes3D
import seaborn as sns

from sklearn.datasets import load_iris, load_digits
from sklearn.model_selection import train_test_split

from umap import UMAP
from sklearn.manifold import TSNE

%matplotlib inline

# The bundled seaborn style sheets were renamed to 'seaborn-v0_8*' in
# matplotlib 3.6 and the old names were removed in 3.8, so use whichever
# name this installation actually provides.
plt.style.use('seaborn-v0_8' if 'seaborn-v0_8' in plt.style.available else 'seaborn')

# Default figure size and font sizes for every plot in the notebook.
plt.rcParams.update({
    'figure.figsize': (10, 8),
    'axes.titlesize': 20,
    'axes.labelsize': 16,
    'xtick.labelsize': 14,
    'ytick.labelsize': 14,
})

## t-SNE

In [None]:
digits = load_digits()

In [None]:
# Feature matrix of the digits dataset loaded in the previous cell.
X = digits.data

# t-SNE estimator with the library's default settings.
tsne = TSNE()

# Fit t-SNE on X and keep the embedded coordinates; the final expression
# displays the embedding's shape as the cell output.
X_emb = tsne.fit_transform(X)
X_emb.shape

In [None]:
# Scatter the first two embedding coordinates, coloured by digit label.
plt.scatter(
    X_emb[:, 0],
    X_emb[:, 1],
    s=5,
    c=digits.target,
    cmap='Spectral',
)
plt.gca().set_aspect('equal', adjustable='datalim')
# Eleven boundaries at -0.5, 0.5, ..., 9.5 give ten bins, ticked at 0..9.
plt.colorbar(boundaries=np.arange(11) - 0.5).set_ticks(np.arange(10))
plt.title('t-SNE projection of the Digits dataset', fontsize=24);

## UMAP

### Iris dataset

In [None]:
iris = load_iris()

In [None]:
# Tabulate the iris features and attach a readable species name per row.
iris_df = pd.DataFrame(data=iris.data, columns=iris.feature_names)
iris_df['species'] = pd.Series(iris.target).map(
    {code: name for code, name in zip(range(3), iris.target_names)}
)

# Pairwise feature plots, coloured by species.
sns.pairplot(data=iris_df, hue='species');

In [None]:
# Feature matrix of the iris dataset loaded earlier.
X = iris.data

# UMAP estimator with the library's default settings.
umap = UMAP()

# Fit UMAP on X and keep the embedded coordinates; the final expression
# displays the embedding's shape as the cell output.
X_emb = umap.fit_transform(X)
X_emb.shape

In [None]:
# Fetch the seaborn palette once and index it by class label, rather than
# calling sns.color_palette() again for every sample.
plt.scatter(X_emb[:, 0], X_emb[:, 1], c=np.asarray(sns.color_palette())[iris.target])
plt.gca().set_aspect('equal', 'datalim')
plt.title('UMAP projection of the Iris dataset');

### Digits dataset (scikit-learn's 8x8 digits, not the full MNIST)

In [None]:
digits = load_digits()

In [None]:
# Draw the first 400 digit images on a 20 x 20 grid of axes.
fig, ax_array = plt.subplots(nrows=20, ncols=20)
axes = ax_array.flatten()
for ax, image in zip(axes, digits.images):
    ax.imshow(image, cmap='gray_r')
# Strip ticks and frames so only the images remain visible.
plt.setp(axes, xticks=[], yticks=[], frame_on=False)
plt.tight_layout(h_pad=0.5, w_pad=0.01)

In [None]:
# Pair plot of the first ten feature columns, coloured by digit label.
digits_df = pd.DataFrame(digits.data[:, :10])
digits_df['digit'] = pd.Series(digits.target).map('Digit {}'.format)
sns.pairplot(
    digits_df,
    hue='digit',
    palette='Spectral',
    diag_kind='hist',
);

In [None]:
# Feature matrix of the digits dataset.
X = digits.data

# UMAP estimator with the library's default settings.
umap = UMAP()

# Fit only; the embedding is retrieved in the following cell.
umap.fit(X)

In [None]:
# Project the same data the estimator was fitted on.
X_emb = umap.transform(X)
# Sanity check: transform() on the training data must be element-wise
# identical to the stored embedding_ attribute.
assert np.all(X_emb == umap.embedding_)
X_emb.shape

In [None]:
umap.embedding_

In [None]:
X_emb

In [None]:
# Scatter the first two embedding coordinates, coloured by digit label.
plt.scatter(
    X_emb[:, 0],
    X_emb[:, 1],
    s=5,
    c=digits.target,
    cmap='Spectral',
)
plt.gca().set_aspect('equal', adjustable='datalim')
# Eleven boundaries at -0.5, 0.5, ..., 9.5 give ten bins, ticked at 0..9.
plt.colorbar(boundaries=np.arange(11) - 0.5).set_ticks(np.arange(10))
plt.title('UMAP projection of the Digits dataset', fontsize=24);

In [None]:
from io import BytesIO
from PIL import Image
import base64

In [None]:
def embeddable_image(data):
    """Encode a pixel array as a base64 PNG data URI.

    The values are cast to uint8, multiplied by 15 and subtracted from 255,
    then rendered as an 8-bit greyscale ('L') image resized to 64x64 with
    bicubic resampling.

    Returns a string of the form ``data:image/png;base64,<payload>``.
    """
    pixels = 255 - 15 * data.astype(np.uint8)
    picture = Image.fromarray(pixels, mode='L')
    picture = picture.resize((64, 64), Image.BICUBIC)
    with BytesIO() as png_buffer:
        picture.save(png_buffer, format='png')
        encoded = base64.b64encode(png_buffer.getvalue())
    return 'data:image/png;base64,' + encoded.decode()

In [None]:
from bokeh.plotting import figure, show, output_notebook
from bokeh.models import HoverTool, ColumnDataSource, CategoricalColorMapper
from bokeh.palettes import Spectral10

output_notebook()

In [None]:
# One row per sample: embedding coordinates, the label as a string (needed
# for the categorical colour mapper) and the image as an inline data URI.
digits_df = pd.DataFrame(X_emb, columns=('x', 'y'))
digits_df['digit'] = [str(x) for x in digits.target]
digits_df['image'] = list(map(embeddable_image, digits.images))

datasource = ColumnDataSource(digits_df)
# Factors are built as str(9 - name), so (assuming target_names is 0..9)
# they run from '9' down to '0' and digit d gets palette entry 9 - d.
color_mapping = CategoricalColorMapper(factors=[str(9 - x) for x in digits.target_names],
                                       palette=Spectral10)

# `width`/`height` replace `plot_width`/`plot_height`, which were deprecated
# in Bokeh 2.4 and removed in 3.0; the new names are accepted by both.
plot_figure = figure(
    title='UMAP projection of the Digits dataset',
    width=600,
    height=600,
    tools=('pan, wheel_zoom, reset')
)

# Hover tooltip showing the digit image next to its label.
plot_figure.add_tools(HoverTool(tooltips="""
<div>
    <div>
        <img src='@image' style='float: left; margin: 5px 5px 5px 5px'/>
    </div>
    <div>
        <span style='font-size: 16px; color: #224499'>Digit:</span>
        <span style='font-size: 18px'>@digit</span>
    </div>
</div>
"""))

plot_figure.circle(
    'x',
    'y',
    source=datasource,
    color=dict(field='digit', transform=color_mapping),
    line_alpha=0.6,
    fill_alpha=0.6,
    size=4
)
show(plot_figure)

### 4D uniformly distributed data
The data can be interpreted as (R, G, B, $\alpha$), specifying colour and transparency.

In [None]:
# Fix NumPy's global seed so the random data below is reproducible.
np.random.seed(42)

# 800 samples with 4 random values each.
X = np.random.rand(800, 4)

# Embed the random data with UMAP's default settings.
umap = UMAP()
X_emb = umap.fit_transform(X)

In [None]:
# Colour every embedded point with its own 4-value row of X.
plt.scatter(X_emb[:,0], X_emb[:,1], c=X)
plt.title('UMAP embedding of random colours');

In [None]:
def draw_umap(n_neighbors=15, min_dist=0.1, n_components=2, metric='euclidean', title=''):
    """Embed the notebook-global ``X`` with UMAP and scatter-plot the result.

    Every point is coloured with its own row of ``X`` (passed as ``c=X``).

    Parameters
    ----------
    n_neighbors, min_dist, metric
        Forwarded unchanged to ``UMAP``.
    n_components : int
        Embedding dimension, forwarded to ``UMAP``. Only 1, 2 and 3 can be
        drawn: 1 plots the coordinate against the sample index, 2 is a plain
        scatter, 3 uses a 3D axes.
    title : str
        Title placed above the plot.

    Raises
    ------
    ValueError
        If ``n_components`` is not 1, 2 or 3.
    """
    # Reject dimensions that cannot be drawn before spending time on the fit;
    # previously this case silently produced an empty, titled figure.
    if n_components not in (1, 2, 3):
        raise ValueError(
            'n_components must be 1, 2 or 3 to be plotted, got {!r}'.format(n_components)
        )

    reducer = UMAP(n_neighbors=n_neighbors,
                   min_dist=min_dist,
                   n_components=n_components,
                   metric=metric)
    X_emb = reducer.fit_transform(X)

    # Size this figure explicitly. Assigning plt.rcParams['figure.figsize']
    # after plt.figure() left the first figure at the old size and changed
    # the default for every later plot in the session.
    fig = plt.figure(figsize=(5, 5))

    if n_components == 1:
        ax = fig.add_subplot(111)
        # Only one coordinate exists, so spread the points along y by index.
        ax.scatter(X_emb[:, 0], range(len(X_emb)), c=X)
    elif n_components == 2:
        ax = fig.add_subplot(111)
        ax.scatter(X_emb[:, 0], X_emb[:, 1], c=X)
    else:
        ax = fig.add_subplot(111, projection='3d')
        ax.scatter(X_emb[:, 0], X_emb[:, 1], X_emb[:, 2], c=X, s=100)

    ax.set_title(title, fontsize=18)

In [None]:
# One embedding per neighbourhood size; all other settings stay at defaults.
for n in (2, 5, 10, 20, 50, 100, 200):
    draw_umap(title=f'n_neighbors = {n}', n_neighbors=n)

In [None]:
# One embedding per min_dist value; all other settings stay at defaults.
for d in (0.0, 0.1, 0.25, 0.5, 0.8, 0.99):
    draw_umap(title=f'min_dist = {d}', min_dist=d)

In [None]:
draw_umap(n_components=1, title='n_components = 1')

In [None]:
draw_umap(n_components=3, title='n_components = 3')