#### Dimensionality Reduction Algorithms:

    Map high dimensional data to low dimensional data using below algorithms

    ```
        Linear:
            PCA
            Dual PCA

        Manifold Learning: 
            MDS(Multi Dimensional Scaling)
            Isomap
            LLE (Locally Linear Embedding)
            Spectral Embedding (Laplacian Eigen Map)
            t-SNE
    ```    

In [10]:
import numpy as np
import matplotlib.pyplot as plt
from mpl_toolkits.mplot3d import Axes3D
from sklearn import decomposition
from sklearn.datasets import load_digits
from sklearn.datasets import load_iris
from sklearn.datasets import make_s_curve
from sklearn.datasets import make_swiss_roll
from sklearn.manifold import MDS
from sklearn.manifold import Isomap
from sklearn.manifold import LocallyLinearEmbedding
from sklearn.manifold import SpectralEmbedding
from sklearn.manifold import TSNE
import plotly.express as px
import plotly.graph_objects as go
from plotly.subplots import make_subplots
import pandas as pd
from sklearn.metrics.pairwise import euclidean_distances

#### Prepare Data 

In [11]:
# Dataset1
digits = load_digits()
X_digits = digits.data
y_digits = digits.target

# Dataset2
iris = load_iris()
X_iris = iris.data
y_iris = iris.target

# Dataset3
X_s_curve, color = make_s_curve(n_samples=1000, noise=0.0, random_state=0)

# Dataset4
X_swiss_role,y_swiss_role = make_swiss_roll(n_samples=1000, noise=0.0, random_state=0)

print("Dataset1: %s" % (X_digits.shape, ))
print("Dataset2: %s" % (X_iris.shape, ))
print("Dataset3: %s" % (X_s_curve.shape, ))
print("Dataset4: %s" % (X_swiss_role.shape, ))

Dataset1: (1797, 64)
Dataset2: (150, 4)
Dataset3: (1000, 3)
Dataset4: (1000, 3)


In [3]:
def plot_3d(X_dataset1, y_dataset1, X_dataset2, y_dataset2, X_dataset3, y_dataset3, title):
    fig = plt.figure(figsize=(15, 8))
    #Clear the current figure
    plt.clf()
    ax = fig.add_subplot(1, 3, 1, projection='3d')
    scatter = ax.scatter(X_dataset1[:, 0], X_dataset1[:, 1], X_dataset1[:, 2], c=y_dataset1, cmap=plt.cm.nipy_spectral, edgecolor='k')
    
    ax = fig.add_subplot(1, 3, 2, projection='3d')
    scatter = ax.scatter(X_dataset2[:, 0], X_dataset2[:, 1], X_dataset2[:, 2], c=y_dataset2, cmap=plt.cm.nipy_spectral, edgecolor='k')
    
    ax = fig.add_subplot(1, 3, 3, projection='3d')
    scatter = ax.scatter(X_dataset3[:, 0], X_dataset3[:, 1], X_dataset3[:, 2], c=y_dataset3, cmap=plt.cm.nipy_spectral, edgecolor='k')
    plt.show()

In [4]:
## Using plotly
def plot_3d_interactive(X_dataset1, y_dataset1, X_dataset2, y_dataset2):
    # Initialize figure with subplots
    fig = make_subplots(
    rows=2, cols=2,
    specs=[[{"type": "scatter3d", "colspan": 2}, None],
           [{"type": "scatter3d", "colspan": 2}, None]],
    subplot_titles=("Dataset 1", "Dataset 2"))
    

    trace0 = go.Scatter3d(x=X_dataset1[:, 0], y=X_dataset1[:, 1], z=X_dataset1[:, 2], mode='markers', marker_color=y_dataset1)
    trace1 = go.Scatter3d(x=X_dataset2[:, 0], y=X_dataset2[:, 1], z=X_dataset2[:, 2], mode='markers', marker_color=y_dataset2)
    
    fig.append_trace(trace0, 1, 1)
    fig.append_trace(trace1, 2, 1)
    
    fig.update_layout(
        title_text='3D subplots with different colorscales',
        height=1000,
        width=900,
        margin=dict(l=0, r=0, b=0, t=0),
        showlegend=False
    )
    
    fig.show()

In [5]:
## Using plotly
def plot_2d_interactive(X_dataset1, y_dataset1, X_dataset2, y_dataset2):
    # Initialize figure with subplots
    fig = make_subplots(
    rows=2, cols=2,
    specs=[[{"type": "scatter", "colspan": 2}, None],
           [{"type": "scatter", "colspan": 2}, None]],
    subplot_titles=("Dataset 3", "Dataset 4"))

    trace0 = go.Scatter(x=X_dataset1[:, 0], y=X_dataset1[:, 1], mode='markers', marker_color=y_dataset1)
    trace1 = go.Scatter(x=X_dataset2[:, 0], y=X_dataset2[:, 1], mode='markers', marker_color=y_dataset2)
    
    fig.append_trace(trace0, 1, 1)
    fig.append_trace(trace1, 2, 1)
    
    fig.update_layout(
        title_text='2D subplots with different colorscales',
        height=1000,
        width=900,
        margin=dict(l=0, r=0, b=0, t=0),
        showlegend=False
    )
    
    fig.show()

#### PCA (Principal Component Analysis)

In [6]:
pca_digits = decomposition.PCA(n_components=3)
# Fit the model with X and apply the dimensionality reduction on X.
X_digits = pca_digits.fit_transform(X_digits) 

pca_iris = decomposition.PCA(n_components=3)
# Fit the model with X and apply the dimensionality reduction on X.
X_iris = pca_iris.fit_transform(X_iris) 

pca_s_curve = decomposition.PCA(n_components=2)
# Fit the model with X and apply the dimensionality reduction on X.
X_s_curve = pca_s_curve.fit_transform(X_s_curve) 

pca_swiss_roll = decomposition.PCA(n_components=2)
# Fit the model with X and apply the dimensionality reduction on X.
X_swiss_role = pca_swiss_roll.fit_transform(X_swiss_role)

In [7]:
plot_3d_interactive(X_digits, y_digits, X_iris, y_iris)

In [8]:
plot_2d_interactive(X_s_curve, color, X_swiss_role, y_swiss_role)

In [9]:
#Percentage of variance explained by each of the selected components.
print("Percentage of variance explained by each of the selected components(digits dataset): %s" % np.around(pca_digits.explained_variance_ratio_, 2))
print("Percentage of variance explained by each of the selected components(iris dataset): %s" % np.around(pca_iris.explained_variance_ratio_, 2))
print("Percentage of variance explained by each of the selected components(s-curve dataset): %s" % np.around(pca_s_curve.explained_variance_ratio_, 2))
print("Percentage of variance explained by each of the selected components(swiss-roll dataset): %s" % np.around(pca_s_curve.explained_variance_ratio_, 2))

Percentage of variance explained by each of the selected components(digits dataset): [0.15 0.14 0.12]
Percentage of variance explained by each of the selected components(iris dataset): [0.92 0.05 0.02]
Percentage of variance explained by each of the selected components(s-curve dataset): [0.69 0.18]
Percentage of variance explained by each of the selected components(swiss-roll dataset): [0.69 0.18]


#### Manifold Learning

A manifold is a topological space that locally resembles euclidean space near each point. More precisely, each point of an n-dimensional manifold has a neighborhood that is homeomorphic to the euclidean space of dimensions n.

#### MDA (Multi Dimensional Scaling)

```
 PCA = MDS with euclidean distance 
 Isomap = MDS with geodesic distance
 ```

In [12]:
mds_digits = MDS(n_components=3)
# Fit the data from X, and returns the embedded coordinates
X_digits = mds_digits.fit_transform(X_digits) 

mds_iris = MDS(n_components=3)
# Fit the data from X, and returns the embedded coordinates
X_iris = mds_iris.fit_transform(X_iris) 

mds_s_curve = MDS(n_components=2)
# Fit the data from X, and returns the embedded coordinates
X_s_curve = mds_s_curve.fit_transform(X_s_curve) 

mds_swiss_role = MDS(n_components=2)
# Fit the data from X, and returns the embedded coordinates
X_swiss_role = mds_swiss_role.fit_transform(X_s_curve) 

In [13]:
plot_3d_interactive(X_digits, y_digits, X_iris, y_iris)


In [14]:
plot_2d_interactive(X_s_curve, color, X_swiss_role, y_swiss_role)

In [15]:
# In MDS, we are trying to model the distances. Hence, the most obvious choice 
# for a goodness-of-fit statistic is one based on the differences between the actual distances and their
# predicted values. Such a measure is called stress.
#**  MDS fits with stress values near zero are the best **

print("Stress Value (digits dataset): %s" % np.around(mds_digits.stress_, 2))
print("Stress Value (iris dataset): %s" % np.around(mds_iris.stress_, 2))
print("Stress Value (s-curve dataset): %s" % np.around(mds_s_curve.stress_, 2))
print("Stress Value (swiss-roll dataset): %s" % np.around(mds_s_curve.stress_, 2))

Stress Value (digits dataset): 200134173.67
Stress Value (iris dataset): 17.73
Stress Value (s-curve dataset): 42263.57
Stress Value (swiss-roll dataset): 42263.57


#### Isomap

The main idea of Isomap is to perform MDS not in the input space but in the geodesic space of the non-linear data manifold.

```
 Important Parameters:
     1. n_neighbors => number of neighbors to consider for each point.
     2. n_components => number of coordinates for the manifold.
     3. neighbors_algorithm [‘auto’|’brute’|’kd_tree’|’ball_tree’] => Algorithm to use for nearest neighbors search.
```

In [16]:
isomap_digits = Isomap(n_neighbors=10, n_components = 3, neighbors_algorithm='ball_tree')
# Fit the data from X, and returns the embedded coordinates
X_digits = isomap_digits.fit_transform(X_digits) 

isomap_iris = Isomap(n_neighbors=10, n_components = 3, neighbors_algorithm='kd_tree')
# Fit the data from X, and returns the embedded coordinates
X_iris = isomap_iris.fit_transform(X_iris) 

isomap_s_curve = Isomap(n_neighbors=10, n_components = 3, neighbors_algorithm='ball_tree')
# Fit the data from X, and returns the embedded coordinates
X_s_curve = isomap_s_curve.fit_transform(X_s_curve) 

isomap_swiss_role = Isomap(n_neighbors=10, n_components = 3, neighbors_algorithm='ball_tree')
# Fit the data from X, and returns the embedded coordinates
X_swiss_role = isomap_swiss_role.fit_transform(X_s_curve) 

In [17]:
plot_3d_interactive(X_digits, y_digits, X_iris, y_iris)

In [18]:
plot_2d_interactive(X_s_curve, color, X_swiss_role, y_swiss_role)

In [None]:
# The cost function of an isomap embedding is
# E = frobenius_norm[K(D) - K(D_fit)] / n_samples
# Where D is the matrix of distances for the input data X, D_fit is the matrix of distances for the output embedding X_fit,
# and K is the isomap kernel
print("Error (digits dataset): %s" % np.around(isomap_digits.reconstruction_error(), 2))
print("Error (iris dataset): %s" % np.around(isomap_iris.reconstruction_error(), 2))
print("Error (s-curve dataset): %s" % np.around(isomap_s_curve.reconstruction_error(), 2))
print("Error (swiss-roll dataset): %s" % np.around(isomap_swiss_role.reconstruction_error(), 2))

#### Locally Linear Embedding(LLE)

1. Compute the neighbours of each data point.
2. Compute the weights $W_{ij}$ that best reconstruct each data point $X_{i}$ from its neighbours, minimizing the below cost function.
    
     E(w) =  $\sum_{i=1}^t || x_i - \sum_{j=1}^k w_{ij}x_j||^2$

3. Compute the vectors $Y_i$ best reconstructed by the weights $W_{ij}$, minimizing below equation

     f(y) =  $\sum_{i=1}^t || y_i - \sum_{j=1}^t w_{ij}y_j||^2$

```
 Important Parameters:
     1. n_neighbors => number of neighbors to consider for each point.
     2. n_components => number of coordinates for the manifold.
     3. neighbors_algorithm [‘auto’|’brute’|’kd_tree’|’ball_tree’] => Algorithm to use for nearest neighbors search.
```

In [19]:
lle_digits = LocallyLinearEmbedding(n_neighbors=10, n_components = 3, neighbors_algorithm='ball_tree')
# Fit the data from X, and returns the embedded coordinates
X_digits = lle_digits.fit_transform(X_digits) 

lle_iris = LocallyLinearEmbedding(n_neighbors=10, n_components = 3, neighbors_algorithm='kd_tree')
# Fit the data from X, and returns the embedded coordinates
X_iris = lle_iris.fit_transform(X_iris) 

lle_s_curve = LocallyLinearEmbedding(n_neighbors=10, n_components = 3, neighbors_algorithm='ball_tree')
# Fit the data from X, and returns the embedded coordinates
X_s_curve = lle_s_curve.fit_transform(X_s_curve) 

lle_swiss_role = LocallyLinearEmbedding(n_neighbors=10, n_components = 3, neighbors_algorithm='ball_tree')
# Fit the data from X, and returns the embedded coordinates
X_swiss_role = lle_swiss_role.fit_transform(X_s_curve) 

In [20]:
plot_3d_interactive(X_digits, y_digits, X_iris, y_iris)

In [21]:
plot_2d_interactive(X_s_curve, color, X_swiss_role, y_swiss_role)

In [22]:
print("Error (digits dataset): %s" % np.around(lle_digits.reconstruction_error_, 7))
print("Error (iris dataset): %s" % np.around(lle_iris.reconstruction_error_, 7))
print("Error (s-curve dataset): %s" % np.around(lle_s_curve.reconstruction_error_, 7))
print("Error (swiss-role dataset): %s" % np.around(lle_swiss_role.reconstruction_error_, 7))


Error (digits dataset): 1e-07
Error (iris dataset): 4e-07
Error (s-curve dataset): 1e-07
Error (swiss-role dataset): 0.0


#### Laplacian Eigen Map
1. Transform the raw input data into graph representation using affinity (adjacency) matrix representation.

2. Calculate the Laplacian Matrix L = D - W where D -> Diagonal Matrix and W -> Weight Matrix.

3. Eigenvalue decomposition is done on graph Laplacian

```
 Important Parameters:

        n_components => The dimension of the projected subspace.

        affinity => 
        How to construct the affinity matrix.
        ‘nearest_neighbors’ : construct the affinity matrix by computing a graph of nearest neighbors.

        ‘rbf’ : construct the affinity matrix by computing a radial basis function (RBF) kernel.

        ‘precomputed’ : interpret X as a precomputed affinity matrix.

        ‘precomputed_nearest_neighbors’ : interpret X as a sparse graph of precomputed nearest neighbors, and constructs the affinity matrix by selecting the n_neighbors nearest neighbors.

        callable : use passed in function as affinity the function takes in data matrix (n_samples, n_features) and return affinity matrix (n_samples, n_samples).

        gamma => Kernel coefficient for rbf kernel.

        n_neighbors => Number of nearest neighbors for nearest_neighbors graph building.
```


In [None]:
spectral_digits = SpectralEmbedding(n_components = 3,affinity='rbf', n_neighbors=10)
# Fit the data from X, and returns the embedded coordinates
X_digits = spectral_digits.fit_transform(X_digits) 

spectral_iris = SpectralEmbedding(n_components = 3,affinity='rbf', n_neighbors=10)
# Fit the data from X, and returns the embedded coordinates
X_iris = spectral_iris.fit_transform(X_iris) 

spectral_s_curve = SpectralEmbedding(n_components = 3,affinity='rbf', n_neighbors=10)
# Fit the data from X, and returns the embedded coordinates
X_s_curve = spectral_s_curve.fit_transform(X_s_curve) 

spectral_swiss_role = SpectralEmbedding(n_components = 3,affinity='rbf', n_neighbors=10)
# Fit the data from X, and returns the embedded coordinates
X_swiss_role = spectral_swiss_role.fit_transform(X_s_curve) 

In [None]:
plot_3d_interactive(X_digits, y_digits, X_iris, y_iris)

In [None]:
plot_2d_interactive(X_s_curve, color, X_swiss_role, y_swiss_role)

#### TSNE

t-distributed Stochastic Neighbor Embedding.

```
    Important Parameters:
        n_components => Dimension of the embedded space.
        perplexity => The perplexity is related to the number of nearest neighbors that is used in other manifold learning algorithms. Larger datasets usually require a larger perplexity. Consider selecting a value between 5 and 50. Different values can result in significanlty different results.
        learning_rate => The learning rate for t-SNE is usually in the range [10.0, 1000.0]. If the learning rate is too high, the data may look like a ‘ball’ with any point approximately equidistant from its nearest neighbours. If the learning rate is too low, most points may look compressed in a dense cloud with few outliers. If the cost function gets stuck in a bad local minimum increasing the learning rate may help.
        
```

In [31]:
tsne_digits = TSNE(n_components = 2,perplexity=10)
# Fit the data from X, and returns the embedded coordinates
X_digits = tsne_digits.fit_transform(X_digits) 

tsne_iris = TSNE(n_components = 2,perplexity=10)
# Fit the data from X, and returns the embedded coordinates
X_iris = tsne_digits.fit_transform(X_iris) 

tsne_s_curve = TSNE(n_components = 2,perplexity=10)
# Fit the data from X, and returns the embedded coordinates
X_s_curve = tsne_digits.fit_transform(X_s_curve) 

tsne_swiss_role = TSNE(n_components = 2,perplexity=10)
# Fit the data from X, and returns the embedded coordinates
X_swiss_role = tsne_digits.fit_transform(X_s_curve) 

In [24]:
plot_2d_interactive(X_digits, y_digits, X_iris, y_iris)

In [25]:
 plot_2d_interactive(X_s_curve, color, X_swiss_role, y_swiss_role)