## PCA

### From scratch using NumPy

In [1]:
import numpy as np

In [2]:
# Draw 100 samples with 10 features each, uniformly from [0, 1).
# A seeded Generator keeps every downstream result identical after
# Restart Kernel -> Run All; an unseeded draw changes on each run.
rng = np.random.default_rng(42)
X = rng.random((100, 10))

# PCA works on zero-mean features, so subtract the per-column mean before the SVD.
X_centered = X - X.mean(axis=0)

# X_centered = U @ Sigma @ Vt, where s holds the diagonal of Sigma in
# decreasing order; the rows of Vt are the principal axes.
U, s, Vt = np.linalg.svd(X_centered)

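Projecting the centered data $X$ onto the first $d$ principal components, where $W_d$ holds the first $d$ columns of $V$ (equivalently, the first $d$ rows of $V^\top$, transposed):

$$
X_{d\text{-proj}} = X W_d
$$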

In [3]:
# The first two rows of Vt are the top two principal axes; transposing them
# gives the (10, 2) projection matrix.
W2 = Vt[:2].T
# Map every centered sample onto those two axes.
X2D_1 = X_centered @ W2

In [4]:
# Expect (100, 2): one row per sample, one column per retained component.
X2D_1.shape

(100, 2)

### Using Scikit-Learn

In [5]:
from sklearn.decomposition import PCA

In [6]:
# Fit a 2-component PCA and project the data in a single call.
# The raw X is passed here (not X_centered): scikit-learn's PCA centers the
# input itself, unlike the NumPy version where centering was done by hand.
pca = PCA(n_components=2)
X2D_2 = pca.fit_transform(X)

In [7]:
# Expect the same (100, 2) layout as the NumPy projection.
X2D_2.shape

(100, 2)

In [8]:
# Both methods give the same projection, up to the sign of each component:
# a principal axis is only defined up to a factor of -1, so whole columns may
# be flipped. Align the signs column by column before comparing; an
# element-wise abs() would also accept sign mismatches inside a column, which
# are not a valid PCA ambiguity.
signs = np.sign((X2D_1 * X2D_2).sum(axis=0))
np.allclose(X2D_1 * signs, X2D_2)

True

In [9]:
# The first 2 principal axes are stored in the fitted estimator's `components_`
# attribute (one row per component, one column per original feature).
pca.components_

array([[ 0.08680432,  0.3390177 , -0.06339472, -0.35469416, -0.08689938,
        -0.0288259 ,  0.05690308,  0.60931973, -0.09878672, -0.59586527],
       [ 0.02491019,  0.43252561, -0.1706518 ,  0.49451538, -0.44816046,
        -0.21724864,  0.37937707, -0.30823017, -0.01843462, -0.22653073]])

In [10]:
# The `explained_variance_ratio_` attribute gives the fraction of the total
# variance captured by each retained component.
pca.explained_variance_ratio_

array([0.14530557, 0.13691307])

#### Inverse PCA

In [12]:
# PCA can be seen as a lossy compression, so it can go both ways: project down
# to 9 of the 10 dimensions, then map back to the original 10-dimensional space.
# The reconstruction may differ from X because the dropped component is lost.
# NOTE(review): this rebinds `pca`, replacing the 2-component model fitted
# earlier; re-running the earlier `pca.components_` cells afterwards would show
# this 9-component model instead.
pca = PCA(n_components=9)
X_reduced = pca.fit_transform(X)
X_recovered = pca.inverse_transform(X_reduced)

In [13]:
# The reconstruction is back in the original (100, 10) shape.
X_recovered.shape

(100, 10)

In [14]:
# NOTE(review): W2 is the (10, 2) projection matrix built in the NumPy section,
# not part of the 9-component PCA fitted in this section; confirm this is the
# intended check here.
W2.shape

(10, 2)

#### Incremental PCA

In [15]:
from sklearn.decomposition import IncrementalPCA

In [16]:
# Feed the data to IncrementalPCA in 5 chunks instead of all at once; each
# partial_fit call updates the model with one batch.
n_batches = 5
inc_pca = IncrementalPCA(n_components=2)
for X_batch in np.array_split(X, n_batches):
    inc_pca.partial_fit(X_batch)

# Project the full data set with the incrementally fitted model.
# NOTE(review): `X_reduced` is reused from the inverse-PCA cell and now holds a
# different result.
X_reduced = inc_pca.transform(X)

## Kernel PCA

In [17]:
from sklearn.decomposition import KernelPCA

In [18]:
# Kernel PCA with an RBF kernel, reduced to 2 components; gamma is the
# kernel coefficient.
rbf_pca = KernelPCA(n_components=2, kernel='rbf', gamma=0.04)
# Fit on X and return its 2-D embedding in one step.
X_reduced = rbf_pca.fit_transform(X)

In [19]:
# Expect (100, 2): every sample mapped to the 2 kernel principal components.
X_reduced.shape

(100, 2)

## LLE

In [20]:
from sklearn.manifold import LocallyLinearEmbedding

In [21]:
# Locally Linear Embedding down to 2 dimensions, reconstructing each point
# from its 10 nearest neighbors.
lle = LocallyLinearEmbedding(n_components=2, n_neighbors=10)
# Fit on X and return its 2-D embedding in one step.
X_reduced = lle.fit_transform(X)

In [22]:
# Expect (100, 2): every sample mapped to the 2-D LLE embedding.
X_reduced.shape

(100, 2)