## PCA

### From scratch using NumPy

In [132]:
import numpy as np

In [155]:
# Randomly generate 100 points with dim of 10.
# The global NumPy RNG is seeded first so that "Restart Kernel -> Run All"
# reproduces the same X (and therefore the same components and projections).
np.random.seed(42)
X = np.random.random([100, 10])

# Center the matrix (subtract the per-feature mean) and perform singular value decomposition.
# The rows of Vt are the principal directions; s holds the singular values
# in decreasing order, so the first rows of Vt carry the most variance.
X_centered = X - X.mean(axis=0)
U, s, Vt = np.linalg.svd(X_centered)

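$$
X_{d\text{-proj}} = X W_d
$$

where $X$ is the centered data matrix and $W_d$ is the matrix whose columns are the first $d$ principal components.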

In [156]:
# Build the projection matrix from the first two rows of Vt (one column per
# principal component), then map the centered data onto that 2-D subspace.
W2 = Vt[:2].T
X2D_1 = np.dot(X_centered, W2)

In [157]:
# Sanity check: one row per sample, one column per kept component
X2D_1.shape

(100, 2)

### Using Scikit-Learn

In [136]:
from sklearn.decomposition import PCA

In [137]:
# Fit a 2-component PCA on X and project X onto those components in one call.
# X is passed uncentered here, whereas the NumPy version projects X_centered;
# presumably PCA centers the data internally - the allclose comparison in a
# later cell is consistent with that.
pca = PCA(n_components=2)
X2D_2 = pca.fit_transform(X)

In [138]:
# Sanity check: should match the shape of the NumPy projection X2D_1
X2D_2.shape

(100, 2)

In [139]:
# Both approaches give the same projection up to sign: a principal axis can
# point in either direction, so the element-wise magnitudes are compared.
np.allclose(np.abs(X2D_1), np.abs(X2D_2))

True

In [140]:
# The first 2 PCs can be viewed using the .components_ attribute
# (one row per principal component, one column per original feature)
pca.components_

array([[-0.26602597,  0.36252585, -0.18502824,  0.05458974,  0.38495564,
        -0.05947472,  0.34137184, -0.59372708,  0.2010397 ,  0.3152746 ],
       [ 0.22322314,  0.18081269,  0.33062011, -0.64263943,  0.00448331,
         0.06822088, -0.41331581, -0.23273967,  0.4043883 ,  0.04451176]])

In [142]:
# The variance ratio can be shown using the .explained_variance_ratio_ attribute
# (one entry per kept component)
pca.explained_variance_ratio_

array([0.1709725 , 0.12735772])

In [130]:
# Show the last five projected samples.
# NOTE(review): X2D_2 has shape (100, 2), but the saved output below has four
# columns, so it looks stale from an earlier run (In [130]) - re-run to refresh.
X2D_2[-5:]

array([[ 0.57232262,  0.5296775 ,  0.61235958, -0.07694953],
       [ 0.3085293 , -0.32596753,  0.2078087 ,  0.16875202],
       [-0.1104531 , -0.10381207,  0.48917273, -0.12500469],
       [-0.63597548,  0.27113837,  0.36044951, -0.52671295],
       [ 0.36507859,  0.30441173,  0.89981291,  0.16045539]])

#### Inverse PCA

In [145]:
# PCA can be seen as a compression, so it can go both ways (may lose information):
# project X down to 9 components, then map the reduced data back to the
# original feature space with inverse_transform.
# NOTE(review): this rebinds `pca`, which previously held the 2-component model;
# earlier cells that read pca.components_ will show this 9-component model if
# they are re-run after this cell.
pca = PCA(n_components=9)
X_reduced = pca.fit_transform(X)
X_recovered = pca.inverse_transform(X_reduced)

In [147]:
# The reconstruction is back in the original feature space (same shape as X)
X_recovered.shape

(100, 10)

In [148]:
# For reference: the NumPy projection matrix W2 is (n_features, n_components)
W2.shape

(10, 2)

#### Incremental PCA

In [149]:
from sklearn.decomposition import IncrementalPCA

In [158]:
# Fit PCA incrementally: split X into n_batches chunks of rows and feed them
# to the model one at a time with partial_fit.
n_batches = 5
inc_pca = IncrementalPCA(n_components=2)
for X_batch in np.array_split(X, n_batches):
    inc_pca.partial_fit(X_batch)
    
# Project the full dataset with the incrementally fitted model.
# NOTE(review): this overwrites X_reduced from the inverse-PCA cell.
X_reduced = inc_pca.transform(X)

## Kernel PCA

In [161]:
from sklearn.decomposition import KernelPCA

In [162]:
# Kernel PCA with an RBF kernel: fit on X and return the 2-component projection.
# Note that this reuses (overwrites) the name X_reduced from the previous section.
rbf_pca = KernelPCA(n_components=2, kernel='rbf', gamma=0.04)
X_reduced = rbf_pca.fit_transform(X)

In [163]:
# Sanity check: one row per sample, two kernel principal components
X_reduced.shape

(100, 2)

In [194]:
# Scratch example (unrelated to PCA): an already-sorted list with repeated
# values, used by the bisect cells below.
l = [1, 2, 3, 3, 3, 5]

In [195]:
import bisect

In [197]:
# Insert 3 into the sorted list while keeping it sorted.
# This mutates l in place, so re-running this cell adds another 3 each time.
bisect.insort(l, 3)

In [198]:
# Display the list after the insertion
l

[1, 2, 3, 3, 3, 3, 5]

In [186]:
# Index at which a new 3 would be inserted, i.e. just past the existing 3s.
# NOTE(review): for the 7-element list displayed above this is 6; the saved
# output 5 matches the list before the insort and comes from an earlier run
# (In [186]) - re-run to refresh.
bisect.bisect_right(l, 3)

5