# SVD

[Relationship between PCA and SVD](https://stats.stackexchange.com/a/134283)

$\large \mathbf{X} = {\begin{pmatrix} x_{1,1}&\cdots&x_{1,p} \\ \vdots & \ddots &\vdots \\ x_{n,1}&\cdots&x_{n,p} \end{pmatrix} }_{n\times p} = {\begin{pmatrix} X_1 & \cdots&X_p \end{pmatrix} }_{n\times p}$ where $n$ is the number of samples and $p$ is the number of features.
<br/><br/>
$\large \mathrm{Cov}(X) = {\begin{pmatrix} \mathrm{cov}[X_1, X_1] & \cdots & \mathrm{cov}[X_1, X_p] \\ \vdots & \ddots &\vdots \\ \mathrm{cov}[X_p, X_1] & \cdots & \mathrm{cov}[X_p, X_p]\end{pmatrix}}_{p\times p} = {\begin{pmatrix} \mathrm{var}[X_1] & \cdots & \mathrm{cov}[X_1, X_p] \\ \vdots & \ddots &\vdots \\ \mathrm{cov}[X_p, X_1] & \cdots & \mathrm{var}[X_p]\end{pmatrix}}_{p\times p}$

In [1]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns

In [2]:
from sklearn.decomposition import PCA
from sklearn.model_selection import train_test_split
from sklearn.datasets import make_classification
from sklearn.preprocessing import StandardScaler, MinMaxScaler, RobustScaler
from sklearn.metrics.pairwise import cosine_similarity
from sklearn.metrics.pairwise import cosine_similarity

$\Large \bullet \textbf{ Gram-Schmidt process}\\
\large \displaystyle {\mathbf{V} = \begin{pmatrix} \mathbf{v}_1 &\mathbf{v}_2 & \cdots & \mathbf{v}_p \end{pmatrix} \normalsize{\text{ where  $\mathbf{v}_i\ \text{for}\ i \in \{1,2,\dots,p\}$ are column vectors} } \\
\begin{align}  \mathbf{u}_1 &= \mathbf{v}_1 \\ \mathbf{u}_2 &= \mathbf{v}_2 - \frac{\left\langle \mathbf{u}_1, \mathbf{v}_2 \right\rangle}{\left\langle \mathbf{u}_1, \mathbf{u}_1 \right\rangle} \mathbf{u}_1\\ \mathbf{u}_3 &= \mathbf{v}_3- \frac{\left\langle \mathbf{u}_2, \mathbf{v}_3 \right\rangle}{\left\langle \mathbf{u}_2, \mathbf{u}_2\right\rangle} \mathbf{u}_2 - \frac{\left\langle \mathbf{u}_1, \mathbf{v}_3 \right\rangle}{\left\langle \mathbf{u}_1, \mathbf{u}_1\right\rangle} \mathbf{u}_1\\ &\ \vdots \\\mathbf{u}_p &= \mathbf{v}_p - \sum^{p-1}_{i=1} \frac{\langle \mathbf{u}_i, \mathbf{v}_p \rangle}{\left\langle \mathbf{u}_i, \mathbf{u}_i\right\rangle} \mathbf{u}_i\\
\end{align} \\
\therefore \mathbf{U} = \begin{pmatrix} \mathbf{u}_1 & \mathbf{u}_2 &\cdots & \mathbf{u}_p \end{pmatrix}}$

In [81]:
class gram_schmidt:
    '''def __init__(self, X):
        self.X = X.astype(float)'''
        
    def _proj(self, u, v):
        return ((u.T).dot(v) / (u.T).dot(u))*u
    
    def orthonormal(self, X, col_vec = True, normal = True):
        #X = X.astype(float)
        if col_vec:
            mat = X.copy()
        else:
            mat = (X.T).copy()
        
        N = mat.shape[1]
        mat_orth = np.array([]).reshape(mat.shape[0], -1)
        for n in range(N):
            u = mat[:, n:n+1].copy()
            if n ==0:
                mat_orth = np.hstack((mat_orth,u))
            else:
                for i in range(n):
                    u -= self._proj(mat_orth[:, i:i+1], mat[:, n:n+1])
                mat_orth = np.hstack((mat_orth,u))
        
        if normal:
            result = mat_orth / np.linalg.norm(mat_orth, axis=0)
            if col_vec:
                return result
            else:
                return result.T
        else:
            if col_vec:
                return mat_orth
            else:
                return mat_orth.T

# Codes

In [3]:
data = make_classification(#n_features=4, 
                           n_samples=10**3, weights=[0.9], random_state= 42, )
#data = make_classification(n_features=2, n_informative=2,n_redundant=0,n_samples=1000, n_clusters_per_class=2, weights=[0.9], random_state= 42, )

In [4]:
X_train, X_test, y_train, y_test = train_test_split(data[0], data[1], test_size = .2, random_state=42,)

$\large \mathbf{X}_{\normalsize scaled} = \begin{pmatrix} \frac{X_1 -  \mu_1}{\sigma_1} & \frac{X_2 -  \mu_2}{\sigma_2} & \cdots &\frac{X_p -  \mu_p}{\sigma_p}  \end{pmatrix} \normalsize \text{, where $\mu_i$ and $\sigma_i$ are means and standard deviations of vector $X_i$ for $i \in \{1,2,\cdots,p\}$.}\\
\text{Therefore, the means of each column of matrix $\mathbf{X}_{\normalsize scaled}$ are all 0 and standard deviations are all 1.} $

In [5]:
pca = PCA(n_components=2, random_state=42, svd_solver='auto')

scaler = StandardScaler()
X_train_sc = scaler.fit_transform(X_train)
xpca = pca.fit_transform(X_train_sc)

$
\large {X = U\; \Sigma\; V \\
U = \normalsize{\text{orthonormal eigenvectors of }} M M^* \\
S = \normalsize{\text{eigenvalues of both of}}  M M^* and M^* M \\
V^* = \normalsize{\text{orthonormal eigenvectors of }} M^* M
}$

In [6]:
u, s, v = np.linalg.svd(X_train_sc, full_matrices=True, )

In [14]:
S = np.zeros(X_train_sc.shape, float)
np.fill_diagonal(S, s)

In [17]:
np.allclose(np.dot(u, np.dot(S, v)), X_train_sc )

True

In [145]:
eval_u, evec_u = np.linalg.eig(X_train_sc.dot(X_train_sc.T))
eval_v, evec_v = np.linalg.eig(X_train_sc.T.dot(X_train_sc))

In [161]:
np.sqrt(abs(np.real(eval_v))).argsort()

array([19, 18,  4,  5,  6,  7,  8, 10, 11, 12, 13, 17, 16, 15, 14,  9,  3,
        2,  1,  0], dtype=int64)

In [170]:
v.T[0,:]

array([-2.29222837e-02,  5.05722912e-01, -6.56554623e-02,  1.11001843e-01,
       -2.38002695e-01,  8.32706762e-02,  1.72357613e-01,  8.64398786e-02,
       -1.29605480e-02,  4.39800060e-01, -3.06539027e-01,  3.62347473e-02,
       -2.50551049e-03, -1.59530740e-02, -2.07104101e-01,  2.14519904e-01,
        4.72252971e-01, -1.58942830e-01,  5.97211875e-17, -3.02034327e-17])

In [172]:
np.real(evec_v[:,np.sqrt(abs(np.real(eval_v))).argsort()[::-1]])[0,:]

array([-2.29222837e-02, -5.05722912e-01,  6.56554623e-02,  1.11001843e-01,
       -2.38002695e-01, -8.32706762e-02, -1.72357613e-01, -8.64398786e-02,
        1.29605480e-02,  4.39800060e-01,  3.06539027e-01,  3.62347473e-02,
       -2.50551049e-03,  1.59530740e-02, -2.07104101e-01,  2.14519904e-01,
        4.72252971e-01,  1.58942830e-01, -3.21000690e-17, -3.21000690e-17])

In [129]:
gs = gram_schmidt()
ev_orth = np.real(gs.orthonormal(ev, ))

In [132]:
v.T[0,:]

array([-2.29222837e-02,  5.05722912e-01, -6.56554623e-02,  1.11001843e-01,
       -2.38002695e-01,  8.32706762e-02,  1.72357613e-01,  8.64398786e-02,
       -1.29605480e-02,  4.39800060e-01, -3.06539027e-01,  3.62347473e-02,
       -2.50551049e-03, -1.59530740e-02, -2.07104101e-01,  2.14519904e-01,
        4.72252971e-01, -1.58942830e-01,  5.97211875e-17, -3.02034327e-17])

In [133]:
ev_orth[0,:]

array([-2.29222837e-02, -5.05722912e-01,  6.56554623e-02,  1.11001843e-01,
        1.58942830e-01,  4.72252971e-01,  2.14519904e-01, -2.07104101e-01,
        1.59530740e-02, -2.38002695e-01, -2.50551049e-03,  3.62347473e-02,
        3.06539027e-01,  4.39800060e-01, -8.32706762e-02, -1.72357613e-01,
       -8.64398786e-02,  1.29605480e-02,  1.56755213e-17, -8.81978737e-18])

In [124]:
np.linalg.norm(ev_orth[:,1])

1.0000000000000002

In [123]:
np.linalg.norm(v.T[:,1])

1.0000000000000002

In [96]:
np.isclose(v[:,:3],np.real(gs.orthonormal(ev, )[:,:3]))

array([[ True, False, False],
       [False, False, False],
       [False, False, False],
       [False, False, False],
       [False, False, False],
       [False, False, False],
       [False, False, False],
       [False, False, False],
       [False, False, False],
       [False, False, False],
       [False, False, False],
       [False, False, False],
       [False, False, False],
       [False, False, False],
       [False, False, False],
       [False, False, False],
       [False, False, False],
       [False, False, False],
       [False, False, False],
       [False, False, False]])

In [97]:
v.T[:,:3] 

array([[-0.02292228,  0.50572291, -0.06565546],
       [ 0.55962697,  0.05425466, -0.04748907],
       [-0.0203438 , -0.01816242, -0.51729607],
       [ 0.00109865, -0.1577597 , -0.07062832],
       [-0.04118024, -0.25275819, -0.01802699],
       [-0.47676204,  0.15049368, -0.05232398],
       [ 0.0125351 ,  0.09318769,  0.45878404],
       [ 0.01736969, -0.20223319, -0.02511544],
       [ 0.03330077,  0.05702416, -0.17681541],
       [-0.00782674, -0.28581579,  0.09669366],
       [-0.01586192,  0.22138756, -0.18911348],
       [ 0.01047088, -0.24002708, -0.27383818],
       [ 0.02324671,  0.06861466, -0.03325629],
       [ 0.05600357,  0.1658956 ,  0.48019901],
       [-0.37526174, -0.21968459,  0.11830003],
       [ 0.00791182, -0.35257137, -0.07828296],
       [-0.02624688,  0.30492018,  0.00310865],
       [-0.01452093,  0.1160483 , -0.10764729],
       [ 0.55590289, -0.05176735,  0.00266235],
       [-0.02001138,  0.27290482, -0.30312492]])

In [100]:
ev_orth

array([[-0.02292228, -0.50572291,  0.06565546],
       [ 0.55962697, -0.05425466,  0.04748907],
       [-0.0203438 ,  0.01816242,  0.51729607],
       [ 0.00109865,  0.1577597 ,  0.07062832],
       [-0.04118024,  0.25275819,  0.01802699],
       [-0.47676204, -0.15049368,  0.05232398],
       [ 0.0125351 , -0.09318769, -0.45878404],
       [ 0.01736969,  0.20223319,  0.02511544],
       [ 0.03330077, -0.05702416,  0.17681541],
       [-0.00782674,  0.28581579, -0.09669366],
       [-0.01586192, -0.22138756,  0.18911348],
       [ 0.01047088,  0.24002708,  0.27383818],
       [ 0.02324671, -0.06861466,  0.03325629],
       [ 0.05600357, -0.1658956 , -0.48019901],
       [-0.37526174,  0.21968459, -0.11830003],
       [ 0.00791182,  0.35257137,  0.07828296],
       [-0.02624688, -0.30492018, -0.00310865],
       [-0.01452093, -0.1160483 ,  0.10764729],
       [ 0.55590289,  0.05176735, -0.00266235],
       [-0.02001138, -0.27290482,  0.30312492]])

In [99]:
gs = gram_schmidt()
ev_orth = np.real(gs.orthonormal(ev, )[:,:3])

In [60]:
u

array([[-3.58069699e-02,  3.01698041e-02, -3.92649977e-03, ...,
         2.50448298e-02, -3.82197662e-02, -3.66543863e-02],
       [-5.22878445e-03, -3.06603742e-02,  1.55813512e-02, ...,
        -2.54367666e-02, -4.40285614e-02, -1.38019509e-02],
       [ 4.67882864e-03, -7.81796912e-02, -1.31807645e-02, ...,
        -3.06775784e-02,  3.01705551e-02, -1.05746981e-02],
       ...,
       [-2.34054071e-02, -6.97085261e-02,  8.71660084e-03, ...,
         9.75102822e-01,  7.05642576e-04,  2.80920160e-03],
       [-1.42460634e-02, -7.98736725e-03, -2.16604159e-02, ...,
         1.85549046e-03,  9.82551168e-01,  6.20014067e-03],
       [-1.94011010e-02,  2.70296071e-02,  4.82344894e-03, ...,
         6.53096767e-04,  5.39052629e-03,  9.84550389e-01]])

In [18]:
cov= np.cov(X_train_sc - X_train_sc.mean(axis=0), rowvar=False)
eigenvalues, eigenvectors = np.linalg.eig(cov)

In [109]:
u.dot(np.diag(s))

(800, 20)

In [104]:
xpca

array([[ 1.77774354, -0.58563607],
       [ 0.2612659 ,  0.59186165],
       [-0.23103433,  2.09704588],
       ...,
       [ 1.16470754,  2.06088412],
       [ 0.70823629,  0.47223255],
       [ 0.96628558, -1.09295898]])

In [101]:
(X_train_sc - X_train_sc.mean(axis=0)).dot(pca.components_.T)

array([[ 1.78141977, -0.94647922],
       [ 0.26013539,  0.96186926],
       [-0.23277473,  2.45263287],
       ...,
       [ 1.16443405,  2.18687769],
       [ 0.70875081,  0.2505776 ],
       [ 0.96521724, -0.84796578]])

In [79]:
np.allclose(xpca, X_train_sc.dot(pca.components_.T))

False

In [75]:
X_train_sc.var()

0.9999999999999993

In [71]:
pca.explained_variance_

array([3.0668676 , 0.93813866])

In [74]:
(X_train_sc - X_train_sc.mean(axis=0)).dot(pca.components_.T).var(axis=0, ddof=1)

array([3.0668676 , 0.93813866])

In [19]:
np.allclose(v[0], eigenvectors.T[0])

False

In [20]:
eigenvectors[0,1]

-0.5057229118627715

In [21]:
np.isclose(abs(v), abs(eigenvectors.T))

array([[ True,  True,  True,  True],
       [ True,  True,  True,  True],
       [False, False, False, False],
       [False, False, False, False]])

In [20]:
np.allclose(v.T.dot(v), np.eye(v.shape[0]))

True

In [22]:
np.isclose(eigenvectors.T.dot(eigenvectors), np.eye(eigenvectors.shape[0]))

array([[ True,  True,  True,  True],
       [ True,  True,  True,  True],
       [ True,  True,  True, False],
       [ True,  True, False,  True]])

In [26]:
X_train_sc.shape

(800, 4)

In [69]:
comp_idx = 0
cosine_similarity(v[comp_idx:comp_idx+1,:], eigenvectors[:,comp_idx].reshape(1,-1))

array([[-1.]])

In [85]:
((u.T).dot(u) - np.eye(800)).min()

-1.7763568394002505e-15

In [27]:
np.allclose((u.T).dot(u) , np.eye(u.shape[0]))

True

In [28]:
np.allclose((v.T).dot(v) , np.eye(v.shape[0]))

True

In [82]:
np.round((u.T).dot(u) - np.eye(800), 2)

array([[ 0., -0., -0., ...,  0.,  0.,  0.],
       [-0., -0., -0., ...,  0.,  0.,  0.],
       [-0., -0., -0., ...,  0.,  0.,  0.],
       ...,
       [ 0.,  0.,  0., ...,  0., -0.,  0.],
       [ 0.,  0.,  0., ..., -0.,  0., -0.],
       [ 0.,  0.,  0., ...,  0., -0., -0.]])

In [77]:
np.round((v.T).dot(v), 2)

array([[ 1.,  0.,  0.,  0.],
       [ 0.,  1.,  0., -0.],
       [ 0.,  0.,  1.,  0.],
       [ 0., -0.,  0.,  1.]])

In [75]:
eigenvectors.dot( np.matrix(eigenvectors).H)

matrix([[ 1.01230942,  0.06288618, -0.03379583, -0.08403019],
        [ 0.06288618,  1.14011871,  0.01014754, -0.09029393],
        [-0.03379583,  0.01014754,  0.90831919, -0.11137876],
        [-0.08403019, -0.09029393, -0.11137876,  0.93925268]])

In [78]:
np.round((eigenvectors.T).dot(eigenvectors),2)

array([[ 1.  ,  0.  ,  0.  ,  0.  ],
       [ 0.  ,  1.  , -0.  , -0.  ],
       [ 0.  , -0.  ,  1.  ,  0.22],
       [ 0.  , -0.  ,  0.22,  1.  ]])

In [36]:
np.square(s)/(X_train_sc.shape[0]-1)

array([3.06686760e+00, 9.38138656e-01, 1.54891299e-31, 4.74019816e-32])

In [34]:
np.square(s)/(X_train_sc.shape[0]-1)

array([3.06686760e+00, 9.38138656e-01, 1.54891299e-31, 4.74019816e-32])

In [173]:
cov= np.cov(X_train_sc - X_train_sc.mean(axis=0), rowvar=False)
eigenvalues, eigenvectors = np.linalg.eig(cov)

In [183]:
np.isclose(np.round(eigenvectors.T.dot(eigenvectors),2),np.eye(20))

array([[ True,  True,  True,  True,  True,  True,  True,  True,  True,
         True,  True,  True,  True,  True,  True,  True,  True,  True,
         True,  True],
       [ True,  True,  True,  True,  True,  True,  True,  True,  True,
         True,  True,  True,  True,  True,  True,  True,  True,  True,
         True,  True],
       [ True,  True,  True,  True,  True,  True,  True,  True,  True,
         True,  True,  True,  True,  True,  True,  True,  True,  True,
         True,  True],
       [ True,  True,  True,  True,  True,  True,  True,  True,  True,
         True,  True,  True,  True,  True,  True,  True,  True,  True,
         True,  True],
       [ True,  True,  True,  True,  True,  True,  True,  True,  True,
         True,  True,  True,  True,  True,  True,  True,  True,  True,
         True,  True],
       [ True,  True,  True,  True,  True,  True,  True,  True,  True,
         True,  True,  True,  True,  True,  True,  True,  True,  True,
         True,  True],
       [ T

In [178]:
np.linalg.norm(pca.components_[1])

0.9999999999999997

In [8]:
eigenvalues, eigenvectors = np.linalg.eig(cov)

In [9]:
eigenvalues

array([3.06686760e+00, 9.38138656e-01, 9.63907292e-17, 2.74949812e-16])

In [65]:
pca.explained_variance_

array([3.0668676 , 0.93813866])

In [68]:
pca.explained_variance_ratio_

array([0.7657585, 0.2342415])

In [11]:
pca.components_

array([[ 0.37537327, -0.56343987,  0.47853489, -0.55913753],
       [-0.77887225,  0.17161566,  0.5645045 , -0.21269872]])

In [12]:
-(eigenvectors[:, :2].T)

array([[ 0.37537327, -0.56343987,  0.47853489, -0.55913753],
       [-0.77887225,  0.17161566,  0.5645045 , -0.21269872]])

In [13]:
xpca

array([[ 0.23675929,  0.79205581],
       [ 0.96430644, -1.33873515],
       [-2.64001914,  0.06398657],
       ...,
       [ 1.64837173, -0.88211564],
       [-2.08038828,  0.35941666],
       [ 0.01904595, -0.24080798]])

In [14]:
(X_train_sc - X_train_sc.mean(axis=0)).dot(pca.components_.T)

array([[ 0.23675929,  0.79205581],
       [ 0.96430644, -1.33873515],
       [-2.64001914,  0.06398657],
       ...,
       [ 1.64837173, -0.88211564],
       [-2.08038828,  0.35941666],
       [ 0.01904595, -0.24080798]])

1st and 2nd component vectors are orthonormal

In [15]:
np.round((eigenvectors[:, :2].T).dot(eigenvectors[:, :2]), 2)

array([[1., 0.],
       [0., 1.]])