# SVD

[Relationship between PCA and SVD](https://stats.stackexchange.com/a/134283)

$\large \mathbf{X} = {\begin{pmatrix} x_{1,1}&\cdots&x_{1,p} \\ \vdots & \ddots &\vdots \\ x_{n,1}&\cdots&x_{n,p} \end{pmatrix} }_{n\times p} = {\begin{pmatrix} X_1 & \cdots&X_p \end{pmatrix} }_{n\times p}$ where $n$ is the number of samples and $p$ is the number of features.
<br/><br/>
$\large \mathrm{Cov}(X) = {\begin{pmatrix} \mathrm{cov}[X_1, X_1] & \cdots & \mathrm{cov}[X_1, X_p] \\ \vdots & \ddots &\vdots \\ \mathrm{cov}[X_p, X_1] & \cdots & \mathrm{cov}[X_p, X_p]\end{pmatrix}}_{p\times p} = {\begin{pmatrix} \mathrm{var}[X_1] & \cdots & \mathrm{cov}[X_1, X_p] \\ \vdots & \ddots &\vdots \\ \mathrm{cov}[X_p, X_1] & \cdots & \mathrm{var}[X_p]\end{pmatrix}}_{p\times p}$

In [218]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
import scipy

In [2]:
from sklearn.decomposition import PCA
from sklearn.model_selection import train_test_split
from sklearn.datasets import make_classification
from sklearn.preprocessing import StandardScaler, MinMaxScaler, RobustScaler
from sklearn.metrics.pairwise import cosine_similarity
from sklearn.metrics.pairwise import cosine_similarity

$\Large \bullet \textbf{ Gram-Schmidt process}$

$\large \displaystyle \mathrm{V} = \begin{pmatrix} \mathbf{v}_1 &\mathbf{v}_2 & \cdots & \mathbf{v}_p \end{pmatrix}$
, where  $\mathbf{v}_i$ for $i \in \{1,2,\dots,p\}$ are column vectors.

$\large \begin{align}  \mathbf{u}_1 &= \mathbf{v}_1 \\ \mathbf{u}_2 &= \mathbf{v}_2 - \frac{\left\langle \mathbf{u}_1, \mathbf{v}_2 \right\rangle}{\left\langle \mathbf{u}_1, \mathbf{u}_1 \right\rangle} \mathbf{u}_1\\ \mathbf{u}_3 &= \mathbf{v}_3- \frac{\left\langle \mathbf{u}_2, \mathbf{v}_3 \right\rangle}{\left\langle \mathbf{u}_2, \mathbf{u}_2\right\rangle} \mathbf{u}_2 - \frac{\left\langle \mathbf{u}_1, \mathbf{v}_3 \right\rangle}{\left\langle \mathbf{u}_1, \mathbf{u}_1\right\rangle} \mathbf{u}_1\\ &\ \vdots \nonumber \\\mathbf{u}_p &= \mathbf{v}_p - \sum^{p-1}_{i=1} \frac{\langle \mathbf{u}_i, \mathbf{v}_p \rangle}{\left\langle \mathbf{u}_i, \mathbf{u}_i\right\rangle} \mathbf{u}_i\\
\end{align} $

The matrix whose columns are the resulting orthogonal vectors is

$\large \therefore \mathrm{U} = \begin{pmatrix} \mathbf{u}_1 & \mathbf{u}_2 &\cdots & \mathbf{u}_p \end{pmatrix}$

In [162]:
class gram_schmidt:
    """Classical Gram-Schmidt orthogonalization of the vectors in a 2-D array.

    The class holds no state; ``fit_transform`` does all the work.
    Real and complex inputs are supported, and integer inputs are promoted to
    floating point before any arithmetic is done.
    """

    def _proj(self, u, v):
        """Return the projection of ``v`` onto ``u``: ``(<u, v> / <u, u>) * u``.

        ``np.vdot`` conjugates its *first* argument, so the inner product
        ``<u, v> = u^H v`` is ``np.vdot(u, v)``. The argument order only matters
        for complex vectors; for real vectors both orders give the same value.
        """
        return (np.vdot(u, v) / np.vdot(u, u)) * u

    def fit_transform(self, X, col_vec = True, normal = True):
        """Orthogonalize the vectors stored in ``X``.

        Parameters
        ----------
        X : array-like, 2-D
            Vectors to orthogonalize. The input is never modified.
        col_vec : bool, default True
            If True the vectors are the columns of ``X``; otherwise the rows.
        normal : bool, default True
            If True every resulting vector is scaled to unit Euclidean norm.

        Returns
        -------
        numpy.ndarray
            Array with the same shape and orientation as ``X`` whose vectors
            are mutually orthogonal (orthonormal when ``normal`` is True).

        Notes
        -----
        If the input vectors are linearly dependent, some residual has (near)
        zero norm and its normalization is ill-defined.
        """
        X = np.asarray(X)
        # Work in at least float64 so the in-place subtraction below is legal
        # for integer input and complex input keeps its imaginary part.
        work_dtype = np.result_type(X.dtype, np.float64)
        # Always operate on columns; np.array(...) makes a fresh copy.
        mat = np.array(X if col_vec else X.T, dtype=work_dtype)

        n_vectors = mat.shape[1]
        mat_orth = np.empty_like(mat)
        for n in range(n_vectors):
            v = mat[:, n]
            u = v.copy()
            # Classical Gram-Schmidt: every projection uses the original v.
            for i in range(n):
                u -= self._proj(mat_orth[:, i], v)
            mat_orth[:, n] = u

        if normal:
            mat_orth = mat_orth / np.linalg.norm(mat_orth, axis=0)

        return mat_orth if col_vec else mat_orth.T

In [263]:
# Disabled draft: the whole cell is one string literal, so no function is defined
# and nothing in it runs. The draft `svd(data, reduced='thin')` eigendecomposes
# both data·dataᵀ and dataᵀ·data, orthonormalizes the eigenvectors with
# gram_schmidt, and returns (u, S, s, v). It appears to be superseded by
# `svd_thin` below — TODO confirm and delete this cell if so.
'''def svd(data, reduced='thin'):
    dim = data.shape[1]
    rank = np.linalg.matrix_rank(data)
    relu = np.vectorize(lambda x: np.real(x) if np.real(x)>=0 else .0)
    
    eval_u, evec_u = np.linalg.eig(data.dot(data.T), )
    eval_v, evec_v = np.linalg.eig(data.T.dot(data), )
    
    gs = gram_schmidt()
    evec_u_gs = gs.fit_transform(evec_u)
    evec_v_gs = gs.fit_transform(evec_v)
    print(np.linalg.det(evec_u))
    print(np.linalg.det(evec_u_gs))
    
    
    s = eval_v.copy()
    s = np.sqrt(relu(s))
    s1 = np.sort(s)[::-1]
    if dim > rank:
        s1[-(dim-rank):] = 0
    
    u_idx = np.sqrt(relu(np.real(eval_u))).argsort()[-dim:][::-1]
    if reduced=='thin':
        u = evec_u_gs[:, u_idx ]
        S = np.eye(dim)
        S *= s1     
    else:        
        u = np.hstack((evec_u_gs[:, u_idx ], np.delete(evec_u_gs, u_idx, axis=1)))
        S = np.zeros(data.shape)
        np.fill_diagonal(S, s1)
    
    v_idx = np.sqrt(relu(np.real(eval_v))).argsort()[-dim:][::-1]
    v = evec_v_gs[:, v_idx ]
    
    return u, S, s, v'''

In [283]:
def svd_thin(data, ):
    """Thin (reduced) SVD of ``data`` via the eigendecomposition of ``data^H data``.

    Parameters
    ----------
    data : array-like, shape (n, p)
        Matrix to decompose.

    Returns
    -------
    u : numpy.ndarray, shape (n, p)
        Left singular vectors as columns. Columns belonging to a zero singular
        value are left as zero vectors, so ``u`` has orthonormal columns only
        when ``data`` has full column rank.
    S : numpy.ndarray, shape (p, p)
        Diagonal matrix of singular values in descending order.
    v : numpy.ndarray, shape (p, p)
        Right singular vectors as orthonormal columns.

    The factors satisfy ``data ≈ u @ S @ v.conj().T`` (``v.T`` for real data).
    Individual singular vectors are only determined up to sign/phase.
    """
    data = np.asarray(data)
    dim = data.shape[1]
    rank = np.linalg.matrix_rank(data)

    # data^H data is Hermitian, so eigh applies: it returns real eigenvalues and
    # orthonormal eigenvectors, with no separate orthogonalization step needed.
    eigvals, eigvecs = np.linalg.eigh(data.conj().T.dot(data))

    # eigh sorts ascending; singular values are conventionally descending.
    order = np.argsort(eigvals)[::-1]
    # Round-off can push tiny eigenvalues slightly below zero; clip before sqrt.
    s1 = np.sqrt(np.clip(eigvals[order], 0.0, None))
    if dim > rank:
        # Values beyond the numerical rank are noise; make them exact zeros.
        s1[rank:] = 0
    v = eigvecs[:, order]

    S = np.diag(s1)

    # u_i = data v_i / s_i is only defined for s_i > 0. Dividing the whole
    # matrix by s1 would put NaN/inf into u for rank-deficient input.
    nonzero = s1 > 0
    u = np.zeros(data.shape, dtype=np.result_type(data.dtype, v.dtype))
    u[:, nonzero] = data.dot(v[:, nonzero]) / s1[nonzero]

    return u, S,  v

# Codes

In [295]:
# Synthetic, imbalanced classification problem: 1000 samples, 6 features with
# no redundant ones, and a class weight of 0.9 for the first class.
data = make_classification(
    n_samples=10**3,
    n_features=6,
    n_redundant=0,
    weights=[0.9],
    random_state=42,
)

In [296]:
# Split the generated pair into its first element (features) and second (labels).
data_x, data_y = data

$\large \displaystyle \mathbf{X}_{\normalsize standardized} = \begin{pmatrix} \frac{X_1 -  \mu_1}{\sigma_1} & \frac{X_2 -  \mu_2}{\sigma_2} & \cdots &\frac{X_p -  \mu_p}{\sigma_p}  \end{pmatrix} $
, where $\mu_i$ and $\sigma_i$ are mean and standard deviation of the vector $X_i$ for $i \in \{1,2,\cdots,p\}$.

Therefore, the means of each column of matrix $\mathbf{X}_{\normalsize standardized}$ are all 0 and standard deviations are all 1.

In [297]:
# Fit the scaler on the features, then apply it to the same features.
scaler = StandardScaler().fit(data_x)
data_x_std = scaler.transform(data_x)

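$\large X_{n\times p} = U\; \Sigma\; V^* $

$U_{n\times n}$: its columns are orthonormal eigenvectors of $XX^* $

$\Sigma_{n\times p}$: a rectangular diagonal matrix whose diagonal entries are the square roots of the eigenvalues of $X^* X$ (all non-negative; the non-zero ones coincide with those of $X X^*$)

$V_{p\times p}$: its columns are orthonormal eigenvectors of $X^*X$ (equivalently, the rows of $V^*$ are their conjugate transposes)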

## numpy

In [307]:
# Reference decomposition from NumPy. `s` is the vector of singular values (it is
# expanded into a rectangular matrix further down), and `vh` is used directly as
# the right-hand factor in the reconstruction check.
u, s, vh = np.linalg.svd(data_x_std, full_matrices=True, )

$U$ and $V$ are both unitary matrices,

i.e. $U U^* = V V^* = I$

In [308]:
# U U* should equal the identity.
np.allclose(u @ u.conj().T, np.eye(len(u)))

True

In [309]:
# V* (V*)* should equal the identity.
np.allclose(vh @ vh.conj().T, np.eye(len(vh)))

True

In [310]:
# Embed the singular values on the main diagonal of a zero matrix shaped like the data.
S = np.zeros(data_x_std.shape, dtype=float)
S[:s.size, :s.size] = np.diag(s)

In [311]:
# Multiplying the three factors back together should recover the data.
np.allclose(u @ (S @ vh), data_x_std)

True

$\large \therefore X = U \Sigma V^*$ (up to floating-point error; `vh` above is $V^*$)

## mine: reduced(thin) SVD

In [312]:
# Run the hand-written thin SVD on the same standardized data so its factors
# can be checked the same way as NumPy's.
uu, ss, vv = svd_thin(data_x_std)

In [313]:
# Check that the V returned by svd_thin is unitary. This cell previously
# re-tested NumPy's `vh`, so the hand-written result was never verified.
np.allclose(vv.dot(vv.conjugate().T), np.eye(vv.shape[0]))

True

In [316]:
# The thin factors should also multiply back to the data; `vv` holds V, hence the transpose.
np.allclose(uu @ (ss @ vv.T), data_x_std)

True