In [1]:
import numpy as np

In [2]:
class SVD_PCA:
    """Principal component analysis via SVD of the sample covariance matrix.

    The training data is mean-centred (not variance-scaled), its covariance
    matrix is formed with a 1/n normalisation, and the rows of ``Vh`` returned
    by ``np.linalg.svd`` are used as principal axes.

    Attributes
    ----------
    n_components : int
        Number of leading principal axes kept by ``fit``.
    components : ndarray of shape (n_components, n_features) or None
        Principal axes, one per row; ``None`` until ``fit`` has been called.
    X : ndarray or None
        Mean-centred copy of the training data; ``None`` until ``fit``.
    """

    def __init__(self, n_components):
        self._mean = None
        self._var = None
        self._cov = None
        # Initialised here so that "not fitted yet" is detectable instead of
        # surfacing later as an AttributeError/TypeError.
        self.X = None
        self.components = None
        self.n_components = n_components

    @staticmethod
    def _scatter(X_centered):
        """Return the 1/n-normalised covariance of already-centred data."""
        n_sample = X_centered.shape[0]
        # 1.0 (not 1) keeps this a true division under Python 2 as well.
        return (1.0 / n_sample) * np.dot(X_centered.T, X_centered)

    @staticmethod
    def _ordered_svd(cov):
        """Return (order, singular values, Vh rows) sorted from large to small."""
        _, s, Vh = np.linalg.svd(cov)
        order = np.argsort(s)[::-1]
        return order, s[order], Vh[order]

    def zero_centering(self):
        """Store per-column mean/variance of ``self.X`` and centre ``self.X``.

        ``self.X`` is rebound to a new centred array; the array passed in by
        the caller is not modified. The variance is recorded but not used for
        scaling.
        """
        self._mean = np.mean(self.X, axis=0, dtype=np.float64)
        self._var = np.var(self.X, axis=0, dtype=np.float64)
        self.X = self.X - self._mean

    def covariance(self):
        """Return the 1/n covariance matrix of the centred training data.

        Raises
        ------
        RuntimeError
            If no training data has been stored yet (``fit`` not called).
        """
        if self.X is None:
            raise RuntimeError("SVD_PCA is not fitted yet; call fit() first.")
        return self._scatter(self.X)

    def score(self, X):
        """Return the share of variance carried by each principal direction of X.

        The computation uses only ``X`` (it is centred with its own mean) and
        does not depend on any fitted state.

        Parameters
        ----------
        X : array-like of shape (n_samples, n_features)

        Returns
        -------
        dict
            Maps the component index to its fraction of the summed singular
            values, formatted as a whole-number percentage string.
        """
        X = np.asarray(X, dtype=np.float64)
        X_0 = X - np.mean(X, axis=0, dtype=np.float64)

        order, s, _ = self._ordered_svd(self._scatter(X_0))
        total = s.sum()  # hoisted: previously recomputed on every iteration

        return {int(d): "{:.0%}".format(v / total) for d, v in zip(order, s)}

    def fit(self, X, y=None):
        """Learn the mean and the leading principal axes of ``X``.

        Parameters
        ----------
        X : array-like of shape (n_samples, n_features)
        y : ignored
            Accepted only for signature compatibility.

        Returns
        -------
        SVD_PCA
            ``self``, so calls can be chained.

        Raises
        ------
        ValueError
            If ``X`` is not two-dimensional.
        """
        data = np.asarray(X, dtype=np.float64)
        if data.ndim != 2:
            raise ValueError(
                "X must be 2-dimensional (n_samples, n_features); got ndim={}".format(data.ndim)
            )

        self.X = data
        self.zero_centering()

        self._cov = self._scatter(self.X)
        _, _, Vh = self._ordered_svd(self._cov)

        # Rows of Vh are the principal axes; keep the leading n_components.
        self.components = Vh[:self.n_components]
        return self

    def transform(self, X, y=None):
        """Project ``X`` onto the fitted principal axes.

        ``X`` is centred with the mean learned in ``fit`` (not its own mean).

        Parameters
        ----------
        X : array-like of shape (n_samples, n_features)
        y : ignored

        Returns
        -------
        ndarray of shape (n_samples, n_components)

        Raises
        ------
        RuntimeError
            If called before ``fit``.
        """
        if self.components is None:
            raise RuntimeError("SVD_PCA is not fitted yet; call fit() first.")

        centred = np.asarray(X, dtype=np.float64) - self._mean
        return np.dot(centred, self.components.T)


Load the test data

In [4]:
from sklearn import datasets
from sklearn.model_selection import train_test_split
# Only the iris data is used below. The former `datasets.load_boston()` call
# was dropped: the loader no longer exists in current scikit-learn releases
# and its result was never read.
iris = datasets.load_iris()
# A fixed random_state keeps the split, and every output below, reproducible.
X_train, X_test, y_train, y_test = train_test_split(
    iris.data, iris.target, test_size=0.33, random_state=42
)

Calculate the share of total variance explained by each principal component

In [5]:
# Build a 2-component model, fit it on the training split, then display the
# per-component variance shares (the last expression is the cell output).
pca = SVD_PCA(n_components=2)
pca.fit(X_train)
pca.score(X_train)

{0: '92%', 1: '5%', 2: '2%', 3: '1%'}

We can keep the first two principal components without losing much information: together they account for about 97% of the variance.

Fit on the training data and compare the results with scikit-learn

In [6]:
# Fresh 2-component model fitted on the training split; this is the instance
# that the comparison cells below inspect.
pca = SVD_PCA(n_components=2)
pca.fit(X_train)

In [7]:
# Reference implementation, fitted on the same training split for comparison.
from sklearn.decomposition import PCA
sk_pca = PCA(n_components=2)
sk_pca.fit(X_train)

PCA(copy=True, iterated_power='auto', n_components=2, random_state=None,
    svd_solver='auto', tol=0.0, whiten=False)

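Compare covariance matrices (the values are close but not identical: `SVD_PCA.covariance()` is the 1/n sample covariance, whereas scikit-learn's `get_covariance()` is an estimate rebuilt from the two retained components plus a noise term)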

In [8]:
# Covariance of the mean-centred training data, normalised by 1/n_samples.
pca.covariance()

array([[ 0.690875, -0.021465,  1.24206 ,  0.489915],
       [-0.021465,  0.174779, -0.266976, -0.095609],
       [ 1.24206 , -0.266976,  2.984144,  1.230796],
       [ 0.489915, -0.095609,  1.230796,  0.552739]])

In [9]:
# Covariance as reported by the scikit-learn estimator fitted above.
sk_pca.get_covariance()

array([[ 0.69208576, -0.01410431,  1.25037794,  0.51255054],
       [-0.01410431,  0.16684255, -0.26574396, -0.11583759],
       [ 1.25037794, -0.26574396,  3.02162976,  1.23067474],
       [ 0.51255054, -0.11583759,  1.23067474,  0.56644899]])

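Compare components (a principal axis is only defined up to sign, so rows may differ from scikit-learn's by a factor of -1)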

In [10]:
# Principal axes kept by fit(): one row per component, one column per feature.
pca.components

array([[-0.36873621,  0.06977632, -0.85630117, -0.35484246],
       [-0.68187006, -0.69682861,  0.1867726 ,  0.12082672]])

In [11]:
# Principal axes found by the scikit-learn estimator, for comparison.
sk_pca.components_

array([[ 0.36873621, -0.06977632,  0.85630117,  0.35484246],
       [ 0.68187006,  0.69682861, -0.1867726 , -0.12082672]])

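Compare the transform of new data (each projected column inherits the sign of its component, so the values match scikit-learn's up to sign)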

In [12]:
# Project the held-out samples onto the fitted axes; show the first 10 rows only.
pca.transform(X_test)[:10]

array([[-0.87004085,  0.11550914],
       [ 2.23549168, -0.9776333 ],
       [-3.74816334, -0.29230812],
       [-0.7613821 ,  0.11290678],
       [-1.28475481, -0.3189573 ],
       [ 2.45397801, -0.51961269],
       [ 0.22775193,  0.19339412],
       [-1.8769441 , -0.43141706],
       [-0.88397277,  0.4643128 ],
       [-0.08135668,  0.24033494]])

In [13]:
# Same projection through the scikit-learn estimator; first 10 rows only.
sk_pca.transform(X_test)[:10]

array([[ 0.87004085, -0.11550914],
       [-2.23549168,  0.9776333 ],
       [ 3.74816334,  0.29230812],
       [ 0.7613821 , -0.11290678],
       [ 1.28475481,  0.3189573 ],
       [-2.45397801,  0.51961269],
       [-0.22775193, -0.19339412],
       [ 1.8769441 ,  0.43141706],
       [ 0.88397277, -0.4643128 ],
       [ 0.08135668, -0.24033494]])