### 1. Implementing PCA with NumPy

In [7]:
import numpy as np

##### 1.1 Rough version

In [17]:
class PCA():
    """Minimal principal component analysis via the covariance matrix.

    Parameters
    ----------
    n_components : int
        Number of leading principal components to keep. The value is used
        as a slice bound, so it must be an integer.

    Attributes
    ----------
    covariance : ndarray
        Set by ``fit_transform``: ``Xc.T @ Xc / n_samples`` where ``Xc`` is
        the column-centered input (divisor is ``n_samples``, not
        ``n_samples - 1``).
    components_ : ndarray
        Set by ``fit_transform``: the selected eigenvectors of
        ``covariance`` stored as columns, ordered by decreasing eigenvalue.
    """

    def __init__(self, n_components: int):
        self.n_components = n_components

    def fit_transform(self, X):
        """Fit the components on ``X`` and return ``X`` projected onto them.

        Parameters
        ----------
        X : array-like
            Data matrix with one sample per row and one feature per column.

        Returns
        -------
        ndarray
            The centered data multiplied by ``components_``; one column per
            retained component. The sign of each column is arbitrary, since
            an eigenvector and its negation are equally valid.
        """
        # Accept nested lists as well as arrays; ndarrays pass through as-is.
        X = np.asarray(X)
        # Center every feature (column) on zero before forming the covariance.
        X = X - X.mean(axis=0)
        self.covariance = np.dot(X.T, X) / X.shape[0]
        # The covariance matrix is symmetric, so use the symmetric solver:
        # it always yields real eigenvalues/eigenvectors, whereas the general
        # solver may return complex values caused by rounding noise.
        eig_vals, eig_vectors = np.linalg.eigh(self.covariance)
        # eigh reports eigenvalues in ascending order; reverse the ordering so
        # that the directions with the largest variance come first.
        order = np.argsort(eig_vals)[::-1]
        self.components_ = eig_vectors[:, order[:self.n_components]]
        return np.dot(X, self.components_)

In [18]:
# Toy data set: six samples (rows) with four features (columns) each.
X = np.array(
    [
        [-1,  2, 66, -1],
        [-2,  6, 58, -1],
        [-3,  8, 45, -2],
        [ 1,  9, 36,  1],
        [ 2, 10, 62,  1],
        [ 3,  5, 83,  2],
    ]
)

# Reduce the four features to the two leading principal components.
pca = PCA(n_components=2)
newX = pca.fit_transform(X)
newX

array([[  7.96504337,  -4.12166867],
       [ -0.43650137,  -2.07052079],
       [-13.63653266,  -1.86686164],
       [-22.28361821,   2.32219188],
       [  3.47849303,   3.95193502],
       [ 24.91311585,   1.78492421]])

##### 1.2 Detailed version

Omitted for now.

### 2. Implementing PCA with scikit-learn

In [19]:
import numpy as np
from sklearn.decomposition import PCA

In [21]:
# Toy data set: six samples (rows) with four features (columns) each.
X = np.array(
    [
        [-1,  2, 66, -1],
        [-2,  6, 58, -1],
        [-3,  8, 45, -2],
        [ 1,  9, 36,  1],
        [ 2, 10, 62,  1],
        [ 3,  5, 83,  2],
    ]
)

In [22]:
# Keep the two leading principal components.
pca = PCA(n_components=2)
# fit_transform both fits the model on X and returns the projected data, so a
# separate pca.fit(X) call beforehand would only repeat the same fit.
newX = pca.fit_transform(X)

In [23]:
# Show the share of total variance captured by each retained component,
# followed by the projected samples (one value per line of output).
print(pca.explained_variance_ratio_, newX, sep="\n")

[0.95713353 0.03398198]
[[  7.96504337   4.12166867]
 [ -0.43650137   2.07052079]
 [-13.63653266   1.86686164]
 [-22.28361821  -2.32219188]
 [  3.47849303  -3.95193502]
 [ 24.91311585  -1.78492421]]
