### Import the required libraries

In [1]:
import numpy as np
from sklearn.decomposition import PCA
from sklearn.preprocessing import StandardScaler

In [2]:
# Toy dataset used throughout the notebook: one sample per row, one feature
# per column, stored as integers.
M = np.array(
    [
        [1, 2, 3, 4],
        [5, 5, 6, 7],
        [1, 4, 2, 3],
        [5, 3, 2, 1],
        [8, 1, 2, 2],
    ],
    dtype=int,
)

M

array([[1, 2, 3, 4],
       [5, 5, 6, 7],
       [1, 4, 2, 3],
       [5, 3, 2, 1],
       [8, 1, 2, 2]])

### Step 1: Standardize the dataset (zero mean and unit variance per feature)

In [3]:
# Standardize every feature (column) of M so that no feature dominates the
# covariance matrix merely because of its scale.
ss = StandardScaler()

# fit_transform learns the per-column statistics from M and applies them to M.
M_standard = ss.fit_transform(M)
M_standard

array([[-1.11803399, -0.70710678,  0.        ,  0.29138576],
       [ 0.372678  ,  1.41421356,  1.93649167,  1.74831455],
       [-1.11803399,  0.70710678, -0.64549722, -0.19425717],
       [ 0.372678  ,  0.        , -0.64549722, -1.16554303],
       [ 1.49071198, -1.41421356, -0.64549722, -0.6799001 ]])

### Step 2: Calculate the covariance matrix for the features in the dataset.

In [4]:
# rowvar=False tells np.cov that the variables (features) are the columns and
# the observations are the rows, giving an (n_features x n_features) matrix.
# The diagonal is n / (n - 1) = 1.25 rather than 1 because np.cov divides by
# n - 1 by default, whereas the standardized columns have unit *population*
# variance.
cov_matrix = np.cov(M_standard, rowvar=False)
cov_matrix

array([[ 1.25      , -0.39528471,  0.06014065, -0.22623554],
       [-0.39528471,  1.25      ,  0.79876206,  0.77265317],
       [ 0.06014065,  0.79876206,  1.25      ,  1.17555437],
       [-0.22623554,  0.77265317,  1.17555437,  1.25      ]])

### Step 3: Calculate the eigenvalues and eigenvectors for the covariance matrix.

In [5]:
# np.linalg.eig returns the eigenvectors as the COLUMNS of `eigenvectors`:
# eigenvectors[:, i] is the eigenvector that belongs to eigenvalues[i].
# The eigenvalues are not guaranteed to be returned in any particular order,
# which is why they are sorted explicitly in the next step.
eigenvalues, eigenvectors  = np.linalg.eig(cov_matrix)
eigenvalues

array([3.14474155, 1.33161063, 0.4923588 , 0.03128901])

### Step 4: Sort the eigenvalues and eigenvectors in descending order of eigenvalue

In [14]:
# Despite its name, `sorted_eigenvalues` holds the INDICES that order the
# eigenvalues from largest to smallest, not the eigenvalues themselves.
sorted_eigenvalues = np.argsort(eigenvalues)[::-1]
sorted_eigenvalues

array([0, 1, 2, 3], dtype=int64)

In [15]:
# The eigenvectors are the COLUMNS of `eigenvectors`, so the descending-order
# index array must reorder the columns. Indexing with
# `eigenvectors[sorted_eigenvalues]` would permute the rows instead, which
# scrambles the components of every eigenvector whenever the order is not
# already descending.
sorted_eigenvectors = eigenvectors[:, sorted_eigenvalues]
sorted_eigenvectors

array([[ 0.16195986, -0.91705888, -0.30707099,  0.19616173],
       [-0.52404813,  0.20692161, -0.81731886,  0.12061043],
       [-0.58589647, -0.3205394 ,  0.1882497 , -0.72009851],
       [-0.59654663, -0.11593512,  0.44973251,  0.65454704]])

### Step 5: Transform the standardized matrix by projecting it onto the sorted eigenvectors.

In [16]:
# Project the standardized data onto the sorted eigenvectors: each row of
# final_matrix is a sample, each column its coordinate along one eigenvector.
final_matrix = M_standard @ sorted_eigenvectors
print(final_matrix)

[[ 1.56561741e-02  8.45205482e-01  1.05229316e+00 -1.13874250e-01]
 [-2.85829190e+00 -8.72549250e-01 -1.19484137e-01 -6.43658054e-03]
 [-5.75566043e-02  1.40104719e+00 -4.43494340e-01  2.03640099e-01]
 [ 1.13385419e+00  2.66995807e-04 -7.60135860e-01 -2.24975997e-01]
 [ 1.76633814e+00 -1.37397042e+00  2.70821178e-01  1.41646728e-01]]


### Test against scikit-learn's PCA

In [9]:
# Reference result: scikit-learn's PCA fitted on the same standardized data,
# keeping all components.
pca = PCA()
pca_result = pca.fit_transform(M_standard)
print(pca_result)

# An eigenvector is only defined up to sign, so individual columns may be
# negated relative to the manual result. Compare magnitudes so that the check
# is insensitive to that arbitrary sign choice.
np.testing.assert_allclose(np.abs(final_matrix), np.abs(pca_result), atol=1e-8)

[[-1.56561741e-02  8.45205482e-01  1.05229316e+00  1.13874250e-01]
 [ 2.85829190e+00 -8.72549250e-01 -1.19484137e-01  6.43658054e-03]
 [ 5.75566043e-02  1.40104719e+00 -4.43494340e-01 -2.03640099e-01]
 [-1.13385419e+00  2.66995807e-04 -7.60135860e-01  2.24975997e-01]
 [-1.76633814e+00 -1.37397042e+00  2.70821178e-01 -1.41646728e-01]]


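- We can see that the 5 manual steps give the same output as scikit-learn's ready-made PCA model, <br>
except that some columns (here the 1st and the 4th) have the opposite sign. This is expected: an eigenvector is only defined up to its sign, so both results describe the same principal components.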

### Let's put it all together in one function

In [17]:
def my_PCA(matrix):
    """Project ``matrix`` onto its principal components.

    The features are standardized, the covariance matrix of the standardized
    data is eigen-decomposed, and the standardized data is projected onto the
    eigenvectors ordered by decreasing eigenvalue.

    Parameters
    ----------
    matrix : array-like of shape (n_samples, n_features)
        Input data with one sample per row and one feature per column.

    Returns
    -------
    numpy.ndarray of shape (n_samples, n_features)
        The standardized data expressed in the eigenvector basis. Column ``k``
        holds the coordinates along the eigenvector with the k-th largest
        eigenvalue. The sign of each column is arbitrary (an eigenvector is
        only defined up to sign), so individual columns may be negated
        relative to other PCA implementations.
    """
    # Step 1: standardize every feature (column).
    ss = StandardScaler()
    M_standard = ss.fit_transform(matrix)

    # Step 2: covariance between features (columns are the variables).
    # np.cov collapses the single-feature case to a 0-d array, so restore the
    # 2-D shape that the eigen-decomposition requires.
    cov_matrix = np.atleast_2d(np.cov(M_standard, rowvar=False))

    # Step 3: a covariance matrix is symmetric, so use eigh. It is the solver
    # meant for symmetric matrices and always returns real eigenvalues, whereas
    # the general-purpose eig may return complex values due to round-off.
    eigenvalues, eigenvectors = np.linalg.eigh(cov_matrix)

    # Step 4: order the eigenvectors by decreasing eigenvalue. Eigenvectors are
    # the COLUMNS of `eigenvectors`, so the columns (not the rows) are
    # reordered.
    descending_order = np.argsort(eigenvalues)[::-1]
    sorted_eigenvectors = eigenvectors[:, descending_order]

    # Step 5: project the standardized data onto the sorted eigenvectors.
    final_matrix = M_standard @ sorted_eigenvectors

    return final_matrix

In [18]:
# Run the all-in-one function on the raw matrix M; my_PCA standardizes its
# input internally, so M is passed unscaled.
print(my_PCA(M))

[[ 1.56561741e-02  8.45205482e-01  1.05229316e+00 -1.13874250e-01]
 [-2.85829190e+00 -8.72549250e-01 -1.19484137e-01 -6.43658054e-03]
 [-5.75566043e-02  1.40104719e+00 -4.43494340e-01  2.03640099e-01]
 [ 1.13385419e+00  2.66995807e-04 -7.60135860e-01 -2.24975997e-01]
 [ 1.76633814e+00 -1.37397042e+00  2.70821178e-01  1.41646728e-01]]
