In [2]:

from numpy import array
from numpy import mean
from numpy import cov
from numpy.linalg import eig

In [3]:
# define the data matrix: 10 rows (samples) x 2 columns (variables)
A = array([
    [2.5, 2.4],
    [0.5, 0.7],
    [2.2, 2.9],
    [1.9, 2.2],
    [3.1, 3.0],
    [2.3, 2.7],
    [2.0, 1.6],
    [1.0, 1.1],
    [1.5, 1.6],
    [1.1, 0.9],
])
print(A)

[[2.5 2.4]
 [0.5 0.7]
 [2.2 2.9]
 [1.9 2.2]
 [3.1 3. ]
 [2.3 2.7]
 [2.  1.6]
 [1.  1.1]
 [1.5 1.6]
 [1.1 0.9]]


In [5]:
# mean of each column of A: average over the rows (axis 0),
# giving one value per variable
M = mean(A, axis=0)
print(M)

[1.81 1.91]


In [6]:
# center the data: subtract each column's mean from every row of A
# (M is lifted to a single row so the broadcast over the rows is explicit)
C = A - M[None, :]
print(C)

[[ 0.69  0.49]
 [-1.31 -1.21]
 [ 0.39  0.99]
 [ 0.09  0.29]
 [ 1.29  1.09]
 [ 0.49  0.79]
 [ 0.19 -0.31]
 [-0.81 -0.81]
 [-0.31 -0.31]
 [-0.71 -1.01]]


### covariance matrix

![image.png](attachment:image.png)

In [24]:
# inspect the centered data: one row per sample, one column per variable
C

array([[ 0.69,  0.49],
       [-1.31, -1.21],
       [ 0.39,  0.99],
       [ 0.09,  0.29],
       [ 1.29,  1.09],
       [ 0.49,  0.79],
       [ 0.19, -0.31],
       [-0.81, -0.81],
       [-0.31, -0.31],
       [-0.71, -1.01]])

In [25]:
# transposed layout (one row per variable) - this is what gets passed to cov() below
C.T

array([[ 0.69, -1.31,  0.39,  0.09,  1.29,  0.49,  0.19, -0.81, -0.31,
        -0.71],
       [ 0.49, -1.21,  0.99,  0.29,  1.09,  0.79, -0.31, -0.81, -0.31,
        -1.01]])

In [7]:
# covariance matrix of the two variables held in the columns of C
# rowvar=False tells cov() that each column (not each row) is a variable,
# which is the same as handing it the transposed matrix
V = cov(C, rowvar=False)
print(V)

[[0.61655556 0.61544444]
 [0.61544444 0.71655556]]


In [10]:
# eigen decomposition of the covariance matrix V
# `values` holds the eigenvalues; column i of `vectors` is the eigenvector
# that belongs to values[i]
# NOTE: eig() does not sort its results - for this data the smaller
# eigenvalue happens to come first (see the printed `values` in the next cell)
values, vectors = eig(V)
print(vectors)



[[-0.73517866 -0.6778734 ]
 [ 0.6778734  -0.73517866]]


In [11]:
# eigenvalues of V, in the same order as the columns of `vectors`
# (i.e. the variance of the data along each eigenvector)
print(values)

[0.0490834  1.28402771]


In [28]:
# project the centered data onto the eigenvectors
# rows of vectors.transpose() are the eigenvectors and columns of
# C.transpose() are the samples, so P has one row per eigenvector
# and one column per sample
P = vectors.transpose().dot(C.transpose())
# print sample-by-row; the columns follow the (unsorted) order of `values`,
# so here the first column is the low-variance direction
print(P.transpose())

[[-0.17511531 -0.82797019]
 [ 0.14285723  1.77758033]
 [ 0.38437499 -0.99219749]
 [ 0.13041721 -0.27421042]
 [-0.20949846 -1.67580142]
 [ 0.17528244 -0.9129491 ]
 [-0.3498247   0.09910944]
 [ 0.04641726  1.14457216]
 [ 0.01776463  0.43804614]
 [-0.16267529  1.22382056]]


In [None]:
# The projection above was calculated manually; the same result can be obtained with scikit-learn's built-in PCA class, as shown below.

In [14]:
from sklearn.decomposition import PCA

# build a PCA estimator that keeps two principal components
pca = PCA(n_components=2)

In [19]:
# fit the PCA model on the raw (uncentered) data matrix A;
# fit() returns the estimator itself, so the cell displays its repr
pca.fit(A)

PCA(copy=True, iterated_power='auto', n_components=2, random_state=None,
  svd_solver='auto', tol=0.0, whiten=False)

In [16]:
# access the fitted principal axes and the variance explained by each one
# components_: one row per principal component, one column per input feature
# (per the scikit-learn PCA documentation)
# explained_variance_: variance of the data along each component
print(pca.components_)
print(pca.explained_variance_)

[[-0.6778734  -0.73517866]
 [-0.73517866  0.6778734 ]]
[1.28402771 0.0490834 ]


In [21]:
# project A onto the fitted principal components
# the result holds the same numbers as the manual projection printed earlier
# (P.T), but with the two columns swapped: here the high-variance component
# comes first
B = pca.transform(A)
print(B)

[[-0.82797019 -0.17511531]
 [ 1.77758033  0.14285723]
 [-0.99219749  0.38437499]
 [-0.27421042  0.13041721]
 [-1.67580142 -0.20949846]
 [-0.9129491   0.17528244]
 [ 0.09910944 -0.3498247 ]
 [ 1.14457216  0.04641726]
 [ 0.43804614  0.01776463]
 [ 1.22382056 -0.16267529]]


In [23]:
# variance explained by each principal component - the same numbers as the
# eigenvalues of the covariance matrix V computed manually above, listed
# largest first
print(pca.explained_variance_) 

[1.28402771 0.0490834 ]
