# Principal Component Analysis

In [10]:
import pandas as pd
import numpy as np

from numpy.polynomial import Polynomial

from sklearn.datasets import load_iris

In [6]:
iris_data = load_iris()
df = pd.DataFrame(iris_data.data, columns = iris_data.feature_names)
df

Unnamed: 0,sepal length (cm),sepal width (cm),petal length (cm),petal width (cm)
0,5.1,3.5,1.4,0.2
1,4.9,3.0,1.4,0.2
2,4.7,3.2,1.3,0.2
3,4.6,3.1,1.5,0.2
4,5.0,3.6,1.4,0.2
...,...,...,...,...
145,6.7,3.0,5.2,2.3
146,6.3,2.5,5.0,1.9
147,6.5,3.0,5.2,2.0
148,6.2,3.4,5.4,2.3


In [7]:
df.corr()

Unnamed: 0,sepal length (cm),sepal width (cm),petal length (cm),petal width (cm)
sepal length (cm),1.0,-0.11757,0.871754,0.817941
sepal width (cm),-0.11757,1.0,-0.42844,-0.366126
petal length (cm),0.871754,-0.42844,1.0,0.962865
petal width (cm),0.817941,-0.366126,0.962865,1.0


In [8]:
df.cov()   # Spread + Direction

Unnamed: 0,sepal length (cm),sepal width (cm),petal length (cm),petal width (cm)
sepal length (cm),0.685694,-0.042434,1.274315,0.516271
sepal width (cm),-0.042434,0.189979,-0.329656,-0.121639
petal length (cm),1.274315,-0.329656,3.116278,1.295609
petal width (cm),0.516271,-0.121639,1.295609,0.581006


# Eigen Decomposition

# Roots

In [11]:
# X^2 - 5X - 6 = 0

p1 = Polynomial([-6,-5,1])
p1.roots()

array([-1.,  6.])

In [13]:
cov_matrix = df.cov()
cov_matrix

Unnamed: 0,sepal length (cm),sepal width (cm),petal length (cm),petal width (cm)
sepal length (cm),0.685694,-0.042434,1.274315,0.516271
sepal width (cm),-0.042434,0.189979,-0.329656,-0.121639
petal length (cm),1.274315,-0.329656,3.116278,1.295609
petal width (cm),0.516271,-0.121639,1.295609,0.581006


In [14]:
eigvalues = np.linalg.eigvals(cov_matrix)
eigvalues

array([4.22824171, 0.24267075, 0.0782095 , 0.02383509])

In [15]:
eigvalues, eigvectors = np.linalg.eig(cov_matrix)

In [16]:
eigvalues

array([4.22824171, 0.24267075, 0.0782095 , 0.02383509])

In [17]:
eigvectors

array([[ 0.36138659, -0.65658877, -0.58202985,  0.31548719],
       [-0.08452251, -0.73016143,  0.59791083, -0.3197231 ],
       [ 0.85667061,  0.17337266,  0.07623608, -0.47983899],
       [ 0.3582892 ,  0.07548102,  0.54583143,  0.75365743]])

In [19]:
eigvectors[0]  # 1 * 4 >> lambda = 4.22

array([ 0.36138659, -0.65658877, -0.58202985,  0.31548719])

In [20]:
eigvectors[1]  # 1 * 4 >> lambda = 0.24

array([-0.08452251, -0.73016143,  0.59791083, -0.3197231 ])

In [21]:
eigvectors[2]  # 1 * 4 >> lambda = 0.07

array([ 0.85667061,  0.17337266,  0.07623608, -0.47983899])

In [22]:
eigvectors[3]  # 1 * 4 >> lambda = 0.02

array([0.3582892 , 0.07548102, 0.54583143, 0.75365743])

In [23]:
eigvectors.shape

(4, 4)