In [0]:
import numpy as np

In [2]:
# Generate data input
np.random.seed(2019)
dim = 4
total_class = 2
total_sample = 100
data = []
reduce_dim = 2

for i in range(total_class):
  mean_vec = np.ones(dim)*i
  cov_mat = np.identity(dim)
  class_sample = np.random.multivariate_normal(mean_vec, cov_mat, total_sample).T
  data.append(class_sample)
data = np.asarray(data)

# Ignoring class label
data_stack = np.reshape(data, (dim, total_class*total_sample))

print(np.shape(data_stack))

(4, 200)


In [3]:
# Geting the mean vector
mean_vec = np.average(data_stack, axis=1)
print(mean_vec)

[-0.04871638  0.03771309  1.08386071  0.94384264]


In [4]:
# Computing the Scatter Matrix
scatter_mat = np.zeros((dim,dim))
for i in range(dim):
    scatter_mat += (data_stack[:,i].reshape(dim,1) - mean_vec).dot((data_stack[:,i].reshape(dim,1) - mean_vec).T)
print('Scatter Matrix:\n', scatter_mat)

Scatter Matrix:
 [[ 12.43387118   8.14619893  -6.62810332  -1.01678081]
 [  8.14619893  27.56950802 -10.82534438  -8.61858513]
 [ -6.62810332 -10.82534438  38.86807884  19.55556185]
 [ -1.01678081  -8.61858513  19.55556185  16.69238294]]


In [5]:
# Computing the Eigenvalues and Eigenvectors
eig_val, eig_mat = np.linalg.eig(scatter_mat)
print('Scatter values:\n', eig_val)
print('Scatter vectors:\n', eig_mat)

Scatter values:
 [58.52485595 22.89624789  3.53122913 10.61150801]
Scatter vectors:
 [[ 0.19595981  0.33292571 -0.44134837  0.80992089]
 [ 0.43972747  0.80973245  0.22587454 -0.31615471]
 [-0.75356854  0.43854765 -0.43086555 -0.23273439]
 [-0.44763151  0.20290338  0.75402069  0.43578556]]


In [17]:
# Sort the Eigenvalues in decreasing order
np.argsort(np.abs(eig_val))[::-1]

# Reduce (reduce_dim) dimensions: Take only (dim-reduce_dim) scatter vectors to put in the new eigen matrix
new_eig_mat = []
for i in range(0, dim-reduce_dim):
  new_eig_mat.append(eig_mat[i])
new_eig_mat = np.asarray(new_eig_mat) 
new_eig_mat

array([[ 0.19595981,  0.33292571, -0.44134837,  0.80992089],
       [ 0.43972747,  0.80973245,  0.22587454, -0.31615471]])

In [24]:
# Project all samples to the new subspace:
proj_data = new_eig_mat.dot(data_stack)
proj_data.shape

(2, 200)