In [35]:
import numpy as np

# Generate data input
np.random.seed(2019)
dim = 4
total_sample = 100
total_class = 2
reduce_dim = 2
data = []

for i in range(total_class):
  mean_vec = np.ones(dim)*i
  cov_mat = np.identity(dim)
  class_sample = np.random.multivariate_normal(mean_vec, cov_mat, total_sample).T
  data.append(class_sample)
data = np.asarray(data)

# Ignoring class label
data_stack = np.reshape(data, (dim, total_class*total_sample))
np.savetxt(str(dim) + '_' + str(total_sample) + '_' + str(total_class) + '_' + str(reduce_dim) + '.csv', data_stack, delimiter=',')

print(np.shape(data_stack))

In [36]:
import numpy as np
import pandas as pd

fname = '4_100_2_2.csv'
data_stack = pd.read_csv(fname, header = None)
data_stack = data_stack.values


dim = int(fname.split('.')[0].split('_')[0])
total_sample = int(fname.split('.')[0].split('_')[1])
total_class = int(fname.split('.')[0].split('_')[2])
reduce_dim = int(fname.split('.')[0].split('_')[3])

print('Number of dimmensions: ' + str(dim))
print('Number of samples in each class: ' + str(total_sample))
print('Number of classes: ' + str(total_class))
print('Number of reduced dimmensions: ' + str(reduce_dim))
print('All data shape: ' + str(data_stack.shape[1]) + ' rows, ' + str(data_stack.shape[0]) + ' columns')

# Geting the mean vector
mean_vec = np.average(data_stack, axis=1)
print('Mean vector:\n' + str(mean_vec))

# Computing the Scatter Matrix
scatter_mat = np.zeros((dim,dim))
for i in range(dim):
    scatter_mat += (data_stack[:,i].reshape(dim,1) - mean_vec).dot((data_stack[:,i].reshape(dim,1) - mean_vec).T)
print('Scatter Matrix:\n' + str(scatter_mat))

# Computing the Eigenvalues and Eigenvectors
eig_val, eig_mat = np.linalg.eig(scatter_mat)
print('Eigenvalues:\n' + str(eig_val))
print('Eigenvectors:\n' + str(eig_mat))

# Sort the Eigenvalues in decreasing order
np.argsort(np.abs(eig_val))[::-1]

# Reduce (reduce_dim) dimensions: Take only (dim-reduce_dim) scatter vectors to put in the new eigen matrix
new_eig_mat = []
for i in range(0, dim-reduce_dim):
  new_eig_mat.append(eig_mat[i])
new_eig_mat = np.asarray(new_eig_mat) 
print('New eigenvectors:\n' + str(new_eig_mat))

# Project all samples to the new subspace:
proj_data = new_eig_mat.dot(data_stack)
print('Projected data shape:\n' + str(proj_data.shape[1]) + ' rows, ' + str(proj_data.shape[0]) + ' columns')

Number of dimmensions: 4
Number of samples in each class: 100
Number of classes: 2
Number of reduced dimmensions: 2
All data shape: 200 rows, 4 columns
Mean vector:
[-0.04871638  0.03771309  1.08386071  0.94384264]
Scatter Matrix:
[[ 12.43387118   8.14619893  -6.62810332  -1.01678081]
 [  8.14619893  27.56950802 -10.82534438  -8.61858513]
 [ -6.62810332 -10.82534438  38.86807884  19.55556185]
 [ -1.01678081  -8.61858513  19.55556185  16.69238294]]
Eigenvalues:
[58.52485595 22.89624789  3.53122913 10.61150801]
Eigenvectors:
[[ 0.19595981  0.33292571 -0.44134837  0.80992089]
 [ 0.43972747  0.80973245  0.22587454 -0.31615471]
 [-0.75356854  0.43854765 -0.43086555 -0.23273439]
 [-0.44763151  0.20290338  0.75402069  0.43578556]]
New eigenvectors:
[[ 0.19595981  0.33292571 -0.44134837  0.80992089]
 [ 0.43972747  0.80973245  0.22587454 -0.31615471]]
Projected data shape:
200 rows, 2 columns
