# ECE 445 (ML for ENGG): Mini Jupyter Exercise #2

*Eric Jiang
ewj12 158002948*

10/27/2018

In [1]:
import matplotlib as plt
import mpl_toolkits.mplot3d as plt3d
import numpy as np
%matplotlib inline

**Synthetic Data**

Problem 1

use np.random.normal(mean,variance,(rows,cols))

In [7]:
from numpy.linalg import matrix_rank
A = np.random.normal(0,1,(3,2))
print(A)
print(matrix_rank(A))


[[-1.84177669 -0.70226999]
 [ 1.14912855 -0.2138346 ]
 [-0.36642354  0.52979557]]
2


**Generation of Dataset #1**

Problem 1

In [73]:
X = np.empty([3,500])
V = np.empty([2,500])

i = 0
while i < 500:
    v = np.random.normal(0,1,(2,1))
    np.hstack((V,v))
    x = A@v
    np.hstack((X,x))
    i += 1
    
    #Alternatively could use:
    #V[:,:-1] = v 
    #X[:,:-1] = np.matmul(A,v)
    #np.c_[V,v]
    #np.c_[X,A@v]
    #use np.r_[mat1,mat2] for vstack (rows)
    #this took me forever to figure out!

print('The dimensionality of array X is:', np.shape(X))

V = np.random.normal(0,1,(2,500))
X1 = A@V
print('\nThe rank of X=A(3x2)V(2x500) is:', matrix_rank(X1))


The dimensionality of array X is: (3, 500)

The rank of X=A(3x2)V(2x500) is: 2


Since the x=Av data pts have dimensions of 3x1, with 500 cols of the 3 rowed x's then X's dimensions (3,500) confirms my expectations

Rank of 2 may possibly be due to the fact that X's multipliers A & V both can only have a rank of 2 given max 2 rows in V and max 2 cols in A

**Singular Value and Eigenvalue Decomposition of Dataset #1**

Problem 1

In [74]:
from numpy import linalg as la

B= '\033[1m'
E= '\033[0m'

LS, DS, RS = la.svd(X1, full_matrices=True) 
#print(X1.shape,LS.shape,DS.shape,RS.shape)
#reconstructX = LS @ np.diag(DS) @ RS
#la.norm(reconstructX-X)

#lS = Left Singular Vectors, DS = Singular Values, RS = Right Singular Vectors
XXt = X1@X1.T
#XXt = np.nan_to_num(XXt)
Eigval, Eigvec = la.eig(XXt)

# X's LS = XXt's Eigvec
print(B+'\nLeft Singular Vectors of X:\n'+E, LS,'\n')
print(B+'Eigenvectors of XXt:\n'+E,Eigvec,'\n')

# XXt's Eigvals = X's DS^2
print(B+'Eigenvalues of XXt:'+E,Eigval)
print(B+'Square of Singular Values of X:'+E,np.square(DS))

# X's Norm^2 = Sum of X's DS^2
print(B+'\nFrobenius Norm or Energy of X:'+E,la.norm(X1)**2)
print(B+'Sum of squares of Singular Vals of X:'+E,np.sum(DS**2))

print(B+'\nSingular Values of X:'+E,DS)
print(B+'Left Singular Vectors of X\n'+E,LS)
print(B+'Matrix A\n'+E,A)

[1m
Left Singular Vectors of X:
[0m [[-0.65200071  0.74401346 -0.14607891]
 [-0.3216326  -0.09692613  0.94189054]
 [ 0.68662038  0.66109704  0.30249489]] 

[1mEigenvectors of XXt:
[0m [[ 0.65200071  0.74401346 -0.14607891]
 [ 0.3216326  -0.09692613  0.94189054]
 [-0.68662038  0.66109704  0.30249489]] 

[1mEigenvalues of XXt:[0m [3.65343262e+03 7.38718431e+01 1.45682893e-13]
[1mSquare of Singular Values of X:[0m [3.65343262e+03 7.38718431e+01 5.88539729e-30]
[1m
Frobenius Norm or Energy of X:[0m 3727.3044598287042
[1mSum of squares of Singular Vals of X:[0m 3727.304459828704
[1m
Singular Values of X:[0m [6.04436317e+01 8.59487307e+00 2.42598378e-15]
[1mLeft Singular Vectors of X
[0m [[-0.65200071  0.74401346 -0.14607891]
 [-0.3216326  -0.09692613  0.94189054]
 [ 0.68662038  0.66109704  0.30249489]]
[1mMatrix A
[0m [[ 1.10843719 -1.38808897]
 [ 0.38801336 -0.76705261]
 [-0.67289342  1.71807559]]


Problem 2

- None of the singular values are exactly zero because when singular value deconstruction is used there is noise in the separation. The noise causes the singular value to oscillate close to zero but not exactly zero.

- The relationship between the left singular vectors of X and the two largest singular values and the columns of A

**PCA of Dataset #1**

Problem 1


There only needs to be 2 principal components to capture all the data variation.

Since X was created by two 2-D arrays with max rank of 2 (both x,y variation), it can be assumed that the variation can only be along the 2-D space with the third dimension z being left at 0 variation. Thus there only needs to be two principal components along the x-y plane to capture all the data variation.

Problem 2

In [114]:
from numpy import array, dot, mean, std, empty, argsort
from numpy.linalg import eigh, solve
from numpy.random import randn
from matplotlib.pyplot import subplots, show

def cov(data):
    return dot(data.T, data) / data.shape[0]

#PCA with eigenvalues, also centers the mean and scales the data
def nppca(data, pc_count = None):

    data -= mean(data, 0)
    data /= std(data, 0)
    C = cov(data)
    E, V = eigh(C)
    key = argsort(E)[::-1][:pc_count]
    E, V = E[key], V[:, key]
    U = dot(data, V)
    return U, E, V

xtrans = nppca(x, 3)[0]

transmean = xtrans.mean()
print(B+'Estimated Value xk for k = 1,2,3:'+E,transmean)

Xmean = X.mean()
Xmeanvec = X.mean(0)
print(B+'\nMean of sum of X vectors:'+E,Xmean)
print(B+'\nX vector mean entries all are ~ 0:\n'+E,Xmeanvec)

[1mEstimated Value xk for k = 1,2,3:[0m 0.0
[1m
Mean of sum of X vectors:[0m 0.0
[1m
X vector mean entries all are ~ 0:
[0m [ 0.00000000e+00 -7.40148683e-17  0.00000000e+00  0.00000000e+00
 -9.25185854e-18 -3.70074342e-17  0.00000000e+00  0.00000000e+00
  0.00000000e+00  0.00000000e+00  0.00000000e+00  0.00000000e+00
  0.00000000e+00  1.85037171e-17  0.00000000e+00  0.00000000e+00
  0.00000000e+00  0.00000000e+00  0.00000000e+00 -1.85037171e-17
  0.00000000e+00  0.00000000e+00  0.00000000e+00  0.00000000e+00
  0.00000000e+00  2.31296463e-18  1.85037171e-17  1.85037171e-17
  0.00000000e+00  0.00000000e+00 -1.85037171e-17  0.00000000e+00
  0.00000000e+00  0.00000000e+00  0.00000000e+00  0.00000000e+00
 -9.25185854e-18  0.00000000e+00  0.00000000e+00 -3.70074342e-17
  0.00000000e+00  3.70074342e-17  3.70074342e-17 -4.62592927e-18
  0.00000000e+00 -3.70074342e-17  2.31296463e-18 -7.40148683e-17
 -9.25185854e-18  0.00000000e+00  0.00000000e+00 -9.25185854e-18
 -3.70074342e-17  3.70074

Problem 3

In [119]:
#Using the pca above for matrix X
Xtrans = nppca(X, 3)[0]

#Retrieving x, y PCs for U[PCx,PCy]
ux = np.reshape(Xtrans[:,0],(3,1)) #PCx
uy = np.reshape(Xtrans[:,1],(3,1)) #PCy
uz = np.reshape(Xtrans[:,2],(3,1)) #PCz
U = np.hstack((ux,uy))

print(B+"Top two principal components U[PCx, PCy]:\n"+E,U)
#print(uz) 

[1mTop two principal components U[PCx, PCy]:
[0m [[-23.75054773   5.64477552]
 [ 18.05358409  13.45226974]
 [  5.69696364 -19.09704526]]


Problem 4