# Lab04 - Sparsity Aware Learning
Author: [Yunting Chiu](https://www.linkedin.com/in/yuntingchiu/)

# Exercise 1


In [17]:
# Import the libraries
import random
import numpy as np
import matplotlib.pyplot as plt
from functools import reduce
%matplotlib inline
plt.style.use(['ggplot'])

## 1A

In [18]:
# Sensing matrix X: 2 observations (rows) by 3 unknowns (columns)
X = np.array([
    [0.5, 2.0, 1.5],
    [2.0, 2.3, 3.5],
])
# True 1-sparse parameter vector, written directly as a 3x1 column
theta = np.array([[2.5], [0.0], [0.0]])
# Noise-free observations generated from the true parameters
y = X.dot(theta)
print(theta)

[[2.5]
 [0. ]
 [0. ]]


In [19]:
# Display the 2x3 matrix X defined in the 1A setup cell.
print(X)

[[0.5 2.  1.5]
 [2.  2.3 3.5]]


In [20]:
# Display the observation vector y = X . theta computed in the 1A setup cell.
print(y)

[[1.25]
 [5.  ]]


According to the textbook (9.10), the L2-norm minimizer admits the following closed-form solution:

$$
\hat{\theta} = X^T (XX^T)^{-1} y
$$

In [21]:
# Minimum L2-norm solution of the underdetermined system X . theta = y:
#     theta2 = X^T (X X^T)^{-1} y
# Instead of forming the inverse explicitly, solve (X X^T) w = y and map back
# with X^T; this is the numerically preferred way to apply an inverse.
theta2 = np.dot(X.T, np.linalg.solve(np.dot(X, X.T), y))
# Residual of the fit: should be ~0 because theta2 satisfies the equations.
error_L2 = np.linalg.norm(y - np.dot(X, theta2))
# Distance between the L2 solution and the true (sparse) theta.
error_theta = np.linalg.norm(theta2 - theta)
print('The L2 norm minimized solution is {}'.format(theta2))

The L2 norm minimized solution is [[ 1.08637128]
 [-0.49775659]
 [ 1.13488503]]


In [30]:
# Report the residual norm ||y - X . theta2|| of the L2 solution.
print(f" The error achieved with L2 norm minimization is {error_L2}")

 The error achieved with L2 norm minimization is 4.636427468134552e-15


In [23]:
# Compute and display the Moore-Penrose pseudo-inverse of X (np.linalg.pinv).
# X is 2x3, so the pseudo-inverse printed below is 3x2.
pseu_X = np.linalg.pinv(X)
print(pseu_X)

[[-0.49040942  0.33987661]
 [ 0.81323612 -0.30286035]
 [-0.25417835  0.29052159]]


## 1B

We need to estimate the smallest number of parameters that can explain the obtained observations. Consider all possible placements of zeros in $\theta$, remove the respective columns of $X$, and check whether the system of equations is satisfied.

Let's start by checking for a potential 1-sparse solution.
### Check solution [x, 0, 0]

In [76]:
# Candidate 1-sparse solution supported on the first column of X.
subX_11 = np.array(X[:, 0], ndmin=2).T  # column 0 of X as a 2x1 array
theta_11 = np.zeros((3, 1))

# Least-squares coefficient for that single column: (a^T a)^{-1} a^T y
gram_11 = np.dot(subX_11.T, subX_11)
proj_11 = np.dot(subX_11.T, y)
theta_11[0] = np.dot(np.linalg.inv(gram_11), proj_11)
print(theta_11)

# Residual against the observations (zero means theta_11 solves the system)
# and distance from the true theta.
error1 = np.linalg.norm(y - np.dot(X, theta_11))
error_theta1 = np.linalg.norm(theta_11 - theta)
print(f'Achieved error: {error1:.20f}')
print(f'Achieved error in theta {error_theta1:.20f}')

[[2.5]
 [0. ]
 [0. ]]
Achieved error: 0.00000000000000000000
Achieved error in theta 0.00000000000000000000


### Check solution [0, x, 0]

In [77]:
# Candidate 1-sparse solution supported on the second column of X.
subX_22 = np.array(X[:, 1], ndmin=2).T  # column 1 of X as a 2x1 array
theta_22 = np.zeros((3, 1))

# Least-squares coefficient for that single column: (a^T a)^{-1} a^T y
gram_22 = np.dot(subX_22.T, subX_22)
proj_22 = np.dot(subX_22.T, y)
theta_22[1] = np.dot(np.linalg.inv(gram_22), proj_22)
print(theta_22)

# Residual against the observations (zero means theta_22 solves the system)
# and distance from the true theta.
error2 = np.linalg.norm(y - np.dot(X, theta_22))
error_theta2 = np.linalg.norm(theta_22 - theta)
print(f'Achieved error: {error2:.20f}')
print(f'Achieved error in theta {error_theta2:.20f}')

[[0.        ]
 [1.50699677]
 [0.        ]]
Achieved error: 2.33763667191955271107
Achieved error in theta 2.91908192193438464912


### Check solution [0, 0, x]

In [78]:
# Candidate 1-sparse solution supported on the third column of X.
subX_33 = np.array(X[:, 2], ndmin=2).T  # column 2 of X as a 2x1 array
theta_33 = np.zeros((3, 1))

# Least-squares coefficient for that single column: (a^T a)^{-1} a^T y
gram_33 = np.dot(subX_33.T, subX_33)
proj_33 = np.dot(subX_33.T, y)
theta_33[2] = np.dot(np.linalg.inv(gram_33), proj_33)
print(theta_33)

# Residual against the observations (zero means theta_33 solves the system)
# and distance from the true theta.
error3 = np.linalg.norm(y - np.dot(X, theta_33))
error_theta3 = np.linalg.norm(theta_33 - theta)
print(f'Achieved error: {error3:.20f}')
print(f'Achieved error in theta {error_theta3:.20f}')

[[0.       ]
 [0.       ]
 [1.3362069]]
Achieved error: 0.82066520537326592688
Achieved error in theta 2.83468673232023826714


## 1C

In [80]:
# Side-by-side view of the dense L2 solution and the 1-sparse solution found in 1B.
print(f"L2 minization is {theta2}")
print(f"L0 minization is {theta_11}")

L2 minization is [[ 1.08637128]
 [-0.49775659]
 [ 1.13488503]]
L0 minization is [[2.5]
 [0. ]
 [0. ]]


In [24]:
# Repeats the [x, 0, 0] check from section 1B with a labelled printout.
print('Check solution [x, 0, 0]')
subX_11 = np.array(X[:, 0], ndmin=2).T  # column 0 of X as a 2x1 array
theta_11 = np.zeros((3, 1))

# Least-squares coefficient for that single column: (a^T a)^{-1} a^T y
gram_11 = np.dot(subX_11.T, subX_11)
proj_11 = np.dot(subX_11.T, y)
theta_11[0] = np.dot(np.linalg.inv(gram_11), proj_11)
print(theta_11)

# Residual against the observations and distance from the true theta.
error1 = np.linalg.norm(y - np.dot(X, theta_11))
error_theta1 = np.linalg.norm(theta_11 - theta)

print(f'Achieved error: {error1:.20f}')
print(f'Achieved error in theta {error_theta1:.20f}')
print('--------------')

Check solution [x, 0, 0]
[[2.5]
 [0. ]
 [0. ]]
Achieved error: 0.00000000000000000000
Achieved error in theta 0.00000000000000000000
--------------


## Exercise 2

# Output

In [None]:
# Export this notebook to PDF: install the TeX packages, then run nbconvert.
# Prerequisite: Google Drive must already be mounted, because the notebook path
# below points into /content/drive/MyDrive.
# NOTE: %%capture is left disabled; as a cell magic it would have to be the first
# line of the cell to take effect.
#%%capture
!sudo apt-get install texlive-xetex texlive-fonts-recommended texlive-plain-generic 
!jupyter nbconvert --to pdf "/content/drive/MyDrive/American_University/2021_Fall/DATA-642-001_Advanced Machine Learning/GitHub/Labs/04/submit/Lab4_Yunting.ipynb"