## CE9010: Introduction to Data Science
## Semester 2 2017/18
## Xavier Bresson
## ⚠ Student name: 
<br>


## Laboratory test 1: Supervised regression
Instruction: Check the box with the right answer.<br>

Grading: You will receive 1 point for each of the questions you answer correctly.<br>
$ $


In [1]:
# Import libraries

# math library
import numpy as np

# visualization library
%matplotlib inline
from IPython.display import set_matplotlib_formats
set_matplotlib_formats('png2x','pdf')
import matplotlib.pyplot as plt

# machine learning library
from sklearn.linear_model import LinearRegression

# 3d visualization
from mpl_toolkits.mplot3d import axes3d

# computational time
import time


# Problem 1: Supervised regression with one feature
<hr>
The data feature is $x$ and the data label/target is $y$.<br>

In [2]:
# Read the comma-separated dataset for Problem 1 into a NumPy array
data = np.loadtxt(fname='data/lab01_data1.txt', delimiter=',')
# Uncomment to preview the first ten rows:
#print(data[:10,:])

### Q1: What is the number $n$ of training data points, and the dimension $d$ of each data point?

☐ $n$=47, $d$=47 <br>
☐ $n$=2, $d$=47 <br>
☐ $n$=47, $d$=2 <br>
☐ $n$=2, $d$=2 

In [3]:
#YOUR CODE HERE

# Rows are data points, columns are the values stored per data point
n, d = data.shape
print(n, d)

47 2


### Q2: What is the value of the predictive linear function $f_w(x)$ for $x=0.2$ with $w_0=0.2, w_1=-0.4$?

☐ $f$ = 0.24<br>
☐ $f$ = 0.06<br>
☐ $f$ = 0.48<br>
☐ $f$ = 0.12


In [4]:
#YOUR CODE HERE

# Data matrix: a leading column of ones (for w_0) next to the feature column
X = np.column_stack((np.ones(n), data[:, 0]))
print(X.shape)
print(X[:5])

# Parameter vector written directly as a column [w_0, w_1]^T
w = np.array([[0.2], [-0.4]])

# Linear predictive function
def f_pred(X,w):
    """Return the linear prediction, i.e. the matrix product of X and w.

    X is the data matrix (one row per sample) and w the parameter vector;
    the result has one row per row of X.
    """
    return np.dot(X, w)

# Test the predictive function on the single sample x = 0.2
# (the leading 1 multiplies the bias w_0)
x = np.array([[1, 0.2]])
y_pred = f_pred(x, w)
print(y_pred)

(47, 2)
[[ 1.       0.46985]
 [ 1.       0.3573 ]
 [ 1.       0.53595]
 [ 1.       0.31621]
 [ 1.       0.66994]]
[[ 0.12]]


### Q3: What is the value of the mean square error (MSE) regression loss function $L(w)$ for $w_0=0.2, w_1=-0.4$?

☐ $L$ = 0.345<br>
☐ $L$ = 0.165<br>
☐ $L$ = 0.273<br>
☐ $L$ = 0.657

In [5]:
#YOUR CODE HERE

# Mean square error (MSE) loss
def loss_mse(y_pred,y):
    """Return the mean of the squared residuals between y_pred and y.

    The sum of squares is computed as residual^T . residual, so for column
    vectors of shape (n, 1) the result is a 1x1 array rather than a scalar.
    """
    residual = y_pred - y
    scale = 1 / len(y)
    return scale * residual.T.dot(residual)


# Test the loss function at the current parameters w
y = data[:, 1, None]  # labels kept as a column vector
print(y[:10])
y_pred = f_pred(X, w)  # predictions for every training sample
loss = loss_mse(y_pred, y)
print(loss)

[[ 0.57137]
 [ 0.47135]
 [ 0.52722]
 [ 0.33148]
 [ 0.7714 ]
 [ 0.42849]
 [ 0.44992]
 [ 0.28432]
 [ 0.3029 ]
 [ 0.34648]]
[[ 0.27369978]]


### Q4: What is the value of the gradient of the MSE regression loss function $\frac{\partial}{\partial w} L(w)$ for $w_0=0.2, w_1=-0.4$?

☐ $\frac{\partial}{\partial w} L$ = [-0.43,0.49]<br>
☐ $\frac{\partial}{\partial w} L$ = [-0.93,-0.49]<br>
☐ $\frac{\partial}{\partial w} L$ = [-0.27,-0.21]<br>
☐ $\frac{\partial}{\partial w} L$ = [-0.61,0.87]

In [6]:
#YOUR CODE HERE

# Gradient of the MSE loss with respect to w
def grad_loss(y_pred,y,X):
    """Return (2/n) * X^T (y_pred - y), with n = len(y).

    The result has one row per column of X.
    """
    residual = y_pred - y
    return (2 / len(y)) * X.T.dot(residual)


# Test the gradient function at the current parameters w
y_pred = f_pred(X, w)
grad = grad_loss(y_pred=y_pred, y=y, X=X)
print(grad)

[[-0.93017149]
 [-0.49330366]]


### Q5: What is the value of the MSE regression loss function $L(w)$ after $200$ gradient descent iterations starting at $w_0=0.2, w_1=-0.4$ and selecting a learning rate $\tau=0.1$?

☐ $L$ = 0.013<br>
☐ $L$ = 0.023<br>
☐ $L$ = 0.008<br>
☐ $L$ = 0.045

In [7]:
#YOUR CODE HERE

# gradient descent function definition
def grad_desc(X, y , w_init=np.array([0,0,0])[:,None] ,tau=0.01, max_iter=500):
    """Run fixed-step gradient descent and record the MSE loss along the way.

    Relies on the module-level helpers f_pred, grad_loss and loss_mse as they
    are defined at call time.

    Parameters
    ----------
    X : data matrix, one row per sample.
    y : labels, one row per sample.
    w_init : starting parameters as a column vector; it is not modified.
    tau : learning rate (step size).
    max_iter : number of update steps to perform.

    Returns
    -------
    w : parameters after max_iter updates.
    L_iters : array of length max_iter; entry i is the loss evaluated with the
        parameters *before* the i-th update.
    w_iters : array of shape (max_iter, number of parameters); row i holds all
        parameters *after* the i-th update.
    """
    w = w_init # initialization (each update rebinds w, w_init is left untouched)
    L_iters = np.zeros([max_iter]) # record the loss values
    # One column per parameter, so every weight is recorded whatever the model size
    w_iters = np.zeros([max_iter, np.size(w_init)]) # record the parameter values
    for i in range(max_iter): # loop over the iterations
        y_pred = f_pred(X,w) # linear prediction function
        grad_f = grad_loss(y_pred,y,X) # gradient of the loss
        w = w - tau* grad_f # update rule of gradient descent
        # loss_mse yields a 1x1 array; reduce it to a plain float before storing
        L_iters[i] = float(np.squeeze(loss_mse(y_pred,y))) # save the current loss value
        w_iters[i,:] = np.ravel(w) # save the current w value (all components)

    return w, L_iters, w_iters


# Run gradient descent from the starting point given in the question
w_init = np.array([[0.2], [-0.4]])
tau = 0.1
max_iter = 200
w, L_iters, w_iters = grad_desc(X, y, w_init=w_init, tau=tau, max_iter=max_iter)
print(L_iters[-1])

0.0130814250215


### Q6: What is the value of the MSE regression loss function $L(w)$ computed with scikit-learn?

☐ $L$ = 0.004<br>
☐ $L$ = 0.006<br>
☐ $L$ = 0.008<br>
☐ $L$ = 0.013

In [8]:
#YOUR CODE HERE

# Fit a linear regression with scikit-learn on the single feature
x_train = data[:, 0, None]  # features must be 2-D: one column
y_train = data[:, 1]
lin_reg_sklearn = LinearRegression()
lin_reg_sklearn.fit(x_train, y_train) # learn the model parameters

# Gather intercept and slope into a column vector [w_0, w_1]^T so that
# f_pred / loss_mse can be reused on the same data matrix X
w_sklearn = np.array([[lin_reg_sklearn.intercept_],
                      [lin_reg_sklearn.coef_[0]]])
y_pred_sklearn = f_pred(X, w_sklearn)
loss_sklearn = loss_mse(y_pred_sklearn, y_train[:, None])
print('loss sklearn=',loss_sklearn)
print('loss gradient descent=',L_iters[-1])


loss sklearn= [[ 0.00840311]]
loss gradient descent= 0.0130814250215




### Q7: What is the value of the predictive linear function $f_w(x)$ for $x=0.2$ with the parameters $w_0,w_1$ computed with scikit-learn?

☐ $f$ = 0.42<br>
☐ $f$ = 0.11<br>
☐ $f$ = 0.27<br>
☐ $f$ = 0.34

In [9]:
#YOUR CODE HERE

# Prediction at x = 0.2: w_0 * 1 + w_1 * 0.2 with the scikit-learn parameters
print(np.dot(w_sklearn.T, [1, 0.2])[0])


0.273970683974


# Problem 2: Supervised regression with two features
<hr>
The data features are $x_{(1)},x_{(2)}$ and the data label/target is $y$.<br>

In [10]:
# Read the comma-separated dataset for Problem 2 into a NumPy array
# (this rebinds `data`, replacing the Problem 1 dataset)
data = np.loadtxt(fname='data/lab01_data2.txt', delimiter=',')
# Uncomment to preview the first ten rows:
#print(data[:10,:])

### Q8: What is the value of the predictive linear function $f_w(x)$ for $x_{(1)}=0.2,x_{(2)}=0.6$ with $w_0=0.2, w_1=-0.4, w_2=0.1$?

☐ $f$ = 0.18<br>
☐ $f$ = 0.22<br>
☐ $f$ = 0.35<br>
☐ $f$ = 0.56

In [11]:
#YOUR CODE HERE

# construct data matrix
# Take the sample count from the dataset loaded for this problem instead of
# reusing the value computed for the previous dataset.
n = data.shape[0]
X = np.ones([n,3]) # first column stays at 1 (bias term w_0)
X[:,1:3] = data[:,0:2] # the two feature columns
print(X.shape)
print(X[:5,:])

# parameters vector as a column [w_0, w_1, w_2]^T
w = np.array([0.2,-0.4,0.1])[:,None]

# Linear predictive function (same definition as in Problem 1; the matrix
# product works for any number of feature columns)
def f_pred(X,w):
    """Return the linear prediction, i.e. the matrix product of X and w."""
    prediction = np.dot(X, w)
    return prediction

# Test the predictive function on the sample x_(1) = 0.2, x_(2) = 0.6
# (the leading 1 multiplies the bias w_0)
x = np.array([[1, 0.2, 0.6]])
y_pred = f_pred(x, w)
print(y_pred)

(47, 3)
[[ 1.       0.46985  0.6    ]
 [ 1.       0.3573   0.6    ]
 [ 1.       0.53595  0.6    ]
 [ 1.       0.31621  0.4    ]
 [ 1.       0.66994  0.8    ]]
[[ 0.18]]


### Q9: What is the value of the MSE regression loss function $L(w)$ after $200$ gradient descent iterations starting at $w_0=0.2, w_1=-0.4, w_2=0.1$ and selecting a learning rate $\tau=0.1$?

☐ $L$ = 0.023<br>
☐ $L$ = 0.029<br>
☐ $L$ = 0.014<br>
☐ $L$ = 0.010

In [12]:
#YOUR CODE HERE

# Labels are the third column, kept as a column vector
y = data[:, 2, None]

# Run gradient descent from the starting point given in the question
w_init = np.array([[0.2], [-0.4], [0.1]])
tau = 0.1
max_iter = 200
w, L_iters, w_iters = grad_desc(X, y, w_init=w_init, tau=tau, max_iter=max_iter)
print(L_iters[-1])

0.0141011607915


# Problem 3: Supervised regression with two features and the MAE loss
<hr>
The data features are $x_{(1)},x_{(2)}$ and the data label/target is $y$.<br>

We consider a new loss defined as
$$
L(w)=\frac{1}{n} \sum_{i=1}^n \ \big| \ f_w(x_i) - y_i \ \big|
$$
It is called the mean absolute error (MAE) loss, a.k.a. L1 loss.


### Q10: What is the value of the MAE regression loss function $L(w)$ after $200$ gradient descent iterations starting at $w_0=0.2, w_1=-0.4, w_2=0.1$ and selecting a learning rate $\tau=0.1$?

☐ $L$ = 0.014 <br>
☐ $L$ = 0.038<br>
☐ $L$ = 0.051<br>
☐ $L$ = 0.074

In [13]:
#YOUR CODE HERE

# Mean absolute error (MAE) loss
def loss_mae(y_pred,y):
    """Return the sum of absolute residuals |y_pred - y| divided by len(y)."""
    abs_residual = np.abs(y_pred - y)
    scale = 1 / len(y)
    return scale * np.sum(abs_residual)

def sign(x):
    """Return the elementwise sign of x as floats: -1.0, 0.0 or 1.0.

    A zero entry maps to 0.0. Computing x / |x| instead would give 0/0 = NaN
    there, and that NaN would spread into the gradient and the parameters as
    soon as one residual is exactly zero.
    """
    return np.sign(np.asarray(x, dtype=float))
    
# (Sub)gradient of the MAE loss with respect to w
# NOTE: this rebinds the name grad_loss, replacing the MSE gradient defined
# earlier in the notebook for every later call.
def grad_loss(y_pred,y,X):
    """Return (1/n) * X^T sign(y_pred - y), with n = len(y)."""
    residual_sign = sign(y_pred - y)
    return (1 / len(y)) * X.T.dot(residual_sign)

In [14]:
#YOUR CODE HERE

# gradient descent function definition
def grad_desc(X, y , w_init=np.array([0,0,0])[:,None] ,tau=0.01, max_iter=500):
    """Run fixed-step gradient descent and record the MAE loss along the way.

    Relies on the module-level helpers f_pred, grad_loss and loss_mae as they
    are defined at call time.

    Parameters
    ----------
    X : data matrix, one row per sample.
    y : labels, one row per sample.
    w_init : starting parameters as a column vector; it is not modified.
    tau : learning rate (step size).
    max_iter : number of update steps to perform.

    Returns
    -------
    w : parameters after max_iter updates.
    L_iters : array of length max_iter; entry i is the loss evaluated with the
        parameters *before* the i-th update.
    w_iters : array of shape (max_iter, number of parameters); row i holds all
        parameters *after* the i-th update.
    """
    w = w_init # initialization (each update rebinds w, w_init is left untouched)
    L_iters = np.zeros([max_iter]) # record the loss values
    # One column per parameter, so the third weight w_2 is recorded as well
    w_iters = np.zeros([max_iter, np.size(w_init)]) # record the parameter values
    for i in range(max_iter): # loop over the iterations
        y_pred = f_pred(X,w) # linear prediction function
        grad_f = grad_loss(y_pred,y,X) # gradient of the loss
        w = w - tau* grad_f # update rule of gradient descent
        L_iters[i] = loss_mae(y_pred,y) # save the current loss value
        w_iters[i,:] = np.ravel(w) # save the current w value (all components)

    return w, L_iters, w_iters


# Run gradient descent on the MAE loss from the starting point given in the question
w_init = np.array([[0.2], [-0.4], [0.1]])
tau = 0.1
max_iter = 200
w, L_iters, w_iters = grad_desc(X, y, w_init=w_init, tau=tau, max_iter=max_iter)
print(L_iters[-1])

0.0742509960921
