In [24]:
import numpy as np
from sklearn import datasets
from sklearn.model_selection import train_test_split
from sklearn.metrics import mean_squared_error
import time

In [2]:
def dataloader():
    """Load the Boston housing data and split it into train and test sets.

    Prints the shape of the training feature matrix as a side effect.

    Returns:
        tuple: ``(X_train, X_test, y_train, y_test)`` as returned by
        ``train_test_split`` with ``test_size=0.2`` and ``random_state=3``
        (the fixed seed makes the split repeatable).
    """
    # NOTE(review): load_boston was deprecated in scikit-learn 1.0 and removed
    # in 1.2, so this cell needs an older scikit-learn or another data source.
    boston = datasets.load_boston()
    X_train, X_test, y_train, y_test = train_test_split(
        boston.data, boston.target, test_size=0.2, random_state=3)
    # Parenthesised single-argument print gives the same output on Python 2 and 3.
    print(X_train.shape)
    return X_train, X_test, y_train, y_test

In [3]:
def gradJ(theta,train,labels,numSamples):
    """Gradient of the least-squares cost with respect to ``theta``.

    Computes ``((theta^T . train) - labels) . train^T``, transposed and
    divided by ``numSamples``.

    Args:
        theta: parameter vector; its length must match ``train``'s first axis.
        train: design matrix laid out as (features, samples), since it is
            multiplied as ``theta.T @ train``.
        labels: one target per sample. May be 1-D or a column/row vector; it
            is flattened before subtraction so that an (N, 1) column does not
            broadcast the residual into an (N, N) matrix.
        numSamples: divisor applied to the summed gradient.

    Returns:
        Array with the same layout as ``theta`` holding the gradient.
    """
    residual = np.matmul(theta.T, train) - np.ravel(labels)
    return np.matmul(residual, train.T).T / numSamples

In [4]:
def gradientDescent(train,labels,alpha, options):
    """Fit ``theta`` by batch gradient descent from a random starting point.

    Args:
        train: design matrix, passed through to ``gradJ`` unchanged.
        labels: targets, passed through to ``gradJ`` unchanged.
        alpha: learning rate (step size multiplier on the gradient).
        options: dict with
            ``'n_attrib'``    - number of parameters (rows of ``theta``),
            ``'num_samples'`` - sample count forwarded to ``gradJ``,
            ``'tolerance'``   - stop once the norm of an update step is at
                                most this value,
            ``'max_iter'``    - optional cap on the number of update steps;
                                missing or ``None`` means no cap.

    Returns:
        ``theta`` as an ``(n_attrib, 1)`` array.

    Raises:
        FloatingPointError: if ``theta`` becomes inf/NaN. Without this check a
            diverging run would loop forever, because a NaN step norm never
            compares ``<=`` the tolerance.
    """
    # Random standard-normal start; stored as a column vector.
    theta = np.random.randn(options['n_attrib'], 1)
    max_iter = options.get('max_iter')
    iteration = 0
    while max_iter is None or iteration < max_iter:
        thetaOld = theta
        theta = theta - alpha * gradJ(theta, train, labels, options['num_samples'])
        iteration += 1
        if not np.all(np.isfinite(theta)):
            raise FloatingPointError(
                'gradient descent diverged after %d iterations '
                '(learning rate %g is probably too large)' % (iteration, alpha))
        # Euclidean length of the step. For a column vector this equals the
        # spectral 2-norm but avoids computing an SVD on every iteration.
        if np.linalg.norm(theta - thetaOld) <= options['tolerance']:
            break
    return theta

In [7]:
# Hyper-parameter grids explored by the sweep cell.
alphas = [1e-6, 1e-7, 1e-8, 1e-9]
tolerances = [1e-4, 1e-5, 1e-6, 1e-7]

trainData, testData, y_train, y_test = dataloader()

# Insert a constant-1 column at index 0 (the intercept term), then transpose so
# the matrices are laid out as (features, samples), which is what gradJ expects.
X_train = np.transpose(np.insert(trainData, 0, 1, axis=1))
X_test = np.transpose(np.insert(testData, 0, 1, axis=1))

options = {
    'n_attrib': X_train.shape[0],
    'num_samples': X_train.shape[1],
    'tolerance': 1e-7,
}
# Single formatted argument so the output is identical on Python 2 and 3.
print('New X_train dim  %s' % (X_train.shape,))

(404, 13)
New X_train dim  (14, 404)


In [101]:
# Sweep every (learning rate, tolerance) pair and report the resulting MSE.
# NOTE(review): the MSE is measured on the test split, so picking the "best"
# pair from this output tunes on test data; a separate validation split would
# give an unbiased comparison.
for alpha in alphas:
    for tolerance in tolerances:
        # Per-run copy so the sweep does not mutate the shared options dict.
        run_options = dict(options, tolerance=tolerance)
        theta = gradientDescent(X_train, y_train, alpha, run_options)
        prediction = np.matmul(np.transpose(theta), X_test).reshape(y_test.shape)
        # %g keeps values such as 1e-08 readable; %f rendered them as 0.000000.
        print('Mean squared error for tolerance %g with learning rate %g is %f'
              % (tolerance, alpha, mean_squared_error(y_test, prediction)))

Mean squared error for tolerance 0.000100 with learning rate 0.000000 is 2056.258632
Mean squared error for tolerance 0.000010 with learning rate 0.000000 is 1485.763928
Mean squared error for tolerance 0.000001 with learning rate 0.000000 is 246.373733
Mean squared error for tolerance 0.000000 with learning rate 0.000000 is 55.183478
Mean squared error for tolerance 0.000100 with learning rate 0.000000 is 29392.222593
Mean squared error for tolerance 0.000010 with learning rate 0.000000 is 3708.573371
Mean squared error for tolerance 0.000001 with learning rate 0.000000 is 925.131850
Mean squared error for tolerance 0.000000 with learning rate 0.000000 is 99.512109


### MSE obtained using gradient descent:

|Learning Rate|Tolerance|Mean Squared Error|
|:-----------:|:-------:|:----------------:|
|1e-6|1e-4|160.68|
|1e-6|1e-5|51.89|
|1e-6|1e-6|21.73|
|1e-6|1e-7|19.07|
|1e-7|1e-4|424.91|
|1e-7|1e-5|115.79|
|1e-7|1e-6|53.81|
|1e-7|1e-7|21.49|
|1e-8|1e-4|2056.26|
|1e-8|1e-5|1485.76|
|1e-8|1e-6|246.37|
|1e-8|1e-7|55.18|

In [31]:
# Time one gradient-descent fit with the settings chosen from the sweep.
alpha = 1e-6
options['tolerance'] = 1e-7

start = time.time()
theta = gradientDescent(X_train, y_train, alpha, options)
timeTaken = time.time() - start  # wall-clock seconds spent fitting only

prediction = np.matmul(np.transpose(theta), X_test).reshape(y_test.shape)
# %g keeps 1e-07 / 1e-06 readable; %f rendered the tolerance as 0.000000.
print('MSE using gradient descent, for tolerance %g with learning rate %g is %f'
      % (options['tolerance'], alpha, mean_squared_error(y_test, prediction)))
print('Time taken is %f seconds' % timeTaken)

MSE using gradient descent, for tolerance 0.000000 with learning rate 0.000001 is 19.070616
Time taken is 746.630659 seconds


In [29]:
from sklearn.linear_model import LinearRegression

# Baseline: scikit-learn's LinearRegression, fitted on the raw features
# (trainData has no bias column added to it).
start = time.time()
model = LinearRegression()
model.fit(trainData, y_train)
timeTaken = time.time() - start  # wall-clock seconds spent fitting only

predicted = model.predict(testData)
# Single formatted argument so the output is the same on Python 2 and 3.
print('MSE using native linear regression method in sklearn %f'
      % mean_squared_error(y_test, predicted))
print('Time taken is %f seconds' % timeTaken)

MSE using native linear regression method in sklearn 16.9682486675
Time taken is 0.002954 seconds


In [30]:
# Import the submodule explicitly: a bare `import scipy` does not guarantee
# that `scipy.optimize` has been loaded.
import scipy.optimize

theta = np.random.randn(options['n_attrib'], 1)


def J(theta):
    """Mean squared error on the training set for parameter vector ``theta``."""
    return mean_squared_error(y_train, np.matmul(theta.T, X_train).ravel())


start = time.time()
# x0 is flattened because minimize works on 1-D parameter vectors (newer SciPy
# rejects multi-dimensional x0). jac=False lets SciPy estimate the gradient
# numerically.
results = scipy.optimize.minimize(J, theta.ravel(), method='L-BFGS-B', jac=False,
                                  options={'maxiter': 500, 'disp': True})
timeTaken = time.time() - start  # wall-clock seconds spent optimising only

optimumTheta = results['x']
prediction = np.matmul(np.transpose(optimumTheta), X_test).reshape(y_test.shape)
# Single formatted argument so the output is the same on Python 2 and 3.
print('MSE using scipy.optimize %f' % mean_squared_error(y_test, prediction))
print('Time taken is %f seconds' % timeTaken)

MSE using scipy.optimize 19.7982317626
Time taken is 0.643147 seconds


### Observations:
1. It is quite clear from the table that learning rate of 1e-6 with tolerance 1e-7 is optimal as it gives the least MSE for this dataset.
2. The MSE using gradient descent is 19.07, but training took 746.63 seconds.
3. The MSE using the native implementation of linear regression from sklearn is 16.97, and training took only 0.003 seconds.
4. The MSE using the minimize function from scipy.optimize with the L-BFGS-B method is 19.80, and training took 0.64 seconds.
5. So it is more efficient and accurate to use the native implementation.