In [56]:
import numpy as np
from numpy.linalg import inv

m is the number of training examples  
n is the number of features  
X is of dimension (m, n) and Y is of dimension (m, 1)

In [121]:
class lin_reg:
    '''Linear regression fitted by batch gradient descent or by the normal equation.

    After a call to train() the instance exposes:
      weights -- array of shape (# features + 1, 1); the bias is the first entry.
      cost    -- list of (1, 1) arrays holding the (penalised) cost of every
                 accepted state, so cost[-1] always corresponds to weights.'''

    def train(self, X, Y, alpha = 0.0001, limit = 100000, normal = False, lambd = 0, regularization = None):
        '''Train the model on a dataset from the examples X and the labels Y.

        X must be in the format (# examples)x(# features); Y holds one label per
        example and is reshaped to a column.
        alpha is the gradient descent learning rate and limit the maximum number
        of gradient steps.
        normal = True solves the normal equation instead of running gradient descent.
        regularization may be 'L1', 'L2' or None; lambd is the penalty strength.
        Any other regularization value is treated as no regularization.
        The bias is never penalised.

        Gradient descent stops as soon as a step fails to lower the cost; that
        step is discarded so the stored weights are the best ones seen.
        A message is printed when 'limit' steps were taken without stopping.

        Raises ValueError if normal = True is combined with a non-zero L1 penalty,
        since that problem has no closed-form solution.'''

        # Extract number of training examples and features from X
        (m, n) = X.shape
        # Add a column of ones to X for the bias
        X = np.append(np.ones((m,1)), X, axis = 1)
        Y = np.reshape(Y, (m,1))

        if normal:
            if regularization == 'L1' and lambd != 0:
                raise ValueError('The normal equation has no closed form with L1 regularization')
            gram = np.matmul(np.transpose(X), X)
            if regularization == 'L2':
                # Stationary point of the L2-penalised cost:
                # (X^T X + m * lambd * D) w = X^T Y, D = identity with a zero for the bias
                ridge = m * lambd * np.eye(n + 1)
                ridge[0, 0] = 0
                gram = gram + ridge
            # Solve the linear system directly rather than forming the inverse
            self.weights = np.linalg.solve(gram, np.matmul(np.transpose(X), Y))
            self.cost = [self._cost(X, Y, self.weights, lambd, regularization)]
            return

        # Initialize the weights to zero and record the starting cost
        self.weights = np.zeros((n+1 ,1))
        self.cost = [self._cost(X, Y, self.weights, lambd, regularization)]
        for _ in range(limit):
            candidate = self.weights - alpha * self.gradient(X, Y, self.weights, lambd, regularization)
            candidate_cost = self._cost(X, Y, candidate, lambd, regularization)
            # Stop updating if the cost does not get lower (this also catches NaN);
            # the rejected candidate is dropped instead of overwriting the weights
            if not candidate_cost < self.cost[-1]:
                break
            self.weights = candidate
            self.cost.append(candidate_cost)
        else:
            # All 'limit' steps were used and each one still lowered the cost
            print('Gradient descent did not converge fast enough')


    def _cost(self, X, Y, weights, lambd, regularization):
        '''Cost whose gradient is returned by gradient(): half the mean squared
        error plus the L1 or L2 penalty on the non-bias weights.
        X must already contain the bias column. Returns a (1, 1) array.'''

        m = X.shape[0]
        residual = Y - np.matmul(X, weights)
        cost = 1/(2*m) * np.matmul(np.transpose(residual), residual)
        if regularization == 'L1':
            cost = cost + lambd * np.sum(np.abs(weights[1:]))
        elif regularization == 'L2':
            cost = cost + lambd / 2 * np.sum(weights[1:] ** 2)
        return cost


    def gradient(self, X, Y, weights, lambd, regularization):
        '''Compute the gradient involved in gradient descent, for different regularization schemes.
        X must already contain the bias column and weights is a column array.
        Returns an array with the same shape as weights.'''

        (m, n) = X.shape
        basic_grad = -1/m * np.matmul(np.transpose(X), Y - np.matmul(X, weights))
        if regularization == 'L1':
            penalty = np.sign(weights)
        elif regularization == 'L2':
            penalty = np.array(weights, dtype = float)
        else:
            return basic_grad
        # The bias (first weight) is not regularized
        penalty[0] = 0
        return basic_grad + lambd * penalty.reshape(n, 1)


    def predict(self, X):
        '''Use data X and trained model to predict labels using linear regression hypothesis.
        X must be in the format (# examples)x(# features) and train() must have been called.
        Output is a column array with one value for each data point.'''

        # Extract number of examples from X
        m = X.shape[0]
        # Add a column of ones to X for the bias
        X = np.append(np.ones((m,1)), X, axis = 1)
        return np.matmul(X, self.weights)


    def error(self, Y, Y_pred, metric):
        '''Computes the error of Y_pred compared to true answer Y.

        metric is 'rse' (residual standard error) or 'r2' (coefficient of determination).
        Y and Y_pred may be 1-D or column arrays with one entry per example.
        The result is printed and also returned as a float; an unknown metric
        prints a message and returns None.

        The RSE divides by (# examples - # fitted coefficients) when the model
        has been trained, and by (# examples - 2) otherwise.'''

        # Bring both inputs to column shape so the subtraction cannot broadcast
        # a 1-D prediction against a column of labels into an (m, m) matrix
        m = np.shape(Y)[0]
        Y = np.reshape(Y, (m,1))
        Y_pred = np.reshape(Y_pred, (m,1))
        # Compute total squared error for use later
        residual = Y - Y_pred
        rss = np.matmul(np.transpose(residual), residual)
        if metric == 'rse':
            weights = getattr(self, 'weights', None)
            n_params = 2 if weights is None else np.size(weights)
            rse_error = np.sqrt(rss/(m - n_params))
            print('RSE error is: ', rse_error)
            return float(rse_error[0, 0])
        elif metric == 'r2':
            centered = Y - np.mean(Y)
            tss = np.matmul(np.transpose(centered), centered)
            r2_error = 1 - rss/tss
            print('R squared error is: ', r2_error)
            return float(r2_error[0, 0])
        else:
            print('Wrong metric specification!')
            return None
    
    

In [4]:
from sklearn.datasets import load_boston
from sklearn.linear_model import LinearRegression
# NOTE: load_boston was deprecated in scikit-learn 1.0 and removed in 1.2,
# so this cell only runs on older scikit-learn versions.
data = load_boston()

In [129]:
# Fit by gradient descent on the raw (unscaled) features.
# lambd has no effect in this call because regularization is None.
model = lin_reg()
model.train(
    data['data'],
    data['target'],
    alpha=1e-6,
    limit=10000,
    lambd=1,
    regularization=None,
)
# First few entries of the recorded cost history
model.cost[:15]

Gradient descent did not converge fast enough


[array([[296.0734585]]),
 array([[174.83024101]]),
 array([[116.97644499]]),
 array([[89.06818697]]),
 array([[75.32047471]]),
 array([[68.28238328]]),
 array([[64.43625929]]),
 array([[62.12078977]]),
 array([[60.55080478]]),
 array([[59.35467676]]),
 array([[58.35599664]]),
 array([[57.47053486]]),
 array([[56.65761063]]),
 array([[55.89713882]]),
 array([[55.17879543]])]

In [130]:
# In-sample R^2: predictions are made on the same data the model was trained on
model.error(data['target'], model.predict(data['data']), 'r2')

R squared error is:  [[0.25144805]]


In [55]:
# First 15 predictions on the training examples
model.predict(data['data'])[:15]

array([[26.33323471],
       [23.90216462],
       [23.64444031],
       [23.19578018],
       [23.37212363],
       [23.49085986],
       [23.99589384],
       [24.59863327],
       [22.21251978],
       [23.86254698],
       [24.03671475],
       [24.91311395],
       [21.39202783],
       [23.15180904],
       [23.35616569]])

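Future: add polynomial regression, visualization of the cost, and data normalization (L1/L2 regularization is already available through the `regularization` and `lambd` arguments of `lin_reg.train`)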