In [1]:
import numpy as np
from numpy.linalg import inv
import itertools

m is the number of training examples  
n is the number of features  
input: X is of dimension (m,n) and Y is of dimension (m,1)

In [57]:
class lin_reg:
    '''Linear regression trained by gradient descent or by the exact least-squares solution.

    The features can optionally be expanded into polynomial terms first, which
    turns the model into a polynomial regression.

    Attributes:
        degree: polynomial degree used to expand the features, or None for a plain linear model.
        weights: column array of shape (# features + 1, 1) set by train(); row 0 is the bias.
        cost: list of (1, 1) arrays with the unregularized cost recorded by train().
    '''

    def __init__(self, degree = None):
        '''Create an untrained model.

        degree: polynomial degree of the feature expansion. None (the default)
            or any value below 2 disables the expansion.
        '''
        # Check for None first: comparing None with an int raises TypeError,
        # which made the default lin_reg() call fail.
        if degree is not None and degree >= 2:
            self.degree = degree
        else:
            self.degree = None

    def train(self, X, Y, alpha = 0.0001, limit = 100000, normal = False, lambd = 0, regularization = None):
        '''Train the model on the examples X and the labels Y.

        X must be in the format (# examples)x(# features) and Y must hold one label per example.
        By default the weights are fitted with gradient descent using learning rate 'alpha'
        for at most 'limit' updates; training stops earlier as soon as an update fails to
        lower the (unregularized) cost.
        With normal=True the exact least-squares solution is computed instead; 'alpha',
        'limit', 'lambd' and 'regularization' are ignored in that case.
        regularization may be 'L1' or 'L2' (with strength 'lambd'); any other value means none.
        The final weights and the list of costs recorded during training are stored in
        the attributes .weights and .cost.
        '''
        X = self._design_matrix(X)
        (m, n_weights) = X.shape
        Y = np.reshape(Y, (m, 1))

        if normal:
            # pinv(X) @ Y equals inv(X^T X) X^T Y when X has full column rank, and
            # still yields a least-squares solution when X^T X is singular (e.g.
            # redundant polynomial features), where a plain inverse would fail.
            self.weights = np.matmul(np.linalg.pinv(X), Y)
            # Keep .cost defined on this path too, so it can always be inspected.
            self.cost = [self._cost(X, Y, self.weights)]
            return

        # Start from all-zero weights and record the initial cost
        self.weights = np.zeros((n_weights, 1))
        self.cost = [self._cost(X, Y, self.weights)]
        i = 0
        while True:
            # Update weights with the gradient and compute the new cost
            self.weights = self.weights - alpha * self.gradient(X, Y, self.weights, lambd, regularization)
            self.cost.append(self._cost(X, Y, self.weights))
            # Stop as soon as the cost no longer decreases (also catches NaN costs)
            if not self.cost[i + 1] < self.cost[i]:
                break
            if i % 10000 == 0:
                print(i, 'steps done', end="\r")
            i += 1
            # '>=' so that no more than 'limit' successful updates are performed
            if i >= limit:
                print('Reached the limit')
                break

    def _design_matrix(self, X):
        '''Expand X to polynomial features if requested and prepend the bias column of ones.'''
        X = np.asarray(X)
        if self.degree is not None:
            X = self.polynomial(X, self.degree)
        m = X.shape[0]
        return np.append(np.ones((m, 1)), X, axis = 1)

    def _cost(self, X, Y, weights):
        '''Return the halved mean squared error of X @ weights against Y as a (1, 1) array.'''
        residual = Y - np.matmul(X, weights)
        return 1/(2*X.shape[0]) * np.matmul(np.transpose(residual), residual)

    def gradient(self, X, Y, weights, lambd, regularization):
        '''Compute the gradient used by gradient descent, for different regularization schemes.

        X is the design matrix including the bias column, Y the (m, 1) labels and
        weights the (n, 1) weight column. The bias weight (row 0) is never penalized.
        '''
        (m, n) = X.shape
        basic_grad = -1/m * np.matmul(np.transpose(X), Y - np.matmul(X, weights))
        if regularization == 'L1':
            penalty = np.sign(weights).reshape(n, 1)
        elif regularization == 'L2':
            # Copy so that zeroing the bias entry below never touches the caller's weights
            penalty = np.array(weights).reshape(n, 1)
        else:
            return basic_grad
        penalty[0] = 0
        return basic_grad + lambd * penalty

    def predict(self, X):
        '''Use data X and the trained model to predict labels.

        X must have the same number of features as the training data.
        Output is a column array with one value for each data point.
        '''
        return np.matmul(self._design_matrix(X), self.weights)

    def error(self, Y, Y_pred, metric):
        '''Compute the error of Y_pred compared to the true answer Y.

        metric is 'rse' (residual standard error) or 'r2' (R squared).
        The value is printed and also returned as a (1, 1) array; for an unknown
        metric a message is printed and None is returned.
        '''
        Y = np.asarray(Y)
        m = Y.shape[0]
        # Force both inputs to columns: subtracting a 1-D prediction from an (m, 1)
        # label column would silently broadcast to an (m, m) matrix.
        Y = np.reshape(Y, (m, 1))
        Y_pred = np.reshape(Y_pred, (m, 1))
        # Compute total squared error for use later
        residual = Y - Y_pred
        rss = np.matmul(np.transpose(residual), residual)
        if metric == 'rse':
            # NOTE: m-2 is the degrees of freedom of a single-feature fit.
            rse_error = np.sqrt(rss/(m-2))
            print('RSE error is: ', rse_error)
            return rse_error
        elif metric == 'r2':
            centered = Y - np.mean(Y)
            tss = np.matmul(np.transpose(centered), centered)
            r2_error = 1 - rss/tss
            print('R squared error is: ', r2_error)
            return r2_error
        else:
            print('Wrong metric specification!')
            return None

    def polynomial(self, X, degree):
        '''Combine the features into all possible products to form a polynomial of a given degree.

        The original columns come first, followed by every product of 2..degree
        features (with repetition), in the order produced by
        itertools.combinations_with_replacement.
        This should be used before doing anything else and on all the data.
        '''
        X = np.asarray(X)
        (m, n) = X.shape
        # Collect the new columns and join once instead of re-allocating X per column
        columns = [X]
        for deg in range(2, degree+1):
            for combin in itertools.combinations_with_replacement(range(n), deg):
                columns.append(np.prod(X[:, list(combin)], axis=1).reshape(m, 1))
        return np.hstack(columns)
    

In [3]:
# NOTE(review): load_boston was deprecated and reportedly removed from
# scikit-learn in release 1.2 — confirm the installed version still provides it.
from sklearn.datasets import load_boston
# NOTE(review): LinearRegression is not used in the cells visible here;
# presumably imported for a comparison elsewhere — verify.
from sklearn.linear_model import LinearRegression
# The cells below read data['data'] as the examples and data['target'] as the labels.
data = load_boston()

In [132]:
# Fit a degree-3 polynomial regression by gradient descent with an L1 penalty (lambd = 1).
# NOTE(review): alpha is extremely small; the notebook's notes say a very small
# alpha is needed — presumably because the features are not normalized; confirm.
model = lin_reg(degree=3)
model.train(data['data'], data['target'], alpha=0.00000000000000001, limit = 100000, lambd = 1, regularization = 'L1')
# Show the first 15 cost values recorded during training.
model.cost[:15]

Reached the limit


[array([[296.0734585]]),
 array([[226.63649927]]),
 array([[193.35609595]]),
 array([[175.4968844]]),
 array([[164.36111601]]),
 array([[156.28967524]]),
 array([[149.73327024]]),
 array([[144.02677976]]),
 array([[138.87628683]]),
 array([[134.14434048]]),
 array([[129.76001635]]),
 array([[125.68120498]]),
 array([[121.8787602]]),
 array([[118.32979998]]),
 array([[115.01484191]])]

In [133]:
# Print the R squared of the model's predictions on the same data it was trained on.
model.error(data['target'], model.predict(data['data']), 'r2')

R squared error is:  [[0.45655467]]


In [66]:
# Show the first 15 predictions for the training examples.
# NOTE(review): this cell's execution count (66) is lower than the training
# cell's (132), so the output shown may come from an earlier model — re-run in order.
model.predict(data['data'])[:15]

array([[3.17609768e+28],
       [3.43898536e+28],
       [3.44124683e+28],
       [2.33380579e+28],
       [2.01098853e+28],
       [2.42642392e+28],
       [5.04993450e+28],
       [4.62679054e+28],
       [4.16972526e+28],
       [5.27743167e+28],
       [4.64605284e+28],
       [5.31920353e+28],
       [4.88645773e+28],
       [4.30821094e+28],
       [3.92337512e+28]])

Future: add regularization, polynomial regression, visualization of the cost, and normalization of the data

Notes: normalize the data before the polynomial expansion; the normal equation can't be used for the polynomial model; a very small alpha is needed (not sure if that is OK)

In [77]:
# Display the full weight column of the trained model (row 0 is the bias).
# NOTE(review): this cell's execution count (77) is lower than the training
# cell's (132), so the output shown may come from an earlier model — re-run in order.
model.weights

array([[-7.29163710e+28],
       [-7.95655720e+28],
       [-1.04149756e+27],
       [ 3.56882105e+27],
       [ 4.89618978e+33],
       [ 1.81888218e+29],
       [-6.60180526e+27],
       [-1.08931187e+24],
       [ 5.52589906e+27],
       [-6.85269160e+26],
       [-1.38365445e+25],
       [ 7.04705483e+26],
       [ 1.51210794e+25],
       [-6.08838541e+26],
       [-2.17587400e+27],
       [ 7.89477646e+26],
       [-1.30768696e+27],
       [-4.52174822e+37],
       [ 1.76283121e+28],
       [ 3.44261248e+26],
       [-3.34465814e+24],
       [ 1.34764850e+27],
       [-3.63455629e+27],
       [ 2.25658152e+26],
       [ 3.53904546e+26],
       [-1.53192740e+25],
       [-1.68862156e+25],
       [ 6.56270229e+24],
       [ 6.17377228e+25],
       [ 6.01255302e+34],
       [ 4.87147319e+27],
       [ 4.06540116e+25],
       [-2.57508946e+24],
       [ 2.16968123e+25],
       [-1.65448409e+25],
       [ 2.84603638e+22],
       [-6.42701458e+24],
       [-2.70311378e+24],
       [ 3.1