In [6]:
import numpy as np

m is number of features  
n is number of training examples  
X is of dimension (m,n) and Y is of dimension (m,1)

In [51]:
class lin_reg:
    '''Model that does linear regression on the data'''
    
    def train(self, X, Y, alpha = 0.0001, limit = 100000):
        '''Train the model on a dataset from the examples X and the labels Y.
        The algorithm uses gradient descent with learning rate 'alpha' for a max of 'limit' steps.
        X must in the format (# examples)x(# features) and Y is a column array.
        The final weights as well as a list of the costs calculated through the training 
        can be extracted using methods .weights and .cost.'''
        
        # Extract number of training examples and features from X
        (m, n) = X.shape
        # Add a column of ones to X for the bias
        X = np.append(np.ones((m,1)), X, axis = 1)
        Y = np.reshape(Y, (m,1))
        # Initialize the weights to zero
        self.weights = np.zeros((n+1 ,1))
        # Initialize the cost
        i = 0
        self.cost = []
        self.cost.append(1/m * np.matmul(np.transpose(Y - np.matmul(X, self.weights)), Y - np.matmul(X, self.weights)))
        # Update weights with gradient and compute new cost
        while True:
            self.weights = self.weights + alpha / m * np.matmul(np.transpose(X), Y - np.matmul(X, self.weights))
            self.cost.append(1/m * np.matmul(np.transpose(Y - np.matmul(X, self.weights)), Y - np.matmul(X, self.weights)))
            # Stop upgrading if cost doesn't lower of if reached limit
            if self.cost[i + 1] < self.cost[i]:
                i += 1
            else:
                break
            if i > limit:
                print('Gradient descent did not converge fast enough')
                break

    def predict(self, X):
        '''Use data X and trained model to predict labels using linear regression hypothesis.
        Output is an array with one value for each data point.'''
    
        # Extract number of training examples and features from X
        (m, n) = X.shape
        # Add a column of ones to X for the bias
        X = np.append(np.ones((m,1)), X, axis = 1)
        return np.matmul(X, self.weights)
    
    def error(self, X, Y, metric):
        '''Computes the error made by the model when making predictions from the data X.
        Option of using the R2 or RSE'''
        
        # Reshape data and use it to make a prediction
        m = X.shape[0]
        Y = np.reshape(Y, (m,1))
        Y_pred = self.predict(X)
        # Compute total squared error for use later
        rss = np.matmul(np.transpose(Y - Y_pred), Y - Y_pred)
        if metric == 'rse':
            rse_error = np.sqrt(rss/(m-2))
            print('RSE error is: ', rse_error)
        elif metric == 'r2':
            tss = np.matmul(np.transpose(Y - np.mean(Y)), Y - np.mean(Y))
            r2_error = 1 - rss/tss
            print('R squared error is: ', r2_error)
        else:
            print('Wrong metric specification!')
        
    #def gradient
    
    

In [4]:
from sklearn.datasets import load_boston
from sklearn.linear_model import LinearRegression
data = load_boston()

In [52]:
model = lin_reg()
model.train(data['data'], data['target'], alpha=0.000001, limit = 10000)
model.cost[:15]

Gradient descent did not converge fast enough


[array([[592.146917]]),
 array([[349.66048201]]),
 array([[233.95288997]]),
 array([[178.13637393]]),
 array([[150.64094942]]),
 array([[136.56476656]]),
 array([[128.87251857]]),
 array([[124.24157953]]),
 array([[121.10160955]]),
 array([[118.70935353]]),
 array([[116.71199328]]),
 array([[114.94106971]]),
 array([[113.31522125]]),
 array([[111.79427765]]),
 array([[110.35759086]])]

In [54]:
model.error(data['data'], data['target'], 'rse')

RSE error is:  [[7.96512315]]


In [36]:
model.predict(data['data'])

array([[26.33323471],
       [23.90216462],
       [23.64444031],
       [23.19578018],
       [23.37212363],
       [23.49085986],
       [23.99589384],
       [24.59863327],
       [22.21251978],
       [23.86254698],
       [24.03671475],
       [24.91311395],
       [21.39202783],
       [23.15180904],
       [23.35616569],
       [22.70977555],
       [20.94341275],
       [22.63684883],
       [15.61670526],
       [22.72088691],
       [21.862538  ],
       [23.5361936 ],
       [22.94464651],
       [23.1134231 ],
       [23.45783694],
       [18.44426799],
       [22.66249403],
       [18.65550475],
       [23.88262154],
       [23.22135866],
       [20.5341029 ],
       [23.56689461],
       [12.58336332],
       [21.3366201 ],
       [15.69107651],
       [23.24884591],
       [21.5429086 ],
       [21.77132469],
       [20.63992956],
       [31.19848518],
       [31.30916503],
       [19.49547043],
       [19.39437284],
       [19.61412522],
       [21.02685165],
       [20

Future: add regularization, normal equation, polynomial regression, visualization cost