## Implement linear regression

In [1]:
import random

import numpy as np
import pandas as pd

In [2]:
def linear_regression(x, y, interations=100, learning_rate=0.01, *, iterations=None):
    '''
    Fit y ~ beta_0 + sum_j(beta_j * x_j) with full-batch gradient descent.

    M data points
    N columns (features)
    N+1 total betas: beta_0 ... beta_N

    Args:
        x: sequence of M rows, each holding N feature values.
        y: sequence of M target values.
        interations: number of gradient-descent passes. The misspelled name
            is kept so existing positional/keyword callers keep working.
        learning_rate: step-size multiplier applied to every update.
        iterations: correctly spelled keyword-only alias for ``interations``;
            when given, it takes precedence.

    Returns:
        Tuple ``(beta_0, beta_other)``: the intercept and the list of N
        feature coefficients after the final update.

    Raises:
        ValueError: if ``x`` contains no data points.
    '''
    if iterations is None:
        iterations = interations

    m = len(x)
    if m == 0:
        # Without at least one row the feature count cannot be determined.
        raise ValueError("x must contain at least one data point")
    n = len(x[0])

    beta_0, beta_other = initialize_params(n)
    for _ in range(iterations):
        gradient_beta_0, gradient_beta_other = compute_gradient(x, y, beta_0, beta_other, n, m)
        beta_0, beta_other = update_params(beta_0, beta_other, gradient_beta_0, gradient_beta_other, learning_rate)
    return beta_0, beta_other

def initialize_params(dimensions):
    '''
    Create the starting parameters for gradient descent.

    Args:
        dimensions: number of feature coefficients to create (N).

    Returns:
        Tuple ``(beta_0, beta_other)`` where ``beta_0`` is 0 and
        ``beta_other`` is a list of ``dimensions`` floats drawn uniformly
        from [0.0, 1.0) via ``random.random()``.
    '''
    # Relies on the standard-library ``random`` module being imported at file level.
    beta_0 = 0
    beta_other = [random.random() for _ in range(dimensions)]
    return beta_0, beta_other

def compute_gradient(x, y, beta_0, beta_other, dimension, m):
    '''
    Accumulate the averaged update direction for every parameter.

    For each of the first ``m`` data points the prediction
    ``sum_j(x[i][j] * beta_other[j]) + beta_0`` is compared with ``y[i]``.
    The per-point term is ``2 * (y[i] - prediction)``, i.e. the negative of
    the squared-error derivative with respect to the prediction, so the
    returned values point in the direction that reduces the error and are
    meant to be added to the parameters.

    Args:
        x: indexable rows of feature values; ``x[i][j]`` is read for
            ``i < m`` and ``j < dimension``.
        y: indexable target values; ``y[i]`` is read for ``i < m``.
        beta_0: current intercept.
        beta_other: current feature coefficients (at least ``dimension`` long).
        dimension: number of features (N).
        m: number of data points to average over (M).

    Returns:
        Tuple ``(gradient_beta_0, gradient_beta_other)``: the averaged term
        for the intercept and a list of ``dimension`` averaged terms for the
        feature coefficients.
    '''
    intercept_step = 0
    weight_steps = [0 for _ in range(dimension)]

    for row in range(m):
        # Model output for this data point.
        prediction = sum(x[row][col] * beta_other[col] for col in range(dimension)) + beta_0
        # Twice the residual; negative when the model over-estimates y[row].
        scaled_residual = 2 * (y[row] - prediction)
        # Dividing each contribution by m averages over all data points.
        intercept_step += scaled_residual / m
        weight_steps = [
            running + scaled_residual * x[row][col] / m
            for col, running in enumerate(weight_steps)
        ]
    return intercept_step, weight_steps

def update_params(beta_0, beta_other, gradient_beta_0, gradient_beta_other, learning_rate):
    '''
    Move every parameter one step along its update direction.

    The gradient arguments are added, not subtracted, so they are expected
    to already point in the error-reducing direction (negative gradient).

    Args:
        beta_0: current intercept.
        beta_other: list of current feature coefficients; updated in place.
        gradient_beta_0: update direction for the intercept.
        gradient_beta_other: update directions for the feature coefficients,
            one per entry of ``beta_other``.
        learning_rate: multiplier that scales each step.

    Returns:
        Tuple ``(beta_0, beta_other)`` with the updated intercept and the
        same (mutated) coefficient list.
    '''
    beta_0 += gradient_beta_0 * learning_rate  # scale the step by the learning rate
    for i, gradient in enumerate(gradient_beta_other):
        beta_other[i] += gradient * learning_rate  # beta = beta + negative gradient
    return beta_0, beta_other

In [3]:
## Time and space complexity

### compute_gradient: O(MN) per call (M data points x N features)
### update_params: O(N) per call
## Total time: O(MNI), where I is the number of iterations

### beta_0, gradient_beta_0: single value, O(1)
### beta_other, gradient_beta_other: N values, O(N)
## Total extra space: O(N)