# From linear algebra to linear regression

In [None]:
import numpy as np

class SimpleLinearRegression:
    '''
        Least-squares fit of a straight line y = b_0 + b_1*x to a dataset
        with a single input feature.

        Attributes:
            coefficient: slope b_1 of the fitted line (None until fit() runs)
            intercept:   offset b_0 of the fitted line (None until fit() runs)
    '''
    def __init__(self):
        self.coefficient = None
        self.intercept = None

    def fit(self, x, y):
        '''
            Given a dataset with 1 input feature x and output feature y,
            estimates the coefficient and computes the intercept.

            x and y are array-likes holding one value per sample. The results
            are stored on the instance; nothing is returned.

            RAISES: ValueError if x or y is empty or their lengths differ.
        '''
        # the intercept formula reads self.coefficient, so the slope must be
        # estimated first
        self.coefficient = self._coefficient_estimate(x, y)
        self.intercept = self._compute_intercept(x, y)

    def predict(self, x):
        '''
            y = b_0 + b_1*x

            Applies the fitted line element-wise to x and returns the result.
            fit() must have been called first, otherwise coefficient and
            intercept are still None.
        '''
        x_times_coeff = np.multiply(x, self.coefficient)
        return np.add(x_times_coeff, self.intercept)

    def r2_score(self, y_true, y_pred):
        '''
            r2 = 1 - (rss/tss)
            rss = sum_{i=1}^{n} (y_i - y_hat_i)^2
            tss = sum_{i=1}^{n} (y_i - y_bar)^2

            WHERE:  y_i     = i-th value of y_true,
                    y_hat_i = i-th value of y_pred,
                    y_bar   = average(y_true)

            RAISES: ValueError if the inputs are empty or their lengths differ.

            NOTE: when every y_true value is identical tss is 0 and the
            division yields nan or -inf (numpy emits a RuntimeWarning).
        '''
        y_true, y_pred = self._as_paired_vectors(y_true, y_pred)

        residual_sum_of_squares = np.sum((y_true - y_pred)**2)
        total_sum_of_squares = np.sum((y_true - np.average(y_true))**2)

        return 1 - (residual_sum_of_squares/total_sum_of_squares)

    @staticmethod
    def _as_paired_vectors(first, second):
        '''
            Converts two array-likes into flat numpy arrays of equal length.

            Flattening lets an (n, 1) column be paired with an (n,) vector
            without numpy broadcasting the two into an (n, n) matrix.

            RAISES: ValueError if either input is empty or the lengths differ.
        '''
        first = np.asarray(first).ravel()
        second = np.asarray(second).ravel()

        if first.size == 0 or second.size == 0:
            raise ValueError('inputs must contain at least one sample')
        if first.size != second.size:
            raise ValueError(
                f'inputs must have the same number of samples, '
                f'got {first.size} and {second.size}'
            )

        return first, second

    def _compute_intercept(self, x, y):
        '''
            intercept = y_bar - coefficient*x_bar

            WHERE:  y_bar = average(y),
                    x_bar = average(x),
                    coefficient = coefficient already estimated
        '''
        return np.average(y) - self.coefficient*np.average(x)

    def _coefficient_estimate(self, x, y):
        '''
            coefficient_{x,y} = ∑_{i=1}^{n} (x_i - x_bar) * (y_i - y_bar)
                                _________________________________________
                                ∑_{i=1}^{n} (x_i - x_bar)^2

            RAISES: ValueError if x or y is empty or their lengths differ.

            NOTE: when every x value is identical the denominator is 0 and
            the result is nan (numpy emits a RuntimeWarning).
        '''
        x, y = self._as_paired_vectors(x, y)

        # the averages are computed once for the whole dataset rather than
        # once per sample
        x_deviation = x - np.average(x)
        y_deviation = y - np.average(y)

        numerator = np.sum(x_deviation * y_deviation)
        denominator = np.sum(x_deviation**2)

        return numerator / denominator

In [None]:
from load_dataset import x_train, x_test, y_train, y_test
from simple_linear_regression import SimpleLinearRegression

# keep only the 'LSTAT' column as the single input feature and unwrap
# inputs and targets into their underlying `.values` arrays
x_train, x_test = x_train['LSTAT'].values, x_test['LSTAT'].values
y_train, y_test = y_train.values, y_test.values

# estimate the slope and intercept from the train data
slr = SimpleLinearRegression()
slr.fit(x_train, y_train)

# predict on the test data, then report the R^2 of those predictions
pred = slr.predict(x_test)
score = slr.r2_score(y_test, pred)
print(f'Final R^2 score: {score}')

# Multi-feature polynomial regression

In [None]:
def predict_polynomial(self, x):
    '''
        y = β0 + β1*x_1 + β2*x_2^2 + ... + βm*x_m^m

        WHERE:  x_k = k-th feature of a row (1-based),
                βk  = k-th entry of self.coefficients,
                β0  = self.intercept

        x is iterated with x.iterrows(), so it is expected to be a pandas
        DataFrame with one row per sample and one column per feature.

        Returns a list with one prediction per row of x, in row order.
    '''
    predictions = []
    for _, row in x.iterrows():
        # the k-th feature (1-based) is raised to the power k; enumerate is
        # 0-based, and ** binds tighter than +, so the exponent needs its own
        # parentheses: feature**i+1 would compute (feature**i) + 1
        polynomial_values = [feature**(i + 1) for i, feature in enumerate(row.values)]

        pred = np.multiply(polynomial_values, self.coefficients)
        pred = sum(pred)
        pred += self.intercept

        predictions.append(pred)

    return predictions