# Linear Regression

## Import Libraries

In [332]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt

# Marker output: reaching this line means the imports above succeeded.
print("Libraries imported")

Libraries imported


## Model Architecture

In [333]:
class LinearRegression:
    """Linear regression trained with batch gradient descent on mean squared error.

    The model is ``y_hat = X @ weights + bias``. Both parameters start at
    zero and are updated ``iters`` times with step size ``lr``.

    Attributes:
        lr: Learning rate (gradient-descent step size).
        iters: Number of gradient-descent iterations run by ``fit``.
        weights: Learned coefficients; ``None`` until ``fit`` is called.
            An array of shape ``(n_features,)`` for 2-D input, a scalar for
            1-D input.
        bias: Learned intercept; ``None`` until ``fit`` is called.
        cost_history: Mean squared error recorded at the start of every
            iteration of the most recent ``fit`` call.
    """

    def __init__(self, lr = 0.01, iters = 1000) -> None:
        self.lr = lr
        self.iters = iters
        self.weights = None
        self.bias = None
        self.cost_history = []

    def fit(self, X, y, verbose = True):
        """Fit the weights and bias to the training data.

        Args:
            X: Training inputs, either 2-D ``(n_samples, n_features)`` or
                1-D ``(n_samples,)`` for a single feature.
            y: Training targets with one value per sample. A column vector
                of shape ``(n_samples, 1)`` is flattened.
            verbose: When true (the default), print the cost at every
                iteration.

        Raises:
            ValueError: If ``X`` is not 1-D or 2-D, if it contains no
                samples, or if ``y`` does not have one value per sample.
        """
        X = np.asarray(X, dtype=float)
        # Flatten y so that an (n, 1) column cannot broadcast against the
        # (n,) predictions into an (n, n) residual matrix.
        y = np.asarray(y, dtype=float).ravel()

        if X.ndim == 1:
            # Single feature: every entry of X is one sample, so the sample
            # count is the vector length (not 1) and the weight is a scalar.
            n_samples = X.shape[0]
            self.weights = 0.0
        elif X.ndim == 2:
            n_samples, n_features = X.shape
            self.weights = np.zeros(n_features)
        else:
            raise ValueError(f"X must be 1-D or 2-D, got {X.ndim} dimensions")

        if n_samples == 0:
            raise ValueError("X must contain at least one sample")
        if y.shape[0] != n_samples:
            raise ValueError(
                f"X has {n_samples} samples but y has {y.shape[0]} values"
            )

        self.bias = 0.0
        self.cost_history = []

        for i in range(self.iters):
            # Forward pass: y_hat = X . w + b
            y_pred = np.dot(X, self.weights) + self.bias
            residual = y_pred - y

            # Mean squared error of the current parameters.
            cost = (1 / n_samples) * np.sum(residual ** 2)
            self.cost_history.append(cost)
            if verbose:
                print(f"Iter: {i} | Cost: {cost}")

            # Gradients of the MSE with respect to the weights and the bias.
            dw = (2 / n_samples) * np.dot(X.T, residual)
            db = (2 / n_samples) * np.sum(residual)

            # Gradient-descent step.
            self.weights -= self.lr * dw
            self.bias -= self.lr * db

    def predict(self, X):
        """Return ``X @ weights + bias`` for the given inputs.

        Must be called after ``fit``; before that ``weights`` and ``bias``
        are still ``None``.
        """
        y_pred = np.dot(X, self.weights) + self.bias
        return y_pred


## Dataset

In [334]:
from sklearn.datasets import make_regression
from sklearn.model_selection import train_test_split

# Generate a synthetic regression dataset with 100 samples and 2 features
# (noise=1 sets the standard deviation of the Gaussian noise added to y;
# random_state makes the data reproducible).
X, y = make_regression(n_samples=100, n_features=2, noise=1, random_state=42)

# Hold out 20% of the samples as a test set; the split is seeded as well.
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size = 0.2, random_state = 42)

## Model Training

In [335]:
# Train the from-scratch model on the training split (fit prints the cost
# at every iteration).
# NOTE(review): the recorded evaluation reports mse = 273.67 although the data
# was generated with noise=1, which suggests gradient descent has not
# converged with lr = 0.001 over 1000 iterations. Consider a larger lr or
# more iters (TODO confirm).
lr_model = LinearRegression(lr = 0.001, iters = 1000)
lr_model.fit(X_train, y_train)

Iter: 0 | Cost: 11892.034989108484
Iter: 1 | Cost: 11848.067542051176
Iter: 2 | Cost: 11804.265900713031
Iter: 3 | Cost: 11760.629429563762
Iter: 4 | Cost: 11717.15749554152
Iter: 5 | Cost: 11673.849468043221
Iter: 6 | Cost: 11630.704718914885
Iter: 7 | Cost: 11587.722622442026
Iter: 8 | Cost: 11544.902555340073
Iter: 9 | Cost: 11502.243896744834
Iter: 10 | Cost: 11459.74602820299
Iter: 11 | Cost: 11417.408333662639
Iter: 12 | Cost: 11375.230199463867
Iter: 13 | Cost: 11333.211014329361
Iter: 14 | Cost: 11291.350169355053
Iter: 15 | Cost: 11249.647058000817
Iter: 16 | Cost: 11208.101076081184
Iter: 17 | Cost: 11166.711621756098
Iter: 18 | Cost: 11125.478095521714
Iter: 19 | Cost: 11084.399900201248
Iter: 20 | Cost: 11043.476440935818
Iter: 21 | Cost: 11002.707125175366
Iter: 22 | Cost: 10962.091362669598
Iter: 23 | Cost: 10921.62856545896
Iter: 24 | Cost: 10881.31814786564
Iter: 25 | Cost: 10841.15952648464
Iter: 26 | Cost: 10801.152120174833
Iter: 27 | Cost: 10761.295350050097
Iter: 2

## Model Evaluation

In [336]:
from sklearn.metrics import mean_squared_error, r2_score

# Predict on the held-out test split.
y_pred = lr_model.predict(X_test)

# Mean squared error: (1/n) * sum((y_true - y_pred)^2)
mse = mean_squared_error(y_test, y_pred)
# R^2 score: 1 - sum((y_true - y_pred)^2) / sum((y_true - mean(y_true))^2)
r2 = r2_score(y_test, y_pred)

print(f"mse: {mse:.2f}")
print(f"r2: {r2:.2f}")

mse: 273.67
r2: 0.97
