# Linear Regression

## Import Libraries

In [1]:
# numpy provides the array math used by the model below.
# NOTE(review): pandas (pd) and matplotlib (plt) are not referenced in the
# cells visible here — confirm whether they are still needed.
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt

# Emit a marker so it is obvious the import cell has been executed.
print("Libraries imported")

Libraries imported


## Model Architecture

In [2]:
class LinearRegression:
    """
    Linear regression model trained with batch gradient descent on the
    mean squared error.

    Parameters:
    - lr (float): Learning rate for gradient descent optimization.
    - iters (int): Number of iterations for gradient descent.

    Methods:
    - fit(X, y): Fit the linear regression model to the training data.
    - predict(X): Predict target values for new data.

    Attributes:
    - weights (array or float): Learned coefficients for the features
      (a scalar when the model was fitted on 1-D X).
    - bias (float): Learned intercept term.
    - costs (list): Mean squared error measured at the start of every
      iteration of the most recent fit() call.
    """

    def __init__(self, lr=0.01, iters=1000):
        """
        Initialize the LinearRegression model.

        Parameters:
        - lr (float): Learning rate for gradient descent optimization.
        - iters (int): Number of iterations for gradient descent.
        """
        self.lr = lr
        self.iters = iters
        self.weights = None
        self.bias = None
        self.costs = []

    def fit(self, X, y):
        """
        Fit the linear regression model to the training data.

        Parameters:
        - X (array-like): Training feature matrix of shape
          (n_samples, n_features), or a 1-D sequence of n_samples values
          for a single feature.
        - y (array-like): Target values, one per sample.

        Raises:
        - ValueError: If X is not 1-D or 2-D, if it contains no samples,
          or if y does not hold exactly one value per sample.
        """
        # Accept lists/tuples as well as arrays, as "array-like" promises.
        X = np.asarray(X, dtype=float)
        y = np.asarray(y, dtype=float)

        if X.ndim == 1:
            # A 1-D X is n observations of one feature, so the sample count
            # is its length (not 1) and the weight is a single scalar.
            n_samples = X.shape[0]
            initial_weights = 0.0
        elif X.ndim == 2:
            n_samples, n_features = X.shape
            initial_weights = np.zeros(n_features)
        else:
            raise ValueError(
                f"X must be 1-D or 2-D, got an array with {X.ndim} dimensions"
            )

        if n_samples == 0:
            raise ValueError("X must contain at least one sample")
        if y.shape != (n_samples,):
            raise ValueError(
                f"y must have shape ({n_samples},), got {y.shape}"
            )

        # Start every fit from scratch so repeated calls neither continue
        # from old parameters nor pile up cost history from earlier runs.
        self.weights = initial_weights
        self.bias = 0
        self.costs = []

        # loop through gradient descent iterations
        for _ in range(self.iters):
            # residuals of the current parameters
            y_pred = np.dot(X, self.weights) + self.bias
            error = y - y_pred

            # record the mean squared error before this update
            self.costs.append((1 / n_samples) * np.sum(error ** 2))

            # gradients of the MSE with respect to weights and bias
            dw = (-2 / n_samples) * np.dot(X.T, error)
            db = (-2 / n_samples) * np.sum(error)

            # update the weights and bias
            self.weights -= self.lr * dw
            self.bias -= self.lr * db

    def predict(self, X):
        """
        Predict target values for new data.

        Parameters:
        - X (array-like): New data feature matrix, laid out like the X
          passed to fit().

        Returns:
        - y_pred (array): Predicted target values.
        """
        X = np.asarray(X, dtype=float)
        return np.dot(X, self.weights) + self.bias


## Dataset

In [3]:
from sklearn.datasets import make_regression
from sklearn.model_selection import train_test_split

# Generate a synthetic regression dataset with 100 samples and 2 features
# (noise=1); random_state is fixed so the same data is produced on every run.
X, y = make_regression(n_samples=100, n_features=2, noise=1, random_state=42)

# Hold out 20% of the samples as a test set, again with a fixed seed.
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size = 0.2, random_state = 42)

## Model Training

In [4]:
# Train with a learning rate ten times smaller than the class default (0.01).
lr_model = LinearRegression(lr = 0.001, iters = 1000)
lr_model.fit(X_train, y_train)
# Show the cost recorded at each gradient descent iteration.
# NOTE(review): the recorded costs are still dropping by roughly 0.4% per
# step, so 1000 iterations at this learning rate may not reach convergence —
# confirm before relying on the fitted parameters.
lr_model.costs

[11892.034989108484,
 11848.067542051176,
 11804.265900713031,
 11760.629429563762,
 11717.15749554152,
 11673.849468043221,
 11630.704718914885,
 11587.722622442026,
 11544.902555340073,
 11502.243896744834,
 11459.74602820299,
 11417.408333662639,
 11375.230199463867,
 11333.211014329361,
 11291.350169355053,
 11249.647058000817,
 11208.101076081184,
 11166.711621756098,
 11125.478095521714,
 11084.399900201248,
 11043.476440935818,
 11002.707125175366,
 10962.091362669598,
 10921.62856545896,
 10881.31814786564,
 10841.15952648464,
 10801.152120174833,
 10761.295350050097,
 10721.58863947047,
 10682.031414033328,
 10642.623101564626,
 10603.36313211014,
 10564.250937926767,
 10525.285953473855,
 10486.467615404561,
 10447.795362557255,
 10409.268635946935,
 10370.886878756712,
 10332.649536329282,
 10294.556056158488,
 10256.605887880862,
 10218.798483267223,
 10181.13329621433,
 10143.60978273653,
 10106.227400957454,
 10068.985611101753,
 10031.883875486867,
 9994.921658514813,
 9

## Model Evaluation

In [5]:
from sklearn.metrics import mean_squared_error, r2_score

# Score the trained model on the held-out test split.
y_pred = lr_model.predict(X_test)

# mean squared error: (1/n) * sum((y_true - y_pred)^2)
mse = mean_squared_error(y_test, y_pred)
# r2 score: 1 - sum((y_true - y_pred)^2) / sum((y_true - mean(y_true))^2)
r2 = r2_score(y_test, y_pred)

print(f"mse: {mse:.2f}")
print(f"r2: {r2:.2f}")

mse: 273.67
r2: 0.97
