# Q1) Implement Linear Regression and calculate sum of residual error on the following Datasets. x = [0, 1, 2, 3, 4, 5, 6, 7, 8, 9], y = [1, 3, 2, 5, 7, 8, 8, 9, 10, 12]

# 1.1) Compute the regression coefficients using analytic formulation and calculate Sum Squared Error (SSE) and R 2 value.

In [1]:
import numpy as np

x = np.array([0, 1, 2, 3, 4, 5, 6, 7, 8, 9])
y = np.array([1, 3, 2, 5, 7, 8, 8, 9, 10, 12])

mean_x = np.mean(x)
mean_y = np.mean(y)

numerator = np.sum((x - mean_x) * (y - mean_y))
denominator = np.sum((x - mean_x) ** 2)
slope = numerator / denominator
intercept = mean_y - slope * mean_x

y_pred = slope * x + intercept

SSE = np.sum((y - y_pred) ** 2)

SST = np.sum((y - mean_y) ** 2)
R_squared = 1 - (SSE / SST)


print("SSE:", SSE)
print("R^2:", R_squared)


SSE: 5.624242424242423
R^2: 0.952538038613988


In [22]:
import numpy as np

x = np.array([0, 1, 2, 3, 4, 5, 6, 7, 8, 9])
y = np.array([1, 3, 2, 5, 7, 8, 8, 9, 10, 12])


def full_batch_gradient_descent(x, y, learning_rate, num_iterations):

    theta = np.zeros(2)
    m = len(y)
    X = np.vstack((np.ones_like(x), x)).T
    for _ in range(num_iterations):
        y_pred = X.dot(theta)
        theta -= (1/m) * learning_rate * X.T.dot(y_pred - y)
    
    return theta

def stochastic_gradient_descent(x, y, learning_rate, num_iterations):
    theta = np.zeros(2)
    m = len(y)
    for _ in range(num_iterations):
        for i in range(m):
            rand_index = np.random.randint(0, m)
            xi = x[rand_index]
            yi = y[rand_index]
            y_pred = np.dot(xi, theta)
            theta -= learning_rate * xi * (y_pred - yi)
    
    return theta


learning_rate = 0.01
num_iterations = 1000

theta_full_batch = full_batch_gradient_descent(x, y, learning_rate, num_iterations)
theta_stochastic = stochastic_gradient_descent(x, y, learning_rate, num_iterations)

X = np.vstack((np.ones_like(x), x)).T

y_pred_full_batch = X.dot(theta_full_batch)
y_pred_stochastic = X.dot(theta_stochastic)

SSE_full_batch = np.sum((y - y_pred_full_batch) ** 2)
SSE_stochastic = np.sum((y - y_pred_stochastic) ** 2)

mean_y = np.mean(y)
SST = np.sum((y - mean_y) ** 2)
R_squared_full_batch = 1 - (SSE_full_batch / SST)
R_squared_stochastic = 1 - (SSE_stochastic / SST)

# Step 8: Print results
print("Full-batch Gradient Descent:")
print("\nCoefficients:", theta_full_batch)
print("SE:", SSE_full_batch)
print("R-squared value:", R_squared_full_batch)

print("\nStochastic Gradient Descent:")
print("\nCoefficients:", theta_stochastic)
print("SSE:", SSE_stochastic)
print("R-squared value:", R_squared_stochastic)


Full-batch Gradient Descent:

Coefficients: [1.17580361 1.17935476]
SE: 5.634861529064238
R-squared value: 0.9524484259150697

Stochastic Gradient Descent:

Coefficients: [1.43665929 1.43665929]
SSE: 31.149481351391834
R-squared value: 0.7371351784692672


# 1.2) Download Boston Housing Rate Dataset. Analyse the input attributes and find out the attribute that best follow the linear relationship with the output price. Implement both the analytic formulation and gradient descent (Full-batch, stochastic) on LMS loss formulation to compute the coefficients of regression matrix and compare the results.

In [34]:
import pandas as pd
import numpy as np
from sklearn.model_selection import train_test_split

housing_data = pd.read_csv("housing.csv")

selected_attribute = 'median_income'
X = housing_data[selected_attribute].values.reshape(-1, 1)
y = housing_data['median_house_value'].values

X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

X_train_with_intercept = np.c_[np.ones(X_train.shape[0]), X_train]
X_test_with_intercept = np.c_[np.ones(X_test.shape[0]), X_test]

theta_analytic = np.linalg.inv(X_train_with_intercept.T.dot(X_train_with_intercept)).dot(X_train_with_intercept.T).dot(y_train)
print("Coefficients using Analytic Formulation:", theta_analytic)

def full_batch_gradient_descent(X, y, learning_rate, num_iterations):
    theta = np.zeros(X.shape[1])
    for _ in range(num_iterations):
        y_pred = X.dot(theta)
        theta -= (1/len(y)) * learning_rate * X.T.dot(y_pred - y)
    return theta

learning_rate = 0.01
num_iterations = 1000
theta_full_batch = full_batch_gradient_descent(X_train_with_intercept, y_train, learning_rate, num_iterations)
print("Coefficients using Full-batch Gradient Descent:", theta_full_batch)

def stochastic_gradient_descent(X, y, learning_rate, num_iterations):
    theta = np.zeros(X.shape[1])
    for _ in range(num_iterations):
        for i in range(len(y)):
            rand_index = np.random.randint(0, len(y))
            xi = X[rand_index]
            yi = y[rand_index]
            y_pred = np.dot(xi, theta)
            theta -= learning_rate * xi * (y_pred - yi)
    return theta

theta_stochastic = stochastic_gradient_descent(X_train_with_intercept, y_train, learning_rate, num_iterations)
print("Coefficients using Stochastic Gradient Descent:", theta_stochastic)

Coefficients using Analytic Formulation: [44459.72916908 41933.84939381]
Coefficients using Full-batch Gradient Descent: [39148.47787113 43047.96802282]
Coefficients using Stochastic Gradient Descent: [43043.23438082 49792.8048704 ]


In [30]:

y_pred_analytic = X_test_with_intercept.dot(theta_analytic)
y_pred_full_batch = X_test_with_intercept.dot(theta_full_batch)
y_pred_stochastic = X_test_with_intercept.dot(theta_stochastic)

SSE_analytic = np.sum((y - y_pred_analytic) ** 2)
SSE_full_batch = np.sum((y - y_pred_full_batch) ** 2)
SSE_stochastic = np.sum((y - y_pred_stochastic) ** 2)

mean_y = np.mean(y)
SST = np.sum((y - mean_y) ** 2)

R_squared_analytic = 1 - (SSE_analytic / SST)
R_squared_full_batch = 1 - (SSE_full_batch / SST)
R_squared_stochastic = 1 - (SSE_stochastic / SST)

print("SSE and R-squared value:")
print("Analytic Formulation: SSE =", SSE_analytic, ", R-squared =", R_squared_analytic)
print("Full-batch Gradient Descent: SSE =", SSE_full_batch, ", R-squared =", R_squared_full_batch)
print("Stochastic Gradient Descent: SSE =", SSE_stochastic, ", R-squared =", R_squared_stochastic)


SSE and R-squared value:
Analytic Formulation: SSE = 144713469420465.44 , R-squared = 0.47344749180719903
Full-batch Gradient Descent: SSE = 144830458695665.94 , R-squared = 0.47302181618394723
Stochastic Gradient Descent: SSE = 170532359763553.44 , R-squared = 0.3795032202521611
