In [1]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt

In [3]:
df = pd.read_csv('/home/waltertaya/ML/Batch-Gradient-Descent/housing.csv', header=None)
df.columns = ['Number of bedrooms', 'Sizes of the house(feet sqrd)', 'Prices in dollars']
# df.head()

In [4]:
X = np.array(df.drop('Prices in dollars', axis=1))
y = np.array(df['Prices in dollars'])
# print(X)
# print(y)

In [5]:
def predict(X,w,b):
    y_pred = []
    return np.array(np.matmul(w, X.T) + b)

In [6]:
def cost_func(y, y_pred):
    n = len(y)
    cost = 0
    for i in range(1, n):
        cost += (y[i] - y_pred[i])**2
    return cost / n

In [7]:
def batch_gradient_descent(X, y, w, b, learning_rate, iterations):
    N = len(y)  # Number of data points
    for i in range(iterations):
        # Predictions
        y_pred = predict(X, w, b)

        # Compute gradients
        dw = -(2/N) * np.dot(X.T, (y - y_pred))  # Gradient w.r.t weights
        db = -(2/N) * np.sum(y - y_pred)         # Gradient w.r.t bias

        # Update weights and bias
        w -= learning_rate * dw
        b -= learning_rate * db

        # Optionally, print the cost every 100 iterations for monitoring
        if i % 100 == 0:
            cost = cost_func(y, y_pred)
            print(f"Iteration {i}: Cost = {cost}")

    return w, b

In [8]:
from sklearn.preprocessing import StandardScaler

# Normalize the features using StandardScaler (or MinMaxScaler)
scaler = StandardScaler()
X_scaled = scaler.fit_transform(X)

w = np.zeros(X.shape[1])
b = 0
learning_rate = 0.01
iterations = 1000

# Run gradient descent
w, b = batch_gradient_descent(X_scaled, y, w, b, learning_rate, iterations)

# Final weights and bias after training
print(f"Trained weights: {w}")
print(f"Trained bias: {b}")

Iteration 0: Cost = 248886.36363636365
Iteration 100: Cost = 5595.894553587888
Iteration 200: Cost = 1464.1754085851371
Iteration 300: Cost = 946.4241206861439
Iteration 400: Cost = 655.3719839250393
Iteration 500: Cost = 459.3341293030831
Iteration 600: Cost = 325.118610994839
Iteration 700: Cost = 233.0537321983769
Iteration 800: Cost = 169.90980703403727
Iteration 900: Cost = 126.62481325294056
Trained weights: [ 14.05305762 231.89793199]
Trained bias: 440.4545447132748


In [9]:
w = np.array([ 14, 233])
b = 440
y_pred = predict(X_scaled, w, b)
print(y_pred)
print(cost_func(y, y_pred))

[264.50741284 265.35441967 362.26796078 441.65195812 171.11003105
 735.83186157 264.48178644 507.12507663 771.55857111 899.86128202
 156.24963977]
96.31045408478899
