In [1]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt

In [4]:
df = pd.read_csv('/home/waltertaya/MachineLearning-Projects/Stochastic-Gradient-Descent/housing.csv', header=None)
df.columns = ['Number of bedrooms', 'Sizes of the house(feet sqrd)', 'Prices in dollars']
# df.head()

In [5]:
X = np.array(df.drop('Prices in dollars', axis=1))
y = np.array(df['Prices in dollars'])
# print(X)
# print(y)

In [6]:
def predict(X,w,b):
    y_pred = []
    return np.array(np.matmul(w, X.T) + b)

In [7]:
def cost_func(y, y_pred):
    n = len(y)
    cost = 0
    for i in range(1, n):
        cost += (y[i] - y_pred[i])**2
    return cost / n

In [8]:
def stochastic_gradient_descent(X, y, w, b, learning_rate, iterations):
    N = len(y)  # Number of data points
    for i in range(iterations):
        # Loop through each training example
        for j in range(N):
            # Predictions for the current example
            y_pred = np.dot(X[j], w) + b

            # Compute gradients for the current example
            dw = -(2) * X[j] * (y[j] - y_pred)  # Gradient w.r.t weights for example j
            db = -(2) * (y[j] - y_pred)         # Gradient w.r.t bias for example j

            # Update weights and bias
            w -= learning_rate * dw
            b -= learning_rate * db

        # Optionally, print the cost every 100 iterations for monitoring
        if i % 100 == 0:
            y_pred_all = predict(X, w, b)  # Predictions for the whole dataset
            cost = cost_func(y, y_pred_all)
            print(f"Iteration {i}: Cost = {cost}")

    return w, b

In [9]:
from sklearn.preprocessing import StandardScaler

# Normalize the features using StandardScaler (or MinMaxScaler)
scaler = StandardScaler()
X_scaled = scaler.fit_transform(X)

w = np.zeros(X.shape[1])
b = 0
learning_rate = 0.01
iterations = 1000

# Run gradient descent
w, b = stochastic_gradient_descent(X_scaled, y, w, b, learning_rate, iterations)

# Final weights and bias after training
print(f"Trained weights: {w}")
print(f"Trained bias: {b}")

Iteration 0: Cost = 144859.66370128014
Iteration 100: Cost = 70.96220065203661
Iteration 200: Cost = 33.82500140076176
Iteration 300: Cost = 33.60011805132122
Iteration 400: Cost = 33.63102206936622
Iteration 500: Cost = 33.63545182339658
Iteration 600: Cost = 33.63598396609795
Iteration 700: Cost = 33.63604672395088
Iteration 800: Cost = 33.63605410941904
Iteration 900: Cost = 33.63605497833674
Trained weights: [ -5.98584702 251.41172204]
Trained bias: 440.60223162002865


In [12]:
w = np.array([ -5.98584702, 251.41172204])
b = 440.60223162002865
y_pred = predict(X_scaled, w, b)
print(y_pred)
print(cost_func(y, y_pred))

[257.71717724 270.50195496 351.33194951 448.85973166 168.81034966
 718.80244863 269.56036603 507.63571322 781.09397265 907.66435776
 164.6465265 ]
33.636055079633245
