# Linear Regression and Gradient Descent

Highly recommended: use Google Colab to avoid errors related to rendering 3D plots.

Make sure plotly (`pip install plotly`) and numpy are installed. If you are using a local IDE, set pio.renderers.default to 'browser' in each cell; if you are using the Jupyter application, set it to 'jupyter'. No change is needed if you are using Google Colab.

IN THE FOLLOWING CELL, COMPLETE THE TODOs TO CODE LINEAR REGRESSION AND GRADIENT DESCENT FROM SCRATCH

In [13]:
import numpy as np
import plotly.graph_objects as go
import plotly.io as pio

# Synthetic 1-D regression data: 100 evenly spaced inputs on [0, 10] stored as
# a column vector, with targets on the line y = 3x + 7 plus Gaussian noise
# (mean 0, standard deviation 2). The fixed seed makes the noise reproducible.
np.random.seed(4)
x = np.linspace(0, 10, num=100)[:, np.newaxis]
y = 3 * x + 7 + np.random.normal(loc=0, scale=2, size=(100, 1))

# TO DO: Cost function (MSE for linear regression)
# First calculate y_hat (the predicted values), then average the squared differences between the actual and predicted values across all points
def compute_cost(w, b):
    """Return the mean squared error of the line ``w * x + b`` on the data.

    The inputs ``x`` and targets ``y`` are read from module level rather than
    passed in.

    Args:
        w: Slope of the candidate line.
        b: Intercept of the candidate line.

    Returns:
        The mean of the squared residuals ``y - (w * x + b)``.
    """
    residuals = y - (w * x + b)
    return np.mean(residuals ** 2)

# TO DO: Manual gradient descent to find optimal w, b
def gradient_descent_path(x, y, lr=0.01, epochs=2000):
    """Fit ``y_hat = w * x + b`` by batch gradient descent and record each step.

    Both parameters start at 0.0. Every epoch computes the prediction error
    ``y_hat - y`` on the full data set, updates ``w`` and ``b``, and then
    records the updated parameters together with the MSE evaluated at them.
    The recorded history is what the later cells use to draw the descent path.

    Args:
        x: Input values.
        y: Target values.
        lr: Learning rate (step size) applied to both parameters.
        epochs: Number of update steps to perform.

    Returns:
        A tuple ``(w_path, b_path, cost_path)`` of NumPy arrays, each holding
        one entry per epoch (the values *after* that epoch's update).
    """
    w, b = 0.0, 0.0
    ws, bs, costs = [], [], []

    for _ in range(epochs):
        error = (w * x + b) - y

        # These are the gradients of MSE / 2; the constant factor 2 of the
        # exact MSE gradient is effectively absorbed into the learning rate.
        grad_w = np.mean(error * x)
        grad_b = np.mean(error)

        w = w - lr * grad_w
        b = b - lr * grad_b

        # Record the state after the update, with the cost at the new point.
        ws.append(w)
        bs.append(b)
        costs.append(np.mean((y - (w * x + b)) ** 2))

    return tuple(np.array(history) for history in (ws, bs, costs))


# Candidate slopes (w) and intercepts (b) spanning the region to visualise
w_range = np.linspace(-2, 6, 200)
b_range = np.linspace(0, 14, 200)
W, B = np.meshgrid(w_range, b_range)

# Evaluate the cost at every (w, b) grid point to build the surface
Z = np.zeros(W.shape, dtype=W.dtype)
for i, j in np.ndindex(*W.shape):
    Z[i, j] = compute_cost(W[i, j], B[i, j])

# Run gradient descent and treat its last recorded point as the fitted solution
w_path, b_path, cost_path = gradient_descent_path(x, y)
w_opt = w_path[-1]
b_opt = b_path[-1]
optimal_cost = cost_path[-1]

# Interactive 3-D figure: the cost surface over the (w, b) grid, plus a red
# marker at the point where gradient descent finished.
fig = go.Figure()
fig.add_trace(go.Surface(z=Z, x=W, y=B, colorscale='Viridis', opacity=0.85))
fig.add_trace(
    go.Scatter3d(
        x=[w_opt],
        y=[b_opt],
        z=[optimal_cost],
        mode='markers',
        marker={'size': 6, 'color': 'red'},
        name='Optimal (w, b)',
    )
)

fig.update_layout(
    title="Interactive Cost Surface for Linear Regression (MSE)",
    scene={
        'xaxis_title': 'w',
        'yaxis_title': 'b',
        'zaxis_title': 'MSE Cost',
    },
    width=800,
    height=600,
)

# Choose where the figure is rendered before showing it
# ('browser' for local IDEs, 'jupyter' for the Jupyter application).
pio.renderers.default = 'browser'
fig.show()

# Compare the lowest cost on the sampled grid with the cost gradient descent reached
print(f"Surface min cost: {Z.min():.6f}")
print(f"Gradient descent cost: {optimal_cost:.6f}")
print(f"Found at: w={w_opt:.4f}, b={b_opt:.4f}")

Surface min cost: 3.789140
Gradient descent cost: 3.786755
Found at: w=3.0070, b=7.0243


JUST RUN THE FOLLOWING CELL TO OBSERVE THE GRADIENT DESCENT PATH

In [14]:
import numpy as np
import plotly.graph_objects as go
import plotly.io as pio



# Grid of candidate (w, b) values for the cost surface
w_range = np.linspace(-2, 6, 200)
b_range = np.linspace(0, 14, 200)
W, B = np.meshgrid(w_range, b_range)

# Fill the surface point by point using compute_cost() from the previous cell
Z = np.zeros(W.shape, dtype=W.dtype)
for i, j in np.ndindex(*W.shape):
    Z[i, j] = compute_cost(W[i, j], B[i, j])

# Per-epoch history of w, b and cost from gradient_descent_path() (previous cell)
w_path, b_path, cost_path = gradient_descent_path(x, y)

# Interactive 3-D figure: the cost surface, the full gradient descent
# trajectory drawn in red, and a black marker on the last recorded point.
fig = go.Figure()
fig.add_trace(go.Surface(z=Z, x=W, y=B, colorscale='Viridis', opacity=0.85))
fig.add_trace(
    go.Scatter3d(
        x=w_path,
        y=b_path,
        z=cost_path,
        mode='lines+markers',
        marker={'size': 3, 'color': 'red'},
        line={'color': 'red', 'width': 2},
        name='Gradient Descent Path',
    )
)
fig.add_trace(
    go.Scatter3d(
        x=[w_path[-1]],
        y=[b_path[-1]],
        z=[cost_path[-1]],
        mode='markers',
        marker={'size': 6, 'color': 'black'},
        name='Final Point',
    )
)

fig.update_layout(
    title="Gradient Descent Path on Linear Regression MSE Surface",
    scene={
        'xaxis_title': 'w',
        'yaxis_title': 'b',
        'zaxis_title': 'Cost',
        'camera': {'eye': {'x': 1.3, 'y': 1.3, 'z': 0.7}},
        # Axis limits match the grid; the cost axis is capped at the
        # largest cost recorded along the descent path.
        'xaxis': {'range': [-2, 6]},
        'yaxis': {'range': [0, 14]},
        'zaxis': {'range': [0, max(cost_path)]},
    },
    width=800,
    height=600,
)

# Choose where the figure is rendered before showing it
# ('browser' for local IDEs, 'jupyter' for the Jupyter application).
pio.renderers.default = 'browser'
fig.show()

# Final metrics: best cost on the sampled grid vs. where gradient descent ended
print(f"Surface min cost: {Z.min():.6f}")
print(f"Gradient descent cost: {cost_path[-1]:.6f}")
print(f"Found at: w = {w_path[-1]:.4f}, b = {b_path[-1]:.4f}")

Surface min cost: 3.789140
Gradient descent cost: 3.786755
Found at: w = 3.0070, b = 7.0243


NOW, LET US OBSERVE THE NLL LOSS FOR LINEAR REGRESSION INSTEAD OF THE MSE LOSS.  
JUST RUN THE FOLLOWING CELL TO SEE THE CORRESPONDING MSE AND NLL CURVES AND ANSWER THE QUESTIONS IN THE NEXT CELL.

In [15]:
import numpy as np
import plotly.graph_objects as go
import plotly.io as pio

# Generate linear regression data: 100 evenly spaced inputs on [0, 10] as a
# column vector, targets on y = 3x + 7 plus Gaussian noise (std 2, fixed seed).
np.random.seed(4)
x = np.linspace(0, 10, 100).reshape(-1, 1)
y = 3 * x + 7 + np.random.normal(0, 2, size=(100, 1))

# Cost functions
def compute_cost(w, b, loss='nll'):
    """Return the cost of the line ``w * x + b`` on the module-level data.

    Args:
        w: Slope of the candidate line.
        b: Intercept of the candidate line.
        loss: Which cost to compute. ``'mse'`` gives the mean squared
            residual. ``'nll'`` (the default) gives the mean Gaussian negative
            log-likelihood, where the variance is re-estimated for every
            ``(w, b)`` as ``np.var`` of the residuals.

    Returns:
        The selected cost as a scalar.

    Raises:
        ValueError: If ``loss`` is neither ``'nll'`` nor ``'mse'``. Without
            this check the function would fall through and return ``None``.
    """
    residuals = y - (w * x + b)

    if loss == 'mse':
        return np.mean(residuals ** 2)

    if loss == 'nll':
        # eps keeps the division and the logarithm finite when the residual
        # variance is zero.
        eps = 1e-8
        sigma_sq = np.var(residuals) + eps
        return 0.5 * np.mean((residuals ** 2) / sigma_sq + np.log(2 * np.pi * sigma_sq))

    raise ValueError(f"Unknown loss {loss!r}; expected 'nll' or 'mse'")

# Candidate (w, b) values shared by both cost surfaces
w_range = np.linspace(-2, 6, 200)
b_range = np.linspace(0, 14, 200)
W, B = np.meshgrid(w_range, b_range)

# One surface per loss, evaluated at the same grid points
Z_nll = np.zeros(W.shape, dtype=W.dtype)
Z_mse = np.zeros(W.shape, dtype=W.dtype)
for i, j in np.ndindex(*W.shape):
    w_ij, b_ij = W[i, j], B[i, j]
    Z_nll[i, j] = compute_cost(w_ij, b_ij, loss='nll')
    Z_mse[i, j] = compute_cost(w_ij, b_ij, loss='mse')

# Select the renderer BEFORE any figure is shown. Assigning it after the
# show() calls would not affect the figures drawn in this cell.
# Use 'browser' for local IDEs or 'jupyter' for the Jupyter application.
pio.renderers.default = 'browser'

# Plot NLL
fig_nll = go.Figure()
fig_nll.add_trace(go.Surface(z=Z_nll, x=W, y=B, colorscale='Viridis'))
fig_nll.update_layout(
    title="NLL Cost Surface (Linear Regression - Gaussian Likelihood)",
    scene=dict(xaxis_title='w', yaxis_title='b', zaxis_title='NLL Cost'),
    width=800, height=600
)
fig_nll.show()

# Plot MSE
fig_mse = go.Figure()
fig_mse.add_trace(go.Surface(z=Z_mse, x=W, y=B, colorscale='Plasma'))
fig_mse.update_layout(
    title="MSE Cost Surface (Linear Regression)",
    scene=dict(xaxis_title='w', yaxis_title='b', zaxis_title='MSE Cost'),
    width=800, height=600
)
fig_mse.show()

QUESTIONS:
1. How is the NLL curve different from the MSE curve?

    The MSE curve is smoother, while the NLL curve is sharper and steeper near the center.
    
2. What potential problems can this NLL curve cause and how would they affect the gradient descent process?

    The steep curves can cause large gradient steps, which might make the algorithm overshoot. The MSE curve is more stable, which makes it easier for gradient descent to find the global minimum.

3. WHICH ONE IS BETTER FOR LINEAR REGRESSION (OBSERVE THE CURVES AND ANSWER)?

    MSE is better for linear regression since it is smooth and predictable. It offers stable convergence and has only one global minimum.