In [1]:
import numpy as np
import pandas as pd
from sklearn.preprocessing import StandardScaler
from sklearn.linear_model import LinearRegression
from sklearn.metrics import mean_squared_error, r2_score

In [2]:
# Read the train and test splits from CSV files in the working directory.
train_df, test_df = (pd.read_csv(path) for path in ('train.csv', 'test.csv'))

# Columns removed before modelling; 'price' is listed because it is the target.
drop_cols = ['Unnamed: 0', 'id', 'date', 'zipcode', 'price']

# errors='ignore' skips any listed column that a split does not contain.
X_train = train_df.drop(columns=drop_cols, errors='ignore')
X_test = test_df.drop(columns=drop_cols, errors='ignore')

# Targets are the 'price' column divided by 1000.
y_train = train_df['price'].div(1000)
y_test = test_df['price'].div(1000)

# Fit the scaler on the training features only, then apply the same
# training-set statistics to both splits.
scaler = StandardScaler().fit(X_train)
X_train_scaled = scaler.transform(X_train)
X_test_scaled = scaler.transform(X_test)

In [3]:
#6.2
def ridge_gradient_descent(X, y, alpha, num_iters, lam):
    """Estimate ridge-penalized linear regression coefficients by batch gradient descent.

    Starting from a zero vector, every iteration moves ``theta`` against

        (2 / N) * X.T @ (X @ theta - y) + 2 * lam * theta_pen

    where ``theta_pen`` is ``theta`` with its first entry set to zero. The
    first coefficient is therefore never shrunk by the penalty, so callers
    should place the intercept (all-ones) column first in ``X``.

    Parameters
    ----------
    X : array-like of shape (N, d)
        Design matrix. Converted to a float ndarray.
    y : array-like of shape (N,) or (N, 1)
        Target values. Flattened to 1-D so that a column vector cannot
        broadcast the residual ``X @ theta - y`` into an (N, N) matrix.
    alpha : float
        Step size of each update.
    num_iters : int
        Number of updates to perform; there is no convergence check.
    lam : float
        Weight of the penalty term; ``0`` leaves only the squared-error gradient.

    Returns
    -------
    numpy.ndarray of shape (d,)
        Coefficients after ``num_iters`` updates.

    Raises
    ------
    ValueError
        If ``X`` is not two-dimensional or ``y`` does not contain exactly one
        value per row of ``X``.
    """
    X = np.asarray(X, dtype=float)
    y = np.asarray(y, dtype=float).ravel()

    if X.ndim != 2:
        raise ValueError(f"X must be 2-dimensional, got shape {X.shape}")
    N, d = X.shape
    if y.shape[0] != N:
        raise ValueError(
            f"y must hold one target per row of X: got {y.shape[0]} targets for {N} rows"
        )

    theta = np.zeros(d)
    for _ in range(num_iters):
        # Work on a copy so zeroing the first entry does not touch theta itself.
        theta_reg = np.copy(theta)
        theta_reg[0] = 0  # exclude the first (intercept) coefficient from the penalty
        ridge_grad = 2 * lam * theta_reg
        gradient = (2 / N) * (X.T @ (X @ theta - y)) + ridge_grad
        theta = theta - alpha * gradient
    return theta

In [4]:
#6.3
# Simulate y = 1 + 2x + noise and record how the ridge fit changes with lambda.
np.random.seed(42)  # fixed seed so the draws below are reproducible
N = 1000

# Draw order matters for reproducibility: features first, then the noise.
X_sim = np.random.uniform(low=-2, high=2, size=N)
e = np.random.normal(loc=0, scale=np.sqrt(2), size=N)  # standard deviation sqrt(2)
y_sim = 1 + 2 * X_sim + e

# Leading column of ones so the first coefficient acts as the intercept.
X_sim_bias = np.column_stack((np.ones(N), X_sim))

lambdas = [0, 1, 10, 100, 1000, 10000]
sim_results = []

for lam in lambdas:
    theta_hat = ridge_gradient_descent(
        X_sim_bias, y_sim, alpha=1e-4, num_iters=5000, lam=lam
    )
    fitted = X_sim_bias @ theta_hat

    # One summary row per penalty value; index 1 is the coefficient on X_sim.
    row = {"Lambda": lam, "Slope": theta_hat[1]}
    row["MSE"] = mean_squared_error(y_sim, fitted)
    row["R2"] = r2_score(y_sim, fitted)
    sim_results.append(row)

df_sim = pd.DataFrame(sim_results)
print(df_sim)

   Lambda  Theta1 (Slope)       MSE        R2
0       0        1.434816  2.483693  0.650773
1       1        1.008344  3.316558  0.533666
2      10        0.232024  6.109465  0.140962
3     100        0.026027  7.127648 -0.002203
4    1000        0.002635  7.250618 -0.019493
5   10000       -0.000759  7.268565 -0.022017


As lambda increases, the slope consistently decreases, the MSE increases, and the R^2 decreases. When lambda is 0, which corresponds to ordinary linear regression, the slope is about 1.435 and the model achieves the best performance, with the lowest MSE and the highest R^2. (This slope is still well below the true value of 2 used to generate the data, which indicates that gradient descent with a step size of 1e-4 and 5000 iterations has not fully converged.) As lambda becomes larger and the slope approaches zero, the model gradually loses its flexibility. This causes it to underfit the data, which is reflected in the increasing MSE and decreasing R^2. For very large values of lambda, the model stabilizes, with only slightly increasing MSE and negative R^2 values. Overall, these results demonstrate that stronger regularization reduces variance but increases bias, and produces poor predictive performance when lambda is too large.