In [2]:
import pandas as pd
import numpy as np
from sklearn.linear_model import LinearRegression
from sklearn.metrics import mean_squared_error, r2_score

# Load the train/test splits from the working directory.
train_df = pd.read_csv('train.csv')
test_df = pd.read_csv('test.csv')

# Columns excluded from the feature set. errors='ignore' lets the drop succeed
# even when one of them is absent from a file.
# NOTE(review): 'Unnamed: 0' is presumably a leftover index column from an
# earlier to_csv call -- confirm against the raw files.
DROP_COLUMNS = ['id', 'date', 'zipcode', 'Unnamed: 0']
train_df = train_df.drop(columns=DROP_COLUMNS, errors='ignore')
test_df = test_df.drop(columns=DROP_COLUMNS, errors='ignore')

# Single-feature inputs (sqft_living only) as plain NumPy arrays.
X_raw_train = train_df['sqft_living'].values
X_raw_test = test_df['sqft_living'].values

# Targets for the single-feature experiments: price divided by 1000.
y_train_poly = train_df['price'] / 1000
y_test_poly = test_df['price'] / 1000

# Multi-feature design matrices (every remaining column except the target)
# and the matching targets, again price divided by 1000.
X_train_multi = train_df.drop(columns=['price'])
y_train = train_df['price'] / 1000
X_test_multi = test_df.drop(columns=['price'])
y_test = test_df['price'] / 1000

# Standardization statistics come from the training split only, so no
# information from the test split leaks into the scaling.
train_mean = X_train_multi.mean()
train_std = X_train_multi.std()

# A constant training column has std == 0; dividing by it would produce
# NaN/inf values that propagate into every gradient-descent update.
# Dividing by 1 instead leaves such a column at (value - mean).
train_scale = train_std.replace(0, 1)

X_train_scaled = (X_train_multi - train_mean) / train_scale
X_test_scaled = (X_test_multi - train_mean) / train_scale

# Prepend a column of ones so the first coefficient acts as the intercept.
X_train_bias = np.c_[np.ones(X_train_scaled.shape[0]), X_train_scaled]
X_test_bias = np.c_[np.ones(X_test_scaled.shape[0]), X_test_scaled]

def gradient_descent(X, y, lr, iterations):
    """Fit linear-regression coefficients with batch gradient descent.

    Starting from an all-zero coefficient vector, repeats the update
    ``theta <- theta - lr * (1/m) * X^T (X theta - y)`` a fixed number of
    times, where ``m`` is the number of rows of ``X``.

    Parameters
    ----------
    X : 2-D array of shape (m, n)
        Design matrix. No intercept column is added here.
    y : array-like of length m
        Target values.
    lr : float
        Step size applied to the gradient on every update.
    iterations : int
        Number of full-batch updates to perform.

    Returns
    -------
    numpy.ndarray of length n
        The coefficient vector after the final update (all zeros when
        ``iterations`` is 0).
    """
    n_samples, n_features = X.shape
    weights = np.zeros(n_features)

    for _step in range(iterations):
        # Residuals of the current fit: prediction minus target.
        residuals = X.dot(weights) - y
        # Move against the mean gradient over the whole batch.
        weights = weights - lr * ((1/n_samples) * X.T.dot(residuals))

    return weights

# Hyperparameter sweep: every learning rate is paired with every iteration
# budget, with the learning rate varying slowest.
learning_rates = [0.01, 0.1, 0.5]
iterations_list = [10, 50, 100]
grid = [(rate, steps) for rate in learning_rates for steps in iterations_list]

# One dict of metrics per (learning rate, iterations) combination.
gd_results = []

print(f"{'LR':<6} {'Iter':<6} {'Train MSE':<12} {'Train R2':<10} {'Test MSE':<12} {'Test R2':<10}")

for lr, iters in grid:
    # Fit on the training split only.
    theta = gradient_descent(X_train_bias, y_train, lr, iters)

    # Score the same coefficients on both splits.
    train_pred, test_pred = X_train_bias.dot(theta), X_test_bias.dot(theta)

    tr_mse, tr_r2 = mean_squared_error(y_train, train_pred), r2_score(y_train, train_pred)
    te_mse, te_r2 = mean_squared_error(y_test, test_pred), r2_score(y_test, test_pred)

    row = {
        'Learning Rate': lr,
        'Iterations': iters,
        'Train MSE': tr_mse,
        'Train R2': tr_r2,
        'Test MSE': te_mse,
        'Test R2': te_r2
    }
    gd_results.append(row)

    # Metrics line, then a peek at the coefficients and their overall
    # magnitude for this run.
    print(f"{lr:<6} {iters:<6} {tr_mse:<12.2f} {tr_r2:<10.4f} {te_mse:<12.2f} {te_r2:<10.4f}")
    print(f"Theta (First 5): {theta[:5]}")
    print(f"Theta (Norm): {np.linalg.norm(theta):.4f}")

# Consolidated table of every run, printed without the index column.
gd_summary = pd.DataFrame(gd_results)
print(gd_summary.to_string(index=False))

--- Part 1: Polynomial Regression Results ---
LR     Iter   Train MSE    Train R2   Test MSE     Test R2   
0.01   10     294825.43    -1.5606    350564.31    -1.1026   
Theta (First 5): [49.76098656  7.88078028 12.88591338 19.46535631  3.65593961]
Theta (Norm): 67.6450
0.01   50     138307.43    -0.2012    170397.91    -0.0220   
Theta (First 5): [205.560702    12.52628095  25.93459743  47.92367226   5.46112182]
Theta (Norm): 238.5007
0.01   100    70123.55     0.3910     97494.50     0.4152    
Theta (First 5): [329.92617388   6.05164215  23.84548765  54.61276376   3.66665972]
Theta (Norm): 362.3789
0.1    10     66503.52     0.4224     93566.73     0.4388    
Theta (First 5): [338.95740148   5.71876169  23.71847135  55.22067503   3.41073429]
Theta (Norm): 371.7393
0.1    50     31579.17     0.7257     58012.94     0.6520    
Theta (First 5): [517.73273293 -11.3003228   16.58336424  57.67459768   4.5047538 ]
Theta (Norm): 548.7962
0.1    100    31497.74     0.7264     57725.44     0.

In [None]:
# With LR = 0.01, learning is slow: the MSE keeps decreasing, but it is still high after 10, 50, and even 100 iterations.
# LR = 0.1 gives a suitable learning speed: after 50 and 100 iterations the results are close to the scikit-learn / closed-form solution (from the previous problems).
# LR = 0.5 makes the learning step too large, producing an extremely large MSE and a negative R2, which suggests that gradient descent does not converge.