In [1]:
import numpy as np
import pandas as pd
from sklearn.preprocessing import StandardScaler
from sklearn.linear_model import LinearRegression
from sklearn.metrics import mean_squared_error, r2_score

In [2]:
# Load the pre-split training and test sets from CSV files in the working directory.
train_df = pd.read_csv('train.csv')
test_df = pd.read_csv('test.csv')

# Columns kept out of the feature matrices; 'price' is the regression target.
drop_cols = ['Unnamed: 0', 'id', 'date', 'zipcode', 'price']

# errors='ignore' drops whichever of the listed columns a frame actually has
# and silently skips the rest, so both frames can share one drop list.
X_train = train_df.drop(columns=drop_cols, errors='ignore')
X_test = test_df.drop(columns=drop_cols, errors='ignore')

# Targets are the price column divided by 1000.
y_train = train_df['price'].div(1000)
y_test = test_df['price'].div(1000)

# Standardization statistics are learned from the training features only and
# then reused unchanged on the test features.
scaler = StandardScaler()
X_train_scaled = scaler.fit(X_train).transform(X_train)
X_test_scaled = scaler.transform(X_test)

In [3]:
# Prepend a constant column of ones to each scaled feature matrix so that the
# first coefficient of a parameter vector multiplied against it acts as the
# intercept term.
n_train_rows = X_train_scaled.shape[0]
n_test_rows = X_test_scaled.shape[0]
X_train_gd = np.column_stack((np.ones(n_train_rows), X_train_scaled))
X_test_gd = np.column_stack((np.ones(n_test_rows), X_test_scaled))

#5.1
def gradient_descent(X, y, alpha, num_iters):
    """Fit linear-regression weights by batch gradient descent on the MSE loss.

    Starting from ``theta = 0``, performs ``num_iters`` updates of the form
    ``theta <- theta - alpha * grad`` with
    ``grad = (2 / N) * X.T @ (X @ theta - y)``.

    Parameters
    ----------
    X : array-like of shape (N, d)
        Design matrix. No intercept column is added here; include a column of
        ones in ``X`` if an intercept is wanted.
    y : array-like holding N values
        Targets. The input is flattened to 1-D, so a flat array, an ``(N, 1)``
        column vector, or a pandas Series are all accepted.
    alpha : float
        Step size (learning rate).
    num_iters : int
        Number of gradient updates to perform.

    Returns
    -------
    numpy.ndarray of shape (d,)
        The parameter vector after the final update.

    Raises
    ------
    ValueError
        If ``y`` does not contain exactly one value per row of ``X``.
    """
    X = np.asarray(X, dtype=float)
    # Flatten the targets: with an (N, 1) column vector, ``X @ theta - y``
    # would broadcast to an (N, N) matrix instead of an (N,) residual vector.
    y = np.asarray(y, dtype=float).reshape(-1)

    N, d = X.shape
    if y.shape[0] != N:
        raise ValueError(
            f"y has {y.shape[0]} values but X has {N} rows"
        )

    theta = np.zeros(d)
    for _ in range(num_iters):
        residual = X @ theta - y
        gradient = (2 / N) * (X.T @ residual)
        theta = theta - alpha * gradient
    return theta

In [4]:
#5.2
# Sweep every (learning rate, iteration count) pair, fit with gradient descent
# on the training design matrix, and record train/test MSE and R2 per run.
alphas = [0.01, 0.1, 0.5]
iterations_list = [10, 50, 100]

# Flatten the grid up front; the learning rate varies slowest so the rows of
# the report come out grouped by alpha.
param_grid = [(lr, n_steps) for lr in alphas for n_steps in iterations_list]

gd_results = []

for alpha, iters in param_grid:
    theta = gradient_descent(X_train_gd, y_train, alpha, iters)

    # Predictions are the design matrix times the fitted parameter vector.
    y_train_pred = np.matmul(X_train_gd, theta)
    y_test_pred = np.matmul(X_test_gd, theta)

    tr_mse = mean_squared_error(y_train, y_train_pred)
    tr_r2 = r2_score(y_train, y_train_pred)
    te_mse = mean_squared_error(y_test, y_test_pred)
    te_r2 = r2_score(y_test, y_test_pred)

    # One report row per run; insertion order fixes the column order.
    row = {"Alpha": alpha, "Iterations": iters}
    row["Train MSE"] = tr_mse
    row["Train R2"] = tr_r2
    row["Test MSE"] = te_mse
    row["Test R2"] = te_r2
    row["Final Theta"] = theta.tolist()
    gd_results.append(row)

gd_df = pd.DataFrame(gd_results)
print(gd_df)

   Alpha  Iterations      Train MSE       Train R2       Test MSE  \
0   0.01          10   2.357278e+05  -1.047365e+00   2.805687e+05   
1   0.01          50   6.972050e+04   3.944571e-01   9.704954e+04   
2   0.01         100   3.682035e+04   6.802045e-01   6.333304e+04   
3   0.10          10   3.510510e+04   6.951019e-01   6.163043e+04   
4   0.10          50   3.149726e+04   7.264371e-01   5.772248e+04   
5   0.10         100   3.148643e+04   7.265311e-01   5.763896e+04   
6   0.50          10   1.456064e+17  -1.264635e+12   1.626068e+17   
7   0.50          50   1.259542e+67  -1.093949e+62   1.406601e+67   
8   0.50         100  3.322792e+129 -2.885942e+124  3.710745e+129   

         Test R2                                        Final Theta  
0  -6.828036e-01  [95.19802483770326, 11.928618743287174, 20.283...  
1   4.179133e-01  [330.8955303896301, 6.005279760915583, 23.8214...  
2   6.201392e-01  [451.39764983387875, -3.6569193334206287, 19.2...  
3   6.303511e-01  [464.535716