# Mini Project 2: Regression Problem

**Dataset**: California Housing Dataset (auto-download via sklearn)

**Goal**: Predict house prices using regression models.

In [1]:

import pandas as pd
import numpy as np
from sklearn.datasets import fetch_california_housing
from sklearn.model_selection import train_test_split
from sklearn.linear_model import LinearRegression, Ridge, Lasso
from sklearn.ensemble import RandomForestRegressor
from sklearn.svm import SVR
from sklearn.metrics import mean_squared_error, r2_score
import pickle

# Load dataset
data = fetch_california_housing()
X = pd.DataFrame(data.data, columns=data.feature_names)
y = pd.Series(data.target)

X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)


In [2]:

models = {
    "Linear Regression": LinearRegression(),
    "Ridge Regression": Ridge(),
    "Lasso Regression": Lasso(),
    "Random Forest": RandomForestRegressor(),
    "SVR": SVR()
}

best_model = None
best_score = -np.inf

for name, model in models.items():
    model.fit(X_train, y_train)
    y_pred = model.predict(X_test)
    r2 = r2_score(y_test, y_pred)
    print(f"{name} R2 Score: {r2:.4f}")
    if r2 > best_score:
        best_score = r2
        best_model = model


Linear Regression R2 Score: 0.5758
Ridge Regression R2 Score: 0.5759
Lasso Regression R2 Score: 0.2842
Random Forest R2 Score: 0.8055
SVR R2 Score: -0.0165


In [3]:

# Save best model
with open("best_regression_model.pkl", "wb") as f:
    pickle.dump(best_model, f)
print("Best regression model saved as 'best_regression_model.pkl'")


Best regression model saved as 'best_regression_model.pkl'
