In [1]:
from sklearn.datasets import fetch_california_housing
import pandas as pd

# Request the dataset with as_frame=True so it arrives as pandas objects
# with named columns rather than bare arrays.
housing = fetch_california_housing(as_frame=True)

# `.frame` holds the feature columns together with the target column.
df = housing.frame

# Preview the first five rows.
df.head(n=5)

Unnamed: 0,MedInc,HouseAge,AveRooms,AveBedrms,Population,AveOccup,Latitude,Longitude,MedHouseVal
0,8.3252,41.0,6.984127,1.02381,322.0,2.555556,37.88,-122.23,4.526
1,8.3014,21.0,6.238137,0.97188,2401.0,2.109842,37.86,-122.22,3.585
2,7.2574,52.0,8.288136,1.073446,496.0,2.80226,37.85,-122.24,3.521
3,5.6431,52.0,5.817352,1.073059,558.0,2.547945,37.85,-122.25,3.413
4,3.8462,52.0,6.281853,1.081081,565.0,2.181467,37.85,-122.25,3.422


In [6]:
from sklearn.model_selection import train_test_split

# The target is the "MedHouseVal" column; the features are every other column.
# (`columns=` is the keyword form of dropping along axis=1.)
y = df["MedHouseVal"]
X = df.drop(columns=["MedHouseVal"])

# Hold out 20% of the rows for evaluation; the fixed random_state makes the
# split reproducible across runs.
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.2,
    random_state=42,
)

In [7]:
from sklearn.linear_model import LinearRegression
from sklearn.metrics import mean_squared_error, r2_score
import numpy as np

# Baseline: plain linear regression fitted on the training split.
# `fit` returns the estimator itself, so construction and fitting can be chained.
lr = LinearRegression().fit(X_train, y_train)

# Predict on the held-out rows and score the predictions.
y_pred = lr.predict(X_test)
r2 = r2_score(y_test, y_pred)
rmse = np.sqrt(mean_squared_error(y_test, y_pred))  # RMSE = sqrt(MSE)

# Report both metrics, one per line.
print(
    f"RMSE: {rmse:.4f}",
    f"R²:   {r2:.4f}",
    sep="\n",
)

RMSE: 0.7456
R²:   0.5758


In [8]:
# Polynomial Regression
from sklearn.preprocessing import PolynomialFeatures
from sklearn.pipeline import Pipeline


# A degree-2 expansion adds every squared term and every pairwise product.
# For features [x1, x2] the expanded set is [x1, x2, x1², x1*x2, x2²].
# include_bias=False leaves out the constant-1 column.
# `fit` returns the pipeline itself, so it can be chained onto the constructor.
poly_model = Pipeline(
    steps=[
        ("poly_features", PolynomialFeatures(degree=2, include_bias=False)),
        ("lin_reg", LinearRegression()),
    ]
).fit(X_train, y_train)

# Score the polynomial model on the same held-out split.
y_poly_pred = poly_model.predict(X_test)
r2_poly = r2_score(y_test, y_poly_pred)
rmse_poly = np.sqrt(mean_squared_error(y_test, y_poly_pred))  # RMSE = sqrt(MSE)

# Report both metrics, one per line.
print(
    f"Polynomial RMSE: {rmse_poly:.4f}",
    f"Polynomial R²:   {r2_poly:.4f}",
    sep="\n",
)


Polynomial RMSE: 0.6814
Polynomial R²:   0.6457
