In [2]:
import pandas as pd
import numpy as np
from sklearn.datasets import fetch_california_housing
from sklearn.model_selection import train_test_split, GridSearchCV, cross_val_score
from sklearn.linear_model import Ridge
from sklearn.pipeline import Pipeline
from sklearn.preprocessing import StandardScaler
from sklearn.metrics import r2_score, mean_squared_error


In [3]:
# Fetch the California housing dataset; the returned Bunch allows dict-style
# access to its fields in addition to attribute access.
data = fetch_california_housing()

# Wrap the feature matrix in a DataFrame so each column carries its feature name.
X = pd.DataFrame(data["data"], columns=data["feature_names"])
y = data["target"]

# Hold out 20% of the rows for the final evaluation; the fixed seed keeps the
# split reproducible across runs.
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.2,
    random_state=42,
)


In [6]:
# Two-stage estimator: standardize the features, then fit a ridge regression.
# The step names matter: the 'model' prefix is how the search grid addresses
# the Ridge hyperparameters (e.g. 'model__alpha').
scaler_step = ("scaler", StandardScaler())
model_step = ("model", Ridge())
pipeline = Pipeline(steps=[scaler_step, model_step])


In [8]:
# Candidate regularization strengths for the Ridge step, spaced one decade apart.
# NOTE(review): if the selected alpha lands on an edge of this list, the true
# optimum may lie outside it -- consider widening the range.
params = {"model__alpha": [0.01, 0.1, 1.0, 10.0, 100.0]}

# Exhaustive 5-fold cross-validated search over the grid, ranked by R^2.
grid = GridSearchCV(
    estimator=pipeline,
    param_grid=params,
    cv=5,
    scoring="r2",
)
grid.fit(X_train, y_train)

# Report the winning setting and its mean cross-validated score.
print("✅ Best Params:", grid.best_params_)
print("✅ Best CV Score:", grid.best_score_)


✅ Best Params: {'model__alpha': 0.01}
✅ Best CV Score: 0.6114839924016691


In [10]:
# Score the fitted search object on the held-out split; predictions come from
# calling predict on `grid` itself.
y_pred = grid.predict(X_test)

# Report R^2 and mean squared error on the test set.
print("✅ Test R2 Score:", r2_score(y_true=y_test, y_pred=y_pred))
print("✅ Test MSE:", mean_squared_error(y_true=y_test, y_pred=y_pred))


✅ Test R2 Score: 0.5757879873121585
✅ Test MSE: 0.5558912301037898
