In [1]:
# Imports
from sklearn.datasets import fetch_california_housing
from sklearn.model_selection import train_test_split, GridSearchCV
from sklearn.ensemble import RandomForestRegressor
from sklearn.metrics import mean_squared_error
import numpy as np

# Data: load the California housing dataset and hold out 10% of it for the
# final test evaluation. The fixed random_state keeps the split reproducible.
housing = fetch_california_housing()
X = housing['data']
y = housing['target']

X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.1, shuffle=True, random_state=43
)

# Model selection: try every combination of tree depth and forest size.
param_grid = {
    'max_depth': [5, 10, 15],
    'n_estimators': [10, 50, 100],
}
rf = RandomForestRegressor(random_state=43)
grid_search = GridSearchCV(
    estimator=rf,
    param_grid=param_grid,
    cv=5,
    n_jobs=-1,  # use all available cores for the search
    scoring='neg_mean_squared_error',
)
grid_search.fit(X_train, y_train)

# The scorer is the *negated* MSE (so that larger is better for the search);
# flip the sign back whenever a score is reported so the printed value is MSE.
best_rf = grid_search.best_estimator_
print("Best parameters:", grid_search.best_params_)

print("Best score on validation set:", -grid_search.best_score_)

print("\nCV Results:")
cv_results = grid_search.cv_results_
for mean_score, params in zip(cv_results['mean_test_score'],
                              cv_results['params']):
    print(f"Mean validation score: {-mean_score:.4f} for {params}")

# Final check of the selected model on the held-out test split.
y_pred = best_rf.predict(X_test)
mse = mean_squared_error(y_test, y_pred)
print("\nMean Squared Error on test set:", mse)

r2 = best_rf.score(X_test, y_test)
print("R-squared score on test set:", r2)

Best parameters: {'max_depth': 15, 'n_estimators': 100}
Best score on validation set: 0.260442963594118

CV Results:
Mean validation score: 0.4634 for {'max_depth': 5, 'n_estimators': 10}
Mean validation score: 0.4546 for {'max_depth': 5, 'n_estimators': 50}
Mean validation score: 0.4523 for {'max_depth': 5, 'n_estimators': 100}
Mean validation score: 0.3112 for {'max_depth': 10, 'n_estimators': 10}
Mean validation score: 0.2943 for {'max_depth': 10, 'n_estimators': 50}
Mean validation score: 0.2919 for {'max_depth': 10, 'n_estimators': 100}
Mean validation score: 0.2888 for {'max_depth': 15, 'n_estimators': 10}
Mean validation score: 0.2637 for {'max_depth': 15, 'n_estimators': 50}
Mean validation score: 0.2604 for {'max_depth': 15, 'n_estimators': 100}

Mean Squared Error on test set: 0.24477404979641443
R-squared score on test set: 0.8095845027708665
