In [5]:
import numpy as np
from sklearn.datasets import fetch_california_housing
from sklearn.model_selection import train_test_split, cross_validate
from sklearn.linear_model import LinearRegression
from sklearn.preprocessing import StandardScaler
from sklearn.impute import SimpleImputer
from sklearn.pipeline import Pipeline

# Load the California housing data and hold out 10% of the rows as a test
# set. The split is shuffled with a fixed seed (43) so it is reproducible.
housing = fetch_california_housing()
X = housing['data']
y = housing['target']
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    random_state=43,
    shuffle=True,
    test_size=0.1,
)

# Ordered (name, estimator) steps: median imputation, then standardization,
# then a linear regression as the final estimator.
pipeline = [
    ('imputer', SimpleImputer(strategy='median')),
    ('scaler', StandardScaler()),
    ('lr', LinearRegression()),
]
pipe = Pipeline(steps=pipeline)

# 10-fold cross-validation of the pipeline on the training split, scored by R^2.
cv_results = cross_validate(pipe, X_train, y_train, scoring='r2', cv=10)

# One R^2 value per validation fold.
scores = cv_results['test_score']

# Each report is a header line followed by its value on the next line;
# sep="\n" yields the same output as two consecutive print calls.
print("Scores on validation sets:", scores, sep="\n")
print("\nMean of scores on validation sets:", np.mean(scores), sep="\n")
print("\nStandard deviation of scores on validation sets:", np.std(scores), sep="\n")


Scores on validation sets:
[0.62433594 0.61648956 0.62486602 0.59891024 0.59284295 0.61307055
 0.54630341 0.60742976 0.60014575 0.59574508]

Mean of scores on validation sets:
0.6020139252674299

Standard deviation of scores on validation sets:
0.0214983822773466
