In [1]:
# Python ≥3.5 is required
import sys
assert sys.version_info >= (3, 5)

# Scikit-Learn ≥0.20 is required
import sklearn
assert sklearn.__version__ >= "0.20"

# Common imports
import numpy as np
import os

# To plot pretty figures
%matplotlib inline
import matplotlib as mpl
import matplotlib.pyplot as plt
mpl.rc('axes', labelsize=14)
mpl.rc('xtick', labelsize=12)
mpl.rc('ytick', labelsize=12)

In [2]:
import tarfile
import urllib.request

DOWNLOAD_ROOT = "https://raw.githubusercontent.com/ageron/handson-ml2/master/"
HOUSING_PATH = os.path.join("datasets", "housing")
HOUSING_URL = DOWNLOAD_ROOT + "datasets/housing/housing.tgz"

#creates a local path for the data and extracts the tar (.tgz) file
def fetch_housing_data(housing_url=HOUSING_URL, housing_path=HOUSING_PATH):
    if not os.path.isdir(housing_path): #checks wheteher local directory exist at '/datasets/housing' 
        os.makedirs(housing_path) #,if not create one!
    tgz_path = os.path.join(housing_path, "housing.tgz") # creates the path: datasets/housing/housing.tgz
    urllib.request.urlretrieve(housing_url, tgz_path) #get the content from the first atribute into the second atribute (local)
    #extract the tar file
    housing_tgz = tarfile.open(tgz_path)
    housing_tgz.extractall(path=housing_path)
    housing_tgz.close()

In [3]:
import pandas as pd

#load the csv file that is extracted by the help of above function
def load_housing_data(housing_path=HOUSING_PATH):
    csv_path = os.path.join(housing_path, "housing.csv")
    return pd.read_csv(csv_path)

# Data Import and Test/Train Split (Stratified)

In [4]:
#Load data
fetch_housing_data()
housing = load_housing_data()

In [5]:
#lets create a temporary feature for stratficiation
housing["income_cat"] = pd.cut(housing["median_income"],
                               bins=[0., 1.5, 3.0, 4.5, 6., np.inf],
                               labels=[1, 2, 3, 4, 5])

In [6]:
from sklearn.model_selection import StratifiedShuffleSplit

split = StratifiedShuffleSplit(n_splits=1, test_size=0.2, random_state=42)

#create stratified train/test splits
for train_index, test_index in split.split(housing, housing["income_cat"]):
    strat_train_set = housing.loc[train_index]
    strat_test_set = housing.loc[test_index]
    
#drop temporary cat features
for set_ in (strat_train_set, strat_test_set):
    set_.drop("income_cat", axis=1, inplace=True)

# Prepare the Data for ML

In [10]:
#training set
housing = strat_train_set.drop("median_house_value", axis=1) # drop labels for training set; X
housing_labels = strat_train_set["median_house_value"].copy()#y

In [13]:
#Feature Engineering
from sklearn.base import BaseEstimator, TransformerMixin

# column index
col_names = "total_rooms", "total_bedrooms", "population", "households"
rooms_ix, bedrooms_ix, population_ix, households_ix = [
    housing.columns.get_loc(c) for c in col_names] # get the column indices

class CombinedAttributesAdder(BaseEstimator, TransformerMixin):
    
    def __init__(self, add_bedrooms_per_room=True): # no *args or **kargs
        self.add_bedrooms_per_room = add_bedrooms_per_room
        
    def fit(self, X, y=None):
        return self  # nothing else to do
    
    def transform(self, X):
        rooms_per_household = X[:, rooms_ix] / X[:, households_ix]
        population_per_household = X[:, population_ix] / X[:, households_ix]
        if self.add_bedrooms_per_room:
            bedrooms_per_room = X[:, bedrooms_ix] / X[:, rooms_ix]
            return np.c_[X, rooms_per_household, population_per_household,
                         bedrooms_per_room]
        else:
            return np.c_[X, rooms_per_household, population_per_household]


# Exercises

## 1 -

Q1. Try a Support Vector Machine regressor or
values for the and hyperparameters). Don’t worry about what these hyperparameters mean for now. How does the best
predictor perform?

In [305]:
#downsides of having an old mac :D
from sklearn.svm import SVR

svm_reg = SVR()

svm_param_grid = [
    {'kernel' : ['linear'], 'C' : [100, 1000]},
    {'kernel' : ['rbf'], 'C' : [100, 1000], 'gamma' : [0.125, 1]}
]

svm_grid_search = GridSearchCV(svm_reg, svm_param_grid, cv = 3, scoring="neg_mean_squared_error", return_train_score= True, verbose=2)

svm_grid_search.fit(housing_prepared, housing_labels)

Fitting 3 folds for each of 6 candidates, totalling 18 fits
[CV] END ...............................C=100, kernel=linear; total time=   7.3s
[CV] END ...............................C=100, kernel=linear; total time=   7.4s
[CV] END ...............................C=100, kernel=linear; total time=   7.2s
[CV] END ..............................C=1000, kernel=linear; total time=   8.0s
[CV] END ..............................C=1000, kernel=linear; total time=   7.8s
[CV] END ..............................C=1000, kernel=linear; total time=   7.5s
[CV] END .....................C=100, gamma=0.125, kernel=rbf; total time=  17.8s
[CV] END .....................C=100, gamma=0.125, kernel=rbf; total time=  17.9s
[CV] END .....................C=100, gamma=0.125, kernel=rbf; total time=  18.0s
[CV] END .........................C=100, gamma=1, kernel=rbf; total time=  17.6s
[CV] END .........................C=100, gamma=1, kernel=rbf; total time=  17.6s
[CV] END .........................C=100, gamma=1,

In [307]:
svm_grid_search.best_params_

{'C': 1000, 'kernel': 'linear'}

In [308]:
negative_mse = svm_grid_search.best_score_
rmse = np.sqrt(-negative_mse)
rmse

72798.03672352972

## 2 -

Q2. Try replacing GridSearchCV with RandomizedSearchCV

In [None]:
from sklearn.model_selection import RandomizedSearchCV
from scipy.stats import expon, reciprocal

param_distribs = {
        'kernel': ['linear', 'rbf'],
        'C': reciprocal(20, 200000),
        'gamma': expon(scale=1.0),
    }


rnd_search = RandomizedSearchCV(svm_reg, param_distributions=param_distribs,
                                n_iter=5, cv=3, scoring='neg_mean_squared_error',
                                verbose=2, random_state=42)
                               
                               
rnd_search.fit(housing_prepared, housing_labels)

In [182]:
rnd_search.best_params_

{'C': 26290.206464300216, 'gamma': 0.9084469696321253, 'kernel': 'rbf'}

In [181]:
negative_mse = rnd_search.best_score_
rmse = np.sqrt(-negative_mse)
rmse

65238.00400681524

## 3 -

Q3- Try adding a transformer in the preparation pipeline to select only the most important attributes

In [19]:
#Preprocessing pipeline (pp)

from sklearn.compose import ColumnTransformer
from sklearn.preprocessing import OneHotEncoder
from sklearn.impute import SimpleImputer
from sklearn.preprocessing import OneHotEncoder
from sklearn.pipeline import Pipeline
from sklearn.preprocessing import StandardScaler
from sklearn.feature_selection import SelectKBest

#create a pipeline for numerical features
num_pipeline = Pipeline(
    
    [
    ('imputer', SimpleImputer(strategy='median')),
    ('Combined_Attr_Adder', CombinedAttributesAdder(add_bedrooms_per_room=True)),
    ('Standard_Scaler', StandardScaler()),
    ]
)

#Combine numerical and categorical features
num_columns = list(housing.drop("ocean_proximity", axis=1))
str_columns = ["ocean_proximity"]
all_columns = housing.columns

#ColumnTransformer has no named_steps attr, use named_transformers_ instead!
col_pp_pipeline = ColumnTransformer(
    [('num_transformer', num_pipeline, num_columns),
    ('str_transformer',OneHotEncoder() , str_columns),
    ]
)

#Add a Selection tool to the pp pipeline
full_pp_pipeline = Pipeline(
    [
        ('col_pipeline', col_pp_pipeline),
        ('Feature_Selector', SelectKBest(k=13))
        
    ]
)

pp_housing = full_pp_pipeline.fit_transform(housing, housing_labels)

## 4-

Q4- Try creating a single pipeline that does the full data preparation plus the final prediction.

In [24]:
from sklearn.metrics import mean_absolute_error
from sklearn.svm import SVR

#add an estimator at the end of the pipeline
## Note that SVR is not from sklear.linearmodels, hence it has no fit_predict() method.
pp_select_predict_pipeline = Pipeline(
    [
        ('pp_select', full_pp_pipeline),
        ("svm_reg", SVR(C= 26290, gamma= 0.9, kernel= 'rbf'))
        
    ]
    
)

pp_select_predict_pipeline.fit(housing, housing_labels)
train_pred = pp_select_predict_pipeline.predict(housing)
mean_absolute_error(train_pred, housing_labels)

33710.28318716908

# 5-

Q5- Automatically explore some preparation options using GridSearchCV

In [25]:
#get the name of the specific estimator for their hyper parameters
pp_select_predict_pipeline

In [488]:
param_grid = [
    {
        'pp_select__col_pipeline__num_transformer__imputer__strategy' : ['mean', 'median'],
        'pp_select__col_pipeline__num_transformer__Combined_Attr_Adder__add_bedrooms_per_room' : [True, False],
        'pp_select__Feature_Selector__k' : [11, 12, 13],
    }
]

grid_search_prep = GridSearchCV(pp_select_predict_pipeline, param_grid, cv=3,
                                scoring='neg_mean_squared_error', verbose=2)

grid_search_prep.fit(housing, housing_labels)

grid_search_prep.best_estimator_.fit(housing, housing_labels)
grid_train_pred = grid_search_prep.best_estimator_.predict(housing)
mean_absolute_error(grid_train_pred, housing_labels)

Fitting 3 folds for each of 12 candidates, totalling 36 fits


Traceback (most recent call last):
  File "/usr/local/lib/python3.9/site-packages/sklearn/model_selection/_validation.py", line 767, in _score
    scores = scorer(estimator, X_test, y_test)
  File "/usr/local/lib/python3.9/site-packages/sklearn/metrics/_scorer.py", line 234, in __call__
    return self._score(
  File "/usr/local/lib/python3.9/site-packages/sklearn/metrics/_scorer.py", line 276, in _score
    y_pred = method_caller(estimator, "predict", X)
  File "/usr/local/lib/python3.9/site-packages/sklearn/metrics/_scorer.py", line 73, in _cached_call
    return getattr(estimator, method)(*args, **kwargs)
  File "/usr/local/lib/python3.9/site-packages/sklearn/pipeline.py", line 480, in predict
    Xt = transform.transform(Xt)
  File "/usr/local/lib/python3.9/site-packages/sklearn/pipeline.py", line 658, in transform
    Xt = transform.transform(Xt)
  File "/usr/local/lib/python3.9/site-packages/sklearn/utils/_set_output.py", line 142, in wrapped
    data_to_wrap = f(self, X, *args, 

[CV] END pp_select__Feature_Selector__k=11, pp_select__col_pipeline__num_transformer__Combined_Attr_Adder__add_bedrooms_per_room=True, pp_select__col_pipeline__num_transformer__imputer__strategy=mean; total time=  18.3s
[CV] END pp_select__Feature_Selector__k=11, pp_select__col_pipeline__num_transformer__Combined_Attr_Adder__add_bedrooms_per_room=True, pp_select__col_pipeline__num_transformer__imputer__strategy=mean; total time=  31.2s
[CV] END pp_select__Feature_Selector__k=11, pp_select__col_pipeline__num_transformer__Combined_Attr_Adder__add_bedrooms_per_room=True, pp_select__col_pipeline__num_transformer__imputer__strategy=mean; total time=  31.1s


Traceback (most recent call last):
  File "/usr/local/lib/python3.9/site-packages/sklearn/model_selection/_validation.py", line 767, in _score
    scores = scorer(estimator, X_test, y_test)
  File "/usr/local/lib/python3.9/site-packages/sklearn/metrics/_scorer.py", line 234, in __call__
    return self._score(
  File "/usr/local/lib/python3.9/site-packages/sklearn/metrics/_scorer.py", line 276, in _score
    y_pred = method_caller(estimator, "predict", X)
  File "/usr/local/lib/python3.9/site-packages/sklearn/metrics/_scorer.py", line 73, in _cached_call
    return getattr(estimator, method)(*args, **kwargs)
  File "/usr/local/lib/python3.9/site-packages/sklearn/pipeline.py", line 480, in predict
    Xt = transform.transform(Xt)
  File "/usr/local/lib/python3.9/site-packages/sklearn/pipeline.py", line 658, in transform
    Xt = transform.transform(Xt)
  File "/usr/local/lib/python3.9/site-packages/sklearn/utils/_set_output.py", line 142, in wrapped
    data_to_wrap = f(self, X, *args, 

[CV] END pp_select__Feature_Selector__k=11, pp_select__col_pipeline__num_transformer__Combined_Attr_Adder__add_bedrooms_per_room=True, pp_select__col_pipeline__num_transformer__imputer__strategy=median; total time=  18.2s
[CV] END pp_select__Feature_Selector__k=11, pp_select__col_pipeline__num_transformer__Combined_Attr_Adder__add_bedrooms_per_room=True, pp_select__col_pipeline__num_transformer__imputer__strategy=median; total time=  31.0s
[CV] END pp_select__Feature_Selector__k=11, pp_select__col_pipeline__num_transformer__Combined_Attr_Adder__add_bedrooms_per_room=True, pp_select__col_pipeline__num_transformer__imputer__strategy=median; total time=  31.1s


Traceback (most recent call last):
  File "/usr/local/lib/python3.9/site-packages/sklearn/model_selection/_validation.py", line 767, in _score
    scores = scorer(estimator, X_test, y_test)
  File "/usr/local/lib/python3.9/site-packages/sklearn/metrics/_scorer.py", line 234, in __call__
    return self._score(
  File "/usr/local/lib/python3.9/site-packages/sklearn/metrics/_scorer.py", line 276, in _score
    y_pred = method_caller(estimator, "predict", X)
  File "/usr/local/lib/python3.9/site-packages/sklearn/metrics/_scorer.py", line 73, in _cached_call
    return getattr(estimator, method)(*args, **kwargs)
  File "/usr/local/lib/python3.9/site-packages/sklearn/pipeline.py", line 480, in predict
    Xt = transform.transform(Xt)
  File "/usr/local/lib/python3.9/site-packages/sklearn/pipeline.py", line 658, in transform
    Xt = transform.transform(Xt)
  File "/usr/local/lib/python3.9/site-packages/sklearn/utils/_set_output.py", line 142, in wrapped
    data_to_wrap = f(self, X, *args, 

[CV] END pp_select__Feature_Selector__k=11, pp_select__col_pipeline__num_transformer__Combined_Attr_Adder__add_bedrooms_per_room=False, pp_select__col_pipeline__num_transformer__imputer__strategy=mean; total time=  18.1s
[CV] END pp_select__Feature_Selector__k=11, pp_select__col_pipeline__num_transformer__Combined_Attr_Adder__add_bedrooms_per_room=False, pp_select__col_pipeline__num_transformer__imputer__strategy=mean; total time=  31.0s
[CV] END pp_select__Feature_Selector__k=11, pp_select__col_pipeline__num_transformer__Combined_Attr_Adder__add_bedrooms_per_room=False, pp_select__col_pipeline__num_transformer__imputer__strategy=mean; total time=  31.4s


Traceback (most recent call last):
  File "/usr/local/lib/python3.9/site-packages/sklearn/model_selection/_validation.py", line 767, in _score
    scores = scorer(estimator, X_test, y_test)
  File "/usr/local/lib/python3.9/site-packages/sklearn/metrics/_scorer.py", line 234, in __call__
    return self._score(
  File "/usr/local/lib/python3.9/site-packages/sklearn/metrics/_scorer.py", line 276, in _score
    y_pred = method_caller(estimator, "predict", X)
  File "/usr/local/lib/python3.9/site-packages/sklearn/metrics/_scorer.py", line 73, in _cached_call
    return getattr(estimator, method)(*args, **kwargs)
  File "/usr/local/lib/python3.9/site-packages/sklearn/pipeline.py", line 480, in predict
    Xt = transform.transform(Xt)
  File "/usr/local/lib/python3.9/site-packages/sklearn/pipeline.py", line 658, in transform
    Xt = transform.transform(Xt)
  File "/usr/local/lib/python3.9/site-packages/sklearn/utils/_set_output.py", line 142, in wrapped
    data_to_wrap = f(self, X, *args, 

[CV] END pp_select__Feature_Selector__k=11, pp_select__col_pipeline__num_transformer__Combined_Attr_Adder__add_bedrooms_per_room=False, pp_select__col_pipeline__num_transformer__imputer__strategy=median; total time=  18.0s
[CV] END pp_select__Feature_Selector__k=11, pp_select__col_pipeline__num_transformer__Combined_Attr_Adder__add_bedrooms_per_room=False, pp_select__col_pipeline__num_transformer__imputer__strategy=median; total time=  31.5s
[CV] END pp_select__Feature_Selector__k=11, pp_select__col_pipeline__num_transformer__Combined_Attr_Adder__add_bedrooms_per_room=False, pp_select__col_pipeline__num_transformer__imputer__strategy=median; total time=  32.7s


Traceback (most recent call last):
  File "/usr/local/lib/python3.9/site-packages/sklearn/model_selection/_validation.py", line 767, in _score
    scores = scorer(estimator, X_test, y_test)
  File "/usr/local/lib/python3.9/site-packages/sklearn/metrics/_scorer.py", line 234, in __call__
    return self._score(
  File "/usr/local/lib/python3.9/site-packages/sklearn/metrics/_scorer.py", line 276, in _score
    y_pred = method_caller(estimator, "predict", X)
  File "/usr/local/lib/python3.9/site-packages/sklearn/metrics/_scorer.py", line 73, in _cached_call
    return getattr(estimator, method)(*args, **kwargs)
  File "/usr/local/lib/python3.9/site-packages/sklearn/pipeline.py", line 480, in predict
    Xt = transform.transform(Xt)
  File "/usr/local/lib/python3.9/site-packages/sklearn/pipeline.py", line 658, in transform
    Xt = transform.transform(Xt)
  File "/usr/local/lib/python3.9/site-packages/sklearn/utils/_set_output.py", line 142, in wrapped
    data_to_wrap = f(self, X, *args, 

[CV] END pp_select__Feature_Selector__k=12, pp_select__col_pipeline__num_transformer__Combined_Attr_Adder__add_bedrooms_per_room=True, pp_select__col_pipeline__num_transformer__imputer__strategy=mean; total time=  18.8s
[CV] END pp_select__Feature_Selector__k=12, pp_select__col_pipeline__num_transformer__Combined_Attr_Adder__add_bedrooms_per_room=True, pp_select__col_pipeline__num_transformer__imputer__strategy=mean; total time=  31.3s
[CV] END pp_select__Feature_Selector__k=12, pp_select__col_pipeline__num_transformer__Combined_Attr_Adder__add_bedrooms_per_room=True, pp_select__col_pipeline__num_transformer__imputer__strategy=mean; total time=  32.1s


Traceback (most recent call last):
  File "/usr/local/lib/python3.9/site-packages/sklearn/model_selection/_validation.py", line 767, in _score
    scores = scorer(estimator, X_test, y_test)
  File "/usr/local/lib/python3.9/site-packages/sklearn/metrics/_scorer.py", line 234, in __call__
    return self._score(
  File "/usr/local/lib/python3.9/site-packages/sklearn/metrics/_scorer.py", line 276, in _score
    y_pred = method_caller(estimator, "predict", X)
  File "/usr/local/lib/python3.9/site-packages/sklearn/metrics/_scorer.py", line 73, in _cached_call
    return getattr(estimator, method)(*args, **kwargs)
  File "/usr/local/lib/python3.9/site-packages/sklearn/pipeline.py", line 480, in predict
    Xt = transform.transform(Xt)
  File "/usr/local/lib/python3.9/site-packages/sklearn/pipeline.py", line 658, in transform
    Xt = transform.transform(Xt)
  File "/usr/local/lib/python3.9/site-packages/sklearn/utils/_set_output.py", line 142, in wrapped
    data_to_wrap = f(self, X, *args, 

[CV] END pp_select__Feature_Selector__k=12, pp_select__col_pipeline__num_transformer__Combined_Attr_Adder__add_bedrooms_per_room=True, pp_select__col_pipeline__num_transformer__imputer__strategy=median; total time=  18.2s
[CV] END pp_select__Feature_Selector__k=12, pp_select__col_pipeline__num_transformer__Combined_Attr_Adder__add_bedrooms_per_room=True, pp_select__col_pipeline__num_transformer__imputer__strategy=median; total time=  31.0s
[CV] END pp_select__Feature_Selector__k=12, pp_select__col_pipeline__num_transformer__Combined_Attr_Adder__add_bedrooms_per_room=True, pp_select__col_pipeline__num_transformer__imputer__strategy=median; total time=  31.9s


Traceback (most recent call last):
  File "/usr/local/lib/python3.9/site-packages/sklearn/model_selection/_validation.py", line 767, in _score
    scores = scorer(estimator, X_test, y_test)
  File "/usr/local/lib/python3.9/site-packages/sklearn/metrics/_scorer.py", line 234, in __call__
    return self._score(
  File "/usr/local/lib/python3.9/site-packages/sklearn/metrics/_scorer.py", line 276, in _score
    y_pred = method_caller(estimator, "predict", X)
  File "/usr/local/lib/python3.9/site-packages/sklearn/metrics/_scorer.py", line 73, in _cached_call
    return getattr(estimator, method)(*args, **kwargs)
  File "/usr/local/lib/python3.9/site-packages/sklearn/pipeline.py", line 480, in predict
    Xt = transform.transform(Xt)
  File "/usr/local/lib/python3.9/site-packages/sklearn/pipeline.py", line 658, in transform
    Xt = transform.transform(Xt)
  File "/usr/local/lib/python3.9/site-packages/sklearn/utils/_set_output.py", line 142, in wrapped
    data_to_wrap = f(self, X, *args, 

[CV] END pp_select__Feature_Selector__k=12, pp_select__col_pipeline__num_transformer__Combined_Attr_Adder__add_bedrooms_per_room=False, pp_select__col_pipeline__num_transformer__imputer__strategy=mean; total time=  18.8s
[CV] END pp_select__Feature_Selector__k=12, pp_select__col_pipeline__num_transformer__Combined_Attr_Adder__add_bedrooms_per_room=False, pp_select__col_pipeline__num_transformer__imputer__strategy=mean; total time=  31.1s
[CV] END pp_select__Feature_Selector__k=12, pp_select__col_pipeline__num_transformer__Combined_Attr_Adder__add_bedrooms_per_room=False, pp_select__col_pipeline__num_transformer__imputer__strategy=mean; total time=  31.8s


Traceback (most recent call last):
  File "/usr/local/lib/python3.9/site-packages/sklearn/model_selection/_validation.py", line 767, in _score
    scores = scorer(estimator, X_test, y_test)
  File "/usr/local/lib/python3.9/site-packages/sklearn/metrics/_scorer.py", line 234, in __call__
    return self._score(
  File "/usr/local/lib/python3.9/site-packages/sklearn/metrics/_scorer.py", line 276, in _score
    y_pred = method_caller(estimator, "predict", X)
  File "/usr/local/lib/python3.9/site-packages/sklearn/metrics/_scorer.py", line 73, in _cached_call
    return getattr(estimator, method)(*args, **kwargs)
  File "/usr/local/lib/python3.9/site-packages/sklearn/pipeline.py", line 480, in predict
    Xt = transform.transform(Xt)
  File "/usr/local/lib/python3.9/site-packages/sklearn/pipeline.py", line 658, in transform
    Xt = transform.transform(Xt)
  File "/usr/local/lib/python3.9/site-packages/sklearn/utils/_set_output.py", line 142, in wrapped
    data_to_wrap = f(self, X, *args, 

[CV] END pp_select__Feature_Selector__k=12, pp_select__col_pipeline__num_transformer__Combined_Attr_Adder__add_bedrooms_per_room=False, pp_select__col_pipeline__num_transformer__imputer__strategy=median; total time=  18.2s
[CV] END pp_select__Feature_Selector__k=12, pp_select__col_pipeline__num_transformer__Combined_Attr_Adder__add_bedrooms_per_room=False, pp_select__col_pipeline__num_transformer__imputer__strategy=median; total time=  31.3s
[CV] END pp_select__Feature_Selector__k=12, pp_select__col_pipeline__num_transformer__Combined_Attr_Adder__add_bedrooms_per_room=False, pp_select__col_pipeline__num_transformer__imputer__strategy=median; total time=  31.9s


Traceback (most recent call last):
  File "/usr/local/lib/python3.9/site-packages/sklearn/model_selection/_validation.py", line 767, in _score
    scores = scorer(estimator, X_test, y_test)
  File "/usr/local/lib/python3.9/site-packages/sklearn/metrics/_scorer.py", line 234, in __call__
    return self._score(
  File "/usr/local/lib/python3.9/site-packages/sklearn/metrics/_scorer.py", line 276, in _score
    y_pred = method_caller(estimator, "predict", X)
  File "/usr/local/lib/python3.9/site-packages/sklearn/metrics/_scorer.py", line 73, in _cached_call
    return getattr(estimator, method)(*args, **kwargs)
  File "/usr/local/lib/python3.9/site-packages/sklearn/pipeline.py", line 480, in predict
    Xt = transform.transform(Xt)
  File "/usr/local/lib/python3.9/site-packages/sklearn/pipeline.py", line 658, in transform
    Xt = transform.transform(Xt)
  File "/usr/local/lib/python3.9/site-packages/sklearn/utils/_set_output.py", line 142, in wrapped
    data_to_wrap = f(self, X, *args, 

[CV] END pp_select__Feature_Selector__k=13, pp_select__col_pipeline__num_transformer__Combined_Attr_Adder__add_bedrooms_per_room=True, pp_select__col_pipeline__num_transformer__imputer__strategy=mean; total time=  20.2s
[CV] END pp_select__Feature_Selector__k=13, pp_select__col_pipeline__num_transformer__Combined_Attr_Adder__add_bedrooms_per_room=True, pp_select__col_pipeline__num_transformer__imputer__strategy=mean; total time=  32.6s
[CV] END pp_select__Feature_Selector__k=13, pp_select__col_pipeline__num_transformer__Combined_Attr_Adder__add_bedrooms_per_room=True, pp_select__col_pipeline__num_transformer__imputer__strategy=mean; total time=  32.4s


Traceback (most recent call last):
  File "/usr/local/lib/python3.9/site-packages/sklearn/model_selection/_validation.py", line 767, in _score
    scores = scorer(estimator, X_test, y_test)
  File "/usr/local/lib/python3.9/site-packages/sklearn/metrics/_scorer.py", line 234, in __call__
    return self._score(
  File "/usr/local/lib/python3.9/site-packages/sklearn/metrics/_scorer.py", line 276, in _score
    y_pred = method_caller(estimator, "predict", X)
  File "/usr/local/lib/python3.9/site-packages/sklearn/metrics/_scorer.py", line 73, in _cached_call
    return getattr(estimator, method)(*args, **kwargs)
  File "/usr/local/lib/python3.9/site-packages/sklearn/pipeline.py", line 480, in predict
    Xt = transform.transform(Xt)
  File "/usr/local/lib/python3.9/site-packages/sklearn/pipeline.py", line 658, in transform
    Xt = transform.transform(Xt)
  File "/usr/local/lib/python3.9/site-packages/sklearn/utils/_set_output.py", line 142, in wrapped
    data_to_wrap = f(self, X, *args, 

[CV] END pp_select__Feature_Selector__k=13, pp_select__col_pipeline__num_transformer__Combined_Attr_Adder__add_bedrooms_per_room=True, pp_select__col_pipeline__num_transformer__imputer__strategy=median; total time=  18.9s
[CV] END pp_select__Feature_Selector__k=13, pp_select__col_pipeline__num_transformer__Combined_Attr_Adder__add_bedrooms_per_room=True, pp_select__col_pipeline__num_transformer__imputer__strategy=median; total time=  32.0s
[CV] END pp_select__Feature_Selector__k=13, pp_select__col_pipeline__num_transformer__Combined_Attr_Adder__add_bedrooms_per_room=True, pp_select__col_pipeline__num_transformer__imputer__strategy=median; total time=  32.5s


Traceback (most recent call last):
  File "/usr/local/lib/python3.9/site-packages/sklearn/model_selection/_validation.py", line 767, in _score
    scores = scorer(estimator, X_test, y_test)
  File "/usr/local/lib/python3.9/site-packages/sklearn/metrics/_scorer.py", line 234, in __call__
    return self._score(
  File "/usr/local/lib/python3.9/site-packages/sklearn/metrics/_scorer.py", line 276, in _score
    y_pred = method_caller(estimator, "predict", X)
  File "/usr/local/lib/python3.9/site-packages/sklearn/metrics/_scorer.py", line 73, in _cached_call
    return getattr(estimator, method)(*args, **kwargs)
  File "/usr/local/lib/python3.9/site-packages/sklearn/pipeline.py", line 480, in predict
    Xt = transform.transform(Xt)
  File "/usr/local/lib/python3.9/site-packages/sklearn/pipeline.py", line 658, in transform
    Xt = transform.transform(Xt)
  File "/usr/local/lib/python3.9/site-packages/sklearn/utils/_set_output.py", line 142, in wrapped
    data_to_wrap = f(self, X, *args, 

[CV] END pp_select__Feature_Selector__k=13, pp_select__col_pipeline__num_transformer__Combined_Attr_Adder__add_bedrooms_per_room=False, pp_select__col_pipeline__num_transformer__imputer__strategy=mean; total time=  18.9s
[CV] END pp_select__Feature_Selector__k=13, pp_select__col_pipeline__num_transformer__Combined_Attr_Adder__add_bedrooms_per_room=False, pp_select__col_pipeline__num_transformer__imputer__strategy=mean; total time=  32.2s
[CV] END pp_select__Feature_Selector__k=13, pp_select__col_pipeline__num_transformer__Combined_Attr_Adder__add_bedrooms_per_room=False, pp_select__col_pipeline__num_transformer__imputer__strategy=mean; total time=  31.7s


Traceback (most recent call last):
  File "/usr/local/lib/python3.9/site-packages/sklearn/model_selection/_validation.py", line 767, in _score
    scores = scorer(estimator, X_test, y_test)
  File "/usr/local/lib/python3.9/site-packages/sklearn/metrics/_scorer.py", line 234, in __call__
    return self._score(
  File "/usr/local/lib/python3.9/site-packages/sklearn/metrics/_scorer.py", line 276, in _score
    y_pred = method_caller(estimator, "predict", X)
  File "/usr/local/lib/python3.9/site-packages/sklearn/metrics/_scorer.py", line 73, in _cached_call
    return getattr(estimator, method)(*args, **kwargs)
  File "/usr/local/lib/python3.9/site-packages/sklearn/pipeline.py", line 480, in predict
    Xt = transform.transform(Xt)
  File "/usr/local/lib/python3.9/site-packages/sklearn/pipeline.py", line 658, in transform
    Xt = transform.transform(Xt)
  File "/usr/local/lib/python3.9/site-packages/sklearn/utils/_set_output.py", line 142, in wrapped
    data_to_wrap = f(self, X, *args, 

[CV] END pp_select__Feature_Selector__k=13, pp_select__col_pipeline__num_transformer__Combined_Attr_Adder__add_bedrooms_per_room=False, pp_select__col_pipeline__num_transformer__imputer__strategy=median; total time=  18.8s
[CV] END pp_select__Feature_Selector__k=13, pp_select__col_pipeline__num_transformer__Combined_Attr_Adder__add_bedrooms_per_room=False, pp_select__col_pipeline__num_transformer__imputer__strategy=median; total time=  32.2s
[CV] END pp_select__Feature_Selector__k=13, pp_select__col_pipeline__num_transformer__Combined_Attr_Adder__add_bedrooms_per_room=False, pp_select__col_pipeline__num_transformer__imputer__strategy=median; total time=  33.0s




In [None]:
#Evaluate the final performance on unseen test set
X_test = strat_test_set.drop("median_house_value", axis=1)
y_test = strat_test_set["median_house_value"].copy()


grid_test_pred = grid_search_prep.best_estimator_.predict(X_test)
mean_absolute_error(grid_test_pred, y_test)