In [11]:
import numpy as np
import pandas as pd

import matplotlib.pyplot as plt
import seaborn as sns

from sklearn.linear_model import LinearRegression, LogisticRegression, Lasso, Ridge, ElasticNet
from sklearn.svm import SVR
from sklearn.model_selection import train_test_split, GridSearchCV
from sklearn.neighbors import KNeighborsRegressor
from sklearn.pipeline import make_pipeline, Pipeline
from sklearn.preprocessing import StandardScaler, OneHotEncoder
from sklearn.impute import SimpleImputer
from sklearn.compose import ColumnTransformer, make_column_transformer, make_column_selector
from sklearn.base import BaseEstimator

from sklearn import set_config

In [12]:
# Set up Data

In [13]:
df_train_raw = pd.read_csv('./datasets/train.csv')

df = df_train_raw.copy()

In [14]:
df_test_raw = pd.read_csv('./datasets/test.csv')
df_test = df_test_raw.copy()

In [15]:
# Set up Pipeline

In [16]:
# https://scikit-learn.org/stable/auto_examples/compose/plot_column_transformer_mixed_types.html
#numeric_features = list(X.loc[:, X.dtypes == object].columns)
numeric_transformer = Pipeline(steps=[
    ('num_imputer', SimpleImputer(strategy='mean')),
    ('num_scaler', StandardScaler())])

#cat_features = list(X.loc[:, X.dtypes != object].columns)
categorical_transformer = Pipeline(steps=[
    ('cat_imputer', SimpleImputer(strategy='constant', fill_value='Other')),
    ('cat_onehot', OneHotEncoder(handle_unknown='ignore'))])

preprocessing = ColumnTransformer(transformers=[
    ('num', numeric_transformer, make_column_selector(dtype_include=np.number)),
    ('cat', categorical_transformer, make_column_selector(dtype_include='object'))
])

In [17]:
# Set up Params

In [18]:
# Set up Pipeline, GridSearch

In [75]:
class Switcher(BaseEstimator):
    def __init__(self, estimator = LinearRegression()):
        """
        A Custom BaseEstimator that can switch between classifiers.
        :param estimator: sklearn object - The classifier
        """ 
        self.estimator = estimator


    def fit(self, X, y=None, **kwargs):
        self.estimator.fit(X, y)
        return self


    def predict(self, X, y=None):
        return self.estimator.predict(X)


    def predict_proba(self, X):
        return self.estimator.predict_proba(X)


    def score(self, X, y):
        return self.estimator.score(X, y)

In [76]:
switcher_pipeline = Pipeline([(
    'clf', Switcher()
    )]) # Placeholder estimator

In [77]:
params = [{
        'clf': [KNeighborsRegressor()]}
]

In [78]:
pipe = make_pipeline(preprocessing, switcher_pipeline)
gs = GridSearchCV(pipe, param_grid=params, n_jobs=-1)

In [79]:
set_config(display='diagram')
gs

In [67]:
X = pd.DataFrame([1, 2, 3, 4, 5], [1, 2, 3, 4, 5])
y = [1, 2, 3, 4, 5]

In [68]:
gs.fit(X,y);

ValueError: Invalid parameter clf for estimator Pipeline(steps=[('columntransformer',
                 ColumnTransformer(transformers=[('num',
                                                  Pipeline(steps=[('num_imputer',
                                                                   SimpleImputer()),
                                                                  ('num_scaler',
                                                                   StandardScaler())]),
                                                  <sklearn.compose._column_transformer.make_column_selector object at 0x7fc67755f250>),
                                                 ('cat',
                                                  Pipeline(steps=[('cat_imputer',
                                                                   SimpleImputer(fill_value='Other',
                                                                                 strategy='constant')),
                                                                  ('cat_onehot',
                                                                   OneHotEncoder(handle_unknown='ignore'))]),
                                                  <sklearn.compose._column_transformer.make_column_selector object at 0x7fc67756cb20>)])),
                ('pipeline',
                 Pipeline(steps=[('clf',
                                  Switcher(estimator=LinearRegression()))]))]). Check the list of available parameters with `estimator.get_params().keys()`.