In [1]:
import numpy as np
from sklearn.pipeline import make_pipeline
from sklearn.pipeline import Pipeline
from sklearn.impute import SimpleImputer
from sklearn.compose import ColumnTransformer
from sklearn.preprocessing import OneHotEncoder, StandardScaler
from sklearn.ensemble import RandomForestRegressor
from sklearn.model_selection import GridSearchCV

In [2]:
import seaborn as sns
df=sns.load_dataset('tips')
df.head()

Unnamed: 0,total_bill,tip,sex,smoker,day,time,size
0,16.99,1.01,Female,No,Sun,Dinner,2
1,10.34,1.66,Male,No,Sun,Dinner,3
2,21.01,3.5,Male,No,Sun,Dinner,3
3,23.68,3.31,Male,No,Sun,Dinner,2
4,24.59,3.61,Female,No,Sun,Dinner,4


In [3]:
X=df.iloc[:,1:]
y=df['total_bill']

In [4]:
from sklearn.model_selection import train_test_split
X_train,X_test,y_train,y_test=train_test_split(X,y,test_size=0.2)

In [5]:
numeric_preprocessor = Pipeline(steps=[("imputation_mean", SimpleImputer(missing_values=np.nan,strategy="mean")),("scaler", StandardScaler()),])

In [6]:
from sklearn import set_config
set_config(display='diagram')
numeric_preprocessor

In [7]:
categorical_preprocessor = Pipeline(steps=[("imputation_constant",SimpleImputer(fill_value="missing", strategy="constant"),),("onehot", OneHotEncoder(handle_unknown="ignore")),])
preprocessor=Pipeline(steps=[("categorical",categorical_preprocessor),("numerical",numeric_preprocessor)])
preprocessor

In [8]:
Pipeline(steps=[('categorical',Pipeline(steps=[('imputation_constant',SimpleImputer(fill_value='missing',strategy='constant')),('onehot',OneHotEncoder(handle_unknown='ignore'))])),('numerical',Pipeline(steps=[('imputation_mean', SimpleImputer()),('scaler', StandardScaler())]))])

In [9]:
pipe=Pipeline(
 [("preprocessor",preprocessor),
("regressor",RandomForestRegressor())]
)
pipe

In [10]:
from sklearn.compose import ColumnTransformer

categorical_cols = X.select_dtypes(include='object').columns.tolist()
numerical_cols = X.select_dtypes(include=['int64', 'float64']).columns.tolist()

preprocessor = ColumnTransformer(
    transformers=[
        ("categorical", categorical_preprocessor, categorical_cols),
        ("numerical", numeric_preprocessor, numerical_cols),
    ]
)

pipe = Pipeline(
    [("preprocessor", preprocessor), ("regressor", RandomForestRegressor())]
)

pipe.fit(X_train, y_train)

In [11]:
pipe.fit(X_train,y_train)

In [12]:
from sklearn.model_selection import RandomizedSearchCV
from sklearn.linear_model import LogisticRegression
from sklearn.ensemble import RandomForestClassifier
from sklearn.pipeline import Pipeline

In [13]:
randomcv_models = [
 ('Random Forest',Pipeline([('preprocessor', preprocessor),
 ('classifier', RandomForestClassifier(random_state=42))]),
{
 'classifier__n_estimators': [50, 100, 200],
 'classifier__max_depth': [None, 10, 20],
 'classifier__min_samples_split': [2, 5],
 'classifier__min_samples_leaf': [1, 2],
 'classifier__bootstrap': [True, False]
}),('Logistic Regression',Pipeline([('preprocessor', preprocessor),('classifier', LogisticRegression(max_iter=200, random_state=42))]),
{
 'classifier__penalty': ['l2'],
 'classifier__C': [0.01, 0.1, 1.0, 10],
 'classifier__solver': ['liblinear', 'lbfgs']
}
 )
]


In [14]:

import numpy as np

y_train = np.round(y_train).astype(int)


model_param = {}
for name, model, params in randomcv_models:
 print(f"\nTuning: {name}")
 random = RandomizedSearchCV(estimator=model,param_distributions=params,n_iter=10,cv=3,n_jobs=-1,verbose=1,random_state=42)
 random.fit(X_train, y_train)
 model_param[name] = random.best_params_
 print(f"Best Params for {name}:")
 print(random.best_params_)



Tuning: Random Forest
Fitting 3 folds for each of 10 candidates, totalling 30 fits




Best Params for Random Forest:
{'classifier__n_estimators': 100, 'classifier__min_samples_split': 5, 'classifier__min_samples_leaf': 2, 'classifier__max_depth': None, 'classifier__bootstrap': True}

Tuning: Logistic Regression
Fitting 3 folds for each of 8 candidates, totalling 24 fits
Best Params for Logistic Regression:
{'classifier__solver': 'lbfgs', 'classifier__penalty': 'l2', 'classifier__C': 1.0}


