In [1]:
from sklearn.pipeline import Pipeline
#feature scaling
from sklearn.preprocessing import StandardScaler
from sklearn.linear_model import LogisticRegression

In [2]:
# Pipeline steps as ordered (name, estimator) pairs:
# standardise the features first, then classify with logistic regression.
steps = [
    ('scaler', StandardScaler()),
    ('classifier', LogisticRegression()),
]

In [3]:
# Inspect the step list; each entry is a (name, estimator) tuple.
steps

[('scaler', StandardScaler()), ('classifier', LogisticRegression())]

In [5]:
# Chain the (name, estimator) steps into a single estimator object.
pipe = Pipeline(steps)

In [7]:

# Show the assembled pipeline as the cell output.
pipe

In [8]:
## Creating a random classification dataset

from sklearn.datasets import make_classification

# Seed the generator so the data -- and every fit, prediction and score
# derived from it further down -- is identical on each Restart & Run All.
# Without a seed, the recorded outputs of this notebook cannot be reproduced.
X, y = make_classification(
    n_samples=10000,
    n_features=2,
    n_informative=2,
    n_redundant=0,
    random_state=42,
)

In [10]:
# Confirm the dataset dimensions: (n_samples, n_features) should be (10000, 2).
X.shape

(10000, 2)

In [11]:
# Hold out 30% of the samples for evaluation; the fixed seed keeps the split reproducible.
from sklearn.model_selection import train_test_split

X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.3,
    random_state=42,
)

In [12]:
# Fit the scaler + logistic-regression pipeline on the training split only;
# the test split stays unseen until prediction.
pipe.fit(X_train, y_train)

In [14]:
# Predict class labels for the held-out test split with the fitted pipeline.
y_pred = pipe.predict(X_test)

In [15]:
# Peek at the predicted labels (the array repr is truncated for long arrays).
y_pred

array([1, 1, 1, ..., 1, 1, 1])

In [16]:
## Example 2

# Display a pipeline with StandardScaler, dimensionality reduction (PCA), and a support vector classifier (SVC)

In [17]:
from sklearn.decomposition import PCA
from sklearn.svm import SVC

In [21]:
# Steps for the second pipeline: scale -> PCA -> support vector classifier.
# NOTE: X has only 2 features, so n_components=2 keeps every component;
# no dimension is actually dropped in this example.
steps = [
    ('scaler', StandardScaler()),
    ('pca', PCA(n_components=2)),
    ('classifier', SVC()),
]

In [22]:
# Build the second pipeline from the step list defined in the previous cell
# (`steps` was reassigned there, so run the cells in order).
pipe2 = Pipeline(steps)

In [23]:
# Fit all steps of the second pipeline on the training split only.
pipe2.fit(X_train, y_train)

In [24]:
# Predict class labels for the held-out test split with the fitted second pipeline.
pipe2.predict(X_test)

array([1, 1, 1, ..., 1, 1, 1])

In [26]:
# Evaluate on the held-out test split; for a classifier pipeline, score() reports mean accuracy.
pipe2.score(X_test, y_test)

0.92

# Column Transformers

In [27]:
from sklearn.impute import SimpleImputer


In [28]:
# Numeric processor pipeline
import numpy as np

# Replace missing numeric values (NaN) with the column mean, then standardise.
numeric_processor = Pipeline(
    steps=[
        ('imputer_numeric', SimpleImputer(missing_values=np.nan, strategy='mean')),
        ('scaler', StandardScaler()),
    ]
)

# Categorical processor pipeline
from sklearn.preprocessing import OneHotEncoder

# Replace missing categories (NaN) with the most frequent value, then one-hot encode.
# handle_unknown='ignore' is set so categories unseen during fit do not raise at transform time.
categorical_processor = Pipeline(
    steps=[
        ('imputer_categoric', SimpleImputer(missing_values=np.nan, strategy='most_frequent')),
        ('onehot', OneHotEncoder(handle_unknown='ignore')),
    ]
)

In [29]:
# Show the numeric preprocessing pipeline (imputer -> scaler).
numeric_processor

In [30]:
# Show the categorical preprocessing pipeline (imputer -> one-hot encoder).
categorical_processor

In [31]:
from sklearn.compose import ColumnTransformer

# Route each column group through its own preprocessing pipeline:
# "gender"/"City" go to the categorical processor, "Age"/"height" to the numeric one.
# Columns not listed in either group are dropped (ColumnTransformer's default remainder='drop').
preprocessor = ColumnTransformer(
    transformers=[
        ('categorical', categorical_processor, ["gender", "City"]),
        ("numerical", numeric_processor, ["Age", "height"]),
    ]
)

In [33]:
# Show the column transformer and its two sub-pipelines.
preprocessor

In [34]:
# Add an estimator

from sklearn.pipeline import make_pipeline



In [35]:
# Build the full model: column-wise preprocessing followed by logistic regression.
# make_pipeline names the steps automatically, so no explicit (name, estimator) tuples are needed.
pipe3 = make_pipeline(preprocessor, LogisticRegression())

In [36]:
# Show the assembled preprocessing + classifier pipeline.
pipe3