<a href="https://colab.research.google.com/github/wjdolan/DS_Portfolio/blob/main/Combine_Pipelines.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

# Combining Pipelines

In [1]:
# IMPORT PACKAGES
from sklearn.svm import SVC
from sklearn.datasets import load_iris

from sklearn.model_selection import GridSearchCV
from sklearn.pipeline import Pipeline, FeatureUnion

from sklearn.decomposition import PCA
from sklearn.feature_selection import SelectKBest

In [2]:
# Load the iris dataset that ships with scikit-learn
iris = load_iris()

# Split the loaded bunch into the feature matrix and the class labels
X = iris["data"]
y = iris["target"]

In [3]:
# Dimensionality reduction: project the features onto principal components.
# n_components=2 is only a starting value; the grid search later in the
# notebook tries 1, 2 and 3.
pca = PCA(
    n_components=2,
)

# Some of the original features may be informative on their own, so also
# keep the k best-scoring ones (k is likewise varied by the grid search).
selection = SelectKBest(
    k=3,
)

In [4]:
# Build a single transformer that joins the outputs of PCA and
# univariate selection side by side:
combined_features = FeatureUnion(
    transformer_list=[
        ("pca", pca),
        ("univ_select", selection),
    ]
)

In [5]:
# Initialize the classifier: a support vector classifier with a linear kernel.
# C is left at its default here; the grid search below varies it via "svm__C".
svm = SVC(kernel="linear")

In [6]:
# Chain the combined feature extraction and the classifier into one estimator
pipeline = Pipeline(
    steps=[
        ("features", combined_features),
        ("svm", svm),
    ]
)

# Candidate values to search over. Each key addresses a nested parameter
# using the <step>__<sub-step>__<parameter> naming convention.
param_grid = dict(
    features__pca__n_components=[1, 2, 3],
    features__univ_select__k=[1, 2, 3],
    svm__C=[0.1, 1, 10],
)

# Exhaustive search over every combination in the grid. verbose=10 logs each
# individual fit; refit=True retrains the best configuration after the search.
grid_search = GridSearchCV(
    estimator=pipeline,
    param_grid=param_grid,
    verbose=10,
    refit=True,
)

# Run the search; as the last expression, the fitted object is displayed
grid_search.fit(X, y)

Fitting 5 folds for each of 27 candidates, totalling 135 fits
[CV 1/5; 1/27] START features__pca__n_components=1, features__univ_select__k=1, svm__C=0.1
[CV 1/5; 1/27] END features__pca__n_components=1, features__univ_select__k=1, svm__C=0.1;, score=0.933 total time=   0.0s
[CV 2/5; 1/27] START features__pca__n_components=1, features__univ_select__k=1, svm__C=0.1
[CV 2/5; 1/27] END features__pca__n_components=1, features__univ_select__k=1, svm__C=0.1;, score=0.933 total time=   0.0s
[CV 3/5; 1/27] START features__pca__n_components=1, features__univ_select__k=1, svm__C=0.1
[CV 3/5; 1/27] END features__pca__n_components=1, features__univ_select__k=1, svm__C=0.1;, score=0.867 total time=   0.0s
[CV 4/5; 1/27] START features__pca__n_components=1, features__univ_select__k=1, svm__C=0.1
[CV 4/5; 1/27] END features__pca__n_components=1, features__univ_select__k=1, svm__C=0.1;, score=0.933 total time=   0.0s
[CV 5/5; 1/27] START features__pca__n_components=1, features__univ_select__k=1, svm__C

GridSearchCV(estimator=Pipeline(steps=[('features',
                                        FeatureUnion(transformer_list=[('pca',
                                                                        PCA(n_components=2)),
                                                                       ('univ_select',
                                                                        SelectKBest(k=3))])),
                                       ('svm', SVC(kernel='linear'))]),
             param_grid={'features__pca__n_components': [1, 2, 3],
                         'features__univ_select__k': [1, 2, 3],
                         'svm__C': [0.1, 1, 10]},
             verbose=10)

In [7]:
# Parameter combination that achieved the best mean cross-validated score
best_params = grid_search.best_params_
print(best_params)

{'features__pca__n_components': 2, 'features__univ_select__k': 3, 'svm__C': 1}


In [8]:
# Mean cross-validated score of the best parameter combination
grid_search.best_score_

0.9800000000000001

In [9]:
# Full per-candidate results of the search (timings and test scores),
# displayed as a dict of arrays.
# NOTE(review): this output is very long; a tabular view of the same dict
# may be easier to read — consider for a future revision.
grid_search.cv_results_

{'mean_fit_time': array([0.01128998, 0.00427222, 0.01002412, 0.00423012, 0.00496054,
        0.00551009, 0.00512118, 0.00420823, 0.00468683, 0.0046011 ,
        0.00534415, 0.00404968, 0.00459108, 0.00400043, 0.00415745,
        0.00453248, 0.00441537, 0.00429044, 0.00449452, 0.00374293,
        0.00422096, 0.00478621, 0.00259948, 0.00425043, 0.00483503,
        0.00391421, 0.00478978]),
 'mean_score_time': array([0.00205011, 0.00299921, 0.00446939, 0.00243621, 0.00125251,
        0.00121932, 0.00121388, 0.00101528, 0.00103478, 0.00198488,
        0.00133963, 0.00122366, 0.00120606, 0.00097685, 0.00097775,
        0.00104671, 0.00109086, 0.00094795, 0.00122395, 0.00096698,
        0.00096803, 0.00140915, 0.00099702, 0.00099182, 0.00150576,
        0.00162554, 0.00114751]),
 'mean_test_score': array([0.93333333, 0.94      , 0.95333333, 0.95333333, 0.95333333,
        0.96      , 0.96666667, 0.97333333, 0.97333333, 0.94666667,
        0.96666667, 0.95333333, 0.96666667, 0.97333333, 0.96 