### Grid-Search for Getting Best Parameters on all 3 Models

In [1]:
import kfp
import typing

#### Read the Data file from GCS Bucket

In [2]:
# Read the data file from the GCS bucket

from typing import NamedTuple
from kfp.components import *

def read_data(file_name: str, df_churn_op: OutputPath(), mlpipeline_ui_metadata: OutputPath()): 
    """Load the churn CSV, hand it to the next step and publish a preview table.

    The full dataset is copied unchanged to ``df_churn_op``. The first five
    rows of a fixed set of columns are uploaded to GCS and registered as a
    ``table`` output in the pipeline UI metadata.

    Args:
        file_name: Location of the source CSV, read with ``pandas.read_csv``.
        df_churn_op: Output path that receives the full dataset as CSV.
        mlpipeline_ui_metadata: Output path that receives the UI metadata JSON.
    """
    # Imports stay inside the function: it is packaged by
    # func_to_container_op, so it cannot rely on notebook-level names.
    import json

    import pandas as pd
    import gcsfs
    from tensorflow.python.lib.io import file_io

    sample_uri = 'gs://pipelines_artifacts/Artifacts/Data_Sample.csv'
    preview_columns = ['customerID','gender','tenure','Contract','TotalCharges','Churn']

    # NOTE(review): this handle is never used below; presumably it is only
    # here to set up / validate GCS credentials -- confirm before removing.
    fs = gcsfs.GCSFileSystem(project='YDataSynthetic', token = 'cloud')

    df_churn = pd.read_csv(file_name)
    df_churn.to_csv(df_churn_op, index=False)

    # Small preview of the data for the UI table.
    df_disp = df_churn.iloc[0:5][preview_columns]
    df_disp.to_csv(sample_uri, index=False)

    # The table header is taken from the sample exactly as it was stored.
    df_show = pd.read_csv(sample_uri)

    metadata = {
        'outputs' : [{
          'type': 'table',
          'storage': 'gcs',
          'format': 'csv',
          'header': list(df_show.columns),
          'source': sample_uri
        }
        ]
    }

    # Written twice: once at the fixed container-root location and once at
    # the output path supplied by the pipeline.
    for destination in ('/mlpipeline-ui-metadata.json', mlpipeline_ui_metadata):
        with file_io.FileIO(destination, 'w') as f:
            json.dump(metadata, f)
        
    

In [3]:
# Wrap read_data as a container op and save its component spec next to the notebook.
kfp_read_data = kfp.components.func_to_container_op(
    func=read_data,
    output_component_file='./read-data-func.yaml',
    packages_to_install=['numpy==1.17.2', 'pandas==1.0.3', 'gcsfs'],
)


#### Data Cleaning and One-Hot Encoding

In [4]:
from typing import NamedTuple
from kfp.components import *

def one_hot_encode(df_churn_ip: InputPath(), df_one_hot: OutputPath()):
    """Clean the churn data and turn every categorical column into numbers.

    Blank cells (a single space) are treated as missing and the affected rows
    are dropped, ``customerID`` is removed, two-valued columns are mapped to
    0/1 and multi-valued columns are expanded into ``<column>#<value>``
    indicator columns.

    Args:
        df_churn_ip: Path of the raw churn CSV.
        df_one_hot: Output path that receives the encoded CSV.
    """
    import pandas as pd
    import numpy as np

    yes_no = {"Yes":1,"No":0}

    # Columns scanned for blank (" ") placeholders.
    blank_checked = ['customerID', 'gender', 'SeniorCitizen', 'Partner', 'Dependents',
                     'tenure', 'PhoneService', 'MultipleLines', 'InternetService',
                     'OnlineSecurity', 'OnlineBackup', 'DeviceProtection','TechSupport',
                     'StreamingTV', 'StreamingMovies', 'Contract', 'PaperlessBilling',
                     'PaymentMethod', 'MonthlyCharges', 'TotalCharges', 'Churn']

    # Two-valued columns and the value -> integer mapping applied to each.
    two_valued = {
        'Partner': yes_no,
        'Dependents': yes_no,
        'PhoneService': yes_no,
        'PaperlessBilling': yes_no,
        'gender': {"Male":1,"Female":0},
    }

    # Columns with more than two categories, expanded into indicator columns.
    multi_valued = ['PaymentMethod','MultipleLines','InternetService','OnlineSecurity',
                    'OnlineBackup','DeviceProtection',
                    'TechSupport','StreamingTV','StreamingMovies','Contract']

    churn = pd.read_csv(df_churn_ip)

    for column in blank_checked:
        churn[column] = churn[column].replace(" ",np.nan)

    # Drop the identifier, then every row that still has a missing value.
    churn = churn.drop(['customerID'], axis=1).dropna()

    for column, mapping in two_valued.items():
        churn[column] = churn[column].replace(mapping)

    for column in multi_valued:
        indicators = pd.get_dummies(churn[column], drop_first=False)
        indicators = indicators.add_prefix("{}#".format(column))
        churn = churn.drop(column, axis=1).join(indicators)

    # Target column: Yes/No -> 1/0.
    churn['Churn'] = churn['Churn'].replace(yes_no)

    churn.to_csv(df_one_hot, index=False)


In [5]:
# Wrap one_hot_encode as a container op and save its component spec.
kfp_one_hot_encode = kfp.components.func_to_container_op(
    func=one_hot_encode,
    output_component_file='./one-hot-encode-func.yaml',
    packages_to_install=[
        'scikit-learn==0.22.2',
        'numpy==1.17.2',
        'pandas==1.0.3',
        'imbalanced-learn==0.6.2',
    ],
)

#### Data Cleaning and Column Elimination

In [6]:
from typing import NamedTuple
from kfp.components import *

def data_cleaning(df_churn_ip: InputPath(), df_cleaned: OutputPath()):
    """Encode, standardise and prune the churn data.

    Object-typed columns (plus ``SeniorCitizen``) are factorized when they
    have exactly two distinct values and one-hot encoded otherwise; the three
    numeric columns are standardised with ``StandardScaler``; a fixed list of
    redundant columns is then removed.

    Args:
        df_churn_ip: Path of the raw churn CSV.
        df_cleaned: Output path that receives the processed CSV.
    """
    import pandas as pd
    import numpy as np
    from sklearn.preprocessing import StandardScaler

    numeric_names = ['tenure', 'MonthlyCharges', 'TotalCharges']

    # Column groups removed after encoding, in this order.
    redundant_groups = [
        # Remove Gender
        ['gender'],
        # Remove services with 'no internet' label
        ['OnlineSecurity_No internet service', 'OnlineBackup_No internet service',
         'DeviceProtection_No internet service', 'TechSupport_No internet service',
         'StreamingTV_No internet service', 'StreamingMovies_No internet service'],
        # Additional services 'No'
        ['OnlineSecurity_No', 'OnlineBackup_No',
         'DeviceProtection_No', 'TechSupport_No',
         'StreamingTV_No', 'StreamingMovies_No'],
        # Remove PhoneService as MultipleLines has a 'No phone service' label
        ['PhoneService'],
    ]

    raw = pd.read_csv(df_churn_ip)

    # Blank TotalCharges entries become 0 so the column can be cast to float.
    raw['TotalCharges'] = raw['TotalCharges'].replace(" ", 0).astype('float32')
    raw = raw.drop(['customerID'], axis=1)

    encoded_names = [name for name in raw.columns
                     if name == 'SeniorCitizen' or raw[name].dtype == 'object']
    encoded = raw[encoded_names].copy()
    for name in encoded_names:
        if encoded[name].nunique() != 2:
            encoded = pd.get_dummies(encoded, columns=[name])
            continue
        codes, _ = pd.factorize(encoded[name])
        encoded[name] = codes

    scaled_values = StandardScaler().fit_transform(raw[numeric_names].astype('float64'))
    scaled = pd.DataFrame(scaled_values, columns=numeric_names)

    processed = pd.concat([scaled, encoded], axis=1)
    for group in redundant_groups:
        processed = processed.drop(group, axis=1)

    processed.to_csv(df_cleaned, index=False)


In [7]:
# Wrap data_cleaning as a container op and save its component spec.
kfp_data_cleaning = kfp.components.func_to_container_op(
    func=data_cleaning,
    output_component_file='./data_cleaning.yaml',
    packages_to_install=['scikit-learn==0.22.2', 'numpy==1.17.2', 'pandas==1.0.3'],
)

In [8]:
from typing import NamedTuple
from kfp.components import *

def gridsearch(df_churn_ip: InputPath(), model: str, parameters: dict) -> dict:
    """Grid-search hyper-parameters for one of the supported classifiers.

    The data is split with ``train_test_split(random_state=0)`` and a 4-fold
    ``GridSearchCV`` is fitted on the training part. The best parameters, the
    best cross-validation score and the score on the held-out part are
    printed.

    Args:
        df_churn_ip: Path of the encoded CSV; must contain a ``Churn`` column.
        model: One of ``'RandomForest'``, ``'XGBoost'`` or ``'XGBoostRF'``.
        parameters: Parameter grid passed to ``GridSearchCV`` as ``param_grid``.

    Returns:
        The best parameter combination found (``GridSearchCV.best_params_``).

    Raises:
        ValueError: If ``model`` is not one of the supported names.
    """
    import pandas as pd
    import xgboost as xgb
    from sklearn.ensemble import RandomForestClassifier
    from sklearn.model_selection import train_test_split, GridSearchCV

    estimators = {
        'RandomForest': RandomForestClassifier,
        'XGBoost': xgb.XGBClassifier,
        'XGBoostRF': xgb.XGBRFClassifier,
    }
    # Fail early with a clear message; previously an unknown name left the
    # estimator unbound and only crashed once the search was being built.
    if model not in estimators:
        raise ValueError('Unsupported model {!r}; expected one of {}'.format(
            model, sorted(estimators)))
    clf = estimators[model]()

    df_churn = pd.read_csv(df_churn_ip)
    df_churn.dropna(inplace=True)

    y1 = df_churn['Churn']
    X1 = df_churn.drop(['Churn'],axis=1)
    X_train, X_test, y_train, y_test = train_test_split(X1, y1, random_state=0)

    gscv = GridSearchCV(estimator=clf, param_grid=parameters, cv = 4, n_jobs = -1, verbose = 0)
    gscv.fit(X_train, y_train)

    best_params = gscv.best_params_
    print('Best Parameters: {}\n'.format(best_params))
    # best_score_ is the mean cross-validated score of the best candidate.
    print('Train Score: {}\n'.format(gscv.best_score_))

    test_accuracy = gscv.score(X_test, y_test)
    print('Test score: {}'.format(test_accuracy))

    return best_params


In [9]:
# Wrap gridsearch as a container op and save its component spec.
kfp_gridsearch = kfp.components.func_to_container_op(
    func=gridsearch,
    output_component_file='./gridsearch.yaml',
    packages_to_install=[
        'scikit-learn==0.22.2',
        'numpy==1.17.2',
        'pandas==1.0.3',
        'xgboost==1.0.2',
    ],
)

#### Machine Learning Algorithm - Random Forest

In [10]:
from typing import NamedTuple
from kfp.components import *


def rf_model(df_churn_ip: InputPath(), parameters: dict,
              conf_matr: OutputPath(),
              mlpipeline_ui_metadata: OutputPath(), mlpipeline_metrics: OutputPath()):
    """Train a random forest on the encoded churn data and publish its evaluation.

    The data is split with ``train_test_split(random_state=0)``; a
    ``RandomForestClassifier(**parameters)`` is fitted on the training part
    and evaluated on the held-out part. Accuracy, precision, recall, F1 and
    ROC AUC are printed and written as pipeline metrics; the confusion matrix
    and ROC curve are uploaded to GCS and registered in the UI metadata.

    Args:
        df_churn_ip: Path of the encoded CSV; must contain a ``Churn`` column
            (assumed to be 0/1 with 1 meaning churned -- the display labels
            below rely on that).
        parameters: Keyword arguments forwarded to ``RandomForestClassifier``.
        conf_matr: Output path that receives the confusion-matrix CSV.
        mlpipeline_ui_metadata: Output path that receives the UI metadata JSON.
        mlpipeline_metrics: Output path that receives the metrics JSON.
    """
    # Imports stay inside the function: it is packaged by
    # func_to_container_op, so it cannot rely on notebook-level names.
    import json

    import pandas as pd
    import gcsfs
    from sklearn.ensemble import RandomForestClassifier
    from sklearn.model_selection import train_test_split
    from sklearn.metrics import confusion_matrix, roc_auc_score, roc_curve, precision_score, recall_score, f1_score
    from tensorflow.python.lib.io import file_io

    conf_matrix_uri = 'gs://pipelines_artifacts/Artifacts/Conf_matRF.csv'
    roc_curve_uri = 'gs://pipelines_artifacts/Artifacts/ROC_curveRF.csv'

    df_churn = pd.read_csv(df_churn_ip)
    df_churn.dropna(inplace=True)

    y1 = df_churn['Churn']
    X1 = df_churn.drop(['Churn'],axis=1)

    #Split Data into training and testing sets
    X_train, X_test, y_train, y_test = train_test_split(X1, y1, random_state=0)

    #fit the model on the Data and train it
    rfc_best = RandomForestClassifier(**parameters)
    rfc_best.fit(X_train, y_train)
    y_test_pred = rfc_best.predict(X_test)

    # Score of the POSITIVE class (label 1). predict_proba columns follow
    # classes_, so column 0 would be P(not churned) and would mirror the ROC
    # curve and report 1 - AUC.
    y_test_proba = rfc_best.predict_proba(X_test)[:, 1]

    #Get Metrics scores
    rf_score = float('%.4f' %rfc_best.score(X_test, y_test))
    rf_precision = float('%.4f' %precision_score(y_test, y_test_pred))
    rf_recall = float('%.4f' %recall_score(y_test, y_test_pred))
    rf_f1 = float('%.4f' %f1_score(y_test, y_test_pred))

    print("Accuraccy, Precision, Recall, f1: ")
    print(rf_score, rf_precision, rf_recall, rf_f1)

    #Confusion Matrix
    cm = confusion_matrix(y_test, y_test_pred)
    print("Confusion Matrix: {}".format(cm))

    #True and False Positive Rates
    fpr, tpr, thresholds = roc_curve(y_test, y_test_proba)
    auc_score = float('%.4f' %roc_auc_score(y_test, y_test_proba))
    print('Auc score: ')
    print(auc_score)

    # The confusion-matrix artifact needs one (target, predicted, count) row
    # per cell, so the numpy matrix is flattened into that shape.
    flags = {0:'Not Churned',1:'Churned'}
    flag_list = ['Not Churned','Churned']
    data = [
        (flags[target_index], flags[predicted_index], count)
        for target_index, target_row in enumerate(cm)
        for predicted_index, count in enumerate(target_row)
    ]
    df_cm = pd.DataFrame(data, columns=['target', 'predicted', 'count'])
    print(df_cm)

    with file_io.FileIO(conf_matr, 'w') as f:
        df_cm.to_csv(f, columns=['target', 'predicted', 'count'], header=False, index=False)

    # NOTE(review): this handle is never used below; presumably it is only
    # here to set up / validate GCS credentials -- confirm before removing.
    fs = gcsfs.GCSFileSystem(project='YDataSynthetic', token = 'cloud')

    #Save confusion Matrix to GCS
    with file_io.FileIO(conf_matrix_uri, 'w') as f:
        df_cm.to_csv(f, columns=['target', 'predicted', 'count'], header=False, index=False)

    # The ROC artifact needs fpr, tpr and thresholds as header-less CSV columns.
    df_roc = pd.DataFrame({'fpr': fpr, 'tpr': tpr, 'thresholds': thresholds})
    with file_io.FileIO(roc_curve_uri, 'w') as f:
        df_roc.to_csv(f, columns=['fpr', 'tpr', 'thresholds'], header=False, index=False)

    # UI metadata describing both artifacts; the schemas mirror the CSV
    # columns written above.
    metadata = {
        'version' : 1, 
        'outputs' : [{
            'type': 'confusion_matrix',
            'format': 'csv',
            'storage': 'gcs',
            'schema': [
                {'name': 'target', 'type': 'CATEGORY'},
                {'name': 'predicted', 'type': 'CATEGORY'},
                {'name': 'count', 'type': 'NUMBER'},
            ],
            'source': conf_matrix_uri,
            # Labels are the same strings used in the target/predicted columns.
            'labels': flag_list
        },
        {
          'type': 'roc',
          'format': 'csv',
          'storage': 'gcs',
          'schema': [
            {'name': 'fpr', 'type': 'NUMBER'},
            {'name': 'tpr', 'type': 'NUMBER'},
            {'name': 'thresholds', 'type': 'NUMBER'},
          ],
          'source': roc_curve_uri
        }
        ]
    }

    # Written twice: at the fixed container-root location and at the output
    # path supplied by the pipeline.
    for destination in ('/mlpipeline-ui-metadata.json', mlpipeline_ui_metadata):
        with file_io.FileIO(destination, 'w') as f:
            json.dump(metadata, f)

    # Pipeline metrics: 'name' is the column shown in the runs table,
    # 'numberValue' must be numeric, 'format' is "RAW" or "PERCENTAGE".
    reported = [
        ('accuracy-score', rf_score),
        ('precision-score', rf_precision),
        ('recall', rf_recall),
        ('f1-score', rf_f1),
        ('auc-score', auc_score),
    ]
    metrics = {
        'metrics': [
            {'name': name, 'numberValue': value, 'format': "RAW"}
            for name, value in reported
        ]
    }

    # Same two destinations as the UI metadata: container root and output path.
    for destination in ('/mlpipeline-metrics.json', mlpipeline_metrics):
        with file_io.FileIO(destination, 'w') as f:
            json.dump(metrics, f)
        

In [11]:
# Wrap rf_model as a container op and save its component spec.
kfp_rf_model = kfp.components.func_to_container_op(
    func=rf_model,
    output_component_file='./rf-model-func.yaml',
    packages_to_install=[
        'scikit-learn==0.22.2',
        'numpy==1.17.2',
        'pandas==1.0.3',
        'gcsfs',
    ],
)

#### Machine Learning Algorithm - XGBoost

In [12]:
from typing import NamedTuple
from kfp.components import *

def xgb_model(df_churn_ip: InputPath(), 
              parameters: dict, 
              conf_matr: OutputPath(),
              mlpipeline_ui_metadata: OutputPath(), mlpipeline_metrics: OutputPath()):
    """Train an XGBoost classifier on the encoded churn data and publish its evaluation.

    The data is split with ``train_test_split(random_state=0)``; an
    ``XGBClassifier(**parameters)`` is fitted on the training part and
    evaluated on the held-out part. Accuracy, precision, recall, F1 and ROC
    AUC are printed and written as pipeline metrics; the confusion matrix and
    ROC curve are uploaded to GCS and registered in the UI metadata.

    Args:
        df_churn_ip: Path of the encoded CSV; must contain a ``Churn`` column
            (assumed to be 0/1 with 1 meaning churned -- the display labels
            below rely on that).
        parameters: Keyword arguments forwarded to ``xgb.XGBClassifier``.
        conf_matr: Output path that receives the confusion-matrix CSV.
        mlpipeline_ui_metadata: Output path that receives the UI metadata JSON.
        mlpipeline_metrics: Output path that receives the metrics JSON.
    """
    # Imports stay inside the function: it is packaged by
    # func_to_container_op, so it cannot rely on notebook-level names.
    import json

    import pandas as pd
    import gcsfs
    import xgboost as xgb
    from sklearn.model_selection import train_test_split
    from sklearn.metrics import confusion_matrix, accuracy_score, roc_auc_score, roc_curve, precision_score, recall_score, f1_score
    from tensorflow.python.lib.io import file_io

    conf_matrix_uri = 'gs://pipelines_artifacts/Artifacts/XGBConf_mat.csv'
    roc_curve_uri = 'gs://pipelines_artifacts/Artifacts/XGBROC_curve.csv'

    df_churn = pd.read_csv(df_churn_ip)
    df_churn.dropna(inplace=True)

    y1 = df_churn['Churn']
    X1 = df_churn.drop(['Churn'],axis=1)

    X_train, X_test, y_train, y_test = train_test_split(X1, y1, random_state=0)

    clfxg = xgb.XGBClassifier(**parameters)
    clfxg.fit(X_train, y_train)

    y_test_pred = clfxg.predict(X_test)

    # Score of the POSITIVE class (label 1). predict_proba columns follow
    # classes_, so column 0 would be P(not churned) and would mirror the ROC
    # curve and report 1 - AUC.
    y_test_proba = clfxg.predict_proba(X_test)[:, 1]

    xgb_score = float('%.4f' %accuracy_score(y_test, y_test_pred))
    xgb_precision = float('%.4f' %precision_score(y_test, y_test_pred))
    xgb_recall = float('%.4f' %recall_score(y_test, y_test_pred))
    xgb_f1 = float('%.4f' %f1_score(y_test, y_test_pred))

    print("Accuracy, Precision, Recall, f1: ")
    print(xgb_score, xgb_precision, xgb_recall, xgb_f1)

    cm = confusion_matrix(y_test, y_test_pred)
    print("Confusion Matrix: {}".format(cm))

    fpr, tpr, thresholds = roc_curve(y_test, y_test_proba)
    auc_score = float('%.4f' %roc_auc_score(y_test, y_test_proba))
    print('Auc score: ')
    print(auc_score)

    # The confusion-matrix artifact needs one (target, predicted, count) row
    # per cell, so the numpy matrix is flattened into that shape.
    flags = {0:'Not Churned',1:'Churned'}
    flag_list = ['Not Churned','Churned']
    data = [
        (flags[target_index], flags[predicted_index], count)
        for target_index, target_row in enumerate(cm)
        for predicted_index, count in enumerate(target_row)
    ]
    df_cm = pd.DataFrame(data, columns=['target', 'predicted', 'count'])
    print(df_cm)

    with file_io.FileIO(conf_matr, 'w') as f:
        df_cm.to_csv(f, columns=['target', 'predicted', 'count'], header=False, index=False)

    # NOTE(review): this handle is never used below; presumably it is only
    # here to set up / validate GCS credentials -- confirm before removing.
    fs = gcsfs.GCSFileSystem(project='YDataSynthetic', token = 'cloud')

    with file_io.FileIO(conf_matrix_uri, 'w') as f:
        df_cm.to_csv(f, columns=['target', 'predicted', 'count'], header=False, index=False)

    # The ROC artifact needs fpr, tpr and thresholds as header-less CSV columns.
    df_roc = pd.DataFrame({'fpr': fpr, 'tpr': tpr, 'thresholds': thresholds})
    with file_io.FileIO(roc_curve_uri, 'w') as f:
        df_roc.to_csv(f, columns=['fpr', 'tpr', 'thresholds'], header=False, index=False)

    # UI metadata describing both artifacts; the schemas mirror the CSV
    # columns written above.
    metadata = {
        'version' : 1, 
        'outputs' : [{
            'type': 'confusion_matrix',
            'format': 'csv',
            'storage': 'gcs',
            'schema': [
                {'name': 'target', 'type': 'CATEGORY'},
                {'name': 'predicted', 'type': 'CATEGORY'},
                {'name': 'count', 'type': 'NUMBER'},
            ],
            'source': conf_matrix_uri,
            # Labels are the same strings used in the target/predicted columns.
            'labels': flag_list
        },
        {
          'type': 'roc',
          'format': 'csv',
          'storage': 'gcs',
          'schema': [
            {'name': 'fpr', 'type': 'NUMBER'},
            {'name': 'tpr', 'type': 'NUMBER'},
            {'name': 'thresholds', 'type': 'NUMBER'},
          ],
          'source': roc_curve_uri
        }
        ]
    }

    # Written twice: at the fixed container-root location and at the output
    # path supplied by the pipeline.
    for destination in ('/mlpipeline-ui-metadata.json', mlpipeline_ui_metadata):
        with file_io.FileIO(destination, 'w') as f:
            json.dump(metadata, f)

    # Pipeline metrics: 'name' is the column shown in the runs table,
    # 'numberValue' must be numeric, 'format' is "RAW" or "PERCENTAGE".
    reported = [
        ('accuracy-score', xgb_score),
        ('precision-score', xgb_precision),
        ('recall', xgb_recall),
        ('f1-score', xgb_f1),
        ('auc-score', auc_score),
    ]
    metrics = {
        'metrics': [
            {'name': name, 'numberValue': value, 'format': "RAW"}
            for name, value in reported
        ]
    }

    # Same two destinations as the UI metadata: container root and output path.
    for destination in ('/mlpipeline-metrics.json', mlpipeline_metrics):
        with file_io.FileIO(destination, 'w') as f:
            json.dump(metrics, f)
    

In [13]:
# Wrap xgb_model as a container op and save its component spec.
kfp_xgb_model = kfp.components.func_to_container_op(
    func=xgb_model,
    output_component_file='./xgb-model-func.yaml',
    packages_to_install=[
        'scikit-learn==0.22.2',
        'numpy==1.17.2',
        'pandas==1.0.3',
        'xgboost==1.0.2',
        'gcsfs',
    ],
)

#### Defining the Pipeline Execution Sequence and Input-Output scheme

In [14]:
import kfp.dsl as dsl
import numpy as np

@dsl.pipeline(name='Grid Search Pipeline',description='Run Grid-search on all 3 models for best input Parameters')
def GridSearch_func(file_path = "gs://pipelines_artifacts/Artifacts/Data.csv"):
    # Pipeline graph: read -> one-hot encode -> three grid searches, then the
    # random-forest and XGBoost models are trained with the parameters their
    # searches returned. The XGBoostRF search has no downstream consumer.

    # Candidate hyper-parameters for the random forest.
    rf_grid = {'max_depth': [10, 30, 100],
               'max_features': [2, 3], 'min_samples_leaf': [3, 4, 5], 'criterion': ['gini'],
               'min_samples_split': [2, 3, 4], 'n_estimators': [100, 200, 300, 1000]}

    # Candidate hyper-parameters shared by both XGBoost searches.
    boosted_grid = {'n_estimators': [100, 200, 300, 1000], 'verbosity': [0], 'max_depth': [10, 30, 100],
                    'eta': [1], 'silent': [0]}

    # The pipeline parameter is passed straight through as the file to read.
    loaded = kfp_read_data(file_name = file_path)
    encoded = kfp_one_hot_encode(df_churn_ip = loaded.outputs['df_churn_op'])
    encoded_csv = encoded.outputs['df_one_hot']

    rf_search = kfp_gridsearch(encoded_csv, 'RandomForest', rf_grid)
    xgb_search = kfp_gridsearch(encoded_csv, 'XGBoost', boosted_grid)
    kfp_gridsearch(encoded_csv, 'XGBoostRF', boosted_grid)

    kfp_rf_model(encoded_csv, rf_search.output)
    kfp_xgb_model(encoded_csv, xgb_search.output)

# For an operation with a single return value, the output reference can be accessed using `task.output` or `task.outputs['output_name']`.
# For an operation with multiple return values, the output references can be accessed using `task.outputs['output_name']`.

#### Compiling the Pipeline

In [15]:
# Compile the pipeline function into an archive named after the function.
import kfp.compiler as comp

pipeline_func = GridSearch_func
pipeline_filename = '{}.pipeline.tar.gz'.format(pipeline_func.__name__)

comp.Compiler().compile(pipeline_func, pipeline_filename)