# About this Jupyter Notebook

@author: Yingding Wang\
@created: 30.06.2022\
@updated: 05.01.2024\
@version: 2

This notebook demonstrates a Kubeflow pipeline built from Python functions with the KFP Python SDK v2. The pipeline trains a single TensorFlow model.

## Install KFP Python SDK to build a V2 pipeline
* Build KF pipeline with python SDK: https://www.kubeflow.org/docs/components/pipelines/sdk/build-pipeline/
* Current KFP python SDK version on pypi.org: https://pypi.org/project/kfp/ 

In [1]:
import sys

In [2]:
# !{sys.executable} -m pip install --upgrade --user kfp[kubernetes]==2.6.0
# installs the following packages
# !{sys.executable} -m pip install --upgrade --user kfp==2.6.0 kfp-kubernetes==1.1.0 kfp-pipeline-spec==0.3.0 kfp-server-api==2.0.5

## Restart the Kernel

After the installation of KFP python SDK, the notebook kernel must be restarted.

## Getting familiar with Jupyter Notebook ENV 

In [3]:
# examining the kfp python sdk version inside Kubeflow v1.8.0
!{sys.executable} -m pip list | grep kfp

kfp                           2.6.0
kfp-kubernetes                1.1.0
kfp-pipeline-spec             0.3.0
kfp-server-api                2.0.5


In [4]:
from platform import python_version

# Report which Python interpreter the notebook kernel runs on.
print("current platform python version: " + python_version())

current platform python version: 3.11.6


In [5]:
# run the kubectl command line to see the resource quota of the namespace
!kubectl describe quota

Name:                                                                kf-resource-quota
Namespace:                                                           kubeflow-kindfor
Resource                                                             Used     Hard
--------                                                             ----     ----
cpu                                                                  2585m    36
csi-s3.storageclass.storage.k8s.io/persistentvolumeclaims            0        10
csi-s3.storageclass.storage.k8s.io/requests.storage                  0        2Ti
kubeflow-nfs-csi.storageclass.storage.k8s.io/persistentvolumeclaims  4        20
kubeflow-nfs-csi.storageclass.storage.k8s.io/requests.storage        45Gi     4Ti
memory                                                               11150Mi  520Gi
minio-nfs-csi.storageclass.storage.k8s.io/persistentvolumeclaims     2        20
minio-nfs-csi.storageclass.storage.k8s.io/requests.storage           210Gi    10Ti


## Setup global variables

In [6]:
import kfp

# Static labels used throughout the notebook.
EXPERIMENT_NAME = 'demo'  # experiment name as listed in the KF webapp UI
EXPERIMENT_DESC = 'this kf experiments loads iris data from tf dataset and build models'
PREFIX = "single_"  # prepended to compiled file names and to the run name

# KFP API client and the namespace it reports for the current user.
client = kfp.Client()
NAMESPACE = client.get_user_namespace()

print(NAMESPACE)

kubeflow-kindfor




In [7]:
from dataclasses import dataclass


@dataclass
class Settings:
    """Package pins and base images shared by the pipeline components.

    The field order is significant: it fixes the positional ``__init__``
    arguments and the order shown in the printed ``repr``.
    """

    # --- Python package versions (earlier pins noted for reference) ---
    tf_datasets: str = "4.9.4"       # tensorflow-datasets, was "4.9.2"
    pandas_version: str = "2.1.4"    # was "1.5.3"
    jinja2_version: str = "3.1.2"
    sklearn_version: str = "1.3.2"   # scikit-learn, was "1.2.2"
    numpy_version: str = "1.26.3"    # was "1.24.2"

    # --- container base images ---
    base_tf_image: str = "tensorflow/tensorflow:2.14.0"  # was "tensorflow/tensorflow:2.12.0"
    base_python_image: str = "python:3.11.7-bullseye"    # was "python:3.10.11"


settings = Settings()
print(settings)

Settings(tf_datasets='4.9.4', pandas_version='2.1.4', jinja2_version='3.1.2', sklearn_version='1.3.2', numpy_version='1.26.3', base_tf_image='tensorflow/tensorflow:2.14.0', base_python_image='python:3.11.7-bullseye')


### Creating KubeFlow component from python function

In [8]:
from kfp import dsl
from kfp import compiler

from kfp.dsl import (
    Input,
    Output,
    Dataset,
    Model,
    pipeline,
)

#### Create download component

In [9]:
@dsl.component(
    base_image=settings.base_tf_image, # use tf base image
    packages_to_install=[
        f"tensorflow-datasets=={settings.tf_datasets}",
        f"pandas=={settings.pandas_version}",
        f"Jinja2=={settings.jinja2_version}", # needed by tf dataset
        "urllib3==1.26.18",
        # Keep this pin equal to the SDK version that compiles the pipeline
        # (the notebook environment lists kfp 2.6.0); it was 2.5.0 before.
        "kfp==2.6.0",
    ] # adding additional libs
)
def download_data(csv_dataset: Output[Dataset]):
    """Download the iris training split with TensorFlow Datasets and export it as CSV.

    References:
        https://www.tensorflow.org/datasets/keras_example
        https://www.tensorflow.org/datasets/catalog/iris

    Args:
        csv_dataset: output artifact; the CSV (header row, no index column) is
            written to ``csv_dataset.path`` and its metadata ``type`` is set to ``csv``.
    """
    import tensorflow_datasets as tfds
    import tensorflow as tf

    ds_train, ds_info = tfds.load(
        'iris',
        split=tfds.Split.TRAIN,
        shuffle_files=True,
        as_supervised=True,
        with_info=True)
    # guard against tfds returning something other than a single dataset
    assert isinstance(ds_train, tf.data.Dataset)

    # convert the whole split to a pandas dataframe
    df = tfds.as_dataframe(ds_train, ds_info)

    # export csv data without index; the file is only written, so plain "w" suffices
    with open(csv_dataset.path, "w", encoding="utf-8") as f:
        df.to_csv(f, index=False, header=True, encoding="utf-8")
    csv_dataset.metadata['type'] = 'csv'

#### Create data processing component

In [10]:
@dsl.component(
    base_image=settings.base_python_image, # use python base image
    packages_to_install=[
        f"pandas=={settings.pandas_version}",
        f"scikit-learn=={settings.sklearn_version}",
        f"numpy=={settings.numpy_version}",
    ] # adding additional libs
)
def process_data(label_col_name: str, feature_col_name: str, 
                 csv_dataset: Input[Dataset], 
                 train_csv_dataset: Output[Dataset], 
                 test_csv_dataset: Output[Dataset]):
    """Unpack the iris feature vector into named columns and write a train/test split.

    Args:
        label_col_name: name of the label column in the input CSV.
        feature_col_name: name of the column that holds the feature vector as
            bracketed, space-separated text.
        csv_dataset: input artifact; CSV with exactly two columns (features, label).
        train_csv_dataset: output artifact receiving 80% of the rows as CSV.
        test_csv_dataset: output artifact receiving the remaining 20% as CSV.
    """
    import pandas as pd
    import numpy as np
    from pandas import DataFrame
    from sklearn.model_selection import train_test_split

    with open(csv_dataset.path, "r", encoding="utf-8") as f:
        df = pd.read_csv(f, sep=",", header=0, index_col=None)

    print("input csv dataframe")
    print(df)
    print(df.shape)

    def iris_ndarray_to_feature_columns(df: DataFrame, feature_col_name="features") -> DataFrame:
        """Return a new DataFrame with the packed feature column split into four named columns.

        The frame passed in is left untouched; the previous implementation added
        helper columns to it despite being documented as not in-place.
        """
        # the input must consist of the feature column and the label column only
        assert df.shape[1]==2
        feature_names = [
            "sepal length (cm)",
            "sepal width (cm)",
            "petal length (cm)",
            "petal width (cm)",
        ]
        # parse the bracketed text of every row into a numpy array
        parsed = df[feature_col_name].apply(
            lambda text: np.fromstring(
                text.replace('[', '').replace(']', ''), sep=' '))
        # drop() returns a new frame, so the added columns never reach the caller's frame
        expanded = df.drop(columns=[feature_col_name])
        for position, name in enumerate(feature_names):
            expanded[name] = parsed.apply(lambda values, position=position: values[position])
        return expanded

    # unpack the numpy array feature to feature columns
    df = iris_ndarray_to_feature_columns(df, feature_col_name)

    # split: every column except the label is a feature; fixed seed for a repeatable split
    all_feature_cols_mask = ~df.columns.isin([label_col_name])
    X_train, X_test, y_train, y_test = train_test_split(
        df.loc[:, all_feature_cols_mask], df.loc[:, [label_col_name]], test_size=0.2, random_state=0)

    # re-attach the labels; join aligns on the row index
    df_train = X_train.join(y_train)
    df_test = X_test.join(y_test)
    print(f"df_train.shape {df_train.shape}")
    print(f"df_test.shape {df_test.shape}")

    # output training set
    with open(train_csv_dataset.path, "w", encoding="utf-8") as f:
        df_train.to_csv(f, index=False, header=True, encoding="utf-8")

    # output test set
    with open(test_csv_dataset.path, "w", encoding="utf-8") as f:
        df_test.to_csv(f, index=False, header=True, encoding="utf-8")

#### Create TensorFlow model

* Example of creating TensorFlow FNN model with iris dataset: https://medium.com/@nutanbhogendrasharma/tensorflow-deep-learning-model-with-iris-dataset-8ec344c49f91

In [11]:
from typing import NamedTuple
@dsl.component(
    base_image=settings.base_tf_image, # use tensorflow base image
    packages_to_install=[
        f"pandas=={settings.pandas_version}",
        f"scikit-learn=={settings.sklearn_version}",
        f"numpy=={settings.numpy_version}",
    ] # adding additional libs
)
def train_tf(
    batch_size: int, epochs: int, label_col_name: str, 
    train_csv_dataset: Input[Dataset], test_csv_dataset: Input[Dataset],
    cm_output_dataset: Output[Dataset]
) -> NamedTuple('ModelScores', [('model', str), ('acc', float), ('f1', float)]): 
    """Train a small dense network on the iris training CSV and score it on the test CSV.

    Args:
        batch_size: batch size passed to ``model.fit``.
        epochs: number of epochs passed to ``model.fit``.
        label_col_name: name of the label column; all other columns are features.
        train_csv_dataset: input artifact with the training rows as CSV.
        test_csv_dataset: input artifact with the test rows as CSV.
        cm_output_dataset: output artifact receiving the confusion matrix as CSV.

    Returns:
        ModelScores with the model code ``"tf"``, the test accuracy and the
        weighted F1 score on the test set.
    """
    from pandas import DataFrame
    import pandas as pd
    import tensorflow as tf
    from sklearn.metrics import confusion_matrix, f1_score
    from collections import namedtuple
    import numpy as np
    
    print(f"tf.__version {tf.__version__}")
    
    df_train = pd.read_csv(train_csv_dataset.path, sep=",", header=0, index_col=None)
    df_test = pd.read_csv(test_csv_dataset.path, sep=",", header=0, index_col=None)
    
    def get_feature_label(df: DataFrame, label_col_name: str):
        """
        local util function to split a dataframe into features and one-hot labels
        """
        # build the mask from the frame that was passed in; it used to be taken
        # from the enclosing df_train regardless of the argument
        all_feature_cols_mask = ~df.columns.isin([label_col_name])
        features = df.loc[:, all_feature_cols_mask]
        # NOTE: get_dummies only creates columns for label values present in df
        one_hot_labels = pd.get_dummies(df[label_col_name])
        return features, one_hot_labels
        
    X_train, y_train = get_feature_label(df_train, label_col_name)
    print("train df")
    print(X_train)
    print(y_train)
    
    print("train values")
    print(X_train.to_numpy())
    print(y_train.to_numpy())
    
    print(f"training with batch size: {batch_size}, epoch: {epochs}")
    model = tf.keras.Sequential([
        tf.keras.layers.Dense(10, activation=tf.nn.relu),
        tf.keras.layers.Dense(10, activation=tf.nn.relu),
        tf.keras.layers.Dense(3, activation=tf.nn.softmax)
    ])
    # config
    model.compile(
        optimizer=tf.optimizers.Adam(),
        loss='categorical_crossentropy',
        metrics=['accuracy'])
    
    # train model
    model.fit(X_train.to_numpy(), y_train.to_numpy(), batch_size=batch_size, epochs=epochs)
    
    # show the shape and param of the ANN model
    # note: summary() must be called after fit() or build()
    model.summary()
    
    # evaluate model
    print("evaluate model")
    X_test, y_test = get_feature_label(df_test, label_col_name)
    loss, accuracy = model.evaluate(X_test.to_numpy(), y_test.to_numpy(), verbose=0)
    print(f"Test loss: {loss}")
    print(f"Test accuracy: {accuracy}")
      
    # predict model 
    y_pred: np.ndarray = model.predict(X_test.to_numpy())
    
    # convert one-hot labels and predicted probabilities to class indices
    actual = np.argmax(y_test.to_numpy(), axis=1)
    predicted = np.argmax(y_pred, axis=1)
    print(f"actual: {actual}")
    print(f"predicted: {predicted}")
    
    # for multi-class cls with weighted f1
    AVERAGE_MODE = "weighted"
    f1 = f1_score(actual, predicted, average=AVERAGE_MODE) 
    print(f"Test f1: {f1}")
    
    def create_iris_confusion_matrix_helper(actual, predicted) -> DataFrame:
        """ create a confusion matrix DataFrame
        @param: actual: array-like of shape (n_samples,)
        @param: predicted: array-like of shape (n_samples,)
        """
        label_map = {
            "0": "Iris-setosa",
            "1": "Iris-versicolor",
            "2": "Iris-virginica"
        }
        # the keys are str and must be converted to int to serve as labels
        # for the function confusion_matrix
        labels = list(map(int, label_map.keys()))
        vocab = list(label_map.values())
        cm: np.ndarray = confusion_matrix(actual, predicted, labels = labels)
        return pd.DataFrame(cm, index=vocab, columns=vocab)    
    
    conf_mat_df = create_iris_confusion_matrix_helper(actual, predicted)
    print(conf_mat_df)
    
    # write conf_mat_df to the output artifact
    conf_mat_df.to_csv(cm_output_dataset.path, index=True, encoding="utf-8")    
        
    mod_scores = namedtuple('ModelScores',['model', 'acc', 'f1'])
    # hand back plain Python floats for the declared float outputs
    return mod_scores("tf", float(accuracy), float(f1))

### Define Confusion Matrix Visualization Component
* Visualization with Kubeflow: https://www.kubeflow.org/docs/components/pipelines/v1/sdk/output-viewer/

In [12]:
# @partial(
#     create_component_from_func,
#     output_component_file=f"{PREFIX}visualize_confusion_matrix_component.yaml",
#     base_image=settings.base_python_image, # use python base image
#     packages_to_install=[
#         f"pandas=={settings.pandas_version}",
#     ]    
# )
# def confusion_visualization(csv_path: InputPath(), mlpipeline_ui_metadata_path: OutputPath(str)):
#     """Provide confusion matrix csv file to visualize as metrics."""
#     import json
#     import pandas as pd
#     from typing import List, Tuple
#     from collections import namedtuple
    
#     cm_df = pd.read_csv(csv_path, index_col=0)
#     print(cm_df)
    
#     def make_pair_dataframe(df: pd.DataFrame) -> Tuple[pd.DataFrame, List]: 
#         """
#         this function constructs a target, predicted, count pair dataframe,
#         to be used for kf v1 confusion matrix visualiation
#         """
#         # create a vocabular list from the dataframe column names as List[str]
#         vocab = list(map(str, df.columns.values.tolist()))
#         data = []
#         # use df.to_numpy() to remove the header row for enumerate
#         for target_index, target_row in enumerate(cm_df.to_numpy()):
#             for predicted_index, count in enumerate(target_row):
#                 data.append((vocab[target_index], vocab[predicted_index], count))
            
#         # convert confusion_matrix pair dataset to dataframe
#         df = pd.DataFrame(data, columns=['target', 'predicted', 'count'])

#         # change 'target', 'predicted' to strings
#         # df[['target', 'predicted']] = (df[['target', 'predicted']].astype("string")).astype("string")

#         vocab = cm_df.columns.values.tolist()
#         return df, vocab
    
#     pair_df, vocab = make_pair_dataframe(cm_df)
#     # print(pair_df)
#     # print(pair_df.dtypes)
#     # print(f"type of vocab: {type(vocab)}")
#     # print(vocab)
    
#     '''
#     Important: 
#         make sure the 'label' is a List[str]
#         use list(map(str, vocab)) to convert, if not
#     '''
#     metadata = {
#         'outputs' : [{
#           'type': 'confusion_matrix',
#           'format': 'csv',
#           'schema': [
#             {'name': 'target', 'type': 'CATEGORY'},
#             {'name': 'predicted', 'type': 'CATEGORY'},
#             {'name': 'count', 'type': 'NUMBER'},
#           ],
#           'source': pair_df.to_csv(header=False, index=False),
#           'storage': 'inline',
#           'labels': vocab,
#         }]
#     }
    
#     with open(mlpipeline_ui_metadata_path, 'w') as metadata_file:
#         json.dump(metadata, metadata_file)

#### Create Run Output Component
Reference to KF v1 Pipeline Metrics: 
* https://www.kubeflow.org/docs/components/pipelines/sdk/pipelines-metrics/

The `name` and `numberValue` keys must not be renamed.
```json
            {
                "name": "some name",
                "numberValue": some value,
            },
            {
                "name": "f1 score",
                "numberValue": metric_tf,
            },
```

In [13]:
# @create_component_from_func
# def show_performance_metrics(
#     model_code: str,
#     metric_f1: float,
#     metric_acc: float,
#     mlpipeline_metrics_path: OutputPath("Metrics"),
# ):
#     import json
#     metrics = {
#         "metrics": [
#             {
#                 "name": f"{model_code}-acc-score", 
#                 "numberValue": metric_acc,
#                 "format": "PERCENTAGE"
#             },
#             {
#                 "name": f"{model_code}-f1-score",
#                 "numberValue": metric_f1,
#                 "format": "PERCENTAGE"
#             },
#         ],
#     } 
#     with open(mlpipeline_metrics_path, "w") as f:
#         json.dump(metrics, f)

### Define Helper Function
Difference between 2Gi and 2G\
https://stackoverflow.com/questions/50804915/kubernetes-size-definitions-whats-the-difference-of-gi-and-g/50805048#50805048

In [14]:
def gen_compiled_file_path(file_name: str, prefix="", pipeline_path_dir="./compiled", kfp_version="v2_") -> str:
    """
    Build the target path of a compiled file:
    ``<pipeline_path_dir>/<prefix><kfp_version><file_name>.yaml``.

    In KFP SDK v2, YAML is the preferred serialization format. Json will also work
    Reference:
    https://www.kubeflow.org/docs/components/pipelines/v2/migration/#sdk-v1-v2-namespace-to-sdk-v2

    Args:
        file_name: base name of the file, without extension.
        prefix: text placed in front of the version tag.
        pipeline_path_dir: directory part of the path.
        kfp_version: version tag placed in front of the file name.

    Return:
        (str): the assembled path ending in ``.yaml``
    """
    path_template = "{directory}/{prefix}{version}{name}.yaml"
    return path_template.format(
        directory=pipeline_path_dir,
        prefix=prefix,
        version=kfp_version,
        name=file_name,
    )

In [15]:
from kfp.dsl.pipeline_task import PipelineTask

def set_res_limit(task: PipelineTask, mem_req="200Mi", cpu_req="2000m", mem_lim="4000Mi", cpu_lim='4000m') -> PipelineTask:
    """Set the cpu and memory requests and limits of a pipeline task.

    If the memory limit is set too small, the Task Pod would be stopped by
    kubernetes with OOMKilled status.

    Args:
        task(PipelineTask): the KFP PipelineTask to configure
        mem_req(str): memory request, e.g. '200Mi'
        cpu_req(str): cpu request, e.g. '2000m'
        mem_lim(str): memory limit, e.g. '4000Mi'
        cpu_lim(str): cpu limit, e.g. '4000m'

    Return:
        (PipelineTask): the PipelineTask with the desired requests and limits set
    """
    # every setter hands the task back, so the calls are applied one by one
    task = task.set_cpu_request(cpu_req)
    task = task.set_cpu_limit(cpu_lim)
    task = task.set_memory_request(mem_req)
    task = task.set_memory_limit(mem_lim)
    return task

## Define Pipeline
* Intro Kubeflow pipeline: https://v1-5-branch.kubeflow.org/docs/components/pipelines/introduction/
* Kubeflow pipeline SDK v1: https://v1-5-branch.kubeflow.org/docs/components/pipelines/sdk/sdk-overview/

In [16]:
@pipeline(
    name = EXPERIMENT_NAME,
    description = EXPERIMENT_DESC
)
def custom_pipeline(epochs: int):
    '''Three-step iris pipeline: download_data, then process_data, then train_tf.

    epochs is the only pipeline parameter and is forwarded to train_tf; the
    column names and the batch size are fixed inside the pipeline.
    '''
    # local variables
    # no_artifact_cache = False
    # artifact_cache_today = True
    # cache_setting = no_artifact_cache
    label_col_name = "label"
    feature_col_name = "features"
    batch_size = 50
    # epochs = 100
    
    '''pipeline'''   
    download_task = download_data()
    # request 500Mi memory and 1 cpu; the limits stay at the set_res_limit defaults
    download_task = set_res_limit(download_task, mem_req="500Mi", cpu_req="1000m")
    # (disabled) set the download caching to be 1 day, disable caching with P0D
    # download_task.set_caching_options(enable_caching=cache_setting)
    # download_task.set_display_name("download-iris-data")
    
    # split the downloaded CSV into train and test sets
    # (older note: for a variable named "output_path", the "_path" suffix is removed by the system)
    process_data_task = process_data(
        label_col_name=label_col_name, 
        feature_col_name=feature_col_name,
        csv_dataset=download_task.output)
    process_data_task = set_res_limit(process_data_task, mem_req="500Mi", cpu_req="1000m")
    # BUG: after changing the display name, the artifacts can not be found.
    # process_data_task.set_caching_options(enable_caching=cache_setting)
    # process_data_task.set_display_name("split-iris-data")
    
    # train the tensorflow model on the two outputs of the split step
    train_tf_task = train_tf(
        batch_size=batch_size,
        epochs=epochs,
        label_col_name=label_col_name,
        train_csv_dataset=process_data_task.outputs["train_csv_dataset"],
        test_csv_dataset=process_data_task.outputs["test_csv_dataset"]
    )
    # training gets a larger request: 2Gi memory and 2 cpu
    train_tf_task = set_res_limit(train_tf_task, mem_req="2Gi", cpu_req="2000m")
    # train_tf_task.set_caching_options(enable_caching=cache_setting)
    # train_tf_task.set_display_name("train-tf-model")
    
    # (disabled) visualize confusion matrix; relies on the commented-out
    # confusion_visualization component and on pod_resource_transformer,
    # which is not defined in this notebook
    # visualization_task = confusion_visualization(train_tf_task.outputs["cm_output"])
    # visualization_task = pod_resource_transformer(visualization_task, mem_req="200Mi", cpu_req="500m")
    # visualization_task.execution_options.caching_strategy.max_cache_staleness = cache_setting
    # visualization_task.set_display_name("visualize confusion matrix")
    
    # (disabled) show model outputs; relies on the commented-out
    # show_performance_metrics component
    # show_ml_metrics_task = show_performance_metrics(
    #     model_code = train_tf_task.outputs['model'],
    #     metric_f1 = train_tf_task.outputs['f1'],
    #     metric_acc =  train_tf_task.outputs['acc'],
    # )
    # show_ml_metrics_task = pod_resource_transformer(show_ml_metrics_task, mem_req="200Mi", cpu_req="500m")
    # show_ml_metrics_task.execution_options.caching_strategy.max_cache_staleness =cache_setting
    # show_ml_metrics_task.set_display_name("output model metrics")

# alias used by the compile cell
my_pipeline = custom_pipeline

### (optional) pipeline compile step
Run the following cell to compile the components and the pipeline to the IR YAML serialized format.

In [17]:
from kfp import compiler
import os

# Directory that receives every compiled YAML file; created when missing.
pipeline_path_dir = "./compiled"
os.makedirs(pipeline_path_dir, exist_ok=True)

# (component, target file) pairs, compiled one by one below.
component_list = [
    (
        download_data,
        gen_compiled_file_path(
            file_name="comp_download_iris_data", prefix=PREFIX, pipeline_path_dir=pipeline_path_dir),
    ),
    (
        process_data,
        gen_compiled_file_path(
            file_name="comp_process_iris_data", prefix=PREFIX, pipeline_path_dir=pipeline_path_dir),
    ),
    # this entry used to list process_data a second time, so the train_tf
    # component was never compiled and its file held the wrong component
    (
        train_tf,
        gen_compiled_file_path(
            file_name="comp_train_tf_iris_data", prefix=PREFIX, pipeline_path_dir=pipeline_path_dir),
    ),
]

my_pipeline_file_name = "pipeline_iris_demo"
pipeline_package_path = gen_compiled_file_path(
    my_pipeline_file_name, prefix=PREFIX, pipeline_path_dir=pipeline_path_dir)

# compile component, instead of using output_component_file in the @dsl.component decorator
for component_func, component_path in component_list:
    compiler.Compiler().compile(
        pipeline_func=component_func,
        package_path=component_path,
    )

# compile pipeline
compiler.Compiler().compile(
    pipeline_func=my_pipeline,
    package_path=pipeline_package_path
)

### Create Experiment Run

Create a run label from the current date and time:
```python
from datetime import datetime
from pytz import timezone as ptimezone
ts = datetime.strftime(datetime.now(ptimezone("Europe/Berlin")), "%Y-%m-%d %H-%M-%S")
print(ts)
```

Reference:
* https://stackoverflow.com/questions/25837452/python-get-current-time-in-right-timezone/25887393#25887393

In [18]:
from datetime import datetime
from pytz import timezone as ptimezone

def get_local_time_str(target_tz_str: str = "Europe/Berlin", format_str: str = "%Y-%m-%d %H-%M-%S") -> str:
    """
    Return the current time in the given timezone as a formatted string.

    This helper exists because the local timezone is misconfigured on the server.
    @param: target_tz_str: timezone name, default "Europe/Berlin"
    @param: format_str: strftime pattern; "%Y-%m-%d %H-%M-%S" returns e.g. 2022-07-07 12-08-45
    """
    # timezone object comes from pytz; from python3.9 on the standard lib ZoneInfo could be used
    now_in_target_tz = datetime.now(ptimezone(target_tz_str))
    return now_in_target_tz.strftime(format_str)

### Config pipeline run
* Setting imagePullSecrets for Pipeline with SDK: https://github.com/kubeflow/pipelines/issues/5843#issuecomment-859799181

In [19]:
# from kubernetes import client as k8s_client
# pipeline_config = dsl.PipelineConf()

# pipeline_config.set_image_pull_secrets([k8s_client.V1ObjectReference(name=K8_GIT_SECRET_NAME, namespace=NAME_SPACE)])
# pipeline_config.set_image_pull_policy("Always")
# pipeline_config.set_image_pull_policy("IfNotPresent")

# Arguments passed to the pipeline function when the run is created.
pipeline_args = dict(epochs=100)

In [20]:
# Step caching switch for this run; set to False to disable it.
CACHING_SETTING = True

# Run label: file prefix, pipeline tag and the current local time.
RUN_NAME = "{}v2_iris_demo {}".format(PREFIX, get_local_time_str())
# RUN_NAME = RUN_NAME.replace("_","-")

# Submit the pipeline function as a run, reusing the client created in the setup cell.
run = client.create_run_from_pipeline_func(
    pipeline_func=custom_pipeline,
    arguments=pipeline_args,
    run_name=RUN_NAME,
    experiment_name=EXPERIMENT_NAME,
    namespace=NAMESPACE,
    enable_caching=CACHING_SETTING,
    # pipeline_conf=pipeline_config,
)
run

RunPipelineResult(run_id=20aba077-3a13-46af-8de8-1c4a3f3b84e3)