# Intro

Author: Yingding Wang\
Created on: 09.01.2024

This notebook demonstrates metrics visualization for a Kubeflow Pipelines (KFP) v2 pipeline on a Kubeflow 1.8.0 manifests deployment with ML Pipeline API server 2.0.5.

## KFP v2 visualization docs
* KFP SDK pipeline visualization (output viewer, v1 docs) https://www.kubeflow.org/docs/components/pipelines/v1/sdk/output-viewer/
* example https://github.com/kubeflow/pipelines/blob/sdk/release-1.8/samples/test/metrics_visualization_v2.py
* viewer function https://github.com/kubeflow/pipelines/blob/55a2fb5c20011b01945c9867ddff0d39e9db1964/sdk/python/kfp/v2/components/types/artifact_types.py#L255-L256


In [1]:
import sys
from platform import python_version
# Record the interpreter version so the pinned package versions below can be matched to it.
print(f"current platform python version: {python_version()}")

current platform python version: 3.11.6


In [2]:
# !{sys.executable} -m pip install --upgrade --user kfp[kubernetes]==2.6.0
# !{sys.executable} -m pip install --upgrade --user kfp==2.6.0 kfp-kubernetes==1.1.0 kfp-pipeline-spec==0.3.0 kfp-server-api==2.0.5

In [3]:
!{sys.executable} -m pip list | grep kfp

kfp                           2.6.0
kfp-kubernetes                1.1.0
kfp-pipeline-spec             0.3.0
kfp-server-api                2.0.5


In [4]:
from kfp import dsl

from kfp.dsl import (
    Input,
    Output,
    ClassificationMetrics
)

In [5]:
@dsl.component(
    packages_to_install=['scikit-learn==1.3.2'],
    base_image='python:3.11.7-bullseye'
)
def iris_sgdclassifier(test_samples_fraction: float, metrics: Output[ClassificationMetrics]):
    """Log the confusion matrix of an SGD classifier cross-validated on Iris.

    The Iris data is split into a training and a held-out part; 3-fold
    cross-validated predictions on the training part are compared with the
    true training labels and the resulting confusion matrix is logged.

    Args:
        test_samples_fraction: Fraction of the samples passed as ``test_size``
            to ``train_test_split``; only the remaining training part is used.
        metrics: Output artifact that receives the confusion matrix.
    """
    # Imports live inside the function because they are only needed where the
    # component body is executed.
    from sklearn import datasets, model_selection
    from sklearn.linear_model import SGDClassifier
    from sklearn.metrics import confusion_matrix

    # Display names, listed in the order of the integer targets 0, 1, 2.
    categories = ['Setosa', 'Versicolour', 'Virginica']

    iris_dataset = datasets.load_iris()
    # The held-out part of the split is not evaluated here, so it is discarded.
    train_x, _, train_y, _ = model_selection.train_test_split(
        iris_dataset['data'], iris_dataset['target'], test_size=test_samples_fraction)

    # cross_val_predict fits a clone of the estimator on every fold, so a
    # separate fit() beforehand would not influence the predictions.
    predictions = model_selection.cross_val_predict(
        SGDClassifier(), train_x, train_y, cv=3)

    # Pin the label set so the matrix always has one row/column per category,
    # even when a class is absent from the sampled training part.
    matrix = confusion_matrix(
        train_y, predictions, labels=list(range(len(categories))))

    metrics.log_confusion_matrix(
        categories,
        matrix.tolist()  # .tolist() to convert np array to list.
    )

# The pipeline name defaults to one derived from the function name; pass
# name='metrics-visualization-pipeline' to @dsl.pipeline to set it explicitly.
@dsl.pipeline()
def metrics_visualization_pipeline(test_samples_fraction: float):
    """Single-step pipeline that runs the ``iris_sgdclassifier`` component.

    Args:
        test_samples_fraction: Forwarded unchanged to ``iris_sgdclassifier``.
    """
    # Calling the component inside the pipeline body adds it as a task; the
    # returned task object is not needed because nothing consumes its outputs.
    iris_sgdclassifier(test_samples_fraction=test_samples_fraction)

# Use the pipeline itself (not the bare component) for compilation and runs,
# so the pipeline definition above is what actually gets executed.
my_pipeline = metrics_visualization_pipeline

In [6]:
def gen_compiled_file_path(file_name: str, pipeline_path_dir="./compiled") -> str:
    """Return the path of the compiled YAML package for ``file_name``.

    KFP SDK v2 prefers YAML as its serialization format (JSON also works), see
    https://www.kubeflow.org/docs/components/pipelines/v2/migration/#sdk-v1-v2-namespace-to-sdk-v2

    Args:
        file_name: Base name of the package, without extension.
        pipeline_path_dir: Directory part of the path; joined with ``/``.

    Returns:
        ``<pipeline_path_dir>/<file_name>.yaml``
    """
    return "{}/{}.yaml".format(pipeline_path_dir, file_name)

In [7]:
from kfp import compiler
import os

# Single source of truth for the output directory: it is created up front and
# passed explicitly to the path helper so the two can never drift apart.
pipeline_path_dir = "./compiled"
# exist_ok=True makes the cell safe to re-run without a separate existence check.
os.makedirs(pipeline_path_dir, exist_ok=True)

component_file_name = "iris_sgdclassifier_component"
component_file_path = gen_compiled_file_path(component_file_name, pipeline_path_dir)

my_pipeline_file_name = "metrics_pipeline_v2"
pipeline_package_path = gen_compiled_file_path(my_pipeline_file_name, pipeline_path_dir)

# compile component, instead of using output_component_file in the @dsl.component decorator
compiler.Compiler().compile(
    pipeline_func=iris_sgdclassifier,
    package_path=component_file_path
)

# compile whatever `my_pipeline` refers to into its own package file
compiler.Compiler().compile(
    pipeline_func=my_pipeline,
    package_path=pipeline_package_path
)

In [8]:
from kfp.client import Client
import warnings

# Suppress the FutureWarning emitted by the kfp v2 client, see
# https://stackoverflow.com/questions/14463277/how-to-disable-python-warnings/14463362#14463362
with warnings.catch_warnings(action="ignore", category=FutureWarning):
    # No explicit credentials here: the kubeflow pipeline poddefault passes
    # the credential to the client.
    client = Client()

# Namespace and experiment the run is submitted to.
NAMESPACE = client.get_user_namespace()
EXPERIMENT_NAME = "demo"
print(NAMESPACE)

# Input parameters handed to the run.
args = dict(test_samples_fraction=0.3)

# Switch to True to submit the run with caching enabled.
ENABLE_CACHING = False

kubeflow-kindfor


In [9]:
# Submit `my_pipeline` as a run using the settings from the previous cell.
run = client.create_run_from_pipeline_func(
    pipeline_func=my_pipeline,
    arguments=args,
    experiment_name=EXPERIMENT_NAME,
    namespace=NAMESPACE,
    enable_caching=ENABLE_CACHING,
)
# Show the run result as the cell output.
run

RunPipelineResult(run_id=6a46416e-015b-4cf5-9d5f-14e2bd8304b4)