In [1]:
import sys

In [2]:
!{sys.executable} -m pip install kfp==2.0.0b13 kfp-kubernetes==1.0.0



In [7]:
from kfp.components.pipeline_task import PipelineTask

def set_res_limit(task: PipelineTask, cpu_limit: str, mem_limit: str) -> PipelineTask:
    """Apply a CPU limit and a memory limit to a KFP pipeline task.

    Only limits are configured here; no CPU or memory requests are set.
    Note carried over from the original author: if a limit is set too small,
    Kubernetes may stop the task pod with an OOMKilled status.

    Args:
        task (PipelineTask): the KFP task whose CPU and memory limits are set.
        cpu_limit (str): CPU limit as a string, e.g. '1' for one CPU or
            '0.5' for half a CPU.
        mem_limit (str): memory limit as a string, e.g. '500M' for 500MB RAM.

    Returns:
        PipelineTask: the result of the chained limit setters, i.e. the task
        with both limits applied.
    """
    cpu_limited_task = task.set_cpu_limit(cpu_limit)
    return cpu_limited_task.set_memory_limit(mem_limit)

In [8]:
from kfp import dsl
from kfp import kubernetes
from kfp import compiler

@dsl.component
def make_data():
    """Write the text 'my data' to /data/file.txt."""
    # /data is expected to be provided by a volume mounted on this task.
    with open('/data/file.txt', 'w') as out_file:
        out_file.write('my data')

@dsl.component
def read_data():
    """Print the contents of /reused_data/file.txt."""
    # /reused_data is expected to be provided by a volume mounted on this task.
    with open('/reused_data/file.txt') as in_file:
        contents = in_file.read()
    print(contents)

@dsl.pipeline
def my_pipeline():
    """Create a PVC, write a file to it, read the file back, then delete the PVC.

    The same PVC is mounted at /data for make_data and at /reused_data for
    read_data, which is how the two tasks exchange the file. Both tasks get a
    CPU limit of '1' and a memory limit of '500M'.
    """
    pvc1 = kubernetes.CreatePVC(
        # can also use pvc_name instead of pvc_name_suffix to use a pre-existing PVC
        pvc_name_suffix='-my-pvc',
        access_modes=['ReadWriteOnce'],
        size='1Gi',
        storage_class_name='default',
    )

    task1 = make_data()
    task1 = set_res_limit(task=task1, cpu_limit='1', mem_limit='500M')
    # Normally task sequencing is handled by data exchange via component
    # inputs/outputs, but since data is exchanged via the volume we need to
    # call .after explicitly to sequence the tasks.
    task2 = read_data().after(task1)
    task2 = set_res_limit(task=task2, cpu_limit='1', mem_limit='500M')

    # Attach the PVC to both tasks. Without these mounts the paths the
    # components use (/data and /reused_data) are not backed by the PVC, so
    # the writer and the reader would not share the file.
    kubernetes.mount_pvc(
        task1,
        pvc_name=pvc1.outputs['name'],
        mount_path='/data',
    )
    kubernetes.mount_pvc(
        task2,
        pvc_name=pvc1.outputs['name'],
        mount_path='/reused_data',
    )

    # Wait to delete the PVC until after task2 completes.
    kubernetes.DeletePVC(pvc_name=pvc1.outputs['name']).after(task2)

In [9]:
# Compile my_pipeline and write the resulting package to ./pvc_pipeline.yaml.
compiler.Compiler().compile(
    pipeline_func=my_pipeline,
    package_path="./pvc_pipeline.yaml"
)

In [10]:
from kfp.client import Client
# Create a KFP client with default arguments and look up the namespace it
# reports for the current user; the namespace is reused when submitting the run.
client = Client()
NAMESPACE = client.get_user_namespace()
print(NAMESPACE)

kubeflow-kindfor


In [None]:
# Caching is switched off for this run; set ENABLE_CACHING to True to enable it.
ENABLE_CACHING = False
EXPERIMENT_NAME = "demo"

# Submit my_pipeline as a run named "my pvc test" in the EXPERIMENT_NAME
# experiment, inside the namespace looked up from the client.
run = client.create_run_from_pipeline_func(
    pipeline_func=my_pipeline,
    arguments = {},  # my_pipeline declares no parameters
    run_name="my pvc test",
    experiment_name = EXPERIMENT_NAME,
    namespace=NAMESPACE,
    enable_caching=ENABLE_CACHING,
)