In [1]:
import sys

In [2]:
#!{sys.executable} -m pip install --upgrade --user -r /home/jovyan/llm-examples/requirements.txt --extra-index-url https://download.pytorch.org/whl/cu118

In [3]:
import applyllm as ap

In [4]:
# Smoke test: call the package's hello() to confirm applyllm is importable in this kernel.
ap.hello()

Hello from ApplyLlm


In [5]:
# !{sys.executable} -m pip install --upgrade --user kfp==1.8.22

In [6]:
import kfp
from kfp import dsl
from functools import partial
from kfp.dsl import (
    pipeline,
    ContainerOp
)
from kfp.components import (
    InputPath,
    OutputPath,
    create_component_from_func
)
# Create the Kubeflow Pipelines client and ask it for the current user's namespace.
client = kfp.Client()
NAMESPACE = client.get_user_namespace()

# Experiment name as shown in the KF webapp UI, plus its free-text description.
EXPERIMENT_NAME = "demo"
EXPERIMENT_DESC = "custom package registry test"

print(NAMESPACE)

kubeflow-kindfor


## Install external package
V2 Kubeflow index urls
* https://github.com/kubeflow/pipelines/pull/7453

V1 Git repo
* git+https://github.com/pexpect/pexpect.git@master

V1 Git repo raw whl
* https://{token}@raw.githubusercontent.com/{user}/{repo}/master/{name.whl}

```
https://raw.githubusercontent.com/yingding/llm-examples/main/src/dist/applyllm-0.0.1-py3-none-any.whl
```

We can also find the whl in the private package registry:
```
https://gitlab.lrz.de/api/v4/projects/150553/packages/pypi/simple/applyllm
```

Clicking the download link there gives us the direct download URL:
```
https://gitlab.lrz.de/api/v4/projects/150553/packages/pypi/files/08ee3f757d1190c64bed6791d9da26a10f159287e3a08dcddd5308905bb2d678/applyllm-0.0.1-py3-none-any.whl#sha256=08ee3f757d1190c64bed6791d9da26a10f159287e3a08dcddd5308905bb2d678
```

So we don't need to upload the whl to the git repo (`https://raw.githubusercontent.com/yingding/llm-examples/main/src/dist/applyllm-0.0.1-py3-none-any.whl`).

Reference:
* Install whl https://stackoverflow.com/questions/68848055/pip-installing-a-whl-file-from-a-private-github-repository/68943373#68943373

In [7]:
from functools import partial

@partial(
    create_component_from_func,
    output_component_file="custom_registry_component.yaml",
    base_image="python:3.10.11",  # plain Python 3.10 image
    packages_to_install=[
        # Direct wheel URL from the GitLab package registry; the URL fragment carries its sha256.
        "https://gitlab.lrz.de/api/v4/projects/150553/packages/pypi/files/08ee3f757d1190c64bed6791d9da26a10f159287e3a08dcddd5308905bb2d678/applyllm-0.0.1-py3-none-any.whl#sha256=08ee3f757d1190c64bed6791d9da26a10f159287e3a08dcddd5308905bb2d678",
        # Alternative: install straight from the git repo instead of the registry wheel.
        # "git+https://github.com/yingding/llm-examples.git@main",
        "pandas==1.5.3",
    ],
    # With the v2 component decorator a private package index could be declared instead:
    # pip_index_urls=["https://gitlab.lrz.de/api/v4/projects/150553/packages/pypi/simple"]
)
def custom_comp():
    """Import the applyllm package inside the component and call its hello()."""
    # The import lives inside the body because the component is built from this
    # function alone, so module-level imports of the notebook are not available to it.
    import applyllm as ap
    ap.hello()

In [8]:
def pod_resource_transformer(op: ContainerOp, mem_req="200Mi", cpu_req="2000m", mem_lim="4000Mi", cpu_lim='4000m'):
    """
    Apply memory/CPU requests and limits to a container operator.

    The setters are invoked in the order memory request, memory limit,
    cpu request, cpu limit; each one is called on the value returned by the
    previous setter, and the result of the last call is returned.

    Units follow the resource notation used by the setters, e.g.
    '1000Mi' is roughly 1 GB of memory and '1000m' is 1 cpu core.

    @param op: the container operator to configure
    @param mem_req: memory request, default "200Mi"
    @param cpu_req: cpu request, default "2000m"
    @param mem_lim: memory limit, default "4000Mi"
    @param cpu_lim: cpu limit, default "4000m"
    @return: whatever the final set_cpu_limit call returns
    """
    with_mem_request = op.set_memory_request(mem_req)
    with_mem_limit = with_mem_request.set_memory_limit(mem_lim)
    with_cpu_request = with_mem_limit.set_cpu_request(cpu_req)
    return with_cpu_request.set_cpu_limit(cpu_lim)

In [9]:
@pipeline(
    name=EXPERIMENT_NAME,
    description=EXPERIMENT_DESC,
)
def custom_pipeline(epochs: int):
    """Single-task pipeline that runs custom_comp with explicit pod resources.

    Args:
        epochs: declared pipeline parameter; no task in this pipeline reads it.
    """
    # NOTE(review): the run cell submits an empty arguments dict, so no value is
    # supplied for `epochs` -- confirm whether the parameter is still needed.

    # Duration string for max_cache_staleness: "P0D" disables caching of the task
    # result, whereas "P1D" would keep cached results for one day.
    cache_setting = "P0D"

    custom_task = custom_comp()
    # Request 500Mi of memory and 200m (0.2 core) of CPU; the limits keep the
    # defaults of pod_resource_transformer.
    custom_task = pod_resource_transformer(custom_task, mem_req="500Mi", cpu_req="200m")
    custom_task.execution_options.caching_strategy.max_cache_staleness = cache_setting
    custom_task.set_display_name("install github packages")

In [10]:
# File stem of the compiled pipeline package; ".yaml" is appended on compile.
PIPE_LINE_FILE_NAME = "github_package_pipeline"
kfp.compiler.Compiler().compile(
    pipeline_func=custom_pipeline,
    package_path=f"{PIPE_LINE_FILE_NAME}.yaml",
)

In [11]:
from datetime import datetime
from pytz import timezone as ptimezone

def get_local_time_str(target_tz_str: str = "Europe/Berlin", format_str: str = "%Y-%m-%d %H-%M-%S") -> str:
    """
    Format the current time of a named timezone as a string.

    This helper exists because the local timezone is misconfigured on the
    server, so the timezone is requested explicitly by name.

    @param target_tz_str: name of the target timezone, default "Europe/Berlin"
    @param format_str: strftime pattern; the default "%Y-%m-%d %H-%M-%S"
        yields e.g. 2022-07-07 12-08-45
    @return: the current time in the target timezone, formatted with format_str
    """
    # Timezone object built from its name; from python3.9 on the standard lib ZoneInfo could be used instead.
    now_in_zone = datetime.now(ptimezone(target_tz_str))
    return now_in_zone.strftime(format_str)

In [12]:
# Pipeline-level configuration object handed to the run submission.
pipeline_config = dsl.PipelineConf()

# Image pull policy for the run; "Always" is the alternative that was tried here.
pipeline_config.set_image_pull_policy("IfNotPresent")

# Image pull secrets could be attached in the same way:
# from kubernetes import client as k8s_client
# pipeline_config.set_image_pull_secrets([k8s_client.V1ObjectReference(name=K8_GIT_SECRET_NAME, namespace=NAME_SPACE)])

# Arguments for the pipeline parameters; none are passed.
pipeline_args = dict()

In [13]:
# Put the current local time into the run name so each submission gets its own name.
RUN_NAME = f"custom package pipeline {get_local_time_str()}"

# Submit custom_pipeline as a run, reusing the `client` created in the setup cell.
run = client.create_run_from_pipeline_func(
    pipeline_func=custom_pipeline,
    arguments=pipeline_args,
    run_name=RUN_NAME,
    pipeline_conf=pipeline_config,
    experiment_name=EXPERIMENT_NAME,
    namespace=NAMESPACE,
)

# Display the run result as the cell output.
run

RunPipelineResult(run_id=7909fd8d-61cc-423a-93f2-b54f7813c7a0)