# Using MLRUN functions locally, as a Kubernetes Job, and in a Workflow

In [None]:
# Optional: replace the installed mlrun package with the build from the
# `development` branch of the mlrun GitHub repository.
# NOTE(review): `!pip` uses whichever pip the shell finds, which may not be the
# kernel's environment — consider `%pip`, and confirm whether a kernel restart
# is needed before the reinstalled package is imported.
!pip uninstall -y mlrun
!pip install git+https://github.com/mlrun/mlrun.git@development

In [1]:
# nuclio: ignore
# if the nuclio-jupyter package is not installed run !pip install nuclio-jupyter
import nuclio 

## Define function and its dependencies 

In [2]:
# Build configuration for the function image: add a `pip install pandas` build
# command and select the base image. Both appear in the generated Dockerfile
# shown in the build log further down (`FROM python:3.6-jessie`, `RUN pip install pandas`).
# NOTE(review): `-c` presumably records the command for the build only, without
# running it in this kernel — confirm against the nuclio-jupyter documentation.
%nuclio cmd -c pip install pandas
%nuclio config spec.build.baseImage = "python:3.6-jessie"

%nuclio: setting spec.build.baseImage to 'python:3.6-jessie'


In [3]:
import os

def training(context, p1=1, p2=2):
    """Demo training handler.

    Prints the run identity and parameters, then records two scalar results
    derived from ``p1`` (``accuracy = p1 * 2`` and ``loss = p1 * 3``), tags the
    run with a ``category`` label and logs a small ``model.txt`` artifact.

    :param context: run context; used for ``name``, ``uid``, ``logger`` and the
                    ``log_result`` / ``set_label`` / ``log_artifact`` calls
    :param p1:      first parameter, drives the logged results
    :param p2:      second parameter, only printed
    """
    # report run identity and the parameter values received
    print(f'Run: {context.name} (uid={context.uid})')
    print(f'Params: p1={p1}, p2={p2}')
    context.logger.info('started training')

    # (a real training step would go here)

    # record the scalar results of the run
    for metric, factor in (('accuracy', 2), ('loss', 3)):
        context.log_result(metric, p1 * factor)

    # attach a label/tag to this run
    context.set_label('category', 'tests')

    # log a simple artifact and label the artifact itself
    artifact_labels = {'framework': 'xgboost'}
    context.log_artifact('model.txt', body=b'abc is 123', labels=artifact_labels)

def validation(context, model):
    """Demo validation handler.

    Prints the run identity and the location and content of the ``model``
    input, then logs a small HTML artifact marked for the web-app viewer.

    :param context: run context; used for ``name``, ``uid``, ``logger`` and
                    ``log_artifact``
    :param model:   input object exposing ``.url`` and ``.get()``
    """
    print(f'Run: {context.name} (uid={context.uid})')

    # read the input's location first, then its content
    model_location = model.url
    model_content = model.get()
    print('file - {}:\n{}\n'.format(model_location, model_content))

    context.logger.info('started validation')

    report_body = b'<b> validated </b>'
    context.log_artifact('validation.html', body=report_body, viewer='web-app')

def listfiles(context, path='/'):
    """List the entries of a directory.

    Prints the list returned by ``os.listdir(path)`` and returns a one-line
    summary string with the path and the number of entries found.

    :param context: run context (not used by this handler)
    :param path:    directory to list, defaults to the filesystem root
    :return:        summary string of the form ``'<path> contain <n> files'``
    """
    entries = os.listdir(path)
    print(entries)
    summary_template = '{} contain {} files'
    return summary_template.format(path, len(entries))

## Load MLRUN and specify defaults 

In [4]:
# nuclio: end-code
# (end-code marker tells nuclio to stop parsing the notebook from this cell)

# set the mlrun db path (can also be specified in the run_start command);
# the run logs below show runs being recorded to this location ("-> /User/mlrun")
%env MLRUN_DBPATH=/User/mlrun
# disabled alternative — NOTE(review): presumably selects the mlrun package
# source to install (development branch); confirm before enabling
#%env MLRUN_PACKAGE_PATH=git+https://github.com/mlrun/mlrun.git@development

from mlrun import new_function, code_to_function, NewTask
from mlrun.platforms import mount_v3io

env: MLRUN_DBPATH=/User/mlrun


## Test the code locally
The functions above can be tested locally. Parameters, inputs, and outputs can be specified in the API or in the runspec object.<br>
We create a `function`, which defines the runtime environment (type, code, image, ...), and `run` tasks/experiments using that function.<br>
(We use the `local` runtime by default; later on we will use a `job` runtime for running containers, and other runtimes such as MpiJob, Spark, Dask, or Nuclio can be used as well.)

In each run we can specify the function, inputs, parameters/hyper-parameters, etc. (see the `RunTemplate` class for details).<br>
In Jupyter, each run prints a summary table with metadata and links to data artifacts; this can be disabled by passing `visible=False` to `.run()`.

In [5]:
# create a function object with default settings (the local runtime is used here)
fn = new_function()
# run the `listfiles` handler with path='/User'; the resulting run object is
# kept in `list_run`
list_run = fn.run(handler=listfiles, params={'path': '/User'})

[mlrun] 2019-11-16 12:22:39,368 starting run listfiles uid=03bb83ae16454fa68bb065dc9930ffe8  -> /User/mlrun
['.bash_profile', '.bashrc', '.config', '.gitconfig', '.gitignore', '.igz', '.ipynb_checkpoints', '.ipython', '.jupyter', '.kube', '.local', '.pythonlibs', '.viminfo', '.vimrc', '1', 'LICENSE', 'README.md', 'ai-week', 'assets', 'customers', 'demos', 'examples', 'experiment-tracking', 'getting-started', 'igz-tutorials-get.sh', 'kfserving', 'mlrun', 'mlrun-demos', 'mlrun_server.ipynb', 'nuclio-jupyter', 'update-tutorials.ipynb', 'v3io', 'welcome.ipynb']



uid,iter,start,state,name,labels,inputs,parameters,results,artifacts
...30ffe8,0,Nov 16 12:22:39,completed,listfiles,repo=https://github.com/mlrun/mlrun.gitcommit=80b2805654fd1895e141b35e8740c07b0eb01ae1kind=handlerowner=adminhost=jupyter-4zzec664c4-cw71h-795444cd6c-2rqxp,,path=/User,return=/User contain 33 files,


type result.show() to see detailed results/progress or use CLI:
!mlrun get run --uid 03bb83ae16454fa68bb065dc9930ffe8 
[mlrun] 2019-11-16 12:22:39,630 run executed, status=completed


## Running and linking multiple tasks
In the next example we run two functions, `training` and `validation`, and pass the output of the first to the second.<br>
We will see in the `job` example that linking works even when the tasks run in different processes or containers, or in a workflow.

In [6]:
# step 1: run `training` locally with p1=5 (p2 keeps its default value of 2)
train_run = fn.run(handler=training, params={'p1': 5})
# look up the 'model.txt' output of the training run
model_path = train_run.output('model.txt')
# step 2: pass that output to `validation` as its `model` input
validation_run = fn.run(handler=validation, inputs={'model': model_path})

[mlrun] 2019-11-16 12:24:22,525 starting run training uid=16b72fb8ce2847c2b0609e0b8720a9e5  -> /User/mlrun
Run: training (uid=16b72fb8ce2847c2b0609e0b8720a9e5)
Params: p1=5, p2=2
[mlrun] 2019-11-16 12:24:22,688 started training



uid,iter,start,state,name,labels,inputs,parameters,results,artifacts
...20a9e5,0,Nov 16 12:24:22,completed,training,repo=https://github.com/mlrun/mlrun.gitcommit=80b2805654fd1895e141b35e8740c07b0eb01ae1kind=handlerowner=adminhost=jupyter-4zzec664c4-cw71h-795444cd6c-2rqxpcategory=tests,,p1=5,accuracy=10loss=15,model.txt


type result.show() to see detailed results/progress or use CLI:
!mlrun get run --uid 16b72fb8ce2847c2b0609e0b8720a9e5 
[mlrun] 2019-11-16 12:24:22,750 run executed, status=completed
[mlrun] 2019-11-16 12:24:22,750 starting run validation uid=c16d21e26f224d8e8f53afe929709d3a  -> /User/mlrun
Run: validation (uid=c16d21e26f224d8e8f53afe929709d3a)
file - model.txt:
b'abc is 123'

[mlrun] 2019-11-16 12:24:22,889 started validation



uid,iter,start,state,name,labels,inputs,parameters,results,artifacts
...709d3a,0,Nov 16 12:24:22,completed,validation,repo=https://github.com/mlrun/mlrun.gitcommit=80b2805654fd1895e141b35e8740c07b0eb01ae1kind=handlerowner=adminhost=jupyter-4zzec664c4-cw71h-795444cd6c-2rqxp,model,,,validation.html


type result.show() to see detailed results/progress or use CLI:
!mlrun get run --uid c16d21e26f224d8e8f53afe929709d3a 
[mlrun] 2019-11-16 12:24:22,941 run executed, status=completed


## Define cluster jobs and build images 
In order to run in a cluster we need to package our code and dependencies.<br>
The `code_to_function` call automatically creates a `Function` with the list of dependencies and the runtime configuration.<br>
You can apply KubeFlow modifiers to configure resources such as volumes; `mount_v3io()` adds an Iguazio v3io volume (the home of the current user) to the function.

The `build()` command is optional; it pre-builds all the dependencies so the runs will be faster. Note that the code and parameters can be updated per run.

In [7]:
# create an ML function from the notebook and attach it to the iguazio data fabric (v3io)
# NOTE: this rebinds `fn` — from this cell on, `fn` refers to the `job` runtime
# function rather than the function created for the local runs above
fn = code_to_function(runtime='job').apply(mount_v3io())

# prepare an image from the dependencies, so we won't need to build the image on every run
fn.build(image='mlrun/nuctest:latest')

[mlrun] 2019-11-16 12:25:05,354 building image (mlrun/nuctest:latest)
FROM python:3.6-jessie
WORKDIR /run
RUN pip install pandas
RUN pip install mlrun
ENV PYTHONPATH /run
[mlrun] 2019-11-16 12:25:05,357 using in-cluster config.
[mlrun] 2019-11-16 12:25:05,374 Pod mlrun-build-lk5mh created
..
[36mINFO[0m[0000] Resolved base name python:3.6-jessie to python:3.6-jessie 
[36mINFO[0m[0000] Resolved base name python:3.6-jessie to python:3.6-jessie 
[36mINFO[0m[0000] Downloading base image python:3.6-jessie     
[36mINFO[0m[0000] Error while retrieving image from cache: getting file info: stat /cache/sha256:0318d80cb241983eda20b905d77fa0bfb06e29e5aabf075c7941ea687f1c125a: no such file or directory 
[36mINFO[0m[0000] Downloading base image python:3.6-jessie     
[36mINFO[0m[0000] Built cross stage deps: map[]                
[36mINFO[0m[0000] Downloading base image python:3.6-jessie     
[36mINFO[0m[0000] Error while retrieving image from cache: getting file info: stat /cache/s

<mlrun.runtimes.kubejob.KubejobRuntime at 0x7f25d7229f98>

### Run the function on the cluster (build or use pre-built image)
Note that the `listfiles` call returns the same results as in the local run, since the function shares the same filesystem.<br>
`with_code()` injects the latest code into the function in case we made changes (it doesn't require a new build).

In [9]:
# same handler and parameters as the local test, now executed through the
# `job` function (the log below shows a pod being created for the run)
fn.run(handler=listfiles, params={'path': '/User'})

[mlrun] 2019-11-16 12:43:03,576 starting run listfiles uid=09f579f7b89d4b25ba3b635101397b68  -> /User/mlrun
[mlrun] 2019-11-16 12:43:03,739 Pod listfiles-fcp75 created
..
[mlrun] 2019-11-16 12:43:09,209 starting run listfiles uid=09f579f7b89d4b25ba3b635101397b68  -> /User/mlrun
['.bash_profile', '.bashrc', '.config', '.gitconfig', '.gitignore', '.igz', '.ipynb_checkpoints', '.ipython', '.jupyter', '.kube', '.local', '.pythonlibs', '.viminfo', '.vimrc', '1', 'LICENSE', 'README.md', 'ai-week', 'assets', 'customers', 'demos', 'examples', 'experiment-tracking', 'getting-started', 'igz-tutorials-get.sh', 'kfserving', 'mlrun', 'mlrun-demos', 'mlrun_server.ipynb', 'nuclio-jupyter', 'update-tutorials.ipynb', 'v3io', 'welcome.ipynb']

type result.show() to see detailed results/progress or use CLI:
!mlrun get run --uid 09f579f7b89d4b25ba3b635101397b68 
[mlrun] 2019-11-16 12:43:09,341 run executed, status=completed


uid,iter,start,state,name,labels,inputs,parameters,results,artifacts
...397b68,0,Nov 16 12:43:09,completed,listfiles,repo=https://github.com/mlrun/mlrun.gitcommit=80b2805654fd1895e141b35e8740c07b0eb01ae1kind=localowner=adminv3io_user=adminhost=listfiles-fcp75,,path=/User,return=/User contain 33 files,


type result.show() to see detailed results/progress or use CLI:
!mlrun get run --uid 09f579f7b89d4b25ba3b635101397b68 
[mlrun] 2019-11-16 12:43:14,376 run executed, status=completed


<mlrun.model.RunObject at 0x7f25d55b19e8>

In [10]:
# define a reusable run template: specify the artifacts output path and add a
# 'stage=dev' label (the label can be used for search later)
run_base = NewTask(out_path='/User/mlrun/data').set_label('stage', 'dev')

In [14]:
# run our training task with hyper-parameters (the recorded output shows one
# iteration per p1 value) and select the iteration with the max accuracy;
# the task is built from a copy of the `run_base` template
run = run_base.copy().with_hyper_params({'p1': [2,6,4]}, selector='max.accuracy')
# `with_code()` injects the latest notebook code before running
# NOTE(review): params={'p1': 9} overlaps with the p1 hyper-parameter list; in
# the recorded output the iterations ran with p1 = 2, 6, 4 — confirm the intent
train_run = fn.with_code().run(run, handler=training, name='my-training', params={'p1': 9})
# 'model.txt' output of the run (in the recorded output the validation step
# reads it from the iteration-2 folder, matching best_iteration=2)
model_path = train_run.output('model.txt')

[mlrun] 2019-11-16 12:49:39,663 starting run my-training uid=2f8d2b3195f54cd98627ac5394d3eecb  -> /User/mlrun
[mlrun] 2019-11-16 12:49:39,820 Pod my-training-brthj created
..
[mlrun] 2019-11-16 12:49:45,221 starting run my-training uid=2f8d2b3195f54cd98627ac5394d3eecb  -> /User/mlrun
[mlrun] 2019-11-16 12:49:45,314 started training
Run: my-training (uid=2f8d2b3195f54cd98627ac5394d3eecb-1)
Params: p1=2, p2=2

[mlrun] 2019-11-16 12:49:45,423 started training
Run: my-training (uid=2f8d2b3195f54cd98627ac5394d3eecb-2)
Params: p1=6, p2=2

[mlrun] 2019-11-16 12:49:45,527 started training
Run: my-training (uid=2f8d2b3195f54cd98627ac5394d3eecb-3)
Params: p1=4, p2=2

type result.show() to see detailed results/progress or use CLI:
!mlrun get run --uid 2f8d2b3195f54cd98627ac5394d3eecb 
[mlrun] 2019-11-16 12:49:45,741 run executed, status=completed


uid,iter,start,state,name,labels,inputs,parameters,results,artifacts
...d3eecb,0,Nov 16 12:49:45,completed,my-training,stage=devrepo=https://github.com/mlrun/mlrun.gitcommit=80b2805654fd1895e141b35e8740c07b0eb01ae1kind=localowner=adminv3io_user=admin,,p1=9,best_iteration=2accuracy=12loss=18,model.txtiteration_results


type result.show() to see detailed results/progress or use CLI:
!mlrun get run --uid 2f8d2b3195f54cd98627ac5394d3eecb 
[mlrun] 2019-11-16 12:49:50,785 run executed, status=completed


In [15]:
# run validation as a job, using the best model result from the previous step.
# A copy of the shared template is passed (as the training cell does), so the
# per-run settings given here (name, handler, inputs) cannot end up on `run_base`.
fn.run(run_base.copy(), handler=validation, name='my-validation', inputs={'model': model_path})

[mlrun] 2019-11-16 12:50:01,328 starting run my-validation uid=d64f374ef8824c6780beebc8bf78ee68  -> /User/mlrun
[mlrun] 2019-11-16 12:50:01,475 Pod my-validation-z9qvz created
..
[mlrun] 2019-11-16 12:50:06,873 starting run my-validation uid=d64f374ef8824c6780beebc8bf78ee68  -> /User/mlrun
[mlrun] 2019-11-16 12:50:06,964 started validation
Run: my-validation (uid=d64f374ef8824c6780beebc8bf78ee68)
file - /User/mlrun/data/2/model.txt:
b'abc is 123'


type result.show() to see detailed results/progress or use CLI:
!mlrun get run --uid d64f374ef8824c6780beebc8bf78ee68 
[mlrun] 2019-11-16 12:50:07,028 run executed, status=completed


uid,iter,start,state,name,labels,inputs,parameters,results,artifacts
...78ee68,0,Nov 16 12:50:06,completed,my-validation,stage=devrepo=https://github.com/mlrun/mlrun.gitcommit=80b2805654fd1895e141b35e8740c07b0eb01ae1kind=localowner=adminv3io_user=adminhost=my-validation-z9qvz,model,,,validation.html


type result.show() to see detailed results/progress or use CLI:
!mlrun get run --uid d64f374ef8824c6780beebc8bf78ee68 
[mlrun] 2019-11-16 12:50:12,071 run executed, status=completed


<mlrun.model.RunObject at 0x7f25d55a0780>

In [16]:
# list the pods started by mlrun (the output includes both `job` and `build` pods)
!mlrun get po 

[mlrun] 2019-11-16 12:51:26,103 using in-cluster config.
state      started          type     name
Succeeded  Nov 16 12:43:03  job      listfiles-fcp75
Succeeded  Nov 16 12:27:31  job      listfiles-wh8bz
Succeeded  Nov 16 12:01:34  build    mlrun-build-8qqhn
Succeeded  Nov 16 12:25:05  build    mlrun-build-lk5mh
Succeeded  Nov 16 12:16:34  build    mlrun-build-pjjht
Succeeded  Nov 16 12:49:39  job      my-training-brthj
Succeeded  Nov 16 12:44:41  job      my-training-dp8sj
Failed     Nov 16 12:43:53  job      my-validation-h45vn
Failed     Nov 16 12:45:03  job      my-validation-k7ptc
Succeeded  Nov 16 12:50:01  job      my-validation-z9qvz


In [17]:
# check job logs for a single pod, addressed by its name from the listing above
# NOTE: the pod name is specific to the recorded session; substitute a current one
!mlrun watch my-training-dp8sj

[mlrun] 2019-11-16 12:52:40,509 using in-cluster config.

[mlrun] 2019-11-16 12:44:47,139 starting run my-training uid=936e45a7e7e8472197466d2b3729fba5  -> /User/mlrun
[mlrun] 2019-11-16 12:44:47,228 started training
Run: my-training (uid=936e45a7e7e8472197466d2b3729fba5-1)
Params: p1=2, p2=2

[mlrun] 2019-11-16 12:44:47,335 started training
Run: my-training (uid=936e45a7e7e8472197466d2b3729fba5-2)
Params: p1=6, p2=2

[mlrun] 2019-11-16 12:44:47,437 started training
Run: my-training (uid=936e45a7e7e8472197466d2b3729fba5-3)
Params: p1=4, p2=2

type result.show() to see detailed results/progress or use CLI:
!mlrun get run --uid 936e45a7e7e8472197466d2b3729fba5 
[mlrun] 2019-11-16 12:44:47,632 run executed, status=completed
Pod my-training-dp8sj last status is: succeeded


## Create a KubeFlow Pipeline

In [18]:
import kfp
from kfp import dsl

In [19]:
# Base output path (v3io URL) used as `out_path` for the pipeline's task.
# NOTE(review): '{{workflow.uid}}' is kept as a literal placeholder here —
# presumably filled in per workflow run by the pipeline engine; confirm.
# NOTE(review): the user name 'admin' is hard-coded in the path.
artifacts_path = 'v3io:///users/admin/mlrun/kfp/{{workflow.uid}}/'

In [20]:
@dsl.pipeline(name='job test', description='Shows how to use mlrun.')
def tr_pipeline(p1=9):
    """Single-step pipeline that runs the `training` handler as a step of `fn`.

    `p1` (default 9) is forwarded as the `p1` parameter of the training task.
    """
    # describe the training task: handler, artifact output path, expected output
    training_task = NewTask(
        handler='training',
        out_path=artifacts_path,
        outputs=['model.txt'],
    ).with_params(p1=p1)

    # turn the task into a pipeline step of `fn` and apply the v3io mount to it
    train = fn.as_step(training_task).apply(mount_v3io())

In [21]:
# compile the pipeline function into the file 'trpipe.yaml'
kfp.compiler.Compiler().compile(tr_pipeline, 'trpipe.yaml')



In [22]:
# create a pipelines client for the 'default-tenant' namespace
client = kfp.Client(namespace='default-tenant')
# pipeline arguments for this run: p1=8 instead of the pipeline default of 9
arguments = {'p1': 8}
# submit the pipeline function as run 'tr 1' under the 'tr' experiment
run_result = client.create_run_from_pipeline_func(tr_pipeline, arguments, run_name='tr 1', experiment_name='tr')