# ML Pipeline example - XGBoost Training

In [18]:
# nuclio: ignore
# if the nuclio-jupyter package is not installed run !pip install nuclio-jupyter
import nuclio 

### Install and register package dependencies and build commands
These will be converted into container build instructions 

In [19]:
%%nuclio cmd -c
pip install scikit-learn
pip install xgboost
pip install matplotlib

In [20]:
# base image for the container build; the pip commands registered above run on top of it
%nuclio config spec.build.baseImage = "python:3.6-jessie"
#%nuclio config spec.image = ".mlrun/xgb:latest"

%nuclio: setting spec.build.baseImage to 'python:3.6-jessie'


## ML Training code

In [21]:
import xgboost as xgb
import os
from sklearn.datasets import load_iris
from sklearn.model_selection import train_test_split
import numpy as np
from sklearn.metrics import accuracy_score

# module-level cache for the train/test matrices and the test labels; filled in by load_dataset()
dtrain = dtest = Y_test = None

def load_dataset(test_size=0.2, random_state=None):
    """Load the iris dataset and populate the module-level train/test globals.

    The samples are split into train and test subsets; the results are stored
    in the globals ``dtrain`` and ``dtest`` (as ``xgb.DMatrix`` objects) and
    ``Y_test`` (the test labels returned by ``train_test_split``).

    :param test_size:    forwarded to ``train_test_split``; defaults to 0.2,
                         the value that used to be hard-coded
    :param random_state: forwarded to ``train_test_split``; the default ``None``
                         keeps the original unseeded split, pass an int for a
                         reproducible split
    """
    global dtrain, dtest, Y_test
    iris = load_iris()
    X_train, X_test, Y_train, Y_test = train_test_split(
        iris['data'], iris['target'],
        test_size=test_size, random_state=random_state)
    dtrain = xgb.DMatrix(X_train, label=Y_train)
    dtest = xgb.DMatrix(X_test, label=Y_test)


def xgb_train(context, model_name='model.bst',
            max_depth=6,
            num_class=10,
            eta=0.2,
            gamma=0.1,
            steps=20):
    """Train an XGBoost multi-class model and log its accuracy and model file.

    Uses the module-level ``dtrain`` / ``dtest`` / ``Y_test`` data, loading it
    through ``load_dataset()`` on the first call.

    :param context:    run context; receives the 'accuracy' result and the
                       'model' artifact
    :param model_name: path the trained model is saved to
    :param max_depth:  XGBoost ``max_depth`` parameter
    :param num_class:  XGBoost ``num_class`` parameter
    :param eta:        XGBoost ``eta`` parameter
    :param gamma:      XGBoost ``gamma`` parameter
    :param steps:      number of boosting rounds passed to ``xgb.train``
    """
    global dtrain, dtest, Y_test

    # the dataset is loaded only once and reused by subsequent calls
    if dtrain is None:
        load_dataset()

    # Training parameters built from the handler arguments
    param = {"max_depth": max_depth,
             "eta": eta, "nthread": 4,
             "num_class": num_class,
             "gamma": gamma,
             "objective": "multi:softprob"}

    # Train model
    xgb_model = xgb.train(param, dtrain, steps)

    # predict() is expected to return one row of per-class scores per sample;
    # pick the highest-scoring class of every row in a single vectorized call
    preds = xgb_model.predict(dtest)
    best_preds = np.argmax(preds, axis=1)

    context.log_result('accuracy', float(accuracy_score(Y_test, best_preds)))

    os.makedirs('models', exist_ok=True)
    # NOTE: the model is saved under model_name as given; the 'models'
    # directory created above is currently not part of that path
    model_file = model_name
    xgb_model.save_model(model_file)
    context.log_artifact('model', src_path=model_file, labels={'framework': 'xgboost'})

from mlrun.artifacts import PlotArtifact
import matplotlib
import matplotlib.pyplot as plt
import pandas as pd
from io import BytesIO

def plot_iter(context, iterations, col='accuracy', num_bins=10):
    """Plot a histogram of one result column across the training iterations.

    :param context:    run context; the figure is logged to it as the 'myfig'
                       plot artifact
    :param iterations: object whose ``get()`` returns the iteration results as
                       CSV bytes
    :param col:        result name; the column read is ``output.<col>``
    :param num_bins:   number of histogram bins
    """
    df = pd.read_csv(BytesIO(iterations.get()))
    x = df['output.{}'.format(col)]
    fig, ax = plt.subplots(figsize=(6,6))
    # density=1 normalizes the histogram, so the y axis shows a density rather than a raw count
    ax.hist(x, num_bins, density=1)
    # label the x axis after the plotted column ('Accuracy' for the default col)
    ax.set_xlabel(str(col).capitalize())
    ax.set_ylabel('Density')
    context.log_artifact(PlotArtifact('myfig', body=fig))

In [22]:
from sklearn.datasets import load_iris
import pandas as pd

In [5]:
# exploratory: list the fields available in the iris dataset object
# NOTE(review): this cell precedes the `# nuclio: end-code` marker, so it is presumably
# exported with the function code; consider marking it `# nuclio: ignore` together with the
# DataFrame cell that uses `iris` - confirm
iris = load_iris()
iris.keys()

dict_keys(['data', 'target', 'target_names', 'DESCR', 'feature_names', 'filename'])

In [7]:
# nuclio: ignore
# exploratory cell (kept out of the exported function code): view the iris
# features as a DataFrame, showing only the first rows
iris_dataset = pd.DataFrame(
    data=iris.data,
    columns=iris.feature_names
)
iris_dataset.head(10)

Unnamed: 0,sepal length (cm),sepal width (cm),petal length (cm),petal width (cm)
0,5.1,3.5,1.4,0.2
1,4.9,3.0,1.4,0.2
2,4.7,3.2,1.3,0.2
3,4.6,3.1,1.5,0.2
4,5.0,3.6,1.4,0.2
5,5.4,3.9,1.7,0.4
6,4.6,3.4,1.4,0.3
7,5.0,3.4,1.5,0.2
8,4.4,2.9,1.4,0.2
9,4.9,3.1,1.5,0.1


In [5]:
# nuclio: end-code
# (end-code marker tells nuclio to stop parsing the notebook from this cell)

In [23]:
from mlrun import new_function, code_to_function, NewTask, get_run_db, mlconf, mount_v3io, new_model_server
# mlrun database path (presumably what get_run_db() connects to later - confirm);
# this is an environment-specific location, adjust as needed
mlconf.dbpath = '/User/mlrun'
#mlconf.package_path = 'git+https://github.com/mlrun/mlrun.git@development'
import kfp
from kfp import dsl

## Test the code locally 

In [24]:
# run the training handler locally over a small list of eta values;
# the 'max.accuracy' selector is used to choose among the iterations
task = NewTask(handler=xgb_train, out_path='/User/mlrun/data')
task = task.with_hyper_params({'eta': [0.1, 0.2, 0.3]}, selector='max.accuracy')
run = new_function().run(task)

[mlrun] 2019-11-17 13:39:35,117 starting run xgb_train uid=9bfef3e5be6d4ba5835f1b8fa086be61  -> /User/mlrun


uid,iter,start,state,name,labels,inputs,parameters,results,artifacts
...86be61,0,Nov 17 13:39:35,completed,xgb_train,repo=https://github.com/zilbermanor/aiweek.gitcommit=8693fcfde2fa67ad7ae6992430ca675448925458kind=handlerowner=admin,,,best_iteration=1accuracy=0.9666666666666667,modeliteration_results


type result.show() to see detailed results/progress or use CLI:
!mlrun get run --uid 9bfef3e5be6d4ba5835f1b8fa086be61 
[mlrun] 2019-11-17 13:39:35,498 run executed, status=completed


## Create a containerized function from the notebook code
We create a function object which defines the code, metadata, execution and build instructions. <br>

Later on we build the image (so we don't have to repeat this on every run)

In [25]:
# create a job function from the notebook code
# (the iguazio data fabric (v3io) mount is applied per step in the pipeline definition below)
fn = code_to_function('training')

In [26]:
# build the container image once, so the build does not have to be repeated on every run
fn.build(image='mlrun/xgb:latest')

[mlrun] 2019-11-17 13:40:40,570 building image (mlrun/xgb:latest)
FROM python:3.6-jessie
WORKDIR /run
RUN pip install sklearn
RUN pip install xgboost
RUN pip install matplotlib
RUN pip install mlrun
ENV PYTHONPATH /run
[mlrun] 2019-11-17 13:40:40,581 Pod mlrun-build-9jpzb created
..
[36mINFO[0m[0000] Resolved base name python:3.6-jessie to python:3.6-jessie 
[36mINFO[0m[0000] Resolved base name python:3.6-jessie to python:3.6-jessie 
[36mINFO[0m[0000] Downloading base image python:3.6-jessie     
[36mINFO[0m[0000] Error while retrieving image from cache: getting file info: stat /cache/sha256:0318d80cb241983eda20b905d77fa0bfb06e29e5aabf075c7941ea687f1c125a: no such file or directory 
[36mINFO[0m[0000] Downloading base image python:3.6-jessie     
[36mINFO[0m[0000] Built cross stage deps: map[]                
[36mINFO[0m[0000] Downloading base image python:3.6-jessie     
[36mINFO[0m[0000] Error while retrieving image from cache: getting file info: stat /cache/sha256:031

ERROR:root:Internal Python error in the inspect module.
Below is the traceback from this internal error.



Traceback (most recent call last):
  File "/conda/lib/python3.6/site-packages/IPython/core/interactiveshell.py", line 3326, in run_code
    exec(code_obj, self.user_global_ns, self.user_ns)
  File "<ipython-input-26-21126060ad20>", line 1, in <module>
    fn.build(image='mlrun/xgb:latest')
  File "/User/.pythonlibs/lib/python3.6/site-packages/mlrun/runtimes/container.py", line 49, in build
    ready = self._build_image(watch, with_mlrun)
  File "/User/.pythonlibs/lib/python3.6/site-packages/mlrun/runtimes/container.py", line 78, in _build_image
    ready = build_runtime(self, with_mlrun, watch)
  File "/User/.pythonlibs/lib/python3.6/site-packages/mlrun/builder.py", line 204, in build_runtime
    with_mlrun=with_mlrun)
  File "/User/.pythonlibs/lib/python3.6/site-packages/mlrun/builder.py", line 180, in build_image
    return k8s.run_job(kpod)
  File "/User/.pythonlibs/lib/python3.6/site-packages/mlrun/k8s_utils.py", line 130, in run_job
    return self.watch(pod_name, namespace, timeo

KeyboardInterrupt: 

## Create and run the pipeline

In [12]:
# output path for the pipeline artifacts; the {{workflow.uid}} placeholder is presumably
# substituted per workflow run by the pipeline engine - TODO confirm
artifacts_path = 'v3io:///users/admin/mlrun/kfp/{{workflow.uid}}/'

In [13]:
@dsl.pipeline(
    name='My XGBoost training pipeline',
    description='Shows how to use mlrun.'
)
def xgb_pipeline(
   eta = [0.1, 0.2, 0.3], gamma = [0.0, 0.1, 0.2, 0.3]
):
    # refresh the function code from the notebook
    fn.with_code()

    # step 1: hyper-parameter training over the eta/gamma lists, selected by max accuracy
    train_task = NewTask(handler='xgb_train', out_path=artifacts_path, outputs=['model'])
    train_task = train_task.with_hyper_params({'eta': eta, 'gamma': gamma},
                                              selector='max.accuracy')
    train = fn.as_step(train_task, name='xgb_train').apply(mount_v3io())

    # step 2: deploy the trained model using a nuclio model-serving function
    srvfn = new_model_server('mysrv3', model_class='XGBoostModel', filename='nuclio_serving.ipynb')
    srvfn = srvfn.with_v3io('User', '~/')
    deploy = srvfn.deploy_step(project='xgb', models={'netops_v1': train.outputs['model']})

    # step 3: feed the iteration results of the training step into the plot step
    plot_task = NewTask(handler='plot_iter', out_path=artifacts_path,
                        inputs={'iterations': train.outputs['iteration_results']})
    plot = fn.as_step(plot_task, name='plot').apply(mount_v3io())
    

### Create a KubeFlow client and submit the pipeline with parameters

In [14]:
# for debug generate the pipeline dsl
#kfp.compiler.Compiler().compile(xgb_pipeline, 'mlrunpipe.yaml')

In [15]:
# hyper-parameter lists handed to the pipeline as its run arguments
arguments = {
    'eta': [0.05, 0.10, 0.20, 0.30],
    'gamma': [0.0, 0.1, 0.2, 0.3],
}
client = kfp.Client(namespace='default-tenant')
run_result = client.create_run_from_pipeline_func(
    xgb_pipeline, arguments, run_name='xgb 1', experiment_name='xgb')



### See the run status and results in the run database

In [16]:
# connect to the run db (presumably the one configured through mlconf.dbpath - confirm)
db = get_run_db().connect()

In [17]:
# show only the runs labeled with this workflow's ID
workflow_filter = 'workflow={}'.format(run_result.run_id)
db.list_runs('', labels=workflow_filter).show()

uid,iter,start,state,name,labels,inputs,parameters,results,artifacts
...80dc02,0,Nov 16 15:20:06,completed,plot,workflow=8cb631e6-0c97-4766-ba06-86cd4b39a648v3io_user=adminkind=localowner=roothost=my-xgboost-training-pipeline-4f74g-1544438956,iterations,,,myfig.html
...c0abf8,16,Nov 16 15:19:56,completed,xgb_train,workflow=8cb631e6-0c97-4766-ba06-86cd4b39a648v3io_user=adminkind=localowner=roothost=my-xgboost-training-pipeline-4f74g-704686975,,eta=0.3gamma=0.3,accuracy=0.9666666666666667,model
...c0abf8,15,Nov 16 15:19:56,completed,xgb_train,workflow=8cb631e6-0c97-4766-ba06-86cd4b39a648v3io_user=adminkind=localowner=roothost=my-xgboost-training-pipeline-4f74g-704686975,,eta=0.2gamma=0.3,accuracy=1.0,model
...c0abf8,14,Nov 16 15:19:56,completed,xgb_train,workflow=8cb631e6-0c97-4766-ba06-86cd4b39a648v3io_user=adminkind=localowner=roothost=my-xgboost-training-pipeline-4f74g-704686975,,eta=0.1gamma=0.3,accuracy=0.9666666666666667,model
...c0abf8,13,Nov 16 15:19:56,completed,xgb_train,workflow=8cb631e6-0c97-4766-ba06-86cd4b39a648v3io_user=adminkind=localowner=roothost=my-xgboost-training-pipeline-4f74g-704686975,,eta=0.05gamma=0.3,accuracy=1.0,model
...c0abf8,12,Nov 16 15:19:56,completed,xgb_train,workflow=8cb631e6-0c97-4766-ba06-86cd4b39a648v3io_user=adminkind=localowner=roothost=my-xgboost-training-pipeline-4f74g-704686975,,eta=0.3gamma=0.2,accuracy=0.9666666666666667,model
...c0abf8,11,Nov 16 15:19:56,completed,xgb_train,workflow=8cb631e6-0c97-4766-ba06-86cd4b39a648v3io_user=adminkind=localowner=roothost=my-xgboost-training-pipeline-4f74g-704686975,,eta=0.2gamma=0.2,accuracy=0.9,model
...c0abf8,10,Nov 16 15:19:56,completed,xgb_train,workflow=8cb631e6-0c97-4766-ba06-86cd4b39a648v3io_user=adminkind=localowner=roothost=my-xgboost-training-pipeline-4f74g-704686975,,eta=0.1gamma=0.2,accuracy=1.0,model
...c0abf8,9,Nov 16 15:19:55,completed,xgb_train,workflow=8cb631e6-0c97-4766-ba06-86cd4b39a648v3io_user=adminkind=localowner=roothost=my-xgboost-training-pipeline-4f74g-704686975,,eta=0.05gamma=0.2,accuracy=0.9333333333333333,model
...c0abf8,8,Nov 16 15:19:55,completed,xgb_train,workflow=8cb631e6-0c97-4766-ba06-86cd4b39a648v3io_user=adminkind=localowner=roothost=my-xgboost-training-pipeline-4f74g-704686975,,eta=0.3gamma=0.1,accuracy=1.0,model
