In [1]:
## see requirements in byoml_xgboost/environment.yml and byoml_xgboost/requirements.txt
# conda env create --file env/byoml_xgboost/environment.yml
# conda activate byoml_xgboost 
# pip install -r env/byoml_xgboost/requirements.txt

In [15]:
from datetime import datetime
from random import random
# Run-specific suffix: today's date, a dash, then a random number in
# [0, 1000) rounded to an integer and zero-padded to four characters.
# Presumably this keeps model names from colliding between runs.
run_id = "-".join((f"{datetime.now():%Y-%m-%d}", f"{1000 * random():04.0f}"))

# Names under which the three example models of this notebook are uploaded.
MODEL_NAME = 'xgboost-' + run_id
MODEL_NAME_PROBS = MODEL_NAME + '-probs'
MODEL_NAME_CLASSIFIER = MODEL_NAME + '-classifier'

# Profile name handed to WaylayClient.from_profile when the client is created.
PROFILE = 'render_notebook'

## BYOML XGBoost example

In this notebook we give an example of how to create and upload XGBoost models. To keep things simple, we will use a toy dataset.

The key takeaway is that you can create your model either with the native booster or with the scikit-learn wrapper. Both are accepted by the BYOML service.

In [16]:
import numpy as np
import pandas as pd
from sklearn.datasets import make_classification

In [17]:
# Synthetic binary-classification data: 2000 samples with 7 features
# (5 informative, 2 redundant) and class weights 0.8 / 0.2. Every feature
# gets its own scale factor; random_state pins the generated values.
X, y = make_classification(
    n_samples=2000,
    n_features=7,
    n_informative=5,
    n_redundant=2,
    weights=[0.8, 0.2],
    scale=[5, 5, 1, 1000, 200, 20, 10],
    shuffle=False,
    random_state=42,
)

# Wrap the raw arrays in DataFrames with readable column names.
X_data = pd.DataFrame(
    data=X,
    columns=[
        'heat',
        'heat2',
        'dust',
        'light',
        'humidity',
        'pressure',
        'nitrogen_concentration',
    ],
)
y_data = pd.DataFrame(data=y, columns=['ok'])

# Preview: the first five feature rows next to their labels, grouped under
# a top-level column key per frame.
pd.concat(
    [X_data.head(), y_data.head()],
    keys=['X_data', 'y_data'],
    axis='columns',
)

Unnamed: 0_level_0,X_data,X_data,X_data,X_data,X_data,X_data,X_data,y_data
Unnamed: 0_level_1,heat,heat2,dust,light,humidity,pressure,nitrogen_concentration,ok
0,3.21464,-3.107601,2.663515,-599.783123,67.133968,-40.880974,22.175197,0
1,-0.354232,14.506926,-0.832169,-1501.497188,-42.529406,8.094005,-13.488557,0
2,4.380911,10.794966,1.566156,-1121.889667,-30.364312,-22.922713,10.072141,0
3,6.396967,1.283805,1.444611,-827.627508,220.112282,-37.722183,14.348605,0
4,13.324452,15.490906,1.597028,-673.973149,24.585876,-29.507437,19.428033,0


## Create the model

We can now split the data and train the model:

In [18]:
from sklearn.model_selection import train_test_split
import xgboost as xgb

In [19]:
# Hold out 20% of the rows as a test set; random_state fixes the split.
X_train, X_test, y_train, y_test = train_test_split(
    X_data,
    y_data,
    random_state=123,
    test_size=0.2,
)

# Wrap each split in a DMatrix, the container passed to xgb.train and
# Booster.predict in the cells that follow.
dtrain, dtest = (
    xgb.DMatrix(features, label=labels)
    for features, labels in ((X_train, y_train), (X_test, y_test))
)

In [20]:
# Booster hyperparameters. With the 'binary:hinge' objective the predictions
# shown further down are 0/1 values rather than probabilities.
param = dict(
    max_depth=3,
    learning_rate=0.1,
    colsample_bytree=0.3,
    objective='binary:hinge',
)
num_round = 100  # number of boosting rounds

# Train the booster on the training DMatrix.
bst = xgb.train(params=param, dtrain=dtrain, num_boost_round=num_round)

## Upload to Waylay

Once your model is trained, you can upload it to Waylay using our Python SDK:

In [21]:
from waylay import WaylayClient, RestResponseError
# Create the SDK client from the profile named in PROFILE.
# NOTE(review): presumably the profile holds stored credentials/endpoints — confirm against the Waylay SDK docs.
waylay = WaylayClient.from_profile(PROFILE)

In [22]:
# Best-effort cleanup: delete any model already stored under one of this
# run's names before uploading new ones.
import time

for name in (MODEL_NAME, MODEL_NAME_PROBS, MODEL_NAME_CLASSIFIER):
    try:
        waylay.byoml.model.remove(name)
    except RestResponseError as err:
        # The removal failed (e.g. a 404 when no such model exists);
        # report it and carry on with the next name.
        display(f'Ignored: {err}')
    else:
        display(f'Removed: {name}')
        # it takes a few seconds until the model name becomes available again
        time.sleep(5)


"Ignored: ByomlActionError(404: 'No model 'xgboost-2023-05-09-0088''; DELETE 'https://api.waylay.io/ml/v1/models/xgboost-2023-05-09-0088')"

"Ignored: ByomlActionError(404: 'No model 'xgboost-2023-05-09-0088-probs''; DELETE 'https://api.waylay.io/ml/v1/models/xgboost-2023-05-09-0088-probs')"

"Ignored: ByomlActionError(404: 'No model 'xgboost-2023-05-09-0088-classifier''; DELETE 'https://api.waylay.io/ml/v1/models/xgboost-2023-05-09-0088-classifier')"

In [23]:
# Upload the in-memory booster under MODEL_NAME. The value returned by the
# call is the last expression of the cell and is therefore displayed.
waylay.byoml.model.upload(
    model_name=MODEL_NAME, 
    trained_model=bst, 
    framework='xgboost', 
    description='xgboost example'
)

{'createdOn': '2021-01-04T00:00:00+00:00',
 'description': 'xgboost example',
 'framework': 'xgboost',
 'framework_version': '1.3',
 'message': 'Model successfully uploaded',
 'metadata': {'description': 'xgboost example'},
 'name': 'xgboost-2023-05-09-0088',
 'updatedOn': '2021-01-04T00:00:00+00:00',
 'version': '1'}

We can then check that the deployed model gives the same output as the local one:

In [24]:
# Local reference: first five predictions of the in-memory booster on the test DMatrix.
bst.predict(dtest)[:5]

array([0., 0., 0., 0., 1.], dtype=float32)

In [25]:
# Score the first five test rows with the uploaded model, for comparison
# with the local predictions.
waylay.byoml.model.predict(MODEL_NAME, X_test.head(5))

[0.0, 0.0, 0.0, 0.0, 1.0]

In [26]:
# Clean up: delete the model uploaded under MODEL_NAME again.
waylay.byoml.model.remove(MODEL_NAME)

{'message': 'Model xgboost-2023-05-09-0088: all versions deleted',
 'name': 'xgboost-2023-05-09-0088',
 'versions': ['0.0.1']}

## Create an XGBoost model that outputs probabilities

Sometimes you might want to work with the probabilities directly instead of the `1` and `0` class values. To achieve this, we only need a small tweak to the model parameters:

In [27]:
# Same tree settings as the hinge model; only the objective changes, and an
# evaluation metric is added.
param = dict(
    max_depth=3,
    learning_rate=0.1,
    colsample_bytree=0.3,
    objective='binary:logistic',  # logistic objective instead of hinge
    eval_metric='logloss',  # 'error' or 'logloss'
)

In [28]:
# Retrain with the logistic parameters; this rebinds `bst` to the new booster.
bst = xgb.train(params=param, dtrain=dtrain, num_boost_round=num_round)

In [29]:
# Upload the logistic-objective booster under MODEL_NAME_PROBS. The value
# returned by the call is displayed as the cell output.
waylay.byoml.model.upload(
    model_name=MODEL_NAME_PROBS, 
    trained_model=bst, 
    framework='xgboost', 
    description='xgboost probabilities example'
)

{'createdOn': '2021-01-04T00:00:00+00:00',
 'description': 'xgboost probabilities example',
 'framework': 'xgboost',
 'framework_version': '1.3',
 'message': 'Model successfully uploaded',
 'metadata': {'description': 'xgboost probabilities example'},
 'name': 'xgboost-2023-05-09-0088-probs',
 'updatedOn': '2021-01-04T00:00:00+00:00',
 'version': '1'}

In [30]:
# Local reference: first five predictions of the retrained booster on the test DMatrix.
bst.predict(dtest)[:5]

array([0.03388389, 0.00636836, 0.07393802, 0.41373524, 0.59628487],
      dtype=float32)

In [31]:
# Score the first five test rows with the uploaded probabilities model, for
# comparison with the local predictions.
waylay.byoml.model.predict(MODEL_NAME_PROBS, X_test.head(5))

[0.03388388827443123,
 0.0063683632761240005,
 0.07393801957368851,
 0.4137352406978607,
 0.5962848663330078]

In [32]:
# Clean up: delete the model uploaded under MODEL_NAME_PROBS again.
waylay.byoml.model.remove(MODEL_NAME_PROBS)

{'message': 'Model xgboost-2023-05-09-0088-probs: all versions deleted',
 'name': 'xgboost-2023-05-09-0088-probs',
 'versions': ['0.0.1']}

## Using the XGBClassifier
Instead of using a booster directly, we can also use the `XGBClassifier`, as shown in the example below. 
Keep in mind that, although the classifier's local `predict` method returns `1`s and `0`s, the uploaded model outputs probabilities, because the underlying Booster object is used. 
If this is unwanted behaviour, you can set the objective to `binary:hinge`.

In this case we should use the DataFrames and not the DMatrix:

In [33]:
from xgboost import XGBClassifier

In [34]:
# scikit-learn style wrapper configured with the same max_depth,
# learning_rate and colsample_bytree as the booster examples.
# No objective is passed here, so the library default applies.
# NOTE(review): presumably 'binary:logistic' — confirm for the installed xgboost version.
model = XGBClassifier(
    max_depth=3,
    learning_rate=0.1,
    colsample_bytree=0.3,
    eval_metric='logloss'
)

In [35]:
# Fit on the training DataFrame; the single-column label frame is flattened
# to a 1-D array first. The call's return value is displayed as the cell output.
model.fit(X_train, y_train.to_numpy().ravel())

XGBClassifier(base_score=0.5, booster=None, colsample_bylevel=1,
              colsample_bynode=1, colsample_bytree=0.3, eval_metric='logloss',
              gamma=0, gpu_id=-1, importance_type='gain',
              interaction_constraints=None, learning_rate=0.1, max_delta_step=0,
              max_depth=3, min_child_weight=1, missing=nan,
              monotone_constraints=None, n_estimators=100, n_jobs=0,
              num_parallel_tree=1, random_state=0, reg_alpha=0, reg_lambda=1,
              scale_pos_weight=1, subsample=1, tree_method=None,
              validate_parameters=False, verbosity=None)

In [36]:
# Upload the fitted XGBClassifier wrapper under MODEL_NAME_CLASSIFIER. The
# value returned by the call is displayed as the cell output.
waylay.byoml.model.upload(
    model_name=MODEL_NAME_CLASSIFIER, 
    trained_model=model, 
    framework='xgboost', 
    description='xgboost classifier example'
)

{'createdOn': '2021-01-04T00:00:00+00:00',
 'description': 'xgboost classifier example',
 'framework': 'xgboost',
 'framework_version': '1.3',
 'message': 'Model successfully uploaded',
 'metadata': {'description': 'xgboost classifier example'},
 'name': 'xgboost-2023-05-09-0088-classifier',
 'updatedOn': '2021-01-04T00:00:00+00:00',
 'version': '1'}

As warned above, the local model's `predict` outputs the classes, while the deployed model returns the probabilities:

In [37]:
# Score the first five test rows with the uploaded classifier model.
waylay.byoml.model.predict(MODEL_NAME_CLASSIFIER, X_test.head(5))

[0.03388388827443123,
 0.0063683632761240005,
 0.07393801957368851,
 0.4137352406978607,
 0.5962848663330078]

In [38]:
# Clean up: delete the model uploaded under MODEL_NAME_CLASSIFIER again.
waylay.byoml.model.remove(MODEL_NAME_CLASSIFIER)

{'message': 'Model xgboost-2023-05-09-0088-classifier: all versions deleted',
 'name': 'xgboost-2023-05-09-0088-classifier',
 'versions': ['0.0.1']}