In [2]:
import pandas as pd
import numpy as np
import bentoml

from sklearn.model_selection import train_test_split
from sklearn.feature_extraction import DictVectorizer

from sklearn.ensemble import RandomForestClassifier

### Data preparation

In [3]:
# Location of the raw credit-scoring CSV, resolved against the notebook's
# working directory.
data = 'CreditScoring.csv'

# Read the whole file into the DataFrame the following cells work on.
df = pd.read_csv(filepath_or_buffer=data)

In [4]:
# Lower-case every column name so the columns can be addressed uniformly below.
df.columns = df.columns.str.lower()

# Lookup tables translating the integer codes stored in the CSV into readable
# labels. In every table the code 0 is labelled 'unk'.
status_values = {0: 'unk', 1: 'ok', 2: 'default'}
home_values = {
    0: 'unk',
    1: 'rent',
    2: 'owner',
    3: 'private',
    4: 'ignore',
    5: 'parents',
    6: 'other',
}
marital_values = {
    0: 'unk',
    1: 'single',
    2: 'married',
    3: 'widow',
    4: 'separated',
    5: 'divorced',
}
records_values = {0: 'unk', 1: 'no', 2: 'yes'}
job_values = {
    0: 'unk',
    1: 'fixed',
    2: 'partime',
    3: 'freelance',
    4: 'others',
}

# Decode each categorical column with its table. Series.map turns any code
# that is absent from the table into NaN.
for column, labels in (
    ('status', status_values),
    ('home', home_values),
    ('marital', marital_values),
    ('records', records_values),
    ('job', job_values),
):
    df[column] = df[column].map(labels)

# 99999999 in the monetary columns is swapped for NaN
# (presumably a missing-value sentinel in the raw file -- confirm against the data source).
for column in ('income', 'assets', 'debt'):
    df[column] = df[column].replace(to_replace=99999999, value=np.nan)

# Keep only rows whose status is not 'unk', then renumber the index from 0.
status_is_known = df['status'] != 'unk'
df = df.loc[status_is_known].reset_index(drop=True)

In [5]:
# Hold out 20% of the rows for testing (fixed random_state for a repeatable
# split) and give each part a fresh 0-based index.
df_train, df_test = (
    part.reset_index(drop=True)
    for part in train_test_split(df, test_size=0.2, random_state=11)
)

# Binary target: 1 where status is 'default', 0 otherwise.
# pop() removes the 'status' column from the frame while handing it back, so
# the label is no longer present among the feature columns afterwards.
y_train = (df_train.pop('status') == 'default').astype('int').values
y_test = (df_test.pop('status') == 'default').astype('int').values

In [6]:
# Turn each frame into a list with one dict per row; missing values are
# filled with 0 first.
train_dicts = df_train.fillna(0).to_dict(orient='records')
test_dicts = df_test.fillna(0).to_dict(orient='records')

# Dense (sparse=False) vectorizer. It is fitted on the training records only;
# the test records are merely transformed with the already-fitted instance.
dv = DictVectorizer(sparse=False)
X_train = dv.fit_transform(train_dicts)
X_test = dv.transform(test_dicts)

### Random forest

In [7]:
# Hyper-parameters of the forest, collected in one place.
rf_params = {
    'n_estimators': 200,
    'max_depth': 10,
    'min_samples_leaf': 3,
    'random_state': 1,
}

rf = RandomForestClassifier(**rf_params)

# Train on the vectorized training rows; fit() is left as the final expression
# so the notebook still displays the fitted estimator.
rf.fit(X_train, y_train)

RandomForestClassifier(max_depth=10, min_samples_leaf=3, n_estimators=200,
                       random_state=1)

ModuleNotFoundError: No module named 'bentoml'

In [16]:

# Store the fitted forest in the local BentoML model store under the name
# "credit_risk_model". The fitted DictVectorizer is attached as a custom
# object so that whoever loads the model gets the matching feature encoder.
#
# `bentoml.sklearn.save` is deprecated (BentoML itself prints that warning when
# it is called); `save_model` is the replacement and takes the same arguments.
# Left as the final expression so the notebook displays the saved Model and
# its generated tag.
bentoml.sklearn.save_model(
    "credit_risk_model",
    rf,
    custom_objects={
        'dictVectorizer': dv,
    },
)



The "bentoml.sklearn.save" method is being deprecated. Use "bentoml.sklearn.save_model" instead


Model(tag="credit_risk_model:24jb4vcqmsawhlg6", path="/Users/jainendra/bentoml/models/credit_risk_model/24jb4vcqmsawhlg6/")

### BentoML

In [23]:
import numpy as np

import bentoml
from bentoml.io import JSON

# Look the model up by name with the ":latest" alias instead of a hard-coded
# version tag. A literal tag such as "24jb4vcqmsawhlg6" only exists in the
# model store of the machine that produced it and becomes stale as soon as the
# save cell is run again; ":latest" always resolves to the newest saved version.
model_ref = bentoml.sklearn.get("credit_risk_model:latest")

# The DictVectorizer that was stored next to the model as a custom object.
dv = model_ref.custom_objects['dictVectorizer']

# Runner wrapping the model, and the service that owns it.
model_runner = model_ref.to_runner()

svc = bentoml.Service("credit_risk_classifier", runners=[model_runner])

### Test

In [24]:
# Sample credit application used to smoke-test the model.
#
# The previous payload (name / country / rating) did not contain the features
# the DictVectorizer was fitted on; DictVectorizer silently ignores unseen
# features, so the model was effectively scored on an almost empty row.
#
# Categorical values use the labels produced by the data-preparation cell
# (home, marital, records, job). The remaining numeric field names assume the
# standard CreditScoring.csv columns, lower-cased -- verify against df.columns.
application_data = {
    "seniority": 3,
    "home": "owner",
    "time": 36,
    "age": 26,
    "marital": "single",
    "records": "no",
    "job": "freelance",
    "expenses": 35,
    "income": 0.0,
    "assets": 60000.0,
    "debt": 3000.0,
    "amount": 800,
    "price": 1000,
}

In [25]:
vector = dv.transform(application_data)
prediction = await model_runner.predict.async_run(vector)
print(prediction)

StateException: Runner is not initialized