# Import and prepare data

In [1]:
import sys
sys.version

'3.7.3 (default, Mar 27 2019, 22:11:17) \n[GCC 7.3.0]'

In [2]:
import numpy as np
import pandas as pd

In [3]:
hmeq = pd.read_csv('hmeq.csv')
hmeq.head()

Unnamed: 0,BAD,LOAN,MORTDUE,VALUE,REASON,JOB,YOJ,DEROG,DELINQ,CLAGE,NINQ,CLNO,DEBTINC
0,1,1100,25860.0,39025.0,HomeImp,Other,10.5,0.0,0.0,94.366667,1.0,9.0,
1,1,1300,70053.0,68400.0,HomeImp,Other,7.0,0.0,2.0,121.833333,0.0,14.0,
2,1,1500,13500.0,16700.0,HomeImp,Other,4.0,0.0,0.0,149.466667,1.0,10.0,
3,1,1500,,,,,,,,,,,
4,0,1700,97800.0,112000.0,HomeImp,Office,3.0,0.0,0.0,93.333333,0.0,14.0,


In [4]:
hmeq.shape

(5960, 13)

In [5]:
# Get y values
y = hmeq.pop('BAD').values

In [6]:
hmeq.dtypes

LOAN         int64
MORTDUE    float64
VALUE      float64
REASON      object
JOB         object
YOJ        float64
DEROG      float64
DELINQ     float64
CLAGE      float64
NINQ       float64
CLNO       float64
DEBTINC    float64
dtype: object

In [7]:
# Get data types for X values
kinds = np.array([dt.kind for dt in hmeq.dtypes])
kinds

array(['i', 'f', 'f', 'O', 'O', 'f', 'f', 'f', 'f', 'f', 'f', 'f'],
      dtype='<U1')

In [8]:
# Select category columns
all_columns = hmeq.columns.values
is_cat = kinds == 'O'
cat_cols = all_columns[is_cat]
cat_cols

array(['REASON', 'JOB'], dtype=object)

# Train ML pipeline

In [9]:
from sklearn.impute import SimpleImputer
from sklearn.preprocessing import OneHotEncoder
from sklearn.pipeline import Pipeline
from sklearn.compose import ColumnTransformer
import xgboost as xgb

In [10]:
# Create an ML pipeline that imputes and one-hot encodes category columns then trains an xgboost model
cat_si_step = ('si', SimpleImputer(strategy='constant', fill_value='MISSING'))
cat_ohe_step = ('ohe', OneHotEncoder(sparse=False, handle_unknown='ignore'))
cat_steps = [cat_si_step, cat_ohe_step]
cat_pipe = Pipeline(cat_steps)
cat_transformers = [('cat', cat_pipe, cat_cols)]
ct = ColumnTransformer(transformers=cat_transformers)
ml_pipe = Pipeline([('transform', ct), ('xgb', xgb.XGBClassifier())])
ml_pipe.fit(hmeq, y)

Pipeline(memory=None,
     steps=[('transform', ColumnTransformer(n_jobs=None, remainder='drop', sparse_threshold=0.3,
         transformer_weights=None,
         transformers=[('cat', Pipeline(memory=None,
     steps=[('si', SimpleImputer(copy=True, fill_value='MISSING', missing_values=nan,
       strategy='constant', verbos...
       reg_alpha=0, reg_lambda=1, scale_pos_weight=1, seed=None,
       silent=True, subsample=1))])

In [11]:
ml_pipe.score(hmeq, y)

0.8013422818791947

In [12]:
from sklearn.externals import joblib

In [13]:
# Save model
!mkdir -p /pymodels/hmeq2
joblib.dump(ml_pipe, '/pymodels/hmeq2/ml_pipe.pickle')

['/pymodels/hmeq2/ml_pipe.pickle']

In [14]:
import py2ds2

In [15]:
py2ds2.create_from_pickle('/pymodels/hmeq2/ml_pipe.pickle',
                          hmeq,
                          'CLASSIFICATION',
                          '/pymodels/hmeq2/score.py',
                          '/pymodels/hmeq2/score.sas',
                          '/pymodels/hmeq2/inputVar.json',
                          '/pymodels/hmeq2/outputVar.json')

In [16]:
%%writefile /pymodels/hmeq2/fileMetadata.json
[
  {
    "role": "inputVariables",
    "name": "inputVar.json"
  },
  {
    "role": "outputVariables",
    "name": "outputVar.json"
  },
  {
    "role": "score",
    "name": "score.sas"
  }
]

Overwriting /pymodels/hmeq2/fileMetadata.json


# Register model to SAS Model Manager

In [17]:
from viyapy import ViyaClient
from viyapy.variables import String, Decimal, Boolean, Date, DateTime, Integer
import getpass

In [18]:
client = ViyaClient('http://eyap-mas.gtp-americas.sashq-d.openstack.sas.com')
password = getpass.getpass()

········


In [19]:
token = client.logon.authenticate('viyademo01', password)

In [20]:
project = client.model_manager.get_project('ddfa342b-3c85-4afe-867e-a74de002c855')

In [21]:
project.create_model(
    name='Python_XGBoost',
    files=[
        '/pymodels/hmeq2/score.py',
        '/pymodels/hmeq2/score.sas',
        '/pymodels/hmeq2/inputVar.json',
        '/pymodels/hmeq2/outputVar.json',
        '/pymodels/hmeq2/fileMetadata.json'
    ]
)

Model <ID: '14dafcc1-3e78-46ae-b238-43abf552ad67', Name: 'Python_XGBoost'>