In [None]:
import json
import tarfile

import boto3
import joblib
import pandas as pd
import sagemaker
import xgboost as xgb
from sagemaker import image_uris
from sagemaker.model import Model
from sagemaker.predictor import Predictor
from sagemaker.serializers import CSVSerializer
from sklearn.metrics import roc_auc_score

In [None]:
# Create the SageMaker session, then derive the region, execution role and
# S3 client that the rest of the notebook relies on.
sess = sagemaker.Session()
region = sess.boto_region_name  # region the session is bound to
sagemaker_role = sagemaker.get_execution_role()
s3_client = boto3.client(service_name="s3", region_name=region)

In [None]:
# S3 buckets: the session's default bucket is used for both reading and writing.
write_bucket = sess.default_bucket()
read_bucket = sess.default_bucket()

# Object keys inside the bucket. `model_key` / `output_key` are presumably
# prefixes for model artifacts and outputs; they are not used in the cells
# visible here.
train_data_key = "train.csv"
test_data_key = "test.csv"
model_key = "model"
output_key = "output"

# Full S3 URIs of the input CSV files.
train_data_uri = f"s3://{read_bucket}/{train_data_key}"
test_data_uri = f"s3://{read_bucket}/{test_data_key}"

In [None]:
#MODEL PARAMETERS
# Booster parameters handed to XGBoost as `params`.
hyperparams = {
    "max_depth": 3,
    "eta": 0.2,
    "objective": "binary:logistic",
    "subsample": 0.8,
    "colsample_bytree": 0.8,
    "min_child_weight": 3,
}

# Training-loop settings. `nfold` is not referenced in the cells visible
# here (presumably intended for cross-validation).
num_boost_round = 100
nfold = 3
early_stopping_rounds = 10

In [None]:
# Load the training CSV from S3 and discard the 'Unnamed: 0' column
# (presumably an index column written out when the CSV was saved).
label_col = "LABEL_ENCODED"
data = pd.read_csv(train_data_uri).drop(columns="Unnamed: 0")

In [None]:
# Split the training data into feature columns and the target column,
# then wrap both in an XGBoost DMatrix.
train_label = data[[label_col]]  # single-column DataFrame holding the label
train_features = data.drop(columns=[label_col])
dtrain = xgb.DMatrix(train_features, label=train_label)

In [None]:
#TRAIN THE MODEL
# Build the evaluation matrix from the test CSV. Without this, `dtest` is
# undefined and the cell fails with a NameError on a fresh kernel.
# NOTE(review): assumes test.csv has the same columns as train.csv
# (including the label column) -- confirm. The stray index column is
# dropped only if present.
test_data = pd.read_csv(test_data_uri).drop(columns=["Unnamed: 0"], errors="ignore")
dtest = xgb.DMatrix(test_data.drop(columns=[label_col]), label=test_data[label_col])

# Train with early stopping. XGBoost uses the LAST entry of `evals`
# (here the 'eval' set) as the early-stopping criterion.
model = xgb.train(
    params=hyperparams,
    dtrain=dtrain,
    num_boost_round=num_boost_round,
    evals=[(dtrain, "train"), (dtest, "eval")],
    early_stopping_rounds=early_stopping_rounds,
    verbose_eval=False,  # suppress per-round metric logging
)

In [None]:
#DEPLOY THE MODEL
# `xgb` is the xgboost module and has no `deploy`; the locally trained
# booster has to be packaged, uploaded to S3 and wrapped in a SageMaker
# Model before an endpoint can be created.

# 1) Save the booster and pack it into the model.tar.gz layout SageMaker expects.
model_file = "xgboost-model"
model_archive = "model.tar.gz"
model.save_model(model_file)
with tarfile.open(model_archive, "w:gz") as archive:
    archive.add(model_file)

# 2) Upload the archive; returns the s3:// URI of the uploaded object.
model_data_uri = sess.upload_data(model_archive, bucket=write_bucket, key_prefix=model_key)

# 3) Resolve the built-in XGBoost serving image for this region.
# NOTE(review): the container's XGBoost version must be able to read the
# file written by the local xgboost install -- confirm the version below
# matches the notebook environment.
xgb_image_uri = image_uris.retrieve("xgboost", region, version="1.7-1")

# 4) Create the model and deploy it. `predictor_cls` is required for
# `deploy` to return a predictor object instead of None.
xgb_model = Model(
    image_uri=xgb_image_uri,
    model_data=model_data_uri,
    role=sagemaker_role,
    predictor_cls=Predictor,
    sagemaker_session=sess,
)
xgb_predictor = xgb_model.deploy(
    initial_instance_count=1,
    instance_type="ml.m4.xlarge",
    serializer=CSVSerializer(),
)