# Imports

In [6]:
import logging
import boto3
from botocore.exceptions import ClientError
import pandas as pd

from sagemaker.amazon.amazon_estimator import get_image_uri
from sagemaker.session import s3_input
from sagemaker import get_execution_role
from sagemaker import Session
from sagemaker.estimator import Estimator

In [7]:
# Notebook-wide configuration: S3 bucket name and the AWS region to work in.
# NOTE(review): the S3 paths visible in this notebook point at a different
# bucket; confirm whether `bucket_name` is still needed.
bucket_name, region = 'ye-1468', 'eu-central-1'

In [8]:
# boto3 session pinned to the region chosen in the configuration cell.
session = boto3.Session(region_name=region)

In [9]:
# Create the low-level S3 client and the resource-style S3 handle from our
# custom session (not from the module-level boto3 default session), so both
# inherit the region configured on `session`.
s3_client = session.client('s3')
s3 = session.resource('s3')

# Training

## Data

In [10]:
# S3 locations of the CSV datasets handed to the training job.
train_data = 's3://aida-project/niy/train.csv'
test_data = 's3://aida-project/niy/test.csv'

# Wrap each location as a SageMaker input, declaring the content type as CSV.
s3_input_train, s3_input_test = (
    s3_input(uri, content_type='text/csv') for uri in (train_data, test_data)
)

# Channel name -> input; the test file is supplied as the "validation" channel.
data_channels = dict(train=s3_input_train, validation=s3_input_test)

's3_input' class will be renamed to 'TrainingInput' in SageMaker Python SDK v2.
's3_input' class will be renamed to 'TrainingInput' in SageMaker Python SDK v2.


In [11]:
# Look up the execution role first, then open the SageMaker session;
# both are passed to the Estimator further down.
role, sess = get_execution_role(), Session()

## XGBoost

In [12]:
# Use a previously-built, AWS XGBoost model for training
container = get_image_uri(region_name=region,
                          repo_name='xgboost',
                          repo_version='1.0-1')

'get_image_uri' method will be deprecated in favor of 'ImageURIProvider' class in SageMaker Python SDK v2.


In [13]:
# create an XGBoost Estimator
xgb_model = Estimator(container,
                      role, 
                      train_instance_count=1,
                      train_instance_type='ml.m4.xlarge',
                      output_path='s3://aida-project/niy/output',
                      sagemaker_session=sess,
                      base_job_name='niy')

Parameter image_name will be renamed to image_uri in SageMaker Python SDK v2.


In [14]:
# Select your specific hyperparameters; they are collected in one mapping and
# then handed to the estimator in a single call.
xgb_hyperparameters = {
    'max_depth': 5,
    'eta': 0.2,
    'gamma': 4,
    'min_child_weight': 6,
    'subsample': 0.8,
    'silent': 0,
    'objective': 'reg:squarederror',
    'num_round': 100,
}
xgb_model.set_hyperparameters(**xgb_hyperparameters)

In [15]:
# Launch the training job on the channels defined in the Data section.
# Reusing `data_channels` keeps a single definition of the train/validation
# inputs instead of rebuilding the same mapping here. `wait=True` keeps the
# cell running while the job logs stream into the output below.
xgb_model.fit(data_channels, wait=True)

2020-11-18 13:38:57 Starting - Starting the training job...
2020-11-18 13:38:59 Starting - Launching requested ML instances......
2020-11-18 13:40:23 Starting - Preparing the instances for training......
2020-11-18 13:41:22 Downloading - Downloading input data...
2020-11-18 13:41:47 Training - Downloading the training image...
2020-11-18 13:42:21 Uploading - Uploading generated training model[34mINFO:sagemaker-containers:Imported framework sagemaker_xgboost_container.training[0m
[34mINFO:sagemaker-containers:Failed to parse hyperparameter objective value reg:squarederror to Json.[0m
[34mReturning the value itself[0m
[34mINFO:sagemaker-containers:No GPUs detected (normal if no gpus installed)[0m
[34mINFO:sagemaker_xgboost_container.training:Running XGBoost Sagemaker in algorithm mode[0m
[34mINFO:root:Determined delimiter of CSV input is ','[0m
[34mINFO:root:Determined delimiter of CSV input is ','[0m
[34mINFO:root:Determined delimiter of CSV input is ','[0m
[34m[13:42:1

# Deployment

In [16]:
# TODO: deploy the trained model to an endpoint.

In [20]:
# Disabled for now: uncomment to deploy the trained model.
#
# Deploy your model to an endpoint to perform predictions
# xgb_predictor = xgb_model.deploy(
#     initial_instance_count=1,
#     instance_type='ml.t2.medium')




In [21]:
# Disabled for now: uncomment once `xgb_predictor` exists (see the deploy step).
# When enabling, move the import into the imports cell at the top.
#
# Configure the predictor's serializer and deserializer
#
# from sagemaker.predictor import csv_serializer, csv_deserializer
# xgb_predictor.content_type = 'text/csv'  # set the data type for an inference
# xgb_predictor.serializer = csv_serializer  # set the serializer type
# xgb_predictor.deserializer = csv_deserializer




## Prediction

In [17]:
# TODO: run predictions against the deployed endpoint.

In [22]:
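# Disabled: requires a deployed `xgb_predictor`.
# NOTE(review): `df_test` is not defined in the visible cells; load it before enabling.
# xgb_predictor.predict(df_test.values)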