## Train XGBoost using SageMaker local mode + built-in container

In [3]:
from sagemaker import get_execution_role
from sagemaker.xgboost.estimator import XGBoost
import sagemaker

# Execution role ARN handed to the estimator further down.
# NOTE(review): this ARN is hard-coded even though get_execution_role is
# imported above and never called -- confirm which one is intended.
role = "arn:aws:iam::342474125894:role/service-role/AmazonSageMaker-ExecutionRole-20190405T234154"

# SageMaker session for this notebook.
sagemaker_session = sagemaker.Session()

# Training/validation data is read from the session's default bucket,
# under this key prefix.
bucket = sagemaker_session.default_bucket()
prefix = "script-mode-container-xgb"


In [4]:
# Describe the two CSV input channels: <bucket>/<prefix>/train/ and
# <bucket>/<prefix>/val/.
train_config = sagemaker.inputs.TrainingInput(
    s3_data=f's3://{bucket}/{prefix}/train/',
    content_type='text/csv',
)
val_config = sagemaker.inputs.TrainingInput(
    s3_data=f's3://{bucket}/{prefix}/val/',
    content_type='text/csv',
)

# Show the channel definitions, one per line.
print(train_config.config, val_config.config, sep='\n')

{'DataSource': {'S3DataSource': {'S3DataType': 'S3Prefix', 'S3Uri': 's3://sagemaker-ap-southeast-1-342474125894/script-mode-container-xgb/train/', 'S3DataDistributionType': 'FullyReplicated'}}, 'ContentType': 'text/csv'}
{'DataSource': {'S3DataSource': {'S3DataType': 'S3Prefix', 'S3Uri': 's3://sagemaker-ap-southeast-1-342474125894/script-mode-container-xgb/val/', 'S3DataDistributionType': 'FullyReplicated'}}, 'ContentType': 'text/csv'}


In [5]:
# Hyperparameters handed to the estimator below.
hyperparameters = dict(hp1='str1', hp2=2, hp3=0.1)

# XGBoost framework estimator: runs train.py from ../docker/code on a single
# ml.m5.2xlarge instance with framework version 1.0-1.
xgb_estimator = XGBoost(
    role=role,
    entry_point="train.py",
    source_dir="../docker/code",
    framework_version="1.0-1",
    hyperparameters=hyperparameters,
    instance_count=1,
    # Swap in instance_type='local' to use SageMaker local mode instead.
    instance_type="ml.m5.2xlarge",
)

In [6]:
# Start the training job with the 'train' and 'validation' input channels.
xgb_estimator.fit(
    inputs={
        'train': train_config,
        'validation': val_config,
    },
)

2020-08-20 06:42:32 Starting - Starting the training job...
2020-08-20 06:42:34 Starting - Launching requested ML instances......
2020-08-20 06:43:43 Starting - Preparing the instances for training...
2020-08-20 06:44:21 Downloading - Downloading input data
2020-08-20 06:44:21 Training - Downloading the training image...
2020-08-20 06:45:01 Uploading - Uploading generated training model
INFO:sagemaker-containers:Imported framework sagemaker_xgboost_container.training
INFO:sagemaker-containers:No GPUs detected (normal if no gpus installed)
INFO:sagemaker_xgboost_container.training:Invoking user training script.
INFO:sagemaker-containers:Module train does not provide a setup.py.
Generating setup.py
INFO:sagemaker-containers:Generating setup.cfg
INFO:sagemaker-containers:Generating MANIFEST.in
INFO:sagemaker-containers:Installing module with the following command:
/miniconda3/bin/python -m pip install .
[34

In [10]:
# Keep the name of the estimator's latest training job; it is used below to
# look up that job's metrics.
job_name = xgb_estimator.latest_training_job.name

In [14]:
from sagemaker.analytics import TrainingJobAnalytics

# Metric to fetch for the training job. Other names previously tried here:
# 'validation:rmse', 'validation_1-mlogloss'. (The old 'validation:rmse'
# assignment was immediately overwritten, so it has been dropped.)
# NOTE(review): presumably this must match a metric the job actually
# publishes -- confirm against the job's metric definitions.
metric_name = 'mlogloss'

# Load the recorded values of that metric for the job into a DataFrame.
metrics_dataframe = TrainingJobAnalytics(
    training_job_name=job_name,
    metric_names=[metric_name],
).dataframe()

