In [2]:
import boto3
import sagemaker
from time import gmtime, strftime

In [3]:
import sagemaker
from sagemaker.amazon.amazon_estimator import get_image_uri
container = get_image_uri(boto3.Session().region_name, 'xgboost')


The method get_image_uri has been renamed in sagemaker>=2.
See: https://sagemaker.readthedocs.io/en/stable/v2.html for details.


## Create Model

In [4]:
train_data = 's3://sagemaker-us-east-1-346023323361/knn/train/train.csv'
validation_data = 's3://sagemaker-us-east-1-346023323361/knn/valid/valid.csv'
#validation_data = 's3://{}/{}/{}'.format(bucket, prefix, 'validation')
s3_output_location = 's3://sagemaker-us-east-1-346023323361/knn/output/xgboost_model_sdk'
print(train_data)


s3://sagemaker-us-east-1-346023323361/knn/train/train.csv


In [6]:
role = sagemaker.get_execution_role()

xgb_model = sagemaker.estimator.Estimator(container,
 role,
 train_instance_count=1,
 train_instance_type='ml.m4.xlarge',
 train_volume_size = 5,
 output_path=s3_output_location,
 sagemaker_session=sagemaker.Session())

train_instance_count has been renamed in sagemaker>=2.
See: https://sagemaker.readthedocs.io/en/stable/v2.html for details.
train_instance_type has been renamed in sagemaker>=2.
See: https://sagemaker.readthedocs.io/en/stable/v2.html for details.
train_volume_size has been renamed in sagemaker>=2.
See: https://sagemaker.readthedocs.io/en/stable/v2.html for details.


In [7]:
xgb_model.set_hyperparameters(max_depth = 5,
 eta = .2,
 gamma = 4,
 min_child_weight = 6,
 silent = 0,
 objective = "multi:softmax",
 num_class = 10,
 num_round = 10)

In [8]:
train_channel = sagemaker.session.s3_input(train_data, content_type='text/csv')
valid_channel = sagemaker.session.s3_input(validation_data, content_type='text/csv')
data_channels = {'train': train_channel, 'validation': valid_channel}


The class sagemaker.session.s3_input has been renamed in sagemaker>=2.
See: https://sagemaker.readthedocs.io/en/stable/v2.html for details.
The class sagemaker.session.s3_input has been renamed in sagemaker>=2.
See: https://sagemaker.readthedocs.io/en/stable/v2.html for details.


In [9]:
xgb_model.fit(inputs=data_channels, logs=True)

INFO:sagemaker:Creating training-job with name: xgboost-2023-04-12-19-59-34-135


2023-04-12 19:59:36 Starting - Starting the training job...
2023-04-12 20:00:13 Starting - Preparing the instances for training............
2023-04-12 20:01:54 Downloading - Downloading input data...
2023-04-12 20:02:24 Training - Downloading the training image...
2023-04-12 20:03:09 Training - Training image download completed. Training in progress...[34mArguments: train[0m
[34m[2023-04-12:20:03:23:INFO] Running standalone xgboost training.[0m
[34m[2023-04-12:20:03:23:INFO] File size need to be processed in the node: 9.56mb. Available memory size in the node: 8600.33mb[0m
[34m[2023-04-12:20:03:23:INFO] Determined delimiter of CSV input is ','[0m
[34m[20:03:23] S3DistributionType set as FullyReplicated[0m
[34m[20:03:23] 100150x39 matrix with 3905850 entries loaded from /opt/ml/input/data/train?format=csv&label_column=0&delimiter=,[0m
[34m[2023-04-12:20:03:23:INFO] Determined delimiter of CSV input is ','[0m
[34m[20:03:23] S3DistributionType set as FullyReplicated[0m
[3

## Create Training Job

In [12]:
from sagemaker.amazon.amazon_estimator import get_image_uri
container = get_image_uri(boto3.Session().region_name, 'xgboost')

See: https://sagemaker.readthedocs.io/en/stable/v2.html for details.
INFO:sagemaker.image_uris:Ignoring unnecessary instance type: None.


In [13]:
# Run this cell to import the Data Wrangler widget to show automatic visualization and generate code to fix data quality issues

import sagemaker_datawrangler

# Display Pandas DataFrame to view the widget: df, display(df), df.sample()... 

In [14]:
#Ensure that the train and validation data folders generated above are reflected in the
bucket_path = 's3://sagemaker-us-east-1-346023323361/knn/train/train.csv'
bucket_valid_path = 's3://sagemaker-us-east-1-346023323361/knn/valid/valid.csv'
role = sagemaker.get_execution_role()
role_name = role.split("/")[-1]

common_training_params = \
{
 "AlgorithmSpecification": {
 "TrainingImage": container,
 "TrainingInputMode": "File"
 },
 "RoleArn": role,
 "OutputDataConfig": {
 "S3OutputPath": bucket_path + "/xgboost"
 },
 "ResourceConfig": {
 "InstanceCount": 1,
 "InstanceType": "ml.m4.xlarge",
 "VolumeSizeInGB": 5
 },
 "HyperParameters": {
 "max_depth":"5",
 "eta":"0.2",
 "gamma":"4",
 "min_child_weight":"6",
 "silent":"0",
 "objective": "multi:softmax",
 "num_class": "10",
 "num_round": "10"
 },
 "StoppingCondition": {
 "MaxRuntimeInSeconds": 86400
 },
 "InputDataConfig": [
 {
 "ChannelName": "train",
 "DataSource": {
 "S3DataSource": {
 "S3DataType": "S3Prefix",
 "S3Uri": bucket_path,
 "S3DataDistributionType": "FullyReplicated"
 }
 },
 "ContentType": "text/csv",
 "CompressionType": "None"
 },
 {
 "ChannelName": "validation",
 "DataSource": {
 "S3DataSource": {
 "S3DataType": "S3Prefix",
 "S3Uri": bucket_valid_path,
 "S3DataDistributionType": "FullyReplicated"
 }
 },
 "ContentType": "text/csv",
 "CompressionType": "None"
 }
 ]
}




In [15]:
#training job params
import copy

training_job_name = 'xgboost-mnist' + strftime("%Y-%m-%d-%H-%M-%S", gmtime())
print("Job name is:", training_job_name)
training_job_params = copy.deepcopy(common_training_params)
training_job_params['TrainingJobName'] = training_job_name
training_job_params['ResourceConfig']['InstanceCount'] = 1


Job name is: xgboost-mnist2023-04-12-20-06-50


In [16]:
%%time

region = boto3.Session().region_name
sm = boto3.Session().client('sagemaker')
sm.create_training_job(**training_job_params)
status = sm.describe_training_job(TrainingJobName=training_job_name)['TrainingJobStatus']
print(status)

sm.get_waiter('training_job_completed_or_stopped').wait(TrainingJobName=training_job_name)
status = sm.describe_training_job(TrainingJobName=training_job_name)['TrainingJobStatus']
print("Training job ended with status: " + status)
if status == 'Failed':
     message = sm.describe_training_job(TrainingJobName=training_job_name)['FailureReason']
     print('Training failed with the following error: {}'.format(message))
     raise Exception('Training job failed')


InProgress
Training job ended with status: Completed
CPU times: user 127 ms, sys: 34 ms, total: 161 ms
Wall time: 4min 1s


## Deploy The Model EndPoint

In [17]:
xgb_predictor = xgb_model.deploy(initial_instance_count=1,
 instance_type='ml.m4.xlarge',
 )


INFO:sagemaker:Creating model with name: xgboost-2023-04-12-20-23-20-140
INFO:sagemaker:Creating endpoint-config with name xgboost-2023-04-12-20-23-20-140
INFO:sagemaker:Creating endpoint with name xgboost-2023-04-12-20-23-20-140


--------!

In [18]:
xgb_predictor.endpoint_name

'xgboost-2023-04-12-20-23-20-140'

## Validate The Model and Test The End Point

In [None]:
s3 = boto3.resource('s3')
test_key = "{}/test/examples".format(prefix)
s3.Bucket(bucket).download_file(test_key, 'test_data')

In [None]:
%matplotlib inline

for i in range (0, 10):
 img = test_set[0][i]
 label = test_set[1][i]
 img_reshape = img.reshape((28,28))
 imgplot = plt.imshow(img_reshape, cmap='gray')
 print('This is a {}'.format(label))
 plt.show()


In [None]:
with open('test_data', 'r') as f:
 for j in range(0,10):
 single_test = f.readline()
 result = xgb_predictor.predict(single_test)
 print(result)
