In [1]:
import pathlib
import os
import sys
import boto3

# Parent of the notebook's working directory. Presumably this is the project
# root holding the local `config` package imported right after this block.
CURRENT_DIR = pathlib.Path('.').resolve().parent

# Add the directory to sys.path, but only once: re-running this cell must not
# keep appending duplicate entries to the interpreter's search path.
if str(CURRENT_DIR) not in sys.path:
    sys.path.append(str(CURRENT_DIR))

from config import config

import pandas as pd

In [2]:
import sagemaker
from sagemaker import image_uris
from sagemaker.session import Session
from sagemaker.inputs import TrainingInput

sagemaker.config INFO - Not applying SDK defaults from location: /etc/xdg/sagemaker/config.yaml
sagemaker.config INFO - Not applying SDK defaults from location: /home/ec2-user/.config/sagemaker/config.yaml


In [3]:
# Look up the image URI of the SageMaker XGBoost container (version 1.2-2)
# for the region configured on the default boto3 session, then show it.
xgboost_container = image_uris.retrieve(
    framework='xgboost',
    region=boto3.Session().region_name,
    version='1.2-2',
)
display(xgboost_container)

'683313688378.dkr.ecr.us-east-1.amazonaws.com/sagemaker-xgboost:1.2-2'

In [4]:
# Hyperparameters passed to the XGBoost training container.
# Values are kept exactly as written: some are strings, some are ints.
hyperparameters = {
    'max_depth': '5',
    'subsample': '0.7',
    'objective': 'reg:squarederror',   # regression with squared-error loss
    'early_stopping_rounds': 10,       # presumably evaluated on the 'validation' channel - confirm
    'num_round': 1000,
}

In [5]:
# Configure the training job: a single ml.m4.xlarge spot instance running the
# XGBoost container, with output written under config.OUTPUT.
estimator = sagemaker.estimator.Estimator(
    image_uri=xgboost_container,
    hyperparameters=hyperparameters,
    role=sagemaker.get_execution_role(),
    instance_count=1,
    instance_type='ml.m4.xlarge',
    volume_size=5,
    output_path=config.OUTPUT,
    # Spot training: max_run bounds the training time and max_wait bounds the
    # total time including waiting for spot capacity (units per the SageMaker
    # Estimator documentation - confirm before tuning).
    use_spot_instances=True,
    max_run=300,
    max_wait=600,
)

In [6]:
# Both channels are declared as CSV; their data locations come from the
# project config module.
content_type = 'csv'
train_input = TrainingInput(
    s3_data=config.TRAIN_CSV_PATH,
    content_type=content_type,
)
test_input = TrainingInput(
    s3_data=config.TEST_CSV_PATH,
    content_type=content_type,
)

In [7]:
# Launch the training job. Note that the test split is what gets supplied as
# the 'validation' channel.
estimator.fit(
    inputs={'train': train_input, 'validation': test_input},
)

INFO:sagemaker:Creating training-job with name: sagemaker-xgboost-2024-03-15-01-11-53-132


2024-03-15 01:11:53 Starting - Starting the training job...
2024-03-15 01:12:16 Starting - Preparing the instances for training......
2024-03-15 01:13:23 Downloading - Downloading input data...
2024-03-15 01:13:49 Downloading - Downloading the training image.........
2024-03-15 01:15:15 Training - Training image download completed. Training in progress.
2024-03-15 01:15:15 Uploading - Uploading generated training model[34m[2024-03-15 01:15:11.031 ip-10-2-201-226.ec2.internal:7 INFO utils.py:27] RULE_JOB_STOP_SIGNAL_FILENAME: None[0m
[34m[2024-03-15:01:15:11:INFO] Imported framework sagemaker_xgboost_container.training[0m
[34m[2024-03-15:01:15:11:INFO] Failed to parse hyperparameter objective value reg:squarederror to Json.[0m
[34mReturning the value itself[0m
[34m[2024-03-15:01:15:11:INFO] No GPUs detected (normal if no gpus installed)[0m
[34m[2024-03-15:01:15:11:INFO] Running XGBoost Sagemaker in algorithm mode[0m
[34m[2024-03-15:01:15:11:INFO] Determined delimiter of CSV

# Deploy

In [9]:
from sagemaker.serializers import CSVSerializer

# Deploy the trained model behind an endpoint.
#   initial_instance_count -> the number of instances to deploy
#   serializer             -> input data is sent to the endpoint in CSV format
xgb_predictor = estimator.deploy(
    initial_instance_count=1,
    instance_type='ml.m4.xlarge',
    serializer=CSVSerializer(),
)

INFO:sagemaker:Creating model with name: sagemaker-xgboost-2024-03-15-01-29-16-895
INFO:sagemaker:Creating endpoint-config with name sagemaker-xgboost-2024-03-15-01-29-16-895
INFO:sagemaker:Creating endpoint with name sagemaker-xgboost-2024-03-15-01-29-16-895


------!

In [13]:
# Capture the name of the endpoint behind the predictor returned by
# estimator.deploy(), and display it as the cell output.
ENDPOINT_NAME = xgb_predictor.endpoint_name
ENDPOINT_NAME

'sagemaker-xgboost-2024-03-15-01-29-16-895'

In [16]:
import configparser

# Persist the endpoint name to an INI file (presumably so code outside this
# notebook can look the endpoint up - confirm against the consumers).
#
# The parser is deliberately NOT called `config`: that name is bound to the
# project `config` module imported in the first cell, and rebinding it here
# would break `config.OUTPUT` / `config.TRAIN_CSV_PATH` whenever the estimator
# or input cells are re-run after this one.
endpoint_config = configparser.ConfigParser()
endpoint_config['Sagemaker'] = {'endpoint_name': ENDPOINT_NAME}

with open('../config/sagemaker_config.ini', 'w') as configfile:
    endpoint_config.write(configfile)

In [14]:
# Low-level boto3 client requested under the 'runtime.sagemaker' service name;
# the object it yields is a botocore SageMakerRuntime client.
runtime = boto3.client('runtime.sagemaker')

In [17]:
# Display the client object to confirm it was created.
runtime

<botocore.client.SageMakerRuntime at 0x7fbe42ce8a60>