# Setup

In [1]:
import sagemaker
from sagemaker import get_execution_role
from sagemaker.inputs import TrainingInput
import boto3
import pandas as pd
from sklearn.model_selection import train_test_split 

sagemaker_session = sagemaker.Session()
role = get_execution_role()
region = boto3.Session().region_name

# S3 bucket for storing data
bucket = 'sagemaker-us-east-1-100163808729'
prefix = 'deployment'
output_path = f's3://{bucket}/{prefix}/output'



sagemaker.config INFO - Not applying SDK defaults from location: /etc/xdg/sagemaker/config.yaml
sagemaker.config INFO - Not applying SDK defaults from location: /home/ec2-user/.config/sagemaker/config.yaml


# Prepare dataset

In [2]:

file_path = 's3://sagemaker-us-east-1-100163808729/mlflow/input-data/Employee.csv'  # Replace with your actual file path in S3 if needed
employee_df = pd.read_csv(file_path)
employee_df.head()

# Convert categorical columns to numeric
employee_df['Education'] = employee_df['Education'].astype('category').cat.codes
employee_df['City'] = employee_df['City'].astype('category').cat.codes
employee_df['Gender'] = employee_df['Gender'].astype('category').cat.codes
employee_df['EverBenched'] = employee_df['EverBenched'].map({'Yes': 1, 'No': 0})

# Drop rows with NaN values in the target column
employee_df.dropna(subset=['LeaveOrNot'])

# Convert target column to numeric if needed
employee_df['LeaveOrNot'] = employee_df['LeaveOrNot'].astype(int)

# Ensure no missing values in feature columns
employee_df = employee_df.dropna()

# Verify all columns are numeric
print(employee_df.dtypes)

# Define features and target
feature_columns = [
    'Education', 'JoiningYear', 'City', 'PaymentTier', 'Age',
    'Gender', 'EverBenched', 'ExperienceInCurrentDomain'
]
target_column = 'LeaveOrNot'

employee_df = employee_df[[target_column] + feature_columns]

train_df, test_df = train_test_split(employee_df, test_size=0.2, random_state=42)

# Display the transformed dataset
employee_df.head()

# Initialize S3 client
s3 = boto3.client('s3')


# Save the data locally first
train_file = 'train.csv'
validation_file = 'validation.csv'
train_df.to_csv(train_file, index=False)
test_df.to_csv(validation_file, index=False)

# Upload the data to S3
s3.upload_file(train_file, bucket, f'{prefix}/train/{train_file}')
s3.upload_file(validation_file, bucket, f'{prefix}/validation/{validation_file}')

print(f"Training data uploaded to s3://{bucket}/{prefix}/train/{train_file}")
print(f"Validation data uploaded to s3://{bucket}/{prefix}/validation/{validation_file}")


severe performance issues, see also https://github.com/dask/dask/issues/10276

To fix, you should specify a lower version bound on s3fs, or
update the current installation.



Education                     int8
JoiningYear                  int64
City                          int8
PaymentTier                  int64
Age                          int64
Gender                        int8
EverBenched                  int64
ExperienceInCurrentDomain    int64
LeaveOrNot                   int64
dtype: object


Training data uploaded to s3://sagemaker-us-east-1-100163808729/deployment/train/train.csv
Validation data uploaded to s3://sagemaker-us-east-1-100163808729/deployment/validation/validation.csv


# Create model

In [None]:
# Setup XGBoost Estimator
xgboost_container = sagemaker.image_uris.retrieve("xgboost", boto3.Session().region_name, "1.3-1")

hyperparameters = {
    "max_depth":"5",
    "eta":"0.2",
    "gamma":"40",
    "min_child_weight":"6",
    "subsample":"0.7",
    "objective":"binary:logistic",
    "num_round":"50"
}

output_path = f's3://{bucket}/{prefix}/output'

estimator = sagemaker.estimator.Estimator(
    image_uri=xgboost_container, 
    hyperparameters=hyperparameters,
    role=sagemaker.get_execution_role(),
    instance_count=1, 
    instance_type='ml.m5.xlarge', 
    volume_size=5,  # 5 GB 
    output_path=output_path
)

# Define the data type and paths to the training and validation datasets
content_type = "csv"
train_input = TrainingInput(f"s3://{bucket}/{prefix}/train/{train_file}", content_type=content_type)
validation_input = TrainingInput(f"s3://{bucket}/{prefix}/validation/{validation_file}", content_type=content_type)

# Execute the XGBoost training job
estimator.fit({'train': train_input, 'validation': validation_input})

2025-02-12 08:39:42 Starting - Starting the training job...
..25-02-12 08:39:56 Starting - Preparing the instances for training.
..25-02-12 08:40:39 Downloading - Downloading the training image.
.[34m[2025-02-12 08:41:24.967 ip-10-0-92-218.ec2.internal:7 INFO utils.py:28] RULE_JOB_STOP_SIGNAL_FILENAME: None[0m
[34m[2025-02-12 08:41:24.994 ip-10-0-92-218.ec2.internal:7 INFO profiler_config_parser.py:111] User has disabled profiler.[0m
[34m[2025-02-12:08:41:24:INFO] Imported framework sagemaker_xgboost_container.training[0m
[34m[2025-02-12:08:41:24:INFO] Failed to parse hyperparameter objective value binary:logistic to Json.[0m
[34mReturning the value itself[0m
[34m[2025-02-12:08:41:25:INFO] No GPUs detected (normal if no gpus installed)[0m
[34m[2025-02-12:08:41:25:INFO] Running XGBoost Sagemaker in algorithm mode[0m
[34m[2025-02-12:08:41:25:INFO] Determined delimiter of CSV input is ','[0m
[34m[2025-02-12:08:41:25:INFO] Determined delimiter of CSV input is ','[0m
[34m

# Deployment type 1 - real-time endpoint

In [4]:
predictor = estimator.deploy(
    initial_instance_count=1,  # Number of instances to deploy
    instance_type='ml.m5.xlarge',  # Instance type for the endpoint
    endpoint_name='employee-attrition-predictor-1'  # Name of the endpoint
)

print(type(predictor))

-----!<class 'sagemaker.base_predictor.Predictor'>


# Predict using above endpoint

In [5]:
# Predictions for a batch of records

predictor.serializer = sagemaker.serializers.CSVSerializer()
predictor.deserializer = sagemaker.deserializers.JSONDeserializer()

# Example input data (in the same format as your training data)
# Assuming 'LeaveOrNot' is the target column
features_df = test_df.drop(columns=['LeaveOrNot'])

# Convert the features DataFrame to CSV format
test_csv = features_df.to_csv(index=False, header=False).strip()
test_data = test_df[feature_columns].head(20)  # Select the first row of test data for prediction
test_data_csv = test_data.to_csv(index=False, header=False).strip()  # Convert to CSV format

# Invoke the endpoint
response = predictor.predict(test_data_csv)
print(response)  # The response will be in JSON format, containing the predicted label

# Predictions for a single record
test_data = test_df[feature_columns].head(1)  # Select the first row of test data for prediction
test_data_csv = test_data.to_csv(index=False, header=False).strip()  # Convert to CSV format

# Invoke the endpoint
response = predictor.predict(test_data_csv)
print(response)  # The response will be in JSON format, containing the predicted label

# Delete the endpoint when no longer needed
predictor.delete_endpoint()

{'predictions': [{'score': 0.20303820073604584}, {'score': 0.20303820073604584}, {'score': 0.3774380087852478}, {'score': 0.5340070128440857}, {'score': 0.1691509336233139}, {'score': 0.37122291326522827}, {'score': 0.6700358986854553}, {'score': 0.8091390132904053}, {'score': 0.1691509336233139}, {'score': 0.8091390132904053}, {'score': 0.1691509336233139}, {'score': 0.20303820073604584}, {'score': 0.9251188635826111}, {'score': 0.1691509336233139}, {'score': 0.35528063774108887}, {'score': 0.8091390132904053}, {'score': 0.1633274257183075}, {'score': 0.38120347261428833}, {'score': 0.36576730012893677}, {'score': 0.1691509336233139}]}
{'predictions': [{'score': 0.20303820073604584}]}


# Deployment type 2 - real-time endpoint but using pre-built model

In [10]:
from sagemaker.model import Model

# Specify the S3 path to the pre-trained model artifact
model_artifact = "s3://sagemaker-us-east-1-100163808729/deployment/output/sagemaker-xgboost-2025-02-12-06-57-40-558/output/model.tar.gz"

# Retrieve the container image for the framework (e.g., XGBoost)
container = sagemaker.image_uris.retrieve(framework="xgboost", region=boto3.Session().region_name, version="1.3-1")

# Create the model object using the S3 path
model = Model(
    image_uri=container,
    model_data=model_artifact,
    role=sagemaker.get_execution_role()
)

# Deploy the model as a real-time endpoint
predictor = model.deploy(
    initial_instance_count=1,  # Number of instances
    instance_type='ml.m5.xlarge',  # Instance type
    endpoint_name='employee-attrition-predictor-2'  # Name of the endpoint
)

print(type(model))
print(type(predictor))

------!<class 'sagemaker.model.Model'>
<class 'NoneType'>


# Predict using above endpoint

In [11]:
# Note that, here we are not using the predictor.predict() method, because predictor is of class NoneType and we are unable to invoke methods
# we are using the sagemeker-runtime.invoke_endpoint method instead

import boto3

# Initialize the SageMaker runtime client
runtime_client = boto3.client('sagemaker-runtime')

# Specify the endpoint name
endpoint_name = 'employee-attrition-predictor-5'

# Prepare your input data (same format as before)
test_data = test_df[feature_columns].head(25)  # Select the first row of test data for prediction
test_data_csv = test_data.to_csv(index=False, header=False).strip()  # Convert to CSV format

# Invoke the endpoint directly using the runtime client
response = runtime_client.invoke_endpoint(
    EndpointName=endpoint_name,
    ContentType="text/csv",  # Specify the content type
    Body=test_data_csv  # The input data as a CSV string
)

# Parse the response
result = response['Body'].read().decode('utf-8')
print(result)

0.20303820073604584
0.20303820073604584
0.3774380087852478
0.5340070128440857
0.1691509336233139
0.37122291326522827
0.6700358986854553
0.8091390132904053
0.1691509336233139
0.8091390132904053
0.1691509336233139
0.20303820073604584
0.9251188635826111
0.1691509336233139
0.35528063774108887
0.8091390132904053
0.1633274257183075
0.38120347261428833
0.36576730012893677
0.1691509336233139
0.1691509336233139
0.40060654282569885
0.7384953498840332
0.20303820073604584
0.3876122534275055



# Deployment Type 3 - serverless endpoint

In [16]:
import boto3
from sagemaker.model import Model
from sagemaker.serverless import ServerlessInferenceConfig
from sagemaker import get_execution_role

model_artifact = "s3://sagemaker-us-east-1-100163808729/deployment/output/sagemaker-xgboost-2025-02-12-06-57-40-558/output/model.tar.gz"
container = sagemaker.image_uris.retrieve(framework="xgboost", region=boto3.Session().region_name, version="1.3-1")

# Create the model object using the S3 path
model = Model(
    image_uri=container,
    model_data=model_artifact,
    role=get_execution_role()
)

# Define the Serverless Inference configuration
serverless_inference_config = ServerlessInferenceConfig(
    memory_size_in_mb=2048,  # Allocate memory
    max_concurrency=5  # Max concurrent invocations
)

# Deploy the model as a serverless endpoint
serverless_predictor = model.deploy(
    serverless_inference_config=serverless_inference_config,
    endpoint_name='employee-attrition-serverless-1'
)

----!

# Predict using above endpoint

In [19]:
import boto3

# Initialize the SageMaker runtime client
runtime_client = boto3.client('sagemaker-runtime')

# Specify the endpoint name
endpoint_name = 'employee-attrition-serverless-1'

# Prepare your input data (same format as before)
test_data = test_df[feature_columns].head(25)  # Select the first row of test data for prediction
test_data_csv = test_data.to_csv(index=False, header=False).strip()  # Convert to CSV format

# Invoke the endpoint directly using the runtime client
response = runtime_client.invoke_endpoint(
    EndpointName=endpoint_name,
    ContentType="text/csv",  # Specify the content type
    Body=test_data_csv  # The input data as a CSV string
)

# Parse the response
result = response['Body'].read().decode('utf-8')
print(result)

# Note, this did not work as well because predictor is of class NoneType
# delete manually from UI

# Delete the endpoint when no longer needed
# predictor.delete_endpoint()

0.20303820073604584
0.20303820073604584
0.3774380087852478
0.5340070128440857
0.1691509336233139
0.37122291326522827
0.6700358986854553
0.8091390132904053
0.1691509336233139
0.8091390132904053
0.1691509336233139
0.20303820073604584
0.9251188635826111
0.1691509336233139
0.35528063774108887
0.8091390132904053
0.1633274257183075
0.38120347261428833
0.36576730012893677
0.1691509336233139
0.1691509336233139
0.40060654282569885
0.7384953498840332
0.20303820073604584
0.3876122534275055



# Deployment Type 4 - batch transform

In [39]:
# There is no endpoint created here

import boto3
from sagemaker.model import Model
from sagemaker.serverless import ServerlessInferenceConfig
from sagemaker import get_execution_role

features_only_batch_transform = test_df.iloc[:, 1:]  # Exclude the first column (label)
test_data_csv_batch_transform = features_only_batch_transform.to_csv(index=False, header=False).strip()

# Save to CSV without header
csv_file_path = 'validation_no_label_batch_transform.csv'
features_only_batch_transform.to_csv(csv_file_path, index=False, header=False)

# Upload the prepared CSV to S3
s3 = boto3.client('s3')
s3.upload_file(csv_file_path, bucket, f'{prefix}/validation/validation_no_label_batch_transform.csv')

# input data needs to be available in s3 unlike other methods
# probably without s3, if the input file is local, the code must change
model_artifact = "s3://sagemaker-us-east-1-100163808729/deployment/output/sagemaker-xgboost-2025-02-12-06-57-40-558/output/model.tar.gz"
container = sagemaker.image_uris.retrieve(framework="xgboost", region=boto3.Session().region_name, version="1.3-1")
input_path = 's3://sagemaker-us-east-1-100163808729/deployment/validation/validation_no_label_batch_transform.csv'
output_path = f's3://{bucket}/{prefix}/batch-transform-output/'

# Create the model object using the S3 path
model = Model(
    image_uri=container,
    model_data=model_artifact,
    role=get_execution_role()
)

# Create a transformer object
transformer = model.transformer(
    instance_count=1,
    instance_type='ml.m5.large',  # Choose an instance type
    output_path=output_path,
    strategy='MultiRecord',  # Strategy for processing records (SingleRecord or MultiRecord)
    assemble_with='Line',  # How to join results, e.g., 'Line' to join with newlines
    accept='text/csv'  # Output format
)

# Start the transform job
transformer.transform(
    data=input_path,  # Input data in S3
    content_type='text/csv',  # Input format
    split_type='Line'  # How the input data is split (e.g., by line)
)

# Wait for the job to finish
transformer.wait()

...............................[34m[2025-02-12:13:27:42:INFO] No GPUs detected (normal if no gpus installed)[0m
[34m[2025-02-12:13:27:42:INFO] No GPUs detected (normal if no gpus installed)[0m
[34m[2025-02-12:13:27:42:INFO] nginx config: [0m
[34mworker_processes auto;[0m
[34mdaemon off;[0m
[34mpid /tmp/nginx.pid;[0m
[34merror_log  /dev/stderr;[0m
[34mworker_rlimit_nofile 4096;[0m
[34mevents {
  worker_connections 2048;[0m
[34m}[0m
[34mhttp {
  include /etc/nginx/mime.types;
  default_type application/octet-stream;
  access_log /dev/stdout combined;
  upstream gunicorn {
    server unix:/tmp/gunicorn.sock;
  }
  server {
    listen 8080 deferred;
    client_max_body_size 0;
    keepalive_timeout 3;
    location ~ ^/(ping|invocations|execution-parameters) {
      proxy_set_header X-Forwarded-For $proxy_add_x_forwarded_for;
      proxy_set_header Host $http_host;
      proxy_redirect off;
      proxy_read_timeout 60s;
      proxy_pass http://gunicorn;
    }
    locati

# Deployment Type 5 - Asynchronous inference endpoint

In [None]:
from sagemaker.async_inference import AsyncInferenceConfig

# Deploy the model as an asynchronous endpoint
async_predictor = estimator.deploy(
    initial_instance_count=1,  # Number of instances
    instance_type='ml.m5.xlarge',  # Instance type
    async_inference_config=AsyncInferenceConfig(
        output_path=f's3://{bucket_name}/output/async-output',  # S3 path to store output
        max_concurrent_invocations_per_instance=2
    ),
    endpoint_name='life-expectancy-predictor-async-1'
)

-----!

# Predict using above endpoint

In [28]:
# Remove the label column and the header
features_only = test_df.iloc[:, 1:]  # Exclude the first column (label)
test_data_csv = features_only.to_csv(index=False, header=False).strip()

# Save to CSV without header
csv_file_path = 'validation_no_label.csv'
features_only.to_csv(csv_file_path, index=False, header=False)

# Upload the prepared CSV to S3
s3 = boto3.client('s3')
s3.upload_file(csv_file_path, bucket, f'{prefix}/validation/validation_no_label.csv')

# Update the input location for the asynchronous request
# here also, input data is in s3, not local
input_location = f's3://{bucket}/{prefix}/validation/validation_no_label.csv'

import boto3

# Initialize the SageMaker runtime client
runtime_client = boto3.client('sagemaker-runtime')

# Specify the endpoint name
endpoint_name = 'employee-attrition-async-1'

# Invoke the asynchronous endpoint with your input data location
response = runtime_client.invoke_endpoint_async(
    EndpointName=endpoint_name, 
    InputLocation=input_location,
    InvocationTimeoutSeconds=1600,  # Set timeout to allow sufficient time for processing
    ContentType='text/csv'
)

# Response contains metadata about the request, not the prediction itself.
# The actual predictions will be saved to the specified S3 output path.
# The output was available at s3://sagemaker-us-east-1-100163808729/deployment/async-output/8cbde29d-de1a-4566-8ed1-ceacc67b29a3.out
print("Asynchronous inference request sent. Check S3 for results.")

# Note, this did not work as well because predictor is of class NoneType
# delete manually from UI

# Delete the endpoint when no longer needed
# predictor.delete_endpoint()

Asynchronous inference request sent. Check S3 for results.


# Deployment Type 6 - multi-model endpoint

In [34]:
from sagemaker import Model
from sagemaker.multidatamodel import MultiDataModel

# S3 path to your model artifacts
# below path could have model.tar.gz, model-.tar.gz etc
model_artifact_path = 's3://sagemaker-us-east-1-100163808729/deployment/output/sagemaker-xgboost-2025-02-12-06-57-40-558/output/'

# Define the container image (for example, XGBoost)
container_image = sagemaker.image_uris.retrieve(framework="xgboost", region=boto3.Session().region_name, version="1.3-1")

# Create a Model object (this includes the image and role information)
model = Model(
    image_uri=container_image,
    role=sagemaker.get_execution_role()
)

# Create the MultiDataModel
mme = MultiDataModel(
    name="multi-model-endpoint-2",
    model_data_prefix=model_artifact_path,  # S3 path where models are stored
    model=model,  # Pass the Model object that defines the container image
    sagemaker_session=sagemaker.Session()
)

# Deploy the Multi-Model Endpoint
predictor = mme.deploy(
    initial_instance_count=1,
    instance_type="ml.m5.xlarge",
    endpoint_name="multi-model-endpoint-2"
)


-----!

# Predict using above endpoint

In [35]:
import boto3
from botocore.config import Config

# Initialize the SageMaker Runtime client with a retry strategy
config = Config(
    read_timeout=70,
    retries={
        'max_attempts': 2  # Adjust this value as needed (up to 5 for a max timeout of 360s)
    }
)
runtime_sagemaker_client = boto3.client('sagemaker-runtime', config=config)

# Define the endpoint name and the specific model to target
endpoint_name = "multi-model-endpoint-2"
target_model = "model.tar.gz"

# Example CSV input data

# Invoke the endpoint
response = runtime_sagemaker_client.invoke_endpoint(
    EndpointName=endpoint_name,
    ContentType="text/csv",
    TargetModel=target_model,
    Body=test_data_csv
)

# Print the response from the model
print(response['Body'].read().decode('utf-8'))


[0.20303820073604584, 0.20303820073604584, 0.3774380087852478, 0.5340070128440857, 0.1691509336233139, 0.37122291326522827, 0.6700358986854553, 0.8091390132904053, 0.1691509336233139, 0.8091390132904053, 0.1691509336233139, 0.20303820073604584, 0.9251188635826111, 0.1691509336233139, 0.35528063774108887, 0.8091390132904053, 0.1633274257183075, 0.38120347261428833, 0.36576730012893677, 0.1691509336233139, 0.1691509336233139, 0.40060654282569885, 0.7384953498840332, 0.20303820073604584, 0.3876122534275055, 0.1691509336233139, 0.1691509336233139, 0.1633274257183075, 0.1633274257183075, 0.1633274257183075, 0.1691509336233139, 0.9031620621681213, 0.3774380087852478, 0.1691509336233139, 0.32054612040519714, 0.1691509336233139, 0.37556877732276917, 0.1691509336233139, 0.20303820073604584, 0.1691509336233139, 0.90727698802948, 0.1691509336233139, 0.8091390132904053, 0.1691509336233139, 0.1691509336233139, 0.1691509336233139, 0.1691509336233139, 0.6700358986854553, 0.9093160629272461, 0.1691509

# Ensure you delete all the endpoints after this exercise