In [2]:
import sagemaker
from sagemaker import Session
import boto3

# Create one SageMaker session and reuse it for everything below.
sess = sagemaker.Session()
region = sess.boto_region_name

# Resolve the execution role through the session created above, so that
# get_execution_role() does not have to build a second default Session.
role = sagemaker.get_execution_role(sagemaker_session=sess)

# Low-level SageMaker API client, pinned to the same region as the session.
sm_client = boto3.client('sagemaker', region_name=region)

print("Region:", region)
# NOTE(review): the printed ARN contains the AWS account ID; consider clearing
# this cell's output before sharing the notebook.
print("Role ARN:", role)

Region: us-east-1
Role ARN: arn:aws:iam::909817712600:role/SageMakerS3Access


In [3]:
# S3 locations for this experiment. Every URI below is derived from `bucket`
# and `prefix`, so pointing the notebook at another bucket or run name is a
# one-line change instead of an edit to three separate string literals.
bucket = 'ml-classification-aip19'
prefix = 'usedcars-xgb'

train_s3 = f's3://{bucket}/usedcars_xgb_train.csv'
# NOTE(review): the validation URI points at the *_test.csv object; confirm
# that reusing the test split for validation is intended.
val_s3 = f's3://{bucket}/usedcars_xgb_test.csv'
output_path = f's3://{bucket}/sagemaker-outputs/{prefix}/'

print("Train S3 URI:", train_s3)
print("Validation S3 URI:", val_s3)
print("Model output path:", output_path)

Train S3 URI: s3://ml-classification-aip19/usedcars_xgb_train.csv
Validation S3 URI: s3://ml-classification-aip19/usedcars_xgb_test.csv
Model output path: s3://ml-classification-aip19/sagemaker-outputs/usedcars-xgb/


In [4]:
from sagemaker.image_uris import retrieve
from sagemaker.estimator import Estimator

# Look up the XGBoost container image URI for this region and version.
xgb_version = '1.5-1'
xgb_image = retrieve(framework='xgboost', region=region, version=xgb_version)

print("Using XGBoost image:", xgb_image)

# Training job definition: one instance, reusing the role, session and
# output location configured in the earlier cells.
xgb_estimator = Estimator(
    image_uri=xgb_image,
    role=role,
    instance_count=1,
    instance_type='ml.m5.large',  # small but fine for development
    output_path=output_path,
    sagemaker_session=sess,
)

print("Estimator created.")

Using XGBoost image: 683313688378.dkr.ecr.us-east-1.amazonaws.com/sagemaker-xgboost:1.5-1
Estimator created.


In [5]:
# Hyperparameters for the XGBoost training job, gathered in one mapping so
# they can be inspected or logged before being handed to the estimator.
xgb_hyperparameters = {
    'objective': 'binary:logistic',
    'num_round': 100,
    'eta': 0.1,
    'max_depth': 6,
    'subsample': 0.8,
    'colsample_bytree': 0.8,
    'eval_metric': 'auc',
    # NOTE(review): presumably the negative/positive class ratio of the
    # training data; confirm and record where this number was computed.
    'scale_pos_weight': 2.949303,
}
xgb_estimator.set_hyperparameters(**xgb_hyperparameters)

print("Hyperparameters set.")

Hyperparameters set.


In [6]:
from sagemaker.inputs import TrainingInput
import time

# Both channels are CSV objects in S3, keyed by the channel names passed to fit().
train_input = TrainingInput(s3_data=train_s3, content_type='text/csv')
val_input = TrainingInput(s3_data=val_s3, content_type='text/csv')
data_channels = {'train': train_input, 'validation': val_input}

# wait=False: the job is submitted without blocking this cell on its
# completion; its status is checked separately by job name.
xgb_estimator.fit(inputs=data_channels, wait=False)

job_name = xgb_estimator.latest_training_job.name
print("Started training job:", job_name)
print("Check SageMaker Console > Training jobs to monitor progress.")

INFO:sagemaker:Creating training-job with name: sagemaker-xgboost-2025-09-14-01-36-45-511


Started training job: sagemaker-xgboost-2025-09-14-01-36-45-511
Check SageMaker Console > Training jobs to monitor progress.


In [7]:
import time

# One-shot status check for the most recently launched training job.
job_name = xgb_estimator.latest_training_job.name
resp = sm_client.describe_training_job(TrainingJobName=job_name)
status = resp['TrainingJobStatus']
print("Job name:", job_name)
print("Status:", status)

# A failed job would otherwise only produce the generic "not yet available"
# message below; surface the reason reported by the service as well.
if status == 'Failed':
    print("Failure reason:", resp.get('FailureReason', 'not reported'))

# The artifact location is printed only when the response carries it.
model_artifacts = resp.get('ModelArtifacts', {})
if 'S3ModelArtifacts' in model_artifacts:
    print("Model artifact (S3):", model_artifacts['S3ModelArtifacts'])
else:
    print("Model artifact not yet available.")


Job name: sagemaker-xgboost-2025-09-14-01-36-45-511
Status: Completed
Model artifact (S3): s3://ml-classification-aip19/sagemaker-outputs/usedcars-xgb/sagemaker-xgboost-2025-09-14-01-36-45-511/output/model.tar.gz
