In [2]:
import time

import boto3
import sagemaker
from sagemaker.inputs import TrainingInput
from sagemaker.parameter import ContinuousParameter, IntegerParameter
from sagemaker.tuner import HyperparameterTuner

In [3]:
# AWS context reused by later cells: region, SageMaker session, and execution role.
boto_session = boto3.Session()
region = boto_session.region_name
session = sagemaker.Session()
role = sagemaker.get_execution_role()

In [4]:
# S3 layout: training output is written to s3://<bucket>/<prefix>/output.
bucket = "aip22-feature-engineering"
prefix = "churn"
output_path = f"s3://{bucket}/{prefix}/output"

# Image URI of the XGBoost container (version 1.5-1) for the current region.
container = sagemaker.image_uris.retrieve(
    framework="xgboost",
    region=region,
    version="1.5-1",
)

In [5]:
# Estimator that runs the `container` image on a single ml.m5.xlarge instance
# and writes its artifacts to `output_path`.
estimator = sagemaker.estimator.Estimator(
    image_uri=container,
    role=role,
    instance_type="ml.m5.xlarge",
    instance_count=1,
    output_path=output_path,
    sagemaker_session=session,
)

# Hyperparameters set directly on the estimator (not part of the tuned ranges).
static_hyperparameters = {
    "objective": "binary:logistic",
    "eval_metric": "auc",
    "num_round": 200,
}
estimator.set_hyperparameters(**static_hyperparameters)


In [6]:
# Search space handed to the tuner: one entry per tuned hyperparameter,
# each given as an inclusive (min, max) range of the stated parameter type.
hyperparameter_ranges = dict(
    eta=ContinuousParameter(0.01, 0.3),
    max_depth=IntegerParameter(3, 10),
    min_child_weight=IntegerParameter(1, 10),
    subsample=ContinuousParameter(0.5, 1.0),
    colsample_bytree=ContinuousParameter(0.5, 1.0),
)

In [7]:
# Bayesian search over `hyperparameter_ranges`, scored on "validation:auc":
# at most 12 training jobs in total, with no more than 2 running at once.
# NOTE(review): objective_type is left at the SDK default — confirm it is "Maximize".
tuner = HyperparameterTuner(
    estimator=estimator,
    hyperparameter_ranges=hyperparameter_ranges,
    objective_metric_name="validation:auc",
    strategy="Bayesian",
    max_jobs=12,
    max_parallel_jobs=2,
)

In [12]:
# S3 locations of the CSV files used for the "train" and "validation" channels.
s3_train, s3_val = (
    f"s3://{bucket}/{prefix}/train_for_sagemaker.csv",
    f"s3://{bucket}/{prefix}/validation_for_sagemaker.csv",
)

In [13]:
# Launch the tuning job with one CSV channel for training and one for validation.
train_input = TrainingInput(s3_data=s3_train, content_type="csv")
validation_input = TrainingInput(s3_data=s3_val, content_type="csv")
tuner.fit(inputs={"train": train_input, "validation": validation_input})

No finished training job found associated with this estimator. Please make sure this estimator is only used for building workflow config
No finished training job found associated with this estimator. Please make sure this estimator is only used for building workflow config


..................................................................................!


In [14]:
# Find the tuning job started by `tuner.fit` and poll it until it reaches a
# terminal state, printing the status on every poll.
#
# Fixes relative to the previous version of this cell:
#   * the client was bound to `m` but used as `sm`, which raises NameError on a
#     fresh kernel (it only worked because of leftover kernel state);
#   * `time.sleep` was called without `time` being imported;
#   * the fallback message contained a mis-encoded em dash.
POLL_INTERVAL_SECONDS = 30
TERMINAL_STATUSES = ("Completed", "Failed", "Stopped")

sm = boto3.client("sagemaker")

# The job name is read from the tuner object; a missing attribute, a falsy
# `latest_tuning_job`, or a falsy `job_name` all count as "not available".
latest_tuning_job = getattr(tuner, "latest_tuning_job", None)
job_name = getattr(latest_tuning_job, "job_name", None) if latest_tuning_job else None

if job_name:
    print("Tuning job name:", job_name)
    while True:
        resp = sm.describe_hyper_parameter_tuning_job(HyperParameterTuningJobName=job_name)
        status = resp["HyperParameterTuningJobStatus"]
        print("Tuning job status:", status)
        if status in TERMINAL_STATUSES:
            break
        # Wait between polls so the API is not queried in a tight loop.
        time.sleep(POLL_INTERVAL_SECONDS)
    print("Final status:", status)
else:
    # Nothing to poll: point the reader at the console to look the job up manually.
    print("Could not get tuner.latest_tuning_job; please copy the tuning job name from the SageMaker console.")
    print("No job_name available in the tuner object — check the SageMaker console to find the tuning job name.")

Tuning job name: sagemaker-xgboost-250923-2154
Tuning job status: Completed
Final status: Completed


In [15]:
# Retrieve the estimator for the tuner's best training job and report where
# its model artifact is stored.
best = tuner.best_estimator()
best_model_s3_uri = best.model_data
print("Best model S3 path:", best_model_s3_uri)


2025-09-23 22:01:21 Starting - Found matching resource for reuse
2025-09-23 22:01:21 Downloading - Downloading the training image
2025-09-23 22:01:21 Training - Training image download completed. Training in progress.
2025-09-23 22:01:21 Uploading - Uploading generated training model
2025-09-23 22:01:21 Completed - Resource released due to keep alive period expiry
Best model S3 path: s3://aip22-feature-engineering/churn/output/sagemaker-xgboost-250923-2154-012-02347c07/output/model.tar.gz
