In [22]:
import pandas as pd
import sagemaker
import os
from sagemaker.sklearn.estimator import SKLearn

# One SageMaker session for the notebook; the default bucket is resolved from
# it and the execution role is looked up separately. Later cells reuse
# `sagemaker_session`, `bucket` and `role`.
sagemaker_session = sagemaker.Session()
bucket = sagemaker_session.default_bucket()
role = sagemaker.get_execution_role()

# Raw customer table, read from a path relative to the working directory.
df = pd.read_csv('customer_segmentation_data.csv')

sagemaker.config INFO - Applied value from config key = SageMaker.PythonSDK.Modules.Session.DefaultS3Bucket
sagemaker.config INFO - Applied value from config key = SageMaker.PythonSDK.Modules.Session.DefaultS3ObjectKeyPrefix
sagemaker.config INFO - Applied value from config key = SageMaker.PythonSDK.Modules.Session.DefaultS3Bucket
sagemaker.config INFO - Applied value from config key = SageMaker.PythonSDK.Modules.Session.DefaultS3ObjectKeyPrefix


In [None]:
# Derived feature: income per purchase. A purchase count of 0 is replaced by 1
# before dividing, so those rows keep the raw income as their value.
df['Spending_Power'] = df['Income'].div(df['Purchases'].replace(0, 1))

In [34]:
# Keep only the two model inputs, in the order they are written to disk.
train_df = df.loc[:, ['Age', 'Spending_Power']]

# Write the features locally as a bare CSV: no index column, no header row.
os.makedirs('data', exist_ok=True)
train_df.to_csv('data/train.csv', header=False, index=False)

# Upload the file to `bucket` under the segmentation/train prefix and keep the
# returned location for the training cell.
train_path = sagemaker_session.upload_data(
    key_prefix='segmentation/train',
    bucket=bucket,
    path='data/train.csv',
)

print(f"Data uploaded successfully to: {train_path}")

Data uploaded successfully to: s3://amazon-sagemaker-307424506479-us-east-2-49iiwwk8a95quh/segmentation/train/train.csv


In [36]:
# Configure a scikit-learn training job that runs script.py on a single
# ml.m5.xlarge instance. Passing `sagemaker_session` keeps the estimator on
# the same session that uploaded the training data, instead of letting it
# construct a second, implicit session of its own.
sklearn_estimator = SKLearn(
    entry_point='script.py',
    role=role,
    instance_count=1,
    instance_type='ml.m5.xlarge',
    framework_version='1.2-1',
    py_version='py3',
    # presumably consumed by script.py as the number of clusters — confirm there
    hyperparameters={'n_clusters': 4},
    sagemaker_session=sagemaker_session,
)

# Train on the uploaded CSV; it is exposed to the job as the 'train' channel.
print("Training starting...")
sklearn_estimator.fit({'train': train_path})

# Host the trained model on one ml.m5.xlarge instance. Requests sent through
# `predictor` are JSON-encoded by the serializer.
print("Deploying endpoint...")
predictor = sklearn_estimator.deploy(
    initial_instance_count=1,
    instance_type='ml.m5.xlarge',
    serializer=sagemaker.serializers.JSONSerializer()
)

sagemaker.config INFO - Applied value from config key = SageMaker.PythonSDK.Modules.Session.DefaultS3Bucket
sagemaker.config INFO - Applied value from config key = SageMaker.PythonSDK.Modules.Session.DefaultS3ObjectKeyPrefix
sagemaker.config INFO - Applied value from config key = SageMaker.TrainingJob.Environment
Training starting...
2026-01-30 23:18:25 Starting - Starting the training job...
2026-01-30 23:18:41 Starting - Preparing the instances for training...
2026-01-30 23:19:17 Downloading - Downloading the training image......
2026-01-30 23:20:23 Training - Training image download completed. Training in progress.
  import pkg_resources
2026-01-30 23:20:14,838 sagemaker-containers INFO     Imported framework sagemaker_sklearn_container.training
2026-01-30 23:20:14,842 sagemaker-training-toolkit INFO     No GPUs detected (normal if no gpus installed)
2026-01-30 23:20:14,844 sagemaker-training-toolkit INFO     No Neurons detected (normal if no neurons insta

In [None]:
import boto3
import json

# Low-level runtime client, used to call the deployed endpoint directly.
runtime = boto3.client('sagemaker-runtime')

# Sample customers, one row each: [age, income, purchases].
raw_data = [
    [25, 50000, 40],  # Customer A
    [60, 150000, 5],  # older, high spender
    [30, 30000, 10],  # Customer C
    [45, 90000, 45]   # middle-aged, high spender
]

# Reduce every row to the two features sent to the model:
# [age, spending_power], where spending_power = income / purchases, or just
# the income when the purchase count is not positive.
json_payload = [
    [age, (income / purchases if purchases > 0 else income)]
    for age, income, purchases in raw_data
]

try:
    # Send the whole batch as a single JSON request.
    response = runtime.invoke_endpoint(
        EndpointName=predictor.endpoint_name,
        ContentType='application/json',
        Body=json.dumps(json_payload)
    )

    # The response body is a stream; read it fully, then parse the JSON text.
    body_text = response['Body'].read().decode()
    result = json.loads(body_text)

    print("--- JSON Batch Prediction Results ---")
    for i, cluster in enumerate(result):
        print(f"Customer {i+1} assigned to Cluster: {cluster}")

except Exception as e:
    # Best-effort reporting: show the error plus a troubleshooting hint
    # instead of letting the cell fail.
    print(f"Error: {e}")
    print("If you still see a 500 error, check CloudWatch logs for a 'ValueError: query data has 3 features, model has 2'.")

--- JSON Batch Prediction Results ---
Customer 1 assigned to Cluster: 1
Customer 2 assigned to Cluster: 0
Customer 3 assigned to Cluster: 1
Customer 4 assigned to Cluster: 3


In [28]:
# Clean-up: delete the endpoint that `sklearn_estimator.deploy(...)` created,
# now that the predictions above have been collected.
predictor.delete_endpoint()
