# SageMaker MLOps Quickstart

This notebook demonstrates the complete MLOps workflow for deploying a wine quality classification model.

## 1. Setup

In [None]:
import boto3
import sagemaker
import pandas as pd
import os

# Create the SageMaker session and derive the region, execution role and
# default bucket that the rest of the notebook refers to
sagemaker_session = sagemaker.Session()
region = sagemaker_session.boto_region_name
role = sagemaker.get_execution_role()
s3_bucket = sagemaker_session.default_bucket()

# Echo the resolved settings, one per line
for label, value in (("Region", region), ("Role", role), ("S3 Bucket", s3_bucket)):
    print(f"{label}: {value}")

## 2. Download and Explore Data

In [None]:
# Read the red-wine quality CSV straight from the UCI archive.
# The file uses ';' as its field separator.
WINE_DATA_URL = "https://archive.ics.uci.edu/ml/machine-learning-databases/wine-quality/winequality-red.csv"
df = pd.read_csv(WINE_DATA_URL, sep=';')

print("Dataset shape:", df.shape)
# Last expression of the cell: preview of the first rows
df.head()

In [None]:
# Data statistics: descriptive summary of `df`, shown as the cell's output
# because it is the last expression in the cell
df.describe()

In [None]:
# Bar chart of how many samples fall into each quality score
import matplotlib.pyplot as plt

fig, ax = plt.subplots(figsize=(10, 5))

# Count rows per score, ordered by score rather than by frequency
quality_counts = df['quality'].value_counts().sort_index()
quality_counts.plot(kind='bar', ax=ax)

ax.set_title('Wine Quality Distribution')
ax.set_xlabel('Quality Score')
ax.set_ylabel('Count')
plt.show()

## 3. Upload Data to S3

In [None]:
# Save locally. The path is defined once so the file that is written and the
# file that is uploaded cannot drift apart.
local_data_dir = '../data'
local_data_path = f"{local_data_dir}/wine-quality.csv"

os.makedirs(local_data_dir, exist_ok=True)
# Keep the ';' separator of the original dataset; drop the pandas index column
df.to_csv(local_data_path, sep=';', index=False)

# Upload to S3. `upload_data` returns the S3 URI of the uploaded file, so the
# reported location is the one actually written rather than a hand-built string.
s3_data_path = sagemaker_session.upload_data(
    path=local_data_path,
    bucket=s3_bucket,
    key_prefix='data'
)

print(f"Data uploaded to: {s3_data_path}")

## 4. Run SageMaker Pipeline

In [None]:
# Export the role, bucket and region resolved earlier as process environment
# variables, in a single update
os.environ.update({
    'SAGEMAKER_ROLE_ARN': role,
    'S3_BUCKET': s3_bucket,
    'AWS_REGION': region,
})

In [None]:
# Import pipeline
import sys

# Put the parent directory on the import path so the `pipelines` package can
# be imported. The membership check keeps the cell idempotent: re-running it
# no longer appends a duplicate '..' entry each time.
if '..' not in sys.path:
    sys.path.append('..')

from pipelines.pipeline import get_pipeline

# Build the pipeline object from the settings resolved in the setup cell
pipeline = get_pipeline(
    region=region,
    role=role,
    s3_bucket=s3_bucket,
)

# Create/update pipeline
pipeline.upsert(role_arn=role)

print("Pipeline created successfully!")

In [None]:
# Launch a pipeline run and keep the execution handle for the following cells
execution = pipeline.start()

# Console page listing pipelines in the current region
console_url = f"https://{region}.console.aws.amazon.com/sagemaker/home?region={region}#/pipelines"

print(f"Pipeline execution ARN: {execution.arn}")
print("\nMonitor pipeline at:")
print(console_url)

In [None]:
# Wait for pipeline to complete (this may take 10-15 minutes).
# The call does not return until waiting is over, so the cells below
# should only be run after this one has finished.
execution.wait()

## 5. Review Pipeline Results

In [None]:
# List pipeline execution steps; as the cell's last expression, the returned
# value is displayed as the cell output
execution.list_steps()

## 6. Find the Latest Approved Model to Deploy

In [None]:
# Get latest approved model from registry
sm_client = boto3.client('sagemaker')

# Newest-first ordering with MaxResults=1 means the first entry, if there is
# one, is the most recently created approved package in the group.
approved_packages = sm_client.list_model_packages(
    ModelPackageGroupName='wine-quality-models',
    ModelApprovalStatus='Approved',
    SortBy='CreationTime',
    SortOrder='Descending',
    MaxResults=1
)['ModelPackageSummaryList']

# Always bind `model_package_arn` (None when nothing is approved) so that code
# using it later can test for None instead of failing with a NameError.
model_package_arn = approved_packages[0]['ModelPackageArn'] if approved_packages else None

if model_package_arn is None:
    print("No approved model found. Please approve a model in the Model Registry first.")
else:
    print(f"Latest approved model: {model_package_arn}")

## 7. Test Endpoint

In [None]:
import json

# Replace with your actual endpoint name
endpoint_name = 'wine-quality-endpoint'

# Two sample rows of 11 feature values each, sent under the "instances" key
test_payload = {
    "instances": [
        [7.4, 0.7, 0.0, 1.9, 0.076, 11.0, 34.0, 0.9978, 3.51, 0.56, 9.4],
        [7.8, 0.88, 0.0, 2.6, 0.098, 25.0, 67.0, 0.9968, 3.2, 0.68, 9.8]
    ]
}
request_body = json.dumps(test_payload)

# Invoke the endpoint with the JSON-encoded payload
runtime = boto3.client('sagemaker-runtime')
response = runtime.invoke_endpoint(
    EndpointName=endpoint_name,
    ContentType='application/json',
    Body=request_body
)

# The body is a stream: read it once, decode it, then parse the JSON text
raw_body = response['Body'].read()
result = json.loads(raw_body.decode())
print(json.dumps(result, indent=2))

## 8. Clean Up (Optional)

In [None]:
# Delete endpoint (uncomment the two lines below to run)
# sm_client.delete_endpoint(EndpointName=endpoint_name)
# print(f"Endpoint {endpoint_name} deleted")