In [1]:
import boto3
import sagemaker
from sagemaker.pytorch import PyTorch
from sagemaker.workflow.pipeline import Pipeline
from sagemaker.workflow.steps import TrainingStep
from sagemaker.workflow.parameters import ParameterInteger, ParameterFloat, ParameterString
from sagemaker.workflow.pipeline_context import PipelineSession
from sagemaker.workflow.functions import Join
from sagemaker.workflow.properties import PropertyFile
from sagemaker.workflow.conditions import ConditionGreaterThanOrEqualTo
from sagemaker.workflow.condition_step import ConditionStep
from sagemaker.workflow.functions import JsonGet
from sagemaker.workflow.model_step import ModelStep
from sagemaker.model import Model
from sagemaker.workflow.step_collections import RegisterModel



sagemaker.config INFO - Not applying SDK defaults from location: /etc/xdg/sagemaker/config.yaml
sagemaker.config INFO - Not applying SDK defaults from location: /home/sagemaker-user/.config/sagemaker/config.yaml


In [2]:
# Initialize the SageMaker session and resolve the IAM execution role.
# The session is handed to get_execution_role() so the role lookup reuses it
# instead of constructing a second default Session internally.
sagemaker_session = sagemaker.Session()
role = sagemaker.get_execution_role(sagemaker_session=sagemaker_session)

In [5]:
# Pipeline parameters: training knobs plus the MLflow tracking server.
# Each one carries a default_value and is registered on the Pipeline below.
batch_size = ParameterInteger(
    name="BatchSize",
    default_value=32,
)
epochs = ParameterInteger(
    name="Epochs",
    default_value=15,
)
learning_rate = ParameterFloat(
    name="LearningRate",
    default_value=0.001,
)

# ARN of the SageMaker-managed MLflow tracking server (account/region specific).
sm_mlflow_arn = "arn:aws:sagemaker:us-east-1:316413003582:mlflow-tracking-server/sample-mlflow-tracking"

mlflow_tracking_uri = ParameterString(name="MLflowTrackingURI", default_value=sm_mlflow_arn)

In [21]:
# S3 location of the training data, exposed as a pipeline parameter
# so it can be swapped without editing the pipeline definition.
input_data = ParameterString(
    default_value="s3://sagemaker-us-east-1-316413003582/lego-classification/train",
    name="InputDataPath",
)


In [22]:
# PyTorch estimator for the training job.
# Script location and runtime come first, then compute, then the
# hyperparameters, whose values are the pipeline parameters defined above.
pytorch_estimator = PyTorch(
    # Training script and the directory that contains it.
    source_dir="source_dir",
    entry_point="train.py",
    # Framework / Python runtime for the training container.
    framework_version="1.12.0",
    py_version="py38",
    # Compute and permissions.
    instance_type="ml.m5.xlarge",
    instance_count=1,
    role=role,
    hyperparameters={
        "batch-size": batch_size,
        "epochs": epochs,
        "learning-rate": learning_rate,
        "mlflow-tracking-arn": mlflow_tracking_uri,
    },
)

In [23]:
# Wrap the estimator in a pipeline training step; the single "train"
# channel points at the InputDataPath pipeline parameter.
training_step = TrainingStep(
    name="LegoQualityModelTraining",
    estimator=pytorch_estimator,
    inputs=dict(train=input_data),
)

In [24]:
# Assemble the pipeline: one training step plus every parameter
# that the step (directly or through the estimator) refers to.
pipeline = Pipeline(
    name="LegoQualityPipeline",
    steps=[training_step],
    parameters=[
        batch_size,
        epochs,
        learning_rate,
        mlflow_tracking_uri,
        input_data,
    ],
)

In [25]:
# Values supplied for this particular execution; parameters not listed
# here (MLflowTrackingURI, InputDataPath) are left at their defaults.
pipeline_parameters = dict(
    BatchSize=16,
    Epochs=15,
    LearningRate=0.001,
)

# Create or update the pipeline definition, then launch an execution.
pipeline.upsert(role_arn=role)

pipeline_execution = pipeline.start(parameters=pipeline_parameters)

In [None]:
# Poll the pipeline execution until it reaches a terminal state.
import time

# Terminal values of PipelineExecutionStatus. A successful pipeline run is
# reported as 'Succeeded' ('Completed' is a training-job status and is never
# returned here), so checking for 'Completed' would leave this loop spinning
# forever after a successful execution. 'Stopping' is transitional, not terminal.
TERMINAL_STATUSES = {'Succeeded', 'Failed', 'Stopped'}
POLL_INTERVAL_SECONDS = 60

while True:
    response = pipeline_execution.describe()
    status = response['PipelineExecutionStatus']

    print(f"Pipeline status: {status}")

    if status in TERMINAL_STATUSES:
        break

    # Wait before asking again to avoid hammering the API.
    time.sleep(POLL_INTERVAL_SECONDS)

Pipeline status: Executing
Pipeline status: Executing
Pipeline status: Executing
Pipeline status: Executing
