In [7]:
from sagemaker import Session
import sagemaker
from sagemaker.processing import ScriptProcessor, ProcessingInput, ProcessingOutput
from sagemaker.workflow.pipeline import Pipeline
from sagemaker.workflow.steps import ProcessingStep
from sagemaker.workflow.pipeline_context import PipelineSession
import boto3
from sagemaker.sklearn.processing import SKLearnProcessor
from sagemaker.sklearn.estimator import SKLearn
from sagemaker.sklearn.processing import SKLearnProcessor
from sklearn.preprocessing import StandardScaler, LabelEncoder
from sagemaker.sklearn import SKLearn
from sagemaker.inputs import TrainingInput
from sagemaker.workflow.steps import TrainingStep
from sagemaker.workflow.model_step import ModelStep
from sagemaker.sklearn.model import SKLearnModel

In [8]:
bucket='zohebmlops'
prefix = 'mlops'
input_source = sagemaker.Session().upload_data('./iris.csv', bucket=bucket, key_prefix=f'{prefix}')
train_path = f"s3://{bucket}/{prefix}/train"
test_path = f"s3://{bucket}/{prefix}/test"
sagemaker_session = sagemaker.Session()
role = sagemaker.get_execution_role()

In [14]:
sklearn_processor = SKLearnProcessor(
    framework_version="0.23-1",
    role=role,
    instance_type="ml.m5.large",
    instance_count=1, 
    base_job_name='mlops-sklearnprocessing'
)

# Define processing step
processing_step = ProcessingStep(
    name='PreprocessingStep',
    processor=sklearn_processor,
    code='preprocess.py',  # Path to your preprocessing script
    inputs=[
        ProcessingInput(
            source=input_source, 
            destination="/opt/ml/processing/input",
            s3_input_mode="File",
            s3_data_distribution_type="ShardedByS3Key",
            
        )
    ],
    outputs=[
        ProcessingOutput(
            output_name="train_data", 
            source="/opt/ml/processing/output/train",
            destination=train_path,
            s3_upload_mode="EndOfJob",
        ),
        ProcessingOutput(
            output_name="test_data", 
            source="/opt/ml/processing/output/test",
            destination=test_path,
            s3_upload_mode="EndOfJob",
        ),
    ]
)
train_data_uri = train_path
test_data_uri = test_path

# Specify the output location for the model
model_output_uri = f"s3://{bucket}/{prefix}/model"

# Define hyperparameters
#n_estimators_param = ParameterInteger(name="NEstimators", default_value=50)
#max_depth_param = ParameterInteger(name="MaxDepth", default_value=5)

# Create the SKLearn estimator for training
train_input = TrainingInput(s3_data=train_data_uri, content_type='text/csv')
test_input = TrainingInput(s3_data=test_data_uri, content_type='text/csv')

estimator = SKLearn(entry_point='train.py',
                    framework_version="0.23-1",
                    py_version='py3',
                    instance_type='ml.m5.xlarge',
                    role=role,
                    output_path=model_output_uri,
                    base_job_name='sklearn-iris',
                    hyperparameters={'n_estimators': 50, 'max_depth': 5})
# Define the input data for training and testing
train_step = TrainingStep(
    name="TrainModel",
    estimator=estimator,
    inputs={
        "training": train_input,
        "testing": test_input
    },
)
train_step.add_depends_on([processing_step])

model_pickle_path = f"s3://{bucket}/{prefix}/model/pipelines-coq1igr3s6a5-TrainModel-WiVGCf9SFG/output/model.tar.gz"

sklearn_model = SKLearnModel(
    model_data=model_pickle_path,  # Model artifact path
    role=role,
    entry_point='train.py',  # Ensure this is the correct entry point if needed for inference
    framework_version="0.23-1",
    sagemaker_session=sagemaker_session
)

# Define the deployment step
model_step_args = sklearn_model.deploy(
    instance_type="ml.m5.large",
    initial_instance_count=1,
    endpoint_name="sklearn-iris-endpoint-test-1",
)

# Define the deployment step
model_step = ModelStep(
    name="DeployModel",
    step_args=model_step_args
)


# Ensure the deployment step depends on the training step
model_step.add_depends_on([train_step])


# Step 2: Create a pipeline
pipeline = Pipeline(
    name="SKLearnPipeline7",
    steps=[processing_step, train_step, model_step],
    sagemaker_session=sagemaker_session,
)

# Step 3: Submit the pipeline
pipeline.upsert(role_arn=role)

# Start the pipeline execution
execution = pipeline.start()
execution.wait()


INFO:sagemaker.image_uris:Defaulting to only available Python version: py3
INFO:sagemaker:Creating model with name: sagemaker-scikit-learn-2024-08-08-06-37-31-301
INFO:sagemaker:Creating endpoint-config with name sklearn-iris-endpoint-test-1
INFO:sagemaker:Creating endpoint with name sklearn-iris-endpoint-test-1


-----!

TypeError: The step_args of ModelStep must be obtained from model.create() or model.register(). For more, see: https://sagemaker.readthedocs.io/en/stable/amazon_sagemaker_model_building_pipeline.html#model-step