In [None]:
import boto3
from sagemaker import Session
from sagemaker.model import Model

# Name of the model package group that the registry lookup below queries.
model_package_group_name = "diabetes"

# High-level SageMaker SDK session.
sagemaker_session = Session()

# Low-level boto3 client for the SageMaker API; no region_name is passed here.
sagemaker_client = boto3.client(service_name="sagemaker")

# Function to get the latest model version from the model registry
def get_latest_model_version(model_package_group_name, client=None):
    """Return the newest ``ModelPackageVersion`` listed for a model package group.

    Args:
        model_package_group_name: Name of the model package group to query.
        client: Optional object exposing ``list_model_packages`` (normally a
            boto3 SageMaker client). When omitted, the notebook-level
            ``sagemaker_client`` is used, which matches the previous behaviour.

    Returns:
        The ``ModelPackageVersion`` of the most recently created package in the
        group, or ``0`` when the group contains no packages.
    """
    if client is None:
        client = sagemaker_client

    # Newest first, and only one entry is needed, so ask for a single result
    # instead of pulling a whole page of summaries.
    response = client.list_model_packages(
        ModelPackageGroupName=model_package_group_name,
        SortBy='CreationTime',
        SortOrder='Descending',
        MaxResults=1,
    )

    summaries = response.get('ModelPackageSummaryList') or []
    if not summaries:
        return 0  # No packages registered in this group yet.

    return summaries[0]['ModelPackageVersion']

# Look up the newest version for the configured group and report it.
latest_version = get_latest_model_version(
    model_package_group_name=model_package_group_name
)
print(f"Latest model version: {latest_version}")


In [None]:
import boto3
from sagemaker import Session
from sagemaker.model import Model

# Model package group name; get_advance_model_version sends its argument as ModelPackageGroupName.
model_package_name = 'diabetes'

def get_advance_model_version(model_package_name, region_name="ap-south-1", client=None):
    """Return the latest ``ModelPackageVersion`` in a group plus one.

    Args:
        model_package_name: Name of the model package group to query (sent as
            ``ModelPackageGroupName``).
        region_name: AWS region used when this function has to build its own
            boto3 client. Defaults to the region that was previously hard-coded.
        client: Optional object exposing ``list_model_packages``. When given,
            no boto3 client is created and ``region_name`` is ignored.

    Returns:
        ``latest_version + 1`` for the most recently created package in the
        group, or ``1`` when the group contains no packages.
    """
    if client is None:
        # Imported lazily so callers that inject a client do not need boto3.
        import boto3

        client = boto3.client("sagemaker", region_name=region_name)

    # Only the newest package matters, so request a single, newest-first result.
    response = client.list_model_packages(
        ModelPackageGroupName=model_package_name,
        SortBy="CreationTime",
        SortOrder="Descending",
        MaxResults=1,
    )

    summaries = response.get("ModelPackageSummaryList") or []
    if not summaries:
        return 1  # Empty group: the first version number is 1.

    return summaries[0]["ModelPackageVersion"] + 1



In [37]:
import boto3
from sagemaker.inputs import TrainingInput
from datetime import datetime
import sagemaker
from sagemaker import image_uris
from sagemaker import Session
from sagemaker.workflow.pipeline_context import PipelineSession

# ---- S3 layout -------------------------------------------------------------
# Bucket/prefix and the derived S3 locations shared by every pipeline cell below.
bucket = 'diabates'
prefix = 'pipeline'
region = 'ap-south-1'
input_source = f"s3://{bucket}/datasets/diabetes.csv"
train_path = f"s3://{bucket}/{prefix}/train"
test_path = f"s3://{bucket}/{prefix}/test"
val_path = f"s3://{bucket}/{prefix}/val"
training_model_file = 'model.tar.gz'

# Timestamp (YYYYMMDD-HHMMSS) so each run of this cell gets its own model output prefix.
timestamp = datetime.now().strftime("%Y%m%d-%H%M%S")
model_output_uri = f"s3://{bucket}/{prefix}/pipeline_run_version_{timestamp}"

train_input = TrainingInput(s3_data=train_path, content_type='text/csv')
# Note: the evaluation output lives under "output/", not under `prefix`.
evaluation_output_uri = f"s3://{bucket}/output/evaluation"

# ---- Container images and instances ----------------------------------------
# Processing, training and evaluation all use the same scikit-learn image, so
# resolve the URI once and share it instead of issuing three identical lookups.
sklearn_image_uri = image_uris.retrieve(framework='sklearn', region=region, version='1.0-1')
processing_image_uri = sklearn_image_uri
training_image_uri = sklearn_image_uri
evaluation_image_uri = sklearn_image_uri
instance_count = 1
instance_type = 'ml.m5.xlarge'

# ---- Sessions and execution role -------------------------------------------
sagemaker_session = sagemaker.Session()
# NOTE(review): the execution role ARN (including the account id) is hard-coded;
# sagemaker.get_execution_role() is the alternative the original author noted.
role = 'arn:aws:iam::590183717898:role/service-role/AmazonSageMaker-ExecutionRole-20240716T105741'
pipeline_session = PipelineSession()

INFO:sagemaker.image_uris:Defaulting to only available Python version: py3


INFO:sagemaker.image_uris:Defaulting to only supported image scope: cpu.
INFO:sagemaker.image_uris:Defaulting to only available Python version: py3
INFO:sagemaker.image_uris:Defaulting to only supported image scope: cpu.
INFO:sagemaker.image_uris:Defaulting to only available Python version: py3
INFO:sagemaker.image_uris:Defaulting to only supported image scope: cpu.


In [30]:
########processsor##########


import sagemaker
from sagemaker.processing import (
    ProcessingInput,
    ProcessingOutput,
    ScriptProcessor,  # Use ScriptProcessor if you are using a script
)
from sagemaker.workflow.steps import ProcessingStep
from sagemaker.workflow.pipeline import Pipeline

# Processor that runs the preprocessing script with python3 in the processing image.
# NOTE(review): base_job_name contains an underscore; SageMaker job names normally
# allow only alphanumerics and hyphens - confirm this is accepted in your setup.
sklearn_processor = ScriptProcessor(
    image_uri=processing_image_uri,
    role=role,
    instance_type=instance_type,
    instance_count=instance_count,  # shared setting from the config cell (was a literal 1)
    base_job_name='diabetes_new',
    command=['python3'],
)

# Preprocessing step: reads the raw CSV and writes train/test/validation splits
# to the S3 locations defined in the config cell.
processing_step = ProcessingStep(
    name='PreprocessingStep',
    processor=sklearn_processor,
    code='preprocess.py',  # Path to your preprocessing script
    inputs=[
        ProcessingInput(
            source=input_source,
            destination="/opt/ml/processing/input",
            s3_input_mode="File",
            s3_data_distribution_type="ShardedByS3Key",
        )
    ],
    outputs=[
        ProcessingOutput(
            output_name="train_data",
            source="/opt/ml/processing/output/train",
            destination=train_path,
            s3_upload_mode="EndOfJob",
        ),
        ProcessingOutput(
            output_name="test_data",
            source="/opt/ml/processing/output/test",
            destination=test_path,
            s3_upload_mode="EndOfJob",
        ),
        ProcessingOutput(
            output_name="val_data",
            source="/opt/ml/processing/output/validation",
            destination=val_path,
            s3_upload_mode="EndOfJob",
        ),
    ]
)

# Pipeline containing only the preprocessing step.
pipeline = Pipeline(
    name='DiabetesProcessingPipeline-1',
    steps=[processing_step]
)

# upsert creates the pipeline on first run and updates it afterwards, so the
# execution below always uses the definition built in this cell. Calling
# start() alone fails when the pipeline does not exist and otherwise runs
# whatever definition was last stored.
pipeline.upsert(role_arn=role)
pipeline.start()  # Starts the pipeline execution

_PipelineExecution(arn='arn:aws:sagemaker:ap-south-1:590183717898:pipeline/DiabetesProcessingPipeline-1/execution/vb7umk2xn4iz', sagemaker_session=<sagemaker.session.Session object at 0x000001EAA8CB36B0>)

In [35]:
#########training step########

from sagemaker.estimator import Estimator
from sagemaker.inputs import TrainingInput
from sagemaker.workflow.steps import TrainingStep
from sagemaker.workflow.pipeline import Pipeline
from datetime import datetime

# Training channel: CSV files written to train_path.
train_input = TrainingInput(s3_data=train_path, content_type='text/csv')

# Estimator that runs train.py inside the training image.
estimator = Estimator(
    entry_point='train.py',  # Your training script
    image_uri=training_image_uri,
    py_version='py3',
    instance_type=instance_type,
    instance_count=instance_count,
    role=role,
    output_path=model_output_uri,  # Timestamped prefix from the config cell
    base_job_name='sklearn-diabetes',
    hyperparameters={
        'n_estimators': 50,
        'max_depth': 5
    }
)

# Training step; the data is exposed to the script as the "training" channel.
train_step = TrainingStep(
    name="TrainModel",
    estimator=estimator,
    inputs={
        "training": train_input,
    },
)

# Pipeline containing only the training step.
pipeline = Pipeline(
    name='DiabetestrainingPipeline-4',
    steps=[train_step]
)

# upsert creates the pipeline on first run and updates it afterwards, so the
# execution below always uses the definition built in this cell. Calling
# start() alone fails when the pipeline does not exist and otherwise runs
# whatever definition was last stored.
pipeline.upsert(role_arn=role)
pipeline.start()  # Starts the pipeline execution


_PipelineExecution(arn='arn:aws:sagemaker:ap-south-1:590183717898:pipeline/DiabetestrainingPipeline-4/execution/cy0youwm3hmu', sagemaker_session=<sagemaker.session.Session object at 0x000001EAA87283E0>)

In [36]:
########evaluate#############


from sagemaker.workflow.properties import PropertyFile
from sagemaker.workflow.steps import ProcessingStep
from sagemaker.workflow.pipeline import Pipeline
from sagemaker.processing import (
    ProcessingInput,
    ProcessingOutput,
    ScriptProcessor,
)

##### train artifacts ######
# Pipeline property reference to the model artifact produced by train_step.
# It is resolved at execution time, so train_step must be part of the same pipeline.
model_artifact_path = train_step.properties.ModelArtifacts.S3ModelArtifacts

# Declares evaluation.json (inside the "evaluation" output) as a property file
# so later steps can read values from it.
evaluation_report = PropertyFile(
    name="EvaluationReport",
    output_name="evaluation",
    path="evaluation.json"
)

# Processor that runs the evaluation script with python3 in the evaluation image.
evaluation_processor = ScriptProcessor(
    image_uri=evaluation_image_uri,
    command=["python3"],
    instance_type=instance_type,
    instance_count=instance_count,
    role=role,
    sagemaker_session=pipeline_session
)

# Evaluation step: receives the trained model and the test split, and writes
# its output to evaluation_output_uri.
evaluation_step = ProcessingStep(
    name="EvaluateModel",
    processor=evaluation_processor,
    inputs=[
        ProcessingInput(source=model_artifact_path, destination="/opt/ml/processing/model"),
        ProcessingInput(source=test_path, destination="/opt/ml/processing/test"),
    ],
    outputs=[
        ProcessingOutput(output_name="evaluation", source="/opt/ml/processing/output", destination=evaluation_output_uri),
    ],
    code="eval.py",
    property_files=[evaluation_report]
)

# Explicit ordering: evaluate only after training has finished.
evaluation_step.add_depends_on([train_step])

# evaluation_step reads train_step's properties and depends on it, so both
# steps have to be listed; a pipeline holding only evaluation_step would
# reference a step that is not defined in it.
pipeline = Pipeline(
    name='DiabetesevaluationPipeline-11',
    steps=[train_step, evaluation_step]
)

# upsert creates the pipeline on first run and updates it afterwards, so the
# execution below always uses the definition built in this cell.
pipeline.upsert(role_arn=role)
pipeline.start()

_PipelineExecution(arn='arn:aws:sagemaker:ap-south-1:590183717898:pipeline/DiabetesevaluationPipeline-1/execution/v7zf3mltri33', sagemaker_session=<sagemaker.session.Session object at 0x000001EAABE464B0>)

In [None]:
####registerstep########
from sagemaker.workflow.conditions import ConditionGreaterThan
from sagemaker.workflow.condition_step import ConditionStep
from sagemaker.workflow.functions import JsonGet  # was used below without being imported
from sagemaker.model import Model
from sagemaker.inputs import CreateModelInput
from sagemaker.workflow.pipeline_context import PipelineSession
from sagemaker.model_metrics import MetricsSource, ModelMetrics
from sagemaker.workflow.model_step import ModelStep

# Attach the evaluation report written by the evaluation step as model statistics.
model_metrics = ModelMetrics(
    model_statistics=MetricsSource(
        s3_uri=f"{evaluation_output_uri}/evaluation.json",
        content_type="application/json"
    )
)

# Model built from the training image and the artifact produced by train_step.
model = Model(
    image_uri=training_image_uri,
    model_data=model_artifact_path,
    role=role,
    sagemaker_session=pipeline_session  # PipelineSession: register() returns step args instead of calling the API
)

register_args = model.register(
    content_types=["application/x-model"],
    response_types=["application/json"],
    inference_instances=["ml.m5.large"],
    transform_instances=["ml.m5.large"],
    model_package_group_name="diabates-main-structured",  # Target model package group
    model_metrics=model_metrics
)
step_register = ModelStep(name="diabates-main-structured", step_args=register_args)


# Condition: "accuracy" read from the evaluation report must be strictly
# greater than 0.60. (The variable keeps its original name, but the comparison
# is greater-than, not greater-than-or-equal.)
cond_gte = ConditionGreaterThan(
    left=JsonGet(
        step_name=evaluation_step.name,
        property_file=evaluation_report,
        json_path="accuracy"
    ),
    right=0.60
)

# Register the model only when the condition holds; otherwise do nothing.
condition_step = ConditionStep(
    name="CheckAccuracy",
    conditions=[cond_gte],
    if_steps=[step_register],
    else_steps=[]
)

# evaluation_step and the model both reference train_step's output, so
# train_step has to be part of this pipeline as well.
pipeline = Pipeline(
    name="model-resgistration-master-2",
    steps=[train_step, evaluation_step, condition_step],
    sagemaker_session=pipeline_session,
)

# upsert instead of create: create() fails once the pipeline already exists,
# which made this cell non-rerunnable.
pipeline.upsert(role_arn=role)
pipeline.start()