In [1]:
# Upload to S3 Bucket
from sagemaker import Session
import sagemaker
bucket=sagemaker.Session().default_bucket()
prefix = 'mlops/activity-3'

sess = Session()
input_source = sess.upload_data('./iris.csv', bucket=bucket, key_prefix=f'{prefix}')
input_source

sagemaker.config INFO - Not applying SDK defaults from location: /etc/xdg/sagemaker/config.yaml
sagemaker.config INFO - Not applying SDK defaults from location: /home/sagemaker-user/.config/sagemaker/config.yaml


's3://sagemaker-ap-south-1-590183717898/mlops/activity-3/iris.csv'

In [2]:
train_path = f"s3://{bucket}/{prefix}/train"
test_path = f"s3://{bucket}/{prefix}/test"

train_path
test_path

's3://sagemaker-ap-south-1-590183717898/mlops/activity-3/test'

In [4]:
import sagemaker
from sagemaker.processing import ScriptProcessor, ProcessingInput, ProcessingOutput
from sagemaker.workflow.pipeline import Pipeline
from sagemaker.workflow.steps import ProcessingStep
from sagemaker.workflow.pipeline_context import PipelineSession
import boto3
from sagemaker.sklearn.processing import SKLearnProcessor


sagemaker_session = sagemaker.Session()
role = sagemaker.get_execution_role()



# Define the ScriptProcessor
sklearn_processor = SKLearnProcessor(
    framework_version="0.23-1",
    role=role,
    instance_type="ml.m5.large",
    instance_count=1, 
    base_job_name='mlops-sklearnprocessing'
)

# Define processing step
processing_step = ProcessingStep(
    name='PreprocessingStep',
    processor=sklearn_processor,
    code='preprocess.py',  # Path to your preprocessing script
    inputs=[
        ProcessingInput(
            source=input_source, 
            destination="/opt/ml/processing/input",
            s3_input_mode="File",
            s3_data_distribution_type="ShardedByS3Key",
            
        )
    ],
    outputs=[
        ProcessingOutput(
            output_name="train_data", 
            source="/opt/ml/processing/output/train",
            destination=train_path,
            s3_upload_mode="EndOfJob",
        ),
        ProcessingOutput(
            output_name="test_data", 
            source="/opt/ml/processing/output/test",
            destination=test_path,
            s3_upload_mode="EndOfJob",
        ),
    ]
)

# Define the pipeline
pipeline = Pipeline(
    name='MyPipeline10',
    steps=[processing_step],
    sagemaker_session=sagemaker_session
)

# Create and run the pipeline
pipeline.create(role_arn=role)
pipeline.start()


INFO:sagemaker.image_uris:Defaulting to only available Python version: py3


_PipelineExecution(arn='arn:aws:sagemaker:ap-south-1:590183717898:pipeline/MyPipeline10/execution/bf0q14w1vqys', sagemaker_session=<sagemaker.session.Session object at 0x7f560c16afb0>)

In [10]:
from sagemaker.sklearn.estimator import SKLearn
import sagemaker
from sagemaker.processing import ScriptProcessor, ProcessingInput, ProcessingOutput
from sagemaker.workflow.pipeline import Pipeline
from sagemaker.workflow.steps import ProcessingStep
from sagemaker.workflow.pipeline_context import PipelineSession
import boto3
from sagemaker.sklearn.processing import SKLearnProcessor
from sklearn.preprocessing import StandardScaler, LabelEncoder
import sagemaker
from sagemaker.sklearn import SKLearn
from sagemaker.inputs import TrainingInput

# Initialize the SageMaker session
sagemaker_session = sagemaker.Session()
role = sagemaker.get_execution_role()

# Define the S3 URIs for the train and test data
train_data_uri = train_path
test_data_uri = test_path

# Specify the output location for the model
model_output_uri = 's3://sagemaker-ap-south-1-590183717898/mlops/activity-3/model/'

# Create the SKLearn estimator for training

estimator = SKLearn(entry_point='train.py',
                    framework_version="0.23-1",
                    py_version='py3',
                    instance_type='ml.m5.xlarge',
                    role=role,
                    output_path=model_output_uri,
                    base_job_name='sklearn-iris',
                    hyperparameters={'n_estimators': 50, 'max_depth': 5})
# Define the input data for training and testing
train_input = TrainingInput(s3_data=train_data_uri, content_type='text/csv')
test_input = TrainingInput(s3_data=test_data_uri, content_type='text/csv')

# Start the training job
estimator.fit({'training': train_input, 'testing': test_input})


INFO:sagemaker:Creating training-job with name: sklearn-iris-2024-08-02-11-44-23-190


2024-08-02 11:44:23 Starting - Starting the training job...
2024-08-02 11:44:39 Starting - Preparing the instances for training...
2024-08-02 11:45:15 Downloading - Downloading the training image...
2024-08-02 11:45:51 Training - Training image download completed. Training in progress...[34m2024-08-02 11:45:57,308 sagemaker-containers INFO     Imported framework sagemaker_sklearn_container.training[0m
[34m2024-08-02 11:45:57,311 sagemaker-training-toolkit INFO     No GPUs detected (normal if no gpus installed)[0m
[34m2024-08-02 11:45:57,349 sagemaker_sklearn_container.training INFO     Invoking user training script.[0m
[34m2024-08-02 11:45:57,486 sagemaker-training-toolkit INFO     No GPUs detected (normal if no gpus installed)[0m
[34m2024-08-02 11:45:57,498 sagemaker-training-toolkit INFO     No GPUs detected (normal if no gpus installed)[0m
[34m2024-08-02 11:45:57,509 sagemaker-training-toolkit INFO     No GPUs detected (normal if no gpus installed)[0m
[34m2024-08-02 11: