# SageMaker Processing SDK

In [21]:
import os
import sagemaker
from sagemaker.sklearn.processing import SKLearnProcessor
from sagemaker.processing import ProcessingInput, ProcessingOutput

In [22]:
# Create the SageMaker session; the next cell uses it to look up the default bucket.
sess = sagemaker.Session()

In [23]:
# Bucket that holds the script, the raw input and the processed output.
bucket = sess.default_bucket()

# Key prefixes inside that bucket, one per kind of artifact.
code_prefix, input_prefix, output_prefix = (
    'script',            # processing script
    'raw-datasets',      # raw input data
    'sklearn-datasets',  # processed output
)

In [24]:
# Processor that will run the processing script: framework version 0.23-1,
# one ml.m5.xlarge instance, using the role returned by get_execution_role().
sklearn_job = SKLearnProcessor(
    base_job_name='sklearn-sagemaker-processing-example',
    framework_version='0.23-1',
    role=sagemaker.get_execution_role(),
    instance_count=1,  # single machine computing
    instance_type='ml.m5.xlarge',
)

In [25]:
# Launch the processing job.
#
# S3 URIs always use '/' as the separator, so they are built with f-strings
# rather than os.path.join, which joins with the local OS separator (a
# backslash on Windows) and would produce invalid URIs there.
code_uri = f's3://{bucket}/{code_prefix}/sklearn-processing.py'
raw_uri = f's3://{bucket}/{input_prefix}/part-00000-be25f590-e7b0-4e7c-b2c5-411437906e5e-c000.csv'
result_uri = f's3://{bucket}/{output_prefix}/output'

sklearn_job.run(
    code=code_uri,
    inputs=[
        ProcessingInput(
            input_name='raw',
            source=raw_uri,
            # Path inside the processing container where the raw file is placed.
            destination='/opt/ml/processing/input',
        )
    ],
    outputs=[
        ProcessingOutput(
            output_name='result',
            # Container path whose contents are uploaded to result_uri.
            source='/opt/ml/processing/output',
            destination=result_uri,
        )
    ],
)


Job Name:  sklearn-sagemaker-processing-example-2021-03-02-07-45-29-693
Inputs:  [{'InputName': 'raw', 'AppManaged': False, 'S3Input': {'S3Uri': 's3://sagemaker-ap-northeast-2-029498593638/raw-datasets/part-00000-be25f590-e7b0-4e7c-b2c5-411437906e5e-c000.csv', 'LocalPath': '/opt/ml/processing/input', 'S3DataType': 'S3Prefix', 'S3InputMode': 'File', 'S3DataDistributionType': 'FullyReplicated', 'S3CompressionType': 'None'}}, {'InputName': 'code', 'AppManaged': False, 'S3Input': {'S3Uri': 's3://sagemaker-ap-northeast-2-029498593638/script/sklearn-processing.py', 'LocalPath': '/opt/ml/processing/input/code', 'S3DataType': 'S3Prefix', 'S3InputMode': 'File', 'S3DataDistributionType': 'FullyReplicated', 'S3CompressionType': 'None'}}]
Outputs:  [{'OutputName': 'result', 'AppManaged': False, 'S3Output': {'S3Uri': 's3://sagemaker-ap-northeast-2-029498593638/sklearn-datasets/output', 'LocalPath': '/opt/ml/processing/output', 'S3UploadMode': 'EndOfJob'}}]
........................
..