In [None]:
%%sh
# Stop at the first failing command. Without this, the script's exit status
# is that of the last command only, so a failed pip upgrade would go
# unnoticed whenever the apt-get step succeeds.
set -e
# Quietly upgrade the SageMaker Python SDK.
pip install --quiet --upgrade sagemaker
# Install unzip, answering "yes" to any confirmation prompt.
apt-get install --yes unzip

In [None]:
import sagemaker

# Show which SageMaker SDK version the kernel actually imported.
sagemaker_sdk_version = sagemaker.__version__
print(sagemaker_sdk_version)

In [None]:
import sagemaker
import pandas as pd

# Create the SageMaker session and look up the execution role. Neither call
# takes the other's result, so their order does not matter.
sess = sagemaker.Session()
role = sagemaker.get_execution_role()

In [None]:
# Load the ad-click training CSV from the working directory into a DataFrame.
click_csv_path = 'Ad_click_prediction_train.csv'
dataset = pd.read_csv(click_csv_path)

In [None]:
import os

# S3 key prefix for this demo and the local file that gets uploaded.
prefix = 'sagemaker/DEMO-autogluon-click'
filename = 'dataset_autopilot.csv'

# No cell in this notebook creates `filename`, so on a fresh run the upload
# below would fail. If the file is missing, write the DataFrame loaded
# earlier (`dataset`) to it; an existing file is left untouched.
# NOTE(review): assumes the uploaded file is meant to hold the same rows as
# `dataset`, without the index column - confirm against smpreprocessing.py.
if not os.path.exists(filename):
    dataset.to_csv(filename, index=False)

# Upload the local file under <prefix>/input/training and print the value
# returned by upload_data.
dataset_path = sess.upload_data(path=filename, key_prefix=prefix + '/input/training')
print(dataset_path)

In [None]:
from sagemaker.sklearn.processing import SKLearnProcessor

# Processor for the preprocessing script, running on a single instance.
# Original author's note: GPU instances can't be used with SKLearnProcessor.
sklearn_processor = SKLearnProcessor(
    role=role,
    instance_count=1,
    instance_type='ml.c5.9xlarge',
    framework_version='0.23-1',
)

In [None]:
from sagemaker.processing import ProcessingInput, ProcessingOutput

# Input: the uploaded dataset, with /opt/ml/processing/input as destination.
processing_inputs = [
    ProcessingInput(source=dataset_path, destination='/opt/ml/processing/input'),
]

# Output: /opt/ml/processing/output, registered under the name "models".
processing_outputs = [
    ProcessingOutput(output_name='models', source='/opt/ml/processing/output'),
]

# Arguments passed to smpreprocessing.py. The excluded model types are sent
# as one string containing a Python-style list literal.
script_arguments = [
    '--filename', filename,
    '--excluded-model-types', "['KNN', 'FASTAI']",
]

sklearn_processor.run(
    code='smpreprocessing.py',
    inputs=processing_inputs,
    outputs=processing_outputs,
    arguments=script_arguments,
)

In [None]:
# Describe the most recent job recorded on the processor.
latest_job = sklearn_processor.jobs[-1]
preprocessing_job_description = latest_job.describe()

# Print the S3 URI of each output listed in the job's output configuration.
output_config = preprocessing_job_description['ProcessingOutputConfig']
for output in output_config['Outputs']:
    s3_uri = output['S3Output']['S3Uri']
    print(s3_uri)
    