## Downloading the dataset from the S3 bucket to the local directory

In [1]:
import sagemaker
import logging
import boto3
import sagemaker
import pandas as pd
import json
import botocore
from botocore.exceptions import ClientError

# Tag every request made through these clients with a course-specific
# user-agent suffix.
config = botocore.config.Config(user_agent_extra='dlai-pds/c2/w3')

# boto3 clients for the SageMaker service API and for the SageMaker runtime API
sm = boto3.client('sagemaker', config=config)
sm_runtime = boto3.client(service_name='sagemaker-runtime', config=config)

# SageMaker SDK session built on top of the two clients above
sess = sagemaker.Session(
    sagemaker_client=sm,
    sagemaker_runtime_client=sm_runtime,
)

# Values reused by the later cells: region of the session, its default
# bucket, and the execution role.
region = sess.boto_region_name
bucket = sess.default_bucket()
role = sagemaker.get_execution_role()

sagemaker.config INFO - Not applying SDK defaults from location: /etc/xdg/sagemaker/config.yaml
sagemaker.config INFO - Not applying SDK defaults from location: /home/ec2-user/.config/sagemaker/config.yaml


In [2]:
from sagemaker.inputs import TrainingInput

# TODO: set the path to the train data
# Describes the training data for the training job as a TrainingInput.
# The first argument is still the `...` placeholder and has to be replaced
# before this cell is useful (presumably with the S3 URI of the training
# data -- confirm against where the dataset was uploaded).
train_data = TrainingInput(
    ..., 
    content_type='application/x-sagemaker-training-data'
)


In [6]:
from sagemaker.pytorch import PyTorch

# TODO: create the estimator
# Configures the PyTorch estimator used for training and deployment later in
# this notebook: one ml.p3.2xlarge instance, framework version 2.1 on py310,
# job names prefixed with "sagemaker-script-mode".
# Every `...` below is an unfilled placeholder (entry point script, source
# directory, dependencies, output locations) and must be replaced first.
# NOTE(review): both `output_data_config` and `output_path` are supplied for
# the output location; this looks redundant -- confirm which one the SDK
# actually honours and drop the other.
estimator = PyTorch(
    entry_point= ...,
    source_dir= ...,
    base_job_name="sagemaker-script-mode",
    role=role,
    instance_count=1,
    instance_type="ml.p3.2xlarge",
    framework_version="2.1",
    py_version="py310",
    dependencies= ...,
    output_data_config={
        'S3OutputPath': ...
    },
    output_path= ...,
    # presumably makes modules under `src` importable inside the training
    # container -- TODO confirm against the layout of source_dir
    environment={'PYTHONPATH': 'src'}
)

In [7]:
# Checkpoint settings: keep only the single best checkpoint (save_top_k=1),
# where "best" means the lowest value (mode='min') of the monitored
# 'dev_loss' metric, written as 'best_model' under 'dirpath'.
# NOTE(review): 'dirpath' is a filesystem path ('/opt/ml/model/'), not an S3
# URI -- it looks like a location inside the training container; confirm how
# its contents end up in the S3 bucket.
model_checkpoint = {
    'ModelCheckpoint': dict(
        monitor='dev_loss',
        dirpath='/opt/ml/model/',
        filename='best_model',
        save_top_k=1,
        mode='min',
    )
}

# Register the checkpoint settings on the estimator under the 'callbacks'
# hyperparameter.
# NOTE(review): this writes to the private `_hyperparameters` attribute, and
# whether the training script reads a 'callbacks' hyperparameter cannot be
# seen from this notebook -- verify against the entry point script.
estimator._hyperparameters['callbacks'] = [model_checkpoint]


In [1]:
# TODO: train the model
# Starts the training job on the estimator.
# NOTE(review): `{...}` is an unfilled placeholder -- as written it is a set
# literal containing Ellipsis, not a mapping of channel names to inputs.
# Presumably it should map a channel name to `train_data`; confirm the
# channel name expected by the training script.
estimator.fit({...})


## Model Deployment

We need to copy the training artifacts, i.e., `output.tar.gz`, from the corresponding S3 bucket to the current working directory.

In [20]:
#TODO: copy the training artifacts from the S3 bucket to the current working directory
# The source and destination after `cp` are still the `...` placeholder.
# The recorded output of this cell shows output.tar.gz being copied from the
# training job's output/ prefix to ./output.tar.gz.
!aws s3 cp ...      

download: s3://sagemaker-us-east-1-820760248695/outputs/sagemaker-script-mode-2024-01-14-05-39-40-763/output/output.tar.gz to ./output.tar.gz


#### We can decompress the training artifacts to `extracted_training_artifacts` for further exploration.

In [25]:
!tar -xzf output.tar.gz -C extracted_training_artifacts


tar: Ignoring unknown extended header keyword `LIBARCHIVE.creationtime'
tar: Ignoring unknown extended header keyword `LIBARCHIVE.creationtime'
tar: Ignoring unknown extended header keyword `LIBARCHIVE.creationtime'
tar: Ignoring unknown extended header keyword `LIBARCHIVE.creationtime'


We then create an endpoint 'sentiment-analysis-endpoint-2' and deploy the model to that endpoint.

In [22]:
# TODO: deploy the trained model
# Deploys the trained estimator and keeps the returned object as `predictor`.
# The arguments are still the `...` placeholder; the recorded log output of
# this cell shows an endpoint named 'sentiment-analysis-endpoint-2' being
# created, so the endpoint name is presumably one of the arguments to fill in.
predictor = estimator.deploy(...)

INFO:sagemaker:Repacking model artifact (s3://sagemaker-us-east-1-820760248695/outputs/sagemaker-script-mode-2024-01-14-05-39-40-763/output/model.tar.gz), script artifact (s3://sagemaker-us-east-1-820760248695/sagemaker-script-mode-2024-01-14-05-39-40-763/source/sourcedir.tar.gz), and dependencies (['requirements.txt']) into single tar.gz file located at s3://sagemaker-us-east-1-820760248695/sagemaker-script-mode-2024-01-14-05-50-49-158/model.tar.gz. This may take some time depending on model size...
INFO:sagemaker:Creating model with name: sagemaker-script-mode-2024-01-14-05-50-49-158
INFO:sagemaker:Creating endpoint-config with name sentiment-analysis-endpoint-2
INFO:sagemaker:Creating endpoint with name sentiment-analysis-endpoint-2


-----!