In [1]:
# MD MLDS TP1
# Reference: SageMaker example "TensorFlow script mode training and serving":
# https://sagemaker-examples.readthedocs.io/en/latest/sagemaker-python-sdk/tensorflow_script_mode_training_and_serving/tensorflow_script_mode_training_and_serving.html

In [2]:
import os
import sagemaker
from sagemaker import get_execution_role

# Session object; used here to look up the AWS region this notebook runs in.
sagemaker_session = sagemaker.Session()

# Execution role, passed to the TensorFlow estimator later in the notebook.
role = get_execution_role()
# Region name taken from the session's boto session; it is interpolated into
# the sample-data S3 paths used by later cells.
region = sagemaker_session.boto_session.region_name

print(region)



us-east-1


In [3]:
# S3 prefix of the MNIST sample data in the region-specific sample-data bucket;
# this URI is what the estimator's fit() call receives as its training input.
training_data_uri = "s3://sagemaker-sample-data-{}/tensorflow/mnist".format(region)
print(training_data_uri)

s3://sagemaker-sample-data-us-east-1/tensorflow/mnist


In [4]:
# Print the training scripts with syntax highlighting, for reference only.
!pygmentize 'mnist.py'

# TensorFlow 2.1 script (the entry point passed to the estimator in this notebook)
!pygmentize 'mnist-2.py'

[37m# Copyright 2018-2019 Amazon.com, Inc. or its affiliates. All Rights Reserved.[39;49;00m[37m[39;49;00m
[37m#[39;49;00m[37m[39;49;00m
[37m# Licensed under the Apache License, Version 2.0 (the "License"). You[39;49;00m[37m[39;49;00m
[37m# may not use this file except in compliance with the License. A copy of[39;49;00m[37m[39;49;00m
[37m# the License is located at[39;49;00m[37m[39;49;00m
[37m#[39;49;00m[37m[39;49;00m
[37m#     http://aws.amazon.com/apache2.0/[39;49;00m[37m[39;49;00m
[37m#[39;49;00m[37m[39;49;00m
[37m# or in the "license" file accompanying this file. This file is[39;49;00m[37m[39;49;00m
[37m# distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF[39;49;00m[37m[39;49;00m
[37m# ANY KIND, either express or implied. See the License for the specific[39;49;00m[37m[39;49;00m
[37m# language governing permissions and limitations under the License.[39;49;00m[37m[39;49;00m
[33m"""Convolutional Neural Network Estimator

In [9]:
from sagemaker.tensorflow import TensorFlow

# XVA: use ml.c4.xlarge, so there is no need to request additional permissions from AWS.

# Estimator for a distributed training job: two ml.c4.xlarge instances running
# mnist-2.py with TensorFlow 2.1.0 on Python 3, with parameter-server
# distribution enabled.
mnist_estimator = TensorFlow(
    entry_point="mnist-2.py",
    role=role,
    instance_count=2,
    instance_type="ml.c4.xlarge",
    framework_version="2.1.0",
    py_version="py3",
    distribution={"parameter_server": {"enabled": True}},
)

INFO:botocore.credentials:Found credentials from IAM Role: BaseNotebookInstanceEc2InstanceRole


In [10]:
# Start the training job with the S3 sample-data prefix as input. The job's
# progress and container logs are streamed into the cell output.
mnist_estimator.fit(training_data_uri)

INFO:sagemaker.image_uris:image_uri is not presented, retrieving image_uri based on instance_type, framework etc.
INFO:sagemaker:Amazon SageMaker Debugger does not currently support Parameter Server distribution
INFO:sagemaker:Amazon SageMaker Debugger does not currently support Parameter Server distribution
INFO:sagemaker.image_uris:image_uri is not presented, retrieving image_uri based on instance_type, framework etc.
INFO:sagemaker:Creating training-job with name: tensorflow-training-2023-08-18-05-36-10-090


Using provided s3_resource
2023-08-18 05:36:10 Starting - Starting the training job......
2023-08-18 05:36:41 Starting - Preparing the instances for training......
2023-08-18 05:37:46 Downloading - Downloading input data...
2023-08-18 05:38:23 Training - Training image download completed. Training in progress.[34m2023-08-18 05:38:32,360 sagemaker-containers INFO     Imported framework sagemaker_tensorflow_container.training[0m
[34m2023-08-18 05:38:32,370 sagemaker-containers INFO     No GPUs detected (normal if no gpus installed)[0m
[34m2023-08-18 05:38:32,472 sagemaker_tensorflow_container.training INFO     Running distributed training job with parameter servers[0m
[34m2023-08-18 05:38:32,472 sagemaker_tensorflow_container.training INFO     Launching parameter server process[0m
[34m2023-08-18 05:38:32,472 sagemaker_tensorflow_container.training INFO     Running distributed training job with parameter servers[0m
[34m2023-08-18 05:38:32,517 sagemaker_tensorflow_container.trai

In [11]:
# Deploy the trained model to an endpoint

In [12]:
# Create a model, endpoint configuration and endpoint from the trained estimator,
# served by a single ml.c4.xlarge instance. The endpoint is deleted again at the
# end of the notebook via predictor.delete_endpoint().
predictor = mnist_estimator.deploy(initial_instance_count=1, instance_type="ml.c4.xlarge")

INFO:sagemaker.tensorflow.model:image_uri is not presented, retrieving image_uri based on instance_type, framework etc.
INFO:sagemaker:Creating model with name: tensorflow-training-2023-08-18-05-42-14-181
INFO:sagemaker:Creating endpoint-config with name tensorflow-training-2023-08-18-05-42-14-181
INFO:sagemaker:Creating endpoint with name tensorflow-training-2023-08-18-05-42-14-181


----!

In [13]:
#invoke the endpoint
import numpy as np

!aws --region {region} s3 cp s3://sagemaker-sample-data-{region}/tensorflow/mnist/train_data.npy train_data.npy
!aws --region {region} s3 cp s3://sagemaker-sample-data-{region}/tensorflow/mnist/train_labels.npy train_labels.npy

train_data = np.load("train_data.npy")
train_labels = np.load("train_labels.npy")



1502.33s - pydevd: Sending message related to process being replaced timed-out after 5 seconds


download: s3://sagemaker-sample-data-us-east-1/tensorflow/mnist/train_data.npy to ./train_data.npy


1512.15s - pydevd: Sending message related to process being replaced timed-out after 5 seconds


download: s3://sagemaker-sample-data-us-east-1/tensorflow/mnist/train_labels.npy to ./train_labels.npy


In [15]:
# Quick look at the loaded inputs (NumPy abbreviates the repr of large arrays).
train_data

array([[0., 0., 0., ..., 0., 0., 0.],
       [0., 0., 0., ..., 0., 0., 0.],
       [0., 0., 0., ..., 0., 0., 0.],
       ...,
       [0., 0., 0., ..., 0., 0., 0.],
       [0., 0., 0., ..., 0., 0., 0.],
       [0., 0., 0., ..., 0., 0., 0.]], dtype=float32)

In [18]:
# Send the first NUM_SAMPLES training examples to the endpoint and compare each
# predicted digit with its ground-truth label.
# One constant controls both how many examples are sent and how many results
# are reported, so the two can no longer drift apart.
NUM_SAMPLES = 50

predictions = predictor.predict(train_data[:NUM_SAMPLES])

# The response holds one score vector per input under the "predictions" key;
# argmax selects the index of the highest score as the predicted class.
# Pairing with zip (instead of indexing 0..49) avoids an IndexError when fewer
# than NUM_SAMPLES rows are available.
for scores, label in zip(predictions["predictions"], train_labels[:NUM_SAMPLES]):
    prediction = np.argmax(scores)
    print(
        "prediction is {}, label is {}, matched: {}".format(prediction, label, prediction == label)
    )

prediction is 7, label is 7, matched: True
prediction is 3, label is 3, matched: True
prediction is 9, label is 4, matched: False
prediction is 6, label is 6, matched: True
prediction is 1, label is 1, matched: True
prediction is 8, label is 8, matched: True
prediction is 1, label is 1, matched: True
prediction is 0, label is 0, matched: True
prediction is 9, label is 9, matched: True
prediction is 8, label is 8, matched: True
prediction is 0, label is 0, matched: True
prediction is 3, label is 3, matched: True
prediction is 1, label is 1, matched: True
prediction is 2, label is 2, matched: True
prediction is 7, label is 7, matched: True
prediction is 0, label is 0, matched: True
prediction is 2, label is 2, matched: True
prediction is 9, label is 9, matched: True
prediction is 6, label is 6, matched: True
prediction is 0, label is 0, matched: True
prediction is 1, label is 1, matched: True
prediction is 6, label is 6, matched: True
prediction is 7, label is 7, matched: True
prediction

In [20]:
# Delete the endpoint now that the predictions have been checked; as the log
# output shows, this removes the endpoint configuration as well as the endpoint.
predictor.delete_endpoint()

INFO:sagemaker:Deleting endpoint configuration with name: tensorflow-training-2023-08-18-05-42-14-181
INFO:sagemaker:Deleting endpoint with name: tensorflow-training-2023-08-18-05-42-14-181
