In [13]:
%%time 
import boto3
from sagemaker import get_execution_role
role = get_execution_role()

CPU times: user 36 ms, sys: 0 ns, total: 36 ms
Wall time: 52.7 ms


### Please provide your own S3 bucket below. The name for your bucket must contain the prefix ‘deeplens’. In this example, the bucket is ‘deeplens-imageclassification’. Make Sure S3 bucket name is unique, e.g. deeplens-imageclassfication-name-date

In [14]:
bucket='deeplens-image-classification-varunrao'
s3_key = 'imagenet_updated'

In [15]:
containers = {'us-west-2': '433757028032.dkr.ecr.us-west-2.amazonaws.com/image-classification:latest',
              'us-east-1': '811284229777.dkr.ecr.us-east-1.amazonaws.com/image-classification:latest',
              'us-east-2': '825641698319.dkr.ecr.us-east-2.amazonaws.com/image-classification:latest',
              'eu-west-1': '685385470294.dkr.ecr.eu-west-1.amazonaws.com/image-classification:latest'}
training_image = containers[boto3.Session().region_name]

In [16]:
import os 
import urllib.request
import boto3

# def download(url):
#     filename = url.split("/")[-1]
#     if not os.path.exists(filename):
#         urllib.request.urlretrieve(url, filename)

        
# def upload_to_s3(channel, file):
#     s3 = boto3.resource('s3')
#     data = open(file, "rb")
#     key = channel + '/' + file
#     s3.Bucket(bucket).put_object(Key=key, Body=data)
# # caltech-256
# download('http://data.mxnet.io/data/caltech-256/caltech-256-60-train.rec')
# upload_to_s3('train', 'caltech-256-60-train.rec')
# download('http://data.mxnet.io/data/caltech-256/caltech-256-60-val.rec')
# upload_to_s3('validation', 'caltech-256-60-val.rec')

### Please make sure number set for epochs is same as checkpoint_frequency

In [17]:
# # The algorithm supports multiple network depth (number of layers). They are 18, 34, 50, 101, 152 and 200
# # For this training, we will use 18 layers
# # This can be tuned
# #num_layers = "18" 
# num_layers = "152" 
# # we need to specify the input image shape for the training data
# image_shape = "3,480,480"
# # RGB, Image size
# #image_shape = "3,224,224"
# # we also need to specify the number of training samples in the training set
# # for caltech it is 15420
# #num_training_samples = "2527"
# num_training_samples = "4712"
# # specify the number of output classes
# num_classes = "4"
# # batch size for training
# #mini_batch_size =  "50"
# mini_batch_size =  "50"
# # number of epochs
# #epochs = "10"
# epochs = "30"
# # learning rate
# # Tune
# learning_rate = "1.25e-5"
# #learning_rate = "0.5e-3"
# #optimizer
# #optimizer ='Adam'
# #checkpoint_frequency
# checkpoint_frequency = "10"
# #scheduler_step
# lr_scheduler_step="30,90,180"
# #scheduler_factor
# lr_scheduler_factor="0.1"
# #augmentation_type
# augmentation_type="crop_color_transform"

In [18]:
# The algorithm supports multiple network depth (number of layers). They are 18, 34, 50, 101, 152 and 200
# For this training, we will use 18 layers
num_layers = "200" 
# we need to specify the input image shape for the training data
image_shape = "3,224,224"
# we also need to specify the number of training samples in the training set
# for caltech it is 15420
num_training_samples = "5085"
# specify the number of output classes
num_classes = "7"
# batch size for training
mini_batch_size =  "128"
# number of epochs
epochs = "150"
# learning rate
learning_rate = "0.1"
lr_scheduler_factor="0.1"
lr_scheduler_step="30,60,90"
#augmentation_type="crop_color_transform"
checkpoint_frequency = "150"

### Please make job_name_prefix below unique so that its easy to remember in Deeplens projects.
### InstanceType below is using ml.p3.2xlarge but if you dont have these instances in your account then you can use non GPU instaces such as ml.c5.xlarge. please check https://aws.amazon.com/sagemaker/pricing/instance-types for more different instance types available for Sagemaker

In [19]:
%%time
import time
import boto3
from time import gmtime, strftime


s3 = boto3.client('s3')
# create unique job name 
job_name_prefix = 'trashnet-imageclassification'
timestamp = time.strftime('-%Y-%m-%d-%H-%M-%S', time.gmtime())
job_name = job_name_prefix + timestamp
training_params = \
{
    # specify the training docker image
    "AlgorithmSpecification": {
        "TrainingImage": training_image,
        "TrainingInputMode": "File"
    },
    "RoleArn": role,
    "OutputDataConfig": {
        "S3OutputPath": 's3://{}/{}/{}/output'.format(bucket, s3_key, job_name_prefix)
    },
    "ResourceConfig": {
        "InstanceCount": 1,
        "InstanceType": "ml.p3.16xlarge",
        "VolumeSizeInGB": 100
    },
    "TrainingJobName": job_name,
    "HyperParameters": {
        "image_shape": image_shape,
        "num_layers": str(num_layers),
        "num_training_samples": str(num_training_samples),
        "num_classes": str(num_classes),
        "mini_batch_size": str(mini_batch_size),
        "epochs": str(epochs),
        "learning_rate": str(learning_rate),
        "lr_scheduler_step": str(lr_scheduler_step),
        "lr_scheduler_factor": str(lr_scheduler_factor),
#         "augmentation_type": str(augmentation_type),
        "checkpoint_frequency": str(checkpoint_frequency),
#         "augmentation_type" : str(augmentation_type)
    },
    "StoppingCondition": {
        "MaxRuntimeInSeconds": 360000
    },
#Training data should be inside a subdirectory called "train"
#Validation data should be inside a subdirectory called "validation"
#The algorithm currently only supports fullyreplicated model (where data is copied onto each machine)
    "InputDataConfig": [
        {
            "ChannelName": "train",
            "DataSource": {
                "S3DataSource": {
                    "S3DataType": "S3Prefix",
                    "S3Uri": 's3://{}/{}/train/'.format(bucket, s3_key),
#                     "S3DataDistributionType": "FullyReplicated"
                    "S3DataDistributionType": "ShardedByS3Key"
                }
            },
            "ContentType": "application/x-recordio",
            "CompressionType": "None"
        },
        {
            "ChannelName": "validation",
            "DataSource": {
                "S3DataSource": {
                    "S3DataType": "S3Prefix",
                    "S3Uri": 's3://{}/{}/validation/'.format(bucket, s3_key),
                    "S3DataDistributionType": "FullyReplicated"
                }
            },
            "ContentType": "application/x-recordio",
            "CompressionType": "None"
        }
    ]
}
print('Training job name: {}'.format(job_name))
print('\nInput Data Location: {}'.format(training_params['InputDataConfig'][0]['DataSource']['S3DataSource']))

Training job name: trashnet-imageclassification-2018-11-27-18-58-35

Input Data Location: {'S3DataType': 'S3Prefix', 'S3Uri': 's3://deeplens-image-classification-varunrao/imagenet_updated/train/', 'S3DataDistributionType': 'ShardedByS3Key'}
CPU times: user 4 ms, sys: 0 ns, total: 4 ms
Wall time: 3.78 ms


In [20]:
# from sagemaker.tuner import HyperparameterTuner, ContinuousParameter

# # Configure HyperparameterTuner
# my_tuner = HyperparameterTuner(estimator=my_estimator,  # previously-configured Estimator object
#                                objective_metric_name='validation-accuracy',
#                                hyperparameter_ranges={'learning-rate': ContinuousParameter(0.05, 0.06)},
#                                metric_definitions=[{'Name': 'validation-accuracy', 'Regex': 'validation-accuracy=(\d\.\d+)'}],
#                                max_jobs=100,
#                                max_parallel_jobs=10)

# # Start hyperparameter tuning job
# my_tuner.fit({'train': 's3://my_bucket/my_training_data', 'test': 's3://my_bucket_my_testing_data'})

# # Deploy best model
# my_predictor = my_tuner.deploy(initial_instance_count=1, instance_type='ml.m4.xlarge')

In [22]:
# create the Amazon SageMaker training job
sagemaker = boto3.client(service_name='sagemaker')
sagemaker.create_training_job(**training_params)

# confirm that the training job has started
status = sagemaker.describe_training_job(TrainingJobName=job_name)['TrainingJobStatus']
print('Training job current status: {}'.format(status))

try:
    # wait for the job to finish and report the ending status
    sagemaker.get_waiter('training_job_completed_or_stopped').wait(TrainingJobName=job_name)
    training_info = sagemaker.describe_training_job(TrainingJobName=job_name)
    status = training_info['TrainingJobStatus']
    print("Training job ended with status: " + status)
except:
    print('Training failed to start')
     # if exception is raised, that means it has failed
    message = sagemaker.describe_training_job(TrainingJobName=job_name)['FailureReason']
    print('Training failed with the following error: {}'.format(message))

ResourceInUse: An error occurred (ResourceInUse) when calling the CreateTrainingJob operation: Training job names must be unique within an AWS account and region, and a training job with this name already exists (arn:aws:sagemaker:us-east-1:649615449669:training-job/trashnet-imageclassification-2018-11-27-18-58-35)

In [23]:
import boto3
sagemaker = boto3.client(service_name='sagemaker')
job_name = 'trashnet-imageclassification-2018-11-27-18-58-35'

# confirm that the training job has started
status = sagemaker.describe_training_job(TrainingJobName=job_name)['TrainingJobStatus']
print('Training job current status: {}'.format(status))

try:
    # wait for the job to finish and report the ending status
    sagemaker.get_waiter('training_job_completed_or_stopped').wait(TrainingJobName=job_name)
    training_info = sagemaker.describe_training_job(TrainingJobName=job_name)
    status = training_info['TrainingJobStatus']
    print("Training job ended with status: " + status)
except:
    print('Training failed to start')
     # if exception is raised, that means it has failed
    message = sagemaker.describe_training_job(TrainingJobName=job_name)['FailureReason']
    print('Training failed with the following error: {}'.format(message))

Training job current status: Completed
Training job ended with status: Completed


In [None]:
# #Delete existing endpoint configuration + endpoint + update model
# ## Create EndPoint Config
# primary_container = ""
# create_model_response = sagemaker.create_model(
#     ModelName = model_name,
#     ExecutionRoleArn = role,
#     PrimaryContainer = primary_container)

# response = sagemaker.create_endpoint_config(
#     EndpointConfigName=endpoint_config_name,
#     ProductionVariants=[
#         {
#             'VariantName': 'trashnet-imagenet-new',
#             'ModelName': model_name,
#             'InitialInstanceCount': 1,
#             'InstanceType': 'ml.m4.xlarge'
#         },
#     ])

# print (response)

# ## Create Sagemaker Endpoint
# response = sagemaker.create_endpoint(
#     EndpointName=endpoint_name,
#     EndpointConfigName=endpoint_config_name,
# )

# print (response)

In [24]:
import json
import numpy as np
from IPython.display import Image
import boto3
runtime = boto3.Session().client(service_name='runtime.sagemaker') 

object_categories = ['bottle', 'cardboard','coffee_cup','glass', 'plastic', 'plastic_bag', 'soda_can', 'trash']
endpoint_name = 'trashnet-imagenet-new'

image_list = {"bottle" : "http://farm2.static.flickr.com/1101/1030700323_7e3f92b758.jpg",
              "cardboard": "http://www.toyhalloffame.org/sites/www.toyhalloffame.org/files/toys/square/cardboard-box-square.jpg",
              "coffee_cup": "http://farm3.static.flickr.com/2146/2503344744_9d65322922.jpg",
              "glass": "http://farm3.static.flickr.com/2146/2503344744_9d65322922.jpg",
              "plastic": "http://farm1.static.flickr.com/45/105983430_a0d3326d20.jpg",
              "plastic_bag": "http://farm3.static.flickr.com/2089/2352391295_422eeec9bc.jpg",
              "soda_can" : "http://farm4.static.flickr.com/3446/3393921436_a7b6be433e.jpg",
             "trash" : "https://s3.amazonaws.com/reinvent2018-recycle-arm-us-east-1/samples/trash/trash1.jpg"}

for image_category, image_category_file_path in image_list.items():
    print(image_category_file_path)
    !wget -O /tmp/test.jpg $image_category_file_path
    file_name = '/tmp/test.jpg'
    
    with open(file_name, 'rb') as f:
        payload = f.read()
        payload = bytearray(payload)
    response = runtime.invoke_endpoint(EndpointName=endpoint_name, 
                                       ContentType='application/x-image', 
                                       Body=payload)
    result = response['Body'].read()
    # result will be in json format and convert it to ndarray
    result = json.loads(result)
    print ("-------------\n")
    print ("Category input - {}\n".format(image_category))
    print ("Results: {} \n".format(result))
    index = np.argmax(result)
    print("Resulting label : {} , probability : {} ".format(object_categories[index],str(result[index])))
    print ("-------------")

http://farm2.static.flickr.com/1101/1030700323_7e3f92b758.jpg
--2018-11-27 23:41:20--  http://farm2.static.flickr.com/1101/1030700323_7e3f92b758.jpg
Resolving farm2.static.flickr.com (farm2.static.flickr.com)... 67.195.205.33
Connecting to farm2.static.flickr.com (farm2.static.flickr.com)|67.195.205.33|:80... connected.
HTTP request sent, awaiting response... 200 OK
Length: 143830 (140K) [image/jpeg]
Saving to: ‘/tmp/test.jpg’


2018-11-27 23:41:20 (2.04 MB/s) - ‘/tmp/test.jpg’ saved [143830/143830]

-------------

Category input - bottle

Results: [0.9887984395027161, 1.0231772762381297e-08, 5.2821011453652034e-11, 6.565922205936658e-08, 4.653782980312826e-06, 0.011194834485650063, 2.0016968846903183e-06, 2.6980877926817415e-13] 

Resulting label : bottle , probability : 0.9887984395027161 
-------------
http://www.toyhalloffame.org/sites/www.toyhalloffame.org/files/toys/square/cardboard-box-square.jpg
--2018-11-27 23:41:21--  http://www.toyhalloffame.org/sites/www.toyhalloffame.org/f