In [None]:
import boto3
import sagemaker
from sagemaker import get_execution_role
from sagemaker import image_uris

# S3 bucket used by this notebook for job output.
bucket = "plant-disease-detection-tutorial"

# Execution role and SageMaker session for the current notebook environment.
role = get_execution_role()
sess = sagemaker.Session()

# Container image of the built-in "image-classification" framework,
# looked up for the region of the default boto3 session.
training_image = image_uris.retrieve(
    framework="image-classification",
    region=boto3.Session().region_name,
)

In [None]:
# Show the value returned by image_uris.retrieve as this cell's output.
training_image

In [None]:
# S3 locations of the four input channels. They are derived from `bucket` so that
# the training input and the job output (which is also written under `bucket`)
# always live in the same bucket.
s3_data_root = "s3://{}/medium-tutorial".format(bucket)

# Image files for training / validation.
s3train = s3_data_root + "/s3train/"
s3validation = s3_data_root + "/s3validation/"
# Listing (.lst) channels that accompany the image channels.
s3train_lst = s3_data_root + "/s3train_lst/"
s3validation_lst = s3_data_root + "/s3validation_lst/"

In [None]:
# ---- Network -------------------------------------------------------------
# Supported network depths (number of layers): 18, 34, 50, 101, 152 and 200.
# This training run uses the 18-layer variant.
num_layers = 18
# Transfer learning: 1 means the network is initialized with pre-trained weights.
use_pretrained_model = 1

# ---- Data ----------------------------------------------------------------
# Shape of the input images fed to training.
image_shape = "3,224,224"
# Images are resized to this value before training.
resize = 256
# Number of samples in the training set.
num_training_samples = 10000
# Number of output classes.
num_classes = 10

# ---- Optimization --------------------------------------------------------
# Batch size used for training.
mini_batch_size = 128
# Number of passes over the training data.
epochs = 6
# Learning rate.
learning_rate = 0.01

# ---- Reporting / checkpoints ---------------------------------------------
# Report top-5 accuracy.
top_k = 5
# Store model parameters every N epochs; with 6 epochs this saves at epochs 2, 4 and 6.
checkpoint_frequency = 2

In [None]:
%%time
import time
import boto3
from time import gmtime, strftime


# S3 client. No access key / secret key is passed: empty-string credentials would
# be used verbatim and fail on the first request, and real keys must never be
# pasted into a notebook. boto3 resolves credentials from its default chain instead.
s3 = boto3.client("s3")

# Create a unique training job name: fixed prefix + UTC timestamp.
job_name_prefix = "sagemaker-imageclassification-notebook"
timestamp = time.strftime("-%Y-%m-%d-%H-%M-%S", time.gmtime())
job_name = job_name_prefix + timestamp
def _image_channel(channel_name, s3_uri):
    """Build one ``InputDataConfig`` entry for the training request.

    All four channels of this job share the same settings and differ only in
    their name and S3 location.

    Args:
        channel_name: Value for ``ChannelName`` (e.g. ``"train"``).
        s3_uri: S3 prefix that holds the channel's data.

    Returns:
        dict describing the channel, ready to be placed in ``InputDataConfig``.
    """
    return {
        "ChannelName": channel_name,
        "DataSource": {
            "S3DataSource": {
                "S3DataType": "S3Prefix",
                "S3Uri": s3_uri,
                # The algorithm currently only supports the fully replicated mode
                # (the data is copied onto each machine).
                "S3DataDistributionType": "FullyReplicated",
            }
        },
        "ContentType": "application/x-image",
        "CompressionType": "None",
    }


# Request body for `create_training_job`.
training_params = {
    # specify the training docker image
    "AlgorithmSpecification": {"TrainingImage": training_image, "TrainingInputMode": "File"},
    "RoleArn": role,
    "OutputDataConfig": {"S3OutputPath": "s3://{}/{}/output".format(bucket, job_name_prefix)},
    "ResourceConfig": {"InstanceCount": 1, "InstanceType": "ml.g4dn.xlarge", "VolumeSizeInGB": 50},
    "TrainingJobName": job_name,
    # Hyperparameters are passed as strings.
    "HyperParameters": {
        "image_shape": image_shape,
        "num_layers": str(num_layers),
        "num_training_samples": str(num_training_samples),
        "num_classes": str(num_classes),
        "mini_batch_size": str(mini_batch_size),
        "epochs": str(epochs),
        "learning_rate": str(learning_rate),
        "top_k": str(top_k),
        "resize": str(resize),
        "checkpoint_frequency": str(checkpoint_frequency),
        "use_pretrained_model": str(use_pretrained_model),
    },
    "StoppingCondition": {"MaxRuntimeInSeconds": 360000},
    # Four channels: the training / validation images and their listing channels.
    # The first entry must stay the "train" channel; it is the one printed below.
    "InputDataConfig": [
        _image_channel("train", s3train),
        _image_channel("validation", s3validation),
        _image_channel("train_lst", s3train_lst),
        _image_channel("validation_lst", s3validation_lst),
    ],
}
print("Training job name: {}".format(job_name))
print(
    "\nInput Data Location: {}".format(
        training_params["InputDataConfig"][0]["DataSource"]["S3DataSource"]
    )
)

In [None]:
# create the Amazon SageMaker training job
# NOTE: this rebinds `sagemaker` from the SDK module imported at the top of the
# notebook to a boto3 client; later cells call this client through the same name.
sagemaker = boto3.client(service_name="sagemaker")
sagemaker.create_training_job(**training_params)

# confirm that the training job has started
status = sagemaker.describe_training_job(TrainingJobName=job_name)["TrainingJobStatus"]
print("Training job current status: {}".format(status))

try:
    # wait for the job to finish
    sagemaker.get_waiter("training_job_completed_or_stopped").wait(TrainingJobName=job_name)
except Exception:
    # `except Exception` (not a bare except) so that interrupting the kernel is not
    # reported as a training failure. The waiter raised, so the job either failed
    # or the wait was given up; report what the service knows and re-raise so the
    # following cells do not run against a job without model artifacts.
    training_info = sagemaker.describe_training_job(TrainingJobName=job_name)
    status = training_info["TrainingJobStatus"]
    # FailureReason is read with .get() because it may be absent from the response.
    message = training_info.get("FailureReason", "no failure reason reported")
    print("Training job did not complete; current status: " + status)
    print("Training failed with the following error: {}".format(message))
    raise
else:
    # report the ending status
    training_info = sagemaker.describe_training_job(TrainingJobName=job_name)
    status = training_info["TrainingJobStatus"]
    print("Training job ended with status: " + status)

In [None]:
# Fetch the current description of the training job, print its status,
# then print the full description.
training_info = sagemaker.describe_training_job(TrainingJobName=job_name)
status = training_info["TrainingJobStatus"]
print("Training job ended with status: {}".format(status))
print(training_info)

In [None]:
%%time
import boto3
from time import gmtime, strftime

# SageMaker client used for model and endpoint-configuration calls.
sage = boto3.Session().client(service_name="sagemaker")

# Model name: fixed prefix + UTC timestamp.
timestamp = strftime("-%Y-%m-%d-%H-%M-%S", gmtime())
model_name = "image-classification-model{}".format(timestamp)
print(model_name)

# Location of the artifacts produced by the training job.
info = sage.describe_training_job(TrainingJobName=job_name)
model_data = info["ModelArtifacts"]["S3ModelArtifacts"]
print(model_data)

# Image used for hosting: same framework lookup as for training.
hosting_image = image_uris.retrieve(
    framework="image-classification",
    region=boto3.Session().region_name,
)

primary_container = dict(Image=hosting_image, ModelDataUrl=model_data)

# Register the model and print its ARN.
create_model_response = sage.create_model(
    PrimaryContainer=primary_container,
    ExecutionRoleArn=role,
    ModelName=model_name,
)

print(create_model_response["ModelArn"])

In [None]:
# Transform job name: fixed prefix + UTC timestamp.
timestamp = time.strftime("-%Y-%m-%d-%H-%M-%S", time.gmtime())
batch_job_name = "image-classification-model{}".format(timestamp)

# Run the batch transform over one class folder of the validation images.
batch_input = "{}001.Apple___Apple_scab/".format(s3validation)

# Request body for `create_transform_job`.
request = dict(
    TransformJobName=batch_job_name,
    ModelName=model_name,
    MaxConcurrentTransforms=16,
    MaxPayloadInMB=6,
    BatchStrategy="SingleRecord",
    TransformOutput={"S3OutputPath": "s3://{}/{}/output".format(bucket, batch_job_name)},
    TransformInput={
        "DataSource": {"S3DataSource": {"S3DataType": "S3Prefix", "S3Uri": batch_input}},
        "ContentType": "application/x-image",
        "SplitType": "None",
        "CompressionType": "None",
    },
    TransformResources={"InstanceType": "ml.m5.large", "InstanceCount": 1},
)

print("Transform job name: {}".format(batch_job_name))
print("\nInput Data Location: {}".format(batch_input))

In [None]:
# Start the batch transform job and poll until it reaches a terminal status.
sagemaker = boto3.client("sagemaker")
sagemaker.create_transform_job(**request)

print("Created Transform job with name: ", batch_job_name)

while True:
    response = sagemaker.describe_transform_job(TransformJobName=batch_job_name)
    status = response["TransformJobStatus"]
    if status == "Completed":
        print("Transform job ended with status: " + status)
        break
    if status == "Failed":
        # FailureReason is read with .get() so a missing field cannot mask the failure.
        message = response.get("FailureReason", "no failure reason reported")
        print("Transform failed with the following error: {}".format(message))
        raise Exception("Transform job failed")
    if status == "Stopped":
        # A stopped job never becomes Completed or Failed; without this branch
        # the loop would poll forever.
        print("Transform job ended with status: " + status)
        raise Exception("Transform job was stopped")
    # Still running (or stopping): wait before polling again.
    time.sleep(30)

In [None]:
# Shell escape: run the AWS CLI `sagemaker list-models` command and show its output.
!aws sagemaker list-models

In [None]:
from urllib.parse import urlparse
import json
import numpy as np

# S3 client used by the helpers below to list and download objects.
s3_client = boto3.client("s3")
# Class labels. `get_label` indexes this list with the argmax position of a
# prediction vector, so the order of the entries matters.
# NOTE(review): presumably this order matches the class indices used when the
# model was trained — confirm against the training data listing.
object_categories = [
    "001.Apple___Apple_scab",
    "002.Apple___Black_rot",
    "003.Apple___Cedar_apple_rust",
    "004.Apple___healthy",
    "005.Blueberry___healthy",
    "006.Cherry_(including_sour)___healthy",
    "007.Cherry_(including_sour)___Powdery_mildew",
    "008.Corn_(maize)___Cercospora_leaf_spot Gray_leaf_spot",
    "009.Corn_(maize)___Common_rust_",
    "010.Corn_(maize)___healthy"
]


def list_objects(s3_client, bucket, prefix):
    """Return the keys of all objects in ``bucket`` whose key starts with ``prefix``.

    The listing is paginated, so every matching key is returned rather than only
    those in the first response page.

    Args:
        s3_client: boto3 S3 client (anything offering ``get_paginator``).
        bucket: Name of the bucket to list.
        prefix: Key prefix to filter on.

    Returns:
        list of str: The matching keys in listing order; an empty list when
        nothing matches.
    """
    paginator = s3_client.get_paginator("list_objects_v2")
    objects = []
    for page in paginator.paginate(Bucket=bucket, Prefix=prefix):
        # A page without matches carries no "Contents" entry at all.
        objects.extend(content["Key"] for content in page.get("Contents", []))
    return objects


def get_label(s3_client, bucket, prefix):
    """Download one prediction file and report its most probable class.

    The object is saved in the current working directory under the last path
    component of its key, parsed as JSON, and the highest entry of its
    ``"prediction"`` list is looked up in ``object_categories``.

    Args:
        s3_client: boto3 S3 client used for the download.
        bucket: Bucket that holds the prediction file.
        prefix: Full key of the prediction file.

    Returns:
        tuple: ``(label, probability)`` of the top prediction.
    """
    local_name = prefix.rsplit("/", 1)[-1]
    s3_client.download_file(bucket, prefix, local_name)

    with open(local_name) as handle:
        scores = json.load(handle)["prediction"]

    best = np.argmax(scores)
    label = object_categories[best]
    confidence = scores[best]

    print("Result: label - " + label + ", probability - " + str(confidence))
    return label, confidence

print(batch_input)

# Derive bucket and key prefix from `batch_input` instead of repeating them as
# literals, so the listing always matches the input the transform job was given.
batch_input_location = urlparse(batch_input)
inputs = list_objects(
    s3_client, batch_input_location.netloc, batch_input_location.path.lstrip("/")
)
print("Sample inputs: " + str(inputs[:2]))

# The transform job writes its results under "<batch_job_name>/output" in `bucket`.
outputs = list_objects(s3_client, bucket, batch_job_name + "/output")
print("Sample output: " + str(outputs[:2]))

# Check prediction result of the first 2 images
[get_label(s3_client, bucket, prefix) for prefix in outputs[0:2]]

In [None]:
from time import gmtime, strftime

# Endpoint configuration name: job prefix + "-epc-" + UTC timestamp.
timestamp = strftime("-%Y-%m-%d-%H-%M-%S", gmtime())
endpoint_config_name = "".join([job_name_prefix, "-epc-", timestamp])

# One production variant serving the model created above.
endpoint_config_response = sage.create_endpoint_config(
    ProductionVariants=[
        {
            "VariantName": "AllTraffic",
            "ModelName": model_name,
            "InstanceType": "ml.t2.medium",
            "InitialInstanceCount": 1,
        }
    ],
    EndpointConfigName=endpoint_config_name,
)

print("Endpoint configuration name: {}".format(endpoint_config_name))
print("Endpoint configuration arn:  {}".format(endpoint_config_response["EndpointConfigArn"]))

In [None]:
%%time
import time

# Endpoint name: job prefix + "-ep-" + UTC timestamp.
timestamp = time.strftime("-%Y-%m-%d-%H-%M-%S", time.gmtime())
endpoint_name = "".join([job_name_prefix, "-ep-", timestamp])
print("Endpoint name: {}".format(endpoint_name))

# Create the endpoint from the endpoint configuration and print its ARN.
endpoint_params = dict(
    EndpointName=endpoint_name,
    EndpointConfigName=endpoint_config_name,
)
endpoint_response = sagemaker.create_endpoint(**endpoint_params)
print("EndpointArn = {}".format(endpoint_response["EndpointArn"]))

In [None]:
# get the status of the endpoint
response = sagemaker.describe_endpoint(EndpointName=endpoint_name)
status = response["EndpointStatus"]
print("EndpointStatus = {}".format(status))

try:
    # block until the endpoint is in service (the waiter raises otherwise)
    sagemaker.get_waiter("endpoint_in_service").wait(EndpointName=endpoint_name)
finally:
    # Runs whether or not the waiter raised, so the final state is always reported.
    resp = sagemaker.describe_endpoint(EndpointName=endpoint_name)
    status = resp["EndpointStatus"]
    print("Arn: " + resp["EndpointArn"])
    print("Create endpoint ended with status: " + status)

    if status != "InService":
        # Reuse `resp` instead of describing the endpoint again, and read
        # FailureReason with .get(): an endpoint that is not in service yet does
        # not necessarily carry one.
        message = resp.get("FailureReason", "no failure reason reported")
        print("Endpoint creation failed with the following error: {}".format(message))
        raise Exception("Endpoint creation did not succeed")

In [None]:
# Print the name of the endpoint created above.
print(endpoint_name)