In [2]:
import time
import numpy as np
import pandas as pd
import json
import matplotlib.pyplot as plt
import datetime


# This code is derived from AWS SageMaker Samples:
# https://github.com/awslabs/amazon-sagemaker-examples/tree/master/introduction_to_amazon_algorithms/deepar_electricity
# https://github.com/awslabs/amazon-sagemaker-examples/tree/master/introduction_to_amazon_algorithms/deepar_synthetic

In [3]:
import boto3
import sagemaker
from sagemaker import get_execution_role

In [48]:
# Pick a descriptive base job name per model variant;
# it makes the trained models and endpoints easy to identify later
with_categories = True
base_job_name = (
    'deepar-biketrain-with-categories'
    if with_categories
    else 'deepar-biketrain-no-categories'
)

In [49]:
bucket = 'weisurya-sagemaker-playground'
prefix = 'deepar/bikerental'

# Each model variant reads from its own data folder; this layout allows
# multiple training and test files for model development and testing.
# Both paths are bucket-qualified but carry no "s3://" scheme.
s3_data_path = "/".join(
    [bucket, prefix, 'data_with_categories' if with_categories else 'data']
)

s3_output_path = "/".join([bucket, prefix, 'output'])

In [50]:
# Display the resolved data and output locations (bucket-qualified, without the "s3://" scheme)
s3_data_path,s3_output_path

('weisurya-sagemaker-playground/deepar/bikerental/data_with_categories',
 'weisurya-sagemaker-playground/deepar/bikerental/output')

In [51]:
# File name is referred as key name in S3
# Files stored in S3 are automatically replicated across
# three different availability zones in the region where the bucket was created.
# http://boto3.readthedocs.io/en/latest/guide/s3.html
def write_to_s3(filename, bucket, key):
    """Upload a local file to S3.

    Args:
        filename: Path of the local file to upload.
        bucket: Name of the destination S3 bucket.
        key: Object key (the "file name" inside the bucket) to write to.

    Returns:
        Whatever ``upload_fileobj`` returns for the upload.
    """
    # The file is opened as a binary stream and handed to upload_fileobj
    with open(filename, 'rb') as source:
        target = boto3.Session().resource('s3').Bucket(bucket).Object(key)
        return target.upload_fileobj(source)

In [52]:
# Upload one or more training files and test files to S3.
# The object keys are built from `prefix` instead of repeating the folder name
# as a literal, so the uploads always land under the same location that
# s3_data_path (and therefore the train/test channels) is derived from.
if with_categories:
    write_to_s3('train_with_categories.json',
                bucket,
                '{}/data_with_categories/train/train_with_categories.json'.format(prefix))
    write_to_s3('test_with_categories.json',
                bucket,
                '{}/data_with_categories/test/test_with_categories.json'.format(prefix))
else:
    write_to_s3('train.json',
                bucket,
                '{}/data/train/train.json'.format(prefix))
    write_to_s3('test.json',
                bucket,
                '{}/data/test/test.json'.format(prefix))

In [53]:
# SageMaker session object; handed to the Estimator as `sagemaker_session`
sagemaker_session = sagemaker.Session()
# Value returned by get_execution_role(); handed to the Estimator as `role`
role = get_execution_role()

In [54]:
# The SageMaker SDK looks up the algorithm container image for the current
# region, so there is no need to maintain a region-to-image mapping by hand
# https://docs.aws.amazon.com/sagemaker/latest/dg/sagemaker-algo-docker-registry-paths.html
from sagemaker.amazon.amazon_estimator import get_image_uri
image_name = get_image_uri(
    region_name=boto3.Session().region_name,
    repo_name='forecasting-deepar',
)

In [55]:
# Display the container image URI resolved for this region
image_name

'156387875391.dkr.ecr.us-west-2.amazonaws.com/forecasting-deepar:1'

In [56]:
# The time series consists of hourly data, and the hourly rental count is what we predict
freq = 'H'

# Forecast horizon: how far into the future predictions can be made.
# 12 days of hourly forecasts = 12 * 24 = 288 time steps
prediction_length = 12 * 24

# Context length controls how far into the past the network can see.
# AWS recommends starting with the same value as the prediction length
context_length = 12 * 24

In [57]:
# Instance type notes:
#   * If you are still under the free tier, check what it covers:
#     https://aws.amazon.com/sagemaker/pricing/
#     (at the time of writing, m4.xlarge was offered as part of the 2 months free tier)
#   * Outside of the free tier, ml.m5.xlarge (newer generation instance) is an option
# This example trains on a single ml.m5.xlarge instance
estimator = sagemaker.estimator.Estimator(
    image_name=image_name,
    role=role,
    train_instance_type='ml.m5.xlarge',
    train_instance_count=1,
    # s3_output_path carries no scheme, so it is added here
    output_path="s3://{}".format(s3_output_path),
    base_job_name=base_job_name,
    sagemaker_session=sagemaker_session
)

In [58]:
# Display the frequency, context length and prediction length used for training
freq, context_length, prediction_length

('H', 288, 288)

In [59]:
# DeepAR hyperparameter reference:
# https://docs.aws.amazon.com/sagemaker/latest/dg/deepar_hyperparameters.html
# All values are supplied as strings; the numeric settings are converted with str().
# cardinality is "auto" when categories are used, and an empty string otherwise.
hyperparameters = dict(
    time_freq=freq,
    epochs="400",
    early_stopping_patience="40",
    mini_batch_size="64",
    learning_rate="5E-4",
    context_length=str(context_length),
    prediction_length=str(prediction_length),
    cardinality="auto" if with_categories else ''
)

In [60]:
# Display the hyperparameter dict before it is applied to the estimator
hyperparameters

{'time_freq': 'H',
 'epochs': '400',
 'early_stopping_patience': '40',
 'mini_batch_size': '64',
 'learning_rate': '5E-4',
 'context_length': '288',
 'prediction_length': '288',
 'cardinality': 'auto'}

In [61]:
# Unpack the hyperparameter dict into keyword arguments for the estimator
estimator.set_hyperparameters(**hyperparameters)

In [63]:
# Each channel simply points at a folder (train path / test path), not a single file.
# A folder can hold multiple files, and SageMaker will use all of them
data_channels = {
    channel: "s3://{}/{}/".format(s3_data_path, channel)
    for channel in ("train", "test")
}

In [64]:
# Display the S3 URIs of the train and test channels
data_channels

{'train': 's3://weisurya-sagemaker-playground/deepar/bikerental/data_with_categories/train/',
 'test': 's3://weisurya-sagemaker-playground/deepar/bikerental/data_with_categories/test/'}

In [65]:
# Start the training job on the train and test channels.
# This step takes around 35 minutes to train the model with m4.xlarge instance
# NOTE(review): the estimator in this notebook is configured with ml.m5.xlarge,
# so the m4.xlarge timing above may not apply as-is - confirm.
estimator.fit(inputs=data_channels)

2020-01-29 03:55:20 Starting - Starting the training job...
2020-01-29 03:55:21 Starting - Launching requested ML instances......
2020-01-29 03:56:23 Starting - Preparing the instances for training...
2020-01-29 03:57:04 Downloading - Downloading input data...
2020-01-29 03:57:36 Training - Training image download completed. Training in progress.[34mArguments: train[0m
[34m[01/29/2020 03:57:38 INFO 139797266573120] Reading default configuration from /opt/amazon/lib/python2.7/site-packages/algorithm/resources/default-input.json: {u'num_dynamic_feat': u'auto', u'dropout_rate': u'0.10', u'mini_batch_size': u'128', u'test_quantiles': u'[0.1, 0.2, 0.3, 0.4, 0.5, 0.6, 0.7, 0.8, 0.9]', u'_tuning_objective_metric': u'', u'_num_gpus': u'auto', u'num_eval_samples': u'100', u'learning_rate': u'0.001', u'num_cells': u'40', u'num_layers': u'2', u'embedding_dimension': u'10', u'_kvstore': u'auto', u'_num_kv_servers': u'auto', u'cardinality': u'auto', u'likelihood': u'student-t', u'early_stopping_

[34m[01/29/2020 03:58:29 INFO 139797266573120] Saved checkpoint to "/opt/ml/model/state_3be546db-c049-4d37-8d78-ed391736d62b-0000.params"[0m
[34m#metrics {"Metrics": {"state.serialize.time": {"count": 1, "max": 212.52202987670898, "sum": 212.52202987670898, "min": 212.52202987670898}}, "EndTime": 1580270309.024831, "Dimensions": {"Host": "algo-1", "Operation": "training", "Algorithm": "AWS/DeepAR"}, "StartTime": 1580270308.811948}
[0m
[34m[01/29/2020 03:58:30 INFO 139797266573120] Epoch[2] Batch[0] avg_epoch_loss=3.495903[0m
[34m[01/29/2020 03:58:30 INFO 139797266573120] #quality_metric: host=algo-1, epoch=2, batch=0 train loss <loss>=3.4959025383[0m
[34m[01/29/2020 03:58:35 INFO 139797266573120] Epoch[2] Batch[5] avg_epoch_loss=3.570497[0m
[34m[01/29/2020 03:58:35 INFO 139797266573120] #quality_metric: host=algo-1, epoch=2, batch=5 train loss <loss>=3.57049675783[0m
[34m[01/29/2020 03:58:35 INFO 139797266573120] Epoch[2] Batch [5]#011Speed: 62.57 samples/sec#011loss=3.570

[34m[01/29/2020 03:59:21 INFO 139797266573120] Epoch[6] Batch[5] avg_epoch_loss=3.162816[0m
[34m[01/29/2020 03:59:21 INFO 139797266573120] #quality_metric: host=algo-1, epoch=6, batch=5 train loss <loss>=3.16281580925[0m
[34m[01/29/2020 03:59:21 INFO 139797266573120] Epoch[6] Batch [5]#011Speed: 61.97 samples/sec#011loss=3.162816[0m
[34m[01/29/2020 03:59:25 INFO 139797266573120] processed a total of 616 examples[0m
[34m#metrics {"Metrics": {"update.time": {"count": 1, "max": 10984.293937683105, "sum": 10984.293937683105, "min": 10984.293937683105}}, "EndTime": 1580270365.480115, "Dimensions": {"Host": "algo-1", "Operation": "training", "Algorithm": "AWS/DeepAR"}, "StartTime": 1580270354.495764}
[0m
[34m[01/29/2020 03:59:25 INFO 139797266573120] #throughput_metric: host=algo-1, train throughput=56.079337261 records/second[0m
[34m[01/29/2020 03:59:25 INFO 139797266573120] #progress_metric: host=algo-1, completed 1 % of epochs[0m
[34m[01/29/2020 03:59:25 INFO 13979726657312

[34m[01/29/2020 04:00:17 INFO 139797266573120] Epoch[11] Batch[5] avg_epoch_loss=2.962284[0m
[34m[01/29/2020 04:00:17 INFO 139797266573120] #quality_metric: host=algo-1, epoch=11, batch=5 train loss <loss>=2.96228408813[0m
[34m[01/29/2020 04:00:17 INFO 139797266573120] Epoch[11] Batch [5]#011Speed: 62.75 samples/sec#011loss=2.962284[0m
[34m[01/29/2020 04:00:22 INFO 139797266573120] Epoch[11] Batch[10] avg_epoch_loss=3.016880[0m
[34m[01/29/2020 04:00:22 INFO 139797266573120] #quality_metric: host=algo-1, epoch=11, batch=10 train loss <loss>=3.0823946476[0m
[34m[01/29/2020 04:00:22 INFO 139797266573120] Epoch[11] Batch [10]#011Speed: 62.29 samples/sec#011loss=3.082395[0m
[34m[01/29/2020 04:00:22 INFO 139797266573120] processed a total of 653 examples[0m
[34m#metrics {"Metrics": {"update.time": {"count": 1, "max": 11894.230127334595, "sum": 11894.230127334595, "min": 11894.230127334595}}, "EndTime": 1580270422.566994, "Dimensions": {"Host": "algo-1", "Operation": "training"

[34m[01/29/2020 04:01:09 INFO 139797266573120] Epoch[16] Batch[0] avg_epoch_loss=2.884969[0m
[34m[01/29/2020 04:01:09 INFO 139797266573120] #quality_metric: host=algo-1, epoch=16, batch=0 train loss <loss>=2.88496923447[0m
[34m[01/29/2020 04:01:14 INFO 139797266573120] Epoch[16] Batch[5] avg_epoch_loss=2.880578[0m
[34m[01/29/2020 04:01:14 INFO 139797266573120] #quality_metric: host=algo-1, epoch=16, batch=5 train loss <loss>=2.88057835897[0m
[34m[01/29/2020 04:01:14 INFO 139797266573120] Epoch[16] Batch [5]#011Speed: 62.09 samples/sec#011loss=2.880578[0m
[34m[01/29/2020 04:01:18 INFO 139797266573120] processed a total of 634 examples[0m
[34m#metrics {"Metrics": {"update.time": {"count": 1, "max": 10995.32699584961, "sum": 10995.32699584961, "min": 10995.32699584961}}, "EndTime": 1580270478.810898, "Dimensions": {"Host": "algo-1", "Operation": "training", "Algorithm": "AWS/DeepAR"}, "StartTime": 1580270467.815142}
[0m
[34m[01/29/2020 04:01:18 INFO 139797266573120] #throug

[34m[01/29/2020 04:02:11 INFO 139797266573120] Epoch[21] Batch[5] avg_epoch_loss=2.814417[0m
[34m[01/29/2020 04:02:11 INFO 139797266573120] #quality_metric: host=algo-1, epoch=21, batch=5 train loss <loss>=2.81441668669[0m
[34m[01/29/2020 04:02:11 INFO 139797266573120] Epoch[21] Batch [5]#011Speed: 62.20 samples/sec#011loss=2.814417[0m
[34m[01/29/2020 04:02:17 INFO 139797266573120] Epoch[21] Batch[10] avg_epoch_loss=2.825153[0m
[34m[01/29/2020 04:02:17 INFO 139797266573120] #quality_metric: host=algo-1, epoch=21, batch=10 train loss <loss>=2.83803744316[0m
[34m[01/29/2020 04:02:17 INFO 139797266573120] Epoch[21] Batch [10]#011Speed: 62.10 samples/sec#011loss=2.838037[0m
[34m[01/29/2020 04:02:17 INFO 139797266573120] processed a total of 650 examples[0m
[34m#metrics {"Metrics": {"update.time": {"count": 1, "max": 12008.004903793335, "sum": 12008.004903793335, "min": 12008.004903793335}}, "EndTime": 1580270537.01107, "Dimensions": {"Host": "algo-1", "Operation": "training"

[34m[01/29/2020 04:03:10 INFO 139797266573120] Epoch[26] Batch[5] avg_epoch_loss=2.815939[0m
[34m[01/29/2020 04:03:10 INFO 139797266573120] #quality_metric: host=algo-1, epoch=26, batch=5 train loss <loss>=2.81593914827[0m
[34m[01/29/2020 04:03:10 INFO 139797266573120] Epoch[26] Batch [5]#011Speed: 61.75 samples/sec#011loss=2.815939[0m
[34m[01/29/2020 04:03:15 INFO 139797266573120] processed a total of 623 examples[0m
[34m#metrics {"Metrics": {"update.time": {"count": 1, "max": 11058.558940887451, "sum": 11058.558940887451, "min": 11058.558940887451}}, "EndTime": 1580270595.007688, "Dimensions": {"Host": "algo-1", "Operation": "training", "Algorithm": "AWS/DeepAR"}, "StartTime": 1580270583.948809}
[0m
[34m[01/29/2020 04:03:15 INFO 139797266573120] #throughput_metric: host=algo-1, train throughput=56.3359854348 records/second[0m
[34m[01/29/2020 04:03:15 INFO 139797266573120] #progress_metric: host=algo-1, completed 6 % of epochs[0m
[34m[01/29/2020 04:03:15 INFO 1397972665

[34m[01/29/2020 04:04:09 INFO 139797266573120] Epoch[31] Batch[5] avg_epoch_loss=2.699894[0m
[34m[01/29/2020 04:04:09 INFO 139797266573120] #quality_metric: host=algo-1, epoch=31, batch=5 train loss <loss>=2.69989399115[0m
[34m[01/29/2020 04:04:09 INFO 139797266573120] Epoch[31] Batch [5]#011Speed: 61.76 samples/sec#011loss=2.699894[0m
[34m[01/29/2020 04:04:13 INFO 139797266573120] processed a total of 592 examples[0m
[34m#metrics {"Metrics": {"update.time": {"count": 1, "max": 10994.853973388672, "sum": 10994.853973388672, "min": 10994.853973388672}}, "EndTime": 1580270653.497783, "Dimensions": {"Host": "algo-1", "Operation": "training", "Algorithm": "AWS/DeepAR"}, "StartTime": 1580270642.50285}
[0m
[34m[01/29/2020 04:04:13 INFO 139797266573120] #throughput_metric: host=algo-1, train throughput=53.8425232139 records/second[0m
[34m[01/29/2020 04:04:13 INFO 139797266573120] #progress_metric: host=algo-1, completed 8 % of epochs[0m
[34m[01/29/2020 04:04:13 INFO 13979726657

[34m[01/29/2020 04:05:13 INFO 139797266573120] Epoch[37] Batch[0] avg_epoch_loss=2.539564[0m
[34m[01/29/2020 04:05:13 INFO 139797266573120] #quality_metric: host=algo-1, epoch=37, batch=0 train loss <loss>=2.53956365585[0m
[34m[01/29/2020 04:05:18 INFO 139797266573120] Epoch[37] Batch[5] avg_epoch_loss=2.625996[0m
[34m[01/29/2020 04:05:18 INFO 139797266573120] #quality_metric: host=algo-1, epoch=37, batch=5 train loss <loss>=2.62599591414[0m
[34m[01/29/2020 04:05:18 INFO 139797266573120] Epoch[37] Batch [5]#011Speed: 62.44 samples/sec#011loss=2.625996[0m
[34m[01/29/2020 04:05:22 INFO 139797266573120] processed a total of 623 examples[0m
[34m#metrics {"Metrics": {"update.time": {"count": 1, "max": 10983.40392112732, "sum": 10983.40392112732, "min": 10983.40392112732}}, "EndTime": 1580270722.640997, "Dimensions": {"Host": "algo-1", "Operation": "training", "Algorithm": "AWS/DeepAR"}, "StartTime": 1580270711.657101}
[0m
[34m[01/29/2020 04:05:22 INFO 139797266573120] #throug

[34m[01/29/2020 04:06:16 INFO 139797266573120] Epoch[42] Batch[5] avg_epoch_loss=2.660656[0m
[34m[01/29/2020 04:06:16 INFO 139797266573120] #quality_metric: host=algo-1, epoch=42, batch=5 train loss <loss>=2.66065565745[0m
[34m[01/29/2020 04:06:16 INFO 139797266573120] Epoch[42] Batch [5]#011Speed: 60.30 samples/sec#011loss=2.660656[0m
[34m[01/29/2020 04:06:21 INFO 139797266573120] Epoch[42] Batch[10] avg_epoch_loss=2.693820[0m
[34m[01/29/2020 04:06:21 INFO 139797266573120] #quality_metric: host=algo-1, epoch=42, batch=10 train loss <loss>=2.73361654282[0m
[34m[01/29/2020 04:06:21 INFO 139797266573120] Epoch[42] Batch [10]#011Speed: 61.58 samples/sec#011loss=2.733617[0m
[34m[01/29/2020 04:06:21 INFO 139797266573120] processed a total of 670 examples[0m
[34m#metrics {"Metrics": {"update.time": {"count": 1, "max": 12219.67101097107, "sum": 12219.67101097107, "min": 12219.67101097107}}, "EndTime": 1580270781.910006, "Dimensions": {"Host": "algo-1", "Operation": "training", 

[34m[01/29/2020 04:07:25 INFO 139797266573120] Epoch[48] Batch[5] avg_epoch_loss=2.615657[0m
[34m[01/29/2020 04:07:25 INFO 139797266573120] #quality_metric: host=algo-1, epoch=48, batch=5 train loss <loss>=2.61565677325[0m
[34m[01/29/2020 04:07:25 INFO 139797266573120] Epoch[48] Batch [5]#011Speed: 63.05 samples/sec#011loss=2.615657[0m
[34m[01/29/2020 04:07:30 INFO 139797266573120] Epoch[48] Batch[10] avg_epoch_loss=2.654871[0m
[34m[01/29/2020 04:07:30 INFO 139797266573120] #quality_metric: host=algo-1, epoch=48, batch=10 train loss <loss>=2.70192837715[0m
[34m[01/29/2020 04:07:30 INFO 139797266573120] Epoch[48] Batch [10]#011Speed: 62.48 samples/sec#011loss=2.701928[0m
[34m[01/29/2020 04:07:30 INFO 139797266573120] processed a total of 655 examples[0m
[34m#metrics {"Metrics": {"update.time": {"count": 1, "max": 11884.73892211914, "sum": 11884.73892211914, "min": 11884.73892211914}}, "EndTime": 1580270850.31636, "Dimensions": {"Host": "algo-1", "Operation": "training", "

[34m[01/29/2020 04:08:32 INFO 139797266573120] Epoch[54] Batch[5] avg_epoch_loss=2.590002[0m
[34m[01/29/2020 04:08:32 INFO 139797266573120] #quality_metric: host=algo-1, epoch=54, batch=5 train loss <loss>=2.59000174205[0m
[34m[01/29/2020 04:08:32 INFO 139797266573120] Epoch[54] Batch [5]#011Speed: 62.81 samples/sec#011loss=2.590002[0m
[34m[01/29/2020 04:08:37 INFO 139797266573120] Epoch[54] Batch[10] avg_epoch_loss=2.615038[0m
[34m[01/29/2020 04:08:37 INFO 139797266573120] #quality_metric: host=algo-1, epoch=54, batch=10 train loss <loss>=2.64508194923[0m
[34m[01/29/2020 04:08:37 INFO 139797266573120] Epoch[54] Batch [10]#011Speed: 62.31 samples/sec#011loss=2.645082[0m
[34m[01/29/2020 04:08:37 INFO 139797266573120] processed a total of 667 examples[0m
[34m#metrics {"Metrics": {"update.time": {"count": 1, "max": 11916.575908660889, "sum": 11916.575908660889, "min": 11916.575908660889}}, "EndTime": 1580270917.92505, "Dimensions": {"Host": "algo-1", "Operation": "training"

[34m[01/29/2020 04:09:36 INFO 139797266573120] Epoch[59] Batch[10] avg_epoch_loss=2.537834[0m
[34m[01/29/2020 04:09:36 INFO 139797266573120] #quality_metric: host=algo-1, epoch=59, batch=10 train loss <loss>=2.49253993034[0m
[34m[01/29/2020 04:09:36 INFO 139797266573120] Epoch[59] Batch [10]#011Speed: 61.89 samples/sec#011loss=2.492540[0m
[34m[01/29/2020 04:09:36 INFO 139797266573120] processed a total of 667 examples[0m
[34m#metrics {"Metrics": {"update.time": {"count": 1, "max": 11964.980125427246, "sum": 11964.980125427246, "min": 11964.980125427246}}, "EndTime": 1580270976.779627, "Dimensions": {"Host": "algo-1", "Operation": "training", "Algorithm": "AWS/DeepAR"}, "StartTime": 1580270964.814123}
[0m
[34m[01/29/2020 04:09:36 INFO 139797266573120] #throughput_metric: host=algo-1, train throughput=55.7455905531 records/second[0m
[34m[01/29/2020 04:09:36 INFO 139797266573120] #progress_metric: host=algo-1, completed 15 % of epochs[0m
[34m[01/29/2020 04:09:36 INFO 139797

[34m[01/29/2020 04:10:35 INFO 139797266573120] Epoch[64] Batch[10] avg_epoch_loss=2.636081[0m
[34m[01/29/2020 04:10:35 INFO 139797266573120] #quality_metric: host=algo-1, epoch=64, batch=10 train loss <loss>=2.70165305138[0m
[34m[01/29/2020 04:10:35 INFO 139797266573120] Epoch[64] Batch [10]#011Speed: 61.86 samples/sec#011loss=2.701653[0m
[34m[01/29/2020 04:10:35 INFO 139797266573120] processed a total of 661 examples[0m
[34m#metrics {"Metrics": {"update.time": {"count": 1, "max": 12034.975051879883, "sum": 12034.975051879883, "min": 12034.975051879883}}, "EndTime": 1580271035.024559, "Dimensions": {"Host": "algo-1", "Operation": "training", "Algorithm": "AWS/DeepAR"}, "StartTime": 1580271022.989191}
[0m
[34m[01/29/2020 04:10:35 INFO 139797266573120] #throughput_metric: host=algo-1, train throughput=54.9228347068 records/second[0m
[34m[01/29/2020 04:10:35 INFO 139797266573120] #progress_metric: host=algo-1, completed 16 % of epochs[0m
[34m[01/29/2020 04:10:35 INFO 139797

[34m[01/29/2020 04:11:39 INFO 139797266573120] Epoch[70] Batch[5] avg_epoch_loss=2.554444[0m
[34m[01/29/2020 04:11:39 INFO 139797266573120] #quality_metric: host=algo-1, epoch=70, batch=5 train loss <loss>=2.55444435279[0m
[34m[01/29/2020 04:11:39 INFO 139797266573120] Epoch[70] Batch [5]#011Speed: 62.23 samples/sec#011loss=2.554444[0m
[34m[01/29/2020 04:11:44 INFO 139797266573120] Epoch[70] Batch[10] avg_epoch_loss=2.520675[0m
[34m[01/29/2020 04:11:44 INFO 139797266573120] #quality_metric: host=algo-1, epoch=70, batch=10 train loss <loss>=2.48015136719[0m
[34m[01/29/2020 04:11:44 INFO 139797266573120] Epoch[70] Batch [10]#011Speed: 62.09 samples/sec#011loss=2.480151[0m
[34m[01/29/2020 04:11:44 INFO 139797266573120] processed a total of 653 examples[0m
[34m#metrics {"Metrics": {"update.time": {"count": 1, "max": 11963.309049606323, "sum": 11963.309049606323, "min": 11963.309049606323}}, "EndTime": 1580271104.898417, "Dimensions": {"Host": "algo-1", "Operation": "training

[34m[01/29/2020 04:12:38 INFO 139797266573120] Epoch[75] Batch[5] avg_epoch_loss=2.642168[0m
[34m[01/29/2020 04:12:38 INFO 139797266573120] #quality_metric: host=algo-1, epoch=75, batch=5 train loss <loss>=2.64216824373[0m
[34m[01/29/2020 04:12:38 INFO 139797266573120] Epoch[75] Batch [5]#011Speed: 62.60 samples/sec#011loss=2.642168[0m
[34m[01/29/2020 04:12:43 INFO 139797266573120] Epoch[75] Batch[10] avg_epoch_loss=2.555568[0m
[34m[01/29/2020 04:12:43 INFO 139797266573120] #quality_metric: host=algo-1, epoch=75, batch=10 train loss <loss>=2.45164835453[0m
[34m[01/29/2020 04:12:43 INFO 139797266573120] Epoch[75] Batch [10]#011Speed: 62.77 samples/sec#011loss=2.451648[0m
[34m[01/29/2020 04:12:43 INFO 139797266573120] processed a total of 659 examples[0m
[34m#metrics {"Metrics": {"update.time": {"count": 1, "max": 11881.94489479065, "sum": 11881.94489479065, "min": 11881.94489479065}}, "EndTime": 1580271163.293231, "Dimensions": {"Host": "algo-1", "Operation": "training", 

[34m[01/29/2020 04:13:45 INFO 139797266573120] Epoch[81] Batch[5] avg_epoch_loss=2.535136[0m
[34m[01/29/2020 04:13:45 INFO 139797266573120] #quality_metric: host=algo-1, epoch=81, batch=5 train loss <loss>=2.53513646126[0m
[34m[01/29/2020 04:13:45 INFO 139797266573120] Epoch[81] Batch [5]#011Speed: 62.87 samples/sec#011loss=2.535136[0m
[34m[01/29/2020 04:13:49 INFO 139797266573120] processed a total of 614 examples[0m
[34m#metrics {"Metrics": {"update.time": {"count": 1, "max": 10827.65793800354, "sum": 10827.65793800354, "min": 10827.65793800354}}, "EndTime": 1580271229.607358, "Dimensions": {"Host": "algo-1", "Operation": "training", "Algorithm": "AWS/DeepAR"}, "StartTime": 1580271218.779308}
[0m
[34m[01/29/2020 04:13:49 INFO 139797266573120] #throughput_metric: host=algo-1, train throughput=56.7061424802 records/second[0m
[34m[01/29/2020 04:13:49 INFO 139797266573120] #progress_metric: host=algo-1, completed 20 % of epochs[0m
[34m[01/29/2020 04:13:49 INFO 139797266573

[34m[01/29/2020 04:14:58 INFO 139797266573120] Epoch[87] Batch[10] avg_epoch_loss=2.556320[0m
[34m[01/29/2020 04:14:58 INFO 139797266573120] #quality_metric: host=algo-1, epoch=87, batch=10 train loss <loss>=2.52488660812[0m
[34m[01/29/2020 04:14:58 INFO 139797266573120] Epoch[87] Batch [10]#011Speed: 62.33 samples/sec#011loss=2.524887[0m
[34m[01/29/2020 04:14:58 INFO 139797266573120] processed a total of 684 examples[0m
[34m#metrics {"Metrics": {"update.time": {"count": 1, "max": 11973.366975784302, "sum": 11973.366975784302, "min": 11973.366975784302}}, "EndTime": 1580271298.53101, "Dimensions": {"Host": "algo-1", "Operation": "training", "Algorithm": "AWS/DeepAR"}, "StartTime": 1580271286.55724}
[0m
[34m[01/29/2020 04:14:58 INFO 139797266573120] #throughput_metric: host=algo-1, train throughput=57.1263252895 records/second[0m
[34m[01/29/2020 04:14:58 INFO 139797266573120] #progress_metric: host=algo-1, completed 22 % of epochs[0m
[34m[01/29/2020 04:14:58 INFO 13979726

[34m[01/29/2020 04:15:56 INFO 139797266573120] Epoch[92] Batch[10] avg_epoch_loss=2.553357[0m
[34m[01/29/2020 04:15:56 INFO 139797266573120] #quality_metric: host=algo-1, epoch=92, batch=10 train loss <loss>=2.54828767776[0m
[34m[01/29/2020 04:15:56 INFO 139797266573120] Epoch[92] Batch [10]#011Speed: 62.44 samples/sec#011loss=2.548288[0m
[34m[01/29/2020 04:15:56 INFO 139797266573120] processed a total of 677 examples[0m
[34m#metrics {"Metrics": {"update.time": {"count": 1, "max": 12023.453950881958, "sum": 12023.453950881958, "min": 12023.453950881958}}, "EndTime": 1580271356.600609, "Dimensions": {"Host": "algo-1", "Operation": "training", "Algorithm": "AWS/DeepAR"}, "StartTime": 1580271344.576876}
[0m
[34m[01/29/2020 04:15:56 INFO 139797266573120] #throughput_metric: host=algo-1, train throughput=56.3061522609 records/second[0m
[34m[01/29/2020 04:15:56 INFO 139797266573120] #progress_metric: host=algo-1, completed 23 % of epochs[0m
[34m[01/29/2020 04:15:56 INFO 139797

[34m[01/29/2020 04:17:00 INFO 139797266573120] Epoch[98] Batch[5] avg_epoch_loss=2.556734[0m
[34m[01/29/2020 04:17:00 INFO 139797266573120] #quality_metric: host=algo-1, epoch=98, batch=5 train loss <loss>=2.55673404535[0m
[34m[01/29/2020 04:17:00 INFO 139797266573120] Epoch[98] Batch [5]#011Speed: 62.76 samples/sec#011loss=2.556734[0m
[34m[01/29/2020 04:17:04 INFO 139797266573120] processed a total of 610 examples[0m
[34m#metrics {"Metrics": {"update.time": {"count": 1, "max": 10913.086891174316, "sum": 10913.086891174316, "min": 10913.086891174316}}, "EndTime": 1580271424.601851, "Dimensions": {"Host": "algo-1", "Operation": "training", "Algorithm": "AWS/DeepAR"}, "StartTime": 1580271413.688308}
[0m
[34m[01/29/2020 04:17:04 INFO 139797266573120] #throughput_metric: host=algo-1, train throughput=55.8957157327 records/second[0m
[34m[01/29/2020 04:17:04 INFO 139797266573120] #progress_metric: host=algo-1, completed 24 % of epochs[0m
[34m[01/29/2020 04:17:04 INFO 139797266

[34m[01/29/2020 04:18:09 INFO 139797266573120] Epoch[104] Batch[5] avg_epoch_loss=2.534099[0m
[34m[01/29/2020 04:18:09 INFO 139797266573120] #quality_metric: host=algo-1, epoch=104, batch=5 train loss <loss>=2.53409874439[0m
[34m[01/29/2020 04:18:09 INFO 139797266573120] Epoch[104] Batch [5]#011Speed: 61.55 samples/sec#011loss=2.534099[0m
[34m[01/29/2020 04:18:13 INFO 139797266573120] processed a total of 631 examples[0m
[34m#metrics {"Metrics": {"update.time": {"count": 1, "max": 11092.368841171265, "sum": 11092.368841171265, "min": 11092.368841171265}}, "EndTime": 1580271493.186277, "Dimensions": {"Host": "algo-1", "Operation": "training", "Algorithm": "AWS/DeepAR"}, "StartTime": 1580271482.093541}
[0m
[34m[01/29/2020 04:18:13 INFO 139797266573120] #throughput_metric: host=algo-1, train throughput=56.8854627291 records/second[0m
[34m[01/29/2020 04:18:13 INFO 139797266573120] #progress_metric: host=algo-1, completed 26 % of epochs[0m
[34m[01/29/2020 04:18:13 INFO 139797

[34m[01/29/2020 04:19:23 INFO 139797266573120] Epoch[110] Batch[10] avg_epoch_loss=2.544039[0m
[34m[01/29/2020 04:19:23 INFO 139797266573120] #quality_metric: host=algo-1, epoch=110, batch=10 train loss <loss>=2.58165788651[0m
[34m[01/29/2020 04:19:23 INFO 139797266573120] Epoch[110] Batch [10]#011Speed: 62.16 samples/sec#011loss=2.581658[0m
[34m[01/29/2020 04:19:23 INFO 139797266573120] processed a total of 650 examples[0m
[34m#metrics {"Metrics": {"update.time": {"count": 1, "max": 12024.817943572998, "sum": 12024.817943572998, "min": 12024.817943572998}}, "EndTime": 1580271563.053389, "Dimensions": {"Host": "algo-1", "Operation": "training", "Algorithm": "AWS/DeepAR"}, "StartTime": 1580271551.028223}
[0m
[34m[01/29/2020 04:19:23 INFO 139797266573120] #throughput_metric: host=algo-1, train throughput=54.0544544534 records/second[0m
[34m[01/29/2020 04:19:23 INFO 139797266573120] #progress_metric: host=algo-1, completed 27 % of epochs[0m
[34m[01/29/2020 04:19:23 INFO 139

[34m[01/29/2020 04:20:21 INFO 139797266573120] Epoch[116] Batch[0] avg_epoch_loss=2.449689[0m
[34m[01/29/2020 04:20:21 INFO 139797266573120] #quality_metric: host=algo-1, epoch=116, batch=0 train loss <loss>=2.44968914986[0m
[34m[01/29/2020 04:20:26 INFO 139797266573120] Epoch[116] Batch[5] avg_epoch_loss=2.514868[0m
[34m[01/29/2020 04:20:26 INFO 139797266573120] #quality_metric: host=algo-1, epoch=116, batch=5 train loss <loss>=2.51486802101[0m
[34m[01/29/2020 04:20:26 INFO 139797266573120] Epoch[116] Batch [5]#011Speed: 61.88 samples/sec#011loss=2.514868[0m
[34m[01/29/2020 04:20:31 INFO 139797266573120] processed a total of 633 examples[0m
[34m#metrics {"Metrics": {"update.time": {"count": 1, "max": 11086.788892745972, "sum": 11086.788892745972, "min": 11086.788892745972}}, "EndTime": 1580271631.153376, "Dimensions": {"Host": "algo-1", "Operation": "training", "Algorithm": "AWS/DeepAR"}, "StartTime": 1580271620.066156}
[0m
[34m[01/29/2020 04:20:31 INFO 139797266573120]

[34m[01/29/2020 04:21:36 INFO 139797266573120] Epoch[122] Batch[5] avg_epoch_loss=2.507869[0m
[34m[01/29/2020 04:21:36 INFO 139797266573120] #quality_metric: host=algo-1, epoch=122, batch=5 train loss <loss>=2.50786884626[0m
[34m[01/29/2020 04:21:36 INFO 139797266573120] Epoch[122] Batch [5]#011Speed: 62.35 samples/sec#011loss=2.507869[0m
[34m[01/29/2020 04:21:41 INFO 139797266573120] Epoch[122] Batch[10] avg_epoch_loss=2.467563[0m
[34m[01/29/2020 04:21:41 INFO 139797266573120] #quality_metric: host=algo-1, epoch=122, batch=10 train loss <loss>=2.41919641495[0m
[34m[01/29/2020 04:21:41 INFO 139797266573120] Epoch[122] Batch [10]#011Speed: 62.16 samples/sec#011loss=2.419196[0m
[34m[01/29/2020 04:21:41 INFO 139797266573120] processed a total of 651 examples[0m
[34m#metrics {"Metrics": {"update.time": {"count": 1, "max": 12009.521961212158, "sum": 12009.521961212158, "min": 12009.521961212158}}, "EndTime": 1580271701.467798, "Dimensions": {"Host": "algo-1", "Operation": "tr

[34m[01/29/2020 04:22:43 INFO 139797266573120] Epoch[128] Batch[5] avg_epoch_loss=2.477312[0m
[34m[01/29/2020 04:22:43 INFO 139797266573120] #quality_metric: host=algo-1, epoch=128, batch=5 train loss <loss>=2.47731248538[0m
[34m[01/29/2020 04:22:43 INFO 139797266573120] Epoch[128] Batch [5]#011Speed: 62.79 samples/sec#011loss=2.477312[0m
[34m[01/29/2020 04:22:48 INFO 139797266573120] processed a total of 632 examples[0m
[34m#metrics {"Metrics": {"update.time": {"count": 1, "max": 10892.277002334595, "sum": 10892.277002334595, "min": 10892.277002334595}}, "EndTime": 1580271768.036551, "Dimensions": {"Host": "algo-1", "Operation": "training", "Algorithm": "AWS/DeepAR"}, "StartTime": 1580271757.143823}
[0m
[34m[01/29/2020 04:22:48 INFO 139797266573120] #throughput_metric: host=algo-1, train throughput=58.0220854268 records/second[0m
[34m[01/29/2020 04:22:48 INFO 139797266573120] #progress_metric: host=algo-1, completed 32 % of epochs[0m
[34m[01/29/2020 04:22:48 INFO 139797

[34m[01/29/2020 04:23:52 INFO 139797266573120] Epoch[134] Batch[5] avg_epoch_loss=2.525973[0m
[34m[01/29/2020 04:23:52 INFO 139797266573120] #quality_metric: host=algo-1, epoch=134, batch=5 train loss <loss>=2.52597339948[0m
[34m[01/29/2020 04:23:52 INFO 139797266573120] Epoch[134] Batch [5]#011Speed: 62.67 samples/sec#011loss=2.525973[0m
[34m[01/29/2020 04:23:57 INFO 139797266573120] Epoch[134] Batch[10] avg_epoch_loss=2.480496[0m
[34m[01/29/2020 04:23:57 INFO 139797266573120] #quality_metric: host=algo-1, epoch=134, batch=10 train loss <loss>=2.42592298985[0m
[34m[01/29/2020 04:23:57 INFO 139797266573120] Epoch[134] Batch [10]#011Speed: 62.33 samples/sec#011loss=2.425923[0m
[34m[01/29/2020 04:23:57 INFO 139797266573120] processed a total of 650 examples[0m
[34m#metrics {"Metrics": {"update.time": {"count": 1, "max": 11967.622995376587, "sum": 11967.622995376587, "min": 11967.622995376587}}, "EndTime": 1580271837.990278, "Dimensions": {"Host": "algo-1", "Operation": "tr

[34m[01/29/2020 04:24:55 INFO 139797266573120] Epoch[139] Batch[10] avg_epoch_loss=2.455846[0m
[34m[01/29/2020 04:24:55 INFO 139797266573120] #quality_metric: host=algo-1, epoch=139, batch=10 train loss <loss>=2.34593527317[0m
[34m[01/29/2020 04:24:55 INFO 139797266573120] Epoch[139] Batch [10]#011Speed: 62.32 samples/sec#011loss=2.345935[0m
[34m[01/29/2020 04:24:55 INFO 139797266573120] processed a total of 650 examples[0m
[34m#metrics {"Metrics": {"update.time": {"count": 1, "max": 11943.78399848938, "sum": 11943.78399848938, "min": 11943.78399848938}}, "EndTime": 1580271895.87371, "Dimensions": {"Host": "algo-1", "Operation": "training", "Algorithm": "AWS/DeepAR"}, "StartTime": 1580271883.929589}
[0m
[34m[01/29/2020 04:24:55 INFO 139797266573120] #throughput_metric: host=algo-1, train throughput=54.4212227106 records/second[0m
[34m[01/29/2020 04:24:55 INFO 139797266573120] #progress_metric: host=algo-1, completed 35 % of epochs[0m
[34m[01/29/2020 04:24:55 INFO 1397972

[34m[01/29/2020 04:25:56 INFO 139797266573120] Epoch[145] Batch[0] avg_epoch_loss=2.531512[0m
[34m[01/29/2020 04:25:56 INFO 139797266573120] #quality_metric: host=algo-1, epoch=145, batch=0 train loss <loss>=2.5315117836[0m
[34m[01/29/2020 04:26:01 INFO 139797266573120] Epoch[145] Batch[5] avg_epoch_loss=2.481858[0m
[34m[01/29/2020 04:26:01 INFO 139797266573120] #quality_metric: host=algo-1, epoch=145, batch=5 train loss <loss>=2.48185801506[0m
[34m[01/29/2020 04:26:01 INFO 139797266573120] Epoch[145] Batch [5]#011Speed: 62.03 samples/sec#011loss=2.481858[0m
[34m[01/29/2020 04:26:07 INFO 139797266573120] Epoch[145] Batch[10] avg_epoch_loss=2.460518[0m
[34m[01/29/2020 04:26:07 INFO 139797266573120] #quality_metric: host=algo-1, epoch=145, batch=10 train loss <loss>=2.43490986824[0m
[34m[01/29/2020 04:26:07 INFO 139797266573120] Epoch[145] Batch [10]#011Speed: 61.54 samples/sec#011loss=2.434910[0m
[34m[01/29/2020 04:26:07 INFO 139797266573120] processed a total of 647 ex

[34m[01/29/2020 04:27:06 INFO 139797266573120] Epoch[150] Batch[10] avg_epoch_loss=2.469075[0m
[34m[01/29/2020 04:27:06 INFO 139797266573120] #quality_metric: host=algo-1, epoch=150, batch=10 train loss <loss>=2.42104635239[0m
[34m[01/29/2020 04:27:06 INFO 139797266573120] Epoch[150] Batch [10]#011Speed: 62.06 samples/sec#011loss=2.421046[0m
[34m[01/29/2020 04:27:06 INFO 139797266573120] processed a total of 662 examples[0m
[34m#metrics {"Metrics": {"update.time": {"count": 1, "max": 12012.593984603882, "sum": 12012.593984603882, "min": 12012.593984603882}}, "EndTime": 1580272026.121459, "Dimensions": {"Host": "algo-1", "Operation": "training", "Algorithm": "AWS/DeepAR"}, "StartTime": 1580272014.108482}
[0m
[34m[01/29/2020 04:27:06 INFO 139797266573120] #throughput_metric: host=algo-1, train throughput=55.1084078298 records/second[0m
[34m[01/29/2020 04:27:06 INFO 139797266573120] #progress_metric: host=algo-1, completed 37 % of epochs[0m
[34m[01/29/2020 04:27:06 INFO 139

[34m[01/29/2020 04:28:09 INFO 139797266573120] Epoch[156] Batch[5] avg_epoch_loss=2.516651[0m
[34m[01/29/2020 04:28:09 INFO 139797266573120] #quality_metric: host=algo-1, epoch=156, batch=5 train loss <loss>=2.51665103436[0m
[34m[01/29/2020 04:28:09 INFO 139797266573120] Epoch[156] Batch [5]#011Speed: 62.29 samples/sec#011loss=2.516651[0m
[34m[01/29/2020 04:28:14 INFO 139797266573120] Epoch[156] Batch[10] avg_epoch_loss=2.522857[0m
[34m[01/29/2020 04:28:14 INFO 139797266573120] #quality_metric: host=algo-1, epoch=156, batch=10 train loss <loss>=2.53030352592[0m
[34m[01/29/2020 04:28:14 INFO 139797266573120] Epoch[156] Batch [10]#011Speed: 61.32 samples/sec#011loss=2.530304[0m
[34m[01/29/2020 04:28:14 INFO 139797266573120] processed a total of 641 examples[0m
[34m#metrics {"Metrics": {"update.time": {"count": 1, "max": 12050.902128219604, "sum": 12050.902128219604, "min": 12050.902128219604}}, "EndTime": 1580272094.752579, "Dimensions": {"Host": "algo-1", "Operation": "tr

[34m#metrics {"Metrics": {"get_graph.time": {"count": 1, "max": 32726.91798210144, "sum": 32726.91798210144, "min": 32726.91798210144}}, "EndTime": 1580272183.531955, "Dimensions": {"Host": "algo-1", "Operation": "training", "Algorithm": "AWS/DeepAR"}, "StartTime": 1580272150.804291}
[0m
[34m[01/29/2020 04:29:44 INFO 139797266573120] Number of GPUs being used: 0[0m
[34m#metrics {"Metrics": {"finalize.time": {"count": 1, "max": 33793.54906082153, "sum": 33793.54906082153, "min": 33793.54906082153}}, "EndTime": 1580272184.598534, "Dimensions": {"Host": "algo-1", "Operation": "training", "Algorithm": "AWS/DeepAR"}, "StartTime": 1580272183.53377}
[0m
[34m[01/29/2020 04:29:44 INFO 139797266573120] Serializing to /opt/ml/model/model_algo-1[0m
[34m[01/29/2020 04:29:44 INFO 139797266573120] Saved checkpoint to "/opt/ml/model/model_algo-1-0000.params"[0m
[34m#metrics {"Metrics": {"model.serialize.time": {"count": 1, "max": 149.9190330505371, "sum": 149.9190330505371, "min": 149.91903

In [66]:
# Read the name of the estimator's latest training job; the endpoint-creation
# cell further down passes this value as job_name.
job_name = estimator.latest_training_job.name

In [67]:
# Hard-code the job name here only when resuming after the notebook was stopped.
# If you run everything in a single sitting, you don't need to hard-code it.
# job_name = 'deepar-biketrain-with-categories-2018-12-21-04-05-44-478'

In [68]:
# Show which training job the endpoint-creation cell will deploy from.
print ('job name: {0}'.format(job_name))

job name: deepar-biketrain-with-categories-2020-01-29-03-55-20-132


In [69]:
# Create an endpoint for real-time predictions
# The endpoint is built from the training job identified by job_name and is
# requested with a single ml.m4.xlarge instance. image_name and role are
# expected to be defined by earlier cells of this notebook.
# The returned value is kept in endpoint_name for the cells that follow.
# NOTE(review): the "-----!" progress output suggests this call waits until the
# endpoint is ready -- confirm against the SageMaker SDK version in use.
endpoint_name = sagemaker_session.endpoint_from_job(
    job_name=job_name,
    initial_instance_count=1,
    instance_type='ml.m4.xlarge',
    deployment_image=image_name,
    role=role
)

-----------------!

In [70]:
# Print the value returned by endpoint_from_job; in the recorded run it is
# identical to the training job name.
print ('endpoint name: {0}'.format(endpoint_name))

endpoint name: deepar-biketrain-with-categories-2020-01-29-03-55-20-132


In [None]:
# Don't forget to terminate the endpoint after completing the demo.
# Otherwise, your account will accumulate hourly charges.

# You can delete it from the SageMaker management console, through the command line, or through code:

# sagemaker_session.delete_endpoint(endpoint_name)