In [1]:
# S3 key prefix under which this notebook stores its input data, config file and prediction payload
prefix = "DEMO-convect-byo"

# Define IAM role
import boto3
import re

import os
import numpy as np
import pandas as pd

# IAM role assumed by the SageMaker jobs started from this notebook.
# It can be overridden through the SAGEMAKER_EXECUTION_ROLE_ARN environment variable so the
# notebook can run in another account without editing this cell; when the variable is unset,
# the original demo role is used.
# (When running inside SageMaker, `sagemaker.get_execution_role()` is an alternative.)
role = os.environ.get(
    "SAGEMAKER_EXECUTION_ROLE_ARN",
    "arn:aws:iam::932478379847:role/service-role/AmazonSageMaker-ExecutionRole-20210820T133633",
)

print(role)

arn:aws:iam::932478379847:role/service-role/AmazonSageMaker-ExecutionRole-20210820T133633


In [2]:
import sagemaker as sage
from time import gmtime, strftime



In [3]:
# SageMaker session reused by the rest of the notebook for S3 uploads and as the estimator's session.
sess = sage.Session()


In [4]:
# Default S3 bucket of the session; the config upload and the training input URIs are built from it.
bucket = sess.default_bucket()

# Show the bucket name so the S3 locations used later can be checked.
print(bucket)

sagemaker-us-west-2-932478379847


# Upload data 

In [5]:
# Local folder that holds the demo input files.
WORK_DIRECTORY = "../../tests/data/"

# Upload the whole folder to S3 under `prefix` and keep the location returned by the session.
data_location = sess.upload_data(path=WORK_DIRECTORY, key_prefix=prefix)

In [27]:

# Model configuration for the training job, kept as a YAML document inside a string.
# Top-level keys: ml_sum_model, predict_target_params, quantile_model_alpha_params,
# run_ml_quantile_model.
# NOTE(review): how each setting is interpreted is decided by the training container,
# not by this notebook — confirm against the container's documentation before changing values.
config_str = """
ml_sum_model:
  cv_param:
    split_train_frac: 0.8
    split_type: time_split
    train_folder_cnt: 3
  dt_feature_extraction_params:
    - feature_name: isoweekday
    - feature_name: is_workday_CN
  encoder_param:
    autoOneHotThreshold: 5
  impute_param:
    fill_value: 0
    strategy: constant
  main_ts_rolling_params:
    - rolling_method: mean
      size:
        - 1
        - 3
        - 5
        - 7
    - rolling_method: lag
      size:
        - 7
        - 14
    - rolling_method: std
      size:
        - 3
        - 5
        - 7
  ml_model_params:
    - method_ind: lgb
      param_opt_ind: random
      param_model:
        objective: tweedie
        tweedie_variance_power: 1.4
  model_select_method: manual
  model_select_param:
    metrics_funcs:
      - mae
    only_use_out_sample: true
  other_ts_rolling_params:
    - rolling_method: mean
      size:
        - 1
        - 7
        - 14
  scaler_param:
    scaler_method: MaxAbs
  train_expand_param:
    - long_jump_interval_list: [30, 60, 90]
      num_short_jumps: 6
      short_jump_interval: 7
  ts_window_param:
    time_window: 60
predict_target_params:
  - predict_horizon: 7
    predict_offset: 0
quantile_model_alpha_params:
  - 0.1
run_ml_quantile_model: false
"""

# Because the config string is too long, it cannot be passed as a hyperparameter;
# it is uploaded to S3 as a file instead.
config_key = prefix + "/config.yaml"
sess.upload_string_as_file_body(config_str, bucket, config_key)

's3://sagemaker-us-west-2-932478379847/DEMO-convect-byo/config.yaml'

In [28]:
# List the local input files in the folder that was uploaded.
!ls {WORK_DIRECTORY}

cate_info.csv  stock_ts.csv  target_ts.csv


In [29]:
# List every object stored under the demo prefix in the bucket.
!aws s3 ls {bucket}/{prefix} --recursive

2021-08-22 22:35:35       2060 DEMO-convect-byo/cate_info.csv
2021-08-22 22:50:05       1254 DEMO-convect-byo/config.yaml
2021-08-22 18:06:01       2629 DEMO-convect-byo/predict-payload.json
2021-08-22 22:35:34    1004419 DEMO-convect-byo/stock_ts.csv
2021-08-22 22:35:33    1020803 DEMO-convect-byo/target_ts.csv


In [30]:
# Build the estimator for the custom algoflow container image.

# Account id and region come from the boto session behind the SageMaker session.
sts_client = sess.boto_session.client("sts")
account = sts_client.get_caller_identity()["Account"]
region = sess.boto_session.region_name

# ECR URI of the container image, resolved for the current account and region.
image = "{}.dkr.ecr.{}.amazonaws.com/sagemaker-algoflow:latest".format(account, region)

# S3 location that receives the training job output.
training_output_path = "s3://{}/byo-algoflow/output".format(sess.default_bucket())

algo = sage.estimator.Estimator(
    image,
    role,
    1,  # number of training instances
    "ml.m4.xlarge",  # training instance type
    output_path=training_output_path,
    sagemaker_session=sess,
)

In [31]:
# Hyperparameters that tell the training container which columns hold the
# timestamp, the series key and the target value.
column_hyperparameters = {
    "time_col": "date",
    "key_col": "item_id",
    "value_col": "sales",
}
algo.set_hyperparameters(**column_hyperparameters)

In [33]:
# One input channel per file; all of them live under `prefix` in the bucket.
training_channels = {
    # target time series
    "target_ts": "s3://{}/{}/target_ts.csv".format(bucket, prefix),
    # item metadata
    "item_meta": "s3://{}/{}/cate_info.csv".format(bucket, prefix),
    # related time series
    "related_ts": "s3://{}/{}/stock_ts.csv".format(bucket, prefix),
    # YAML model configuration uploaded earlier in the notebook
    "config": "s3://{}/{}/config.yaml".format(bucket, prefix),
}

# Start the training job with these channels.
algo.fit(training_channels)

2021-08-23 05:59:20 Starting - Starting the training job...
2021-08-23 05:59:43 Starting - Launching requested ML instancesProfilerReport-1629698359: InProgress
......
2021-08-23 06:00:46 Starting - Preparing the instances for training......
2021-08-23 06:01:54 Downloading - Downloading input data
2021-08-23 06:01:54 Training - Downloading the training image.........
2021-08-23 06:03:27 Training - Training image download completed. Training in progress...[34mStarting the training.[0m
[34m("Training params: {'value_col': 'sales', 'key_col': 'item_id', 'time_col': "
 "'date'}")[0m
[34m("Schemas generated: [{'type': 'TARGET_TIME_SERIES', 'path': "
 "'/opt/ml/input/data/target_ts/target_ts.csv', 'format': 'csv', 'schema': "
 "{'key': ['item_id'], 'time': 'date', 'values': ['sales']}}, {'type': "
 "'ITEM_METADATA', 'path': '/opt/ml/input/data/item_meta/cate_info.csv', "
 "'format': 'csv', 'schema': {'key': ['item_id']}}, {'type': "
 "'RELATED_TIME_SERIES', 'path': '/opt/ml/input/data/r

In [34]:
# List the objects written under the estimator's output path.
!aws s3 ls s3://{bucket}/byo-algoflow/output/ --recursive

2021-08-22 13:11:59    4060749 byo-algoflow/output/sagemaker-algoflow-2021-08-22-20-11-28-318/model.tar.gz
2021-08-22 13:11:59        194 byo-algoflow/output/sagemaker-algoflow-2021-08-22-20-11-28-318/output.tar.gz
2021-08-22 17:59:33    4060736 byo-algoflow/output/sagemaker-algoflow-2021-08-23-00-59-02-005/model.tar.gz
2021-08-22 17:59:33        194 byo-algoflow/output/sagemaker-algoflow-2021-08-23-00-59-02-005/output.tar.gz
2021-08-22 22:55:01     159390 byo-algoflow/output/sagemaker-algoflow-2021-08-23-05-51-07-457/profiler-output/system/incremental/2021082305/1629697980.algo-1.json
2021-08-22 22:55:30     242034 byo-algoflow/output/sagemaker-algoflow-2021-08-23-05-51-07-457/profiler-output/system/incremental/2021082305/1629698040.algo-1.json
2021-08-22 22:55:30     118565 byo-algoflow/output/sagemaker-algoflow-2021-08-23-05-51-07-457/profiler-output/system/incremental/2021082305/1629698100.algo-1.json
2021-08-22 22:55:45     322349 byo-algoflow/output/sagemaker-algoflow-2021-08-23-

In [35]:
# Configure the batch transform job for the trained model.

# Transform results are written to this folder in the default bucket.
transform_output_folder = "batch-transform-output"
output_path = "s3://{}/byo-algoflow/{}".format(sess.default_bucket(), transform_output_folder)

# Settings for the transformer, collected in one place.
transformer_settings = dict(
    instance_count=1,
    instance_type="ml.m4.xlarge",
    output_path=output_path,
    assemble_with="Line",
    accept="application/json",
    strategy=None,
    max_concurrent_transforms=2,
)

transformer = algo.transformer(**transformer_settings)

In [36]:
# Upload the sample prediction payload to S3 under `prefix`; the cell output shows its S3 URI.
payload_local_path = "../local_test/predict-payload.json"
sess.upload_data(payload_local_path, key_prefix=prefix)

's3://sagemaker-us-west-2-932478379847/DEMO-convect-byo/predict-payload.json'

In [37]:
# Run the batch transform job on the uploaded payload and block until it finishes.
#
# The input URI is built from `bucket` and `prefix` rather than a hard-coded,
# account- and region-specific bucket name, so it always points to where the payload
# was uploaded, whichever account or region the notebook runs in.
payload_s3_uri = "s3://{}/{}/predict-payload.json".format(bucket, prefix)

transformer.transform(
    payload_s3_uri,
    content_type="application/json",
    split_type=None,  # send the payload file as a single request
)
transformer.wait()

................................[34mStarting the inference server with 1 workers.[0m
[35mStarting the inference server with 1 workers.[0m
[34m[2021-08-23 06:10:52 +0000] [9] [INFO] Starting gunicorn 20.1.0[0m
[34m[2021-08-23 06:10:52 +0000] [9] [INFO] Listening at: unix:/tmp/gunicorn.sock (9)[0m
[34m[2021-08-23 06:10:52 +0000] [9] [INFO] Using worker: sync[0m
[34m[2021-08-23 06:10:52 +0000] [13] [INFO] Booting worker with pid: 13[0m
[35m[2021-08-23 06:10:52 +0000] [9] [INFO] Starting gunicorn 20.1.0[0m
[35m[2021-08-23 06:10:52 +0000] [9] [INFO] Listening at: unix:/tmp/gunicorn.sock (9)[0m
[35m[2021-08-23 06:10:52 +0000] [9] [INFO] Using worker: sync[0m
[35m[2021-08-23 06:10:52 +0000] [13] [INFO] Booting worker with pid: 13[0m
[34m169.254.255.130 - - [23/Aug/2021:06:10:57 +0000] "GET /ping HTTP/1.1" 200 1 "-" "Go-http-client/1.1"[0m
[35m169.254.255.130 - - [23/Aug/2021:06:10:57 +0000] "GET /ping HTTP/1.1" 200 1 "-" "Go-http-client/1.1"[0m
[34m169.254.255.130 - -

In [38]:
# List the objects written under the batch transform output path.
!aws s3 ls {output_path} --recursive

2021-08-22 23:11:24       5934 byo-algoflow/batch-transform-output/predict-payload.json.out
2021-08-22 18:07:39       5937 byo-algoflow/batch-transform-output/sagemaker-algoflow-2021-08-23-01-06-36-135/predict-payload.json.out


In [39]:
import pandas as pd

# Read the batch transform result straight from S3 and preview the first rows.
transform_result_uri = "s3://{}/byo-algoflow/batch-transform-output/predict-payload.json.out".format(bucket)
transform_result = pd.read_csv(transform_result_uri)
transform_result.head()

Unnamed: 0.1,Unnamed: 0,item_id,predict_sum,predict_start_date,predict_horizon
0,dummy_item_0,dummy_item_0,784.793026,2018-12-27,7
1,dummy_item_1,dummy_item_1,671.690725,2018-12-27,7
2,dummy_item_10,dummy_item_10,622.471802,2018-12-27,7
3,dummy_item_11,dummy_item_11,593.507004,2018-12-27,7
4,dummy_item_12,dummy_item_12,693.077343,2018-12-27,7
