# APIを利用してAutoMLを使う

データはターゲット時系列データ、関連時系列データ、アイテムメタデータの3種類を使う

＜利用データ＞

https://github.com/aws-samples/amazon-forecast-samples/tree/main/notebooks/advanced/Item_Level_Explainability/InstrumentData

## 参考：waitコードなど

https://github.com/aws-samples/amazon-forecast-samples/blob/main/notebooks/basic/Getting_Started/Amazon_Forecast_Quick_Start_Guide.ipynb

# まずは、RETAILドメインで学習を通す

In [None]:
# Initial value of the version suffix; a later cell reassigns it to a timestamp string.
DATA_VERSION = "demo--forecast"

In [None]:
import sys
import os
import boto3
import pandas as pd

In [None]:
import datetime

# Two separate snapshots of the current local time; only `now` feeds the suffix.
date = datetime.datetime.now()
now = datetime.datetime.today()
# Month/day/hour/minute stamp used as a suffix in the resource names created below.
DATA_VERSION = format(now, '%m%d%H%M')

### S3へデータをアップロード

In [None]:
# AWS region and Amazon Forecast domain passed to the clients and create_* calls.
REGION, DOMEIN = "ap-northeast-1", "RETAIL"
# Accumulates the ARN of each dataset so they can be attached to the dataset group.
datasetArns = list()

In [None]:
# S3 client used to upload the input CSV files.
s3_client = boto3.client(service_name='s3', region_name=REGION)

In [None]:
# Destination bucket and key prefix shared by all uploaded input files.
bucket = 'demo--forecast'
key = 'automl/input'

# Upload the target time series CSV from the local input/ directory.
tts_filename = 'TTS.csv'
s3_client.upload_file(
    Filename=f'input/{tts_filename}',
    Bucket=bucket,
    Key=f'{key}/{tts_filename}',
)

In [None]:
# Upload the related time series CSV from the local input/ directory.
rts_filename = 'RTS.csv'
s3_client.upload_file(
    Filename=f'input/{rts_filename}',
    Bucket=bucket,
    Key=f'{key}/{rts_filename}',
)

In [None]:
# Upload the item metadata CSV from the local input/ directory.
im_filename = 'IM.csv'
s3_client.upload_file(
    Filename=f'input/{im_filename}',
    Bucket=bucket,
    Key=f'{key}/{im_filename}',
)

# 2. DatasetGroup作成

In [None]:
# Amazon Forecast client used by every create/describe call below.
forecast = boto3.client(service_name='forecast', region_name=REGION)

In [None]:
# Create the dataset group; its name carries the DATA_VERSION suffix.
create_dataset_group_response = forecast.create_dataset_group(
    Domain=DOMEIN,
    DatasetGroupName=f"forecast_dataset{DATA_VERSION}",
)
# Keep the group's ARN for the later update_dataset_group / create_predictor calls.
datasetGroupArn = create_dataset_group_response["DatasetGroupArn"]

# 3.DataSet作成

In [None]:
# Create the dataset for the target time series data.
create_dataset_response_tts = forecast.create_dataset(
    DatasetType='TARGET_TIME_SERIES',
    DatasetName=f'forecast_ttsd{DATA_VERSION}',
    Domain=DOMEIN,
    DataFrequency='1M',
    Schema={
        # Attributes are listed in the same order as in the original schema.
        'Attributes': [
            {'AttributeName': attr_name, 'AttributeType': attr_type}
            for attr_name, attr_type in (
                ('Timestamp', 'timestamp'),
                ('Model_ID', 'string'),
                ('Order_Quality', 'float'),
            )
        ]
    },
)

# Add this dataset's ARN to the list that is registered with the dataset group.
datasetArn = create_dataset_response_tts['DatasetArn']
datasetArns.append(datasetArn)

In [None]:
# Create the dataset for the related time series data.
create_dataset_response_rts = forecast.create_dataset(
    DatasetType='RELATED_TIME_SERIES',
    DatasetName=f'forecast_rtsd{DATA_VERSION}',
    Domain=DOMEIN,
    DataFrequency='1M',
    Schema={
        # Attributes are listed in the same order as in the original schema.
        'Attributes': [
            {'AttributeName': attr_name, 'AttributeType': attr_type}
            for attr_name, attr_type in (
                ('Timestamp', 'timestamp'),
                ('Model_ID', 'string'),
                ('Loss_Rate', 'float'),
                ('Customer_Request', 'float'),
            )
        ]
    },
)

In [None]:
# Add the related time series dataset's ARN to the list registered with the dataset group.
datasetArn = create_dataset_response_rts["DatasetArn"]
datasetArns += [datasetArn]

In [None]:
# Create the dataset for the item metadata.
create_dataset_response_im = forecast.create_dataset(
    DatasetType='ITEM_METADATA',
    DatasetName=f'forecast_imd{DATA_VERSION}',
    Domain=DOMEIN,
    DataFrequency='1M',
    Schema={
        # Attributes are listed in the same order as in the original schema.
        'Attributes': [
            {'AttributeName': attr_name, 'AttributeType': attr_type}
            for attr_name, attr_type in (
                ('Model_ID', 'string'),
                ('Model_Type', 'string'),
            )
        ]
    },
)
# Add this dataset's ARN to the list that is registered with the dataset group.
datasetArn = create_dataset_response_im['DatasetArn']
datasetArns.append(datasetArn)

In [None]:
# Register the collected datasets with the dataset group.
forecast.update_dataset_group(
    DatasetArns=datasetArns,
    DatasetGroupArn=datasetGroupArn,
)
print("DataSet Creation OK")

# 4.学習データのimport

In [None]:
# Obtain the role via SageMaker's get_execution_role(); it is passed as RoleArn
# to the dataset import jobs and the forecast export job below.
from sagemaker import get_execution_role

role = get_execution_role()
print(role)

In [None]:
# Display the S3 path of the uploaded target time series file.
"s3://{}/{}/{}".format(bucket, key, tts_filename)

In [None]:
# Import the training data (target time series) from S3 into its dataset.
response = forecast.create_dataset_import_job(
    DatasetArn=create_dataset_response_tts['DatasetArn'],
    DatasetImportJobName='forecast_ttsd_import',
    DataSource={
        'S3Config': {
            'RoleArn': role,
            'Path': f's3://{bucket}/{key}/{tts_filename}',
        },
    },
)
# Keep the job ARN so its status can be polled later.
dataset_import_job_arn_tts = response['DatasetImportJobArn']

In [None]:
# Import the training data (related time series) from S3 into its dataset.
response = forecast.create_dataset_import_job(
    DatasetArn=create_dataset_response_rts['DatasetArn'],
    DatasetImportJobName='forecast_rtsd_import',
    TimestampFormat='yyyy-MM-dd HH:mm:ss',
    DataSource={
        'S3Config': {
            'RoleArn': role,
            'Path': f's3://{bucket}/{key}/{rts_filename}',
        },
    },
)
# Keep the job ARN so its status can be polled later.
dataset_import_job_arn_rts = response['DatasetImportJobArn']

In [None]:
# Import the training data (item metadata) from S3 into its dataset.
response = forecast.create_dataset_import_job(
    DatasetArn=create_dataset_response_im['DatasetArn'],
    DatasetImportJobName='forecast_imd_import',
    TimestampFormat='yyyy-MM-dd HH:mm:ss',
    DataSource={
        'S3Config': {
            'RoleArn': role,
            'Path': f's3://{bucket}/{key}/{im_filename}',
        },
    },
)
# Keep the job ARN so its status can be polled later.
dataset_import_job_arn_im = response['DatasetImportJobArn']

## jobの確認
CreateDatasetImportJobのQuota limit は 3

stopする場合はstop_resource()を利用

https://boto3.amazonaws.com/v1/documentation/api/latest/reference/services/forecast.html#ForecastService.Client.stop_resource


importジョブを表示

https://boto3.amazonaws.com/v1/documentation/api/latest/reference/services/forecast.html#ForecastService.Client.list_dataset_import_jobs

In [None]:
# Show the dataset import jobs whose Status is CREATE_IN_PROGRESS.
forecast.list_dataset_import_jobs(
    Filters=[
        dict(Condition="IS", Key="Status", Value="CREATE_IN_PROGRESS"),
    ],
)

In [None]:
#forecast.list_dataset_import_jobs()

## importが完了するまでwait

https://github.com/aws-samples/amazon-forecast-samples/blob/main/notebooks/basic/Getting_Started/Amazon_Forecast_Quick_Start_Guide.ipynb

wait関数

https://github.com/aws-samples/amazon-forecast-samples/blob/65b83b2f53e11274567ab545baf8ebeca101540b/notebooks/common/util/fcst_utils.py#L35

In [None]:
class StatusIndicator:
    """Print a compact progress trace of a polled status to stdout.

    Each new status value is written once (followed by a space); every
    repeated poll of the same status appends a single dot.
    """

    def __init__(self):
        # Last status written, and whether the current output line has content.
        self.previous_status = None
        self.need_newline = False

    def update(self, status):
        """Record one poll result, printing the status name or a dot."""
        out = sys.stdout
        if status == self.previous_status:
            # Same status as the previous poll: extend the current line.
            out.write(".")
        else:
            # Status changed: finish the previous line, then start a new one.
            if self.need_newline:
                out.write("\n")
            out.write(status + " ")
            self.previous_status = status
        self.need_newline = True
        out.flush()

    def end(self):
        """Terminate the current output line if anything was printed."""
        if not self.need_newline:
            return
        sys.stdout.write("\n")

In [None]:
import time

def wait(callback, time_interval = 10):
    """Poll ``callback`` until the resource reaches a terminal status.

    Args:
        callback: Zero-argument callable returning a mapping with a
            ``'Status'`` key (e.g. a ``describe_*`` call wrapped in a lambda).
        time_interval: Seconds to sleep between polls.

    Returns:
        True if the final status is ``'ACTIVE'``, otherwise False.
    """
    status_indicator = StatusIndicator()

    while True:
        status = callback()['Status']
        status_indicator.update(status)
        # Stop on success and on every terminal failure/stop state; checking
        # only CREATE_FAILED would loop forever on e.g. CREATE_STOPPED after
        # a stop_resource() call.
        if status == 'ACTIVE' or status.endswith(('_FAILED', '_STOPPED')):
            break
        time.sleep(time_interval)

    status_indicator.end()

    return status == 'ACTIVE'

In [None]:
# Block until each dataset import job finishes; stop at the first one that did not become ACTIVE.
status = wait(
    callback=lambda: forecast.describe_dataset_import_job(
        DatasetImportJobArn=dataset_import_job_arn_tts
    )
)
assert status
print("Import TTS OK")

status = wait(
    callback=lambda: forecast.describe_dataset_import_job(
        DatasetImportJobArn=dataset_import_job_arn_rts
    )
)
assert status
print("Import RTS OK")

status = wait(
    callback=lambda: forecast.describe_dataset_import_job(
        DatasetImportJobArn=dataset_import_job_arn_im
    )
)
assert status
print("Import IM OK")

# predictor作成

In [None]:
# Check: display the dataset group ARN that the predictor will be trained on
datasetGroupArn

In [None]:
# Create the predictor (forecasting model) with PerformAutoML enabled.
response = forecast.create_predictor(
    PredictorName=f'forecast_dataset_predictor{DATA_VERSION}',
    PerformAutoML=True,
    ForecastHorizon=12,
    FeaturizationConfig={'ForecastFrequency': '1M'},
    InputDataConfig={'DatasetGroupArn': datasetGroupArn},
)

# Keep the predictor ARN, then block until training ends; fail the cell if it is not ACTIVE.
predictor_arn = response['PredictorArn']
print(predictor_arn)
status = wait(callback=lambda: forecast.describe_predictor(PredictorArn=predictor_arn))
assert status

# forecast作成

In [None]:
# Check: display the ARN of the predictor used to create the forecast
predictor_arn

In [None]:
# Run the forecast using the trained predictor.
response = forecast.create_forecast(
    ForecastName=f'create_forecast_data{DATA_VERSION}',
    PredictorArn=predictor_arn,
    # Each quantile must be its own list element; the single string
    # '0.1, 0.5, 0.9' is not a valid forecast type.
    ForecastTypes=['0.1', '0.5', '0.9']
)

forecast_create_arn = response['ForecastArn']
print(forecast_create_arn)
# Poll with the `forecast` client defined earlier in this notebook
# (`forecast_client` is never defined and raised a NameError).
status = wait(lambda: forecast.describe_forecast(ForecastArn=forecast_create_arn))
assert status

# forecast結果をS3にexport

In [None]:
# Bucket/prefix string for the input location; displayed for reference.
out_dir = '/'.join((bucket, key))
out_dir

In [None]:
# Export the forecast results to S3.
response = forecast.create_forecast_export_job(
    ForecastArn=forecast_create_arn,
    ForecastExportJobName='export_forecast',
    Destination={
        'S3Config': {
            'RoleArn': role,
            # NOTE(review): this writes to the 'forecastfile' bucket, not to
            # `bucket`/`out_dir` defined above - confirm this is intended.
            'Path': f's3://forecastfile/export_forecast{DATA_VERSION}',
        },
    },
)

# clean up


In [None]:
import boto3

# Fresh Forecast client for clean-up; no region is passed, so boto3's default region applies.
forecast = boto3.client(service_name='forecast')

In [None]:
# ARN of the dataset group whose whole resource tree is deleted in the next cell.
datase_group_arn = (
    'arn:aws:forecast:ap-northeast-1:805433377179:'
    'dataset-group/forecast_dataset11150703'
)

In [None]:
# Delete the dataset group identified by datase_group_arn via delete_resource_tree.
# NOTE(review): presumably this also removes child resources (predictors, forecasts) - confirm in the API docs.
forecast.delete_resource_tree(ResourceArn = datase_group_arn)

# 確認



In [None]:
# Fetch the datasets that still exist; the response's 'Datasets' list is iterated in the delete loop below.
# NOTE(review): only a single list_datasets() call is made - confirm whether pagination is needed.
list_datasets = forecast.list_datasets()

https://boto3.amazonaws.com/v1/documentation/api/latest/reference/services/forecast.html#ForecastService.Client.delete_dataset

In [None]:
# Delete every dataset returned by the earlier list_datasets() call,
# printing each ARN before its deletion is requested.
for dataset in list_datasets['Datasets']:
    dataset_arn = dataset['DatasetArn']
    print(dataset_arn)
    forecast.delete_dataset(DatasetArn=dataset_arn)
    

In [None]:
#forecast.list_datasets()